Emacs-lisp: whrl-count-words-in-paragraphs
- exclude word count drawer in org-export:
#+OPTIONS: d:(not "WORDCOUNT")
;; -*- coding: utf-8 -*-
(defun whrl-count-words-in-paragraphs ()
"
1) Narrows to org-mode subtree.
2) Delete empty whitespaces with equal or more than three empty lines.
3) Count words in org-mode subtree paragraphs.
4) Writes or updates word count results in :WORDCOUNT:-Drawers below the paragraphs.
Disclaimer: Without warranty of any kind. Content may change.
Updates: 2020-03-21
License: MIT
ID: [[l:topic/20200321T205803]]
"
(interactive)
;; narrow to subtree
(org-back-to-heading)
(org-narrow-to-subtree)
;; erase ":WORDCOUNT:"-Drawer if already exist
(while
(re-search-forward "^:WORDCOUNT:[^)]+)\n:END:\n+" nil t)
(replace-match "\n" nil nil)
)
;; erase >= 3 x empty lines and substitute with 2 empty lines
(beginning-of-buffer)
(while
(re-search-forward "\n\\{3,\\}" nil t)
(replace-match "\n\n" nil nil)
)
;; search for paragraph, count words in paragraph and cumulative sum of words
(setq $words-cum 0)
(beginning-of-buffer)
;; one possible definition of a paragraph
(setq $regexp
"\\(\\([()/a-zA-Z0-9:,]\\{2,\\} \\)\\{2,\\}[()/a-zA-Z0-9,:]\\{2,\\}[.?!] \\)\\{2,\\}"
)
;; (setq -regexp "\\(\\([^!?.]\\{2,\\} \\)\\{2,\\}[^!.?]\\{2,\\}[.?!] \\)\\{2,\\}")
(while (re-search-forward $regexp nil t)
(org-backward-paragraph) ; beginning of paragraph
;; exclude org comment-region starting with "#"
(setq $comment-region (string (following-char)))
(when (string= $comment-region "#")
(org-forward-paragraph)
(re-search-forward $regexp nil t)
(org-backward-paragraph)
)
;; get boundaries of paragraph
(setq $a (point))
(org-forward-paragraph)
(setq $b (point))
;; count words in paragraph
(setq $words (count-words-region $a $b))
;; calculate cumulative sum of words in subtree
(setq $words-cum (+ $words $words-cum))
;; insert word count below paragraph in :WORDCOUNT: Drawer
(insert (concat "\n:WORDCOUNT:\n: ("
(format "%s" $words)"/"
(format "%s" $words-cum)
" words)\n:END:\n\n")
)
)
)