Skip to content

Instantly share code, notes, and snippets.

@redguardtoo
Created June 14, 2019 12:18
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save redguardtoo/4d06089348152d4b374c5204525174fa to your computer and use it in GitHub Desktop.
Save redguardtoo/4d06089348152d4b374c5204525174fa to your computer and use it in GitHub Desktop.
Sort Chinese characters
(require 'cl-lib)
(defun my-read-file (file)
  "Return the whole contents of FILE as a string.
FILE is passed through `file-truename' before it is read."
  (let ((resolved-path (file-truename file)))
    (with-temp-buffer
      ;; Load the file into a scratch buffer and hand back its text.
      (insert-file-contents resolved-path)
      (buffer-string))))
;; (message "content=%s" (my-read-file))
(defun my-format-dictionary (&optional text-file dict-file new-file)
  "Reorder the candidates on each dictionary line by character frequency.

Count how often every character whose code is above 256 occurs in
TEXT-FILE.  Then, for every line of DICT-FILE that matches the regexp
\"^[a-z]+ \" followed by space-separated candidates, stably re-sort
the candidates so that those whose first character occurs more often
in TEXT-FILE come first.  Lines that do not match are copied
unchanged.  The result is written to NEW-FILE with the same line
structure as DICT-FILE.

TEXT-FILE defaults to \"~/Downloads/dict.txt\", DICT-FILE to
\"./pyim-basedict.pyim\" and NEW-FILE to \"./new-base-dict.pyim\".

Return the string produced by the final `message' call."
  (interactive)
  (let* ((h (make-hash-table :size 1000))
         (text (my-read-file (or text-file "~/Downloads/dict.txt")))
         (dict (my-read-file (or dict-file "./pyim-basedict.pyim")))
         (newfile (file-truename (or new-file "./new-base-dict.pyim")))
         (lines nil))
    ;; Build the frequency table.  Only characters above 256 are counted;
    ;; the original author uses this as the "is a Chinese character" test.
    (cl-loop for ch across text
             when (> ch 256)
             do (cl-incf (gethash ch h 0)))
    (dolist (l (split-string dict "\n"))
      (push
       (if (string-match "^\\([a-z]+ \\)\\(.*\\)" l)
           (let* ((pinyin (match-string 1 l))
                  (chars (split-string (match-string 2 l) " "))
                  ;; Pair each candidate with the frequency of its first
                  ;; character.  Doubled or trailing spaces produce empty
                  ;; candidates; rank those as 0 instead of indexing into
                  ;; an empty string.
                  (keyed (mapcar (lambda (c)
                                   (cons (if (string= c "")
                                             0
                                           (gethash (aref c 0) h 0))
                                         c))
                                 chars))
                  ;; Stable sort: candidates with equal frequency keep
                  ;; their original relative order.
                  (sorted (cl-stable-sort keyed #'> :key #'car)))
             (concat pinyin (mapconcat #'cdr sorted " ")))
         l)
       lines))
    (with-temp-buffer
      ;; Join with "\n" rather than appending "\n" to every element, so a
      ;; dictionary ending in a newline does not gain an extra blank line.
      (insert (mapconcat #'identity (nreverse lines) "\n"))
      ;; `write-region' writes the text without making this temporary
      ;; buffer visit NEWFILE.
      (write-region (point-min) (point-max) newfile))
    (message "%s created." newfile)))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment