Last active
May 16, 2022 23:23
-
-
Save jidaikobo-shibata/08a752b04107dbc50ef5 to your computer and use it in GitHub Desktop.
Emacs(Elisp): 全角英数字を半角英数字に、半角カナを全角に、UTF-8の濁点分離を直す。
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
;;; ------------------------------------------------------------ | |
;;; 全角英数字を半角英数字に、半角カナを全角に、UTF-8の濁点分離を直す | |
;; http://d.hatena.ne.jp/nakamura001/20120529/1338305696 | |
;; http://www.sakito.com/2010/05/mac-os-x-normalization.html | |
;; gist-description: Emacs(Elisp): 全角英数字を半角英数字に、半角カナを全角に、UTF-8の濁点分離を直す。 | |
;; gist-id: 08a752b04107dbc50ef5 | |
;; gist-name: normalize-chars.el | |
;; gist-private: nil | |
(require 'ucs-normalize) | |
;; Prefer UTF-8 in general, and use the `utf-8-hfs' coding system for
;; both file names and locale-dependent text.
(prefer-coding-system 'utf-8)
(setq file-name-coding-system 'utf-8-hfs
      locale-coding-system 'utf-8-hfs)
(defun normalize-chars ()
  "Normalize Japanese text in the active region or in the whole buffer.

Three passes are applied, in this order, to the target text:
  1. `japanese-zenkaku-region' (called with a non-nil third argument),
     to turn half-width kana into full-width kana;
  2. `japanese-hankaku-region' (called with a non-nil third argument),
     to turn full-width alphanumerics into half-width ones;
  3. `ucs-normalize-NFC-region', to recombine separated voiced marks.

If the region is active it is the target.  Otherwise the user is
asked whether to process the whole buffer; any answer other than
\"y\" signals an error.

Return the value of the final normalization pass."
  (interactive)
  (let (beg end)
    (cond
     ;; An active region takes precedence.
     ((region-active-p)
      (setq beg (region-beginning)
            end (region-end)))
     ;; No region: fall back to the whole buffer, but only on request.
     ((string= (read-string "normalize whole buffer?(y, n): " nil) "y")
      (setq beg (point-min)
            end (point-max)))
     (t
      (error "Error: no target region")))
    ;; Every pass may change the length of the text (e.g. a half-width
    ;; kana followed by a voiced mark becomes one character), so a fixed
    ;; integer END goes stale after the first pass.  Narrow once and let
    ;; each pass work on the current bounds of the narrowed buffer.
    (save-restriction
      (narrow-to-region beg end)
      (japanese-zenkaku-region (point-min) (point-max) t)
      (japanese-hankaku-region (point-min) (point-max) t)
      (ucs-normalize-NFC-region (point-min) (point-max)))))
;; Exclude the prolonged sound mark, wave dashes, punctuation and similar
;; symbols from conversion by clearing their `ascii' char-code property
;; once japan-util has been loaded.
(eval-after-load "japan-util"
  '(dolist (excluded-char '(?ー ?〜 ?〜 ?~ ?( ?) ?、 ?。 ?& ?? ?!))
     (put-char-code-property excluded-char 'ascii nil)))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment