Skip to content

Instantly share code, notes, and snippets.

@kawabata
Created January 8, 2011 03:49
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save kawabata/770523 to your computer and use it in GitHub Desktop.
Save kawabata/770523 to your computer and use it in GitHub Desktop.
ndl-porta.el
;;; ndl-porta.el --- NDL (National Diet Library of Japan) PORTA access
;; Copyright (C) 2011 TK
;; Author: TK <kawabata.taichi@gmail.com>
;; Keywords: isbn, bibtex, library
;;; Commentary
;;
;; 国会図書館 PORTA から様々な書誌情報を取り出す。
;; サンプルとしてbibtexで情報を出力する関数を用意。(ndl-isbn2bibtex 関数)
;;
;; 実行例
;; (ndl-isbn2bibtex "4150105928")
;; "@book{4150105928,
;; title = {第二ファウンデーション : 銀河帝国興亡史3},
;; sorttitle = {ダイニ ファウンデーション},
;; author = {アイザック・アシモフ},
;; translator = {岡部宏之},
;; sortname = {オカベ,ヒロユキ(1931-)},
;; date = {1984.12},
;; publisher = {早川書房},
;; location = {東京},
;; pagetotal = {366p},
;; size = {16cm},
;; price = {税込価格 : 420円},
;; series = {ハヤカワ文庫. SF},
;; ndc = {933},
;; url = {http://api.porta.ndl.go.jp/ndlopac/cgi-bin/ndlopac/ndl-book?kywd=85026435},
;; isbn = {4-15-010592-8},
;; jpno = {85026435},
;; ndlcn = {KS151-126},
;; language = {JPN}
;; }
;; "
;; 参考情報
;; http://porta.ndl.go.jp/wiki/Wiki.jsp?page=外部提供インタフェースについて
(defvar ndl-porta-query-url
"http://api.porta.ndl.go.jp/servicedp/SRUDp?operation=searchRetrieve&recordPacking=xml&recordSchema=dcndl_porta&query="
"国会図書館PORTA問い合わせURL")
(defvar bibtex-field-sort-order
'(title sorttitle subtitle edition origtitle
author editor translator sortname nameaddon organization
journal journaltitle volume pages
date year
publisher location notes annotation pagetotal pagination size
price series keywords
isbn issn jpno
file url
ndc ndlcn language)
"フィールドの編集・ソート順番")
(defun ndl-porta-query (query)
"国会図書館PORTAに問い合わせ、XMLデータを取得する。"
(let* ((buffer
(url-retrieve-synchronously
(concat ndl-porta-query-url query))))
(when buffer
(switch-to-buffer buffer)
(set-buffer-multibyte t)
(prog1
(car (xml-parse-region (point-min) (point-max)))
(kill-buffer buffer)))))
(defun ndl-collect-records (xml)
"XMLデータから、recordを取り出してリスト化する。"
(let* ((records (assoc 'records xml))
result)
(dolist (record records)
(if (eq (car-safe record) 'record)
(setq result (cons record result))))
(nreverse result)))
(defun ndl-xml-pred (element attributes regexp)
`(lambda (x) (and (listp x)
,@(when element
`((equal ',element (car x))))
,@(when attributes
`((equal ',attributes (cadr x))))
,@(when regexp
`((string-match ',regexp (caddr x)))))))
(defun ndl-find (element attributes regexp xml)
"XMLデータで、ELEMENTとATTRIBUTESを持ち、内部要素がREGEXPなものを探し、
その最初の要素内容を返す。REGEXPが指定された場合は、第1マッチを返す。"
(let ((val (find-if (ndl-xml-pred element attributes regexp) xml)))
(when val
(setq val (cddr val))
(if regexp (mapcar (lambda (x) (string-match regexp x) (match-string 1 x)) val) val))))
(defun ndl-collect (element attributes regexp xml)
"XMLデータで、ELEMENTとATTRIBUTESを持ち、内部要素がREGEXPなものを探し、
その全ての要素内容を繋げたものを返す。"
(let ((vals (remove-if-not (ndl-xml-pred element attributes regexp) xml)))
(when vals
(setq vals (apply 'append (mapcar 'cddr vals)))
(if regexp (mapcar (lambda (x) (string-match regexp x) (match-string 1 x)) vals) vals))))
(defun ndl-record2data (record)
"record XMLリストから、データを生成する。"
(message "debug record=%s" record)
(let* ((recordData (assoc 'recordData record))
(dcndl_porta:dc (assoc 'dcndl_porta:dc recordData))
(author (ndl-find 'dc:creator nil "^\\(.+\\)[ ‖]著$" dcndl_porta:dc))
(editor (ndl-find 'dc:creator nil "^\\(.+\\)[ ‖]編$" dcndl_porta:dc))
(translator (ndl-find 'dc:creator nil "^\\(.+\\)[ ‖]訳$" dcndl_porta:dc))
(title (ndl-find 'dc:title nil nil dcndl_porta:dc))
(titleaddon (ndl-find 'dcterms:alternative nil nil dcndl_porta:dc))
(sorttitle (ndl-collect 'dcndl:titleTranscription nil nil dcndl_porta:dc))
(authors (ndl-collect 'dc:creator '((xsi:type . "dcndl:NDLNH")) nil dcndl_porta:dc))
(authors2 (ndl-collect 'dc:creator nil nil dcndl_porta:dc))
(sortname (ndl-collect 'dcndl:creatorTranscription nil nil dcndl_porta:dc))
(sortname2 (ndl-collect 'dc:contributor nil nil dcndl_porta:dc)) ;; English names, etc.
(nameaddon (ndl-collect 'dcndl:creatorAlternative nil nil dcndl_porta:dc))
(date (ndl-find 'dcterms:issued nil nil dcndl_porta:dc))
(year (ndl-find 'dcterms:issued '((xsi:type . "dcterms:W3CDTF")) nil dcndl_porta:dc))
(publisher (ndl-collect 'dc:publisher nil nil dcndl_porta:dc))
(location (ndl-find 'dc:description nil "^出版地 : \\(.+\\)" dcndl_porta:dc))
(subtitle (ndl-find 'dc:description nil "^各巻タイトル : \\(.+\\)" dcndl_porta:dc))
(edition (ndl-find 'dc:description nil "^版表示 : \\(.+\\)" dcndl_porta:dc))
(condition (ndl-find 'dc:description nil "^形態 : \\(.+\\)" dcndl_porta:dc))
(notes (ndl-find 'dc:description nil "^注記 : \\(.+\\)" dcndl_porta:dc))
(contents (ndl-find 'dc:description nil "^内容 : \\(.+\\)" dcndl_porta:dc))
(organization (ndl-find 'dc:description nil "^所属名 : \\(.+\\)" dcndl_porta:dc))
(pagination (ndl-find 'dc:description nil "^装丁 : \\(.+\\)" dcndl_porta:dc))
(origtitle (ndl-find 'dc:description nil "^原タイトル : \\(.+\\)" dcndl_porta:dc))
(annotation (ndl-collect 'dc:description nil nil dcndl_porta:dc))
(price (ndl-find 'dc:description nil "^\\(\\(本体\\|税込\\)価格 : .+\\)" dcndl_porta:dc))
(file (ndl-find 'dc:identifier nil "^fulltext : \\(.+\\)" dcndl_porta:dc))
(series (ndl-find 'dc:description nil "^シリーズ : \\(.+\\)" dcndl_porta:dc))
(keywords (ndl-collect 'dc:subject '((xsi:type . "dcndl:NDLSH")) nil dcndl_porta:dc))
(ndc (ndl-find 'dc:subject '((xsi:type . "dcndl:NDC")) nil dcndl_porta:dc))
(url (ndl-find 'dc:identifier '((xsi:type . "dcterms:URI")) nil dcndl_porta:dc))
(isbn (ndl-find 'dc:identifier '((xsi:type . "dcndl:ISBN")) nil dcndl_porta:dc))
(jpno (ndl-find 'dc:identifier '((xsi:type . "dcndl:JPNO")) nil dcndl_porta:dc))
(ndlcn (ndl-find 'dc:identifier '((xsi:type . "dcndl_porta:NDLCN")) nil dcndl_porta:dc))
(language (ndl-collect 'dc:language '((xsi:type . "dcterms:ISO639-2")) nil dcndl_porta:dc))
;; articles
(journaltitle (ndl-find 'dc:description nil "^雑誌名 : \\(.+\\)" dcndl_porta:dc))
(volume (ndl-find 'dc:description nil "^巻号・年月次 : \\(.+\\)" dcndl_porta:dc))
(pages (ndl-find 'dc:description nil "^掲載ページ : \\(.+\\)" dcndl_porta:dc))
(issn (ndl-find 'dc:identifier '((xsi:type . "dcndl:ISSN")) nil dcndl_porta:dc))
(journal (ndl-find 'dcterms:bibliographicCitation nil "^雑誌名 : \\(.+\\)" dcndl_porta:dc))
;;(type (ndl-find 'dc:type nil))
(type (ndl-find 'dc:type '((xsi:type . "dcndl:NIIType")) nil dcndl_porta:dc))
pagetotal size)
;; 情報の整理
(if (and condition
(string-match "形態 : \\(.+?p\\) ; \\(.+\\)" (car condition)))
(setq pagetotal (list (match-string 1 (car condition)))
size (list (match-string 2 (car condition)))))
(when journal
(if (string-match "\\(.+?\\) ; 巻号 : \\(.+?\\) ; 掲載ページ : \\(.+\\)"
(car journal))
(setq volume (list (match-string 2 (car journal)))
pages (list (match-string 3 (car journal)))
journal (list (match-string 1 (car journal))))
(setq journal journaltitle)))
;; annotation から、" : "を含む文字列を取り除く。
(if annotation
(setq annotation (delete-if (lambda (x) (string-match " : " x )) annotation)))
;; author がない場合は、authors または authors2 を author にする。
(if (null author)
(if authors (setq author authors)
(setq author authors2)))
(if (null sortname) (setq sortname sortname2))
;; date があれば4桁表示でないyear は不要。
(if (and date year
(not (string-match "^[0-9]\\{4\\}$" (car year))))
(setq year nil))
;; author, editor,translatorの文字列修正
(ndl-field-reformat author "\\([ -゚]\\) " "\\1, " nil) ;; 論文等では「性 名前」が見られるので「性, 名前」に直す。
(ndl-field-reformat author "\\([ -゚]\\) " "\\1, " nil) ;; 論文等では、「性 名前」が見られるので「性, 名前」に直す。
(ndl-field-reformat author "" ", " " and ")
(ndl-field-reformat editor "" ", " " and ")
(ndl-field-reformat translator "" ", " " and ")
(ndl-field-reformat sortname "" ", " " and ")
(ndl-field-reformat pages "" " -- " nil)
(ndl-field-reformat pages "" " -- " nil)
;; データ出力
(apply 'append
(mapcar (lambda (x)
(if (symbol-value x) `((,x . ,(symbol-value x)))))
bibtex-field-sort-order))))
(defvar ndl-hankaku-preserve-table
'((?” . ?\xe000)
(?{ . ?\xe001)
(?} . ?\xe002)
(?ー . ?\xe003)
(?@ . ?\xe004)))
(define-translation-table 'ndl-hankaku-preserve-to
ndl-hankaku-preserve-table)
(define-translation-table 'ndl-hankaku-preserve-from
(mapcar (lambda (x) (cons (cdr x) (car x))) ndl-hankaku-preserve-table))
(defun ndl-hankaku (string)
"STRINGを半角にする。"
(with-temp-buffer
(setq string (replace-regexp-in-string "" " (" string))
(setq string (replace-regexp-in-string "" ") " string))
(setq string (replace-regexp-in-string "" ", " string))
(insert string)
(translate-region (point-min) (point-max) 'ndl-hankaku-preserve-to)
(japanese-hankaku-region (point-min) (point-max) t)
(translate-region (point-min) (point-max) 'ndl-hankaku-preserve-from)
(buffer-string)))
(defmacro ndl-field-reformat (field start end separator)
"指定されたFIELDの一部の文字を取り出して区切り、または文字列を置換し、
separatorによって繋げる。"
`(setq ,field
',(let ((val (symbol-value field)))
(if (numberp start)
(setq val (mapcar (lambda (x) (substring x start end)) val)))
(if (and (stringp start) (stringp end))
(setq val (mapcar (lambda (x) (replace-regexp-in-string start end x))
val)))
(if (and val separator)
(setq val (list (mapconcat 'identity val separator))))
val)))
(defun ndl-data2bibtex-key (data)
(if (assoc 'isbn data)
(ndl-isbn-normalize (cadr (assoc 'isbn data)))
(if (assoc 'jpno data)
(cadr (assoc 'jpno data))
(replace-regexp-in-string
"[?=]" "_"
(if (assoc 'url data)
(replace-regexp-in-string "^.+?://" "" (cadr (assoc 'url data)))
(replace-regexp-in-string " " "" (cadr (assoc 'title data))))))))
(defun ndl-data2bibtex (data)
"DATAをBibTex用に加工・修正して出力する。"
;; データ修正
(concat
(if (assoc 'journal data) "@article{" "@book{" )
(ndl-data2bibtex-key data) ",\n "
(mapconcat
(lambda (x) (concat (symbol-name (car x)) " = {"
(if (listp (cdr x)) (mapconcat 'ndl-hankaku
(cdr x) ", ") x) "}"))
data ",\n ")
"\n}\n"))
(defun ndl-xml2bibtex (xml)
"XMLに入っている全てのデータをBibTeX用に出力する。"
(mapconcat (lambda (x) (ndl-data2bibtex (ndl-record2data x)))
(ndl-collect-records xml)
""))
;;; サンプルプログラム
;;;###autoload
(defun ndl-jpno2bibtex (jpno)
"JPNO番号からBibTeXエントリを生成する。"
(ndl-xml2bibtex (ndl-porta-query (concat "(jpno=" jpno ")"))))
;;;###autoload
(defun ndl-isbn2bibtex (isbn)
"ISBN番号からBibTeXエントリを生成する。"
(ndl-xml2bibtex (ndl-porta-query
(concat "(isbn=" (ndl-isbn-normalize isbn) ")"))))
(defun ndl-isbn-normalize (isbn)
"ISBN番号のハイフンを取り除く。10桁の場合は13桁にする。"
(let ((isbn (replace-regexp-in-string "-" "" isbn)))
(if (string-match "^[0-9]\\{9\\}[0-9X]$" isbn)
(ndl-isbn-to-13 isbn)
(if (string-match "^[0-9]\\{13\\}$" isbn) isbn
(error "Not proper ISBN format! -- %s" isbn)))))
(defun ndl-isbn-to-13 (isbn)
"10桁ISBN番号を13桁に変換する。それ以外はnilを返す。"
(if (string-match "^[0-9]\\{9\\}[0-9X]$" isbn)
(let ((sum 0)
(isbn (substring (concat "978" isbn) 0 -1)))
(do ((item (string-to-list isbn) (cdr item))
(chck '(1 3 1 3 1 3 1 3 1 3 1 3) (cdr chck)))
((null chck))
(setq sum (+ sum (* (- (car item) ?0) (car chck)))))
(setq sum (mod sum 10))
(concat isbn (if (= sum 0) "0" (number-to-string (- 10 sum)))))))
(defun ndl-isbn-to-10 (isbn)
"13桁ISBN番号を10桁に変換する。それ以外はnilを返す。"
(if (string-match "^978[0-9]\\{10\\}$" isbn)
(let ((sum 0)
(isbn (substring isbn 3 -1)))
(do ((item (string-to-list isbn) (cdr item))
(chck '(10 9 8 7 6 5 4 3 2) (cdr chck)))
((null chck))
(setq sum (+ sum (* (- (car item) ?0) (car chck)))))
(setq sum (- 11 (mod sum 11)))
(concat isbn (if (= sum 11) "0" (if (= sum 10) "X" (number-to-string sum)))))))
;;; reftex.el extra functions
(defun reftex-browse ()
(interactive)
(reftex-citation t))
;;; bibtex.el extra functions
;; BibTeXファイルを、指定したフィールドで並べ替える。
(defun bibtex-sort-buffer-by-field (arg field)
(interactive "P\nsfield = ")
(bibtex-beginning-of-first-entry) ; Needed by `sort-subr'
(sort-subr (not (null arg))
'bibtex-skip-to-valid-entry ; NEXTREC function
'bibtex-end-of-entry ; ENDREC function
(lambda () ; STARTKEY function
(let ((bounds (bibtex-search-forward-field field t)))
(if bounds (bibtex-text-in-field-bounds bounds t) "")))
nil ; ENDKEY function
'string-lessp)) ; PREDICATE
;; 指定したフィールド以外は全て隠す.
(defvar bibtex-view-fields '("title" "author" "editor"))
(defun bibtex-hide-lines ()
(interactive)
(when (require 'hide-lines nil t)
(hide-non-matching-lines
(concat "^ *\\(@\\|"
(mapconcat 'identity bibtex-view-fields
"\\|") "\\)"))
(message "To view hidden lines, do `M-x show-all-invisible'")))
;; BibTeXデータベースの操作
(defun bibtex-update-field (field value)
"現在のBibTeXエントリのFIELDをVALUEに更新・追加する。"
(save-excursion
(let* ((end (bibtex-end-of-entry))
(beg (bibtex-beginning-of-entry))
(bounds (bibtex-search-forward-field field end)))
(if bounds
(progn
(delete-region (bibtex-start-of-text-in-field bounds)
(bibtex-end-of-text-in-field bounds))
(goto-char (bibtex-start-of-text-in-field bounds))
(insert value ))
(bibtex-end-of-entry)
(bibtex-beginning-of-field)
(bibtex-make-field (list field nil value nil) t nil t)
))))
(defun bibtex-update-entry (bibtex-str)
"指定されたBibTeX文字列で、現在のエントリを上書きする。"
(let ((data (with-temp-buffer
(insert bibtex-str) (goto-char (point-min))
(bibtex-parse-entry))))
(dolist (item data)
(if (not (string-match "=" (car item)))
(bibtex-update-field (car item) (cdr item))))))
(defun bibtex-update-entry-by-isbn ()
"日本のISBN番号に基づいて、現在のBibTeXエントリを作り直す。"
(interactive)
(require 'ndl-porta)
(save-excursion
(let* (isbn jpno)
(bibtex-beginning-of-entry)
(setq isbn (bibtex-search-forward-field "isbn" t))
(if isbn
(bibtex-update-entry
(ndl-isbn2bibtex
(bibtex-text-in-field-bounds isbn t)))
(bibtex-beginning-of-entry)
(setq jpno (bibtex-search-forward-field "jpno" t))
(if jpno
(bibtex-update-entry
(ndl-jpno2bibtex
(bibtex-text-in-field-bounds jpno t))))))))
;; BibTexファイルを並べ替える。
(defun bibtex-field-sort-key (str)
"STR1のソートキーを求める。"
(let* ((word (progn (string-match "\\sw+" str) (match-string 0 str)))
(pos (position (intern word) bibtex-field-sort-order)))
(if pos (format "@%02d" pos) word)))
(defun bibtex-sort-fields ()
"現在のfieldの順番を並び替える。"
(interactive)
(save-excursion
(save-restriction
(let (beg-fields end-fields bounds alist)
(bibtex-beginning-of-entry)
(search-forward ",")
(goto-char (match-beginning 0))
(setq beg-fields (point))
(while (setq bounds (bibtex-parse-field))
(setq alist (cons (buffer-substring-no-properties
(caar bounds) (elt bounds 2))
alist))
(goto-char (setq end-fields (elt bounds 2))))
(goto-char beg-fields)
(delete-region beg-fields end-fields)
(apply 'insert (sort alist (lambda (x y)
(string< (bibtex-field-sort-key x)
(bibtex-field-sort-key y)))))
alist))))
;;; bibtex extra function ends here.
(provide 'ndl-porta)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment