Created
May 7, 2022 23:19
-
-
Save dangom/a797aaa65f97302935c22072ddf60337 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
;;; To download all abstracts:
;;;   parallel wget --no-clobber --no-parent -r "https://submissions.mirasmart.com/ISMRM2022/Itinerary/Files/PDFFiles/{}.html" ::: $(seq -f "%04g" 00 10000)
;;; This file builds a simple index so that I can search through the abstracts in Emacs.
;;; So that other people can use it too, I have also put the index into an index.html
;;; that uses a JS search framework mimicking `completing-read'.
;;; See for example https://cheatcode.co/tutorials/how-to-implement-client-side-search-with-fuse-js
;;;
;; List and open any abstract file in eww.
(defvar ismrm-abstract-dir "/Users/dg/ismrm2022/PROGRAM/"
  "Directory holding the abstract HTML files.
It is the starting directory when prompting for an abstract file and
the directory scanned for HTML files when the title index is built.")
(defun ismrm--find-abstract-file ()
  "Prompt for a file and return it as a \"file://\" URL string.
The prompt starts in `ismrm-abstract-dir'.  The chosen name is expanded
to an absolute file name before \"file://\" is prepended."
  (interactive)
  (let* ((default-directory ismrm-abstract-dir)
         (chosen (read-file-name "Choose abstract:"))
         (absolute (expand-file-name chosen)))
    (concat "file://" absolute)))
(defun ismrm-find-abstract ()
  "Prompt for an abstract file and display it with eww."
  (interactive)
  ;; `ismrm--find-abstract-file' does the prompting and hands back the URL.
  (let ((url (call-interactively #'ismrm--find-abstract-file)))
    (eww-browse-url url)))
;; Title extraction (see `ismrm-extract-title'): the title is the content of
;; the first span with class submissionTitle.  The helpers below turn an HTML
;; file into a DOM that can be searched by class.
(defun read-html-into-dom (html)
  "Parse the whole of the file named HTML and return the resulting DOM."
  (with-temp-buffer
    (insert-file-contents html)
    (let ((start (point-min))
          (end (point-max)))
      (libxml-parse-html-region start end))))
;; Same as `read-html-into-dom', but only the part of the file that follows
;; the opening of the div with class col-lg-8 is parsed, and nothing beyond
;; the end of that div.
(defun read-html-into-dom-narrow (html)
  "Parse part of the file named HTML and return the resulting DOM.
The parsed region starts right after the first occurrence of the text
`<div class=\"col-lg-8' and ends right after the next occurrence of
` </div>'.

Return nil when either marker is missing, so that one unexpected file
does not abort a scan over many files; `dom-by-class' on nil simply
finds nothing."
  (with-temp-buffer
    (insert-file-contents html)
    (goto-char (point-min))
    ;; NOERROR is t for both searches: a missing marker gives nil rather
    ;; than a `search-failed' error.
    (let* ((start (search-forward "<div class=\"col-lg-8" nil t))
           (end (and start (search-forward " </div>" nil t))))
      (when end
        (libxml-parse-html-region start end)))))
(defun ismrm-cleanup-whitespace (string)
  "Return STRING with each run of spaces, tabs and newlines replaced by one space."
  (let ((whitespace-run "[ \t\n]+")
        (single-space " "))
    (replace-regexp-in-string whitespace-run single-space string)))
(defun ismrm-extract-title (html)
  "Return the title of the abstract stored in the file named HTML.
The title is the `dom-text' of the first element with class
\"submissionTitle\" in the DOM from `read-html-into-dom-narrow', with
whitespace runs collapsed by `ismrm-cleanup-whitespace'.
Return the string \"No title\" when no such element is found."
  (let* ((dom (read-html-into-dom-narrow html))
         (title (car (dom-by-class dom "submissionTitle"))))
    (if title
        ;; Newlines are part of what `ismrm-cleanup-whitespace' collapses,
        ;; so no separate newline-to-space pass is needed first.
        (ismrm-cleanup-whitespace (dom-text title))
      "No title")))
(defun ismrm-extract-author (html)
  "Return the author text of the abstract stored in the file named HTML.
The text is the `dom-text' of the first element with class
\"AffiliationBlockContainer\" in the DOM from
`read-html-into-dom-narrow'.  Anything that still looks like a tag
\(`<...>') is removed, then whitespace runs are collapsed by
`ismrm-cleanup-whitespace'.
Return the string \"No author\" when no such element is found."
  (let* ((dom (read-html-into-dom-narrow html))
         (container (car (dom-by-class dom "AffiliationBlockContainer"))))
    (if container
        ;; The tag pattern matches across newlines, and the cleanup step
        ;; collapses newlines, so they need no separate replacement.
        (ismrm-cleanup-whitespace
         (replace-regexp-in-string "<[^>]*>" "" (dom-text container)))
      "No author")))
(use-package dash) | |
(defun construct-abstract-file-alist ()
  "Return an alist of (TITLE . FILE) for the HTML files in `ismrm-abstract-dir'.
FILE is the absolute name of each file whose name ends in \".html\" and
TITLE is what `ismrm-extract-title' returns for it.  Entries follow the
order in which `directory-files' lists the files."
  ;; FULL is t, so `directory-files' already returns absolute names.
  ;; \\' anchors at the end of the file name rather than at a line end.
  (let ((files (directory-files ismrm-abstract-dir t "\\.html\\'")))
    ;; Build the pairs with `cons' directly so the (TITLE . FILE) shape
    ;; does not depend on how a zip helper treats the two-list case.
    (mapcar (lambda (file)
              (cons (ismrm-extract-title file) file))
            files)))
(defvar conf-abstract-file-alist nil
  "Alist of (TITLE . FILE) built by `construct-abstract-file-alist'.
It is the completion table and lookup table for abstracts and the data
source for the JSON export commands.")
;; Rebuilt every time this file is evaluated; this parses every HTML file in
;; `ismrm-abstract-dir'.
(setq conf-abstract-file-alist (construct-abstract-file-alist))
;; Collapse whitespace in a string.  This repeats the definition of the
;; same function given earlier in this file.
(defun ismrm-cleanup-whitespace (string)
  "Collapse every run of spaces, tabs and newlines in STRING into one space."
  (replace-regexp-in-string
   "[ \t\n]+"                           ; one or more blanks, tabs or newlines
   " "
   string))
;; Convert `conf-abstract-file-alist' to JSON and save it to /tmp/abstracts.json.
(defun ismrm-save-abstracts-json ()
  "Write `conf-abstract-file-alist', encoded as pretty-printed JSON, to /tmp/abstracts.json."
  (interactive)
  (let* ((json-encoding-pretty-print t)
         (encoded (json-encode conf-abstract-file-alist)))
    (with-temp-file "/tmp/abstracts.json"
      (insert encoded))))
(defun ismrm-abstract-alist-to-json (alist)
  "Return ALIST encoded as a JSON string with pretty-printing enabled."
  (let ((json-encoding-pretty-print t)
        (data alist))
    (json-encode data)))
(defun abstract-file-name (title)
  "Return the file recorded for TITLE in `conf-abstract-file-alist'.
The first entry whose key is `equal' to TITLE wins.  Return nil when
there is no such entry."
  (let ((entry (assoc title conf-abstract-file-alist)))
    (cdr entry)))
(defun ismrm-open-abstract (title)
  "Open abstract in eww.  TITLE is the title of the abstract.
Interactively, TITLE is read with completion over
`conf-abstract-file-alist'.  When no file is recorded for TITLE, show
a message instead of opening anything.

While eww is called, `eww-header-line-format' is bound to
\"FILE - TITLE\"."
  (interactive (list (completing-read "Choose abstract: " conf-abstract-file-alist)))
  (let* ((file (abstract-file-name title))
         ;; `eww-header-line-format' is a format template in which `%'
         ;; introduces a directive, so a literal `%' in the file name or
         ;; title must be doubled to be shown as is.
         ;; NOTE(review): whether this binding is still in effect when eww
         ;; updates its header line is not visible from here -- confirm.
         (eww-header-line-format
          (replace-regexp-in-string "%" "%%" (concat file " - " title) t t)))
    (if file
        (eww-browse-url (concat "file://" file))
      (message "No abstract for %s" title))))
;; Export title, author and filename of every abstract to /tmp/abstracts.json.
(defun ismrm-abstracts-to-json ()
  "Write title, author and file of every abstract to /tmp/abstracts.json.
Each entry of `conf-abstract-file-alist' becomes the list
\(TITLE AUTHOR FILE), where AUTHOR comes from `ismrm-extract-author'.
The list of these lists is passed to `json-encode' with
pretty-printing enabled."
  (interactive)
  (let ((json-encoding-pretty-print t))
    (with-temp-file "/tmp/abstracts.json"
      (insert
       (json-encode
        (mapcar (lambda (entry)
                  (let ((title (car entry))
                        (file (cdr entry)))
                    ;; Take the file from this entry itself.  Looking it up
                    ;; again by title returns the first match, which is the
                    ;; wrong file when several abstracts share a title
                    ;; (e.g. \"No title\").
                    (list title (ismrm-extract-author file) file)))
                conf-abstract-file-alist))))))
(defun ismrm-abstracts-to-json-list ()
  "Write name, author and filename of every abstract to /tmp/abstracts.json.
Each entry of `conf-abstract-file-alist' becomes the list
\(\"\" :name TITLE :author AUTHOR :filename FILE), where AUTHOR comes
from `ismrm-extract-author'.  The list of these lists is passed to
`json-encode-plist' with pretty-printing enabled.

NOTE(review): the argument is a list of lists rather than a flat
plist; the JSON this produces depends on how the installed json.el
handles that -- verify the output."
  (interactive)
  (let ((json-encoding-pretty-print t))
    (with-temp-file "/tmp/abstracts.json"
      (insert
       (json-encode-plist
        (mapcar (lambda (entry)
                  (let ((title (car entry))
                        (file (cdr entry)))
                    ;; Use this entry's own file: a lookup by title returns
                    ;; the first match, which is wrong for duplicate titles.
                    (list ""
                          :name title
                          :author (ismrm-extract-author file)
                          :filename file)))
                conf-abstract-file-alist))))))
(defun ismrm-abstracts-to-json-list-with-id ()
  "Write id, name, author and filename of every abstract to /tmp/abstracts.json.
Each entry of `conf-abstract-file-alist' becomes the list
\(\"\" :id N :name TITLE :author AUTHOR :filename FILE), where N counts
up from 0 in alist order and AUTHOR comes from `ismrm-extract-author'.
The list of these lists is passed to `json-encode-plist' with
pretty-printing enabled.

NOTE(review): the argument is a list of lists rather than a flat
plist; the JSON this produces depends on how the installed json.el
handles that -- verify the output."
  (interactive)
  (let ((next-id -1)
        (json-encoding-pretty-print t))
    (with-temp-file "/tmp/abstracts.json"
      (insert
       (json-encode-plist
        (mapcar (lambda (entry)
                  (let ((title (car entry))
                        (file (cdr entry)))
                    ;; Plain `setq' keeps the counter free of any cl-lib
                    ;; dependency; the first id handed out is 0.
                    (setq next-id (1+ next-id))
                    ;; Use this entry's own file: a lookup by title returns
                    ;; the first match, which is wrong for duplicate titles.
                    (list ""
                          :id next-id
                          :name title
                          :author (ismrm-extract-author file)
                          :filename file)))
                conf-abstract-file-alist))))))
(defun ismrm-remove-duplicate-tr-tags ()
  "Delete every `</tr>' that is directly followed by a newline and `<tr>'.
The closing tag, the newline and the opening tag are all removed, which
joins the two table rows.  The whole current buffer is processed and
point is left where it was."
  (interactive)
  (save-excursion
    (goto-char (point-min))
    (while (re-search-forward "</tr>\n<tr>" nil t)
      ;; Same effect as replacing the match with the empty string.
      (delete-region (match-beginning 0) (match-end 0)))))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment