Skip to content

Instantly share code, notes, and snippets.

@dangom
Created May 7, 2022 23:19
Show Gist options
  • Save dangom/a797aaa65f97302935c22072ddf60337 to your computer and use it in GitHub Desktop.
Save dangom/a797aaa65f97302935c22072ddf60337 to your computer and use it in GitHub Desktop.
;;; to download all abstracts:
;;; parallel wget --no-clobber --no-parent -r "https://submissions.mirasmart.com/ISMRM2022/Itinerary/Files/PDFFiles/{}.html" ::: $(seq -f "%04g" 00 10000)
;;; Build a simple index so that I can search through the abstracts from within Emacs.
;;; So that other people can use it too, I have also put the index into an index.html that uses a JS search framework mimicking `completing-read'.
;;; See for example https://cheatcode.co/tutorials/how-to-implement-client-side-search-with-fuse-js
;;;
;; List and open any file on eww
(defvar ismrm-abstract-dir "/Users/dg/ismrm2022/PROGRAM/"
  "Directory that holds the downloaded abstract HTML files.
It is used as the starting directory when prompting for an abstract
file and as the directory scanned when the title index is built.")
(defun ismrm--find-abstract-file ()
  "Prompt for a file under `ismrm-abstract-dir' and return it as a file:// URL.
The prompt starts in `ismrm-abstract-dir' and only accepts an existing
file name.  The chosen name is expanded to an absolute path before the
\"file://\" prefix is added."
  (interactive)
  ;; Pass the directory explicitly instead of relying on a let-bound
  ;; `default-directory'; the trailing space in the prompt matches the
  ;; prompt used by `ismrm-open-abstract'.
  (let ((chosen (read-file-name "Choose abstract: " ismrm-abstract-dir nil t)))
    (concat "file://" (expand-file-name chosen ismrm-abstract-dir))))
(defun ismrm-find-abstract ()
  "Prompt for an abstract file and display it in EWW."
  (interactive)
  ;; The helper is invoked interactively so that it performs the prompting.
  (let ((url (call-interactively #'ismrm--find-abstract-file)))
    (eww-browse-url url)))
;; Parse HTML into a DOM. The title is later extracted from the first element (a span in these pages) with class submissionTitle.
(defun read-html-into-dom (html)
  "Parse the whole HTML file named HTML and return the resulting DOM.
The file contents are loaded into a temporary buffer and handed to
`libxml-parse-html-region'."
  (with-temp-buffer
    (insert-file-contents html)
    (let ((beg (point-min))
          (end (point-max)))
      (libxml-parse-html-region beg end))))
;; Like `read-html-into-dom', but only the part of the file that starts at
;; the div with class col-lg-8 is parsed; anything after the closing marker
;; of that div is ignored.
(defun read-html-into-dom-narrow (html)
  "Parse part of the HTML file named HTML and return the resulting DOM.

Only the text between the first occurrence of `<div class=\"col-lg-8'
and the end of the next occurrence of ` </div>' is parsed.  If the
closing marker is absent, parsing runs to the end of the file.

Return nil when the opening marker is absent, so that callers can fall
back to their own defaults instead of aborting on a `search-failed'
error."
  (with-temp-buffer
    (insert-file-contents html)
    (goto-char (point-min))
    ;; NOERROR is t for both searches: one page without the expected
    ;; markup must not abort a run over thousands of files.
    (when (search-forward "<div class=\"col-lg-8" nil t)
      (let ((start (point))
            (end (or (search-forward " </div>" nil t)
                     (point-max))))
        ;; Passing the bounds directly is equivalent to narrowing first.
        (libxml-parse-html-region start end)))))
(defun ismrm-cleanup-whitespace (string)
  "Return STRING with every run of spaces, tabs and newlines replaced by one space."
  (let ((whitespace-run "[ \t\n]+")
        (single-space " "))
    (replace-regexp-in-string whitespace-run single-space string)))
(defun ismrm-extract-title (html)
  "Return the title found in the abstract file HTML.
The title is the text of the first node whose class matches
\"submissionTitle\" in the DOM returned by `read-html-into-dom-narrow',
with newlines turned into spaces and whitespace runs collapsed.
Return \"No title\" when no such node exists."
  (let* ((dom (read-html-into-dom-narrow html))
         (title-node (car (dom-by-class dom "submissionTitle"))))
    (cond
     ((null title-node) "No title")
     (t
      (let ((one-line (replace-regexp-in-string "\n" " " (dom-text title-node))))
        (ismrm-cleanup-whitespace one-line))))))
(defun ismrm-extract-author (html)
  "Return the author text found in the abstract file HTML.
The text comes from the first node whose class matches
\"AffiliationBlockContainer\" in the DOM returned by
`read-html-into-dom-narrow'.  Newlines become spaces, anything that
looks like a tag is removed, and whitespace runs are collapsed.
Return \"No author\" when no such node exists."
  (let* ((dom (read-html-into-dom-narrow html))
         (container (car (dom-by-class dom "AffiliationBlockContainer"))))
    (if (null container)
        "No author"
      (let* ((raw (dom-text container))
             (one-line (replace-regexp-in-string "\n" " " raw))
             ;; Drop anything of the form <...> left in the text.
             (untagged (replace-regexp-in-string "<[^>]*>" "" one-line)))
        (ismrm-cleanup-whitespace untagged)))))
(require 'cl-lib)
(require 'dom)
(require 'json)
(use-package dash)
(defun construct-abstract-file-alist ()
  "Return an alist of (TITLE . FILE) for the HTML files in `ismrm-abstract-dir'.
FILE is the absolute name of each file whose name ends in \".html\",
in the order returned by `directory-files'.  TITLE is the result of
calling `ismrm-extract-title' on that file."
  ;; Build the pairs with `cons' rather than dash's two-list `-zip': dash
  ;; documents that form's dotted-pair result as subject to change, and
  ;; the built-in version needs no third-party library.
  ;; The regexp is anchored with \\' (end of string) instead of $ (end of line).
  (mapcar (lambda (file)
            (cons (ismrm-extract-title file) file))
          (mapcar #'expand-file-name
                  (directory-files ismrm-abstract-dir t "\\.html\\'"))))
;; Declare the variable before assigning it, so that it is a documented
;; special variable instead of an undeclared global.
(defvar conf-abstract-file-alist nil
  "Alist of (TITLE . FILE) entries, one per abstract HTML file.
It is rebuilt from `ismrm-abstract-dir' every time this file is loaded.")
;; `setq' rather than the `defvar' init form, so that reloading this file
;; refreshes the index.
(setq conf-abstract-file-alist (construct-abstract-file-alist))
;; Collapse whitespace runs in a string.
;; NOTE(review): this repeats the definition of `ismrm-cleanup-whitespace'
;; that appears earlier in this file; the two behave the same.
(defun ismrm-cleanup-whitespace (string)
  "Return STRING with each run of spaces, tabs or newlines squeezed to one space."
  (replace-regexp-in-string (rx (one-or-more (any " \t\n")))
                            " "
                            string))
;; Serialize the title/file alist as JSON into /tmp/abstracts.json.
(defun ismrm-save-abstracts-json ()
  "Write `conf-abstract-file-alist' to /tmp/abstracts.json as pretty-printed JSON."
  (interactive)
  (let ((json-encoding-pretty-print t))
    (with-temp-file "/tmp/abstracts.json"
      (let ((encoded (json-encode conf-abstract-file-alist)))
        (insert encoded)))))
(defun ismrm-abstract-alist-to-json (alist)
  "Return ALIST encoded by `json-encode' with pretty-printing enabled."
  (let ((json-encoding-pretty-print t)
        (data alist))
    (json-encode data)))
(defun abstract-file-name (title)
  "Return the file stored for TITLE in `conf-abstract-file-alist', or nil.
The first entry whose key is `equal' to TITLE wins."
  (let ((entry (assoc title conf-abstract-file-alist)))
    (and entry (cdr entry))))
(defun ismrm-open-abstract (title)
  "Display the abstract called TITLE in EWW.
Interactively, TITLE is read with completion over
`conf-abstract-file-alist'.  If no file is recorded for TITLE, show a
message instead."
  (interactive (list (completing-read "Choose abstract: " conf-abstract-file-alist)))
  (let* ((file (abstract-file-name title))
         ;; Bound around the browse call so the header shows file and title.
         (eww-header-line-format (concat file " - " title)))
    (cond
     (file (eww-browse-url (concat "file://" file)))
     (t (message "No abstract for %s" title)))))
;; Dump title, author and file name of every abstract to /tmp/abstracts.json.
(defun ismrm-abstracts-to-json ()
  "Write one (TITLE AUTHOR FILE) record per abstract to /tmp/abstracts.json.
The records are built from `conf-abstract-file-alist': AUTHOR comes
from `ismrm-extract-author' on the entry's file, and FILE from
`abstract-file-name' on the entry's title.  The list of records is
passed to `json-encode' with pretty-printing enabled."
  (interactive)
  (let ((json-encoding-pretty-print t))
    (with-temp-file "/tmp/abstracts.json"
      (let (records)
        (dolist (entry conf-abstract-file-alist)
          (push (list (car entry)
                      (ismrm-extract-author (cdr entry))
                      (abstract-file-name (car entry)))
                records))
        (insert (json-encode (nreverse records)))))))
(defun ismrm-abstracts-to-json-list ()
  "Write name, author and file name of every abstract to /tmp/abstracts.json.
For each entry of `conf-abstract-file-alist' a record of the form
\(\"\" :name TITLE :author AUTHOR :filename FILE) is built, and the list
of records is passed to `json-encode-plist' with pretty-printing
enabled."
  (interactive)
  (let ((json-encoding-pretty-print t))
    (with-temp-file "/tmp/abstracts.json"
      (let ((records
             (mapcar (lambda (entry)
                       (let ((title (car entry))
                             (file (cdr entry)))
                         ;; The leading "" element is part of the record shape.
                         (list ""
                               :name title
                               :author (ismrm-extract-author file)
                               :filename (abstract-file-name title))))
                     conf-abstract-file-alist)))
        (insert (json-encode-plist records))))))
(defun ismrm-abstracts-to-json-list-with-id ()
  "Write id, name, author and file name of every abstract to /tmp/abstracts.json.
For each entry of `conf-abstract-file-alist' a record of the form
\(\"\" :id N :name TITLE :author AUTHOR :filename FILE) is built, where
N counts up from 0 in list order.  The list of records is passed to
`json-encode-plist' with pretty-printing enabled."
  (interactive)
  (let ((json-encoding-pretty-print t)
        (next-id 0)
        (records nil))
    (with-temp-file "/tmp/abstracts.json"
      (dolist (entry conf-abstract-file-alist)
        (let ((title (car entry))
              (file (cdr entry)))
          (push (list ""
                      :id next-id
                      :name title
                      :author (ismrm-extract-author file)
                      :filename (abstract-file-name title))
                records))
        (setq next-id (1+ next-id)))
      (insert (json-encode-plist (nreverse records))))))
(defun ismrm-remove-duplicate-tr-tags ()
  "Delete every \"</tr>\" + newline + \"<tr>\" sequence in the current buffer.
The whole accessible portion of the buffer is scanned from its
beginning; point is restored afterwards."
  (interactive)
  (save-excursion
    (goto-char (point-min))
    (let ((row-break "</tr>\n<tr>"))
      ;; The pattern has no regexp metacharacters, so a plain string
      ;; search finds exactly the same matches.
      (while (search-forward row-break nil t)
        (replace-match "" t t)))))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment