Export reading list from ACM SIGCHI Android App
#!/usr/bin/python3
# written at 7am before coffee. don't @ me.
import os
import re
import tarfile
import sqlite3
import subprocess
import urllib.request
dbfile = "apps/org.sigchi/db/conference_db"
cachepath = "cache.html"
# conference proceedings (proc. and adjunct)
# example ids here are for UIST 2018
# "https://dl.acm.org/citation.cfm?id=3266037&preflayout=flat",
# "https://dl.acm.org/citation.cfm?id=3242587&preflayout=flat"
confdata = [
"http://uist.acm.org/uist2018/pages/toc.html",
"http://uist.acm.org/uist2018/pages/toca.html"
]
# create the backup file
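# (adb pops up a confirmation dialog on the device; leave the password field
# empty there, since the header patching below only works on unencrypted backups)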
subprocess.run("adb backup -f org.sigchi.ab org.sigchi",shell=True,check=True)
# convert to gzip by changing header
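# an unencrypted .ab file is a 24-byte plaintext header ("ANDROID BACKUP\n"
# plus version, compression flag and encryption marker, e.g. "1\n1\nnone\n")
# followed by a deflate-compressed tar stream, so swapping the header for a
# gzip one lets tarfile read it directly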
with open("org.sigchi.ab","rb") as abfile:
data = abfile.read()
#print("Original header:" + str(data[0:24]))
header = bytes([ 0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00 ])
with open("org.sigchi.tgz","wb") as tgzfile:
tgzfile.write(header+data[24:])
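# if the gzip-header trick ever fails with a stricter decoder, the payload is
# a plain zlib stream, so this (untested sketch) should work as well:
#   import io, zlib
#   tar = tarfile.open(fileobj=io.BytesIO(zlib.decompress(data[24:])))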
# untar https://docs.python.org/3.4/library/tarfile.html?highlight=tar
tar = tarfile.open("org.sigchi.tgz")
tar.extract(dbfile)
tar.close()
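# extract() recreates the archive path below the current directory, which is
# exactly the relative dbfile path defined at the top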
# get reading list from dbfile
# https://sebastianraschka.com/Articles/2014_sqlite_in_python_tutorial.html
papers = []
db = sqlite3.connect(dbfile)
cur = db.cursor()
cur.execute("SELECT * FROM MY_READING_MODEL")
rows = cur.fetchall()
for row in rows:
    # row[1] and row[2] together form the composite PAPER_MODEL id
    pid = row[1] + "-" + row[2]
    # parameterized query instead of pasting the id into the SQL string
    cur.execute("SELECT TITLE,TYPE,EXTERNAL_ID,SIMPLE_AUTHOR_LIST FROM PAPER_MODEL WHERE ID=?", (pid,))
    result = cur.fetchall()[0]
    papers.append(result)
# get the proceedings index from urls or cache (if available)
html = ""
if os.path.isfile(cachepath):
    with open(cachepath,"r") as cachefile:
        html = cachefile.read()
else:
    for url in confdata:
        with urllib.request.urlopen(url) as response:
            html += response.read().decode("utf-8")
    with open(cachepath,"w") as cachefile:
        cachefile.write(html)
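# delete cache.html to force a re-download of the proceedings pages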
# match titles to DOIs/URLs via proceedings index
for paper in papers:
    print(paper[0]+" -> ",end="")
    # re.escape keeps regex metacharacters in the title from breaking the search
    regexp = 'href="(.*?)".*?' + re.escape(paper[0])
    paper_url = re.search(regexp,html)
    print(paper_url.group(1) if paper_url else "")
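# usage (script name here is just an example): connect the phone with USB
# debugging enabled, then run e.g.
#   python3 export_reading_list.py > reading_list.txt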