Export reading list from ACM SIGCHI Android App
#!/usr/bin/python3
# written at 7am before coffee. don't @ me.
import os
import re
import tarfile
import sqlite3
import subprocess
import urllib.request
dbfile = "apps/org.sigchi/db/conference_db"
cachepath = "cache.html"
# conference proceedings (proc. and adjunct)
# example ids here are for UIST 2018
# "https://dl.acm.org/citation.cfm?id=3266037&preflayout=flat",
# "https://dl.acm.org/citation.cfm?id=3242587&preflayout=flat"
confdata = [
"http://uist.acm.org/uist2018/pages/toc.html",
"http://uist.acm.org/uist2018/pages/toca.html"
]
# create the backup file
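# (adb pops up a confirmation dialog on the device; leave the password field
# empty there, since the header patching below only works on unencrypted backups)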
subprocess.run("adb backup -f org.sigchi.ab org.sigchi",shell=True,check=True)
# convert to gzip by changing header
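# an unencrypted .ab file is a 24-byte plaintext header ("ANDROID BACKUP\n"
# plus version, compression flag and encryption marker, e.g. "1\n1\nnone\n")
# followed by a deflate-compressed tar stream, so swapping the header for a
# gzip one lets tarfile read it directly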
with open("org.sigchi.ab","rb") as abfile:
data = abfile.read()
#print("Original header:" + str(data[0:24]))
header = bytes([ 0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00 ])
with open("org.sigchi.tgz","wb") as tgzfile:
tgzfile.write(header+data[24:])
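# if the gzip-header trick ever fails with a stricter decoder, the payload is
# a plain zlib stream, so this (untested sketch) should work as well:
#   import io, zlib
#   tar = tarfile.open(fileobj=io.BytesIO(zlib.decompress(data[24:])))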
# untar https://docs.python.org/3.4/library/tarfile.html?highlight=tar
tar = tarfile.open("org.sigchi.tgz")
tar.extract(dbfile)
tar.close()
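# extract() recreates the archive path below the current directory, which is
# exactly the relative dbfile path defined at the top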
# get reading list from dbfile
# https://sebastianraschka.com/Articles/2014_sqlite_in_python_tutorial.html
papers = []
db = sqlite3.connect(dbfile)
cur = db.cursor()
cur.execute("SELECT * FROM MY_READING_MODEL")
rows = cur.fetchall()
for row in rows:
    # row[1] and row[2] together form the composite PAPER_MODEL id
    pid = row[1] + "-" + row[2]
    # parameterized query instead of pasting the id into the SQL string
    cur.execute("SELECT TITLE,TYPE,EXTERNAL_ID,SIMPLE_AUTHOR_LIST FROM PAPER_MODEL WHERE ID=?", (pid,))
    result = cur.fetchall()[0]
    papers.append(result)
# get the proceedings index from urls or cache (if available)
html = ""
if os.path.isfile(cachepath):
    with open(cachepath,"r") as cachefile:
        html = cachefile.read()
else:
    for url in confdata:
        with urllib.request.urlopen(url) as response:
            html += response.read().decode("utf-8")
    with open(cachepath,"w") as cachefile:
        cachefile.write(html)
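# delete cache.html to force a re-download of the proceedings pages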
# match titles to DOIs/URLs via proceedings index
for paper in papers:
    print(paper[0]+" -> ",end="")
    # re.escape keeps regex metacharacters in the title from breaking the search
    regexp = 'href="(.*?)".*?' + re.escape(paper[0])
    paper_url = re.search(regexp,html)
    print(paper_url.group(1) if paper_url else "")
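# usage (script name here is just an example): connect the phone with USB
# debugging enabled, then run e.g.
#   python3 export_reading_list.py > reading_list.txt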