Skip to content

Instantly share code, notes, and snippets.

@vishnubob
Created August 25, 2015 20:13
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save vishnubob/b89fbc28e0217da4ac8b to your computer and use it in GitHub Desktop.
Save vishnubob/b89fbc28e0217da4ac8b to your computer and use it in GitHub Desktop.
#!/usr/bin/env python
# -*- coding: latin-1 -*-
import google
import requests
import magic
def get_pdf(url, title):
    """Download *url* and save it as ``<title>.pdf`` if it looks like a PDF.

    The response body is passed to the ``magic`` module for identification;
    if the returned description does not mention "pdf" (case-insensitive),
    nothing is written.

    Args:
        url: Address to fetch with ``requests.get``.
        title: Base name of the output file; ``".pdf"`` is appended and the
            file is created relative to the current working directory.

    Returns:
        True if the content was identified as a PDF and written to disk,
        False otherwise.
    """
    # Parenthesised form prints the same text under Python 2 and also
    # parses under Python 3.
    print(url)
    pdf = requests.get(url)
    with magic.Magic() as m:
        res = m.id_buffer(pdf.content)
    if 'pdf' not in res.lower():
        return False
    # PDF data is binary: text mode would translate newline bytes on Windows
    # (and rejects bytes outright on Python 3). The context manager makes
    # sure the file is flushed and closed before we report success.
    with open(title + ".pdf", 'wb') as f:
        f.write(pdf.content)
    return True
# Driver: for each citation line in the references file, derive a title,
# search Google for the citation, and keep the first hit that downloads as
# a PDF.
refs = "references.txt"

# Opening curly quote that may prefix a title. Its length is taken with
# len() below so the strip is correct whether strings are bytes (3 bytes in
# UTF-8) or text (1 character).
OPEN_QUOTE = "“"

with open(refs) as ref_file:
    for line in ref_file:
        line = line.strip()
        # Only lines beginning with '[' are treated as citations; this also
        # skips blank lines.
        if not line.startswith('['):
            continue

        # The title is taken to be the first non-empty comma-separated field
        # that contains no '.' character.
        for elem in line.split(','):
            candidate = elem.strip()
            if candidate and '.' not in candidate:
                title = candidate
                break
        else:
            # No usable field: skip this citation instead of failing with a
            # NameError or reusing the previous citation's title.
            continue

        if title.startswith(OPEN_QUOTE):
            title = title[len(OPEN_QUOTE):]

        # Try search results in order and stop at the first one that is
        # actually a PDF.
        for res in google.search("pdf " + line):
            if get_pdf(res, title):
                break
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment