Created
December 27, 2014 14:51
-
-
Save tommasoturchi/f242d2d635db9548a99f to your computer and use it in GitHub Desktop.
k2qbook
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding: utf-8 -*- | |
#!/usr/bin/env python | |
import csv | |
import webbrowser | |
import sys | |
import urllib | |
try:
    from urllib import quote as _url_quote  # Python 2
except ImportError:
    from urllib.parse import quote as _url_quote  # Python 3


def _quote_field(text):
    """Percent-encode one CSV field for use in the Quotebook URL."""
    # csv.reader yields byte strings on Python 2 and text on Python 3.
    # Only text needs encoding: calling .encode() on a Python 2 byte string
    # triggers an implicit ASCII decode and fails on non-ASCII quotes.
    if not isinstance(text, bytes):
        text = text.encode('utf-8')
    return _url_quote(text)


def main(argv=None, csv_path="kindle.csv", open_url=webbrowser.open):
    """Send one row of the highlights CSV to Quotebook.

    The row index is read from ``argv[1]`` (row 0 when it is absent). The
    generated URL carries an ``x-success`` callback that re-runs this script
    with the next index, so each successful import triggers the following
    one until the rows are exhausted, at which point "Done." is printed.

    Args:
        argv: Argument vector; defaults to ``sys.argv``.
        csv_path: CSV file with ``|``-quoted columns in the order
            source, author, quote.
        open_url: Callable invoked with the generated URL.

    Raises:
        ValueError: If ``argv[1]`` is not an integer.
        IndexError: If the selected row has fewer than three columns.
    """
    if argv is None:
        argv = sys.argv
    index = int(argv[1]) if len(argv) > 1 else 0

    with open(csv_path, "r") as f:
        rows = list(csv.reader(f, delimiter=',', quotechar='|'))

    # ">=" rather than ">": index == len(rows) is already past the last row.
    if index >= len(rows):
        print("Done.")
        return

    row = rows[index]
    url = (
        'quotebook://x-callback-url/add?quote=' + _quote_field(row[2])
        + '&author=' + _quote_field(row[1])
        + '&source=' + _quote_field(row[0])
        + '&x-success=pythonista%3A//k2quotebook%3Faction%3Drun%26argv%3D'
        + str(index + 1)
    )
    open_url(url)


if __name__ == "__main__":
    main()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
import re | |
import sys | |
from bs4 import BeautifulSoup | |
from optparse import OptionParser | |
import mechanize | |
from dateutil.parser import parse | |
def xstr(s):
    """Return *s* unchanged when it is truthy, otherwise an empty string."""
    if s:
        return s
    return ""
def printx(str, f):
    """Write *str* to the file-like object *f*, followed by a newline."""
    # NOTE: the parameter name shadows the builtin ``str`` inside this
    # function; it is left unchanged here.
    line = str + '\n'
    f.write(line)
def printHighlights(resp, file):
    """Write every highlight found in one response page to *file*.

    The page is parsed with BeautifulSoup. If it has no ``span.title``
    element nothing is written. Otherwise one line per ``span.highlight``
    is emitted through ``printx`` in the form ``|title|,|author|,|text|``.

    Args:
        resp: Response object whose ``read()`` returns UTF-8 encoded HTML.
        file: Writable file-like object that receives the CSV lines.
    """
    soup = BeautifulSoup(resp.read().decode('utf-8'))
    title = soup.find("span", {"class": 'title'})
    if title is None:
        return

    def field(text):
        # '|' is the quote character of the output format; double any
        # embedded one so it does not terminate the field early (the csv
        # module's default doublequote handling reads "||" back as "|").
        return "|" + xstr(text).replace("|", "||") + "|"

    # Drop any parenthesised part of the title.
    book_title = re.sub(r'\(.*?\)', '', xstr(title.string).strip()).rstrip()

    # Tolerate a page without an author span instead of failing on None.
    author_tag = soup.find("span", {"class": 'author'})
    author_text = xstr(author_tag.string if author_tag is not None else None)
    # The first three characters are discarded -- presumably a "by " prefix;
    # TODO confirm against the page markup.
    book_author = author_text.strip()[3:]

    print("::: Processing %s, %s" % (book_title, book_author))
    for highlight in soup.findAll("span", {"class": "highlight"}):
        text = highlight.string
        if text is None:
            # .string is None when the span holds nested markup; fall back
            # to the concatenated text of all descendants.
            text = highlight.get_text()
        line = ",".join(
            (field(book_title), field(book_author), field(text.rstrip()))
        )
        printx(line, file)
if __name__ == "__main__":
    # Sign in at kindle.amazon.com/your_highlights, then walk the "Next Book"
    # links, appending each page's highlights to kindle.csv.
    import io

    # io.open with an explicit encoding lets non-ASCII highlights be written
    # on Python 2 as well; the with-statement guarantees the file is closed
    # even when an exception is re-raised below.
    with io.open('kindle.csv', 'w', encoding='utf-8') as f:
        # Browser
        br = mechanize.Browser()
        br.set_handle_robots(False)
        # User-Agent (this is cheating, ok?)
        br.addheaders = [('User-agent', 'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.0.1) Gecko/2008071615 Fedora/3.0.1-1.fc9 Firefox/3.0.1')]
        try:
            sign_in = br.open("https://kindle.amazon.com/your_highlights")
            # Strip the DOCTYPE declaration before handing the page back to
            # mechanize -- presumably its form parser rejects it; verify.
            sign_in.set_data(re.sub('<!DOCTYPE(.*)>', '', sign_in.get_data()))
            br.set_response(sign_in)
            br.select_form(name="signIn")
            # Fill in the Amazon account credentials here before running.
            br["email"] = ''
            br["password"] = ''
            resp = br.submit()
            print("::: Logging in to Amazon")
            printHighlights(resp, f)
            # Runs until follow_link raises LinkNotFoundError, i.e. the page
            # has no "Next Book" link.
            while True:
                resp = br.follow_link(text='Next Book')
                printHighlights(resp, f)
        except mechanize._mechanize.LinkNotFoundError:
            # Expected way out of the loop above; not an error.
            print("::: No more books")
        except Exception:
            # NOTE(review): this handler catches every failure, not only
            # sign-in problems; the message text is kept unchanged.
            sys.stderr.write("Error logging in to AWS\n")
            raise
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment