Last active
July 25, 2016 20:19
-
-
Save Overload119/a185cb87e9ebd3c6125fa11d9b4800b1 to your computer and use it in GitHub Desktop.
I needed a way to sync highlights and vocabulary from my Kindle to Anki. This helps me learn new words and also remember the interesting parts of the books I read. I wrote some utility functions in Ruby: the script stores a local record of what has already been synced, then uses AnkiWeb to sync. It uses a dictionary API to get the definition o…
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
require 'time'

require 'mechanize'
require 'sqlite3'
require 'wordnet-defaultdb'
require 'wordnet'
# Path of the local SQLite database that records what has already been synced.
# May be overridden with the KINDLE_ANKI_DB environment variable.
LOCAL_DB_PATH = ENV.fetch('KINDLE_ANKI_DB', '<yourpath>.db').freeze
# Names of the local bookkeeping tables.
WORDS_TABLE_NAME = 'words_v2'.freeze
HIGHLIGHTS_TABLE_NAME = 'highlights_v2'.freeze
# AnkiWeb credentials. Supply them through ANKIWEB_USERNAME / ANKIWEB_PASSWORD
# so they do not have to be committed to source; the placeholders below are
# used when the variables are not set.
USERNAME = ENV.fetch('ANKIWEB_USERNAME', '<your username>').freeze
PASSWORD = ENV.fetch('ANKIWEB_PASSWORD', '<your password>').freeze
# Some parameter the API uses, not sure what it does.
# NOTE(review): presumably the Anki note-type (model) ID rather than a card
# ID - confirm against your own AnkiWeb account.
MID = 1_446_933_620_314
# Open the local bookkeeping database.
@db = SQLite3::Database.new(LOCAL_DB_PATH)

# Log in to AnkiWeb. The same agent is reused for every later request.
login_url = "https://ankiweb.net/account/login"
@agent = Mechanize.new
page = @agent.get(login_url)
login_form = page.form_with(action: login_url)
login_form.username = USERNAME
login_form.password = PASSWORD
login_form.click_button

# Lexicon used to look up definitions for vocabulary words.
@wordnet = WordNet::Lexicon.new
# Initialize the DB.

# Creates +table_name+ with the given column definitions unless a table of
# that name already exists in the local database.
#
# @param table_name [String] name of the table to create
# @param column_definitions [Array<String>] SQL column definitions
# @return [Boolean] true when the table was created, false when it existed
def create_table_unless_exists(table_name, column_definitions)
  existing = @db.execute(
    "SELECT name FROM sqlite_master WHERE name = ? AND type = 'table'",
    [table_name]
  )
  return false unless existing.empty?

  # Identifiers cannot be bound as parameters, so the table name is
  # interpolated; it is only ever one of this file's own constants.
  @db.execute(
    "CREATE TABLE #{table_name} (\n  #{column_definitions.join(",\n  ")}\n);"
  )
  puts "Created table '#{table_name}'..."
  true
end

create_table_unless_exists(
  WORDS_TABLE_NAME,
  ['word VARCHAR(255) NOT NULL', 'synced INTEGER DEFAULT 0']
)
create_table_unless_exists(
  HIGHLIGHTS_TABLE_NAME,
  ['timestamp INTEGER', 'highlight TEXT NOT NULL', 'synced INTEGER DEFAULT 0']
)
# Reports whether a highlight has already been synced.
#
# Only rows flagged +synced = 1+ count, matching how synced_word? treats the
# words table.
#
# @param highlight_id [Integer] identifier stored in the +timestamp+ column
# @return [Boolean] true when a synced row with that timestamp exists
def synced_highlight?(highlight_id)
  # Existence is all that matters, so select a constant and stop at one row.
  rows = @db.execute(
    "SELECT 1 FROM #{HIGHLIGHTS_TABLE_NAME} " \
    'WHERE timestamp = ? AND synced = 1 LIMIT 1',
    [highlight_id]
  )
  !rows.empty?
end
# Reports whether a vocabulary word has already been synced.
#
# @param word [String] the word as stored in the +word+ column
# @return [Boolean] true when a row for the word exists with +synced = 1+
def synced_word?(word)
  # Existence is all that matters, so select a constant and stop at one row.
  rows = @db.execute(
    "SELECT 1 FROM #{WORDS_TABLE_NAME} WHERE word = ? AND synced = 1 LIMIT 1",
    [word]
  )
  !rows.empty?
end
# Posts a new card to AnkiWeb's save endpoint using the logged-in agent.
#
# @param deck_name [String] deck the card is sent to
# @param front [String] text for the front of the card
# @param back [String] text for the back of the card
# @param tag [String] tag sent along with the card (empty by default)
# @return the value returned by the agent's +post+ call
def api_save_card(deck_name, front, back, tag = '')
  payload = {
    data: [[front, back], tag],
    deck: deck_name,
    mid: MID
  }
  @agent.post('https://ankiweb.net/edit/save', payload)
end
# Builds the back-of-card text for a word from its WordNet synsets.
#
# Each synset contributes one paragraph of the form
# "<part of speech>: <definition> : <word>, <word>, ...".
#
# @param word [String] the word to look up (lower-cased before lookup)
# @return [String, nil] the card text, or nil when the lookup finds nothing
def api_get_back_of_card_for_word(word)
  synsets = @wordnet.lookup_synsets(word.downcase)
  return nil if synsets.nil? || synsets.empty?

  synsets.map do |synset|
    # Join the words explicitly: interpolating the Array itself would put
    # Ruby's inspect form (brackets and quotes) into the card text.
    related_words = synset.words.map(&:to_s).join(', ')
    "#{synset.part_of_speech}: #{synset.definition} : #{related_words}\n\n"
  end.join
end
# Sends one highlight to AnkiWeb and records it locally as synced.
#
# The highlight becomes the front of a card tagged 'books' with an empty back.
#
# @param deck_name [String] deck the card is sent to
# @param text [String] the highlighted passage
# @param id [Integer] value stored in the +timestamp+ column
def sync_highlight(deck_name, text, id)
  # Only the first 141 characters are echoed to keep the log readable.
  preview = text[0..140]
  puts "Sync highlight ... '#{preview}...'"
  api_save_card(deck_name, text, '', 'books')

  insert_sql =
    "INSERT INTO #{HIGHLIGHTS_TABLE_NAME} (highlight, synced, timestamp)
    VALUES (?, ?, ?)"
  @db.execute(insert_sql, [text, 1, id])
end
# Sends one vocabulary card to AnkiWeb and records the word locally as synced.
#
# @param deck_name [String] deck the card is sent to
# @param word [String] front of the card
# @param text [String] back of the card
def sync_word(deck_name, word, text)
  api_save_card(deck_name, word, text)

  insert_sql =
    "INSERT INTO #{WORDS_TABLE_NAME} (word, synced)
    VALUES (?, ?)"
  @db.execute(insert_sql, [word, 1])
end
# Reads the word stems from the Kindle vocabulary database using the
# +sqlite3+ command-line tool.
#
# The command is run without a shell, so the path needs no quoting. A missing
# +sqlite3+ executable therefore raises Errno::ENOENT instead of silently
# producing no words.
#
# @param vocab_db_path [String] location of the Kindle's vocab.db
# @return [String] the tool's standard output, one stem per line
def read_words(vocab_db_path = '/Volumes/Kindle/system/vocabulary/vocab.db')
  IO.popen(['sqlite3', vocab_db_path, 'SELECT stem FROM WORDS'], &:read)
end
# Reads the Kindle clippings file as UTF-8 text.
#
# A leading byte-order mark is stripped so it does not end up in the first
# title of the file.
#
# @param path [String] location of the clippings file
# @return [String] the whole file contents
def read_highlights(path = '/Volumes/Kindle/documents/My Clippings.txt')
  File.read(path, encoding: 'BOM|UTF-8')
end
# Extracts the "Added on ..." time from a clipping's metadata line and returns
# it as a UNIX timestamp.
#
# The time is taken from the text after the last '|' on the line. Time.parse
# comes from the stdlib 'time' library and interprets a string without a zone
# in the local time zone.
#
# @param line [String] metadata line, e.g.
#   "- Your Highlight on Location 1-2 | Added on Monday, July 25, 2016 8:19:00 PM"
# @return [Integer] seconds since the epoch
# @raise [ArgumentError] when the segment cannot be parsed as a time
def highlight_timestamp_from_line(line)
  time_segment = line.split('|').last.gsub('Added on', '').strip
  Time.parse(time_segment).to_i
end
# Syncs every not-yet-synced highlight from the clippings file.
#
# The file is split on the '==========' separator. Each entry is expected to
# hold a title line, a metadata line and the highlighted text. The title is
# used as the deck name, and the metadata line's timestamp identifies the
# highlight.
def sync_highlights
  read_highlights.split('==========').each do |highlight_data|
    source_title, meta, *text_lines =
      highlight_data.split(/\r?\n/).reject(&:empty?)
    # Keep every line of a multi-line entry, not just the first.
    text = text_lines.join("\n")

    # Skip chunks that are not complete entries: the whitespace after the
    # final separator, bookmarks, and anything else without text.
    next if meta.nil? || text.empty?
    # Don't sync bookmarks.
    next if meta.include?('Your Bookmark on')

    # A byte-order mark in front of the title would otherwise become part of
    # the deck name.
    source_title = source_title.sub(/\A\uFEFF/, '')

    # Generate a UNIX timestamp for the highlight which we'll use to uniquely
    # identify it.
    highlight_timestamp = highlight_timestamp_from_line(meta)
    next if synced_highlight?(highlight_timestamp)

    sync_highlight(source_title, text, highlight_timestamp)
  end
end
# Syncs every not-yet-synced vocabulary word that has a definition.
#
# Words that are already synced, or for which no definition is found, are
# reported as skipped.
def sync_words
  read_words.split("\n").each do |word|
    # Check the local database first so already-synced words do not trigger
    # a definition lookup on every run.
    back = synced_word?(word) ? nil : api_get_back_of_card_for_word(word)
    if back.nil?
      puts "Skipped #{word}..."
      next
    end

    sync_word('words', word, back)
    puts "Synced #{word}..."
  end
end
# Entry point: sync vocabulary words first, then highlights, announcing each
# phase before it starts.
{
  'words' => method(:sync_words),
  'highlights' => method(:sync_highlights)
}.each do |label, sync_step|
  puts "Start syncing #{label}...\n"
  sync_step.call
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment