Skip to content

Instantly share code, notes, and snippets.

@searls
Last active January 27, 2018 18:03
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save searls/47f67fd033f98a9eeef9dce178b7062d to your computer and use it in GitHub Desktop.
Save searls/47f67fd033f98a9eeef9dce178b7062d to your computer and use it in GitHub Desktop.
A little Ruby script to download WaniKani's ~2000 kanji and persist them in ActiveRecord
#!/usr/bin/env ruby
require_relative "../config/environment"
# Fetches the kanji list for one WaniKani level from the
# api/user/<key>/kanji/<level> endpoint.
#
# @param level [#to_s] level identifier interpolated into the request path
# @return [Object] the parsed "requested_information" value of the JSON response
# @raise [KeyError] if the WANIKANI_API_KEY environment variable is not set
# @raise [RuntimeError] carrying the API's message when the response has an "error" key
def kanji_at(level)
  # fetch (rather than []) so a missing key fails here instead of producing a
  # request with an empty key segment in the URL.
  api_key = ENV.fetch('WANIKANI_API_KEY')
  uri = URI("https://www.wanikani.com/api/user/#{api_key}/kanji/#{level}")
  response = JSON.parse(Net::HTTP.get(uri))
  raise response["error"]["message"] if response.key?("error")

  response["requested_information"]
end
# Walk WaniKani levels 1 through 60 and store each kanji returned by kanji_at.
1.upto(60) do |level|
  puts "Processing level #{level}"
  kanji_at(level).each do |raw_kanji|
    # Drop the per-user section of the API payload before persisting.
    attributes = raw_kanji.with_indifferent_access.except(:user_specific)
    lookup = attributes.slice(:character)
    # Records are looked up by character; the block assigns the remaining
    # attributes (per ActiveRecord, it is only yielded when a record is created).
    WanikaniKanji.find_or_create_by!(lookup) do |record|
      record.assign_attributes(attributes)
    end
  end
end
#!/usr/bin/env ruby
require_relative "../config/environment"
require 'mojinizer'
# Stores on every Japanese sentence the highest WaniKani level among the kanji
# it contains (wk_level), then prints summary statistics.
#
# Sentences without any kanji get level 0; sentences containing a kanji that
# has no WanikaniKanji record get WK_MAX_LEVEL + 1.
japanese_sentences = Sentence.where(:language => 'jpn')
# Count the same scope that is processed below, so the progress lines can be
# compared against this number.
puts "Sentences to process: #{japanese_sentences.count}"
WK_MAX_LEVEL = 60
# character => level lookup built once up front, so each kanji costs a hash
# probe instead of a scan over every WanikaniKanji record.
wk_level_by_character = WanikaniKanji.pluck(:character, :level).to_h
missing_wk_kanji = []
level_counts = Hash.new(0)
japanese_sentences.find_each.with_index(1) do |sentence, processed|
  sentence_kanji = sentence.text.scan(/[\p{Han}]/).uniq
  kanji_levels = sentence_kanji.map do |kanji|
    wk_level_by_character.fetch(kanji) do
      # Every unknown occurrence is recorded (once per sentence it appears in)
      # for the totals printed at the end.
      missing_wk_kanji << kanji
      WK_MAX_LEVEL + 1 #<-- Gotta assume we consider these sentences "hard"
    end
  end
  # max is nil for a sentence with no kanji at all; treat that as level 0.
  max_kanji_level = kanji_levels.max || 0
  level_counts[max_kanji_level] += 1
  sentence.update!(:wk_level => max_kanji_level)
  puts "Processed #{processed} sentences" if (processed % 10_000).zero?
end
puts "Total occurrences of kanji unknown to Wanikani: #{missing_wk_kanji.size}"
puts "Unique kanji unknown to Wanikani: #{missing_wk_kanji.uniq.size}"
puts "Sentence count by WK level:"
level_counts.keys.sort.each do |level|
  puts " #{level} => #{level_counts[level]}"
end
$ ./script/add_wk_kanji_level_to_sentences
Sentences to process: 891800
Processed 10000 sentences
Processed 20000 sentences
Processed 30000 sentences
Processed 40000 sentences
Processed 50000 sentences
Processed 60000 sentences
Processed 70000 sentences
Processed 80000 sentences
Processed 90000 sentences
Processed 100000 sentences
Processed 110000 sentences
Processed 120000 sentences
Processed 130000 sentences
Processed 140000 sentences
Processed 150000 sentences
Processed 160000 sentences
Processed 170000 sentences
Processed 180000 sentences
Total occurrences of kanji unknown to Wanikani: 5785
Unique kanji unknown to Wanikani: 919
Sentence count by WK level:
0 => 2062
1 => 392
2 => 729
3 => 743
4 => 1299
5 => 2945
6 => 2439
7 => 1831
8 => 3775
9 => 4141
10 => 3844
11 => 2774
12 => 3221
13 => 2475
14 => 9123
15 => 4313
16 => 4693
17 => 2550
18 => 3744
19 => 2563
20 => 3024
21 => 2527
22 => 3070
23 => 2457
24 => 3013
25 => 2677
26 => 3150
27 => 3862
28 => 2181
29 => 1783
30 => 2794
31 => 2476
32 => 3474
33 => 2312
34 => 2640
35 => 40652
36 => 2788
37 => 2342
38 => 3201
39 => 1849
40 => 1325
41 => 1234
42 => 1813
43 => 3303
44 => 3882
45 => 2032
46 => 1922
47 => 975
48 => 2170
49 => 1073
50 => 1658
51 => 1190
52 => 1341
53 => 444
54 => 539
55 => 443
56 => 676
57 => 302
58 => 319
59 => 750
60 => 224
61 => 5111
#!/usr/bin/env ruby
require_relative "../config/environment"
# Relation of Japanese sentences stored at the given WaniKani level, joined to
# their translations and grouped by sentence id (one row per sentence).
#
# @param level [Integer] value matched against sentences.wk_level
# @return the chained Sentence query (not yet loaded)
def sentences_at(level)
  japanese_at_level = Sentence.where(language: 'jpn', wk_level: level)
  japanese_at_level
    .joins(:translations)
    .group('sentences.id')
end
require "csv"
# Writes a CSV deck with one row per sentence returned by sentences_at, for
# levels 1..MAX_WK_LEVEL. Columns: sentence text, HTML for the card back
# (furigana reading, English translations, lookup links), space-separated tags.
MAX_WK_LEVEL = 60
presents_token = PresentsToken.new
path = "tmp/anki-sentence-deck.csv"

# Two-digit, zero-padded level (7 -> "07") used in tag names.
padded_level = ->(level) { level.to_s.rjust(2, "0") }

CSV.open(path, "wb") do |csv|
  (1..MAX_WK_LEVEL).each do |level|
    puts "Building sentences for level #{level}"

    # Tags depend only on the level, so build them once per level. A sentence
    # gets its own "wanikani-level-NN" tag plus "wanikani-all-NN" for its level
    # and every higher one.
    tags = ["wanikani-level-#{padded_level.call(level)}"]
    tags += (level..MAX_WK_LEVEL).map do |higher_level|
      "wanikani-all-#{padded_level.call(higher_level)}"
    end
    tag_list = tags.join(" ")

    sentences_at(level).each do |sentence|
      # One <ruby> element per furigana pair; text is HTML-escaped so stray
      # markup characters in the data cannot break the card.
      reading = sentence.tokens.map do |token|
        presents_token.call(token).furigana_tokens.map do |pair|
          "<ruby><rb>#{ERB::Util.h(pair.text)}<rt>#{ERB::Util.h(pair.furigana)}</ruby>"
        end.join
      end.join

      translations = sentence.translations.select { |t| t.translation.english? }
      translation_items = translations.map { |t| "<li>#{ERB::Util.h(t.text)}</li>" }.join
      url_escaped_sentence = ERB::Util.u(sentence.text)

      # The squiggly heredoc strips the common leading indentation, so the
      # emitted HTML lines start at column 0.
      back = <<~BACK
        <h3>Reading:</h3>
        <p>
        #{reading}
        </p>
        <hr/>
        <h3>Translation#{translations.size != 1 ? 's' : ''}:</h3>
        <ul style="text-align: left">
        #{translation_items}
        </ul>
        <hr/>
        <h3>Actions:</h3>
        <ul style="text-align: left">
        <li><a href="http://translate.google.com/#ja/en/#{url_escaped_sentence}">Google Translate</a></li>
        <li><a href="midori://translate?text=#{url_escaped_sentence}">Midori.app</a></li>
        <li><a href="japanese://search/#{url_escaped_sentence}">Japanese.app</a></li>
        </ul>
      BACK

      csv << [sentence.text, back, tag_list]
    end
  end
end
puts "Deck written to #{path}"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment