-
-
Save sheep0x/c73b5d7ea888672e8c5e to your computer and use it in GitHub Desktop.
tmp
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#! /usr/bin/env bash | |
# User-Agent string passed to wget (via get) for every request.
UA='Mozilla/5.0 (compatible, MSIE 11, Windows NT 6.3; Trident/7.0; rv:11.0) like Gecko'

echo 'Please make sure you want to download into CWD' >&2

# Read both answers as raw lines:
#   IFS=''  keeps leading/trailing whitespace,
#   -r      keeps backslashes literal instead of treating them as escapes.
IFS='' read -r -p 'url> ' bilibili_url
IFS='' read -r -p 'fname> ' fname
# Usage: get <output-file> <url>
#   e.g. get e01 https://www.foo.org
# Downloads <url> into <output-file> with wget, sending $UA as the User-Agent,
# using a 10 second network timeout and an unlimited number of retries.
get() {
    local dest=$1 url=$2
    wget --user-agent="$UA" --output-document="$dest" --timeout=10 --tries=inf "$url"
}
# Refuse to overwrite an existing download (wget --continue could possibly resume instead).
[[ -f "$fname" ]] && { echo 'File already exists' >&2; exit 2; }

####### from old Anidown
# Percent-encode the characters that would otherwise be read as part of the
# flvcd query string. '%' is replaced first so the escapes added by the later
# substitutions are not encoded a second time.
tmp=$(sed -e 's|%|%25|g;s|/|%2F|g;s|:|%3A|g;s|?|%3F|g;s|=|%3D|g;s|&|%26|g' <<< "$bilibili_url")
flvcd_url="http://www.flvcd.com/parse.php?kw=$tmp&format=" # for bilibili's own video, there is probably always only one definition
#######

# Keep the fetched parse page in a private temp file instead of dropping a
# "parse.php" into the download directory, and remove it when the script exits.
parse_page=$(mktemp) || { echo 'Cannot create temporary file' >&2; exit 1; }
trap 'rm -f "$parse_page"' EXIT

get "$parse_page" "$flvcd_url" || { echo 'Failed to fetch the flvcd parse page' >&2; exit 1; }

# The page is converted from GBK; the first double-quoted string containing
# "acgvideo" is taken as the video URL (ruby exits 1 when there is none).
video_url=$(iconv -f GBK "$parse_page" | ruby -e 'x=$stdin.read[/"([^"]+?acgvideo[^"]+)"/,1]; if x; puts x; else exit 1; end') \
    || { echo 'No acgvideo link found in the flvcd parse page' >&2; exit 1; }

get "$fname" "$video_url"
prep for R1B OH | |
clean up inst machine tabpages | |
prep VQ | |
#! /usr/bin/env ruby | |
# encoding: UTF-8 | |
# XXX tmp script on LiveUSB. may need improvements | |
# based on the "first, make it work. don't be too fancy" principle, I'm only making it do the exact same thing as VQ | |
=begin | |
module Util | |
self.autoload(:StringIO, 'stringio') | |
# given IO or String, we return the corresponding IO, so that caller | |
# doesn't have to worry about input type | |
def self.ensure_IO(src) | |
if String === src # otherwise, assume IO or File or StringIO | |
require 'stringio' | |
return StringIO.new(src) | |
end | |
src | |
end | |
end | |
=end | |
# Kana => romaji lookup table used for per-character transliteration.
#
# Conventions (all taken from the original hand-written entries):
#   * hiragana map to lower-case romaji, katakana to the same romaji in upper case
#   * ん/ン are written "nx"/"NX" and っ is written "x"          # XXX
#   * small kana (ゃ, ゅ, ょ, ァ, ...) are wrapped in parentheses  # XXX
#   * は is always "ha", no matter whether it is read wa or ha    # XXX
#   * the long-vowel mark ー is written "-"                       # XXX
#
# Only the hiragana rows are spelled out; each katakana entry is derived from
# its hiragana counterpart (the katakana code point is the hiragana one + 0x60).
# ぢ/づ (and ヂ/ヅ) are deliberately left out: they would need a spelling that
# stays distinguishable from じ/ず.
#
# TODO: support ancient kana, transliteration combinations (e.g. きゃ), small tsu,
# long sounds in kana, and the tokidoki-like repeated-char mark.
KANA_ROMAJI = {
  'あ' => 'a',  'い' => 'i',   'う' => 'u',   'え' => 'e',  'お' => 'o',
  'か' => 'ka', 'き' => 'ki',  'く' => 'ku',  'け' => 'ke', 'こ' => 'ko',
  'が' => 'ga', 'ぎ' => 'gi',  'ぐ' => 'gu',  'げ' => 'ge', 'ご' => 'go',
  'さ' => 'sa', 'し' => 'shi', 'す' => 'su',  'せ' => 'se', 'そ' => 'so',
  'ざ' => 'za', 'じ' => 'ji',  'ず' => 'zu',  'ぜ' => 'ze', 'ぞ' => 'zo',
  'た' => 'ta', 'ち' => 'chi', 'つ' => 'tsu', 'て' => 'te', 'と' => 'to',
  'だ' => 'da',                               'で' => 'de', 'ど' => 'do',
  'な' => 'na', 'に' => 'ni',  'ぬ' => 'nu',  'ね' => 'ne', 'の' => 'no',
  'は' => 'ha', 'ひ' => 'hi',  'ふ' => 'fu',  'へ' => 'he', 'ほ' => 'ho',
  'ば' => 'ba', 'び' => 'bi',  'ぶ' => 'bu',  'べ' => 'be', 'ぼ' => 'bo',
  'ぱ' => 'pa', 'ぴ' => 'pi',  'ぷ' => 'pu',  'ぺ' => 'pe', 'ぽ' => 'po',
  'ま' => 'ma', 'み' => 'mi',  'む' => 'mu',  'め' => 'me', 'も' => 'mo',
  'や' => 'ya',                'ゆ' => 'yu',                'よ' => 'yo',
  'ら' => 'ra', 'り' => 'ri',  'る' => 'ru',  'れ' => 're', 'ろ' => 'ro',
  'わ' => 'wa',                                             'を' => 'wo',
  'ん' => 'nx',
  'ぁ' => '(a)', 'ぃ' => '(i)', 'ぅ' => '(u)', 'ぇ' => '(e)', 'ぉ' => '(o)',
  'ゃ' => '(ya)', 'ゅ' => '(yu)', 'ょ' => '(yo)',
  'っ' => 'x',
}.each_with_object({ 'ー' => '-' }) do |(hiragana, romaji), table|
  table[hiragana] = romaji
  # Same kana in the katakana block, spelled in upper case.
  table[(hiragana.ord + 0x60).chr(Encoding::UTF_8)] = romaji.upcase
end.freeze
# TODO make test cases to check: | |
# romaji generator that (probably) doesn't crash for valid tango (allowed to crash for invalid ones) | |
# romaji generator that (probably) is conventionally guessable (e.g. tsu/tu, saa/sā up to generator, but sa must be sa) | |
# romaji generator that (probably) generates unambiguous output (e.g. ana can be an'a or a'na, generator must make a distinction) | |
# TODO in addition to romaji that reflect kana, also provide romaji that reflect sound (ha vs wa, wo vs o, he vs e) | |
# TODO textbook explanation vs unambiguous explanation (meaningwise, e.g. work(verb)/work(noun), tetsudai?/tasukeru) vs unambiguous spelling (e.g. otenki/tenki) | |
# (otenki vs tenki is so frustrating... unless you take it as a dice game) | |
# TODO 4+1 style VQ. one katakana word | |
# One vocabulary entry: the word as written (name), its romaji spelling and
# its English explanation.
class Tango
  attr_reader :name, :romaji, :english # "name" is not a good name

  # Guesses the romaji of a kana word by looking up each character in
  # KANA_ROMAJI and joining the results.
  #
  # A trailing "(する)" is ignored (foo(する) => foo). The argument itself is
  # left untouched, so frozen strings are accepted and the caller keeps the
  # full spelling.
  #
  # Raises RuntimeError when a character has no entry in KANA_ROMAJI.
  def self.guess_romaji(tango_str)
    stem = tango_str.chomp('(する)')
    stem.each_char.map do |c|
      KANA_ROMAJI.fetch(c) { raise "Cannot guess romaji for word '#{stem}' (confused at #{c})" }
    end.join
  end

  # name   - the word as written in the word list
  # romaji - expected answer; when nil it is guessed from name
  #          (there is no default value, nil must be passed explicitly)
  # eng    - English explanation
  def initialize(name, romaji, eng)
    @name = name
    @romaji = romaji || self.class.guess_romaji(name)
    @english = eng
  end

  def to_s
    "< #{@name} (#{@romaji})\t#{@english} >"
  end

  # Same text as to_s; for debugging.
  def inspect
    to_s
  end
end
# TODO review how to use module vars and class vars | |
# (seems that in module scope, use @@foo (not foo or @foo). in module funcs, also use @@foo. what about class func and class instance func?) | |
# Word list (語彙) plus a per-word counter that decides which words are still
# offered by pick_words.
module Goi #語彙
  # A word is no longer picked once its counter reaches this value.
  MASTERED_AFTER = 3

  @@words = []
  # Times picked. run_quiz decrements the counter again after a wrong answer,
  # so in effect it counts correct answers. Read from outside via
  # class_variable_get('@@ntimes').
  @@ntimes = Hash.new(0)

  # Reads tab-separated word lines from src (anything responding to
  # each_line, e.g. an IO or a String):
  #
  #   name <TAB> english
  #   name <TAB> romaji <TAB> english
  #
  # With two fields the romaji is passed as nil, so Tango guesses it.
  # Blank lines are skipped silently; any other line without two or three
  # fields is reported with a warning and skipped.
  # Unless append is true, previously loaded words are discarded first.
  def self.load(src, append=false)
    @@words = [] unless append
    src.each_line do |line|
      next if line.strip.empty?

      fields = line.chomp.split(/\t+/)
      fields.insert(1, nil) if fields.size == 2
      unless fields.size == 3
        warn 'Bad data line: ' + line
        next
      end
      @@words << Tango.new(*fields) # TODO detect words with same english explanation. detect diff meanings of same word or diff words that have same kana spelling
    end
  end

  # Returns up to n distinct random words whose counter is still below
  # MASTERED_AFTER and increments the counter of each returned word.
  # The result is shorter than n (possibly empty) when fewer words qualify.
  def self.pick_words(n)
    picked = @@words.reject { |w| @@ntimes[w] >= MASTERED_AFTER }.sample(n)
    picked.each { |w| @@ntimes[w] += 1 }
    picked
  end
end
# Runs one quiz round on standard input/output.
#
# Prints the English explanation of every word, then reads one answer line
# per word (in the same order) from $stdin and compares it, stripped of
# surrounding whitespace, with the word's romaji. Afterwards prints the
# score, one ✓/✗ mark per word and the names of the missed words.
#
# For every wrong answer the word's counter in Goi's @@ntimes is decremented
# (XXX tmp hack: this undoes the increment done when the word was picked).
#
# Aborts the program when words is empty, or when $stdin reaches end of file
# before every word has been answered.
def run_quiz(words)
  abort 'Quiz must be non-empty (exiting to prevent infinite loop)' if words.empty?

  words.each { |w| puts w.english }
  puts

  marks = []  # one ✓ or ✗ per word; alternative: 〇× (traditional vs stylish)
  missed = [] # names of the words answered wrongly
  words.each do |w|
    line = $stdin.gets
    abort 'Input ended before the quiz was finished' if line.nil?

    if line.strip == w.romaji
      marks << '✓'
    else
      marks << '✗'
      missed << w.name
      Goi.class_variable_get('@@ntimes')[w] -= 1 # XXX tmp hack
    end
  end

  puts "score: #{marks.count('✓')}/#{words.size}"
  puts marks.join
  puts missed.join("\n") # prints an empty line when nothing was missed
  puts
end
# Entry point: load the word list embedded after __END__ and keep running
# 5-word quizzes until Goi.pick_words has nothing left to offer.
Goi.load DATA
loop do
  words = Goi.pick_words(5)
  if words.empty?
    # Finish normally instead of relying on run_quiz aborting on an empty quiz.
    puts 'No words left to practice. Done!'
    break
  end
  run_quiz(words)
  # TODO show progress (drill without progress bar can be scary. I was wondering if the script had bug that loops forever)
  # (67 words with about 10% fail rate => 67*3*(10/9) = 223 times of answering!)
end
__END__ | |
おととし year before last | |
さらいねん year after next | |
せんせんげつ month before last | |
せんげつ last month | |
こんげつ this month | |
らいげつ next month | |
さらいげつ month after next | |
せんせんしゅう week before last | |
さらいしゅう week after next | |
おととい day before yesterday | |
あさって day after tomorrow | |
あきはばら Akihabara | |
あきやま Akiyama | |
あさくさ Asakusa | |
あびる(シャワーを) abiru take (a shower) | |
うける(しけんを) ukeru take (an exam) | |
うれしい(1st person) ureshii happy, pleased | |
おてら Buddhist temple | |
おてんき weather | |
おどる dance | |
おまつり festival | |
おみやげ gift | |
かきかた way of writing | |
きっぷ ticket | |
くうこう airport | |
さくら cherry blossom/tree | |
さびしい lonely | |
さま suffix (honored person) | |
さんぽ(する) stroll | |
じつは to tell the truth | |
シャワー shower | |
しょうがっこう elementary school | |
じんじゃ Shinto shrine | |
スーツケース suitcase | |
そつぎょう(する) graduation | |
たすける help | |
タンゴ Tango | |
ちゅうがっこう junior high school | |
つかいかた usage, way to use | |
つくりかた way of making | |
てら Buddhist temple | |
てんき weather | |
とき time, when | |
なつ summer | |
のる ride, get on | |
はしる run | |
はたらく work | |
はる spring | |
はるやすみ spring vacation | |
ビートルズ The Beatles | |
びょうき illness | |
ひるね(する) nap | |
プリンター printer | |
ぶん sentence | |
ぶんぽう grammar | |
へんじ(する) reply | |
ホームステイ(する) homestay | |
ホストファミリー host family | |
マイル mile | |
まつり festival | |
みやげ gift | |
みんな everyone | |
ゆき snow | |
ゆきだるま snowman | |
よみかた way of reading | |
よろこぶ(3rd person) yorokobu be happy, pleased | |
りょこう(する) trip, travel |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment