Skip to content

Instantly share code, notes, and snippets.

@janclarin
Last active October 20, 2017 06:19
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save janclarin/80617de5073157dc3256a7f71f169595 to your computer and use it in GitHub Desktop.
Save janclarin/80617de5073157dc3256a7f71f169595 to your computer and use it in GitHub Desktop.
Download Wikimedia Commons Kana Stroke Order Animation GIFs
import os
from urllib import request, parse

from bs4 import BeautifulSoup
# Prefix of a Wikimedia Commons "File:" page; get_link() appends the file
# name to it to build the page URL.
BASE_FILE_URL = 'https://commons.wikimedia.org/wiki/File:'

# File-name template. Slot 1 is the script name (HIRAGANA or KATAKANA);
# slot 2 is the kana character, e.g. い or か (download_gifs() passes it
# URL-encoded).
BASE_FILE_NAME = '{}_{}_stroke_order_animation.gif'

HIRAGANA = 'Hiragana'
KATAKANA = 'Katakana'

# Interleaved kana pairs: every even index holds a hiragana character and the
# following odd index holds its katakana counterpart. main() relies on this
# alternation to split the list by script.
KANA = [
    # -a sounds
    "あ", "ア", "か", "カ", "さ", "サ", "た", "タ", "な", "ナ",
    "は", "ハ", "ま", "マ", "や", "ヤ", "ら", "ラ", "わ", "ワ",
    # -i sounds
    "い", "イ", "き", "キ", "し", "シ", "ち", "チ",
    "に", "ニ", "ひ", "ヒ", "み", "ミ", "り", "リ",
    # -u sounds
    "う", "ウ", "く", "ク", "す", "ス", "つ", "ツ", "ぬ", "ヌ",
    "ふ", "フ", "む", "ム", "ゆ", "ユ", "る", "ル",
    # -e sounds
    "え", "エ", "け", "ケ", "せ", "セ", "て", "テ",
    "ね", "ネ", "へ", "ヘ", "め", "メ", "れ", "レ",
    # -o sounds
    "お", "オ", "こ", "コ", "そ", "ソ", "と", "ト", "の", "ノ",
    "ほ", "ホ", "も", "モ", "よ", "ヨ", "ろ", "ロ", "を", "ヲ",
]
def urlencode(string):
    """Return ``string`` percent-encoded as UTF-8 via ``parse.quote_plus``.

    Spaces become ``+`` and every other reserved or non-ASCII character is
    emitted as ``%XX`` escapes of its UTF-8 bytes.
    """
    encoded = parse.quote_plus(string, encoding='utf-8')
    return encoded
def get_link(url, filename):
    """Return the ``src`` of the first ``<img>`` on the page ``url + filename``.

    Args:
        url: Page URL prefix (e.g. ``BASE_FILE_URL``).
        filename: Already URL-encoded file name appended to ``url``.

    Returns:
        The value of the first image tag's ``src`` attribute, or ``None``
        when that tag carries no ``src``.

    Raises:
        IndexError: If the page contains no ``<img>`` element.
        urllib.error.URLError: If the page cannot be fetched.
    """
    page_url = url + filename
    # Use the response as a context manager so the connection is released
    # even when read() fails.
    with request.urlopen(page_url) as response:
        html = response.read()
    soup = BeautifulSoup(html, 'html.parser')
    images = soup.find_all('img')
    if not images:
        # Same exception type an empty-list index would raise, but with a
        # message that names the offending page.
        raise IndexError('no <img> element found at ' + page_url)
    return images[0].get('src')
def download_file(url, filename):
    """Download ``url`` and write the response body to ``filename``.

    This is a best-effort operation: any failure (network error, bad URL,
    filesystem error) is printed and swallowed so that a batch of downloads
    can carry on with the next file.

    Args:
        url: Address of the resource to fetch.
        filename: Destination path. Missing parent directories are created.
    """
    try:
        # Fetch the whole payload first so a failed request never leaves an
        # empty or partial file behind.
        with request.urlopen(url) as response:
            payload = response.read()
        parent = os.path.dirname(filename)
        if parent:
            # The destination folder may not exist yet on a fresh run.
            os.makedirs(parent, exist_ok=True)
        # The context manager closes the handle even if write() raises.
        with open(filename, 'wb') as new_file:
            new_file.write(payload)
        print('Downloaded: ' + filename)
    except Exception as e:
        # Deliberately broad: report the problem and let the caller move on.
        print(str(e))
def download_gifs(characters, kana_type, folder):
    """Fetch the stroke-order animation for each character into ``folder``.

    For every character, the Commons file page name is built from
    ``BASE_FILE_NAME``, the image address is looked up with ``get_link`` and
    the image is saved as ``<folder>/<character>.gif`` via ``download_file``.

    Args:
        characters: Iterable of kana characters to download.
        kana_type: Script name placed in the file name (``HIRAGANA`` or
            ``KATAKANA``).
        folder: Directory path that receives the downloaded files.
    """
    for kana in characters:
        # The page name carries the URL-encoded character; the local file
        # name keeps the raw character.
        page_name = BASE_FILE_NAME.format(kana_type, urlencode(kana))
        gif_url = get_link(BASE_FILE_URL, page_name)
        target_path = '{}/{}.gif'.format(folder, kana)
        download_file(gif_url, target_path)
def main():
    """Download every hiragana animation, then every katakana animation."""
    # KANA alternates scripts: even indices are hiragana, odd are katakana.
    batches = (
        (KANA[0::2], HIRAGANA, 'img/hiragana'),
        (KANA[1::2], KATAKANA, 'img/katakana'),
    )
    for characters, kana_type, folder in batches:
        download_gifs(characters, kana_type, folder)


# Start the downloads only when executed as a script, not on import.
if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment