Skip to content

Instantly share code, notes, and snippets.

@Jagua
Created February 25, 2018 05:38
Show Gist options
  • Save Jagua/daeaca9c1adc04769f66ffb87eb3a2b8 to your computer and use it in GitHub Desktop.
Save Jagua/daeaca9c1adc04769f66ffb87eb3a2b8 to your computer and use it in GitHub Desktop.
scriptencoding utf-8
" gen_gorin_dict.vim - Generate dictionaries of athletes from gorin.jp
"
" generate two files:
" 1. gorin%Y.migemodict
" 2. gorin%Y_msime.txt
"
" usage:
" vim -N -u NONE -i NONE -V1 -e -s -X --cmd "source gen_gorin_dict.vim" --cmd qall!
" (wait about 100 seconds...)
" Note: replace the path below with the location of your own webapi-vim
" checkout.  This script calls webapi#html#parseURL() to fetch and parse pages,
" so that plugin must be on 'runtimepath' before the functions below run.
set runtimepath+=~/.cache/dein/repos/github.com/mattn/webapi-vim
" Return the absolute URLs linked from the 'select-word' list on the athlete
" top page (https://www.gorin.jp/athlete/).
"
" Each <li> under the list's <ul> contributes the href of its <a>, prefixed
" with the site root.
function! s:get_urls() abort
  let top_page = webapi#html#parseURL('https://www.gorin.jp/athlete/')
  let selector = top_page.find('div', {'class' : 'select-word'})
  let items = selector.childNode('ul').childNodes('li')
  let site = 'https://www.gorin.jp'
  " Work on a copy so the node list handed back by the parser is not rewritten.
  return map(copy(items), 'site . v:val.childNode("a").attr["href"]')
endfunction
" Fetch one athlete's profile page and extract the name in kanji and kana.
"
" Args:
"   relative_path: site-relative path of the profile page; it is appended to
"                  'https://www.gorin.jp'.
" Returns:
"   A dictionary with the keys last_name_kanji, last_name_kana,
"   first_name_kanji, first_name_kana, full_name_kanji and full_name_kana,
"   or {} when the kana reading is empty or when either name does not split
"   into exactly two space-separated parts.  Callers treat {} as
"   "skip this athlete".
function! s:get_athleate_data(relative_path) abort
  let url = 'https://www.gorin.jp' . a:relative_path
  let root_node = webapi#html#parseURL(url)
  let prof_node = root_node.find('div', {'class' : 'heading profile'})
  let h2_node = prof_node.find('h2')
  " The first child of the <h2> is taken as the kanji name, and the text value
  " of the second child as the kana reading.
  let name_kanji = h2_node.child[0]
  let name_kana = h2_node.child[1].value()
  if empty(name_kana)
    return {}
  endif
  " Split on runs of spaces so a doubled separator does not yield an empty
  " element in the middle of the list.
  let kanji_parts = split(name_kanji, ' \+')
  let kana_parts = split(name_kana, ' \+')
  " A list-unpacking :let raises E687/E688 unless the list has exactly two
  " items; because every function here is defined with `abort`, one unusual
  " name (single word, middle name, ...) would stop the whole run.  Skip such
  " athletes instead.
  if len(kanji_parts) != 2 || len(kana_parts) != 2
    return {}
  endif
  let [last_name_kanji, first_name_kanji] = kanji_parts
  let [last_name_kana, first_name_kana] = kana_parts
  return {
        \ 'last_name_kanji' : last_name_kanji,
        \ 'last_name_kana' : last_name_kana,
        \ 'first_name_kanji' : first_name_kanji,
        \ 'first_name_kana' : first_name_kana,
        \ 'full_name_kanji' : last_name_kanji . first_name_kanji,
        \ 'full_name_kana' : last_name_kana . first_name_kana,
        \}
endfunction
" Build the three tab-separated dictionary rows for one athlete:
" reading, kanji, and a category label ('姓', '名' or '人名').
"
" Args:
"   athlete_data: dictionary as returned by s:get_athleate_data().
" Returns:
"   A list of three strings (family name, given name, full name), without
"   line terminators.
function! s:make_msime_lines(athlete_data) abort
  let athlete = a:athlete_data
  let rows = [
        \ [athlete.last_name_kana, athlete.last_name_kanji, '姓'],
        \ [athlete.first_name_kana, athlete.first_name_kanji, '名'],
        \ [athlete.full_name_kana, athlete.full_name_kanji, '人名'],
        \]
  " Join each row's fields with a tab.
  return map(rows, 'join(v:val, "\t")')
endfunction
" Build the three 'reading<TAB>kanji' rows for one athlete, in the order
" family name, given name, full name.
"
" Args:
"   athlete_data: dictionary as returned by s:get_athleate_data().
" Returns:
"   A list of three strings without line terminators.
function! s:make_migemodict_lines(athlete_data) abort
  let rows = []
  for part in ['last', 'first', 'full']
    let reading = a:athlete_data[part . '_name_kana']
    let written = a:athlete_data[part . '_name_kanji']
    call add(rows, printf("%s\t%s", reading, written))
  endfor
  return rows
endfunction
" Collect name data for every athlete linked from one listing page.
"
" Args:
"   url: absolute URL of a listing page containing a
"        <section class="data-block athletes"> block.
" Returns:
"   A list of dictionaries from s:get_athleate_data(); athletes for which
"   that function returns an empty dictionary are left out.
function! s:gather_athlete_data(url) abort
  let page = webapi#html#parseURL(a:url)
  let section = page.find('section', {'class' : 'data-block athletes'})
  let listing = section.childNode('div', {'class' : 'list'})
  let entries = listing.childNode('ul').childNodes('li')
  let collected = []
  for entry in entries
    let profile = s:get_athleate_data(entry.childNode('a').attr['href'])
    if !empty(profile)
      call add(collected, profile)
      " Short pause after each stored profile before the next request.
      sleep 10m
    endif
  endfor
  return collected
endfunction
" Gather athlete data from every listing page returned by s:get_urls().
"
" Returns:
"   One flat list holding the dictionaries of all pages, in page order.
function! s:gather_data() abort
  let everyone = []
  for page_url in s:get_urls()
    let everyone += s:gather_athlete_data(page_url)
  endfor
  return everyone
endfunction
" Entry point: scrape all athletes and write both dictionary files into the
" current directory.
"
" Output files (%Y is the current year from strftime()):
"   gorin%Y.migemodict - rows from s:make_migemodict_lines(), written as is.
"   gorin%Y_msime.txt  - rows from s:make_msime_lines(), converted from
"                        utf-8 to cp932 with a trailing CR on every row.
function! s:main() abort
  let athletes = s:gather_data()
  let stamp = strftime('%Y')

  let migemo_rows = []
  for athlete in athletes
    call extend(migemo_rows, s:make_migemodict_lines(athlete))
  endfor
  call writefile(migemo_rows, printf('gorin%s.migemodict', stamp))

  let msime_rows = []
  for athlete in athletes
    for row in s:make_msime_lines(athlete)
      " writefile() adds the LF, so appending CR here produces CRLF endings.
      call add(msime_rows, iconv(row, "utf-8", "cp932") . "\r")
    endfor
  endfor
  call writefile(msime_rows, printf('gorin%s_msime.txt', stamp))

  echo 'done.'
endfunction
" Run the generator as soon as this script is sourced.
call s:main()
" vim: set et sw=0 ts=2 fdm=indent:
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment