Last active
September 3, 2022 14:58
-
-
Save patarapolw/a13d6e29934680eee9a69bc25a5319a0 to your computer and use it in GitHub Desktop.
Yomichan Vocab-Kanji lookup
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import { ankiConnect } from '@/ankiconnect' | |
import { kata2hira } from '@/kana' | |
import { load as cheerio } from 'cheerio' | |
import { readFileSync, writeFileSync } from 'fs' | |
/**
 * Convert the HTML of a note's `Pitch` field into a plain-text reading with
 * accent marks.
 *
 * Steps, in order:
 * 1. Every character of the element that precedes a `border-top` styled
 *    element is tagged (presumably the high-pitch morae of Yomichan's pitch
 *    markup — confirm against the card template).
 * 2. Every `border-right` styled element is replaced by `]`.
 * 3. Only the first `<li>` is used when the field is a list.
 * 4. A leading tagged run becomes a `^` prefix, a trailing `]` becomes a `_`
 *    suffix, and all remaining bracket markers are dropped.
 *
 * @param value Raw HTML of the field.
 * @returns The annotated reading, or `''` when the field contains no kana.
 */
function pitchToText(value: string): string {
  if (!/[\p{sc=Katakana}\p{sc=Hiragana}]/u.test(value)) {
    return ''
  }

  const $ = cheerio(value)

  $('[style*="border-top"]').each((_, it) => {
    const $top = $(it).prev()
    $top.text(
      $top
        .text()
        .split('')
        .map((c) => '^' + c)
        .join('')
    )
  })
  $('[style*="border-right"]').text(']')

  const $li = $('li')
  let v = $li.length ? $li.first().text() : $.text()

  // Collapse each run of `^`-tagged characters into `[` followed by the bare characters.
  v = v.replace(/(\^.)+/g, (p0) => '[' + p0.replace(/\^/g, ''))

  if (v.length > 1) {
    if (v.startsWith('[')) {
      v = '^' + v
    }
    if (v.endsWith(']')) {
      v += '_'
    }
  }

  return v.replace(/[\[\]]/g, '')
}

/**
 * Convert the HTML of a note's `Meaning` field into trimmed plain text.
 *
 * Only the first `<li>` is used when the field is a list. Nested list items and
 * `<br>` elements become line breaks. The leading `(大辞泉)` line is removed,
 * as are lines introduced by 用法 / 類語 / 可能 / 下接句 (with or without
 * surrounding brackets). Circled digits ①–⑨ are rewritten as ` (1) ` … ` (9) `.
 *
 * @param value Raw HTML of the field.
 * @returns The plain-text meaning; may still contain `\n`.
 */
function meaningToText(value: string): string {
  let $ = cheerio(value)
  const $it = $('li')
  if ($it.length) {
    $ = cheerio($it.first().html() || '')
  }

  $('li').prepend($('<br>'))
  $('br').text('\n')

  return $.text()
    .replace(/^\((大辞泉)\).+\n/, '')
    // Alternation, not a character class: a class would drop every line that
    // merely starts with one of these characters (or with `(` / `|`) and would
    // never match a bracketed header such as `[用法]`.
    .replace(
      /\n[\[［(（〔【]?(?:用法|類語|可能|下接句)[\]］)）〕】]?.+/g,
      '\n'
    )
    .replace(/[①-⑨]/g, (p0) => {
      // ① is U+2460; map ①..⑨ to 1..9.
      const digit = p0.charCodeAt(0) - 0x2460 + 1
      return ` (${digit}) `
    })
    .trim()
}

/**
 * Fill in `tmp/vocab.tsv` from the Anki deck `Yomichan::Terms`.
 *
 * Each TSV row is read as `term <TAB> reading <TAB> meaning`. Rows are looked
 * up by their term, or by the hiragana form of their reading when the term
 * column is empty. For every row with a matching note, the reading column is
 * replaced by the pitch-annotated reading (falling back to the plain reading)
 * and the meaning column by the note's meaning flattened onto one line. Rows
 * without a match are written back unchanged. The file is overwritten in place.
 *
 * Does nothing when the file yields no lookup keys.
 */
async function main() {
  const filename = 'tmp/vocab.tsv'
  const txt = readFileSync(filename, 'utf-8')
  const rows = txt.split('\n')

  // Lookup key of a row: the term, or the hiragana reading when the term is empty.
  const keyOf = (cols: string[]): string =>
    cols[0] || kata2hira(cols[1] || '')

  const qs = rows.map((row) => keyOf(row.split('\t'))).filter((s) => s)
  if (!qs.length) {
    return
  }

  const noteIds = await ankiConnect.send('findNotes', {
    query: `deck:Yomichan::Terms (${qs.join(' OR ')})`
  })
  const notes = await ankiConnect.send('notesInfo', {
    notes: noteIds
  })

  // Term -> all notes whose `Japanese` field equals that term.
  const dict = new Map<string, Record<string, string>[]>()

  for (const { fields } of notes) {
    const entry: Record<string, string> = {}

    for (const [k, { value }] of Object.entries(fields)) {
      let v = ''
      switch (k) {
        case 'Japanese':
        case 'Reading':
        case 'POS':
          v = value
          break
        case 'Pitch':
          v = pitchToText(value)
          break
        case 'Meaning':
          v = meaningToText(value)
          break
      }
      // Empty values are left out so later `||` fallbacks see them as missing.
      if (v) {
        entry[k] = v
      }
    }

    // `POS` may be absent (empty field), hence the optional chaining.
    if (entry['POS']?.includes('dan verb')) {
      delete entry['Pitch']
    }

    const jp = entry['Japanese']
    if (!jp) {
      // A note without a term cannot be matched to any row.
      continue
    }

    const vs = dict.get(jp) ?? []
    vs.push(entry)
    dict.set(jp, vs)
  }

  const updated = rows.map((row) => {
    if (!row) return row

    const cols = row.split('\t')
    const vs = dict.get(keyOf(cols))
    if (!vs) {
      return row
    }

    // With a reading present, require the note's reading to match it;
    // otherwise take the first note found for the term.
    const reading = cols[1]
    const match = reading
      ? vs.find((e) => e['Reading'] === kata2hira(reading))
      : vs[0]

    if (match) {
      cols[1] = match['Pitch'] || match['Reading'] || cols[1] || ''
      cols[2] = (match['Meaning'] || cols[2] || '')
        .replace(/\n/g, ' ')
        .replace(/ +/g, ' ')
    }

    return cols.join('\t')
  })

  writeFileSync(filename, updated.join('\n'), 'utf-8')
}
// Run the script only when this file is executed directly, not when imported.
if (require.main === module) {
  main().catch((e: unknown) => {
    // Report the failure and signal it through the exit code instead of
    // leaving the promise rejection unhandled.
    console.error(e)
    process.exitCode = 1
  })
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import { ankiConnect } from '@/ankiconnect' | |
import { load as cheerio } from 'cheerio' | |
import { readFileSync, writeFileSync } from 'fs' | |
/**
 * Fill in `tmp/kanji.tsv` from the Anki decks `Yomichan::Kanji` and
 * `Yomichan::Terms`.
 *
 * The first column of each TSV row is the kanji to look up. For every row with
 * a matching kanji note:
 * - column 1 receives `Reading_On` and column 2 `Reading_Kun` (commas replaced
 *   by spaces), keeping the existing cell when the note has no value;
 * - column 3 receives every distinct `Japanese` value from `Yomichan::Terms`
 *   that contains the kanji, space-separated;
 * - column 5 receives `Kanji_Meaning` as plain text on one line, but only when
 *   that field contains Han or kana characters.
 *
 * Rows without a match are written back unchanged. The file is overwritten in
 * place. Does nothing when the file has no kanji or no kanji note is found.
 */
async function main() {
  const filename = 'tmp/kanji.tsv'
  const txt = readFileSync(filename, 'utf-8')
  const rows = txt.split('\n')

  const qs = rows.map((row) => row.split('\t')[0] ?? '').filter((k) => k)
  if (!qs.length) {
    return
  }

  const kanjiNoteIds = await ankiConnect.send('findNotes', {
    query: `deck:Yomichan::Kanji (${qs
      .map((k) => `Kanji:${k}`)
      .join(' OR ')})`
  })
  const kanjiNotes = await ankiConnect.send('notesInfo', {
    notes: kanjiNoteIds
  })

  // Kanji -> cleaned-up fields of its note.
  const dict = new Map<string, Record<string, string>>()

  for (const { fields } of kanjiNotes) {
    const entry: Record<string, string> = {}

    for (const [k, { value }] of Object.entries(fields)) {
      let v = ''
      switch (k) {
        case 'Kanji':
          v = value
          break
        case 'Reading_On':
        case 'Reading_Kun':
          v = value.replace(/, /g, ' ')
          break
        case 'Kanji_Meaning':
          // Keep the meaning only when it contains Japanese text.
          if (/[\p{sc=Han}\p{sc=Katakana}\p{sc=Hiragana}]/u.test(value)) {
            v = cheerio(value).text()
          }
          break
      }
      if (v) {
        entry[k] = v
      }
    }

    const jp = entry['Kanji']
    if (jp) {
      // Notes with an empty `Kanji` field cannot be matched to any row.
      dict.set(jp, entry)
    }
  }

  if (!dict.size) {
    return
  }

  const termNoteIds = await ankiConnect.send('findNotes', {
    query: `deck:Yomichan::Terms (${Array.from(dict.keys())
      .map((k) => `Japanese:*${k}*`)
      .join(' OR ')})`
  })
  const termNotes = await ankiConnect.send('notesInfo', {
    notes: termNoteIds
  })

  // Distinct terms in first-seen order; notes lacking a `Japanese` field are skipped.
  const terms = Array.from(
    new Set(
      termNotes.flatMap(({ fields }) => {
        const japanese = fields['Japanese']
        return japanese ? [japanese.value] : []
      })
    )
  )

  const updated = rows.map((row) => {
    if (!row) return row

    const cols = row.split('\t')
    const kanji = cols[0] || ''
    const entry = dict.get(kanji)
    if (!entry) {
      return row
    }

    cols[1] = entry['Reading_On'] || cols[1] || ''
    cols[2] = entry['Reading_Kun'] || cols[2] || ''
    cols[3] = terms.filter((s) => s.includes(kanji)).join(' ')

    const meaning = entry['Kanji_Meaning']
    if (meaning) {
      cols[5] = meaning.replace(/\n/g, ' ')
    }

    // Assigning column 5 on a short row leaves a hole at column 4;
    // Array.from turns every missing cell into an empty string.
    return Array.from(cols, (c) => c || '').join('\t')
  })

  writeFileSync(filename, updated.join('\n'), 'utf-8')
}
// Run the script only when this file is executed directly, not when imported.
if (require.main === module) {
  main().catch((e: unknown) => {
    // Report the failure and signal it through the exit code instead of
    // leaving the promise rejection unhandled.
    console.error(e)
    process.exitCode = 1
  })
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import axios, { AxiosInstance } from 'axios' | |
/** Shape of a note as it is passed to the note-related AnkiConnect actions. */
export interface INote {
  /** Name of the deck the note belongs to. */
  deckName: string
  /** Name of the note type (model). */
  modelName: string
  /** Field name -> field content. */
  fields: { [fieldName: string]: string }
  tags: string[]
  /** Optional media attachments; their element shape is not modelled here. */
  audio?: unknown[]
  video?: unknown[]
  picture?: unknown[]
}
/**
 * Duplicate-handling options attached to a note, discriminated by
 * `allowDuplicate`: the scope settings are required only in the `true` variant.
 */
export type IAddNoteOptions =
  | { allowDuplicate: false }
  | {
      allowDuplicate: true
      duplicateScope: string
      duplicateScopeOptions: {
        deckName: string
        checkChildren: boolean
        checkAllModels: boolean
      }
    }
/**
 * Request (`params`) and response (`result`) types of the AnkiConnect actions
 * used through `AnkiConnect.send`, keyed by action name.
 *
 * https://foosoft.net/projects/anki-connect/
 *
 * NOTE(review): identifiers are typed as `string` throughout, but the
 * AnkiConnect documentation shows them as JSON numbers — confirm before
 * relying on string operations.
 */
export interface AnkiConnectActions
  extends Record<string, { params: any; result: any }> {
  // Graphical Actions

  guiBrowse: {
    params: {
      /** https://docs.ankiweb.net/searching.html */
      query: string
    }
    /**
     * NoteId[]
     *
     * NOTE(review): the AnkiConnect documentation describes these as card
     * identifiers — confirm.
     */
    result: string[]
  }

  // Model Actions

  createModel: {
    params: {
      modelName: string
      css: string
      isCloze: boolean
      cardTemplates: {
        Name: string
        Front: string
        Back: string
      }[]
    } & (
      | {
          /** Field names of the new model, in order (key name per the AnkiConnect docs). */
          inOrderFields: string[]
        }
      | {
          /**
           * @deprecated Misspelling of `inOrderFields`, still accepted so
           * existing callers keep compiling. AnkiConnect documents the key as
           * `inOrderFields`.
           */
          isOrderFields: string[]
        }
    )
    result: {
      id: string
      name: string
      css: string
      flds: {
        name: string
        ord: number
      }[]
      tmpls: {
        name: string
        ord: number
        qfmt: string
        afmt: string
        did: null
      }[]
      did: number
    }
  }
  modelTemplates: {
    params: {
      modelName: string
    }
    result: {
      [cardName: string]: {
        [side: string]: string
      }
    }
  }
  modelStyling: {
    params: {
      modelName: string
    }
    result: {
      css: string
    }
  }

  // Note Actions

  addNote: {
    params: {
      note: INote & {
        options: IAddNoteOptions
      }
    }
    /** NoteId */
    result: string
  }
  addNotes: {
    params: {
      notes: (INote & {
        options: IAddNoteOptions
      })[]
    }
    /** NoteId[], will be `null` if failed */
    result: (string | null)[]
  }
  canAddNotes: {
    params: {
      notes: (INote & {
        options: IAddNoteOptions
      })[]
    }
    /** boolean[] */
    result: boolean[]
  }
  updateNoteFields: {
    /**
     * NOTE(review): the AnkiConnect documentation shows this action taking a
     * single `note` object rather than a `notes` array — confirm.
     */
    params: {
      notes: (Partial<INote> & {
        id: string
        fields: INote['fields']
        options: IAddNoteOptions
      })[]
    }
    result: null
  }
  addTags: {
    params: {
      notes: string[]
      /** space-separated */
      tags: string
    }
    result: null
  }
  removeTags: {
    params: {
      notes: string[]
      /** space-separated */
      tags: string
    }
    result: null
  }
  getTags: {
    params: undefined
    result: string[]
  }
  findNotes: {
    params: {
      /** https://docs.ankiweb.net/searching.html */
      query: string
    }
    /** NoteId[] */
    result: string[]
  }
  notesInfo: {
    params: {
      notes: string[]
    }
    result: (Omit<INote, 'deckName'> & {
      noteId: string
      fields: Record<
        string,
        {
          value: string
          order: number
        }
      >
      cards: string[]
    })[]
  }
}
/** Minimal typed client for the AnkiConnect HTTP API. */
export class AnkiConnect {
  /** Axios instance bound to `baseURL`. */
  $api: AxiosInstance

  /**
   * @param baseURL Address AnkiConnect is listening on.
   * @param version API version sent with each request unless overridden per call.
   */
  constructor(public baseURL = 'http://localhost:8765', public version = 6) {
    this.$api = axios.create({ baseURL })
  }

  /**
   * POST a single action and return its `result`.
   *
   * @param action Action name; selects the types of `params` and of the result.
   * @param params Parameters of the action.
   * @param version API version for this call; defaults to the instance's version.
   * @returns The `result` field of the response.
   * @throws Error carrying the response's `error` value when it is truthy, or
   *   a fixed message when the response has no `result` field.
   */
  async send<A extends keyof AnkiConnectActions>(
    action: A,
    params: AnkiConnectActions[A]['params'],
    version = this.version
  ): Promise<AnkiConnectActions[A]['result']> {
    const { data } = await this.$api.post('/', { action, version, params })

    if (data.error) {
      throw new Error(data.error)
    }
    // `null` is a valid result for some actions, so only `undefined` counts as missing.
    if (data.result === undefined) {
      throw new Error('response is missing required result field')
    }

    return data.result
  }
}

/** Shared client using the default address and API version. */
export const ankiConnect = new AnkiConnect()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * Small-kana lookup tables, keyed by romaji.
 *
 * https://github.com/WaniKani/WanaKana/blob/master/src/utils/romajiToKanaMap.js
 */
export const SMALL_Y = {
  ya: 'ゃ',
  yi: 'ぃ',
  yu: 'ゅ',
  ye: 'ぇ',
  yo: 'ょ'
}

export const SMALL_VOWELS = {
  a: 'ぁ',
  i: 'ぃ',
  u: 'ぅ',
  e: 'ぇ',
  o: 'ぉ'
}

/** Union of both tables: the `y` entries first, then the vowels. */
export const SMALL_KANA = {
  ...SMALL_Y,
  ...SMALL_VOWELS
}
/** Code-point distance between a katakana letter and its hiragana counterpart ('ア' - 'あ'). */
const HIRA_KATA_DIFF = 0x30a2 - 0x3042

/**
 * Convert katakana in `s` to hiragana, leaving every other character untouched.
 *
 * Only katakana that have a hiragana counterpart at the fixed offset are
 * converted: ァ–ヶ (U+30A1–U+30F6) and the iteration marks ヽ ヾ
 * (U+30FD, U+30FE). Other katakana-script characters — half-width forms,
 * ヷ–ヺ, ヿ, the phonetic extensions — are kept as they are, because
 * subtracting the offset from them yields unrelated or unassigned code points.
 * The long-vowel mark ー is not converted either.
 *
 * @param s Text to convert.
 * @returns `s` with convertible katakana replaced by hiragana.
 */
export function kata2hira(s: string): string {
  return s.replace(/[\u30A1-\u30F6\u30FD\u30FE]/g, (c) =>
    String.fromCharCode(c.charCodeAt(0) - HIRA_KATA_DIFF)
  )
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment