Skip to content

Instantly share code, notes, and snippets.

@patarapolw
Last active September 3, 2022 14:58
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save patarapolw/a13d6e29934680eee9a69bc25a5319a0 to your computer and use it in GitHub Desktop.
Save patarapolw/a13d6e29934680eee9a69bc25a5319a0 to your computer and use it in GitHub Desktop.
Yomichan Vocab-Kanji lookup
import { ankiConnect } from '@/ankiconnect'
import { kata2hira } from '@/kana'
import { load as cheerio } from 'cheerio'
import { readFileSync, writeFileSync } from 'fs'
/**
 * Converts the HTML of a note's `Pitch` field into a compact text notation.
 *
 * Returns `''` when the markup contains no hiragana/katakana at all.
 * Otherwise:
 * - every character of the element that precedes a `border-top`-styled
 *   element is marked, and each run of marked characters is opened with `[`;
 * - every `border-right`-styled element has its text replaced with `]`;
 * - only the first `<li>` is used when the markup contains a list;
 * - a leading `[` becomes `^`-prefixed and a trailing `]` gets `_` appended
 *   (only for results longer than one character), after which all brackets
 *   are removed.
 *
 * NOTE(review): presumably `border-top` marks high-pitch morae and
 * `border-right` the downstep in Yomichan's pitch markup; confirm against
 * the card template.
 */
function pitchToText(value: string): string {
  if (!/[\p{sc=Katakana}\p{sc=Hiragana}]/u.test(value)) {
    return ''
  }

  const $ = cheerio(value)
  $('[style*="border-top"]').each((_, it) => {
    const $top = $(it).prev()
    $top.text(
      $top
        .text()
        .split('')
        .map((c) => '^' + c)
        .join('')
    )
  })
  $('[style*="border-right"]').text(']')

  const $li = $('li')
  let v: string = $li.length ? $li.first().text() : $.text()

  // Collapse each run of "^x^y^z" into "[xyz".
  v = v.replace(/(\^.)+/g, (p0) => '[' + p0.replace(/\^/g, ''))
  if (v.length > 1) {
    if (v.startsWith('[')) {
      v = '^' + v
    }
    if (v.endsWith(']')) {
      v += '_'
    }
  }
  return v.replace(/[\[\]]/g, '')
}

/**
 * Converts the HTML of a note's `Meaning` field into plain text.
 *
 * When the markup contains `<li>` elements, only the inner HTML of the first
 * one is kept. Nested `<li>` elements and `<br>` tags are turned into line
 * breaks before the text is extracted. The text is then cleaned up: a leading
 * `(大辞泉)` line is dropped, circled digits ①-⑨ become ` (1) ` .. ` (9) `,
 * and the result is trimmed.
 */
function meaningToText(value: string): string {
  let $ = cheerio(value)
  const $it = $('li')
  if ($it.length) {
    $ = cheerio($it.first().html() || '')
  }
  $('li').prepend($('<br>'))
  $('br').text('\n')

  const text: string = $.text()
  const circledOne = '①'.charCodeAt(0)
  return (
    text
      .replace(/^\((大辞泉)\).+\n/, '')
      // NOTE(review): `[...]` here is a character class, so this drops every
      // line that merely *starts with* one of the listed characters. An
      // alternation such as `\[(用法|類語|可能|下接句)\]` was probably
      // intended. Left unchanged until the bracket style used by the
      // dictionary is confirmed.
      .replace(/\n[(用法|類語|可能|下接句)].+/g, '\n')
      .replace(/[①-⑨]/g, (p0) => ` (${p0.charCodeAt(0) - circledOne + 1}) `)
      .trim()
  )
}

/**
 * Fills in `tmp/vocab.tsv` from the Anki deck `Yomichan::Terms`.
 *
 * Each row is looked up by its first column or, when that is empty, by its
 * second column with katakana folded to hiragana. For rows with a matching
 * note, column 1 is replaced by the pitch notation (falling back to the
 * reading, then to the existing value) and column 2 by the single-line
 * meaning (falling back to the existing value). Rows without a match, and
 * empty rows, are written back unchanged.
 *
 * Does nothing when the file has no usable keys.
 *
 * NOTE(review): lookup keys are interpolated into the Anki search query
 * without quoting or escaping; confirm the input never contains search
 * operators or spaces.
 */
async function main() {
  const filename = 'tmp/vocab.tsv'
  const txt = readFileSync(filename, 'utf-8')
  const rows = txt.split('\n')

  /** Lookup key of a row: column 0, else column 1 converted to hiragana. */
  const keyOf = (cols: string[]): string =>
    cols[0] || kata2hira(cols[1] || '')

  const qs = rows.map((row) => keyOf(row.split('\t'))).filter((s) => s)
  if (!qs.length) {
    return
  }

  const noteIds = await ankiConnect.send('findNotes', {
    query: `deck:Yomichan::Terms (${qs.join(' OR ')})`
  })
  const notes = await ankiConnect.send('notesInfo', { notes: noteIds })

  // Several notes may share the same `Japanese` value, hence the array.
  const dict = new Map<string, Record<string, string>[]>()
  for (const { fields } of notes) {
    const entry: Record<string, string> = {}
    for (const [k, { value }] of Object.entries(fields)) {
      let v = ''
      switch (k) {
        case 'Japanese':
        case 'Reading':
        case 'POS':
          v = value
          break
        case 'Pitch':
          v = pitchToText(value)
          break
        case 'Meaning':
          v = meaningToText(value)
          break
      }
      // Empty values are left out so the `||` fallbacks below apply.
      if (v) {
        entry[k] = v
      }
    }

    // POS may be absent when the field is empty, so guard before testing it.
    if (entry['POS']?.includes('dan verb')) {
      delete entry['Pitch']
    }

    const jp = entry['Japanese']
    if (!jp) {
      // A note without a headword can never be matched by a row.
      continue
    }
    const vs = dict.get(jp) || []
    vs.push(entry)
    dict.set(jp, vs)
  }

  const updated = rows.map((row) => {
    if (!row) return row
    const cols = row.split('\t')
    const vs = dict.get(keyOf(cols))
    if (!vs) {
      return row
    }

    // With a reading in the row, require the note's reading to match it;
    // otherwise take the first note for that headword.
    const reading = cols[1]
    const v = reading
      ? vs.find((e) => e['Reading'] === kata2hira(reading))
      : vs[0]
    if (v) {
      cols[1] = v['Pitch'] || v['Reading'] || reading || ''
      cols[2] = (v['Meaning'] || cols[2] || '')
        .replace(/\n/g, ' ')
        .replace(/ +/g, ' ')
    }
    return cols.join('\t')
  })

  writeFileSync(filename, updated.join('\n'), 'utf-8')
}
// Run only when this file is executed directly, not when it is imported.
if (require.main === module) {
  // Handle rejection explicitly instead of leaving a floating promise.
  main().catch((err: unknown) => {
    console.error(err)
    process.exitCode = 1
  })
}
import { ankiConnect } from '@/ankiconnect'
import { load as cheerio } from 'cheerio'
import { readFileSync, writeFileSync } from 'fs'
/**
 * Fills in `tmp/kanji.tsv` from the Anki decks `Yomichan::Kanji` and
 * `Yomichan::Terms`.
 *
 * The first column of each row is the kanji to look up. For rows with a
 * matching kanji note:
 * - column 1 becomes the on-reading and column 2 the kun-reading (each
 *   falling back to the existing value), with `", "` separators replaced by
 *   spaces;
 * - column 3 becomes the space-joined list of distinct `Japanese` values from
 *   the Terms deck that contain the kanji;
 * - column 5 becomes the single-line kanji meaning, when the note has one
 *   containing Han or kana characters.
 *
 * Rows without a match, and empty rows, are written back unchanged. Does
 * nothing when the file has no keys or no kanji note was found.
 *
 * NOTE(review): keys are interpolated into the Anki search queries without
 * quoting or escaping; confirm the input holds only single kanji.
 */
async function main() {
  const filename = 'tmp/kanji.tsv'
  const txt = readFileSync(filename, 'utf-8')
  const rows = txt.split('\n')

  const qs = rows.map((row) => row.split('\t')[0] || '').filter((k) => k)
  if (!qs.length) {
    return
  }

  const kanjiNoteIds = await ankiConnect.send('findNotes', {
    query: `deck:Yomichan::Kanji (${qs
      .map((k) => `Kanji:${k}`)
      .join(' OR ')})`
  })
  const kanjiNotes = await ankiConnect.send('notesInfo', {
    notes: kanjiNoteIds
  })

  const dict = new Map<string, Record<string, string>>()
  for (const { fields } of kanjiNotes) {
    const entry: Record<string, string> = {}
    for (const [k, { value }] of Object.entries(fields)) {
      let v = ''
      switch (k) {
        case 'Kanji':
          v = value
          break
        case 'Reading_On':
        case 'Reading_Kun':
          v = value.replace(/, /g, ' ')
          break
        case 'Kanji_Meaning':
          // Only meanings that contain Japanese text are used.
          if (/[\p{sc=Han}\p{sc=Katakana}\p{sc=Hiragana}]/u.test(value)) {
            v = cheerio(value).text()
          }
          break
      }
      if (v) {
        entry[k] = v
      }
    }

    const jp = entry['Kanji']
    if (!jp) {
      // Without this guard an `undefined` key would end up in the map and be
      // interpolated into the Terms query as "Japanese:*undefined*".
      continue
    }
    dict.set(jp, entry)
  }
  if (!dict.size) {
    return
  }

  const termNoteIds = await ankiConnect.send('findNotes', {
    query: `deck:Yomichan::Terms (${Array.from(dict.keys())
      .map((k) => `Japanese:*${k}*`)
      .join(' OR ')})`
  })
  const termNotes = await ankiConnect.send('notesInfo', {
    notes: termNoteIds
  })

  // A Set removes duplicates while keeping first-seen order.
  const seenTerms = new Set<string>()
  for (const { fields } of termNotes) {
    const term = fields['Japanese']?.value
    if (term) {
      seenTerms.add(term)
    }
  }
  const terms = Array.from(seenTerms)

  const updated = rows.map((row) => {
    if (!row) return row
    const cols = row.split('\t')
    const kanji = cols[0] || ''
    const entry = dict.get(kanji)
    if (!entry) {
      return row
    }

    cols[1] = entry['Reading_On'] || cols[1] || ''
    cols[2] = entry['Reading_Kun'] || cols[2] || ''
    cols[3] = terms.filter((s) => s.includes(kanji)).join(' ')
    const meaning = entry['Kanji_Meaning']
    if (meaning) {
      cols[5] = meaning.replace(/\n/g, ' ')
    }
    // Columns that were never set are written out as empty cells.
    return cols.map((c) => c || '').join('\t')
  })

  writeFileSync(filename, updated.join('\n'), 'utf-8')
}
// Run only when this file is executed directly, not when it is imported.
if (require.main === module) {
  // Handle rejection explicitly instead of leaving a floating promise.
  main().catch((err: unknown) => {
    console.error(err)
    process.exitCode = 1
  })
}
import axios, { AxiosInstance } from 'axios'
/**
 * Note payload used by the note-related entries of `AnkiConnectActions`
 * (`addNote`, `addNotes`, `canAddNotes`, `updateNoteFields`, `notesInfo`).
 */
export interface INote {
/** Name of the deck the note belongs to. */
deckName: string
/** Name of the note type (model). */
modelName: string
/** Field contents keyed by field name. */
fields: Record<string, string>
/** Tags attached to the note. */
tags: string[]
/** Optional media attachments; their element shape is not modelled here. */
audio?: unknown[]
video?: unknown[]
picture?: unknown[]
}
/**
 * Duplicate-handling options sent together with a note.
 *
 * The union is discriminated on `allowDuplicate`: the duplicate-scope
 * settings are only part of the type (and then required) when it is `true`.
 */
export type IAddNoteOptions =
| {
allowDuplicate: false
}
| {
allowDuplicate: true
duplicateScope: string
duplicateScopeOptions: {
deckName: string
checkChildren: boolean
checkAllModels: boolean
}
}
/**
 * Type map of AnkiConnect actions: each key is an action name and maps to the
 * `params` that are sent and the `result` that is expected back.
 *
 * Because the interface extends `Record<string, ...>`, any string is accepted
 * as an action name; actions not listed here have `any` params and result.
 *
 * NOTE(review): identifiers are typed as `string` throughout, while the
 * AnkiConnect documentation shows note/card IDs as JSON numbers. Confirm
 * before relying on string operations.
 *
 * https://foosoft.net/projects/anki-connect/
 */
export interface AnkiConnectActions
extends Record<string, { params: any; result: any }> {
// Graphical Actions
guiBrowse: {
params: {
/** https://docs.ankiweb.net/searching.html */
query: string
}
/** NoteId[] */
result: string[]
}
// Model Actions
createModel: {
params: {
modelName: string
// NOTE(review): the AnkiConnect documentation names this parameter
// `inOrderFields`; confirm whether `isOrderFields` is a typo.
isOrderFields: string[]
css: string
isCloze: boolean
cardTemplates: {
Name: string
Front: string
Back: string
}[]
}
result: {
id: string
name: string
css: string
flds: {
name: string
ord: number
}[]
tmpls: {
name: string
ord: number
qfmt: string
afmt: string
did: null
}[]
did: number
}
}
modelTemplates: {
params: {
modelName: string
}
/** Template text keyed by card name, then by side. */
result: {
[cardName: string]: {
[side: string]: string
}
}
}
modelStyling: {
params: {
modelName: string
}
result: {
css: string
}
}
// Note Actions
addNote: {
params: {
note: INote & {
options: IAddNoteOptions
}
}
/** NoteId */
result: string
}
addNotes: {
params: {
notes: (INote & {
options: IAddNoteOptions
})[]
}
/** NoteId[], will be `null` if failed */
result: (string | null)[]
}
canAddNotes: {
params: {
notes: (INote & {
options: IAddNoteOptions
})[]
}
/** boolean[] */
result: boolean[]
}
// NOTE(review): the AnkiConnect documentation describes `updateNoteFields`
// as taking a single `note` object rather than a `notes` array; confirm.
updateNoteFields: {
params: {
notes: (Partial<INote> & {
id: string
fields: INote['fields']
options: IAddNoteOptions
})[]
}
result: null
}
addTags: {
params: {
notes: string[]
/** space-separated */
tags: string
}
result: null
}
removeTags: {
params: {
notes: string[]
/** space-separated */
tags: string
}
result: null
}
getTags: {
/** Takes no parameters; callers still have to pass `undefined` to `send`. */
params: undefined
result: string[]
}
findNotes: {
params: {
/** https://docs.ankiweb.net/searching.html */
query: string
}
/** NoteId[] */
result: string[]
}
notesInfo: {
params: {
notes: string[]
}
/** One entry per note; `fields` carries each field's value and position. */
result: (Omit<INote, 'deckName'> & {
noteId: string
fields: Record<
string,
{
value: string
order: number
}
>
cards: string[]
})[]
}
}
/**
 * Thin typed client for the AnkiConnect HTTP API.
 *
 * Every action is sent as a JSON `POST` to the root of `baseURL`.
 */
export class AnkiConnect {
  /** Axios instance created with `baseURL`; used for every request. */
  $api: AxiosInstance

  /**
   * @param baseURL Address of the AnkiConnect server.
   * @param version API version sent with each request unless overridden
   *   per call.
   */
  constructor(
    public baseURL = 'http://localhost:8765',
    public version = 6
  ) {
    this.$api = axios.create({ baseURL })
  }

  /**
   * Invokes one AnkiConnect action and resolves with its `result`.
   *
   * @param action Name of the action to invoke.
   * @param params Parameters for that action.
   * @param version API version for this call; defaults to the instance's.
   * @returns The `result` field of the response.
   * @throws Error when the response carries a truthy `error`, or when it has
   *   no `result` field at all.
   */
  async send<A extends keyof AnkiConnectActions>(
    action: A,
    params: AnkiConnectActions[A]['params'],
    version = this.version
  ): Promise<AnkiConnectActions[A]['result']> {
    const payload = { action, version, params }
    const response = await this.$api.post('/', payload)
    const body = response.data

    if (body.error) {
      throw new Error(body.error)
    }
    // `null` is a legitimate result; only a missing field is an error.
    if (body.result === undefined) {
      throw new Error('response is missing required result field')
    }
    return body.result
  }
}
/** Shared client instance, constructed with the default base URL and API version. */
export const ankiConnect = new AnkiConnect()
/**
 * Small hiragana keyed by romaji.
 *
 * https://github.com/WaniKani/WanaKana/blob/master/src/utils/romajiToKanaMap.js
 */
/** Small kana for the y-row keys; `yi` and `ye` map to the small vowels ぃ and ぇ. */
export const SMALL_Y = { ya: 'ゃ', yi: 'ぃ', yu: 'ゅ', ye: 'ぇ', yo: 'ょ' }
/** Small vowel kana. */
export const SMALL_VOWELS = { a: 'ぁ', i: 'ぃ', u: 'ぅ', e: 'ぇ', o: 'ぉ' }
/** Union of `SMALL_Y` and `SMALL_VOWELS`. */
export const SMALL_KANA = { ...SMALL_Y, ...SMALL_VOWELS }
/** Code-point distance between a katakana and its hiragana counterpart (0x60). */
const HIRA_KATA_DIFF = 'ア'.charCodeAt(0) - 'あ'.charCodeAt(0)

/**
 * Katakana that have a hiragana counterpart exactly `HIRA_KATA_DIFF` below
 * them: ァ-ヶ (U+30A1-U+30F6) and the iteration marks ヽ ヾ (U+30FD, U+30FE).
 *
 * The broader `\p{sc=Katakana}` class must not be used here: it also matches
 * halfwidth katakana, the phonetic extensions and ヷ-ヺ/ヿ, for which the
 * fixed offset lands on unrelated characters (e.g. `ｱ` would become `１`).
 */
const CONVERTIBLE_KATAKANA = /[\u30A1-\u30F6\u30FD\u30FE]/g

/**
 * Converts katakana in `s` to hiragana.
 *
 * Characters without a hiragana counterpart at the fixed offset are left
 * untouched, including the long-vowel mark `ー`, halfwidth katakana, and
 * everything that is not katakana.
 *
 * @param s Text to convert.
 * @returns `s` with convertible katakana replaced by hiragana.
 */
export function kata2hira(s: string) {
  return s.replace(CONVERTIBLE_KATAKANA, (c) =>
    String.fromCharCode(c.charCodeAt(0) - HIRA_KATA_DIFF)
  )
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment