Skip to content

Instantly share code, notes, and snippets.

@eadmaster
Created May 15, 2024 04:13
Show Gist options
  • Save eadmaster/1405f4086d3b509f602ba8415c8012dc to your computer and use it in GitHub Desktop.
Save eadmaster/1405f4086d3b509f602ba8415c8012dc to your computer and use it in GitHub Desktop.
lrctransliterate_kagome
package main
import (
"fmt"
"strings"
"unicode"
"bufio"
"os"
"github.com/gojp/kana"
"github.com/ikawaha/kagome-dict/ipa"
"github.com/ikawaha/kagome/v2/tokenizer"
)
// main reads LRC lyric lines from standard input, transliterates the text
// that follows each line's leading tag(s) to romaji, and prints the tag(s)
// followed by the romaji to standard output.
//
// Lines that contain no "]" are skipped, as are lines whose transliteration
// is empty (for example tag-only lines). Blank lines are skipped rather than
// ending the run, so input is consumed until EOF.
func main() {
	t, err := tokenizer.New(ipa.Dict(), tokenizer.OmitBosEos())
	if err != nil {
		fmt.Fprintln(os.Stderr, "creating tokenizer:", err)
		os.Exit(1)
	}

	// The Scanner already yields one line per Scan call, so no further
	// splitting on "\n" is needed.
	scanner := bufio.NewScanner(os.Stdin)
	for scanner.Scan() {
		tags, text, ok := splitLRCLine(strings.TrimSpace(scanner.Text()))
		if !ok {
			continue
		}

		// Transliterate: https://github.com/ikawaha/kagome/issues/308
		// Tokenize the lyric text and join the per-token kana with spaces.
		var kanaText strings.Builder
		for _, token := range t.Tokenize(text) {
			kanaText.WriteString(tokenKana(token))
			kanaText.WriteByte(' ')
		}

		yomi := cleanRomaji(kana.KanaToRomaji(kanaText.String()))
		if yomi == "" {
			continue
		}
		fmt.Println(tags + yomi)
	}
	if err := scanner.Err(); err != nil {
		fmt.Fprintln(os.Stderr, "reading standard input:", err)
		os.Exit(1)
	}
}

// splitLRCLine splits line into its leading tag prefix (including the closing
// "]") and the remaining text. ok is false when line contains no "]".
//
// The prefix always ends at the first "]". Any directly following tags that
// start with a digit (e.g. "[00:12.00][00:15.30]text") are treated as extra
// time tags and kept in the prefix, so they are not fed to the tokenizer.
func splitLRCLine(line string) (tags, text string, ok bool) {
	end := strings.Index(line, "]")
	if end < 0 {
		return "", "", false
	}
	end++
	for {
		rest := line[end:]
		if len(rest) < 2 || rest[0] != '[' || rest[1] < '0' || rest[1] > '9' {
			break
		}
		next := strings.IndexByte(rest, ']')
		if next < 0 {
			break
		}
		end += next + 1
	}
	return line[:end], line[end:], true
}

// tokenKana returns the text used to romanise tok: its pronunciation when the
// dictionary provides one, otherwise its reading, otherwise the upper-cased
// surface form. Unknown tokens always use the upper-cased surface form.
func tokenKana(tok tokenizer.Token) string {
	if tok.Class != tokenizer.UNKNOWN {
		if p, ok := tok.Pronunciation(); ok {
			return p
		}
		if r, ok := tok.Reading(); ok {
			return r
		}
	}
	return strings.ToUpper(tok.Surface)
}

// cleanRomaji drops every non-graphic rune from s, trims surrounding
// whitespace, and collapses each run of whitespace into a single space.
func cleanRomaji(s string) string {
	graphic := strings.Map(func(r rune) rune {
		// unicode.IsGraphic already covers letters, so one check suffices.
		if unicode.IsGraphic(r) {
			return r
		}
		return -1
	}, s)
	return strings.Join(strings.Fields(graphic), " ")
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment