Skip to content

Instantly share code, notes, and snippets.

@shinderuman
Created July 20, 2019 14:04
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save shinderuman/11f4e4bfb8baf41fcb376f03c709481f to your computer and use it in GitHub Desktop.
Save shinderuman/11f4e4bfb8baf41fcb376f03c709481f to your computer and use it in GitHub Desktop.
package main
import (
"fmt"
"strings"
"unicode"
"github.com/ikawaha/kagome/tokenizer"
)
// main prints the gienkun conversion of two sample sentences, one per line.
func main() {
	samples := []string{
		"我は劉備の夢を叶えたい",
		"これが異世界です",
	}
	for _, sample := range samples {
		fmt.Println(gienkun(sample))
	}
}
// gienkun tokenizes s with kagome, drops some tokens, converts hiragana in the
// remaining surfaces to katakana, and joins them with "…".
//
// The input is trimmed first; blank input returns "" without building a
// tokenizer. A token whose class is not tokenizer.UNKNOWN is dropped when it
// has no features, when its surface is "BOS" or "EOS", when its first feature
// is "助詞" (particle), or when its seventh feature is "*". Tokens of class
// tokenizer.UNKNOWN are always kept.
func gienkun(s string) string {
	text := strings.TrimSpace(s)
	if text == "" {
		return ""
	}

	// Distance from a hiragana code point to the corresponding katakana one.
	const kanaShift = 0x30a1 - 0x3041

	// Index of the feature that is compared against "*".
	// NOTE(review): presumably the base-form column of the IPA dictionary
	// feature layout - confirm against the dictionary in use.
	const checkedFeature = 6

	kanaConv := unicode.SpecialCase{
		// Hiragana to katakana.
		unicode.CaseRange{
			Lo: 0x3041, // ぁ
			Hi: 0x3093, // ん
			Delta: [unicode.MaxCase]rune{
				unicode.UpperCase: kanaShift, // upper-casing yields katakana
				unicode.LowerCase: 0,         // lower-casing leaves hiragana as is
				unicode.TitleCase: kanaShift, // title-casing yields katakana
			},
		},
		// Katakana to hiragana.
		unicode.CaseRange{
			Lo: 0x30a1, // ァ
			Hi: 0x30f3, // ン
			Delta: [unicode.MaxCase]rune{
				unicode.UpperCase: 0,          // upper-casing leaves katakana as is
				unicode.LowerCase: -kanaShift, // lower-casing yields hiragana
				unicode.TitleCase: 0,          // title-casing leaves katakana as is
			},
		},
	}

	// Build the tokenizer only once we know there is something to tokenize.
	t := tokenizer.New()
	tokens := t.Tokenize(text)

	items := make([]string, 0, len(tokens))
	for _, token := range tokens {
		if token.Class != tokenizer.UNKNOWN {
			features := token.Features()
			if len(features) == 0 || token.Surface == "BOS" || token.Surface == "EOS" {
				continue
			}
			if features[0] == "助詞" {
				continue
			}
			// Guard the index: a token with fewer features than expected
			// must not panic; it is simply not filtered by this rule.
			if len(features) > checkedFeature && features[checkedFeature] == "*" {
				continue
			}
		}
		items = append(items, strings.ToUpperSpecial(kanaConv, token.Surface))
	}
	return strings.Join(items, "…")
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment