Navigation Menu

Skip to content

Instantly share code, notes, and snippets.

@ken39arg
Created December 2, 2020 15:20
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ken39arg/5eac4df8824ee9858f38600f9edcfe7c to your computer and use it in GitHub Desktop.
Save ken39arg/5eac4df8824ee9858f38600f9edcfe7c to your computer and use it in GitHub Desktop.
package phrasetrie
import (
"regexp"
"strings"
"testing"
"github.com/google/go-cmp/cmp"
)
var (
	// pokemonSiritori is the input text scanned by every benchmark in this
	// file. The literal is a placeholder; it reads "the Pokémon Shiritori
	// lyrics go here".
	pokemonSiritori = "ポケモンしりとりの歌詞が入る"

	// pokemon30re is one alternation, with shared prefixes factored into
	// nested non-capturing groups, covering the 30 names in pokemon30list.
	pokemon30re = `(?:B(?:isa(?:flor|knosp|sam)|lastoise|ulbasaur)|C(?:aterpie|har(?:izard|m(?:ander|eleon)))|Glu(?:manda|rak|texo)|Ivysaur|Raupy|S(?:chi(?:ggy|llok)|quirtle)|Turtok|Venusaur|Wartortle|カメ(?:ックス|ール)|キャタピー|ゼニガメ|ヒトカゲ|フシギ(?:ソウ|ダネ|バナ)|リザードン?)`

	// pokemon30list holds 30 names as plain words, used to build the Trie and
	// the strings.Replacer that are compared against pokemon30re.
	pokemon30list = []string{"フシギダネ", "Bulbasaur", "Bisasam", "フシギソウ", "Ivysaur", "Bisaknosp", "フシギバナ", "Venusaur", "Bisaflor", "ヒトカゲ", "Charmander", "Glumanda", "リザード", "Charmeleon", "Glutexo", "リザードン", "Charizard", "Glurak", "ゼニガメ", "Squirtle", "Schiggy", "カメール", "Wartortle", "Schillok", "カメックス", "Blastoise", "Turtok", "キャタピー", "Caterpie", "Raupy"}
)
// dai1re is the large-pattern counterpart of pokemon30re: a single
// alternation of names with shared prefixes factored into nested
// non-capturing groups. It is benchmarked together with dai1list
// (presumably it was generated from that list — verify before editing either).
//
// The pattern must stay on ONE line: this is a raw string, so a line break
// inside it becomes part of the regexp (e.g. inside `Mr\.\ Mime`) and the
// affected alternative would silently stop matching.
var dai1re = `(?:A(?:bra|erodactyl|l(?:akazam|pollo)|mo(?:nitas|roso)|quana|r(?:bok|canine|k(?:ani|tos)|ticuno)|ustos)|B(?:e(?:edrill|llsprout)|i(?:bor|sa(?:flor|knosp|sam))|l(?:astoise|itza|uzuk)|u(?:lbasaur|tterfree))|C(?:aterpie|ha(?:n(?:eira|sey)|r(?:izard|m(?:ander|eleon)))|l(?:efa(?:ble|iry)|oyster)|ubone)|D(?:ewgong|i(?:g(?:d(?:ri|a)|lett)|tto)|od(?:rio?|uo?)|r(?:a(?:go(?:n(?:air|i(?:te|r))|ran)|tini)|owzee)|u(?:flor|gtrio))|E(?:evee|kans|le(?:ct(?:abuzz|rode)|ktek)|nto(?:ron|n)|voli|xegg(?:cute|utor))|F(?:arfetch\'d|earow|l(?:a(?:mara|reon)|egmon)|ukano)|G(?:a(?:llopa|rados|stly)|e(?:ngar|o(?:dude|rok|waz))|iflor|l(?:oom|u(?:manda|rak|texo))|ol(?:bat|d(?:een|ini|uck)|em|king)|r(?:aveler|imer|owlithe)|yarados)|H(?:a(?:bitak|unter)|itmon(?:chan|lee)|or(?:nliu|sea)|ypno)|I(?:bitak|vysaur)|J(?:igglypuff|olteon|u(?:gong|rob)|ynx)|K(?:a(?:buto(?:ps)?|dabra|kuna|nga(?:ma|skhan)|rpador)|i(?:cklee|ngler)|leinstein|n(?:o(?:fensa|gga)|uddeluff)|o(?:ffing|k(?:owei|una))|rabby)|L(?:a(?:hmus|pras|vados)|ektrobal|ickitung)|M(?:a(?:ch(?:amp|o(?:ke|llo|mei|p))|g(?:ikarp|mar|ne(?:mite|t(?:ilo|on)))|nkey|rowak|schock|uzi)|e(?:nki|owth|tapod|w(?:t(?:wo|u))?)|oltres|r\.\ Mime|u(?:schas|k)|yrapla)|N(?:ebulak|i(?:do(?:king|queen|r(?:an|in[ao]))|netales)|ockchan)|O(?:ddish|m(?:a(?:nyte|star)|ot)|nix|wei)|P(?:a(?:ntimos|ras(?:e(?:ct|k))?)|ersian|i(?:dge(?:ot(?:to)?|y)|epi|kachu|nsir|xi)|o(?:liw(?:ag|hirl|rath)|n(?:ita|yta)|r(?:enta|ygon))|rimeape|syduck|ummeluff)|Quap(?:po|sel|utzi)|R(?:a(?:ichu|pidash|saff|t(?:icate|t(?:ata|fratz|ikarl))|upy)|e(?:laxo|ttan)|hy(?:don|horn)|i(?:horn|zeros)|ossana)|S(?:a(?:fcon|nd(?:a(?:mer|n)|s(?:hrew|lash))|rzenia)|c(?:h(?:i(?:ggy|llok)|lurp)|yther)|e(?:a(?:dra|king)|e(?:mon|per|l))|hellder|i(?:chlor|msala)|l(?:eim(?:ok|a)|ow(?:bro|poke))|m(?:ettbo|og(?:mog|on))|no(?:bilikat|rlax)|pearow|quirtle|t(?:ar(?:mie|yu)|erndu))|T(?:a(?:ngela|u(?:b(?:o(?:ga|ss)|si)|ros))|ent(?:ac(?:ha|ool|ruel)|oxa)|ra(?:gosso|umato)|urtok)|Ultrigaria|V(?:aporeon|en(?:o(?:moth|nat)|usaur)|i(?:ctreebel|leplume)|olto(?:bal|rb)|ul(?:nona|pix))|W(?:artortle|ee(?:dle|pinbell|zing)|igglytuff)|Z(?:apdos|ubat)|ア(?:ズマオウ|ーボ(?:ック)?)|イ(?:シツブテ|ワーク|ーブイ)|ウ(?:インディ|ツ(?:ドン|ボット))|エ(?:ビワラー|レブー)|オ(?:コリザル|ニ(?:スズメ|ドリル)|ム(?:スター|ナイト))|カ(?:イ(?:リ(?:キー|ュー)|ロス)|ビゴン|ブト(?:プス)?|メ(?:ックス|ール)|モネギ|ラカラ)|ガ(?:ラガラ|ルーラ|ーディ)|キ(?:ャタピー|ュウコン|ングラー)|ギャ(?:ラドス|ロップ)|ク(?:サイハナ|ラブ)|ケ(?:ンタロス|ーシィ)|ゲンガー|コ(?:イ(?:キング|ル)|クーン|ダック|ラッタ|ンパン)|ゴ(?:ル(?:ダック|バット)|ロー(?:ニャ|ン)|ー(?:スト?|リキー))|サ(?:イ(?:ドン|ホーン)|ワムラー|ン(?:ダース?|ド(?:パン)?))|シ(?:ェルダー|ャワーズ|ードラ)|ジュゴン|ス(?:ターミー|トライク|ピアー|リー(?:パー|プ))|ズバット|ゼニガメ|タ(?:ッツー|マタマ)|ダグトリオ|ディグダ|ト(?:サキント|ランセル)|ド(?:ガース|ククラゲ|ード(?:リオ|ー))|ナ(?:ゾノクサ|ッシー)|ニ(?:ド(?:キング|クイン|ラン|リー[ナノ])|ャース|ョロ(?:ボン|[ゾモ]))|ハクリュー|バ(?:タフリー|リヤード)|パ(?:ウワウ|ラ(?:セクト|ス)|ルシェン)|ヒト(?:カゲ|デマン)|ビ(?:リリダマ|ードル)|ピ(?:カチュウ|クシー|ジョ(?:ット|ン)|ッピ)|フ(?:ァイヤー|シギ(?:ソウ|ダネ|バナ)|リーザー|ーディン)|ブー(?:スター|バー)|プ(?:クリン|テラ|リン)|ベ(?:トベ(?:ター|トン)|ロリンガ)|ペルシアン|ポ(?:ッポ|ニータ|リゴン)|マ(?:タドガス|ダツボミ|ルマイン|ンキー)|ミ(?:ニリュウ|ュウ(?:ツー)?)|メ(?:タモン|ノクラゲ)|モ(?:ルフォン|ンジャラ)|ヤド(?:ラン|ン)|ユンゲラー|ラ(?:イチュウ|ッ(?:キー|タ)|フレシア|プラス)|リザードン?|ルージュラ|レアコイル|ロコン|ワンリキー)`
// dai1list is the large word list used by BenchmarkReplace_Many to build the
// Trie and the strings.Replacer. Entries come in groups of three per
// creature (they look like the Japanese, English and German names; where
// two languages share a name the entry is simply repeated).
//
// Every entry must be a complete string literal on one line: an interpreted
// string literal cannot contain a raw line break, so wrapping the list in the
// middle of a name such as "Mr. Mime" stops the file from compiling.
var dai1list = []string{"フシギダネ", "Bulbasaur", "Bisasam", "フシギソウ", "Ivysaur", "Bisaknosp", "フシギバナ", "Venusaur", "Bisaflor", "ヒトカゲ", "Charmander", "Glumanda", "リザード", "Charmeleon", "Glutexo", "リザードン", "Charizard", "Glurak", "ゼニガメ", "Squirtle", "Schiggy", "カメール", "Wartortle", "Schillok", "カメックス", "Blastoise", "Turtok", "キャタピー", "Caterpie", "Raupy", "トランセル", "Metapod", "Safcon", "バタフリー", "Butterfree", "Smettbo", "ビードル", "Weedle", "Hornliu", "コクーン", "Kakuna", "Kokuna", "スピアー", "Beedrill", "Bibor", "ポッポ", "Pidgey", "Taubsi", "ピジョン", "Pidgeotto", "Tauboga", "ピジョット", "Pidgeot", "Tauboss", "コラッタ", "Rattata", "Rattfratz", "ラッタ", "Raticate", "Rattikarl", "オニスズメ", "Spearow", "Habitak", "オニドリル", "Fearow", "Ibitak", "アーボ", "Ekans", "Rettan", "アーボック", "Arbok", "Arbok", "ピカチュウ", "Pikachu", "Pikachu", "ライチュウ", "Raichu", "Raichu", "サンド", "Sandshrew", "Sandan", "サンドパン", "Sandslash", "Sandamer", "ニドラン", "Nidoran", "Nidoran", "ニドリーナ", "Nidorina", "Nidorina", "ニドクイン", "Nidoqueen", "Nidoqueen", "ニドリーノ", "Nidorino", "Nidorino", "ニドキング", "Nidoking", "Nidoking", "ピッピ", "Clefairy", "Piepi", "ピクシー", "Clefable", "Pixi", "ロコン", "Vulpix", "Vulpix", "キュウコン", "Ninetales", "Vulnona", "プリン", "Jigglypuff", "Pummeluff", "プクリン", "Wigglytuff", "Knuddeluff", "ズバット", "Zubat", "Zubat", "ゴルバット", "Golbat", "Golbat", "ナゾノクサ", "Oddish", "Myrapla", "クサイハナ", "Gloom", "Duflor", "ラフレシア", "Vileplume", "Giflor", "パラス", "Paras", "Paras", "パラセクト", "Parasect", "Parasek", "コンパン", "Venonat", "Bluzuk", "モルフォン", "Venomoth", "Omot", "ディグダ", "Diglett", "Digda", "ダグトリオ", "Dugtrio", "Digdri", "ニャース", "Meowth", "Mauzi", "ペルシアン", "Persian", "Snobilikat", "コダック", "Psyduck", "Enton", "ゴルダック", "Golduck", "Entoron", "マンキー", "Mankey", "Menki", "オコリザル", "Primeape", "Rasaff", "ガーディ", "Growlithe", "Fukano", "ウインディ", "Arcanine", "Arkani", "ニョロモ", "Poliwag", "Quapsel", "ニョロゾ", "Poliwhirl", "Quaputzi", "ニョロボン", "Poliwrath", "Quappo", "ケーシィ", "Abra", "Abra", "ユンゲラー", "Kadabra", "Kadabra", "フーディン", "Alakazam", "Simsala", "ワンリキー", "Machop", "Machollo", "ゴーリキー", "Machoke", "Maschock", "カイリキー", "Machamp", "Machomei", "マダツボミ", "Bellsprout", "Knofensa", "ウツドン", "Weepinbell", "Ultrigaria", "ウツボット", "Victreebel", "Sarzenia", "メノクラゲ", "Tentacool", "Tentacha", "ドククラゲ", "Tentacruel", "Tentoxa", "イシツブテ", "Geodude", "Kleinstein", "ゴローン", "Graveler", "Georok", "ゴローニャ", "Golem", "Geowaz", "ポニータ", "Ponyta", "Ponita", "ギャロップ", "Rapidash", "Gallopa", "ヤドン", "Slowpoke", "Flegmon", "ヤドラン", "Slowbro", "Lahmus", "コイル", "Magnemite", "Magnetilo", "レアコイル", "Magneton", "Magneton", "カモネギ", "Farfetch'd", "Porenta", "ドードー", "Doduo", "Dodu", "ドードリオ", "Dodrio", "Dodri", "パウワウ", "Seel", "Jurob", "ジュゴン", "Dewgong", "Jugong", "ベトベター", "Grimer", "Sleima", "ベトベトン", "Muk", "Sleimok", "シェルダー", "Shellder", "Muschas", "パルシェン", "Cloyster", "Austos", "ゴース", "Gastly", "Nebulak", "ゴースト", "Haunter", "Alpollo", "ゲンガー", "Gengar", "Gengar", "イワーク", "Onix", "Onix", "スリープ", "Drowzee", "Traumato", "スリーパー", "Hypno", "Hypno", "クラブ", "Krabby", "Krabby", "キングラー", "Kingler", "Kingler", "ビリリダマ", "Voltorb", "Voltobal", "マルマイン", "Electrode", "Lektrobal", "タマタマ", "Exeggcute", "Owei", "ナッシー", "Exeggutor", "Kokowei", "カラカラ", "Cubone", "Tragosso", "ガラガラ", "Marowak", "Knogga", "サワムラー", "Hitmonlee", "Kicklee", "エビワラー", "Hitmonchan", "Nockchan", "ベロリンガ", "Lickitung", "Schlurp", "ドガース", "Koffing", "Smogon", "マタドガス", "Weezing", "Smogmog", "サイホーン", "Rhyhorn", "Rihorn", "サイドン", "Rhydon", "Rizeros", "ラッキー", "Chansey", "Chaneira", "モンジャラ", "Tangela", "Tangela", "ガルーラ", "Kangaskhan", "Kangama", "タッツー", "Horsea", "Seeper", "シードラ", "Seadra", "Seemon", "トサキント", "Goldeen", "Goldini", "アズマオウ", "Seaking", "Golking", "ヒトデマン", "Staryu", "Sterndu", "スターミー", "Starmie", "Starmie", "バリヤード", "Mr. Mime", "Pantimos", "ストライク", "Scyther", "Sichlor", "ルージュラ", "Jynx", "Rossana", "エレブー", "Electabuzz", "Elektek", "ブーバー", "Magmar", "Magmar", "カイロス", "Pinsir", "Pinsir", "ケンタロス", "Tauros", "Tauros", "コイキング", "Magikarp", "Karpador", "ギャラドス", "Gyarados", "Garados", "ラプラス", "Lapras", "Lapras", "メタモン", "Ditto", "Ditto", "イーブイ", "Eevee", "Evoli", "シャワーズ", "Vaporeon", "Aquana", "サンダース", "Jolteon", "Blitza", "ブースター", "Flareon", "Flamara", "ポリゴン", "Porygon", "Porygon", "オムナイト", "Omanyte", "Amonitas", "オムスター", "Omastar", "Amoroso", "カブト", "Kabuto", "Kabuto", "カブトプス", "Kabutops", "Kabutops", "プテラ", "Aerodactyl", "Aerodactyl", "カビゴン", "Snorlax", "Relaxo", "フリーザー", "Articuno", "Arktos", "サンダー", "Zapdos", "Zapdos", "ファイヤー", "Moltres", "Lavados", "ミニリュウ", "Dratini", "Dratini", "ハクリュー", "Dragonair", "Dragonir", "カイリュー", "Dragonite", "Dragoran", "ミュウツー", "Mewtwo", "Mewtu", "ミュウ", "Mew", "Mew"}
// TestTrie checks that three implementations mask the same words in a sample
// text: Trie.ReplaceAll built from pokemon30list, a regexp compiled from
// pokemon30re, and a strings.Replacer built from pokemon30list. Each result
// is compared against the same hand-written expectation.
func TestTrie(t *testing.T) {
	// Quoted from: MonColle MS-12 Hitokage (Charmander) https://takaratomymall.jp/shop/g/g4904810141716/
	text := `モンコレに「ヒトカゲ」が登場!
モンコレは、ポケモンの姿を完全再現したハイクオリティなフィギュアのシリーズです。
頭の上から、後ろ、横、足の裏まで、忠実に再現しているから
手に取れば「ヒトカゲ」の新たな秘密に気づくかも!`
	expect := `モンコレに「*****」が登場!
モンコレは、ポケモンの姿を完全再現したハイクオリティなフィギュアのシリーズです。
頭の上から、後ろ、横、足の裏まで、忠実に再現しているから
手に取れば「*****」の新たな秘密に気づくかも!`

	const mask = "*****"

	// All three results are computed up front, in the same order as before,
	// then verified with one shared check so the failure output is uniform.
	results := []struct {
		name string
		got  string
	}{
		{"trie", NewTrie(pokemon30list...).ReplaceAll(text, mask)},
		{"regexp", regexp.MustCompile(pokemon30re).ReplaceAllString(text, mask)},
		{"strings.Replacer", newReplacer(pokemon30list, mask).Replace(text)},
	}
	for _, r := range results {
		if diff := cmp.Diff(expect, r.got); diff != "" {
			t.Errorf("mismatch (%s):\n\nGot: %s\n\nExpect: %s\n\nDiff: %s", r.name, r.got, expect, diff)
		}
	}
}
// newReplacer returns a strings.Replacer that maps every string in arr to the
// same replacement text. The old/new pairs are passed to strings.NewReplacer
// in the order the words appear in arr.
func newReplacer(arr []string, replace string) *strings.Replacer {
	// NewReplacer takes a flat old1, new1, old2, new2, ... argument list.
	pairs := make([]string, 0, len(arr)*2)
	for _, old := range arr {
		pairs = append(pairs, old, replace)
	}
	return strings.NewReplacer(pairs...)
}
// runbenchReplace runs three sub-benchmarks named "regexp", "Replacer" and
// "Trie". Each one replaces matches in pokemonSiritori with "*****": the
// regexp is compiled from re, while the strings.Replacer and the Trie are
// built from list. Every matcher is constructed before ResetTimer is called,
// so only the replacement calls fall inside the timed region.
func runbenchReplace(b *testing.B, re string, list []string) {
	const mask = "*****"

	b.Run("regexp", func(sub *testing.B) {
		pattern := regexp.MustCompile(re)
		sub.ResetTimer()
		for n := sub.N; n > 0; n-- {
			pattern.ReplaceAllString(pokemonSiritori, mask)
		}
	})

	b.Run("Replacer", func(sub *testing.B) {
		replacer := newReplacer(list, mask)
		sub.ResetTimer()
		for n := sub.N; n > 0; n-- {
			replacer.Replace(pokemonSiritori)
		}
	})

	b.Run("Trie", func(sub *testing.B) {
		words := NewTrie(list...)
		sub.ResetTimer()
		for n := sub.N; n > 0; n-- {
			words.ReplaceAll(pokemonSiritori, mask)
		}
	})
}
// BenchmarkReplace_30 runs the runbenchReplace sub-benchmarks with the small
// data set: pokemon30re as the regexp pattern and pokemon30list as the word
// list.
func BenchmarkReplace_30(b *testing.B) {
runbenchReplace(b, pokemon30re, pokemon30list)
}
// BenchmarkReplace_Many runs the runbenchReplace sub-benchmarks with the
// large data set: dai1re as the regexp pattern and dai1list as the word list.
func BenchmarkReplace_Many(b *testing.B) {
runbenchReplace(b, dai1re, dai1list)
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment