Created
December 2, 2020 15:20
-
-
Save ken39arg/5eac4df8824ee9858f38600f9edcfe7c to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package phrasetrie | |
import ( | |
"regexp" | |
"strings" | |
"testing" | |
"github.com/google/go-cmp/cmp" | |
) | |
// Shared fixtures for the small (30 phrase) test and benchmark case.
var (
	// pokemonSiritori is the text every benchmark scans. The value here is a
	// placeholder string; substitute the real input text to get meaningful
	// benchmark numbers.
	pokemonSiritori = "ポケモンしりとりの歌詞が入る"

	// pokemon30re is a single alternation pattern covering the names in
	// pokemon30list.
	pokemon30re = `(?:B(?:isa(?:flor|knosp|sam)|lastoise|ulbasaur)|C(?:aterpie|har(?:izard|m(?:ander|eleon)))|Glu(?:manda|rak|texo)|Ivysaur|Raupy|S(?:chi(?:ggy|llok)|quirtle)|Turtok|Venusaur|Wartortle|カメ(?:ックス|ール)|キャタピー|ゼニガメ|ヒトカゲ|フシギ(?:ソウ|ダネ|バナ)|リザードン?)`

	// pokemon30list holds 30 names: ten groups of three spellings each.
	pokemon30list = []string{"フシギダネ", "Bulbasaur", "Bisasam", "フシギソウ", "Ivysaur", "Bisaknosp", "フシギバナ", "Venusaur", "Bisaflor", "ヒトカゲ", "Charmander", "Glumanda", "リザード", "Charmeleon", "Glutexo", "リザードン", "Charizard", "Glurak", "ゼニガメ", "Squirtle", "Schiggy", "カメール", "Wartortle", "Schillok", "カメックス", "Blastoise", "Turtok", "キャタピー", "Caterpie", "Raupy"}
)
// Shared fixtures for the large test and benchmark case.
var (
	// dai1re is a single alternation pattern covering the names in dai1list.
	// It must stay on one line: the literal is a raw string, so any line break
	// inside it becomes part of the pattern.
	dai1re = `(?:A(?:bra|erodactyl|l(?:akazam|pollo)|mo(?:nitas|roso)|quana|r(?:bok|canine|k(?:ani|tos)|ticuno)|ustos)|B(?:e(?:edrill|llsprout)|i(?:bor|sa(?:flor|knosp|sam))|l(?:astoise|itza|uzuk)|u(?:lbasaur|tterfree))|C(?:aterpie|ha(?:n(?:eira|sey)|r(?:izard|m(?:ander|eleon)))|l(?:efa(?:ble|iry)|oyster)|ubone)|D(?:ewgong|i(?:g(?:d(?:ri|a)|lett)|tto)|od(?:rio?|uo?)|r(?:a(?:go(?:n(?:air|i(?:te|r))|ran)|tini)|owzee)|u(?:flor|gtrio))|E(?:evee|kans|le(?:ct(?:abuzz|rode)|ktek)|nto(?:ron|n)|voli|xegg(?:cute|utor))|F(?:arfetch\'d|earow|l(?:a(?:mara|reon)|egmon)|ukano)|G(?:a(?:llopa|rados|stly)|e(?:ngar|o(?:dude|rok|waz))|iflor|l(?:oom|u(?:manda|rak|texo))|ol(?:bat|d(?:een|ini|uck)|em|king)|r(?:aveler|imer|owlithe)|yarados)|H(?:a(?:bitak|unter)|itmon(?:chan|lee)|or(?:nliu|sea)|ypno)|I(?:bitak|vysaur)|J(?:igglypuff|olteon|u(?:gong|rob)|ynx)|K(?:a(?:buto(?:ps)?|dabra|kuna|nga(?:ma|skhan)|rpador)|i(?:cklee|ngler)|leinstein|n(?:o(?:fensa|gga)|uddeluff)|o(?:ffing|k(?:owei|una))|rabby)|L(?:a(?:hmus|pras|vados)|ektrobal|ickitung)|M(?:a(?:ch(?:amp|o(?:ke|llo|mei|p))|g(?:ikarp|mar|ne(?:mite|t(?:ilo|on)))|nkey|rowak|schock|uzi)|e(?:nki|owth|tapod|w(?:t(?:wo|u))?)|oltres|r\.\ Mime|u(?:schas|k)|yrapla)|N(?:ebulak|i(?:do(?:king|queen|r(?:an|in[ao]))|netales)|ockchan)|O(?:ddish|m(?:a(?:nyte|star)|ot)|nix|wei)|P(?:a(?:ntimos|ras(?:e(?:ct|k))?)|ersian|i(?:dge(?:ot(?:to)?|y)|epi|kachu|nsir|xi)|o(?:liw(?:ag|hirl|rath)|n(?:ita|yta)|r(?:enta|ygon))|rimeape|syduck|ummeluff)|Quap(?:po|sel|utzi)|R(?:a(?:ichu|pidash|saff|t(?:icate|t(?:ata|fratz|ikarl))|upy)|e(?:laxo|ttan)|hy(?:don|horn)|i(?:horn|zeros)|ossana)|S(?:a(?:fcon|nd(?:a(?:mer|n)|s(?:hrew|lash))|rzenia)|c(?:h(?:i(?:ggy|llok)|lurp)|yther)|e(?:a(?:dra|king)|e(?:mon|per|l))|hellder|i(?:chlor|msala)|l(?:eim(?:ok|a)|ow(?:bro|poke))|m(?:ettbo|og(?:mog|on))|no(?:bilikat|rlax)|pearow|quirtle|t(?:ar(?:mie|yu)|erndu))|T(?:a(?:ngela|u(?:b(?:o(?:ga|ss)|si)|ros))|ent(?:ac(?:ha|ool|ruel)|oxa)|ra(?:gosso|umato)|urtok)|Ultrigaria|V(?:aporeon|en(?:o(?:moth|nat)|usaur)|i(?:ctreebel|leplume)|olto(?:bal|rb)|ul(?:nona|pix))|W(?:artortle|ee(?:dle|pinbell|zing)|igglytuff)|Z(?:apdos|ubat)|ア(?:ズマオウ|ーボ(?:ック)?)|イ(?:シツブテ|ワーク|ーブイ)|ウ(?:インディ|ツ(?:ドン|ボット))|エ(?:ビワラー|レブー)|オ(?:コリザル|ニ(?:スズメ|ドリル)|ム(?:スター|ナイト))|カ(?:イ(?:リ(?:キー|ュー)|ロス)|ビゴン|ブト(?:プス)?|メ(?:ックス|ール)|モネギ|ラカラ)|ガ(?:ラガラ|ルーラ|ーディ)|キ(?:ャタピー|ュウコン|ングラー)|ギャ(?:ラドス|ロップ)|ク(?:サイハナ|ラブ)|ケ(?:ンタロス|ーシィ)|ゲンガー|コ(?:イ(?:キング|ル)|クーン|ダック|ラッタ|ンパン)|ゴ(?:ル(?:ダック|バット)|ロー(?:ニャ|ン)|ー(?:スト?|リキー))|サ(?:イ(?:ドン|ホーン)|ワムラー|ン(?:ダース?|ド(?:パン)?))|シ(?:ェルダー|ャワーズ|ードラ)|ジュゴン|ス(?:ターミー|トライク|ピアー|リー(?:パー|プ))|ズバット|ゼニガメ|タ(?:ッツー|マタマ)|ダグトリオ|ディグダ|ト(?:サキント|ランセル)|ド(?:ガース|ククラゲ|ード(?:リオ|ー))|ナ(?:ゾノクサ|ッシー)|ニ(?:ド(?:キング|クイン|ラン|リー[ナノ])|ャース|ョロ(?:ボン|[ゾモ]))|ハクリュー|バ(?:タフリー|リヤード)|パ(?:ウワウ|ラ(?:セクト|ス)|ルシェン)|ヒト(?:カゲ|デマン)|ビ(?:リリダマ|ードル)|ピ(?:カチュウ|クシー|ジョ(?:ット|ン)|ッピ)|フ(?:ァイヤー|シギ(?:ソウ|ダネ|バナ)|リーザー|ーディン)|ブー(?:スター|バー)|プ(?:クリン|テラ|リン)|ベ(?:トベ(?:ター|トン)|ロリンガ)|ペルシアン|ポ(?:ッポ|ニータ|リゴン)|マ(?:タドガス|ダツボミ|ルマイン|ンキー)|ミ(?:ニリュウ|ュウ(?:ツー)?)|メ(?:タモン|ノクラゲ)|モ(?:ルフォン|ンジャラ)|ヤド(?:ラン|ン)|ユンゲラー|ラ(?:イチュウ|ッ(?:キー|タ)|フレシア|プラス)|リザードン?|ルージュラ|レアコイル|ロコン|ワンリキー)`

	// dai1list is the flat name list matching dai1re, three spellings per
	// entry in the same order as pokemon30list.
	dai1list = []string{"フシギダネ", "Bulbasaur", "Bisasam", "フシギソウ", "Ivysaur", "Bisaknosp", "フシギバナ", "Venusaur", "Bisaflor", "ヒトカゲ", "Charmander", "Glumanda", "リザード", "Charmeleon", "Glutexo", "リザードン", "Charizard", "Glurak", "ゼニガメ", "Squirtle", "Schiggy", "カメール", "Wartortle", "Schillok", "カメックス", "Blastoise", "Turtok", "キャタピー", "Caterpie", "Raupy", "トランセル", "Metapod", "Safcon", "バタフリー", "Butterfree", "Smettbo", "ビードル", "Weedle", "Hornliu", "コクーン", "Kakuna", "Kokuna", "スピアー", "Beedrill", "Bibor", "ポッポ", "Pidgey", "Taubsi", "ピジョン", "Pidgeotto", "Tauboga", "ピジョット", "Pidgeot", "Tauboss", "コラッタ", "Rattata", "Rattfratz", "ラッタ", "Raticate", "Rattikarl", "オニスズメ", "Spearow", "Habitak", "オニドリル", "Fearow", "Ibitak", "アーボ", "Ekans", "Rettan", "アーボック", "Arbok", "Arbok", "ピカチュウ", "Pikachu", "Pikachu", "ライチュウ", "Raichu", "Raichu", "サンド", "Sandshrew", "Sandan", "サンドパン", "Sandslash", "Sandamer", "ニドラン", "Nidoran", "Nidoran", "ニドリーナ", "Nidorina", "Nidorina", "ニドクイン", "Nidoqueen", "Nidoqueen", "ニドリーノ", "Nidorino", "Nidorino", "ニドキング", "Nidoking", "Nidoking", "ピッピ", "Clefairy", "Piepi", "ピクシー", "Clefable", "Pixi", "ロコン", "Vulpix", "Vulpix", "キュウコン", "Ninetales", "Vulnona", "プリン", "Jigglypuff", "Pummeluff", "プクリン", "Wigglytuff", "Knuddeluff", "ズバット", "Zubat", "Zubat", "ゴルバット", "Golbat", "Golbat", "ナゾノクサ", "Oddish", "Myrapla", "クサイハナ", "Gloom", "Duflor", "ラフレシア", "Vileplume", "Giflor", "パラス", "Paras", "Paras", "パラセクト", "Parasect", "Parasek", "コンパン", "Venonat", "Bluzuk", "モルフォン", "Venomoth", "Omot", "ディグダ", "Diglett", "Digda", "ダグトリオ", "Dugtrio", "Digdri", "ニャース", "Meowth", "Mauzi", "ペルシアン", "Persian", "Snobilikat", "コダック", "Psyduck", "Enton", "ゴルダック", "Golduck", "Entoron", "マンキー", "Mankey", "Menki", "オコリザル", "Primeape", "Rasaff", "ガーディ", "Growlithe", "Fukano", "ウインディ", "Arcanine", "Arkani", "ニョロモ", "Poliwag", "Quapsel", "ニョロゾ", "Poliwhirl", "Quaputzi", "ニョロボン", "Poliwrath", "Quappo", "ケーシィ", "Abra", "Abra", "ユンゲラー", "Kadabra", "Kadabra", "フーディン", "Alakazam", "Simsala", "ワンリキー", "Machop", "Machollo", "ゴーリキー", "Machoke", "Maschock", "カイリキー", "Machamp", "Machomei", "マダツボミ", "Bellsprout", "Knofensa", "ウツドン", "Weepinbell", "Ultrigaria", "ウツボット", "Victreebel", "Sarzenia", "メノクラゲ", "Tentacool", "Tentacha", "ドククラゲ", "Tentacruel", "Tentoxa", "イシツブテ", "Geodude", "Kleinstein", "ゴローン", "Graveler", "Georok", "ゴローニャ", "Golem", "Geowaz", "ポニータ", "Ponyta", "Ponita", "ギャロップ", "Rapidash", "Gallopa", "ヤドン", "Slowpoke", "Flegmon", "ヤドラン", "Slowbro", "Lahmus", "コイル", "Magnemite", "Magnetilo", "レアコイル", "Magneton", "Magneton", "カモネギ", "Farfetch'd", "Porenta", "ドードー", "Doduo", "Dodu", "ドードリオ", "Dodrio", "Dodri", "パウワウ", "Seel", "Jurob", "ジュゴン", "Dewgong", "Jugong", "ベトベター", "Grimer", "Sleima", "ベトベトン", "Muk", "Sleimok", "シェルダー", "Shellder", "Muschas", "パルシェン", "Cloyster", "Austos", "ゴース", "Gastly", "Nebulak", "ゴースト", "Haunter", "Alpollo", "ゲンガー", "Gengar", "Gengar", "イワーク", "Onix", "Onix", "スリープ", "Drowzee", "Traumato", "スリーパー", "Hypno", "Hypno", "クラブ", "Krabby", "Krabby", "キングラー", "Kingler", "Kingler", "ビリリダマ", "Voltorb", "Voltobal", "マルマイン", "Electrode", "Lektrobal", "タマタマ", "Exeggcute", "Owei", "ナッシー", "Exeggutor", "Kokowei", "カラカラ", "Cubone", "Tragosso", "ガラガラ", "Marowak", "Knogga", "サワムラー", "Hitmonlee", "Kicklee", "エビワラー", "Hitmonchan", "Nockchan", "ベロリンガ", "Lickitung", "Schlurp", "ドガース", "Koffing", "Smogon", "マタドガス", "Weezing", "Smogmog", "サイホーン", "Rhyhorn", "Rihorn", "サイドン", "Rhydon", "Rizeros", "ラッキー", "Chansey", "Chaneira", "モンジャラ", "Tangela", "Tangela", "ガルーラ", "Kangaskhan", "Kangama", "タッツー", "Horsea", "Seeper", "シードラ", "Seadra", "Seemon", "トサキント", "Goldeen", "Goldini", "アズマオウ", "Seaking", "Golking", "ヒトデマン", "Staryu", "Sterndu", "スターミー", "Starmie", "Starmie", "バリヤード", "Mr. Mime", "Pantimos", "ストライク", "Scyther", "Sichlor", "ルージュラ", "Jynx", "Rossana", "エレブー", "Electabuzz", "Elektek", "ブーバー", "Magmar", "Magmar", "カイロス", "Pinsir", "Pinsir", "ケンタロス", "Tauros", "Tauros", "コイキング", "Magikarp", "Karpador", "ギャラドス", "Gyarados", "Garados", "ラプラス", "Lapras", "Lapras", "メタモン", "Ditto", "Ditto", "イーブイ", "Eevee", "Evoli", "シャワーズ", "Vaporeon", "Aquana", "サンダース", "Jolteon", "Blitza", "ブースター", "Flareon", "Flamara", "ポリゴン", "Porygon", "Porygon", "オムナイト", "Omanyte", "Amonitas", "オムスター", "Omastar", "Amoroso", "カブト", "Kabuto", "Kabuto", "カブトプス", "Kabutops", "Kabutops", "プテラ", "Aerodactyl", "Aerodactyl", "カビゴン", "Snorlax", "Relaxo", "フリーザー", "Articuno", "Arktos", "サンダー", "Zapdos", "Zapdos", "ファイヤー", "Moltres", "Lavados", "ミニリュウ", "Dratini", "Dratini", "ハクリュー", "Dragonair", "Dragonir", "カイリュー", "Dragonite", "Dragoran", "ミュウツー", "Mewtwo", "Mewtu", "ミュウ", "Mew", "Mew"}
)
func TestTrie(t *testing.T) { | |
// 引用: モンコレ MS-12 ヒトカゲ https://takaratomymall.jp/shop/g/g4904810141716/ | |
text := `モンコレに「ヒトカゲ」が登場! | |
モンコレは、ポケモンの姿を完全再現したハイクオリティなフィギュアのシリーズです。 | |
頭の上から、後ろ、横、足の裏まで、忠実に再現しているから | |
手に取れば「ヒトカゲ」の新たな秘密に気づくかも!` | |
expect := `モンコレに「*****」が登場! | |
モンコレは、ポケモンの姿を完全再現したハイクオリティなフィギュアのシリーズです。 | |
頭の上から、後ろ、横、足の裏まで、忠実に再現しているから | |
手に取れば「*****」の新たな秘密に気づくかも!` | |
trie := NewTrie(pokemon30list...).ReplaceAll(text, "*****") | |
reg := regexp.MustCompile(pokemon30re).ReplaceAllString(text, "*****") | |
strepl := newReplacer(pokemon30list, "*****").Replace(text) | |
if diff := cmp.Diff(expect, trie); diff != "" { | |
t.Errorf("missmatch (trie):\n\nGot: %s\n\nExpext: %s\n\nDiff: %s", trie, expect, diff) | |
} | |
if diff := cmp.Diff(expect, reg); diff != "" { | |
t.Errorf("missmatch (regexp) :\n\nGot: %s\n\nExpext: %s\n\nDiff: %s", reg, expect, diff) | |
} | |
if diff := cmp.Diff(expect, strepl); diff != "" { | |
t.Errorf("missmatch (strings.Replacer) :\n\nGot: %s\n\nExpext: %s\n\nDiff: %s", strepl, expect, diff) | |
} | |
} | |
// newReplacer returns a strings.Replacer that substitutes replace for every
// string in arr. An empty or nil arr yields a Replacer that changes nothing.
func newReplacer(arr []string, replace string) *strings.Replacer {
	// strings.NewReplacer takes a flat old1, new1, old2, new2, ... list.
	pairs := make([]string, 0, 2*len(arr))
	for _, old := range arr {
		pairs = append(pairs, old, replace)
	}
	return strings.NewReplacer(pairs...)
}
func runbenchReplace(b *testing.B, re string, list []string) { | |
b.Run("regexp", func(b *testing.B) { | |
reg := regexp.MustCompile(re) | |
b.ResetTimer() | |
for i := 0; i < b.N; i++ { | |
reg.ReplaceAllString(pokemonSiritori, "*****") | |
} | |
}) | |
b.Run("Replacer", func(b *testing.B) { | |
rep := newReplacer(list, "*****") | |
b.ResetTimer() | |
for i := 0; i < b.N; i++ { | |
rep.Replace(pokemonSiritori) | |
} | |
}) | |
b.Run("Trie", func(b *testing.B) { | |
trie := NewTrie(list...) | |
b.ResetTimer() | |
for i := 0; i < b.N; i++ { | |
trie.ReplaceAll(pokemonSiritori, "*****") | |
} | |
}) | |
} | |
func BenchmarkReplace_30(b *testing.B) { | |
runbenchReplace(b, pokemon30re, pokemon30list) | |
} | |
func BenchmarkReplace_Many(b *testing.B) { | |
runbenchReplace(b, dai1re, dai1list) | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment