Created
January 2, 2020 14:25
-
-
Save syphoxy/36b2ba8cf33ad9e6cde5347a7bd56773 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package main | |
import ( | |
"encoding/xml" | |
"fmt" | |
"log" | |
"os" | |
"sort" | |
"github.com/ikawaha/kagome/tokenizer" | |
) | |
// Locations of the Civilization VI string assets read by this program.
const (
	// rootDir is the game's installation directory. USERNAME is a
	// placeholder that must be replaced with the local account name.
	rootDir = "/home/USERNAME/.local/share/Steam/steamapps/common/Sid Meier's Civilization VI"

	// jaStringFile is the single XML file holding the Japanese strings.
	jaStringFile = rootDir + "/steamassets/base/assets/text/vanilla_ja_jp.xml"
)
var enStringFiles = []string{ | |
rootDir + "/steamassets/base/assets/text/en_us/advisortext.xml", | |
rootDir + "/steamassets/base/assets/text/en_us/ancientrivalsscenario_text.xml", | |
rootDir + "/steamassets/base/assets/text/en_us/australia_packagetext.xml", | |
rootDir + "/steamassets/base/assets/text/en_us/automation_narration_text.xml", | |
rootDir + "/steamassets/base/assets/text/en_us/aztec_montezuma_configtext.xml", | |
rootDir + "/steamassets/base/assets/text/en_us/aztec_montezuma_gameplaytext.xml", | |
rootDir + "/steamassets/base/assets/text/en_us/aztec_montezuma_packagetext.xml", | |
rootDir + "/steamassets/base/assets/text/en_us/balancemaps_configtext.xml", | |
rootDir + "/steamassets/base/assets/text/en_us/benchmarktext.xml", | |
rootDir + "/steamassets/base/assets/text/en_us/building_huey_teocalli_text.xml", | |
rootDir + "/steamassets/base/assets/text/en_us/buildings_text.xml", | |
rootDir + "/steamassets/base/assets/text/en_us/citizennames_text.xml", | |
rootDir + "/steamassets/base/assets/text/en_us/citynames_text.xml", | |
rootDir + "/steamassets/base/assets/text/en_us/civics_text.xml", | |
rootDir + "/steamassets/base/assets/text/en_us/civilizations_text.xml", | |
rootDir + "/steamassets/base/assets/text/en_us/civilopedia_buildings_text.xml", | |
rootDir + "/steamassets/base/assets/text/en_us/civilopedia_citystates_text.xml", | |
rootDir + "/steamassets/base/assets/text/en_us/civilopedia_civics_text.xml", | |
rootDir + "/steamassets/base/assets/text/en_us/civilopedia_civilizations_text.xml", | |
rootDir + "/steamassets/base/assets/text/en_us/civilopedia_concepts_text.xml", | |
rootDir + "/steamassets/base/assets/text/en_us/civilopedia_districts_text.xml", | |
rootDir + "/steamassets/base/assets/text/en_us/civilopedia_features_text.xml", | |
rootDir + "/steamassets/base/assets/text/en_us/civilopedia_governments_text.xml", | |
rootDir + "/steamassets/base/assets/text/en_us/civilopedia_greatpeople_text.xml", | |
rootDir + "/steamassets/base/assets/text/en_us/civilopedia_improvements_text.xml", | |
rootDir + "/steamassets/base/assets/text/en_us/civilopedia_leaders_text.xml", | |
rootDir + "/steamassets/base/assets/text/en_us/civilopedia_promotions_text.xml", | |
rootDir + "/steamassets/base/assets/text/en_us/civilopedia_religions_text.xml", | |
rootDir + "/steamassets/base/assets/text/en_us/civilopedia_resources_text.xml", | |
rootDir + "/steamassets/base/assets/text/en_us/civilopedia_search_text.xml", | |
rootDir + "/steamassets/base/assets/text/en_us/civilopedia_technologies_text.xml", | |
rootDir + "/steamassets/base/assets/text/en_us/civilopedia_text.xml", | |
rootDir + "/steamassets/base/assets/text/en_us/civilopedia_units_text.xml", | |
rootDir + "/steamassets/base/assets/text/en_us/civilopedia_wonders_text.xml", | |
rootDir + "/steamassets/base/assets/text/en_us/civroyalescenario_frontendtext.xml", | |
rootDir + "/steamassets/base/assets/text/en_us/civroyalescenario_ingametext.xml", | |
rootDir + "/steamassets/base/assets/text/en_us/civroyalescenario_packagetext.xml", | |
rootDir + "/steamassets/base/assets/text/en_us/coldwarscenario_text.xml", | |
rootDir + "/steamassets/base/assets/text/en_us/combat_text.xml", | |
rootDir + "/steamassets/base/assets/text/en_us/credits.xml", | |
rootDir + "/steamassets/base/assets/text/en_us/difficulties_text.xml", | |
rootDir + "/steamassets/base/assets/text/en_us/diplomacydeals_text.xml", | |
rootDir + "/steamassets/base/assets/text/en_us/diplomacymodifiers_text.xml", | |
rootDir + "/steamassets/base/assets/text/en_us/diplomacynotifications_text.xml", | |
rootDir + "/steamassets/base/assets/text/en_us/diplomacypanel_text.xml", | |
rootDir + "/steamassets/base/assets/text/en_us/diplomacystatements_common_text.xml", | |
rootDir + "/steamassets/base/assets/text/en_us/diplomacystatements_declarefriendship_text.xml", | |
rootDir + "/steamassets/base/assets/text/en_us/diplomacystatements_declarewar_text.xml", | |
rootDir + "/steamassets/base/assets/text/en_us/diplomacystatements_defeat_text.xml", | |
rootDir + "/steamassets/base/assets/text/en_us/diplomacystatements_delegation_text.xml", | |
rootDir + "/steamassets/base/assets/text/en_us/diplomacystatements_denounce_text.xml", | |
rootDir + "/steamassets/base/assets/text/en_us/diplomacystatements_embassy_text.xml", | |
rootDir + "/steamassets/base/assets/text/en_us/diplomacystatements_firstmeet_text.xml", | |
rootDir + "/steamassets/base/assets/text/en_us/diplomacystatements_greeting_text.xml", | |
rootDir + "/steamassets/base/assets/text/en_us/diplomacystatements_kudosandwarnings_text.xml", | |
rootDir + "/steamassets/base/assets/text/en_us/diplomacystatements_makealliance_text.xml", | |
rootDir + "/steamassets/base/assets/text/en_us/diplomacystatements_makedeal_text.xml", | |
rootDir + "/steamassets/base/assets/text/en_us/diplomacystatements_makedemand_text.xml", | |
rootDir + "/steamassets/base/assets/text/en_us/diplomacystatements_makepeace_text.xml", | |
rootDir + "/steamassets/base/assets/text/en_us/diplomacystatements_openborders_text.xml", | |
rootDir + "/steamassets/base/assets/text/en_us/diplomacystatements_warning_text.xml", | |
rootDir + "/steamassets/base/assets/text/en_us/districts_text.xml", | |
rootDir + "/steamassets/base/assets/text/en_us/earthmaps_configtext.xml", | |
rootDir + "/steamassets/base/assets/text/en_us/espionage_text.xml", | |
rootDir + "/steamassets/base/assets/text/en_us/expansion1_credits.xml", | |
rootDir + "/steamassets/base/assets/text/en_us/expansion1_packagetext.xml", | |
rootDir + "/steamassets/base/assets/text/en_us/expansion2_credits.xml", | |
rootDir + "/steamassets/base/assets/text/en_us/expansion2_packagetext.xml", | |
rootDir + "/steamassets/base/assets/text/en_us/featuresandterrains_text.xml", | |
rootDir + "/steamassets/base/assets/text/en_us/frontend_playbycloud.xml", | |
rootDir + "/steamassets/base/assets/text/en_us/frontendtext.xml", | |
rootDir + "/steamassets/base/assets/text/en_us/gamesummaries_text.xml", | |
rootDir + "/steamassets/base/assets/text/en_us/gossip_text.xml", | |
rootDir + "/steamassets/base/assets/text/en_us/grammar_text.xml", | |
rootDir + "/steamassets/base/assets/text/en_us/greatpeople_text.xml", | |
rootDir + "/steamassets/base/assets/text/en_us/greatworks_text.xml", | |
rootDir + "/steamassets/base/assets/text/en_us/improvements_text.xml", | |
rootDir + "/steamassets/base/assets/text/en_us/indonesia_khmer_packagetext.xml", | |
rootDir + "/steamassets/base/assets/text/en_us/ingame_playbycloud.xml", | |
rootDir + "/steamassets/base/assets/text/en_us/ingametext.xml", | |
rootDir + "/steamassets/base/assets/text/en_us/leaders_text.xml", | |
rootDir + "/steamassets/base/assets/text/en_us/macedonia_persia_packagetext.xml", | |
rootDir + "/steamassets/base/assets/text/en_us/modifiers_text.xml", | |
rootDir + "/steamassets/base/assets/text/en_us/multiplayerfrontendtext.xml", | |
rootDir + "/steamassets/base/assets/text/en_us/my2ktext.xml", | |
rootDir + "/steamassets/base/assets/text/en_us/napoleonscenario_text.xml", | |
rootDir + "/steamassets/base/assets/text/en_us/notifications_text.xml", | |
rootDir + "/steamassets/base/assets/text/en_us/nubia_packagetext.xml", | |
rootDir + "/steamassets/base/assets/text/en_us/poland_packagetext.xml", | |
rootDir + "/steamassets/base/assets/text/en_us/projects_text.xml", | |
rootDir + "/steamassets/base/assets/text/en_us/promotions_text.xml", | |
rootDir + "/steamassets/base/assets/text/en_us/quests_text.xml", | |
rootDir + "/steamassets/base/assets/text/en_us/quotes_text.xml", | |
rootDir + "/steamassets/base/assets/text/en_us/religion_text.xml", | |
rootDir + "/steamassets/base/assets/text/en_us/religiouscombatscenario_text.xml", | |
rootDir + "/steamassets/base/assets/text/en_us/routes_text.xml", | |
rootDir + "/steamassets/base/assets/text/en_us/technologies_text.xml", | |
rootDir + "/steamassets/base/assets/text/en_us/types_text.xml", | |
rootDir + "/steamassets/base/assets/text/en_us/ui_civilopedia_text.xml", | |
rootDir + "/steamassets/base/assets/text/en_us/ui_endgame_text.xml", | |
rootDir + "/steamassets/base/assets/text/en_us/ui_gamesummaries_text.xml", | |
rootDir + "/steamassets/base/assets/text/en_us/ui_mods_text.xml", | |
rootDir + "/steamassets/base/assets/text/en_us/ui_options_text.xml", | |
rootDir + "/steamassets/base/assets/text/en_us/ui_technology_text.xml", | |
rootDir + "/steamassets/base/assets/text/en_us/unit_abilities_text.xml", | |
rootDir + "/steamassets/base/assets/text/en_us/unitnames_text.xml", | |
rootDir + "/steamassets/base/assets/text/en_us/units_text.xml", | |
rootDir + "/steamassets/base/assets/text/en_us/usersetupwarning_text.xml", | |
rootDir + "/steamassets/base/assets/text/en_us/vikings_packagetext.xml", | |
rootDir + "/steamassets/base/assets/text/en_us/wars_text.xml", | |
rootDir + "/steamassets/base/assets/text/en_us/worldbuilder_text.xml", | |
} | |
// enRowXML maps one <Row> element of an English string asset file.
type enRowXML struct {
	XMLName xml.Name `xml:"Row"`
	Tag     string   `xml:"Tag,attr"` // value of the Tag attribute
	Text    string   `xml:"Text"`     // content of the <Text> child element
}
type enGameDataXML struct { | |
XMLName xml.Name `xml:"GameData"` | |
FrontEndText []enRowXML `xml:"FrontEndText>Row"` | |
BaseGameText []enRowXML `xml:"BaseGameText>Row"` | |
EnglishText []enRowXML `xml:"EnglishText>Row"` | |
} | |
// jaReplaceXML maps one <Replace> element of the Japanese string asset file.
type jaReplaceXML struct {
	XMLName xml.Name `xml:"Replace"`
	Tag     string   `xml:"Tag,attr"` // value of the Tag attribute
	Text    string   `xml:"Text"`     // content of the <Text> child element
}
type jaGameDataXML struct { | |
XMLName xml.Name `xml:"GameData"` | |
LocalizedText []jaReplaceXML `xml:"LocalizedText>Replace"` | |
} | |
// translation pairs the English and Japanese text recorded for one string
// tag. Either field is empty when no text was seen for that language.
type translation struct {
	English  string
	Japanese string
}
func main() { | |
t := tokenizer.New() | |
freq := make(map[string]int64) | |
// load japanese string assets | |
// also generate frequency map | |
f, err := os.Open(jaStringFile) | |
if err != nil { | |
log.Fatal(err) | |
} | |
defer f.Close() | |
data := jaGameDataXML{} | |
if err := xml.NewDecoder(f).Decode(&data); err != nil { | |
log.Fatal(err) | |
} | |
translations := make(map[string]translation) | |
for _, i := range data.LocalizedText { | |
x := translations[i.Tag] | |
x.Japanese = i.Text | |
translations[i.Tag] = x | |
for _, j := range t.Analyze(i.Text, tokenizer.Search) { | |
if j.Class != tokenizer.KNOWN { | |
continue | |
} | |
f := j.Features() | |
if f[0] != "名詞" || f[1] != "一般" { | |
continue | |
} | |
freq[j.Surface] += 1 | |
} | |
} | |
// load english string assets | |
for _, fp := range enStringFiles { | |
f, err := os.Open(fp) | |
if err != nil { | |
log.Fatal(err) | |
} | |
data := enGameDataXML{} | |
if err := xml.NewDecoder(f).Decode(&data); err != nil { | |
log.Fatal(err) | |
} | |
for _, i := range data.FrontEndText { | |
x := translations[i.Tag] | |
x.English = i.Text | |
translations[i.Tag] = x | |
} | |
for _, i := range data.BaseGameText { | |
x := translations[i.Tag] | |
x.English = i.Text | |
translations[i.Tag] = x | |
} | |
for _, i := range data.EnglishText { | |
x := translations[i.Tag] | |
x.English = i.Text | |
translations[i.Tag] = x | |
} | |
} | |
words := make([]wordFreq, 0, len(freq)) | |
for k, v := range freq { | |
words = append(words, wordFreq{word: k, freq: v}) | |
} | |
sort.Slice(words, func(i, j int) bool { return words[i].freq > words[j].freq }) | |
for x, w := range words { | |
fmt.Println(x, w) | |
} | |
} | |
// wordFreq pairs a word with the number of times it was counted.
type wordFreq struct {
	word string
	freq int64
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment