Skip to content

Instantly share code, notes, and snippets.

@avelican
Created May 21, 2022 12:32
Show Gist options
  • Save avelican/58f1468df04c3ff542e03288bfd1e1c0 to your computer and use it in GitHub Desktop.
Save avelican/58f1468df04c3ff542e03288bfd1e1c0 to your computer and use it in GitHub Desktop.
import json
def build_abbreviations(words):
    """Give each word the shortest prefix no earlier word has claimed.

    Words are handled in the order given, so words that appear earlier
    (the more common ones, for a frequency-sorted list) get the shorter
    abbreviations.  For "hello" the candidates are tried in the order
    h, he, hel, hell, hello, and the first one not yet taken wins.

    A word for which every prefix -- including the whole word -- is
    already taken gets no abbreviation and is left out of the result.
    The same applies to empty strings, which have no prefixes at all.

    Args:
        words: Iterable of strings, highest-priority word first.

    Returns:
        A list of "prefix -> word" strings, in input order.
    """
    taken = set()
    mappings = []
    for word in words:
        for end in range(1, len(word) + 1):
            prefix = word[:end]  # hello -> h, he, hel, hell, hello
            if prefix not in taken:
                taken.add(prefix)
                mappings.append(prefix + " -> " + word)
                break  # shortest free prefix found; move on to the next word
    return mappings


def main():
    """Read the word list and write the abbreviations as text and as JSON."""
    # word_frequency.txt contains one word per line, most common words first
    with open("word_frequency.txt", "r", encoding="utf-8") as f:
        words = f.read().splitlines()

    wrds_learn = build_abbreviations(words)

    with open("wrds.txt", "w", encoding="utf-8") as f:
        f.write("\n".join(wrds_learn))
    with open("wrds.json", "w", encoding="utf-8") as f:
        json.dump(wrds_learn, f)


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment