Skip to content

Instantly share code, notes, and snippets.

@dchaplinsky
Created October 4, 2022 11:07
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save dchaplinsky/fa6275ceca80a8c3a83320ffb3585ef5 to your computer and use it in GitHub Desktop.
Save dchaplinsky/fa6275ceca80a8c3a83320ffb3585ef5 to your computer and use it in GitHub Desktop.
A simple way to calculate how many leaves a synset has and what its level is in the hypernym/hyponym tree
import wn
import csv
from collections import Counter, defaultdict
from tqdm.notebook import tqdm
# Download the "pwn:3.1" project through the wn package, then open it as the
# Wordnet instance that the rest of this script queries.
# NOTE(review): this runs at import time and presumably needs network access
# on first use -- confirm against the wn documentation.
wn.download("pwn:3.1")
pwn = wn.Wordnet("pwn:3.1")
def count_cardinality_and_levels(pos):
    """Count leaves under each synset and find each synset's minimum level.

    Iterates over every leaf returned by ``wn.taxonomy.leaves(pwn, pos=pos)``
    and walks each of that leaf's hypernym paths.

    Args:
        pos: Part-of-speech value forwarded to ``wn.taxonomy.leaves``.

    Returns:
        A ``(synset_cardinality, synset_level)`` tuple.

        ``synset_cardinality`` is a ``Counter`` keyed by synset id. For every
        synset that appears on at least one leaf's hypernym path it holds the
        number of distinct leaves whose paths pass through it. A leaf that
        reaches the same ancestor through several paths is counted once.
        Leaves themselves are not added to this counter.

        ``synset_level`` is a ``defaultdict`` keyed by synset id holding the
        smallest index at which the synset was seen on a reversed path; a
        leaf gets the length of its shortest path. Looking up an id that was
        never seen yields (and stores) the sentinel ``1000000``.

    Note:
        Assumes ``hypernym_paths()`` lists the nearest hypernym first and the
        root last and does not include the synset itself, so that the
        reversed path is root-first with the root at level 0 -- confirm
        against the wn documentation.
    """
    synset_cardinality = Counter()
    synset_level = defaultdict(lambda: 1000000)

    for leaf in wn.taxonomy.leaves(pwn, pos=pos):
        # Ancestors of this leaf, deduplicated across all of its paths.
        # A dict (not a set) keeps first-seen order, so the insertion order
        # into the Counter -- and therefore tie order in most_common() --
        # stays deterministic between runs.
        ancestors = {}
        for path in leaf.hypernym_paths():
            # reversed() walks the path back to front without mutating the
            # list handed back by the library.
            for level, node in enumerate(reversed(path)):
                ancestors[node.id] = None
                synset_level[node.id] = min(synset_level[node.id], level)
            synset_level[leaf.id] = min(synset_level[leaf.id], len(path))

        # Each leaf contributes exactly one to every ancestor it has.
        for ancestor_id in ancestors:
            synset_cardinality[ancestor_id] += 1

    return synset_cardinality, synset_level
# Compute leaf counts and levels for noun synsets ("n").
synset_cardinality, synset_level = count_cardinality_and_levels("n")

# newline="" is what the csv module requires of files it writes to (otherwise
# row terminators can be translated a second time, giving blank rows on
# Windows). The explicit encoding keeps the output independent of the
# platform's default locale.
with open(
    "pwn_synset_levels_and_cardinality.csv", "w", newline="", encoding="utf-8"
) as fp_out:
    w = csv.DictWriter(
        fp_out, fieldnames=["id", "lemmas", "gloss", "cardinality", "level"]
    )
    w.writeheader()

    # most_common() yields (id, count) pairs from the highest count down.
    for synset_id, cardinality in tqdm(synset_cardinality.most_common()):
        # Resolve the id in the same Wordnet instance the ids were collected
        # from, rather than through the module-level wn.synset(), so another
        # installed lexicon that reuses the id cannot be picked up instead.
        synset = pwn.synset(synset_id)
        w.writerow(
            {
                "id": synset_id,
                "lemmas": "; ".join(synset.lemmas()),
                "gloss": synset.definition(),
                "cardinality": cardinality,
                "level": synset_level[synset_id],
            }
        )
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment