Create a gist now

Instantly share code, notes, and snippets.

Embed
What would you like to do?
Batch lookup of WordNet synset names and definitions for a list of wnids (WordNet IDs) read from stdin
#!/usr/bin/python
import argparse, fileinput, re
from nltk.corpus import wordnet as wn
# Matches a noun wnid such as "n01440764" at the start of a line, allowing
# leading whitespace.  Group 1 is the wnid itself (without that whitespace),
# group 2 is the numeric WordNet offset.  Compiled once, outside the loop.
_WNID_RE = re.compile(r'^\s*(n(\d+))')


def parse_wnid(line):
    """Extract a noun wnid from the start of *line*.

    Returns a ``(wnid, offset)`` tuple, where ``wnid`` is the matched
    identifier (e.g. ``'n01440764'``) and ``offset`` is its numeric part as
    an ``int``, or ``None`` when the line does not start with a wnid.
    """
    match = _WNID_RE.search(line)
    if match is None:
        return None
    # group(1), not group(0): group(0) would also carry any leading
    # whitespace into the output column.
    return match.group(1), int(match.group(2))


def format_record(wnid, lemma_names, definition, sep, replacement):
    """Build one output line: wnid, '/'-joined lemma names, definition.

    Every occurrence of *sep* inside *definition* is replaced with
    *replacement* so the definition cannot introduce extra fields.  Lemma
    names are joined as-is.
    """
    lemmas = '/'.join(str(name) for name in lemma_names)
    return sep.join([wnid, lemmas, definition.replace(sep, replacement)])


def build_parser():
    """Create the command-line parser for the -t/--separator and -r/--replacement options."""
    parser = argparse.ArgumentParser(
        description='read from stdin one wnid per line, output the name and definition of each wnid by looking up in synset',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('-t', '--separator', type=str,
                        default=',', help='field separator character')
    parser.add_argument('-r', '--replacement', type=str,
                        default='', help='replacement string for the separator character occuring in definition')
    return parser


def main():
    """Read wnids from stdin and print ``wnid<sep>lemmas<sep>definition`` for each.

    Lines that do not start with a noun wnid are skipped silently.
    """
    args = build_parser().parse_args()
    sep = args.separator

    # Public NLTK API; the underscore-prefixed name used previously is a
    # private, deprecated alias for this method.
    lookup = wn.synset_from_pos_and_offset

    # Read stdin explicitly.  A bare fileinput.input() falls back to
    # sys.argv[1:], which would treat the -t/-r options (and their values)
    # as names of files to open.
    for line in fileinput.input(files=('-',)):
        parsed = parse_wnid(line)
        if parsed is None:
            continue
        wnid, offset = parsed
        synset = lookup('n', offset)
        lemma_names = [lemma.name() for lemma in synset.lemmas()]
        print(format_record(wnid, lemma_names, synset.definition(),
                            sep, args.replacement))


if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment