Skip to content

Instantly share code, notes, and snippets.

@sambaiz
Forked from mugenen/wn.py
Last active August 29, 2015 14:10
Show Gist options
  • Save sambaiz/a0508c9ed379b3218e30 to your computer and use it in GitHub Desktop.
Save sambaiz/a0508c9ed379b3218e30 to your computer and use it in GitHub Desktop.
A Python 3 frontend for the WordNet-Ja database file (sqlite3 format), which is available at http://nlpwww.nict.go.jp/wn-ja/
#!/usr/bin/env python
# encoding: utf-8
import sys
import sqlite3
from collections import namedtuple
# Module-wide SQLite connection used by every query helper in this file.
# The path is relative, so "wnjpn.db" is resolved against the current
# working directory at import time.
conn = sqlite3.connect("wnjpn.db")
# Record type for one row of the `word` table; fields are listed in the
# order the query helpers unpack them.
Word = namedtuple('Word', ['wordid', 'lang', 'lemma', 'pron', 'pos'])
def getWords(lemma):
    """Look up every `word` row whose lemma equals *lemma*.

    Returns a list of Word tuples, empty when nothing matches. Rows come
    from ``select *`` and are unpacked positionally, so the table's column
    order has to line up with the Word fields.
    """
    rows = conn.execute("select * from word where lemma=?", (lemma,))
    return list(map(Word._make, rows))
def getWord(wordid):
    """Fetch the single `word` row identified by *wordid* as a Word tuple.

    Only the first matching row is read. When no row matches, ``fetchone``
    yields None and building the Word raises TypeError.
    """
    first_row = conn.execute(
        "select * from word where wordid=?", (wordid,)
    ).fetchone()
    return Word._make(first_row)
# Record type for one row of the `sense` table; fields are listed in the
# order the query helpers unpack them.
Sense = namedtuple(
    'Sense',
    ['synset', 'wordid', 'lang', 'rank', 'lexid', 'freq', 'src'],
)
def getSenses(word):
    """Return every `sense` row attached to *word* as a list of Sense tuples.

    *word* only needs a ``wordid`` attribute (e.g. a Word tuple). The list
    is empty when the word has no senses.
    """
    rows = conn.execute(
        "select * from sense where wordid=?", (word.wordid,)
    )
    return list(map(Sense._make, rows))
def getSense(synset, lang='jpn'):
    """Return the first sense of *synset* in language *lang*, or None.

    Parameters:
        synset: synset identifier to look up in the `sense` table.
        lang: language code the sense must have (default ``'jpn'``).

    Returns:
        A Sense tuple built from the first matching row, or None when the
        synset has no sense in that language.
    """
    cur = conn.execute(
        "select * from sense where synset=? and lang=?",
        (synset, lang),
    )
    row = cur.fetchone()
    # Explicit None check instead of the fragile `row and X or None` idiom,
    # which silently depends on the constructed value being truthy.
    if row is None:
        return None
    return Sense(*row)
# Record type for one row of the `synset` table; fields are listed in the
# order the query helpers unpack them.
Synset = namedtuple('Synset', ['synset', 'pos', 'name', 'src'])
def getSynset(synset):
    """Fetch the `synset` row identified by *synset* as a Synset tuple.

    Only the first matching row is read. When no row matches, ``fetchone``
    yields None and building the Synset raises TypeError.
    """
    first_row = conn.execute(
        "select * from synset where synset=?", (synset,)
    ).fetchone()
    return Synset._make(first_row)
# Record type for one row of the `synlink` table: a typed link from
# synset1 to synset2.
SynLink = namedtuple('SynLink', ['synset1', 'synset2', 'link', 'src'])
def getSynLinks(sense, link):
    """Return the `synlink` rows of type *link* that start at *sense*'s synset.

    *sense* only needs a ``synset`` attribute. The result is a list of
    SynLink tuples, empty when the synset has no outgoing link of that type.
    """
    query = "select * from synlink where synset1=? and link=?"
    rows = conn.execute(query, (sense.synset, link))
    return list(map(SynLink._make, rows))
def getSynLinksRecursive(senses, link, lang='jpn', _depth=0,
                         _visited=frozenset()):
    """Print the tree reached from *senses* by repeatedly following *link*.

    For every sense that has at least one outgoing link of type *link*, one
    line is printed: indentation proportional to the depth, the lemma of
    the sense's word, a space, and the synset name. The link targets that
    have a sense in *lang* are then expanded one level deeper.

    Parameters:
        senses: iterable of Sense-like objects (``synset`` and ``wordid``).
        link: link type to follow (e.g. ``'hypo'``, ``'hype'``).
        lang: language used to pick a sense for each target synset.
        _depth: internal; current nesting level, drives the indentation.
        _visited: internal; synsets already on the current path. A synset
            that is its own ancestor is skipped, so cyclic link graphs
            (symmetric relations pointing back at the origin) terminate
            instead of recursing until RecursionError. Acyclic output is
            unaffected.

    Returns:
        None; the result is written to standard output.
    """
    for sense in senses:
        if sense.synset in _visited:
            # Already being expanded further up this branch: following it
            # again would loop forever.
            continue
        synLinks = getSynLinks(sense, link)
        if not synLinks:
            # Nothing to print and nothing to descend into.
            continue
        print(''.join([' '*2*_depth,
                       getWord(sense.wordid).lemma,
                       ' ',
                       getSynset(sense.synset).name]))
        # Separate name for the targets so the outer loop variable is not
        # rebound while it is still in use.
        children = []
        for synLink in synLinks:
            child = getSense(synLink.synset2, lang)
            if child:
                children.append(child)
        getSynLinksRecursive(children, link, lang, _depth+1,
                             _visited | {sense.synset})
def getWordsFromSynset(synset, lang):
    """Return the words in language *lang* that have a sense in *synset*.

    Joins `sense` and `word` on ``wordid`` and returns the matching `word`
    rows as a list of Word tuples (empty when there are none).
    """
    query = (
        "select word.* from sense, word "
        "where synset=? and word.lang=? and sense.wordid = word.wordid;"
    )
    rows = conn.execute(query, (synset, lang))
    return list(map(Word._make, rows))
def getWordsFromSenses(sense, lang):
    """Print each synset name followed by its words in language *lang*.

    *sense* is an iterable of Sense-like objects. For every entry the
    synset name is printed on its own line, then one line per word lemma
    in that synset, each prefixed with a space. Returns None.
    """
    for entry in sense:
        synset_id = entry.synset
        print(getSynset(synset_id).name)
        for member in getWordsFromSynset(synset_id, lang):
            print(' ' + member.lemma)
if __name__ == '__main__':
    # Command-line entry point: wn.py word link [lang]
    if len(sys.argv) >= 3:
        # On Python 3 sys.argv already holds decoded str objects. Encoding
        # with sys.stdin.encoding and decoding as UTF-8 corrupts or rejects
        # the word whenever the console encoding is not UTF-8, so the
        # argument is used as-is.
        word = sys.argv[1]
        # Empty arguments fall back to the defaults; both values are the
        # same for every word, so they are computed once up front.
        link = sys.argv[2] or 'hypo'
        lang = sys.argv[3] if len(sys.argv) > 3 and sys.argv[3] else 'jpn'
        print(word)
        words = getWords(word)
        if words:
            for w in words:
                senses = getSenses(w)
                if link == 'syns':
                    # Synonyms are listed per synset rather than walked
                    # as a link tree.
                    getWordsFromSenses(senses, lang)
                else:
                    getSynLinksRecursive(senses, link, lang)
        else:
            # print() terminates the message with a newline so the shell
            # prompt does not end up on the same line.
            print("(nothing found)", file=sys.stderr)
    else:
        print("""usage: wn.py word link [lang]
word
word to investigate
link
syns - Synonyms
hype - Hypernyms
inst - Instances
hypo - Hyponym
hasi - Has Instance
mero - Meronyms
mmem - Meronyms --- Member
msub - Meronyms --- Substance
mprt - Meronyms --- Part
holo - Holonyms
hmem - Holonyms --- Member
hsub - Holonyms --- Substance
hprt - Holonyms -- Part
attr - Attributes
sim - Similar to
entag - Entails
causg - Causes
dmncg - Domain --- Category
dmnug - Domain --- Usage
dmnrg - Domain --- Region
dmtcg - In Domain --- Category
dmtug - In Domain --- Usage
dmtrg - In Domain --- Region
antsg - Antonyms
lang (default: jpn)
jpn - Japanese
eng - English
""")
@sambaiz
Copy link
Author

sambaiz commented Nov 26, 2014

python3で動くようにした

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment