Skip to content

Instantly share code, notes, and snippets.

@mugenen
Forked from yanbe/wn.py
Created March 4, 2012 11:48
Show Gist options
  • Star 7 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save mugenen/1972633 to your computer and use it in GitHub Desktop.
Save mugenen/1972633 to your computer and use it in GitHub Desktop.
A frontend for the WordNet-Ja database file (sqlite3 format), which is available at http://nlpwww.nict.go.jp/wn-ja/
#!/usr/bin/env python
# encoding: utf-8
import sys
import sqlite3
from collections import namedtuple
# Module-wide SQLite connection; "wnjpn.db" is a relative path, so it is
# resolved against the current working directory.
conn = sqlite3.connect("wnjpn.db")

# Record type used to wrap rows selected with `select *` from the `word` table.
Word = namedtuple('Word', ['wordid', 'lang', 'lemma', 'pron', 'pos'])
def getWords(lemma):
    """Return a list of Word records whose lemma equals *lemma*.

    The list is empty when the database has no matching row.
    """
    matches = []
    for record in conn.execute("select * from word where lemma=?", (lemma,)):
        matches.append(Word(*record))
    return matches
def getWord(wordid):
    """Return the Word record with the given *wordid*.

    Returns None when no row matches, mirroring what getSense() does for a
    missing sense, instead of failing with an opaque TypeError.
    """
    cur = conn.execute("select * from word where wordid=?", (wordid,))
    row = cur.fetchone()
    # fetchone() yields None on an empty result set; unpacking None with
    # `*row` is what used to raise.
    return Word(*row) if row is not None else None
# Record type used to wrap rows selected with `select *` from the `sense` table.
Sense = namedtuple(
    'Sense',
    ['synset', 'wordid', 'lang', 'rank', 'lexid', 'freq', 'src'],
)
def getSenses(word):
    """Return a list of Sense records attached to *word*.

    Only ``word.wordid`` is read from the argument. The list is empty when
    the word has no sense rows.
    """
    query = "select * from sense where wordid=?"
    cursor = conn.execute(query, (word.wordid,))
    return [Sense._make(record) for record in cursor]
def getSense(synset, lang='jpn'):
    """Return one Sense for *synset* in language *lang*, or None.

    Only the first matching row is fetched, so a synset with several senses
    in the same language yields just one of them. None is returned when
    there is no matching row.
    """
    cur = conn.execute("select * from sense where synset=? and lang=?",
                       (synset, lang))
    row = cur.fetchone()
    # Conditional expression instead of the fragile `x and y or None` idiom.
    return Sense(*row) if row else None
# Record type used to wrap rows selected with `select *` from the `synset` table.
Synset = namedtuple('Synset', ['synset', 'pos', 'name', 'src'])
def getSynset(synset):
    """Return the Synset record whose id equals *synset*.

    Returns None when no row matches, mirroring what getSense() does for a
    missing sense, instead of failing with an opaque TypeError.
    """
    cur = conn.execute("select * from synset where synset=?", (synset,))
    row = cur.fetchone()
    # fetchone() yields None on an empty result set; unpacking None with
    # `*row` is what used to raise.
    return Synset(*row) if row is not None else None
# Record type used to wrap rows selected with `select *` from the `synlink` table.
SynLink = namedtuple('SynLink', ['synset1', 'synset2', 'link', 'src'])
def getSynLinks(sense, link):
    """Return a list of SynLink records of type *link* that start at *sense*.

    Only ``sense.synset`` is read; it is matched against the ``synset1``
    column. The list is empty when no such link exists.
    """
    params = (sense.synset, link)
    cursor = conn.execute(
        "select * from synlink where synset1=? and link=?", params)
    return list(map(SynLink._make, cursor))
def getSynLinksRecursive(senses, link, lang='jpn', _depth=0, _path=None):
    """Print the tree reached by following *link* from each sense in *senses*.

    For every sense that has at least one outgoing link of type *link*, one
    line is printed: the sense's lemma and its synset name, prefixed by
    padding that grows with the recursion depth. The link targets are then
    resolved to senses in *lang* (targets with no sense in that language are
    dropped) and processed recursively.

    senses -- iterable of Sense records to start from
    link   -- link type to follow (matched against the ``link`` column)
    lang   -- language of the senses looked up for link targets
    _depth -- internal: current recursion depth, used for the padding
    _path  -- internal: synsets already on the current root-to-node chain

    A synset that is already on the current chain is skipped. Without this
    guard, two synsets that link to each other would recurse until the
    interpreter's recursion limit is hit. Output for acyclic link graphs is
    unchanged.
    """
    path = frozenset() if _path is None else _path
    for sense in senses:
        if sense.synset in path:
            # Already being expanded further up this chain: a cycle.
            continue
        synLinks = getSynLinks(sense, link)
        if not synLinks:
            continue
        print(''.join([' '*2*_depth,
                       getWord(sense.wordid).lemma,
                       ' ',
                       getSynset(sense.synset).name]))
        children = []
        for synLink in synLinks:
            # A separate name keeps the loop variable `sense` intact.
            child = getSense(synLink.synset2, lang)
            if child:
                children.append(child)
        getSynLinksRecursive(children, link, lang, _depth + 1,
                             path.union([sense.synset]))
def getWordsFromSynset(synset, lang):
    """Return a list of Word records in language *lang* that belong to *synset*.

    The sense and word tables are joined on ``wordid``; the language filter
    is applied to the word table.
    """
    cursor = conn.execute("select word.* from sense, word where synset=? and word.lang=? and sense.wordid = word.wordid;", (synset, lang))
    members = []
    for record in cursor:
        members.append(Word(*record))
    return members
def getWordsFromSenses(sense, lang):
    """Print each synset name followed by the lemmas of its words in *lang*.

    Despite its singular name, *sense* is iterated, so it must be a
    collection of Sense records. For each one, the synset name is printed on
    its own line, then one space-prefixed line per member word.
    """
    for entry in sense:
        synset_id = entry.synset
        print(getSynset(synset_id).name)
        for member in getWordsFromSynset(synset_id, lang):
            print(' ' + member.lemma)
# Command-line entry point: wn.py word link [lang]
if __name__ == '__main__':
    if len(sys.argv) >= 3:
        lemma = sys.argv[1]
        if isinstance(lemma, bytes):
            # Python 2 passes raw bytes. sys.stdin.encoding is None when
            # stdin is piped, so fall back instead of crashing in decode().
            lemma = lemma.decode(sys.stdin.encoding
                                 or sys.getfilesystemencoding()
                                 or 'utf-8')
        # Parsed once, not once per word. Empty strings fall back to the
        # defaults, as before.
        link = sys.argv[2] or 'hypo'
        # `>= 4` so that extra trailing arguments no longer discard lang.
        lang = (sys.argv[3] if len(sys.argv) >= 4 else '') or 'jpn'

        words = getWords(lemma)
        if words:
            # The same spelling may exist as several words (e.g. noun and
            # verb); each one is reported.
            for w in words:
                sense = getSenses(w)
                if link == 'syns':
                    # 'syns' lists the words sharing each synset instead of
                    # walking the synlink table.
                    getWordsFromSenses(sense, lang)
                else:
                    getSynLinksRecursive(sense, link, lang)
        else:
            sys.stderr.write("(nothing found)\n")
    else:
        # NOTE(review): the codes ending in 'g' (entag, causg, dmncg, ...,
        # antsg) look unusual next to the other codes; confirm them against
        # the database before relying on this help text.
        print("""usage: wn.py word link [lang]
word
word to investigate
link
syns - Synonyms
hype - Hypernyms
inst - Instances
hypo - Hyponym
hasi - Has Instance
mero - Meronyms
mmem - Meronyms --- Member
msub - Meronyms --- Substance
mprt - Meronyms --- Part
holo - Holonyms
hmem - Holonyms --- Member
hsub - Holonyms --- Substance
hprt - Holonyms -- Part
attr - Attributes
sim - Similar to
entag - Entails
causg - Causes
dmncg - Domain --- Category
dmnug - Domain --- Usage
dmnrg - Domain --- Region
dmtcg - In Domain --- Category
dmtug - In Domain --- Usage
dmtrg - In Domain --- Region
antsg - Antonyms
lang (default: jpn)
jpn - Japanese
eng - English
""")
@mugenen
Copy link
Author

mugenen commented Mar 4, 2012

以下の点を改変.
ファイル名が変わっていたので対応.
"wnjpn-0.9.db"→"wnjpn.db"
Windowsなどの非UTF-8環境だと動かない
decode('utf-8')からdecode(sys.stdin.encoding)に変更
同表記語が複数あった場合両方表示
dog(名詞),dog(動詞)
synsのリンクがデータベース上に見つからなかったので,link=synsの場合には引数の単語と同じsynsetに含まれる単語をすべて表示

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment