# mphdict word forms generator in Python
# GitHub gist by @egorsmkv (egorsmkv/43d0b8cc9018b0e47c07c3bbae3194ba),
# created April 28, 2021 14:39.
"""
Generator of word forms for LinguisticAndInformationSystems/mphdict
Source code: https://github.com/LinguisticAndInformationSystems/mphdict/blob/master/src/mphdict/mphDb.cs#L214
License: https://github.com/LinguisticAndInformationSystems/mphdict/blob/master/LICENSE.txt
Copyright: uSofTrod
Output is like the following:
1879380;бе"вкіт;0;2886;8;бе"вкіт;1
1879380;бе"вкіт;0;2886;8;бе"вкоту;2
1879380;бе"вкіт;0;2886;8;бе"вкотові;3
1879380;бе"вкіт;0;2886;8;бе"вкоту;3
1879380;бе"вкіт;0;2886;8;бе"вкіт;4
1879380;бе"вкіт;0;2886;8;бе"вкотом;5
1879380;бе"вкіт;0;2886;8;бе"вкоті;6
1879380;бе"вкіт;0;2886;8;бе"вкоте*;7
"""
import sqlite3
DB_PATH = '/home/username/mph_ua.db'


def _format_record(nom_old, word, field2, nom_type, part, form, flex_id):
    """Return one ``;``-separated output record terminated by a newline.

    Every field is converted with ``str()``, so a NULL column is rendered
    as the text ``None``.
    """
    fields = (nom_old, word, field2, nom_type, part, form, flex_id)
    return ';'.join(str(field) for field in fields) + '\n'


def _load_paradigm(con, nom_type):
    """Read the inflection data stored for one paradigm type.

    Args:
        con: Open ``sqlite3`` connection to the dictionary database.
        nom_type: Value of ``nom.type`` identifying the paradigm.

    Returns:
        A ``(indent, flexes)`` tuple: ``indent`` is the number of trailing
        characters cut from the headword to obtain the stem, ``flexes`` is a
        list of ``(flex, field2)`` rows ordered by ``field2, digit``.

    Raises:
        LookupError: If ``indents`` has no usable row for ``nom_type``.
    """
    # Bound parameters instead of f-string interpolation: no quoting issues
    # and no way for a stored value to be interpreted as SQL.
    indent_row = con.execute(
        'SELECT indent FROM indents WHERE type = ?', (nom_type,)).fetchone()
    if indent_row is None or indent_row[0] is None:
        raise LookupError(
            f'no indent defined for paradigm type {nom_type!r}')
    flexes = con.execute(
        'SELECT flex, field2 FROM flexes WHERE type = ? '
        'ORDER BY field2, digit', (nom_type,)).fetchall()
    return indent_row[0], flexes


def generate_word_forms(con):
    """Yield the generated forms of every non-deleted word in ``nom``.

    Words are visited in ``digit, field2, reestr`` order.  For each word one
    string is yielded containing a record per form, each record being
    ``nom_old;word;field2;type;part;form;flex_field2`` followed by a newline.
    A word whose type is ``0`` is emitted once, unchanged, with ``0`` in the
    last field.  A word whose type has no rows in ``flexes`` yields an empty
    string.

    Args:
        con: Open ``sqlite3`` connection to the dictionary database.

    Raises:
        LookupError: If a word refers to a type that has no ``indents`` row.
    """
    # Indent and endings depend only on the paradigm type, so they are
    # fetched once per type rather than once per word.
    paradigms = {}
    # ``isdel = 0`` is what ``isdel = FALSE`` means in SQLite, but it also
    # parses on versions older than 3.23 that lack the FALSE keyword.
    words = con.execute(
        'SELECT reestr, type, nom_old, field2, part FROM nom '
        'WHERE isdel = 0 ORDER BY digit, field2, reestr')
    for word, nom_type, nom_old, field2, part in words:
        if nom_type == 0:
            # Type 0: the headword is its only form.
            yield _format_record(
                nom_old, word, field2, nom_type, part, word, '0')
            continue

        if nom_type not in paradigms:
            paradigms[nom_type] = _load_paradigm(con, nom_type)
        indent, flexes = paradigms[nom_type]

        stem = word[0:len(word) - indent]
        yield ''.join(
            # A NULL/empty ending means the bare stem is the form.
            _format_record(nom_old, word, field2, nom_type, part,
                           stem + (ending or ''), flex_id)
            for ending, flex_id in flexes)


def main():
    """Print all word forms from the database at ``DB_PATH`` and a total."""
    con = sqlite3.connect(DB_PATH)
    try:
        count = 0
        for records in generate_word_forms(con):
            print(records)
            print()
            count += 1
        print('Words=', count)
    finally:
        # Release the database handle even if generation fails midway.
        con.close()


if __name__ == '__main__':
    main()
# (GitHub page footer: sign in on GitHub to comment on this gist.)