Skip to content

Instantly share code, notes, and snippets.

@PeterDing
Created June 12, 2011 14:32
Show Gist options
  • Save PeterDing/1021610 to your computer and use it in GitHub Desktop.
Save PeterDing/1021610 to your computer and use it in GitHub Desktop.
Rearranges the records saved by StarDict into readable lines.
#!/usr/bin/env python
#########################################################
# I wrote this little script against the following dictionaries:
# stardict-cced-2.4.2.tar.bz2, stardict-langdao-ec-gb-2.4.2.tar.bz2,
# stardict-ncce-ec-2.4.2.tar.bz2, stardict-xiangya-medical-2.4.2.tar.bz2
#
# If you want to use other dictionaries, you will need to add or remove
# parts of the code.
#
# By default it is designed to remove the records of
# <Collins Cobuild English Dictionary> for Chinese users.
########################################################
import string
class cnvt():
    """Turn a StarDict record file into one readable line per word.

    The whole input file is held in ``self.source``.  ``make_dict`` treats
    the text as entries separated by blank lines: the first line of an entry
    is the word, and every line containing ``<---`` starts the definition
    given by one dictionary.  Parsed entries are stored in ``self.dict`` as
    ``{word: {dictionary_header_line: ',line,line...'}}``.

    The code is written so that it parses under both Python 2 and Python 3
    (``print`` is always called with a single parenthesised argument).
    """

    def __init__(self, filename):
        """Read the record file *filename* completely into ``self.source``.

        Raises:
            IOError: if the file cannot be opened or read.
        """
        # Use a context manager so the handle is closed deterministically.
        with open(filename) as record_file:
            self.source = record_file.read()
        self.dict = {}

    @staticmethod
    def _is_removable_collins(section):
        """Tell whether a Collins section should be cut out of the source.

        *section* runs from the Collins header up to the next ``<---``.
        It is removable when it contains no blank line, or when it contains
        ``=>``, or when the text after its last blank line starts with a
        digit.
        NOTE(review): the reason for the digit / ``=>`` rules is not visible
        in this file -- confirm against real record data.
        """
        if '\n\n' not in section:
            return True
        tail = section.split('\n\n')[-1]
        # tail[:1] instead of tail[0]: the tail is empty when the section
        # ends with a blank line, and '' is simply "not a digit".
        return tail[:1].isdigit() or '=>' in section

    def select(self):
        """Remove the Collins Cobuild sections that match the removal rules.

        Each section starts at the Collins header and ends right before the
        next ``<---`` (or at the end of the text when there is none).
        ``self.source`` is updated in place; nothing is returned.
        """
        marker = '<--- Collins Cobuild English Dictionary --->'
        pos = self.source.find(marker)
        while pos != -1:
            end = self.source.find('<---', pos + 4)
            if end == -1:
                # No later header: the section reaches the end of the text.
                end = len(self.source)
            if self._is_removable_collins(self.source[pos:end]):
                # Cut exactly this span.  The text that followed it now
                # starts at ``pos``, so the search resumes from there rather
                # than from the (now stale) old end offset.
                self.source = self.source[:pos] + self.source[end:]
            else:
                pos = end
            pos = self.source.find(marker, pos)

    def select2(self):
        """Remove everything from each ``*`` through the end of its line.

        The newline that ends the line is removed as well.  A ``*`` that has
        no newline after it (last line of the text) is left untouched.
        ``self.source`` is updated in place; nothing is returned.
        """
        text = self.source
        kept = []
        pos = 0
        while True:
            star = text.find('*', pos)
            if star == -1:
                break
            newline = text.find('\n', star + 1)
            if newline == -1:
                break
            kept.append(text[pos:star])
            pos = newline + 1
        kept.append(text[pos:])
        self.source = ''.join(kept)

    @staticmethod
    def _is_redundant(item):
        """Return True when *item* holds only characters with codes 48..122.

        Spaces and hyphens are ignored.  The last remaining character is not
        examined.
        NOTE(review): skipping the last character is kept from the original
        loop bound (``len(y) - 1``); it may be intentional -- confirm.
        """
        compact = item.replace(' ', '').replace('-', '')
        return all(48 <= ord(ch) <= 122 for ch in compact[:-1])

    def _collect_definitions(self, head, body_lines):
        """Group the lines of one entry under their dictionary header line.

        Returns ``{header_line: ',line,line...'}``.  Lines judged redundant
        by ``_is_redundant`` are dropped unless they contain
        ``Collins Cobuild``.  A kept line that appears before any header is
        reported with an ``Error!`` message and skipped.
        """
        definitions = {}
        current = ''
        for item in body_lines:
            if '<---' in item:
                current = item
                definitions[item] = ''
                continue
            if 'Collins Cobuild' not in item and self._is_redundant(item):
                continue
            try:
                definitions[current] = definitions[current] + ',' + item
            except KeyError:
                print('Error! ---> %s' % head)
        return definitions

    def _write_report(self, words_list):
        """Write the file ``out`` in the current directory.

        For every word the longest definition string is written (without
        its leading comma), followed by the list of original words.
        """
        with open('out', 'w') as dict_file:
            for word in words_list:
                defines = self.dict[word]
                # Map definition length -> dictionary header; for equal
                # lengths the header seen last wins.
                lengths = {}
                for dict_name in defines:
                    lengths[len(defines[dict_name])] = dict_name
                if lengths:
                    record = defines[lengths[max(lengths)]]  # biggest definition
                else:
                    print('<@_@>!!!' + ' --->\t' + word + ' ==> ' + str(defines))
                    record = ' <@_@>!!! '
                dict_file.write(word + '\t\t' + '---- ' + record[1:] + '.' + '\n')
            dict_file.write('\n\n\n\\\\ original words:\n')
            for word in words_list:
                # recording original words
                dict_file.write(word + '\n')

    def make_dict(self):
        """Parse ``self.source`` into ``self.dict`` and write the file ``out``.

        Entries are split on blank lines; blank entries (produced by runs of
        four or more newlines) are skipped.  An entry without any ``<---``
        line is reported with an ``Error!`` message but still listed.
        """
        entries = [entry for entry in self.source.split('\n\n') if entry.strip()]
        print('There are %d words!' % len(entries))
        words_list = []
        for entry in entries:
            lines = entry.split('\n')
            if lines[-1] == '':
                del lines[-1]
            head = lines[0]  # the looked-up word
            words_list.append(head)
            if '<---' not in entry:
                print('Error! ---> %s' % head)
            self.dict[head] = self._collect_definitions(head, lines[1:])
        self._write_report(words_list)
if __name__ == '__main__':
    # Command-line entry point:
    #   script.py [record_file]
    # Without an argument the record file defaults to ~/dic.txt.  The result
    # is written to the file 'out' in the current working directory.
    import os
    import sys

    if len(sys.argv) == 1:
        # Resolve the home directory in-process instead of spawning
        # `printenv HOME` through a shell.
        record_path = os.path.expanduser('~') + '/dic.txt'
    else:
        record_path = sys.argv[1]

    try:
        do = cnvt(record_path)
    except IOError:
        print("Warning! Indicating the right address of stardict record's file)")
        # Stop here: without a loaded file there is nothing to convert
        # (continuing would fail on the unbound name `do`).
        sys.exit(1)

    do.select2()
    do.make_dict()
    print('Outputing file is at %s\n' % os.getcwd())

    # Keep the console window open until the user presses Enter.
    # `raw_input` exists only on Python 2; fall back to `input` on Python 3.
    try:
        wait_for_enter = raw_input
    except NameError:
        wait_for_enter = input
    wait_for_enter()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment