PeterDing/trans.py

## trans.py
#!/usr/bin/env python

#########################################################
# This little script was written against the following dictionaries:
# stardict-cced-2.4.2.tar.bz2, stardict-langdao-ec-gb-2.4.2.tar.bz2,
# stardict-ncce-ec-2.4.2.tar.bz2, stardict-xiangya-medical-2.4.2.tar.bz2
#
# If you want to use other dictionaries, you will need to add or remove
# parts of the code.
#
# By default it is designed to remove the records of
# <Collins Cobuild English Dictionary>, for Chinese users.
########################################################

import string

class cnvt():
    """Condense a StarDict lookup record into a one-line-per-word list.

    The record text is expected to consist of entries separated by a blank
    line.  The first line of an entry is the head word; every line that
    contains '<---' names a dictionary, and the lines after it belong to
    that dictionary.

    Attributes:
        source: the full text of the record file (edited in place by
            ``select`` and ``select2``).
        dict:   filled by ``make_dict``; maps each head word to a mapping
            of dictionary header line -> accumulated definition text.
    """

    # Header line that introduces a Collins Cobuild section.
    _COLLINS_HEADER = '<--- Collins Cobuild English Dictionary --->'

    def __init__(self, filename):
        """Read the whole record file named *filename*.

        Raises IOError if the file cannot be opened.
        """
        # 'with' guarantees the handle is closed even if read() fails.
        with open(filename) as record_file:
            self.source = record_file.read()
        self.dict = {}

    @staticmethod
    def _is_droppable_collins(section):
        """Decide whether one Collins section should be cut out of the text.

        *section* runs from the Collins header up to the next '<---'.  It is
        dropped when it contains no blank line, or when it contains one and
        either the text after the last blank line starts with a digit or the
        section contains '=>'.
        """
        if '\n\n' not in section:
            return True
        tail = section.split('\n\n')[-1]
        # tail[:1] is '' for an empty tail, so no IndexError is possible.
        return tail[:1].isdigit() or '=>' in section

    def select(self):
        """Remove the Collins Cobuild sections that qualify for removal.

        Each section spans from the Collins header to the next '<---' (or to
        the end of the text when there is none).  ``self.source`` is rebuilt
        from the pieces that are kept.
        """
        header = self._COLLINS_HEADER
        text = self.source
        kept = []
        pos = 0
        while True:
            start = text.find(header, pos)
            if start == -1:
                kept.append(text[pos:])
                break
            # Skip the header's own leading '<---' when looking for the end.
            end = text.find('<---', start + 4)
            if end == -1:
                end = len(text)
            kept.append(text[pos:start])
            section = text[start:end]
            if not self._is_droppable_collins(section):
                kept.append(section)
            # Slicing by index (rather than str.replace) removes exactly this
            # section and keeps the scan position valid.
            pos = end
        self.source = ''.join(kept)

    def select2(self):
        """Remove every span running from a '*' to the end of its line.

        The newline that ends the line is removed as well.  A '*' on a last
        line without a trailing newline is removed up to the end of the text.
        """
        text = self.source
        kept = []
        pos = 0
        while True:
            star = text.find('*', pos)
            if star == -1:
                kept.append(text[pos:])
                break
            kept.append(text[pos:star])
            line_end = text.find('\n', star + 1)
            if line_end == -1:
                break
            pos = line_end + 1
        self.source = ''.join(kept)

    @staticmethod
    def _is_redundant(item):
        """Return True when *item* holds only characters with codes 48..122.

        Spaces and hyphens are ignored.  The final character is not
        inspected; this mirrors the original check.
        NOTE(review): presumably this tolerates a trailing punctuation mark
        or control character -- confirm before tightening it.
        """
        compact = item.replace(' ', '').replace('-', '')
        return all(48 <= ord(ch) <= 122 for ch in compact[:-1])

    def make_dict(self):
        """Parse ``self.source`` into ``self.dict`` and write the file 'out'.

        For every word the longest definition text among its dictionaries is
        written as ``word<TAB><TAB>---- definition.``; afterwards the head
        words are listed again, one per line.  The file 'out' is created in
        the current working directory.
        """
        # Runs of blank lines produce empty chunks; drop them so that every
        # entry has at least a head word.
        entries = []
        for chunk in self.source.split('\n\n'):
            chunk = chunk.strip('\n')
            if chunk:
                entries.append(chunk)

        print('There are %d words!' % len(entries))

        words_list = []
        for entry in entries:
            lines = entry.split('\n')
            head = lines[0]           # the word being defined
            words_list.append(head)

            if '<---' not in entry:
                print('Error! ---> %s' % head)

            definitions = {}
            dict_name = None
            for item in lines[1:]:
                if '<---' in item:
                    # A dictionary header starts a new, empty definition.
                    dict_name = item
                    definitions[dict_name] = ''
                    continue

                # Collins lines are always kept; other lines are skipped
                # when they look redundant.
                if 'Collins Cobuild' not in item and self._is_redundant(item):
                    continue

                if dict_name is None:
                    # Definition text before any dictionary header.
                    print('Error! ---> %s' % head)
                    continue

                definitions[dict_name] += ',' + item

            self.dict[head] = definitions

        # Compare the definition sizes of each word and record the biggest.
        with open('out', 'w') as dict_file:
            for word in words_list:
                defines = self.dict[word]

                record = None
                for definition in defines.values():
                    # '>=' keeps the last of several equally long candidates.
                    if record is None or len(definition) >= len(record):
                        record = definition

                if record is None:
                    print('<@_@>!!!' + ' --->\t' + word + '  ==> ' + str(defines))
                    record = ' <@_@>!!! '

                # record[1:] drops the leading ',' added while accumulating.
                dict_file.write(word + '\t\t' + '---- ' + record[1:] + '.' + '\n')

            dict_file.write('\n\n\n\\\\ original words:\n')
            for word in words_list:
                # recording original words
                dict_file.write(word + '\n')


if __name__ == '__main__':
    # Usage: trans.py [record-file]
    # Without an argument the record is read from ~/dic.txt.
    import os
    import sys

    argv = sys.argv
    if len(argv) == 1:
        # expanduser avoids spawning 'printenv HOME' just to find the home dir.
        record_path = os.path.join(os.path.expanduser('~'), 'dic.txt')
    else:
        record_path = argv[1]

    try:
        do = cnvt(record_path)
    except IOError:
        print("Warning! Indicating the right address of stardict record's file)")
        # Without a loaded record there is nothing to convert; stop here
        # instead of failing later with a NameError on 'do'.
        sys.exit(1)

    do.select2()
    do.make_dict()

    print('Outputing file is at %s\n' % os.getcwd())

    # Wait for Enter before exiting (presumably so a console window stays
    # open).  raw_input only exists on Python 2; fall back to input.
    try:
        wait_for_enter = raw_input
    except NameError:
        wait_for_enter = input
    wait_for_enter()
	#!/usr/bin/env python

	#########################################################
	# This little script was written against the following dictionaries:
	# stardict-cced-2.4.2.tar.bz2, stardict-langdao-ec-gb-2.4.2.tar.bz2,
	# stardict-ncce-ec-2.4.2.tar.bz2, stardict-xiangya-medical-2.4.2.tar.bz2
	#
	# If you want to use other dictionaries, you will need to add or remove
	# parts of the code.
	#
	# By default it is designed to remove the records of
	# <Collins Cobuild English Dictionary>, for Chinese users.
	########################################################

	import string

	class cnvt():
	    """Condense a StarDict lookup record into a one-line-per-word list.

	    The record text is expected to consist of entries separated by a blank
	    line.  The first line of an entry is the head word; every line that
	    contains '<---' names a dictionary, and the lines after it belong to
	    that dictionary.

	    Attributes:
	        source: the full text of the record file (edited in place by
	            ``select`` and ``select2``).
	        dict:   filled by ``make_dict``; maps each head word to a mapping
	            of dictionary header line -> accumulated definition text.
	    """

	    # Header line that introduces a Collins Cobuild section.
	    _COLLINS_HEADER = '<--- Collins Cobuild English Dictionary --->'

	    def __init__(self, filename):
	        """Read the whole record file named *filename*.

	        Raises IOError if the file cannot be opened.
	        """
	        # 'with' guarantees the handle is closed even if read() fails.
	        with open(filename) as record_file:
	            self.source = record_file.read()
	        self.dict = {}

	    @staticmethod
	    def _is_droppable_collins(section):
	        """Decide whether one Collins section should be cut out of the text.

	        *section* runs from the Collins header up to the next '<---'.  It is
	        dropped when it contains no blank line, or when it contains one and
	        either the text after the last blank line starts with a digit or the
	        section contains '=>'.
	        """
	        if '\n\n' not in section:
	            return True
	        tail = section.split('\n\n')[-1]
	        # tail[:1] is '' for an empty tail, so no IndexError is possible.
	        return tail[:1].isdigit() or '=>' in section

	    def select(self):
	        """Remove the Collins Cobuild sections that qualify for removal.

	        Each section spans from the Collins header to the next '<---' (or to
	        the end of the text when there is none).  ``self.source`` is rebuilt
	        from the pieces that are kept.
	        """
	        header = self._COLLINS_HEADER
	        text = self.source
	        kept = []
	        pos = 0
	        while True:
	            start = text.find(header, pos)
	            if start == -1:
	                kept.append(text[pos:])
	                break
	            # Skip the header's own leading '<---' when looking for the end.
	            end = text.find('<---', start + 4)
	            if end == -1:
	                end = len(text)
	            kept.append(text[pos:start])
	            section = text[start:end]
	            if not self._is_droppable_collins(section):
	                kept.append(section)
	            # Slicing by index (rather than str.replace) removes exactly this
	            # section and keeps the scan position valid.
	            pos = end
	        self.source = ''.join(kept)

	    def select2(self):
	        """Remove every span running from a '*' to the end of its line.

	        The newline that ends the line is removed as well.  A '*' on a last
	        line without a trailing newline is removed up to the end of the text.
	        """
	        text = self.source
	        kept = []
	        pos = 0
	        while True:
	            star = text.find('*', pos)
	            if star == -1:
	                kept.append(text[pos:])
	                break
	            kept.append(text[pos:star])
	            line_end = text.find('\n', star + 1)
	            if line_end == -1:
	                break
	            pos = line_end + 1
	        self.source = ''.join(kept)

	    @staticmethod
	    def _is_redundant(item):
	        """Return True when *item* holds only characters with codes 48..122.

	        Spaces and hyphens are ignored.  The final character is not
	        inspected; this mirrors the original check.
	        NOTE(review): presumably this tolerates a trailing punctuation mark
	        or control character -- confirm before tightening it.
	        """
	        compact = item.replace(' ', '').replace('-', '')
	        return all(48 <= ord(ch) <= 122 for ch in compact[:-1])

	    def make_dict(self):
	        """Parse ``self.source`` into ``self.dict`` and write the file 'out'.

	        For every word the longest definition text among its dictionaries is
	        written as ``word<TAB><TAB>---- definition.``; afterwards the head
	        words are listed again, one per line.  The file 'out' is created in
	        the current working directory.
	        """
	        # Runs of blank lines produce empty chunks; drop them so that every
	        # entry has at least a head word.
	        entries = []
	        for chunk in self.source.split('\n\n'):
	            chunk = chunk.strip('\n')
	            if chunk:
	                entries.append(chunk)

	        print('There are %d words!' % len(entries))

	        words_list = []
	        for entry in entries:
	            lines = entry.split('\n')
	            head = lines[0]           # the word being defined
	            words_list.append(head)

	            if '<---' not in entry:
	                print('Error! ---> %s' % head)

	            definitions = {}
	            dict_name = None
	            for item in lines[1:]:
	                if '<---' in item:
	                    # A dictionary header starts a new, empty definition.
	                    dict_name = item
	                    definitions[dict_name] = ''
	                    continue

	                # Collins lines are always kept; other lines are skipped
	                # when they look redundant.
	                if 'Collins Cobuild' not in item and self._is_redundant(item):
	                    continue

	                if dict_name is None:
	                    # Definition text before any dictionary header.
	                    print('Error! ---> %s' % head)
	                    continue

	                definitions[dict_name] += ',' + item

	            self.dict[head] = definitions

	        # Compare the definition sizes of each word and record the biggest.
	        with open('out', 'w') as dict_file:
	            for word in words_list:
	                defines = self.dict[word]

	                record = None
	                for definition in defines.values():
	                    # '>=' keeps the last of several equally long candidates.
	                    if record is None or len(definition) >= len(record):
	                        record = definition

	                if record is None:
	                    print('<@_@>!!!' + ' --->\t' + word + ' ==> ' + str(defines))
	                    record = ' <@_@>!!! '

	                # record[1:] drops the leading ',' added while accumulating.
	                dict_file.write(word + '\t\t' + '---- ' + record[1:] + '.' + '\n')

	            dict_file.write('\n\n\n\\\\ original words:\n')
	            for word in words_list:
	                # recording original words
	                dict_file.write(word + '\n')


	if __name__ == '__main__':
	    # Usage: trans.py [record-file]
	    # Without an argument the record is read from ~/dic.txt.
	    import os
	    import sys

	    argv = sys.argv
	    if len(argv) == 1:
	        # expanduser avoids spawning 'printenv HOME' just to find the home dir.
	        record_path = os.path.join(os.path.expanduser('~'), 'dic.txt')
	    else:
	        record_path = argv[1]

	    try:
	        do = cnvt(record_path)
	    except IOError:
	        print("Warning! Indicating the right address of stardict record's file)")
	        # Without a loaded record there is nothing to convert; stop here
	        # instead of failing later with a NameError on 'do'.
	        sys.exit(1)

	    do.select2()
	    do.make_dict()

	    print('Outputing file is at %s\n' % os.getcwd())

	    # Wait for Enter before exiting (presumably so a console window stays
	    # open).  raw_input only exists on Python 2; fall back to input.
	    try:
	        wait_for_enter = raw_input
	    except NameError:
	        wait_for_enter = input
	    wait_for_enter()