xingkaixin/parse.py

## parse.py
# -*- coding:utf-8 -*-

import jieba
import sys
reload(sys)
sys.setdefaultencoding( "utf-8" )


def trim(mystr):
    """Return ``mystr`` with every plain space character removed.

    Only the single character ``' '`` is dropped; tabs, newlines and all
    other characters are kept, in their original order.

    :param mystr: string (or any iterable of string pieces) to filter.
    :returns: the remaining pieces concatenated into one string; an empty
        string when nothing is left.
    """
    # One join replaces the character-by-character ``x = x + ...`` loop,
    # which was quadratic and used ``str`` (a builtin) as its loop variable.
    return ''.join(ch for ch in mystr if ch != ' ')

# Sample message mixing English, Chinese and digits; used as the
# segmentation input below.
text = '''
	   Dear Kevin,

	   请提供一下科目的清单 223023 translation date 2013011
	   '''

# Segment with jieba's default cut.  Earlier debugging also tried
# jieba.cut(text, cut_all=True) and jieba.cut_for_search(text); the debug
# prints labelled the three variants "precise mode", "full mode" and
# "search engine mode" respectively.
default_mode = jieba.cut(text)

# Keep every token that still has content once spaces, newlines and tabs
# have been stripped out of it.
words = []
for n in default_mode:
	word = trim(n).replace('\n', '').replace('\t', '')
	if word:
		words.append(word)

# Print the whole list first, then each token on its own line.
print(words)
for word in words:
	print(word.decode('utf-8'))
	# -*- coding:utf-8 -*-

	import jieba
	import sys
	reload(sys)
	sys.setdefaultencoding( "utf-8" )


	def trim(mystr):
		"""Return ``mystr`` with every plain space character removed.

		Only the single character ``' '`` is dropped; tabs, newlines and all
		other characters are kept, in their original order.

		:param mystr: string (or any iterable of string pieces) to filter.
		:returns: the remaining pieces concatenated into one string; an
			empty string when nothing is left.
		"""
		# The body had lost its indentation (it sat at the same level as the
		# ``def``), which is a syntax error.  It is restored here as a single
		# join, which also avoids using the builtin name ``str`` as a loop
		# variable.
		return ''.join(ch for ch in mystr if ch != ' ')

	# NOTE(review): this section repeats the script earlier in the file and
	# had lost the indentation of its loop and branch bodies (a syntax
	# error).  The nesting is restored here; confirm whether the duplicate
	# is intended at all.

	# Sample message mixing English, Chinese and digits; used as the
	# segmentation input below.
	text = '''
	Dear Kevin,

	请提供一下科目的清单 223023 translation date 2013011
	'''

	# Segment with jieba's default cut.  Earlier debugging also tried
	# jieba.cut(text, cut_all=True) and jieba.cut_for_search(text); the debug
	# prints labelled the three variants "precise mode", "full mode" and
	# "search engine mode" respectively.
	default_mode = jieba.cut(text)

	# Keep every token that still has content once spaces, newlines and tabs
	# have been stripped out of it.
	words = []
	for n in default_mode:
		word = trim(n).replace('\n', '').replace('\t', '')
		if word:
			words.append(word)

	# Print the whole list first, then each token on its own line.
	print(words)
	for word in words:
		print(word.decode('utf-8'))