#!/usr/bin/env python
# coding:utf-8
"""Report which known keywords occur in each sentence of the source articles.

Reads one keyword per line from ``keywords_file``, segments every file found
directly inside ``source_dictionary`` with jieba, and writes to
``result_file`` one line per sentence containing the matched keywords, each
followed by ``;``.  The literal marker ``A New File Start`` is written after
the sentences of each article.

All files are read and written as UTF-8 text so that keywords and jieba
tokens are the same string type and can be compared directly.
"""
import io
import os

import jieba

# Define where we put the files
source_dictionary = "/opt/ht/source"
keywords_file = "/opt/ht/keywords-new.txt"
result_file = "/opt/ht/result.txt"

# Convert the Keywords into a list.
# Each line is stripped: a keyword that still carries its trailing newline
# can never equal a token produced by the segmenter.  Blank lines are skipped.
with io.open(keywords_file, encoding="utf-8") as f:
    keywords_list = [line.strip() for line in f if line.strip()]

# Built once; the intersection below runs for every sentence.
keywords_set = set(keywords_list)

# Deal With the source file
file_list = os.listdir(source_dictionary)
with io.open(result_file, "w", encoding="utf-8") as r_file:
    for files in file_list:
        # os.listdir returns bare names, so they must be joined with the
        # directory; otherwise the open only works when cwd is that directory.
        source_path = os.path.join(source_dictionary, files)
        if not os.path.isfile(source_path):
            # Sub-directories and other non-regular entries cannot be read
            # as an article.
            continue

        with io.open(source_path, encoding="utf-8") as s_file:
            s_article = s_file.read()

        # Sentences are delimited by the CJK full stop.
        for sentences in s_article.split(u"。"):
            words_list = jieba.cut(sentences, cut_all=False)
            result = set(words_list) & keywords_set
            for words in result:
                r_file.write(u"%s;" % words)
            r_file.write(u"\n")

        # NOTE(review): the marker has no trailing newline, so the next
        # article's first result line is appended directly to it.  Kept
        # as-is to preserve the existing output format -- confirm whether
        # downstream consumers rely on it.
        r_file.write(u"A New File Start")