#!/usr/bin/env python
# coding:utf-8

import os
import jieba

# Locations of the input corpus directory, the keyword list and the report.
source_dictionary = "/opt/ht/source"
keywords_file = "/opt/ht/keywords-new.txt"
result_file = "/opt/ht/result.txt"

# All files are treated as UTF-8; text is decoded once when read and encoded
# once when written, so every comparison in between happens on unicode text.
ENCODING = "utf-8"
# Chinese full stop, used as the sentence boundary.
SENTENCE_DELIMITER = u"。"


def read_text(path):
    """Return the full content of *path* decoded as UTF-8 text.

    The file is opened in binary mode and decoded explicitly so the result
    is unicode text on both Python 2 and Python 3.

    Raises:
        IOError/OSError: if the file cannot be opened or read.
        UnicodeDecodeError: if the content is not valid UTF-8.
    """
    with open(path, "rb") as handle:
        return handle.read().decode(ENCODING)


def load_keywords(path):
    """Return the set of keywords listed one per line in *path*.

    Surrounding whitespace (including the line terminator) is stripped and
    blank lines are ignored; without the stripping a keyword would carry its
    trailing newline and never equal a token produced by the tokenizer.
    """
    stripped = (line.strip() for line in read_text(path).splitlines())
    return {keyword for keyword in stripped if keyword}


def match_keywords(tokens, keywords):
    """Return the distinct *tokens* that are also in *keywords*.

    The result is sorted so that the report is reproducible between runs.
    """
    return sorted(set(tokens).intersection(keywords))


def format_result_line(words):
    """Return one report line: every word followed by ';', then a newline.

    An empty *words* sequence yields a bare newline, so every sentence
    produces exactly one line in the report.
    """
    return u"".join(word + u";" for word in words) + u"\n"


def main():
    """Write the keywords found in each sentence of each source file.

    Every regular file directly inside ``source_dictionary`` is split into
    sentences on the Chinese full stop, each sentence is tokenized with
    jieba in accurate mode (``cut_all=False``), and the tokens that appear
    in the keyword list are written to ``result_file`` as one line per
    sentence.
    """
    # Build the keyword set once instead of once per sentence.
    keywords = load_keywords(keywords_file)

    with open(result_file, "wb") as report:
        # os.listdir returns names in arbitrary order; sort for stable output.
        for name in sorted(os.listdir(source_dictionary)):
            # listdir yields bare names, so they must be joined with the
            # directory; otherwise the open only works when the current
            # working directory happens to be the source directory.
            path = os.path.join(source_dictionary, name)
            if not os.path.isfile(path):
                continue  # skip sub-directories and other non-regular entries

            for sentence in read_text(path).split(SENTENCE_DELIMITER):
                tokens = jieba.cut(sentence, cut_all=False)
                line = format_result_line(match_keywords(tokens, keywords))
                report.write(line.encode(ENCODING))

        # NOTE(review): this marker is written once, after all files and
        # without a trailing newline, exactly as before. Its wording suggests
        # it may have been meant as a per-file separator inside the loop --
        # confirm the intended report format before moving it.
        report.write(u"A New File Start".encode(ENCODING))


if __name__ == "__main__":
    main()