Skip to content

Instantly share code, notes, and snippets.

@zhuqling
Created March 18, 2013 05:44
Show Gist options
  • Save zhuqling/5185274 to your computer and use it in GitHub Desktop.
Save zhuqling/5185274 to your computer and use it in GitHub Desktop.
敏感关键字匹配在线产品查找
#!/usr/bin/env python3
import collections
# Sensitive-keyword matching against the online product listings.

# Widest title (in characters) that the match report prints before truncating.
title_width = 60

# Positional indexes of the fields inside a Listing record.
FIELD_ITEMID, FIELD_SKU, FIELD_TITLE = range(3)

# One parsed row of the listing report: item ID, SKU and title.
Listing = collections.namedtuple(
    'Listing',
    "ItemID SKU Title",
)
def parser_listing(line):
    """Parse one fixed-width line of the listing report into a Listing.

    Column layout read from the line:
      * characters 0-11   -> item ID
      * character 12      -> skipped (not read by any field)
      * characters 13-149 -> SKU
      * characters 150+   -> title, lower-cased

    Every field has surrounding whitespace removed, so a line shorter than a
    column's width does not leak padding or the trailing newline into it.

    Args:
        line: A raw text line from the report, or an empty value.

    Returns:
        A Listing, or None when ``line`` is empty or None.
    """
    if not line:
        return None

    # Strip the item ID like the other fields; on short lines the slice would
    # otherwise carry the line's trailing newline.
    item_id = line[:12].strip()
    sku = line[13:150].strip()
    title = line[150:].strip().lower()
    return Listing(item_id, sku, title)
def matched(keyword, title):
    """Tell whether ``keyword`` appears in ``title`` next to a delimiter.

    A hit requires the keyword to be directly followed by one of
    ``' '``, ``'-'``, ``', '``, ``'.'``, ``'/'``, ``';'`` or directly preceded
    by one of ``' '``, ``'-'``, ``'.'``, ``'/'``, ``';'``. A title that consists
    of the keyword alone also counts as a hit. The comparison is
    case-sensitive; callers are expected to normalise case beforehand.

    Args:
        keyword: The keyword to look for.
        title: The text to search in.

    Returns:
        True on a hit, False otherwise (including for an empty keyword).
    """
    # Use the parameter itself rather than any module-level loop variable, so
    # the function works no matter where it is called from.
    if not keyword or keyword not in title:
        return False

    # A title that is exactly the keyword has no delimiter on either side but
    # is still a match.
    if title == keyword:
        return True

    trailing = (' ', '-', ', ', '.', '/', ';')
    leading = (' ', '-', '.', '/', ';')
    delimited_forms = [keyword + mark for mark in trailing]
    delimited_forms += [mark + keyword for mark in leading]
    return any(form in title for form in delimited_forms)
# Input reports: one keyword per line, and one fixed-width listing per line.
FILENAME_KEYWORDS = 'C:/Users/zhuqling/Desktop/senstive_keywords.rpt'
FILENAME_LISTING = 'C:/Users/zhuqling/Desktop/listing.rpt'

# Load the keywords, normalised to stripped lower case. Lines read from a file
# always carry their newline, so emptiness is checked after stripping; blank
# lines would otherwise be stored as empty keywords and inflate the count.
with open(FILENAME_KEYWORDS, encoding='utf8') as keyword_file:
    normalised = (raw.strip().lower() for raw in keyword_file)
    keywords = [keyword for keyword in normalised if keyword]
print("关键字数目:{0}".format(len(keywords)))

# Parse every listing line, dropping anything the parser rejects.
with open(FILENAME_LISTING, encoding='utf8') as listing_file:
    parsed = (parser_listing(raw) for raw in listing_file)
    lists = [entry for entry in parsed if entry is not None]
print("在线产品数目:{0}".format(len(lists)))

# Report every (keyword, listing) pair that matches.
print("匹配结果:")
match_count = 0
for listing in lists:
    title = listing[FIELD_TITLE]
    # Titles wider than the column are cut and end with "..." so the total
    # width stays at title_width.
    if len(title) <= title_width:
        shown_title = title
    else:
        shown_title = title[:title_width - 3] + "..."
    # The loop variable stays a module-level name called lineOfKeyword so any
    # existing code that reads it keeps working.
    for lineOfKeyword in keywords:
        if matched(lineOfKeyword, title):
            print("{0:<20} {1:>14} {2:<24} {3:<{tw}}".format(
                lineOfKeyword,
                listing[FIELD_ITEMID],
                listing[FIELD_SKU],
                shown_title,
                tw=title_width,
            ))
            match_count += 1
print("匹配数目:{0}".format(match_count))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment