Created
August 29, 2012 02:51
-
-
Save c4pt0r/3506340 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#encoding=utf-8 | |
import os | |
import sys | |
import redis | |
def read_list(filename):
    """Return the lines of ``filename`` with surrounding whitespace removed.

    The content is split on ``'\\n'``, so blank lines -- including the empty
    string that follows a trailing newline -- are kept as ``''`` entries.
    """
    # A context manager closes the handle deterministically, even when
    # read() raises; the original left the file open until garbage collection.
    with open(filename) as fh:
        content = fh.read()
    return [line.strip() for line in content.split('\n')]
# Connect to Redis and list every key matching "User.*.email".
r = redis.Redis('nb391x.corp.youdao.com', 6379)
raw = r.keys('User.*.email')

# Every matched key starts with "User." and ends with ".email".  Slice off
# exactly that prefix and suffix: str.replace() would also delete any
# "User." or ".email" occurring inside the middle part and mangle it.
_KEY_PREFIX = 'User.'
_KEY_SUFFIX = '.email'
emails = [key[len(_KEY_PREFIX):-len(_KEY_SUFFIX)] for key in raw]
import csv | |
import time | |
import datetime | |
import json | |
# Write the collected values to ./out_<YYYYMMDD>.csv, one value per row.
# The ``with`` block flushes and closes the file even if a write raises,
# which the manual open()/close() pair did not guarantee.
with open('./out_%s.csv' % time.strftime('%Y%m%d'), 'w') as fp:
    writer = csv.writer(fp)
    for email in emails:
        writer.writerow((email,))
# Words that had edit activity this week.
recent = r.lrange('recent.list', 0, -1)
this_week = {}      # word -> [(date, user), ...] for edits in the last 7 days
before_words = {}   # word -> [(date, user), ...] for edits 7 to 14 days old
cnt = 0             # number of edits in the last 7 days
ccnt = 0            # ...of which the word starts with '%' or '@'
lst = [json.loads(i) for i in recent]
special_page = {}   # user -> number of '%'/'@' edits in the last 7 days
normal_page = {}    # user -> number of all edits in the last 7 days

# Take a single timestamp so every record is classified against the same
# window; calling now() per comparison lets the window drift during the loop.
now = datetime.datetime.now()
week_ago = now - datetime.timedelta(7)
two_weeks_ago = now - datetime.timedelta(14)

for edit in lst:
    # Everything from the first '.' on is dropped before parsing
    # (presumably fractional seconds -- the format string has none).
    edit['date'] = datetime.datetime.strptime(
        edit['date'].split('.')[0], '%Y-%m-%d %H:%M:%S')
    when, user, word = edit['date'], edit['user'], edit['word']

    if when > week_ago:
        cnt += 1
        normal_page[user] = normal_page.get(user, 0) + 1
        if word.startswith(('%', '@')):
            ccnt += 1
            special_page[user] = special_page.get(user, 0) + 1
        this_week.setdefault(word, []).append((when, user))
    elif when > two_weeks_ago:
        # Reached only when ``when <= week_ago``, so an edit exactly on the
        # 7-day boundary lands here instead of being silently dropped.
        before_words.setdefault(word, []).append((when, user))
print reduce(lambda x, y : x + y, [x[1] for x in normal_page.items() if x[0] in read_list('developer')]) | |
print reduce(lambda x, y : x + y, [x[1] for x in normal_page.items() if x[0] in read_list('insider')]) | |
print reduce(lambda x, y : x + y, [x[1] for x in normal_page.items() if x[0] not in read_list('insider') and x[0] not in read_list('developer')]) | |
# 本周贡献人数 | |
people = set() | |
for item in this_week.values(): | |
for i in item: | |
people.add(i[1]) | |
print u'本周贡献人数:',len(people) | |
print u'外部用户', [s for s in people if s not in read_list('insider') and s not in read_list('developer')] | |
print u'内部用户', [s for s in people if s in read_list('insider')] | |
print u'开发者', [s for s in people if s in read_list('developer')] | |
this_week_people = people | |
#本周之前贡献人数 | |
people = set() | |
for item in before_words.values(): | |
for i in item: | |
people.add(i[1]) | |
before_people = people | |
print u'本周之前的贡献人数(累计)', len(people), people | |
print u'外部用户', [s for s in people if s not in read_list('insider') and s not in read_list('developer')] | |
print u'内部用户', [s for s in people if s in read_list('insider')] | |
print u'开发者', [s for s in people if s in read_list('developer')] | |
new_people = filter(lambda x: x not in before_people, this_week_people) | |
print u'本周新增贡献者', len(new_people), new_people | |
print u'外部用户', [s for s in new_people if s not in read_list('insider') and s not in read_list('developer')] | |
print u'内部用户', [s for s in new_people if s in read_list('insider')] | |
print u'开发者', [s for s in new_people if s in read_list('developer')] | |
lose_people = filter(lambda x: x not in this_week_people, before_people) | |
print u'本周流失贡献者', len(lose_people), lose_people | |
print u'外部用户', [s for s in lose_people if s not in read_list('insider') and s not in read_list('developer')] | |
print u'内部用户', [s for s in lose_people if s in read_list('insider')] | |
print u'开发者', [s for s in lose_people if s in read_list('developer')] | |
new_words = filter(lambda x : x not in before_words, this_week) | |
print u'本周新增的词条', len(new_words), '\n' #','.join(new_words) | |
print u'本周之前的原始词条', len(before_words), '\n'#, ','.join(before_words) | |
print u'累计所有词条', len(set([i['word'] for i in lst])) | |
print u'本周新增特殊页面', len(filter(lambda x:x.startswith('@') or x.startswith('%'), new_words)) | |
print u'本周之前原始特殊页面', len(filter(lambda x:x.startswith('@') or x.startswith('%'), before_words)) | |
print u'累计所有特殊页面', len(set([i['word'] for i in lst if i['word'].startswith('@') or i['word'].startswith('%')])) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment