@c4pt0r · Created August 29, 2012 02:51
# encoding=utf-8
import csv
import datetime
import json
import time

import redis


def read_list(filename):
    """Read a username list, one name per line, stripping whitespace."""
    with open(filename) as f:
        return [line.strip() for line in f if line.strip()]
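# The 'developer' and 'insider' files read below via read_list() are assumed
# to be plain-text files in the working directory, one username per line.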
# Registered users are stored under keys of the form 'User.<id>.email';
# strip the prefix and suffix to recover the user ids.
r = redis.Redis('nb391x.corp.youdao.com', 6379)
raw = r.keys('User.*.email')
emails = [k.replace('.email', '').replace('User.', '') for k in raw]
# Dump the user ids to a dated CSV file, one per row
# ('wb' because the Python 2 csv module wants a binary file).
with open('./out_%s.csv' % time.strftime('%Y%m%d'), 'wb') as fp:
    writer = csv.writer(fp)
    for email in emails:
        writer.writerow((email,))
# Entries (words) that saw edit activity this week.
recent = r.lrange('recent.list', 0, -1)
this_week = {}     # word -> [(date, user), ...] for edits in the last 7 days
before_words = {}  # word -> [(date, user), ...] for edits 7-14 days ago
cnt = 0            # total edits this week
ccnt = 0           # special-page edits this week
lst = [json.loads(i) for i in recent]
special_page = {}  # user -> special-page edit count this week
normal_page = {}   # user -> edit count this week
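# Each element of 'recent.list' is assumed (judging from the parsing below)
# to be a JSON object with at least 'date', 'user' and 'word' fields, e.g.:
#   {"date": "2012-08-27 14:03:11.123456", "user": "alice", "word": "%index"}
# Any fractional seconds are discarded by the .split('.')[0] below.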
now = datetime.datetime.now()  # fixed once so both windows share a boundary
for i in lst:
    i['date'] = datetime.datetime.strptime(i['date'].split('.')[0],
                                           '%Y-%m-%d %H:%M:%S')
    if i['date'] > now - datetime.timedelta(7):
        # Edited within the last 7 days.
        cnt += 1
        normal_page[i['user']] = normal_page.get(i['user'], 0) + 1
        if i['word'].startswith('%') or i['word'].startswith('@'):
            # Special pages are prefixed with '%' or '@'.
            ccnt += 1
            special_page[i['user']] = special_page.get(i['user'], 0) + 1
        this_week.setdefault(i['word'], []).append((i['date'], i['user']))
    elif now - datetime.timedelta(14) < i['date'] < now - datetime.timedelta(7):
        # Edited in the week before that.
        before_words.setdefault(i['word'], []).append((i['date'], i['user']))
# Read the user lists once and use sets for fast membership tests.
developers = set(read_list('developer'))
insiders = set(read_list('insider'))


def print_breakdown(users):
    """Print a user collection split into external / internal / developer."""
    print 'External users', [s for s in users if s not in insiders and s not in developers]
    print 'Internal users', [s for s in users if s in insiders]
    print 'Developers', [s for s in users if s in developers]

# Edit counts this week by group; sum() tolerates an empty list,
# unlike the bare reduce() it replaces.
print sum(n for user, n in normal_page.items() if user in developers)
print sum(n for user, n in normal_page.items() if user in insiders)
print sum(n for user, n in normal_page.items()
          if user not in insiders and user not in developers)
# Number of contributors this week.
this_week_people = set(user for edits in this_week.values() for _, user in edits)
print 'Contributors this week:', len(this_week_people)
print_breakdown(this_week_people)
# Number of contributors before this week.
before_people = set(user for edits in before_words.values() for _, user in edits)
print 'Contributors before this week (cumulative)', len(before_people), before_people
print_breakdown(before_people)
# Contributors seen this week but not the week before.
new_people = [s for s in this_week_people if s not in before_people]
print 'New contributors this week', len(new_people), new_people
print_breakdown(new_people)
# Contributors from the week before who did not return this week.
lost_people = [s for s in before_people if s not in this_week_people]
print 'Churned contributors this week', len(lost_people), lost_people
print_breakdown(lost_people)
def is_special(word):
    """Special pages are prefixed with '@' or '%'."""
    return word.startswith('@') or word.startswith('%')

# Entry (word) statistics.
new_words = [w for w in this_week if w not in before_words]
print 'New entries this week', len(new_words), '\n'  # ','.join(new_words)
print 'Entries from before this week', len(before_words), '\n'  # ','.join(before_words)
print 'All entries, cumulative', len(set(i['word'] for i in lst))
print
print 'New special pages this week', len([w for w in new_words if is_special(w)])
print 'Special pages from before this week', len([w for w in before_words if is_special(w)])
print 'All special pages, cumulative', len(set(i['word'] for i in lst if is_special(i['word'])))
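# To run this report (assuming Python 2 with the redis-py package installed,
# network access to the Redis host above, and the 'developer' / 'insider'
# user lists in the working directory; the script filename is hypothetical):
#
#   $ pip install redis
#   $ python weekly_report.py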