Created
November 9, 2015 02:49
-
-
Save diggzhang/93d9fc6a598bf75bca23 to your computer and use it in GitHub Desktop.
数据周报 code review 之后
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# _*_ coding: utf8 _*_ | |
from pymongo import MongoClient | |
from bson.objectid import ObjectId | |
import datetime | |
# link to db | |
dbClient = MongoClient('mongodb://localhost:27017') | |
db = dbClient['yangcong-prod25'] | |
# open collections | |
points = db['points'] | |
users = db['users'] | |
rooms = db['rooms'] | |
# configure daterange | |
lastWeekStartDate = datetime.datetime(2015,10,25) | |
lastWeekEndDate = lastWeekStartDate + datetime.timedelta(days = 7) | |
startDate = datetime.datetime(2015,11,01) | |
endDate = startDate + datetime.timedelta(days = 7) | |
print("###### Part.0 视频播放总数 ######") | |
def playVideoUvWithDateRange(startDate, endDate): | |
pipeLine = [ | |
{"$match": { | |
"eventKey": "openVideo", | |
"createdBy": { | |
"$gte": startDate, | |
"$lte": endDate | |
} | |
}} | |
] | |
return list(points.aggregate(pipeLine)) | |
print("当周视频播放总数:") | |
print(len(playVideoUvWithDateRange(startDate, endDate))) | |
print("###### Part.0 视频播放总数 ######") | |
print("###### Part.1 总用户数 ######") | |
# 用户累计新增人数 首次在洋葱数学网站内产生事件的用户数量 | |
def calNewUserIn(): | |
pipeLine = [ | |
{"$match": { | |
"from": {"$ne": ['ios', 'android']} | |
}}, | |
{"$group": { | |
"_id": "$user" | |
}} | |
] | |
return list(points.aggregate(pipeLine)) | |
newUsersList = calNewUserIn() | |
newUsersArray = [] | |
for x in newUsersList: | |
newUsersArray.append(x['_id']) | |
# print("PC 累计活动用户数:") | |
# print(len(newUsersArray)) | |
def calRegUsers(): | |
pipeLine = [ | |
{"$match": { | |
"usefulData.from": "signup" | |
}}, | |
{"$group": { | |
"_id": "$_id" | |
}} | |
] | |
return list(users.aggregate(pipeLine)) | |
regUsersList = calRegUsers() | |
regUsersArray = [] | |
for x in regUsersList: | |
regUsersArray.append(x['_id']) | |
# print("PC 累计注册用户数:") | |
# print(len(regUsersArray)) | |
regActUsers = [] | |
regActUsers = list(set.intersection(set(newUsersArray), set(regUsersArray))) | |
print("PC端 累计活跃用户数:") | |
print(len(regActUsers)) | |
print("PC端 累计活跃用户数算法 ==> 取 PC端曾经产生过埋点信息的用户 与 PC端注册用户 的 交集") | |
# iOS共计注册 | |
# db.users.find({"usefulData.from":"ios"}).count() | |
def calIosReg(): | |
pipeLine = [ | |
{"$match": { | |
"usefulData.from": "ios" | |
}}, | |
{"$project": { | |
"_id":"$_id" | |
}} | |
] | |
return list(users.aggregate(pipeLine)) | |
print("iOS注册用户:") | |
iosRegUserNum = len(calIosReg()) | |
print(iosRegUserNum) | |
# Android共计注册 | |
# db.users.find({"usefulData.from":"android"}).count() | |
def calAndroidReg(): | |
pipeLine = [ | |
{"$match": { | |
"usefulData.from": "android" | |
}}, | |
{"$project": { | |
"_id": "$_id" | |
}} | |
] | |
return list(users.aggregate(pipeLine)) | |
print("Android注册用户:") | |
androidUserNum = len(calAndroidReg()) | |
print(androidUserNum) | |
print("移动端注册用户总数") | |
print(iosRegUserNum + androidUserNum) | |
print("移动端未注册用户") | |
print("算法 ==> 友盟总数 减去 移动端注册用户总数") | |
print("") | |
print("###### Part.1 总用户数 ######") | |
print("") | |
print("###### Part.2 新增用户数 ######") | |
def calAndroidRegThisWeek(startDate, endDate): | |
pipeLine = [ | |
{"$match": { | |
"usefulData.registDate": { | |
"$gte": startDate, | |
"$lt": endDate | |
}, | |
"usefulData.from": "android" | |
}}, | |
{"$project": { | |
"_id": "$_id" | |
}} | |
] | |
return list(users.aggregate(pipeLine)) | |
print("Android本周注册用户:") | |
numOfAndroidRegThisWeek = len(calAndroidRegThisWeek(startDate, endDate)) | |
print(numOfAndroidRegThisWeek) | |
def calIosRegThisWeek(startDate, endDate): | |
pipeLine = [ | |
{"$match" : { | |
"usefulData.registDate": { | |
"$gte": startDate, | |
"$lt": endDate | |
}, | |
"usefulData.from": "ios" | |
}}, | |
{"$project": { | |
"_id": "$_id" | |
}} | |
] | |
return list(users.aggregate(pipeLine)) | |
print("iOS本周注册用户:") | |
numOfIosRegThisWeek = len(calIosRegThisWeek(startDate, endDate)) | |
print(numOfIosRegThisWeek) | |
print("移动端本周注册") | |
print(numOfIosRegThisWeek + numOfAndroidRegThisWeek) | |
print("移动端未注册") | |
print("算法 ===> 友盟本周数量 - 移动端本周注册") | |
print("") | |
# TODO: QQ from | |
def calQqPlatformUsers(startDate, endDate): | |
pipeLine = [ | |
{"$match": { | |
"usefulData.registDate": { | |
"$gte": startDate, | |
"$lte": endDate | |
}, | |
"usefulData.q": "qqPlatform" | |
}}, | |
{"$group": { | |
"_id": "$_id" | |
}} | |
] | |
return list(users.aggregate(pipeLine)) | |
print('QQ平台用户:') | |
print(len(calQqPlatformUsers(startDate, endDate))) | |
def calOtherPlatformUsers(startDate,endDate): | |
pipeLine = [ | |
{"$match": { | |
"usefulData.registDate": { | |
"$gte": startDate, | |
"$lte": endDate | |
}, | |
"usefulData.q": { | |
"$exists": True, | |
"$ne":"qqPlatform" | |
} | |
}} | |
] | |
return list(users.aggregate(pipeLine)) | |
print('其它平台用户:') | |
print(len(calOtherPlatformUsers(startDate, endDate))) | |
# 用ObjectId拿出班级创建日期 | |
# 提取出班级里的用户 | |
# 选出激活的用户 | |
# 算出本周新来的教师用户 | |
def calTeachersId(startDate, endDate): | |
pipeLine = [ | |
{"$match": { | |
"usefulData.registDate": { | |
"$gte": startDate, | |
"$lte": endDate | |
}, | |
"role": "teacher", | |
"rooms": {"$exists": True, "$not": {"$size": 0}} | |
}}, | |
{"$project": { | |
"_id":"$_id", | |
"rooms": "$rooms" | |
}} | |
] | |
return list(users.aggregate(pipeLine)) | |
teacherIdAndRoomId = calTeachersId(startDate, endDate) | |
# 找出新来的教师新建的班级 | |
roomsId = [] | |
for room in teacherIdAndRoomId: | |
roomsId.extend(room['rooms']) | |
# print("共创建教室:") | |
# print(len(roomsId)) | |
#let all rooms id into room collections get all users | |
def allStuInRooms(roomsId): | |
pipeLine = [ | |
{"$match": { | |
"_id": {"$in": roomsId}, | |
"users": {"$exists": True, "$not": {"$size": 0}} | |
}}, | |
{"$project": { | |
"_id":"$users" | |
}} | |
] | |
return list(rooms.aggregate(pipeLine)) | |
allStudents = allStuInRooms(roomsId) | |
# store all studentd id | |
flatAllstudents = [] | |
for student in allStudents: | |
flatAllstudents.extend(student['_id']) | |
print("本周教师创建批量用户数:") | |
teacherCreateStudentsNum = len(flatAllstudents) | |
print(teacherCreateStudentsNum) | |
# check flatAllstudents isin points | |
def stuInRoomAct(startDate, flatAllstudents): | |
pipeLine = [ | |
{"$match": { | |
"createdBy": { | |
"$gte": startDate | |
}, | |
"user": {"$in": flatAllstudents} | |
}}, | |
{"$group": { | |
"_id": "$user" | |
}} | |
] | |
return list(points.aggregate(pipeLine)) | |
print("其中激活学生:") | |
teacherCreateStudentsNumActive = len(stuInRoomAct(startDate, flatAllstudents)) | |
print(teacherCreateStudentsNumActive) | |
print("###### Part.2 新增用户数 ######") | |
print("") | |
print("###### Part.3 周活跃 ######") | |
def calPcWork(startDate, endDate): | |
pipeLine = [ | |
{"$match": { | |
"createdBy": { | |
"$gte": startDate, | |
"$lt": endDate | |
}, | |
"from": "pc" | |
}}, | |
{ | |
"$group": { | |
"_id": "$user" | |
} | |
} | |
] | |
return list(points.aggregate(pipeLine)) | |
pcActUsers = calPcWork(startDate, endDate) | |
pcActUsersArray = [] | |
for x in pcActUsers: | |
pcActUsersArray.append(x['_id']) | |
# print("PC本周活动用户:") | |
# print(len(pcActUsersArray)) | |
def isUsers(startDate, endDate): | |
pipeLine = [ | |
{"$match": { | |
"usefulData.registDate": { | |
"$gte": startDate, | |
"$lt": endDate | |
}, | |
"usefulData.from": "signup" | |
}}, | |
{"$group": { | |
"_id": "$_id" | |
}} | |
] | |
return list(users.aggregate(pipeLine)) | |
def isAllUsers(): | |
pipeLine = [ | |
{"$match": { | |
"usefulData.from": "signup" | |
}}, | |
{"$group": { | |
"_id": "$_id" | |
}} | |
] | |
return list(users.aggregate(pipeLine)) | |
regUsers = isAllUsers() | |
regUsersArray = [] | |
for x in regUsers: | |
regUsersArray.append(x['_id']) | |
# print("PC本周注册用户") | |
# print(len(regUsersArray)) | |
pcOutArray = [] | |
pcOutArray = list(set.intersection(set(regUsersArray), set(pcActUsersArray))) | |
print("PC周活跃统计") | |
print(len(pcOutArray)) | |
def calIosWork(startDate, endDate): | |
pipeLine = [ | |
{"$match": { | |
"createdBy": { | |
"$gte": startDate, | |
"$lte": endDate | |
}, | |
"from": "ios", | |
}}, | |
{ | |
"$group": { | |
"_id": "$user" | |
} | |
} | |
] | |
return list(points.aggregate(pipeLine)) | |
def calAndroidWork(startDate, endDate): | |
pipeLine = [ | |
{"$match": { | |
"createdBy": { | |
"$gte": startDate, | |
"$lte": endDate | |
}, | |
"from": "android" | |
}}, | |
{ | |
"$group": { | |
"_id": "$user" | |
} | |
} | |
] | |
return list(points.aggregate(pipeLine)) | |
mobileOutList = [] | |
mobileOutList.extend(calIosWork(startDate, endDate)); | |
mobileOutList.extend(calAndroidWork(startDate, endDate)); | |
# print(len(mobileOutList)) | |
mobileOutArray = [] | |
for x in mobileOutList: | |
mobileOutArray.append(x['_id']) | |
print("Mobile 周活跃总计") | |
print(len(mobileOutArray)) | |
bothOutArray = [] | |
bothOutArray = list(set.intersection(set(pcOutArray), set(mobileOutArray))) | |
print("两端同活跃统计") | |
print(len(bothOutArray)) | |
print("###### Part.3 周活跃 ######") | |
print("") | |
print("###### Part.4 首页漏斗######") | |
def enterHomeUserId(startDate, endDate): | |
pipeLine = [ | |
{"$match": { | |
"createdBy": { | |
"$gte": startDate, | |
"$lt": endDate | |
}, | |
"eventKey": "enterHome" | |
}}, | |
{"$group": { | |
"_id": "$user" | |
}} | |
] | |
return list(points.aggregate(pipeLine)) | |
allEnterHomeUserId = enterHomeUserId(startDate, endDate) | |
# processing allEnterHomeUserId as a array | |
enterHomeUserIdArray = [] | |
for x in allEnterHomeUserId: | |
enterHomeUserIdArray.append(x['_id']) | |
print("进入首页:") | |
print(len(enterHomeUserIdArray)) | |
# clickSignup | |
def clickSignupUserId(startDate, endDate, userId): | |
pipeLine = [ | |
{"$match": { | |
"createdBy": { | |
"$gte": startDate, | |
"$lt": endDate | |
}, | |
"user": {"$in": userId}, | |
"eventKey": "clickSignup" | |
}}, | |
{"$group": { | |
"_id": "$user" | |
}} | |
] | |
return list(points.aggregate(pipeLine)) | |
allClickSignupUserId = clickSignupUserId(startDate, endDate, enterHomeUserIdArray) | |
clickSignupUserIdArray = [] | |
for x in allClickSignupUserId: | |
clickSignupUserIdArray.append(x['_id']) | |
print("点击注册: ") | |
print(len(clickSignupUserIdArray)) | |
# enterSignup | |
def enterSignupUserId(startDate, endDate, userId): | |
pipeLine = [ | |
{"$match": { | |
"createdBy": { | |
"$gte": startDate, | |
"$lt": endDate | |
}, | |
"user": {"$in": userId}, | |
"eventKey": "enterSignup" | |
}}, | |
{"$group": { | |
"_id": "$user" | |
}} | |
] | |
return list(points.aggregate(pipeLine)) | |
allEnterSignupUserId = enterSignupUserId(startDate, endDate, clickSignupUserIdArray) | |
enterSignupUserIdArray = [] | |
for x in allEnterSignupUserId: | |
enterSignupUserIdArray.append(x['_id']) | |
print("进入注册页: ") | |
print(len(enterSignupUserIdArray)) | |
# clickSubmitSignup | |
def clickSubmitSignup(startDate, endDate, userId): | |
pipeLine = [ | |
{"$match": { | |
"createdBy": { | |
"$gte": startDate, | |
"$lt": endDate | |
}, | |
"user": {"$in": userId}, | |
"eventKey": "clickSubmitSignup" | |
}}, | |
{"$group": { | |
"_id": "$user" | |
}} | |
] | |
return list(points.aggregate(pipeLine)) | |
allClickSubmitSignupUserId = clickSubmitSignup(startDate, endDate, enterSignupUserIdArray) | |
clickSubmitSignupUserIdArray = [] | |
for x in allClickSubmitSignupUserId: | |
clickSubmitSignupUserIdArray.append(x['_id']) | |
print("点击提交注册: ") | |
print(len(clickSubmitSignupUserIdArray)) | |
# tempSignUpPost | |
def tempSignUpPost(startDate, endDate, userId): | |
pipeLine = [ | |
{"$match": { | |
"createdBy": { | |
"$gte": startDate, | |
"$lt": endDate | |
}, | |
"user": {"$in": userId}, | |
"eventKey": "tempSignUpPost" | |
}}, | |
{"$group": { | |
"_id": "$user" | |
}} | |
] | |
return list(points.aggregate(pipeLine)) | |
alltempSignUpPostUserId = tempSignUpPost(startDate, endDate, clickSubmitSignupUserIdArray) | |
tempSignUpPostArray = [] | |
for x in alltempSignUpPostUserId: | |
tempSignUpPostArray.append(x['_id']) | |
print("tempSignUpPost:") | |
print(len(tempSignUpPostArray)) | |
# tempSignUpGetMe | |
def tempSignUpGetMe(startDate, endDate, userId): | |
pipeLine = [ | |
{"$match": { | |
"createdBy": { | |
"$gte": startDate, | |
"$lt": endDate | |
}, | |
"user": {"$in": userId}, | |
"eventKey": "tempSignUpGetMe" | |
}}, | |
{"$group": { | |
"_id": "$user" | |
}} | |
] | |
return list(points.aggregate(pipeLine)) | |
alltempSignUpGetMeUserId = tempSignUpGetMe(startDate, endDate, tempSignUpPostArray) | |
tempSignUpGetMeArray = [] | |
for x in alltempSignUpGetMeUserId: | |
tempSignUpGetMeArray.append(x['_id']) | |
print("注册成功(tempSignUpGetMe): ") | |
print(len(tempSignUpGetMeArray)) | |
# openVideo | |
def openVideo(startDate, endDate, userId): | |
pipeLine = [ | |
{"$match": { | |
"createdBy": { | |
"$gte": startDate, | |
"$lt": endDate | |
}, | |
"user": {"$in": userId}, | |
"eventKey": "openVideo" | |
}}, | |
{"$group": { | |
"_id": "$user" | |
}} | |
] | |
return list(points.aggregate(pipeLine)) | |
allopenVideoUserId = openVideo(startDate, endDate, tempSignUpGetMeArray) | |
openVideoArray = [] | |
for x in allopenVideoUserId: | |
openVideoArray.append(x['_id']) | |
print("打开视频: ") | |
print(len(openVideoArray)) | |
# finishVideo | |
def finishVideo(startDate, endDate, userId): | |
pipeLine = [ | |
{"$match": { | |
"createdBy": { | |
"$gte": startDate, | |
"$lt": endDate | |
}, | |
"user": {"$in": userId}, | |
"eventKey": "finishVideo" | |
}}, | |
{"$group": { | |
"_id": "$user" | |
}} | |
] | |
return list(points.aggregate(pipeLine)) | |
allfinishVideoUserId = finishVideo(startDate, endDate, openVideoArray) | |
finishVideoArray = [] | |
for x in allfinishVideoUserId: | |
finishVideoArray.append(x['_id']) | |
print("关闭视频: ") | |
print(len(finishVideoArray)) | |
print("###### Part.4 首页漏斗######") | |
print("###### Part.5 周留存 ######") | |
# TODO: | |
def usersInLastWeek(startDate, endDate): | |
pipeLine = [ | |
{"$match": { | |
# "from":"pc", | |
"createdBy": { | |
"$gte": startDate, | |
"$lt": endDate | |
} | |
}}, | |
{"$group": { | |
"_id": "$user" | |
}} | |
] | |
return list(points.aggregate(pipeLine)) | |
usersInLastWeekList = usersInLastWeek(lastWeekStartDate, lastWeekEndDate) | |
usersInLastWeekArray = [] | |
for x in usersInLastWeekList: | |
usersInLastWeekArray.append(x['_id']) | |
# Mobile行动 | |
def usersInIosAndAndroid(startDate, endDate): | |
pipeLine = [ | |
{"$match": { | |
"createdBy": { | |
"$gte": startDate, | |
"$lt": endDate | |
}, | |
"from": {"$in": ['ios', 'android']} | |
}}, | |
{"$group": { | |
"_id": "$user" | |
}} | |
] | |
return list(points.aggregate(pipeLine)) | |
usersInIosAndAndroidList = usersInIosAndAndroid(lastWeekStartDate, lastWeekEndDate) | |
# user id | |
usersInIosAndAndroidArray = [] | |
for x in usersInIosAndAndroidList: | |
usersInIosAndAndroidArray.append(x['_id']) | |
# round 2 | |
# user or mobile users in user collection | |
def isUser(startDate, endDate): | |
startDate = startDate | |
endDate = endDate | |
pipeLine = [ | |
{"$match": { | |
# "usefulData.registDate": { | |
# "$gte": startDate, | |
# "$lt": endDate | |
# }, | |
"usefulData.from": {"$in": ["batch","signup"]} | |
}}, | |
{"$group": { | |
"_id": "$_id" | |
}} | |
] | |
return list(users.aggregate(pipeLine)) | |
def isMobileUser(startDate, endDate): | |
pipeLine = [ | |
{"$match": { | |
"usefulData.registDate": { | |
"$gte": startDate, | |
"$lt": endDate | |
}, | |
"usefulData.from": {"$in": ['ios', 'android']} | |
}}, | |
{"$group": { | |
"_id": "$_id" | |
}} | |
] | |
return list(users.aggregate(pipeLine)) | |
isUserList = isUser(lastWeekStartDate, lastWeekEndDate) | |
isMobileUserList = isMobileUser(lastWeekStartDate, lastWeekEndDate) | |
# points活跃用户中的注册用户 | |
isUserArray = [] | |
for x in isUserList: | |
isUserArray.append(x['_id']) | |
isMobileUserArray = [] | |
for x in isMobileUserList: | |
isMobileUserArray.append(x['_id']) | |
# round 3 | |
# 真正的新增用户数量 | |
# realNewUser = list(set(usersInLastWeekArray) - set(isUserArray)) | |
realNewUser = [] | |
realNewUser = list(set.intersection(set(usersInLastWeekArray), set(isUserArray))) | |
# 新增的注册用户 | |
print("PC 上周新增用户:") | |
print(len(realNewUser)) | |
# realMobileNewUser = list(set(usersInIosAndAndroidArray) - set(isMobileUserArray)) | |
realMobileNewUser = [] | |
realMobileNewUser = list(set.intersection(set(usersInIosAndAndroidArray), set(isMobileUserArray))) | |
print("Mobile 上周新增用户") | |
print(len(realMobileNewUser)) | |
# 上周新增,在本周是否产生行为 | |
def isActThisWeek(startDate, endDate, testUser): | |
pipeLine = [ | |
{"$match": { | |
"createdBy": { | |
"$gte": startDate, | |
"$lt": endDate | |
}, | |
"user": {"$in": testUser} | |
}}, | |
{"$group": { | |
"_id": "$user" | |
}} | |
] | |
return list(points.aggregate(pipeLine)) | |
pcUser = isActThisWeek(startDate, endDate, realNewUser) | |
mobileUser = isActThisWeek(startDate, endDate, realMobileNewUser) | |
print("PC 本周留存:") | |
print(len(pcUser)) | |
print("Mobile 本周留存:") | |
print(len(mobileUser)) | |
usersInThisWeekList = usersInLastWeek(startDate, endDate) | |
mobileUsersInThisWeekList = usersInIosAndAndroid(startDate, endDate) | |
usersInThisWeekArray = [] | |
for x in usersInThisWeekList: | |
usersInThisWeekArray.append(x['_id']) | |
mobileUsersInThisWeekArray = [] | |
for x in mobileUsersInThisWeekList: | |
mobileUsersInThisWeekArray.append(x['_id']) | |
thisWeekIsUsers = isUser(startDate, endDate) | |
thisWeekIsMobileUsers = isMobileUser(startDate, endDate) | |
thisWeekIsUsersArray = [] | |
for x in thisWeekIsUsers: | |
thisWeekIsUsersArray.append(x['_id']) | |
thisWeekMobileUsersArray = [] | |
for x in thisWeekIsMobileUsers: | |
thisWeekMobileUsersArray.append(x['_id']) | |
thisWeekNewUser = [] | |
thisWeekNewUser = list(set.intersection(set(usersInThisWeekArray), set(thisWeekIsUsersArray))) | |
print("PC 本周新增:") | |
print(len(thisWeekNewUser)) | |
thisWeekNewMobileUser = [] | |
thisWeekNewMobileUser = list(set.intersection(set(thisWeekMobileUsersArray), set(mobileUsersInThisWeekArray))) | |
print("Mobile 本周新增:") | |
print(len(thisWeekNewMobileUser)) | |
print("###### Part.5 周留存 ######") | |
print("###### Part.6 ######") | |
print("H5页面") | |
print(">>>需要从线上获取数据 web_tracks_mobile") | |
print("###### Part.6 ######") |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment