Skip to content

Instantly share code, notes, and snippets.

@ronfe
Last active July 18, 2017 06:39
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ronfe/420b9afb678c3aa4733a76353c1c7bae to your computer and use it in GitHub Desktop.
Save ronfe/420b9afb678c3aa4733a76353c1c7bae to your computer and use it in GitHub Desktop.
from pymongo import MongoClient
from bson.objectid import ObjectId
from datetime import datetime
db_path = "path/to/db:27017"
conn = MongoClient(db_path)
conquer_track = conn["conquerTrack"]
## 所有表每日全量更新
## DB name: conquerTrack
## Collection name - Schema
### teams - teams
### schoolConquerors - school_conquerors
### roomConquerors - room_conquerors
### userDaily - user_daily
### Schema begin
teams = {
"_id": ObjectId(), # 地推服务商ID
"name": str() # 地推服务商名字,编辑更新
}
### 只有关联的学校
school_conquerors = {
"_id": ObjectId(), # onions - 学校ID
"capturedBy": ObjectId # 攻克的地推服务商ID,ObjectId, 如果服务商或关联被删除,该字段会重置,默认null
"capturedDate": datetime(1970, 1, 1) # 攻克时间,ISODate,小时及以下单位均为0。如果服务商或关联被删除,该字段会重置,默认值为1970-01-01
}
### 只有关联的班级
room_conquerors = {
"_id": ObjectId(), # room id
"roomRef": str(), # room ref
"teacherId": ObjectId(), # room owner的uid
"schoolId": ObjectId(), # room所在学校的ID
"conquererId": ObjectId() # 对应活跃的地推服务商ID
}
### 每日用户记录,只记录渠道域内的活跃用户
user_daily = {
"_id": ObjectId(),
"userId": ObjectId(), # 记录对应的uid
"role": Str(), # 对应的用户类别,只有student或teacher
"channel": str(), # 对应的用户渠道号,预留给下期
"logDate": datetime(), # 记录日,ISODate格式,小时及以下单位均为0
"teamId": ObjectId(), # 对应的用户地推服务商ID,如果有多个地推服务商,则有多条记录
"conflictType": 0, # 元字段,记录该日该用户指派conqueror是否存在及存在何种规则冲突
# 算法:conflictType(d, n) = K(ROOM, d, n) + 100K(SCHOOL, d, n) - 1
# 正常:无班级有学校 = 99(活跃攻克),单班级无学校 = 0 (活跃攻克)
# K(RULE, d, n) = 用户n在d日RULE中存在的不同服务商ID数量
"conquerAssignRule": str(), # 元字段,该记录的服务商ID是通过何种规则指派
# bySchool - 通过用户数据中school字段
# byRoom - 通过班级地推方式
# byRoom优先
"paidAmount": int(), # 该用户在该日支付成功订单的实际金额总和
"isUnique": False # 该用户当天是否只有一条记录,单记录用户及多记录用户的首条记录为True,用于计算总活跃和总付费筛选
}
### Schema end
from pymongo import MongoClient
import datetime
db_path = "10.8.8.111:27017"
def calc_yesterday():
today = datetime.datetime.now()
y, m, d = today.year, today.month, today.day
# return datetime.datetime(y, m, d-1)
return datetime.datetime(y, m, d) - datetime.timedelta(days=1)
def find_team_by_room(room_id, col):
x = col.find_one({"_id": room_id})
if not x:
return None
return x["conquererId"]
def find_school_by_room(col):
x = col.find()
result = dict()
for doc in x:
result[doc["_id"]] = doc["schoolId"]
return result
def find_payment_by_uid(uid, col, d):
x = col.find({
"eventKey": "paymentSuccess",
"serverTime": {"$gte": d - datetime.timedelta(hours=8)},
"user": uid
})
amount = 0
for doc in x:
if "actualOrderAmount" in doc:
amount += doc["actualOrderAmount"]
return amount
def main():
conn = MongoClient(db_path)
db = conn["trackDev"]
capture_schools = db["schools"]
# get school-team map
school_team_map = dict()
for doc in capture_schools.find():
if datetime.datetime.now() >= doc["capturedDate"]:
school_team_map[doc["_id"]] = doc["capturedBy"]
# populate rooms
# get rooms by school - teachers
users = conn["onions"]["users"]
school_ids = list(school_team_map.keys())
teachers = users.find({
"school": {"$in": school_ids},
"role": "teacher",
}, {"school": 1})
room_ids = list()
teacher_school_map = dict()
rooms = conn["onions"]["rooms"]
capture_rooms = db["rooms"]
capture_rooms.drop()
for t in teachers:
x = rooms.find({"owners": t["_id"]}, {"ref": 1, "owners": 1})
if x.count() > 0:
teacher_school_map[t["_id"]] = t["school"]
for doc in x:
room_ids.append(doc["_id"])
temp = {
"_id": doc["_id"],
"roomRef": doc["ref"],
"teacherId": doc["owners"][0],
"schoolId": teacher_school_map[doc["owners"][0]]
}
temp["conquererId"] = school_team_map[temp["schoolId"]]
capture_rooms.insert_one(temp)
del temp
# generate user daily
daily_users = db["userdailies"]
user_attr = conn["cache"]["userAttr"]
order_events = conn["eventsV4"]["orderEvents"]
yesterday = calc_yesterday()
yesterday_str = datetime.datetime.strftime(yesterday, "%Y%m%d")
x = users.find({"rooms": {"$in": room_ids}}, {
"rooms": 1,
"channel": 1,
"role": 1,
"school": 1
})
# byroom rule
recorded_users = set()
room_school_map = find_school_by_room(capture_rooms)
for t in x:
if "role" not in t or t["role"] not in ["student", "teacher"]:
continue
y = user_attr.find_one({"user": str(t["_id"])}, {"daily": 1})
if not y:
continue
if "daily" not in y or yesterday_str not in y["daily"]:
continue
temp = list()
# generate doc model
model_rec = dict({
"userId": t["_id"],
"role": t["role"],
"logDate": yesterday,
"conquerAssignRule": "byRoom",
"channel": None
})
model_rec["paidAmount"] = find_payment_by_uid(t["_id"], order_events, yesterday)
if "channel" in t:
model_rec["channel"] = t["channel"]
conflict_factor = 1
# room
# init_room, *other_rooms = t["rooms"]
init_room = t["rooms"][0]
other_rooms = t["rooms"][1:]
if len(other_rooms) > 0:
conflict_factor += len(other_rooms)
model_rec["conflictType"] = conflict_factor - 1
init_doc = model_rec.copy()
init_doc["teamId"] = find_team_by_room(init_room, capture_rooms)
init_doc["isUnique"] = True
# if "school" in t and t["school"] in school_ids and t["school"] != room_school_map[init_room]:
# conflict_factor += 100
temp.append(init_doc)
for r in other_rooms:
init_doc = model_rec.copy()
init_doc["teamId"] = find_team_by_room(r, capture_rooms)
init_doc["isUnique"] = False
# if "school" in t and t["school"] in school_ids and t["school"] != room_school_map[r]:
# conflict_factor += 100
temp.append(init_doc)
# insert
daily_users.insert_many(temp)
recorded_users.add(t["_id"])
del temp
# by school rule
x = users.find({"school": {"$in": school_ids}}, {
"channel": 1,
"role": 1,
"school": 1
})
for t in x:
if t["_id"] in recorded_users:
continue
if "role" not in t or t["role"] not in ["student", "teacher"]:
continue
y = user_attr.find_one({"user": str(t["_id"])}, {"daily": 1})
if not y:
continue
if "daily" not in y or yesterday_str not in y["daily"]:
continue
model_rec = dict({
"userId": t["_id"],
"role": t["role"],
"logDate": yesterday,
"teamId": school_team_map[t["school"]],
"conquerAssignRule": "bySchool",
"channel": None,
"conflictType": 99,
"isUnique": True
})
model_rec["paidAmount"] = find_payment_by_uid(t["_id"], order_events, yesterday)
if "channel" in t:
model_rec["channel"] = t["channel"]
daily_users.insert_one(model_rec)
if __name__ == "__main__":
main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment