Skip to content

Instantly share code, notes, and snippets.

@2248998661
Created December 10, 2020 06:10
Show Gist options
  • Save 2248998661/a8f6304085aed04ea63609f06b45a1f9 to your computer and use it in GitHub Desktop.
Save 2248998661/a8f6304085aed04ea63609f06b45a1f9 to your computer and use it in GitHub Desktop.
import pandas as pd
import numpy as np
import xlrd
import datetime as dt
import xlsxwriter
from datetime import datetime
import pymysql
# Module-level MySQL connection settings.
# NOTE(review): credentials are hard-coded in source (and this file was shared
# publicly) — move them to environment variables / a config file and rotate them.
serverName = '192.168.1.106'  # host 106 is the read-only replica
userName = 'zeusquery'
passWord = 'zeus258!@#'
port = 3306
database = 'zeus_data'
# Open the connection and grab a cursor.  Both are module-level globals
# consumed by read_table()/run() below.
# NOTE(review): the connection is never closed — consider closing it (or using
# contextlib.closing) when the script finishes.
conn = pymysql.connect(
    host=serverName,
    user=userName,
    password=passWord,
    port=port,
    database=database)
cur = conn.cursor()
def read_table(cur, sql_order):
    """Execute *sql_order* on cursor *cur* and return the result as a DataFrame.

    Parameters
    ----------
    cur : DB-API cursor (e.g. pymysql cursor) used to run the query.
    sql_order : str, the SQL statement to execute.

    Returns
    -------
    pandas.DataFrame with one column per cursor description entry; an EMPTY
    DataFrame when the query fails (the failure is printed, not raised).
    """
    try:
        cur.execute(sql_order)
        data = cur.fetchall()
        # First element of each description tuple is the column name.
        cols = [col[0] for col in cur.description]
        frame = pd.DataFrame(list(data), columns=cols)
    except Exception:
        # Original code left `frame` unassigned here, so the `return` below
        # raised UnboundLocalError on any query failure.  Return an empty
        # frame instead (keeps the original best-effort contract).
        frame = pd.DataFrame()
        print('获取数据失败')
    return frame
#################### TODO: update the month SQL below at noon on the 1st of each month
def run():
    """Export per-shop Juhuasuan counts as an hourly pivot table to Excel.

    Queries tb_jhs_shop (via the module-level cursor `cur`) for a fixed set of
    shop ids with `datetime` strictly before tomorrow, pivots `num` by
    shop_name x time-of-day, and writes the result to .\\data\\聚划算.xls.

    Side effects: prints progress and writes the Excel file; no return value.
    """
    now_time = datetime.now()
    # Query upper bound: tomorrow's date, so every row from today is included.
    m_T = (now_time + dt.timedelta(days=1)).strftime("%Y-%m-%d")
    print(m_T)
    sql = f"SELECT datetime,shop_name,num from tb_jhs_shop WHERE datetime<'{m_T}' and shop_id in('698461607', '749391658', '2830036684', '2560464671', '2597705728', '533497499', '2064892827', '2360209412', '3164711246', '2201478422910', '1968090645', '2965419761')"
    print(sql)
    df_jlyq = read_table(cur, sql)
    # Keep only HH:MM from the timestamp string ("YYYY-MM-DD HH:MM:SS" -> "HH:MM").
    df_jlyq['datetime'] = df_jlyq['datetime'].astype(str).str[11:-3]
    df_jlyq = df_jlyq.pivot_table(index='shop_name', columns='datetime', values=['num'])
    print(df_jlyq)
    # Raw string: "\d" in a normal literal is an invalid escape sequence
    # (SyntaxWarning on Python 3.12+); the path bytes are unchanged.
    df_jlyq.to_excel(r".\data\聚划算.xls", index=True, header=True)


if __name__ == '__main__':
    run()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment