Skip to content

Instantly share code, notes, and snippets.

@beneon
Created April 26, 2020 10:40
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save beneon/8f828e4a401241d0a601846fffc43967 to your computer and use it in GitHub Desktop.
Save beneon/8f828e4a401241d0a601846fffc43967 to your computer and use it in GitHub Desktop.
数据清理-规陪带教数据
# 2020-4-26编码,将三个文件处理以后合并成一个excel
import os
import pandas as pd
import re
# 从第三行开始,第一列医生名称,第二列进修生带教数量,第三列规陪数量,第四列月份,第五列年份
xl_file_path = os.path.join('datafile','summary.xlsx')
df = pd.read_excel(xl_file_path)
#原始表其实是两个表格的联合,先拆成basic和detail两个表
#原始数据有空行,我把姓名给向下填充了,但是其他的内容没有向下填充,所以这里就使用groupby加sum把nan给当0消耗掉
df_basic = df[['姓名','year','进修','规陪']].groupby(['姓名','year']).sum()
#对于detail这个表,遍历拆分出月份和数量两个信息
df_detail = df[['姓名','year','月份']]
def extract_month_quant(month_str):
def gen_pd_series(months,quant):
return pd.Series({'months':months,'quant':quant})
if pd.isna(month_str):
return gen_pd_series(months=0,quant=0)
# 1个月: 1
re_month_format = re.compile(r'(\d+)个月[::]\s*(\d+)')
mo = re_month_format.match(month_str.strip())
if mo:
return gen_pd_series(months=int(mo.group(1)),quant=int(mo.group(2)))
else:
raise Exception(f"{month_str}'s format is wrong for extract_month_quant")
df_detail[['规陪带教时长(月)','人数']] = df_detail['月份'].apply(lambda e:extract_month_quant(e))
df_detail['规陪带教人月数'] = df_detail['规陪带教时长(月)']*df_detail['人数']
df_detail_select = df_detail[['姓名','year','规陪带教人月数']].drop_duplicates().set_index(['姓名','year'])
df_final = df_basic.join(df_detail_select)
# reset index
df_basic = df_basic.reset_index()
df_final = df_final.reset_index()
xlwriter = pd.ExcelWriter(os.path.join('datafile','2017-2019带教统计.xlsx'),engine='xlsxwriter')
df_final.to_excel(excel_writer=xlwriter,sheet_name='统计信息',index=False)
df_basic.to_excel(excel_writer=xlwriter,sheet_name='带教基本信息',index=False)
df_detail.to_excel(excel_writer=xlwriter,sheet_name='规陪带教情况细则',index=False)
xlwriter.save()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment