Created
August 23, 2018 11:38
-
-
Save parksunwoo/64cc22cf1af6c5abf7154b9bc6847521 to your computer and use it in GitHub Desktop.
supplement_preprocessing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 32, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"import os\n", | |
"import pandas as pd\n", | |
"\n", | |
"SUPPLEMENT_PATH1 = \"../9th_week/보조데이터/04.무단횡단사고다발지\"\n", | |
"SUPPLEMENT_PATH2 = \"../9th_week/보조데이터/05.보행노인사고다발지\"\n", | |
"SUPPLEMENT_PATH3 = \"../9th_week/보조데이터/06.보행어린이사고다발지\"\n", | |
"SUPPLEMENT_PATH4 = \"../9th_week/보조데이터/07.스쿨존내사고다발지\"\n", | |
"SUPPLEMENT_PATH5 = \"../9th_week/보조데이터/08.자전거사고다발지\"\n", | |
"\n" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 33, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"def load_supplement_jaywalk(accident_path=SUPPLEMENT_PATH1):\n", | |
" csv_path = os.path.join(accident_path, \"무단횡단사고다발지(2012~2016).csv\")\n", | |
" return pd.read_csv(csv_path, encoding='CP949')\n", | |
"\n", | |
"\n", | |
"def load_supplement_oldwalk(accident_path=SUPPLEMENT_PATH2):\n", | |
" csv_path = os.path.join(accident_path, \"보행노인사고다발지(2012~2016).csv\")\n", | |
" return pd.read_csv(csv_path, encoding='CP949') \n", | |
"\n", | |
"\n", | |
"def load_supplement_childwalk(accident_path=SUPPLEMENT_PATH3):\n", | |
" csv_path = os.path.join(accident_path, \"보행어린이사고다발지(2012~2016).csv\")\n", | |
" return pd.read_csv(csv_path, encoding='CP949') \n", | |
"\n", | |
"\n", | |
"def load_supplement_schoolzone(accident_path=SUPPLEMENT_PATH4):\n", | |
" csv_path = os.path.join(accident_path, \"스쿨존내어린이사고다발지(2012~2016).csv\")\n", | |
" return pd.read_csv(csv_path, encoding='CP949') \n", | |
"\n", | |
"\n", | |
"def load_supplement_bikezone(accident_path=SUPPLEMENT_PATH5):\n", | |
" csv_path = os.path.join(accident_path, \"자전거사고다발지(2012~2016).csv\")\n", | |
" return pd.read_csv(csv_path, encoding='CP949') \n", | |
"\n" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 34, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"df_jaywalk = load_supplement_jaywalk()\n", | |
"df_elder = load_supplement_oldwalk()\n", | |
"df_child = load_supplement_childwalk()\n", | |
"df_school = load_supplement_schoolzone()\n", | |
"df_bike = load_supplement_bikezone()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 35, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"df_jaywalk[\"사고피해정도\"] = df_jaywalk.사망자수*4 + df_jaywalk.중상자수*3 + df_jaywalk.경상자수*2 + df_jaywalk.부상신고자수*1\n", | |
"df_elder[\"사고피해정도\"] = df_elder.사망자수*4 + df_elder.중상자수*3 + df_elder.경상자수*2 + df_elder.부상신고자수*1\n", | |
"df_child[\"사고피해정도\"] = df_child.사망자수*4 + df_child.중상자수*3 + df_child.경상자수*2 + df_child.부상신고자수*1\n", | |
"df_bike[\"사고피해정도\"] = df_bike.사망자수*4 + df_bike.중상자수*3 + df_bike.경상자수*2 + df_bike.부상신고자수*1\n", | |
"df_school[\"사고피해정도\"] = df_school.사망자수*4 + df_school.중상자수*3 + df_school.경상자수*2 + df_school.부상신고자수*1" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 36, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"def replace_region(df): \n", | |
" df = df.groupby('다발지명').agg({'사고피해정도': 'sum', '발생건수':'sum'})\n", | |
" df.reset_index(inplace=True)\n", | |
" df['시도'] = df['다발지명'].apply(lambda x: x.split(' ')[0])\n", | |
" df['시군구'] = df['다발지명'].apply(lambda x: x.split(' ')[1])\n", | |
" df['시도'] = df['시도'].replace(['경상남도', '경상북도', '전라남도', '전라북도', '충청남도', '충청북도'],\n", | |
" ['경남', '경북', '전남', '전북', '충남', '충북'])\n", | |
" df['시도'] = df['시도'].apply(lambda x: x[:2])\n", | |
" df.drop('다발지명', axis=1, inplace=True)\n", | |
" df['시군구'] = df['시군구'].replace('세종특별자치시','세종')\n", | |
" df['발생지'] = df['시도'] + df['시군구']\n", | |
" df = df.groupby(['발생지'])[['사고피해정도', '발생건수']].sum()\n", | |
" df.reset_index(inplace=True)\n", | |
" return df\n", | |
" \n", | |
"walk_list = ['2_mudan','2_old','2_child','2_school','2_bic']\n", | |
"i = 0\n", | |
"for x in [df_jaywalk, df_elder, df_child, df_school, df_bike]:\n", | |
" df = replace_region(x)\n", | |
" df.to_csv(walk_list[i]+'.csv',index=False)\n", | |
" i += 1" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.6.4" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 2 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment