Skip to content

Instantly share code, notes, and snippets.

@parksunwoo
Created August 23, 2018 11:38
Show Gist options
  • Save parksunwoo/64cc22cf1af6c5abf7154b9bc6847521 to your computer and use it in GitHub Desktop.
Save parksunwoo/64cc22cf1af6c5abf7154b9bc6847521 to your computer and use it in GitHub Desktop.
supplement_preprocessing
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 32,
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"import pandas as pd\n",
"\n",
"# Root folder holding the supplementary (\"보조데이터\") accident-hotspot datasets.\n",
"_SUPPLEMENT_ROOT = \"../9th_week/보조데이터\"\n",
"\n",
"# One sub-folder per hotspot category.\n",
"SUPPLEMENT_PATH1 = _SUPPLEMENT_ROOT + \"/04.무단횡단사고다발지\"  # jaywalking accidents\n",
"SUPPLEMENT_PATH2 = _SUPPLEMENT_ROOT + \"/05.보행노인사고다발지\"  # elderly pedestrians\n",
"SUPPLEMENT_PATH3 = _SUPPLEMENT_ROOT + \"/06.보행어린이사고다발지\"  # child pedestrians\n",
"SUPPLEMENT_PATH4 = _SUPPLEMENT_ROOT + \"/07.스쿨존내사고다발지\"  # inside school zones\n",
"SUPPLEMENT_PATH5 = _SUPPLEMENT_ROOT + \"/08.자전거사고다발지\"  # bicycle accidents\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": 33,
"metadata": {},
"outputs": [],
"source": [
"def _read_supplement_csv(directory, filename):\n",
"    \"\"\"Read the CP949-encoded CSV *filename* located in *directory*.\n",
"\n",
"    Shared by every ``load_supplement_*`` loader so the path joining and\n",
"    the encoding are defined in exactly one place.\n",
"    \"\"\"\n",
"    return pd.read_csv(os.path.join(directory, filename), encoding='CP949')\n",
"\n",
"\n",
"def load_supplement_jaywalk(accident_path=SUPPLEMENT_PATH1):\n",
"    \"\"\"Return the jaywalking accident-hotspot table (2012~2016) as a DataFrame.\"\"\"\n",
"    return _read_supplement_csv(accident_path, \"무단횡단사고다발지(2012~2016).csv\")\n",
"\n",
"\n",
"def load_supplement_oldwalk(accident_path=SUPPLEMENT_PATH2):\n",
"    \"\"\"Return the elderly-pedestrian accident-hotspot table (2012~2016) as a DataFrame.\"\"\"\n",
"    return _read_supplement_csv(accident_path, \"보행노인사고다발지(2012~2016).csv\")\n",
"\n",
"\n",
"def load_supplement_childwalk(accident_path=SUPPLEMENT_PATH3):\n",
"    \"\"\"Return the child-pedestrian accident-hotspot table (2012~2016) as a DataFrame.\"\"\"\n",
"    return _read_supplement_csv(accident_path, \"보행어린이사고다발지(2012~2016).csv\")\n",
"\n",
"\n",
"def load_supplement_schoolzone(accident_path=SUPPLEMENT_PATH4):\n",
"    \"\"\"Return the school-zone child accident-hotspot table (2012~2016) as a DataFrame.\"\"\"\n",
"    return _read_supplement_csv(accident_path, \"스쿨존내어린이사고다발지(2012~2016).csv\")\n",
"\n",
"\n",
"def load_supplement_bikezone(accident_path=SUPPLEMENT_PATH5):\n",
"    \"\"\"Return the bicycle accident-hotspot table (2012~2016) as a DataFrame.\"\"\"\n",
"    return _read_supplement_csv(accident_path, \"자전거사고다발지(2012~2016).csv\")\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": 34,
"metadata": {},
"outputs": [],
"source": [
"# Load the five hotspot tables, each from its loader's default directory.\n",
"df_jaywalk, df_elder, df_child, df_school, df_bike = (\n",
"    load_supplement_jaywalk(),\n",
"    load_supplement_oldwalk(),\n",
"    load_supplement_childwalk(),\n",
"    load_supplement_schoolzone(),\n",
"    load_supplement_bikezone(),\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 35,
"metadata": {},
"outputs": [],
"source": [
"# Weighted casualty score stored in the \"사고피해정도\" column of every table:\n",
"# deaths x4, serious injuries x3, minor injuries x2, reported injuries x1.\n",
"SEVERITY_WEIGHTS = ((\"사망자수\", 4), (\"중상자수\", 3), (\"경상자수\", 2), (\"부상신고자수\", 1))\n",
"\n",
"for frame in (df_jaywalk, df_elder, df_child, df_bike, df_school):\n",
"    # sum() starts from 0, so the terms are added in the listed order.\n",
"    frame[\"사고피해정도\"] = sum(frame[column] * weight for column, weight in SEVERITY_WEIGHTS)"
]
},
{
"cell_type": "code",
"execution_count": 36,
"metadata": {},
"outputs": [],
"source": [
"def replace_region(df):\n",
"    \"\"\"Aggregate hotspot rows into per-region severity and accident totals.\n",
"\n",
"    The first two space-separated tokens of ``다발지명`` (hotspot name) are\n",
"    used as province (시도) and district (시군구). The province is shortened\n",
"    to two characters (e.g. 경상남도 -> 경남), both parts are concatenated\n",
"    into ``발생지``, and ``사고피해정도`` / ``발생건수`` are summed per ``발생지``.\n",
"\n",
"    Args:\n",
"        df: DataFrame with the columns 다발지명, 사고피해정도 and 발생건수.\n",
"\n",
"    Returns:\n",
"        A new DataFrame with the columns 발생지, 사고피해정도 and 발생건수.\n",
"        The input frame is left untouched.\n",
"\n",
"    Raises:\n",
"        IndexError: if a 다발지명 value has no second space-separated token.\n",
"    \"\"\"\n",
"    value_columns = ['사고피해정도', '발생건수']\n",
"    # Provinces whose short form is not simply their first two characters.\n",
"    short_province = {\n",
"        '경상남도': '경남', '경상북도': '경북',\n",
"        '전라남도': '전남', '전라북도': '전북',\n",
"        '충청남도': '충남', '충청북도': '충북',\n",
"    }\n",
"\n",
"    # Collapse duplicate hotspot names first; this also yields a fresh frame,\n",
"    # so nothing below can modify the caller's data.\n",
"    per_spot = df.groupby('다발지명').agg({'사고피해정도': 'sum', '발생건수': 'sum'}).reset_index()\n",
"\n",
"    spot_names = per_spot['다발지명']\n",
"    province = spot_names.apply(lambda name: name.split(' ')[0])\n",
"    district = spot_names.apply(lambda name: name.split(' ')[1])\n",
"\n",
"    # Assign the result instead of calling replace(..., inplace=True) on a\n",
"    # column selection: that chained in-place form is not guaranteed to write\n",
"    # back to the frame (it is a no-op under pandas Copy-on-Write).\n",
"    province = province.replace(short_province).apply(lambda name: name[:2])\n",
"    district = district.replace('세종특별자치시', '세종')\n",
"\n",
"    per_spot['발생지'] = province + district\n",
"    # Select the columns with a list: a bare tuple of column names is rejected\n",
"    # by current pandas versions.\n",
"    totals = per_spot.groupby('발생지')[value_columns].sum()\n",
"    return totals.reset_index()\n",
" \n",
"# Output file stems, listed in the same order as the tables they are paired with.\n",
"walk_list = ['2_mudan','2_old','2_child','2_school','2_bic']\n",
"# zip keeps each stem tied to its table without a hand-maintained index counter.\n",
"for stem, frame in zip(walk_list, [df_jaywalk, df_elder, df_child, df_school, df_bike]):\n",
"    df = replace_region(frame)\n",
"    df.to_csv(stem+'.csv',index=False)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.4"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment