Skip to content

Instantly share code, notes, and snippets.

@tyjeon24
Last active May 23, 2023 14:16
Show Gist options
  • Save tyjeon24/29e2fd68b29f4feb25580eefb9006821 to your computer and use it in GitHub Desktop.
Save tyjeon24/29e2fd68b29f4feb25580eefb9006821 to your computer and use it in GitHub Desktop.
Pandas code convention. Pandas 코드를 가독성 있게 작성해보세요.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>total_bill</th>\n",
" <th>tip</th>\n",
" <th>sex</th>\n",
" <th>smoker</th>\n",
" <th>요일</th>\n",
" <th>time</th>\n",
" <th>size</th>\n",
" <th>new_size</th>\n",
" <th>new_time</th>\n",
" <th>tip_lag</th>\n",
" <th>tip_lag2</th>\n",
" <th>total_bill_avg</th>\n",
" <th>size_count</th>\n",
" <th>size_ratio</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>19.82</td>\n",
" <td>3</td>\n",
" <td>Male</td>\n",
" <td>No</td>\n",
" <td>Sat</td>\n",
" <td>저녁</td>\n",
" <td>2</td>\n",
" <td>Small</td>\n",
" <td>Sat 저녁</td>\n",
" <td>1.01</td>\n",
" <td>1.01</td>\n",
" <td>16.990000</td>\n",
" <td>119</td>\n",
" <td>80.95</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>19.81</td>\n",
" <td>4</td>\n",
" <td>Female</td>\n",
" <td>Yes</td>\n",
" <td>Thu</td>\n",
" <td>점심</td>\n",
" <td>2</td>\n",
" <td>Small</td>\n",
" <td>Thu 점심</td>\n",
" <td>1.01</td>\n",
" <td>1.01</td>\n",
" <td>10.340000</td>\n",
" <td>119</td>\n",
" <td>80.95</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>19.77</td>\n",
" <td>2</td>\n",
" <td>Male</td>\n",
" <td>No</td>\n",
" <td>Sun</td>\n",
" <td>저녁</td>\n",
" <td>4</td>\n",
" <td>Large</td>\n",
" <td>Sun 저녁</td>\n",
" <td>1.66</td>\n",
" <td>1.66</td>\n",
" <td>16.113333</td>\n",
" <td>6</td>\n",
" <td>4.08</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>19.65</td>\n",
" <td>3</td>\n",
" <td>Female</td>\n",
" <td>No</td>\n",
" <td>Sat</td>\n",
" <td>저녁</td>\n",
" <td>2</td>\n",
" <td>Small</td>\n",
" <td>Sat 저녁</td>\n",
" <td>3.50</td>\n",
" <td>3.50</td>\n",
" <td>18.343333</td>\n",
" <td>119</td>\n",
" <td>80.95</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>19.49</td>\n",
" <td>3</td>\n",
" <td>Male</td>\n",
" <td>No</td>\n",
" <td>Sun</td>\n",
" <td>저녁</td>\n",
" <td>2</td>\n",
" <td>Small</td>\n",
" <td>Sun 저녁</td>\n",
" <td>3.31</td>\n",
" <td>3.31</td>\n",
" <td>23.093333</td>\n",
" <td>119</td>\n",
" <td>80.95</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>142</th>\n",
" <td>7.51</td>\n",
" <td>2</td>\n",
" <td>Male</td>\n",
" <td>No</td>\n",
" <td>Thu</td>\n",
" <td>점심</td>\n",
" <td>2</td>\n",
" <td>Small</td>\n",
" <td>Thu 점심</td>\n",
" <td>6.70</td>\n",
" <td>6.70</td>\n",
" <td>30.986667</td>\n",
" <td>119</td>\n",
" <td>80.95</td>\n",
" </tr>\n",
" <tr>\n",
" <th>143</th>\n",
" <td>7.25</td>\n",
" <td>1</td>\n",
" <td>Female</td>\n",
" <td>No</td>\n",
" <td>Sat</td>\n",
" <td>저녁</td>\n",
" <td>1</td>\n",
" <td>Small</td>\n",
" <td>Sat 저녁</td>\n",
" <td>5.00</td>\n",
" <td>5.00</td>\n",
" <td>34.180000</td>\n",
" <td>4</td>\n",
" <td>2.72</td>\n",
" </tr>\n",
" <tr>\n",
" <th>144</th>\n",
" <td>7.25</td>\n",
" <td>5</td>\n",
" <td>Male</td>\n",
" <td>Yes</td>\n",
" <td>Sun</td>\n",
" <td>저녁</td>\n",
" <td>2</td>\n",
" <td>Small</td>\n",
" <td>Sun 저녁</td>\n",
" <td>5.00</td>\n",
" <td>5.00</td>\n",
" <td>28.223333</td>\n",
" <td>119</td>\n",
" <td>80.95</td>\n",
" </tr>\n",
" <tr>\n",
" <th>145</th>\n",
" <td>5.75</td>\n",
" <td>1</td>\n",
" <td>Female</td>\n",
" <td>Yes</td>\n",
" <td>Fri</td>\n",
" <td>저녁</td>\n",
" <td>2</td>\n",
" <td>Small</td>\n",
" <td>Fri 저녁</td>\n",
" <td>2.30</td>\n",
" <td>2.30</td>\n",
" <td>17.276667</td>\n",
" <td>119</td>\n",
" <td>80.95</td>\n",
" </tr>\n",
" <tr>\n",
" <th>146</th>\n",
" <td>3.07</td>\n",
" <td>1</td>\n",
" <td>Female</td>\n",
" <td>Yes</td>\n",
" <td>Sat</td>\n",
" <td>저녁</td>\n",
" <td>1</td>\n",
" <td>Small</td>\n",
" <td>Sat 저녁</td>\n",
" <td>1.50</td>\n",
" <td>1.50</td>\n",
" <td>14.473333</td>\n",
" <td>4</td>\n",
" <td>2.72</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>147 rows × 14 columns</p>\n",
"</div>"
],
"text/plain": [
" total_bill tip sex smoker 요일 time size new_size new_time \n",
"0 19.82 3 Male No Sat 저녁 2 Small Sat 저녁 \\\n",
"1 19.81 4 Female Yes Thu 점심 2 Small Thu 점심 \n",
"2 19.77 2 Male No Sun 저녁 4 Large Sun 저녁 \n",
"3 19.65 3 Female No Sat 저녁 2 Small Sat 저녁 \n",
"4 19.49 3 Male No Sun 저녁 2 Small Sun 저녁 \n",
".. ... ... ... ... ... ... ... ... ... \n",
"142 7.51 2 Male No Thu 점심 2 Small Thu 점심 \n",
"143 7.25 1 Female No Sat 저녁 1 Small Sat 저녁 \n",
"144 7.25 5 Male Yes Sun 저녁 2 Small Sun 저녁 \n",
"145 5.75 1 Female Yes Fri 저녁 2 Small Fri 저녁 \n",
"146 3.07 1 Female Yes Sat 저녁 1 Small Sat 저녁 \n",
"\n",
" tip_lag tip_lag2 total_bill_avg size_count size_ratio \n",
"0 1.01 1.01 16.990000 119 80.95 \n",
"1 1.01 1.01 10.340000 119 80.95 \n",
"2 1.66 1.66 16.113333 6 4.08 \n",
"3 3.50 3.50 18.343333 119 80.95 \n",
"4 3.31 3.31 23.093333 119 80.95 \n",
".. ... ... ... ... ... \n",
"142 6.70 6.70 30.986667 119 80.95 \n",
"143 5.00 5.00 34.180000 4 2.72 \n",
"144 5.00 5.00 28.223333 119 80.95 \n",
"145 2.30 2.30 17.276667 119 80.95 \n",
"146 1.50 1.50 14.473333 4 2.72 \n",
"\n",
"[147 rows x 14 columns]"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import pandas as pd\n",
"import seaborn as sns\n",
"import numpy as np\n",
"\n",
"df = sns.load_dataset(\"tips\")\n",
"df.head()\n",
"\n",
"\n",
"# 목표 : 다음 작업을 연달아 수행하기\n",
"# [1] total_bill 20 이하만 필터링\n",
"# [2] time 열의 'Dinner', 'Lunch' 값을 \"저녁\", \"점심\"으로 변환\n",
"# [3] tip을 정수형으로\n",
"# [4] size는 [1,2,3,4,5,6] 6개 값인데, 이 값을 123, 456으로 구분하여 [\"Small\", \"Large\"]로 변경하고, new_size 라는 컬럼으로 생성\n",
"# [5] 요일 이름을 3글자로만 사용. 예를 들어 Sun은 그대로 Sun이지만, Thur은 Thu로 변경하기.\n",
"# [6] day 열 이름을 \"요일\"로 변경\n",
"# [7] 요일과 time을 합친 new_time 컬럼 만들기 (예) Sat 저녁)\n",
"# [8] total_bill 값 내림차순으로 정렬하기\n",
"# [9] 인덱스 초기화, drop=True 파라미터 추가하기.\n",
"# [10] tip_lag : tip에 대해 shift(1) 만큼의 lag을 만들고, nan 값은 초기값으로 대체하기. fill_value 파라미터 사용하기.\n",
"# [11] tip_lag2 : tip에 대해 shift(1) 만큼의 lag을 만들고, nan 값은 초기값으로 대체하기. fillna 사용하기.\n",
"# [12] total_bill_avg : total_bill에 대해 길이 3으로 rolling mean 계산하기. NaN값을 기존 total_bill 값으로 채우기(fillna)\n",
"# [13] size_count : 엑셀 countif 함수처럼, 각 행에서 size 열의 값 원소 개수를 세기.\n",
"# [14] size_ratio : size_count와 마찬가지로 계산하되, normalize=True를 써서 비율로 나타내기. round(2)를 써서 소수점 둘째 자리 수까지 나타내기.\n",
"\n",
"# Build df_new in a single method chain; the [n] tags refer to the numbered goals listed above.\n",
"# Every derived column is a lambda, so it is computed from the frame at that point in the chain\n",
"# (already filtered, sorted and re-indexed) instead of being index-aligned from the original df.\n",
"df_new = (\n",
"    df\n",
"    .loc[lambda x: x[\"total_bill\"] <= 20]  # [1] keep rows whose total_bill is 20 or less\n",
"    .assign(time=lambda x: x[\"time\"].replace(\"Dinner\", \"저녁\").replace(\"Lunch\", \"점심\"))  # [2]\n",
"    .assign(tip=lambda x: x[\"tip\"].astype(int))  # [3] astype(int) drops the fractional part\n",
"    .assign(new_size=lambda x: np.where(x[\"size\"] <= 3, \"Small\", \"Large\"))  # [4] vectorised instead of a row-wise apply\n",
"    .assign(day=lambda x: x[\"day\"].str[:3])  # [5] e.g. Thur -> Thu\n",
"    .rename(columns={\"day\": \"요일\"})  # [6] rename, not assign, so no duplicate column is left behind\n",
"    .assign(new_time=lambda x: x[\"요일\"].astype(str) + \" \" + x[\"time\"].astype(str))  # [7]\n",
"    .sort_values(by=\"total_bill\", ascending=False)  # [8]\n",
"    .reset_index(drop=True)  # [9]\n",
"    # [10]-[12] must read the chained frame: after reset_index the original df's index no longer\n",
"    # identifies the same rows, so df[\"tip\"] / df[\"total_bill\"] would attach unrelated values.\n",
"    .assign(tip_lag=lambda x: x[\"tip\"].shift(1, fill_value=x[\"tip\"].iloc[0]))  # [10]\n",
"    .assign(tip_lag2=lambda x: x[\"tip\"].shift(1).fillna(x[\"tip\"]))  # [11] only row 0 is NaN after shift(1)\n",
"    .assign(total_bill_avg=lambda x: x[\"total_bill\"].rolling(window=3).mean().fillna(x[\"total_bill\"]))  # [12]\n",
"    # [13]-[14] build the count table once and look each row up in it, rather than calling\n",
"    # value_counts() again for every row.\n",
"    .assign(size_count=lambda x: x[\"size\"].map(x[\"size\"].value_counts()))  # [13]\n",
"    .assign(size_ratio=lambda x: x[\"size\"].map(x[\"size\"].value_counts(normalize=True)).mul(100).round(2))  # [14] percentage\n",
")\n",
"\n",
"df_new"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>total_bill</th>\n",
" <th>tip</th>\n",
" <th>sex</th>\n",
" <th>smoker</th>\n",
" <th>요일</th>\n",
" <th>time</th>\n",
" <th>size</th>\n",
" <th>new_size</th>\n",
" <th>new_time</th>\n",
" <th>tip_lag</th>\n",
" <th>tip_lag2</th>\n",
" <th>total_bill_avg</th>\n",
" <th>size_count</th>\n",
" <th>size_ratio</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>19.82</td>\n",
" <td>3</td>\n",
" <td>Male</td>\n",
" <td>No</td>\n",
" <td>Sat</td>\n",
" <td>저녁</td>\n",
" <td>2</td>\n",
" <td>Small</td>\n",
" <td>Sat 저녁</td>\n",
" <td>1.01</td>\n",
" <td>1.01</td>\n",
" <td>16.990000</td>\n",
" <td>119</td>\n",
" <td>80.95</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>19.81</td>\n",
" <td>4</td>\n",
" <td>Female</td>\n",
" <td>Yes</td>\n",
" <td>Thu</td>\n",
" <td>점심</td>\n",
" <td>2</td>\n",
" <td>Small</td>\n",
" <td>Thu 점심</td>\n",
" <td>1.01</td>\n",
" <td>1.01</td>\n",
" <td>10.340000</td>\n",
" <td>119</td>\n",
" <td>80.95</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>19.77</td>\n",
" <td>2</td>\n",
" <td>Male</td>\n",
" <td>No</td>\n",
" <td>Sun</td>\n",
" <td>저녁</td>\n",
" <td>4</td>\n",
" <td>Large</td>\n",
" <td>Sun 저녁</td>\n",
" <td>1.66</td>\n",
" <td>1.66</td>\n",
" <td>16.113333</td>\n",
" <td>6</td>\n",
" <td>4.08</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>19.65</td>\n",
" <td>3</td>\n",
" <td>Female</td>\n",
" <td>No</td>\n",
" <td>Sat</td>\n",
" <td>저녁</td>\n",
" <td>2</td>\n",
" <td>Small</td>\n",
" <td>Sat 저녁</td>\n",
" <td>3.50</td>\n",
" <td>3.50</td>\n",
" <td>18.343333</td>\n",
" <td>119</td>\n",
" <td>80.95</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>19.49</td>\n",
" <td>3</td>\n",
" <td>Male</td>\n",
" <td>No</td>\n",
" <td>Sun</td>\n",
" <td>저녁</td>\n",
" <td>2</td>\n",
" <td>Small</td>\n",
" <td>Sun 저녁</td>\n",
" <td>3.31</td>\n",
" <td>3.31</td>\n",
" <td>23.093333</td>\n",
" <td>119</td>\n",
" <td>80.95</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>142</th>\n",
" <td>7.51</td>\n",
" <td>2</td>\n",
" <td>Male</td>\n",
" <td>No</td>\n",
" <td>Thu</td>\n",
" <td>점심</td>\n",
" <td>2</td>\n",
" <td>Small</td>\n",
" <td>Thu 점심</td>\n",
" <td>6.70</td>\n",
" <td>6.70</td>\n",
" <td>30.986667</td>\n",
" <td>119</td>\n",
" <td>80.95</td>\n",
" </tr>\n",
" <tr>\n",
" <th>143</th>\n",
" <td>7.25</td>\n",
" <td>1</td>\n",
" <td>Female</td>\n",
" <td>No</td>\n",
" <td>Sat</td>\n",
" <td>저녁</td>\n",
" <td>1</td>\n",
" <td>Small</td>\n",
" <td>Sat 저녁</td>\n",
" <td>5.00</td>\n",
" <td>5.00</td>\n",
" <td>34.180000</td>\n",
" <td>4</td>\n",
" <td>2.72</td>\n",
" </tr>\n",
" <tr>\n",
" <th>144</th>\n",
" <td>7.25</td>\n",
" <td>5</td>\n",
" <td>Male</td>\n",
" <td>Yes</td>\n",
" <td>Sun</td>\n",
" <td>저녁</td>\n",
" <td>2</td>\n",
" <td>Small</td>\n",
" <td>Sun 저녁</td>\n",
" <td>5.00</td>\n",
" <td>5.00</td>\n",
" <td>28.223333</td>\n",
" <td>119</td>\n",
" <td>80.95</td>\n",
" </tr>\n",
" <tr>\n",
" <th>145</th>\n",
" <td>5.75</td>\n",
" <td>1</td>\n",
" <td>Female</td>\n",
" <td>Yes</td>\n",
" <td>Fri</td>\n",
" <td>저녁</td>\n",
" <td>2</td>\n",
" <td>Small</td>\n",
" <td>Fri 저녁</td>\n",
" <td>2.30</td>\n",
" <td>2.30</td>\n",
" <td>17.276667</td>\n",
" <td>119</td>\n",
" <td>80.95</td>\n",
" </tr>\n",
" <tr>\n",
" <th>146</th>\n",
" <td>3.07</td>\n",
" <td>1</td>\n",
" <td>Female</td>\n",
" <td>Yes</td>\n",
" <td>Sat</td>\n",
" <td>저녁</td>\n",
" <td>1</td>\n",
" <td>Small</td>\n",
" <td>Sat 저녁</td>\n",
" <td>1.50</td>\n",
" <td>1.50</td>\n",
" <td>14.473333</td>\n",
" <td>4</td>\n",
" <td>2.72</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>147 rows × 14 columns</p>\n",
"</div>"
],
"text/plain": [
" total_bill tip sex smoker 요일 time size new_size new_time \n",
"0 19.82 3 Male No Sat 저녁 2 Small Sat 저녁 \\\n",
"1 19.81 4 Female Yes Thu 점심 2 Small Thu 점심 \n",
"2 19.77 2 Male No Sun 저녁 4 Large Sun 저녁 \n",
"3 19.65 3 Female No Sat 저녁 2 Small Sat 저녁 \n",
"4 19.49 3 Male No Sun 저녁 2 Small Sun 저녁 \n",
".. ... ... ... ... ... ... ... ... ... \n",
"142 7.51 2 Male No Thu 점심 2 Small Thu 점심 \n",
"143 7.25 1 Female No Sat 저녁 1 Small Sat 저녁 \n",
"144 7.25 5 Male Yes Sun 저녁 2 Small Sun 저녁 \n",
"145 5.75 1 Female Yes Fri 저녁 2 Small Fri 저녁 \n",
"146 3.07 1 Female Yes Sat 저녁 1 Small Sat 저녁 \n",
"\n",
" tip_lag tip_lag2 total_bill_avg size_count size_ratio \n",
"0 1.01 1.01 16.990000 119 80.95 \n",
"1 1.01 1.01 10.340000 119 80.95 \n",
"2 1.66 1.66 16.113333 6 4.08 \n",
"3 3.50 3.50 18.343333 119 80.95 \n",
"4 3.31 3.31 23.093333 119 80.95 \n",
".. ... ... ... ... ... \n",
"142 6.70 6.70 30.986667 119 80.95 \n",
"143 5.00 5.00 34.180000 4 2.72 \n",
"144 5.00 5.00 28.223333 119 80.95 \n",
"145 2.30 2.30 17.276667 119 80.95 \n",
"146 1.50 1.50 14.473333 4 2.72 \n",
"\n",
"[147 rows x 14 columns]"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# 같은 코드, 주석 위치 변경\n",
"\n",
"# Every derived column is a lambda, so it is computed from the frame at that point in the chain\n",
"# (already filtered, sorted and re-indexed) instead of being index-aligned from the original df.\n",
"df_new = (\n",
"    df\n",
"    # [1] Keep only rows whose total_bill is 20 or less.\n",
"    .loc[lambda x: x[\"total_bill\"] <= 20]\n",
"\n",
"    # [2] Replace the 'Dinner' / 'Lunch' values of time with \"저녁\" / \"점심\".\n",
"    .assign(time=lambda x: x[\"time\"].replace(\"Dinner\", \"저녁\").replace(\"Lunch\", \"점심\"))\n",
"\n",
"    # [3] Cast tip to integer (astype(int) drops the fractional part).\n",
"    .assign(tip=lambda x: x[\"tip\"].astype(int))\n",
"\n",
"    # [4] new_size: label size <= 3 as \"Small\" and anything larger as \"Large\".\n",
"    .assign(new_size=lambda x: np.where(x[\"size\"] <= 3, \"Small\", \"Large\"))\n",
"\n",
"    # [5] Use only the first three letters of the day name (Sun stays Sun, Thur becomes Thu).\n",
"    .assign(day=lambda x: x[\"day\"].str[:3])\n",
"\n",
"    # [6] Rename the day column to \"요일\": use rename rather than assign.\n",
"    .rename(columns={\"day\": \"요일\"})\n",
"\n",
"    # [7] new_time: join 요일 and time with a space (e.g. \"Sat 저녁\").\n",
"    .assign(new_time=lambda x: x[\"요일\"].astype(str) + \" \" + x[\"time\"].astype(str))\n",
"\n",
"    # [8] Sort by total_bill in descending order.\n",
"    .sort_values(by=\"total_bill\", ascending=False)\n",
"\n",
"    # [9] Reset the index, discarding the old one (drop=True).\n",
"    .reset_index(drop=True)\n",
"\n",
"    # [10] tip_lag: tip shifted by one row; the leading gap is filled with the first tip\n",
"    # through the fill_value parameter. From here on the original df's index no longer\n",
"    # identifies the same rows, so the columns must be read from the chained frame.\n",
"    .assign(tip_lag=lambda x: x[\"tip\"].shift(1, fill_value=x[\"tip\"].iloc[0]))\n",
"\n",
"    # [11] tip_lag2: same lag, but the NaN is filled with fillna (only row 0 is NaN after shift(1)).\n",
"    .assign(tip_lag2=lambda x: x[\"tip\"].shift(1).fillna(x[\"tip\"]))\n",
"\n",
"    # [12] total_bill_avg: rolling mean of total_bill over a window of 3; the leading NaN\n",
"    # values are filled with the row's own total_bill.\n",
"    .assign(total_bill_avg=lambda x: x[\"total_bill\"].rolling(window=3).mean().fillna(x[\"total_bill\"]))\n",
"\n",
"    # [13] size_count: like Excel's COUNTIF, how many rows share this row's size value.\n",
"    # The count table is built once and each row is looked up in it.\n",
"    .assign(size_count=lambda x: x[\"size\"].map(x[\"size\"].value_counts()))\n",
"\n",
"    # [14] size_ratio: same lookup with normalize=True, expressed as a percentage rounded\n",
"    # to two decimal places.\n",
"    .assign(size_ratio=lambda x: x[\"size\"].map(x[\"size\"].value_counts(normalize=True)).mul(100).round(2))\n",
")\n",
"\n",
"df_new"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "base",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.9"
},
"orig_nbformat": 4
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment