Skip to content

Instantly share code, notes, and snippets.

@ImagawaKosuke
Created November 7, 2022 06:56
Show Gist options
  • Save ImagawaKosuke/99fcba7cc6a5ead0e30284500d1e0fb4 to your computer and use it in GitHub Desktop.
Save ImagawaKosuke/99fcba7cc6a5ead0e30284500d1e0fb4 to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "0e2cd3ce",
"metadata": {},
"outputs": [],
"source": [
"#ライブラリのインポート\n",
"import numpy as np\n",
"from matplotlib import pyplot as plt"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "f9b96a90",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'C:\\\\Users\\\\kosuk\\\\デスクトップ\\\\competition\\\\FirstComp'"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import os\n",
"\n",
"# Base directory: the data and output folders are resolved relative to the kernel's working directory.\n",
"current_dir = os.getcwd()\n",
"current_dir"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "254d294e",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>id</th>\n",
" <th>mpg</th>\n",
" <th>cylinders</th>\n",
" <th>displacement</th>\n",
" <th>horsepower</th>\n",
" <th>weight</th>\n",
" <th>acceleration</th>\n",
" <th>model year</th>\n",
" <th>origin</th>\n",
" <th>car name</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>0</td>\n",
" <td>29.0</td>\n",
" <td>4</td>\n",
" <td>135.0</td>\n",
" <td>84.00</td>\n",
" <td>2525.0</td>\n",
" <td>16.0</td>\n",
" <td>82</td>\n",
" <td>1</td>\n",
" <td>dodge aries se</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>3</td>\n",
" <td>31.9</td>\n",
" <td>4</td>\n",
" <td>89.0</td>\n",
" <td>71.00</td>\n",
" <td>1925.0</td>\n",
" <td>14.0</td>\n",
" <td>79</td>\n",
" <td>2</td>\n",
" <td>vw rabbit custom</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>9</td>\n",
" <td>19.0</td>\n",
" <td>6</td>\n",
" <td>156.0</td>\n",
" <td>108.0</td>\n",
" <td>2930.0</td>\n",
" <td>15.5</td>\n",
" <td>76</td>\n",
" <td>3</td>\n",
" <td>toyota mark ii</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>11</td>\n",
" <td>28.0</td>\n",
" <td>4</td>\n",
" <td>90.0</td>\n",
" <td>75.00</td>\n",
" <td>2125.0</td>\n",
" <td>14.5</td>\n",
" <td>74</td>\n",
" <td>1</td>\n",
" <td>dodge colt</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>13</td>\n",
" <td>37.7</td>\n",
" <td>4</td>\n",
" <td>89.0</td>\n",
" <td>62.00</td>\n",
" <td>2050.0</td>\n",
" <td>17.3</td>\n",
" <td>81</td>\n",
" <td>3</td>\n",
" <td>toyota tercel</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>194</th>\n",
" <td>384</td>\n",
" <td>40.8</td>\n",
" <td>4</td>\n",
" <td>85.0</td>\n",
" <td>65.00</td>\n",
" <td>2110.0</td>\n",
" <td>19.2</td>\n",
" <td>80</td>\n",
" <td>3</td>\n",
" <td>datsun 210</td>\n",
" </tr>\n",
" <tr>\n",
" <th>195</th>\n",
" <td>385</td>\n",
" <td>20.2</td>\n",
" <td>8</td>\n",
" <td>302.0</td>\n",
" <td>139.0</td>\n",
" <td>3570.0</td>\n",
" <td>12.8</td>\n",
" <td>78</td>\n",
" <td>1</td>\n",
" <td>mercury monarch ghia</td>\n",
" </tr>\n",
" <tr>\n",
" <th>196</th>\n",
" <td>387</td>\n",
" <td>16.0</td>\n",
" <td>8</td>\n",
" <td>304.0</td>\n",
" <td>150.0</td>\n",
" <td>3433.0</td>\n",
" <td>12.0</td>\n",
" <td>70</td>\n",
" <td>1</td>\n",
" <td>amc rebel sst</td>\n",
" </tr>\n",
" <tr>\n",
" <th>197</th>\n",
" <td>395</td>\n",
" <td>43.4</td>\n",
" <td>4</td>\n",
" <td>90.0</td>\n",
" <td>48.00</td>\n",
" <td>2335.0</td>\n",
" <td>23.7</td>\n",
" <td>80</td>\n",
" <td>2</td>\n",
" <td>vw dasher (diesel)</td>\n",
" </tr>\n",
" <tr>\n",
" <th>198</th>\n",
" <td>396</td>\n",
" <td>26.0</td>\n",
" <td>4</td>\n",
" <td>98.0</td>\n",
" <td>90.00</td>\n",
" <td>2265.0</td>\n",
" <td>15.5</td>\n",
" <td>73</td>\n",
" <td>2</td>\n",
" <td>fiat 124 sport coupe</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>199 rows × 10 columns</p>\n",
"</div>"
],
"text/plain": [
" id mpg cylinders displacement horsepower weight acceleration \\\n",
"0 0 29.0 4 135.0 84.00 2525.0 16.0 \n",
"1 3 31.9 4 89.0 71.00 1925.0 14.0 \n",
"2 9 19.0 6 156.0 108.0 2930.0 15.5 \n",
"3 11 28.0 4 90.0 75.00 2125.0 14.5 \n",
"4 13 37.7 4 89.0 62.00 2050.0 17.3 \n",
".. ... ... ... ... ... ... ... \n",
"194 384 40.8 4 85.0 65.00 2110.0 19.2 \n",
"195 385 20.2 8 302.0 139.0 3570.0 12.8 \n",
"196 387 16.0 8 304.0 150.0 3433.0 12.0 \n",
"197 395 43.4 4 90.0 48.00 2335.0 23.7 \n",
"198 396 26.0 4 98.0 90.00 2265.0 15.5 \n",
"\n",
" model year origin car name \n",
"0 82 1 dodge aries se \n",
"1 79 2 vw rabbit custom \n",
"2 76 3 toyota mark ii \n",
"3 74 1 dodge colt \n",
"4 81 3 toyota tercel \n",
".. ... ... ... \n",
"194 80 3 datsun 210 \n",
"195 78 1 mercury monarch ghia \n",
"196 70 1 amc rebel sst \n",
"197 80 2 vw dasher (diesel) \n",
"198 73 2 fiat 124 sport coupe \n",
"\n",
"[199 rows x 10 columns]"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import pandas as pd\n",
"\n",
"# Read the tab-separated training set from the \"datas\" folder and show it.\n",
"data = os.path.join(current_dir, \"datas\")\n",
"train_path = os.path.join(data, \"train.tsv\")\n",
"m_train = pd.read_csv(train_path, sep='\\t')\n",
"m_train"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "3e5f899f",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"id 0\n",
"mpg 0\n",
"cylinders 0\n",
"displacement 0\n",
"horsepower 0\n",
"weight 0\n",
"acceleration 0\n",
"model year 0\n",
"origin 0\n",
"car name 0\n",
"dtype: int64"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Count the missing values in each column.\n",
"m_train.isna().sum()"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "7d09de55",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"id int64\n",
"mpg float64\n",
"cylinders int64\n",
"displacement float64\n",
"horsepower object\n",
"weight float64\n",
"acceleration float64\n",
"model year int64\n",
"origin int64\n",
"car name object\n",
"dtype: object"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Inspect the dtype pandas inferred for each column.\n",
"m_train.dtypes"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "4a45e6cc",
"metadata": {},
"outputs": [],
"source": [
"#重回帰分析を行う\n",
"import seaborn as sns\n",
"from sklearn.linear_model import LinearRegression"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "ddab7ef3",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"<seaborn.axisgrid.PairGrid at 0x2ecdd7bf640>"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "\n",
"text/plain": [
"<Figure size 2160x360 with 6 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"# Visualise how each candidate feature relates to the target (mpg).\n",
"# `horsepower` is loaded with object dtype, so it would be drawn as string categories;\n",
"# coerce it to numbers for plotting (entries that cannot be parsed become NaN).\n",
"pairplot_data = m_train.assign(horsepower=pd.to_numeric(m_train[\"horsepower\"], errors=\"coerce\"))\n",
"sns.pairplot(\n",
"    pairplot_data,\n",
"    height=5,\n",
"    markers=\"+\",\n",
"    x_vars=[\"horsepower\", \"cylinders\", \"displacement\", \"weight\", \"acceleration\", \"model year\"],\n",
"    y_vars=[\"mpg\"],\n",
");"
]
},
{
"cell_type": "markdown",
"id": "f48b7174",
"metadata": {},
"source": [
"気筒数・排気量・重量・年式とガソリン1ガロンあたりの走行距離（mpg）との間に相関があることが分かったので、この4つを説明変数として使う。"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "0ba74c42",
"metadata": {},
"outputs": [],
"source": [
"# Assemble the feature matrix (X) and the target (Y) for the regression.\n",
"feature_columns = ['cylinders', 'displacement', 'weight', 'model year']\n",
"X = m_train[feature_columns].copy()\n",
"Y = m_train['mpg']"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "a97fdab3",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" cylinders displacement weight model year\n",
"0 4 135.0 2525.0 82\n",
"1 4 89.0 1925.0 79\n",
"2 6 156.0 2930.0 76\n",
"3 4 90.0 2125.0 74\n",
"4 4 89.0 2050.0 81\n",
".. ... ... ... ...\n",
"194 4 85.0 2110.0 80\n",
"195 8 302.0 3570.0 78\n",
"196 8 304.0 3433.0 70\n",
"197 4 90.0 2335.0 80\n",
"198 4 98.0 2265.0 73\n",
"\n",
"[199 rows x 4 columns] 0 29.0\n",
"1 31.9\n",
"2 19.0\n",
"3 28.0\n",
"4 37.7\n",
" ... \n",
"194 40.8\n",
"195 20.2\n",
"196 16.0\n",
"197 43.4\n",
"198 26.0\n",
"Name: mpg, Length: 199, dtype: float64\n"
]
}
],
"source": [
"# Preview the first rows of the feature matrix and the target with the notebook's rich display\n",
"# instead of printing both objects in full as plain text.\n",
"display(X.head(), Y.head())"
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "becaf559",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"LinearRegression()"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Fit the linear model; this estimates the coefficients and the intercept.\n",
"linear_regression = LinearRegression().fit(X, Y)\n",
"linear_regression"
]
},
{
"cell_type": "code",
"execution_count": 11,
"id": "087c3a1d",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[-0.23905993 -0.00154163 -0.00626588 0.76239545]\n"
]
}
],
"source": [
"# Fitted coefficients (slopes), one per column of X.\n",
"A = linear_regression.coef_\n",
"print(A)"
]
},
{
"cell_type": "code",
"execution_count": 12,
"id": "b7a3929a",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"-14.142891763156591\n"
]
}
],
"source": [
"# Fitted intercept.\n",
"B = linear_regression.intercept_\n",
"print(B)"
]
},
{
"cell_type": "code",
"execution_count": 13,
"id": "97240187",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>id</th>\n",
" <th>cylinders</th>\n",
" <th>displacement</th>\n",
" <th>horsepower</th>\n",
" <th>weight</th>\n",
" <th>acceleration</th>\n",
" <th>model year</th>\n",
" <th>origin</th>\n",
" <th>car name</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1</td>\n",
" <td>6</td>\n",
" <td>145.0</td>\n",
" <td>76.00</td>\n",
" <td>3160.0</td>\n",
" <td>19.6</td>\n",
" <td>81</td>\n",
" <td>2</td>\n",
" <td>volvo diesel</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2</td>\n",
" <td>6</td>\n",
" <td>250.0</td>\n",
" <td>98.00</td>\n",
" <td>3525.0</td>\n",
" <td>19.0</td>\n",
" <td>77</td>\n",
" <td>1</td>\n",
" <td>ford granada</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>4</td>\n",
" <td>4</td>\n",
" <td>119.0</td>\n",
" <td>92.00</td>\n",
" <td>2434.0</td>\n",
" <td>15.0</td>\n",
" <td>80</td>\n",
" <td>3</td>\n",
" <td>datsun 510 hatchback</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>5</td>\n",
" <td>6</td>\n",
" <td>258.0</td>\n",
" <td>110.0</td>\n",
" <td>2962.0</td>\n",
" <td>13.5</td>\n",
" <td>71</td>\n",
" <td>1</td>\n",
" <td>amc hornet sportabout (sw)</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>6</td>\n",
" <td>4</td>\n",
" <td>97.0</td>\n",
" <td>88.00</td>\n",
" <td>2100.0</td>\n",
" <td>16.5</td>\n",
" <td>72</td>\n",
" <td>3</td>\n",
" <td>toyota corolla 1600 (sw)</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>194</th>\n",
" <td>391</td>\n",
" <td>4</td>\n",
" <td>114.0</td>\n",
" <td>91.00</td>\n",
" <td>2582.0</td>\n",
" <td>14.0</td>\n",
" <td>73</td>\n",
" <td>2</td>\n",
" <td>audi 100ls</td>\n",
" </tr>\n",
" <tr>\n",
" <th>195</th>\n",
" <td>392</td>\n",
" <td>4</td>\n",
" <td>156.0</td>\n",
" <td>105.0</td>\n",
" <td>2800.0</td>\n",
" <td>14.4</td>\n",
" <td>80</td>\n",
" <td>1</td>\n",
" <td>dodge colt</td>\n",
" </tr>\n",
" <tr>\n",
" <th>196</th>\n",
" <td>393</td>\n",
" <td>4</td>\n",
" <td>111.0</td>\n",
" <td>80.00</td>\n",
" <td>2155.0</td>\n",
" <td>14.8</td>\n",
" <td>77</td>\n",
" <td>1</td>\n",
" <td>buick opel isuzu deluxe</td>\n",
" </tr>\n",
" <tr>\n",
" <th>197</th>\n",
" <td>394</td>\n",
" <td>8</td>\n",
" <td>400.0</td>\n",
" <td>180.0</td>\n",
" <td>4220.0</td>\n",
" <td>11.1</td>\n",
" <td>77</td>\n",
" <td>1</td>\n",
" <td>pontiac grand prix lj</td>\n",
" </tr>\n",
" <tr>\n",
" <th>198</th>\n",
" <td>397</td>\n",
" <td>4</td>\n",
" <td>97.0</td>\n",
" <td>78.00</td>\n",
" <td>1940.0</td>\n",
" <td>14.5</td>\n",
" <td>77</td>\n",
" <td>2</td>\n",
" <td>volkswagen rabbit custom</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>199 rows × 9 columns</p>\n",
"</div>"
],
"text/plain": [
" id cylinders displacement horsepower weight acceleration \\\n",
"0 1 6 145.0 76.00 3160.0 19.6 \n",
"1 2 6 250.0 98.00 3525.0 19.0 \n",
"2 4 4 119.0 92.00 2434.0 15.0 \n",
"3 5 6 258.0 110.0 2962.0 13.5 \n",
"4 6 4 97.0 88.00 2100.0 16.5 \n",
".. ... ... ... ... ... ... \n",
"194 391 4 114.0 91.00 2582.0 14.0 \n",
"195 392 4 156.0 105.0 2800.0 14.4 \n",
"196 393 4 111.0 80.00 2155.0 14.8 \n",
"197 394 8 400.0 180.0 4220.0 11.1 \n",
"198 397 4 97.0 78.00 1940.0 14.5 \n",
"\n",
" model year origin car name \n",
"0 81 2 volvo diesel \n",
"1 77 1 ford granada \n",
"2 80 3 datsun 510 hatchback \n",
"3 71 1 amc hornet sportabout (sw) \n",
"4 72 3 toyota corolla 1600 (sw) \n",
".. ... ... ... \n",
"194 73 2 audi 100ls \n",
"195 80 1 dodge colt \n",
"196 77 1 buick opel isuzu deluxe \n",
"197 77 1 pontiac grand prix lj \n",
"198 77 2 volkswagen rabbit custom \n",
"\n",
"[199 rows x 9 columns]"
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import pandas as pd\n",
"\n",
"# Read the tab-separated test set from the \"datas\" folder and show it.\n",
"data = os.path.join(current_dir, \"datas\")\n",
"test_path = os.path.join(data, \"test.tsv\")\n",
"m_test = pd.read_csv(test_path, sep='\\t')\n",
"m_test"
]
},
{
"cell_type": "code",
"execution_count": 14,
"id": "ee2028dc",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0 26.153075\n",
"1 20.654577\n",
"2 30.457908\n",
"3 19.595560\n",
"4 26.485463\n",
" ... \n",
"194 24.201498\n",
"195 28.107557\n",
"196 29.931234\n",
"197 15.590429\n",
"198 31.299980\n",
"Name: mpg, Length: 199, dtype: float64"
]
},
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Predict with the fitted estimator rather than re-typing the regression formula by hand:\n",
"# selecting X.columns keeps the features and their order identical to the ones used for fitting,\n",
"# so the prediction cannot drift out of sync if the feature list changes.\n",
"m_test['mpg'] = linear_regression.predict(m_test[X.columns])\n",
"m_test['mpg']"
]
},
{
"cell_type": "code",
"execution_count": 16,
"id": "7f7cbd26",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'C:\\\\Users\\\\kosuk\\\\デスクトップ\\\\competition\\\\FirstComp\\\\Output'"
]
},
"execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Folder that receives the submission file.\n",
"outdata = os.path.join(current_dir, \"Output\")\n",
"outdata"
]
},
{
"cell_type": "code",
"execution_count": 20,
"id": "8938b590",
"metadata": {},
"outputs": [],
"source": [
"# Make sure the output folder exists: to_csv does not create missing directories.\n",
"os.makedirs(outdata, exist_ok=True)\n",
"# Write the submission as bare `id,mpg` rows (no header, no index column).\n",
"m_test[['id', 'mpg']].to_csv(os.path.join(outdata, 'submit_imakoh.csv'), header=False, index=False)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.12"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment