Skip to content

Instantly share code, notes, and snippets.

@terapyon
Last active February 23, 2018 11:24
Show Gist options
  • Save terapyon/14b00d1d41c4053c7d38980679c48307 to your computer and use it in GitHub Desktop.
Save terapyon/14b00d1d41c4053c7d38980679c48307 to your computer and use it in GitHub Desktop.
アイスクリーム売上相関
Display the source blob
Display the rendered blob
Raw
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"%matplotlib inline"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
"import pandas as pd"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# scikit-learn を使ってアイスクリームの売上を予測する\n",
"\n",
"月ごとの気温を説明変数、売上を目的変数として、線形回帰モデルを当てはめる。"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"df = pd.read_pickle(\"アイスクリーム売上データ2016.pickle\")"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>気温</th>\n",
" <th>売上</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>1月</th>\n",
" <td>10.600000</td>\n",
" <td>464.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2月</th>\n",
" <td>12.200000</td>\n",
" <td>397.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3月</th>\n",
" <td>14.900000</td>\n",
" <td>493.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4月</th>\n",
" <td>20.299999</td>\n",
" <td>617.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5月</th>\n",
" <td>25.200001</td>\n",
" <td>890.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6月</th>\n",
" <td>26.299999</td>\n",
" <td>883.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7月</th>\n",
" <td>29.700001</td>\n",
" <td>1292.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8月</th>\n",
" <td>31.600000</td>\n",
" <td>1387.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9月</th>\n",
" <td>27.700001</td>\n",
" <td>843.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10月</th>\n",
" <td>22.600000</td>\n",
" <td>621.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>11月</th>\n",
" <td>15.500000</td>\n",
" <td>459.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>12月</th>\n",
" <td>13.800000</td>\n",
" <td>561.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" 気温 売上\n",
"1月 10.600000 464.0\n",
"2月 12.200000 397.0\n",
"3月 14.900000 493.0\n",
"4月 20.299999 617.0\n",
"5月 25.200001 890.0\n",
"6月 26.299999 883.0\n",
"7月 29.700001 1292.0\n",
"8月 31.600000 1387.0\n",
"9月 27.700001 843.0\n",
"10月 22.600000 621.0\n",
"11月 15.500000 459.0\n",
"12月 13.800000 561.0"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"from sklearn.linear_model import LinearRegression"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"X = df.loc[:, [\"気温\"]]\n",
"y = df.loc[:, \"売上\"]"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>気温</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>1月</th>\n",
" <td>10.600000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2月</th>\n",
" <td>12.200000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3月</th>\n",
" <td>14.900000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4月</th>\n",
" <td>20.299999</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5月</th>\n",
" <td>25.200001</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6月</th>\n",
" <td>26.299999</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7月</th>\n",
" <td>29.700001</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8月</th>\n",
" <td>31.600000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9月</th>\n",
" <td>27.700001</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10月</th>\n",
" <td>22.600000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>11月</th>\n",
" <td>15.500000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>12月</th>\n",
" <td>13.800000</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" 気温\n",
"1月 10.600000\n",
"2月 12.200000\n",
"3月 14.900000\n",
"4月 20.299999\n",
"5月 25.200001\n",
"6月 26.299999\n",
"7月 29.700001\n",
"8月 31.600000\n",
"9月 27.700001\n",
"10月 22.600000\n",
"11月 15.500000\n",
"12月 13.800000"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"X"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"1月 464.0\n",
"2月 397.0\n",
"3月 493.0\n",
"4月 617.0\n",
"5月 890.0\n",
"6月 883.0\n",
"7月 1292.0\n",
"8月 1387.0\n",
"9月 843.0\n",
"10月 621.0\n",
"11月 459.0\n",
"12月 561.0\n",
"Name: 売上, dtype: float64"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"y"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [],
"source": [
"model = LinearRegression()"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/Users/terapyon/dev/misc/odyssey/env36/lib/python3.6/site-packages/scipy/linalg/basic.py:1226: RuntimeWarning: internal gelsd driver lwork query error, required iwork dimension not returned. This is likely the result of LAPACK bug 0038, fixed in LAPACK 3.2.2 (released July 21, 2010). Falling back to 'gelss' driver.\n",
" warnings.warn(mesg, RuntimeWarning)\n"
]
},
{
"data": {
"text/plain": [
"LinearRegression(copy_X=True, fit_intercept=True, n_jobs=1, normalize=False)"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"model.fit(X, y)"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0.8289307709324654"
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"model.score(X, y)"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"40.70161707825133\n",
"-107.05708154165598\n"
]
}
],
"source": [
"D = model.coef_[0]\n",
"print(D)\n",
"C = model.intercept_\n",
"print(C)"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Line: b = 40.702t -107.057\n"
]
}
],
"source": [
"print(\"Line: b = {:.3f}t {:+.3f}\".format(D, C))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.3"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment