terapyon/IcecreamSalesCorr.ipynb

## IcecreamSalesCorr.ipynb

      
Display the source blob

    
Display the rendered blob

    
    Raw
  

              IcecreamSalesCorr.ipynb
            
          
      Sorry, something went wrong. Reload?
      Sorry, we cannot display this file.
      Sorry, this file is invalid so it cannot be displayed.
      
          Viewer requires iframe.
      
    
## IcecreamSalesML.ipynb
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "%matplotlib inline"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "scikit-learn を使って、気温からアイスクリームの売上を予測する"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "df = pd.read_pickle(\"アイスクリーム売上データ2016.pickle\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>気温</th>\n",
       "      <th>売上</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>1月</th>\n",
       "      <td>10.600000</td>\n",
       "      <td>464.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2月</th>\n",
       "      <td>12.200000</td>\n",
       "      <td>397.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3月</th>\n",
       "      <td>14.900000</td>\n",
       "      <td>493.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4月</th>\n",
       "      <td>20.299999</td>\n",
       "      <td>617.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5月</th>\n",
       "      <td>25.200001</td>\n",
       "      <td>890.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6月</th>\n",
       "      <td>26.299999</td>\n",
       "      <td>883.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7月</th>\n",
       "      <td>29.700001</td>\n",
       "      <td>1292.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8月</th>\n",
       "      <td>31.600000</td>\n",
       "      <td>1387.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9月</th>\n",
       "      <td>27.700001</td>\n",
       "      <td>843.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10月</th>\n",
       "      <td>22.600000</td>\n",
       "      <td>621.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11月</th>\n",
       "      <td>15.500000</td>\n",
       "      <td>459.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12月</th>\n",
       "      <td>13.800000</td>\n",
       "      <td>561.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "            気温      売上\n",
       "1月   10.600000   464.0\n",
       "2月   12.200000   397.0\n",
       "3月   14.900000   493.0\n",
       "4月   20.299999   617.0\n",
       "5月   25.200001   890.0\n",
       "6月   26.299999   883.0\n",
       "7月   29.700001  1292.0\n",
       "8月   31.600000  1387.0\n",
       "9月   27.700001   843.0\n",
       "10月  22.600000   621.0\n",
       "11月  15.500000   459.0\n",
       "12月  13.800000   561.0"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.linear_model import LinearRegression"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "X = df.loc[:, [\"気温\"]]\n",
    "y = df.loc[:, \"売上\"]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>気温</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>1月</th>\n",
       "      <td>10.600000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2月</th>\n",
       "      <td>12.200000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3月</th>\n",
       "      <td>14.900000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4月</th>\n",
       "      <td>20.299999</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5月</th>\n",
       "      <td>25.200001</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6月</th>\n",
       "      <td>26.299999</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7月</th>\n",
       "      <td>29.700001</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8月</th>\n",
       "      <td>31.600000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9月</th>\n",
       "      <td>27.700001</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10月</th>\n",
       "      <td>22.600000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11月</th>\n",
       "      <td>15.500000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12月</th>\n",
       "      <td>13.800000</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "            気温\n",
       "1月   10.600000\n",
       "2月   12.200000\n",
       "3月   14.900000\n",
       "4月   20.299999\n",
       "5月   25.200001\n",
       "6月   26.299999\n",
       "7月   29.700001\n",
       "8月   31.600000\n",
       "9月   27.700001\n",
       "10月  22.600000\n",
       "11月  15.500000\n",
       "12月  13.800000"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "X"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "1月      464.0\n",
       "2月      397.0\n",
       "3月      493.0\n",
       "4月      617.0\n",
       "5月      890.0\n",
       "6月      883.0\n",
       "7月     1292.0\n",
       "8月     1387.0\n",
       "9月      843.0\n",
       "10月     621.0\n",
       "11月     459.0\n",
       "12月     561.0\n",
       "Name: 売上, dtype: float64"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "y"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "model = LinearRegression()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/Users/terapyon/dev/misc/odyssey/env36/lib/python3.6/site-packages/scipy/linalg/basic.py:1226: RuntimeWarning: internal gelsd driver lwork query error, required iwork dimension not returned. This is likely the result of LAPACK bug 0038, fixed in LAPACK 3.2.2 (released July 21, 2010). Falling back to 'gelss' driver.\n",
      "  warnings.warn(mesg, RuntimeWarning)\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "LinearRegression(copy_X=True, fit_intercept=True, n_jobs=1, normalize=False)"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "model.fit(X, y)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.8289307709324654"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "model.score(X, y)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "40.70161707825133\n",
      "-107.05708154165598\n"
     ]
    }
   ],
   "source": [
    "D = model.coef_[0]\n",
    "print(D)\n",
    "C = model.intercept_\n",
    "print(C)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Line: b = 40.702t -107.057\n"
     ]
    }
   ],
   "source": [
    "print(\"Line: b = {:.3f}t {:+.3f}\".format(D, C))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
	{
	"cells": [
	{
	"cell_type": "code",
	"execution_count": 1,
	"metadata": {},
	"outputs": [],
	"source": [
	"%matplotlib inline"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 2,
	"metadata": {},
	"outputs": [],
	"source": [
	"import numpy as np\n",
	"import pandas as pd"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"scikit-learn を使って、気温からアイスクリームの売上を予測する"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 3,
	"metadata": {},
	"outputs": [],
	"source": [
	"df = pd.read_pickle(\"アイスクリーム売上データ2016.pickle\")"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 4,
	"metadata": {},
	"outputs": [
	{
	"data": {
	"text/html": [
	"<div>\n",
	"<style scoped>\n",
	" .dataframe tbody tr th:only-of-type {\n",
	" vertical-align: middle;\n",
	" }\n",
	"\n",
	" .dataframe tbody tr th {\n",
	" vertical-align: top;\n",
	" }\n",
	"\n",
	" .dataframe thead th {\n",
	" text-align: right;\n",
	" }\n",
	"</style>\n",
	"<table border=\"1\" class=\"dataframe\">\n",
	" <thead>\n",
	" <tr style=\"text-align: right;\">\n",
	" <th></th>\n",
	" <th>気温</th>\n",
	" <th>売上</th>\n",
	" </tr>\n",
	" </thead>\n",
	" <tbody>\n",
	" <tr>\n",
	" <th>1月</th>\n",
	" <td>10.600000</td>\n",
	" <td>464.0</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>2月</th>\n",
	" <td>12.200000</td>\n",
	" <td>397.0</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>3月</th>\n",
	" <td>14.900000</td>\n",
	" <td>493.0</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>4月</th>\n",
	" <td>20.299999</td>\n",
	" <td>617.0</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>5月</th>\n",
	" <td>25.200001</td>\n",
	" <td>890.0</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>6月</th>\n",
	" <td>26.299999</td>\n",
	" <td>883.0</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>7月</th>\n",
	" <td>29.700001</td>\n",
	" <td>1292.0</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>8月</th>\n",
	" <td>31.600000</td>\n",
	" <td>1387.0</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>9月</th>\n",
	" <td>27.700001</td>\n",
	" <td>843.0</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>10月</th>\n",
	" <td>22.600000</td>\n",
	" <td>621.0</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>11月</th>\n",
	" <td>15.500000</td>\n",
	" <td>459.0</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>12月</th>\n",
	" <td>13.800000</td>\n",
	" <td>561.0</td>\n",
	" </tr>\n",
	" </tbody>\n",
	"</table>\n",
	"</div>"
	],
	"text/plain": [
	" 気温 売上\n",
	"1月 10.600000 464.0\n",
	"2月 12.200000 397.0\n",
	"3月 14.900000 493.0\n",
	"4月 20.299999 617.0\n",
	"5月 25.200001 890.0\n",
	"6月 26.299999 883.0\n",
	"7月 29.700001 1292.0\n",
	"8月 31.600000 1387.0\n",
	"9月 27.700001 843.0\n",
	"10月 22.600000 621.0\n",
	"11月 15.500000 459.0\n",
	"12月 13.800000 561.0"
	]
	},
	"execution_count": 4,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"df"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 5,
	"metadata": {},
	"outputs": [],
	"source": [
	"from sklearn.linear_model import LinearRegression"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 6,
	"metadata": {},
	"outputs": [],
	"source": [
	"X = df.loc[:, [\"気温\"]]\n",
	"y = df.loc[:, \"売上\"]"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 7,
	"metadata": {},
	"outputs": [
	{
	"data": {
	"text/html": [
	"<div>\n",
	"<style scoped>\n",
	" .dataframe tbody tr th:only-of-type {\n",
	" vertical-align: middle;\n",
	" }\n",
	"\n",
	" .dataframe tbody tr th {\n",
	" vertical-align: top;\n",
	" }\n",
	"\n",
	" .dataframe thead th {\n",
	" text-align: right;\n",
	" }\n",
	"</style>\n",
	"<table border=\"1\" class=\"dataframe\">\n",
	" <thead>\n",
	" <tr style=\"text-align: right;\">\n",
	" <th></th>\n",
	" <th>気温</th>\n",
	" </tr>\n",
	" </thead>\n",
	" <tbody>\n",
	" <tr>\n",
	" <th>1月</th>\n",
	" <td>10.600000</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>2月</th>\n",
	" <td>12.200000</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>3月</th>\n",
	" <td>14.900000</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>4月</th>\n",
	" <td>20.299999</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>5月</th>\n",
	" <td>25.200001</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>6月</th>\n",
	" <td>26.299999</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>7月</th>\n",
	" <td>29.700001</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>8月</th>\n",
	" <td>31.600000</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>9月</th>\n",
	" <td>27.700001</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>10月</th>\n",
	" <td>22.600000</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>11月</th>\n",
	" <td>15.500000</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>12月</th>\n",
	" <td>13.800000</td>\n",
	" </tr>\n",
	" </tbody>\n",
	"</table>\n",
	"</div>"
	],
	"text/plain": [
	" 気温\n",
	"1月 10.600000\n",
	"2月 12.200000\n",
	"3月 14.900000\n",
	"4月 20.299999\n",
	"5月 25.200001\n",
	"6月 26.299999\n",
	"7月 29.700001\n",
	"8月 31.600000\n",
	"9月 27.700001\n",
	"10月 22.600000\n",
	"11月 15.500000\n",
	"12月 13.800000"
	]
	},
	"execution_count": 7,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"X"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 8,
	"metadata": {},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"1月 464.0\n",
	"2月 397.0\n",
	"3月 493.0\n",
	"4月 617.0\n",
	"5月 890.0\n",
	"6月 883.0\n",
	"7月 1292.0\n",
	"8月 1387.0\n",
	"9月 843.0\n",
	"10月 621.0\n",
	"11月 459.0\n",
	"12月 561.0\n",
	"Name: 売上, dtype: float64"
	]
	},
	"execution_count": 8,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"y"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 9,
	"metadata": {},
	"outputs": [],
	"source": [
	"model = LinearRegression()"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 10,
	"metadata": {},
	"outputs": [
	{
	"name": "stderr",
	"output_type": "stream",
	"text": [
	"/Users/terapyon/dev/misc/odyssey/env36/lib/python3.6/site-packages/scipy/linalg/basic.py:1226: RuntimeWarning: internal gelsd driver lwork query error, required iwork dimension not returned. This is likely the result of LAPACK bug 0038, fixed in LAPACK 3.2.2 (released July 21, 2010). Falling back to 'gelss' driver.\n",
	" warnings.warn(mesg, RuntimeWarning)\n"
	]
	},
	{
	"data": {
	"text/plain": [
	"LinearRegression(copy_X=True, fit_intercept=True, n_jobs=1, normalize=False)"
	]
	},
	"execution_count": 10,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"model.fit(X, y)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 11,
	"metadata": {},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"0.8289307709324654"
	]
	},
	"execution_count": 11,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"model.score(X, y)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 12,
	"metadata": {},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"40.70161707825133\n",
	"-107.05708154165598\n"
	]
	}
	],
	"source": [
	"D = model.coef_[0]\n",
	"print(D)\n",
	"C = model.intercept_\n",
	"print(C)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 13,
	"metadata": {},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"Line: b = 40.702t -107.057\n"
	]
	}
	],
	"source": [
	"print(\"Line: b = {:.3f}t {:+.3f}\".format(D, C))"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {},
	"outputs": [],
	"source": []
	}
	],
	"metadata": {
	"kernelspec": {
	"display_name": "Python 3",
	"language": "python",
	"name": "python3"
	},
	"language_info": {
	"codemirror_mode": {
	"name": "ipython",
	"version": 3
	},
	"file_extension": ".py",
	"mimetype": "text/x-python",
	"name": "python",
	"nbconvert_exporter": "python",
	"pygments_lexer": "ipython3",
	"version": "3.6.3"
	}
	},
	"nbformat": 4,
	"nbformat_minor": 2
	}