Skip to content

Instantly share code, notes, and snippets.

What would you like to do?
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
%matplotlib inline
# Load the raw population table and total the population column per census
# year (one row per year, summed over every record sharing that year).
df = pd.read_csv("downloaded_japan_population_utf8.csv")
df_year = pd.pivot_table(df, index="集計年", values="総人口(人)", aggfunc="sum")

# Train a regression on the yearly totals (1960-2018 per the original note).
# The explanatory variable X is the total population of a given year and the
# target y is the total population of the following year; both are built as
# (N_SAMPLES, 1) ndarrays.
# NOTE(review): the original comment described this as Tokyo's population,
# while the file name and plot title say Japan -- confirm which is intended.
N_SAMPLES = 59  # number of (year, next year) pairs used
N_TRAIN = 53    # leading pairs used for fitting; the rest are held out

totals = df_year.iloc[:, 0].to_numpy()
if len(totals) < N_SAMPLES + 1:
    # Each sample needs the following year's value, hence the +1.
    raise ValueError(
        f"need at least {N_SAMPLES + 1} yearly rows, got {len(totals)}"
    )

# Shift by one year to pair every total with its successor.
X = totals[:N_SAMPLES].astype(np.uint32).reshape(-1, 1)
y = totals[1:N_SAMPLES + 1].astype(np.uint32).reshape(-1, 1)

# Chronological split: no shuffling, the last samples form the test set.
X_train, X_test = X[:N_TRAIN], X[N_TRAIN:]
y_train, y_test = y[:N_TRAIN], y[N_TRAIN:]

model = LinearRegression().fit(X_train, y_train)
y_pred = model.predict(X_test).astype(np.uint32)

# Plot the actual training targets followed by the predictions so the red
# curve only departs from the blue one over the held-out range.
y_pred_gr = np.concatenate([y_train, y_pred])
plt.plot(range(N_SAMPLES), y_pred_gr, label="Predicted", color="red")
plt.plot(range(N_SAMPLES), y, label="Actual", color="blue")
plt.title("Japan's population")
plt.legend(loc="upper left")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.