Skip to content

Instantly share code, notes, and snippets.

@denismaster
Last active November 9, 2017 05:08
Show Gist options
  • Save denismaster/97b0bb066e989115d3883246cd0bcd36 to your computer and use it in GitHub Desktop.
Save denismaster/97b0bb066e989115d3883246cd0bcd36 to your computer and use it in GitHub Desktop.
CourseProj
# Анализ популярности операционных систем Unix
# Автор: denismaster
# Github: denismaster
# Лицензия: MIT
# In[1]:
# Импорт библиотек для работы с данными
import pandas as pd
import numpy as np
import matplotlib.pylab as plt
from matplotlib.pylab import rcParams
rcParams['figure.figsize'] = 15, 6
def loadFile(filename, dateCol):
    """Load a CSV file and index it by a year-month date column.

    Args:
        filename: Path of the CSV file to read.
        dateCol: Name (or position) of the column holding dates written
            as ``YYYY-MM``; it becomes the index of the result.

    Returns:
        The loaded DataFrame with a ``DatetimeIndex`` built from ``dateCol``.

    Raises:
        ValueError: If a value in ``dateCol`` does not match ``%Y-%m``.
    """
    data = pd.read_csv(filename, index_col=dateCol)
    # Convert the index explicitly: ``pd.datetime`` no longer exists in
    # current pandas and ``read_csv(date_parser=...)`` is deprecated.  This
    # also parses the requested column rather than always column 0.
    data.index = pd.to_datetime(data.index, format='%Y-%m')
    print('\n Типы данных:')
    print(data.dtypes)
    print(data.head())
    return data
def prepareData(data, columns=('Linux Mint', 'Steam OS', 'Ubuntu')):
    """Build the combined ``'Unix'`` series by summing share columns.

    The sum is stored on ``data`` as a new ``'Unix'`` column (the input frame
    is modified in place) and that column is returned.

    Args:
        data: DataFrame containing every column named in ``columns``.
        columns: Column name, or iterable of column names, to add together.
            Defaults to the three columns the function originally summed.

    Returns:
        The ``data['Unix']`` Series.

    Raises:
        ValueError: If ``columns`` is empty.
        KeyError: If a requested column is missing from ``data``.
    """
    # Accept a single column name as a convenience.
    if isinstance(columns, str):
        columns = [columns]
    columns = list(columns)
    if not columns:
        raise ValueError('columns must name at least one column')

    # Plain ``+`` keeps the original NaN semantics (NaN in any addend
    # propagates to the sum), unlike DataFrame.sum which skips NaN.
    total = data[columns[0]]
    for name in columns[1:]:
        total = total + data[name]

    data['Unix'] = total
    return data['Unix']
from statsmodels.tsa.stattools import adfuller
def test_stationarity(timeseries):
    """Plot rolling statistics and report an augmented Dickey-Fuller test.

    Draws the series together with its 12-observation rolling mean and
    rolling standard deviation, then runs ``adfuller`` with AIC lag selection
    and prints the test statistic, p-value, lag count, number of observations
    and critical values, followed by a verdict based on the 5% critical value.

    Args:
        timeseries: pandas Series to examine.

    Returns:
        None. Results are plotted and printed.
    """
    # Rolling statistics over a 12-observation window.
    rolmean = timeseries.rolling(window=12).mean()
    rolstd = timeseries.rolling(window=12).std()

    # Plot rolling statistics.
    plt.plot(timeseries, color='blue', label='Original')
    plt.plot(rolmean, color='red', label='Rolling mean')
    plt.plot(rolstd, color='black', label='Standard Deviation')
    plt.legend(loc='best')
    plt.title('Rolling Mean & Standard Deviation')
    # Blocking show: with block=False a script keeps this figure open, so the
    # caller's next plt.plot() would be drawn on top of these curves.
    plt.show()

    # Perform the Dickey-Fuller test on the observed values only; missing
    # values (e.g. left over from differencing) are not valid test input.
    print('Results of Dickey-Fuller Test:')
    dftest = adfuller(timeseries.dropna(), autolag='AIC')
    dfoutput = pd.Series(
        dftest[0:4],
        index=['Test Statistic', 'p-value', '#Lags Used',
               'Number of Observations Used'],
    )
    for key, value in dftest[4].items():
        dfoutput['Critical Value (%s)' % key] = value
    print(dfoutput)

    # A test statistic above the 5% critical value means the unit-root
    # hypothesis cannot be rejected.
    if dftest[0] > dftest[4]['5%']:
        print('есть единичные корни, ряд не стационарен')
    else:
        print('единичных корней нет, ряд стационарен')
# Load the data and build the series under study.
data = loadFile('data.csv', 'Date')
ts = prepareData(data)
# Bare ``.head()`` expressions only display inside a notebook; print them so
# the script shows the same preview.
print(ts.head(10))
plt.plot(ts)
plt.show()
test_stationarity(ts)

# Log transform.
ts_log = np.log(ts)
plt.plot(ts_log)
plt.show()

# Subtract a 12-observation moving average from the log series.
moving_avg = ts_log.rolling(12).mean()
plt.plot(ts_log)
plt.plot(moving_avg, color='red')
plt.show()
ts_log_moving_avg_diff = ts_log - moving_avg
print(ts_log_moving_avg_diff.head(12))
# The first 11 values are NaN because the rolling window is not yet full.
ts_log_moving_avg_diff.dropna(inplace=True)
test_stationarity(ts_log_moving_avg_diff)

# Subtract an exponentially weighted average (half-life of 12 observations).
expwighted_avg = ts_log.ewm(halflife=12).mean()
plt.plot(ts_log)
plt.plot(expwighted_avg, color='red')
plt.show()
ts_log_ewma_diff = ts_log - expwighted_avg
test_stationarity(ts_log_ewma_diff)

# First-order differencing of the log series.
ts_log_diff = ts_log - ts_log.shift()
plt.plot(ts_log_diff)
plt.show()
# The first difference is undefined (NaN) for the first observation.
ts_log_diff.dropna(inplace=True)
test_stationarity(ts_log_diff)
from statsmodels.tsa.seasonal import seasonal_decompose
# Split the log series into trend, seasonal and residual components.
decomposition = seasonal_decompose(ts_log)
trend, seasonal, residual = (
    decomposition.trend,
    decomposition.seasonal,
    decomposition.resid,
)

# One stacked panel per component, original series on top.
decomposition_panels = [
    (411, ts_log, 'Original'),
    (412, trend, 'Trend'),
    (413, seasonal, 'Seasonality'),
    (414, residual, 'Residuals'),
]
for panel_position, panel_series, panel_label in decomposition_panels:
    plt.subplot(panel_position)
    plt.plot(panel_series, label=panel_label)
    plt.legend(loc='best')
plt.tight_layout()
plt.show()

# Check the residual component for stationarity after dropping its NaN
# entries in place.
ts_log_decompose = residual
ts_log_decompose.dropna(inplace=True)
test_stationarity(ts_log_decompose)
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
# statsmodels correlograms of the differenced log series, 25 lags each.
correlogram_lags = 25
plot_acf(ts_log_diff.values.squeeze(), lags=correlogram_lags)
plot_pacf(ts_log_diff, lags=correlogram_lags)
plt.show()
#ACF and PACF plots:
from statsmodels.tsa.stattools import acf, pacf
lag_acf = acf(ts_log_diff, nlags=20)
lag_pacf = pacf(ts_log_diff, nlags=20, method='ols')

# Dashed reference lines at zero and at +/-1.96/sqrt(N), the usual
# approximate 95% significance band for sample correlations.
significance_bound = 1.96 / np.sqrt(len(ts_log_diff))
reference_levels = (0, -significance_bound, significance_bound)

# Plot ACF (left panel).
plt.subplot(121)
plt.plot(lag_acf)
for level in reference_levels:
    plt.axhline(y=level, linestyle='--', color='gray')
plt.title('Autocorrelation Function')

# Plot PACF (right panel). No plt.show() between the panels: showing the
# figure early would leave each panel alone in its own half-empty figure.
plt.subplot(122)
plt.plot(lag_pacf)
for level in reference_levels:
    plt.axhline(y=level, linestyle='--', color='gray')
plt.title('Partial Autocorrelation Function')
plt.tight_layout()
plt.show()
from statsmodels.tsa.arima_model import ARIMA
# Fit three candidate orders on the log series: AR (2,1,0), MA (0,1,2) and
# the combined (2,1,2). For each, overlay the fitted values on the
# differenced series and show the residual sum of squares in the title.
candidate_orders = [(2, 1, 0), (0, 1, 2), (2, 1, 2)]
candidate_fits = []
for candidate_order in candidate_orders:
    model = ARIMA(ts_log, order=candidate_order)
    candidate_fit = model.fit(disp=-1)
    plt.plot(ts_log_diff)
    plt.plot(candidate_fit.fittedvalues, color='red')
    squared_residuals = (candidate_fit.fittedvalues - ts_log_diff) ** 2
    plt.title('RSS: %.4f' % sum(squared_residuals))
    plt.show()
    candidate_fits.append(candidate_fit)

# Keep the module-level result names the rest of the script relies on.
results_AR, results_MA, results_ARIMA = candidate_fits
# In-sample fitted values of the (2, 1, 2) model, on the differenced scale.
predictions_ARIMA_diff = pd.Series(results_ARIMA.fittedvalues, copy=True)
print(predictions_ARIMA_diff.head())

# Out-of-sample predictions for the requested date range.
future = results_ARIMA.predict('2017-10-01', '2018-01-01')
print(future.head())

# Series.append was removed from pandas; pd.concat is the replacement.
pred = pd.concat([predictions_ARIMA_diff, future])

# Undo the differencing: cumulative sum of the differences added to the
# first observed log value.
predictions_ARIMA_diff_cumsum = pred.cumsum()
print(predictions_ARIMA_diff_cumsum.head())
predictions_ARIMA_log = pd.Series(ts_log.iloc[0], index=pred.index)
predictions_ARIMA_log = predictions_ARIMA_log.add(predictions_ARIMA_diff_cumsum, fill_value=0)
print(predictions_ARIMA_log.head())

# Undo the log transform.
predictions_ARIMA = np.exp(predictions_ARIMA_log)
plt.plot(ts)
plt.plot(predictions_ARIMA)
# Score only the dates present in both series: the subtraction yields NaN for
# dates that exist in just one of them (such as the forecast horizon), and a
# single NaN would turn the whole RMSE into NaN.
prediction_errors = (predictions_ARIMA - ts).dropna()
plt.title('RMSE: %.4f' % np.sqrt((prediction_errors ** 2).mean()))
plt.show()

predictionRange = predictions_ARIMA.loc['2017-10-01':'2018-01-01']
print(predictionRange.head())
std = predictionRange.std()
# Dispersion (variance) is the square of the standard deviation, not its
# square root.
print('Dispersion %.4f:' % (std ** 2))
Date Windows OS X Linux Unknown iOS Chrome OS Android Playstation Other
2009-01 95.42 3.68 0.64 0.17 0 0 0 0.08 0.02
2009-02 95.39 3.76 0.62 0.14 0 0 0 0.07 0.02
2009-03 95.22 3.87 0.65 0.16 0 0 0 0.08 0.02
2009-04 95.13 3.92 0.66 0.17 0 0 0 0.1 0.02
2009-05 95.25 3.75 0.65 0.24 0 0 0 0.09 0.02
2009-06 94.76 4.07 0.74 0.28 0 0 0 0.11 0.03
2009-07 94.83 4.12 0.76 0.13 0 0 0 0.14 0.02
2009-08 94.69 4.35 0.69 0.1 0 0 0 0.15 0.02
2009-09 94.61 4.44 0.7 0.1 0 0 0 0.12 0.03
2009-10 94.35 4.71 0.68 0.1 0 0 0 0.13 0.02
2009-11 93.98 4.96 0.72 0.17 0 0 0 0.14 0.03
2009-12 94.19 4.72 0.68 0.23 0 0 0 0.15 0.02
2010-01 93.76 5.16 0.7 0.2 0 0 0 0.15 0.02
2010-02 93.36 5.63 0.74 0.11 0 0 0 0.13 0.03
2010-03 93.17 5.78 0.78 0.11 0 0 0 0.13 0.03
2010-04 92.96 5.92 0.79 0.15 0 0 0 0.14 0.03
2010-05 93.04 5.77 0.81 0.18 0.03 0 0 0.14 0.03
2010-06 93.27 5.56 0.8 0.08 0.1 0 0 0.15 0.03
2010-07 93.29 5.53 0.77 0.08 0.14 0 0 0.15 0.03
2010-08 93.11 5.66 0.79 0.08 0.18 0 0 0.15 0.03
2010-09 92.88 5.9 0.78 0.08 0.2 0 0 0.14 0.02
2010-10 92.72 6.05 0.77 0.07 0.23 0 0 0.14 0.02
2010-11 92.51 6.22 0.78 0.07 0.26 0 0 0.13 0.02
2010-12 92.42 6.25 0.75 0.08 0.33 0 0 0.15 0.02
2011-01 92.02 6.56 0.74 0.07 0.44 0 0.01 0.15 0.02
2011-02 91.98 6.59 0.76 0.07 0.43 0 0.01 0.14 0.02
2011-03 92.01 6.53 0.76 0.07 0.46 0 0.02 0.13 0.02
2011-04 91.98 6.47 0.76 0.07 0.53 0 0.03 0.14 0.02
2011-05 91.95 6.35 0.81 0.08 0.61 0 0.04 0.15 0.02
2011-06 91.94 6.27 0.79 0.08 0.7 0 0.05 0.14 0.02
2011-07 91.87 6.23 0.76 0.07 0.83 0 0.07 0.15 0.03
2011-08 91.75 6.28 0.78 0.04 0.9 0 0.08 0.15 0.02
2011-09 91.11 6.78 0.8 0.06 1 0 0.08 0.15 0.02
2011-10 90.47 7.18 0.84 0.07 1.12 0 0.14 0.15 0.03
2011-11 90.5 7.05 0.84 0.04 1.24 0 0.15 0.14 0.04
2011-12 90.29 7.01 0.83 0.07 1.41 0 0.19 0.15 0.06
2012-01 89.62 7.33 0.82 0.07 1.71 0 0.24 0.14 0.07
2012-02 89.5 7.41 0.83 0.08 1.81 0 0.26 0.04 0.07
2012-03 89.37 7.32 0.83 0.14 1.89 0 0.28 0.06 0.1
2012-04 88.75 7.66 0.85 0.05 2.12 0 0.31 0.12 0.14
2012-05 88.81 7.45 0.86 0.05 2.24 0 0.33 0.12 0.14
2012-06 88.89 7.05 0.83 0.05 2.4 0 0.37 0.13 0.26
2012-07 88.85 6.92 0.83 0.06 2.64 0 0.4 0.13 0.16
2012-08 91.54 7.41 0.85 0.06 0 0.01 0.03 0 0.11
2012-09 91.18 7.75 0.88 0.06 0 0.01 0.02 0 0.11
2012-10 91.04 7.87 0.9 0.06 0 0.01 0.03 0 0.09
2012-11 91.02 7.86 0.9 0.06 0 0.01 0.05 0 0.09
2012-12 91.22 7.69 0.88 0.07 0 0.01 0.09 0 0.04
2013-01 90.96 7.95 0.88 0.07 0 0.01 0.12 0 0
2013-02 90.87 7.95 0.93 0.07 0 0.02 0.15 0 0.01
2013-03 91.16 7.7 0.95 0.08 0 0.02 0.08 0 0.01
2013-04 91.28 7.47 1.05 0.08 0 0.02 0.1 0 0
2013-05 90.84 7.61 1.28 0.09 0 0.02 0.15 0 0
2013-06 90.43 7.87 1.3 0.12 0 0.02 0.25 0 0.01
2013-07 90.49 7.81 1.23 0.08 0 0.03 0.35 0 0
2013-08 90.56 7.63 1.23 0.09 0 0.03 0.46 0 0
2013-09 90.89 7.41 1.02 0.1 0 0.04 0.54 0 0
2013-10 90.63 7.42 1.1 0.1 0 0.04 0.71 0 0
2013-11 90.34 7.45 1.15 0.11 0 0.05 0.9 0 0
2013-12 89.55 7.83 1.13 0.08 0 0.1 1.3 0 0.01
2014-01 88.87 8.35 1.13 0.05 0 0.14 1.45 0 0
2014-02 89.65 8.39 1.14 0.26 0 0.16 0.38 0 0
2014-03 89.61 8.58 1.16 0.47 0 0.18 0 0 0
2014-04 89.18 8.85 1.34 0.44 0 0.19 0 0 0
2014-05 88.83 8.85 1.58 0.54 0 0.19 0 0 0
2014-06 89.27 8.56 1.37 0.61 0 0.19 0 0 0
2014-07 89.25 8.59 1.34 0.63 0 0.19 0 0 0
2014-08 89.15 8.65 1.38 0.61 0 0.21 0 0 0
2014-09 88.49 9.15 1.39 0.72 0 0.25 0 0 0
2014-10 88.28 9.13 1.41 0.9 0 0.27 0 0 0
2014-11 88.47 8.98 1.33 0.93 0 0.28 0 0 0
2014-12 88.74 8.67 1.39 0.91 0 0.29 0 0 0
2015-01 88.19 9.1 1.46 0.91 0 0.33 0 0 0.01
2015-02 88.08 9.09 1.55 0.94 0 0.34 0 0 0.01
2015-03 86.99 9.83 1.82 1 0 0.34 0 0 0.01
2015-04 87 9.61 1.91 1.12 0 0.35 0 0 0.01
2015-05 86.09 10.51 1.77 1.23 0 0.38 0 0 0.02
2015-06 86.3 10.17 1.77 1.46 0 0.29 0 0 0.02
2015-07 87.7 8.6 1.52 1.9 0 0.26 0 0 0.01
2015-08 88.07 8.18 1.5 1.94 0 0.3 0 0 0.01
2015-09 87.48 8.63 1.55 1.93 0 0.41 0 0 0.01
2015-10 86.44 9.02 1.57 2.51 0 0.46 0 0 0.01
2015-11 85.84 9.36 1.5 2.81 0 0.47 0 0 0.01
2015-12 84.89 9.8 1.48 3.31 0 0.51 0 0 0.01
2016-01 85.18 9.03 1.47 3.8 0 0.51 0 0 0.01
2016-02 84.82 9.33 1.47 3.83 0 0.55 0 0 0.01
2016-03 85.89 9.38 1.45 2.74 0 0.53 0 0 0.01
2016-04 85.3 9.52 1.55 3.06 0 0.55 0 0 0.01
2016-05 84.54 9.83 1.44 3.59 0 0.59 0 0 0.01
2016-06 84.1 9.95 1.47 4.06 0 0.41 0 0 0.01
2016-07 83.33 9.61 1.54 5.16 0 0.35 0 0 0.01
2016-08 82.45 9.81 1.52 5.81 0 0.41 0 0 0.01
2016-09 81.34 10.09 1.49 6.42 0 0.65 0 0 0.01
2016-10 80.84 10.88 1.44 6.14 0 0.7 0 0 0
2016-11 83.12 11.15 1.53 3.45 0 0.75 0 0 0.01
2016-12 84.27 11 1.57 2.42 0 0.74 0 0 0.01
2017-01 84.4 11.2 1.55 2.07 0 0.77 0 0 0.01
2017-02 84.14 11.6 1.53 1.9 0 0.83 0 0 0.01
2017-03 84.34 11.68 1.54 1.59 0 0.84 0 0 0.01
2017-04 84.22 11.61 1.68 1.73 0 0.75 0 0 0.01
2017-05 83.92 11.76 1.66 1.83 0 0.82 0 0 0.01
2017-06 84.32 11.59 1.74 1.78 0 0.55 0 0 0.01
2017-07 84.46 11.32 1.79 1.97 0 0.45 0 0 0.01
2017-08 83.53 11.95 1.79 2.15 0 0.56 0 0 0.01
2017-09 83.28 12.15 1.66 2.06 0 0.84 0 0 0.01
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment