Last active
November 9, 2017 05:08
-
-
Save denismaster/97b0bb066e989115d3883246cd0bcd36 to your computer and use it in GitHub Desktop.
CourseProj
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Анализ популярности операционных систем Unix | |
# Автор: denismaster | |
# Github: denismaster | |
# Лицензия: MIT | |
# In[1]: | |
# Импорт библиотек для работы с данными | |
import pandas as pd | |
import numpy as np | |
import matplotlib.pylab as plt | |
from matplotlib.pylab import rcParams | |
rcParams['figure.figsize'] = 15, 6 | |
# Функция загрузки файла | |
def loadFile(filename, dateCol):
    """Read a CSV file and index it by a monthly date column.

    Parameters
    ----------
    filename : str or file-like
        Anything accepted by ``pandas.read_csv``.
    dateCol : str or int
        Name (or position) of the column holding ``YYYY-MM`` dates.
        It becomes the index of the returned frame.

    Returns
    -------
    pandas.DataFrame
        The file contents with a ``DatetimeIndex`` built from ``dateCol``.

    Raises
    ------
    ValueError
        If a value in ``dateCol`` does not match the ``%Y-%m`` format.
    """
    data = pd.read_csv(filename, index_col=dateCol)
    # Parse the index explicitly instead of going through
    # ``pd.datetime.strptime`` / ``date_parser``: ``pd.datetime`` no longer
    # exists in current pandas and ``date_parser`` is deprecated.  Parsing the
    # index also works when ``dateCol`` is not the first column of the file.
    data.index = pd.to_datetime(data.index, format='%Y-%m')
    print('\n Типы данных:')
    print(data.dtypes)
    print(data.head())
    return data
def prepareData(data):
    """Build the combined ``Unix`` series from the per-distribution columns.

    The frame is modified in place: a ``Unix`` column holding the row-wise
    sum of ``Linux Mint``, ``Steam OS`` and ``Ubuntu`` is added to ``data``.
    A missing value in any of the three columns yields a missing total.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain the three columns named above; indexing a missing
        column raises ``KeyError``.

    Returns
    -------
    pandas.Series
        The newly added ``Unix`` column.
    """
    unix_total = data['Linux Mint'] + data['Steam OS'] + data['Ubuntu']
    data['Unix'] = unix_total
    return data['Unix']
from statsmodels.tsa.stattools import adfuller | |
def test_stationarity(timeseries):
    """Plot rolling statistics of a series and print a Dickey-Fuller report.

    Draws the series together with its 12-observation rolling mean and
    rolling standard deviation, then runs ``adfuller`` (lag chosen by AIC)
    and prints the test statistic, p-value, lags used, number of
    observations and the critical values.  Finally prints a verdict: the
    series is reported as non-stationary when the test statistic is greater
    than the 5% critical value, and as stationary otherwise.

    Parameters
    ----------
    timeseries : pandas.Series
        The series to examine.

    Returns
    -------
    None
    """
    # Rolling statistics over a 12-observation window.
    rolling_window = timeseries.rolling(window=12)
    rolling_mean = rolling_window.mean()
    rolling_std = rolling_window.std()

    plt.plot(timeseries, color='blue', label='Original')
    plt.plot(rolling_mean, color='red', label='Rolling mean')
    plt.plot(rolling_std, color='black', label='Standard Deviation')
    plt.legend(loc='best')
    plt.title('Rolling Mean & Standard Deviation')
    # NOTE(review): a non-blocking show may leave this figure open under GUI
    # backends, so later plt.plot calls could draw onto it - confirm intended.
    plt.show(block=False)

    # Augmented Dickey-Fuller test.
    print('Results of Dickey-Fuller Test:')
    adf_result = adfuller(timeseries, autolag='AIC')
    critical_values = adf_result[4]
    report = pd.Series(
        adf_result[0:4],
        index=['Test Statistic', 'p-value', '#Lags Used',
               'Number of Observations Used'],
    )
    for level, threshold in critical_values.items():
        report['Critical Value (%s)' % level] = threshold
    print(report)

    # A statistic above the 5% critical value means the unit-root hypothesis
    # is not rejected (first message: "unit roots present, not stationary").
    unit_root_not_rejected = adf_result[0] > critical_values['5%']
    if unit_root_not_rejected:
        print('есть единичные корни, ряд не стационарен')
    else:
        print('единичных корней нет, ряд стационарен')
# Load the raw data and build the combined series analysed below.
data = loadFile('data.csv', 'Date')
ts = prepareData(data)
# Printed explicitly: a bare ``ts.head(10)`` only displays something inside a
# notebook cell; when run as a script its result is silently discarded.
print(ts.head(10))
plt.plot(ts)
plt.show()
# Baseline stationarity check on the untransformed series.
test_stationarity(ts)
# Work on the natural log of the series.
ts_log = np.log(ts)
plt.plot(ts_log)
plt.show()

# 12-observation moving average of the log series, drawn over the series.
moving_avg = ts_log.rolling(12).mean()
plt.plot(ts_log)
plt.plot(moving_avg, color='red')
plt.show()

# Remove the moving average.  The first 11 entries are NaN because the
# 12-observation window is not yet full there.
ts_log_moving_avg_diff = ts_log - moving_avg
# Printed explicitly: a bare ``.head(12)`` is discarded when run as a script.
# (A ``plt.show()`` that followed here was removed: nothing had been plotted
# since the previous show.)
print(ts_log_moving_avg_diff.head(12))
ts_log_moving_avg_diff.dropna(inplace=True)
test_stationarity(ts_log_moving_avg_diff)
# --- Detrending with an exponentially weighted average (half-life 12) ---
expwighted_avg = ts_log.ewm(halflife=12).mean()

plt.plot(ts_log)
plt.plot(expwighted_avg, color='red')
plt.show()

ts_log_ewma_diff = ts_log - expwighted_avg
test_stationarity(ts_log_ewma_diff)

# --- First-order differencing of the log series ---
# ``diff()`` subtracts the previous observation from each value, i.e. the
# same as ``ts_log - ts_log.shift()``; the first entry is NaN.
ts_log_diff = ts_log.diff()

plt.plot(ts_log_diff)
plt.show()

ts_log_diff.dropna(inplace=True)
test_stationarity(ts_log_diff)
from statsmodels.tsa.seasonal import seasonal_decompose

# Split the log series into trend, seasonal and residual components.
decomposition = seasonal_decompose(ts_log)
trend = decomposition.trend
seasonal = decomposition.seasonal
residual = decomposition.resid

# One stacked panel per component: (subplot position, series, legend label).
_decomposition_panels = (
    (411, ts_log, 'Original'),
    (412, trend, 'Trend'),
    (413, seasonal, 'Seasonality'),
    (414, residual, 'Residuals'),
)
for _position, _component, _label in _decomposition_panels:
    plt.subplot(_position)
    plt.plot(_component, label=_label)
    plt.legend(loc='best')
plt.tight_layout()
plt.show()

# Check the residual component for stationarity.  ``ts_log_decompose`` is the
# same object as ``residual``, so the in-place dropna affects both names.
ts_log_decompose = residual
ts_log_decompose.dropna(inplace=True)
test_stationarity(ts_log_decompose)
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf

# Library-drawn correlograms of the differenced log series (25 lags).
plot_acf(ts_log_diff.values.squeeze(), lags=25)
plot_pacf(ts_log_diff, lags=25)
plt.show()

# Hand-drawn ACF and PACF plots (20 lags).
from statsmodels.tsa.stattools import acf, pacf

lag_acf = acf(ts_log_diff, nlags=20)
lag_pacf = pacf(ts_log_diff, nlags=20, method='ols')

# Dashed reference lines are drawn at +/- 1.96 / sqrt(N).
conf_bound = 1.96 / np.sqrt(len(ts_log_diff))

# Left panel: ACF.  There is deliberately no plt.show() between the two
# panels: showing after subplot(121) would end the figure before
# subplot(122) is drawn and split the side-by-side layout into two figures.
plt.subplot(121)
plt.plot(lag_acf)
plt.axhline(y=0, linestyle='--', color='gray')
plt.axhline(y=-conf_bound, linestyle='--', color='gray')
plt.axhline(y=conf_bound, linestyle='--', color='gray')
plt.title('Autocorrelation Function')

# Right panel: PACF.
plt.subplot(122)
plt.plot(lag_pacf)
plt.axhline(y=0, linestyle='--', color='gray')
plt.axhline(y=-conf_bound, linestyle='--', color='gray')
plt.axhline(y=conf_bound, linestyle='--', color='gray')
plt.title('Partial Autocorrelation Function')
plt.tight_layout()
plt.show()
# NOTE(review): ``statsmodels.tsa.arima_model`` is the legacy ARIMA
# implementation and appears to be unavailable in newer statsmodels releases;
# confirm the pinned version before running.
from statsmodels.tsa.arima_model import ARIMA


def _fit_and_plot_arima(order):
    """Fit ARIMA(order) to ``ts_log`` and plot its fit against ``ts_log_diff``.

    The figure title shows the residual sum of squares between the fitted
    values and the differenced log series.  Returns ``(model, results)``.
    """
    arima = ARIMA(ts_log, order=order)
    fitted = arima.fit(disp=-1)
    plt.plot(ts_log_diff)
    plt.plot(fitted.fittedvalues, color='red')
    plt.title('RSS: %.4f' % sum((fitted.fittedvalues - ts_log_diff) ** 2))
    plt.show()
    return arima, fitted


# AR model, MA model, then the combined model; ``model`` ends up bound to the
# last one fitted, as before.
model, results_AR = _fit_and_plot_arima((2, 1, 0))
model, results_MA = _fit_and_plot_arima((0, 1, 2))
model, results_ARIMA = _fit_and_plot_arima((2, 1, 2))
# Fitted values of the combined model; the code below treats them as
# differences of the log series (they are cumulatively summed and added to
# the first log observation).
predictions_ARIMA_diff = pd.Series(results_ARIMA.fittedvalues, copy=True)
# The intermediate heads are printed explicitly: bare ``.head()`` expressions
# are discarded when the file is run as a script.
print(predictions_ARIMA_diff.head())

# Out-of-sample predictions for the months after the observed data.
future = results_ARIMA.predict('2017-10-01', '2018-01-01')
print(future.head())

# ``pd.concat`` instead of ``Series.append``, which pandas 2.0 removed.
pred = pd.concat([predictions_ARIMA_diff, future])
predictions_ARIMA_diff_cumsum = pred.cumsum()
print(predictions_ARIMA_diff_cumsum.head())

# Rebuild the log-scale series: first log observation plus the cumulative
# sum of the predicted differences.
predictions_ARIMA_log = pd.Series(ts_log.iloc[0], index=pred.index)
predictions_ARIMA_log = predictions_ARIMA_log.add(predictions_ARIMA_diff_cumsum, fill_value=0)
print(predictions_ARIMA_log.head())

# Back to the original scale.
predictions_ARIMA = np.exp(predictions_ARIMA_log)
plt.plot(ts)
plt.plot(predictions_ARIMA)
# RMSE over the dates present in BOTH series.  The subtraction aligns on the
# index, so dates found in only one series (e.g. the forecast months, which
# have no observation) become NaN; they are dropped, otherwise the whole
# error evaluates to NaN.
in_sample_error = (predictions_ARIMA - ts).dropna()
plt.title('RMSE: %.4f' % np.sqrt((in_sample_error ** 2).mean()))
plt.show()

# Spread of the forecast values.
predictionRange = predictions_ARIMA.loc['2017-10-01':'2018-01-01']
print(predictionRange.head())
std = predictionRange.std()
# Dispersion (variance) is the square of the standard deviation, not its
# square root.
print('Dispersion: %.4f' % (std ** 2))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Date | Windows | OS X | Linux | Unknown | iOS | Chrome OS | Android | Playstation | Other | |
---|---|---|---|---|---|---|---|---|---|---|
2009-01 | 95.42 | 3.68 | 0.64 | 0.17 | 0 | 0 | 0 | 0.08 | 0.02 | |
2009-02 | 95.39 | 3.76 | 0.62 | 0.14 | 0 | 0 | 0 | 0.07 | 0.02 | |
2009-03 | 95.22 | 3.87 | 0.65 | 0.16 | 0 | 0 | 0 | 0.08 | 0.02 | |
2009-04 | 95.13 | 3.92 | 0.66 | 0.17 | 0 | 0 | 0 | 0.1 | 0.02 | |
2009-05 | 95.25 | 3.75 | 0.65 | 0.24 | 0 | 0 | 0 | 0.09 | 0.02 | |
2009-06 | 94.76 | 4.07 | 0.74 | 0.28 | 0 | 0 | 0 | 0.11 | 0.03 | |
2009-07 | 94.83 | 4.12 | 0.76 | 0.13 | 0 | 0 | 0 | 0.14 | 0.02 | |
2009-08 | 94.69 | 4.35 | 0.69 | 0.1 | 0 | 0 | 0 | 0.15 | 0.02 | |
2009-09 | 94.61 | 4.44 | 0.7 | 0.1 | 0 | 0 | 0 | 0.12 | 0.03 | |
2009-10 | 94.35 | 4.71 | 0.68 | 0.1 | 0 | 0 | 0 | 0.13 | 0.02 | |
2009-11 | 93.98 | 4.96 | 0.72 | 0.17 | 0 | 0 | 0 | 0.14 | 0.03 | |
2009-12 | 94.19 | 4.72 | 0.68 | 0.23 | 0 | 0 | 0 | 0.15 | 0.02 | |
2010-01 | 93.76 | 5.16 | 0.7 | 0.2 | 0 | 0 | 0 | 0.15 | 0.02 | |
2010-02 | 93.36 | 5.63 | 0.74 | 0.11 | 0 | 0 | 0 | 0.13 | 0.03 | |
2010-03 | 93.17 | 5.78 | 0.78 | 0.11 | 0 | 0 | 0 | 0.13 | 0.03 | |
2010-04 | 92.96 | 5.92 | 0.79 | 0.15 | 0 | 0 | 0 | 0.14 | 0.03 | |
2010-05 | 93.04 | 5.77 | 0.81 | 0.18 | 0.03 | 0 | 0 | 0.14 | 0.03 | |
2010-06 | 93.27 | 5.56 | 0.8 | 0.08 | 0.1 | 0 | 0 | 0.15 | 0.03 | |
2010-07 | 93.29 | 5.53 | 0.77 | 0.08 | 0.14 | 0 | 0 | 0.15 | 0.03 | |
2010-08 | 93.11 | 5.66 | 0.79 | 0.08 | 0.18 | 0 | 0 | 0.15 | 0.03 | |
2010-09 | 92.88 | 5.9 | 0.78 | 0.08 | 0.2 | 0 | 0 | 0.14 | 0.02 | |
2010-10 | 92.72 | 6.05 | 0.77 | 0.07 | 0.23 | 0 | 0 | 0.14 | 0.02 | |
2010-11 | 92.51 | 6.22 | 0.78 | 0.07 | 0.26 | 0 | 0 | 0.13 | 0.02 | |
2010-12 | 92.42 | 6.25 | 0.75 | 0.08 | 0.33 | 0 | 0 | 0.15 | 0.02 | |
2011-01 | 92.02 | 6.56 | 0.74 | 0.07 | 0.44 | 0 | 0.01 | 0.15 | 0.02 | |
2011-02 | 91.98 | 6.59 | 0.76 | 0.07 | 0.43 | 0 | 0.01 | 0.14 | 0.02 | |
2011-03 | 92.01 | 6.53 | 0.76 | 0.07 | 0.46 | 0 | 0.02 | 0.13 | 0.02 | |
2011-04 | 91.98 | 6.47 | 0.76 | 0.07 | 0.53 | 0 | 0.03 | 0.14 | 0.02 | |
2011-05 | 91.95 | 6.35 | 0.81 | 0.08 | 0.61 | 0 | 0.04 | 0.15 | 0.02 | |
2011-06 | 91.94 | 6.27 | 0.79 | 0.08 | 0.7 | 0 | 0.05 | 0.14 | 0.02 | |
2011-07 | 91.87 | 6.23 | 0.76 | 0.07 | 0.83 | 0 | 0.07 | 0.15 | 0.03 | |
2011-08 | 91.75 | 6.28 | 0.78 | 0.04 | 0.9 | 0 | 0.08 | 0.15 | 0.02 | |
2011-09 | 91.11 | 6.78 | 0.8 | 0.06 | 1 | 0 | 0.08 | 0.15 | 0.02 | |
2011-10 | 90.47 | 7.18 | 0.84 | 0.07 | 1.12 | 0 | 0.14 | 0.15 | 0.03 | |
2011-11 | 90.5 | 7.05 | 0.84 | 0.04 | 1.24 | 0 | 0.15 | 0.14 | 0.04 | |
2011-12 | 90.29 | 7.01 | 0.83 | 0.07 | 1.41 | 0 | 0.19 | 0.15 | 0.06 | |
2012-01 | 89.62 | 7.33 | 0.82 | 0.07 | 1.71 | 0 | 0.24 | 0.14 | 0.07 | |
2012-02 | 89.5 | 7.41 | 0.83 | 0.08 | 1.81 | 0 | 0.26 | 0.04 | 0.07 | |
2012-03 | 89.37 | 7.32 | 0.83 | 0.14 | 1.89 | 0 | 0.28 | 0.06 | 0.1 | |
2012-04 | 88.75 | 7.66 | 0.85 | 0.05 | 2.12 | 0 | 0.31 | 0.12 | 0.14 | |
2012-05 | 88.81 | 7.45 | 0.86 | 0.05 | 2.24 | 0 | 0.33 | 0.12 | 0.14 | |
2012-06 | 88.89 | 7.05 | 0.83 | 0.05 | 2.4 | 0 | 0.37 | 0.13 | 0.26 | |
2012-07 | 88.85 | 6.92 | 0.83 | 0.06 | 2.64 | 0 | 0.4 | 0.13 | 0.16 | |
2012-08 | 91.54 | 7.41 | 0.85 | 0.06 | 0 | 0.01 | 0.03 | 0 | 0.11 | |
2012-09 | 91.18 | 7.75 | 0.88 | 0.06 | 0 | 0.01 | 0.02 | 0 | 0.11 | |
2012-10 | 91.04 | 7.87 | 0.9 | 0.06 | 0 | 0.01 | 0.03 | 0 | 0.09 | |
2012-11 | 91.02 | 7.86 | 0.9 | 0.06 | 0 | 0.01 | 0.05 | 0 | 0.09 | |
2012-12 | 91.22 | 7.69 | 0.88 | 0.07 | 0 | 0.01 | 0.09 | 0 | 0.04 | |
2013-01 | 90.96 | 7.95 | 0.88 | 0.07 | 0 | 0.01 | 0.12 | 0 | 0 | |
2013-02 | 90.87 | 7.95 | 0.93 | 0.07 | 0 | 0.02 | 0.15 | 0 | 0.01 | |
2013-03 | 91.16 | 7.7 | 0.95 | 0.08 | 0 | 0.02 | 0.08 | 0 | 0.01 | |
2013-04 | 91.28 | 7.47 | 1.05 | 0.08 | 0 | 0.02 | 0.1 | 0 | 0 | |
2013-05 | 90.84 | 7.61 | 1.28 | 0.09 | 0 | 0.02 | 0.15 | 0 | 0 | |
2013-06 | 90.43 | 7.87 | 1.3 | 0.12 | 0 | 0.02 | 0.25 | 0 | 0.01 | |
2013-07 | 90.49 | 7.81 | 1.23 | 0.08 | 0 | 0.03 | 0.35 | 0 | 0 | |
2013-08 | 90.56 | 7.63 | 1.23 | 0.09 | 0 | 0.03 | 0.46 | 0 | 0 | |
2013-09 | 90.89 | 7.41 | 1.02 | 0.1 | 0 | 0.04 | 0.54 | 0 | 0 | |
2013-10 | 90.63 | 7.42 | 1.1 | 0.1 | 0 | 0.04 | 0.71 | 0 | 0 | |
2013-11 | 90.34 | 7.45 | 1.15 | 0.11 | 0 | 0.05 | 0.9 | 0 | 0 | |
2013-12 | 89.55 | 7.83 | 1.13 | 0.08 | 0 | 0.1 | 1.3 | 0 | 0.01 | |
2014-01 | 88.87 | 8.35 | 1.13 | 0.05 | 0 | 0.14 | 1.45 | 0 | 0 | |
2014-02 | 89.65 | 8.39 | 1.14 | 0.26 | 0 | 0.16 | 0.38 | 0 | 0 | |
2014-03 | 89.61 | 8.58 | 1.16 | 0.47 | 0 | 0.18 | 0 | 0 | 0 | |
2014-04 | 89.18 | 8.85 | 1.34 | 0.44 | 0 | 0.19 | 0 | 0 | 0 | |
2014-05 | 88.83 | 8.85 | 1.58 | 0.54 | 0 | 0.19 | 0 | 0 | 0 | |
2014-06 | 89.27 | 8.56 | 1.37 | 0.61 | 0 | 0.19 | 0 | 0 | 0 | |
2014-07 | 89.25 | 8.59 | 1.34 | 0.63 | 0 | 0.19 | 0 | 0 | 0 | |
2014-08 | 89.15 | 8.65 | 1.38 | 0.61 | 0 | 0.21 | 0 | 0 | 0 | |
2014-09 | 88.49 | 9.15 | 1.39 | 0.72 | 0 | 0.25 | 0 | 0 | 0 | |
2014-10 | 88.28 | 9.13 | 1.41 | 0.9 | 0 | 0.27 | 0 | 0 | 0 | |
2014-11 | 88.47 | 8.98 | 1.33 | 0.93 | 0 | 0.28 | 0 | 0 | 0 | |
2014-12 | 88.74 | 8.67 | 1.39 | 0.91 | 0 | 0.29 | 0 | 0 | 0 | |
2015-01 | 88.19 | 9.1 | 1.46 | 0.91 | 0 | 0.33 | 0 | 0 | 0.01 | |
2015-02 | 88.08 | 9.09 | 1.55 | 0.94 | 0 | 0.34 | 0 | 0 | 0.01 | |
2015-03 | 86.99 | 9.83 | 1.82 | 1 | 0 | 0.34 | 0 | 0 | 0.01 | |
2015-04 | 87 | 9.61 | 1.91 | 1.12 | 0 | 0.35 | 0 | 0 | 0.01 | |
2015-05 | 86.09 | 10.51 | 1.77 | 1.23 | 0 | 0.38 | 0 | 0 | 0.02 | |
2015-06 | 86.3 | 10.17 | 1.77 | 1.46 | 0 | 0.29 | 0 | 0 | 0.02 | |
2015-07 | 87.7 | 8.6 | 1.52 | 1.9 | 0 | 0.26 | 0 | 0 | 0.01 | |
2015-08 | 88.07 | 8.18 | 1.5 | 1.94 | 0 | 0.3 | 0 | 0 | 0.01 | |
2015-09 | 87.48 | 8.63 | 1.55 | 1.93 | 0 | 0.41 | 0 | 0 | 0.01 | |
2015-10 | 86.44 | 9.02 | 1.57 | 2.51 | 0 | 0.46 | 0 | 0 | 0.01 | |
2015-11 | 85.84 | 9.36 | 1.5 | 2.81 | 0 | 0.47 | 0 | 0 | 0.01 | |
2015-12 | 84.89 | 9.8 | 1.48 | 3.31 | 0 | 0.51 | 0 | 0 | 0.01 | |
2016-01 | 85.18 | 9.03 | 1.47 | 3.8 | 0 | 0.51 | 0 | 0 | 0.01 | |
2016-02 | 84.82 | 9.33 | 1.47 | 3.83 | 0 | 0.55 | 0 | 0 | 0.01 | |
2016-03 | 85.89 | 9.38 | 1.45 | 2.74 | 0 | 0.53 | 0 | 0 | 0.01 | |
2016-04 | 85.3 | 9.52 | 1.55 | 3.06 | 0 | 0.55 | 0 | 0 | 0.01 | |
2016-05 | 84.54 | 9.83 | 1.44 | 3.59 | 0 | 0.59 | 0 | 0 | 0.01 | |
2016-06 | 84.1 | 9.95 | 1.47 | 4.06 | 0 | 0.41 | 0 | 0 | 0.01 | |
2016-07 | 83.33 | 9.61 | 1.54 | 5.16 | 0 | 0.35 | 0 | 0 | 0.01 | |
2016-08 | 82.45 | 9.81 | 1.52 | 5.81 | 0 | 0.41 | 0 | 0 | 0.01 | |
2016-09 | 81.34 | 10.09 | 1.49 | 6.42 | 0 | 0.65 | 0 | 0 | 0.01 | |
2016-10 | 80.84 | 10.88 | 1.44 | 6.14 | 0 | 0.7 | 0 | 0 | 0 | |
2016-11 | 83.12 | 11.15 | 1.53 | 3.45 | 0 | 0.75 | 0 | 0 | 0.01 | |
2016-12 | 84.27 | 11 | 1.57 | 2.42 | 0 | 0.74 | 0 | 0 | 0.01 | |
2017-01 | 84.4 | 11.2 | 1.55 | 2.07 | 0 | 0.77 | 0 | 0 | 0.01 | |
2017-02 | 84.14 | 11.6 | 1.53 | 1.9 | 0 | 0.83 | 0 | 0 | 0.01 | |
2017-03 | 84.34 | 11.68 | 1.54 | 1.59 | 0 | 0.84 | 0 | 0 | 0.01 | |
2017-04 | 84.22 | 11.61 | 1.68 | 1.73 | 0 | 0.75 | 0 | 0 | 0.01 | |
2017-05 | 83.92 | 11.76 | 1.66 | 1.83 | 0 | 0.82 | 0 | 0 | 0.01 | |
2017-06 | 84.32 | 11.59 | 1.74 | 1.78 | 0 | 0.55 | 0 | 0 | 0.01 | |
2017-07 | 84.46 | 11.32 | 1.79 | 1.97 | 0 | 0.45 | 0 | 0 | 0.01 | |
2017-08 | 83.53 | 11.95 | 1.79 | 2.15 | 0 | 0.56 | 0 | 0 | 0.01 | |
2017-09 | 83.28 | 12.15 | 1.66 | 2.06 | 0 | 0.84 | 0 | 0 | 0.01 |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment