simple time-series analysis of android market share data
| OS | Dec 2009 | Jan 2010 | Feb 2010 | Mar 2010 | Apr 2010 | May 2010 | Jun 2010 | Jul 2010 | Aug 2010 | Sep 2010 | Oct 2010 | Nov 2010 | Dec 2010 | Jan 2011 | Feb 2011 | Mar 2011 | Apr 2011 | May 2011 | Jun 2011 | Jul 2011 | Aug 2011 | Sep 2011 | Oct 2011 | Nov 2011 | Dec 2011 | Jan 2012 | Feb 2012 | Mar 2012 | Apr 2012 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| Android | 5.2 | 7.1 | 9 | 10.3 | 12 | 13 | 14.9 | 17 | 19.6 | 21.4 | 23.5 | 26 | 28.7 | 31.2 | 33 | 34.7 | 36.4 | 38.1 | 40.1 | 41.8 | 43.7 | 44.8 | 46.3 | 46.9 | 47.3 | 48.6 | 50.1 | 51 | 50.8 | |
| RIM | 41.6 | 43 | 42.1 | 41.7 | 41.1 | 41.7 | 40.1 | 39.3 | 37.6 | 37.3 | 35.8 | 33.5 | 31.6 | 30.4 | 28.9 | 27.1 | 25.7 | 24.7 | 23.4 | 21.7 | 19.7 | 18.9 | 17.2 | 16.2 | 16 | 15.2 | 13.2 | 12.3 | 11.6 | |
| Apple | 25.3 | 25.1 | 25.4 | 25.2 | 25.1 | 24.4 | 24.3 | 23.8 | 24.2 | 24.3 | 24.6 | 25 | 25 | 24.7 | 25.2 | 25.5 | 26 | 26.6 | 26.6 | 27 | 27.3 | 27.4 | 28.1 | 28.7 | 29.6 | 29.5 | 30.2 | 30.7 | 31.4 | |
| MS | 18 | 15.7 | 15.1 | 14.4 | 14 | 13.2 | 12.8 | 11.8 | 10.8 | 9.9 | 9.7 | 9 | 8.4 | 8 | 7.7 | 7.5 | 6.7 | 5.8 | 5.8 | 5.7 | 5.7 | 5.6 | 5.4 | 5.2 | 4.7 | 4.4 | 3.9 | 3.9 | 4 | |
| Smartphones | 39.4 | 42.7 | 45.4 | 46.8 | 48.1 | 49.1 | 49.9 | 53.4 | 55.7 | 58.7 | 60.7 | 61.5 | 63.2 | 65.8 | 69.5 | 72.5 | 74.6 | 76.8 | 78.5 | 82.2 | 84.5 | 87.4 | 90 | 91.4 | 97.9 | 101.3 | 104.3 | 106.3 | 107.3 |
| # | |
| # do some simple time series analysis on smartphone os data | |
| # data: http://www.catb.org/esr/comscore/ | |
| # commentary: http://esr.ibiblio.org/?p=4368 | |
| # | |
| # this file has the market share percentages table | |
| osdata = t(read.csv("os_stats.csv",header=TRUE,row.names=1)) | |
| # this file has the raw userbase in millions | |
| osdata2 = t(read.csv("os_stats_2.csv",header=TRUE,row.names=1)) | |
| # fit an ar1 with drift; see http://www.stat.pitt.edu/stoffer/tsa2/Rissues.htm | |
| # for why we use xreg this way to get useful predictions out of R's arima fits | |
| ar110 = arima(ts(osdata[,1]), order=c(1,1,0), xreg=1:length(osdata[,1])) | |
| # no drift | |
| ar110nd = arima(ts(osdata[,1]), order=c(1,1,0)) | |
| # fit the raw user numbers | |
| ar110_2 = arima(ts(osdata2[,1]), order=c(1,1,0), xreg=1:length(osdata[,1])) | |
| ar110nd_2 = arima(ts(osdata2[,1]), order=c(1,1,0)) | |
| # make predictions based on the various fits | |
| nahead=3 | |
| driftxreg=length(osdata[,1])+seq(1,nahead) | |
| ar110p = predict(ar110,n.ahead=nahead,newxreg=driftxreg) | |
| ar110pU = ar110p$pred + 2.0*ar110p$se | |
| ar110pL = ar110p$pred - 2.0*ar110p$se | |
| ar110ndp = predict(ar110nd,n.ahead=nahead) | |
| ar110ndpU = ar110ndp$pred + 2.0*ar110ndp$se | |
| ar110ndpL = ar110ndp$pred - 2.0*ar110ndp$se | |
| driftxreg2=length(osdata2[,1])+seq(1,nahead) | |
| ar110_2p = predict(ar110_2,n.ahead=nahead,newxreg=driftxreg2) | |
| ar110_2pU = ar110_2p$pred + 2.0*ar110_2p$se | |
| ar110_2pL = ar110_2p$pred - 2.0*ar110_2p$se | |
| ar110nd_2p = predict(ar110nd_2,n.ahead=nahead) | |
| ar110nd_2pU = ar110nd_2p$pred + 2.0*ar110nd_2p$se | |
| ar110nd_2pL = ar110nd_2p$pred - 2.0*ar110nd_2p$se | |
| # simulate a series from the drifting model | |
| totalxreg = c(0:length(osdata2[,1]),driftxreg2) | |
| nsims = 300 | |
| ar110_2sim = mat.or.vec(nsims,length(totalxreg)) | |
| for(i in 1:nsims){ | |
| ar110_2sim[i,] = arima.sim(list(order=c(1,1,0),ar=ar110_2$coef[1]), n=length(totalxreg)-1, start.innov=ar110_2$residuals, sd=sqrt(ar110_2$sigma2)) + ar110_2$coef[2]*totalxreg | |
| } | |
| png("android_share.png") | |
| plot(seq(0,length(osdata[,1])+nahead,length.out=10),seq(0,60,length.out=10),type="n",xlab="month",ylab="market share") | |
| title(main="Android Market Share Prediction, AR1 w/&wo/drift") | |
| points(osdata[,1]) | |
| lines(driftxreg, ar110p$pred[1:nahead], lty=1, col="darkblue") | |
| lines(driftxreg, ar110pU[1:nahead], lty=2, col="darkblue") | |
| lines(driftxreg, ar110pL[1:nahead], lty=2, col="darkblue") | |
| lines(driftxreg, ar110ndp$pred[1:nahead], lty=1, col="darkred") | |
| lines(driftxreg, ar110ndpU[1:nahead], lty=2, col="darkred") | |
| lines(driftxreg, ar110ndpL[1:nahead], lty=2, col="darkred") | |
| dev.off() | |
| png("android_users.png") | |
| plot(seq(0,length(osdata2[,1])+nahead,length.out=10),seq(0,60,length.out=10),type="n",xlab="month",ylab="users in millions") | |
| title(main="Android Users Prediction, AR1 w/&wo/drift") | |
| points(osdata2[,1]) | |
| for(i in 1:nsims){ | |
| lines(ar110_2sim[i,2:33],pch=2,col=rgb(0,0,1,alpha=0.1)) | |
| } | |
| lines(driftxreg2, ar110_2p$pred[1:nahead], lty=1, col="darkblue") | |
| lines(driftxreg2, ar110_2pU[1:nahead], lty=2, col="darkblue") | |
| lines(driftxreg2, ar110_2pL[1:nahead], lty=2, col="darkblue") | |
| lines(driftxreg2, ar110nd_2p$pred[1:nahead], lty=1, col="darkred") | |
| lines(driftxreg2, ar110nd_2pU[1:nahead], lty=2, col="darkred") | |
| lines(driftxreg2, ar110nd_2pL[1:nahead], lty=2, col="darkred") | |
| dev.off() |
| OS | Dec 2009 | Jan 2010 | Feb 2010 | Mar 2010 | Apr 2010 | May 2010 | Jun 2010 | Jul 2010 | Aug 2010 | Sep 2010 | Oct 2010 | Nov 2010 | Dec 2010 | Jan 2011 | Feb 2011 | Mar 2011 | Apr 2011 | May 2011 | Jun 2011 | Jul 2011 | Aug 2011 | Sep 2011 | Oct 2011 | Nov 2011 | Dec 2011 | Jan 2012 | Feb 2012 | Mar 2012 | Apr 2012 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| Android | 2.05 | 3.03 | 4.09 | 4.82 | 5.77 | 6.38 | 7.44 | 9.08 | 10.92 | 12.56 | 14.26 | 15.99 | 18.14 | 20.53 | 22.93 | 25.16 | 27.15 | 29.26 | 31.48 | 34.36 | 36.93 | 39.16 | 41.67 | 42.87 | 46.31 | 49.23 | 52.25 | 54.21 | 54.51 | |
| RIM | 16.39 | 18.36 | 19.11 | 19.52 | 19.77 | 20.47 | 20.01 | 20.99 | 20.94 | 21.9 | 21.73 | 20.6 | 19.97 | 20 | 20.09 | 19.65 | 19.17 | 18.97 | 18.37 | 17.84 | 16.65 | 16.52 | 15.48 | 14.81 | 15.66 | 15.4 | 13.77 | 13.07 | 12.45 | |
| Apple | 9.97 | 10.72 | 11.53 | 11.79 | 12.07 | 11.98 | 12.13 | 12.71 | 13.48 | 14.26 | 14.93 | 15.38 | 15.8 | 16.25 | 17.51 | 18.49 | 19.4 | 20.43 | 20.88 | 22.19 | 23.07 | 23.95 | 25.29 | 26.23 | 28.98 | 29.88 | 31.5 | 32.63 | 33.69 | |
| MS | 7.09 | 6.7 | 6.86 | 6.74 | 6.73 | 6.48 | 6.39 | 6.3 | 6.02 | 5.81 | 5.89 | 5.54 | 5.31 | 5.26 | 5.35 | 5.44 | 5 | 4.45 | 4.55 | 4.69 | 4.82 | 4.89 | 4.86 | 4.75 | 4.6 | 4.46 | 4.07 | 4.15 | 4.29 | |
| Total | 39.4 | 42.7 | 45.4 | 46.8 | 48.1 | 49.1 | 49.9 | 53.4 | 55.7 | 58.7 | 60.7 | 61.5 | 63.2 | 65.8 | 69.5 | 72.5 | 74.6 | 76.8 | 78.5 | 82.2 | 84.5 | 87.4 | 90 | 91.4 | 97.9 | 101.3 | 104.3 | 106.3 | 107.3 |
Owner
jstults
commented
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Here's the plot that this script makes.