jstults/os_stats.R

## os_stats.csv

          
            OS 
            Dec 2009
            Jan 2010
            Feb 2010
            Mar 2010
            Apr 2010
            May 2010
            Jun 2010
            Jul 2010
            Aug 2010
            Sep 2010
            Oct 2010
            Nov 2010
            Dec 2010
            Jan 2011
            Feb 2011
            Mar 2011
            Apr 2011
            May 2011
            Jun 2011
            Jul 2011
            Aug 2011
            Sep 2011
            Oct 2011
            Nov 2011
            Dec 2011
            Jan 2012
            Feb 2012
            Mar 2012
            Apr 2012

            
              Android
              5.2
              7.1
              9
              10.3
              12
              13
              14.9
              17
              19.6
              21.4
              23.5
              26
              28.7
              31.2
              33
              34.7
              36.4
              38.1
              40.1
              41.8
              43.7
              44.8
              46.3
              46.9
              47.3
              48.6
              50.1
              51
              50.8

            
              RIM
              41.6
              43
              42.1
              41.7
              41.1
              41.7
              40.1
              39.3
              37.6
              37.3
              35.8
              33.5
              31.6
              30.4
              28.9
              27.1
              25.7
              24.7
              23.4
              21.7
              19.7
              18.9
              17.2
              16.2
              16
              15.2
              13.2
              12.3
              11.6

            
              Apple
              25.3
              25.1
              25.4
              25.2
              25.1
              24.4
              24.3
              23.8
              24.2
              24.3
              24.6
              25
              25
              24.7
              25.2
              25.5
              26
              26.6
              26.6
              27
              27.3
              27.4
              28.1
              28.7
              29.6
              29.5
              30.2
              30.7
              31.4

            
              MS
              18
              15.7
              15.1
              14.4
              14
              13.2
              12.8
              11.8
              10.8
              9.9
              9.7
              9
              8.4
              8
              7.7
              7.5
              6.7
              5.8
              5.8
              5.7
              5.7
              5.6
              5.4
              5.2
              4.7
              4.4
              3.9
              3.9
              4

            
              Smartphones
              39.4
              42.7
              45.4
              46.8
              48.1
              49.1
              49.9
              53.4
              55.7
              58.7
              60.7
              61.5
              63.2
              65.8
              69.5
              72.5
              74.6
              76.8
              78.5
              82.2
              84.5
              87.4
              90
              91.4
              97.9
              101.3
              104.3
              106.3
              107.3

## os_stats.R
#
# do some simple time series analysis on smartphone os data
# data: http://www.catb.org/esr/comscore/
# commentary: http://esr.ibiblio.org/?p=4368
#

# this file has the market share percentages table
osdata = t(read.csv("os_stats.csv",header=TRUE,row.names=1))
# this file has the raw userbase in millions
osdata2 = t(read.csv("os_stats_2.csv",header=TRUE,row.names=1))

# fit an ar1 with drift; see http://www.stat.pitt.edu/stoffer/tsa2/Rissues.htm
# for why we use xreg this way to get useful predictions out of R's arima fits
ar110 = arima(ts(osdata[,1]), order=c(1,1,0), xreg=1:length(osdata[,1]))
# no drift
ar110nd = arima(ts(osdata[,1]), order=c(1,1,0))

# fit the raw user numbers
ar110_2 = arima(ts(osdata2[,1]), order=c(1,1,0), xreg=1:length(osdata[,1]))
ar110nd_2 = arima(ts(osdata2[,1]), order=c(1,1,0))

# make predictions based on the various fits
nahead=3
driftxreg=length(osdata[,1])+seq(1,nahead)
ar110p = predict(ar110,n.ahead=nahead,newxreg=driftxreg)
ar110pU = ar110p$pred + 2.0*ar110p$se
ar110pL = ar110p$pred - 2.0*ar110p$se
ar110ndp = predict(ar110nd,n.ahead=nahead)
ar110ndpU = ar110ndp$pred + 2.0*ar110ndp$se
ar110ndpL = ar110ndp$pred - 2.0*ar110ndp$se

driftxreg2=length(osdata2[,1])+seq(1,nahead)
ar110_2p = predict(ar110_2,n.ahead=nahead,newxreg=driftxreg2)
ar110_2pU = ar110_2p$pred + 2.0*ar110_2p$se
ar110_2pL = ar110_2p$pred - 2.0*ar110_2p$se
ar110nd_2p = predict(ar110nd_2,n.ahead=nahead)
ar110nd_2pU = ar110nd_2p$pred + 2.0*ar110nd_2p$se
ar110nd_2pL = ar110nd_2p$pred - 2.0*ar110nd_2p$se

# simulate a series from the drifting model
totalxreg = c(0:length(osdata2[,1]),driftxreg2)
nsims = 300
ar110_2sim = mat.or.vec(nsims,length(totalxreg))
for(i in 1:nsims){
  ar110_2sim[i,] = arima.sim(list(order=c(1,1,0),ar=ar110_2$coef[1]), n=length(totalxreg)-1, start.innov=ar110_2$residuals, sd=sqrt(ar110_2$sigma2)) + ar110_2$coef[2]*totalxreg
}

png("android_share.png")
plot(seq(0,length(osdata[,1])+nahead,length.out=10),seq(0,60,length.out=10),type="n",xlab="month",ylab="market share")
title(main="Android Market Share Prediction, AR1 w/&wo/drift")
points(osdata[,1])
lines(driftxreg, ar110p$pred[1:nahead], lty=1, col="darkblue")
lines(driftxreg, ar110pU[1:nahead], lty=2, col="darkblue")
lines(driftxreg, ar110pL[1:nahead], lty=2, col="darkblue")
lines(driftxreg, ar110ndp$pred[1:nahead], lty=1, col="darkred")
lines(driftxreg, ar110ndpU[1:nahead], lty=2, col="darkred")
lines(driftxreg, ar110ndpL[1:nahead], lty=2, col="darkred")
dev.off()

png("android_users.png")
plot(seq(0,length(osdata2[,1])+nahead,length.out=10),seq(0,60,length.out=10),type="n",xlab="month",ylab="users in millions")
title(main="Android Users Prediction, AR1 w/&wo/drift")
points(osdata2[,1])
for(i in 1:nsims){
  lines(ar110_2sim[i,2:33],pch=2,col=rgb(0,0,1,alpha=0.1))
}
lines(driftxreg2, ar110_2p$pred[1:nahead], lty=1, col="darkblue")
lines(driftxreg2, ar110_2pU[1:nahead], lty=2, col="darkblue")
lines(driftxreg2, ar110_2pL[1:nahead], lty=2, col="darkblue")
lines(driftxreg2, ar110nd_2p$pred[1:nahead], lty=1, col="darkred")
lines(driftxreg2, ar110nd_2pU[1:nahead], lty=2, col="darkred")
lines(driftxreg2, ar110nd_2pL[1:nahead], lty=2, col="darkred")
dev.off()

## os_stats_2.csv

          
            OS 
            Dec 2009
            Jan 2010
            Feb 2010
            Mar 2010
            Apr 2010
            May 2010
            Jun 2010
            Jul 2010
            Aug 2010
            Sep 2010
            Oct 2010
            Nov 2010
            Dec 2010
            Jan 2011
            Feb 2011
            Mar 2011
            Apr 2011
            May 2011
            Jun 2011
            Jul 2011
            Aug 2011
            Sep 2011
            Oct 2011
            Nov 2011
            Dec 2011
            Jan 2012
            Feb 2012
            Mar 2012
            Apr 2012

            
              Android
              2.05
              3.03
              4.09
              4.82
              5.77
              6.38
              7.44
              9.08
              10.92
              12.56
              14.26
              15.99
              18.14
              20.53
              22.93
              25.16
              27.15
              29.26
              31.48
              34.36
              36.93
              39.16
              41.67
              42.87
              46.31
              49.23
              52.25
              54.21
              54.51

            
              RIM
              16.39
              18.36
              19.11
              19.52
              19.77
              20.47
              20.01
              20.99
              20.94
              21.9
              21.73
              20.6
              19.97
              20
              20.09
              19.65
              19.17
              18.97
              18.37
              17.84
              16.65
              16.52
              15.48
              14.81
              15.66
              15.4
              13.77
              13.07
              12.45

            
              Apple
              9.97
              10.72
              11.53
              11.79
              12.07
              11.98
              12.13
              12.71
              13.48
              14.26
              14.93
              15.38
              15.8
              16.25
              17.51
              18.49
              19.4
              20.43
              20.88
              22.19
              23.07
              23.95
              25.29
              26.23
              28.98
              29.88
              31.5
              32.63
              33.69

            
              MS
              7.09
              6.7
              6.86
              6.74
              6.73
              6.48
              6.39
              6.3
              6.02
              5.81
              5.89
              5.54
              5.31
              5.26
              5.35
              5.44
              5
              4.45
              4.55
              4.69
              4.82
              4.89
              4.86
              4.75
              4.6
              4.46
              4.07
              4.15
              4.29

            
              Total
              39.4
              42.7
              45.4
              46.8
              48.1
              49.1
              49.9
              53.4
              55.7
              58.7
              60.7
              61.5
              63.2
              65.8
              69.5
              72.5
              74.6
              76.8
              78.5
              82.2
              84.5
              87.4
              90
              91.4
              97.9
              101.3
              104.3
              106.3
              107.3
OS	Dec 2009	Jan 2010	Feb 2010	Mar 2010	Apr 2010	May 2010	Jun 2010	Jul 2010	Aug 2010	Sep 2010	Oct 2010	Nov 2010	Dec 2010	Jan 2011	Feb 2011	Mar 2011	Apr 2011	May 2011	Jun 2011	Jul 2011	Aug 2011	Sep 2011	Oct 2011	Nov 2011	Dec 2011	Jan 2012	Feb 2012	Mar 2012	Apr 2012
Android	5.2	7.1	9	10.3	12	13	14.9	17	19.6	21.4	23.5	26	28.7	31.2	33	34.7	36.4	38.1	40.1	41.8	43.7	44.8	46.3	46.9	47.3	48.6	50.1	51	50.8
RIM	41.6	43	42.1	41.7	41.1	41.7	40.1	39.3	37.6	37.3	35.8	33.5	31.6	30.4	28.9	27.1	25.7	24.7	23.4	21.7	19.7	18.9	17.2	16.2	16	15.2	13.2	12.3	11.6
Apple	25.3	25.1	25.4	25.2	25.1	24.4	24.3	23.8	24.2	24.3	24.6	25	25	24.7	25.2	25.5	26	26.6	26.6	27	27.3	27.4	28.1	28.7	29.6	29.5	30.2	30.7	31.4
MS	18	15.7	15.1	14.4	14	13.2	12.8	11.8	10.8	9.9	9.7	9	8.4	8	7.7	7.5	6.7	5.8	5.8	5.7	5.7	5.6	5.4	5.2	4.7	4.4	3.9	3.9	4
Smartphones	39.4	42.7	45.4	46.8	48.1	49.1	49.9	53.4	55.7	58.7	60.7	61.5	63.2	65.8	69.5	72.5	74.6	76.8	78.5	82.2	84.5	87.4	90	91.4	97.9	101.3	104.3	106.3	107.3
	#
	# do some simple time series analysis on smartphone os data
	# data: http://www.catb.org/esr/comscore/
	# commentary: http://esr.ibiblio.org/?p=4368
	#

	# this file has the market share percentages table
	osdata = t(read.csv("os_stats.csv",header=TRUE,row.names=1))
	# this file has the raw userbase in millions
	osdata2 = t(read.csv("os_stats_2.csv",header=TRUE,row.names=1))

	# fit an ar1 with drift; see http://www.stat.pitt.edu/stoffer/tsa2/Rissues.htm
	# for why we use xreg this way to get useful predictions out of R's arima fits
	ar110 = arima(ts(osdata[,1]), order=c(1,1,0), xreg=1:length(osdata[,1]))
	# no drift
	ar110nd = arima(ts(osdata[,1]), order=c(1,1,0))

	# fit the raw user numbers
	ar110_2 = arima(ts(osdata2[,1]), order=c(1,1,0), xreg=1:length(osdata[,1]))
	ar110nd_2 = arima(ts(osdata2[,1]), order=c(1,1,0))

	# make predictions based on the various fits
	nahead=3
	driftxreg=length(osdata[,1])+seq(1,nahead)
	ar110p = predict(ar110,n.ahead=nahead,newxreg=driftxreg)
	ar110pU = ar110p$pred + 2.0*ar110p$se
	ar110pL = ar110p$pred - 2.0*ar110p$se
	ar110ndp = predict(ar110nd,n.ahead=nahead)
	ar110ndpU = ar110ndp$pred + 2.0*ar110ndp$se
	ar110ndpL = ar110ndp$pred - 2.0*ar110ndp$se

	driftxreg2=length(osdata2[,1])+seq(1,nahead)
	ar110_2p = predict(ar110_2,n.ahead=nahead,newxreg=driftxreg2)
	ar110_2pU = ar110_2p$pred + 2.0*ar110_2p$se
	ar110_2pL = ar110_2p$pred - 2.0*ar110_2p$se
	ar110nd_2p = predict(ar110nd_2,n.ahead=nahead)
	ar110nd_2pU = ar110nd_2p$pred + 2.0*ar110nd_2p$se
	ar110nd_2pL = ar110nd_2p$pred - 2.0*ar110nd_2p$se

	# simulate a series from the drifting model
	totalxreg = c(0:length(osdata2[,1]),driftxreg2)
	nsims = 300
	ar110_2sim = mat.or.vec(nsims,length(totalxreg))
	for(i in 1:nsims){
	ar110_2sim[i,] = arima.sim(list(order=c(1,1,0),ar=ar110_2$coef[1]), n=length(totalxreg)-1, start.innov=ar110_2$residuals, sd=sqrt(ar110_2$sigma2)) + ar110_2$coef[2]*totalxreg
	}

	png("android_share.png")
	plot(seq(0,length(osdata[,1])+nahead,length.out=10),seq(0,60,length.out=10),type="n",xlab="month",ylab="market share")
	title(main="Android Market Share Prediction, AR1 w/&wo/drift")
	points(osdata[,1])
	lines(driftxreg, ar110p$pred[1:nahead], lty=1, col="darkblue")
	lines(driftxreg, ar110pU[1:nahead], lty=2, col="darkblue")
	lines(driftxreg, ar110pL[1:nahead], lty=2, col="darkblue")
	lines(driftxreg, ar110ndp$pred[1:nahead], lty=1, col="darkred")
	lines(driftxreg, ar110ndpU[1:nahead], lty=2, col="darkred")
	lines(driftxreg, ar110ndpL[1:nahead], lty=2, col="darkred")
	dev.off()

	png("android_users.png")
	plot(seq(0,length(osdata2[,1])+nahead,length.out=10),seq(0,60,length.out=10),type="n",xlab="month",ylab="users in millions")
	title(main="Android Users Prediction, AR1 w/&wo/drift")
	points(osdata2[,1])
	for(i in 1:nsims){
	lines(ar110_2sim[i,2:33],pch=2,col=rgb(0,0,1,alpha=0.1))
	}
	lines(driftxreg2, ar110_2p$pred[1:nahead], lty=1, col="darkblue")
	lines(driftxreg2, ar110_2pU[1:nahead], lty=2, col="darkblue")
	lines(driftxreg2, ar110_2pL[1:nahead], lty=2, col="darkblue")
	lines(driftxreg2, ar110nd_2p$pred[1:nahead], lty=1, col="darkred")
	lines(driftxreg2, ar110nd_2pU[1:nahead], lty=2, col="darkred")
	lines(driftxreg2, ar110nd_2pL[1:nahead], lty=2, col="darkred")
	dev.off()