## Adam Hyland Protonk

## gist:712312
#The following code requires a few packages to be installed, namely animation, WDI, and ggplot2.
#You can install each of them by typing install.packages("ggplot2"), substituting the name of the
#package you need. This requires an internet connection, because the package is downloaded from a mirror.

#Loads a library to access World Bank data automatically.
library(WDI);
#Loads a graphics library which makes nicer graphs than base R.
library(ggplot2);

#How does this change across years?

## gist:712327
#The following code requires a few packages to be installed, namely animation, WDI, and ggplot2.
#You can install each of them by typing install.packages("ggplot2"), substituting the name of the
#package you need. This requires an internet connection, because the package is downloaded from a mirror.

#Loads a library to access World Bank data automatically.
library(WDI);
#Loads a graphics library which makes nicer graphs than base R.
library(ggplot2);

#How does this change across years?

## gist:712329

#Plots
#All three plots read the data frame GDP.giant, which must already exist and provide the
#columns GDP.per, lambda, region and year.

#Scatter of lambda against log(GDP.per) with a fitted linear trend line.
overall.plot <- qplot(log(GDP.per), lambda, data = GDP.giant, geom = "point", alpha = I(0.3)) +
  stat_smooth(method = "lm")

#The same scatter, coloured by region and split into one panel per region.
#log="x" is no longer passed to qplot(): x is already log(GDP.per), and the log scale that
#argument added was replaced by scale_x_continuous('') below, so it only produced a
#"scale already present" message.
#theme() is used because opts() has been removed from ggplot2.
byregion.plot <- qplot(log(GDP.per), lambda, data = GDP.giant, colour = region, geom = "point", alpha = I(0.3)) +
  facet_wrap(~region) +
  scale_y_continuous(limits = c(0, 4)) +
  scale_x_continuous('') +
  theme(aspect.ratio = 0.5)

#Smoothed lambda over the years, one curve per region, with the y axis limited to [0, 2].
lambda.year.plot <- qplot(year, lambda, data = GDP.giant, colour = region, geom = "smooth") +
  scale_y_continuous(limits = c(0, 2))


## gist:712330
#Build a dataframe for the value of Beta Across the years
#For every year present in GDP.giant, fit lambda ~ log(GDP.per) on that year's rows and
#record the slope's point estimate (row 1) and the standard error of that estimate (row 2)
#in the matrix beta.years. Column k holds the year first.year + k - 1; a year inside the
#span that has no rows keeps its initial value of 0.
beta.year.values <- sort(unique(GDP.giant$year))
first.year <- beta.year.values[1]
#Size the matrix from the observed span of years instead of a fixed 30 columns, so a longer
#panel cannot index past the last column.
year.span <- if (length(beta.year.values) > 0) max(beta.year.values) - first.year + 1 else 0
beta.years <- matrix(0, 2, year.span)
for (i in beta.year.values) {
  #Fit once per year; row 2 of the coefficient table is the log(GDP.per) term and its first
  #two entries are the estimate and its standard error.
  slope.row <- coef(summary(lm(lambda ~ log(GDP.per), data = subset(GDP.giant, year == i))))[2, ]
  beta.years[, i - first.year + 1] <- slope.row[1:2]
}

## taxes.R
library(WDI)
library(ggplot2)
#ddply(), .() and summarise() below come from plyr; attach it explicitly rather than relying
#on another package having loaded it.
library(plyr)
#Download three World Bank indicators for 2001-2009; extra=TRUE adds the region column used
#below. The names of this vector are the column names the indicators are given afterwards.
tax.indicators <- c(Tax.Revenue = "GC.TAX.TOTL.GD.ZS", GDP.Growth = "NY.GDP.MKTP.KD.ZG", GDP = "NY.GDP.PCAP.KD")
taxes.df <- WDI(country = "all", indicator = unname(tax.indicators), start = 2001, end = 2009, extra = TRUE)
#Rename the indicator columns by looking up their codes instead of assuming they sit in
#positions 4:6, because the column order of WDI()'s result is not the same in every version
#of the package.
names(taxes.df)[match(tax.indicators, names(taxes.df))] <- names(tax.indicators)
#Drop the rows whose region is "Aggregates".
taxes.df <- subset(taxes.df, taxes.df$region != "Aggregates")
#Average each of the three series per country over the downloaded years, ignoring NAs.
mean.taxes.df <- ddply(taxes.df, .(country), summarise,
  growth = mean(GDP.Growth, na.rm = TRUE),
  tax = mean(Tax.Revenue, na.rm = TRUE),
  GDP = mean(GDP, na.rm = TRUE))
#Growth against tax share, with point size mapped to GDP. The green points are overplotted
#with semi-transparent default-coloured points.
#scale_size(range=) stands in for scale_area(to=), which is no longer available in ggplot2.
taxes.plot <- ggplot(mean.taxes.df, aes(tax, growth, size = GDP)) +
  geom_point(colour = "light green") +
  geom_point(alpha = 0.4) +
  scale_x_continuous("Taxes as a Percentage of GDP", limits = c(0, 40)) +
  scale_y_continuous("GDP Growth Rate") +
  scale_size("GDP per Capita", range = c(1, 15))

## growth path.R
library(WDI)
library(ggplot2)
#Fetch the NY.GDP.MKTP.KD.ZG series for country="all" from 1961 through 2009, including the
#extra metadata columns.
growth.df <- WDI(
  country = "all",
  indicator = "NY.GDP.MKTP.KD.ZG",
  start = 1961,
  end = 2009,
  extra = TRUE
)
#Keep only the rows whose region is something other than "Aggregates".
growth.df <- growth.df[which(growth.df$region != "Aggregates"), ]
#One faint line per country, so that the overall spread of growth paths is visible.
ggplot(growth.df, aes(year, NY.GDP.MKTP.KD.ZG, group = country)) +
  geom_line(alpha = 0.1) +
  scale_y_continuous("GDP Growth")

## Differences and spread.R
library(WDI)
library(ggplot2)
#Need it for GLS later
library(nlme)
#Download the NY.GDP.MKTP.KD.ZG series for 1961-2009; extra=TRUE adds the region column
#used to drop the "Aggregates" rows.
growth.df <- WDI(country = "all", indicator = "NY.GDP.MKTP.KD.ZG", start = 1961, end = 2009, extra = TRUE)
growth.df <- subset(growth.df, growth.df$region != "Aggregates")
#Pick the three columns by name rather than by position (2:4): the position of the year and
#indicator columns in WDI()'s result is not the same in every version of the package, and a
#positional pick would then silently mislabel them.
growth.df <- growth.df[, c("country", "year", "NY.GDP.MKTP.KD.ZG")]
names(growth.df) <- c("country", "year", "growth")
growth.df$country <- as.factor(growth.df$country)

## growth time series.R
library(WDI)
#cast() and melt.data.frame() below come from the reshape package; attach it explicitly
#rather than relying on another package having loaded it.
library(reshape)
#Download the NY.GDP.MKTP.KD.ZG series for 1961-2009; extra=TRUE adds the region column
#used to drop the "Aggregates" rows.
growth.df <- WDI(country = "all", indicator = "NY.GDP.MKTP.KD.ZG", start = 1961, end = 2009, extra = TRUE)
growth.df <- subset(growth.df, growth.df$region != "Aggregates")
#Pick the three columns by name rather than by position (2:4): the position of the year and
#indicator columns in WDI()'s result is not the same in every version of the package, and a
#positional pick would then silently mislabel them.
growth.df <- growth.df[, c("country", "year", "NY.GDP.MKTP.KD.ZG")]
names(growth.df) <- c("country", "year", "growth")
growth.df$country <- as.factor(growth.df$country)

#TS stuff
#Reshape from long to wide (one row per year, one column per country), then wrap the result
#in an annual time series starting in 1961.
pre.ts <- cast(melt.data.frame(growth.df, id.vars = c("country", "year")), year ~ country)
growth.ts <- ts(pre.ts, start = 1961, end = 2009, frequency = 1)

## mean reversion.R
#Downloads the data
library(WDI)

#NY.GDP.MKTP.KD.ZG series for 1961-2009; extra=TRUE adds the region column used to drop
#the "Aggregates" rows.
growth.df <- WDI(country = "all", indicator = "NY.GDP.MKTP.KD.ZG", start = 1961, end = 2009, extra = TRUE)
growth.df <- subset(growth.df, growth.df$region != "Aggregates")
#Pick the three columns by name rather than by position (2:4): the position of the year and
#indicator columns in WDI()'s result is not the same in every version of the package, and a
#positional pick would then silently mislabel them.
growth.df <- growth.df[, c("country", "year", "NY.GDP.MKTP.KD.ZG")]
names(growth.df) <- c("country", "year", "growth")
growth.df$country <- as.factor(growth.df$country)
#Keep only rows with no missing values; complete.cases() never returns NA, so it can index
#the rows directly.
growth.df <- growth.df[complete.cases(growth.df), ]

## lorenz.R
#Income share held by each population quartile (rows) in three regions (columns); every
#column sums to 1.
gini <- cbind(c(0.05, 0.15, 0.30, 0.50), c(0.1, 0.15, 0.2, 0.55), c(0.02, 0.1, 0.25, 0.63))
rownames(gini) <- c("Bottom 25%", "Lower Middle 25%", "Upper Middle 25%", "Top 25%")
#cumsum() is one of those functions that eliminates a lot of effort
#adds a row of zeros on the top so that we can better visualize what a cumulative sum is
lorenz <- rbind(c(0, 0, 0), apply(gini, 2, cumsum))
colnames(lorenz) <- c("Region A", "Region B", "Region C")

#Plot lorenz curves
#x positions of the five cumulative points: 0%, 25%, ..., 100% of the population.
pop.share <- seq(0, 1, by = 0.25)
plot(pop.share, lorenz[, 1], type = "l", col = 1, main = "Lorenz Curves for Regions A, B, & C", ylab = "Share of Income", xlab = "Share of Population")
#The legend names four curves but only Region A used to be drawn. Add Regions B and C and
#the 45-degree line of equal distribution, using the same colours as the legend boxes.
lines(pop.share, lorenz[, 2], col = 2)
lines(pop.share, lorenz[, 3], col = 3)
lines(pop.share, pop.share, col = 4)
legend(0.1, 0.9, c("Region A", "Region B", "Region C", "Equal\nDistribution"), fill = c(1, 2, 3, 4))
	#The following code requires a few packages to be installed, namely animation, WDI, and ggplot2.
	#You can install each of them by typing install.packages("ggplot2"), substituting the name of the
	#package you need. This requires an internet connection, because the package is downloaded from a mirror.

	#Loads a library to access World Bank data automatically.
	library(WDI);
	#Loads a graphics library which makes nicer graphs than base R.
	library(ggplot2);

	#How does this change across years?

	#Plots
	#All three plots read the data frame GDP.giant, which must already exist and provide the
	#columns GDP.per, lambda, region and year.

	#Scatter of lambda against log(GDP.per) with a fitted linear trend line.
	overall.plot <- qplot(log(GDP.per), lambda, data = GDP.giant, geom = "point", alpha = I(0.3)) +
	  stat_smooth(method = "lm")

	#The same scatter, coloured by region and split into one panel per region.
	#log="x" is no longer passed to qplot(): x is already log(GDP.per), and the log scale that
	#argument added was replaced by scale_x_continuous('') below, so it only produced a
	#"scale already present" message.
	#theme() is used because opts() has been removed from ggplot2.
	byregion.plot <- qplot(log(GDP.per), lambda, data = GDP.giant, colour = region, geom = "point", alpha = I(0.3)) +
	  facet_wrap(~region) +
	  scale_y_continuous(limits = c(0, 4)) +
	  scale_x_continuous('') +
	  theme(aspect.ratio = 0.5)

	#Smoothed lambda over the years, one curve per region, with the y axis limited to [0, 2].
	lambda.year.plot <- qplot(year, lambda, data = GDP.giant, colour = region, geom = "smooth") +
	  scale_y_continuous(limits = c(0, 2))
	#Build a dataframe for the value of Beta Across the years
	#For every year present in GDP.giant, fit lambda ~ log(GDP.per) on that year's rows and
	#record the slope's point estimate (row 1) and the standard error of that estimate (row 2)
	#in the matrix beta.years. Column k holds the year first.year + k - 1; a year inside the
	#span that has no rows keeps its initial value of 0.
	beta.year.values <- sort(unique(GDP.giant$year))
	first.year <- beta.year.values[1]
	#Size the matrix from the observed span of years instead of a fixed 30 columns, so a longer
	#panel cannot index past the last column.
	year.span <- if (length(beta.year.values) > 0) max(beta.year.values) - first.year + 1 else 0
	beta.years <- matrix(0, 2, year.span)
	for (i in beta.year.values) {
	  #Fit once per year; row 2 of the coefficient table is the log(GDP.per) term and its first
	  #two entries are the estimate and its standard error.
	  slope.row <- coef(summary(lm(lambda ~ log(GDP.per), data = subset(GDP.giant, year == i))))[2, ]
	  beta.years[, i - first.year + 1] <- slope.row[1:2]
	}
	library(WDI)
	library(ggplot2)
	#ddply(), .() and summarise() below come from plyr; attach it explicitly rather than relying
	#on another package having loaded it.
	library(plyr)
	#Download three World Bank indicators for 2001-2009; extra=TRUE adds the region column used
	#below. The names of this vector are the column names the indicators are given afterwards.
	tax.indicators <- c(Tax.Revenue = "GC.TAX.TOTL.GD.ZS", GDP.Growth = "NY.GDP.MKTP.KD.ZG", GDP = "NY.GDP.PCAP.KD")
	taxes.df <- WDI(country = "all", indicator = unname(tax.indicators), start = 2001, end = 2009, extra = TRUE)
	#Rename the indicator columns by looking up their codes instead of assuming they sit in
	#positions 4:6, because the column order of WDI()'s result is not the same in every version
	#of the package.
	names(taxes.df)[match(tax.indicators, names(taxes.df))] <- names(tax.indicators)
	#Drop the rows whose region is "Aggregates".
	taxes.df <- subset(taxes.df, taxes.df$region != "Aggregates")
	#Average each of the three series per country over the downloaded years, ignoring NAs.
	mean.taxes.df <- ddply(taxes.df, .(country), summarise,
	  growth = mean(GDP.Growth, na.rm = TRUE),
	  tax = mean(Tax.Revenue, na.rm = TRUE),
	  GDP = mean(GDP, na.rm = TRUE))
	#Growth against tax share, with point size mapped to GDP. The green points are overplotted
	#with semi-transparent default-coloured points.
	#scale_size(range=) stands in for scale_area(to=), which is no longer available in ggplot2.
	taxes.plot <- ggplot(mean.taxes.df, aes(tax, growth, size = GDP)) +
	  geom_point(colour = "light green") +
	  geom_point(alpha = 0.4) +
	  scale_x_continuous("Taxes as a Percentage of GDP", limits = c(0, 40)) +
	  scale_y_continuous("GDP Growth Rate") +
	  scale_size("GDP per Capita", range = c(1, 15))
	library(WDI)
	library(ggplot2)
	#Fetch the NY.GDP.MKTP.KD.ZG series for country="all" from 1961 through 2009, including the
	#extra metadata columns.
	growth.df <- WDI(
	  country = "all",
	  indicator = "NY.GDP.MKTP.KD.ZG",
	  start = 1961,
	  end = 2009,
	  extra = TRUE
	)
	#Keep only the rows whose region is something other than "Aggregates".
	growth.df <- growth.df[which(growth.df$region != "Aggregates"), ]
	#One faint line per country, so that the overall spread of growth paths is visible.
	ggplot(growth.df, aes(year, NY.GDP.MKTP.KD.ZG, group = country)) +
	  geom_line(alpha = 0.1) +
	  scale_y_continuous("GDP Growth")
	library(WDI)
	library(ggplot2)
	#Need it for GLS later
	library(nlme)
	#Download the NY.GDP.MKTP.KD.ZG series for 1961-2009; extra=TRUE adds the region column
	#used to drop the "Aggregates" rows.
	growth.df <- WDI(country = "all", indicator = "NY.GDP.MKTP.KD.ZG", start = 1961, end = 2009, extra = TRUE)
	growth.df <- subset(growth.df, growth.df$region != "Aggregates")
	#Pick the three columns by name rather than by position (2:4): the position of the year and
	#indicator columns in WDI()'s result is not the same in every version of the package, and a
	#positional pick would then silently mislabel them.
	growth.df <- growth.df[, c("country", "year", "NY.GDP.MKTP.KD.ZG")]
	names(growth.df) <- c("country", "year", "growth")
	growth.df$country <- as.factor(growth.df$country)
	#Downloads the data
	library(WDI)

	#NY.GDP.MKTP.KD.ZG series for 1961-2009; extra=TRUE adds the region column used to drop
	#the "Aggregates" rows.
	growth.df <- WDI(country = "all", indicator = "NY.GDP.MKTP.KD.ZG", start = 1961, end = 2009, extra = TRUE)
	growth.df <- subset(growth.df, growth.df$region != "Aggregates")
	#Pick the three columns by name rather than by position (2:4): the position of the year and
	#indicator columns in WDI()'s result is not the same in every version of the package, and a
	#positional pick would then silently mislabel them.
	growth.df <- growth.df[, c("country", "year", "NY.GDP.MKTP.KD.ZG")]
	names(growth.df) <- c("country", "year", "growth")
	growth.df$country <- as.factor(growth.df$country)
	#Keep only rows with no missing values; complete.cases() never returns NA, so it can index
	#the rows directly.
	growth.df <- growth.df[complete.cases(growth.df), ]
	#Income share held by each population quartile (rows) in three regions (columns); every
	#column sums to 1.
	gini <- cbind(c(0.05, 0.15, 0.30, 0.50), c(0.1, 0.15, 0.2, 0.55), c(0.02, 0.1, 0.25, 0.63))
	rownames(gini) <- c("Bottom 25%", "Lower Middle 25%", "Upper Middle 25%", "Top 25%")
	#cumsum() is one of those functions that eliminates a lot of effort
	#adds a row of zeros on the top so that we can better visualize what a cumulative sum is
	lorenz <- rbind(c(0, 0, 0), apply(gini, 2, cumsum))
	colnames(lorenz) <- c("Region A", "Region B", "Region C")

	#Plot lorenz curves
	#x positions of the five cumulative points: 0%, 25%, ..., 100% of the population.
	pop.share <- seq(0, 1, by = 0.25)
	plot(pop.share, lorenz[, 1], type = "l", col = 1, main = "Lorenz Curves for Regions A, B, & C", ylab = "Share of Income", xlab = "Share of Population")
	#The legend names four curves but only Region A used to be drawn. Add Regions B and C and
	#the 45-degree line of equal distribution, using the same colours as the legend boxes.
	lines(pop.share, lorenz[, 2], col = 2)
	lines(pop.share, lorenz[, 3], col = 3)
	lines(pop.share, pop.share, col = 4)
	legend(0.1, 0.9, c("Region A", "Region B", "Region C", "Equal\nDistribution"), fill = c(1, 2, 3, 4))