cflint12/Individual_Project_12_12_2014.R

## Individual_Project_12_12_2014.R
#In order for the Shiny app and part of this code to run, you must have installed ggvis and shiny on your computer.
#This will require R Base 3.1.0 or higher.
#If you are using Ubuntu trusty, this may require you to add an additional line to your /etc/apt/sources.list file
#You can do this by following the instructions below:

#First, type the following into your terminal: sudo gedit /etc/apt/sources.list
#
#Then, paste "deb http://cran.wustl.edu/bin/linux/ubuntu trusty/" on a new line at end of current sources.list
# text file in Text Editor.
#
# Save and close sources.list
#
# Don't forget to update in order to use the new repository by typing the following into your terminal
#
# sudo apt-get update

#If R 3.1.2 was not already installed through that process, you may need to uninstall and then reinstall R
#using the terminal. You can do this by typing: sudo apt-get install r-base

#If any of the packages below are not installed, please uncomment these lines to install them.
# install.packages('WDI')
# install.packages('reshape')
# install.packages('RMySQL')
# install.packages('dplyr')
# install.packages('reshape2')
# install.packages('Hmisc')

#If you are running Ubuntu, RMySQL may not have properly installed. You can re-install it
#by typing the following prompt into the terminal: sudo apt-get install r-cran-rmysql

#First set your working directory to the appropriate place
setwd('/home/cflint/Documents/Individual_Project')

#Next load the necessary libraries
library('WDI')
library('ggvis')
library('reshape')
library('RMySQL')
library('dplyr')
library('reshape2')
library('Hmisc')

#Pull economic indicators of interest and include "extra" for classification purposes
WB_DF <- WDI(country = 'all', indicator = c("NY.GDP.MKTP.CD", "NV.AGR.TOTL.ZS", "NV.IND.TOTL.ZS", "NV.IND.MANF.ZS", "NV.SRV.TETC.ZS",
                                                    "SP.POP.TOTL", "EN.POP.DNST", "NY.GNP.MKTP.PP.CD", "NY.GNP.PCAP.PP.CD"),
             start = 1984, end = 2013, extra = TRUE, cache = NULL)

#Next we will get rid of the variables that are not of interest to us
WB_DF['iso3c'] <- NULL    #An extra country code
WB_DF['capital'] <- NULL  #The capital city of the country
WB_DF['longitude'] <- NULL #The longitude
WB_DF['latitude'] <- NULL  #The latitude
WB_DF['lending'] <- NULL   #The lending

#Create more intuitive column names
Columns <- c("Short_Key", "Long_Name", "Year", "GDP", "Agriculture_Pct_of_GDP", "Industry_Pct_of_GDP", "Manufacturing_Pct_of_GDP", "Services_Pct_of_GDP",
             "Total_Population", "Population_Density_Per_Sq_km", "GNI_Purchasing_Power", "Purchasing_Power_Per_Capita", "Region", "Income")

#Add the column names to the data frame
colnames(WB_DF) <- Columns

#Simplify the income groupings
WB_DF$Income <- as.character(WB_DF$Income)
WB_DF$Income[WB_DF$Income == 'High income: nonOECD'] <- 'High Income'
WB_DF$Income[WB_DF$Income == 'High income: OECD'] <- 'High Income'
WB_DF$Income <- as.factor(WB_DF$Income)

#Shorten the region names
WB_DF$Region <- as.character(WB_DF$Region)
WB_DF$Region[WB_DF$Region == 'Europe & Central Asia (all income levels)'] <- 'Europe & Central Asia'
WB_DF$Region[WB_DF$Region == 'Middle East & North Africa (all income levels)'] <- 'Middle East & North Africa'
WB_DF$Region[WB_DF$Region == 'Latin America & Caribbean (all income levels)'] <- 'Latin America & Caribbean'
WB_DF$Region[WB_DF$Region == 'Sub-Saharan Africa (all income levels)'] <- 'Sub-Saharan Africa'
WB_DF$Region[WB_DF$Region == 'East Asia & Pacific (all income levels)'] <- 'East Asia & Pacific'
WB_DF$Region <- as.factor(WB_DF$Region)

#Look at some descriptive statistics of the data
describe(WB_DF)

#Unfortunately, it appears that much of the data is missing
#Roughly 40% of the GDP breakdown variables (services, manufacturing, etc.) are missing
#About half of the GNI and income per capita is also missing, which makes it very difficult
#to draw trends from.

#Only keep the regional aggregations in the Region data frame
Region_DF <- subset(WB_DF, WB_DF$Long_Name == 'Europe & Central Asia (all income levels)' |
                      WB_DF$Long_Name == 'Middle East & North Africa (all income levels)' |
                      WB_DF$Long_Name == 'South Asia' |
                      WB_DF$Long_Name == 'Latin America & Caribbean (all income levels)' |
                      WB_DF$Long_Name == 'Sub-Saharan Africa (all income levels)' |
                      WB_DF$Long_Name == 'East Asia & Pacific (all income levels)' |
                      WB_DF$Long_Name == 'North America')

#Shorten the region names
Region_DF$Long_Name <- as.character(Region_DF$Long_Name)
Region_DF$Long_Name[Region_DF$Long_Name == 'Europe & Central Asia (all income levels)'] <- 'Europe & Central Asia'
Region_DF$Long_Name[Region_DF$Long_Name == 'Middle East & North Africa (all income levels)'] <- 'Middle East & North Africa'
Region_DF$Long_Name[Region_DF$Long_Name == 'Latin America & Caribbean (all income levels)'] <- 'Latin America & Caribbean'
Region_DF$Long_Name[Region_DF$Long_Name == 'Sub-Saharan Africa (all income levels)'] <- 'Sub-Saharan Africa'
Region_DF$Long_Name[Region_DF$Long_Name == 'East Asia & Pacific (all income levels)'] <- 'East Asia & Pacific'
Region_DF$Long_Name <- as.factor(Region_DF$Long_Name)

#Make a separate data frame for the income aggregations
Income_DF <- subset(WB_DF, WB_DF$Long_Name == 'High income' |
                      WB_DF$Long_Name == 'Low income' |
                      WB_DF$Long_Name == 'Upper middle income' |
                      WB_DF$Long_Name == 'Lower middle income' |
                      WB_DF$Long_Name == 'Not classified')

#Separate the country rows from the non-country tables
Country_DF <- subset(WB_DF, WB_DF$Region != 'Aggregates')

#Re-order the dataframe to ensure that it is in alphabetical order, and more importantly,
#to ensure that the years are in chronological order
Country_DF <- Country_DF[order(Country_DF$Long_Name, Country_DF$Year),]

#Reshape the data frame to get a holistic view of how GDP has changed over time for all countries
Country_GDP_DF <- reshape(Country_DF[,c("Long_Name","Year","GDP")], v.names = "GDP", idvar = "Long_Name",
                          timevar = "Year", direction = "wide")

#Rename the columns
Grouped_Table_Columns <- c("Long_Name", seq(1984,2013))
colnames(Country_GDP_DF) <- Grouped_Table_Columns

#Connect to MySQL and create a database
#Be sure to change to match the credentials for your own database
con <- dbConnect(MySQL(), user="root", password="root",
                 host="localhost")

dbGetQuery(con, "CREATE DATABASE Individual_Project_2014")
dbGetQuery(con, "USE Individual_Project_2014")

#Make sure there are no existing tables
dbListTables(con)

#Write the tables of interest from the data frames created earlier
dbWriteTable(con, "Country_Table", Country_DF)
dbWriteTable(con, "Income_Table", Income_DF)
dbWriteTable(con, "Region_Table", Region_DF)
dbWriteTable(con, "Country_GDP_Table", Country_GDP_DF)

#Look to make sure tables were written correctly
dbListTables(con)

#Create a new table to track the minimum, average and maximum GDP for each country
dbGetQuery(con, "CREATE TABLE GDP_Summary_Table (Short_Key VARCHAR(50), Long_Name VARCHAR(50),
           Avg_GDP BIGINT NOT NULL, Min_GDP BIGINT NOT NULL, Max_GDP BIGINT NOT NULL)")

#Load the table with data
dbGetQuery(con, "INSERT INTO GDP_Summary_Table SELECT Short_Key, Long_Name, Avg(GDP) AS Avg_GDP, Min(GDP) AS Min_GDP, Max(GDP) AS Max_GDP
           FROM Country_Table WHERE GDP <> 'NA' GROUP BY Long_Name")


#Inner join the country, income and region tables to build out more information about the groupings each country is placed in.
Final_WB_DF <- dbGetQuery(con, "Select Country_Table.Short_Key, Country_Table.Long_Name, Country_Table.Year,
                       Country_Table.GDP AS Country_GDP, Country_Table.Total_Population AS Country_Population,
                       Country_Table.Population_Density_Per_Sq_km AS Country_Pop_Dens, Country_Table.Region,
                       Country_Table.Income, Income_Table.GDP AS Income_Group_GDP, Income_Table.Total_Population AS Income_Group_Population,
                       Income_Table.Population_Density_Per_Sq_km AS Income_Group_Pop_Dens, Region_Table.GDP AS Region_GDP,
                       Region_Table.Total_Population AS Region_Population, Region_Table.Population_Density_Per_Sq_km AS Region_Pop_Dens
                       FROM Income_Table INNER JOIN Country_Table ON Country_Table.Income=Income_Table.Long_Name
                       AND Country_Table.Year=Income_Table.Year INNER JOIN Region_Table ON Country_Table.Region = Region_Table.Long_Name
                       AND Country_Table.Year = Region_Table.Year WHERE Country_Table.GDP <> 'NA'")

#Write the new table to a data frame to export for use within the Shiny App
Final_WB_DF <- Final_WB_DF[order(Final_WB_DF$Long_Name, Final_WB_DF$Year),]

#Write the new data frame as a csv file to place within the app folder.
write.csv(Final_WB_DF, file = 'Final_WB_DF.csv')

#Test out visualizations to explore the data further and find what type of visualization would be best for the app.
Test <- filter(Final_WB_DF, Final_WB_DF$Long_Name == 'Uruguay')

Test %>%
  ggvis(x = ~Year, y = ~Country_GDP) %>%
  layer_paths(stroke := 'green') %>%
  layer_paths(x = ~Year, y = ~Income_Group_GDP, stroke := 'red') %>%
  layer_paths(x = ~Year, y = ~Region_GDP, stroke := 'blue') %>%
  add_axis("x", title = 'Year') %>%
  add_axis("y", title = 'GDP') %>%
  set_options(width = 400, height = 200)

#Let's explore the relationship between income group and population density
Income_Pop_Test <- filter(Final_WB_DF, Final_WB_DF$Year == 2013)

boxplot(Income_Pop_Test$Country_Pop_Dens~Income_Pop_Test$Income, outline = FALSE)

#Next, let's take a look at some charts that show the relationship between the type of product
#(services, manufacturing, agriculture, etc.) that countries produce and their overall GDP performance.
Filtered_Country_DF <- filter(Country_DF, is.na(Services_Pct_of_GDP) == FALSE & is.na(GDP) == FALSE)
Filtered_Country_DF <- filter(Country_DF, Income != 'Aggregates')
Filtered_Country_DF <- filter(Country_DF, Income != 'Not Classified')

#Take away a few zeroes, so that it looks nice on the charts
Filtered_Country_DF$GDP <- Filtered_Country_DF$GDP/1000000000000

#We see from these plots that higher-income countries tend to provide more services compared to their counterparts
Filtered_Country_DF %>%
  ggvis(x = ~Services_Pct_of_GDP, y = ~GDP) %>%
  layer_points(fill = ~Income, stroke = ~Income) %>%
  add_axis("x", title = 'Pct Services') %>%
  add_axis("y", title = 'Total GDP (Trillions)')

#Taking another look, we see that region has a lot to do with the composition of a country's GDP.
#For example, it is clear that North America focuses on services, as doeer Europe.
Filtered_Country_DF %>%
  ggvis(x = ~Services_Pct_of_GDP, y = ~GDP) %>%
  layer_points(fill = ~Region, stroke = ~Region) %>%
  add_axis("x", title = 'Pct Services') %>%
  add_axis("y", title = 'Total GDP (Trillions)')

Filtered_Country_DF %>%
  ggvis(x = ~Manufacturing_Pct_of_GDP, y = ~GDP) %>%
  layer_points(fill = ~Income, stroke = ~Income) %>%
  add_axis("x", title = 'Pct Manufacturing') %>%
  add_axis("y", title = 'Total GDP (Trillions)')

Filtered_Country_DF %>%
  ggvis(x = ~Agriculture_Pct_of_GDP, y = ~GDP) %>%
  layer_points(fill = ~Income, stroke = ~Income) %>%
  add_axis("x", title = 'Pct Agriculture') %>%
  add_axis("y", title = 'Total GDP (Trillions)')

Filtered_Country_DF %>%
  ggvis(x = ~Industry_Pct_of_GDP, y = ~GDP) %>%
  layer_points(fill = ~Income, stroke = ~Income) %>%
  add_axis("x", title = 'Pct Industry') %>%
  add_axis("y", title = 'Total GDP (Trillions)')

#Next, we compare purchasing power per capita with population density per square km
Filtered_Country_DF %>%
  ggvis(x = ~Purchasing_Power_Per_Capita, y = ~Population_Density_Per_Sq_km) %>%
  layer_points(fill = ~Income, stroke = ~Income) %>%
  add_axis("x", title = 'Purchasing Power per Capita') %>%
  add_axis("y", title = 'Pop. Density per Sq. km')

#Next, we compare purchasing power per capita with population density per square km
Filtered_Country_DF %>%
  ggvis(x = ~Purchasing_Power_Per_Capita, y = ~Population_Density_Per_Sq_km) %>%
  layer_points(fill = ~Region, stroke = ~Region) %>%
  add_axis("x", title = 'Purchasing Power per Capita') %>%
  add_axis("y", title = 'Pop. Density per Sq. km')

Filtered_Country_DF$Income <- factor(Filtered_Country_DF$Income, levels(Filtered_Country_DF$Income)[c(4,3,6,2,1,5)])

boxplot(Filtered_Country_DF$Services_Pct_of_GDP~Filtered_Country_DF$Income, outline = TRUE,
        main = 'Services as Percent of GDP', col = 'light blue')

cor(Filtered_Country_DF$Services_Pct_of_GDP, Filtered_Country_DF$GDP)

hist(Filtered_Country_DF$Services_Pct_of_GDP, main = 'Histogram of Services as Pct. of GDP',
     col = 'light blue',  xlab = 'Services as Pct. of GDP')

Income_Group_Test <- filter(Final_WB_DF, Final_WB_DF$Income == 'High Income')

Test %>%
  ggvis(x = ~Year, y = ~Country_GDP) %>%
  layer_paths() %>%
  layer_paths(x = ~Year, y = ~Income_Group_GDP) %>%
  layer_paths(x = ~Year, y = ~Region_GDP) %>%
  add_axis("x", title = 'Year') %>%
  add_axis("y", title = 'GDP') %>%
  set_options(width = 400, height = 200)

dbDisconnect(con)
	#In order for the Shiny app and part of this code to run, you must have installed ggvis and shiny on your computer.
	#This will require R Base 3.1.0 or higher.
	#If you are using Ubuntu trusty, this may require you to add an additional line to your /etc/apt/sources.list file
	#You can do this by following the instructions below:

	#First, type the following into your terminal: sudo gedit /etc/apt/sources.list
	#
	#Then, paste "deb http://cran.wustl.edu/bin/linux/ubuntu trusty/" on a new line at end of current sources.list
	# text file in Text Editor.
	#
	# Save and close sources.list
	#
	# Don't forget to update in order to use the new repository by typing the following into your terminal
	#
	# sudo apt-get update

	#If R 3.1.2 was not already installed through that process, you may need to uninstall and then reinstall R
	#using the terminal. You can do this by typing: sudo apt-get install r-base

	#If any of the packages below are not installed, please uncomment these lines to install them.
	# install.packages('WDI')
	# install.packages('reshape')
	# install.packages('RMySQL')
	# install.packages('dplyr')
	# install.packages('reshape2')
	# install.packages('Hmisc')

	#If you are running Ubuntu, RMySQL may not have properly installed. You can re-install it
	#by typing the following prompt into the terminal: sudo apt-get install r-cran-rmysql

	#First set your working directory to the appropriate place
	setwd('/home/cflint/Documents/Individual_Project')

	#Next load the necessary libraries
	library('WDI')
	library('ggvis')
	library('reshape')
	library('RMySQL')
	library('dplyr')
	library('reshape2')
	library('Hmisc')

	#Pull economic indicators of interest and include "extra" for classification purposes
	WB_DF <- WDI(country = 'all', indicator = c("NY.GDP.MKTP.CD", "NV.AGR.TOTL.ZS", "NV.IND.TOTL.ZS", "NV.IND.MANF.ZS", "NV.SRV.TETC.ZS",
	"SP.POP.TOTL", "EN.POP.DNST", "NY.GNP.MKTP.PP.CD", "NY.GNP.PCAP.PP.CD"),
	start = 1984, end = 2013, extra = TRUE, cache = NULL)

	#Next we will get rid of the variables that are not of interest to us
	WB_DF['iso3c'] <- NULL #An extra country code
	WB_DF['capital'] <- NULL #The capital city of the country
	WB_DF['longitude'] <- NULL #The longitude
	WB_DF['latitude'] <- NULL #The latitude
	WB_DF['lending'] <- NULL #The lending

	#Create more intuitive column names
	Columns <- c("Short_Key", "Long_Name", "Year", "GDP", "Agriculture_Pct_of_GDP", "Industry_Pct_of_GDP", "Manufacturing_Pct_of_GDP", "Services_Pct_of_GDP",
	"Total_Population", "Population_Density_Per_Sq_km", "GNI_Purchasing_Power", "Purchasing_Power_Per_Capita", "Region", "Income")

	#Add the column names to the data frame
	colnames(WB_DF) <- Columns

	#Simplify the income groupings
	WB_DF$Income <- as.character(WB_DF$Income)
	WB_DF$Income[WB_DF$Income == 'High income: nonOECD'] <- 'High Income'
	WB_DF$Income[WB_DF$Income == 'High income: OECD'] <- 'High Income'
	WB_DF$Income <- as.factor(WB_DF$Income)

	#Shorten the region names
	WB_DF$Region <- as.character(WB_DF$Region)
	WB_DF$Region[WB_DF$Region == 'Europe & Central Asia (all income levels)'] <- 'Europe & Central Asia'
	WB_DF$Region[WB_DF$Region == 'Middle East & North Africa (all income levels)'] <- 'Middle East & North Africa'
	WB_DF$Region[WB_DF$Region == 'Latin America & Caribbean (all income levels)'] <- 'Latin America & Caribbean'
	WB_DF$Region[WB_DF$Region == 'Sub-Saharan Africa (all income levels)'] <- 'Sub-Saharan Africa'
	WB_DF$Region[WB_DF$Region == 'East Asia & Pacific (all income levels)'] <- 'East Asia & Pacific'
	WB_DF$Region <- as.factor(WB_DF$Region)

	#Look at some descriptive statistics of the data
	describe(WB_DF)

	#Unfortunately, it appears that much of the data is missing
	#Roughly 40% of the GDP breakdown variables (services, manufacturing, etc.) are missing
	#About half of the GNI and income per capita is also missing, which makes it very difficult
	#to draw trends from.

	#Only keep the regional aggregations in the Region data frame
	Region_DF <- subset(WB_DF, WB_DF$Long_Name == 'Europe & Central Asia (all income levels)' \|
	WB_DF$Long_Name == 'Middle East & North Africa (all income levels)' \|
	WB_DF$Long_Name == 'South Asia' \|
	WB_DF$Long_Name == 'Latin America & Caribbean (all income levels)' \|
	WB_DF$Long_Name == 'Sub-Saharan Africa (all income levels)' \|
	WB_DF$Long_Name == 'East Asia & Pacific (all income levels)' \|
	WB_DF$Long_Name == 'North America')

	#Shorten the region names
	Region_DF$Long_Name <- as.character(Region_DF$Long_Name)
	Region_DF$Long_Name[Region_DF$Long_Name == 'Europe & Central Asia (all income levels)'] <- 'Europe & Central Asia'
	Region_DF$Long_Name[Region_DF$Long_Name == 'Middle East & North Africa (all income levels)'] <- 'Middle East & North Africa'
	Region_DF$Long_Name[Region_DF$Long_Name == 'Latin America & Caribbean (all income levels)'] <- 'Latin America & Caribbean'
	Region_DF$Long_Name[Region_DF$Long_Name == 'Sub-Saharan Africa (all income levels)'] <- 'Sub-Saharan Africa'
	Region_DF$Long_Name[Region_DF$Long_Name == 'East Asia & Pacific (all income levels)'] <- 'East Asia & Pacific'
	Region_DF$Long_Name <- as.factor(Region_DF$Long_Name)

	#Make a separate data frame for the income aggregations
	Income_DF <- subset(WB_DF, WB_DF$Long_Name == 'High income' \|
	WB_DF$Long_Name == 'Low income' \|
	WB_DF$Long_Name == 'Upper middle income' \|
	WB_DF$Long_Name == 'Lower middle income' \|
	WB_DF$Long_Name == 'Not classified')

	#Separate the country rows from the non-country tables
	Country_DF <- subset(WB_DF, WB_DF$Region != 'Aggregates')

	#Re-order the dataframe to ensure that it is in alphabetical order, and more importantly,
	#to ensure that the years are in chronological order
	Country_DF <- Country_DF[order(Country_DF$Long_Name, Country_DF$Year),]

	#Reshape the data frame to get a holistic view of how GDP has changed over time for all countries
	Country_GDP_DF <- reshape(Country_DF[,c("Long_Name","Year","GDP")], v.names = "GDP", idvar = "Long_Name",
	timevar = "Year", direction = "wide")

	#Rename the columns
	Grouped_Table_Columns <- c("Long_Name", seq(1984,2013))
	colnames(Country_GDP_DF) <- Grouped_Table_Columns

	#Connect to MySQL and create a database
	#Be sure to change to match the credentials for your own database
	con <- dbConnect(MySQL(), user="root", password="root",
	host="localhost")

	dbGetQuery(con, "CREATE DATABASE Individual_Project_2014")
	dbGetQuery(con, "USE Individual_Project_2014")

	#Make sure there are no existing tables
	dbListTables(con)

	#Write the tables of interest from the data frames created earlier
	dbWriteTable(con, "Country_Table", Country_DF)
	dbWriteTable(con, "Income_Table", Income_DF)
	dbWriteTable(con, "Region_Table", Region_DF)
	dbWriteTable(con, "Country_GDP_Table", Country_GDP_DF)

	#Look to make sure tables were written correctly
	dbListTables(con)

	#Create a new table to track the minimum, average and maximum GDP for each country
	dbGetQuery(con, "CREATE TABLE GDP_Summary_Table (Short_Key VARCHAR(50), Long_Name VARCHAR(50),
	Avg_GDP BIGINT NOT NULL, Min_GDP BIGINT NOT NULL, Max_GDP BIGINT NOT NULL)")

	#Load the table with data
	dbGetQuery(con, "INSERT INTO GDP_Summary_Table SELECT Short_Key, Long_Name, Avg(GDP) AS Avg_GDP, Min(GDP) AS Min_GDP, Max(GDP) AS Max_GDP
	FROM Country_Table WHERE GDP <> 'NA' GROUP BY Long_Name")


	#Inner join the country, income and region tables to build out more information about the groupings each country is placed in.
	Final_WB_DF <- dbGetQuery(con, "Select Country_Table.Short_Key, Country_Table.Long_Name, Country_Table.Year,
	Country_Table.GDP AS Country_GDP, Country_Table.Total_Population AS Country_Population,
	Country_Table.Population_Density_Per_Sq_km AS Country_Pop_Dens, Country_Table.Region,
	Country_Table.Income, Income_Table.GDP AS Income_Group_GDP, Income_Table.Total_Population AS Income_Group_Population,
	Income_Table.Population_Density_Per_Sq_km AS Income_Group_Pop_Dens, Region_Table.GDP AS Region_GDP,
	Region_Table.Total_Population AS Region_Population, Region_Table.Population_Density_Per_Sq_km AS Region_Pop_Dens
	FROM Income_Table INNER JOIN Country_Table ON Country_Table.Income=Income_Table.Long_Name
	AND Country_Table.Year=Income_Table.Year INNER JOIN Region_Table ON Country_Table.Region = Region_Table.Long_Name
	AND Country_Table.Year = Region_Table.Year WHERE Country_Table.GDP <> 'NA'")

	#Write the new table to a data frame to export for use within the Shiny App
	Final_WB_DF <- Final_WB_DF[order(Final_WB_DF$Long_Name, Final_WB_DF$Year),]

	#Write the new data frame as a csv file to place within the app folder.
	write.csv(Final_WB_DF, file = 'Final_WB_DF.csv')

	#Test out visualizations to explore the data further and find what type of visualization would be best for the app.
	Test <- filter(Final_WB_DF, Final_WB_DF$Long_Name == 'Uruguay')

	Test %>%
	ggvis(x = ~Year, y = ~Country_GDP) %>%
	layer_paths(stroke := 'green') %>%
	layer_paths(x = ~Year, y = ~Income_Group_GDP, stroke := 'red') %>%
	layer_paths(x = ~Year, y = ~Region_GDP, stroke := 'blue') %>%
	add_axis("x", title = 'Year') %>%
	add_axis("y", title = 'GDP') %>%
	set_options(width = 400, height = 200)

	#Let's explore the relationship between income group and population density
	Income_Pop_Test <- filter(Final_WB_DF, Final_WB_DF$Year == 2013)

	boxplot(Income_Pop_Test$Country_Pop_Dens~Income_Pop_Test$Income, outline = FALSE)

	#Next, let's take a look at some charts that show the relationship between the type of product
	#(services, manufacturing, agriculture, etc.) that countries produce and their overall GDP performance.
	Filtered_Country_DF <- filter(Country_DF, is.na(Services_Pct_of_GDP) == FALSE & is.na(GDP) == FALSE)
	Filtered_Country_DF <- filter(Country_DF, Income != 'Aggregates')
	Filtered_Country_DF <- filter(Country_DF, Income != 'Not Classified')

	#Take away a few zeroes, so that it looks nice on the charts
	Filtered_Country_DF$GDP <- Filtered_Country_DF$GDP/1000000000000

	#We see from these plots that higher-income countries tend to provide more services compared to their counterparts
	Filtered_Country_DF %>%
	ggvis(x = ~Services_Pct_of_GDP, y = ~GDP) %>%
	layer_points(fill = ~Income, stroke = ~Income) %>%
	add_axis("x", title = 'Pct Services') %>%
	add_axis("y", title = 'Total GDP (Trillions)')

	#Taking another look, we see that region has a lot to do with the composition of a country's GDP.
	#For example, it is clear that North America focuses on services, as doeer Europe.
	Filtered_Country_DF %>%
	ggvis(x = ~Services_Pct_of_GDP, y = ~GDP) %>%
	layer_points(fill = ~Region, stroke = ~Region) %>%
	add_axis("x", title = 'Pct Services') %>%
	add_axis("y", title = 'Total GDP (Trillions)')

	Filtered_Country_DF %>%
	ggvis(x = ~Manufacturing_Pct_of_GDP, y = ~GDP) %>%
	layer_points(fill = ~Income, stroke = ~Income) %>%
	add_axis("x", title = 'Pct Manufacturing') %>%
	add_axis("y", title = 'Total GDP (Trillions)')

	Filtered_Country_DF %>%
	ggvis(x = ~Agriculture_Pct_of_GDP, y = ~GDP) %>%
	layer_points(fill = ~Income, stroke = ~Income) %>%
	add_axis("x", title = 'Pct Agriculture') %>%
	add_axis("y", title = 'Total GDP (Trillions)')

	Filtered_Country_DF %>%
	ggvis(x = ~Industry_Pct_of_GDP, y = ~GDP) %>%
	layer_points(fill = ~Income, stroke = ~Income) %>%
	add_axis("x", title = 'Pct Industry') %>%
	add_axis("y", title = 'Total GDP (Trillions)')

	#Next, we compare purchasing power per capita with population density per square km
	Filtered_Country_DF %>%
	ggvis(x = ~Purchasing_Power_Per_Capita, y = ~Population_Density_Per_Sq_km) %>%
	layer_points(fill = ~Income, stroke = ~Income) %>%
	add_axis("x", title = 'Purchasing Power per Capita') %>%
	add_axis("y", title = 'Pop. Density per Sq. km')

	#Next, we compare purchasing power per capita with population density per square km
	Filtered_Country_DF %>%
	ggvis(x = ~Purchasing_Power_Per_Capita, y = ~Population_Density_Per_Sq_km) %>%
	layer_points(fill = ~Region, stroke = ~Region) %>%
	add_axis("x", title = 'Purchasing Power per Capita') %>%
	add_axis("y", title = 'Pop. Density per Sq. km')

	Filtered_Country_DF$Income <- factor(Filtered_Country_DF$Income, levels(Filtered_Country_DF$Income)[c(4,3,6,2,1,5)])

	boxplot(Filtered_Country_DF$Services_Pct_of_GDP~Filtered_Country_DF$Income, outline = TRUE,
	main = 'Services as Percent of GDP', col = 'light blue')

	cor(Filtered_Country_DF$Services_Pct_of_GDP, Filtered_Country_DF$GDP)

	hist(Filtered_Country_DF$Services_Pct_of_GDP, main = 'Histogram of Services as Pct. of GDP',
	col = 'light blue', xlab = 'Services as Pct. of GDP')

	Income_Group_Test <- filter(Final_WB_DF, Final_WB_DF$Income == 'High Income')

	Test %>%
	ggvis(x = ~Year, y = ~Country_GDP) %>%
	layer_paths() %>%
	layer_paths(x = ~Year, y = ~Income_Group_GDP) %>%
	layer_paths(x = ~Year, y = ~Region_GDP) %>%
	add_axis("x", title = 'Year') %>%
	add_axis("y", title = 'GDP') %>%
	set_options(width = 400, height = 200)

	dbDisconnect(con)