# inkhorn/toronto_casino.r

## toronto_casino.r
library(ff)
library(ffbase)
library(stringr)
library(ggplot2)
library(ggthemes)
library(reshape2)
library(RgoogleMaps)

# Load the survey export once and keep two copies of it: `casino` is recoded
# to numeric scores further down, while `casino.orig` keeps the original text
# answers. Copying the data frame gives the same contents as reading the same
# file a second time, without the redundant disk read.
casino <- read.csv("/home/inkhorn/Downloads/casino_survey_results20130325.csv")
casino.orig <- casino

# Canadian postal codes with latitude/longitude coordinates, loaded as an ffdf.
# The file has no header row (header = FALSE).
pcodes <- read.csv.ffdf(file = "/home/inkhorn/Downloads/zipcodeset.txt",
                        first.rows = 50000, next.rows = 50000,
                        colClasses = NA, header = FALSE)

# Recode the text answers held in `casino.orig` into numeric scores in `casino`.
# Each scale is a named lookup vector: indexing it by the answer text returns
# the score, and any answer that is not listed (blank, unexpected text, NA)
# becomes NA. This replaces the nested ifelse() chains and always produces a
# numeric column.

# Q1_A: five-point opposed / in favour scale.
favour.scores <- c("Strongly Opposed" = 1,
                   "Somewhat Opposed" = 2,
                   "Neutral or Mixed Feelings" = 3,
                   "Somewhat in Favour" = 4,
                   "Strongly in Favour" = 5)
casino$Q1_A <- unname(favour.scores[as.character(casino.orig$Q1_A)])

# Q2_A: four-point "fits my image" scale.
image.scores <- c("Does Not Fit My Image At All" = 1,
                  "Neutral / I am Not Sure" = 2,
                  "Fits Image Somewhat" = 3,
                  "Fits Image Perfectly" = 4)
casino$Q2_A <- unname(image.scores[as.character(casino.orig$Q2_A)])

# Columns 8 to 24 are recoded on the three-point importance scale.
importance.scores <- c("Not Important At All" = 1,
                       "Somewhat Important" = 2,
                       "Very Important" = 3)
for (i in 8:24) {
  casino[, i] <- unname(importance.scores[as.character(casino.orig[, i])])
}

# Columns 31, 32, 47, 48, 63 and 64 are recoded on the five-point suitability scale.
suitability.scores <- c("Strongly Unsuitable" = 1,
                        "Somewhat Unsuitable" = 2,
                        "Neutral or Mixed Feelings" = 3,
                        "Somewhat Suitable" = 4,
                        "Highly Suitable" = 5)
for (i in c(31:32, 47, 48, 63, 64)) {
  casino[, i] <- unname(suitability.scores[as.character(casino.orig[, i])])
}

# The raw data contains blank answers. For Q1_A they are turned into NA and the
# column becomes a factor ordered from most opposed to most in favour; the
# Q2_A and Q6 plots simply filter the blank rows out.
# NOTE(review): `percent` is not defined in this file; presumably it is
# scales::percent - confirm that scales is attached before running the plots.

is.na(casino.orig$Q1_A) <- which(casino.orig$Q1_A == "")
casino.orig$Q1_A <- factor(
  casino.orig$Q1_A,
  levels = c("Strongly Opposed", "Somewhat Opposed", "Neutral or Mixed Feelings",
             "Somewhat in Favour", "Strongly in Favour")
)

# Share of respondents per answer: how people feel about a new casino.
ggplot(subset(casino.orig, !is.na(Q1_A)), aes(x = Q1_A, y = ..count.. / sum(..count..))) +
  geom_bar(fill = "forest green") +
  coord_flip() +
  ggtitle("How do you feel about having a new casino in Toronto?") +
  scale_x_discrete(name = "") +
  theme_wsj() +
  theme(title = element_text(size = 22), plot.title = element_text(hjust = .8)) +
  stat_bin(aes(label = sprintf("%.02f %%", ..count.. / sum(..count..) * 100)), geom = "text") +
  scale_y_continuous(labels = percent)

# Share of respondents per answer: how a casino fits their image of Toronto.
ggplot(subset(casino.orig, Q2_A != ""), aes(x = Q2_A, y = ..count.. / sum(..count..))) +
  geom_bar(fill = "forest green") +
  coord_flip() +
  ggtitle("How does a new casino in Toronto fit your image of the City of Toronto?") +
  scale_x_discrete(name = "") +
  theme_wsj() +
  theme(title = element_text(size = 22), plot.title = element_text(hjust = .8)) +
  stat_bin(aes(label = sprintf("%.02f %%", ..count.. / sum(..count..) * 100)), geom = "text") +
  scale_y_continuous(labels = percent)

# Share of respondents per answer: preferred location if a casino is built.
ggplot(subset(casino.orig, Q6 != ""), aes(x = Q6, y = ..count.. / sum(..count..))) +
  geom_bar(fill = "forest green") +
  coord_flip() +
  ggtitle("If a casino is built, where would you prefer to see it located?") +
  scale_x_discrete(name = "") +
  theme_wsj() +
  theme(title = element_text(size = 22), plot.title = element_text(hjust = .8)) +
  stat_bin(aes(label = sprintf("%.02f %%", ..count.. / sum(..count..) * 100)), geom = "text") +
  scale_y_continuous(labels = percent)

# Order the downtown suitability labels by their numeric score (taken from the
# recoded `casino` columns) so the rating levels come out in scale order.
casino.orig$Q7_A_StandAlone <- reorder(casino.orig$Q7_A_StandAlone, casino$Q7_A_StandAlone)
casino.orig$Q7_A_Integrated <- reorder(casino.orig$Q7_A_Integrated, casino$Q7_A_Integrated)

# Proportions over the first five levels of each question, one column per
# casino type.
downtown.standalone <- prop.table(as.matrix(table(casino.orig$Q7_A_StandAlone)[1:5]))
downtown.integrated <- prop.table(as.matrix(table(casino.orig$Q7_A_Integrated)[1:5]))
stand.and.integrated.ratings.downtown <- cbind(downtown.standalone, downtown.integrated)
colnames(stand.and.integrated.ratings.downtown) <- c("Standalone Casino", "Integrated Entertainment Complex")

# Long format: one row per (Rating, Casino Type) pair.
stand.and.integrated.ratings.downtown.long <- melt(
  stand.and.integrated.ratings.downtown,
  varnames = c("Rating", "Casino Type"),
  value.name = "Percentage"
)

# Dodged bars of the rating shares for the downtown location.
# NOTE(review): theme_wsj() is added after theme(); it may replace the title
# settings made there - confirm this ordering is intended.
ggplot(stand.and.integrated.ratings.downtown.long,
       aes(x = `Casino Type`, fill = Rating, y = Percentage,
           label = sprintf("%.02f %%", Percentage * 100))) +
  geom_bar(position = "dodge") +
  coord_flip() +
  ggtitle("Ratings of Casino Suitability \nin Downtown Toronto by Casino Type") +
  scale_x_discrete(name = "") +
  theme(title = element_text(size = 22), plot.title = element_text(hjust = .8)) +
  scale_y_continuous(labels = percent) +
  geom_text(aes(x = `Casino Type`, y = Percentage, ymax = Percentage,
                label = sprintf("%.01f%%", Percentage * 100), hjust = .75),
            position = position_dodge(width = 1), size = 4) +
  scale_fill_few(palette = "light") +
  theme_wsj()

# Exhibition Place ratings: proportions over table entries 2 to 6 of each
# question, one column per casino type.
exhibition.standalone <- prop.table(as.matrix(table(casino.orig$Q7_B_StandAlone)[2:6]))
exhibition.integrated <- prop.table(as.matrix(table(casino.orig$Q7_B_Integrated)[2:6]))
stand.and.integrated.ratings.exhibition <- cbind(exhibition.standalone, exhibition.integrated)
colnames(stand.and.integrated.ratings.exhibition) <- c("Standalone Casino", "Integrated Entertainment Complex")

# Long format: one row per (Rating, Casino Type) pair.
stand.and.integrated.ratings.exhibition.long <- melt(
  stand.and.integrated.ratings.exhibition,
  varnames = c("Rating", "Casino Type"),
  value.name = "Percentage"
)

# Reuse the first five level names of Q7_A_StandAlone so the ratings are
# ordered the same way as in the downtown data.
stand.and.integrated.ratings.exhibition.long$Rating <- factor(
  stand.and.integrated.ratings.exhibition.long$Rating,
  levels = levels(casino.orig$Q7_A_StandAlone)[1:5]
)

# Dodged bars of the rating shares for the Exhibition Place location.
ggplot(stand.and.integrated.ratings.exhibition.long,
       aes(x = `Casino Type`, fill = Rating, y = Percentage,
           label = sprintf("%.02f %%", Percentage * 100))) +
  geom_bar(position = "dodge") +
  coord_flip() +
  ggtitle("Ratings of Casino Suitability \nat Exhibition Place by Casino Type") +
  scale_x_discrete(name = "") +
  theme(title = element_text(size = 22), plot.title = element_text(hjust = .8)) +
  scale_y_continuous(labels = percent) +
  geom_text(aes(x = `Casino Type`, y = Percentage, ymax = Percentage,
                label = sprintf("%.01f%%", Percentage * 100), hjust = .75),
            position = position_dodge(width = 1), size = 4) +
  scale_fill_few(palette = "light") +
  theme_wsj()

# Port Lands ratings: proportions over table entries 2 to 6 of each question,
# one column per casino type.
portlands.standalone <- prop.table(as.matrix(table(casino.orig$Q7_C_StandAlone)[2:6]))
portlands.integrated <- prop.table(as.matrix(table(casino.orig$Q7_C_Integrated)[2:6]))
stand.and.integrated.ratings.portlands <- cbind(portlands.standalone, portlands.integrated)
colnames(stand.and.integrated.ratings.portlands) <- c("Standalone Casino", "Integrated Entertainment Complex")

# Long format: one row per (Rating, Casino Type) pair.
stand.and.integrated.ratings.portlands.long <- melt(
  stand.and.integrated.ratings.portlands,
  varnames = c("Rating", "Casino Type"),
  value.name = "Percentage"
)

# Reuse the first five level names of Q7_A_StandAlone so the ratings are
# ordered the same way as in the downtown data.
stand.and.integrated.ratings.portlands.long$Rating <- factor(
  stand.and.integrated.ratings.portlands.long$Rating,
  levels = levels(casino.orig$Q7_A_StandAlone)[1:5]
)

# Dodged bars of the rating shares for the Port Lands location.
ggplot(stand.and.integrated.ratings.portlands.long,
       aes(x = `Casino Type`, fill = Rating, y = Percentage,
           label = sprintf("%.02f %%", Percentage * 100))) +
  geom_bar(position = "dodge") +
  coord_flip() +
  ggtitle("Ratings of Casino Suitability \nat Port Lands by Casino Type") +
  scale_x_discrete(name = "") +
  theme(title = element_text(size = 22), plot.title = element_text(hjust = .8)) +
  scale_y_continuous(labels = percent) +
  geom_text(aes(x = `Casino Type`, y = Percentage, ymax = Percentage,
                label = sprintf("%.01f%%", Percentage * 100), hjust = .75),
            position = position_dodge(width = 1), size = 4) +
  scale_fill_few(palette = "light") +
  theme_wsj()

# Geocoding step: attach latitude/longitude to each survey response by matching
# the respondent's postal code (really the FSA, i.e. the first three characters)
# against the postal-code table.
# The geocode file is held as an ffdf, which the author found faster to merge,
# so the casino.orig data frame is coerced to ffdf as well. From here on
# casino.orig is an ffdf and is indexed with the df[,"column"] notation.
# (This step was not done at this point of the original analysis; the script is
# not strictly linear.)

# Upper-case the survey postal codes so they compare equal to the upper-cased FSAs.
casino.orig$PostalCode = toupper(casino.orig$PostalCode)

# NOTE(review): pcodes was already read earlier in this script with the same
# arguments; this second read looks redundant - confirm before removing either.
pcodes = read.csv.ffdf(file="/home/inkhorn/Downloads/zipcodeset.txt", first.rows=50000, next.rows=50000, colClasses=NA, header=FALSE)
names(pcodes) = c("Postal","Lat","Long","City","Prov")
# FSA = first three characters of the postal code, upper-cased, stored as an ff factor.
pcodes$FSA = as.ff(as.factor(toupper(substr(pcodes[,"Postal"], 1,3))))
casino.orig = as.ffdf(casino.orig)
# The join key must also be an ff factor on the survey side.
casino.orig$PostalCode = as.ff(as.factor(toupper(casino.orig[,"PostalCode"])))
# all.x=TRUE keeps survey rows that have no matching FSA.
# NOTE(review): several postal codes share one FSA; presumably ffbase's merge
# takes the first match per survey row rather than duplicating rows - verify.
casino.orig = merge(casino.orig, pcodes, by.x="PostalCode", by.y="FSA", all.x=TRUE)

# Full map of every geocoded response: keep only the rows whose latitude is
# known, fetch a background map covering them, then draw one orange point per
# response.
geocoded.rows <- which(!is.na(casino.orig[, "Lat"]))
casino.gc <- casino.orig[geocoded.rows, ]
mymap <- MapBackground(lat = casino.gc$Lat, lon = casino.gc$Long)
PlotOnStaticMap(mymap, casino.gc$Lat, casino.gc$Long, cex = 1.5, pch = 21, bg = "orange")

# Build the list of cities with the most responses and use it to restrict the
# geocoded data, so the second map zooms in on Southern Ontario.

# Response counts per city, dropping unused levels and the blank city name,
# sorted from most to fewest responses.
cities <- data.frame(table(casino.orig[, "City"]))
cities <- cities[cities$Freq > 0, ]
cities <- cities[order(cities$Freq, decreasing = TRUE), ]
cities <- cities[cities$Var1 != "", ]

# Top 28 cities (an arbitrary cut-off). head() is used so that fewer than 28
# available cities does not pad the result with NA rows.
cities.filter <- head(cities, 28)
names(cities.filter) <- c("City", "# Responses")

# Keep only the responses from those cities. The city names live in the `City`
# column after the rename above; `Var1` no longer exists at this point, and
# filtering on it would match nothing.
casino.top.so <- casino.orig[which(casino.orig[, "City"] %in% cities.filter$City), ]

# Transparency helper used for the Southern Ontario map.

addTrans <- function(color, trans)
{
  # Add an alpha (transparency) channel to one or more colours.
  #
  # Args:
  #   color: colour specification(s) accepted by col2rgb().
  #   trans: alpha value(s) between 0 (fully transparent) and 255 (fully
  #          visible). `color` and `trans` must have the same length, or one
  #          of them must have length 1, in which case it is recycled.
  #
  # Returns:
  #   A character vector of "#RRGGBBAA" hex strings, one per colour
  #   (character(0) when `color` is empty).
  #
  # Stops when the lengths are incompatible, or when `trans` contains NA or a
  # value outside 0..255 (such values cannot be written as two hex digits).

  n.color <- length(color)
  n.trans <- length(trans)
  if (n.color != n.trans && !any(c(n.color, n.trans) == 1)) stop("Vector lengths not correct")
  if (n.color == 0) return(character(0))
  if (any(is.na(trans)) || any(trans < 0 | trans > 255)) {
    stop("trans must be between 0 and 255")
  }
  if (n.color == 1 && n.trans > 1) color <- rep(color, n.trans)
  if (n.trans == 1 && n.color > 1) trans <- rep(trans, n.color)

  # Two-digit upper-case hex for values in 0..255: high nibble, then low nibble.
  num2hex <- function(x)
  {
    hex <- unlist(strsplit("0123456789ABCDEF", split = ""))
    paste0(hex[(x - x %% 16) / 16 + 1], hex[x %% 16 + 1])
  }

  # One column per colour; rows are red, green, blue and the alpha value.
  channels <- rbind(col2rgb(color), trans)
  paste0("#", apply(apply(channels, 2, num2hex), 2, paste, collapse = ""))
}

# Southern Ontario map: the background covers the responses from the top
# cities, and the points are filled with orange at alpha 10 (passed to
# addTrans) so that overlapping points build up in colour.

so.point.fill <- addTrans("orange", 10)
mymap <- MapBackground(lat = casino.top.so$Lat, lon = casino.top.so$Long)
PlotOnStaticMap(mymap, casino.top.so$Lat, casino.top.so$Long, cex = 1.5, pch = 21, bg = so.point.fill)

# Question 3: for each issue, the share of respondents who rated it
# "Very Important" (score 3 in the recoded `casino` columns 8 to 23), plotted
# from the least to the most frequently chosen issue.

q3.issues <- c("Design of the facility",
               "Employment opportunities", "Entertainment and cultural activities",
               "Expanded convention facilities", "Integration with surrounding areas",
               "New hotel accommodations", "Problem gambling & health concerns",
               "Public safety and social concerns", "Public space",
               "Restaurants", "Retail", "Revenue for the City", "Support for local businesses",
               "Tourist attraction", "Traffic concerns", "Training and career development")

# One proportion per column; missing answers are ignored.
q3.share <- vapply(8:23, function(j) mean(casino[, j] == 3, na.rm = TRUE), numeric(1))
names(q3.share) <- q3.issues

# Sort ascending; the issue names travel along as row names.
q3.summary <- as.data.frame(q3.share[order(q3.share, decreasing = FALSE)])
names(q3.summary)[1] <- "% Very Important"
q3.summary$Concern <- rownames(q3.summary)
# Fix the factor levels in sorted order so the plot keeps this ranking.
q3.summary$Concern <- factor(q3.summary$Concern, levels = q3.summary$Concern)

ggplot(q3.summary, aes(x = Concern, y = q3.summary$"% Very Important")) +
  geom_point(size = 5, colour = "forest green") +
  coord_flip() +
  ggtitle("Issues of Importance Surrounding\nthe New Casino") +
  scale_x_discrete(name = "Issues of Importance") +
  theme(title = element_text(size = 22), plot.title = element_text(hjust = .8)) +
  scale_y_continuous(labels = percent) +
  theme_wsj()

# Features people would want in an Integrated Entertainment Complex, downtown
# location: the mean of each of `casino` columns 36 to 44 (missing values
# ignored), plotted from the least to the most wanted feature.

q7a.features <- c("No Casino", "Casino Only", "Convention Centre Space", "Cultural and Arts Facilities",
                  "Hotel", "Nightclubs", "Restaurants", "Retail", "Theatre")
q7a.share <- vapply(36:44, function(j) mean(casino[, j], na.rm = TRUE), numeric(1))
names(q7a.share) <- q7a.features

# Sort ascending; the feature names travel along as row names.
q7a.summary <- as.data.frame(q7a.share[order(q7a.share, decreasing = FALSE)])
names(q7a.summary)[1] <- "% Include"
q7a.summary$feature <- rownames(q7a.summary)
# Fix the factor levels in sorted order so the plot keeps this ranking.
q7a.summary$feature <- factor(q7a.summary$feature, levels = q7a.summary$feature)

ggplot(q7a.summary, aes(x = feature, y = `% Include`)) +
  geom_point(size = 5, colour = "forest green") +
  coord_flip() +
  ggtitle("What People Would Want in an Integrated\nEntertainment Complex in Downtown Toronto") +
  scale_x_discrete(name = "Features") +
  theme(title = element_text(size = 22), plot.title = element_text(hjust = .8)) +
  scale_y_continuous(labels = percent, name = "% Wanting the Feature") +
  theme_wsj()

# Features people would want in an Integrated Entertainment Complex,
# Exhibition Place location: the mean of each of `casino` columns 52 to 60
# (missing values ignored), plotted from the least to the most wanted feature.
q7b.summary <- matrix(NA, 9, 1, dimnames = list(
  c("No Casino", "Casino Only", "Convention Centre Space", "Cultural and Arts Facilities",
    "Hotel", "Nightclubs", "Restaurants", "Retail", "Theatre"),
  c("% Include")))

for (i in 52:60) {
  q7b.summary[i - 51] <- mean(casino[, i], na.rm = TRUE)
}
# Sort ascending; the feature names travel along as row names.
q7b.summary <- as.data.frame(q7b.summary[order(q7b.summary[, 1], decreasing = FALSE), ])
names(q7b.summary)[1] <- "% Include"
q7b.summary$feature <- rownames(q7b.summary)
# Fix the factor levels in sorted order so the plot keeps this ranking.
q7b.summary$feature <- factor(q7b.summary$feature, levels = q7b.summary$feature)

# The plot title previously misspelled "Exhibition".
ggplot(q7b.summary, aes(x = feature, y = `% Include`)) +
  geom_point(size = 5, colour = "forest green") +
  coord_flip() +
  ggtitle("What People Would Want in an Integrated\nEntertainment Complex at the Exhibition Place") +
  scale_x_discrete(name = "Features") +
  theme(title = element_text(size = 22), plot.title = element_text(hjust = .8)) +
  scale_y_continuous(labels = percent, name = "% Wanting the Feature") +
  theme_wsj()

# Features people would want in an Integrated Entertainment Complex, Port Lands
# location: the mean of each of `casino` columns 68 to 76 (missing values
# ignored), plotted from the least to the most wanted feature.
q7c.summary <- matrix(NA, 9, 1, dimnames = list(
  c("No Casino", "Casino Only", "Convention Centre Space", "Cultural and Arts Facilities",
    "Hotel", "Nightclubs", "Restaurants", "Retail", "Theatre"),
  c("% Include")))

for (i in 68:76) {
  q7c.summary[i - 67] <- mean(casino[, i], na.rm = TRUE)
}
# Sort ascending; the feature names travel along as row names.
q7c.summary <- as.data.frame(q7c.summary[order(q7c.summary[, 1], decreasing = FALSE), ])
names(q7c.summary)[1] <- "% Include"
q7c.summary$feature <- rownames(q7c.summary)
# Fix the factor levels in sorted order so the plot keeps this ranking.
q7c.summary$feature <- factor(q7c.summary$feature, levels = q7c.summary$feature)

# y is taken from the plotted data frame itself. It previously pointed at
# q7b.summary, which paired the Port Lands feature names with the Exhibition
# Place percentages.
ggplot(q7c.summary, aes(x = feature, y = `% Include`)) +
  geom_point(size = 5, colour = "forest green") +
  coord_flip() +
  ggtitle("What People Would Want in an Integrated\nEntertainment Complex in Port Lands") +
  scale_x_discrete(name = "Features") +
  theme(title = element_text(size = 22), plot.title = element_text(hjust = .8)) +
  scale_y_continuous(labels = percent, name = "% Wanting the Feature") +
  theme_wsj()

# Re-read the raw survey as a plain data frame: casino.orig was converted to an
# ffdf for the postal-code merge, and a regular data frame avoids the ffdf
# indexing notation (e.g. df[,"variable1"]) for the demographic columns.

casino.orig2 <- read.csv("/home/inkhorn/Downloads/casino_survey_results20130325.csv")

# Gender: keep the three recognised answers, label everything else (blank or
# missing included) "Did not disclose".
# The value comes from casino.orig2 only. An earlier copy from casino.orig (by
# now a merged ffdf) was dropped because it was overwritten straight away.
gender.known <- c("Female", "Male", "Transgendered")
gender.raw <- as.character(casino.orig2$Gender)
casino$Gender <- ifelse(gender.raw %in% gender.known, gender.raw, "Did not disclose")

casino$Gender <- factor(casino$Gender, levels = c("Transgendered", "Did not disclose", "Female", "Male"))
ggplot(casino, aes(x = Gender, y = ..count.. / sum(..count..))) +
  geom_bar(fill = "forest green") +
  coord_flip() +
  ggtitle("Gender Distribution of Respondents") +
  scale_x_discrete(name = "") +
  theme_wsj() +
  theme(title = element_text(size = 22), plot.title = element_text(hjust = .8)) +
  stat_bin(aes(label = sprintf("%.02f %%", ..count.. / sum(..count..) * 100)), geom = "text") +
  scale_y_continuous(labels = percent)

# Age: keep the recognised brackets as they are and label every other
# non-missing value (blanks included) "Did not disclose"; a missing value
# stays NA.
age.brackets <- c("Under 15", "15-24", "25-34", "35-44", "45-54", "55-64", "65 or older")
age.raw <- as.character(casino.orig2$Age)
casino$Age <- ifelse(age.raw %in% age.brackets, age.raw, "Did not disclose")
casino$Age[is.na(age.raw)] <- NA
casino$Age <- factor(casino$Age, levels = c("Did not disclose", age.brackets))

ggplot(casino, aes(x = Age, y = ..count.. / sum(..count..))) +
  geom_bar(fill = "forest green") +
  coord_flip() +
  ggtitle("Age Distribution of Respondents") +
  scale_x_discrete(name = "") +
  theme_wsj() +
  theme(title = element_text(size = 22), plot.title = element_text(hjust = .8)) +
  stat_bin(aes(label = sprintf("%.02f %%", ..count.. / sum(..count..) * 100)), geom = "text") +
  scale_y_continuous(labels = percent)
	library(ff)
	library(ffbase)
	library(stringr)
	library(ggplot2)
	library(ggthemes)
	library(reshape2)
	library(RgoogleMaps)

	# Load the survey export once and keep two copies of it: `casino` is recoded
	# to numeric scores further down, while `casino.orig` keeps the original text
	# answers. Copying the data frame gives the same contents as reading the same
	# file a second time, without the redundant disk read.
	casino <- read.csv("/home/inkhorn/Downloads/casino_survey_results20130325.csv")
	casino.orig <- casino

	# Canadian postal codes with latitude/longitude coordinates, loaded as an ffdf.
	# The file has no header row (header = FALSE).
	pcodes <- read.csv.ffdf(file = "/home/inkhorn/Downloads/zipcodeset.txt",
	                        first.rows = 50000, next.rows = 50000,
	                        colClasses = NA, header = FALSE)

	# Recode the text answers held in `casino.orig` into numeric scores in `casino`.
	# Each scale is a named lookup vector: indexing it by the answer text returns
	# the score, and any answer that is not listed (blank, unexpected text, NA)
	# becomes NA. This replaces the nested ifelse() chains and always produces a
	# numeric column.

	# Q1_A: five-point opposed / in favour scale.
	favour.scores <- c("Strongly Opposed" = 1,
	                   "Somewhat Opposed" = 2,
	                   "Neutral or Mixed Feelings" = 3,
	                   "Somewhat in Favour" = 4,
	                   "Strongly in Favour" = 5)
	casino$Q1_A <- unname(favour.scores[as.character(casino.orig$Q1_A)])

	# Q2_A: four-point "fits my image" scale.
	image.scores <- c("Does Not Fit My Image At All" = 1,
	                  "Neutral / I am Not Sure" = 2,
	                  "Fits Image Somewhat" = 3,
	                  "Fits Image Perfectly" = 4)
	casino$Q2_A <- unname(image.scores[as.character(casino.orig$Q2_A)])

	# Columns 8 to 24 are recoded on the three-point importance scale.
	importance.scores <- c("Not Important At All" = 1,
	                       "Somewhat Important" = 2,
	                       "Very Important" = 3)
	for (i in 8:24) {
	  casino[, i] <- unname(importance.scores[as.character(casino.orig[, i])])
	}

	# Columns 31, 32, 47, 48, 63 and 64 are recoded on the five-point suitability scale.
	suitability.scores <- c("Strongly Unsuitable" = 1,
	                        "Somewhat Unsuitable" = 2,
	                        "Neutral or Mixed Feelings" = 3,
	                        "Somewhat Suitable" = 4,
	                        "Highly Suitable" = 5)
	for (i in c(31:32, 47, 48, 63, 64)) {
	  casino[, i] <- unname(suitability.scores[as.character(casino.orig[, i])])
	}

	# The raw data contains blank answers. For Q1_A they are turned into NA and the
	# column becomes a factor ordered from most opposed to most in favour; the
	# Q2_A and Q6 plots simply filter the blank rows out.

	is.na(casino.orig$Q1_A) <- which(casino.orig$Q1_A == "")
	casino.orig$Q1_A <- factor(
	  casino.orig$Q1_A,
	  levels = c("Strongly Opposed", "Somewhat Opposed", "Neutral or Mixed Feelings",
	             "Somewhat in Favour", "Strongly in Favour")
	)

	# Share of respondents per answer: how people feel about a new casino.
	ggplot(subset(casino.orig, !is.na(Q1_A)), aes(x = Q1_A, y = ..count.. / sum(..count..))) +
	  geom_bar(fill = "forest green") +
	  coord_flip() +
	  ggtitle("How do you feel about having a new casino in Toronto?") +
	  scale_x_discrete(name = "") +
	  theme_wsj() +
	  theme(title = element_text(size = 22), plot.title = element_text(hjust = .8)) +
	  stat_bin(aes(label = sprintf("%.02f %%", ..count.. / sum(..count..) * 100)), geom = "text") +
	  scale_y_continuous(labels = percent)

	# Share of respondents per answer: how a casino fits their image of Toronto.
	ggplot(subset(casino.orig, Q2_A != ""), aes(x = Q2_A, y = ..count.. / sum(..count..))) +
	  geom_bar(fill = "forest green") +
	  coord_flip() +
	  ggtitle("How does a new casino in Toronto fit your image of the City of Toronto?") +
	  scale_x_discrete(name = "") +
	  theme_wsj() +
	  theme(title = element_text(size = 22), plot.title = element_text(hjust = .8)) +
	  stat_bin(aes(label = sprintf("%.02f %%", ..count.. / sum(..count..) * 100)), geom = "text") +
	  scale_y_continuous(labels = percent)

	# Share of respondents per answer: preferred location if a casino is built.
	ggplot(subset(casino.orig, Q6 != ""), aes(x = Q6, y = ..count.. / sum(..count..))) +
	  geom_bar(fill = "forest green") +
	  coord_flip() +
	  ggtitle("If a casino is built, where would you prefer to see it located?") +
	  scale_x_discrete(name = "") +
	  theme_wsj() +
	  theme(title = element_text(size = 22), plot.title = element_text(hjust = .8)) +
	  stat_bin(aes(label = sprintf("%.02f %%", ..count.. / sum(..count..) * 100)), geom = "text") +
	  scale_y_continuous(labels = percent)

	# Order the downtown suitability labels by their numeric score (taken from the
	# recoded `casino` columns) so the rating levels come out in scale order.
	casino.orig$Q7_A_StandAlone <- reorder(casino.orig$Q7_A_StandAlone, casino$Q7_A_StandAlone)
	casino.orig$Q7_A_Integrated <- reorder(casino.orig$Q7_A_Integrated, casino$Q7_A_Integrated)

	# Proportions over the first five levels of each question, one column per
	# casino type.
	stand.and.integrated.ratings.downtown <- cbind(
	  prop.table(as.matrix(table(casino.orig$Q7_A_StandAlone)[1:5])),
	  prop.table(as.matrix(table(casino.orig$Q7_A_Integrated)[1:5]))
	)
	colnames(stand.and.integrated.ratings.downtown) <- c("Standalone Casino", "Integrated Entertainment Complex")

	# Long format: one row per (Rating, Casino Type) pair.
	stand.and.integrated.ratings.downtown.long <- melt(
	  stand.and.integrated.ratings.downtown,
	  varnames = c("Rating", "Casino Type"),
	  value.name = "Percentage"
	)

	# Dodged bars of the rating shares for the downtown location.
	# The labels multiply the proportion by 100; the previous `Percentage100`
	# was an undefined name (the `*` had been lost).
	ggplot(stand.and.integrated.ratings.downtown.long,
	       aes(x = `Casino Type`, fill = Rating, y = Percentage,
	           label = sprintf("%.02f %%", Percentage * 100))) +
	  geom_bar(position = "dodge") +
	  coord_flip() +
	  ggtitle("Ratings of Casino Suitability \nin Downtown Toronto by Casino Type") +
	  scale_x_discrete(name = "") +
	  theme(title = element_text(size = 22), plot.title = element_text(hjust = .8)) +
	  scale_y_continuous(labels = percent) +
	  geom_text(aes(x = `Casino Type`, y = Percentage, ymax = Percentage,
	                label = sprintf("%.01f%%", Percentage * 100), hjust = .75),
	            position = position_dodge(width = 1), size = 4) +
	  scale_fill_few(palette = "light") +
	  theme_wsj()

	# Exhibition Place ratings: proportions over table entries 2 to 6 of each
	# question, one column per casino type.
	stand.and.integrated.ratings.exhibition <- cbind(
	  prop.table(as.matrix(table(casino.orig$Q7_B_StandAlone)[2:6])),
	  prop.table(as.matrix(table(casino.orig$Q7_B_Integrated)[2:6]))
	)
	colnames(stand.and.integrated.ratings.exhibition) <- c("Standalone Casino", "Integrated Entertainment Complex")

	# Long format: one row per (Rating, Casino Type) pair.
	stand.and.integrated.ratings.exhibition.long <- melt(
	  stand.and.integrated.ratings.exhibition,
	  varnames = c("Rating", "Casino Type"),
	  value.name = "Percentage"
	)

	# Reuse the first five level names of Q7_A_StandAlone so the ratings are
	# ordered the same way as in the downtown data.
	stand.and.integrated.ratings.exhibition.long$Rating <- factor(
	  stand.and.integrated.ratings.exhibition.long$Rating,
	  levels = levels(casino.orig$Q7_A_StandAlone)[1:5]
	)

	# Dodged bars of the rating shares for the Exhibition Place location.
	# The labels multiply the proportion by 100; the previous `Percentage100`
	# was an undefined name (the `*` had been lost).
	ggplot(stand.and.integrated.ratings.exhibition.long,
	       aes(x = `Casino Type`, fill = Rating, y = Percentage,
	           label = sprintf("%.02f %%", Percentage * 100))) +
	  geom_bar(position = "dodge") +
	  coord_flip() +
	  ggtitle("Ratings of Casino Suitability \nat Exhibition Place by Casino Type") +
	  scale_x_discrete(name = "") +
	  theme(title = element_text(size = 22), plot.title = element_text(hjust = .8)) +
	  scale_y_continuous(labels = percent) +
	  geom_text(aes(x = `Casino Type`, y = Percentage, ymax = Percentage,
	                label = sprintf("%.01f%%", Percentage * 100), hjust = .75),
	            position = position_dodge(width = 1), size = 4) +
	  scale_fill_few(palette = "light") +
	  theme_wsj()

	# Port Lands ratings: proportions over table entries 2 to 6 of each question,
	# one column per casino type.
	stand.and.integrated.ratings.portlands <- cbind(
	  prop.table(as.matrix(table(casino.orig$Q7_C_StandAlone)[2:6])),
	  prop.table(as.matrix(table(casino.orig$Q7_C_Integrated)[2:6]))
	)
	colnames(stand.and.integrated.ratings.portlands) <- c("Standalone Casino", "Integrated Entertainment Complex")

	# Long format: one row per (Rating, Casino Type) pair.
	stand.and.integrated.ratings.portlands.long <- melt(
	  stand.and.integrated.ratings.portlands,
	  varnames = c("Rating", "Casino Type"),
	  value.name = "Percentage"
	)

	# Reuse the first five level names of Q7_A_StandAlone so the ratings are
	# ordered the same way as in the downtown data.
	stand.and.integrated.ratings.portlands.long$Rating <- factor(
	  stand.and.integrated.ratings.portlands.long$Rating,
	  levels = levels(casino.orig$Q7_A_StandAlone)[1:5]
	)

	# Dodged bars of the rating shares for the Port Lands location.
	# The labels multiply the proportion by 100; the previous `Percentage100`
	# was an undefined name (the `*` had been lost).
	ggplot(stand.and.integrated.ratings.portlands.long,
	       aes(x = `Casino Type`, fill = Rating, y = Percentage,
	           label = sprintf("%.02f %%", Percentage * 100))) +
	  geom_bar(position = "dodge") +
	  coord_flip() +
	  ggtitle("Ratings of Casino Suitability \nat Port Lands by Casino Type") +
	  scale_x_discrete(name = "") +
	  theme(title = element_text(size = 22), plot.title = element_text(hjust = .8)) +
	  scale_y_continuous(labels = percent) +
	  geom_text(aes(x = `Casino Type`, y = Percentage, ymax = Percentage,
	                label = sprintf("%.01f%%", Percentage * 100), hjust = .75),
	            position = position_dodge(width = 1), size = 4) +
	  scale_fill_few(palette = "light") +
	  theme_wsj()

	# Geocoding step: attach latitude/longitude to each survey response by matching
	# the respondent's postal code (really the FSA, i.e. the first three characters)
	# against the postal-code table.
	# The geocode file is held as an ffdf, which the author found faster to merge,
	# so the casino.orig data frame is coerced to ffdf as well. From here on
	# casino.orig is an ffdf and is indexed with the df[,"column"] notation.
	# (This step was not done at this point of the original analysis; the script is
	# not strictly linear.)

	# Upper-case the survey postal codes so they compare equal to the upper-cased FSAs.
	casino.orig$PostalCode = toupper(casino.orig$PostalCode)

	# NOTE(review): pcodes was already read earlier in this script with the same
	# arguments; this second read looks redundant - confirm before removing either.
	pcodes = read.csv.ffdf(file="/home/inkhorn/Downloads/zipcodeset.txt", first.rows=50000, next.rows=50000, colClasses=NA, header=FALSE)
	names(pcodes) = c("Postal","Lat","Long","City","Prov")
	# FSA = first three characters of the postal code, upper-cased, stored as an ff factor.
	pcodes$FSA = as.ff(as.factor(toupper(substr(pcodes[,"Postal"], 1,3))))
	casino.orig = as.ffdf(casino.orig)
	# The join key must also be an ff factor on the survey side.
	casino.orig$PostalCode = as.ff(as.factor(toupper(casino.orig[,"PostalCode"])))
	# all.x=TRUE keeps survey rows that have no matching FSA.
	# NOTE(review): several postal codes share one FSA; presumably ffbase's merge
	# takes the first match per survey row rather than duplicating rows - verify.
	casino.orig = merge(casino.orig, pcodes, by.x="PostalCode", by.y="FSA", all.x=TRUE)

	# Full map of every geocoded response: keep only the rows whose latitude is
	# known, fetch a background map covering them, then draw one orange point per
	# response.
	geocoded.rows <- which(!is.na(casino.orig[, "Lat"]))
	casino.gc <- casino.orig[geocoded.rows, ]
	mymap <- MapBackground(lat = casino.gc$Lat, lon = casino.gc$Long)
	PlotOnStaticMap(mymap, casino.gc$Lat, casino.gc$Long, cex = 1.5, pch = 21, bg = "orange")

	# Build the list of cities with the most responses and use it to restrict the
	# geocoded data, so the second map zooms in on Southern Ontario.

	# Response counts per city, dropping unused levels and the blank city name,
	# sorted from most to fewest responses.
	cities <- data.frame(table(casino.orig[, "City"]))
	cities <- cities[cities$Freq > 0, ]
	cities <- cities[order(cities$Freq, decreasing = TRUE), ]
	cities <- cities[cities$Var1 != "", ]

	# Top 28 cities (an arbitrary cut-off). head() is used so that fewer than 28
	# available cities does not pad the result with NA rows.
	cities.filter <- head(cities, 28)
	names(cities.filter) <- c("City", "# Responses")

	# Keep only the responses from those cities. The city names live in the `City`
	# column after the rename above; `Var1` no longer exists at this point, and
	# filtering on it would match nothing.
	casino.top.so <- casino.orig[which(casino.orig[, "City"] %in% cities.filter$City), ]

	# Transparency helper used for the Southern Ontario map.

	addTrans <- function(color, trans)
	{
	  # Add an alpha (transparency) channel to one or more colours.
	  #
	  # Args:
	  #   color: colour specification(s) accepted by col2rgb().
	  #   trans: alpha value(s) between 0 (fully transparent) and 255 (fully
	  #          visible). `color` and `trans` must have the same length, or one
	  #          of them must have length 1, in which case it is recycled.
	  #
	  # Returns:
	  #   A character vector of "#RRGGBBAA" hex strings, one per colour
	  #   (character(0) when `color` is empty).
	  #
	  # Stops when the lengths are incompatible, or when `trans` contains NA or a
	  # value outside 0..255 (such values cannot be written as two hex digits).

	  n.color <- length(color)
	  n.trans <- length(trans)
	  if (n.color != n.trans && !any(c(n.color, n.trans) == 1)) stop("Vector lengths not correct")
	  if (n.color == 0) return(character(0))
	  if (any(is.na(trans)) || any(trans < 0 | trans > 255)) {
	    stop("trans must be between 0 and 255")
	  }
	  if (n.color == 1 && n.trans > 1) color <- rep(color, n.trans)
	  if (n.trans == 1 && n.color > 1) trans <- rep(trans, n.color)

	  # Two-digit upper-case hex for values in 0..255: high nibble, then low nibble.
	  num2hex <- function(x)
	  {
	    hex <- unlist(strsplit("0123456789ABCDEF", split = ""))
	    paste0(hex[(x - x %% 16) / 16 + 1], hex[x %% 16 + 1])
	  }

	  # One column per colour; rows are red, green, blue and the alpha value.
	  channels <- rbind(col2rgb(color), trans)
	  paste0("#", apply(apply(channels, 2, num2hex), 2, paste, collapse = ""))
	}

	# Southern Ontario map: the background covers the responses from the top
	# cities, and the points are filled with orange at alpha 10 (passed to
	# addTrans) so that overlapping points build up in colour.

	so.point.fill <- addTrans("orange", 10)
	mymap <- MapBackground(lat = casino.top.so$Lat, lon = casino.top.so$Long)
	PlotOnStaticMap(mymap, casino.top.so$Lat, casino.top.so$Long, cex = 1.5, pch = 21, bg = so.point.fill)

	# Question 3: for each issue, the share of respondents who rated it
	# "Very Important" (score 3 in the recoded `casino` columns 8 to 23), plotted
	# from the least to the most frequently chosen issue.

	q3.issues <- c("Design of the facility",
	               "Employment opportunities", "Entertainment and cultural activities",
	               "Expanded convention facilities", "Integration with surrounding areas",
	               "New hotel accommodations", "Problem gambling & health concerns",
	               "Public safety and social concerns", "Public space",
	               "Restaurants", "Retail", "Revenue for the City", "Support for local businesses",
	               "Tourist attraction", "Traffic concerns", "Training and career development")

	# One proportion per column; missing answers are ignored.
	q3.share <- vapply(8:23, function(j) mean(casino[, j] == 3, na.rm = TRUE), numeric(1))
	names(q3.share) <- q3.issues

	# Sort ascending; the issue names travel along as row names.
	q3.summary <- as.data.frame(q3.share[order(q3.share, decreasing = FALSE)])
	names(q3.summary)[1] <- "% Very Important"
	q3.summary$Concern <- rownames(q3.summary)
	# Fix the factor levels in sorted order so the plot keeps this ranking.
	q3.summary$Concern <- factor(q3.summary$Concern, levels = q3.summary$Concern)

	ggplot(q3.summary, aes(x = Concern, y = q3.summary$"% Very Important")) +
	  geom_point(size = 5, colour = "forest green") +
	  coord_flip() +
	  ggtitle("Issues of Importance Surrounding\nthe New Casino") +
	  scale_x_discrete(name = "Issues of Importance") +
	  theme(title = element_text(size = 22), plot.title = element_text(hjust = .8)) +
	  scale_y_continuous(labels = percent) +
	  theme_wsj()

	# Features people would want in an Integrated Entertainment Complex, downtown
	# location: the mean of each of `casino` columns 36 to 44 (missing values
	# ignored), plotted from the least to the most wanted feature.

	q7a.features <- c("No Casino", "Casino Only", "Convention Centre Space", "Cultural and Arts Facilities",
	                  "Hotel", "Nightclubs", "Restaurants", "Retail", "Theatre")
	q7a.share <- vapply(36:44, function(j) mean(casino[, j], na.rm = TRUE), numeric(1))
	names(q7a.share) <- q7a.features

	# Sort ascending; the feature names travel along as row names.
	q7a.summary <- as.data.frame(q7a.share[order(q7a.share, decreasing = FALSE)])
	names(q7a.summary)[1] <- "% Include"
	q7a.summary$feature <- rownames(q7a.summary)
	# Fix the factor levels in sorted order so the plot keeps this ranking.
	q7a.summary$feature <- factor(q7a.summary$feature, levels = q7a.summary$feature)

	ggplot(q7a.summary, aes(x = feature, y = `% Include`)) +
	  geom_point(size = 5, colour = "forest green") +
	  coord_flip() +
	  ggtitle("What People Would Want in an Integrated\nEntertainment Complex in Downtown Toronto") +
	  scale_x_discrete(name = "Features") +
	  theme(title = element_text(size = 22), plot.title = element_text(hjust = .8)) +
	  scale_y_continuous(labels = percent, name = "% Wanting the Feature") +
	  theme_wsj()

	# Feature preferences for an Integrated Entertainment Complex at
	# Exhibition Place (per the plot title below).

	q7b.features = c("No Casino","Casino Only", "Convention Centre Space", "Cultural and Arts Facilities",
	  "Hotel","Nightclubs","Restaurants","Retail","Theatre")

	# Columns 52:60 of casino are matched positionally to q7b.features; each
	# column mean (NAs dropped) is plotted as the share wanting that feature.
	q7b.share = vapply(52:60, function(i) mean(casino[, i], na.rm=TRUE), numeric(1))

	# Sort ascending and freeze that order in the factor levels so the plot
	# shows features ranked by share rather than alphabetically.
	q7b.order = order(q7b.share, decreasing=FALSE)
	q7b.summary = data.frame("% Include"=q7b.share[q7b.order],
	  row.names=q7b.features[q7b.order], check.names=FALSE)
	q7b.summary$feature = factor(q7b.features[q7b.order], levels=q7b.features[q7b.order])

	# theme_wsj() is a complete theme, so it must come before the theme()
	# overrides; added last it would discard them.
	ggplot(q7b.summary, aes(x=feature, y=`% Include`)) +
	  geom_point(size=5, colour="forest green") +
	  coord_flip() +
	  ggtitle("What People Would Want in an Integrated\nEntertainment Complex at the Exhibition Place") +
	  scale_x_discrete(name="Features") +
	  scale_y_continuous(labels=percent,name="% Wanting the Feature") +
	  theme_wsj() +
	  theme(title=element_text(size=22),plot.title=element_text(hjust=.8))

	# Feature preferences for an Integrated Entertainment Complex in the
	# Port Lands (per the plot title below).

	q7c.features = c("No Casino","Casino Only", "Convention Centre Space", "Cultural and Arts Facilities",
	  "Hotel","Nightclubs","Restaurants","Retail","Theatre")

	# Columns 68:76 of casino are matched positionally to q7c.features; each
	# column mean (NAs dropped) is plotted as the share wanting that feature.
	q7c.share = vapply(68:76, function(i) mean(casino[, i], na.rm=TRUE), numeric(1))

	# Sort ascending and freeze that order in the factor levels so the plot
	# shows features ranked by share rather than alphabetically.
	q7c.order = order(q7c.share, decreasing=FALSE)
	q7c.summary = data.frame("% Include"=q7c.share[q7c.order],
	  row.names=q7c.features[q7c.order], check.names=FALSE)
	q7c.summary$feature = factor(q7c.features[q7c.order], levels=q7c.features[q7c.order])

	# y is taken from q7c.summary itself (the column is referenced by name), so
	# the points always belong to the Port Lands data shown on the x axis.
	# theme_wsj() is a complete theme, so it must come before the theme()
	# overrides; added last it would discard them.
	ggplot(q7c.summary, aes(x=feature, y=`% Include`)) +
	  geom_point(size=5, colour="forest green") +
	  coord_flip() +
	  ggtitle("What People Would Want in an Integrated\nEntertainment Complex in Port Lands") +
	  scale_x_discrete(name="Features") +
	  scale_y_continuous(labels=percent,name="% Wanting the Feature") +
	  theme_wsj() +
	  theme(title=element_text(size=22),plot.title=element_text(hjust=.8))

	# Read the raw survey CSV once more into a plain data.frame, so the
	# demographic columns below can be reached with `$` rather than the ffdf
	# indexing notation (e.g. df[,"variable1"]).
	# NOTE(review): casino.orig is read from this same file at the top of the
	# script -- confirm whether this additional copy is still needed.

	casino.orig2 <- read.csv(file = "/home/inkhorn/Downloads/casino_survey_results20130325.csv")

	# Finally, here's some code where I processed and plotted the Gender and Age demographic variables

	# Keep the three recognised answers as they are; anything else (blank,
	# NA, or an unexpected value) is reported as "Did not disclose".
	gender.known = c("Female","Male","Transgendered")
	casino$Gender = ifelse(casino.orig2$Gender %in% gender.known,
	  as.character(casino.orig2$Gender), "Did not disclose")

	# The level order fixes the bar order on the flipped axis.
	casino$Gender = factor(casino$Gender, levels=c("Transgendered","Did not disclose","Female","Male"))

	# Bars show each category's share of all respondents, with the percentage
	# printed as a text label.
	# NOTE(review): ggplot2 >= 2.0 is understood to reject a discrete x in
	# stat_bin() (stat_count() is its replacement) -- confirm against the
	# installed ggplot2 version before changing.
	ggplot(casino, aes(x=Gender,y=..count../sum(..count..))) +
	  geom_bar(fill="forest green") +
	  coord_flip() +
	  ggtitle("Gender Distribution of Respondents") +
	  scale_x_discrete(name="") +
	  theme_wsj() +
	  theme(title=element_text(size=22),plot.title=element_text(hjust=.8)) +
	  stat_bin(aes(label = sprintf("%.02f %%", ..count../sum(..count..)*100)), geom="text") +
	  scale_y_continuous(labels=percent)

	# Recode Age: the seven recognised age brackets are kept as they are, and
	# everything else (blank, NA, or an unexpected value) becomes
	# "Did not disclose". Using %in% rather than == means an NA age is counted
	# as "Did not disclose" instead of staying NA.
	age.levels = c("Did not disclose","Under 15","15-24","25-34","35-44","45-54","55-64","65 or older")
	casino$Age = ifelse(casino.orig2$Age %in% age.levels[-1],
	  as.character(casino.orig2$Age), "Did not disclose")

	# The level order fixes the bar order on the flipped axis.
	casino$Age = factor(casino$Age, levels=age.levels)

	# Bars show each age bracket's share of all respondents, with the
	# percentage printed as a text label.
	# NOTE(review): ggplot2 >= 2.0 is understood to reject a discrete x in
	# stat_bin() (stat_count() is its replacement) -- confirm against the
	# installed ggplot2 version before changing.
	ggplot(casino, aes(x=Age,y=..count../sum(..count..))) +
	  geom_bar(fill="forest green") +
	  coord_flip() +
	  ggtitle("Age Distribution of Respondents") +
	  scale_x_discrete(name="") +
	  theme_wsj() +
	  theme(title=element_text(size=22),plot.title=element_text(hjust=.8)) +
	  stat_bin(aes(label = sprintf("%.02f %%", ..count../sum(..count..)*100)), geom="text") +
	  scale_y_continuous(labels=percent)