Skip to content

Instantly share code, notes, and snippets.

@MartinBodocky
Created April 20, 2014 20:12
Show Gist options
  • Save MartinBodocky/11124032 to your computer and use it in GitHub Desktop.
Save MartinBodocky/11124032 to your computer and use it in GitHub Desktop.
Doing Data Science Chap2 Exercise
# Doing Data Science, chapter 2 exercise (nyt1.csv dataset).
# The CSV location is built as an explicit path instead of calling setwd(),
# so running this script does not change the session's working directory.
data_dir <- "~/GitHub/doing_data_science/dds_datasets"
nyt_path <- file.path(data_dir, "dds_ch2_nyt", "nyt1.csv")

# Read dataset to memory
data <- read.csv(nyt_path)

# Quick look at the first rows and the dimensions before adding columns
head(data)
dim(data)

# Categorize: bucket Age into brackets. cut() intervals are right-closed by
# default, so an Age of 0 lands in the first bracket (-Inf, 0].
data$agecat <- cut(data$Age, breaks = c(-Inf, 0, 18, 24, 34, 44, 54, 64, Inf))
head(data)

# View per-column summaries, now including agecat
summary(data)
# Brackets: doBy provides summaryBy() for per-group summaries.
# Install it only when it is missing, so re-running the script does not
# download and reinstall the package every time.
if (!requireNamespace("doBy", quietly = TRUE)) {
  install.packages("doBy")
}
library("doBy")
# Summarise a vector as c(count, minimum, mean, maximum), in that order.
# x: a vector accepted by min(), mean() and max().
# Returns an unnamed vector of length 4.
siterange <- function(x) {
  n_obs <- length(x)
  lowest <- min(x)
  average <- mean(x)
  highest <- max(x)
  c(n_obs, lowest, average, highest)
}
# Count, min, mean and max of Age within each age bracket
summaryBy(formula = Age ~ agecat, data = data, FUN = siterange)
# So only signed in users have ages and genders.
# No FUN is passed here, so summaryBy() falls back to its default summary
# function.
summaryBy(
  formula = Gender + Signed_In + Impressions + Clicks ~ agecat,
  data = data
)
# Plot: ggplot2 is used for all charts below.
# Install it only when it is missing, so re-running the script does not
# download and reinstall the package every time.
if (!requireNamespace("ggplot2", quietly = TRUE)) {
  install.packages("ggplot2")
}
library(ggplot2)
# Histogram of Impressions (one bin per unit), bars filled by age bracket
ggplot(data = data, mapping = aes(x = Impressions, fill = agecat)) +
  geom_histogram(binwidth = 1)

# Boxplot of Impressions for each age bracket
ggplot(
  data = data,
  mapping = aes(x = agecat, y = Impressions, fill = agecat)
) +
  geom_boxplot()
# Create click-through rate.
# Clicks are only meaningful when there are impressions; if any clicks show
# up on rows with no impressions, the assumptions about this data are wrong.
# hasimps splits rows into Impressions <= 0 versus Impressions > 0.
data$hasimps <- cut(data$Impressions, breaks = c(-Inf, 0, Inf))

# Count, min, mean and max of Clicks within each hasimps group
summaryBy(formula = Clicks ~ hasimps, data = data, FUN = siterange)
# Density of click-through rate per age bracket, rows with impressions only
ggplot(
  data = subset(data, Impressions > 0),
  mapping = aes(x = Clicks / Impressions, colour = agecat)
) +
  geom_density()

# Same density, restricted to rows with at least one click
ggplot(
  data = subset(data, Clicks > 0),
  mapping = aes(x = Clicks / Impressions, colour = agecat)
) +
  geom_density()

# Boxplot of Clicks per age bracket, rows with impressions only
ggplot(
  data = subset(data, Impressions > 0),
  mapping = aes(x = agecat, y = Clicks, fill = agecat)
) +
  geom_boxplot()

# Density of the raw click count per age bracket, rows with clicks only
ggplot(
  data = subset(data, Clicks > 0),
  mapping = aes(x = Clicks, colour = agecat)
) +
  geom_density()
# Create categories: label each row "NoImps", "Imps" or "Clicks".
# The assignments are layered in this order on purpose: a row with clicks
# overwrites whatever impression label it received earlier. Rows matching
# none of the conditions stay NA.
# The labels are converted to a factor as they are stored on the data frame.
data$scode <- factor(with(data, {
  label <- rep(NA_character_, length(Impressions))
  label[Impressions == 0] <- "NoImps"
  label[Impressions > 0] <- "Imps"
  label[Clicks > 0] <- "Clicks"
  label
}))
summary(data)
#look at levels
# Count the elements of x; used as the per-group summary function below.
# Returns a single integer.
clen <- function(x) {
  length(x)
}
# Number of rows in each scode x Gender x agecat combination
etable <- summaryBy(
  formula = Impressions ~ scode + Gender + agecat,
  data = data,
  FUN = clen
)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment