Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
Cell Phone Analysis
library(jsonlite)
# Read the raw cell-phone records from the local JSON dump, asking jsonlite
# to simplify the records into a data frame.
cp <- fromJSON(txt = "Cell Phone Data.txt", simplifyDataFrame = TRUE)

# Column positions that are coerced to numeric before any analysis.
# NOTE(review): positions are hard-coded, so they silently break if the
# column order of the input file changes -- confirm against the data.
num.atts <- c(4, 9, 11, 12, 13, 14, 15, 16, 18, 22)
cp[num.atts] <- lapply(cp[num.atts], as.numeric)

# Derived feature: screen height-to-width ratio in pixels.
cp$aspect.ratio <- cp$att_pixels_y / cp$att_pixels_x

# Derived feature: "Yes" when the name mentions smart/iphone/blackberry
# (case-insensitive) or the screen size is at least 4; otherwise "No".
# A missing screen size with no name match yields NA.
cp$isSmartPhone <- ifelse(
  grepl("smart|iphone|blackberry", cp$name, ignore.case = TRUE) |
    cp$att_screen_size >= 4,
  "Yes",
  "No"
)
library(ggplot2)
library(ggthemr)
library(scales)
# Apply the ggthemr palette to every plot below. The spelling "camoflauge"
# is deliberate: it has to match the palette name as ggthemr defines it
# (NOTE(review): verify against the installed ggthemr version).
ggthemr("camoflauge")

# Shared look for every price plot: common text sizes plus a dollar-formatted
# y axis. `x_text_angle` optionally rotates the x tick labels (NULL keeps the
# theme default).
price_style <- function(x_text_angle = NULL) {
  list(
    theme(
      axis.text.x = element_text(angle = x_text_angle, size = 14, vjust = 0.5),
      axis.text.y = element_text(size = 14),
      axis.title.x = element_text(size = 15),
      axis.title.y = element_text(size = 15),
      plot.title = element_text(size = 17)
    ),
    scale_y_continuous(labels = dollar, name = "Price (USD?)")
  )
}

# Scatter plot of price against a continuous attribute with a smoothed trend
# line (no confidence band).
price_scatter <- function(data, mapping, title, x_label) {
  ggplot(data, mapping) +
    geom_point(size = 3) +
    ggtitle(title) +
    price_style() +
    scale_x_continuous(x_label) +
    stat_smooth(se = FALSE)
}

# Box plot of price grouped by a discrete attribute.
price_boxplot <- function(data, mapping, title, x_label, x_text_angle = NULL) {
  ggplot(data, mapping) +
    geom_boxplot() +
    ggtitle(title) +
    price_style(x_text_angle) +
    scale_x_discrete(x_label)
}

# Each call below is a top-level expression, so the returned plot is printed
# exactly as the original inline ggplot() expressions were.

# Brand names are rotated 90 degrees so they do not overlap.
price_boxplot(
  cp, aes(x = att_brand, y = price),
  "Mobile Phone Price by Brand", "Brand",
  x_text_angle = 90
)
price_scatter(
  cp, aes(x = att_weight, y = price),
  "Mobile Phone Price by Weight", "Weight (oz)"
)
price_scatter(
  cp, aes(x = att_screen_size, y = price),
  "Mobile Phone Price by Screen Size", "Screen Size (in)"
)
price_scatter(
  cp, aes(x = att_ram, y = price),
  "Mobile Phone Price by Amount of RAM", "RAM (gb)"
)
price_scatter(
  cp, aes(x = att_sd_card, y = price),
  "Mobile Phone Price by SD Card Capacity", "SD Card Capacity (gb)"
)
# The column is referenced by bare name so the mapping is evaluated against
# the plot's own data rather than the global `cp` (no `cp$` inside aes()).
price_boxplot(
  cp, aes(x = ifelse(att_dual_sim == 1, "Yes", "No"), y = price),
  "Mobile Phone Price by Dual Sim", "Has Dual Sim Card?"
)
price_scatter(
  cp, aes(x = att_storage, y = price),
  "Mobile Phone Price by Storage Capacity", "Storage Capacity (gb)"
)
price_scatter(
  cp, aes(x = att_battery_mah, y = price),
  "Mobile Phone Price by Battery Capacity", "Battery Capacity (mAh)"
)
price_scatter(
  cp, aes(x = aspect.ratio, y = price),
  "Mobile Phone Price by Aspect Ratio", "Aspect Ratio (Y Pixels / X Pixels)"
)
price_boxplot(
  cp, aes(x = isSmartPhone, y = price),
  "Mobile Phone Price by Smart Phone Status", "Is it a Smart Phone?"
)
price_boxplot(
  cp, aes(x = att_os, y = price),
  "Mobile Phone Price by Operating System", "Operating System"
)
library(caret)
# Resample with cross-validation (caret's default fold count) while tuning.
control <- trainControl(method = "cv")

# Row indices for the training partition (p = .8 of the rows, chosen by
# createDataPartition() on price).
# NOTE(review): no set.seed() call is visible before this, so the split
# presumably differs between runs -- confirm whether that is intended.
in_train <- createDataPartition(cp$price, p = .8, list = FALSE)

# Fit a gradient-boosted model of price on the phone attributes, using only
# the training rows (`subset = in_train`).
model.gbm <- train(
  price ~ att_brand + att_weight + att_screen_size +
    att_ram + att_sd_card + att_dual_sim +
    att_storage + att_battery_mah + att_os,
  data = cp,
  method = "gbm", trControl = control, verbose = FALSE, subset = in_train
)

# Fix: the original `factor(cp$)` was a syntax error that stopped the whole
# script from parsing; the column being converted is att_brand.
cp$att_brand <- factor(cp$att_brand)

# Hold-out rows are everything not selected for training.
cp.test <- cp[-in_train, ]
# Drop the 'TOTO' brand from the hold-out rows.
# NOTE(review): presumably this brand was missing from the training rows in
# the author's run and broke predict(); with a different split other brands
# could be affected -- confirm.
cp.test <- subset(cp.test, att_brand != 'TOTO')
# Remove hold-out rows that have a missing value in any column.
cp.test <- na.omit(cp.test)
cp.test$pred.price <- predict(model.gbm, cp.test)

# Actual vs. predicted price on the hold-out rows; the yellow line is y = x,
# i.e. where perfect predictions would fall.
ggplot(cp.test, aes(x = pred.price, y = price)) +
  geom_point(size = 3) +
  ggtitle("Mobile Phone Price by Predicted Price") +
  theme(
    axis.text.x = element_text(size = 14, vjust = 0.5),
    axis.text.y = element_text(size = 14),
    axis.title.x = element_text(size = 15),
    axis.title.y = element_text(size = 15),
    plot.title = element_text(size = 17)
  ) +
  scale_y_continuous(labels = dollar, name = "Price (USD?)") +
  scale_x_continuous("Predicted Price", labels = dollar) +
  geom_abline(intercept = 0, slope = 1, colour = "yellow") +
  stat_smooth(se = FALSE)
@rwpr
Copy link

rwpr commented Apr 10, 2015

Hello inkhorn, I can't download the file "Cell Phone Data.txt". Could you please share the link? If not, could you please explain how to get the data from the URL? I'd appreciate the guidance. Thanks.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment