Skip to content

Instantly share code, notes, and snippets.

View mksamelson's full-sized avatar

Matt Samelson mksamelson

View GitHub Profile
# Page to load: the US earnings calendar on bloomberg.com.
url = 'https://www.bloomberg.com/markets/earnings-calendar/us'
# host:port of the proxy, here a local endpoint on port 3128.
headless_proxy = "127.0.0.1:3128"
# Manual proxy configuration: HTTP, FTP and SSL traffic all use the same proxy
# address, and the no-proxy exception list is empty.
# NOTE(review): Proxy / ProxyType look like Selenium's proxy helpers; their
# import and the closing of this call are not visible in this excerpt - confirm.
proxy = Proxy({
'proxyType': ProxyType.MANUAL,
'httpProxy': headless_proxy,
'ftpProxy': headless_proxy,
'sslProxy': headless_proxy,
'noProxy': ''
# Fit a churn classifier with the caret package, using its "xgbTree" method.
# NOTE(review): the original note says 5-fold cross-validation is used; the
# resampling setup is not visible in this excerpt - confirm.
clf.train = train(churn~.,
# churn is modelled on every other column of train_standardized
# (presumably standardized upstream - confirm).
data=train_standardized,
method = "xgbTree",
# No extra preprocessing is requested inside train().
preProcess = NULL,
# Candidate models are compared on accuracy; higher is better.
metric = "Accuracy",
maximize=TRUE,
# Visualize the target: class counts and percentage shares of churn.
# The frequency table is computed once and reused for both columns.
churn.counts <- table(churn.info$churn)
cbind(freq = churn.counts, percentage = prop.table(churn.counts) * 100)
# Console output recorded from an earlier run, kept as comments so that the
# script still parses:
#   freq percentage
# 0 52562 79.07746
# 1 13907 20.92254
# Negative user_spendings values are treated as errors: replace them with NA
# (a real missing value, not the string "NA"). Rows that are already NA are
# not selected by the filter and stay as they are.
# The update is done by reference on the filtered rows only, which replaces
# the earlier lapply(.SD, ifelse(...)) round trip over a single column.
client.info.dt[user_spendings < 0, user_spendings := NA]
# Histogram of the non-missing user_spendings values for month "8" (August).
# Bins are 10 units wide from 0 to 1300, the x axis is limited to 0-60 and
# labels=TRUE prints the count above each bar.
# NOTE(review): hist() needs breaks that span all plotted values, so this
# assumes every remaining value lies within [0, 1300] - confirm.
hist(client.info.dt[month=="8",na.omit(user_spendings)],
labels=TRUE,
xlim=c(0,60),
breaks=seq(0,1300,by=10),
main="user_spendings - August, 2013",
xlab="user_spendings",
# user_spendings ----
# First look at the raw distribution for month "8" (August). The variable
# shows some negative spendings, which are removed as errors.
hist(
  client.info.dt[month == "8", user_spendings],
  col    = "green",
  border = "blue",
  xlab   = "user_spendings",
  main   = "user_spendings - August, 2013"
)
# Subset the data: one table per month (June, July, August).
client.info.jun <- client.info.dt[month == "6"]
client.info.jul <- client.info.dt[month == "7"]
client.info.aug <- client.info.dt[month == "8"]

# Tag the variables: append the month number to every column name from the
# fifth column onwards; the first four columns keep their names.
# seq_along(x)[-seq_len(4)] is empty when a table has fewer than five columns,
# whereas 5:length(x) would count backwards and corrupt the names.
jun.tagged <- seq_along(client.info.jun)[-seq_len(4)]
names(client.info.jun)[jun.tagged] <-
  paste(names(client.info.jun)[jun.tagged], "6", sep = "_")

jul.tagged <- seq_along(client.info.jul)[-seq_len(4)]
names(client.info.jul)[jul.tagged] <-
  paste(names(client.info.jul)[jul.tagged], "7", sep = "_")
#Basic inspection
# Structure: column names, types and a preview of the values.
str(client.info)
# Dimensions: number of rows and number of columns.
dim(client.info)
# Total number of missing (NA) cells across the whole table.
sum(is.na(client.info))
# Per-column summary statistics.
summary(client.info)
# Read the two raw inputs for a first look: the historical client data
# (clientinfo.csv) and the churn information (accountinfo.csv).
client.info <- read.csv(file = "clientinfo.csv")
churn.info <- read.csv(file = "accountinfo.csv")
# Training set will be 80% of the data and validation set 20% of the data.
# createDataPartition() samples within classes only when the outcome is a
# factor; a numeric outcome is grouped by percentiles instead. The summaries
# below wrap churn in factor(), which suggests it is not stored as one, so it
# is converted here to make sure the class distribution is preserved.
set.seed(0)
in.train <- createDataPartition(factor(client.info.merged$churn),
                                p = 0.8, list = FALSE)

# Class counts overall, in the training rows and in the validation rows.
summary(factor(client.info.merged$churn))
ytra <- client.info.merged$churn[in.train]
summary(factor(ytra))
ytst <- client.info.merged$churn[-in.train]
summary(factor(ytst))

# in.train is a one-column matrix (list = FALSE). Flatten it and select rows
# explicitly so the subset means "these rows" whether client.info.merged is a
# data.frame or a data.table.
train <- client.info.merged[as.vector(in.train), ]
@mksamelson
mksamelson / housedems
Created March 31, 2016 20:37
House Democrats
party member state cash_avail
(fctr) (fctr) (fctr) (dbl)
1 D Murphy, Patrick Florida 4308768
2 D Van Hollen, Chris Maryland 3663652
3 D Duckworth, Tammy Illinois 3653185
4 D Doggett, Lloyd Texas 3318858
5 D Neal, Richard E Massachusetts 2730845
6 D Sanchez, Loretta California 2121240
7 D Kind, Ron Wisconsin 2107641
8 D Kennedy, Joe III Massachusetts 2098488