Greg Lamp glamp

## calculate_timing.js
export const calculateLoadTimes = () => {
  // Check performance support
  if (performance === undefined) {
    return [];
  }

  // Get a list of "resource" performance entries
  const resources = performance.getEntriesByType("resource");
  if (resources === undefined || resources.length <= 0) {
    return [];

## customer-segmentation.py
import pandas as pd
# http://blog.yhathq.com/static/misc/data/WineKMC.xlsx
# Read the first sheet of the workbook into df_offers and give its columns
# short names. `sheet_name` is the supported keyword; the older `sheetname`
# spelling is rejected by current pandas releases.
df_offers = pd.read_excel("./WineKMC.xlsx", sheet_name=0)
df_offers.columns = ["offer_id", "campaign", "varietal", "min_qty", "discount", "origin", "past_peak"]
df_offers.head()

# Read the second sheet into df_transactions and name its two columns.
df_transactions = pd.read_excel("./WineKMC.xlsx", sheet_name=1)
df_transactions.columns = ["customer_name", "offer_id"]
# Constant column of ones, one per transaction row.
df_transactions['n'] = 1
df_transactions.head()

## rf_reg.R
library(randomForest)
library(miscTools)
library(ggplot2)

# Columns passed to the model: four predictors plus the response `alcohol`.
cols <- c('is_red', 'fixed.acidity', 'density', 'pH', 'alcohol')
# Fit a 20-tree random forest for `alcohol` against the other columns in `cols`,
# using the rows of `train` (defined outside this snippet).
rf <- randomForest(alcohol ~ ., data=train[,cols], ntree=20)

# R-squared on `test`: rSquared() is given the observed values and the
# residuals (observed minus predicted). The outer parentheses make R print
# the assigned value when the line is run at top level.
(r2 <- rSquared(test$alcohol, test$alcohol - predict(rf, test[,cols])))
# [1] 0.6481
# Mean squared error of the forest's predictions on `test`, also printed.
(mse <- mean((test$alcohol - predict(rf, test[,cols]))^2))

## get-election-data.R
library(plyr)
library(XML)
library(uuid)
library(reshape2)

results <- ldply(states, function(state) {
  url <- "http://www.electionprojection.com/latest-polls/%s-presidential-polls-trump-vs-clinton-vs-johnson-vs-stein.php"
  state.fmt <- gsub(" ", "-", tolower(state))

  url.state <- sprintf(url, state.fmt)

## scrapy-error.log
Packing version 98c8d04-master

Deploying to Scrapy Cloud project "373200"

Deploy log last 3 lines:

{"message": "500 Server Error: Internal Server Error for url: https://kumo-builder-prod.dc21.scrapinghub.com:2376/v1.27/auth", "error": "internal_error"}


## scikit-example.py
from sklearn.ensemble import RandomForestClassifier
# Feature columns and label column selected from `df` (defined outside this
# snippet; presumably a pandas DataFrame -- confirm against the full script).
columns = ['age', 'education', 'hours-worked-per-week']
target_variable = 'does-make-more-than-50k'

# Random forest classifier with default settings, fitted on the selected
# features against the label column.
clf = RandomForestClassifier()
clf.fit(df[columns], df[target_variable])

## log_func.py
import numpy as np
import pylab as pl

# 1000 x-values drawn uniformly between 1 and 100, and log(x) with additive
# Gaussian noise (mean 0, standard deviation 0.3).
x = np.random.uniform(low=1, high=100, size=1000)
y = np.log(x) + np.random.normal(loc=0, scale=.3, size=1000)

# Noisy samples drawn as size-1 markers.
pl.scatter(x, y, s=1, label="log(x) with noise")
# Noise-free log curve evaluated at the integers 1..99, drawn in blue.
pl.plot(
    np.arange(1, 100),
    np.log(np.arange(1, 100)),
    c="b",
    label="log(x) true function",
)
pl.xlabel("x")
pl.ylabel("f(x) = log(x)")

## logistic_do_regression.py
# Every column of `data` after the first is used as a predictor.
train_cols = data.columns[1:]
# Index([gre, gpa, prestige_2, prestige_3, prestige_4], dtype=object)

# Logistic regression with `admit` as the response and the columns selected
# above as the design matrix.
logit = sm.Logit(endog=data['admit'], exog=data[train_cols])

# Fit the model.
result = logit.fit()

## salesforce.R
library(RForcecom)

sfSessionCredentials <- NULL

connectToSalesForce <- function() {
  if (! is.null(sfSessionCredentials)) {
    return
  }
  # grab the credentials from Environment Variables
  username <- Sys.getenv("SF_USERNAME") # "your salesforce username"

## training_lending_club.R
# Keep only vintages that have come to term: rows issued before 2012.
df.term <- subset(df, year_issued < 2012)
df.term$home_ownership <- factor(df.term$home_ownership)
# Logical flag: TRUE where home_ownership is "RENT".
df.term$is_rent <- df.term$home_ownership == "RENT"
df.term$fico_range <- factor(df.term$fico_range)
# Integer level codes of the fico_range factor, stored as a numeric column.
df.term$fico_ordered <- as.numeric(df.term$fico_range)

# Random split: rows whose uniform draw exceeds 0.75 go to `test`,
# all remaining rows go to `train`.
idx <- runif(nrow(df.term)) > 0.75
test <- df.term[idx, ]
train <- df.term[!idx, ]
	export const calculateLoadTimes = () => {
	// Check performance support
	if (performance === undefined) {
	return [];
	}

	// Get a list of "resource" performance entries
	const resources = performance.getEntriesByType("resource");
	if (resources === undefined \|\| resources.length <= 0) {
	return [];
	import pandas as pd
	# http://blog.yhathq.com/static/misc/data/WineKMC.xlsx
	df_offers = pd.read_excel("./WineKMC.xlsx", sheetname=0)
	df_offers.columns = ["offer_id", "campaign", "varietal", "min_qty", "discount", "origin", "past_peak"]
	df_offers.head()

	df_transactions = pd.read_excel("./WineKMC.xlsx", sheetname=1)
	df_transactions.columns = ["customer_name", "offer_id"]
	df_transactions['n'] = 1
	df_transactions.head()
	library(randomForest)
	library(miscTools)
	library(ggplot2)

	cols <- c('is_red', 'fixed.acidity', 'density', 'pH', 'alcohol')
	rf <- randomForest(alcohol ~ ., data=train[,cols], ntree=20)

	(r2 <- rSquared(test$alcohol, test$alcohol - predict(rf, test[,cols])))
	# [1] 0.6481
	(mse <- mean((test$alcohol - predict(rf, test[,cols]))^2))
	library(plyr)
	library(XML)
	library(uuid)
	library(reshape2)

	results <- ldply(states, function(state) {
	url <- "http://www.electionprojection.com/latest-polls/%s-presidential-polls-trump-vs-clinton-vs-johnson-vs-stein.php"
	state.fmt <- gsub(" ", "-", tolower(state))

	url.state <- sprintf(url, state.fmt)
	Packing version 98c8d04-master

	Deploying to Scrapy Cloud project "373200"

	Deploy log last 3 lines:

	{"message": "500 Server Error: Internal Server Error for url: https://kumo-builder-prod.dc21.scrapinghub.com:2376/v1.27/auth", "error": "internal_error"}
	from sklearn.ensemble import RandomForestClassifier
	clf = RandomForestClassifier()
	target_variable = 'does-make-more-than-50k'
	columns = ['age', 'education', 'hours-worked-per-week']
	clf.fit(df[columns], df[target_variable])
	import numpy as np
	import pylab as pl

	x = np.random.uniform(1, 100, 1000)
	y = np.log(x) + np.random.normal(0, .3, 1000)

	pl.scatter(x, y, s=1, label="log(x) with noise")
	pl.plot(np.arange(1, 100), np.log(np.arange(1, 100)), c="b", label="log(x) true function")
	pl.xlabel("x")
	pl.ylabel("f(x) = log(x)")
	train_cols = data.columns[1:]
	# Index([gre, gpa, prestige_2, prestige_3, prestige_4], dtype=object)

	logit = sm.Logit(data['admit'], data[train_cols])

	# fit the model
	result = logit.fit()
	library(RForcecom)

	sfSessionCredentials <- NULL

	connectToSalesForce <- function() {
	if (! is.null(sfSessionCredentials)) {
	return
	}
	# grab the credentials from Environment Variables
	username <- Sys.getenv("SF_USERNAME") # "your salesforce username"
	# only evaluate w/ vintages that have come to term
	df.term <- subset(df, year_issued < 2012)
	df.term$home_ownership <- factor(df.term$home_ownership)
	df.term$is_rent <- df.term$home_ownership=="RENT"
	df.term$fico_range <- factor(df.term$fico_range)
	df.term$fico_ordered <- as.numeric(df.term$fico_range)

	idx <- runif(nrow(df.term)) > 0.75
	train <- df.term[idx==FALSE,]
	test <- df.term[idx==TRUE,]