This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Define hyperparameter spaces and search criteria for random grid search.
# Candidate values: alpha from 0 to 1 in steps of 0.05, lambda from 10^-7 to
# 10^3 in half-decade steps.
GLM_params1 <- list(
  alpha = seq(0, 1, 0.05),
  lambda = 10^seq(-7, 3, 0.5)
)

# Randomly choose combinations and train for no longer than 5 minutes
# (300 seconds). The seed inside the search criteria fixes which combinations
# the random search draws; the `seed` argument passed to h2o.grid() below is
# forwarded to the individual GLM models instead.
# NOTE(review): because the search is bounded by wall-clock time, the number
# of models that finish can still differ between runs.
GLM_searchCriteria1 <- list(
  strategy = "RandomDiscrete",
  max_runtime_secs = 300,
  seed = 1234
)

# Train models using the parameters above until the runtime limit is reached.
# Save the models and their prediction scores on the validation frame in a
# grid, for access later.
# `x`, `y`, `train` and `valid` are expected to be defined earlier in the
# script.
GLMgrid <- h2o.grid(
  algorithm = "glm",
  grid_id = "GLM_grid1",
  x = x,
  y = y,
  training_frame = train,
  validation_frame = valid,
  hyper_params = GLM_params1,
  search_criteria = GLM_searchCriteria1,
  seed = 1234
)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Initiate the h2o cluster (receives and processes the dataset).
# No modeling is done in the R session itself - R only keeps a reference key
# to the frame that lives in the cluster.
# nthreads = -1 asks h2o to use all available CPU cores.
h2o.init(nthreads = -1)

# Prepare h2o inputs for modeling
recs.reduced2.h2o <- as.h2o(recs.reduced2) # Coerce DF to an h2o object

# set.seed() only seeds R's local random number generator. The split below is
# drawn inside the h2o cluster, so reproducibility has to come from the `seed`
# argument of h2o.splitFrame(). set.seed(0) is kept for any later R-side
# randomness.
set.seed(0)

# Split h2o data into training (~70%), validation (~20%) and test (remaining
# ~10%) frames. The leftover rows are returned as data.split[[3]].
data.split <- h2o.splitFrame(
  recs.reduced2.h2o,
  ratios = c(0.7, 0.2),
  seed = 0
)
train <- data.split[[1]] # For training
valid <- data.split[[2]] # For validating trained models and comparing different hyperparameter vectors
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from scrapy import Spider | |
from scrapy.spiders import CrawlSpider , Rule | |
from scrapy.linkextractors import LinkExtractor | |
from scrapy.http import Request | |
from scrapy.selector import Selector | |
from cleantechScrapy.items import GTMArticleItem | |
from cleantechScrapy.items import Cleantech100Item | |
##### Crawl Spider to scrape GTM articles ##### |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Use reshape2 to melt exercise data from "wide" to "long" format: each of the
# five minutes columns becomes one row per seqID, with the source column name
# stored in `exercise.type` and its value in `mins.per.day`.
# Then perform a left_join on sequence ID to combine the exercise information
# with the demographic data (every melted exercise row is kept).
exerciseMins <- melt(
  data = exercise,
  id.vars = "seqID",
  measure.vars = c(
    'minsVigWork', 'minsModWork', 'minsWalkBike',
    'minsVigRec', 'minsModRec'
  ),
  variable.name = 'exercise.type',
  value.name = 'mins.per.day'
) %>%
  left_join(demographics, by = 'seqID')
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# NOTE: this changes the working directory for the rest of the session and it
# is not restored here; later code that uses relative paths will resolve them
# against ./tables.
setwd('./tables')

# Loop through the directory holding all input tables as CSVs and save each
# one as a data frame in the current environment (for shiny use), named after
# the file without its ".csv" extension.
# The listing is taken once and restricted to *.csv files so that other files
# or sub-directories are not passed to read.csv().
csv_files <- dir(pattern = "\\.csv$", ignore.case = TRUE)

# seq_along() yields an empty sequence for an empty directory, whereas
# 1:length(x) would iterate over c(1, 0).
for (i in seq_along(csv_files)) {
  # Strip only a literal trailing ".csv"; an unescaped "." is a regex wildcard
  # and would also cut names such as "mycsv.csv" in the wrong place.
  tablename <- sub("\\.csv$", "", csv_files[i], ignore.case = TRUE)
  # Read seqID as character so the IDs are kept as text rather than parsed as
  # numbers.
  table <- read.csv(
    csv_files[i],
    stringsAsFactors = FALSE,
    colClasses = c(seqID = "character")
  )
  assign(tablename, table)
}