Skip to content

Instantly share code, notes, and snippets.

View ledell's full-sized avatar
💭
Check out H2O AutoML: http://docs.h2o.ai/h2o/latest-stable/h2o-docs/automl.html

Erin LeDell ledell

💭
Check out H2O AutoML: http://docs.h2o.ai/h2o/latest-stable/h2o-docs/automl.html
View GitHub Profile
@ledell
ledell / kaggledays-sf_h2o_automl_6000.R
Last active March 3, 2022 03:02
KaggleDays SF: H2O AutoML solution
### Kaggle Days SF: Hackathon submission (8th place)
# I used the latest version of H2O (3.24.0.1)
# Latest stable always here: http://h2o-release.s3.amazonaws.com/h2o/latest_stable.html
# H2O 3.24.0.1: http://h2o-release.s3.amazonaws.com/h2o/rel-yates/1/index.html
# If you are a Python user, you can use the demo Python code available on the H2O AutoML User Guide
# instead: http://docs.h2o.ai/h2o/latest-stable/h2o-docs/automl.html
# Unfortunately it was a private competition, so the data is not publicly available!
@ledell
ledell / DeepThings_on_aRxiv.R
Last active February 2, 2021 21:13
A list of papers on arxiv.org with the over-hyped Deep* prefix in the title.
# https://ropensci.org/tutorials/arxiv_tutorial.html
# Install the aRxiv package (only needs to be run once per R library)
install.packages("aRxiv")
library(aRxiv)
library(stringr)
library(ggplot2)
# Query arxiv: 6892 results including "Deep"; 49 DeepThings (Sept 21, 2017)
# Searches paper titles for "Deep", fetching in batches of 1000 up to a cap of 100000 records
df <- arxiv_search('ti:"Deep"', batchsize = 1000, limit = 100000)
titles <- grep(pattern = "Deep[[:upper:]][[:lower:]]+",
@ledell
ledell / covid_meetups_rladies_wimlds.R
Created December 17, 2020 05:24
Count the number of R-Ladies & WiMLDS meetups since COVID quarantine started
# Count the number of covid meetups for R-Ladies and WiMLDS
library(meetupr)
library(tidyverse)
# Look up all R-Ladies & WiMLDS groups by "topic id" & count the events.
# You can find topic ids for associated tags by querying
# [here](https://secure.meetup.com/meetup_api/console/?path=/find/topics).
# The `topic_id` for topic, "R-Ladies", is 1513883.
# The `topic_id` for topic, "WiMLDS", is 1517030.
@ledell
ledell / covid_meetups.R
Last active December 17, 2020 05:23
Count the number of meetups for a group since COVID quarantine started
# Meetups since quarantine started (feel free to adjust the date to your local lockdown date)
library(meetupr)
library(tidyverse)
meetup_urlname <- "Bay-Area-Women-in-Machine-Learning-and-Data-Science" # insert your meetup urlname here
# Fetch the group's past events
events <- get_events(urlname = meetup_urlname,
event_status = "past")
events %>%
# `from __future__` imports must come before any other statement in the module;
# placed after other imports they raise a SyntaxError.
from __future__ import print_function
import h2o
from h2o.estimators.random_forest import H2ORandomForestEstimator
from h2o.estimators.gbm import H2OGradientBoostingEstimator
from h2o.estimators.stackedensemble import H2OStackedEnsembleEstimator
from h2o.grid.grid_search import H2OGridSearch
# Start (or connect to) an H2O cluster; nthreads=-1 lets H2O use all available cores.
h2o.init(nthreads=-1)
# Attach the H2O R package and start (or connect to) an H2O cluster
library(h2o)
h2o.init()
# Load the HIGGS dataset
train <- h2o.importFile("https://s3.amazonaws.com/erin-data/higgs/higgs_train_10k.csv")
test <- h2o.importFile("https://s3.amazonaws.com/erin-data/higgs/higgs_test_5k.csv")
# y is the name of the response column; x is every other column of the training frame
y <- "response"
x <- setdiff(names(train), y)
# NOTE(review): presumably the model family passed to a later training call -- confirm
family <- "binomial"
# This example is outdated because we have the H2O Stacked Ensemble function now (so it's better to use that):
# http://docs.h2o.ai/h2o/latest-stable/h2o-docs/data-science/stacked-ensembles.html
import h2o
from h2o.estimators.gbm import H2OGradientBoostingEstimator
from h2o.estimators.deeplearning import H2ODeepLearningEstimator
from h2o.estimators.glm import H2OGeneralizedLinearEstimator
from h2o.estimators.random_forest import H2ORandomForestEstimator
from sklearn import metrics #will be replaced with ensemble_performance later
@ledell
ledell / install_latest_h2o.R
Created June 17, 2020 23:24
Install the latest H2O R package (latest stable version is sometimes ahead of what's on CRAN)
install.packages("h2o", repos="http://h2o-release.s3.amazonaws.com/h2o/latest_stable_R", method="curl")
@ledell
ledell / h2o_automl_mushroom_classification.py
Last active June 17, 2020 12:52
H2O AutoML - Mushroom classification
# My version of the code at this blog post:
# https://towardsdatascience.com/automl-a-tool-to-improve-your-workflow-1a132248371f
import h2o
from h2o.automl import H2OAutoML
h2o.init()
train = h2o.import_file("https://archive.ics.uci.edu/ml/machine-learning-databases/mushroom/agaricus-lepiota.data")
y = "C1" #e = edible, p = poisonous
@ledell
ledell / h2oautoml_saveload.R
Last active May 17, 2020 09:46
R function to save and load H2O AutoML projects (models & leaderboards)
library(R.utils)
# Note: For saving H2O AutoML objects, if path is NULL (default),
# then save in pwd with project_name as folder name
# This function (or something similar to it) will be part of H2O soon...
# Written by: https://github.com/tomasfryda
.dump_aml_frames <- function(aml, path) {
frames <- c(attr(aml@leaderboard, "id"), attr(aml@event_log, "id"))
frames <- c(frames,
unlist(sapply(aml@leaderboard$model_id, function(model_id)