I hereby claim:
- I am ledell on github.
- I am ledell (https://keybase.io/ledell) on keybase.
- I have a public key ASCjbi8x4k_o5EN-N0Vu1Ns_UnyXr3hwC2W4Lr7xrR4djQo
To claim this, I am signing this object:
# Setup for querying meetup.com via the meetupr package.
# Install meetupr from GitHub if needed:
# library(devtools)
# devtools::install_github("rladies/meetupr")
library(meetupr)

# Log in to meetup.com, your key is here: https://secure.meetup.com/meetup_api/key/
# NOTE(review): placeholder — replace with your own key before running
api_key <- "YOUR_API_KEY_HERE"

# `urlname` is a human-readable unique id for a meetup, e.g. https://www.meetup.com/R-Ladies-Budapest/
urlname <- "R-Ladies-Budapest"
I hereby claim:
To claim this, I am signing this object:
# Query arXiv titles containing "Deep" via the aRxiv package.
# Tutorial: https://ropensci.org/tutorials/arxiv_tutorial.html
install.packages("aRxiv")  # one-time install; comment out after first run
library(aRxiv)
library(stringr)
library(ggplot2)

# Query arxiv: 6892 results including "Deep"; 49 DeepThings (Sept 21, 2017)
# batchsize/limit chosen to page through all matches in one call
df <- arxiv_search('ti:"Deep"', batchsize = 1000, limit = 100000)
titles <- grep(pattern = "Deep[[:upper:]][[:lower:]]+", |
# H2O's K-Means algo can estimate the optimal number of clusters (method by Leland Wilkinson)
# http://docs.h2o.ai/h2o/latest-stable/h2o-docs/data-science/k-means.html#estimating-k-in-k-means
#
# This demo is an extension of Kasia's blog post here:
# https://kkulma.github.io/2017-04-24-determining-optimal-number-of-clusters-in-your-data/
library(rattle)  # wine data

# Remove the factor col & convert to an H2O Frame
# Note: You can skip the scale() here since H2O K-Means standardizes automatically
# Start a local H2O cluster and load the HIGGS train/test sets.
library(h2o)
h2o.init()

# Load the HIGGS dataset
train <- h2o.importFile("https://s3.amazonaws.com/erin-data/higgs/higgs_train_10k.csv")
test <- h2o.importFile("https://s3.amazonaws.com/erin-data/higgs/higgs_test_5k.csv")

# Identify predictors and response; binary outcome, so use a binomial family
y <- "response"
x <- setdiff(names(train), y)
family <- "binomial"
# Set API Key for SigOpt (placeholder — replace "HERE" with your token)
Sys.setenv(SIGOPT_API_TOKEN = "HERE")

# Start a local H2O cluster for training models (-1 = use all available cores)
library(h2o)
h2o.init(nthreads = -1)

# Load a dataset
data(iris)
y <- "Species"
# Start H2O and import a sample binary-outcome train/test set.
library(h2o)
h2o.init(nthreads = -1)  # -1 = use all available cores

# Import a sample binary outcome train/test set into H2O
train <- h2o.importFile("https://s3.amazonaws.com/erin-data/higgs/higgs_train_10k.csv")
test <- h2o.importFile("https://s3.amazonaws.com/erin-data/higgs/higgs_test_5k.csv")

# Identify predictors and response
y <- "response"
x <- setdiff(names(train), y)
# A __future__ import must be the first statement in the module,
# otherwise Python raises SyntaxError — moved it above the other imports.
from __future__ import print_function

import h2o
from h2o.estimators.gbm import H2OGradientBoostingEstimator
from h2o.estimators.random_forest import H2ORandomForestEstimator
from h2o.estimators.stackedensemble import H2OStackedEnsembleEstimator
from h2o.grid.grid_search import H2OGridSearch

# Start a local H2O cluster (-1 = use all available cores)
h2o.init(nthreads=-1)
# This example is outdated because we have the H2O Stacked Ensemble function now (so it's better to use that):
# http://docs.h2o.ai/h2o/latest-stable/h2o-docs/data-science/stacked-ensembles.html
import h2o
from h2o.estimators.deeplearning import H2ODeepLearningEstimator
from h2o.estimators.gbm import H2OGradientBoostingEstimator
from h2o.estimators.glm import H2OGeneralizedLinearEstimator
from h2o.estimators.random_forest import H2ORandomForestEstimator
from sklearn import metrics  # will be replaced with ensemble_performance later
# Imports for building level-one (metalearner) data with an H2O GLM.
import h2o
from h2o.estimators.glm import H2OGeneralizedLinearEstimator
def make_Z(models): | |
''' | |
Takes a list of models and creates level-one data | |
''' |