I hereby claim:
- I am ledell on github.
- I am ledell (https://keybase.io/ledell) on keybase.
- I have a public key ASCjbi8x4k_o5EN-N0Vu1Ns_UnyXr3hwC2W4Lr7xrR4djQo
To claim this, I am signing this object:
# Setup for querying meetup.com via the meetupr package.
# Install meetupr from GitHub if needed:
# library(devtools)
# devtools::install_github("rladies/meetupr")
library(meetupr)

# Log in to meetup.com, your key is here: https://secure.meetup.com/meetup_api/key/
# NOTE(review): placeholder — replace with your own key before running
api_key <- "YOUR_API_KEY_HERE"

# `urlname` is a human-readable unique id for a meetup, e.g. https://www.meetup.com/R-Ladies-Budapest/
urlname <- "R-Ladies-Budapest"
I hereby claim:
To claim this, I am signing this object:
# Query arXiv titles containing "Deep" via the aRxiv package.
# Tutorial: https://ropensci.org/tutorials/arxiv_tutorial.html
install.packages("aRxiv")  # one-time install; comment out after first run
library(aRxiv)
library(stringr)
library(ggplot2)

# Query arxiv: 6892 results including "Deep"; 49 DeepThings (Sept 21, 2017)
# batchsize/limit chosen to page through all matches in one call
df <- arxiv_search('ti:"Deep"', batchsize = 1000, limit = 100000)
titles <- grep(pattern = "Deep[[:upper:]][[:lower:]]+", |
# H2O's K-Means algo can estimate the optimal number of clusters (method by Leland Wilkinson)
# http://docs.h2o.ai/h2o/latest-stable/h2o-docs/data-science/k-means.html#estimating-k-in-k-means
#
# This demo is an extension of Kasia's blog post here:
# https://kkulma.github.io/2017-04-24-determining-optimal-number-of-clusters-in-your-data/
library(rattle)  # wine data

# Remove the factor col & convert to an H2O Frame
# Note: You can skip the scale() here since H2O K-Means standardizes automatically
# Start a local H2O cluster and load the HIGGS train/test sets.
library(h2o)
h2o.init()

# Load the HIGGS dataset
train <- h2o.importFile("https://s3.amazonaws.com/erin-data/higgs/higgs_train_10k.csv")
test <- h2o.importFile("https://s3.amazonaws.com/erin-data/higgs/higgs_test_5k.csv")

# Identify predictors and response; binary outcome, so use a binomial family
y <- "response"
x <- setdiff(names(train), y)
family <- "binomial"
# Set API Key for SigOpt (placeholder — replace "HERE" with your token)
Sys.setenv(SIGOPT_API_TOKEN = "HERE")

# Start a local H2O cluster for training models (-1 = use all available cores)
library(h2o)
h2o.init(nthreads = -1)

# Load a dataset
data(iris)
y <- "Species"
# Start H2O and import a sample binary-outcome train/test set.
library(h2o)
h2o.init(nthreads = -1)  # -1 = use all available cores

# Import a sample binary outcome train/test set into H2O
train <- h2o.importFile("https://s3.amazonaws.com/erin-data/higgs/higgs_train_10k.csv")
test <- h2o.importFile("https://s3.amazonaws.com/erin-data/higgs/higgs_test_5k.csv")

# Identify predictors and response
y <- "response"
x <- setdiff(names(train), y)
# A __future__ import must be the first statement in the module,
# otherwise Python raises SyntaxError — moved it above the other imports.
from __future__ import print_function

import h2o
from h2o.estimators.gbm import H2OGradientBoostingEstimator
from h2o.estimators.random_forest import H2ORandomForestEstimator
from h2o.estimators.stackedensemble import H2OStackedEnsembleEstimator
from h2o.grid.grid_search import H2OGridSearch

# Start a local H2O cluster (-1 = use all available cores)
h2o.init(nthreads=-1)
# This example is outdated because we have the H2O Stacked Ensemble function now (so it's better to use that):
# http://docs.h2o.ai/h2o/latest-stable/h2o-docs/data-science/stacked-ensembles.html
import h2o
from h2o.estimators.deeplearning import H2ODeepLearningEstimator
from h2o.estimators.gbm import H2OGradientBoostingEstimator
from h2o.estimators.glm import H2OGeneralizedLinearEstimator
from h2o.estimators.random_forest import H2ORandomForestEstimator
from sklearn import metrics  # will be replaced with ensemble_performance later
# Imports for building level-one (metalearner) data with an H2O GLM.
import h2o
from h2o.estimators.glm import H2OGeneralizedLinearEstimator
def make_Z(models): | |
''' | |
Takes a list of models and creates level-one data | |
''' |