Ed Berry eddjberry

## round_nearest.R
# function to round a value to the nearest multiple of `nearest`
# e.g. if nearest = 5 then 42 would round to 40
# and 47 would be rounded to 45
# source: http://r.789695.n4.nabble.com/Rounding-to-the-nearest-5-td863189.html

round_nearest <- function(x, nearest) {
  # Express x as a count of `nearest`-sized steps and round that count to a
  # whole number (round() sends exact halves to the even neighbour).
  n_steps <- round(x / nearest)

  # Scale the whole-number step count back up to the original units.
  nearest * n_steps
}

## group_prop.R
group_prop <- function(df, ...) {
  # enquo the dots
  vars <- enquos(...)

  # count then calculate
  # proportions
  df_count <- df %>%
    count(!!!vars)

  if (length(vars) > 1) {

## str_proper.R
# a function to format strings
# to be in Proper case
str_proper <- function(string) {
  # get the first letter
  first_letter = substring(string, first = 1, last = 1)

  # get the other letters
  other_letters = substring(string, first = 2)

  # combine the first letter (upper case)

## tibble_select_column.R
# Create a small two-column tibble ----
# x holds the letters "a" to "e"; y holds the same letters in reverse order.
tbl <- tibble::tibble(x = letters[1:5],
                      y = letters[5:1])

# Three ways to pick the first column; each returns a tibble ----

# 1. dplyr::select() with the bare column name
dplyr::select(tbl, x)

# 2. single-index `[`, selecting the column by position
tbl[1]

# 3. matrix-style `[` with an empty row index and column position 1
tbl[, 1]

## show_palette_cols.R
library(scales)
library(viridis)

# Draw a swatch of 12 colours generated by viridis()
show_col(colours = viridis(n = 12))

## filter_at_remove_nas.R
# Build a small example tibble; the outer parentheses print it on assignment.
# Column y is NA in every row, column z is NA only in the first row.
# tibble::tibble() replaces the deprecated data_frame().
(df <- tibble::tibble(x = 1:2,
                      y = c(NA, NA),
                      z = c(NA, 3)))

# Remove rows where either col y or z contains NA,
# i.e. keep rows where every column in y:z is not NA.
# filter(if_all(...)) replaces the superseded filter_at(vars(...), all_vars(...)).
# The calls are namespace-qualified so the snippet does not depend on dplyr
# (or the pipe) having been attached beforehand.
dplyr::filter(df, dplyr::if_all(y:z, function(col) !is.na(col)))

## split_df_csv.R
library(tidyverse)

# Group iris by Species, then nest the grouped data so that the
# remaining columns of each species are packed into a nested data frame
iris_nest <- nest(group_by(iris, Species))

# Get the data list and set the names of the list to Species
# write_csv for each df in the data list with its name as the filename
iris_nest %>%

## sim_binom.R
sim_binom <- function(n_samples = 1000, n_features = 2,
                      true_target_prob = 0.5, beta = NULL, seed = NULL) {

  if(!is.null(seed)) {
    set.seed(seed)
  }

  x = matrix(rnorm(n_samples * n_features),
             nrow = n_samples, ncol = n_features)

## sparklyr_cv_pipeline_example.R
# Load packages
library(dplyr)
library(sparklyr)

# Open a connection to a local Spark master
sc <- spark_connect(master = "local")

# Copy mtcars over the connection and keep a handle to the
# resulting Spark DataFrame
mtcars_sdf <- copy_to(dest = sc, df = mtcars)
	# function to round a value to the nearest multiple of `nearest`
	# e.g. if nearest = 5 then 42 would round to 40
	# and 47 would be rounded to 45
	# source: http://r.789695.n4.nabble.com/Rounding-to-the-nearest-5-td863189.html

	round_nearest <- function(x, nearest) {
	  # Express x as a count of `nearest`-sized steps and round that count to a
	  # whole number (round() sends exact halves to the even neighbour).
	  n_steps <- round(x / nearest)

	  # Scale the whole-number step count back up to the original units.
	  nearest * n_steps
	}
	group_prop <- function(df, ...) {
	# enquo the dots
	vars <- enquos(...)

	# count then calculate
	# proportions
	df_count <- df %>%
	count(!!!vars)

	if (length(vars) > 1) {
	# a function to format strings
	# to be in Proper case
	str_proper <- function(string) {
	# get the first letter
	first_letter = substring(string, first = 1, last = 1)

	# get the other letters
	other_letters = substring(string, first = 2)

	# combine the first letter (upper case)
	# Create a small two-column tibble ----
	# x holds the letters "a" to "e"; y holds the same letters in reverse order.
	tbl <- tibble::tibble(x = letters[1:5],
	y = letters[5:1])

	# Three ways to pick the first column; each returns a tibble ----

	# 1. dplyr::select() with the bare column name
	dplyr::select(tbl, x)

	# 2. single-index `[`, selecting the column by position
	tbl[1]

	# 3. matrix-style `[` with an empty row index and column position 1
	tbl[, 1]
	# Build a small example tibble; the outer parentheses print it on assignment.
	# Column y is NA in every row, column z is NA only in the first row.
	# tibble::tibble() replaces the deprecated data_frame().
	(df <- tibble::tibble(x = 1:2,
	                      y = c(NA, NA),
	                      z = c(NA, 3)))

	# Remove rows where either col y or z contains NA,
	# i.e. keep rows where every column in y:z is not NA.
	# filter(if_all(...)) replaces the superseded filter_at(vars(...), all_vars(...)).
	# The calls are namespace-qualified so the snippet does not depend on dplyr
	# (or the pipe) having been attached beforehand.
	dplyr::filter(df, dplyr::if_all(y:z, function(col) !is.na(col)))
	library(tidyverse)

	# Group iris by Species, then nest the grouped data so that the
	# remaining columns of each species are packed into a nested data frame
	iris_nest <- nest(group_by(iris, Species))

	# Get the data list and set the names of the list to Species
	# write_csv for each df in the data list with its name as the filename
	iris_nest %>%
	sim_binom <- function(n_samples = 1000, n_features = 2,
	true_target_prob = 0.5, beta = NULL, seed = NULL) {

	if(!is.null(seed)) {
	set.seed(seed)
	}

	x = matrix(rnorm(n_samples * n_features),
	nrow = n_samples, ncol = n_features)
	# Load packages
	library(dplyr)
	library(sparklyr)

	# Open a connection to a local Spark master
	sc <- spark_connect(master = "local")

	# Copy mtcars over the connection and keep a handle to the
	# resulting Spark DataFrame
	mtcars_sdf <- copy_to(dest = sc, df = mtcars)