# Korkrid Kyle Akepanidtaworn (korkridake)

## rvest_bank_notes.R
# ---------------------------------------------------------
# Rvest in action: list of people on bank notes
# https://en.wikipedia.org/wiki/List_of_people_on_banknotes
# Author: Kyle Akepanidtaworn, Data Scientist
# https://www.linkedin.com/in/korkridakepan/
# © Copyright 2018, Kyle Akepanidtaworn, All rights reserved.
# ---------------------------------------------------------

# ---------------------------------------------------------
# Step 1: Import Library

## vector_analysis_20181112_10_22.R
# Your function should take a vector of 0s and 1s as input. Every time you see a new sequence of 1s in the data, increase the number of children by 1. Be careful with two consecutive sequences of 1s that are separated by fewer than five 0s: in that case it is the same child, not a new one. To help you, the social planner provides some examples of what your function should return:

#Input: c(1,1,1,1,0,0,0,0)
#Output: 1 1 1 1 1 1 1 1

#Input: c(0,0,0,0,1,1,1,1,0,0,0,0,0,1,1,1)
#Output: 0 0 0 0 1 1 1 1 1 1 1 1 1 2 2 2

#Input: c(0,0,0,0,1,1,1,1,0,0,1,1,0,0,0,1,1,0,0,0,0,1,1,0,0,0,0,0,1)
#Output: 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2

## rolling_average_using_groupby_and_varying_window_length_20181112_11_45.R
# Stackoverflow link: https://stackoverflow.com/questions/53267115/rolling-average-using-groupby-and-varying-window-length

# Question: I'm trying to create a rolling average of a column based on an ID column and a measurement time label in R, but I am having a lot of trouble with it.

# Here is what my dataframe looks like:

# ID Measurement Value
#
# A    1           10
#

## sample_tuple_file_treatment_20181113_10_02.R
library(readtext)

# mytab = readtext('E:/KorkridAke_GithubRepo/sample_tuple_file.txt')
# readtext object consisting of 1 document and 0 docvars.
# # data.frame [1 x 2]
# doc_id                text
# <chr>                 <chr>
#   1 sample_tuple_file.txt "\"(1,2), (1,\"..."
# Split the document text at every ", " separator and keep the pieces that
# belong to the first element of the result as a character vector.
# NOTE(review): `mytab` is only created by the commented-out readtext() call
# above; confirm it exists before running these lines.
mytuple <- strsplit(mytab$text, ", ")[[1]]

## ordering_columns_horizontal_barcharts_plotly_20181111.R
# Percentage that answered "yes" for each class; the values can change, as
# they come from another function.
x1 <- c(62.7, 89.2)
# Percentage that answered "no": the complement of x1. Plain vectorized
# arithmetic always yields a numeric vector, whereas sapply() would return an
# empty list when x1 is empty.
x2 <- 100 - x1
# Class labels, one per entry of x1 / x2.
y <- c("Class1", "Class2")
# One row per class with its yes / no percentages.
data <- data.frame(y, x1, x2)
# data$y <- factor(data$y, levels = c("Class2", "Class1"))

library(plotly)

# Labels for the two answer categories.
top_labels <- c("Yes", "No")

## how_to_iterate_basetable_to_mastertable_20181114.py
# ------------------------------------------------
# ------------------------------------------------
# My Pythonic Implementation of Base-to-Master Searching
# Author: @Korkrid Akepanidtaworn
# ------------------------------------------------
# ------------------------------------------------

# ------------------------------------------------
# ------------------------------------------------
# Standard python library

## irkernel_installation.R
# -----------------------------------------------------------------------------------------------
# -----------------------------------------------------------------------------------------------
# 1/2) Installing via supplied binary packages (default on Windows + Mac OS X)
# Source: https://irkernel.github.io/installation/#binary-panel
# -----------------------------------------------------------------------------------------------
# -----------------------------------------------------------------------------------------------
# Install the listed packages (including devtools, which is used by the next
# step).
install.packages(
  c(
    "repr", "IRdisplay", "evaluate", "crayon",
    "pbdZMQ", "devtools", "uuid", "digest"
  )
)

# Install IRkernel itself from its GitHub repository.
devtools::install_github("IRkernel/IRkernel")

# Final step of the installation guide linked above; presumably registers the
# kernel spec so Jupyter can find it (confirm against the IRkernel docs).
IRkernel::installspec()

## irkernel_basic_compute.R
# ------------------------------------------------------------------------------------------------
# ------------------------------------------------------------------------------------------------
# How to Install Packages Through Jupyter Notebook
# https://stackoverflow.com/questions/42459423/cannot-install-r-packages-in-jupyter-notebook
# install.packages("tidyverse", repos='http://cran.us.r-project.org')
# install.packages("MASS", repos='http://cran.us.r-project.org')
# ------------------------------------------------------------------------------------------------
# ------------------------------------------------------------------------------------------------
library(dplyr)
library(ggplot2)

## databricks_ep_1.txt
# ---------------------------------------------------------------
# ---------------------------------------------------------------
# GLOBAL CONFIGURATIONS
# ---------------------------------------------------------------
# ---------------------------------------------------------------

# ---------------------------------------------------------------
# File location and type
# ---------------------------------------------------------------
# Path of the Cars93 CSV file to load.
# NOTE(review): the /FileStore/tables prefix looks like a Databricks file
# store location, and this snippet may originate from a Python notebook, so
# the `=` assignment is left untouched. Confirm before restyling.
file_location = "/FileStore/tables/Cars93.csv"

## combine_impute_and_nonimpute_20181116.R
# Stackoverflow: https://stackoverflow.com/questions/53338606/combine-imputed-and-non-imputed-data
# Combine imputed and non imputed data
# Example data with ten rows; age, bmi and time each contain missing values.
id <- as.numeric(1:10)
age <- c(60, NA, 90, 55, 60, 61, 77, 67, 88, 90)
bmi <- c(30, NA, NA, 23, 24, NA, 27, 23, 26, 21)
time <- c(62, 88, 85, NA, 68, 62, 89, 62, 70, 99)

# Assemble the columns into one data frame and show it.
dat <- data.frame(id = id, age = age, bmi = bmi, time = time)
dat

# The id vector is defined again here with the same values.
id <- as.numeric(1:10)
	# ---------------------------------------------------------
	# Rvest in action: list of people on bank notes
	# https://en.wikipedia.org/wiki/List_of_people_on_banknotes
	# Author: Kyle Akepanidtaworn, Data Scientist
	# https://www.linkedin.com/in/korkridakepan/
	# © Copyright 2018, Kyle Akepanidtaworn, All rights reserved.
	# ---------------------------------------------------------

	# ---------------------------------------------------------
	# Step 1: Import Library
	# Your function should take a vector of 0s and 1s as input. Every time you see a new sequence of 1s in the data, increase the number of children by 1. Be careful with two consecutive sequences of 1s that are separated by fewer than five 0s: in that case it is the same child, not a new one. To help you, the social planner provides some examples of what your function should return:

	#Input: c(1,1,1,1,0,0,0,0)
	#Output: 1 1 1 1 1 1 1 1

	#Input: c(0,0,0,0,1,1,1,1,0,0,0,0,0,1,1,1)
	#Output: 0 0 0 0 1 1 1 1 1 1 1 1 1 2 2 2

	#Input: c(0,0,0,0,1,1,1,1,0,0,1,1,0,0,0,1,1,0,0,0,0,1,1,0,0,0,0,0,1)
	#Output: 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2
	# Stackoverflow link: https://stackoverflow.com/questions/53267115/rolling-average-using-groupby-and-varying-window-length

	# Question: I'm trying to create a rolling average of a column based on an ID column and a measurement time label in R, but I am having a lot of trouble with it.

	# Here is what my dataframe looks like:

	# ID Measurement Value
	#
	# A 1 10
	#
	library(readtext)

	# mytab = readtext('E:/KorkridAke_GithubRepo/sample_tuple_file.txt')
	# readtext object consisting of 1 document and 0 docvars.
	# # data.frame [1 x 2]
	# doc_id text
	# <chr> <chr>
	# 1 sample_tuple_file.txt "\"(1,2), (1,\"..."
	# Split the document text at every ", " separator and keep the pieces that
	# belong to the first element of the result as a character vector.
	# NOTE(review): `mytab` is only created by the commented-out readtext() call
	# above; confirm it exists before running these lines.
	mytuple <- strsplit(mytab$text, ", ")[[1]]
	# Percentage that answered "yes" for each class; the values can change, as
	# they come from another function.
	x1 <- c(62.7, 89.2)
	# Percentage that answered "no": the complement of x1. Plain vectorized
	# arithmetic always yields a numeric vector, whereas sapply() would return an
	# empty list when x1 is empty.
	x2 <- 100 - x1
	# Class labels, one per entry of x1 / x2.
	y <- c("Class1", "Class2")
	# One row per class with its yes / no percentages.
	data <- data.frame(y, x1, x2)
	# data$y <- factor(data$y, levels = c("Class2", "Class1"))

	library(plotly)

	# Labels for the two answer categories.
	top_labels <- c("Yes", "No")
	# ------------------------------------------------
	# ------------------------------------------------
	# My Pythonic Implementation of Base-to-Master Searching
	# Author: @Korkrid Akepanidtaworn
	# ------------------------------------------------
	# ------------------------------------------------

	# ------------------------------------------------
	# ------------------------------------------------
	# Standard python library
	# -----------------------------------------------------------------------------------------------
	# -----------------------------------------------------------------------------------------------
	# 1/2) Installing via supplied binary packages (default on Windows + Mac OS X)
	# Source: https://irkernel.github.io/installation/#binary-panel
	# -----------------------------------------------------------------------------------------------
	# -----------------------------------------------------------------------------------------------
	# Install the listed packages (including devtools, which is used by the next
	# step).
	install.packages(
	  c(
	    "repr", "IRdisplay", "evaluate", "crayon",
	    "pbdZMQ", "devtools", "uuid", "digest"
	  )
	)

	# Install IRkernel itself from its GitHub repository.
	devtools::install_github("IRkernel/IRkernel")

	# Final step of the installation guide linked above; presumably registers the
	# kernel spec so Jupyter can find it (confirm against the IRkernel docs).
	IRkernel::installspec()
	# ------------------------------------------------------------------------------------------------
	# ------------------------------------------------------------------------------------------------
	# How to Install Packages Through Jupyter Notebook
	# https://stackoverflow.com/questions/42459423/cannot-install-r-packages-in-jupyter-notebook
	# install.packages("tidyverse", repos='http://cran.us.r-project.org')
	# install.packages("MASS", repos='http://cran.us.r-project.org')
	# ------------------------------------------------------------------------------------------------
	# ------------------------------------------------------------------------------------------------
	library(dplyr)
	library(ggplot2)
	# ---------------------------------------------------------------
	# ---------------------------------------------------------------
	# GLOBAL CONFIGURATIONS
	# ---------------------------------------------------------------
	# ---------------------------------------------------------------

	# ---------------------------------------------------------------
	# File location and type
	# ---------------------------------------------------------------
	# Path of the Cars93 CSV file to load.
	# NOTE(review): the /FileStore/tables prefix looks like a Databricks file
	# store location, and this snippet may originate from a Python notebook, so
	# the `=` assignment is left untouched. Confirm before restyling.
	file_location = "/FileStore/tables/Cars93.csv"
	# Stackoverflow: https://stackoverflow.com/questions/53338606/combine-imputed-and-non-imputed-data
	# Combine imputed and non imputed data
	# Example data with ten rows; age, bmi and time each contain missing values.
	id <- as.numeric(1:10)
	age <- c(60, NA, 90, 55, 60, 61, 77, 67, 88, 90)
	bmi <- c(30, NA, NA, 23, 24, NA, 27, 23, 26, 21)
	time <- c(62, 88, 85, NA, 68, 62, 89, 62, 70, 99)

	# Assemble the columns into one data frame and show it.
	dat <- data.frame(id = id, age = age, bmi = bmi, time = time)
	dat

	# The id vector is defined again here with the same values.
	id <- as.numeric(1:10)