ivopbernardo

## caret_examples.r
# caret library example used in blog post:
# https://towardsdatascience.com/a-guide-to-using-caret-in-r-71dec0bda208

library(caTools)
library(caret)

# Train Test Split on both Iris and Mtcars

train_test_split <- function(df) {
  set.seed(42)

## randomforests.r
# Training a Random Forest in R - used in blog post:
# https://towardsdatascience.com/data-science-tutorials-training-a-random-forest-in-r-a883cc1bacd1

library(dplyr)
library(randomForest)
library(ranger)
library(Metrics)

# Read the London bike CSV (path is relative to the working directory)
london_bike <- read.csv(file = './london_merged.csv')

## xgboostr.r
# Training an XGBoost in R - used in blog post:
# https://towardsdatascience.com/data-science-tutorials-training-an-xgboost-using-r-cf3c00b1425

library(dplyr)
library(xgboost)
library(Metrics)
library(ggplot2)

# Read the London bike CSV (path is relative to the working directory)
london_bike <- read.csv(file = './london_merged.csv')

## decisiontree.R
# Training a decision tree in R - used in blog post:
# https://medium.com/codex/data-science-tutorials-training-a-decision-tree-using-r-d6266936d86

library(dplyr)
library(rpart)
library(rpart.plot)
library(caret)
library(Metrics)
library(ggplot2)

## nltk_intro.py
# Getting started with NLTK scripts - used in blog post:
# https://towardsdatascience.com/getting-started-with-nltk-eb4ed6eb7a37

from nltk import tokenize

python_wiki = '''
Python is a high-level, interpreted, general-purpose programming language. Its design philosophy emphasizes code readability with the use of significant indentation.
Python is dynamically-typed and garbage-collected. It supports multiple programming paradigms, including structured (particularly procedural), object-oriented and functional programming. It is often described as a "batteries included" language due to its comprehensive standard library.
Guido van Rossum began working on Python in the late 1980s as a successor to the ABC programming language and first released it in 1991 as Python 0.9.0.[33] Python 2.0 was released in 2000 and introduced new features such as list comprehensions, cycle-detecting garbage collection, reference counting, and Unicode support. Python 3.0, released in 2008, was a major revision that is not completely

## dplyr.r
# dplyr library example used in blog post:
# https://towardsdatascience.com/8-cool-dplyr-function-to-learn-in-r-8736d7fa899c

library(dplyr)

# Bind the starwars dataset to a local name
starwars_df <- starwars

# Keep only the rows of starwars whose species is 'Droid'
filter_droids <- filter(starwars, species == 'Droid')

## mlr_hyperparam.r
# mlr library example code - used in blog post:
# https://towardsdatascience.com/decision-tree-hyperparameter-tuning-in-r-using-mlr-3248bfd2d88c

# Read train.csv (path is relative to the working directory) into `titanic`
titanic <- read.csv(file = 'train.csv')

library(dplyr)
library(rpart)
library(rpart.plot)
library(Metrics)
library(mlr)

## function_best_practices.r
# R Function Best Practices used in blog post:
# https://towardsdatascience.com/writing-better-r-functions-best-practices-and-tips-d48ef0691c24

library(ggplot2)

#----------------------------------#

# Function Indentation

# Proper Indentation - Bad Example

## h2o_example.r
# Load h2o
library(h2o)
library(ggplot2)

# Read the London bike CSV (path is relative to the working directory)
london_bike <- read.csv(file = './london_merged.csv')

# Convert the season and weather_code columns to factors
london_bike$season <- as.factor(london_bike$season)
london_bike$weather_code <- as.factor(london_bike$weather_code)

## reading_raster_data_and_getting_values.py
import rasterio
from rasterio.plot import show

# Dataset path in "zip+file:" form: the mdt.tif member inside data/mdt.zip
# (relative path; presumably resolved against the working directory - confirm)
url = "zip+file:data/mdt.zip!mdt.tif"
# Open the raster with rasterio and keep the returned dataset object
lisbon_elevation = rasterio.open(url)

# Plot the raster data to get a sense of it, using the "terrain" colormap
show(lisbon_elevation, cmap="terrain")

# Get the elevation from the raster data
	# caret library example used in blog post:
	# https://towardsdatascience.com/a-guide-to-using-caret-in-r-71dec0bda208

	library(caTools)
	library(caret)

	# Train Test Split on both Iris and Mtcars

	train_test_split <- function(df) {
	set.seed(42)
	# Training a Random Forest in R - used in blog post:
	# https://towardsdatascience.com/data-science-tutorials-training-a-random-forest-in-r-a883cc1bacd1

	library(dplyr)
	library(randomForest)
	library(ranger)
	library(Metrics)

	# Load london bike csv
	london_bike <- read.csv('./london_merged.csv')
	# Training an XGBoost in R - used in blog post:
	# https://towardsdatascience.com/data-science-tutorials-training-an-xgboost-using-r-cf3c00b1425

	library(dplyr)
	library(xgboost)
	library(Metrics)
	library(ggplot2)

	# Load london bike csv
	london_bike <- read.csv('./london_merged.csv')
	# Training a decision tree in R - used in blog post:
	# https://medium.com/codex/data-science-tutorials-training-a-decision-tree-using-r-d6266936d86

	library(dplyr)
	library(rpart)
	library(rpart.plot)
	library(caret)
	library(Metrics)
	library(ggplot2)
	# Getting started with NLTK scripts - used in blog post:
	# https://towardsdatascience.com/getting-started-with-nltk-eb4ed6eb7a37

	from nltk import tokenize

	python_wiki = '''
	Python is a high-level, interpreted, general-purpose programming language. Its design philosophy emphasizes code readability with the use of significant indentation.
	Python is dynamically-typed and garbage-collected. It supports multiple programming paradigms, including structured (particularly procedural), object-oriented and functional programming. It is often described as a "batteries included" language due to its comprehensive standard library.
	Guido van Rossum began working on Python in the late 1980s as a successor to the ABC programming language and first released it in 1991 as Python 0.9.0.[33] Python 2.0 was released in 2000 and introduced new features such as list comprehensions, cycle-detecting garbage collection, reference counting, and Unicode support. Python 3.0, released in 2008, was a major revision that is not completely
	# dplyr library example used in blog post:
	# https://towardsdatascience.com/8-cool-dplyr-function-to-learn-in-r-8736d7fa899c

	library(dplyr)

	starwars_df <- starwars

	# Filter using Dplyr
	filter_droids <- starwars %>%
	filter(species == 'Droid')
	# mlr library example code - used in blog post:
	# https://towardsdatascience.com/decision-tree-hyperparameter-tuning-in-r-using-mlr-3248bfd2d88c

	titanic <- read.csv('train.csv')

	library(dplyr)
	library(rpart)
	library(rpart.plot)
	library(Metrics)
	library(mlr)
	# R Function Best Practices used in blog post:
	# https://towardsdatascience.com/writing-better-r-functions-best-practices-and-tips-d48ef0691c24

	library(ggplot2)

	#----------------------------------#

	# Function Indentation

	# Proper Indentation - Bad Example
	# Load h2o
	library(h2o)
	library(ggplot2)

	# Load Dataset - London Bike
	london_bike <- read.csv('./london_merged.csv')

	# Transforming Weather code and Season to factor
	london_bike$weather_code <- as.factor(london_bike$weather_code)
	london_bike$season <- as.factor(london_bike$season)
	import rasterio
	from rasterio.plot import show

	url = "zip+file:data/mdt.zip!mdt.tif"
	lisbon_elevation = rasterio.open(url)

	# Plot the raster data to get a sense of it
	show(lisbon_elevation, cmap="terrain")

	# Get the elevation from the raster data