João Paulo Nogueira joaopcnogueira

## how_to_build_regressive_features.R
# Creating a spine table with three columns ----
# customer_id: identifier of the customer, for which we are going to predict the next month sales
# year_month: reference date
# sales: the metric we want to predict
# Spine: two customers, each observed on the same 24 consecutive month
# starts (2021-11-01 through 2023-10-01), with randomly drawn sales.
spine_tbl <- tibble(
  customer_id = rep(c("João", "Denise"), each = 24),
  year_month = rep(
    seq(ymd("2021-11-01"), ymd("2023-10-01"), by = "1 month"),
    times = 2
  ),
  sales = sample(100:1000, size = 48, replace = TRUE)
)

## lpsolve.R
# Import lpSolve package
library(lpSolve)

#
# Set up the problem: maximize
# z = 2*x1 + 11*x2 subject to
# 2*x1 + 2*x2 <= 20
#   x1 + 2*x2 <= 12
# 3*x1 + 4*x2 <= 36
#   x1        <= 5

## python_project.py
# Work Directory (/home/user/python_project/):
# - data
# - data/employee.csv
# - src
#

import os

# WORK_DIR="/home/user/python_project/"
# Use the process's current working directory as the work directory
# instead of the hard-coded path shown above.
WORK_DIR = os.getcwd()

## condaenv.txt
# For Windows users
# Note: <> denotes a placeholder to be replaced with your own value

#Create a conda environment
conda create --name <environment-name> python=<version:2.7/3.5>

#To create a requirements.txt file:
conda list #Gives you list of packages used for the environment

conda list -e > requirements.txt #Save all the info about packages to your folder

## target_mean_encoder.R
library(dplyr)

# creating a toy dataset
# Two columns: `vehicle` (categorical feature) and `target` (numeric value).
data <- tibble(
  vehicle = c("car", "bus", "bike", "bus", "car", "bike"),
  target = c(23, 34, 56, 78, 33, 65)
)

# print dataframe
data

# OUTPUT

## titanic-pipeline4.py
import pandas as pd
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from category_encoders import OneHotEncoder
from sklearn.model_selection import KFold
from sklearn.model_selection import cross_validate
from sklearn.model_selection import GridSearchCV
from sklearn.compose import ColumnTransformer

## titanic-pipeline3.py
import pandas as pd
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from category_encoders import OneHotEncoder
from sklearn.model_selection import KFold
from sklearn.model_selection import cross_validate
from sklearn.model_selection import GridSearchCV

## titanic-pipeline2.py
import pandas as pd
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from category_encoders import OneHotEncoder
from sklearn.model_selection import KFold
from sklearn.model_selection import cross_validate

# reading the dataset

## titanic-pipeline.py
import pandas as pd
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from category_encoders import OneHotEncoder

# Reading the dataset: load train.csv (path relative to the current
# working directory) into a DataFrame.
df = pd.read_csv("train.csv")

## groupkfold_example.py
import numpy as np
import pandas as pd
from sklearn import datasets
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import GroupKFold

# Loading the data
iris = datasets.load_iris()
# Append the target as the last column of the feature matrix.
# reshape(-1, 1) infers the row count rather than hard-coding 150 samples.
design_matrix = np.concatenate((iris['data'], iris['target'].reshape(-1, 1)), axis=1)
	# Creating a spine table with three columns ----
	# customer_id: identifier of the customer, for which we are going to predict the next month sales
	# year_month: reference date
	# sales: the metric we want to predict
	# Spine: two customers, each observed on the same 24 consecutive month
	# starts (2021-11-01 through 2023-10-01), with randomly drawn sales.
	spine_tbl <- tibble(
	  customer_id = rep(c("João", "Denise"), each = 24),
	  year_month = rep(
	    seq(ymd("2021-11-01"), ymd("2023-10-01"), by = "1 month"),
	    times = 2
	  ),
	  sales = sample(100:1000, size = 48, replace = TRUE)
	)
	# Import lpSolve package
	library(lpSolve)

	#
	# Set up the problem: maximize
	# z = 2*x1 + 11*x2 subject to
	# 2*x1 + 2*x2 <= 20
	#   x1 + 2*x2 <= 12
	# 3*x1 + 4*x2 <= 36
	#   x1        <= 5
	# Work Directory (/home/user/python_project/):
	# - data
	# - data/employee.csv
	# - src
	#

	import os

	# WORK_DIR="/home/user/python_project/"
	# Use the process's current working directory as the work directory
	# instead of the hard-coded path shown above.
	WORK_DIR = os.getcwd()
	# For Windows users
	# Note: <> denotes a placeholder to be replaced with your own value

	#Create a conda environment
	conda create --name <environment-name> python=<version:2.7/3.5>

	#To create a requirements.txt file:
	conda list #Gives you list of packages used for the environment

	conda list -e > requirements.txt #Save all the info about packages to your folder
	library(dplyr)

	# creating a toy dataset
	# Two columns: `vehicle` (categorical feature) and `target` (numeric value).
	data <- tibble(
	  vehicle = c("car", "bus", "bike", "bus", "car", "bike"),
	  target = c(23, 34, 56, 78, 33, 65)
	)

	# print dataframe
	data

	# OUTPUT
	import pandas as pd
	from sklearn.tree import DecisionTreeClassifier
	from sklearn.model_selection import train_test_split
	from sklearn.pipeline import Pipeline
	from sklearn.impute import SimpleImputer
	from category_encoders import OneHotEncoder
	from sklearn.model_selection import KFold
	from sklearn.model_selection import cross_validate
	from sklearn.model_selection import GridSearchCV
	from sklearn.compose import ColumnTransformer
	import numpy as np
	import pandas as pd
	from sklearn import datasets
	from sklearn.tree import DecisionTreeClassifier
	from sklearn.metrics import accuracy_score
	from sklearn.model_selection import GroupKFold

	# Loading the data
	iris = datasets.load_iris()
	# Append the target as the last column of the feature matrix.
	# reshape(-1, 1) infers the row count rather than hard-coding 150 samples.
	design_matrix = np.concatenate((iris['data'], iris['target'].reshape(-1, 1)), axis=1)