
João Paulo Nogueira joaopcnogueira

  • Fortaleza, Ceará
joaopcnogueira / backward_elimination.py
Last active December 18, 2023 14:58
Feature selection by Backward Elimination using p-value
import numpy as np
import statsmodels.api as sm

def backward_elimination(X, y, sl):
    """
    X: the data matrix with the independent variables (predictors)
    y: the vector with the dependent variable (target)
    sl: significance level; 0.05 (5%) is the usual choice
    """
    # prepend a column of ones so the fitted model has an intercept term
    X = np.append(arr=np.ones((len(X), 1)).astype(int), values=X, axis=1)
    while True:
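The preview cuts off inside the elimination loop. As a minimal sketch of how the rest of the routine typically goes, assuming the usual rule of repeatedly dropping the predictor with the highest p-value until every p-value falls below sl:

import numpy as np
import statsmodels.api as sm

def backward_elimination(X, y, sl=0.05):
    """Drop predictors one at a time until every p-value is below sl."""
    # prepend a column of ones so the fitted model has an intercept term
    X = np.append(arr=np.ones((len(X), 1)).astype(int), values=X, axis=1)
    while True:
        model = sm.OLS(y, X).fit()
        if model.pvalues.max() > sl:
            # drop the predictor with the highest p-value and refit
            X = np.delete(X, np.argmax(model.pvalues), axis=1)
        else:
            break
    return X

Typical usage would be X_selected = backward_elimination(X, y, sl=0.05); note that the intercept column is treated like any other predictor here and can itself be dropped.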
joaopcnogueira / how_to_build_regressive_features.R
Last active November 3, 2023 02:35
Piece of code demonstrating how to build regressive (rolling-window) features for machine learning modeling, such as the sum of sales over the last 3 months, the mean of sales over the last 6 months, and so on.
library(tidyverse)  # tibble()
library(lubridate)  # ymd()

# Create a spine table with three columns ----
# customer_id: identifier of the customer for whom we will predict next month's sales
# year_month:  reference date
# sales:       the metric we want to predict
spine_tbl <- tibble(
  customer_id = c(rep("João", 24), rep("Denise", 24)),
  year_month  = c(seq(ymd("2021-11-01"), ymd("2023-10-01"), by = "1 month"),
                  seq(ymd("2021-11-01"), ymd("2023-10-01"), by = "1 month")),
  sales       = sample(100:1000, 48, replace = TRUE)
)
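The preview stops right after the spine table is built; the gist continues in R with dplyr. As a rough sketch of the same idea in Python with pandas (the window sizes and column names below are assumptions chosen to match the description):

import numpy as np
import pandas as pd

# toy spine table: one row per customer per month
months = pd.date_range("2021-11-01", "2023-10-01", freq="MS")
spine = pd.DataFrame({
    "customer_id": ["João"] * len(months) + ["Denise"] * len(months),
    "year_month": list(months) * 2,
    "sales": np.random.randint(100, 1001, size=2 * len(months)),
})

spine = spine.sort_values(["customer_id", "year_month"])
grouped = spine.groupby("customer_id")["sales"]

# shift(1) so each feature only looks at months strictly before the reference date
spine["sales_sum_last_3m"] = grouped.transform(lambda s: s.shift(1).rolling(3).sum())
spine["sales_mean_last_6m"] = grouped.transform(lambda s: s.shift(1).rolling(6).mean())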
joaopcnogueira / backward_elimination2.py
Last active October 9, 2023 12:26
Feature selection by Backward Elimination using both the p-value and the adjusted r-squared
import numpy as np
import statsmodels.api as sm

def backward_elimination2(X, y, sl):
    """
    X: the data matrix with the independent variables (predictors)
    y: the vector with the dependent variable (target)
    sl: significance level; 0.05 (5%) is the usual choice
    """
    # prepend a column of ones so the fitted model has an intercept term
    X = np.append(arr=np.ones((len(X), 1)).astype(int), values=X, axis=1)
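The preview again stops at the intercept step. A sketch of one way the two criteria could be combined, assuming the common variant in which a removal is undone when it lowers the adjusted R-squared:

import numpy as np
import statsmodels.api as sm

def backward_elimination2(X, y, sl=0.05):
    """Drop high p-value predictors, but stop if a removal hurts adjusted R-squared."""
    X = np.append(arr=np.ones((len(X), 1)).astype(int), values=X, axis=1)
    while True:
        model = sm.OLS(y, X).fit()
        worst = int(np.argmax(model.pvalues))
        if model.pvalues[worst] <= sl:
            break  # every remaining predictor is significant
        X_reduced = np.delete(X, worst, axis=1)
        if sm.OLS(y, X_reduced).fit().rsquared_adj < model.rsquared_adj:
            break  # removing the worst predictor lowered adjusted R-squared
        X = X_reduced
    return X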
joaopcnogueira / lpsolve.R
Last active November 15, 2021 15:44
Example of optimization and linear programming with R (lpSolve)
# Import the lpSolve package
library(lpSolve)

# Set up the problem: maximize
#   z = 2*x1 + 11*x2
# subject to
#   2*x1 + 2*x2 <= 20
#     x1 + 2*x2 <= 12
#   3*x1 + 4*x2 <= 36
#     x1        <= 5
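The preview ends before the call to lpSolve's lp(). As a sketch of the same problem in Python with scipy.optimize.linprog (a different solver than the gist uses; linprog minimizes, so the objective is negated):

from scipy.optimize import linprog

# maximize z = 2*x1 + 11*x2  ->  minimize -2*x1 - 11*x2
c = [-2, -11]

# coefficients and right-hand sides of the <= constraints
A_ub = [[2, 2],
        [1, 2],
        [3, 4],
        [1, 0]]
b_ub = [20, 12, 36, 5]

# x1, x2 >= 0 via the bounds
result = linprog(c, A_ub=A_ub, b_ub=b_ub, bounds=[(0, None), (0, None)])
print(result.x, -result.fun)  # optimum: x1 = 0, x2 = 6, z = 66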
joaopcnogueira / python_project.py
Created November 29, 2019 03:54
How to make Python Code Run Independent of OS Path
# Work directory layout (/home/user/python_project/):
# - data
# - data/employee.csv
# - src

import os

# hard-coded version, tied to one machine:
# WORK_DIR = "/home/user/python_project/"
# portable version, resolved at runtime:
WORK_DIR = os.getcwd()
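Since os.getcwd() returns whatever directory the script is launched from, paths are then built relative to it. A minimal sketch of how the data file from the layout above might be read (loading it with pandas is just an illustration, not something shown in the preview):

import os
import pandas as pd

WORK_DIR = os.getcwd()

# build the path relative to the work directory instead of hard-coding "/home/user/..."
employee_path = os.path.join(WORK_DIR, "data", "employee.csv")
employees = pd.read_csv(employee_path)

If the script should also work when launched from another directory, anchoring on the file itself with os.path.dirname(os.path.abspath(__file__)) is a common alternative.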
joaopcnogueira / target_mean_encoder.R
Last active September 22, 2019 23:53
Target mean encoder implementation in R
library(dplyr)

# create a toy dataset
data <- tibble(
  vehicle = c("car", "bus", "bike", "bus", "car", "bike"),
  target  = c(23, 34, 56, 78, 33, 65)
)

# print the dataframe
data

# OUTPUT
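The preview cuts off before the encoder itself, and the printed output is not shown. The idea is to replace each category by the mean of the target within that category; a minimal sketch of the same thing in Python with pandas (the gist's actual implementation is in R with dplyr):

import pandas as pd

data = pd.DataFrame({
    "vehicle": ["car", "bus", "bike", "bus", "car", "bike"],
    "target": [23, 34, 56, 78, 33, 65],
})

# each vehicle value is replaced by the mean target observed for that vehicle
data["vehicle_encoded"] = data.groupby("vehicle")["target"].transform("mean")
print(data)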
joaopcnogueira / condaenv.txt
Created August 20, 2019 13:21 — forked from pratos/condaenv.txt
To package a conda environment (requirements.txt and virtual environment)
# For Windows users
# Note: <> denotes changes to be made

# Create a conda environment
conda create --name <environment-name> python=<version:2.7/3.5>

# To create a requirements.txt file:
conda list                         # lists the packages installed in the environment
conda list -e > requirements.txt   # saves the package list to requirements.txt
joaopcnogueira / custom_groupby_functions.R
Last active July 19, 2019 17:01
Custom group_by function in R
library(tidyverse)

# toy dataset
df <- tibble(
  clientes = c('joao', 'joao', 'joao', 'lucas', 'lucas', 'julia', 'julia', 'julia', 'julia'),
  produtos = c('celular', 'notebook', 'livro', 'bola', 'carro', 'chapéu', 'moto', 'moto', 'caneta')
)

# custom function
get_produtos <- function(produtos){
joaopcnogueira / custom_groupby_functions.py
Last active July 13, 2019 14:42
Custom groupby function
"""
Defining a custom function to be applied in pandas groupby
"""
import numpy as np
import pandas as pd
clients = ['joao', 'joao', 'joao', 'lucas', 'lucas', 'julia', 'julia', 'julia', 'julia']
products = ['smartphone', 'notebook', 'book', 'ball', 'car', 'hat', 'bike', 'mouse', 'pen']
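The preview stops before the groupby itself. A minimal sketch of how a custom function might be applied per group; the particular aggregation shown, joining each client's products into one string, is an assumption made for illustration:

import pandas as pd

clients = ['joao', 'joao', 'joao', 'lucas', 'lucas', 'julia', 'julia', 'julia', 'julia']
products = ['smartphone', 'notebook', 'book', 'ball', 'car', 'hat', 'bike', 'mouse', 'pen']

df = pd.DataFrame({'client': clients, 'product': products})

def get_products(items):
    """Custom aggregation: join a group's unique products into a single string."""
    return ', '.join(sorted(set(items)))

# one row per client, with all of that client's products collected together
summary = df.groupby('client')['product'].apply(get_products).reset_index()
print(summary)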
joaopcnogueira / titanic-pipeline4.py
Last active July 11, 2019 21:02
Using Pipeline and ColumnTransformer to compose different data pre-processing steps
import pandas as pd
from sklearn.tree import DecisionTreeClassifier
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.compose import ColumnTransformer
from sklearn.model_selection import train_test_split, KFold, cross_validate, GridSearchCV
from category_encoders import OneHotEncoder
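Only the imports survive in the preview. A minimal sketch of how these pieces might be composed; the file name titanic.csv and the column choices (Age, Fare, Sex, Embarked, Survived) are assumptions based on the standard Kaggle Titanic dataset, not taken from the gist:

import pandas as pd
from sklearn.tree import DecisionTreeClassifier
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.compose import ColumnTransformer
from sklearn.model_selection import KFold, cross_validate
from category_encoders import OneHotEncoder

df = pd.read_csv("titanic.csv")  # assumed file path
X = df[["Age", "Fare", "Sex", "Embarked"]]
y = df["Survived"]

# impute the numeric columns, one-hot encode the categorical ones
preprocessor = ColumnTransformer(transformers=[
    ("num", SimpleImputer(strategy="median"), ["Age", "Fare"]),
    ("cat", OneHotEncoder(), ["Sex", "Embarked"]),
])

model = Pipeline([
    ("preprocessing", preprocessor),
    ("classifier", DecisionTreeClassifier(max_depth=3, random_state=42)),
])

cv = KFold(n_splits=5, shuffle=True, random_state=42)
scores = cross_validate(model, X, y, cv=cv, scoring="accuracy")
print(scores["test_score"].mean())

The GridSearchCV and train_test_split imports from the preview would slot in the same way, for example to tune the tree's max_depth over the whole pipeline.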