This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#EXAMPLE USING TITANIC DATASET | |
# Create a groupby object keyed on sex and passenger class: by_sex_class
# (`titanic` is expected to be a DataFrame defined earlier in the session.)
by_sex_class = titanic.groupby(['sex', 'pclass'])
# Write a function that imputes median | |
def impute_median(series):
    """Return a copy of `series` with missing values replaced by its median.

    The median is computed by `series.median()`, and the input series is
    not modified (``fillna`` returns a new object).
    """
    return series.fillna(series.median())
# Impute age and assign to titanic['age'] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def ecdf(data):
    """Compute ECDF for a one-dimensional array of measurements.

    Returns a tuple ``(x, y)`` where ``x`` is the sorted data and ``y`` holds
    the matching cumulative fractions ``1/n, 2/n, ..., 1``.
    """
    # Number of data points: n
    n = len(data)

    # x-data for the ECDF: x
    x = np.sort(data)

    # y-data for the ECDF: y
    y = np.arange(1, n + 1) / n

    # Hand both arrays back to the caller; without this the function
    # would compute the ECDF and then discard it.
    return x, y
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def bootstrap_replicate_1d(data, func):
    """Generate one bootstrap replicate of 1D data.

    Resamples `data` with replacement (same length as the input) using
    ``np.random.choice`` and returns ``func`` applied to that resample.
    """
    return func(np.random.choice(data, size=len(data)))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def draw_bs_reps(data, func, size=1):
    """Draw bootstrap replicates.

    Calls ``bootstrap_replicate_1d(data, func)`` `size` times and returns the
    results as a NumPy array of length `size`.
    """
    # Initialize array of replicates: bs_replicates
    bs_replicates = np.empty(size)

    # Generate replicates
    for i in range(size):
        bs_replicates[i] = bootstrap_replicate_1d(data, func)

    # Return the filled array; without this the replicates would be lost.
    return bs_replicates
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Import necessary modules | |
from sklearn.linear_model import LinearRegression | |
from sklearn.metrics import mean_squared_error | |
from sklearn.model_selection import train_test_split | |
# Create training and test sets: hold out 30% of the rows for testing and
# fix the random seed so the split is reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

# Create the regressor: reg_all
reg_all = LinearRegression()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Import necessary modules | |
from scipy.stats import randint | |
from sklearn.tree import DecisionTreeClassifier | |
from sklearn.model_selection import RandomizedSearchCV | |
# Setup the parameters and distributions to sample from: param_dist
# Lists are sampled uniformly from their entries; `randint(1, 9)` draws
# integers from 1 to 8 inclusive (the upper bound is exclusive).
param_dist = {
    "max_depth": [3, None],
    "max_features": randint(1, 9),
    "min_samples_leaf": randint(1, 9),
    "criterion": ["gini", "entropy"],
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Import the imputation transformer and the classifier
import numpy as np
from sklearn.impute import SimpleImputer
from sklearn.svm import SVC

# Setup the Imputation transformer: imp
# `sklearn.preprocessing.Imputer` was removed in scikit-learn 0.22;
# `SimpleImputer` is its replacement. It always imputes column-wise (the old
# `axis=0` behaviour) and takes the missing-value marker as `np.nan` rather
# than the string 'NaN'.
imp = SimpleImputer(missing_values=np.nan, strategy='most_frequent')

# Instantiate the SVC classifier: clf
clf = SVC()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from warnings import warn | |
import numpy as np | |
import pandas as pd | |
def multilabel_sample(y, size=1000, min_count=5, seed=None): | |
""" Takes a matrix of binary labels `y` and returns | |
the indices for a sample of size `size` if | |
`size` > 1 or `size` * len(y) if `size` <= 1.
The sample is guaranteed to have > `min_count` of |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import networkx as nx | |
def path_exists(G, node1, node2): | |
""" | |
This function checks whether a path exists between two nodes (node1, node2) in graph G. | |
""" | |
visited_nodes = set() | |
queue = [node1] | |
for node in queue: | |
neighbors = G.neighbors(node) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import random

import pandas as pd

filename = "data.csv"

# Count the data rows (total lines minus the header line). The context
# manager closes the file handle instead of leaving it open.
with open(filename) as f:
    n = sum(1 for _ in f) - 1  # number of data rows in the file

s = 10000  # desired sample size (number of dataset rows to keep)

# Randomly pick the row numbers to skip; row 0 (the header) is never in the
# skip list. `max(..., 0)` keeps every row when the file has fewer than `s`
# data rows, where `random.sample` would otherwise raise ValueError for a
# negative sample size.
skip = sorted(random.sample(range(1, n + 1), max(n - s, 0)))
df = pd.read_csv(filename, skiprows=skip)
OlderNewer