Skip to content

Instantly share code, notes, and snippets.

@dongr0510
dongr0510 / standard.py
Created August 28, 2020 21:18
NumPy and scikit-learn
# Standardize `data` column by column and inspect the result.
# NOTE(review): `data` is not defined in this snippet - presumably a 2-D
# numeric array supplied by surrounding code; confirm.
from sklearn.preprocessing import scale
# standardizing each column of data (scale() is called with its defaults)
col_standardized = scale(data)
# Column means (round to nearest thousandth); after standardization these are
# expected to be ~0, and the rounding hides floating-point residue
col_means = col_standardized.mean(axis=0).round(decimals=3)
# Column standard deviations; expected to be ~1 for every non-constant column
col_stds = col_standardized.std(axis=0)
# Rescale `data` into a fixed range with MinMaxScaler, first with the default
# range and then with a custom one.
# NOTE(review): `data` is not defined in this snippet - confirm where it comes from.
from sklearn.preprocessing import MinMaxScaler
# no arguments: the scaler uses its default feature_range (documented as (0, 1))
zeroOne_scaler = MinMaxScaler()
# fit_transform learns the scaling from `data` and applies it in one call
transformed = zeroOne_scaler.fit_transform(data)
# try user defined range: here the target range is (-1, 1)
custom_scaler = MinMaxScaler(feature_range=(-1,1))
# rebinding `transformed` replaces the default-range result computed above
transformed = custom_scaler.fit_transform(data)
# Scale `data` with RobustScaler using its default settings. scikit-learn
# documents this scaler as centring on the median and scaling by a quantile
# range (the IQR by default), which makes it less sensitive to outliers.
# NOTE(review): `data` is not defined in this snippet - confirm where it comes from.
from sklearn.preprocessing import RobustScaler
robust_scaler = RobustScaler()
# fit the scaler on `data` and return the scaled copy in one call
transformed = robust_scaler.fit_transform(data)
# Fill in missing values of `data` with SimpleImputer, trying several strategies.
# NOTE(review): `data` is not defined in this snippet - confirm where it comes from.
from sklearn.impute import SimpleImputer
# perform data imputation using mean values from each column
# (no arguments: SimpleImputer's documented default strategy is 'mean')
imp_mean = SimpleImputer()
transformed = imp_mean.fit_transform(data)
# same imputation, but with the 'median' strategy; `transformed` is rebound,
# replacing the mean-imputed result above
imp_median = SimpleImputer(strategy='median')
transformed = imp_median.fit_transform(data)
# imputer using the 'most_frequent' strategy
# NOTE(review): imp_frequent is constructed but never fitted or applied in the
# visible text - the snippet looks cut off here; confirm.
imp_frequent = SimpleImputer(strategy='most_frequent')
# Project `data` onto its principal components with PCA, first with the
# default number of components and then with an explicit count.
# NOTE(review): `data` is not defined in this snippet - confirm where it comes from.
from sklearn.decomposition import PCA
# assume we have 10 columns in our dataset
# NOTE(review): the original note said the default is m-1 (9 in this example).
# scikit-learn documents n_components=None as keeping
# min(n_samples, n_features) components, and one fewer only with
# svd_solver='arpack' - confirm against the installed version.
pca_obj = PCA()
# fit on `data`, transform it, and round the result to 3 decimals
pc = pca_obj.fit_transform(data).round(3)
# explicitly request 7 components; `pca_obj` and `pc` are rebound, replacing
# the default-configuration results above
pca_obj = PCA(n_components=7)
pc = pca_obj.fit_transform(data).round(3)
def solve_knapsack(profits, weights, capacity):
    """Entry point for the plain recursive knapsack solver.

    Hands the three inputs to ``knapsack_recursive`` unchanged, starting the
    search at item index 0, and returns whatever that helper returns
    (presumably the best achievable profit - confirm against the helper).

    Args:
        profits: per-item profits, forwarded as-is.
        weights: per-item weights, forwarded as-is.
        capacity: capacity budget, forwarded as-is.
    """
    first_index = 0
    return knapsack_recursive(profits, weights, capacity, first_index)
def knapsack_recursive(profits, weights, capacity, currentIndex):
# base checks
if capacity <= 0 or currentIndex >= len(profits):
return 0
# recursive call after choosing the element at the currentIndex
def solve_knapsack(profits, weights, capacity):
    """Entry point for the memoized recursive knapsack solver.

    Builds the memoization table and hands it, together with the inputs, to
    ``knapsack_recursive`` starting at item index 0; returns whatever that
    helper returns (presumably the best achievable profit - confirm against
    the helper).

    Args:
        profits: per-item profits; its length sets the number of table rows.
        weights: per-item weights, forwarded as-is.
        capacity: capacity budget; the table has ``capacity + 1`` columns.
    """
    # Two-dimensional memo table: one row per item, one column per capacity
    # value 0..capacity. Every cell starts at -1.
    row_count = len(profits)
    column_count = capacity + 1
    memo = [[-1] * column_count for _ in range(row_count)]
    return knapsack_recursive(memo, profits, weights, capacity, 0)
def knapsack_recursive(dp, profits, weights, capacity, currentIndex):
# base checks
if capacity <= 0 or currentIndex >= len(profits):
def can_partition(num):
s = sum(num)
# if 's' is an odd number, we can't have two subsets with the same total
if s % 2 != 0:
return False
# we are trying to find a subset of given numbers that has a total sum of 's/2'.
s = int(s / 2)
for each number 'i'
create a new set which INCLUDES number 'i' if it does not exceed 'S', and recursively
process the remaining numbers
create a new set WITHOUT number 'i', and recursively process the remaining numbers
return true if any of the above two sets has a sum equal to 'S', otherwise return false
def can_partition(num, sum):
n = len(num)
dp = [[False for x in range(sum+1)] for y in range(n)]
# populate the sum = 0 columns, as we can always form '0' sum with an empty set
for i in range(0, n):
dp[i][0] = True
# with only one number, we can form a subset only when the required sum is
# equal to its value