Daniela A. dmiekoar

@dmiekoar
dmiekoar / onStart.sh
Created April 12, 2020 12:04
Add nbextension #jupyter #python #aws
#!/bin/bash
set -e

# Run the install as ec2-user so the extensions land in its Jupyter environment
sudo -H -i -u ec2-user bash << 'EOF'
echo "Install jupyter nbextension"
source /home/ec2-user/anaconda3/bin/activate JupyterSystemEnv
pip install jupyter_contrib_nbextensions
jupyter contrib nbextension install --user
EOF
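The filename and the ec2-user / JupyterSystemEnv paths suggest this is intended as the OnStart script of a SageMaker notebook-instance lifecycle configuration. A minimal sketch of registering it with boto3, assuming that intent (the config name install-nbextensions is an illustrative placeholder):

import base64
import boto3

sagemaker = boto3.client('sagemaker')

# Lifecycle config scripts are passed base64-encoded
with open('onStart.sh', 'rb') as f:
    on_start_b64 = base64.b64encode(f.read()).decode('utf-8')

sagemaker.create_notebook_instance_lifecycle_config(
    NotebookInstanceLifecycleConfigName='install-nbextensions',  # hypothetical name
    OnStart=[{'Content': on_start_b64}],
)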
@dmiekoar
dmiekoar / blocked_status
Created April 1, 2020 09:58
Status and Details #PowerBI
Status = IF(
    DATEDIFF(TrustStockVisibility[Reference Date].[Date]; TrustStockVisibility[SSL]; DAY) > 0;
    "Available";
    IF(DATEDIFF(TrustStockVisibility[Expiry Date]; TrustStockVisibility[Reference Date].[Date]; DAY) >= 0; "SSL"; "Expired")
)

Details = IF(
    DATEDIFF(TrustStockVisibility[Reference Date].[Date]; TrustStockVisibility[SSL]; DAY) > 0;
    IF(
@dmiekoar
dmiekoar / create_calendar
Created April 1, 2020 09:54
Create calendar #powerbi
Expiry Dates =
VAR BaseCalendar = CALENDARAUTO()
RETURN
    GENERATE(
        BaseCalendar;
        VAR BaseDate = [Date]
        VAR YearDate = YEAR(BaseDate)
        VAR MonthNumber = MONTH(BaseDate)
        RETURN ROW (
@dmiekoar
dmiekoar / loglikelihoodloss.py
Created March 26, 2020 20:29
loglikelihoodloss #python #metrics
import numpy as np

# User-defined objective function: given the raw predictions and the training
# data, return the gradient and second-order gradient (hessian) of the
# log-likelihood (logistic) loss.
def logregobj(preds, dtrain):
    labels = dtrain.get_label()
    preds = 1.0 / (1.0 + np.exp(-preds))  # sigmoid: map margins to probabilities
    grad = preds - labels                 # first-order gradient
    hess = preds * (1.0 - preds)          # second-order gradient
    return grad, hess
##########################################################
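The preds/dtrain signature matches the custom-objective convention XGBoost uses, so a hedged usage sketch could look like the following (the toy data and parameter values are illustrative assumptions, not part of the gist):

import numpy as np
import xgboost as xgb

# Toy binary-labelled data just to exercise the objective
X = np.random.rand(100, 5)
y = (np.random.rand(100) > 0.5).astype(float)
dtrain = xgb.DMatrix(X, label=y)

# Custom objectives are passed via the obj argument of xgb.train
booster = xgb.train({"max_depth": 2, "eta": 0.1}, dtrain,
                    num_boost_round=10, obj=logregobj)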
@dmiekoar
dmiekoar / rmsle.py
Last active March 26, 2020 20:43
RMSLE #python #metrics
https://github.com/Microsoft/LightGBM/blob/master/examples/python-guide/advanced_example.py
import math

# A function to calculate Root Mean Squared Logarithmic Error (RMSLE)
def rmsle(y, y_pred):
    assert len(y) == len(y_pred)
    terms_to_sum = [(math.log(y_pred[i] + 1) - math.log(y[i] + 1)) ** 2.0
                    for i in range(len(y))]
    return (sum(terms_to_sum) / len(y)) ** 0.5
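A quick sanity check of the metric (the numbers below are made-up sample values, not from the gist):

y_true = [3.0, 5.0, 2.5, 7.0]
y_hat = [2.5, 5.0, 4.0, 8.0]

print(rmsle(y_true, y_hat))  # small positive value; exactly 0.0 when y_hat == y_true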
@dmiekoar
dmiekoar / infinite.py
Created March 26, 2020 09:23
Check infinite #python
# Print the values in a float column that are not whole numbers
for i in train['v40_int']:
    if not i.is_integer():
        print(i)

# Related checks for infinite / missing values:
# print(np.isposinf(X_scaled).sum().sum()); print(np.isneginf(X_scaled).sum().sum())
# np.isnan(X_scaled).sum()
# X_scaled.isnull().sum().sum()
# X_scaled.isna().sum()
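A vectorized alternative to the loop above, assuming (as the commented lines suggest) that train and X_scaled are pandas DataFrames; the column name v40_int comes from the gist, the rest is an illustrative sketch:

import numpy as np

# Rows of the column whose value has a fractional part
non_integer = train['v40_int'] % 1 != 0
print(train.loc[non_integer, 'v40_int'])

# Count +/-inf and NaN entries across the whole frame in one pass
print(np.isinf(X_scaled).sum().sum(), X_scaled.isna().sum().sum())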
@dmiekoar
dmiekoar / check.py
Created March 26, 2020 09:22
Check Zeros #python
import numpy as np
import pandas as pd

# Compute the number (and percentage) of zero values in each feature column
def check_zeros(dataset):
    temp_df = dataset.copy()
    n_total = temp_df.shape[0]
    idcol, counter, zeros_data = [], [], []
    features = [c for c in dataset.columns if c not in ['Outcome']]
    for col in features:
        zeros_count = n_total - np.count_nonzero(temp_df[col])
        idcol.append(col)
        counter.append(zeros_count)
        zeros_data.append(zeros_count / n_total * 100)
    zeros_data = pd.DataFrame({'Zero amount': counter, '% Zero': zeros_data}, index=idcol)
    return zeros_data
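A minimal usage sketch (the toy DataFrame and its columns are illustrative assumptions; only the excluded 'Outcome' column comes from the gist):

import pandas as pd

df = pd.DataFrame({
    'Glucose': [0, 120, 85, 0],
    'BMI': [33.6, 0.0, 28.1, 43.1],
    'Outcome': [1, 0, 1, 0],   # excluded by check_zeros
})

print(check_zeros(df))
# Glucose: 2 zeros (50%), BMI: 1 zero (25%)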
@dmiekoar
dmiekoar / feature.py
Created March 26, 2020 09:22
Feature Selector #python
import sys, os

# Make the local copy of the feature_selector package importable
path = os.path.abspath('../../Feature Selector')
sys.path.append(path)
from feature_selector import FeatureSelector

fs = FeatureSelector(data=X, labels=X.Yards)

# Features with a single unique value
fs.identify_single_unique()
single_unique = fs.ops['single_unique']

# Features that are highly correlated with another feature
fs.identify_collinear(correlation_threshold=0.9)
correlated_features = fs.ops['collinear']

from lofo import LOFOImportance, Dataset, plot_importance
from sklearn.model_selection import KFold

nn = X
nn['Yards'] = X['Yards']
# import data
train_df = nn
# extract a sample of the data
import numpy as np
import pandas as pd
from tqdm import tqdm_notebook
import multiprocessing
import warnings
from sklearn.metrics import check_scoring
class FLOFOImportance:
"""