This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def feature_creation(temp, i):
    """Add lagged rolling-window statistics of column ``i`` to ``temp`` in place.

    Every statistic is computed on ``temp[i].shift(1)``, i.e. on the column
    moved down by one row, so the value stored on a row is built from the
    rows positioned before it and never from the row itself.

    Columns added (``<i>`` stands for the value of ``i``):
        ``<i>_mean_60`` -- rolling mean, window 60, at least 10 observations
        ``<i>_std_60``  -- rolling std, window 60, at least 10 observations
        ``<i>_std_10``  -- rolling std, window 10, at least 6 observations

    Rows with fewer valid observations than the minimum hold NaN.

    Args:
        temp: DataFrame-like object holding column ``i``; modified in place.
        i: Name of the source column (string).

    Returns:
        list[str]: Names of the added columns, in creation order.
    """
    # NOTE(review): this was recovered from a truncated listing in which the
    # name list was collected but never returned and the third name was never
    # appended. Returning all three names is backward-compatible with callers
    # that ignored the previous ``None`` -- confirm against the full original.
    lagged = temp[i].shift(1)
    long_window = lagged.rolling(60, min_periods=10)
    short_window = lagged.rolling(10, min_periods=6)

    features = {
        f"{i}_mean_60": long_window.mean(),
        f"{i}_std_60": long_window.std(),
        f"{i}_std_10": short_window.std(),
    }
    for column, values in features.items():
        temp[column] = values

    return list(features)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def visual(X, iot = ['sensor1'], batch =60 , look_b = 50 ): | |
# obtain indexes | |
stops = X[(X['Response']== 1)].Time.values | |
non_stops = X[(X['delta_nxt'] >100)&(X['Y']== 1)].Time.values # for good work | |
idx = np.random.choice(stops, size=batch) | |
for i in idx: | |
temp = X[(X.Time == i)|((i - X.Time ) < look_b )&((i - X.Time) > 0 )] | |
plt.figure(figsize=(30,10)) | |
for n,j in enumerate(iot): | |
ax = plt.subplot(4,5, n+1) # here x*y should equal len(iot) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def class_score(dt, feature, q): | |
# checks how well does a particular feature | |
# split the data based on quantiles | |
q0, q1 = np.quantile(dt[feature], q[0] ), np.quantile(dt[feature], q[1] ) | |
dt['pred'] = ((dt[feature]< q0) |(dt[feature]> q1)).astype(int) | |
if dt[(dt['pred']==1)].shape[0] > 2: | |
score = dt[(dt['Response']==1)&(dt['pred']==1)].shape[0]/ \ | |
dt[(dt['pred']==1)].shape[0] | |
else: |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def agg_dev(X, feats, cat = True): | |
X = X.set_index('Id') | |
# for lists of discrete features | |
if cat == True: | |
# count how many features changed in value | |
alpha = np.zeros(len(tmp_mini3)) # placeholder | |
for i in feats: | |
# get lagged values |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def y_prep(tmp, interval = [10,5], x = 'Time', type = 'A' ): | |
# forward fill current stops | |
tmp[['Cur_Stop', 'Cur_Stop_end']] =tmp[['Stop_t','End']].fillna(method='ffill') | |
# back fill future stops | |
tmp[['Nxt_Stop', 'Nxt_Stop_start']] = tmp[['Stop_t','Start']].fillna(method='bfill') | |
# time since last and next stop | |
tmp['delta_cur'] = ((tmp['Cur_Stop_end'] - tmp[x])/np.timedelta64(1, 'm')) | |
tmp['delta_nxt'] = ((tmp['Nxt_Stop_start'] -tmp[x])/np.timedelta64(1, 'm')) | |
tmp['Y'] = (tmp[x]<=interval[0]).astype(int)*(tmp[x]>=interval[1]).astype(int) | |
tmp = tmp[(tmp['Y'] == 0)&(tmp[x] >interval[1])| |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def del_const_feat(X, thres = 5, dtype = int): | |
constant_cols = [] | |
if dtype == int: | |
for i in X.dtypes[(X.dtypes == int)].index: | |
if X.shape[0]- X[i].value_counts().sort_values(ascending = False).values[0] <= thres: | |
constant_cols.append(i) | |
if dtype == float: | |
for i in X.dtypes[(X.dtypes == float)].index: | |
if X[i].std() == 0: | |
constant_cols.append(i) |