Skip to content

Instantly share code, notes, and snippets.

@ikleni
ikleni / feature_creation.py
Last active October 20, 2019 15:15
Creating a standard set of features
def feature_creation(temp, i):
names = []
temp[i+'_'+'mean' + '_' + '60'] = temp[i].shift(1 ).rolling(60, min_periods=10).mean()
names.append(i+'_'+'mean' + '_' + '60')
temp[i+'_'+'std' + '_' + '60'] = temp[i].shift(1 ).rolling(60, min_periods=10).std()
names.append(i+'_'+'std' + '_' + '60')
temp[i+'_'+'std' + '_' + '10'] = temp[i].shift(1 ).rolling(10, min_periods=6).std()
@ikleni
ikleni / plots1.py
Last active October 18, 2019 21:58
Plots sensor readings prior to a stop
def visual(X, iot = ['sensor1'], batch =60 , look_b = 50 ):
# obtain indexes
stops = X[(X['Response']== 1)].Time.values
non_stops = X[(X['delta_nxt'] >100)&(X['Y']== 1)].Time.values # for good work
idx = np.random.choice(stops, size=batch)
for i in idx:
temp = X[(X.Time == i)|((i - X.Time ) < look_b )&((i - X.Time) > 0 )]
plt.figure(figsize=(30,10))
for n,j in enumerate(iot):
ax = plt.subplot(4,5, n+1) # here x*y should equal len(iot)
@ikleni
ikleni / quantile_search.py
Last active October 20, 2019 15:21
Helps select relevant variables for anomaly detection
def class_score(dt, feature, q):
# checks how well does a particular feature
# split the data based on quantiles
q0, q1 = np.quantile(dt[feature], q[0] ), np.quantile(dt[feature], q[1] )
dt['pred'] = ((dt[feature]< q0) |(dt[feature]> q1)).astype(int)
if dt[(dt['pred']==1)].shape[0] > 2:
score = dt[(dt['Response']==1)&(dt['pred']==1)].shape[0]/ \
dt[(dt['pred']==1)].shape[0]
else:
@ikleni
ikleni / agg_feat.py
Last active October 20, 2019 15:26
creates aggregated deviations
def agg_dev(X, feats, cat = True):
X = X.set_index('Id')
# for lists of discrete features
if cat == True:
# count how many features changed in value
alpha = np.zeros(len(tmp_mini3)) # placeholder
for i in feats:
# get lagged values
@ikleni
ikleni / creating_Y.py
Last active October 18, 2019 21:38
creating Y
def y_prep(tmp, interval = [10,5], x = 'Time', type = 'A' ):
# forward fill current stops
tmp[['Cur_Stop', 'Cur_Stop_end']] =tmp[['Stop_t','End']].fillna(method='ffill')
# back fill future stops
tmp[['Nxt_Stop', 'Nxt_Stop_start']] = tmp[['Stop_t','Start']].fillna(method='bfill')
# time since last and next stop
tmp['delta_cur'] = ((tmp['Cur_Stop_end'] - tmp[x])/np.timedelta64(1, 'm'))
tmp['delta_nxt'] = ((tmp['Nxt_Stop_start'] -tmp[x])/np.timedelta64(1, 'm'))
tmp['Y'] = (tmp[x]<=interval[0]).astype(int)*(tmp[x]>=interval[1]).astype(int)
tmp = tmp[(tmp['Y'] == 0)&(tmp[x] >interval[1])|
@ikleni
ikleni / dropping_cnst.py
Last active October 18, 2019 21:32
Dropping constant features
def del_const_feat(X, thres = 5, dtype = int):
constant_cols = []
if dtype == int:
for i in X.dtypes[(X.dtypes == int)].index:
if X.shape[0]- X[i].value_counts().sort_values(ascending = False).values[0] <= thres:
constant_cols.append(i)
if dtype == float:
for i in X.dtypes[(X.dtypes == float)].index:
if X[i].std() == 0:
constant_cols.append(i)