View docSearcher_snippet1.js
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
exports.handler = (event, context, callback) => { | |
var csd = new AWS.CloudSearchDomain({ | |
endpoint: CS_NAME+'.'+SERVICES_REGION+'.cloudsearch.amazonaws.com', | |
apiVersion: '2013-01-01' | |
}); | |
var params = { | |
query: event.query, | |
sort: '_score desc', |
View docIndexer_snippet1.js
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
exports.handler = (event, context, callback) => { | |
// WARNING : | |
// This snippet assumes : event.Records[0].eventName == 'ObjectCreated:Put' | |
// but the full code deals with both 'ObjectCreated:Put' and 'ObjectRemoved:Delete' | |
var filename = event.Records[0].s3.object.key; | |
var bucketname = event.Records[0].s3.bucket.name; | |
var params = { |
View cron_control.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from psutil import virtual_memory | |
from functools import wraps | |
MIN_VM_SHARE = 0.10 | |
MAX_CRON_PROCESSES = 5 | |
def cron_control(func=None): | |
@wraps(func) | |
def wrapped(*args, **kwargs): |
View cron_killer.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from time import localtime, mktime | |
MAX_RUN_MINUTES = 120 | |
def cron_killer(): | |
def __run_minutes(proc): | |
t_start = localtime(proc.create_time()) | |
t_now = localtime() | |
return (mktime(t_now) - mktime(t_start)) / 60. |
View get_cron_processes.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from psutil import process_iter | |
def __get_cron_processes():
    """Return the running ``python`` processes selected by the cron filters.

    A process is kept only when all of the following hold:
      * its process name is exactly ``python``;
      * ``python`` is one of its command-line arguments;
      * it is not owned by ``root``;
      * none of its command-line arguments contains ``ipykernel``.

    Processes that terminate, or that cannot be inspected, while they are
    being examined are skipped instead of aborting the whole scan.

    Returns:
        list: the matching ``psutil.Process`` objects, in ``process_iter()`` order.
    """
    # Local import: psutil is already a dependency of this file (process_iter).
    from psutil import AccessDenied, NoSuchProcess

    processes = []
    for proc in process_iter():
        try:
            if proc.name() != 'python':
                continue
            cmdline = proc.cmdline()
            if 'python' not in cmdline:
                continue
            # Compare by value. The former identity test (`is 'root'`) depends on
            # string interning and in practice never excluded root-owned processes.
            if proc.username() == 'root':
                continue
            # Substring match per argument, so that e.g. `ipykernel_launcher`
            # is excluded as well as a bare `ipykernel` argument.
            if any('ipykernel' in arg for arg in cmdline):
                continue
        except (NoSuchProcess, AccessDenied):
            # The process vanished or is not readable by the current user.
            continue
        processes.append(proc)
    return processes
View pump_similar_variables.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Search for variables that are very similar | |
def show_similars(cols, threshold=0.90, df=None):
    """Print every pair of columns whose Cramer's V statistic exceeds ``threshold``.

    For each unordered pair ``(col1, col2)`` taken from ``cols`` (in the order
    given), a contingency table is built with ``pd.crosstab`` and passed to
    ``cramers_corrected_stat``. Pairs scoring strictly above ``threshold`` are
    printed as ``(col1, col2) <score as an integer percentage>``.

    Args:
        cols: iterable of column names to compare pairwise.
        threshold: minimum (exclusive) statistic for a pair to be reported.
        df: DataFrame holding the columns. Defaults to the module-level ``dfX``,
            which keeps existing two-argument calls working unchanged.

    Returns:
        None. Results are written to standard output.
    """
    cols = list(cols)  # accept any iterable, including one-shot iterators
    if len(cols) < 2:
        return  # no pair to compare
    frame = dfX if df is None else df
    for i1, col1 in enumerate(cols):
        # Only look at columns after col1 so each pair is evaluated once.
        for col2 in cols[i1 + 1:]:
            cm12 = pd.crosstab(frame[col1], frame[col2]).values  # contingency table
            cv12 = cramers_corrected_stat(cm12)  # Cramer V statistic
            if cv12 > threshold:
                print((col1, col2), int(cv12 * 100))
View pump_predictive_variables.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# select columns that have "few" unique values | |
cramer_cols = [col for col in df.columns.values if (len(df[col].unique())<250)] | |
for col in cramer_cols: | |
try: | |
cm = pd.crosstab(df[col], df['status_group']).values # contingency table | |
cv1 = cramers_corrected_stat(cm) | |
if (cv1>=0.20): | |
print(col, int(cv1*100)) | |
except: |
View pump_quantile_encoding.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Number of quantile bins used for the encoding (4 => quartiles).
nbQs = 4
# Bin construction_year with pd.qcut (labels=False yields integer bin codes)
# and divide the codes by (nbQs - 1) to rescale them.
dfX['construction_year_quantile'] = (
    pd.qcut(dfX['construction_year'], q=nbQs, labels=False) / (nbQs - 1.0)
)
View pump_null_zero_geographicals_2.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Record which rows carry suspect geographic values in 0/1 indicator columns,
# so that information survives once the raw values are overwritten below.
dfX['gps_height_bad'] = 1 * (dfX['gps_height'] <= 0)
dfX['longitude_bad'] = 1 * (dfX['longitude'] < 25)
dfX['latitude_bad'] = 1 * (dfX['latitude'] > -0.5)
# Register the new indicator columns in the list of geographic features.
geos.extend(['gps_height_bad', 'longitude_bad', 'latitude_bad'])

# mean_geo_df is indexed by basin, e.g. mean_geo_df.at['Lake Victoria', 'latitude'].
# Replace each suspect gps_height with the mean gps_height of the row's basin.
dfX.loc[dfX['gps_height'] <= 0, 'gps_height'] = dfX['basin'].apply(
    lambda basin: mean_geo_df.at[basin, 'gps_height']
)
View pump_null_zero_geographicals_1.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Keep only the rows whose coordinates pass the plausibility bounds used for
# Tanzania: latitude below -0.5, longitude above 25 and a positive gps_height.
bound_df = dfX[(dfX['latitude'] < -0.5) & (dfX['longitude'] > 25) & (dfX['gps_height'] > 0)]
# Mean latitude / longitude / gps_height of the plausible rows in each basin,
# indexed by basin. The columns are selected with a list: selecting several
# columns with a bare tuple (['latitude','longitude','gps_height']) is
# deprecated and rejected by current pandas versions.
mean_geo_df = bound_df.groupby(['basin'])[['latitude', 'longitude', 'gps_height']].mean()
# Every basin present in dfX must have at least one plausible row; otherwise a
# later per-basin lookup would have no mean to fall back on.
assert mean_geo_df.shape[0] == len(dfX['basin'].unique())
# Out[31]: mean_geo_df | |
# latitude longitude gps_height |
NewerOlder