Skip to content

Instantly share code, notes, and snippets.

# Search for variables that are very similar
def show_similars(cols, threshold=0.90, df=None):
    """Print every pair of columns whose Cramer's V exceeds *threshold*.

    Each unordered pair of names in *cols* is cross-tabulated with
    ``pd.crosstab`` and the resulting contingency table is handed to
    ``cramers_corrected_stat``. Pairs scoring strictly above *threshold*
    are printed as ``(col1, col2) <score as a truncated integer percent>``.

    Args:
        cols: Column names to compare pairwise.
        threshold: Exclusive lower bound on the statistic for a pair to be
            reported.
        df: Object indexed by column name to obtain each column. When left
            as ``None`` the module-level ``dfX`` is used, which is the only
            source the function supported before this parameter existed.

    Returns:
        None. Results are written to standard output.
    """
    # Local import keeps the block self-contained; the top of the file is
    # not visible from here.
    from itertools import combinations

    frame = dfX if df is None else df

    # combinations() yields each unordered pair exactly once, in the same
    # order the former nested enumerate loops with `i1 < i2` produced.
    for col1, col2 in combinations(cols, 2):
        contingency = pd.crosstab(frame[col1], frame[col2]).values
        cramers_v = cramers_corrected_stat(contingency)
        if cramers_v > threshold:
            print((col1, col2), int(cramers_v * 100))
from psutil import process_iter
def __get_cron_processes():
    """Return the running processes that match the cron-job filter.

    A process yielded by ``process_iter()`` is kept when all of the
    following hold:

    * its name is exactly ``'python'``;
    * ``'python'`` is one of its command-line arguments;
    * its owner is not ``'root'``;
    * ``'ipykernel'`` is not one of its command-line arguments.

    Returns:
        list: The matching process objects, in iteration order.
    """
    selected = []
    for proc in process_iter():
        if proc.name() != 'python':
            continue
        # Fetch the argument list once; it is needed for two checks.
        cmdline = proc.cmdline()
        if 'python' not in cmdline:
            continue
        # Compare by value. The previous `is 'root'` identity test is
        # practically never true for a string built at runtime, so
        # root-owned processes were not being excluded.
        if proc.username() == 'root':
            continue
        # NOTE(review): this matches only an argument equal to 'ipykernel';
        # kernels started via `-m ipykernel_launcher` would not be excluded.
        # Confirm whether a substring match is intended.
        if 'ipykernel' in cmdline:
            continue
        selected.append(proc)
    return selected
from time import localtime, mktime
MAX_RUN_MINUTES = 120
def cron_killer():
def __run_minutes(proc):
    """Return how long *proc* has been running, in minutes.

    Args:
        proc: Object exposing ``create_time()``. The value is treated as
            seconds since the epoch (the original code passed it straight
            to ``time.localtime``).

    Returns:
        float: Elapsed minutes between the process start and now.
    """
    # Local import: only `localtime` and `mktime` are imported at file level.
    from time import time

    # Subtract epoch seconds directly. Converting both instants to
    # struct_time and back through mktime() only truncated them to whole
    # seconds without changing the difference otherwise.
    return (time() - proc.create_time()) / 60.
from psutil import virtual_memory
from functools import wraps
MIN_VM_SHARE = 0.10
MAX_CRON_PROCESSES = 5
def cron_control(func=None):
@wraps(func)
def wrapped(*args, **kwargs):
@mzaradzki
mzaradzki / docIndexer_snippet1.js
Created May 29, 2017 09:13
AWS Lambda to index new S3 files in CloudSearch
exports.handler = (event, context, callback) => {
// WARNING :
// This snippet assumes : event.Records[0].eventName == 'ObjectCreated:Put'
// but the full code deals with both 'ObjectCreated:Put' and 'ObjectRemoved:Delete'
var filename = event.Records[0].s3.object.key;
var bucketname = event.Records[0].s3.bucket.name;
var params = {
@mzaradzki
mzaradzki / docSearcher_snippet1.js
Created May 29, 2017 16:16
AWS Lambda function to call the CloudSearch API with the JavaScript SDK
exports.handler = (event, context, callback) => {
var csd = new AWS.CloudSearchDomain({
endpoint: CS_NAME+'.'+SERVICES_REGION+'.cloudsearch.amazonaws.com',
apiVersion: '2013-01-01'
});
var params = {
query: event.query,
sort: '_score desc',