Skip to content

Instantly share code, notes, and snippets.

@maxpv
maxpv / README.md
Created September 4, 2020 13:04
SoX bulk wav file processing
@maxpv
maxpv / smart-pip-install.py
Last active January 3, 2020 13:37
Smart pip install from a Jupyter notebook, automatically selecting the Python interpreter used by Jupyter.
# https://jakevdp.github.io/blog/2017/12/05/installing-python-packages-from-jupyter/
# Install a package with the same Python interpreter that is running this notebook.
import sys
# `sys.executable` is the path of the running interpreter; calling pip through it
# (`-m pip`) targets that interpreter instead of whichever `pip` is first on PATH.
# Replace <package> with the package to install. The leading `!` is notebook shell
# syntax, so this line works in IPython/Jupyter only, not in a plain .py script.
!{sys.executable} -m pip install <package>
@maxpv
maxpv / log_error.py
Created September 3, 2019 15:42
Cell magic for logging errors to a file. Useful for monitoring (very) long Jupyter jobs.
from IPython.core.magic import register_cell_magic
import datetime
@register_cell_magic
def log_error(line, cell):
try:
exec(cell)
except Exception as e:
with open('error.log', 'w+') as f:
f.write("[{}] Exception ({}): {}".format(datetime.datetime.now(),
@maxpv
maxpv / remove_comments.py
Created March 22, 2019 05:41
Remove PHP/C/Python comments in Python
import re
def remove_comments(s):
    """Strip C/PHP-style and ``#``-style comments from source text.

    Removes ``// ...`` line comments, ``/* ... */`` block comments (which may
    span several lines) and ``# ...`` comments.

    Args:
        s: The source text to clean.

    Returns:
        The text with comments removed. A line holding only a ``#`` comment is
        dropped entirely; a trailing ``#`` comment is cut from its line while
        the line break is kept, so adjacent code lines are never joined.

    Limitations:
        Only double-quoted, single-line strings shield ``//`` and ``/* */``
        from removal. ``#`` is removed wherever it appears, including inside
        string literals.
    """
    # Group 1 is a double-quoted string (kept as-is); group 2 is a ``//`` or
    # ``/* */`` comment (dropped). Matching strings first stops comment
    # markers inside them from being treated as comments.
    c_style = r'("[^\n]*"(?!\\))|(//[^\n]*$|/(?!\\)\*[\s\S]*?\*(?!\\)/)'
    # Substitute match-by-match rather than ``str.replace`` on the comment
    # text, which would also delete identical text inside protected strings.
    s = re.sub(c_style, lambda m: m.group(1) or '', s, flags=re.MULTILINE)
    # Whole-line ``#`` comments: remove the line together with its newline.
    s = re.sub(r'(?m)^[ \t]*#.*\n?', '', s)
    # Trailing ``#`` comments: remove the comment but leave the newline.
    s = re.sub(r'[ \t]*#.*', '', s)
    return s
@maxpv
maxpv / slack_logger.py
Last active February 17, 2019 12:46
Slackbot keras callback logger
import subprocess
from collections import OrderedDict
from collections import Iterable
import six
import time
import numpy as np
from keras.callbacks import Callback
@maxpv
maxpv / average_width_file.sh
Last active January 22, 2019 16:14
Average of maximum file width in all subdirectories
# Average, over every *.txt file under ./folder, of the length of its longest line.
# `wc -L` prints "<max line length> <path>" per file; `cut` keeps the number and
# awk averages it. The NR guard avoids a division by zero when no file matches.
find folder -type f -name "*.txt" -exec wc -L {} \; | cut -f 1 -d ' ' | awk '{ total+=$NF } END { if (NR) print total/NR }'
@maxpv
maxpv / soft_binary_metrics.py
Created January 10, 2019 13:03
compute a light report for binary classifiers
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
import pandas as pd
def evalutate_metrics(clf, X_test, y_test, threshold=0.5):
y_pred = clf.predict(X_test)[:,1]
y_test_l = np.argmax(y_test, axis=1)
print()
print('Confusion matrix:')
curl -s http://php.net/manual/en/indexes.functions.php | grep -o 'class="index">.*</a>' | sed 's/\(class="index">\|<\/a>\)//g'
@maxpv
maxpv / days_generator.py
Last active November 21, 2018 12:51
Iterating over days in pandas
import pandas as pd
def days_generator(start_date, end=pd.Timestamp.today(), step_day=1, format='%Y-%m-%d'):
"""
Generate a dates in a format from start_date to end
with a step of step_day
"""
s = pd.to_datetime(start_date)
while s <= end:
yield s.strftime(format)
@maxpv
maxpv / getting_threshold_roc.py
Last active January 21, 2019 17:05
Return best threshold from ROC curve by finding closest point to [0,1] (in a dumb but effective way)
from sklearn import metrics
# ROC operating points for the raw scores, one (fpr, tpr) pair per threshold.
fpr, tpr, thresholds = metrics.roc_curve(y_test, y_pred_raw)
# Squared distance from each ROC point to the ideal corner (fpr=0, tpr=1);
# the closest point gives the selected threshold.
max_indice = np.argmin(fpr ** 2 + (tpr - 1) ** 2)
threshold = thresholds[max_indice]
# We can display it on the ROC curve
fig = plt.figure()
plt.plot(fpr,tpr)