Skip to content

Instantly share code, notes, and snippets.

@cfperez
cfperez / kernel_name.py
Created September 29, 2016 23:08
Get the kernel name as it appears in htop/ps
from os.path import basename
from IPython import get_ipython
# Look up the active IPython shell, then read the kernel's connection file
# path from its IPKernelApp config. Only the file's base name is printed
# (per the gist description, this is the name shown in htop/ps -- confirm).
shell = get_ipython()
connection_file = shell.kernel.config.IPKernelApp.connection_file
print(basename(connection_file))
@cfperez
cfperez / FileCache.py
Last active September 19, 2016 21:14
Store long-running function results as files
from pyspark.sql import DataFrame
from functools import wraps
import hashlib
from os import path, remove
import logging
from sys import stderr
def getLogger(name, debug=False, loglevel=None):
logger = logging.getLogger(name)
if not logger.handlers:
@cfperez
cfperez / DataFrameVectorizer.py
Last active September 23, 2017 13:43
Scikit-learn transformer for pandas DataFrames
from sklearn.feature_extraction import DictVectorizer
class DataFrameVectorizer(DictVectorizer):
'''Scikit-learn transformer for DataFrames. Categorical features become one-hot encoded.
>>> df = pd.DataFrame({'school': ['a','b','c'], 'gpa': [2,3,4]})
>>> dv = DataFrameVectorizer()
>>> transformed = dv.fit_transform(df)
>>> dv.inverse_transform(transformed)
#!/usr/bin/env python
# Install a matplotlib style file
from __future__ import print_function
import matplotlib
from matplotlib import style as mplstyle
import os
import sys
import shutil
import argparse
import requests
def arrays_equal_nan(a, b):
    """Return True if `a` and `b` are equal element-wise, counting NaN == NaN.

    Two positions match when their values compare equal, or when both values
    are NaN. The result is the reduction of that element-wise match over all
    positions (an empty comparison is therefore True).

    Both inputs are passed through ``np.asarray`` so that plain sequences
    (lists, tuples) are compared element-wise instead of falling back to
    Python's container equality, which yields a single bool and cannot detect
    matching NaNs. ndarray inputs are used as-is.

    Parameters
    ----------
    a, b : array-like
        Values to compare; shapes must be broadcast-compatible.

    Returns
    -------
    numpy.bool_
        True when every position matches, else False.
    """
    a = np.asarray(a)
    b = np.asarray(b)
    # A value is unequal to itself only when it is NaN, so this mask marks
    # the positions where both inputs hold NaN.
    both_nan = (a != a) & (b != b)
    return np.all((a == b) | both_nan)
%load_ext autoreload
%autoreload 1
%matplotlib inline
%config InlineBackend.figure_format='retina'
from __future__ import division, print_function
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
def by_n(iterable, n=2):
"""Return an sliding window iterable of tuples of size `n`
E.g.
>>> w = by_n(range(10), n=2)
>>> next(w)
(0,1)
>>> next(w)
(1,2)
"""
@cfperez
cfperez / script.py
Created September 26, 2015 00:19
Python script skeleton
#!/usr/bin/env python
"""Docstring
"""
def main(args=None):
# Pipe into head/tail w/o IOErrors
from signal import signal, SIGPIPE, SIG_DFL
signal(SIGPIPE,SIG_DFL)
parsed_args = _parse_args(args)
@cfperez
cfperez / log_transformer.py
Created September 10, 2015 23:52
LogTransformer to log scale features in sklearn Pipelines
from sklearn.base import BaseEstimator,TransformerMixin
class LogTransformer(BaseEstimator,TransformerMixin):
def __init__(self, constant=1, base='e'):
from numpy import log,log10
if base == 'e' or base == np.e:
self.log = log
elif base == '10' or base == 10:
self.log = log10
else:
@cfperez
cfperez / radar_plot.py
Last active September 10, 2015 18:09
radar_plot() wrapper function around matplotlib example
from __future__ import print_function
import matplotlib.pyplot as plt
from matplotlib.path import Path
from matplotlib.spines import Spine
from matplotlib.projections.polar import PolarAxes
from matplotlib.projections import register_projection
import numpy as np
def radar_plot(data, axes=[], colors=[], labels=[], ax=None, frame='circle', **kwargs):
'''Create radar plot from 2D array. Each row is a closed line; each column is a feature/axis.