Skip to content

Instantly share code, notes, and snippets.

View JoshZastrow's full-sized avatar

Josh Zastrow JoshZastrow

View GitHub Profile
import pandas as pd
def filter_columns_by_keyword(df: pd.DataFrame, keyword: str, regex: bool = True) -> pd.DataFrame:
    """Return the columns of ``df`` whose label contains ``keyword``.

    Args:
        df: Frame whose column labels are searched.
        keyword: Pattern to look for inside each column label.
        regex: When True (the default, which keeps the previous behaviour)
            ``keyword`` is interpreted as a regular expression. Pass False
            to match it as a literal substring, e.g. when the keyword
            contains characters such as ``(`` or ``.``.

    Returns:
        The selection of ``df`` restricted to the matching columns, in
        their original order.
    """
    # na=False: labels that are not strings (possible in a mixed object
    # index) give a missing value from ``str.contains``; count them as
    # non-matches so the mask stays purely boolean.
    matches = df.columns.str.contains(keyword, regex=regex, na=False)
    return df.loc[:, matches]
def convert_monthly_ts(df):
    """Average ``y`` per ``job_area`` and month bucket.

    The index is first moved back into columns, then every ``ds`` value is
    shifted by one ``pd.offsets.MonthBegin()`` to produce ``ds_month``.
    Rows are grouped on ``job_area`` and ``ds_month`` and the mean of ``y``
    is returned as a flat frame with those three columns.

    NOTE(review): adding ``MonthBegin()`` rolls a date forward to the next
    month start, so ``ds_month`` appears to label each bucket with the first
    day of the following month -- confirm this is the intended label.
    """
    flat = df.reset_index()
    month_start = flat["ds"] + pd.offsets.MonthBegin()
    flat = flat.assign(ds_month=month_start)
    buckets = flat.groupby(["job_area", "ds_month"], as_index=False)
    return buckets["y"].mean()
@JoshZastrow
JoshZastrow / data_utils.py
Created December 4, 2019 22:29
decorators for data inspection along pipeline
from functools import wraps
import logging
def log_shape(func):
    """Decorator that logs the shape of whatever ``func`` returns.

    After each call an INFO record of the form ``"<function name>,<shape>"``
    is emitted through the module-level ``logging.info`` helper, and the
    result is handed back to the caller unchanged. The result must expose a
    ``shape`` attribute.
    """
    @wraps(func)
    def wrapper(*args, **kwargs):
        result = func(*args, **kwargs)
        # Pass the values as logging arguments instead of pre-formatting the
        # string, so the message is only built when the record is emitted.
        logging.info("%s,%s", func.__name__, result.shape)
        return result
    return wrapper
def setup_logger():
    """Configure logging via ``logging.basicConfig`` at INFO level.

    Records are laid out as ``[time] {file:line} LEVEL - message``.
    """
    record_layout = '[%(asctime)s] {%(filename)s:%(lineno)d} %(levelname)s - %(message)s'
    logging.basicConfig(format=record_layout, level=logging.INFO)
@JoshZastrow
JoshZastrow / decorators.py
Created January 21, 2019 21:20
Decorator
import functools
import os
## Example 1:
'''
Decorator functions are great ways to template-ize a piece of functional
code that should be run before / after any other function. For example,
checking permissions or connections to a DB could be put into a decorator
and reused before all other functions.
@JoshZastrow
JoshZastrow / img_utils.py
Last active January 4, 2019 22:39
load images from .h5 files
def load_dataset():
train_dataset = h5py.File('datasets/train_happy.h5', "r")
train_set_x_orig = np.array(train_dataset["train_set_x"][:]) # your train set features
train_set_y_orig = np.array(train_dataset["train_set_y"][:]) # your train set labels
test_dataset = h5py.File('datasets/test_happy.h5', "r")
test_set_x_orig = np.array(test_dataset["test_set_x"][:]) # your test set features
test_set_y_orig = np.array(test_dataset["test_set_y"][:]) # your test set labels
classes = np.array(test_dataset["list_classes"][:]) # the list of classes
@JoshZastrow
JoshZastrow / external-data-drive-sheets-and-cloud-storage.ipynb
Created October 31, 2018 23:40
External data: Drive, Sheets, and Cloud Storage
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
@JoshZastrow
JoshZastrow / xor-classifier.ipynb
Created October 28, 2018 22:09
XOR-Classifier.ipynb
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
@JoshZastrow
JoshZastrow / Classifier.py
Created October 1, 2018 21:42
Log Regression Classifier Model and Training Algorithm
import numpy as np
class classifier():
def __init__(self, lr=0.001, num_dims=6):
self.lr = lr
self.params = {}
self.W = np.random.rand(num_dims, 1)
self.b = np.ones((1,1))
@JoshZastrow
JoshZastrow / PyTorch_embeddings_model.py
Created May 28, 2018 01:27
PyTorch implementation of a sentiment analysis classifier using Embeddings
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import train_test_split
from torch.utils import data
from keras import preprocessing
from keras.datasets import imdb
import numpy as np
# Get Data
@JoshZastrow
JoshZastrow / data_generator.py
Created May 10, 2018 15:13
Data generator function for comma-ai dataset
from __future__ import absolute_import
from __future__ import print_function
# from keras.preprocessing.image import ImageDataGenerator
import pandas as pd
import numpy as np
from scipy import misc
import matplotlib.pyplot as plt
import os, math, time
import csv