Skip to content

Instantly share code, notes, and snippets.

View mdbecker's full-sized avatar

Michael Becker mdbecker

View GitHub Profile
@mdbecker
mdbecker / sent_mail.py
Created September 17, 2023 19:09
Script to parse .mbox email files and export them into monthly-separated CSV files.
import mailbox
import csv
import email.utils
from collections import defaultdict
import argparse
from pathlib import Path
from bs4 import BeautifulSoup
from tqdm import tqdm
# Constants for field names
@mdbecker
mdbecker / calibration_groups.py
Last active November 5, 2021 18:47
sklearn_12052
"""
Fixes https://github.com/scikit-learn/scikit-learn/issues/12052
CalibratedClassifierGroupCV is a drop-in replacement for CalibratedClassifierCV that supports GroupKFold cv.
This is based off of https://github.com/scikit-learn/scikit-learn/blob/0.24.1/sklearn/calibration.py.
If you are using a different version of sklearn, you can make similar modifications to your version.
Example usage:
```
@mdbecker
mdbecker / sir.py
Created April 4, 2020 17:25
simple sir
initial_susceptible # defaults to 3,600,000 https://github.com/CodeForPhilly/chime/blob/2895a9c4ddcf42b3c96bcf7e03a7e2a15f4983de/src/penn_chime/presentation.py#L200-L206
initial_infected # https://github.com/CodeForPhilly/chime/blob/2895a9c4ddcf42b3c96bcf7e03a7e2a15f4983de/src/penn_chime/models.py#L25-L27
initial_recovered # https://github.com/CodeForPhilly/chime/blob/2895a9c4ddcf42b3c96bcf7e03a7e2a15f4983de/src/penn_chime/models.py#L34
beta = # https://github.com/CodeForPhilly/chime/blob/2895a9c4ddcf42b3c96bcf7e03a7e2a15f4983de/src/penn_chime/models.py#L42-L45
gamma = # https://github.com/CodeForPhilly/chime/blob/2895a9c4ddcf42b3c96bcf7e03a7e2a15f4983de/src/penn_chime/models.py#L39
n_days = # User input, default to 60 or something
def sir(s, i, r, beta, gama, n):
"""The SIR model, one time step."""
s_n = (-beta * s * i) + s
@mdbecker
mdbecker / bad_ticks.json
Created January 17, 2020 04:26
bad_ticks.json
{"date":{"4":1576826679362,"5":1576826680953,"6":1576826682705,"7":1576826715094,"8":1576826738398,"9":1576826749536,"10":1576826964746,"11":1576827009901,"12":1576827049302,"13":1576827049369,"14":1576827067127,"15":1576827067174,"16":1576827067715,"17":1576827071028,"18":1576827128560,"19":1576827181988,"20":1576827228449,"21":1576827233823,"22":1576827236225,"23":1576827244532,"24":1576827326470,"25":1576827331045,"26":1576827338079,"27":1576827342801,"28":1576827342887,"29":1576827362202,"30":1576827369175,"31":1576827406098,"32":1576827475226,"33":1576827479353,"34":1576827479381,"35":1576827481299,"36":1576827481300,"37":1576827484089,"38":1576827484095,"39":1576827495704,"40":1576827501289,"41":1576827508178,"42":1576827515849,"154":1576832407342,"155":1576832407392,"156":1576832428810,"157":1576832428828,"158":1576832429440,"179":1576840413638,"180":1576840413746,"181":1576840440551,"182":1576840440565,"183":1576840441194,"184":1576841274254,"185":1576841274351,"186":1576841285635,"187":1576841285658,
@mdbecker
mdbecker / interrupt_bad_kernels.py
Created October 25, 2018 15:05
Find and kill jupyter kernels that might be running out of control
from os import getpid, kill
from time import sleep
import re
import signal
from notebook.notebookapp import list_running_servers
from requests import get
from requests.compat import urljoin
import ipykernel
import json
@mdbecker
mdbecker / miniboxplot.py
Created October 24, 2018 15:44
miniboxplot a la seaborn violinplot
import seaborn as sns
import numpy as np
import matplotlib.pyplot as plt
class _MiniBoxPlotter(sns.categorical._ViolinPlotter):
def draw_violins(self, ax):
"""Draw the violins onto `ax`."""
for i, group_data in enumerate(self.plot_data):
from sklearn import metrics
def binary_cv_metrics(y, preds, m):
    """Store classification metrics for `preds` against `y` in the mapping `m`.

    `m` is mutated in place and nothing is returned. Four keys are written,
    in this order: 'confusion_matrix', 'Accuracy', 'F1 score' and 'FPR'.

    NOTE(review): the 'FPR' entry treats row 0 of the confusion matrix as
    the negative class and column 1 as "predicted positive" -- confirm that
    callers always pass two-class labels where the negative label sorts first.
    """
    accuracy = metrics.accuracy_score(y, preds)
    confusion = metrics.confusion_matrix(y, preds)

    m['confusion_matrix'] = confusion
    m['Accuracy'] = accuracy
    m['F1 score'] = metrics.f1_score(y, preds)

    # Row 0, column 1 over the row-0 total; the `* 1.0` forces a float
    # denominator so the ratio is never computed with integer division.
    false_positives = confusion[0, 1]
    negatives_total = confusion[0, :].sum() * 1.0
    m['FPR'] = false_positives / negatives_total

Keybase proof

I hereby claim:

  • I am mdbecker on github.
  • I am mdbecker (https://keybase.io/mdbecker) on keybase.
  • I have a public key whose fingerprint is 3269 BEE3 B3B2 23ED 1478 2F5B 73DE 3334 40FF FBF7

To claim this, I am signing this object:

@mdbecker
mdbecker / dataphilly
Last active August 29, 2015 14:07 — forked from pjob/dataphilly
{
"metadata": {
"name": "Data Philly"
},
"nbformat": 3,
"nbformat_minor": 0,
"worksheets": [
{
"cells": [
{
@mdbecker
mdbecker / gist:1309633
Created October 24, 2011 17:50
multiprocess && gevent example
from multiprocessing import Pool as MPool
from time import sleep
import datetime
import multiprocessing
import random
def time_request():
from gevent import monkey; monkey.patch_socket
from jsonrequester import JsonRequester