Skip to content

Instantly share code, notes, and snippets.

View erikbern's full-sized avatar

Erik Bernhardsson erikbern

View GitHub Profile
@erikbern
erikbern / kaplan_meier.py
Created May 22, 2017 01:59
Kaplan-Meier snippet
# Kaplan-Meier style curve: cumulative percentage of observed events.
# `te` is consumed as (t, e) pairs; presumably it is already ordered by t --
# TODO confirm against the code that builds it.
# NOTE(review): loop nesting restored from a copy that had lost its indentation.
n, k = len(te), 0  # n = number of entries the loop has not consumed yet
p = 1.0  # running product of the (n-1)/n factors of observed events
ts = []
ys = []
for t, e in te:
    # e: whether the event was "observed" (converted) or not observed (may
    # convert in the future). Unobserved entries leave p untouched.
    p *= (n - 1) / n if e else 1.0
    n = n - 1
    ts.append(t)
    ys.append(100. * (1 - p))
pyplot.plot(ts, ys, 'b')
def ll_to_3d(lat, lon):
    """Map a latitude/longitude pair given in degrees to a 3-D unit vector.

    Args:
        lat: Latitude in degrees.
        lon: Longitude in degrees.

    Returns:
        A ``numpy.ndarray`` ``[x, y, z]`` where ``y = sin(lat)``,
        ``x = cos(lat) * cos(lon)`` and ``z = cos(lat) * sin(lon)``.
    """
    # math.radians returns a fresh float, so a mutable argument (for example a
    # 0-d numpy array) is not scaled in place the way ``lat *= ...`` did.
    lat_rad = math.radians(lat)
    lon_rad = math.radians(lon)
    cos_lat = math.cos(lat_rad)
    x = cos_lat * math.cos(lon_rad)
    z = cos_lat * math.sin(lon_rad)
    y = math.sin(lat_rad)
    return numpy.array([x, y, z])
import matplotlib.pyplot, numpy, scipy.stats, seaborn
# Draw random (a, b) pairs and evaluate the scipy.stats.beta pdf for them.
# NOTE(review): indentation was lost in this copy, so the extent of the loop
# body is not visible here -- confirm against the original before running.
for i in range(50):
# One draw from scipy.stats.uniform, unpacked from the length-1 result.
p, = scipy.stats.uniform.rvs(size=1)
# Two draws from scipy.stats.geom parameterised by p.
a, b = scipy.stats.geom.rvs(p, size=2)
# Skip the iteration when both draws equal 1.
if a == b == 1:
continue
print(a, b)
# 1000 evenly spaced points from 0 to 1, and the beta pdf at each of them.
x = numpy.linspace(0, 1, 1000)
y = scipy.stats.beta.pdf(x, a, b)
from num2words import num2words
from matplotlib import pyplot
# Spell out every integer below one million with num2words (language code
# 'de') and plot the length of each word against its index, using a
# logarithmic x axis.
lang = 'de'
words = [num2words(number, lang=lang) for number in range(1000000)]
fig = pyplot.figure()
ax = fig.add_subplot(1, 1, 1)
ax.semilogx(list(map(len, words)), color='green')
import urllib2, csv
import matplotlib.pyplot as plt
import datetime
import seaborn
import numpy, scipy.stats, math
# Fetch the S&P 500 data CSV over HTTP (urllib2 is the Python 2 module).
f = urllib2.urlopen('https://raw.githubusercontent.com/datasets/s-and-p-500/master/data/data.csv')
# NOTE: this rebinds the name `csv` from the imported module to the reader
# object, so the csv module is no longer reachable under that name below.
csv = csv.reader(f)
csv.next() # consume the header row (Python 2 iterator method)
import numpy
import random
from matplotlib import pyplot
percentiles = [50, 75, 90, 95, 99]
latencies = [[] for p in percentiles]
loads = []
n = 100000
for k in numpy.linspace(0.01, 1.0, 100):
import pymc3, numpy, sys, seaborn, re
def get_dist(fn):
y = [0, 0, 0, 0, 0]
for line in open(fn):
try:
num = re.split('\D', line)[0]
y[int(num) - 1] += 1
except:
print fn, 'can not parse:', line
import numpy
import scipy.optimize
from matplotlib import pyplot
cs = numpy.linspace(0.01, 0.99, 100)
ks = []
for c in cs:
def f(log_k):
k = numpy.exp(log_k)
import random
import numpy
from matplotlib import pyplot
# Scatter each consecutive difference of a random series against the next one.
rs = numpy.random.randn(1000)  # 1000 draws from numpy.random.randn
# xs[i] = rs[i+1] - rs[i] and ys[i] = rs[i+2] - rs[i+1]; both have 998 entries.
xs = numpy.diff(rs[:-1])
ys = numpy.diff(rs[1:])
pyplot.scatter(xs, ys)
pyplot.show()
@erikbern
erikbern / marketing_mc.py
Last active January 15, 2021 00:50
MCMC for simple marketing data
import pymc, pymc.graph
import matplotlib.pyplot as plt
import numpy as np
import sys
channels = [
('A', 2292.04, 9),
('B', 1276.85, 2),
('C', 139.59, 3),
('D', 954.98, 5)