Alex Kraft (@al6x)

  • Australia
al6x / overlap-bias.jl
Last active August 5, 2025 06:26
Overlap Bias
using Random, Distributions, Statistics, DataFrames, Plots
# parameters
Random.seed!(1)
mu = 0.005 # drift per 30d step
sigma = 0.08 # volatility per step
steps = 12 * 45 * 250 # number of 30d steps
S0 = 1.0 # initial price
ν = 3 # degrees of freedom for Student-t (ν > 2)
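
The preview stops at the setup. A minimal sketch (in Python, with the parameters copied from above) of the effect the title points at, assuming the gist compares statistics of overlapping vs non-overlapping multi-step returns:

import numpy as np

rng = np.random.default_rng(1)
mu, sigma, h = 0.005, 0.08, 12                     # per-step drift/vol, 12-step horizon (assumed)
r = mu + sigma * rng.standard_t(3, size=100_000)   # Student-t steps, nu = 3 as above
cum = np.concatenate([[0.0], np.cumsum(r)])
r_h = cum[h:] - cum[:-h]                           # all overlapping h-step returns
# Overlapping windows reuse data: the point estimate barely moves, but the
# observations are autocorrelated, so the effective sample size shrinks by ~h.
print(r_h.std(), r_h[::h].std())                   # overlapping vs non-overlapping std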
al6x / tail-estimators.py
Created July 31, 2025 09:09
Tail Estimators
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import t
from scipy.optimize import minimize_scalar, minimize
def student_sample(df, n, seed=None):
    rng = np.random.default_rng(seed)
    return rng.standard_t(df, size=n)

def estimate_hill(x):
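
The preview cuts off at estimate_hill. As a sketch only, a textbook Hill estimator over the top-k order statistics (the gist's actual implementation may differ):

def hill_estimator(x, k):
    # Tail-index estimate from the k largest values: k / sum(log(x_i / x_(k+1))).
    x = np.sort(np.abs(x))[::-1]
    return k / np.log(x[:k] / x[k]).sum()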
al6x / hill-vs-mle.py
Created July 30, 2025 10:31
Estimating Tail, Hill Plot vs MLE
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import t
from scipy.optimize import minimize_scalar
def student_sample(df, n):
    return t.rvs(df, loc=0, scale=1, size=n)

def estimate_hill(x):
    x = np.sort(x)[::-1]
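
For the MLE side of the comparison, a sketch of fitting the Student-t degrees of freedom with the minimize_scalar imported above; this is an assumption, not the gist's exact code:

def estimate_mle(x):
    # Fit df of a standard Student-t by minimizing the negative log-likelihood.
    nll = lambda df: -t.logpdf(x, df).sum()
    return minimize_scalar(nll, bounds=(2.01, 100.0), method='bounded').x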
vol       vol_dc  k     premium
0.008037  1       0.93  0.001134
0.008037  1       0.94  0.001471
0.008037  1       0.95  0.002009
0.008037  1       0.96  0.002952
0.008037  1       0.97  0.004859
0.008037  1       0.98  0.009124
0.008037  1       0.99  0.017501
0.008037  1       1.0   0.027655
0.01041   2       0.93  0.001216
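
No header survives for this snippet, but the numbers read like put premiums as a function of moneyness k at a given daily vol. Purely as an assumption, a zero-rate Black-Scholes put over a 30-day horizon gives the same qualitative shape (the table's premiums sit above these Gaussian values, which would be consistent with a fat-tailed model):

import numpy as np
from scipy.stats import norm

def bs_put(k, vol_d, days=30, s=1.0):
    # Black-Scholes put with zero rates, strike k on spot 1.0 (assumed setup).
    sig = vol_d * np.sqrt(days)
    d1 = (np.log(s / k) + 0.5 * sig**2) / sig
    return k * norm.cdf(sig - d1) - s * norm.cdf(-d1)

for k in (0.93, 0.95, 0.97, 0.99, 1.0):
    print(k, round(bs_put(k, 0.008037), 6))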
al6x / synthetic_prices.py
Created June 25, 2025 07:28
Synthetic Prices
import pandas as pd
import numpy as np
from scipy.integrate import quad
from scipy.stats import norm
import matplotlib.pyplot as plt
import math
def data_stats():
    # Distributions given as (weights, scales_rel), where scales_rel are relative to the main scale
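
The preview ends at the comment. As a sketch of what a (weights, scales_rel) spec implies, sampling synthetic returns from a zero-mean normal scale mixture; all names and shapes below are assumptions:

def sample_scale_mixture(weights, scales_rel, scale, n, seed=None):
    # Pick a mixture component per draw, then sample a zero-mean normal
    # at that component's scale (scales_rel are relative to the main scale).
    rng = np.random.default_rng(seed)
    comp = rng.choice(len(weights), size=n, p=weights)
    return rng.normal(0.0, scale * np.asarray(scales_rel)[comp])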
al6x / mean_vs_vol.py
Created May 16, 2025 08:19
Insanely high mean annual returns for volatile stocks in historical data
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from matplotlib.ticker import FuncFormatter, LogLocator
# Data:
# lr_rf_1y_t : float — risk-free log return at time t (e.g., log(1.03) for 3%)
# lr_t2 : float — stock actual log return at time t2 = t + period_d
# ema_var_d_t : float — daily variance as log(return)^2, current estimate at time t as EMA(span=365/3)
# h_var_d : float — daily variance as log(return)^2, historical estimate over whole stock history
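
Given the column descriptions above, a sketch of the headline computation: annualized mean excess log return bucketed by current volatility. Column names come from the comments; the bucketing itself is an assumption:

def mean_excess_by_vol(df, period_d, n_buckets=10):
    vol_a = np.sqrt(df['ema_var_d_t'] * 365)                   # annualized vol from daily variance
    excess = df['lr_t2'] - df['lr_rf_1y_t'] * period_d / 365   # excess log return over the period
    buckets = pd.qcut(vol_a, n_buckets)                        # equal-count volatility buckets
    return excess.groupby(buckets, observed=True).mean() * 365 / period_d  # annualized mean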
import './base'
p_([1, 2, 2].empty7()) // => false
p_([[2], [1]].sort_()) // => [[1], [2]]
p_([[1], [1]].uniq_()) // => [[1]]
p_(equal7([1], [1])) // => true
p_([1, 2, 3].median_()) // => 2
p_([1, 2, 3].min_()) // => 1
import numpy as np
import pymc as pm
import arviz as az

def fit_bayesian_slow(df, t):
    r_t = df['r_t'].values
    mvar_t_t0 = t * 0.69 * df['mvar_d_t0'].values
    r_rf_t_t0 = t * np.log(df['ar_rf_1y_t0'].values) / 365
    with pm.Model() as model:
        v0 = pm.HalfNormal('v0', sigma=1)
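
The model block is cut off after the v0 prior. Purely as an illustration of how such a model is typically closed out; the actual likelihood is not shown in the preview, so every line below is an assumption:

        # Hypothetical likelihood: returns centered on the risk-free leg, with
        # variance combining v0 and the market-implied term (assumed form).
        sigma_t = pm.math.sqrt(v0**2 + mvar_t_t0)
        pm.Normal('obs', mu=r_rf_t_t0, sigma=sigma_t, observed=r_t)
        idata = pm.sample()
    return az.summary(idata)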
[0.07662950232169972, 0.08667983817520133, 0.08667983817520133, 0.08667983817520133, 0.18199001797952605, 0.2690013949691558, 0.1636408793113296, 0.18199001797952605, 0.18199001797952605, 0.07662950232169972, 0.07662950232169972, 0.18199001797952605, 0.18199001797952605, 0.07662950232169972, 0.07662950232169972, 0.07662950232169972, 0.07662950232169972, 0.07662950232169972, 0.07662950232169972, 0.07662950232169972, 0.07662950232169972, 0.07662950232169972, 0.07662950232169972, 0.07662950232169972, 0.07662950232169972, 0.06833069950700488, 0.06833069950700488, 0.06833069950700488, 0.06833069950700488, 0.06833069950700488, 0.06833069950700488, 0.06833069950700488, 0.07662950232169972, 0.07662950232169972, 0.07662950232169972, 0.07662950232169972, 0.07662950232169972, 0.07662950232169972, 0.07662950232169972, 0.07662950232169972, 0.07662950232169972, 0.08667983817520133, 0.06833069950700488, 0.1636408793113296, 0.1636408793113296, 0.1636408793113296, 0.1636408793113296, 0.2436835869848659, 0.2436835869848659, 0.
import numpy as np
import matplotlib.pyplot as plt
from sklearn.mixture import GaussianMixture
from scipy.stats import norm
def fit_normal_mixture(*, n_components, values, random_state, n_init):
    values = np.array(values).reshape(-1, 1)  # GaussianMixture expects a 2D array
    nmm = GaussianMixture(n_components, covariance_type='diag', random_state=random_state, n_init=n_init)
    nmm.fit(values)
    means = nmm.means_.flatten().tolist()
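
The preview stops mid-function. A hypothetical continuation extracting the remaining mixture parameters (the return shape is assumed):

    # With covariance_type='diag', covariances_ has shape (n_components, 1).
    stds = np.sqrt(nmm.covariances_.flatten()).tolist()
    weights = nmm.weights_.tolist()
    return means, stds, weights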