Skip to content

Instantly share code, notes, and snippets.

View halflearned's full-sized avatar

Vitor Hadad halflearned

View GitHub Profile
@halflearned
halflearned / interleave
Created May 29, 2024 21:36
Partial code to interleave image embeddings into text embeddings
def interleave_image_embeddings(text_embeddings, image_embeddings, tensor_indices, image_token_positions):
# Transform tensor into list of tensors, as they'll temporarily have different lengths
text_embeddings = [tensor for tensor in text_embeddings]
for i in range(len(text_embeddings)):
# Find the indices of the image tokens in the current tensor
idx = tensor_indices == i
@halflearned
halflearned / sylvia_nickell_bias.py
Created March 29, 2024 22:20
Possibly working Nickell bias correction
from linearmodels import PanelOLS
import numpy as np
import pandas as pd
from itertools import product
import seaborn as sns
import matplotlib.pyplot as plt
def generate_data(N, T, rho, sigma_a = 5, sigma_eps=1, burn_in = 100):
# dumb for-loop to generate data, not efficient but easy to understand
data = []
@halflearned
halflearned / vggsounddadapolicy.py
Created June 16, 2023 03:04
VGGSoundDADAPolicy
class VGGSoundDADAPolicy(nn.Module):
""" Transformation policy learned after applying DADA algorithm to VGG-Sound """
def __init__(self, num_time_subpolicies=2, num_freq_subpolicies=2):
super().__init__()
self.num_time_subpolicies = num_time_subpolicies
self.num_freq_subpolicies = num_freq_subpolicies
def forward(self, x):
time_subpolicy_indices = np.random.choice(
@halflearned
halflearned / cifar10_download.py
Created April 25, 2023 23:01
CIFAR-10 data sharding and upload
import os
import boto3
import numpy as np
import torch
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import Subset
def upload_to_s3(local_path, s3_path):
s3 = boto3.client('s3', region_name="us-west-2")
@halflearned
halflearned / download_youtube_audio.py
Last active April 17, 2023 16:32
Downloading YouTube audio for VGG-Sound
import argparse
import csv
import json
import subprocess
from multiprocessing import Pool
from os import remove, rename, makedirs
from os.path import exists, getsize
from time import sleep
@halflearned
halflearned / quantile_with_ci.py
Created March 16, 2023 21:12
Quantile estimation with confidence interval
import numpy as np
from scipy.stats import norm, expon
def quantile_with_ci(x, q, alpha=.05, num_boot=10_000):
n = len(x)
quantile_estimate = np.quantile(x, q)
idx_boot = np.random.randint(n, size=(num_boot, n))
quantile_boot = np.quantile(x[idx_boot], q=q, axis=1)
quantile_se = np.std(quantile_boot)
half_ci = norm.ppf(1 - alpha/2) * quantile_se
@halflearned
halflearned / relative_expression.py
Created March 5, 2023 23:12
Relative expression plot in python
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
df = pd.read_excel("/Users/halflearned/Downloads/qPCR.xlsx")
# Rename expression column to something sensible
df = df.rename(columns={"2^(-ΔΔCq)": "expression"})
# Create the column that will be our horizontal axis
@halflearned
halflearned / test_entire_session.py
Last active April 26, 2022 17:16
Holistic test for entire session using alpha data
import pickle
from datetime import datetime, timezone
from itertools import chain
import numpy as np
import pytest
from sleep_x_ml_reconciliation_service import SleepXMLReconciliation
from sleep_x_ml_reconciliation_service.constants import (
AWAKE_STAGE,
@halflearned
halflearned / rw.r
Created April 15, 2022 16:48
Romano-Wolf correction across regressions
library(purrr)
library(Matrix)
library(sandwich)
library(MASS)
romano_wolf_correction <- function(t.orig, t.boot) {
# See http://ftp.iza.org/dp12845.pdf page 8
abs.t.orig <- abs(t.orig)
@halflearned
halflearned / test_real_sessions.py
Last active April 14, 2022 03:37
Testing reconciliation with real sessions
import pytest
import pickle
from itertools import chain
from datetime import datetime, timezone
from sleep_x_ml_reconciliation_service import SleepXMLReconciliation
from sleep_x_ml_reconciliation_service.constants.reconciliation import (
UNINTENDED_USER_FLAG,
LARGE_SLEEP_GAP_FLAG,
PUT_GAP_TOLERANCE_SECONDS