Skip to content

Instantly share code, notes, and snippets.

@munro
munro / encodeJsonUnicode.js
Created October 19, 2023 18:22
Encode JSON & Escape Unicode in JavaScript
/**
 * Serialize a value to JSON text that contains only ASCII characters.
 *
 * Similar to json.dumps(obj, *, ensure_ascii=True) in Python: every UTF-16
 * code unit above 0x7F in the JSON text is rewritten as a lowercase \uXXXX
 * escape. Characters outside the BMP are therefore emitted as two escapes
 * (a surrogate pair). The result round-trips through JSON.parse.
 *
 * @param obj Value to serialize.
 * @returns The ASCII-only JSON string.
 * @throws {TypeError} If the value has no JSON representation, i.e.
 *   JSON.stringify returns undefined for it (undefined, a function, a symbol).
 */
function encodeJsonUnicode(obj) {
  const json = JSON.stringify(obj);
  // JSON.stringify yields undefined (not a string) for values with no JSON
  // form; fail with a clear message instead of crashing on `.replace`.
  if (json === undefined) {
    throw new TypeError(
      'encodeJsonUnicode: value of type ' + typeof obj + ' is not JSON-serializable'
    );
  }
  // No `u` flag on purpose: match individual UTF-16 code units so that astral
  // characters become a pair of \uXXXX escapes, which is what JSON expects.
  return json.replace(
    /[^\0-\x7F]/g,
    (c) => '\\u' + c.charCodeAt(0).toString(16).padStart(4, '0')
  );
}
// decoding is simply JSON.parse(...);
@munro
munro / gbm_vs_lr_out_of_bound_data.ipynb
Created October 13, 2023 18:14
Testing Predictions on Out of Bound Data: Comparing Gradient Boosted Trees and Linear Regression
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
@munro
munro / iTransformer.py
Created October 12, 2023 13:43
PyTorch: iTransformer: Inverted Transformers Are Effective for Time Series Forecasting
# @TODO this runs, but isn't fitting!
from dataclasses import dataclass
import torch
from torch import nn
@dataclass(frozen=True)
class ForecastPrediction:
@munro
munro / fuzzy_dedupe_slow.py
Created March 15, 2023 15:34
Works well for smaller datasets!
import re
from functools import lru_cache
from typing import Sequence, TypeVar
from fuzzywuzzy import fuzz
from unidecode import unidecode
T = TypeVar("T")
@munro
munro / bigquery_connected_components.sql
Created January 27, 2022 00:58
BigQuery Connected Components algorithm
-- Does a depth of 10
CREATE OR REPLACE PROCEDURE `mydataset`.connected_components_by_edge(
table_name STRING,
left_column STRING,
right_column STRING,
output_temp_table_name STRING
)
OPTIONS(
description="Connected components algorithm, clusters nodes by unidirected edges by the smallest node ID."
)
@munro
munro / intercept_execute_script.js
Last active December 22, 2021 02:08
Selenium Intercept HTTP Requests with JavaScript (Intercepting with a HTTP/HTTPS proxy + custom CA Cert is wayyy too much work!)
// Also check out selenium-wire: https://pypi.org/project/selenium-wire/
function interceptHttp(onComplete) {
// this will get rid of any previous interceptors, feel free to change
// this if you want to layer them.
window._old_send_ = window._old_send_ || window.XMLHttpRequest.prototype.send;
window._old_open_ = window._old_open_ || window.XMLHttpRequest.prototype.open;
window._old_setRequestHeader_ = window._old_setRequestHeader_ || window.XMLHttpRequest.prototype.setRequestHeader;
window.XMLHttpRequest.prototype.open = function () {
this._openArgs = [...arguments];
// Rand fill buffer: 3.894 GiB/s | Instant { tv_sec: 554538, tv_nsec: 411678934 } | Rand buffer beginning: [131, 217, 107, 91, 100, 45, 141, 113, 182, 115]
// Write speed: 7.058 GiB/s | 1.416909244s | Write buffer beginning: [131, 217, 107, 91, 100, 45, 141, 113, 182, 115]
// Read speed: 10.746 GiB/s | 930.555263ms | File beginning: [131, 217, 107, 91, 100, 45, 141, 113, 182, 115]
// Verified 0 bytes look incorrect | 28.545 GiB/s | 350.32772ms
// Hardware: raid-1 two SAMSUNG MZQL23T8HCLS-00A07
// Capacity: 3.84 TB
// Form Factor: U.2
// Seq. Read: 6.333 GiB/s
// Rand. Read: 1000k IOPS
# Download the Geekbench 5.4.1 Linux tarball, extract it into the current
# directory, and run the x86_64 benchmark binary from the extracted folder.
wget https://cdn.geekbench.com/Geekbench-5.4.1-Linux.tar.gz
tar xvfz Geekbench-5.4.1-Linux.tar.gz
./Geekbench-5.4.1-Linux/geekbench_x86_64
export async function* createAsyncGenerator<T>(
createResource: (opts: { send: (value: T) => void, close: () => void }) => void,
reconnect?: () => Promise<void>
): AsyncGenerator<T, void, undefined> {
while (true) {
let closed = false;
let buffer: T[] = [];
let bufferHasData = createEvent();
createResource({
import contextvars
import asyncio
from contextlib import asynccontextmanager
from typing import TypeVar
T = TypeVar("T")
@asynccontextmanager