Skip to content

Instantly share code, notes, and snippets.

View AntonOsika's full-sized avatar

Anton Osika AntonOsika

View GitHub Profile
import json
import os
def writeHTML(j, f):
    """Write the HTML body of a fragment, then of each of its comments, to ``f``.

    Args:
        j: Mapping with a ``"fragment"`` entry that holds ``"bodyHTML"`` and a
            ``"comments"`` iterable whose items each hold ``"bodyHTML"``.
        f: Object exposing ``write``, e.g. an open text file.

    Raises:
        KeyError: If any of the expected keys is missing from ``j``.
    """
    # The fragment body goes first, followed by the comments in their given order.
    f.write(j["fragment"]["bodyHTML"])
    for comment in j["comments"]:
        f.write(comment["bodyHTML"])
def main(args):
# Filter out a specific message type and store it in GCS as schemaless .jsonl and in BigQuery (which requires that the schema does not change).
# Different Stackdriver labels create different partitions of the output.
# I recommend adding a version field to each log so that you can easily filter them to different sinks.
filter="jsonPayload.message_type=\"$1\""
PROJECT_NAME=my_proj
gsutil mb gs://$PROJECT_NAME-logs-$1
gcloud beta logging sinks create $1-to-gcs storage.googleapis.com/$PROJECT_NAME-logs-$1 --log-filter="$filter" --project=$PROJECT_NAME
@AntonOsika
AntonOsika / log_mutations.js
Last active June 5, 2020 18:44
Log all mutations to a site to understand what is going on
// Select the node that will be observed for mutations
// Options for the observer (which mutations to observe)
const config = {attributes: true, childList: true, subtree: true};
const htmlify = xs => xs && [...xs].map(x => x.innerHTML)
let counter = 0
// Callback function to execute when mutations are observed
const callback = function (mutationsList, observer) {
def generate_sequences(X, y, mask, batch_size=32, seed=0):
"""
Returns a generator of batched timeseries padded to the longest sequence in the batch,
using right zero padding.
Can be used directly with model.fit_generator if X-keys matches keras Input tensors.
Note that masks typically have 1 dimension less than labels.
Args:
X, list. Each element is a dictionary of 'feature_name': np.array
y, list. Each element is np.array of labels
import pandas as pd
import seaborn as sns
import matplotlib
pd.options.display.float_format = '{:,.2f}'.format
pd.options.display.max_columns = 999
pd.options.display.max_colwidth = 150
sns.set_style("whitegrid")
import bisect
from sklearn.preprocessing import LabelEncoder
from sklearn.utils.validation import check_is_fitted
from sklearn.utils import column_or_1d
import numpy as np
class CategoricalTransform(LabelEncoder):
@AntonOsika
AntonOsika / practical_argparse.py
Last active May 14, 2019 11:21
Simple and powerful technique for defining command line arguments.
import argparse
def parsed_arguments(defaults: dict) -> argparse.Namespace:
"""
Sets all variables defined in default_args() as command line arguments.
(e.g. specify job_dir with: --job-dir [job_dir])
Args:
defaults: dict of default argument names and values. Any `_` will be replaced with `-`.
from __future__ import print_function
import datetime
def excluded_datetimes(start_str):
"""
Creates regex matching every datetime chronologically before start_str.
Can be used to exclude files/folders with e.g. rsync file downloads.
"""
@AntonOsika
AntonOsika / downloadURLs.py
Last active March 17, 2018 13:25
Download all HREFs from URL.
import urllib
import os
import re
##############################
# Downloads files for every link it finds.
# The URLs can be handpicked with regex fileURLs.
##############################