Skip to content

Instantly share code, notes, and snippets.

@zyxue
zyxue / lock_demo.py
Last active August 19, 2021 16:11
python multiprocessing lock example
import multiprocessing
import multiprocessing.synchronize
import random
import time
def print_func(process_index: int) -> None:
for x in "hello world":
print(x, end='', flush=True)
@zyxue
zyxue / FlinkToy.java
Created September 15, 2020 05:02
flink-DataStream-toy-example
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
@zyxue
zyxue / utils.py
Last active July 2, 2019 17:32
cheatsheet utils.py
import os
import time
import logging
from functools import update_wrapper
logging.basicConfig(
level=logging.DEBUG, format='%(asctime)s|%(levelname)s|%(message)s')
@zyxue
zyxue / init_new_venv.sh
Last active April 24, 2019 02:32
install common packages in a newly created conda env
# Create a new conda environment named "venv" with ipython preinstalled.
# (`conda create` is the subcommand that makes environments; `fgcreate` is not a conda command.)
conda create -n venv ipython
# Activate the environment so the pip install below targets it.
source activate venv
# Install the common data-science packages into the activated environment.
# The last package has no trailing backslash, so the command ends here
# instead of continuing into the comment line that follows.
pip install \
    pandas \
    scipy \
    jupyter \
    jupyterlab \
    matplotlib
# https://github.com/wookayin/gpustat
# Re-run gpustat every second with colored output.
watch --color -n1.0 gpustat -u -p -P
@zyxue
zyxue / SRNN-vs-LSTM
Created April 17, 2018 19:51
Train a single-neuron RNN to compare performance of vanilla RNN and LSTM on information latching
import matplotlib.pyplot as plt
import numpy as np
from keras.models import Model
from keras.layers import Input, LSTM, Dense, SimpleRNN
N = 10000
num_repeats = 30
num_epochs = 5
# sequence length options
import scala.collection.mutable.Map
import org.apache.spark.sql.expressions.Aggregator
import org.apache.spark.sql.Encoder
import org.apache.spark.sql.Encoders
import spark.implicits._
import org.apache.spark.sql.types._
@zyxue
zyxue / grid-cv
Last active September 27, 2017 15:14
demonstration of sklearn GridSearchCV spawning multiple threads on linux
# related SO question: https://stackoverflow.com/questions/46351157/why-gridsearchcv-in-scikit-learn-spawn-so-many-threads
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import GridSearchCV
Cs = 10 ** np.arange(-2, 2, 0.1)
@zyxue
zyxue / eda_imports.py
Last active October 7, 2019 19:52
python EDA imports
import os
import io
import gzip
import time
import sys
import glob
import json
import re
import csv
import datetime
def execute(cmd, flag_file=None, msg_id='', debug=False):
"""
# http://stackoverflow.com/questions/1606795/catching-stdout-in-realtime-from-subprocess
:param cmd: should never include a pipe or redirection, which would require
a new shell process
This execute logs all stdout and stderr, which could look funny, especially
when it comes to tools like aspc and wget
"""
logger.info('executing: {0}'.format(cmd))
# todo: should check whether cmdsp includes pipe or redirection here