Skip to content

Instantly share code, notes, and snippets.

@zyxue
zyxue / FlinkToy.java
Created Sep 15, 2020
flink-DataStream-toy-example
View FlinkToy.java
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
@zyxue
zyxue / utils.py
Last active Jul 2, 2019
cheatsheet utils.py
View utils.py
import os
import time
import logging
from functools import update_wrapper
logging.basicConfig(
level=logging.DEBUG, format='%(asctime)s|%(levelname)s|%(message)s')
@zyxue
zyxue / init_new_venv.sh
Last active Apr 24, 2019
install common packages in a newly created conda env
View init_new_venv.sh
conda fgcreate -n venv ipython
source activate venv
pip install \
pandas \
scipy \
jupyter \
jupyterlab \
matplotlib \
View gpustat.sh
# https://github.com/wookayin/gpustat
watch --color -n1.0 gpustat -u -p -P
@zyxue
zyxue / SRNN-vs-LSTM
Created Apr 17, 2018
Train a single-neuron RNN to compare performance of vanilla RNN and LSTM on information latching
View SRNN-vs-LSTM
import matplotlib.pyplot as plt
import numpy as np
from keras.models import Model
from keras.layers import Input, LSTM, Dense, SimpleRNN
N = 10000
num_repeats = 30
num_epochs = 5
# sequence length options
View spark-custom-aggregator
import scala.collection.mutable.Map
import org.apache.spark.sql.expressions.Aggregator
import org.apache.spark.sql.Encoder
import org.apache.spark.sql.Encoders
import spark.implicits._
import org.apache.spark.sql.types._
@zyxue
zyxue / grid-cv
Last active Sep 27, 2017
demonstration of sklearn GridSearchCV spawning multiple threads on linux
View grid-cv
# related SO question: https://stackoverflow.com/questions/46351157/why-gridsearchcv-in-scikit-learn-spawn-so-many-threads
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import GridSearchCV
Cs = 10 ** np.arange(-2, 2, 0.1)
@zyxue
zyxue / eda_imports.py
Last active Oct 7, 2019
python EDA imports
View eda_imports.py
import os
import io
import gzip
import time
import sys
import glob
import json
import re
import csv
import datetime
View execute.py
def execute(cmd, flag_file=None, msg_id='', debug=False):
"""
# http://stackoverflow.com/questions/1606795/catching-stdout-in-realtime-from-subprocess
:param cmd: should never include pipe or redirection, which would require
a new shell process
This execute logs all stdout and stderr, which could look funny, especially
when it comes to tools like aspc and wget
"""
logger.info('executing: {0}'.format(cmd))
# todo: should check whether cmdsp includes pipe or redirection here