Instantly share code, notes, and snippets.

@zyxue
zyxue / SRNN-vs-LSTM
Created Apr 17, 2018
Train a single-neuron RNN to compare performance of vanilla RNN and LSTM on information latching
View SRNN-vs-LSTM
import matplotlib.pyplot as plt
import numpy as np
from keras.models import Model
from keras.layers import Input, LSTM, Dense, SimpleRNN
N = 10000
num_repeats = 30
num_epochs = 5
# sequence length options
View spark-custom-aggregator
import scala.collection.mutable.Map
import org.apache.spark.sql.expressions.Aggregator
import org.apache.spark.sql.Encoder
import org.apache.spark.sql.Encoders
import spark.implicits._
import org.apache.spark.sql.types._
@zyxue
zyxue / grid-cv
Last active Sep 27, 2017
demonstration of sklearn GridSearchCV spawning multiple threads on linux
View grid-cv
# related SO question: https://stackoverflow.com/questions/46351157/why-gridsearchcv-in-scikit-learn-spawn-so-many-threads
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import GridSearchCV
Cs = 10 ** np.arange(-2, 2, 0.1)
@zyxue
zyxue / eda_imports.py
Last active Dec 4, 2018
python EDA imports
View eda_imports.py
import os
import io
import gzip
import time
import sys
import glob
import json
import re
import csv
import datetime
View execute.py
def execute(cmd, flag_file=None, msg_id='', debug=False):
"""
# http://stackoverflow.com/questions/1606795/catching-stdout-in-realtime-from-subprocess
:param cmd: should never include pipe or redirection, which would require
a new shell process
This execute logs all stdout and stderr, which could look funny, especially
when it comes to tools like aspc and wget
"""
logger.info('executing: {0}'.format(cmd))
# todo: should check whether cmdsp includes pipe or redirection here