Skip to content

Instantly share code, notes, and snippets.

View ceshine's full-sized avatar

CeShine Lee ceshine

View GitHub Profile
@ceshine
ceshine / sgd_alt.py
Created December 6, 2017 01:51
Alternative SGD implementation
# Reference: http://pytorch.org/docs/master/_modules/torch/optim/sgd.html#SGD
class SGD(Optimizer):
def __init__(self, params, lr=required, momentum=0, dampening=0,
weight_decay=0, nesterov=False):
# ...
def __setstate__(self, state):
# ...
@ceshine
ceshine / tokenize.py
Last active January 21, 2018 23:20
Tokenization for the Toxic Comment dataset
""" Tested with Python 3.6 """
import re
import pandas as pd
import spacy
import joblib
from tqdm import tqdm
nlp = spacy.load('en')
@ceshine
ceshine / write_fasttext_format.py
Last active January 21, 2018 23:22
Prepare the Toxic Comment dataset in fastText format
import pandas as pd
import joblib
from sklearn.model_selection import train_test_split
LABELS = ["toxic", "severe_toxic", "obscene",
"threat", "insult", "identity_hate"]
EMPTY_ID = len(LABELS)
@ceshine
ceshine / toxic_dataset.py
Created January 24, 2018 04:05
A torchtext example
import re
import logging
import numpy as np
import pandas as pd
import spacy
import torch
from torchtext import data
NLP = spacy.load('en')
@ceshine
ceshine / toxic_dataset_v2.py
Last active February 24, 2018 20:49
Improved dataset loader for Toxic Comment dataset from Kaggle
"""Improved dataset loader for Toxic Comment dataset from Kaggle
Tested against:
* Python 3.6
* Numpy 1.14.0
* Pandas 0.22.0
* PyTorch 0.4.0a0+f83ca63 (should be very close to 0.3.0)
* torchtext 0.2.1
* spacy 2.0.5
* joblib 0.11
"""
@ceshine
ceshine / tvl.R
Last active April 6, 2018 06:17
Talent vs Luck simulation
library(checkpoint)
checkpoint("2018-02-25")
library(ggplot2)
# number of people
N <- 1000
# probability of event interception
P_E <- 0.075
# probability of lucky event
P_L <- 0.5
@ceshine
ceshine / causal_conv1d.py
Created April 2, 2018 01:03
Causal Convolution 1D
import tensorflow as tf
class CausalConv1D(tf.layers.Conv1D):
def __init__(self, filters,
kernel_size,
strides=1,
dilation_rate=1,
activation=None,
use_bias=True,
kernel_initializer=None,
@ceshine
ceshine / temporal_block.py
Last active November 25, 2018 15:03
Temporal Block (for TCNs)
class TemporalBlock(tf.layers.Layer):
def __init__(self, n_outputs, kernel_size, strides, dilation_rate, dropout=0.2,
trainable=True, name=None, dtype=None,
activity_regularizer=None, **kwargs):
super(TemporalBlock, self).__init__(
trainable=trainable, dtype=dtype,
activity_regularizer=activity_regularizer,
name=name, **kwargs
)
self.dropout = dropout
@ceshine
ceshine / tcn.py
Created April 2, 2018 02:23
Temporal Convolutional Networks
class TemporalConvNet(tf.layers.Layer):
def __init__(self, num_channels, kernel_size=2, dropout=0.2,
trainable=True, name=None, dtype=None,
activity_regularizer=None, **kwargs):
super(TemporalConvNet, self).__init__(
trainable=trainable, dtype=dtype,
activity_regularizer=activity_regularizer,
name=name, **kwargs
)
self.layers = []
tf.reset_default_graph()
graph = tf.Graph()
with graph.as_default():
tf.set_random_seed(10)
# tf Graph input
X = tf.placeholder("float", [None, timesteps, num_input])
Y = tf.placeholder("float", [None, num_classes])
is_training = tf.placeholder("bool")
# Define weights