Skip to content

Instantly share code, notes, and snippets.

View ceshine's full-sized avatar

CeShine Lee ceshine

View GitHub Profile
# NOTE(review): gist preview — indentation was lost and __init__ is cut off
# after the embedding layer, so only the visible setup is described here.
class Model(nn.Module):
def __init__(self, nb_words, hidden_size=128, embedding_size=128, n_layers=1,
wdrop=0.25, edrop=0.1, idrop=0.25, batch_first=True):
super(Model, self).__init__()
# Modified LockedDropout that supports a batch-first arrangement.
self.lockdrop = LockedDropout(batch_first=batch_first)
# Keep two of the dropout rates and the layer count on the instance.
# NOTE(review): presumably idrop/edrop are input and embedding dropout;
# hidden_size and wdrop are not used in the visible lines — confirm below.
self.idrop = idrop
self.edrop = edrop
self.n_layers = n_layers
# Embedding table with nb_words entries, each of size embedding_size.
self.embedding = nn.Embedding(nb_words, embedding_size)
# Pin package versions to the snapshot dated 2017-10-26 (checkpoint package).
checkpoint::checkpoint("2017-10-26")
# Load the required packages; pacman::p_load installs any that are missing.
pacman::p_load(data.table)
pacman::p_load(caret)
pacman::p_load(ggplot2)
# Fix the RNG seed so that later random steps are reproducible.
set.seed(998)
# Read the CSV into a data.table, converting string columns to factors.
mushrooms <- fread("mushrooms.csv", stringsAsFactors=T)
# Row count for each value of the `class` column.
mushrooms[, .N, by=class]
# Remove the "veil-type" column by reference (no copy of the table is made).
mushrooms[, eval("veil-type") := NULL]
@ceshine
ceshine / sgd.py
Created December 6, 2017 01:48
PyTorch SGD implementation
# http://pytorch.org/docs/master/_modules/torch/optim/sgd.html#SGD
class SGD(Optimizer):
def __init__(self, params, lr=required, momentum=0, dampening=0,
weight_decay=0, nesterov=False):
# ...
def __setstate__(self, state):
# ...
@ceshine
ceshine / sgd_alt.py
Created December 6, 2017 01:51
Alternative SGD implementation
# Reference: http://pytorch.org/docs/master/_modules/torch/optim/sgd.html#SGD
class SGD(Optimizer):
def __init__(self, params, lr=required, momentum=0, dampening=0,
weight_decay=0, nesterov=False):
# ...
def __setstate__(self, state):
# ...
@ceshine
ceshine / tokenize.py
Last active January 21, 2018 23:20
Tokenization for the toxic comment dataset
""" Tested with Python 3.6 """
import re
import pandas as pd
import spacy
import joblib
from tqdm import tqdm
# Load the spaCy model registered under the 'en' shortcut once, at module level.
nlp = spacy.load('en')
@ceshine
ceshine / write_fasttext_format.py
Last active January 21, 2018 23:22
Prepare the toxic comment dataset in fastText format
import pandas as pd
import joblib
from sklearn.model_selection import train_test_split
# Label names, kept in a fixed order.
LABELS = [
    "toxic",
    "severe_toxic",
    "obscene",
    "threat",
    "insult",
    "identity_hate",
]
# One past the last label index.
# NOTE(review): presumably the id for comments carrying none of the labels — confirm.
EMPTY_ID = len(LABELS)
@ceshine
ceshine / toxic_dataset.py
Created January 24, 2018 04:05
A torchtext example
import re
import logging
import numpy as np
import pandas as pd
import spacy
import torch
from torchtext import data
# Load the spaCy model registered under the 'en' shortcut once, at module level.
NLP = spacy.load('en')
@ceshine
ceshine / create_folder.py
Created November 2, 2017 01:46
Create a folder if it does not already exist, using pathlib
import pathlib
# Destination directory, relative to the current working directory.
output_folder = "tmp/folder"
# Create it along with any missing parents; no error if the directory is
# already there.
pathlib.Path(output_folder).mkdir(exist_ok=True, parents=True)
@ceshine
ceshine / toxic_dataset_v2.py
Last active February 24, 2018 20:49
Improved dataset loader for Toxic Comment dataset from Kaggle
"""Improved dataset loader for Toxic Comment dataset from Kaggle
Tested against:
* Python 3.6
* Numpy 1.14.0
* Pandas 0.22.0
* PyTorch 0.4.0a0+f83ca63 (should be very close to 0.3.0)
* torchtext 0.2.1
* spacy 2.0.5
* joblib 0.11
"""
tf.reset_default_graph()
graph = tf.Graph()
with graph.as_default():
tf.set_random_seed(10)
# tf Graph input
X = tf.placeholder("float", [None, timesteps, num_input])
Y = tf.placeholder("float", [None, num_classes])
is_training = tf.placeholder("bool")
# Define weights