Skip to content

Instantly share code, notes, and snippets.

View bobchennan's full-sized avatar

cnx bobchennan

View GitHub Profile
@bobchennan
bobchennan / pytorch_reformer.py
Last active January 23, 2020 00:58 — forked from lucidrains/pytorch_reformer.py
reformer(pytorch)
import torch
import torch.nn as nn
import torch.nn.functional as F
# helpers
def make_unit_length(x, epsilon=1e-6):
    """Scale ``x`` to (approximately) unit L2 norm along its last dimension.

    Args:
        x: Tensor to normalize.
        epsilon: Small constant added to the norm before dividing, so that
            all-zero vectors come out as zeros instead of NaN.

    Returns:
        A tensor with the same shape as ``x``.  Because ``epsilon`` is added
        to the denominator, the resulting norm is marginally below 1.
    """
    # keepdim=True leaves a size-1 trailing axis so the division broadcasts
    # over the last dimension of ``x``.
    norm = x.norm(p=2, dim=-1, keepdim=True)
    return x.div(norm + epsilon)
def main(text_file, out_file):
with open(text_file) as f:
temp = f.readlines()
content = [x.strip() for x in temp]
with open(out_file, 'w') as f:
for i in content:
if i.split()[4] == 'bonafide':
f.write('%s %s\n' % (i.split()[0] + '-' + i.split()[1], i.split()[4]))
else:
@bobchennan
bobchennan / gels.py
Created March 15, 2019 18:49
GELS with derivative in PyTorch
import torch
from torch.autograd import Function
class GELS(Function):
""" Efficient implementation of gels from
Nanxin Chen
bobchennan@gmail.com
"""
@staticmethod
def forward(ctx, A, b):
@bobchennan
bobchennan / lda_test.py
Created April 5, 2018 20:12
test of memory usage for scikit-learn lda
import numpy as np
#from sklearn.discriminant_analysis import _class_cov,LinearDiscriminantAnalysis
from discriminant_analysis import _class_cov,LinearDiscriminantAnalysis
# Problem size for the memory experiment: many samples spread over a very
# large number of classes.
N_SAMPLES = 1000000
N_FEATURES = 100
N_CLASSES = 20000

# Random features and random integer labels in [0, N_CLASSES).
X = np.random.rand(N_SAMPLES, N_FEATURES)
y = np.random.randint(0, N_CLASSES, (N_SAMPLES,))

# Number of distinct labels actually drawn.  The call form of print works on
# both Python 2 and Python 3 (the statement form is a SyntaxError on 3).
print(len(np.unique(y)))

# Alternative end-to-end run through the estimator, kept for reference:
#model=LinearDiscriminantAnalysis(solver='eigen')
#model.fit(X,y)

# Call the covariance helper directly with one equal weight per class
# (1 / N_CLASSES each) -- presumably the class priors; confirm against the
# signature of _class_cov in the imported module.
_class_cov(X, y, np.full(N_CLASSES, 1.0 / N_CLASSES))
@bobchennan
bobchennan / ConcreteDropout.py
Last active May 23, 2017 18:02
ConcreteDropout for the paper https://arxiv.org/abs/1705.07832. The original code is in the PDF file.
import keras.backend as K
from keras import initializers
from keras.engine import InputSpec
from keras.layers import Dense, Lambda, Wrapper
class ConcreteDropout(Wrapper):
def __init__(self, layer, weight_regularizer=1e-6, dropout_regularizer=1e-5, **kwargs):
assert 'kernel_regularizer' not in kwargs
super(ConcreteDropout, self).__init__(layer, **kwargs)
# The data can be found in https://github.com/tdhopper/topic-modeling-datasets/blob/master/README.md
# The stop word lists can be found in https://github.com/stanfordnlp/CoreNLP/blob/master/data/edu/stanford/nlp/patterns/surface/stopwords.txt
# Data cleaning is described in http://www.mjdenny.com/Text_Processing_In_R.html
library(stringr)
library(gtools)
createDataFile = function(){
processFile = function(filepath) {
con = file(filepath, "r")
inAbstract = FALSE
@bobchennan
bobchennan / train.py
Last active March 15, 2017 14:25
keras sharedmem
from multiprocessing import Pipe, Process, Manager
from time import sleep
import sharedmem
batches = Manager().dict()
def train_process(model, num_batches, save_path, q):
cnt = 0
while True:
@bobchennan
bobchennan / ldapjs-example
Created March 16, 2012 15:26
ldapjs-example
var ldap = require('ldapjs'),
mysql = require("mysql"),
server = ldap.createServer(),
addrbooks = {}, userinfo = {},
ldap_port = 389,
basedn = "dc=example, dc=com",
company = "Example",
db = mysql.createClient({
user: "abook",
password: "abook",