Skip to content

Instantly share code, notes, and snippets.

@venuktan
venuktan / src_FileSystemCat.java
Created June 7, 2013 18:29
hadoop-venu-cloudwick
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;
import java.io.InputStream;
import java.net.URI;
/**
* Created with IntelliJ IDEA.
@venuktan
venuktan / Hdf5_getters.java
Created June 14, 2013 23:50
HDF5 getters for all vars
/*
Thierry Bertin-Mahieux (2010) Columbia University
tb2332@columbia.edu
This code contains a set of getters functions to access the fields
from an HDF5 song file (regular file with one song or summary file
with many songs) in Java.
The goal is to reproduce the Python getters behaviour.
@venuktan
venuktan / gist:8910385
Last active December 15, 2020 13:17 — forked from dwf/gist:828099
Pull out a submatrix from a COO SciPy sparse matrix.
import scipy as S
def coo_submatrix_pull(matr, rows, cols):
"""
Pulls out an arbitrary i.e. non-contiguous submatrix out of
a sparse.coo_matrix.
"""
if type(matr) != S.sparse.coo_matrix:
raise TypeError('Matrix must be sparse COOrdinate format')
from __future__ import print_function
from sklearn.datasets import fetch_20newsgroups
from sklearn.decomposition import TruncatedSVD
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.feature_extraction.text import HashingVectorizer
from sklearn.feature_extraction.text import TfidfTransformer
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import Normalizer
from sklearn import metrics
def splitData(df, trainPerc=0.6, cvPerc=0.2, testPerc=0.2):
"""
return: training, cv, test
(as pandas dataframes)
params:
df: pandas dataframe
trainPerc: float | percentage of data for training set (default=0.6)
cvPerc: float | percentage of data for cross validation set (default=0.2)
testPerc: float | percentage of data for test set (default=0.2)
(trainPerc + cvPerc + testPerc must equal 1.0)
import numpy as np
def Markov(p, s, steps):
    """Advance a Markov chain state by ``steps`` transitions.

    Each iteration right-multiplies the current state ``s`` by the transition
    matrix ``p`` and prints the resulting state.

    Args:
        p: Transition matrix. ``s * p`` must behave as a matrix product
            (the example that follows this function passes ``np.matrix``).
        s: Initial state (a row vector in that example).
        steps: Number of transitions to apply.

    Returns:
        The state after ``steps`` transitions. When ``steps`` is zero or
        negative the loop does not run and ``s`` is returned unchanged.
    """
    for _ in range(steps):
        s = s * p
        # Call form of print: identical output on Python 2 and valid on
        # Python 3, where the original ``print s`` statement is a SyntaxError.
        print(s)
    return s
# Transition matrix for a six-state chain. Row i holds the probabilities of
# moving from state i to each state, so every row sums to 1. The last row keeps
# all probability on the sixth state, which makes that state absorbing.
p = np.matrix('.5, .5, 0, 0, 0, 0; .4, .1, .5, 0, 0, 0; 0, .3, .2, .5, 0, 0; 0, 0, .2, .3, .5, 0; 0, 0, 0, .1, .4, .5; 0, 0, 0, 0, 0, 1')
# Initial state: all probability mass on the first state.
s = np.matrix('1, 0, 0, 0, 0, 0')
@venuktan
venuktan / left_right_outer_join_dict.py
Last active October 7, 2015 20:40
python left outer join and right outer join of dictionaries
# Two sample mappings that share "foo" and "bar" and each own one unique key.
d1 = {"foo": 3, "baz": -1, "bar": 5}
d2 = {"foo": 3, "ven": 10, "bar": 5}

# Union of both mappings: start from d1, then let d2 add or overwrite entries.
d_1_2 = dict(d1)
d_1_2.update(d2)

# Every key of the union with its d2 value, 0 where d2 lacks the key.
{key: d2.get(key, 0) for key in d_1_2}  # right outer join

# Every key of the union with its d1 value, 0 where d1 lacks the key.
{key: d1.get(key, 0) for key in d_1_2}  # left outer join
import numpy as np
def sigmoid(x):
    """Return the logistic sigmoid ``1 / (1 + exp(-x))`` of ``x``.

    The exponential is evaluated with ``np.exp``, so ``x`` may be a scalar or
    a NumPy array (arrays are processed element-wise).
    """
    return 1.0 / (1.0 + np.exp(-x))
def sigmoid_prime(x):
    """Return the derivative of the logistic sigmoid evaluated at ``x``.

    Uses the identity ``sigmoid'(x) = sigmoid(x) * (1 - sigmoid(x))``.
    """
    # Evaluate the sigmoid once and reuse it for both factors instead of
    # computing the exponential twice.
    activation = sigmoid(x)
    return activation * (1.0 - activation)
def tanh(x):
    """Return the hyperbolic tangent of ``x``, computed with ``np.tanh``.

    ``x`` may be a scalar or a NumPy array (arrays are processed element-wise).
    """
    return np.tanh(x)
@venuktan
venuktan / python_error_printing.py
Created January 22, 2016 19:53
python_error_printing
def PrintException():
    """Print where the exception currently being handled was raised.

    Meant to be called from inside an ``except`` block. Writes a single line
    to standard output of the form::

        EXCEPTION IN (<file>, LINE <n> "<source line>"): <exception>

    The file, line number and source text come from the first entry of the
    active traceback. When no exception is being handled the function
    returns without printing anything.
    """
    # Imported locally so the helper is self-contained even when the
    # surrounding module has not imported these standard-library modules.
    import linecache
    import sys

    _, exc_obj, tb = sys.exc_info()
    if tb is None:
        # No active exception, hence no traceback to report on.
        return

    frame = tb.tb_frame
    lineno = tb.tb_lineno
    filename = frame.f_code.co_filename
    # Refresh the cache in case the file changed on disk since it was read.
    linecache.checkcache(filename)
    line = linecache.getline(filename, lineno, frame.f_globals)
    # Call form of print: identical output on Python 2 and valid on Python 3.
    print('EXCEPTION IN ({}, LINE {} "{}"): {}'.format(filename, lineno, line.strip(), exc_obj))
import numpy
from nearpy import Engine
from nearpy.hashes import RandomBinaryProjections, HashPermutationMapper, HashPermutations
from nearpy.distances import CosineDistance
from nearpy.filters.nearestfilter import NearestFilter
from nearpy.storage import RedisStorage
import glob
import time
from redis import Redis