Skip to content

Instantly share code, notes, and snippets.

@viveksck
viveksck / gist:64bfb3fbd7937bd11778705f94140e34
Created March 17, 2019 01:22 — forked from seanjtaylor/gist:568141f04a16d518be24
Reshaping a Pandas dataframe into a sparse matrix
import pandas as pd
import scipy.sparse as sps
# Long-format table: one row per (tag1, tag2) pair together with its frequency.
df = pd.DataFrame({'tag1': ['sean', 'udi', 'bogdan'], 'tag2': ['sean', 'udi', 'udi'], 'freq': [1, 2, 3]})
# tag1 -> rows, tag2 -> columns
df = df.set_index(['tag1', 'tag2'])
# MultiIndex.codes gives, for each index level, the integer position of every
# row's label within that level. It replaces the `labels` attribute, which was
# deprecated in pandas 0.24 and removed in pandas 1.0.
row_codes = df.index.codes[0]
col_codes = df.index.codes[1]
# COO format takes (data, (row_indices, col_indices)); the shape is inferred
# from the largest index on each axis.
mat = sps.coo_matrix((df['freq'].to_numpy(), (row_codes, col_codes)))
print(mat.todense())
@viveksck
viveksck / pd_latex_table.py
Created February 9, 2018 22:14 — forked from tbrittoborges/pd_latex_table.py
Pandas recipe for better latex tables
def better_table(table, caption, name):
start = r"""
\begin{{table}}[!htb]
\sisetup{{round-mode=places, round-precision=2}}
\caption{{{}}}\label{{table:{}}}
\centering
""".format(caption, name)
end = r"\end{table}"
@viveksck
viveksck / fractal-dimension.py
Created March 14, 2017 13:07 — forked from rougier/fractal-dimension.py
Fractal dimension computing
# -----------------------------------------------------------------------------
# From https://en.wikipedia.org/wiki/Minkowski–Bouligand_dimension:
#
# In fractal geometry, the Minkowski–Bouligand dimension, also known as
# Minkowski dimension or box-counting dimension, is a way of determining the
# fractal dimension of a set S in a Euclidean space Rn, or more generally in a
# metric space (X, d).
# -----------------------------------------------------------------------------
import scipy.misc
import numpy as np
"""
(C) Mathieu Blondel - 2010
License: BSD 3 clause
Implementation of the collapsed Gibbs sampler for
Latent Dirichlet Allocation, as described in
Finding scientific topics (Griffiths and Steyvers)
"""
from pyspark import SparkContext
import numpy as np
from sklearn.cross_validation import train_test_split, Bootstrap
from sklearn.datasets import make_classification
from sklearn.metrics import accuracy_score
from sklearn.tree import DecisionTreeClassifier
def run(sc):
@viveksck
viveksck / bootstrap.py
Last active August 29, 2015 14:07 — forked from johnb30/bootstrap.py
from __future__ import division
import numpy as np
import pandas as pd
import random
def sample(data):
    """Return a bootstrap resample of *data*.

    Draws ``len(data)`` elements from *data* uniformly at random, with
    replacement, using ``random.choice``.

    Args:
        data: A non-string-specific sequence supporting ``len()`` and
            integer indexing (e.g. a list or tuple).

    Returns:
        A new list with the same length as *data*; an empty list when
        *data* is empty.
    """
    # `range` works on both Python 2 and 3 (`xrange` is a NameError on 3).
    # The result is returned directly so no local shadows the function name.
    return [random.choice(data) for _ in range(len(data))]
def bootstrap_t_test(treatment, control, nboot = 1000, direction = "less"):
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
"""
Three ways of computing the Hellinger distance between two discrete
probability distributions using NumPy and SciPy.
"""
import numpy as np
from scipy.linalg import norm
from scipy.spatial.distance import euclidean
import random
class Markov(object):
def __init__(self, open_file):
self.cache = {}
self.open_file = open_file
self.words = self.file_to_words()
self.word_size = len(self.words)
self.database()
from mincepie import mapreducer, launcher
import gflags
import glob
import leargist
import numpy as np
import os
from PIL import Image
import uuid
# constant value