Skip to content

Instantly share code, notes, and snippets.

@mazieres
mazieres / ngrams.py
Last active November 15, 2015 07:08
def ngrams(sequence, depth):
seq = '^' + sequence + '*'
res = []
while depth > 0:
i, j = 0, depth
while j <= len(seq):
res.append(seq[i:j])
i += 1
j += 1
depth -= 1
# This script implements the methodology described in Chap. 7 of
# Mateos, Pablo. "Names, Ethnicity and Populations". Springer, 2014.
# It performs a bipartite (forenames, surnames) graph projection for
# unsupervised learning of name ethnicity.
# Note, however, that it uses Louvain instead of FastCommunity.
# Works fine with pypy.
#
# by Antoine Mazières (http://mazier.es ; {github|twitter}@mazieres)
# Cortext Lab -- http://www.cortext.net/
import unittest
import networkx as nx
class TestExtract(unittest.TestCase):
def test_graph_from_adj_mat(self):
X = np.array([
[(0,), (1,), (2,)],
[(1,), (0,), (0,)],
[(2,), (0,), (0,)]], dtype=[('weight', '<i8')])
import unittest
class TestExtract(unittest.TestCase):
    """Test case covering the ``mk_sparse`` helper."""

    def test_mk_sparse(self):
        # Checks that mk_sparse turns a mapping of row label -> set of column
        # labels into a dictionary-of-keys (DOK) mapping whose keys are
        # (row, column) pairs and whose values are all 1.
        # https://en.wikipedia.org/wiki/Sparse_matrix#Dictionary_of_keys_.28DOK.29
        raw = {'A': {'x', 'y', 'z'}, 'B': {'w', 'y'}}
        expected = {
            ('A', 'x'): 1,
            ('B', 'y'): 1,
            ('A', 'z'): 1,
            ('A', 'y'): 1,
            ('B', 'w'): 1,
        }
        tested = mk_sparse(raw)
        # Show both mappings in full on failure to make mismatches easy to spot.
        msg = '\nExpected:\n{}\nGot:\n{}'.format(expected, tested)
        self.assertEqual(expected, tested, msg=msg)
def symmetrize(a):
    """Return ``a + a.T`` with the diagonal of ``a`` subtracted once.

    For a triangular square matrix this mirrors the filled triangle onto the
    empty one while leaving the diagonal untouched.

    Recipe from http://stackoverflow.com/a/2573982/1565438

    :param a: square 2-D numpy array (it must support ``.T`` and
        ``.diagonal()``).
    :return: a new array; ``a`` itself is not modified.
    """
    # Imported locally so the function does not depend on how (or whether)
    # the surrounding module binds the name ``numpy``.
    import numpy

    # a + a.T counts every diagonal entry twice; remove one copy of it.
    return a + a.T - numpy.diag(a.diagonal())
def point_inside_polygon(coord, poly):
'''
http://www.ariel.com.au/a/python-point-int-poly.html
'''
x, y = coord
n = len(poly)
inside =False
p1x,p1y = poly[0]
for i in range(n+1):
p2x,p2y = poly[i % n]
import unittest
class TestExtract(unittest.TestCase):
def test_adjacency_matrix(self):
X = np.array([
[1, 8, 3],
[5, 0, 0],
[0, 4, 2]])
tested = adjacency_matrix(X)
import pandas as pd
import numpy as np
def wannabe_projection(df):
'''
https://stats.stackexchange.com/questions/142132/is-this-a-valid-method-for-unipartite-projection-of-a-bipartite-graph
'''
n_samples = df.shape[0]
res = np.zeros((n_samples, n_samples))
#!/usr/bin/env python
# by @mazieres for cortext.fr
import sqlite3
import sys
import os
from collections import defaultdict
# PATH to the DB downloaded from cortext
@mazieres
mazieres / osetg.py
Created October 3, 2014 08:56
Ordered set python generator
def osetg(seq, idfun=None):
# Ordered set generator
# <http://www.peterbe.com/plog/uniqifiers-benchmark>
if idfun is None:
def idfun(x): return x
seen = {}
for item in seq:
marker = idfun(item)
if marker in seen: continue
seen[marker] = 1