This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def point_inside_polygon(coord, poly): | |
''' | |
http://www.ariel.com.au/a/python-point-int-poly.html | |
''' | |
x, y = coord | |
n = len(poly) | |
inside =False | |
p1x,p1y = poly[0] | |
for i in range(n+1): | |
p2x,p2y = poly[i % n] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def symmetrize(a):
    """Return ``a + a.T - diag(diagonal(a))``.

    Adding the transpose mirrors every off-diagonal entry onto its
    counterpart across the diagonal; that also doubles the diagonal, so the
    diagonal is subtracted once to restore it. For a triangular ``a`` (zeros
    on one side of the diagonal) the result is the symmetric matrix that
    shares that triangle.

    Source: http://stackoverflow.com/a/2573982/1565438

    :param a: array exposing ``.T`` and ``.diagonal()`` (presumably a square
        2-D numpy array -- the expression only makes sense for that shape).
    :return: the value of ``a + a.T - numpy.diag(a.diagonal())``.
    """
    return a + a.T - numpy.diag(a.diagonal())
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import unittest | |
class TestExtract(unittest.TestCase):
    """Unit test for ``mk_sparse``."""

    def test_mk_sparse(self):
        """A dict of sets becomes a dict keyed by (key, member) with value 1."""
        # The expected layout is a "dictionary of keys" sparse matrix:
        # https://en.wikipedia.org/wiki/Sparse_matrix#Dictionary_of_keys_.28DOK.29
        raw = {'A': {'x', 'y', 'z'}, 'B': {'w', 'y'}}
        expected = {('A', 'x'): 1, ('B', 'y'): 1, ('A', 'z'): 1, ('A', 'y'): 1, ('B', 'w'): 1}
        tested = mk_sparse(raw)
        # Show both dicts in full on failure.
        msg = '\nExpected:\n{}\nGot:\n{}'.format(expected, tested)
        self.assertEqual(expected, tested, msg=msg)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import unittest | |
import networkx as nx | |
class TestExtract(unittest.TestCase): | |
def test_graph_from_adj_mat(self): | |
X = np.array([ | |
[(0,), (1,), (2,)], | |
[(1,), (0,), (0,)], | |
[(2,), (0,), (0,)]], dtype=[('weight', '<i8')]) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# This script implements the methodology described in Chap. 7 of
# Mateos, Pablo. "Names, Ethnicity and Populations". Springer, 2014,
# for bipartite (forenames, surnames) graph projection for unsupervised
# learning of name ethnicity.
# It uses Louvain instead of FastCommunity, however.
# Works fine with pypy.
#
# by Antoine Mazières (http://mazier.es ; {github|twitter}@mazieres)
# Cortext Lab -- http://www.cortext.net/
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def ngrams(sequence, depth): | |
seq = '^' + sequence + '*' | |
res = [] | |
while depth > 0: | |
i, j = 0, depth | |
while j <= len(seq): | |
res.append(seq[i:j]) | |
i += 1 | |
j += 1 | |
depth -= 1 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Print the tag strings from Posts.xml that mention 'machine-learning'.
# (Presumably a Stack Exchange data dump, where PostTypeId="1" would mark
# questions -- confirm against the dump's schema.)
#   1. xmllint selects the Tags attribute of every element with PostTypeId="1".
#   2. sed breaks the output at each `" Tags="` separator, one entry per line.
#   3. grep keeps only the lines containing 'machine-learning'.
#   4. sed rewrites every `<` or `>` as `;` (the `\|` alternation and the `\n`
#      in the replacement of step 2 rely on GNU sed behaviour).
xmllint --xpath '//*[@PostTypeId="1"]/@Tags' Posts.xml | sed 's/" Tags="/\n/g' | grep 'machine-learning' | sed 's/<\|>/;/g'
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Pruning thresholds. Every comparison below uses `<=`, so an edge or node
# sitting exactly on its threshold is removed too.
min_freq = 1
min_weight = 3
min_degree = 2

# Drop edges whose 'weight' attribute is at or below min_weight. The list is
# built in full before the call, so G is not mutated while being iterated.
# (Assumes G is a networkx-style graph defined earlier and that every edge
# carries a 'weight' attribute -- a missing key raises KeyError.)
G.remove_edges_from([(u, v, d) for u, v, d in G.edges(data=True)
                     if d['weight'] <= min_weight])

# Then drop nodes whose 'freq' attribute is at or below min_freq, or whose
# len(G[n]) is at or below min_degree. This runs after the edge pruning
# above, so len(G[n]) reflects the already-thinned graph.
G.remove_nodes_from([n for n, d in G.nodes(data=True)
                     if d['freq'] <= min_freq or
                     len(G[n]) <= min_degree])
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
# by @mazieres | |
from itertools import permutations | |
from collections import defaultdict | |
import pandas as pd | |
import numpy as np | |
from matplotlib import pyplot as plt | |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import re | |
def get_imports(script): | |
res = [] | |
patt = re.compile('^(?:import|from).*$', re.MULTILINE) | |
raw_imports = re.findall(patt, script) | |
if raw_imports == []: | |
return None | |
for imp in raw_imports: | |
if '#' in imp: |