Antoine Mazières mazieres

## point_inside_polygon.py
def point_inside_polygon(coord, poly):
    '''
    Point-in-polygon helper adapted from
    http://www.ariel.com.au/a/python-point-int-poly.html

    coord: an (x, y) pair.
    poly: an indexable sequence of (x, y) vertex pairs. It must be non-empty
        because ``poly[0]`` is read unconditionally.

    NOTE(review): only the opening of the function is visible here. The loop
    body stops right after unpacking the second vertex, and ``inside`` is
    never updated or returned, so as written the function returns None.
    Restore the rest of the body from the full file before relying on it.
    '''
    x, y = coord
    n = len(poly)
    inside =False
    # (p1x, p1y) starts out as the first vertex.
    p1x,p1y = poly[0]
    # n + 1 iterations combined with ``i % n`` make the last pass wrap back
    # around to vertex 0.
    for i in range(n+1):
        p2x,p2y = poly[i % n]

## symmetries_matrix.py
def symmetrize(a):
    """Return ``a`` plus its transpose, with the diagonal counted only once.

    ``a`` must offer ``.T`` and ``.diagonal()`` (e.g. a square NumPy array).
    Adding the transpose doubles the diagonal, so one copy of it is
    subtracted again.
    """
    # http://stackoverflow.com/a/2573982/1565438
    mirrored = a + a.T
    diagonal_only = numpy.diag(a.diagonal())
    return mirrored - diagonal_only

## mk_sparse.py
import unittest

class TestExtract(unittest.TestCase):
    """Unit test for ``mk_sparse``."""

    def test_mk_sparse(self):
        """A dict of sets must come back as a {(row, col): 1} dictionary."""
        # https://en.wikipedia.org/wiki/Sparse_matrix#Dictionary_of_keys_.28DOK.29
        source = {'A': {'x', 'y', 'z'}, 'B': {'w', 'y'}}
        wanted = {
            ('A', 'x'): 1,
            ('B', 'y'): 1,
            ('A', 'z'): 1,
            ('A', 'y'): 1,
            ('B', 'w'): 1,
        }
        actual = mk_sparse(source)
        failure_note = '\nExpected:\n{}\nGot:\n{}'.format(wanted, actual)
        self.assertEqual(wanted, actual, msg=failure_note)

## graph_from_adjacency_mat.py
import unittest
import networkx as nx

class TestExtract(unittest.TestCase):
    """Test case whose single visible test builds an adjacency-matrix fixture.

    NOTE(review): only the start of the test is visible here. The method ends
    right after constructing ``X``, with no call under test and no assertion,
    so the remainder is presumably cut off -- check the full file.
    """

    def test_graph_from_adj_mat(self):
        # 3x3 structured array with a single field, 'weight', stored as a
        # little-endian 64-bit integer ('<i8'). The weights are symmetric:
        # entry (0, 1) == (1, 0) == 1 and entry (0, 2) == (2, 0) == 2.
        # NOTE(review): `np` is not imported in the visible snippet (only
        # `unittest` and `networkx as nx` are) -- confirm it is imported
        # elsewhere in the file.
        X = np.array([
            [(0,), (1,), (2,)],
            [(1,), (0,), (0,)],
            [(2,), (0,), (0,)]], dtype=[('weight', '<i8')])

## gist:8391e0e40918185afad5
# This script implements the methodology described in Chap. 7 of
# Mateos, Pablo. "Names, Ethnicity and Populations". Springer, 2014.
# It performs bipartite (forenames, surnames) graph projection for
# unsupervised learning of the ethnicity of names.
# However, it uses Louvain instead of FastCommunity.
# Works fine with pypy.
#
# by Antoine Mazières (http://mazier.es ; {github|twitter}@mazieres)
# Cortext Lab -- http://www.cortext.net/

## ngrams.py
def ngrams(sequence, depth):
    """Return every n-gram of ``sequence`` for n from ``depth`` down to 1.

    The string is first wrapped in boundary markers -- ``'^'`` in front and
    ``'*'`` at the end -- and the markers take part in the n-grams like any
    other character.

    sequence: the string to slice.
    depth: the largest n-gram length. Values of 0 or less give an empty list.

    Returns a list of substrings, longest n-grams first, each length in
    left-to-right order. Lengths greater than the wrapped string contribute
    nothing.
    """
    seq = '^' + sequence + '*'
    res = []
    for size in range(depth, 0, -1):
        # The range is empty when ``size`` exceeds len(seq), so oversized
        # windows are skipped without a special case.
        res.extend(seq[start:start + size]
                   for start in range(len(seq) - size + 1))
    # The collected n-grams were previously built and then dropped.
    return res

## stack xml onliner.sh
# Select the Tags attribute of every element with PostTypeId="1" in Posts.xml,
# break the output at each `" Tags="` boundary so the values land on separate
# lines, keep the lines mentioning 'machine-learning', and replace the escaped
# angle brackets (&lt; / &gt;) with ';'.
# NOTE(review): `\n` in the replacement and `\|` alternation are GNU sed
# extensions -- confirm the target sed supports them.
xmllint --xpath '//*[@PostTypeId="1"]/@Tags' Posts.xml | sed 's/" Tags="/\n/g' | grep 'machine-learning' | sed 's/&lt;\|&gt;/;/g'

## filter_graph.py
# Pruning thresholds. The comparisons below use `<=`, so each value is an
# exclusive lower bound: anything equal to the threshold is removed as well.
min_freq = 1
min_weight = 3
min_degree = 2

# Step 1: drop every edge whose 'weight' attribute is <= min_weight. The list
# is fully built before the removal call, so G is not mutated mid-iteration.
# NOTE(review): G is defined outside this snippet; the calls used here look
# like the networkx Graph API -- confirm.
G.remove_edges_from([(u, v, d) for u, v, d in G.edges(data=True)
                     if d['weight'] <= min_weight])
# Step 2: drop every node whose 'freq' attribute is <= min_freq or whose
# len(G[n]) is <= min_degree. This runs after step 1, so len(G[n]) is measured
# on the already edge-pruned graph.
# NOTE(review): this is a single pass -- a node that survives may end up at or
# below min_degree once its neighbours are removed; confirm that is intended.
G.remove_nodes_from([n for n, d in G.nodes(data=True)
                     if d['freq'] <= min_freq or
                     len(G[n]) <= min_degree])

## cooc.py
#!/usr/bin/env python
# by @mazieres
from itertools import permutations
from collections import defaultdict

import pandas as pd
import numpy as np
from matplotlib import pyplot as plt


## extract_imports.py
import re

def get_imports(script):
    res = []
    patt = re.compile('^(?:import|from).*$', re.MULTILINE)
    raw_imports = re.findall(patt, script)
    if raw_imports == []:
        return None
    for imp in raw_imports:
        if '#' in imp:
	def point_inside_polygon(coord, poly):
	'''
	http://www.ariel.com.au/a/python-point-int-poly.html
	'''
	x, y = coord
	n = len(poly)
	inside =False
	p1x,p1y = poly[0]
	for i in range(n+1):
	p2x,p2y = poly[i % n]
	def symmetrize(a):
	# http://stackoverflow.com/a/2573982/1565438
	return a + a.T - numpy.diag(a.diagonal())
	import unittest

	class TestExtract(unittest.TestCase):
	def test_mk_sparse(self):
	# https://en.wikipedia.org/wiki/Sparse_matrix#Dictionary_of_keys_.28DOK.29
	raw = {'A': {'x', 'y', 'z'}, 'B': {'w', 'y'}}
	expected = {('A', 'x'): 1, ('B', 'y'): 1, ('A', 'z'): 1, ('A', 'y'): 1, ('B', 'w'): 1}
	tested = mk_sparse(raw)
	msg = '\nExpected:\n{}\nGot:\n{}'.format(expected, tested)
	self.assertEqual(expected, tested, msg=msg)
	import unittest
	import networkx as nx

	class TestExtract(unittest.TestCase):

	def test_graph_from_adj_mat(self):
	X = np.array([
	[(0,), (1,), (2,)],
	[(1,), (0,), (0,)],
	[(2,), (0,), (0,)]], dtype=[('weight', '<i8')])
	# This script implements the methodology described in Chap. 7 of
	# Mateos, Pablo. "Names, Ethnicity and Populations". Springer, 2014.
	# It performs bipartite (forenames, surnames) graph projection for
	# unsupervised learning of the ethnicity of names.
	# However, it uses Louvain instead of FastCommunity.
	# Works fine with pypy.
	#
	# by Antoine Mazières (http://mazier.es ; {github|twitter}@mazieres)
	# Cortext Lab -- http://www.cortext.net/
	def ngrams(sequence, depth):
	seq = '^' + sequence + '*'
	res = []
	while depth > 0:
	i, j = 0, depth
	while j <= len(seq):
	res.append(seq[i:j])
	i += 1
	j += 1
	depth -= 1
	min_freq = 1
	min_weight = 3
	min_degree = 2

	G.remove_edges_from([(u, v, d) for u, v, d in G.edges(data=True)
	if d['weight'] <= min_weight])
	G.remove_nodes_from([n for n, d in G.nodes(data=True)
	if d['freq'] <= min_freq or
	len(G[n]) <= min_degree])
	#!/usr/bin/env python
	# by @mazieres
	from itertools import permutations
	from collections import defaultdict

	import pandas as pd
	import numpy as np
	from matplotlib import pyplot as plt
	import re

	def get_imports(script):
	res = []
	patt = re.compile('^(?:import\|from).*$', re.MULTILINE)
	raw_imports = re.findall(patt, script)
	if raw_imports == []:
	return None
	for imp in raw_imports:
	if '#' in imp: