Skip to content

Instantly share code, notes, and snippets.

View conradlee's full-sized avatar

Conrad Lee conradlee

View GitHub Profile
@conradlee
conradlee / edgelist2pajek.py
Created November 1, 2011 16:48
Converts edgelists to pajek files. Works for very large files.
#!/usr/bin/env python
import os
import sys
import subprocess
import optparse
import tempfile
# Special feature: can convert files so large that they
# don't fit in memory. Works for weighted/unweighted,
# directed/undirected edges.
@conradlee
conradlee / simple_edgelist_parser.py
Created November 4, 2011 11:15
An example edgelist parser
def read_edgelist(in_filename):
    """Read an entire edgelist file into memory.

    Args:
        in_filename: Path of a text file with one edge per line, in the
            format parsed by ``edge_generator``.

    Returns:
        A list with one tuple per edge, in file order, exactly as yielded
        by ``edge_generator``.
    """
    with open(in_filename) as f:
        # Materialize inside the ``with`` block: the generator reads lazily
        # and the file must still be open while it is consumed.
        return list(edge_generator(f))
def edge_generator(f):
    """Lazily parse weighted edges from an iterable of text lines.

    Each non-blank line must hold exactly three whitespace-separated
    fields: two integer node ids followed by a numeric weight.

    Args:
        f: An open text file, or any iterable of strings.

    Yields:
        ``(n1, n2, weight)`` tuples with ``n1`` and ``n2`` as ``int`` and
        ``weight`` as ``float``.

    Raises:
        ValueError: If a non-blank line does not have exactly three fields,
            or if a field cannot be converted to its numeric type.
    """
    for line in f:
        # A bare split() already discards the trailing newline and any
        # surrounding whitespace, so no explicit rstrip is needed.
        fields = line.split()
        if not fields:
            # Tolerate blank lines (e.g. a trailing empty line at EOF)
            # instead of failing on the unpack below.
            continue
        n1, n2, weight = fields
        yield int(n1), int(n2), float(weight)
@conradlee
conradlee / mmap_edgelist_parser.py
Created November 4, 2011 11:31
Edgelist parser using python and numpy's mmap
import numpy
import subprocess
weighted_edge_dtype = [("n1", numpy.uint32),("n2", numpy.uint32),("weight", numpy.float64)]
def convert_edgelist_to_mmap(in_filename):
# First determine number of edges because we will need to
# pre-allocate memmap object and that action requires a size
# Use unix's wc (WordCount) to count lines because it is
@conradlee
conradlee / clique_percolation.py
Created November 5, 2011 19:50
Clique percolation in Python using NetworkX
import networkx as nx
from itertools import combinations
def get_percolated_cliques(G, k):
perc_graph = nx.Graph()
cliques = list(frozenset(c) for c in nx.find_cliques(G) if len(c) >= k)
perc_graph.add_nodes_from(cliques)
# Add an edge in the clique graph for each pair of cliques that percolate
for c1, c2 in combinations(cliques, 2):
@conradlee
conradlee / clique_percolation_indexed.py
Created November 5, 2011 19:56
Clique percolation in Python using NetworkX (with indexing)
import networkx as nx
from collections import defaultdict
def get_percolated_cliques(G, k):
perc_graph = nx.Graph()
cliques = [frozenset(c) for c in nx.find_cliques(G) if len(c) >= k]
perc_graph.add_nodes_from(cliques)
# First index which nodes are in which cliques
membership_dict = defaultdict(list)
@conradlee
conradlee / clique_percolation_networkx.py
Created November 5, 2011 20:37
K-Clique Percolation with Networkx (with docstring, doctest)
import networkx as nx
from collections import defaultdict
from itertools import combinations
def get_percolated_cliques(G, k, cliques=None):
"""
Finds k-percolated cliques in G, e.g,
Unless the cliques argument evaluates to True, this algorithm
first enumerates all cliques in G. These are stored in memory,
@conradlee
conradlee / mean_shift.py
Created November 18, 2011 14:01
Mean shift in python
import numpy as np
from sklearn.neighbors import BallTree
from sklearn.utils import extmath
# For the full-blown implementation, see www.scikit-learn.org
def mean_shift(X, bandwidth, seeds, kernel_update_function, max_iterations=300):
n_points, n_features = X.shape
stop_thresh = 1e-3 * bandwidth # when mean has converged
cluster_centers = []
@conradlee
conradlee / kernel_update_functions.py
Created November 18, 2011 14:14
Mean shift kernel update functions
import numpy as np
from sklearn.metrics.pairwise import euclidean_distances
def gaussian_kernel_update(x, points, bandwidth):
    """Return the Gaussian-weighted mean of ``points`` around ``x``.

    Every row of ``points`` is weighted by ``exp(-d**2 / bandwidth**2)``,
    where ``d`` is its Euclidean distance to ``x``; the result is the
    weighted sum of the rows divided by the total weight.

    Args:
        x: The reference point the distances are measured from. A single
            point may be passed either as a 1-D vector or as a one-row
            2-D array.
        points: 2-D array with one candidate point per row.
        bandwidth: Kernel width; larger values make the weights decay more
            slowly with distance.

    Returns:
        The weighted mean of ``points`` taken along axis 0.
    """
    # euclidean_distances expects 2-D inputs; promote a single point given
    # as a 1-D vector to a one-row matrix (2-D input passes through as is).
    distances = euclidean_distances(points, np.atleast_2d(x))
    weights = np.exp(-1 * (distances ** 2 / bandwidth ** 2))
    # weights has one row per point, so it broadcasts across the feature
    # columns of ``points``.
    return np.sum(points * weights, axis=0) / np.sum(weights)
def flat_kernel_update(x, points, bandwidth):
    """Return the unweighted mean of ``points`` (flat-kernel update).

    Args:
        x: Unused; accepted so the signature matches
            ``gaussian_kernel_update`` and the two are interchangeable.
        points: Array with one point per row.
        bandwidth: Unused; accepted for the same signature parity.

    Returns:
        The mean of ``points`` taken along axis 0.
    """
    return np.mean(points, axis=0)
@conradlee
conradlee / bin_points.py
Created November 18, 2011 14:27
Bin (discretize) data points for seeding mean shift clustering method
import numpy as np
from collections import defaultdict
def bin_points(X, bin_size, min_bin_freq):
bin_sizes = defaultdict(int)
for point in X:
binned_point = np.cast[np.int32](point / bin_size)
bin_sizes[tuple(binned_point)] += 1
bin_seeds = np.array([point for point, freq in bin_sizes.iteritems() if freq >= min_bin_freq], dtype=np.float32)
@conradlee
conradlee / mysql-utf8.txt
Created December 1, 2011 13:52
UTF8 Settings in MySQL
mysql> show variables like '%character%';
+--------------------------+----------------------------+
| Variable_name | Value |
+--------------------------+----------------------------+
| character_set_client | utf8 |
| character_set_connection | utf8 |
| character_set_database | utf8 |
| character_set_filesystem | binary |
| character_set_results | utf8 |
| character_set_server | utf8 |