Lars larsmans

## nonogram.pl
/*
 * Nonogram/paint-by-numbers solver in SWI-Prolog. Uses CLP(FD),
 * in particular the automaton/3 (finite-state/RE) constraint.
 * Copyright 2011, 2014 Lars Buitinck
 * Copyright 2014 Markus Triska
 * Do with this code as you like, but don't remove the copyright notice.
 */

:- use_module(library(clpfd)).

## heaps.erl
% Copyright (c) 2010-2014, Lars Buitinck
% May be used, redistributed and modified under the terms of the
% GNU Lesser General Public License (LGPL), version 2.1 or later

% Heaps/priority queues in Erlang

% Heaps are data structures that return the entries inserted into them in
% sorted order. This makes them the data structure of choice for implementing
% priority queues, a central element of algorithms such as best-first/A*
% search and Kruskal's minimum-spanning-tree algorithm.

## hellinger.py
"""
Three ways of computing the Hellinger distance between two discrete
probability distributions using NumPy and SciPy.
"""

import numpy as np
from scipy.linalg import norm
from scipy.spatial.distance import euclidean


## gist:3745866
>>> from pandas import DataFrame
>>> from sklearn.feature_extraction.text import CountVectorizer
>>> docs = ["You can catch more flies with honey than you can with vinegar.",
...         "You can lead a horse to water, but you can't make him drink."]
>>> vect = CountVectorizer(min_df=0., max_df=1.0)
>>> X = vect.fit_transform(docs)
>>> print(DataFrame(X.A, columns=vect.get_feature_names()).to_string())
   but  can  catch  drink  flies  him  honey  horse  lead  make  more  than  to  vinegar  water  with  you
0    0    2      1      0      1    0      1      0     0     0     1     1   0        1      0     2    2
1    1    2      0      1      0    1      0      1     1     1     0     0   1        0      1     0    2

## kmeans.py
#!/usr/bin/python
#
# K-means clustering using Lloyd's algorithm in pure Python.
# Written by Lars Buitinck. This code is in the public domain.
#
# The main program runs the clustering algorithm on a bunch of text documents
# specified as command-line arguments. These documents are first converted to
# sparse vectors, represented as lists of (index, value) pairs.

from collections import defaultdict

## csc_columnwise_max.pyx
cimport numpy as np


def csc_columnwise_max(np.ndarray[np.float64_t, ndim=1] data,
                       np.ndarray[int, ndim=1] indices,
                       np.ndarray[int, ndim=1] indptr,
                       np.ndarray[np.float64_t, ndim=1] out):
    cdef double mx
    cdef int n_features = indptr.shape[0] - 1
    cdef int i, j

## kmtransformer.py
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.metrics.pairwise import rbf_kernel


class KMeansTransformer(BaseEstimator, TransformerMixin):
    """Estimator that holds a caller-supplied set of centroids.

    Only the constructor and ``fit`` are visible in this excerpt.
    NOTE(review): ``rbf_kernel`` is imported next to this class, so a
    ``transform`` method presumably follows outside this view -- confirm.
    """

    def __init__(self, centroids):
        # Stored unmodified, under the same name as the parameter.
        self.centroids = centroids

    def fit(self, X, y=None):
        """Return ``self`` unchanged; neither ``X`` nor ``y`` is read."""
        return self

## nearest_developers.py
import numpy as np
import os
import sys

from collections import defaultdict
from git import Repo
from scipy.sparse import csc_matrix

path = sys.argv[1]
extensions = [".py", ".pyx", ".pxd"]

## ubuntu-to-mint.sh
# Updated version of Jeff Shaffner's instructions,
# http://jeffshaffner.wordpress.com/2012/09/27/how-to-convert-ubuntu-12-04-to-linux-mint-13/
# Run with "sudo sh ubuntu-to-mint.sh".

set -e
set -x

apt-get update && apt-get --yes upgrade

add-apt-repository "deb http://packages.linuxmint.com/ olivia \

## README
Sentiment analysis experiment using scikit-learn
================================================

The script sentiment.py reproduces the sentiment analysis approach from Pang,
Lee and Vaithyanathan (2002), who tried to classify movie reviews as positive
or negative, with three differences:

* tf-idf weighting is applied to terms
* the three-fold cross validation split is different
* regularization is tuned by cross validation
	/*
	* Nonogram/paint-by-numbers solver in SWI-Prolog. Uses CLP(FD),
	* in particular the automaton/3 (finite-state/RE) constraint.
	* Copyright 2011, 2014 Lars Buitinck
	* Copyright 2014 Markus Triska
	* Do with this code as you like, but don't remove the copyright notice.
	*/

	:- use_module(library(clpfd)).
	% Copyright (c) 2010-2014, Lars Buitinck
	% May be used, redistributed and modified under the terms of the
	% GNU Lesser General Public License (LGPL), version 2.1 or later

	% Heaps/priority queues in Erlang

	% Heaps are data structures that return the entries inserted into them in
	% sorted order. This makes them the data structure of choice for implementing
	% priority queues, a central element of algorithms such as best-first/A*
	% search and Kruskal's minimum-spanning-tree algorithm.
	"""
	Three ways of computing the Hellinger distance between two discrete
	probability distributions using NumPy and SciPy.
	"""

	import numpy as np
	from scipy.linalg import norm
	from scipy.spatial.distance import euclidean
	>>> from pandas import DataFrame
	>>> from sklearn.feature_extraction.text import CountVectorizer
	>>> docs = ["You can catch more flies with honey than you can with vinegar.",
	... "You can lead a horse to water, but you can't make him drink."]
	>>> vect = CountVectorizer(min_df=0., max_df=1.0)
	>>> X = vect.fit_transform(docs)
	>>> print(DataFrame(X.A, columns=vect.get_feature_names()).to_string())
	   but  can  catch  drink  flies  him  honey  horse  lead  make  more  than  to  vinegar  water  with  you
	0    0    2      1      0      1    0      1      0     0     0     1     1   0        1      0     2    2
	1    1    2      0      1      0    1      0      1     1     1     0     0   1        0      1     0    2
	#!/usr/bin/python
	#
	# K-means clustering using Lloyd's algorithm in pure Python.
	# Written by Lars Buitinck. This code is in the public domain.
	#
	# The main program runs the clustering algorithm on a bunch of text documents
	# specified as command-line arguments. These documents are first converted to
	# sparse vectors, represented as lists of (index, value) pairs.

	from collections import defaultdict
	cimport numpy as np


	def csc_columnwise_max(np.ndarray[np.float64_t, ndim=1] data,
	np.ndarray[int, ndim=1] indices,
	np.ndarray[int, ndim=1] indptr,
	np.ndarray[np.float64_t, ndim=1] out):
	cdef double mx
	cdef int n_features = indptr.shape[0] - 1
	cdef int i, j
	from sklearn.base import BaseEstimator, TransformerMixin
	from sklearn.metrics.pairwise import rbf_kernel


	class KMeansTransformer(BaseEstimator, TransformerMixin):
	    """Estimator that holds a caller-supplied set of centroids.

	    Only the constructor and ``fit`` are visible in this excerpt.
	    NOTE(review): ``rbf_kernel`` is imported next to this class, so a
	    ``transform`` method presumably follows outside this view -- confirm.
	    """

	    def __init__(self, centroids):
	        # Stored unmodified, under the same name as the parameter.
	        self.centroids = centroids

	    def fit(self, X, y=None):
	        """Return ``self`` unchanged; neither ``X`` nor ``y`` is read."""
	        return self
	import numpy as np
	import os
	import sys

	from collections import defaultdict
	from git import Repo
	from scipy.sparse import csc_matrix

	path = sys.argv[1]
	extensions = [".py", ".pyx", ".pxd"]
	# Updated version of Jeff Shaffner's instructions,
	# http://jeffshaffner.wordpress.com/2012/09/27/how-to-convert-ubuntu-12-04-to-linux-mint-13/
	# Run with "sudo sh ubuntu-to-mint.sh".

	set -e
	set -x

	apt-get update && apt-get --yes upgrade

	add-apt-repository "deb http://packages.linuxmint.com/ olivia \
	Sentiment analysis experiment using scikit-learn
	================================================

	The script sentiment.py reproduces the sentiment analysis approach from Pang,
	Lee and Vaithyanathan (2002), who tried to classify movie reviews as positive
	or negative, with three differences:

	* tf-idf weighting is applied to terms
	* the three-fold cross validation split is different
	* regularization is tuned by cross validation