Skip to content

Instantly share code, notes, and snippets.

@larsmans
larsmans / kmtransformer.py
Created July 14, 2013 21:12
k-means feature mapper for scikit-learn
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.metrics.pairwise import rbf_kernel
class KMeansTransformer(BaseEstimator, TransformerMixin):
    """Scikit-learn style estimator built around caller-supplied centroids.

    Parameters
    ----------
    centroids
        Stored unchanged on the instance as ``self.centroids``.
        NOTE(review): presumably an array of k-means cluster centres --
        confirm against the code that consumes this attribute.
    """

    def __init__(self, centroids):
        # Keep the argument exactly as passed in: no copy, no validation.
        self.centroids = centroids

    def fit(self, X, y=None):
        """Do nothing and return ``self``; ``X`` and ``y`` are not used."""
        return self
@larsmans
larsmans / csc_columnwise_max.pyx
Created March 11, 2013 22:36
Columnwise maximum of scipy.sparse.csc_matrix, in Cython
cimport numpy as np
def csc_columnwise_max(np.ndarray[np.float64_t, ndim=1] data,
np.ndarray[int, ndim=1] indices,
np.ndarray[int, ndim=1] indptr,
np.ndarray[np.float64_t, ndim=1] out):
cdef double mx
cdef int n_features = indptr.shape[0] - 1
cdef int i, j
@larsmans
larsmans / tocsv.py
Last active August 29, 2015 14:10
Brat-to-CSV converter
# Quick and dirty Brat-to-CSV conversion.
from __future__ import print_function
import csv
import io
import re
import sys
# Prerequisite: copy server/src/gtbtokenize.py and server/src/tokenise.py from Brat so they can be imported.
from tokenise import gtb_token_boundary_gen