This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
(C) August 2013, Mathieu Blondel | |
# License: BSD 3 clause | |
Custom group support by Vlad Niculae (vlad@vene.ro) | |
This is a Numba-based reimplementation of the block coordinate descent solver | |
(without line search) described in the paper: | |
Block Coordinate Descent Algorithms for Large-scale Sparse Multiclass |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import re | |
from collections import OrderedDict | |
import numpy as np | |
from sklearn.base import BaseEstimator, TransformerMixin | |
class LexicalSetVectorizer(BaseEstimator, TransformerMixin): | |
def __init__(self, word_sets=None, normalize=False, lower=False, | |
token_pattern=ur'(?u)\b\w\w+\b'): | |
self.word_sets = word_sets | |
self.normalize = normalize |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from __future__ import print_function | |
from sklearn.feature_extraction.text import CountVectorizer | |
from sklearn.grid_search import GridSearchCV | |
from sklearn.pipeline import make_pipeline | |
from sklearn.dummy import DummyClassifier | |
from sklearn.cross_validation import LeaveOneOut | |
docs = ["the cat lives in the hat", "the quick brown fox jumps over a dog", | |
"a clockwork orange"] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Author: Vlad Niculae <vlad@vene.ro> | |
# Licence: BSD | |
from __future__ import division, print_function | |
import numpy as np | |
from sklearn.utils import check_random_state | |
class SquaredLoss(object): | |
def loss(self, y, pred): |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env perl | |
# Lemmatize CONLL-style (tabular) POS-tagged file using Treex | |
# Prerequisites: cpan -i -f Treex::Tool::EnglishMorpho::Lemmatizer | |
# (I think the -f is needed because some tests are failing) | |
# Usage example: | |
# $ echo "1\tgoes\t_\tVBZ\n" > example | |
# $ <example ./lemmatize.pl | |
# 1 goes go VBZ | |
# |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
The full tagger software package is licensed as GPL version 2. | |
src/ -- All original code we've written -- the files in src/ with one | |
exception below -- we license under the Apache License version 2.0. However, | |
we have several GPL'd dependencies that we include in this package, which, | |
as we understand it, force the full package to be GPL. | |
src/cmu/arktweetnlp/impl/OWLQN.java -- is licensed GPL, originally from the | |
Stanford POS Tagger version 2010-05-26. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np | |
import matplotlib.pyplot as plt | |
import seaborn as sns | |
deaths = [596577, 142942, 73831, 41374, 39518, 21176, 7683, 6849] | |
money = [54.1, 7, 4.2, 257.85, 3.2, 147, 14, 22.9] | |
names = ["Heart disease", "COPS", "Diabetes", "Breast cancer", | |
"Suicide", "Prostate cancer", "HIV/AIDS", "Motor neuron disease"] | |
sns.set_style("white") |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Author: Vlad Niculae <vlad@vene.ro> | |
# License: 2-clause BSD | |
"""2D implementation of the robust Siegel Repeated Median slope estimator | |
This estimator tolerates corruption of up to 50% of the input points in either | |
the X or the Y dimension. | |
Vectorized implementation, and a naive implementation for sanity-check. | |
""" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding: utf-8 -*- | |
""" | |
Newton interpolation and numerical differentiation | |
Created on Sun Apr 10 01:22:46 2011 | |
@author: vene | |
""" | |
from __future__ import division | |
from copy import copy |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
import numpy as np | |
import matplotlib.pylab as pl | |
from sklearn.svm import SVR | |
from sklearn.metrics import mean_squared_error | |
X = np.array([[13.], # This is dataset no. 3 from Anscombe's quartet. | |
[10.], # I moved the outlier to the first position for | |
[8.], # prettier code. This toy dataset illustrates |
OlderNewer