Vlad Niculae vene

## nls_solvers.ipynb

      
              1 file
            
          
              0 forks
            
          
              2 comments
            
          
              3 stars
            
          
                vene
                / nls_solvers.ipynb
            
            
              Last active
              April 21, 2024 16:54
            
              
                Non-negative least squares in Python
              
          
      Sorry, something went wrong. Reload?
      Sorry, we cannot display this file.
      Sorry, this file is invalid so it cannot be displayed.
      
          Viewer requires iframe.
      
    
## lang_sim.ipynb

      
              1 file
            
          
              0 forks
            
          
              0 comments
            
          
              0 stars
            
          
                vene
                / lang_sim.ipynb
            
            
              Last active
              December 30, 2015 22:09
            
              
                Simple language similarity with character n-grams
              
          
      Sorry, something went wrong. Reload?
      Sorry, we cannot display this file.
      Sorry, this file is invalid so it cannot be displayed.
      
          Viewer requires iframe.
      
    
## sparse_multiclass_numba.py
"""
(C) August 2013, Mathieu Blondel
# License: BSD 3 clause

Custom group support by Vlad Niculae (vlad@vene.ro)

This is a Numba-based reimplementation of the block coordinate descent solver
(without line search) described in the paper:

    Block Coordinate Descent Algorithms for Large-scale Sparse Multiclass

## lexical_set_vectorizer.py
import re
from collections import OrderedDict
import numpy as np
from sklearn.base import BaseEstimator, TransformerMixin

class LexicalSetVectorizer(BaseEstimator, TransformerMixin):
    def __init__(self, word_sets=None, normalize=False, lower=False,
                 token_pattern=ur'(?u)\b\w\w+\b'):
        self.word_sets = word_sets
        self.normalize = normalize

## vect.py
from __future__ import print_function

from sklearn.feature_extraction.text import CountVectorizer
from sklearn.grid_search import GridSearchCV
from sklearn.pipeline import make_pipeline
from sklearn.dummy import DummyClassifier
from sklearn.cross_validation import LeaveOneOut

# Tiny three-document toy corpus.
docs = [
    "the cat lives in the hat",
    "the quick brown fox jumps over a dog",
    "a clockwork orange",
]

## lbfgs_l1logistic.py
"""l-bfgs-b L1-Logistic Regression solver"""

# Author: Vlad Niculae <vlad@vene.ro>
# Suggested by Mathieu Blondel

from __future__ import division, print_function

import numpy as np
from scipy.optimize import fmin_l_bfgs_b

## matrix_completion.py
# Author: Vlad Niculae <vlad@vene.ro>
# Licence: BSD

from __future__ import division, print_function
import numpy as np
from sklearn.utils import check_random_state


class SquaredLoss(object):
    def loss(self, y, pred):

## lemmatize.pl
#!/usr/bin/env perl

# Lemmatize CONLL-style (tabular) POS-tagged file using Treex
# Prerequisites: cpan -i -f Treex::Tool::EnglishMorpho::Lemmatizer
#   (I think the -f is needed because some tests are failing)
# Usage example:
#  $ echo "1\tgoes\t_\tVBZ\n" > example
#  $ <example ./lemmatize.pl
#  1    goes    go    VBZ
#

## LICENSE
The full tagger software package is licensed as GPL version 2.

src/ -- All original code we've written -- the files in src/ with one
exception below -- we license under the Apache License version 2.0. However,
we have several GPL'd dependencies that we include in this package, which,
as we understand it, force the full package to be GPL.

src/cmu/arktweetnlp/impl/OWLQN.java -- is licensed GPL, originally from the
Stanford POS Tagger version 2010-05-26.

## campaign_death.py
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Parallel data series: index i of each list refers to the same cause of death.
# NOTE(review): units are not stated in this file -- presumably deaths per
# year and money raised in millions of USD; confirm against the data source.
deaths = [596577, 142942, 73831, 41374, 39518, 21176, 7683, 6849]
money = [54.1, 7, 4.2, 257.85, 3.2, 147, 14, 22.9]
# "COPD" (chronic obstructive pulmonary disease) was previously misspelled
# as "COPS".
names = ["Heart disease", "COPD", "Diabetes", "Breast cancer",
         "Suicide", "Prostate cancer", "HIV/AIDS", "Motor neuron disease"]

# Select seaborn's "white" style preset.
sns.set_style("white")
	"""
	(C) August 2013, Mathieu Blondel
	# License: BSD 3 clause

	Custom group support by Vlad Niculae (vlad@vene.ro)

	This is a Numba-based reimplementation of the block coordinate descent solver
	(without line search) described in the paper:

	Block Coordinate Descent Algorithms for Large-scale Sparse Multiclass
	import re
	from collections import OrderedDict
	import numpy as np
	from sklearn.base import BaseEstimator, TransformerMixin

	class LexicalSetVectorizer(BaseEstimator, TransformerMixin):
	def __init__(self, word_sets=None, normalize=False, lower=False,
	token_pattern=ur'(?u)\b\w\w+\b'):
	self.word_sets = word_sets
	self.normalize = normalize
	from __future__ import print_function

	from sklearn.feature_extraction.text import CountVectorizer
	from sklearn.grid_search import GridSearchCV
	from sklearn.pipeline import make_pipeline
	from sklearn.dummy import DummyClassifier
	from sklearn.cross_validation import LeaveOneOut

	docs = ["the cat lives in the hat", "the quick brown fox jumps over a dog",
	"a clockwork orange"]
	"""l-bfgs-b L1-Logistic Regression solver"""

	# Author: Vlad Niculae <vlad@vene.ro>
	# Suggested by Mathieu Blondel

	from __future__ import division, print_function

	import numpy as np
	from scipy.optimize import fmin_l_bfgs_b
	# Author: Vlad Niculae <vlad@vene.ro>
	# Licence: BSD

	from __future__ import division, print_function
	import numpy as np
	from sklearn.utils import check_random_state


	class SquaredLoss(object):
	def loss(self, y, pred):
	#!/usr/bin/env perl

	# Lemmatize CONLL-style (tabular) POS-tagged file using Treex
	# Prerequisites: cpan -i -f Treex::Tool::EnglishMorpho::Lemmatizer
	# (I think the -f is needed because some tests are failing)
	# Usage example:
	# $ echo "1\tgoes\t_\tVBZ\n" > example
	# $ <example ./lemmatize.pl
	# 1 goes go VBZ
	#
	The full tagger software package is licensed as GPL version 2.

	src/ -- All original code we've written -- the files in src/ with one
	exception below -- we license under the Apache License version 2.0. However,
	we have several GPL'd dependencies that we include in this package, which,
	as we understand it, force the full package to be GPL.

	src/cmu/arktweetnlp/impl/OWLQN.java -- is licensed GPL, originally from the
	Stanford POS Tagger version 2010-05-26.
	import numpy as np
	import matplotlib.pyplot as plt
	import seaborn as sns

	deaths = [596577, 142942, 73831, 41374, 39518, 21176, 7683, 6849]
	money = [54.1, 7, 4.2, 257.85, 3.2, 147, 14, 22.9]
	names = ["Heart disease", "COPS", "Diabetes", "Breast cancer",
	"Suicide", "Prostate cancer", "HIV/AIDS", "Motor neuron disease"]

	sns.set_style("white")