denzilc's public gists

denzilc / test_sparse_SVC_convergence.py
Created May 22, 2011 21:53 — forked from agramfort/test_linear_svc.py
Non-convergence of sparse SVC
from pprint import pprint
import numpy as np
from scipy import sparse
from scikits.learn.grid_search import GridSearchCV
from scikits.learn.cross_val import StratifiedKFold
from scikits.learn.metrics import f1_score, classification_report
from scikits.learn import svm
from scikits.learn.linear_model import LogisticRegression
from scikits.learn.linear_model.sparse import LogisticRegression as SparseLogisticRegression
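
The preview stops at the imports (note the pre-0.9 scikits.learn package paths, which predate the rename to sklearn). As a minimal sketch of the kind of experiment these imports suggest, here is a grid-searched linear SVC on sparse input written against the modern sklearn names; the data and parameter grid are illustrative assumptions, not the gist's own.

import numpy as np
from scipy import sparse
from sklearn.model_selection import GridSearchCV, StratifiedKFold
from sklearn.metrics import classification_report
from sklearn.svm import SVC

# Illustrative random data, converted to CSR to exercise the sparse path.
rng = np.random.RandomState(0)
X = sparse.csr_matrix(rng.rand(200, 50))
y = rng.randint(0, 2, size=200)

# Grid-search a linear-kernel SVC with stratified folds, scoring by F1;
# this is the setting where the gist's title reports non-convergence.
grid = GridSearchCV(
    SVC(kernel='linear'),
    param_grid={'C': [0.1, 1.0, 10.0]},
    scoring='f1',
    cv=StratifiedKFold(n_splits=3),
)
grid.fit(X, y)
print(classification_report(y, grid.predict(X)))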
denzilc / SparseLinearSVC_vs_SparseSVC.py
Created May 23, 2011 19:37
Large difference in classifier accuracies between LinearSVC and SVC
import numpy as np
import cPickle as pickle
from scipy import sparse
from scikits.learn.preprocessing.sparse import Normalizer
from scikits.learn import svm
from scikits.learn.grid_search import GridSearchCV
from scikits.learn.metrics.metrics import f1_score, classification_report, \
    confusion_matrix, precision_recall_fscore_support
from scikits.learn.cross_val import StratifiedKFold
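
Again only the imports survive the preview. A hedged sketch of the comparison the title describes, using the modern sklearn paths: an accuracy gap between the two is plausible in principle, since LinearSVC (liblinear) optimizes a squared-hinge loss with a regularized intercept while SVC(kernel='linear') (libsvm) optimizes the standard hinge loss. The data here is synthetic and illustrative.

import numpy as np
from scipy import sparse
from sklearn.svm import SVC, LinearSVC
from sklearn.metrics import f1_score

rng = np.random.RandomState(0)
X = sparse.csr_matrix(rng.rand(300, 100))
y = rng.randint(0, 2, size=300)

# Same C, same data: any difference comes from the loss function,
# the solver, and how the intercept is handled.
for clf in (LinearSVC(C=1.0), SVC(kernel='linear', C=1.0)):
    clf.fit(X, y)
    print('%s %.3f' % (type(clf).__name__, f1_score(y, clf.predict(X))))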
denzilc / twitter_oauth.py
Created June 6, 2011 15:07
Twitter OAuth requests
'''
Created on Jun 6, 2011
@author: denzilc
'''
import oauth2 as oauth
import urlparse
import time
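
The preview ends at the imports. A minimal sketch of a signed request with the python-oauth2 library the gist imports; the credentials are placeholders, and the endpoint shown is the v1-era REST URL that was current when the gist was written.

import oauth2 as oauth

# Placeholder credentials: substitute your app's consumer key/secret
# and an access token obtained through the OAuth dance.
consumer = oauth.Consumer(key='CONSUMER_KEY', secret='CONSUMER_SECRET')
token = oauth.Token('ACCESS_TOKEN', 'ACCESS_TOKEN_SECRET')

# The client signs each request with HMAC-SHA1 using the consumer/token pair.
client = oauth.Client(consumer, token)
resp, content = client.request(
    'https://api.twitter.com/1/statuses/home_timeline.json', 'GET')
print(resp['status'])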
denzilc / gradientDescent.m
Created November 1, 2011 21:57
Gradient Descent for the Machine Learning course at Stanford
function [theta, J_history] = gradientDescent(X, y, theta, alpha, num_iters)
%GRADIENTDESCENT Performs gradient descent to learn theta
% theta = GRADIENTDESCENT(X, y, theta, alpha, num_iters) updates theta by
% taking num_iters gradient steps with learning rate alpha
% Initialize some useful values
m = length(y); % number of training examples
J_history = zeros(num_iters, 1);
theta_len = length(theta);
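
The preview cuts off before the update loop. A NumPy sketch of the standard batch update the header describes, theta := theta - (alpha/m) * X'(X*theta - y); the variable names mirror the Octave signature, but the implementation below is my own rendering, not the gist's.

import numpy as np

def gradient_descent(X, y, theta, alpha, num_iters):
    """Batch gradient descent for linear regression.

    Takes num_iters steps of size alpha, recording the cost after each step.
    """
    m = len(y)  # number of training examples
    J_history = np.zeros(num_iters)
    for i in range(num_iters):
        # Vectorized update over all parameters at once.
        theta = theta - (alpha / m) * X.T @ (X @ theta - y)
        J_history[i] = np.sum((X @ theta - y) ** 2) / (2 * m)
    return theta, J_history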
denzilc / computeCost.m
Created November 1, 2011 22:00
Compute Cost
function J = computeCost(X, y, theta)
%COMPUTECOST Compute cost for linear regression
% J = COMPUTECOST(X, y, theta) computes the cost of using theta as the
% parameter for linear regression to fit the data points in X and y
% Initialize some useful values
m = length(y); % number of training examples
% You need to return the following variables correctly
J = 0;
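
The preview stops at the J = 0 placeholder that the course starter code ships with. The quantity being computed is the usual least-squares cost, J(theta) = 1/(2m) * sum((X*theta - y).^2); here is a one-function NumPy sketch of that formula (my rendering, not the gist's solution).

import numpy as np

def compute_cost(X, y, theta):
    """Least-squares cost for linear regression: sum((X@theta - y)^2) / (2m)."""
    m = len(y)
    residuals = X @ theta - y
    return np.sum(residuals ** 2) / (2 * m)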
denzilc / featureNormalize.m
Created November 2, 2011 12:08
Feature Normalization
function [X_norm, mu, sigma] = featureNormalize(X)
%FEATURENORMALIZE Normalizes the features in X
% FEATURENORMALIZE(X) returns a normalized version of X where
% the mean value of each feature is 0 and the standard deviation
% is 1. This is often a good preprocessing step to do when
% working with learning algorithms.
% You need to set these values correctly
X_norm = X;
mu = zeros(1, size(X, 2));
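
The preview ends before sigma is set or the normalization applied. The transformation the header comment describes is z-scoring each column; a NumPy sketch under that reading:

import numpy as np

def feature_normalize(X):
    """Z-score each feature column: subtract its mean, divide by its std."""
    mu = X.mean(axis=0)
    sigma = X.std(axis=0)
    X_norm = (X - mu) / sigma
    return X_norm, mu, sigma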
denzilc / computeCostMulti.m
Created November 2, 2011 19:04
Compute Cost Multiple Variables
function J = computeCostMulti(X, y, theta)
%COMPUTECOSTMULTI Compute cost for linear regression with multiple variables
% J = COMPUTECOSTMULTI(X, y, theta) computes the cost of using theta as the
% parameter for linear regression to fit the data points in X and y
% Initialize some useful values
m = length(y); % number of training examples
% You need to return the following variables correctly
J = 0;
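
The body is the same vectorized least-squares cost as computeCost above: once X carries one column per feature, the single-variable formula (and the NumPy sketch after computeCost) applies unchanged.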
denzilc / nnCostFunction.m
Created November 12, 2011 15:44
Neural Network Cost Function
function [J grad] = nnCostFunction(nn_params, ...
                                   input_layer_size, ...
                                   hidden_layer_size, ...
                                   num_labels, ...
                                   X, y, lambda)
%NNCOSTFUNCTION Implements the neural network cost function for a two layer
%neural network which performs classification
% [J grad] = NNCOSTFUNCTION(nn_params, hidden_layer_size, num_labels, ...
% X, y, lambda) computes the cost and gradient of the neural network. The
% parameters for the neural network are "unrolled" into the vector
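
The preview ends inside the header comment, before the parameters are even re-rolled into weight matrices. A hedged NumPy sketch of the regularized feedforward cost for the two-layer classification network the header describes: unroll nn_params into Theta1 and Theta2, run a sigmoid feedforward, one-hot encode the labels, and sum the cross-entropy. The layer shapes and the placement of the lambda term follow the usual course formulation, which the gist is assumed to match.

import numpy as np

def sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))

def nn_cost(nn_params, input_layer_size, hidden_layer_size, num_labels, X, y, lam):
    """Regularized cross-entropy cost of a two-layer sigmoid network."""
    m = X.shape[0]
    # Re-roll the flat parameter vector into the two weight matrices.
    split = hidden_layer_size * (input_layer_size + 1)
    Theta1 = nn_params[:split].reshape(hidden_layer_size, input_layer_size + 1)
    Theta2 = nn_params[split:].reshape(num_labels, hidden_layer_size + 1)

    # Feedforward, prepending a bias column at each layer.
    a1 = np.hstack([np.ones((m, 1)), X])
    a2 = np.hstack([np.ones((m, 1)), sigmoid(a1 @ Theta1.T)])
    h = sigmoid(a2 @ Theta2.T)  # m x num_labels

    # One-hot encode labels 1..num_labels (Octave-style indexing).
    Y = np.eye(num_labels)[y - 1]

    # Cross-entropy plus L2 penalty on the non-bias weights.
    J = -np.sum(Y * np.log(h) + (1 - Y) * np.log(1 - h)) / m
    J += lam / (2 * m) * (np.sum(Theta1[:, 1:] ** 2) + np.sum(Theta2[:, 1:] ** 2))
    return J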
denzilc / SpamLord.py
Created March 24, 2012 21:47
NLP-PA1
import sys
import os
import re
import pprint
my_first_pat = "(?:[a-zA-Z]+[\w+\.\-]+[\-a-zA-Z]+)[ ]*(?:@|\bat\b|\(at\)|\bwhere\b|\b@\b)[ ]*(?:(?:(?:[\-a-zA-Z]+)[ ]*(?:\.|\ |\bdot\b|\bdom\b|\bdt\b)[ ]*(?:[\-a-zA-Z]+)[ ]*(?:\.|\ |\bdot\b|\bdom\b)[ ]*(?:[\-a-zA-Z\ ]+))|(?:(?:(?:[\-a-zA-Z]+)[ ]*(?:\.|\ |\bdot\b|\bdom\b|\bdt\b)[ ]*(?:[\-a-zA-Z\ ]+))))"
def striphtml(data):
    return re.sub(r'<[^>]*?>', ' ', data)
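
The pattern above targets obfuscated addresses ("name at domain dot edu" and friends) as well as plain ones. A small harness for exercising it; the sample strings are my own, and what each one actually yields depends on exactly how the pattern's alternations interact, which is the point of testing.

samples = [
    'contact me at jurafsky at stanford dot edu',
    'manning @ cs.stanford.edu',
    'no address on this line',
]
for line in samples:
    # Strip any markup first, then scan for the first candidate address.
    m = re.search(my_first_pat, striphtml(line))
    print('%r -> %s' % (line, m.group(0) if m else None))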
denzilc / FeatureFactory.py
Created April 10, 2012 20:46 — forked from anonymous/FeatureFactory.py
NLP PA4 Greedy Search
import json, sys, collections
import base64
from Datum import Datum
class FeatureData:
    def __init__(self, words, prevLabel, position,
                 wordsDict):
        self.w = words
        self.pL = prevLabel
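
The preview ends mid-constructor. FeatureData evidently bundles the current words, the previously assigned label, and the position, which is the feature signature of a greedy left-to-right sequence tagger. A hedged sketch of that decoding loop follows; the scoring function and label set are placeholders, since the gist's actual classifier is not shown in the preview.

# Hypothetical greedy decoder: tags each token using the label
# predicted for the previous token as a feature.
LABELS = ['O', 'PERSON']  # placeholder label set

def score(word, prev_label, position, label):
    """Placeholder scorer; the gist presumably uses a trained model here."""
    return 1.0 if label == 'O' else 0.0

def greedy_decode(words):
    labels = []
    prev = 'O'  # conventional start label
    for i, w in enumerate(words):
        best = max(LABELS, key=lambda lab: score(w, prev, i, lab))
        labels.append(best)
        prev = best
    return labels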