Instantly share code, notes, and snippets.

View municipios_brasil.py
from urllib2 import urlopen
from BeautifulSoup import BeautifulSoup
from time import sleep
BASE_URL = "http://www.cidades.com.br/"
def make_soup(url):
html = urlopen(url).read()
return BeautifulSoup(html)
View linregr.py
#Evaluate the linear regression
def compute_cost(X, y, theta):
'''
Comput cost for linear regression
'''
#Number of training samples
m = y.size
predictions = X.dot(theta).flatten()
View Apriori.py
#-*- coding:utf-8 - *-
def load_dataset():
"Load the sample dataset."
return [[1, 3, 4], [2, 3, 5], [1, 2, 3, 5], [2, 5]]
def createC1(dataset):
"Create a list of candidate item sets of size one."
View multlin.py
from numpy import loadtxt, zeros, ones, array, linspace, logspace, mean, std, arange
from mpl_toolkits.mplot3d import Axes3D
import matplotlib.pyplot as plt
from pylab import plot, show, xlabel, ylabel
#Evaluate the linear regression
def feature_normalize(X):
'''
Returns a normalized version of X where
View logistic_reg.py
from numpy import loadtxt, where, zeros, e, array, log, ones, append, linspace
from pylab import scatter, show, legend, xlabel, ylabel, contour, title
from scipy.optimize import fmin_bfgs
def sigmoid(X):
'''Compute the sigmoid function '''
#d = zeros(shape=(X.shape))
den = 1.0 + e ** (-1.0 * X)
View linregr.py
from numpy import loadtxt, zeros, ones, array, linspace, logspace
from pylab import scatter, show, title, xlabel, ylabel, plot, contour
#Evaluate the linear regression
def compute_cost(X, y, theta):
'''
Comput cost for linear regression
'''
#Number of training samples
View bmf25.py
# BM25F Model
def bm25(idf, tf, fl, avgfl, B, K1):
# idf - inverse document frequency
# tf - term frequency in the current document
# fl - field length in the current document
# avgfl - average field length across documents in collection
# B, K1 - free paramters
return idf * ((tf * (K1 + 1)) / (tf + K1 * (1 - B + B * (fl / avgfl))))
View log_regression.py
def sigmoid(X):
'''Compute the sigmoid function '''
#d = zeros(shape=(X.shape))
den = 1.0 + e ** (-1.0 * X)
d = 1.0 / den
return d
View log_regression.py
from numpy import loadtxt, where
from pylab import scatter, show, legend, xlabel, ylabel
#load the dataset
data = loadtxt('ex2data1.txt', delimiter=',')
X = data[:, 0:2]
y = data[:, 2]
pos = where(y == 1)
View tf_idf_final.py
#-*- coding: utf-8 -*-
import re
import nltk
from nltk.tokenize import RegexpTokenizer
from nltk import bigrams, trigrams
import math
stopwords = nltk.corpus.stopwords.words('portuguese')