def load_dataset():
    """Load the sample dataset.

    Returns:
        A list of four lists of integers. The nested lists are rebuilt on
        every call, so callers may mutate the result without affecting
        later calls.
    """
    return [[1, 3, 4], [2, 3, 5], [1, 2, 3, 5], [2, 5]]
def createC1(dataset):
"Create a list of candidate item sets of size one."
from numpy import loadtxt, where
from pylab import scatter, show, legend, xlabel, ylabel
# Load the dataset: parse the comma-delimited text file 'ex2data1.txt'
# into a two-dimensional array.
data = loadtxt('ex2data1.txt', delimiter=',')
# Columns 0 and 1 of every row go into X
# (presumably the input features -- confirm against the data file).
X = data[:, 0:2]
# Column 2 of every row goes into y
# (presumably the 0/1 class labels -- confirm against the data file).
y = data[:, 2]
# Locations of the entries of y that are equal to 1.
pos = where(y == 1)
import re
import nltk
from nltk.tokenize import RegexpTokenizer
from nltk import bigrams, trigrams
import math
# Portuguese stop-word list taken from NLTK's stopwords corpus.
# NOTE(review): presumably requires the corpus data to be installed
# locally -- confirm.
stopwords = nltk.corpus.stopwords.words('portuguese')
def generateRules(L, support_data, min_confidence=0.7):
"""Create the association rules
L: list of frequent item sets
support_data: support data for those itemsets
min_confidence: minimum confidence threshold
rules = []
for i in range(1, len(L)):
for freqSet in L[i]:
def sigmoid(X):
    """Compute the sigmoid function 1 / (1 + exp(-X)).

    Args:
        X: A number or a NumPy array. Arrays are processed element-wise.

    Returns:
        The sigmoid of X: a NumPy float for scalar input, or an array of
        the same shape as X for array input.
    """
    # Imported locally so the function does not rely on a module-level
    # ``e`` being in scope.
    from numpy import exp, logaddexp

    # 1 / (1 + exp(-X)) == exp(-log(exp(0) + exp(-X))).  logaddexp computes
    # the inner logarithm without forming exp(-X) on its own, so very
    # negative inputs give 0.0 instead of overflowing.
    return exp(-logaddexp(0.0, -1.0 * X))
"""
This module implements a recommender system that suggests
new friends based on 'mutual friends'.
"""
__author__ = 'Marcel Caraciolo <>'
from numpy import loadtxt, zeros, ones, array, linspace, logspace
from pylab import scatter, show, title, xlabel, ylabel, plot, contour
#Evaluate the linear regression
def compute_cost(X, y, theta):
Compute cost for linear regression
#Number of training samples
def predict(theta, X):
'''Predict whether the label
is 0 or 1 using learned logistic
regression parameters '''
m, n = X.shape
p = zeros(shape=(m, 1))
h = sigmoid(
for it in range(0, h.shape[0]):
from numpy import loadtxt, zeros, ones, array, linspace, logspace, mean, std, arange
from mpl_toolkits.mplot3d import Axes3D
import matplotlib.pyplot as plt
from pylab import plot, show, xlabel, ylabel
#Evaluate the linear regression
def feature_normalize(X):
Returns a normalized version of X where
#Author: Marcel Pinheiro Caraciolo
#Confusion Matrix Generator
#Version: 0.1
#email: caraciol at gmail . com
from pprint import pprint as _pretty_print
import math
class ConfusionMatrix(object):