This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# use natural language toolkit
import nltk
from nltk.corpus import stopwords
from nltk.stem.lancaster import LancasterStemmer

# word stemmer — Lancaster is an aggressive suffix-stripping stemmer
stemmer = LancasterStemmer()

# 3 classes of training data (only the first entry appears in this snippet)
training_data = []
training_data.append({"class":"greeting", "sentence":"how are you?"})
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# use natural language toolkit
import nltk
from nltk.stem.lancaster import LancasterStemmer

# word stemmer shared by the scoring functions below
stemmer = LancasterStemmer()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# 3 classes of training data; this snippet shows the "greeting" and
# "goodbye" examples (note "have a nice day" is listed twice for "goodbye")
training_data = [
    {"class": "greeting", "sentence": "how are you?"},
    {"class": "greeting", "sentence": "how is your day?"},
    {"class": "greeting", "sentence": "good day"},
    {"class": "greeting", "sentence": "how is it going today?"},
    {"class": "goodbye", "sentence": "have a nice day"},
    {"class": "goodbye", "sentence": "see you later"},
    {"class": "goodbye", "sentence": "have a nice day"},
]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# capture unique stemmed words in the training corpus
corpus_words = {}   # stem -> occurrence count (filled by the loop that follows this snippet)
class_words = {}    # class name -> list of stems seen in that class's sentences

# a set comprehension removes duplicate class labels; list() fixes an order
classes = list({a['class'] for a in training_data})
for c in classes:
    # prepare a list of words within each class
    class_words[c] = []

# loop through each sentence in our training data
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# calculate a score for a given class
def calculate_class_score(sentence, class_name, show_details=True):
    """Score `sentence` against one class.

    Adds 1 for every token whose Lancaster stem appears in
    class_words[class_name], and returns the total.
    NOTE(review): `show_details` is unused in this snippet — presumably the
    full version printed per-word matches when it is True; confirm upstream.
    """
    score = 0
    # tokenize each word in our new sentence
    for word in nltk.word_tokenize(sentence):
        # check to see if the stem of the word is in any of our classes
        if stemmer.stem(word.lower()) in class_words[class_name]:
            # treat each word with same weight
            score += 1
    # bug fix: the snippet never returned the accumulated score, so callers
    # (e.g. the print loop below) would format None
    return score
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# we can now calculate a score for a new sentence
sentence = "good day for us to have lunch?"

# now we can find the class with the highest score
for c in class_words:  # a dict iterates its keys directly
    print("Class: %s Score: %s \n" % (c, calculate_class_score(sentence, c)))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# calculate a score for a given class taking into account word commonality
def calculate_class_score(sentence, class_name, show_details=True):
    """Score `sentence` against one class, weighting each matched token by
    1 / (corpus-wide frequency of its stem), so common words count for less.

    NOTE(review): `show_details` is unused in this snippet — confirm upstream.
    """
    score = 0
    # tokenize each word in our new sentence
    for word in nltk.word_tokenize(sentence):
        # check to see if the stem of the word is in any of our classes
        if stemmer.stem(word.lower()) in class_words[class_name]:
            # treat each word with relative weight (rarer stems score higher)
            # NOTE(review): relies on Python 3 true division; under Python 2
            # 1/count truncates to 0 for count > 1 — confirm target version
            score += (1 / corpus_words[stemmer.stem(word.lower())])
    # bug fix: the snippet never returned the accumulated score
    return score

# the classify() snippet calls this implementation by the name
# calculate_class_score_commonality, which is otherwise undefined —
# expose it under that name too (backward-compatible addition)
calculate_class_score_commonality = calculate_class_score
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# return the class with highest score for sentence
def classify(sentence):
    """Return (best_class, best_score) for `sentence`, scoring each class
    with calculate_class_score_commonality."""
    high_class = None
    high_score = 0
    # loop through our classes
    for c in class_words.keys():
        # calculate score of sentence for each class
        score = calculate_class_score_commonality(sentence, c, show_details=False)
        # keep track of highest score
        if score > high_score:
            # NOTE(review): the snippet was truncated after this `if`; the
            # winner-tracking body and return below follow directly from the
            # high_class/high_score initialization above — confirm against
            # the original gist
            high_class = c
            high_score = score
    return high_class, high_score
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
class NeuralNetwork():
    """A single artificial neuron with 3 inputs and 1 output."""

    def __init__(self):
        # Seed the random number generator, so it generates the same numbers
        # every time the program runs.
        # NOTE(review): `random` here must be numpy's random module — the
        # stdlib `random` has no random((3, 1)) signature; the import is not
        # shown in this snippet, confirm it reads `from numpy import random`.
        random.seed(1)

        # We model a single neuron, with 3 input connections and 1 output
        # connection. We assign random weights to a 3 x 1 matrix, with values
        # in the range -1 to 1 and mean 0.
        self.synaptic_weights = 2 * random.random((3, 1)) - 1
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy

# The training set. We have 4 examples, each consisting of 3 input values
# and 1 output value.
# bug fix: the snippet imported `numpy` but called bare `array`, which is a
# NameError — qualify the calls with the module name.
training_set_inputs = numpy.array([[0, 0, 1], [1, 1, 1], [1, 0, 1], [0, 1, 0]])
training_set_outputs = numpy.array([[0, 1, 1, 0]]).T
OlderNewer