Skip to content

Instantly share code, notes, and snippets.

# Hold out 30% of the rows for evaluation; the remainder is used for training.
train, test = train_test_split(data, test_size=0.3)

# Every column except the last one is treated as a feature.
# NOTE(review): this assumes 'sentiment' is the final column - confirm.
cols = train.columns[:-1]

# Fit a multinomial Naive Bayes classifier (fit() returns the estimator
# itself), then predict labels for the held-out rows.
gnb = MultinomialNB().fit(train[cols], train['sentiment'])
y_pred = gnb.predict(test[cols])
print("Number of mislabeled points out of a total {} points : {}, performance {:05.2f}%"
.format(
# Partition the rows by label: 1 = positive review, 0 = negative review.
pos_reviews = data[data['sentiment'] == 1]
neg_reviews = data[data['sentiment'] == 0]

# Column-wise totals for each class. Every column is selected, so the
# 'sentiment' column itself is included in the sums.
pnum = np.array(pos_reviews[pos_reviews.columns].sum())
# Fixed: the column selector previously referenced the undefined name
# `ntg_reviews`, which raised NameError.
nnum = np.array(neg_reviews[neg_reviews.columns].sum())

# Boolean mask: True where a column's total is larger among the positive
# reviews than among the negative ones.
dif = pnum > nnum
# One row per entry of `lemmatized`, one column per entry of `top5000`;
# a cell is 1 when that vocabulary entry is contained in the document, else 0.
word_matrix = [
    [1 if term in doc else 0 for term in top5000]
    for doc in lemmatized
]

# Wrap the matrix in a DataFrame labelled by the vocabulary.
# NOTE(review): the index is built from pd.DataFrame(filtered_tokens) exactly
# as before - confirm that this is the intended row labelling.
features = pd.DataFrame(
    word_matrix,
    columns=top5000,
    index=pd.DataFrame(filtered_tokens),
)

# Attach the labels positionally (.values bypasses index alignment).
features['sentiment'] = data['sentiment'].values
from operator import itemgetter
from collections import Counter
# Flatten the per-document token lists into one long list of tokens.
flat_list = [token for doc in filtered_tokens for token in doc]

# (word, occurrences) pairs for every distinct token.
count = Counter(flat_list).items()

# Most frequent first. The ascending stable sort is reversed as a whole
# (rather than sorting with reverse=True) so that words with equal counts
# keep the same relative order as before.
sorted_count = sorted(count, key=itemgetter(1))[::-1]
# De-duplicated English stopword list from NLTK.
en_stopwords = list(set(nltk.corpus.stopwords.words('english')))

# Remove punctuation from the review texts and lower-case them.
# Fixed: iterating over the DataFrame itself yields its column labels
# ('text', 'sentiment'), not the reviews, so the 'text' column is iterated
# explicitly.
clean = [re.sub(r'[^\w\s]', '', i).lower() for i in data['text']]

# Tokenize the cleaned texts. Previously the raw data['text'] was tokenized,
# which left `clean` unused and kept punctuation tokens in the output.
tokens = [word_tokenize(x) for x in clean]
filtered_tokens = []
# tokens that are not stopwords collected here
for i in tokens:
# Build the labelled corpus: one row per movie-review file, holding the
# review text and a binary sentiment label (1 = positive, 0 = negative).
#
# Rows are collected in a plain list and converted once at the end.
# DataFrame.append copied the whole frame on every call and was removed in
# pandas 2.0. As a consequence the frame now carries a default 0..n-1 index
# instead of every row being labelled 0.
rows = []
for fileid in movie_reviews.fileids():  # `fileid` avoids shadowing builtin id()
    text = ' '.join(movie_reviews.words(fileid))
    # categories() returns a list of labels (e.g. ['pos']); comparing that
    # list to the string 'pos' was always False and labelled every review 0.
    sentiment = 1 if 'pos' in movie_reviews.categories(fileid) else 0
    rows.append({'text': text, 'sentiment': sentiment})

# Passing `columns` keeps both columns present even when no files are found.
data = pd.DataFrame(rows, columns=['text', 'sentiment'])
import nltk
nltk.download('all')
import regex as re
import pandas as pd
from sklearn.utils import shuffle
from nltk import LancasterStemmer
from nltk.tokenize import word_tokenize
from nltk.corpus import movie_reviews, stopwords
from sklearn.naive_bayes import MultinomialNB
@MarynaLongnickel
MarynaLongnickel / ball
Created April 6, 2018 15:01
rolling ball animation
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.animation import FuncAnimation
# Ten evenly spaced angles covering [-pi/2, pi/2].
p = np.linspace(-np.pi / 2, np.pi / 2, 10)

# Horizontal coordinate of each sample.
x = np.sin(p)

# Point table with one row per point and columns (x, y, 1). The first half
# of the rows uses y = cos(p), the second half uses y = -cos(p), so every
# point lies on the unit circle.
# NOTE(review): the constant third column is presumably a homogeneous
# coordinate for later transforms - confirm against the animation code.
v = np.column_stack((
    np.tile(x, 2),
    np.concatenate((np.cos(p), -np.cos(p))),
    np.ones(2 * len(p)),
))