Skip to content

Instantly share code, notes, and snippets.

View antondevv's full-sized avatar

Anton Franzen antondevv

View GitHub Profile
@antondevv
antondevv / clean.py
Created November 3, 2021 10:29
import
import string
import re
@antondevv
antondevv / punct.py
Last active November 3, 2021 10:35
Remove punctuation
# Character class matching any single ASCII punctuation mark. re.escape keeps
# characters such as ']' and '\' literal inside the brackets.
remove_punctuation = re.compile('[%s]' % re.escape(string.punctuation))
# Strip punctuation from every token, then drop tokens that consisted of
# nothing but punctuation so empty strings never reach the later steps.
tokens = [
    cleaned
    for cleaned in (remove_punctuation.sub('', w) for w in tokenized)
    if cleaned
]
@antondevv
antondevv / nltk.py
Created November 3, 2021 10:48
nltk
from nltk.corpus import stopwords

# English stop-word list from the NLTK corpus, stored as a set so the
# membership test below does not scan a list for every token.
stop_words = {*stopwords.words('english')}
# Keep only the tokens that are not stop words.
tokens = [token for token in tokens if token not in stop_words]
@antondevv
antondevv / stemmer.py
Created November 3, 2021 10:56
porterstemmer
from nltk.stem.porter import PorterStemmer

# A single stemmer instance is reused for every token.
porter = PorterStemmer()
# Replace each token with its Porter stem, preserving token order.
stemmed_words = list(map(porter.stem, tokens))
@antondevv
antondevv / counter.py
Created November 3, 2021 11:02
Counter
from collections import Counter

# Tally how many times each stemmed word occurs.
vocably = Counter(stemmed_words)
@antondevv
antondevv / vector.py
Created November 3, 2021 11:42
vector
# Assign every distinct word a position in the vector, following the
# Counter's own iteration order.
indexing_words = {word: i for i, word in enumerate(vocably)}
# One slot per vocabulary word, filled with that word's occurrence count.
vector = np.zeros(len(vocably))
for key, times in vocably.items():
    vector[indexing_words[key]] = times
@antondevv
antondevv / vocab.py
Created November 3, 2021 23:30
Create Vocabulary
# Vocabulary of distinct stems. Sorted so every word keeps the same index on
# every run; the iteration order of a set of strings can differ between
# interpreter runs, which would reshuffle the vector positions.
unique_words = sorted(set(stemmed_words))
@antondevv
antondevv / vector.py
Created November 4, 2021 00:55
Vector
# Zero-filled float vector with one slot per vocabulary word.
vector = np.zeros(shape=(len(unique_words),), dtype=float)
@antondevv
antondevv / bag_of_words.py
Created November 4, 2021 01:05
bag of words
# Bag-of-words vector: slot i holds how often unique_words[i] occurs in
# stemmed_words.
vector = np.zeros(len(unique_words))
# Look each word's slot up once instead of rescanning unique_words for
# every token.
word_positions = {word: i for i, word in enumerate(unique_words)}
for w in stemmed_words:
    position = word_positions.get(w)
    # Words missing from the vocabulary are skipped, as in the nested scan.
    if position is not None:
        vector[position] += 1
import string
import re
from nltk.corpus import stopwords
from collections import Counter
from nltk.stem.porter import PorterStemmer
import numpy as np
# Sample sentence, lower-cased up front so later comparisons are
# case-insensitive.
text = 'What a beautiful day to be outside, incredibly beautiful day!'.lower()