Skip to content

Instantly share code, notes, and snippets.

@rodrigobaron
Last active February 12, 2018 13:59
Show Gist options
  • Save rodrigobaron/e37641913f8156b8eea73201c80841d8 to your computer and use it in GitHub Desktop.
Save rodrigobaron/e37641913f8156b8eea73201c80841d8 to your computer and use it in GitHub Desktop.
A simple POC of sentiment analysis using a small Portuguese corpus
"""
A simple POC of sentiment analysis using a small Portuguese corpus
- The corpus can be downloaded from http://www.linguateca.pt/Repositorio/ReLi/ReLi-Lex.rar
  (please visit http://www.linguateca.pt/Repositorio/ReLi/ before downloading).
"""
from textblob import TextBlob
from textblob.classifiers import NaiveBayesClassifier
import os
import re
# Directory holding the extracted ReLi-Lex lexicon files.
base_path = 'ReLi-Lex'

# Lexicon entries are the texts written between square brackets; the group
# captures only the text inside the brackets. A raw string is required here:
# "\[" is not a valid string escape and triggers a warning on modern Python.
_ENTRY_PATTERN = re.compile(r'\[(.*?)\]')

# Training data as (text, label) tuples, label being 'pos' or 'neg'.
train = []
files = [os.path.join(base_path, f) for f in os.listdir(base_path)]
for path in files:
    # Decide the label from the file name alone so that the directory part of
    # the path (base_path) can never influence it.
    label = 'pos' if '_Positivos' in os.path.basename(path) else 'neg'
    # NOTE(review): the file is read with the platform default encoding;
    # pass encoding=... explicitly once the corpus encoding is confirmed.
    with open(path, 'r') as content_file:
        content = content_file.read()
    train.extend((entry, label) for entry in _ENTRY_PATTERN.findall(content))

# Train a Naive Bayes classifier on the lexicon entries and use it to
# classify each sentence of a sample text.
cl = NaiveBayesClassifier(train)
blob = TextBlob("A cerveja é boa. Mas a ressaca é horrível.", classifier=cl)
for sentence in blob.sentences:
    print("[%s] %s" % (sentence.classify(), sentence))
# Output shown in the original gist:
# [pos] A cerveja é boa.
# [neg] Mas a ressaca é horrível.
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment