Skip to content

Instantly share code, notes, and snippets.

@vipmax
Last active November 23, 2020 14:47
Show Gist options
  • Save vipmax/27800a7beca430adf37fef49b292a07a to your computer and use it in GitHub Desktop.
Save vipmax/27800a7beca430adf37fef49b292a07a to your computer and use it in GitHub Desktop.
import sys

# Python 2 only: sys.setdefaultencoding() is removed from the sys module at
# interpreter start-up, and reload(sys) brings it back so the implicit
# str <-> unicode codec can be switched to UTF-8.
# Python 3 has no reload() builtin and already defaults to UTF-8, so the hack
# is skipped there instead of failing with NameError.
if sys.version_info[0] < 3:
    reload(sys)  # Reload does the trick!
    sys.setdefaultencoding('UTF8')
import tarfile

# extract dictionary package
# NOTE: extractall() writes whatever member paths the archive contains,
# relative to the current working directory -- only use a trusted archive.
# The with-block closes the archive even if extraction raises.
with tarfile.open("polyglot_data/sentiment2/ru/ru.sent.pkl.tar.bz2") as tar:
    tar.extractall()
# read all new words
import io

# Each non-blank line of project/data.csv is split on commas: the first field
# is the word, the second is parsed as an integer.  The integer is wrapped in
# a one-element list before being stored.
new_words = []
new_words_values = []
# The with-block closes the file even if a line fails to parse.
with io.open('project/data.csv', 'r', encoding='utf8') as new_data_file:
    for line in new_data_file:
        if not line.strip():
            # A blank line (e.g. a trailing newline at end of file) has no
            # second field and would otherwise raise IndexError.
            continue
        fields = line.split(',')
        new_words.append(fields[0])
        # int() ignores surrounding whitespace, including the line ending.
        new_words_values.append([int(fields[1])])
for word, value in zip(new_words, new_words_values):
    print(u"{} {}".format(word, value))
# open dictionary
import pickle

# Pickle data is binary, so the file is opened with 'rb'; the with-block
# closes the handle as soon as the pair has been loaded.
# NOTE: pickle.load() can execute arbitrary code -- only load trusted files.
with open('data/tmp/sentiment/ru/ru.sent.pkl', 'rb') as f:
    dictionary_words, dictionary_words_values = pickle.load(f)
for word, value in zip(dictionary_words, dictionary_words_values):
    print(u"{} {}".format(word, value))
# Convert both sequences to plain lists so new entries can be appended later.
dictionary_words = list(dictionary_words)
dictionary_words_values = list(dictionary_words_values)
# add new words to dictionary
# A set of the known words makes each membership test O(1) instead of a scan
# of the whole list.  It is updated as words are added, so a word repeated in
# new_words is still reported as already existing on its second occurrence.
known_words = set(dictionary_words)
for word, value in zip(new_words, new_words_values):
    if word in known_words:
        print(u"already exist word = {}".format(word))
        continue
    print(u"adding  {} {}".format(word, value))
    known_words.add(word)
    dictionary_words.append(word)
    dictionary_words_values.append(value)
# write new dictionary to pickle file
result = (dictionary_words, dictionary_words_values)
# Pickle output is binary data, so the file is opened with 'wb': text mode
# can alter the bytes on Windows and is rejected by pickle.dump on Python 3.
with open('data/tmp/sentiment/ru/ru.sent.pkl', 'wb') as f:
    pickle.dump(result, f)
# add dictionary to package
import tarfile

# "w:bz2" writes a bzip2-compressed archive, matching the .tar.bz2 file name;
# plain "w" would store an uncompressed tar under that name.
# The with-block finalises and closes the archive even if add() raises.
with tarfile.open("polyglot_data/sentiment2/ru/ru.sent.pkl.tar.bz2", "w:bz2") as tar:
    tar.add("data/tmp/sentiment/ru/ru.sent.pkl")
# Install system prerequisites (pip, Python headers, NumPy, ICU headers); -y skips the confirmation prompt.
sudo apt-get install python-pip python-dev python-numpy libicu-dev -y
# Install the polyglot package system-wide.
sudo pip install polyglot
# Download the English and Russian sentiment2 resources with the polyglot CLI.
polyglot download sentiment2.en
polyglot download sentiment2.ru
import sys

# Python 2 only: sys.setdefaultencoding() is removed from the sys module at
# interpreter start-up, and reload(sys) brings it back so the implicit
# str <-> unicode codec can be switched to UTF-8.
# Python 3 has no reload() builtin and already defaults to UTF-8, so the hack
# is skipped there instead of failing with NameError.
if sys.version_info[0] < 3:
    reload(sys)  # Reload does the trick!
    sys.setdefaultencoding('UTF8')
from polyglot.text import Text

# Print a two-column table: every word of the sample sentence next to the
# polarity value polyglot reports for it.
text = Text("Нужно построить зиккурат")
header = "{:<16}{}".format("Word", "Polarity")
rule = "-" * 30
print(header + "\n" + rule)
for word in text.words:
    row = "{:<16}{:>2}".format(word, word.polarity)
    print(row)
# -*- coding: utf-8 -*-
from polyglot.text import Text

# Print a two-column table: every word of the sample sentence next to the
# polarity value polyglot reports for it.  The sentence is passed in as a
# unicode literal.
text = Text(u"Нужно построить зиккурат")
header = "{:<16}{}".format("Word", "Polarity")
rule = "-" * 30
print(header + "\n" + rule)
for word in text.words:
    # Each word is encoded to UTF-8 before it is formatted into the row.
    row = "{:<16}{:>2}".format(word.encode('utf-8'), word.polarity)
    print(row)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment