I hereby claim:
- I am nempickaxe on github.
- I am ilaichi (https://keybase.io/ilaichi) on keybase.
- I have a public key ASC0peYsZX_Z7LwCfPjY9FJz_772TLP9XsoLON6QsTED-go
To claim this, I am signing this object:
import re | |
import nltk | |
import emoji | |
from nltk.tokenize import word_tokenize | |
def tokenize(corpus): | |
data = re.sub(r'[,!?;-]+', '.', corpus) | |
data = nltk.word_tokenize(data) # tokenize string to words | |
data = [ ch.lower() for ch in data | |
if ch.isalpha() |
import textwrap | |
import PIL | |
from PIL import ImageFont | |
from PIL import Image | |
from PIL import ImageDraw | |
def text2png(text, fullpath, color = "#000", bgcolor = "#FFF", fontfullpath = None, fontsize = 13, leftpadding = 3, rightpadding = 3, width = 2000): | |
REPLACEMENT_CHARACTER = '\uFFFD' | |
NEWLINE_REPLACEMENT_STRING = ' ' + REPLACEMENT_CHARACTER + ' ' |
def get_lower_tri_heatmap(df, output="cooc_matrix.png"): | |
mask = np.zeros_like(df, dtype=np.bool) | |
mask[np.triu_indices_from(mask)] = True | |
# Want diagonal elements as well | |
mask[np.diag_indices_from(mask)] = False | |
# Set up the matplotlib figure | |
f, ax = plt.subplots(figsize=(11, 9)) |
I hereby claim:
To claim this, I am signing this object:
import nltk | |
from nltk.tokenize import WordPunctTokenizer | |
from nltk.collocations import BigramCollocationFinder | |
from nltk.metrics import BigramAssocMeasures | |
from nltk.corpus import stopwords | |
nltk.download('stopwords') | |
from nltk.collocations import TrigramCollocationFinder | |
from nltk.metrics import TrigramAssocMeasures | |
from collections import Counter |
import dbm, os | |
import cPickle as pickle | |
from gensim.models import Word2Vec | |
import numpy as np | |
def save_model(model, directory): | |
model.init_sims() # making sure syn0norm is initialised | |
if not os.path.exists(directory): | |
os.makedirs(directory) | |
# Saving indexes as DBM'ed dictionary |