I hereby claim:
- I am nempickaxe on github.
- I am ilaichi (https://keybase.io/ilaichi) on keybase.
- I have a public key ASC0peYsZX_Z7LwCfPjY9FJz_772TLP9XsoLON6QsTED-go
To claim this, I am signing this object:
import dbm, os | |
import cPickle as pickle | |
from gensim.models import Word2Vec | |
import numpy as np | |
def save_model(model, directory):
    """Prepare ``model`` and the output ``directory`` for saving.

    Calls ``model.init_sims()`` and makes sure ``directory`` exists.

    NOTE(review): only the opening of this function is present in this
    excerpt. The code that actually writes the DBM index (and anything
    after it) is missing and must be restored from the original source.

    :param model: object exposing ``init_sims()`` -- presumably a gensim
        ``Word2Vec`` model, given the imports above; confirm.
    :param directory: path of the output directory; created when absent.
    :raises OSError: if ``directory`` cannot be created.
    """
    model.init_sims()  # making sure syn0norm is initialised
    # Create first, check afterwards: this avoids the race between
    # ``os.path.exists`` and ``os.makedirs`` and works on Python 2 and 3.
    try:
        os.makedirs(directory)
    except OSError:
        if not os.path.isdir(directory):
            raise
    # Saving indexes as DBM'ed dictionary
import nltk | |
from nltk.tokenize import WordPunctTokenizer | |
from nltk.collocations import BigramCollocationFinder | |
from nltk.metrics import BigramAssocMeasures | |
from nltk.corpus import stopwords | |
nltk.download('stopwords') | |
from nltk.collocations import TrigramCollocationFinder | |
from nltk.metrics import TrigramAssocMeasures | |
from collections import Counter |
I hereby claim:
To claim this, I am signing this object:
def get_lower_tri_heatmap(df, output="cooc_matrix.png"):
    """Start building a lower-triangle heatmap for ``df``.

    Builds a boolean mask shaped like ``df`` that is ``True`` strictly above
    the main diagonal (the diagonal itself is reset to ``False``), then
    creates a matplotlib figure with ``figsize=(11, 9)``.

    NOTE(review): the excerpt ends after the figure is created. The plotting
    code and any use of ``output`` are not visible here and must be restored
    from the original source.

    :param df: square 2-D array-like -- presumably a co-occurrence matrix,
        judging by the default file name; confirm.
    :param output: file name, unused in the visible part of the function.
    """
    # Builtin ``bool``: the ``np.bool`` alias was deprecated in NumPy 1.20
    # and removed in 1.24, where it raises AttributeError.
    mask = np.zeros_like(df, dtype=bool)
    mask[np.triu_indices_from(mask)] = True
    # Want diagonal elements as well
    mask[np.diag_indices_from(mask)] = False
    # Set up the matplotlib figure
    f, ax = plt.subplots(figsize=(11, 9))
import textwrap | |
import PIL | |
from PIL import ImageFont | |
from PIL import Image | |
from PIL import ImageDraw | |
def text2png(text, fullpath, color="#000", bgcolor="#FFF", fontfullpath=None,
             fontsize=13, leftpadding=3, rightpadding=3, width=2000):
    """Set up the constants used for rendering ``text``.

    NOTE(review): only the first two statements of the body are present in
    this excerpt; none of the parameters is used in the visible code, so
    their meaning can only be inferred from their names. The rendering logic
    must be restored from the original source.
    """
    # U+FFFD, the Unicode replacement character.
    REPLACEMENT_CHARACTER = '\uFFFD'
    # The replacement character padded with one space on each side.
    NEWLINE_REPLACEMENT_STRING = ' ' + REPLACEMENT_CHARACTER + ' '
import re | |
import nltk | |
import emoji | |
from nltk.tokenize import word_tokenize | |
def tokenize(corpus): | |
data = re.sub(r'[,!?;-]+', '.', corpus) | |
data = nltk.word_tokenize(data) # tokenize string to words | |
data = [ ch.lower() for ch in data | |
if ch.isalpha() |
import re | |
import yaml | |
def parse_config(vars_dict, path=None, data=None, tag='!ENV'): | |
""" | |
Load a yaml configuration file and resolve any environment variables | |
The environment variables must have !ENV before them and be in this format | |
to be parsed: $<VAR_NAME>. | |
E.g.: | |
database: |
def get_interval(space_list, width):
    """Return the element just before the first one that exceeds ``width``.

    Consecutive pairs of ``space_list`` are scanned in order; as soon as the
    second element of a pair is greater than ``width``, the first element of
    that pair is returned. The very first element is therefore never compared
    against ``width`` itself. When no later element exceeds ``width`` the last
    element is returned.

    :param space_list: non-empty sequence supporting slicing and indexing.
    :param width: threshold compared against each element after the first.
    :return: an element of ``space_list``.
    :raises IndexError: if ``space_list`` is empty.
    """
    for current, following in zip(space_list, space_list[1:]):
        if following > width:
            return current
    return space_list[-1]
def get_subtracted_list(space_list, width):
    """Subtract ``width`` from every element, clamping negatives to zero.

    Each result is converted with ``int``, so fractional differences are
    truncated toward zero. Using ``max`` instead of the arithmetic identity
    ``(d + abs(d)) / 2`` avoids a float division, which would lose precision
    for integers larger than 2**53.

    :param space_list: iterable of numbers.
    :param width: number subtracted from each element.
    :return: list of non-negative ``int`` values, one per input element.
    """
    return [int(max(value - width, 0)) for value in space_list]
import sys
from types import ModuleType, FunctionType
from gc import get_referents

# Custom objects know their class.
# Function objects seem to know way too much, including modules.
# Exclude modules as well.
# NOTE(review): presumably these are the types to skip when walking objects
# with ``get_referents`` (imported above); the consumer is not visible in
# this excerpt -- confirm.
BLACKLIST = (type, ModuleType, FunctionType)
def read_mongo_collection(uri, pipeline=None, given_schema=None, spark=None): | |
""" | |
:param uri: uri for mongo connection | |
:param pipeline: pipeline option for pushing queries to mongo | |
:param given_schema: schema option, will read in mentioned schema | |
:return: dataframe after reading from mongo | |
""" | |
if pipeline: | |
if not given_schema: | |
return spark.read.format("com.mongodb.spark.sql.DefaultSource").option("pipeline", pipeline).option( |