Nem_pickaxe Nempickaxe

## preprocessing_steps.py
import re
import nltk
import emoji
from nltk.tokenize import word_tokenize

def tokenize(corpus):
    data = re.sub(r'[,!?;-]+', '.', corpus)
    data = nltk.word_tokenize(data)  # tokenize string to words
    data = [ ch.lower() for ch in data
             if ch.isalpha()

## text2png.py
import textwrap
import PIL
from PIL import ImageFont
from PIL import Image
from PIL import ImageDraw

def text2png(
    text,
    fullpath,
    color="#000",
    bgcolor="#FFF",
    fontfullpath=None,
    fontsize=13,
    leftpadding=3,
    rightpadding=3,
    width=2000,
):
    """Set up the placeholder strings used when handling ``text``.

    NOTE(review): only the constant definitions are visible for this
    function; none of the parameters is read by the statements below.
    Judging by the name and the PIL imports, the function presumably
    renders ``text`` into a PNG written to ``fullpath`` -- confirm
    against the complete source.
    """
    # U+FFFD is the Unicode "replacement character".
    REPLACEMENT_CHARACTER = '\uFFFD'
    # The marker is the replacement character with one space on each side.
    pad = ' '
    NEWLINE_REPLACEMENT_STRING = pad + REPLACEMENT_CHARACTER + pad

## create_lower_triangle_heatmap.py
def get_lower_tri_heatmap(df, output="cooc_matrix.png"):
    """Build the mask and figure for a lower-triangle heatmap of ``df``.

    The mask is True strictly above the main diagonal and False on and
    below it, so the diagonal stays visible.

    Args:
        df: Square 2-D array-like (presumably a pandas DataFrame holding
            a co-occurrence matrix -- TODO confirm against callers).
        output: Image path; not used by the statements visible here.
    """
    # ``np.bool`` was a deprecated alias of the builtin ``bool`` and was
    # removed in NumPy 1.24; the builtin works on every NumPy version.
    mask = np.zeros_like(df, dtype=bool)
    mask[np.triu_indices_from(mask)] = True

    # Want diagonal elements as well
    mask[np.diag_indices_from(mask)] = False

    # Set up the matplotlib figure
    f, ax = plt.subplots(figsize=(11, 9))

## keybase.md

      
              1 file
            
          
              0 forks
            
          
              0 comments
            
          
              0 stars
            
          
                Nempickaxe
                / keybase.md
            
            
              Created
              May 26, 2019 06:22
            
              
                keybase public key
              
          
    Keybase proof

I hereby claim:

I am nempickaxe on github.
I am ilaichi (https://keybase.io/ilaichi) on keybase.
I have a public key ASC0peYsZX_Z7LwCfPjY9FJz_772TLP9XsoLON6QsTED-go

To claim this, I am signing this object:

  
## get_grams.py
import nltk
from nltk.tokenize import WordPunctTokenizer
from nltk.collocations import BigramCollocationFinder
from nltk.metrics import BigramAssocMeasures
from nltk.corpus import stopwords
nltk.download('stopwords')
from nltk.collocations import TrigramCollocationFinder
from nltk.metrics import TrigramAssocMeasures

from collections import Counter

## word2vec.py
import dbm, os
import cPickle as pickle
from gensim.models import Word2Vec
import numpy as np

def save_model(model, directory):
    """Initialise ``model`` and make sure ``directory`` exists.

    Args:
        model: Object exposing ``init_sims()`` (presumably a gensim
            ``Word2Vec`` model -- TODO confirm).
        directory: Path that is created, with any missing parents, when
            nothing exists at that location yet.

    NOTE(review): the DBM export announced by the trailing comment is not
    part of the code visible here.
    """
    # making sure syn0norm is initialised
    model.init_sims()

    directory_missing = not os.path.exists(directory)
    if directory_missing:
        os.makedirs(directory)
    # Saving indexes as DBM'ed dictionary
	import re
	import nltk
	import emoji
	from nltk.tokenize import word_tokenize

	def tokenize(corpus):
	data = re.sub(r'[,!?;-]+', '.', corpus)
	data = nltk.word_tokenize(data) # tokenize string to words
	data = [ ch.lower() for ch in data
	if ch.isalpha()
	import textwrap
	import PIL
	from PIL import ImageFont
	from PIL import Image
	from PIL import ImageDraw

	def text2png(text, fullpath, color = "#000", bgcolor = "#FFF", fontfullpath = None, fontsize = 13, leftpadding = 3, rightpadding = 3, width = 2000):
		"""Set up the placeholder strings used when handling ``text``.

		NOTE(review): only the constant definitions are visible for this
		function; none of the parameters is read by the statements below.
		Presumably it renders ``text`` into a PNG at ``fullpath`` --
		confirm against the complete source.
		"""
		# The body had lost its indentation and sat at the same level as
		# the ``def`` line; it is nested again so the function parses.
		# U+FFFD is the Unicode "replacement character".
		REPLACEMENT_CHARACTER = '\uFFFD'
		NEWLINE_REPLACEMENT_STRING = ' ' + REPLACEMENT_CHARACTER + ' '
	def get_lower_tri_heatmap(df, output="cooc_matrix.png"):
		"""Build the mask and figure for a lower-triangle heatmap of ``df``.

		The mask is True strictly above the main diagonal and False on and
		below it, so the diagonal stays visible.

		Args:
			df: Square 2-D array-like (presumably a pandas DataFrame holding
				a co-occurrence matrix -- TODO confirm against callers).
			output: Image path; not used by the statements visible here.
		"""
		# The body had lost its indentation; it is nested under the ``def``
		# again. ``np.bool`` was removed in NumPy 1.24, so the builtin
		# ``bool`` is used instead.
		mask = np.zeros_like(df, dtype=bool)
		mask[np.triu_indices_from(mask)] = True

		# Want diagonal elements as well
		mask[np.diag_indices_from(mask)] = False

		# Set up the matplotlib figure
		f, ax = plt.subplots(figsize=(11, 9))
	import nltk
	from nltk.tokenize import WordPunctTokenizer
	from nltk.collocations import BigramCollocationFinder
	from nltk.metrics import BigramAssocMeasures
	from nltk.corpus import stopwords
	nltk.download('stopwords')
	from nltk.collocations import TrigramCollocationFinder
	from nltk.metrics import TrigramAssocMeasures

	from collections import Counter
	import dbm, os
	import cPickle as pickle
	from gensim.models import Word2Vec
	import numpy as np

	def save_model(model, directory):
		"""Initialise ``model`` and make sure ``directory`` exists.

		Args:
			model: Object exposing ``init_sims()`` (presumably a gensim
				``Word2Vec`` model -- TODO confirm).
			directory: Path that is created, with any missing parents, when
				nothing exists at that location yet.

		NOTE(review): the DBM export announced by the trailing comment is
		not part of the code visible here.
		"""
		# The body and the ``if`` suite had lost their indentation; the
		# nesting is restored so that ``os.makedirs`` only runs when the
		# path is missing.
		model.init_sims() # making sure syn0norm is initialised
		if not os.path.exists(directory):
			os.makedirs(directory)
		# Saving indexes as DBM'ed dictionary