Skip to content

Instantly share code, notes, and snippets.

View Nempickaxe's full-sized avatar
🤔
print('hi')

Nem_pickaxe Nempickaxe

🤔
print('hi')
View GitHub Profile
@Nempickaxe
Nempickaxe / preprocessing_steps.py
Created September 17, 2020 16:38
NLTK preprocessing function
import re
import nltk
import emoji
from nltk.tokenize import word_tokenize
def tokenize(corpus):
data = re.sub(r'[,!?;-]+', '.', corpus)
data = nltk.word_tokenize(data) # tokenize string to words
data = [ ch.lower() for ch in data
if ch.isalpha()
@Nempickaxe
Nempickaxe / text2png.py
Created July 14, 2020 11:54
Convert text to an image using Pillow
import textwrap
import PIL
from PIL import ImageFont
from PIL import Image
from PIL import ImageDraw
def text2png(text, fullpath, color = "#000", bgcolor = "#FFF", fontfullpath = None, fontsize = 13, leftpadding = 3, rightpadding = 3, width = 2000):
REPLACEMENT_CHARACTER = '\uFFFD'
NEWLINE_REPLACEMENT_STRING = ' ' + REPLACEMENT_CHARACTER + ' '
def get_lower_tri_heatmap(df, output="cooc_matrix.png"):
mask = np.zeros_like(df, dtype=np.bool)
mask[np.triu_indices_from(mask)] = True
# Want diagonal elements as well
mask[np.diag_indices_from(mask)] = False
# Set up the matplotlib figure
f, ax = plt.subplots(figsize=(11, 9))
@Nempickaxe
Nempickaxe / keybase.md
Created May 26, 2019 06:22
Keybase public key

Keybase proof

I hereby claim:

  • I am nempickaxe on github.
  • I am ilaichi (https://keybase.io/ilaichi) on keybase.
  • I have a public key ASC0peYsZX_Z7LwCfPjY9FJz_772TLP9XsoLON6QsTED-go

To claim this, I am signing this object:

@Nempickaxe
Nempickaxe / get_grams.py
Created February 26, 2019 09:12
Get bigrams and trigrams
import nltk
from nltk.tokenize import WordPunctTokenizer
from nltk.collocations import BigramCollocationFinder
from nltk.metrics import BigramAssocMeasures
from nltk.corpus import stopwords
nltk.download('stopwords')
from nltk.collocations import TrigramCollocationFinder
from nltk.metrics import TrigramAssocMeasures
from collections import Counter
@Nempickaxe
Nempickaxe / word2vec.py
Last active September 17, 2020 16:48 — forked from moustaki/Faster save-load for word2vec
Faster save-load for word2vec
import dbm, os
import cPickle as pickle
from gensim.models import Word2Vec
import numpy as np
def save_model(model, directory):
model.init_sims() # making sure syn0norm is initialised
if not os.path.exists(directory):
os.makedirs(directory)
# Saving indexes as DBM'ed dictionary