Skip to content

Instantly share code, notes, and snippets.

@andcarnivorous
Created January 5, 2019 17:40
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save andcarnivorous/836e5eabda2d1ce0874778a6f7db4e09 to your computer and use it in GitHub Desktop.
Save andcarnivorous/836e5eabda2d1ce0874778a6f7db4e09 to your computer and use it in GitHub Desktop.
from matplotlib import cm as cm
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
from nltk.tokenize import word_tokenize
import re
from scipy import sparse
def _tokenize(text):
    """Lower-case *text*, blank out punctuation and split it into word tokens."""
    # Raw string avoids invalid-escape warnings. The class covers the
    # characters ( ) - , ; : " . ? ! _ [ ] plus newline and apostrophe, each
    # replaced by a single space before tokenizing.
    cleaned = re.sub(r"[()\-,;:\".?!_\[\]\n']", " ", text.lower())
    return word_tokenize(cleaned)


def _repetition_array(tokens):
    """Build the square self-similarity matrix for a non-empty token list.

    Cell ``[i, j]`` holds the number of occurrences of ``tokens[i]`` in the
    whole list when ``tokens[i] == tokens[j]``, and 0 otherwise.
    """
    # ``codes`` maps every token to the index of its unique value, ``counts``
    # holds how often each unique value occurs.
    _, codes, counts = np.unique(tokens, return_inverse=True, return_counts=True)
    freqs = counts[codes]
    # Broadcasting compares every token with every other token in one step
    # and yields the full n x n matrix, including the last row.
    same_word = codes[:, np.newaxis] == codes[np.newaxis, :]
    return np.where(same_word, freqs[:, np.newaxis], 0)


def repetitionMatrix(_input, title="", kind=False, cmap="Reds"):
    """Plot a word-repetition (self-similarity) matrix of a text.

    The text is lower-cased, stripped of punctuation and tokenized with
    ``nltk.tokenize.word_tokenize``. For a text of ``n`` tokens an ``n x n``
    matrix is drawn in which cell ``(i, j)`` is the frequency of token ``i``
    if tokens ``i`` and ``j`` are the same word, and 0 otherwise.

    Args:
        _input: The text to analyse, as a string.
        title: Title placed on the plot.
        kind: Plot backend selector. ``"sns"`` draws a seaborn heatmap,
            ``"sparse"`` draws a ``plt.spy`` sparsity plot, any other value
            (including the default ``False``) uses ``plt.imshow``.
        cmap: Colormap name used by the heatmap and ``imshow`` variants.
            The ``"sparse"`` variant draws plain markers and ignores it.

    Raises:
        ValueError: If the text contains no tokens after cleaning.
    """
    tokens = _tokenize(_input)
    if not tokens:
        raise ValueError("repetitionMatrix needs a text with at least one word")

    matrix = _repetition_array(tokens)

    if kind == "sns":
        # Plot using seaborn; tick labels are shown every 50 tokens.
        sns.heatmap(matrix, cbar=False, square=True, cmap=cmap,
                    xticklabels=50, yticklabels=50).set_title(title)
    elif kind == "sparse":
        # precision=3: only cells whose value exceeds 3 are marked, i.e.
        # words occurring more than three times in the text.
        plt.spy(sparse.csr_matrix(matrix), markersize=4, precision=3)
        plt.title(title)
    else:
        plt.imshow(matrix, cmap=cmap)
        plt.title(title)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment