Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
from matplotlib import cm as cm
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
from nltk.tokenize import word_tokenize
import re
from scipy import sparse
def _repetition_counts(tokens):
    """Return the square repetition matrix for a non-empty token sequence.

    Cell ``(i, j)`` holds the number of times ``tokens[i]`` occurs in the
    whole sequence when ``tokens[i] == tokens[j]``, and 0 otherwise.
    """
    # Map every token to an integer id so the pairwise comparison is a single
    # vectorised operation instead of a Python-level double loop.
    _, ids, counts = np.unique(tokens, return_inverse=True, return_counts=True)
    same_word = ids[:, None] == ids[None, :]
    return np.where(same_word, counts[ids][:, None], 0)


def repetitionMatrix(_input, title="", kind=False, cmap="Reds"):
    """Plot a word self-similarity ("repetition") matrix for a text.

    The text is lower-cased, punctuation, newlines and apostrophes are
    replaced by spaces, and the result is tokenised with ``word_tokenize``.
    The plotted matrix has one row and one column per token; a cell is
    non-zero where the row token equals the column token, and its value is
    that word's total frequency in the text.

    Args:
        _input: Text to analyse.
        title: Title placed on the current axes.
        kind: ``"sns"`` draws a seaborn heatmap, ``"sparse"`` draws a
            ``plt.spy`` sparsity plot, any other value uses ``plt.imshow``.
        cmap: Colormap used by the ``"sns"`` and ``imshow`` renderings. It is
            not applied in ``"sparse"`` mode, which draws markers.

    Raises:
        ValueError: If the text contains no tokens after cleaning.
    """
    # One raw-string pattern covers both original substitutions; every match
    # becomes a space, so merging them does not change the cleaned text.
    cleaned = re.sub(r"""[()\-,;:".?!_\[\]\n']""", " ", _input.lower())

    tokens = word_tokenize(cleaned) if cleaned.strip() else []
    if not tokens:
        raise ValueError(
            "repetitionMatrix needs text containing at least one word"
        )

    # Full n x n matrix: includes the last token's row and also works for a
    # single-token text.
    matrix = _repetition_counts(tokens)

    if kind == "sns":
        # Plot using seaborn; it draws on the current axes, so the
        # plt.title call below labels this plot as well.
        sns.heatmap(matrix, cbar=False, square=True, cmap=cmap,
                    xticklabels=50, yticklabels=50)
    elif kind == "sparse":
        # precision=3 makes spy mark only cells whose value exceeds 3, i.e.
        # words that occur more than three times in the text.
        plt.spy(sparse.csr_matrix(matrix), markersize=4, precision=3)
    else:
        plt.imshow(matrix, cmap=cmap)
    plt.title(title)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.