Last active
December 9, 2020 23:19
-
-
Save pandanote-info/010a1b16450cf18f2d0d89e92d63271c to your computer and use it in GitHub Desktop.
matplotlibとProcessPoolExecutorを使用して動画用の画像を並列処理で作成するためのPython3のプログラム。
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python3 | |
# | |
# See https://pandanote.info/?p=6970 for details. | |
# | |
import datetime | |
import math | |
import json | |
import numpy as np | |
from scipy.sparse import lil_matrix | |
import io, sys | |
import argparse | |
import time | |
import seaborn as sns | |
import matplotlib as mpl | |
import matplotlib.pyplot as plt | |
import matplotlib.ticker as ticker | |
from matplotlib.ticker import FormatStrFormatter | |
from mpl_toolkits.mplot3d import Axes3D | |
import os | |
import concurrent.futures as cf | |
def generate3dimage(ab, dim, verbose, outfile, colbar_label, labels, fontsize = 1.5, scale = 1.0, view = None):
    """Render a sparse matrix as a 3D bar chart and save it to ``outfile``.

    One unit-square bar is drawn per matrix cell; the bar height and colour
    both come from the cell value.

    Args:
        ab: Sparse matrix exposing ``toarray()``; its values are the bar heights.
        dim: Shape tuple supplied by the caller.  Kept for interface
            compatibility; the bar grid is derived from ``ab`` itself so the
            coordinates always match the number of values.
        verbose: Diagnostic output is printed when this is 1 or greater.
        outfile: Path of the image file to write.
        colbar_label: Label text for the colour bar.
        labels: Keyword arguments forwarded to ``ax.set`` (axis labels).
        fontsize: Seaborn ``font_scale`` value.
        scale: Multiplier applied to the bar heights (used to animate growing
            bars); magnitudes below 1e-10 are drawn as exactly zero height.
        view: Optional ``(elevation, azimuth)`` pair for the camera.
    """
    sns.set_context("paper", font_scale=fontsize)
    plt.figure(figsize=(16, 12))
    ax = plt.axes(projection='3d')
    print(outfile)
    if verbose >= 1:
        print(labels)
    ax.set(**labels)
    if view:
        ax.view_init(elev=view[0], azim=view[1])

    # Densify once; the original converted the matrix three separate times.
    values = ab.toarray()
    n_rows, n_cols = values.shape
    # Default 'xy' meshgrid indexing yields (n_rows, n_cols) grids, so the
    # ravelled coordinates line up with the row-major ravel of ``values``.
    grid_x, grid_y = np.meshgrid(np.arange(n_cols), np.arange(n_rows))
    # Shift by half a bar width so each bar is centred on its integer index.
    x = grid_x.ravel() - 0.5
    y = grid_y.ravel() - 0.5
    dz = values.ravel()
    Z = np.zeros_like(dz)
    dx = 1
    dy = 1
    if verbose >= 1:
        print(x.shape)
        print(y.shape)
        print(Z.shape)
        print(dz.shape)

    peak = dz.max()
    norm = plt.Normalize(0, peak)
    colors = plt.cm.terrain_r(norm(dz))
    # The z range is fixed to the unscaled maximum so that frames of a
    # growing-bars animation share the same axis.
    ax.set_zlim(0, peak)
    heights = (dz * scale) if abs(scale) > 1e-10 else np.zeros_like(dz)
    ax.bar3d(x, y, Z, dx, dy, heights, color=colors)

    # Share the bar normalisation with the colour bar so both use one scale.
    colourMap = plt.cm.ScalarMappable(norm=norm, cmap=plt.cm.terrain_r)
    colourMap.set_array(dz)
    # The mappable is not attached to any Axes, so the Axes to take space
    # from must be named explicitly.
    plt.colorbar(colourMap, ax=ax).set_label(colbar_label)
    plt.savefig(outfile)
    plt.close('all')
def calculate_cosine_similarity(aa,articleids,resultout,statsout):
    """Compute pairwise cosine similarity between the rows of ``aa``.

    Only pairs with ``i < j`` are evaluated, so the returned matrix is
    strictly upper triangular (the diagonal and lower triangle stay zero).
    A pair involving an all-zero row gets similarity 0.0 rather than the
    NaN that a 0/0 division would produce (NaN is also not valid JSON).

    Args:
        aa: Sparse matrix with one row per article; must support
            ``shape`` and ``toarray()``.
        articleids: Sequence of article IDs, indexed like the rows of ``aa``.
        resultout: Path of the JSON file receiving the dense similarity
            matrix as a list of lists.
        statsout: Path of the JSON file receiving ``[[id_i, id_j], value]``
            entries sorted by descending similarity.

    Returns:
        A ``lil_matrix`` of shape ``(n_rows, n_rows)`` holding the similarities.
    """
    n_rows = aa.shape[0]
    # Densify once up front instead of converting two sparse rows per pair.
    rows = aa.toarray()
    norms = [math.sqrt(np.dot(row, row)) for row in rows]

    ab = lil_matrix((n_rows, n_rows))
    ast = []
    for i in range(n_rows):
        for j in range(i + 1, n_rows):
            if norms[i] == 0.0 or norms[j] == 0.0:
                similarity = 0.0
            else:
                similarity = float(np.dot(rows[i], rows[j]) / norms[i] / norms[j])
            ab[i, j] = similarity
            ast.append(((articleids[i], articleids[j]), similarity))

    # The ``with`` statement closes each file; no explicit close() is needed.
    with open(resultout, mode='w') as f:
        f.write(json.dumps(ab.toarray().tolist()))
    with open(statsout, mode='w') as f:
        f.write(json.dumps(sorted(ast, key=lambda x: x[1], reverse=True)))
    return ab
def _load_bag_of_words(path):
    """Read the frequency JSON file and build the article-by-word count matrix.

    The file is parsed as a JSON object mapping each word to a list of
    ``"article_id,frequency"`` strings.

    Returns:
        ``(bow, articleids, words)`` where ``bow`` is a ``lil_matrix`` with one
        row per article ID (ascending) and one column per word (file order).
    """
    with open(path, encoding='utf-8') as fh:
        freqlist = json.load(fh)
    words = list(freqlist)
    seen = set()
    for entries in freqlist.values():
        for entry in entries:
            article, _ = entry.split(",")
            seen.add(int(article))
    articleids = sorted(seen)
    # Dictionary lookup replaces the per-entry linear list.index() search.
    row_of = {article: row for row, article in enumerate(articleids)}
    bow = lil_matrix((len(articleids), len(words)))
    for col, entries in enumerate(freqlist.values()):
        for entry in entries:
            article, freq = map(int, entry.split(","))
            bow[row_of[article], col] = freq
    return bow, articleids, words


def _tf_idf(bow, verbose):
    """Return a TF-IDF weighted copy of ``bow`` (natural-log IDF)."""
    n_rows, n_cols = bow.shape
    if verbose >= 1:
        print("-- TF")
    weighted = bow.copy()
    if verbose > 1:
        np.set_printoptions(threshold=np.inf,formatter={'float': '{:.8f}'.format})
    for i in range(n_rows):
        row = bow.getrow(i)
        total = row.sum()
        if total == 0:
            # An article without any counted word keeps an all-zero row
            # instead of triggering a division by zero.
            continue
        weighted[i] = row/total
    if verbose >= 1:
        print(weighted.todense())
        print("-- TF-IDF(in)")
    for j in range(n_cols):
        col = weighted.getcol(j)
        nnz = col.getnnz()
        if nnz == 0:
            # A word that occurs in no article has no defined IDF; its column
            # is already all zeros.
            continue
        weighted[0:n_rows,j] = col*math.log(n_rows/nnz)
    if verbose >= 1:
        print(weighted.toarray())
    return weighted


def _render_frames(frame_args, parallel):
    """Call ``generate3dimage`` once per argument tuple in ``frame_args``.

    With ``parallel`` set, the calls are spread over a process pool and every
    result is awaited so that an exception raised in a worker is re-raised
    here instead of being silently discarded.
    """
    if parallel:
        with cf.ProcessPoolExecutor(max_workers=os.cpu_count()) as executor:
            futures = [executor.submit(generate3dimage, *job) for job in frame_args]
            for future in futures:
                future.result()
    else:
        for job in frame_args:
            generate3dimage(*job)


def main():
    """Parse the command line, compute TF-IDF (and optionally cosine similarity) and plot it."""
    start = time.time()
    sys.stdin = io.TextIOWrapper(sys.stdin.buffer, encoding='utf-8')
    sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8')
    parser = argparse.ArgumentParser(description='Command line options of freqreadtest.py')
    parser.add_argument('-i','--input-file', type=str, default='../../scala/pandacorpus/pandanote_frequencies_result_20201107211207.txt', help='Input file in JSON format')
    # const=1 so that a bare "-v" actually raises the verbosity (it was 0, a no-op).
    parser.add_argument('-v','--verbose', help='Increase output verbosity.', nargs='?', default=0, const=1, type=int)
    parser.add_argument('-t','--three-dimension', help='Output as 3D chart.', action="store_true")
    parser.add_argument('-m','--movie', help='Create files for 3D chart movie.', action="store_true")
    parser.add_argument('-p','--parallel', help='Create files for 3D chart in parallel.', action="store_true")
    parser.add_argument('-g','--grow-bars', help='Grows bar before rotate.', nargs='?', default=0.1, const=0.1, type=float)
    parser.add_argument('-d','--output-dir', help='Output directory for 3D chart.', type=str)
    parser.add_argument('-c','--cosine-similarity', help='Calculate cosine similarity between two articles.', action="store_true")
    parser.add_argument('-f','--font-scale', help='Size of font as a scale value.', nargs='?', type=float)
    args = parser.parse_args()
    print(args)
    params = vars(args)
    inputfile = params['input_file']
    verbose = params['verbose']
    three_dimension = params['three_dimension']
    movie = params['movie']
    parallel = params['parallel']
    grow_bars = params['grow_bars']
    output_dir = params['output_dir']
    cosine_similarity = params['cosine_similarity']
    fontsize = params['font_scale'] if params['font_scale'] else 1.5
    out_prefix = output_dir+"/" if output_dir else ""

    bow, articleids, words = _load_bag_of_words(inputfile)
    alen = len(articleids)
    wlen = len(words)
    if alen == 0 or wlen == 0:
        # Nothing to plot; also avoids dividing by zero in the rate below.
        sys.exit("No word frequencies found in {0:s}".format(inputfile))
    nzelemnum = len(bow.nonzero()[0])
    print("Matrix size: {0:d}x{1:d}, Number of element which has non-zero value: {2:d}".format(alen,wlen,nzelemnum))
    print("Rate of element which has non-zero value: {0:f}%".format(100.0*nzelemnum/alen/wlen))
    dim = bow.shape
    tfidf = _tf_idf(bow, verbose)

    # One timestamp is shared by every file written during this run.
    datetimestr = datetime.datetime.now().strftime('%Y%m%d%H%M%S')
    chartprefix = 'freqreadtest'
    if cosine_similarity:
        chartprefix = 'freqread_cosine_similarity'
        ab = calculate_cosine_similarity(
            tfidf,
            articleids,
            out_prefix+chartprefix+'_'+datetimestr+'.json',
            out_prefix+chartprefix+'_stats_'+datetimestr+'.json',
        )
        labels = {'xlabel':'Articles at https://pandanote.info/ (ID): column','ylabel':'Articles at https://pandanote.info/ (ID): row','zlabel':'Similarity'}
        colbar_label = 'Similarity'
    else:
        # Only the leading square part of the TF-IDF matrix is plotted.
        ab = tfidf[0:dim[0],0:dim[0]]
        labels = {'xlabel':'word(ID)','ylabel':'Articles at https://pandanote.info/ (ID)','zlabel':'Importance'}
        colbar_label = 'Importance'

    if not three_dimension:
        filename = out_prefix+chartprefix+'_'+datetimestr+'.png'
        sns.set_context("paper", font_scale=fontsize)
        plt.figure(figsize=(16, 12))
        ax = sns.heatmap(ab.toarray(),cmap="terrain_r" if cosine_similarity else "bone_r") # Blues,terrain,CMRmap_r
        if verbose >= 1:
            print(sns.color_palette())
        if cosine_similarity:
            ax.set(xlabel='Articles at https://pandanote.info/ (ID): column',ylabel='Articles at https://pandanote.info/ (ID): row')
        else:
            ax.set(xlabel='word(ID)',ylabel='Articles at https://pandanote.info/ (ID)')
        ax.xaxis.set_ticks_position('top')
        ax.xaxis.set_label_position('top')
        ax.xaxis.set_major_locator(ticker.MultipleLocator(20))
        ax.xaxis.set_major_formatter(FormatStrFormatter("%d"))
        ax.yaxis.set_major_locator(ticker.MultipleLocator(20))
        ax.yaxis.set_major_formatter(FormatStrFormatter("%d"))
        plt.savefig(filename)
        print("Output file: {0:s}".format(filename))
    else:
        sns.set_context("paper", font_scale=fontsize)
        if movie:
            if grow_bars:
                print("Grow rate: {0:f}".format(grow_bars))
                filenameprefix = out_prefix+chartprefix+'_3d_grow_bars_'+datetimestr
                print("Filename prefix for growing bars: {0:s}".format(filenameprefix))
                # Frames are numbered with a plain counter in both the parallel
                # and the sequential case; deriving the index from float floor
                # division could skip numbers and break the image sequence.
                grow_frames = [
                    (ab,dim,verbose,filenameprefix+"_{0:04d}.png".format(count),colbar_label,labels,fontsize,bar_scale,(30,0))
                    for count, bar_scale in enumerate(np.linspace(0, 1, num=int(1.0/grow_bars)))
                ]
                _render_frames(grow_frames, parallel)
            filenameprefix = out_prefix+chartprefix+'_3d_'+datetimestr
            print("Filename prefix for rotating plots: {0:s}".format(filenameprefix))
            angle_of_rotation = 360
            # One frame per degree of azimuth, 0 through 360 inclusive.
            rotate_frames = [
                (ab,dim,verbose,filenameprefix+"_{0:04d}.png".format(angle),colbar_label,labels,fontsize,1.0,(30,angle))
                for angle in range(0,angle_of_rotation+1)
            ]
            _render_frames(rotate_frames, parallel)
            # No single "Output file" is reported here: the movie frames are
            # identified by the prefixes printed above.
        else:
            filename = out_prefix+chartprefix+'_3d_'+datetimestr+'.png'
            generate3dimage(ab,dim,verbose,filename,colbar_label,labels,fontsize)
            print("Output file: {0:s}".format(filename))
    elapsed_time = time.time() - start
    print("Elapsed time:{0:f} sec.".format(elapsed_time))


if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment