# BlogBlocks/LuaData2Numpy.py

## LuaData2Numpy.py
# Example from test.txt:
#    This is a full set/block of information; it takes up two lines.
"""
kolbrin4/kolbrin4-BASE_BASE_GRU_current-learning-rate_0.002000000000_rnn-size_64,_train_loss_2.72439270,_epoch_0.46_evaluationloss_2.6701.t7
BASE :200:431000 epoch 0.464, train_loss = 2.72439270, grad/param norm = 4.4864e-01, time/batch = 1.8300s

The block above is parsed to get this information. There are hundreds and sometimes thousands of lines.
This information is used to create graphs of the model building process.

44,0.002000000000,64,3.01064547,0.23,3.0310,100431000,0.232,3.01064547,7.3745,01,1.6423,
puretest-brackets.data is a clean file enclosed in brackets of all this data. Then the puretest-brackets.data
is used to transfer it for storage in a numpy array file called puretest-brackets-strings.npy.

The following lines were added and/or changed to create the test.txt file
(line 366)
savefile = string.format('%s/%s_%s_GRU_current-learning-rate_%.12f_rnn-size_%s,_train_loss_%6.8f,_epoch_%.2f_evaluationloss_%.4f.t7', opt.checkpoint_dir, opt.savefile, ScriptName, optim_state.learningRate, opt.rnn_size, train_loss, epoch, val_loss)
file = io.open("test.txt", "a")
file:write(savefile,"\n")

(line 409)
info = string.format("%s :%d:%d epoch %.3f, train_loss = %6.8f, grad/param norm = %6.4e, time/batch = %.4fs", ScriptName, i, iterations, epoch, train_loss, grad_params:norm() / params:norm(), time)
end
if i % opt.eval_val_every == 0 or i == iterations then
file = io.open("test.txt", "a")
file:write(info,"\n")
"""

import re
import os
from time import sleep

# Input log appended to by the modified Lua training script, and the two
# files this script produces from it.
LOG_PATH = 'test.txt'
DATA_PATH = 'puretest-brackets.data'
NPY_PATH = 'puretest-brackets-strings'  # np.save appends ".npy"

# Letters and the separators "=", "_", ",", "-" are replaced by spaces so
# that the numbers between them split into separate fields.
_NOISE = re.compile(r'[A-Za-z=_,-]')


def clean_line(line):
    """Reduce one log line to its comma-separated numeric fields.

    The trailing newline and a trailing ".t7" checkpoint suffix are dropped,
    every letter and each of "=", "_", ",", "-" is turned into a space, and
    the remaining tokens are joined with commas.  ":" is removed outright and
    a "/" that ends a token is removed together with the space after it, so
    "kolbrin4/kolbrin4-..." yields "44,...".  A value written in scientific
    notation is split at its exponent ("4.4864e-01" -> "4.4864,01"), as in
    the sample output shown in the file header.

    Returns an empty string for a line that contains no fields.
    """
    # Strip the line ending first so the ".t7" suffix is also recognised on a
    # final line that has no trailing newline.
    line = line.rstrip('\r\n')
    if line.endswith('.t7'):
        line = line[:-len('.t7')]
    text = ' '.join(_NOISE.sub(' ', line).split())
    text = text.replace(':', '')
    text = text.replace('/ ', '')
    return text.replace(' ', ',')


def records_to_text(lines):
    """Return "[rec1,rec2,...]" built from the cleaned form of each line.

    Lines that clean to an empty string (for example blank lines) are
    skipped so they do not leave empty fields in the output.
    """
    records = [clean_line(line) for line in lines]
    return '[' + ','.join(rec for rec in records if rec) + ']'


def main():
    """Convert LOG_PATH into DATA_PATH and the NumPy string file NPY_PATH.

    Both outputs are overwritten on every run.  The input is read completely
    before anything is written, so a missing log does not leave a
    half-written data file behind.
    """
    # Imported here because the file's import header does not provide NumPy.
    import numpy as np

    with open(LOG_PATH, 'r') as log_file:
        bracketed = records_to_text(log_file)

    with open(DATA_PATH, 'w') as data_file:
        data_file.write(bracketed)

    # The .npy file stores the same text without the enclosing brackets, as a
    # single string value of the interpreter's native string type.
    payload = bracketed.replace(']', '').replace('[', '')
    np.save(NPY_PATH, np.asarray(payload, dtype=str))
    print("finished")


# Still runs when executed as a script or in a notebook cell, but no longer
# touches the filesystem when merely imported.
if __name__ == '__main__':
    main()
	# Example from test.txt:
	# This is a full set/block of information; it takes up two lines.
	"""
	kolbrin4/kolbrin4-BASE_BASE_GRU_current-learning-rate_0.002000000000_rnn-size_64,_train_loss_2.72439270,_epoch_0.46_evaluationloss_2.6701.t7
	BASE :200:431000 epoch 0.464, train_loss = 2.72439270, grad/param norm = 4.4864e-01, time/batch = 1.8300s

	The block above is parsed to get this information. There are hundreds and sometimes thousands of lines.
	This information is used to create graphs of the model building process.

	44,0.002000000000,64,3.01064547,0.23,3.0310,100431000,0.232,3.01064547,7.3745,01,1.6423,
	puretest-brackets.data is a clean file enclosed in brackets of all this data. Then the puretest-brackets.data
	is used to transfer it for storage in a numpy array file called puretest-brackets-strings.npy.

	The following lines were added and/or changed to create the test.txt file
	(line 366)
	savefile = string.format('%s/%s_%s_GRU_current-learning-rate_%.12f_rnn-size_%s,_train_loss_%6.8f,_epoch_%.2f_evaluationloss_%.4f.t7', opt.checkpoint_dir, opt.savefile, ScriptName, optim_state.learningRate, opt.rnn_size, train_loss, epoch, val_loss)
	file = io.open("test.txt", "a")
	file:write(savefile,"\n")

	(line 409)
	info = string.format("%s :%d:%d epoch %.3f, train_loss = %6.8f, grad/param norm = %6.4e, time/batch = %.4fs", ScriptName, i, iterations, epoch, train_loss, grad_params:norm() / params:norm(), time)
	end
	if i % opt.eval_val_every == 0 or i == iterations then
	file = io.open("test.txt", "a")
	file:write(info,"\n")
	"""

	import re
	import os
	from time import sleep

	# Convert test.txt (the log appended to by the modified Lua training
	# script) into puretest-brackets.data and puretest-brackets-strings.npy.
	# Both outputs are overwritten on every run.
	import numpy as np  # the import header above this block does not provide np

	# Letters and the separators "=", "_", ",", "-" are replaced by spaces so
	# that the numbers between them split into separate fields.
	noise = re.compile(r'[A-Za-z=_,-]')

	records = []
	with open('test.txt', 'r') as log_file:
	    for line in log_file:
	        # Strip the line ending first so the ".t7" suffix is also
	        # recognised on a final line without a trailing newline.
	        line = line.rstrip('\r\n')
	        if line.endswith('.t7'):
	            line = line[:-len('.t7')]
	        result = ' '.join(noise.sub(' ', line).split())
	        # ":" is dropped outright; a "/" ending a token is dropped with
	        # the space after it, so "4/ 4" becomes "44".
	        result = result.replace(':', '')
	        result = result.replace('/ ', '')
	        result = result.replace(' ', ',')
	        # Skip lines with no fields so they do not leave ",," behind.
	        if result:
	            records.append(result)

	joined = ','.join(records)
	with open('puretest-brackets.data', 'w') as f0:
	    f0.write('[' + joined + ']')

	# The .npy file stores the same text without brackets, as a single string
	# value of the interpreter's native string type.
	FB = np.asarray(joined.replace(']', '').replace('[', ''), dtype=str)
	np.save('puretest-brackets-strings', FB)
	print("finished")