Skip to content

Instantly share code, notes, and snippets.

@kanazux
Last active August 29, 2015 13:56
Show Gist options
  • Save kanazux/8844346 to your computer and use it in GitHub Desktop.
Save kanazux/8844346 to your computer and use it in GitHub Desktop.
A parser that exports the content of txt files to CSV files
#!/usr/bin/env python
# -*- coding: iso-8859-1 -*-
# author: Silvio Giunge (kanazuchi)
# Export the content of the txt files in a directory to CSV files
import re
import os
import sys
from string import digits
from subprocess import check_output
# Number of "Label: value" fields that make up one record (one CSV row).
_FIELDS_PER_RECORD = 23

# Positions, inside a record, of the two fields whose label is glued to the
# value without a ":" separator (see _GLUED_LABEL below).
_MUNICIPAL_INDEX = 20
_ESTADUAL_INDEX = 21

# Files are decoded and encoded as ISO-8859-1: every byte maps to exactly one
# character, so the two-character wildcard in the "Inscri..o" patterns below
# covers the two accented letters of a Latin-1 encoded file.
_ENCODING = "iso-8859-1"

# Fields on a line are separated by a run of two or more spaces, or by "|".
# The previous pattern split on every single space, which made it impossible
# for a field to contain a space and therefore made the "Inscri..o Municipal"
# / "Inscri..o Estadual" checks below unreachable.
# NOTE(review): the two-space separator is inferred from those patterns;
# confirm against a sample input file.
_FIELD_SEPARATOR = re.compile(r" {2,}|\|")

# Fields whose label is followed directly by digits instead of ": value".
_GLUED_LABEL = re.compile(r"Inscri..o (?:Municipal|Estadual)")
_MUNICIPAL_LABEL = re.compile(r"Inscri..o Municipal")
_ESTADUAL_LABEL = re.compile(r"Inscri..o Estadual")

# Translation table that deletes every ASCII digit from a text string.
_STRIP_DIGITS = dict.fromkeys(map(ord, digits))


def _extract_fields(text):
    """Return every non-blank field of *text*, stripped, in reading order.

    Lines containing "---" (rulers) and lines starting with
    "Dimensional Equipamentos" (page headers) are ignored.
    """
    fields = []
    for line in text.split("\n"):
        if "---" in line or line.startswith("Dimensional Equipamentos"):
            continue
        for piece in _FIELD_SEPARATOR.split(line):
            piece = piece.strip()  # also drops the "\r" of CRLF files
            if piece:
                fields.append(piece)
    return fields


def _header_label(field):
    """Return the column title for *field* (the text before the first ":").

    For the glued "Inscri..o Municipal/Estadual" fields the digits are
    removed so only the label remains.  The field itself is left untouched:
    the old code stripped the digits in place, which erased the value of the
    first record before it was written.
    """
    if _GLUED_LABEL.match(field):
        field = field.translate(_STRIP_DIGITS)
    return field.split(":")[0].strip()


def _field_value(field):
    """Return the text after the first ":" of *field*, or *field* itself.

    Only the first ":" separates label from value, so values that contain a
    colon themselves (URLs, times) are kept whole.
    """
    _label, separator, value = field.partition(":")
    return value.strip() if separator else field


def _iter_records(fields):
    """Yield one list of values per record; an "E-mail" field ends a record.

    Fields found after the last "E-mail" field do not form a complete record
    and are not yielded.
    """
    record = []
    for field in fields:
        record.append(_field_value(field))
        if field.startswith("E-mail"):
            yield record
            record = []


def _convert_file(txt_path):
    """Append the header and the records of *txt_path* to its sibling .csv.

    The output file has the same path with the final extension replaced by
    ".csv" and is opened in append mode, so running the script twice appends
    the header and rows a second time.  Files with fewer than
    _FIELDS_PER_RECORD fields, and records shorter than that, are reported on
    stderr and skipped; values beyond the 23rd of a record are ignored.
    """
    with open(txt_path, "rb") as source:
        fields = _extract_fields(source.read().decode(_ENCODING))

    if len(fields) < _FIELDS_PER_RECORD:
        sys.stderr.write(
            "%s: only %d fields found, %d are needed for the header; "
            "file skipped\n" % (txt_path, len(fields), _FIELDS_PER_RECORD))
        return

    rows = [[_header_label(field) for field in fields[:_FIELDS_PER_RECORD]]]
    for record in _iter_records(fields):
        if len(record) < _FIELDS_PER_RECORD:
            sys.stderr.write(
                "%s: record with %d fields instead of %d skipped\n"
                % (txt_path, len(record), _FIELDS_PER_RECORD))
            continue
        row = record[:_FIELDS_PER_RECORD]
        # These two values still carry their label; keep only the number.
        row[_MUNICIPAL_INDEX] = _MUNICIPAL_LABEL.sub("", row[_MUNICIPAL_INDEX])
        row[_ESTADUAL_INDEX] = _ESTADUAL_LABEL.sub("", row[_ESTADUAL_INDEX])
        rows.append(row)

    # splitext only touches the final extension; splitting the whole path on
    # "." broke for any path containing another dot (e.g. "./docs/a.txt").
    csv_path = os.path.splitext(txt_path)[0] + ".csv"
    with open(csv_path, "ab") as target:
        for row in rows:
            target.write((u";".join(row) + u"\n").encode(_ENCODING))


def main(argv):
    """Convert every visible *.txt file of the directory named in argv[1].

    argv follows the sys.argv layout: argv[0] is the program name and
    argv[1] the directory to scan.  Exits with a usage message when the
    directory argument is missing or does not name a directory.  Runs on
    both Python 2.7 and Python 3.
    """
    if len(argv) < 2:
        print("Usage: %s 'directory'" % argv[0])
        sys.exit("eg: %s /home/user/documents" % argv[0])
    directory = argv[1]
    if not os.path.isdir(directory):
        sys.exit("%s is not a directory!" % directory)

    # The directory is listed directly instead of through "ls" in a shell:
    # no command is built from user input, names with spaces work, and a
    # directory without .txt files is simply a no-op.  Hidden files are
    # skipped, as the shell glob "*.txt" did.
    for name in sorted(os.listdir(directory)):
        if name.startswith(".") or not name.endswith(".txt"):
            continue
        txt_path = os.path.join(directory, name)
        if os.path.isfile(txt_path):
            _convert_file(txt_path)


if __name__ == "__main__":
    main(sys.argv)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment