Last active
August 29, 2015 13:56
-
-
Save kanazux/8844346 to your computer and use it in GitHub Desktop.
A parser that exports the content of txt files to CSV files
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python
# -*- coding: iso-8859-1 -*- | |
# author: Silvio Giunge (kanazuchi)
# Export the content of txt files to csv files
import io
import os
import re
import sys
from string import digits
from subprocess import check_output
# Every CSV row (the header and each record) has exactly this many columns.
FIELD_COUNT = 23

# Zero-based columns whose values still carry their own label text, which is
# removed when a record row is written.
MUNICIPAL_COLUMN = 20
ESTADUAL_COLUMN = 21

# ISO-8859-1 maps every byte to exactly one character, so the regexes below
# and the bytes written out behave as they would on raw byte strings.
FILE_ENCODING = "iso-8859-1"

# Runs of this pattern separate the fields on a line.
# NOTE(review): as written, every single space is a separator, so a field can
# never contain the space required by the "Inscri..o Municipal" and
# "Inscri..o Estadual" patterns. If this pattern originally held a wider run
# of spaces, restore it here - confirm against the real input files.
FIELD_SEPARATOR = "( )+"


def list_txt_files(directory):
    """Return the sorted paths of the visible ``*.txt`` files in *directory*.

    Names starting with a dot are ignored and only regular files are
    returned. No shell is involved, so any directory name is safe.
    """
    paths = []
    for name in sorted(os.listdir(directory)):
        if name.startswith(".") or not name.endswith(".txt"):
            continue
        path = os.path.join(directory, name)
        if os.path.isfile(path):
            paths.append(path)
    return paths


def parse_fields(text):
    """Split the raw text of one input file into its non-empty fields.

    Lines that are a lone carriage return, contain ``---`` or start with
    ``Dimensional Equipamentos`` are dropped; the remaining lines are cut
    at every run of FIELD_SEPARATOR.
    """
    fields = []
    for line in text.split("\n"):
        if line == "\r":
            continue
        if re.match(".*---.*", line):
            continue
        if re.match("Dimensional Equipamentos", line):
            continue
        for field in re.sub(FIELD_SEPARATOR, "|", line).split("|"):
            if field != "":
                fields.append(field)
    return fields


def header_labels(fields):
    """Return the column labels taken from the first FIELD_COUNT fields.

    A label is the text before the first colon. The two "Inscri..o" fields
    have no colon, so their digits are removed instead. *fields* is never
    modified: stripping the digits in place would erase the first record's
    registration numbers from the value row written later.
    """
    labels = []
    for field in fields[:FIELD_COUNT]:
        if (re.match(u"Inscri..o Municipal[.*]?", field)
                or re.match(u"Inscri..o Estadual.*", field)):
            field = "".join(ch for ch in field if ch not in digits)
        labels.append(field.split(":")[0].strip())
    return labels


def format_row(values):
    """Return the ``;``-separated CSV line for one record.

    Only the first FIELD_COUNT values are used. Raises IndexError when
    *values* holds fewer than FIELD_COUNT entries.
    """
    row = [values[index] for index in range(FIELD_COUNT)]
    row[MUNICIPAL_COLUMN] = re.sub("Inscri..o Municipal", "", row[MUNICIPAL_COLUMN])
    row[ESTADUAL_COLUMN] = re.sub("Inscri..o Estadual", "", row[ESTADUAL_COLUMN])
    return u";".join(row) + u"\n"


def convert_file(txt_path):
    """Append the header and the records found in *txt_path* to its CSV file.

    The CSV file sits next to the input and has the same name with the
    extension replaced by ``.csv``; it is opened in append mode, so running
    the script again adds another header and another copy of the records.

    Returns the CSV path, or None when the input holds fewer than
    FIELD_COUNT fields (no CSV file is touched in that case).
    """
    # newline="" keeps carriage returns untouched, as a byte-mode read would.
    with io.open(txt_path, "r", encoding=FILE_ENCODING, newline="") as source:
        fields = parse_fields(source.read())

    if len(fields) < FIELD_COUNT:
        sys.stderr.write("%s: found %d fields, need at least %d; skipped\n"
                         % (txt_path, len(fields), FIELD_COUNT))
        return None

    # splitext only removes the final extension, so dots elsewhere in the
    # path (e.g. in a directory name) no longer truncate the output name.
    csv_path = os.path.splitext(txt_path)[0] + ".csv"
    with io.open(csv_path, "a", encoding=FILE_ENCODING, newline="") as out:
        out.write(u";".join(header_labels(fields)) + u"\n")
        values = []
        for item in fields:
            item = item.strip()
            parts = item.split(":")
            # NOTE(review): only the text between the first and second
            # colon is kept, so a value that itself contains ":" is cut.
            values.append(parts[1].strip() if len(parts) > 1 else item)
            # The "E-mail" field closes a record.
            if re.match("E-mail", item):
                try:
                    out.write(format_row(values))
                except IndexError as err:
                    # Incomplete record: report it and carry on.
                    print(err)
                values = []
    return csv_path


def main(argv=None):
    """Convert every ``*.txt`` file of the directory given on the command line.

    *argv* defaults to ``sys.argv``. Exits with a message when the directory
    argument is missing or does not name a directory.
    """
    if argv is None:
        argv = sys.argv
    if len(argv) < 2:
        print("Usage: %s 'directory'" % argv[0])
        sys.exit("eg: %s /home/user/documents" % argv[0])

    directory = argv[1]
    if not os.path.isdir(directory):
        sys.exit("%s is not a directory!" % directory)

    for txt_path in list_txt_files(directory):
        convert_file(txt_path)


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment