Skip to content

Instantly share code, notes, and snippets.

@bfocht
Last active August 29, 2015 14:07
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save bfocht/3382e97476f4f2001ea3 to your computer and use it in GitHub Desktop.
Save bfocht/3382e97476f4f2001ea3 to your computer and use it in GitHub Desktop.
Script that extracts a Progress database binary dump file and converts it to a comma-separated (CSV) file. Tested on v8.3.
#!/usr/bin/python
#Example usage
#python readprogressdumps.py -i city.bd -o city.csv -b 3,4,14,16,23 -p 36 -h branch,city-nm,city-no,entry-dt,entry-prsn,rte,state,tax-matl,tax-other,tax-serv,terr-no,tax-cd,area-cd,county-no,zip-cd,bill-to-only,low-zip,high-zip,estimator,salesman,technician,service-rep,service-area,CASS-city-nm,country,township-no,county-nm,township-nm,sales-rep
# -i input file is a binary dump file from Progress Software database
# -o output file
# -b columns that contain binary integers
# -p columns that contain packed decimals
# -h column headers (first row)
# --debug (make the first row contain column numbers instead of column headers)
import struct
from decimal import Decimal
import csv
import sys,getopt
def unpack(val):
    """Decode a single raw byte into its unsigned integer value (0-255).

    *val* must be exactly one byte long; ``struct.unpack`` raises
    ``struct.error`` for any other length.
    """
    (byte_value,) = struct.unpack('B', val)
    return int(byte_value)
def getSize(fileobject):
    """Return the total size in bytes of the seekable stream *fileobject*.

    The size is measured by seeking to the end of the stream.  The position
    the caller had before the call is restored afterwards, so measuring the
    size no longer leaves the stream parked at end-of-file.
    """
    position = fileobject.tell()
    try:
        fileobject.seek(0, 2)  # whence=2: offset is relative to the end of the file
        return fileobject.tell()
    finally:
        fileobject.seek(position)
# NOTE(review): every offset and tag value below comes from the original
# script; the dump format itself is undocumented here, so the descriptions
# only state how this parser uses each value.
_SEPARATOR_OFFSET = 312      # one byte: the field separator
_FIELD_COUNT_OFFSET = 320    # one byte: the field count
_FIRST_RECORD_OFFSET = 636   # the scan for the first separator starts here
_RECORD_MARKER = 231         # followed by a 2-byte length and that many skipped bytes
_TAG_COUNTED_TEXT = 250      # 2-byte length, 1 extra byte, then length-1 text bytes
_TAG_RAW_TEXT = 230          # 2-byte length, then that many bytes copied verbatim
_TAG_EMPTY = 253             # written out as an empty column
_RECORD_TRAILER_SIZE = 12    # bytes skipped after every record (purpose unknown)


def _open_csv(path):
    """Open *path* the way the csv module needs it on the running Python."""
    if str is bytes:
        # Python 2: csv writes byte strings and wants a binary file.
        return open(path, 'wb')
    # Python 3: csv wants text; latin-1 maps every byte value 1:1.
    return open(path, 'w', newline='', encoding='latin-1')


def _read_u8(stream):
    """Read one byte from *stream* and return it as an unsigned int."""
    return struct.unpack('B', stream.read(1))[0]


def _as_text(raw):
    """Return *raw* bytes as the native ``str`` type without altering them."""
    if isinstance(raw, str):
        return raw
    return raw.decode('latin-1')


def _clean_text(raw):
    """Convert the bytes of a character field to text.

    Bytes equal to 0xFF are dropped.  Any other byte whose value, read as a
    signed char, is below 32 is replaced by that signed value in decimal
    (0x0A becomes "10", 0x80 becomes "-128").  Remaining bytes are kept.
    """
    pieces = []
    for byte in bytearray(raw):
        signed = byte - 256 if byte > 127 else byte
        if signed == -1:
            continue
        if signed < 32:
            pieces.append(str(signed))
        else:
            pieces.append(chr(byte))
    return ''.join(pieces)


def _signed_int(raw):
    """Decode *raw* as a big-endian two's-complement integer of any length."""
    octets = bytearray(raw)
    value = 0
    for octet in octets:
        value = (value << 8) | octet
    if octets and octets[0] & 0x80:
        value -= 1 << (8 * len(octets))
    return value


def _packed_digits(raw):
    """Return the digits stored after the first byte of *raw* as a Decimal.

    Each byte contributes two hex digits and every 'f' digit is discarded.
    A value with no digits left yields Decimal(0) instead of raising.
    """
    digits = ''.join('%02x' % octet for octet in bytearray(raw[1:]))
    digits = digits.replace('f', '')
    return Decimal(digits) if digits else Decimal(0)


def _scale_down(value, places):
    """Divide *value* by ten *places* times."""
    for _ in range(places):
        value = value / 10
    return value


def _read_field(stream, fieldno, int_list, packedlist):
    """Read one field from *stream* and return the value for the CSV row."""
    tag = _read_u8(stream)

    if tag == _TAG_COUNTED_TEXT:
        charlen = struct.unpack('>h', stream.read(2))[0]
        stream.read(1)  # extra byte that the parser has never used
        if charlen > 0:
            return _clean_text(stream.read(charlen - 1))
        return ''

    if tag == _TAG_RAW_TEXT:
        charlen = struct.unpack('>h', stream.read(2))[0]
        if charlen > 0:
            return _as_text(stream.read(charlen))
        return ''

    if tag == 0 or tag == _TAG_EMPTY:
        return ''

    # Any other tag is the byte length of the value that follows.
    raw = stream.read(tag)
    lead = bytearray(raw)[0]

    if int_list and tag <= 3 and fieldno in int_list:
        # The 3-byte case used to be unpack('>bh', raw)[0], which returned
        # only the most significant byte and dropped the low 16 bits.
        return _signed_int(raw)
    if 128 <= lead <= 137:
        # Packed decimal: the first byte minus 128 is the number of decimals.
        return _scale_down(_packed_digits(raw), lead - 128)
    if packedlist and fieldno in packedlist:
        # Signed packed decimal: the first byte is the number of decimals
        # and the value is always written as negative.
        return _scale_down(_packed_digits(raw), lead) * -1
    return _clean_text(raw)


def _seek_to_marker(stream, filesize):
    """Advance *stream* to the next record marker byte.

    Returns True with the stream positioned on the marker, or False when the
    end of the file is reached without finding one.
    """
    while stream.tell() < filesize:
        if _read_u8(stream) == _RECORD_MARKER:
            stream.seek(stream.tell() - 1)
            return True
    return False


def processfile(inputfile, outputfile, header, int_list, packedlist, show_field_no):
    """Convert a Progress binary dump file into a CSV file.

    inputfile     -- path of the binary dump to read
    outputfile    -- path of the CSV file to create
    header        -- list of column names for the first row, or None for no
                     header row (None used to crash in csv.writer.writerow)
    int_list      -- 1-based field numbers to decode as binary integers when
                     the value is 1 to 3 bytes long, or None
    packedlist    -- 1-based field numbers to decode as negative packed
                     decimals, or None
    show_field_no -- when true, also write a row holding the field numbers

    Changes from the earlier version: the output file is always closed, a
    missing header is skipped, 3-byte integers are decoded in full, a packed
    decimal without digits becomes 0, and a failed recovery that hits the end
    of the file stops the conversion instead of crashing or looping forever.
    """
    with open(inputfile, 'rb') as f:
        with _open_csv(outputfile) as w:
            writer = csv.writer(w)
            print('Input file is  %s' % (inputfile,))
            print('Output file is  %s' % (outputfile,))

            f.seek(0, 2)  # measure the file by seeking to its end
            filesize = f.tell()
            print('filesize is %d bytes' % filesize)

            f.seek(_SEPARATOR_OFFSET)
            sep = f.read(1)
            f.seek(_FIELD_COUNT_OFFSET)
            fldcount = _read_u8(f)
            print('field count is %d' % fldcount)

            # Skip forward until the first separator byte has been consumed.
            f.seek(_FIRST_RECORD_OFFSET)
            while f.read(1) != sep and f.tell() < filesize:
                pass

            if header is not None:
                writer.writerow(header)
            if show_field_no:
                writer.writerow(['%d' % fieldno for fieldno in range(1, fldcount)])

            lineno = 0     # record attempts, including ones that needed recovery
            exported = 0   # rows actually written
            while f.tell() < filesize:
                lineno += 1
                num = _read_u8(f)
                if num == _RECORD_MARKER:
                    # Skip the block that follows the marker.
                    num = struct.unpack('>H', f.read(2))[0]
                    f.read(num)
                if num != 1:
                    # NOTE(review): the source indentation was lost; this check
                    # is assumed to sit beside the marker check, not inside it.
                    num = _read_u8(f)
                val = f.read(num)
                if val != sep:
                    print('invalid start character found on line number %d ...attempting to recover' % lineno)
                    f.seek(f.tell() - num - _RECORD_TRAILER_SIZE)
                    if not _seek_to_marker(f, filesize):
                        break
                    continue

                row = []
                currentfld = 1
                while currentfld < fldcount and f.tell() < filesize:
                    row.append(_read_field(f, currentfld, int_list, packedlist))
                    currentfld += 1
                writer.writerow(row)
                exported += 1
                f.read(_RECORD_TRAILER_SIZE)

            print('finished exporting %d rows' % exported)
    print('done')
def _parse_field_numbers(text):
    """Turn "3,4,14" into [3, 4, 14]; exit with status 2 on a bad entry."""
    try:
        # A real list (not map()) so it can be searched repeatedly on Python 3.
        return [int(item) for item in text.split(',')]
    except ValueError:
        print(' field numbers must be comma separated integers, got: %s' % text)
        sys.exit(2)


def main(argv):
    """Parse the command line in *argv* and run the conversion.

    Options:
      -i / --ifile   input dump file (required)
      -o / --ofile   output CSV file (required)
      -b / --binary  comma separated field numbers holding binary integers
      -p / --packed  comma separated field numbers holding packed decimals
      -h             comma separated column headers for the first row
      --debug        write a row of field numbers

    Exits with status 2 on an unknown option, a missing required option, or a
    non-numeric field number.
    """
    inputfile = None
    header = None
    intlist = None
    outputfile = None
    packedlist = None
    show_field_no = None

    # --debug is documented as a bare flag, but it used to be declared as
    # "debug=", which made it swallow the following option.  The older
    # "--debug=<value>" spelling is still accepted by dropping the value.
    argv = ['--debug' if arg.startswith('--debug=') else arg for arg in argv]
    try:
        opts, args = getopt.getopt(
            argv, "b:p:h:i:o:",
            ["ifile=", "ofile=", "binary=", "packed=", "debug"])
    except getopt.GetoptError:
        print(' -i <inputfile> -o <outputfile> -b <binary field numbers> -h <header record>')
        sys.exit(2)

    for opt, arg in opts:
        if opt == '-h':
            header = arg.split(',')
        elif opt == '--debug':
            show_field_no = 1
        elif opt in ("-i", "--ifile"):
            inputfile = arg
        elif opt in ("-o", "--ofile"):
            outputfile = arg
        elif opt in ("-b", "--binary"):
            intlist = _parse_field_numbers(arg)
        elif opt in ("-p", "--packed"):
            packedlist = _parse_field_numbers(arg)

    if inputfile is None or outputfile is None:
        print(' -i <inputfile> -o <outputfile> required')
        sys.exit(2)  # a usage error must not report success

    processfile(inputfile, outputfile, header, intlist, packedlist, show_field_no)
# Run the converter only when this file is executed as a script; the program
# name in sys.argv[0] is dropped before the options are parsed.
if __name__ == "__main__":
    cli_args = sys.argv[1:]
    main(cli_args)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment