# bfocht/readprogressdumps.py

## readprogressdumps.py
#!/usr/bin/python

#Example usage
#python readprogressdumps.py -i city.bd -o city.csv -b 3,4,14,16,23 -p 36 -h branch,city-nm,city-no,entry-dt,entry-prsn,rte,state,tax-matl,tax-other,tax-serv,terr-no,tax-cd,area-cd,county-no,zip-cd,bill-to-only,low-zip,high-zip,estimator,salesman,technician,service-rep,service-area,CASS-city-nm,country,township-no,county-nm,township-nm,sales-rep
# -i input file is a binary dump file from Progress Software database
# -o output file
# -b columns that contain binary integers
# -p columns that contain packed decimals
# -h column headers  (first row)
# --debug  also write a row of column numbers (after the -h header row, if one is given)

import struct
from decimal import Decimal
import csv
import sys,getopt

def unpack(val):
	"""Decode one raw byte into its unsigned integer value (0-255).

	val must be exactly one byte long; struct.unpack raises struct.error
	for any other length.
	"""
	(byte_value,) = struct.unpack('B', val)
	return int(byte_value)

def getSize(fileobject):
    """Return the total size, in bytes, of an open seekable file object.

    The size is measured by seeking to the end of the file. The position the
    caller had before the call is restored afterwards, so measuring the file
    no longer moves the cursor as a hidden side effect.
    """
    original_position = fileobject.tell()
    fileobject.seek(0, 2)  # whence=2: offset is relative to the end of the file
    size = fileobject.tell()
    fileobject.seek(original_position)
    return size

def _decode_text(raw):
	"""Turn the raw bytes of a text field into a printable string.

	Each byte is read as a signed 8-bit value: -1 (0xFF) is dropped, any
	other value below 32 (control characters, and bytes 0x80-0xFE which read
	as negative numbers) is written as its decimal number, and every other
	byte is kept unchanged.
	"""
	pieces = []
	for char in raw:
		code = struct.unpack('>b', char)[0]
		if code == -1:
			continue
		if code < 32:
			pieces.append(str(code))
		else:
			pieces.append(char)
	return ''.join(pieces)


def _packed_digits(val):
	"""Return the digits stored after the first byte of val as a Decimal.

	The bytes are hex-encoded and every 'f' nibble is removed before the
	remaining digits are parsed.
	"""
	return Decimal(val[1:].encode('hex').replace('f', ''))


def _read_field(f, fieldno, int_fields, packed_fields):
	"""Read one field from f at the current position and return its value.

	fieldno is the 1-based field number; int_fields and packed_fields are the
	sets of field numbers to decode as binary integers / signed packed
	decimals. The first byte read selects how the field is stored.
	"""
	tag = unpack(f.read(1))

	# 250: character string (per the original author's note)
	if tag == 250:
		charlen = struct.unpack('>h', f.read(2))[0]
		f.read(1)  # one more byte follows the length; its value is not used
		if charlen > 0:
			# charlen includes the byte just skipped, so charlen - 1 remain
			return _decode_text(f.read(charlen - 1))
		return ''

	# 230: a 2-byte length followed by that many bytes, copied unchanged
	if tag == 230:
		charlen = struct.unpack('>h', f.read(2))[0]
		if charlen > 0:
			return f.read(charlen)
		return ''

	# 0 and 253 carry no data and produce an empty column
	if tag == 0 or tag == 253:
		return ''

	# any other tag is the length in bytes of the value that follows
	val = f.read(tag)
	lead = ord(val[0])

	# binary values: signed big-endian integers of 1, 2 or 3 bytes
	if fieldno in int_fields:
		if tag == 3:
			# combine all three bytes; the previous '>bh' unpack returned
			# only the high byte
			high, low = struct.unpack('>bH', val)
			return high * 65536 + low
		if tag == 2:
			return struct.unpack('>h', val)[0]
		if tag == 1:
			return struct.unpack('>b', val)[0]

	# packed decimal-1: first byte 128..137 gives the number of decimal
	# places as (first byte - 128)
	if 128 <= lead <= 137:
		value = _packed_digits(val)
		for _ in range(128, lead):
			value = value / 10
		return value

	# packed decimal-2 signed: first byte is the number of decimal places
	# and the result is negated
	if fieldno in packed_fields:
		value = _packed_digits(val)
		for _ in range(0, lead):
			value = value / 10
		return value * -1

	return _decode_text(val)


def processfile(inputfile, outputfile, header, int_list, packedlist, show_field_no):
	"""Convert a Progress binary dump file into a CSV file.

	Written for Python 2 byte strings (str.encode('hex'), csv file opened
	in 'wb' mode).

	inputfile     -- path of the binary dump to read
	outputfile    -- path of the CSV file to create (overwritten if present)
	header        -- list of column names for the first row, or None for none
	int_list      -- field numbers (1-based) holding binary integers, or None
	packedlist    -- field numbers (1-based) holding signed packed decimals,
	                 or None
	show_field_no -- when true, also write a row of field numbers

	Layout read from the dump: the field separator is the byte at offset 312,
	the field count is the byte at offset 320, and the first record follows
	the first separator byte found at or after offset 636.

	A truncated input file can raise struct.error or IndexError while a
	record is being decoded.
	"""
	int_fields = frozenset(int_list or ())
	packed_fields = frozenset(packedlist or ())

	# both files are context-managed so the CSV is flushed and closed even
	# when decoding fails part-way through
	with open(inputfile, 'rb') as f, open(outputfile, 'wb') as w:
		writer = csv.writer(w)

		print('Input file is  %s' % inputfile)
		print('Output file is  %s' % outputfile)

		filesize = getSize(f)
		print('filesize is %d bytes' % filesize)

		f.seek(312)  # field separator
		sep = f.read(1)
		f.seek(320)
		fldcount = unpack(f.read(1))
		print('field count is %d' % fldcount)

		f.seek(636)  # jump to first record
		while f.read(1) != sep and f.tell() < filesize:
			pass

		# csv.writer rejects None, so only write a header that was supplied
		if header is not None:
			writer.writerow(header)
		# create header based on field number
		if show_field_no:
			writer.writerow(["%d" % fieldno for fieldno in range(1, fldcount)])

		lineno = 0
		# process file
		while f.tell() < filesize:
			lineno += 1
			num = unpack(f.read(1))
			if num == 231:
				# 231 is followed by a 2-byte big-endian length and that
				# many bytes, which are skipped
				num = struct.unpack('>H', f.read(2))[0]
				f.read(num)
			if num != 1:
				num = unpack(f.read(1))
			val = f.read(num)
			if val != sep:
				print('invalid start character found on line number %d ...attempting to recover' % lineno)
				# step back and scan forward for the next 231 marker
				f.seek(f.tell() - num - 12)
				while unpack(f.read(1)) != 231 and f.tell() < filesize:
					pass
				f.seek(f.tell() - 1)
				continue

			array = []
			currentfld = 1
			while currentfld < fldcount and f.tell() < filesize:
				array.append(_read_field(f, currentfld, int_fields, packed_fields))
				currentfld += 1
			writer.writerow(array)

			f.read(12)  # stuff at end of line, I don't know what it is for

		print('finished exporting %d rows' % lineno)
		print('done')


def _parse_field_numbers(arg):
	"""Turn a comma-separated option value such as '3,4,14' into [3, 4, 14]."""
	return [int(item) for item in arg.split(',')]


def main(argv):
	"""Parse the command-line arguments in argv and run the conversion.

	argv is the argument list without the program name. Recognised options:
	-i/--ifile (input dump, required), -o/--ofile (output CSV, required),
	-b/--binary and -p/--packed (comma-separated field numbers),
	-h (comma-separated header names) and --debug (flag, takes no value).

	Exits with status 2 when the options cannot be parsed, when a field
	number is not an integer, or when the input or output file is missing.
	"""
	usage = ' -i <inputfile> -o <outputfile> [-b <binary field numbers>] [-p <packed field numbers>] [-h <header record>] [--debug]'
	inputfile = None
	header = None
	intlist = None
	outputfile = None
	packedlist = None
	show_field_no = False
	try:
		# "debug" has no trailing "=", so --debug is a plain flag as documented
		opts, _args = getopt.getopt(argv, "b:p:h:i:o:", ["ifile=", "ofile=", "binary=", "packed=", "debug"])
	except getopt.GetoptError:
		print(usage)
		sys.exit(2)
	for opt, arg in opts:
		if opt == '-h':
			header = arg.split(',')
		elif opt == "--debug":
			show_field_no = True
		elif opt in ("-i", "--ifile"):
			inputfile = arg
		elif opt in ("-o", "--ofile"):
			outputfile = arg
		elif opt in ("-b", "--binary", "-p", "--packed"):
			try:
				numbers = _parse_field_numbers(arg)
			except ValueError:
				print(' %s expects comma-separated field numbers, got %r' % (opt, arg))
				sys.exit(2)
			if opt in ("-b", "--binary"):
				intlist = numbers
			else:
				packedlist = numbers
	if inputfile is None or outputfile is None:
		print(' -i <inputfile> -o <outputfile> required')
		sys.exit(2)  # non-zero status: the run did not happen
	processfile(inputfile, outputfile, header, intlist, packedlist, show_field_no)


# Script entry point: hand the command-line arguments (without the program
# name) to main() when this file is executed directly.
if __name__ == "__main__":
	main(sys.argv[1:])
	#!/usr/bin/python

	#Example usage
	#python readprogressdumps.py -i city.bd -o city.csv -b 3,4,14,16,23 -p 36 -h branch,city-nm,city-no,entry-dt,entry-prsn,rte,state,tax-matl,tax-other,tax-serv,terr-no,tax-cd,area-cd,county-no,zip-cd,bill-to-only,low-zip,high-zip,estimator,salesman,technician,service-rep,service-area,CASS-city-nm,country,township-no,county-nm,township-nm,sales-rep
	# -i input file is a binary dump file from Progress Software database
	# -o output file
	# -b columns that contain binary integers
	# -p columns that contain packed decimals
	# -h column headers (first row)
	# --debug also write a row of column numbers (after the -h header row, if one is given)

	import struct
	from decimal import Decimal
	import csv
	import sys,getopt

	def unpack(val):
		"""Decode one raw byte into its unsigned integer value (0-255).

		val must be exactly one byte long; struct.unpack raises struct.error
		for any other length.
		"""
		(byte_value,) = struct.unpack('B', val)
		return int(byte_value)

	def getSize(fileobject):
		"""Return the total size, in bytes, of an open seekable file object.

		The size is measured by seeking to the end of the file. The position
		the caller had before the call is restored afterwards, so measuring
		the file does not move the cursor as a hidden side effect.
		"""
		original_position = fileobject.tell()
		fileobject.seek(0, 2)  # whence=2: offset is relative to the end of the file
		size = fileobject.tell()
		fileobject.seek(original_position)
		return size

	def _decode_text(raw):
		"""Turn the raw bytes of a text field into a printable string.

		Each byte is read as a signed 8-bit value: -1 (0xFF) is dropped, any
		other value below 32 (control characters, and bytes 0x80-0xFE which
		read as negative numbers) is written as its decimal number, and every
		other byte is kept unchanged.
		"""
		pieces = []
		for char in raw:
			code = struct.unpack('>b', char)[0]
			if code == -1:
				continue
			if code < 32:
				pieces.append(str(code))
			else:
				pieces.append(char)
		return ''.join(pieces)


	def _packed_digits(val):
		"""Return the digits stored after the first byte of val as a Decimal.

		The bytes are hex-encoded and every 'f' nibble is removed before the
		remaining digits are parsed.
		"""
		return Decimal(val[1:].encode('hex').replace('f', ''))


	def _read_field(f, fieldno, int_fields, packed_fields):
		"""Read one field from f at the current position and return its value.

		fieldno is the 1-based field number; int_fields and packed_fields are
		the sets of field numbers to decode as binary integers / signed packed
		decimals. The first byte read selects how the field is stored.
		"""
		tag = unpack(f.read(1))

		# 250: character string (per the original author's note)
		if tag == 250:
			charlen = struct.unpack('>h', f.read(2))[0]
			f.read(1)  # one more byte follows the length; its value is not used
			if charlen > 0:
				# charlen includes the byte just skipped, so charlen - 1 remain
				return _decode_text(f.read(charlen - 1))
			return ''

		# 230: a 2-byte length followed by that many bytes, copied unchanged
		if tag == 230:
			charlen = struct.unpack('>h', f.read(2))[0]
			if charlen > 0:
				return f.read(charlen)
			return ''

		# 0 and 253 carry no data and produce an empty column
		if tag == 0 or tag == 253:
			return ''

		# any other tag is the length in bytes of the value that follows
		val = f.read(tag)
		lead = ord(val[0])

		# binary values: signed big-endian integers of 1, 2 or 3 bytes
		if fieldno in int_fields:
			if tag == 3:
				# combine all three bytes; the previous '>bh' unpack returned
				# only the high byte
				high, low = struct.unpack('>bH', val)
				return high * 65536 + low
			if tag == 2:
				return struct.unpack('>h', val)[0]
			if tag == 1:
				return struct.unpack('>b', val)[0]

		# packed decimal-1: first byte 128..137 gives the number of decimal
		# places as (first byte - 128)
		if 128 <= lead <= 137:
			value = _packed_digits(val)
			for _ in range(128, lead):
				value = value / 10
			return value

		# packed decimal-2 signed: first byte is the number of decimal places
		# and the result is negated
		if fieldno in packed_fields:
			value = _packed_digits(val)
			for _ in range(0, lead):
				value = value / 10
			return value * -1

		return _decode_text(val)


	def processfile(inputfile, outputfile, header, int_list, packedlist, show_field_no):
		"""Convert a Progress binary dump file into a CSV file.

		Written for Python 2 byte strings (str.encode('hex'), csv file opened
		in 'wb' mode).

		inputfile     -- path of the binary dump to read
		outputfile    -- path of the CSV file to create (overwritten if present)
		header        -- list of column names for the first row, or None
		int_list      -- field numbers (1-based) holding binary integers, or None
		packedlist    -- field numbers (1-based) holding signed packed
		                 decimals, or None
		show_field_no -- when true, also write a row of field numbers

		Layout read from the dump: the field separator is the byte at offset
		312, the field count is the byte at offset 320, and the first record
		follows the first separator byte found at or after offset 636.

		A truncated input file can raise struct.error or IndexError while a
		record is being decoded.
		"""
		int_fields = frozenset(int_list or ())
		packed_fields = frozenset(packedlist or ())

		# both files are context-managed so the CSV is flushed and closed
		# even when decoding fails part-way through
		with open(inputfile, 'rb') as f, open(outputfile, 'wb') as w:
			writer = csv.writer(w)

			print('Input file is  %s' % inputfile)
			print('Output file is  %s' % outputfile)

			filesize = getSize(f)
			print('filesize is %d bytes' % filesize)

			f.seek(312)  # field separator
			sep = f.read(1)
			f.seek(320)
			fldcount = unpack(f.read(1))
			print('field count is %d' % fldcount)

			f.seek(636)  # jump to first record
			while f.read(1) != sep and f.tell() < filesize:
				pass

			# csv.writer rejects None, so only write a header that was supplied
			if header is not None:
				writer.writerow(header)
			# create header based on field number
			if show_field_no:
				writer.writerow(["%d" % fieldno for fieldno in range(1, fldcount)])

			lineno = 0
			# process file
			while f.tell() < filesize:
				lineno += 1
				num = unpack(f.read(1))
				if num == 231:
					# 231 is followed by a 2-byte big-endian length and that
					# many bytes, which are skipped
					num = struct.unpack('>H', f.read(2))[0]
					f.read(num)
				if num != 1:
					num = unpack(f.read(1))
				val = f.read(num)
				if val != sep:
					print('invalid start character found on line number %d ...attempting to recover' % lineno)
					# step back and scan forward for the next 231 marker
					f.seek(f.tell() - num - 12)
					while unpack(f.read(1)) != 231 and f.tell() < filesize:
						pass
					f.seek(f.tell() - 1)
					continue

				array = []
				currentfld = 1
				while currentfld < fldcount and f.tell() < filesize:
					array.append(_read_field(f, currentfld, int_fields, packed_fields))
					currentfld += 1
				writer.writerow(array)

				f.read(12)  # stuff at end of line, I don't know what it is for

			print('finished exporting %d rows' % lineno)
			print('done')


	def _parse_field_numbers(arg):
		"""Turn a comma-separated option value such as '3,4,14' into [3, 4, 14]."""
		return [int(item) for item in arg.split(',')]


	def main(argv):
		"""Parse the command-line arguments in argv and run the conversion.

		argv is the argument list without the program name. Recognised
		options: -i/--ifile (input dump, required), -o/--ofile (output CSV,
		required), -b/--binary and -p/--packed (comma-separated field
		numbers), -h (comma-separated header names) and --debug (flag, takes
		no value).

		Exits with status 2 when the options cannot be parsed, when a field
		number is not an integer, or when the input or output file is missing.
		"""
		usage = ' -i <inputfile> -o <outputfile> [-b <binary field numbers>] [-p <packed field numbers>] [-h <header record>] [--debug]'
		inputfile = None
		header = None
		intlist = None
		outputfile = None
		packedlist = None
		show_field_no = False
		try:
			# "debug" has no trailing "=", so --debug is a plain flag as documented
			opts, _args = getopt.getopt(argv, "b:p:h:i:o:", ["ifile=", "ofile=", "binary=", "packed=", "debug"])
		except getopt.GetoptError:
			print(usage)
			sys.exit(2)
		for opt, arg in opts:
			if opt == '-h':
				header = arg.split(',')
			elif opt == "--debug":
				show_field_no = True
			elif opt in ("-i", "--ifile"):
				inputfile = arg
			elif opt in ("-o", "--ofile"):
				outputfile = arg
			elif opt in ("-b", "--binary", "-p", "--packed"):
				try:
					numbers = _parse_field_numbers(arg)
				except ValueError:
					print(' %s expects comma-separated field numbers, got %r' % (opt, arg))
					sys.exit(2)
				if opt in ("-b", "--binary"):
					intlist = numbers
				else:
					packedlist = numbers
		if inputfile is None or outputfile is None:
			print(' -i <inputfile> -o <outputfile> required')
			sys.exit(2)  # non-zero status: the run did not happen
		processfile(inputfile, outputfile, header, intlist, packedlist, show_field_no)



	# Script entry point: hand the command-line arguments (without the
	# program name) to main() when this file is executed directly.
	if __name__ == "__main__":
		main(sys.argv[1:])