zorchenhimer/cobol-packed-numbers.py

## cobol-packed-numbers.py
#!/usr/bin/env python
# -*- coding: utf-8 -*-

"""
Pack and unpack COBOL's COMP-3 numbers.

Cobol stores most numbers as strings. There are times that they are stored in a
packed format ("Computational numbers").  Comp numbers are not stored like
traditional numbers (16-bit, 32-bit, 64-bit, etc), but in a bit length that is
four times the number of digits in the stored value plus four bits.  For a
four-digit number (eg PIC 9(4) or PIC 9(2)V9(2)) the length of the stored value
will be 20 bits (four 4-bit digits plus a 4-bit sign), stored in three bytes.

Each digit is stored in four bits.  The number 4 becomes 0x4 (0100), 5 becomes
0x5 (0101), 6 becomes 0x6 (0110), etc.  Once the full number is encoded (456)
an additional digit is placed on the end to denote positive or negative.  0xD is
negative, 0xC is positive, and 0xF is unsigned.

Also note that decimals are not stored in numbers, computational or otherwise.
Decimals, and significant digits, are determined by the PICture clauses.  The
number is loaded into the variable in COBOL and the PICture clause dictates
where the decimal should be.  For example, given PIC 9(2)V9(2), the number is
stored on disk as a four digit integer, but when loaded it is represented in
memory as a two digit number with two additional digits of precision after a
decimal point.

For further information see the following pages:
    http://3480-3590-data-conversion.com/article-cobol-comp.html
    http://3480-3590-data-conversion.com/article-packed-fields.html


unpack_number() originally from:
    https://mail.python.org/pipermail/python-list/2000-April/050953.html

"""


from array import array
from struct import pack

def unpack_number(p):
    """ Unpack a COMP-3 (packed decimal) number into a Python int.

    Every byte of `p` carries two 4-bit nibbles.  All of them are decimal
    digits except the low nibble of the last byte, which is the sign code:
    0xB and 0xD mean negative, every other code is treated as positive or
    unsigned.

    Arguments:
        p -- the packed field: bytes, bytearray or an iterable of ints in
             the range 0-255.

    Returns the signed integer value.  No decimal scaling is applied.
    Digit nibbles are not validated, so a nibble above 9 is simply folded
    into the result.

    Raises IndexError if `p` is empty.
    """
    a = array('B', p)

    # Accumulate in an int, not a float: a float only has 53 bits of
    # mantissa and silently corrupts fields longer than about 15 digits.
    v = 0

    # Every byte except the last one holds two digits.
    for i in a[:-1]:
        v = (v * 100) + ((i >> 4) * 10) + (i & 0xf)

    # The last byte holds the final digit in its high nibble...
    last = a[-1]
    v = (v * 10) + (last >> 4)

    # ...and the sign in its low nibble.
    if (last & 0xf) in (0xb, 0xd):
        v = -v

    # Decimal points are determined by a COBOL program's PICture clauses, not
    # the data on disk.
    return v

def pack_number(n, digits=9):
    """ Pack a COMP-3 number.  Default format: PIC 9(9), i.e. five bytes.

    Arguments:
        n      -- the value to pack.  It is converted with int(), so any
                  fractional part is dropped; Cobol numbers are stored
                  without decimal info, so scale the value to an integer
                  before calling pack_number().
        digits -- number of digits in the field's PICture clause.  The
                  result is always digits // 2 + 1 bytes long.

    Returns the packed field as bytes: the digits left-padded with zero
    nibbles, followed by a sign nibble (0xD for negative values, 0xF
    otherwise).

    Raises ValueError if `digits` is smaller than one or if the value has
    more digits than the field can hold.
    """
    if digits < 1:
        raise ValueError('pack_number: digits must be at least 1')

    n = int(n)

    # The sign lives in the very last nibble of the field.
    sign = 0xd if n < 0 else 0xf

    # Treat the magnitude as a string.  Makes it easy to get at the digits.
    n_str = str(abs(n))
    if len(n_str) > digits:
        # Refuse to pack rather than silently dropping high-order digits.
        raise ValueError(
            'pack_number: %d has %d digits but the field only holds %d'
            % (n, len(n_str), digits))

    # One nibble per digit plus one for the sign, rounded up to whole bytes.
    size = digits // 2 + 1

    # Left-pad with zeros so that digits + sign fill the field exactly.
    nibbles = [int(c) for c in n_str.rjust(2 * size - 1, '0')]
    nibbles.append(sign)

    # Glue each pair of nibbles into one byte.
    return bytes(bytearray(
        (high << 4) | low
        for high, low in zip(nibbles[0::2], nibbles[1::2])))

if __name__ == '__main__':
    # Round-trip a sample value through pack_number()/unpack_number() and
    # show the packed bytes as upper-case hex.
    value = 123456
    packed = pack_number(value)
    unpacked = unpack_number(packed)

    # Iterating over bytes yields ints, which can be formatted directly.
    hex_packed = ''.join("%02X" % octet for octet in packed)

    report = 'Value: {}\nPacked: 0x{}\nUnpacked: {}'
    print(report.format(value, hex_packed, unpacked))
	#!/usr/bin/env python
	# -- coding: utf-8 --

	"""
	Pack and unpack COBOL's COMP-3 numbers.

	Cobol stores most numbers as strings. There are times that they are stored in a
	packed format ("Computational numbers"). Comp numbers are not stored like
	traditional numbers (16-bit, 32-bit, 64-bit, etc), but in a bit length that is
	four times the number of digits in the stored value plus four bits. For a
	four-digit number (eg PIC 9(4) or PIC 9(2)V9(2)) the length of the stored value
	will be 20 bits (four 4-bit digits plus a 4-bit sign), stored in three bytes.

	Each digit is stored in four bits. The number 4 becomes 0x4 (0100), 5 becomes
	0x5 (0101), 6 becomes 0x6 (0110), etc. Once the full number is encoded (456)
	an additional digit is placed on the end to denote positive or negative. 0xD is
	negative, 0xC is positive, and 0xF is unsigned.

	Also note that decimals are not stored in numbers, computational or otherwise.
	Decimals, and significant digits, are determined by the PICture clauses. The
	number is loaded into the variable in COBOL and the PICture clause dictates
	where the decimal should be. For example, given PIC 9(2)V9(2), the number is
	stored on disk as a four digit integer, but when loaded it is represented in
	memory as a two digit number with two additional digits of precision after a
	decimal point.

	For further information see the following pages:
	http://3480-3590-data-conversion.com/article-cobol-comp.html
	http://3480-3590-data-conversion.com/article-packed-fields.html


	unpack_number() originally from:
	https://mail.python.org/pipermail/python-list/2000-April/050953.html

	"""


	from array import array
	from struct import pack

	def unpack_number(p):
	    """ Unpack a COMP-3 (packed decimal) number into a Python int.

	    Every byte of `p` carries two 4-bit nibbles.  All of them are decimal
	    digits except the low nibble of the last byte, which is the sign code:
	    0xB and 0xD mean negative, every other code is treated as positive or
	    unsigned.

	    Arguments:
	        p -- the packed field: bytes, bytearray or an iterable of ints in
	             the range 0-255.

	    Returns the signed integer value.  No decimal scaling is applied.
	    Digit nibbles are not validated, so a nibble above 9 is simply folded
	    into the result.

	    Raises IndexError if `p` is empty.
	    """
	    a = array('B', p)

	    # Accumulate in an int, not a float: a float only has 53 bits of
	    # mantissa and silently corrupts fields longer than about 15 digits.
	    v = 0

	    # Every byte except the last one holds two digits.
	    for i in a[:-1]:
	        v = (v * 100) + ((i >> 4) * 10) + (i & 0xf)

	    # The last byte holds the final digit in its high nibble...
	    last = a[-1]
	    v = (v * 10) + (last >> 4)

	    # ...and the sign in its low nibble.
	    if (last & 0xf) in (0xb, 0xd):
	        v = -v

	    # Decimal points are determined by a COBOL program's PICture clauses, not
	    # the data on disk.
	    return v

	def pack_number(n, digits=9):
	    """ Pack a COMP-3 number.  Default format: PIC 9(9), i.e. five bytes.

	    Arguments:
	        n      -- the value to pack.  It is converted with int(), so any
	                  fractional part is dropped; Cobol numbers are stored
	                  without decimal info, so scale the value to an integer
	                  before calling pack_number().
	        digits -- number of digits in the field's PICture clause.  The
	                  result is always digits // 2 + 1 bytes long.

	    Returns the packed field as bytes: the digits left-padded with zero
	    nibbles, followed by a sign nibble (0xD for negative values, 0xF
	    otherwise).

	    Raises ValueError if `digits` is smaller than one or if the value has
	    more digits than the field can hold.
	    """
	    if digits < 1:
	        raise ValueError('pack_number: digits must be at least 1')

	    n = int(n)

	    # The sign lives in the very last nibble of the field.
	    sign = 0xd if n < 0 else 0xf

	    # Treat the magnitude as a string.  Makes it easy to get at the digits.
	    n_str = str(abs(n))
	    if len(n_str) > digits:
	        # Refuse to pack rather than silently dropping high-order digits.
	        raise ValueError(
	            'pack_number: %d has %d digits but the field only holds %d'
	            % (n, len(n_str), digits))

	    # One nibble per digit plus one for the sign, rounded up to whole bytes.
	    size = digits // 2 + 1

	    # Left-pad with zeros so that digits + sign fill the field exactly.
	    nibbles = [int(c) for c in n_str.rjust(2 * size - 1, '0')]
	    nibbles.append(sign)

	    # Glue each pair of nibbles into one byte.
	    return bytes(bytearray(
	        (high << 4) | low
	        for high, low in zip(nibbles[0::2], nibbles[1::2])))

	if __name__ == '__main__':
	    # Round-trip a sample value through pack_number()/unpack_number() and
	    # show the packed bytes as upper-case hex.
	    value = 123456
	    packed = pack_number(value)
	    unpacked = unpack_number(packed)

	    # Iterating over bytes yields ints, which can be formatted directly.
	    hex_packed = ''.join("%02X" % octet for octet in packed)
	    print('Value: {}\nPacked: 0x{}\nUnpacked: {}'.format(value, hex_packed, unpacked))