Skip to content

Instantly share code, notes, and snippets.

@el-hult
Last active June 13, 2021 20:49
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save el-hult/90f7495907f8edaa1783db23151e4e36 to your computer and use it in GitHub Desktop.
Save el-hult/90f7495907f8edaa1783db23151e4e36 to your computer and use it in GitHub Desktop.
Code for converting a CALS raster file to TIFF. Using vanilla Python.
"""
Python script that takes a folder, and for each CALS Raster file, it converts to a TIFF file.
https://en.wikipedia.org/wiki/CALS_Raster_file_format
file ending ".cal"
It so happens that TIFF can be compressed with Group 4 compression (as in faxes), and that is the compression format of CALS Raster images Type 1.
If the image is rotated, one must fix that.
If it is a CALS Type 2 image, it is a tiled image and one must decompress each tile on its own, and I don't think that TIFF handles that.
In such a case, you need to do some more work. But I didn't need to think more about such problems.
Ludvig Hult
2021-06-13
"""
import os
import struct
import argparse
def parse_cals(data):
    """Split a CALS raster file into its parsed header and its image data.

    The header occupies the first 0x800 bytes and is organised as 128-byte
    text records.  The first ten records are read as ``key: value`` pairs,
    the notes text is taken from offset 0x507 up to the end of the header,
    and everything from offset 0x800 onwards is returned untouched as the
    image data block.

    The quick data format description is from here
    http://support.ricoh.com/bb_v1oi/pub_e/oi_view/0001060/0001060558/view/rpgl_rtiff/int/0192.htm

    Written by Ludvig Hult 2021-06-13

    Args:
        data: the complete content of the CALS file, as bytes.

    Returns:
        A ``(header, binary_data)`` tuple.  ``header`` is a dict keyed by the
        record names found in the file plus ``'notes'``; values equal to
        ``"NONE"`` are replaced by ``None``.  ``rtype`` and ``rdensty`` are
        converted to ``int``; ``rpelcnt`` and ``rorient`` to tuples of
        ``int``.  ``binary_data`` is the bytes following the header.

    Raises:
        ValueError: if ``data`` is shorter than the header, or one of the
            first ten records has no ``:`` separator.
    """
    header_block_len = 128
    data_block_start = 0x800
    keyed_record_count = 10
    notes_start = 0x507  # just past the "notes: " prefix of the record at 0x500
    # "ANSI" is only a valid codec name on Windows (alias of "mbcs").  latin-1
    # gives the same result for ASCII headers, exists everywhere and can
    # decode any byte value without raising.
    encoding = "latin-1"

    if len(data) < data_block_start:
        raise ValueError(
            f"CALS header is {data_block_start} bytes, got only {len(data)} bytes"
        )

    def noneify(value):
        return None if value == "NONE" else value

    header = {}
    for index in range(keyed_record_count):
        start = index * header_block_len
        record = data[start:start + header_block_len].decode(encoding)
        key, separator, value = record.partition(":")
        if not separator:
            raise ValueError(
                f"CALS header record at offset {start:#x} has no ':' separator"
            )
        header[key] = noneify(value.strip())

    header['notes'] = noneify(
        data[notes_start:data_block_start].decode(encoding).strip()
    )
    header['rtype'] = int(header['rtype'])
    header['rdensty'] = int(header['rdensty'])
    header['rpelcnt'] = tuple(int(a) for a in header['rpelcnt'].split(","))
    header['rorient'] = tuple(int(a) for a in header['rorient'].split(","))

    return header, data[data_block_start:]
#####
# These functions are from https://shreevatsa.github.io/site/ccitt.html
#######
def tiff_header_for_CCITT(width, height, img_size, CCITT_group=4, blackIsZero=False):
    """Return a TIFF header that makes CCITT-encoded data a valid TIFF file.

    The result is a little-endian TIFF file header followed by one IFD with
    eight entries describing a single-strip, 1-bit image.  The image data is
    expected to be appended directly after the returned bytes: StripOffsets
    is set to the length of this header.

    Args:
        width: image width in pixels.
        height: image height in pixels (also used as RowsPerStrip).
        img_size: number of bytes of encoded image data (StripByteCounts).
        CCITT_group: value written to the Compression tag (4 = CCITT Group 4).
        blackIsZero: written to the PhotometricInterpretation tag as
            ``int(blackIsZero)`` (0 = WhiteIsZero, 1 = BlackIsZero).

    Returns:
        The header as ``bytes``.
    """
    # Layout: byte order, version, IFD offset, entry count, eight 12-byte
    # entries, then the offset of the next IFD.  That last field is 4 bytes
    # wide in TIFF; packing it as a 2-byte value would let the first two
    # bytes of image data be read as part of the offset.
    tiff_header_struct = '<' + '2s' + 'H' + 'L' + 'H' + 'HHLL' * 8 + 'L'
    return struct.pack(
        tiff_header_struct,
        b'II',  # Byte order indication: Little-endian
        42,  # Version number (always 42)
        8,  # Offset to first IFD
        8,  # Number of tags in IFD
        256, 4, 1, width,  # ImageWidth, LONG, 1, width
        257, 4, 1, height,  # ImageLength, LONG, 1, length
        258, 3, 1, 1,  # BitsPerSample, SHORT, 1, 1
        259, 3, 1, CCITT_group,  # Compression, SHORT, 1, 4 = CCITT Group 4 fax encoding
        262, 3, 1, int(blackIsZero),  # PhotometricInterpretation, SHORT, 1, 0 = WhiteIsZero
        273, 4, 1, struct.calcsize(tiff_header_struct),  # StripOffsets, LONG, 1, len of header
        278, 4, 1, height,  # RowsPerStrip, LONG, 1, length
        279, 4, 1, img_size,  # StripByteCounts, LONG, 1, size of image
        0,  # Offset of next IFD: 0 = this is the last IFD
    )
def decode_ccitt_data(data, width, height, CCITT_group=4, blackIsZero=False):
    """Wrap CCITT-encoded data in a TIFF container of known dimensions.

    Nothing is decompressed here: a TIFF header describing the image is
    built with ``tiff_header_for_CCITT`` and the encoded bytes are appended
    to it unchanged.  The returned bytes are the complete TIFF file content.

    NOTE(review): ``blackIsZero`` is accepted but not forwarded to
    ``tiff_header_for_CCITT``, so the header is always built with that
    function's default for the flag.  Forwarding it would change the output
    for callers that pass ``blackIsZero=True`` -- confirm the intended
    polarity before changing this.
    """
    header_bytes = tiff_header_for_CCITT(width, height, len(data), CCITT_group)
    return header_bytes + data
###################################################################################################
def main():
    """Convert every ``.cal`` file in ``--indir`` to a ``.tiff`` in ``--outdir``.

    Only the top level of the input directory is scanned.  Each file is
    parsed with ``parse_cals`` and its data block is written out, prefixed
    with a TIFF header by ``decode_ccitt_data``, under the same base name.

    Raises:
        ValueError: if a file is not CALS type 1, or its orientation is not
            ``(0, 270)``.  Processing stops at the first such file and no
            output is written for it.
    """
    p = argparse.ArgumentParser()
    p.add_argument("--indir", help="input dir (N.B. does not recurse)", required=True)
    p.add_argument("--outdir", help="output dir (must exist before hand)", required=True)
    args = p.parse_args()

    extension = ".cal"
    files = [f for f in os.listdir(args.indir) if f.endswith(extension)]
    for fname in files:
        base = fname[:-len(extension)]
        in_path = os.path.join(args.indir, fname)
        out_path = os.path.join(args.outdir, f"{base}.tiff")

        with open(in_path, 'rb') as cals_file:
            header, data = parse_cals(cals_file.read())

        # Explicit checks rather than ``assert``: assertions are removed when
        # Python runs with -O, which would let unsupported files through.
        if header['rtype'] != 1:
            raise ValueError(f"{in_path}: I only deal with type 1")
        if header['rorient'] != (0, 270):
            raise ValueError(f"{in_path}: I only deal with simple orientation")

        width, height = header['rpelcnt']
        with open(out_path, 'wb') as tiff_file:
            tiff_file.write(decode_ccitt_data(data, width, height, CCITT_group=4, blackIsZero=True))
# Run the conversion only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment