-
-
Save pgodwin/7d66729444173146ad698d154f2b9b6c to your computer and use it in GitHub Desktop.
using System.Text; | |
/* This code provided by Bruce Jackson (brucejackson.info) and is
 * provided "AS IS" with no warranties, expressed or implied, as to
 * its effectiveness or use.
 *
 * User agrees to edit the code to meet their particular use and
 * understands that this code has not been fully
 * tested in a production environment.
 *
 * The author expressly waives any rights of copyright to the code
 * and so any user is welcome to use this in whatever way they want.
 * */
namespace OcfLzw | |
{ | |
/// <summary>
/// Alternate implementation of the OCF/LZW Decompression by Bruce Jackson (brucejackson.info).
/// This code has not been tested in production and is provided for reference only.
/// </summary>
/// <remarks>
/// Codes are unpacked most-significant-bit first and start out 9 bits wide. Code 256 resets
/// the lookup table and code 257 ends the stream. All working state lives in instance fields,
/// so one instance may be reused for consecutive calls (state is reset on every call) but must
/// not be shared by concurrent callers.
/// </remarks>
public class DecompressBlob
{
    /// <summary>One lookup-table entry: an earlier code (prefix) plus one trailing value (suffix).</summary>
    class lzwItem
    {
        public lzwItem(uint prefix, uint suffix)
        {
            Prefix = prefix;
            Suffix = suffix;
        }

        public uint Prefix;
        public uint Suffix;
    }

    // Size of the lookup table and of the scratch buffer used to expand one code.
    private const int MAX_CODES = 8192;

    // Scratch buffer: the expansion of the most recent code, stored in reverse order.
    private uint[] tempDecompressBuffer;

    // Lookup table indexed by code; entries from 258 upwards are real prefix/suffix pairs.
    private lzwItem[] lzwLookupTable;

    // Index of the last value written to tempDecompressBuffer (the first byte of the expansion).
    private int tempBufferIndex;

    // Next write position in finalByteBuffer.
    private int currentByteBufferIndex;

    // Index at which the next lookup-table entry will be stored.
    private int codeCount;

    // Output buffer of the call in progress.
    private byte[] finalByteBuffer;

    /// <summary>
    /// Decompresses an OCF/LZW compressed byte stream.
    /// </summary>
    /// <param name="rawbytes">The compressed bytes. Must not be null.</param>
    /// <param name="stringSize">
    /// Size of the output buffer to allocate. It must be at least as large as the decompressed
    /// data; if the stream expands to fewer bytes, the remainder of the buffer stays zero.
    /// </param>
    /// <returns>A new array of length <paramref name="stringSize"/> holding the decompressed bytes.</returns>
    /// <exception cref="System.ArgumentNullException"><paramref name="rawbytes"/> is null.</exception>
    /// <exception cref="System.IndexOutOfRangeException">
    /// <paramref name="rawbytes"/> is empty or ends before the end-of-stream code (257) is read,
    /// or the decompressed data does not fit in <paramref name="stringSize"/> bytes.
    /// </exception>
    public byte[] Decompress(byte[] rawbytes, int stringSize)
    {
        if (rawbytes == null)
        {
            throw new System.ArgumentNullException("rawbytes");
        }

        // Reset every piece of per-call state. Without this a second call on the same
        // instance would continue writing at the previous output offset and would reuse
        // the previous call's lookup table.
        finalByteBuffer = new byte[stringSize];
        currentByteBufferIndex = 0;
        tempDecompressBuffer = new uint[MAX_CODES];
        tempBufferIndex = 0;
        lzwLookupTable = new lzwItem[MAX_CODES];
        codeCount = 257;

        // currentShift is the number of bits the current code takes from the top of the
        // most recently read byte; shift is how much currentShift advances after each code.
        int byteArrayIndex = 0,
            shift = 1,
            currentShift = 1;
        uint prevCode = 0,
             middleCode = 0,
             lookupIndex = 0,
             firstCode = rawbytes[byteArrayIndex];

        while (true)
        {
            if (currentShift >= 9)
            {
                // The bit offset has moved past a whole byte: drop 8 bits and pull in extra bytes.
                currentShift -= 8;
                if (firstCode != 0)
                {
                    // The code spans three bytes: leftover bits, one full byte, and the top
                    // bits of a third byte.
                    middleCode = rawbytes[++byteArrayIndex];
                    firstCode = (firstCode << currentShift + 8) | (middleCode << currentShift);
                    middleCode = rawbytes[++byteArrayIndex];
                    uint tempCode = middleCode >> (8 - currentShift);
                    lookupIndex = firstCode | tempCode;
                    // This path skips the 256/257 control-code checks below.
                    goto skipit;
                }
                else
                {
                    // NOTE(review): the leftover bits are all zero here, so the code is
                    // presumably rebuilt from the next two bytes only - confirm.
                    firstCode = rawbytes[++byteArrayIndex];
                    middleCode = rawbytes[++byteArrayIndex];
                }
            }
            else
            {
                middleCode = rawbytes[++byteArrayIndex];
            }

            // '-' binds tighter than '>>', so this shifts right by (8 - currentShift).
            lookupIndex = (firstCode << currentShift) | (middleCode >> 8 - currentShift);
            if (lookupIndex == 256) // time to move to a new lookup table
            {
                shift = 1;
                currentShift++;
                // The byte is reused unmasked; the low 8 bits of code 256 are all zero.
                firstCode = rawbytes[byteArrayIndex];
                tempDecompressBuffer = new uint[MAX_CODES];
                tempBufferIndex = 0;
                lzwLookupTable = new lzwItem[MAX_CODES];
                codeCount = 257;
                continue;
            }
            else if (lookupIndex == 257) // EOF marker, better than using the string size
            {
                return finalByteBuffer;
            }

        skipit:
            if (prevCode == 0)
            {
                tempDecompressBuffer[0] = lookupIndex;
            }

            if (lookupIndex < codeCount)
            {
                // Known code: emit its expansion, then record (previous code + first byte
                // of this expansion) as a new table entry while there is room.
                SaveItemToLookupTable(lookupIndex);
                if (codeCount < MAX_CODES)
                {
                    lzwLookupTable[codeCount++] = new lzwItem(prevCode, tempDecompressBuffer[tempBufferIndex]);
                }
            }
            else
            {
                // Code not in the table yet: create the entry from the previous code and the
                // first byte of the previous expansion, then emit it.
                lzwLookupTable[codeCount++] = new lzwItem(prevCode, tempDecompressBuffer[tempBufferIndex]);
                SaveItemToLookupTable(lookupIndex);
            }

            // Keep the low bits of the last byte that were not consumed by this code.
            firstCode = (uint)(middleCode & (0xff >> currentShift));
            currentShift += shift;
            switch (codeCount) // use the lookup table size and not the current byte count
            {
                case 511:
                case 1023:
                case 2047:
                case 4095:
                    // The table reached a size threshold: codes become one bit wider.
                    shift++;
                    currentShift++;
                    break;
            }

            prevCode = lookupIndex;
        }
    }

    /// <summary>
    /// Expands <paramref name="compressedCode"/> by following lookup-table prefixes and appends
    /// the resulting bytes to the output buffer. Despite its name it does not modify the table.
    /// </summary>
    /// <param name="compressedCode">The code to expand; values below 258 are emitted as-is.</param>
    private void SaveItemToLookupTable(uint compressedCode)
    {
        tempBufferIndex = -1;

        // Walking the prefix chain yields the bytes last-to-first.
        while (compressedCode >= 258)
        {
            tempDecompressBuffer[++tempBufferIndex] = lzwLookupTable[compressedCode].Suffix;
            compressedCode = lzwLookupTable[compressedCode].Prefix;
        }

        tempDecompressBuffer[++tempBufferIndex] = compressedCode;

        // Copy out in reverse so the bytes land in their original order. tempBufferIndex is
        // left pointing at the first byte of the expansion, which the caller reads.
        for (int i = tempBufferIndex; i >= 0; i--)
        {
            finalByteBuffer[currentByteBufferIndex++] = (byte)tempDecompressBuffer[i];
        }
    }
}
} |
@pgodwin thank you for sharing this script. Do you have any idea where I can get the `stringSize` value from in the Cerner DB tables?
Thank you! Here's a clojure port, unprofiled just yet.
@thegoatherder: working on the `CE_BLOB` table, `BLOB_LENGTH` seems to return the uncompressed length when the blob is compressed (and the concatenated length when the blob is split into a sequence?). The docs aren't helpful, unfortunately. `blobgetlen()` returns the actual length of `BLOB_CONTENTS`.
Thanks for sharing the script. It has been extremely helpful.
One issue we are having is joining split blobs, i.e. where the blob sequence number is greater than 1. Has anyone else tackled this?
@pgodwin Thank you for this! I also ported it to Java in case this is helpful to anyone else.
https://github.com/ARMoir/CernerBlobJava/blob/main/src/ocflzw/DecompressBlob.java
Thanks @plessbd, and thanks for sharing your port.