Skip to content

Instantly share code, notes, and snippets.

@odzhan
Created December 29, 2023 10:36
Show Gist options
  • Save odzhan/2165f2ac1c1394381c59e4fcf1929a68 to your computer and use it in GitHub Desktop.
Save odzhan/2165f2ac1c1394381c59e4fcf1929a68 to your computer and use it in GitHub Desktop.
SZDD compression
// LZ77 compression / decompression algorithm
// this is the compression Microsoft used in Windows *.HLP and *.MRB files
// It is also used with Install Shield files. These files are
// recognizable by the letters SZDD in the first 4 bytes. The file
// names for files compressed in this way are usually the name of the
// file as it would be installed but with the last character replaced
// by '_'
// This program is a complete hack. I am not responsible for the
// algorithm code in any way. I stole the compression code from
// somebody else who didn't put a blame line in the file so I've
// forgotten who he was. I just adapted it to run under linux from
// the command line. (D. Risacher)
// Implementation originally written by Daniel Robert Risacher
// Minor changes were made to make it compile without error using MSVC.
// There was also an error in the wrong file size being stored due to sign extension.
// NOTE: The original description indicates this is LZ77, but it's actually closer to LZSS.
// The compressed data from this program is compatible with Windows LZ API.
#define MSEXPAND
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
#include <sys/stat.h>
#define N 4096
#define F 16
#define THRESHOLD 3
#define dad (node+1)
#define lson (node+1+N)
#define rson (node+1+N+N)
#define root (node+1+N+N+N)
#define NIL -1
#define int16 short
char *buffer;
int16 *node;
int16 pos;
void encode(FILE*,FILE*);
void decode(FILE*,FILE*);
int
main (int argc, char* argv[])
{
if ((argc != 4) || ((argv[1][0] != 'e') && (argv[1][0] != 'd'))) {
printf("usage: szdd [e/d] <infile> <outfile>\n");
return 0;
}
int flag = (argv[1][0] == 'e');
FILE *in = fopen(argv[2],"rb");
FILE *out = fopen(argv[3],"wb");
if (flag) {
encode(in, out);
} else {
decode(in, out);
}
return 0;
}
uint32_t filelength(int16 f)
{
struct stat buf;
fstat(f, &buf);
return buf.st_size;
}
//#define min(x,y) (x>y?y:x)
int16 insert(int16 i,int16 run)
{
int16 c,j,k,l,n,match;
int16 *p;
k=l=1;
match=THRESHOLD-1;
p=&root[(unsigned char)buffer[i]];
lson[i]=rson[i]=NIL;
while((j=*p)!=NIL)
{
for(n=min(k,l);n<run&&(c=(buffer[j+n]-buffer[i+n]))==0;n++) ;
if(n>match)
{
match=n;
pos=j;
}
if(c<0)
{
p=&lson[j];
k=n;
}
else if(c>0)
{
p=&rson[j];
l=n;
}
else
{
dad[j]=NIL;
dad[lson[j]]=lson+i-node;
dad[rson[j]]=rson+i-node;
lson[i]=lson[j];
rson[i]=rson[j];
break;
}
}
dad[i]=p-node;
*p=i;
return match;
}
void delete(int16 z)
{
int16 j;
if(dad[z]!=NIL)
{
if(rson[z]==NIL)
{
j=lson[z];
}
else if(lson[z]==NIL)
{
j=rson[z];
}
else
{
j=lson[z];
if(rson[j]!=NIL)
{
do
{
j=rson[j];
}
while(rson[j]!=NIL);
node[dad[j]]=lson[j];
dad[lson[j]]=dad[j];
lson[j]=lson[z];
dad[lson[z]]=lson+j-node;
}
rson[j]=rson[z];
dad[rson[z]]=rson+j-node;
}
dad[j]=dad[z];
node[dad[z]]=j;
dad[z]=NIL;
}
}
#if defined(_MSC_VER)
#pragma pack(push, 1)
struct header_struct {
long magic;
long magic2;
int16_t magic3;
long filesize;
};
#pragma pack(pop)
#else
struct header_struct { long magic, magic2; int16 magic3; long filesize; } __attribute__ ((packed));
#endif
void
encode(FILE *f,FILE *out)
{
int16 ch,i,run,len,match,size,mask;
char buf[17];
buffer=malloc(N+F+(N+1+N+N+256)*sizeof(int16)); // 28.5 k !
if(buffer)
{
#ifdef MSEXPAND
struct header_struct header={0};
header.magic=0x44445A53L; // SZDD
header.magic2=0x3327F088L;
header.magic3=0x0041;
header.filesize=filelength(fileno(f));
fwrite(&header,sizeof(header),1,out);
#endif
node=(int16 *)(buffer+N+F);
for(i=0;i<256;i++) root[i]=NIL;
for(i=NIL;i<N;i++) dad[i]=NIL;
size=mask=1;
buf[0]=0;
i=N-F-F;
for(len=0;len<F&&(ch=getc(f))!=-1;len++)
{
buffer[i+F]=ch;
i=(i+1)&(N-1);
}
run=len;
do
{
ch=getc(f);
if(i>=N-F)
{
delete(i+F-N);
buffer[i+F]=buffer[i+F-N]=ch;
}
else
{
delete(i+F);
buffer[i+F]=ch;
}
match=insert(i,run);
if(ch==-1)
{
run--;
len--;
}
if(len++>=run)
{
if(match>=THRESHOLD)
{
#ifdef MSEXPAND
buf[size++]=pos;
buf[size++]=((pos>>4)&0xF0)+(match-3);
#else
buf[0]|=mask;
*(int16 *)(buf+size)=((match-3)<<12)|((i-pos-1)&(N-1));
size+=2;
#endif
len-=match;
}
else
{
#ifdef MSEXPAND
buf[0]|=mask;
#endif
buf[size++]=buffer[i];
len--;
}
if(!((mask+=mask)&0xFF))
{
fwrite(buf,size,1,out);
size=mask=1;
buf[0]=0;
}
}
i=(i+1)&(N-1);
}
while(len>0);
if(size>1) fwrite(buf,size,1,out);
free(buffer);
}
}
void
decode(FILE *f,FILE *out)
{
int16 bits,ch,i,j,len,mask;
char *buffer;
#ifdef MSEXPAND
struct header_struct header;
i=fread(&header,1,sizeof(header),f);
if(i!=sizeof(header)||header.magic!=0x44445A53L||header.magic2!=0x3327F088L||header.magic3!=0x0041)
{
fwrite(&header,1,i,out);
while((ch=getc(f))!=-1) putc(ch,out);
return;
}
#endif
buffer=malloc(N);
if(buffer)
{
i=N-F;
while((bits=getc(f))!=-1)
{
for(mask=0x01;mask&0xFF;mask<<=1)
{
#ifdef MSEXPAND
if(!(bits&mask))
{
j=getc(f);
if(j==-1) break;
len=getc(f);
j+=(len&0xF0)<<4;
len=(len&15)+3;
#else
if(bits&mask)
{
j=getw(f);
len=((j>>12)&15)+3;
j=(i-j-1)&(N-1);
#endif
while(len--)
{
putc(buffer[i]=buffer[j],out);
j=(j+1)&(N-1);
i=(i+1)&(N-1);
}
}
else
{
ch=getc(f);
#ifndef MSEXPAND
if(ch==-1) break;
#endif
putc(buffer[i]=ch,out);
i=(i+1)&(N-1);
}
}
}
free(buffer);
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment