Created
December 29, 2023 10:36
-
-
Save odzhan/2165f2ac1c1394381c59e4fcf1929a68 to your computer and use it in GitHub Desktop.
SZDD compression
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// LZ77 compression / decompression algorithm | |
// this is the compression Microsoft used in Windows *.HLP and *.MRB files | |
// It is also used with Install Shield files. These files are | |
// recognizable by the letters SZDD in the first 4 bytes. The file | |
// names for files compressed in this way are usually the name of the | |
// file as it would be installed but with the last character replaced | |
// by '_' | |
// This program is a complete hack. I am not responsible for the | |
// algorithm code in any way. I stole the compression code from | |
// somebody else who didn't put a blame line in the file so I've | |
// forgotten who he was. I just adapted it to run under linux from | |
// the command line. (D. Risacher) | |
// Implementation originally written by Daniel Robert Risacher | |
// Minor changes were made to make it compile without error using MSVC. | |
// There was also an error in the wrong file size being stored due to sign extension. | |
// NOTE: The original description indicates this is LZ77, but it's actually closer to LZSS. | |
// The compressed data from this program is compatible with Windows LZ API. | |
#define MSEXPAND | |
#include <stdio.h> | |
#include <stdlib.h> | |
#include <stdint.h> | |
#include <string.h> | |
#include <sys/stat.h> | |
#define N 4096 | |
#define F 16 | |
#define THRESHOLD 3 | |
#define dad (node+1) | |
#define lson (node+1+N) | |
#define rson (node+1+N+N) | |
#define root (node+1+N+N+N) | |
#define NIL -1 | |
#define int16 short | |
char *buffer; | |
int16 *node; | |
int16 pos; | |
void encode(FILE*,FILE*); | |
void decode(FILE*,FILE*); | |
int | |
main (int argc, char* argv[]) | |
{ | |
if ((argc != 4) || ((argv[1][0] != 'e') && (argv[1][0] != 'd'))) { | |
printf("usage: szdd [e/d] <infile> <outfile>\n"); | |
return 0; | |
} | |
int flag = (argv[1][0] == 'e'); | |
FILE *in = fopen(argv[2],"rb"); | |
FILE *out = fopen(argv[3],"wb"); | |
if (flag) { | |
encode(in, out); | |
} else { | |
decode(in, out); | |
} | |
return 0; | |
} | |
uint32_t filelength(int16 f) | |
{ | |
struct stat buf; | |
fstat(f, &buf); | |
return buf.st_size; | |
} | |
//#define min(x,y) (x>y?y:x) | |
int16 insert(int16 i,int16 run) | |
{ | |
int16 c,j,k,l,n,match; | |
int16 *p; | |
k=l=1; | |
match=THRESHOLD-1; | |
p=&root[(unsigned char)buffer[i]]; | |
lson[i]=rson[i]=NIL; | |
while((j=*p)!=NIL) | |
{ | |
for(n=min(k,l);n<run&&(c=(buffer[j+n]-buffer[i+n]))==0;n++) ; | |
if(n>match) | |
{ | |
match=n; | |
pos=j; | |
} | |
if(c<0) | |
{ | |
p=&lson[j]; | |
k=n; | |
} | |
else if(c>0) | |
{ | |
p=&rson[j]; | |
l=n; | |
} | |
else | |
{ | |
dad[j]=NIL; | |
dad[lson[j]]=lson+i-node; | |
dad[rson[j]]=rson+i-node; | |
lson[i]=lson[j]; | |
rson[i]=rson[j]; | |
break; | |
} | |
} | |
dad[i]=p-node; | |
*p=i; | |
return match; | |
} | |
void delete(int16 z) | |
{ | |
int16 j; | |
if(dad[z]!=NIL) | |
{ | |
if(rson[z]==NIL) | |
{ | |
j=lson[z]; | |
} | |
else if(lson[z]==NIL) | |
{ | |
j=rson[z]; | |
} | |
else | |
{ | |
j=lson[z]; | |
if(rson[j]!=NIL) | |
{ | |
do | |
{ | |
j=rson[j]; | |
} | |
while(rson[j]!=NIL); | |
node[dad[j]]=lson[j]; | |
dad[lson[j]]=dad[j]; | |
lson[j]=lson[z]; | |
dad[lson[z]]=lson+j-node; | |
} | |
rson[j]=rson[z]; | |
dad[rson[z]]=rson+j-node; | |
} | |
dad[j]=dad[z]; | |
node[dad[z]]=j; | |
dad[z]=NIL; | |
} | |
} | |
#if defined(_MSC_VER) | |
#pragma pack(push, 1) | |
struct header_struct { | |
long magic; | |
long magic2; | |
int16_t magic3; | |
long filesize; | |
}; | |
#pragma pack(pop) | |
#else | |
struct header_struct { long magic, magic2; int16 magic3; long filesize; } __attribute__ ((packed)); | |
#endif | |
void | |
encode(FILE *f,FILE *out) | |
{ | |
int16 ch,i,run,len,match,size,mask; | |
char buf[17]; | |
buffer=malloc(N+F+(N+1+N+N+256)*sizeof(int16)); // 28.5 k ! | |
if(buffer) | |
{ | |
#ifdef MSEXPAND | |
struct header_struct header={0}; | |
header.magic=0x44445A53L; // SZDD | |
header.magic2=0x3327F088L; | |
header.magic3=0x0041; | |
header.filesize=filelength(fileno(f)); | |
fwrite(&header,sizeof(header),1,out); | |
#endif | |
node=(int16 *)(buffer+N+F); | |
for(i=0;i<256;i++) root[i]=NIL; | |
for(i=NIL;i<N;i++) dad[i]=NIL; | |
size=mask=1; | |
buf[0]=0; | |
i=N-F-F; | |
for(len=0;len<F&&(ch=getc(f))!=-1;len++) | |
{ | |
buffer[i+F]=ch; | |
i=(i+1)&(N-1); | |
} | |
run=len; | |
do | |
{ | |
ch=getc(f); | |
if(i>=N-F) | |
{ | |
delete(i+F-N); | |
buffer[i+F]=buffer[i+F-N]=ch; | |
} | |
else | |
{ | |
delete(i+F); | |
buffer[i+F]=ch; | |
} | |
match=insert(i,run); | |
if(ch==-1) | |
{ | |
run--; | |
len--; | |
} | |
if(len++>=run) | |
{ | |
if(match>=THRESHOLD) | |
{ | |
#ifdef MSEXPAND | |
buf[size++]=pos; | |
buf[size++]=((pos>>4)&0xF0)+(match-3); | |
#else | |
buf[0]|=mask; | |
*(int16 *)(buf+size)=((match-3)<<12)|((i-pos-1)&(N-1)); | |
size+=2; | |
#endif | |
len-=match; | |
} | |
else | |
{ | |
#ifdef MSEXPAND | |
buf[0]|=mask; | |
#endif | |
buf[size++]=buffer[i]; | |
len--; | |
} | |
if(!((mask+=mask)&0xFF)) | |
{ | |
fwrite(buf,size,1,out); | |
size=mask=1; | |
buf[0]=0; | |
} | |
} | |
i=(i+1)&(N-1); | |
} | |
while(len>0); | |
if(size>1) fwrite(buf,size,1,out); | |
free(buffer); | |
} | |
} | |
void | |
decode(FILE *f,FILE *out) | |
{ | |
int16 bits,ch,i,j,len,mask; | |
char *buffer; | |
#ifdef MSEXPAND | |
struct header_struct header; | |
i=fread(&header,1,sizeof(header),f); | |
if(i!=sizeof(header)||header.magic!=0x44445A53L||header.magic2!=0x3327F088L||header.magic3!=0x0041) | |
{ | |
fwrite(&header,1,i,out); | |
while((ch=getc(f))!=-1) putc(ch,out); | |
return; | |
} | |
#endif | |
buffer=malloc(N); | |
if(buffer) | |
{ | |
i=N-F; | |
while((bits=getc(f))!=-1) | |
{ | |
for(mask=0x01;mask&0xFF;mask<<=1) | |
{ | |
#ifdef MSEXPAND | |
if(!(bits&mask)) | |
{ | |
j=getc(f); | |
if(j==-1) break; | |
len=getc(f); | |
j+=(len&0xF0)<<4; | |
len=(len&15)+3; | |
#else | |
if(bits&mask) | |
{ | |
j=getw(f); | |
len=((j>>12)&15)+3; | |
j=(i-j-1)&(N-1); | |
#endif | |
while(len--) | |
{ | |
putc(buffer[i]=buffer[j],out); | |
j=(j+1)&(N-1); | |
i=(i+1)&(N-1); | |
} | |
} | |
else | |
{ | |
ch=getc(f); | |
#ifndef MSEXPAND | |
if(ch==-1) break; | |
#endif | |
putc(buffer[i]=ch,out); | |
i=(i+1)&(N-1); | |
} | |
} | |
} | |
free(buffer); | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment