Skip to content

Instantly share code, notes, and snippets.

@tig
Created February 13, 2012 02:40
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save tig/1812875 to your computer and use it in GitHub Desktop.
Save tig/1812875 to your computer and use it in GitHub Desktop.
COMPRESS - An LZW file compressor written in 1987
/*
compress.c (c) Charles E. Kindel, Jr., 1987
COMPRESS - An ASCII file compress program
Author: Charles E. Kindel, Jr. (tigger)
Started: June 14, 1987
Version: 2.0
SYNOPSIS
compress [-v] [-a]
uncompress [-v] [-a]
-v Write compress statistics to standard error.
-a Codes are written (read) in ASCII hex format.
DESCRIPTION
Compress reduces the size of the standard input file using adaptive
Lempel-Ziv coding. Standard input and output are used exclusively by
the program. Compressed files can be restored to their original form
using uncompress.
Compress uses the modified Lempel-Ziv algorithm popularized in "A
Technique for High Performance Data Compression", Terry A. Welch, IEEE
Computer, vol. 17, no. 6 (June 1984), pp. 8-19. Common substrings in
the file are first replaced by 12-bit codes 128 to 4094.
After the code limit is attained, compress automatically discards the
current table and starts from scratch. The amount of compression
obtained depends on the size of the input, the number of bits per code,
and the distribution of common substrings. Typically, text such as
source code or English is reduced by 50-60%. Compression is generally
much better than that achieved by Huffman coding (as used in pack),
or adaptive Huffman coding (compact), and takes less time to compute.
A message is generated to standard error if the size of the original
file is not reduced. Exit status is normally 0; if the last file is
larger after (attempted) compression, the status is 2; if an error
occurs, exit status is 1.
Code output is normally packed one code to every 1.5 bytes. With the
-a option, codes are represented by three HEX bytes (ASCII) separated
by spaces.
EXAMPLES
compress -av <infile.txt >outfile.lpz
Compresses infile.txt to outifile.lpz using ASCII code format.
Prints out a listing of diagnostics.
uncompress <infile.lpz >outfile.txt
Uncompresses infile.lpz to outfile.txt using normal code format.
MESSAGES
On an invalid command line the above synopsis is printed to stderr.
infile: not in compressed format (file directed through standard input
has not been compressed).
infile: not an ASCII file. (Character codes greater than 127 were
encountered in the input. File to be compressed already
compressed).
LIMITATIONS
Output is not always smaller than the input. Only one size code is
allowed (12 bits). Only ASCII files may be compressed.
FACILITY
Originally written on an IBM PC/XT using TurboC and Dos 3.1. Adapted
to Unix 4.2 bsd.
NOTES
To get this program to run on an IBM PC you must modify the main so
that you can fetch the options from the command line. Also errors will
go to standard output with TurboC, so an error file might be helpful.
REVISIONS
DATE VERSION NAME CHANGE
June 14, 1987 1.0 C.E. Kindel Original
June 15, 1987 1.1 C.E. Kindel Incorperated getopt
June 18, 1987 1.2 C.E. Kindel added find_or_insert,
compress, and clear_table.
June 22, 1987 1.3 C.E. Kindel Added uncompress.
June 23, 1987 1.4 C.E. Kindel Ported to UNIX.
June 23, 1987 1.5 C.E. Kindel Debugged -a option, made an
attempt to get compressed
codes to work.
June 23, 1987 2.0 C.E. Kindel Fine tuned code, got verbose
to work correctly. Time.
Final documentation.
*/
/*IMPORTS */
#include <stdio.h> /* printf, fprintf */
#include <ctype.h> /* toupper */
#include <time.h> /* time */
/* LOCAL DECLARATIONS */
#define void
static compress ();
static uncompress ();
static short int find_or_insert ();
static clear_table ();
static putcode ();
static short int getcode ();
static void verbose ();
/* LOCAL SYMBOLIC DEFINITIONS */
/* flags */
#define COMMAND 0 /* argv index for command */
#define NORMAL 0 /* normal program exit */
#define ERROR 1 /* error program exit */
#define NOTCOMPRESSED 2 /* input bytes < output bytes */
#define TRUE 1 /* true */
#define FALSE 0 /* false */
#define START 128 /* beginiing of the table */
#define ASCIILIMIT 127 /* where ascii chars stop */
#define NOT_FOUND 0xfff /* not found flag */
#define END 0xfff /* end flag */
#define CLEARED 0 /* table cleared flag */
#define MAXTABLE 4095 /* max table entry */
#define T_SIZE 4096 /* table size */
#define CODE_SIZE 3 /* size of compact code in bytes */
/* DATA DEFINITIONS */
static long int elapsed; /* elapsed time */
static short int aflag; /* flag for -a option (ASCII) */
static short int status; /* return status for program
0 = normal, 1 = error, 2 = notcompressed */
static int pair; /* has a pair been output yet */
static short int prefix [T_SIZE]; /* prefix table */
static char suffix [T_SIZE]; /* suffix table */
static short int link [T_SIZE]; /* link table */
static short int head [T_SIZE]; /* headers for link table */
static short int current; /* current table entry */
static char next_c; /* next input character */
static short int c_prefix; /* current prefix */
static float in_bytes; /* number of input bytes */
static float out_bytes; /* number of output bytes */
static char pcode [CODE_SIZE]; /* output codes */
static short int c_table_count; /* #times table cleared */
static short int largest_code; /* largest code output since reset */
/* FUNCTIONS */
/*--------------------------------------------------------------------------*/
/* main program */
/* */
/* Gets arguments from command line, calls compress, or uncompress, and */
/* verbose according to command line. */
/*--------------------------------------------------------------------------*/
int main (argc, argv)
int argc;
char **argv;
{
static int comp; /* flag for compress or uncompress */
static int vflag; /* flag for -v option (verbose) */
static char c; /* option letter */
elapsed = time ((long*) 0); /* get the start time */
if (toupper (*argv[COMMAND]) == 'C') /* 1st char of command */
comp = TRUE; /* to compress. */
else
comp = FALSE; /* to uncompress */
/* command line options */
while ((c = getopt (argc, argv, "avu")) != EOF)
switch (c)
{ case 'a': aflag = TRUE; break;
case 'v': vflag = TRUE; break;
default: /* whoops! bad option. */
fprintf (stderr, "Usage:\n compress [-v] [-a]\n");
fprintf (stderr, " uncompress [-v] [-a]\n");
fprintf (stderr,
" -v Write compress statistics to standard error.\n");
fprintf (stderr,
" -a Codes are written (read) in ASCII hex format.\n");
exit (ERROR); /* send a 1 to Unix */
break;
}
if (comp) /* if were to compress */
compress ();
else /* else were to uncompress */
uncompress ();
if (vflag) /* if -v option then spit it out */
verbose ();
return (status); /* 0 = norm, 1 = err, 2 = in < out */
}
/*--------------------------------------------------------------------------*/
/* End of main program */
/*==========================================================================*/
/* compress */
/* */
/* Uses Lepel-Ziv compression to compress an ASCII file. Calls putcode */
/* to output codes according to flags. */
/* uses find_or_insert to search in the string table for an entry, if the */
/* entry is not found it is inserted. find_or_insert also calls */
/* clear table. */
/*--------------------------------------------------------------------------*/
static void compress ()
{ static short int code; /* code to be outputted */
clear_table (); /* clear the tables */
c_table_count = 0;
largest_code = START; /* largest code since t. clear */
in_bytes = 0; /* number of input bytes */
out_bytes = 0; /* number of output bytes */
pair = FALSE; /* output a pair of codes? */
if ((next_c = getchar ()) != EOF) /* if not End Of input */
{ in_bytes++; /* increment input bytes */
current = START; /* point to first table entry (128) */
c_prefix = next_c;
while ((next_c = getchar ()) != EOF) /* while not at EOF */
{ if (next_c > ASCIILIMIT) /* if its not an ascii char */
{ fprintf (stderr, "infile: not an ASCII file.\n");
exit (ERROR);
}
in_bytes ++; /* increment input byte count */
switch ((code = find_or_insert ())) /* find or insert entry */
{ case NOT_FOUND: /* entry was inserted */
putcode (c_prefix);
c_prefix = next_c;
break;
case CLEARED: /* table was cleared */
c_table_count++; /* ct. table clears */
current = START;
putcode (c_prefix);
putcode (CLEARED);
c_prefix = next_c;
break;
default: /* entry was found, keep looking */
c_prefix = code;
break;
}
}
putcode (c_prefix);
putcode (END);
putcode (END);
}
return;
}
/*--------------------------------------------------------------------------*/
/* end of compress */
/*==========================================================================*/
/* find_or_insert */
/* */
/* Used by compress to find codes in the string table. If a entry is not */
/* found it is inserted and NOT_FOUND is returned. If there is no more */
/* room in table the table is cleared and CLEARED is returned. If the */
/* entry is found then its location is returned. */
/*--------------------------------------------------------------------------*/
static short int find_or_insert ()
{
static int x; /* table index */
x = head [c_prefix]; /* first entry with this prefix */
while (x != END)
if (suffix [x] == next_c) /* if suffix at entry is same as c */
return (x); /* it exists, so we keep looking */
else
x = link [x]; /* else we look at next link */
if (current <= MAXTABLE) /* if table isnt full */
{ prefix [current] = c_prefix; /* install prefix */
suffix [current] = next_c; /* and suffix */
if (head [c_prefix] != END) /* another entry with this prefix? */
{ x = head [c_prefix]; /* yes, look for it */
while (link [x] != END) /* while not the last one */
x = link [x]; /* look at the next */
link [x] = current; /* point the last one to cur entry */
}
else /* otherwise point the head to */
head [c_prefix] = current; /* current entry */
largest_code = ++current; /* increment current entry */
return (NOT_FOUND); /* tell em we had to insert new one */
}
else
{ clear_table (); /* if table is full clear it */
return (CLEARED);
}
}
/*--------------------------------------------------------------------------*/
/* end of find_or_insert */
/*==========================================================================*/
/* uncompress */
/* */
/* Uncompresses a file using Lemel-Ziv coding. Calls getcode to output */
/* codes. If it gets a 0x000 as a code it clears the table. It reads */
/* until it gets an 0xFFF as a code. */
/*--------------------------------------------------------------------------*/
static void uncompress ()
{
static char string [T_SIZE];
static short int code;
static short int next_entry;
static char *string_p;
pair = FALSE;
c_table_count = 0; /* #times table was cleared */
in_bytes = 0;
out_bytes = 0;
clear_table ();
string [MAXTABLE] = '\0'; /* put a null at the end of string */
next_entry = START; /* point to the beg. of the table */
code = getcode ();
while (code != END) /* while code is not last one */
{ prefix [next_entry] = code; /* add it to next entry */
current = next_entry; /* point to this entry */
string_p = &string [MAXTABLE]; /* init. string */
while (prefix [current] != END) /* while there are more chars left */
{ current = prefix [current]; /* in string.... */
if ((current + 1) == next_entry) /* if special case */
suffix [current] = suffix [current - 1];
else if ((current + 1) > next_entry)
{ fprintf (stderr,
"infile: not in compressed format.");
exit (ERROR);
}
string_p--; /* add char. to string */
*string_p = suffix [current];
out_bytes++;
}
fputs (string_p, stdout);
if (next_entry > START) /* if not first code */
suffix [next_entry - 1] = *string_p;
next_entry++;
code = getcode ();
if (code == CLEARED)
{ clear_table ();
c_table_count++;
next_entry = START;
code = getcode ();
}
}
}
/*--------------------------------------------------------------------------*/
/* end of uncompress */
/*==========================================================================*/
/* clear_table */
/* */
/* clears the string table to 0xFFF's where appropriate. */
/*--------------------------------------------------------------------------*/
static void clear_table ()
{
static short int i; /* counter */
for (i = 0; i <= MAXTABLE; i++) /* for i = 0 to 4095 */
{ if (i <= ASCIILIMIT) /* if < 127 then we can put an ASCII */
suffix [i] = i; /* char in suffix */
else
suffix [i] = END; /* otherwise suffix = fff */
prefix [i] = link [i] = head [i] = END;
}
}
/*--------------------------------------------------------------------------*/
/* end of clear_table */
/*==========================================================================*/
/* putcode */
/* */
/* Outputs to standard out an output code. */
/*--------------------------------------------------------------------------*/
static void putcode (outcode)
short int outcode;
{
if (aflag == TRUE)
{ printf ("%03x ", outcode);
out_bytes ++;
}
else
{ if (pair == TRUE)
{ pcode[2] |= outcode << 4;
pcode[3] = outcode >> 4;
printf ("%c%c%c", pcode[1],pcode[2],pcode[3]);
out_bytes += CODE_SIZE;
pair = FALSE;
}
else
{ pcode[1] = outcode;
pcode[2] = outcode >> 8;
pair = TRUE;
}
}
return;
}
/*--------------------------------------------------------------------------*/
/* end of putcode */
/*==========================================================================*/
/* getcode */
/*--------------------------------------------------------------------------*/
static short int getcode ()
{ static short int incode;
if (aflag == TRUE)
{ fscanf (stdin, "%03x ", &incode);
in_bytes ++;
}
else
{ if (pair == TRUE) /* then were on the 2nd code */
{ incode = ((pcode[2] >> 4) & 0x000F) | (pcode[3] << 4) & END;
pair = FALSE;
}
else /* we need to read in a packed code */
{ fscanf (stdin, "%c%c%c", &pcode[1],&pcode[2],&pcode[3]);
incode = ((pcode[2] << 8) | (pcode[1] & 0x00FF)) & END;
in_bytes += CODE_SIZE;
pair = TRUE;
}
}
return (incode);
}
/*--------------------------------------------------------------------------*/
/* end of getcode */
/*==========================================================================*/
/* verbose */
/*--------------------------------------------------------------------------*/
static void verbose ()
{ static float ratio;
ratio = ((out_bytes/in_bytes)*100.);
elapsed = time ((long*) 0) - elapsed;
fprintf (stderr, "\nCompression ratio = %6.2f%%\n", ratio);
fprintf (stderr, "Input bytes = %6.0f\n", in_bytes);
fprintf (stderr, "Output bytes = %6.0f\n", out_bytes);
fprintf (stderr, "Cleared string table %d times.\n", c_table_count);
fprintf (stderr, "Elapsed time = %d seconds.\n", elapsed);
fprintf (stderr, "\n\n");
}
/*--------------------------------------------------------------------------*/
/* end of verbose */
/*--------------------------------------------------------------------------*/
/*==========================================================================*/
/* END OF COMPRESS / UNCOMPRESS (C) KindelCo Software Systems, 1987 */
/*==========================================================================*/
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment