textfiles/computers/regan.lst

314 lines
11 KiB
Plaintext
Raw Permalink Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

_LZW REVISITED_
by Shawn M. Regan
[LISTING ONE]
/* Basic LZW Data Compression program published in DDJ October 1989 issue.
* Original Author: Mark R. Nelson
* Updated by: Shawn M. Regan, January 1990
* Added: - Method to clear table when compression ratio degrades
* - Self adjusting code size capability (up to 14 bits)
* Updated functions are marked with "MODIFIED". main() has been updated also
* Compile with -ml (large model) for MAX_BITS == 14 only
*/
#include <stdio.h>
#define INIT_BITS 9
#define MAX_BITS 14 /* Do not exceed 14 with this program */
#define HASHING_SHIFT MAX_BITS - 8
#if MAX_BITS == 14 /* Set the table size. Must be a prime */
#define TABLE_SIZE 18041 /* number somewhat larger than 2^MAX_BITS.*/
#elif MAX_BITS == 13
#define TABLE_SIZE 9029
#else
#define TABLE_SIZE 5021
#endif
#define CLEAR_TABLE 256 /* Code to flush the string table */
#define TERMINATOR 257 /* To mark EOF Condition, instead of MAX_VALUE */
#define FIRST_CODE 258 /* First available code for code_value table */
#define CHECK_TIME 100 /* Check comp ratio every CHECK_TIME chars input */
#define MAXVAL(n) (( 1 <<( n )) -1) /* max_value formula macro */
unsigned input_code();
void *malloc();
int *code_value; /* This is the code value array */
unsigned int *prefix_code; /* This array holds the prefix codes */
unsigned char *append_character; /* This array holds the appended chars */
unsigned char decode_stack[4000]; /* This array holds the decoded string */
int num_bits=INIT_BITS; /* Starting with 9 bit codes */
unsigned long bytes_in=0,bytes_out=0; /* Used to monitor compression ratio */
int max_code; /* old MAX_CODE */
unsigned long checkpoint=CHECK_TIME; /* For compression ratio monitoring */
main(int argc, char *argv[])
{
FILE *input_file, *output_file, *lzw_file;
char input_file_name[81];
/* The three buffers for the compression phase. */
code_value=malloc(TABLE_SIZE*sizeof(unsigned int));
prefix_code=malloc(TABLE_SIZE*sizeof(unsigned int));
append_character=malloc(TABLE_SIZE*sizeof(unsigned char));
if (code_value==NULL || prefix_code==NULL || append_character==NULL) {
printf("Error allocating table space!\n");
exit(1);
}
/* Get the file name, open it, and open the LZW output file. */
if (argc>1)
strcpy(input_file_name,argv[1]);
else {
printf("Input file name: ");
scanf("%s",input_file_name);
}
input_file=fopen(input_file_name,"rb");
lzw_file=fopen("test.lzw","wb");
if (input_file == NULL || lzw_file == NULL) {
printf("Error opening files\n");
exit(1);
}
max_code = MAXVAL(num_bits); /* Initialize max_value & max_code */
compress(input_file,lzw_file); /* Call compression routine */
fclose(input_file);
fclose(lzw_file);
free(code_value); /* Needed only for compression */
lzw_file=fopen("test.lzw","rb");
output_file=fopen("test.out","wb");
if (lzw_file == NULL || output_file == NULL) {
printf("Error opening files\n");
exit(1);
}
num_bits=INIT_BITS; /* Re-initialize for expansion */
max_code = MAXVAL(num_bits);
expand(lzw_file,output_file); /* Call expansion routine */
fclose(lzw_file); /* Clean it all up */
fclose(output_file);
free(prefix_code);
free(append_character);
}
/* MODIFIED This is the new compression routine. The first two 9-bit codes
* have been reserved for communication between the compressor and expander.
*/
compress(FILE *input, FILE *output)
{
unsigned int next_code=FIRST_CODE;
unsigned int character;
unsigned int string_code;
unsigned int index;
int i, /* All purpose integer */
ratio_new, /* New compression ratio as a percentage */
ratio_old=100; /* Original ratio at 100% */
for (i=0;i<TABLE_SIZE;i++) /* Initialize the string table first */
code_value[i]=-1;
printf("Compressing\n");
string_code=getc(input); /* Get the first code */
/* This is the main compression loop. Notice when the table is full we try
* to increment the code size. Only when num_bits == MAX_BITS and the code
* value table is full do we start to monitor the compression ratio.
*/
while((character=getc(input)) != (unsigned)EOF) {
if (!(++bytes_in % 1000)) { /* Count input bytes and pacifier */
putchar('.');
}
index=find_match(string_code,character);
if (code_value[index] != -1)
string_code=code_value[index];
else {
if (next_code <= max_code ) {
code_value[index]=next_code++;
prefix_code[index]=string_code;
append_character[index]=character;
}
output_code(output,string_code); /* Send out current code */
string_code=character;
if (next_code > max_code) { /* Is table Full? */
if ( num_bits < MAX_BITS) { /* Any more bits? */
putchar('+');
max_code = MAXVAL(++num_bits); /* Increment code size then */
}
else if (bytes_in > checkpoint) { /* At checkpoint? */
if (num_bits == MAX_BITS ) {
ratio_new = bytes_out*100/bytes_in; /* New compression ratio */
if (ratio_new > ratio_old) { /* Has ratio degraded? */
output_code(output,CLEAR_TABLE); /* YES,flush string table */
putchar('C');
num_bits=INIT_BITS;
next_code=FIRST_CODE; /* Reset to FIRST_CODE */
max_code = MAXVAL(num_bits); /* Re-Initialize this stuff */
bytes_in = bytes_out = 0;
ratio_old=100; /* Reset compression ratio */
for (i=0;i<TABLE_SIZE;i++) /* Reset code value array */
code_value[i]=-1;
}
else /* NO, then save new */
ratio_old = ratio_new; /* compression ratio */
}
checkpoint = bytes_in + CHECK_TIME; /* Set new checkpoint */
}
}
}
}
output_code(output,string_code); /* Output the last code */
if (next_code == max_code) { /* Handles special case for bit */
++num_bits; /* increment on EOF */
putchar('+');
}
output_code(output,TERMINATOR); /* Output the end of buffer code */
output_code(output,0); /* Flush the output buffer */
output_code(output,0);
output_code(output,0);
putchar('\n');
}
/* UNCHANGED from original
* This is the hashing routine.
*/
find_match(int hash_prefix, unsigned int hash_character)
{
int index, offset;
index = (hash_character << HASHING_SHIFT ) ^ hash_prefix;
if (index == 0 )
offset=1;
else
offset = TABLE_SIZE - index;
while(1) {
if (code_value[index] == -1 )
return(index);
if (prefix_code[index] == hash_prefix &&
append_character[index] == hash_character)
return(index);
index -= offset;
if (index < 0)
index += TABLE_SIZE;
}
}
/* MODIFIED This is the modified expansion routine. It must now check for the
* CLEAR_TABLE code and know when to increment the code size.
*/
expand(FILE *input, FILE *output)
{
unsigned int next_code=FIRST_CODE;
unsigned int new_code;
unsigned int old_code;
int character,
counter=0,
clear_flag=1; /* Need to clear the code value array */
unsigned char *string;
char *decode_string(unsigned char *buffer, unsigned int code);
printf("Expanding\n");
while((new_code=input_code(input)) != TERMINATOR) {
if (clear_flag) { /* Initialize or Re-Initialize */
clear_flag=0;
old_code=new_code; /* The next three lines have been moved */
character=old_code; /* from the original */
putc(old_code,output);
continue;
}
if (new_code == CLEAR_TABLE) { /* Clear string table */
clear_flag=1;
num_bits=INIT_BITS;
next_code=FIRST_CODE;
putchar('C');
max_code = MAXVAL(num_bits);
continue;
}
if (++counter == 1000) { /* Pacifier */
counter=0;
putchar('.');
}
if (new_code >= next_code) { /* Check for string+char+string */
*decode_stack=character;
string=decode_string(decode_stack+1,old_code);
}
else
string=decode_string(decode_stack,new_code);
character = *string; /* Output decoded string in reverse */
while (string >= decode_stack)
putc(*string--,output);
if (next_code <= max_code) { /* Add to string table if not full */
prefix_code[next_code]=old_code;
append_character[next_code++]=character;
if (next_code == max_code && num_bits < MAX_BITS) {
putchar('+');
max_code = MAXVAL(++num_bits);
}
}
old_code=new_code;
}
putchar('\n');
}
/* UNCHANGED from original
* Decode a string from the string table, storing it in a buffer.
* The buffer can then be output in reverse order by the expansion
* program.
*/
char *decode_string(unsigned char *buffer, unsigned int code)
{
int i=0;
while(code > 255 ) {
*buffer++ = append_character[code];
code=prefix_code[code];
if (i++ >= 4000 ) {
printf("Error during code expansion\n");
exit(1);
}
}
*buffer=code;
return(buffer);
}
/* UNCHANGED from original
* Input a variable length code.
*/
unsigned input_code(FILE *input)
{
unsigned int return_value;
static int input_bit_count=0;
static unsigned long input_bit_buffer=0L;
while (input_bit_count <= 24 ) {
input_bit_buffer |= (unsigned long) getc(input) << (24 - input_bit_count);
input_bit_count += 8;
}
return_value=input_bit_buffer >> (32-num_bits);
input_bit_buffer <<= num_bits;
input_bit_count -= num_bits;
return(return_value);
}
/* MODIFIED Output a variable length code.
*/
output_code(FILE *output, unsigned int code)
{
static int output_bit_count=0;
static unsigned long output_bit_buffer=0L;
output_bit_buffer |= (unsigned long) code << (32 - num_bits -
output_bit_count);
output_bit_count += num_bits;
while (output_bit_count >= 8) {
putc(output_bit_buffer >> 24, output);
output_bit_buffer <<= 8;
output_bit_count -= 8;
bytes_out++; /* ADDED for compression monitoring */
}
}