337 lines
12 KiB
C
337 lines
12 KiB
C
|
/*
|
||
|
Copyright (c) 1990-2008 Info-ZIP. All rights reserved.
|
||
|
|
||
|
See the accompanying file LICENSE, version 2000-Apr-09 or later
|
||
|
(the contents of which are also included in unzip.h) for terms of use.
|
||
|
If, for some reason, all these files are missing, the Info-ZIP license
|
||
|
also may be found at: ftp://ftp.info-zip.org/pub/infozip/license.html
|
||
|
*/
|
||
|
/*---------------------------------------------------------------------------
|
||
|
|
||
|
unshrink.c version 1.22 19 Mar 2008
|
||
|
|
||
|
|
||
|
NOTE: This code may or may not infringe on the so-called "Welch
|
||
|
patent" owned by Unisys. (From reading the patent, it appears
|
||
|
that a pure LZW decompressor is *not* covered, but this claim has
|
||
|
not been tested in court, and Unisys is reported to believe other-
|
||
|
wise.) It is therefore the responsibility of the user to acquire
|
||
|
whatever license(s) may be required for legal use of this code.
|
||
|
|
||
|
THE INFO-ZIP GROUP DISCLAIMS ALL LIABILITY FOR USE OF THIS CODE
|
||
|
IN VIOLATION OF APPLICABLE PATENT LAW.
|
||
|
|
||
|
|
||
|
Shrinking is basically a dynamic LZW algorithm with allowed code sizes of
|
||
|
up to 13 bits; in addition, there is provision for partial clearing of
|
||
|
leaf nodes. PKWARE uses the special code 256 (decimal) to indicate a
|
||
|
change in code size or a partial clear of the code tree: 256,1 for the
|
||
|
former and 256,2 for the latter. [Note that partial clearing can "orphan"
|
||
|
nodes: the parent-to-be can be cleared before its new child is added,
|
||
|
but the child is added anyway (as an orphan, as though the parent still
|
||
|
existed). When the tree fills up to the point where the parent node is
|
||
|
reused, the orphan is effectively "adopted." Versions prior to 1.05 were
|
||
|
affected more due to greater use of pointers (to children and siblings
|
||
|
as well as parents).]
|
||
|
|
||
|
This replacement version of unshrink.c was written from scratch. It is
|
||
|
based only on the algorithms described in Mark Nelson's _The Data Compres-
|
||
|
sion Book_ and in Terry Welch's original paper in the June 1984 issue of
|
||
|
IEEE _Computer_; no existing source code, including any in Nelson's book,
|
||
|
was used.
|
||
|
|
||
|
Memory requirements have been reduced in this version and are now no more
|
||
|
than the original Sam Smith code. This is still larger than any of the
|
||
|
other algorithms: at a minimum, 8K+8K+16K (stack+values+parents) assuming
|
||
|
16-bit short ints, and this does not even include the output buffer (the
|
||
|
other algorithms leave the uncompressed data in the work area, typically
|
||
|
called slide[]). For machines with a 64KB data space this is a problem,
|
||
|
particularly when text conversion is required and line endings have more
|
||
|
than one character. UnZip's solution is to use two roughly equal halves
|
||
|
of outbuf for the ASCII conversion in such a case; the "unshrink" argument
|
||
|
to flush() signals that this is the case.
|
||
|
|
||
|
For large-memory machines, a second outbuf is allocated for translations,
|
||
|
but only if unshrinking and only if translations are required.
|
||
|
|
||
|
| binary mode | text mode
|
||
|
---------------------------------------------------
|
||
|
big mem | big outbuf | big outbuf + big outbuf2 <- malloc'd here
|
||
|
small mem | small outbuf | half + half small outbuf
|
||
|
|
||
|
Copyright 1994, 1995 Greg Roelofs. See the accompanying file "COPYING"
|
||
|
in UnZip 5.20 (or later) source or binary distributions.
|
||
|
|
||
|
---------------------------------------------------------------------------*/
|
||
|
|
||
|
|
||
|
#define __UNSHRINK_C /* identifies this source module */
|
||
|
#define UNZIP_INTERNAL
|
||
|
#include "unzip.h"
|
||
|
|
||
|
|
||
|
#ifndef LZW_CLEAN
|
||
|
|
||
|
static void partial_clear OF((__GPRO__ int lastcodeused));
|
||
|
|
||
|
#ifdef DEBUG
|
||
|
# define OUTDBG(c) \
|
||
|
if ((c)<32 || (c)>=127) fprintf(stderr,"\\x%02x",(c)); else putc((c),stderr);
|
||
|
#else
|
||
|
# define OUTDBG(c)
|
||
|
#endif
|
||
|
|
||
|
/* HSIZE is defined as 2^13 (8192) in unzip.h (resp. unzpriv.h */
|
||
|
#define BOGUSCODE 256
|
||
|
#define FLAG_BITS parent /* upper bits of parent[] used as flag bits */
|
||
|
#define CODE_MASK (HSIZE - 1) /* 0x1fff (lower bits are parent's index) */
|
||
|
#define FREE_CODE HSIZE /* 0x2000 (code is unused or was cleared) */
|
||
|
#define HAS_CHILD (HSIZE << 1) /* 0x4000 (code has a child--do not clear) */
|
||
|
|
||
|
#define parent G.area.shrink.Parent
|
||
|
#define Value G.area.shrink.value /* "value" conflicts with Pyramid ioctl.h */
|
||
|
#define stack G.area.shrink.Stack
|
||
|
|
||
|
|
||
|
/***********************/
|
||
|
/* Function unshrink() */
|
||
|
/***********************/
|
||
|
|
||
|
int unshrink(__G)
|
||
|
__GDEF
|
||
|
{
|
||
|
uch *stacktop = stack + (HSIZE - 1);
|
||
|
register uch *newstr;
|
||
|
uch finalval;
|
||
|
int codesize=9, len, error;
|
||
|
shrint code, oldcode, curcode;
|
||
|
shrint lastfreecode;
|
||
|
unsigned int outbufsiz;
|
||
|
#if (defined(DLL) && !defined(NO_SLIDE_REDIR))
|
||
|
/* Normally realbuf and outbuf will be the same. However, if the data
|
||
|
* are redirected to a large memory buffer, realbuf will point to the
|
||
|
* new location while outbuf will remain pointing to the malloc'd
|
||
|
* memory buffer. */
|
||
|
uch *realbuf = G.outbuf;
|
||
|
#else
|
||
|
# define realbuf G.outbuf
|
||
|
#endif
|
||
|
|
||
|
|
||
|
/*---------------------------------------------------------------------------
|
||
|
Initialize various variables.
|
||
|
---------------------------------------------------------------------------*/
|
||
|
|
||
|
lastfreecode = BOGUSCODE;
|
||
|
|
||
|
#ifndef VMS /* VMS uses its own buffer scheme for textmode flush(). */
|
||
|
#ifndef SMALL_MEM
|
||
|
/* non-memory-limited machines: allocate second (large) buffer for
|
||
|
* textmode conversion in flush(), but only if needed */
|
||
|
if (G.pInfo->textmode && !G.outbuf2 &&
|
||
|
(G.outbuf2 = (uch *)malloc(TRANSBUFSIZ)) == (uch *)NULL)
|
||
|
return PK_MEM3;
|
||
|
#endif
|
||
|
#endif /* !VMS */
|
||
|
|
||
|
for (code = 0; code < BOGUSCODE; ++code) {
|
||
|
Value[code] = (uch)code;
|
||
|
parent[code] = BOGUSCODE;
|
||
|
}
|
||
|
for (code = BOGUSCODE+1; code < HSIZE; ++code)
|
||
|
parent[code] = FREE_CODE;
|
||
|
|
||
|
#if (defined(DLL) && !defined(NO_SLIDE_REDIR))
|
||
|
if (G.redirect_slide) { /* use normal outbuf unless we're a DLL routine */
|
||
|
realbuf = G.redirect_buffer;
|
||
|
outbufsiz = (unsigned)G.redirect_size;
|
||
|
} else
|
||
|
#endif
|
||
|
#ifdef DLL
|
||
|
if (G.pInfo->textmode && !G.redirect_data)
|
||
|
#else
|
||
|
if (G.pInfo->textmode)
|
||
|
#endif
|
||
|
outbufsiz = RAWBUFSIZ;
|
||
|
else
|
||
|
outbufsiz = OUTBUFSIZ;
|
||
|
G.outptr = realbuf;
|
||
|
G.outcnt = 0L;
|
||
|
|
||
|
/*---------------------------------------------------------------------------
|
||
|
Get and output first code, then loop over remaining ones.
|
||
|
---------------------------------------------------------------------------*/
|
||
|
|
||
|
READBITS(codesize, oldcode)
|
||
|
if (G.zipeof)
|
||
|
return PK_OK;
|
||
|
|
||
|
finalval = (uch)oldcode;
|
||
|
OUTDBG(finalval)
|
||
|
*G.outptr++ = finalval;
|
||
|
++G.outcnt;
|
||
|
|
||
|
while (TRUE) {
|
||
|
READBITS(codesize, code)
|
||
|
if (G.zipeof)
|
||
|
break;
|
||
|
if (code == BOGUSCODE) { /* possible to have consecutive escapes? */
|
||
|
READBITS(codesize, code)
|
||
|
if (G.zipeof)
|
||
|
break;
|
||
|
if (code == 1) {
|
||
|
++codesize;
|
||
|
Trace((stderr, " (codesize now %d bits)\n", codesize));
|
||
|
if (codesize > MAX_BITS) return PK_ERR;
|
||
|
} else if (code == 2) {
|
||
|
Trace((stderr, " (partial clear code)\n"));
|
||
|
/* clear leafs (nodes with no children) */
|
||
|
partial_clear(__G__ lastfreecode);
|
||
|
Trace((stderr, " (done with partial clear)\n"));
|
||
|
lastfreecode = BOGUSCODE; /* reset start of free-node search */
|
||
|
}
|
||
|
continue;
|
||
|
}
|
||
|
|
||
|
/*-----------------------------------------------------------------------
|
||
|
Translate code: traverse tree from leaf back to root.
|
||
|
-----------------------------------------------------------------------*/
|
||
|
|
||
|
newstr = stacktop;
|
||
|
curcode = code;
|
||
|
|
||
|
if (parent[code] == FREE_CODE) {
|
||
|
/* or (FLAG_BITS[code] & FREE_CODE)? */
|
||
|
Trace((stderr, " (found a KwKwK code %d; oldcode = %d)\n", code,
|
||
|
oldcode));
|
||
|
*newstr-- = finalval;
|
||
|
code = oldcode;
|
||
|
}
|
||
|
|
||
|
while (code != BOGUSCODE) {
|
||
|
if (newstr < stack) {
|
||
|
/* Bogus compression stream caused buffer underflow! */
|
||
|
Trace((stderr, "unshrink stack overflow!\n"));
|
||
|
return PK_ERR;
|
||
|
}
|
||
|
if (parent[code] == FREE_CODE) {
|
||
|
/* or (FLAG_BITS[code] & FREE_CODE)? */
|
||
|
Trace((stderr, " (found a KwKwK code %d; oldcode = %d)\n",
|
||
|
code, oldcode));
|
||
|
*newstr-- = finalval;
|
||
|
code = oldcode;
|
||
|
} else {
|
||
|
*newstr-- = Value[code];
|
||
|
code = (shrint)(parent[code] & CODE_MASK);
|
||
|
}
|
||
|
}
|
||
|
|
||
|
len = (int)(stacktop - newstr++);
|
||
|
finalval = *newstr;
|
||
|
|
||
|
/*-----------------------------------------------------------------------
|
||
|
Write expanded string in reverse order to output buffer.
|
||
|
-----------------------------------------------------------------------*/
|
||
|
|
||
|
Trace((stderr,
|
||
|
"code %4d; oldcode %4d; char %3d (%c); len %d; string [", curcode,
|
||
|
oldcode, (int)(*newstr), (*newstr<32 || *newstr>=127)? ' ':*newstr,
|
||
|
len));
|
||
|
|
||
|
{
|
||
|
register uch *p;
|
||
|
|
||
|
for (p = newstr; p < newstr+len; ++p) {
|
||
|
*G.outptr++ = *p;
|
||
|
OUTDBG(*p)
|
||
|
if (++G.outcnt == outbufsiz) {
|
||
|
Trace((stderr, "doing flush(), outcnt = %lu\n", G.outcnt));
|
||
|
if ((error = flush(__G__ realbuf, G.outcnt, TRUE)) != 0) {
|
||
|
Trace((stderr, "unshrink: flush() error (%d)\n",
|
||
|
error));
|
||
|
return error;
|
||
|
}
|
||
|
G.outptr = realbuf;
|
||
|
G.outcnt = 0L;
|
||
|
Trace((stderr, "done with flush()\n"));
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
/*-----------------------------------------------------------------------
|
||
|
Add new leaf (first character of newstr) to tree as child of oldcode.
|
||
|
-----------------------------------------------------------------------*/
|
||
|
|
||
|
/* search for freecode */
|
||
|
code = (shrint)(lastfreecode + 1);
|
||
|
/* add if-test before loop for speed? */
|
||
|
while ((code < HSIZE) && (parent[code] != FREE_CODE))
|
||
|
++code;
|
||
|
lastfreecode = code;
|
||
|
Trace((stderr, "]; newcode %d\n", code));
|
||
|
if (code >= HSIZE)
|
||
|
/* invalid compressed data caused max-code overflow! */
|
||
|
return PK_ERR;
|
||
|
|
||
|
Value[code] = finalval;
|
||
|
parent[code] = oldcode;
|
||
|
oldcode = curcode;
|
||
|
|
||
|
}
|
||
|
|
||
|
/*---------------------------------------------------------------------------
|
||
|
Flush any remaining data and return to sender...
|
||
|
---------------------------------------------------------------------------*/
|
||
|
|
||
|
if (G.outcnt > 0L) {
|
||
|
Trace((stderr, "doing final flush(), outcnt = %lu\n", G.outcnt));
|
||
|
if ((error = flush(__G__ realbuf, G.outcnt, TRUE)) != 0) {
|
||
|
Trace((stderr, "unshrink: flush() error (%d)\n", error));
|
||
|
return error;
|
||
|
}
|
||
|
Trace((stderr, "done with flush()\n"));
|
||
|
}
|
||
|
|
||
|
return PK_OK;
|
||
|
|
||
|
} /* end function unshrink() */
|
||
|
|
||
|
|
||
|
|
||
|
|
||
|
|
||
|
/****************************/
|
||
|
/* Function partial_clear() */ /* no longer recursive... */
|
||
|
/****************************/
|
||
|
|
||
|
static void partial_clear(__G__ lastcodeused)
|
||
|
__GDEF
|
||
|
int lastcodeused;
|
||
|
{
|
||
|
register shrint code;
|
||
|
|
||
|
/* clear all nodes which have no children (i.e., leaf nodes only) */
|
||
|
|
||
|
/* first loop: mark each parent as such */
|
||
|
for (code = BOGUSCODE+1; code <= lastcodeused; ++code) {
|
||
|
register shrint cparent = (shrint)(parent[code] & CODE_MASK);
|
||
|
|
||
|
if (cparent > BOGUSCODE)
|
||
|
FLAG_BITS[cparent] |= HAS_CHILD; /* set parent's child-bit */
|
||
|
}
|
||
|
|
||
|
/* second loop: clear all nodes *not* marked as parents; reset flag bits */
|
||
|
for (code = BOGUSCODE+1; code <= lastcodeused; ++code) {
|
||
|
if (FLAG_BITS[code] & HAS_CHILD) /* just clear child-bit */
|
||
|
FLAG_BITS[code] &= ~HAS_CHILD;
|
||
|
else { /* leaf: lose it */
|
||
|
Trace((stderr, "%d\n", code));
|
||
|
parent[code] = FREE_CODE;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
return;
|
||
|
}
|
||
|
|
||
|
#endif /* !LZW_CLEAN */
|