summaryrefslogtreecommitdiff
path: root/release/src/router/busybox/archival/libunarchive/decompress_bunzip2.c
diff options
context:
space:
mode:
Diffstat (limited to 'release/src/router/busybox/archival/libunarchive/decompress_bunzip2.c')
-rw-r--r--release/src/router/busybox/archival/libunarchive/decompress_bunzip2.c2202
1 files changed, 634 insertions, 1568 deletions
diff --git a/release/src/router/busybox/archival/libunarchive/decompress_bunzip2.c b/release/src/router/busybox/archival/libunarchive/decompress_bunzip2.c
index 0164b77e..b53720f2 100644
--- a/release/src/router/busybox/archival/libunarchive/decompress_bunzip2.c
+++ b/release/src/router/busybox/archival/libunarchive/decompress_bunzip2.c
@@ -1,1658 +1,724 @@
-/*--
- This file is a part of bzip2 and/or libbzip2, a program and
- library for lossless, block-sorting data compression.
-
- Copyright (C) 1996-2000 Julian R Seward. All rights reserved.
-
- Redistribution and use in source and binary forms, with or without
- modification, are permitted provided that the following conditions
- are met:
-
- 1. Redistributions of source code must retain the above copyright
- notice, this list of conditions and the following disclaimer.
-
- 2. The origin of this software must not be misrepresented; you must
- not claim that you wrote the original software. If you use this
- software in a product, an acknowledgment in the product
- documentation would be appreciated but is not required.
-
- 3. Altered source versions must be plainly marked as such, and must
- not be misrepresented as being the original software.
-
- 4. The name of the author may not be used to endorse or promote
- products derived from this software without specific prior written
- permission.
-
- THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
- OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
- WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
- DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
- GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
- WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
- NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
- SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
- Julian Seward, Cambridge, UK.
- jseward@acm.org
- bzip2/libbzip2 version 1.0 of 21 March 2000
-
- This program is based on (at least) the work of:
- Mike Burrows
- David Wheeler
- Peter Fenwick
- Alistair Moffat
- Radford Neal
- Ian H. Witten
- Robert Sedgewick
- Jon L. Bentley
-
- For more information on these sources, see the manual.
---*/
-
-#include <stdlib.h>
-#include <stdio.h>
-#include <string.h>
-#include <getopt.h>
-#include <unistd.h>
-
-#include "busybox.h"
-
-#define MTFA_SIZE 4096
-#define MTFL_SIZE 16
-#define BZ_N_GROUPS 6
-#define BZ_G_SIZE 50
-#define BZ_MAX_ALPHA_SIZE 258
-
-#define BZ_OK 0
-#define BZ_STREAM_END 4
-#define BZ_SEQUENCE_ERROR (-1)
-#define BZ_DATA_ERROR (-4)
-#define BZ_DATA_ERROR_MAGIC (-5)
-#define BZ_IO_ERROR (-6)
-#define BZ_UNEXPECTED_EOF (-7)
-
-#define BZ_RUNA 0
-#define BZ_RUNB 1
-
-#define BZ_MAX_UNUSED 5000
-#define FILE_NAME_LEN 1034
-/*-- states for decompression. --*/
-
-#define BZ_X_IDLE 1
-#define BZ_X_OUTPUT 2
-
-#define BZ_X_MAGIC_1 10
-#define BZ_X_MAGIC_2 11
-#define BZ_X_MAGIC_3 12
-#define BZ_X_MAGIC_4 13
-#define BZ_X_BLKHDR_1 14
-#define BZ_X_BLKHDR_2 15
-#define BZ_X_BLKHDR_3 16
-#define BZ_X_BLKHDR_4 17
-#define BZ_X_BLKHDR_5 18
-#define BZ_X_BLKHDR_6 19
-#define BZ_X_BCRC_1 20
-#define BZ_X_BCRC_2 21
-#define BZ_X_BCRC_3 22
-#define BZ_X_BCRC_4 23
-#define BZ_X_RANDBIT 24
-#define BZ_X_ORIGPTR_1 25
-#define BZ_X_ORIGPTR_2 26
-#define BZ_X_ORIGPTR_3 27
-#define BZ_X_MAPPING_1 28
-#define BZ_X_MAPPING_2 29
-#define BZ_X_SELECTOR_1 30
-#define BZ_X_SELECTOR_2 31
-#define BZ_X_SELECTOR_3 32
-#define BZ_X_CODING_1 33
-#define BZ_X_CODING_2 34
-#define BZ_X_CODING_3 35
-#define BZ_X_MTF_1 36
-#define BZ_X_MTF_2 37
-#define BZ_X_MTF_3 38
-#define BZ_X_MTF_4 39
-#define BZ_X_MTF_5 40
-#define BZ_X_MTF_6 41
-#define BZ_X_ENDHDR_2 42
-#define BZ_X_ENDHDR_3 43
-#define BZ_X_ENDHDR_4 44
-#define BZ_X_ENDHDR_5 45
-#define BZ_X_ENDHDR_6 46
-#define BZ_X_CCRC_1 47
-#define BZ_X_CCRC_2 48
-#define BZ_X_CCRC_3 49
-#define BZ_X_CCRC_4 50
-
-#define BZ_MAX_CODE_LEN 23
-#define OM_TEST 3
-
-typedef struct {
- char *next_in;
- unsigned int avail_in;
-
- char *next_out;
- unsigned int avail_out;
-
- void *state;
-
-} bz_stream;
-
-#define BZ_MAX_UNUSED 5000
-typedef struct {
- bz_stream strm;
- int fd;
- unsigned char initialisedOk;
- char buf[BZ_MAX_UNUSED];
- int lastErr;
- int bufN;
-} bzFile;
-
-/*-- Structure holding all the decompression-side stuff. --*/
-typedef struct {
- /* pointer back to the struct bz_stream */
- bz_stream* strm;
-
- /* state indicator for this stream */
- int state;
-
- /* for doing the final run-length decoding */
- unsigned char state_out_ch;
- int state_out_len;
- unsigned char blockRandomised;
- int rNToGo;
- int rTPos;
-
- /* the buffer for bit stream reading */
- unsigned int bsBuff;
- int bsLive;
-
- /* misc administratium */
- int blockSize100k;
- int currBlockNo;
-
- /* for undoing the Burrows-Wheeler transform */
- int origPtr;
- unsigned int tPos;
- int k0;
- int unzftab[256];
- int nblock_used;
- int cftab[257];
- int cftabCopy[257];
-
- /* for undoing the Burrows-Wheeler transform (FAST) */
- unsigned int *tt;
-
- /* stored and calculated CRCs */
- unsigned int storedBlockCRC;
- unsigned int storedCombinedCRC;
- unsigned int calculatedBlockCRC;
- unsigned int calculatedCombinedCRC;
-
- /* map of bytes used in block */
- int nInUse;
- unsigned char inUse[256];
- unsigned char inUse16[16];
- unsigned char seqToUnseq[256];
-
- /* for decoding the MTF values */
- unsigned char mtfa [MTFA_SIZE];
- unsigned char selector [2 + (900000 / BZ_G_SIZE)];
- unsigned char selectorMtf[2 + (900000 / BZ_G_SIZE)];
- unsigned char len [BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE];
- int mtfbase[256 / MTFL_SIZE];
-
- int limit [BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE];
- int base [BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE];
- int perm [BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE];
- int minLens[BZ_N_GROUPS];
-
- /* save area for scalars in the main decompress code */
- int save_i;
- int save_j;
- int save_t;
- int save_alphaSize;
- int save_nGroups;
- int save_nSelectors;
- int save_EOB;
- int save_groupNo;
- int save_groupPos;
- int save_nextSym;
- int save_nblockMAX;
- int save_nblock;
- int save_es;
- int save_N;
- int save_curr;
- int save_zt;
- int save_zn;
- int save_zvec;
- int save_zj;
- int save_gSel;
- int save_gMinlen;
- int *save_gLimit;
- int *save_gBase;
- int *save_gPerm;
-} DState;
-
-static int BZ2_rNums[512];
-static bzFile *bzf;
-static int bzerr = BZ_OK;
-
-static const unsigned int BZ2_crc32Table[256] = {
-
- /*-- Ugly, innit? --*/
-
- 0x00000000L, 0x04c11db7L, 0x09823b6eL, 0x0d4326d9L,
- 0x130476dcL, 0x17c56b6bL, 0x1a864db2L, 0x1e475005L,
- 0x2608edb8L, 0x22c9f00fL, 0x2f8ad6d6L, 0x2b4bcb61L,
- 0x350c9b64L, 0x31cd86d3L, 0x3c8ea00aL, 0x384fbdbdL,
- 0x4c11db70L, 0x48d0c6c7L, 0x4593e01eL, 0x4152fda9L,
- 0x5f15adacL, 0x5bd4b01bL, 0x569796c2L, 0x52568b75L,
- 0x6a1936c8L, 0x6ed82b7fL, 0x639b0da6L, 0x675a1011L,
- 0x791d4014L, 0x7ddc5da3L, 0x709f7b7aL, 0x745e66cdL,
- 0x9823b6e0L, 0x9ce2ab57L, 0x91a18d8eL, 0x95609039L,
- 0x8b27c03cL, 0x8fe6dd8bL, 0x82a5fb52L, 0x8664e6e5L,
- 0xbe2b5b58L, 0xbaea46efL, 0xb7a96036L, 0xb3687d81L,
- 0xad2f2d84L, 0xa9ee3033L, 0xa4ad16eaL, 0xa06c0b5dL,
- 0xd4326d90L, 0xd0f37027L, 0xddb056feL, 0xd9714b49L,
- 0xc7361b4cL, 0xc3f706fbL, 0xceb42022L, 0xca753d95L,
- 0xf23a8028L, 0xf6fb9d9fL, 0xfbb8bb46L, 0xff79a6f1L,
- 0xe13ef6f4L, 0xe5ffeb43L, 0xe8bccd9aL, 0xec7dd02dL,
- 0x34867077L, 0x30476dc0L, 0x3d044b19L, 0x39c556aeL,
- 0x278206abL, 0x23431b1cL, 0x2e003dc5L, 0x2ac12072L,
- 0x128e9dcfL, 0x164f8078L, 0x1b0ca6a1L, 0x1fcdbb16L,
- 0x018aeb13L, 0x054bf6a4L, 0x0808d07dL, 0x0cc9cdcaL,
- 0x7897ab07L, 0x7c56b6b0L, 0x71159069L, 0x75d48ddeL,
- 0x6b93dddbL, 0x6f52c06cL, 0x6211e6b5L, 0x66d0fb02L,
- 0x5e9f46bfL, 0x5a5e5b08L, 0x571d7dd1L, 0x53dc6066L,
- 0x4d9b3063L, 0x495a2dd4L, 0x44190b0dL, 0x40d816baL,
- 0xaca5c697L, 0xa864db20L, 0xa527fdf9L, 0xa1e6e04eL,
- 0xbfa1b04bL, 0xbb60adfcL, 0xb6238b25L, 0xb2e29692L,
- 0x8aad2b2fL, 0x8e6c3698L, 0x832f1041L, 0x87ee0df6L,
- 0x99a95df3L, 0x9d684044L, 0x902b669dL, 0x94ea7b2aL,
- 0xe0b41de7L, 0xe4750050L, 0xe9362689L, 0xedf73b3eL,
- 0xf3b06b3bL, 0xf771768cL, 0xfa325055L, 0xfef34de2L,
- 0xc6bcf05fL, 0xc27dede8L, 0xcf3ecb31L, 0xcbffd686L,
- 0xd5b88683L, 0xd1799b34L, 0xdc3abdedL, 0xd8fba05aL,
- 0x690ce0eeL, 0x6dcdfd59L, 0x608edb80L, 0x644fc637L,
- 0x7a089632L, 0x7ec98b85L, 0x738aad5cL, 0x774bb0ebL,
- 0x4f040d56L, 0x4bc510e1L, 0x46863638L, 0x42472b8fL,
- 0x5c007b8aL, 0x58c1663dL, 0x558240e4L, 0x51435d53L,
- 0x251d3b9eL, 0x21dc2629L, 0x2c9f00f0L, 0x285e1d47L,
- 0x36194d42L, 0x32d850f5L, 0x3f9b762cL, 0x3b5a6b9bL,
- 0x0315d626L, 0x07d4cb91L, 0x0a97ed48L, 0x0e56f0ffL,
- 0x1011a0faL, 0x14d0bd4dL, 0x19939b94L, 0x1d528623L,
- 0xf12f560eL, 0xf5ee4bb9L, 0xf8ad6d60L, 0xfc6c70d7L,
- 0xe22b20d2L, 0xe6ea3d65L, 0xeba91bbcL, 0xef68060bL,
- 0xd727bbb6L, 0xd3e6a601L, 0xdea580d8L, 0xda649d6fL,
- 0xc423cd6aL, 0xc0e2d0ddL, 0xcda1f604L, 0xc960ebb3L,
- 0xbd3e8d7eL, 0xb9ff90c9L, 0xb4bcb610L, 0xb07daba7L,
- 0xae3afba2L, 0xaafbe615L, 0xa7b8c0ccL, 0xa379dd7bL,
- 0x9b3660c6L, 0x9ff77d71L, 0x92b45ba8L, 0x9675461fL,
- 0x8832161aL, 0x8cf30badL, 0x81b02d74L, 0x857130c3L,
- 0x5d8a9099L, 0x594b8d2eL, 0x5408abf7L, 0x50c9b640L,
- 0x4e8ee645L, 0x4a4ffbf2L, 0x470cdd2bL, 0x43cdc09cL,
- 0x7b827d21L, 0x7f436096L, 0x7200464fL, 0x76c15bf8L,
- 0x68860bfdL, 0x6c47164aL, 0x61043093L, 0x65c52d24L,
- 0x119b4be9L, 0x155a565eL, 0x18197087L, 0x1cd86d30L,
- 0x029f3d35L, 0x065e2082L, 0x0b1d065bL, 0x0fdc1becL,
- 0x3793a651L, 0x3352bbe6L, 0x3e119d3fL, 0x3ad08088L,
- 0x2497d08dL, 0x2056cd3aL, 0x2d15ebe3L, 0x29d4f654L,
- 0xc5a92679L, 0xc1683bceL, 0xcc2b1d17L, 0xc8ea00a0L,
- 0xd6ad50a5L, 0xd26c4d12L, 0xdf2f6bcbL, 0xdbee767cL,
- 0xe3a1cbc1L, 0xe760d676L, 0xea23f0afL, 0xeee2ed18L,
- 0xf0a5bd1dL, 0xf464a0aaL, 0xf9278673L, 0xfde69bc4L,
- 0x89b8fd09L, 0x8d79e0beL, 0x803ac667L, 0x84fbdbd0L,
- 0x9abc8bd5L, 0x9e7d9662L, 0x933eb0bbL, 0x97ffad0cL,
- 0xafb010b1L, 0xab710d06L, 0xa6322bdfL, 0xa2f33668L,
- 0xbcb4666dL, 0xb8757bdaL, 0xb5365d03L, 0xb1f740b4L
+/* vi: set sw=4 ts=4: */
+/* Small bzip2 deflate implementation, by Rob Landley (rob@landley.net).
+
+ Based on bzip2 decompression code by Julian R Seward (jseward@acm.org),
+ which also acknowledges contributions by Mike Burrows, David Wheeler,
+ Peter Fenwick, Alistair Moffat, Radford Neal, Ian H. Witten,
+ Robert Sedgewick, and Jon L. Bentley.
+
+ Licensed under GPLv2 or later, see file LICENSE in this tarball for details.
+*/
+
+/*
+ Size and speed optimizations by Manuel Novoa III (mjn3@codepoet.org).
+
+ More efficient reading of Huffman codes, a streamlined read_bunzip()
+ function, and various other tweaks. In (limited) tests, approximately
+ 20% faster than bzcat on x86 and about 10% faster on arm.
+
+ Note that about 2/3 of the time is spent in read_unzip() reversing
+ the Burrows-Wheeler transformation. Much of that time is delay
+ resulting from cache misses.
+
+ I would ask that anyone benefiting from this work, especially those
+ using it in commercial products, consider making a donation to my local
+ non-profit hospice organization (www.hospiceacadiana.com) in the name of
+ the woman I loved, Toni W. Hagan, who passed away Feb. 12, 2003.
+
+ Manuel
+ */
+
+#include "libbb.h"
+#include "unarchive.h"
+
+/* Constants for Huffman coding */
+#define MAX_GROUPS 6
+#define GROUP_SIZE 50 /* 64 would have been more efficient */
+#define MAX_HUFCODE_BITS 20 /* Longest Huffman code allowed */
+#define MAX_SYMBOLS 258 /* 256 literals + RUNA + RUNB */
+#define SYMBOL_RUNA 0
+#define SYMBOL_RUNB 1
+
+/* Status return values */
+#define RETVAL_OK 0
+#define RETVAL_LAST_BLOCK (-1)
+#define RETVAL_NOT_BZIP_DATA (-2)
+#define RETVAL_UNEXPECTED_INPUT_EOF (-3)
+#define RETVAL_SHORT_WRITE (-4)
+#define RETVAL_DATA_ERROR (-5)
+#define RETVAL_OUT_OF_MEMORY (-6)
+#define RETVAL_OBSOLETE_INPUT (-7)
+
+/* Other housekeeping constants */
+#define IOBUF_SIZE 4096
+
+/* This is what we know about each Huffman coding group */
+struct group_data {
+ /* We have an extra slot at the end of limit[] for a sentinel value. */
+ int limit[MAX_HUFCODE_BITS+1], base[MAX_HUFCODE_BITS], permute[MAX_SYMBOLS];
+ int minLen, maxLen;
};
-static void bz_rand_udp_mask(DState *s)
-{
- if (s->rNToGo == 0) {
- s->rNToGo = BZ2_rNums[s->rTPos];
- s->rTPos++;
- if (s->rTPos == 512) {
- s->rTPos = 0;
- }
- }
- s->rNToGo--;
-}
-
-static void BZ2_hbCreateDecodeTables(int *limit, int *base, int *perm, unsigned char *length, int minLen, int maxLen, int alphaSize )
-{
- int pp, i, j, vec;
-
- pp = 0;
- for (i = minLen; i <= maxLen; i++) {
- for (j = 0; j < alphaSize; j++) {
- if (length[j] == i) {
- perm[pp] = j;
- pp++;
- }
- }
- }
-
- for (i = 0; i < BZ_MAX_CODE_LEN; i++) {
- base[i] = 0;
- }
-
- for (i = 0; i < alphaSize; i++) {
- base[length[i]+1]++;
- }
-
- for (i = 1; i < BZ_MAX_CODE_LEN; i++) {
- base[i] += base[i-1];
- }
-
- for (i = 0; i < BZ_MAX_CODE_LEN; i++) {
- limit[i] = 0;
- }
- vec = 0;
+/* Structure holding all the housekeeping data, including IO buffers and
+ * memory that persists between calls to bunzip
+ * Found the most used member:
+ * cat this_file.c | sed -e 's/"/ /g' -e "s/'/ /g" | xargs -n1 \
+ * | grep 'bd->' | sed 's/^.*bd->/bd->/' | sort | $PAGER
+ * and moved it (inbufBitCount) to offset 0.
+ */
+struct bunzip_data {
+ /* I/O tracking data (file handles, buffers, positions, etc.) */
+ unsigned inbufBitCount, inbufBits;
+ int in_fd, out_fd, inbufCount, inbufPos /*, outbufPos*/;
+ unsigned char *inbuf /*,*outbuf*/;
+
+ /* State for interrupting output loop */
+ int writeCopies, writePos, writeRunCountdown, writeCount, writeCurrent;
+
+ /* The CRC values stored in the block header and calculated from the data */
+ uint32_t headerCRC, totalCRC, writeCRC;
+
+ /* Intermediate buffer and its size (in bytes) */
+ unsigned *dbuf, dbufSize;
+
+ /* For I/O error handling */
+ jmp_buf jmpbuf;
+
+ /* Big things go last (register-relative addressing can be larger for big offsets) */
+ uint32_t crc32Table[256];
+ unsigned char selectors[32768]; /* nSelectors=15 bits */
+ struct group_data groups[MAX_GROUPS]; /* Huffman coding tables */
+};
+/* typedef struct bunzip_data bunzip_data; -- done in .h file */
- for (i = minLen; i <= maxLen; i++) {
- vec += (base[i+1] - base[i]);
- limit[i] = vec-1;
- vec <<= 1;
- }
- for (i = minLen + 1; i <= maxLen; i++) {
- base[i] = ((limit[i-1] + 1) << 1) - base[i];
- }
-}
+/* Return the next nnn bits of input. All reads from the compressed input
+ are done through this function. All reads are big endian */
-static int get_bits(DState *s, int *vvv, char nnn)
+static unsigned get_bits(bunzip_data *bd, int bits_wanted)
{
- while (1) {
- if (s->bsLive >= nnn) {
- *vvv = (s->bsBuff >> (s->bsLive-nnn)) & ((1 << nnn)-1);
- s->bsLive -= nnn;
- break;
+ unsigned bits = 0;
+
+ /* If we need to get more data from the byte buffer, do so. (Loop getting
+ one byte at a time to enforce endianness and avoid unaligned access.) */
+ while ((int)(bd->inbufBitCount) < bits_wanted) {
+
+ /* If we need to read more data from file into byte buffer, do so */
+ if (bd->inbufPos == bd->inbufCount) {
+ /* if "no input fd" case: in_fd == -1, read fails, we jump */
+ bd->inbufCount = read(bd->in_fd, bd->inbuf, IOBUF_SIZE);
+ if (bd->inbufCount <= 0)
+ longjmp(bd->jmpbuf, RETVAL_UNEXPECTED_INPUT_EOF);
+ bd->inbufPos = 0;
}
- if (s->strm->avail_in == 0) {
- return(FALSE);
+
+ /* Avoid 32-bit overflow (dump bit buffer to top of output) */
+ if (bd->inbufBitCount >= 24) {
+ bits = bd->inbufBits & ((1 << bd->inbufBitCount) - 1);
+ bits_wanted -= bd->inbufBitCount;
+ bits <<= bits_wanted;
+ bd->inbufBitCount = 0;
}
- s->bsBuff = (s->bsBuff << 8) | ((unsigned int) (*((unsigned char*)(s->strm->next_in))));
- s->bsLive += 8;
- s->strm->next_in++;
- s->strm->avail_in--;
+
+ /* Grab next 8 bits of input from buffer. */
+ bd->inbufBits = (bd->inbufBits << 8) | bd->inbuf[bd->inbufPos++];
+ bd->inbufBitCount += 8;
}
- return(TRUE);
-}
-static int bz_get_fast(DState *s)
-{
- int cccc;
- s->tPos = s->tt[s->tPos];
- cccc = (unsigned char)(s->tPos & 0xff);
- s->tPos >>= 8;
- return(cccc);
+ /* Calculate result */
+ bd->inbufBitCount -= bits_wanted;
+ bits |= (bd->inbufBits >> bd->inbufBitCount) & ((1 << bits_wanted) - 1);
+
+ return bits;
}
-/*---------------------------------------------------*/
-static inline int BZ2_decompress(DState *s)
+/* Unpacks the next block and sets up for the inverse burrows-wheeler step. */
+static int get_next_block(bunzip_data *bd)
{
- int uc = 0;
- int retVal;
- int minLen, maxLen;
-
- /* stuff that needs to be saved/restored */
- int i;
- int j;
- int t;
- int alphaSize;
- int nGroups;
- int nSelectors;
- int EOB;
- int groupNo;
- int groupPos;
- int nextSym;
- int nblockMAX;
- int nblock;
- int es;
- int N;
- int curr;
- int zt;
- int zn;
- int zvec;
- int zj;
- int gSel;
- int gMinlen;
- int *gLimit;
- int *gBase;
- int *gPerm;
- int switch_val;
-
- int get_mtf_val_init(void)
- {
- if (groupPos == 0) {
- groupNo++;
- if (groupNo >= nSelectors) {
- retVal = BZ_DATA_ERROR;
- return(FALSE);
- }
- groupPos = BZ_G_SIZE;
- gSel = s->selector[groupNo];
- gMinlen = s->minLens[gSel];
- gLimit = &(s->limit[gSel][0]);
- gPerm = &(s->perm[gSel][0]);
- gBase = &(s->base[gSel][0]);
+ struct group_data *hufGroup;
+ int dbufCount, nextSym, dbufSize, groupCount, *base, *limit, selector,
+ i, j, k, t, runPos, symCount, symTotal, nSelectors, byteCount[256];
+ unsigned char uc, symToByte[256], mtfSymbol[256], *selectors;
+ unsigned *dbuf, origPtr;
+
+ dbuf = bd->dbuf;
+ dbufSize = bd->dbufSize;
+ selectors = bd->selectors;
+
+ /* Reset longjmp I/O error handling */
+ i = setjmp(bd->jmpbuf);
+ if (i) return i;
+
+ /* Read in header signature and CRC, then validate signature.
+ (last block signature means CRC is for whole file, return now) */
+ i = get_bits(bd, 24);
+ j = get_bits(bd, 24);
+ bd->headerCRC = get_bits(bd, 32);
+ if ((i == 0x177245) && (j == 0x385090)) return RETVAL_LAST_BLOCK;
+ if ((i != 0x314159) || (j != 0x265359)) return RETVAL_NOT_BZIP_DATA;
+
+ /* We can add support for blockRandomised if anybody complains. There was
+ some code for this in busybox 1.0.0-pre3, but nobody ever noticed that
+ it didn't actually work. */
+ if (get_bits(bd, 1)) return RETVAL_OBSOLETE_INPUT;
+ origPtr = get_bits(bd, 24);
+ if ((int)origPtr > dbufSize) return RETVAL_DATA_ERROR;
+
+ /* mapping table: if some byte values are never used (encoding things
+ like ascii text), the compression code removes the gaps to have fewer
+ symbols to deal with, and writes a sparse bitfield indicating which
+ values were present. We make a translation table to convert the symbols
+ back to the corresponding bytes. */
+ t = get_bits(bd, 16);
+ symTotal = 0;
+ for (i = 0; i < 16; i++) {
+ if (t & (1 << (15-i))) {
+ k = get_bits(bd, 16);
+ for (j = 0; j < 16; j++)
+ if (k & (1 << (15-j)))
+ symToByte[symTotal++] = (16*i) + j;
}
- groupPos--;
- zn = gMinlen;
- return(TRUE);
}
- if (s->state == BZ_X_MAGIC_1) {
- /*initialise the save area*/
- s->save_i = 0;
- s->save_j = 0;
- s->save_t = 0;
- s->save_alphaSize = 0;
- s->save_nGroups = 0;
- s->save_nSelectors = 0;
- s->save_EOB = 0;
- s->save_groupNo = 0;
- s->save_groupPos = 0;
- s->save_nextSym = 0;
- s->save_nblockMAX = 0;
- s->save_nblock = 0;
- s->save_es = 0;
- s->save_N = 0;
- s->save_curr = 0;
- s->save_zt = 0;
- s->save_zn = 0;
- s->save_zvec = 0;
- s->save_zj = 0;
- s->save_gSel = 0;
- s->save_gMinlen = 0;
- s->save_gLimit = NULL;
- s->save_gBase = NULL;
- s->save_gPerm = NULL;
+ /* How many different Huffman coding groups does this block use? */
+ groupCount = get_bits(bd, 3);
+ if (groupCount < 2 || groupCount > MAX_GROUPS)
+ return RETVAL_DATA_ERROR;
+
+ /* nSelectors: Every GROUP_SIZE many symbols we select a new Huffman coding
+ group. Read in the group selector list, which is stored as MTF encoded
+ bit runs. (MTF=Move To Front, as each value is used it's moved to the
+ start of the list.) */
+ nSelectors = get_bits(bd, 15);
+ if (!nSelectors) return RETVAL_DATA_ERROR;
+ for (i = 0; i < groupCount; i++) mtfSymbol[i] = i;
+ for (i = 0; i < nSelectors; i++) {
+
+ /* Get next value */
+ for (j = 0; get_bits(bd, 1); j++)
+ if (j >= groupCount) return RETVAL_DATA_ERROR;
+
+ /* Decode MTF to get the next selector */
+ uc = mtfSymbol[j];
+ for (;j;j--) mtfSymbol[j] = mtfSymbol[j-1];
+ mtfSymbol[0] = selectors[i] = uc;
}
- /*restore from the save area*/
- i = s->save_i;
- j = s->save_j;
- t = s->save_t;
- alphaSize = s->save_alphaSize;
- nGroups = s->save_nGroups;
- nSelectors = s->save_nSelectors;
- EOB = s->save_EOB;
- groupNo = s->save_groupNo;
- groupPos = s->save_groupPos;
- nextSym = s->save_nextSym;
- nblockMAX = s->save_nblockMAX;
- nblock = s->save_nblock;
- es = s->save_es;
- N = s->save_N;
- curr = s->save_curr;
- zt = s->save_zt;
- zn = s->save_zn;
- zvec = s->save_zvec;
- zj = s->save_zj;
- gSel = s->save_gSel;
- gMinlen = s->save_gMinlen;
- gLimit = s->save_gLimit;
- gBase = s->save_gBase;
- gPerm = s->save_gPerm;
-
- retVal = BZ_OK;
- switch_val = s->state;
- switch (switch_val) {
- case BZ_X_MAGIC_1:
- s->state = BZ_X_MAGIC_1;
- if (! get_bits(s, &uc, 8)) {
- retVal = BZ_OK;
- goto save_state_and_return;
- }
- if (uc != 'B') {
- retVal = BZ_DATA_ERROR_MAGIC;
- goto save_state_and_return;
- }
-
- case BZ_X_MAGIC_2:
- s->state = BZ_X_MAGIC_2;
- if (! get_bits(s, &uc, 8)) {
- retVal = BZ_OK;
- goto save_state_and_return;
- }
- if (uc != 'Z') {
- retVal = BZ_DATA_ERROR_MAGIC;
- goto save_state_and_return;
- }
-
- case BZ_X_MAGIC_3:
- s->state = BZ_X_MAGIC_3;
- if (! get_bits(s, &uc, 8)) {
- retVal = BZ_OK;
- goto save_state_and_return;
- }
- if (uc != 'h') {
- retVal = BZ_DATA_ERROR_MAGIC;
- goto save_state_and_return;
- }
-
- case BZ_X_MAGIC_4:
- s->state = BZ_X_MAGIC_4;
- if (! get_bits(s, &s->blockSize100k, 8)) {
- retVal = BZ_OK;
- goto save_state_and_return;
- }
- if ((s->blockSize100k < '1') || (s->blockSize100k > '9')) {
- retVal = BZ_DATA_ERROR_MAGIC;
- goto save_state_and_return;
- }
- s->blockSize100k -= '0';
-
- s->tt = xmalloc(s->blockSize100k * 100000 * sizeof(int));
-
- case BZ_X_BLKHDR_1:
- s->state = BZ_X_BLKHDR_1;
- if (! get_bits(s, &uc, 8)) {
- retVal = BZ_OK;
- goto save_state_and_return;
- }
-
- if (uc == 0x17) {
- goto endhdr_2;
- }
- if (uc != 0x31) {
- retVal = BZ_DATA_ERROR;
- goto save_state_and_return;
- }
-
- case BZ_X_BLKHDR_2:
- s->state = BZ_X_BLKHDR_2;
- if (! get_bits(s, &uc, 8)) {
- retVal = BZ_OK;
- goto save_state_and_return;
- }
- if (uc != 0x41) {
- retVal = BZ_DATA_ERROR;
- goto save_state_and_return;
- }
-
- case BZ_X_BLKHDR_3:
- s->state = BZ_X_BLKHDR_3;
- if (! get_bits(s, &uc, 8)) {
- retVal = BZ_OK;
- goto save_state_and_return;
- }
- if (uc != 0x59) {
- retVal = BZ_DATA_ERROR;
- goto save_state_and_return;
- }
-
- case BZ_X_BLKHDR_4:
- s->state = BZ_X_BLKHDR_4;
- if (! get_bits(s, &uc, 8)) {
- retVal = BZ_OK;
- goto save_state_and_return;
- }
- if (uc != 0x26) {
- retVal = BZ_DATA_ERROR;
- goto save_state_and_return;
- }
-
- case BZ_X_BLKHDR_5:
- s->state = BZ_X_BLKHDR_5;
- if (! get_bits(s, &uc, 8)) {
- retVal = BZ_OK;
- goto save_state_and_return;
- }
- if (uc != 0x53) {
- retVal = BZ_DATA_ERROR;
- goto save_state_and_return;
- }
-
- case BZ_X_BLKHDR_6:
- s->state = BZ_X_BLKHDR_6;
- if (! get_bits(s, &uc, 8)) {
- retVal = BZ_OK;
- goto save_state_and_return;
- }
- if (uc != 0x59) {
- retVal = BZ_DATA_ERROR;
- goto save_state_and_return;
- }
-
- s->currBlockNo++;
- s->storedBlockCRC = 0;
-
- case BZ_X_BCRC_1:
- s->state = BZ_X_BCRC_1;
- if (! get_bits(s, &uc, 8)) {
- retVal = BZ_OK;
- goto save_state_and_return;
- }
- s->storedBlockCRC = (s->storedBlockCRC << 8) | ((unsigned int)uc);
-
- case BZ_X_BCRC_2:
- s->state = BZ_X_BCRC_2;
- if (! get_bits(s, &uc, 8)) {
- retVal = BZ_OK;
- goto save_state_and_return;
- }
- s->storedBlockCRC = (s->storedBlockCRC << 8) | ((unsigned int)uc);
-
- case BZ_X_BCRC_3:
- s->state = BZ_X_BCRC_3;
- if (! get_bits(s, &uc, 8)) {
- retVal = BZ_OK;
- goto save_state_and_return;
- }
- s->storedBlockCRC = (s->storedBlockCRC << 8) | ((unsigned int)uc);
-
- case BZ_X_BCRC_4:
- s->state = BZ_X_BCRC_4;
- if (! get_bits(s, &uc, 8)) {
- retVal = BZ_OK;
- goto save_state_and_return;
- }
- s->storedBlockCRC = (s->storedBlockCRC << 8) | ((unsigned int)uc);
-
- case BZ_X_RANDBIT:
- s->state = BZ_X_RANDBIT;
- {
- int tmp = s->blockRandomised;
- const int ret = get_bits(s, &tmp, 1);
- s->blockRandomised = tmp;
- if (! ret) {
- retVal = BZ_OK;
- goto save_state_and_return;
- }
- }
-
- s->origPtr = 0;
-
- case BZ_X_ORIGPTR_1:
- s->state = BZ_X_ORIGPTR_1;
- if (! get_bits(s, &uc, 8)) {
- retVal = BZ_OK;
- goto save_state_and_return;
- }
- s->origPtr = (s->origPtr << 8) | ((int)uc);
-
- case BZ_X_ORIGPTR_2:
- s->state = BZ_X_ORIGPTR_2;
- if (! get_bits(s, &uc, 8)) {
- retVal = BZ_OK;
- goto save_state_and_return;
- }
- s->origPtr = (s->origPtr << 8) | ((int)uc);
-
- case BZ_X_ORIGPTR_3:
- s->state = BZ_X_ORIGPTR_3;
- if (! get_bits(s, &uc, 8)) {
- retVal = BZ_OK;
- goto save_state_and_return;
- }
- s->origPtr = (s->origPtr << 8) | ((int)uc);
-
- if (s->origPtr < 0) {
- retVal = BZ_DATA_ERROR;
- goto save_state_and_return;
- }
- if (s->origPtr > 10 + 100000*s->blockSize100k) {
- retVal = BZ_DATA_ERROR;
- goto save_state_and_return;
- }
-
- /*--- Receive the mapping table ---*/
- case BZ_X_MAPPING_1:
- for (i = 0; i < 16; i++) {
- s->state = BZ_X_MAPPING_1;
- if (! get_bits(s, &uc, 1)) {
- retVal = BZ_OK;
- goto save_state_and_return;
- }
- if (uc == 1) {
- s->inUse16[i] = TRUE;
- } else {
- s->inUse16[i] = FALSE;
- }
- }
-
- for (i = 0; i < 256; i++) {
- s->inUse[i] = FALSE;
- }
-
- for (i = 0; i < 16; i++) {
- if (s->inUse16[i]) {
- for (j = 0; j < 16; j++) {
- case BZ_X_MAPPING_2:
- s->state = BZ_X_MAPPING_2;
- if (! get_bits(s, &uc, 1)) {
- retVal = BZ_OK;
- goto save_state_and_return;
- }
- if (uc == 1) {
- s->inUse[i * 16 + j] = TRUE;
- }
- }
- }
- }
-
- s->nInUse = 0;
- for (i = 0; i < 256; i++) {
- if (s->inUse[i]) {
- s->seqToUnseq[s->nInUse] = i;
- s->nInUse++;
- }
- }
- if (s->nInUse == 0) {
- retVal = BZ_DATA_ERROR;
- goto save_state_and_return;
- }
- alphaSize = s->nInUse+2;
-
- /*--- Now the selectors ---*/
- case BZ_X_SELECTOR_1:
- s->state = BZ_X_SELECTOR_1;
- if (! get_bits(s, &nGroups, 3)) {
- retVal = BZ_OK;
- goto save_state_and_return;
- }
- if (nGroups < 2 || nGroups > 6) {
- retVal = BZ_DATA_ERROR;
- goto save_state_and_return;
- }
-
- case BZ_X_SELECTOR_2:
- s->state = BZ_X_SELECTOR_2;
- if (! get_bits(s, &nSelectors, 15)) {
- retVal = BZ_OK;
- goto save_state_and_return;
- }
- if (nSelectors < 1) {
- retVal = BZ_DATA_ERROR;
- goto save_state_and_return;
- }
-
-
-
- for (i = 0; i < nSelectors; i++) {
- j = 0;
- while (1) {
- case BZ_X_SELECTOR_3:
- s->state = BZ_X_SELECTOR_3;
- if (! get_bits(s, &uc, 1)) {
- retVal = BZ_OK;
- goto save_state_and_return;
- }
- if (uc == 0) {
- break;
- }
- j++;
- if (j >= nGroups) {
- retVal = BZ_DATA_ERROR;
- goto save_state_and_return;
- }
+ /* Read the Huffman coding tables for each group, which code for symTotal
+ literal symbols, plus two run symbols (RUNA, RUNB) */
+ symCount = symTotal + 2;
+ for (j = 0; j < groupCount; j++) {
+ unsigned char length[MAX_SYMBOLS];
+ /* 8 bits is ALMOST enough for temp[], see below */
+ unsigned temp[MAX_HUFCODE_BITS+1];
+ int minLen, maxLen, pp;
+
+ /* Read Huffman code lengths for each symbol. They're stored in
+ a way similar to mtf; record a starting value for the first symbol,
+ and an offset from the previous value for everys symbol after that.
+ (Subtracting 1 before the loop and then adding it back at the end is
+ an optimization that makes the test inside the loop simpler: symbol
+ length 0 becomes negative, so an unsigned inequality catches it.) */
+ t = get_bits(bd, 5) - 1;
+ for (i = 0; i < symCount; i++) {
+ for (;;) {
+ if ((unsigned)t > (MAX_HUFCODE_BITS-1))
+ return RETVAL_DATA_ERROR;
+
+ /* If first bit is 0, stop. Else second bit indicates whether
+ to increment or decrement the value. Optimization: grab 2
+ bits and unget the second if the first was 0. */
+ k = get_bits(bd, 2);
+ if (k < 2) {
+ bd->inbufBitCount++;
+ break;
}
- s->selectorMtf[i] = j;
- }
- /*--- Undo the MTF values for the selectors. ---*/
- {
- unsigned char pos[BZ_N_GROUPS], tmp, v;
- for (v = 0; v < nGroups; v++) {
- pos[v] = v;
- }
- for (i = 0; i < nSelectors; i++) {
- v = s->selectorMtf[i];
- tmp = pos[v];
- while (v > 0) {
- pos[v] = pos[v-1];
- v--;
- }
- pos[0] = tmp;
- s->selector[i] = tmp;
- }
+ /* Add one if second bit 1, else subtract 1. Avoids if/else */
+ t += (((k+1) & 2) - 1);
}
- /*--- Now the coding tables ---*/
- for (t = 0; t < nGroups; t++) {
- case BZ_X_CODING_1:
- s->state = BZ_X_CODING_1;
- if (! get_bits(s, &curr, 5)) {
- retVal = BZ_OK;
- goto save_state_and_return;
- }
- for (i = 0; i < alphaSize; i++) {
- while (TRUE) {
- if (curr < 1 || curr > 20) {
- retVal = BZ_DATA_ERROR;
- goto save_state_and_return;
- }
-
- case BZ_X_CODING_2:
- s->state = BZ_X_CODING_2;
- if (! get_bits(s, &uc, 1)) {
- retVal = BZ_OK;
- goto save_state_and_return;
- }
- if (uc == 0) {
- break;
- }
-
- case BZ_X_CODING_3:
- s->state = BZ_X_CODING_3;
- if (! get_bits(s, &uc, 1)) {
- retVal = BZ_OK;
- goto save_state_and_return;
- }
- if (uc == 0) {
- curr++;
- } else {
- curr--;
- }
- }
- s->len[t][i] = curr;
- }
+ /* Correct for the initial -1, to get the final symbol length */
+ length[i] = t + 1;
}
- /*--- Create the Huffman decoding tables ---*/
- for (t = 0; t < nGroups; t++) {
- minLen = 32;
- maxLen = 0;
- for (i = 0; i < alphaSize; i++) {
- if (s->len[t][i] > maxLen) {
- maxLen = s->len[t][i];
- }
- if (s->len[t][i] < minLen) {
- minLen = s->len[t][i];
- }
- }
-
- BZ2_hbCreateDecodeTables (
- &(s->limit[t][0]),
- &(s->base[t][0]),
- &(s->perm[t][0]),
- &(s->len[t][0]),
- minLen, maxLen, alphaSize
- );
-
-
- s->minLens[t] = minLen;
+ /* Find largest and smallest lengths in this group */
+ minLen = maxLen = length[0];
+ for (i = 1; i < symCount; i++) {
+ if (length[i] > maxLen) maxLen = length[i];
+ else if (length[i] < minLen) minLen = length[i];
}
- /*--- Now the MTF values ---*/
-
- EOB = s->nInUse+1;
- nblockMAX = 100000 * s->blockSize100k;
- groupNo = -1;
- groupPos = 0;
-
- for (i = 0; i <= 255; i++) {
- s->unzftab[i] = 0;
+ /* Calculate permute[], base[], and limit[] tables from length[].
+ *
+ * permute[] is the lookup table for converting Huffman coded symbols
+ * into decoded symbols. base[] is the amount to subtract from the
+ * value of a Huffman symbol of a given length when using permute[].
+ *
+ * limit[] indicates the largest numerical value a symbol with a given
+ * number of bits can have. This is how the Huffman codes can vary in
+ * length: each code with a value>limit[length] needs another bit.
+ */
+ hufGroup = bd->groups + j;
+ hufGroup->minLen = minLen;
+ hufGroup->maxLen = maxLen;
+
+ /* Note that minLen can't be smaller than 1, so we adjust the base
+ and limit array pointers so we're not always wasting the first
+ entry. We do this again when using them (during symbol decoding).*/
+ base = hufGroup->base - 1;
+ limit = hufGroup->limit - 1;
+
+ /* Calculate permute[]. Concurently, initialize temp[] and limit[]. */
+ pp = 0;
+ for (i = minLen; i <= maxLen; i++) {
+ temp[i] = limit[i] = 0;
+ for (t = 0; t < symCount; t++)
+ if (length[t] == i)
+ hufGroup->permute[pp++] = t;
}
- /*-- MTF init --*/
- {
- int ii, jj, kk;
- kk = MTFA_SIZE-1;
- for (ii = 256 / MTFL_SIZE - 1; ii >= 0; ii--) {
- for (jj = MTFL_SIZE-1; jj >= 0; jj--) {
- s->mtfa[kk] = (unsigned char)(ii * MTFL_SIZE + jj);
- kk--;
- }
- s->mtfbase[ii] = kk + 1;
- }
- }
- /*-- end MTF init --*/
-
- nblock = 0;
- if (! get_mtf_val_init()) {
- goto save_state_and_return;
+ /* Count symbols coded for at each bit length */
+ /* NB: in pathological cases, temp[8] can end ip being 256.
+ * That's why uint8_t is too small for temp[]. */
+ for (i = 0; i < symCount; i++) temp[length[i]]++;
+
+ /* Calculate limit[] (the largest symbol-coding value at each bit
+ * length, which is (previous limit<<1)+symbols at this level), and
+ * base[] (number of symbols to ignore at each bit length, which is
+ * limit minus the cumulative count of symbols coded for already). */
+ pp = t = 0;
+ for (i = minLen; i < maxLen; i++) {
+ pp += temp[i];
+
+ /* We read the largest possible symbol size and then unget bits
+ after determining how many we need, and those extra bits could
+ be set to anything. (They're noise from future symbols.) At
+ each level we're really only interested in the first few bits,
+ so here we set all the trailing to-be-ignored bits to 1 so they
+ don't affect the value>limit[length] comparison. */
+ limit[i] = (pp << (maxLen - i)) - 1;
+ pp <<= 1;
+ t += temp[i];
+ base[i+1] = pp - t;
}
- case BZ_X_MTF_1:
- s->state = BZ_X_MTF_1;
- if (! get_bits(s, &zvec, zn)) {
- retVal = BZ_OK;
- goto save_state_and_return;
- }
- while (1) {
- if (zn > 20 /* the longest code */) {
- retVal = BZ_DATA_ERROR;
- goto save_state_and_return;
- }
- if (zvec <= gLimit[zn]) {
- break;
- }
- zn++;
-
- case BZ_X_MTF_2:
- s->state = BZ_X_MTF_2;
- if (! get_bits(s, &zj, 1)) {
- retVal = BZ_OK;
- goto save_state_and_return;
- }
- zvec = (zvec << 1) | zj;
- }
- if (zvec - gBase[zn] < 0 || zvec - gBase[zn] >= BZ_MAX_ALPHA_SIZE) {
- retVal = BZ_DATA_ERROR;
- goto save_state_and_return;
- }
- nextSym = gPerm[zvec - gBase[zn]];
-
- while (1) {
- if (nextSym == EOB) {
- break;
- }
-
- if (nextSym == BZ_RUNA || nextSym == BZ_RUNB) {
- es = -1;
- N = 1;
- do {
- if (nextSym == BZ_RUNA) {
- es = es + (0+1) * N;
- } else {
- if (nextSym == BZ_RUNB) {
- es = es + (1+1) * N;
- }
- }
- N = N * 2;
- if (! get_mtf_val_init()) {
- goto save_state_and_return;
- }
- case BZ_X_MTF_3:
- s->state = BZ_X_MTF_3;
- if (! get_bits(s, &zvec, zn)) {
- retVal = BZ_OK;
- goto save_state_and_return;
- }
- while (1) {
- if (zn > 20 /* the longest code */) {
- retVal = BZ_DATA_ERROR;
- goto save_state_and_return;
- }
- if (zvec <= gLimit[zn]) {
- break;
- }
- zn++;
-
- case BZ_X_MTF_4:
- s->state = BZ_X_MTF_4;
- if (! get_bits(s, &zj, 1)) {
- retVal = BZ_OK;
- goto save_state_and_return;
- }
- zvec = (zvec << 1) | zj;
- }
- if (zvec - gBase[zn] < 0 || zvec - gBase[zn] >= BZ_MAX_ALPHA_SIZE) {
- retVal = BZ_DATA_ERROR;
- goto save_state_and_return;
-
- }
- nextSym = gPerm[zvec - gBase[zn]];
- }
- while (nextSym == BZ_RUNA || nextSym == BZ_RUNB);
+ limit[maxLen+1] = INT_MAX; /* Sentinel value for reading next sym. */
+ limit[maxLen] = pp + temp[maxLen] - 1;
+ base[minLen] = 0;
+ }
- es++;
- uc = s->seqToUnseq[ s->mtfa[s->mtfbase[0]] ];
- s->unzftab[uc] += es;
+ /* We've finished reading and digesting the block header. Now read this
+ block's Huffman coded symbols from the file and undo the Huffman coding
+ and run length encoding, saving the result into dbuf[dbufCount++] = uc */
- while (es > 0) {
- if (nblock >= nblockMAX) {
- retVal = BZ_DATA_ERROR;
- goto save_state_and_return;
- }
- s->tt[nblock] = (unsigned int)uc;
- nblock++;
- es--;
- }
- continue;
- } else {
- if (nblock >= nblockMAX) {
- retVal = BZ_DATA_ERROR;
- goto save_state_and_return;
- }
- /*-- uc = MTF ( nextSym-1 ) --*/
- {
- int ii, jj, kk, pp, lno, off;
- unsigned int nn;
- nn = (unsigned int)(nextSym - 1);
-
- if (nn < MTFL_SIZE) {
- /* avoid general-case expense */
- pp = s->mtfbase[0];
- uc = s->mtfa[pp+nn];
- while (nn > 3) {
- int z = pp+nn;
- s->mtfa[(z) ] = s->mtfa[(z)-1];
- s->mtfa[(z)-1] = s->mtfa[(z)-2];
- s->mtfa[(z)-2] = s->mtfa[(z)-3];
- s->mtfa[(z)-3] = s->mtfa[(z)-4];
- nn -= 4;
- }
- while (nn > 0) {
- s->mtfa[(pp+nn)] = s->mtfa[(pp+nn)-1]; nn--;
- }
- s->mtfa[pp] = uc;
- } else {
- /* general case */
- lno = nn / MTFL_SIZE;
- off = nn % MTFL_SIZE;
- pp = s->mtfbase[lno] + off;
- uc = s->mtfa[pp];
- while (pp > s->mtfbase[lno]) {
- s->mtfa[pp] = s->mtfa[pp-1];
- pp--;
- }
- s->mtfbase[lno]++;
- while (lno > 0) {
- s->mtfbase[lno]--;
- s->mtfa[s->mtfbase[lno]] = s->mtfa[s->mtfbase[lno-1] + MTFL_SIZE - 1];
- lno--;
- }
- s->mtfbase[0]--;
- s->mtfa[s->mtfbase[0]] = uc;
- if (s->mtfbase[0] == 0) {
- kk = MTFA_SIZE-1;
- for (ii = 256 / MTFL_SIZE-1; ii >= 0; ii--) {
- for (jj = MTFL_SIZE-1; jj >= 0; jj--) {
- s->mtfa[kk] = s->mtfa[s->mtfbase[ii] + jj];
- kk--;
- }
- s->mtfbase[ii] = kk + 1;
- }
- }
- }
- }
- /*-- end uc = MTF ( nextSym-1 ) --*/
+ /* Initialize symbol occurrence counters and symbol Move To Front table */
+ memset(byteCount, 0, sizeof(byteCount)); /* smaller, maybe slower? */
+ for (i = 0; i < 256; i++) {
+ //byteCount[i] = 0;
+ mtfSymbol[i] = (unsigned char)i;
+ }
- s->unzftab[s->seqToUnseq[uc]]++;
- s->tt[nblock] = (unsigned int)(s->seqToUnseq[uc]);
- nblock++;
+ /* Loop through compressed symbols. */
+
+ runPos = dbufCount = selector = 0;
+ for (;;) {
+
+ /* Fetch next Huffman coding group from list. */
+ symCount = GROUP_SIZE - 1;
+ if (selector >= nSelectors) return RETVAL_DATA_ERROR;
+ hufGroup = bd->groups + selectors[selector++];
+ base = hufGroup->base - 1;
+ limit = hufGroup->limit - 1;
+ continue_this_group:
+
+ /* Read next Huffman-coded symbol. */
+
+ /* Note: It is far cheaper to read maxLen bits and back up than it is
+ to read minLen bits and then an additional bit at a time, testing
+ as we go. Because there is a trailing last block (with file CRC),
+ there is no danger of the overread causing an unexpected EOF for a
+ valid compressed file. As a further optimization, we do the read
+ inline (falling back to a call to get_bits if the buffer runs
+ dry). The following (up to got_huff_bits:) is equivalent to
+ j = get_bits(bd, hufGroup->maxLen);
+ */
+ while ((int)(bd->inbufBitCount) < hufGroup->maxLen) {
+ if (bd->inbufPos == bd->inbufCount) {
+ j = get_bits(bd, hufGroup->maxLen);
+ goto got_huff_bits;
+ }
+ bd->inbufBits = (bd->inbufBits << 8) | bd->inbuf[bd->inbufPos++];
+ bd->inbufBitCount += 8;
+ };
+ bd->inbufBitCount -= hufGroup->maxLen;
+ j = (bd->inbufBits >> bd->inbufBitCount) & ((1 << hufGroup->maxLen) - 1);
+
+ got_huff_bits:
+
+ /* Figure how how many bits are in next symbol and unget extras */
+ i = hufGroup->minLen;
+ while (j > limit[i]) ++i;
+ bd->inbufBitCount += (hufGroup->maxLen - i);
+
+ /* Huffman decode value to get nextSym (with bounds checking) */
+ if (i > hufGroup->maxLen)
+ return RETVAL_DATA_ERROR;
+ j = (j >> (hufGroup->maxLen - i)) - base[i];
+ if ((unsigned)j >= MAX_SYMBOLS)
+ return RETVAL_DATA_ERROR;
+ nextSym = hufGroup->permute[j];
+
+ /* We have now decoded the symbol, which indicates either a new literal
+ byte, or a repeated run of the most recent literal byte. First,
+ check if nextSym indicates a repeated run, and if so loop collecting
+ how many times to repeat the last literal. */
+ if ((unsigned)nextSym <= SYMBOL_RUNB) { /* RUNA or RUNB */
+
+ /* If this is the start of a new run, zero out counter */
+ if (!runPos) {
+ runPos = 1;
+ t = 0;
+ }
+
+ /* Neat trick that saves 1 symbol: instead of or-ing 0 or 1 at
+ each bit position, add 1 or 2 instead. For example,
+ 1011 is 1<<0 + 1<<1 + 2<<2. 1010 is 2<<0 + 2<<1 + 1<<2.
+ You can make any bit pattern that way using 1 less symbol than
+ the basic or 0/1 method (except all bits 0, which would use no
+ symbols, but a run of length 0 doesn't mean anything in this
+ context). Thus space is saved. */
+ t += (runPos << nextSym); /* +runPos if RUNA; +2*runPos if RUNB */
+ if (runPos < dbufSize) runPos <<= 1;
+ goto end_of_huffman_loop;
+ }
- if (! get_mtf_val_init()) {
- goto save_state_and_return;
- }
- case BZ_X_MTF_5:
- s->state = BZ_X_MTF_5;
- if (! get_bits(s, &zvec, zn)) {
- retVal = BZ_OK;
- goto save_state_and_return;
- }
- while (1) {
- if (zn > 20 /* the longest code */) {
- retVal = BZ_DATA_ERROR;
- goto save_state_and_return;
- }
- if (zvec <= gLimit[zn]) {
- break;
- }
- zn++;
-
- case BZ_X_MTF_6:
- s->state = BZ_X_MTF_6;
- if (! get_bits(s, &zj, 1)) {
- retVal = BZ_OK;
- goto save_state_and_return;
- }
- zvec = (zvec << 1) | zj;
- }
- if (zvec - gBase[zn] < 0 || zvec - gBase[zn] >= BZ_MAX_ALPHA_SIZE) {
- retVal = BZ_DATA_ERROR;
- goto save_state_and_return;
- }
- nextSym = gPerm[zvec - gBase[zn]];
- continue;
+ /* When we hit the first non-run symbol after a run, we now know
+ how many times to repeat the last literal, so append that many
+ copies to our buffer of decoded symbols (dbuf) now. (The last
+ literal used is the one at the head of the mtfSymbol array.) */
+ if (runPos) {
+ runPos = 0;
+ if (dbufCount + t >= dbufSize) return RETVAL_DATA_ERROR;
+
+ uc = symToByte[mtfSymbol[0]];
+ byteCount[uc] += t;
+ while (t--) dbuf[dbufCount++] = uc;
}
- }
- /* Now we know what nblock is, we can do a better sanity
- check on s->origPtr.
- */
- if (s->origPtr < 0 || s->origPtr >= nblock) {
- retVal = BZ_DATA_ERROR;
- goto save_state_and_return;
- }
- s->state_out_len = 0;
- s->state_out_ch = 0;
- s->calculatedBlockCRC = 0xffffffffL;
- s->state = BZ_X_OUTPUT;
-
- /*-- Set up cftab to facilitate generation of T^(-1) --*/
- s->cftab[0] = 0;
- for (i = 1; i <= 256; i++) {
- s->cftab[i] = s->unzftab[i-1];
- }
- for (i = 1; i <= 256; i++) {
- s->cftab[i] += s->cftab[i-1];
+ /* Is this the terminating symbol? */
+ if (nextSym > symTotal) break;
+
+ /* At this point, nextSym indicates a new literal character. Subtract
+ one to get the position in the MTF array at which this literal is
+ currently to be found. (Note that the result can't be -1 or 0,
+ because 0 and 1 are RUNA and RUNB. But another instance of the
+ first symbol in the mtf array, position 0, would have been handled
+ as part of a run above. Therefore 1 unused mtf position minus
+ 2 non-literal nextSym values equals -1.) */
+ if (dbufCount >= dbufSize) return RETVAL_DATA_ERROR;
+ i = nextSym - 1;
+ uc = mtfSymbol[i];
+
+ /* Adjust the MTF array. Since we typically expect to move only a
+ * small number of symbols, and are bound by 256 in any case, using
+ * memmove here would typically be bigger and slower due to function
+ * call overhead and other assorted setup costs. */
+ do {
+ mtfSymbol[i] = mtfSymbol[i-1];
+ } while (--i);
+ mtfSymbol[0] = uc;
+ uc = symToByte[uc];
+
+ /* We have our literal byte. Save it into dbuf. */
+ byteCount[uc]++;
+ dbuf[dbufCount++] = (unsigned)uc;
+
+ /* Skip group initialization if we're not done with this group. Done
+ * this way to avoid compiler warning. */
+ end_of_huffman_loop:
+ if (symCount--) goto continue_this_group;
}
- /*-- compute the T^(-1) vector --*/
- for (i = 0; i < nblock; i++) {
- uc = (unsigned char)(s->tt[i] & 0xff);
- s->tt[s->cftab[uc]] |= (i << 8);
- s->cftab[uc]++;
+ /* At this point, we've read all the Huffman-coded symbols (and repeated
+ runs) for this block from the input stream, and decoded them into the
+ intermediate buffer. There are dbufCount many decoded bytes in dbuf[].
+ Now undo the Burrows-Wheeler transform on dbuf.
+ See http://dogma.net/markn/articles/bwt/bwt.htm
+ */
+
+ /* Turn byteCount into cumulative occurrence counts of 0 to n-1. */
+ j = 0;
+ for (i = 0; i < 256; i++) {
+ k = j + byteCount[i];
+ byteCount[i] = j;
+ j = k;
}
- s->tPos = s->tt[s->origPtr] >> 8;
- s->nblock_used = 0;
- if (s->blockRandomised) {
- s->rNToGo = 0;
- s->rTPos = 0;
- s->k0 = bz_get_fast(s);
-
- s->nblock_used++;
- bz_rand_udp_mask(s);
- s->k0 ^= ((s->rNToGo == 1) ? 1 : 0);
- } else {
- s->k0 = bz_get_fast(s);
- s->nblock_used++;
+ /* Figure out what order dbuf would be in if we sorted it. */
+ for (i = 0; i < dbufCount; i++) {
+ uc = (unsigned char)(dbuf[i] & 0xff);
+ dbuf[byteCount[uc]] |= (i << 8);
+ byteCount[uc]++;
}
- retVal = BZ_OK;
- goto save_state_and_return;
-
-endhdr_2:
- case BZ_X_ENDHDR_2:
- s->state = BZ_X_ENDHDR_2;
- if (! get_bits(s, &uc, 8)) {
- retVal = BZ_OK;
- goto save_state_and_return;
- }
- if (uc != 0x72) {
- retVal = BZ_DATA_ERROR;
- goto save_state_and_return;
- }
-
- case BZ_X_ENDHDR_3:
- s->state = BZ_X_ENDHDR_3;
- if (! get_bits(s, &uc, 8)) {
- retVal = BZ_OK;
- goto save_state_and_return;
- }
- if (uc != 0x45) {
- retVal = BZ_DATA_ERROR;
- goto save_state_and_return;
- }
-
- case BZ_X_ENDHDR_4:
- s->state = BZ_X_ENDHDR_4;
- if (! get_bits(s, &uc, 8)) {
- retVal = BZ_OK;
- goto save_state_and_return;
- }
- if (uc != 0x38) {
- retVal = BZ_DATA_ERROR;
- goto save_state_and_return;
- }
-
- case BZ_X_ENDHDR_5:
- s->state = BZ_X_ENDHDR_5;
- if (! get_bits(s, &uc, 8)) {
- retVal = BZ_OK;
- goto save_state_and_return;
- }
- if (uc != 0x50) {
- retVal = BZ_DATA_ERROR;
- goto save_state_and_return;
- }
-
- case BZ_X_ENDHDR_6:
- s->state = BZ_X_ENDHDR_6;
- if (! get_bits(s, &uc, 8)) {
- retVal = BZ_OK;
- goto save_state_and_return;
- }
- if (uc != 0x90) {
- retVal = BZ_DATA_ERROR;
- goto save_state_and_return;
- }
- s->storedCombinedCRC = 0;
-
- case BZ_X_CCRC_1:
- s->state = BZ_X_CCRC_1;
- if (! get_bits(s, &uc, 8)) {
- retVal = BZ_OK;
- goto save_state_and_return;
- }
- s->storedCombinedCRC = (s->storedCombinedCRC << 8) | ((unsigned int)uc);
- case BZ_X_CCRC_2:
- s->state = BZ_X_CCRC_2;
- if (! get_bits(s, &uc, 8)) {
- retVal = BZ_OK;
- goto save_state_and_return;
- }
- s->storedCombinedCRC = (s->storedCombinedCRC << 8) | ((unsigned int)uc);
-
- case BZ_X_CCRC_3:
- s->state = BZ_X_CCRC_3;
- if (! get_bits(s, &uc, 8)) {
- retVal = BZ_OK;
- goto save_state_and_return;
- }
- s->storedCombinedCRC = (s->storedCombinedCRC << 8) | ((unsigned int)uc);
-
- case BZ_X_CCRC_4:
- s->state = BZ_X_CCRC_4;
- if (! get_bits(s, &uc, 8)) {
- retVal = BZ_OK;
- goto save_state_and_return;
- }
- s->storedCombinedCRC = (s->storedCombinedCRC << 8) | ((unsigned int)uc);
-
- s->state = BZ_X_IDLE;
- retVal = BZ_STREAM_END;
- goto save_state_and_return;
-
+ /* Decode first byte by hand to initialize "previous" byte. Note that it
+ doesn't get output, and if the first three characters are identical
+ it doesn't qualify as a run (hence writeRunCountdown=5). */
+ if (dbufCount) {
+ if ((int)origPtr >= dbufCount) return RETVAL_DATA_ERROR;
+ bd->writePos = dbuf[origPtr];
+ bd->writeCurrent = (unsigned char)(bd->writePos & 0xff);
+ bd->writePos >>= 8;
+ bd->writeRunCountdown = 5;
}
+ bd->writeCount = dbufCount;
-save_state_and_return:
- s->save_i = i;
- s->save_j = j;
- s->save_t = t;
- s->save_alphaSize = alphaSize;
- s->save_nGroups = nGroups;
- s->save_nSelectors = nSelectors;
- s->save_EOB = EOB;
- s->save_groupNo = groupNo;
- s->save_groupPos = groupPos;
- s->save_nextSym = nextSym;
- s->save_nblockMAX = nblockMAX;
- s->save_nblock = nblock;
- s->save_es = es;
- s->save_N = N;
- s->save_curr = curr;
- s->save_zt = zt;
- s->save_zn = zn;
- s->save_zvec = zvec;
- s->save_zj = zj;
- s->save_gSel = gSel;
- s->save_gMinlen = gMinlen;
- s->save_gLimit = gLimit;
- s->save_gBase = gBase;
- s->save_gPerm = gPerm;
-
- return retVal;
+ return RETVAL_OK;
}
-extern void BZ2_bzReadClose(void)
+/* Undo burrows-wheeler transform on intermediate buffer to produce output.
+ If start_bunzip was initialized with out_fd=-1, then up to len bytes of
+ data are written to outbuf. Return value is number of bytes written or
+ error (all errors are negative numbers). If out_fd!=-1, outbuf and len
+ are ignored, data is written to out_fd and return is RETVAL_OK or error.
+*/
+int FAST_FUNC read_bunzip(bunzip_data *bd, char *outbuf, int len)
{
- if (bzf->initialisedOk) {
- bz_stream *strm = &(bzf->strm);
- DState *s;
- if (strm == NULL) {
- return;
- }
- s = strm->state;
- if ((s == NULL) || (s->strm != strm)) {
- return;
- }
- free(s->tt);
- free(strm->state);
- strm->state = NULL;
- return;
- }
- free(bzf);
-}
+ const unsigned *dbuf;
+ int pos, current, previous, gotcount;
-static void unRLE_obuf_to_output_FAST(DState *s)
-{
- unsigned char k1;
-
- if (s->blockRandomised) {
- while (1) {
- /* try to finish existing run */
- while (1) {
- if (s->strm->avail_out == 0) {
- return;
- }
- if (s->state_out_len == 0) {
- break;
- }
- *((unsigned char *)(s->strm->next_out)) = s->state_out_ch;
- s->calculatedBlockCRC = (s->calculatedBlockCRC << 8) ^
- BZ2_crc32Table[(s->calculatedBlockCRC >> 24) ^
- ((unsigned char)s->state_out_ch)];
- s->state_out_len--;
- s->strm->next_out++;
- s->strm->avail_out--;
- }
-
- /* can a new run be started? */
- if (s->nblock_used == s->save_nblock+1) {
- return;
- }
- s->state_out_len = 1;
- s->state_out_ch = s->k0;
- k1 = bz_get_fast(s);
- bz_rand_udp_mask(s);
- k1 ^= ((s->rNToGo == 1) ? 1 : 0);
- s->nblock_used++;
- if (s->nblock_used == s->save_nblock+1) {
- continue;
- }
- if (k1 != s->k0) {
- s->k0 = k1;
- continue;
- }
+ /* If last read was short due to end of file, return last block now */
+ if (bd->writeCount < 0) return bd->writeCount;
- s->state_out_len = 2;
- k1 = bz_get_fast(s);
- bz_rand_udp_mask(s);
- k1 ^= ((s->rNToGo == 1) ? 1 : 0);
- s->nblock_used++;
- if (s->nblock_used == s->save_nblock+1) {
- continue;
- }
- if (k1 != s->k0) {
- s->k0 = k1;
- continue;
- }
- s->state_out_len = 3;
- k1 = bz_get_fast(s);
- bz_rand_udp_mask(s);
- k1 ^= ((s->rNToGo == 1) ? 1 : 0);
- s->nblock_used++;
- if (s->nblock_used == s->save_nblock+1) {
- continue;
- }
- if (k1 != s->k0) {
- s->k0 = k1;
- continue;
- }
+ gotcount = 0;
+ dbuf = bd->dbuf;
+ pos = bd->writePos;
+ current = bd->writeCurrent;
- k1 = bz_get_fast(s);
- bz_rand_udp_mask(s);
- k1 ^= ((s->rNToGo == 1) ? 1 : 0);
- s->nblock_used++;
- s->state_out_len = ((int)k1) + 4;
- s->k0 = bz_get_fast(s);
- bz_rand_udp_mask(s);
- s->k0 ^= ((s->rNToGo == 1) ? 1 : 0);
- s->nblock_used++;
- }
- } else {
- /* restore */
- unsigned int c_calculatedBlockCRC = s->calculatedBlockCRC;
- unsigned char c_state_out_ch = s->state_out_ch;
- int c_state_out_len = s->state_out_len;
- int c_nblock_used = s->nblock_used;
- int c_k0 = s->k0;
- unsigned int *c_tt = s->tt;
- unsigned int c_tPos = s->tPos;
- char *cs_next_out = s->strm->next_out;
- unsigned int cs_avail_out = s->strm->avail_out;
- /* end restore */
-
- int s_save_nblockPP = s->save_nblock+1;
-
- while (1) {
- /* try to finish existing run */
- if (c_state_out_len > 0) {
- while (TRUE) {
- if (cs_avail_out == 0) {
- goto return_notr;
- }
- if (c_state_out_len == 1) {
- break;
- }
- *((unsigned char *)(cs_next_out)) = c_state_out_ch;
- c_calculatedBlockCRC = (c_calculatedBlockCRC << 8) ^
- BZ2_crc32Table[(c_calculatedBlockCRC >> 24) ^
- ((unsigned char)c_state_out_ch)];
- c_state_out_len--;
- cs_next_out++;
- cs_avail_out--;
- }
-s_state_out_len_eq_one:
- {
- if (cs_avail_out == 0) {
- c_state_out_len = 1;
- goto return_notr;
- }
- *((unsigned char *)(cs_next_out)) = c_state_out_ch;
- c_calculatedBlockCRC = (c_calculatedBlockCRC << 8) ^
- BZ2_crc32Table[(c_calculatedBlockCRC >> 24) ^
- ((unsigned char)c_state_out_ch)];
- cs_next_out++;
- cs_avail_out--;
- }
- }
- /* can a new run be started? */
- if (c_nblock_used == s_save_nblockPP) {
- c_state_out_len = 0; goto return_notr;
- }
- c_state_out_ch = c_k0;
- c_tPos = c_tt[c_tPos];
- k1 = (unsigned char)(c_tPos & 0xff);
- c_tPos >>= 8;
+ /* We will always have pending decoded data to write into the output
+ buffer unless this is the very first call (in which case we haven't
+ Huffman-decoded a block into the intermediate buffer yet). */
+ if (bd->writeCopies) {
- c_nblock_used++;
+ /* Inside the loop, writeCopies means extra copies (beyond 1) */
+ --bd->writeCopies;
- if (k1 != c_k0) {
- c_k0 = k1;
- goto s_state_out_len_eq_one;
- }
+ /* Loop outputting bytes */
+ for (;;) {
- if (c_nblock_used == s_save_nblockPP) {
- goto s_state_out_len_eq_one;
+ /* If the output buffer is full, snapshot state and return */
+ if (gotcount >= len) {
+ bd->writePos = pos;
+ bd->writeCurrent = current;
+ bd->writeCopies++;
+ return len;
}
- c_state_out_len = 2;
- c_tPos = c_tt[c_tPos];
- k1 = (unsigned char)(c_tPos & 0xff);
- c_tPos >>= 8;
+ /* Write next byte into output buffer, updating CRC */
+ outbuf[gotcount++] = current;
+ bd->writeCRC = (bd->writeCRC << 8)
+ ^ bd->crc32Table[(bd->writeCRC >> 24) ^ current];
- c_nblock_used++;
- if (c_nblock_used == s_save_nblockPP) {
- continue;
- }
- if (k1 != c_k0) {
- c_k0 = k1;
+ /* Loop now if we're outputting multiple copies of this byte */
+ if (bd->writeCopies) {
+ --bd->writeCopies;
continue;
}
+ decode_next_byte:
+ if (!bd->writeCount--) break;
+ /* Follow sequence vector to undo Burrows-Wheeler transform */
+ previous = current;
+ pos = dbuf[pos];
+ current = pos & 0xff;
+ pos >>= 8;
+
+ /* After 3 consecutive copies of the same byte, the 4th
+ * is a repeat count. We count down from 4 instead
+ * of counting up because testing for non-zero is faster */
+ if (--bd->writeRunCountdown) {
+ if (current != previous)
+ bd->writeRunCountdown = 4;
+ } else {
- c_state_out_len = 3;
- c_tPos = c_tt[c_tPos];
- k1 = (unsigned char)(c_tPos & 0xff);
- c_tPos >>= 8;
+ /* We have a repeated run, this byte indicates the count */
+ bd->writeCopies = current;
+ current = previous;
+ bd->writeRunCountdown = 5;
- c_nblock_used++;
- if (c_nblock_used == s_save_nblockPP) {
- continue;
- }
- if (k1 != c_k0) {
- c_k0 = k1;
- continue;
- }
-
- c_tPos = c_tt[c_tPos];
- k1 = (unsigned char)(c_tPos & 0xff);
- c_tPos >>= 8;
+ /* Sometimes there are just 3 bytes (run length 0) */
+ if (!bd->writeCopies) goto decode_next_byte;
- c_nblock_used++;
- c_state_out_len = ((int)k1) + 4;
+ /* Subtract the 1 copy we'd output anyway to get extras */
+ --bd->writeCopies;
+ }
+ }
- c_tPos = c_tt[c_tPos];
- c_k0 = (unsigned char)(c_tPos & 0xff);
- c_tPos >>= 8;
+ /* Decompression of this block completed successfully */
+ bd->writeCRC = ~bd->writeCRC;
+ bd->totalCRC = ((bd->totalCRC << 1) | (bd->totalCRC >> 31)) ^ bd->writeCRC;
- c_nblock_used++;
+ /* If this block had a CRC error, force file level CRC error. */
+ if (bd->writeCRC != bd->headerCRC) {
+ bd->totalCRC = bd->headerCRC + 1;
+ return RETVAL_LAST_BLOCK;
}
+ }
-return_notr:
-
- /* save */
- s->calculatedBlockCRC = c_calculatedBlockCRC;
- s->state_out_ch = c_state_out_ch;
- s->state_out_len = c_state_out_len;
- s->nblock_used = c_nblock_used;
- s->k0 = c_k0;
- s->tt = c_tt;
- s->tPos = c_tPos;
- s->strm->next_out = cs_next_out;
- s->strm->avail_out = cs_avail_out;
- /* end save */
+ /* Refill the intermediate buffer by Huffman-decoding next block of input */
+ /* (previous is just a convenient unused temp variable here) */
+ previous = get_next_block(bd);
+ if (previous) {
+ bd->writeCount = previous;
+ return (previous != RETVAL_LAST_BLOCK) ? previous : gotcount;
}
+ bd->writeCRC = ~0;
+ pos = bd->writePos;
+ current = bd->writeCurrent;
+ goto decode_next_byte;
}
-static inline
-int BZ2_bzDecompress(bz_stream *strm)
-{
- DState* s;
- s = strm->state;
- while (1) {
- if (s->state == BZ_X_IDLE) {
- return BZ_SEQUENCE_ERROR;
- }
- if (s->state == BZ_X_OUTPUT) {
- unRLE_obuf_to_output_FAST(s);
- if (s->nblock_used == s->save_nblock+1 && s->state_out_len == 0) {
- s->calculatedBlockCRC = ~(s->calculatedBlockCRC);
- if (s->calculatedBlockCRC != s->storedBlockCRC) {
- return BZ_DATA_ERROR;
- }
- s->calculatedCombinedCRC = (s->calculatedCombinedCRC << 1) | (s->calculatedCombinedCRC >> 31);
- s->calculatedCombinedCRC ^= s->calculatedBlockCRC;
- s->state = BZ_X_BLKHDR_1;
- } else {
- return BZ_OK;
- }
- }
- if (s->state >= BZ_X_MAGIC_1) {
- int r = BZ2_decompress(s);
- if (r == BZ_STREAM_END) {
- if (s->calculatedCombinedCRC != s->storedCombinedCRC) {
- return BZ_DATA_ERROR;
- }
- return r;
- }
- if (s->state != BZ_X_OUTPUT) {
- return r;
- }
- }
- }
+/* Allocate the structure, read file header. If in_fd==-1, inbuf must contain
+ a complete bunzip file (len bytes long). If in_fd!=-1, inbuf and len are
+ ignored, and data is read from file handle into temporary buffer. */
- return(0); /*NOTREACHED*/
+/* Because bunzip2 is used for help text unpacking, and because bb_show_usage()
+ should work for NOFORK applets too, we must be extremely careful to not leak
+ any allocations! */
+int FAST_FUNC start_bunzip(bunzip_data **bdp, int in_fd, const unsigned char *inbuf,
+ int len)
+{
+ bunzip_data *bd;
+ unsigned i;
+ enum {
+ BZh0 = ('B' << 24) + ('Z' << 16) + ('h' << 8) + '0',
+ h0 = ('h' << 8) + '0',
+ };
+
+ /* Figure out how much data to allocate */
+ i = sizeof(bunzip_data);
+ if (in_fd != -1) i += IOBUF_SIZE;
+
+ /* Allocate bunzip_data. Most fields initialize to zero. */
+ bd = *bdp = xzalloc(i);
+
+ /* Setup input buffer */
+ bd->in_fd = in_fd;
+ if (-1 == in_fd) {
+ /* in this case, bd->inbuf is read-only */
+ bd->inbuf = (void*)inbuf; /* cast away const-ness */
+ bd->inbufCount = len;
+ } else
+ bd->inbuf = (unsigned char *)(bd + 1);
+
+ /* Init the CRC32 table (big endian) */
+ crc32_filltable(bd->crc32Table, 1);
+
+ /* Setup for I/O error handling via longjmp */
+ i = setjmp(bd->jmpbuf);
+ if (i) return i;
+
+ /* Ensure that file starts with "BZh['1'-'9']." */
+ /* Update: now caller verifies 1st two bytes, makes .gz/.bz2
+ * integration easier */
+ /* was: */
+ /* i = get_bits(bd, 32); */
+ /* if ((unsigned)(i - BZh0 - 1) >= 9) return RETVAL_NOT_BZIP_DATA; */
+ i = get_bits(bd, 16);
+ if ((unsigned)(i - h0 - 1) >= 9) return RETVAL_NOT_BZIP_DATA;
+
+ /* Fourth byte (ascii '1'-'9') indicates block size in units of 100k of
+ uncompressed data. Allocate intermediate buffer for block. */
+ /* bd->dbufSize = 100000 * (i - BZh0); */
+ bd->dbufSize = 100000 * (i - h0);
+
+ /* Cannot use xmalloc - may leak bd in NOFORK case! */
+ bd->dbuf = malloc_or_warn(bd->dbufSize * sizeof(int));
+ if (!bd->dbuf) {
+ free(bd);
+ xfunc_die();
+ }
+ return RETVAL_OK;
}
-extern ssize_t read_bz2(int fd, void *buf, size_t count)
+void FAST_FUNC dealloc_bunzip(bunzip_data *bd)
{
- int n, ret;
+ free(bd->dbuf);
+ free(bd);
+}
- bzerr = BZ_OK;
- if (count == 0) {
- return(0);
- }
- bzf->strm.avail_out = count;
- bzf->strm.next_out = buf;
- while (1) {
- if (bzf->strm.avail_in == 0) {
- n = bb_xread(bzf->fd, bzf->buf, BZ_MAX_UNUSED);
- if (n == 0) {
+/* Decompress src_fd to dst_fd. Stops at end of bzip data, not end of file. */
+USE_DESKTOP(long long) int FAST_FUNC
+unpack_bz2_stream(int src_fd, int dst_fd)
+{
+ USE_DESKTOP(long long total_written = 0;)
+ char *outbuf;
+ bunzip_data *bd;
+ int i;
+
+ outbuf = xmalloc(IOBUF_SIZE);
+ i = start_bunzip(&bd, src_fd, NULL, 0);
+ if (!i) {
+ for (;;) {
+ i = read_bunzip(bd, outbuf, IOBUF_SIZE);
+ if (i <= 0) break;
+ if (i != full_write(dst_fd, outbuf, i)) {
+ i = RETVAL_SHORT_WRITE;
break;
}
- bzf->bufN = n;
- bzf->strm.avail_in = bzf->bufN;
- bzf->strm.next_in = bzf->buf;
+ USE_DESKTOP(total_written += i;)
}
+ }
- ret = BZ2_bzDecompress(&(bzf->strm));
-
- if ((ret != BZ_OK) && (ret != BZ_STREAM_END)) {
- bb_error_msg_and_die("Error decompressing");
- }
+ /* Check CRC and release memory */
- if (ret == BZ_STREAM_END) {
- bzerr = BZ_STREAM_END;
- return(count - bzf->strm.avail_out);
- }
- if (bzf->strm.avail_out == 0) {
- bzerr = BZ_OK;
- return(count);
+ if (i == RETVAL_LAST_BLOCK) {
+ if (bd->headerCRC != bd->totalCRC) {
+ bb_error_msg("CRC error");
+ } else {
+ i = RETVAL_OK;
}
+ } else if (i == RETVAL_SHORT_WRITE) {
+ bb_error_msg("short write");
+ } else {
+ bb_error_msg("bunzip error %d", i);
}
- return(0);
+ dealloc_bunzip(bd);
+ free(outbuf);
+
+ return i ? i : USE_DESKTOP(total_written) + 0;
}
-extern void BZ2_bzReadOpen(int fd, void *unused, int nUnused)
+USE_DESKTOP(long long) int FAST_FUNC
+unpack_bz2_stream_prime(int src_fd, int dst_fd)
{
- DState *s;
-
- bzf = xmalloc(sizeof(bzFile));
- bzf->initialisedOk = FALSE;
- bzf->fd = fd;
- bzf->bufN = 0;
-
- s = xmalloc(sizeof(DState));
- s->strm = &bzf->strm;
- s->state = BZ_X_MAGIC_1;
- s->bsLive = 0;
- s->bsBuff = 0;
- s->calculatedCombinedCRC = 0;
- s->tt = NULL;
- s->currBlockNo = 0;
- bzf->strm.state = s;
-
- while (nUnused > 0) {
- bzf->buf[bzf->bufN] = *((unsigned char *)(unused));
- bzf->bufN++;
- unused = ((void *)( 1 + ((unsigned char *)(unused)) ));
- nUnused--;
+ unsigned char magic[2];
+ xread(src_fd, magic, 2);
+ if (magic[0] != 'B' || magic[1] != 'Z') {
+ bb_error_msg_and_die("invalid magic");
}
- bzf->strm.avail_in = bzf->bufN;
- bzf->strm.next_in = bzf->buf;
+ return unpack_bz2_stream(src_fd, dst_fd);
+}
- bzf->initialisedOk = TRUE;
+#ifdef TESTING
- return;
-}
+static char *const bunzip_errors[] = {
+ NULL, "Bad file checksum", "Not bzip data",
+ "Unexpected input EOF", "Unexpected output EOF", "Data error",
+ "Out of memory", "Obsolete (pre 0.9.5) bzip format not supported"
+};
-extern unsigned char uncompressStream(int src_fd, int dst_fd)
+/* Dumb little test thing, decompress stdin to stdout */
+int main(int argc, char **argv)
{
- unsigned char unused[BZ_MAX_UNUSED];
- unsigned char *unusedTmp;
- unsigned char obuf[5000];
- int nread;
- int nUnused;
- int streamNo;
int i;
-
- nUnused = 0;
- streamNo = 0;
-
- while(1) {
- BZ2_bzReadOpen(src_fd, unused, nUnused);
- streamNo++;
-
- while (bzerr == BZ_OK) {
- nread = read_bz2(src_fd, obuf, 5000);
- if (bzerr == BZ_DATA_ERROR_MAGIC) {
- bb_error_msg_and_die("invalid magic");
- }
- if (((bzerr == BZ_OK) || (bzerr == BZ_STREAM_END)) && (nread > 0)) {
- if (write(dst_fd, obuf, nread) != nread) {
- BZ2_bzReadClose();
- bb_perror_msg_and_die("Couldnt write to file");
- }
- }
- }
- nUnused = bzf->strm.avail_in;
- unusedTmp = bzf->strm.next_in;
-
- for (i = 0; i < nUnused; i++) {
- unused[i] = unusedTmp[i];
- }
- BZ2_bzReadClose();
- if (nUnused == 0) {
- break;
- }
- }
-
- close(src_fd);
- if (dst_fd != fileno(stdout)) {
- close(dst_fd);
- }
- return TRUE;
+ char c;
+
+ int i = unpack_bz2_stream_prime(0, 1);
+ if (i < 0)
+ fprintf(stderr, "%s\n", bunzip_errors[-i]);
+ else if (read(STDIN_FILENO, &c, 1))
+ fprintf(stderr, "Trailing garbage ignored\n");
+ return -i;
}
-
+#endif