Wireshark-dev: Re: [Wireshark-dev] Compilation error Red Hat 3.4.3-9.EL4
From: Jakub Zawadzki <darkjames-ws@xxxxxxxxxxxx>
Date: Tue, 27 Jun 2017 20:47:31 +0200
Hello,

W dniu 2017-06-27 05:33, Guy Harris napisał(a):
On Apr 29, 2011, at 12:52 AM, Jakub Zawadzki <darkjames-ws@xxxxxxxxxxxx> wrote:

On Thu, Apr 28, 2011 at 11:24:08PM -0700, Guy Harris wrote:
I wouldn't do it by checking for a particular version, though -
I'd just check for inflatePrime() and, if it's not present, don't build in the "transparent access to gzipped files" support.

Actually, we can still do transparent access, but only when a BLOCK boundary is not in the
middle of a byte.

So does that mean "we can still do transparent access, but it won't
work with some valid gzipped files", or does it mean "we can still do
transparent access on all valid gzipped files as long as the code is
careful", or does it mean "we can still do transparent access on all
valid gzipped files, but it won't be as efficient as it would be with
inflatePrime()"?

Given that the random-access-to-gzipped-files code is compiled in
regardless of whether we have inflatePrime(), presumably it doesn't
mean the first of those.

I would say it's a mix of (1) and (3).

It all depends on the positions of the blocks in the compressed file (we have no control over that);
I'm attaching sample code for checking.

I think it's possible to craft a gzipped capture file where blocks always finish in the middle of a byte.

Jakub.
/* file_wrappers.c
 *
 * Wiretap Library
 * Copyright (c) 1998 by Gilbert Ramirez <gram@xxxxxxxxxxxxxxx>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 */

/* file_access interface based heavily on zlib gzread.c and gzlib.c from zlib
 * Copyright (C) 1995-2010 Jean-loup Gailly and Mark Adler
 * under licence:
 *
 *  This software is provided 'as-is', without any express or implied
 *  warranty.  In no event will the authors be held liable for any damages
 *  arising from the use of this software.
 *
 *  Permission is granted to anyone to use this software for any purpose,
 *  including commercial applications, and to alter it and redistribute it
 *  freely, subject to the following restrictions:
 *
 *  1. The origin of this software must not be misrepresented; you must not
 *     claim that you wrote the original software. If you use this software
 *     in a product, an acknowledgment in the product documentation would be
 *     appreciated but is not required.
 *  2. Altered source versions must be plainly marked as such, and must not be
 *     misrepresented as being the original software.
 *  3. This notice may not be removed or altered from any source distribution.
 */

#include <errno.h>
#include <string.h>
#include <stdlib.h>

#include <sys/stat.h>
#include <fcntl.h>

#include <stdio.h>
#include <stdint.h>
#include <unistd.h>

/* Local error codes stored in wtap_reader.err alongside errno values.
 * WTAP_ERR_SHORT_READ is recorded when the input ends before the bytes
 * being read are complete; WTAP_ERR_DECOMPRESS is recorded for a bad
 * gzip header, an inflate() failure or a gzip trailer mismatch.
 * NOTE(review): presumably negative so they cannot collide with errno
 * values -- confirm. */
#define WTAP_ERR_SHORT_READ -1
#define WTAP_ERR_DECOMPRESS -2

#define ZLIB_CONST
#include <zlib.h>

/* Handle type used by every function in this file: a pointer to the
 * reader state allocated by file_fdopen(). */
typedef struct wtap_reader *FILE_T;

/* Values for wtap_reader.compression: how the bytes at the current input
 * position are to be turned into output. */
typedef enum {
    UNKNOWN,       /* not decided yet - gz_head() will look for a gzip header */
    UNCOMPRESSED,  /* no gzip header found - copy input directly to output */
    ZLIB,          /* gzip header consumed - inflate the deflate data that follows */
    GZIP_AFTER_HEADER  /* NOTE(review): never set or tested in the code shown here - confirm intent */
} compression_t;

/* Reader state for one open file.  Allocated and initialised by
 * file_fdopen(); the input side (in/next_in/avail_in) holds raw bytes
 * read from fd, the output side (out/next/have) holds data ready for
 * the consumer. */
struct wtap_reader {
    int fd;                    /* file descriptor being read */
    int64_t raw_pos;            /* number of raw bytes read from fd so far (kept so lseek() need not be called) */
    int64_t pos;                /* current position in uncompressed data */
    unsigned int size;                /* size of the input buffer; the output buffer is twice this */
    unsigned char *in;         /* input buffer (size bytes) */
    unsigned char *out;        /* output buffer (2 * size bytes) */
    unsigned char *next;       /* next output data to deliver */

    unsigned int have;                /* number of undelivered output bytes at next */
    int eof;              /* non-zero once read() has returned 0 for fd */
    int64_t raw;                /* value of pos when raw (uncompressed) copying began */
    compression_t compression; /* how the input is currently being interpreted */
    /* error information */
    int err;                   /* 0 if no error, else an errno value or a WTAP_ERR_ code; sticky */

    unsigned int avail_in;            /* number of unconsumed input bytes at next_in */
    unsigned char *next_in;    /* next unconsumed input byte inside in */
    /* zlib inflate stream */
    z_stream strm;             /* stream structure in-place (not a pointer) */
};

/* Read from state->fd into buf until `count` bytes have been collected,
 * the file ends, or read() fails.  (Counterpart of zlib's gz_load.)
 *
 * *have receives the number of bytes actually stored in buf, and
 * state->raw_pos advances by the same amount.  Hitting end of file sets
 * state->eof.  Returns 0 on success (including EOF) or -1 after a
 * read() failure, in which case state->err holds errno. */
static int     /* gz_load */
raw_read(FILE_T state, unsigned char *buf, unsigned int count, unsigned int *have)
{
    *have = 0;

    for (;;) {
        ssize_t got = read(state->fd, buf + *have, count - *have);

        if (got < 0) {
            /* hard failure: remember why */
            state->err = errno;
            return -1;
        }
        if (got == 0) {
            /* nothing more to read from the descriptor */
            state->eof = 1;
            return 0;
        }

        *have += (unsigned)got;
        state->raw_pos += got;

        if (*have >= count)
            return 0;
    }
}

/* Refill the input buffer from the file.  (Counterpart of zlib's gz_avail.)
 *
 * Does nothing but fail if an error is already recorded in state->err,
 * and does nothing at all once end of file has been seen.  Otherwise
 * reads up to state->size bytes into state->in and points next_in /
 * avail_in at them.  Returns 0 on success, -1 on error. */
static int /* gz_avail */
fill_in_buffer(FILE_T state)
{
    /* errors are sticky: never read past one */
    if (state->err != 0)
        return -1;

    /* at EOF there is nothing to load, but that is not an error */
    if (state->eof)
        return 0;

    if (raw_read(state, state->in, state->size, &state->avail_in) == -1)
        return -1;

    state->next_in = state->in;
    return 0;
}

/* 1 MiB as a 64-bit constant.  Not referenced by the code visible in this
 * file.  Written with INT64_C from <stdint.h> (already included) instead
 * of GLib's G_GINT64_CONSTANT, because this standalone program never
 * includes GLib and any use of the macro would otherwise fail to compile. */
#define SPAN INT64_C(1048576)

/* histo[b] counts the calls to zlib_fast_seek_add() made with bits == b. */
static unsigned int histo[8];

/* Compressed-file position given to the most recent zlib_fast_seek_add()
 * call that had bits == 0; stays 0 until the first such call. */
static int64_t prev_pos0;

/* Record one point reported by zlib_read() and print it.
 *
 * bits    - value of (strm->data_type & 7) supplied by the caller; used
 *           as the histogram bucket, so it must be in 0..7.
 * in_pos  - position in the compressed file.
 * out_pos - position in the uncompressed data (accepted but unused).
 *
 * For bits == 0 the distance in compressed bytes from the previous
 * bits == 0 point is printed as well (unless no previous point with a
 * non-zero position has been recorded).
 *
 * NOTE(review): the "position=... and bits=" line prints bits + 1, not
 * the bucket that was incremented; this mirrors the original code and
 * may be unintentional -- confirm before relying on that output. */
static void
zlib_fast_seek_add(int bits, int64_t in_pos, int64_t out_pos)
{
	const int bucket = bits;

	(void) out_pos;

	if (bucket == 0) {
		if (prev_pos0 != 0)
			printf("::: ### bits=0, after %jd bytes\n", (intmax_t) (in_pos - prev_pos0));

		prev_pos0 = in_pos;
	}

	histo[bucket] += 1;
	printf("::: position=%jd and bits=%d\n", (intmax_t) in_pos, bucket + 1);
}

/* Print the compressed-file position at which gz_head() finished looking
 * at a header (or decided there was none).
 *
 * in_pos  - position in the compressed file; this is what gets printed.
 * out_pos - position in the uncompressed data (accepted but unused). */
static void
fast_seek_header(int64_t in_pos, int64_t out_pos)
{
	const intmax_t where = (intmax_t) in_pos;

	(void) out_pos;
	printf("::: position=%jd\n", where);
}

/* Get the next byte from the input buffer, refilling it from the file
 * when it is empty.  Evaluates to the byte value, or to -1 at end of
 * input or on error.
 *
 * The macro takes no arguments: it operates on a variable named `state`
 * (a FILE_T) that must be in scope where it is expanded.  On success it
 * decrements state->avail_in and advances state->next_in by one.
 *
 * Note:
 *
 *      1) errors from raw_read(), and thus from fill_in_buffer(), are
 *      "sticky", and fill_in_buffer() won't do any reading if there's
 *      an error;
 *
 *      2) GZ_GETC() returns -1 on an EOF;
 *
 * so it's safe to make multiple GZ_GETC() calls and only check the
 * last one for an error. */
#define GZ_GETC() ((state->avail_in == 0 && fill_in_buffer(state) == -1) ? -1 : \
                   (state->avail_in == 0 ? -1 :                         \
                    (state->avail_in--, *(state->next_in)++)))

/* Read one byte from the input.
 *
 * On success stores the byte in *ret and returns 0.  On failure returns
 * -1 and leaves *ret alone; state->err is set to WTAP_ERR_SHORT_READ if
 * the failure was end of input and no earlier error was recorded. */
static int
gz_next1(FILE_T state, uint8_t *ret)
{
    const int byte = GZ_GETC();

    if (byte != -1) {
        *ret = byte;
        return 0;
    }

    /* no error recorded means we simply ran out of data */
    if (state->err == 0)
        state->err = WTAP_ERR_SHORT_READ;
    return -1;
}

/* Read a two-byte little-endian integer from the input.
 *
 * On success stores the value in *ret and returns 0.  On failure returns
 * -1 and leaves *ret alone; state->err is set to WTAP_ERR_SHORT_READ if
 * the failure was end of input and no earlier error was recorded.
 *
 * Only the second GZ_GETC() result is checked: a failure on the first
 * byte is sticky, so the second call fails as well. */
static int
gz_next2(FILE_T state, uint16_t *ret)
{
    uint16_t value = GZ_GETC();     /* low-order byte */
    const int high = GZ_GETC();     /* high-order byte, or -1 */

    if (high == -1) {
        if (state->err == 0)
            state->err = WTAP_ERR_SHORT_READ;   /* ran out of data */
        return -1;
    }

    value += (uint16_t)high << 8;
    *ret = value;
    return 0;
}

/* Read a four-byte little-endian integer from the input.
 *
 * On success stores the value in *ret and returns 0.  On failure returns
 * -1 and leaves *ret alone; state->err is set to WTAP_ERR_SHORT_READ if
 * the failure was end of input and no earlier error was recorded.
 *
 * All four bytes are always requested and only the last result is
 * checked; that is sufficient because a failed GZ_GETC() stays failed. */
static int
gz_next4(FILE_T state, uint32_t *ret)
{
    uint32_t value = 0;
    int byte = 0;
    unsigned int shift;

    /* least-significant byte first */
    for (shift = 0; shift < 32; shift += 8) {
        byte = GZ_GETC();
        value += (uint32_t)byte << shift;
    }

    if (byte == -1) {
        if (state->err == 0)
            state->err = WTAP_ERR_SHORT_READ;   /* ran out of data */
        return -1;
    }

    *ret = value;
    return 0;
}

/* Discard the next n bytes of input.
 *
 * Returns 0 once n bytes have been consumed, or -1 as soon as a byte
 * cannot be read; in that case state->err is set to WTAP_ERR_SHORT_READ
 * unless an error was already recorded. */
static int
gz_skipn(FILE_T state, size_t n)
{
    size_t remaining;

    for (remaining = n; remaining > 0; remaining--) {
        if (GZ_GETC() != -1)
            continue;

        /* no error recorded means we simply ran out of data */
        if (state->err == 0)
            state->err = WTAP_ERR_SHORT_READ;
        return -1;
    }
    return 0;
}

/* Discard input up to and including the next zero byte.
 *
 * Returns 0 when the terminating zero byte has been consumed, or -1 if
 * the input ends or fails first; in that case state->err is set to
 * WTAP_ERR_SHORT_READ unless an error was already recorded. */
static int
gz_skipzstr(FILE_T state)
{
    int byte;

    /* consume bytes until the terminator (0) or a failure (-1) shows up */
    do {
        byte = GZ_GETC();
    } while (byte > 0);

    if (byte != -1)
        return 0;

    if (state->err == 0)
        state->err = WTAP_ERR_SHORT_READ;   /* ran out of data */
    return -1;
}

/* Inflate up to `count` bytes of output into `buf`.  (Counterpart of
 * zlib's gz_decomp.)
 *
 * Input is taken from the state's input buffer, which is refilled from
 * the file as needed.  The loop runs until the output space is used up,
 * inflate() reports the end of the deflate stream, or an error occurs.
 * On return state->next points at buf and state->have is the number of
 * bytes produced.
 *
 * Nothing is returned; failures are recorded in state->err:
 *   WTAP_ERR_SHORT_READ  - the input ended inside the deflate stream
 *                          or inside the gzip trailer;
 *   WTAP_ERR_DECOMPRESS  - inflate() returned Z_STREAM_ERROR, Z_NEED_DICT
 *                          or Z_DATA_ERROR, or the trailer's CRC/length
 *                          did not match;
 *   ENOMEM               - inflate() returned Z_MEM_ERROR.
 *
 * strm->adler is used here as a running CRC-32 of the output (it is
 * seeded with crc32(0L, Z_NULL, 0) in gz_head()) and is compared with
 * the CRC stored in the gzip trailer.
 */
static void /* gz_decomp */
zlib_read(FILE_T state, unsigned char *buf, unsigned int count)
{
    int ret = 0;        /* XXX */
    uint32_t crc, len;
    z_streamp strm = &(state->strm);

    /* buf2/count2 track the part of the output buffer that has not been
       fed to crc32() yet: where it starts and how much space was left
       when the previous inflate() call returned */
    unsigned char *buf2 = buf;
    unsigned int count2 = count;

    strm->avail_out = count;
    strm->next_out = buf;

    /* fill output buffer up to end of deflate stream or error */
    do {
        /* get more input for inflate() */
        if (state->avail_in == 0 && fill_in_buffer(state) == -1)
            break;
        if (state->avail_in == 0) {
            /* EOF */
            state->err = WTAP_ERR_SHORT_READ;
            break;
        }

        /* hand our input window to zlib ... */
        strm->avail_in = state->avail_in;
        strm->next_in = state->next_in;
        /* decompress and handle errors */
        /* NOTE(review): per zlib's inflate() documentation Z_BLOCK makes
           inflate() return at deflate block boundaries - confirm */
#ifdef Z_BLOCK
        ret = inflate(strm, Z_BLOCK);
#else
        ret = inflate(strm, Z_NO_FLUSH);
#endif
        /* ... and take back whatever it did not consume */
        state->avail_in = strm->avail_in;
#ifdef z_const
        state->next_in = (unsigned char *)strm->next_in;
#else
        state->next_in = strm->next_in;
#endif
        if (ret == Z_STREAM_ERROR) {
            state->err = WTAP_ERR_DECOMPRESS;
            break;
        }
        if (ret == Z_NEED_DICT) {
            state->err = WTAP_ERR_DECOMPRESS;
            break;
        }
        if (ret == Z_MEM_ERROR) {
            /* This means "not enough memory". */
            state->err = ENOMEM;
            break;
        }
        if (ret == Z_DATA_ERROR) {              /* deflate stream invalid */
            state->err = WTAP_ERR_DECOMPRESS;
            break;
        }
        /*
         * XXX - Z_BUF_ERROR?
         */

        /* fold the bytes produced by this inflate() call into the running CRC */
        strm->adler = crc32(strm->adler, buf2, count2 - strm->avail_out);
#ifdef Z_BLOCK
        /* NOTE(review): per zlib's documentation of data_type, bit 128
           means inflate() stopped right after a block, bit 64 means it
           was the last block, and the low three bits are the number of
           unused bits in the last input byte - confirm against zlib.h.
           The point is reported with its compressed and uncompressed
           positions. */
        if (ret != Z_STREAM_END && (strm->data_type & 128) && !(strm->data_type & 64))
            zlib_fast_seek_add((strm->data_type & 7), state->raw_pos - strm->avail_in, state->pos + (count - strm->avail_out));
#endif
        buf2 = (buf2 + count2 - strm->avail_out);
        count2 = strm->avail_out;

    } while (strm->avail_out && ret != Z_STREAM_END);

    /* publish the decompressed data: it starts at buf and is `have` bytes long */
    state->next = buf;
    state->have = count - strm->avail_out;

    /* Check gzip trailer if at end of deflate stream.
       We don't fail immediately here, we just set an error
       indication, so that we try to process what data we
       got before the error.  The next attempt to read
       something past that data will get the error. */
    if (ret == Z_STREAM_END) {
        /* trailer: 4-byte CRC-32 then 4-byte length, both little-endian;
           a short trailer leaves err set by gz_next4() */
        if (gz_next4(state, &crc) != -1 &&
            gz_next4(state, &len) != -1) {
            if (crc != strm->adler) {
                state->err = WTAP_ERR_DECOMPRESS;
            } else if (len != (strm->total_out & 0xffffffffUL)) {
                state->err = WTAP_ERR_DECOMPRESS;
            }
        }
        state->compression = UNKNOWN;      /* ready for next stream, once have is 0 */
    }
}

/* Look at the start of the pending input and decide how to read it.
 *
 * If the input begins with the gzip magic bytes 31, 139, the rest of
 * the gzip header is parsed and skipped, the inflate stream is reset,
 * the running CRC is re-seeded, and state->compression becomes ZLIB.
 *
 * Otherwise state->compression becomes UNCOMPRESSED and whatever input
 * has already been buffered (including a lone leading byte 31, if one
 * was consumed while probing) is moved to the output buffer, so
 * state->have may be non-zero on return.
 *
 * If there is no input at all (end of file), returns 0 without changing
 * state->compression.
 *
 * Returns 0 on success and -1 on error, with state->err set
 * (WTAP_ERR_DECOMPRESS for an unsupported compression method or
 * reserved flag bits, WTAP_ERR_SHORT_READ for a truncated header, or a
 * read error).
 */
static int
gz_head(FILE_T state)
{
    /* get some data in the input buffer */
    if (state->avail_in == 0) {
        if (fill_in_buffer(state) == -1)
            return -1;
        if (state->avail_in == 0)
            return 0;
    }

    /* look for the gzip magic header bytes 31 and 139 */
    if (state->next_in[0] == 31) {
        state->avail_in--;
        state->next_in++;
        if (state->avail_in == 0 && fill_in_buffer(state) == -1)
            return -1;
        if (state->avail_in && state->next_in[0] == 139) {
            uint8_t cm;
            uint8_t flags;
            uint16_t len;
            uint16_t hcrc;

            /* we have a gzip header, woo hoo! */
            state->avail_in--;
            state->next_in++;

            /* read rest of header */

            /* compression method (CM); 8 is the only value accepted here */
            if (gz_next1(state, &cm) == -1)
                return -1;
            if (cm != 8) {
                state->err = WTAP_ERR_DECOMPRESS;
                return -1;
            }

            /* flags (FLG) */
            if (gz_next1(state, &flags) == -1)
                return -1;
            if (flags & 0xe0) {     /* reserved flag bits */
                state->err = WTAP_ERR_DECOMPRESS;
                return -1;
            }

            /* modification time (MTIME) */
            if (gz_skipn(state, 4) == -1)
                return -1;

            /* extra flags (XFL) */
            if (gz_skipn(state, 1) == -1)
                return -1;

            /* operating system (OS) */
            if (gz_skipn(state, 1) == -1)
                return -1;

            /* optional fields, present only when the matching FLG bit is set */
            if (flags & 4) {
                /* extra field - get XLEN */
                if (gz_next2(state, &len) == -1)
                    return -1;

                /* skip the extra field */
                if (gz_skipn(state, len) == -1)
                    return -1;
            }
            if (flags & 8) {
                /* file name */
                if (gz_skipzstr(state) == -1)
                    return -1;
            }
            if (flags & 16) {
                /* comment */
                if (gz_skipzstr(state) == -1)
                    return -1;
            }
            if (flags & 2) {
                /* header crc - read but not verified */
                if (gz_next2(state, &hcrc) == -1)
                    return -1;
                /* XXX - check the CRC? */
            }

            /* set up for decompression; strm.adler is seeded with the
               initial CRC-32 value because zlib_read() uses it as the
               running CRC of the output */
            inflateReset(&(state->strm));
            state->strm.adler = crc32(0L, Z_NULL, 0);
            state->compression = ZLIB;
#ifdef Z_BLOCK
            /* report where the deflate data starts in the compressed file */
            fast_seek_header(state->raw_pos - state->avail_in, state->pos);
#endif
            return 0;
        }
        else {
            /* not a gzip file -- save first byte (31) and fall to raw i/o */
            state->out[0] = 31;
            state->have = 1;
        }
    }

    /* report where the raw data starts; unlike the call above, this one
       is not conditional on Z_BLOCK */
    fast_seek_header(state->raw_pos - state->avail_in - state->have, state->pos);

    /* doing raw i/o, save start of raw data for seeking, copy any leftover
       input to output -- this assumes that the output buffer is larger than
       the input buffer, which also assures space for gzungetc() */
    state->raw = state->pos;
    state->next = state->out;
    if (state->avail_in) {
        memcpy(state->next + state->have, state->next_in, state->avail_in);
        state->have += state->avail_in;
        state->avail_in = 0;
    }
    state->compression = UNCOMPRESSED;
    return 0;
}

/* Produce the next chunk of output in state->out.  (Counterpart of
 * zlib's gz_make.)
 *
 * When the input format is still unknown, gz_head() is asked to decide;
 * if that already left data in the output buffer, that data is the
 * chunk.  Otherwise the chunk comes from a plain read (uncompressed
 * input) or from zlib_read() (gzip input).
 *
 * Returns -1 if gz_head() or the plain read fails, 0 otherwise.  Errors
 * inside zlib_read() are reported through state->err only.  A return of
 * 0 with state->have == 0 means no more data was produced. */
static int /* gz_make */
fill_out_buffer(FILE_T state)
{
    if (state->compression == UNKNOWN) {
        /* look for gzip header */
        if (gz_head(state) == -1)
            return -1;
        if (state->have != 0)
            return 0;               /* gz_head() already supplied data */
    }

    switch (state->compression) {
    case UNCOMPRESSED:
        /* straight copy: only the first half of the output buffer is used */
        if (raw_read(state, state->out, state->size, &state->have) == -1)
            return -1;
        state->next = state->out;
        break;

    case ZLIB:
        /* decompress into the whole (double-sized) output buffer */
        zlib_read(state, state->out, state->size << 1);
        break;

    default:
        /* still UNKNOWN (e.g. end of input): nothing to produce */
        break;
    }
    return 0;
}

/* Put the reader back into its initial state: no buffered input or
 * output, no error, not at end of file, position 0 in the uncompressed
 * data, and input format still to be detected.  The buffers, the file
 * descriptor and raw_pos are left untouched. */
static void
gz_reset(FILE_T state)
{
    /* buffers are empty on both sides */
    state->avail_in = 0;
    state->have = 0;

    /* nothing has been delivered yet */
    state->pos = 0;

    /* status flags cleared */
    state->err = 0;
    state->eof = 0;

    /* the next read will look for a gzip header */
    state->compression = UNKNOWN;
}

FILE_T
file_fdopen(int fd)
{
    int want = 4096;
    FILE_T state;

    if (fd == -1)
        return NULL;

    /* allocate FILE_T structure to return */
    state = (FILE_T)malloc(sizeof *state);
    if (state == NULL)
        return NULL;


    /* open the file with the appropriate mode (or just use fd) */
    state->fd = fd;

    /* save the current position for rewinding (only if reading) */
    state->raw_pos = 0;

    /* initialize stream */
    gz_reset(state);

    /* allocate buffers */
    state->in = (unsigned char *)malloc(want);
    state->out = (unsigned char *)malloc(want << 1);
    state->size = want;
    if (state->in == NULL || state->out == NULL) {
        free(state->out);
        free(state->in);
        free(state);
        errno = ENOMEM;
        return NULL;
    }

    /* allocate inflate memory */
    state->strm.zalloc = Z_NULL;
    state->strm.zfree = Z_NULL;
    state->strm.opaque = Z_NULL;
    state->strm.avail_in = 0;
    state->strm.next_in = Z_NULL;
    if (inflateInit2(&(state->strm), -15) != Z_OK) {    /* raw inflate */
        free(state->out);
        free(state->in);
        free(state);
        errno = ENOMEM;
        return NULL;
    }

    /* return stream */
    return state;
}

/* Open `path` read-only and wrap it in a reader.
 *
 * Returns the reader, or NULL if open() or file_fdopen() fails; in the
 * latter case the descriptor is closed again before returning.
 *
 * XXX - do we need O_LARGEFILE?  On UN*X, if something special is
 * needed for large file support, the configure script should already
 * have supplied the appropriate #defines, so the descriptor obtained
 * here should be large-file-enabled.  Pre-Large File Summit UN*Xes,
 * and possibly even some post-LFS ones, might require O_LARGEFILE,
 * though.  If so, that should probably be handled in ws_open(). */
FILE_T
file_open(const char *path)
{
    const int fd = open(path, O_RDONLY, 0000);
    FILE_T ft;

    if (fd == -1)
        return NULL;

    ft = file_fdopen(fd);
    if (ft == NULL)
        close(fd);      /* don't leak the descriptor */

    return ft;
}

int main(int argc, char **argv)
{
	FILE_T ft;
	int i;

	ft = file_open(argv[1]);

    while (fill_out_buffer(ft) != -1)
	{
		if (ft->have == 0)
			break;
        ft->have = 0;
	}

	for (i = 0; i < 8; i++)
		printf("bits[%d] = %u\n", i, histo[i]);
}