Wireshark-dev: [Wireshark-dev] possible memory error in the SnifferDecompress function?
From: Lewis Burns <lewisurn@xxxxxxxxx>
Date: Tue, 09 Sep 2014 15:11:03 -0700
Hi,

We've recently done some unit testing on open source projects. One of issues we've found is related to the SnifferDecompress function in the wiretap/ngsniffer.c file. We're unable to determine that the memory issues shown by valgrind can actually appear in the program due to our unfamiliarity with the code base. I'm sending in a small testcase to the list and hoping that some developers can validate or invalidate that this is a bug in the code.

The output from running the SnifferDecompress function is as follows:

==5795== Memcheck, a memory error detector
==5795== Copyright (C) 2002-2013, and GNU GPL'd, by Julian Seward et al.
==5795== Using Valgrind-3.10.0.SVN and LibVEX; rerun with -h for copyright info
==5795== Command: a.out
==5795==
==5795== Source and destination overlap in memcpy(0x521290b, 0x5212899, 185)
==5795==    at 0x4C2F71C: memcpy@@GLIBC_2.14 (in /usr/lib/valgrind/vgpreload_memcheck-amd64-linux.so)
==5795==    by 0x4009D0: SnifferDecompress (in /home/chaoqiang/workspace/se/klee/exp/a.out)
==5795==    by 0x400B6F: main (in /home/chaoqiang/workspace/se/klee/exp/a.out)
==5795==
==5795== Source and destination overlap in memcpy(0x521ab32, 0x521ab28, 15)
==5795==    at 0x4C2F71C: memcpy@@GLIBC_2.14 (in /usr/lib/valgrind/vgpreload_memcheck-amd64-linux.so)
==5795==    by 0x400A7A: SnifferDecompress (in /home/chaoqiang/workspace/se/klee/exp/a.out)
==5795==    by 0x400B6F: main (in /home/chaoqiang/workspace/se/klee/exp/a.out)
==5795==
==5795== Invalid write of size 1
==5795==    at 0x400798: SnifferDecompress (in /home/chaoqiang/workspace/se/klee/exp/a.out)
==5795==    by 0x400B6F: main (in /home/chaoqiang/workspace/se/klee/exp/a.out)
==5795==  Address 0x521d080 is 0 bytes after a block of size 65,536 alloc'd
==5795==    at 0x4C2AB80: malloc (in /usr/lib/valgrind/vgpreload_memcheck-amd64-linux.so)
==5795==    by 0x400AE8: main (in /home/chaoqiang/workspace/se/klee/exp/a.out)

Steps to repeat the issue:
gcc -DRANDOM ngsniffer_noklee.c
valgrind a.out

Thanks very much,
Lewis
// wireshark project
// ngsniffer.c:2329,2356,2392,2421

#include <stdlib.h>
#include <string.h>
#include <assert.h>
#include <time.h>

#define	OUTBUF_SIZE	65536
#define	INBUF_SIZE	65536
#define G_MAXUINT16 65536

#define WTAP_ERR_UNC_TRUNCATED -1
#define WTAP_ERR_UNC_OVERFLOW -2
#define WTAP_ERR_UNC_BAD_OFFSET -3

typedef unsigned short guint16;
typedef unsigned char guint8;
#define pntohs(p)   ((guint16)                       \
                     ((guint16)*((const guint8 *)(p)+0)<<8|  \
                      (guint16)*((const guint8 *)(p)+1)<<0))


#define pletohs(p)  ((guint16)                       \
                     ((guint16)*((const guint8 *)(p)+1)<<8|  \
                      (guint16)*((const guint8 *)(p)+0)<<0))

#define pletoh16(p) ((guint16)                       \
                     ((guint16)*((const guint8 *)(p)+1)<<8|  \
                      (guint16)*((const guint8 *)(p)+0)<<0))

/*
   SnifferDecompress() decompresses a blob of compressed data from a
   Sniffer(R) capture file.

   This function is Copyright (c) 1999-2999 Tim Farley

   Parameters
      inbuf - buffer of compressed bytes from file, not including
	      the preceding length word
      inlen - length of inbuf in bytes (max 64k)
      outbuf - decompressed contents, could contain a partial Sniffer
	      record at the end.
      outlen - length of outbuf.

   Return value is the number of bytes in outbuf on return.
*/
static int
SnifferDecompress(unsigned char *inbuf, size_t inlen, unsigned char *outbuf,
		  size_t outlen, int *err)
{
	unsigned char * pin  = inbuf;
	unsigned char * pout = outbuf;
	unsigned char * pin_end  = pin + inlen;
	unsigned char * pout_end = pout + outlen;
	unsigned int bit_mask;      /* one bit is set in this, to mask with bit_value */
	unsigned int bit_value = 0; /* cache the last 16 coding bits we retrieved */
	unsigned int code_type;     /* encoding type, from high 4 bits of byte */
	unsigned int code_low;      /* other 4 bits from encoding byte */
	int length;		    /* length of RLE sequence or repeated string */
	int offset;		    /* offset of string to repeat */

	if (inlen > G_MAXUINT16) {
		return ( -1 );
	}

	bit_mask  = 0;  /* don't have any bits yet */
	while (1)
	{
		/* Shift down the bit mask we use to see whats encoded */
		bit_mask = bit_mask >> 1;

		/* If there are no bits left, time to get another 16 bits */
		if ( 0 == bit_mask )
		{
			bit_mask  = 0x8000;  /* start with the high bit */
			bit_value = pletoh16(pin);   /* get the next 16 bits */
			pin += 2;          /* skip over what we just grabbed */
			if ( pin >= pin_end )
			{
				*err = WTAP_ERR_UNC_TRUNCATED;	 /* data was oddly truncated */
				return ( -1 );
			}
		}

		/* Use the bits in bit_value to see what's encoded and what is raw data */
		if ( !(bit_mask & bit_value) )
		{
			/* bit not set - raw byte we just copy */
			*(pout++) = *(pin++);
		}
		else
		{
			/* bit set - next item is encoded.  Peel off high nybble
			   of next byte to see the encoding type.  Set aside low
			   nybble while we are at it */
			code_type = (unsigned int) ((*pin) >> 4 ) & 0xF;
			code_low  = (unsigned int) ((*pin) & 0xF );
			pin++;   /* increment over the code byte we just retrieved */
			if ( pin >= pin_end )
			{
				*err = WTAP_ERR_UNC_TRUNCATED;	 /* data was oddly truncated */
				return ( -1 );
			}

			/* Based on the code type, decode the compressed string */
			switch ( code_type )
			{
			case 0  :   /* RLE short runs */
				/*
				  Run length is the low nybble of the first code byte.
				  Byte to repeat immediately follows.
				  Total code size: 2 bytes.
				*/
				length = code_low + 3;
				/* If length would put us past end of output, avoid overflow */
				if ( pout + length > pout_end )
				{
					*err = WTAP_ERR_UNC_OVERFLOW;
					return ( -1 );
				}

				/* generate the repeated series of bytes */
				memset( pout, *pin++, length );
				pout += length;
				break;
			case 1  :   /* RLE long runs */
				/*
				  Low 4 bits of run length is the low nybble of the
				  first code byte, upper 8 bits of run length is in
				  the next byte.
				  Byte to repeat immediately follows.
				  Total code size: 3 bytes.
				*/
				length = code_low + ((unsigned int)(*pin++) << 4) + 19;
				/* If we are already at end of input, there is no byte
				   to repeat */
				if ( pin >= pin_end )
				{
					*err = WTAP_ERR_UNC_TRUNCATED;	 /* data was oddly truncated */
					return ( -1 );
				}
				/* If length would put us past end of output, avoid overflow */
				if ( pout + length > pout_end )
				{
					*err = WTAP_ERR_UNC_OVERFLOW;
					return ( -1 );
				}

				/* generate the repeated series of bytes */
				memset( pout, *pin++, length );
				pout += length;
				break;
			case 2  :   /* LZ77 long strings */
				/*
				  Low 4 bits of offset to string is the low nybble of the
				  first code byte, upper 8 bits of offset is in
				  the next byte.
				  Length of string immediately follows.
				  Total code size: 3 bytes.
				*/
				offset = code_low + ((unsigned int)(*pin++) << 4) + 3;
				/* If we are already at end of input, there is no byte
				   to repeat */
				if ( pin >= pin_end )
				{
					*err = WTAP_ERR_UNC_TRUNCATED;	 /* data was oddly truncated */
					return ( -1 );
				}
				/* Check if offset would put us back past begin of buffer */
				if ( pout - offset < outbuf )
				{
					*err = WTAP_ERR_UNC_BAD_OFFSET;
					return ( -1 );
				}

				/* get length from next byte, make sure it won't overrun buf */
				length = (unsigned int)(*pin++) + 16;
				if ( pout + length > pout_end )
				{
					*err = WTAP_ERR_UNC_OVERFLOW;
					return ( -1 );
				}

				/* Copy the string from previous text to output position,
				   advance output pointer */
				memcpy( pout, pout - offset, length );
				pout += length;
				break;
			default :   /* (3 to 15): LZ77 short strings */
				/*
				  Low 4 bits of offset to string is the low nybble of the
				  first code byte, upper 8 bits of offset is in
				  the next byte.
				  Length of string to repeat is overloaded into code_type.
				  Total code size: 2 bytes.
				*/
				offset = code_low + ((unsigned int)(*pin++) << 4) + 3;
				/* Check if offset would put us back past begin of buffer */
				if ( pout - offset < outbuf )
				{
					*err = WTAP_ERR_UNC_BAD_OFFSET;
					return ( -1 );
				}

				/* get length from code_type, make sure it won't overrun buf */
				length = code_type;
				if ( pout + length > pout_end )
				{
					*err = WTAP_ERR_UNC_OVERFLOW;
					return ( -1 );
				}

				/* Copy the string from previous text to output position,
				   advance output pointer */
				memcpy( pout, pout - offset, length );
				pout += length;
				break;
			}
		}

		/* If we've consumed all the input, we are done */
		if ( pin >= pin_end )
			break;
	}

	return (int) ( pout - outbuf );  /* return length of expanded text */
}



int main() 
{
#ifdef RANDOM
srand(time(NULL));
int i;
int j;
size_t inlen=INBUF_SIZE;
size_t outlen=OUTBUF_SIZE;
unsigned char* inbuf= malloc(INBUF_SIZE);
unsigned char* outbuf = malloc(OUTBUF_SIZE);
for(i=0; i< 1000000;i++) {
    int r = rand()%INBUF_SIZE;
    if(r%2==0) {
        for(j=0;j<r;j++) {
            inbuf[j]=rand()%256;
        }
        inlen=r;
        int err;
        SnifferDecompress(inbuf, inlen, outbuf, outlen, &err);
    }
}
#endif

#ifdef TEST

unsigned char inbuf[6]={0x00,0x80,0x11,0x00,0x00,0x00};
unsigned char* outbuf = malloc(20);
int inlen=6;
int outlen=20;
int err;
SnifferDecompress(inbuf, inlen, outbuf, outlen, &err);
#endif
}