Ethereal-dev: [Ethereal-dev] HTTP gzip/deflate decompression patch

Note: This archive is from the project's previous web site, ethereal.com. This list is no longer active.

From: Jerry Talkington <jtalkington@xxxxxxxxxxxxxxxxxxxxx>
Date: Sun, 2 May 2004 20:47:33 -0700
Howdy!
  
Here's a patch to enable decompression of gzip/deflate Content-Encoded
HTTP entity bodies.  I've also added options for disabling de-chunking
and decompression.

I also implemented a tvb_uncompress() function.  I was originally going to
yank the code from the packet-slsk.c, but that could have led to some
unnecessarily large (10 * the compressed size) buffers.  I didn't
convert and remove the Soul Seek uncompress functions, because I
couldn't test it.

However, the new code should handle those (and I'm not even
sure that the current code will actually do decompression, based on all
of the workarounds that had to be implemented to get the new stuff to
work across library version numbers.)  If someone submits a capture with
those packets, I'll convert them over.

-- 
GPG public key:
http://pgp.mit.edu:11371/pks/lookup?op=get&search=0x9D5B8762
Index: AUTHORS
===================================================================
RCS file: /cvsroot/ethereal/AUTHORS,v
retrieving revision 1.1007
diff -u -r1.1007 AUTHORS
--- AUTHORS	1 May 2004 21:18:09 -0000	1.1007
+++ AUTHORS	3 May 2004 00:21:52 -0000
@@ -286,6 +286,7 @@
 }
 
 Jerry Talkington <jtalkington[AT]users.sourceforge.net> {
+	tvb_uncompress()/HTTP Content-Encoding decompression
 	HTTP chunked encoding dissection
 	updates to HTTP support
 	Filter selection/editing GUI improvements
Index: packet-http.c
===================================================================
RCS file: /cvsroot/ethereal/packet-http.c,v
retrieving revision 1.100
diff -u -r1.100 packet-http.c
--- packet-http.c	30 Apr 2004 17:07:20 -0000	1.100
+++ packet-http.c	3 May 2004 00:21:53 -0000
@@ -95,6 +95,21 @@
  */
 static gboolean http_desegment_body = FALSE;
 
+/*
+ * De-chunking of content-encoding: chunk entity bodies.
+ */
+static gboolean http_dechunk_body = TRUE;
+
+/*
+ * Decompression of zlib encoded entities.
+ */
+#ifdef HAVE_LIBZ
+static gboolean http_decompress_body = TRUE;
+#else
+static gboolean http_decompress_body = FALSE;
+#endif
+
+
 #define TCP_PORT_HTTP			80
 #define TCP_PORT_PROXY_HTTP		3128
 #define TCP_PORT_PROXY_ADMIN_HTTP	3132
@@ -620,8 +635,9 @@
 		 */
 		if (headers.transfer_encoding != NULL &&
 		    strcasecmp(headers.transfer_encoding, "identity") != 0) {
-			if (strcasecmp(headers.transfer_encoding, "chunked")
-			    == 0) {
+			if (http_dechunk_body &&
+			    (strcasecmp(headers.transfer_encoding, "chunked")
+			    == 0)) {
 
 				chunks_decoded = chunked_encoding_dissector(
 				    &next_tvb, pinfo, http_tree, 0);
@@ -655,23 +671,60 @@
 		if (headers.content_encoding != NULL &&
 		    strcasecmp(headers.content_encoding, "identity") != 0) {
 			/*
-			 * We currently can't handle, for example, "gzip",
-			 * "compress", or "deflate"; just handle them as
-			 * data for now.
+			 * We currently can't handle, for example, "compress";
+			 * just handle them as data for now.
+			 * 
+			 * After July 7, 2004 the LZW patent expires, so support
+			 * might be added then.  However, I don't think that
+			 * anybody ever really implemented "compress", due to
+			 * the aformentioned patent.
 			 */
-			proto_item *e_ti = NULL;
-			proto_tree *e_tree = NULL;
+			tvbuff_t *uncomp_tvb = NULL;
 
-			e_ti = proto_tree_add_text(http_tree, next_tvb, 0,
-			    tvb_length(next_tvb), "Encoded entity-body (%s)",
-			    headers.content_encoding);
+			if (http_decompress_body &&
+			    (strcasecmp(headers.content_encoding, "gzip") == 0 ||
+			    strcasecmp(headers.content_encoding, "deflate")
+			    == 0)) {
+			
+				uncomp_tvb = tvb_uncompress(next_tvb, 0,
+				    tvb_length(next_tvb));
+			}
 
-			e_tree = proto_item_add_subtree(e_ti,
-			    ett_http_encoded_entity);
+			if (uncomp_tvb != NULL) {
+				/*
+				 * Decompression worked
+				 */
+				tvb_free(next_tvb);
+				next_tvb = uncomp_tvb;
+				
+				tvb_set_child_real_data_tvbuff(tvb, next_tvb);
+				add_new_data_source(pinfo, next_tvb, 
+				    "Entity body");
+			} else {
 
-			call_dissector(data_handle, next_tvb, pinfo, e_tree);
-			
-			goto body_dissected;
+				proto_item *e_ti = NULL;
+				proto_tree *e_tree = NULL;
+
+				if (chunks_decoded > 1) {
+					tvb_set_child_real_data_tvbuff(tvb,
+					    next_tvb);
+					add_new_data_source(pinfo, next_tvb,
+					    "Entity body");
+				}
+
+				e_ti = proto_tree_add_text(http_tree,
+				    next_tvb, 0, tvb_length(next_tvb),
+				    "Encoded entity-body (%s)",
+				    headers.content_encoding);
+
+				e_tree = proto_item_add_subtree(e_ti,
+				    ett_http_encoded_entity);
+
+				call_dissector(data_handle, next_tvb, pinfo,
+				    e_tree);
+				
+				goto body_dissected;
+			}
 		}
 
 		/*
@@ -958,9 +1011,6 @@
 		/ * tvb_set_reported_length(new_tvb, chunked_data_size); * /
 		*/
 
-		tvb_set_child_real_data_tvbuff(tvb, new_tvb);
-		add_new_data_source(pinfo, new_tvb, "De-chunked entity body");
-
 		tvb_free(*tvb_ptr);
 		*tvb_ptr = new_tvb;
 		
@@ -1515,12 +1565,22 @@
 	    "of a request spanning multiple TCP segments",
 	    &http_desegment_headers);
 	prefs_register_bool_preference(http_module, "desegment_body",
-	    "Trust the \"Content-length:\" header and desegment HTTP "
-	    "bodies\nspanning multiple TCP segments",
-	    "Whether the HTTP dissector should use the "
-	    "\"Content-length:\" value to desegment the body "
-	    "of a request spanning multiple TCP segments",
+	    "Desegment HTTP bodies spanning multiple TCP segments",
+	    "Whether the HTTP dissector should "
+	    "desegment the body of a request spanning multiple TCP segments",
 	    &http_desegment_body);
+	prefs_register_bool_preference(http_module, "dechunk_body",
+	    "Reassemble chunked transfer-coded bodies",
+	    "Whether to reassemble bodies of entities that are transfered "
+	    "using the \"Transfer-Encoding: chunked\" method",
+	    &http_dechunk_body);
+#ifdef HAVE_LIBZ
+	prefs_register_bool_preference(http_module, "decompress_body",
+	    "Uncompress entity bodies",
+	    "Whether to uncompress entity bodies that are compressed "
+	    "using \"Content-Encoding: \"",
+	    &http_decompress_body);
+#endif
 
 	http_handle = create_dissector_handle(dissect_http, proto_http);
 
Index: epan/tvbuff.c
===================================================================
RCS file: /cvsroot/ethereal/epan/tvbuff.c,v
retrieving revision 1.61
diff -u -r1.61 tvbuff.c
--- epan/tvbuff.c	23 Mar 2004 18:06:29 -0000	1.61
+++ epan/tvbuff.c	3 May 2004 00:21:54 -0000
@@ -41,6 +41,10 @@
 
 #include <string.h>
 
+#ifdef HAVE_LIBZ
+#include <zlib.h>
+#endif
+
 #include "pint.h"
 #include "tvbuff.h"
 #include "strutil.h"
@@ -2149,3 +2153,242 @@
 
 	return -1;
 }
+
+#ifdef HAVE_LIBZ
+/*
+ * Uncompresses a zlib compressed packet inside a message of tvb at offset with
+ * length comprlen.  Returns an uncompressed tvbuffer if uncompression
+ * succeeded or NULL if uncompression failed.
+ */
+#define TVB_Z_BUFSIZ 4096
+tvbuff_t *
+tvb_uncompress(tvbuff_t *tvb, int offset, int comprlen)
+{
+	
+
+	gint err = Z_OK;
+	gint bytes_out = 0;
+	guint8 *compr = NULL;
+	guint8 *uncompr = NULL;
+	tvbuff_t *uncompr_tvb = NULL;
+	z_streamp strm = NULL;
+	gchar strmbuf[TVB_Z_BUFSIZ];
+	gint inits_done = 0;
+	gint wbits = MAX_WBITS;
+	guint8 *next = NULL;
+
+	strm = g_malloc0(sizeof(z_stream));
+
+	if (strm == NULL) {
+		return NULL;
+	}
+
+	compr = tvb_memdup(tvb, offset, comprlen);
+
+	if (!compr) {
+		return NULL;
+	}
+
+	next = compr;
+
+	strm->next_in = next;
+	strm->avail_in = comprlen;
+
+	memset(&strmbuf, 0, TVB_Z_BUFSIZ);
+	strm->next_out = (Bytef *)&strmbuf;
+	strm->avail_out = TVB_Z_BUFSIZ;
+
+	err = inflateInit2(strm, wbits);
+	inits_done = 1;
+	if (err != Z_OK) {
+		g_free(strm);
+		g_free(compr);
+		return NULL;
+	}
+
+	while (1) {
+		memset(&strmbuf, 0, TVB_Z_BUFSIZ);
+		strm->next_out = (Bytef *)&strmbuf;
+		strm->avail_out = TVB_Z_BUFSIZ;
+
+		err = inflate(strm, Z_SYNC_FLUSH);
+
+		if (err == Z_OK || err == Z_STREAM_END) {
+			guint bytes_pass = TVB_Z_BUFSIZ - strm->avail_out;
+
+			if (uncompr == NULL) {
+				uncompr = g_memdup(&strmbuf, bytes_pass);
+			} else {
+				guint8 *new_data = g_malloc0(bytes_out +
+				    bytes_pass);
+
+				if (new_data == NULL) {
+					g_free(strm);
+					g_free(compr);
+
+					if (uncompr != NULL) {
+						g_free(uncompr);
+					}
+					
+					return NULL;
+				}
+				
+				g_memmove(new_data, uncompr, bytes_out);
+				g_memmove((new_data + bytes_out), &strmbuf,
+				    bytes_pass);
+
+				g_free(uncompr);
+				uncompr = new_data;
+			}
+
+			bytes_out += bytes_pass;
+
+			if ( err == Z_STREAM_END) {
+				inflateEnd(strm);
+				g_free(strm);
+				break;
+			}
+		} else if (err == Z_BUF_ERROR) {
+			/*
+			 * It's possible that not enough frames were captured
+			 * to decompress this fully, so return what we've done
+			 * so far, if any.
+			 */
+
+			g_free(strm);
+
+			if (uncompr != NULL) {
+				break;
+			} else {
+				g_free(compr);
+				return NULL;
+			}
+			
+		} else if (err == Z_DATA_ERROR && inits_done == 1
+		    && uncompr == NULL && (*compr  == 0x1f) &&
+		    (*(compr + 1) == 0x8b)) {
+			/*
+			 * inflate() is supposed to handle both gzip and deflate
+			 * streams automatically, but in reality it doesn't
+			 * seem to handle either (at least not within the
+			 * context of an HTTP response.)  We have to try
+			 * several tweaks, depending on the type of data and
+			 * version of the library installed.
+			 */
+
+			/*
+			 * Gzip file format.  Skip past the header, since the
+			 * fix to make it work (setting windowBits to 31)
+			 * doesn't work with all versions of the library.
+			 */
+			Bytef *c = compr + 2;
+			Bytef flags = 0;
+
+			if (*c == Z_DEFLATED) {
+				c++;
+			} else {
+				g_free(strm);
+				g_free(compr);
+				return NULL;
+			}
+
+			flags = *c;
+
+			/* Skip past the MTIME, XFL, and OS fields. */
+			c += 7;
+
+			if (flags & 0x2) {
+				/* An Extra field is present. */
+				gint xsize = (gint)(*c |
+				    (*(c + 1) << 8));
+
+				c += xsize;
+			}
+
+			if (flags & 0x3) {
+				/* A null terminated filename */
+
+				while (*c != NULL) {
+					c++;
+				}
+
+				c++;
+			}
+
+			if (flags & 0x4) {
+				/* A null terminated comment */
+				
+				while (*c != NULL) {
+					c++;
+				}
+
+				c++;
+			}
+
+
+			inflateReset(strm);
+			next = c;
+			strm->next_in = next;
+			comprlen -= (c - compr);
+			
+			err = inflateInit2(strm, wbits);
+			inits_done++;
+		} else if (err == Z_DATA_ERROR && uncompr == NULL &&
+		    inits_done <= 3) {
+			
+			/* 
+			 * Re-init the stream with a negative
+			 * MAX_WBITS. This is necessary due to
+			 * some servers (Apache) not sending
+			 * the deflate header with the
+			 * content-encoded response.
+			 */
+			wbits = -MAX_WBITS;
+
+			inflateReset(strm);
+
+			strm->next_in = next;
+			strm->avail_in = comprlen;
+
+			memset(&strmbuf, 0, TVB_Z_BUFSIZ);
+			strm->next_out = (Bytef *)&strmbuf;
+			strm->avail_out = TVB_Z_BUFSIZ;
+
+			err = inflateInit2(strm, wbits);
+				
+			inits_done++;
+			
+			if (err != Z_OK) {
+				g_free(strm);
+				g_free(compr);
+				g_free(uncompr);
+
+				return NULL;
+			}
+		} else {
+			g_free(strm);
+			g_free(compr);
+
+			if (uncompr == NULL) {
+				return NULL;
+			}
+
+			break;
+		}
+	}
+	
+	if (uncompr != NULL) {
+		uncompr_tvb =  tvb_new_real_data((guint8*) uncompr, bytes_out,
+		    bytes_out);
+	}
+	g_free(compr);
+	return uncompr_tvb;
+}
+#else
+tvbuff_t *
+tvb_uncompress(tvbuff_t *tvb _U_, int offset _U_, int comprlen _U_)
+{
+	return NULL;
+}
+#endif
+
Index: epan/tvbuff.h
===================================================================
RCS file: /cvsroot/ethereal/epan/tvbuff.h,v
retrieving revision 1.41
diff -u -r1.41 tvbuff.h
--- epan/tvbuff.h	23 Mar 2004 18:06:29 -0000	1.41
+++ epan/tvbuff.h	3 May 2004 00:21:55 -0000
@@ -513,6 +513,13 @@
 extern gint tvb_find_tvb(tvbuff_t *haystack_tvb, tvbuff_t *needle_tvb,
 	gint haystack_offset);
 
+/*
+ * Uncompresses a zlib compressed packet inside a tvbuff at offset with
+ * length comprlen.  Returns an uncompressed tvbuffer if uncompression
+ * succeeded or NULL if uncompression failed.
+ */
+extern tvbuff_t* tvb_uncompress(tvbuff_t *tvb, int offset, int comprlen);
+
 /************** END OF ACCESSORS ****************/
 
 #endif /* __TVBUFF_H__ */