8 files changed, 879 insertions, 291 deletions
diff --git a/pigz/Makefile b/pigz/Makefile
index fa47e2b13..822902c44 100644
--- a/pigz/Makefile
+++ b/pigz/Makefile
@@ -1,7 +1,8 @@
-CFLAGS=-O3
+CC=cc
+CFLAGS=-O3 -Wall -Wextra
 
 pigz: pigz.o yarn.o
-	cc -o pigz pigz.o yarn.o -lpthread -lz
+	$(CC) -o pigz pigz.o yarn.o -lpthread -lz
 	ln -f pigz unpigz
 
 pigz.o: pigz.c yarn.h
@@ -11,19 +12,19 @@ yarn.o: yarn.c yarn.h
 dev: pigz pigzt pigzn
 
 pigzt: pigzt.o yarnt.o
-	cc -o pigzt pigzt.o yarnt.o -lpthread -lz
+	$(CC) -o pigzt pigzt.o yarnt.o -lpthread -lz
 
 pigzt.o: pigz.c yarn.h
-	cc -Wall -O3 -DDEBUG -g -c -o pigzt.o pigz.c
+	$(CC) -Wall -O3 -DDEBUG -g -c -o pigzt.o pigz.c
 
 yarnt.o: yarn.c yarn.h
-	cc -Wall -O3 -DDEBUG -g -c -o yarnt.o yarn.c
+	$(CC) -Wall -O3 -DDEBUG -g -c -o yarnt.o yarn.c
 
 pigzn: pigzn.o
-	cc -o pigzn pigzn.o -lz
+	$(CC) -o pigzn pigzn.o -lz
 
 pigzn.o: pigz.c
-	cc -Wall -O3 -DDEBUG -DNOTHREAD -g -c -o pigzn.o pigz.c
+	$(CC) -Wall -O3 -DDEBUG -DNOTHREAD -g -c -o pigzn.o pigz.c
 
 test: pigz
 	./pigz -kf pigz.c ; ./pigz -t pigz.c.gz
@@ -31,12 +32,22 @@ test: pigz
 	./pigz -kfp 1 pigz.c ; ./pigz -t pigz.c.gz
 	./pigz -kfz pigz.c ; ./pigz -t pigz.c.zz
 	./pigz -kfK pigz.c ; ./pigz -t pigz.c.zip
-	compress -f < pigz.c | ./unpigz | cmp - pigz.c
-	rm -f pigz.c.gz pigz.c.zz pigz.c.zip
+	printf "" | ./pigz -cdf | wc -c | test `cat` -eq 0
+	printf "x" | ./pigz -cdf | wc -c | test `cat` -eq 1
+	printf "xy" | ./pigz -cdf | wc -c | test `cat` -eq 2
+	printf "xyz" | ./pigz -cdf | wc -c | test `cat` -eq 3
+	(printf "w" | gzip ; printf "x") | ./pigz -cdf | wc -c | test `cat` -eq 2
+	(printf "w" | gzip ; printf "xy") | ./pigz -cdf | wc -c | test `cat` -eq 3
+	(printf "w" | gzip ; printf "xyz") | ./pigz -cdf | wc -c | test `cat` -eq 4
+	-@if test "`whereis compress | grep /`" != ""; then \
+	  echo 'compress -f < pigz.c | ./unpigz | cmp - pigz.c' ;\
+	  compress -f < pigz.c | ./unpigz | cmp - pigz.c ;\
+	fi
+	@rm -f pigz.c.gz pigz.c.zz pigz.c.zip
 
 tests: dev test
 	./pigzn -kf pigz.c ; ./pigz -t pigz.c.gz
-	rm -f pigz.c.gz
+	@rm -f pigz.c.gz
 
 docs: pigz.pdf
 
@@ -44,4 +55,4 @@ pigz.pdf: pigz.1
 	groff -mandoc -f H -T ps pigz.1 | ps2pdf - pigz.pdf
 
 clean:
-	rm -f *.o pigz unpigz pigzn pigzt pigz.c.gz pigz.c.zz pigz.c.zip
+	@rm -f *.o pigz unpigz pigzn pigzt pigz.c.gz pigz.c.zz pigz.c.zip
diff --git a/pigz/README b/pigz/README
index bad49da2e..bf29102e8 100644
--- a/pigz/README
+++ b/pigz/README
@@ -1,4 +1,4 @@
-pigz 2.1.6 (17 Jan 2010) by Mark Adler
+pigz 2.2.5 (xx Apr 2012) by Mark Adler
 
 pigz, which stands for Parallel Implementation of GZip, is a fully functional
 replacement for gzip that exploits multiple processors and multiple cores to
diff --git a/pigz/pigz.1 b/pigz/pigz.1
index 567c6caa4..8d75ca228 100644
--- a/pigz/pigz.1
+++ b/pigz/pigz.1
@@ -63,7 +63,7 @@ The default input block size is 128K, but can be changed with the
 .B -b
 option.  The number of compress threads is set by default to the number
 of online processors,
-which can be changed using the 
+which can be changed using the
 .B -p
 option.  Specifying
 .B -p 1
@@ -86,7 +86,7 @@ uses a single thread
 (the main thread) for decompression, but will create three other threads for
 reading, writing, and check calculation, which can speed up decompression
 under some circumstances.  Parallel decompression can be turned off by
-specifying one process 
+specifying one process
 (
 .B -dp 1
 or
@@ -162,6 +162,9 @@ Print no messages, even on error.
 .B -r --recursive
 Process the contents of all subdirectories.
 .TP
+.B -R --rsyncable
+Input-determined block locations for rsync.
+.TP
 .B -S --suffix .sss
 Use suffix .sss instead of .gz (for compression).
 .TP
@@ -179,9 +182,12 @@ Show the version of pigz.
 .TP
 .B -z --zlib
 Compress to zlib (.zz) instead of gzip format.
+.TP
+.B --
+All arguments after "--" are treated as file names (for names that start with "-")
 .SH "COPYRIGHT NOTICE"
 This software is provided 'as-is', without any express or implied
 warranty.  In no event will the author be held liable for any damages
 arising from the use of this software.
 .PP
-Copyright (C) 2007, 2008, 2009, 2010 Mark Adler <madler@alumni.caltech.edu>
+Copyright (C) 2007, 2008, 2009, 2010, 2011, 2012 Mark Adler <madler@alumni.caltech.edu>
diff --git a/pigz/pigz.c b/pigz/pigz.c
index 97e6faa14..5416bc97d 100644
--- a/pigz/pigz.c
+++ b/pigz/pigz.c
@@ -1,6 +1,6 @@
 /* pigz.c -- parallel implementation of gzip
- * Copyright (C) 2007, 2008, 2009, 2010 Mark Adler
- * Version 2.1.6  17 Jan 2010  Mark Adler
+ * Copyright (C) 2007, 2008, 2009, 2010, 2011, 2012 Mark Adler
+ * Version 2.2.5  28 Jul 2012  Mark Adler
  */
 
 /*
@@ -47,7 +47,7 @@
                        Implement all command line options except --recursive
                        Add --keep option to prevent deleting input files
                        Add thread tracing information with -vv used
-                       Copy crc32_combine() from zlib (possible thread issue)
+                       Copy crc32_combine() from zlib (shared libraries issue)
    1.3    25 Feb 2007  Implement --recursive
                        Expand help to show all options
                        Show help if no arguments or output piping are provided
@@ -106,15 +106,56 @@
                        Decompress if executable named "gunzip" [Hoffstätte]
                        Allow ".tgz" suffix [Chernookiy]
                        Fix adler32 comparison on .zz files
+   2.1.7  17 Dec 2011  Avoid unused parameter warning in reenter()
+                       Don't assume 2's complement ints in compress_thread()
+                       Replicate gzip -cdf cat-like behavior
+                       Replicate gzip -- option to suppress option decoding
+                       Test output from make test instead of showing it
+                       Updated pigz.spec to install unpigz, pigz.1 [Obermaier]
+                       Add PIGZ environment variable [Mueller]
+                       Replicate gzip suffix search when decoding or listing
+                       Fix bug in load() to set in_left to zero on end of file
+                       Do not check suffix when input file won't be modified
+                       Decompress to stdout if name is "*cat" [Hayasaka]
+                       Write data descriptor signature to be like Info-ZIP
+                       Update and sort options list in help
+                       Use CC variable for compiler in Makefile
+                       Exit with code 2 if a warning has been issued
+                       Fix thread synchronization problem when tracing
+                       Change macro name MAX to MAX2 to avoid library conflicts
+                       Determine number of processors on HP-UX [Lloyd]
+   2.2    31 Dec 2011  Check for expansion bound busting (e.g. modified zlib)
+                       Make the "threads" list head global variable volatile
+                       Fix construction and printing of 32-bit check values
+                       Add --rsyncable functionality
+   2.2.1   1 Jan 2012  Fix bug in --rsyncable buffer management
+   2.2.2   1 Jan 2012  Fix another bug in --rsyncable buffer management
+   2.2.3  15 Jan 2012  Remove volatile in yarn.c
+                       Reduce the number of input buffers
+                       Change initial rsyncable hash to comparison value
+                       Improve the efficiency of arriving at a byte boundary
+                       Add thread portability #defines from yarn.c
+                       Have rsyncable compression be independent of threading
+                       Fix bug where constructed dictionaries not being used
+   2.2.4  11 Mar 2012  Avoid some return value warnings
+                       Improve the portability of printing the off_t type
+                       Check for existence of compress binary before using
+                       Update zlib version checking to 1.2.6 for new functions
+                       Fix bug in zip (-K) output
+                       Fix license in pigz.spec
+                       Remove thread portability #defines in pigz.c
+   2.2.5  28 Jul 2012  Avoid race condition in free_pool()
+                       Change suffix to .tar when decompressing or listing .tgz
+                       Print name of executable in error messages
+                       Show help properly when the name is unpigz or gunzip
+                       Fix permissions security problem before output is closed
  */
 
-#define VERSION "pigz 2.1.6\n"
+#define VERSION "pigz 2.2.5\n"
 
 /* To-do:
-    - add --rsyncable (or -R) [use my own algorithm, set min/max block size]
     - make source portable for Windows, VMS, etc. (see gzip source code)
     - make build portable (currently good for Unixish)
-    - add bzip2 decompression
  */
 
 /*
@@ -132,9 +173,11 @@
 
    Each partial raw deflate stream is terminated by an empty stored block
    (using the Z_SYNC_FLUSH option of zlib), in order to end that partial bit
-   stream at a byte boundary.  That allows the partial streams to be
-   concatenated simply as sequences of bytes.  This adds a very small four to
-   five byte overhead to the output for each input chunk.
+   stream at a byte boundary, unless that partial stream happens to already end
+   at a byte boundary (the latter requires zlib 1.2.6 or later).  Ending on a
+   byte boundary allows the partial streams to be concatenated simply as
+   sequences of bytes.  This adds a very small four to five byte overhead
+   (average 3.75 bytes) to the output for each input chunk.
 
    The default input block size is 128K, but can be changed with the -b option.
    The number of compress threads is set by default to 8, which can be changed
@@ -190,10 +233,10 @@
    jobs until instructed to return.  When a job is pulled, the dictionary, if
    provided, will be loaded into the deflate engine and then that input buffer
    is dropped for reuse.  Then the input data is compressed into an output
-   buffer sized to assure that it can contain maximally expanded deflate data.
-   The job is then put into the write job list, sorted by the sequence number.
-   The compress thread however continues to calculate the check value on the
-   input data, either a CRC-32 or Adler-32, possibly in parallel with the write
+   buffer that grows in size if necessary to hold the compressed data. The job
+   is then put into the write job list, sorted by the sequence number. The
+   compress thread however continues to calculate the check value on the input
+   data, either a CRC-32 or Adler-32, possibly in parallel with the write
    thread writing the output data.  Once that's done, the compress thread drops
    the input buffer and also releases the lock on the check value so that the
    write thread can combine it with the previous check values.  The compress
@@ -237,7 +280,7 @@
    to compression, that number allows a second set of buffers to be read while
    the first set of compressions are being performed.  The number of output
    buffers is not directly limited, but is indirectly limited by the release of
-   input buffers to the same number.
+   input buffers to about the same number.
  */
 
 /* use large file functions if available */
@@ -266,7 +309,15 @@
                         /* O_WRONLY */
 #include <dirent.h>     /* opendir(), readdir(), closedir(), DIR, */
                         /* struct dirent */
-#include <limits.h>     /* PATH_MAX */
+#include <limits.h>     /* PATH_MAX, UINT_MAX */
+#if __STDC_VERSION__-0 >= 199901L || __GNUC__-0 >= 3
+#  include <inttypes.h> /* intmax_t */
+#endif
+
+#ifdef __hpux
+#  include <sys/param.h>
+#  include <sys/pstat.h>
+#endif
 
 #include "zlib.h"       /* deflateInit2(), deflateReset(), deflate(), */
                         /* deflateEnd(), deflateSetDictionary(), crc32(),
@@ -304,7 +355,69 @@
         } \
     } while (0)
 
+/* sliding dictionary size for deflate */
+#define DICT 32768U
+
+/* largest power of 2 that fits in an unsigned int -- used to limit requests
+   to zlib functions that use unsigned int lengths */
+#define MAXP2 (UINT_MAX - (UINT_MAX >> 1))
+
+/* rsyncable constants -- RSYNCBITS is the number of bits in the mask for
+   comparison.  For random input data, there will be a hit on average every
+   1<<RSYNCBITS bytes.  So for an RSYNCBITS of 12, there will be an average of
+   one hit every 4096 bytes, resulting in a mean block size of 4096.  RSYNCMASK
+   is the resulting bit mask.  RSYNCHIT is what the hash value is compared to
+   after applying the mask.
+
+   The choice of 12 for RSYNCBITS is consistent with the original rsyncable
+   patch for gzip which also uses a 12-bit mask.  This results in a relatively
+   small hit to compression, on the order of 1.5% to 3%.  A mask of 13 bits can
+   be used instead if a hit of less than 1% to the compression is desired, at
+   the expense of more blocks transmitted for rsync updates.  (Your mileage may
+   vary.)
+
+   This implementation of rsyncable uses a different hash algorithm than what
+   the gzip rsyncable patch uses in order to provide better performance in
+   several regards.  The algorithm is simply to shift the hash value left one
+   bit and exclusive-or that with the next byte.  This is masked to the number
+   of hash bits (RSYNCMASK) and compared to all ones except for a zero in the
+   top bit (RSYNCHIT). This rolling hash has a very small window of 19 bytes
+   (RSYNCBITS+7).  The small window provides the benefit of much more rapid
+   resynchronization after a change, than does the 4096-byte window of the gzip
+   rsyncable patch.
+
+   The comparison value is chosen to avoid matching any repeated bytes or short
+   sequences.  The gzip rsyncable patch on the other hand uses a sum and zero
+   for comparison, which results in certain bad behaviors, such as always
+   matching everywhere in a long sequence of zeros.  Such sequences occur
+   frequently in tar files.
+
+   This hash efficiently discards history older than 19 bytes simply by
+   shifting that data past the top of the mask -- no history needs to be
+   retained to undo its impact on the hash value, as is needed for a sum.
+
+   The choice of the comparison value (RSYNCHIT) has the virtue of avoiding
+   extremely short blocks.  The shortest block is five bytes (RSYNCBITS-7) from
+   hit to hit, and is unlikely.  Whereas with the gzip rsyncable algorithm,
+   blocks of one byte are not only possible, but in fact are the most likely
+   block size.
+
+   Thanks and acknowledgement to Kevin Day for his experimentation and insights
+   on rsyncable hash characteristics that led to some of the choices here.
+ */
+#define RSYNCBITS 12
+#define RSYNCMASK ((1U << RSYNCBITS) - 1)
+#define RSYNCHIT (RSYNCMASK >> 1)
+
+/* initial pool counts and sizes -- INBUFS is the limit on the number of input
+   spaces as a function of the number of processors (used to throttle the
+   creation of compression jobs), OUTPOOL is the initial size of the output
+   data buffer, chosen to make resizing of the buffer very unlikely */
+#define INBUFS(p) (((p)<<1)+3)
+#define OUTPOOL(s) ((s)+((s)>>4))
+
 /* globals (modified by main thread only when it's the only thread) */
+local char *prog;           /* name by which pigz was invoked */
 local int ind;              /* input file descriptor */
 local int outd;             /* output file descriptor */
 local char in[PATH_MAX+1];  /* input file name (accommodate recursion) */
@@ -313,8 +426,9 @@ local int verbosity;        /* 0 = quiet, 1 = normal, 2 = verbose, 3 = trace */
 local int headis;           /* 1 to store name, 2 to store date, 3 both */
 local int pipeout;          /* write output to stdout even if file */
 local int keep;             /* true to prevent deletion of input file */
-local int force;            /* true to overwrite, compress links */
+local int force;            /* true to overwrite, compress links, cat */
 local int form;             /* gzip = 0, zlib = 1, zip = 2 or 3 */
+local unsigned char magic1; /* first byte of possible header when decoding */
 local int recurse;          /* true to dive down into directory structure */
 local char *sufx;           /* suffix to use (".gz" or user supplied) */
 local char *name;           /* name for gzip header */
@@ -325,8 +439,9 @@ local int decode;           /* 0 to compress, 1 to decompress, 2 to test */
 local int level;            /* compression level */
 local int rsync;            /* true for rsync blocking */
 local int procs;            /* maximum number of compression threads (>= 1) */
-local int dict;             /* true to initialize dictionary in each thread */
+local int setdict;          /* true to initialize dictionary in each thread */
 local size_t size;          /* uncompressed input size per thread (>= 32K) */
+local int warned = 0;       /* true if a warning has been given */
 
 /* saved gzip/zip header data for decompression, testing, and listing */
 local time_t stamp;                 /* time stamp from gzip header */
@@ -335,13 +450,29 @@ local unsigned long zip_crc;        /* local header crc */
 local unsigned long zip_clen;       /* local header compressed length */
 local unsigned long zip_ulen;       /* local header uncompressed length */
 
+/* display a complaint with the program name on stderr */
+local int complain(char *fmt, ...)
+{
+    va_list ap;
+
+    if (verbosity > 0) {
+        fprintf(stderr, "%s: ", prog);
+        va_start(ap, fmt);
+        vfprintf(stderr, fmt, ap);
+        va_end(ap);
+        putc('\n', stderr);
+        fflush(stderr);
+        warned = 1;
+    }
+    return 0;
+}
+
 /* exit with error, delete output file if in the middle of writing it */
 local int bail(char *why, char *what)
 {
     if (outd != -1 && out != NULL)
         unlink(out);
-    if (verbosity > 0)
-        fprintf(stderr, "pigz abort: %s%s\n", why, what);
+    complain("abort: %s%s", why, what);
     exit(1);
     return 0;
 }
@@ -523,22 +654,15 @@ local void writen(int desc, unsigned char *buf, size_t len)
 
     while (len) {
         ret = write(desc, buf, len);
-        if (ret < 1)
-            fprintf(stderr, "write error code %d\n", errno);
-        if (ret < 1)
+        if (ret < 1) {
+            complain("write error code %d", errno);
             bail("write error on ", out);
+        }
         buf += ret;
         len -= ret;
     }
 }
 
-/* sliding dictionary size for deflate */
-#define DICT 32768U
-
-/* largest power of 2 that fits in an unsigned int -- used to limit requests
-   to zlib functions that use unsigned int lengths */
-#define MAX ((((unsigned)0 - 1) >> 1) + 1)
-
 /* convert Unix time to MS-DOS date and time, assuming current timezone
    (you got a better idea?) */
 local unsigned long time2dos(time_t t)
@@ -637,10 +761,11 @@ local void put_trailer(unsigned long ulen, unsigned long clen,
         unsigned long cent;
 
         /* write data descriptor (as promised in local header) */
-        PUT4L(tail, check);
-        PUT4L(tail + 4, clen);
-        PUT4L(tail + 8, ulen);
-        writen(outd, tail, 12);
+        PUT4L(tail, 0x08074b50UL);
+        PUT4L(tail + 4, check);
+        PUT4L(tail + 8, clen);
+        PUT4L(tail + 12, ulen);
+        writen(outd, tail, 16);
 
         /* write central file header */
         PUT4L(tail, 0x02014b50UL);  /* central header signature */
@@ -685,7 +810,7 @@ local void put_trailer(unsigned long ulen, unsigned long clen,
         PUT2L(tail + 8, 1);         /* number of entries on this disk */
         PUT2L(tail + 10, 1);        /* total number of entries */
         PUT4L(tail + 12, cent);     /* size of central directory */
-        PUT4L(tail + 16, head + clen + 12); /* offset of central directory */
+        PUT4L(tail + 16, head + clen + 16); /* offset of central directory */
         PUT2L(tail + 20, 0);        /* no zip file comment */
         writen(outd, tail, 22);     /* write end of central directory record */
     }
@@ -827,17 +952,18 @@ local unsigned long adler32_comb(unsigned long adler1, unsigned long adler2,
 /* a space (one buffer for each space) */
 struct space {
     lock *use;              /* use count -- return to pool when zero */
-    void *buf;              /* buffer of size pool->size */
-    size_t len;             /* for application usage */
+    unsigned char *buf;     /* buffer of size size */
+    size_t size;            /* current size of this buffer */
+    size_t len;             /* for application usage (initially zero) */
     struct pool *pool;      /* pool to return to */
     struct space *next;     /* for pool linked list */
 };
 
-/* pool of spaces (one pool for each size needed) */
+/* pool of spaces (one pool for each type needed) */
 struct pool {
     lock *have;             /* unused spaces available, lock for list */
     struct space *head;     /* linked list of available buffers */
-    size_t size;            /* size of all buffers in this pool */
+    size_t size;            /* size of new buffers in this pool */
     int limit;              /* number of new spaces allowed, or -1 */
     int made;               /* number of buffers made */
 };
@@ -872,6 +998,7 @@ local struct space *get_space(struct pool *pool)
         pool->head = space->next;
         twist(pool->have, BY, -1);      /* one less in pool */
         twist(space->use, TO, 1);       /* initially one user */
+        space->len = 0;
         return space;
     }
 
@@ -888,10 +1015,51 @@ local struct space *get_space(struct pool *pool)
     space->buf = malloc(pool->size);
     if (space->buf == NULL)
         bail("not enough memory", "");
+    space->size = pool->size;
+    space->len = 0;
     space->pool = pool;                 /* remember the pool this belongs to */
     return space;
 }
 
+/* compute next size up by multiplying by about 2**(1/3) and round to the next
+   power of 2 if we're close (so three applications results in doubling) -- if
+   small, go up to at least 16, if overflow, go to max size_t value */
+local size_t grow(size_t size)
+{
+    size_t was, top;
+    int shift;
+
+    was = size;
+    size += size >> 2;
+    top = size;
+    for (shift = 0; top > 7; shift++)
+        top >>= 1;
+    if (top == 7)
+        size = (size_t)1 << (shift + 3);
+    if (size < 16)
+        size = 16;
+    if (size <= was)
+        size = (size_t)0 - 1;
+    return size;
+}
+
+/* increase the size of the buffer in space */
+local void grow_space(struct space *space)
+{
+    size_t more;
+
+    /* compute next size up */
+    more = grow(space->size);
+    if (more == space->size)
+        bail("not enough memory", "");
+
+    /* reallocate the buffer */
+    space->buf = realloc(space->buf, more);
+    if (space->buf == NULL)
+        bail("not enough memory", "");
+    space->size = more;
+}
+
 /* increment the use count to require one more drop before returning this space
    to the pool */
 local void use_space(struct space *space)
@@ -935,15 +1103,17 @@ local int free_pool(struct pool *pool)
         free(space);
         count++;
     }
+    assert(count == pool->made);
     release(pool->have);
     free_lock(pool->have);
-    assert(count == pool->made);
     return count;
 }
 
 /* input and output buffer pools */
 local struct pool in_pool;
 local struct pool out_pool;
+local struct pool dict_pool;
+local struct pool lens_pool;
 
 /* -- parallel compression -- */
 
@@ -955,6 +1125,7 @@ struct job {
     int more;                   /* true if this is not the last chunk */
     struct space *in;           /* input data to compress */
     struct space *out;          /* dictionary or resulting compressed data */
+    struct space *lens;         /* coded list of flush block lengths */
     unsigned long check;        /* check value for input data */
     lock *calc;                 /* released when check calculation complete */
     struct job *next;           /* next job in the list (either list) */
@@ -988,9 +1159,13 @@ local void setup_jobs(void)
     write_first = new_lock(-1);
     write_head = NULL;
 
-    /* initialize buffer pools */
-    new_pool(&in_pool, size, (procs << 1) + 2);
-    new_pool(&out_pool, size + (size >> 11) + 10, -1);
+    /* initialize buffer pools (initial size for out_pool not critical, since
+       buffers will be grown in size if needed -- initial size chosen to make
+       this unlikely -- same for lens_pool) */
+    new_pool(&in_pool, size, INBUFS(procs));
+    new_pool(&out_pool, OUTPOOL(size), -1);
+    new_pool(&dict_pool, DICT, -1);
+    new_pool(&lens_pool, size >> (RSYNCBITS - 1), -1);
 }
 
 /* command the compress threads to all return, then join them all (call from
@@ -1019,6 +1194,10 @@ local void finish_jobs(void)
     cthreads = 0;
 
     /* free the resources */
+    caught = free_pool(&lens_pool);
+    Trace(("-- freed %d block lengths buffers", caught));
+    caught = free_pool(&dict_pool);
+    Trace(("-- freed %d dictionary buffers", caught));
     caught = free_pool(&out_pool);
     Trace(("-- freed %d output buffers", caught));
     caught = free_pool(&in_pool);
@@ -1028,6 +1207,28 @@ local void finish_jobs(void)
     compress_have = NULL;
 }
 
+/* compress all strm->avail_in bytes at strm->next_in to out->buf, updating
+   out->len, grow the size of the buffer (out->size) if necessary -- respect
+   the size limitations of the zlib stream data types (size_t may be larger
+   than unsigned) */
+local void deflate_engine(z_stream *strm, struct space *out, int flush)
+{
+    size_t room;
+
+    do {
+        room = out->size - out->len;
+        if (room == 0) {
+            grow_space(out);
+            room = out->size - out->len;
+        }
+        strm->next_out = out->buf + out->len;
+        strm->avail_out = room < UINT_MAX ? (unsigned)room : UINT_MAX;
+        (void)deflate(strm, flush);
+        out->len = strm->next_out - out->buf;
+    } while (strm->avail_out == 0);
+    assert(strm->avail_in == 0);
+}
+
 /* get the next compression job from the head of the list, compress and compute
    the check value on the input, and put a job in the write list with the
    results -- keep looking for more jobs, returning when a job is found with a
@@ -1035,11 +1236,15 @@ local void finish_jobs(void)
    find) */
 local void compress_thread(void *dummy)
 {
-    struct job *job;                /* job pulled and working on */ 
-    struct job *here, **prior;      /* pointers for inserting in write list */ 
+    struct job *job;                /* job pulled and working on */
+    struct job *here, **prior;      /* pointers for inserting in write list */
     unsigned long check;            /* check value of input */
-    unsigned char *next;            /* pointer for check value data */
+    unsigned char *next;            /* pointer for blocks, check value data */
+    size_t left;                    /* input left to process */
     size_t len;                     /* remaining bytes to compress/check */
+#if ZLIB_VERNUM >= 0x1260
+    int bits;                       /* deflate pending bits */
+#endif
     z_stream strm;                  /* deflate stream */
 
     (void)dummy;
@@ -1073,42 +1278,80 @@ local void compress_thread(void *dummy)
         (void)deflateReset(&strm);
         (void)deflateParams(&strm, level, Z_DEFAULT_STRATEGY);
 
-        /* set dictionary if provided, release that input buffer (only provided
-           if dict is true and if this is not the first work unit) -- the
-           amount of data in the buffer is assured to be >= DICT */
+        /* set dictionary if provided, release that input or dictionary buffer
+           (not NULL if dict is true and if this is not the first work unit) */
         if (job->out != NULL) {
             len = job->out->len;
-            deflateSetDictionary(&strm,
-                (unsigned char *)(job->out->buf) + (len - DICT), DICT);
+            left = len < DICT ? len : DICT;
+            deflateSetDictionary(&strm, job->out->buf + (len - left), left);
             drop_space(job->out);
         }
 
-        /* set up input and output (the output size is assured to be big enough
-           for the worst case expansion of the input buffer size, plus five
-           bytes for the terminating stored block) */
+        /* set up input and output */
         job->out = get_space(&out_pool);
         strm.next_in = job->in->buf;
         strm.next_out = job->out->buf;
 
-        /* run MAX-sized amounts of input through deflate -- this loop is
-           needed for those cases where the integer type is smaller than the
-           size_t type, or when len is close to the limit of the size_t type */
-        len = job->in->len;
-        while (len > MAX) {
-            strm.avail_in = MAX;
-            strm.avail_out = (unsigned)-1;
-            (void)deflate(&strm, Z_NO_FLUSH);
-            assert(strm.avail_in == 0 && strm.avail_out != 0);
-            len -= MAX;
-        }
+        /* compress each block, either flushing or finishing */
+        next = job->lens == NULL ? NULL : job->lens->buf;
+        left = job->in->len;
+        job->out->len = 0;
+        do {
+            /* decode next block length from blocks list */
+            len = next == NULL ? 128 : *next++;
+            if (len < 128)                          /* 64..32831 */
+                len = (len << 8) + (*next++) + 64;
+            else if (len == 128)                    /* end of list */
+                len = left;
+            else if (len < 192)                     /* 1..63 */
+                len &= 0x3f;
+            else {                                  /* 32832..4227135 */
+                len = ((len & 0x3f) << 16) + (*next++ << 8) + 32832U;
+                len += *next++;
+            }
+            left -= len;
+
+            /* run MAXP2-sized amounts of input through deflate -- this loop is
+               needed for those cases where the unsigned type is smaller than
+               the size_t type, or when len is close to the limit of the size_t
+               type */
+            while (len > MAXP2) {
+                strm.avail_in = MAXP2;
+                deflate_engine(&strm, job->out, Z_NO_FLUSH);
+                len -= MAXP2;
+            }
 
-        /* run the last piece through deflate -- terminate with a sync marker,
-           or finish deflate stream if this is the last block */
-        strm.avail_in = (unsigned)len;
-        strm.avail_out = (unsigned)-1;
-        (void)deflate(&strm, job->more ? Z_SYNC_FLUSH :  Z_FINISH);
-        assert(strm.avail_in == 0 && strm.avail_out != 0);
-        job->out->len = strm.next_out - (unsigned char *)(job->out->buf);
+            /* run the last piece through deflate -- end on a byte boundary,
+               using a sync marker if necessary, or finish the deflate stream
+               if this is the last block */
+            strm.avail_in = (unsigned)len;
+            if (left || job->more) {
+#if ZLIB_VERNUM >= 0x1260
+                deflate_engine(&strm, job->out, Z_BLOCK);
+
+                /* add just enough empty blocks to get to a byte boundary */
+                (void)deflatePending(&strm, Z_NULL, &bits);
+                if (bits & 1)
+                    deflate_engine(&strm, job->out, Z_SYNC_FLUSH);
+                else if (bits & 7) {
+                    do {
+                        bits = deflatePrime(&strm, 10, 2);  /* static empty */
+                        assert(bits == Z_OK);
+                        (void)deflatePending(&strm, Z_NULL, &bits);
+                    } while (bits & 7);
+                    deflate_engine(&strm, job->out, Z_BLOCK);
+                }
+#else
+                deflate_engine(&strm, job->out, Z_SYNC_FLUSH);
+#endif
+            }
+            else
+                deflate_engine(&strm, job->out, Z_FINISH);
+        } while (left);
+        if (job->lens != NULL) {
+            drop_space(job->lens);
+            job->lens = NULL;
+        }
         Trace(("-- compressed #%ld%s", job->seq, job->more ? "" : " (last)"));
 
         /* reserve input buffer until check value has been calculated */
@@ -1132,24 +1375,24 @@ local void compress_thread(void *dummy)
         len = job->in->len;
         next = job->in->buf;
         check = CHECK(0L, Z_NULL, 0);
-        while (len > MAX) {
-            check = CHECK(check, next, MAX);
-            len -= MAX;
-            next += MAX;
+        while (len > MAXP2) {
+            check = CHECK(check, next, MAXP2);
+            len -= MAXP2;
+            next += MAXP2;
         }
         check = CHECK(check, next, (unsigned)len);
         drop_space(job->in);
         job->check = check;
+        Trace(("-- checked #%ld%s", job->seq, job->more ? "" : " (last)"));
         possess(job->calc);
         twist(job->calc, TO, 1);
-        Trace(("-- checked #%ld%s", job->seq, job->more ? "" : " (last)"));
 
         /* done with that one -- go find another job */
     }
 
     /* found job with seq == -1 -- free deflate memory and return to join */
     release(compress_have);
-    deflateEnd(&strm);
+    (void)deflateEnd(&strm);
 }
 
 /* collect the write jobs off of the list in sequence order and write out the
@@ -1172,7 +1415,7 @@ local void write_thread(void *dummy)
     Trace(("-- write thread running"));
     head = put_header();
 
-    /* process output of compress threads until end of input */    
+    /* process output of compress threads until end of input */
     ulen = clen = 0;
     check = CHECK(0L, Z_NULL, 0);
     seq = 0;
@@ -1224,6 +1467,32 @@ local void write_thread(void *dummy)
     twist(write_first, TO, -1);
 }
 
+/* encode a hash hit to the block lengths list -- hit == 0 ends the list */
+local void append_len(struct job *job, size_t len)
+{
+    struct space *lens;
+
+    assert(len < 4227136UL);
+    if (job->lens == NULL)
+        job->lens = get_space(&lens_pool);
+    lens = job->lens;
+    if (lens->size < lens->len + 3)
+        grow_space(lens);
+    if (len < 64)
+        lens->buf[lens->len++] = len + 128;
+    else if (len < 32832U) {
+        len -= 64;
+        lens->buf[lens->len++] = len >> 8;
+        lens->buf[lens->len++] = len;
+    }
+    else {
+        len -= 32832U;
+        lens->buf[lens->len++] = (len >> 16) + 192;
+        lens->buf[lens->len++] = len >> 8;
+        lens->buf[lens->len++] = len;
+    }
+}
+
 /* compress ind to outd, using multiple threads for the compression and check
    value calculations and one other thread for writing the output -- compress
    threads will be launched and left running (waiting actually) to support
@@ -1231,10 +1500,18 @@ local void write_thread(void *dummy)
 local void parallel_compress(void)
 {
     long seq;                       /* sequence number */
-    struct space *prev;             /* previous input space */
-    struct space *next;             /* next input space */
+    struct space *curr;             /* input data to compress */
+    struct space *next;             /* input data that follows curr */
+    struct space *hold;             /* input data that follows next */
+    struct space *dict;             /* dictionary for next compression */
     struct job *job;                /* job for compress, then write */
     int more;                       /* true if more input to read */
+    unsigned hash;                  /* hash for rsyncable */
+    unsigned char *scan;            /* next byte to compute hash on */
+    unsigned char *end;             /* after end of data to compute hash on */
+    unsigned char *last;            /* position after last hit */
+    size_t left;                    /* last hit in curr to end of curr */
+    size_t len;                     /* for various length computations */
 
     /* if first time or after an option change, setup the job lists */
     setup_jobs();
@@ -1243,36 +1520,135 @@ local void parallel_compress(void)
     writeth = launch(write_thread, NULL);
 
     /* read from input and start compress threads (write thread will pick up
-       the output of the compress threads) */
+     the output of the compress threads) */
     seq = 0;
-    prev = NULL;
     next = get_space(&in_pool);
-    next->len = readn(ind, next->buf, next->pool->size);
+    next->len = readn(ind, next->buf, next->size);
+    hold = NULL;
+    dict = NULL;
+    scan = next->buf;
+    hash = RSYNCHIT;
+    left = 0;
     do {
-        /* create a new job, use next input chunk, previous as dictionary */
+        /* create a new job */
         job = malloc(sizeof(struct job));
         if (job == NULL)
             bail("not enough memory", "");
-        job->seq = seq;
-        job->in = next;
-        job->out = dict ? prev : NULL;  /* dictionary for compression */
         job->calc = new_lock(0);
 
-        /* check for end of file, reading next chunk if not sure */
-        if (next->len < next->pool->size)
-            more = 0;
-        else {
+        /* update input spaces */
+        curr = next;
+        next = hold;
+        hold = NULL;
+
+        /* get more input if we don't already have some */
+        if (next == NULL) {
             next = get_space(&in_pool);
-            next->len = readn(ind, next->buf, next->pool->size);
-            more = next->len != 0;
-            if (!more)
-                drop_space(next);       /* won't be using it */
-            if (dict && more) {
-                use_space(job->in);     /* hold as dictionary for next loop */
-                prev = job->in;
+            next->len = readn(ind, next->buf, next->size);
+        }
+
+        /* if rsyncable, generate block lengths and prepare curr for job to
+           likely have less than size bytes (up to the last hash hit) */
+        job->lens = NULL;
+        if (rsync && curr->len) {
+            /* compute the hash function starting where we last left off to
+               cover either size bytes or to EOF, whichever is less, through
+               the data in curr (and in the next loop, through next) -- save
+               the block lengths resulting from the hash hits in the job->lens
+               list */
+            if (left == 0) {
+                /* scan is in curr */
+                last = curr->buf;
+                end = curr->buf + curr->len;
+                while (scan < end) {
+                    hash = ((hash << 1) ^ *scan++) & RSYNCMASK;
+                    if (hash == RSYNCHIT) {
+                        len = scan - last;
+                        append_len(job, len);
+                        last = scan;
+                    }
+                }
+
+                /* continue scan in next */
+                left = scan - last;
+                scan = next->buf;
+            }
+
+            /* scan in next for enough bytes to fill curr, or what is available
+               in next, whichever is less (if next isn't full, then we're at
+               the end of the file) -- the bytes in curr since the last hit,
+               stored in left, counts towards the size of the first block */
+            last = next->buf;
+            len = curr->size - curr->len;
+            if (len > next->len)
+                len = next->len;
+            end = next->buf + len;
+            while (scan < end) {
+                hash = ((hash << 1) ^ *scan++) & RSYNCMASK;
+                if (hash == RSYNCHIT) {
+                    len = (scan - last) + left;
+                    left = 0;
+                    append_len(job, len);
+                    last = scan;
+                }
+            }
+            append_len(job, 0);
+
+            /* create input in curr for job up to last hit or entire buffer if
+               no hits at all -- save remainder in next and possibly hold */
+            len = (job->lens->len == 1 ? scan : last) - next->buf;
+            if (len) {
+                /* got hits in next, or no hits in either -- copy to curr */
+                memcpy(curr->buf + curr->len, next->buf, len);
+                curr->len += len;
+                memmove(next->buf, next->buf + len, next->len - len);
+                next->len -= len;
+                scan -= len;
+                left = 0;
+            }
+            else if (job->lens->len != 1 && left && next->len) {
+                /* had hits in curr, but none in next, and last hit in curr
+                   wasn't right at the end, so we have input there to save --
+                   use curr up to the last hit, save the rest, moving next to
+                   hold */
+                hold = next;
+                next = get_space(&in_pool);
+                memcpy(next->buf, curr->buf + (curr->len - left), left);
+                next->len = left;
+                curr->len -= left;
+            }
+            else {
+                /* else, last match happened to be right at the end of curr,
+                   or we're at the end of the input compressing the rest */
+                left = 0;
             }
         }
+
+        /* compress curr->buf to curr->len -- compress thread will drop curr */
+        job->in = curr;
+
+        /* set job->more if there is more to compress after curr */
+        more = next->len != 0;
         job->more = more;
+
+        /* provide dictionary for this job, prepare dictionary for next job */
+        job->out = dict;
+        if (more && setdict) {
+            if (curr->len >= DICT || job->out == NULL) {
+                dict = curr;
+                use_space(dict);
+            }
+            else {
+                dict = get_space(&dict_pool);
+                len = DICT - curr->len;
+                memcpy(dict->buf, job->out->buf + (job->out->len - len), len);
+                memcpy(dict->buf + len, curr->buf, curr->len);
+                dict->len = DICT;
+            }
+        }
+
+        /* preparation of job is complete */
+        job->seq = seq;
         Trace(("-- read #%ld%s", seq, more ? "" : " (last)"));
         if (++seq < 1)
             bail("input too long: ", in);
@@ -1289,9 +1665,8 @@ local void parallel_compress(void)
         *compress_tail = job;
         compress_tail = &(job->next);
         twist(compress_have, BY, +1);
-
-        /* do until end of input */
     } while (more);
+    drop_space(next);
 
     /* wait for the write thread to complete (we leave the compress threads out
        there and waiting in case there is another stream to compress) */
@@ -1302,6 +1677,19 @@ local void parallel_compress(void)
 
 #endif
 
+/* repeated code in single_compress to compress available input and write it */
+#define DEFLATE_WRITE(flush) \
+    do { \
+        do { \
+            strm->avail_out = out_size; \
+            strm->next_out = out; \
+            (void)deflate(strm, flush); \
+            writen(outd, out, out_size - strm->avail_out); \
+            clen += out_size - strm->avail_out; \
+        } while (strm->avail_out == 0); \
+        assert(strm->avail_in == 0); \
+    } while (0)
+
 /* do a simple compression in a single thread from ind to outd -- if reset is
    true, instead free the memory that was allocated and retained for input,
    output, and deflate */
@@ -1309,6 +1697,14 @@ local void single_compress(int reset)
 {
     size_t got;                     /* amount read */
     size_t more;                    /* amount of next read (0 if eof) */
+    size_t start;                   /* start of next read */
+    size_t block;                   /* bytes in current block for -i */
+    unsigned hash;                  /* hash for rsyncable */
+#if ZLIB_VERNUM >= 0x1260
+    int bits;                       /* deflate pending bits */
+#endif
+    unsigned char *scan;            /* pointer for hash computation */
+    size_t left;                    /* bytes left to compress after hash hit */
     unsigned long head;             /* header length */
     unsigned long ulen;             /* total uncompressed size (overflow ok) */
     unsigned long clen;             /* total compressed size (overflow ok) */
@@ -1320,7 +1716,7 @@ local void single_compress(int reset)
     /* if requested, just release the allocations and return */
     if (reset) {
         if (strm != NULL) {
-            deflateEnd(strm);
+            (void)deflateEnd(strm);
             free(strm);
             free(out);
             free(next);
@@ -1332,7 +1728,7 @@ local void single_compress(int reset)
 
     /* initialize the deflate structure if this is the first time */
     if (strm == NULL) {
-        out_size = size > MAX ? MAX : (unsigned)size;
+        out_size = size > MAXP2 ? MAXP2 : (unsigned)size;
         if ((in = malloc(size)) == NULL ||
             (next = malloc(size)) == NULL ||
             (out = malloc(out_size)) == NULL ||
@@ -1354,49 +1750,105 @@ local void single_compress(int reset)
     (void)deflateParams(strm, level, Z_DEFAULT_STRATEGY);
 
     /* do raw deflate and calculate check value */
-    ulen = clen = 0;
-    check = CHECK(0L, Z_NULL, 0);
+    got = 0;
     more = readn(ind, next, size);
+    ulen = (unsigned)more;
+    start = 0;
+    clen = 0;
+    block = 0;
+    check = CHECK(0L, Z_NULL, 0);
+    hash = RSYNCHIT;
     do {
         /* get data to compress, see if there is any more input */
-        got = more;
-        { unsigned char *temp; temp = in; in = next; next = temp; }
-        more = got < size ? 0 : readn(ind, next, size);
-        ulen += (unsigned long)got;
-        strm->next_in = in;
-
-        /* compress MAX-size chunks in case unsigned type is small */
-        while (got > MAX) {
-            strm->avail_in = MAX;
-            check = CHECK(check, strm->next_in, strm->avail_in);
+        if (got == 0) {
+            scan = in;  in = next;  next = scan;
+            strm->next_in = in + start;
+            got = more;
+            more = readn(ind, next, size);
+            ulen += (unsigned long)more;
+            start = 0;
+        }
+
+        /* if rsyncable, compute hash until a hit or the end of the block */
+        left = 0;
+        if (rsync && got) {
+            scan = strm->next_in;
+            left = got;
             do {
-                strm->avail_out = out_size;
-                strm->next_out = out;
-                (void)deflate(strm, Z_NO_FLUSH);
-                writen(outd, out, out_size - strm->avail_out);
-                clen += out_size - strm->avail_out;
-            } while (strm->avail_out == 0);
-            assert(strm->avail_in == 0);
-            got -= MAX;
+                if (left == 0) {
+                    /* went to the end -- if no more or no hit in size bytes,
+                       then proceed to do a flush or finish with got bytes */
+                    if (more == 0 || got == size)
+                        break;
+
+                    /* fill in[] with what's left there and as much as possible
+                       from next[] -- set up to continue hash hit search */
+                    memmove(in, strm->next_in, got);
+                    strm->next_in = in;
+                    scan = in + got;
+                    left = more > size - got ? size - got : more;
+                    memcpy(scan, next + start, left);
+                    got += left;
+                    more -= left;
+                    start += left;
+
+                    /* if that emptied the next buffer, try to refill it */
+                    if (more == 0) {
+                        more = readn(ind, next, size);
+                        ulen += (unsigned long)more;
+                        start = 0;
+                    }
+                }
+                left--;
+                hash = ((hash << 1) ^ *scan++) & RSYNCMASK;
+            } while (hash != RSYNCHIT);
+            got -= left;
+        }
+
+        /* clear history for --independent option */
+        if (!setdict) {
+            block += got;
+            if (block > size) {
+                (void)deflateReset(strm);
+                block = got;
+            }
         }
 
-        /* compress the remainder, finishing if end of input -- when not -i,
-           use a Z_SYNC_FLUSH so that this and parallel compression produce the
-           same output */
+        /* compress MAXP2-size chunks in case unsigned type is small */
+        while (got > MAXP2) {
+            strm->avail_in = MAXP2;
+            check = CHECK(check, strm->next_in, strm->avail_in);
+            DEFLATE_WRITE(Z_NO_FLUSH);
+            got -= MAXP2;
+        }
+
+        /* compress the remainder, emit a block -- finish if end of input */
         strm->avail_in = (unsigned)got;
+        got = left;
         check = CHECK(check, strm->next_in, strm->avail_in);
-        do {
-            strm->avail_out = out_size;
-            strm->next_out = out;
-            (void)deflate(strm,
-                more ? (dict ? Z_SYNC_FLUSH : Z_FULL_FLUSH) : Z_FINISH);
-            writen(outd, out, out_size - strm->avail_out);
-            clen += out_size - strm->avail_out;
-        } while (strm->avail_out == 0);
-        assert(strm->avail_in == 0);
+        if (more || got) {
+#if ZLIB_VERNUM >= 0x1260
+            DEFLATE_WRITE(Z_BLOCK);
+            (void)deflatePending(strm, Z_NULL, &bits);
+            if (bits & 1)
+                DEFLATE_WRITE(Z_SYNC_FLUSH);
+            else if (bits & 7) {
+                do {
+                    bits = deflatePrime(strm, 10, 2);
+                    assert(bits == Z_OK);
+                    (void)deflatePending(strm, Z_NULL, &bits);
+                } while (bits & 7);
+                DEFLATE_WRITE(Z_NO_FLUSH);
+            }
+#else
+            DEFLATE_WRITE(Z_SYNC_FLUSH);
+#endif
+        }
+        else
+            DEFLATE_WRITE(Z_FINISH);
 
         /* do until no more input */
-    } while (more);
+    } while (more || got);
 
     /* write trailer */
     put_trailer(ulen, clen, check, head);
@@ -1444,7 +1896,7 @@ local void load_read(void *dummy)
 
 #endif
 
-/* load() is called when in_left has gone to zero in order to provide more
+/* load() is called when the input has been consumed in order to provide more
    input data: load the input buffer with BUF or less bytes (less if at end of
    file) from the file ind, set in_next to point to the in_left bytes read,
    update in_tot, and return in_left -- in_eof is set to true when in_left has
@@ -1454,6 +1906,7 @@ local size_t load(void)
     /* if already detected end of file, do nothing */
     if (in_short) {
         in_eof = 1;
+        in_left = 0;
         return 0;
     }
 
@@ -1528,7 +1981,7 @@ local void in_init(void)
 /* buffered reading macros for decompression and listing */
 #define GET() (in_eof || (in_left == 0 && load() == 0) ? EOF : \
                (in_left--, *in_next++))
-#define GET2() (tmp2 = GET(), tmp2 + (GET() << 8))
+#define GET2() (tmp2 = GET(), tmp2 + ((unsigned)(GET()) << 8))
 #define GET4() (tmp4 = GET2(), tmp4 + ((unsigned long)(GET2()) << 16))
 #define SKIP(dist) \
     do { \
@@ -1542,6 +1995,12 @@ local void in_init(void)
         in_next += togo; \
     } while (0)
 
+/* pull LSB order or MSB order integers from an unsigned char buffer */
+#define PULL2L(p) ((p)[0] + ((unsigned)((p)[1]) << 8))
+#define PULL4L(p) (PULL2L(p) + ((unsigned long)(PULL2L((p) + 2)) << 16))
+#define PULL2M(p) (((unsigned)((p)[0]) << 8) + (p)[1])
+#define PULL4M(p) (((unsigned long)(PULL2M(p)) << 16) + PULL2M((p) + 2))
+
 /* convert MS-DOS date and time to a Unix time, assuming current timezone
    (you got a better idea?) */
 local time_t dos2time(unsigned long dos)
@@ -1641,9 +2100,10 @@ local int get_header(int save)
 
     /* see if it's a gzip, zlib, or lzw file */
     form = 0;
-    magic = GET() << 8;
+    magic1 = GET();
     if (in_eof)
         return -1;
+    magic = magic1 << 8;
     magic += GET();
     if (in_eof)
         return -2;
@@ -1699,8 +2159,11 @@ local int get_header(int save)
         form = 2 + ((flags & 8) >> 3);
         return in_eof ? -3 : method;
     }
-    if (magic != 0x1f8b)            /* not gzip */
+    if (magic != 0x1f8b) {          /* not gzip */
+        in_left++;      /* unget second magic byte */
+        in_next--;
         return -2;
+    }
 
     /* it's gzip -- get method and flags */
     method = GET();
@@ -1824,6 +2287,8 @@ local void show_info(int method, unsigned long check, off_t len, int cont)
     else if (hname == NULL) {
         n = strlen(in) - compressed_suffix(in);
         strncpy(name, in, n > max + 1 ? max + 1 : n);
+        if (strcmp(in + n, ".tgz") == 0 && n < max + 1)
+            strncpy(name + n, ".tar", max + 1 - n);
     }
     else
         strncpy(name, hname, max + 1);
@@ -1867,15 +2332,25 @@ local void show_info(int method, unsigned long check, off_t len, int cont)
         if ((form == 3 && !decode) ||
             (method == 8 && in_tot > (len + (len >> 10) + 12)) ||
             (method == 256 && in_tot > len + (len >> 1) + 3))
-            printf(sizeof(off_t) == 4 ? "%10lu %10lu?  unk    %s\n" :
-                                        "%10llu %10llu?  unk    %s\n",
+#if __STDC_VERSION__-0 >= 199901L || __GNUC__-0 >= 3
+            printf("%10jd %10jd?  unk    %s\n",
+                   (intmax_t)in_tot, (intmax_t)len, name);
+        else
+            printf("%10jd %10jd %6.1f%%  %s\n",
+                   (intmax_t)in_tot, (intmax_t)len,
+                   len == 0 ? 0 : 100 * (len - in_tot)/(double)len,
+                   name);
+#else
+            printf(sizeof(off_t) == sizeof(long) ?
+                   "%10ld %10ld?  unk    %s\n" : "%10lld %10lld?  unk    %s\n",
                    in_tot, len, name);
         else
-            printf(sizeof(off_t) == 4 ? "%10lu %10lu %6.1f%%  %s\n" :
-                                        "%10llu %10llu %6.1f%%  %s\n",
+            printf(sizeof(off_t) == sizeof(long) ?
+                   "%10ld %10ld %6.1f%%  %s\n" : "%10lld %10lld %6.1f%%  %s\n",
                    in_tot, len,
                    len == 0 ? 0 : 100 * (len - in_tot)/(double)len,
                    name);
+#endif
     }
 }
 
@@ -1898,7 +2373,7 @@ local void list_info(void)
     if (method < 0) {
         RELEASE(hname);
         if (method != -1 && verbosity > 1)
-            fprintf(stderr, "%s not a compressed file -- skipping\n", in);
+            complain("%s not a compressed file -- skipping", in);
         return;
     }
 
@@ -1926,7 +2401,7 @@ local void list_info(void)
             in_tot = at;
             lseek(ind, -4, SEEK_END);
             readn(ind, tail, 4);
-            check = (*tail << 24) + (tail[1] << 16) + (tail[2] << 8) + tail[3];
+            check = PULL4M(tail);
         }
         in_tot -= 6;
         show_info(method, check, 0, 0);
@@ -1949,9 +2424,7 @@ local void list_info(void)
     /* skip to end to get trailer (8 bytes), compute compressed length */
     if (in_short) {                     /* whole thing already read */
         if (in_left < 8) {
-            if (verbosity > 0)
-                fprintf(stderr, "%s not a valid gzip file -- skipping\n",
-                        in);
+            complain("%s not a valid gzip file -- skipping", in);
             return;
         }
         in_tot = in_left - 8;           /* compressed size */
@@ -1970,9 +2443,7 @@ local void list_info(void)
         } while (in_left == BUF);       /* read until end */
         if (in_left < 8) {
             if (n + in_left < 8) {
-                if (verbosity > 0)
-                    fprintf(stderr, "%s not a valid gzip file -- skipping\n",
-                            in);
+                complain("%s not a valid gzip file -- skipping", in);
                 return;
             }
             if (in_left) {
@@ -1986,20 +2457,38 @@ local void list_info(void)
         in_tot -= at + 8;
     }
     if (in_tot < 2) {
-        if (verbosity > 0)
-            fprintf(stderr, "%s not a valid gzip file -- skipping\n", in);
+        complain("%s not a valid gzip file -- skipping", in);
         return;
     }
 
     /* convert trailer to check and uncompressed length (modulo 2^32) */
-    check = tail[0] + (tail[1] << 8) + (tail[2] << 16) + (tail[3] << 24);
-    len = tail[4] + (tail[5] << 8) + (tail[6] << 16) + (tail[7] << 24);
+    check = PULL4L(tail);
+    len = PULL4L(tail + 4);
 
     /* list information about contents */
     show_info(method, check, len, 0);
     RELEASE(hname);
 }
 
+/* --- copy input to output (when acting like cat) --- */
+
+local void cat(void)
+{
+    /* write first magic byte (if we're here, there's at least one byte) */
+    writen(outd, &magic1, 1);
+    out_tot = 1;
+
+    /* copy the remainder of the input to the output (if there were any more
+       bytes of input, then in_left is non-zero and in_next is pointing to the
+       second magic byte) */
+    while (in_left) {
+        writen(outd, in_next, in_left);
+        out_tot += in_left;
+        in_left = 0;
+        load();
+    }
+}
+
 /* --- decompress deflate input --- */
 
 /* call-back input function for inflateBack() */
@@ -2167,7 +2656,7 @@ local void infchk(void)
         /* read and check trailer */
         if (form > 1) {             /* zip local trailer (if any) */
             if (form == 3) {        /* data descriptor follows */
-                /* read original version of data descriptor*/
+                /* read original version of data descriptor */
                 zip_crc = GET4();
                 zip_clen = GET4();
                 zip_ulen = GET4();
@@ -2183,6 +2672,14 @@ local void infchk(void)
                     zip_ulen = GET4();
                 }
 
+                /* handle incredibly rare cases where crc equals signature */
+                else if (zip_crc == 0x08074b50UL && zip_clen == zip_crc &&
+                         ((clen & LOW32) != zip_crc || zip_ulen == zip_crc)) {
+                    zip_crc = zip_clen;
+                    zip_clen = zip_ulen;
+                    zip_ulen = GET4();
+                }
+
                 /* if second length doesn't match, try 64-bit lengths */
                 if (zip_ulen != (out_tot & LOW32)) {
                     zip_ulen = GET4();
@@ -2198,7 +2695,7 @@ local void infchk(void)
         else if (form == 1) {       /* zlib (big-endian) trailer */
             check = (unsigned long)(GET()) << 24;
             check += (unsigned long)(GET()) << 16;
-            check += GET() << 8;
+            check += (unsigned)(GET()) << 8;
             check += GET();
             if (in_eof)
                 bail("corrupted zlib stream -- missing trailer: ", in);
@@ -2226,8 +2723,12 @@ local void infchk(void)
         /* if a gzip or zlib entry follows a gzip or zlib entry, decompress it
            (don't replace saved header information from first entry) */
     } while (form < 2 && (ret = get_header(0)) == 8 && form < 2);
-    if (ret != -1 && form < 2)
-        fprintf(stderr, "%s OK, has trailing junk which was ignored\n", in);
+
+    /* gzip -cdf copies junk after gzip stream directly to output */
+    if (form < 2 && ret == -2 && force && pipeout && decode != 2 && !list)
+        cat();
+    else if (ret != -1 && form < 2)
+        complain("%s OK, has trailing junk which was ignored", in);
 }
 
 /* --- decompress Unix compress (LZW) input --- */
@@ -2448,17 +2949,17 @@ local void copymeta(char *from, char *to)
         return;
 
     /* set to's mode bits, ignore errors */
-    chmod(to, st.st_mode & 07777);
+    (void)chmod(to, st.st_mode & 07777);
 
     /* copy owner's user and group, ignore errors */
-    chown(to, st.st_uid, st.st_gid);
+    (void)chown(to, st.st_uid, st.st_gid);
 
     /* copy access and modify times, ignore errors */
     times[0].tv_sec = st.st_atime;
     times[0].tv_usec = 0;
     times[1].tv_sec = st.st_mtime;
     times[1].tv_usec = 0;
-    utimes(to, times);
+    (void)utimes(to, times);
 }
 
 /* set the access and modify times of fd to t */
@@ -2470,7 +2971,7 @@ local void touch(char *path, time_t t)
     times[0].tv_usec = 0;
     times[1].tv_sec = t;
     times[1].tv_usec = 0;
-    utimes(path, times);
+    (void)utimes(path, times);
 }
 
 /* process provided input file, or stdin if path is NULL -- process() can
@@ -2480,6 +2981,9 @@ local void process(char *path)
     int method = -1;                /* get_header() return value */
     size_t len;                     /* length of base name (minus suffix) */
     struct stat st;                 /* to get file type and mod time */
+    /* all compressed suffixes for decoding search, in length order */
+    static char *sufs[] = {".z", "-z", "_z", ".Z", ".gz", "-gz", ".zz", "-zz",
+                           ".zip", ".ZIP", ".tgz", NULL};
 
     /* open input file with name in, descriptor ind -- set name and mtime */
     if (path == NULL) {
@@ -2499,34 +3003,45 @@ local void process(char *path)
         }
         len = strlen(in);
 
-        /* only process regular files, but allow symbolic links if -f,
-           recurse into directory if -r */
+        /* try to stat input file -- if not there and decoding, look for that
+           name with compressed suffixes */
         if (lstat(in, &st)) {
+            if (errno == ENOENT && (list || decode)) {
+                char **try = sufs;
+                do {
+                    if (*try == NULL || len + strlen(*try) >= sizeof(in))
+                        break;
+                    strcpy(in + len, *try++);
+                    errno = 0;
+                } while (lstat(in, &st) && errno == ENOENT);
+            }
 #ifdef EOVERFLOW
             if (errno == EOVERFLOW || errno == EFBIG)
                 bail(in,
-                    " too large -- pigz not compiled with large file support");
+                    " too large -- not compiled with large file support");
 #endif
-            if (verbosity > 0)
-                fprintf(stderr, "%s does not exist -- skipping\n", in);
-            return;
+            if (errno) {
+                in[len] = 0;
+                complain("%s does not exist -- skipping", in);
+                return;
+            }
+            len = strlen(in);
         }
+
+        /* only process regular files, but allow symbolic links if -f,
+           recurse into directory if -r */
         if ((st.st_mode & S_IFMT) != S_IFREG &&
             (st.st_mode & S_IFMT) != S_IFLNK &&
             (st.st_mode & S_IFMT) != S_IFDIR) {
-            if (verbosity > 0)
-                fprintf(stderr, "%s is a special file or device -- skipping\n",
-                        in);
+            complain("%s is a special file or device -- skipping", in);
             return;
         }
         if ((st.st_mode & S_IFMT) == S_IFLNK && !force && !pipeout) {
-            if (verbosity > 0)
-                fprintf(stderr, "%s is a symbolic link -- skipping\n", in);
+            complain("%s is a symbolic link -- skipping", in);
             return;
         }
         if ((st.st_mode & S_IFMT) == S_IFDIR && !recurse) {
-            if (verbosity > 0)
-                fprintf(stderr, "%s is a directory -- skipping\n", in);
+            complain("%s is a directory -- skipping", in);
             return;
         }
 
@@ -2599,19 +3114,15 @@ local void process(char *path)
         /* don't compress .gz (or provided suffix) files, unless -f */
         if (!(force || list || decode) && len >= strlen(sufx) &&
                 strcmp(in + len - strlen(sufx), sufx) == 0) {
-            if (verbosity > 0)
-                fprintf(stderr, "%s ends with %s -- skipping\n", in, sufx);
+            complain("%s ends with %s -- skipping", in, sufx);
             return;
         }
 
-        /* only decompress or list files with compressed suffix */
-        if (list || decode) {
+        /* only decompress over input file with compressed suffix */
+        if (decode && !pipeout) {
             int suf = compressed_suffix(in);
             if (suf == 0) {
-                if (verbosity > 0)
-                    fprintf(stderr,
-                            "%s does not have compressed suffix -- skipping\n",
-                            in);
+                complain("%s does not have compressed suffix -- skipping", in);
                 return;
             }
             len -= suf;
@@ -2633,15 +3144,15 @@ local void process(char *path)
     if (decode) {
         in_init();
         method = get_header(1);
-        if (method != 8 && method != 256) {
+        if (method != 8 && method != 256 &&
+                /* gzip -cdf acts like cat on uncompressed input */
+                !(method == -2 && force && pipeout && decode != 2 && !list)) {
             RELEASE(hname);
             if (ind != 0)
                 close(ind);
-            if (method != -1 && verbosity > 0)
-                fprintf(stderr,
-                    method < 0 ? "%s is not compressed -- skipping\n" :
-                        "%s has unknown compression method -- skipping\n",
-                    in);
+            if (method != -1)
+                complain(method < 0 ? "%s is not compressed -- skipping" :
+                         "%s has unknown compression method -- skipping", in);
             return;
         }
 
@@ -2685,7 +3196,7 @@ local void process(char *path)
                  " (use -f to force)");
     }
     else {
-        char *to;
+        char *to, *repl;
 
         /* use header name for output when decompressing with -N */
         to = in;
@@ -2694,14 +3205,17 @@ local void process(char *path)
             len = strlen(hname);
         }
 
+        /* replace .tgx with .tar when decoding */
+        repl = decode && strcmp(to + len, ".tgz") ? "" : ".tar";
+
         /* create output file and open to write */
-        out = malloc(len + (decode ? 0 : strlen(sufx)) + 1);
+        out = malloc(len + (decode ? strlen(repl) : strlen(sufx)) + 1);
         if (out == NULL)
             bail("not enough memory", "");
         memcpy(out, to, len);
-        strcpy(out + len, decode ? "" : sufx);
+        strcpy(out + len, decode ? repl : sufx);
         outd = open(out, O_CREAT | O_TRUNC | O_WRONLY |
-                         (force ? 0 : O_EXCL), 0666);
+                         (force ? 0 : O_EXCL), 0600);
 
         /* if exists and not -f, give user a chance to overwrite */
         if (outd < 0 && errno == EEXIST && isatty(0) && verbosity) {
@@ -2717,13 +3231,12 @@ local void process(char *path)
             } while (ch != EOF && ch != '\n' && ch != '\r');
             if (reply == 1)
                 outd = open(out, O_CREAT | O_TRUNC | O_WRONLY,
-                            0666);
+                            0600);
         }
 
         /* if exists and no overwrite, report and go on to next */
         if (outd < 0 && errno == EEXIST) {
-            if (verbosity > 0)
-                fprintf(stderr, "%s exists -- skipping\n", out);
+            complain("%s exists -- skipping", out);
             RELEASE(out);
             RELEASE(hname);
             if (ind != 0)
@@ -2744,8 +3257,10 @@ local void process(char *path)
     if (decode) {
         if (method == 8)
             infchk();
-        else
+        else if (method == 256)
             unlzw();
+        else
+            cat();
     }
 #ifndef NOTHREAD
     else if (procs > 1)
@@ -2789,31 +3304,35 @@ local char *helptext[] = {
 "Options:",
 "  -0 to -9, --fast, --best   Compression levels, --fast is -1, --best is -9",
 "  -b, --blocksize mmm  Set compression block size to mmmK (default 128K)",
+"  -c, --stdout         Write all processed output to stdout (won't delete)",
+"  -d, --decompress     Decompress the compressed input",
+"  -f, --force          Force overwrite, compress .gz, links, and to terminal",
+"  -h, --help           Display a help screen and quit",
+"  -i, --independent    Compress blocks independently for damage recovery",
+"  -k, --keep           Do not delete original file after processing",
+"  -K, --zip            Compress to PKWare zip (.zip) single entry format",
+"  -l, --list           List the contents of the compressed input",
+"  -L, --license        Display the pigz license and quit",
+"  -n, --no-name        Do not store or restore file name in/from header",
+"  -N, --name           Store/restore file name and mod time in/from header",
 #ifndef NOTHREAD
 "  -p, --processes n    Allow up to n compression threads (default is the",
 "                       number of online processors, or 8 if unknown)",
 #endif
-"  -i, --independent    Compress blocks independently for damage recovery",
-"  -R, --rsyncable      Input-determined block locations for rsync",
-"  -d, --decompress     Decompress the compressed input",
-"  -t, --test           Test the integrity of the compressed input",
-"  -l, --list           List the contents of the compressed input",
-"  -f, --force          Force overwrite, compress .gz, links, and to terminal",
+"  -q, --quiet          Print no messages, even on error",
 "  -r, --recursive      Process the contents of all subdirectories",
+"  -R, --rsyncable      Input-determined block locations for rsync",
 "  -S, --suffix .sss    Use suffix .sss instead of .gz (for compression)",
-"  -z, --zlib           Compress to zlib (.zz) instead of gzip format",
-"  -K, --zip            Compress to PKWare zip (.zip) single entry format",
-"  -k, --keep           Do not delete original file after processing",
-"  -c, --stdout         Write all processed output to stdout (won't delete)",
-"  -N, --name           Store/restore file name and mod time in/from header",
-"  -n, --no-name        Do not store or restore file name in/from header",
+"  -t, --test           Test the integrity of the compressed input",
 "  -T, --no-time        Do not store or restore mod time in/from header",
-"  -q, --quiet          Print no messages, even on error",
 #ifdef DEBUG
-"  -v, --verbose        Provide more verbose output (-vv to debug)"
+"  -v, --verbose        Provide more verbose output (-vv to debug)",
 #else
-"  -v, --verbose        Provide more verbose output"
+"  -v, --verbose        Provide more verbose output",
 #endif
+"  -V  --version        Show the version of pigz",
+"  -z, --zlib           Compress to zlib (.zz) instead of gzip format",
+"  --                   All arguments after \"--\" are treated as files"
 };
 
 /* display the help text above */
@@ -2839,6 +3358,13 @@ local int nprocs(int n)
 #  else
 #    ifdef _SC_NPROC_ONLN
     n = (int)sysconf(_SC_NPROC_ONLN);
+#    else
+#      ifdef __hpux
+    struct pst_dynamic psd;
+
+    if (pstat_getdynamic(&psd, sizeof(psd), (size_t)1, 0) != -1)
+        n = psd.psd_proc_cnt;
+#      endif
 #    endif
 #  endif
     return n;
@@ -2857,7 +3383,7 @@ local void defaults(void)
 #endif
     size = 131072UL;
     rsync = 0;                      /* don't do rsync blocking */
-    dict = 1;                       /* initialize dictionary each thread */
+    setdict = 1;                    /* initialize dictionary each thread */
     verbosity = 1;                  /* normal message level */
     headis = 3;                     /* store/restore name and timestamp */
     pipeout = 0;                    /* don't force output to stdout */
@@ -2918,7 +3444,7 @@ local int option(char *arg)
 
     /* if no argument or dash option, check status of get */
     if (get && (arg == NULL || *arg == '-')) {
-        bad[1] = "bps"[get - 1];
+        bad[1] = "bpS"[get - 1];
         bail("missing parameter after ", bad);
     }
     if (arg == NULL)
@@ -2965,7 +3491,8 @@ local int option(char *arg)
             case 'K':  form = 2;  sufx = ".zip";  break;
             case 'L':
                 fputs(VERSION, stderr);
-                fputs("Copyright (C) 2007, 2008, 2009, 2010 Mark Adler\n",
+                fputs("Copyright (C) 2007, 2008, 2009, 2010, 2011, 2012"
+                      " Mark Adler\n",
                       stderr);
                 fputs("Subject to the terms of the zlib license.\n",
                       stderr);
@@ -2973,8 +3500,7 @@ local int option(char *arg)
                 exit(0);
             case 'N':  headis = 3;  break;
             case 'T':  headis &= ~2;  break;
-            case 'R':  rsync = 1;
-                bail("invalid option: rsyncable not implemented yet: ", bad);
+            case 'R':  rsync = 1;  break;
             case 'S':  get = 3;  break;
             case 'V':  fputs(VERSION, stderr);  exit(0);
             case 'Z':
@@ -2986,7 +3512,7 @@ local int option(char *arg)
             case 'd':  decode = 1;  headis = 0;  break;
             case 'f':  force = 1;  break;
             case 'h':  help();  break;
-            case 'i':  dict = 0;  break;
+            case 'i':  setdict = 0;  break;
             case 'k':  keep = 1;  break;
             case 'l':  list = 1;  break;
             case 'n':  headis &= ~1;  break;
@@ -3004,7 +3530,7 @@ local int option(char *arg)
             return 0;
     }
 
-    /* process option parameter for -b, -p, or -s */
+    /* process option parameter for -b, -p, or -S */
     if (get) {
         size_t n;
 
@@ -3014,8 +3540,9 @@ local int option(char *arg)
             if (size < DICT)
                 bail("block size too small (must be >= 32K)", "");
             if (n != size >> 10 ||
-                size + (size >> 11) + 10 < size ||
-                (ssize_t)(size + (size >> 11) + 10) < 0)
+                OUTPOOL(size) < size ||
+                (ssize_t)OUTPOOL(size) < 0 ||
+                size > (1UL << 22))
                 bail("block size too large: ", arg);
             new_opts();
         }
@@ -3024,11 +3551,11 @@ local int option(char *arg)
             procs = (int)n;                     /* # processes */
             if (procs < 1)
                 bail("invalid number of processes: ", arg);
-            if ((size_t)procs != n || ((procs << 1) + 2) < 1)
+            if ((size_t)procs != n || INBUFS(procs) < 1)
                 bail("too many processes: ", arg);
 #ifdef NOTHREAD
             if (procs > 1)
-                bail("this pigz compiled without threads", "");
+                bail("compiled without threads", "");
 #endif
             new_opts();
         }
@@ -3059,13 +3586,19 @@ local void cut_short(int sig)
 int main(int argc, char **argv)
 {
     int n;                          /* general index */
+    int noop;                       /* true to suppress option decoding */
     unsigned long done;             /* number of named files processed */
     char *opts, *p;                 /* environment default options, marker */
 
+    /* save pointer to program name for error messages */
+    p = strrchr(argv[0], '/');
+    p = p == NULL ? argv[0] : p + 1;
+    prog = *p ? p : "pigz";
+
     /* prepare for interrupts and logging */
     signal(SIGINT, cut_short);
 #ifndef NOTHREAD
-    yarn_prefix = "pigz";           /* prefix for yarn error messages */
+    yarn_prefix = prog;             /* prefix for yarn error messages */
     yarn_abort = cut_short;         /* call on thread error */
 #endif
 #ifdef DEBUG
@@ -3076,7 +3609,7 @@ int main(int argc, char **argv)
     /* set all options to defaults */
     defaults();
 
-    /* process user environment variable defaults */
+    /* process user environment variable defaults in GZIP */
     opts = getenv("GZIP");
     if (opts != NULL) {
         while (*opts) {
@@ -3094,24 +3627,45 @@ int main(int argc, char **argv)
         option(NULL);
     }
 
-    /* if no command line arguments and stdout is a terminal, show help */
-    if (argc < 2 && isatty(1))
-        help();
+    /* process user environment variable defaults in PIGZ as well */
+    opts = getenv("PIGZ");
+    if (opts != NULL) {
+        while (*opts) {
+            while (*opts == ' ' || *opts == '\t')
+                opts++;
+            p = opts;
+            while (*p && *p != ' ' && *p != '\t')
+                p++;
+            n = *p;
+            *p = 0;
+            if (option(opts))
+                bail("cannot provide files in PIGZ environment variable", "");
+            opts = p + (n ? 1 : 0);
+        }
+        option(NULL);
+    }
 
-    /* decompress if named "unpigz" or "gunzip" */
-    p = strrchr(argv[0], '/');
-    p = p == NULL ? argv[0] : p + 1;
-    if (strcmp(p, "unpigz") == 0 || strcmp(p, "gunzip") == 0)
+    /* decompress if named "unpigz" or "gunzip", to stdout if "*cat" */
+    if (strcmp(prog, "unpigz") == 0 || strcmp(prog, "gunzip") == 0)
         decode = 1, headis = 0;
+    if ((n = strlen(prog)) > 2 && strcmp(prog + n - 3, "cat") == 0)
+        decode = 1, headis = 0, pipeout = 1;
+
+    /* if no arguments and compressed data to or from a terminal, show help */
+    if (argc < 2 && isatty(decode ? 0 : 1))
+        help();
 
-    /* process command-line arguments */
-    done = 0;
+    /* process command-line arguments, no options after "--" */
+    done = noop = 0;
     for (n = 1; n < argc; n++)
-        if (option(argv[n])) {          /* true if file name, process it */
-            if (done == 1 && pipeout && !decode && !list && form > 1) {
-                fprintf(stderr, "warning: output is concatenated zip files ");
-                fprintf(stderr, "-- pigz will not be able to extract\n");
-            }
+        if (noop == 0 && strcmp(argv[n], "--") == 0) {
+            noop = 1;
+            option(NULL);
+        }
+        else if (noop || option(argv[n])) { /* true if file name, process it */
+            if (done == 1 && pipeout && !decode && !list && form > 1)
+                complain("warning: output will be concatenated zip files -- "
+                         "will not be able to extract");
             process(strcmp(argv[n], "-") ? argv[n] : NULL);
             done++;
         }
@@ -3124,5 +3678,5 @@ int main(int argc, char **argv)
     /* done -- release resources, show log */
     new_opts();
     log_dump();
-    return 0;
+    return warned ? 2 : 0;
 }
diff --git a/pigz/pigz.pdf b/pigz/pigz.pdf
new file mode 100644
index 000000000..3d382f4aa
--- /dev/null
+++ b/pigz/pigz.pdf
diff --git a/pigz/pigz.spec b/pigz/pigz.spec
index a29c202e4..96daadb22 100644
--- a/pigz/pigz.spec
+++ b/pigz/pigz.spec
@@ -1,26 +1,33 @@
-Summary: pigz is a parallel implementation of gzip which utilizes multiple cores
-Name: pigz
-Version: 2.1.6
-Release: 1
-Source0: %{name}-%{version}.tar.gz
-License: GPL
-Group: Applications/Tools
-Packager: Duncan Brown <duncan@duncanbrown.org>
-BuildRoot: %{_tmppath}/%{name}-%{version}-%{release}-root-%(%{__id_u} -n)
-URL: http://www.zlib.net/pigz
-
-%description
-pigz, which stands for parallel implementation of gzip, is a fully functional replacement for gzip that exploits multiple processors and multiple cores to the hilt when compressing data. pigz was written by Mark Adler, and uses the zlib and pthread libraries.
-
-%clean
-rm -rf $RPM_BUILD_ROOT
-%prep
-mkdir -p $RPM_BUILD_ROOT
-%setup -q
-%build
-make
-mkdir -p ${RPM_BUILD_ROOT}/usr/bin
-mv pigz ${RPM_BUILD_ROOT}/usr/bin
-%files
-%defattr(-,root,root)
-/usr/bin/pigz
+Summary: pigz is a parallel implementation of gzip which utilizes multiple cores
+Name: pigz
+Version: 2.2.5
+Release: 1
+Source0: %{name}-%{version}.tar.gz
+License: zlib
+Group: Applications/Tools
+Packager: Duncan Brown <duncan@duncanbrown.org>
+BuildRoot: %{_tmppath}/%{name}-%{version}-%{release}-root-%(%{__id_u} -n)
+URL: http://www.zlib.net/pigz
+
+%description
+pigz, which stands for parallel implementation of gzip, is a fully functional replacement for gzip that exploits multiple processors and multiple cores to the hilt when compressing data. pigz was written by Mark Adler, and uses the zlib and pthread libraries.
+
+%clean
+rm -rf $RPM_BUILD_ROOT
+%prep
+mkdir -p $RPM_BUILD_ROOT
+
+%setup -q
+%build
+make
+mkdir -p ${RPM_BUILD_ROOT}/usr/bin
+mkdir -p ${RPM_BUILD_ROOT}/usr/man/man1
+mv pigz unpigz ${RPM_BUILD_ROOT}/usr/bin
+mv pigz.1 ${RPM_BUILD_ROOT}/usr/man/man1
+
+%files
+%defattr(-,root,root)
+/usr/bin/pigz
+/usr/bin/unpigz
+%doc
+/usr/man/man1/pigz.1
diff --git a/pigz/yarn.c b/pigz/yarn.c
index 0562e7010..5e557f3f1 100644
--- a/pigz/yarn.c
+++ b/pigz/yarn.c
@@ -1,6 +1,6 @@
 /* yarn.c -- generic thread operations implemented using pthread functions
- * Copyright (C) 2008 Mark Adler
- * Version 1.1  26 Oct 2008  Mark Adler
+ * Copyright (C) 2008, 2012 Mark Adler
+ * Version 1.3  13 Jan 2012  Mark Adler
  * For conditions of distribution and use, see copyright notice in yarn.h
  */
 
@@ -13,11 +13,19 @@
    1.0    19 Oct 2008  First version
    1.1    26 Oct 2008  No need to set the stack size -- remove
                        Add yarn_abort() function for clean-up on error exit
+   1.2    19 Dec 2011  (changes reversed in 1.3)
+   1.3    13 Jan 2012  Add large file #define for consistency with pigz.c
+                       Update thread portability #defines per IEEE 1003.1-2008
+                       Fix documentation in yarn.h for yarn_prefix
  */
 
 /* for thread portability */
-#define _POSIX_PTHREAD_SEMANTICS
-#define _REENTRANT
+#define _XOPEN_SOURCE 700
+#define _POSIX_C_SOURCE 200809L
+#define _THREAD_SAFE
+
+/* use large file functions if available */
+#define _FILE_OFFSET_BITS 64
 
 /* external libraries and entities referenced */
 #include <stdio.h>      /* fprintf(), stderr */
@@ -199,6 +207,8 @@ local void reenter(void *dummy)
     thread *match, **prior;
     pthread_t me;
 
+    (void)dummy;
+
     /* find this thread in the threads list by matching the thread id */
     me = pthread_self();
     possess(&(threads_lock));
diff --git a/pigz/yarn.h b/pigz/yarn.h
index 7d99c7aed..7e3f914fd 100644
--- a/pigz/yarn.h
+++ b/pigz/yarn.h
@@ -1,6 +1,6 @@
 /* yarn.h -- generic interface for thread operations
- * Copyright (C) 2008 Mark Adler
- * Version 1.1  26 Oct 2008  Mark Adler
+ * Copyright (C) 2008, 2011 Mark Adler
+ * Version 1.3  13 Jan 2012  Mark Adler
  */
 
 /*
@@ -100,9 +100,9 @@
 
    -- Error control --
 
-   yarn_name - a char pointer to a string that will be the prefix for any error
-        messages that these routines generate before exiting -- if not changed
-        by the application, "yarn" will be used
+   yarn_prefix - a char pointer to a string that will be the prefix for any
+        error messages that these routines generate before exiting -- if not
+        changed by the application, "yarn" will be used
    yarn_abort - an external function that will be executed when there is an
         internal yarn error, due to out of memory or misuse -- this function
         may exit to abort the application, or if it returns, the yarn error