Merge changes from zlib 1.2.4. Bump shared library version number.

This commit is contained in:
delphij 2010-03-19 00:33:49 +00:00
commit 91d211f71c
41 changed files with 9973 additions and 2139 deletions

277
ChangeLog
View File

@ -1,6 +1,281 @@
ChangeLog file for zlib
Changes in 1.2.4 (14 Mar 2010)
- Fix VER3 extraction in configure for no fourth subversion
- Update zlib.3, add docs to Makefile.in to make .pdf out of it
- Add zlib.3.pdf to distribution
- Don't set error code in gzerror() if passed pointer is NULL
- Apply destination directory fixes to CMakeLists.txt [Lowman]
- Move #cmakedefine's to a new zconf.in.cmakein
- Restore zconf.h for builds that don't use configure or cmake
- Add distclean to dummy Makefile for convenience
- Update and improve INDEX, README, and FAQ
- Update CMakeLists.txt for the return of zconf.h [Lowman]
- Update contrib/vstudio/vc9 and vc10 [Vollant]
- Change libz.dll.a back to libzdll.a in win32/Makefile.gcc
- Apply license and readme changes to contrib/asm686 [Raiter]
- Check file name lengths and add -c option in minigzip.c [Li]
- Update contrib/amd64 and contrib/masmx86/ [Vollant]
- Avoid use of "eof" parameter in trees.c to not shadow library variable
- Update make_vms.com for removal of zlibdefs.h [Zinser]
- Update assembler code and vstudio projects in contrib [Vollant]
- Remove outdated assembler code contrib/masm686 and contrib/asm586
- Remove old vc7 and vc8 from contrib/vstudio
- Update win32/Makefile.msc, add ZLIB_VER_SUBREVISION [Rowe]
- Fix memory leaks in gzclose_r() and gzclose_w(), file leak in gz_open()
- Add contrib/gcc_gvmat64 for longest_match and inflate_fast [Vollant]
- Remove *64 functions from win32/zlib.def (they're not 64-bit yet)
- Fix bug in void-returning vsprintf() case in gzwrite.c
- Fix name change from inflate.h in contrib/inflate86/inffas86.c
- Check if temporary file exists before removing in make_vms.com [Zinser]
- Fix make install and uninstall for --static option
- Fix usage of _MSC_VER in gzguts.h and zutil.h [Truta]
- Update readme.txt in contrib/masmx64 and masmx86 to assemble
Changes in 1.2.3.9 (21 Feb 2010)
- Expunge gzio.c
- Move as400 build information to old
- Fix updates in contrib/minizip and contrib/vstudio
- Add const to vsnprintf test in configure to avoid warnings [Weigelt]
- Delete zconf.h (made by configure) [Weigelt]
- Change zconf.in.h to zconf.h.in per convention [Weigelt]
- Check for NULL buf in gzgets()
- Return empty string for gzgets() with len == 1 (like fgets())
- Fix description of gzgets() in zlib.h for end-of-file, NULL return
- Update minizip to 1.1 [Vollant]
- Avoid MSVC loss of data warnings in gzread.c, gzwrite.c
- Note in zlib.h that gzerror() should be used to distinguish from EOF
- Remove use of snprintf() from gzlib.c
- Fix bug in gzseek()
- Update contrib/vstudio, adding vc9 and vc10 [Kuno, Vollant]
- Fix zconf.h generation in CMakeLists.txt [Lowman]
- Improve comments in zconf.h where modified by configure
Changes in 1.2.3.8 (13 Feb 2010)
- Clean up text files (tabs, trailing whitespace, etc.) [Oberhumer]
- Use z_off64_t in gz_zero() and gz_skip() to match state->skip
- Avoid comparison problem when sizeof(int) == sizeof(z_off64_t)
- Revert to Makefile.in from 1.2.3.6 (live with the clutter)
- Fix missing error return in gzflush(), add zlib.h note
- Add *64 functions to zlib.map [Levin]
- Fix signed/unsigned comparison in gz_comp()
- Use SFLAGS when testing shared linking in configure
- Add --64 option to ./configure to use -m64 with gcc
- Fix ./configure --help to correctly name options
- Have make fail if a test fails [Levin]
- Avoid buffer overrun in contrib/masmx64/gvmat64.asm [Simpson]
- Remove assembler object files from contrib
Changes in 1.2.3.7 (24 Jan 2010)
- Always gzopen() with O_LARGEFILE if available
- Fix gzdirect() to work immediately after gzopen() or gzdopen()
- Make gzdirect() more precise when the state changes while reading
- Improve zlib.h documentation in many places
- Catch memory allocation failure in gz_open()
- Complete close operation if seek forward in gzclose_w() fails
- Return Z_ERRNO from gzclose_r() if close() fails
- Return Z_STREAM_ERROR instead of EOF for gzclose() being passed NULL
- Return zero for gzwrite() errors to match zlib.h description
- Return -1 on gzputs() error to match zlib.h description
- Add zconf.in.h to allow recovery from configure modification [Weigelt]
- Fix static library permissions in Makefile.in [Weigelt]
- Avoid warnings in configure tests that hide functionality [Weigelt]
- Add *BSD and DragonFly to Linux case in configure [gentoo 123571]
- Change libzdll.a to libz.dll.a in win32/Makefile.gcc [gentoo 288212]
- Avoid access of uninitialized data for first inflateReset2 call [Gomes]
- Keep object files in subdirectories to reduce the clutter somewhat
- Remove default Makefile and zlibdefs.h, add dummy Makefile
- Add new external functions to Z_PREFIX, remove duplicates, z_z_ -> z_
- Remove zlibdefs.h completely -- modify zconf.h instead
Changes in 1.2.3.6 (17 Jan 2010)
- Avoid void * arithmetic in gzread.c and gzwrite.c
- Make compilers happier with const char * for gz_error message
- Avoid unused parameter warning in inflate.c
- Avoid signed-unsigned comparison warning in inflate.c
- Indent #pragma's for traditional C
- Fix usage of strwinerror() in glib.c, change to gz_strwinerror()
- Correct email address in configure for system options
- Update make_vms.com and add make_vms.com to contrib/minizip [Zinser]
- Update zlib.map [Brown]
- Fix Makefile.in for Solaris 10 make of example64 and minizip64 [Tšršk]
- Apply various fixes to CMakeLists.txt [Lowman]
- Add checks on len in gzread() and gzwrite()
- Add error message for no more room for gzungetc()
- Remove zlib version check in gzwrite()
- Defer compression of gzprintf() result until need to
- Use snprintf() in gzdopen() if available
- Remove USE_MMAP configuration determination (only used by minigzip)
- Remove examples/pigz.c (available separately)
- Update examples/gun.c to 1.6
Changes in 1.2.3.5 (8 Jan 2010)
- Add space after #if in zutil.h for some compilers
- Fix relatively harmless bug in deflate_fast() [Exarevsky]
- Fix same problem in deflate_slow()
- Add $(SHAREDLIBV) to LIBS in Makefile.in [Brown]
- Add deflate_rle() for faster Z_RLE strategy run-length encoding
- Add deflate_huff() for faster Z_HUFFMAN_ONLY encoding
- Change name of "write" variable in inffast.c to avoid library collisions
- Fix premature EOF from gzread() in gzio.c [Brown]
- Use zlib header window size if windowBits is 0 in inflateInit2()
- Remove compressBound() call in deflate.c to avoid linking compress.o
- Replace use of errno in gz* with functions, support WinCE [Alves]
- Provide alternative to perror() in minigzip.c for WinCE [Alves]
- Don't use _vsnprintf on later versions of MSVC [Lowman]
- Add CMake build script and input file [Lowman]
- Update contrib/minizip to 1.1 [Svensson, Vollant]
- Moved nintendods directory from contrib to .
- Replace gzio.c with a new set of routines with the same functionality
- Add gzbuffer(), gzoffset(), gzclose_r(), gzclose_w() as part of above
- Update contrib/minizip to 1.1b
- Change gzeof() to return 0 on error instead of -1 to agree with zlib.h
Changes in 1.2.3.4 (21 Dec 2009)
- Use old school .SUFFIXES in Makefile.in for FreeBSD compatibility
- Update comments in configure and Makefile.in for default --shared
- Fix test -z's in configure [Marquess]
- Build examplesh and minigzipsh when not testing
- Change NULL's to Z_NULL's in deflate.c and in comments in zlib.h
- Import LDFLAGS from the environment in configure
- Fix configure to populate SFLAGS with discovered CFLAGS options
- Adapt make_vms.com to the new Makefile.in [Zinser]
- Add zlib2ansi script for C++ compilation [Marquess]
- Add _FILE_OFFSET_BITS=64 test to make test (when applicable)
- Add AMD64 assembler code for longest match to contrib [Teterin]
- Include options from $SFLAGS when doing $LDSHARED
- Simplify 64-bit file support by introducing z_off64_t type
- Make shared object files in objs directory to work around old Sun cc
- Use only three-part version number for Darwin shared compiles
- Add rc option to ar in Makefile.in for when ./configure not run
- Add -WI,-rpath,. to LDFLAGS for OSF 1 V4*
- Set LD_LIBRARYN32_PATH for SGI IRIX shared compile
- Protect against _FILE_OFFSET_BITS being defined when compiling zlib
- Rename Makefile.in targets allstatic to static and allshared to shared
- Fix static and shared Makefile.in targets to be independent
- Correct error return bug in gz_open() by setting state [Brown]
- Put spaces before ;;'s in configure for better sh compatibility
- Add pigz.c (parallel implementation of gzip) to examples/
- Correct constant in crc32.c to UL [Leventhal]
- Reject negative lengths in crc32_combine()
- Add inflateReset2() function to work like inflateEnd()/inflateInit2()
- Include sys/types.h for _LARGEFILE64_SOURCE [Brown]
- Correct typo in doc/algorithm.txt [Janik]
- Fix bug in adler32_combine() [Zhu]
- Catch missing-end-of-block-code error in all inflates and in puff
Assures that random input to inflate eventually results in an error
- Added enough.c (calculation of ENOUGH for inftrees.h) to examples/
- Update ENOUGH and its usage to reflect discovered bounds
- Fix gzerror() error report on empty input file [Brown]
- Add ush casts in trees.c to avoid pedantic runtime errors
- Fix typo in zlib.h uncompress() description [Reiss]
- Correct inflate() comments with regard to automatic header detection
- Remove deprecation comment on Z_PARTIAL_FLUSH (it stays)
- Put new version of gzlog (2.0) in examples with interruption recovery
- Add puff compile option to permit invalid distance-too-far streams
- Add puff TEST command options, ability to read piped input
- Prototype the *64 functions in zlib.h when _FILE_OFFSET_BITS == 64, but
_LARGEFILE64_SOURCE not defined
- Fix Z_FULL_FLUSH to truly erase the past by resetting s->strstart
- Fix deflateSetDictionary() to use all 32K for output consistency
- Remove extraneous #define MIN_LOOKAHEAD in deflate.c (in deflate.h)
- Clear bytes after deflate lookahead to avoid use of uninitialized data
- Change a limit in inftrees.c to be more transparent to Coverity Prevent
- Update win32/zlib.def with exported symbols from zlib.h
- Correct spelling error in zlib.h [Willem]
- Allow Z_BLOCK for deflate() to force a new block
- Allow negative bits in inflatePrime() to delete existing bit buffer
- Add Z_TREES flush option to inflate() to return at end of trees
- Add inflateMark() to return current state information for random access
- Add Makefile for NintendoDS to contrib [Costa]
- Add -w in configure compile tests to avoid spurious warnings [Beucler]
- Fix typos in zlib.h comments for deflateSetDictionary()
- Fix EOF detection in transparent gzread() [Maier]
Changes in 1.2.3.3 (2 October 2006)
- Make --shared the default for configure, add a --static option
- Add compile option to permit invalid distance-too-far streams
- Add inflateUndermine() function which is required to enable above
- Remove use of "this" variable name for C++ compatibility [Marquess]
- Add testing of shared library in make test, if shared library built
- Use ftello() and fseeko() if available instead of ftell() and fseek()
- Provide two versions of all functions that use the z_off_t type for
binary compatibility -- a normal version and a 64-bit offset version,
per the Large File Support Extension when _LARGEFILE64_SOURCE is
defined; use the 64-bit versions by default when _FILE_OFFSET_BITS
is defined to be 64
- Add a --uname= option to configure to perhaps help with cross-compiling
Changes in 1.2.3.2 (3 September 2006)
- Turn off silly Borland warnings [Hay]
- Use off64_t and define _LARGEFILE64_SOURCE when present
- Fix missing dependency on inffixed.h in Makefile.in
- Rig configure --shared to build both shared and static [Teredesai, Truta]
- Remove zconf.in.h and instead create a new zlibdefs.h file
- Fix contrib/minizip/unzip.c non-encrypted after encrypted [Vollant]
- Add treebuild.xml (see http://treebuild.metux.de/) [Weigelt]
Changes in 1.2.3.1 (16 August 2006)
- Add watcom directory with OpenWatcom make files [Daniel]
- Remove #undef of FAR in zconf.in.h for MVS [Fedtke]
- Update make_vms.com [Zinser]
- Use -fPIC for shared build in configure [Teredesai, Nicholson]
- Use only major version number for libz.so on IRIX and OSF1 [Reinholdtsen]
- Use fdopen() (not _fdopen()) for Interix in zutil.h [BŠck]
- Add some FAQ entries about the contrib directory
- Update the MVS question in the FAQ
- Avoid extraneous reads after EOF in gzio.c [Brown]
- Correct spelling of "successfully" in gzio.c [Randers-Pehrson]
- Add comments to zlib.h about gzerror() usage [Brown]
- Set extra flags in gzip header in gzopen() like deflate() does
- Make configure options more compatible with double-dash conventions
[Weigelt]
- Clean up compilation under Solaris SunStudio cc [Rowe, Reinholdtsen]
- Fix uninstall target in Makefile.in [Truta]
- Add pkgconfig support [Weigelt]
- Use $(DESTDIR) macro in Makefile.in [Reinholdtsen, Weigelt]
- Replace set_data_type() with a more accurate detect_data_type() in
trees.c, according to the txtvsbin.txt document [Truta]
- Swap the order of #include <stdio.h> and #include "zlib.h" in
gzio.c, example.c and minigzip.c [Truta]
- Shut up annoying VS2005 warnings about standard C deprecation [Rowe,
Truta] (where?)
- Fix target "clean" from win32/Makefile.bor [Truta]
- Create .pdb and .manifest files in win32/makefile.msc [Ziegler, Rowe]
- Update zlib www home address in win32/DLL_FAQ.txt [Truta]
- Update contrib/masmx86/inffas32.asm for VS2005 [Vollant, Van Wassenhove]
- Enable browse info in the "Debug" and "ASM Debug" configurations in
the Visual C++ 6 project, and set (non-ASM) "Debug" as default [Truta]
- Add pkgconfig support [Weigelt]
- Add ZLIB_VER_MAJOR, ZLIB_VER_MINOR and ZLIB_VER_REVISION in zlib.h,
for use in win32/zlib1.rc [Polushin, Rowe, Truta]
- Add a document that explains the new text detection scheme to
doc/txtvsbin.txt [Truta]
- Add rfc1950.txt, rfc1951.txt and rfc1952.txt to doc/ [Truta]
- Move algorithm.txt into doc/ [Truta]
- Synchronize FAQ with website
- Fix compressBound(), was low for some pathological cases [Fearnley]
- Take into account wrapper variations in deflateBound()
- Set examples/zpipe.c input and output to binary mode for Windows
- Update examples/zlib_how.html with new zpipe.c (also web site)
- Fix some warnings in examples/gzlog.c and examples/zran.c (it seems
that gcc became pickier in 4.0)
- Add zlib.map for Linux: "All symbols from zlib-1.1.4 remain
un-versioned, the patch adds versioning only for symbols introduced in
zlib-1.2.0 or later. It also declares as local those symbols which are
not designed to be exported." [Levin]
- Update Z_PREFIX list in zconf.in.h, add --zprefix option to configure
- Do not initialize global static by default in trees.c, add a response
NO_INIT_GLOBAL_POINTERS to initialize them if needed [Marquess]
- Don't use strerror() in gzio.c under WinCE [Yakimov]
- Don't use errno.h in zutil.h under WinCE [Yakimov]
- Move arguments for AR to its usage to allow replacing ar [Marot]
- Add HAVE_VISIBILITY_PRAGMA in zconf.in.h for Mozilla [Randers-Pehrson]
- Improve inflateInit() and inflateInit2() documentation
- Fix structure size comment in inflate.h
- Change configure help option from --h* to --help [Santos]
Changes in 1.2.3 (18 July 2005)
- Apply security vulnerability fixes to contrib/infback9 as well
- Clean up some text files (carriage returns, trailing space)
@ -13,7 +288,7 @@ Changes in 1.2.2.4 (11 July 2005)
compile
- Fix some spelling errors in comments [Betts]
- Correct inflateInit2() error return documentation in zlib.h
- Added zran.c example of compressed data random access to examples
- Add zran.c example of compressed data random access to examples
directory, shows use of inflatePrime()
- Fix cast for assignments to strm->state in inflate.c and infback.c
- Fix zlibCompileFlags() in zutil.c to use 1L for long shifts [Oberhumer]

261
FAQ
View File

@ -3,8 +3,8 @@
If your question is not there, please check the zlib home page
http://www.zlib.org which may have more recent information.
The lastest zlib FAQ is at http://www.gzip.org/zlib/zlib_faq.html
http://zlib.net/ which may have more recent information.
The lastest zlib FAQ is at http://zlib.net/zlib_faq.html
1. Is zlib Y2K-compliant?
@ -13,54 +13,51 @@ The lastest zlib FAQ is at http://www.gzip.org/zlib/zlib_faq.html
2. Where can I get a Windows DLL version?
The zlib sources can be compiled without change to produce a DLL.
See the file win32/DLL_FAQ.txt in the zlib distribution.
Pointers to the precompiled DLL are found in the zlib web site at
http://www.zlib.org.
The zlib sources can be compiled without change to produce a DLL. See the
file win32/DLL_FAQ.txt in the zlib distribution. Pointers to the
precompiled DLL are found in the zlib web site at http://zlib.net/ .
3. Where can I get a Visual Basic interface to zlib?
See
* http://www.dogma.net/markn/articles/zlibtool/zlibtool.htm
* contrib/visual-basic.txt in the zlib distribution
* http://marknelson.us/1997/01/01/zlib-engine/
* win32/DLL_FAQ.txt in the zlib distribution
4. compress() returns Z_BUF_ERROR.
Make sure that before the call of compress, the length of the compressed
buffer is equal to the total size of the compressed buffer and not
zero. For Visual Basic, check that this parameter is passed by reference
Make sure that before the call of compress(), the length of the compressed
buffer is equal to the available size of the compressed buffer and not
zero. For Visual Basic, check that this parameter is passed by reference
("as any"), not by value ("as long").
5. deflate() or inflate() returns Z_BUF_ERROR.
Before making the call, make sure that avail_in and avail_out are not
zero. When setting the parameter flush equal to Z_FINISH, also make sure
that avail_out is big enough to allow processing all pending input.
Note that a Z_BUF_ERROR is not fatal--another call to deflate() or
inflate() can be made with more input or output space. A Z_BUF_ERROR
may in fact be unavoidable depending on how the functions are used, since
it is not possible to tell whether or not there is more output pending
when strm.avail_out returns with zero.
Before making the call, make sure that avail_in and avail_out are not zero.
When setting the parameter flush equal to Z_FINISH, also make sure that
avail_out is big enough to allow processing all pending input. Note that a
Z_BUF_ERROR is not fatal--another call to deflate() or inflate() can be
made with more input or output space. A Z_BUF_ERROR may in fact be
unavoidable depending on how the functions are used, since it is not
possible to tell whether or not there is more output pending when
strm.avail_out returns with zero. See http://zlib.net/zlib_how.html for a
heavily annotated example.
6. Where's the zlib documentation (man pages, etc.)?
It's in zlib.h for the moment, and Francis S. Lin has converted it to a
web page zlib.html. Volunteers to transform this to Unix-style man pages,
please contact us (zlib@gzip.org). Examples of zlib usage are in the files
example.c and minigzip.c.
It's in zlib.h . Examples of zlib usage are in the files example.c and
minigzip.c, with more in examples/ .
7. Why don't you use GNU autoconf or libtool or ...?
Because we would like to keep zlib as a very small and simple
package. zlib is rather portable and doesn't need much configuration.
Because we would like to keep zlib as a very small and simple package.
zlib is rather portable and doesn't need much configuration.
8. I found a bug in zlib.
Most of the time, such problems are due to an incorrect usage of
zlib. Please try to reproduce the problem with a small program and send
the corresponding source to us at zlib@gzip.org . Do not send
multi-megabyte data files without prior agreement.
Most of the time, such problems are due to an incorrect usage of zlib.
Please try to reproduce the problem with a small program and send the
corresponding source to us at zlib@gzip.org . Do not send multi-megabyte
data files without prior agreement.
9. Why do I get "undefined reference to gzputc"?
@ -82,7 +79,7 @@ The lastest zlib FAQ is at http://www.gzip.org/zlib/zlib_faq.html
12. Can zlib handle .Z files?
No, sorry. You have to spawn an uncompress or gunzip subprocess, or adapt
No, sorry. You have to spawn an uncompress or gunzip subprocess, or adapt
the code of uncompress on your own.
13. How can I make a Unix shared library?
@ -99,8 +96,10 @@ The lastest zlib FAQ is at http://www.gzip.org/zlib/zlib_faq.html
However, many flavors of Unix come with a shared zlib already installed.
Before going to the trouble of compiling a shared version of zlib and
trying to install it, you may want to check if it's already there! If you
can #include <zlib.h>, it's there. The -lz option will probably link to it.
trying to install it, you may want to check if it's already there! If you
can #include <zlib.h>, it's there. The -lz option will probably link to
it. You can check the version at the top of zlib.h or with the
ZLIB_VERSION symbol defined in zlib.h .
15. I have a question about OttoPDF.
@ -109,8 +108,8 @@ The lastest zlib FAQ is at http://www.gzip.org/zlib/zlib_faq.html
16. Can zlib decode Flate data in an Adobe PDF file?
Yes. See http://www.fastio.com/ (ClibPDF), or http://www.pdflib.com/ .
To modify PDF forms, see http://sourceforge.net/projects/acroformtool/ .
Yes. See http://www.pdflib.com/ . To modify PDF forms, see
http://sourceforge.net/projects/acroformtool/ .
17. Why am I getting this "register_frame_info not found" error on Solaris?
@ -121,67 +120,67 @@ The lastest zlib FAQ is at http://www.gzip.org/zlib/zlib_faq.html
symbol __register_frame_info: referenced symbol not found
The symbol __register_frame_info is not part of zlib, it is generated by
the C compiler (cc or gcc). You must recompile applications using zlib
which have this problem. This problem is specific to Solaris. See
the C compiler (cc or gcc). You must recompile applications using zlib
which have this problem. This problem is specific to Solaris. See
http://www.sunfreeware.com for Solaris versions of zlib and applications
using zlib.
18. Why does gzip give an error on a file I make with compress/deflate?
The compress and deflate functions produce data in the zlib format, which
is different and incompatible with the gzip format. The gz* functions in
zlib on the other hand use the gzip format. Both the zlib and gzip
formats use the same compressed data format internally, but have different
headers and trailers around the compressed data.
is different and incompatible with the gzip format. The gz* functions in
zlib on the other hand use the gzip format. Both the zlib and gzip formats
use the same compressed data format internally, but have different headers
and trailers around the compressed data.
19. Ok, so why are there two different formats?
The gzip format was designed to retain the directory information about
a single file, such as the name and last modification date. The zlib
format on the other hand was designed for in-memory and communication
channel applications, and has a much more compact header and trailer and
uses a faster integrity check than gzip.
The gzip format was designed to retain the directory information about a
single file, such as the name and last modification date. The zlib format
on the other hand was designed for in-memory and communication channel
applications, and has a much more compact header and trailer and uses a
faster integrity check than gzip.
20. Well that's nice, but how do I make a gzip file in memory?
You can request that deflate write the gzip format instead of the zlib
format using deflateInit2(). You can also request that inflate decode
the gzip format using inflateInit2(). Read zlib.h for more details.
format using deflateInit2(). You can also request that inflate decode the
gzip format using inflateInit2(). Read zlib.h for more details.
21. Is zlib thread-safe?
Yes. However any library routines that zlib uses and any application-
provided memory allocation routines must also be thread-safe. zlib's gz*
Yes. However any library routines that zlib uses and any application-
provided memory allocation routines must also be thread-safe. zlib's gz*
functions use stdio library routines, and most of zlib's functions use the
library memory allocation routines by default. zlib's Init functions allow
for the application to provide custom memory allocation routines.
library memory allocation routines by default. zlib's *Init* functions
allow for the application to provide custom memory allocation routines.
Of course, you should only operate on any given zlib or gzip stream from a
single thread at a time.
22. Can I use zlib in my commercial application?
Yes. Please read the license in zlib.h.
Yes. Please read the license in zlib.h.
23. Is zlib under the GNU license?
No. Please read the license in zlib.h.
No. Please read the license in zlib.h.
24. The license says that altered source versions must be "plainly marked". So
what exactly do I need to do to meet that requirement?
You need to change the ZLIB_VERSION and ZLIB_VERNUM #defines in zlib.h. In
You need to change the ZLIB_VERSION and ZLIB_VERNUM #defines in zlib.h. In
particular, the final version number needs to be changed to "f", and an
identification string should be appended to ZLIB_VERSION. Version numbers
identification string should be appended to ZLIB_VERSION. Version numbers
x.x.x.f are reserved for modifications to zlib by others than the zlib
maintainers. For example, if the version of the base zlib you are altering
maintainers. For example, if the version of the base zlib you are altering
is "1.2.3.4", then in zlib.h you should change ZLIB_VERNUM to 0x123f, and
ZLIB_VERSION to something like "1.2.3.f-zachary-mods-v3". You can also
ZLIB_VERSION to something like "1.2.3.f-zachary-mods-v3". You can also
update the version strings in deflate.c and inftrees.c.
For altered source distributions, you should also note the origin and
nature of the changes in zlib.h, as well as in ChangeLog and README, along
with the dates of the alterations. The origin should include at least your
with the dates of the alterations. The origin should include at least your
name (or your company's name), and an email address to contact for help or
issues with the library.
@ -197,105 +196,112 @@ The lastest zlib FAQ is at http://www.gzip.org/zlib/zlib_faq.html
26. Will zlib work on a 64-bit machine?
It should. It has been tested on 64-bit machines, and has no dependence
on any data types being limited to 32-bits in length. If you have any
Yes. It has been tested on 64-bit machines, and has no dependence on any
data types being limited to 32-bits in length. If you have any
difficulties, please provide a complete problem report to zlib@gzip.org
27. Will zlib decompress data from the PKWare Data Compression Library?
No. The PKWare DCL uses a completely different compressed data format
than does PKZIP and zlib. However, you can look in zlib's contrib/blast
No. The PKWare DCL uses a completely different compressed data format than
does PKZIP and zlib. However, you can look in zlib's contrib/blast
directory for a possible solution to your problem.
28. Can I access data randomly in a compressed stream?
No, not without some preparation. If when compressing you periodically
use Z_FULL_FLUSH, carefully write all the pending data at those points,
and keep an index of those locations, then you can start decompression
at those points. You have to be careful to not use Z_FULL_FLUSH too
often, since it can significantly degrade compression.
No, not without some preparation. If when compressing you periodically use
Z_FULL_FLUSH, carefully write all the pending data at those points, and
keep an index of those locations, then you can start decompression at those
points. You have to be careful to not use Z_FULL_FLUSH too often, since it
can significantly degrade compression. Alternatively, you can scan a
deflate stream once to generate an index, and then use that index for
random access. See examples/zran.c .
29. Does zlib work on MVS, OS/390, CICS, etc.?
We don't know for sure. We have heard occasional reports of success on
these systems. If you do use it on one of these, please provide us with
a report, instructions, and patches that we can reference when we get
these questions. Thanks.
It has in the past, but we have not heard of any recent evidence. There
were working ports of zlib 1.1.4 to MVS, but those links no longer work.
If you know of recent, successful applications of zlib on these operating
systems, please let us know. Thanks.
30. Is there some simpler, easier to read version of inflate I can look at
to understand the deflate format?
30. Is there some simpler, easier to read version of inflate I can look at to
understand the deflate format?
First off, you should read RFC 1951. Second, yes. Look in zlib's
First off, you should read RFC 1951. Second, yes. Look in zlib's
contrib/puff directory.
31. Does zlib infringe on any patents?
As far as we know, no. In fact, that was originally the whole point behind
zlib. Look here for some more information:
As far as we know, no. In fact, that was originally the whole point behind
zlib. Look here for some more information:
http://www.gzip.org/#faq11
32. Can zlib work with greater than 4 GB of data?
Yes. inflate() and deflate() will process any amount of data correctly.
Yes. inflate() and deflate() will process any amount of data correctly.
Each call of inflate() or deflate() is limited to input and output chunks
of the maximum value that can be stored in the compiler's "unsigned int"
type, but there is no limit to the number of chunks. Note however that the
strm.total_in and strm_total_out counters may be limited to 4 GB. These
type, but there is no limit to the number of chunks. Note however that the
strm.total_in and strm_total_out counters may be limited to 4 GB. These
counters are provided as a convenience and are not used internally by
inflate() or deflate(). The application can easily set up its own counters
inflate() or deflate(). The application can easily set up its own counters
updated after each call of inflate() or deflate() to count beyond 4 GB.
compress() and uncompress() may be limited to 4 GB, since they operate in a
single call. gzseek() and gztell() may be limited to 4 GB depending on how
zlib is compiled. See the zlibCompileFlags() function in zlib.h.
single call. gzseek() and gztell() may be limited to 4 GB depending on how
zlib is compiled. See the zlibCompileFlags() function in zlib.h.
The word "may" appears several times above since there is a 4 GB limit
only if the compiler's "long" type is 32 bits. If the compiler's "long"
type is 64 bits, then the limit is 16 exabytes.
The word "may" appears several times above since there is a 4 GB limit only
if the compiler's "long" type is 32 bits. If the compiler's "long" type is
64 bits, then the limit is 16 exabytes.
33. Does zlib have any security vulnerabilities?
The only one that we are aware of is potentially in gzprintf(). If zlib
is compiled to use sprintf() or vsprintf(), then there is no protection
against a buffer overflow of a 4K string space, other than the caller of
gzprintf() assuring that the output will not exceed 4K. On the other
hand, if zlib is compiled to use snprintf() or vsnprintf(), which should
normally be the case, then there is no vulnerability. The ./configure
script will display warnings if an insecure variation of sprintf() will
be used by gzprintf(). Also the zlibCompileFlags() function will return
information on what variant of sprintf() is used by gzprintf().
The only one that we are aware of is potentially in gzprintf(). If zlib is
compiled to use sprintf() or vsprintf(), then there is no protection
against a buffer overflow of an 8K string space (or other value as set by
gzbuffer()), other than the caller of gzprintf() assuring that the output
will not exceed 8K. On the other hand, if zlib is compiled to use
snprintf() or vsnprintf(), which should normally be the case, then there is
no vulnerability. The ./configure script will display warnings if an
insecure variation of sprintf() will be used by gzprintf(). Also the
zlibCompileFlags() function will return information on what variant of
sprintf() is used by gzprintf().
If you don't have snprintf() or vsnprintf() and would like one, you can
find a portable implementation here:
http://www.ijs.si/software/snprintf/
Note that you should be using the most recent version of zlib. Versions
1.1.3 and before were subject to a double-free vulnerability.
Note that you should be using the most recent version of zlib. Versions
1.1.3 and before were subject to a double-free vulnerability, and versions
1.2.1 and 1.2.2 were subject to an access exception when decompressing
invalid compressed data.
34. Is there a Java version of zlib?
Probably what you want is to use zlib in Java. zlib is already included
as part of the Java SDK in the java.util.zip package. If you really want
a version of zlib written in the Java language, look on the zlib home
page for links: http://www.zlib.org/
page for links: http://zlib.net/ .
35. I get this or that compiler or source-code scanner warning when I crank it
up to maximally-pedantic. Can't you guys write proper code?
Many years ago, we gave up attempting to avoid warnings on every compiler
in the universe. It just got to be a waste of time, and some compilers
were downright silly. So now, we simply make sure that the code always
works.
in the universe. It just got to be a waste of time, and some compilers
were downright silly as well as contradicted each other. So now, we simply
make sure that the code always works.
36. Valgrind (or some similar memory access checker) says that deflate is
performing a conditional jump that depends on an uninitialized value.
Isn't that a bug?
No. That is intentional for performance reasons, and the output of
deflate is not affected. This only started showing up recently since
zlib 1.2.x uses malloc() by default for allocations, whereas earlier
versions used calloc(), which zeros out the allocated memory.
No. That is intentional for performance reasons, and the output of deflate
is not affected. This only started showing up recently since zlib 1.2.x
uses malloc() by default for allocations, whereas earlier versions used
calloc(), which zeros out the allocated memory. Even though the code was
correct, versions 1.2.4 and later was changed to not stimulate these
checkers.
37. Will zlib read the (insert any ancient or arcane format here) compressed
data format?
@ -305,20 +311,21 @@ The lastest zlib FAQ is at http://www.gzip.org/zlib/zlib_faq.html
38. How can I encrypt/decrypt zip files with zlib?
zlib doesn't support encryption. The original PKZIP encryption is very weak
and can be broken with freely available programs. To get strong encryption,
use GnuPG, http://www.gnupg.org/ , which already includes zlib compression.
For PKZIP compatible "encryption", look at http://www.info-zip.org/
zlib doesn't support encryption. The original PKZIP encryption is very
weak and can be broken with freely available programs. To get strong
encryption, use GnuPG, http://www.gnupg.org/ , which already includes zlib
compression. For PKZIP compatible "encryption", look at
http://www.info-zip.org/
39. What's the difference between the "gzip" and "deflate" HTTP 1.1 encodings?
"gzip" is the gzip format, and "deflate" is the zlib format. They should
probably have called the second one "zlib" instead to avoid confusion
with the raw deflate compressed data format. While the HTTP 1.1 RFC 2616
"gzip" is the gzip format, and "deflate" is the zlib format. They should
probably have called the second one "zlib" instead to avoid confusion with
the raw deflate compressed data format. While the HTTP 1.1 RFC 2616
correctly points to the zlib specification in RFC 1950 for the "deflate"
transfer encoding, there have been reports of servers and browsers that
incorrectly produce or expect raw deflate data per the deflate
specficiation in RFC 1951, most notably Microsoft. So even though the
specficiation in RFC 1951, most notably Microsoft. So even though the
"deflate" transfer encoding using the zlib format would be the more
efficient approach (and in fact exactly what the zlib format was designed
for), using the "gzip" transfer encoding is probably more reliable due to
@ -328,12 +335,32 @@ The lastest zlib FAQ is at http://www.gzip.org/zlib/zlib_faq.html
40. Does zlib support the new "Deflate64" format introduced by PKWare?
No. PKWare has apparently decided to keep that format proprietary, since
they have not documented it as they have previous compression formats.
In any case, the compression improvements are so modest compared to other
more modern approaches, that it's not worth the effort to implement.
No. PKWare has apparently decided to keep that format proprietary, since
they have not documented it as they have previous compression formats. In
any case, the compression improvements are so modest compared to other more
modern approaches, that it's not worth the effort to implement.
41. Can you please sign these lengthy legal documents and fax them back to us
41. I'm having a problem with the zip functions in zlib, can you help?
There are no zip functions in zlib. You are probably using minizip by
Giles Vollant, which is found in the contrib directory of zlib. It is not
part of zlib. In fact none of the stuff in contrib is part of zlib. The
files in there are not supported by the zlib authors. You need to contact
the authors of the respective contribution for help.
42. The match.asm code in contrib is under the GNU General Public License.
Since it's part of zlib, doesn't that mean that all of zlib falls under the
GNU GPL?
No. The files in contrib are not part of zlib. They were contributed by
other authors and are provided as a convenience to the user within the zlib
distribution. Each item in contrib has its own license.
43. Is zlib subject to export controls? What is its ECCN?
zlib is not subject to export controls, and so is classified as EAR99.
44. Can you please sign these lengthy legal documents and fax them back to us
so that we can use your software in our product?
No. Go away. Shoo.

View File

@ -4,21 +4,54 @@
LIB= z
SHLIBDIR?= /lib
SHLIB_MAJOR= 6
MAN= zlib.3
#CFLAGS+= -DMAX_WBITS=14 -DMAX_MEM_LEVEL=7
#CFLAGS+= -g -DDEBUG
#CFLAGS+= -Wall -Wwrite-strings -Wpointer-arith -Wconversion \
# -Wstrict-prototypes -Wmissing-prototypes
#CFLAGS=-O -DMAX_WBITS=14 -DMAX_MEM_LEVEL=7
#CFLAGS=-g -DDEBUG
#CFLAGS=-O3 -Wall -Wwrite-strings -Wpointer-arith -Wconversion \
# -Wstrict-prototypes -Wmissing-prototypes
CFLAGS+= -DHAS_snprintf -DHAS_vsnprintf
CFLAGS+= -DHAS_snprintf -DHAS_vsnprintf -I${.CURDIR}
WARNS?= 3
WARNS?= 2
CLEANFILES+= example.o example foo.gz minigzip.o minigzip
SRCS = adler32.c compress.c crc32.c gzio.c uncompr.c deflate.c trees.c \
zutil.c inflate.c inftrees.c inffast.c zopen.c infback.c
SRCS+= adler32.c
SRCS+= compress.c
SRCS+= crc32.c
SRCS+= deflate.c
SRCS+= gzclose.c
SRCS+= gzlib.c
SRCS+= gzread.c
SRCS+= gzwrite.c
SRCS+= infback.c
SRCS+= inflate.c
SRCS+= inftrees.c
SRCS+= trees.c
SRCS+= uncompr.c
SRCS+= zopen.c
SRCS+= zutil.c
.if ${MACHINE_ARCH} == "i386" && defined(MACHINE_CPU)
.if ${MACHINE_CPU:M*i686*}
.PATH: ${.CURDIR}/contrib/asm686
SRCS+= match.S
CFLAGS+= -DASMV -DNO_UNDERLINE
.endif
.endif
.if ${MACHINE_ARCH} == "amd64"
.PATH: ${.CURDIR}/contrib/gcc_gvmat64
.PATH: ${.CURDIR}/contrib/inflate86
SRCS+= gvmat64.S
SRCS+= inffas86.c
CFLAGS+= -DASMV -DASMINF -DNO_UNDERLINE
.else
SRCS+= inffast.c
.endif
INCS= zconf.h zlib.h
minigzip: all minigzip.o

85
README
View File

@ -1,56 +1,51 @@
ZLIB DATA COMPRESSION LIBRARY
zlib 1.2.3 is a general purpose data compression library. All the code is
zlib 1.2.4 is a general purpose data compression library. All the code is
thread safe. The data format used by the zlib library is described by RFCs
(Request for Comments) 1950 to 1952 in the files
http://www.ietf.org/rfc/rfc1950.txt (zlib format), rfc1951.txt (deflate format)
and rfc1952.txt (gzip format). These documents are also available in other
formats from ftp://ftp.uu.net/graphics/png/documents/zlib/zdoc-index.html
and rfc1952.txt (gzip format).
All functions of the compression library are documented in the file zlib.h
(volunteer to write man pages welcome, contact zlib@gzip.org). A usage example
(volunteer to write man pages welcome, contact zlib@gzip.org). A usage example
of the library is given in the file example.c which also tests that the library
is working correctly. Another example is given in the file minigzip.c. The
is working correctly. Another example is given in the file minigzip.c. The
compression library itself is composed of all source files except example.c and
minigzip.c.
To compile all files and run the test program, follow the instructions given at
the top of Makefile. In short "make test; make install" should work for most
machines. For Unix: "./configure; make test; make install". For MSDOS, use one
of the special makefiles such as Makefile.msc. For VMS, use make_vms.com.
the top of Makefile.in. In short "./configure; make test", and if that goes
well, "make install" should work for most flavors of Unix. For Windows, use one
of the special makefiles in win32/ or projects/ . For VMS, use make_vms.com.
Questions about zlib should be sent to <zlib@gzip.org>, or to Gilles Vollant
<info@winimage.com> for the Windows DLL version. The zlib home page is
http://www.zlib.org or http://www.gzip.org/zlib/ Before reporting a problem,
please check this site to verify that you have the latest version of zlib;
otherwise get the latest version and check whether the problem still exists or
not.
<info@winimage.com> for the Windows DLL version. The zlib home page is
http://zlib.net/ . Before reporting a problem, please check this site to
verify that you have the latest version of zlib; otherwise get the latest
version and check whether the problem still exists or not.
PLEASE read the zlib FAQ http://www.gzip.org/zlib/zlib_faq.html before asking
for help.
PLEASE read the zlib FAQ http://zlib.net/zlib_faq.html before asking for help.
Mark Nelson <markn@ieee.org> wrote an article about zlib for the Jan. 1997
issue of Dr. Dobb's Journal; a copy of the article is available in
http://dogma.net/markn/articles/zlibtool/zlibtool.htm
Mark Nelson <markn@ieee.org> wrote an article about zlib for the Jan. 1997
issue of Dr. Dobb's Journal; a copy of the article is available at
http://marknelson.us/1997/01/01/zlib-engine/ .
The changes made in version 1.2.3 are documented in the file ChangeLog.
The changes made in version 1.2.4 are documented in the file ChangeLog.
Unsupported third party contributions are provided in directory "contrib".
Unsupported third party contributions are provided in directory contrib/ .
A Java implementation of zlib is available in the Java Development Kit
http://java.sun.com/j2se/1.4.2/docs/api/java/util/zip/package-summary.html
See the zlib home page http://www.zlib.org for details.
zlib is available in Java using the java.util.zip package, documented at
http://java.sun.com/developer/technicalArticles/Programming/compression/ .
A Perl interface to zlib written by Paul Marquess <pmqs@cpan.org> is in the
CPAN (Comprehensive Perl Archive Network) sites
http://www.cpan.org/modules/by-module/Compress/
A Perl interface to zlib written by Paul Marquess <pmqs@cpan.org> is available
at CPAN (Comprehensive Perl Archive Network) sites, including
http://search.cpan.org/~pmqs/IO-Compress-Zlib/ .
A Python interface to zlib written by A.M. Kuchling <amk@amk.ca> is
available in Python 1.5 and later versions, see
http://www.python.org/doc/lib/module-zlib.html
http://www.python.org/doc/lib/module-zlib.html .
A zlib binding for TCL written by Andreas Kupries <a.kupries@westend.com> is
availlable at http://www.oche.de/~akupries/soft/trf/trf_zip.html
zlib is built into tcl: http://wiki.tcl.tk/4610 .
An experimental package to read and write files in .zip format, written on top
of zlib by Gilles Vollant <info@winimage.com>, is available in the
@ -74,25 +69,21 @@ Notes for some targets:
- zlib doesn't work on HP-UX 9.05 with some versions of /bin/cc. It works with
other compilers. Use "make test" to check your compiler.
- gzdopen is not supported on RISCOS, BEOS and by some Mac compilers.
- gzdopen is not supported on RISCOS or BEOS.
- For PalmOs, see http://palmzlib.sourceforge.net/
- When building a shared, i.e. dynamic library on Mac OS X, the library must be
installed before testing (do "make install" before "make test"), since the
library location is specified in the library.
Acknowledgments:
The deflate format used by zlib was defined by Phil Katz. The deflate
and zlib specifications were written by L. Peter Deutsch. Thanks to all the
people who reported problems and suggested various improvements in zlib;
they are too numerous to cite here.
The deflate format used by zlib was defined by Phil Katz. The deflate and
zlib specifications were written by L. Peter Deutsch. Thanks to all the
people who reported problems and suggested various improvements in zlib; they
are too numerous to cite here.
Copyright notice:
(C) 1995-2004 Jean-loup Gailly and Mark Adler
(C) 1995-2010 Jean-loup Gailly and Mark Adler
This software is provided 'as-is', without any express or implied
warranty. In no event will the authors be held liable for any damages
@ -113,13 +104,11 @@ Copyright notice:
Jean-loup Gailly Mark Adler
jloup@gzip.org madler@alumni.caltech.edu
If you use the zlib library in a product, we would appreciate *not*
receiving lengthy legal documents to sign. The sources are provided
for free but without warranty of any kind. The library has been
entirely written by Jean-loup Gailly and Mark Adler; it does not
include third-party code.
If you use the zlib library in a product, we would appreciate *not* receiving
lengthy legal documents to sign. The sources are provided for free but without
warranty of any kind. The library has been entirely written by Jean-loup
Gailly and Mark Adler; it does not include third-party code.
If you redistribute modified sources, we would appreciate that you include
in the file ChangeLog history information documenting your changes. Please
read the FAQ for more information on the distribution of modified source
versions.
If you redistribute modified sources, we would appreciate that you include in
the file ChangeLog history information documenting your changes. Please read
the FAQ for more information on the distribution of modified source versions.

View File

@ -1,12 +1,15 @@
/* adler32.c -- compute the Adler-32 checksum of a data stream
* Copyright (C) 1995-2004 Mark Adler
* Copyright (C) 1995-2007 Mark Adler
* For conditions of distribution and use, see copyright notice in zlib.h
*/
/* @(#) $Id$ */
#define ZLIB_INTERNAL
#include "zlib.h"
#include "zutil.h"
#define local static
local uLong adler32_combine_(uLong adler1, uLong adler2, z_off64_t len2);
#define BASE 65521UL /* largest prime smaller than 65536 */
#define NMAX 5552
@ -125,10 +128,10 @@ uLong ZEXPORT adler32(adler, buf, len)
}
/* ========================================================================= */
uLong ZEXPORT adler32_combine(adler1, adler2, len2)
local uLong adler32_combine_(adler1, adler2, len2)
uLong adler1;
uLong adler2;
z_off_t len2;
z_off64_t len2;
{
unsigned long sum1;
unsigned long sum2;
@ -141,9 +144,26 @@ uLong ZEXPORT adler32_combine(adler1, adler2, len2)
MOD(sum2);
sum1 += (adler2 & 0xffff) + BASE - 1;
sum2 += ((adler1 >> 16) & 0xffff) + ((adler2 >> 16) & 0xffff) + BASE - rem;
if (sum1 > BASE) sum1 -= BASE;
if (sum1 > BASE) sum1 -= BASE;
if (sum2 > (BASE << 1)) sum2 -= (BASE << 1);
if (sum2 > BASE) sum2 -= BASE;
if (sum1 >= BASE) sum1 -= BASE;
if (sum1 >= BASE) sum1 -= BASE;
if (sum2 >= (BASE << 1)) sum2 -= (BASE << 1);
if (sum2 >= BASE) sum2 -= BASE;
return sum1 | (sum2 << 16);
}
/* ========================================================================= */
uLong ZEXPORT adler32_combine(adler1, adler2, len2)
uLong adler1;
uLong adler2;
z_off_t len2;
{
return adler32_combine_(adler1, adler2, len2);
}
uLong ZEXPORT adler32_combine64(adler1, adler2, len2)
uLong adler1;
uLong adler2;
z_off64_t len2;
{
return adler32_combine_(adler1, adler2, len2);
}

View File

@ -1,5 +1,5 @@
/* compress.c -- compress a memory buffer
* Copyright (C) 1995-2003 Jean-loup Gailly.
* Copyright (C) 1995-2005 Jean-loup Gailly.
* For conditions of distribution and use, see copyright notice in zlib.h
*/
@ -75,5 +75,6 @@ int ZEXPORT compress (dest, destLen, source, sourceLen)
uLong ZEXPORT compressBound (sourceLen)
uLong sourceLen;
{
return sourceLen + (sourceLen >> 12) + (sourceLen >> 14) + 11;
return sourceLen + (sourceLen >> 12) + (sourceLen >> 14) +
(sourceLen >> 25) + 13;
}

77
contrib/README.contrib Normal file
View File

@ -0,0 +1,77 @@
All files under this contrib directory are UNSUPPORTED. There were
provided by users of zlib and were not tested by the authors of zlib.
Use at your own risk. Please contact the authors of the contributions
for help about these, not the zlib authors. Thanks.
ada/ by Dmitriy Anisimkov <anisimkov@yahoo.com>
Support for Ada
See http://zlib-ada.sourceforge.net/
amd64/ by Mikhail Teterin <mi@ALDAN.algebra.com>
asm code for AMD64
See patch at http://www.freebsd.org/cgi/query-pr.cgi?pr=bin/96393
asm686/ by Brian Raiter <breadbox@muppetlabs.com>
asm code for Pentium and PPro/PII, using the AT&T (GNU as) syntax
See http://www.muppetlabs.com/~breadbox/software/assembly.html
blast/ by Mark Adler <madler@alumni.caltech.edu>
Decompressor for output of PKWare Data Compression Library (DCL)
delphi/ by Cosmin Truta <cosmint@cs.ubbcluj.ro>
Support for Delphi and C++ Builder
dotzlib/ by Henrik Ravn <henrik@ravn.com>
Support for Microsoft .Net and Visual C++ .Net
gcc_gvmat64/by Gilles Vollant <info@winimage.com>
GCC Version of x86 64-bit (AMD64 and Intel EM64t) code for x64
assembler to replace longest_match() and inflate_fast()
infback9/ by Mark Adler <madler@alumni.caltech.edu>
Unsupported diffs to infback to decode the deflate64 format
inflate86/ by Chris Anderson <christop@charm.net>
Tuned x86 gcc asm code to replace inflate_fast()
iostream/ by Kevin Ruland <kevin@rodin.wustl.edu>
A C++ I/O streams interface to the zlib gz* functions
iostream2/ by Tyge Løvset <Tyge.Lovset@cmr.no>
Another C++ I/O streams interface
iostream3/ by Ludwig Schwardt <schwardt@sun.ac.za>
and Kevin Ruland <kevin@rodin.wustl.edu>
Yet another C++ I/O streams interface
masmx64/ by Gilles Vollant <info@winimage.com>
x86 64-bit (AMD64 and Intel EM64t) code for x64 assembler to
replace longest_match() and inflate_fast(), also masm x86
64-bits translation of Chris Anderson inflate_fast()
masmx86/ by Gilles Vollant <info@winimage.com>
x86 asm code to replace longest_match() and inflate_fast(),
for Visual C++ and MASM (32 bits).
Based on Brian Raiter (asm686) and Chris Anderson (inflate86)
minizip/ by Gilles Vollant <info@winimage.com>
Mini zip and unzip based on zlib
Includes Zip64 support by Mathias Svensson <mathias@result42.com>
See http://www.winimage.com/zLibDll/unzip.html
pascal/ by Bob Dellaca <bobdl@xtra.co.nz> et al.
Support for Pascal
puff/ by Mark Adler <madler@alumni.caltech.edu>
Small, low memory usage inflate. Also serves to provide an
unambiguous description of the deflate format.
testzlib/ by Gilles Vollant <info@winimage.com>
Example of the use of zlib
untgz/ by Pedro A. Aranda Gutierrez <paag@tid.es>
A very simple tar.gz file extractor using zlib
vstudio/ by Gilles Vollant <info@winimage.com>
Building a minizip-enhanced zlib with Microsoft Visual Studio

51
contrib/asm686/README.686 Normal file
View File

@ -0,0 +1,51 @@
This is a patched version of zlib, modified to use
Pentium-Pro-optimized assembly code in the deflation algorithm. The
files changed/added by this patch are:
README.686
match.S
The speedup that this patch provides varies, depending on whether the
compiler used to build the original version of zlib falls afoul of the
PPro's speed traps. My own tests show a speedup of around 10-20% at
the default compression level, and 20-30% using -9, against a version
compiled using gcc 2.7.2.3. Your mileage may vary.
Note that this code has been tailored for the PPro/PII in particular,
and will not perform particuarly well on a Pentium.
If you are using an assembler other than GNU as, you will have to
translate match.S to use your assembler's syntax. (Have fun.)
Brian Raiter
breadbox@muppetlabs.com
April, 1998
Added for zlib 1.1.3:
The patches come from
http://www.muppetlabs.com/~breadbox/software/assembly.html
To compile zlib with this asm file, copy match.S to the zlib directory
then do:
CFLAGS="-O3 -DASMV" ./configure
make OBJA=match.o
Update:
I've been ignoring these assembly routines for years, believing that
gcc's generated code had caught up with it sometime around gcc 2.95
and the major rearchitecting of the Pentium 4. However, I recently
learned that, despite what I believed, this code still has some life
in it. On the Pentium 4 and AMD64 chips, it continues to run about 8%
faster than the code produced by gcc 4.1.
In acknowledgement of its continuing usefulness, I've altered the
license to match that of the rest of zlib. Share and Enjoy!
Brian Raiter
breadbox@muppetlabs.com
April, 2007

343
contrib/asm686/match.S Normal file
View File

@ -0,0 +1,343 @@
/* match.S -- x86 assembly version of the zlib longest_match() function.
* Optimized for the Intel 686 chips (PPro and later).
*
* Copyright (C) 1998, 2007 Brian Raiter <breadbox@muppetlabs.com>
*
* This software is provided 'as-is', without any express or implied
* warranty. In no event will the author be held liable for any damages
* arising from the use of this software.
*
* Permission is granted to anyone to use this software for any purpose,
* including commercial applications, and to alter it and redistribute it
* freely, subject to the following restrictions:
*
* 1. The origin of this software must not be misrepresented; you must not
* claim that you wrote the original software. If you use this software
* in a product, an acknowledgment in the product documentation would be
* appreciated but is not required.
* 2. Altered source versions must be plainly marked as such, and must not be
* misrepresented as being the original software.
* 3. This notice may not be removed or altered from any source distribution.
*/
#ifndef NO_UNDERLINE
#define match_init _match_init
#define longest_match _longest_match
#endif
#define MAX_MATCH (258)
#define MIN_MATCH (3)
#define MIN_LOOKAHEAD (MAX_MATCH + MIN_MATCH + 1)
#define MAX_MATCH_8 ((MAX_MATCH + 7) & ~7)
/* stack frame offsets */
#define chainlenwmask 0 /* high word: current chain len */
/* low word: s->wmask */
#define window 4 /* local copy of s->window */
#define windowbestlen 8 /* s->window + bestlen */
#define scanstart 16 /* first two bytes of string */
#define scanend 12 /* last two bytes of string */
#define scanalign 20 /* dword-misalignment of string */
#define nicematch 24 /* a good enough match size */
#define bestlen 28 /* size of best match so far */
#define scan 32 /* ptr to string wanting match */
#define LocalVarsSize (36)
/* saved ebx 36 */
/* saved edi 40 */
/* saved esi 44 */
/* saved ebp 48 */
/* return address 52 */
#define deflatestate 56 /* the function arguments */
#define curmatch 60
/* All the +zlib1222add offsets are due to the addition of fields
* in zlib in the deflate_state structure since the asm code was first written
* (if you compile with zlib 1.0.4 or older, use "zlib1222add equ (-4)").
* (if you compile with zlib between 1.0.5 and 1.2.2.1, use "zlib1222add equ 0").
* if you compile with zlib 1.2.2.2 or later , use "zlib1222add equ 8").
*/
#define zlib1222add (8)
#define dsWSize (36+zlib1222add)
#define dsWMask (44+zlib1222add)
#define dsWindow (48+zlib1222add)
#define dsPrev (56+zlib1222add)
#define dsMatchLen (88+zlib1222add)
#define dsPrevMatch (92+zlib1222add)
#define dsStrStart (100+zlib1222add)
#define dsMatchStart (104+zlib1222add)
#define dsLookahead (108+zlib1222add)
#define dsPrevLen (112+zlib1222add)
#define dsMaxChainLen (116+zlib1222add)
#define dsGoodMatch (132+zlib1222add)
#define dsNiceMatch (136+zlib1222add)
.file "match.S"
.globl match_init, longest_match
.text
/* uInt longest_match(deflate_state *deflatestate, IPos curmatch) */
longest_match:
/* Save registers that the compiler may be using, and adjust %esp to */
/* make room for our stack frame. */
pushl %ebp
pushl %edi
pushl %esi
pushl %ebx
subl $LocalVarsSize, %esp
/* Retrieve the function arguments. %ecx will hold cur_match */
/* throughout the entire function. %edx will hold the pointer to the */
/* deflate_state structure during the function's setup (before */
/* entering the main loop). */
movl deflatestate(%esp), %edx
movl curmatch(%esp), %ecx
/* uInt wmask = s->w_mask; */
/* unsigned chain_length = s->max_chain_length; */
/* if (s->prev_length >= s->good_match) { */
/* chain_length >>= 2; */
/* } */
movl dsPrevLen(%edx), %eax
movl dsGoodMatch(%edx), %ebx
cmpl %ebx, %eax
movl dsWMask(%edx), %eax
movl dsMaxChainLen(%edx), %ebx
jl LastMatchGood
shrl $2, %ebx
LastMatchGood:
/* chainlen is decremented once beforehand so that the function can */
/* use the sign flag instead of the zero flag for the exit test. */
/* It is then shifted into the high word, to make room for the wmask */
/* value, which it will always accompany. */
decl %ebx
shll $16, %ebx
orl %eax, %ebx
movl %ebx, chainlenwmask(%esp)
/* if ((uInt)nice_match > s->lookahead) nice_match = s->lookahead; */
movl dsNiceMatch(%edx), %eax
movl dsLookahead(%edx), %ebx
cmpl %eax, %ebx
jl LookaheadLess
movl %eax, %ebx
LookaheadLess: movl %ebx, nicematch(%esp)
/* register Bytef *scan = s->window + s->strstart; */
movl dsWindow(%edx), %esi
movl %esi, window(%esp)
movl dsStrStart(%edx), %ebp
lea (%esi,%ebp), %edi
movl %edi, scan(%esp)
/* Determine how many bytes the scan ptr is off from being */
/* dword-aligned. */
movl %edi, %eax
negl %eax
andl $3, %eax
movl %eax, scanalign(%esp)
/* IPos limit = s->strstart > (IPos)MAX_DIST(s) ? */
/* s->strstart - (IPos)MAX_DIST(s) : NIL; */
movl dsWSize(%edx), %eax
subl $MIN_LOOKAHEAD, %eax
subl %eax, %ebp
jg LimitPositive
xorl %ebp, %ebp
LimitPositive:
/* int best_len = s->prev_length; */
movl dsPrevLen(%edx), %eax
movl %eax, bestlen(%esp)
/* Store the sum of s->window + best_len in %esi locally, and in %esi. */
addl %eax, %esi
movl %esi, windowbestlen(%esp)
/* register ush scan_start = *(ushf*)scan; */
/* register ush scan_end = *(ushf*)(scan+best_len-1); */
/* Posf *prev = s->prev; */
movzwl (%edi), %ebx
movl %ebx, scanstart(%esp)
movzwl -1(%edi,%eax), %ebx
movl %ebx, scanend(%esp)
movl dsPrev(%edx), %edi
/* Jump into the main loop. */
movl chainlenwmask(%esp), %edx
jmp LoopEntry
.balign 16
/* do {
* match = s->window + cur_match;
* if (*(ushf*)(match+best_len-1) != scan_end ||
* *(ushf*)match != scan_start) continue;
* [...]
* } while ((cur_match = prev[cur_match & wmask]) > limit
* && --chain_length != 0);
*
* Here is the inner loop of the function. The function will spend the
* majority of its time in this loop, and majority of that time will
* be spent in the first ten instructions.
*
* Within this loop:
* %ebx = scanend
* %ecx = curmatch
* %edx = chainlenwmask - i.e., ((chainlen << 16) | wmask)
* %esi = windowbestlen - i.e., (window + bestlen)
* %edi = prev
* %ebp = limit
*/
LookupLoop:
andl %edx, %ecx
movzwl (%edi,%ecx,2), %ecx
cmpl %ebp, %ecx
jbe LeaveNow
subl $0x00010000, %edx
js LeaveNow
LoopEntry: movzwl -1(%esi,%ecx), %eax
cmpl %ebx, %eax
jnz LookupLoop
movl window(%esp), %eax
movzwl (%eax,%ecx), %eax
cmpl scanstart(%esp), %eax
jnz LookupLoop
/* Store the current value of chainlen. */
movl %edx, chainlenwmask(%esp)
/* Point %edi to the string under scrutiny, and %esi to the string we */
/* are hoping to match it up with. In actuality, %esi and %edi are */
/* both pointed (MAX_MATCH_8 - scanalign) bytes ahead, and %edx is */
/* initialized to -(MAX_MATCH_8 - scanalign). */
movl window(%esp), %esi
movl scan(%esp), %edi
addl %ecx, %esi
movl scanalign(%esp), %eax
movl $(-MAX_MATCH_8), %edx
lea MAX_MATCH_8(%edi,%eax), %edi
lea MAX_MATCH_8(%esi,%eax), %esi
/* Test the strings for equality, 8 bytes at a time. At the end,
* adjust %edx so that it is offset to the exact byte that mismatched.
*
* We already know at this point that the first three bytes of the
* strings match each other, and they can be safely passed over before
* starting the compare loop. So what this code does is skip over 0-3
* bytes, as much as necessary in order to dword-align the %edi
* pointer. (%esi will still be misaligned three times out of four.)
*
* It should be confessed that this loop usually does not represent
* much of the total running time. Replacing it with a more
* straightforward "rep cmpsb" would not drastically degrade
* performance.
*/
LoopCmps:
movl (%esi,%edx), %eax
xorl (%edi,%edx), %eax
jnz LeaveLoopCmps
movl 4(%esi,%edx), %eax
xorl 4(%edi,%edx), %eax
jnz LeaveLoopCmps4
addl $8, %edx
jnz LoopCmps
jmp LenMaximum
LeaveLoopCmps4: addl $4, %edx
LeaveLoopCmps: testl $0x0000FFFF, %eax
jnz LenLower
addl $2, %edx
shrl $16, %eax
LenLower: subb $1, %al
adcl $0, %edx
/* Calculate the length of the match. If it is longer than MAX_MATCH, */
/* then automatically accept it as the best possible match and leave. */
lea (%edi,%edx), %eax
movl scan(%esp), %edi
subl %edi, %eax
cmpl $MAX_MATCH, %eax
jge LenMaximum
/* If the length of the match is not longer than the best match we */
/* have so far, then forget it and return to the lookup loop. */
movl deflatestate(%esp), %edx
movl bestlen(%esp), %ebx
cmpl %ebx, %eax
jg LongerMatch
movl windowbestlen(%esp), %esi
movl dsPrev(%edx), %edi
movl scanend(%esp), %ebx
movl chainlenwmask(%esp), %edx
jmp LookupLoop
/* s->match_start = cur_match; */
/* best_len = len; */
/* if (len >= nice_match) break; */
/* scan_end = *(ushf*)(scan+best_len-1); */
LongerMatch: movl nicematch(%esp), %ebx
movl %eax, bestlen(%esp)
movl %ecx, dsMatchStart(%edx)
cmpl %ebx, %eax
jge LeaveNow
movl window(%esp), %esi
addl %eax, %esi
movl %esi, windowbestlen(%esp)
movzwl -1(%edi,%eax), %ebx
movl dsPrev(%edx), %edi
movl %ebx, scanend(%esp)
movl chainlenwmask(%esp), %edx
jmp LookupLoop
/* Accept the current string, with the maximum possible length. */
LenMaximum: movl deflatestate(%esp), %edx
movl $MAX_MATCH, bestlen(%esp)
movl %ecx, dsMatchStart(%edx)
/* if ((uInt)best_len <= s->lookahead) return (uInt)best_len; */
/* return s->lookahead; */
LeaveNow:
movl deflatestate(%esp), %edx
movl bestlen(%esp), %ebx
movl dsLookahead(%edx), %eax
cmpl %eax, %ebx
jg LookaheadRet
movl %ebx, %eax
LookaheadRet:
/* Restore the stack and return from whence we came. */
addl $LocalVarsSize, %esp
popl %ebx
popl %esi
popl %edi
popl %ebp
match_init: ret

View File

@ -0,0 +1,574 @@
/*
;uInt longest_match_x64(
; deflate_state *s,
; IPos cur_match); // current match
; gvmat64.S -- Asm portion of the optimized longest_match for 32 bits x86_64
; (AMD64 on Athlon 64, Opteron, Phenom
; and Intel EM64T on Pentium 4 with EM64T, Pentium D, Core 2 Duo, Core I5/I7)
; this file is translation from gvmat64.asm to GCC 4.x (for Linux, Mac XCode)
; Copyright (C) 1995-2010 Jean-loup Gailly, Brian Raiter and Gilles Vollant.
;
; File written by Gilles Vollant, by converting to assembly the longest_match
; from Jean-loup Gailly in deflate.c of zLib and infoZip zip.
; and by taking inspiration on asm686 with masm, optimised assembly code
; from Brian Raiter, written 1998
;
; This software is provided 'as-is', without any express or implied
; warranty. In no event will the authors be held liable for any damages
; arising from the use of this software.
;
; Permission is granted to anyone to use this software for any purpose,
; including commercial applications, and to alter it and redistribute it
; freely, subject to the following restrictions:
;
; 1. The origin of this software must not be misrepresented; you must not
; claim that you wrote the original software. If you use this software
; in a product, an acknowledgment in the product documentation would be
; appreciated but is not required.
; 2. Altered source versions must be plainly marked as such, and must not be
; misrepresented as being the original software
; 3. This notice may not be removed or altered from any source distribution.
;
; http://www.zlib.net
; http://www.winimage.com/zLibDll
; http://www.muppetlabs.com/~breadbox/software/assembly.html
;
; to compile this file for zLib, I use option:
; gcc -c -arch x86_64 gvmat64.S
;uInt longest_match(s, cur_match)
; deflate_state *s;
; IPos cur_match; // current match /
;
; with XCode for Mac, I had strange error with some jump on intel syntax
; this is why BEFORE_JMP and AFTER_JMP are used
*/
#define BEFORE_JMP .att_syntax
#define AFTER_JMP .intel_syntax noprefix
#ifndef NO_UNDERLINE
# define match_init _match_init
# define longest_match _longest_match
#endif
.intel_syntax noprefix
.globl match_init, longest_match
.text
longest_match:
#define LocalVarsSize 96
/*
; register used : rax,rbx,rcx,rdx,rsi,rdi,r8,r9,r10,r11,r12
; free register : r14,r15
; register can be saved : rsp
*/
#define chainlenwmask (rsp + 8 - LocalVarsSize)
#define nicematch (rsp + 16 - LocalVarsSize)
#define save_rdi (rsp + 24 - LocalVarsSize)
#define save_rsi (rsp + 32 - LocalVarsSize)
#define save_rbx (rsp + 40 - LocalVarsSize)
#define save_rbp (rsp + 48 - LocalVarsSize)
#define save_r12 (rsp + 56 - LocalVarsSize)
#define save_r13 (rsp + 64 - LocalVarsSize)
#define save_r14 (rsp + 72 - LocalVarsSize)
#define save_r15 (rsp + 80 - LocalVarsSize)
/*
; all the +4 offsets are due to the addition of pending_buf_size (in zlib
; in the deflate_state structure since the asm code was first written
; (if you compile with zlib 1.0.4 or older, remove the +4).
; Note : these value are good with a 8 bytes boundary pack structure
*/
#define MAX_MATCH 258
#define MIN_MATCH 3
#define MIN_LOOKAHEAD (MAX_MATCH+MIN_MATCH+1)
/*
;;; Offsets for fields in the deflate_state structure. These numbers
;;; are calculated from the definition of deflate_state, with the
;;; assumption that the compiler will dword-align the fields. (Thus,
;;; changing the definition of deflate_state could easily cause this
;;; program to crash horribly, without so much as a warning at
;;; compile time. Sigh.)
; all the +zlib1222add offsets are due to the addition of fields
; in zlib in the deflate_state structure since the asm code was first written
; (if you compile with zlib 1.0.4 or older, use "zlib1222add equ (-4)").
; (if you compile with zlib between 1.0.5 and 1.2.2.1, use "zlib1222add equ 0").
; if you compile with zlib 1.2.2.2 or later , use "zlib1222add equ 8").
*/
/* you can check the structure offset by running
#include <stdlib.h>
#include <stdio.h>
#include "deflate.h"
void print_depl()
{
deflate_state ds;
deflate_state *s=&ds;
printf("size pointer=%u\n",(int)sizeof(void*));
printf("#define dsWSize %u\n",(int)(((char*)&(s->w_size))-((char*)s)));
printf("#define dsWMask %u\n",(int)(((char*)&(s->w_mask))-((char*)s)));
printf("#define dsWindow %u\n",(int)(((char*)&(s->window))-((char*)s)));
printf("#define dsPrev %u\n",(int)(((char*)&(s->prev))-((char*)s)));
printf("#define dsMatchLen %u\n",(int)(((char*)&(s->match_length))-((char*)s)));
printf("#define dsPrevMatch %u\n",(int)(((char*)&(s->prev_match))-((char*)s)));
printf("#define dsStrStart %u\n",(int)(((char*)&(s->strstart))-((char*)s)));
printf("#define dsMatchStart %u\n",(int)(((char*)&(s->match_start))-((char*)s)));
printf("#define dsLookahead %u\n",(int)(((char*)&(s->lookahead))-((char*)s)));
printf("#define dsPrevLen %u\n",(int)(((char*)&(s->prev_length))-((char*)s)));
printf("#define dsMaxChainLen %u\n",(int)(((char*)&(s->max_chain_length))-((char*)s)));
printf("#define dsGoodMatch %u\n",(int)(((char*)&(s->good_match))-((char*)s)));
printf("#define dsNiceMatch %u\n",(int)(((char*)&(s->nice_match))-((char*)s)));
}
*/
#define dsWSize 68
#define dsWMask 76
#define dsWindow 80
#define dsPrev 96
#define dsMatchLen 144
#define dsPrevMatch 148
#define dsStrStart 156
#define dsMatchStart 160
#define dsLookahead 164
#define dsPrevLen 168
#define dsMaxChainLen 172
#define dsGoodMatch 188
#define dsNiceMatch 192
#define window_size [ rcx + dsWSize]
#define WMask [ rcx + dsWMask]
#define window_ad [ rcx + dsWindow]
#define prev_ad [ rcx + dsPrev]
#define strstart [ rcx + dsStrStart]
#define match_start [ rcx + dsMatchStart]
#define Lookahead [ rcx + dsLookahead] //; 0ffffffffh on infozip
#define prev_length [ rcx + dsPrevLen]
#define max_chain_length [ rcx + dsMaxChainLen]
#define good_match [ rcx + dsGoodMatch]
#define nice_match [ rcx + dsNiceMatch]
/*
; windows:
; parameter 1 in rcx(deflate state s), param 2 in rdx (cur match)
; see http://weblogs.asp.net/oldnewthing/archive/2004/01/14/58579.aspx and
; http://msdn.microsoft.com/library/en-us/kmarch/hh/kmarch/64bitAMD_8e951dd2-ee77-4728-8702-55ce4b5dd24a.xml.asp
;
; All registers must be preserved across the call, except for
; rax, rcx, rdx, r8, r9, r10, and r11, which are scratch.
;
; gcc on macosx-linux:
; see http://www.x86-64.org/documentation/abi-0.99.pdf
; param 1 in rdi, param 2 in rsi
; rbx, rsp, rbp, r12 to r15 must be preserved
;;; Save registers that the compiler may be using, and adjust esp to
;;; make room for our stack frame.
;;; Retrieve the function arguments. r8d will hold cur_match
;;; throughout the entire function. edx will hold the pointer to the
;;; deflate_state structure during the function's setup (before
;;; entering the main loop.
; ms: parameter 1 in rcx (deflate_state* s), param 2 in edx -> r8 (cur match)
; mac: param 1 in rdi, param 2 rsi
; this clear high 32 bits of r8, which can be garbage in both r8 and rdx
*/
mov [save_rbx],rbx
mov [save_rbp],rbp
mov rcx,rdi
mov r8d,esi
mov [save_r12],r12
mov [save_r13],r13
mov [save_r14],r14
mov [save_r15],r15
//;;; uInt wmask = s->w_mask;
//;;; unsigned chain_length = s->max_chain_length;
//;;; if (s->prev_length >= s->good_match) {
//;;; chain_length >>= 2;
//;;; }
mov edi, prev_length
mov esi, good_match
mov eax, WMask
mov ebx, max_chain_length
cmp edi, esi
jl LastMatchGood
shr ebx, 2
LastMatchGood:
//;;; chainlen is decremented once beforehand so that the function can
//;;; use the sign flag instead of the zero flag for the exit test.
//;;; It is then shifted into the high word, to make room for the wmask
//;;; value, which it will always accompany.
dec ebx
shl ebx, 16
or ebx, eax
//;;; on zlib only
//;;; if ((uInt)nice_match > s->lookahead) nice_match = s->lookahead;
mov eax, nice_match
mov [chainlenwmask], ebx
mov r10d, Lookahead
cmp r10d, eax
cmovnl r10d, eax
mov [nicematch],r10d
//;;; register Bytef *scan = s->window + s->strstart;
mov r10, window_ad
mov ebp, strstart
lea r13, [r10 + rbp]
//;;; Determine how many bytes the scan ptr is off from being
//;;; dword-aligned.
mov r9,r13
neg r13
and r13,3
//;;; IPos limit = s->strstart > (IPos)MAX_DIST(s) ?
//;;; s->strstart - (IPos)MAX_DIST(s) : NIL;
mov eax, window_size
sub eax, MIN_LOOKAHEAD
xor edi,edi
sub ebp, eax
mov r11d, prev_length
cmovng ebp,edi
//;;; int best_len = s->prev_length;
//;;; Store the sum of s->window + best_len in esi locally, and in esi.
lea rsi,[r10+r11]
//;;; register ush scan_start = *(ushf*)scan;
//;;; register ush scan_end = *(ushf*)(scan+best_len-1);
//;;; Posf *prev = s->prev;
movzx r12d,word ptr [r9]
movzx ebx, word ptr [r9 + r11 - 1]
mov rdi, prev_ad
//;;; Jump into the main loop.
mov edx, [chainlenwmask]
cmp bx,word ptr [rsi + r8 - 1]
jz LookupLoopIsZero
LookupLoop1:
and r8d, edx
movzx r8d, word ptr [rdi + r8*2]
cmp r8d, ebp
jbe LeaveNow
sub edx, 0x00010000
BEFORE_JMP
js LeaveNow
AFTER_JMP
LoopEntry1:
cmp bx,word ptr [rsi + r8 - 1]
BEFORE_JMP
jz LookupLoopIsZero
AFTER_JMP
LookupLoop2:
and r8d, edx
movzx r8d, word ptr [rdi + r8*2]
cmp r8d, ebp
BEFORE_JMP
jbe LeaveNow
AFTER_JMP
sub edx, 0x00010000
BEFORE_JMP
js LeaveNow
AFTER_JMP
LoopEntry2:
cmp bx,word ptr [rsi + r8 - 1]
BEFORE_JMP
jz LookupLoopIsZero
AFTER_JMP
LookupLoop4:
and r8d, edx
movzx r8d, word ptr [rdi + r8*2]
cmp r8d, ebp
BEFORE_JMP
jbe LeaveNow
AFTER_JMP
sub edx, 0x00010000
BEFORE_JMP
js LeaveNow
AFTER_JMP
LoopEntry4:
cmp bx,word ptr [rsi + r8 - 1]
BEFORE_JMP
jnz LookupLoop1
jmp LookupLoopIsZero
AFTER_JMP
/*
;;; do {
;;; match = s->window + cur_match;
;;; if (*(ushf*)(match+best_len-1) != scan_end ||
;;; *(ushf*)match != scan_start) continue;
;;; [...]
;;; } while ((cur_match = prev[cur_match & wmask]) > limit
;;; && --chain_length != 0);
;;;
;;; Here is the inner loop of the function. The function will spend the
;;; majority of its time in this loop, and majority of that time will
;;; be spent in the first ten instructions.
;;;
;;; Within this loop:
;;; ebx = scanend
;;; r8d = curmatch
;;; edx = chainlenwmask - i.e., ((chainlen << 16) | wmask)
;;; esi = windowbestlen - i.e., (window + bestlen)
;;; edi = prev
;;; ebp = limit
*/
.balign 16
LookupLoop:
and r8d, edx
movzx r8d, word ptr [rdi + r8*2]
cmp r8d, ebp
BEFORE_JMP
jbe LeaveNow
AFTER_JMP
sub edx, 0x00010000
BEFORE_JMP
js LeaveNow
AFTER_JMP
LoopEntry:
cmp bx,word ptr [rsi + r8 - 1]
BEFORE_JMP
jnz LookupLoop1
AFTER_JMP
LookupLoopIsZero:
cmp r12w, word ptr [r10 + r8]
BEFORE_JMP
jnz LookupLoop1
AFTER_JMP
//;;; Store the current value of chainlen.
mov [chainlenwmask], edx
/*
;;; Point edi to the string under scrutiny, and esi to the string we
;;; are hoping to match it up with. In actuality, esi and edi are
;;; both pointed (MAX_MATCH_8 - scanalign) bytes ahead, and edx is
;;; initialized to -(MAX_MATCH_8 - scanalign).
*/
lea rsi,[r8+r10]
mov rdx, 0xfffffffffffffef8 //; -(MAX_MATCH_8)
lea rsi, [rsi + r13 + 0x0108] //;MAX_MATCH_8]
lea rdi, [r9 + r13 + 0x0108] //;MAX_MATCH_8]
prefetcht1 [rsi+rdx]
prefetcht1 [rdi+rdx]
/*
;;; Test the strings for equality, 8 bytes at a time. At the end,
;;; adjust rdx so that it is offset to the exact byte that mismatched.
;;;
;;; We already know at this point that the first three bytes of the
;;; strings match each other, and they can be safely passed over before
;;; starting the compare loop. So what this code does is skip over 0-3
;;; bytes, as much as necessary in order to dword-align the edi
;;; pointer. (rsi will still be misaligned three times out of four.)
;;;
;;; It should be confessed that this loop usually does not represent
;;; much of the total running time. Replacing it with a more
;;; straightforward "rep cmpsb" would not drastically degrade
;;; performance.
*/
LoopCmps:
mov rax, [rsi + rdx]
xor rax, [rdi + rdx]
jnz LeaveLoopCmps
mov rax, [rsi + rdx + 8]
xor rax, [rdi + rdx + 8]
jnz LeaveLoopCmps8
mov rax, [rsi + rdx + 8+8]
xor rax, [rdi + rdx + 8+8]
jnz LeaveLoopCmps16
add rdx,8+8+8
BEFORE_JMP
jnz LoopCmps
jmp LenMaximum
AFTER_JMP
LeaveLoopCmps16: add rdx,8
LeaveLoopCmps8: add rdx,8
LeaveLoopCmps:
test eax, 0x0000FFFF
jnz LenLower
test eax,0xffffffff
jnz LenLower32
add rdx,4
shr rax,32
or ax,ax
BEFORE_JMP
jnz LenLower
AFTER_JMP
LenLower32:
shr eax,16
add rdx,2
LenLower:
sub al, 1
adc rdx, 0
//;;; Calculate the length of the match. If it is longer than MAX_MATCH,
//;;; then automatically accept it as the best possible match and leave.
lea rax, [rdi + rdx]
sub rax, r9
cmp eax, MAX_MATCH
BEFORE_JMP
jge LenMaximum
AFTER_JMP
/*
;;; If the length of the match is not longer than the best match we
;;; have so far, then forget it and return to the lookup loop.
;///////////////////////////////////
*/
cmp eax, r11d
jg LongerMatch
lea rsi,[r10+r11]
mov rdi, prev_ad
mov edx, [chainlenwmask]
BEFORE_JMP
jmp LookupLoop
AFTER_JMP
/*
;;; s->match_start = cur_match;
;;; best_len = len;
;;; if (len >= nice_match) break;
;;; scan_end = *(ushf*)(scan+best_len-1);
*/
LongerMatch:
mov r11d, eax
mov match_start, r8d
cmp eax, [nicematch]
BEFORE_JMP
jge LeaveNow
AFTER_JMP
lea rsi,[r10+rax]
movzx ebx, word ptr [r9 + rax - 1]
mov rdi, prev_ad
mov edx, [chainlenwmask]
BEFORE_JMP
jmp LookupLoop
AFTER_JMP
//;;; Accept the current string, with the maximum possible length.
LenMaximum:
mov r11d,MAX_MATCH
mov match_start, r8d
//;;; if ((uInt)best_len <= s->lookahead) return (uInt)best_len;
//;;; return s->lookahead;
LeaveNow:
mov eax, Lookahead
cmp r11d, eax
cmovng eax, r11d
//;;; Restore the stack and return from whence we came.
// mov rsi,[save_rsi]
// mov rdi,[save_rdi]
mov rbx,[save_rbx]
mov rbp,[save_rbp]
mov r12,[save_r12]
mov r13,[save_r13]
mov r14,[save_r14]
mov r15,[save_r15]
ret 0
//; please don't remove this string !
//; Your can freely use gvmat64 in any free or commercial app
//; but it is far better don't remove the string in the binary!
// db 0dh,0ah,"asm686 with masm, optimised assembly code from Brian Raiter, written 1998, converted to amd 64 by Gilles Vollant 2005",0dh,0ah,0
match_init:
ret 0

1157
contrib/inflate86/inffas86.c Normal file

File diff suppressed because it is too large Load Diff

1368
contrib/inflate86/inffast.S Normal file

File diff suppressed because it is too large Load Diff

33
crc32.c
View File

@ -1,5 +1,5 @@
/* crc32.c -- compute the CRC-32 of a data stream
* Copyright (C) 1995-2005 Mark Adler
* Copyright (C) 1995-2006 Mark Adler
* For conditions of distribution and use, see copyright notice in zlib.h
*
* Thanks to Rodney Brown <rbrown64@csc.com.au> for his contribution of faster
@ -53,7 +53,7 @@
/* Definitions for doing the crc four data bytes at a time. */
#ifdef BYFOUR
# define REV(w) (((w)>>24)+(((w)>>8)&0xff00)+ \
# define REV(w) ((((w)>>24)&0xff)+(((w)>>8)&0xff00)+ \
(((w)&0xff00)<<8)+(((w)&0xff)<<24))
local unsigned long crc32_little OF((unsigned long,
const unsigned char FAR *, unsigned));
@ -68,6 +68,8 @@
local unsigned long gf2_matrix_times OF((unsigned long *mat,
unsigned long vec));
local void gf2_matrix_square OF((unsigned long *square, unsigned long *mat));
local uLong crc32_combine_(uLong crc1, uLong crc2, z_off64_t len2);
#ifdef DYNAMIC_CRC_TABLE
@ -367,22 +369,22 @@ local void gf2_matrix_square(square, mat)
}
/* ========================================================================= */
uLong ZEXPORT crc32_combine(crc1, crc2, len2)
local uLong crc32_combine_(crc1, crc2, len2)
uLong crc1;
uLong crc2;
z_off_t len2;
z_off64_t len2;
{
int n;
unsigned long row;
unsigned long even[GF2_DIM]; /* even-power-of-two zeros operator */
unsigned long odd[GF2_DIM]; /* odd-power-of-two zeros operator */
/* degenerate case */
if (len2 == 0)
/* degenerate case (also disallow negative lengths) */
if (len2 <= 0)
return crc1;
/* put operator for one zero bit in odd */
odd[0] = 0xedb88320L; /* CRC-32 polynomial */
odd[0] = 0xedb88320UL; /* CRC-32 polynomial */
row = 1;
for (n = 1; n < GF2_DIM; n++) {
odd[n] = row;
@ -421,3 +423,20 @@ uLong ZEXPORT crc32_combine(crc1, crc2, len2)
crc1 ^= crc2;
return crc1;
}
/* ========================================================================= */
uLong ZEXPORT crc32_combine(crc1, crc2, len2)
uLong crc1;
uLong crc2;
z_off_t len2;
{
return crc32_combine_(crc1, crc2, len2);
}
uLong ZEXPORT crc32_combine64(crc1, crc2, len2)
uLong crc1;
uLong crc2;
z_off64_t len2;
{
return crc32_combine_(crc1, crc2, len2);
}

268
deflate.c
View File

@ -1,5 +1,5 @@
/* deflate.c -- compress data using the deflation algorithm
* Copyright (C) 1995-2005 Jean-loup Gailly.
* Copyright (C) 1995-2010 Jean-loup Gailly and Mark Adler
* For conditions of distribution and use, see copyright notice in zlib.h
*/
@ -52,7 +52,7 @@
#include "deflate.h"
const char deflate_copyright[] =
" deflate 1.2.3 Copyright 1995-2005 Jean-loup Gailly ";
" deflate 1.2.4 Copyright 1995-2010 Jean-loup Gailly and Mark Adler ";
/*
If you use the zlib library in a product, an acknowledgment is welcome
in the documentation of your product. If for some reason you cannot
@ -79,19 +79,18 @@ local block_state deflate_fast OF((deflate_state *s, int flush));
#ifndef FASTEST
local block_state deflate_slow OF((deflate_state *s, int flush));
#endif
local block_state deflate_rle OF((deflate_state *s, int flush));
local block_state deflate_huff OF((deflate_state *s, int flush));
local void lm_init OF((deflate_state *s));
local void putShortMSB OF((deflate_state *s, uInt b));
local void flush_pending OF((z_streamp strm));
local int read_buf OF((z_streamp strm, Bytef *buf, unsigned size));
#ifndef FASTEST
#ifdef ASMV
void match_init OF((void)); /* asm code initialization */
uInt longest_match OF((deflate_state *s, IPos cur_match));
#else
local uInt longest_match OF((deflate_state *s, IPos cur_match));
#endif
#endif
local uInt longest_match_fast OF((deflate_state *s, IPos cur_match));
#ifdef DEBUG
local void check_match OF((deflate_state *s, IPos start, IPos match,
@ -110,11 +109,6 @@ local void check_match OF((deflate_state *s, IPos start, IPos match,
#endif
/* Matches of length 3 are discarded if their distance exceeds TOO_FAR */
#define MIN_LOOKAHEAD (MAX_MATCH+MIN_MATCH+1)
/* Minimum amount of lookahead, except at the end of the input file.
* See deflate.c for comments about the MIN_MATCH+1.
*/
/* Values for max_lazy_match, good_match and max_chain_length, depending on
* the desired pack level (0..9). The values given below have been tuned to
* exclude worst case performance for pathological files. Better values may be
@ -288,6 +282,8 @@ int ZEXPORT deflateInit2_(strm, level, method, windowBits, memLevel, strategy,
s->prev = (Posf *) ZALLOC(strm, s->w_size, sizeof(Pos));
s->head = (Posf *) ZALLOC(strm, s->hash_size, sizeof(Pos));
s->high_water = 0; /* nothing written to s->window yet */
s->lit_bufsize = 1 << (memLevel + 6); /* 16K elements by default */
overlay = (ushf *) ZALLOC(strm, s->lit_bufsize, sizeof(ush)+2);
@ -332,8 +328,8 @@ int ZEXPORT deflateSetDictionary (strm, dictionary, dictLength)
strm->adler = adler32(strm->adler, dictionary, dictLength);
if (length < MIN_MATCH) return Z_OK;
if (length > MAX_DIST(s)) {
length = MAX_DIST(s);
if (length > s->w_size) {
length = s->w_size;
dictionary += dictLength - length; /* use the tail of the dictionary */
}
zmemcpy(s->window, dictionary, length);
@ -435,9 +431,10 @@ int ZEXPORT deflateParams(strm, level, strategy)
}
func = configuration_table[s->level].func;
if (func != configuration_table[level].func && strm->total_in != 0) {
if ((strategy != s->strategy || func != configuration_table[level].func) &&
strm->total_in != 0) {
/* Flush the last buffer: */
err = deflate(strm, Z_PARTIAL_FLUSH);
err = deflate(strm, Z_BLOCK);
}
if (s->level != level) {
s->level = level;
@ -481,33 +478,66 @@ int ZEXPORT deflateTune(strm, good_length, max_lazy, nice_length, max_chain)
* resulting from using fixed blocks instead of stored blocks, which deflate
* can emit on compressed data for some combinations of the parameters.
*
* This function could be more sophisticated to provide closer upper bounds
* for every combination of windowBits and memLevel, as well as wrap.
* But even the conservative upper bound of about 14% expansion does not
* seem onerous for output buffer allocation.
* This function could be more sophisticated to provide closer upper bounds for
* every combination of windowBits and memLevel. But even the conservative
* upper bound of about 14% expansion does not seem onerous for output buffer
* allocation.
*/
uLong ZEXPORT deflateBound(strm, sourceLen)
z_streamp strm;
uLong sourceLen;
{
deflate_state *s;
uLong destLen;
uLong complen, wraplen;
Bytef *str;
/* conservative upper bound */
destLen = sourceLen +
((sourceLen + 7) >> 3) + ((sourceLen + 63) >> 6) + 11;
/* conservative upper bound for compressed data */
complen = sourceLen +
((sourceLen + 7) >> 3) + ((sourceLen + 63) >> 6) + 5;
/* if can't get parameters, return conservative bound */
/* if can't get parameters, return conservative bound plus zlib wrapper */
if (strm == Z_NULL || strm->state == Z_NULL)
return destLen;
return complen + 6;
/* compute wrapper length */
s = strm->state;
switch (s->wrap) {
case 0: /* raw deflate */
wraplen = 0;
break;
case 1: /* zlib wrapper */
wraplen = 6 + (s->strstart ? 4 : 0);
break;
case 2: /* gzip wrapper */
wraplen = 18;
if (s->gzhead != Z_NULL) { /* user-supplied gzip header */
if (s->gzhead->extra != Z_NULL)
wraplen += 2 + s->gzhead->extra_len;
str = s->gzhead->name;
if (str != Z_NULL)
do {
wraplen++;
} while (*str++);
str = s->gzhead->comment;
if (str != Z_NULL)
do {
wraplen++;
} while (*str++);
if (s->gzhead->hcrc)
wraplen += 2;
}
break;
default: /* for compiler happiness */
wraplen = 6;
}
/* if not default parameters, return conservative bound */
s = strm->state;
if (s->w_bits != 15 || s->hash_bits != 8 + 7)
return destLen;
return complen + wraplen;
/* default settings: return tight bound for that case */
return compressBound(sourceLen);
return sourceLen + (sourceLen >> 12) + (sourceLen >> 14) +
(sourceLen >> 25) + 13 - 6 + wraplen;
}
/* =========================================================================
@ -557,7 +587,7 @@ int ZEXPORT deflate (strm, flush)
deflate_state *s;
if (strm == Z_NULL || strm->state == Z_NULL ||
flush > Z_FINISH || flush < 0) {
flush > Z_BLOCK || flush < 0) {
return Z_STREAM_ERROR;
}
s = strm->state;
@ -581,7 +611,7 @@ int ZEXPORT deflate (strm, flush)
put_byte(s, 31);
put_byte(s, 139);
put_byte(s, 8);
if (s->gzhead == NULL) {
if (s->gzhead == Z_NULL) {
put_byte(s, 0);
put_byte(s, 0);
put_byte(s, 0);
@ -608,7 +638,7 @@ int ZEXPORT deflate (strm, flush)
(s->strategy >= Z_HUFFMAN_ONLY || s->level < 2 ?
4 : 0));
put_byte(s, s->gzhead->os & 0xff);
if (s->gzhead->extra != NULL) {
if (s->gzhead->extra != Z_NULL) {
put_byte(s, s->gzhead->extra_len & 0xff);
put_byte(s, (s->gzhead->extra_len >> 8) & 0xff);
}
@ -650,7 +680,7 @@ int ZEXPORT deflate (strm, flush)
}
#ifdef GZIP
if (s->status == EXTRA_STATE) {
if (s->gzhead->extra != NULL) {
if (s->gzhead->extra != Z_NULL) {
uInt beg = s->pending; /* start of bytes to update crc */
while (s->gzindex < (s->gzhead->extra_len & 0xffff)) {
@ -678,7 +708,7 @@ int ZEXPORT deflate (strm, flush)
s->status = NAME_STATE;
}
if (s->status == NAME_STATE) {
if (s->gzhead->name != NULL) {
if (s->gzhead->name != Z_NULL) {
uInt beg = s->pending; /* start of bytes to update crc */
int val;
@ -709,7 +739,7 @@ int ZEXPORT deflate (strm, flush)
s->status = COMMENT_STATE;
}
if (s->status == COMMENT_STATE) {
if (s->gzhead->comment != NULL) {
if (s->gzhead->comment != Z_NULL) {
uInt beg = s->pending; /* start of bytes to update crc */
int val;
@ -787,7 +817,9 @@ int ZEXPORT deflate (strm, flush)
(flush != Z_NO_FLUSH && s->status != FINISH_STATE)) {
block_state bstate;
bstate = (*(configuration_table[s->level].func))(s, flush);
bstate = s->strategy == Z_HUFFMAN_ONLY ? deflate_huff(s, flush) :
(s->strategy == Z_RLE ? deflate_rle(s, flush) :
(*(configuration_table[s->level].func))(s, flush));
if (bstate == finish_started || bstate == finish_done) {
s->status = FINISH_STATE;
@ -808,13 +840,17 @@ int ZEXPORT deflate (strm, flush)
if (bstate == block_done) {
if (flush == Z_PARTIAL_FLUSH) {
_tr_align(s);
} else { /* FULL_FLUSH or SYNC_FLUSH */
} else if (flush != Z_BLOCK) { /* FULL_FLUSH or SYNC_FLUSH */
_tr_stored_block(s, (char*)0, 0L, 0);
/* For a full flush, this empty block will be recognized
* as a special marker by inflate_sync().
*/
if (flush == Z_FULL_FLUSH) {
CLEAR_HASH(s); /* forget history */
if (s->lookahead == 0) {
s->strstart = 0;
s->block_start = 0L;
}
}
}
flush_pending(strm);
@ -1167,12 +1203,13 @@ local uInt longest_match(s, cur_match)
return s->lookahead;
}
#endif /* ASMV */
#endif /* FASTEST */
#else /* FASTEST */
/* ---------------------------------------------------------------------------
* Optimized version for level == 1 or strategy == Z_RLE only
* Optimized version for FASTEST only
*/
local uInt longest_match_fast(s, cur_match)
local uInt longest_match(s, cur_match)
deflate_state *s;
IPos cur_match; /* current match */
{
@ -1225,6 +1262,8 @@ local uInt longest_match_fast(s, cur_match)
return (uInt)len <= s->lookahead ? (uInt)len : s->lookahead;
}
#endif /* FASTEST */
#ifdef DEBUG
/* ===========================================================================
* Check that the match at match_start is indeed a match.
@ -1303,7 +1342,6 @@ local void fill_window(s)
later. (Using level 0 permanently is not an optimal usage of
zlib, so we don't care about this pathological case.)
*/
/* %%% avoid this when Z_RLE */
n = s->hash_size;
p = &s->head[n];
do {
@ -1355,27 +1393,61 @@ local void fill_window(s)
*/
} while (s->lookahead < MIN_LOOKAHEAD && s->strm->avail_in != 0);
/* If the WIN_INIT bytes after the end of the current data have never been
* written, then zero those bytes in order to avoid memory check reports of
* the use of uninitialized (or uninitialised as Julian writes) bytes by
* the longest match routines. Update the high water mark for the next
* time through here. WIN_INIT is set to MAX_MATCH since the longest match
* routines allow scanning to strstart + MAX_MATCH, ignoring lookahead.
*/
if (s->high_water < s->window_size) {
ulg curr = s->strstart + (ulg)(s->lookahead);
ulg init;
if (s->high_water < curr) {
/* Previous high water mark below current data -- zero WIN_INIT
* bytes or up to end of window, whichever is less.
*/
init = s->window_size - curr;
if (init > WIN_INIT)
init = WIN_INIT;
zmemzero(s->window + curr, (unsigned)init);
s->high_water = curr + init;
}
else if (s->high_water < (ulg)curr + WIN_INIT) {
/* High water mark at or above current data, but below current data
* plus WIN_INIT -- zero out to current data plus WIN_INIT, or up
* to end of window, whichever is less.
*/
init = (ulg)curr + WIN_INIT - s->high_water;
if (init > s->window_size - s->high_water)
init = s->window_size - s->high_water;
zmemzero(s->window + s->high_water, (unsigned)init);
s->high_water += init;
}
}
}
/* ===========================================================================
* Flush the current block, with given end-of-file flag.
* IN assertion: strstart is set to the end of the current match.
*/
#define FLUSH_BLOCK_ONLY(s, eof) { \
#define FLUSH_BLOCK_ONLY(s, last) { \
_tr_flush_block(s, (s->block_start >= 0L ? \
(charf *)&s->window[(unsigned)s->block_start] : \
(charf *)Z_NULL), \
(ulg)((long)s->strstart - s->block_start), \
(eof)); \
(last)); \
s->block_start = s->strstart; \
flush_pending(s->strm); \
Tracev((stderr,"[FLUSH]")); \
}
/* Same but force premature exit if necessary. */
#define FLUSH_BLOCK(s, eof) { \
FLUSH_BLOCK_ONLY(s, eof); \
if (s->strm->avail_out == 0) return (eof) ? finish_started : need_more; \
#define FLUSH_BLOCK(s, last) { \
FLUSH_BLOCK_ONLY(s, last); \
if (s->strm->avail_out == 0) return (last) ? finish_started : need_more; \
}
/* ===========================================================================
@ -1449,7 +1521,7 @@ local block_state deflate_fast(s, flush)
deflate_state *s;
int flush;
{
IPos hash_head = NIL; /* head of the hash chain */
IPos hash_head; /* head of the hash chain */
int bflush; /* set if current block must be flushed */
for (;;) {
@ -1469,6 +1541,7 @@ local block_state deflate_fast(s, flush)
/* Insert the string window[strstart .. strstart+2] in the
* dictionary, and set hash_head to the head of the hash chain:
*/
hash_head = NIL;
if (s->lookahead >= MIN_MATCH) {
INSERT_STRING(s, s->strstart, hash_head);
}
@ -1481,19 +1554,8 @@ local block_state deflate_fast(s, flush)
* of window index 0 (in particular we have to avoid a match
* of the string with itself at the start of the input file).
*/
#ifdef FASTEST
if ((s->strategy != Z_HUFFMAN_ONLY && s->strategy != Z_RLE) ||
(s->strategy == Z_RLE && s->strstart - hash_head == 1)) {
s->match_length = longest_match_fast (s, hash_head);
}
#else
if (s->strategy != Z_HUFFMAN_ONLY && s->strategy != Z_RLE) {
s->match_length = longest_match (s, hash_head);
} else if (s->strategy == Z_RLE && s->strstart - hash_head == 1) {
s->match_length = longest_match_fast (s, hash_head);
}
#endif
/* longest_match() or longest_match_fast() sets match_start */
s->match_length = longest_match (s, hash_head);
/* longest_match() sets match_start */
}
if (s->match_length >= MIN_MATCH) {
check_match(s, s->strstart, s->match_start, s->match_length);
@ -1555,7 +1617,7 @@ local block_state deflate_slow(s, flush)
deflate_state *s;
int flush;
{
IPos hash_head = NIL; /* head of hash chain */
IPos hash_head; /* head of hash chain */
int bflush; /* set if current block must be flushed */
/* Process the input block. */
@ -1576,6 +1638,7 @@ local block_state deflate_slow(s, flush)
/* Insert the string window[strstart .. strstart+2] in the
* dictionary, and set hash_head to the head of the hash chain:
*/
hash_head = NIL;
if (s->lookahead >= MIN_MATCH) {
INSERT_STRING(s, s->strstart, hash_head);
}
@ -1591,12 +1654,8 @@ local block_state deflate_slow(s, flush)
* of window index 0 (in particular we have to avoid a match
* of the string with itself at the start of the input file).
*/
if (s->strategy != Z_HUFFMAN_ONLY && s->strategy != Z_RLE) {
s->match_length = longest_match (s, hash_head);
} else if (s->strategy == Z_RLE && s->strstart - hash_head == 1) {
s->match_length = longest_match_fast (s, hash_head);
}
/* longest_match() or longest_match_fast() sets match_start */
s->match_length = longest_match (s, hash_head);
/* longest_match() sets match_start */
if (s->match_length <= 5 && (s->strategy == Z_FILTERED
#if TOO_FAR <= 32767
@ -1674,7 +1733,6 @@ local block_state deflate_slow(s, flush)
}
#endif /* FASTEST */
#if 0
/* ===========================================================================
* For Z_RLE, simply look for runs of bytes, generate matches only of distance
* one. Do not maintain a hash table. (It will be regenerated if this run of
@ -1684,11 +1742,9 @@ local block_state deflate_rle(s, flush)
deflate_state *s;
int flush;
{
int bflush; /* set if current block must be flushed */
uInt run; /* length of run */
uInt max; /* maximum length of run */
uInt prev; /* byte at distance one to match */
Bytef *scan; /* scan for end of run */
int bflush; /* set if current block must be flushed */
uInt prev; /* byte at distance one to match */
Bytef *scan, *strend; /* scan goes up to strend for length of run */
for (;;) {
/* Make sure that we always have enough lookahead, except
@ -1704,23 +1760,33 @@ local block_state deflate_rle(s, flush)
}
/* See how many times the previous byte repeats */
run = 0;
if (s->strstart > 0) { /* if there is a previous byte, that is */
max = s->lookahead < MAX_MATCH ? s->lookahead : MAX_MATCH;
s->match_length = 0;
if (s->lookahead >= MIN_MATCH && s->strstart > 0) {
scan = s->window + s->strstart - 1;
prev = *scan++;
do {
if (*scan++ != prev)
break;
} while (++run < max);
prev = *scan;
if (prev == *++scan && prev == *++scan && prev == *++scan) {
strend = s->window + s->strstart + MAX_MATCH;
do {
} while (prev == *++scan && prev == *++scan &&
prev == *++scan && prev == *++scan &&
prev == *++scan && prev == *++scan &&
prev == *++scan && prev == *++scan &&
scan < strend);
s->match_length = MAX_MATCH - (int)(strend - scan);
if (s->match_length > s->lookahead)
s->match_length = s->lookahead;
}
}
/* Emit match if have run of MIN_MATCH or longer, else emit literal */
if (run >= MIN_MATCH) {
check_match(s, s->strstart, s->strstart - 1, run);
_tr_tally_dist(s, 1, run - MIN_MATCH, bflush);
s->lookahead -= run;
s->strstart += run;
if (s->match_length >= MIN_MATCH) {
check_match(s, s->strstart, s->strstart - 1, s->match_length);
_tr_tally_dist(s, 1, s->match_length - MIN_MATCH, bflush);
s->lookahead -= s->match_length;
s->strstart += s->match_length;
s->match_length = 0;
} else {
/* No match, output a literal byte */
Tracevv((stderr,"%c", s->window[s->strstart]));
@ -1733,4 +1799,36 @@ local block_state deflate_rle(s, flush)
FLUSH_BLOCK(s, flush == Z_FINISH);
return flush == Z_FINISH ? finish_done : block_done;
}
#endif
/* ===========================================================================
* For Z_HUFFMAN_ONLY, do not look for matches. Do not maintain a hash table.
* (It will be regenerated if this run of deflate switches away from Huffman.)
*/
local block_state deflate_huff(s, flush)
deflate_state *s;
int flush;
{
int bflush; /* set if current block must be flushed */
for (;;) {
/* Make sure that we have a literal to write. */
if (s->lookahead == 0) {
fill_window(s);
if (s->lookahead == 0) {
if (flush == Z_NO_FLUSH)
return need_more;
break; /* flush the current block */
}
}
/* Output a literal byte */
s->match_length = 0;
Tracevv((stderr,"%c", s->window[s->strstart]));
_tr_tally_lit (s, s->window[s->strstart], bflush);
s->lookahead--;
s->strstart++;
if (bflush) FLUSH_BLOCK(s, 0);
}
FLUSH_BLOCK(s, flush == Z_FINISH);
return flush == Z_FINISH ? finish_done : block_done;
}

View File

@ -1,5 +1,5 @@
/* deflate.h -- internal compression state
* Copyright (C) 1995-2004 Jean-loup Gailly
* Copyright (C) 1995-2009 Jean-loup Gailly
* For conditions of distribution and use, see copyright notice in zlib.h
*/
@ -260,6 +260,13 @@ typedef struct internal_state {
* are always zero.
*/
ulg high_water;
/* High water mark offset in window for initialized bytes -- bytes above
* this are set to zero in order to avoid memory check warnings when
* longest match routines access bytes past the input. This is then
* updated to the new high water mark.
*/
} FAR deflate_state;
/* Output a byte on the stream.
@ -278,14 +285,18 @@ typedef struct internal_state {
* distances are limited to MAX_DIST instead of WSIZE.
*/
#define WIN_INIT MAX_MATCH
/* Number of bytes after end of data in window to initialize in order to avoid
memory checker errors from longest match routines */
/* in trees.c */
void _tr_init OF((deflate_state *s));
int _tr_tally OF((deflate_state *s, unsigned dist, unsigned lc));
void _tr_flush_block OF((deflate_state *s, charf *buf, ulg stored_len,
int eof));
int last));
void _tr_align OF((deflate_state *s));
void _tr_stored_block OF((deflate_state *s, charf *buf, ulg stored_len,
int eof));
int last));
#define d_code(dist) \
((dist) < 256 ? _dist_code[dist] : _dist_code[256+((dist)>>7)])

View File

@ -121,7 +121,7 @@ At least for deflate's output that generates new trees every several 10's of
kbytes. You can imagine that filling in a 2^15 entry table for a 15-bit code
would take too long if you're only decoding several thousand symbols. At the
other extreme, you could make a new table for every bit in the code. In fact,
that's essentially a Huffman tree. But then you spend two much time
that's essentially a Huffman tree. But then you spend too much time
traversing the tree while decoding, even for short symbols.
So the number of bits for the first lookup table is a trade of the time to

619
doc/rfc1950.txt Normal file
View File

@ -0,0 +1,619 @@
Network Working Group P. Deutsch
Request for Comments: 1950 Aladdin Enterprises
Category: Informational J-L. Gailly
Info-ZIP
May 1996
ZLIB Compressed Data Format Specification version 3.3
Status of This Memo
This memo provides information for the Internet community. This memo
does not specify an Internet standard of any kind. Distribution of
this memo is unlimited.
IESG Note:
The IESG takes no position on the validity of any Intellectual
Property Rights statements contained in this document.
Notices
Copyright (c) 1996 L. Peter Deutsch and Jean-Loup Gailly
Permission is granted to copy and distribute this document for any
purpose and without charge, including translations into other
languages and incorporation into compilations, provided that the
copyright notice and this notice are preserved, and that any
substantive changes or deletions from the original are clearly
marked.
A pointer to the latest version of this and related documentation in
HTML format can be found at the URL
<ftp://ftp.uu.net/graphics/png/documents/zlib/zdoc-index.html>.
Abstract
This specification defines a lossless compressed data format. The
data can be produced or consumed, even for an arbitrarily long
sequentially presented input data stream, using only an a priori
bounded amount of intermediate storage. The format presently uses
the DEFLATE compression method but can be easily extended to use
other compression methods. It can be implemented readily in a manner
not covered by patents. This specification also defines the ADLER-32
checksum (an extension and improvement of the Fletcher checksum),
used for detection of data corruption, and provides an algorithm for
computing it.
Deutsch & Gailly Informational [Page 1]
RFC 1950 ZLIB Compressed Data Format Specification May 1996
Table of Contents
1. Introduction ................................................... 2
1.1. Purpose ................................................... 2
1.2. Intended audience ......................................... 3
1.3. Scope ..................................................... 3
1.4. Compliance ................................................ 3
1.5. Definitions of terms and conventions used ................ 3
1.6. Changes from previous versions ............................ 3
2. Detailed specification ......................................... 3
2.1. Overall conventions ....................................... 3
2.2. Data format ............................................... 4
2.3. Compliance ................................................ 7
3. References ..................................................... 7
4. Source code .................................................... 8
5. Security Considerations ........................................ 8
6. Acknowledgements ............................................... 8
7. Authors' Addresses ............................................. 8
8. Appendix: Rationale ............................................ 9
9. Appendix: Sample code ..........................................10
1. Introduction
1.1. Purpose
The purpose of this specification is to define a lossless
compressed data format that:
* Is independent of CPU type, operating system, file system,
and character set, and hence can be used for interchange;
* Can be produced or consumed, even for an arbitrarily long
sequentially presented input data stream, using only an a
priori bounded amount of intermediate storage, and hence can
be used in data communications or similar structures such as
Unix filters;
* Can use a number of different compression methods;
* Can be implemented readily in a manner not covered by
patents, and hence can be practiced freely.
The data format defined by this specification does not attempt to
allow random access to compressed data.
Deutsch & Gailly Informational [Page 2]
RFC 1950 ZLIB Compressed Data Format Specification May 1996
1.2. Intended audience
This specification is intended for use by implementors of software
to compress data into zlib format and/or decompress data from zlib
format.
The text of the specification assumes a basic background in
programming at the level of bits and other primitive data
representations.
1.3. Scope
The specification specifies a compressed data format that can be
used for in-memory compression of a sequence of arbitrary bytes.
1.4. Compliance
Unless otherwise indicated below, a compliant decompressor must be
able to accept and decompress any data set that conforms to all
the specifications presented here; a compliant compressor must
produce data sets that conform to all the specifications presented
here.
1.5. Definitions of terms and conventions used
byte: 8 bits stored or transmitted as a unit (same as an octet).
(For this specification, a byte is exactly 8 bits, even on
machines which store a character on a number of bits different
from 8.) See below, for the numbering of bits within a byte.
1.6. Changes from previous versions
Version 3.1 was the first public release of this specification.
In version 3.2, some terminology was changed and the Adler-32
sample code was rewritten for clarity. In version 3.3, the
support for a preset dictionary was introduced, and the
specification was converted to RFC style.
2. Detailed specification
2.1. Overall conventions
In the diagrams below, a box like this:
+---+
| | <-- the vertical bars might be missing
+---+
Deutsch & Gailly Informational [Page 3]
RFC 1950 ZLIB Compressed Data Format Specification May 1996
represents one byte; a box like this:
+==============+
| |
+==============+
represents a variable number of bytes.
Bytes stored within a computer do not have a "bit order", since
they are always treated as a unit. However, a byte considered as
an integer between 0 and 255 does have a most- and least-
significant bit, and since we write numbers with the most-
significant digit on the left, we also write bytes with the most-
significant bit on the left. In the diagrams below, we number the
bits of a byte so that bit 0 is the least-significant bit, i.e.,
the bits are numbered:
+--------+
|76543210|
+--------+
Within a computer, a number may occupy multiple bytes. All
multi-byte numbers in the format described here are stored with
the MOST-significant byte first (at the lower memory address).
For example, the decimal number 520 is stored as:
0 1
+--------+--------+
|00000010|00001000|
+--------+--------+
^ ^
| |
| + less significant byte = 8
+ more significant byte = 2 x 256
2.2. Data format
A zlib stream has the following structure:
0 1
+---+---+
|CMF|FLG| (more-->)
+---+---+
Deutsch & Gailly Informational [Page 4]
RFC 1950 ZLIB Compressed Data Format Specification May 1996
(if FLG.FDICT set)
0 1 2 3
+---+---+---+---+
| DICTID | (more-->)
+---+---+---+---+
+=====================+---+---+---+---+
|...compressed data...| ADLER32 |
+=====================+---+---+---+---+
Any data which may appear after ADLER32 are not part of the zlib
stream.
CMF (Compression Method and flags)
This byte is divided into a 4-bit compression method and a 4-
bit information field depending on the compression method.
bits 0 to 3 CM Compression method
bits 4 to 7 CINFO Compression info
CM (Compression method)
This identifies the compression method used in the file. CM = 8
denotes the "deflate" compression method with a window size up
to 32K. This is the method used by gzip and PNG (see
references [1] and [2] in Chapter 3, below, for the reference
documents). CM = 15 is reserved. It might be used in a future
version of this specification to indicate the presence of an
extra field before the compressed data.
CINFO (Compression info)
For CM = 8, CINFO is the base-2 logarithm of the LZ77 window
size, minus eight (CINFO=7 indicates a 32K window size). Values
of CINFO above 7 are not allowed in this version of the
specification. CINFO is not defined in this specification for
CM not equal to 8.
FLG (FLaGs)
This flag byte is divided as follows:
bits 0 to 4 FCHECK (check bits for CMF and FLG)
bit 5 FDICT (preset dictionary)
bits 6 to 7 FLEVEL (compression level)
The FCHECK value must be such that CMF and FLG, when viewed as
a 16-bit unsigned integer stored in MSB order (CMF*256 + FLG),
is a multiple of 31.
Deutsch & Gailly Informational [Page 5]
RFC 1950 ZLIB Compressed Data Format Specification May 1996
FDICT (Preset dictionary)
If FDICT is set, a DICT dictionary identifier is present
immediately after the FLG byte. The dictionary is a sequence of
bytes which are initially fed to the compressor without
producing any compressed output. DICT is the Adler-32 checksum
of this sequence of bytes (see the definition of ADLER32
below). The decompressor can use this identifier to determine
which dictionary has been used by the compressor.
FLEVEL (Compression level)
These flags are available for use by specific compression
methods. The "deflate" method (CM = 8) sets these flags as
follows:
0 - compressor used fastest algorithm
1 - compressor used fast algorithm
2 - compressor used default algorithm
3 - compressor used maximum compression, slowest algorithm
The information in FLEVEL is not needed for decompression; it
is there to indicate if recompression might be worthwhile.
compressed data
For compression method 8, the compressed data is stored in the
deflate compressed data format as described in the document
"DEFLATE Compressed Data Format Specification" by L. Peter
Deutsch. (See reference [3] in Chapter 3, below)
Other compressed data formats are not specified in this version
of the zlib specification.
ADLER32 (Adler-32 checksum)
This contains a checksum value of the uncompressed data
(excluding any dictionary data) computed according to Adler-32
algorithm. This algorithm is a 32-bit extension and improvement
of the Fletcher algorithm, used in the ITU-T X.224 / ISO 8073
standard. See references [4] and [5] in Chapter 3, below)
Adler-32 is composed of two sums accumulated per byte: s1 is
the sum of all bytes, s2 is the sum of all s1 values. Both sums
are done modulo 65521. s1 is initialized to 1, s2 to zero. The
Adler-32 checksum is stored as s2*65536 + s1 in most-
significant-byte first (network) order.
Deutsch & Gailly Informational [Page 6]
RFC 1950 ZLIB Compressed Data Format Specification May 1996
2.3. Compliance
A compliant compressor must produce streams with correct CMF, FLG
and ADLER32, but need not support preset dictionaries. When the
zlib data format is used as part of another standard data format,
the compressor may use only preset dictionaries that are specified
by this other data format. If this other format does not use the
preset dictionary feature, the compressor must not set the FDICT
flag.
A compliant decompressor must check CMF, FLG, and ADLER32, and
provide an error indication if any of these have incorrect values.
A compliant decompressor must give an error indication if CM is
not one of the values defined in this specification (only the
value 8 is permitted in this version), since another value could
indicate the presence of new features that would cause subsequent
data to be interpreted incorrectly. A compliant decompressor must
give an error indication if FDICT is set and DICTID is not the
identifier of a known preset dictionary. A decompressor may
ignore FLEVEL and still be compliant. When the zlib data format
is being used as a part of another standard format, a compliant
decompressor must support all the preset dictionaries specified by
the other format. When the other format does not use the preset
dictionary feature, a compliant decompressor must reject any
stream in which the FDICT flag is set.
3. References
[1] Deutsch, L.P.,"GZIP Compressed Data Format Specification",
available in ftp://ftp.uu.net/pub/archiving/zip/doc/
[2] Thomas Boutell, "PNG (Portable Network Graphics) specification",
available in ftp://ftp.uu.net/graphics/png/documents/
[3] Deutsch, L.P.,"DEFLATE Compressed Data Format Specification",
available in ftp://ftp.uu.net/pub/archiving/zip/doc/
[4] Fletcher, J. G., "An Arithmetic Checksum for Serial
Transmissions," IEEE Transactions on Communications, Vol. COM-30,
No. 1, January 1982, pp. 247-252.
[5] ITU-T Recommendation X.224, Annex D, "Checksum Algorithms,"
November, 1993, pp. 144, 145. (Available from
gopher://info.itu.ch). ITU-T X.244 is also the same as ISO 8073.
Deutsch & Gailly Informational [Page 7]
RFC 1950 ZLIB Compressed Data Format Specification May 1996
4. Source code
Source code for a C language implementation of a "zlib" compliant
library is available at ftp://ftp.uu.net/pub/archiving/zip/zlib/.
5. Security Considerations
A decoder that fails to check the ADLER32 checksum value may be
subject to undetected data corruption.
6. Acknowledgements
Trademarks cited in this document are the property of their
respective owners.
Jean-Loup Gailly and Mark Adler designed the zlib format and wrote
the related software described in this specification. Glenn
Randers-Pehrson converted this document to RFC and HTML format.
7. Authors' Addresses
L. Peter Deutsch
Aladdin Enterprises
203 Santa Margarita Ave.
Menlo Park, CA 94025
Phone: (415) 322-0103 (AM only)
FAX: (415) 322-1734
EMail: <ghost@aladdin.com>
Jean-Loup Gailly
EMail: <gzip@prep.ai.mit.edu>
Questions about the technical content of this specification can be
sent by email to
Jean-Loup Gailly <gzip@prep.ai.mit.edu> and
Mark Adler <madler@alumni.caltech.edu>
Editorial comments on this specification can be sent by email to
L. Peter Deutsch <ghost@aladdin.com> and
Glenn Randers-Pehrson <randeg@alumni.rpi.edu>
Deutsch & Gailly Informational [Page 8]
RFC 1950 ZLIB Compressed Data Format Specification May 1996
8. Appendix: Rationale
8.1. Preset dictionaries
A preset dictionary is specially useful to compress short input
sequences. The compressor can take advantage of the dictionary
context to encode the input in a more compact manner. The
decompressor can be initialized with the appropriate context by
virtually decompressing a compressed version of the dictionary
without producing any output. However for certain compression
algorithms such as the deflate algorithm this operation can be
achieved without actually performing any decompression.
The compressor and the decompressor must use exactly the same
dictionary. The dictionary may be fixed or may be chosen among a
certain number of predefined dictionaries, according to the kind
of input data. The decompressor can determine which dictionary has
been chosen by the compressor by checking the dictionary
identifier. This document does not specify the contents of
predefined dictionaries, since the optimal dictionaries are
application specific. Standard data formats using this feature of
the zlib specification must precisely define the allowed
dictionaries.
8.2. The Adler-32 algorithm
The Adler-32 algorithm is much faster than the CRC32 algorithm yet
still provides an extremely low probability of undetected errors.
The modulo on unsigned long accumulators can be delayed for 5552
bytes, so the modulo operation time is negligible. If the bytes
are a, b, c, the second sum is 3a + 2b + c + 3, and so is position
and order sensitive, unlike the first sum, which is just a
checksum. That 65521 is prime is important to avoid a possible
large class of two-byte errors that leave the check unchanged.
(The Fletcher checksum uses 255, which is not prime and which also
makes the Fletcher check insensitive to single byte changes 0 <->
255.)
The sum s1 is initialized to 1 instead of zero to make the length
of the sequence part of s2, so that the length does not have to be
checked separately. (Any sequence of zeroes has a Fletcher
checksum of zero.)
Deutsch & Gailly Informational [Page 9]
RFC 1950 ZLIB Compressed Data Format Specification May 1996
9. Appendix: Sample code
The following C code computes the Adler-32 checksum of a data buffer.
It is written for clarity, not for speed. The sample code is in the
ANSI C programming language. Non C users may find it easier to read
with these hints:
& Bitwise AND operator.
>> Bitwise right shift operator. When applied to an
unsigned quantity, as here, right shift inserts zero bit(s)
at the left.
<< Bitwise left shift operator. Left shift inserts zero
bit(s) at the right.
++ "n++" increments the variable n.
% modulo operator: a % b is the remainder of a divided by b.
#define BASE 65521 /* largest prime smaller than 65536 */
/*
Update a running Adler-32 checksum with the bytes buf[0..len-1]
and return the updated checksum. The Adler-32 checksum should be
initialized to 1.
Usage example:
unsigned long adler = 1L;
while (read_buffer(buffer, length) != EOF) {
adler = update_adler32(adler, buffer, length);
}
if (adler != original_adler) error();
*/
unsigned long update_adler32(unsigned long adler,
unsigned char *buf, int len)
{
unsigned long s1 = adler & 0xffff;
unsigned long s2 = (adler >> 16) & 0xffff;
int n;
for (n = 0; n < len; n++) {
s1 = (s1 + buf[n]) % BASE;
s2 = (s2 + s1) % BASE;
}
return (s2 << 16) + s1;
}
/* Return the adler32 of the bytes buf[0..len-1] */
Deutsch & Gailly Informational [Page 10]
RFC 1950 ZLIB Compressed Data Format Specification May 1996
unsigned long adler32(unsigned char *buf, int len)
{
return update_adler32(1L, buf, len);
}
Deutsch & Gailly Informational [Page 11]

955
doc/rfc1951.txt Normal file
View File

@ -0,0 +1,955 @@
Network Working Group P. Deutsch
Request for Comments: 1951 Aladdin Enterprises
Category: Informational May 1996
DEFLATE Compressed Data Format Specification version 1.3
Status of This Memo
This memo provides information for the Internet community. This memo
does not specify an Internet standard of any kind. Distribution of
this memo is unlimited.
IESG Note:
The IESG takes no position on the validity of any Intellectual
Property Rights statements contained in this document.
Notices
Copyright (c) 1996 L. Peter Deutsch
Permission is granted to copy and distribute this document for any
purpose and without charge, including translations into other
languages and incorporation into compilations, provided that the
copyright notice and this notice are preserved, and that any
substantive changes or deletions from the original are clearly
marked.
A pointer to the latest version of this and related documentation in
HTML format can be found at the URL
<ftp://ftp.uu.net/graphics/png/documents/zlib/zdoc-index.html>.
Abstract
This specification defines a lossless compressed data format that
compresses data using a combination of the LZ77 algorithm and Huffman
coding, with efficiency comparable to the best currently available
general-purpose compression methods. The data can be produced or
consumed, even for an arbitrarily long sequentially presented input
data stream, using only an a priori bounded amount of intermediate
storage. The format can be implemented readily in a manner not
covered by patents.
Deutsch Informational [Page 1]
RFC 1951 DEFLATE Compressed Data Format Specification May 1996
Table of Contents
1. Introduction ................................................... 2
1.1. Purpose ................................................... 2
1.2. Intended audience ......................................... 3
1.3. Scope ..................................................... 3
1.4. Compliance ................................................ 3
1.5. Definitions of terms and conventions used ................ 3
1.6. Changes from previous versions ............................ 4
2. Compressed representation overview ............................. 4
3. Detailed specification ......................................... 5
3.1. Overall conventions ....................................... 5
3.1.1. Packing into bytes .................................. 5
3.2. Compressed block format ................................... 6
3.2.1. Synopsis of prefix and Huffman coding ............... 6
3.2.2. Use of Huffman coding in the "deflate" format ....... 7
3.2.3. Details of block format ............................. 9
3.2.4. Non-compressed blocks (BTYPE=00) ................... 11
3.2.5. Compressed blocks (length and distance codes) ...... 11
3.2.6. Compression with fixed Huffman codes (BTYPE=01) .... 12
3.2.7. Compression with dynamic Huffman codes (BTYPE=10) .. 13
3.3. Compliance ............................................... 14
4. Compression algorithm details ................................. 14
5. References .................................................... 16
6. Security Considerations ....................................... 16
7. Source code ................................................... 16
8. Acknowledgements .............................................. 16
9. Author's Address .............................................. 17
1. Introduction
1.1. Purpose
The purpose of this specification is to define a lossless
compressed data format that:
* Is independent of CPU type, operating system, file system,
and character set, and hence can be used for interchange;
* Can be produced or consumed, even for an arbitrarily long
sequentially presented input data stream, using only an a
priori bounded amount of intermediate storage, and hence
can be used in data communications or similar structures
such as Unix filters;
* Compresses data with efficiency comparable to the best
currently available general-purpose compression methods,
and in particular considerably better than the "compress"
program;
* Can be implemented readily in a manner not covered by
patents, and hence can be practiced freely;
Deutsch Informational [Page 2]
RFC 1951 DEFLATE Compressed Data Format Specification May 1996
* Is compatible with the file format produced by the current
widely used gzip utility, in that conforming decompressors
will be able to read data produced by the existing gzip
compressor.
The data format defined by this specification does not attempt to:
* Allow random access to compressed data;
* Compress specialized data (e.g., raster graphics) as well
as the best currently available specialized algorithms.
A simple counting argument shows that no lossless compression
algorithm can compress every possible input data set. For the
format defined here, the worst case expansion is 5 bytes per 32K-
byte block, i.e., a size increase of 0.015% for large data sets.
English text usually compresses by a factor of 2.5 to 3;
executable files usually compress somewhat less; graphical data
such as raster images may compress much more.
1.2. Intended audience
This specification is intended for use by implementors of software
to compress data into "deflate" format and/or decompress data from
"deflate" format.
The text of the specification assumes a basic background in
programming at the level of bits and other primitive data
representations. Familiarity with the technique of Huffman coding
is helpful but not required.
1.3. Scope
The specification specifies a method for representing a sequence
of bytes as a (usually shorter) sequence of bits, and a method for
packing the latter bit sequence into bytes.
1.4. Compliance
Unless otherwise indicated below, a compliant decompressor must be
able to accept and decompress any data set that conforms to all
the specifications presented here; a compliant compressor must
produce data sets that conform to all the specifications presented
here.
1.5. Definitions of terms and conventions used
Byte: 8 bits stored or transmitted as a unit (same as an octet).
For this specification, a byte is exactly 8 bits, even on machines
Deutsch Informational [Page 3]
RFC 1951 DEFLATE Compressed Data Format Specification May 1996
which store a character on a number of bits different from eight.
See below, for the numbering of bits within a byte.
String: a sequence of arbitrary bytes.
1.6. Changes from previous versions
There have been no technical changes to the deflate format since
version 1.1 of this specification. In version 1.2, some
terminology was changed. Version 1.3 is a conversion of the
specification to RFC style.
2. Compressed representation overview
A compressed data set consists of a series of blocks, corresponding
to successive blocks of input data. The block sizes are arbitrary,
except that non-compressible blocks are limited to 65,535 bytes.
Each block is compressed using a combination of the LZ77 algorithm
and Huffman coding. The Huffman trees for each block are independent
of those for previous or subsequent blocks; the LZ77 algorithm may
use a reference to a duplicated string occurring in a previous block,
up to 32K input bytes before.
Each block consists of two parts: a pair of Huffman code trees that
describe the representation of the compressed data part, and a
compressed data part. (The Huffman trees themselves are compressed
using Huffman encoding.) The compressed data consists of a series of
elements of two types: literal bytes (of strings that have not been
detected as duplicated within the previous 32K input bytes), and
pointers to duplicated strings, where a pointer is represented as a
pair <length, backward distance>. The representation used in the
"deflate" format limits distances to 32K bytes and lengths to 258
bytes, but does not limit the size of a block, except for
uncompressible blocks, which are limited as noted above.
Each type of value (literals, distances, and lengths) in the
compressed data is represented using a Huffman code, using one code
tree for literals and lengths and a separate code tree for distances.
The code trees for each block appear in a compact form just before
the compressed data for that block.
Deutsch Informational [Page 4]
RFC 1951 DEFLATE Compressed Data Format Specification May 1996
3. Detailed specification
3.1. Overall conventions In the diagrams below, a box like this:
+---+
| | <-- the vertical bars might be missing
+---+
represents one byte; a box like this:
+==============+
| |
+==============+
represents a variable number of bytes.
Bytes stored within a computer do not have a "bit order", since
they are always treated as a unit. However, a byte considered as
an integer between 0 and 255 does have a most- and least-
significant bit, and since we write numbers with the most-
significant digit on the left, we also write bytes with the most-
significant bit on the left. In the diagrams below, we number the
bits of a byte so that bit 0 is the least-significant bit, i.e.,
the bits are numbered:
+--------+
|76543210|
+--------+
Within a computer, a number may occupy multiple bytes. All
multi-byte numbers in the format described here are stored with
the least-significant byte first (at the lower memory address).
For example, the decimal number 520 is stored as:
0 1
+--------+--------+
|00001000|00000010|
+--------+--------+
^ ^
| |
| + more significant byte = 2 x 256
+ less significant byte = 8
3.1.1. Packing into bytes
This document does not address the issue of the order in which
bits of a byte are transmitted on a bit-sequential medium,
since the final data format described here is byte- rather than
Deutsch Informational [Page 5]
RFC 1951 DEFLATE Compressed Data Format Specification May 1996
bit-oriented. However, we describe the compressed block format
in below, as a sequence of data elements of various bit
lengths, not a sequence of bytes. We must therefore specify
how to pack these data elements into bytes to form the final
compressed byte sequence:
* Data elements are packed into bytes in order of
increasing bit number within the byte, i.e., starting
with the least-significant bit of the byte.
* Data elements other than Huffman codes are packed
starting with the least-significant bit of the data
element.
* Huffman codes are packed starting with the most-
significant bit of the code.
In other words, if one were to print out the compressed data as
a sequence of bytes, starting with the first byte at the
*right* margin and proceeding to the *left*, with the most-
significant bit of each byte on the left as usual, one would be
able to parse the result from right to left, with fixed-width
elements in the correct MSB-to-LSB order and Huffman codes in
bit-reversed order (i.e., with the first bit of the code in the
relative LSB position).
3.2. Compressed block format
3.2.1. Synopsis of prefix and Huffman coding
Prefix coding represents symbols from an a priori known
alphabet by bit sequences (codes), one code for each symbol, in
a manner such that different symbols may be represented by bit
sequences of different lengths, but a parser can always parse
an encoded string unambiguously symbol-by-symbol.
We define a prefix code in terms of a binary tree in which the
two edges descending from each non-leaf node are labeled 0 and
1 and in which the leaf nodes correspond one-for-one with (are
labeled with) the symbols of the alphabet; then the code for a
symbol is the sequence of 0's and 1's on the edges leading from
the root to the leaf labeled with that symbol. For example:
Deutsch Informational [Page 6]
RFC 1951 DEFLATE Compressed Data Format Specification May 1996
/\ Symbol Code
0 1 ------ ----
/ \ A 00
/\ B B 1
0 1 C 011
/ \ D 010
A /\
0 1
/ \
D C
A parser can decode the next symbol from an encoded input
stream by walking down the tree from the root, at each step
choosing the edge corresponding to the next input bit.
Given an alphabet with known symbol frequencies, the Huffman
algorithm allows the construction of an optimal prefix code
(one which represents strings with those symbol frequencies
using the fewest bits of any possible prefix codes for that
alphabet). Such a code is called a Huffman code. (See
reference [1] in Chapter 5, references for additional
information on Huffman codes.)
Note that in the "deflate" format, the Huffman codes for the
various alphabets must not exceed certain maximum code lengths.
This constraint complicates the algorithm for computing code
lengths from symbol frequencies. Again, see Chapter 5,
references for details.
3.2.2. Use of Huffman coding in the "deflate" format
The Huffman codes used for each alphabet in the "deflate"
format have two additional rules:
* All codes of a given bit length have lexicographically
consecutive values, in the same order as the symbols
they represent;
* Shorter codes lexicographically precede longer codes.
Deutsch Informational [Page 7]
RFC 1951 DEFLATE Compressed Data Format Specification May 1996
We could recode the example above to follow this rule as
follows, assuming that the order of the alphabet is ABCD:
Symbol Code
------ ----
A 10
B 0
C 110
D 111
I.e., 0 precedes 10 which precedes 11x, and 110 and 111 are
lexicographically consecutive.
Given this rule, we can define the Huffman code for an alphabet
just by giving the bit lengths of the codes for each symbol of
the alphabet in order; this is sufficient to determine the
actual codes. In our example, the code is completely defined
by the sequence of bit lengths (2, 1, 3, 3). The following
algorithm generates the codes as integers, intended to be read
from most- to least-significant bit. The code lengths are
initially in tree[I].Len; the codes are produced in
tree[I].Code.
1) Count the number of codes for each code length. Let
bl_count[N] be the number of codes of length N, N >= 1.
2) Find the numerical value of the smallest code for each
code length:
code = 0;
bl_count[0] = 0;
for (bits = 1; bits <= MAX_BITS; bits++) {
code = (code + bl_count[bits-1]) << 1;
next_code[bits] = code;
}
3) Assign numerical values to all codes, using consecutive
values for all codes of the same length with the base
values determined at step 2. Codes that are never used
(which have a bit length of zero) must not be assigned a
value.
for (n = 0; n <= max_code; n++) {
len = tree[n].Len;
if (len != 0) {
tree[n].Code = next_code[len];
next_code[len]++;
}
Deutsch Informational [Page 8]
RFC 1951 DEFLATE Compressed Data Format Specification May 1996
}
Example:
Consider the alphabet ABCDEFGH, with bit lengths (3, 3, 3, 3,
3, 2, 4, 4). After step 1, we have:
N bl_count[N]
- -----------
2 1
3 5
4 2
Step 2 computes the following next_code values:
N next_code[N]
- ------------
1 0
2 0
3 2
4 14
Step 3 produces the following code values:
Symbol Length Code
------ ------ ----
A 3 010
B 3 011
C 3 100
D 3 101
E 3 110
F 2 00
G 4 1110
H 4 1111
3.2.3. Details of block format
Each block of compressed data begins with 3 header bits
containing the following data:
first bit BFINAL
next 2 bits BTYPE
Note that the header bits do not necessarily begin on a byte
boundary, since a block does not necessarily occupy an integral
number of bytes.
Deutsch Informational [Page 9]
RFC 1951 DEFLATE Compressed Data Format Specification May 1996
BFINAL is set if and only if this is the last block of the data
set.
BTYPE specifies how the data are compressed, as follows:
00 - no compression
01 - compressed with fixed Huffman codes
10 - compressed with dynamic Huffman codes
11 - reserved (error)
The only difference between the two compressed cases is how the
Huffman codes for the literal/length and distance alphabets are
defined.
In all cases, the decoding algorithm for the actual data is as
follows:
do
read block header from input stream.
if stored with no compression
skip any remaining bits in current partially
processed byte
read LEN and NLEN (see next section)
copy LEN bytes of data to output
otherwise
if compressed with dynamic Huffman codes
read representation of code trees (see
subsection below)
loop (until end of block code recognized)
decode literal/length value from input stream
if value < 256
copy value (literal byte) to output stream
otherwise
if value = end of block (256)
break from loop
otherwise (value = 257..285)
decode distance from input stream
move backwards distance bytes in the output
stream, and copy length bytes from this
position to the output stream.
end loop
while not last block
Note that a duplicated string reference may refer to a string
in a previous block; i.e., the backward distance may cross one
or more block boundaries. However a distance cannot refer past
the beginning of the output stream. (An application using a
Deutsch Informational [Page 10]
RFC 1951 DEFLATE Compressed Data Format Specification May 1996
preset dictionary might discard part of the output stream; a
distance can refer to that part of the output stream anyway)
Note also that the referenced string may overlap the current
position; for example, if the last 2 bytes decoded have values
X and Y, a string reference with <length = 5, distance = 2>
adds X,Y,X,Y,X to the output stream.
We now specify each compression method in turn.
3.2.4. Non-compressed blocks (BTYPE=00)
Any bits of input up to the next byte boundary are ignored.
The rest of the block consists of the following information:
0 1 2 3 4...
+---+---+---+---+================================+
| LEN | NLEN |... LEN bytes of literal data...|
+---+---+---+---+================================+
LEN is the number of data bytes in the block. NLEN is the
one's complement of LEN.
3.2.5. Compressed blocks (length and distance codes)
As noted above, encoded data blocks in the "deflate" format
consist of sequences of symbols drawn from three conceptually
distinct alphabets: either literal bytes, from the alphabet of
byte values (0..255), or <length, backward distance> pairs,
where the length is drawn from (3..258) and the distance is
drawn from (1..32,768). In fact, the literal and length
alphabets are merged into a single alphabet (0..285), where
values 0..255 represent literal bytes, the value 256 indicates
end-of-block, and values 257..285 represent length codes
(possibly in conjunction with extra bits following the symbol
code) as follows:
Deutsch Informational [Page 11]
RFC 1951 DEFLATE Compressed Data Format Specification May 1996
Extra Extra Extra
Code Bits Length(s) Code Bits Lengths Code Bits Length(s)
---- ---- ------ ---- ---- ------- ---- ---- -------
257 0 3 267 1 15,16 277 4 67-82
258 0 4 268 1 17,18 278 4 83-98
259 0 5 269 2 19-22 279 4 99-114
260 0 6 270 2 23-26 280 4 115-130
261 0 7 271 2 27-30 281 5 131-162
262 0 8 272 2 31-34 282 5 163-194
263 0 9 273 3 35-42 283 5 195-226
264 0 10 274 3 43-50 284 5 227-257
265 1 11,12 275 3 51-58 285 0 258
266 1 13,14 276 3 59-66
The extra bits should be interpreted as a machine integer
stored with the most-significant bit first, e.g., bits 1110
represent the value 14.
Extra Extra Extra
Code Bits Dist Code Bits Dist Code Bits Distance
---- ---- ---- ---- ---- ------ ---- ---- --------
0 0 1 10 4 33-48 20 9 1025-1536
1 0 2 11 4 49-64 21 9 1537-2048
2 0 3 12 5 65-96 22 10 2049-3072
3 0 4 13 5 97-128 23 10 3073-4096
4 1 5,6 14 6 129-192 24 11 4097-6144
5 1 7,8 15 6 193-256 25 11 6145-8192
6 2 9-12 16 7 257-384 26 12 8193-12288
7 2 13-16 17 7 385-512 27 12 12289-16384
8 3 17-24 18 8 513-768 28 13 16385-24576
9 3 25-32 19 8 769-1024 29 13 24577-32768
3.2.6. Compression with fixed Huffman codes (BTYPE=01)
The Huffman codes for the two alphabets are fixed, and are not
represented explicitly in the data. The Huffman code lengths
for the literal/length alphabet are:
Lit Value Bits Codes
--------- ---- -----
0 - 143 8 00110000 through
10111111
144 - 255 9 110010000 through
111111111
256 - 279 7 0000000 through
0010111
280 - 287 8 11000000 through
11000111
Deutsch Informational [Page 12]
RFC 1951 DEFLATE Compressed Data Format Specification May 1996
The code lengths are sufficient to generate the actual codes,
as described above; we show the codes in the table for added
clarity. Literal/length values 286-287 will never actually
occur in the compressed data, but participate in the code
construction.
Distance codes 0-31 are represented by (fixed-length) 5-bit
codes, with possible additional bits as shown in the table
shown in Paragraph 3.2.5, above. Note that distance codes 30-
31 will never actually occur in the compressed data.
3.2.7. Compression with dynamic Huffman codes (BTYPE=10)
The Huffman codes for the two alphabets appear in the block
immediately after the header bits and before the actual
compressed data, first the literal/length code and then the
distance code. Each code is defined by a sequence of code
lengths, as discussed in Paragraph 3.2.2, above. For even
greater compactness, the code length sequences themselves are
compressed using a Huffman code. The alphabet for code lengths
is as follows:
0 - 15: Represent code lengths of 0 - 15
16: Copy the previous code length 3 - 6 times.
The next 2 bits indicate repeat length
(0 = 3, ... , 3 = 6)
Example: Codes 8, 16 (+2 bits 11),
16 (+2 bits 10) will expand to
12 code lengths of 8 (1 + 6 + 5)
17: Repeat a code length of 0 for 3 - 10 times.
(3 bits of length)
18: Repeat a code length of 0 for 11 - 138 times
(7 bits of length)
A code length of 0 indicates that the corresponding symbol in
the literal/length or distance alphabet will not occur in the
block, and should not participate in the Huffman code
construction algorithm given earlier. If only one distance
code is used, it is encoded using one bit, not zero bits; in
this case there is a single code length of one, with one unused
code. One distance code of zero bits means that there are no
distance codes used at all (the data is all literals).
We can now define the format of the block:
5 Bits: HLIT, # of Literal/Length codes - 257 (257 - 286)
5 Bits: HDIST, # of Distance codes - 1 (1 - 32)
4 Bits: HCLEN, # of Code Length codes - 4 (4 - 19)
Deutsch Informational [Page 13]
RFC 1951 DEFLATE Compressed Data Format Specification May 1996
(HCLEN + 4) x 3 bits: code lengths for the code length
alphabet given just above, in the order: 16, 17, 18,
0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15
These code lengths are interpreted as 3-bit integers
(0-7); as above, a code length of 0 means the
corresponding symbol (literal/length or distance code
length) is not used.
HLIT + 257 code lengths for the literal/length alphabet,
encoded using the code length Huffman code
HDIST + 1 code lengths for the distance alphabet,
encoded using the code length Huffman code
The actual compressed data of the block,
encoded using the literal/length and distance Huffman
codes
The literal/length symbol 256 (end of data),
encoded using the literal/length Huffman code
The code length repeat codes can cross from HLIT + 257 to the
HDIST + 1 code lengths. In other words, all code lengths form
a single sequence of HLIT + HDIST + 258 values.
3.3. Compliance
A compressor may limit further the ranges of values specified in
the previous section and still be compliant; for example, it may
limit the range of backward pointers to some value smaller than
32K. Similarly, a compressor may limit the size of blocks so that
a compressible block fits in memory.
A compliant decompressor must accept the full range of possible
values defined in the previous section, and must accept blocks of
arbitrary size.
4. Compression algorithm details
While it is the intent of this document to define the "deflate"
compressed data format without reference to any particular
compression algorithm, the format is related to the compressed
formats produced by LZ77 (Lempel-Ziv 1977, see reference [2] below);
since many variations of LZ77 are patented, it is strongly
recommended that the implementor of a compressor follow the general
algorithm presented here, which is known not to be patented per se.
The material in this section is not part of the definition of the
Deutsch Informational [Page 14]
RFC 1951 DEFLATE Compressed Data Format Specification May 1996
specification per se, and a compressor need not follow it in order to
be compliant.
The compressor terminates a block when it determines that starting a
new block with fresh trees would be useful, or when the block size
fills up the compressor's block buffer.
The compressor uses a chained hash table to find duplicated strings,
using a hash function that operates on 3-byte sequences. At any
given point during compression, let XYZ be the next 3 input bytes to
be examined (not necessarily all different, of course). First, the
compressor examines the hash chain for XYZ. If the chain is empty,
the compressor simply writes out X as a literal byte and advances one
byte in the input. If the hash chain is not empty, indicating that
the sequence XYZ (or, if we are unlucky, some other 3 bytes with the
same hash function value) has occurred recently, the compressor
compares all strings on the XYZ hash chain with the actual input data
sequence starting at the current point, and selects the longest
match.
The compressor searches the hash chains starting with the most recent
strings, to favor small distances and thus take advantage of the
Huffman encoding. The hash chains are singly linked. There are no
deletions from the hash chains; the algorithm simply discards matches
that are too old. To avoid a worst-case situation, very long hash
chains are arbitrarily truncated at a certain length, determined by a
run-time parameter.
To improve overall compression, the compressor optionally defers the
selection of matches ("lazy matching"): after a match of length N has
been found, the compressor searches for a longer match starting at
the next input byte. If it finds a longer match, it truncates the
previous match to a length of one (thus producing a single literal
byte) and then emits the longer match. Otherwise, it emits the
original match, and, as described above, advances N bytes before
continuing.
Run-time parameters also control this "lazy match" procedure. If
compression ratio is most important, the compressor attempts a
complete second search regardless of the length of the first match.
In the normal case, if the current match is "long enough", the
compressor reduces the search for a longer match, thus speeding up
the process. If speed is most important, the compressor inserts new
strings in the hash table only when no match was found, or when the
match is not "too long". This degrades the compression ratio but
saves time since there are both fewer insertions and fewer searches.
Deutsch Informational [Page 15]
RFC 1951 DEFLATE Compressed Data Format Specification May 1996
5. References
[1] Huffman, D. A., "A Method for the Construction of Minimum
Redundancy Codes", Proceedings of the Institute of Radio
Engineers, September 1952, Volume 40, Number 9, pp. 1098-1101.
[2] Ziv J., Lempel A., "A Universal Algorithm for Sequential Data
Compression", IEEE Transactions on Information Theory, Vol. 23,
No. 3, pp. 337-343.
[3] Gailly, J.-L., and Adler, M., ZLIB documentation and sources,
available in ftp://ftp.uu.net/pub/archiving/zip/doc/
[4] Gailly, J.-L., and Adler, M., GZIP documentation and sources,
available as gzip-*.tar in ftp://prep.ai.mit.edu/pub/gnu/
[5] Schwartz, E. S., and Kallick, B. "Generating a canonical prefix
encoding." Comm. ACM, 7,3 (Mar. 1964), pp. 166-169.
[6] Hirschberg and Lelewer, "Efficient decoding of prefix codes,"
Comm. ACM, 33,4, April 1990, pp. 449-459.
6. Security Considerations
Any data compression method involves the reduction of redundancy in
the data. Consequently, any corruption of the data is likely to have
severe effects and be difficult to correct. Uncompressed text, on
the other hand, will probably still be readable despite the presence
of some corrupted bytes.
It is recommended that systems using this data format provide some
means of validating the integrity of the compressed data. See
reference [3], for example.
7. Source code
Source code for a C language implementation of a "deflate" compliant
compressor and decompressor is available within the zlib package at
ftp://ftp.uu.net/pub/archiving/zip/zlib/.
8. Acknowledgements
Trademarks cited in this document are the property of their
respective owners.
Phil Katz designed the deflate format. Jean-Loup Gailly and Mark
Adler wrote the related software described in this specification.
Glenn Randers-Pehrson converted this document to RFC and HTML format.
Deutsch Informational [Page 16]
RFC 1951 DEFLATE Compressed Data Format Specification May 1996
9. Author's Address
L. Peter Deutsch
Aladdin Enterprises
203 Santa Margarita Ave.
Menlo Park, CA 94025
Phone: (415) 322-0103 (AM only)
FAX: (415) 322-1734
EMail: <ghost@aladdin.com>
Questions about the technical content of this specification can be
sent by email to:
Jean-Loup Gailly <gzip@prep.ai.mit.edu> and
Mark Adler <madler@alumni.caltech.edu>
Editorial comments on this specification can be sent by email to:
L. Peter Deutsch <ghost@aladdin.com> and
Glenn Randers-Pehrson <randeg@alumni.rpi.edu>
Deutsch Informational [Page 17]

675
doc/rfc1952.txt Normal file
View File

@ -0,0 +1,675 @@
Network Working Group P. Deutsch
Request for Comments: 1952 Aladdin Enterprises
Category: Informational May 1996
GZIP file format specification version 4.3
Status of This Memo
This memo provides information for the Internet community. This memo
does not specify an Internet standard of any kind. Distribution of
this memo is unlimited.
IESG Note:
The IESG takes no position on the validity of any Intellectual
Property Rights statements contained in this document.
Notices
Copyright (c) 1996 L. Peter Deutsch
Permission is granted to copy and distribute this document for any
purpose and without charge, including translations into other
languages and incorporation into compilations, provided that the
copyright notice and this notice are preserved, and that any
substantive changes or deletions from the original are clearly
marked.
A pointer to the latest version of this and related documentation in
HTML format can be found at the URL
<ftp://ftp.uu.net/graphics/png/documents/zlib/zdoc-index.html>.
Abstract
This specification defines a lossless compressed data format that is
compatible with the widely used GZIP utility. The format includes a
cyclic redundancy check value for detecting data corruption. The
format presently uses the DEFLATE method of compression but can be
easily extended to use other compression methods. The format can be
implemented readily in a manner not covered by patents.
Deutsch Informational [Page 1]
RFC 1952 GZIP File Format Specification May 1996
Table of Contents
1. Introduction ................................................... 2
1.1. Purpose ................................................... 2
1.2. Intended audience ......................................... 3
1.3. Scope ..................................................... 3
1.4. Compliance ................................................ 3
1.5. Definitions of terms and conventions used ................. 3
1.6. Changes from previous versions ............................ 3
2. Detailed specification ......................................... 4
2.1. Overall conventions ....................................... 4
2.2. File format ............................................... 5
2.3. Member format ............................................. 5
2.3.1. Member header and trailer ........................... 6
2.3.1.1. Extra field ................................... 8
2.3.1.2. Compliance .................................... 9
3. References .................................................. 9
4. Security Considerations .................................... 10
5. Acknowledgements ........................................... 10
6. Author's Address ........................................... 10
7. Appendix: Jean-Loup Gailly's gzip utility .................. 11
8. Appendix: Sample CRC Code .................................. 11
1. Introduction
1.1. Purpose
The purpose of this specification is to define a lossless
compressed data format that:
* Is independent of CPU type, operating system, file system,
and character set, and hence can be used for interchange;
* Can compress or decompress a data stream (as opposed to a
randomly accessible file) to produce another data stream,
using only an a priori bounded amount of intermediate
storage, and hence can be used in data communications or
similar structures such as Unix filters;
* Compresses data with efficiency comparable to the best
currently available general-purpose compression methods,
and in particular considerably better than the "compress"
program;
* Can be implemented readily in a manner not covered by
patents, and hence can be practiced freely;
* Is compatible with the file format produced by the current
widely used gzip utility, in that conforming decompressors
will be able to read data produced by the existing gzip
compressor.
Deutsch Informational [Page 2]
RFC 1952 GZIP File Format Specification May 1996
The data format defined by this specification does not attempt to:
* Provide random access to compressed data;
* Compress specialized data (e.g., raster graphics) as well as
the best currently available specialized algorithms.
1.2. Intended audience
This specification is intended for use by implementors of software
to compress data into gzip format and/or decompress data from gzip
format.
The text of the specification assumes a basic background in
programming at the level of bits and other primitive data
representations.
1.3. Scope
The specification specifies a compression method and a file format
(the latter assuming only that a file can store a sequence of
arbitrary bytes). It does not specify any particular interface to
a file system or anything about character sets or encodings
(except for file names and comments, which are optional).
1.4. Compliance
Unless otherwise indicated below, a compliant decompressor must be
able to accept and decompress any file that conforms to all the
specifications presented here; a compliant compressor must produce
files that conform to all the specifications presented here. The
material in the appendices is not part of the specification per se
and is not relevant to compliance.
1.5. Definitions of terms and conventions used
byte: 8 bits stored or transmitted as a unit (same as an octet).
(For this specification, a byte is exactly 8 bits, even on
machines which store a character on a number of bits different
from 8.) See below for the numbering of bits within a byte.
1.6. Changes from previous versions
There have been no technical changes to the gzip format since
version 4.1 of this specification. In version 4.2, some
terminology was changed, and the sample CRC code was rewritten for
clarity and to eliminate the requirement for the caller to do pre-
and post-conditioning. Version 4.3 is a conversion of the
specification to RFC style.
Deutsch Informational [Page 3]
RFC 1952 GZIP File Format Specification May 1996
2. Detailed specification
2.1. Overall conventions
In the diagrams below, a box like this:
+---+
| | <-- the vertical bars might be missing
+---+
represents one byte; a box like this:
+==============+
| |
+==============+
represents a variable number of bytes.
Bytes stored within a computer do not have a "bit order", since
they are always treated as a unit. However, a byte considered as
an integer between 0 and 255 does have a most- and least-
significant bit, and since we write numbers with the most-
significant digit on the left, we also write bytes with the most-
significant bit on the left. In the diagrams below, we number the
bits of a byte so that bit 0 is the least-significant bit, i.e.,
the bits are numbered:
+--------+
|76543210|
+--------+
This document does not address the issue of the order in which
bits of a byte are transmitted on a bit-sequential medium, since
the data format described here is byte- rather than bit-oriented.
Within a computer, a number may occupy multiple bytes. All
multi-byte numbers in the format described here are stored with
the least-significant byte first (at the lower memory address).
For example, the decimal number 520 is stored as:
0 1
+--------+--------+
|00001000|00000010|
+--------+--------+
^ ^
| |
| + more significant byte = 2 x 256
+ less significant byte = 8
Deutsch Informational [Page 4]
RFC 1952 GZIP File Format Specification May 1996
2.2. File format
A gzip file consists of a series of "members" (compressed data
sets). The format of each member is specified in the following
section. The members simply appear one after another in the file,
with no additional information before, between, or after them.
2.3. Member format
Each member has the following structure:
+---+---+---+---+---+---+---+---+---+---+
|ID1|ID2|CM |FLG| MTIME |XFL|OS | (more-->)
+---+---+---+---+---+---+---+---+---+---+
(if FLG.FEXTRA set)
+---+---+=================================+
| XLEN |...XLEN bytes of "extra field"...| (more-->)
+---+---+=================================+
(if FLG.FNAME set)
+=========================================+
|...original file name, zero-terminated...| (more-->)
+=========================================+
(if FLG.FCOMMENT set)
+===================================+
|...file comment, zero-terminated...| (more-->)
+===================================+
(if FLG.FHCRC set)
+---+---+
| CRC16 |
+---+---+
+=======================+
|...compressed blocks...| (more-->)
+=======================+
0 1 2 3 4 5 6 7
+---+---+---+---+---+---+---+---+
| CRC32 | ISIZE |
+---+---+---+---+---+---+---+---+
Deutsch Informational [Page 5]
RFC 1952 GZIP File Format Specification May 1996
2.3.1. Member header and trailer
ID1 (IDentification 1)
ID2 (IDentification 2)
These have the fixed values ID1 = 31 (0x1f, \037), ID2 = 139
(0x8b, \213), to identify the file as being in gzip format.
CM (Compression Method)
This identifies the compression method used in the file. CM
= 0-7 are reserved. CM = 8 denotes the "deflate"
compression method, which is the one customarily used by
gzip and which is documented elsewhere.
FLG (FLaGs)
This flag byte is divided into individual bits as follows:
bit 0 FTEXT
bit 1 FHCRC
bit 2 FEXTRA
bit 3 FNAME
bit 4 FCOMMENT
bit 5 reserved
bit 6 reserved
bit 7 reserved
If FTEXT is set, the file is probably ASCII text. This is
an optional indication, which the compressor may set by
checking a small amount of the input data to see whether any
non-ASCII characters are present. In case of doubt, FTEXT
is cleared, indicating binary data. For systems which have
different file formats for ascii text and binary data, the
decompressor can use FTEXT to choose the appropriate format.
We deliberately do not specify the algorithm used to set
this bit, since a compressor always has the option of
leaving it cleared and a decompressor always has the option
of ignoring it and letting some other program handle issues
of data conversion.
If FHCRC is set, a CRC16 for the gzip header is present,
immediately before the compressed data. The CRC16 consists
of the two least significant bytes of the CRC32 for all
bytes of the gzip header up to and not including the CRC16.
[The FHCRC bit was never set by versions of gzip up to
1.2.4, even though it was documented with a different
meaning in gzip 1.2.4.]
If FEXTRA is set, optional extra fields are present, as
described in a following section.
Deutsch Informational [Page 6]
RFC 1952 GZIP File Format Specification May 1996
If FNAME is set, an original file name is present,
terminated by a zero byte. The name must consist of ISO
8859-1 (LATIN-1) characters; on operating systems using
EBCDIC or any other character set for file names, the name
must be translated to the ISO LATIN-1 character set. This
is the original name of the file being compressed, with any
directory components removed, and, if the file being
compressed is on a file system with case insensitive names,
forced to lower case. There is no original file name if the
data was compressed from a source other than a named file;
for example, if the source was stdin on a Unix system, there
is no file name.
If FCOMMENT is set, a zero-terminated file comment is
present. This comment is not interpreted; it is only
intended for human consumption. The comment must consist of
ISO 8859-1 (LATIN-1) characters. Line breaks should be
denoted by a single line feed character (10 decimal).
Reserved FLG bits must be zero.
MTIME (Modification TIME)
This gives the most recent modification time of the original
file being compressed. The time is in Unix format, i.e.,
seconds since 00:00:00 GMT, Jan. 1, 1970. (Note that this
may cause problems for MS-DOS and other systems that use
local rather than Universal time.) If the compressed data
did not come from a file, MTIME is set to the time at which
compression started. MTIME = 0 means no time stamp is
available.
XFL (eXtra FLags)
These flags are available for use by specific compression
methods. The "deflate" method (CM = 8) sets these flags as
follows:
XFL = 2 - compressor used maximum compression,
slowest algorithm
XFL = 4 - compressor used fastest algorithm
OS (Operating System)
This identifies the type of file system on which compression
took place. This may be useful in determining end-of-line
convention for text files. The currently defined values are
as follows:
Deutsch Informational [Page 7]
RFC 1952 GZIP File Format Specification May 1996
0 - FAT filesystem (MS-DOS, OS/2, NT/Win32)
1 - Amiga
2 - VMS (or OpenVMS)
3 - Unix
4 - VM/CMS
5 - Atari TOS
6 - HPFS filesystem (OS/2, NT)
7 - Macintosh
8 - Z-System
9 - CP/M
10 - TOPS-20
11 - NTFS filesystem (NT)
12 - QDOS
13 - Acorn RISCOS
255 - unknown
XLEN (eXtra LENgth)
If FLG.FEXTRA is set, this gives the length of the optional
extra field. See below for details.
CRC32 (CRC-32)
This contains a Cyclic Redundancy Check value of the
uncompressed data computed according to CRC-32 algorithm
used in the ISO 3309 standard and in section 8.1.1.6.2 of
ITU-T recommendation V.42. (See http://www.iso.ch for
ordering ISO documents. See gopher://info.itu.ch for an
online version of ITU-T V.42.)
ISIZE (Input SIZE)
This contains the size of the original (uncompressed) input
data modulo 2^32.
2.3.1.1. Extra field
If the FLG.FEXTRA bit is set, an "extra field" is present in
the header, with total length XLEN bytes. It consists of a
series of subfields, each of the form:
+---+---+---+---+==================================+
|SI1|SI2| LEN |... LEN bytes of subfield data ...|
+---+---+---+---+==================================+
SI1 and SI2 provide a subfield ID, typically two ASCII letters
with some mnemonic value. Jean-Loup Gailly
<gzip@prep.ai.mit.edu> is maintaining a registry of subfield
IDs; please send him any subfield ID you wish to use. Subfield
IDs with SI2 = 0 are reserved for future use. The following
IDs are currently defined:
Deutsch Informational [Page 8]
RFC 1952 GZIP File Format Specification May 1996
SI1 SI2 Data
---------- ---------- ----
0x41 ('A') 0x70 ('P') Apollo file type information
LEN gives the length of the subfield data, excluding the 4
initial bytes.
2.3.1.2. Compliance
A compliant compressor must produce files with correct ID1,
ID2, CM, CRC32, and ISIZE, but may set all the other fields in
the fixed-length part of the header to default values (255 for
OS, 0 for all others). The compressor must set all reserved
bits to zero.
A compliant decompressor must check ID1, ID2, and CM, and
provide an error indication if any of these have incorrect
values. It must examine FEXTRA/XLEN, FNAME, FCOMMENT and FHCRC
at least so it can skip over the optional fields if they are
present. It need not examine any other part of the header or
trailer; in particular, a decompressor may ignore FTEXT and OS
and always produce binary output, and still be compliant. A
compliant decompressor must give an error indication if any
reserved bit is non-zero, since such a bit could indicate the
presence of a new field that would cause subsequent data to be
interpreted incorrectly.
3. References
[1] "Information Processing - 8-bit single-byte coded graphic
character sets - Part 1: Latin alphabet No.1" (ISO 8859-1:1987).
The ISO 8859-1 (Latin-1) character set is a superset of 7-bit
ASCII. Files defining this character set are available as
iso_8859-1.* in ftp://ftp.uu.net/graphics/png/documents/
[2] ISO 3309
[3] ITU-T recommendation V.42
[4] Deutsch, L.P.,"DEFLATE Compressed Data Format Specification",
available in ftp://ftp.uu.net/pub/archiving/zip/doc/
[5] Gailly, J.-L., GZIP documentation, available as gzip-*.tar in
ftp://prep.ai.mit.edu/pub/gnu/
[6] Sarwate, D.V., "Computation of Cyclic Redundancy Checks via Table
Look-Up", Communications of the ACM, 31(8), pp.1008-1013.
Deutsch Informational [Page 9]
RFC 1952 GZIP File Format Specification May 1996
[7] Schwaderer, W.D., "CRC Calculation", April 85 PC Tech Journal,
pp.118-133.
[8] ftp://ftp.adelaide.edu.au/pub/rocksoft/papers/crc_v3.txt,
describing the CRC concept.
4. Security Considerations
Any data compression method involves the reduction of redundancy in
the data. Consequently, any corruption of the data is likely to have
severe effects and be difficult to correct. Uncompressed text, on
the other hand, will probably still be readable despite the presence
of some corrupted bytes.
It is recommended that systems using this data format provide some
means of validating the integrity of the compressed data, such as by
setting and checking the CRC-32 check value.
5. Acknowledgements
Trademarks cited in this document are the property of their
respective owners.
Jean-Loup Gailly designed the gzip format and wrote, with Mark Adler,
the related software described in this specification. Glenn
Randers-Pehrson converted this document to RFC and HTML format.
6. Author's Address
L. Peter Deutsch
Aladdin Enterprises
203 Santa Margarita Ave.
Menlo Park, CA 94025
Phone: (415) 322-0103 (AM only)
FAX: (415) 322-1734
EMail: <ghost@aladdin.com>
Questions about the technical content of this specification can be
sent by email to:
Jean-Loup Gailly <gzip@prep.ai.mit.edu> and
Mark Adler <madler@alumni.caltech.edu>
Editorial comments on this specification can be sent by email to:
L. Peter Deutsch <ghost@aladdin.com> and
Glenn Randers-Pehrson <randeg@alumni.rpi.edu>
Deutsch Informational [Page 10]
RFC 1952 GZIP File Format Specification May 1996
7. Appendix: Jean-Loup Gailly's gzip utility
The most widely used implementation of gzip compression, and the
original documentation on which this specification is based, were
created by Jean-Loup Gailly <gzip@prep.ai.mit.edu>. Since this
implementation is a de facto standard, we mention some more of its
features here. Again, the material in this section is not part of
the specification per se, and implementations need not follow it to
be compliant.
When compressing or decompressing a file, gzip preserves the
protection, ownership, and modification time attributes on the local
file system, since there is no provision for representing protection
attributes in the gzip file format itself. Since the file format
includes a modification time, the gzip decompressor provides a
command line switch that assigns the modification time from the file,
rather than the local modification time of the compressed input, to
the decompressed output.
8. Appendix: Sample CRC Code
The following sample code represents a practical implementation of
the CRC (Cyclic Redundancy Check). (See also ISO 3309 and ITU-T V.42
for a formal specification.)
The sample code is in the ANSI C programming language. Non C users
may find it easier to read with these hints:
& Bitwise AND operator.
^ Bitwise exclusive-OR operator.
>> Bitwise right shift operator. When applied to an
unsigned quantity, as here, right shift inserts zero
bit(s) at the left.
! Logical NOT operator.
++ "n++" increments the variable n.
0xNNN 0x introduces a hexadecimal (base 16) constant.
Suffix L indicates a long value (at least 32 bits).
/* Table of CRCs of all 8-bit messages. */
unsigned long crc_table[256];
/* Flag: has the table been computed? Initially false. */
int crc_table_computed = 0;
/* Make the table for a fast CRC. */
void make_crc_table(void)
{
unsigned long c;
Deutsch Informational [Page 11]
RFC 1952 GZIP File Format Specification May 1996
int n, k;
for (n = 0; n < 256; n++) {
c = (unsigned long) n;
for (k = 0; k < 8; k++) {
if (c & 1) {
c = 0xedb88320L ^ (c >> 1);
} else {
c = c >> 1;
}
}
crc_table[n] = c;
}
crc_table_computed = 1;
}
/*
Update a running crc with the bytes buf[0..len-1] and return
the updated crc. The crc should be initialized to zero. Pre- and
post-conditioning (one's complement) is performed within this
function so it shouldn't be done by the caller. Usage example:
unsigned long crc = 0L;
while (read_buffer(buffer, length) != EOF) {
crc = update_crc(crc, buffer, length);
}
if (crc != original_crc) error();
*/
unsigned long update_crc(unsigned long crc,
unsigned char *buf, int len)
{
unsigned long c = crc ^ 0xffffffffL;
int n;
if (!crc_table_computed)
make_crc_table();
for (n = 0; n < len; n++) {
c = crc_table[(c ^ buf[n]) & 0xff] ^ (c >> 8);
}
return c ^ 0xffffffffL;
}
/* Return the CRC of the bytes buf[0..len-1]. */
unsigned long crc(unsigned char *buf, int len)
{
return update_crc(0L, buf, len);
}
Deutsch Informational [Page 12]

107
doc/txtvsbin.txt Normal file
View File

@ -0,0 +1,107 @@
A Fast Method for Identifying Plain Text Files
==============================================
Introduction
------------
Given a file coming from an unknown source, it is sometimes desirable
to find out whether the format of that file is plain text. Although
this may appear like a simple task, a fully accurate detection of the
file type requires heavy-duty semantic analysis on the file contents.
It is, however, possible to obtain satisfactory results by employing
various heuristics.
Previous versions of PKZip and other zip-compatible compression tools
were using a crude detection scheme: if more than 80% (4/5) of the bytes
found in a certain buffer are within the range [7..127], the file is
labeled as plain text, otherwise it is labeled as binary. A prominent
limitation of this scheme is the restriction to Latin-based alphabets.
Other alphabets, like Greek, Cyrillic or Asian, make extensive use of
the bytes within the range [128..255], and texts using these alphabets
are most often misidentified by this scheme; in other words, the rate
of false negatives is sometimes too high, which means that the recall
is low. Another weakness of this scheme is a reduced precision, due to
the false positives that may occur when binary files containing large
amounts of textual characters are misidentified as plain text.
In this article we propose a new, simple detection scheme that features
a much increased precision and a near-100% recall. This scheme is
designed to work on ASCII, Unicode and other ASCII-derived alphabets,
and it handles single-byte encodings (ISO-8859, MacRoman, KOI8, etc.)
and variable-sized encodings (ISO-2022, UTF-8, etc.). Wider encodings
(UCS-2/UTF-16 and UCS-4/UTF-32) are not handled, however.
The Algorithm
-------------
The algorithm works by dividing the set of bytecodes [0..255] into three
categories:
- The white list of textual bytecodes:
9 (TAB), 10 (LF), 13 (CR), 32 (SPACE) to 255.
- The gray list of tolerated bytecodes:
7 (BEL), 8 (BS), 11 (VT), 12 (FF), 26 (SUB), 27 (ESC).
- The black list of undesired, non-textual bytecodes:
0 (NUL) to 6, 14 to 31.
If a file contains at least one byte that belongs to the white list and
no byte that belongs to the black list, then the file is categorized as
plain text; otherwise, it is categorized as binary. (The boundary case,
when the file is empty, automatically falls into the latter category.)
Rationale
---------
The idea behind this algorithm relies on two observations.
The first observation is that, although the full range of 7-bit codes
[0..127] is properly specified by the ASCII standard, most control
characters in the range [0..31] are not used in practice. The only
widely-used, almost universally-portable control codes are 9 (TAB),
10 (LF) and 13 (CR). There are a few more control codes that are
recognized on a reduced range of platforms and text viewers/editors:
7 (BEL), 8 (BS), 11 (VT), 12 (FF), 26 (SUB) and 27 (ESC); but these
codes are rarely (if ever) used alone, without being accompanied by
some printable text. Even the newer, portable text formats such as
XML avoid using control characters outside the list mentioned here.
The second observation is that most of the binary files tend to contain
control characters, especially 0 (NUL). Even though the older text
detection schemes observe the presence of non-ASCII codes from the range
[128..255], the precision rarely has to suffer if this upper range is
labeled as textual, because the files that are genuinely binary tend to
contain both control characters and codes from the upper range. On the
other hand, the upper range needs to be labeled as textual, because it
is used by virtually all ASCII extensions. In particular, this range is
used for encoding non-Latin scripts.
Since there is no counting involved, other than simply observing the
presence or the absence of some byte values, the algorithm produces
consistent results, regardless what alphabet encoding is being used.
(If counting were involved, it could be possible to obtain different
results on a text encoded, say, using ISO-8859-16 versus UTF-8.)
There is an extra category of plain text files that are "polluted" with
one or more black-listed codes, either by mistake or by peculiar design
considerations. In such cases, a scheme that tolerates a small fraction
of black-listed codes would provide an increased recall (i.e. more true
positives). This, however, incurs a reduced precision overall, since
false positives are more likely to appear in binary files that contain
large chunks of textual data. Furthermore, "polluted" plain text should
be regarded as binary by general-purpose text detection schemes, because
general-purpose text processing algorithms might not be applicable.
Under this premise, it is safe to say that our detection method provides
a near-100% recall.
Experiments have been run on many files coming from various platforms
and applications. We tried plain text files, system logs, source code,
formatted office documents, compiled object code, etc. The results
confirm the optimistic assumptions about the capabilities of this
algorithm.
--
Cosmin Truta
Last updated: 2006-May-28

View File

@ -1,12 +1,12 @@
/* example.c -- usage example of the zlib compression library
* Copyright (C) 1995-2004 Jean-loup Gailly.
* Copyright (C) 1995-2006 Jean-loup Gailly.
* For conditions of distribution and use, see copyright notice in zlib.h
*/
/* @(#) $Id$ */
#include <stdio.h>
#include "zlib.h"
#include <stdio.h>
#ifdef STDC
# include <string.h>

25
gzclose.c Normal file
View File

@ -0,0 +1,25 @@
/* gzclose.c -- zlib gzclose() function
* Copyright (C) 2004, 2010 Mark Adler
* For conditions of distribution and use, see copyright notice in zlib.h
*/
#include "gzguts.h"
/* gzclose() is in a separate file so that it is linked in only if it is used.
That way the other gzclose functions can be used instead to avoid linking in
unneeded compression or decompression routines. */
int ZEXPORT gzclose(file)
gzFile file;
{
#ifndef NO_GZCOMPRESS
gz_statep state;
if (file == NULL)
return Z_STREAM_ERROR;
state = (gz_statep)file;
return state->mode == GZ_READ ? gzclose_r(file) : gzclose_w(file);
#else
return gzclose_r(file);
#endif
}

132
gzguts.h Normal file
View File

@ -0,0 +1,132 @@
/* gzguts.h -- zlib internal header definitions for gz* operations
* Copyright (C) 2004, 2005, 2010 Mark Adler
* For conditions of distribution and use, see copyright notice in zlib.h
*/
#ifdef _LARGEFILE64_SOURCE
# ifndef _LARGEFILE_SOURCE
# define _LARGEFILE_SOURCE
# endif
# ifdef _FILE_OFFSET_BITS
# undef _FILE_OFFSET_BITS
# endif
#endif
#define ZLIB_INTERNAL
#include <stdio.h>
#include "zlib.h"
#ifdef STDC
# include <string.h>
# include <stdlib.h>
# include <limits.h>
#endif
#include <fcntl.h>
#ifdef NO_DEFLATE /* for compatibility with old definition */
# define NO_GZCOMPRESS
#endif
#ifdef _MSC_VER
# include <io.h>
# define vsnprintf _vsnprintf
#endif
#ifndef local
# define local static
#endif
/* compile with -Dlocal if your debugger can't find static symbols */
/* gz* functions always use library allocation functions */
#ifndef STDC
extern voidp malloc OF((uInt size));
extern void free OF((voidpf ptr));
#endif
/* get errno and strerror definition */
#if defined UNDER_CE && defined NO_ERRNO_H
# include <windows.h>
# define zstrerror() gz_strwinerror((DWORD)GetLastError())
#else
# ifdef STDC
# include <errno.h>
# define zstrerror() strerror(errno)
# else
# define zstrerror() "stdio error (consult errno)"
# endif
#endif
/* MVS fdopen() */
#ifdef __MVS__
#pragma map (fdopen , "\174\174FDOPEN")
FILE *fdopen(int, const char *);
#endif
#ifdef _LARGEFILE64_SOURCE
# define z_off64_t off64_t
#else
# define z_off64_t z_off_t
#endif
/* default i/o buffer size -- double this for output when reading */
#define GZBUFSIZE 8192
/* gzip modes, also provide a little integrity check on the passed structure */
#define GZ_NONE 0
#define GZ_READ 7247
#define GZ_WRITE 31153
#define GZ_APPEND 1 /* mode set to GZ_WRITE after the file is opened */
/* values for gz_state how */
#define LOOK 0 /* look for a gzip header */
#define COPY 1 /* copy input directly */
#define GZIP 2 /* decompress a gzip stream */
/* internal gzip file state data structure */
typedef struct {
/* used for both reading and writing */
int mode; /* see gzip modes above */
int fd; /* file descriptor */
char *path; /* path or fd for error messages */
z_off64_t pos; /* current position in uncompressed data */
unsigned size; /* buffer size, zero if not allocated yet */
unsigned want; /* requested buffer size, default is GZBUFSIZE */
unsigned char *in; /* input buffer */
unsigned char *out; /* output buffer (double-sized when reading) */
unsigned char *next; /* next output data to deliver or write */
/* just for reading */
unsigned have; /* amount of output data unused at next */
int eof; /* true if end of input file reached */
z_off64_t start; /* where the gzip data started, for rewinding */
z_off64_t raw; /* where the raw data started, for seeking */
int how; /* 0: get header, 1: copy, 2: decompress */
int direct; /* true if last read direct, false if gzip */
/* just for writing */
int level; /* compression level */
int strategy; /* compression strategy */
/* seek request */
z_off64_t skip; /* amount to skip (already rewound if backwards) */
int seek; /* true if seek request pending */
/* error information */
int err; /* error code */
char *msg; /* error message */
/* zlib inflate or deflate stream */
z_stream strm; /* stream structure in-place (not a pointer) */
} gz_state;
typedef gz_state FAR *gz_statep;
/* shared functions */
ZEXTERN void ZEXPORT gz_error OF((gz_statep, int, const char *));
#if defined UNDER_CE && defined NO_ERRNO_H
ZEXTERN char ZEXPORT *gz_strwinerror OF((DWORD error));
#endif
/* GT_OFF(x), where x is an unsigned value, is true if x > maximum z_off64_t
value -- needed when comparing unsigned to z_off64_t, which is signed
(possible z_off64_t types off_t, off64_t, and long are all signed) */
#ifdef INT_MAX
# define GT_OFF(x) (sizeof(int) == sizeof(z_off64_t) && (x) > INT_MAX)
#else
ZEXTERN unsigned ZEXPORT gz_intmax OF((void));
# define GT_OFF(x) (sizeof(int) == sizeof(z_off64_t) && (x) > gz_intmax())
#endif

1027
gzio.c

File diff suppressed because it is too large Load Diff

535
gzlib.c Normal file
View File

@ -0,0 +1,535 @@
/* gzlib.c -- zlib functions common to reading and writing gzip files
* Copyright (C) 2004, 2010 Mark Adler
* For conditions of distribution and use, see copyright notice in zlib.h
*/
#include "gzguts.h"
#ifdef _LARGEFILE64_SOURCE
# define LSEEK lseek64
#else
# define LSEEK lseek
#endif
/* Local functions */
local void gz_reset OF((gz_statep));
local gzFile gz_open OF((const char *, int, const char *));
#if defined UNDER_CE && defined NO_ERRNO_H
/* Map the Windows error number in ERROR to a locale-dependent error message
string and return a pointer to it. Typically, the values for ERROR come
from GetLastError.
The string pointed to shall not be modified by the application, but may be
overwritten by a subsequent call to gz_strwinerror
The gz_strwinerror function does not change the current setting of
GetLastError. */
char ZEXPORT *gz_strwinerror (error)
DWORD error;
{
static char buf[1024];
wchar_t *msgbuf;
DWORD lasterr = GetLastError();
DWORD chars = FormatMessage(FORMAT_MESSAGE_FROM_SYSTEM
| FORMAT_MESSAGE_ALLOCATE_BUFFER,
NULL,
error,
0, /* Default language */
(LPVOID)&msgbuf,
0,
NULL);
if (chars != 0) {
/* If there is an \r\n appended, zap it. */
if (chars >= 2
&& msgbuf[chars - 2] == '\r' && msgbuf[chars - 1] == '\n') {
chars -= 2;
msgbuf[chars] = 0;
}
if (chars > sizeof (buf) - 1) {
chars = sizeof (buf) - 1;
msgbuf[chars] = 0;
}
wcstombs(buf, msgbuf, chars + 1);
LocalFree(msgbuf);
}
else {
sprintf(buf, "unknown win32 error (%ld)", error);
}
SetLastError(lasterr);
return buf;
}
#endif /* UNDER_CE && NO_ERRNO_H */
/* Reset gzip file state */
local void gz_reset(state)
gz_statep state;
{
if (state->mode == GZ_READ) { /* for reading ... */
state->have = 0; /* no output data available */
state->eof = 0; /* not at end of file */
state->how = LOOK; /* look for gzip header */
state->direct = 1; /* default for empty file */
}
state->seek = 0; /* no seek request pending */
gz_error(state, Z_OK, NULL); /* clear error */
state->pos = 0; /* no uncompressed data yet */
state->strm.avail_in = 0; /* no input data yet */
}
/* Open a gzip file either by name or file descriptor. */
local gzFile gz_open(path, fd, mode)
const char *path;
int fd;
const char *mode;
{
gz_statep state;
/* allocate gzFile structure to return */
state = malloc(sizeof(gz_state));
if (state == NULL)
return NULL;
state->size = 0; /* no buffers allocated yet */
state->want = GZBUFSIZE; /* requested buffer size */
state->msg = NULL; /* no error message yet */
/* interpret mode */
state->mode = GZ_NONE;
state->level = Z_DEFAULT_COMPRESSION;
state->strategy = Z_DEFAULT_STRATEGY;
while (*mode) {
if (*mode >= '0' && *mode <= '9')
state->level = *mode - '0';
else
switch (*mode) {
case 'r':
state->mode = GZ_READ;
break;
#ifndef NO_GZCOMPRESS
case 'w':
state->mode = GZ_WRITE;
break;
case 'a':
state->mode = GZ_APPEND;
break;
#endif
case '+': /* can't read and write at the same time */
free(state);
return NULL;
case 'b': /* ignore -- will request binary anyway */
break;
case 'f':
state->strategy = Z_FILTERED;
break;
case 'h':
state->strategy = Z_HUFFMAN_ONLY;
break;
case 'R':
state->strategy = Z_RLE;
break;
case 'F':
state->strategy = Z_FIXED;
default: /* could consider as an error, but just ignore */
;
}
mode++;
}
/* must provide an "r", "w", or "a" */
if (state->mode == GZ_NONE) {
free(state);
return NULL;
}
/* save the path name for error messages */
state->path = malloc(strlen(path) + 1);
if (state->path == NULL) {
free(state);
return NULL;
}
strcpy(state->path, path);
/* open the file with the appropriate mode (or just use fd) */
state->fd = fd != -1 ? fd :
open(path,
#ifdef O_LARGEFILE
O_LARGEFILE |
#endif
#ifdef O_BINARY
O_BINARY |
#endif
(state->mode == GZ_READ ?
O_RDONLY :
(O_WRONLY | O_CREAT | (
state->mode == GZ_WRITE ?
O_TRUNC :
O_APPEND))),
0666);
if (state->fd == -1) {
free(state);
return NULL;
}
if (state->mode == GZ_APPEND)
state->mode = GZ_WRITE; /* simplify later checks */
/* save the current position for rewinding (only if reading) */
if (state->mode == GZ_READ) {
state->start = LSEEK(state->fd, 0, SEEK_CUR);
if (state->start == -1) state->start = 0;
}
/* initialize stream */
gz_reset(state);
/* return stream */
return (gzFile)state;
}
/* -- see zlib.h -- */
gzFile ZEXPORT gzopen(path, mode)
const char *path;
const char *mode;
{
return gz_open(path, -1, mode);
}
/* -- see zlib.h -- */
gzFile ZEXPORT gzopen64(path, mode)
const char *path;
const char *mode;
{
return gz_open(path, -1, mode);
}
/* -- see zlib.h -- */
gzFile ZEXPORT gzdopen(fd, mode)
int fd;
const char *mode;
{
char *path; /* identifier for error messages */
gzFile gz;
if (fd == -1 || (path = malloc(7 + 3 * sizeof(int))) == NULL)
return NULL;
sprintf(path, "<fd:%d>", fd);
gz = gz_open(path, fd, mode);
free(path);
return gz;
}
/* -- see zlib.h -- */
int ZEXPORT gzbuffer(file, size)
gzFile file;
unsigned size;
{
gz_statep state;
/* get internal structure and check integrity */
if (file == NULL)
return -1;
state = (gz_statep)file;
if (state->mode != GZ_READ && state->mode != GZ_WRITE)
return -1;
/* make sure we haven't already allocated memory */
if (state->size != 0)
return -1;
/* check and set requested size */
if (size == 0)
return -1;
state->want = size;
return 0;
}
/* -- see zlib.h -- */
int ZEXPORT gzrewind(file)
gzFile file;
{
gz_statep state;
/* get internal structure */
if (file == NULL)
return -1;
state = (gz_statep)file;
/* check that we're reading and that there's no error */
if (state->mode != GZ_READ || state->err != Z_OK)
return -1;
/* back up and start over */
if (LSEEK(state->fd, state->start, SEEK_SET) == -1)
return -1;
gz_reset(state);
return 0;
}
/* -- see zlib.h -- */
z_off64_t ZEXPORT gzseek64(file, offset, whence)
gzFile file;
z_off64_t offset;
int whence;
{
unsigned n;
z_off64_t ret;
gz_statep state;
/* get internal structure and check integrity */
if (file == NULL)
return -1;
state = (gz_statep)file;
if (state->mode != GZ_READ && state->mode != GZ_WRITE)
return -1;
/* check that there's no error */
if (state->err != Z_OK)
return -1;
/* can only seek from start or relative to current position */
if (whence != SEEK_SET && whence != SEEK_CUR)
return -1;
/* normalize offset to a SEEK_CUR specification */
if (whence == SEEK_SET)
offset -= state->pos;
else if (state->seek)
offset += state->skip;
state->seek = 0;
/* if within raw area while reading, just go there */
if (state->mode == GZ_READ && state->how == COPY &&
state->pos + offset >= state->raw) {
ret = LSEEK(state->fd, offset, SEEK_CUR);
if (ret == -1)
return -1;
state->have = 0;
state->eof = 0;
state->seek = 0;
gz_error(state, Z_OK, NULL);
state->strm.avail_in = 0;
state->pos += offset;
return state->pos;
}
/* calculate skip amount, rewinding if needed for back seek when reading */
if (offset < 0) {
if (state->mode != GZ_READ) /* writing -- can't go backwards */
return -1;
offset += state->pos;
if (offset < 0) /* before start of file! */
return -1;
if (gzrewind(file) == -1) /* rewind, then skip to offset */
return -1;
}
/* if reading, skip what's in output buffer (one less gzgetc() check) */
if (state->mode == GZ_READ) {
n = GT_OFF(state->have) || (z_off64_t)state->have > offset ?
(unsigned)offset : state->have;
state->have -= n;
state->next += n;
state->pos += n;
offset -= n;
}
/* request skip (if not zero) */
if (offset) {
state->seek = 1;
state->skip = offset;
}
return state->pos + offset;
}
/* -- see zlib.h -- */
z_off_t ZEXPORT gzseek(file, offset, whence)
gzFile file;
z_off_t offset;
int whence;
{
z_off64_t ret;
ret = gzseek64(file, (z_off64_t)offset, whence);
return ret == (z_off_t)ret ? (z_off_t)ret : -1;
}
/* -- see zlib.h -- */
z_off64_t ZEXPORT gztell64(file)
gzFile file;
{
gz_statep state;
/* get internal structure and check integrity */
if (file == NULL)
return -1;
state = (gz_statep)file;
if (state->mode != GZ_READ && state->mode != GZ_WRITE)
return -1;
/* return position */
return state->pos + (state->seek ? state->skip : 0);
}
/* -- see zlib.h -- */
z_off_t ZEXPORT gztell(file)
gzFile file;
{
z_off64_t ret;
ret = gztell64(file);
return ret == (z_off_t)ret ? (z_off_t)ret : -1;
}
/* -- see zlib.h -- */
z_off64_t ZEXPORT gzoffset64(file)
gzFile file;
{
z_off64_t offset;
gz_statep state;
/* get internal structure and check integrity */
if (file == NULL)
return -1;
state = (gz_statep)file;
if (state->mode != GZ_READ && state->mode != GZ_WRITE)
return -1;
/* compute and return effective offset in file */
offset = LSEEK(state->fd, 0, SEEK_CUR);
if (offset == -1)
return -1;
if (state->mode == GZ_READ) /* reading */
offset -= state->strm.avail_in; /* don't count buffered input */
return offset;
}
/* -- see zlib.h -- */
z_off_t ZEXPORT gzoffset(file)
gzFile file;
{
z_off64_t ret;
ret = gzoffset64(file);
return ret == (z_off_t)ret ? (z_off_t)ret : -1;
}
/* -- see zlib.h -- */
int ZEXPORT gzeof(file)
gzFile file;
{
gz_statep state;
/* get internal structure and check integrity */
if (file == NULL)
return 0;
state = (gz_statep)file;
if (state->mode != GZ_READ && state->mode != GZ_WRITE)
return 0;
/* return end-of-file state */
return state->mode == GZ_READ ? (state->eof && state->have == 0) : 0;
}
/* -- see zlib.h -- */
const char * ZEXPORT gzerror(file, errnum)
gzFile file;
int *errnum;
{
gz_statep state;
/* get internal structure and check integrity */
if (file == NULL)
return NULL;
state = (gz_statep)file;
if (state->mode != GZ_READ && state->mode != GZ_WRITE)
return NULL;
/* return error information */
if (errnum != NULL)
*errnum = state->err;
return state->msg == NULL ? "" : state->msg;
}
/* -- see zlib.h -- */
void ZEXPORT gzclearerr(file)
gzFile file;
{
gz_statep state;
/* get internal structure and check integrity */
if (file == NULL)
return;
state = (gz_statep)file;
if (state->mode != GZ_READ && state->mode != GZ_WRITE)
return;
/* clear error and end-of-file */
if (state->mode == GZ_READ)
state->eof = 0;
gz_error(state, Z_OK, NULL);
}
/* Create an error message in allocated memory and set state->err and
state->msg accordingly. Free any previous error message already there. Do
not try to free or allocate space if the error is Z_MEM_ERROR (out of
memory). Simply save the error message as a static string. If there is an
allocation failure constructing the error message, then convert the error to
out of memory. */
void ZEXPORT gz_error(state, err, msg)
gz_statep state;
int err;
const char *msg;
{
/* free previously allocated message and clear */
if (state->msg != NULL) {
if (state->err != Z_MEM_ERROR)
free(state->msg);
state->msg = NULL;
}
/* set error code, and if no message, then done */
state->err = err;
if (msg == NULL)
return;
/* for an out of memory error, save as static string */
if (err == Z_MEM_ERROR) {
state->msg = (char *)msg;
return;
}
/* construct error message with path */
if ((state->msg = malloc(strlen(state->path) + strlen(msg) + 3)) == NULL) {
state->err = Z_MEM_ERROR;
state->msg = (char *)"out of memory";
return;
}
strcpy(state->msg, state->path);
strcat(state->msg, ": ");
strcat(state->msg, msg);
return;
}
#ifndef INT_MAX
/* portably return maximum value for an int (when limits.h presumed not
available) -- we need to do this to cover cases where 2's complement not
used, since C standard permits 1's complement and sign-bit representations,
otherwise we could just use ((unsigned)-1) >> 1 */
unsigned ZEXPORT gz_intmax()
{
unsigned p, q;
p = 1;
do {
q = p;
p <<= 1;
p++;
} while (p > q);
return q >> 1;
}
#endif

653
gzread.c Normal file
View File

@ -0,0 +1,653 @@
/* gzread.c -- zlib functions for reading gzip files
* Copyright (C) 2004, 2005, 2010 Mark Adler
* For conditions of distribution and use, see copyright notice in zlib.h
*/
#include "gzguts.h"
#include <unistd.h>
/* Local functions */
local int gz_load OF((gz_statep, unsigned char *, unsigned, unsigned *));
local int gz_avail OF((gz_statep));
local int gz_next4 OF((gz_statep, unsigned long *));
local int gz_head OF((gz_statep));
local int gz_decomp OF((gz_statep));
local int gz_make OF((gz_statep));
local int gz_skip OF((gz_statep, z_off64_t));
/* Use read() to load a buffer -- return -1 on error, otherwise 0. Read from
state->fd, and update state->eof, state->err, and state->msg as appropriate.
This function needs to loop on read(), since read() is not guaranteed to
read the number of bytes requested, depending on the type of descriptor. */
local int gz_load(state, buf, len, have)
gz_statep state;
unsigned char *buf;
unsigned len;
unsigned *have;
{
int ret;
*have = 0;
do {
ret = read(state->fd, buf + *have, len - *have);
if (ret <= 0)
break;
*have += ret;
} while (*have < len);
if (ret < 0) {
gz_error(state, Z_ERRNO, zstrerror());
return -1;
}
if (ret == 0)
state->eof = 1;
return 0;
}
/* Load up input buffer and set eof flag if last data loaded -- return -1 on
error, 0 otherwise. Note that the eof flag is set when the end of the input
file is reached, even though there may be unused data in the buffer. Once
that data has been used, no more attempts will be made to read the file.
gz_avail() assumes that strm->avail_in == 0. */
local int gz_avail(state)
gz_statep state;
{
z_streamp strm = &(state->strm);
if (state->err != Z_OK)
return -1;
if (state->eof == 0) {
if (gz_load(state, state->in, state->size, &(strm->avail_in)) == -1)
return -1;
strm->next_in = state->in;
}
return 0;
}
/* Get next byte from input, or -1 if end or error. */
#define NEXT() ((strm->avail_in == 0 && gz_avail(state) == -1) ? -1 : \
(strm->avail_in == 0 ? -1 : \
(strm->avail_in--, *(strm->next_in)++)))
/* Get a four-byte little-endian integer and return 0 on success and the value
in *ret. Otherwise -1 is returned and *ret is not modified. */
local int gz_next4(state, ret)
gz_statep state;
unsigned long *ret;
{
int ch;
unsigned long val;
z_streamp strm = &(state->strm);
val = NEXT();
val += (unsigned)NEXT() << 8;
val += (unsigned long)NEXT() << 16;
ch = NEXT();
if (ch == -1)
return -1;
val += (unsigned long)ch << 24;
*ret = val;
return 0;
}
/* Look for gzip header, set up for inflate or copy. state->have must be zero.
If this is the first time in, allocate required memory. state->how will be
left unchanged if there is no more input data available, will be set to COPY
if there is no gzip header and direct copying will be performed, or it will
be set to GZIP for decompression, and the gzip header will be skipped so
that the next available input data is the raw deflate stream. If direct
copying, then leftover input data from the input buffer will be copied to
the output buffer. In that case, all further file reads will be directly to
either the output buffer or a user buffer. If decompressing, the inflate
state and the check value will be initialized. gz_head() will return 0 on
success or -1 on failure. Failures may include read errors or gzip header
errors. */
local int gz_head(state)
gz_statep state;
{
z_streamp strm = &(state->strm);
int flags;
unsigned len;
/* allocate read buffers and inflate memory */
if (state->size == 0) {
/* allocate buffers */
state->in = malloc(state->want);
state->out = malloc(state->want << 1);
if (state->in == NULL || state->out == NULL) {
if (state->out != NULL)
free(state->out);
if (state->in != NULL)
free(state->in);
gz_error(state, Z_MEM_ERROR, "out of memory");
return -1;
}
state->size = state->want;
/* allocate inflate memory */
state->strm.zalloc = Z_NULL;
state->strm.zfree = Z_NULL;
state->strm.opaque = Z_NULL;
state->strm.avail_in = 0;
state->strm.next_in = Z_NULL;
if (inflateInit2(&(state->strm), -15) != Z_OK) { /* raw inflate */
free(state->out);
free(state->in);
state->size = 0;
gz_error(state, Z_MEM_ERROR, "out of memory");
return -1;
}
}
/* get some data in the input buffer */
if (strm->avail_in == 0) {
if (gz_avail(state) == -1)
return -1;
if (strm->avail_in == 0)
return 0;
}
/* look for the gzip magic header bytes 31 and 139 */
if (strm->next_in[0] == 31) {
strm->avail_in--;
strm->next_in++;
if (strm->avail_in == 0 && gz_avail(state) == -1)
return -1;
if (strm->avail_in && strm->next_in[0] == 139) {
/* we have a gzip header, woo hoo! */
strm->avail_in--;
strm->next_in++;
/* skip rest of header */
if (NEXT() != 8) { /* compression method */
gz_error(state, Z_DATA_ERROR, "unknown compression method");
return -1;
}
flags = NEXT();
if (flags & 0xe0) { /* reserved flag bits */
gz_error(state, Z_DATA_ERROR, "unknown header flags set");
return -1;
}
NEXT(); /* modification time */
NEXT();
NEXT();
NEXT();
NEXT(); /* extra flags */
NEXT(); /* operating system */
if (flags & 4) { /* extra field */
len = (unsigned)NEXT();
len += (unsigned)NEXT() << 8;
while (len--)
if (NEXT() < 0)
break;
}
if (flags & 8) /* file name */
while (NEXT() > 0)
;
if (flags & 16) /* comment */
while (NEXT() > 0)
;
if (flags & 2) { /* header crc */
NEXT();
NEXT();
}
/* an unexpected end of file is not checked for here -- it will be
noticed on the first request for uncompressed data */
/* set up for decompression */
inflateReset(strm);
strm->adler = crc32(0L, Z_NULL, 0);
state->how = GZIP;
state->direct = 0;
return 0;
}
else {
/* not a gzip file -- save first byte (31) and fall to raw i/o */
state->out[0] = 31;
state->have = 1;
}
}
/* doing raw i/o, save start of raw data for seeking, copy any leftover
input to output -- this assumes that the output buffer is larger than
the input buffer, which also assures space for gzungetc() */
state->raw = state->pos;
state->next = state->out;
if (strm->avail_in) {
memcpy(state->next + state->have, strm->next_in, strm->avail_in);
state->have += strm->avail_in;
strm->avail_in = 0;
}
state->how = COPY;
state->direct = 1;
return 0;
}
/* Decompress from input to the provided next_out and avail_out in the state.
If the end of the compressed data is reached, then verify the gzip trailer
check value and length (modulo 2^32). state->have and state->next are set
to point to the just decompressed data, and the crc is updated. If the
trailer is verified, state->how is reset to LOOK to look for the next gzip
stream or raw data, once state->have is depleted. Returns 0 on success, -1
on failure. Failures may include invalid compressed data or a failed gzip
trailer verification. */
local int gz_decomp(state)
gz_statep state;
{
int ret;
unsigned had;
unsigned long crc, len;
z_streamp strm = &(state->strm);
/* fill output buffer up to end of deflate stream */
had = strm->avail_out;
do {
/* get more input for inflate() */
if (strm->avail_in == 0 && gz_avail(state) == -1)
return -1;
if (strm->avail_in == 0) {
gz_error(state, Z_DATA_ERROR, "unexpected end of file");
return -1;
}
/* decompress and handle errors */
ret = inflate(strm, Z_NO_FLUSH);
if (ret == Z_STREAM_ERROR || ret == Z_NEED_DICT) {
gz_error(state, Z_STREAM_ERROR,
"internal error: inflate stream corrupt");
return -1;
}
if (ret == Z_MEM_ERROR) {
gz_error(state, Z_MEM_ERROR, "out of memory");
return -1;
}
if (ret == Z_DATA_ERROR) { /* deflate stream invalid */
gz_error(state, Z_DATA_ERROR,
strm->msg == NULL ? "compressed data error" : strm->msg);
return -1;
}
} while (strm->avail_out && ret != Z_STREAM_END);
/* update available output and crc check value */
state->have = had - strm->avail_out;
state->next = strm->next_out - state->have;
strm->adler = crc32(strm->adler, state->next, state->have);
/* check gzip trailer if at end of deflate stream */
if (ret == Z_STREAM_END) {
if (gz_next4(state, &crc) == -1 || gz_next4(state, &len) == -1) {
gz_error(state, Z_DATA_ERROR, "unexpected end of file");
return -1;
}
if (crc != strm->adler) {
gz_error(state, Z_DATA_ERROR, "incorrect data check");
return -1;
}
if (len != (strm->total_out & 0xffffffffL)) {
gz_error(state, Z_DATA_ERROR, "incorrect length check");
return -1;
}
state->how = LOOK; /* ready for next stream, once have is 0 (leave
state->direct unchanged to remember how) */
}
/* good decompression */
return 0;
}
/* Make data and put in the output buffer. Assumes that state->have == 0.
Data is either copied from the input file or decompressed from the input
file depending on state->how. If state->how is LOOK, then a gzip header is
looked for (and skipped if found) to determine wither to copy or decompress.
Returns -1 on error, otherwise 0. gz_make() will leave state->have as COPY
or GZIP unless the end of the input file has been reached and all data has
been processed. */
local int gz_make(state)
gz_statep state;
{
z_streamp strm = &(state->strm);
if (state->how == LOOK) { /* look for gzip header */
if (gz_head(state) == -1)
return -1;
if (state->have) /* got some data from gz_head() */
return 0;
}
if (state->how == COPY) { /* straight copy */
if (gz_load(state, state->out, state->size << 1, &(state->have)) == -1)
return -1;
state->next = state->out;
}
else if (state->how == GZIP) { /* decompress */
strm->avail_out = state->size << 1;
strm->next_out = state->out;
if (gz_decomp(state) == -1)
return -1;
}
return 0;
}
/* Skip len uncompressed bytes of output. Return -1 on error, 0 on success. */
local int gz_skip(state, len)
gz_statep state;
z_off64_t len;
{
unsigned n;
/* skip over len bytes or reach end-of-file, whichever comes first */
while (len)
/* skip over whatever is in output buffer */
if (state->have) {
n = GT_OFF(state->have) || (z_off64_t)state->have > len ?
(unsigned)len : state->have;
state->have -= n;
state->next += n;
state->pos += n;
len -= n;
}
/* output buffer empty -- return if we're at the end of the input */
else if (state->eof && state->strm.avail_in == 0)
break;
/* need more data to skip -- load up output buffer */
else {
/* get more output, looking for header if required */
if (gz_make(state) == -1)
return -1;
}
return 0;
}
/* -- see zlib.h -- */
int ZEXPORT gzread(file, buf, len)
gzFile file;
voidp buf;
unsigned len;
{
unsigned got, n;
gz_statep state;
z_streamp strm;
/* get internal structure */
if (file == NULL)
return -1;
state = (gz_statep)file;
strm = &(state->strm);
/* check that we're reading and that there's no error */
if (state->mode != GZ_READ || state->err != Z_OK)
return -1;
/* since an int is returned, make sure len fits in one, otherwise return
with an error (this avoids the flaw in the interface) */
if ((int)len < 0) {
gz_error(state, Z_BUF_ERROR, "requested length does not fit in int");
return -1;
}
/* if len is zero, avoid unnecessary operations */
if (len == 0)
return 0;
/* process a skip request */
if (state->seek) {
state->seek = 0;
if (gz_skip(state, state->skip) == -1)
return -1;
}
/* get len bytes to buf, or less than len if at the end */
got = 0;
do {
/* first just try copying data from the output buffer */
if (state->have) {
n = state->have > len ? len : state->have;
memcpy(buf, state->next, n);
state->next += n;
state->have -= n;
}
/* output buffer empty -- return if we're at the end of the input */
else if (state->eof && strm->avail_in == 0)
break;
/* need output data -- for small len or new stream load up our output
buffer */
else if (state->how == LOOK || len < (state->size << 1)) {
/* get more output, looking for header if required */
if (gz_make(state) == -1)
return -1;
continue; /* no progress yet -- go back to memcpy() above */
/* the copy above assures that we will leave with space in the
output buffer, allowing at least one gzungetc() to succeed */
}
/* large len -- read directly into user buffer */
else if (state->how == COPY) { /* read directly */
if (gz_load(state, buf, len, &n) == -1)
return -1;
}
/* large len -- decompress directly into user buffer */
else { /* state->how == GZIP */
strm->avail_out = len;
strm->next_out = buf;
if (gz_decomp(state) == -1)
return -1;
n = state->have;
state->have = 0;
}
/* update progress */
len -= n;
buf = (char *)buf + n;
got += n;
state->pos += n;
} while (len);
/* return number of bytes read into user buffer (will fit in int) */
return (int)got;
}
/* -- see zlib.h -- */
int ZEXPORT gzgetc(file)
gzFile file;
{
int ret;
unsigned char buf[1];
gz_statep state;
/* get internal structure */
if (file == NULL)
return -1;
state = (gz_statep)file;
/* check that we're reading and that there's no error */
if (state->mode != GZ_READ || state->err != Z_OK)
return -1;
/* try output buffer (no need to check for skip request) */
if (state->have) {
state->have--;
state->pos++;
return *(state->next)++;
}
/* nothing there -- try gzread() */
ret = gzread(file, buf, 1);
return ret < 1 ? -1 : buf[0];
}
/* -- see zlib.h -- */
int ZEXPORT gzungetc(c, file)
int c;
gzFile file;
{
gz_statep state;
/* get internal structure */
if (file == NULL)
return -1;
state = (gz_statep)file;
/* check that we're reading and that there's no error */
if (state->mode != GZ_READ || state->err != Z_OK)
return -1;
/* process a skip request */
if (state->seek) {
state->seek = 0;
if (gz_skip(state, state->skip) == -1)
return -1;
}
/* can't push EOF */
if (c < 0)
return -1;
/* if output buffer empty, put byte at end (allows more pushing) */
if (state->have == 0) {
state->have = 1;
state->next = state->out + (state->size << 1) - 1;
state->next[0] = c;
state->pos--;
return c;
}
/* if no room, give up (must have already done a gzungetc()) */
if (state->have == (state->size << 1)) {
gz_error(state, Z_BUF_ERROR, "out of room to push characters");
return -1;
}
/* slide output data if needed and insert byte before existing data */
if (state->next == state->out) {
unsigned char *src = state->out + state->have;
unsigned char *dest = state->out + (state->size << 1);
while (src > state->out)
*--dest = *--src;
state->next = dest;
}
state->have++;
state->next--;
state->next[0] = c;
state->pos--;
return c;
}
/* -- see zlib.h -- */
char * ZEXPORT gzgets(file, buf, len)
gzFile file;
char *buf;
int len;
{
unsigned left, n;
char *str;
unsigned char *eol;
gz_statep state;
/* check parameters and get internal structure */
if (file == NULL || buf == NULL || len < 1)
return NULL;
state = (gz_statep)file;
/* check that we're reading and that there's no error */
if (state->mode != GZ_READ || state->err != Z_OK)
return NULL;
/* process a skip request */
if (state->seek) {
state->seek = 0;
if (gz_skip(state, state->skip) == -1)
return NULL;
}
/* copy output bytes up to new line or len - 1, whichever comes first --
append a terminating zero to the string (we don't check for a zero in
the contents, let the user worry about that) */
str = buf;
left = (unsigned)len - 1;
if (left) do {
/* assure that something is in the output buffer */
if (state->have == 0) {
if (gz_make(state) == -1)
return NULL; /* error */
if (state->have == 0) { /* end of file */
if (buf == str) /* got bupkus */
return NULL;
break; /* got something -- return it */
}
}
/* look for end-of-line in current output buffer */
n = state->have > left ? left : state->have;
eol = memchr(state->next, '\n', n);
if (eol != NULL)
n = (unsigned)(eol - state->next) + 1;
/* copy through end-of-line, or remainder if not found */
memcpy(buf, state->next, n);
state->have -= n;
state->next += n;
state->pos += n;
left -= n;
buf += n;
} while (left && eol == NULL);
/* found end-of-line or out of space -- terminate string and return it */
buf[0] = 0;
return str;
}
/* -- see zlib.h -- */
int ZEXPORT gzdirect(file)
gzFile file;
{
gz_statep state;
/* get internal structure */
if (file == NULL)
return 0;
state = (gz_statep)file;
/* check that we're reading */
if (state->mode != GZ_READ)
return 0;
/* if the state is not known, but we can find out, then do so (this is
mainly for right after a gzopen() or gzdopen()) */
if (state->how == LOOK && state->have == 0)
(void)gz_head(state);
/* return 1 if reading direct, 0 if decompressing a gzip stream */
return state->direct;
}
/* -- see zlib.h -- */
int ZEXPORT gzclose_r(file)
gzFile file;
{
int ret;
gz_statep state;
/* get internal structure */
if (file == NULL)
return Z_STREAM_ERROR;
state = (gz_statep)file;
/* check that we're reading */
if (state->mode != GZ_READ)
return Z_STREAM_ERROR;
/* free memory and close file */
if (state->size) {
inflateEnd(&(state->strm));
free(state->out);
free(state->in);
}
gz_error(state, Z_OK, NULL);
free(state->path);
ret = close(state->fd);
free(state);
return ret ? Z_ERRNO : Z_OK;
}

532
gzwrite.c Normal file
View File

@ -0,0 +1,532 @@
/* gzwrite.c -- zlib functions for writing gzip files
* Copyright (C) 2004, 2005, 2010 Mark Adler
* For conditions of distribution and use, see copyright notice in zlib.h
*/
#include "gzguts.h"
#include <unistd.h>
/* Local functions */
local int gz_init OF((gz_statep));
local int gz_comp OF((gz_statep, int));
local int gz_zero OF((gz_statep, z_off64_t));
/* Initialize state for writing a gzip file. Mark initialization by setting
state->size to non-zero. Return -1 on failure or 0 on success. */
local int gz_init(state)
gz_statep state;
{
int ret;
z_streamp strm = &(state->strm);
/* allocate input and output buffers */
state->in = malloc(state->want);
state->out = malloc(state->want);
if (state->in == NULL || state->out == NULL) {
if (state->out != NULL)
free(state->out);
if (state->in != NULL)
free(state->in);
gz_error(state, Z_MEM_ERROR, "out of memory");
return -1;
}
/* allocate deflate memory, set up for gzip compression */
strm->zalloc = Z_NULL;
strm->zfree = Z_NULL;
strm->opaque = Z_NULL;
ret = deflateInit2(strm, state->level, Z_DEFLATED,
15 + 16, 8, state->strategy);
if (ret != Z_OK) {
free(state->in);
gz_error(state, Z_MEM_ERROR, "out of memory");
return -1;
}
/* mark state as initialized */
state->size = state->want;
/* initialize write buffer */
strm->avail_out = state->size;
strm->next_out = state->out;
state->next = strm->next_out;
return 0;
}
/* Compress whatever is at avail_in and next_in and write to the output file.
Return -1 if there is an error writing to the output file, otherwise 0.
flush is assumed to be a valid deflate() flush value. If flush is Z_FINISH,
then the deflate() state is reset to start a new gzip stream. */
local int gz_comp(state, flush)
gz_statep state;
int flush;
{
int ret, got;
unsigned have;
z_streamp strm = &(state->strm);
/* allocate memory if this is the first time through */
if (state->size == 0 && gz_init(state) == -1)
return -1;
/* run deflate() on provided input until it produces no more output */
ret = Z_OK;
do {
/* write out current buffer contents if full, or if flushing, but if
doing Z_FINISH then don't write until we get to Z_STREAM_END */
if (strm->avail_out == 0 || (flush != Z_NO_FLUSH &&
(flush != Z_FINISH || ret == Z_STREAM_END))) {
have = (unsigned)(strm->next_out - state->next);
if (have && ((got = write(state->fd, state->next, have)) < 0 ||
(unsigned)got != have)) {
gz_error(state, Z_ERRNO, zstrerror());
return -1;
}
if (strm->avail_out == 0) {
strm->avail_out = state->size;
strm->next_out = state->out;
}
state->next = strm->next_out;
}
/* compress */
have = strm->avail_out;
ret = deflate(strm, flush);
if (ret == Z_STREAM_ERROR) {
gz_error(state, Z_STREAM_ERROR,
"internal error: deflate stream corrupt");
return -1;
}
have -= strm->avail_out;
} while (have);
/* if that completed a deflate stream, allow another to start */
if (flush == Z_FINISH)
deflateReset(strm);
/* all done, no errors */
return 0;
}
/* Compress len zeros to output. Return -1 on error, 0 on success. */
local int gz_zero(state, len)
gz_statep state;
z_off64_t len;
{
int first;
unsigned n;
z_streamp strm = &(state->strm);
/* consume whatever's left in the input buffer */
if (strm->avail_in && gz_comp(state, Z_NO_FLUSH) == -1)
return -1;
/* compress len zeros (len guaranteed > 0) */
first = 1;
while (len) {
n = GT_OFF(state->size) || (z_off64_t)state->size > len ?
(unsigned)len : state->size;
if (first) {
memset(state->in, 0, n);
first = 0;
}
strm->avail_in = n;
strm->next_in = state->in;
state->pos += n;
if (gz_comp(state, Z_NO_FLUSH) == -1)
return -1;
len -= n;
}
return 0;
}
/* -- see zlib.h -- */
int ZEXPORT gzwrite(file, buf, len)
gzFile file;
voidpc buf;
unsigned len;
{
unsigned put = len;
unsigned n;
gz_statep state;
z_streamp strm;
/* get internal structure */
if (file == NULL)
return 0;
state = (gz_statep)file;
strm = &(state->strm);
/* check that we're writing and that there's no error */
if (state->mode != GZ_WRITE || state->err != Z_OK)
return 0;
/* since an int is returned, make sure len fits in one, otherwise return
with an error (this avoids the flaw in the interface) */
if ((int)len < 0) {
gz_error(state, Z_BUF_ERROR, "requested length does not fit in int");
return 0;
}
/* if len is zero, avoid unnecessary operations */
if (len == 0)
return 0;
/* allocate memory if this is the first time through */
if (state->size == 0 && gz_init(state) == -1)
return 0;
/* check for seek request */
if (state->seek) {
state->seek = 0;
if (gz_zero(state, state->skip) == -1)
return 0;
}
/* for small len, copy to input buffer, otherwise compress directly */
if (len < state->size) {
/* copy to input buffer, compress when full */
do {
if (strm->avail_in == 0)
strm->next_in = state->in;
n = state->size - strm->avail_in;
if (n > len)
n = len;
memcpy(strm->next_in + strm->avail_in, buf, n);
strm->avail_in += n;
state->pos += n;
buf = (char *)buf + n;
len -= n;
if (len && gz_comp(state, Z_NO_FLUSH) == -1)
return 0;
} while (len);
}
else {
/* consume whatever's left in the input buffer */
if (strm->avail_in && gz_comp(state, Z_NO_FLUSH) == -1)
return 0;
/* directly compress user buffer to file */
strm->avail_in = len;
strm->next_in = (voidp)buf;
state->pos += len;
if (gz_comp(state, Z_NO_FLUSH) == -1)
return 0;
}
/* input was all buffered or compressed (put will fit in int) */
return (int)put;
}
/* -- see zlib.h -- */
int ZEXPORT gzputc(file, c)
gzFile file;
int c;
{
unsigned char buf[1];
gz_statep state;
z_streamp strm;
/* get internal structure */
if (file == NULL)
return -1;
state = (gz_statep)file;
strm = &(state->strm);
/* check that we're writing and that there's no error */
if (state->mode != GZ_WRITE || state->err != Z_OK)
return -1;
/* check for seek request */
if (state->seek) {
state->seek = 0;
if (gz_zero(state, state->skip) == -1)
return -1;
}
/* try writing to input buffer for speed (state->size == 0 if buffer not
initialized) */
if (strm->avail_in < state->size) {
if (strm->avail_in == 0)
strm->next_in = state->in;
strm->next_in[strm->avail_in++] = c;
state->pos++;
return c;
}
/* no room in buffer or not initialized, use gz_write() */
buf[0] = c;
if (gzwrite(file, buf, 1) != 1)
return -1;
return c;
}
/* -- see zlib.h -- */
int ZEXPORT gzputs(file, str)
gzFile file;
const char *str;
{
int ret;
unsigned len;
/* write string */
len = (unsigned)strlen(str);
ret = gzwrite(file, str, len);
return ret == 0 && len != 0 ? -1 : ret;
}
#ifdef STDC
#include <stdarg.h>
/* -- see zlib.h -- */
int ZEXPORTVA gzprintf (gzFile file, const char *format, ...)
{
int size, len;
gz_statep state;
z_streamp strm;
va_list va;
/* get internal structure */
if (file == NULL)
return -1;
state = (gz_statep)file;
strm = &(state->strm);
/* check that we're writing and that there's no error */
if (state->mode != GZ_WRITE || state->err != Z_OK)
return 0;
/* make sure we have some buffer space */
if (state->size == 0 && gz_init(state) == -1)
return 0;
/* check for seek request */
if (state->seek) {
state->seek = 0;
if (gz_zero(state, state->skip) == -1)
return 0;
}
/* consume whatever's left in the input buffer */
if (strm->avail_in && gz_comp(state, Z_NO_FLUSH) == -1)
return 0;
/* do the printf() into the input buffer, put length in len */
size = (int)(state->size);
state->in[size - 1] = 0;
va_start(va, format);
#ifdef NO_vsnprintf
# ifdef HAS_vsprintf_void
(void)vsprintf(state->in, format, va);
va_end(va);
for (len = 0; len < size; len++)
if (state->in[len] == 0) break;
# else
len = vsprintf(state->in, format, va);
va_end(va);
# endif
#else
# ifdef HAS_vsnprintf_void
(void)vsnprintf(state->in, size, format, va);
va_end(va);
len = strlen(state->in);
# else
len = vsnprintf((char *)(state->in), size, format, va);
va_end(va);
# endif
#endif
/* check that printf() results fit in buffer */
if (len <= 0 || len >= (int)size || state->in[size - 1] != 0)
return 0;
/* update buffer and position, defer compression until needed */
strm->avail_in = (unsigned)len;
strm->next_in = state->in;
state->pos += len;
return len;
}
#else /* !STDC */
/* -- see zlib.h -- */
int ZEXPORTVA gzprintf (file, format, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10,
a11, a12, a13, a14, a15, a16, a17, a18, a19, a20)
gzFile file;
const char *format;
int a1, a2, a3, a4, a5, a6, a7, a8, a9, a10,
a11, a12, a13, a14, a15, a16, a17, a18, a19, a20;
{
int size, len;
gz_statep state;
z_streamp strm;
/* get internal structure */
if (file == NULL)
return -1;
state = (gz_statep)file;
strm = &(state->strm);
/* check that we're writing and that there's no error */
if (state->mode != GZ_WRITE || state->err != Z_OK)
return 0;
/* make sure we have some buffer space */
if (state->size == 0 && gz_init(state) == -1)
return 0;
/* check for seek request */
if (state->seek) {
state->seek = 0;
if (gz_zero(state, state->skip) == -1)
return 0;
}
/* consume whatever's left in the input buffer */
if (strm->avail_in && gz_comp(state, Z_NO_FLUSH) == -1)
return 0;
/* do the printf() into the input buffer, put length in len */
size = (int)(state->size);
state->in[size - 1] = 0;
#ifdef NO_snprintf
# ifdef HAS_sprintf_void
sprintf(state->in, format, a1, a2, a3, a4, a5, a6, a7, a8,
a9, a10, a11, a12, a13, a14, a15, a16, a17, a18, a19, a20);
for (len = 0; len < size; len++)
if (state->in[len] == 0) break;
# else
len = sprintf(state->in, format, a1, a2, a3, a4, a5, a6, a7, a8,
a9, a10, a11, a12, a13, a14, a15, a16, a17, a18, a19, a20);
# endif
#else
# ifdef HAS_snprintf_void
snprintf(state->in, size, format, a1, a2, a3, a4, a5, a6, a7, a8,
a9, a10, a11, a12, a13, a14, a15, a16, a17, a18, a19, a20);
len = strlen(state->in);
# else
len = snprintf(state->in, size, format, a1, a2, a3, a4, a5, a6, a7, a8,
a9, a10, a11, a12, a13, a14, a15, a16, a17, a18, a19, a20);
# endif
#endif
/* check that printf() results fit in buffer */
if (len <= 0 || len >= (int)size || state->in[size - 1] != 0)
return 0;
/* update buffer and position, defer compression until needed */
strm->avail_in = (unsigned)len;
strm->next_in = state->in;
state->pos += len;
return len;
}
#endif
/* -- see zlib.h -- */
int ZEXPORT gzflush(file, flush)
gzFile file;
int flush;
{
gz_statep state;
/* get internal structure */
if (file == NULL)
return -1;
state = (gz_statep)file;
/* check that we're writing and that there's no error */
if (state->mode != GZ_WRITE || state->err != Z_OK)
return Z_STREAM_ERROR;
/* check flush parameter */
if (flush < 0 || flush > Z_FINISH)
return Z_STREAM_ERROR;
/* check for seek request */
if (state->seek) {
state->seek = 0;
if (gz_zero(state, state->skip) == -1)
return -1;
}
/* compress remaining data with requested flush */
gz_comp(state, flush);
return state->err;
}
/* -- see zlib.h -- */
int ZEXPORT gzsetparams(file, level, strategy)
gzFile file;
int level;
int strategy;
{
gz_statep state;
z_streamp strm;
/* get internal structure */
if (file == NULL)
return Z_STREAM_ERROR;
state = (gz_statep)file;
strm = &(state->strm);
/* check that we're writing and that there's no error */
if (state->mode != GZ_WRITE || state->err != Z_OK)
return Z_STREAM_ERROR;
/* if no change is requested, then do nothing */
if (level == state->level && strategy == state->strategy)
return Z_OK;
/* check for seek request */
if (state->seek) {
state->seek = 0;
if (gz_zero(state, state->skip) == -1)
return -1;
}
/* change compression parameters for subsequent input */
if (state->size) {
/* flush previous input with previous parameters before changing */
if (strm->avail_in && gz_comp(state, Z_PARTIAL_FLUSH) == -1)
return state->err;
deflateParams(strm, level, strategy);
}
state->level = level;
state->strategy = strategy;
return Z_OK;
}
/* -- see zlib.h -- */
int ZEXPORT gzclose_w(file)
gzFile file;
{
int ret = 0;
gz_statep state;
/* get internal structure */
if (file == NULL)
return Z_STREAM_ERROR;
state = (gz_statep)file;
/* check that we're writing */
if (state->mode != GZ_WRITE)
return Z_STREAM_ERROR;
/* check for seek request */
if (state->seek) {
state->seek = 0;
ret += gz_zero(state, state->skip);
}
/* flush, free memory, and close file */
ret += gz_comp(state, Z_FINISH);
(void)deflateEnd(&(state->strm));
free(state->out);
free(state->in);
gz_error(state, Z_OK, NULL);
free(state->path);
ret += close(state->fd);
free(state);
return ret ? Z_ERRNO : Z_OK;
}

View File

@ -1,5 +1,5 @@
/* infback.c -- inflate using a call-back interface
* Copyright (C) 1995-2005 Mark Adler
* Copyright (C) 1995-2009 Mark Adler
* For conditions of distribution and use, see copyright notice in zlib.h
*/
@ -55,7 +55,7 @@ int stream_size;
state->wbits = windowBits;
state->wsize = 1U << windowBits;
state->window = window;
state->write = 0;
state->wnext = 0;
state->whave = 0;
return Z_OK;
}
@ -253,7 +253,7 @@ void FAR *out_desc;
unsigned bits; /* bits in bit buffer */
unsigned copy; /* number of stored or match bytes to copy */
unsigned char FAR *from; /* where to copy match bytes from */
code this; /* current decoding table entry */
code here; /* current decoding table entry */
code last; /* parent table entry */
unsigned len; /* length to copy for repeats, bits to drop */
int ret; /* return code */
@ -389,19 +389,19 @@ void FAR *out_desc;
state->have = 0;
while (state->have < state->nlen + state->ndist) {
for (;;) {
this = state->lencode[BITS(state->lenbits)];
if ((unsigned)(this.bits) <= bits) break;
here = state->lencode[BITS(state->lenbits)];
if ((unsigned)(here.bits) <= bits) break;
PULLBYTE();
}
if (this.val < 16) {
NEEDBITS(this.bits);
DROPBITS(this.bits);
state->lens[state->have++] = this.val;
if (here.val < 16) {
NEEDBITS(here.bits);
DROPBITS(here.bits);
state->lens[state->have++] = here.val;
}
else {
if (this.val == 16) {
NEEDBITS(this.bits + 2);
DROPBITS(this.bits);
if (here.val == 16) {
NEEDBITS(here.bits + 2);
DROPBITS(here.bits);
if (state->have == 0) {
strm->msg = (char *)"invalid bit length repeat";
state->mode = BAD;
@ -411,16 +411,16 @@ void FAR *out_desc;
copy = 3 + BITS(2);
DROPBITS(2);
}
else if (this.val == 17) {
NEEDBITS(this.bits + 3);
DROPBITS(this.bits);
else if (here.val == 17) {
NEEDBITS(here.bits + 3);
DROPBITS(here.bits);
len = 0;
copy = 3 + BITS(3);
DROPBITS(3);
}
else {
NEEDBITS(this.bits + 7);
DROPBITS(this.bits);
NEEDBITS(here.bits + 7);
DROPBITS(here.bits);
len = 0;
copy = 11 + BITS(7);
DROPBITS(7);
@ -438,7 +438,16 @@ void FAR *out_desc;
/* handle error breaks in while */
if (state->mode == BAD) break;
/* build code tables */
/* check for end-of-block code (better have one) */
if (state->lens[256] == 0) {
strm->msg = (char *)"invalid code -- missing end-of-block";
state->mode = BAD;
break;
}
/* build code tables -- note: do not change the lenbits or distbits
values here (9 and 6) without reading the comments in inftrees.h
concerning the ENOUGH constants, which depend on those values */
state->next = state->codes;
state->lencode = (code const FAR *)(state->next);
state->lenbits = 9;
@ -474,28 +483,28 @@ void FAR *out_desc;
/* get a literal, length, or end-of-block code */
for (;;) {
this = state->lencode[BITS(state->lenbits)];
if ((unsigned)(this.bits) <= bits) break;
here = state->lencode[BITS(state->lenbits)];
if ((unsigned)(here.bits) <= bits) break;
PULLBYTE();
}
if (this.op && (this.op & 0xf0) == 0) {
last = this;
if (here.op && (here.op & 0xf0) == 0) {
last = here;
for (;;) {
this = state->lencode[last.val +
here = state->lencode[last.val +
(BITS(last.bits + last.op) >> last.bits)];
if ((unsigned)(last.bits + this.bits) <= bits) break;
if ((unsigned)(last.bits + here.bits) <= bits) break;
PULLBYTE();
}
DROPBITS(last.bits);
}
DROPBITS(this.bits);
state->length = (unsigned)this.val;
DROPBITS(here.bits);
state->length = (unsigned)here.val;
/* process literal */
if (this.op == 0) {
Tracevv((stderr, this.val >= 0x20 && this.val < 0x7f ?
if (here.op == 0) {
Tracevv((stderr, here.val >= 0x20 && here.val < 0x7f ?
"inflate: literal '%c'\n" :
"inflate: literal 0x%02x\n", this.val));
"inflate: literal 0x%02x\n", here.val));
ROOM();
*put++ = (unsigned char)(state->length);
left--;
@ -504,21 +513,21 @@ void FAR *out_desc;
}
/* process end of block */
if (this.op & 32) {
if (here.op & 32) {
Tracevv((stderr, "inflate: end of block\n"));
state->mode = TYPE;
break;
}
/* invalid code */
if (this.op & 64) {
if (here.op & 64) {
strm->msg = (char *)"invalid literal/length code";
state->mode = BAD;
break;
}
/* length code -- get extra bits, if any */
state->extra = (unsigned)(this.op) & 15;
state->extra = (unsigned)(here.op) & 15;
if (state->extra != 0) {
NEEDBITS(state->extra);
state->length += BITS(state->extra);
@ -528,30 +537,30 @@ void FAR *out_desc;
/* get distance code */
for (;;) {
this = state->distcode[BITS(state->distbits)];
if ((unsigned)(this.bits) <= bits) break;
here = state->distcode[BITS(state->distbits)];
if ((unsigned)(here.bits) <= bits) break;
PULLBYTE();
}
if ((this.op & 0xf0) == 0) {
last = this;
if ((here.op & 0xf0) == 0) {
last = here;
for (;;) {
this = state->distcode[last.val +
here = state->distcode[last.val +
(BITS(last.bits + last.op) >> last.bits)];
if ((unsigned)(last.bits + this.bits) <= bits) break;
if ((unsigned)(last.bits + here.bits) <= bits) break;
PULLBYTE();
}
DROPBITS(last.bits);
}
DROPBITS(this.bits);
if (this.op & 64) {
DROPBITS(here.bits);
if (here.op & 64) {
strm->msg = (char *)"invalid distance code";
state->mode = BAD;
break;
}
state->offset = (unsigned)this.val;
state->offset = (unsigned)here.val;
/* get distance extra bits, if any */
state->extra = (unsigned)(this.op) & 15;
state->extra = (unsigned)(here.op) & 15;
if (state->extra != 0) {
NEEDBITS(state->extra);
state->offset += BITS(state->extra);

View File

@ -1,5 +1,5 @@
/* inffast.c -- fast decoding
* Copyright (C) 1995-2004 Mark Adler
* Copyright (C) 1995-2008 Mark Adler
* For conditions of distribution and use, see copyright notice in zlib.h
*/
@ -79,7 +79,7 @@ unsigned start; /* inflate()'s starting value for strm->avail_out */
#endif
unsigned wsize; /* window size or zero if not using window */
unsigned whave; /* valid bytes in the window */
unsigned write; /* window write index */
unsigned wnext; /* window write index */
unsigned char FAR *window; /* allocated sliding window, if wsize != 0 */
unsigned long hold; /* local strm->hold */
unsigned bits; /* local strm->bits */
@ -87,7 +87,7 @@ unsigned start; /* inflate()'s starting value for strm->avail_out */
code const FAR *dcode; /* local strm->distcode */
unsigned lmask; /* mask for first level of length codes */
unsigned dmask; /* mask for first level of distance codes */
code this; /* retrieved table entry */
code here; /* retrieved table entry */
unsigned op; /* code bits, operation, extra bits, or */
/* window position, window bytes to copy */
unsigned len; /* match length, unused bytes */
@ -106,7 +106,7 @@ unsigned start; /* inflate()'s starting value for strm->avail_out */
#endif
wsize = state->wsize;
whave = state->whave;
write = state->write;
wnext = state->wnext;
window = state->window;
hold = state->hold;
bits = state->bits;
@ -124,20 +124,20 @@ unsigned start; /* inflate()'s starting value for strm->avail_out */
hold += (unsigned long)(PUP(in)) << bits;
bits += 8;
}
this = lcode[hold & lmask];
here = lcode[hold & lmask];
dolen:
op = (unsigned)(this.bits);
op = (unsigned)(here.bits);
hold >>= op;
bits -= op;
op = (unsigned)(this.op);
op = (unsigned)(here.op);
if (op == 0) { /* literal */
Tracevv((stderr, this.val >= 0x20 && this.val < 0x7f ?
Tracevv((stderr, here.val >= 0x20 && here.val < 0x7f ?
"inflate: literal '%c'\n" :
"inflate: literal 0x%02x\n", this.val));
PUP(out) = (unsigned char)(this.val);
"inflate: literal 0x%02x\n", here.val));
PUP(out) = (unsigned char)(here.val);
}
else if (op & 16) { /* length base */
len = (unsigned)(this.val);
len = (unsigned)(here.val);
op &= 15; /* number of extra bits */
if (op) {
if (bits < op) {
@ -155,14 +155,14 @@ unsigned start; /* inflate()'s starting value for strm->avail_out */
hold += (unsigned long)(PUP(in)) << bits;
bits += 8;
}
this = dcode[hold & dmask];
here = dcode[hold & dmask];
dodist:
op = (unsigned)(this.bits);
op = (unsigned)(here.bits);
hold >>= op;
bits -= op;
op = (unsigned)(this.op);
op = (unsigned)(here.op);
if (op & 16) { /* distance base */
dist = (unsigned)(this.val);
dist = (unsigned)(here.val);
op &= 15; /* number of extra bits */
if (bits < op) {
hold += (unsigned long)(PUP(in)) << bits;
@ -187,12 +187,34 @@ unsigned start; /* inflate()'s starting value for strm->avail_out */
if (dist > op) { /* see if copy from window */
op = dist - op; /* distance back in window */
if (op > whave) {
strm->msg = (char *)"invalid distance too far back";
state->mode = BAD;
break;
if (state->sane) {
strm->msg =
(char *)"invalid distance too far back";
state->mode = BAD;
break;
}
#ifdef INFLATE_ALLOW_INVALID_DISTANCE_TOOFAR_ARRR
if (len <= op - whave) {
do {
PUP(out) = 0;
} while (--len);
continue;
}
len -= op - whave;
do {
PUP(out) = 0;
} while (--op > whave);
if (op == 0) {
from = out - dist;
do {
PUP(out) = PUP(from);
} while (--len);
continue;
}
#endif
}
from = window - OFF;
if (write == 0) { /* very common case */
if (wnext == 0) { /* very common case */
from += wsize - op;
if (op < len) { /* some from window */
len -= op;
@ -202,17 +224,17 @@ unsigned start; /* inflate()'s starting value for strm->avail_out */
from = out - dist; /* rest from output */
}
}
else if (write < op) { /* wrap around window */
from += wsize + write - op;
op -= write;
else if (wnext < op) { /* wrap around window */
from += wsize + wnext - op;
op -= wnext;
if (op < len) { /* some from end of window */
len -= op;
do {
PUP(out) = PUP(from);
} while (--op);
from = window - OFF;
if (write < len) { /* some from start of window */
op = write;
if (wnext < len) { /* some from start of window */
op = wnext;
len -= op;
do {
PUP(out) = PUP(from);
@ -222,7 +244,7 @@ unsigned start; /* inflate()'s starting value for strm->avail_out */
}
}
else { /* contiguous in window */
from += write - op;
from += wnext - op;
if (op < len) { /* some from window */
len -= op;
do {
@ -259,7 +281,7 @@ unsigned start; /* inflate()'s starting value for strm->avail_out */
}
}
else if ((op & 64) == 0) { /* 2nd level distance code */
this = dcode[this.val + (hold & ((1U << op) - 1))];
here = dcode[here.val + (hold & ((1U << op) - 1))];
goto dodist;
}
else {
@ -269,7 +291,7 @@ unsigned start; /* inflate()'s starting value for strm->avail_out */
}
}
else if ((op & 64) == 0) { /* 2nd level length code */
this = lcode[this.val + (hold & ((1U << op) - 1))];
here = lcode[here.val + (hold & ((1U << op) - 1))];
goto dolen;
}
else if (op & 32) { /* end-of-block */
@ -305,7 +327,7 @@ unsigned start; /* inflate()'s starting value for strm->avail_out */
inflate_fast() speedups that turned out slower (on a PowerPC G3 750CXe):
- Using bit fields for code structure
- Different op definition to avoid & for extra bits (do & for table bits)
- Three separate decoding do-loops for direct, window, and write == 0
- Three separate decoding do-loops for direct, window, and wnext == 0
- Special case for distance > 1 copies to do overlapped load and store copy
- Explicit branch predictions (based on measured branch probabilities)
- Deferring match copy and interspersed it with decoding subsequent codes

282
inflate.c
View File

@ -1,5 +1,5 @@
/* inflate.c -- zlib decompression
* Copyright (C) 1995-2005 Mark Adler
* Copyright (C) 1995-2010 Mark Adler
* For conditions of distribution and use, see copyright notice in zlib.h
*/
@ -45,7 +45,7 @@
* - Rearrange window copies in inflate_fast() for speed and simplification
* - Unroll last copy for window match in inflate_fast()
* - Use local copies of window variables in inflate_fast() for speed
* - Pull out common write == 0 case for speed in inflate_fast()
* - Pull out common wnext == 0 case for speed in inflate_fast()
* - Make op and len in inflate_fast() unsigned for consistency
* - Add FAR to lcode and dcode declarations in inflate_fast()
* - Simplified bad distance check in inflate_fast()
@ -117,28 +117,52 @@ z_streamp strm;
state->head = Z_NULL;
state->wsize = 0;
state->whave = 0;
state->write = 0;
state->wnext = 0;
state->hold = 0;
state->bits = 0;
state->lencode = state->distcode = state->next = state->codes;
state->sane = 1;
state->back = -1;
Tracev((stderr, "inflate: reset\n"));
return Z_OK;
}
int ZEXPORT inflatePrime(strm, bits, value)
int ZEXPORT inflateReset2(strm, windowBits)
z_streamp strm;
int bits;
int value;
int windowBits;
{
int wrap;
struct inflate_state FAR *state;
/* get the state */
if (strm == Z_NULL || strm->state == Z_NULL) return Z_STREAM_ERROR;
state = (struct inflate_state FAR *)strm->state;
if (bits > 16 || state->bits + bits > 32) return Z_STREAM_ERROR;
value &= (1L << bits) - 1;
state->hold += value << state->bits;
state->bits += bits;
return Z_OK;
/* extract wrap request from windowBits parameter */
if (windowBits < 0) {
wrap = 0;
windowBits = -windowBits;
}
else {
wrap = (windowBits >> 4) + 1;
#ifdef GUNZIP
if (windowBits < 48)
windowBits &= 15;
#endif
}
/* set number of window bits, free window if different */
if (windowBits && (windowBits < 8 || windowBits > 15))
return Z_STREAM_ERROR;
if (state->window != Z_NULL && state->wbits != (unsigned)windowBits) {
ZFREE(strm, state->window);
state->window = Z_NULL;
}
/* update state and reset the rest of it */
state->wrap = wrap;
state->wbits = (unsigned)windowBits;
return inflateReset(strm);
}
int ZEXPORT inflateInit2_(strm, windowBits, version, stream_size)
@ -147,6 +171,7 @@ int windowBits;
const char *version;
int stream_size;
{
int ret;
struct inflate_state FAR *state;
if (version == Z_NULL || version[0] != ZLIB_VERSION[0] ||
@ -164,24 +189,13 @@ int stream_size;
if (state == Z_NULL) return Z_MEM_ERROR;
Tracev((stderr, "inflate: allocated\n"));
strm->state = (struct internal_state FAR *)state;
if (windowBits < 0) {
state->wrap = 0;
windowBits = -windowBits;
}
else {
state->wrap = (windowBits >> 4) + 1;
#ifdef GUNZIP
if (windowBits < 48) windowBits &= 15;
#endif
}
if (windowBits < 8 || windowBits > 15) {
state->window = Z_NULL;
ret = inflateReset2(strm, windowBits);
if (ret != Z_OK) {
ZFREE(strm, state);
strm->state = Z_NULL;
return Z_STREAM_ERROR;
}
state->wbits = (unsigned)windowBits;
state->window = Z_NULL;
return inflateReset(strm);
return ret;
}
int ZEXPORT inflateInit_(strm, version, stream_size)
@ -192,6 +206,27 @@ int stream_size;
return inflateInit2_(strm, DEF_WBITS, version, stream_size);
}
int ZEXPORT inflatePrime(strm, bits, value)
z_streamp strm;
int bits;
int value;
{
struct inflate_state FAR *state;
if (strm == Z_NULL || strm->state == Z_NULL) return Z_STREAM_ERROR;
state = (struct inflate_state FAR *)strm->state;
if (bits < 0) {
state->hold = 0;
state->bits = 0;
return Z_OK;
}
if (bits > 16 || state->bits + bits > 32) return Z_STREAM_ERROR;
value &= (1L << bits) - 1;
state->hold += value << state->bits;
state->bits += bits;
return Z_OK;
}
/*
Return state with length and distance decoding tables and index sizes set to
fixed code decoding. Normally this returns fixed tables from inffixed.h.
@ -340,7 +375,7 @@ unsigned out;
/* if window not in use yet, initialize */
if (state->wsize == 0) {
state->wsize = 1U << state->wbits;
state->write = 0;
state->wnext = 0;
state->whave = 0;
}
@ -348,22 +383,22 @@ unsigned out;
copy = out - strm->avail_out;
if (copy >= state->wsize) {
zmemcpy(state->window, strm->next_out - state->wsize, state->wsize);
state->write = 0;
state->wnext = 0;
state->whave = state->wsize;
}
else {
dist = state->wsize - state->write;
dist = state->wsize - state->wnext;
if (dist > copy) dist = copy;
zmemcpy(state->window + state->write, strm->next_out - copy, dist);
zmemcpy(state->window + state->wnext, strm->next_out - copy, dist);
copy -= dist;
if (copy) {
zmemcpy(state->window, strm->next_out - copy, copy);
state->write = copy;
state->wnext = copy;
state->whave = state->wsize;
}
else {
state->write += dist;
if (state->write == state->wsize) state->write = 0;
state->wnext += dist;
if (state->wnext == state->wsize) state->wnext = 0;
if (state->whave < state->wsize) state->whave += dist;
}
}
@ -564,7 +599,7 @@ int flush;
unsigned in, out; /* save starting available input and output */
unsigned copy; /* number of stored or match bytes to copy */
unsigned char FAR *from; /* where to copy match bytes from */
code this; /* current decoding table entry */
code here; /* current decoding table entry */
code last; /* parent table entry */
unsigned len; /* length to copy for repeats, bits to drop */
int ret; /* return code */
@ -619,7 +654,9 @@ int flush;
}
DROPBITS(4);
len = BITS(4) + 8;
if (len > state->wbits) {
if (state->wbits == 0)
state->wbits = len;
else if (len > state->wbits) {
strm->msg = (char *)"invalid window size";
state->mode = BAD;
break;
@ -771,7 +808,7 @@ int flush;
strm->adler = state->check = adler32(0L, Z_NULL, 0);
state->mode = TYPE;
case TYPE:
if (flush == Z_BLOCK) goto inf_leave;
if (flush == Z_BLOCK || flush == Z_TREES) goto inf_leave;
case TYPEDO:
if (state->last) {
BYTEBITS();
@ -791,7 +828,11 @@ int flush;
fixedtables(state);
Tracev((stderr, "inflate: fixed codes block%s\n",
state->last ? " (last)" : ""));
state->mode = LEN; /* decode codes */
state->mode = LEN_; /* decode codes */
if (flush == Z_TREES) {
DROPBITS(2);
goto inf_leave;
}
break;
case 2: /* dynamic block */
Tracev((stderr, "inflate: dynamic codes block%s\n",
@ -816,6 +857,9 @@ int flush;
Tracev((stderr, "inflate: stored length %u\n",
state->length));
INITBITS();
state->mode = COPY_;
if (flush == Z_TREES) goto inf_leave;
case COPY_:
state->mode = COPY;
case COPY:
copy = state->length;
@ -876,19 +920,19 @@ int flush;
case CODELENS:
while (state->have < state->nlen + state->ndist) {
for (;;) {
this = state->lencode[BITS(state->lenbits)];
if ((unsigned)(this.bits) <= bits) break;
here = state->lencode[BITS(state->lenbits)];
if ((unsigned)(here.bits) <= bits) break;
PULLBYTE();
}
if (this.val < 16) {
NEEDBITS(this.bits);
DROPBITS(this.bits);
state->lens[state->have++] = this.val;
if (here.val < 16) {
NEEDBITS(here.bits);
DROPBITS(here.bits);
state->lens[state->have++] = here.val;
}
else {
if (this.val == 16) {
NEEDBITS(this.bits + 2);
DROPBITS(this.bits);
if (here.val == 16) {
NEEDBITS(here.bits + 2);
DROPBITS(here.bits);
if (state->have == 0) {
strm->msg = (char *)"invalid bit length repeat";
state->mode = BAD;
@ -898,16 +942,16 @@ int flush;
copy = 3 + BITS(2);
DROPBITS(2);
}
else if (this.val == 17) {
NEEDBITS(this.bits + 3);
DROPBITS(this.bits);
else if (here.val == 17) {
NEEDBITS(here.bits + 3);
DROPBITS(here.bits);
len = 0;
copy = 3 + BITS(3);
DROPBITS(3);
}
else {
NEEDBITS(this.bits + 7);
DROPBITS(this.bits);
NEEDBITS(here.bits + 7);
DROPBITS(here.bits);
len = 0;
copy = 11 + BITS(7);
DROPBITS(7);
@ -925,7 +969,16 @@ int flush;
/* handle error breaks in while */
if (state->mode == BAD) break;
/* build code tables */
/* check for end-of-block code (better have one) */
if (state->lens[256] == 0) {
strm->msg = (char *)"invalid code -- missing end-of-block";
state->mode = BAD;
break;
}
/* build code tables -- note: do not change the lenbits or distbits
values here (9 and 6) without reading the comments in inftrees.h
concerning the ENOUGH constants, which depend on those values */
state->next = state->codes;
state->lencode = (code const FAR *)(state->next);
state->lenbits = 9;
@ -946,88 +999,102 @@ int flush;
break;
}
Tracev((stderr, "inflate: codes ok\n"));
state->mode = LEN_;
if (flush == Z_TREES) goto inf_leave;
case LEN_:
state->mode = LEN;
case LEN:
if (have >= 6 && left >= 258) {
RESTORE();
inflate_fast(strm, out);
LOAD();
if (state->mode == TYPE)
state->back = -1;
break;
}
state->back = 0;
for (;;) {
this = state->lencode[BITS(state->lenbits)];
if ((unsigned)(this.bits) <= bits) break;
here = state->lencode[BITS(state->lenbits)];
if ((unsigned)(here.bits) <= bits) break;
PULLBYTE();
}
if (this.op && (this.op & 0xf0) == 0) {
last = this;
if (here.op && (here.op & 0xf0) == 0) {
last = here;
for (;;) {
this = state->lencode[last.val +
here = state->lencode[last.val +
(BITS(last.bits + last.op) >> last.bits)];
if ((unsigned)(last.bits + this.bits) <= bits) break;
if ((unsigned)(last.bits + here.bits) <= bits) break;
PULLBYTE();
}
DROPBITS(last.bits);
state->back += last.bits;
}
DROPBITS(this.bits);
state->length = (unsigned)this.val;
if ((int)(this.op) == 0) {
Tracevv((stderr, this.val >= 0x20 && this.val < 0x7f ?
DROPBITS(here.bits);
state->back += here.bits;
state->length = (unsigned)here.val;
if ((int)(here.op) == 0) {
Tracevv((stderr, here.val >= 0x20 && here.val < 0x7f ?
"inflate: literal '%c'\n" :
"inflate: literal 0x%02x\n", this.val));
"inflate: literal 0x%02x\n", here.val));
state->mode = LIT;
break;
}
if (this.op & 32) {
if (here.op & 32) {
Tracevv((stderr, "inflate: end of block\n"));
state->back = -1;
state->mode = TYPE;
break;
}
if (this.op & 64) {
if (here.op & 64) {
strm->msg = (char *)"invalid literal/length code";
state->mode = BAD;
break;
}
state->extra = (unsigned)(this.op) & 15;
state->extra = (unsigned)(here.op) & 15;
state->mode = LENEXT;
case LENEXT:
if (state->extra) {
NEEDBITS(state->extra);
state->length += BITS(state->extra);
DROPBITS(state->extra);
state->back += state->extra;
}
Tracevv((stderr, "inflate: length %u\n", state->length));
state->was = state->length;
state->mode = DIST;
case DIST:
for (;;) {
this = state->distcode[BITS(state->distbits)];
if ((unsigned)(this.bits) <= bits) break;
here = state->distcode[BITS(state->distbits)];
if ((unsigned)(here.bits) <= bits) break;
PULLBYTE();
}
if ((this.op & 0xf0) == 0) {
last = this;
if ((here.op & 0xf0) == 0) {
last = here;
for (;;) {
this = state->distcode[last.val +
here = state->distcode[last.val +
(BITS(last.bits + last.op) >> last.bits)];
if ((unsigned)(last.bits + this.bits) <= bits) break;
if ((unsigned)(last.bits + here.bits) <= bits) break;
PULLBYTE();
}
DROPBITS(last.bits);
state->back += last.bits;
}
DROPBITS(this.bits);
if (this.op & 64) {
DROPBITS(here.bits);
state->back += here.bits;
if (here.op & 64) {
strm->msg = (char *)"invalid distance code";
state->mode = BAD;
break;
}
state->offset = (unsigned)this.val;
state->extra = (unsigned)(this.op) & 15;
state->offset = (unsigned)here.val;
state->extra = (unsigned)(here.op) & 15;
state->mode = DISTEXT;
case DISTEXT:
if (state->extra) {
NEEDBITS(state->extra);
state->offset += BITS(state->extra);
DROPBITS(state->extra);
state->back += state->extra;
}
#ifdef INFLATE_STRICT
if (state->offset > state->dmax) {
@ -1036,11 +1103,6 @@ int flush;
break;
}
#endif
if (state->offset > state->whave + out - left) {
strm->msg = (char *)"invalid distance too far back";
state->mode = BAD;
break;
}
Tracevv((stderr, "inflate: distance %u\n", state->offset));
state->mode = MATCH;
case MATCH:
@ -1048,12 +1110,32 @@ int flush;
copy = out - left;
if (state->offset > copy) { /* copy from window */
copy = state->offset - copy;
if (copy > state->write) {
copy -= state->write;
if (copy > state->whave) {
if (state->sane) {
strm->msg = (char *)"invalid distance too far back";
state->mode = BAD;
break;
}
#ifdef INFLATE_ALLOW_INVALID_DISTANCE_TOOFAR_ARRR
Trace((stderr, "inflate.c too far\n"));
copy -= state->whave;
if (copy > state->length) copy = state->length;
if (copy > left) copy = left;
left -= copy;
state->length -= copy;
do {
*put++ = 0;
} while (--copy);
if (state->length == 0) state->mode = LEN;
break;
#endif
}
if (copy > state->wnext) {
copy -= state->wnext;
from = state->window + (state->wsize - copy);
}
else
from = state->window + (state->write - copy);
from = state->window + (state->wnext - copy);
if (copy > state->length) copy = state->length;
}
else { /* copy from output */
@ -1146,7 +1228,8 @@ int flush;
strm->adler = state->check =
UPDATE(state->check, strm->next_out - out, out);
strm->data_type = state->bits + (state->last ? 64 : 0) +
(state->mode == TYPE ? 128 : 0);
(state->mode == TYPE ? 128 : 0) +
(state->mode == LEN_ || state->mode == COPY_ ? 256 : 0);
if (((in == 0 && out == 0) || flush == Z_FINISH) && ret == Z_OK)
ret = Z_BUF_ERROR;
return ret;
@ -1366,3 +1449,32 @@ z_streamp source;
dest->state = (struct internal_state FAR *)copy;
return Z_OK;
}
int ZEXPORT inflateUndermine(strm, subvert)
z_streamp strm;
int subvert;
{
struct inflate_state FAR *state;
if (strm == Z_NULL || strm->state == Z_NULL) return Z_STREAM_ERROR;
state = (struct inflate_state FAR *)strm->state;
state->sane = !subvert;
#ifdef INFLATE_ALLOW_INVALID_DISTANCE_TOOFAR_ARRR
return Z_OK;
#else
state->sane = 1;
return Z_DATA_ERROR;
#endif
}
long ZEXPORT inflateMark(strm)
z_streamp strm;
{
struct inflate_state FAR *state;
if (strm == Z_NULL || strm->state == Z_NULL) return -1L << 16;
state = (struct inflate_state FAR *)strm->state;
return ((long)(state->back) << 16) +
(state->mode == COPY ? state->length :
(state->mode == MATCH ? state->was - state->length : 0));
}

View File

@ -1,5 +1,5 @@
/* inflate.h -- internal inflate state definition
* Copyright (C) 1995-2004 Mark Adler
* Copyright (C) 1995-2009 Mark Adler
* For conditions of distribution and use, see copyright notice in zlib.h
*/
@ -32,11 +32,13 @@ typedef enum {
TYPE, /* i: waiting for type bits, including last-flag bit */
TYPEDO, /* i: same, but skip check to exit inflate on new block */
STORED, /* i: waiting for stored size (length and complement) */
COPY_, /* i/o: same as COPY below, but only first time in */
COPY, /* i/o: waiting for input or output to copy stored block */
TABLE, /* i: waiting for dynamic block table lengths */
LENLENS, /* i: waiting for code length code lengths */
CODELENS, /* i: waiting for length/lit and distance code lengths */
LEN, /* i: waiting for length/lit code */
LEN_, /* i: same as LEN below, but only first time in */
LEN, /* i: waiting for length/lit/eob code */
LENEXT, /* i: waiting for length extra bits */
DIST, /* i: waiting for distance code */
DISTEXT, /* i: waiting for distance extra bits */
@ -53,19 +55,21 @@ typedef enum {
/*
State transitions between above modes -
(most modes can go to the BAD or MEM mode -- not shown for clarity)
(most modes can go to BAD or MEM on error -- not shown for clarity)
Process header:
HEAD -> (gzip) or (zlib)
(gzip) -> FLAGS -> TIME -> OS -> EXLEN -> EXTRA -> NAME
NAME -> COMMENT -> HCRC -> TYPE
HEAD -> (gzip) or (zlib) or (raw)
(gzip) -> FLAGS -> TIME -> OS -> EXLEN -> EXTRA -> NAME -> COMMENT ->
HCRC -> TYPE
(zlib) -> DICTID or TYPE
DICTID -> DICT -> TYPE
(raw) -> TYPEDO
Read deflate blocks:
TYPE -> STORED or TABLE or LEN or CHECK
STORED -> COPY -> TYPE
TABLE -> LENLENS -> CODELENS -> LEN
Read deflate codes:
TYPE -> TYPEDO -> STORED or TABLE or LEN_ or CHECK
STORED -> COPY_ -> COPY -> TYPE
TABLE -> LENLENS -> CODELENS -> LEN_
LEN_ -> LEN
Read deflate codes in fixed or dynamic block:
LEN -> LENEXT or LIT or TYPE
LENEXT -> DIST -> DISTEXT -> MATCH -> LEN
LIT -> LEN
@ -73,7 +77,7 @@ typedef enum {
CHECK -> LENGTH -> DONE
*/
/* state maintained between inflate() calls. Approximately 7K bytes. */
/* state maintained between inflate() calls. Approximately 10K bytes. */
struct inflate_state {
inflate_mode mode; /* current inflate mode */
int last; /* true if processing last block */
@ -88,7 +92,7 @@ struct inflate_state {
unsigned wbits; /* log base 2 of requested window size */
unsigned wsize; /* window size or zero if not using window */
unsigned whave; /* valid bytes in the window */
unsigned write; /* window write index */
unsigned wnext; /* window write index */
unsigned char FAR *window; /* allocated sliding window, if needed */
/* bit accumulator */
unsigned long hold; /* input bit accumulator */
@ -112,4 +116,7 @@ struct inflate_state {
unsigned short lens[320]; /* temporary storage for code lengths */
unsigned short work[288]; /* work area for code table building */
code codes[ENOUGH]; /* space for code tables */
int sane; /* if false, allow invalid distance too far */
int back; /* bits back of last unprocessed length/lit */
unsigned was; /* initial length of match */
};

View File

@ -1,5 +1,5 @@
/* inftrees.c -- generate Huffman trees for efficient decoding
* Copyright (C) 1995-2005 Mark Adler
* Copyright (C) 1995-2010 Mark Adler
* For conditions of distribution and use, see copyright notice in zlib.h
*/
@ -9,7 +9,7 @@
#define MAXBITS 15
const char inflate_copyright[] =
" inflate 1.2.3 Copyright 1995-2005 Mark Adler ";
" inflate 1.2.4 Copyright 1995-2010 Mark Adler ";
/*
If you use the zlib library in a product, an acknowledgment is welcome
in the documentation of your product. If for some reason you cannot
@ -50,7 +50,7 @@ unsigned short FAR *work;
unsigned fill; /* index for replicating entries */
unsigned low; /* low bits for current root entry */
unsigned mask; /* mask for low root bits */
code this; /* table entry for duplication */
code here; /* table entry for duplication */
code FAR *next; /* next available space in table */
const unsigned short FAR *base; /* base value table to use */
const unsigned short FAR *extra; /* extra bits table to use */
@ -62,7 +62,7 @@ unsigned short FAR *work;
35, 43, 51, 59, 67, 83, 99, 115, 131, 163, 195, 227, 258, 0, 0};
static const unsigned short lext[31] = { /* Length codes 257..285 extra */
16, 16, 16, 16, 16, 16, 16, 16, 17, 17, 17, 17, 18, 18, 18, 18,
19, 19, 19, 19, 20, 20, 20, 20, 21, 21, 21, 21, 16, 201, 196};
19, 19, 19, 19, 20, 20, 20, 20, 21, 21, 21, 21, 16, 64, 195};
static const unsigned short dbase[32] = { /* Distance codes 0..29 base */
1, 2, 3, 4, 5, 7, 9, 13, 17, 25, 33, 49, 65, 97, 129, 193,
257, 385, 513, 769, 1025, 1537, 2049, 3073, 4097, 6145,
@ -115,15 +115,15 @@ unsigned short FAR *work;
if (count[max] != 0) break;
if (root > max) root = max;
if (max == 0) { /* no symbols to code at all */
this.op = (unsigned char)64; /* invalid code marker */
this.bits = (unsigned char)1;
this.val = (unsigned short)0;
*(*table)++ = this; /* make a table to force an error */
*(*table)++ = this;
here.op = (unsigned char)64; /* invalid code marker */
here.bits = (unsigned char)1;
here.val = (unsigned short)0;
*(*table)++ = here; /* make a table to force an error */
*(*table)++ = here;
*bits = 1;
return 0; /* no symbols, but wait for decoding to report error */
}
for (min = 1; min <= MAXBITS; min++)
for (min = 1; min < max; min++)
if (count[min] != 0) break;
if (root < min) root = min;
@ -166,11 +166,10 @@ unsigned short FAR *work;
entered in the tables.
used keeps track of how many table entries have been allocated from the
provided *table space. It is checked when a LENS table is being made
against the space in *table, ENOUGH, minus the maximum space needed by
the worst case distance code, MAXD. This should never happen, but the
sufficiency of ENOUGH has not been proven exhaustively, hence the check.
This assumes that when type == LENS, bits == 9.
provided *table space. It is checked for LENS and DIST tables against
the constants ENOUGH_LENS and ENOUGH_DISTS to guard against changes in
the initial root table size constants. See the comments in inftrees.h
for more information.
sym increments through all symbols, and the loop terminates when
all codes of length max, i.e. all codes, have been processed. This
@ -209,24 +208,25 @@ unsigned short FAR *work;
mask = used - 1; /* mask for comparing low */
/* check available table space */
if (type == LENS && used >= ENOUGH - MAXD)
if ((type == LENS && used >= ENOUGH_LENS) ||
(type == DISTS && used >= ENOUGH_DISTS))
return 1;
/* process all codes and make table entries */
for (;;) {
/* create table entry */
this.bits = (unsigned char)(len - drop);
here.bits = (unsigned char)(len - drop);
if ((int)(work[sym]) < end) {
this.op = (unsigned char)0;
this.val = work[sym];
here.op = (unsigned char)0;
here.val = work[sym];
}
else if ((int)(work[sym]) > end) {
this.op = (unsigned char)(extra[work[sym]]);
this.val = base[work[sym]];
here.op = (unsigned char)(extra[work[sym]]);
here.val = base[work[sym]];
}
else {
this.op = (unsigned char)(32 + 64); /* end of block */
this.val = 0;
here.op = (unsigned char)(32 + 64); /* end of block */
here.val = 0;
}
/* replicate for those indices with low len bits equal to huff */
@ -235,7 +235,7 @@ unsigned short FAR *work;
min = fill; /* save offset to next table */
do {
fill -= incr;
next[(huff >> drop) + fill] = this;
next[(huff >> drop) + fill] = here;
} while (fill != 0);
/* backwards increment the len-bit code huff */
@ -277,7 +277,8 @@ unsigned short FAR *work;
/* check for enough space */
used += 1U << curr;
if (type == LENS && used >= ENOUGH - MAXD)
if ((type == LENS && used >= ENOUGH_LENS) ||
(type == DISTS && used >= ENOUGH_DISTS))
return 1;
/* point entry in root table to sub-table */
@ -295,20 +296,20 @@ unsigned short FAR *work;
through high index bits. When the current sub-table is filled, the loop
drops back to the root table to fill in any remaining entries there.
*/
this.op = (unsigned char)64; /* invalid code marker */
this.bits = (unsigned char)(len - drop);
this.val = (unsigned short)0;
here.op = (unsigned char)64; /* invalid code marker */
here.bits = (unsigned char)(len - drop);
here.val = (unsigned short)0;
while (huff != 0) {
/* when done with sub-table, drop back to root table */
if (drop != 0 && (huff & mask) != low) {
drop = 0;
len = root;
next = *table;
this.bits = (unsigned char)len;
here.bits = (unsigned char)len;
}
/* put invalid code marker in table */
next[huff >> drop] = this;
next[huff >> drop] = here;
/* backwards increment the len-bit code huff */
incr = 1U << (len - 1);

View File

@ -35,15 +35,22 @@ typedef struct {
01000000 - invalid code
*/
/* Maximum size of dynamic tree. The maximum found in a long but non-
exhaustive search was 1444 code structures (852 for length/literals
and 592 for distances, the latter actually the result of an
exhaustive search). The true maximum is not known, but the value
below is more than safe. */
#define ENOUGH 2048
#define MAXD 592
/* Maximum size of the dynamic table. The maximum number of code structures is
1444, which is the sum of 852 for literal/length codes and 592 for distance
codes. These values were found by exhaustive searches using the program
examples/enough.c found in the zlib distribtution. The arguments to that
program are the number of symbols, the initial root table size, and the
maximum bit length of a code. "enough 286 9 15" for literal/length codes
returns returns 852, and "enough 30 6 15" for distance codes returns 592.
The initial root table size (9 or 6) is found in the fifth argument of the
inflate_table() calls in inflate.c and infback.c. If the root table size is
changed, then these maximum sizes would be need to be recalculated and
updated. */
#define ENOUGH_LENS 852
#define ENOUGH_DISTS 592
#define ENOUGH (ENOUGH_LENS+ENOUGH_DISTS)
/* Type of code to build for inftable() */
/* Type of code to build for inflate_table() */
typedef enum {
CODES,
LENS,

View File

@ -1,5 +1,5 @@
/* minigzip.c -- simulate gzip using the zlib compression library
* Copyright (C) 1995-2005 Jean-loup Gailly.
* Copyright (C) 1995-2006, 2010 Jean-loup Gailly.
* For conditions of distribution and use, see copyright notice in zlib.h
*/
@ -13,11 +13,10 @@
* or in pipe mode.
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
/* @(#) $Id$ */
#include <stdio.h>
#include "zlib.h"
#include <stdio.h>
#ifdef STDC
# include <string.h>
@ -55,6 +54,70 @@ __FBSDID("$FreeBSD$");
extern int unlink OF((const char *));
#endif
#if defined(UNDER_CE) && defined(NO_ERRNO_H)
# include <windows.h>
# define perror(s) pwinerror(s)
/* Map the Windows error number in ERROR to a locale-dependent error
message string and return a pointer to it. Typically, the values
for ERROR come from GetLastError.
The string pointed to shall not be modified by the application,
but may be overwritten by a subsequent call to strwinerror
The strwinerror function does not change the current setting
of GetLastError. */
static char *strwinerror (error)
DWORD error;
{
static char buf[1024];
wchar_t *msgbuf;
DWORD lasterr = GetLastError();
DWORD chars = FormatMessage(FORMAT_MESSAGE_FROM_SYSTEM
| FORMAT_MESSAGE_ALLOCATE_BUFFER,
NULL,
error,
0, /* Default language */
(LPVOID)&msgbuf,
0,
NULL);
if (chars != 0) {
/* If there is an \r\n appended, zap it. */
if (chars >= 2
&& msgbuf[chars - 2] == '\r' && msgbuf[chars - 1] == '\n') {
chars -= 2;
msgbuf[chars] = 0;
}
if (chars > sizeof (buf) - 1) {
chars = sizeof (buf) - 1;
msgbuf[chars] = 0;
}
wcstombs(buf, msgbuf, chars + 1);
LocalFree(msgbuf);
}
else {
sprintf(buf, "unknown win32 error (%ld)", error);
}
SetLastError(lasterr);
return buf;
}
static void pwinerror (s)
const char *s;
{
if (s && *s)
fprintf(stderr, "%s: %s\n", s, strwinerror(GetLastError ()));
else
fprintf(stderr, "%s\n", strwinerror(GetLastError ()));
}
#endif /* UNDER_CE && NO_ERRNO_H */
#ifndef GZ_SUFFIX
# define GZ_SUFFIX ".gz"
#endif
@ -201,9 +264,9 @@ void file_compress(file, mode)
if (strlen(file) + strlen(GZ_SUFFIX) >= sizeof(outfile)) {
fprintf(stderr, "%s: filename too long\n", prog);
exit(1);
exit(1);
}
strcpy(outfile, file);
strcat(outfile, GZ_SUFFIX);
@ -237,7 +300,7 @@ void file_uncompress(file)
if (len + strlen(GZ_SUFFIX) >= sizeof(buf)) {
fprintf(stderr, "%s: filename too long\n", prog);
exit(1);
exit(1);
}
strcpy(buf, file);
@ -304,9 +367,9 @@ int main(argc, argv)
while (argc > 0) {
if (strcmp(*argv, "-c") == 0)
copyout = 1;
copyout = 1;
else if (strcmp(*argv, "-d") == 0)
uncompr = 1;
uncompr = 1;
else if (strcmp(*argv, "-f") == 0)
outmode[3] = 'f';
else if (strcmp(*argv, "-h") == 0)
@ -335,36 +398,36 @@ int main(argc, argv)
gz_compress(stdin, file);
}
} else {
if (copyout) {
SET_BINARY_MODE(stdout);
}
if (copyout) {
SET_BINARY_MODE(stdout);
}
do {
if (uncompr) {
if (copyout) {
file = gzopen(*argv, "rb");
if (file == NULL)
fprintf(stderr, "%s: can't gzopen %s\n", prog, *argv);
else
gz_uncompress(file, stdout);
} else {
file_uncompress(*argv);
}
if (copyout) {
file = gzopen(*argv, "rb");
if (file == NULL)
fprintf(stderr, "%s: can't gzopen %s\n", prog, *argv);
else
gz_uncompress(file, stdout);
} else {
file_uncompress(*argv);
}
} else {
if (copyout) {
FILE * in = fopen(*argv, "rb");
if (copyout) {
FILE * in = fopen(*argv, "rb");
if (in == NULL) {
perror(*argv);
} else {
file = gzdopen(fileno(stdout), outmode);
if (file == NULL) error("can't gzdopen stdout");
if (in == NULL) {
perror(*argv);
} else {
file = gzdopen(fileno(stdout), outmode);
if (file == NULL) error("can't gzdopen stdout");
gz_compress(in, file);
}
} else {
file_compress(*argv, outmode);
}
gz_compress(in, file);
}
} else {
file_compress(*argv, outmode);
}
}
} while (argv++, --argc);
}

82
trees.c
View File

@ -1,5 +1,6 @@
/* trees.c -- output deflated data using Huffman coding
* Copyright (C) 1995-2005 Jean-loup Gailly
* Copyright (C) 1995-2009 Jean-loup Gailly
* detect_data_type() function provided freely by Cosmin Truta, 2006
* For conditions of distribution and use, see copyright notice in zlib.h
*/
@ -152,7 +153,7 @@ local void send_all_trees OF((deflate_state *s, int lcodes, int dcodes,
int blcodes));
local void compress_block OF((deflate_state *s, ct_data *ltree,
ct_data *dtree));
local void set_data_type OF((deflate_state *s));
local int detect_data_type OF((deflate_state *s));
local unsigned bi_reverse OF((unsigned value, int length));
local void bi_windup OF((deflate_state *s));
local void bi_flush OF((deflate_state *s));
@ -203,12 +204,12 @@ local void send_bits(s, value, length)
* unused bits in value.
*/
if (s->bi_valid > (int)Buf_size - length) {
s->bi_buf |= (value << s->bi_valid);
s->bi_buf |= (ush)value << s->bi_valid;
put_short(s, s->bi_buf);
s->bi_buf = (ush)value >> (Buf_size - s->bi_valid);
s->bi_valid += length - Buf_size;
} else {
s->bi_buf |= value << s->bi_valid;
s->bi_buf |= (ush)value << s->bi_valid;
s->bi_valid += length;
}
}
@ -218,12 +219,12 @@ local void send_bits(s, value, length)
{ int len = length;\
if (s->bi_valid > (int)Buf_size - len) {\
int val = value;\
s->bi_buf |= (val << s->bi_valid);\
s->bi_buf |= (ush)val << s->bi_valid;\
put_short(s, s->bi_buf);\
s->bi_buf = (ush)val >> (Buf_size - s->bi_valid);\
s->bi_valid += len - Buf_size;\
} else {\
s->bi_buf |= (value) << s->bi_valid;\
s->bi_buf |= (ush)(value) << s->bi_valid;\
s->bi_valid += len;\
}\
}
@ -250,11 +251,13 @@ local void tr_static_init()
if (static_init_done) return;
/* For some embedded targets, global variables are not initialized: */
#ifdef NO_INIT_GLOBAL_POINTERS
static_l_desc.static_tree = static_ltree;
static_l_desc.extra_bits = extra_lbits;
static_d_desc.static_tree = static_dtree;
static_d_desc.extra_bits = extra_dbits;
static_bl_desc.extra_bits = extra_blbits;
#endif
/* Initialize the mapping length (0..255) -> length code (0..28) */
length = 0;
@ -864,13 +867,13 @@ local void send_all_trees(s, lcodes, dcodes, blcodes)
/* ===========================================================================
* Send a stored block
*/
void _tr_stored_block(s, buf, stored_len, eof)
void _tr_stored_block(s, buf, stored_len, last)
deflate_state *s;
charf *buf; /* input block */
ulg stored_len; /* length of input block */
int eof; /* true if this is the last block for a file */
int last; /* one if this is the last block for a file */
{
send_bits(s, (STORED_BLOCK<<1)+eof, 3); /* send block type */
send_bits(s, (STORED_BLOCK<<1)+last, 3); /* send block type */
#ifdef DEBUG
s->compressed_len = (s->compressed_len + 3 + 7) & (ulg)~7L;
s->compressed_len += (stored_len + 4) << 3;
@ -918,11 +921,11 @@ void _tr_align(s)
* Determine the best encoding for the current block: dynamic trees, static
* trees or store, and output the encoded block to the zip file.
*/
void _tr_flush_block(s, buf, stored_len, eof)
void _tr_flush_block(s, buf, stored_len, last)
deflate_state *s;
charf *buf; /* input block, or NULL if too old */
ulg stored_len; /* length of input block */
int eof; /* true if this is the last block for a file */
int last; /* one if this is the last block for a file */
{
ulg opt_lenb, static_lenb; /* opt_len and static_len in bytes */
int max_blindex = 0; /* index of last bit length code of non zero freq */
@ -931,8 +934,8 @@ void _tr_flush_block(s, buf, stored_len, eof)
if (s->level > 0) {
/* Check if the file is binary or text */
if (stored_len > 0 && s->strm->data_type == Z_UNKNOWN)
set_data_type(s);
if (s->strm->data_type == Z_UNKNOWN)
s->strm->data_type = detect_data_type(s);
/* Construct the literal and distance trees */
build_tree(s, (tree_desc *)(&(s->l_desc)));
@ -978,20 +981,20 @@ void _tr_flush_block(s, buf, stored_len, eof)
* successful. If LIT_BUFSIZE <= WSIZE, it is never too late to
* transform a block into a stored block.
*/
_tr_stored_block(s, buf, stored_len, eof);
_tr_stored_block(s, buf, stored_len, last);
#ifdef FORCE_STATIC
} else if (static_lenb >= 0) { /* force static trees */
#else
} else if (s->strategy == Z_FIXED || static_lenb == opt_lenb) {
#endif
send_bits(s, (STATIC_TREES<<1)+eof, 3);
send_bits(s, (STATIC_TREES<<1)+last, 3);
compress_block(s, (ct_data *)static_ltree, (ct_data *)static_dtree);
#ifdef DEBUG
s->compressed_len += 3 + s->static_len;
#endif
} else {
send_bits(s, (DYN_TREES<<1)+eof, 3);
send_bits(s, (DYN_TREES<<1)+last, 3);
send_all_trees(s, s->l_desc.max_code+1, s->d_desc.max_code+1,
max_blindex+1);
compress_block(s, (ct_data *)s->dyn_ltree, (ct_data *)s->dyn_dtree);
@ -1005,14 +1008,14 @@ void _tr_flush_block(s, buf, stored_len, eof)
*/
init_block(s);
if (eof) {
if (last) {
bi_windup(s);
#ifdef DEBUG
s->compressed_len += 7; /* align on byte boundary */
#endif
}
Tracev((stderr,"\ncomprlen %lu(%lu) ", s->compressed_len>>3,
s->compressed_len-7*eof));
s->compressed_len-7*last));
}
/* ===========================================================================
@ -1118,24 +1121,45 @@ local void compress_block(s, ltree, dtree)
}
/* ===========================================================================
* Set the data type to BINARY or TEXT, using a crude approximation:
* set it to Z_TEXT if all symbols are either printable characters (33 to 255)
* or white spaces (9 to 13, or 32); or set it to Z_BINARY otherwise.
* Check if the data type is TEXT or BINARY, using the following algorithm:
* - TEXT if the two conditions below are satisfied:
* a) There are no non-portable control characters belonging to the
* "black list" (0..6, 14..25, 28..31).
* b) There is at least one printable character belonging to the
* "white list" (9 {TAB}, 10 {LF}, 13 {CR}, 32..255).
* - BINARY otherwise.
* - The following partially-portable control characters form a
* "gray list" that is ignored in this detection algorithm:
* (7 {BEL}, 8 {BS}, 11 {VT}, 12 {FF}, 26 {SUB}, 27 {ESC}).
* IN assertion: the fields Freq of dyn_ltree are set.
*/
local void set_data_type(s)
local int detect_data_type(s)
deflate_state *s;
{
/* black_mask is the bit mask of black-listed bytes
* set bits 0..6, 14..25, and 28..31
* 0xf3ffc07f = binary 11110011111111111100000001111111
*/
unsigned long black_mask = 0xf3ffc07fUL;
int n;
for (n = 0; n < 9; n++)
/* Check for non-textual ("black-listed") bytes. */
for (n = 0; n <= 31; n++, black_mask >>= 1)
if ((black_mask & 1) && (s->dyn_ltree[n].Freq != 0))
return Z_BINARY;
/* Check for textual ("white-listed") bytes. */
if (s->dyn_ltree[9].Freq != 0 || s->dyn_ltree[10].Freq != 0
|| s->dyn_ltree[13].Freq != 0)
return Z_TEXT;
for (n = 32; n < LITERALS; n++)
if (s->dyn_ltree[n].Freq != 0)
break;
if (n == 9)
for (n = 14; n < 32; n++)
if (s->dyn_ltree[n].Freq != 0)
break;
s->strm->data_type = (n == 32) ? Z_TEXT : Z_BINARY;
return Z_TEXT;
/* There are no "black-listed" or "white-listed" bytes:
* this stream either is empty or has tolerated ("gray-listed") bytes only.
*/
return Z_BINARY;
}
/* ===========================================================================

View File

@ -1,5 +1,5 @@
/* uncompr.c -- decompress a memory buffer
* Copyright (C) 1995-2003 Jean-loup Gailly.
* Copyright (C) 1995-2003, 2010 Jean-loup Gailly.
* For conditions of distribution and use, see copyright notice in zlib.h
*/
@ -16,8 +16,6 @@
been saved previously by the compressor and transmitted to the decompressor
by some mechanism outside the scope of this compression library.)
Upon exit, destLen is the actual size of the compressed buffer.
This function can be used to decompress a whole file at once if the
input file is mmap'ed.
uncompress returns Z_OK if success, Z_MEM_ERROR if there was not
enough memory, Z_BUF_ERROR if there was not enough room in the output

194
zconf.h
View File

@ -1,5 +1,5 @@
/* zconf.h -- configuration of the zlib compression library
* Copyright (C) 1995-2005 Jean-loup Gailly.
* Copyright (C) 1995-2010 Jean-loup Gailly.
* For conditions of distribution and use, see copyright notice in zlib.h
*/
@ -11,52 +11,124 @@
/*
* If you *really* need a unique prefix for all types and library functions,
* compile with -DZ_PREFIX. The "standard" zlib should be compiled without it.
* Even better than compiling with -DZ_PREFIX would be to use configure to set
* this permanently in zconf.h using "./configure --zprefix".
*/
#ifdef Z_PREFIX
# define deflateInit_ z_deflateInit_
# define deflate z_deflate
# define deflateEnd z_deflateEnd
# define inflateInit_ z_inflateInit_
# define inflate z_inflate
# define inflateEnd z_inflateEnd
# define deflateInit2_ z_deflateInit2_
# define deflateSetDictionary z_deflateSetDictionary
# define deflateCopy z_deflateCopy
# define deflateReset z_deflateReset
# define deflateParams z_deflateParams
# define deflateBound z_deflateBound
# define deflatePrime z_deflatePrime
# define inflateInit2_ z_inflateInit2_
# define inflateSetDictionary z_inflateSetDictionary
# define inflateSync z_inflateSync
# define inflateSyncPoint z_inflateSyncPoint
# define inflateCopy z_inflateCopy
# define inflateReset z_inflateReset
# define inflateBack z_inflateBack
# define inflateBackEnd z_inflateBackEnd
#ifdef Z_PREFIX /* may be set to #if 1 by ./configure */
/* all linked symbols */
# define _dist_code z__dist_code
# define _length_code z__length_code
# define _tr_align z__tr_align
# define _tr_flush_block z__tr_flush_block
# define _tr_init z__tr_init
# define _tr_stored_block z__tr_stored_block
# define _tr_tally z__tr_tally
# define adler32 z_adler32
# define adler32_combine z_adler32_combine
# define adler32_combine64 z_adler32_combine64
# define compress z_compress
# define compress2 z_compress2
# define compressBound z_compressBound
# define uncompress z_uncompress
# define adler32 z_adler32
# define crc32 z_crc32
# define crc32_combine z_crc32_combine
# define crc32_combine64 z_crc32_combine64
# define deflate z_deflate
# define deflateBound z_deflateBound
# define deflateCopy z_deflateCopy
# define deflateEnd z_deflateEnd
# define deflateInit2_ z_deflateInit2_
# define deflateInit_ z_deflateInit_
# define deflateParams z_deflateParams
# define deflatePrime z_deflatePrime
# define deflateReset z_deflateReset
# define deflateSetDictionary z_deflateSetDictionary
# define deflateSetHeader z_deflateSetHeader
# define deflateTune z_deflateTune
# define deflate_copyright z_deflate_copyright
# define get_crc_table z_get_crc_table
# define gz_error z_gz_error
# define gz_intmax z_gz_intmax
# define gz_strwinerror z_gz_strwinerror
# define gzbuffer z_gzbuffer
# define gzclearerr z_gzclearerr
# define gzclose z_gzclose
# define gzclose_r z_gzclose_r
# define gzclose_w z_gzclose_w
# define gzdirect z_gzdirect
# define gzdopen z_gzdopen
# define gzeof z_gzeof
# define gzerror z_gzerror
# define gzflush z_gzflush
# define gzgetc z_gzgetc
# define gzgets z_gzgets
# define gzoffset z_gzoffset
# define gzoffset64 z_gzoffset64
# define gzopen z_gzopen
# define gzopen64 z_gzopen64
# define gzprintf z_gzprintf
# define gzputc z_gzputc
# define gzputs z_gzputs
# define gzread z_gzread
# define gzrewind z_gzrewind
# define gzseek z_gzseek
# define gzseek64 z_gzseek64
# define gzsetparams z_gzsetparams
# define gztell z_gztell
# define gztell64 z_gztell64
# define gzungetc z_gzungetc
# define gzwrite z_gzwrite
# define inflate z_inflate
# define inflateBack z_inflateBack
# define inflateBackEnd z_inflateBackEnd
# define inflateBackInit_ z_inflateBackInit_
# define inflateCopy z_inflateCopy
# define inflateEnd z_inflateEnd
# define inflateGetHeader z_inflateGetHeader
# define inflateInit2_ z_inflateInit2_
# define inflateInit_ z_inflateInit_
# define inflateMark z_inflateMark
# define inflatePrime z_inflatePrime
# define inflateReset z_inflateReset
# define inflateReset2 z_inflateReset2
# define inflateSetDictionary z_inflateSetDictionary
# define inflateSync z_inflateSync
# define inflateSyncPoint z_inflateSyncPoint
# define inflateUndermine z_inflateUndermine
# define inflate_copyright z_inflate_copyright
# define inflate_fast z_inflate_fast
# define inflate_table z_inflate_table
# define uncompress z_uncompress
# define zError z_zError
# define zcalloc z_zcalloc
# define zcfree z_zcfree
# define zlibCompileFlags z_zlibCompileFlags
# define zlibVersion z_zlibVersion
# define alloc_func z_alloc_func
# define free_func z_free_func
# define in_func z_in_func
# define out_func z_out_func
/* all zlib typedefs in zlib.h and zconf.h */
# define Byte z_Byte
# define uInt z_uInt
# define uLong z_uLong
# define Bytef z_Bytef
# define alloc_func z_alloc_func
# define charf z_charf
# define free_func z_free_func
# define gzFile z_gzFile
# define gz_header z_gz_header
# define gz_headerp z_gz_headerp
# define in_func z_in_func
# define intf z_intf
# define out_func z_out_func
# define uInt z_uInt
# define uIntf z_uIntf
# define uLong z_uLong
# define uLongf z_uLongf
# define voidpf z_voidpf
# define voidp z_voidp
# define voidpc z_voidpc
# define voidpf z_voidpf
/* all zlib structs in zlib.h and zconf.h */
# define gz_header_s z_gz_header_s
# define internal_state z_internal_state
#endif
#if defined(__MSDOS__) && !defined(MSDOS)
@ -243,6 +315,10 @@
# endif
#endif
#ifdef HAVE_VISIBILITY_PRAGMA
# define ZEXTERN __attribute__((visibility ("default"))) extern
#endif
#ifndef ZEXTERN
# define ZEXTERN extern
#endif
@ -284,14 +360,25 @@ typedef uLong FAR uLongf;
typedef Byte *voidp;
#endif
#if 0 /* HAVE_UNISTD_H -- this line is updated by ./configure */
# include <sys/types.h> /* for off_t */
# include <unistd.h> /* for SEEK_* and off_t */
# ifdef VMS
# include <unixio.h> /* for off_t */
# endif
# define z_off_t off_t
#ifdef HAVE_UNISTD_H /* may be set to #if 1 by ./configure */
# define Z_HAVE_UNISTD_H
#endif
#ifdef Z_HAVE_UNISTD_H
# include <sys/types.h> /* for off_t */
# include <unistd.h> /* for SEEK_* and off_t */
# ifdef VMS
# include <unixio.h> /* for off_t */
# endif
# ifndef z_off_t
# define z_off_t off_t
# endif
#endif
#ifdef _LARGEFILE64_SOURCE
# include <sys/types.h>
#endif
#ifndef SEEK_SET
# define SEEK_SET 0 /* Seek from beginning of file. */
# define SEEK_CUR 1 /* Seek from current position. */
@ -316,26 +403,23 @@ typedef uLong FAR uLongf;
#if defined(__MVS__)
# define NO_vsnprintf
# ifdef FAR
# undef FAR
# endif
#endif
/* MVS linker does not support external names larger than 8 bytes */
#if defined(__MVS__)
# pragma map(deflateInit_,"DEIN")
# pragma map(deflateInit2_,"DEIN2")
# pragma map(deflateEnd,"DEEND")
# pragma map(deflateBound,"DEBND")
# pragma map(inflateInit_,"ININ")
# pragma map(inflateInit2_,"ININ2")
# pragma map(inflateEnd,"INEND")
# pragma map(inflateSync,"INSY")
# pragma map(inflateSetDictionary,"INSEDI")
# pragma map(compressBound,"CMBND")
# pragma map(inflate_table,"INTABL")
# pragma map(inflate_fast,"INFA")
# pragma map(inflate_copyright,"INCOPY")
#pragma map(deflateInit_,"DEIN")
#pragma map(deflateInit2_,"DEIN2")
#pragma map(deflateEnd,"DEEND")
#pragma map(deflateBound,"DEBND")
#pragma map(inflateInit_,"ININ")
#pragma map(inflateInit2_,"ININ2")
#pragma map(inflateEnd,"INEND")
#pragma map(inflateSync,"INSY")
#pragma map(inflateSetDictionary,"INSEDI")
#pragma map(compressBound,"CMBND")
#pragma map(inflate_table,"INTABL")
#pragma map(inflate_fast,"INFA")
#pragma map(inflate_copyright,"INCOPY")
#endif
#endif /* ZCONF_H */

66
zlib.3
View File

@ -1,4 +1,4 @@
.TH ZLIB 3 "18 July 2005"
.TH ZLIB 3 "14 March 2010"
.SH NAME
zlib \- compression/decompression library
.SH SYNOPSIS
@ -9,15 +9,15 @@ for full description]
The
.I zlib
library is a general purpose data compression library.
The code is thread safe.
The code is thread safe, assuming that the standard library functions
used are thread safe, such as memory allocation routines.
It provides in-memory compression and decompression functions,
including integrity checks of the uncompressed data.
This version of the library supports only one compression method (deflation)
but other algorithms will be added later
and will have the same stream interface.
but other algorithms may be added later
with the same stream interface.
.LP
Compression can be done in a single step if the buffers are large enough
(for example if an input file is mmap'ed),
or can be done by repeated calls of the compression function.
In the latter case,
the application must provide more input and/or consume the output
@ -30,7 +30,7 @@ with an interface similar to that of stdio.
.LP
The library does not install any signal handler.
The decoder checks the consistency of the compressed data,
so the library should never crash even in case of corrupted input.
so the library should never crash even in the case of corrupted input.
.LP
All functions of the compression library are documented in the file
.IR zlib.h .
@ -38,18 +38,19 @@ The distribution source includes examples of use of the library
in the files
.I example.c
and
.IR minigzip.c .
.IR minigzip.c,
as well as other examples in the
.IR examples/
directory.
.LP
Changes to this version are documented in the file
.I ChangeLog
that accompanies the source,
and are concerned primarily with bug fixes and portability enhancements.
that accompanies the source.
.LP
A Java implementation of
.I zlib
is available in the Java Development Kit 1.1:
is available in Java using the java.util.zip package:
.IP
http://www.javasoft.com/products/JDK/1.1/docs/api/Package-java.util.zip.html
http://java.sun.com/developer/technicalArticles/Programming/compression/
.LP
A Perl interface to
.IR zlib ,
@ -57,7 +58,7 @@ written by Paul Marquess (pmqs@cpan.org),
is available at CPAN (Comprehensive Perl Archive Network) sites,
including:
.IP
http://www.cpan.org/modules/by-module/Compress/
http://search.cpan.org/~pmqs/IO-Compress-Zlib/
.LP
A Python interface to
.IR zlib ,
@ -66,14 +67,11 @@ is available in Python 1.5 and later versions:
.IP
http://www.python.org/doc/lib/module-zlib.html
.LP
A
.I zlib
binding for
.IR tcl (1),
written by Andreas Kupries (a.kupries@westend.com),
is availlable at:
is built into
.IR tcl:
.IP
http://www.westend.com/~kupries/doc/trf/man/man.html
http://wiki.tcl.tk/4610
.LP
An experimental package to read and write files in .zip format,
written on top of
@ -81,40 +79,34 @@ written on top of
by Gilles Vollant (info@winimage.com),
is available at:
.IP
http://www.winimage.com/zLibDll/unzip.html
http://www.winimage.com/zLibDll/minizip.html
and also in the
.I contrib/minizip
directory of the main
.I zlib
web site.
source distribution.
.SH "SEE ALSO"
The
.I zlib
web site can be found at either of these locations:
web site can be found at:
.IP
http://www.zlib.org
.br
http://www.gzip.org/zlib/
http://zlib.net/
.LP
The data format used by the zlib library is described by RFC
(Request for Comments) 1950 to 1952 in the files:
.IP
http://www.ietf.org/rfc/rfc1950.txt (concerning zlib format)
http://www.ietf.org/rfc/rfc1950.txt (for the zlib header and trailer format)
.br
http://www.ietf.org/rfc/rfc1951.txt (concerning deflate format)
http://www.ietf.org/rfc/rfc1951.txt (for the deflate compressed data format)
.br
http://www.ietf.org/rfc/rfc1952.txt (concerning gzip format)
http://www.ietf.org/rfc/rfc1952.txt (for the gzip header and trailer format)
.LP
These documents are also available in other formats from:
.IP
ftp://ftp.uu.net/graphics/png/documents/zlib/zdoc-index.html
.LP
Mark Nelson (markn@ieee.org) wrote an article about
Mark Nelson wrote an article about
.I zlib
for the Jan. 1997 issue of Dr. Dobb's Journal;
a copy of the article is available at:
.IP
http://dogma.net/markn/articles/zlibtool/zlibtool.htm
http://marknelson.us/1997/01/01/zlib-engine/
.SH "REPORTING PROBLEMS"
Before reporting a problem,
please check the
@ -127,14 +119,14 @@ Please read the
.I zlib
FAQ at:
.IP
http://www.gzip.org/zlib/zlib_faq.html
http://zlib.net/zlib_faq.html
.LP
before asking for help.
Send questions and/or comments to zlib@gzip.org,
or (for the Windows DLL version) to Gilles Vollant (info@winimage.com).
.SH AUTHORS
Version 1.2.3
Copyright (C) 1995-2005 Jean-loup Gailly (jloup@gzip.org)
Version 1.2.4
Copyright (C) 1995-2010 Jean-loup Gailly (jloup@gzip.org)
and Mark Adler (madler@alumni.caltech.edu).
.LP
This software is provided "as-is,"

1138
zlib.h

File diff suppressed because it is too large Load Diff

View File

@ -34,25 +34,25 @@ uLong ZEXPORT zlibCompileFlags()
uLong flags;
flags = 0;
switch (sizeof(uInt)) {
switch ((int)(sizeof(uInt))) {
case 2: break;
case 4: flags += 1; break;
case 8: flags += 2; break;
default: flags += 3;
}
switch (sizeof(uLong)) {
switch ((int)(sizeof(uLong))) {
case 2: break;
case 4: flags += 1 << 2; break;
case 8: flags += 2 << 2; break;
default: flags += 3 << 2;
}
switch (sizeof(voidpf)) {
switch ((int)(sizeof(voidpf))) {
case 2: break;
case 4: flags += 1 << 4; break;
case 8: flags += 2 << 4; break;
default: flags += 3 << 4;
}
switch (sizeof(z_off_t)) {
switch ((int)(sizeof(z_off_t))) {
case 2: break;
case 4: flags += 1 << 6; break;
case 8: flags += 2 << 6; break;

52
zutil.h
View File

@ -1,5 +1,5 @@
/* zutil.h -- internal interface and configuration of the compression library
* Copyright (C) 1995-2005 Jean-loup Gailly.
* Copyright (C) 1995-2010 Jean-loup Gailly.
* For conditions of distribution and use, see copyright notice in zlib.h
*/
@ -17,26 +17,24 @@
#include "zlib.h"
#ifdef STDC
# ifndef _WIN32_WCE
# if !(defined(_WIN32_WCE) && defined(_MSC_VER))
# include <stddef.h>
# endif
# include <string.h>
# include <stdlib.h>
#endif
#ifdef NO_ERRNO_H
# ifdef _WIN32_WCE
/* The Microsoft C Run-Time Library for Windows CE doesn't have
* errno. We define it as a global variable to simplify porting.
* Its value is always 0 and should not be used. We rename it to
* avoid conflict with other libraries that use the same workaround.
*/
# define errno z_errno
# endif
extern int errno;
#if defined(UNDER_CE) && defined(NO_ERRNO_H)
# define zseterrno(ERR) SetLastError((DWORD)(ERR))
# define zerrno() ((int)GetLastError())
#else
# ifndef _WIN32_WCE
# ifdef NO_ERRNO_H
extern int errno;
# else
# include <errno.h>
# endif
# define zseterrno(ERR) do { errno = (ERR); } while (0)
# define zerrno() errno
#endif
#ifndef local
@ -89,7 +87,7 @@ extern const char * const z_errmsg[10]; /* indexed by 2-zlib_error */
#if defined(MSDOS) || (defined(WINDOWS) && !defined(WIN32))
# define OS_CODE 0x00
# if defined(__TURBOC__) || defined(__BORLANDC__)
# if(__STDC__ == 1) && (defined(__LARGE__) || defined(__COMPACT__))
# if (__STDC__ == 1) && (defined(__LARGE__) || defined(__COMPACT__))
/* Allow compilation with ANSI keywords only enabled */
void _Cdecl farfree( void *block );
void *_Cdecl farmalloc( unsigned long nbytes );
@ -118,7 +116,7 @@ extern const char * const z_errmsg[10]; /* indexed by 2-zlib_error */
#ifdef OS2
# define OS_CODE 0x06
# ifdef M_I86
#include <malloc.h>
# include <malloc.h>
# endif
#endif
@ -151,7 +149,7 @@ extern const char * const z_errmsg[10]; /* indexed by 2-zlib_error */
# define fdopen(fd,mode) NULL /* No fdopen() */
#endif
#if (defined(_MSC_VER) && (_MSC_VER > 600))
#if (defined(_MSC_VER) && (_MSC_VER > 600)) && !defined __INTERIX
# if defined(_WIN32_WCE)
# define fdopen(fd,mode) NULL /* No fdopen() */
# ifndef _PTRDIFF_T_DEFINED
@ -161,6 +159,18 @@ extern const char * const z_errmsg[10]; /* indexed by 2-zlib_error */
# else
# define fdopen(fd,type) _fdopen(fd,type)
# endif
#endif
#if defined(__BORLANDC__)
#pragma warn -8004
#pragma warn -8008
#pragma warn -8066
#endif
#ifdef _LARGEFILE64_SOURCE
# define z_off64_t off64_t
#else
# define z_off64_t z_off_t
#endif
/* common defaults */
@ -171,6 +181,12 @@ extern const char * const z_errmsg[10]; /* indexed by 2-zlib_error */
#ifndef F_OPEN
# define F_OPEN(name, mode) fopen((name), (mode))
#endif
#ifdef _LARGEFILE64_SOURCE
# define F_OPEN64(name, mode) fopen64((name), (mode))
#else
# define F_OPEN64(name, mode) fopen((name), (mode))
#endif
/* functions */
@ -197,7 +213,9 @@ extern const char * const z_errmsg[10]; /* indexed by 2-zlib_error */
# ifdef WIN32
/* In Win32, vsnprintf is available as the "non-ANSI" _vsnprintf. */
# if !defined(vsnprintf) && !defined(NO_vsnprintf)
# define vsnprintf _vsnprintf
# if !defined(_MSC_VER) || ( defined(_MSC_VER) && _MSC_VER < 1500 )
# define vsnprintf _vsnprintf
# endif
# endif
# endif
# ifdef __SASC