Virgin import of Christos Zoulas's FILE 5.00.

This commit is contained in:
David E. O'Brien 2009-05-02 06:25:51 +00:00
parent c061027810
commit 1ff822f5fe
Notes: svn2git 2020-12-20 02:59:44 +00:00
svn path=/vendor/file/dist/; revision=191736
47 changed files with 1657 additions and 1231 deletions

156
ChangeLog
View File

@ -1,9 +1,105 @@
2008-12-12 15:50 Christos Zoulas <christos@zoulas.com>
* fix initial offset calculation for non 4K sector files
* add loop limits to avoid DoS attacks by constructing
looping sector references.
2008-12-03 13:05 Christos Zoulas <christos@zoulas.com>
* fix memory botches on cdf file parsing.
* exit with non-zero value for any error, not just for the last
file processed.
2008-11-09 20:42 Charles Longeau <chl@tuxfamily.org>
* Replace all str{cpy,cat} functions with strl{cpy,cat}
* Ensure that strl{cpy,cat} are included in libmagic,
as needed.
2008-11-06 18:18 Christos Zoulas <christos@zoulas.com>
* Handle ID3 format files.
2008-11-06 23:00 Reuben Thomas <rrt@sc3d.org>
* Fix --mime, --mime-type and --mime-encoding under new scheme.
* Rename "ascii" to "text" and add "encoding" test.
* Return a precise ("utf-16le" or "utf-16be") MIME charset for
UTF-16.
* Fix error in comment caused by automatic indentation adding
words!
2008-11-06 10:35 Christos Zoulas <christos@astron.com>
* use memchr instead of strchr because the string
might not be NUL terminated (Scott MacVicar)
2008-11-03 07:31 Reuben Thomas <rrt@sc3d.org>
* Fix a printf with a non-literal format string.
* Fix formatting and punctuation of help for "--apple".
2008-10-30 11:00 Reuben Thomas <rrt@sc3d.org>
* Correct word counts in comments of struct magic.
* Fix handle_annotation to allow both Apple and MIME types to be
printed, and to return correct code if MIME type is
printed (1, not 0) or if there's an error (-1 not 1).
* Fix output of charset for MIME type (precede with semi-colon;
fixes Debian bug #501460).
* Fix potential attacks via conversion specifications in magic
strings.
* Add a FIXME for Debian bug #488562 (magic files should be
read in a defined order, by sorting the names).
2008-10-18 16:45 Christos Zoulas <christos@astron.com>
* Added APPLE file creator/type
2008-10-12 10:20 Christos Zoulas <christos@astron.com>
* Added CDF parsing
2008-10-09 16:40 Christos Zoulas <christos@astron.com>
* filesystem and msdos patches (Joerg Jenderek)
2008-10-09 13:20 Christos Zoulas <christos@astron.com>
* correct --exclude documentation issues: remove troff and fortran
and rename "token" to "tokens". (Randy McMurchy)
2008-10-01 10:30 Christos Zoulas <christos@astron.com>
* Read ~/.magic in addition to the default magic file not instead
of, as documented in the man page.
2008-09-10 21:30 Reuben Thomas <rrt@sc3d.org>
* Comment out graphviz patterns, as they match too many files.
2008-08-30 12:54 Christos Zoulas <christos@astron.com>
* Don't eat trailing \n in magic entries.
* Cast defines to allow compilation using a c++ compiler.
2008-08-25 23:56 Reuben Thomas <rrt@sc3d.org>
* Add text/x-lua MIME type for Lua scripts.
* Escape { in regex in graphviz patterns.
2008-07-26 00:59 Reuben Thomas <rrt@sc3d.org>
* Add MIME types for special files.
@ -55,22 +151,22 @@
2008-05-06 00:13 Robert Byrnes <byrnes@wildpumpkin.net>
* src/Makefile.am:
* src/Makefile.am:
Ensure that getopt_long and [v]asprintf are included in libmagic,
as needed.
Remove unnecessary EXTRA_DIST.
* src/Makefile.in:
* src/Makefile.in:
Rerun automake.
* src/vasprintf.c (dispatch):
* src/vasprintf.c (dispatch):
Fix variable precision bug: be sure to step past '*'.
* src/vasprintf.c (core):
* src/vasprintf.c (core):
Remove unreachable code.
* src/apprentice.c (set_test_type):
* src/apprentice.c (set_test_type):
Add cast to avoid compiler warning.
2008-04-22 23:45 Christos Zoulas <christos@astron.com>
@ -81,12 +177,12 @@
2008-04-04 11:00 Christos Zoulas <christos@astron.com>
* >= <= is not supported, so fix the magic and warn about it.
* >= <= is not supported, so fix the magic and warn about it.
reported by: Thien-Thi Nguyen <ttn@gnuvola.org>
2008-03-27 16:16 Robert Byrnes <byrnes@wildpumpkin.net>
* src/readelf.c (donote):
* src/readelf.c (donote):
ELF core file command name/line bug fixes and enhancements:
Try larger offsets first to avoid false matches
@ -112,7 +208,7 @@
* Clarify UTF-8 BOM message (Reuben Thomas)
* Add HTML comment to token list in names.h
2007-02-04 15:50 Christos Zoulas <christos@astron.com>
* Debian fixes (Reuben Thomas)
@ -152,7 +248,7 @@
2007-10-28 20:48 Christos Zoulas <christos@astron.com>
* float and double magic support (Behan Webster)
* float and double magic support (Behan Webster)
2007-10-28 20:48 Christos Zoulas <christos@astron.com>
@ -199,7 +295,7 @@
be easily parsed:
mimetype [charset=character-set] [encoding=encoding-mime-type]
Remove spurious extra text from some MIME type printouts
Remove spurious extra text from some MIME type printouts
(mostly in is_tar).
Fix one case where -i produced nothing at all (for a 1-byte file,
@ -229,7 +325,7 @@
2007-03-15 10:51 Christos Zoulas <christos@astron.com>
* fix fortran and nroff reversed tests (Dmitry V. Levin)
* fix exclude option (Dmitry V. Levin)
2007-02-08 17:30 Christos Zoulas <christos@astron.com>
@ -248,7 +344,7 @@
* Add exclude flag.
2007-01-18 05:29 Anon Ymous <do@not.spam.me>
* Move the "type" detection code from parse() into its own table
driven routine. This avoids maintaining multiple lists in
file.h.
@ -256,7 +352,7 @@
* Add an optional conditional field (just before the type field).
This code is wrapped in "#ifdef ENABLE_CONDITIONALS" as it is
likely to go away.
2007-01-16 23:24 Anon Ymous <do@not.spam.me>
* Fix an initialization bug in check_mem().
@ -327,7 +423,7 @@
2006-12-08 16:32 Christos Zoulas <christos@astron.com>
* store and print the line number of the magic
entry for debugging.
entry for debugging.
* if the magic entry did not print anything,
don't treat it as a match
@ -342,7 +438,7 @@
file_softmagic.
2006-11-25 13:35 Christos Zoulas <christos@astron.com>
* Don't store the current offset in the magic
struct, because it needs to be restored and
it was not done properly all the time. Bug
@ -432,7 +528,7 @@
* Look for note sections in non executables.
2005-09-20 13:33 Christos Zoulas <christos@astron.com>
* Don't print SVR4 Style in core files multiple times
(Radek Vokál)
@ -443,9 +539,9 @@
2005-08-18 09:53 Christos Zoulas <christos@astron.com>
* Remove erroneous mention of /etc/magic in the file man page
This is gentoo bug 101639. (Mike Frysinger)
This is gentoo bug 101639. (Mike Frysinger)
* Cross-compile support and detection (Mike Frysinger)
* Cross-compile support and detection (Mike Frysinger)
2005-08-12 10:17 Christos Zoulas <christos@astron.com>
@ -477,20 +573,20 @@
* Avoid NULL pointer dereference in time conversion.
2005-03-06 00:00 Joerg Walter <jwalt@mail.garni.ch>
* Add indirect magic offset support, and search mode.
2005-01-12 00:00 Stepan Kasal <kasal@ucw.cz>
* src/ascmagic.c (file_ascmagic): Fix three bugs about text files:
If a CRLF text file happens to have CR at offset HOWMANY - 1
(currently 0xffff), it should not be counted as CR line
terminator.
If a line has length exactly MAXLINELEN, it should not yet be
treated as a ``very long line'', as MAXLINELEN is ``longest sane
line length''.
With CRLF, the line length was not computed correctly, and even
lines of length MAXLINELEN - 1 were treated as ``very long''.
* src/ascmagic.c (file_ascmagic): Fix three bugs about text files:
If a CRLF text file happens to have CR at offset HOWMANY - 1
(currently 0xffff), it should not be counted as CR line
terminator.
If a line has length exactly MAXLINELEN, it should not yet be
treated as a ``very long line'', as MAXLINELEN is ``longest sane
line length''.
With CRLF, the line length was not computed correctly, and even
lines of length MAXLINELEN - 1 were treated as ``very long''.
2004-12-07 14:15 Christos Zoulas <christos@astron.com>
@ -525,12 +621,12 @@
* Remove 3rd and 4th copyright clause; approved by Ian Darwin.
* Fix small memory leaks; caught by: Tamas Sarlos
* Fix small memory leaks; caught by: Tamas Sarlos
<stamas@csillag.ilab.sztaki.hu>
2004-07-24 16:33 Christos Zoulas <christos@astron.com>
* magic.mime update Danny Milosavljevic <danny.milo@gmx.net>
* magic.mime update Danny Milosavljevic <danny.milo@gmx.net>
* FreeBSD version update Oliver Eikemeier <eikemeier@fillmore-labs.com>

View File

@ -325,6 +325,7 @@
# MP2, M1A
0 beshort&0xFFFE 0xFFFC MPEG ADTS, layer II, v1
!:mime audio/mpeg
# rates
>2 byte&0xF0 0x10 \b, 32 kbps
>2 byte&0xF0 0x20 \b, 48 kbps
@ -399,6 +400,7 @@
# MP3, M2A
0 beshort&0xFFFE 0xFFF2 MPEG ADTS, layer III, v2
!:mime audio/mpeg
# rate
>2 byte&0xF0 0x10 \b, 8 kbps
>2 byte&0xF0 0x20 \b, 16 kbps
@ -790,3 +792,24 @@
0 belong 0x00000001
>4 byte&0x1F 0x07
!:mime video/h264
# Type: Bink Video
# URL: http://wiki.multimedia.cx/index.php?title=Bink_Container
# From: <hoehle@users.sourceforge.net> 2008-07-18
0 string BIK Bink Video
>3 regex =[a-z] rev.%s
#>4 ulelong x size %d
>20 ulelong x \b, %d
>24 ulelong x \bx%d
>8 ulelong x \b, %d frames
>32 ulelong x at rate %d/
>28 ulelong >1 \b%d
>40 ulelong =0 \b, no audio
>40 ulelong !0 \b, %d audio track
>>40 ulelong !1 \bs
# follow properties of the first audio track only
>>48 uleshort x %dHz
>>51 byte&0x20 0 mono
>>51 byte&0x20 !0 stereo
#>>51 byte&0x10 0 FFT
#>>51 byte&0x10 !0 DCT

View File

@ -286,43 +286,14 @@
# SGI SoundTrack <mpruett@sgi.com>
0 string _SGI_SoundTrack SGI SoundTrack project file
# ID3 version 2 tags <waschk@informatik.uni-rostock.de>
0 string ID3 Audio file with ID3 version 2.
# ??? Normally such a file is an MP3 file, but this will give false positives
!:mime audio/mpeg
>3 ubyte <0xff \b%d
#>4 ubyte <0xff \b%d tag
>2584 string fLaC \b, FLAC encoding
>>2588 byte&0x7f >0 \b, unknown version
>>2588 byte&0x7f 0 \b
# some common bits/sample values
>>>2600 beshort&0x1f0 0x030 \b, 4 bit
>>>2600 beshort&0x1f0 0x050 \b, 6 bit
>>>2600 beshort&0x1f0 0x070 \b, 8 bit
>>>2600 beshort&0x1f0 0x0b0 \b, 12 bit
>>>2600 beshort&0x1f0 0x0f0 \b, 16 bit
>>>2600 beshort&0x1f0 0x170 \b, 24 bit
>>>2600 byte&0xe 0x0 \b, mono
>>>2600 byte&0xe 0x2 \b, stereo
>>>2600 byte&0xe 0x4 \b, 3 channels
>>>2600 byte&0xe 0x6 \b, 4 channels
>>>2600 byte&0xe 0x8 \b, 5 channels
>>>2600 byte&0xe 0xa \b, 6 channels
>>>2600 byte&0xe 0xc \b, 7 channels
>>>2600 byte&0xe 0xe \b, 8 channels
# some common sample rates
>>>2597 belong&0xfffff0 0x0ac440 \b, 44.1 kHz
>>>2597 belong&0xfffff0 0x0bb800 \b, 48 kHz
>>>2597 belong&0xfffff0 0x07d000 \b, 32 kHz
>>>2597 belong&0xfffff0 0x056220 \b, 22.05 kHz
>>>2597 belong&0xfffff0 0x05dc00 \b, 24 kHz
>>>2597 belong&0xfffff0 0x03e800 \b, 16 kHz
>>>2597 belong&0xfffff0 0x02b110 \b, 11.025 kHz
>>>2597 belong&0xfffff0 0x02ee00 \b, 12 kHz
>>>2597 belong&0xfffff0 0x01f400 \b, 8 kHz
>>>2597 belong&0xfffff0 0x177000 \b, 96 kHz
>>>2597 belong&0xfffff0 0x0fa000 \b, 64 kHz
>>>2601 byte&0xf >0 \b, >4G samples
>2584 string !fLaC \b, MP3 encoding
0 string ID3 Audio file with ID3 version 2
>3 byte x \b.%d
>4 byte x \b.%d
>>5 byte &0x80 \b, unsynchronized frames
>>5 byte &0x40 \b, extended header
>>5 byte &0x20 \b, experimental
>>5 byte &0x10 \b, footer present
>(6.I) indirect x \b, contains:
# NSF (NES sound file) magic
0 string NESM\x1a NES Sound File

View File

@ -12,16 +12,18 @@
# (and use as a hack). Let's not use 18, because the Mach-O people
# might add another one or two as time goes by...
#
0 beshort 0xcafe
>2 beshort 0xbabe
0 belong 0xcafebabe
!:mime application/x-java-applet
>>2 belong >30 compiled Java class data,
>>>6 beshort x version %d.
>>>4 beshort x \b%d
>>4 belong 1 Mach-O fat file with 1 architecture
>>4 belong >1
>>>4 belong <20 Mach-O fat file with %ld architectures
>2 beshort 0xd00d JAR compressed with pack200,
>4 belong >30 compiled Java class data,
>>6 beshort x version %d.
>>4 beshort x \b%d
0 belong 0xcafebabe
>4 belong 1 Mach-O fat file with 1 architecture
>4 belong >1
>>4 belong <20 Mach-O fat file with %ld architectures
0 belong 0xcafed00d JAR compressed with pack200,
>>5 byte x version %d.
>>4 byte x \b%d
!:mime application/x-java-pack200

View File

@ -11,6 +11,7 @@
# standard unix compress
0 string \037\235 compress'd data
!:mime application/x-compress
!:apple LZIVZIVU
>2 byte&0x80 >0 block compressed
>2 byte&0x1f x %d bits
@ -76,6 +77,11 @@
!:mime application/x-bzip2
>3 byte >47 \b, block size = %c00k
# lzip
0 string LZIP lzip compressed data
!:mime application/x-lzip
>4 byte x \b, version: %d
# squeeze and crunch
# Michael Haardt <michael@cantor.informatik.rwth-aachen.de>
0 beshort 0x76FF squeezed data,

View File

@ -143,11 +143,13 @@
>>18 leshort 91 picoJava,
>>18 leshort 92 OpenRISC,
>>18 leshort 93 ARC Cores Tangent-A5,
>>18 leshort 0x3426 OpenRISC (obsolete),
>>18 leshort 0x8472 OpenRISC (obsolete),
>>18 leshort 94 Tensilica Xtensa,
>>18 leshort 97 NatSemi 32k,
>>18 leshort 106 Analog Devices Blackfin,
>>18 leshort 113 Altera Nios II,
>>18 leshort 0xae META,
>>18 leshort 0x3426 OpenRISC (obsolete),
>>18 leshort 0x8472 OpenRISC (obsolete),
>>18 leshort 0x9026 Alpha (unofficial),
>>20 lelong 0 invalid version
>>20 lelong 1 version 1

View File

@ -1,10 +1,11 @@
#------------------------------------------------------------------------------
# Epoc 32 : file(1) magic for Epoc Documents [psion/osaris
# EPOC : file(1) magic for EPOC documents [Psion Series 5/Osaris/Geofox 1]
# Stefan Praszalowicz (hpicollo@worldnet.fr)
#0 lelong 0x10000037 Epoc32
# Useful information for improving this file can be found at:
# http://software.frodo.looijaard.name/psiconv/formats/Index.html
0 lelong 0x10000037
>4 lelong 0x1000006D
>>8 lelong 0x1000007F Word
>>8 lelong 0x10000088 Sheet
>>8 lelong 0x1000007D Sketch
>>8 lelong 0x10000085 TextEd
>>8 lelong 0x1000007F Psion Word
>>8 lelong 0x10000088 Psion Sheet
>>8 lelong 0x1000007D Psion Sketch
>>8 lelong 0x10000085 Psion TextEd

View File

@ -104,9 +104,32 @@
>>>346 string des\ Betriebssystems
>>>>366 string Betriebssystem\ nicht\ vorhanden \b, Microsoft Windows XP MBR (german)
>>>>>0x1B8 ulelong >0 \b, Serial 0x%-.4x
>0x145 string Default:\ F \b, FREE-DOS MBR
#>0x145 string Default:\ F \b, FREE-DOS MBR
#>0x14B string Default:\ F \b, FREE-DOS 1.0 MBR
>0x145 search/7 Default:\ F \b, FREE-DOS MBR
#>>313 string F0\ .\ .\ .
#>>>322 string disk\ 1
#>>>>382 string FAT3
>64 string no\ active\ partition\ found
>>96 string read\ error\ while\ reading\ drive \b, FREE-DOS Beta 0.9 MBR
# Ranish Partition Manager http://www.ranish.com/part/
>387 search/4 \0\ Error!\r
>>378 search/7 Virus!
>>>397 search/4 Booting\
>>>>408 search/4 HD1/\0 \b, Ranish MBR (
>>>>>416 string Writing\ changes... \b2.37
>>>>>>438 ubyte x \b,0x%x dots
>>>>>>440 ubyte >0 \b,virus check
>>>>>>441 ubyte >0 \b,partition %c
#2.38,2.42,2.44
>>>>>416 string !Writing\ changes... \b
>>>>>>418 ubyte 1 \bvirus check,
>>>>>>419 ubyte x \b0x%x seconds
>>>>>>420 ubyte&0x0F >0 \b,partition
>>>>>>>420 ubyte&0x0F <5 \b %x
>>>>>>>420 ubyte&0x0F 0Xf \b ask
>>>>>420 ubyte x \b)
#
>271 string Operating\ system\ loading
>>296 string error\r \b, SYSLINUX MBR (2.10)
# http://www.acronis.de/
@ -124,18 +147,20 @@
>0x40 string SBML
# label with 11 characters of FAT 12 bit filesystem
>>43 string SMART\ BTMGR
>>>430 string SBMK\ Bad!\r
>>>>3 string SBM \b, Smart Boot Manager
>>>>>6 string >\0 \b, version %s
>>>430 string SBMK\ Bad!\r \b, Smart Boot Manager
# OEM-ID not always "SBM"
#>>>>3 strings SBM
>>>>6 string >\0 \b, version %s
>382 string XOSLLOADXCF \b, eXtended Operating System Loader
>6 string LILO \b, LInux i386 boot LOader
>>120 string LILO \b, version 22.3.4 SuSe
>>172 string LILO \b, version 22.5.8 Debian
# updated by Joerg Jenderek
# updated by Joerg Jenderek at Oct 2008
# variables according to grub-0.97/stage1/stage1.S or
# http://www.gnu.org/software/grub/manual/grub.html#Embedded-data
# usual values are marked with comments so that information is printed only for unusual GRUB loaders
>0 ulelong 0x009048EB
>342 search/60 \0Geom\0
#>0 ulelong x %x=0x009048EB , 0x2a9048EB 0
>>0x41 ubyte <2
>>>0x3E ubyte >2 \b; GRand Unified Bootloader
# 0x3 for 0.5.95,0.93,0.94,0.96 0x4 for 1.90
@ -178,15 +203,14 @@
>3 string BCDL
>>498 string BCDL\ \ \ \ BIN \b, Bootable CD Loader (1.50Z)
# mbr partition table entries
# OEM-ID not Microsoft,SYSLINUX,or MTOOLs
# OEM-ID does not contain MicroSoft,NEWLDR,DOS,SYSLINUX,or MTOOLs
>3 string !MS
>>3 string !SYSLINUX
>>>3 string !MTOOL
>>>>3 string !NEWLDR
>>>>>5 string !DOS
# not FAT (32 bit)
>>>>82 string !FAT32
#not IO.SYS
>>>>>472 string !IO\ \ \ \ \ \ SYS
>>>>>>480 string !IO\ \ \ \ \ \ SYS
>>>>>>82 string !FAT32
#not Linux kernel
>>>>>>>514 string !HdrS
#not BeOS
@ -272,6 +296,11 @@
>>>>>>>>>(1.b+11) ubyte 0xb
>>>>>>>>>>(1.b+12) ubyte 0x56
>>>>>>>>>>(1.b+13) ubyte 0xb4 \b, mkdosfs boot message display
>214 string Please\ try\ to\ install\ FreeDOS\ \b, DOS Emulator boot message display
#>>244 string from\ dosemu-freedos-*-bin.tgz\r
#>>>170 string Sorry,\ could\ not\ load\ an\
#>>>>195 string operating\ system.\r\n
#
>103 string This\ is\ not\ a\ bootable\ disk.\
>>132 string Please\ insert\ a\ bootable\
>>>157 string floppy\ and\r\n
@ -374,12 +403,22 @@
>430 string Datentr\204ger\ entfernen\xFF\r\n
>>454 string Medienfehler\xFF\r\n
>>>469 string Neustart:\ Taste\ dr\201cken\r \b, Microsoft Windows XP Bootloader (4.german)
>>>>368 ubyte&0xDF >0
>>>>>368 string x %-.5s
>>>>>>373 ubyte&0xDF >0
>>>>>>>373 string x \b%-.3s
>>>>>376 ubyte&0xDF >0
>>>>>>376 string x \b.%-.3s
>>>>379 string \0
>>>>>368 ubyte&0xDF >0
>>>>>>368 string x %-.5s
>>>>>>>373 ubyte&0xDF >0
>>>>>>>>373 string x \b%-.3s
>>>>>>376 ubyte&0xDF >0
>>>>>>>376 string x \b.%-.3s
# variant
>>>>417 ubyte&0xDF >0
>>>>>417 string x %-.5s
>>>>>>422 ubyte&0xDF >0
>>>>>>>422 string x \b%-.3s
>>>>>425 ubyte&0xDF >0
>>>>>>425 string >\ \b.%-.3s
#
#>3 string NTFS\ \ \ \
>389 string Fehler\ beim\ Lesen\
>>407 string des\ Datentr\204gers
@ -567,12 +606,27 @@
>>>489 string Any\ key\ to\ retry \b, DR-DOS Bootloader
>>471 string Cannot\ load\ DOS\
>>487 string press\ key\ to\ retry \b, Open-DOS Bootloader
#??
>444 string KERNEL\ \ SYS
>>314 string BOOT\ error! \b, FREE-DOS Bootloader
>499 string KERNEL\ \ SYS
>>305 string BOOT\ err!\0 \b, Free-DOS Bootloader
>449 string KERNEL\ \ SYS
>>319 string BOOT\ error! \b, FREE-DOS 0.5 Bootloader
#
>449 string Loading\ FreeDOS
>>0x1AF ulelong >0 \b, FREE-DOS 0.95,1.0 Bootloader
>>>497 ubyte&0xDF >0
>>>>497 string x \b %-.6s
>>>>>503 ubyte&0xDF >0
>>>>>>503 string x \b%-.1s
>>>>>>>504 ubyte&0xDF >0
>>>>>>>>504 string x \b%-.1s
>>>>505 ubyte&0xDF >0
>>>>>505 string x \b.%-.3s
#
>331 string Error!.0 \b, FREE-DOS 1.0 bootloader
#
>125 string Loading\ FreeDOS...\r
>>311 string BOOT\ error!\r \b, FREE-DOS bootloader
>>>441 ubyte&0xDF >0
@ -706,13 +760,7 @@
#it also hangs with another message ("NF").
>>>>>492 string RENF \b, FAT (12 bit)
>>>>>495 string RENF \b, FAT (16 bit)
# added by Joerg Jenderek
# http://syslinux.zytor.com/iso.php
0 ulelong 0x7c40eafa isolinux Loader
# http://syslinux.zytor.com/pxe.php
0 ulelong 0x007c05ea pxelinux Loader
0 ulelong 0x60669c66 pxelinux Loader
# loader end
# x86 bootloader end
# updated by Joerg Jenderek at Sep 2007
>3 ubyte 0
#no active flag
@ -732,6 +780,7 @@
# older drives may use Near JuMP instruction E9 xx xx
>0 lelong&0x009000EB 0x009000EB
>0 lelong&0x000000E9 0x000000E9
# minimal short forward jump found 03cx??
# maximal short forward jump is 07fx
>1 ubyte <0xff \b, code offset 0x%x
# mtools-3.9.8/msdos.h
@ -740,91 +789,92 @@
>>11 uleshort&0x000f x
>>>11 uleshort <32769
>>>>11 uleshort >31
>>>>>3 string >\0 \b, OEM-ID "%8.8s"
>>>>>21 ubyte&0xf0 0xF0
>>>>>>3 string >\0 \b, OEM-ID "%8.8s"
#http://mirror.href.com/thestarman/asm/debug/debug2.htm#IHC
>>>>>>8 string IHC \b cached by Windows 9M
>>>>>11 uleshort >512 \b, Bytes/sector %u
#>>>>>11 uleshort =512 \b, Bytes/sector %u=512 (usual)
>>>>>11 uleshort <512 \b, Bytes/sector %u
>>>>>13 ubyte >1 \b, sectors/cluster %u
#>>>>>13 ubyte =1 \b, sectors/cluster %u (usual on Floppies)
>>>>>14 uleshort >32 \b, reserved sectors %u
#>>>>>14 uleshort =32 \b, reserved sectors %u (usual Fat32)
#>>>>>14 uleshort >1 \b, reserved sectors %u
#>>>>>14 uleshort =1 \b, reserved sectors %u (usual FAT12,FAT16)
>>>>>14 uleshort <1 \b, reserved sectors %u
>>>>>16 ubyte >2 \b, FATs %u
#>>>>>16 ubyte =2 \b, FATs %u (usual)
>>>>>16 ubyte =1 \b, FAT %u
>>>>>16 ubyte >0
>>>>>17 uleshort >0 \b, root entries %u
#>>>>>17 uleshort =0 \b, root entries %u=0 (usual Fat32)
>>>>>19 uleshort >0 \b, sectors %u (volumes <=32 MB)
#>>>>>19 uleshort =0 \b, sectors %u=0 (usual Fat32)
>>>>>21 ubyte >0xF0 \b, Media descriptor 0x%x
#>>>>>21 ubyte =0xF0 \b, Media descriptor 0x%x (usual floppy)
>>>>>21 ubyte <0xF0 \b, Media descriptor 0x%x
>>>>>22 uleshort >0 \b, sectors/FAT %u
#>>>>>22 uleshort =0 \b, sectors/FAT %u=0 (usual Fat32)
>>>>>26 ubyte >2 \b, heads %u
#>>>>>26 ubyte =2 \b, heads %u (usual floppy)
>>>>>26 ubyte =1 \b, heads %u
>>>>>>>8 string IHC \b cached by Windows 9M
>>>>>>11 uleshort >512 \b, Bytes/sector %u
#>>>>>>11 uleshort =512 \b, Bytes/sector %u=512 (usual)
>>>>>>11 uleshort <512 \b, Bytes/sector %u
>>>>>>13 ubyte >1 \b, sectors/cluster %u
#>>>>>>13 ubyte =1 \b, sectors/cluster %u (usual on Floppies)
>>>>>>14 uleshort >32 \b, reserved sectors %u
#>>>>>>14 uleshort =32 \b, reserved sectors %u (usual Fat32)
#>>>>>>14 uleshort >1 \b, reserved sectors %u
#>>>>>>14 uleshort =1 \b, reserved sectors %u (usual FAT12,FAT16)
>>>>>>14 uleshort <1 \b, reserved sectors %u
>>>>>>16 ubyte >2 \b, FATs %u
#>>>>>>16 ubyte =2 \b, FATs %u (usual)
>>>>>>16 ubyte =1 \b, FAT %u
>>>>>>16 ubyte >0
>>>>>>17 uleshort >0 \b, root entries %u
#>>>>>>17 uleshort =0 \b, root entries %u=0 (usual Fat32)
>>>>>>19 uleshort >0 \b, sectors %u (volumes <=32 MB)
#>>>>>>19 uleshort =0 \b, sectors %u=0 (usual Fat32)
>>>>>>21 ubyte >0xF0 \b, Media descriptor 0x%x
#>>>>>>21 ubyte =0xF0 \b, Media descriptor 0x%x (usual floppy)
>>>>>>21 ubyte <0xF0 \b, Media descriptor 0x%x
>>>>>>22 uleshort >0 \b, sectors/FAT %u
#>>>>>>22 uleshort =0 \b, sectors/FAT %u=0 (usual Fat32)
>>>>>>26 ubyte >2 \b, heads %u
#>>>>>>26 ubyte =2 \b, heads %u (usual floppy)
>>>>>>26 ubyte =1 \b, heads %u
#skip for Digital Research DOS (version 3.41) 1440 kB Bootdisk
>>>>>38 ubyte !0x70
>>>>>>28 ulelong >0 \b, hidden sectors %u
#>>>>>>28 ulelong =0 \b, hidden sectors %u (usual floppy)
>>>>>>32 ulelong >0 \b, sectors %u (volumes > 32 MB)
#>>>>>>32 ulelong =0 \b, sectors %u (volumes > 32 MB)
>>>>>>38 ubyte !0x70
>>>>>>>28 ulelong >0 \b, hidden sectors %u
#>>>>>>>28 ulelong =0 \b, hidden sectors %u (usual floppy)
>>>>>>>32 ulelong >0 \b, sectors %u (volumes > 32 MB)
#>>>>>>>32 ulelong =0 \b, sectors %u (volumes > 32 MB)
# FAT<32 specific
>>>>>82 string !FAT32
#>>>>>>36 ubyte 0x80 \b, physical drive 0x%x=0x80 (usual harddisk)
#>>>>>>36 ubyte 0 \b, physical drive 0x%x=0 (usual floppy)
>>>>>>36 ubyte !0x80
>>>>>>>36 ubyte !0 \b, physical drive 0x%x
>>>>>>37 ubyte >0 \b, reserved 0x%x
#>>>>>>37 ubyte =0 \b, reserved 0x%x
>>>>>>38 ubyte >0x29 \b, dos < 4.0 BootSector (0x%x)
>>>>>>38 ubyte <0x29 \b, dos < 4.0 BootSector (0x%x)
>>>>>>38 ubyte =0x29
>>>>>>>39 ulelong x \b, serial number 0x%x
>>>>>>>43 string <NO\ NAME \b, label: "%11.11s"
>>>>>>>43 string >NO\ NAME \b, label: "%11.11s"
>>>>>>>43 string =NO\ NAME \b, unlabeled
>>>>>>54 string FAT \b, FAT
>>>>>>>54 string FAT12 \b (12 bit)
>>>>>>>54 string FAT16 \b (16 bit)
>>>>>>82 string !FAT32
#>>>>>>>36 ubyte 0x80 \b, physical drive 0x%x=0x80 (usual harddisk)
#>>>>>>>36 ubyte 0 \b, physical drive 0x%x=0 (usual floppy)
>>>>>>>36 ubyte !0x80
>>>>>>>>36 ubyte !0 \b, physical drive 0x%x
>>>>>>>37 ubyte >0 \b, reserved 0x%x
#>>>>>>>37 ubyte =0 \b, reserved 0x%x
>>>>>>>38 ubyte >0x29 \b, dos < 4.0 BootSector (0x%x)
>>>>>>>38 ubyte <0x29 \b, dos < 4.0 BootSector (0x%x)
>>>>>>>38 ubyte =0x29
>>>>>>>>39 ulelong x \b, serial number 0x%x
>>>>>>>>43 string <NO\ NAME \b, label: "%11.11s"
>>>>>>>>43 string >NO\ NAME \b, label: "%11.11s"
>>>>>>>>43 string =NO\ NAME \b, unlabeled
>>>>>>>54 string FAT \b, FAT
>>>>>>>>54 string FAT12 \b (12 bit)
>>>>>>>>54 string FAT16 \b (16 bit)
# FAT32 specific
>>>>>82 string FAT32 \b, FAT (32 bit)
>>>>>>36 ulelong x \b, sectors/FAT %u
>>>>>>40 uleshort >0 \b, extension flags %u
#>>>>>>40 uleshort =0 \b, extension flags %u
>>>>>>42 uleshort >0 \b, fsVersion %u
#>>>>>>42 uleshort =0 \b, fsVersion %u (usual)
>>>>>>44 ulelong >2 \b, rootdir cluster %u
#>>>>>>44 ulelong =2 \b, rootdir cluster %u
#>>>>>>44 ulelong =1 \b, rootdir cluster %u
>>>>>>48 uleshort >1 \b, infoSector %u
#>>>>>>48 uleshort =1 \b, infoSector %u (usual)
>>>>>>48 uleshort <1 \b, infoSector %u
>>>>>>50 uleshort >6 \b, Backup boot sector %u
#>>>>>>50 uleshort =6 \b, Backup boot sector %u (usual)
>>>>>>50 uleshort <6 \b, Backup boot sector %u
>>>>>>54 ulelong >0 \b, reserved1 0x%x
>>>>>>58 ulelong >0 \b, reserved2 0x%x
>>>>>>62 ulelong >0 \b, reserved3 0x%x
>>>>>>82 string FAT32 \b, FAT (32 bit)
>>>>>>>36 ulelong x \b, sectors/FAT %u
>>>>>>>40 uleshort >0 \b, extension flags %u
#>>>>>>>40 uleshort =0 \b, extension flags %u
>>>>>>>42 uleshort >0 \b, fsVersion %u
#>>>>>>>42 uleshort =0 \b, fsVersion %u (usual)
>>>>>>>44 ulelong >2 \b, rootdir cluster %u
#>>>>>>>44 ulelong =2 \b, rootdir cluster %u
#>>>>>>>44 ulelong =1 \b, rootdir cluster %u
>>>>>>>48 uleshort >1 \b, infoSector %u
#>>>>>>>48 uleshort =1 \b, infoSector %u (usual)
>>>>>>>48 uleshort <1 \b, infoSector %u
>>>>>>>50 uleshort >6 \b, Backup boot sector %u
#>>>>>>>50 uleshort =6 \b, Backup boot sector %u (usual)
>>>>>>>50 uleshort <6 \b, Backup boot sector %u
>>>>>>>54 ulelong >0 \b, reserved1 0x%x
>>>>>>>58 ulelong >0 \b, reserved2 0x%x
>>>>>>>62 ulelong >0 \b, reserved3 0x%x
# same structure as FAT1X
>>>>>>64 ubyte >0x80 \b, physical drive 0x%x
#>>>>>>64 ubyte =0x80 \b, physical drive 0x%x=80 (usual harddisk)
>>>>>>64 ubyte&0x7F >0 \b, physical drive 0x%x
#>>>>>>64 ubyte =0 \b, physical drive 0x%x=0 (usual floppy)
>>>>>>65 ubyte >0 \b, reserved 0x%x
>>>>>>66 ubyte >0x29 \b, dos < 4.0 BootSector (0x%x)
>>>>>>66 ubyte <0x29 \b, dos < 4.0 BootSector (0x%x)
>>>>>>66 ubyte =0x29
>>>>>>>67 ulelong x \b, serial number 0x%x
>>>>>>>71 string <NO\ NAME \b, label: "%11.11s"
>>>>>>71 string >NO\ NAME \b, label: "%11.11s"
>>>>>>71 string =NO\ NAME \b, unlabeled
>>>>>>>64 ubyte >0x80 \b, physical drive 0x%x
#>>>>>>>64 ubyte =0x80 \b, physical drive 0x%x=80 (usual harddisk)
>>>>>>>64 ubyte&0x7F >0 \b, physical drive 0x%x
#>>>>>>>64 ubyte =0 \b, physical drive 0x%x=0 (usual floppy)
>>>>>>>65 ubyte >0 \b, reserved 0x%x
>>>>>>>66 ubyte >0x29 \b, dos < 4.0 BootSector (0x%x)
>>>>>>>66 ubyte <0x29 \b, dos < 4.0 BootSector (0x%x)
>>>>>>>66 ubyte =0x29
>>>>>>>>67 ulelong x \b, serial number 0x%x
>>>>>>>>71 string <NO\ NAME \b, label: "%11.11s"
>>>>>>>71 string >NO\ NAME \b, label: "%11.11s"
>>>>>>>71 string =NO\ NAME \b, unlabeled
### FATs end
>0x200 lelong 0x82564557 \b, BSD disklabel
# FATX
@ -854,6 +904,13 @@
0x18b string OS/2 OS/2 Boot Manager
# updated by Joerg Jenderek at Oct 2008!!
# http://syslinux.zytor.com/iso.php
0 ulelong 0x7c40eafa isolinux Loader
# http://syslinux.zytor.com/pxe.php
0 ulelong 0x007c05ea pxelinux Loader
0 ulelong 0x60669c66 pxelinux Loader
# added by Joerg Jenderek
# In the second sector (+0x200) are variables according to grub-0.97/stage2/asm.S or
# grub-1.94/kern/i386/pc/startup.S
@ -1324,6 +1381,14 @@
>0x10024 belong x (blocksize %d,
>0x10060 string >\0 lockproto %s)
# BTRFS
0x10040 string _BHRfS_M BTRFS Filesystem
>0x1012b string >\0 (label "%s",
>0x10090 lelong x sectorsize %d,
>0x10094 lelong x nodesize %d,
>0x10098 lelong x leafsize %d)
# dvdisaster's .ecc
# From: "Nelson A. de Oliveira" <naoliv@gmail.com>
0 string *dvdisaster* dvdisaster error correction file

View File

@ -1,7 +1,10 @@
#------------------------------------------------------------------------------
# graphviz: file(1) magic for http://www.graphviz.org/
0 regex/100 [\r\n\t\ ]*graph[\r\n\t\ ]*.*\\{ graphviz graph text
!:mime text/vnd.graphviz
0 regex/100 [\r\n\t\ ]*digraph[\r\n\t\ ]*.*\\{ graphviz digraph text
!:mime text/vnd.graphviz
# FIXME: These patterns match too generally. For example, the first
# line matches a LaTeX file containing the word "graph" (with a {
# following later) and the second line matches this file.
#0 regex/100 [\r\n\t\ ]*graph[\r\n\t\ ]+.*\\{ graphviz graph text
#!:mime text/vnd.graphviz
#0 regex/100 [\r\n\t\ ]*digraph[\r\n\t\ ]+.*\\{ graphviz digraph text
#!:mime text/vnd.graphviz

View File

@ -110,6 +110,7 @@
# GIF
0 string GIF8 GIF image data
!:mime image/gif
!:apple 8BIMGIFf
>4 string 7a \b, version 8%s,
>4 string 9a \b, version 8%s,
>6 leshort >0 %hd x
@ -600,3 +601,7 @@
# Wavelet Scalar Quantization format used in gray-scale fingerprint images
# From Tano M Fotang <mfotang@quanteq.com>
0 string \xff\xa0\xff\xa8\x00 Wavelet Scalar Quantization image data
# JPEG 2000 Code Stream Bitmap
# From Petr Splichal <psplicha@redhat.com>
0 string \xFF\x4F\xFF\x51\x00 JPEG-2000 Code Stream Bitmap data

View File

@ -10,6 +10,7 @@
#
0 beshort 0xffd8 JPEG image data
!:mime image/jpeg
!:apple 8BIMJPEG
!:strength +1
>6 string JFIF \b, JFIF standard
# The following added by Erik Rossen <rossen@freesurf.ch> 1999-09-06

View File

@ -4,7 +4,7 @@
# Java ByteCode, so they are both handled in the file "cafebabe".
# The "feedface" ones are handled herein.
#------------------------------------------------------------
0 lelong&0xfeffffff 0xfeedface Mach-O
0 lelong&0xfffffffe 0xfeedface Mach-O
>0 byte 0xcf 64-bit
>12 lelong 1 object
>12 lelong 2 executable

View File

@ -11,6 +11,8 @@
# Stuffit archives are the de facto standard of compression for Macintosh
# files obtained from most archives. (franklsm@tuns.ca)
0 string SIT! StuffIt Archive (data)
!:mime application/x-stuffit
!:apple SIT!SIT!
>2 string x : %s
0 string SITD StuffIt Deluxe (data)
>2 string x : %s
@ -20,6 +22,7 @@
# Newer StuffIt archives (grant@netbsd.org)
0 string StuffIt StuffIt Archive
!:mime application/x-stuffit
!:apple SIT!SIT!
#>162 string >0 : %s
# Macintosh Applications and Installation binaries (franklsm@tuns.ca)

View File

@ -4,15 +4,15 @@
#
# .BAT files (Daniel Quinlan, quinlan@yggdrasil.com)
# updated by Joerg Jenderek
# updated by Joerg Jenderek at Oct 2008
0 string @
>1 string/cB \ echo\ off MS-DOS batch file text
>1 string/cB \ echo\ off DOS batch file text
!:mime text/x-msdos-batch
>1 string/cB echo\ off MS-DOS batch file text
>1 string/cB echo\ off DOS batch file text
!:mime text/x-msdos-batch
>1 string/cB rem\ MS-DOS batch file text
>1 string/cB rem\ DOS batch file text
!:mime text/x-msdos-batch
>1 string/cB set\ MS-DOS batch file text
>1 string/cB set\ DOS batch file text
!:mime text/x-msdos-batch
@ -285,8 +285,9 @@
# Uncommenting only the first two lines will cover about 2/3 of COM files,
# but it isn't feasible to match all COM files since there must be at least
# two dozen different one-byte "magics".
#0 byte 0xe9 DOS executable (COM)
#>0x1FE leshort 0xAA55 \b, boot code
# test too generic ?
0 byte 0xe9 DOS executable (COM)
>0x1FE leshort 0xAA55 \b, boot code
>6 string SFX\ of\ LHarc (%s)
0 belong 0xffffffff DOS executable (device driver)
#CMD640X2.SYS
@ -309,25 +310,38 @@
>>77 string >\x40
>>>77 string <\x5B
>>>>77 string x \b, name: %.8s
#0 byte 0x8c DOS executable (COM)
# 0xeb conflicts with "sequent" magic
#0 byte 0xeb DOS executable (COM)
#>0x1FE leshort 0xAA55 \b, boot code
#>85 string UPX \b, UPX compressed
#>4 string \ $ARX \b, ARX self-extracting archive
#>4 string \ $LHarc \b, LHarc self-extracting archive
#>0x20e string SFX\ by\ LARC \b, LARC self-extracting archive
# test too generic ?
0 byte 0x8c DOS executable (COM)
# updated by Joerg Jenderek at Oct 2008
0 ulelong 0xffff10eb DR-DOS executable (COM)
# byte 0xeb conflicts with "sequent" magic leshort 0xn2eb
0 ubeshort&0xeb8d >0xeb00
# DR-DOS STACKER.COM SCREATE.SYS missed
>0 byte 0xeb DOS executable (COM)
>>0x1FE leshort 0xAA55 \b, boot code
>>85 string UPX \b, UPX compressed
>>4 string \ $ARX \b, ARX self-extracting archive
>>4 string \ $LHarc \b, LHarc self-extracting archive
>>0x20e string SFX\ by\ LARC \b, LARC self-extracting archive
# updated by Joerg Jenderek at Oct 2008
#0 byte 0xb8 COM executable
0 uleshort&0x80ff 0x00b8
# modified by Joerg Jenderek
>1 lelong !0x21cd4cff for DOS
>1 lelong !0x21cd4cff COM executable for DOS
# http://syslinux.zytor.com/comboot.php
# (32-bit COMBOOT) programs *.C32 contain 32-bit code and run in flat-memory 32-bit protected mode
# start with assembler instructions mov eax,21cd4cffh
>1 lelong 0x21cd4cff (32-bit COMBOOT)
0 uleshort&0xc0ff 0xc0b8
>1 lelong 0x21cd4cff COM executable (32-bit COMBOOT)
0 string \x81\xfc
>4 string \x77\x02\xcd\x20\xb9
>>36 string UPX! FREE-DOS executable (COM), UPX compressed
252 string Must\ have\ DOS\ version DR-DOS executable (COM)
# added by Joerg Jenderek at Oct 2008
# GRR search is not working
#34 search/2 UPX! FREE-DOS executable (COM), UPX compressed
34 string UPX! FREE-DOS executable (COM), UPX compressed
35 string UPX! FREE-DOS executable (COM), UPX compressed
# GRR search is not working
#2 search/28 \xcd\x21 COM executable for MS-DOS
#WHICHFAT.cOM
@ -564,6 +578,7 @@
# From Stuart Caie <kyzer@4u.net> (developer of cabextract)
# Microsoft Cabinet files
0 string MSCF\0\0\0\0 Microsoft Cabinet archive data
!:mime application/vnd.ms-cab-compressed
>8 lelong x \b, %u bytes
>28 leshort 1 \b, 1 file
>28 leshort >1 \b, %u files

View File

@ -23,7 +23,6 @@
# by Dmitry V. Levin and Alexey Tourbin
# check the first line
0 search/1 package
0 regex \^package[\ \t]+[A-Za-z_]
>0 regex \^package[\ \t]+[0-9A-Za-z_:]+\ *; Perl5 module source text
# not 'p', check other lines
0 search/1 !p

View File

@ -6,6 +6,7 @@
# PostScript, updated by Daniel Quinlan (quinlan@yggdrasil.com)
0 string %! PostScript document text
!:mime application/postscript
!:apple ASPSTEXT
>2 string PS-Adobe- conforming
>>11 string >\0 DSC level %.3s
>>>15 string EPS \b, type %s
@ -16,6 +17,7 @@
# Some PCs have the annoying habit of adding a ^D as a document separator
0 string \004%! PostScript document text
!:mime application/postscript
!:apple ASPSTEXT
>3 string PS-Adobe- conforming
>>12 string >\0 DSC level %.3s
>>>16 string EPS \b, type %s

View File

@ -6,6 +6,23 @@
# this should work on Linux, SunOS, and maybe others
# Added new official magic number for recent versions of the Olson code
# TZif header layout (tzfile(5)): 4-byte magic, version byte at offset 4,
# 15 reserved bytes, then six big-endian 32-bit counts starting at offset 20:
# ttisgmtcnt (20), ttisstdcnt (24), leapcnt (28), timecnt (32),
# typecnt (36) and charcnt (40).
0 string TZif timezone data
>4 byte 0 \b, old version
>4 byte >0 \b, version %c
>20 belong 0 \b, no gmt time flags
>20 belong 1 \b, 1 gmt time flag
>20 belong >1 \b, %d gmt time flags
>24 belong 0 \b, no std time flags
>24 belong 1 \b, 1 std time flag
>24 belong >1 \b, %d std time flags
>28 belong 0 \b, no leap seconds
>28 belong 1 \b, 1 leap second
>28 belong >1 \b, %d leap seconds
>32 belong 0 \b, no transition times
>32 belong 1 \b, 1 transition time
>32 belong >1 \b, %d transition times
>40 belong 0 \b, no abbreviation chars
>40 belong 1 \b, 1 abbreviation char
>40 belong >1 \b, %d abbreviation chars
0 string \0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\1\0 old timezone data
0 string \0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\2\0 old timezone data
0 string \0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\3\0 old timezone data

View File

@ -23,3 +23,12 @@
>24 long x %ldx
>28 long 1008 YUV422]
>28 long 1000 RGB24]
# Xcursor data
# X11 mouse cursor format defined in libXcursor, see
# http://www.x.org/archive/X11R6.8.1/doc/Xcursor.3.html
# http://cgit.freedesktop.org/xorg/lib/libXcursor/tree/include/X11/Xcursor/Xcursor.h
0 string Xcur Xcursor data
!:mime image/x-xcursor
>10 leshort x version %hd
>>8 leshort x \b.%hd

View File

@ -1,5 +1,5 @@
#
# $File: Makefile.am,v 1.43 2008/08/08 08:24:06 christos Exp $
# $File: Makefile.am,v 1.44 2009/01/28 02:11:20 christos Exp $
#
MAGIC_FRAGMENT_BASE = Magdir
MAGIC_FRAGMENT_DIR = $(top_srcdir)/magic/$(MAGIC_FRAGMENT_BASE)
@ -209,6 +209,7 @@ $(MAGIC_FRAGMENT_DIR)/vxl \
$(MAGIC_FRAGMENT_DIR)/warc \
$(MAGIC_FRAGMENT_DIR)/weak \
$(MAGIC_FRAGMENT_DIR)/windows \
$(MAGIC_FRAGMENT_DIR)/wireless \
$(MAGIC_FRAGMENT_DIR)/wordprocessors \
$(MAGIC_FRAGMENT_DIR)/xdelta \
$(MAGIC_FRAGMENT_DIR)/xenix \

View File

@ -163,7 +163,7 @@ top_builddir = @top_builddir@
top_srcdir = @top_srcdir@
#
# $File: Makefile.am,v 1.43 2008/08/08 08:24:06 christos Exp $
# $File: Makefile.am,v 1.44 2009/01/28 02:11:20 christos Exp $
#
MAGIC_FRAGMENT_BASE = Magdir
MAGIC_FRAGMENT_DIR = $(top_srcdir)/magic/$(MAGIC_FRAGMENT_BASE)
@ -371,6 +371,7 @@ $(MAGIC_FRAGMENT_DIR)/vxl \
$(MAGIC_FRAGMENT_DIR)/warc \
$(MAGIC_FRAGMENT_DIR)/weak \
$(MAGIC_FRAGMENT_DIR)/windows \
$(MAGIC_FRAGMENT_DIR)/wireless \
$(MAGIC_FRAGMENT_DIR)/wordprocessors \
$(MAGIC_FRAGMENT_DIR)/xdelta \
$(MAGIC_FRAGMENT_DIR)/xenix \

56
README
View File

@ -1,5 +1,5 @@
** README for file(1) Command **
@(#) $File: README,v 1.40 2008/04/23 03:45:20 christos Exp $
@(#) $File: README,v 1.41 2008/12/02 16:34:46 christos Exp $
E-mail: christos@astron.com
Mailing List: file@mx.gw.com
@ -48,33 +48,35 @@ in magic(5) format please, to the maintainer, Christos Zoulas.
COPYING - read this first.
README - read this second (you are currently reading this file).
PORTING - read this only if the program won't compile.
Makefile - read this next, adapt it as needed (particularly
the location of the old existing file command and
the man page layouts), type "make" to compile,
"make try" to try it out against your old version.
Expect some diffs, particularly since your original
file(1) may not grok the embedded-space ("\ ") in
the current magic file, or may even not use the
magic file.
apprentice.c - parses /etc/magic to learn magic
ascmagic.c - third & last set of tests, based on hardwired assumptions.
core - not included in distribution due to mailer limitations.
debug.c - includes -c printout routine
file.1 - man page for the command
magic.4 - man page for the magic file, courtesy Guy Harris.
INSTALL - read on how to install
src/apprentice.c - parses /etc/magic to learn magic
src/apptype.c - used for OS/2 specific application type magic
src/asprintf.c - replacement for OS's that don't have it.
src/ascmagic.c - third & last set of tests, based on hardwired assumptions.
src/cdf.c - parser for Microsoft Compound Document Files
src/cdf_time.c - time converter for CDF.
src/compress.c - handles decompressing files to look inside.
src/encoding.c - handles unicode encodings
src/file.c - the main program
src/file.h - header file
src/fsmagic.c - first set of tests the program runs, based on filesystem info
src/funcs.c - utility functions
src/getopt_long.c - replacement for OS's that don't have it.
src/is_tar.c, tar.h - knows about tarchives (courtesy John Gilmore).
src/names.h - header file for ascmagic.c
src/magic.c - the libmagic api
src/print.c - print results, errors, warnings.
src/readcdf.c - CDF wrapper.
src/readelf.[ch] - Stand-alone elf parsing code.
src/softmagic.c - 2nd set of tests, based on /etc/magic
src/strlcat.c - replacement for OS's that don't have it.
src/strlcpy.c - replacement for OS's that don't have it.
src/vasprintf.c - replacement for OS's that don't have it.
doc/file.1 - man page for the command
doc/magic.4 - man page for the magic file, courtesy Guy Harris.
Install as magic.4 on USG and magic.5 on V7 or Berkeley; cf Makefile.
file.c - main program
file.h - header file
fsmagic.c - first set of tests the program runs, based on filesystem info
is_tar.c, tar.h - knows about tarchives (courtesy John Gilmore).
magdir - directory of /etc/magic pieces
magdir/Makefile - ADJUST THIS FOR YOUR CONFIGURATION
names.h - header file for ascmagic.c
softmagic.c - 2nd set of tests, based on /etc/magic
readelf.[ch] - Stand-alone elf parsing code.
compress.c - on-the-fly decompression.
print.c - print results, errors, warnings.
Magdir - directory of /etc/magic pieces
------------------------------------------------------------------------------

6
TODO
View File

@ -1,3 +1,9 @@
Fix output so that tests for MIME and APPLE flags are not needed all
over the place, and actual output is only done in one place. This
needs a design. Suggestion: push possible outputs on to a list, then
pick the last-pushed (most specific, one hopes) value at the end, or
use a default if the list is empty.
Continue to squash all magic bugs. See Debian BTS for a good source.
Store arbitrarily long strings, for example for %s patterns, so that

View File

@ -30,6 +30,11 @@
*/
#include "file.h"
#ifndef lint
FILE_RCSID("@(#)$File: apprentice.c,v 1.147 2009/02/03 20:27:51 christos Exp $")
#endif /* lint */
#include "magic.h"
#include "patchlevel.h"
#include <stdlib.h>
@ -40,18 +45,11 @@
#include <assert.h>
#include <ctype.h>
#include <fcntl.h>
#include <sys/stat.h>
#include <sys/param.h>
#ifdef QUICK
#include <sys/mman.h>
#endif
#include <sys/types.h>
#include <dirent.h>
#ifndef lint
FILE_RCSID("@(#)$File: apprentice.c,v 1.140 2008/07/20 04:02:15 christos Exp $")
#endif /* lint */
#define EATAB {while (isascii((unsigned char) *l) && \
isspace((unsigned char) *l)) ++l;}
#define LOWCASE(l) (isupper((unsigned char) (l)) ? \
@ -106,7 +104,7 @@ private void bs1(struct magic *);
private uint16_t swap2(uint16_t);
private uint32_t swap4(uint32_t);
private uint64_t swap8(uint64_t);
private void mkdbname(const char *, char **, int);
private char *mkdbname(struct magic_set *, const char *, int);
private int apprentice_map(struct magic_set *, struct magic **, uint32_t *,
const char *);
private int apprentice_compile(struct magic_set *, struct magic **, uint32_t *,
@ -115,8 +113,8 @@ private int check_format_type(const char *, int);
private int check_format(struct magic_set *, struct magic *);
private int get_op(char);
private int parse_mime(struct magic_set *, struct magic_entry *, const char *);
private int parse_strength(struct magic_set *, struct magic_entry *,
const char *);
private int parse_strength(struct magic_set *, struct magic_entry *, const char *);
private int parse_apple(struct magic_set *, struct magic_entry *, const char *);
private size_t maxmagic = 0;
@ -131,6 +129,7 @@ private struct {
} bang[] = {
#define DECLARE_FIELD(name) { # name, sizeof(# name) - 1, parse_ ## name }
DECLARE_FIELD(mime),
DECLARE_FIELD(apple),
DECLARE_FIELD(strength),
#undef DECLARE_FIELD
{ NULL, 0, NULL }
@ -215,6 +214,9 @@ static const struct type_tbl_s {
{ XX("double"), FILE_DOUBLE, FILE_FMT_DOUBLE },
{ XX("bedouble"), FILE_BEDOUBLE, FILE_FMT_DOUBLE },
{ XX("ledouble"), FILE_LEDOUBLE, FILE_FMT_DOUBLE },
{ XX("leid3"), FILE_LEID3, FILE_FMT_NUM },
{ XX("beid3"), FILE_BEID3, FILE_FMT_NUM },
{ XX("indirect"), FILE_INDIRECT, FILE_FMT_NONE },
{ XX_NULL, FILE_INVALID, FILE_FMT_NONE },
# undef XX
# undef XX_NULL
@ -589,7 +591,8 @@ set_test_type(struct magic *mstart, struct magic *m)
case FILE_REGEX:
case FILE_SEARCH:
/* binary test if pattern is not text */
if (file_looks_utf8(m->value.us, m->vallen, NULL, NULL) <= 0)
if (file_looks_utf8(m->value.us, (size_t)m->vallen, NULL,
NULL) <= 0)
mstart->flag |= BINTEST;
break;
case FILE_DEFAULT:
@ -704,6 +707,8 @@ apprentice_load(struct magic_set *ms, struct magic **magicp, uint32_t *nmagicp,
(void)fprintf(stderr, "%s\n", usg_hdr);
/* load directory or file */
/* FIXME: Read file names and sort them to prevent
non-determinism. See Debian bug #488562. */
if (stat(fn, &st) == 0 && S_ISDIR(st.st_mode)) {
dir = opendir(fn);
if (dir) {
@ -868,6 +873,7 @@ file_signextend(struct magic_set *ms, struct magic *m, uint64_t v)
case FILE_REGEX:
case FILE_SEARCH:
case FILE_DEFAULT:
case FILE_INDIRECT:
break;
default:
if (ms->flags & MAGIC_CHECK)
@ -1184,6 +1190,12 @@ parse(struct magic_set *ms, struct magic_entry **mentryp, uint32_t *nmentryp,
case 'G':
m->in_type = FILE_BEDOUBLE;
break;
case 'i':
m->in_type = FILE_LEID3;
break;
case 'I':
m->in_type = FILE_BEID3;
break;
default:
if (ms->flags & MAGIC_CHECK)
file_magwarn(ms,
@ -1472,6 +1484,38 @@ parse_strength(struct magic_set *ms, struct magic_entry *me, const char *line)
return -1;
}
/*
 * Parse an Apple CREATOR/TYPE annotation from magic file and put it into
 * magic[index - 1]
 *
 * ms   - magic set, used for warnings and for the MAGIC_CHECK flag
 * me   - entry being built; the annotation is stored in its last
 *        magic line (or the first one if there are no continuations)
 * line - text following the "!:apple" keyword
 *
 * Returns 0 if at least one character was stored, -1 if the entry already
 * carries an Apple type or if nothing usable was found on the line.
 *
 * Note: when the annotation is at least sizeof(m->apple) characters long
 * the field is filled completely and no terminating NUL is written.
 */
private int
parse_apple(struct magic_set *ms, struct magic_entry *me, const char *line)
{
	size_t i;
	const char *l = line;
	struct magic *m = &me->mp[me->cont_count == 0 ? 0 : me->cont_count - 1];

	if (m->apple[0] != '\0') {
		/* Report the existing Apple type, not the MIME type. */
		file_magwarn(ms, "Current entry already has a APPLE type `%.8s',"
		    " new type `%s'", m->apple, l);
		return -1;
	}

	EATAB;
	/* Copy alphanumerics and "-+/." until the field is full. */
	for (i = 0; *l && ((isascii((unsigned char)*l) && isalnum((unsigned char)*l))
	    || strchr("-+/.", *l)) && i < sizeof(m->apple); m->apple[i++] = *l++)
		continue;
	if (i == sizeof(m->apple) && *l) {
		/* Input continued past the end of the field. */
		if (ms->flags & MAGIC_CHECK)
			file_magwarn(ms, "APPLE type `%s' truncated %zu",
			    line, i);
	}

	if (i > 0)
		return 0;
	else
		return -1;
}
/*
* parse a MIME annotation line from magic file, put into magic[index - 1]
* if valid
@ -1490,10 +1534,8 @@ parse_mime(struct magic_set *ms, struct magic_entry *me, const char *line)
}
EATAB;
for (i = 0;
*l && ((isascii((unsigned char)*l) && isalnum((unsigned char)*l))
|| strchr("-+/.", *l)) && i < sizeof(m->mimetype);
m->mimetype[i++] = *l++)
for (i = 0; *l && ((isascii((unsigned char)*l) && isalnum((unsigned char)*l))
|| strchr("-+/.", *l)) && i < sizeof(m->mimetype); m->mimetype[i++] = *l++)
continue;
if (i == sizeof(m->mimetype)) {
m->desc[sizeof(m->mimetype) - 1] = '\0';
@ -2014,7 +2056,7 @@ apprentice_map(struct magic_set *ms, struct magic **magicp, uint32_t *nmagicp,
char *dbname = NULL;
void *mm = NULL;
mkdbname(fn, &dbname, 0);
dbname = mkdbname(ms, fn, 0);
if (dbname == NULL)
goto error2;
@ -2111,7 +2153,7 @@ apprentice_compile(struct magic_set *ms, struct magic **magicp,
char *dbname;
int rv = -1;
mkdbname(fn, &dbname, 1);
dbname = mkdbname(ms, fn, 1);
if (dbname == NULL)
goto out;
@ -2149,24 +2191,45 @@ private const char ext[] = ".mgc";
/*
* make a dbname
*/
private void
mkdbname(const char *fn, char **buf, int strip)
private char *
mkdbname(struct magic_set *ms, const char *fn, int strip)
{
const char *p;
const char *p, *q;
char *buf;
if (strip) {
if ((p = strrchr(fn, '/')) != NULL)
fn = ++p;
}
if ((p = strstr(fn, ext)) != NULL && p[sizeof(ext) - 1] == '\0')
*buf = strdup(fn);
else
(void)asprintf(buf, "%s%s", fn, ext);
for (q = fn; *q; q++)
continue;
/* Look for .mgc */
for (p = ext + sizeof(ext) - 1; p >= ext && q >= fn; p--, q--)
if (*p != *q)
break;
if (buf && *buf && strlen(*buf) > MAXPATHLEN) {
free(*buf);
*buf = NULL;
/* Did not find .mgc, restore q */
if (p >= ext)
while (*q)
q++;
q++;
/* Compatibility with old code that looked in .mime */
if (ms->flags & MAGIC_MIME) {
asprintf(&buf, "%.*s.mime%s", (int)(q - fn), fn, ext);
if (access(buf, R_OK) != -1) {
ms->flags &= MAGIC_MIME_TYPE;
return buf;
}
free(buf);
}
asprintf(&buf, "%.*s%s", (int)(q - fn), fn, ext);
/* Compatibility with old code that looked in .mime */
if (strstr(p, ".mime") != NULL)
ms->flags &= MAGIC_MIME_TYPE;
return buf;
}
/*

View File

@ -26,15 +26,13 @@
#include "file.h"
#include <stdio.h>
#ifndef lint
FILE_RCSID("@(#)$File: apptype.c,v 1.10 2009/02/03 20:27:51 christos Exp $")
#endif /* lint */
#include <stdlib.h>
#include <string.h>
#ifndef lint
FILE_RCSID("@(#)$File: apptype.c,v 1.7 2007/01/12 17:38:27 christos Exp $")
#endif /* lint */
#ifdef __EMX__
#include <io.h>
#define INCL_DOSSESMGR

View File

@ -2,7 +2,7 @@
* Copyright (c) Ian F. Darwin 1986-1995.
* Software written by Ian F. Darwin and others;
* maintained 1995-present by Christos Zoulas and others.
*
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
@ -12,7 +12,7 @@
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
@ -31,14 +31,15 @@
*
* Extensively modified by Eric Fischer <enf@pobox.com> in July, 2000,
* to handle character codes other than ASCII on a unified basis.
*
* Joerg Wunsch <joerg@freebsd.org> wrote the original support for 8-bit
* international characters, now subsumed into this file.
*/
#include "file.h"
#ifndef lint
FILE_RCSID("@(#)$File: ascmagic.c,v 1.75 2009/02/03 20:27:51 christos Exp $")
#endif /* lint */
#include "magic.h"
#include <stdio.h>
#include <string.h>
#include <memory.h>
#include <ctype.h>
@ -48,39 +49,71 @@
#endif
#include "names.h"
#ifndef lint
FILE_RCSID("@(#)$File: ascmagic.c,v 1.64 2008/07/16 18:00:57 christos Exp $")
#endif /* lint */
#define MAXLINELEN 300 /* longest sane line length */
#define ISSPC(x) ((x) == ' ' || (x) == '\t' || (x) == '\r' || (x) == '\n' \
|| (x) == 0x85 || (x) == '\f')
private int looks_ascii(const unsigned char *, size_t, unichar *, size_t *);
private int looks_utf8_with_BOM(const unsigned char *, size_t, unichar *,
size_t *);
private int looks_ucs16(const unsigned char *, size_t, unichar *, size_t *);
private int looks_latin1(const unsigned char *, size_t, unichar *, size_t *);
private int looks_extended(const unsigned char *, size_t, unichar *, size_t *);
private void from_ebcdic(const unsigned char *, size_t, unsigned char *);
private int ascmatch(const unsigned char *, const unichar *, size_t);
private unsigned char *encode_utf8(unsigned char *, size_t, unichar *, size_t);
private size_t trim_nuls(const unsigned char *, size_t);
/*
 * Strip the trailing NUL bytes that process() appended to the buffer,
 * but never shrink the length below one byte so that callers always
 * have something to inspect.  Returns the trimmed length.
 */
private size_t
trim_nuls(const unsigned char *buf, size_t nbytes)
{
	size_t keep;

	for (keep = nbytes; keep > 1; keep--) {
		if (buf[keep - 1] != '\0')
			break;
	}
	return keep;
}
protected int
file_ascmagic(struct magic_set *ms, const unsigned char *buf, size_t nbytes)
{
size_t i;
unsigned char *nbuf = NULL, *utf8_buf = NULL, *utf8_end;
unichar *ubuf = NULL;
size_t ulen, mlen;
const struct names *p;
int rv = -1;
int mime = ms->flags & MAGIC_MIME;
unichar *ubuf = NULL;
size_t ulen;
int rv = 1;
const char *code = NULL;
const char *code_mime = NULL;
const char *type = NULL;
if (ms->flags & MAGIC_APPLE)
return 0;
nbytes = trim_nuls(buf, nbytes);
/* If file doesn't look like any sort of text, give up. */
if (file_encoding(ms, buf, nbytes, &ubuf, &ulen, &code, &code_mime,
&type) == 0) {
rv = 0;
goto done;
}
rv = file_ascmagic_with_encoding(ms, buf, nbytes, ubuf, ulen, code,
type);
done:
if (ubuf)
free(ubuf);
return rv;
}
protected int
file_ascmagic_with_encoding(struct magic_set *ms, const unsigned char *buf,
size_t nbytes, unichar *ubuf, size_t ulen, const char *code,
const char *type)
{
unsigned char *utf8_buf = NULL, *utf8_end;
size_t mlen, i;
const struct names *p;
int rv = -1;
int mime = ms->flags & MAGIC_MIME;
const char *subtype = NULL;
const char *subtype_mime = NULL;
@ -96,82 +129,20 @@ file_ascmagic(struct magic_set *ms, const unsigned char *buf, size_t nbytes)
size_t last_line_end = (size_t)-1;
int has_long_lines = 0;
/*
* Undo the NUL-termination kindly provided by process()
* but leave at least one byte to look at
*/
while (nbytes > 1 && buf[nbytes - 1] == '\0')
nbytes--;
if (ms->flags & MAGIC_APPLE)
return 0;
if ((nbuf = CAST(unsigned char *, calloc((size_t)1,
(nbytes + 1) * sizeof(nbuf[0])))) == NULL)
goto done;
if ((ubuf = CAST(unichar *, calloc((size_t)1,
(nbytes + 1) * sizeof(ubuf[0])))) == NULL)
goto done;
/*
* Then try to determine whether it's any character code we can
* identify. Each of these tests, if it succeeds, will leave
* the text converted into one-unichar-per-character Unicode in
* ubuf, and the number of characters converted in ulen.
*/
if (looks_ascii(buf, nbytes, ubuf, &ulen)) {
code = "ASCII";
code_mime = "us-ascii";
type = "text";
} else if (looks_utf8_with_BOM(buf, nbytes, ubuf, &ulen) > 0) {
code = "UTF-8 Unicode (with BOM)";
code_mime = "utf-8";
type = "text";
} else if (file_looks_utf8(buf, nbytes, ubuf, &ulen) > 1) {
code = "UTF-8 Unicode";
code_mime = "utf-8";
type = "text";
} else if ((i = looks_ucs16(buf, nbytes, ubuf, &ulen)) != 0) {
if (i == 1)
code = "Little-endian UTF-16 Unicode";
else
code = "Big-endian UTF-16 Unicode";
type = "character data";
code_mime = "utf-16"; /* is this defined? */
} else if (looks_latin1(buf, nbytes, ubuf, &ulen)) {
code = "ISO-8859";
type = "text";
code_mime = "iso-8859-1";
} else if (looks_extended(buf, nbytes, ubuf, &ulen)) {
code = "Non-ISO extended-ASCII";
type = "text";
code_mime = "unknown";
} else {
from_ebcdic(buf, nbytes, nbuf);
if (looks_ascii(nbuf, nbytes, ubuf, &ulen)) {
code = "EBCDIC";
type = "character data";
code_mime = "ebcdic";
} else if (looks_latin1(nbuf, nbytes, ubuf, &ulen)) {
code = "International EBCDIC";
type = "character data";
code_mime = "ebcdic";
} else {
rv = 0;
goto done; /* doesn't look like text at all */
}
}
nbytes = trim_nuls(buf, nbytes);
/* If we have fewer than 2 bytes, give up. */
if (nbytes <= 1) {
rv = 0;
goto done;
}
/* Convert ubuf to UTF-8 and try text soft magic */
/* If original was ASCII or UTF-8, could use nbuf instead of
re-converting. */
/* malloc size is a conservative overestimate; could be
re-converting improved, or at least realloced after
re-converting conversion. */
improved, or at least realloced after conversion. */
mlen = ulen * 6;
if ((utf8_buf = CAST(unsigned char *, malloc(mlen))) == NULL) {
file_oomem(ms, mlen);
@ -179,10 +150,11 @@ file_ascmagic(struct magic_set *ms, const unsigned char *buf, size_t nbytes)
}
if ((utf8_end = encode_utf8(utf8_buf, mlen, ubuf, ulen)) == NULL)
goto done;
if (file_softmagic(ms, utf8_buf, utf8_end - utf8_buf, TEXTTEST) != 0) {
rv = 1;
if ((rv = file_softmagic(ms, utf8_buf, (size_t)(utf8_end - utf8_buf),
TEXTTEST)) != 0)
goto done;
}
else
rv = -1;
/* look for tokens from names.h - this is expensive! */
if ((ms->flags & MAGIC_NO_CHECK_TOKENS) != 0)
@ -255,41 +227,30 @@ file_ascmagic(struct magic_set *ms, const unsigned char *buf, size_t nbytes)
if (seen_cr && nbytes < HOWMANY)
n_cr++;
if (strcmp(type, "binary") == 0) {
rv = 0;
goto done;
}
if (mime) {
if (mime & MAGIC_MIME_TYPE) {
if ((mime & MAGIC_MIME_TYPE) != 0) {
if (subtype_mime) {
if (file_printf(ms, subtype_mime) == -1)
if (file_printf(ms, "%s", subtype_mime) == -1)
goto done;
} else {
if (file_printf(ms, "text/plain") == -1)
goto done;
}
}
if ((mime == 0 || mime == MAGIC_MIME) && code_mime) {
if ((mime & MAGIC_MIME_TYPE) &&
file_printf(ms, " charset=") == -1)
goto done;
if (file_printf(ms, code_mime) == -1)
goto done;
}
if (mime == MAGIC_MIME_ENCODING)
file_printf(ms, "binary");
} else {
if (file_printf(ms, code) == -1)
if (file_printf(ms, "%s", code) == -1)
goto done;
if (subtype) {
if (file_printf(ms, " ") == -1)
goto done;
if (file_printf(ms, subtype) == -1)
if (file_printf(ms, " %s", subtype) == -1)
goto done;
}
if (file_printf(ms, " ") == -1)
goto done;
if (file_printf(ms, type) == -1)
if (file_printf(ms, " %s", type) == -1)
goto done;
if (has_long_lines)
@ -305,7 +266,7 @@ file_ascmagic(struct magic_set *ms, const unsigned char *buf, size_t nbytes)
if (file_printf(ms, ", with") == -1)
goto done;
if (n_crlf == 0 && n_cr == 0 && n_nel == 0 && n_lf == 0) {
if (n_crlf == 0 && n_cr == 0 && n_nel == 0 && n_lf == 0) {
if (file_printf(ms, " no") == -1)
goto done;
} else {
@ -348,10 +309,6 @@ file_ascmagic(struct magic_set *ms, const unsigned char *buf, size_t nbytes)
}
rv = 1;
done:
if (nbuf)
free(nbuf);
if (ubuf)
free(ubuf);
if (utf8_buf)
free(utf8_buf);
@ -374,144 +331,6 @@ ascmatch(const unsigned char *s, const unichar *us, size_t ulen)
return 1;
}
/*
* This table reflects a particular philosophy about what constitutes
* "text," and there is room for disagreement about it.
*
* Version 3.31 of the file command considered a file to be ASCII if
* each of its characters was approved by either the isascii() or
* isalpha() function. On most systems, this would mean that any
* file consisting only of characters in the range 0x00 ... 0x7F
* would be called ASCII text, but many systems might reasonably
* consider some characters outside this range to be alphabetic,
* so the file command would call such characters ASCII. It might
* have been more accurate to call this "considered textual on the
* local system" than "ASCII."
*
* It considered a file to be "International language text" if each
* of its characters was either an ASCII printing character (according
* to the real ASCII standard, not the above test), a character in
* the range 0x80 ... 0xFF, or one of the following control characters:
* backspace, tab, line feed, vertical tab, form feed, carriage return,
* escape. No attempt was made to determine the language in which files
* of this type were written.
*
*
* The table below considers a file to be ASCII if all of its characters
* are either ASCII printing characters (again, according to the X3.4
* standard, not isascii()) or any of the following controls: bell,
* backspace, tab, line feed, form feed, carriage return, esc, nextline.
*
* I include bell because some programs (particularly shell scripts)
* use it literally, even though it is rare in normal text. I exclude
* vertical tab because it never seems to be used in real text. I also
* include, with hesitation, the X3.64/ECMA-43 control nextline (0x85),
* because that's what the dd EBCDIC->ASCII table maps the EBCDIC newline
* character to. It might be more appropriate to include it in the 8859
* set instead of the ASCII set, but it's got to be included in *something*
* we recognize or EBCDIC files aren't going to be considered textual.
* Some old Unix source files use SO/SI (^N/^O) to shift between Greek
* and Latin characters, so these should possibly be allowed. But they
* make a real mess on VT100-style displays if they're not paired properly,
* so we are probably better off not calling them text.
*
* A file is considered to be ISO-8859 text if its characters are all
* either ASCII, according to the above definition, or printing characters
* from the ISO-8859 8-bit extension, characters 0xA0 ... 0xFF.
*
* Finally, a file is considered to be international text from some other
* character code if its characters are all either ISO-8859 (according to
* the above definition) or characters in the range 0x80 ... 0x9F, which
* ISO-8859 considers to be control characters but the IBM PC and Macintosh
* consider to be printing characters.
*/
/*
 * Character classes used as the values of text_chars[] below.
 */
#define F 0 /* character never appears in text */
#define T 1 /* character appears in plain ASCII text */
#define I 2 /* character appears in ISO-8859 text */
#define X 3 /* character appears in non-ISO extended ASCII (Mac, IBM PC) */
/*
 * Classification table with one entry per byte value (0x00 .. 0xff),
 * laid out as 16 rows of 16 entries; the trailing comment on each row
 * gives the high nibble.  Each entry is one of F, T, I or X.
 */
private char text_chars[256] = {
/* BEL BS HT LF FF CR */
F, F, F, F, F, F, F, T, T, T, T, F, T, T, F, F, /* 0x0X */
/* ESC */
F, F, F, F, F, F, F, F, F, F, F, T, F, F, F, F, /* 0x1X */
T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, /* 0x2X */
T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, /* 0x3X */
T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, /* 0x4X */
T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, /* 0x5X */
T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, /* 0x6X */
T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, F, /* 0x7X */
/* NEL */
X, X, X, X, X, T, X, X, X, X, X, X, X, X, X, X, /* 0x8X */
X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, /* 0x9X */
I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, /* 0xaX */
I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, /* 0xbX */
I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, /* 0xcX */
I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, /* 0xdX */
I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, /* 0xeX */
I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I /* 0xfX */
};
/*
 * Check whether every byte of buf is classified T (plain ASCII text) in
 * text_chars[].  Accepted bytes are copied one-per-unichar into ubuf and
 * counted in *ulen.  Returns 1 if all nbytes bytes qualify, 0 as soon as
 * one does not (ubuf/*ulen then hold the bytes accepted so far).
 */
private int
looks_ascii(const unsigned char *buf, size_t nbytes, unichar *ubuf,
    size_t *ulen)
{
	size_t pos = 0;

	*ulen = 0;
	while (pos < nbytes) {
		unsigned char ch = buf[pos++];

		if (text_chars[ch] != T)
			return 0;
		ubuf[(*ulen)++] = ch;
	}
	return 1;
}
/*
 * Check whether every byte of buf is classified T (ASCII text) or
 * I (ISO-8859 text) in text_chars[].  Accepted bytes are copied
 * one-per-unichar into ubuf and counted in *ulen.  Returns 1 if all
 * nbytes bytes qualify, 0 at the first byte that does not.
 */
private int
looks_latin1(const unsigned char *buf, size_t nbytes, unichar *ubuf,
    size_t *ulen)
{
	size_t pos = 0;

	*ulen = 0;
	while (pos < nbytes) {
		unsigned char ch = buf[pos++];

		switch (text_chars[ch]) {
		case T:
		case I:
			break;
		default:
			return 0;
		}
		ubuf[(*ulen)++] = ch;
	}
	return 1;
}
/*
 * Check whether every byte of buf is classified T (ASCII text),
 * I (ISO-8859 text) or X (non-ISO extended ASCII) in text_chars[].
 * Accepted bytes are copied one-per-unichar into ubuf and counted in
 * *ulen.  Returns 1 if all nbytes bytes qualify, 0 at the first byte
 * that does not.
 */
private int
looks_extended(const unsigned char *buf, size_t nbytes, unichar *ubuf,
    size_t *ulen)
{
	size_t pos = 0;

	*ulen = 0;
	while (pos < nbytes) {
		unsigned char ch = buf[pos++];

		switch (text_chars[ch]) {
		case T:
		case I:
		case X:
			break;
		default:
			return 0;
		}
		ubuf[(*ulen)++] = ch;
	}
	return 1;
}
/*
* Encode Unicode string as UTF-8, returning pointer to character
* after end of string, or NULL if an invalid character is found.
@ -568,226 +387,3 @@ encode_utf8(unsigned char *buf, size_t len, unichar *ubuf, size_t ulen)
return buf;
}
/*
 * Classify a buffer as UTF-8 text.  Return value:
 *
 *	-1: not valid UTF-8 (bad lead byte or bad continuation byte)
 *	 0: valid, but contains control characters that text_chars[]
 *	    does not consider text
 *	 1: plain 7-bit text only
 *	 2: valid UTF-8 containing at least one multi-byte sequence
 *
 * When ubuf is non-NULL the decoded characters are stored in ubuf and
 * their count in *ulen; the caller must supply a large enough ubuf.
 * A multi-byte sequence cut off by the end of the buffer is silently
 * dropped rather than treated as an error.
 */
protected int
file_looks_utf8(const unsigned char *buf, size_t nbytes, unichar *ubuf,
    size_t *ulen)
{
	size_t pos = 0;
	int saw_multibyte = 0, saw_ctrl = 0;

	if (ubuf != NULL)
		*ulen = 0;

	while (pos < nbytes) {
		unsigned char lead = buf[pos];
		unichar cp;
		int trail, k;

		if (lead < 0x80) {
			/*
			 * 0xxxxxxx: plain ASCII.  Remember odd control
			 * characters; they veto a "text" verdict even if
			 * every sequence is well formed.
			 */
			if (text_chars[lead] != T)
				saw_ctrl = 1;
			if (ubuf != NULL)
				ubuf[(*ulen)++] = lead;
			pos++;
			continue;
		}

		if (lead < 0xc0)		/* 10xxxxxx cannot start a char */
			return -1;

		/* 11xxxxxx: work out payload bits and sequence length */
		if (lead < 0xe0) {		/* 110xxxxx */
			cp = lead & 0x1f;
			trail = 1;
		} else if (lead < 0xf0) {	/* 1110xxxx */
			cp = lead & 0x0f;
			trail = 2;
		} else if (lead < 0xf8) {	/* 11110xxx */
			cp = lead & 0x07;
			trail = 3;
		} else if (lead < 0xfc) {	/* 111110xx */
			cp = lead & 0x03;
			trail = 4;
		} else if (lead < 0xfe) {	/* 1111110x */
			cp = lead & 0x01;
			trail = 5;
		} else {
			return -1;
		}

		for (k = 0; k < trail; k++) {
			pos++;
			if (pos >= nbytes)
				goto done;	/* truncated sequence */
			if ((buf[pos] & 0xc0) != 0x80)
				return -1;	/* not 10xxxxxx */
			cp = (cp << 6) + (buf[pos] & 0x3f);
		}

		if (ubuf != NULL)
			ubuf[(*ulen)++] = cp;
		saw_multibyte = 1;
		pos++;
	}
done:
	if (saw_ctrl)
		return 0;
	return saw_multibyte ? 2 : 1;
}
/*
 * UTF-8 check for data that starts with a byte order mark.  Returns -1
 * unless the buffer is longer than three bytes and begins with
 * EF BB BF; otherwise returns whatever file_looks_utf8() reports for
 * the bytes after the BOM.
 */
private int
looks_utf8_with_BOM(const unsigned char *buf, size_t nbytes, unichar *ubuf,
    size_t *ulen)
{
	if (nbytes <= 3)
		return -1;
	if (buf[0] != 0xef || buf[1] != 0xbb || buf[2] != 0xbf)
		return -1;
	return file_looks_utf8(buf + 3, nbytes - 3, ubuf, ulen);
}
/*
 * Check for 16-bit Unicode text introduced by a byte order mark.
 * Returns 0 if there is no BOM or the data does not look like text,
 * 1 for little-endian (FF FE) and 2 for big-endian (FE FF) data.
 * Decoded 16-bit units are stored in ubuf and counted in *ulen; a
 * trailing odd byte is ignored.
 */
private int
looks_ucs16(const unsigned char *buf, size_t nbytes, unichar *ubuf,
    size_t *ulen)
{
	int big;
	size_t off;

	if (nbytes < 2)
		return 0;

	if (buf[0] == 0xfe && buf[1] == 0xff)
		big = 1;
	else if (buf[0] == 0xff && buf[1] == 0xfe)
		big = 0;
	else
		return 0;

	*ulen = 0;

	for (off = 2; off + 1 < nbytes; off += 2) {
		unichar unit;

		/* XXX fix to properly handle chars > 65536 */
		if (big)
			unit = buf[off + 1] + 256 * buf[off];
		else
			unit = buf[off] + 256 * buf[off + 1];
		ubuf[(*ulen)++] = unit;

		/* a byte-swapped BOM inside the data is not text */
		if (unit == 0xfffe)
			return 0;
		/* 7-bit values must be characters text_chars[] accepts */
		if (unit < 128 && text_chars[(size_t)unit] != T)
			return 0;
	}

	return 1 + big;
}
#undef F
#undef T
#undef I
#undef X
/*
* This table maps each EBCDIC character to an (8-bit extended) ASCII
* character, as specified in the rationale for the dd(1) command in
* draft 11.2 (September, 1991) of the POSIX P1003.2 standard.
*
* Unfortunately it does not seem to correspond exactly to any of the
* five variants of EBCDIC documented in IBM's _Enterprise Systems
* Architecture/390: Principles of Operation_, SA22-7201-06, Seventh
* Edition, July, 1999, pp. I-1 - I-4.
*
* Fortunately, though, all versions of EBCDIC, including this one, agree
* on most of the printing characters that also appear in (7-bit) ASCII.
* Of these, only '|', '!', '~', '^', '[', and ']' are in question at all.
*
* Fortunately too, there is general agreement that codes 0x00 through
* 0x3F represent control characters, 0x41 a nonbreaking space, and the
* remainder printing characters.
*
* This is sufficient to allow us to identify EBCDIC text and to distinguish
* between old-style and internationalized examples of text.
*/
private unsigned char ebcdic_to_ascii[] = {
/*
 * Layout: 256 entries, 16 per row.  The table is indexed by the EBCDIC
 * byte value (see from_ebcdic), so the row is selected by the high
 * nibble and the column by the low nibble of that byte.
 * Printable targets are written as character literals; every other
 * target is written as a decimal byte value.
 */
0, 1, 2, 3, 156, 9, 134, 127, 151, 141, 142, 11, 12, 13, 14, 15,
16, 17, 18, 19, 157, 133, 8, 135, 24, 25, 146, 143, 28, 29, 30, 31,
128, 129, 130, 131, 132, 10, 23, 27, 136, 137, 138, 139, 140, 5, 6, 7,
144, 145, 22, 147, 148, 149, 150, 4, 152, 153, 154, 155, 20, 21, 158, 26,
' ', 160, 161, 162, 163, 164, 165, 166, 167, 168, 213, '.', '<', '(', '+', '|',
'&', 169, 170, 171, 172, 173, 174, 175, 176, 177, '!', '$', '*', ')', ';', '~',
'-', '/', 178, 179, 180, 181, 182, 183, 184, 185, 203, ',', '%', '_', '>', '?',
186, 187, 188, 189, 190, 191, 192, 193, 194, '`', ':', '#', '@', '\'','=', '"',
195, 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 196, 197, 198, 199, 200, 201,
202, 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', '^', 204, 205, 206, 207, 208,
209, 229, 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', 210, 211, 212, '[', 214, 215,
216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, ']', 230, 231,
'{', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 232, 233, 234, 235, 236, 237,
'}', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 238, 239, 240, 241, 242, 243,
'\\',159, 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 244, 245, 246, 247, 248, 249,
'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 250, 251, 252, 253, 254, 255
};
#ifdef notdef
/*
* The following EBCDIC-to-ASCII table may relate more closely to reality,
* or at least to modern reality. It comes from
*
* http://ftp.s390.ibm.com/products/oe/bpxqp9.html
*
* and maps the characters of EBCDIC code page 1047 (the code used for
* Unix-derived software on IBM's 390 systems) to the corresponding
* characters from ISO 8859-1.
*
* If this table is used instead of the above one, some of the special
* cases for the NEL character can be taken out of the code.
*/
private unsigned char ebcdic_1047_to_8859[] = {
/*
 * This table is only compiled when notdef is defined (it sits inside
 * an #ifdef notdef block).
 * Layout: 256 entries, 16 per row; the row corresponds to the high
 * nibble and the column to the low nibble of the EBCDIC byte.
 */
0x00,0x01,0x02,0x03,0x9C,0x09,0x86,0x7F,0x97,0x8D,0x8E,0x0B,0x0C,0x0D,0x0E,0x0F,
0x10,0x11,0x12,0x13,0x9D,0x0A,0x08,0x87,0x18,0x19,0x92,0x8F,0x1C,0x1D,0x1E,0x1F,
0x80,0x81,0x82,0x83,0x84,0x85,0x17,0x1B,0x88,0x89,0x8A,0x8B,0x8C,0x05,0x06,0x07,
0x90,0x91,0x16,0x93,0x94,0x95,0x96,0x04,0x98,0x99,0x9A,0x9B,0x14,0x15,0x9E,0x1A,
0x20,0xA0,0xE2,0xE4,0xE0,0xE1,0xE3,0xE5,0xE7,0xF1,0xA2,0x2E,0x3C,0x28,0x2B,0x7C,
0x26,0xE9,0xEA,0xEB,0xE8,0xED,0xEE,0xEF,0xEC,0xDF,0x21,0x24,0x2A,0x29,0x3B,0x5E,
0x2D,0x2F,0xC2,0xC4,0xC0,0xC1,0xC3,0xC5,0xC7,0xD1,0xA6,0x2C,0x25,0x5F,0x3E,0x3F,
0xF8,0xC9,0xCA,0xCB,0xC8,0xCD,0xCE,0xCF,0xCC,0x60,0x3A,0x23,0x40,0x27,0x3D,0x22,
0xD8,0x61,0x62,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0xAB,0xBB,0xF0,0xFD,0xFE,0xB1,
0xB0,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,0x70,0x71,0x72,0xAA,0xBA,0xE6,0xB8,0xC6,0xA4,
0xB5,0x7E,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7A,0xA1,0xBF,0xD0,0x5B,0xDE,0xAE,
0xAC,0xA3,0xA5,0xB7,0xA9,0xA7,0xB6,0xBC,0xBD,0xBE,0xDD,0xA8,0xAF,0x5D,0xB4,0xD7,
0x7B,0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0xAD,0xF4,0xF6,0xF2,0xF3,0xF5,
0x7D,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0xB9,0xFB,0xFC,0xF9,0xFA,0xFF,
0x5C,0xF7,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0xB2,0xD4,0xD6,0xD2,0xD3,0xD5,
0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,0x38,0x39,0xB3,0xDB,0xDC,0xD9,0xDA,0x9F
};
#endif
/*
* Copy buf[0 ... nbytes-1] into out[], translating EBCDIC to ASCII.
*/
private void
from_ebcdic(const unsigned char *buf, size_t nbytes, unsigned char *out)
{
	size_t remaining = nbytes;

	/*
	 * Translate one byte at a time through the lookup table, writing
	 * each result to the matching position in out.  A countdown is
	 * used so that no pointer is touched when nbytes is 0.
	 */
	while (remaining-- > 0)
		*out++ = ebcdic_to_ascii[*buf++];
}

View File

@ -26,7 +26,11 @@
* SUCH DAMAGE.
*/
#include <stdarg.h>
#include "file.h"
#ifndef lint
FILE_RCSID("@(#)$File: asprintf.c,v 1.3 2009/02/03 20:27:51 christos Exp $")
#endif
int vasprintf(char **ptr, const char *format_string, va_list vargs);

View File

@ -33,15 +33,18 @@
* using method, return sizeof new
*/
#include "file.h"
#ifndef lint
FILE_RCSID("@(#)$File: compress.c,v 1.61 2009/02/03 20:27:51 christos Exp $")
#endif
#include "magic.h"
#include <stdio.h>
#include <stdlib.h>
#ifdef HAVE_UNISTD_H
#include <unistd.h>
#endif
#include <string.h>
#include <errno.h>
#include <sys/types.h>
#include <sys/ioctl.h>
#ifdef HAVE_SYS_WAIT_H
#include <sys/wait.h>
@ -54,11 +57,6 @@
#include <zlib.h>
#endif
#ifndef lint
FILE_RCSID("@(#)$File: compress.c,v 1.57 2008/07/16 18:00:57 christos Exp $")
#endif
private const struct {
const char magic[8];
size_t maglen;
@ -77,6 +75,7 @@ private const struct {
{ "PK\3\4", 4, { "gzip", "-cdq", NULL }, 1 }, /* pkzipped, */
/* ...only first file examined */
{ "BZh", 3, { "bzip2", "-cd", NULL }, 1 }, /* bzip2-ed */
{ "LZIP", 4, { "lzip", "-cdq", NULL }, 1 },
};
private size_t ncompr = sizeof(compr) / sizeof(compr[0]);
@ -237,7 +236,7 @@ file_pipe2file(struct magic_set *ms, int fd, const void *startbuf,
char buf[4096];
int r, tfd;
(void)strcpy(buf, "/tmp/file.XXXXXX");
(void)strlcpy(buf, "/tmp/file.XXXXXX", sizeof buf);
#ifndef HAVE_MKSTEMP
{
char *ptr = mktemp(buf);

View File

@ -78,6 +78,12 @@
/* Define to 1 if you have the <string.h> header file. */
#undef HAVE_STRING_H
/* Define to 1 if you have the `strlcat' function. */
#undef HAVE_STRLCAT
/* Define to 1 if you have the `strlcpy' function. */
#undef HAVE_STRLCPY
/* Define to 1 if you have the `strndup' function. */
#undef HAVE_STRNDUP
@ -90,6 +96,12 @@
/* Define to 1 if `st_rdev' is member of `struct stat'. */
#undef HAVE_STRUCT_STAT_ST_RDEV
/* Define to 1 if `tm_gmtoff' is member of `struct tm'. */
#undef HAVE_STRUCT_TM_TM_GMTOFF
/* Define to 1 if `tm_zone' is member of `struct tm'. */
#undef HAVE_STRUCT_TM_TM_ZONE
/* Define to 1 if you have the <sys/mman.h> header file. */
#undef HAVE_SYS_MMAN_H

227
configure vendored
View File

@ -1,6 +1,6 @@
#! /bin/sh
# Guess values for system-dependent variables and create Makefiles.
# Generated by GNU Autoconf 2.61 for file 4.26.
# Generated by GNU Autoconf 2.61 for file 5.00.
#
# Report bugs to <christos@astron.com>.
#
@ -728,8 +728,8 @@ SHELL=${CONFIG_SHELL-/bin/sh}
# Identity of this package.
PACKAGE_NAME='file'
PACKAGE_TARNAME='file'
PACKAGE_VERSION='4.26'
PACKAGE_STRING='file 4.26'
PACKAGE_VERSION='5.00'
PACKAGE_STRING='file 5.00'
PACKAGE_BUGREPORT='christos@astron.com'
# Factoring default headers for most tests.
@ -1395,7 +1395,7 @@ if test "$ac_init_help" = "long"; then
# Omit some internal or obsolete options to make the list less imposing.
# This message is too long to be a string in the A/UX 3.1 sh.
cat <<_ACEOF
\`configure' configures file 4.26 to adapt to many kinds of systems.
\`configure' configures file 5.00 to adapt to many kinds of systems.
Usage: $0 [OPTION]... [VAR=VALUE]...
@ -1465,7 +1465,7 @@ fi
if test -n "$ac_init_help"; then
case $ac_init_help in
short | recursive ) echo "Configuration of file 4.26:";;
short | recursive ) echo "Configuration of file 5.00:";;
esac
cat <<\_ACEOF
@ -1572,7 +1572,7 @@ fi
test -n "$ac_init_help" && exit $ac_status
if $ac_init_version; then
cat <<\_ACEOF
file configure 4.26
file configure 5.00
generated by GNU Autoconf 2.61
Copyright (C) 1992, 1993, 1994, 1995, 1996, 1998, 1999, 2000, 2001,
@ -1586,7 +1586,7 @@ cat >config.log <<_ACEOF
This file contains any messages produced by compilers while
running configure, to aid debugging if configure makes a mistake.
It was created by file $as_me 4.26, which was
It was created by file $as_me 5.00, which was
generated by GNU Autoconf 2.61. Invocation command line was
$ $0 $@
@ -2276,7 +2276,7 @@ fi
# Define the identity of the package.
PACKAGE='file'
VERSION='4.26'
VERSION='5.00'
cat >>confdefs.h <<_ACEOF
@ -22164,6 +22164,209 @@ cat >>confdefs.h <<\_ACEOF
#define TM_IN_SYS_TIME 1
_ACEOF
fi
{ echo "$as_me:$LINENO: checking for struct tm.tm_gmtoff" >&5
echo $ECHO_N "checking for struct tm.tm_gmtoff... $ECHO_C" >&6; }
if test "${ac_cv_member_struct_tm_tm_gmtoff+set}" = set; then
echo $ECHO_N "(cached) $ECHO_C" >&6
else
cat >conftest.$ac_ext <<_ACEOF
/* confdefs.h. */
_ACEOF
cat confdefs.h >>conftest.$ac_ext
cat >>conftest.$ac_ext <<_ACEOF
/* end confdefs.h. */
$ac_includes_default
int
main ()
{
static struct tm ac_aggr;
if (ac_aggr.tm_gmtoff)
return 0;
;
return 0;
}
_ACEOF
rm -f conftest.$ac_objext
if { (ac_try="$ac_compile"
case "(($ac_try" in
*\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
*) ac_try_echo=$ac_try;;
esac
eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
(eval "$ac_compile") 2>conftest.er1
ac_status=$?
grep -v '^ *+' conftest.er1 >conftest.err
rm -f conftest.er1
cat conftest.err >&5
echo "$as_me:$LINENO: \$? = $ac_status" >&5
(exit $ac_status); } && {
test -z "$ac_c_werror_flag" ||
test ! -s conftest.err
} && test -s conftest.$ac_objext; then
ac_cv_member_struct_tm_tm_gmtoff=yes
else
echo "$as_me: failed program was:" >&5
sed 's/^/| /' conftest.$ac_ext >&5
cat >conftest.$ac_ext <<_ACEOF
/* confdefs.h. */
_ACEOF
cat confdefs.h >>conftest.$ac_ext
cat >>conftest.$ac_ext <<_ACEOF
/* end confdefs.h. */
$ac_includes_default
int
main ()
{
static struct tm ac_aggr;
if (sizeof ac_aggr.tm_gmtoff)
return 0;
;
return 0;
}
_ACEOF
rm -f conftest.$ac_objext
if { (ac_try="$ac_compile"
case "(($ac_try" in
*\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
*) ac_try_echo=$ac_try;;
esac
eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
(eval "$ac_compile") 2>conftest.er1
ac_status=$?
grep -v '^ *+' conftest.er1 >conftest.err
rm -f conftest.er1
cat conftest.err >&5
echo "$as_me:$LINENO: \$? = $ac_status" >&5
(exit $ac_status); } && {
test -z "$ac_c_werror_flag" ||
test ! -s conftest.err
} && test -s conftest.$ac_objext; then
ac_cv_member_struct_tm_tm_gmtoff=yes
else
echo "$as_me: failed program was:" >&5
sed 's/^/| /' conftest.$ac_ext >&5
ac_cv_member_struct_tm_tm_gmtoff=no
fi
rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
fi
rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
fi
{ echo "$as_me:$LINENO: result: $ac_cv_member_struct_tm_tm_gmtoff" >&5
echo "${ECHO_T}$ac_cv_member_struct_tm_tm_gmtoff" >&6; }
if test $ac_cv_member_struct_tm_tm_gmtoff = yes; then
cat >>confdefs.h <<_ACEOF
#define HAVE_STRUCT_TM_TM_GMTOFF 1
_ACEOF
fi
{ echo "$as_me:$LINENO: checking for struct tm.tm_zone" >&5
echo $ECHO_N "checking for struct tm.tm_zone... $ECHO_C" >&6; }
if test "${ac_cv_member_struct_tm_tm_zone+set}" = set; then
echo $ECHO_N "(cached) $ECHO_C" >&6
else
cat >conftest.$ac_ext <<_ACEOF
/* confdefs.h. */
_ACEOF
cat confdefs.h >>conftest.$ac_ext
cat >>conftest.$ac_ext <<_ACEOF
/* end confdefs.h. */
$ac_includes_default
int
main ()
{
static struct tm ac_aggr;
if (ac_aggr.tm_zone)
return 0;
;
return 0;
}
_ACEOF
rm -f conftest.$ac_objext
if { (ac_try="$ac_compile"
case "(($ac_try" in
*\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
*) ac_try_echo=$ac_try;;
esac
eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
(eval "$ac_compile") 2>conftest.er1
ac_status=$?
grep -v '^ *+' conftest.er1 >conftest.err
rm -f conftest.er1
cat conftest.err >&5
echo "$as_me:$LINENO: \$? = $ac_status" >&5
(exit $ac_status); } && {
test -z "$ac_c_werror_flag" ||
test ! -s conftest.err
} && test -s conftest.$ac_objext; then
ac_cv_member_struct_tm_tm_zone=yes
else
echo "$as_me: failed program was:" >&5
sed 's/^/| /' conftest.$ac_ext >&5
cat >conftest.$ac_ext <<_ACEOF
/* confdefs.h. */
_ACEOF
cat confdefs.h >>conftest.$ac_ext
cat >>conftest.$ac_ext <<_ACEOF
/* end confdefs.h. */
$ac_includes_default
int
main ()
{
static struct tm ac_aggr;
if (sizeof ac_aggr.tm_zone)
return 0;
;
return 0;
}
_ACEOF
rm -f conftest.$ac_objext
if { (ac_try="$ac_compile"
case "(($ac_try" in
*\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
*) ac_try_echo=$ac_try;;
esac
eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
(eval "$ac_compile") 2>conftest.er1
ac_status=$?
grep -v '^ *+' conftest.er1 >conftest.err
rm -f conftest.er1
cat conftest.err >&5
echo "$as_me:$LINENO: \$? = $ac_status" >&5
(exit $ac_status); } && {
test -z "$ac_c_werror_flag" ||
test ! -s conftest.err
} && test -s conftest.$ac_objext; then
ac_cv_member_struct_tm_tm_zone=yes
else
echo "$as_me: failed program was:" >&5
sed 's/^/| /' conftest.$ac_ext >&5
ac_cv_member_struct_tm_tm_zone=no
fi
rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
fi
rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
fi
{ echo "$as_me:$LINENO: result: $ac_cv_member_struct_tm_tm_zone" >&5
echo "${ECHO_T}$ac_cv_member_struct_tm_tm_zone" >&6; }
if test $ac_cv_member_struct_tm_tm_zone = yes; then
cat >>confdefs.h <<_ACEOF
#define HAVE_STRUCT_TM_TM_ZONE 1
_ACEOF
fi
{ echo "$as_me:$LINENO: checking for tm_zone in struct tm" >&5
@ -23853,7 +24056,9 @@ done
for ac_func in getopt_long asprintf vasprintf
for ac_func in getopt_long asprintf vasprintf strlcpy strlcat
do
as_ac_var=`echo "ac_cv_func_$ac_func" | $as_tr_sh`
{ echo "$as_me:$LINENO: checking for $ac_func" >&5
@ -24469,7 +24674,7 @@ exec 6>&1
# report actual input values of CONFIG_FILES etc. instead of their
# values after options handling.
ac_log="
This file was extended by file $as_me 4.26, which was
This file was extended by file $as_me 5.00, which was
generated by GNU Autoconf 2.61. Invocation command line was
CONFIG_FILES = $CONFIG_FILES
@ -24522,7 +24727,7 @@ Report bugs to <bug-autoconf@gnu.org>."
_ACEOF
cat >>$CONFIG_STATUS <<_ACEOF
ac_cs_version="\\
file config.status 4.26
file config.status 5.00
configured by $0, generated by GNU Autoconf 2.61,
with options \\"`echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`\\"

View File

@ -1,5 +1,5 @@
dnl Process this file with autoconf to produce a configure script.
AC_INIT(file, 4.26, christos@astron.com)
AC_INIT(file, 5.00, christos@astron.com)
AM_INIT_AUTOMAKE
AM_CONFIG_HEADER(config.h)
@ -75,6 +75,8 @@ AC_TYPE_OFF_T
AC_TYPE_SIZE_T
AC_CHECK_MEMBERS([struct stat.st_rdev])
AC_STRUCT_TM
AC_CHECK_MEMBERS([struct tm.tm_gmtoff, struct tm.tm_zone])
AC_STRUCT_TIMEZONE_DAYLIGHT
AC_SYS_LARGEFILE
AC_FUNC_FSEEKO
@ -139,7 +141,7 @@ dnl Checks for functions
AC_CHECK_FUNCS(mmap strerror strndup strtoul mbrtowc mkstemp utimes utime wcwidth strtof)
dnl Provide implementation of some required functions if necessary
AC_REPLACE_FUNCS(getopt_long asprintf vasprintf)
AC_REPLACE_FUNCS(getopt_long asprintf vasprintf strlcpy strlcat)
dnl Checks for libraries
AC_CHECK_LIB(z,gzopen)

185
file.c
View File

@ -2,7 +2,7 @@
* Copyright (c) Ian F. Darwin 1986-1995.
* Software written by Ian F. Darwin and others;
* maintained 1995-present by Christos Zoulas and others.
*
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
@ -12,7 +12,7 @@
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
@ -30,15 +30,16 @@
*/
#include "file.h"
#ifndef lint
FILE_RCSID("@(#)$File: file.c,v 1.130 2009/02/03 20:27:51 christos Exp $")
#endif /* lint */
#include "magic.h"
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <string.h>
#include <sys/types.h>
#include <sys/param.h> /* for MAXPATHLEN */
#include <sys/stat.h>
#ifdef RESTORE_TIME
# if (__COHERENT__ >= 0x420)
# include <sys/utime.h>
@ -73,11 +74,6 @@ int getopt_long(int argc, char * const *argv, const char *optstring, const struc
#include "patchlevel.h"
#ifndef lint
FILE_RCSID("@(#)$File: file.c,v 1.121 2008/07/03 15:48:18 christos Exp $")
#endif /* lint */
#ifdef S_IFLNK
#define SYMLINKFLAG "Lh"
#else
@ -87,7 +83,7 @@ FILE_RCSID("@(#)$File: file.c,v 1.121 2008/07/03 15:48:18 christos Exp $")
# define USAGE "Usage: %s [-bcik" SYMLINKFLAG "nNrsvz0] [-e test] [-f namefile] [-F separator] [-m magicfiles] file...\n %s -C -m magicfiles\n"
#ifndef MAXPATHLEN
#define MAXPATHLEN 512
#define MAXPATHLEN 1024
#endif
private int /* Global command-line options */
@ -96,21 +92,45 @@ private int /* Global command-line options */
nobuffer = 0, /* Do not buffer stdout */
nulsep = 0; /* Append '\0' to the separator */
private const char *magicfile = 0; /* where the magic is */
private const char *default_magicfile = MAGIC;
private const char *separator = ":"; /* Default field separator */
private const char hmagic[] = "/.magic";
private const struct option long_options[] = {
#define OPT(shortname, longname, opt, doc) \
{longname, opt, NULL, shortname},
#define OPT_LONGONLY(longname, opt, doc) \
{longname, opt, NULL, 0},
#include "file_opts.h"
#undef OPT
#undef OPT_LONGONLY
{0, 0, NULL, 0}
};
#define OPTSTRING "bcCde:f:F:hikLm:nNprsvz0"
private const struct {
const char *name;
int value;
} nv[] = {
{ "apptype", MAGIC_NO_CHECK_APPTYPE },
{ "ascii", MAGIC_NO_CHECK_ASCII },
{ "cdf", MAGIC_NO_CHECK_CDF },
{ "compress", MAGIC_NO_CHECK_COMPRESS },
{ "elf", MAGIC_NO_CHECK_ELF },
{ "encoding", MAGIC_NO_CHECK_ENCODING },
{ "soft", MAGIC_NO_CHECK_SOFT },
{ "tar", MAGIC_NO_CHECK_TAR },
{ "tokens", MAGIC_NO_CHECK_TOKENS },
};
private char *progname; /* used throughout */
private struct magic_set *magic;
private void unwrap(char *);
private void usage(void);
private void help(void);
int main(int, char *[]);
private void process(const char *, int);
private void load(const char *, int);
private int unwrap(struct magic_set *, const char *);
private int process(struct magic_set *ms, const char *, int);
private struct magic_set *load(const char *, int);
/*
@ -122,36 +142,12 @@ main(int argc, char *argv[])
int c;
size_t i;
int action = 0, didsomefiles = 0, errflg = 0;
int flags = 0;
int flags = 0, e = 0;
char *home, *usermagic;
struct stat sb;
static const char hmagic[] = "/.magic";
#define OPTSTRING "bcCde:f:F:hikLm:nNprsvz0"
struct magic_set *magic = NULL;
char magicpath[2 * MAXPATHLEN + 2];
int longindex;
static const struct option long_options[] =
{
#define OPT(shortname, longname, opt, doc) \
{longname, opt, NULL, shortname},
#define OPT_LONGONLY(longname, opt, doc) \
{longname, opt, NULL, 0},
#include "file_opts.h"
#undef OPT
#undef OPT_LONGONLY
{0, 0, NULL, 0}
};
static const struct {
const char *name;
int value;
} nv[] = {
{ "apptype", MAGIC_NO_CHECK_APPTYPE },
{ "ascii", MAGIC_NO_CHECK_ASCII },
{ "compress", MAGIC_NO_CHECK_COMPRESS },
{ "elf", MAGIC_NO_CHECK_ELF },
{ "soft", MAGIC_NO_CHECK_SOFT },
{ "tar", MAGIC_NO_CHECK_TAR },
{ "tokens", MAGIC_NO_CHECK_TOKENS },
};
const char *magicfile; /* where the magic is */
/* makes islower etc work for other langs */
(void)setlocale(LC_CTYPE, "");
@ -171,14 +167,12 @@ main(int argc, char *argv[])
magicfile = usermagic;
else
if ((home = getenv("HOME")) != NULL) {
if ((usermagic = malloc(strlen(home)
+ sizeof(hmagic))) != NULL) {
(void)strcpy(usermagic, home);
(void)strcat(usermagic, hmagic);
if (stat(usermagic, &sb)<0)
free(usermagic);
else
magicfile = usermagic;
(void)snprintf(magicpath, sizeof(magicpath), "%s%s",
home, hmagic);
if (access(magicpath, R_OK) == 0) {
(void)snprintf(magicpath, sizeof(magicpath),
"%s%s:%s", home, hmagic, magicfile);
magicfile = magicpath;
}
}
@ -194,9 +188,12 @@ main(int argc, char *argv[])
help();
break;
case 10:
flags |= MAGIC_MIME_TYPE;
flags |= MAGIC_APPLE;
break;
case 11:
flags |= MAGIC_MIME_TYPE;
break;
case 12:
flags |= MAGIC_MIME_ENCODING;
break;
}
@ -226,12 +223,14 @@ main(int argc, char *argv[])
else
flags |= nv[i].value;
break;
case 'f':
if(action)
usage();
load(magicfile, flags);
unwrap(optarg);
if (magic == NULL)
if ((magic = load(magicfile, flags)) == NULL)
return 1;
e |= unwrap(magic, optarg);
++didsomefiles;
break;
case 'F':
@ -289,10 +288,18 @@ main(int argc, char *argv[])
if (errflg) {
usage();
}
if (e)
return e;
switch(action) {
case FILE_CHECK:
case FILE_COMPILE:
/*
* Don't try to check/compile ~/.magic unless we explicitly
* ask for it.
*/
if (magicfile == magicpath)
magicfile = default_magicfile;
magic = magic_open(flags|MAGIC_CHECK);
if (magic == NULL) {
(void)fprintf(stderr, "%s: %s\n", progname,
@ -304,18 +311,19 @@ main(int argc, char *argv[])
if (c == -1) {
(void)fprintf(stderr, "%s: %s\n", progname,
magic_error(magic));
return -1;
return 1;
}
return 0;
default:
load(magicfile, flags);
if (magic == NULL)
if ((magic = load(magicfile, flags)) == NULL)
return 1;
break;
}
if (optind == argc) {
if (!didsomefiles) {
if (!didsomefiles)
usage();
}
}
else {
size_t j, wid, nw;
@ -332,42 +340,43 @@ main(int argc, char *argv[])
bflag = optind >= argc - 1;
}
for (; optind < argc; optind++)
process(argv[optind], wid);
e |= process(magic, argv[optind], wid);
}
c = magic->haderr ? 1 : 0;
magic_close(magic);
return c;
if (magic)
magic_close(magic);
return e;
}
private void
private struct magic_set *
/*ARGSUSED*/
load(const char *m, int flags)
load(const char *magicfile, int flags)
{
if (magic || m == NULL)
return;
magic = magic_open(flags);
struct magic_set *magic = magic_open(flags);
if (magic == NULL) {
(void)fprintf(stderr, "%s: %s\n", progname, strerror(errno));
exit(1);
return NULL;
}
if (magic_load(magic, magicfile) == -1) {
(void)fprintf(stderr, "%s: %s\n",
progname, magic_error(magic));
exit(1);
magic_close(magic);
return NULL;
}
return magic;
}
/*
* unwrap -- read a file of filenames, do each one.
*/
private void
unwrap(char *fn)
private int
unwrap(struct magic_set *ms, const char *fn)
{
char buf[MAXPATHLEN];
FILE *f;
int wid = 0, cwid;
int e = 0;
if (strcmp("-", fn) == 0) {
f = stdin;
@ -376,7 +385,7 @@ unwrap(char *fn)
if ((f = fopen(fn, "r")) == NULL) {
(void)fprintf(stderr, "%s: Cannot open `%s' (%s).\n",
progname, fn, strerror(errno));
exit(1);
return 1;
}
while (fgets(buf, sizeof(buf), f) != NULL) {
@ -391,19 +400,20 @@ unwrap(char *fn)
while (fgets(buf, sizeof(buf), f) != NULL) {
buf[strcspn(buf, "\n")] = '\0';
process(buf, wid);
e |= process(ms, buf, wid);
if(nobuffer)
(void)fflush(stdout);
}
(void)fclose(f);
return e;
}
/*
* Called for each input file on the command line (or in a list of files)
*/
private void
process(const char *inname, int wid)
private int
process(struct magic_set *ms, const char *inname, int wid)
{
const char *type;
int std_in = strcmp(inname, "-") == 0;
@ -418,11 +428,14 @@ process(const char *inname, int wid)
(int) (nopad ? 0 : (wid - file_mbswidth(inname))), "");
}
type = magic_file(magic, std_in ? NULL : inname);
if (type == NULL)
(void)printf("ERROR: %s\n", magic_error(magic));
else
type = magic_file(ms, std_in ? NULL : inname);
if (type == NULL) {
(void)printf("ERROR: %s\n", magic_error(ms));
return 1;
} else {
(void)printf("%s\n", type);
return 0;
}
}
size_t
@ -475,9 +488,9 @@ help(void)
"Determine type of FILEs.\n"
"\n", stderr);
#define OPT(shortname, longname, opt, doc) \
fprintf(stderr, " -%c, --" longname doc, shortname);
fprintf(stderr, " -%c, --" longname doc, shortname);
#define OPT_LONGONLY(longname, opt, doc) \
fprintf(stderr, " --" longname doc);
fprintf(stderr, " --" longname doc);
#include "file_opts.h"
#undef OPT
#undef OPT_LONGONLY

60
file.h
View File

@ -2,7 +2,7 @@
* Copyright (c) Ian F. Darwin 1986-1995.
* Software written by Ian F. Darwin and others;
* maintained 1995-present by Christos Zoulas and others.
*
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
@ -12,7 +12,7 @@
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
@ -27,7 +27,7 @@
*/
/*
* file.h - definitions for file(1) program
* @(#)$File: file.h,v 1.108 2008/07/16 18:00:57 christos Exp $
* @(#)$File: file.h,v 1.118 2009/02/03 20:27:51 christos Exp $
*/
#ifndef __file_h__
@ -48,6 +48,7 @@
#endif
#include <regex.h>
#include <sys/types.h>
#include <sys/param.h>
/* Do this here and now, because struct stat gets re-defined on solaris */
#include <sys/stat.h>
#include <stdarg.h>
@ -103,8 +104,8 @@
#define MAXstring 32 /* max leng of "string" types */
#define MAGICNO 0xF11E041C
#define VERSIONNO 6
#define FILE_MAGICSIZE (32 * 6)
#define VERSIONNO 7
#define FILE_MAGICSIZE 200
#define FILE_LOAD 0
#define FILE_CHECK 1
@ -122,7 +123,7 @@ union VALUETYPE {
unsigned char us[MAXstring];
float f;
double d;
};
};
struct magic {
/* Word 1 */
@ -134,7 +135,7 @@ struct magic {
#define UNSIGNED 0x08 /* comparison is unsigned */
#define NOSPACE 0x10 /* suppress space character before output */
#define BINTEST 0x20 /* test is for a binary type (set only
for top-level tests) */
for top-level tests) */
#define TEXTTEST 0 /* for passing to file_softmagic */
uint8_t factor;
@ -183,7 +184,10 @@ struct magic {
#define FILE_DOUBLE 36
#define FILE_BEDOUBLE 37
#define FILE_LEDOUBLE 38
#define FILE_NAMES_SIZE 39/* size of array to contain all names */
#define FILE_BEID3 39
#define FILE_LEID3 40
#define FILE_INDIRECT 41
#define FILE_NAMES_SIZE 42/* size of array to contain all names */
#define IS_STRING(t) \
((t) == FILE_STRING || \
@ -209,7 +213,7 @@ struct magic {
#else
uint8_t dummy;
#endif
uint8_t factor_op;
uint8_t factor_op;
#define FILE_FACTOR_OP_PLUS '+'
#define FILE_FACTOR_OP_MINUS '-'
#define FILE_FACTOR_OP_TIMES '*'
@ -257,11 +261,13 @@ struct magic {
#define str_range _u._s._count
#define str_flags _u._s._flags
/* Words 9-16 */
union VALUETYPE value; /* either number or string */
/* Words 17..31 */
union VALUETYPE value; /* either number or string */
/* Words 17-24 */
char desc[MAXDESC]; /* description */
/* Words 32..47 */
/* Words 25-32 */
char mimetype[MAXDESC]; /* MIME type */
/* Words 33-34 */
char apple[8];
};
#define BIT(A) (1 << (A))
@ -302,7 +308,7 @@ struct level_info {
int last_match;
int last_cond; /* used for error checking by parse() */
#endif
} *li;
};
struct magic_set {
struct mlist *mlist;
struct cont {
@ -315,8 +321,9 @@ struct magic_set {
} o;
uint32_t offset;
int error;
int flags;
int haderr;
int flags; /* Control magic tests. */
int event_flags; /* Note things that happened. */
#define EVENT_HAD_ERR 0x01
const char *file;
size_t line; /* current magic line number */
@ -348,11 +355,19 @@ protected int file_printf(struct magic_set *, const char *, ...)
protected int file_reset(struct magic_set *);
protected int file_tryelf(struct magic_set *, int, const unsigned char *,
size_t);
protected int file_trycdf(struct magic_set *, int, const unsigned char *,
size_t);
protected int file_zmagic(struct magic_set *, int, const char *,
const unsigned char *, size_t);
protected int file_ascmagic(struct magic_set *, const unsigned char *, size_t);
protected int file_ascmagic_with_encoding(struct magic_set *,
const unsigned char *, size_t, unichar *, size_t, const char *,
const char *);
protected int file_encoding(struct magic_set *, const unsigned char *, size_t,
unichar **, size_t *, const char **, const char **, const char **);
protected int file_is_tar(struct magic_set *, const unsigned char *, size_t);
protected int file_softmagic(struct magic_set *, const unsigned char *, size_t, int);
protected int file_softmagic(struct magic_set *, const unsigned char *, size_t,
int);
protected struct mlist *file_apprentice(struct magic_set *, const char *, int);
protected uint64_t file_signextend(struct magic_set *, struct magic *,
uint64_t);
@ -397,6 +412,13 @@ int vasprintf(char **, const char *, va_list);
int asprintf(char **ptr, const char *format_string, ...);
#endif
#ifndef HAVE_STRLCPY
size_t strlcpy(char *dst, const char *src, size_t siz);
#endif
#ifndef HAVE_STRLCAT
size_t strlcat(char *dst, const char *src, size_t siz);
#endif
#if defined(HAVE_MMAP) && defined(HAVE_SYS_MMAN_H) && !defined(QUICK)
#define QUICK
#endif
@ -407,12 +429,14 @@ int asprintf(char **ptr, const char *format_string, ...);
#ifndef __cplusplus
#ifdef __GNUC__
static const char *rcsid(const char *) __attribute__((__used__));
#endif
#define FILE_RCSID(id) \
static const char rcsid[] __attribute__((__used__)) = id;
#else
#define FILE_RCSID(id) \
static const char *rcsid(const char *p) { \
return rcsid(p = id); \
}
#endif
#else
#define FILE_RCSID(id)
#endif

153
file.man
View File

@ -1,5 +1,5 @@
.\" $File: file.man,v 1.73 2008/02/19 17:58:00 rrt Exp $
.Dd February 19, 2008
.\" $File: file.man,v 1.79 2008/11/06 22:49:08 rrt Exp $
.Dd October 9, 2008
.Dt FILE __CSECTION__
.Os
.Sh NAME
@ -41,12 +41,12 @@ characters and is probably safe to read on an
terminal),
.Em executable
(the file contains the result of compiling a program
in a form understandable to some
in a form understandable to some
.Dv UNIX
kernel or another),
or
.Em data
meaning anything else (data is usually
meaning anything else (data is usually
.Sq binary
or non-printable).
Exceptions are well-known file formats (core files, tar archives)
@ -54,13 +54,13 @@ that are known to contain binary data.
When modifying magic files or the program itself, make sure to
.Em "preserve these keywords" .
Users depend on knowing that all the readable files in a directory
have the word
.Dq text
have the word
.Sq text
printed.
Don't do as Berkeley did and change
.Dq shell commands text
to
.Dq shell script .
Don't do as Berkeley did and change
.Sq shell commands text
to
.Sq shell script .
.Pp
The filesystem tests are based on examining the return from a
.Xr stat 2
@ -78,16 +78,16 @@ The magic tests are used to check for files with data in
particular fixed formats.
The canonical example of this is a binary executable (compiled program)
.Dv a.out
file, whose format is defined in
file, whose format is defined in
.In elf.h ,
.In a.out.h
and possibly
.In exec.h
in the standard include directory.
These files have a
These files have a
.Sq "magic number"
stored in a particular place
near the beginning of the file that tells the
near the beginning of the file that tells the
.Dv UNIX operating system
that the file is a binary executable, and which of several types thereof.
The concept of a
@ -116,11 +116,11 @@ ranges and sequences of bytes that constitute printable text
in each set.
If a file passes any of these tests, its character set is reported.
ASCII, ISO-8859-x, UTF-8, and extended-ASCII files are identified
as
.Dq text
as
.Sq text
because they will be mostly readable on nearly any terminal;
UTF-16 and EBCDIC are only
.Dq character data
UTF-16 and EBCDIC are only
.Sq character data
because, while
they contain text, it is text that will require translation
before it can be read.
@ -144,19 +144,19 @@ For example, the keyword
.Em .br
indicates that the file is most likely a
.Xr troff 1
input file, just as the keyword
input file, just as the keyword
.Em struct
indicates a C program.
These tests are less reliable than the previous
two groups, so they are performed last.
The language test routines also test for some miscellany
(such as
(such as
.Xr tar 1
archives).
.Pp
Any file that cannot be identified as having been written
in any of the character sets listed above is simply said to be
.Dq data .
.Sq data .
.Sh OPTIONS
.Bl -tag -width indent
.It Fl b , -brief
@ -177,40 +177,41 @@ from the list of tests made to determine the file type. Valid test names
are:
.Bl -tag -width
.It apptype
Check for
.Dv EMX
application type (only on EMX).
.It ascii
Check for various types of ascii files.
.It text
Various types of text files (this test will try to guess the text encoding, irrespective of the setting of the
.Sq encoding
option).
.It encoding
Different text encodings for soft magic tests.
.It tokens
Looks for known tokens inside text files.
.It cdf
Prints details of Compound Document Files.
.It compress
Don't look for, or inside compressed files.
Checks for, and looks inside, compressed files.
.It elf
Don't print elf details.
.It fortran
Don't look for fortran sequences inside ascii files.
Prints ELF file details.
.It soft
Don't consult magic files.
Consults magic files.
.It tar
Don't examine tar files.
.It token
Don't look for known tokens inside ascii files.
.It troff
Don't look for troff sequences inside ascii files.
Examines tar files.
.El
.It Fl f , -files-from Ar namefile
Read the names of the files to be examined from
Read the names of the files to be examined from
.Ar namefile
(one per line)
(one per line)
before the argument list.
Either
Either
.Ar namefile
or at least one filename argument must be present;
to test the standard input, use
to test the standard input, use
.Sq -
as a filename argument.
.It Fl F , -separator Ar separator
Use the specified string as the separator between the filename and the
file result returned. Defaults to
file result returned. Defaults to
.Sq \&: .
.It Fl h , -no-dereference
option causes symlinks not to be followed
@ -221,17 +222,15 @@ is not defined.
.It Fl i , -mime
Causes the file command to output mime type strings rather than the more
traditional human readable ones. Thus it may say
.Dq text/plain charset=us-ascii
.Sq text/plain; charset=us-ascii
rather than
.Dq ASCII text .
.Sq ASCII text .
In order for this option to work, file changes the way
it handles files recognized by the command itself (such as many of the
text file types, directories etc), and makes use of an alternative
.Dq magic
.Sq magic
file.
(See
.Dq FILES
section, below).
(See the FILES section, below).
.It Fl -mime-type , -mime-encoding
Like
.Fl i ,
@ -239,10 +238,10 @@ but print only the specified element(s).
.It Fl k , -keep-going
Don't stop at the first match, keep going. Subsequent matches will
have the string
.Dq "\[rs]012\- "
.Sq "\[rs]012\- "
prepended.
(If you want a newline, see the
.Dq "\-r"
.Sq "\-r"
option.)
.It Fl L , -dereference
option causes symlinks to be followed, as the like-named option in
@ -324,7 +323,7 @@ will not attempt to open
.Pa $HOME/.magic .
.Nm
adds
.Dq .mgc
.Sq .mgc
to the value of this variable as appropriate.
The environment variable
.Dv POSIXLY_CORRECT
@ -347,47 +346,47 @@ options.
.Sh STANDARDS CONFORMANCE
This program is believed to exceed the System V Interface Definition
of FILE(CMD), as near as one can determine from the vague language
contained therein.
contained therein.
Its behavior is mostly compatible with the System V program of the same name.
This version knows more magic, however, so it will produce
different (albeit more accurate) output in many cases.
different (albeit more accurate) output in many cases.
.\" URL: http://www.opengroup.org/onlinepubs/009695399/utilities/file.html
.Pp
The one significant difference
The one significant difference
between this version and System V
is that this version treats any white space
as a delimiter, so that spaces in pattern strings must be escaped.
For example,
.Bd -literal -offset indent
.Bd -literal -offset indent
>10 string language impress\ (imPRESS data)
.Ed
.Pp
in an existing magic file would have to be changed to
.Bd -literal -offset indent
.Bd -literal -offset indent
>10 string language\e impress (imPRESS data)
.Ed
.Pp
In addition, in this version, if a pattern string contains a backslash,
it must be escaped.
For example
.Bd -literal -offset indent
.Bd -literal -offset indent
0 string \ebegindata Andrew Toolkit document
.Ed
.Pp
in an existing magic file would have to be changed to
.Bd -literal -offset indent
.Bd -literal -offset indent
0 string \e\ebegindata Andrew Toolkit document
.Ed
.Pp
SunOS releases 3.2 and later from Sun Microsystems include a
.Nm
.Nm
command derived from the System V one, but with some extensions.
My version differs from Sun's only in minor ways.
It includes the extension of the
It includes the extension of the
.Sq &
operator, used as,
for example,
.Bd -literal -offset indent
.Bd -literal -offset indent
>16 long&0x7fffffff >0 not stripped
.Ed
.Sh MAGIC DIRECTORY
@ -395,7 +394,7 @@ The magic file entries have been collected from various sources,
mainly USENET, and contributed by various authors.
Christos Zoulas (address below) will collect additional
or corrected magic file entries.
A consolidation of magic file entries
A consolidation of magic file entries
will be distributed periodically.
.Pp
The order of entries in the magic file is significant.
@ -405,14 +404,14 @@ If your old
.Nm
command uses a magic file,
keep the old magic file around for comparison purposes
(rename it to
(rename it to
.Pa __MAGIC__.orig ).
.Sh EXAMPLES
.Bd -literal -offset indent
.Bd -literal -offset indent
$ file file.c file /dev/{wd0a,hda}
file.c: C program text
file: ELF 32-bit LSB executable, Intel 80386, version 1 (SYSV),
dynamically linked (uses shared libs), stripped
dynamically linked (uses shared libs), stripped
/dev/wd0a: block special (0/0)
/dev/hda: block special (3/0)
@ -441,9 +440,9 @@ file: application/x-executable
.Ed
.Sh HISTORY
There has been a
.Nm
command in every
There has been a
.Nm
command in every
.Dv UNIX since at least Research Version 4
(man page dated November, 1973).
The System V version introduced one significant major change:
@ -466,7 +465,7 @@ Primary development and maintenance from 1990 to the present by
Christos Zoulas (christos@astron.com).
.Pp
Altered by Chris Lowth, chris@lowth.com, 2000:
Handle the
Handle the
.Fl i
option to output mime type strings, using an alternative
magic file and internal logic.
@ -480,7 +479,7 @@ support and merge MIME and non-MIME magic, support directories as well
as files of magic, apply many bug fixes and improve the build system.
.Pp
The list of contributors to the
.Dq magic
.Sq magic
directory (magic files)
is too long to include here.
You know who you are; thank you.
@ -512,10 +511,10 @@ files.
The support for text files (primarily for programming languages)
is simplistic, inefficient and requires recompilation to update.
.Pp
The list of keywords in
The list of keywords in
.Dv ascmagic
probably belongs in the Magic file.
This could be done by using some keyword like
This could be done by using some keyword like
.Sq *
for the offset value.
.Pp
@ -523,20 +522,20 @@ Complain about conflicts in the magic file entries.
Make a rule that the magic entries sort based on file offset rather
than position within the magic file?
.Pp
The program should provide a way to give an estimate
of
.Dq how good
The program should provide a way to give an estimate
of
.Sq how good
a guess is.
We end up removing guesses (e.g.
.Dq From\
We end up removing guesses (e.g.
.Sq From\
as first 5 chars of file) because
they are not as good as other guesses (e.g.
.Dq Newsgroups:
they are not as good as other guesses (e.g.
.Sq Newsgroups:
versus
.Dq Return-Path:
.Sq Return-Path:
).
Still, if the others don't pan out, it should be possible to use the
first guess.
first guess.
.Pp
This manual page, and particularly this section, is too long.
.Sh RETURN CODE

View File

@ -28,6 +28,7 @@ OPT('f', "files-from", 1, " FILE read the filenames to be examined from FIL
OPT('F', "separator", 1, " STRING use string as separator instead of `:'\n")
OPT('i', "mime", 0, " output MIME type strings (--mime-type and\n"
" --mime-encoding)\n")
OPT_LONGONLY("apple", 0, " output the Apple CREATOR/TYPE\n")
OPT_LONGONLY("mime-type", 0, " output the MIME type\n")
OPT_LONGONLY("mime-encoding", 0, " output the MIME encoding\n")
OPT('k', "keep-going", 0, " don't stop at the first match\n")

127
fsmagic.c
View File

@ -30,13 +30,17 @@
*/
#include "file.h"
#ifndef lint
FILE_RCSID("@(#)$File: fsmagic.c,v 1.59 2009/02/03 20:27:51 christos Exp $")
#endif /* lint */
#include "magic.h"
#include <string.h>
#ifdef HAVE_UNISTD_H
#include <unistd.h>
#endif
#include <stdlib.h>
#include <sys/stat.h>
/* Since major is a function on SVR4, we cannot use `ifndef major'. */
#ifdef MAJOR_IN_MKDEV
# include <sys/mkdev.h>
@ -56,10 +60,6 @@
#endif
#undef HAVE_MAJOR
#ifndef lint
FILE_RCSID("@(#)$File: fsmagic.c,v 1.52 2008/07/25 23:59:01 rrt Exp $")
#endif /* lint */
private int
bad_link(struct magic_set *ms, int err, char *buf)
{
@ -84,6 +84,21 @@ bad_link(struct magic_set *ms, int err, char *buf)
return 1;
}
/*
 * Print the MIME description of a non-regular filesystem object.
 *
 * ms:   magic set whose output buffer receives the text
 * mime: the MAGIC_MIME_TYPE / MAGIC_MIME_ENCODING bits selected by the caller
 * str:  MIME subtype, appended after "application/"
 *
 * When the type bit is set, "application/<str>" is printed; when both bits
 * are set a "; charset=" separator follows.  When the encoding bit is set,
 * "binary" is printed last.
 *
 * Returns 0 on success, or -1 as soon as any file_printf() call returns -1.
 */
private int
handle_mime(struct magic_set *ms, int mime, const char *str)
{
	const int want_type = (mime & MAGIC_MIME_TYPE) != 0;
	const int want_encoding = (mime & MAGIC_MIME_ENCODING) != 0;

	if (want_type) {
		if (file_printf(ms, "application/%s", str) == -1)
			return -1;
		/* The separator is only needed when both parts are shown. */
		if (want_encoding && file_printf(ms, "; charset=") == -1)
			return -1;
	}

	if (want_encoding) {
		if (file_printf(ms, "binary") == -1)
			return -1;
	}

	return 0;
}
protected int
file_fsmagic(struct magic_set *ms, const char *fn, struct stat *sb)
{
@ -95,6 +110,8 @@ file_fsmagic(struct magic_set *ms, const char *fn, struct stat *sb)
struct stat tstatbuf;
#endif
if (ms->flags & MAGIC_APPLE)
return 0;
if (fn == NULL)
return 0;
@ -140,11 +157,10 @@ file_fsmagic(struct magic_set *ms, const char *fn, struct stat *sb)
switch (sb->st_mode & S_IFMT) {
case S_IFDIR:
if ((mime & MAGIC_MIME_TYPE) &&
file_printf(ms, "application/x-directory")
== -1)
return -1;
if (!mime && file_printf(ms, "directory") == -1)
if (mime) {
if (handle_mime(ms, mime, "x-directory") == -1)
return -1;
} else if (file_printf(ms, "directory") == -1)
return -1;
return 1;
#ifdef S_IFCHR
@ -156,20 +172,20 @@ file_fsmagic(struct magic_set *ms, const char *fn, struct stat *sb)
*/
if ((ms->flags & MAGIC_DEVICES) != 0)
break;
if ((mime & MAGIC_MIME_TYPE) &&
file_printf(ms, "application/x-character-device")
== -1)
return -1;
if (!mime) {
if (mime) {
if (handle_mime(ms, mime, "x-character-device") == -1)
return -1;
} else {
#ifdef HAVE_STAT_ST_RDEV
# ifdef dv_unit
if (file_printf(ms, "character special (%d/%d/%d)",
major(sb->st_rdev), dv_unit(sb->st_rdev),
major(sb->st_rdev), dv_unit(sb->st_rdev),
dv_subunit(sb->st_rdev)) == -1)
return -1;
# else
if (file_printf(ms, "character special (%ld/%ld)",
(long) major(sb->st_rdev), (long) minor(sb->st_rdev)) == -1)
(long)major(sb->st_rdev), (long)minor(sb->st_rdev))
== -1)
return -1;
# endif
#else
@ -188,11 +204,10 @@ file_fsmagic(struct magic_set *ms, const char *fn, struct stat *sb)
*/
if ((ms->flags & MAGIC_DEVICES) != 0)
break;
if ((mime & MAGIC_MIME_TYPE) &&
file_printf(ms, "application/x-block-device")
== -1)
return -1;
if (!mime) {
if (mime) {
if (handle_mime(ms, mime, "x-block-device") == -1)
return -1;
} else {
#ifdef HAVE_STAT_ST_RDEV
# ifdef dv_unit
if (file_printf(ms, "block special (%d/%d/%d)",
@ -216,21 +231,19 @@ file_fsmagic(struct magic_set *ms, const char *fn, struct stat *sb)
case S_IFIFO:
if((ms->flags & MAGIC_DEVICES) != 0)
break;
if ((mime & MAGIC_MIME_TYPE) &&
file_printf(ms, "application/x-fifo")
== -1)
return -1;
if (!mime && file_printf(ms, "fifo (named pipe)") == -1)
if (mime) {
if (handle_mime(ms, mime, "x-fifo") == -1)
return -1;
} else if (file_printf(ms, "fifo (named pipe)") == -1)
return -1;
return 1;
#endif
#ifdef S_IFDOOR
case S_IFDOOR:
if ((mime & MAGIC_MIME_TYPE) &&
file_printf(ms, "application/x-door")
== -1)
return -1;
if (!mime && file_printf(ms, "door") == -1)
if (mime) {
if (handle_mime(ms, mime, "x-door") == -1)
return -1;
} else if (file_printf(ms, "door") == -1)
return -1;
return 1;
#endif
@ -242,11 +255,10 @@ file_fsmagic(struct magic_set *ms, const char *fn, struct stat *sb)
fn);
return -1;
}
if ((mime & MAGIC_MIME_TYPE) &&
file_printf(ms, "application/x-symlink")
== -1)
return -1;
if (!mime && file_printf(ms,
if (mime) {
if (handle_mime(ms, mime, "x-symlink") == -1)
return -1;
} else if (file_printf(ms,
"unreadable symlink `%s' (%s)", fn,
strerror(errno)) == -1)
return -1;
@ -271,18 +283,20 @@ file_fsmagic(struct magic_set *ms, const char *fn, struct stat *sb)
"path too long: `%s'", buf);
return -1;
}
if ((mime & MAGIC_MIME_TYPE) &&
file_printf(ms, "application/x-path-too-long")
== -1)
return -1;
if (!mime && file_printf(ms,
if (mime) {
if (handle_mime(ms, mime,
"x-path-too-long") == -1)
return -1;
} else if (file_printf(ms,
"path too long: `%s'", fn) == -1)
return -1;
return 1;
}
(void)strcpy(buf2, fn); /* take dir part */
/* take dir part */
(void)strlcpy(buf2, fn, sizeof buf2);
buf2[tmp - fn + 1] = '\0';
(void)strcat(buf2, buf); /* plus (rel) link */
/* plus (rel) link */
(void)strlcat(buf2, buf, sizeof buf2);
tmp = buf2;
}
if (stat(tmp, &tstatbuf) < 0)
@ -297,11 +311,10 @@ file_fsmagic(struct magic_set *ms, const char *fn, struct stat *sb)
ms->flags |= MAGIC_SYMLINK;
return p != NULL ? 1 : -1;
} else { /* just print what it points to */
if ((mime & MAGIC_MIME_TYPE) &&
file_printf(ms, "application/x-symlink")
== -1)
return -1;
if (!mime && file_printf(ms, "symbolic link to `%s'",
if (mime) {
if (handle_mime(ms, mime, "x-symlink") == -1)
return -1;
} else if (file_printf(ms, "symbolic link to `%s'",
buf) == -1)
return -1;
}
@ -310,11 +323,10 @@ file_fsmagic(struct magic_set *ms, const char *fn, struct stat *sb)
#ifdef S_IFSOCK
#ifndef __COHERENT__
case S_IFSOCK:
if ((mime & MAGIC_MIME_TYPE) &&
file_printf(ms, "application/x-socket")
== -1)
return -1;
if (!mime && file_printf(ms, "socket") == -1)
if (mime) {
if (handle_mime(ms, mime, "x-socket") == -1)
return -1;
} else if (file_printf(ms, "socket") == -1)
return -1;
return 1;
#endif
@ -340,9 +352,10 @@ file_fsmagic(struct magic_set *ms, const char *fn, struct stat *sb)
* when we read the file.)
*/
if ((ms->flags & MAGIC_DEVICES) == 0 && sb->st_size == 0) {
if ((!mime || (mime & MAGIC_MIME_TYPE)) &&
file_printf(ms, mime ? "application/x-empty" :
"empty") == -1)
if (mime) {
if (handle_mime(ms, mime, "x-empty") == -1)
return -1;
} else if (file_printf(ms, "empty") == -1)
return -1;
return 1;
}

161
funcs.c
View File

@ -1,7 +1,7 @@
/*
* Copyright (c) Christos Zoulas 2003.
* All Rights Reserved.
*
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
@ -11,7 +11,7 @@
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
@ -25,6 +25,11 @@
* SUCH DAMAGE.
*/
#include "file.h"
#ifndef lint
FILE_RCSID("@(#)$File: funcs.c,v 1.51 2008/11/07 18:57:28 christos Exp $")
#endif /* lint */
#include "magic.h"
#include <stdarg.h>
#include <stdlib.h>
@ -40,10 +45,6 @@
#include <limits.h>
#endif
#ifndef lint
FILE_RCSID("@(#)$File: funcs.c,v 1.44 2008/07/16 18:00:57 christos Exp $")
#endif /* lint */
#ifndef SIZE_MAX
#define SIZE_MAX ((size_t)~0)
#endif
@ -97,17 +98,17 @@ file_error_core(struct magic_set *ms, int error, const char *f, va_list va,
uint32_t lineno)
{
/* Only the first error is ok */
if (ms->haderr)
if (ms->event_flags & EVENT_HAD_ERR)
return;
if (lineno != 0) {
free(ms->o.buf);
ms->o.buf = NULL;
file_printf(ms, "line %u: ", lineno);
}
file_vprintf(ms, f, va);
file_vprintf(ms, f, va);
if (error > 0)
file_printf(ms, " (%s)", strerror(error));
ms->haderr++;
ms->event_flags |= EVENT_HAD_ERR;
ms->error = error;
}
@ -157,9 +158,16 @@ protected int
file_buffer(struct magic_set *ms, int fd, const char *inname, const void *buf,
size_t nb)
{
int m;
int m = 0, rv = 0, looks_text = 0;
int mime = ms->flags & MAGIC_MIME;
const unsigned char *ubuf = CAST(const unsigned char *, buf);
unichar *u8buf = NULL;
size_t ulen;
const char *code = NULL;
const char *code_mime = "binary";
const char *type = NULL;
if (nb == 0) {
if ((!mime || (mime & MAGIC_MIME_TYPE)) &&
@ -175,6 +183,11 @@ file_buffer(struct magic_set *ms, int fd, const char *inname, const void *buf,
return 1;
}
if ((ms->flags & MAGIC_NO_CHECK_ENCODING) == 0) {
looks_text = file_encoding(ms, ubuf, nb, &u8buf, &ulen,
&code, &code_mime, &type);
}
#ifdef __EMX__
if ((ms->flags & MAGIC_NO_CHECK_APPTYPE) == 0 && inname) {
switch (file_os2_apptype(ms, inname, buf, nb)) {
@ -189,41 +202,96 @@ file_buffer(struct magic_set *ms, int fd, const char *inname, const void *buf,
#endif
/* try compression stuff */
if ((ms->flags & MAGIC_NO_CHECK_COMPRESS) != 0 ||
(m = file_zmagic(ms, fd, inname, ubuf, nb)) == 0) {
/* Check if we have a tar file */
if ((ms->flags & MAGIC_NO_CHECK_TAR) != 0 ||
(m = file_is_tar(ms, ubuf, nb)) == 0) {
/* try tests in /etc/magic (or surrogate magic file) */
if ((ms->flags & MAGIC_NO_CHECK_SOFT) != 0 ||
(m = file_softmagic(ms, ubuf, nb, BINTEST)) == 0) {
/* try known keywords, check whether it is ASCII */
if ((ms->flags & MAGIC_NO_CHECK_ASCII) != 0 ||
(m = file_ascmagic(ms, ubuf, nb)) == 0) {
/* abandon hope, all ye who remain here */
if ((!mime || (mime & MAGIC_MIME_TYPE)) &&
file_printf(ms, mime ? "application/octet-stream" :
"data") == -1)
return -1;
m = 1;
}
if ((ms->flags & MAGIC_NO_CHECK_COMPRESS) == 0)
if ((m = file_zmagic(ms, fd, inname, ubuf, nb)) != 0) {
if ((ms->flags & MAGIC_DEBUG) != 0)
(void)fprintf(stderr, "zmagic %d\n", m);
goto done;
}
}
}
/* Check if we have a tar file */
if ((ms->flags & MAGIC_NO_CHECK_TAR) == 0)
if ((m = file_is_tar(ms, ubuf, nb)) != 0) {
if ((ms->flags & MAGIC_DEBUG) != 0)
(void)fprintf(stderr, "tar %d\n", m);
goto done;
}
/* Check if we have a CDF file */
if ((ms->flags & MAGIC_NO_CHECK_CDF) == 0)
if ((m = file_trycdf(ms, fd, ubuf, nb)) != 0) {
if ((ms->flags & MAGIC_DEBUG) != 0)
(void)fprintf(stderr, "cdf %d\n", m);
goto done;
}
/* try soft magic tests */
if ((ms->flags & MAGIC_NO_CHECK_SOFT) == 0)
if ((m = file_softmagic(ms, ubuf, nb, BINTEST)) != 0) {
if ((ms->flags & MAGIC_DEBUG) != 0)
(void)fprintf(stderr, "softmagic %d\n", m);
#ifdef BUILTIN_ELF
if ((ms->flags & MAGIC_NO_CHECK_ELF) == 0 && m == 1 &&
nb > 5 && fd != -1) {
/*
* We matched something in the file, so this *might*
* be an ELF file, and the file is at least 5 bytes
* long, so if it's an ELF file it has at least one
* byte past the ELF magic number - try extracting
* information from the ELF headers that cannot easily
* be extracted with rules in the magic file.
*/
(void)file_tryelf(ms, fd, ubuf, nb);
}
if ((ms->flags & MAGIC_NO_CHECK_ELF) == 0 && m == 1 &&
nb > 5 && fd != -1) {
/*
* We matched something in the file, so this
* *might* be an ELF file, and the file is at
* least 5 bytes long, so if it's an ELF file
* it has at least one byte past the ELF magic
* number - try extracting information from the
* ELF headers that cannot easily be
* extracted with rules in the magic file.
*/
if ((m = file_tryelf(ms, fd, ubuf, nb)) != 0)
if ((ms->flags & MAGIC_DEBUG) != 0)
(void)fprintf(stderr,
"elf %d\n", m);
}
#endif
goto done;
}
/* try text properties (and possibly text tokens) */
if ((ms->flags & MAGIC_NO_CHECK_TEXT) == 0) {
if ((m = file_ascmagic(ms, ubuf, nb)) != 0) {
if ((ms->flags & MAGIC_DEBUG) != 0)
(void)fprintf(stderr, "ascmagic %d\n", m);
goto done;
}
/* try to discover text encoding */
if ((ms->flags & MAGIC_NO_CHECK_ENCODING) == 0) {
if (looks_text == 0)
if ((m = file_ascmagic_with_encoding( ms, ubuf,
nb, u8buf, ulen, code, type)) != 0) {
if ((ms->flags & MAGIC_DEBUG) != 0)
(void)fprintf(stderr,
"ascmagic/enc %d\n", m);
goto done;
}
}
}
/* give up */
m = 1;
if ((!mime || (mime & MAGIC_MIME_TYPE)) &&
file_printf(ms, mime ? "application/octet-stream" : "data") == -1) {
rv = -1;
}
done:
if ((ms->flags & MAGIC_MIME_ENCODING) != 0) {
if (ms->flags & MAGIC_MIME_TYPE)
if (file_printf(ms, "; charset=") == -1)
rv = -1;
if (file_printf(ms, "%s", code_mime) == -1)
rv = -1;
}
if (u8buf)
free(u8buf);
if (rv)
return rv;
return m;
}
#endif
@ -236,7 +304,7 @@ file_reset(struct magic_set *ms)
return -1;
}
ms->o.buf = NULL;
ms->haderr = 0;
ms->event_flags &= ~EVENT_HAD_ERR;
ms->error = -1;
return 0;
}
@ -255,12 +323,15 @@ file_getbuffer(struct magic_set *ms)
char *pbuf, *op, *np;
size_t psize, len;
if (ms->haderr)
if (ms->event_flags & EVENT_HAD_ERR)
return NULL;
if (ms->flags & MAGIC_RAW)
return ms->o.buf;
if (ms->o.buf == NULL)
return NULL;
/* * 4 is for octal representation, + 1 is for NUL */
len = strlen(ms->o.buf);
if (len > (SIZE_MAX - 1) / 4) {
@ -315,7 +386,7 @@ file_getbuffer(struct magic_set *ms)
for (np = ms->o.pbuf, op = ms->o.buf; *op; op++) {
if (isprint((unsigned char)*op)) {
*np++ = *op;
*np++ = *op;
} else {
OCTALIFY(np, op);
}

View File

@ -29,14 +29,16 @@
* POSSIBILITY OF SUCH DAMAGE.
*/
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include "file.h"
#ifndef lint
FILE_RCSID("@(#)$File: getopt_long.c,v 1.5 2009/02/03 20:27:51 christos Exp $")
#endif /* lint */
#include <assert.h>
#ifdef HAVE_ERR_H
#include <err.h>
#else
#include <stdio.h>
#define warnx printf
#endif
#include <errno.h>

View File

@ -38,16 +38,16 @@
*/
#include "file.h"
#ifndef lint
FILE_RCSID("@(#)$File: is_tar.c,v 1.36 2009/02/03 20:27:51 christos Exp $")
#endif
#include "magic.h"
#include <string.h>
#include <ctype.h>
#include <sys/types.h>
#include "tar.h"
#ifndef lint
FILE_RCSID("@(#)$File: is_tar.c,v 1.31 2008/02/04 20:51:17 christos Exp $")
#endif
#define isodigit(c) ( ((c) >= '0') && ((c) <= '7') )
private int is_tar(const unsigned char *, size_t);
@ -66,16 +66,17 @@ file_is_tar(struct magic_set *ms, const unsigned char *buf, size_t nbytes)
* Do the tar test first, because if the first file in the tar
* archive starts with a dot, we can confuse it with an nroff file.
*/
int tar = is_tar(buf, nbytes);
int tar;
int mime = ms->flags & MAGIC_MIME;
if ((ms->flags & MAGIC_APPLE) != 0)
return 0;
tar = is_tar(buf, nbytes);
if (tar < 1 || tar > 3)
return 0;
if (mime == MAGIC_MIME_ENCODING)
return 0;
if (file_printf(ms, mime ? "application/x-tar" :
if (file_printf(ms, "%s", mime ? "application/x-tar" :
tartype[tar - 1]) == -1)
return -1;
return 1;

View File

@ -1,4 +1,4 @@
.\" $File: libmagic.man,v 1.18 2008/02/28 22:24:46 rrt Exp $
.\" $File: libmagic.man,v 1.19 2008/10/06 20:16:04 christos Exp $
.\"
.\" Copyright (c) Christos Zoulas 2003.
.\" All Rights Reserved.
@ -25,7 +25,7 @@
.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
.\" SUCH DAMAGE.
.\"
.Dd November 15, 2006
.Dd October 6, 2008
.Dt MAGIC 3
.Os
.Sh NAME
@ -200,11 +200,8 @@ before any magic queries can performed.
.Pp
The default database file is named by the MAGIC environment variable. If
that variable is not set, the default database file name is __MAGIC__.
.Pp
.Fn magic_load
adds
.Dq .mime
and/or
.Dq .mgc
to the database filename as appropriate.
.Sh RETURN VALUES
@ -237,11 +234,7 @@ when
.Dv MAGIC_PRESERVE_ATIME
is set.
.Sh FILES
.Bl -tag -width __MAGIC__.mime.mgc -compact
.It Pa __MAGIC__.mime
The non-compiled default magic mime database.
.It Pa __MAGIC__.mime.mgc
The compiled default magic mime database.
.Bl -tag -width __MAGIC__.mgc -compact
.It Pa __MAGIC__
The non-compiled default magic database.
.It Pa __MAGIC__.mgc

26
magic.c
View File

@ -26,15 +26,16 @@
*/
#include "file.h"
#ifndef lint
FILE_RCSID("@(#)$File: magic.c,v 1.59 2009/02/03 20:27:51 christos Exp $")
#endif /* lint */
#include "magic.h"
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <string.h>
#include <sys/types.h>
#include <sys/param.h> /* for MAXPATHLEN */
#include <sys/stat.h>
#ifdef QUICK
#include <sys/mman.h>
#endif
@ -64,10 +65,6 @@
#include "patchlevel.h"
#ifndef lint
FILE_RCSID("@(#)$File: magic.c,v 1.54 2008/07/25 23:30:32 rrt Exp $")
#endif /* lint */
#ifndef PIPE_BUF
/* Get the PIPE_BUF from pathconf */
#ifdef _PC_PIPE_BUF
@ -116,7 +113,7 @@ magic_open(int flags)
if ((ms->c.li = CAST(struct level_info *, malloc(len))) == NULL)
goto free;
ms->haderr = 0;
ms->event_flags = 0;
ms->error = -1;
ms->mlist = NULL;
ms->file = "unknown";
@ -229,7 +226,7 @@ close_and_restore(const struct magic_set *ms, const char *name, int fd,
#elif defined(HAVE_UTIME_H) || defined(HAVE_SYS_UTIME_H)
struct utimbuf utbuf;
(void)memset(utbuf, 0, sizeof(utbuf));
(void)memset(&utbuf, 0, sizeof(utbuf));
utbuf.actime = sb->st_atime;
utbuf.modtime = sb->st_mtime;
(void) utime(name, &utbuf); /* don't care if loses */
@ -302,8 +299,9 @@ file_or_fd(struct magic_set *ms, const char *inname, int fd)
if ((fd = open(inname, flags)) < 0) {
#ifdef __CYGWIN__
/* FIXME: Do this with EXEEXT from autotools */
char *tmp = alloca(strlen(inname) + 5);
(void)strcat(strcpy(tmp, inname), ".exe");
size_t len = strlen(inname) + 5;
char *tmp = alloca(len);
(void)strlcat(strlcpy(tmp, inname, len), ".exe", len);
if ((fd = open(tmp, flags)) < 0) {
#endif
if (unreadable_info(ms, sb.st_mode,
@ -385,13 +383,13 @@ magic_buffer(struct magic_set *ms, const void *buf, size_t nb)
public const char *
magic_error(struct magic_set *ms)
{
return ms->haderr ? ms->o.buf : NULL;
return (ms->event_flags & EVENT_HAD_ERR) ? ms->o.buf : NULL;
}
public int
magic_errno(struct magic_set *ms)
{
return ms->haderr ? ms->error : 0;
return (ms->event_flags & EVENT_HAD_ERR) ? ms->error : 0;
}
public int

20
magic.h
View File

@ -1,7 +1,7 @@
/*
* Copyright (c) Christos Zoulas 2003.
* All Rights Reserved.
*
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
@ -11,7 +11,7 @@
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
@ -34,21 +34,27 @@
#define MAGIC_SYMLINK 0x000002 /* Follow symlinks */
#define MAGIC_COMPRESS 0x000004 /* Check inside compressed files */
#define MAGIC_DEVICES 0x000008 /* Look at the contents of devices */
#define MAGIC_MIME_TYPE 0x000010 /* Return only the MIME type */
#define MAGIC_MIME_TYPE 0x000010 /* Return the MIME type */
#define MAGIC_CONTINUE 0x000020 /* Return all matches */
#define MAGIC_CHECK 0x000040 /* Print warnings to stderr */
#define MAGIC_PRESERVE_ATIME 0x000080 /* Restore access time on exit */
#define MAGIC_RAW 0x000100 /* Don't translate unprint chars */
#define MAGIC_RAW 0x000100 /* Don't translate unprintable chars */
#define MAGIC_ERROR 0x000200 /* Handle ENOENT etc as real errors */
#define MAGIC_MIME_ENCODING 0x000400 /* Return only the MIME encoding */
#define MAGIC_MIME_ENCODING 0x000400 /* Return the MIME encoding */
#define MAGIC_MIME (MAGIC_MIME_TYPE|MAGIC_MIME_ENCODING)
#define MAGIC_APPLE 0x000800 /* Return the Apple creator and type */
#define MAGIC_NO_CHECK_COMPRESS 0x001000 /* Don't check for compressed files */
#define MAGIC_NO_CHECK_TAR 0x002000 /* Don't check for tar files */
#define MAGIC_NO_CHECK_SOFT 0x004000 /* Don't check magic entries */
#define MAGIC_NO_CHECK_APPTYPE 0x008000 /* Don't check application type */
#define MAGIC_NO_CHECK_ELF 0x010000 /* Don't check for elf details */
#define MAGIC_NO_CHECK_ASCII 0x020000 /* Don't check for ascii files */
#define MAGIC_NO_CHECK_TOKENS 0x100000 /* Don't check ascii/tokens */
#define MAGIC_NO_CHECK_TEXT 0x020000 /* Don't check for text files */
#define MAGIC_NO_CHECK_CDF 0x040000 /* Don't check for cdf files */
#define MAGIC_NO_CHECK_TOKENS 0x100000 /* Don't check tokens */
#define MAGIC_NO_CHECK_ENCODING 0x200000 /* Don't check text encodings */
/* Defined for backwards compatibility (renamed) */
#define MAGIC_NO_CHECK_ASCII MAGIC_NO_CHECK_TEXT
/* Defined for backwards compatibility; do nothing */
#define MAGIC_NO_CHECK_FORTRAN 0x000000 /* Don't check ascii/fortran */

View File

@ -1,4 +1,4 @@
.\" $File: magic.man,v 1.57 2008/08/30 09:50:20 christos Exp $
.\" $File: magic.man,v 1.59 2008/11/06 23:22:53 christos Exp $
.Dd August 30, 2008
.Dt MAGIC __FSECTION__
.Os
@ -84,6 +84,8 @@ local time rather than UTC.
.It Dv qldate
An eight-byte value interpreted as a UNIX-style date, but interpreted as
local time rather than UTC.
.It Dv beid3
A 32-bit ID3 length in big-endian byte order.
.It Dv beshort
A two-byte value in big-endian byte order.
.It Dv belong
@ -110,6 +112,8 @@ interpreted as a UNIX-style date, but interpreted as local time rather
than UTC.
.It Dv bestring16
A two-byte unicode (UCS16) string in big-endian byte order.
.It Dv leid3
A 32-bit ID3 length in little-endian byte order.
.It Dv leshort
A two-byte value in little-endian byte order.
.It Dv lelong
@ -145,6 +149,8 @@ interpreted as a UNIX date.
A four-byte value in middle-endian (PDP-11) byte order,
interpreted as a UNIX-style date, but interpreted as local time rather
than UTC.
.It Dv indirect
Starting at the given offset, consult the magic database again.
.It Dv regex
A regular expression match in extended POSIX regular expression syntax
(like egrep). Regular expressions can take exponential time to
@ -290,6 +296,11 @@ added before it: multiple matches are normally separated by a single
space.
.El
.Pp
A 4+4 character APPLE creator and type can be specified as:
.Bd -literal -offset indent
!:apple CREATYPE
.Ed
.Pp
A MIME type is given on a separate line, which must be the next
non-blank or comment line after the magic line that identifies the
file type, and has the following format:
@ -361,12 +372,12 @@ the file.
The value at that offset is read, and is used again as an offset
in the file.
Indirect offsets are of the form:
.Em (( x [.[bslBSL]][+\-][ y ]) .
.Em (( x [.[bislBISL]][+\-][ y ]) .
The value of
.Em x
is used as an offset in the file.
A byte, short or long is read at that offset depending on the
.Em [bslBSLm]
A byte, id3 length, short or long is read at that offset depending on the
.Em [bislBISLm]
type specifier.
The capitalized types interpret the number as a big endian
value, whereas the small letter versions interpret the number as a little

View File

@ -1,11 +1,14 @@
#define FILE_VERSION_MAJOR 4
#define patchlevel 26
#define FILE_VERSION_MAJOR 5
#define patchlevel 0
/*
* Patchlevel file for Ian Darwin's MAGIC command.
* $File: patchlevel.h,v 1.70 2008/08/30 10:01:01 christos Exp $
* $File: patchlevel.h,v 1.71 2009/01/21 19:09:42 christos Exp $
*
* $Log: patchlevel.h,v $
* Revision 1.71 2009/01/21 19:09:42 christos
* file 5.0
*
* Revision 1.70 2008/08/30 10:01:01 christos
* file 4.26
*

16
print.c
View File

@ -30,8 +30,11 @@
*/
#include "file.h"
#include <stdio.h>
#include <errno.h>
#ifndef lint
FILE_RCSID("@(#)$File: print.c,v 1.66 2009/02/03 20:27:51 christos Exp $")
#endif /* lint */
#include <string.h>
#include <stdarg.h>
#include <stdlib.h>
@ -40,10 +43,6 @@
#endif
#include <time.h>
#ifndef lint
FILE_RCSID("@(#)$File: print.c,v 1.63 2008/02/17 19:28:54 rrt Exp $")
#endif /* lint */
#define SZOF(a) (sizeof(a) / sizeof(a[0]))
#ifndef COMPILE_ONLY
@ -64,7 +63,8 @@ file_mdump(struct magic *m)
if (m->in_op & FILE_OPINVERSE)
(void) fputc('~', stderr);
(void) fprintf(stderr, "%c%u),",
((m->in_op & FILE_OPS_MASK) < SZOF(optyp)) ?
((size_t)(m->in_op & FILE_OPS_MASK) <
SZOF(optyp)) ?
optyp[m->in_op & FILE_OPS_MASK] : '?',
m->in_offset);
}
@ -93,7 +93,7 @@ file_mdump(struct magic *m)
(void) fprintf(stderr, "/%u", m->str_range);
}
else {
if ((m->mask_op & FILE_OPS_MASK) < SZOF(optyp))
if ((size_t)(m->mask_op & FILE_OPS_MASK) < SZOF(optyp))
(void) fputc(optyp[m->mask_op & FILE_OPS_MASK], stderr);
else
(void) fputc('?', stderr);

View File

@ -26,6 +26,10 @@
*/
#include "file.h"
#ifndef lint
FILE_RCSID("@(#)$File: readelf.c,v 1.81 2008/11/04 16:38:28 christos Exp $")
#endif
#ifdef BUILTIN_ELF
#include <string.h>
#include <ctype.h>
@ -37,10 +41,6 @@
#include "readelf.h"
#include "magic.h"
#ifndef lint
FILE_RCSID("@(#)$File: readelf.c,v 1.76 2008/07/16 18:00:57 christos Exp $")
#endif
#ifdef ELFCORE
private int dophn_core(struct magic_set *, int, int, int, off_t, int, size_t,
off_t, int *);
@ -875,7 +875,7 @@ doshn(struct magic_set *ms, int clazz, int swap, int fd, off_t off, int num,
noff = 0;
for (;;) {
if (noff >= (size_t)xsh_size)
if (noff >= (off_t)xsh_size)
break;
noff = donote(ms, nbuf, (size_t)noff,
(size_t)xsh_size, clazz, swap, 4,
@ -907,8 +907,9 @@ doshn(struct magic_set *ms, int clazz, int swap, int fd, off_t off, int num,
for (;;) {
Elf32_Cap cap32;
Elf64_Cap cap64;
char cbuf[MAX(sizeof cap32, sizeof cap64)];
if ((coff += xcap_sizeof) >= (size_t)xsh_size)
char cbuf[/*CONSTCOND*/
MAX(sizeof cap32, sizeof cap64)];
if ((coff += xcap_sizeof) >= (off_t)xsh_size)
break;
if (read(fd, cbuf, (size_t)xcap_sizeof) !=
(ssize_t)xcap_sizeof) {
@ -929,7 +930,8 @@ doshn(struct magic_set *ms, int clazz, int swap, int fd, off_t off, int num,
if (file_printf(ms,
", with unknown capability "
"0x%llx = 0x%llx",
xcap_tag, xcap_val) == -1)
(unsigned long long)xcap_tag,
(unsigned long long)xcap_val) == -1)
return -1;
break;
}
@ -976,11 +978,12 @@ doshn(struct magic_set *ms, int clazz, int swap, int fd, off_t off, int num,
if (cap_hw1)
if (file_printf(ms,
" unknown hardware capability 0x%llx",
cap_hw1) == -1)
(unsigned long long)cap_hw1) == -1)
return -1;
} else {
if (file_printf(ms,
" hardware capability 0x%llx", cap_hw1) == -1)
" hardware capability 0x%llx",
(unsigned long long)cap_hw1) == -1)
return -1;
}
}
@ -996,7 +999,7 @@ doshn(struct magic_set *ms, int clazz, int swap, int fd, off_t off, int num,
if (cap_sf1)
if (file_printf(ms,
", with unknown software capability 0x%llx",
cap_sf1) == -1)
(unsigned long long)cap_sf1) == -1)
return -1;
}
return 0;
@ -1138,7 +1141,7 @@ file_tryelf(struct magic_set *ms, int fd, const unsigned char *buf,
Elf64_Ehdr elf64hdr;
uint16_t type;
if (ms->flags & MAGIC_MIME)
if (ms->flags & (MAGIC_MIME|MAGIC_APPLE))
return 0;
/*
* ELF executables have multiple section headers in arbitrary

View File

@ -2,7 +2,7 @@
* Copyright (c) Ian F. Darwin 1986-1995.
* Software written by Ian F. Darwin and others;
* maintained 1995-present by Christos Zoulas and others.
*
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
@ -12,7 +12,7 @@
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
@ -30,6 +30,11 @@
*/
#include "file.h"
#ifndef lint
FILE_RCSID("@(#)$File: softmagic.c,v 1.133 2008/11/07 22:50:37 christos Exp $")
#endif /* lint */
#include "magic.h"
#include <string.h>
#include <ctype.h>
@ -37,32 +42,24 @@
#include <time.h>
#ifndef lint
FILE_RCSID("@(#)$File: softmagic.c,v 1.120 2008/07/28 17:25:21 christos Exp $")
#endif /* lint */
private int match(struct magic_set *, struct magic *, uint32_t,
const unsigned char *, size_t, int);
private int mget(struct magic_set *, const unsigned char *,
struct magic *, size_t, unsigned int);
private int magiccheck(struct magic_set *, struct magic *);
private int32_t mprint(struct magic_set *, struct magic *);
private int32_t moffset(struct magic_set *, struct magic *);
private void mdebug(uint32_t, const char *, size_t);
private int mcopy(struct magic_set *, union VALUETYPE *, int, int,
const unsigned char *, uint32_t, size_t, size_t);
private int mconvert(struct magic_set *, struct magic *);
private int print_sep(struct magic_set *, int);
private int handle_annotation(struct magic_set *, struct magic *);
private void cvt_8(union VALUETYPE *, const struct magic *);
private void cvt_16(union VALUETYPE *, const struct magic *);
private void cvt_32(union VALUETYPE *, const struct magic *);
private void cvt_64(union VALUETYPE *, const struct magic *);
/*
* Macro to give description string according to whether we want plain
* text or MIME type
*/
#define MAGIC_DESC ((ms->flags & MAGIC_MIME) ? m->mimetype : m->desc)
/*
* softmagic - lookup one file in parsed, in-memory copy of database
* Passed the name and FILE * of one file to be typed.
@ -114,15 +111,16 @@ match(struct magic_set *ms, struct magic *magic, uint32_t nmagic,
uint32_t magindex = 0;
unsigned int cont_level = 0;
int need_separator = 0;
int returnval = 0; /* if a match is found it is set to 1*/
int returnval = 0, e; /* if a match is found it is set to 1*/
int firstline = 1; /* a flag to print X\n X\n- X */
int printed_something = 0;
int print = (ms->flags & (MAGIC_MIME|MAGIC_APPLE)) == 0;
if (file_check_mem(ms, cont_level) == -1)
return -1;
for (magindex = 0; magindex < nmagic; magindex++) {
int flush;
int flush = 0;
struct magic *m = &magic[magindex];
if ((m->flag & BINTEST) != mode) {
@ -137,11 +135,16 @@ match(struct magic_set *ms, struct magic *magic, uint32_t nmagic,
ms->line = m->lineno;
/* if main entry matches, print it... */
flush = !mget(ms, s, m, nbytes, cont_level);
if (flush) {
if (m->reln == '!')
flush = 0;
} else {
switch (mget(ms, s, m, nbytes, cont_level)) {
case -1:
return -1;
case 0:
flush = m->reln != '!';
break;
default:
if (m->type == FILE_INDIRECT)
returnval = 1;
switch (magiccheck(ms, m)) {
case -1:
return -1;
@ -149,11 +152,13 @@ match(struct magic_set *ms, struct magic *magic, uint32_t nmagic,
flush++;
break;
default:
flush = 0;
break;
}
break;
}
if (flush) {
/*
/*
* main entry didn't match,
* flush its continuations
*/
@ -167,16 +172,21 @@ match(struct magic_set *ms, struct magic *magic, uint32_t nmagic,
* If we are going to print something, we'll need to print
* a blank before we print something else.
*/
if (*MAGIC_DESC) {
if (*m->desc) {
need_separator = 1;
printed_something = 1;
if ((e = handle_annotation(ms, m)) != 0)
return e;
if (print_sep(ms, firstline) == -1)
return -1;
}
if ((ms->c.li[cont_level].off = mprint(ms, m)) == -1)
if (print && mprint(ms, m) == -1)
return -1;
ms->c.li[cont_level].off = moffset(ms, m);
/* and any continuations that match */
if (file_check_mem(ms, ++cont_level) == -1)
return -1;
@ -208,10 +218,21 @@ match(struct magic_set *ms, struct magic *magic, uint32_t nmagic,
continue;
}
#endif
flush = !mget(ms, s, m, nbytes, cont_level);
if (flush && m->reln != '!')
continue;
switch (mget(ms, s, m, nbytes, cont_level)) {
case -1:
return -1;
case 0:
if (m->reln != '!')
continue;
flush = 1;
break;
default:
if (m->type == FILE_INDIRECT)
returnval = 1;
flush = 0;
break;
}
switch (flush ? 1 : magiccheck(ms, m)) {
case -1:
return -1;
@ -234,8 +255,10 @@ match(struct magic_set *ms, struct magic *magic, uint32_t nmagic,
* If we are going to print something,
* make sure that we have a separator first.
*/
if (*MAGIC_DESC) {
if (*m->desc) {
printed_something = 1;
if ((e = handle_annotation(ms, m)) != 0)
return e;
if (print_sep(ms, firstline) == -1)
return -1;
}
@ -248,14 +271,18 @@ match(struct magic_set *ms, struct magic *magic, uint32_t nmagic,
/* space if previous printed */
if (need_separator
&& ((m->flag & NOSPACE) == 0)
&& *MAGIC_DESC) {
if (file_printf(ms, " ") == -1)
&& *m->desc) {
if (print &&
file_printf(ms, " ") == -1)
return -1;
need_separator = 0;
}
if ((ms->c.li[cont_level].off = mprint(ms, m)) == -1)
if (print && mprint(ms, m) == -1)
return -1;
if (*MAGIC_DESC)
ms->c.li[cont_level].off = moffset(ms, m);
if (*m->desc)
need_separator = 1;
/*
@ -270,11 +297,12 @@ match(struct magic_set *ms, struct magic *magic, uint32_t nmagic,
}
if (printed_something) {
firstline = 0;
returnval = 1;
if (print)
returnval = 1;
}
if ((ms->flags & MAGIC_CONTINUE) == 0 && printed_something) {
return 1; /* don't keep searching */
}
return returnval; /* don't keep searching */
}
}
return returnval; /* This is hit if -k is set or there is no match */
}
@ -285,7 +313,7 @@ check_fmt(struct magic_set *ms, struct magic *m)
regex_t rx;
int rc;
if (strchr(MAGIC_DESC, '%') == NULL)
if (strchr(m->desc, '%') == NULL)
return 0;
rc = regcomp(&rx, "%[-0-9\\.]*s", REG_EXTENDED|REG_NOSUB);
@ -295,7 +323,7 @@ check_fmt(struct magic_set *ms, struct magic *m)
file_magerror(ms, "regex error %d, (%s)", rc, errmsg);
return -1;
} else {
rc = regexec(&rx, MAGIC_DESC, 0, 0, 0);
rc = regexec(&rx, m->desc, 0, 0, 0);
regfree(&rx);
return !rc;
}
@ -328,7 +356,7 @@ mprint(struct magic_set *ms, struct magic *m)
float vf;
double vd;
int64_t t = 0;
char *buf;
char buf[128];
union VALUETYPE *p = &ms->ms_value;
switch (m->type) {
@ -338,13 +366,13 @@ mprint(struct magic_set *ms, struct magic *m)
case -1:
return -1;
case 1:
if (asprintf(&buf, "%c", (unsigned char)v) < 0)
return -1;
if (file_printf(ms, MAGIC_DESC, buf) == -1)
(void)snprintf(buf, sizeof(buf), "%c",
(unsigned char)v);
if (file_printf(ms, m->desc, buf) == -1)
return -1;
break;
default:
if (file_printf(ms, MAGIC_DESC, (unsigned char) v) == -1)
if (file_printf(ms, m->desc, (unsigned char) v) == -1)
return -1;
break;
}
@ -359,13 +387,14 @@ mprint(struct magic_set *ms, struct magic *m)
case -1:
return -1;
case 1:
if (asprintf(&buf, "%hu", (unsigned short)v) < 0)
return -1;
if (file_printf(ms, MAGIC_DESC, buf) == -1)
(void)snprintf(buf, sizeof(buf), "%hu",
(unsigned short)v);
if (file_printf(ms, m->desc, buf) == -1)
return -1;
break;
default:
if (file_printf(ms, MAGIC_DESC, (unsigned short) v) == -1)
if (
file_printf(ms, m->desc, (unsigned short) v) == -1)
return -1;
break;
}
@ -381,13 +410,12 @@ mprint(struct magic_set *ms, struct magic *m)
case -1:
return -1;
case 1:
if (asprintf(&buf, "%u", (uint32_t)v) < 0)
return -1;
if (file_printf(ms, MAGIC_DESC, buf) == -1)
(void)snprintf(buf, sizeof(buf), "%u", (uint32_t)v);
if (file_printf(ms, m->desc, buf) == -1)
return -1;
break;
default:
if (file_printf(ms, MAGIC_DESC, (uint32_t) v) == -1)
if (file_printf(ms, m->desc, (uint32_t) v) == -1)
return -1;
break;
}
@ -398,7 +426,7 @@ mprint(struct magic_set *ms, struct magic *m)
case FILE_BEQUAD:
case FILE_LEQUAD:
v = file_signextend(ms, m, p->q);
if (file_printf(ms, MAGIC_DESC, (uint64_t) v) == -1)
if (file_printf(ms, m->desc, (uint64_t) v) == -1)
return -1;
t = ms->offset + sizeof(int64_t);
break;
@ -408,14 +436,14 @@ mprint(struct magic_set *ms, struct magic *m)
case FILE_BESTRING16:
case FILE_LESTRING16:
if (m->reln == '=' || m->reln == '!') {
if (file_printf(ms, MAGIC_DESC, m->value.s) == -1)
if (file_printf(ms, m->desc, m->value.s) == -1)
return -1;
t = ms->offset + m->vallen;
}
else {
if (*m->value.s == '\0')
p->s[strcspn(p->s, "\n")] = '\0';
if (file_printf(ms, MAGIC_DESC, p->s) == -1)
if (file_printf(ms, m->desc, p->s) == -1)
return -1;
t = ms->offset + strlen(p->s);
if (m->type == FILE_PSTRING)
@ -427,7 +455,7 @@ mprint(struct magic_set *ms, struct magic *m)
case FILE_BEDATE:
case FILE_LEDATE:
case FILE_MEDATE:
if (file_printf(ms, MAGIC_DESC, file_fmttime(p->l, 1)) == -1)
if (file_printf(ms, m->desc, file_fmttime(p->l, 1)) == -1)
return -1;
t = ms->offset + sizeof(time_t);
break;
@ -436,7 +464,7 @@ mprint(struct magic_set *ms, struct magic *m)
case FILE_BELDATE:
case FILE_LELDATE:
case FILE_MELDATE:
if (file_printf(ms, MAGIC_DESC, file_fmttime(p->l, 0)) == -1)
if (file_printf(ms, m->desc, file_fmttime(p->l, 0)) == -1)
return -1;
t = ms->offset + sizeof(time_t);
break;
@ -444,8 +472,8 @@ mprint(struct magic_set *ms, struct magic *m)
case FILE_QDATE:
case FILE_BEQDATE:
case FILE_LEQDATE:
if (file_printf(ms, MAGIC_DESC, file_fmttime((uint32_t)p->q, 1))
== -1)
if (file_printf(ms, m->desc, file_fmttime((uint32_t)p->q,
1)) == -1)
return -1;
t = ms->offset + sizeof(uint64_t);
break;
@ -453,8 +481,8 @@ mprint(struct magic_set *ms, struct magic *m)
case FILE_QLDATE:
case FILE_BEQLDATE:
case FILE_LEQLDATE:
if (file_printf(ms, MAGIC_DESC, file_fmttime((uint32_t)p->q, 0))
== -1)
if (file_printf(ms, m->desc, file_fmttime((uint32_t)p->q,
0)) == -1)
return -1;
t = ms->offset + sizeof(uint64_t);
break;
@ -467,13 +495,12 @@ mprint(struct magic_set *ms, struct magic *m)
case -1:
return -1;
case 1:
if (asprintf(&buf, "%g", vf) < 0)
return -1;
if (file_printf(ms, MAGIC_DESC, buf) == -1)
(void)snprintf(buf, sizeof(buf), "%g", vf);
if (file_printf(ms, m->desc, buf) == -1)
return -1;
break;
default:
if (file_printf(ms, MAGIC_DESC, vf) == -1)
if (file_printf(ms, m->desc, vf) == -1)
return -1;
break;
}
@ -488,13 +515,12 @@ mprint(struct magic_set *ms, struct magic *m)
case -1:
return -1;
case 1:
if (asprintf(&buf, "%g", vd) < 0)
return -1;
if (file_printf(ms, MAGIC_DESC, buf) == -1)
(void)snprintf(buf, sizeof(buf), "%g", vd);
if (file_printf(ms, m->desc, buf) == -1)
return -1;
break;
default:
if (file_printf(ms, MAGIC_DESC, vd) == -1)
if (file_printf(ms, m->desc, vd) == -1)
return -1;
break;
}
@ -510,7 +536,7 @@ mprint(struct magic_set *ms, struct magic *m)
file_oomem(ms, ms->search.rm_len);
return -1;
}
rval = file_printf(ms, MAGIC_DESC, cp);
rval = file_printf(ms, m->desc, cp);
free(cp);
if (rval == -1)
@ -524,7 +550,7 @@ mprint(struct magic_set *ms, struct magic *m)
}
case FILE_SEARCH:
if (file_printf(ms, MAGIC_DESC, m->value.s) == -1)
if (file_printf(ms, m->desc, m->value.s) == -1)
return -1;
if ((m->str_flags & REGEX_OFFSET_START))
t = ms->search.offset;
@ -533,18 +559,118 @@ mprint(struct magic_set *ms, struct magic *m)
break;
case FILE_DEFAULT:
if (file_printf(ms, MAGIC_DESC, m->value.s) == -1)
if (file_printf(ms, m->desc, m->value.s) == -1)
return -1;
t = ms->offset;
break;
case FILE_INDIRECT:
t = ms->offset;
break;
default:
file_magerror(ms, "invalid m->type (%d) in mprint()", m->type);
return -1;
}
return(t);
return (int32_t)t;
}
/*
 * moffset - compute the offset that follows the value just matched by `m'.
 *
 * ms: magic set; ms->offset, ms->search and ms->ms_value are read.
 * m:  the magic entry whose type/relation selects the width to skip.
 *
 * Returns ms->offset (or ms->search.offset for regex/search entries)
 * advanced past the matched datum, or 0 for a type not listed here.
 *
 * Side effect: for string types compared with a relation other than
 * '=' or '!', when m->value.s is empty, ms->ms_value.s is truncated at
 * its first newline before its length is measured.
 */
private int32_t
moffset(struct magic_set *ms, struct magic *m)
{
	switch (m->type) {
	case FILE_BYTE:
		return (int32_t)(ms->offset + sizeof(char));

	case FILE_SHORT:
	case FILE_BESHORT:
	case FILE_LESHORT:
		return (int32_t)(ms->offset + sizeof(short));

	case FILE_LONG:
	case FILE_BELONG:
	case FILE_LELONG:
	case FILE_MELONG:
		return (int32_t)(ms->offset + sizeof(int32_t));

	case FILE_QUAD:
	case FILE_BEQUAD:
	case FILE_LEQUAD:
		return (int32_t)(ms->offset + sizeof(int64_t));

	case FILE_STRING:
	case FILE_PSTRING:
	case FILE_BESTRING16:
	case FILE_LESTRING16:
		if (m->reln == '=' || m->reln == '!')
			return (int32_t)(ms->offset + m->vallen);
		else {
			union VALUETYPE *p = &ms->ms_value;
			uint32_t t;

			/* An empty pattern matches up to the end of the line. */
			if (*m->value.s == '\0')
				p->s[strcspn(p->s, "\n")] = '\0';
			t = ms->offset + strlen(p->s);
			/* One extra position is skipped for FILE_PSTRING. */
			if (m->type == FILE_PSTRING)
				t++;
			return (int32_t)t;
		}

	case FILE_DATE:
	case FILE_BEDATE:
	case FILE_LEDATE:
	case FILE_MEDATE:
	case FILE_LDATE:
	case FILE_BELDATE:
	case FILE_LELDATE:
	case FILE_MELDATE:
		/*
		 * magic(5) defines date/ldate as four-byte values, so step
		 * over four bytes.  sizeof(time_t) is eight where time_t is
		 * 64 bits wide and would overshoot the next field.
		 */
		return (int32_t)(ms->offset + sizeof(uint32_t));

	case FILE_QDATE:
	case FILE_BEQDATE:
	case FILE_LEQDATE:
	case FILE_QLDATE:
	case FILE_BEQLDATE:
	case FILE_LEQLDATE:
		return (int32_t)(ms->offset + sizeof(uint64_t));

	case FILE_FLOAT:
	case FILE_BEFLOAT:
	case FILE_LEFLOAT:
		return (int32_t)(ms->offset + sizeof(float));

	case FILE_DOUBLE:
	case FILE_BEDOUBLE:
	case FILE_LEDOUBLE:
		return (int32_t)(ms->offset + sizeof(double));

	case FILE_REGEX:
		/* REGEX_OFFSET_START selects the start of the match. */
		if ((m->str_flags & REGEX_OFFSET_START) != 0)
			return (int32_t)ms->search.offset;
		else
			return (int32_t)(ms->search.offset +
			    ms->search.rm_len);

	case FILE_SEARCH:
		if ((m->str_flags & REGEX_OFFSET_START) != 0)
			return (int32_t)ms->search.offset;
		else
			return (int32_t)(ms->search.offset + m->vallen);

	case FILE_DEFAULT:
	case FILE_INDIRECT:
		/* Nothing is consumed; the offset stays where it was. */
		return (int32_t)ms->offset;

	default:
		return 0;
	}
}
#define DO_CVT(fld, cast) \
if (m->num_mask) \
@ -806,6 +932,7 @@ mcopy(struct magic_set *ms, union VALUETYPE *p, int type, int indir,
const char *c;
const char *last; /* end of search region */
const char *buf; /* start of search region */
const char *end;
size_t lines;
if (s == NULL) {
@ -814,10 +941,10 @@ mcopy(struct magic_set *ms, union VALUETYPE *p, int type, int indir,
return 0;
}
buf = (const char *)s + offset;
last = (const char *)s + nbytes;
end = last = (const char *)s + nbytes;
/* mget() guarantees buf <= last */
for (lines = linecnt, b = buf;
lines && ((b = strchr(c = b, '\n')) || (b = strchr(c, '\r')));
lines && ((b = memchr(c = b, '\n', end - b)) || (b = memchr(c, '\r', end - c)));
lines--, b++) {
last = b;
if (b[0] == '\r' && b[1] == '\n')
@ -825,7 +952,7 @@ mcopy(struct magic_set *ms, union VALUETYPE *p, int type, int indir,
}
if (lines)
last = (const char *)s + nbytes;
ms->search.s = buf;
ms->search.s_len = last - buf;
ms->search.offset = offset;
@ -838,13 +965,13 @@ mcopy(struct magic_set *ms, union VALUETYPE *p, int type, int indir,
const unsigned char *esrc = s + nbytes;
char *dst = p->s;
char *edst = &p->s[sizeof(p->s) - 1];
if (type == FILE_BESTRING16)
src++;
/* check for pointer overflow */
if (src < s) {
file_magerror(ms, "invalid offset %zu in mcopy()",
file_magerror(ms, "invalid offset %u in mcopy()",
offset);
return -1;
}
@ -904,7 +1031,9 @@ mget(struct magic_set *ms, const unsigned char *s,
if ((ms->flags & MAGIC_DEBUG) != 0) {
mdebug(offset, (char *)(void *)p, sizeof(union VALUETYPE));
#ifndef COMPILE_ONLY
file_mdump(m);
#endif
}
if (m->flag & INDIR) {
@ -929,9 +1058,11 @@ mget(struct magic_set *ms, const unsigned char *s,
off = q->l;
break;
case FILE_BELONG:
case FILE_BEID3:
off = (int32_t)((q->hl[0]<<24)|(q->hl[1]<<16)|
(q->hl[2]<<8)|(q->hl[3]));
break;
case FILE_LEID3:
case FILE_LELONG:
off = (int32_t)((q->hl[3]<<24)|(q->hl[2]<<16)|
(q->hl[1]<<8)|(q->hl[0]));
@ -1119,6 +1250,7 @@ mget(struct magic_set *ms, const unsigned char *s,
offset = ~offset;
break;
case FILE_BELONG:
case FILE_BEID3:
if (nbytes < (offset + 4))
return 0;
if (off) {
@ -1189,6 +1321,7 @@ mget(struct magic_set *ms, const unsigned char *s,
offset = ~offset;
break;
case FILE_LELONG:
case FILE_LEID3:
if (nbytes < (offset + 4))
return 0;
if (off) {
@ -1365,8 +1498,21 @@ mget(struct magic_set *ms, const unsigned char *s,
break;
}
if (m->flag & INDIROFFADD)
switch (m->in_type) {
case FILE_LEID3:
case FILE_BEID3:
offset = ((((offset >> 0) & 0x7f) << 0) |
(((offset >> 8) & 0x7f) << 7) |
(((offset >> 16) & 0x7f) << 14) |
(((offset >> 24) & 0x7f) << 21)) + 10;
break;
default:
break;
}
if (m->flag & INDIROFFADD) {
offset += ms->c.li[cont_level-1].off;
}
if (mcopy(ms, p, m->type, 0, s, offset, nbytes, count) == -1)
return -1;
ms->offset = offset;
@ -1374,7 +1520,9 @@ mget(struct magic_set *ms, const unsigned char *s,
if ((ms->flags & MAGIC_DEBUG) != 0) {
mdebug(offset, (char *)(void *)p,
sizeof(union VALUETYPE));
#ifndef COMPILE_ONLY
file_mdump(m);
#endif
}
}
@ -1384,14 +1532,14 @@ mget(struct magic_set *ms, const unsigned char *s,
if (nbytes < (offset + 1)) /* should alway be true */
return 0;
break;
case FILE_SHORT:
case FILE_BESHORT:
case FILE_LESHORT:
if (nbytes < (offset + 2))
return 0;
break;
case FILE_LONG:
case FILE_BELONG:
case FILE_LELONG:
@ -1410,7 +1558,7 @@ mget(struct magic_set *ms, const unsigned char *s,
if (nbytes < (offset + 4))
return 0;
break;
case FILE_DOUBLE:
case FILE_BEDOUBLE:
case FILE_LEDOUBLE:
@ -1430,6 +1578,15 @@ mget(struct magic_set *ms, const unsigned char *s,
return 0;
break;
case FILE_INDIRECT:
if ((ms->flags & (MAGIC_MIME|MAGIC_APPLE)) == 0 &&
file_printf(ms, m->desc) == -1)
return -1;
if (nbytes < offset)
return 0;
return file_softmagic(ms, s + offset, nbytes - offset,
BINTEST);
case FILE_DEFAULT: /* nothing to check */
default:
break;
@ -1460,7 +1617,7 @@ file_strncmp(const char *s1, const char *s2, size_t len, uint32_t flags)
if (0L == flags) { /* normal string: do it fast */
while (len-- > 0)
if ((v = *b++ - *a++) != '\0')
break;
break;
}
else { /* combine the others */
while (len-- > 0) {
@ -1474,8 +1631,8 @@ file_strncmp(const char *s1, const char *s2, size_t len, uint32_t flags)
if ((v = toupper(*b++) - *a++) != '\0')
break;
}
else if ((flags & STRING_COMPACT_BLANK) &&
isspace(*a)) {
else if ((flags & STRING_COMPACT_BLANK) &&
isspace(*a)) {
a++;
if (isspace(*b++)) {
while (isspace(*b))
@ -1570,26 +1727,27 @@ magiccheck(struct magic_set *ms, struct magic *m)
case 'x':
matched = 1;
break;
case '!':
matched = fv != fl;
break;
case '=':
matched = fv == fl;
break;
case '>':
matched = fv > fl;
break;
case '<':
matched = fv < fl;
break;
default:
matched = 0;
file_magerror(ms, "cannot happen with float: invalid relation `%c'", m->reln);
file_magerror(ms, "cannot happen with float: invalid relation `%c'",
m->reln);
return -1;
}
return matched;
@ -1603,23 +1761,23 @@ magiccheck(struct magic_set *ms, struct magic *m)
case 'x':
matched = 1;
break;
case '!':
matched = dv != dl;
break;
case '=':
matched = dv == dl;
break;
case '>':
matched = dv > dl;
break;
case '<':
matched = dv < dl;
break;
default:
matched = 0;
file_magerror(ms, "cannot happen with double: invalid relation `%c'", m->reln);
@ -1727,6 +1885,8 @@ magiccheck(struct magic_set *ms, struct magic *m)
return -1;
break;
}
case FILE_INDIRECT:
return 1;
default:
file_magerror(ms, "invalid type %d in magiccheck()", m->type);
return -1;
@ -1816,13 +1976,31 @@ magiccheck(struct magic_set *ms, struct magic *m)
return matched;
}
/*
 * handle_annotation - print the annotation selected by ms->flags.
 *
 * With MAGIC_APPLE set, at most 8 characters of m->apple are printed.
 * Otherwise, with MAGIC_MIME_TYPE set and a non-empty m->mimetype, the
 * MIME type is printed.
 *
 * Returns 1 when an annotation was printed, -1 when file_printf()
 * failed, and 0 when neither case applied and nothing was printed.
 */
private int
handle_annotation(struct magic_set *ms, struct magic *m)
{
	int rv;

	if ((ms->flags & MAGIC_APPLE) != 0)
		rv = file_printf(ms, "%.8s", m->apple);
	else if ((ms->flags & MAGIC_MIME_TYPE) != 0 && m->mimetype[0] != '\0')
		rv = file_printf(ms, "%s", m->mimetype);
	else
		return 0;	/* no annotation requested or available */

	return rv == -1 ? -1 : 1;
}
private int
print_sep(struct magic_set *ms, int firstline)
{
if (ms->flags & MAGIC_MIME)
return 0;
if (firstline)
return 0;
/*
* we found another match
* we found another match
* put a newline and '-' to do some simple formatting
*/
return file_printf(ms, "\n- ");

View File

@ -105,12 +105,13 @@ A buffer overflow can only occur if your sprintf() do strange things or when
you use strange formats.
*/
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include "file.h"
#ifndef lint
FILE_RCSID("@(#)$File: vasprintf.c,v 1.7 2009/02/03 20:27:52 christos Exp $")
#endif /* lint */
#include <assert.h>
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <stdarg.h>