# ----------------------------Archive Formats--------------------------------------

# POSIX tar archives
#
# The "ustar" magic is matched at offset 257 of the tar header. Child offsets
# in this file are measured from the start of the record (see the LHa
# entries: magic at 2, trailing '-' checked at >6), so the owner fields have
# to be addressed at their header offsets: uname at 265, gname at 297.
# The earlier values (8 and 40) were relative to the magic and pointed into
# the member-name field, so part of the file name was reported as the owner.
# Each name is printed only when its first byte is non-zero, capped at 32
# characters by the format string.
257     string      ustar\000   POSIX tar archive
>265    byte        !0
>>265   string      x           \b, owner user name: "%.32s"
>297    byte        !0
>>297   string      x           \b, owner group name: "%.32s"

# GNU variant: the magic is "ustar" followed by two spaces and a NUL.
# Owner fields are read from the same header offsets as above.
257     string      ustar\040\040\000    POSIX tar archive (GNU)
>265    byte        !0
>>265   string      x                    \b, owner user name: "%.32s"
>297    byte        !0
>>297   string      x                    \b, owner group name: "%.32s"

# GNU tar incremental snapshot files. Format description:
# http://www.gnu.org/software/tar/manual/html_node/Snapshot-Files.html
# The version line re-reads the string at offset 0, so the printed value
# begins with the matched "GNU tar-" prefix.
0   string  GNU\x20tar-  GNU tar incremental snapshot data,
>0  string  x            version: "%s"

# JAR archiver (.j) -- the successor to ARJ, not Java's JAR (which is
# essentially ZIP). Two alternative signatures share the same description.
14  string  \x1aJar\x1b  JAR (ARJ Software, Inc.) archive data
0   string  JARCS        JAR (ARJ Software, Inc.) archive data

# PKZIP multi-volume archive: the PK\x07\x08 marker immediately followed by
# the PK\x03\x04 signature that also starts a plain Zip entry.
0  string  PK\x07\x08PK\x03\x04  Zip multi-volume archive data, at least PKZIP v2.50 to extract

# ZIP compression (Greg Roelofs, c/o zip-bugs@wkuvx1.wku.edu)
#
# Matches the PK\003\004 signature at offset 0 and decorates the description
# from fields of the same record.
# NOTE(review): the {invalid}, {jump:...}, {strlen:...} and {string} tokens
# look like directives consumed by the scanning tool rather than display
# text -- confirm against the tool's documentation.
0       string      PK\003\004      Zip archive data,
# Bit 0 of the little-endian short at offset 6 set.
>6      leshort     &0x01           encrypted
# The byte at offset 4 selects the "version needed" text.
>4      byte        0x00            v0.0
>4      byte        0x09            at least v0.9 to extract,
>4      byte        0x0a            at least v1.0 to extract,
>4      byte        0x0b            at least v1.1 to extract,
# Literal "WINZIP" at offset 0x161.
>0x161  string      WINZIP          WinZIP self-extracting,
# Byte 4 == 0x14: report v2.0 unless the four bytes at offset 30 are
# 0x6d696d65 (ASCII "mime").
>4      byte        0x14
>>30    ubelong     !0x6d696d65     at least v2.0 to extract,
# Non-zero long at offset 18: negative values are rejected, otherwise the
# value is printed as the compressed size and also emitted as {jump}.
>18     lelong      !0
>>18    lelong      <0              {invalid}
>>18    lelong      x               compressed size: %d,
>>18    lelong      x               {jump:%d}
# Non-zero long at offset 22: negative values are rejected, otherwise the
# value is printed as the uncompressed size.
>22     lelong      !0
>>22    lelong      <0              {invalid}
>>22    lelong      x               uncompressed size: %d,
# First byte of the name (offset 30) must lie in 0x2D..0x7A.
>30     byte        <0x2D           {invalid}
>30     byte        >0x7A           {invalid}
# The short at offset 26 is emitted as {strlen} just before the name string
# at offset 30 is printed.
>26     leshort     x               {strlen:%d}
>30     string      x               name: {string}%s

# ZIP footer
# When the little-endian short at offset 20 is positive, it is emitted as the
# {strlen} value and the string at offset 22 is printed as the comment.
0     string   PK\x05\x06  End of Zip archive
>20   leshort  >0
>>20  leshort  x           \b, comment:
>>20  leshort  x           {strlen:%d}
>>22  string   x           {string}"%s"

# ARJ archiver (jason@jarthur.Claremont.EDU)
#
# Anchored on the little-endian short 0xea60 at offset 0. Every following
# line reads one header field; range checks emit {invalid} before the value
# itself is printed.
0       uleshort    0xea60      ARJ archive data,
>2      leshort     x           header size: %d,
# Archiver version (offset 5): accepted range is 1..16.
>5      byte        <1          {invalid}
>5      byte        >16         {invalid}
>5      byte        x           version %d,
# Minimum version to extract (offset 6): accepted range is 1..16.
>6      byte        <1          {invalid}
>6      byte        >16         {invalid}
>6      byte        x           minimum version to extract: %d,
# Flag byte (offset 8): a negative signed value is rejected, then individual
# bits are reported.
>8      byte        <0          {invalid} flags,
>8      byte        &0x04       multi-volume,
>8      byte        &0x10       slash-switched,
>8      byte        &0x20       backup,
# Compression method (offset 9): accepted range is 0..4.
>9      byte        <0          {invalid},
>9      byte        >4          {invalid},
>9      byte        0           compression method: stored,
>9      byte        1           compression method: compressed most,
>9      byte        2           compression method: compressed,
>9      byte        3           compression method: compressed faster,
>9      byte        4           compression method: compressed fastest,
# File type (offset 10): accepted range is 0..4.
>10     byte        <0          {invalid}
>10     byte        >4          {invalid}
>10     byte        0           file type: binary,
>10     byte        1           file type: 7-bit text,
>10     byte        2           file type: comment header,
>10     byte        3           file type: directory,
>10     byte        4           file type: volume label,
# Original name (offset 34), only when its first byte is non-zero. The same
# string is emitted twice: once inside a {name:...} tag and once as text.
>34     byte        !0
>>34    string      x           {name:%s}
>>34    string      x           original name: "%s",
# Timestamp and sizes; negative sizes are rejected.
>0xC    ledate      x           original file date: %s,
>0x10   lelong      <0          {invalid}
>0x10   lelong      x           compressed file size: %d,
>0x14   lelong      <0          {invalid}
>0x14   lelong      x           uncompressed file size: %d,
# Host OS (offset 7): values 0..9 are named, anything else is rejected.
>7      byte        0           os: MS-DOS 
>7      byte        1           os: PRIMOS
>7      byte        2           os: Unix
>7      byte        3           os: Amiga
>7      byte        4           os: Macintosh
>7      byte        5           os: OS/2
>7      byte        6           os: Apple ][ GS
>7      byte        7           os: Atari ST
>7      byte        8           os: NeXT
>7      byte        9           os: VAX/VMS
>7      byte        >9          {invalid}
>7      byte        <0          {invalid}

# RAR archiver (http://kthoom.googlecode.com/hg/docs/unrar.html)
# The unsigned byte at offset 9 must lie in 0x72..0x7B; its value selects the
# header type name appended to the description.
0   string  \x52\x61\x72\x21\x1A\x07\x00  RAR archive data, first volume type:
>9  ubyte   <0x72  {invalid}
>9  ubyte   >0x7B  {invalid}
>9  ubyte   0x72   MARK_HEAD
>9  ubyte   0x73   MAIN_HEAD
>9  ubyte   0x74   FILE_HEAD
>9  ubyte   0x75   COMM_HEAD
>9  ubyte   0x76   AV_HEAD
>9  ubyte   0x77   SUB_HEAD
>9  ubyte   0x78   PROTECT_HEAD
>9  ubyte   0x79   SIGN_HEAD
>9  ubyte   0x7A   NEWSUB_HEAD
>9  ubyte   0x7B   ENDARC_HEAD

# HPACK archiver (Peter Gutmann, pgut1@cs.aukuni.ac.nz)
# Matches the literal "HPAK" at offset 0.
0  string  HPAK  HPACK archive data

# JAM Archive volume format, by Dmitry.Kohmanyuk@UA.net
# Matches octal 351, a comma, octal 001 and the text "JAM" at offset 0.
0  string  \351,\001JAM  JAM archive

# LHARC/LHA archiver (Greg Roelofs, newt@uchicago.edu)
# Each entry matches a four-character method id at offset 2; the character at
# offset 6 must then be '-', otherwise the match is flagged {invalid}.
2   string  -lzs    LHa 2.x? archive data [lzs] [NSRL|LHA2]
>6  string  !-      {invalid}
2   string  -lh\40  LHa 2.x? archive data [lh ] [NSRL|LHA2]
>6  string  !-      {invalid}
2   string  -lhd    LHa 2.x? archive data [lhd] [NSRL|LHA2]
>6  string  !-      {invalid}
2   string  -lh2    LHa 2.x? archive data [lh2] [NSRL|LHA2]
>6  string  !-      {invalid}
2   string  -lh3    LHa 2.x? archive data [lh3] [NSRL|LHA2]
>6  string  !-      {invalid}
2   string  -lh4    LHa (2.x) archive data [lh4] [NSRL|LHA2]
>6  string  !-      {invalid}
2   string  -lh5    LHa (2.x) archive data [lh5] [NSRL|LHA2]
>6  string  !-      {invalid}
2   string  -lh6    LHa (2.x) archive data [lh6] [NSRL|LHA2]
>6  string  !-      {invalid}
2   string  -lh7    LHa (2.x) archive data [lh7] [NSRL|LHA2]
>6  string  !-      {invalid}


# cpio archives
#
# The SVR4 "cpio(4)" hints that there are additional formats, but they are
# defined as "short"s; all the new formats appear to be character-header
# formats and are therefore matched as strings, not numbers.
# The pre-SVR4 / odc signature is kept here, disabled:
#0       string          070707          ASCII cpio archive (pre-SVR4 or odc)

# SVR4 without CRC. The name at offset 110 must not start with NUL, and the
# first character of the fields at offsets 94 and 54 must lie in 0x30..0x66
# before the three fields are printed.
0     string  070701  ASCII cpio archive (SVR4 with no CRC),
>110  byte    0       {invalid}
>94   byte    <0x30   {invalid}
>94   byte    >0x66   {invalid}
>54   byte    <0x30   {invalid}
>54   byte    >0x66   {invalid}
>110  string  x       file name: "%s",
>94   string  x       file name length: "0x%.8s",
>54   string  x       file size: "0x%.8s"

# SVR4 with CRC. Same field checks as the 070701 entry: the name at offset
# 110 must not start with NUL, and the first character of the fields at
# offsets 94 and 54 must lie in 0x30..0x66.
# The description now ends with a comma, like the 070701 entry, so that it
# is separated from the "file name" field printed next.
0       string      070702  ASCII cpio archive (SVR4 with CRC),
>110    byte        0       {invalid}
>94     byte        <0x30   {invalid}
>94     byte        >0x66   {invalid}
>54     byte        <0x30   {invalid}
>54     byte        >0x66   {invalid}
>110    string      x       file name: "%s",
>94     string      x       file name length: "0x%.8s",
>54     string      x       file size: "0x%.8s"

# HP Printer Job Language
# From: Uwe Bonnes <bon@elektron.ikp.physik.th-darmstadt.de>
#
# The header found on Win95 HP plot files is the "Silliest Thing possible"
# (TM): every driver puts the language at some random position, with random
# case (LANGUAGE and Language). For example, the LaserJet 5L driver puts
# "PJL ENTER LANGUAGE" in line 10.
#
# Only the fixed ESC%-12345X@PJL prefix is matched; the string starting at
# offset 0 is then printed in quotes.
0   string  \033%-12345X@PJL  HP Printer Job Language data,
>0  string  x                 "%s"

#------------------------------------------------------------------------------
#
# RPM: file(1) magic for Red Hat Packages   Erik Troan (ewt@redhat.com)
#
# Byte 4 must lie in 1..99 and is printed as the version; the big-endian
# short at offset 6 selects bin/src, the one at offset 8 selects the
# architecture name, and the string at offset 10 is printed in quotes.
0    ubelong  0xedabeedb  RPM
>4   byte     <1          {invalid}
>4   byte     >99         {invalid}
>4   byte     x           v%d
>6   beshort  0           bin
>6   beshort  1           src
>8   beshort  1           i386
>8   beshort  2           Alpha
>8   beshort  3           Sparc
>8   beshort  4           MIPS
>8   beshort  5           PowerPC
>8   beshort  6           68000
>8   beshort  7           SGI
>8   beshort  8           RS6000
>8   beshort  9           IA64
>8   beshort  10          Sparc64
>8   beshort  11          MIPSel
>8   beshort  12          ARM
>10  string   x           "%s"

# IBM AIX Backup File Format header and entry signatures
#
# Volume header: little-endian long 0xea6b0009 at offset 0. The volume number
# at offset 6 must be positive; the remaining lines print the header fields
# in order.
0    ulelong   0xea6b0009  BFF volume header,
>4   uleshort  x           checksum: 0x%.4X,
>6   leshort   <0          {invalid}
>6   leshort   0           {invalid}
>6   leshort   x           volume number: %d,
>8   ledate    x           current date: %s,
>12  ledate    x           starting date: %s,
>20  string    x           disk name: "%s",
>36  string    x           file system name: "%s",
>52  string    x           user name: "%s"

# BFF volume entry: little-endian short 0xea6b at offset 2.
# The long at offset 22 is rejected when negative, reported as "directory"
# when zero, and otherwise printed as the file size together with the
# compressed size at offset 54 (which must be positive). The long at offset
# 58 must be zero, and the name at offset 62 must start with '.' (0x2e) or
# '/' (0x2f).
2     uleshort  0xea6b  BFF volume entry,
>22   lelong    <0      {invalid}
>22   lelong    0       directory,
>22   lelong    >0
>>22  lelong    x       file size: %d,
>>54  lelong    <0      {invalid}
>>54  lelong    0       {invalid}
>>54  lelong    x       compressed size: %d,
>58   lelong    !0      {invalid}
>62   byte      0       {invalid}
>62   byte      !0x2e
>>62  byte      !0x2f   {invalid}
>62   string    x       file name: "%s"

# BFF volume entry, compressed: little-endian short 0xea6c at offset 2.
# The long at offset 22 is rejected when negative, reported as "directory"
# when zero, and otherwise printed as the file size together with the
# compressed size at offset 54 (which must be positive). The long at offset
# 58 must be zero, and the name at offset 62 must start with '.' (0x2e) or
# '/' (0x2f).
2     uleshort  0xea6c  BFF volume entry, compressed,
>22   lelong    <0      {invalid}
>22   lelong    0       directory,
>22   lelong    >0
>>22  lelong    x       file size: %d,
>>54  lelong    <0      {invalid}
>>54  lelong    0       {invalid}
>>54  lelong    x       compressed size: %d,
>58   lelong    !0      {invalid}
>62   byte      0       {invalid}
>62   byte      !0x2e
>>62  byte      !0x2f   {invalid}
>62   string    x       file name: "%s"

# BFF volume entry, AIXv3: little-endian short 0xea6d.
# The magic is anchored at offset 2, like the 0xea6b / 0xea6c entries and
# like the volume header (whose 0xea6b short also sits at offset 2 of the
# 0xea6b0009 long). It was previously anchored at offset 0 while using the
# same child offsets as its siblings, which shifted every field read below
# by two bytes.
# The long at offset 22 is rejected when negative, reported as "directory"
# when zero, and otherwise printed as the file size together with the
# compressed size at offset 54 (which must be positive). The long at offset
# 58 must be zero, and the name at offset 62 must start with '.' (0x2e) or
# '/' (0x2f).
2       uleshort    0xea6d  BFF volume entry, AIXv3,
>22     lelong      <0      {invalid}
>22     lelong      0       directory,
>22     lelong      >0
>>22    lelong      x       file size: %d,
>>54    lelong      <0      {invalid}
>>54    lelong      0       {invalid}
>>54    lelong      x       compressed size: %d,
>58     lelong      !0      {invalid}
>62     byte        0       {invalid}
>62     byte        !0x2e
>>62    byte        !0x2f   {invalid}
>62     string      x       file name: "%s"

#------------------------------------------------------------------------------
# From Stuart Caie <kyzer@4u.net> (developer of cabextract)
# Microsoft Cabinet files
#
# According to libmagic comments, the CAB version number is always 1.3, so
# byte 25 must be 1 and byte 24 must be 3. The long at offset 8 is printed as
# the size in bytes; the short at offset 28 is the file count and must not be
# zero.
0    string   MSCF\0\0\0\0  Microsoft Cabinet archive data
>25  byte     !1            {invalid}
>24  byte     !3            {invalid}
>8   lelong   x             \b, %u bytes
>28  leshort  0             {invalid}
>28  leshort  1             \b, 1 file
>28  leshort  >1            \b, %u files

# InstallShield Cabinet files
# TODO: Version number checks should be made more specific for false positive filtering
# The high nibble of byte 5 selects the version text. The long at offset 12
# must lie in 0..100000; it is then used as the base of an indirect offset
# (value + 40) from which the file count is read.
0           string     ISc(     InstallShield Cabinet archive data
>5          byte&0xf0  =0x60    version 6,
>5          byte&0xf0  <0x60    version 4/5,
>5          byte&0xf0  >0x60    {invalid} version,
>12         lelong     <0       {invalid} offset,
>12         lelong     >100000  {invalid} offset,
>(12.l+40)  lelong     x        %u files

# Windows CE package files
# The long at offset 20 selects the architecture text; the shorts at offsets
# 52 and 56 are reported as file and registry-entry counts when non-zero.
0    string   MSCE\0\0\0\0  Microsoft WinCE install header
>20  lelong   0             \b, architecture-independent
>20  lelong   103           \b, Hitachi SH3
>20  lelong   104           \b, Hitachi SH4
>20  lelong   0xA11         \b, StrongARM
>20  lelong   4000          \b, MIPS R4000
>20  lelong   10003         \b, Hitachi SH3
>20  lelong   10004         \b, Hitachi SH3E
>20  lelong   10005         \b, Hitachi SH4
>20  lelong   70001         \b, ARM 7TDMI
>52  leshort  1             \b, 1 file
>52  leshort  >1            \b, %u files
>56  leshort  1             \b, 1 registry entry
>56  leshort  >1            \b, %u registry entries

# LBR archive: a NUL byte followed by eleven spaces at offset 0.
# The two bytes at offset 12 must both be NUL. The pattern used to read
# \x00x00 (missing backslash), i.e. NUL followed by the literal text "x00",
# so records with two zero bytes there could be flagged {invalid}.
0       string  \x00\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20        LBR archive data
>12     string  !\x00\x00                                               {invalid}

# Parity archive reconstruction file, the 'par' file format now used on Usenet.
# The short at offset 48 is zero for an index file, otherwise it is printed as
# the file number.
0    string   PAR\0  PARity archive data
>48  leshort  =0     - Index file
>48  leshort  >0     - file number %d

# BitTorrent files, from Felix von Leitner <felix-file@fefe.de>
# Matches the literal text "d8:announce" at offset 0.
0  string  d8:announce  BitTorrent file

# BSA archives, based on
# http://forum.step-project.com/topic/5033-ramifications-of-bsa-extraction-in-mod-organizer/page-16
# One signature per version byte (0x67 = 103, 0x68 = 104). In both, the byte
# at offset 8 must equal 0x24 and is printed as the folder records offset.
0   string  BSA\x00\x67  BSA archive, version: 103,
>8  byte    !0x24        {invalid}
>8  byte    0x24         folder records offset: %d
0   string  BSA\x00\x68  BSA archive, version: 104,
>8  byte    !0x24        {invalid}
>8  byte    0x24         folder records offset: %d

