Skip to content

Instantly share code, notes, and snippets.

@ssokolow
Last active January 31, 2022 08:10
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ssokolow/547c892a4a7da753aa419aa6790ec849 to your computer and use it in GitHub Desktop.
Save ssokolow/547c892a4a7da753aa419aa6790ec849 to your computer and use it in GitHub Desktop.
Example configuration file for my WIP file verifier
[filetype.3gpp]
description = "MPEG-4 Part 12 Media (3GPP)"
extension = "3gp"
handler = "ffmpeg"
[filetype.3gpp2]
description = "MPEG-4 Part 12 Media (3GPP2)"
extension = "3g2"
handler = "ffmpeg"
[filetype.7zip]
description = "7-Zip archive"
extension = "7z"
handler = ["p7zip", "lsar"]
header = [55, 122, 188, 175, 39, 28]
[filetype.aac]
description = "AAC Audio (ADTS Stream)"
extension = "aac"
handler = "ffmpeg"
[filetype.aifc]
description = "AIFF Audio (Compressed)"
extension = "aifc"
handler = "ffmpeg"
header = [[70, 79, 82, 77], [65, 73, 70, 70]]
[filetype.aiff]
description = "AIFF Audio"
extension = ["aif", "aiff"]
handler = "ffmpeg"
header = [[70, 79, 82, 77], [65, 73, 70, 70]]
[filetype.ape]
description = "Monkey's Audio"
extension = "ape"
handler = "ffmpeg"
header = [77, 65, 67, 32]
[filetype.apk]
container = "zip"
description = "Android application package"
extension = "apk"
[filetype.arj]
description = "ARJ archive"
extension = "arj"
handler = ["arj", "lsar"]
header = [96, 234]
[filetype.asf]
description = "Microsoft ASF"
extension = "asf"
handler = "ffmpeg"
header = [48, 38, 178, 117, 142, 102, 207, 17, 166, 217, 0, 170, 0, 98, 206, 108]
[filetype.avi]
description = "Microsoft AVI Video"
extension = "avi"
handler = "ffmpeg"
# TODO: Rework deserializing to support a header of the form
# [0x52, 0x49, 0x46, 0x46, ??, ??, ??, ??, 0x41, 0x56, 0x49, 0x20]
# (hexadecimal, unlike the decimal headers used elsewhere in this file)
# TODO: Look into whether it would be useful to have some kind of fallback
# verifier for unidentified RIFF-based formats.
# TODO: Split .bin into RAR, BIN/CUE, etc.
[filetype.bin]
description = ".bin"
extension = "bin"
handler = "bin"
[filetype.binhex4]
description = "BinHex4 encoded"
extension = "hqx"
handler = "py_helper"
# ASCII for "(This file must be converted with BinHex 4.0)"
header = [
    40, 84, 104, 105, 115, 32, 102, 105, 108, 101, 32, 109, 117, 115, 116, 32,
    98, 101, 32, 99, 111, 110, 118, 101, 114, 116, 101, 100, 32, 119, 105, 116,
    104, 32, 66, 105, 110, 72, 101, 120, 32, 52, 46, 48, 41,
]
[filetype.bmp]
description = "Microsoft Device-Independent Bitmap Image"
extension = ["bmp", "dib"]
handler = "image"
header = [66, 77]
[filetype.bzip2]
description = "BZip2 compressed"
extension = "bz2"
handler = ["p7zip", "lsar", "py_helper"]
header = [66, 90, 104]
[filetype.cb7]
container = "7zip"
description = "Comic Book Archive (7-Zip)"
extension = "cb7"
[filetype.cbr]
container = "rar"
description = "Comic Book Archive (RAR)"
extension = "cbr"
[filetype.cbt]
container = "tar"
description = "Comic Book Archive (Tar)"
extension = "cbt"
[filetype.cbz]
container = "zip"
description = "Comic Book Archive (Zip)"
extension = "cbz"
[filetype.dashtoc]
container = "json"
description = "Dash/Zeal Docset Table of Contents"
extension = "dashtoc"
[filetype.dcx]
description = "Multi-page ZSoft PC Paintbrush Image"
extension = "dcx"
handler = "py_helper"
header = [177, 104, 222, 58]
multipage = true
# TODO: See if the "ar" crate is an acceptable substitute and, if so, how much
# size it would add to the output binary.
[filetype.deb]
description = "Debian Package"
extension = "deb"
handler = "p7zip"
header = [33, 60, 97, 114, 99, 104, 62]
[filetype.dmg]
description = "Apple DMG Disk Image"
extension = "dmg"
handler = "p7zip"
header = [120, 1, 115, 13, 98, 98, 96]
[filetype.docm]
container = "zip"
description = "Macro-enabled OOXML Document"
extension = "docm"
[filetype.docx]
container = "zip"
description = "OOXML Document"
extension = "docx"
[filetype.epub]
container = "zip"
description = "EPUB e-book"
extension = "epub"
# NOTE(review): no `[handler.epubcheck]` table is visible in this file, unlike
# the other external tools referenced by name (`ffmpeg`, `lsar`, ...). Confirm
# that "epubcheck" is a built-in handler or add a definition for it.
handler = "epubcheck"
# TODO: Decide how to extend the schema so the system is capable of ensuring
# that a corrupted example of a subtype cannot pass validation as an
# ancestor type. (eg. letting corruption outside the EXE portion of an
# InnoSetup installer pass because it passes validation as an EXE)
#
# Given how difficult it is to *reliably* prevent "corruption caused the
# identity check to fail", maybe version 1.0 of this schema should just
# forbid having a container and subtype both have handlers and, instead
# rely on one of two situations:
#
# 1. A whole bunch of sibling EXE validators (InnoSetup, RAR, etc.) with
# no validator on the base EXE format. (Would prevent things like
# authenticode validation, but could be revised later.)
# 2. Support unpacking the container and using normal validators for the
# contents. (Since most containers are archive formats... but this is
# less able to validate things like "must have a manifest file")
#
# (Maybe something involving making it an error if something fails
# validation based on its extension despite passing validation for the
# header-detected type if that header-detected type is a supertype of the
# extension-detected type... though that still feels like there might be
# some kinds of concatenated formats that might erroneously pass.)
#
# ...also support running ClamAV on EXE and COM files since I plan to run
# this before burning stuff to DVD+R for archival.
#[filetype.exe]
#description = "DOS/Windows Executable"
#extension = "exe"
#header = [77, 90]
# TODO: Add a definition for self-extracting Zip, RAR, etc. files
#[filetype.exe_innosetup]
#container = "exe"
#description = "Inno Setup Installer"
#handler = "innoextract"
[filetype.f4a]
description = "FLV Audio"
extension = "f4a"
handler = "ffmpeg"
[filetype.f4b]
description = "FLV Audiobook"
extension = "f4b"
handler = "ffmpeg"
[filetype.f4v]
description = "FLV Video"
extension = "f4v"
handler = "ffmpeg"
# TODO: See how much work it would take to use something like `claxon` or
# `cauldron` to do md5sum verification internally.
[filetype.flac]
description = "FLAC Audio"
extension = "flac"
# TODO: Can FFmpeg be asked to check the md5sum?
handler = "flac"
# ASCII "fLaC"
header = [102, 76, 97, 67]
# TODO: See if the `flic` crate is suitable for this use
[filetype.fli]
description = "Autodesk Animator FLIC Animation"
extension = ["fli"]
handler = "py_helper"
# Magic word 0xAF11, stored little-endian.
header = [17, 175]
# The magic word follows the 4-byte file-size field, so it starts at byte 4.
header_offset = 4
# Source: http://steve.hollasch.net/cgindex/formats/fli.html
# TODO: See if the `flic` crate is suitable for this use
[filetype.flc]
description = "Autodesk Animator Pro FLIC Animation"
extension = ["flc"]
handler = "py_helper"
# Magic word 0xAF12, stored little-endian.
header = [18, 175]
# The magic word follows the 4-byte file-size field, so it starts at byte 4.
header_offset = 4
# Source: http://www.retroarchive.org/swag/MISC/0140.PAS.html
[filetype.flv]
description = "Flash Video"
extension = "flv"
handler = "ffmpeg"
header = [70, 76, 86]
# TODO: A Gameboy ROM must contain the Nintendo logo for its startup screen
# from offset 0x104 to 0x133, one of the longest signatures required in any
# file format.
[filetype.gif]
description = "GIF Image"
extension = "gif"
handler = "image"
header = [[71, 73, 70, 56, 55, 97], [71, 73, 70, 56, 57, 97]]
[filetype.gzip]
description = "GZip compressed"
extension = "gz"
handler = "gzip"
header = [31, 139]
[filetype.iso_dvdisaster]
description = "ISO 9660 CD/DVD image (DVDisaster ECC)"
extension = "iso"
handler = "dvdisaster"
# ASCII "CD001", the ISO 9660 standard identifier.
header = [67, 68, 48, 48, 49]
# The identifier sits one byte into the first volume descriptor, which starts
# at sector 16 of 2048-byte sectors: 16 * 2048 + 1 = 32769.
# NOTE(review): confirm the header matcher reads this far into the file.
header_offset = 32769
# TODO: What does `jarsigner -verify` do on an unsigned JAR?
[filetype.jar]
container = "zip"
description = "Java ARchive"
extension = "jar"
[filetype.jpeg]
description = "JPEG/JFIF Image"
extension = ["jfi", "jfif", "jif", "jpe", "jpeg", "jpg"]
handler = "image"
header = [255, 216, 255]
# TODO: Either match only FF D8 (the actual JFIF magic number) or be
# *absolutely* certain that all relevant parsers restrict input to the
# combination I'm matching on.
[filetype.jpeg2k]
description = "JPEG 2000 Image"
extension = ["j2k", "jp2", "jpg2", "jpf", "jpx"]
handler = "py_helper"
header = [0, 0, 0, 12, 106, 80, 32, 32, 13, 10, 135, 10]
[filetype.json]
description = "JSON Data"
extension = "json"
handler = "json"
# TODO: Since LHA is used as a container for stuff, find a way to detect it by header
# TODO: When I have time, safety-dance the delharc crate and then decide if
# that makes it suitable for built-in LHA support.
[filetype.lha]
description = "LHA archive"
extension = ["lha", "lzh"]
handler = ["p7zip", "lsar"] # TODO: `lha tq {file}`
[filetype.lzip]
description = "Lzip compressed"
extension = "lz"
handler = "lzip"
header = [76, 90, 73, 80]
[filetype.lzma]
description = ".lzma compressed"
extension = "lzma"
handler = ["p7zip", "lsar", "py_helper"]
[filetype.lzx]
description = "LZX archive"
extension = "lzx"
handler = "lsar"
header = [76, 90, 88]
[filetype.m4a]
description = "MPEG-4 Part 14 Audio"
extension = "m4a"
handler = "ffmpeg"
[filetype.m4b]
description = "MPEG-4 Part 14 Audiobook"
extension = "m4b"
handler = "ffmpeg"
[filetype.m4r]
description = "MPEG-4 Part 14 Ringtone"
extension = "m4r"
handler = "ffmpeg"
[filetype.m4v]
description = "MPEG-4 Part 14 Video"
extension = "m4v"
handler = "ffmpeg"
[filetype.maff]
container = "zip"
description = "Mozilla Archive Format"
extension = "maff"
#[filetype.megadrive_rom]
#description = "SEGA Genesis/Megadrive ROM"
#header = [83, 69, 71, 65]
#header_offset = 256
## TODO: handler (Must stay commented out until we have one)
[filetype.mk3d]
description = "Matroska Video (3D)"
extension = "mk3d"
handler = "ffmpeg"
header = [26, 69, 223, 163]
[filetype.mka]
description = "Matroska Audio"
extension = "mka"
handler = "ffmpeg"
header = [26, 69, 223, 163]
[filetype.mkv]
description = "Matroska Video"
extension = "mkv"
handler = "ffmpeg"
header = [26, 69, 223, 163]
[filetype.mov]
description = "Quicktime Video"
extension = "mov"
handler = "ffmpeg"
[filetype.mp1]
description = "MPEG Layer 1 Audio"
extension = "mp1"
handler = "ffmpeg"
[filetype.mp2]
description = "MPEG Layer 2 Audio"
extension = "mp2"
handler = "ffmpeg"
[filetype.mp3]
description = "MPEG Layer 3 Audio"
extension = "mp3"
handler = "ffmpeg"
# Alternatives: an ASCII "ID3" tag, or a leading FF FB / FF F3 / FF F2 byte pair.
header = [
    [73, 68, 51],
    [255, 251],
    [255, 243],
    [255, 242],
]
[filetype.mp4]
description = "MPEG-4 Part 14 Video"
extension = "mp4"
handler = "ffmpeg"
# Bytes 00 00 00 20 followed by ASCII "ftypisom".
# NOTE(review): this pins both the leading 4-byte value (32) and the "isom"
# brand, so files starting with a different size or brand presumably won't
# match by header -- confirm whether that is intended.
header = [0, 0, 0, 32, 102, 116, 121, 112, 105, 115, 111, 109]
[filetype.mpeg]
description = "MPEG Video"
extension = ["mpe", "mpeg", "mpg"]
handler = "ffmpeg"
[filetype.mpeg_ts]
description = "MPEG Transport Stream"
extension = "ts"
handler = "ffmpeg"
[filetype.mpeg_tsa]
description = "MPEG Transport Stream Audio"
extension = "tsa"
handler = "ffmpeg"
[filetype.mpeg_tsv]
description = "MPEG Transport Stream Video"
extension = "tsv"
handler = "ffmpeg"
[filetype.ms_cab]
description = "Microsoft CAB"
extension = "cab"
# NOTE: "lsar" reports "Unknown" with 0 exit
handler = ["cabextract", "p7zip"]
# ASCII "MSCF"
header = [77, 83, 67, 70]
[filetype.is_cab]
description = "InstallShield CAB"
extension = "cab"
handler = ["unshield"] # TODO: Do p7zip or lsar support any of these?
header = [73, 83, 99, 40]
[filetype.ms_cursor]
description = "Microsoft Cursor"
extension = "cur"
handler = "py_helper"
multipage = true
[filetype.ms_ico]
description = "Microsoft Icon"
extension = "ico"
handler = "image"
multipage = true
[filetype.msi]
description = "MSI Installer"
extension = "msi"
handler = ["p7zip", "lsar"]
[filetype.musepack]
description = "Musepack Audio"
extension = ["mp+", "mpc", "mpp"]
handler = "ffmpeg"
header = [[77, 80, 43], [77, 80, 67, 75]]
[filetype.odb]
container = "zip"
description = "ODF Database"
extension = "odb"
[filetype.odc]
container = "zip"
description = "ODF Chart"
extension = "odc"
[filetype.odf]
container = "zip"
description = "ODF Formula"
extension = "odf"
[filetype.odg]
container = "zip"
description = "ODF Drawing"
extension = "odg"
[filetype.odi]
container = "zip"
description = "ODF Image"
extension = "odi"
[filetype.odm]
container = "zip"
description = "ODF Master Document"
extension = "odm"
[filetype.odp]
container = "zip"
description = "ODF Presentation"
extension = "odp"
[filetype.ods]
container = "zip"
description = "ODF Spreadsheet"
extension = "ods"
[filetype.odt]
container = "zip"
description = "ODF Text Document"
extension = "odt"
[filetype.oga]
container = "ogx"
description = "Ogg containing audio (.oga)"
extension = "oga"
[filetype.ogg]
container = "ogx"
description = "Ogg Vorbis (.ogg)"
extension = "ogg"
[filetype.ogm]
container = "ogx"
description = "OGM (Ogg, Unofficial)"
extension = "ogm"
[filetype.ogv]
container = "ogx"
description = "Ogg containing video (.ogv)"
extension = "ogv"
[filetype.ogx]
description = "Ogg (unspecified) (.ogx)"
extension = "ogx"
handler = "ffmpeg"
header = [79, 103, 103, 83]
[filetype.opus]
description = "Opus Audio"
extension = "opus"
handler = "ffmpeg"
[filetype.otc]
container = "zip"
description = "ODF Chart Template"
extension = "otc"
[filetype.otf]
container = "zip"
description = "ODF Formula Template"
extension = "otf"
[filetype.otg]
container = "zip"
description = "ODF Drawing Template"
extension = "otg"
[filetype.oth]
container = "zip"
description = "ODF Web Page Template"
extension = "oth"
[filetype.oti]
container = "zip"
description = "ODF Image Template"
extension = "oti"
[filetype.otp]
container = "zip"
description = "ODF Presentation Template"
extension = "otp"
[filetype.ots]
container = "zip"
description = "ODF Spreadsheet Template"
extension = "ots"
[filetype.ott]
container = "zip"
description = "ODF Text Document Template"
extension = "ott"
[filetype.pbm]
description = "NetPBM Portable Bitmap Image"
extension = "pbm"
handler = "image"
header = [80, 49, 10]
# TODO: See how PIL and the `pcx` crate compare for catching errors
[filetype.pcx]
description = "ZSoft PC Paintbrush Image"
extension = "pcx"
handler = "py_helper"
header = [[10, 0, 1], [10, 2, 1], [10, 3, 1], [10, 4, 1], [10, 5, 1]]
# Source: https://www.fileformat.info/format/pcx/egff.htm
# TODO: See if the `pdf` or `lopdf` crate could replace pdftotext in this role
[filetype.pdf]
description = "PDF Document"
extension = "pdf"
handler = "pdftotext"
header = [[37, 80, 68, 70, 45, 49, 46], [37, 80, 68, 70, 0]]
# NOTE: https://www.garykessler.net/library/file_sigs.html says the PDF format
# also has a trailer, which could be useful for looking for ways to split
# and individually check the parts of a concatenated stream.
# NOTE: According to pocorgtfo07.pdf, some PDF parsers will accept this
# occurring anywhere within the first 1024 bytes and some only look for
# the PDF trailer and will accept a missing header, so I'll need something
# smarter.
# TODO: As hinted previously, PDF can be easily abused. For security reasons,
# Adobe Reader, the standard PDF reader, has blacklisted known magic
# signatures such as PNG or PE since version 10.1.5. It is thus not
# possible anymore to have a valid polyglot that would open in Adobe
# Reader as PDF. This is a good security measure even if it breaks
# compatibility with older releases of PoC‖GTFO.
# -- pocorgtfo07.pdf
[filetype.pgm]
description = "NetPBM Portable Graymap Image"
extension = "pgm"
handler = "image"
header = [80, 50, 10]
# NOTE: According to the PICO-8 Wiki, .p8.png files store the game data in the
# least significant bits of the screenshot and don't need special handling here
[filetype.png]
description = "PNG Image"
extension = "png"
handler = "image"
header = [137, 80, 78, 71, 13, 10, 26, 10]
[filetype.potm]
container = "zip"
description = "Macro-enabled OOXML Presentation Template"
extension = "potm"
[filetype.ppm]
description = "NetPBM Portable Pixmap Image"
extension = "ppm"
handler = "image"
header = [80, 51, 10]
[filetype.ppsx]
container = "zip"
description = "OOXML Presentation (Self-Starting)"
extension = "ppsx"
[filetype.pptm]
container = "zip"
description = "Macro-enabled OOXML Presentation"
extension = "pptm"
[filetype.pptx]
container = "zip"
description = "OOXML Presentation"
extension = "pptx"
# TODO: See if it's feasible to use `python-parser` or `rustpython-parser`
# for this and, if so, how much bigger it makes the output file.
[filetype.py]
description = "Python Source Code"
extension = "py"
handler = "py_helper"
[filetype.pyc]
description = "Python Bytecode"
extension = "pyc"
handler = "ignore"
[filetype.pyo]
description = "Python Bytecode (Optimized)"
extension = "pyo"
handler = "ignore"
[filetype.ra]
description = "RealAudio"
extension = "ra"
handler = "ffmpeg"
[filetype.rar]
description = "RAR archive"
extension = "rar"
handler = ["unrar", "p7zip", "lsar"]
header = [82, 97, 114, 33, 26, 7]
[filetype.rdf]
# No handler of its own; `container` names the `xml` filetype.
container = "xml"
description = "RDF Document"
extension = "rdf"
[filetype.rm]
description = "RealMedia Video"
extension = "rm"
handler = "ffmpeg"
[filetype.rmvb]
description = "RealMedia Video (VBR)"
extension = "rmvb"
handler = "ffmpeg"
# TODO: See if the `rpm-rs` crate can replace `rpm --checksig` and how much
# size it would add to the output binary.
[filetype.rpm]
description = "RPM Package"
extension = "rpm"
handler = ["rpm", "p7zip"]
header = [237, 171, 238, 219]
[filetype.rsn]
container = "rar"
description = "RAR-compressed SPC chiptune album"
extension = "rsn"
[filetype.rss]
# No handler of its own; `container` names the `xml` filetype.
container = "xml"
description = "RSS Feed"
extension = "rss"
[filetype.rv]
description = "RealVideo"
extension = "rv"
handler = "ffmpeg"
[filetype.sea]
description = "Self-Extracting Stuffit archive"
extension = "sea"
handler = "lsar"
[filetype.shorten]
description = "Shorten Audio"
extension = "shn"
handler = "ffmpeg"
[filetype.sit]
description = "Stuffit archive"
extension = "sit"
handler = "lsar"
# Two accepted signatures, as ASCII: "SIT!" + NUL, and "StuffIt (c)1997-"
header = [
    [83, 73, 84, 33, 0],
    [83, 116, 117, 102, 102, 73, 116, 32, 40, 99, 41, 49, 57, 57, 55, 45],
]
[filetype.speex]
description = "Speex Audio"
extension = "spx"
handler = "ffmpeg"
[filetype.sqlite3]
description = "SQLite3 Database"
extension = "sqlite3"
handler = "py_helper"
header = [83, 81, 76, 105, 116, 101, 32, 102, 111, 114, 109, 97, 116, 32, 51, 0]
[filetype.sun_au]
# NOTE(review): unlike the other entries that name a handler, this one has no
# `extension` key, so it can presumably only be matched by header. Confirm
# that this is intentional.
description = "Sun Audio (with header)"
handler = "ffmpeg"
# ASCII ".snd"
header = [46, 115, 110, 100]
[filetype.svg]
# No handler of its own; `container` names the `xml` filetype.
container = "xml"
description = "SVG Image"
extension = "svg"
[filetype.svgz]
container = "gzip"
description = "SVG Image (GZip compressed)"
extension = "svgz"
[filetype.tar]
description = "Tar archive"
extension = "tar"
handler = ["p7zip", "lsar", "py_helper"]
header = [[117, 115, 116, 97, 114, 0, 48, 48], [117, 115, 116, 97, 114, 32, 32, 0]]
header_offset = 257
[filetype.targa]
description = "Truevision TGA Image"
extension = "tga"
handler = "image"
# NOTE: https://www.garykessler.net/library/file_sigs.html lists a trailer for
# these files but it's optional and not present in my test files.
# TODO: Rework how extensions are handled so ".tar.X" can result in an error
# if unpacking the ".X" compression produces something other than a ".tar".
[filetype.tbz2]
container = "bzip2"
description = "Tar archive (BZip2 compressed)"
extension = "tbz2"
[filetype.tgz]
container = "gzip"
description = "Tar archive (GZip compressed)"
extension = "tgz"
[filetype.tiff]
description = "TIFF Image"
extension = ["tif", "tiff"]
handler = "image"
header = [[73, 73, 42, 0], [77, 77, 0, 42]]
[filetype.tlz]
container = "lzma"
description = "Tar archive (.lzma compressed)"
extension = "tlz"
[filetype.toml]
description = "TOML Data"
extension = "toml"
handler = "toml"
[filetype.txt]
description = "Plaintext"
extension = "txt"
handler = "txt"
[filetype.txz]
container = "xz"
description = "Tar archive (.xz compressed)"
extension = "txz"
[filetype.uu]
description = "UUEncoded"
extension = ["uu", "uue"]
handler = "py_helper"
[filetype.vgacopy]
# No handler of its own; `container` names the `arj` filetype.
container = "arj"
description = "Compressed VGA-COPY/386 Floppy Disk Image"
extension = "vcp"
[filetype.voc]
description = "Creative Labs VOC Audio"
extension = "voc"
handler = "ffmpeg"
header = [67, 114, 101, 97, 116, 105, 118, 101, 32, 86, 111, 105, 99, 101, 32, 70]
[filetype.wave]
description = "Microsoft Waveform Audio"
extension = "wav"
handler = "ffmpeg"
# TODO: Rework deserializing to support a header of the form
# [0x52, 0x49, 0x46, 0x46, ??, ??, ??, ??, 0x57, 0x41, 0x56, 0x45]
# (hexadecimal, unlike the decimal headers used elsewhere in this file)
[filetype.wavpack]
description = "WavPack Audio"
extension = "wv"
handler = "ffmpeg"
header = [119, 118, 112, 107]
[filetype.webm]
description = "WebM Video"
extension = "webm"
handler = "ffmpeg"
header = [26, 69, 223, 163]
[filetype.webp]
description = "WebP Image"
extension = "webp"
handler = "py_helper" # TODO: Check if `image` validates despite only supporting luma
# TODO: Rework deserializing to support a header of the form
# [0x52, 0x49, 0x46, 0x46, ??, ??, ??, ??, 0x57, 0x45, 0x42, 0x50]
# (hexadecimal, unlike the decimal headers used elsewhere in this file)
[filetype.wma]
container = "asf"
description = "Windows Media Audio"
extension = "wma"
[filetype.wmv]
container = "asf"
description = "Windows Media Video"
extension = "wmv"
[filetype.xbm]
description = "X BitMap Image"
extension = "xbm"
handler = "py_helper"
[filetype.xlsm]
container = "zip"
description = "Macro-enabled OOXML Workbook"
extension = "xlsm"
[filetype.xlsx]
container = "zip"
description = "OOXML Workbook"
extension = "xlsx"
[filetype.xml]
description = "XML Data"
extension = "xml"
handler = "py_helper"
header = [60, 63, 120, 109, 108, 32]
# TODO: Decide how to handle formats that are less rigid on their 'header'
# format.
[filetype.xpi]
container = "zip"
description = "Mozilla XPInstall archive"
extension = "xpi"
[filetype.xpm]
description = "X PixMap Image"
extension = "xpm"
handler = "py_helper"
header = [47, 42, 32, 88, 80, 77, 32, 42, 47]
[filetype.ym]
container = "lha"
description = "Atari ST/Amstrad CPC YM2149 Chiptune"
extension = "ym"
[filetype.xz]
description = ".xz compressed"
extension = "xz"
handler = ["p7zip", "lsar", "py_helper"]
header = [253, 55, 122, 88, 90, 0]
# TODO: Decide how to disable fallback for formats that use Zip as a container
# and aren't supposed to support arbitrary compression algorithms.
# (Maybe a flag which means "Only use fallback if the handler is missing,
# not if it reports failure" combined with having the test suite verify
# that the first choice is something which can be specified to only
# allow the desired compression algorithms.)
[filetype.zip]
description = "Zip archive"
extension = "zip"
handler = ["zip", "p7zip", "lsar"]
header = [[80, 75, 1, 2], [80, 75, 3, 4], [80, 75, 5, 6], [80, 75, 7, 8]]
[filetype.zipsfx]
# NOTE(review): the only `[filetype.exe]` definition visible in this file is
# commented out, so "exe" here names a type that is not currently defined.
# This is also the only entry here whose `container` is a list rather than a
# single name -- confirm the schema accepts both forms.
container = ["exe", "zip"]
description = "Self-extracting Zip archive (.exe)"
[filetype.zoo]
description = "Zoo archive"
extension = "zoo"
handler = "lsar"
header = [90, 79, 79]
# TODO: Integrate the rest of the formats supported by lsar:
# (https://github.com/ashang/unar#supported-old-formats)
[[override]]
path = "*/hts-cache/new.zip"
ignore = true
[[override]]
path = "*/.git"
ignore = true
# TODO: Decide how to indicate how thorough a handler is so something like
# "unpack the Zip container" isn't treated as equivalent to "unpack the Zip
# container and check the images inside the CBZ for corruption" but, at the
# same time, also come up with a way for a built-in handler to report
# "Unsupported variant of the filetype. Fall back to an external processor if
# available".
[handler.arj]
argv = ["arj", "t"]
description = "Open-source ARJ"
sources = "http://arj.sourceforge.net/"
[handler.cabextract]
argv = ["cabextract", "-t"]
sources = "https://www.cabextract.org.uk/"
[handler.dvdisaster]
argv = ["dvdisaster", "-t", "-i"]
sources = "https://en.wikipedia.org/wiki/Dvdisaster"
# TODO: Automated tests to verify that this is the best combination of
# error-detection flags to invoke ffmpeg with.
# - https://superuser.com/a/100290/48014
# - https://superuser.com/q/588147/48014
# - https://superuser.com/q/326629/48014
# - http://www.ffmpeg-archive.org/how-to-set-flags-td936142.html
# - https://www.reddit.com/r/DataHoarder/comments/bgsgr2/how_to_check_if_a_video_file_is_corrupted/
[handler.ffmpeg]
# "-v" and its level are separate elements: assuming argv entries are handed
# to the process verbatim (no shell splitting), a single "-v quiet" element
# would reach ffmpeg as one unrecognised option.
# NOTE(review): the options before "-" are attached to the null output, and
# the trailing "-i" presumably relies on the file path being appended --
# confirm that "-err_detect" still takes effect on the input in this position.
argv = [
    "ffmpeg",
    "-err_detect", "+crccheck+bitstream+buffer+explode+careful",
    "-v", "quiet",
    "-xerror",
    "-f", "null", "-",
    "-i",
]
description = "FFmpeg"
sources = "https://ffmpeg.org/download.html"
[handler.flac]
argv = ["flac", "-t"]
description = "The FLAC command-line utilities"
sources = "https://xiph.org/flac/download.html"
[handler.innoextract]
argv = ["innoextract", "-t", "-g"]
sources = "https://constexpr.org/innoextract/"
[handler.lsar]
argv = ["lsar", "-t"]
description = "The command-line tools for The Unarchiver"
sources = "https://theunarchiver.com/command-line"
# TODO: Maybe set fail_if_stdout = "... Unknown."
[handler.lzip]
argv = ["lzip", "-t"]
sources = "https://www.nongnu.org/lzip/lzip.html"
[handler.p7zip]
argv = ["7z", "t"]
description = "Command-line '7z' tool from 7-zip"
sources = ["https://7-zip.org/", "http://p7zip.sourceforge.net/"]
[handler.pdftotext]
# Converts the file and discards the text output; any "Error" on stderr
# is configured below as the failure marker.
argv = ["pdftotext", "{path}", "{devnull}"]
description = "pdftotext from Poppler or the Xpdf command-line tools"
fail_if_stderr = "Error"
sources = [
    "https://linuxappfinder.com/package/poppler-utils",
    "https://www.xpdfreader.com/download.html",
]
[handler.py_helper]
argv = ["verify-files-helper", "{filetype}", "{path}"]
description = "Python-based helper for more esoteric formats"
# TODO: sources = the repo for this project
[handler.rpm]
argv = ["rpm", "--checksig"]
description = "RPM"
sources = ["https://rpm.org/", "https://www.cygwin.com/"]
[handler.unrar]
argv = ["unrar", "t"]
description = "RARLAB UnRAR"
sources = "https://www.rarlab.com/rar_add.htm"
[handler.unshield]
argv = ["unshield", "t"]
description = "Unshield"
sources = "https://github.com/twogood/unshield"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment