-
-
Save ssokolow/547c892a4a7da753aa419aa6790ec849 to your computer and use it in GitHub Desktop.
Example configuration file for my WIP file verifier
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
[filetype.3gpp] | |
description = "MPEG-4 Part 12 Media (3GPP)" | |
extension = "3gp" | |
handler = "ffmpeg" | |
[filetype.3gpp2] | |
description = "MPEG-4 Part 12 Media (3GPP2)" | |
extension = "3g2" | |
handler = "ffmpeg" | |
[filetype.7zip] | |
description = "7-Zip archive" | |
extension = "7z" | |
handler = ["p7zip", "lsar"] | |
header = [55, 122, 188, 175, 39, 28] | |
[filetype.aac] | |
description = "AAC Audio (ADTS Stream)" | |
extension = "aac" | |
handler = "ffmpeg" | |
[filetype.aifc] | |
description = "AIFF Audio (Compressed)" | |
extension = "aifc" | |
handler = "ffmpeg" | |
header = [[70, 79, 82, 77], [65, 73, 70, 70]] | |
[filetype.aiff] | |
description = "AIFF Audio" | |
extension = ["aif", "aiff"] | |
handler = "ffmpeg" | |
header = [[70, 79, 82, 77], [65, 73, 70, 70]] | |
[filetype.ape] | |
description = "Monkey's Audio" | |
extension = "ape" | |
handler = "ffmpeg" | |
header = [77, 65, 67, 32] | |
[filetype.apk] | |
container = "zip" | |
description = "Android application package" | |
extension = "apk" | |
[filetype.arj] | |
description = "ARJ archive" | |
extension = "arj" | |
handler = ["arj", "lsar"] | |
header = [96, 234] | |
[filetype.asf] | |
description = "Microsoft ASF" | |
extension = "asf" | |
handler = "ffmpeg" | |
header = [48, 38, 178, 117, 142, 102, 207, 17, 166, 217, 0, 170, 0, 98, 206, 108] | |
[filetype.avi] | |
description = "Microsoft AVI Video" | |
extension = "avi" | |
handler = "ffmpeg" | |
# TODO: Rework deserializing to support a header of the form
# [0x52, 0x49, 0x46, 0x46, ??, ??, ??, ??, 0x41, 0x56, 0x49, 0x20]
# (i.e. "RIFF", a 4-byte chunk size, then "AVI ")
# TODO: Look into whether it would be useful to have some kind of fallback | |
# verifier for unidentified RIFF-based formats. | |
# TODO: Split .bin into RAR, BIN/CUE, etc. | |
[filetype.bin] | |
description = ".bin" | |
extension = "bin" | |
handler = "bin" | |
[filetype.binhex4] | |
description = "BinHex4 encoded" | |
extension = "hqx" | |
handler = "py_helper" | |
header = [40, 84, 104, 105, 115, 32, 102, 105, 108, 101, 32, 109, 117, 115, | |
116, 32, 98, 101, 32, 99, 111, 110, 118, 101, 114, 116, 101, 100, 32, 119, | |
105, 116, 104, 32, 66, 105, 110, 72, 101, 120, 32, 52, 46, 48, 41] | |
[filetype.bmp] | |
description = "Microsoft Device-Independent Bitmap Image" | |
extension = ["bmp", "dib"] | |
handler = "image" | |
header = [66, 77] | |
[filetype.bzip2] | |
description = "BZip2 compressed" | |
extension = "bz2" | |
handler = ["p7zip", "lsar", "py_helper"] | |
header = [66, 90, 104] | |
[filetype.cb7] | |
container = "7zip" | |
description = "Comic Book Archive (7-Zip)" | |
extension = "cb7" | |
[filetype.cbr] | |
container = "rar" | |
description = "Comic Book Archive (RAR)" | |
extension = "cbr" | |
[filetype.cbt] | |
container = "tar" | |
description = "Comic Book Archive (Tar)" | |
extension = "cbt" | |
[filetype.cbz] | |
container = "zip" | |
description = "Comic Book Archive (Zip)" | |
extension = "cbz" | |
[filetype.dashtoc] | |
container = "json" | |
description = "Dash/Zeal Docset Table of Contents" | |
extension = "dashtoc" | |
[filetype.dcx] | |
description = "Multi-page ZSoft PC Paintbrush Image" | |
extension = "dcx" | |
handler = "py_helper" | |
header = [177, 104, 222, 58] | |
multipage = true | |
# TODO: See if the "ar" crate is an acceptable substitute and, if so, how much | |
# size it would add to the output binary. | |
[filetype.deb] | |
description = "Debian Package" | |
extension = "deb" | |
handler = "p7zip" | |
header = [33, 60, 97, 114, 99, 104, 62] | |
[filetype.dmg] | |
description = "Apple DMG Disk Image" | |
extension = "dmg" | |
handler = "p7zip" | |
header = [120, 1, 115, 13, 98, 98, 96] | |
[filetype.docm] | |
container = "zip" | |
description = "Macro-enabled OOXML Document" | |
extension = "docm" | |
[filetype.docx] | |
container = "zip" | |
description = "OOXML Document" | |
extension = "docx" | |
[filetype.epub] | |
container = "zip" | |
description = "EPUB e-book" | |
extension = "epub" | |
handler = "epubcheck" | |
# TODO: Decide how to extend the schema so the system is capable of ensuring | |
# that a corrupted example of a subtype cannot pass validation as an | |
# ancestor type. (eg. letting corruption outside the EXE portion of an | |
# InnoSetup installer pass because it passes validation as an EXE) | |
# | |
# Given how difficult it is to *reliably* prevent "corruption caused the | |
# identity check to fail", maybe version 1.0 of this schema should just | |
# forbid having a container and subtype both have handlers and, instead | |
# rely on one of two situations: | |
# | |
# 1. A whole bunch of sibling EXE validators (InnoSetup, RAR, etc.) with | |
# no validator on the base EXE format. (Would prevent things like | |
# authenticode validation, but could be revised later.) | |
# 2. Support unpacking the container and using normal validators for the | |
# contents. (Since most containers are archive formats... but this is | |
# less able to validate things like "must have a manifest file") | |
# | |
# (Maybe something involving making it an error if something fails | |
# validation based on its extension despite passing validation for the | |
# header-detected type if that header-detected type is a supertype of the | |
# extension-detected type... though that still feels like there might be | |
# some kinds of concatenated formats that might erroneously pass.) | |
# | |
# ...also support running ClamAV on EXE and COM files since I plan to run | |
# this before burning stuff to DVD+R for archival. | |
#[filetype.exe] | |
#description = "DOS/Windows Executable" | |
#extension = "exe" | |
#header = [77, 90] | |
# TODO: Add a definition for self-extracting Zip, RAR, etc. files | |
#[filetype.exe_innosetup] | |
#container = "exe" | |
#description = "Inno Setup Installer" | |
#handler = "innoextract" | |
[filetype.f4a] | |
description = "FLV Audio" | |
extension = "f4a" | |
handler = "ffmpeg" | |
[filetype.f4b] | |
description = "FLV Audiobook" | |
extension = "f4b" | |
handler = "ffmpeg" | |
[filetype.f4v] | |
description = "FLV Video" | |
extension = "f4v" | |
handler = "ffmpeg" | |
# TODO: See how much work it would take to use something like `claxon` or | |
# `cauldron` to do md5sum verification internally. | |
[filetype.flac] | |
description = "FLAC Audio" | |
extension = "flac" | |
handler = "flac" # TODO: Can FFmpeg be asked to check the md5sum? | |
header = [102, 76, 97, 67] | |
# See if the `flic` crate is suitable for this use
[filetype.fli]
description = "Autodesk Animator FLIC Animation"
extension = ["fli"]
handler = "py_helper"
# Magic 0xAF11, stored little-endian in the 16-bit `type` field, which comes
# directly after the 32-bit file-size field at the start of the FLIC header.
header = [17, 175]
header_offset = 4
# Source: http://steve.hollasch.net/cgindex/formats/fli.html
# See if the `flic` crate is suitable for this use
[filetype.flc]
description = "Autodesk Animator Pro FLIC Animation"
extension = ["flc"]
handler = "py_helper"
# Magic 0xAF12, stored little-endian in the 16-bit `type` field, which comes
# directly after the 32-bit file-size field at the start of the FLIC header.
header = [18, 175]
header_offset = 4
# Source: http://www.retroarchive.org/swag/MISC/0140.PAS.html
[filetype.flv] | |
description = "Flash Video" | |
extension = "flv" | |
handler = "ffmpeg" | |
header = [70, 76, 86] | |
# TODO: A Gameboy ROM must contain the Nintendo logo for its startup screen
# from offset 0x104 to 0x133, one of the longest signatures required in any
# file format.
[filetype.gif] | |
description = "GIF Image" | |
extension = "gif" | |
handler = "image" | |
header = [[71, 73, 70, 56, 55, 97], [71, 73, 70, 56, 57, 97]] | |
[filetype.gzip] | |
description = "GZip compressed" | |
extension = "gz" | |
handler = "gzip" | |
header = [31, 139] | |
[filetype.iso_dvdisaster]
description = "ISO 9660 CD/DVD image (DVDisaster ECC)"
extension = "iso"
handler = "dvdisaster"
# "CD001", the standard identifier of an ISO 9660 volume descriptor. The first
# descriptor starts at sector 16 (16 * 2048 = 32768 bytes) and the identifier
# follows its 1-byte type field, hence the offset below.
header = [67, 68, 48, 48, 49]
header_offset = 32769
# TODO: What does `jarsigner -verify` do on an unsigned JAR? | |
[filetype.jar] | |
container = "zip" | |
description = "Java ARchive" | |
extension = "jar" | |
[filetype.jpeg] | |
description = "JPEG/JFIF Image" | |
extension = ["jfi", "jfif", "jif", "jpe", "jpeg", "jpg"] | |
handler = "image" | |
header = [255, 216, 255] | |
# TODO: Either match only FF D8 (the actual JFIF magic number) or be | |
# *absolutely* certain that all relevant parsers restrict input to the | |
# combination I'm matching on. | |
[filetype.jpeg2k] | |
description = "JPEG 2000 Image" | |
extension = ["j2k", "jp2", "jpg2", "jpf", "jpx"] | |
handler = "py_helper" | |
header = [0, 0, 0, 12, 106, 80, 32, 32, 13, 10, 135, 10] | |
[filetype.json] | |
description = "JSON Data" | |
extension = "json" | |
handler = "json" | |
# TODO: Since LHA is used as a container for stuff, find a way to header-detect it
# TODO: When I have time, safety-dance the delharc crate and then decide if | |
# that makes it suitable for built-in LHA support. | |
[filetype.lha] | |
description = "LHA archive" | |
extension = ["lha", "lzh"] | |
handler = ["p7zip", "lsar"] # TODO: `lha tq {file}` | |
[filetype.lzip] | |
description = "Lzip compressed" | |
extension = "lz" | |
handler = "lzip" | |
header = [76, 90, 73, 80] | |
[filetype.lzma] | |
description = ".lzma compressed" | |
extension = "lzma" | |
handler = ["p7zip", "lsar", "py_helper"] | |
[filetype.lzx] | |
description = "LZX archive" | |
extension = "lzx" | |
handler = "lsar" | |
header = [76, 90, 88] | |
[filetype.m4a] | |
description = "MPEG-4 Part 14 Audio" | |
extension = "m4a" | |
handler = "ffmpeg" | |
[filetype.m4b] | |
description = "MPEG-4 Part 14 Audiobook" | |
extension = "m4b" | |
handler = "ffmpeg" | |
[filetype.m4r] | |
description = "MPEG-4 Part 14 Ringtone" | |
extension = "m4r" | |
handler = "ffmpeg" | |
[filetype.m4v] | |
description = "MPEG-4 Part 14 Video" | |
extension = "m4v" | |
handler = "ffmpeg" | |
[filetype.maff] | |
container = "zip" | |
description = "Mozilla Archive Format" | |
extension = "maff" | |
#[filetype.megadrive_rom] | |
#description = "SEGA Genesis/Megadrive ROM" | |
#header = [83, 69, 71, 65] | |
#header_offset = 256 | |
## TODO: handler (Must stay commented out until we have one) | |
[filetype.mk3d] | |
description = "Matroska Video (3D)" | |
extension = "mk3d" | |
handler = "ffmpeg" | |
header = [26, 69, 223, 163] | |
[filetype.mka] | |
description = "Matroska Audio" | |
extension = "mka" | |
handler = "ffmpeg" | |
header = [26, 69, 223, 163] | |
[filetype.mkv] | |
description = "Matroska Video" | |
extension = "mkv" | |
handler = "ffmpeg" | |
header = [26, 69, 223, 163] | |
[filetype.mov] | |
description = "Quicktime Video" | |
extension = "mov" | |
handler = "ffmpeg" | |
[filetype.mp1] | |
description = "MPEG Layer 1 Audio" | |
extension = "mp1" | |
handler = "ffmpeg" | |
[filetype.mp2] | |
description = "MPEG Layer 2 Audio" | |
extension = "mp2" | |
handler = "ffmpeg" | |
[filetype.mp3]
description = "MPEG Layer 3 Audio"
extension = "mp3"
handler = "ffmpeg"
# Either an ID3v2 tag ("ID3") or one of three MPEG frame-sync byte pairs
header = [
    [73, 68, 51],
    [255, 251],
    [255, 243],
    [255, 242],
]
[filetype.mp4] | |
description = "MPEG-4 Part 14 Video" | |
extension = "mp4" | |
handler = "ffmpeg" | |
header = [0, 0, 0, 32, 102, 116, 121, 112, 105, 115, 111, 109] | |
[filetype.mpeg] | |
description = "MPEG Video" | |
extension = ["mpe", "mpeg", "mpg"] | |
handler = "ffmpeg" | |
[filetype.mpeg_ts] | |
description = "MPEG Transport Stream" | |
extension = "ts" | |
handler = "ffmpeg" | |
[filetype.mpeg_tsa] | |
description = "MPEG Transport Stream Audio" | |
extension = "tsa" | |
handler = "ffmpeg" | |
[filetype.mpeg_tsv] | |
description = "MPEG Transport Stream Video" | |
extension = "tsv" | |
handler = "ffmpeg" | |
[filetype.ms_cab] | |
description = "Microsoft CAB" | |
extension = "cab" | |
handler = ["cabextract", "p7zip"] # NOTE: "lsar" reports "Unknown" with 0 exit | |
header = [77, 83, 67, 70] | |
[filetype.is_cab] | |
description = "InstallShield CAB" | |
extension = "cab" | |
handler = ["unshield"] # TODO: Do p7zip or lsar support any of these? | |
header = [73, 83, 99, 40] | |
[filetype.ms_cursor] | |
description = "Microsoft Cursor" | |
extension = "cur" | |
handler = "py_helper" | |
multipage = true | |
[filetype.ms_ico] | |
description = "Microsoft Icon" | |
extension = "ico" | |
handler = "image" | |
multipage = true | |
[filetype.msi] | |
description = "MSI Installer" | |
extension = "msi" | |
handler = ["p7zip", "lsar"] | |
[filetype.musepack] | |
description = "Musepack Audio" | |
extension = ["mp+", "mpc", "mpp"] | |
handler = "ffmpeg" | |
header = [[77, 80, 43], [77, 80, 67, 75]] | |
[filetype.odb] | |
container = "zip" | |
description = "ODF Database" | |
extension = "odb" | |
[filetype.odc] | |
container = "zip" | |
description = "ODF Chart" | |
extension = "odc" | |
[filetype.odf] | |
container = "zip" | |
description = "ODF Formula" | |
extension = "odf" | |
[filetype.odg] | |
container = "zip" | |
description = "ODF Drawing" | |
extension = "odg" | |
[filetype.odi] | |
container = "zip" | |
description = "ODF Image" | |
extension = "odi" | |
[filetype.odm] | |
container = "zip" | |
description = "ODF Master Document" | |
extension = "odm" | |
[filetype.odp] | |
container = "zip" | |
description = "ODF Presentation" | |
extension = "odp" | |
[filetype.ods] | |
container = "zip" | |
description = "ODF Spreadsheet" | |
extension = "ods" | |
[filetype.odt] | |
container = "zip" | |
description = "ODF Text Document" | |
extension = "odt" | |
[filetype.oga] | |
container = "ogx" | |
description = "Ogg containing audio (.oga)" | |
extension = "oga" | |
[filetype.ogg] | |
container = "ogx" | |
description = "Ogg Vorbis (.ogg)" | |
extension = "ogg" | |
[filetype.ogm] | |
container = "ogx" | |
description = "OGM (Ogg, Unofficial)" | |
extension = "ogm" | |
[filetype.ogv] | |
container = "ogx" | |
description = "Ogg containing video (.ogv)" | |
extension = "ogv" | |
[filetype.ogx] | |
description = "Ogg (unspecified) (.ogx)" | |
extension = "ogx" | |
handler = "ffmpeg" | |
header = [79, 103, 103, 83] | |
[filetype.opus] | |
description = "Opus Audio" | |
extension = "opus" | |
handler = "ffmpeg" | |
[filetype.otc] | |
container = "zip" | |
description = "ODF Chart Template" | |
extension = "otc" | |
[filetype.otf] | |
container = "zip" | |
description = "ODF Formula Template" | |
extension = "otf" | |
[filetype.otg] | |
container = "zip" | |
description = "ODF Drawing Template" | |
extension = "otg" | |
[filetype.oth] | |
container = "zip" | |
description = "ODF Web Page Template" | |
extension = "oth" | |
[filetype.oti] | |
container = "zip" | |
description = "ODF Image Template" | |
extension = "oti" | |
[filetype.otp] | |
container = "zip" | |
description = "ODF Presentation Template" | |
extension = "otp" | |
[filetype.ots] | |
container = "zip" | |
description = "ODF Spreadsheet Template" | |
extension = "ots" | |
[filetype.ott] | |
container = "zip" | |
description = "ODF Text Document Template" | |
extension = "ott" | |
[filetype.pbm]
description = "NetPBM Portable Bitmap Image"
extension = "pbm"
handler = "image"
# "P1\n" (plain/ASCII variant) or "P4\n" (raw/binary variant)
header = [[80, 49, 10], [80, 52, 10]]
# TODO: See how PIL and the `pcx` crate compare for catching errors | |
[filetype.pcx] | |
description = "ZSoft PC Paintbrush Image" | |
extension = "pcx" | |
handler = "py_helper" | |
header = [[10, 0, 1], [10, 2, 1], [10, 3, 1], [10, 4, 1], [10, 5, 1]] | |
# Source: https://www.fileformat.info/format/pcx/egff.htm | |
# TODO: See if the `pdf` or `lopdf` crate could replace pdftotext in this role | |
[filetype.pdf] | |
description = "PDF Document" | |
extension = "pdf" | |
handler = "pdftotext" | |
header = [[37, 80, 68, 70, 45, 49, 46], [37, 80, 68, 70, 0]] | |
# NOTE: https://www.garykessler.net/library/file_sigs.html says the PDF format | |
# also has a trailer, which could be useful for looking for ways to split | |
# and individually check the parts of a concatenated stream. | |
# NOTE: According to pocorgtfo07.pdf, some PDF parsers will accept this | |
# occurring anywhere within the first 1024 bytes and some only look for | |
# the PDF trailer and will accept a missing header, so I'll need something | |
# smarter. | |
# TODO: As hinted previously, PDF can be easily abused. For security reasons, | |
# Adobe Reader, the standard PDF reader, has blacklisted known magic | |
# signatures such as PNG or PE since version 10.1.5. It is thus not | |
# possible anymore to have a valid polyglot that would open in Adobe | |
# Reader as PDF. This is a good security measure even if it breaks | |
# compatibility with older releases of PoC‖GTFO. | |
# -- pocorgtfo07.pdf | |
[filetype.pgm]
description = "NetPBM Portable Graymap Image"
extension = "pgm"
handler = "image"
# "P2\n" (plain/ASCII variant) or "P5\n" (raw/binary variant)
header = [[80, 50, 10], [80, 53, 10]]
# NOTE: According to the PICO-8 Wiki, .p8.png files store the game data in the | |
# least significant bits of the screenshot and don't need special handling here | |
[filetype.png] | |
description = "PNG Image" | |
extension = "png" | |
handler = "image" | |
header = [137, 80, 78, 71, 13, 10, 26, 10] | |
[filetype.potm] | |
container = "zip" | |
description = "Macro-enabled OOXML Presentation Template" | |
extension = "potm" | |
[filetype.ppm]
description = "NetPBM Portable Pixmap Image"
extension = "ppm"
handler = "image"
# "P3\n" (plain/ASCII variant) or "P6\n" (raw/binary variant)
header = [[80, 51, 10], [80, 54, 10]]
[filetype.ppsx] | |
container = "zip" | |
description = "OOXML Presentation (Self-Starting)" | |
extension = "ppsx" | |
[filetype.pptm] | |
container = "zip" | |
description = "Macro-enabled OOXML Presentation" | |
extension = "pptm" | |
[filetype.pptx] | |
container = "zip" | |
description = "OOXML Presentation" | |
extension = "pptx" | |
# TODO: See if it's feasible to use `python-parser` or `rustpython-parser` | |
# for this and, if so, how much bigger it makes the output file. | |
[filetype.py] | |
description = "Python Source Code" | |
extension = "py" | |
handler = "py_helper" | |
[filetype.pyc] | |
description = "Python Bytecode" | |
extension = "pyc" | |
handler = "ignore" | |
[filetype.pyo] | |
description = "Python Bytecode (Optimized)" | |
extension = "pyo" | |
handler = "ignore" | |
[filetype.ra] | |
description = "RealAudio" | |
extension = "ra" | |
handler = "ffmpeg" | |
[filetype.rar] | |
description = "RAR archive" | |
extension = "rar" | |
handler = ["unrar", "p7zip", "lsar"] | |
header = [82, 97, 114, 33, 26, 7] | |
[filetype.rdf] | |
description = "RDF Document" | |
extension = "rdf" | |
container = "xml" | |
[filetype.rm] | |
description = "RealMedia Video" | |
extension = "rm" | |
handler = "ffmpeg" | |
[filetype.rmvb] | |
description = "RealMedia Video (VBR)" | |
extension = "rmvb" | |
handler = "ffmpeg" | |
# TODO: See if the `rpm-rs` crate can replace `rpm --checksig` and how much | |
# size it would add to the output binary. | |
[filetype.rpm] | |
description = "RPM Package" | |
extension = "rpm" | |
handler = ["rpm", "p7zip"] | |
header = [237, 171, 238, 219] | |
[filetype.rsn] | |
container = "rar" | |
description = "RAR-compressed SPC chiptune album" | |
extension = "rsn" | |
[filetype.rss] | |
description = "RSS Feed" | |
extension = "rss" | |
container = "xml" | |
[filetype.rv] | |
description = "RealVideo" | |
extension = "rv" | |
handler = "ffmpeg" | |
[filetype.sea] | |
description = "Self-Extracting Stuffit archive" | |
extension = "sea" | |
handler = "lsar" | |
[filetype.shorten] | |
description = "Shorten Audio" | |
extension = "shn" | |
handler = "ffmpeg" | |
[filetype.sit]
description = "Stuffit archive"
extension = "sit"
handler = "lsar"
# Either "SIT!\0" or "StuffIt (c)1997-", as decimal bytes like the other
# headers in this file
header = [
    [83, 73, 84, 33, 0],
    [83, 116, 117, 102, 102, 73, 116, 32, 40, 99, 41, 49, 57, 57, 55, 45],
]
[filetype.speex] | |
description = "Speex Audio" | |
extension = "spx" | |
handler = "ffmpeg" | |
[filetype.sqlite3] | |
description = "SQLite3 Database" | |
extension = "sqlite3" | |
handler = "py_helper" | |
header = [83, 81, 76, 105, 116, 101, 32, 102, 111, 114, 109, 97, 116, 32, 51, 0] | |
[filetype.sun_au] | |
description = "Sun Audio (with header)" | |
handler = "ffmpeg" | |
header = [46, 115, 110, 100] | |
[filetype.svg] | |
description = "SVG Image" | |
extension = "svg" | |
container = "xml" | |
[filetype.svgz] | |
container = "gzip" | |
description = "SVG Image (GZip compressed)" | |
extension = "svgz" | |
[filetype.tar] | |
description = "Tar archive" | |
extension = "tar" | |
handler = ["p7zip", "lsar", "py_helper"] | |
header = [[117, 115, 116, 97, 114, 0, 48, 48], [117, 115, 116, 97, 114, 32, 32, 0]] | |
header_offset = 257 | |
[filetype.targa] | |
description = "Truevision TGA Image" | |
extension = "tga" | |
handler = "image" | |
# NOTE: https://www.garykessler.net/library/file_sigs.html lists a trailer for | |
# these files but it's optional and not present in my test files. | |
# TODO: Rework how extensions are handled so ".tar.X" can result in an error | |
# if unpacking the ".X" compression produces something other than a ".tar". | |
[filetype.tbz2] | |
container = "bzip2" | |
description = "Tar archive (BZip2 compressed)" | |
extension = "tbz2" | |
[filetype.tgz] | |
container = "gzip" | |
description = "Tar archive (GZip compressed)" | |
extension = "tgz" | |
[filetype.tiff] | |
description = "TIFF Image" | |
extension = ["tif", "tiff"] | |
handler = "image" | |
header = [[73, 73, 42, 0], [77, 77, 0, 42]] | |
[filetype.tlz] | |
container = "lzma" | |
description = "Tar archive (.lzma compressed)" | |
extension = "tlz" | |
[filetype.toml] | |
description = "TOML Data" | |
extension = "toml" | |
handler = "toml" | |
[filetype.txt] | |
description = "Plaintext" | |
extension = "txt" | |
handler = "txt" | |
[filetype.txz] | |
container = "xz" | |
description = "Tar archive (.xz compressed)" | |
extension = "txz" | |
[filetype.uu] | |
description = "UUEncoded" | |
extension = ["uu", "uue"] | |
handler = "py_helper" | |
[filetype.vgacopy] | |
description = "Compressed VGA-COPY/386 Floppy Disk Image" | |
extension = "vcp" | |
container = "arj" | |
[filetype.voc] | |
description = "Creative Labs VOC Audio" | |
extension = "voc" | |
handler = "ffmpeg" | |
header = [67, 114, 101, 97, 116, 105, 118, 101, 32, 86, 111, 105, 99, 101, 32, 70] | |
[filetype.wave] | |
description = "Microsoft Waveform Audio" | |
extension = "wav" | |
handler = "ffmpeg" | |
# TODO: Rework deserializing to support a header of the form
# [0x52, 0x49, 0x46, 0x46, ??, ??, ??, ??, 0x57, 0x41, 0x56, 0x45]
# (i.e. "RIFF", a 4-byte chunk size, then "WAVE")
[filetype.wavpack] | |
description = "WavPack Audio" | |
extension = "wv" | |
handler = "ffmpeg" | |
header = [119, 118, 112, 107] | |
[filetype.webm] | |
description = "WebM Video" | |
extension = "webm" | |
handler = "ffmpeg" | |
header = [26, 69, 223, 163] | |
[filetype.webp] | |
description = "WebP Image" | |
extension = "webp" | |
handler = "py_helper" # TODO: Check if `image` validates despite only supporting luma | |
# TODO: Rework deserializing to support a header of the form
# [0x52, 0x49, 0x46, 0x46, ??, ??, ??, ??, 0x57, 0x45, 0x42, 0x50]
# (i.e. "RIFF", a 4-byte chunk size, then "WEBP")
[filetype.wma] | |
container = "asf" | |
description = "Windows Media Audio" | |
extension = "wma" | |
[filetype.wmv] | |
container = "asf" | |
description = "Windows Media Video" | |
extension = "wmv" | |
[filetype.xbm] | |
description = "X BitMap Image" | |
extension = "xbm" | |
handler = "py_helper" | |
[filetype.xlsm] | |
container = "zip" | |
description = "Macro-enabled OOXML Workbook" | |
extension = "xlsm" | |
[filetype.xlsx] | |
container = "zip" | |
description = "OOXML Workbook" | |
extension = "xlsx" | |
[filetype.xml] | |
description = "XML Data" | |
extension = "xml" | |
handler = "py_helper" | |
header = [60, 63, 120, 109, 108, 32] | |
# TODO: Decide how to handle formats that are less rigid on their 'header' | |
# format. | |
[filetype.xpi] | |
container = "zip" | |
description = "Mozilla XPInstall archive" | |
extension = "xpi" | |
[filetype.xpm] | |
description = "X PixMap Image" | |
extension = "xpm" | |
handler = "py_helper" | |
header = [47, 42, 32, 88, 80, 77, 32, 42, 47] | |
[filetype.ym] | |
container = "lha" | |
description = "Atari ST/Amstrad CPC YM2149 Chiptune" | |
extension = "ym" | |
[filetype.xz] | |
description = ".xz compressed" | |
extension = "xz" | |
handler = ["p7zip", "lsar", "py_helper"] | |
header = [253, 55, 122, 88, 90, 0] | |
# TODO: Decide how to disable fallback for formats that use Zip as a container | |
# and aren't supposed to support arbitrary compression algorithms. | |
# (Maybe a flag which means "Only use fallback if the handler is missing, | |
# not if it reports failure" combined with having the test suite verify | |
# that the first choice is something which can be specified to only | |
# allow the desired compression algorithms.) | |
[filetype.zip] | |
description = "Zip archive" | |
extension = "zip" | |
handler = ["zip", "p7zip", "lsar"] | |
header = [[80, 75, 1, 2], [80, 75, 3, 4], [80, 75, 5, 6], [80, 75, 7, 8]] | |
# NOTE: Must stay commented out until `filetype.exe` is enabled, because that
#       type is named as a container here. It also still needs an extension or
#       header so that something can actually be matched against it.
#[filetype.zipsfx]
#container = ["exe", "zip"]
#description = "Self-extracting Zip archive (.exe)"
[filetype.zoo] | |
description = "Zoo archive" | |
extension = "zoo" | |
handler = "lsar" | |
header = [90, 79, 79] | |
# TODO: Integrate the rest of the formats supported by lsar: | |
# (https://github.com/ashang/unar#supported-old-formats) | |
[[override]] | |
path = "*/hts-cache/new.zip" | |
ignore = true | |
[[override]] | |
path = "*/.git" | |
ignore = true | |
# TODO: Decide how to indicate how thorough a handler is so something like | |
# "unpack the Zip container" isn't treated as equivalent to "unpack the Zip | |
# container and check the images inside the CBZ for corruption" but, at the | |
# same time, also come up with a way for a built-in handler to report | |
# "Unsupported variant of the filetype. Fall back to an external processor if | |
# available". | |
[handler.arj] | |
argv = ["arj", "t"] | |
description = "Open-source ARJ" | |
sources = "http://arj.sourceforge.net/" | |
[handler.cabextract] | |
argv = ["cabextract", "-t"] | |
sources = "https://www.cabextract.org.uk/" | |
[handler.dvdisaster] | |
argv = ["dvdisaster", "-t", "-i"] | |
sources = "https://en.wikipedia.org/wiki/Dvdisaster" | |
# TODO: Automated tests to verify that this is the best combination of | |
# error-detection flags to invoke ffmpeg with. | |
# - https://superuser.com/a/100290/48014 | |
# - https://superuser.com/q/588147/48014 | |
# - https://superuser.com/q/326629/48014 | |
# - http://www.ffmpeg-archive.org/how-to-set-flags-td936142.html | |
# - https://www.reddit.com/r/DataHoarder/comments/bgsgr2/how_to_check_if_a_video_file_is_corrupted/ | |
[handler.ffmpeg]
# Decodes the input to the null muxer so that decode errors surface through
# the exit status (-xerror) while normal log output is silenced (-v quiet).
#
# "-v" and its value are separate argv elements; assuming elements are passed
# to the process verbatim, a single "-v quiet" element would not be recognised
# as an option. ffmpeg applies per-file options such as -err_detect to the
# next file named on the command line, so they are placed before
# "-i {path}" and the null output comes last.
argv = [
    "ffmpeg",
    "-v", "quiet",
    "-xerror",
    "-err_detect", "+crccheck+bitstream+buffer+explode+careful",
    "-i", "{path}",
    "-f", "null", "-",
]
description = "FFmpeg"
sources = "https://ffmpeg.org/download.html"
[handler.flac] | |
argv = ["flac", "-t"] | |
description = "The FLAC command-line utilities" | |
sources = "https://xiph.org/flac/download.html" | |
[handler.innoextract] | |
argv = ["innoextract", "-t", "-g"] | |
sources = "https://constexpr.org/innoextract/" | |
[handler.lsar] | |
argv = ["lsar", "-t"] | |
description = "The command-line tools for The Unarchiver" | |
sources = "https://theunarchiver.com/command-line" | |
# TODO: Maybe set fail_if_stdout = "... Unknown." | |
[handler.lzip] | |
argv = ["lzip", "-t"] | |
sources = "https://www.nongnu.org/lzip/lzip.html" | |
[handler.p7zip] | |
argv = ["7z", "t"] | |
description = "Command-line '7z' tool from 7-zip" | |
sources = ["https://7-zip.org/", "http://p7zip.sourceforge.net/"] | |
[handler.pdftotext] | |
argv = ["pdftotext", "{path}", "{devnull}"] | |
description = "pdftotext from Poppler or the Xpdf command-line tools" | |
fail_if_stderr = "Error" | |
sources = ["https://linuxappfinder.com/package/poppler-utils", | |
"https://www.xpdfreader.com/download.html"] | |
[handler.py_helper] | |
argv = ["verify-files-helper", "{filetype}", "{path}"] | |
description = "Python-based helper for more esoteric formats" | |
# TODO: sources = the repo for this project | |
[handler.rpm] | |
argv = ["rpm", "--checksig"] | |
description = "RPM" | |
sources = ["https://rpm.org/", "https://www.cygwin.com/"] | |
[handler.unrar] | |
argv = ["unrar", "t"] | |
description = "RARLAB UnRAR" | |
sources = "https://www.rarlab.com/rar_add.htm" | |
[handler.unshield] | |
argv = ["unshield", "t"] | |
description = "Unshield" | |
sources = "https://github.com/twogood/unshield" |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment