Created
April 19, 2015 06:20
-
-
Save mgsmus/d54dd99d497d9b61004a to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php | |
/************************************************************* | |
* Author: Shane Thompson * | |
* Date: June 2013 * | |
* Version: 0.1 * | |
* Copyright: (c) Shane Thompson 2012 * | |
* * | |
* Another quality product of Social-Library.org * | |
* * | |
* The code herein comes with NO warranty, implied or * | |
* otherwise. * | |
* Neither Shane Thompson nor Social-Library will be held * | |
* responsible should any problems arise from the use of * | |
* this code. * | |
* * | |
* All rights reserved 2012 * | |
* Copyright Social-Library.org 2012 * | |
* * | |
* This code is free to distribute/copy/re-use. It is * | |
* released to the public domain and as such has no * | |
* limitations. Credit appreciated where possible. * | |
* * | |
* * | |
* The following MIME sniffing class is based * | |
* on the specification published at the following * | |
* URL: http://mimesniff.spec.whatwg.org/ * | |
* * | |
* To use this class, just pass the filename or a file * | |
* resource to the construct - the class will then examine * | |
* the file and find the MIME type. * | |
* * | |
* The Public API: * | |
* - get_type: Gets the determined MIME type, returns * | |
* application/octet-stream if the MIME type * | |
* is unknown. * | |
* - is_text: Whether or not the file is plain text * | |
* (UTF-8 or not) * | |
* - is_font: Whether the file is a font file * | |
* - is_zip: Whether the file is zipped * | |
* - is_archive: Whether the file is an archive * | |
* - is_scriptable: * | |
* Determines whether the selected MIME type * | |
* may contain executable scripts * | |
************************************************************/ | |
/**
 * Content-based MIME type sniffer (after http://mimesniff.spec.whatwg.org/).
 *
 * Construct it with either a file name or an already opened, seekable file
 * resource. The constructor reads the first and last 512 bytes of the file
 * and immediately runs the detection; the result is then available through
 * get_type() and the is_*() helpers.
 *
 * Signature tables are built once and cached in static properties. Every
 * table entry is an array with the keys:
 *   - 'mime'    => MIME type reported on a match
 *   - 'pattern' => bytes expected at the start of the file
 *   - 'mask'    => bytes AND-ed with the file data before comparing
 *   - 'ignore'  => leading bytes that may be skipped before the pattern
 */
class MimeReader {
    protected $file = null, $detected_type = null, $num_bytes = 0, $header = null, $footer = null;
    protected static $binary_characters = '', $whitespace_characters = '', $tag_terminating_characters = '';
    protected static $image = null, $media = null, $fonts = null, $ms_office = null, $archive = null, $text = null, $others = null, $unknown = null, $html = null;

    /**
     * @param string|resource $file Path of the file to examine, or an open
     *                              seekable stream. A stream is left at the
     *                              position it had before the call.
     */
    public function __construct( $file ) {
        $this->file      = $file;
        $this->num_bytes = 512;

        self::init_tables();

        $this->read_resource_header();
        $this->read_resource_footer();
        $this->detect_type();
    }

    /* The public API */

    /**
     * @return bool True when the file was detected as PostScript or plain text.
     */
    public function is_text() {
        return 'application/postscript' === $this->detected_type
            || 'text/plain' === $this->detected_type;
    }

    /**
     * @return bool True when the detected type is one of the font types.
     */
    public function is_font() {
        // Includes every font MIME type the signature table can emit, plus
        // the aliases the original list accepted.
        $font_types = array(
            'application/font-ttf',
            'application/font-cff',
            'application/font-otf',
            'application/font-off',
            'application/font-sntf',
            'application/vds.ms-opentype', // historical misspelling, kept for compatibility
            'application/vnd.ms-opentype',
            'application/x-font-truetype-collection',
            'application/font-woff',
            'application/vnd.ms-fontobject',
        );
        return in_array( $this->detected_type, $font_types, true );
    }

    /**
     * @return bool True for application/zip and any "+zip" suffixed type.
     */
    public function is_zip() {
        if ( 'application/zip' === $this->detected_type ) {
            return true;
        }
        return substr( (string) $this->detected_type, -4 ) === '+zip';
    }

    /**
     * @return bool True for RAR, ZIP and GZIP files.
     */
    public function is_archive() {
        $archive_types = array(
            'application/x-rar-compressed',
            'application/zip',
            'application/x-gzip',
        );
        return in_array( $this->detected_type, $archive_types, true );
    }

    /**
     * @return bool True when the detected type may carry executable script
     *              (HTML, PDF, PostScript).
     */
    public function is_scriptable() {
        $scriptable_types = array(
            'text/html',
            'application/pdf',
            'application/postscript',
        );
        return in_array( $this->detected_type, $scriptable_types, true );
    }

    /**
     * @return string The detected MIME type, or application/octet-stream
     *                when no signature matched.
     */
    public function get_type() {
        return null === $this->detected_type ? 'application/octet-stream' : $this->detected_type;
    }

    /**
     * Helper functions.
     * Execution is passed over to detect_type after the
     * construct sets up the data.
     */

    /**
     * Builds the shared character sets and signature tables on first use.
     * Each table is only built while its static property is still null, so
     * repeated construction is cheap.
     */
    private static function init_tables() {
        if ( empty( self::$binary_characters ) ) {
            // 0x00-0x08, 0x0B, 0x0E-0x1A, 0x1C-0x1F
            self::$binary_characters .= "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x0B\x0E\x0F\x10\x11";
            self::$binary_characters .= "\x12\x13\x14\x15\x16\x17\x18\x19\x1A\x1C\x1D\x1E\x1F";
        }
        if ( empty( self::$whitespace_characters ) ) {
            self::$whitespace_characters .= "\x09\x0A\x0C\x0D\x20";
        }
        if ( empty( self::$tag_terminating_characters ) ) {
            self::$tag_terminating_characters .= "\x20\x3E";
        }

        // Mask used by the RIFF/FORM containers: 4 tag bytes, 4 ignored size
        // bytes; the sub-type mask is appended per entry.
        $container_mask = "\xFF\xFF\xFF\xFF\x00\x00\x00\x00";
        $size_bytes     = "\x00\x00\x00\x00";

        if ( is_null( self::$image ) ) {
            self::$image = array(
                // Windows Icon
                self::signature( 'image/vnd.microsoft.icon', "\x00\x00\x01\x00" ),
                // "BM" - BMP signature
                self::signature( 'image/bmp', 'BM' ),
                // "GIF87a" / "GIF89a" - GIF signatures
                self::signature( 'image/gif', 'GIF87a' ),
                self::signature( 'image/gif', 'GIF89a' ),
                // "RIFF" followed by 4 bytes followed by "WEBPVP"
                self::signature( 'image/webp', 'RIFF' . $size_bytes . 'WEBPVP', $container_mask . str_repeat( "\xFF", 6 ) ),
                // 0x89 "PNG" CR LF SUB LF - PNG signature
                self::signature( 'image/png', "\x89\x50\x4E\x47\x0D\x0A\x1A\x0A" ),
                // JPEG start of image marker followed by another marker
                self::signature( 'image/jpeg', "\xFF\xD8\xFF" ),
                // "8BPS" - PSD signature
                self::signature( 'application/psd', '8BPS' ),
            );
        }

        if ( is_null( self::$media ) ) {
            self::$media = array(
                // The WebM signature
                self::signature( 'video/webm', "\x1A\x45\xDF\xA3" ),
                // The .snd signature
                self::signature( 'audio/basic', '.snd' ),
                // "FORM" followed by 4 bytes followed by "AIFF" - the AIFF signature
                self::signature( 'audio/aiff', 'FORM' . $size_bytes . 'AIFF', $container_mask . str_repeat( "\xFF", 4 ) ),
                // MP3 without ID3 tag /****** UNTESTED ******/
                self::signature( 'audio/mpeg', "\xFF\xFB" ),
                // "ID3" - the ID3v2-tagged MP3 signature
                self::signature( 'audio/mpeg', 'ID3' ),
                // "OggS" followed by NUL - The OGG signature
                self::signature( 'application/ogg', "\x4F\x67\x67\x53\x00" ),
                // "MThd" followed by the number 6 as a 32 bit big endian integer - MIDI signature
                self::signature( 'audio/midi', "\x4D\x54\x68\x64\x00\x00\x00\x06" ),
                // "RIFF" followed by 4 bytes followed by "AVI " - AVI signature
                self::signature( 'video/avi', 'RIFF' . $size_bytes . 'AVI ', $container_mask . str_repeat( "\xFF", 4 ) ),
                // "RIFF" followed by 4 bytes followed by "WAVE" - WAVE signature
                self::signature( 'audio/wave', 'RIFF' . $size_bytes . 'WAVE', $container_mask . str_repeat( "\xFF", 4 ) ),
            );
        }

        if ( is_null( self::$fonts ) ) {
            self::$fonts = array(
                // 34 arbitrary bytes followed by "LP" - Embedded OpenType signature
                self::signature( 'application/vnd.ms-fontobject', str_repeat( "\x00", 34 ) . 'LP', str_repeat( "\x00", 34 ) . "\xFF\xFF" ),
                // 4 bytes representing version type 1 of true type font
                self::signature( 'application/font-ttf', "\x00\x01\x00\x00" ),
                // "OTTO" - Opentype signature (also known as application/vnd.ms-opentype)
                self::signature( 'application/font-off', 'OTTO' ),
                // "ttcf" - Truetype Collection signature
                self::signature( 'application/x-font-truetype-collection', 'ttcf' ),
                // "wOFF" - Web Open Font Format signature
                self::signature( 'application/font-woff', 'wOFF' ),
            );
        }

        if ( is_null( self::$archive ) ) {
            self::$archive = array(
                // GZIP signature
                self::signature( 'application/x-gzip', "\x1F\x8B\x08" ),
                // "PK" followed by ETX, EOT - ZIP signature
                self::signature( 'application/zip', "\x50\x4B\x03\x04" ),
                // "Rar " followed by SUB, BEL, NUL - RAR signature
                self::signature( 'application/x-rar-compressed', "\x52\x61\x72\x20\x1A\x07\x00" ),
            );
        }

        if ( is_null( self::$text ) ) {
            self::$text = array(
                // "%!PS-Adobe-" - Postscript signature
                self::signature( 'application/postscript', '%!PS-Adobe-' ),
                // UTF-16 Little Endian BOM text
                self::signature( 'text/plain', "\xFF\xFE" ),
                // UTF-16 Big Endian BOM text
                self::signature( 'text/plain', "\xFE\xFF" ),
                // UTF-8 BOM text
                self::signature( 'text/plain', "\xEF\xBB\xBF" ),
            );
        }

        if ( is_null( self::$others ) ) {
            // Intentionally empty. Executable signatures ("MZ" for Windows
            // executables, 0x7F "ELF" for ELF binaries) were considered by
            // the author but are not enabled.
            self::$others = array();
        }

        if ( is_null( self::$ms_office ) ) {
            self::$ms_office = array(
                // ZIP based Office Open XML container; refined in sniff_msoffice()
                self::signature( 'application/office-x', "\x50\x4B\x03\x04\x14\x00\x06\x00" ),
                // OLE2 compound document; refined in sniff_msoffice()
                self::signature( 'application/office', "\xD0\xCF\x11\xE0\xA1\xB1\x1A\xE1" ),
            );
        }

        if ( is_null( self::$unknown ) ) {
            $html_tags = array(
                '<!DOCTYPE HTML', '<HTML', '<HEAD', '<SCRIPT', '<IFRAME', '<H1',
                '<DIV', '<FONT', '<TABLE', '<A', '<STYLE', '<TITLE', '<B',
                '<BODY', '<BR', '<P', '<!--',
            );
            $unknown = array();
            foreach ( $html_tags as $tag ) {
                $unknown[] = self::html_signature( $tag );
            }
            // "<?xml"
            $unknown[] = self::signature( 'text/xml', '<?xml', null, self::$whitespace_characters );
            // "%PDF" - PDF signature
            $unknown[] = self::signature( 'application/pdf', '%PDF' );
            self::$unknown = $unknown;
        }
    }

    /**
     * Builds one signature table entry.
     *
     * @param string      $mime    MIME type reported on a match.
     * @param string      $pattern Expected (already masked) bytes.
     * @param string|null $mask    Byte mask; null means "compare every bit".
     * @param string      $ignore  Leading bytes that may be skipped.
     * @return array Entry with the keys mime, pattern, mask and ignore.
     */
    private static function signature( $mime, $pattern, $mask = null, $ignore = '' ) {
        if ( null === $mask ) {
            $mask = str_repeat( "\xFF", strlen( $pattern ) );
        }
        return array(
            'mime'    => $mime,
            'pattern' => $pattern,
            'mask'    => $mask,
            'ignore'  => $ignore,
        );
    }

    /**
     * Builds a text/html entry for an upper-case tag prefix. Letters get the
     * mask 0xDF (which clears the ASCII case bit) so that "<html" and
     * "<HTML" both match; every other byte must match exactly.
     *
     * @param string $tag Upper-case tag prefix such as "<HTML".
     * @return array Signature table entry.
     */
    private static function html_signature( $tag ) {
        $mask   = '';
        $length = strlen( $tag );
        for ( $i = 0; $i < $length; $i++ ) {
            $is_letter = ( $tag[$i] >= 'A' && $tag[$i] <= 'Z' );
            $mask     .= $is_letter ? "\xDF" : "\xFF";
        }
        return self::signature( 'text/html', $tag, $mask, self::$whitespace_characters );
    }

    /**
     * Loads the first $num_bytes bytes of the file into $header unless at
     * least that many bytes are already buffered.
     */
    protected function read_resource_header() {
        // We already have that many bytes...
        if ( is_string( $this->header ) && strlen( $this->header ) >= $this->num_bytes ) {
            return;
        }
        $this->header = $this->read_bytes( false );
    }

    /**
     * Loads the last $num_bytes bytes of the file into $footer unless at
     * least that many bytes are already buffered.
     */
    protected function read_resource_footer() {
        if ( is_string( $this->footer ) && strlen( $this->footer ) >= $this->num_bytes ) {
            return;
        }
        $this->footer = $this->read_bytes( true );
    }

    /**
     * Reads up to $num_bytes bytes from the start or the end of the file.
     *
     * @param bool $from_end True to read the tail, false to read the head.
     * @return string The bytes read; '' when the file cannot be opened or
     *                $file is neither a path nor a stream resource.
     */
    private function read_bytes( $from_end ) {
        $length = (int) $this->num_bytes;
        if ( $length < 1 ) {
            return '';
        }

        if ( is_string( $this->file ) ) {
            $fp = fopen( $this->file, 'rb' );
            if ( false === $fp ) {
                // fopen() has already raised a warning; treat as empty.
                return '';
            }
            $data = self::read_chunk( $fp, $length, $from_end );
            fclose( $fp );
            return $data;
        }

        if ( !is_resource( $this->file ) ) {
            return '';
        }

        // The caller's stream may be positioned anywhere: remember the
        // position, read, then put it back.
        $position = ftell( $this->file );
        $data     = self::read_chunk( $this->file, $length, $from_end );
        if ( false !== $position ) {
            fseek( $this->file, $position, SEEK_SET );
        }
        return $data;
    }

    /**
     * Seeks to the requested end of the stream and reads $length bytes.
     *
     * @param resource $fp       Open stream.
     * @param int      $length   Maximum number of bytes to read.
     * @param bool     $from_end True to read the tail of the stream.
     * @return string Bytes read ('' on failure).
     */
    private static function read_chunk( $fp, $length, $from_end ) {
        if ( !$from_end || -1 === fseek( $fp, -$length, SEEK_END ) ) {
            // Either the head was requested, or the file is shorter than
            // $length so seeking back from the end failed: read from byte 0.
            fseek( $fp, 0, SEEK_SET );
        }
        $data = fread( $fp, $length );
        return false === $data ? '' : $data;
    }

    /**
     * Locates $pattern at the start of the header (after skipping any
     * leading $ignore bytes) and returns the offset just past it.
     *
     * @return int|false Offset of the byte following the match, or false
     *                   when the header does not match.
     */
    private function pattern_end_offset( $pattern, $mask, $ignore ) {
        $pattern     = (string) $pattern;
        $mask        = (string) $mask;
        $pattern_len = strlen( $pattern );
        if ( 0 === $pattern_len || $pattern_len !== strlen( $mask ) ) {
            return false;
        }

        $sequence = (string) $this->header;
        // Number of leading bytes that belong to the ignore set.
        $start = strspn( $sequence, (string) $ignore );

        // Not enough data left to hold the whole pattern.
        if ( strlen( $sequence ) - $start < $pattern_len ) {
            return false;
        }

        // Bitwise AND on two equal-length strings works byte by byte.
        $masked = substr( $sequence, $start, $pattern_len ) & $mask;
        if ( $masked !== $pattern ) {
            return false;
        }
        return $start + $pattern_len;
    }

    /**
     * Tests whether the header starts with $pattern under $mask.
     *
     * @param string $pattern Expected bytes.
     * @param string $mask    Mask of the same length as $pattern.
     * @param string $ignore  Leading bytes that may be skipped first.
     * @return bool
     */
    protected function match_pattern( $pattern, $mask, $ignore ) {
        return false !== $this->pattern_end_offset( $pattern, $mask, $ignore );
    }

    /**
     * Like match_pattern(), but additionally requires the byte following the
     * pattern to be tag-terminating (a space or ">").
     *
     * @return bool
     */
    protected function html_match_pattern( $pattern, $mask, $ignore ) {
        $end = $this->pattern_end_offset( $pattern, $mask, $ignore );
        if ( false === $end || $end >= strlen( (string) $this->header ) ) {
            return false;
        }
        // strpos() may legitimately return 0 (a space), so compare with false.
        return false !== strpos( self::$tag_terminating_characters, $this->header[$end] );
    }

    /**
     * Returns the first entry of $table whose signature matches the header.
     *
     * @param array|null $table One of the static signature tables.
     * @return array|false The matching entry, or false.
     */
    private function first_match( $table ) {
        if ( !is_array( $table ) ) {
            return false;
        }
        foreach ( $table as $entry ) {
            if ( $this->match_pattern( $entry['pattern'], $entry['mask'], $entry['ignore'] ) ) {
                return $entry;
            }
        }
        return false;
    }

    /**
     * Stores the MIME type of the first matching entry of $table.
     *
     * @return bool True when an entry matched.
     */
    private function sniff_table( $table ) {
        $entry = $this->first_match( $table );
        if ( false === $entry ) {
            return false;
        }
        $this->detected_type = $entry['mime'];
        return true;
    }

    /**
     * Runs the sniffers in priority order and stops at the first hit.
     * MS Office is tried before archives because OOXML files are ZIPs.
     */
    protected function detect_type() {
        if ( $this->sniff_images() ) return;
        if ( $this->sniff_media() ) return;
        if ( $this->sniff_fonts() ) return;
        if ( $this->sniff_msoffice() ) return;
        if ( $this->sniff_archive() ) return;
        if ( $this->sniff_text() ) return;
        if ( $this->sniff_unknown() ) return;
        if ( $this->sniff_others() ) return;
    }

    /* Sniffer functions */

    protected function sniff_images() {
        return $this->sniff_table( self::$image );
    }

    protected function sniff_media() {
        if ( $this->sniff_table( self::$media ) ) {
            return true;
        }
        if ( $this->sniff_mp4() ) {
            $this->detected_type = 'video/mp4';
            return true;
        }
        return false;
    }

    /**
     * Checks for an MP4 "ftyp" box whose major or compatible brands start
     * with "mp4".
     *
     * @return bool
     */
    protected function sniff_mp4() {
        $sequence = (string) $this->header;
        $seq_len  = strlen( $sequence );
        if ( $seq_len < 12 ) {
            return false;
        }

        // First 4 bytes: box size as a big endian 32 bit integer.
        $unpacked = unpack( 'N', substr( $sequence, 0, 4 ) );
        $box_size = $unpacked[1];
        if ( $seq_len < $box_size || 0 !== $box_size % 4 ) {
            return false;
        }
        if ( 'ftyp' !== substr( $sequence, 4, 4 ) ) {
            return false;
        }

        // Major brand.
        if ( 'mp4' === substr( $sequence, 8, 3 ) ) {
            return true;
        }
        // Compatible brands: 4 byte entries starting after the minor version.
        for ( $offset = 16; $offset < $box_size; $offset += 4 ) {
            if ( 'mp4' === substr( $sequence, $offset, 3 ) ) {
                return true;
            }
        }
        return false;
    }

    protected function sniff_fonts() {
        return $this->sniff_table( self::$fonts );
    }

    /**
     * Recognises an MS Office container (OOXML zip or OLE2 compound file)
     * and refines it to Word / Excel / PowerPoint.
     *
     * Side effect: raises $num_bytes to 2056 and re-reads the header,
     * because the PowerPoint marker sits further into the file.
     *
     * @return bool False when no container matched or it could not be
     *              refined (detection then continues with the archives).
     */
    protected function sniff_msoffice() {
        $container = $this->first_match( self::$ms_office );
        // No match found... Do not continue
        if ( false === $container ) {
            return false;
        }

        // Powerpoint requires we read a bit further into the header of the document...
        $this->num_bytes = 2056;
        $this->read_resource_header();

        $eof = trim( (string) $this->footer, chr( 0 ) );
        $sof = trim( (string) $this->header, chr( 0 ) );

        if ( 'application/office-x' === $container['mime'] ) {
            $type = $this->refine_office_x( $sof );
        } else if ( 'application/office' === $container['mime'] ) {
            $type = $this->refine_office_ole( $sof, $eof );
        } else {
            // A table entry this method does not know how to refine.
            $type = false;
        }

        if ( false === $type ) {
            return false;
        }
        $this->detected_type = $type;
        return true;
    }

    /**
     * Refines a ZIP based Office document by looking at the directory name
     * of the third local file header in the archive.
     *
     * @param string $sof NUL-trimmed start of the file.
     * @return string|false Refined MIME type or false.
     */
    private function refine_office_x( $sof ) {
        $local_header = "\x50\x4B\x03\x04";

        if ( false === strpos( $sof, '[Content_Types].xml' ) ) {
            return false;
        }
        // Second local file header (the first one sits at offset 0).
        $f_pos = strpos( $sof, $local_header, 4 );
        if ( false === $f_pos ) {
            return false;
        }
        // Third local file header.
        $s_pos = strpos( $sof, $local_header, $f_pos + 4 );
        if ( false === $s_pos ) {
            return false;
        }

        // The entry name starts 30 bytes into a ZIP local file header.
        $name = substr( $sof, $s_pos + 30, 5 );
        if ( false !== strpos( $name, 'word/' ) ) {
            return 'application/msword';
        }
        if ( false !== strpos( $name, 'ppt/' ) ) {
            return 'application/vnd.ms-powerpoint';
        }
        if ( false !== strpos( $name, 'xl/' ) ) {
            return 'application/vnd.ms-excel';
        }
        if ( false !== strpos( $name, 'x/' ) ) {
            return 'application/ms-office';
        }
        return false;
    }

    /**
     * Refines an OLE2 compound document using marker strings found in the
     * tail (Word, Excel) or the head (PowerPoint) of the file.
     *
     * @param string $sof NUL-trimmed start of the file.
     * @param string $eof NUL-trimmed end of the file.
     * @return string|false Refined MIME type or false.
     */
    private function refine_office_ole( $sof, $eof ) {
        if ( false !== strpos( $eof, 'Word.Document.' ) ) {
            return 'application/msword';
        }
        if ( false !== strpos( $eof, 'Microsoft Excel' ) ) {
            return 'application/vnd.ms-excel';
        }
        // "PowerPoint Document" with a NUL byte after every character.
        $ppt_marker = implode( "\x00", str_split( 'PowerPoint Document' ) ) . "\x00";
        if ( false !== strpos( $sof, $ppt_marker ) ) {
            return 'application/vnd.ms-powerpoint';
        }
        return false;
    }

    protected function sniff_archive() {
        return $this->sniff_table( self::$archive );
    }

    /**
     * Detects PostScript and BOM-prefixed text. A match is rejected when the
     * header contains binary control bytes, except for UTF-16, whose code
     * units routinely contain NUL and other low bytes.
     *
     * @return bool
     */
    protected function sniff_text() {
        $entry = $this->first_match( self::$text );
        if ( false === $entry ) {
            return false;
        }
        $is_utf16 = ( "\xFF\xFE" === $entry['pattern'] || "\xFE\xFF" === $entry['pattern'] );
        if ( !$is_utf16 && $this->has_binary_data() ) {
            return false;
        }
        $this->detected_type = $entry['mime'];
        return true;
    }

    /**
     * Tries the HTML / XML / PDF signatures. HTML entries additionally need
     * a tag-terminating byte after the tag name.
     *
     * @return bool
     */
    protected function sniff_unknown() {
        if ( !is_array( self::$unknown ) ) {
            return false;
        }
        foreach ( self::$unknown as $entry ) {
            if ( 'text/html' === $entry['mime'] ) {
                $matched = $this->html_match_pattern( $entry['pattern'], $entry['mask'], $entry['ignore'] );
            } else {
                $matched = $this->match_pattern( $entry['pattern'], $entry['mask'], $entry['ignore'] );
            }
            if ( $matched ) {
                $this->detected_type = $entry['mime'];
                return true;
            }
        }
        return false;
    }

    protected function sniff_others() {
        return $this->sniff_table( self::$others );
    }

    /**
     * @return bool True when the header contains at least one byte from the
     *              binary-character set.
     */
    protected function has_binary_data() {
        $header = (string) $this->header;
        // strcspn() gives the length of the leading run that is free of
        // binary bytes; anything shorter than the header means one was found.
        return strcspn( $header, self::$binary_characters ) < strlen( $header );
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment