-
-
Save roktas/6da934e521b34741a979 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Copyright 2011 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package main | |
import (
	"bytes"
	"encoding/binary"
	"fmt"
	"io"
	"os"
)
// sniffLen is the maximum number of leading bytes that DetectContentType
// examines when making its decision.
const sniffLen = 512
// DetectContentType implements the algorithm described | |
// at http://mimesniff.spec.whatwg.org/ to determine the | |
// Content-Type of the given data. It considers at most the | |
// first 512 bytes of data. DetectContentType always returns | |
// a valid MIME type: if it cannot determine a more specific one, it | |
// returns "application/octet-stream". | |
func DetectContentType(data []byte) string { | |
if len(data) > sniffLen { | |
data = data[:sniffLen] | |
} | |
// Index of the first non-whitespace byte in data. | |
firstNonWS := 0 | |
for ; firstNonWS < len(data) && isWS(data[firstNonWS]); firstNonWS++ { | |
} | |
for _, sig := range sniffSignatures { | |
if ct := sig.match(data, firstNonWS); ct != "" { | |
return ct | |
} | |
} | |
return "application/octet-stream" // fallback | |
} | |
// isWS reports whether b is one of the bytes treated as whitespace while
// sniffing: tab, line feed, form feed, carriage return, or space.
//
// A switch over the five constants avoids converting a string to a byte
// slice and scanning it on every call.
func isWS(b byte) bool {
	switch b {
	case '\t', '\n', '\x0C', '\r', ' ':
		return true
	}
	return false
}
// sniffSig is the interface implemented by every entry of the signature
// table consulted by DetectContentType.
type sniffSig interface {
	// match inspects data and returns the MIME type it identifies, or ""
	// if the type is unknown. firstNonWS is the index of the first
	// non-whitespace byte in data.
	match(data []byte, firstNonWS int) string
}
// Data matching the table in section 6. | |
var sniffSignatures = []sniffSig{ | |
htmlSig("<!DOCTYPE HTML"), | |
htmlSig("<HTML"), | |
htmlSig("<HEAD"), | |
htmlSig("<SCRIPT"), | |
htmlSig("<IFRAME"), | |
htmlSig("<H1"), | |
htmlSig("<DIV"), | |
htmlSig("<FONT"), | |
htmlSig("<TABLE"), | |
htmlSig("<A"), | |
htmlSig("<STYLE"), | |
htmlSig("<TITLE"), | |
htmlSig("<B"), | |
htmlSig("<BODY"), | |
htmlSig("<BR"), | |
htmlSig("<P"), | |
htmlSig("<!--"), | |
&maskedSig{mask: []byte("\xFF\xFF\xFF\xFF\xFF"), pat: []byte("<?xml"), skipWS: true, ct: "text/xml; charset=utf-8"}, | |
&exactSig{[]byte("%PDF-"), "application/pdf"}, | |
&exactSig{[]byte("%!PS-Adobe-"), "application/postscript"}, | |
// UTF BOMs. | |
&maskedSig{mask: []byte("\xFF\xFF\x00\x00"), pat: []byte("\xFE\xFF\x00\x00"), ct: "text/plain; charset=utf-16be"}, | |
&maskedSig{mask: []byte("\xFF\xFF\x00\x00"), pat: []byte("\xFF\xFE\x00\x00"), ct: "text/plain; charset=utf-16le"}, | |
&maskedSig{mask: []byte("\xFF\xFF\xFF\x00"), pat: []byte("\xEF\xBB\xBF\x00"), ct: "text/plain; charset=utf-8"}, | |
&exactSig{[]byte("GIF87a"), "image/gif"}, | |
&exactSig{[]byte("GIF89a"), "image/gif"}, | |
&exactSig{[]byte("\x89\x50\x4E\x47\x0D\x0A\x1A\x0A"), "image/png"}, | |
&exactSig{[]byte("\xFF\xD8\xFF"), "image/jpeg"}, | |
&exactSig{[]byte("BM"), "image/bmp"}, | |
&maskedSig{ | |
mask: []byte("\xFF\xFF\xFF\xFF\x00\x00\x00\x00\xFF\xFF\xFF\xFF\xFF\xFF"), | |
pat: []byte("RIFF\x00\x00\x00\x00WEBPVP"), | |
ct: "image/webp", | |
}, | |
&exactSig{[]byte("\x00\x00\x01\x00"), "image/vnd.microsoft.icon"}, | |
&exactSig{[]byte("\x4F\x67\x67\x53\x00"), "application/ogg"}, | |
&maskedSig{ | |
mask: []byte("\xFF\xFF\xFF\xFF\x00\x00\x00\x00\xFF\xFF\xFF\xFF"), | |
pat: []byte("RIFF\x00\x00\x00\x00WAVE"), | |
ct: "audio/wave", | |
}, | |
&exactSig{[]byte("\x1A\x45\xDF\xA3"), "video/webm"}, | |
&exactSig{[]byte("\x52\x61\x72\x20\x1A\x07\x00"), "application/x-rar-compressed"}, | |
&exactSig{[]byte("\x50\x4B\x03\x04"), "application/zip"}, | |
&exactSig{[]byte("\x1F\x8B\x08"), "application/x-gzip"}, | |
// TODO(dsymonds): Re-enable this when the spec is sorted w.r.t. MP4. | |
//mp4Sig(0), | |
textSig(0), // should be last | |
} | |
// exactSig is a signature that matches data beginning with a fixed byte
// sequence.
type exactSig struct {
	sig []byte // prefix the data must start with
	ct  string // MIME type reported on a match
}

// match returns e.ct when data starts with e.sig, and "" otherwise.
// firstNonWS is not used: the comparison always begins at data[0].
func (e *exactSig) match(data []byte, firstNonWS int) string {
	n := len(e.sig)
	if len(data) >= n && bytes.Equal(data[:n], e.sig) {
		return e.ct
	}
	return ""
}
// maskedSig is a signature compared under a bit mask: byte i of the data
// matches when data[i]&mask[i] equals pat[i].
type maskedSig struct {
	mask, pat []byte
	skipWS    bool   // if set, matching starts at the first non-whitespace byte
	ct        string // MIME type reported on a match
}

// match returns m.ct when the masked leading bytes of data equal m.pat,
// and "" otherwise. When m.skipWS is set the comparison starts at index
// firstNonWS instead of at the beginning of data.
func (m *maskedSig) match(data []byte, firstNonWS int) string {
	window := data
	if m.skipWS {
		window = data[firstNonWS:]
	}

	// Too little data to cover the whole mask.
	if len(m.mask) > len(window) {
		return ""
	}

	for i := 0; i < len(m.mask); i++ {
		if window[i]&m.mask[i] != m.pat[i] {
			return ""
		}
	}
	return m.ct
}
// htmlSig is an HTML tag prefix. Uppercase ASCII letters in the signature
// match either case in the data; all other bytes must match exactly.
type htmlSig []byte

// match returns "text/html; charset=utf-8" when the data, starting at
// index firstNonWS, begins with the signature and is immediately followed
// by a space or a right angle bracket. Otherwise it returns "".
func (h htmlSig) match(data []byte, firstNonWS int) string {
	rest := data[firstNonWS:]

	// The signature plus one terminating byte must be present.
	if len(rest) <= len(h) {
		return ""
	}

	for i := range h {
		want, got := h[i], rest[i]
		if want >= 'A' && want <= 'Z' {
			// Clearing bit 0x20 folds an ASCII lowercase letter to uppercase.
			got &= 0xDF
		}
		if got != want {
			return ""
		}
	}

	// The byte after the tag name must be a space or right angle bracket.
	switch rest[len(h)] {
	case ' ', '>':
		return "text/html; charset=utf-8"
	}
	return ""
}
// mp4Sig recognises MP4 data by inspecting its leading "ftyp" box.
type mp4Sig int

// match returns "video/mp4" when data starts with an "ftyp" box that
// lists one of the recognised brands, and "" otherwise (c.f. section
// 6.1). firstNonWS is not used.
func (mp4Sig) match(data []byte, firstNonWS int) string {
	// The box size and box type take four bytes each.
	if len(data) < 8 {
		return ""
	}

	// The first four bytes hold the big-endian size of the box. The size
	// must be a multiple of four and the box must fit inside data.
	boxSize := int(binary.BigEndian.Uint32(data))
	switch {
	case boxSize%4 != 0, boxSize > len(data):
		return ""
	case !bytes.HasPrefix(data[4:], []byte("ftyp")):
		return ""
	}

	// Walk the four-byte brand fields that follow the box header.
	for off := 8; off < boxSize; off += 4 {
		if off == 12 {
			// Minor version number, not a brand.
			continue
		}
		// Only the first three bytes of each brand are compared. Brands
		// such as "M4A", "3gp" and "jp2" are not in the spec and are
		// deliberately not recognised.
		switch string(data[off : off+3]) {
		case "mp4", "iso", "M4V", "M4P", "M4B":
			return "video/mp4"
		}
	}
	return ""
}
// textSig is the catch-all signature for plain text.
type textSig int

// match returns "text/plain; charset=utf-8" unless the data, starting at
// index firstNonWS, contains a byte classed as binary, in which case it
// returns "" (c.f. section 5, step 4).
func (textSig) match(data []byte, firstNonWS int) string {
	rest := data[firstNonWS:]
	for i := 0; i < len(rest); i++ {
		c := rest[i]
		// Binary bytes are 0x00-0x08, 0x0B, 0x0E-0x1A and 0x1C-0x1F.
		if c <= 0x08 || c == 0x0B || (0x0E <= c && c <= 0x1A) || (0x1C <= c && c <= 0x1F) {
			return ""
		}
	}
	return "text/plain; charset=utf-8"
}
func main() { | |
f, _ := os.Open(os.Args[1]) | |
defer f.Close() | |
b := make([]byte, 100) | |
_, _ = f.Read(b) | |
fmt.Println(DetectContentType(b)) | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment