Skip to content

Instantly share code, notes, and snippets.

@jchandra74 jchandra74/FileUtil.cs
Last active Aug 29, 2015

Embed
What would you like to do?
Detects the MIME type and file extension from the filename, falling back to signature detection on the file stream when the name is not conclusive (the signature checks apply only to files with the .binary extension).
//mimetypes: http://www.sitepoint.com/web-foundations/mime-types-complete-list/
//https://technet.microsoft.com/en-us/library/ee309278(office.12).aspx
/// <summary>
/// Guesses a file's MIME type and extension from its name, falling back to content
/// sniffing (file signatures and OpenXML content types) for ".binary" and ".txt" files.
/// </summary>
/// <remarks>
/// The streams passed in must be seekable. They are never closed or disposed by this class.
/// </remarks>
public static class FileUtil
{
    private const string DefaultMimeType = "application/octet-stream";
    private const string ZipMimeType = "application/zip";
    private const string PdfMimeType = "application/pdf";
    private const string DocxMimeType = "application/vnd.openxmlformats-officedocument.wordprocessingml.document";
    private const string DocmMimeType = "application/vnd.ms-word.document.macroEnabled.12";
    private const string XlsxMimeType = "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet";
    private const string XlsmMimeType = "application/vnd.ms-excel.sheet.macroEnabled.12";

    //see: https://en.wikipedia.org/wiki/List_of_file_signatures
    private static readonly byte[] OldOfficeSignature = { 0xd0, 0xcf, 0x11, 0xe0, 0xa1, 0xb1, 0x1a, 0xe1 };
    // Only the first three bytes are shared by all JPEG flavours; the fourth one varies
    // (E0 = JFIF, E1 = Exif, DB / EE = raw), so it must not be part of the comparison.
    private static readonly byte[] JpegSignature = { 0xff, 0xd8, 0xff };
    private static readonly byte[] ZipSignature = { 0x50, 0x4b, 0x03, 0x04 };
    private static readonly byte[] PdfSignature = { 0x25, 0x50, 0x44, 0x46 };

    /// <summary>
    /// Determines the MIME type of a file.
    /// </summary>
    /// <param name="filename">File name (or path) whose extension drives the detection.</param>
    /// <param name="fileStream">
    /// Seekable stream with the file content. Only read for ".txt" and ".binary" files; it is left
    /// open, at an unspecified position.
    /// </param>
    /// <returns>
    /// "application/octet-stream" when the name has no extension or a ".binary" file is not recognised;
    /// "text/html" or "text/plain" for ".txt"; the sniffed type for ".binary"; otherwise whatever
    /// <c>MimeMapping.GetMimeMapping</c> returns for the name.
    /// </returns>
    /// <exception cref="System.ArgumentNullException">
    /// <paramref name="fileStream"/> is null and the extension requires inspecting the content.
    /// </exception>
    public static string DetectFileType(string filename, Stream fileStream)
    {
        var ext = Path.GetExtension(filename);
        if (string.IsNullOrEmpty(ext))
        {
            return DefaultMimeType;
        }

        switch (ext.ToUpperInvariant())
        {
            case ".TXT":
                RequireStream(fileStream);
                //check if file contains <html>
                return IsHtml(fileStream) ? "text/html" : "text/plain";
            case ".BINARY":
                RequireStream(fileStream);
                return SniffBinaryContent(fileStream);
            default:
                return MimeMapping.GetMimeMapping(filename);
        }
    }

    /// <summary>
    /// Determines the extension (including the leading dot) that best matches the file content.
    /// </summary>
    /// <param name="fileName">Current file name; its extension is returned when the type is not one of the known ones.</param>
    /// <param name="s">Seekable stream with the file content. It is left open and rewound to the beginning.</param>
    /// <returns>The detected extension, or the extension of <paramref name="fileName"/> (possibly empty).</returns>
    /// <exception cref="System.ArgumentNullException"><paramref name="s"/> is null.</exception>
    public static string DetectExtension(string fileName, Stream s)
    {
        if (s == null)
        {
            throw new System.ArgumentNullException("s");
        }

        string fileType;
        s.Seek(0, SeekOrigin.Begin);
        try
        {
            // The detection helpers never dispose the stream, so no defensive in-memory copy is needed.
            fileType = DetectFileType(fileName, s);
        }
        finally
        {
            // Hand the stream back rewound, whatever happened during detection.
            s.Seek(0, SeekOrigin.Begin);
        }

        switch (fileType)
        {
            case PdfMimeType:
                return ".pdf";
            case ZipMimeType:
                return ".zip";
            case "text/html":
                return ".html";
            case "image/jpeg":
                return ".jpg";
            case "application/msword":
                return ".doc";
            case DocxMimeType:
                return ".docx";
            case DocmMimeType:
                return ".docm";
            case XlsxMimeType:
                return ".xlsx";
            case XlsmMimeType:
                return ".xlsm";
            default:
                return Path.GetExtension(fileName) ?? "";
        }
    }

    // Throws when a content-based check is requested without any content to inspect.
    private static void RequireStream(Stream fileStream)
    {
        if (fileStream == null)
        {
            throw new System.ArgumentNullException("fileStream");
        }
    }

    // Identifies a ".binary" payload by its leading signature bytes.
    private static string SniffBinaryContent(Stream fileStream)
    {
        if (IsJpeg(fileStream))
        {
            return "image/jpeg";
        }
        if (IsPdf(fileStream))
        {
            return PdfMimeType;
        }
        if (IsOldOfficeDoc(fileStream))
        {
            //Assume it is Word
            return "application/msword";
        }
        if (IsZip(fileStream))
        {
            var openXmlType = GetOpenXmlType(fileStream);
            return string.IsNullOrWhiteSpace(openXmlType) ? ZipMimeType : openXmlType;
        }
        //Don't know what this is, so just return default.
        return DefaultMimeType;
    }

    // Reads [Content_Types].xml from a ZIP package and maps it to an OpenXML MIME type.
    // Returns "" when the archive is unreadable or is not an OpenXML package.
    private static string GetOpenXmlType(Stream fileStream)
    {
        fileStream.Seek(0, SeekOrigin.Begin);
        string content;
        try
        {
            // leaveOpen: the archive is disposed here, the caller's stream must survive.
            using (var archive = new ZipArchive(fileStream, ZipArchiveMode.Read, true))
            {
                // The content-types part lives at the package root, so match the full entry path.
                var entry = archive.Entries.FirstOrDefault(e => e.FullName == "[Content_Types].xml");
                if (entry == null)
                {
                    return "";
                }
                using (var reader = new StreamReader(entry.Open()))
                {
                    content = reader.ReadToEnd();
                }
            }
        }
        catch (InvalidDataException)
        {
            // Starts with the ZIP magic but is truncated/corrupt: let the caller report a plain zip.
            return "";
        }

        return ClassifyOpenXmlContentTypes(content);
    }

    // Maps the text of [Content_Types].xml to a MIME type (only Word and Excel are detected).
    private static string ClassifyOpenXmlContentTypes(string content)
    {
        // The content type of the main document part identifies the package unambiguously,
        // even when it embeds other Office documents (e.g. a workbook inside a Word file).
        if (content.Contains("application/vnd.openxmlformats-officedocument.wordprocessingml.document.main+xml"))
        {
            return DocxMimeType;
        }
        if (content.Contains("application/vnd.ms-word.document.macroEnabled.main+xml"))
        {
            return DocmMimeType;
        }
        if (content.Contains("application/vnd.openxmlformats-officedocument.spreadsheetml.sheet.main+xml"))
        {
            return XlsxMimeType;
        }
        if (content.Contains("application/vnd.ms-excel.sheet.macroEnabled.main+xml"))
        {
            return XlsmMimeType;
        }

        // Fallback heuristic for packages without a recognised main part (templates and the like):
        // any spreadsheet content type wins over a word-processing one.
        var looksLikeWord = content.Contains("application/vnd.openxmlformats-officedocument.wordprocessingml");
        var looksLikeExcel = content.Contains("application/vnd.openxmlformats-officedocument.spreadsheetml");
        if (!looksLikeWord && !looksLikeExcel)
        {
            return ZipMimeType; //Don't know what type of openxml doc this is (only detecting doc and xls)
        }

        var macroEnabled = content.Contains("macroEnabled");
        if (looksLikeExcel)
        {
            return macroEnabled ? XlsmMimeType : XlsxMimeType;
        }
        return macroEnabled ? DocmMimeType : DocxMimeType;
    }

    // True when the stream starts with the given signature bytes.
    private static bool CompareSignature(Stream stream, byte[] signature)
    {
        var buffer = new byte[signature.Length];
        stream.Seek(0, SeekOrigin.Begin);

        // Stream.Read may legitimately return fewer bytes than requested, so read until the
        // buffer is full or the stream ends.
        var total = 0;
        while (total < buffer.Length)
        {
            var read = stream.Read(buffer, total, buffer.Length - total);
            if (read <= 0)
            {
                return false;
            }
            total += read;
        }
        return buffer.SequenceEqual(signature);
    }

    private static bool IsPdf(Stream fileStream)
    {
        return CompareSignature(fileStream, PdfSignature);
    }

    private static bool IsZip(Stream fileStream)
    {
        return CompareSignature(fileStream, ZipSignature);
    }

    private static bool IsOldOfficeDoc(Stream fileStream)
    {
        return CompareSignature(fileStream, OldOfficeSignature);
    }

    private static bool IsJpeg(Stream fileStream)
    {
        return CompareSignature(fileStream, JpegSignature);
    }

    // True when the text content contains an opening <html tag (case-insensitive).
    private static bool IsHtml(Stream fileStream)
    {
        var text = GetTextFileContent(fileStream);
        return !string.IsNullOrWhiteSpace(text)
            && text.IndexOf("<HTML", System.StringComparison.OrdinalIgnoreCase) >= 0;
    }

    // Reads the whole stream as text (UTF-8 unless a byte-order mark says otherwise).
    private static string GetTextFileContent(Stream fileStream)
    {
        fileStream.Seek(0, SeekOrigin.Begin);
        // leaveOpen: disposing the reader must not close the caller's stream.
        using (var reader = new StreamReader(fileStream, System.Text.Encoding.UTF8, true, 1024, true))
        {
            return reader.ReadToEnd();
        }
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.