Skip to content

Instantly share code, notes, and snippets.

@benrr101
Created April 10, 2016 01:49
Show Gist options
  • Save benrr101/d6a0bdc3f99df97c37d76687175964f8 to your computer and use it in GitHub Desktop.
Save benrr101/d6a0bdc3f99df97c37d76687175964f8 to your computer and use it in GitHub Desktop.
FormMultipart Parsers
using System;
using System.Collections.Generic;
using System.IO;
using System.Text;
using System.Text.RegularExpressions;
namespace DolomiteWcfService.MultipartParser
{
/// <summary>
/// Base class for one part of a multipart request. A part carries its parsed headers and the
/// form-data name taken from its Content-Disposition header; concrete subclasses decide
/// where the bytes of the part are stored.
/// </summary>
public abstract class MultiPartPart
{
    #region Factory Method
    /// <summary>
    /// Splits a raw header line into its name and value. Built once and shared because the
    /// pattern never changes between calls.
    /// </summary>
    private static readonly Regex HeaderRegex = new Regex(@"^([^\s:]+):\s*(.*)$",
        RegexOptions.Compiled | RegexOptions.CultureInvariant);

    /// <summary>
    /// Extracts the quoted <c>name</c> parameter of a Content-Disposition value. The leading
    /// alternation keeps the pattern from matching inside <c>filename="..."</c>, and the
    /// capture stops at the closing quote so later parameters are not swallowed.
    /// </summary>
    private static readonly Regex NameRegex = new Regex(@"(?:^|[;\s])name=""([^""]*)""",
        RegexOptions.Compiled | RegexOptions.CultureInvariant);

    /// <summary>
    /// Creates a new multipart part based on the headers provided. If a Content-Type header
    /// is present the part is a file multipart part. Otherwise, it is a form data multipart
    /// part.
    /// </summary>
    /// <param name="headers">
    /// The raw header lines processed from an HTTP multipart request. Headers must be known
    /// ahead of time in order to properly determine the type of the multipart part. Header
    /// names are matched case-insensitively; if a header is repeated the last value wins.
    /// </param>
    /// <returns>
    /// A new multipart part based on the headers provided. Its name is the empty string when
    /// the Content-Disposition header carries no quoted <c>name</c> parameter.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="headers"/> is null</exception>
    /// <exception cref="InvalidDataException">
    /// A header line is not of the form <c>Name: value</c>, or the Content-Disposition header
    /// is missing.
    /// </exception>
    public static MultiPartPart CreateMultiPartPart(List<string> headers)
    {
        if (headers == null)
        {
            throw new ArgumentNullException("headers");
        }

        // HTTP header names are case-insensitive, so the lookup table has to be as well
        Dictionary<string, string> workingHeaders =
            new Dictionary<string, string>(StringComparer.OrdinalIgnoreCase);

        foreach (string header in headers)
        {
            // Trim first: a stray line-break byte around the line must not defeat the match
            string headerLine = header == null ? String.Empty : header.Trim();
            Match headerMatch = HeaderRegex.Match(headerLine);
            if (!headerMatch.Success)
            {
                throw new InvalidDataException("Multipart part contains a malformed header line.");
            }

            // Indexer assignment: a repeated header replaces the earlier value rather than
            // throwing
            workingHeaders[headerMatch.Groups[1].Value] = headerMatch.Groups[2].Value;
        }

        // Validate before constructing the part: a file part opens a file in its
        // constructor, which would be left behind if we threw afterwards
        string contentDisposition;
        if (!workingHeaders.TryGetValue("Content-Disposition", out contentDisposition))
        {
            throw new InvalidDataException("Multipart part is missing Content-Disposition header.");
        }

        // Determine the type of the part based on the presence of a content type
        MultiPartPart part;
        if (workingHeaders.ContainsKey("Content-Type"))
        {
            part = new FileMultiPartPart();
        }
        else
        {
            part = new FormMultiPartPart();
        }
        part.Headers = workingHeaders;

        // Groups[1] is the empty string when the pattern does not match
        part.Name = NameRegex.Match(contentDisposition).Groups[1].Value;
        return part;
    }
    #endregion

    /// <summary>
    /// The name of the form-data provided by this multipart part
    /// </summary>
    public string Name { get; private set; }

    /// <summary>
    /// The headers for the multipart part, keyed by header name
    /// </summary>
    public Dictionary<string, string> Headers { get; private set; }

    /// <summary>
    /// Appends content bytes to the part.
    /// </summary>
    /// <param name="bytesToWrite">The array to take bytes from</param>
    /// <param name="offset">The 0-based index of the first byte to take</param>
    /// <param name="length">The number of bytes to take</param>
    public abstract void WriteBytes(byte[] bytesToWrite, int offset, int length);

    /// <summary>
    /// Signals that all content of the part has been written.
    /// </summary>
    public abstract void Complete();
}
/// <summary>
/// A multipart part suited for writing files to the disk.
/// </summary>
public class FileMultiPartPart : MultiPartPart
{
    /// <summary>
    /// The open handle to the temporary file; null once the part has been completed.
    /// </summary>
    private FileStream _outFile;

    /// <summary>
    /// The path to the file that was created for storing this multipart part
    /// </summary>
    public string FileTempPath { get; set; }

    /// <summary>
    /// A read-only stream to the temporary file that was created for this multipart part.
    /// A new stream is opened on every access, so this should be processed using
    /// <c>using</c> or disposed.
    /// </summary>
    public FileStream Stream
    {
        get { return File.OpenRead(FileTempPath); }
    }

    /// <summary>
    /// Constructs a new FileMultiPartPart by constructing a new temporary file
    /// </summary>
    protected internal FileMultiPartPart()
    {
        // Create a new file with a temp name. The name carries no directory, so it is
        // resolved against the process' current directory.
        // TODO: Figure out how to get a base path into there
        FileTempPath = Guid.NewGuid().ToString();
        _outFile = File.OpenWrite(FileTempPath);
    }

    /// <summary>
    /// Writes bytes out to the temporary storage file
    /// </summary>
    /// <param name="bytesToWrite">The array bytes to write from</param>
    /// <param name="offset">
    /// The 0-based index into <paramref name="bytesToWrite"/> indicating where to start
    /// writing bytes from
    /// </param>
    /// <param name="length">
    /// The number of bytes from <paramref name="bytesToWrite"/> to write to the file
    /// </param>
    /// <exception cref="InvalidOperationException">
    /// The part has already been completed
    /// </exception>
    public override void WriteBytes(byte[] bytesToWrite, int offset, int length)
    {
        if (_outFile == null)
        {
            throw new InvalidOperationException("Multipart file has already been finalized.");
        }
        _outFile.Write(bytesToWrite, offset, length);
    }

    /// <summary>
    /// Method to be called when creation of the multipart part is completed. This is to clean
    /// up resources and indicate that the part cannot be written to again. Calling it more
    /// than once has no further effect.
    /// </summary>
    public override void Complete()
    {
        if (_outFile == null)
        {
            // Already completed; nothing left to release
            return;
        }

        // Clear the field first so the part counts as finalized even if flushing the file
        // throws while it is being disposed
        FileStream finishedFile = _outFile;
        _outFile = null;
        finishedFile.Dispose();
    }
}
/// <summary>
/// A multipart part suited for reading form values into memory
/// </summary>
public class FormMultiPartPart : MultiPartPart
{
    /// <summary>
    /// Stateful UTF-8 decoder. It keeps the leading bytes of a character that was split
    /// across two <see cref="WriteBytes"/> calls until the remaining bytes arrive. ASCII
    /// content decodes exactly as it would with an ASCII decoder.
    /// </summary>
    private readonly Decoder _decoder;

    /// <summary>
    /// Accumulates the decoded value; null once the part has been completed.
    /// </summary>
    private StringBuilder _valueBuilder;

    /// <summary>
    /// The content of the multipart, or the value of the field. Only set once
    /// <see cref="Complete"/> has been called.
    /// </summary>
    public string Value { get; set; }

    /// <summary>
    /// Constructs a new, empty FormMultiPartPart
    /// </summary>
    protected internal FormMultiPartPart()
    {
        _valueBuilder = new StringBuilder();
        _decoder = Encoding.UTF8.GetDecoder();
    }

    /// <summary>
    /// Decodes bytes and stores them to the value builder
    /// </summary>
    /// <param name="bytesToWrite">The array of bytes to take from</param>
    /// <param name="offset">
    /// The 0-based index into <paramref name="bytesToWrite"/> of the first byte to take
    /// </param>
    /// <param name="length">
    /// The number of bytes from <paramref name="bytesToWrite"/> to take
    /// </param>
    /// <exception cref="InvalidOperationException">
    /// The part has already been completed
    /// </exception>
    public override void WriteBytes(byte[] bytesToWrite, int offset, int length)
    {
        if (_valueBuilder == null)
        {
            throw new InvalidOperationException("Multipart form data has already been finalized.");
        }
        AppendDecoded(bytesToWrite, offset, length, false);
    }

    /// <summary>
    /// Method to be called when all bytes of the part have been written. Publishes the
    /// accumulated text through <see cref="Value"/> and prevents further writes. Calling it
    /// more than once has no further effect.
    /// </summary>
    public override void Complete()
    {
        if (_valueBuilder == null)
        {
            // Already completed; keep the value that was published the first time
            return;
        }

        // Flush so that a truncated trailing character is not silently dropped
        AppendDecoded(new byte[0], 0, 0, true);
        Value = _valueBuilder.ToString();
        _valueBuilder = null;
    }

    /// <summary>
    /// Runs bytes through the decoder and appends the resulting characters.
    /// </summary>
    private void AppendDecoded(byte[] bytes, int offset, int count, bool flush)
    {
        // GetCharCount only measures; GetChars is the call that advances the decoder state
        char[] decoded = new char[_decoder.GetCharCount(bytes, offset, count, flush)];
        int charsWritten = _decoder.GetChars(bytes, offset, count, decoded, 0, flush);
        _valueBuilder.Append(decoded, 0, charsWritten);
    }
}
}
using System;
using System.Collections.Generic;
using System.Diagnostics.Contracts;
using System.IO;
using System.Text;
namespace DolomiteWcfService.MultipartParser
{
public class MultipartStreamParser
{
/// <summary>
/// The states the parser moves through while consuming the input stream
/// </summary>
private enum Mode
{
/// <summary>
/// The mode where we're searching for the boundary definition at the start of the stream
/// </summary>
Boundary,
/// <summary>
/// The mode where we're finding and storing headers
/// </summary>
Header,
/// <summary>
/// The mode where we're writing out bytes
/// </summary>
Bytes,
/// <summary>
/// The mode where we either find the end of the multipart or begin processing another
/// multipart part.
/// </summary>
BytesProcess
}
#region Control Strings
/// <summary>
/// Bytes used to mark the end of a line in the multipart format
/// </summary>
private static readonly byte[] LineSeparator = { (byte)'\r', (byte)'\n' };
/// <summary>
/// Bytes that follow the boundary of the last part to mark the end of the multipart.
///
/// Ex:
/// ---boundary
/// bytes
/// ---boundary
/// morebytes
/// ---boundary--
/// </summary>
private static readonly byte[] EndOfMultipart = { (byte)'-', (byte)'-' };
/// <summary>
/// Bytes used to mark the boundary of a part of the multipart. Read from the stream being
/// parsed, so it is kept per parser instance: a shared static would let one parser
/// overwrite the boundary another parser is still searching for.
/// </summary>
private byte[] _boundary;
#endregion
#region Internal Parsing State
/// <summary>
/// Size, in bytes, of the buffer used for each read from the input stream
/// </summary>
private const int BufferLength = 4096;
/// <summary>
/// The stream that will be parsed.
/// </summary>
private readonly Stream _inputStream;
/// <summary>
/// Temporary storage for bytes that may span more than one read. Used for composing the
/// boundary and the header lines.
/// </summary>
private readonly List<byte> _byteArrayBuilder;
/// <summary>
/// The current mode of the parsing
/// </summary>
private Mode _mode;
/// <summary>
/// The raw header lines collected so far for the multipart part currently being parsed
/// </summary>
private List<string> _workingHeaderList;
#endregion
#region External Properties
/// <summary>
/// The collection of multiparts found during parsing, in the order they were encountered
/// </summary>
public List<MultiPartPart> MultiParts;
#endregion
/// <summary>
/// Constructs a new multipart stream parser. Initializes the internal state.
/// </summary>
/// <param name="stream">The stream from which to parse multiparts</param>
/// <exception cref="ArgumentNullException"><paramref name="stream"/> is null</exception>
public MultipartStreamParser(Stream stream)
{
    // Fail here rather than with a NullReferenceException on the first read in Parse
    if (stream == null)
    {
        throw new ArgumentNullException("stream");
    }
    _inputStream = stream;

    // Initialize the internal state: parsing starts by looking for the boundary line
    _mode = Mode.Boundary;
    _byteArrayBuilder = new List<byte>(BufferLength);

    // Initialize the external properties
    MultiParts = new List<MultiPartPart>();
}
/// <summary>
/// Begins parsing the stream to find multipart parts. Reads the stream to its end, handing
/// each buffer to the parse routine for the current mode. The parts found are appended to
/// <see cref="MultiParts"/>.
/// </summary>
/// <exception cref="FormatException">
/// The stream ended while the content of a part was still being read.
/// </exception>
public void Parse()
{
    // We begin in boundary search mode
    byte[] buffer = new byte[BufferLength];

    // Read bytes into the buffer while there are bytes to read in
    int bytesRead;
    while ((bytesRead = _inputStream.Read(buffer, 0, buffer.Length)) > 0)
    {
        switch (_mode)
        {
            case Mode.Boundary:
                BoundaryModeParse(buffer, 0, bytesRead);
                break;
            case Mode.Header:
                HeaderModeParse(buffer, 0, bytesRead);
                break;
            case Mode.Bytes:
                BytesModeParse(buffer, 0, bytesRead);
                break;
            case Mode.BytesProcess:
                BytesProcessModeParse(buffer, 0, bytesRead);
                break;
        }
    }

    // Still in bytes mode means the last part never saw its terminating boundary, so it
    // was never completed. Complete it so its resources are released, then report the
    // truncated request rather than returning a half-written part.
    if (_mode == Mode.Bytes && MultiParts.Count > 0)
    {
        MultiParts[MultiParts.Count - 1].Complete();
        throw new FormatException("Improperly formed multipart request. Unexpected end of stream before the closing boundary.");
    }
}
/// <summary>
/// Handles a buffer while the boundary line is still being collected. Everything up to the
/// first line break becomes the boundary; the rest of the buffer is handed on to header
/// processing.
/// </summary>
/// <param name="buffer">The buffer holding the bytes read from the stream</param>
/// <param name="offset">The index into <paramref name="buffer"/> to start from</param>
/// <param name="length">The number of bytes available from <paramref name="offset"/></param>
private void BoundaryModeParse(byte[] buffer, int offset, int length)
{
    int lineBreakAt = SearchByteArray(buffer, offset, length, LineSeparator);

    // No line break at all: the whole buffer is still part of the boundary line
    if (lineBreakAt < 0)
    {
        AddBytes(buffer, offset, length, _byteArrayBuilder);
        return;
    }

    // How many bytes of the line separator fall beyond the end of this buffer
    int shortfall = LineSeparator.Length - (length - lineBreakAt);
    bool separatorIsSplit = shortfall > 0 && shortfall < LineSeparator.Length;

    // The complete line break is in the buffer: finish the boundary and keep going with
    // whatever follows it
    if (!separatorIsSplit)
    {
        ContinueFromBoundaryMode(buffer, offset, lineBreakAt, length, true);
        return;
    }

    // Only the start of the line break is in the buffer; pull the rest from the stream
    byte[] emergencyBytes;
    bool lineBreakConfirmed = EmergencyBuffer(shortfall, _inputStream, LineSeparator, out emergencyBytes);
    if (lineBreakConfirmed)
    {
        // It was a line break after all. Nothing of this buffer is left to process and
        // the emergency bytes belong to the separator, so they are discarded.
        ContinueFromBoundaryMode(buffer, offset, lineBreakAt, length, false);
        return;
    }

    // False alarm: both the buffer and the emergency bytes are boundary content
    AddBytes(buffer, offset, length, _byteArrayBuilder);
    AddBytes(emergencyBytes, 0, emergencyBytes.Length, _byteArrayBuilder);
}
/// <summary>
/// Handles a buffer while the headers of a part are being collected. Each line becomes a
/// header; the blank line that ends the headers creates the part and switches to bytes
/// mode.
/// </summary>
/// <param name="buffer">The buffer holding the bytes read from the stream</param>
/// <param name="offset">The index into <paramref name="buffer"/> to start from</param>
/// <param name="length">The number of bytes available from <paramref name="offset"/></param>
private void HeaderModeParse(byte[] buffer, int offset, int length)
{
    // Search for a line break in the buffer
    int relLineBreakIndex = SearchByteArray(buffer, offset, length, LineSeparator);
    if (relLineBreakIndex < 0)
    {
        // We didn't find the line break, so throw everything into the byte array builder
        AddBytes(buffer, offset, length, _byteArrayBuilder);
        return;
    }

    int missingBytes = LineSeparator.Length - (length - relLineBreakIndex);
    bool separatorIsSplit = missingBytes > 0 && missingBytes < LineSeparator.Length;

    // A line break at the very start only ends the headers when no header text is waiting
    // in the builder from an earlier buffer. Otherwise it terminates that pending header.
    bool isBlankLine = relLineBreakIndex == 0 && _byteArrayBuilder.Count == 0;

    if (separatorIsSplit)
    {
        // Only the start of the line break is in the buffer. This has to be settled before
        // anything else, because until the rest is read we cannot tell whether it is a
        // line break at all.
        byte[] emergencyBytes;
        if (!EmergencyBuffer(missingBytes, _inputStream, LineSeparator, out emergencyBytes))
        {
            // We didn't find the line break, so throw everything into the header builder,
            // including the emergency buffer
            AddBytes(buffer, offset, length, _byteArrayBuilder);
            AddBytes(emergencyBytes, 0, emergencyBytes.Length, _byteArrayBuilder);
        }
        else if (isBlankLine)
        {
            // The headers are finished and this buffer is used up; the content of the
            // part starts with the next read
            MultiParts.Add(MultiPartPart.CreateMultiPartPart(_workingHeaderList));
            _workingHeaderList = new List<string>();
            _mode = Mode.Bytes;
        }
        else
        {
            // We found the line break, everything before it makes up the header. The
            // emergency bytes belong to the separator and are not needed.
            ContinueToHeaderMode(buffer, offset, relLineBreakIndex, length, false);
        }
        return;
    }

    if (isBlankLine)
    {
        // We found the blank line. Create a new multipart part
        MultiParts.Add(MultiPartPart.CreateMultiPartPart(_workingHeaderList));
        _workingHeaderList = new List<string>();

        // Jump into byte mode with whatever follows the blank line
        _mode = Mode.Bytes;
        BytesModeParse(buffer, offset + LineSeparator.Length, length - LineSeparator.Length);
        return;
    }

    // We found the line break, store the header and start processing more headers
    ContinueToHeaderMode(buffer, offset, relLineBreakIndex, length, true);
}
/// <summary>
/// Handles a buffer while the content of a part is being read. Bytes are written to the
/// most recently created part until the boundary is found.
/// </summary>
/// <remarks>
/// NOTE(review): every byte in front of the boundary is written to the part, which appears
/// to include the line break that precedes the boundary line - confirm whether callers
/// expect that. Bytes pulled in through the emergency buffer are also not searched for the
/// start of another boundary.
/// </remarks>
/// <param name="buffer">The buffer holding the bytes read from the stream</param>
/// <param name="offset">The index into <paramref name="buffer"/> to start from</param>
/// <param name="length">The number of bytes available from <paramref name="offset"/></param>
private void BytesModeParse(byte[] buffer, int offset, int length)
{
    // Search for a boundary
    int relBoundaryIndex = SearchByteArray(buffer, offset, length, _boundary);
    MultiPartPart currentPart = MultiParts[MultiParts.Count - 1];
    if (relBoundaryIndex < 0)
    {
        // We didn't find the boundary. Take all the bytes and throw it into the multipart
        currentPart.WriteBytes(buffer, offset, length);
        return;
    }

    int missingBytes = _boundary.Length - (length - relBoundaryIndex);
    if (missingBytes > 0 && missingBytes < _boundary.Length)
    {
        // Only the start of the boundary is in the buffer. Bust out the emergency buffer.
        byte[] emergencyBytes;
        if (EmergencyBuffer(missingBytes, _inputStream, _boundary, out emergencyBytes))
        {
            // We found the boundary marker, everything before it goes into the output
            // There's no need to recover the emergency buffer.
            ContinueToByteProcessMode(buffer, offset, relBoundaryIndex, length, false);
        }
        else
        {
            // We didn't find the boundary marker. The bytes that merely looked like the
            // start of a boundary are content too, so the whole buffer is written, not
            // just the part in front of the candidate, followed by the emergency bytes.
            currentPart.WriteBytes(buffer, offset, length);
            currentPart.WriteBytes(emergencyBytes, 0, emergencyBytes.Length);
        }
        return;
    }

    // We found boundary, continue into byte process mode
    ContinueToByteProcessMode(buffer, offset, relBoundaryIndex, length, true);
}
/// <summary>
/// Handles the bytes that directly follow a boundary. They must be either the end marker,
/// which finishes the multipart, or a line break, which starts the headers of another
/// part.
/// </summary>
/// <param name="buffer">The buffer holding the bytes read from the stream</param>
/// <param name="offset">The index into <paramref name="buffer"/> to start from</param>
/// <param name="length">The number of bytes available from <paramref name="offset"/></param>
/// <exception cref="FormatException">
/// The boundary is followed by neither the end marker nor a line break
/// </exception>
private void BytesProcessModeParse(byte[] buffer, int offset, int length)
{
    // The boundary ended exactly at the end of the buffer. There is nothing to inspect yet;
    // stay in this mode and decide when the next buffer arrives.
    if (length <= 0)
    {
        return;
    }

    int relEndIndex = SearchByteArray(buffer, offset, length, EndOfMultipart);
    int relNewLineIndex = SearchByteArray(buffer, offset, length, LineSeparator);

    // Case 1) End marker immediately -> Close up shop
    // Case 2) New line immediately -> Header mode again
    // Case 3) End or line marker split -> Bust out the emergency buffer
    // Case 4) Anything else -> Error
    // Only a marker at index 0 counts: one found further in does not follow the boundary.
    if (relEndIndex == 0)
    {
        int missingEndBytes = EndOfMultipart.Length - length;
        if (missingEndBytes > 0)
        {
            // Bust out the emergency buffer and look for the rest of the end marker
            byte[] emergencyBytes;
            if (!EmergencyBuffer(missingEndBytes, _inputStream, EndOfMultipart, out emergencyBytes))
            {
                // We found a - and something else. This is invalid.
                ContinueToErrorState();
            }
        }

        // Close up shop. Whatever follows the end marker is not part of any multipart
        // part, so a missing trailing line break is not an error.
        return;
    }

    if (relNewLineIndex == 0)
    {
        int missingNewLineBytes = LineSeparator.Length - length;
        if (missingNewLineBytes > 0)
        {
            // Bust out the emergency buffer and look for the rest of the line separator
            byte[] emergencyBytes;
            if (!EmergencyBuffer(missingNewLineBytes, _inputStream, LineSeparator, out emergencyBytes))
            {
                // We found a \r and something else. This is invalid
                ContinueToErrorState();
            }

            // We found the new line, go back to header mode. The buffer is used up and
            // the emergency bytes belong to the separator, so there is nothing to pass on.
            _mode = Mode.Header;
            _workingHeaderList = new List<string>();
            return;
        }

        // Go back to header mode with whatever follows the line break
        _mode = Mode.Header;
        _workingHeaderList = new List<string>();
        int bytesRemoved = LineSeparator.Length;
        HeaderModeParse(buffer, offset + bytesRemoved, length - bytesRemoved);
        return;
    }

    ContinueToErrorState();
}
#region Transition Methods
/// <summary>
/// Finishes the boundary line, stores it as the boundary to search for and switches the
/// parser into header mode.
/// </summary>
/// <param name="buffer">The buffer holding the bytes read from the stream</param>
/// <param name="offset">The index into <paramref name="buffer"/> to start from</param>
/// <param name="lengthToAdd">
/// The number of bytes from <paramref name="offset"/> that still belong to the boundary
/// </param>
/// <param name="originalLength">
/// The number of bytes that were available from <paramref name="offset"/>
/// </param>
/// <param name="cont">
/// Whether the bytes after the line break should be passed to header processing.
/// <c>false</c> when the buffer has been used up.
/// </param>
/// <exception cref="FormatException">The boundary line is empty</exception>
private void ContinueFromBoundaryMode(byte[] buffer, int offset, int lengthToAdd, int originalLength, bool cont)
{
    // Add the current buffer to the boundary builder, and store it off as the boundary
    AddBytes(buffer, offset, lengthToAdd, _byteArrayBuilder);
    if (_byteArrayBuilder.Count == 0)
    {
        // An empty boundary cannot be searched for; reject it here with a clear message
        // instead of failing later inside the search
        throw new FormatException("Improperly formed multipart request. Boundary line is empty.");
    }
    _boundary = _byteArrayBuilder.ToArray();

    // Initialize header mode state
    _mode = Mode.Header;
    _byteArrayBuilder.Clear();
    _workingHeaderList = new List<string>();

    // Calculate the number of bytes we removed from the original buffer and start the
    // next parser at that location
    int bytesRemoved = lengthToAdd + LineSeparator.Length;
    if (cont)
    {
        HeaderModeParse(buffer, offset + bytesRemoved, originalLength - bytesRemoved);
    }
}
/// <summary>
/// Finishes the header line that is being collected, stores it in the working header list
/// and optionally carries on parsing headers from the rest of the buffer.
/// </summary>
/// <param name="buffer">The buffer holding the bytes read from the stream</param>
/// <param name="offset">The index into <paramref name="buffer"/> to start from</param>
/// <param name="lengthToAdd">
/// The number of bytes from <paramref name="offset"/> that still belong to the header
/// </param>
/// <param name="originalLength">
/// The number of bytes that were available from <paramref name="offset"/>
/// </param>
/// <param name="cont">
/// Whether the bytes after the line break should be parsed for further headers
/// </param>
private void ContinueToHeaderMode(byte[] buffer, int offset, int lengthToAdd, int originalLength, bool cont)
{
    // Complete the pending header with the bytes in front of the line break
    AddBytes(buffer, offset, lengthToAdd, _byteArrayBuilder);

    // Decode the collected bytes, record the header, then reset the builder for the next
    string headerLine = Encoding.ASCII.GetString(_byteArrayBuilder.ToArray());
    _workingHeaderList.Add(headerLine);
    _byteArrayBuilder.Clear();

    if (!cont)
    {
        return;
    }

    // Skip the header and its line break, then parse what is left of the buffer
    int consumed = lengthToAdd + LineSeparator.Length;
    HeaderModeParse(buffer, offset + consumed, originalLength - consumed);
}
/// <summary>
/// Writes the last bytes of the current part, completes it and switches the parser into
/// byte process mode.
/// </summary>
/// <param name="buffer">The buffer holding the bytes read from the stream</param>
/// <param name="offset">The index into <paramref name="buffer"/> to start from</param>
/// <param name="lengthToAdd">
/// The number of bytes from <paramref name="offset"/> that still belong to the part
/// </param>
/// <param name="originalLength">
/// The number of bytes that were available from <paramref name="offset"/>
/// </param>
/// <param name="cont">
/// Whether the bytes after the boundary should be processed straight away
/// </param>
private void ContinueToByteProcessMode(byte[] buffer, int offset, int lengthToAdd, int originalLength, bool cont)
{
    // The part being filled is always the one added last
    MultiPartPart activePart = MultiParts[MultiParts.Count - 1];
    activePart.WriteBytes(buffer, offset, lengthToAdd);
    activePart.Complete();

    // The content and the boundary that followed it have both been dealt with
    int consumed = lengthToAdd + _boundary.Length;
    _mode = Mode.BytesProcess;

    if (!cont)
    {
        return;
    }
    BytesProcessModeParse(buffer, offset + consumed, originalLength - consumed);
}
/// <summary>
/// Aborts parsing because a boundary was followed by neither the end marker nor a line
/// break.
/// </summary>
/// <exception cref="FormatException">Always thrown</exception>
private static void ContinueToErrorState()
{
    const string message = "Improperly formed multipart request. Expected end marker or newline.";
    throw new FormatException(message);
}
#endregion
#region Utility Methods
/// <summary>
/// Searches a given byte array for another byte array. Only finds the first instance.
/// A needle that is cut off by the end of the searched range also counts as found, so that
/// callers can detect a control string that is split across two reads.
/// </summary>
/// <param name="haystack">The byte array to search</param>
/// <param name="offset">The offset into the haystack to start searching from</param>
/// <param name="length">
/// The number of bytes to search in <paramref name="haystack"/>. Generally, this is the
/// number of bytes in <paramref name="haystack"/> unless <paramref name="haystack"/> is
/// only partially filled.
/// </param>
/// <param name="needle">The byte array to search for</param>
/// <returns>
/// The index into <paramref name="haystack"/> where <paramref name="needle"/> was found,
/// relative to the offset provided. If fewer than <c>needle.Length</c> bytes remain after
/// that index, only the leading bytes of <paramref name="needle"/> were matched.
/// -1 indicates that <paramref name="needle"/> was not found.
/// </returns>
/// <exception cref="ArgumentException">
/// <paramref name="needle"/> is null or empty while there are bytes to search
/// </exception>
[Pure]
private static int SearchByteArray(byte[] haystack, int offset, int length, byte[] needle)
{
    // Nothing to search in means nothing can be found
    if (length <= 0)
    {
        return -1;
    }
    if (needle == null || needle.Length == 0)
    {
        throw new ArgumentException("The needle must contain at least one byte.", "needle");
    }

    // Try every start position on its own. The comparison uses a separate counter so that
    // a failed candidate never makes the scan skip over the bytes it looked at.
    for (int start = 0; start < length; start++)
    {
        // Near the end of the range only the leading bytes of the needle can be compared
        int comparable = Math.Min(needle.Length, length - start);
        int matched = 0;
        while (matched < comparable && haystack[offset + start + matched] == needle[matched])
        {
            matched++;
        }

        if (matched == comparable)
        {
            return start;
        }
    }

    // If we make it here, we never found it.
    return -1;
}
/// <summary>
/// Appends a slice of a byte array to a list of bytes.
/// </summary>
/// <param name="inputBytes">The byte array to copy bytes from</param>
/// <param name="offset">
/// The index into <paramref name="inputBytes"/> of the first byte to copy.
/// </param>
/// <param name="length">
/// The number of bytes to append to <paramref name="outputBytes"/>. Nothing is appended
/// when this is zero or negative.
/// </param>
/// <param name="outputBytes">The list of bytes that receives the copied bytes</param>
private static void AddBytes(byte[] inputBytes, int offset, int length, List<byte> outputBytes)
{
    int copied = 0;
    while (copied < length)
    {
        outputBytes.Add(inputBytes[offset + copied]);
        copied++;
    }
}
/// <summary>
/// Reads in <paramref name="missingBytes"/> bytes from <paramref name="input"/> and checks
/// to see if they match the last <paramref name="missingBytes"/> in
/// <paramref name="control"/>. The bytes from the stream are returned via
/// <paramref name="emergencyBuffer"/>.
/// </summary>
/// <remarks>
/// This method is used when part of a control string is read into the working buffer. The
/// missing bytes from the control string are searched for in order to determine what is
/// the next step in processing the buffer.
/// </remarks>
/// <param name="missingBytes">
/// The number of bytes from <paramref name="control"/> that were missing at the end of the
/// working buffer.
/// </param>
/// <param name="input">The stream to read bytes from</param>
/// <param name="control">The string to look for</param>
/// <param name="emergencyBuffer">
/// The bytes that were read in for comparison. Holds exactly the bytes that were read,
/// which is fewer than <paramref name="missingBytes"/> if the stream ended early.
/// </param>
/// <returns>
/// <c>true</c> if the bytes read in match the last bytes of <paramref name="control"/>.
/// <c>false</c> otherwise, including when the stream ended before enough bytes were read.
/// </returns>
/// <exception cref="InvalidOperationException">
/// No bytes at all could be read from <paramref name="input"/>
/// </exception>
private static bool EmergencyBuffer(int missingBytes, Stream input, byte[] control, out byte[] emergencyBuffer)
{
    // A single read may legitimately return fewer bytes than requested, so keep reading
    // until everything has arrived or the stream has ended
    byte[] readBytes = new byte[missingBytes];
    int totalRead = 0;
    while (totalRead < missingBytes)
    {
        int bytesRead = input.Read(readBytes, totalRead, missingBytes - totalRead);
        if (bytesRead <= 0)
        {
            break;
        }
        totalRead += bytesRead;
    }

    if (totalRead == 0)
    {
        string message = String.Format("Expected to receive at least {0} more bytes. Got none.", missingBytes);
        throw new InvalidOperationException(message);
    }

    if (totalRead < missingBytes)
    {
        // The stream ended early, so the control string cannot be complete. Hand back only
        // the bytes that really came from the stream so no filler bytes are mistaken for
        // data.
        Array.Resize(ref readBytes, totalRead);
        emergencyBuffer = readBytes;
        return false;
    }
    emergencyBuffer = readBytes;

    // Compare what was read against the tail of the control string
    int controlStart = control.Length - missingBytes;
    for (int i = 0; i < missingBytes; i++)
    {
        if (readBytes[i] != control[controlStart + i])
        {
            return false;
        }
    }
    return true;
}
#endregion
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment