Created
April 10, 2016 01:49
-
-
Save benrr101/d6a0bdc3f99df97c37d76687175964f8 to your computer and use it in GitHub Desktop.
FormMultipart Parsers
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
using System; | |
using System.Collections.Generic; | |
using System.IO; | |
using System.Text; | |
using System.Text.RegularExpressions; | |
namespace DolomiteWcfService.MultipartParser | |
{ | |
public abstract class MultiPartPart
{
    #region Factory Method

    /// <summary>
    /// Splits a raw header line into its name (group 1) and its value (group 2). Built once
    /// and shared because compiled regexes are expensive to construct.
    /// </summary>
    private static readonly Regex HeaderRegex = new Regex(@"^\s*([^\s:]+):[ \t]*(.*)$",
        RegexOptions.Compiled | RegexOptions.CultureInvariant);

    /// <summary>
    /// Extracts the quoted <c>name</c> parameter (group 1) from a Content-Disposition value.
    /// The leading delimiter keeps <c>filename="..."</c> from being mistaken for the name.
    /// </summary>
    private static readonly Regex NameRegex = new Regex(@"(?:^|[;\s])name=""([^""]*)""",
        RegexOptions.Compiled | RegexOptions.CultureInvariant);

    /// <summary>
    /// Creates a new multipart part based on the headers provided. If a Content-Type header
    /// is present the part is a file multipart part. Otherwise, it is a form data multipart
    /// part.
    /// </summary>
    /// <param name="headers">
    /// The raw header lines ("Name: value") processed from an HTTP multipart request. Headers
    /// must be known ahead of time in order to properly determine the type of the part.
    /// Lines that do not look like a header are ignored; when a header is repeated the last
    /// occurrence wins.
    /// </param>
    /// <returns>A new multipart part based on the headers provided</returns>
    /// <exception cref="ArgumentNullException"><paramref name="headers"/> is null</exception>
    /// <exception cref="InvalidDataException">
    /// The headers do not contain a Content-Disposition header
    /// </exception>
    public static MultiPartPart CreateMultiPartPart(List<string> headers)
    {
        if (headers == null)
        {
            throw new ArgumentNullException("headers");
        }

        // Header names are case-insensitive in HTTP, so the lookup must be as well
        Dictionary<string, string> workingHeaders =
            new Dictionary<string, string>(StringComparer.OrdinalIgnoreCase);
        foreach (string header in headers)
        {
            Match headerMatch = HeaderRegex.Match(header ?? String.Empty);
            if (!headerMatch.Success)
            {
                // Not a "Name: value" line; nothing usable to store
                continue;
            }

            // Indexer rather than Add so that a repeated header cannot throw
            workingHeaders[headerMatch.Groups[1].Value] = headerMatch.Groups[2].Value;
        }

        // Validate before constructing the part: a file part opens its temporary file in its
        // constructor, and that handle would be leaked if we threw afterwards.
        string contentDisposition;
        if (!workingHeaders.TryGetValue("Content-Disposition", out contentDisposition))
        {
            throw new InvalidDataException("Multipart part is missing Content-Disposition header.");
        }

        // Determine the type of the part based on the presence of a content type
        MultiPartPart part;
        if (workingHeaders.ContainsKey("Content-Type"))
        {
            part = new FileMultiPartPart();
        }
        else
        {
            part = new FormMultiPartPart();
        }
        part.Headers = workingHeaders;

        // The form-data name comes from the content-disposition header; empty if absent
        Match nameMatch = NameRegex.Match(contentDisposition);
        part.Name = nameMatch.Success ? nameMatch.Groups[1].Value : String.Empty;
        return part;
    }

    #endregion

    /// <summary>
    /// The name of the form-data provided by this multipart part
    /// </summary>
    public string Name { get; private set; }

    /// <summary>
    /// The headers for the multipart part, keyed by header name
    /// </summary>
    public Dictionary<string, string> Headers { get; private set; }

    /// <summary>
    /// Appends content bytes to this part.
    /// </summary>
    /// <param name="bytesToWrite">The array to take bytes from</param>
    /// <param name="offset">The 0-based index of the first byte to take</param>
    /// <param name="length">The number of bytes to take</param>
    public abstract void WriteBytes(byte[] bytesToWrite, int offset, int length);

    /// <summary>
    /// Signals that all content for this part has been written.
    /// </summary>
    public abstract void Complete();
}
/// <summary>
/// A multipart part suited for writing files to the disk. Content is streamed into a
/// temporary file that is created when the part is constructed.
/// </summary>
public class FileMultiPartPart : MultiPartPart
{
    /// <summary>
    /// Write handle to the temporary file; null once <see cref="Complete"/> has run.
    /// </summary>
    private FileStream _outFile;

    /// <summary>
    /// The path to the file that was created for storing this multipart part
    /// </summary>
    public string FileTempPath { get; set; }

    /// <summary>
    /// A read-only stream to the temporary file that was created for this multipart part.
    /// A new stream is opened on every access, so each one should be processed using
    /// <c>using</c> or disposed.
    /// </summary>
    public FileStream Stream
    {
        get { return File.OpenRead(FileTempPath); }
    }

    /// <summary>
    /// Constructs a new FileMultiPartPart by constructing a new temporary file
    /// </summary>
    protected internal FileMultiPartPart()
    {
        // Create a new file with a temp name (relative, so it lands in the current directory)
        // TODO: Figure out how to get a base path into there
        FileTempPath = Guid.NewGuid().ToString();
        _outFile = File.OpenWrite(FileTempPath);
    }

    /// <summary>
    /// Writes bytes out to the temporary storage file
    /// </summary>
    /// <param name="bytesToWrite">The array bytes to write from</param>
    /// <param name="offset">
    /// The 0-based index into <paramref name="bytesToWrite"/> indicating where to start
    /// writing bytes from
    /// </param>
    /// <param name="length">
    /// The number of bytes from <paramref name="bytesToWrite"/> to write to the file
    /// </param>
    /// <exception cref="InvalidOperationException">
    /// The part has already been completed
    /// </exception>
    public override void WriteBytes(byte[] bytesToWrite, int offset, int length)
    {
        if (_outFile == null)
        {
            throw new InvalidOperationException("Multipart file has already been finalized.");
        }
        _outFile.Write(bytesToWrite, offset, length);
    }

    /// <summary>
    /// Method to be called when creation of the multipart part is completed. This is to clean
    /// up resources and indicate that the part cannot be written to again. Calling it more
    /// than once is harmless.
    /// </summary>
    public override void Complete()
    {
        if (_outFile == null)
        {
            // Already completed; nothing left to release
            return;
        }

        try
        {
            // Close up the file writer
            _outFile.Close();
        }
        finally
        {
            // Drop the handle even if closing failed so the part is never written to again
            _outFile = null;
        }
    }
}
/// <summary>
/// A multipart part suited for reading form values into memory
/// </summary>
public class FormMultiPartPart : MultiPartPart
{
    /// <summary>
    /// Accumulates the decoded text; null once <see cref="Complete"/> has run.
    /// </summary>
    private StringBuilder _valueBuilder;

    /// <summary>
    /// Stateful UTF-8 decoder. It remembers incomplete byte sequences between calls, so a
    /// character split across two <see cref="WriteBytes"/> calls still decodes correctly.
    /// Pure ASCII content decodes exactly as it would with an ASCII decoder.
    /// </summary>
    private readonly Decoder _decoder;

    /// <summary>
    /// The content of the multipart, or the value of the field. Populated by
    /// <see cref="Complete"/>.
    /// </summary>
    public string Value { get; set; }

    /// <summary>
    /// Constructs a new, empty form data part
    /// </summary>
    protected internal FormMultiPartPart()
    {
        _valueBuilder = new StringBuilder();
        _decoder = Encoding.UTF8.GetDecoder();
    }

    /// <summary>
    /// Decodes bytes as UTF-8 text and stores them to the value builder
    /// </summary>
    /// <param name="bytesToWrite">The array of bytes to take from</param>
    /// <param name="offset">
    /// The 0-based index into <paramref name="bytesToWrite"/> of the first byte to take
    /// </param>
    /// <param name="length">The number of bytes to take</param>
    /// <exception cref="InvalidOperationException">
    /// The part has already been completed
    /// </exception>
    public override void WriteBytes(byte[] bytesToWrite, int offset, int length)
    {
        if (_valueBuilder == null)
        {
            throw new InvalidOperationException("Multipart form data has already been finalized.");
        }

        char[] decoded = new char[_decoder.GetCharCount(bytesToWrite, offset, length)];
        int decodedCount = _decoder.GetChars(bytesToWrite, offset, length, decoded, 0);
        _valueBuilder.Append(decoded, 0, decodedCount);
    }

    /// <summary>
    /// Finalizes the part: publishes the accumulated text through <see cref="Value"/> and
    /// prevents further writes. Calling it more than once is harmless.
    /// </summary>
    public override void Complete()
    {
        if (_valueBuilder == null)
        {
            // Already completed; keep the value that was published the first time
            return;
        }

        // Flush the decoder so a dangling, incomplete byte sequence is not silently lost
        byte[] nothing = new byte[0];
        char[] tail = new char[_decoder.GetCharCount(nothing, 0, 0, true)];
        int tailCount = _decoder.GetChars(nothing, 0, 0, tail, 0, true);
        _valueBuilder.Append(tail, 0, tailCount);

        // Figure out the value
        Value = _valueBuilder.ToString();
        _valueBuilder = null;
    }
}
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
using System; | |
using System.Collections.Generic; | |
using System.Diagnostics.Contracts; | |
using System.IO; | |
using System.Text; | |
namespace DolomiteWcfService.MultipartParser | |
{ | |
public class MultipartStreamParser | |
{ | |
private enum Mode
{
    /// <summary>
    /// The mode where we're searching for the boundary definition on the first line of the
    /// stream
    /// </summary>
    Boundary,

    /// <summary>
    /// The mode where we're finding and storing headers
    /// </summary>
    Header,

    /// <summary>
    /// The mode where we're writing out bytes
    /// </summary>
    Bytes,

    /// <summary>
    /// The mode where we either find the end of the multipart or begin processing another
    /// multipart part.
    /// </summary>
    BytesProcess
}

#region Control Strings

/// <summary>
/// Bytes used to mark the end of a line in the multipart format
/// </summary>
private static readonly byte[] LineSeparator = { (byte)'\r', (byte)'\n' };

/// <summary>
/// Bytes that follow the boundary of the last part to mark the end of the multipart.
///
/// Ex:
/// ---boundary
/// bytes
/// ---boundary
/// morebytes
/// ---boundary--
/// </summary>
private static readonly byte[] EndOfMultipart = { (byte)'-', (byte)'-' };

/// <summary>
/// Bytes used to mark the boundary of a part of the multipart. Discovered from the stream
/// being parsed, so it is kept per parser instance rather than shared between parsers.
/// </summary>
private byte[] _boundary;

#endregion

#region Internal Parsing State

/// <summary>
/// Length of the buffer to read from stream
/// </summary>
private const int BufferLength = 4096;

/// <summary>
/// The stream that will be parsed.
/// </summary>
private readonly Stream _inputStream;

/// <summary>
/// Temporary storage for byte arrays. Used for composing the boundary and headers.
/// </summary>
private readonly List<byte> _byteArrayBuilder;

/// <summary>
/// The current mode of the parsing
/// </summary>
private Mode _mode;

/// <summary>
/// The list of headers for the current multipart part
/// </summary>
private List<string> _workingHeaderList;

#endregion

#region External Properties

/// <summary>
/// The collection of multiparts found during parsing
/// </summary>
public List<MultiPartPart> MultiParts;

#endregion
/// <summary>
/// Creates a parser over the given stream and resets all parsing state. Nothing is read
/// from the stream until <see cref="Parse"/> is called.
/// </summary>
/// <param name="stream">The stream from which to parse multiparts</param>
public MultipartStreamParser(Stream stream)
{
    // Results exposed to the caller
    MultiParts = new List<MultiPartPart>();

    // Scratch space for the boundary line and header lines
    _byteArrayBuilder = new List<byte>(BufferLength);

    // The first thing expected on the stream is the boundary line
    _mode = Mode.Boundary;
    _inputStream = stream;
}
/// <summary>
/// Reads the input stream to its end, one buffer at a time, handing each buffer to the
/// handler for whichever mode the parser is in when the buffer arrives.
/// </summary>
public void Parse()
{
    byte[] chunk = new byte[BufferLength];
    while (true)
    {
        int chunkLength = _inputStream.Read(chunk, 0, chunk.Length);
        if (chunkLength <= 0)
        {
            // Nothing more to read
            break;
        }

        // Dispatch on the mode as it stands before this chunk is processed; the handlers
        // themselves move the parser on to later modes as they consume the chunk.
        Mode modeAtStart = _mode;
        if (modeAtStart == Mode.Boundary)
        {
            BoundaryModeParse(chunk, 0, chunkLength);
        }
        else if (modeAtStart == Mode.Header)
        {
            HeaderModeParse(chunk, 0, chunkLength);
        }
        else if (modeAtStart == Mode.Bytes)
        {
            BytesModeParse(chunk, 0, chunkLength);
        }
        else if (modeAtStart == Mode.BytesProcess)
        {
            BytesProcessModeParse(chunk, 0, chunkLength);
        }
    }
}
/// <summary>
/// Handles a buffer while the boundary line is still being collected. Everything up to the
/// first line break becomes the boundary; the rest of the buffer is passed on to header
/// processing.
/// </summary>
/// <param name="buffer">The bytes that were read from the stream</param>
/// <param name="offset">Index into <paramref name="buffer"/> of the first byte to process</param>
/// <param name="length">Number of bytes to process</param>
private void BoundaryModeParse(byte[] buffer, int offset, int length)
{
    int breakAt = SearchByteArray(buffer, offset, length, LineSeparator);
    if (breakAt < 0)
    {
        // No line break yet: the whole buffer is still part of the boundary line
        AddBytes(buffer, offset, length, _byteArrayBuilder);
        return;
    }

    // How many bytes of the separator fell outside the buffer (0 or less means none)
    int shortfall = LineSeparator.Length - (length - breakAt);
    bool separatorCutOff = shortfall > 0 && shortfall < LineSeparator.Length;
    if (!separatorCutOff)
    {
        // The full line break is in the buffer: finish the boundary and carry on with the
        // remaining bytes as headers.
        ContinueFromBoundaryMode(buffer, offset, breakAt, length, true);
        return;
    }

    // Only the start of the separator is in the buffer; pull the rest from the stream
    byte[] extraBytes;
    if (EmergencyBuffer(shortfall, _inputStream, LineSeparator, out extraBytes))
    {
        // It really was the line break. The extra bytes are separator only, so they are
        // dropped and there is nothing left in the buffer to continue with.
        ContinueFromBoundaryMode(buffer, offset, breakAt, length, false);
    }
    else
    {
        // False alarm: the buffer and the extra bytes all belong to the boundary line
        AddBytes(buffer, offset, length, _byteArrayBuilder);
        AddBytes(extraBytes, 0, extraBytes.Length, _byteArrayBuilder);
    }
}
/// <summary>
/// Handles a buffer while header lines are being collected. Each complete line is stored
/// as a header; a blank line ends the headers, creates the multipart part, and hands the
/// rest of the buffer to byte processing.
/// </summary>
/// <param name="buffer">The bytes that were read from the stream</param>
/// <param name="offset">Index into <paramref name="buffer"/> of the first byte to process</param>
/// <param name="length">Number of bytes to process</param>
private void HeaderModeParse(byte[] buffer, int offset, int length)
{
    // Search for a line break in the buffer
    int relLineBreakIndex = SearchByteArray(buffer, offset, length, LineSeparator);
    if (relLineBreakIndex < 0)
    {
        // We didn't find the line break, so throw everything into the byte array builder
        AddBytes(buffer, offset, length, _byteArrayBuilder);
        return;
    }

    // A line break at the very start only means "blank line" when no header text is pending
    // from a previous buffer. Otherwise it terminates that pending header.
    bool isBlankLine = relLineBreakIndex == 0 && _byteArrayBuilder.Count == 0;

    // The separator may be cut off by the end of the buffer. This has to be checked before
    // acting on a blank line, or a lone '\r' at the end of the buffer would be taken for a
    // complete line break.
    int missingBytes = LineSeparator.Length - (length - relLineBreakIndex);
    if (missingBytes > 0 && missingBytes < LineSeparator.Length)
    {
        // Bust out the emergency buffer
        byte[] emergencyBytes;
        if (!EmergencyBuffer(missingBytes, _inputStream, LineSeparator, out emergencyBytes))
        {
            // We didn't find the line break, so throw everything into the header builder,
            // including the emergency buffer
            AddBytes(buffer, offset, length, _byteArrayBuilder);
            AddBytes(emergencyBytes, 0, emergencyBytes.Length, _byteArrayBuilder);
        }
        else if (isBlankLine)
        {
            // The headers are finished and the buffer is used up; the part's content
            // starts with the next read.
            MultiParts.Add(MultiPartPart.CreateMultiPartPart(_workingHeaderList));
            _workingHeaderList = new List<string>();
            _mode = Mode.Bytes;
        }
        else
        {
            // We found the line break, everything before it makes up the header, no need
            // to recover the emergency buffer
            ContinueToHeaderMode(buffer, offset, relLineBreakIndex, length, false);
        }
        return;
    }

    if (isBlankLine)
    {
        // Blank line: create a new multipart part from the collected headers
        MultiParts.Add(MultiPartPart.CreateMultiPartPart(_workingHeaderList));
        _workingHeaderList = new List<string>();

        // Jump into byte mode with whatever follows the blank line
        _mode = Mode.Bytes;
        BytesModeParse(buffer, offset + LineSeparator.Length, length - LineSeparator.Length);
    }
    else
    {
        // We found the line break, start processing more headers
        ContinueToHeaderMode(buffer, offset, relLineBreakIndex, length, true);
    }
}
/// <summary>
/// Handles a buffer while the content of a part is being read. Bytes are written to the
/// most recently created part until the boundary is found, at which point the part is
/// completed and the rest of the buffer is passed on.
/// </summary>
/// <param name="buffer">The bytes that were read from the stream</param>
/// <param name="offset">Index into <paramref name="buffer"/> of the first byte to process</param>
/// <param name="length">Number of bytes to process</param>
private void BytesModeParse(byte[] buffer, int offset, int length)
{
    // Search for a boundary
    int relBoundaryIndex = SearchByteArray(buffer, offset, length, _boundary);
    int missingBytes = _boundary.Length - (length - relBoundaryIndex);
    MultiPartPart currentPart = MultiParts[MultiParts.Count - 1];
    if (relBoundaryIndex < 0)
    {
        // We didn't find the boundary. Take all the bytes and throw it into the multipart
        currentPart.WriteBytes(buffer, offset, length);
    }
    else if (missingBytes > 0 && missingBytes < _boundary.Length)
    {
        // Only the start of the boundary fit in the buffer. Bust out the emergency buffer
        byte[] emergencyBytes;
        if (EmergencyBuffer(missingBytes, _inputStream, _boundary, out emergencyBytes))
        {
            // We found the boundary marker, everything before it goes into the output file
            // There's no need to recover the emergency buffer.
            ContinueToByteProcessMode(buffer, offset, relBoundaryIndex, length, false);
        }
        else
        {
            // It was not the boundary after all, so the bytes that looked like its start
            // are content too. Write the whole buffer (not just the bytes before the
            // false match) followed by the emergency buffer.
            currentPart.WriteBytes(buffer, offset, length);
            currentPart.WriteBytes(emergencyBytes, 0, emergencyBytes.Length);
        }
    }
    else
    {
        // We found boundary, continue into byte process mode
        ContinueToByteProcessMode(buffer, offset, relBoundaryIndex, length, true);
    }
}
/// <summary>
/// Handles the bytes that directly follow a boundary. An end marker followed by a line
/// break finishes the multipart, a line break starts the headers of the next part, and
/// anything else is a format error.
/// </summary>
/// <param name="buffer">The bytes that were read from the stream</param>
/// <param name="offset">Index into <paramref name="buffer"/> of the first byte to process</param>
/// <param name="length">Number of bytes to process</param>
private void BytesProcessModeParse(byte[] buffer, int offset, int length)
{
    int endMarkerAt = SearchByteArray(buffer, offset, length, EndOfMultipart);
    int newLineAt = SearchByteArray(buffer, offset, length, LineSeparator);

    // End marker straight away, followed by a line break: this was the last part
    if (endMarkerAt == 0 && newLineAt == EndOfMultipart.Length)
    {
        return;
    }

    // Line break straight away: another part follows, starting with its headers
    if (newLineAt == 0)
    {
        _mode = Mode.Header;
        _workingHeaderList = new List<string>();
        int skipped = LineSeparator.Length;
        HeaderModeParse(buffer, offset + skipped, length - skipped);
        return;
    }

    byte[] extraBytes;

    // The end marker may be cut off by the end of the buffer
    int endShortfall = EndOfMultipart.Length - (length - endMarkerAt);
    if (endShortfall > 0 && endShortfall < EndOfMultipart.Length)
    {
        if (!EmergencyBuffer(endShortfall, _inputStream, EndOfMultipart, out extraBytes))
        {
            // A '-' followed by something else is invalid
            ContinueToErrorState();
        }
        return;
    }

    // The line break may be cut off by the end of the buffer
    int newLineShortfall = LineSeparator.Length - (length - newLineAt);
    bool newLineCutOff = newLineShortfall > 0 && newLineShortfall < LineSeparator.Length;
    if (newLineCutOff && EmergencyBuffer(newLineShortfall, _inputStream, LineSeparator, out extraBytes))
    {
        // The line break was completed by the extra bytes, which are not needed beyond
        // that. The next part's headers start with the next read.
        _mode = Mode.Header;
        _workingHeaderList = new List<string>();
        return;
    }

    // Either a '\r' followed by something else, or nothing recognisable at all
    ContinueToErrorState();
}
#region Transition Methods | |
/// <summary>
/// Finishes collecting the boundary and switches the parser into header mode.
/// </summary>
/// <param name="buffer">The buffer currently being processed</param>
/// <param name="offset">Index into <paramref name="buffer"/> where processing started</param>
/// <param name="lengthToAdd">Number of bytes from the buffer that belong to the boundary</param>
/// <param name="originalLength">Number of bytes that were available from <paramref name="offset"/></param>
/// <param name="cont">
/// Whether to go on and parse the bytes after the line break as headers
/// </param>
private void ContinueFromBoundaryMode(byte[] buffer, int offset, int lengthToAdd, int originalLength, bool cont)
{
    // The bytes collected so far plus these last ones make up the boundary
    AddBytes(buffer, offset, lengthToAdd, _byteArrayBuilder);
    _boundary = _byteArrayBuilder.ToArray();

    // Set up for collecting headers
    _mode = Mode.Header;
    _byteArrayBuilder.Clear();
    _workingHeaderList = new List<string>();

    if (!cont)
    {
        return;
    }

    // Skip past the boundary bytes and the line break that ended them
    int consumed = lengthToAdd + LineSeparator.Length;
    HeaderModeParse(buffer, offset + consumed, originalLength - consumed);
}
/// <summary>
/// Finishes collecting one header line, stores it, and optionally keeps parsing headers
/// from the rest of the buffer.
/// </summary>
/// <param name="buffer">The buffer currently being processed</param>
/// <param name="offset">Index into <paramref name="buffer"/> where processing started</param>
/// <param name="lengthToAdd">Number of bytes from the buffer that belong to the header</param>
/// <param name="originalLength">Number of bytes that were available from <paramref name="offset"/></param>
/// <param name="cont">
/// Whether to go on and parse the bytes after the line break as further headers
/// </param>
private void ContinueToHeaderMode(byte[] buffer, int offset, int lengthToAdd, int originalLength, bool cont)
{
    // Complete the header line and keep it as text
    AddBytes(buffer, offset, lengthToAdd, _byteArrayBuilder);
    byte[] headerBytes = _byteArrayBuilder.ToArray();
    string headerLine = Encoding.ASCII.GetString(headerBytes);
    _workingHeaderList.Add(headerLine);
    _byteArrayBuilder.Clear();

    // Skip past the header bytes and the line break that ended them
    int consumed = lengthToAdd + LineSeparator.Length;
    if (!cont)
    {
        return;
    }
    HeaderModeParse(buffer, offset + consumed, originalLength - consumed);
}
/// <summary>
/// Writes the last content bytes to the current part, completes it, and switches the
/// parser into the mode that decides what follows the boundary.
/// </summary>
/// <param name="buffer">The buffer currently being processed</param>
/// <param name="offset">Index into <paramref name="buffer"/> where processing started</param>
/// <param name="lengthToAdd">Number of bytes from the buffer that belong to the part</param>
/// <param name="originalLength">Number of bytes that were available from <paramref name="offset"/></param>
/// <param name="cont">
/// Whether to go on and parse the bytes after the boundary
/// </param>
private void ContinueToByteProcessMode(byte[] buffer, int offset, int lengthToAdd, int originalLength, bool cont)
{
    // The part being filled is always the most recently created one
    MultiPartPart finishedPart = MultiParts[MultiParts.Count - 1];
    finishedPart.WriteBytes(buffer, offset, lengthToAdd);
    finishedPart.Complete();

    // Skip past the content bytes and the boundary that ended them
    int consumed = lengthToAdd + _boundary.Length;
    _mode = Mode.BytesProcess;
    if (!cont)
    {
        return;
    }
    BytesProcessModeParse(buffer, offset + consumed, originalLength - consumed);
}
/// <summary>
/// Aborts parsing because the bytes after a boundary were neither an end marker nor a
/// line break.
/// </summary>
/// <exception cref="FormatException">Always thrown</exception>
private static void ContinueToErrorState()
{
    const string message = "Improperly formed multipart request. Expected end marker or newline.";
    throw new FormatException(message);
}
#endregion | |
#region Utility Methods | |
/// <summary>
/// Searches a given byte array for another byte array. Only finds the first instance.
/// A needle that starts inside the searched range but is cut off by the end of the range
/// counts as found, as long as every byte that is available matches; callers compare the
/// returned index with the range length to recognise that case.
/// </summary>
/// <param name="haystack">The byte array to search</param>
/// <param name="offset">The offset into the haystack to start searching from</param>
/// <param name="length">
/// The number of bytes to search in <paramref name="haystack"/>. Generally, this is the
/// number of bytes in <paramref name="haystack"/> unless <paramref name="haystack"/> is
/// only partially filled.
/// </param>
/// <param name="needle">The byte array to search for</param>
/// <returns>
/// The index into <paramref name="haystack"/> where <paramref name="needle"/> was found,
/// relative to the offset provided.
/// -1 indicates that <paramref name="needle"/> was not found, or that it was null or empty.
/// </returns>
[Pure]
private static int SearchByteArray(byte[] haystack, int offset, int length, byte[] needle)
{
    // There is nothing to look for in an empty needle
    if (needle == null || needle.Length == 0)
    {
        return -1;
    }

    for (int start = 0; start < length; start++)
    {
        // Only compare further if the first byte matches
        if (haystack[start + offset] != needle[0])
        {
            continue;
        }

        // Count how many needle bytes match from this position. The outer index is left
        // untouched, so a failed attempt resumes at the very next byte and overlapping
        // candidates (such as "\r\r\n" when looking for "\r\n") are not skipped.
        int matched = 1;
        while (matched < needle.Length
               && start + matched < length
               && haystack[start + matched + offset] == needle[matched])
        {
            matched++;
        }

        // Either the whole needle matched, or the range ended while it was still matching
        if (matched == needle.Length || start + matched == length)
        {
            return start;
        }
    }

    // If we make it here, we never found it.
    return -1;
}
/// <summary>
/// Appends a slice of a byte array to a list of bytes, one byte at a time and in order.
/// </summary>
/// <param name="inputBytes">The byte array to add bytes from</param>
/// <param name="offset">
/// The offset into <paramref name="inputBytes"/> from which to begin reading bytes.
/// </param>
/// <param name="length">
/// The number of bytes to add to <paramref name="outputBytes"/>. Nothing is added when
/// this is zero or negative.
/// </param>
/// <param name="outputBytes">A list of bytes that will be appended to</param>
private static void AddBytes(byte[] inputBytes, int offset, int length, List<byte> outputBytes)
{
    int copied = 0;
    while (copied < length)
    {
        byte next = inputBytes[offset + copied];
        outputBytes.Add(next);
        copied++;
    }
}
/// <summary>
/// Reads in <paramref name="missingBytes"/> bytes from <paramref name="input"/> and checks
/// to see if they match the last <paramref name="missingBytes"/> in
/// <paramref name="control"/>. The bytes from the stream are returned via
/// <paramref name="emergencyBuffer"/>.
/// </summary>
/// <remarks>
/// This method is used when part of a control string is read into the working buffer. The
/// missing bytes from the control string are searched for in order to determine what is
/// the next step in processing the buffer.
/// </remarks>
/// <param name="missingBytes">
/// The number of bytes from <paramref name="control"/> that were missing at the end of the
/// working buffer.
/// </param>
/// <param name="input">The stream to read bytes from</param>
/// <param name="control">The string to look for</param>
/// <param name="emergencyBuffer">
/// The bytes that were read in for comparison. Holds exactly the bytes that were read,
/// which is fewer than <paramref name="missingBytes"/> only if the stream ended early.
/// </param>
/// <returns>
/// <c>true</c> if the bytes read in match the last bytes of <paramref name="control"/>.
/// <c>false</c> otherwise, including when the stream ended before enough bytes arrived.
/// </returns>
/// <exception cref="InvalidOperationException">No bytes at all could be read</exception>
private static bool EmergencyBuffer(int missingBytes, Stream input, byte[] control, out byte[] emergencyBuffer)
{
    // A single Read call may legitimately return fewer bytes than requested, so keep
    // reading until the buffer is full or the stream ends.
    byte[] readBytes = new byte[missingBytes];
    int totalRead = 0;
    while (totalRead < missingBytes)
    {
        int bytesRead = input.Read(readBytes, totalRead, missingBytes - totalRead);
        if (bytesRead <= 0)
        {
            break;
        }
        totalRead += bytesRead;
    }

    if (totalRead == 0)
    {
        emergencyBuffer = readBytes;
        string message = String.Format("Expected to receive at least {0} more bytes. Got none.", missingBytes);
        throw new InvalidOperationException(message);
    }

    if (totalRead < missingBytes)
    {
        // The stream ended early. Hand back only the bytes that really came from the
        // stream so callers do not treat zero padding as content.
        emergencyBuffer = new byte[totalRead];
        Array.Copy(readBytes, emergencyBuffer, totalRead);
        return false;
    }

    emergencyBuffer = readBytes;

    // Compare what was read with the tail of the control string, byte for byte
    int controlStart = control.Length - missingBytes;
    for (int i = 0; i < missingBytes; i++)
    {
        if (control[controlStart + i] != readBytes[i])
        {
            return false;
        }
    }
    return true;
}
#endregion | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment