Created
February 9, 2011 12:46
-
-
Save jcdickinson/818408 to your computer and use it in GitHub Desktop.
A Memoizing Text Reader
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/// <summary>
/// Represents a way to access a text stream
/// using random access (lines only).
/// </summary>
/// <remarks>
/// The stream is decoded one byte at a time so that the stream position
/// observed right after a line terminator is exactly the byte offset at which
/// the next line starts. Those offsets are memoized, which lets
/// <see cref="Seek"/> jump straight to any line that has already been visited.
/// Line numbers are zero-based and line 0 is taken to start at byte offset 0.
/// </remarks>
public class RandomAccessReader : IDisposable
{
    // Byte offset at which each known line starts; the list index is the line number.
    private readonly List<long> _lineOffsets
        = new List<long>();

    /// <summary>
    /// Gets the current line.
    /// </summary>
    /// <value>
    /// The zero-based number of the line the next <see cref="ReadLine"/> call will return.
    /// </value>
    public int Line
    {
        get;
        private set;
    }

    private Encoding _encoding;

    /// <summary>
    /// Gets or sets the encoding.
    /// </summary>
    /// <value>
    /// The encoding.
    /// </value>
    /// <remarks>
    /// Setting the encoding replaces the decoder. Line offsets that were
    /// already memoized are kept as they are.
    /// </remarks>
    /// <exception cref="ArgumentNullException">The value is <see langword="null"/>.</exception>
    public Encoding Encoding
    {
        get
        {
            return _encoding;
        }
        set
        {
            if (value == null)
            {
                throw new ArgumentNullException("value");
            }

            _encoding = value;
            Reset();
        }
    }

    private string _newLine;

    /// <summary>
    /// Gets or sets the new line string.
    /// </summary>
    /// <value>
    /// The new line string. Defaults to <see cref="Environment.NewLine"/>.
    /// </value>
    /// <remarks>
    /// Line offsets that were already memoized are kept when this value changes.
    /// </remarks>
    /// <exception cref="ArgumentNullException">The value is <see langword="null"/>.</exception>
    /// <exception cref="ArgumentException">The value is an empty string.</exception>
    public string NewLine
    {
        get
        {
            return _newLine;
        }
        set
        {
            if (value == null)
            {
                throw new ArgumentNullException("value");
            }

            // An empty terminator can never be matched and would make every
            // line boundary ambiguous, so reject it up front.
            if (value.Length == 0)
            {
                throw new ArgumentException("The new line string must not be empty.", "value");
            }

            _newLine = value;
        }
    }

    /// <summary>
    /// Gets or sets a value indicating whether the stream is owned by this instance.
    /// </summary>
    /// <value>
    /// <see langword="true"/> if the stream is owned by this instance; otherwise, <see langword="false"/>.
    /// </value>
    public bool OwnsStream
    {
        get;
        set;
    }

    private Stream _stream;
    private Decoder _decoder;

    // Single-byte read buffer: reading byte by byte keeps Stream.Position exact.
    private readonly byte[] _byteBuffer = new byte[1];

    // Scratch buffer for decoded characters; grown on demand.
    private char[] _charBuffer = new char[2];

    /// <summary>
    /// Initializes a new instance of the <see cref="RandomAccessReader"/> class
    /// that owns the supplied stream.
    /// </summary>
    /// <param name="stream">The stream.</param>
    /// <param name="encoding">The encoding, or <see langword="null"/> to use UTF-8.</param>
    /// <exception cref="ArgumentNullException"><paramref name="stream"/> is <see langword="null"/>.</exception>
    /// <exception cref="ArgumentException"><paramref name="stream"/> does not support seeking.</exception>
    public RandomAccessReader(Stream stream, Encoding encoding)
        : this(stream, encoding, true)
    {
    }

    /// <summary>
    /// Initializes a new instance of the <see cref="RandomAccessReader"/> class.
    /// </summary>
    /// <param name="stream">The stream.</param>
    /// <param name="encoding">The encoding, or <see langword="null"/> to use UTF-8.</param>
    /// <param name="ownsStream">if set to <see langword="true"/> the stream is owned by this instance
    /// and is disposed together with it; otherwise, the stream is left open on dispose.</param>
    /// <exception cref="ArgumentNullException"><paramref name="stream"/> is <see langword="null"/>.</exception>
    /// <exception cref="ArgumentException"><paramref name="stream"/> does not support seeking.</exception>
    public RandomAccessReader(Stream stream, Encoding encoding, bool ownsStream)
    {
        if (stream == null)
        {
            throw new ArgumentNullException("stream");
        }

        // Both ReadLine and Seek use Stream.Position, so fail here instead of
        // on the first read.
        if (!stream.CanSeek)
        {
            throw new ArgumentException("The stream must support seeking.", "stream");
        }

        OwnsStream = ownsStream;
        _stream = stream;
        _newLine = Environment.NewLine;
        _lineOffsets.Add(0);
        _encoding = encoding ?? Encoding.UTF8;
        Reset();
    }

    /// <summary>
    /// Seeks to the specified line.
    /// </summary>
    /// <param name="line">The zero-based line to seek to.</param>
    /// <returns>The line that was actually moved to.</returns>
    /// <remarks>
    /// For seek-forward operations there is no guarantee the line will
    /// be reached: scanning stops at the end of the stream.
    /// </remarks>
    /// <exception cref="ArgumentOutOfRangeException"><paramref name="line"/> is negative.</exception>
    /// <exception cref="ObjectDisposedException">The reader has been disposed.</exception>
    public int Seek(int line)
    {
        if (line < 0)
        {
            throw new ArgumentOutOfRangeException("line");
        }

        EnsureNotDisposed();

        var lastKnownLine = _lineOffsets.Count - 1;
        if (line <= lastKnownLine)
        {
            // The offset is memoized: jump directly.
            MoveTo(line);
        }
        else
        {
            // Start from the furthest memoized line (unless we are already
            // at or beyond it) and scan forward, memoizing as we go.
            if (Line < lastKnownLine)
            {
                MoveTo(lastKnownLine);
            }

            while (Line < line && ReadLine() != null)
            {
                // ReadLine advances Line and records offsets.
            }
        }

        return Line;
    }

    /// <summary>
    /// Reads a line from the reader.
    /// </summary>
    /// <returns>
    /// The line without its terminator, or <see langword="null"/> if the end
    /// of the stream had already been reached. A final line that is not
    /// followed by a terminator is still returned.
    /// </returns>
    /// <exception cref="ObjectDisposedException">The reader has been disposed.</exception>
    public string ReadLine()
    {
        var stream = EnsureNotDisposed();
        var newLine = _newLine;
        var line = new StringBuilder();
        var anyBytesRead = false;
        var newLineReached = false;

        while (!newLineReached && stream.Read(_byteBuffer, 0, 1) == 1)
        {
            anyBytesRead = true;

            // GetCharCount does not change the decoder state, so GetChars must
            // be called for every byte - even when no character is complete
            // yet - otherwise lead bytes of multi-byte sequences are lost.
            var charCount = _decoder.GetCharCount(_byteBuffer, 0, 1, false);
            EnsureCharCapacity(charCount);
            charCount = _decoder.GetChars(_byteBuffer, 0, 1, _charBuffer, 0, false);

            for (var i = 0; i < charCount; i++)
            {
                line.Append(_charBuffer[i]);

                // Compare the tail of the line with the terminator. This copes
                // with partial matches (e.g. a lone '\r' when NewLine is "\r\n")
                // without any carried-over matching state.
                if (EndsWith(line, newLine))
                {
                    line.Length -= newLine.Length;
                    newLineReached = true;
                    break;
                }
            }
        }

        if (!newLineReached)
        {
            if (!anyBytesRead)
            {
                // Already at the end of the stream: there is no further line.
                return null;
            }

            // End of stream in the middle of a line: flush whatever the
            // decoder still holds (e.g. a truncated multi-byte sequence).
            var pending = _decoder.GetCharCount(_byteBuffer, 0, 0, true);
            EnsureCharCapacity(pending);
            pending = _decoder.GetChars(_byteBuffer, 0, 0, _charBuffer, 0, true);
            line.Append(_charBuffer, 0, pending);
        }

        Line++;

        // Memoize where the next line starts, if it is not known yet.
        if (newLineReached && Line == _lineOffsets.Count)
        {
            _lineOffsets.Add(stream.Position);
        }

        return line.ToString();
    }

    /// <summary>
    /// Resets this instance by creating a fresh decoder for the current encoding.
    /// </summary>
    private void Reset()
    {
        _decoder = _encoding.GetDecoder();
    }

    /// <summary>
    /// Positions the stream at the start of a memoized line.
    /// </summary>
    /// <param name="line">A line whose offset is already present in the offset list.</param>
    private void MoveTo(int line)
    {
        _stream.Position = _lineOffsets[line];

        // Discard any bytes buffered by the decoder before the jump.
        _decoder.Reset();
        Line = line;
    }

    /// <summary>
    /// Makes sure the character scratch buffer can hold the given number of characters.
    /// </summary>
    /// <param name="required">The required capacity.</param>
    private void EnsureCharCapacity(int required)
    {
        if (_charBuffer.Length < required)
        {
            _charBuffer = new char[required];
        }
    }

    /// <summary>
    /// Returns the underlying stream, or throws if the reader has been disposed.
    /// </summary>
    /// <returns>The underlying stream.</returns>
    /// <exception cref="ObjectDisposedException">The reader has been disposed.</exception>
    private Stream EnsureNotDisposed()
    {
        var stream = _stream;
        if (stream == null)
        {
            throw new ObjectDisposedException(GetType().Name);
        }

        return stream;
    }

    /// <summary>
    /// Determines whether the builder's content ends with the given suffix.
    /// </summary>
    /// <param name="builder">The builder to inspect.</param>
    /// <param name="suffix">The suffix to look for.</param>
    /// <returns><see langword="true"/> if the builder ends with the suffix; otherwise, <see langword="false"/>.</returns>
    private static bool EndsWith(StringBuilder builder, string suffix)
    {
        var start = builder.Length - suffix.Length;
        if (start < 0)
        {
            return false;
        }

        // Compare back to front: the most recent character is the most likely mismatch.
        for (var i = suffix.Length - 1; i >= 0; i--)
        {
            if (builder[start + i] != suffix[i])
            {
                return false;
            }
        }

        return true;
    }

    /// <summary>
    /// Releases the underlying stream. The stream itself is disposed only when
    /// <see cref="OwnsStream"/> is <see langword="true"/>.
    /// </summary>
    public void Dispose()
    {
        // Exchange makes repeated Dispose calls harmless: only the first one sees the stream.
        var stream = Interlocked.Exchange(ref _stream, null);
        if (OwnsStream && stream != null)
        {
            stream.Dispose();
        }
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment