Skip to content

Instantly share code, notes, and snippets.

@daggerrz
Created June 11, 2012 21:00
Show Gist options
  • Save daggerrz/2912672 to your computer and use it in GitHub Desktop.
Save daggerrz/2912672 to your computer and use it in GitHub Desktop.
Memory mapping files larger than Integer.MAX_VALUE
import java.io.RandomAccessFile
import java.nio.channels.FileChannel
import org.jboss.netty.buffer.{ByteBufferBackedChannelBuffer, ChannelBuffer, ChannelBuffers}
/**
 * Maps a file into memory as a read-only, randomly accessible sequence of bytes spread across one
 * or more buffers, so that files larger than Int.MaxValue bytes can be addressed with Long indices.
 *
 * All mappings are created eagerly in the constructor. The file handle is closed as soon as the
 * mappings exist; a mapping does not depend on the channel that created it staying open.
 *
 * @param filename the file to map
 * @param maxBufferSize the maximum number of bytes to map per buffer; must be positive
 * @throws IllegalArgumentException if maxBufferSize is not positive, or if the file would need
 *                                  more than Int.MaxValue buffers of that size
 */
class MemoryMappedFile(filename: String, maxBufferSize: Int = Int.MaxValue) {
  require(maxBufferSize > 0, "maxBufferSize must be positive, was " + maxBufferSize)

  // Open, map and close the file in one step so that no file descriptor outlives construction,
  // even when mapping fails part-way through.
  private[this] val mapping: (Long, Array[ChannelBuffer]) = {
    val file = new RandomAccessFile(filename, "r")
    try {
      val length = file.length()
      // Ceiling division, written without `length + maxBufferSize - 1` to avoid Long overflow
      val count = length / maxBufferSize + (if (length % maxBufferSize == 0) 0 else 1)
      require(count <= Int.MaxValue,
        "file of " + length + " bytes needs more than Int.MaxValue buffers of " + maxBufferSize + " bytes")
      val channel = file.getChannel
      val chunks = Array.tabulate[ChannelBuffer](count.toInt) { i =>
        // Widen before multiplying: i * maxBufferSize overflows Int from the third buffer on
        val startIndex = i.toLong * maxBufferSize
        val bufferSize = math.min(length - startIndex, maxBufferSize.toLong)
        new ByteBufferBackedChannelBuffer(
          channel.map(FileChannel.MapMode.READ_ONLY, startIndex, bufferSize))
      }
      (length, chunks)
    } finally {
      file.close()
    }
  }

  /** Length of the file in bytes, as read when the file was mapped. */
  val size: Long = mapping._1

  /** One read-only mapping per maxBufferSize-sized slice of the file; the last may be shorter. */
  private[io] val buffers: Array[ChannelBuffer] = mapping._2

  /** Throws IndexOutOfBoundsException unless [index, index + length) lies inside the file. */
  private[this] def checkRange(index: Long, length: Long): Unit =
    // Compared as `index > size - length` so that `index + length` cannot overflow
    if (index < 0 || length < 0 || index > size - length)
      throw new IndexOutOfBoundsException(
        "index " + index + ", length " + length + " is outside a file of " + size + " bytes")

  /**
   * Gets a specific byte in the file. Use getBuffer if large(ish) chunks of contiguous data are
   * to be returned.
   *
   * @param index the absolute position of the byte in the file
   * @return the byte at that position
   * @throws IndexOutOfBoundsException if index is negative or not less than size
   */
  def apply(index: Long): Byte = {
    checkRange(index, 1)
    val bufferIndex = (index / maxBufferSize).toInt
    // Widen before multiplying: bufferIndex * maxBufferSize overflows Int from the third buffer on
    val indexInBuffer = (index - bufferIndex.toLong * maxBufferSize).toInt
    buffers(bufferIndex).getByte(indexInBuffer)
  }

  /**
   * Gets a ChannelBuffer holding a copy of the specified range in the file. The range may span
   * several of the underlying mapped buffers.
   *
   * @param index the start index
   * @param size the number of bytes to return
   * @return a ChannelBuffer with the specified range, readerIndex 0 and writerIndex `size`
   * @throws IndexOutOfBoundsException if the range is not fully inside the file
   */
  def getBuffer(index: Long, size: Int): ChannelBuffer = {
    checkRange(index, size)
    val destination = ChannelBuffers.buffer(size)
    val endIndex = index + size

    // Copies one mapped buffer's share of the range per step until endIndex is reached
    @scala.annotation.tailrec
    def copyBuffers(startIndex: Long, destinationIndex: Int): Unit =
      if (startIndex < endIndex) {
        val bufferIndex = (startIndex / maxBufferSize).toInt
        val bufferStartIndex = bufferIndex.toLong * maxBufferSize
        // Where in the buffer should we start?
        val indexInBuffer = (startIndex - bufferStartIndex).toInt
        // Stop at the end of the request or the end of this buffer, whichever comes first
        val stopIndex = math.min(endIndex, bufferStartIndex + maxBufferSize)
        val bytesToRead = (stopIndex - startIndex).toInt
        // Mutate destination buffer
        buffers(bufferIndex).getBytes(indexInBuffer, destination, destinationIndex, bytesToRead)
        copyBuffers(stopIndex, destinationIndex + bytesToRead)
      }

    copyBuffers(index, 0)
    destination.readerIndex(0)
    destination.writerIndex(size)
    destination
  }
}
/** Companion factory, so a file can be mapped with `MemoryMappedFile(filename)`. */
object MemoryMappedFile {

  /**
   * Maps the given file using the class's default maximum buffer size.
   *
   * @param filename the file to map
   * @return a new MemoryMappedFile for that file
   */
  def apply(filename: String): MemoryMappedFile =
    new MemoryMappedFile(filename)
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment