Skip to content

Instantly share code, notes, and snippets.

@alancnet
Created September 28, 2016 23:51
Show Gist options
  • Save alancnet/f6e192afb05608b394809abbb4dc29f5 to your computer and use it in GitHub Desktop.
Save alancnet/f6e192afb05608b394809abbb4dc29f5 to your computer and use it in GitHub Desktop.
HashSet implemented against a file system.
import java.io.{File, RandomAccessFile}
class HashSetFile(file: File, modulus: Int = 32) {
val raf = new RandomAccessFile(file, "rw")
def filePosition = raf.getFilePointer
def fileLength = raf.length
val bytesInLong = java.lang.Long.BYTES
val indexesStart = bytesInLong
val start = System.currentTimeMillis()
if (fileLength == 0) {
initFile()
}
def incrementRecordCount() = {
val recordCount = getRecordCount + 1
raf.seek(0)
raf.writeLong(recordCount)
println(s"UniqueCount: $recordCount")
}
def getRecordCount = {
raf.seek(0)
raf.readLong()
}
def initFile() = {
raf.seek(0)
raf.writeLong(0)
val emptyBuffer: Array[Byte] = scala.Array.fill(1024 * 32){0}
0L.until(modulus.toLong * bytesInLong / emptyBuffer.length).foreach(x => {
raf.write(emptyBuffer)
})
}
def getOffsetOfIndexOfValue(value: Array[Byte]): Int = {
(bytesInLong * math.abs(java.util.Arrays.hashCode(value) % modulus)) + bytesInLong
}
def add(value: Array[Byte]): Boolean = {
def writeNewRecord() = {
raf.seek(fileLength)
raf.writeLong(value.length.toLong) // Write Length of value
raf.writeLong(-1L) // Make space for offset of next value in list
raf.write(value) // Write the value
incrementRecordCount()
true
}
def checkCurrentLinkedListValue(offset: Long): Boolean = {
raf.seek(offset)
val lengthOfCurrentValue = raf.readLong
val nextOffsetInList = raf.readLong
if (lengthOfCurrentValue == value.length.toLong) { // possibly the same value
val buffer: Array[Byte] = Array.fill(lengthOfCurrentValue.toInt) {0}
raf.read(buffer)
if (java.util.Arrays.equals(value, buffer)) { // value already exists
return false
}
}
if (nextOffsetInList == -1L) { // if here, then value is new
// update the nextOffsetInList
// then write the value to that offset
raf.seek(offset + bytesInLong) // Seek to where nextOffsetInListIsWritten
raf.writeLong(fileLength) // The next value will be written at the end of the file
writeNewRecord() // Write the new value
} else {
checkCurrentLinkedListValue(nextOffsetInList)
}
}
val indexOffset = getOffsetOfIndexOfValue(value)
raf.seek(indexOffset)
val linkedListOffset = raf.readLong()
if (linkedListOffset == 0L){
raf.seek(indexOffset)
raf.writeLong(fileLength)
writeNewRecord()
} else {
checkCurrentLinkedListValue(linkedListOffset)
}
}
def exists(value: Array[Byte]): Boolean = {
def checkCurrentLinkedListValue(offset: Long): Boolean = {
raf.seek(offset)
val lengthOfCurrentValue = raf.readLong
val nextOffsetInList = raf.readLong
if (lengthOfCurrentValue == value.length.toLong) {
val buffer: Array[Byte] = Array.fill(lengthOfCurrentValue.toInt) {0}
raf.read(buffer)
if (java.util.Arrays.equals(value, buffer)) {
return true
}
}
if (nextOffsetInList == -1L) {
false
} else {
checkCurrentLinkedListValue(nextOffsetInList)
}
}
val indexOffset = getOffsetOfIndexOfValue(value)
raf.seek(indexOffset)
val nextOffsetInList = raf.readLong
if (nextOffsetInList == 0L) {
false
} else {
checkCurrentLinkedListValue(nextOffsetInList)
}
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment