Skip to content

Instantly share code, notes, and snippets.

@blever
Created November 12, 2011 22:28
Show Gist options
  • Save blever/1361224 to your computer and use it in GitHub Desktop.
Save blever/1361224 to your computer and use it in GitHub Desktop.
Loading legacy Writable data
/** Loading of legacy Hadoop `Writable` data stored in sequence files. */
object SequenceFileInput {

  /** Reading in from a sequence file:
    *  - specify the path to the sequence file;
    *  - specify the Writable classes that have been serialised in the sequence file;
    *  - provide functions that can get the values out of the Writables, plus the WireFormat
    *    definitions of K and V. These are all implicit so that for a lot of the common cases you
    *    don't have to fill them in (it's possible that the WireFormat args could be dropped and
    *    instead derived from the Writables themselves, given they implement write and readFields).
    *
    * @tparam K   element type produced from each key
    * @tparam V   element type produced from each value
    * @tparam WtK Writable key type stored in the sequence file
    * @tparam WtV Writable value type stored in the sequence file
    * @param keyClass   class of the serialised keys; not read in the body, it only pins `WtK`
    *                   at the call site
    * @param valueClass class of the serialised values; not read in the body, it only pins `WtV`
    *                   at the call site
    * @param path       location of the sequence file, wrapped in a `Path`
    * @param getK       extracts a `K` from a key Writable
    * @param getV       extracts a `V` from a value Writable
    * @param wfK        WireFormat for `K`; not referenced explicitly in this body
    * @param wfV        WireFormat for `V`; not referenced explicitly in this body
    * @return a `DList` built from a `Smart.Load` node over the loader defined here
    */
  def fromSequenceFile[K, V, WtK <: WritableComparable[_], WtV <: Writable]
      (keyClass: Class[WtK], valueClass: Class[WtV], path: String)
      (implicit getK: WtK => K, getV: WtV => V, wfK: WireFormat[K], wfV: WireFormat[V]): DList[(K, V)] = {

    val loader = new Loader[(K, V)] {
      // NOTE(review): the node is typed AST.Load[String] although this is a Loader[(K, V)];
      // presumably it should be AST.Load[(K, V)] -- confirm against Loader's declaration.
      def mkInputStore(node: AST.Load[String]) = new InputStore(node) {
        def inputTypeName = typeName
        val inputPath = new Path(path)
        // SequenceFileInputFormat is generic, so it cannot be named without type arguments.
        val inputFormat = classOf[SequenceFileInputFormat[WtK, WtV]]
        // Unwraps each Writable pair into the plain (K, V) tuple via the implicit getters.
        val converter = new InputConverter[WtK, WtV, (K, V)] {
          def fromKeyValue(key: WtK, value: WtV): (K, V) = (getK(key), getV(value))
        }
      }
    }

    new DList(Smart.Load(loader))
  }
}
// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
/** Usage example for `SequenceFileInput.fromSequenceFile`. */
object Example {
  /* Usage example: reading in a sequence file where the key was IntWritable and the value was LongWritable.
   * The Writable classes are passed first and the path last, matching the declared parameter order.
   * The call is qualified because `fromSequenceFile` lives in `SequenceFileInput`, not in this object.
   * Implicit IntWritable => Int and LongWritable => Long functions, plus WireFormat[Int] and
   * WireFormat[Long], are assumed to be in implicit scope -- they are not defined in this file. */
  val x: DList[(Int, Long)] =
    SequenceFileInput.fromSequenceFile(classOf[IntWritable], classOf[LongWritable], "hdfs://path/to/my/sequence-file")
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment