Skip to content

Instantly share code, notes, and snippets.

@blever
blever / gist:1361178
Created November 12, 2011 21:58
Augmenting DataSource/DataSink with Converter idea
/* Something along the lines of Crunch's "Converter" but split out into separate input/output traits */
/**
 * Input half of a converter: turns a key-value pair into a single value.
 *
 * @tparam K the key type
 * @tparam V the value type
 * @tparam S the type built from a key and a value
 */
trait InputConverter[K, V, S] {
  /**
   * Builds an `S` from a key and its associated value.
   *
   * @param key   the key of the pair
   * @param value the value of the pair
   * @return the converted value
   */
  def fromKeyValue(key: K, value: V): S
}
/**
 * Output half of a converter: turns a single value into a key-value pair.
 *
 * @tparam K the key type
 * @tparam V the value type
 * @tparam S the type that is split into a key and a value
 */
trait OutputConverter[K, V, S] {
  /**
   * Splits `s` into a key-value pair.
   *
   * @param s the value to convert
   * @return the `(key, value)` pair derived from `s`
   */
  def toKeyValue(s: S): (K, V)
}
@blever
blever / gist:1361224
Created November 12, 2011 22:28
Loading legacy Writable data
object SequenceFileInput {
/** Reading in from a sequence file:
* - specify path to sequence file
* - need to specify the Writable classes that have been serialised in the sequence file
* - provide functions that can get the value out of Writables, plus the WireFormat definitions of K and V; this
* is all implicit so that for a lot of the common cases you don't have to fill it in (it's possible that the WireFormat
* args could be dropped and instead derived from the Writables themselves given they implement write and readFields). */
def fromSequenceFile[K, V, WtK <: WritableComparable, WtV <: Writable]
(keyClass: Class[WtK], valueClass: Class[WtV], path: String)
@blever
blever / gist:1508379
Created December 22, 2011 00:38
Using converter for Sequence files
object SequenceFileInput {
/** Reading in from a sequence file:
* - specify path to sequence file
* - need to specify the Writable/WritableComparable classes that have been serialised in the sequence file
* - provide functions that can get the value out of Writables plus the WireFormat definitions of K and V; this
* is all implicit so that for a lot of the common cases you don't have to fill it in */
def fromSequenceFile[K, V, WtK <: Writable, WtV <: Writable]
(path: String)
(implicit mK: Manifest[K], wfK: WireFormat[K],
@blever
blever / gist:1557626
Created January 3, 2012 23:46
SequenceFileOutput using converter
/**
 * Turns a single value into a key-value pair.
 *
 * @tparam K the key type
 * @tparam V the value type
 * @tparam S the type that is split into a key and a value
 */
trait OutputConverter[K, V, S] {
  /**
   * Splits `s` into a key-value pair.
   *
   * @param s the value to convert
   * @return the `(key, value)` pair derived from `s`
   */
  def toKeyValue(s: S): (K, V)
}
// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
trait DataSink[K, V, B] {
def outputTypeName: String
def outputPath: Path
def outputFormat: Class[_ <: FileOutputFormat[K,V]]
object DistinctSpecApp extends ScoobiApp with EftposConfiguration {
def run() {
// need more than 1 reducer so that key-values get split across multiple partitions
configuration.setMinReducers(2)
// TODO - need to remove 'shuffle'
val src = 'a' to 'z'
val input = scala.util.Random.shuffle(('a' to 'z').flatMap(c => List.fill(100)(c)).map(_.toString * 20)).toDList
persist(input.distinct.materialise).toSeq.map(_.head).sorted foreach { println }
import collection.immutable.VectorBuilder
/** Syntax for mapping a function over the second component of a pair. */
object QuickBifunctor {
  /**
   * Wrapper that adds `:->` to a pair.
   *
   * A named class is used rather than an anonymous structural type so that
   * `:->` is an ordinary method call and does not rely on reflective access.
   */
  final class PairMap2[A, B](k: (A, B)) {
    /**
     * Applies `f` to the second component, leaving the first unchanged.
     *
     * @param f the function applied to the second component
     * @return a pair of the original first component and `f` of the second
     */
    def :->[C](f: B => C): (A, C) =
      (k._1, f(k._2))
  }

  /** Implicitly wraps a pair so that `:->` can be called on it. */
  implicit def pairmap2[A, B](k: (A, B)): PairMap2[A, B] =
    new PairMap2[A, B](k)
}
sealed trait Order
@blever
blever / grouped.scala
Last active December 10, 2015 17:58 — forked from tonymorris/grouped.scala
import collection.immutable.VectorBuilder
/** Syntax for mapping a function over the second component of a pair. */
object QuickBifunctor {
  /**
   * Wrapper that adds `:->` to a pair.
   *
   * A named class is used rather than an anonymous structural type so that
   * `:->` is an ordinary method call and does not rely on reflective access.
   */
  final class PairMap2[A, B](k: (A, B)) {
    /**
     * Applies `f` to the second component, leaving the first unchanged.
     *
     * @param f the function applied to the second component
     * @return a pair of the original first component and `f` of the second
     */
    def :->[C](f: B => C): (A, C) =
      (k._1, f(k._2))
  }

  /** Implicitly wraps a pair so that `:->` can be called on it. */
  implicit def pairmap2[A, B](k: (A, B)): PairMap2[A, B] =
    new PairMap2[A, B](k)
}
sealed trait Order
@blever
blever / Foo.scala
Created February 27, 2013 04:54
WireFormat for case class with type parameter.
import com.nicta.scoobi.Scoobi._
/**
 * A record pairing a string `id` with a payload `bar` of type `A`.
 *
 * The context bound means an implicit `WireFormat[A]` must be in scope
 * wherever a `Foo[A]` is constructed.
 *
 * @param id  the string identifier
 * @param bar the payload value
 * @tparam A the payload type
 */
case class Foo[A : WireFormat](id: String, bar: A)
object Foo {
implicit def fooFmt[A : WireFormat]: WireFormat[Foo[A]] = new WireFormat[Foo[A]] {
import java.io._
def toWire(x: Foo[A], out: DataOutput) {
implicitly[WireFormat[String]].toWire(x.id, out)
import org.specs2.mutable.SpecificationLike
import org.specs2.specification.BeforeAfterExample
import org.specs2.specification.BeforeAfter
import org.kiji.schema.KijiClientTest
abstract class KijiSpecification extends KijiClientTest with SpecificationLike with BeforeAfterExample {
override def beforeAfterContext = new BeforeAfter {
def before {
setupKijiTest()
@blever
blever / r.pp
Last active December 31, 2015 13:39
Puppet defined type for installing R libraries.
#
# R libraries (on CRAN).
#
define tarski::r::library($lib = $title, $using = undef) {
Exec {
path => "/bin:/sbin:/usr/bin:/usr/sbin"
}
$require_base = [
Package['R'],