Skip to content

Instantly share code, notes, and snippets.

@ccsevers
ccsevers / sparse_expander.py
Created April 9, 2014 17:34
SparseExpanderDataset for pylearn2
from pylearn2.datasets.dataset import Dataset
from pylearn2.datasets.dense_design_matrix import DenseDesignMatrix
from pylearn2.utils.iteration import (SequentialSubsetIterator,
FiniteDatasetIterator,
resolve_iterator_class)
import functools
import logging
import numpy
import warnings
@ccsevers
ccsevers / AvroUnpackedSourceTest.scala
Last active August 29, 2015 13:57
UnpackedAvroSource passing test
class AvroUnpackedSourceTest extends Specification with ScalaCheck {
"Running M/R jobs on Avro data" should {
"work for unpacked sources" in {
val prop = Prop.forAll(MyAvroRecordGenerators.myAvroGen) { (record: MyAvroRecord) =>
var res: Double = 0.0
val tempFolder = new File(System.getProperty("java.io.tmpdir"))
val tempInput = new File(tempFolder, "input")
tempInput.mkdirs
@ccsevers
ccsevers / MyApp.scala
Last active December 11, 2015 06:19
Scoobi Avro Example
package com.ebay.scoobitest
import edu.berkeley.cs.avro.marker._
import edu.berkeley.cs.avro.runtime._
import com.nicta.scoobi.Scoobi._
/** Record holding a single mutable `Long` field, `f1`, and mixing in `AvroRecord`.
  *
  * NOTE(review): `AvroRecord` presumably comes from the `edu.berkeley.cs.avro` imports, and the
  * field is presumably declared `var` because that library needs mutable fields — confirm before
  * changing it to `val`.
  */
case class LongRec(var f1: Long) extends AvroRecord
/** Record holding a single mutable `Long` field, `firstSearchTime`, and mixing in `AvroRecord`.
  *
  * NOTE(review): the name suggests a timestamp, but neither the unit nor the epoch is visible
  * here — confirm against the code that populates it. The field is presumably a `var` because the
  * Avro marker library needs mutable fields — confirm before changing it to `val`.
  */
case class Cluster(var firstSearchTime: Long) extends AvroRecord
@ccsevers
ccsevers / AvroReadExample.java
Created October 29, 2012 18:27
cascading.avro wordcount example
package cascading.avro.examples;
import java.util.Properties;
import cascading.flow.Flow;
import cascading.flow.FlowDef;
import cascading.flow.hadoop.HadoopFlowConnector;
import cascading.operation.aggregator.Count;
import cascading.operation.regex.RegexFilter;
import cascading.operation.regex.RegexSplitGenerator;