Skip to content

Instantly share code, notes, and snippets.

View colinmarc's full-sized avatar

Colin Marc colinmarc

View GitHub Profile
import java.io.IOException;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.OutputCommitter;
import org.apache.hadoop.mapred.JobContext;
import org.apache.hadoop.mapred.TaskAttemptContext;
import org.apache.hadoop.mapred.JobConf;
import org.apache.parquet.hadoop.ParquetOutputCommitter;
class Flatten(T)
include Iterator(T)
def initialize(@iterator : Iterator(Iterator(T)))
@current = @iterator.next
end
def next_
if (current = @current).is_a?(Iterator::Stop)
stop
class Flatten(T)
include Iterator(T)
def initialize(@iterator : Iterator(Iterator(T)))
@current = @iterator.next
end
def next
if @current.is_a?(Iterator::Stop)
stop
### Keybase proof
I hereby claim:
* I am colinmarc on github.
* I am colinmarc (https://keybase.io/colinmarc) on keybase.
* I have a public key whose fingerprint is 5130 CC66 3FE4 134A 0BF7 AF4B ED8F F019 649B E820
To claim this, I am signing this object:
@colinmarc
colinmarc / gist:de4fc8e2f2805a3e02e9
Created March 31, 2015 14:43
HadoopS3CredentialsProvider.java
import org.apache.hadoop.conf.Configuration;
import com.amazonaws.AmazonClientException;
import com.amazonaws.auth.AWSCredentials;
import com.amazonaws.auth.AWSCredentialsProvider;
import com.amazonaws.auth.BasicAWSCredentials;
public class HadoopS3CredentialsProvider implements AWSCredentialsProvider {
private Configuration conf;
@colinmarc
colinmarc / Sequins.scala
Created September 4, 2014 12:02
A Scalding source for Sequins data
import com.twitter.scalding._
import cascading.pipe.Pipe
import spray.json.JsonWriter
import org.apache.hadoop.io.BytesWritable
case class Sequins[V](p : String)(implicit writer: JsonWriter[V])
extends FixedPathSource(p)
with WritableSequenceFileScheme
with TypedSink[(String, V)] {
@colinmarc
colinmarc / MongoScheme.java
Created July 26, 2014 17:24
mongo-hadoop + Scalding
import java.io.IOException;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.RecordReader;
import cascading.flow.FlowProcess;
import cascading.scheme.Scheme;
import cascading.scheme.SourceCall;
import cascading.scheme.SinkCall;
scala> val x = 123.asInstanceOf[String]
java.lang.ClassCastException: java.lang.Integer cannot be cast to java.lang.String
at .<init>(<console>:22)
at .<clinit>(<console>)
at .<init>(<console>:11)
at .<clinit>(<console>)
at $print(<console>)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
@colinmarc
colinmarc / makefile-gevent.py
Created April 17, 2012 16:31
makefile in gevent vs. Python 2.7
#version with gevent
import gevent.socket as socket
from gevent import spawn
s = socket.socket()
s.bind(('', 9599))
s.listen(5)
def handle_client(client):
@colinmarc
colinmarc / mysocket.py
Created March 31, 2012 20:33
an approach to monkeypatching in gevent
__target__ = 'socket'
#TODO find a way to not require this line
_real = __import__(__target__)
__implements__ = [
'socket',
'getaddrinfo',
#'gethostname',
#'gethostbyname',