Skip to content

Instantly share code, notes, and snippets.

Colin Marc colinmarc

View GitHub Profile
View DirectParquetOutputCommitter.java
import java.io.IOException;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.OutputCommitter;
import org.apache.hadoop.mapred.JobContext;
import org.apache.hadoop.mapred.TaskAttemptContext;
import org.apache.hadoop.mapred.JobConf;
import org.apache.parquet.hadoop.ParquetOutputCommitter;
View flatten.cr
class Flatten(T)
include Iterator(T)
def initialize(@iterator : Iterator(Iterator(T)))
@current = @iterator.next
end
def next_
if (current = @current).is_a?(Iterator::Stop)
stop
View gist:8379bea8816dc417571c
class Flatten(T)
include Iterator(T)
def initialize(@iterator : Iterator(Iterator(T)))
@current = @iterator.next
end
def next
if @current.is_a?(Iterator::Stop)
stop
View gist:1664bfb17987dfc187ed
### Keybase proof
I hereby claim:
* I am colinmarc on github.
* I am colinmarc (https://keybase.io/colinmarc) on keybase.
* I have a public key whose fingerprint is 5130 CC66 3FE4 134A 0BF7 AF4B ED8F F019 649B E820
To claim this, I am signing this object:
@colinmarc
colinmarc / gist:de4fc8e2f2805a3e02e9
Created Mar 31, 2015
HadoopS3CredentialsProvider.java
View gist:de4fc8e2f2805a3e02e9
import org.apache.hadoop.conf.Configuration;
import com.amazonaws.AmazonClientException;
import com.amazonaws.auth.AWSCredentials;
import com.amazonaws.auth.AWSCredentialsProvider;
import com.amazonaws.auth.BasicAWSCredentials;
public class HadoopS3CredentialsProvider implements AWSCredentialsProvider {
private Configuration conf;
@colinmarc
colinmarc / Sequins.scala
Created Sep 4, 2014
A scalding source for Sequins data
View Sequins.scala
import com.twitter.scalding._
import cascading.pipe.Pipe
import spray.json.JsonWriter
import org.apache.hadoop.io.BytesWritable
case class Sequins[V](p : String)(implicit writer: JsonWriter[V])
extends FixedPathSource(p)
with WritableSequenceFileScheme
with TypedSink[(String, V)] {
@colinmarc
colinmarc / MongoScheme.java
Created Jul 26, 2014
mongo-hadoop + scalding
View MongoScheme.java
import java.io.IOException;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.RecordReader;
import cascading.flow.FlowProcess;
import cascading.scheme.Scheme;
import cascading.scheme.SourceCall;
import cascading.scheme.SinkCall;
View gist:6575295
scala> val x = 123.asInstanceOf[String]
java.lang.ClassCastException: java.lang.Integer cannot be cast to java.lang.String
at .<init>(<console>:22)
at .<clinit>(<console>)
at .<init>(<console>:11)
at .<clinit>(<console>)
at $print(<console>)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
@colinmarc
colinmarc / makefile-gevent.py
Created Apr 17, 2012
makefile in gevent vs python2.7
View makefile-gevent.py
#version with gevent
import gevent.socket as socket
from gevent import spawn
s = socket.socket()
s.bind(('', 9599))
s.listen(5)
def handle_client(client):
@colinmarc
colinmarc / mysocket.py
Created Mar 31, 2012
an approach to monkeypatching in gevent
View mysocket.py
__target__ = 'socket'
#TODO find a way to not require this line
_real = __import__(__target__)
__implements__ = [
'socket',
'getaddrinfo',
#'gethostname',
#'gethostbyname',
You can’t perform that action at this time.