Skip to content

Instantly share code, notes, and snippets.

@danharvey
danharvey / docker-compose.yml
Created April 3, 2015 09:02
Confluent stack docker-compose example.
# Compose (v1 format) services for a local Confluent-style stack.
# Each top-level key is a service; its settings must be nested beneath it.

# Kafka broker.
kafka:
  image: state/kafka:0.8.2.0-0
  # Use the host's network stack instead of an isolated container network.
  net: host
  environment:
    KAFKA_BROKER_ID: 0
    # NOTE(review): presumably the hostname the broker advertises to clients;
    # confirm against the state/kafka image documentation.
    KAFKA_ADVERTISED_HOST: localhost

# Schema registry.
schemaregistry:
  image: state/confluent-schema-registry:1.0-2
  # Host networking, same as the kafka service above.
  net: host
@danharvey
danharvey / gist:10474265
Created April 11, 2014 14:39
Freebase Dump gzip splitting
grep -obUaP "\x1F\x8B\x08\x00\x00\x00\x00\x00" $FILENAME | cut -d ":" -f 1 > splits.txt
cat > split.py << EOF
import sys
input_file = sys.argv[1]
offset_file = sys.argv[2]
output_file = input_file.replace('.gz','')
offsets = map(long, open(offset_file).read().rstrip().split("\n"))
with open(input_file,"rb") as f:
@danharvey
danharvey / gist:840403
Created February 23, 2011 13:05
TableInputFormat configuration patch
--- src/main/java/org/apache/hadoop/hbase/mapreduce/TableInputFormat.java 2011-02-09 00:23:13.000000000 +0000
+++ src/main/java/org/apache/hadoop/hbase/mapreduce/TableInputFormat.java 2011-02-23 13:01:25.126278999 +0000
@@ -88,7 +88,7 @@
this.conf = configuration;
String tableName = conf.get(INPUT_TABLE);
try {
- setHTable(new HTable(new Configuration(conf), tableName));
+ setHTable(new HTable(tableName));
} catch (Exception e) {
LOG.error(StringUtils.stringifyException(e));
package uk.co.danharvey.pig.storage;
import java.io.IOException;
public class JsonStorage extends StoreFunc {
private RecordWriter<Text, NullWritable> writer;
private ResourceSchema schema;
@Override
public OutputFormat getOutputFormat() throws IOException {
import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.util.Arrays;
import org.apache.commons.math.stat.descriptive.DescriptiveStatistics;
import com.google.common.base.CharMatcher;
import com.mongodb.BasicDBObject;
import com.mongodb.DB;
@danharvey
danharvey / ReadBenchmark.java
Created October 29, 2010 17:36
A quick read benchmark I created to test MongoDB against the read distribution of our dataset.
import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.util.Arrays;
import org.apache.commons.math.stat.descriptive.DescriptiveStatistics;
import com.google.common.base.CharMatcher;
import com.mongodb.BasicDBObject;
import com.mongodb.DB;