Skip to content

Instantly share code, notes, and snippets.

@thomasjungblut
thomasjungblut / HAMA-559_bench_1.java
Created October 17, 2012 19:49
HAMA-559, caliper benchmark
package de.jungblut.benchmark;
import java.util.Random;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.IntWritable;
import org.apache.hama.bsp.TaskAttemptID;
import org.apache.hama.bsp.TaskID;
import org.apache.hama.bsp.message.DiskQueue;
import org.apache.hama.bsp.message.MessageQueue;
@thomasjungblut
thomasjungblut / DiskSerializationBenchmark.java
Created October 18, 2012 14:42
Benchmark of sequential disk data structures
import java.io.IOException;
import java.util.Iterator;
import java.util.List;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Writable;
import org.apache.jdbm.DBMaker;
import com.google.caliper.Param;
import com.google.caliper.Runner;
@thomasjungblut
thomasjungblut / DiskSerializationProfile.java
Created October 19, 2012 09:39
Profiling helper for disk serialization
import java.text.NumberFormat;
import org.apache.hadoop.io.IntWritable;
import de.jungblut.datastructure.DiskList;
import de.jungblut.datastructure.PrefetchCache;
public class DiskSerializationProfile {
static final NumberFormat NUMBER_FORMAT = NumberFormat.getNumberInstance();
@thomasjungblut
thomasjungblut / gist:4134189
Created November 23, 2012 05:58
Bacon Generator
package de.jungblut.bacon;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.FileInputStream;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.Arrays;
import java.util.HashSet;
@thomasjungblut
thomasjungblut / gist:4495808
Last active June 21, 2017 01:32
serialize bloom filters
package de.jungblut.benchmark;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.ObjectInputStream;
import java.io.ObjectOutputStream;
import com.google.common.hash.BloomFilter;
import com.google.common.hash.Funnels;
@thomasjungblut
thomasjungblut / gist:4639695
Created January 26, 2013 02:20
mat mult benchmark
package de.jungblut.benchmark;
import java.util.Random;
import com.google.caliper.Param;
import com.google.caliper.Runner;
import com.google.caliper.SimpleBenchmark;
public class MatMultBenchmark extends SimpleBenchmark {
@thomasjungblut
thomasjungblut / gist:5048046
Last active December 14, 2015 07:08
On-disk MergeSort Numbers and code
import java.io.File;
import java.util.ArrayList;
import java.util.List;
import java.util.Random;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutorCompletionService;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import org.apache.hadoop.io.IntWritable;
@thomasjungblut
thomasjungblut / gist:5146284
Last active November 6, 2016 10:39
Image Segmentation using mean shift clustering
package de.jungblut.ml;
import java.awt.Color;
import java.awt.FlowLayout;
import java.awt.image.BufferedImage;
import java.io.File;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
@thomasjungblut
thomasjungblut / gist:5318761
Created April 5, 2013 11:58
Simple POS tagger using an HMM, with ~91.82% accuracy on a small training set of 70k words and 10k test words.
package de.jungblut.ml;
import java.io.BufferedReader;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Pattern;
@thomasjungblut
thomasjungblut / gist:5390600
Last active December 16, 2015 06:19
Inverted Index in less than 50 lines of code (and I was verbose!)
package de.jungblut.index
import scala.collection.mutable.ArrayBuffer
import scala.collection.mutable.MultiMap
import scala.collection.mutable.HashMap
import scala.collection.mutable.Set
final class Index[T](nGramSize: Int) {
private val index = new HashMap[String, Set[T]] with MultiMap[String, T]