Skip to content

Instantly share code, notes, and snippets.

/**
* A Counter can issue sequential ids, and return the largest id issued so far.
*/
public class Counter {
private long nextValue = 0;
/**
* @return the next value from this counter.
*/
public long nextValue() {
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.nio.file.attribute.PosixFilePermission;
import java.util.EnumSet;
import java.util.Set;
public class FilePermissions {
private static final Set<PosixFilePermission> POSIX_755 =
import org.apache.pig.EvalFunc;
import org.apache.pig.backend.executionengine.ExecException;
import org.apache.pig.data.DataBag;
import org.apache.pig.data.Tuple;
import org.apache.pig.impl.logicalLayer.FrontendException;
import org.apache.pig.impl.logicalLayer.schema.Schema;
/**
* Takes two possibly empty bags, a left bag and a right bag.
* Returns the right bag if it is not empty; otherwise
import com.google.common.collect.AbstractIterator;
import com.google.common.collect.ImmutableList;
import java.util.Iterator;
import java.util.List;
public class RoundRobinIterator<S> extends AbstractIterator<S> {
private final List<Iterator<S>> iters;
private int index = 0;
@adamv
adamv / gist:938a95008d58cb1e048f
Last active August 29, 2015 14:20
Find the first element of a list with the best score
object Finder {
def findMax[T](list: List[T], scoring: (T) => Option[(Int, T)]): Option[T] = {
var best: Option[(Int, T)] = Option.empty
for (t <- list) {
val score = scoring(t)
if (score.isDefined) {
if (best.isDefined) {
if (best.get._1 < score.get._1) best = score
} else {
@adamv
adamv / gist:1cba555dea079a754d54
Last active August 29, 2015 14:22
Globbing.findFiles recursively finds files matching a glob pattern.
import java.io.File;
import java.io.IOException;
import java.nio.file.FileSystems;
import java.nio.file.FileVisitResult;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.PathMatcher;
import java.nio.file.Paths;
import java.nio.file.SimpleFileVisitor;
import java.nio.file.attribute.BasicFileAttributes;
def saveHadoopAvro[A: ClassTag](rdd: RDD[A], path: String, schema: Schema): Unit = {
val job: Job = Job.getInstance()
AvroJob.setOutputKeySchema(job, schema)
rdd.map(row => (new AvroKey(row), NullWritable.get()))
.saveAsNewAPIHadoopFile(
path,
classOf[AvroKey[A]],
classOf[NullWritable],
@adamv
adamv / gist:28f275ca7ca87818ef53
Last active August 29, 2015 14:22
JSON string -> ? -> Smile
import com.fasterxml.jackson.core.JsonFactory
import com.fasterxml.jackson.databind.ObjectMapper
import com.fasterxml.jackson.databind.util.TokenBuffer
import com.fasterxml.jackson.dataformat.smile.SmileFactory
object SmileDemo {
val JSON_FACTORY: JsonFactory = new JsonFactory()
val SMILE_FACTORY: SmileFactory = new SmileFactory()
val JSON = new ObjectMapper(JSON_FACTORY)
@adamv
adamv / gist:3f67a16a605e11627bed
Created June 5, 2015 17:57
Create and use a temporary file.
#!/usr/bin/env python3.4
import tempfile
from subprocess import call
# Fill a named temporary file with 500 copies of one line, then print the
# file back through an external `cat` process addressed by the file's path.
with tempfile.NamedTemporaryFile() as f:
    # Encode the line a single time and reuse the same bytes object.
    row = bytes("abcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghij\n", 'UTF-8')
    f.writelines(row for _ in range(500))
    # `cat` opens the file by name, so buffered bytes must be flushed first.
    f.flush()
    call(["cat", f.name])
// Subclass TextOutputFormat so we can use a base ".json" extension.
// adamv: I feel as if this shouldn't be this complicated.
static class JsonTextOutputFormat extends TextOutputFormat<LongWritable, Text> {
@Override
public RecordWriter<LongWritable, Text>
getRecordWriter(TaskAttemptContext job
) throws IOException, InterruptedException {
Configuration conf = job.getConfiguration();
boolean isCompressed = getCompressOutput(job);
String keyValueSeparator = conf.get(SEPERATOR, "\t");