Skip to content

Instantly share code, notes, and snippets.

def saveHadoopAvro[A: ClassTag](rdd: RDD[A], path: String, schema: Schema): Unit = {
val job: Job = Job.getInstance()
AvroJob.setOutputKeySchema(job, schema)
rdd.map(row => (new AvroKey(row), NullWritable.get()))
.saveAsNewAPIHadoopFile(
path,
classOf[AvroKey[A]],
classOf[NullWritable],
@adamv
adamv / gist:1cba555dea079a754d54
Last active August 29, 2015 14:22
Globbing.findFiles recursively finds files matching a glob pattern.
import java.io.File;
import java.io.IOException;
import java.nio.file.FileSystems;
import java.nio.file.FileVisitResult;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.PathMatcher;
import java.nio.file.Paths;
import java.nio.file.SimpleFileVisitor;
import java.nio.file.attribute.BasicFileAttributes;
@adamv
adamv / gist:938a95008d58cb1e048f
Last active August 29, 2015 14:20
Find the first element of a list with the best score
object Finder {
def findMax[T](list: List[T], scoring: (T) => Option[(Int, T)]): Option[T] = {
var best: Option[(Int, T)] = Option.empty
for (t <- list) {
val score = scoring(t)
if (score.isDefined) {
if (best.isDefined) {
if (best.get._1 < score.get._1) best = score
} else {
import com.google.common.collect.AbstractIterator;
import com.google.common.collect.ImmutableList;
import java.util.Iterator;
import java.util.List;
public class RoundRobinIterator<S> extends AbstractIterator<S> {
private final List<Iterator<S>> iters;
private int index = 0;
import org.apache.pig.EvalFunc;
import org.apache.pig.backend.executionengine.ExecException;
import org.apache.pig.data.DataBag;
import org.apache.pig.data.Tuple;
import org.apache.pig.impl.logicalLayer.FrontendException;
import org.apache.pig.impl.logicalLayer.schema.Schema;
/**
* Takes two possibly empty bags, a left bag and a right bag.
* Returns the right bag if it is not empty; otherwise
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.nio.file.attribute.PosixFilePermission;
import java.util.EnumSet;
import java.util.Set;
public class FilePermissions {
private static final Set<PosixFilePermission> POSIX_755 =
/**
* A Counter can issue sequential ids, and return the largest id issued so far.
*/
public class Counter {
private long nextValue = 0;
/**
* @return the next value from this counter.
*/
public long nextValue() {
/**
* Allows an operation to be retried up to a maximum number of tries.
*/
public class RetryCounter {
/** The maximum number of attempts. */
private final int maxAttempts;
/** The number of attempts so far. */
private int attempts;
public class RetryCounter {
private final int maxAttempts;
private int attempts;
private boolean successful;
/**
 * Creates a counter in its initial state: no attempts recorded yet and
 * not marked successful.
 *
 * @param maxAttempts the attempt limit stored by this counter
 */
public RetryCounter(final int maxAttempts) {
    // Start from a clean slate before recording the configured limit.
    this.successful = false;
    this.attempts = 0;
    this.maxAttempts = maxAttempts;
}
@adamv
adamv / pairedlist.java
Created August 10, 2011 18:40
Is this dumb?
import java.util.concurrent.ConcurrentLinkedQueue;
import java.util.concurrent.atomic.AtomicReference;
public class PairedList<T> {
private ConcurrentLinkedQueue<T> list1;
private ConcurrentLinkedQueue<T> list2;
private AtomicReference<ConcurrentLinkedQueue<T>> activeList;
public PairedList() {