Skip to content

Instantly share code, notes, and snippets.

@dfilimon
dfilimon / VectorBinaryAssign.java
Created April 23, 2013 15:22
VectorBinaryAssign
package org.apache.mahout.math;
import com.google.common.base.Preconditions;
import org.apache.mahout.math.function.DoubleDoubleFunction;
import org.apache.mahout.math.set.OpenIntHashSet;
import java.util.Iterator;
public abstract class VectorBinaryAssign {
private static final VectorBinaryAssign operations[] = new VectorBinaryAssign[] {
@dfilimon
dfilimon / TestSequentialAccessSparseVector.java
Created April 12, 2013 16:26
Vector iteration Mahout.
@Test
public void testVectorIteration() {
Vector vector = new SequentialAccessSparseVector(100);
vector.set(0, 1);
vector.set(2, 2);
vector.set(4, 3);
vector.set(6, 4);
Iterator<Vector.Element> vectorIterator = vector.iterateNonZero();
Vector.Element element = null;
int i = 0;
@dfilimon
dfilimon / BallKMeans.java
Created March 29, 2013 11:07
ConcurrentModificationException from Multinomial
// Re-weight everything according to the minimum distance to a seed.
//
// The new weights are gathered first and written back only once the iteration is over.
// Calling seedSelector.set(...) inside the for-each changes the selector while its own
// iterator is live, which is the reported source of the ConcurrentModificationException.
// Each index is read and written independently, so deferring the writes yields the same
// final weights.
java.util.Map<Integer, Double> pendingSeedWeights = new java.util.LinkedHashMap<Integer, Double>();
for (int currSeedIndex : seedSelector) {
  WeightedVector curr = datapoints.get(currSeedIndex);
  double newWeight = nextSeed.getWeight() * distanceMeasure.distance(nextSeed, curr);
  // Weights are only ever lowered, never raised.
  if (newWeight < seedSelector.getWeight(currSeedIndex)) {
    pendingSeedWeights.put(currSeedIndex, newWeight);
  }
}
// Apply the collected updates now that no iterator over seedSelector is active.
for (java.util.Map.Entry<Integer, Double> pendingSeedWeight : pendingSeedWeights.entrySet()) {
  seedSelector.set(pendingSeedWeight.getKey(), pendingSeedWeight.getValue());
}
@dfilimon
dfilimon / numprimes.R
Created February 3, 2013 22:52
Number of primes less than or equal to n vs n
# Plot the number of primes less than or equal to n, for n from 1 to 1,000,000.
library('gmp')
primes <- data.frame(1:1000000)
names(primes) <- 'nums'
# isprime() yields 0 (composite), 1 (probably prime) or 2 (certainly prime).
primes$isprime <- isprime(primes$nums)
# Keep only the values that are certainly prime.
pn <- primes[primes$isprime == 2, 'nums']
e <- ecdf(pn)
# ecdf() returns the *fraction* of primes <= n (a value in [0, 1]); multiply by the
# total number of primes found so that the y axis really is the count its label promises.
plot(pn, length(pn) * e(pn), type='s', xlab='n', ylab='Number of primes less or equal to n')
@dfilimon
dfilimon / SyntaxTreeNodeVisitor.java
Created January 26, 2013 14:48
Visitor pattern with reflection
package syntax;
import syntax.tree.SyntaxTreeNode;
import syntax.tree.arithmetic.*;
import syntax.tree.comparison.EqualityNode;
import syntax.tree.comparison.IsVoidNode;
import syntax.tree.comparison.LessOrEqualThanNode;
import syntax.tree.comparison.LessThanNode;
import syntax.tree.conditional.CaseNode;
import syntax.tree.conditional.ConditionNode;
public static void trainActual(Iterable<Pair<Text, VectorWritable>> inputIterable, String outBase,
Map<String, Integer> clusterNamesToIds) throws IOException {
Map<String, Centroid> actualClusters = Maps.newHashMap();
computeActualClusters(inputIterable, actualClusters);
OnlineLogisticRegression learningAlgorithm =
new OnlineLogisticRegression(NUM_CLASSES, NUM_FEATURES_ACTUAL, new L1());
for (Pair<Text, VectorWritable> pair : inputIterable) {
Vector actualCentroid = pair.getSecond().get();
@dfilimon
dfilimon / DelegatingVector.java
Created January 2, 2013 21:43
DelegatingVector's clone()
@Override
public Vector clone() {
DelegatingVector r;
try {
r = (DelegatingVector) super.clone();
} catch (CloneNotSupportedException e) {
throw new RuntimeException("Clone not supported for DelegatingVector, shouldn't be possible");
}
// delegate points to original without this
r.delegate = delegate.clone();
@dfilimon
dfilimon / StreamingKMeansTest.java
Created January 2, 2013 18:13
Logback output to stream.
// Prepare a Logback context and a pattern encoder; presumably the byte stream below is
// later wired up as the log destination (that part is not visible here — confirm).
// In-memory byte buffer created with an initial capacity of 2048 bytes.
ByteArrayOutputStream bs = new ByteArrayOutputStream(2048);
// NOTE(review): the cast assumes the SLF4J logger factory is Logback's LoggerContext.
LoggerContext lc = (LoggerContext) LoggerFactory.getILoggerFactory();
// Reset the context before renaming and starting it.
lc.reset();
lc.setName("context");
lc.start();
// Encoder bound to the context above.
PatternLayoutEncoder encoder = new PatternLayoutEncoder();
encoder.setContext(lc);
// Pattern consists of the message conversion word followed by a newline, nothing else.
encoder.setPattern("%message\n");
@dfilimon
dfilimon / Seqfile3DProjector.java
Created December 27, 2012 19:28
Projecting vectors
// Reduce the input vector to three coordinates: coordinate k is the dot product of a
// copy of the input with the k-th entry of basisVectors.
Vector vectorValue = value.get().clone();
Vector projectedVector = new DenseVector(3);
for (int axis = 0; axis < 3; axis++) {
  double coordinate = vectorValue.dot(basisVectors.get(axis));
  projectedVector.set(axis, coordinate);
}
// Swap in the normalized projection, then print its three coordinates on one line,
// separated by single spaces.
projectedVector = projectedVector.normalize();
writer.printf("%f %f %f\n",
    projectedVector.get(0),
    projectedVector.get(1),
    projectedVector.get(2));
@dfilimon
dfilimon / plot-streaming-kmeans.r
Created December 6, 2012 22:24
Trying to plot different columns
plot_skm <- function(infile) {
print(infile)
library(package=ggplot2)
skm <- read.table(infile, skip=1)
cur_dev <- dev.cur()
plot_file <- paste(infile, '-numDataPointSeen.png', sep='')