Skip to content

Instantly share code, notes, and snippets.

/**
 * FizzBuzz built from two cyclic lazy streams instead of divisibility tests.
 *
 * `fizzes` yields `Some("Fizz")` at every third position and `None` elsewhere;
 * `buzzes` does the same with `"Buzz"` at every fifth position. Zipping the two
 * streams with the numbers `1 to limit` gives, for each number, the words (if
 * any) that replace it.
 *
 * The entry point is an explicit `main` rather than `extends App`, so the
 * streams are initialised in the object's constructor and `lines` can be
 * called without running the program.
 */
object FizzBuzz {
  // An endless supply of "no word here" slots used to pad the gaps between words.
  val nones = Stream.continually(None)

  // None, None, Some("Fizz"), then the same pattern again: the lazily evaluated
  // tail of `#::` refers back to the stream itself, which makes it cyclic.
  val fizzes: Stream[Option[String]] = nones.take(2) ++ (Some("Fizz") #:: fizzes)

  // Four empty slots followed by Some("Buzz"), repeated forever.
  val buzzes: Stream[Option[String]] = nones.take(4) ++ (Some("Buzz") #:: buzzes)

  /**
   * Returns the FizzBuzz entries for the numbers 1 to `limit`, in order.
   *
   * @param limit the last number to include; values below 1 give an empty list
   * @return one string per number: "Fizz", "Buzz", "FizzBuzz", or the number itself
   */
  def lines(limit: Int): List[String] =
    (fizzes zip buzzes zip (1 to limit)).map { case ((fizz, buzz), n) =>
      val words = fizz.getOrElse("") + buzz.getOrElse("")
      // Fall back to the number only when neither stream contributed a word.
      if (words.isEmpty) n.toString else words
    }.toList

  /** Prints the entries for 1 to 100, one per line. */
  def main(args: Array[String]): Unit =
    lines(100).foreach(println)
}
@samklr
samklr / DotProduct.scala
Created August 30, 2013 21:21
Vector-matrix dot product in Scala and on Spark
/**
 * Multiplies a row vector by a matrix given as an array of rows.
 *
 * Element `j` of the result is the sum over `i` of `vector(i) * matrix(i)(j)`.
 * The number of columns is taken from the first row of `matrix`.
 *
 * Dimensions are not validated: if `vector` and `matrix` have different
 * lengths, only the leading entries present in both are used. A row shorter
 * than the first row causes an `ArrayIndexOutOfBoundsException` when it is read.
 *
 * @param vector the row vector, one entry per matrix row
 * @param matrix the matrix as an array of rows
 * @return one entry per matrix column; empty when `matrix` has no rows
 */
def dotProduct(vector: Array[Int], matrix: Array[Array[Int]]): Array[Int] =
  matrix.headOption match {
    // No rows means no columns; previously this case failed on matrix(0).
    case None => Array.empty[Int]
    case Some(firstRow) =>
      Array.tabulate(firstRow.length) { colIdx =>
        // Pair each vector entry with its row and read the column on the fly,
        // avoiding a temporary column array and product array per column.
        vector.iterator.zip(matrix.iterator).map { case (v, row) => v * row(colIdx) }.sum
      }
  }
@samklr
samklr / mli.scala
Created September 30, 2013 12:01
Bag Of Words
import mli.interface._
def main(args : Array[String]){
val mc = new MLContext(sc)
val inputTable = mc.loadFile("/enwiki_txt").filter(r => List("ARTS","LIFE") contains r(0).toString).cache()
val firstFive = inputTable.take(5)
val taggedInputTable = inputTable.project(Seq(0,2))
.map(r => {
val label = if(r(0).toString == "ARTS") 1.0 else -1.0
MLRow(label, r(1))
@samklr
samklr / load.scala
Last active December 24, 2015 13:59
MLBASE
// Load the file and keep only the rows whose first column reads "ARTS" or
// "LIFE", then cache the filtered table. The filter call is closed before
// .cache() so that cache() applies to the table rather than to the predicate.
val inputTable = mc.loadFile("/enwiki_txt")
  .filter(r => List("ARTS", "LIFE").contains(r(0).toString))
  .cache()
val firstFive = inputTable.take(5)
val taggedInputTable = inputTable.project(Seq(0,2))
.map(r => {
val label = if(r(0).toString == "ARTS") 1.0 else -1.0
import mli.feat._
// c is the column on which we want to perform N-gram extraction
// n is the N-gram length, e.g., n=2 corresponds to bigrams
// k is the number of top N-grams we want to use (sorted by N-gram frequency)
val (featurizedData, ngfeaturizer) = NGrams.extractNGrams(taggedInputTable, c=1, n=2, k=1000,
stopWords = NGrams.stopWords)
val (scaledData, featurizer) = Scale.scale(
featurizedData.filter(_.nonZeros.length > 5).cache(),
@samklr
samklr / Train.scala
Created October 3, 2013 12:43
Train
import mli.ml.classification._
// Train an SVM on trainData with learning rate 10.0, regularisation parameter
// 1.0 and at most 50 iterations (trainData is defined outside this snippet).
val model = SVMAlgorithm.train(trainData, SVMParameters(learningRate=10.0,regParam=1.0, maxIterations=50))
// Prediction + model assessment
// Take the first row of the training table and predict from everything after
// its first column (presumably column 0 holds the label; confirm against the loader).
val firstDataPoint = trainData.take(1)(0)
model.predict(firstDataPoint.tail)
// Pair each row's first column with the model's prediction for the rest of the row.
val trainVsPred = trainData.map(r => MLRow(r(0), model.predict(r.tail)))
// Fraction of rows whose first column differs from the prediction.
// NOTE(review): evalModel compares r(0).toNumber != r(1).toNumber, while this
// line compares the cells directly; confirm that cell equality is intended.
val trainError = trainVsPred.filter(r => r(0) != r(1)).numRows.toDouble/trainData.numRows
/**
 * Computes the misclassification rate of `model` on its own training data and
 * on `testData`.
 *
 * For each row, the first column is paired with `model.predict` applied to the
 * remaining columns; a row counts as an error when the two differ after
 * `toNumber`. Each rate is the number of such rows divided by the table's row count.
 *
 * @param model    the trained model; its `trainingData` is used for the training error
 * @param testData the table used for the test error
 * @return the pair (training error, test error)
 */
def evalModel(model: SVMModel, testData: MLTable) = {
  // Training error: re-predict every training row and count the disagreements.
  val training = model.trainingData
  val trainingPairs = training.map(row => MLRow(row(0), model.predict(row.tail)))
  val trainingMisses = trainingPairs.filter(pair => pair(0).toNumber != pair(1).toNumber).numRows
  val trainingError = trainingMisses.toDouble / training.numRows

  // Test error: the same computation on the supplied table.
  val testPairs = testData.map(row => MLRow(row(0), model.predict(row.tail)))
  val testMisses = testPairs.filter(pair => pair(0).toNumber != pair(1).toNumber).numRows
  val testError = testMisses.toDouble / testData.numRows

  (trainingError, testError)
}
@samklr
samklr / MR.java
Created October 23, 2013 09:47
Map Reduce Bingo
public class WordCount extends Configured implements Tool {
public static class Map extends Mapper<LongWritable, Text, Text, IntWritable> {
private HashMap<String,Integer> buffer;
/**
 * Replaces {@code buffer} with a new, empty map before this mapper is used.
 *
 * @param context the task context supplied by the framework (not read here)
 */
@Override
protected void setup(Context context) throws IOException, InterruptedException {
this.buffer = new HashMap<String, Integer>();
}
@samklr
samklr / KmeasnStreamScalding.scala
Created October 24, 2013 13:05
Streaming K-means with Mahout and Scalding
lazy val clust = new StreamingKMeans(new FastProjectionSearch(new
EuclideanDistanceMeasure,5,10),
args("sloppyclusters").toInt, (10e-6).asInstanceOf[Float])
var count = 0;
val sloppyClusters =
TextLine(args("input"))
.map{ str =>
val vec = str.split("\t").map(_.toDouble)
val cent = new Centroid(count, new DenseVector(vec))
count += 1
package nodescala
import com.sun.net.httpserver._
import scala.concurrent._
import scala.concurrent.duration._
import ExecutionContext.Implicits.global
import scala.async.Async.{async, await}
import scala.collection._
import scala.collection.JavaConversions._
import java.util.concurrent.{Executor, ThreadPoolExecutor, TimeUnit, LinkedBlockingQueue}