samklr / Kmeans.scala
Created May 4, 2013 18:17
Kmeans forked
class Point(dx: Double, dy: Double) {
  val x: Double = dx
  val y: Double = dy

  override def toString(): String = "(" + x + ", " + y + ")"

  // Euclidean distance to another point (the preview's version returned a dot product).
  def dist(p: Point): Double =
    math.sqrt((x - p.x) * (x - p.x) + (y - p.y) * (y - p.y))
}
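The gist preview stops at the Point class. A K-means iteration built on Point.dist might look roughly like this (a sketch, not the gist's actual code; the helper names are illustrative):

// Assign each point to its nearest centroid, then recompute each centroid
// as the mean of the points assigned to it (one K-means iteration).
// Note: centroids that attract no points are dropped in this sketch.
def nearest(p: Point, centroids: Seq[Point]): Int =
  centroids.indices.minBy(i => p.dist(centroids(i)))

def step(points: Seq[Point], centroids: Seq[Point]): Seq[Point] =
  points.groupBy(p => nearest(p, centroids)).toSeq.sortBy(_._1).map {
    case (_, cluster) =>
      new Point(cluster.map(_.x).sum / cluster.size, cluster.map(_.y).sum / cluster.size)
  }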
samklr / PortfolioSelection.scala
Created May 25, 2013 11:43
PortfolioSelection
import com.twitter.scalding._
import cern.colt.matrix.{DoubleFactory2D, DoubleFactory1D }
import cern.colt.matrix.linalg.Algebra
import java.util.StringTokenizer
class Portfolios(args: Args) extends Job(args) {
  val cash = 1000.0   // money at hand
  val error = 1       // it's OK if we cannot invest the last dollar
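  // The preview cuts off here. The rest of the job presumably searches for share
  // allocations whose total cost stays within `error` of `cash`. As a plain-Scala
  // illustration of that constraint (made-up prices and a brute-force search,
  // not the gist's Scalding pipeline):
  val prices = Map("AAPL" -> 450.0, "IBM" -> 200.0)   // hypothetical prices
  val feasible = for {
    a <- 0 to (cash / prices("AAPL")).toInt
    b <- 0 to (cash / prices("IBM")).toInt
    cost = a * prices("AAPL") + b * prices("IBM")
    if cash - cost >= 0 && cash - cost <= error
  } yield (a, b, cost)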
samklr / MRCountWord.java
Last active December 17, 2015 18:19
Hadoop MR Count Word
public class CountWordsMapper extends Mapper<LongWritable, Text, Text, IntWritable> {
  private final static IntWritable one = new IntWritable(1);
  private final Text word = new Text();
  @Override
  public void map(LongWritable key, Text value, Context context)
      throws IOException, InterruptedException {
    StringTokenizer itr = new StringTokenizer(value.toString());
    // Emit each token with a count of 1; a reducer sums the counts per word.
    while (itr.hasMoreTokens()) {
      word.set(itr.nextToken());
      context.write(word, one);
    }
  }
}
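The preview ends before any reducer. A standard companion reducer for this mapper (kept in Java to match it; a sketch of the usual word-count pattern, not necessarily the gist's code):

public class CountWordsReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
  @Override
  public void reduce(Text key, Iterable<IntWritable> values, Context context)
      throws IOException, InterruptedException {
    // Sum the per-occurrence counts emitted by the mapper for this word.
    int sum = 0;
    for (IntWritable v : values) {
      sum += v.get();
    }
    context.write(key, new IntWritable(sum));
  }
}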
class WordCountJob(args: Args) extends Job(args) {
  TextLine(args("input"))
    .flatMap('line -> 'word) { line: String => tokenize(line) }
    .groupBy('word) { _.size }
    .write(Tsv(args("output")))

  // Helper referenced above but missing from the preview: lowercase and split into words.
  def tokenize(text: String): Array[String] =
    text.toLowerCase.replaceAll("[^a-zA-Z0-9\\s]", "").split("\\s+")
}
With Scalding's typed API this becomes:

class WordCountJob(args: Args) extends Job(args) {
  val lines: TypedPipe[String] = TextLine(args("input"))
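  // The gist preview stops at the line above. A possible continuation (a sketch,
  // not the gist's actual code): tokenize, count occurrences per word, write as TSV.
  lines
    .flatMap(_.toLowerCase.split("\\s+"))
    .filter(_.nonEmpty)
    .groupBy(identity)
    .size
    .toTypedPipe
    .write(TypedTsv[(String, Long)](args("output")))
}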
class StocksDividendsJoin4(args: Args) extends Job(args) {
  val stockSchema =
    ('symd, 'price_open, 'price_high, 'price_low, 'price_close, 'volume, 'price_adj_close)
  val dividendsSchema = ('dymd, 'dividend)

  val stocksPipe = new Csv(args("stocks"), stockSchema).read.project('symd, 'price_close)
  val dividendsPipe = new Csv(args("dividends"), dividendsSchema).read
  /*
   * Inner join! Use the "tiny" variant that attempts to replicate the dividend table
   * to all nodes for faster joining.
   * Then suppress the extra ymd and specify the remaining fields in the desired order.
   */
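  // The preview ends with the comment above; given it, the join presumably looks
  // roughly like this (a sketch using Scalding's joinWithTiny, not the gist's exact code):
  stocksPipe
    .joinWithTiny('symd -> 'dymd, dividendsPipe)
    .project('symd, 'price_close, 'dividend)
    .write(Tsv(args("output")))
}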
class StocksDividendsRevisited8(args: Args) extends Job(args) {
  val stocksSchema =
    ('symd, 'price_open, 'price_high, 'price_low, 'price_close, 'volume, 'price_adj_close)
  val dividendsSchema = ('dymd, 'dividend)

  def startPipe(rootPath: String, symbol: String, beforeSchema: Fields, afterSchema: Fields) =
    new Csv(rootPath + "/" + symbol + ".csv", beforeSchema)
      .read
      .write(Tsv("output/dump1"))
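  // Hypothetical usage, not shown in the preview (the argument names are illustrative):
  // build one pipe per input file, e.g. stocks and dividends for a given symbol.
  val stocksPipe    = startPipe(args("stocks-root-path"), args("symbol"), stocksSchema, ('symd, 'price_close))
  val dividendsPipe = startPipe(args("dividends-root-path"), args("symbol"), dividendsSchema, dividendsSchema)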
object WordCount extends PipelineApp {
  def ScrunchWordCount(file: String) = {
    read(from.textFile(file))
      .flatMap(_.split("\\W+").filter(_.nonEmpty))   // drop empty tokens
      .count
  }
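  // The preview ends here. A plausible continuation using PipelineApp's helpers
  // (a sketch; the argument indices are illustrative):
  val counts = ScrunchWordCount(args(0))
  write(counts, to.textFile(args(1)))
}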
// This F# dojo is directly inspired by the Digit Recognizer
// competition from Kaggle.com:
// http://www.kaggle.com/c/digit-recognizer
// The datasets below are simply shorter versions of
// the training dataset from Kaggle.
// 0. Load data files from the following location:
// http://brandewinder.blob.core.windows.net/public/digitssample.csv
// http://brandewinder.blob.core.windows.net/public/digitscheck.csv
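The dojo itself is meant to be solved in F#. Purely to illustrate the data layout (assuming the Kaggle digit-recognizer format: a header row, then one label followed by the pixel values on each line), a rough parsing sketch in Scala:

// Sketch: read the CSV, drop the header, split each line into a label and its pixels.
case class Digit(label: Int, pixels: Array[Int])

def loadDigits(path: String): Seq[Digit] =
  scala.io.Source.fromFile(path).getLines().drop(1).map { line =>
    val cols = line.split(",").map(_.trim.toInt)
    Digit(cols.head, cols.tail)
  }.toSeq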
This is how we create a range of Ints with `until` (the lower bound is included, the upper bound is excluded):
val ie = 0 until 500
//1. Fill in the missing item to create a range of Ints from 1 to 100 inclusive
val ints = 1 ??? 100
//2. Find the sum of the integers in this range
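For reference, the two exercises resolve to `to` (inclusive of both ends) and `sum`:

val ints = 1 to 100    // Range from 1 to 100, inclusive
val total = ints.sum   // 5050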
object TradingDays extends App {
  import scalaz._
  import Scalaz._

  case class Trade(sym: String, trader: String, qty: Int)
  case class TradingDay(symbols: Map[String, SymDay] = Map.empty)

  object TradingDay {
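    // The preview stops here. In the usual version of this example the companion
    // supplies a Monoid so trading days can be merged; roughly (a sketch that
    // assumes a Monoid[SymDay] defined elsewhere in the gist, not the author's exact code):
    implicit def tradingDayMonoid(implicit m: Monoid[SymDay]): Monoid[TradingDay] =
      new Monoid[TradingDay] {
        val zero = TradingDay()
        def append(a: TradingDay, b: => TradingDay): TradingDay =
          TradingDay(a.symbols |+| b.symbols)
      }
  }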