Skip to content

Instantly share code, notes, and snippets.

package com.databricks.spark.jira
import scala.io.Source
import org.apache.spark.rdd.RDD
import org.apache.spark.sql._
import org.apache.spark.sql.functions._
import org.apache.spark.sql.sources.{TableScan, BaseRelation, RelationProvider}
@darkseed
darkseed / gist:12f58e684768529b72d7d89f0440ea5e
Created February 7, 2017 09:16 — forked from marmbrus/gist:15e72f7bc22337cf6653
Parallel list files on S3 with Spark
import org.apache.hadoop.fs.{FileSystem, Path}
import org.apache.hadoop.conf.Configuration
/**
 * One entry returned by a file-system listing.
 *
 * @param path  the entry's path rendered as a string
 * @param isDir whether the entry is a directory
 * @param size  the entry's length as reported by the file system
 */
case class S3File(path: String, isDir: Boolean, size: Long) {

  /** Lists the entries found under `path` by delegating to `listFiles`. */
  def children: Seq[S3File] = listFiles(path)
}
def listFiles(path: String): Seq[S3File] = {
val fs = FileSystem.get(new java.net.URI(path), new Configuration())
fs.listStatus(new Path(path)).map(s => S3File(s.getPath.toString, s.isDir, s.getLen))
@darkseed
darkseed / a3c.py
Created January 24, 2017 10:23 — forked from awjuliani/a3c.py
class AC_Network():
def __init__(self,s_size,a_size,scope,trainer):
with tf.variable_scope(scope):
#Input and visual encoding layers
self.inputs = tf.placeholder(shape=[None,s_size],dtype=tf.float32)
self.imageIn = tf.reshape(self.inputs,shape=[-1,84,84,1])
self.conv1 = slim.conv2d(activation_fn=tf.nn.elu,
inputs=self.imageIn,num_outputs=16,
kernel_size=[8,8],stride=[4,4],padding='VALID')
self.conv2 = slim.conv2d(activation_fn=tf.nn.elu,
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.

Advanced Functional Programming with Scala - Notes

Copyright © 2017 Fantasyland Institute of Learning. All rights reserved.

1. Mastering Functions

A function is a mapping from one set, called the domain, to another set, called the codomain. A function associates every element in the domain with exactly one element in the codomain. In Scala, both domain and codomain are types.

/** Maps an `Int` to its product with itself (ordinary `Int` multiplication). */
val square: Int => Int = (n: Int) => n * n
/**
 * Logistic function: `1 / (1 + e^(-x))`.
 *
 * @param x the input value
 * @return  the logistic function evaluated at `x`
 */
def sigmoid(x: Double): Double = {
  val decay = math.exp(-x)
  1.0 / (1.0 + decay)
}
/**
 * Combines precision and recall as `2 * P * R / (P + R)`.
 *
 * `TN` and `alpha` are accepted but not read by this body.
 *
 * @param TP    passed to both `precision` and `recall`
 * @param TN    unused here
 * @param FP    passed to `precision`
 * @param FN    passed to `recall`
 * @param alpha unused here; defaults to 1
 * @return      `2 * P * R / (P + R)`; follows Double arithmetic, so a zero
 *              denominator is not trapped
 */
def f1measure(TP: Double, TN: Double, FP: Double, FN: Double, alpha: Double = 1): Double = {
  val prec = precision(TP, FP)
  val rec = recall(TP, FN)
  val numerator = 2.0 * prec * rec
  val denominator = prec + rec
  numerator / denominator
}
/**
 * Ratio of `TP` to the sum `FP + TP`.
 *
 * @param TP the numerator, also counted in the denominator
 * @param FP the other term of the denominator
 * @return   `TP / (FP + TP)`; NaN when both arguments are zero (Double division)
 */
def precision(TP: Double, FP: Double): Double = {
  val denominator = FP + TP
  TP / denominator
}
@darkseed
darkseed / spark_knn_approximation.py
Created November 11, 2016 13:20 — forked from tomron/spark_knn_approximation.py
A naive approximation of the k-NN (k-nearest neighbors) algorithm in PySpark. Approximation quality can be controlled by the number of repartitions.
from __future__ import print_function
import sys
from math import sqrt
import argparse
from collections import defaultdict
from random import randint
from pyspark import SparkContext
@darkseed
darkseed / knapsack_problem.scala
Created July 11, 2016 12:29 — forked from bmarcot/knapsack_problem.scala
The Knapsack Problem, in Scala -- Keywords: dynamic programming, recursion, scala.
/**
 * Derives a new row from `is` using the pair `x`.
 *
 * A candidate row is built from the first `x._1` entries of `is` unchanged,
 * followed by the first `is.size - x._1` entries of `is`, each increased by
 * `x._2`. The result is the element-wise maximum of `is` and that candidate.
 *
 * NOTE(review): `x` is presumably an item's (weight, value) and `is` the
 * previous dynamic-programming row indexed by capacity -- confirm with callers.
 *
 * @param x  pair of (offset, increment) applied to the row
 * @param is the current row
 * @return   a row with the same length as `is`
 */
def knapsack_aux(x: (Int, Int), is: List[Int]): List[Int] = {
  val (offset, increment) = x
  val untouched = is.take(offset)
  val boosted = is.take(is.size - offset).map(_ + increment)
  val candidate = untouched ::: boosted
  is.zip(candidate).map { case (previous, proposed) => math.max(previous, proposed) }
}
def knapsack_rec(xs: List[(Int, Int)], is: List[Int]): List[List[Int]] = {
xs match {
case x :: xs => knapsack_aux(x, is) :: knapsack_rec(xs, knapsack_aux(x, is))
case _ => Nil
@darkseed
darkseed / idea.scala
Created May 27, 2016 11:32 — forked from Pet3ris/idea.scala
Typed type tensors in scala
/**
 * A tensor over values of type `V`, evaluated against a list of values and a
 * list of `V => Double` functions.
 */
sealed trait Tensor[V] {
  val n: Int
  val m: Int

  /** Evaluates this tensor against the values `vs` and the functions `vds`. */
  def apply(vs: List[V], vds: List[V => Double]): Double
}

/**
 * Tensor wrapping a single value `v`, with `n = 1` and `m = 0`.
 *
 * Evaluation applies the first function in `vds` to `v`; `vs` is not read.
 * An empty `vds` makes `head` throw.
 */
case class TUnit[V](v: V) extends Tensor[V] {
  val n: Int = 1
  val m: Int = 0

  def apply(vs: List[V], vds: List[V => Double]): Double = {
    val first = vds.head
    first.apply(v)
  }
}
@darkseed
darkseed / BestMatchSearcher.scala
Created April 7, 2016 14:04 — forked from beiske/BestMatchSearcher.scala
Code for getting started with Elasticsearch and Lire
import java.nio.file.Files
import java.nio.file.Paths
import scala.Array.canBuildFrom
import scala.collection.JavaConverters.iterableAsScalaIterableConverter
import scala.concurrent.ExecutionContext.Implicits.global
import org.elasticsearch.client.transport.TransportClient
import org.elasticsearch.common.settings.ImmutableSettings
import org.elasticsearch.common.transport.InetSocketTransportAddress