Ruban Monu rubanm

## CircuitBreaker.scala
import scala.util.{ Failure, Success, Try }

// based on http://martinfowler.com/bliki/CircuitBreaker.html

object CircuitBreaker {
  def apply[T](_timeout: Long, _threshold: Int)(work: () => T): CircuitBreaker[T] =
    new FailureThresholdCircuitBreaker[T](work) {
      override protected def timeout = _timeout
      override protected def threshold = _threshold
    }

## HdfsUtil.scala
import org.apache.hadoop.hdfs.DFSUtil
import org.apache.hadoop.mapred.JobConf
import org.slf4j.LoggerFactory

import java.net.{ HttpURLConnection, InetSocketAddress, URL }
import scala.collection.JavaConverters._
import scala.util.Try

/**
 * Detects the currently active namenode and returns its webhdfs url for the specified path.

## Unshuffle.scala
// Deal a deck of cards into three piles.

def unshuffle(cards: List[Int]): (List[Int], List[Int], List[Int]) = cards.length match {
  case 0 => (Nil, Nil, Nil)
  case 1 => (cards, Nil, Nil)
  case length =>
    val (left, right) = cards.splitAt(length / 2)
    val ((a, b, c), (d, e, f)) = (unshuffle(left), unshuffle(right))
    if (c.length == a.length) (a ::: d, b ::: e, c ::: f)
    else if (b.length == a.length) (a ::: e, b ::: f, c ::: d)

## OrderedListMonoid.scala
object OrderedListMonoid {
  /** The empty list; `plus` returns its other argument unchanged when given this value. */
  def zero: List[Int] = Nil
  /**
   * Merges two lists by repeatedly taking the smaller of the two heads.
   * When both inputs are sorted ascending, the result is sorted ascending.
   * On equal heads the element from `left` is taken first.
   *
   * Runs in constant stack space: the merge loop is tail-recursive, so
   * arbitrarily long lists do not overflow the stack.
   *
   * @param left  first list to merge
   * @param right second list to merge
   * @return a list containing every element of `left` and `right`
   */
  def plus(left: List[Int], right: List[Int]): List[Int] = {
    // `merged` holds the output built so far in reverse order (largest first),
    // so each step is a constant-time prepend.
    @scala.annotation.tailrec
    def merge(l: List[Int], r: List[Int], merged: List[Int]): List[Int] =
      (l, r) match {
        // One side is exhausted: un-reverse the accumulator in front of the rest.
        case (Nil, rest) => rest.reverse_:::(merged)
        case (rest, Nil) => rest.reverse_:::(merged)
        case (lhead :: ltail, rhead :: rtail) =>
          if (lhead <= rhead) merge(ltail, r, lhead :: merged)
          else merge(l, rtail, rhead :: merged)
      }

    merge(left, right, Nil)
  }

## Inversions.scala
// I commonly ask / get asked this question in interviews,
// but no candidate has ever used Scala so I thought I'd give it a try.

// Returns the first component of the pair produced by doInv for the given list
// (presumably the inversion count; doInv's full body is not visible here).
def inv(list : List[Int]) : Int = {
  val (count, _) = doInv(list)
  count
}

def doInv(list : List[Int]) : (Int, List[Int]) =
  if (list.length <= 1) {
    (0, list)
  } else {
    val (left, right) = list.splitAt(list.length / 2)

## MultipleTsvFilesWithDiscriminator.scala
// One of my Scalding jobs runs into this cascading issue
// when there are a large number of input files/steps involved:
// https://groups.google.com/forum/#!searchin/cascading-user/hadoopplanner/cascading-user/R0FMbAOCgt0/BraEMBJ0Xi0J

// A workaround for this is to reduce the total number of steps
// by reading multiple files together in a single flow.
// This can be done using Scalding's MultipleTsvFiles source.
// However, I need to insert some per-file discriminator fields
// to uniquely identify which file each tuple comes from.

## PagingTakeWhile.scala
val responses = MutableList[Response]()

Stream.from(0, PAGE_SIZE).takeWhile { case offset =>

  request.setOffset(offset)
  val response = Await.result(client.execute(request))

  response.getResponseCode match {
    case ResponseCode.OK => responses += (response); true
    case ResponseCode.END => false

## paramiko-sftp.py
import paramiko
import socket
import socks

# Set up a SOCKS5 proxy as the default for the `socks` module, taking the host,
# port, username and password from `proxy_details`.
# NOTE(review): `proxy_details` is not defined in the visible part of this file;
# it is indexed with the keys 'host', 'port', 'username' and 'password'.
# NOTE(review): the positional `True` is presumably the remote-DNS (rdns) flag;
# confirm against the installed socks module's setdefaultproxy signature.
socks.setdefaultproxy(socks.PROXY_TYPE_SOCKS5, proxy_details['host'],
    proxy_details['port'], True, proxy_details['username'],
    proxy_details['password'])
# Rebind socket.socket to socks.socksocket, so code that later creates sockets
# through socket.socket gets socks.socksocket instead.
socket.socket = socks.socksocket
	import scala.util.{ Failure, Success, Try }

	// based on http://martinfowler.com/bliki/CircuitBreaker.html

	object CircuitBreaker {
	def apply[T](_timeout: Long, _threshold: Int)(work: () => T): CircuitBreaker[T] =
	new FailureThresholdCircuitBreaker[T](work) {
	override protected def timeout = _timeout
	override protected def threshold = _threshold
	}
	import org.apache.hadoop.hdfs.DFSUtil
	import org.apache.hadoop.mapred.JobConf
	import org.slf4j.LoggerFactory

	import java.net.{ HttpURLConnection, InetSocketAddress, URL }
	import scala.collection.JavaConverters._
	import scala.util.Try

	/**
	* Detects the currently active namenode and returns its webhdfs url for the specified path.
	// Deal a deck of cards into three piles.

	def unshuffle(cards: List[Int]): (List[Int], List[Int], List[Int]) = cards.length match {
	case 0 => (Nil, Nil, Nil)
	case 1 => (cards, Nil, Nil)
	case length =>
	val (left, right) = cards.splitAt(length / 2)
	val ((a, b, c), (d, e, f)) = (unshuffle(left), unshuffle(right))
	if (c.length == a.length) (a ::: d, b ::: e, c ::: f)
	else if (b.length == a.length) (a ::: e, b ::: f, c ::: d)
	object OrderedListMonoid {
	/** The empty list; `plus` returns its other argument unchanged when given this value. */
	def zero: List[Int] = Nil
	/**
	 * Merges two lists by repeatedly taking the smaller of the two heads.
	 * When both inputs are sorted ascending, the result is sorted ascending.
	 * On equal heads the element from `left` is taken first.
	 *
	 * Runs in constant stack space: the merge loop is tail-recursive, so
	 * arbitrarily long lists do not overflow the stack.
	 *
	 * @param left  first list to merge
	 * @param right second list to merge
	 * @return a list containing every element of `left` and `right`
	 */
	def plus(left: List[Int], right: List[Int]): List[Int] = {
	  // `merged` holds the output built so far in reverse order (largest first),
	  // so each step is a constant-time prepend.
	  @scala.annotation.tailrec
	  def merge(l: List[Int], r: List[Int], merged: List[Int]): List[Int] =
	    (l, r) match {
	      // One side is exhausted: un-reverse the accumulator in front of the rest.
	      case (Nil, rest) => rest.reverse_:::(merged)
	      case (rest, Nil) => rest.reverse_:::(merged)
	      case (lhead :: ltail, rhead :: rtail) =>
	        if (lhead <= rhead) merge(ltail, r, lhead :: merged)
	        else merge(l, rtail, rhead :: merged)
	    }

	  merge(left, right, Nil)
	}
	// I commonly ask / get asked this question in interviews,
	// but no candidate has ever used Scala so I thought I'd give it a try.

	// Returns the first component of the pair produced by doInv for the given list
	// (presumably the inversion count; doInv's full body is not visible here).
	def inv(list : List[Int]) : Int = {
	  val (count, _) = doInv(list)
	  count
	}

	def doInv(list : List[Int]) : (Int, List[Int]) =
	if (list.length <= 1) {
	(0, list)
	} else {
	val (left, right) = list.splitAt(list.length / 2)
	// One of my Scalding jobs runs into this cascading issue
	// when there are a large number of input files/steps involved:
	// https://groups.google.com/forum/#!searchin/cascading-user/hadoopplanner/cascading-user/R0FMbAOCgt0/BraEMBJ0Xi0J

	// A workaround for this is to reduce the total number of steps
	// by reading multiple files together in a single flow.
	// This can be done using Scalding's MultipleTsvFiles source.
	// However, I need to insert some per-file discriminator fields
	// to uniquely identify which file each tuple comes from.
	val responses = MutableList[Response]()

	Stream.from(0, PAGE_SIZE).takeWhile { case offset =>

	request.setOffset(offset)
	val response = Await.result(client.execute(request))

	response.getResponseCode match {
	case ResponseCode.OK => responses += (response); true
	case ResponseCode.END => false
	import paramiko
	import socket
	import socks

	# Set up a SOCKS5 proxy as the default for the `socks` module, taking the host,
	# port, username and password from `proxy_details`.
	# NOTE(review): `proxy_details` is not defined in the visible part of this file;
	# it is indexed with the keys 'host', 'port', 'username' and 'password'.
	# NOTE(review): the positional `True` is presumably the remote-DNS (rdns) flag;
	# confirm against the installed socks module's setdefaultproxy signature.
	socks.setdefaultproxy(socks.PROXY_TYPE_SOCKS5, proxy_details['host'],
	proxy_details['port'], True, proxy_details['username'],
	proxy_details['password'])
	# Rebind socket.socket to socks.socksocket, so code that later creates sockets
	# through socket.socket gets socks.socksocket instead.
	socket.socket = socks.socksocket