CircuitBreaker.scala

import scala.util.{ Failure, Success, Try }

// based on http://martinfowler.com/bliki/CircuitBreaker.html
object CircuitBreaker {
  def apply[T](_timeout: Long, _threshold: Int)(work: () => T): CircuitBreaker[T] =
    new FailureThresholdCircuitBreaker[T](work) {
      override protected def timeout = _timeout
      override protected def threshold = _threshold
    }
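The preview cuts off before the CircuitBreaker[T] trait and FailureThresholdCircuitBreaker themselves, so they are not shown here. The following is only a rough sketch of one plausible shape for them, consistent with the factory above and Fowler's description (trip open after threshold consecutive failures, fail fast while open, retry after timeout milliseconds); the state handling, field names, and fail-fast exception are all assumptions.

  import scala.util.{ Failure, Success, Try }

  trait CircuitBreaker[T] {
    def apply(): Try[T]
  }

  // Sketch only: counts consecutive failures and fails fast for `timeout` ms once tripped.
  abstract class FailureThresholdCircuitBreaker[T](work: () => T) extends CircuitBreaker[T] {
    protected def timeout: Long   // ms to keep the circuit open after tripping
    protected def threshold: Int  // consecutive failures that trip the circuit

    private var failures = 0
    private var openedAt = 0L

    def apply(): Try[T] =
      if (failures >= threshold && System.currentTimeMillis() - openedAt < timeout)
        Failure(new RuntimeException("circuit open")) // open: don't even attempt the work
      else
        Try(work()) match {
          case ok @ Success(_) =>
            failures = 0 // a success closes the circuit again
            ok
          case err @ Failure(_) =>
            failures += 1
            if (failures >= threshold) openedAt = System.currentTimeMillis()
            err
        }
  }

With this shape, CircuitBreaker(5000L, 3)(() => callFlakyService()) would return a breaker whose apply() yields a Try and short-circuits after three consecutive failures.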
View HdfsUtil.scala
import org.apache.hadoop.hdfs.DFSUtil | |
import org.apache.hadoop.mapred.JobConf | |
import org.slf4j.LoggerFactory | |
import java.net.{ HttpURLConnection, InetSocketAddress, URL } | |
import scala.collection.JavaConverters._ | |
import scala.util.Try | |
/** | |
* Detects the currently active namenode and returns its webhdfs url for the specified path. |
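The preview stops at the doc comment, so the gist's actual implementation is not shown. As a rough illustration of one way such a detection can work (not necessarily this file's approach), the sketch below probes each candidate namenode over HTTP and builds a webhdfs/v1 URL from the first one that reports itself active. The candidate address list, the JMX status query, and the helper names are assumptions; in the real file the addresses would presumably come from the Hadoop configuration via DFSUtil/JobConf.

  import java.net.{ HttpURLConnection, URL }
  import scala.io.Source
  import scala.util.Try

  // Assumption: candidate namenode web addresses, normally read from configuration
  // (e.g. dfs.namenode.http-address.*), hard-coded here for the sketch.
  val candidateNamenodes = Seq("nn1.example.com:50070", "nn2.example.com:50070")

  // True if the namenode at `hostPort` reports an active HA state via its JMX servlet.
  def isActive(hostPort: String): Boolean = Try {
    val url = new URL(s"http://$hostPort/jmx?qry=Hadoop:service=NameNode,name=NameNodeStatus")
    val conn = url.openConnection().asInstanceOf[HttpURLConnection]
    conn.setConnectTimeout(2000)
    conn.setReadTimeout(2000)
    val body = Source.fromInputStream(conn.getInputStream).mkString
    conn.disconnect()
    body.contains("\"State\" : \"active\"") || body.contains("\"State\":\"active\"")
  }.getOrElse(false)

  // webhdfs URL for `path` on the currently active namenode, if any.
  def webHdfsUrl(path: String): Option[String] =
    candidateNamenodes.find(isActive).map(nn => s"http://$nn/webhdfs/v1$path")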
Unshuffle.scala

// Deal a deck of cards into three piles.
def unshuffle(cards: List[Int]): (List[Int], List[Int], List[Int]) = cards.length match {
  case 0 => (Nil, Nil, Nil)
  case 1 => (cards, Nil, Nil)
  case length =>
    val (left, right) = cards.splitAt(length / 2)
    val ((a, b, c), (d, e, f)) = (unshuffle(left), unshuffle(right))
    if (c.length == a.length) (a ::: d, b ::: e, c ::: f)
    else if (b.length == a.length) (a ::: e, b ::: f, c ::: d)
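The preview ends one branch short: the case where the left half's piles have sizes (k+1, k, k), i.e. the left half holds 3k+1 cards. The dealing pattern then requires the card after the left half to land on pile two, so the right half's first pile joins pile two, its second joins pile three, and its third joins pile one. A completion consistent with that would be:

    else (a ::: f, b ::: d, c ::: e)
}

With that branch in place, unshuffle(List(1, 2, 3, 4, 5, 6, 7, 8, 9)) returns (List(1, 4, 7), List(2, 5, 8), List(3, 6, 9)), matching a literal deal of the deck into three piles.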
View OrderedListMonoid.scala
object OrderedListMonoid { | |
def zero: List[Int] = List[Int]() | |
def plus(left: List[Int], right: List[Int]): List[Int] = | |
(left, right) match { | |
case (Nil, r) => r | |
case (l, Nil) => l | |
case (lhead :: ltail, rhead :: rtail) => | |
if (lhead <= rhead) lhead :: plus(ltail, right) | |
else rhead :: plus(left, rtail) | |
} |
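plus is the merge step of merge sort, with the empty list as the identity, so it forms a monoid on sorted lists: combining any number of already-sorted lists, in any grouping, yields one sorted list. For example:

  assert(OrderedListMonoid.plus(List(1, 3, 5), List(2, 4)) == List(1, 2, 3, 4, 5))
  assert(OrderedListMonoid.plus(OrderedListMonoid.zero, List(7, 9)) == List(7, 9))

Note that plus recurses once per element and is not tail-recursive, so very long inputs could overflow the stack; an accumulator-based rewrite would avoid that if it matters.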
Inversions.scala

// I commonly ask / get asked this question in interviews,
// but no candidate has ever used Scala so I thought I'd give it a try.
def inv(list: List[Int]): Int = doInv(list)._1

def doInv(list: List[Int]): (Int, List[Int]) =
  if (list.length <= 1) {
    (0, list)
  } else {
    val (left, right) = list.splitAt(list.length / 2)
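The preview stops right after the split, so the rest of the file is not shown. A continuation along the same lines (the standard merge-sort approach, where doInv returns the inversion count together with the sorted list and a merge helper counts cross inversions; the helper name is mine) might look like this:

    val (leftCount, leftSorted) = doInv(left)
    val (rightCount, rightSorted) = doInv(right)
    val (crossCount, merged) = merge(leftSorted, rightSorted)
    (leftCount + rightCount + crossCount, merged)
  }

// Merge two sorted lists, counting how many (left, right) pairs are out of order:
// when an element of `right` is emitted first, it is inverted with every element
// still remaining in `left`.
def merge(left: List[Int], right: List[Int]): (Int, List[Int]) =
  (left, right) match {
    case (Nil, r) => (0, r)
    case (l, Nil) => (0, l)
    case (lhead :: ltail, rhead :: rtail) =>
      if (lhead <= rhead) {
        val (count, rest) = merge(ltail, right)
        (count, lhead :: rest)
      } else {
        val (count, rest) = merge(left, rtail)
        (count + left.length, rhead :: rest)
      }
  }

With that completion, inv(List(3, 1, 2)) returns 2, one inversion for each of the pairs (3, 1) and (3, 2).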
MultipleTsvFilesWithDiscriminator.scala

// One of my Scalding jobs runs into this cascading issue
// when there are a large number of input files/steps involved:
// https://groups.google.com/forum/#!searchin/cascading-user/hadoopplanner/cascading-user/R0FMbAOCgt0/BraEMBJ0Xi0J
// A workaround for this is to reduce the total number of steps
// by reading multiple files together in a single flow.
// This can be done using Scalding's MultipleTsvFiles source.
// However, I need to insert some per-file discriminator fields
// to uniquely identify which file each tuple comes from.
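The gist's MultipleTsvFiles-based source itself is not shown in the preview. Purely to illustrate what a per-file discriminator field means (this is a simpler alternative, not the gist's approach, and it does not reduce the number of steps the way a single combined source would), each file can be read separately, tagged with a literal field, and the pipes merged; paths and field names below are made up.

  import com.twitter.scalding._

  class TaggedUnionJob(args: Args) extends Job(args) {
    val inputs = Seq("data/part-a.tsv", "data/part-b.tsv")

    val tagged = inputs.map { path =>
      Tsv(path, ('userId, 'score)).read
        .insert('srcFile, path)    // per-file discriminator field
    }

    tagged.reduce(_ ++ _)          // union all files into a single pipe
      .write(Tsv(args("output"), ('userId, 'score, 'srcFile)))
  }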
View PagingTakeWhile.scala
val responses = MutableList[Response]() | |
Stream.from(0, PAGE_SIZE).takeWhile { case offset => | |
request.setOffset(offset) | |
val response = Await.result(client.execute(request)) | |
response.getResponseCode match { | |
case ResponseCode.OK => responses += (response); true | |
case ResponseCode.END => false |
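The preview ends inside the match. One easy-to-miss point with this pattern: Stream.from(...).takeWhile is evaluated lazily beyond the first element, so the pipeline has to be forced for every page request to actually be issued. Below is a self-contained sketch of the same pattern; fetchPage, PAGE_SIZE and allItems are stand-ins I made up for the real client, request, and response types.

  import scala.collection.mutable.ListBuffer

  val PAGE_SIZE = 10
  val allItems = (1 to 37).toList
  def fetchPage(offset: Int): List[Int] = allItems.slice(offset, offset + PAGE_SIZE)

  val pages = ListBuffer[List[Int]]()
  Stream.from(0, PAGE_SIZE).takeWhile { offset =>
    val page = fetchPage(offset)
    if (page.nonEmpty) { pages += page; true } // keep paging while the server returns data
    else false                                 // an empty page marks the end
  }.force                                      // force the lazy Stream so all pages are fetched

  assert(pages.flatten.toList == allItems)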
paramiko-sftp.py

import paramiko
import socket
import socks

# set up SOCKS proxy
socks.setdefaultproxy(socks.PROXY_TYPE_SOCKS5, proxy_details['host'],
                      proxy_details['port'], True, proxy_details['username'],
                      proxy_details['password'])
socket.socket = socks.socksocket
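The preview ends after the socket monkey-patch. A typical continuation (an assumption, not necessarily the gist's exact code; sftp_details and the remote path are placeholders like proxy_details above) opens the SFTP session through the now-proxied socket layer:

  # connect to the SFTP server; the Transport's socket goes through the SOCKS proxy
  transport = paramiko.Transport((sftp_details['host'], sftp_details['port']))
  transport.connect(username=sftp_details['username'], password=sftp_details['password'])
  sftp = paramiko.SFTPClient.from_transport(transport)

  sftp.get('/remote/path/file.txt', 'file.txt')  # example transfer

  sftp.close()
  transport.close()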