Mishael Rosenthal (MishaelRosenthal)
package com.twitter.mrosenthal.timelines.adhoc.dataset_transform
import scala.collection.{GenTraversableOnce, SeqLike}
import scala.math.Ordering
object IsSortedExample extends App {
implicit class RichGenTraversableOnce[A](val trav: GenTraversableOnce[A]) extends AnyVal {
/**
* O(1) space, O(n) time. Stops as soon as it finds an unsorted pair.
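The preview is truncated here. A minimal sketch of what an O(1)-space, short-circuiting sortedness check could look like (the method name isSorted and its body are assumptions, not the gist's original code):

def isSorted[A](trav: GenTraversableOnce[A])(implicit ord: Ordering[A]): Boolean = {
  // Walk the collection once, remembering only the previous element,
  // and stop as soon as an out-of-order pair is found.
  val it = trav.toIterator
  if (!it.hasNext) true
  else {
    var prev = it.next()
    var sorted = true
    while (sorted && it.hasNext) {
      val cur = it.next()
      if (ord.gt(prev, cur)) sorted = false
      prev = cur
    }
    sorted
  }
}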
package com.twitter.mrosenthal.timelines.adhoc.dataset_transform
import scala.util.Random
object OrderingCTRProbabilities extends App {
val rand = new Random(17)
def insertionSort[T](seq: Seq[T])(insertLocationFunc: (T, Seq[T]) => Int)(implicit ord: Ordering[T]): Seq[T] = {
def insert(seq: Seq[T], location: Int, elem: T): Seq[T] = {
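The preview cuts off inside insertionSort. A hedged completion of how an insertion sort parameterized by an insertLocationFunc might work (the bodies below are assumptions; linearLocation is an illustrative helper, not part of the gist):

def insertionSort[T](seq: Seq[T])(insertLocationFunc: (T, Seq[T]) => Int)(
    implicit ord: Ordering[T]): Seq[T] = {
  // Insert elem into the already-sorted prefix at the given position.
  def insert(sorted: Seq[T], location: Int, elem: T): Seq[T] = {
    val (before, after) = sorted.splitAt(location)
    (before :+ elem) ++ after
  }
  seq.foldLeft(Seq.empty[T]) { (sorted, elem) =>
    insert(sorted, insertLocationFunc(elem, sorted), elem)
  }
}

// One possible insertLocationFunc: the first index holding an element >= elem.
def linearLocation[T](elem: T, sorted: Seq[T])(implicit ord: Ordering[T]): Int = {
  val i = sorted.indexWhere(x => ord.gteq(x, elem))
  if (i < 0) sorted.length else i
}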
package com.twitter.timelines.data_processing.ad_hoc.feature_selection.utils
import scala.annotation.tailrec
import scala.collection.GenTraversableOnce
/**
* Almost optimal Disjoint-set data structure:
* https://en.wikipedia.org/wiki/Disjoint-set_data_structure
*/
object DisjointSet {
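The preview stops at the object header. The gist's imports (tailrec, GenTraversableOnce) suggest a functional implementation; purely as an illustration of the data structure named above, here is a mutable union-find sketch with path compression and union by rank, which gives the near-optimal amortized bound the comment refers to (none of the names below are from the gist):

import scala.collection.mutable

class MutableDisjointSet[A] {
  private val parent = mutable.Map.empty[A, A]
  private val rank = mutable.Map.empty[A, Int]

  // Find the representative of x's set, compressing the path on the way back.
  def find(x: A): A = {
    val p = parent.getOrElseUpdate(x, x)
    if (p == x) x
    else {
      val root = find(p)
      parent(x) = root
      root
    }
  }

  // Merge the sets containing x and y, attaching the shallower tree under the deeper one.
  def union(x: A, y: A): Unit = {
    val (rx, ry) = (find(x), find(y))
    if (rx != ry) {
      val (hx, hy) = (rank.getOrElse(rx, 0), rank.getOrElse(ry, 0))
      if (hx < hy) parent(rx) = ry
      else if (hx > hy) parent(ry) = rx
      else { parent(ry) = rx; rank(rx) = hx + 1 }
    }
  }
}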
included:
promoterDiscovery.py - trains a classifier and predicts whether a DNA sequence is a promoter or an enhancer
testPromoterDiscovery - includes unit tests for the promoterDiscovery class
this README
usage:
promoterDiscovery.py [-h] [-c] [-p SEQUENCE] [-f FILE] [-t FILE]
-h show this help message
-c run 10 fold cross validation
-t FILE train model and save into FILE
@MishaelRosenthal
MishaelRosenthal / LockPool.scala
Last active November 24, 2015 12:16
Provides a lock per key and enables synchronized execution per key.
package core
import org.slf4j.LoggerFactory
import scala.collection.mutable
class LockPool{
type Lock = AnyRef
case class LockAndCounter(lock: AnyRef, counter: Int)
val logger = LoggerFactory.getLogger(getClass)
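The preview ends here. A sketch of how such a per-key lock pool could be implemented (withLock is an assumed entry point; the gist's own logging via slf4j is omitted):

import scala.collection.mutable

class LockPool {
  private case class LockAndCounter(lock: AnyRef, counter: Int)
  private val locks = mutable.Map.empty[Any, LockAndCounter]

  // Run body while holding the lock associated with key; the reference
  // count lets the entry be removed once no caller is using that key.
  def withLock[T](key: Any)(body: => T): T = {
    val lock = locks.synchronized {
      val entry = locks.getOrElse(key, LockAndCounter(new AnyRef, 0))
      locks(key) = entry.copy(counter = entry.counter + 1)
      entry.lock
    }
    try lock.synchronized(body)
    finally locks.synchronized {
      val entry = locks(key)
      if (entry.counter <= 1) locks -= key
      else locks(key) = entry.copy(counter = entry.counter - 1)
    }
  }
}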
@MishaelRosenthal
MishaelRosenthal / FetchLockFetch.scala
Last active November 23, 2015 12:39
Performs the first fetch. If it returns None, acquires the lock, performs the second fetch, and releases the lock.
package core
object FetchLockFetch {
/**
* Try to fetch using inexpensiveFetch, obtain lock, try again (using inexpensiveFetch),
* fall back to more expensive expensiveFetch.
*/
def fetchLockFetch[T](inexpensiveFetch: => Option[T], lockFunc: => ((=> Option[T]) => Option[T]),
expensiveFetch: => Option[T]): Option[T] =
inexpensiveFetch.orElse{
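The body is cut off above. A hedged completion of the double-checked fetch the doc comment describes (the exact code inside orElse is an assumption):

def fetchLockFetch[T](
    inexpensiveFetch: => Option[T],
    lockFunc: => ((=> Option[T]) => Option[T]),
    expensiveFetch: => Option[T]): Option[T] =
  inexpensiveFetch.orElse {
    lockFunc {
      // Re-check under the lock: another caller may have filled the value
      // while we were waiting, and only then fall back to expensiveFetch.
      inexpensiveFetch.orElse(expensiveFetch)
    }
  }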
@MishaelRosenthal
MishaelRosenthal / BlockingExecutor.scala
Last active May 18, 2016 20:58
Throttles executions to at most maxParallelism at a time.
package core
import java.util.concurrent.ArrayBlockingQueue
class BlockingExecutor(maxParallelism: Int) extends SBLogger{
private case object Execution
private val queue = new ArrayBlockingQueue[Execution.type](maxParallelism)
def execute[T](execution: => T): T = {
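The preview ends at the execute signature. A sketch of how the bounded queue can enforce the throttle (the SBLogger mixin from the gist is omitted; the body of execute is an assumption):

import java.util.concurrent.ArrayBlockingQueue

class BlockingExecutor(maxParallelism: Int) {
  private case object Execution
  private val queue = new ArrayBlockingQueue[Execution.type](maxParallelism)

  def execute[T](execution: => T): T = {
    queue.put(Execution)   // blocks while maxParallelism executions are in flight
    try execution
    finally queue.take()   // free a slot for the next caller
  }
}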
@MishaelRosenthal
MishaelRosenthal / AutoRefreshingWrapper.scala
Last active November 23, 2015 13:17
A wrapper that refreshes the inner value every refreshRate.
package core
import org.joda.time.DateTime
import scala.concurrent.duration.Duration
import scala.util.{Failure, Success, Try}
class AutoRefreshingWrapper[T](refreshFunc: => Try[T], refreshRate: Duration) extends SBLogger {
@volatile
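The preview stops at the @volatile field. A sketch of a lazily self-refreshing wrapper in the spirit of the description (field and method names are assumptions; a failed refresh keeps serving the last good value):

import org.joda.time.DateTime
import scala.concurrent.duration.Duration
import scala.util.{Failure, Success, Try}

class AutoRefreshingWrapper[T](refreshFunc: => Try[T], refreshRate: Duration) {
  @volatile private var current: Option[T] = None
  @volatile private var lastRefresh: DateTime = new DateTime(0L)

  def get: Option[T] = {
    // Refresh lazily: only when asked for, and only if refreshRate has elapsed.
    if (current.isEmpty || lastRefresh.plusMillis(refreshRate.toMillis.toInt).isBeforeNow) {
      refreshFunc match {
        case Success(value) =>
          current = Some(value)
          lastRefresh = DateTime.now()
        case Failure(_) => // keep the stale value rather than failing the caller
      }
    }
    current
  }
}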
package core.common.datastructures
import scala.collection.generic.{GenericCompanion, CanBuildFrom}
import scala.collection.{SeqLike, mutable}
import scala.collection.mutable.ListBuffer
final class LimitedParSeq[T] private (internal: Seq[T], parLimit: Int) extends Seq[T] with SeqLike[T,LimitedParSeq[T]] {
override def length: Int = internal.length
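The preview shows only the class header. Skipping the CanBuildFrom plumbing, a sketch of the core idea a limited-parallelism Seq could implement (parMap and its batching strategy are assumptions, written against the Scala 2.12 collections the gist imports):

final class LimitedParSeqSketch[T](internal: Seq[T], parLimit: Int) extends Seq[T] {
  override def length: Int = internal.length
  override def apply(idx: Int): T = internal(idx)
  override def iterator: Iterator[T] = internal.iterator

  // Map in batches of parLimit, running each batch as a parallel collection,
  // so no more than parLimit elements are processed concurrently.
  def parMap[B](f: T => B): LimitedParSeqSketch[B] =
    new LimitedParSeqSketch(
      internal.grouped(parLimit).flatMap(batch => batch.par.map(f)).toVector,
      parLimit)
}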
@MishaelRosenthal
MishaelRosenthal / GroupByKeySmallNumberOfGroups.scala
Last active May 8, 2020 09:59
Groups an RDD by key when there is only a small number of groups.
package core.sparkTest.utils
import java.io._
import java.nio.file.Files
import core.Pimps._
import org.apache.hadoop.io.compress.CompressionCodec
import org.apache.hadoop.io.{BytesWritable, NullWritable}
import org.apache.hadoop.mapred.JobConf
import org.apache.hadoop.mapred.lib.MultipleSequenceFileOutputFormat
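The preview ends inside the imports; those imports suggest the gist writes one sequence file per key via MultipleSequenceFileOutputFormat. Purely as an illustration of the same small-number-of-groups idea, here is a different, simpler technique: collect the few distinct keys and filter the cached RDD once per key instead of shuffling everything through groupByKey (groupBySmallKeySet is a hypothetical helper, not from the gist):

import org.apache.spark.rdd.RDD
import scala.reflect.ClassTag

def groupBySmallKeySet[K: ClassTag, V: ClassTag](rdd: RDD[(K, V)]): Map[K, RDD[V]] = {
  rdd.cache()                                   // each key triggers a separate pass
  val keys = rdd.keys.distinct().collect()      // cheap when the key set is small
  keys.map(k => k -> rdd.filter { case (key, _) => key == k }.values).toMap
}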