Skip to content

Instantly share code, notes, and snippets.

@hugoferreira
Last active April 11, 2016 23:47
Show Gist options
  • Save hugoferreira/5736068 to your computer and use it in GitHub Desktop.
Save hugoferreira/5736068 to your computer and use it in GitHub Desktop.
Unix utilities in Scala
import java.io.{BufferedOutputStream, FileOutputStream, FileInputStream, BufferedInputStream}
import java.util.zip.{GZIPOutputStream, GZIPInputStream}
import scala.io.{Source, Codec}
import scala.language.{reflectiveCalls, implicitConversions}
/** Demo entry point: runs a handful of Unix-style pipelines over a gzipped log file.
  *
  * Written with an explicit `main` method rather than the `App` trait so the body
  * is ordinary method code and not subject to `App`'s delayed initialisation.
  */
object main {
  import utils._

  /** Input used when no path is given on the command line. */
  private val DefaultInFile =
    "/Users/bytter/Documents/Development/shiftforward/spitz/coopeventsfiltered.log.gz"

  /** Runs the demo pipelines.
    *
    * @param args optional; `args(0)`, when present, is the gzipped input file to read
    *             instead of [[DefaultInFile]]
    */
  def main(args: Array[String]): Unit = {
    val inFile = args.headOption.getOrElse(DefaultInFile)

    // Each pipeline re-opens the file: an Iterator can only be traversed once.
    // Distinct values of the first tab-separated column.
    gzcat(inFile) | cols("\t", 0) | uniq | print
    // Sorted distinct values of the second tab-separated column.
    gzcat(inFile) | cols("\t", 1) | sort | uniq | print
    // Number of lines.
    gzcat(inFile) | lc | print
    // `wc` yields one count per line, so counting those yields the line count again.
    gzcat(inFile) | wc | lc | print
    // Number of lines fully matched by the regex `.*`.
    gzcat(inFile) | egrep(".*") | lc | print
    // Lines containing "Purchase", written gzip-compressed to /tmp/purchases.log.gz.
    gzcat(inFile) | grep("Purchase") | gzip("/tmp/purchases.log.gz")
    // First 15 characters of each of the first 5 lines.
    gzcat(inFile) | cut(0, 15) | head(5) | print
  }
}
/** Unix-style text utilities expressed as transformations over iterators.
  *
  * Sources (`cat`, `gzcat`) produce an `Iterator[String]` of lines, filters are
  * functions from iterator to iterator, and sinks (`print`, `gzip`) consume an
  * iterator. Stages are chained with the `|` operator that [[pimpIterator]] adds
  * to every `Iterator`.
  */
object utils {

  /** Turns a partial function on single elements into an iterator stage.
    * Elements on which `f` is not defined are dropped (this is `Iterator.collect`).
    */
  def lift[A, B](f: PartialFunction[A, B]): (Iterator[A] => Iterator[B]) = _ collect f

  /** Wrapper that gives an iterator the pipe operator `|`.
    *
    * A named class is used instead of an anonymous structural type so that calling
    * `|` is a plain method call rather than a reflective one.
    */
  final class PipedIterator[A](src: Iterator[A]) {
    /** Feeds the wrapped iterator into stage `f` and returns whatever `f` returns. */
    def |[U](f: Iterator[A] => U): U = f(src)
  }

  /** Implicit view that enables `iterator | stage` syntax. */
  implicit def pimpIterator[A](src: Iterator[A]): PipedIterator[A] = new PipedIterator(src)

  /** Iterates over the lines of `source` and closes it once the lines run out.
    *
    * The source is closed the first time `hasNext` reports that nothing is left;
    * an iterator that is abandoned before that point leaves the source open.
    */
  private def closingLines(source: Source): Iterator[String] = new Iterator[String] {
    private val underlying = source.getLines()
    // Local mutable flag: tracks whether `source` still has to be closed.
    private var open = true

    def hasNext: Boolean = {
      val more = open && underlying.hasNext
      if (!more && open) {
        open = false
        source.close()
      }
      more
    }

    def next(): String =
      if (hasNext) underlying.next()
      else throw new NoSuchElementException("next on empty iterator")
  }

  /** Lines of the text file `fileName`, decoded with the implicit `codec`. */
  def cat(fileName: String)(implicit codec: Codec): Iterator[String] =
    closingLines(Source.fromFile(fileName)(codec))

  /** Lines of the gzip-compressed text file `fileName`, decoded with the default codec. */
  def gzcat(fileName: String): Iterator[String] =
    closingLines(
      Source.fromInputStream(
        new GZIPInputStream(new BufferedInputStream(new FileInputStream(fileName)))))

  /** Sink: writes every element of `source`, followed by a newline, gzip-compressed
    * to `fileName`. Bytes are produced with the platform default charset.
    * The output stream is closed even when reading `source` or writing fails.
    */
  def gzip(fileName: String)(source: Iterator[String]): Unit = {
    val out = new GZIPOutputStream(new BufferedOutputStream(new FileOutputStream(fileName)))
    try source.foreach(line => out.write((line + '\n').getBytes))
    finally out.close()
  }

  /** Sink: prints every element on its own line to standard output. */
  def print[A](source: Iterator[A]): Unit = source foreach println

  /** Line count: a one-element iterator holding the number of elements in `lines`. */
  def lc[A](lines: Iterator[A]): Iterator[Int] = Iterator.single(lines.size)

  /** The first `n` elements of `lines`. */
  def head[A](n: Int)(lines: Iterator[A]): Iterator[A] = lines.take(n)

  /** Drops every element that has already been seen, keeping first occurrences in order.
    *
    * Note that this removes ALL duplicates, not only adjacent ones, so the input
    * does not need to be sorted first.
    */
  def uniq[A](lines: Iterator[A]): Iterator[A] = {
    // Mutable set confined to this one single-pass iterator.
    val seen = scala.collection.mutable.HashSet.empty[A]
    lines.filter(line => seen.add(line))
  }

  /** Elements of `lines` in their natural `String` ordering. Reads all input into memory. */
  def sort(lines: Iterator[String]): Iterator[String] = lines.toVector.sorted.iterator

  /** Applies `String.stripMargin` to every line.
    * NOTE(review): if whitespace trimming was intended, `trim` is the call to use — confirm.
    */
  def strip: Iterator[String] => Iterator[String] =
    lift[String, String] { case line => line.stripMargin }

  /** Characters `s` (inclusive) to `e` (exclusive) of every line.
    * Indices are clamped to the line, so short lines are shortened rather than failing.
    */
  def cut(s: Int, e: Int): Iterator[String] => Iterator[String] =
    lift[String, String] { case line => line.slice(s, e) }

  /** Word count per line: the number of whitespace-separated, non-empty tokens. */
  def wc: Iterator[String] => Iterator[Int] =
    lift[String, Int] { case line => line.split("\\s+").count(_.nonEmpty) }

  /** Keeps the lines that `regex` matches in their entirety (`Matcher.matches`). */
  def egrep(regex: String): Iterator[String] => Iterator[String] = {
    // Compile once per stage instead of once per line.
    val pattern = java.util.regex.Pattern.compile(regex)
    lift[String, String] { case line if pattern.matcher(line).matches() => line }
  }

  /** Keeps the lines that contain `regex` as a literal substring
    * (despite the parameter name, no regular-expression matching is done).
    */
  def grep(regex: String): Iterator[String] => Iterator[String] =
    lift[String, String] { case line if line.contains(regex) => line }

  /** Selects columns `cs` (zero-based, in the given order) from every line.
    *
    * `separator` is used as a regular expression when splitting and as a literal
    * string when re-joining the selected columns. Trailing empty fields are kept,
    * and a column index that the line does not have yields an empty field.
    */
  def cols(separator: String, cs: Int*): Iterator[String] => Iterator[String] =
    lift[String, String] { case line =>
      // limit -1: do not discard trailing empty fields
      val fields = line.split(separator, -1)
      cs.map(i => if (i >= 0 && i < fields.length) fields(i) else "").mkString(separator)
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment