Last active
April 11, 2016 23:47
-
-
Save hugoferreira/5736068 to your computer and use it in GitHub Desktop.
Unix utilities in Scala
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import java.io.{BufferedOutputStream, FileOutputStream, FileInputStream, BufferedInputStream} | |
import java.util.zip.{GZIPOutputStream, GZIPInputStream} | |
import scala.io.{Source, Codec} | |
import scala.language.{reflectiveCalls, implicitConversions} | |
/** Demo entry point that runs a handful of shell-like pipelines over a
  * gzip-compressed, tab-separated log file.
  *
  * Usage: `main [inputFile [outputFile]]`
  *
  *  - `inputFile`  gzip-compressed log to read; defaults to the path that was
  *                 previously hard-coded.
  *  - `outputFile` where the gzip-compressed "Purchase" lines are written;
  *                 defaults to `/tmp/purchases.log.gz`.
  *
  * An explicit `main` method is used instead of the `App` trait so that the
  * vals below are ordinary locals and not subject to `App`'s delayed
  * initialisation.
  */
object main {
  import utils._

  private val DefaultInput =
    "/Users/bytter/Documents/Development/shiftforward/spitz/coopeventsfiltered.log.gz"
  private val DefaultOutput = "/tmp/purchases.log.gz"

  def main(args: Array[String]): Unit = {
    val inFile = args.headOption.getOrElse(DefaultInput)
    val outFile = if (args.length > 1) args(1) else DefaultOutput

    // Distinct values of the first column, in order of first appearance.
    gzcat(inFile) | cols("\t", 0) | uniq | print
    // Distinct values of the second column, sorted.
    gzcat(inFile) | cols("\t", 1) | sort | uniq | print
    // Number of lines.
    gzcat(inFile) | lc | print
    // One word count per line, then the number of those counts.
    gzcat(inFile) | wc | lc | print
    // Number of lines matching the regex (".*" matches every line).
    gzcat(inFile) | egrep(".*") | lc | print
    // Lines containing "Purchase", re-compressed into outFile.
    gzcat(inFile) | grep("Purchase") | gzip(outFile)
    // First 15 characters of the first 5 lines.
    gzcat(inFile) | cut(0, 15) | head(5) | print
  }
}
/** Unix-style text utilities expressed as functions over `Iterator`s.
  *
  * Each utility is either a method taking an iterator or a function value
  * `Iterator[A] => Iterator[B]`, so they can be chained with the `|` operator
  * provided by [[pimpIterator]]: `cat("f") | grep("x") | lc | print`.
  */
object utils {
  import java.util.regex.Pattern
  import scala.collection.mutable

  /** Turns a partial function into an iterator transformer that keeps only the
    * elements on which `f` is defined, mapped through `f`.
    */
  def lift[A, B](f: PartialFunction[A, B]): (Iterator[A] => Iterator[B]) = _ collect f

  /** Wrapper that adds the pipe operator to an iterator.
    *
    * A named class is used instead of an anonymous structural type so that
    * calling `|` does not go through reflection.
    */
  final class PipeOps[A](src: Iterator[A]) {
    /** Feeds the wrapped iterator to `f` and returns whatever `f` returns. */
    def |[U](f: Iterator[A] => U): U = f(src)
  }

  /** Implicit conversion enabling `iterator | stage` syntax. */
  implicit def pimpIterator[A](src: Iterator[A]): PipeOps[A] = new PipeOps(src)

  /** Wraps `source.getLines()` so that `source` is closed as soon as the
    * lines are exhausted. A consumer that stops early (e.g. after `head`)
    * never reaches the end, so the source stays open in that case.
    */
  private def closingLines(source: Source): Iterator[String] = new Iterator[String] {
    private val underlying = source.getLines()
    private var open = true

    def hasNext: Boolean =
      open && {
        val more = underlying.hasNext
        if (!more) {
          open = false
          source.close()
        }
        more
      }

    def next(): String =
      if (hasNext) underlying.next()
      else throw new NoSuchElementException("next on empty iterator")
  }

  /** Lines of the text file `fileName`, decoded with the implicit `codec`. */
  def cat(fileName: String)(implicit codec: Codec): Iterator[String] =
    closingLines(Source.fromFile(fileName)(codec))

  /** Lines of the gzip-compressed text file `fileName`, decoded with the
    * implicit `codec` (same contract as [[cat]]).
    */
  def gzcat(fileName: String)(implicit codec: Codec): Iterator[String] =
    closingLines(
      Source.fromInputStream(
        new GZIPInputStream(new BufferedInputStream(new FileInputStream(fileName)))
      )(codec)
    )

  /** Writes every element of `source`, each followed by `'\n'`, to the
    * gzip-compressed file `fileName`. Bytes are produced with the platform
    * default charset. The stream is closed even if writing fails.
    */
  def gzip(fileName: String)(source: Iterator[String]): Unit = {
    val out = new GZIPOutputStream(new BufferedOutputStream(new FileOutputStream(fileName)))
    try source foreach { l => out.write((l + '\n').getBytes) }
    finally out.close()
  }

  /** Prints every element of `source` on its own line. */
  def print[A](source: Iterator[A]): Unit = source foreach println

  /** Line count: consumes `lines` and yields a single element, their number. */
  def lc[A](lines: Iterator[A]): Iterator[Int] = Iterator.single(lines.size)

  /** The first `n` elements of `lines`. */
  def head[A](n: Int)(lines: Iterator[A]): Iterator[A] = lines.take(n)

  /** Drops every element that has already been seen, keeping first
    * occurrences in their original order. Unlike Unix `uniq`, duplicates do
    * not need to be adjacent.
    */
  def uniq[A](lines: Iterator[A]): Iterator[A] = {
    val seen = mutable.HashSet.empty[A]
    // `add` returns true only the first time an element is inserted.
    lines.filter(line => seen.add(line))
  }

  /** All lines in ascending order; the input is read fully before returning. */
  def sort(lines: Iterator[String]): Iterator[String] = lines.toVector.sorted.iterator

  /** Removes leading and trailing whitespace from every line. */
  def strip: Iterator[String] => Iterator[String] =
    lift[String, String] { case x => x.trim }

  /** Characters `s` (inclusive) to `e` (exclusive) of every line. Bounds are
    * clamped to the line, so short lines yield a shorter (possibly empty)
    * string instead of throwing.
    */
  def cut(s: Int, e: Int): Iterator[String] => Iterator[String] =
    lift[String, String] { case x => x.slice(s, e) }

  /** Number of whitespace-separated words on each line; blank lines count 0. */
  def wc: Iterator[String] => Iterator[Int] =
    lift[String, Int] { case x => x.split("\\s+").count(_.nonEmpty) }

  /** Keeps the lines that `regex` matches in their entirety (use `.*foo.*`
    * to match anywhere). The pattern is compiled once, not once per line.
    */
  def egrep(regex: String): Iterator[String] => Iterator[String] = {
    val pattern = Pattern.compile(regex)
    lift[String, String] { case x if pattern.matcher(x).matches() => x }
  }

  /** Keeps the lines that contain `regex` as a literal substring (despite the
    * parameter name, no regular-expression matching is performed).
    */
  def grep(regex: String): Iterator[String] => Iterator[String] =
    lift[String, String] { case x if x.contains(regex) => x }

  /** Selects columns from delimiter-separated lines.
    *
    * @param separator literal delimiter, used both to split the input and to
    *                  join the selected columns
    * @param cs        zero-based column indices, in output order
    * @return a transformer producing, for each line, the selected columns
    *         joined by `separator`; a column the line does not have is emitted
    *         as an empty string
    */
  def cols(separator: String, cs: Int*): Iterator[String] => Iterator[String] = {
    // LITERAL: separators such as "|" or "." must not be read as regex syntax.
    val splitter = Pattern.compile(separator, Pattern.LITERAL)
    lift[String, String] { case x =>
      // Limit -1 keeps trailing empty fields so indices stay aligned.
      val fields = splitter.split(x, -1)
      cs.map(i => if (i >= 0 && i < fields.length) fields(i) else "").mkString(separator)
    }
  }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment