Skip to content

Instantly share code, notes, and snippets.

@cdecl
Last active December 11, 2015 09:34
Show Gist options
  • Save cdecl/3ede1c39edff34d3e37b to your computer and use it in GitHub Desktop.
Save cdecl/3ede1c39edff34d3e37b to your computer and use it in GitHub Desktop.
import org.apache.spark.SparkContext
import org.apache.spark.SparkContext._
import org.apache.log4j.Logger
import org.apache.log4j.Level
/**
 * Small Spark job that reads the text file "iis.log", groups its lines by the
 * space-separated field at index 10, and prints how many times each distinct
 * value occurs, most frequent first.
 */
object App {

  /**
   * Entry point: raises the "org" and "akka" loggers to WARN, runs the report
   * on a local Spark context, and always stops that context afterwards.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    Logger.getLogger("org").setLevel(Level.WARN)
    Logger.getLogger("akka").setLevel(Level.WARN)

    val sc = new SparkContext("local[*]", "MyApp")
    // Stop the context even when the job fails, so it is never left running.
    try RDDRun(sc)
    finally sc.stop()
  }

  /**
   * Counts the occurrences of the field at index 10 over all data lines of
   * "iis.log" and prints one "count : value" line per distinct value, sorted
   * by count in descending order.
   *
   * Lines starting with '#' are skipped, as are lines that have too few
   * fields to contain index 10.
   *
   * @param sc the Spark context used to read and process the log file
   */
  def RDDRun(sc: SparkContext): Unit = {
    val logfile = "iis.log"
    // NOTE(review): which column sits at index 10 depends on the log's field
    // layout -- confirm against the "#Fields:" header of the actual file.
    val keyFieldIndex = 10

    val counts = sc.textFile(logfile)
      // Only lines that *begin* with '#' are skipped; a '#' inside a data
      // field must not cause the whole entry to be dropped.
      .filter(line => !line.startsWith("#"))
      .map(_.split(" "))
      // Blank or truncated lines would otherwise throw on the index access.
      .filter(_.length > keyFieldIndex)
      .map(fields => (fields(keyFieldIndex), 1))
      .reduceByKey(_ + _)
      .sortBy(_._2, ascending = false)

    counts.collect().foreach { case (value, count) =>
      println(s"$count : $value")
    }
  }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment