Skip to content

Instantly share code, notes, and snippets.

@MasseGuillaume
Created October 4, 2014 23:06
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save MasseGuillaume/3b5ac9f720b0586a6f82 to your computer and use it in GitHub Desktop.
Save MasseGuillaume/3b5ac9f720b0586a6f82 to your computer and use it in GitHub Desktop.
MillionDollarHomepage.scala
2014-10-04 22:40:19.717UTC ERROR[scalakata-playground-akka.actor.default-dispatcher-14] a.i.SelectionHandler - Error during selector management task: [java.nio.channels.ClosedChannelException]
java.nio.channels.ClosedChannelException: null
at java.nio.channels.spi.AbstractSelectableChannel.register(AbstractSelectableChannel.java:197) ~[na:1.8.0_20]
at akka.io.SelectionHandler$ChannelRegistryImpl$$anon$4.tryRun(SelectionHandler.scala:157) ~[akka-actor_2.11-2.3.6.jar:na]
at akka.io.SelectionHandler$ChannelRegistryImpl$Task.run(SelectionHandler.scala:215) ~[akka-actor_2.11-2.3.6.jar:na]
at akka.util.SerializedSuspendableExecutionContext.run$1(SerializedSuspendableExecutionContext.scala:68) ~[akka-actor_2.11-2.3.6.jar:na]
at akka.util.SerializedSuspendableExecutionContext.run(SerializedSuspendableExecutionContext.scala:72) ~[akka-actor_2.11-2.3.6.jar:na]
at akka.dispatch.TaskInvocation.run(AbstractDispatcher.scala:41) ~[akka-actor_2.11-2.3.6.jar:na]
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142) ~[na:1.8.0_20]
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617) ~[na:1.8.0_20]
at java.lang.Thread.run(Thread.java:745) ~[na:1.8.0_20]
/***
scalaVersion := "2.11.2"
resolvers ++= Seq(
"Sonatype" at "https://oss.sonatype.org/content/repositories/releases",
"spray repo" at "http://repo.spray.io"
)
libraryDependencies ++= Seq(
"io.spray" %% "spray-client" % "1.3.2-20140909",
"com.typesafe.akka" %% "akka-slf4j" % "2.3.6",
"ch.qos.logback" % "logback-classic" % "1.0.0" % "runtime",
"com.typesafe.akka" %% "akka-actor" % "2.3.6",
"nu.validator.htmlparser" % "htmlparser" % "1.4"
)
***/
/** Downloads the Million Dollar Homepage, extracts every linked site from its
  * image map, probes one URL per host, and writes the alive/dead hosts together
  * with the pixel area each group occupies to `results.txt`.
  *
  * The whole pipeline runs during object initialisation (`extends App`).
  */
object MillionDollarHomepageMain extends App {

  /** Parses raw HTML bytes into a `scala.xml` tree using the validator.nu HTML parser.
    *
    * @param html the raw bytes of an HTML document
    * @return the root element produced by the SAX adapter, or `None` when the
    *         adapter ended up with a `null` root
    */
  def parse(html: Array[Byte]): Option[scala.xml.Node] = {
    import nu.validator.htmlparser._
    import sax.HtmlParser
    import scala.xml.parsing.NoBindingFactoryAdapter

    val hp = new HtmlParser()
    val saxer = new NoBindingFactoryAdapter
    hp.setContentHandler(saxer)
    val stream = new java.io.ByteArrayInputStream(html)
    // Close the stream even when the parser throws.
    try hp.parse(new org.xml.sax.InputSource(stream))
    finally stream.close()
    Option(saxer.rootElem)
  }

  import scala.concurrent.{Future, Await}
  import scala.util.{Failure, Success}
  import scala.util.control.NonFatal
  import akka.actor.ActorSystem
  import akka.pattern.ask
  import akka.io.IO
  import akka.util.Timeout
  import scala.concurrent.duration._
  import spray.http._
  import spray.client.pipelining._
  import spray.can.Http
  import spray.can.Http.ConnectionAttemptFailedException
  import com.typesafe.config.{ ConfigValueFactory, ConfigFactory, Config }

  val config: Config = ConfigFactory.parseString("""
akka {
loggers = ["akka.event.slf4j.Slf4jLogger"]
loglevel = DEBUG
}
spray.can.host-connector.max-retries = 1
""")

  implicit val system: ActorSystem = ActorSystem("scalakata-playground", config)
  import system.dispatcher // execution context for futures
  implicit val timeout: Timeout = Timeout(6000.seconds)

  val pipeline: HttpRequest => Future[HttpResponse] = sendReceive
  val response = pipeline(Get("http://www.milliondollarhomepage.com/"))

  /** Blocks until the home page request completes and parses its body.
    *
    * @return the parsed document for a 200 response with a non-empty entity,
    *         `None` for any other response
    */
  def html: Option[scala.xml.Node] = Await.result(response, timeout.duration) match {
    case HttpResponse(StatusCodes.OK, entity: HttpEntity.NonEmpty, _, _) =>
      parse(entity.data.toByteArray)
    case _ => None
  }

  /** Runs `body`, capturing any non-fatal exception as a `Left`.
    *
    * @param body the computation to run
    * @return `Right(result)` on success, `Left(exception)` on a non-fatal failure
    */
  def tryo[T](body: () => T): Either[Throwable, T] = {
    try { Right(body()) } catch {
      case NonFatal(e) => Left(e)
    }
  }

  /** Extracts `(coords, href)` for every `<area>` inside the `<map id="Map">` element.
    *
    * Each component is an `Either`, so a malformed area is reported instead of
    * aborting the scan. Coordinates are `(left, top, right, bottom)`.
    *
    * @return `None` when the page is unavailable, otherwise one entry per area
    */
  def siteLists: Option[Seq[(Either[Throwable, (Int, Int, Int, Int)], Either[Throwable, java.net.URI])]] =
    html.map { page =>
      val areaMap = (page \\ "map").filter(m => (m \@ "id") == "Map")
      (areaMap \ "area").map { area =>
        val coords = area \@ "coords"
        val parsedCoords: Either[Throwable, (Int, Int, Int, Int)] =
          // Trim each token so "1, 2, 3, 4" parses as well as "1,2,3,4".
          coords.split(",").map(_.trim) match {
            case Array(left, top, right, bottom) =>
              tryo(() => (left.toInt, top.toInt, right.toInt, bottom.toInt))
            case _ =>
              // Keep the left side a Throwable so the Either type is not widened.
              Left(new IllegalArgumentException(
                s"expected 4 comma-separated coordinates but got: $coords"))
          }
        val href = area \@ "href"
        val uri = tryo(() => new java.net.URI(href))
        (parsedCoords, uri)
      }
    }

  /** Groups the well-formed areas by host.
    *
    * @return `None` when the page is unavailable, otherwise a map from host to
    *         the set of URIs seen for it and the summed area of its rectangles
    */
  def sites: Option[Map[String, (Set[java.net.URI], Int)]] = siteLists.map { entries =>
    val hostValues = entries.collect {
      case (Right((left, top, right, bottom)), Right(uri)) if uri.getHost != null =>
        (uri, uri.getHost, (bottom - top) * (right - left))
    }
    hostValues.foldLeft(Map.empty[String, (Set[java.net.URI], Int)]) {
      case (acc, (uri, host, area)) =>
        val (uris, total) = acc.getOrElse(host, (Set.empty[java.net.URI], 0))
        acc.updated(host, (uris + uri, total + area))
    }
  }

  /** Probes the root URL of every host and reports whether it answered 200 OK.
    *
    * A host counts as dead when the request cannot be built, cannot connect,
    * fails for any other non-fatal reason, or answers with a non-200 status.
    * When the page is unavailable the result is an empty list.
    *
    * @return one `(host, totalArea, isAlive)` triple per host
    */
  def hostChecks: Future[List[(String, Int, Boolean)]] = {
    val targets = sites.getOrElse(Map.empty[String, (Set[java.net.URI], Int)]).toList
    Future.traverse(targets) { case (host, (uris, total)) =>
      // A URI such as "//example.com/" has a host but no scheme; fall back to http.
      val scheme = uris.headOption.flatMap(u => Option(u.getScheme)).getOrElse("http")
      // Building the request can throw on a malformed URI; turn that into a failed probe.
      val reply: Future[HttpResponse] = tryo(() => pipeline(Get(s"$scheme://$host"))) match {
        case Right(pending) => pending
        case Left(e)        => Future.failed[HttpResponse](e)
      }
      val isAlive =
        reply.map {
          case HttpResponse(StatusCodes.OK, _, _, _) => true
          case _ => false
        }.recover {
          // One unreachable or misbehaving host must not fail the whole traversal.
          case NonFatal(_) => false
        }
      isAlive.map(a => (host, total, a))
    }
  }

  // hostChecks blocks on the home page and may throw; capture that as a failed future
  // so the completion handler below still runs.
  private val checked: Future[List[(String, Int, Boolean)]] = tryo(() => hostChecks) match {
    case Right(pending) => pending
    case Left(e)        => Future.failed[List[(String, Int, Boolean)]](e)
  }

  private val report: Future[Unit] = checked.map { hc =>
    import java.io._

    val (alives, deads) = hc.partition(_._3)
    def totalCost(sites: List[(String, Int, Boolean)]) = sites.map(_._2).sum
    val tca = totalCost(alives)
    val tcd = totalCost(deads)

    // Runs `op` against a writer on `f`, always closing the writer afterwards.
    def printToFile(f: File)(op: PrintWriter => Unit): Unit = {
      val p = new PrintWriter(f)
      try { op(p) } finally { p.close() }
    }

    printToFile(new File("results.txt")) { p =>
      p.println(alives)
      p.println(deads)
      p.println(tca)
      p.println(tcd)
    }
  }

  // Surface failures instead of dropping them, and stop the actor system so the JVM can exit.
  report.onComplete { outcome =>
    try {
      outcome match {
        case Failure(e) => system.log.error(e, "Host check run failed; results.txt was not written")
        case Success(_) => ()
      }
    } finally {
      system.shutdown()
    }
  }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment