Created
October 4, 2014 23:06
-
-
Save MasseGuillaume/3b5ac9f720b0586a6f82 to your computer and use it in GitHub Desktop.
MillionDollarHomepage.scala
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
2014-10-04 22:40:19.717UTC ERROR[scalakata-playground-akka.actor.default-dispatcher-14] a.i.SelectionHandler - Error during selector management task: [java.nio.channels.ClosedChannelException] | |
java.nio.channels.ClosedChannelException: null | |
at java.nio.channels.spi.AbstractSelectableChannel.register(AbstractSelectableChannel.java:197) ~[na:1.8.0_20] | |
at akka.io.SelectionHandler$ChannelRegistryImpl$$anon$4.tryRun(SelectionHandler.scala:157) ~[akka-actor_2.11-2.3.6.jar:na] | |
at akka.io.SelectionHandler$ChannelRegistryImpl$Task.run(SelectionHandler.scala:215) ~[akka-actor_2.11-2.3.6.jar:na] | |
at akka.util.SerializedSuspendableExecutionContext.run$1(SerializedSuspendableExecutionContext.scala:68) ~[akka-actor_2.11-2.3.6.jar:na] | |
at akka.util.SerializedSuspendableExecutionContext.run(SerializedSuspendableExecutionContext.scala:72) ~[akka-actor_2.11-2.3.6.jar:na] | |
at akka.dispatch.TaskInvocation.run(AbstractDispatcher.scala:41) ~[akka-actor_2.11-2.3.6.jar:na] | |
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142) ~[na:1.8.0_20] | |
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617) ~[na:1.8.0_20] | |
at java.lang.Thread.run(Thread.java:745) ~[na:1.8.0_20] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/***
scalaVersion := "2.11.2"

resolvers ++= Seq(
  "Sonatype" at "https://oss.sonatype.org/content/repositories/releases",
  "spray repo" at "http://repo.spray.io"
)

libraryDependencies ++= Seq(
  "io.spray" %% "spray-client" % "1.3.2-20140909",
  "com.typesafe.akka" %% "akka-slf4j" % "2.3.6",
  "ch.qos.logback" % "logback-classic" % "1.0.0" % "runtime",
  "com.typesafe.akka" %% "akka-actor" % "2.3.6",
  "nu.validator.htmlparser" % "htmlparser" % "1.4"
)
***/
object MillionDollarHomepageMain extends App { | |
/** Parses the bytes of an HTML document into a scala.xml tree.
  *
  * The validator.nu `HtmlParser` emits SAX events which a
  * `NoBindingFactoryAdapter` assembles into scala.xml nodes.
  *
  * @param html bytes of the HTML document to parse
  * @return the root element built by the adapter, or `None` when the adapter
  *         ended up with a null root
  */
def parse(html: Array[Byte]): Option[scala.xml.Node] = {
  import nu.validator.htmlparser.sax.HtmlParser
  import scala.xml.parsing.NoBindingFactoryAdapter

  // The adapter is the SAX content handler; it owns the resulting tree.
  val adapter = new NoBindingFactoryAdapter
  val parser = new HtmlParser()
  parser.setContentHandler(adapter)

  val bytesIn = new java.io.ByteArrayInputStream(html)
  parser.parse(new org.xml.sax.InputSource(bytesIn))
  bytesIn.close()

  // rootElem may be null, hence the Option wrapper.
  Option(adapter.rootElem)
}
import scala.concurrent.{Future, Await} | |
import akka.actor.ActorSystem | |
import akka.pattern.ask | |
import akka.io.IO | |
import akka.util.Timeout | |
import scala.concurrent.duration._ | |
import spray.http._ | |
import spray.client.pipelining._ | |
import spray.can.Http | |
import spray.can.Http.ConnectionAttemptFailedException | |
import com.typesafe.config.{ ConfigValueFactory, ConfigFactory, Config } | |
// Actor-system configuration: Akka logs through SLF4J at DEBUG level and the
// spray host connector is limited to a single retry per request.
val config: Config = ConfigFactory.parseString("""
akka {
loggers = ["akka.event.slf4j.Slf4jLogger"]
loglevel = DEBUG
}
spray.can.host-connector.max-retries = 1
""")

// Implicit actor system, made available to the spray-client pipeline below.
implicit val system: ActorSystem = ActorSystem("scalakata-playground", config)
import system.dispatcher // execution context for futures

// Timeout of 6000 seconds; it is in implicit scope for the pipeline and is
// also the limit `html` passes to Await.result.
implicit val timeout: Timeout = Timeout(6000.seconds)

// Request -> future response function provided by spray-client.
val pipeline: HttpRequest => Future[HttpResponse] = sendReceive

// The homepage download starts as soon as this val is initialised.
val response: Future[HttpResponse] = pipeline(Get("http://www.milliondollarhomepage.com/"))
/** Blocks until the homepage response arrives and parses its body.
  *
  * Waits at most `timeout.duration`; Await.result throws if the response
  * future failed or did not complete in time.
  *
  * @return the parsed document for a 200 OK response with a non-empty entity,
  *         `None` for any other response
  */
def html: Option[scala.xml.Node] = {
  val reply = Await.result(response, timeout.duration)
  reply match {
    case HttpResponse(StatusCodes.OK, body: HttpEntity.NonEmpty, _, _) =>
      parse(body.data.toByteArray)
    case _ =>
      None
  }
}
/** Runs `body` and captures its outcome as an `Either`.
  *
  * Only non-fatal throwables are captured; fatal ones propagate to the caller.
  *
  * @param body the computation to run
  * @tparam T the result type of the computation
  * @return `Right(result)` on success, `Left(throwable)` when `body` threw a
  *         non-fatal throwable
  */
def tryo[T](body: () => T): Either[Throwable, T] =
  // Try.apply catches exactly the NonFatal throwables.
  scala.util.Try(body()) match {
    case scala.util.Success(value) => Right(value)
    case scala.util.Failure(error) => Left(error)
  }
/** Extracts every `area` of the homepage's image map (the `map` element whose
  * id is "Map").
  *
  * For each area the `coords` attribute is split on commas into four integers
  * (bound below as left, top, right, bottom) and the `href` attribute is
  * parsed as a `java.net.URI`. Surrounding whitespace is stripped from each
  * coordinate and from the href before parsing, so values such as
  * "0, 0, 10, 10" are accepted instead of being reported as failures.
  *
  * @return `None` when the homepage could not be parsed; otherwise one
  *         `(coordinates, uri)` pair per area, where each side is a `Right`
  *         with the parsed value or a `Left` with the reason parsing failed
  */
def siteLists = html.map { page =>
  val imageMap = (page \\ "map").filter(m => (m \ "@id").toString == "Map")
  (imageMap \ "area").map { area =>
    val coords = area \@ "coords"
    // Both failure paths carry a Throwable so the Left type stays precise.
    val parsedCoords: Either[Throwable, (Int, Int, Int, Int)] =
      coords.split(",").map(_.trim) match {
        case Array(left, top, right, bottom) =>
          tryo { () => (left.toInt, top.toInt, right.toInt, bottom.toInt) }
        case _ =>
          Left(new IllegalArgumentException(
            s"expected 4 comma-separated coordinates but got: $coords"))
      }
    // java.net.URI rejects leading/trailing whitespace, so strip it first.
    val href = (area \@ "href").trim
    val uri = tryo { () => new java.net.URI(href) }
    (parsedCoords, uri)
  }
}
/** Aggregates the image-map areas by the host of their link.
  *
  * Only areas whose coordinates and URI both parsed, and whose URI has a
  * host, take part. Each contributes `(bottom - top) * (right - left)` to its
  * host's total.
  *
  * @return `None` when `siteLists` is `None`; otherwise a map from host to the
  *         set of distinct URIs seen for it and the summed area value
  */
def sites = siteLists.map { areas =>
  val perArea = areas.collect {
    case (Right((left, top, right, bottom)), Right(uri)) if uri.getHost != null =>
      (uri, uri.getHost, (bottom - top) * (right - left))
  }

  val nothingYet = Map.empty[String, (Set[java.net.URI], Int)]
  perArea.foldLeft(nothingYet) { case (byHost, (uri, host, rectArea)) =>
    // A host seen for the first time starts with no URIs and a zero total.
    val (seenUris, runningTotal) = byHost.getOrElse(host, (Set.empty[java.net.URI], 0))
    byHost.updated(host, (seenUris + uri, runningTotal + rectArea))
  }
}
/** Sends one GET to the root of every advertised host and records whether it
  * answered with 200 OK.
  *
  * All requests are started up front and awaited together. A host counts as
  * dead when its response has any other status or when its request fails for
  * any non-fatal reason (connection refused, timeout, unusable URI, ...), so
  * a single bad host can no longer fail the whole batch.
  *
  * @return a future list of `(host, total area value, isAlive)`
  * @throws IllegalStateException when `sites` is empty, i.e. the homepage
  *         could not be fetched or parsed
  */
def hostChecks = {
  val totalsByHost = sites.getOrElse(
    throw new IllegalStateException(
      "the homepage could not be fetched or parsed, so there are no hosts to check"))

  val probes = for {
    (host, (uris, total)) <- totalsByHost.toList
    uri <- uris.headOption.toList
  } yield {
    // Scheme-relative links ("//host/...") have no scheme; fall back to http.
    val scheme = Option(uri.getScheme).getOrElse("http")
    // Building the request can throw synchronously; fold that into the future
    // so it is handled like any other failed probe.
    val reply =
      try pipeline(Get(s"$scheme://${uri.getHost}"))
      catch { case scala.util.control.NonFatal(e) => Future.failed(e) }
    ((host, total), reply)
  }

  Future.traverse(probes) {
    case ((host, total), reply) =>
      reply
        .map(_.status == StatusCodes.OK)
        .recover { case scala.util.control.NonFatal(_) => false }
        .map(alive => (host, total, alive))
  }
}
// Runs the liveness checks and writes the outcome to results.txt: the alive
// hosts, the dead hosts, then the summed value of each group, one per line.
// A synchronous failure while setting up the checks is folded into the future
// so that it reaches the completion handler below.
val report = (
  try hostChecks
  catch { case scala.util.control.NonFatal(e) => Future.failed(e) }
).map { hc =>
  val (alives, deads) = hc.partition(_._3)

  def totalCost(checked: List[(String, Int, Boolean)]): Int = checked.map(_._2).sum

  // Opens a writer on `f`, hands it to `op`, and always closes it afterwards.
  def printToFile(f: java.io.File)(op: java.io.PrintWriter => Unit): Unit = {
    val p = new java.io.PrintWriter(f)
    try op(p) finally p.close()
  }

  printToFile(new java.io.File("results.txt")) { p =>
    p.println(alives)
    p.println(deads)
    p.println(totalCost(alives))
    p.println(totalCost(deads))
  }
}

// Report a failed run instead of dropping it silently, and stop the actor
// system either way: its threads would otherwise keep the JVM running.
report.onComplete { outcome =>
  outcome.failed.foreach { e =>
    system.log.error(e, "Host check run failed; results.txt may be missing or incomplete")
  }
  system.shutdown()
}
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment