Skip to content

Instantly share code, notes, and snippets.

@brikis98
Created April 1, 2012 20:05
Show Gist options
  • Save brikis98/2278236 to your computer and use it in GitHub Desktop.
Save brikis98/2278236 to your computer and use it in GitHub Desktop.
Seven Languages in Seven Weeks: Scala, Day 3
import io.Source
import scala.actors.Actor._
// Regex to pick up external links; very simplified, so it'll miss some.
// The leading (?i) makes the match case-insensitive. Capture group 1 is the
// href value (only values beginning with "http" match, which is what limits
// it to absolute links); capture group 2 is the text between the opening and
// closing anchor tags.
val linkRegex = "(?i)<a.+?href=\"(http.+?)\".*?>(.+?)</a>".r
/** Fetches web pages and reports their size and the links they contain. */
object PageLoader {
  /**
   * Downloads the content at `url` and returns it as a single string.
   *
   * Any exception raised while fetching is printed to stderr and an empty
   * string is returned instead, so a failed page counts as size 0 with no links.
   */
  def load(url: String): String = {
    try {
      val source = Source.fromURL(url)
      // Close the underlying stream even when reading fails; the crawl fetches
      // many pages and would otherwise leave every one of them open.
      try source.mkString finally source.close()
    } catch {
      case e: Exception =>
        System.err.println(e)
        ""
    }
  }

  /** Returns the number of characters in the page at `url` (0 if loading failed). */
  def getPageSize(url: String): Int = load(url).length

  /**
   * Loads the page at `url` and returns its character count together with the
   * first capture group of every `linkRegex` match, in the order they appear.
   */
  def getPageSizeAndLinks(url: String): (Int, List[String]) = {
    val content = load(url)
    val links = linkRegex.findAllIn(content).matchData.toList.map(_.group(1))
    (content.length, links)
  }
}
// Seed pages whose sizes and outgoing links are measured.
val urls =
  "http://duckduckgo.com/" ::
  "http://www.bing.com" ::
  "http://www.google.com" ::
  "http://www.wolframalpha.com/" ::
  Nil
/**
 * Runs `method` once and prints how long it took, in seconds, to stdout.
 *
 * If `method` throws, the exception propagates and nothing is printed.
 */
def timeMethod(method: () => Unit): Unit = {
  val startedAt = System.nanoTime
  method()
  val finishedAt = System.nanoTime
  // nanoTime is in nanoseconds; dividing by a Double yields fractional seconds.
  val elapsedSeconds = (finishedAt - startedAt) / 1000000000.0
  println("Method took " + elapsedSeconds + " seconds.")
}
/**
 * Processes each entry of `urls` one after another on the calling thread:
 * loads the page, adds up the sizes of the pages it links to, and prints a
 * summary line.
 */
def sequential(): Unit = {
  urls.foreach { url =>
    val (pageSize, pageLinks) = PageLoader.getPageSizeAndLinks(url)
    val combinedSize = crawlLinks(pageSize, pageLinks)
    printOutput(url, pageSize, pageLinks, combinedSize)
  }
}
/**
 * Returns `size` plus the size of every page in `links`.
 *
 * Links are fetched one at a time, front to back; an empty list yields `size`
 * unchanged.
 */
def crawlLinks(size: Int, links: List[String]): Int =
  links.foldLeft(size) { (runningTotal, link) =>
    runningTotal + PageLoader.getPageSize(link)
  }
/**
 * Prints a one-line summary for a crawled page: its URL, its own size, how
 * many links were found on it, and the combined size including linked pages.
 */
def printOutput(url: String, size: Int, links: List[String], totalSize: Int): Unit = {
  val linkCount = links.length
  val summary = url + ": size = " + size + ", links = " + linkCount + ", total size = " + totalSize
  println(summary)
}
/**
 * Processes every entry of `urls` in parallel using actors and prints one
 * summary line per URL as results arrive, so the output order may differ
 * from the order of `urls`.
 *
 * One actor is started per seed URL. It loads the page, starts one further
 * actor per link to fetch that link's size, adds the replies to the page's
 * own size, and reports the result back to the calling thread.
 */
def concurrent(): Unit = {
  // Typed reply message. Matching on a case class avoids the `List[String]`
  // type test that a plain tuple pattern needs: generic type arguments are
  // erased at runtime, so that test is unchecked.
  case class PageResult(url: String, size: Int, links: List[String], totalSize: Int)

  val caller = self
  urls.foreach { url =>
    actor {
      val (size, links) = PageLoader.getPageSizeAndLinks(url)
      // Captured here so the per-link actors reply to this page's actor
      // rather than to themselves.
      val linkCollectorActor = self
      links.foreach(link => actor { linkCollectorActor ! PageLoader.getPageSize(link) })
      // Exactly one Int reply is expected per link; fold them onto the page's own size.
      val totalSize = links.foldLeft(size) { (runningTotal, _) =>
        receive { case linkSize: Int => runningTotal + linkSize }
      }
      caller ! PageResult(url, size, links, totalSize)
    }
  }
  // Block until every seed URL has reported its result.
  for (_ <- 1 to urls.length) {
    receive {
      case PageResult(url, size, links, totalSize) => printOutput(url, size, links, totalSize)
    }
  }
}
// Run both strategies back to back so their timings can be compared.
List(
  ("Sequential run:", sequential _),
  ("Concurrent run:", concurrent _)
).foreach { case (label, run) =>
  println(label)
  timeMethod(run)
}
Sequential run:
http://duckduckgo.com/: size = 4547, links = 1, total size = 22326
http://www.bing.com: size = 31932, links = 15, total size = 746931
http://www.google.com: size = 11358, links = 10, total size = 1153942
http://www.wolframalpha.com/: size = 22476, links = 7, total size = 202468
Method took 19.802951 seconds.
Concurrent run:
http://www.google.com: size = 11370, links = 10, total size = 1152555
http://duckduckgo.com/: size = 4547, links = 1, total size = 22326
http://www.bing.com: size = 31932, links = 15, total size = 746230
http://www.wolframalpha.com/: size = 22454, links = 7, total size = 202446
Method took 2.745976 seconds.
import io.Source
import scala.actors.Actor._
/** Measures the size of web pages. */
object PageLoader {
  /**
   * Returns the number of characters in the content fetched from `url`.
   *
   * Exceptions raised while opening or reading the URL propagate to the caller.
   */
  def getPageSize(url: String): Int = {
    val source = Source.fromURL(url)
    // Release the underlying stream once the content has been read, even on failure.
    try source.mkString.length finally source.close()
  }
}
// Pages whose sizes are measured by both the sequential and concurrent runs.
val urls =
  "http://www.yahoo.com" ::
  "http://www.twitter.com" ::
  "http://www.google.com" ::
  "http://www.cnn.com" ::
  Nil
/**
 * Invokes `method` once and prints the wall-clock time it took, in seconds.
 *
 * If `method` throws, the exception propagates and no timing is printed.
 */
def timeMethod(method: () => Unit): Unit = {
  val before = System.nanoTime
  method()
  val after = System.nanoTime
  // Nanoseconds divided by a Double literal gives fractional seconds.
  val seconds = (after - before) / 1000000000.0
  println("Method took " + seconds + " seconds.")
}
/** Fetches each entry of `urls` in turn on the calling thread and prints its size. */
def sequential(): Unit = {
  urls.foreach { url =>
    val pageSize = PageLoader.getPageSize(url)
    println("Size for " + url + ": " + pageSize)
  }
}
/**
 * Fetches every entry of `urls` in parallel, one actor per URL, and prints
 * each page size as its result arrives (so the order may differ from `urls`).
 *
 * Every worker always sends exactly one reply, even when the fetch fails.
 * Without that, an exception inside a worker would mean no message is sent
 * and the receive loop below would wait forever for the missing reply.
 * Failed fetches are reported on stderr instead of stdout.
 */
def concurrent(): Unit = {
  val caller = self
  for (url <- urls) {
    actor {
      // Capture success or failure as a value so a reply is sent either way.
      val outcome: Either[Exception, Int] =
        try Right(PageLoader.getPageSize(url))
        catch { case e: Exception => Left(e) }
      caller ! (url, outcome)
    }
  }
  // One reply is expected per URL.
  for (_ <- 1 to urls.size) {
    receive {
      case (url, Right(size)) =>
        println("Size for " + url + ": " + size)
      case (url, Left(error)) =>
        System.err.println("Failed to load " + url + ": " + error)
    }
  }
}
// Time the sequential strategy first, then the concurrent one, for comparison.
List(
  ("Sequential run:", sequential _),
  ("Concurrent run:", concurrent _)
).foreach { case (heading, strategy) =>
  println(heading)
  timeMethod(strategy)
}
Sequential run:
Size for http://www.yahoo.com: 225020
Size for http://www.twitter.com: 41642
Size for http://www.google.com: 12365
Size for http://www.cnn.com: 94664
Method took 2.286357 seconds.
Concurrent run:
Size for http://www.google.com: 11370
Size for http://www.cnn.com: 94664
Size for http://www.yahoo.com: 225178
Size for http://www.twitter.com: 41642
Method took 0.711652 seconds.
@brikis98
Copy link
Author

brikis98 commented Apr 2, 2012

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment