Created
April 1, 2012 20:05
-
-
Save brikis98/2278236 to your computer and use it in GitHub Desktop.
Seven Languages in Seven Weeks: Scala, Day 3
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import io.Source | |
import scala.actors.Actor._ | |
// Case-insensitive pattern for anchors whose href starts with "http": group 1 is the
// href value and group 2 is the text between the tags. Deliberately loose, so some
// links will be missed.
val linkRegex: scala.util.matching.Regex = "(?i)<a.+?href=\"(http.+?)\".*?>(.+?)</a>".r
/**
 * Fetches web pages and reports their size and outbound links.
 *
 * Fetching is best-effort: when a download throws an Exception it is printed to
 * stderr and the page is treated as empty.
 */
object PageLoader {
  /**
   * Downloads the document at `url` and returns its content as a string.
   *
   * @param url address to fetch
   * @return the page content, or "" when the fetch throws an Exception
   */
  def load(url: String): String = {
    try {
      val source = Source.fromURL(url)
      // The Source holds an open stream; release it even when reading fails.
      try source.mkString finally source.close()
    } catch {
      case e: Exception =>
        System.err.println(e)
        ""
    }
  }

  /** Returns the number of characters in the page at `url` (0 when the fetch fails). */
  def getPageSize(url: String): Int = load(url).length

  /**
   * Fetches `url` once and returns its character count together with the first
   * capture group of every `linkRegex` match found in the content.
   */
  def getPageSizeAndLinks(url: String): (Int, List[String]) = {
    val content = load(url)
    val links = linkRegex.findAllIn(content).matchData.toList.map(_.group(1))
    (content.length, links)
  }
}
// Seed pages whose size and outbound links are measured.
val urls: List[String] = List(
  "http://duckduckgo.com/",
  "http://www.bing.com",
  "http://www.google.com",
  "http://www.wolframalpha.com/"
)
/**
 * Invokes `method` once and prints how long the call took, in seconds,
 * measured with System.nanoTime.
 */
def timeMethod(method: () => Unit) {
  val startedAt = System.nanoTime
  method()
  val elapsedNanos = System.nanoTime - startedAt
  println("Method took " + elapsedNanos / 1000000000.0 + " seconds.")
}
/**
 * Processes every entry of `urls` in order on the calling thread: fetches the page,
 * adds up the sizes of the pages it links to, and prints one summary line.
 */
def sequential() {
  urls.foreach { url =>
    val (pageSize, pageLinks) = PageLoader.getPageSizeAndLinks(url)
    val combinedSize = crawlLinks(pageSize, pageLinks)
    printOutput(url, pageSize, pageLinks, combinedSize)
  }
}
/**
 * Adds the size of every page in `links` to `size`, fetching the links one at a
 * time in list order.
 *
 * @param size  starting total (the size of the page the links came from)
 * @param links addresses to fetch and measure
 * @return `size` plus the sizes reported by PageLoader.getPageSize for each link
 */
def crawlLinks(size: Int, links: List[String]): Int =
  links.foldLeft(size) { (runningTotal, link) =>
    runningTotal + PageLoader.getPageSize(link)
  }
/**
 * Prints a one-line summary for a crawled page: its own size, how many links were
 * found on it, and the combined size.
 */
def printOutput(url: String, size: Int, links: List[String], totalSize: Int) {
  val fields = List("size = " + size, "links = " + links.length, "total size = " + totalSize)
  println(url + ": " + fields.mkString(", "))
}
/**
 * Crawls every entry of `urls` using actors.
 *
 * One actor is started per URL. It fetches the page, starts one further actor per
 * link to measure that link, and sums the Int replies it receives. The finished
 * result is sent back to the calling actor, which prints one line per URL in the
 * order the results arrive.
 */
def concurrent() {
  val reportTo = self
  for (url <- urls) {
    actor {
      val (pageSize, pageLinks) = PageLoader.getPageSizeAndLinks(url)
      // Captured before spawning workers: inside their bodies `self` is the worker.
      val collector = self
      for (link <- pageLinks) {
        actor { collector ! PageLoader.getPageSize(link) }
      }
      // Exactly one Int reply is expected per link.
      val grandTotal = pageLinks.foldLeft(pageSize) { (sum, _) =>
        val linkSize = receive { case reported: Int => reported }
        sum + linkSize
      }
      reportTo ! (url, pageSize, pageLinks, grandTotal)
    }
  }
  // Wait for one result tuple per URL.
  urls.foreach { _ =>
    receive {
      case (url: String, size: Int, links: List[String], totalSize: Int) =>
        printOutput(url, size, links, totalSize)
    }
  }
}
// Run both strategies back to back so their timings can be compared.
println("Sequential run:")
timeMethod(() => sequential())
println("Concurrent run:")
timeMethod(() => concurrent())
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Sequential run: | |
http://duckduckgo.com/: size = 4547, links = 1, total size = 22326 | |
http://www.bing.com: size = 31932, links = 15, total size = 746931 | |
http://www.google.com: size = 11358, links = 10, total size = 1153942 | |
http://www.wolframalpha.com/: size = 22476, links = 7, total size = 202468 | |
Method took 19.802951 seconds. | |
Concurrent run: | |
http://www.google.com: size = 11370, links = 10, total size = 1152555 | |
http://duckduckgo.com/: size = 4547, links = 1, total size = 22326 | |
http://www.bing.com: size = 31932, links = 15, total size = 746230 | |
http://www.wolframalpha.com/: size = 22454, links = 7, total size = 202446 | |
Method took 2.745976 seconds. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import io.Source | |
import scala.actors.Actor._ | |
/** Measures the size of web pages. */
object PageLoader {
  /**
   * Downloads the document at `url` and returns its length in characters.
   * Exceptions raised while opening or reading the URL propagate to the caller.
   *
   * @param url address to fetch
   * @return number of characters in the downloaded content
   */
  def getPageSize(url: String): Int = {
    val source = Source.fromURL(url)
    // The Source holds an open stream; release it even when reading fails.
    try source.mkString.length finally source.close()
  }
}
// Pages whose download size is measured.
val urls: List[String] = List(
  "http://www.yahoo.com",
  "http://www.twitter.com",
  "http://www.google.com",
  "http://www.cnn.com"
)
/**
 * Calls `method` once and prints the elapsed time of the call in seconds,
 * based on two System.nanoTime readings.
 */
def timeMethod(method: () => Unit) {
  val nanosPerSecond = 1000000000.0
  val before = System.nanoTime
  method()
  val after = System.nanoTime
  println("Method took " + (after - before) / nanosPerSecond + " seconds.")
}
/** Fetches every entry of `urls` in order on the calling thread and prints its size. */
def sequential() {
  urls.foreach { url =>
    val pageSize = PageLoader.getPageSize(url)
    println("Size for " + url + ": " + pageSize)
  }
}
/**
 * Measures every entry of `urls` using one actor per URL. Each actor sends a
 * (url, size) pair back to the calling actor, which prints the pairs in the order
 * they arrive.
 *
 * NOTE(review): no timeout is used, so if a fetch throws inside a worker actor no
 * message is sent for that URL and the receive loop keeps waiting.
 */
def concurrent() {
  val replyTo = self
  urls.foreach { url =>
    actor { replyTo ! (url, PageLoader.getPageSize(url)) }
  }
  // Expect exactly one reply per URL.
  urls.foreach { _ =>
    receive {
      case (url, size) => println("Size for " + url + ": " + size)
    }
  }
}
// Time the sequential version first, then the actor-based one, for comparison.
println("Sequential run:")
timeMethod(sequential _)
println("Concurrent run:")
timeMethod(concurrent _)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Sequential run: | |
Size for http://www.yahoo.com: 225020 | |
Size for http://www.twitter.com: 41642 | |
Size for http://www.google.com: 12365 | |
Size for http://www.cnn.com: 94664 | |
Method took 2.286357 seconds. | |
Concurrent run: | |
Size for http://www.google.com: 11370 | |
Size for http://www.cnn.com: 94664 | |
Size for http://www.yahoo.com: 225178 | |
Size for http://www.twitter.com: 41642 | |
Method took 0.711652 seconds. |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
See Seven Languages in Seven Weeks: Scala, Day 3 for more info.