Skip to content

Instantly share code, notes, and snippets.

@jlandahl
Last active December 21, 2015 08:19
Show Gist options
  • Save jlandahl/6277757 to your computer and use it in GitHub Desktop.
Save jlandahl/6277757 to your computer and use it in GitHub Desktop.
// sbt build definition for the scraper project.
// Settings are separated by blank lines: sbt releases before 0.13.7 use blank
// lines as the delimiter between expressions in a .sbt file.
name := "wot-scala"

version := "0.0.1"

scalaVersion := "2.10.2"

// `%` pins an exact artifact name; `%%` appends the Scala binary version to it.
libraryDependencies ++= Seq(
  "org.jsoup" % "jsoup" % "1.7.2",
  "com.fasterxml.jackson.module" %% "jackson-module-scala" % "2.1.3"
)
import java.io.File
import scala.collection.JavaConversions._
import org.jsoup.Jsoup
import org.jsoup.nodes.Element
/** Scrapes vehicle records out of a locally saved HTML file and prints one
  * `Vehicle` per line to standard output.
  *
  * The input file name and the CSS class names used as selectors are
  * hard-coded below.
  */
object ParseVehicleData extends App {
  // Explicit `.asScala` decorators keep every Java-to-Scala conversion visible
  // at the call site instead of relying on implicit views.
  import scala.collection.JavaConverters._

  /** Site root that relative vehicle links are appended to. */
  val baseURL: String = "http://worldoftanks.com"

  /** Category heading text as it appears in the page -> short category code. */
  val categories: Map[String, String] = Map(
    "Light Tanks" -> "lt",
    "Medium Tanks" -> "mt",
    "Heavy Tanks" -> "ht",
    "Tank Destroyers" -> "td",
    "SPGs" -> "spg")

  /** Roman-numeral tier label as it appears in the page -> numeric tier. */
  val tiers: Map[String, Int] = Map(
    "I" -> 1,
    "II" -> 2,
    "III" -> 3,
    "IV" -> 4,
    "V" -> 5,
    "VI" -> 6,
    "VII" -> 7,
    "VIII" -> 8,
    "IX" -> 9,
    "X" -> 10)

  /** A single vehicle entry.
    *
    * @param id       lower-cased last path segment of the vehicle's link
    * @param name     display name taken from the list entry
    * @param category short category code (a value of `categories`)
    * @param tier     numeric tier (a value of `tiers`)
    * @param url      `baseURL` followed by the entry's relative link
    */
  case class Vehicle(id: String, name: String, category: String, tier: Int, url: String)

  /** Builds a [[Vehicle]] from one list-entry element.
    *
    * @param category short category code to store on the result
    * @param elem     element containing the name, link and level sub-elements
    * @return the parsed vehicle
    * @throws NoSuchElementException if the entry's tier label is not a key of `tiers`
    */
  def parseVehicle(category: String, elem: Element): Vehicle = {
    val name = elem.select(".b-encyclopedia-list_name").text
    val relativeURL = elem.select(".b-encyclopedia-list_linc").attr("href")
    // `split` yields an empty array for a path made only of slashes, so `.last`
    // would throw; fall back to an empty id instead.
    val id = relativeURL.split("/").lastOption.getOrElse("").toLowerCase
    val tierText = elem.select(".b-encyclopedia-list_level").text
    val tier = tiers.getOrElse(
      tierText,
      throw new NoSuchElementException(s"unknown tier '$tierText' for vehicle '$name'"))
    Vehicle(id, name, category, tier, s"$baseURL$relativeURL")
  }

  val input = new File("wot-encyclopedia-20130819.html")
  val doc = Jsoup.parse(input, "UTF-8", "http://worldoftanks.com/encyclopedia/vehicles/")

  // Each category heading is expected to be followed by a sibling element that
  // holds that category's vehicle entries.
  doc.select(".b-encyclopedia-type").iterator.asScala.foreach { header =>
    val heading = header.text
    val category = categories.getOrElse(
      heading,
      throw new NoSuchElementException(s"unknown vehicle category heading '$heading'"))

    // nextElementSibling returns null when the heading has no following
    // element; report it rather than failing with a NullPointerException.
    Option(header.nextElementSibling) match {
      case Some(vehicleList) =>
        vehicleList.select(".b-encyclopedia-list_point").iterator.asScala.foreach { entry =>
          println(parseVehicle(category, entry))
        }
      case None =>
        Console.err.println(s"no vehicle list found after category heading '$heading'")
    }
  }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment