Skip to content

Instantly share code, notes, and snippets.

@ncordon
Created April 2, 2021 11:57
Show Gist options
  • Save ncordon/3311003e49708ac4485a76bcbeedeab8 to your computer and use it in GitHub Desktop.
Save ncordon/3311003e49708ac4485a76bcbeedeab8 to your computer and use it in GitHub Desktop.
Scrape timezone offsets
import $ivy.`net.ruippeixotog::scala-scraper:2.2.0`
import net.ruippeixotog.scalascraper.browser.JsoupBrowser
import net.ruippeixotog.scalascraper.dsl.DSL._
import net.ruippeixotog.scalascraper.dsl.DSL.Extract._
import net.ruippeixotog.scalascraper.dsl.DSL.Parse._
import net.ruippeixotog.scalascraper.model._
/**
 * Converts a UTC offset written as `±hh:mm` into a signed number of minutes.
 *
 * The sign may be `+`, the Unicode minus sign `−` (U+2212) or the ASCII
 * hyphen-minus `-`. Leading and trailing whitespace is ignored.
 *
 * @param s offset text, e.g. `"+05:30"` or `"−03:00"`
 * @return the offset in minutes; positive for `+`, negative otherwise
 * @throws IllegalArgumentException if `s` is null or not of the form `±hh:mm`
 */
def extractMinutesOffset(s: String): Long = {
  // Sign class: '+', ASCII '-' (escaped so it is not read as a range) and U+2212 '−'.
  val offset = raw"([+\-−])(\d{2}):(\d{2})".r
  Option(s).map(_.trim) match {
    case Some(offset(sign, hh, mm)) =>
      val magnitude = hh.toLong * 60 + mm.toLong
      if (sign == "+") magnitude else -magnitude
    case _ =>
      // Explicit failure instead of an opaque MatchError, so the offending text is visible.
      throw new IllegalArgumentException(s"Not a ±hh:mm UTC offset: '$s'")
  }
}
// Fetches the Wikipedia list of tz database time zones and prints every
// non-deprecated zone whose DST offset differs from its standard offset by
// something other than 0 or 60 minutes.
val browser = JsoupBrowser()
val doc = browser.get("https://en.wikipedia.org/wiki/List_of_tz_database_time_zones")
val content = doc >> elements("#mw-content-text")
val tables = content.extract(".wikitable").toArray
// Fail with a readable message rather than an index error when the page has no such table.
require(
  tables.nonEmpty,
  "No .wikitable element found under #mw-content-text; the page layout may have changed"
)
val table = tables(0)
// The <td> cells are regrouped 8 at a time, i.e. each table row is treated as 8 cells wide.
val rows = table.extract("td").toArray.grouped(8).toArray
rows.foreach { row =>
  if (row.length < 7) {
    // grouped(8) can leave a short final group; indices 2..6 are needed below.
    System.err.println(s"Skipping incomplete row of ${row.length} cell(s)")
  } else {
    val name = row(2).text
    val deprecated = row(4).text == "Deprecated"
    val stdOffset = row(5).text.trim
    val dstOffset = row(6).text.trim
    // One unparseable cell should not abort the whole report: note it and move on.
    scala.util.Try(extractMinutesOffset(dstOffset) - extractMinutesOffset(stdOffset)) match {
      case scala.util.Success(offset) =>
        if (offset != 60L && offset != 0 && !deprecated) {
          println(s"$name has an offset of $offset")
        }
      case scala.util.Failure(_) =>
        System.err.println(s"Skipping $name: cannot parse offsets '$stdOffset' / '$dstOffset'")
    }
  }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment