Skip to content

Instantly share code, notes, and snippets.

@dimitrisli
Created January 14, 2014 17:37
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save dimitrisli/8422364 to your computer and use it in GitHub Desktop.
Save dimitrisli/8422364 to your computer and use it in GitHub Desktop.
Scala Tennis Historical Data Retriever
package data.analysis.tennis
import scala.io.Source
import java.util.Date
import java.text.SimpleDateFormat
/**
 * Downloads the Grand Slam result sheets listed in [[TennisDataAnalysis.urls]],
 * parses every data row into a [[TennisDataAnalysis.TennisMatch]] and prints them.
 *
 * An explicit `main` is used instead of `extends App` so that the members below
 * are initialised normally and the download only happens when `data` is first
 * read (i.e. when the program is actually run).
 */
object TennisDataAnalysis {

  /**
   * Converts a CSV cell into an optional integer.
   *
   * @param stringInt raw cell text
   * @return `None` when the cell is empty or whitespace-only, otherwise the parsed value
   * @throws NumberFormatException if the cell is non-blank but not an integer
   */
  def wrapStringInt(stringInt: String): Option[Int] = {
    val cell = stringInt.trim
    if (cell.isEmpty) None else Some(cell.toInt)
  }

  /**
   * One parsed row of a result sheet.
   *
   * `W1`..`W5` / `L1`..`L5` and `Wsets` / `Lsets` are optional because the
   * corresponding cells may be empty in the input.
   * NOTE(review): presumably these are the winner's/loser's per-set scores and
   * sets won -- confirm against the data provider's column notes.
   */
  case class TennisMatch(location: String, tournament: String, date: Date, series: String,
                         surface: String, round: String, bestOf: Int, winner: String, loser: String,
                         W1: Option[Int], L1: Option[Int], W2: Option[Int], L2: Option[Int], W3: Option[Int],
                         L3: Option[Int], W4: Option[Int], L4: Option[Int], W5: Option[Int], L5: Option[Int],
                         Wsets: Option[Int], Lsets: Option[Int], comment: String)

  /** Base address every sheet URL is built from. */
  val sourceSite: String = "http://www.tennis-data.co.uk/"

  /** Seasons to download. */
  val years: List[Int] = List(2010, 2011, 2012, 2013)

  /** File-name stems of the tournaments to download for each season. */
  val tournaments: List[String] = List("ausopen", "frenchopen", "usopen", "wimbledon")

  /** One URL per (year, tournament) pair, years outermost. */
  val urls: List[String] =
    for {
      year       <- years
      tournament <- tournaments
    } yield s"$sourceSite$year/$tournament.csv"

  /**
   * Parses a `dd/MM/yyyy` date.
   *
   * The month letter must be upper-case `MM`; lower-case `mm` means
   * minute-of-hour and would leave every date in January.
   * A fresh formatter is created per call because `SimpleDateFormat` is not thread-safe.
   *
   * @throws java.text.ParseException if `text` does not match the pattern
   */
  def parseDate(text: String): Date =
    new SimpleDateFormat("dd/MM/yyyy").parse(text)

  /**
   * Builds a [[TennisMatch]] from one comma-separated data row.
   *
   * @param line a single data row (not the header); must contain at least 28 fields
   * @throws ArrayIndexOutOfBoundsException if the row has fewer than 28 fields
   */
  def parseLine(line: String): TennisMatch = {
    // limit = -1 keeps trailing empty fields, so an empty last column
    // does not shorten the array and break the g(27) lookup.
    val g = line.split(",", -1)
    TennisMatch(g(1), g(2), parseDate(g(3)), g(4),
      g(6), g(7), g(8).trim.toInt, g(9), g(10),
      wrapStringInt(g(15)), wrapStringInt(g(16)), wrapStringInt(g(17)), wrapStringInt(g(18)),
      wrapStringInt(g(19)), wrapStringInt(g(20)), wrapStringInt(g(21)), wrapStringInt(g(22)),
      wrapStringInt(g(23)), wrapStringInt(g(24)), wrapStringInt(g(25)), wrapStringInt(g(26)),
      g(27))
  }

  /** Downloads one sheet, skips its header row and blank lines, and parses the rest. */
  private def fetchMatches(url: String): List[TennisMatch] = {
    val source = Source.fromURL(url)
    try {
      // Materialise with toList before the source is closed: getLines() is lazy.
      source.getLines().drop(1).filter(_.trim.nonEmpty).map(parseLine).toList
    } finally {
      source.close()
    }
  }

  /** All matches from all sheets; lazy so that the network is only hit on first use. */
  lazy val data: List[TennisMatch] = urls.flatMap(fetchMatches)

  /** Entry point: prints every downloaded match, one per line. */
  def main(args: Array[String]): Unit =
    data.foreach(println)
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment