Skip to content

Instantly share code, notes, and snippets.

@evelinag
Created November 17, 2015 23:54
Show Gist options
  • Star 8 You must be signed in to star a gist
  • Fork 3 You must be signed in to fork a gist
  • Save evelinag/0ce68655f2aae1ecabcb to your computer and use it in GitHub Desktop.
Save evelinag/0ce68655f2aae1ecabcb to your computer and use it in GitHub Desktop.
Analysing box office success of James Bond films using HTML type provider
#load "packages/FsLab/FsLab.fsx"
open FSharp.Data
open XPlot.GoogleCharts
/// Address of the Wikipedia "List of James Bond films" page; the `oldid`
/// query parameter selects one specific revision of the page.
/// Declared as a literal so that the very same constant is used both as the
/// type provider's compile-time sample and as the address loaded at run time,
/// instead of keeping two copies of the URL that could drift apart.
[<Literal>]
let bondUrl = "https://en.wikipedia.org/w/index.php?title=List_of_James_Bond_films&oldid=688916363"

/// Types generated by the HTML type provider from the sample page at `bondUrl`.
type BondProvider = HtmlProvider<bondUrl>

/// The page at `bondUrl`, loaded through the generated provider type.
let bondWiki = BondProvider.Load(bondUrl)
/// One tuple per film, (title, year, budget, box office, actor), built from
/// the "Box office" table of the loaded Wikipedia page.
let boxOffice =
    // Keep only the text after the first '!' in a title cell; cells without
    // a '!' are returned unchanged.
    let cleanTitle (cell: string) =
        match cell.IndexOf('!') with
        | -1 -> cell
        | bang -> cell.[bang + 1 ..]
    // Keep the characters from index Length/2 + 1 onwards.
    // NOTE(review): presumably the cell contains the name twice (sort key
    // followed by display text) - confirm against the page.
    let cleanActor (cell: string) = cell.[cell.Length / 2 + 1 ..]
    let tableRows =
        bondWiki.Tables.``Box office``.Rows
        |> Seq.map (fun row ->
            row.Title, row.Year, row.Budget2, row.``Box office 2``, row.``Bond actor``)
        |> Array.ofSeq
    // Skip the first row and the last two rows of the table.
    tableRows.[1 .. tableRows.Length - 3]
    |> Array.map (fun (titleCell, yearCell, budgetCell, grossCell, actorCell) ->
        let actor = cleanActor actorCell
        let title = cleanTitle titleCell
        title, int yearCell, float budgetCell, float grossCell, actor)
/// One tuple per film, (film, Rotten Tomatoes percentage), built from the
/// "Reception and accolades" table of the loaded Wikipedia page.
let rating =
    // Numeric value of a cell: the text before the first '%', parsed as float.
    let percentage (cell: string) =
        float (cell.[0 .. cell.IndexOf('%') - 1])
    let tableRows =
        bondWiki.Tables.``Reception and accolades``.Rows
        |> Seq.map (fun row -> row.Film, row.``Rotten Tomatoes``)
        |> Array.ofSeq
    // Drop the last row of the table.
    tableRows.[0 .. tableRows.Length - 2]
    |> Array.map (fun (film, scoreCell) -> film, percentage scoreCell)
/// Display options for the bubble chart of the Bond films.
let options =
    Options(
        // Chart heading (typo "fims" corrected to "films").
        title = "Bond films - rating and box office",
        hAxis = Axis(title = "Year"),
        vAxis = Axis(title = "Box office (millions $)"),
        // The bubble text style is given a transparent colour.
        bubble = Bubble(textStyle = TextStyle(color = "transparent")),
        // NOTE(review): presumably the two ends of the colour scale used for
        // the rating series - confirm against the Google Charts options.
        colors = [| "red"; "gold" |]
    )
// Bubble chart with one entry per film. Rows of boxOffice and rating are
// paired by position; each entry is (label, year, box office, rating, budget),
// matching the labels supplied below.
rating
|> Array.map2
    (fun (film, year, budget, gross, actor) (_, score) ->
        let label = film + " (" + actor + ")"
        label, year, gross, score, budget)
    boxOffice
|> Chart.Bubble
|> Chart.WithLabels(["Title"; "Year"; "Box office"; "Rating"; "Budget"])
|> Chart.WithOptions(options)
// Use RProvider to replicate the plot from http://opiateforthemass.es/articles/james-bond-film-ratings/
open RProvider
open RProvider.ggplot2
/// Infix wrapper around R's `+`, so that two R expressions (here: a plot and
/// a layer) can be combined as `plot ++ layer`.
let (++) (lhs: RDotNet.SymbolicExpression) (rhs: RDotNet.SymbolicExpression) =
    R.``+``(lhs, rhs)
/// R data frame with one row per film and the columns Title, Actor (turned
/// into an R factor), Year, Budget, BoxOffice and Rating.
let df =
    // Project each column out of the (title, year, budget, box office, actor) tuples.
    let titles = boxOffice |> Array.map (fun (title, _, _, _, _) -> title)
    let actors = boxOffice |> Array.map (fun (_, _, _, _, actor) -> actor)
    let years = boxOffice |> Array.map (fun (_, year, _, _, _) -> year)
    let budgets = boxOffice |> Array.map (fun (_, _, budget, _, _) -> budget)
    let grosses = boxOffice |> Array.map (fun (_, _, _, gross, _) -> gross)
    // Ratings come from the separate rating array, taken in the same order.
    let scores = rating |> Array.map snd
    namedParams [
        "Title", box titles
        "Actor", box (actors |> R.as_factor)
        "Year", box years
        "Budget", box budgets
        "BoxOffice", box grosses
        "Rating", box scores
    ]
    |> R.data_frame
/// R data frame with one row per actor and the columns Actor, YearMin and
/// YearMax: the earliest and latest film year recorded for that actor.
let dfActors =
    let spans =
        boxOffice
        |> Array.groupBy (fun (_, _, _, _, actor) -> actor)
        |> Array.map (fun (actor, films) ->
            let years = films |> Array.map (fun (_, year, _, _, _) -> year)
            let first = Array.min years
            let last = Array.max years
            // When first and last year coincide, extend the span by one year
            // so that it is not empty.
            actor, first, (if first = last then last + 1 else last))
    let actors, firstYears, lastYears = Array.unzip3 spans
    namedParams [
        "Actor", box actors
        "YearMin", box firstYears
        "YearMax", box lastYears ]
    |> R.data_frame
// Build the ggplot2 figure: start from an empty plot and add each layer,
// scale, theme and label with the ++ operator.
R.ggplot()
// background rectangles based on actors: one rectangle per row of dfActors,
// from YearMin to YearMax horizontally and from -Inf to Inf vertically,
// filled by Actor and drawn with alpha 0.3
++ R.geom__rect(
    namedParams [
        "data", box dfActors
        "mapping", box (
            R.aes__string(
                namedParams["xmin", box "YearMin"; "xmax", box "YearMax"; "ymin", box "-Inf"; "ymax", box "Inf";
                            "fill", box "Actor"]))
        "alpha", box 0.3])
// write actor names on rectangles: text placed at x = YearMin and at
// y = the largest box-office value found in boxOffice, with angle 90
++ R.geom__text(
    namedParams [
        "data", box dfActors
        "mapping", box (
            R.aes__string(
                namedParams["x", box "YearMin"; "y", box (Array.map (fun (_,_,_,b,_) -> b) boxOffice |> Array.max);
                            "label", box "Actor"; "angle", box 90; "hjust", box 1; "vjust", box 1]))
        "alpha", box 0.6
        "size", box 5])
// film names: the Title of each row of df, placed at x = Year and y = 0,
// with angle 90
++ R.geom__text(
    namedParams [
        "data", box df
        "mapping", box (
            R.aes__string(
                namedParams["x", box "Year"; "y", box 0;
                            "label", box "Title"; "angle", box 90; "hjust", box 0; "vjust", box 0.5]))
        "size", box 4])
// film data: one point per row of df at (Year, BoxOffice), with point size
// mapped to Budget and colour mapped to Rating
++ R.geom__point(
    data=df,
    mapping = R.aes__string(
        namedParams["x", "Year"; "y", "BoxOffice"; "size", "Budget"; "colour", "Rating"]))
// Rotten tomatoes rating gradient: continuous colour scale from red (low)
// to green (high)
++ R.scale__colour__continuous(
    namedParams["low", "red"; "high", "green"; "name", "Rotten Tomatoes rating"])
// Increase minimum point size for readability: size range set to [3; 10]
++ R.scale__size__continuous(
    namedParams["name", box "Budget (2005 mil. dollars)"; "range", box [3; 10]])
// black-and-white theme, with a bold plot title
++ R.theme__bw()
++ R.theme(namedParams["plot.title", R.element__text(lineheight=0.8, face="bold")])
// the "fill" guide is switched off (passed as false)
++ R.guides(namedParams["fill", false])
// plot title and axis labels; the x-axis label is the empty string
++ R.labs(
    namedParams["title", "Box office results, budgets, and ratings of James Bond films\n"
                "x", ""; "y", "Box office earnings (in 2005 mil. dollars)"])
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment