Skip to content

Instantly share code, notes, and snippets.

@isaacabraham
Last active October 19, 2015 21:09
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save isaacabraham/45e9d00dd9c126c7143c to your computer and use it in GitHub Desktop.
Save isaacabraham/45e9d00dd9c126c7143c to your computer and use it in GitHub Desktop.
// 46 seconds - fetch the 2015 price-paid CSV over HTTP line by line, turn each line
// into a row of the provided HousePrices type, and keep the resulting flow persisted
// across the cluster nodes in memory only (StorageLevel.Memory).
let persistedHousePrices =
    let sourceUrls =
        [ "http://publicdata.landregistry.gov.uk/market-trend-data/price-paid-data/a/pp-2015.csv" ]
    // Each input element is a single CSV line, so only the first parsed row is taken.
    let parseLine (line : string) = HousePrices.ParseRows line |> Seq.head
    sourceUrls
    |> CloudFlow.OfHttpFileByLine
    |> CloudFlow.map parseLine
    |> CloudFlow.persist StorageLevel.Memory
    |> cluster.Run
// 5 seconds - mean sale price for each month of transfer.
// Produces an array of (month number, average price as float) pairs.
let pricesByMonth =
    let monthlyAverages =
        persistedHousePrices
        |> CloudFlow.groupBy (fun sale -> sale.DateOfTransfer.Month)
        |> CloudFlow.map (fun (month, sales) ->
            let meanPrice =
                sales
                |> Seq.map (fun sale -> float sale.Price)
                |> Seq.average
            month, meanPrice)
        |> CloudFlow.toArray
    monthlyAverages |> cluster.Run
// 1 second - number of sales per property type, restricted to rows whose
// TownCity is exactly "LONDON".
let londonProperties =
    let londonSales =
        persistedHousePrices
        |> CloudFlow.filter (fun sale -> sale.TownCity = "LONDON")
    londonSales
    |> CloudFlow.countBy (fun sale -> sale.PropertyType)
    |> CloudFlow.toArray
    |> cluster.Run
(*
val londonProperties : (string * int64) [] =
[|("T", 8622L); ("D", 582L); ("S", 2327L); ("F", 22288L)|]
T = Terraced, D = Detached, S = Semi-detached, F = Flat
*)
// 5 seconds - percentage of sales flagged as new builds (NewBuild = "Y") per county,
// sorted locally from the highest percentage to the lowest once the cluster returns.
let newBuildsByCounty =
    let percentagesByCounty =
        persistedHousePrices
        |> CloudFlow.groupBy (fun sale -> sale.County)
        |> CloudFlow.map (fun (county, sales) ->
            // Materialise the group once so it can be both counted and scanned.
            let sales = Seq.toArray sales
            let newBuildCount =
                sales |> Array.sumBy (fun sale -> if sale.NewBuild = "Y" then 1 else 0)
            county, (100. / float sales.Length) * float newBuildCount)
        |> CloudFlow.toArray
        |> cluster.Run
    percentagesByCounty |> Array.sortByDescending snd
(*
val newBuildsByCounty : (string * float) [] =
[|("RUTLAND", 19.79434447); ("MIDDLESBROUGH", 17.20430108);
("NEWPORT", 16.91896705); ("HARTLEPOOL", 16.52892562);
("BEDFORD", 16.09907121); ("CENTRAL BEDFORDSHIRE", 15.94540613);
("LEICESTERSHIRE", 15.74045328); ("WREKIN", 14.43452381);
("BRIDGEND", 14.26294821); ("SLOUGH", 14.09135083);
("FLINTSHIRE", 14.08450704); ("MILTON KEYNES", 12.75510204);
("DARLINGTON", 12.61930011); ("CITY OF PETERBOROUGH", 12.61872456);
("WARRINGTON", 11.68305379); ("WINDSOR AND MAIDENHEAD", 10.7751938);
("CITY OF KINGSTON UPON HULL", 10.71225071);
etc. etc. *)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment