-
-
Save isaacabraham/45e9d00dd9c126c7143c to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// ~46 seconds (author's measurement): download the data, convert each line to
// the provided type and persist the resulting flow in memory on the cluster.
let persistedHousePrices =
    // Source file(s) to stream; currently just the 2015 price-paid extract.
    let sourceUrls =
        [ "http://publicdata.landregistry.gov.uk/market-trend-data/price-paid-data/a/pp-2015.csv" ]
    sourceUrls
    |> CloudFlow.OfHttpFileByLine
    // Each element is one line of text; keep the first row ParseRows yields for it.
    |> CloudFlow.map (fun line -> HousePrices.ParseRows(line) |> Seq.head)
    |> CloudFlow.persist StorageLevel.Memory
    |> cluster.Run
// ~5 seconds (author's measurement): average house price per month.
// Rows are grouped on the month number of DateOfTransfer, then each group's
// Price values are averaged as floats.
let pricesByMonth =
    let monthlyAverages =
        persistedHousePrices
        |> CloudFlow.groupBy (fun sale -> sale.DateOfTransfer.Month)
        |> CloudFlow.map (fun (month, sales) ->
            let averagePrice = sales |> Seq.averageBy (fun sale -> float sale.Price)
            month, averagePrice)
        |> CloudFlow.toArray
    monthlyAverages |> cluster.Run
// ~1 second (author's measurement): number of sales per property type in London.
let londonProperties =
    // Keep only rows whose TownCity is exactly "LONDON".
    let londonSales =
        persistedHousePrices
        |> CloudFlow.filter (fun sale -> sale.TownCity = "LONDON")
    londonSales
    |> CloudFlow.countBy (fun sale -> sale.PropertyType)
    |> CloudFlow.toArray
    |> cluster.Run
(*
val londonProperties : (string * int64) [] =
  [|("T", 8622L); ("D", 582L); ("S", 2327L); ("F", 22288L)|]
Property type codes: T = Terraced, D = Detached, S = Semi-detached, F = Flat
*)
// ~5 seconds (author's measurement): percentage of new builds per county,
// sorted locally from highest to lowest percentage.
let newBuildsByCounty =
    let percentagesByCounty =
        persistedHousePrices
        |> CloudFlow.groupBy (fun sale -> sale.County)
        |> CloudFlow.map (fun (county, sales) ->
            // Materialise the group once so it can be both counted and measured.
            let sales = List.ofSeq sales
            // A row counts as a new build when its NewBuild field equals "Y".
            let newBuildCount =
                sales |> List.sumBy (fun sale -> if sale.NewBuild = "Y" then 1 else 0)
            county, (100. / float sales.Length) * float newBuildCount)
        |> CloudFlow.toArray
        |> cluster.Run
    // The sort runs on the returned array, after the cluster job has completed.
    percentagesByCounty |> Array.sortByDescending snd
(*
val newBuildsByCounty : (string * float) [] =
  [|("RUTLAND", 19.79434447); ("MIDDLESBROUGH", 17.20430108);
    ("NEWPORT", 16.91896705); ("HARTLEPOOL", 16.52892562);
    ("BEDFORD", 16.09907121); ("CENTRAL BEDFORDSHIRE", 15.94540613);
    ("LEICESTERSHIRE", 15.74045328); ("WREKIN", 14.43452381);
    ("BRIDGEND", 14.26294821); ("SLOUGH", 14.09135083);
    ("FLINTSHIRE", 14.08450704); ("MILTON KEYNES", 12.75510204);
    ("DARLINGTON", 12.61930011); ("CITY OF PETERBOROUGH", 12.61872456);
    ("WARRINGTON", 11.68305379); ("WINDSOR AND MAIDENHEAD", 10.7751938);
    ("CITY OF KINGSTON UPON HULL", 10.71225071);
    ... (remaining counties omitted) *)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment