Skip to content

Instantly share code, notes, and snippets.

@jamessdixon
Created August 10, 2014 10:55
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save jamessdixon/3c60daaa0f31eead1ce4 to your computer and use it in GitHub Desktop.
Save jamessdixon/3c60daaa0f31eead1ce4 to your computer and use it in GitHub Desktop.
Traverse Census Data using F#
#r "../packages/FSharp.Data.2.0.9/lib/portable-net40+sl5+wp8+win8/FSharp.Data.dll"
open FSharp.Data
/// Address of the state lookup file. Declared once as a literal so the
/// type provider's compile-time sample and the run-time load below cannot
/// drift apart.
[<Literal>]
let StatesCsvUri = "https://portalvhdspgzl51prtcpfj.blob.core.windows.net/censuschicken/states.csv"

/// Typed access to a per-state census file; AK.TXT is the sample the
/// provider infers its schema from.
type censusDataContext = CsvProvider<"https://portalvhdspgzl51prtcpfj.blob.core.windows.net/censuschicken/AK.TXT">

/// Typed access to the state lookup file.
type stateCodeContext = CsvProvider<StatesCsvUri>

/// The state lookup file loaded at run time; its rows expose the
/// `Abbreviation` column used to build each per-state file name.
let stateCodes = stateCodeContext.Load(StatesCsvUri)
// ---- Serial load benchmark ----
// The stopwatch is started immediately so it covers every load below.
let stopwatch = System.Diagnostics.Stopwatch.StartNew()

/// Loads the census file for a single state, synchronously.
/// stateCode: the file-name stem substituted into the blob URL
/// (for example the "AK" of AK.TXT).
let fetchStateData (stateCode:string) =
    let uri = System.String.Format("https://portalvhdspgzl51prtcpfj.blob.core.windows.net/censuschicken/{0}.TXT",stateCode)
    censusDataContext.Load(uri)

/// Rows of every state's file, loaded one state after another.
let usaData =
    stateCodes.Rows
    |> Seq.collect (fun r -> fetchStateData(r.Abbreviation).Rows)
    // Seq.collect is lazy: unless the sequence is forced here, no state
    // file is fetched before the stopwatch stops and the reported time
    // does not include any of the loads.
    |> Seq.toArray
    // Hand the result back as a seq so the binding keeps its original type.
    |> Seq.ofArray

stopwatch.Stop()
// TotalSeconds is the whole elapsed time; Elapsed.Seconds is only the
// 0-59 seconds component of the TimeSpan.
printfn "Serial: %A" stopwatch.Elapsed.TotalSeconds
// ---- Parallel load benchmark ----
// Restart zeroes the elapsed time and starts timing again in one call.
stopwatch.Restart()

/// Builds an async computation that loads the census file for one state.
/// stateCode: the file-name stem substituted into the blob URL.
/// Nothing is requested until the returned computation is run.
let fetchStateDataAsync (stateCode:string) =
    async {
        let uri = System.String.Format("https://portalvhdspgzl51prtcpfj.blob.core.windows.net/censuschicken/{0}.TXT",stateCode)
        return! censusDataContext.AsyncLoad(uri)
    }

/// Rows of every state's file, with all loads started together.
let usaData' =
    stateCodes.Rows
    |> Seq.map (fun r -> fetchStateDataAsync(r.Abbreviation))
    |> Async.Parallel
    // Blocks until every per-state computation has completed.
    |> Async.RunSynchronously
    |> Seq.collect (fun x -> x.Rows)

stopwatch.Stop()
// TotalSeconds is the whole elapsed time; Elapsed.Seconds is only the
// 0-59 seconds component of the TimeSpan.
printfn "Parallel: %A" stopwatch.Elapsed.TotalSeconds
/// Total of the `14` column for each distinct value of the `Mary` column,
/// as an array of (name, count) pairs.
// NOTE(review): the columns are presumably called `Mary` and `14` because the
// provider took the first line of the sample file as a header row - confirm.
let nameSum =
    usaData'
    |> Seq.groupBy (fun row -> row.Mary)
    |> Seq.map (fun (name, rows) -> name, (rows |> Seq.sumBy (fun row -> row.``14``)))
    |> Seq.toArray

/// Sum of the per-name totals in `nameSum`.
let totalNames =
    nameSum
    |> Seq.sumBy snd
/// Every entry of `nameSum` extended with its share of `totalNames`, as
/// (name, count, share) triples ordered from the largest share down.
let nameAverage =
    nameSum
    |> Seq.map (fun (name, count) -> name, count, float count / float totalNames)
    // Negating the share gives descending order. The former extra "- 1."
    // could not change the ordering and only threw away float precision.
    |> Seq.sortBy (fun (_, _, share) -> -share)
    |> Seq.toArray
/// Totals of the `14` column for rows whose `Mary` column equals "James",
/// grouped by the `F` column, as an array of (group, count) pairs.
// NOTE(review): `F` presumably holds the gender code - confirm against the data.
let nameSearch =
    seq { for row in usaData' do
            if row.Mary = "James" then yield row }
    |> Seq.groupBy (fun row -> row.F)
    |> Seq.map (fun (group, rows) -> group, (rows |> Seq.sumBy (fun row -> row.``14``)))
    |> Seq.toArray
/// For the given name, totals the `14` column per value of the `F` column
/// and returns an array of (group, count, share) triples, where share is
/// the group's fraction of all counts found for that name.
/// name: value compared against the `Mary` column.
/// Returns an empty array when no row matches.
let nameSearch' name =
    // Materialised once. The original lazy pipeline was enumerated twice
    // (for the sum and again for the final map), repeating the whole
    // filter/group scan over usaData'.
    let totalsByGroup =
        usaData'
        |> Seq.filter (fun row -> row.Mary = name)
        |> Seq.groupBy (fun row -> row.F)
        |> Seq.map (fun (group, rows) -> group, (rows |> Seq.sumBy (fun row -> row.``14``)))
        |> Seq.toArray
    let total = totalsByGroup |> Array.sumBy snd
    totalsByGroup
    |> Array.map (fun (group, count) -> group, count, float count / float total)

// Evaluated for display when the script is run interactively.
nameSearch' "James"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment