Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save jamessdixon/2e51e07170d5133b3a9e15385995d16b to your computer and use it in GitHub Desktop.
Save jamessdixon/2e51e07170d5133b3a9e15385995d16b to your computer and use it in GitHub Desktop.
Age and Sex Analysis Of Microsoft USA MVPs
#r "WindowsBase.dll"
#r "System.Net.Http.dll"
#r "PresentationCore.dll"
#r "../packages/FSharp.Data.2.3.2/lib/net40/FSharp.Data.dll"
open System
open System.IO
open System.Web
open System.Net
open FSharp.Data
open System.Net.Http
open System.Threading
open System.Net.Http.Headers
open System.Windows.Media.Imaging
open System.Text.RegularExpressions
/// Downloads one page of the United States MVP search results and returns the
/// response body as a string.
/// pageNumber: appended to the `pn` query-string parameter of the search URL.
let getPageContents(pageNumber:int) =
    let uri = new Uri("http://mvp.microsoft.com/en-us/search-mvp.aspx?lo=United+States&sl=0&browse=False&sc=s&ps=36&pn=" + pageNumber.ToString())
    let request = WebRequest.Create(uri)
    request.Method <- "GET"
    // WebResponse is IDisposable: bind it with `use` so the underlying
    // connection is released as soon as the body has been read.
    use response = request.GetResponse()
    use stream = response.GetResponseStream()
    use reader = new StreamReader(stream)
    reader.ReadToEnd()
/// HTML of search-result pages 1 through 19, concatenated in page order.
let contents =
    let firstPage, lastPage = 1, 19
    [| firstPage .. lastPage |]
    |> Array.map getPageContents
    |> Seq.reduce (+)
/// Finds every profile-photo path in the supplied page text and returns the
/// corresponding absolute photo URLs, in match order, as an array.
let getUrisFromPageContents(pageContents:string) =
    // Wrap a relative photo path with the site prefix and language suffix.
    let toPhotoUrl (relativePath:string) =
        "https://mvp.microsoft.com/en-us" + relativePath + "?language=en-us"
    let photoMatches = Regex.Matches(pageContents, "/PublicProfile/Photo/\d+")
    [| for found in Seq.cast<Match> photoMatches -> toPhotoUrl found.Value |]
/// Photo URLs extracted from the concatenated search-result pages.
let uris = contents |> getUrisFromPageContents
/// Downloads the resource at `uri` into the photos folder, using a freshly
/// generated GUID plus ".jpg" as the file name.
let saveImage uri =
    let fileName = Guid.NewGuid().ToString() + ".jpg"
    let destination = @"F:\Git\ChickenSoftware.ParseMvpPages.Solution\ChickenSoftware.ParseMvpPages\photos\" + fileName
    use downloader = new WebClient()
    downloader.DownloadFile(Uri(uri), destination)
// Download every discovered photo, one at a time, in array order.
for photoUri in uris do
    saveImage photoUri
/// Posts the bytes of the file at `path` to the Project Oxford face-detection
/// endpoint, requesting the face id plus the age and gender attributes.
/// Returns `Some body` (the response body as a string) when the status code is
/// 200 OK, and `None` for any other status code.
let getOxfordResults path =
    let queryString = HttpUtility.ParseQueryString(String.Empty)
    queryString.Add("returnFaceId","true")
    queryString.Add("returnFaceLandmarks","false")
    queryString.Add("returnFaceAttributes","age,gender")
    let uri = "https://api.projectoxford.ai/face/v1.0/detect?" + queryString.ToString()
    let bytes = File.ReadAllBytes(path)
    // HttpClient, the request content and the response message are all
    // IDisposable; `use` releases them when the function returns.
    use client = new HttpClient()
    client.DefaultRequestHeaders.Add("Ocp-Apim-Subscription-Key","xxxxxxxxxxx")
    use content = new ByteArrayContent(bytes)
    content.Headers.ContentType <- MediaTypeHeaderValue("application/octet-stream")
    use result = client.PostAsync(uri,content).Result
    // Pause after every call -- presumably to stay under the service's rate
    // limit (TODO confirm the actual quota).
    Thread.Sleep(TimeSpan.FromSeconds(5.0))
    match result.StatusCode with
    | HttpStatusCode.OK -> Some (result.Content.ReadAsStringAsync().Result)
    | _ -> None
/// JSON type provider for the face-detection response. The inline sample is a
/// JSON array of face objects, each carrying `faceId`, `faceRectangle` and
/// `faceAttributes` (`gender`, `age`).
type FaceInfo = JsonProvider<Sample="[{\"faceId\":\"83045097-daa1-4f1c-8669-ed012e9b5975\",\"faceRectangle\":{\"top\":187,\"left\":209,\"width\":214,\"height\":214},\"faceAttributes\":{\"gender\":\"male\",\"age\":42.8}}]">
/// Extracts `(age, gender)` of the first face from an optional JSON response.
/// Returns `None` when there is no response or when the response lists no faces.
let parseOxfordResuls results =
    match results with
    | None -> None
    | Some json ->
        // Only the first detected face is considered.
        match FaceInfo.Parse(json) |> List.ofSeq with
        | [] -> None
        | first :: _ -> Some(first.FaceAttributes.Age, first.FaceAttributes.Gender)
#time
/// One optional `(age, gender)` pair per file in the photos folder.
let results =
    let photoDirectory = @"F:\Git\ChickenSoftware.ParseMvpPages.Solution\ChickenSoftware.ParseMvpPages\photos"
    // First pass: call the face API for every file; second pass: parse the replies.
    let rawResponses = Directory.GetFiles(photoDirectory) |> Array.map getOxfordResults
    rawResponses |> Array.map parseOxfordResuls
#r "../packages/MathNet.Numerics.3.13.1/lib/net40/MathNet.Numerics.dll"
#r "../packages/MathNet.Numerics.FSharp.3.13.1/lib/net40/MathNet.Numerics.FSharp.dll"
open MathNet.Numerics.Statistics
// Number of photos processed (including those with no usable face data).
results |> Seq.length
/// Ages (as floats) of every result that produced face data.
let ages =
    results
    |> Seq.choose (fun outcome ->
        match outcome with
        | Some (age, _) -> Some (float age)
        | None -> None)
// Summary statistics over `ages`. Most values are read from a single
// DescriptiveStatistics instance; the median and quartiles are computed by the
// static Statistics helpers, which take the `ages` sequence directly.
let stats = new DescriptiveStatistics(ages)
// Number of samples; also used later as the denominator for gender proportions.
let count = stats.Count
let largest = stats.Maximum
let smallest = stats.Minimum
let mean = stats.Mean
let median = Statistics.Median(ages)
let variance = stats.Variance
let standardDeviation = stats.StandardDeviation
let kurtosis = stats.Kurtosis
let skewness = stats.Skewness
let lowerQuartile = Statistics.LowerQuartile(ages)
// NOTE(review): binding name is misspelt ("uppser"); left unchanged so that
// any code referring to it keeps working.
let uppserQuartile = Statistics.UpperQuartile(ages)
/// Histogram of `ages` split into 10 buckets.
let histogram = new Histogram(ages,10)
// Print one "lower-upper count" line per bucket. The bucket range comes from
// the histogram itself so it cannot drift from the bucket count given above.
seq { 0 .. histogram.BucketCount - 1 }
|> Seq.map (fun i ->
    let bucket = histogram.Item(i)
    bucket.LowerBound, bucket.UpperBound, bucket.Count)
|> Seq.iter (fun (lower, upper, bucketCount) -> printfn "%f-%f %f" lower upper bucketCount)
#r "../packages/FSharp.Charting.0.90.14/lib/net40/FSharp.Charting.dll"
open FSharp.Charting
/// Histogram chart of `ages`, drawn with 10 intervals.
let chart =
    let intervalCount = 10.0
    Chart.Histogram(ages, Intervals = intervalCount)
// Display the chart.
Chart.Show chart
/// Gender value of every result that produced face data.
let gender =
    results
    |> Seq.choose (fun outcome ->
        match outcome with
        | Some (_, label) -> Some label
        | None -> None)
// For each distinct gender value: (value, occurrences, share of all counted samples).
gender
|> Seq.countBy id
|> Seq.map (fun (label, occurrences) ->
    let share = float occurrences / float count
    label, occurrences, share)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment