Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
Age and Sex Analysis Of Microsoft USA MVPs
#r "WindowsBase.dll"
#r "System.Net.Http.dll"
#r "PresentationCore.dll"
#r "../packages/FSharp.Data.2.3.2/lib/net40/FSharp.Data.dll"
open System
open System.IO
open System.Web
open System.Net
open FSharp.Data
open System.Net.Http
open System.Threading
open System.Net.Http.Headers
open System.Windows.Media.Imaging
open System.Text.RegularExpressions
/// Downloads one page of the United States MVP search results and returns the
/// response body as a string.
///
/// pageNumber: value appended to the `pn=` query parameter of the search URL.
/// Returns: the full response body read from the response stream.
/// Any exception raised by the web request or the stream read propagates to the caller.
let getPageContents(pageNumber:int) =
    let uri = new Uri("http://mvp.microsoft.com/en-us/search-mvp.aspx?lo=United+States&sl=0&browse=False&sc=s&ps=36&pn=" + pageNumber.ToString())
    let request = WebRequest.Create(uri)
    request.Method <- "GET"
    // WebResponse is IDisposable; `use` releases the underlying connection even
    // when reading the body throws.
    use response = request.GetResponse()
    use stream = response.GetResponseStream()
    use reader = new StreamReader(stream)
    reader.ReadToEnd()
/// Bodies of search-result pages 1 through 19, joined into one string in page order.
let contents =
    let pages = [| for pageNumber in 1 .. 19 -> getPageContents pageNumber |]
    String.concat "" pages
/// Finds every `/PublicProfile/Photo/<digits>` path in the given page text and
/// turns each one into an absolute photo URL on mvp.microsoft.com.
///
/// pageContents: text to scan.
/// Returns: an array of URLs in the order the paths occur in the text
/// (duplicates are kept); empty when nothing matches.
let getUrisFromPageContents(pageContents:string) =
    // Wraps one matched relative path with the site prefix and language suffix.
    let toPhotoUri (hit:Match) =
        "https://mvp.microsoft.com/en-us" + hit.Value + "?language=en-us"
    Regex.Matches(pageContents, "/PublicProfile/Photo/\d+")
    |> Seq.cast<Match>
    |> Seq.map toPhotoUri
    |> Array.ofSeq
/// Photo URLs extracted from the concatenated search-result pages.
let uris = contents |> getUrisFromPageContents
/// Downloads the resource at `uri` into the local photos folder under a freshly
/// generated GUID file name with a `.jpg` extension.
///
/// uri: absolute URL of the image, as a string.
/// Download failures propagate as exceptions from WebClient.
let saveImage uri =
    let fileName = Guid.NewGuid().ToString() + ".jpg"
    let target = @"F:\Git\ChickenSoftware.ParseMvpPages.Solution\ChickenSoftware.ParseMvpPages\photos\" + fileName
    use downloader = new WebClient()
    downloader.DownloadFile(Uri(uri), target)
// Download every extracted photo, one at a time, in extraction order.
for photoUri in uris do
    saveImage photoUri
/// Posts the bytes of the image file at `path` to the Project Oxford face
/// detection endpoint, requesting the face id plus the age and gender attributes.
///
/// path: path of the image file to upload.
/// Returns: `Some body` with the response text when the service answers 200 OK,
/// `None` for any other status code.
/// File-read and HTTP exceptions propagate to the caller.
let getOxfordResults path =
    let queryString = HttpUtility.ParseQueryString(String.Empty)
    queryString.Add("returnFaceId","true")
    queryString.Add("returnFaceLandmarks","false")
    queryString.Add("returnFaceAttributes","age,gender")
    let uri = "https://api.projectoxford.ai/face/v1.0/detect?" + queryString.ToString()
    let bytes = File.ReadAllBytes(path)
    // The client, request content and response are all IDisposable; `use`
    // releases them once the body has been read into a string.
    use client = new HttpClient()
    client.DefaultRequestHeaders.Add("Ocp-Apim-Subscription-Key","xxxxxxxxxxx")
    use content = new ByteArrayContent(bytes)
    content.Headers.ContentType <- MediaTypeHeaderValue("application/octet-stream")
    use result = client.PostAsync(uri,content).Result
    // Fixed five-second pause after every request.
    // NOTE(review): presumably this keeps the script under the API's rate limit - confirm.
    Thread.Sleep(TimeSpan.FromSeconds(5.0))
    match result.StatusCode with
    | HttpStatusCode.OK -> Some (result.Content.ReadAsStringAsync().Result)
    | _ -> None
/// JSON type provider for the face-detection response. The shape is inferred from
/// the inline sample: an array of faces, each with a faceId, a faceRectangle and
/// faceAttributes holding gender and age.
type FaceInfo = JsonProvider<Sample="[{\"faceId\":\"83045097-daa1-4f1c-8669-ed012e9b5975\",\"faceRectangle\":{\"top\":187,\"left\":209,\"width\":214,\"height\":214},\"faceAttributes\":{\"gender\":\"male\",\"age\":42.8}}]">
/// Extracts the age and gender of the first detected face from an optional
/// face-detection response body.
///
/// results: `Some json` with the raw response text, or `None`.
/// Returns: `Some (age, gender)` for the first face in the response; `None` when
/// there is no response or the response lists no faces.
let parseOxfordResuls results =
    results
    |> Option.bind (fun json ->
        FaceInfo.Parse(json)
        |> Seq.tryHead
        |> Option.map (fun firstFace ->
            firstFace.FaceAttributes.Age, firstFace.FaceAttributes.Gender))
#time
/// Optional (age, gender) pair for every file in the photos folder, in the order
/// returned by Directory.GetFiles.
let results =
    let photoDirectory = @"F:\Git\ChickenSoftware.ParseMvpPages.Solution\ChickenSoftware.ParseMvpPages\photos"
    // First pass: call the face API for every photo.
    let responses =
        Directory.GetFiles(photoDirectory)
        |> Array.map getOxfordResults
    // Second pass: parse each response once all requests have completed.
    responses |> Array.map parseOxfordResuls
#r "../packages/MathNet.Numerics.3.13.1/lib/net40/MathNet.Numerics.dll"
#r "../packages/MathNet.Numerics.FSharp.3.13.1/lib/net40/MathNet.Numerics.FSharp.dll"
open MathNet.Numerics.Statistics
// Number of entries in results, counting photos that produced no face data.
results |> Seq.length
/// Ages of all photos that produced face data, converted to float.
/// This is a lazy sequence, so it is re-evaluated by each consumer.
let ages =
    results
    |> Seq.choose id
    |> Seq.map (fun (age, _) -> float age)
// Descriptive statistics over the detected ages.
let stats = new DescriptiveStatistics(ages)
// Number of age samples; also used as the denominator for the gender shares.
let count = stats.Count
let largest = stats.Maximum
let smallest = stats.Minimum
let mean = stats.Mean
// Median and quartiles come from the static Statistics helpers, not from `stats`.
let median = Statistics.Median(ages)
let variance = stats.Variance
let standardDeviation = stats.StandardDeviation
let kurtosis = stats.Kurtosis
let skewness = stats.Skewness
let lowerQuartile = Statistics.LowerQuartile(ages)
// NOTE(review): the name is misspelled ("uppser"); left unchanged because other code may reference it.
let uppserQuartile = Statistics.UpperQuartile(ages)
// Histogram of the ages, constructed with a bucket argument of 10.
let histogram = new Histogram(ages,10)
// Print one line per histogram bucket: lower bound, upper bound and sample count.
// The loop is driven by the histogram's own bucket count rather than a hard-coded
// 0..9 range, so it stays correct if the number of buckets changes.
for bucketIndex in 0 .. histogram.BucketCount - 1 do
    let bucket = histogram.[bucketIndex]
    printfn "%f-%f %f" bucket.LowerBound bucket.UpperBound bucket.Count
#r "../packages/FSharp.Charting.0.90.14/lib/net40/FSharp.Charting.dll"
open FSharp.Charting
/// Histogram chart of the detected ages, built with Intervals set to 10.0.
let chart = Chart.Histogram(ages, Intervals = 10.0)
// Show the chart on screen.
chart |> Chart.Show
/// Gender values of all photos that produced face data.
/// This is a lazy sequence.
let gender =
    results
    |> Seq.choose (Option.map snd)
// For each distinct gender value: the value, its number of occurrences, and
// that number divided by `count`.
gender
|> Seq.countBy id
|> Seq.map (fun (label, occurrences) ->
    let share = float occurrences / float count
    label, occurrences, share)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.
You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session.