Last active
February 19, 2019 03:24
-
-
Save Thorium/507930f73958ac6959db5dc4274ae268 to your computer and use it in GitHub Desktop.
Parsing UserAgent strings with FSharp
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
open System | |
open System.IO | |
open System.Net | |
open System.Text.RegularExpressions | |
/// Web request for the ua-parser project's `regexes.yaml` rule file.
let req = HttpWebRequest.Create "https://raw.githubusercontent.com/ua-parser/uap-core/master/regexes.yaml"

/// Complete text of the downloaded rule file.
/// The response and the reader (which owns the response stream) are disposed as soon as
/// the text has been read, so the underlying connection is released instead of leaked.
let resp =
    use response = req.GetResponse()
    use reader = new StreamReader(response.GetResponseStream())
    reader.ReadToEnd()

/// The downloaded text split into lines on any common line ending; empty entries are dropped.
let lines =
    resp.Split([| Environment.NewLine; "\r"; "\n"; "\r\n" |], StringSplitOptions.RemoveEmptyEntries)
/// Minimal YAML-file parsing.
/// Folds `lines` into a map from each unindented section name to the list of entries
/// found under it; every entry is itself a map of key -> value.
/// Only three line shapes are understood:
///   `name:`          (first character is not a space) starts a new section,
///   `  - key: value` (indented, key starts with '-') starts a new entry,
///   `  key: value`   (indented) adds a key to the most recent entry.
/// Comment lines, blank lines and lines without a ':' are ignored.
let yamlParse =
    // Removes one pair of matching surrounding quotes ('...' or "...").
    // The length check keeps a value that is a single quote character from
    // reaching Substring with a negative length.
    let unquote (text: string) =
        let wrappedIn (quote: string) =
            text.Length >= 2 && text.StartsWith(quote) && text.EndsWith(quote)
        if wrappedIn "'" || wrappedIn "\"" then text.Substring(1, text.Length - 2) else text

    // A line is relevant when it is not blank, not a comment and contains a ':'.
    let isRelevant (line: string) =
        let trimmed = line.Trim()
        trimmed.Length > 0 && not (trimmed.StartsWith("#")) && line.Contains(":")

    // Fold step. State = (finished sections, current section name, entries of the current section).
    let step (mapping: Map<string, ResizeArray<Map<string, string>>>,
              name: string,
              activemap: ResizeArray<Map<string, string>>)
             (line: string) =
        let mc = line.IndexOf ':'
        match line.[0] with
        | ' ' ->
            // Indented line: a key/value pair belonging to the current section.
            let key, startLine =
                match line.Substring(0, mc).Trim() with
                | x when x.StartsWith("-") -> x.Substring(1).Trim(), true
                | y -> y, false
            let valu = unquote (line.Substring(mc + 1).Trim())
            // A continuation line with no entry to extend opens a new entry instead of
            // indexing position -1 of an empty list.
            if startLine || activemap.Count = 0 then
                activemap.Add(Map.empty.Add(key, valu))
            else
                let last = activemap.Count - 1
                activemap.[last] <- activemap.[last].Add(key, valu)
            mapping, name, activemap
        | _ ->
            // Unindented line: store the section collected so far (if it has a name and
            // at least one entry) and start a new one named after the text before ':'.
            let mapped =
                if name <> "" && activemap.Count > 0 then mapping.Add(name, activemap) else mapping
            mapped, line.Substring(0, mc).Trim(), ResizeArray()

    let yamlParsed, lastName, lastMap =
        lines
        |> Seq.filter isRelevant
        |> Seq.fold step (Map.empty, "", ResizeArray())

    // The final section is still pending when the input ends.
    if lastName <> "" && lastMap.Count > 0 then
        yamlParsed.Add(lastName, lastMap)
    else
        yamlParsed
/// Builds the matching rules for one section of the parsed YAML.
/// `parserName` is the section key looked up in `yamlParse` (the lookup throws if the
/// section is absent); `parameters` lists the replacement keys to read from each entry.
/// Returns an array of (compiled regex, replacement labels) in file order, where the
/// labels line up one-to-one with `parameters`.
let getParser parserName (parameters: string list) =
    yamlParse.[parserName]
    |> Seq.filter (fun p -> p.ContainsKey("regex"))
    |> Seq.map (fun parser ->
        // Patterns are case-sensitive unless the entry opts in with `regex_flag: 'i'`;
        // forcing IgnoreCase on every rule lets earlier rules shadow later ones wrongly.
        let options =
            match parser.TryFind "regex_flag" with
            | Some "i" -> RegexOptions.IgnoreCase ||| RegexOptions.Compiled
            | _ -> RegexOptions.Compiled
        let reg = Regex(parser.["regex"], options)
        // GetGroupNumbers includes group 0 (the whole match).
        let groups = reg.GetGroupNumbers().Length
        let labels =
            parameters
            |> List.mapi (fun idx p ->
                // A label is kept only when its position is below the regex's group count
                // and the entry defines it; otherwise it is left empty.
                if groups > idx && parser.ContainsKey p then parser.[p] else "")
        reg, labels)
    |> Seq.toArray
// Rule sets built once from the parsed YAML. Each list names the replacement keys
// that getParser reads from every entry, in output order.
// To add more versions, add more parameters. https://github.com/ua-parser/uap-core/blob/master/docs/specification.md

/// Rules from the "os_parsers" section.
let os =
    [ "os_replacement"; "os_v1_replacement"; "os_v2_replacement" ]
    |> getParser "os_parsers"

/// Rules from the "user_agent_parsers" section.
let browser =
    [ "family_replacement"; "v1_replacement"; "v2_replacement" ]
    |> getParser "user_agent_parsers"

/// Rules from the "device_parsers" section.
let device =
    [ "device_replacement"; "brand_replacement"; "model_replacement" ]
    |> getParser "device_parsers"
/// Applies every (regex, replacement labels) rule in `coll` to `uaString`.
/// For each rule that matches, one value is produced per label position:
///   - a non-empty label has every `$1`..`$9` reference replaced by the text of that
///     capture group (an unmatched or missing group yields the empty string);
///   - an empty label falls back to the capture group at that position (1-based).
/// Empty values are dropped and duplicates removed within each rule's result, then
/// empty results are dropped and duplicate results removed across rules.
/// NOTE(review): dropping empties and duplicates shifts later values to earlier
/// positions (e.g. equal major and minor versions collapse); kept because callers
/// pattern-match on the resulting list shape.
/// Returns an empty array when `uaString` is null or empty.
let parseCollection (coll:(Regex*List<string>)[]) (uaString:string) =
    if String.IsNullOrEmpty uaString then
        [||]
    else
        coll
        |> Array.choose (fun (regex, pars) ->
            // Match once and reuse the result instead of IsMatch followed by Match.
            let matchedData = regex.Match uaString
            if not matchedData.Success then
                None
            else
                // Indexing a group number that does not exist returns an empty group.
                let groupValue (number: int) = matchedData.Groups.[number].Value
                pars
                |> List.mapi (fun idx label ->
                    if label <> "" then
                        // Substitute every $N in the label, not only the one whose
                        // number happens to equal this label's position.
                        Regex.Replace(
                            label,
                            @"\$([1-9])",
                            MatchEvaluator(fun m -> groupValue (int (m.Groups.[1].Value))))
                    else
                        groupValue (idx + 1))
                |> List.filter (fun p -> not (String.IsNullOrEmpty p))
                |> List.distinct
                |> Some)
        |> Array.filter (fun p -> not (List.isEmpty p))
        |> Array.distinct
/// A piece of software (browser or operating system) with its version parts.
type UASoftware = { Item: string; MajorVersion: string; MinorVersion: string }

/// A device described by its name, brand and model.
type UADevice = { Item: string; Brand: string; Model: string }

/// Everything recognised in a user-agent string; each part is None when nothing was found.
type UAInfo =
    { Browser: UASoftware option
      Os: UASoftware option
      Device: UADevice option }

    /// Renders as "<browser> <major> - <os> <major> - <brand> <device> <model>".
    /// Missing sections and empty fields are skipped, so no doubled or dangling
    /// spaces or separators appear in the output.
    override this.ToString() =
        // Joins the non-blank parts of one section with single spaces.
        let words (parts: string list) =
            parts
            |> List.filter (fun part -> not (String.IsNullOrWhiteSpace part))
            |> String.concat " "

        let sections =
            [ this.Browser |> Option.map (fun b -> words [ b.Item; b.MajorVersion ])
              this.Os |> Option.map (fun o -> words [ o.Item; o.MajorVersion ])
              this.Device |> Option.map (fun d -> words [ d.Brand; d.Item; d.Model ]) ]
            |> List.choose id
            |> List.filter (fun section -> section <> "")

        (String.concat " - " sections).Trim()
/// Parses a user-agent string into browser, operating system and device information.
/// For each of the three rule sets only the first result from parseCollection is used;
/// a part is None when no rule produced a result.
let parse uaString =
    // Reads the first result as (item, major, minor); missing positions become ""
    // and anything beyond the third value is ignored.
    let firstResult (results: string list []) : UASoftware option =
        match Array.tryHead results with
        | None
        | Some [] -> None
        | Some (item :: rest) ->
            let major, minor =
                match rest with
                | [] -> "", ""
                | [ only ] -> only, ""
                | first :: second :: _ -> first, second
            Some { Item = item; MajorVersion = major; MinorVersion = minor }

    let browserInfo = firstResult (parseCollection browser uaString)
    let osInfo = firstResult (parseCollection os uaString)

    // Device rules yield (device, brand, model) in the same three positions.
    let deviceInfo =
        firstResult (parseCollection device uaString)
        |> Option.map (fun found ->
            { Item = found.Item; Brand = found.MajorVersion; Model = found.MinorVersion })

    { Browser = browserInfo; Os = osInfo; Device = deviceInfo }

// Example usage:
//let uaString = "Mozilla/5.0 (iPhone; CPU iPhone OS 5_1_1 like Mac OS X) AppleWebKit/534.46 (KHTML, like Gecko) Version/5.1 Mobile/9B206 Safari/7534.48.3"
//printfn "%O" (parse uaString) // Prints: Mobile Safari 5 - iOS 5 - Apple iPhone
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Identify the user's browser, OS, and device from the browser's User-Agent string.