Created
January 24, 2009 20:06
-
-
Save mattpodwysocki/51530 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#light | |
namespace Microsoft.MapReduce | |
/// String helpers used by the word-count example.
[<CompilationRepresentation(CompilationRepresentationFlags.ModuleSuffix)>]
[<CompilationMapping(SourceConstructFlags.Module)>]
module String =
    open System.Text.RegularExpressions

    /// Splits `s` into its words, in order of appearance.
    /// A word is any maximal run matched by the regex `\w+`.
    let words (s:string) : string list =
        // Collect every regex hit first, then project each one to its text.
        let hits = Regex.Matches(s, @"\w+")
        [for hit in hits -> hit.Value]
/// Small general-purpose combinators, opened automatically wherever the
/// enclosing namespace is opened.
[<CompilationRepresentation(CompilationRepresentationFlags.ModuleSuffix)>]
[<CompilationMapping(SourceConstructFlags.Module)>]
[<AutoOpen>]
module Operators =

    /// Returns the first argument and discards the second.
    /// Partially applied (`const x`), it yields a function that always
    /// returns `x` regardless of its input.
    let const x (_ : 'b) = x
/// Helpers for working with pairs.
[<CompilationRepresentation(CompilationRepresentationFlags.ModuleSuffix)>]
[<CompilationMapping(SourceConstructFlags.Module)>]
module Tuple =

    /// Adapts the curried two-argument function `f` so that it can be
    /// applied to a single pair `p`: the first component becomes the first
    /// argument and the second component becomes the second argument.
    let uncurry f p =
        let first, second = p
        f first second
/// Extensions to the core `Map` module.
[<CompilationRepresentation(CompilationRepresentationFlags.ModuleSuffix)>]
[<CompilationMapping(SourceConstructFlags.Module)>]
module Map =

    /// Inserts the value `x` under key `kx` into the map `t`.
    ///
    /// If `kx` is not yet bound, the result binds `kx` to `x`.
    /// If `kx` is already bound to some `v`, the result binds `kx` to
    /// `f v x` (existing value first, new value second).
    /// The input map is not modified; a new map is returned.
    let insert_with (f:'a -> 'a -> 'a) (kx:'k) (x:'a) (t:Map<'k,'a>) : Map<'k, 'a> =
        match Map.tryfind kx t with
        | None -> Map.add kx x t
        // Map.add replaces an existing binding for the same key, so there is
        // no need to remove the old binding before adding the combined one.
        | Some v -> Map.add kx (f v x) t
/// A small in-memory map/reduce pipeline and a word-count job built on it.
module WordCount =

    /// Builds a map/reduce job from a mapper `m` and a reducer `r`.
    ///
    /// `m` is called with each key and value of the input map and returns a
    /// list of intermediate (key, value) pairs.
    /// `r` is called with each intermediate key and the list of values
    /// grouped under it, and returns `Some result` to keep the key or `None`
    /// to drop it from the output.
    ///
    /// The returned function runs three phases in order: map every input
    /// entry, group the intermediate pairs by key, then reduce each group.
    let map_reduce m r =
        // Phase 1: apply the mapper to every (key, value) entry and flatten
        // the per-entry result lists into one list of intermediate pairs.
        let map_per_key : Map<'k1, 'v1> -> ('k2 * 'v2) list =
            Map.to_list >> List.map (Tuple.uncurry m) >> List.concat

        // Phase 2: collect all values that share an intermediate key.
        // Each new value is appended after the ones already collected, so a
        // group keeps the order in which its values were emitted.
        let group_by_key (l:('k2 * 'v2) list) : Map<'k2,'v2 list> =
            let insert d (k2, v2) = Map.insert_with (@) (k2) [v2] d
            l |> List.fold_left insert Map.empty

        // Phase 3: reduce every group, discard keys whose reducer returned
        // None, and unwrap the remaining Some results.
        let reduce_per_key : Map<'k2, 'v2 list> -> Map<'k2,'v3> =
            let isSome _ = function
                | Some _ -> true
                | None -> false
            // Total match instead of the partial pattern `(Some v)`: the None
            // case cannot occur after the filter, but spelling it out avoids
            // the incomplete-match warning and gives a clear message if the
            // pipeline is ever reordered.
            let unSome _ = function
                | Some v -> v
                | None -> failwith "map_reduce: unexpected None after filtering reduced values"
            Map.mapi r >> Map.filter isSome >> Map.mapi unSome

        map_per_key >> group_by_key >> reduce_per_key

    /// Counts word occurrences across a set of documents.
    /// The input maps a document key to its text; the output maps each word
    /// (as produced by `String.words`) to the number of times it occurs in
    /// all texts combined. Document keys are ignored.
    let word_occurrence_count : Map<string, string> -> Map<string, int> =
        // Mapper: ignore the document key, emit (word, 1) for every word.
        let m = const (String.words >> List.map(fun s -> (s, 1)))
        // Reducer: ignore the word itself, sum its ones; never drops a key.
        let r = const (List.sum >> Some)
        map_reduce m r
/// Console entry point demonstrating the word-count job.
module MainModule =

    /// Runs the word count over two hard-coded sample documents, prints the
    /// resulting map with the `%A` formatter, and returns exit code 0.
    /// The command-line arguments in `args` are not used.
    [<EntryPoint>]
    let main(args:string array) =
        // Sample corpus: document key -> document text.
        let documents =
            Map.empty
            |> Map.add "doc1" "appreciate the unfold"
            |> Map.add "doc2" "fold the fold"
        let counts = WordCount.word_occurrence_count documents
        printfn "%A" counts
        0
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment