Skip to content

Instantly share code, notes, and snippets.

@mattpodwysocki
Created January 24, 2009 20:06
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save mattpodwysocki/51530 to your computer and use it in GitHub Desktop.
Save mattpodwysocki/51530 to your computer and use it in GitHub Desktop.
#light
namespace Microsoft.MapReduce
/// String helpers used by the word-count example.
[<CompilationRepresentation(CompilationRepresentationFlags.ModuleSuffix)>]
[<CompilationMapping(SourceConstructFlags.Module)>]
module String =
    open System.Text.RegularExpressions

    /// Splits `s` into its words, in order of appearance.
    /// A word is a maximal run of characters matched by the regex class `\w`;
    /// everything else acts as a separator and is discarded.
    let words (s:string) : string list =
        let matches = Regex.Matches(s, @"\w+")
        [ for m in matches do yield m.Value ]
/// General-purpose combinators; auto-opened so they can be used unqualified.
[<CompilationRepresentation(CompilationRepresentationFlags.ModuleSuffix)>]
[<CompilationMapping(SourceConstructFlags.Module)>]
[<AutoOpen>]
module Operators =

    /// Constant function: returns its first argument and ignores the second.
    let const (x:'a) (_:'b) : 'a = x
/// Helpers for working with pairs.
[<CompilationRepresentation(CompilationRepresentationFlags.ModuleSuffix)>]
[<CompilationMapping(SourceConstructFlags.Module)>]
module Tuple =

    /// Adapts a curried two-argument function so that it accepts a pair:
    /// `uncurry f (a, b)` evaluates `f a b`.
    let uncurry f (a, b) = f a b
/// Extensions to the core `Map` module.
[<CompilationRepresentation(CompilationRepresentationFlags.ModuleSuffix)>]
[<CompilationMapping(SourceConstructFlags.Module)>]
module Map =

    /// Inserts `x` under key `kx`, combining it with any value already there.
    ///
    /// If `kx` is not bound in `t`, the result binds `kx` to `x`.
    /// If `kx` is bound to `v`, the result binds `kx` to `f v x`
    /// (existing value first, new value second).
    /// `t` itself is not modified; a new map is returned.
    let insert_with (f:'a -> 'a -> 'a) (kx:'k) (x:'a) (t:Map<'k,'a>) : Map<'k, 'a> =
        let merged =
            match Map.tryfind kx t with
            | Some v -> f v x
            | None -> x
        // Map.add replaces an existing binding for the key, so there is no
        // need to remove the old entry first.
        Map.add kx merged t
/// A small in-memory map/reduce pipeline and a word-count job built on it.
module WordCount =

    /// Builds a map/reduce function from a mapper `m` and a reducer `r`.
    ///
    /// `m key value` is applied to every binding of the input map and returns
    /// a list of intermediate `(key2, value2)` pairs.
    /// `r key2 values` is applied to every intermediate key together with the
    /// list of all values emitted for it (in emission order) and returns an
    /// option; keys for which it returns `None` are dropped from the result.
    ///
    /// The returned function takes the input map and yields the reduced map.
    let map_reduce m r =
        // Map phase: run the mapper on each input binding and flatten the
        // per-binding result lists into a single list of pairs.
        let map_per_key : Map<'k1, 'v1> -> ('k2 * 'v2) list =
            Map.to_list >> List.map (Tuple.uncurry m) >> List.concat

        // Shuffle phase: collect all intermediate values under their key.
        let group_by_key (l:('k2 * 'v2) list) : Map<'k2,'v2 list> =
            // insert_with calls its function as (existing, incoming). Putting
            // the one-element incoming list on the left of @ makes each
            // insertion constant-time instead of copying the whole existing
            // list; the lists are therefore built in reverse and are put back
            // into emission order once at the end.
            let insert d (k2, v2) =
                Map.insert_with (fun existing incoming -> incoming @ existing) k2 [v2] d
            l
            |> List.fold_left insert Map.empty
            |> Map.mapi (fun _ vs -> List.rev vs)

        // Reduce phase: run the reducer per key, then keep only the keys for
        // which it produced a value and unwrap those values.
        let reduce_per_key : Map<'k2, 'v2 list> -> Map<'k2,'v3> =
            let isSome _ = function
                | Some _ -> true
                | None -> false
            // Total version of the unwrap: the None case cannot occur after
            // the isSome filter, but matching it explicitly avoids the
            // incomplete-pattern warning and gives a clear error if the
            // pipeline is ever reordered.
            let unSome _ = function
                | Some v -> v
                | None -> failwith "reduce_per_key: None value survived the isSome filter"
            Map.mapi r >> Map.filter isSome >> Map.mapi unSome

        map_per_key >> group_by_key >> reduce_per_key

    /// Counts word occurrences across a set of documents.
    ///
    /// The input maps a document name to its text; the result maps each word
    /// (as split by `String.words`) to the number of times it occurs in all
    /// documents combined.
    let word_occurrence_count : Map<string, string> -> Map<string, int> =
        // Mapper: ignore the document name, emit (word, 1) for every word.
        let m = const (String.words >> List.map(fun s -> (s, 1)))
        // Reducer: ignore the word, sum its counts; always Some, so no key
        // is dropped.
        let r = const (List.sum >> Some)
        map_reduce m r
/// Program entry point for the word-count demo.
module MainModule =

    /// Runs the word count over two hard-coded sample documents, prints the
    /// resulting map with the `%A` formatter, and returns exit code 0.
    /// `args` is not used.
    [<EntryPoint>]
    let main(args:string array) =
        let documents =
            Map.empty
            |> Map.add "doc1" "appreciate the unfold"
            |> Map.add "doc2" "fold the fold"
        let counts = WordCount.word_occurrence_count documents
        printfn "%A" counts
        0
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment