Skip to content

Instantly share code, notes, and snippets.

@mneedham
Created December 17, 2009 14:01
Show Gist options
  • Save mneedham/258766 to your computer and use it in GitHub Desktop.
#light
open System
open System.IO
open System.Text.RegularExpressions
/// Total active pattern that classifies a path string.
/// Yields `Directory path` when `Directory.Exists path` is true and
/// `File path` for every other path (existing file or not).
let (|File|Directory|) path =
    match Directory.Exists path with
    | true -> Directory path
    | false -> File path
/// Returns the names of the files and subdirectories directly inside `path`
/// as an F# list, in the order reported by `Directory.GetFileSystemEntries`.
let getFileSystemEntries path =
    // List.ofArray replaces the old compatibility alias Array.to_list,
    // which is not available in current FSharp.Core.
    Directory.GetFileSystemEntries path |> List.ofArray
/// Collects the paths of all files found underneath `path`.
/// Entries are processed from a work list: a file is prepended to the
/// result, while a directory has its own entries appended to the end of
/// the work list. The returned list is therefore in reverse order of
/// discovery.
let files path =
    let rec inner pending collected =
        match pending with
        | [] -> collected
        | entry :: remaining ->
            match entry with
            | File filePath -> inner remaining (filePath :: collected)
            | Directory dirPath ->
                // Queue the directory's children behind the entries still waiting.
                inner (List.append remaining (getFileSystemEntries dirPath)) collected
    inner (getFileSystemEntries path) []
/// Reads the whole file at `path` and returns its contents as a string.
/// The reader (and the file stream it wraps) is disposed before returning,
/// so no file handle is left open per file read.
let downloadFile path =
    use reader = new StreamReader(File.OpenRead path)
    reader.ReadToEnd()
/// Extracts every run of word characters (regex `\w+`) from `input` and
/// returns them, lower-cased, as a lazy sequence in order of appearance.
/// Lower-casing uses the invariant culture so the same text yields the same
/// words regardless of the machine's current culture.
let words input =
    Regex.Matches(input, "\w+")
    |> Seq.cast<Match>
    |> Seq.map (fun m -> m.Value.ToLowerInvariant())
/// Counts word occurrences across every file found under a root path.
/// Takes the root path and returns a sequence of (word, occurrences) pairs.
/// The pipeline: list the files, read each one fully, split every file's
/// text into lower-cased words, then tally identical words.
let wordCount =
    files
    >> List.map downloadFile
    // Seq.collect flattens the per-file word sequences in one step instead of
    // folding with Seq.append, which nests one append per file.
    >> Seq.collect words
    // Seq.countBy tallies each distinct word without building the groups first.
    >> Seq.countBy id
/// Writes each (word, count) pair in `values` to the file at `path`,
/// one pair per line formatted as "<word> <count>".
/// The writer is disposed when the function returns.
let writeTo (path:string) (values:seq<string * int>) =
    use output = new StreamWriter(path)
    for (word, total) in values do
        output.WriteLine(word + " " + total.ToString())
// Script entry point: count the words under the newsgroup directory, write
// the counts twice (sorted alphabetically, then by descending count) and
// report the elapsed wall-clock time.
let startTime = DateTime.Now
// Cache the counts: the sequence is enumerated once per output file and
// would otherwise be regrouped from scratch on each enumeration.
let count = wordCount "Z:\\20_newsgroups\\alt.atheism" |> Seq.cache
printfn "Writing counts in alphabetical order"
count |> Seq.sort |> writeTo "C:\\results\\counts-alphabetical-fsharp.txt"
printfn "Writing counts in descending order"
// Negating the count makes the ascending sort produce descending order.
count |> Seq.sortBy (fun (_, occurrences) -> occurrences * -1) |> writeTo "C:\\results\\counts-descending-fsharp.txt"
let endTime = DateTime.Now
// TotalSeconds is the whole elapsed duration; Seconds is only the 0-59
// seconds component and under-reports any run longer than a minute.
printfn "Finished in: %d seconds" (int (endTime - startTime).TotalSeconds)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment