Skip to content

Instantly share code, notes, and snippets.

@studoot
Last active December 1, 2016 10:47
Show Gist options
  • Save studoot/f883c041cf6aaeaa1a752114963c16b7 to your computer and use it in GitHub Desktop.
Save studoot/f883c041cf6aaeaa1a752114963c16b7 to your computer and use it in GitHub Desktop.
Bad word filter (challenge at https://www.codeproject.com/questions/1157796/coding-challenge-bad-word-filter) implementation in F#
open Printf
open System
open System.Diagnostics
open System.Text
open System.Text.RegularExpressions
type System.String with
member s.SplitAt(index) =
let lengthToUse = min index s.Length
(s.Substring(0, lengthToUse), s.Substring(lengthToUse))
member s.SplitOff(numCharsFromRight) =
let splitIndex = max 0 (s.Length - numCharsFromRight)
s.SplitAt(splitIndex)
// Define an active pattern to match a string starting with a prefix and return the remnant of the string
let (|Prefix|_|) (ignoreCase:bool) (prefix:string) (s:string) =
let (maybePrefix, _) as split = s.SplitAt(prefix.Length)
match String.Compare(maybePrefix, prefix, ignoreCase) with
| 0 -> Some(split)
| _ -> None
// Define an active pattern to match a string ending with a suffix and return the remnant of the string
let (|Suffix|_|) (ignoreCase:bool) (suffix:string) (s:string) =
let (_, maybeSuffix) as split = s.SplitOff(suffix.Length)
match String.Compare(maybeSuffix, suffix, ignoreCase) with
| 0 -> Some(split)
| _ -> None
// Adjust case of a good word to reflect how the bad word's been used
let AdjustCase (suffix:string) (badBit:string) (goodWord:string) (ignoreCase:bool) =
// If the bad pattern respects case, just return the good word
if not ignoreCase then
goodWord
// If the bad use is UPPER CASE, return UPPER CASE
else if badBit = suffix.ToUpper() then
goodWord.ToUpper()
// If the bad use is lower case, return lower case
else if badBit = suffix.ToLower() then
goodWord.ToLower()
// If the first character of the bad use is UPPER CASE, return UPPER CASE
else if badBit.[0] = Char.ToUpper(suffix.[0]) then
Char.ToUpper(goodWord.[0]).ToString() + goodWord.Substring(1)
// No case adjustment needed - just return the string
else
goodWord
// Create a filter (a closure of string -> string) from a filter spec (pair of strings,
// first = bad word, second = replacement). Applying the closure to a string will sanitise it.
let createFilter (spec:string*string) =
let rec createFilterHelper (badWord:string) (goodWord:string) (ignoreCase:bool) =
match badWord with
// Case sensitive?
| Suffix true "!" (prefix, _) -> createFilterHelper prefix goodWord false
// badWord is a prefix (<string>*)
| Suffix true "*" (prefix, _) ->
fun (word:string) ->
match word with
| Prefix ignoreCase prefix (badBit, restOfWord) ->
Some ((AdjustCase prefix badBit goodWord ignoreCase) + restOfWord)
| anyOtherWord -> None
// badWord is a sufffix (*<string>)
| Prefix true "*" (_, suffix) ->
fun (word:string) ->
match word with
| Suffix ignoreCase suffix (restOfWord, badBit) ->
Some (restOfWord + (AdjustCase suffix badBit goodWord ignoreCase))
| anyOtherWord -> None
// badWord is fixed
| anyOtherWord ->
fun (word:string) ->
match String.Compare(word, badWord, ignoreCase) with
| 0 -> Some (AdjustCase badWord word goodWord ignoreCase)
| _ -> None
// Invoke createFilterHelper, adding in the case indication parameter
let (badPattern, goodWord) = spec
createFilterHelper badPattern goodWord true
// Define a null filter as a terminator for a list of filters so we always get a succeeding filter
let nullFilter word = Some word
// Create a filter list from a spec list
let createFilters specs = (specs |> List.map createFilter) @ [nullFilter]
// Apply a sequence of filters to a word
let filterAWord (filters:(string -> string option) seq) (word:string) =
filters
|> Seq.map (fun filter -> filter word)
|> Seq.find Option.isSome
|> Option.get
// Apply a sequence of filters to each word in a string
let filterWords filters text = Regex.Replace(text, "\w+", MatchEvaluator(fun regexMatch -> filterAWord filters regexMatch.Value))
// These are my test filter specs
let filterSpecs = [ ("poop*", "p**p"); ("PHB!", "boss") ; ("gotten", "become") ]
// And my test filters
let compiledFilters = createFilters filterSpecs
let tests =
printfn "Poophead -> P**phead = %b" ((filterAWord compiledFilters "Poophead") = "P**phead")
printfn "PHB -> boss = %b" ((filterAWord compiledFilters "PHB") = "boss")
printfn "Phb -> Phb = %b" ((filterAWord compiledFilters "Phb") = "Phb")
printfn "gotten -> become = %b" ((filterAWord compiledFilters "gotten") = "become")
printfn "{long string} = %b" ((filterWords compiledFilters "My PHB has started his new blog phblog.com. He's SUCH A MISBEGOTTEN POOPHEAD!") = "My boss has started his new blog phblog.com. He's SUCH A MISBEGOTTEN P**PHEAD!")
////////////////////////////////////////////////////////////////////////////////
//
// Benchmarking code...
//
let badMessage1 = "My PHB is such a poophead. It's gotten worse since his promotion"
let badMessage2 = "My PHB has started his new blog phblog.com. He's SUCH A MISBEGOTTEN POOPHEAD!"
let FsFilterSpecs = [ ("poop*", "p**p"); ("PHB!", "boss") ; ("gotten", "become") ]
let FsFilters = createFilters FsFilterSpecs
// Tests that compile the filters every time they're run
let FsSolutionBasic _ = filterWords (createFilters FsFilterSpecs) badMessage1
let FsSolutionBonus _ = filterWords (createFilters FsFilterSpecs) badMessage1
// Tests using pre-compiled filters
let PreCompiledFsSolutionBasic _ = filterWords FsFilters badMessage1
let PreCompiledFsSolutionBonus _ = filterWords FsFilters badMessage1
// Function to measure execution time of a function with type () -> 'a
let measureTime codeToMeasure =
let stopwatch = new Stopwatch();
stopwatch.Start()
ignore <| codeToMeasure ()
stopwatch.Stop()
stopwatch.Elapsed
// Write out a summary of all tests containing `keyword`
let SummaryForKeyword keyword (timings:Map<string,List<TimeSpan>>) =
let sb = new StringBuilder()
bprintf sb "%s Test\n\n" keyword
Map.toSeq timings
|> Seq.filter (fun (k, _) -> k.Contains(keyword))
|> Seq.map (
fun (k, v) ->
let ms = v |> Seq.map (fun ts -> ts.TotalMilliseconds)
sprintf " %s : MIN: %g ms | MAX %g ms | AVG %g ms" k (Seq.min ms) (Seq.max ms) (Seq.average ms)
)
|> Seq.fold (fun (sb:StringBuilder) s -> sb.AppendLine(s)) sb
|> fun sb -> sb.ToString()
let Summary iterations totalTests timings =
let summaryTemplate:Printf.TextWriterFormat<_> = """
Statistics
----------
Tests run: %d
Iterations/test: %d
-----------------------------------------------------------------------------
%s
%s
"""
printfn summaryTemplate totalTests iterations (SummaryForKeyword "Basic" timings) (SummaryForKeyword "Bonus" timings)
// Test a solution
let TestSolution id testToRun iterations totalTests =
ignore <| testToRun () // running once seems to reduce margin of diffrene in tests...
printfn "%s Test" id
// Create a list of elapsed times, one for each test run
let allMeasuredTimes =
List.init totalTests (fun _ ->
// Measure an `iteration` count loop of the solution under test
let elapsed = measureTime (fun _ ->
for i = 1 to iterations do
ignore <| testToRun ())
printfn " * timing > %g" elapsed.TotalMilliseconds
elapsed)
(id, allMeasuredTimes)
let TestRun iterations totalTests =
Map.ofList [
TestSolution "FsSolution - Basic" FsSolutionBasic iterations totalTests;
TestSolution "FsSolution - Bonus" FsSolutionBonus iterations totalTests;
TestSolution "PreCompiledFsSolution - Basic" PreCompiledFsSolutionBasic iterations totalTests;
TestSolution "PreCompiledFsSolution - Bonus" PreCompiledFsSolutionBonus iterations totalTests]
|> Summary iterations totalTests
[<EntryPoint>]
let main argv : int =
TestRun 100 10
TestRun 1000 10
Console.WriteLine("-- Press any key to exit --");
Console.ReadKey() |> ignore
0
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment