Skip to content

Instantly share code, notes, and snippets.

@pharpend
Last active October 6, 2019 02:53
Show Gist options
  • Save pharpend/c6fe10486092fb9e61161ec4284859c4 to your computer and use it in GitHub Desktop.
Save pharpend/c6fe10486092fb9e61161ec4284859c4 to your computer and use it in GitHub Desktop.
Regex checker

This document is available at https://gist.github.com/pharpend/c6fe10486092fb9e61161ec4284859c4

Caveats

  • This only works on UNIX-like systems (Macintosh, Linux, BSD, etc)
  • This procedure assumes you have a lot of spare hard disk space (>1GB) and a reasonably fast computer.
  • Failing these critera, do not attempt this procedure!

How to install this program (UNIX-like systems only)

  1. Install the Haskell stack

  2. Download all of these files into one directory

  3. Make part1.hs executable:

    $ chmod +x part1.hs
    

How to use this program

Run

$ ./part1.hs

The program stack will take care of installing the correct version of ghc (Haskell compiler) and the necessary libraries. It will take some time to run the first time, but will subsequently be faster.

module Combinators where
import Control.Monad (forM_)
import Test.Hspec
import Test.QuickCheck
import Text.RE.PCRE
alphabetical :: Gen Char
alphabetical = elements $ ['A'..'Z'] ++ ['a'..'z']
lower :: Gen Char
lower = elements ['a'..'z']
mkSpecificTests :: [String] -> [String] -> RE -> Spec
mkSpecificTests positives negatives myRegex =
describe "specific tests" $ do
describe "positives" $ do
forM_ positives $ \str -> do
it ("matches " ++ str) $
(str ?=~ myRegex) `shouldSatisfy` matched
describe "negatives" $ do
forM_ negatives $ \str -> do
it ("fails " ++ str) $
(str ?=~ myRegex) `shouldNotSatisfy` matched
repeatedWord :: Gen String
repeatedWord = do
w <- word
n <- arbitrary `suchThat` (>= 1) :: Gen Int
return $ foldl (\accumulator _ -> accumulator ++ " " ++ w) w [1 .. n]
word :: Gen String
word = listOf1 alphabetical
-- wordChar :: Gen Char
-- wordChar = elements $ ['A'..'Z'] ++ ['a'..'z'] ++ ['0'..'9'] ++ "_"
#!/usr/bin/env stack
{- stack script
--resolver lts-14.6
--package hspec
--package QuickCheck
--package regex
--package regex-with-pcre
-}
{-# LANGUAGE LambdaCase #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE QuasiQuotes #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE TemplateHaskell #-}
module Main where
import Combinators
import PositiveExamples
import Control.Monad (forM_)
import Test.Hspec
import Test.QuickCheck
import Text.RE.PCRE
import Text.Printf
main :: IO ()
main =
hspec $ do
partATests
partBTests
partCTests
partDTests
partETests
partATests :: Spec
partATests =
describe ("Part A: " ++ reSource regexPartA) $ do
describe "randomized tests" $ do
it "matches positive examples" $
property $ \(AlphabeticString str) -> matched $ str ?=~ regexPartA
-- xit "fails negative examples" $ pending
let positives =
[ "abcdef"
, "foobar"
, "AsLongAsEverythingIsAlphabetical"
, "KittGroupLinguistics"
]
negatives =
[ "1234"
, ".-=[]"
, "\\%@!~~~"
, "abc123def"
, "the quick brown fox jumps over the sleazy dog"
]
mkSpecificTests positives negatives regexPartA
partBTests :: Spec
partBTests =
describe ("Part B: " ++ reSource regexPartB) $ do
describe "randomized tests" $ do
it "matches positive examples" $
property $ \(LowercaseEndWithB str) -> matched $ str ?=~ regexPartB
-- xit "fails negative examples" $ pending
let positives = ["b", "weionxb", "swnzqqqb"]
negatives =
[ ""
, "Hello"
, "hi"
, "this string has spaces"
, "HasCapitalsb"
, "endswithcapitalB"
]
myRegex = regexPartB
mkSpecificTests positives negatives myRegex
partCTests :: Spec
partCTests =
describe ("Part C: " ++ reSource regexPartC) $ do
describe "randomized tests" $ do
it "matches positive examples" $
property $ \(ConsecutiveRepeat str) -> matched $ str ?=~ regexPartC
-- xit "fails negative examples" $ pending
let positives =
[ "This this regex is not case sensitive"
, "this this"
, "that x x y y y z z"
, "pooxz asazz zzpxx zzpxx zzpxx yy11dd"
]
negatives =
[ "this string has no repeated consecutive words."
, ""
, "asd;fijzxi"
, "this string has this string has a multi-word repeating sequence"
]
myRegex = regexPartC
mkSpecificTests positives negatives myRegex
partDTests :: Spec
partDTests =
describe ("Part D: " ++ reSource regexPartD) $ do
-- describe "randomized tests" $ do
-- xit "matches positive examples" $ do
-- property $ \(BothCaveAndSpider str) -> matched $ str ?=~ regexPartD
-- xit "fails negative examples" $ pending
let positives =
[ "there is a spider in the cave"
, "cave spider"
, "spider cave"
, "asj8nvn cave aj;oefi;j; 48821 spider"
]
negatives =
[ "spiders and caves"
, "the quick brown fox"
, ""
, "cave"
, "spider"
, "spider but not caves"
, "cave but not spiders"
, "spiderman is in a cave"
, "PrefixForcave spider"
, "cave prefixedspider"
]
myRegex = regexPartD
mkSpecificTests positives negatives myRegex
partETests :: Spec
partETests =
describe ("Part E: " ++ reSource regexPartE) $ do
describe "randomized tests" $ do
it "matches random positive examples" $
property $ \(NorthDakotaPhone str) -> matched $ str ?=~ regexPartE
-- xit "fails random negative examples" $ pending
describe "specific tests" $ do
describe "positive examples" $ do
let positives =
[ "+17010987654"
, "17010987654"
, "1-701-098-7654"
, "+1 (701) 098 7654"
, "1-(701).098 7654"
]
in forM_ positives $ \numberString -> do
it ("matches " ++ numberString) $
(numberString ?=~ regexPartE) `shouldSatisfy` matched
describe "negative examples" $ do
let negatives =
[ "+18010987654"
, "7010987654"
, "1-701-098-654"
, "the quick brown fox jumps over the lazy dog"
, "1***(701).098 7654"
]
in forM_ negatives $ \numberString -> do
it ("fails " ++ numberString) $
(numberString ?=~ regexPartE) `shouldNotSatisfy` matched
regexPartA :: RE
regexPartA = [re|^[A-Za-z]+$|]
regexPartB :: RE
regexPartB = [re|^[a-z]*b$|]
regexPartC :: RE
regexPartC = [reMultilineInsensitive|\b([a-z]+)\s+(\1\s*)+|]
regexPartD :: RE
regexPartD = [re|(?=.*\bcave\b)(?=.*\bspider\b)|]
regexPartE :: RE
regexPartE = [re|\+?1[.\- ]*(701|\(701\))[.\- ]*[0-9]{3}[.\- ]*[0-9]{4}|]
-- regexPartE = [re|\+?1( \(|-|\.)*701(\) |-|.)*[0-9]{3}( |-|.)*[0-9]{4}|]
module PositiveExamples where
import Combinators
import Test.QuickCheck
newtype PartA = AlphabeticString String
deriving (Show, Eq)
instance Arbitrary PartA where
arbitrary = AlphabeticString <$> listOf1 alphabetical
newtype PartB = LowercaseEndWithB String
deriving (Show, Eq)
instance Arbitrary PartB where
arbitrary = LowercaseEndWithB . (++ "b") <$> listOf lower
newtype PartC = ConsecutiveRepeat String
deriving (Show, Eq)
instance Arbitrary PartC where
arbitrary = ConsecutiveRepeat <$> repeatedWord
newtype PartD = BothCaveAndSpider String
deriving (Show, Eq)
instance Arbitrary PartD where
arbitrary = undefined
newtype PartE = NorthDakotaPhone String
deriving (Show, Eq)
instance Arbitrary PartE where
arbitrary = do
plusYesNo <- arbitrary :: Gen Bool
parensYesNo <- arbitrary :: Gen Bool
spaces1 <- listOf $ elements seps
spaces2 <- listOf $ elements seps
threeDigits <- vectorOf 3 (elements digit)
spaces3 <- listOf $ elements seps
fourDigits <- vectorOf 4 (elements digit)
let plusStr =
if plusYesNo
then "+"
else ""
areaCodeStr =
if parensYesNo
then "(701)"
else "701"
result =
mconcat
[ plusStr
, "1"
, spaces1
, areaCodeStr
, spaces2
, threeDigits
, spaces3
, fourDigits
]
return $ NorthDakotaPhone result
where
digit = ['0' .. '9']
seps = " -."
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment