Metaxal/regex-golf-norvig.py

## regex-golf-norvig.py
#!/usr/bin/python
# From: http://nbviewer.ipython.org/url/norvig.com/ipython/xkcd1313.ipynb
import re

def verify(regex, winners, losers):
    "Return true iff the regex matches all winners but no losers."
    missed_winners = {W for W in winners if not re.search(regex, W)}
    matched_losers = {L for L in losers if re.search(regex, L)}
    if missed_winners:
        print "Error: should match but did not:", ', '.join(missed_winners)
    if matched_losers:
        print "Error: should not match but did:", ', '.join(matched_losers)
    return not (missed_winners or matched_losers)

def findregex(winners, losers):
    "Find a regex that matches all winners but no losers (sets of strings)."
    # Make a pool of regex components, then pick from them to cover winners.
    # On each iteration, add the 'best' component to 'solution',
    # remove winners covered by best, and keep in 'pool' only components
    # that still match some winner.
    pool = regex_components(winners, losers)
    solution = []
    def score(r): return 4 * len(matches(r, winners)) - len(r)
    while winners:
        best = max(pool, key=score)
        solution.append(best)
        winners = winners - matches(best, winners)
        pool = {r for r in pool if matches(r, winners)}
    return OR(solution)

def matches(regex, strings):
    "Return a set of all the strings that are matched by regex."
    return {s for s in strings if re.search(regex, s)}

OR = '|'.join # Join a sequence of strings with '|' between them

def regex_components(winners, losers):
    "Return components that match at least one winner, but no loser."
    wholes = {'^'+winner+'$' for winner in winners}
    parts = {d for w in wholes for s in subparts(w) for d in dotify(s)}
    return wholes | {p for p in parts if not matches(p, losers)}

def subparts(word):
    "Return a set of subparts of word, consecutive characters up to length 4."
    return set(word[i:i+n] for i in range(len(word)) for n in (1, 2, 3, 4))

def dotify(part):
    "Return all ways to replace a subset of chars in part with '.'."
    if part == '':
        return {''}
    else:
        return {c+rest for rest in dotify(part[1:])
                for c in replacements(part[0])}

def replacements(char):
    "Return replacement characters for char (char + '.' unless char is special)."
    if (char == '^' or char == '$'):
        return char
    else:
        return char + '.'

def words(text): return set(text.lower().split())

def lines(text): return {line.upper().strip() for line in text.split('/')}

################ Data

winners = words('''washington adams jefferson jefferson madison madison monroe
    monroe adams jackson jackson van-buren harrison polk taylor pierce buchanan
    lincoln lincoln grant grant hayes garfield cleveland harrison cleveland mckinley
    mckinley roosevelt taft wilson wilson harding coolidge hoover roosevelt
    roosevelt roosevelt roosevelt truman eisenhower eisenhower kennedy johnson nixon
    nixon carter reagan reagan bush clinton clinton bush bush obama obama''')

losers = words('''clinton jefferson adams pinckney pinckney clinton king adams
    jackson adams clay van-buren van-buren clay cass scott fremont breckinridge
    mcclellan seymour greeley tilden hancock blaine cleveland harrison bryan bryan
    parker bryan roosevelt hughes cox davis smith hoover landon wilkie dewey dewey
    stevenson stevenson nixon goldwater humphrey mcgovern ford carter mondale
    dukakis bush dole gore kerry mccain romney''')

losers = losers - winners

starwars = lines('''The Phantom Menace / Attack of the Clones / Revenge of the Sith /
    A New Hope / The Empire Strikes Back / Return of the Jedi''')

startrek = lines('''The Wrath of Khan / The Search for Spock / The Voyage Home /
    The Final Frontier / The Undiscovered Country / Generations / First Contact /
    Insurrection / Nemesis''')

def findboth(A, B):
    "Find a regex to match A but not B, and vice-versa.  Print summary."
    for (W, L, legend) in [(A, B, 'A-B'), (B, A, 'B-A')]:
        solution = findregex(W, L)
        assert verify(solution, W, L)
        ratio = len('^(' + OR(W) + ')$') / float(len(solution))
        print '%3d chars, %4.1f ratio, %2d winners %s: %r' % (
            len(solution), ratio , len(W), legend, solution)

################ Tests

def test1():
    assert subparts('^it$') == {'^', 'i', 't', '$', '^i', 'it', 't$', '^it', 'it$', '^it$'}
    assert subparts('this') == {'t', 'h', 'i', 's', 'th', 'hi', 'is', 'thi', 'his', 'this'}
    assert dotify('it') == {'it', 'i.', '.t', '..'}
    assert dotify('^it$') == {'^it$', '^i.$', '^.t$', '^..$'}
    assert dotify('this') == {'this', 'thi.', 'th.s', 'th..', 't.is', 't.i.', 't..s', 't...',
                              '.his', '.hi.', '.h.s', '.h..', '..is', '..i.', '...s', '....'}
    assert replacements('x') == 'x.'
    assert replacements('^') == '^'
    assert replacements('$') == '$'
    assert regex_components({'win'}, {'losers', 'bin', 'won'}) == {
        '^win$', '^win', '^wi.', 'wi.',  'wi', '^wi', 'win$', 'win', 'wi.$'}
    assert regex_components({'win'}, {'losers', 'bin', 'won', 'wine'}) == {
        '^win$', 'win$', 'wi.$'}
    assert matches('a|b|c', {'a', 'b', 'c', 'd', 'e'}) == {'a', 'b', 'c'}
    assert matches('a|b|c', {'any', 'bee', 'succeed', 'dee', 'eee!'}) == {
        'any', 'bee', 'succeed'}
    assert verify('a+b+', {'ab', 'aaabb'}, {'a', 'bee', 'a b'})
    assert findregex({"ahahah", "ciao"},  {"ahaha", "bye"}) == 'a.$'
    assert OR(['a', 'b', 'c']) == 'a|b|c'
    assert OR(['a']) == 'a'
    assert words('This is a TEST this is') == {'this', 'is', 'a', 'test'}
    assert lines('Testing / 1 2 3 / Testing over') == {'TESTING', '1 2 3', 'TESTING OVER'}
    return 'test1 passes'

if __name__ == '__main__':
    print test1()
    findboth(winners, losers)
# python regex-golf.py  1,95s user 0,01s system 99% cpu 1,962 total

## regex-golf-racket.rkt
#lang racket

;;;; Racket translation of: http://nbviewer.ipython.org/url/norvig.com/ipython/xkcd1313.ipynb

(module+ test
  (require rackunit))

;; Faster variant of filter that does not keep the initial element order
(define (filter f list)
  (let loop ([l list] [result null])
    (if (null? l)
        result #;(reverse result) ; do not reverse!
        (loop (cdr l)
              (if (f (car l))
                  (cons (car l) result)
                  result)))))

(define winners-syms
  (remove-duplicates
    '(washington adams jefferson jefferson madison madison monroe
    monroe adams jackson jackson van-buren harrison polk taylor pierce buchanan
    lincoln lincoln grant grant hayes garfield cleveland harrison cleveland mckinley
    mckinley roosevelt taft wilson wilson harding coolidge hoover roosevelt
    roosevelt roosevelt roosevelt truman eisenhower eisenhower kennedy johnson nixon
    nixon carter reagan reagan bush clinton clinton bush bush obama obama)))

(define losers-syms
  (remove-duplicates
    '(jefferson adams pinckney pinckney clinton king adams
    jackson adams clay van-buren van-buren clay cass scott fremont breckinridge
    mcclellan seymour greeley tilden hancock blaine cleveland harrison bryan bryan
    parker bryan roosevelt hughes cox davis smith hoover landon wilkie dewey dewey
    stevenson stevenson nixon goldwater humphrey mcgovern ford carter mondale
    dukakis bush dole gore kerry mccain romney)))

(define winners (map symbol->string winners-syms))
;; Some winners are also losers (sometimes with different first names), so we remove them
(define losers
  (map symbol->string (remove* winners-syms losers-syms)))

;; Return true iff the regex matches all winners but no losers.
(define (verify regex winners losers)
  (define missed-winners (filter (λ(w)(not (regexp-match regex w)))
                                 winners))
  (define matched-losers (filter (λ(l)(regexp-match regex l))
                                 losers))
  (unless (empty? missed-winners)
    (printf "Error: should match but did not: ~a\n" missed-winners))
  (unless (empty? matched-losers)
    (printf "Error: should not match but did: ~a\n" matched-losers))
  (and (empty? missed-winners) (empty? matched-losers)))


;; Finds a regex that matches all winners but no losers (lists of strings).
;; Make a pool of regex components, then pick from them to cover winners.
;; On each iteration, add the 'best' component to 'solution',
;; remove winners covered by best, and keep in 'pool' only components
;; that still match some winner.
(define (find-regex winners losers)
  (define (score r winners)
    (- (* 4 (length (matches r winners)))
       (string-length r)))
  (let loop ([pool (regex-components winners losers)]
             [winners winners]
             [solution '()])
    (if (empty? winners)
        (OR solution)
        (let* ([best (argmax (λ(r)(score r winners)) pool)]
               [winners (remove* (matches best winners) winners)])
          (loop (filter (λ(r)(not (empty? (matches r winners))))
                        pool)
                winners
                (cons best solution))))))

;; Returns a set of all the strings that are matched by rx
(define (matches rx lstr)
  (filter (λ(w)(regexp-match? rx w)) lstr))

;; Joins a sequence of string with "|" betwen them
(define (OR lstr)
  (string-join lstr "|"))

;; Return components that match at least one winner, but no loser.
(define (regex-components winners losers)
  (define wholes (map (λ(s)(string-append "^" s "$")) winners))
  (define parts
    (for*/list ([w (in-list wholes)] [s (in-list (subparts w))] [d (in-list (dotify s))])
      d))
  (remove-duplicates (append wholes (filter (λ(p)(empty? (matches p losers)))
                                            parts))))

;; Returns a set of subparts of word, consecutive characters up to length 4.
(define (subparts word)
  (define len (string-length word))
  (for*/list ([i (in-range len)] [n (in-range (min 4 (- len i)))])
    (substring word i (+ i n 1))))

;; Returns all ways to replace a subset of chars in part with '.'.
(define (dotify part)
  (if (string=? part "")
      (list "")
      (for*/list ([c (in-list (replacements (string-ref part 0)))]
                  [rest (in-list (dotify (substring part 1)))])
        (string-append (string c) rest))))

;; Returns replacement characters for chr (chr + '.' unless chr is special)
(define (replacements chr)
  (if (or (char=? chr #\^) (char=? chr #\$))
      (list chr)
      (list chr #\.)))

(module+ test
  (check set=? (subparts "^it$") '("^" "i" "t" "$" "^i" "it" "t$" "^it" "it$" "^it$"))
  (check set=? (subparts "this") '("t" "h" "i" "s" "th" "hi" "is" "thi" "his" "this"))
  (check set=? (dotify "it") '("it" "i." ".t" ".."))
  (check set=? (dotify "^it$") '("^it$" "^i.$" "^.t$" "^..$"))
  (check set=? (dotify "this") '("this" "thi." "th.s" "th.." "t.is" "t.i." "t..s" "t..." ".his" ".hi." ".h.s" ".h.." "..is" "..i." "...s" "...."))
  (check set=? (replacements #\x) '(#\x #\.))
  (check set=? (replacements #\^) '(#\^))
  (check set=? (replacements #\$) '(#\$))
  (check set=? (regex-components '("win") '("losers" "bin" "won"))
         '("^win$" "^win" "^wi." "wi."  "wi" "^wi" "win$" "win" "wi.$"))
  (check set=? (regex-components '("win") '("losers" "bin" "won" "wine"))
         '("^win$" "win$" "wi.$"))

  (check set=? (matches "a|b|c" '("a" "b" "c" "d" "e"))
                '("a" "b" "c"))
  (check set=? (matches "a|b|c" '("any" "bee" "succeed" "dee" "eee!"))
                '("any" "bee" "succeed"))
  (check-true (verify "a+b+" '("ab" "aaabb") '("a" "bee" "a b")))
  (check-equal? (find-regex '("ahahah" "ciao") '("ahaha" "bye")) "a.$" )
  (check-equal? (OR '("a" "b" "c")) "a|b|c")
  (check-equal? (OR '("a")) "a")
  )

;; Finds a regex to match A but not B, and vice-versa. Prints summary.
(define (find-both A B)
  (for ([W (in-list (list A B))] [L (in-list (list B A))] [legend (in-list '("A-B" "B-A"))])
    (define solution (find-regex W L))
    (when (verify solution W L)
      (define ratio (exact->inexact (/ (string-length (string-append "^(" (OR W) ")$"))
                                       (string-length solution))))
      (printf "~a chars, ~a ratio, ~a winners ~a: ~a\n"
              (string-length solution)
              (~r ratio)
              (length W)
              legend
              solution))))

(module+ main
  (time (find-both winners losers)))
; cpu time: 51633 real time: 51578 gc time: 587

## regex-timing.py
#!/usr/bin/python
import re
import random

lstr_chars = ['a', 'b', 'c', 'd']
lrx_chars = lstr_chars + ['.', '$', '^']

lstr = [ ''.join([random.choice(lstr_chars) for i in range(10)]) for j in range(1000)]
lrx  = [ ''.join([random.choice(lrx_chars) for i in range(4)]) for j in range(1000)]

if __name__ == '__main__':
    #print lstr
    #print lrx
    for s  in lstr:
        for rx in lrx:
            re.search(rx, s)

# 25,78s user 0,01s system 100% cpu 25,772 total

## regex-timing.rkt
#lang racket

(define lstr-chars '(#\a #\b #\c #\d))
(define lrx-chars (list* #\. #\$ #\^ lstr-chars))

(define (choose l)
  (list-ref l (random (length l))))

(define lstr (build-list 1000 (λ(i)(build-string 10 (λ(j)(choose lstr-chars))))))

(define lrx (build-list 1000 (λ(i)(build-string 4 (λ(j)(choose lrx-chars))))))

(void
 (for*/list ([str lstr]
             [rx lrx]
             #:when (regexp-match rx str))
   str))

; 23,23s user 0,07s system 100% cpu 23,274 total
	#!/usr/bin/python
	# From: http://nbviewer.ipython.org/url/norvig.com/ipython/xkcd1313.ipynb
	import re

	def verify(regex, winners, losers):
	"Return true iff the regex matches all winners but no losers."
	missed_winners = {W for W in winners if not re.search(regex, W)}
	matched_losers = {L for L in losers if re.search(regex, L)}
	if missed_winners:
	print "Error: should match but did not:", ', '.join(missed_winners)
	if matched_losers:
	print "Error: should not match but did:", ', '.join(matched_losers)
	return not (missed_winners or matched_losers)

	def findregex(winners, losers):
	"Find a regex that matches all winners but no losers (sets of strings)."
	# Make a pool of regex components, then pick from them to cover winners.
	# On each iteration, add the 'best' component to 'solution',
	# remove winners covered by best, and keep in 'pool' only components
	# that still match some winner.
	pool = regex_components(winners, losers)
	solution = []
	def score(r): return 4 * len(matches(r, winners)) - len(r)
	while winners:
	best = max(pool, key=score)
	solution.append(best)
	winners = winners - matches(best, winners)
	pool = {r for r in pool if matches(r, winners)}
	return OR(solution)

	def matches(regex, strings):
	"Return a set of all the strings that are matched by regex."
	return {s for s in strings if re.search(regex, s)}

	OR = '\|'.join # Join a sequence of strings with '\|' between them

	def regex_components(winners, losers):
	"Return components that match at least one winner, but no loser."
	wholes = {'^'+winner+'$' for winner in winners}
	parts = {d for w in wholes for s in subparts(w) for d in dotify(s)}
	return wholes \| {p for p in parts if not matches(p, losers)}

	def subparts(word):
	"Return a set of subparts of word, consecutive characters up to length 4."
	return set(word[i:i+n] for i in range(len(word)) for n in (1, 2, 3, 4))

	def dotify(part):
	"Return all ways to replace a subset of chars in part with '.'."
	if part == '':
	return {''}
	else:
	return {c+rest for rest in dotify(part[1:])
	for c in replacements(part[0])}

	def replacements(char):
	"Return replacement characters for char (char + '.' unless char is special)."
	if (char == '^' or char == '$'):
	return char
	else:
	return char + '.'

	def words(text): return set(text.lower().split())

	def lines(text): return {line.upper().strip() for line in text.split('/')}

	################ Data

	winners = words('''washington adams jefferson jefferson madison madison monroe
	monroe adams jackson jackson van-buren harrison polk taylor pierce buchanan
	lincoln lincoln grant grant hayes garfield cleveland harrison cleveland mckinley
	mckinley roosevelt taft wilson wilson harding coolidge hoover roosevelt
	roosevelt roosevelt roosevelt truman eisenhower eisenhower kennedy johnson nixon
	nixon carter reagan reagan bush clinton clinton bush bush obama obama''')

	losers = words('''clinton jefferson adams pinckney pinckney clinton king adams
	jackson adams clay van-buren van-buren clay cass scott fremont breckinridge
	mcclellan seymour greeley tilden hancock blaine cleveland harrison bryan bryan
	parker bryan roosevelt hughes cox davis smith hoover landon wilkie dewey dewey
	stevenson stevenson nixon goldwater humphrey mcgovern ford carter mondale
	dukakis bush dole gore kerry mccain romney''')

	losers = losers - winners

	starwars = lines('''The Phantom Menace / Attack of the Clones / Revenge of the Sith /
	A New Hope / The Empire Strikes Back / Return of the Jedi''')

	startrek = lines('''The Wrath of Khan / The Search for Spock / The Voyage Home /
	The Final Frontier / The Undiscovered Country / Generations / First Contact /
	Insurrection / Nemesis''')

	def findboth(A, B):
	"Find a regex to match A but not B, and vice-versa. Print summary."
	for (W, L, legend) in [(A, B, 'A-B'), (B, A, 'B-A')]:
	solution = findregex(W, L)
	assert verify(solution, W, L)
	ratio = len('^(' + OR(W) + ')$') / float(len(solution))
	print '%3d chars, %4.1f ratio, %2d winners %s: %r' % (
	len(solution), ratio , len(W), legend, solution)

	################ Tests

	def test1():
	assert subparts('^it$') == {'^', 'i', 't', '$', '^i', 'it', 't$', '^it', 'it$', '^it$'}
	assert subparts('this') == {'t', 'h', 'i', 's', 'th', 'hi', 'is', 'thi', 'his', 'this'}
	assert dotify('it') == {'it', 'i.', '.t', '..'}
	assert dotify('^it$') == {'^it$', '^i.$', '^.t$', '^..$'}
	assert dotify('this') == {'this', 'thi.', 'th.s', 'th..', 't.is', 't.i.', 't..s', 't...',
	'.his', '.hi.', '.h.s', '.h..', '..is', '..i.', '...s', '....'}
	assert replacements('x') == 'x.'
	assert replacements('^') == '^'
	assert replacements('$') == '$'
	assert regex_components({'win'}, {'losers', 'bin', 'won'}) == {
	'^win$', '^win', '^wi.', 'wi.', 'wi', '^wi', 'win$', 'win', 'wi.$'}
	assert regex_components({'win'}, {'losers', 'bin', 'won', 'wine'}) == {
	'^win$', 'win$', 'wi.$'}
	assert matches('a\|b\|c', {'a', 'b', 'c', 'd', 'e'}) == {'a', 'b', 'c'}
	assert matches('a\|b\|c', {'any', 'bee', 'succeed', 'dee', 'eee!'}) == {
	'any', 'bee', 'succeed'}
	assert verify('a+b+', {'ab', 'aaabb'}, {'a', 'bee', 'a b'})
	assert findregex({"ahahah", "ciao"}, {"ahaha", "bye"}) == 'a.$'
	assert OR(['a', 'b', 'c']) == 'a\|b\|c'
	assert OR(['a']) == 'a'
	assert words('This is a TEST this is') == {'this', 'is', 'a', 'test'}
	assert lines('Testing / 1 2 3 / Testing over') == {'TESTING', '1 2 3', 'TESTING OVER'}
	return 'test1 passes'

	if __name__ == '__main__':
	print test1()
	findboth(winners, losers)
	# python regex-golf.py 1,95s user 0,01s system 99% cpu 1,962 total
	#lang racket

	;;;; Racket translation of: http://nbviewer.ipython.org/url/norvig.com/ipython/xkcd1313.ipynb

	(module+ test
	(require rackunit))

	;; Faster variant of filter that does not keep the initial element order
	(define (filter f list)
	(let loop ([l list] [result null])
	(if (null? l)
	result #;(reverse result) ; do not reverse!
	(loop (cdr l)
	(if (f (car l))
	(cons (car l) result)
	result)))))

	(define winners-syms
	(remove-duplicates
	'(washington adams jefferson jefferson madison madison monroe
	monroe adams jackson jackson van-buren harrison polk taylor pierce buchanan
	lincoln lincoln grant grant hayes garfield cleveland harrison cleveland mckinley
	mckinley roosevelt taft wilson wilson harding coolidge hoover roosevelt
	roosevelt roosevelt roosevelt truman eisenhower eisenhower kennedy johnson nixon
	nixon carter reagan reagan bush clinton clinton bush bush obama obama)))

	(define losers-syms
	(remove-duplicates
	'(jefferson adams pinckney pinckney clinton king adams
	jackson adams clay van-buren van-buren clay cass scott fremont breckinridge
	mcclellan seymour greeley tilden hancock blaine cleveland harrison bryan bryan
	parker bryan roosevelt hughes cox davis smith hoover landon wilkie dewey dewey
	stevenson stevenson nixon goldwater humphrey mcgovern ford carter mondale
	dukakis bush dole gore kerry mccain romney)))

	(define winners (map symbol->string winners-syms))
	;; Some winners are also losers (sometimes with different first names), so we remove them
	(define losers
	(map symbol->string (remove* winners-syms losers-syms)))

	;; Return true iff the regex matches all winners but no losers.
	(define (verify regex winners losers)
	(define missed-winners (filter (λ(w)(not (regexp-match regex w)))
	winners))
	(define matched-losers (filter (λ(l)(regexp-match regex l))
	losers))
	(unless (empty? missed-winners)
	(printf "Error: should match but did not: ~a\n" missed-winners))
	(unless (empty? matched-losers)
	(printf "Error: should not match but did: ~a\n" matched-losers))
	(and (empty? missed-winners) (empty? matched-losers)))


	;; Finds a regex that matches all winners but no losers (lists of strings).
	;; Make a pool of regex components, then pick from them to cover winners.
	;; On each iteration, add the 'best' component to 'solution',
	;; remove winners covered by best, and keep in 'pool' only components
	;; that still match some winner.
	(define (find-regex winners losers)
	(define (score r winners)
	(- (* 4 (length (matches r winners)))
	(string-length r)))
	(let loop ([pool (regex-components winners losers)]
	[winners winners]
	[solution '()])
	(if (empty? winners)
	(OR solution)
	(let* ([best (argmax (λ(r)(score r winners)) pool)]
	[winners (remove* (matches best winners) winners)])
	(loop (filter (λ(r)(not (empty? (matches r winners))))
	pool)
	winners
	(cons best solution))))))

	;; Returns a set of all the strings that are matched by rx
	(define (matches rx lstr)
	(filter (λ(w)(regexp-match? rx w)) lstr))

	;; Joins a sequence of string with "\|" betwen them
	(define (OR lstr)
	(string-join lstr "\|"))

	;; Return components that match at least one winner, but no loser.
	(define (regex-components winners losers)
	(define wholes (map (λ(s)(string-append "^" s "$")) winners))
	(define parts
	(for*/list ([w (in-list wholes)] [s (in-list (subparts w))] [d (in-list (dotify s))])
	d))
	(remove-duplicates (append wholes (filter (λ(p)(empty? (matches p losers)))
	parts))))

	;; Returns a set of subparts of word, consecutive characters up to length 4.
	(define (subparts word)
	(define len (string-length word))
	(for*/list ([i (in-range len)] [n (in-range (min 4 (- len i)))])
	(substring word i (+ i n 1))))

	;; Returns all ways to replace a subset of chars in part with '.'.
	(define (dotify part)
	(if (string=? part "")
	(list "")
	(for*/list ([c (in-list (replacements (string-ref part 0)))]
	[rest (in-list (dotify (substring part 1)))])
	(string-append (string c) rest))))

	;; Returns replacement characters for chr (chr + '.' unless chr is special)
	(define (replacements chr)
	(if (or (char=? chr #\^) (char=? chr #\$))
	(list chr)
	(list chr #\.)))

	(module+ test
	(check set=? (subparts "^it$") '("^" "i" "t" "$" "^i" "it" "t$" "^it" "it$" "^it$"))
	(check set=? (subparts "this") '("t" "h" "i" "s" "th" "hi" "is" "thi" "his" "this"))
	(check set=? (dotify "it") '("it" "i." ".t" ".."))
	(check set=? (dotify "^it$") '("^it$" "^i.$" "^.t$" "^..$"))
	(check set=? (dotify "this") '("this" "thi." "th.s" "th.." "t.is" "t.i." "t..s" "t..." ".his" ".hi." ".h.s" ".h.." "..is" "..i." "...s" "...."))
	(check set=? (replacements #\x) '(#\x #\.))
	(check set=? (replacements #\^) '(#\^))
	(check set=? (replacements #\$) '(#\$))
	(check set=? (regex-components '("win") '("losers" "bin" "won"))
	'("^win$" "^win" "^wi." "wi." "wi" "^wi" "win$" "win" "wi.$"))
	(check set=? (regex-components '("win") '("losers" "bin" "won" "wine"))
	'("^win$" "win$" "wi.$"))

	(check set=? (matches "a\|b\|c" '("a" "b" "c" "d" "e"))
	'("a" "b" "c"))
	(check set=? (matches "a\|b\|c" '("any" "bee" "succeed" "dee" "eee!"))
	'("any" "bee" "succeed"))
	(check-true (verify "a+b+" '("ab" "aaabb") '("a" "bee" "a b")))
	(check-equal? (find-regex '("ahahah" "ciao") '("ahaha" "bye")) "a.$" )
	(check-equal? (OR '("a" "b" "c")) "a\|b\|c")
	(check-equal? (OR '("a")) "a")
	)

	;; Finds a regex to match A but not B, and vice-versa. Prints summary.
	(define (find-both A B)
	(for ([W (in-list (list A B))] [L (in-list (list B A))] [legend (in-list '("A-B" "B-A"))])
	(define solution (find-regex W L))
	(when (verify solution W L)
	(define ratio (exact->inexact (/ (string-length (string-append "^(" (OR W) ")$"))
	(string-length solution))))
	(printf "~a chars, ~a ratio, ~a winners ~a: ~a\n"
	(string-length solution)
	(~r ratio)
	(length W)
	legend
	solution))))

	(module+ main
	(time (find-both winners losers)))
	; cpu time: 51633 real time: 51578 gc time: 587
	#!/usr/bin/python
	import re
	import random

	lstr_chars = ['a', 'b', 'c', 'd']
	lrx_chars = lstr_chars + ['.', '$', '^']

	lstr = [ ''.join([random.choice(lstr_chars) for i in range(10)]) for j in range(1000)]
	lrx = [ ''.join([random.choice(lrx_chars) for i in range(4)]) for j in range(1000)]

	if __name__ == '__main__':
	#print lstr
	#print lrx
	for s in lstr:
	for rx in lrx:
	re.search(rx, s)

	# 25,78s user 0,01s system 100% cpu 25,772 total
	#lang racket

	(define lstr-chars '(#\a #\b #\c #\d))
	(define lrx-chars (list* #\. #\$ #\^ lstr-chars))

	(define (choose l)
	(list-ref l (random (length l))))

	(define lstr (build-list 1000 (λ(i)(build-string 10 (λ(j)(choose lstr-chars))))))

	(define lrx (build-list 1000 (λ(i)(build-string 4 (λ(j)(choose lrx-chars))))))

	(void
	(for*/list ([str lstr]
	[rx lrx]
	#:when (regexp-match rx str))
	str))

	; 23,23s user 0,07s system 100% cpu 23,274 total