Returns a seq of [start end] offsets for the case-insensitive, delimiter-bounded occurrences of substring in s.
(defn substring-offsets
  "Returns a seq of [start end] offsets, in ascending order, for every
  case-insensitive occurrence of `substring` in `s` that is bounded on each
  side by either the edge of `s` or a character accepted by `delimiter?`.

  `start` is inclusive and `end` is exclusive. Returns nil when `substring`
  is empty or when no delimiter-bounded occurrence exists.

  NOTE(review): offsets index the lower-cased copy of `s`; this presumably has
  the same length as `s` for typical input -- confirm for non-ASCII text."
  [s substring]
  (when-not (empty? substring)
    (let [s                (.toLowerCase s)
          substring        (.toLowerCase substring)
          s-length         (count s)
          substring-length (count substring)]
      ;; Accumulate in a vector so conj appends and matches stay in the order
      ;; they occur in s (conj onto nil/list would prepend and reverse them).
      (loop [offset  0
             offsets []]
        (let [start (.indexOf s substring offset)]
          (if (= -1 start)
            ;; seq turns an empty vector into nil, preserving the
            ;; "nil when nothing matched" result.
            (seq offsets)
            (let [end                  (+ start substring-length)
                  beginning-of-string? (= 0 start)
                  end-of-string?       (= s-length end)
                  ;; Neighbours are passed to delimiter? as one-character
                  ;; strings; nil when the match touches an edge of s.
                  char-before          (when-not beginning-of-string?
                                         (subs s (- start 1) start))
                  char-after           (when-not end-of-string?
                                         (subs s end (+ 1 end)))]
              (if (and (or beginning-of-string? (delimiter? char-before))
                       (or end-of-string? (delimiter? char-after)))
                ;; Accepted: record it and resume scanning after the match.
                (recur end (conj offsets [start end]))
                ;; Rejected (e.g. found inside a longer word): keep scanning
                ;; from the next character instead of abandoning the search,
                ;; so later valid occurrences are still found.
                (recur (inc start) offsets)))))))))

(substring-offsets "The quick brown fox jumps over the lazy dog" "the")
;; => ([0 3] [31 34])
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment