Skip to content

Instantly share code, notes, and snippets.

@erikprice
Created January 28, 2010 23:34
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save erikprice/289274 to your computer and use it in GitHub Desktop.
Save erikprice/289274 to your computer and use it in GitHub Desktop.
(ns datasifter)
;; Write a data sifter, sift, that partitions a string into a list of lists.
;; Start with the case of using letters as a delimiter, and numbers as data.
;; There can be any number of repetitions of numbers & letters.
;;
;; user=>(sift "a1b2cd34")
;; (("a" ("1")) ("b" ("2")) ("c" ()) ("d" ("3" "4")))
;;
;; from http://fulldisclojure.blogspot.com/2010/01/code-kata-data-sifter.html
(defn ^:dynamic is-delimiter?
  "Returns true if d represents a delimiter, false otherwise.

  d may be a character or a string; a string is classified by its first
  character. Any other value (including nil) and the empty string are not
  delimiters.

  The var is declared ^:dynamic because the two-argument arity of sift
  rebinds it with `binding`, which Clojure 1.3+ only allows on dynamic vars."
  [d]
  (cond
    (char? d)   (Character/isLetter (char d))
    ;; An empty string has no first character; treat it as non-delimiter
    ;; instead of handing nil to Character/isLetter.
    (string? d) (if-let [c (first d)]
                  (Character/isLetter (char c))
                  false)
    :else       false))
(defn split-delimiter
  "Peels an optional leading delimiter off s.

  Returns a two-element vector [head tail]: head holds the first item of s
  when is-delimiter? accepts it (otherwise head is empty), and tail is
  everything after head."
  [s]
  (let [delimiter-count (if (is-delimiter? (first s)) 1 0)]
    ;; Same pair that split-at would produce for delimiter-count.
    [(take delimiter-count s) (drop delimiter-count s)]))
(defn split-data
  "Separates the leading run of non-delimiter items from the rest of s.

  Returns a two-element vector [data tail]: data is every item before the
  first one accepted by is-delimiter?, and tail starts at that item."
  [s]
  (let [data? (fn [item]
                ;; Look is-delimiter? up on every call so that a rebinding in
                ;; effect at realisation time is honoured.
                (not (is-delimiter? item)))]
    (split-with data? s)))
(defn make-chunk
  "Builds one chunk: a two-element list (label items).

  label is the first element of delim rendered as a string, or the empty list
  when delim is empty. items is a list holding each element of data rendered
  as a string."
  [delim data]
  (let [label (if (seq delim)
                (str (first delim))
                '())
        items (->> data
                   (map str)
                   (apply list))]
    (list label items)))
(defn split-chunk
  "Consumes one chunk from the front of s.

  Returns a two-element vector [chunk leftover], where chunk is built by
  make-chunk from the optional leading delimiter and the data that follows it,
  and leftover is whatever part of s comes after that data."
  [s]
  (let [[delim tail]       (split-delimiter s)
        [payload leftover] (split-data tail)
        chunk              (make-chunk delim payload)]
    [chunk leftover]))
(defn normalize
  "Returns a lazy sequence in which every string element of strings-or-chars
  is replaced by its first character; all other elements pass through
  unchanged."
  [strings-or-chars]
  (letfn [(as-char [item]
            (if (= java.lang.String (type item))
              (first item)
              item))]
    (map as-char strings-or-chars)))
(defn sift
  "Partitions s into a list of chunks, one per delimiter (see make-chunk).

  s may be a string or a collection of characters and/or one-character
  strings; it is passed through normalize before splitting. By default a
  delimiter is any letter. With two arguments, pred replaces is-delimiter?
  for the duration of the call.

  NOTE: the two-argument arity relies on `binding`, which on Clojure 1.3+
  only works when is-delimiter? is a dynamic var."
  ([s]
   (loop [pending (normalize s)
          acc     '()]
     (if (seq pending)
       (let [[chunk more] (split-chunk pending)]
         (recur more (conj acc chunk)))
       ;; conj prepends to a list, so restore input order before returning.
       (reverse acc))))
  ([pred s]
   (binding [is-delimiter? pred]
     (sift s))))
;; Sample inputs: the same data as a string, a list of one-character strings,
;; and a vector of one-character strings.
(def *test-str* "a1b2cd34")
(def *test-list* '("a" "1" "b" "2" "c" "d" "3" "4"))
(def *test-vector* ["a" "1" "b" "2" "c" "d" "3" "4"])
;; REPL smoke checks; results are discarded when the file is loaded.
;; With the default letter delimiter, each of the next three calls should give
;; (("a" ("1")) ("b" ("2")) ("c" ()) ("d" ("3" "4"))).
(sift *test-str*)
(sift *test-list*)
(sift *test-vector*)
;; Custom delimiter: only the character \c delimits, so the expected result is
;; ((() ("a" "1" "b" "2")) ("c" ("d" "3" "4"))).
;; NOTE(review): this call rebinds is-delimiter? via `binding`; on Clojure 1.3+
;; that requires the var to be dynamic -- confirm against the target version.
(sift #(= \c %) *test-vector*)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment