Skip to content

Instantly share code, notes, and snippets.

What would you like to do?

Balanced Paren Clusters

Given a string of balanced parentheses, split it up into top-level "clusters".


(clusters "") ;=> []
(clusters "()") ;=> ["()"]
(clusters "(())") ;=> ["(())"]
(clusters "()()()") ;=> ["()" "()" "()"]
(clusters "(()())()(())") ;=> ["(()())" "()" "(())"]

You can assume the strings will only contain \( and \) and will be fully balanced.


  1. Handle unbalanced parens with a nice error message showing the relevant positions.
  2. Handle other braces, such as {} and [].

Thanks to this site for the problem idea, where it is rated Very Hard in Python. The problem has been modified.

Please submit your solutions as comments on this gist.

To subscribe:

Copy link

steffan-westcott commented Jun 21, 2021

(def brackets {\( \), \{ \}, \[ \], \< \>})
(def opening-brackets (set (keys brackets)))
(def closing-brackets (set (vals brackets)))

(defn opening-bracket? [ch]
  (opening-brackets ch))

(defn closing-bracket? [ch]
  (closing-brackets ch))

(defn matching-brackets? [left right]
  (= (get brackets left) right))

(defn clusters [s]
  (loop [result [] lefts [] i 0]
    (let [left (peek lefts)]
      (if (< i (count s))
        (let [ch (nth s i)]
            (opening-bracket? ch)
            (recur result (conj lefts i) (inc i))

            (not (closing-bracket? ch))
            (recur result lefts (inc i))

            (empty? lefts)
            {:error {:type :no-opening-char, :index i}}

            (not (matching-brackets? (nth s left) ch))
            {:error {:type :bracket-mismatch, :left-index left, :right-index i}}

            (= 1 (count lefts))
            (recur (conj result (subs s left (inc i))) (pop lefts) (inc i))

            (recur result (pop lefts) (inc i))))

        (if (empty? lefts)
          {:error {:type :no-closing-char, :index left}})))))

Copy link

mchampine commented Jun 22, 2021

(defn clusters [p]
  (->> (map {\( 1 \) -1} p)
       (reductions +)
       (map vector p)
       (partition-by #(= [\) 0] %))
       (partition 2)
       (map #(apply concat %))
       (map #(apply str (map first %)))
       (into [])))

or, compactly:

(defn clusters [p]
  (let [depth (reductions + (map {\( 1 \) -1} p))
        pairs (partition-by #(= [\) 0] %) (map vector p depth))
        clust (map #(apply concat %) (partition 2 pairs))]
    (into [] (map #(apply str (map first %)) clust))))

Copy link

sztamas commented Jun 22, 2021

(defn clusters [s]
  (let [remove-spaces #(clojure.string/replace % " " "")]
    (->> (str "(" s ")")
         (mapv (comp remove-spaces str)))))

Copy link

vpetruchok commented Jun 22, 2021

(defn balanced-stack [s start end]
  (let [s (map vector s (range start end))]
    (loop [st           [] ;stack
           [fst & more] s ]
      (if-not fst
        (let [[ch i] fst]
          (case ch
            (recur (conj st i) more)

            (if (empty? st)
              (recur (conj st i) nil)
              (recur (pop st) more))

            (recur st more)))))))

(defn clusters [s]
  (loop [start 0
         end   1
         res   []
         st    []]
    (if (> end (count s))
      (if (empty? st)
        (str "Error: unbalanced input: pos=" (peek st)))
      (let [candidate (subs s start end)
            st        (balanced-stack candidate start end)]
        (if (empty? st)
          (recur end   (inc end) (conj res candidate) st)
          (recur start (inc end) res st))))))

Copy link

ehonsey commented Jun 23, 2021

(defn- balance-parens [char {:keys [stack coll]}]
  (let [result {:stack (if (= char \()
                         (conj stack char)
                         (into [] (rest stack)))
                :coll  (conj (into [] (drop-last coll))
                             (str (last coll) char))}]
    (if (empty? (:stack result))
      (update result :coll #(conj % ""))

(defn clusters [s]
  (->> (seq s)
       (reduce (fn [acc char] (balance-parens char acc))
               {:stack [] :coll []})
       (filter seq)))

Copy link

JonathanHarford commented Jun 26, 2021

(defn clusters [s']
  (loop [[x & xs] s'
         acc []
         acc-cluster []
         polarity 0]
      (nil? x) acc
      (= x \() (recur xs
                      (conj acc-cluster x)
                      (inc polarity))
      (= x \)) (if (= 1 polarity)
                 (recur xs
                        (conj acc (apply str (conj acc-cluster x)))
                 (recur xs
                        (conj acc-cluster x)
                        (dec polarity))))))

Copy link

diavoletto76 commented Jun 28, 2021

(defn count-par [x]
  (cond (= x \()  1
        (= x \)) -1))

(defn to-string [xs]
  (mapv (partial apply str) xs))

(defn clusters
  ([xs] (to-string (clusters xs [] 0 0)))
  ([[x & xs :as xall] acc index balance]
   (cond (< balance 0) (throw (ex-info "Unbalanced par" {:index index}))
         (and (empty? xall) (empty? acc)) acc
         (and (empty? xall) (> balance 0)) (throw (ex-info "Unbalanced par" {:index index}))
         (empty? xall) (vector acc)
         (and (> index 0) (zero? balance))
         (lazy-seq (cons acc (clusters xs [x] (inc index) (+ balance (count-par x)))))
         :else (clusters xs (conj acc x) (inc index) (+ balance (count-par x))))))

Copy link

KingCode commented Oct 19, 2021

EDIT: Added an arity to clusters to allow plugin-style arbitrary balancing characters (see bottom examples).

Phew... This one is an exercise in understanding that all/most of the work is already done in the input!

Strategy is to maintain a stack of the most recently unmatched opening chars:

  • when reading an opening character, append it to the current cluster, push it on the stack and continue
  • otherwise:
    -- if the new char matches the stack's, append it to the cluster being built, pop the stack and continue.
    -- otherwise output an explanation and location of the error and return nil.
  • when the stack is empty the new cluster is complete: append it to the result and continue

Since a valid (including empty or null) input string never results in a nil return, nil can be returned unambiguously
upon error without having to fill the screen with a stack trace:

(defn ->config [config-str]
  (let [c->o (->> config-str seq (partition 2) (mapcat reverse)
                  (apply hash-map))
        os (set (vals c->o))
        cs (set (keys c->o))]
    {:opener c->o, :opener? os, :closer? cs}))

(defn push [stack, c, idx] (conj stack [c idx]))
(defn peek-char [stack] (first (peek stack)))
(declare message-and-nil)

(defn clusters 
   (clusters s "()[]{}"))
  ([s config-str]
   (let [{:keys [opener opener? closer?]} (->config config-str)]
     (loop [unread (seq s), stack [], current [],
            clusters [], idx 0, error nil]
       (let [c (first unread)
             peeked (peek-char stack)]
           error ;; failed
           (message-and-nil error s)

           (and (empty? stack) (seq current)) ;; new cluster
           (recur unread, stack, [], 
                  (conj clusters current)
                  idx, nil)

           (and (not c) (seq stack)) ;; done, prematurely
           (recur nil nil nil nil nil {:error :incomplete 
                                       :idx idx :unmatched (peek stack)})

           (not c)  ;; done successfully
           (map (partial apply str) clusters)
           (opener? c) ;; adding opener to budding cluster
           (recur (rest unread)
                  (push stack c idx)
                  (conj current c), clusters, (inc idx), nil)

           (and (closer? c) (not= (opener c) peeked)) ;; can't complete cluster 
           (recur nil nil nil nil nil {:error :bad-match :opener (peek stack)
                                       :char c :idx idx})

           (closer? c)  ;; adding closer to budding cluster 
           (recur (rest unread), (pop stack)
                  (conj current c), clusters, (inc idx), nil)

           :else ;; illegal character error 
           (recur nil nil nil nil nil {:error :bad-char :char c :idx idx})))))))

(defn mark [at-idxs s]
  (let [s (.concat s " "), at-idxs (set at-idxs) 
        marks (->> (for [i (range (count s))
                         :let [c (if (at-idxs i) \^ \space)]]
                     c) (apply str))]
    (str s "\n" marks)))

(defn msg [& txt] (->> txt (interpose "\n") (apply str) println)) 

(defn message-and-nil [{:keys [error idx opener unmatched] c :char} s] 
  (let [[o o-idx] (cond opener opener unmatched unmatched :else [nil nil])]
    (case error
      (if (nil? opener)
        (msg (str "Too much input: character '" c "' (index " idx ")") 
             (mark [idx] s))
        (msg (str "Bad match: character '" c " (index " idx ")"
                  " doesn't match character '" o "' (index " o-idx ")")
             (mark [idx o-idx] s)))

      (msg (str "Incomplete: unmatched character '" o "' (index " o-idx ")")
           (mark [idx o-idx] s))

      (msg (str "Bad character '" c "' at index " idx)
           (mark [idx] s))))

(clusters "((())())()") ;=> ["((())()) ()"]
(clusters "([{}[()]]){()}") ;=> ("([{}[()]])" "{()}")
(clusters "([{}[()]ooops){()}") 
;;=> Bad character 'o' at index 8
;;=> ([{}[()]ooops){()} 
;;=>         ^
;;=> nil
(clusters "(()]")
;; => Bad match: character '] (index 3) doesn't match character '(' (index 0)
;; => (()] 
;; => ^  ^ 
;; nil
(clusters "(")
;; => Incomplete: unmatched character '(' (index 0)
;; => ( 
;; => ^^
;; => nil
(clusters "())")
;; => Too much input: character ')' (index2)
;; => ()) 
;; =>   ^ 
;; => nil
(clusters "abcdef" "afbecd") ;;=> ("abcdef")
(clusters "abcdef" "abcdef") ;;=> ("ab" "cd" "ef")
(clusters "abcd(ef" "afbecd")
;; => Bad character '(' at index 4
;; => abcd(ef 
;; =>     ^   
;; => nil

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment