Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
435 PurelyFunctional.tv Newsletter

Longest Delimited Substring

A delimited string is a string that starts with a character, ends with the same character, and doesn't contain the character anywhere else (besides the beginning and end). Here are some examples:

  • "abbbbbbba" is delimited because it starts and ends with \a.
  • "ajjjjaffa" is not delimited because, though it starts and ends with \a, it also contains \a inside.
  • "bkfifoifu" is not delimited because it doesn't end with the same character it starts with.
  • "aa" is delimited.
  • "aufodiufa" is delimited.

Your task is to write a function that returns the longest delimited substring of a given string.

Examples:

(delimited "ffdsfuiofl") ;=> "fuiof"
(delimited "abbcdefg") ;=> "bb"
(delimited "opoiifdopf") ;=> "poiifdop"

In the case of ties, return the substring that appears first.

Thanks to this site for the problem idea, where it is rated Expert in JavaScript. The problem has been modified.

Please submit your solutions as comments on this gist.

To subscribe: https://purelyfunctional.tv/newsletter/

@steffan-westcott

This comment has been minimized.

Copy link

@steffan-westcott steffan-westcott commented Jul 19, 2021

(defn delimited [s]
  (some->> (range (count s))
           (keep #(->> (subs s %)
                       (re-find #"^(.).*?\1")
                       first))
           not-empty
           (reduce #(max-key count %2 %1))))
@jonasseglare

This comment has been minimized.

Copy link

@jonasseglare jonasseglare commented Jul 19, 2021

Linear time complexity:

(defn acc [[m i offs n] c]
  (into [(assoc m c i) (inc i)]
        (or (if-let [l (get m c)]
              (let [n+ (- (inc i) l)]
                (if (< n n+)
                  [l n+])))
            [offs n])))

(defn delimited [input]
  (let [[_ _ offs n] (reduce acc [{} 0 0 0] input)]
    (if (< 0 n)
      (subs input offs (+ offs n)))))
@diavoletto76

This comment has been minimized.

Copy link

@diavoletto76 diavoletto76 commented Jul 19, 2021

(defn delimited? [xs]
  (and (= (first xs) (last xs))
       (not (some (set (take 1 xs)) (butlast (rest xs))))))

(defn delimited-subs [xs]
  (for [start (range 0 (count xs))
        end (range 0 (inc (count xs)))
        :when (> end start)
        :let [substring (subs xs start end)]
        :when (delimited? substring)]
    substring))

(defn longer [xs]
  (loop [[x' & xs' :as xs-all] xs acc nil]
    (if (empty? xs-all)
      acc
      (recur xs' (if (> (count x') (count acc)) x' acc)))))

(defn delimited [xs]
  (->> (delimited-subs xs)
       (longer)))
@kawas44

This comment has been minimized.

Copy link

@kawas44 kawas44 commented Jul 19, 2021

(defn delimited-seq [s]
  (lazy-seq
    (when (fnext s)
      (let [singl (set (take 1 s))]
        (if-let [ds (and (seq (filter singl (next s)))
                         (take-while (complement singl) (next s)))]
          (cons (concat singl ds singl)
                (delimited-seq (rest s)))
          (delimited-seq (rest s)))))))

(defn delimited [s]
  (->> (delimited-seq s)
       (sort-by count >)
       (first)
       (apply str)))
@kthu

This comment has been minimized.

Copy link

@kthu kthu commented Jul 19, 2021

  (defn delimited-subs
    [s delimitor]
    (->> s
         (drop (:index delimitor))
         (take (inc (:length delimitor)))
         (apply str)))


  (defn longest-delimitor
    [delimitors]
    (apply max-key :length delimitors))


  (defn char-indexes->delimitors
    [is]
    (mapv (fn [[a b]]
            {:index  b
             :length (- a b)})
          (partition 2 1 is)))


  (defn char-indexes
    [s]
    (loop [index 0
           ch    (first s)
           st    (rest s)
           res   {}]
      (if (empty? st)
        (vals res)
        (recur (inc index)
               (first st)
               (rest st)
               (update res ch conj index)))))


  (defn delimited
    [s]
    (->> (char-indexes s)
         (map char-indexes->delimitors)
         (map longest-delimitor)          ; Longest of each char
         (longest-delimitor)              ; Longest overall
         (delimited-subs s)))


  (delimited "ffdsfuiofl") ;=> "fuiof"
  (delimited "abbcdefg")   ;=> "bb"
  (delimited "opoiifdopf") ;=> "poiifdop"
@luskwater

This comment has been minimized.

Copy link

@luskwater luskwater commented Jul 19, 2021

(defn vconj
  "Append `item` to vector `v` using `conj`; if `v` is null, return vector `[item]`"
  [v item]
  (if (nil? v)
    (vector item)
    (conj v item)))

(defn indices
  "Record characters in a string and their index values

  \"bbb\" => {\\b [0 1 2]}
  \"oqloc\" => {\\o [0 3]
                \\q [1]
                \\l [2]
                \\c [4]}
  "
  [s]
  (loop [[ch & chs] (seq s)
         result {}
         i 0]
    (if (nil? ch)
      result
      (recur chs (update result ch vconj i) (inc i)))
    )
  )


(defn max-seq
  "Given `[ch [i0 i1 ...]]`, find maximum `len` = `i(n) - i(n-1)`, return `[len, ch, i(n-1)]`"
  [[ch indexes]]
  (loop [max 0
         index-longest 0
         [a-pair & pairs] (partition 2 1 indexes)]
    (if (nil? a-pair)
      [max ch index-longest]
      (let [[i-n-1 i-n] a-pair
            len (- i-n i-n-1)]
        (if (> len max)
          (recur len i-n-1 pairs)
          (recur max index-longest pairs))))))


(defn delimited
  "Identify indices of characters appearing more than once.
  Remove uninteresting characters.
  Find the longest distance between two of the same character.
  Take the results and sort appropriately to get first occurrence of longest result.
  Extract that substring, or return `nil`"
  [s]
  (let [indices (indices s)
        multiples (filter (fn [[_ v]]
                            (> (count v) 1)) indices)
        letters (map max-seq multiples)]
    (when-not (empty? letters)
      (let [[len ch start] (->> letters
                                (sort-by #(- (last %)))
                                (sort-by #(- (first %)))
                                first)]
        (subs s start (+ start (inc len)))))))```
@kthu

This comment has been minimized.

Copy link

@kthu kthu commented Jul 20, 2021

Updated to properly deal with ties

(defn delimited-subs
  [s delimitor]
  (->> s
       (drop (:index delimitor))
       (take (inc (:length delimitor)))
       (apply str)))


(defn char-indexes->delimitors
  [is]
  (mapv (fn [[a b]]
          {:index  b
           :length (- a b)})
        (partition 2 1 is)))


(defn char-indexes
  [s]
  (loop [index 0
         ch    (first s)
         st    (rest s)
         res   {}]
    (if (nil? ch)
      (vals res)
      (recur (inc index)
             (first st)
             (rest st)
             (update res ch conj index)))))


(defn delimited
  [s]
  (->> (char-indexes s)                ; list of indexes for each appearing char
       (map char-indexes->delimitors)  ; a delimitor is a map containing the start index and length of delimited substrings
       flatten
       (sort-by (juxt :length :index)
                #(compare %2 %1))
       (partition-by :length)
       first                           ; Longest delimitors
       last                            ; Longest delimitor with lowest index
       (delimited-subs s)))            ; The substring
@jpmonettas

This comment has been minimized.

Copy link

@jpmonettas jpmonettas commented Jul 20, 2021

(defn delimited [s]
  (let [[a b] (-> (reduce (fn [{:keys [i last-seen] :as r} c]
                            (-> r
                                (update :i inc)
                                (update :last-seen assoc c i)
                                (update :longest (fn [[a b]]                                   
                                                   (let [last-c-idx (get last-seen c i)]
                                                     (if (> (- i last-c-idx) (- b a))
                                                       [last-c-idx i]
                                                       [a b]))))))
                          {:i 0
                           :longest [0 0]
                           :last-seen {}}
                          s)
                  :longest)]
    (subs s a (inc b))))
@safehammad

This comment has been minimized.

Copy link

@safehammad safehammad commented Jul 21, 2021

I'm sticking to the principle that, if it involves twiddling text, there has to be a regex out there to simplify things. And sometimes complicate things ;) This uses a non-greedy match for delimited text then searches for the longest, favouring the first match.

(defn delimited [s]
  (->> (mapcat #(re-seq (re-pattern (str % ".*?" %)) s) s)
       (reverse)
       (apply max-key count)))
@jpmonettas

This comment has been minimized.

Copy link

@jpmonettas jpmonettas commented Jul 21, 2021

@safehammad

Nice, you can save some time by mapping over the (set s) instead of the entire input, like :

(defn delimited [s]
  (->> (set s)
       (mapcat #(re-seq (re-pattern (str % ".*?" %)) s))
       (apply max-key count)))

but one problem I see with regex is that you will have to escape special characters or things like this wont work :

(delimited "a$bbbb$c") ;=> "bb"
@safehammad

This comment has been minimized.

Copy link

@safehammad safehammad commented Jul 21, 2021

Nice, you can save some time by mapping over the (set s) instead of the entire input

Thanks, @jpmonettas! And good point about the special characters.

So (reverse) needs to be part of the pipeline to ensure that the first substring is returned in the case of a tie.

@miner

This comment has been minimized.

Copy link

@miner miner commented Jul 22, 2021

I have a transducer solution. The internal bookkeeping finds all the indices for each character. The xform converts to [distance -start] vectors for the pairwise indices. These vectors compare appropriately for finding the longest. Converting back to a substring requires a little math.

(defn delimited [s]
  (transduce (comp (filter next)
                   (mapcat (fn [rs] (map (fn [end st] (vector (- end st) (- st))) rs (rest rs)))))
             (fn ([di ej] (if (neg? (compare di ej)) ej di))
                 ([[d i]] (when (pos? d) (subs s (- i) (- (inc d) i)))))
             [-1]
             (vals (reduce-kv (fn [m i ch] (update m ch conj i)) {} (vec s)))))
@alex-gerdom

This comment has been minimized.

Copy link

@alex-gerdom alex-gerdom commented Jul 22, 2021

(defn delimited? [s]
  (let [middle (-> s rest butlast)
        delim  (first s)]
    (and (= (first s) (last s))
         (empty? (filter #{delim} middle)))))

(defn delimited [s]
  (let [s-len (count s)]
    (loop [position 0
           to-take  (count s)]
      (cond
        ;; Case: Empty string
        (= to-take 0) ""
        ;; Case: We've passed the end of the string
        (< s-len (+ position to-take)) (recur 0 (dec to-take))
        ;; Case: Successfully Found
        (delimited? (->> s (drop position) (take to-take)))
        (->> s (drop position) (take to-take) (apply str))
        ;; Default Case: Advance search by 1 character
        :else (recur (inc position) to-take)))))
@celebrimbor379

This comment has been minimized.

Copy link

@celebrimbor379 celebrimbor379 commented Jul 23, 2021

I'm relatively new to Clojure and am a bit surprised no other solutions have used map-indexed -- is this considered an anti-pattern?

I'm also a bit surprised the Clojure standard library doesn't provide a version of group-by that takes a function to extract the value. The key can be the result of a function, but the value is added as-is. Otherwise pretty much the whole thing would be possible using just higher-order functions.

Great challenge, this was fun to noodle on and I learned a lot looking at everybody else's solutions!

(defn reverse-diff [[start end]]
  (- end start))

(def get-max-tuple (partial apply max-key reverse-diff))

(def indices-to-tuples (partial partition 2 1))

(defn group-by-chars [acc [idx chr]]
  (update acc chr (fnil conj []) idx))

(defn find-start-and-end [input-string]
  (->> input-string                                 ;; "wmmmcpwoqm"
       (map-indexed vector)                         ;; ([0 w] [1 m] [2 m] [3 m] [4 c] [5 p] [6 w] [7 o] [8 q] [9 m])
       (reduce group-by-chars {})                   ;; {w [0 6], m [1 2 3 9], c [4], p [5], o [7], q [8]}
       (map second)                                 ;; ([0 6] [1 2 3 9] [4] [5] [7] [8])
       (filter #(> (count %) 1))                    ;; ([0 6] [1 2 3 9])
       (map (comp get-max-tuple indices-to-tuples)) ;; ((0 6) (3 9))
       (sort-by first >)                            ;; ((3 9) (0 6))
       get-max-tuple))                              ;; (0 6)

(defn delimited [input-string]
  (let [[start end] (find-start-and-end input-string)]
    (subs input-string start (inc end)))) 
@mcuervoe

This comment has been minimized.

Copy link

@mcuervoe mcuervoe commented Jul 23, 2021

(defn delimited [s]
  (->> (range (count s))
       (reduce (fn [longest i]
                 (let [substr (subs s i)
                       found-groups (re-find #"^(.)(.*?)\1" substr)
                       found ((fnil first "") found-groups)]
                   (if (> (count found) (count longest))
                     found
                     longest)))
               "")))
@vindkaldr

This comment has been minimized.

Copy link

@vindkaldr vindkaldr commented Jul 24, 2021

(defn extract-indexes
  [string character]
  (->> (map-indexed #(when (= %2 character) %1) string)
       (filter number?)))

(defn generate-ranges
  [indexes]
  (partition 2 1 indexes))

(defn extract-substring
  [string range]
  (subs string (first range) (inc (second range))))

(defn delimited
  ([string]
   (->> (distinct string)
        (mapcat #(delimited string %))
        (sort-by count)
        (partition-by count)
        (last)
        (first)))
  ([string character]
   (->> (extract-indexes string character)
        (generate-ranges)
        (map #(extract-substring string %)))))


(deftest longest-delimited-substring
  (testing "blank parameters"
    (is (= (delimited nil) nil))
    (is (= (delimited "") nil)))
  (testing "without delimited substring"
    (is (= (delimited "abc") nil)))
  (testing "with delimited substring"
    (is (= (delimited "aba") "aba"))
    (is (= (delimited "xabay") "aba")))
  (testing "with delimited substrings of same lengths"
    (is (= (delimited "abaca") "aba"))
    (is (= (delimited "xabacay") "aba")))
  (testing "with delimited substrings of different lengths"
    (is (= (delimited "abacca") "acca"))
    (is (= (delimited "xabaccay") "acca")))
  (testing "examples"
    (is (= (delimited "ffdsfuiofl") "fuiof"))
    (is (= (delimited "abbcdefg") "bb"))
    (is (= (delimited "opoiifdopf") "poiifdop"))))
@miner

This comment has been minimized.

Copy link

@miner miner commented Jul 24, 2021

It's slightly faster if we take advantage of short-circuiting when the current longest is longer than the possible width of the remaining substrings. Also, the shortest possible string would be 2 chars so we can skip a couple of candidates. This version is based on some of the other contributions, particularly the regex approach. [Updated to simplify logic.]

(defn delimited [s]
  (let [cnt (count s)]
    (reduce (fn [longest start]
              (let [len (count longest)]
                (if (>= len (- cnt start))
                  (reduced longest)
                  (let [found (nth (re-find #"^(.).*?\1" (subs s start)) 0)]
                    (if (>= len (count found)) longest found)))))
            nil
            (range (- cnt 2)))))
@mainej

This comment has been minimized.

Copy link

@mainej mainej commented Jul 25, 2021

I came up with a few solutions. This one is pretty minimalist.

(def fconj (fnil conj []))

(defn subs-from-bounds [s [start end]]
  (subs s start (inc end)))

(defn longest-delimited
  "Returns the longest delimited substring of `s`, if one exists."
  [s]
  (some->> #_1 s
           #_2 (map-indexed vector)
           #_3 (reduce (fn [result [i c]]
                         (update result c fconj i))
                       {})
           #_4 (mapcat (fn [[_c positions]]
                         (partition 2 1 positions)))
           #_5 seq
           #_6 (sort-by (fn [[start _end]] (- start)))
           #_7 (apply max-key (fn [[start end]] (- end start)))
           #_8 (subs-from-bounds s)))
  1. The original string
    "xababbbab"
  2. Characters and their positions
    [[\x 0] [\a 1] [\b 2] [\a 3] ,,,]
  3. Map of characters to all their positions
    {\x [0], \a [1 3 7], \b [2 4 5 6 8]}
  4. The pairwise positions for each character, concatenated. These are the
    bounds of (all) the delimited substrings. Conveniently, this discards
    characters that have only one occurrence.
    [[1 3] [3 7]
    [2 4] [4 5] [5 6] [6 8]]
  5. Short-circuit when no delimited substring exists
  6. Process earlier delimited substrings last, so that (7) resolves ties with
    the first.
    [[6 8] [5 6] [4 5] [3 7] [2 4] [1 3]]
  7. Bounds of the longest delimited substring
    [3 7]
  8. The longest delimited substring
    "abbba"

Edit: Fix to return first delimited substring in case of ties.

@igajsin

This comment has been minimized.

Copy link

@igajsin igajsin commented Jul 25, 2021

The idea is quite simple:

  1. Populate a map {:letter [substring1 substring2 … substringN]}
  2. Filter only delimited substrings
  3. Sort and take the longest.
(defn add-new-letter [db l k]
  (conj db {k [l]}))

(defn add-old-letter [db l k]
  (let [old-value (k db)]
    (conj db {k (conj old-value l)})))

(defn add-l [db k l]
  (let [v (k db)
        hd (first v)
        tl (rest v)
        new-v (conj tl (str hd l))]
    (assoc-in db [k] new-v)))

(defn add-value [db l]
  (reduce #(add-l %1 %2 l) db (keys db)))

(defn add-key [db l]
  (let [k (keyword l)]
    (if (contains? db k) (add-old-letter db l k)
        (add-new-letter db l k))))

(defn prepare [s]
  (let [db {}
        l (clojure.string/split s #"")]
    (reduce #(add-key (add-value %1 %2) %2) db l)))

(defn delimited? [s]
  (cond
   (zero? (count s)) true ;; an empty string is delimited
   (= 1 (count s)) false ;; a single letter is not delimited
   (and
    (= (first s) (last s)) ;; same start and end
    (not                   ;; no delimiter in the middle of a string
     (clojure.string/includes? (subs s 1 (dec (count s)))
                               (str (first s))))) true
   :else false))

(defn delimited [s]
  (let [db (prepare s)
        delimited-strings (filter delimited? (mapcat #(% db) (keys db)))
        sorted-delimited-strings (sort #(> (count %1) (count %2)) delimited-strings)]
    (first sorted-delimited-strings)))

(defn test []
  (and
   (= (delimited "ffdsfuiofl") "fuiof")
   (= (delimited "abbcdefg")  "bb")
   (= (delimited "opoiifdopf") "poiifdop")))

@KingCode

This comment has been minimized.

Copy link

@KingCode KingCode commented Oct 6, 2021

(defn windows 
  "Constructs a set of all contiguous subsequences of xs."
[xs]
  (let [siz (count xs) 
        step (fn [subs n]
               (if (< siz n)
                 subs
                 (recur (conj subs (take n xs))
                        (inc n))))]
    (if (empty? xs)
      #{[]}
      (-> (windows (drop 1 xs))
          (into (step #{} 0))))))

(defn delimited [s]
  (->> s windows
       (sequence 
        (comp (filter #(< 1 (count %)))
              (filter #(= (first %) (last %)))
              (filter (fn [window]
                        (let [not-delim? (complement #{(first window)})]
                          (->> window rest butlast
                               (every? not-delim?)))))))
       (sort-by (comp - count))
       ;; below code preserves order for competing best delimiters
       (partition-by count)
       first
       (map (partial apply str))
       (reduce (fn 
                 ([] nil) ;; no delimited string found
                 ([early-bird kand]
                  (if (< (.indexOf s kand)
                         (.indexOf s early-bird))
                    kand
                    early-bird))))))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment