Skip to content

Instantly share code, notes, and snippets.

@luisgabriel
Created April 1, 2013 20:58
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save luisgabriel/5287722 to your computer and use it in GitHub Desktop.
Save luisgabriel/5287722 to your computer and use it in GitHub Desktop.
(ns tsearch.core
(:require [clojure.java.io :as cjio])
(:require [tsearch.scanner :as scanner])
(:require [tsearch.lexer :as lexer])
(:gen-class))
(defn occurrences-of
  "Reads `file` line by line and threads every line through
  `lexer/process-content`, starting from an empty map.
  Returns the map produced after the last line has been processed."
  [file]
  (with-open [rdr (cjio/reader file)]
    ;; reduce is eager, so every line is consumed while the reader is still open.
    (reduce (fn [index line]
              (lexer/process-content line index))
            (hash-map)
            (line-seq rdr))))
(defn all-occurrences
  "Computes the occurrences map of every entry in `files`.
  Returns a list of `[canonical-path occurrences]` pairs. Pairs are
  conj'ed onto a list, so the result is in reverse order of `files`."
  [files]
  (reduce (fn [pairs f]
            ;; Index the file first, then resolve its canonical path.
            (let [found (occurrences-of f)]
              (conj pairs [(.getCanonicalPath f) found])))
          (list)
          files))
(defn -main
  "Command-line entry point.
  Expects the path to scan as the first argument, indexes every regular
  file found under it and prints, per file, its canonical path followed
  by the number of distinct entries in its occurrences map.
  Throws IllegalArgumentException when no path argument is given."
  [& args]
  ;; work around dangerous default behaviour in Clojure
  (alter-var-root #'*read-eval* (constantly false))
  (let [path (first args)]
    ;; Fail fast with a readable message instead of a NullPointerException
    ;; raised later from inside the file scan.
    (when (nil? path)
      (throw (IllegalArgumentException. "Missing required argument: path to scan")))
    ;; Local bindings: a `def` here would intern global vars as a side effect.
    (let [files (scanner/all-files path)
          occurrences (all-occurrences files)]
      (println (str "Path: " path))
      (doseq [[file-path word-map] occurrences]
        (print (str "-> " file-path ": "))
        (println (count word-map))))))
(ns tsearch.lexer
(:require [clojure.string :as cjstr]))
;; Range bounds built with Character/getNumericValue.
;; NOTE: getNumericValue yields the Unicode *numeric value* of a character,
;; not its code point: letters map to 10..35 regardless of case and the
;; digits map to 0..9.

(def a
  "Numeric value of the character `a` (10)."
  (Character/getNumericValue \a))

(def z
  "Numeric value of the character `z` (35)."
  (Character/getNumericValue \z))

(def A
  "Numeric value of the character `A` (10, same as lower-case `a`)."
  (Character/getNumericValue \A))

(def Z
  "Numeric value of the character `Z` (35, same as lower-case `z`)."
  (Character/getNumericValue \Z))

(def zero
  "Numeric value of the character `0` (0)."
  (Character/getNumericValue \0))

(def nine
  "Numeric value of the character `9` (9)."
  (Character/getNumericValue \9))
(defn is-ascii-alpha-num
  "Returns true when `c` is an ASCII letter (a-z, A-Z) or an ASCII digit
  (0-9), false otherwise."
  [c]
  ;; Compare code points. Character/getNumericValue must not be used here:
  ;; it returns the Unicode numeric value (0..35 for digits and letters),
  ;; which is also produced by non-ASCII digits, full-width letters and
  ;; Roman numeral characters.
  (let [n (int c)]
    (or (<= (int \a) n (int \z))
        (<= (int \A) n (int \Z))
        (<= (int \0) n (int \9)))))
(defn is-valid
  "Returns true when `c` should be kept for tokenization: an ASCII
  letter or digit, a Unicode space character, or one of the ASCII
  whitespace characters matched by the regex class `\\s`."
  [c]
  (or (is-ascii-alpha-num c)
      (Character/isSpaceChar c)
      ;; Character/isSpaceChar is false for tab and the other ASCII control
      ;; whitespace. Dropping those would glue the neighbouring words
      ;; together, so they are kept as separators.
      (contains? #{\newline \tab \return \formfeed \u000B} c)))
(defn lower-and-replace
  "Normalizes a single character: a newline becomes a space, anything
  else is lower-cased with Character/toLowerCase."
  [c]
  (if-not (= (str c) (str \newline))
    (Character/toLowerCase c)
    \space))
(defn tokenize
  "Splits `content` into lower-cased words.
  Characters rejected by `is-valid` are dropped, the remaining ones are
  normalized with `lower-and-replace`, and the result is cut on runs of
  whitespace. Returns a vector of non-empty strings; empty or blank
  input yields an empty vector."
  [content]
  (let [normalized (->> content
                        (filter is-valid)
                        (map lower-and-replace)
                        (apply str))]
    ;; Collect runs of non-whitespace instead of splitting on whitespace:
    ;; splitting returns [""] for blank input and a leading "" when the text
    ;; starts with whitespace, which would be indexed as a word.
    ;; re-seq returns nil when nothing matches; vec turns that into [].
    (vec (re-seq #"\S+" normalized))))
(defn process-content
  "Adds the words of `content` to `initial-map` and returns the updated map.
  Keys are the words returned by `tokenize`; each value is a vector of
  the zero-based positions (within `content`) at which the word occurs,
  appended after any positions already stored under that word."
  [content initial-map]
  (reduce (fn [index [pos word]]
            (let [seen (get index word)
                  ;; Tolerate pre-existing non-vector sequences (or no entry)
                  ;; so positions are always appended at the end.
                  positions (if (vector? seen) seen (vec seen))]
              ;; Appending to a vector avoids stacking one lazy `concat`
              ;; layer per occurrence, which can overflow the stack when the
              ;; positions of a frequent word are finally realized.
              (assoc index word (conj positions pos))))
          initial-map
          (map-indexed vector (tokenize content))))
(ns tsearch.scanner
(:require [clojure.java.io :as cjio]))
(defn all-files
  "Walks `path` with `file-seq` and returns a lazy sequence containing
  only the entries for which `isFile` is true."
  [path]
  (->> (cjio/file path)
       file-seq
       (filter (fn [entry] (.isFile entry)))))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment