Created
November 30, 2021 15:13
-
-
Save uzilan/c93069eefaa3f8d360c50ad671f62375 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
;; Namespace declaration. Both library aliases live in a single :require
;; clause, which is the conventional form; :gen-class is kept so the
;; namespace can still be AOT-compiled to a Java class.
(ns zer.ubba.bel.clojure_bible
  (:require [clojure.java.io :as io]
            [clojure.string :as str])
  (:gen-class))
(def data-file
  "Classpath URL of the `bible.txt` resource. `io/resource` yields nil when
  the resource cannot be found, so consumers will fail at read time if the
  file is not on the classpath."
  (io/resource "bible.txt"))
(def boringWords
  "Lower-case stop words that are excluded from the word count.
  Kept as a vector of strings, in the original order."
  ["the" "and" "of" "to" "that" "for" "in" "i" "his"
   "a" "with" "it" "be" "is" "not" "they" "thou"])
(defn readTheBible
  "Reads the text behind `data-file` and returns a map of word -> number of
  occurrences, ignoring every word listed in `boringWords`.

  Processing steps:
    1. slurp the whole resource into memory and split it into lines on \\n;
    2. split each line into tokens on single spaces;
    3. keep only tokens made up entirely of word characters;
    4. lower-case the survivors;
    5. drop the stop words and count what is left.

  All intermediate values are local bindings; calling this function does not
  create or rebind any namespace-level vars."
  []
  (let [boring? (set boringWords)] ; set lookup instead of a linear scan per word
    (->> (str/split (slurp data-file) #"\n")
         (mapcat #(str/split % #" "))
         ;; re-matches must match the WHOLE token, so this drops empty strings
         ;; and also any token carrying punctuation (e.g. "earth.").
         (filter #(re-matches #"^\w+" %))
         (map str/lower-case)
         (remove boring?)
         frequencies)))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment