(ns gramgen
  "GramGen generates sentences from a context-free grammar (CFG) and comes
  with a built-in grammar. It is based on the program in chapter 2, and it
  could be written much more simply, but it is written the way it is because
  the author wanted to try out some newer features of Clojure (protocols and
  records).

  To generate a random parse tree from the built-in grammar use `rand-tree`;
  to generate a random sentence use `rand-sent`. To use a more advanced
  grammar, build it with `create-grammar` and call `generate-tree` or
  `generate-sent` on the result.

  Grammars are represented as sequences of rules. The first element of each
  rule is the LHS and the remaining elements are the alternative RHS choices.
  If an RHS choice is itself a sequence (list or vector) it is treated as a
  sequence of non-terminals to expand; an atomic item such as a string is
  treated as a terminal."
  ;; clojure.string ships with Clojure itself, unlike clojure.contrib.str-utils.
  (:require [clojure.string :as string]))

(defprotocol ProductionRuleProtocol
  "A simple protocol for a production rule, which has a LHS and a RHS."
  (lhs [this]
    "Get the left-hand side of this production rule.")
  (some-rhs [this]
    "Randomly choose one right-hand side from those available for this rule."))

;; l is the left-hand side; r is the collection of alternative right-hand sides.
(defrecord ProductionRule [l r]
  ProductionRuleProtocol
  (lhs [this] l)
  (some-rhs [this] (rand-nth r)))

(defprotocol GrammarProtocol
  "A protocol that defines a grammar, which is a collection of associated
  production rules."
  (add-rule [this rule]
    "Adds a rule to this grammar, returning a new grammar.")
  (generate-tree [this lhs]
    "Using lhs as the starting point, generate a parse tree.")
  (generate-sent [this lhs]
    "Same as generate-tree, but generates just the terminals of the sentence
    without any other nodes."))

(defn- choose-expansion
  "Looks up the rule for prodsym in grammar and randomly picks one of its
  right-hand sides. Returns a pair [rule rhs].
  Throws IllegalArgumentException when the grammar has no rule for prodsym."
  [grammar prodsym]
  (if-let [rule (get (:rules grammar) prodsym)]
    [rule (some-rhs rule)]
    (throw (IllegalArgumentException.
            (str "No production rule for symbol: " (pr-str prodsym))))))

;; rules is a map from a rule's LHS to its ProductionRule.
(defrecord SimpleEnglishGrammar [rules]
  GrammarProtocol

  ;; Create a new version of the grammar by associating one more rule with it.
  ;; A rule with the same LHS as an existing rule replaces it.
  (add-rule [this rule]
    (SimpleEnglishGrammar. (assoc (:rules this) (lhs rule) rule)))

  ;; Pick a random RHS for prodsym. If that RHS is a sequence, recursively
  ;; expand each of its symbols; otherwise it is a terminal and we return the
  ;; pair [lhs terminal].
  ;; NOTE: only terminal expansions carry their LHS label; the result for a
  ;; non-terminal expansion is the bare (lazy) sequence of its children.
  (generate-tree [this prodsym]
    (let [[rule rhs] (choose-expansion this prodsym)]
      ;; sequential? (rather than seq?) so vector RHS choices are expanded too.
      (if (sequential? rhs)
        (map #(generate-tree this %) rhs)
        [(lhs rule) rhs])))

  ;; Same walk as generate-tree, but flattens the result down to the sequence
  ;; of terminals.
  (generate-sent [this prodsym]
    (let [[_ rhs] (choose-expansion this prodsym)]
      (if (sequential? rhs)
        (mapcat #(generate-sent this %) rhs)
        [rhs]))))

(defn create-grammar
  "Creates a grammar from a sequence of rules. Each rule is a sequence whose
  first element is a keyword or symbol naming the LHS and whose remaining
  elements are the RHS choices: either sequences of symbols (non-terminal
  expansions) or atomic terminals such as strings."
  [rulelist]
  (let [grammar (SimpleEnglishGrammar. {})
        ;; vec gives rand-nth constant-time access to the RHS choices.
        production-rule #(ProductionRule. (first %) (vec (rest %)))]
    (reduce #(add-rule %1 (production-rule %2)) grammar rulelist)))

;; The built-in grammar. Declared dynamic so the earmuffed name keeps working
;; (and stays rebindable with `binding`) on Clojure 1.3 and later.
(def ^{:dynamic true} *simple-grammar*
  '((sent (np vp))
    (np (dt nn))
    (vp (vb np))
    (dt "the" "a")
    (nn "man" "woman" "cat" "ball")
    (vb "likes" "calls" "plays" "throws")))

(defn rand-tree
  "Generates a random tree for the symbol lhs from the built-in grammar."
  [lhs]
  (let [grammar (create-grammar *simple-grammar*)]
    (generate-tree grammar lhs)))

(defn rand-sent
  "Generates a random sentence from the built-in grammar, starting at 'sent,
  as a single space-separated string."
  []
  (let [grammar (create-grammar *simple-grammar*)]
    (string/join \space (generate-sent grammar 'sent))))