Created
January 26, 2011 11:10
-
-
Save janus/796564 to your computer and use it in GitHub Desktop.
A function that splits a sentence/paragraph into typed fragments wherever it contains a `*...*` or `{...}` span.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
;;Build a function splitParagraph which, given a paragraph string, returns an array of paragraph fragments. ;;Think of a good way to represent the fragments. | |
;;Clojure version (naive) | |
(ns moon)

;; Opening delimiter -> closing delimiter. These are the characters used as
;; break points when splitting a paragraph.
(def map-tag
  {\{ \}
   \* \*})

;; Opening delimiter -> label stored under :type for that kind of fragment.
(def map-type
  {\* "Footnotes"
   \{ "References"})

;; Opening delimiter -> number of leading characters to skip so that the
;; fragment content starts one character past the opening delimiter.
(def map-num
  {\{ 1
   \* 1})

;; Sample paragraph used by the driver at the bottom of the file.
(def cstring "*CARL FRIDAY LEWIS* is in the house { we built for our God } we wait*the cool things is here* {let's do it}")
(defn get-num
  "Returns the value stored in map-num for char-key, or 0 when char-key
  is not a key of map-num."
  [char-key]
  ;; A Clojure map is a function of its keys; the second argument is the
  ;; not-found default.
  (map-num char-key 0))
(defn process-tag
  "Lazily splits astring into a sequence of fragment maps, each of the form
  {:type <label> :content <text>}.

  A fragment that starts with a key of map-tag runs up to the matching
  closing delimiter and is labelled via map-type; any other fragment is
  labelled \"Normal\" and runs up to the next opening brace or asterisk,
  or to the end of the string.

  n is the number of characters left to process; callers pass
  (count astring). An n of 0 (or an empty astring) yields an empty sequence.

  A tagged fragment whose closing delimiter is missing takes the rest of the
  string as its content instead of throwing."
  [n astring]
  (lazy-seq
    (if (or (zero? n) (empty? astring))
      '()
      (let [len        (count astring)
            start-char (.charAt astring 0)
            ;; 1 for a tagged fragment (skip the opening delimiter), else 0.
            skip       (get-num start-char)
            ;; Index where this fragment's content stops, or -1 if no
            ;; terminator exists. Searches start at 1 so the opening
            ;; delimiter itself is never matched.
            found      (if (contains? map-tag start-char)
                         (.indexOf astring (int (get map-tag start-char)) 1)
                         (let [brack (.indexOf astring (int \{) 1)
                               star  (.indexOf astring (int \*) 1)
                               hits  (remove neg? [brack star])]
                           ;; Nearest opening delimiter, if any was found.
                           (if (seq hits) (apply min hits) -1)))
            end        (if (neg? found) len found)
            ;; For a tagged fragment (+ skip end) steps over the closing
            ;; delimiter; clamp to len for the unterminated case.
            remaining  (subs astring (min len (+ skip end)))]
        (cons {:type    (get map-type start-char "Normal")
               :content (subs astring skip end)}
              (process-tag (count remaining) remaining))))))
;; Build the lazy list of fragment maps for the sample paragraph and print it.
(let [paragraph cstring]
  (print (process-tag (count paragraph) paragraph)))
;; One could argue the above is not very functional. I agree, but at least it
;; is built from lazy-seq and cons.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Python version of splitParagraph. The original draft "cheated" by only
# looking for "{" inside normal text; this version splits on both delimiters.
# Spending time with the Clojure version above helps in understanding it.

# Module-level accumulator: every processTag call appends its fragments here.
datastore = []

# Opening delimiter -> (closing delimiter, fragment type label).
_TAGS = {"*": ("*", "Star"), "{": ("}", "Footnotes")}


def processTag(text):
    """Split ``text`` into typed fragments.

    A span written as ``*...*`` becomes a ``"Star"`` fragment, a span written
    as ``{...}`` becomes a ``"Footnotes"`` fragment, and any text between
    such spans becomes a ``"Normal"`` fragment. Delimiters are not included
    in the fragment content; all other characters (including spaces) are
    preserved.

    A span whose closing delimiter is missing takes the rest of ``text`` as
    its content.

    Args:
        text: the paragraph string to split.

    Returns:
        The list of ``{"type": ..., "content": ...}`` dicts produced for this
        call, in order. The same dicts are also appended to the module-level
        ``datastore`` list.
    """
    fragments = []
    pos = 0
    while pos < len(text):
        opener = text[pos]
        if opener in _TAGS:
            closer, kind = _TAGS[opener]
            # Search after the opener so "*" does not match itself.
            end = text.find(closer, pos + 1)
            if end == -1:
                # Unterminated span: consume everything that is left.
                end = len(text)
            fragments.append({"type": kind, "content": text[pos + 1:end]})
            # Step over the closing delimiter only (no extra character).
            pos = end + 1
        else:
            # Normal text runs up to the nearest opening delimiter of any kind.
            hits = [i for i in (text.find(o, pos) for o in _TAGS) if i != -1]
            end = min(hits, default=len(text))
            fragments.append({"type": "Normal", "content": text[pos:end]})
            pos = end
    datastore.extend(fragments)
    return fragments


processTag("*CARL FRIDAY LEWIS* is in the house { we built for our God } we wait*the cool things is here* {let's do it}")
print(datastore)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment