Skip to content

Instantly share code, notes, and snippets.

@rcampbell
Created February 25, 2010 15:01
Show Gist options
  • Save rcampbell/314600 to your computer and use it in GitHub Desktop.
Save rcampbell/314600 to your computer and use it in GitHub Desktop.
Rendering Calais tag aggregates
(ns calais.web
(:use compojure
[compojure.http response]
[clojure.contrib.def :only [defn-memo]]
[clojure.contrib.seq-utils :only [rand-elt]]
[clojure.contrib.str-utils :only [re-gsub]])
(:require [calais.rdf :as rdf])
(:import [java.io File]))
(declare percent)
(def title "Entity Recognition & Decoration Prototype")
(defn section-gen
([title]
(section-gen title #(identity [:li (:name %)
[:small {:style "color: blue;"}
(percent (:relevance %))]])))
([title li-fn]
(fn [list]
(when-not (empty? list)
(let [rating (if ((first list) :score) :score :relevance)]
(html [:h3 title]
[:ul (map li-fn (reverse (sort-by rating list)))]))))))
(def add-companies (section-gen "Companies"
#(identity [:li (str (:name %) " [" (:ticker %) "]")
[:small {:style "color: blue;"}
(percent (:score %))]])))
(def add-technologies (section-gen "Technologies"))
(def add-industry-terms (section-gen "Industry Terms"))
(def add-products (section-gen "Products"))
(def add-people (section-gen "People"))
(defn add-meta-section [file]
(let [m (rdf/ask-all file)]
(str (add-companies (m :companies))
(add-technologies (m :technologies))
(add-industry-terms (m :industry-terms))
(add-products (m :products))
(add-people (m :people)))))
(defn results-page [id]
(html
(doctype :html4)
[:html
[:head
[:title id]]
[:body
[:h1 id]
[:a {:href (str "/original/" id)} "Download DOC"]
(add-meta-section (File. (str "s:/idc-rdf/" id ".rdf")))
[:br]
[:a {:href "/"} "Back"]]]))
(defn get-random-ids [num]
(let [all-ids (map #(apply str (drop-last 4 (.getName %)))
(filter #(> (.length %) 3072) (.listFiles (File. "S:/idc-rdf"))))]
(loop [some-ids #{}]
(if (= (count some-ids) num) some-ids
(recur (conj some-ids (rand-elt all-ids)))))))
(defn-memo show-tag-section [category]
(letfn [(sort-and-filter [category]
(rdf/sorted-by-val (rdf/filter-with
((deref rdf/freq) category))))]
(html
[:h3 (capitalize (name category))]
[:ul (pmap #(identity [:li
[:a {:href (str "/tag/"
(re-gsub #"\." ""
(url-encode (first %))))}
(first %)]
[:small " (" (second %) ")"]])
(sort-and-filter category))])))
(defn-memo count-docs []
(count (.listFiles (File. "S:/idc-doc"))))
(defn percent [val]
(str " - " (int (* 100 (Float/valueOf val))) "%"))
(defn index-page []
(html
(doctype :html4)
[:html
[:head
[:title title]]
[:body
[:table {:width "100%"}
[:tr
[:td {:width "50%" :valign "top"}
[:h4 "Enter any valid document number (container id):"]
(form-to [:post "/"]
(text-field :container-id)
(submit-button "Submit"))]
[:td {:width "50%" :valign "top"}
[:h4 "Some documents to get you started:"]
[:ul (map #(identity [:li [:a {:href %} %]]) (get-random-ids 10))
[:li "..." (count-docs) " more..." [:a {:href "/"} "refresh"]]]]]]
[:hr]
[:table {:width "100%"}
[:tr
[:td {:valign "top" :width "25%"} (show-tag-section :companies)]
[:td {:valign "top" :width "25%"} (show-tag-section :technologies)]
[:td {:valign "top" :width "25%"} (show-tag-section :industry-terms)]
[:td {:valign "top" :width "25%"} (show-tag-section :products)]]]]]))
(defn doc-not-found []
(html
(doctype :html4)
[:html
[:head
[:title title]]
[:body
[:h4 (str "The document you entered has not yet been run "
"through Calais. Please try another document.")]
[:a {:href "/"} "Try Again"]]]))
(defn tag-page [tag]
(html
(doctype :html4)
[:html
[:head
[:title "Documents tagged as " tag]]
[:body
[:h1 "Documents tagged as " tag]
[:ul (map #(identity [:li [:a {:href (str "/" %)} %]])
((deref rdf/xref) tag))]
[:a {:href "/"} "Back"]]]))
(defroutes calais-app
(GET "/"
(index-page))
(GET "/tag/:tag"
(tag-page (params :tag)))
(POST "/"
(try
(results-page (.trim (params :container-id)))
(catch Exception _ (doc-not-found))))
(GET "/:id"
(try
(results-page (.trim (params :id)))
(catch Exception _ (doc-not-found))))
(GET "/original/:id"
(let [filename (str (.trim (params :id)) ".doc")]
[{:header {"Content-Type" "application/msword"
"Content-Disposition" (str "attachment; filename="
filename)
"Content-Type" "application/force-download"}}
(File. (str "S:/idc-doc/" filename))]))
(ANY "*"
(page-not-found)))
; (run-server {:port 8081} "/*" (servlet calais-app))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment