Skip to content

Instantly share code, notes, and snippets.

@glorphindale
Created February 26, 2014 17:16
Show Gist options
  • Save glorphindale/9234032 to your computer and use it in GitHub Desktop.
Save glorphindale/9234032 to your computer and use it in GitHub Desktop.
Пример использования enlive для выдирания информации из страниц
(ns enlive-examples.core
(:require [clojure.string :as string]
[net.cgrand.enlive-html :as html]
[cheshire.core :as chesh]))
(def ^:dynamic *base-url*
  "URL of the page to scrape. Dynamic, so it can be rebound with binding."
  "http://2013.codefest.ru/members/")
(defn fetch-url
  "Builds a java.net.URL from the string url and hands it to
  html/html-resource, returning whatever that call produces."
  [url]
  (-> url
      java.net.URL.
      html/html-resource))
(def raw-data
  "Result of fetch-url for *base-url*. Evaluated once, when this
  namespace is loaded, so loading the file performs the fetch."
  (fetch-url *base-url*))
(def selected
  "Nodes from raw-data matching :li.b-peoples__item inside a :div whose
  data-role attribute equals peoples-names."
  (let [names-container [:div (html/attr= :data-role "peoples-names")]]
    (html/select raw-data [names-container :li.b-peoples__item])))
(defn with-letter?
  "Returns true when the second child (index 1) of item's :content is a
  node whose :class attribute is exactly b-peoples__letter.

  Returns false, instead of throwing IndexOutOfBoundsException, when
  :content is missing or has fewer than two children, and when the
  second child is a plain string rather than an element map."
  [item]
  (let [second-child (nth (:content item) 1 nil)] ; nil when out of range
    (= "b-peoples__letter"
       ;; get-in yields nil for nil and for string children alike
       (get-in second-child [:attrs :class]))))
(defn item->name
  "Extracts the name from a list-item node.

  If the item contains an :a whose href starts with /speaker, the text
  of the first such link is returned. Otherwise the name is read from
  the item's raw :content and trimmed: the child at index 2 when
  with-letter? holds for the item, the child at index 0 otherwise."
  [item]
  (let [speaker-link (first (html/select item [[:a (html/attr-starts :href "/speaker")]]))
        children     (:content item)]
    (cond
      speaker-link        (html/text speaker-link)
      (with-letter? item) (string/trim (nth children 2))
      :else               (string/trim (nth children 0)))))
(defn item->person
  "Converts a list-item node into a vector [pname pcomp ppos].

  pname comes from item->name. The first result of selecting
  [:span.b-peoples__company html/text] in the item is split on the first
  comma into at most two parts; each part is trimmed and lower-cased,
  giving pcomp and ppos (presumably company and position; confirm
  against the page markup). ppos is nil when there is no comma.

  When the selector matches nothing, pcomp and ppos are both nil.
  Previously this case threw a NullPointerException, because
  string/split was called on nil."
  [item]
  (let [pname         (item->name item)
        company-text  (first (html/select item [:span.b-peoples__company html/text]))
        ;; destructuring nil below leaves both pcomp and ppos as nil
        [pcomp ppos]  (when company-text
                        (map (comp string/lower-case string/trim)
                             (string/split company-text #"," 2)))]
    [pname pcomp ppos]))
;; Eagerly convert the first 25 selected items; handy as a quick check at
;; the REPL. The result is not bound to anything.
(->> selected
     (take 25)
     (map item->person)
     doall)
(def grouped-data
  "Lazy sequence of [pname pcomp ppos] vectors, one per node in selected."
  (->> selected
       (map item->person)))
(comment
  ;; REPL scratch: count how often each second element (pcomp) occurs in
  ;; grouped-data and show the 15 entries with the highest counts.
  (let [pcomp-counts (frequencies (map second grouped-data))
        ascending    (sort-by second pcomp-counts)]
    (take 15 (reverse ascending))))
;; Serialize grouped-data to a JSON string and write it to a file in the
;; current working directory. Runs when the namespace is loaded.
(->> grouped-data
     chesh/generate-string
     (spit "codefest-2013-raw.json"))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment