Skip to content

Instantly share code, notes, and snippets.

@minikomi
Last active June 13, 2018 08:06
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save minikomi/791ff90cdd21e146ed305c61c1d13a8d to your computer and use it in GitHub Desktop.
Save minikomi/791ff90cdd21e146ed305c61c1d13a8d to your computer and use it in GitHub Desktop.
(ns getlife.core
(:gen-class)
(:require [crouton.html :as html]
[com.rpl.specter :as s]
[clojure.string :as str]
[clojure.data.csv :as csv]
[clojure.java.io :as io]
))
(defn parse-page
  "Parses the classpath resource `html/<n>.html` with crouton and returns the
  resulting tree.

  `n` is stringified into the resource name, so any value whose `str` form
  matches a file under `html/` can be used."
  [n]
  (let [resource-name (str "html/" n ".html")]
    (-> resource-name
        io/resource
        html/parse)))
;; Parsed tree of page 1, evaluated when the namespace is loaded (so loading
;; this namespace reads html/1.html from the classpath).
;; NOTE(review): not referenced by any other definition visible in this file;
;; presumably a REPL convenience — confirm before relying on or removing it.
(def h (parse-page 1))
;; Builds a recursive Specter path that walks a parsed HTML tree and selects
;; every map node that passes an attribute test.
;;
;; attr-name - :tag to test the node's :tag; any other value is used as a key
;;             looked up under the node's :attrs.
;; pred      - when attr-name is :tag, a plain value compared to :tag with `=`;
;;             otherwise a one-argument function applied to the attribute value.
;;
;; Nodes that are not maps are never selected and are not descended into.
(defn <> [attr-name pred]
(s/recursive-path
[] p
;; Only map nodes are examined; anything else ends this branch of the walk.
(s/if-path map?
(s/if-path
(if (= attr-name :tag)
[:tag #(= % pred)]
;; `when %` stops pred from being called when the attribute value is nil.
[:attrs attr-name #(when % (pred %))])
;; Matching node: visit everything under :content first, then the node itself.
(s/continue-then-stay [:content s/ALL p])
;; Non-matching node: keep searching its :content children only.
[:content s/ALL p]))))
;; Specter path applied to a single member-card node. It navigates, in order, to:
;;   1. the first :content item of each node whose class is exactly
;;      "fl-member-card__info",
;;   2. the :href attribute of each node whose class is exactly "fl-avatar__link",
;;   3. the :title attribute of the first :content item of each such link node.
(def sub-select
  (let [class-is    (fn [class-name] (<> :class #(= % class-name)))
        info-node   (class-is "fl-member-card__info")
        avatar-link (class-is "fl-avatar__link")]
    (s/multi-path
      [info-node :content s/FIRST]
      [avatar-link :attrs :href]
      [avatar-link :content s/FIRST :attrs :title])))
(defn cleanup
  "Normalises one scraped row into `[age sex pref url name]`.

  Takes a sequence `[age-sex-pref link name]`:
  - `age-sex-pref` is trimmed and split on the first run of spaces. The
    leading digits of the first part become `age`, the rest of the first part
    becomes `sex`, and everything after the first run of spaces becomes `pref`
    (nil when there is nothing after it).
  - `link` is appended to \"https://fetlife.com\" to form the URL.
  - `name` is passed through unchanged.

  A nil or non-string `age-sex-pref` yields empty `age`/`sex` and a nil `pref`
  instead of throwing, so one malformed row does not abort a whole run."
  [[age-sex-pref link name]]
  (let [info            (if (string? age-sex-pref) (str/trim age-sex-pref) "")
        ;; Limit of 2: keep the whole remainder in `pref` rather than silently
        ;; dropping every word after the second token.
        [agesex pref]   (str/split info #"\ +" 2)
        ;; `(char c)` selects the isDigit(char) overload without reflection.
        digit?          (fn [c] (Character/isDigit (char c)))
        [digits suffix] (split-with digit? agesex)]
    [(apply str digits)
     (apply str suffix)
     pref
     (str "https://fetlife.com" link)
     name]))
(defn get-data
  "Parses the given pages and returns one vector of cleaned rows.

  With no arguments, pages 1 through 185 are read (the original fixed range).
  `pages` may be any sequence of values accepted by `parse-page`.

  For each page, every node whose class attribute contains
  \"fl-member-card fl-flag\" is selected, `sub-select` is run on it, and the
  result is passed to `cleanup`. Rows from all pages are concatenated in page
  order.

  NOTE(review): `cleanup` destructures the `sub-select` result positionally;
  if a card lacks one of the selected values the remaining ones would shift
  left — confirm the input pages always provide all three."
  ([] (get-data (range 1 186)))
  ([pages]
   (let [page->rows (fn [n]
                      (->> (parse-page n)
                           (s/select
                             [(<> :class #(str/includes? % "fl-member-card fl-flag"))])
                           (map (fn [card]
                                  (cleanup (s/select sub-select card))))))]
     ;; Eagerly concatenate the per-page rows into a single vector.
     (into [] (mapcat page->rows) pages))))
(defn gen-csv
  "Writes every row returned by `get-data` as tab-separated values.

  With no arguments the output goes to \"out.csv\" (relative to the working
  directory). Pass `path` — anything `clojure.java.io/writer` accepts — to
  write somewhere else.

  Rows are collected before the output is opened, so a failure while reading
  or parsing pages does not truncate an existing output file."
  ([] (gen-csv "out.csv"))
  ([path]
   (let [rows (get-data)]
     (with-open [writer (io/writer path)]
       (csv/write-csv writer rows :separator \tab)))))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment