- Operate on data as Clojure data structures
- CSV, TSV, JSON
- Wires up various third-party APIs
c4row
andc4rows
names.csv.header
; | |
; An illustrative example of using d and c4 to process a | |
; tab-delimited file. This workflow starts with a file that may | |
; be missing a lot of Factual IDs, and attempts to fill them | |
; in via Factual's live Crosswalk, Resolve, and full-text search | |
; features. | |
; | |
; For every line where factual_id is empty, attempts to fill it in | |
; with the result of Crosswalking with foursquare data. If foursquare |
; Step 1: Start with a file of entity data. Should include these columns: | |
; * Restaurant Name | |
; * Address | |
; * State | |
; * Postal Code | |
; | |
; Create a new file, with a new "Factual ID" column populated by Resolve. | |
resolved-john.csv <- short-john.csv [c4] | |
(+col! "Factual ID") | |
(!=> "Factual ID" |
Factual factual = new Factual(MY_KEY, MY_SECRET); |
(ns aaron | |
(:require [clojure.java.io :as io]) | |
(defn do-lines
  "Calls `f` once per line of `file`, in order, for side effects.

  `file` is handed to `io/reader`; the resulting reader is closed by
  `with-open` when the body exits. Returns nil."
  [file f]
  (with-open [rdr (io/reader file)]
    ;; doseq consumes the lazy line-seq while the reader is still open.
    (doseq [line (line-seq rdr)]
      (f line))))
(defn do-lines-with-linenos [file f] |
{"is_closed" false, | |
"is_claimed" true, | |
"review_count" 76, | |
"snippet_image_url" | |
"http://s3-media2.ak.yelpcdn.com/photo/pfElY3JEPHdZIXJAVw_9wQ/ms.jpg", | |
"image_url" | |
"http://s3-media3.ak.yelpcdn.com/bphoto/8XoszNLo1KeM4ePhVnnAxw/ms.jpg", | |
"name" "Pink Taco", | |
"rating_img_url_large" | |
"http://s3-media4.ak.yelpcdn.com/assets/2/www/img/ccf2b76faa2c/ico/stars/stars_large_4.png", |
;; currently: | |
(write | |
(map | |
(fn [row] | |
(let [yelp-id (row "yelp-id")] | |
(if-not (empty? yelp-id) | |
(let [biz (yelp/lookup-business yelp-id) | |
loc (biz "location")] | |
(-> row | |
(assoc "name_other" (biz "name")) |
# D is a data workflow tool | |
# Handling your dependency graph | |
# c4 is a set of D protocols for one specific use case... | |
* local input and output files, exactly one each | |
* |
# D is a data workflow tool | |
# Handling your dependency graph | |
# c4 is a set of D protocols for one specific use case... | |
* local input and output files, exactly one each | |
* |
; Get foursquare crosswalk ids | |
resolved-accuracy-inputs-fsq-crosswalked.json <- resolved-accuracy-inputs.json [c4] | |
(write | |
(map | |
(fn [row] | |
(let [factual-id (row "factual_id")] | |
(if-not (empty? factual-id) | |
(let [fsq-id (:namespace_id (first (select crosswalk | |
(where (= :factual_id factual-id) | |
(= :namespace "foursquare")))))] |