;; Build environment. The three Boot artifacts are declared with "provided"
;; scope; the remaining entries are ordinary library dependencies.
(set-env!
 :dependencies '[[boot/core "2.8.2" :scope "provided"]
                 [boot/pod "2.8.2" :scope "provided"]
                 [boot/base "2.8.2" :scope "provided"]
                 [http-kit "2.2.0"]
                 [enlive "1.1.6"]
                 [cheshire "5.7.1"]
                 [clojure-csv/clojure-csv "2.0.1"]
                 [clj-time "0.13.0"]
                 [org.clojure/core.match "0.3.0-alpha4"]]
 :resource-paths #{"resources" "src"})

;; Default options for the `pom` task.
(task-options!
 pom {:project 'companies2gdp
      :version "0.0.1-SNAPSHOT"})
(ns boot.user
  (:require
   [cheshire.core :as json]
   [clojure.edn :as edn]
   [clojure.string]
   [net.cgrand.enlive-html :as html]
   [org.httpkit.client :as http-kit]))
(defn parse-int
  "Reads the integer (optional leading minus, then digits) found at the very
  start of `s` and returns it as a double; anything after those digits is
  ignored. Returns nil when `s` is nil, is not text, or does not start with an
  integer."
  [s]
  (when (instance? CharSequence s)
    (some-> (re-find #"\A-?\d+" s)
            Double/parseDouble)))
(defn gdp
  "Reads tab-separated rows from \"prices.csv\" and writes them to \"gdp.edn\"
  as a printed sequence of maps with:

    :country - the second column, trimmed
    :gdp     - the third column with commas removed, parsed by `parse-int`
               and multiplied by 1,000,000

  Blank lines are skipped. A column that is missing, or a value that does not
  start with a number, yields nil for that key instead of throwing, so heading
  or malformed rows do not abort the export."
  []
  (let [rows (->> (slurp "prices.csv")
                  clojure.string/split-lines
                  (remove clojure.string/blank?)
                  (map (fn [line]
                         (let [cols (clojure.string/split line #"\t+")]
                           {:country (some-> (nth cols 1 nil)
                                             clojure.string/trim)
                            ;; some-> stops at the first nil, so an unparsable
                            ;; figure stays nil rather than reaching `*`.
                            :gdp     (some-> (nth cols 2 nil)
                                             (clojure.string/replace "," "")
                                             parse-int
                                             (* 1000000))}))))]
    (spit "gdp.edn" (prn-str rows))))
(defn companies
  "Reads tab-separated rows from \"companies.tsv\" and writes them to
  \"companies.edn\" as a printed sequence of maps with:

    :name  - the second column
    :price - the third column with `$` and `,` removed, parsed by `parse-int`
             and multiplied by 1,000,000

  Blank lines are skipped. A column that is missing, or a price that does not
  start with a number, yields nil for that key instead of throwing."
  []
  (let [rows (->> (slurp "companies.tsv")
                  clojure.string/split-lines
                  (remove clojure.string/blank?)
                  (map (fn [line]
                         (let [cols (clojure.string/split line #"\t")]
                           {:name  (nth cols 1 nil)
                            ;; some-> stops at the first nil, so an unparsable
                            ;; price stays nil rather than reaching `*`.
                            :price (some-> (nth cols 2 nil)
                                           (clojure.string/replace #"\$" "")
                                           (clojure.string/replace #"," "")
                                           parse-int
                                           (* 1000000))}))))]
    (spit "companies.edn" (prn-str rows))))
(defn flags
  "Builds a lookup map from \"flags.tsv\": each row's last tab-separated column
  is the key and its second column is the value. When a key repeats, the later
  row wins. Rows with fewer than two columns are skipped, and both LF and CRLF
  line endings are accepted so keys never carry a trailing carriage return."
  []
  (->> (slurp "flags.tsv")
       clojure.string/split-lines
       (map #(clojure.string/split % #"\t"))
       (filter #(>= (count %) 2))
       (reduce (fn [lookup cols]
                 (assoc lookup (last cols) (nth cols 1)))
               {})))
;; Lookup map produced by `flags`; defonce evaluates it only if the var has no
;; value yet.
(defonce flag-lookup (flags))

(defn format
  "Renders `data` as text. Each company contributes its `:name`, a newline, and
  then the `flag-lookup` values for the `:country` of every item under its
  `:gdp` key, concatenated with no separator. Companies are joined by
  newlines. Note: this name shadows clojure.core/format in this namespace."
  [data]
  (let [flag-row (fn [entries]
                   (apply str (for [entry entries]
                                (get flag-lookup (:country entry)))))
        render   (fn [company]
                   (str (:name company) "\n" (flag-row (:gdp company))))]
    (clojure.string/join "\n" (map render data))))
(deftask join
  "Combine GDP, companies and flag data.

  Reads \"gdp.edn\" and \"companies.edn\", attaches to each company (under
  `:gdp`) the GDP entries whose `:gdp` is strictly below the company's
  `:price`, largest first, drops companies with no such entry, and writes the
  result of `format` to \"out.txt\". Entries or companies whose figure is nil
  are ignored."
  []
  (let [prices    (edn/read-string (slurp "gdp.edn"))
        companies (edn/read-string (slurp "companies.edn"))
        ;; GDP entries smaller than the given company price, in descending
        ;; order of :gdp.
        outvalued (fn [company-price]
                    (->> prices
                         (filter (fn [{:keys [gdp]}]
                                   (and gdp
                                        company-price
                                        (> company-price gdp))))
                         (sort-by :gdp)
                         reverse))
        data      (->> companies
                       (map (fn [company]
                              (assoc company :gdp (outvalued (:price company)))))
                       (filter (comp seq :gdp)))]
    (spit "out.txt" (format data))))
(comment
  ;; REPL scratch calls, listed in the order they need to run: `join` reads the
  ;; gdp.edn and companies.edn files that the first two calls write.
  (gdp)
  (companies)
  (join))
;; Page footer captured along with this gist (not part of the script):
;; "Sign up for free to join this conversation on GitHub.
;; Already have an account? Sign in to comment"