Skip to content

Instantly share code, notes, and snippets.

@josephwilk
Last active April 3, 2019 22:19
Show Gist options
  • Save josephwilk/17015a54b9146dec16b047ced2d6afd4 to your computer and use it in GitHub Desktop.
Save josephwilk/17015a54b9146dec16b047ced2d6afd4 to your computer and use it in GitHub Desktop.
;; Build environment for this boot script: the Maven coordinates to resolve and
;; the directories to expose as resource paths.
(set-env!
;; boot's own artifacts are declared with :scope "provided"; the remaining
;; entries are ordinary library dependencies.
;; NOTE(review): the visible functions only call clojure.string, clojure.edn,
;; slurp and spit — confirm whether the libraries below are still needed.
:dependencies '[[boot/core "2.8.2" :scope "provided"]
[boot/pod "2.8.2" :scope "provided"]
[boot/base "2.8.2" :scope "provided"]
[http-kit "2.2.0"]
[enlive "1.1.6"]
[cheshire "5.7.1"]
[clojure-csv/clojure-csv "2.0.1"]
[clj-time "0.13.0"]
[org.clojure/core.match "0.3.0-alpha4"]
]
:resource-paths #{"resources" "src"})
;; Default options for the `pom` task: the project symbol and version it is
;; given whenever that task runs.
(task-options!
pom {:project 'companies2gdp
:version "0.0.1-SNAPSHOT"})
(ns boot.user
(:require
[org.httpkit.client :as http-kit]
[net.cgrand.enlive-html :as html]
[cheshire.core :as json]
[clojure.edn :as edn]))
(defn parse-int
  "Parse the integer prefix of `s`.

  Takes the run of digits (with an optional leading minus sign) found at the
  very start of `s` and returns it as a Double; anything after that run,
  including a decimal part, is ignored. Returns nil when `s` is not a character
  sequence or does not start with digits."
  [s]
  (when (instance? CharSequence s)
    (when-let [digits (re-find #"\A-?\d+" s)]
      (Double/parseDouble digits))))
(defn gdp
  "Extract GDP figures for countries.

  Reads `prices.csv` (split into lines on newline and into fields on runs of
  tabs), and for every line with at least three fields builds a map of
  `:country` (second field, trimmed) and `:gdp` (third field with commas
  removed, parsed by `parse-int` and multiplied by 1,000,000 — presumably the
  source figures are in millions; confirm against the data file). `:gdp` is nil
  when the field does not start with a number, e.g. on a header row. The
  resulting sequence is written to `gdp.edn`."
  []
  (let [rows (->> (clojure.string/split (slurp "prices.csv") #"\n")
                  (map #(clojure.string/split % #"\t+"))
                  ;; blank or truncated lines have no country/GDP columns to read
                  (filter #(>= (count %) 3)))
        records (map (fn [fields]
                       {:country (clojure.string/trim (nth fields 1))
                        ;; some-> stops at a nil parse instead of multiplying nil
                        :gdp (some-> (clojure.string/replace (nth fields 2) "," "")
                                     (parse-int)
                                     (* 1000000))})
                     rows)]
    (spit "gdp.edn" (prn-str records))))
(defn companies
  "Extract companies and profit.

  Reads `companies.tsv` (one record per line, fields separated by single tabs),
  and for every line with at least three fields builds a map of `:name` (second
  field) and `:price` (third field with `$` and `,` removed, parsed by
  `parse-int` and multiplied by 1,000,000 — presumably the source figures are in
  millions; confirm against the data file). `:price` is nil when the field does
  not start with a number, e.g. on a header row. The resulting sequence is
  written to `companies.edn`."
  []
  (let [rows (->> (clojure.string/split (slurp "companies.tsv") #"\n")
                  (map #(clojure.string/split % #"\t"))
                  ;; blank or truncated lines have no name/price columns to read
                  (filter #(>= (count %) 3)))
        records (map (fn [fields]
                       {:name (nth fields 1)
                        ;; some-> stops at a nil parse instead of multiplying nil
                        :price (some-> (nth fields 2)
                                       (clojure.string/replace #"\$" "")
                                       (clojure.string/replace #"," "")
                                       (parse-int)
                                       (* 1000000))})
                     rows)]
    (spit "companies.edn" (prn-str records))))
(defn flags
  "Flag lookup.

  Reads `flags.tsv` (one record per line, fields separated by single tabs) and
  returns a map from each line's last field to its second field. Lines with
  fewer than two fields (such as blank lines) are skipped; when two lines share
  the same last field the later one wins."
  []
  (->> (clojure.string/split (slurp "flags.tsv") #"\n")
       (map #(clojure.string/split % #"\t"))
       ;; a blank or single-field line has no second field to use as the value
       (filter #(>= (count %) 2))
       (reduce (fn [lookup fields]
                 (assoc lookup (last fields) (nth fields 1)))
               {})))
;; Lookup map produced by calling (flags) the first time this form is
;; evaluated; defonce keeps the existing value on later re-evaluation. Because
;; (flags) slurps flags.tsv, that file must be readable when this file loads.
;; Read by `format`.
(defonce flag-lookup (flags))
(defn format
  "Render `data` as text.

  Each company contributes two lines: its `:name`, then the concatenation of
  the `flag-lookup` entries for the `:country` of every record under its
  `:gdp` key (countries missing from the lookup contribute nothing). Companies
  are separated by a newline.

  Note: within this namespace the name shadows `clojure.core/format`."
  [data]
  (letfn [(flag-row [countries]
            (apply str (map #(get flag-lookup (:country %)) countries)))
          (entry [company]
            (str (:name company) "\n" (flag-row (:gdp company))))]
    (clojure.string/join "\n" (map entry data))))
(deftask join
  "Combine GDP, companies and flag data.

  Reads `gdp.edn` and `companies.edn`, attaches to each company (under `:gdp`)
  the country records whose `:gdp` is strictly below the company's `:price`,
  largest GDP first, drops companies with no such country, and writes the text
  rendered by `format` to `out.txt`.

  NOTE(review): the body runs as soon as the task function is called and
  returns the result of `spit` rather than a boot middleware, which suits the
  REPL usage in this file; confirm before composing it in a task pipeline."
  []
  (let [prices (edn/read-string (slurp "gdp.edn"))
        companies (edn/read-string (slurp "companies.edn"))
        ;; country records worth less than the given company; records or
        ;; companies with a missing figure never match
        outvalued (fn [company]
                    (let [company-price (:price company)]
                      (filter (fn [price]
                                (let [gdp (:gdp price)]
                                  (and gdp company-price (> company-price gdp))))
                              prices)))
        data (->> companies
                  (map (fn [company]
                         (->> (outvalued company)
                              (sort-by :gdp)
                              ;; reversing the stable ascending sort gives
                              ;; descending order with ties in reversed input order
                              reverse
                              (assoc company :gdp))))
                  (filter #(seq (:gdp %))))]
    (spit "out.txt" (format data))))
;; REPL scratchpad: forms inside `comment` are not evaluated when the file loads.
;; (gdp) and (companies) write gdp.edn and companies.edn, which (join) reads,
;; so evaluate those two before (join).
(comment
(join)
(companies)
(gdp)
)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment