Skip to content

Instantly share code, notes, and snippets.

@jdevoo
Created October 14, 2015 06:17
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save jdevoo/71007a5e4ecf35e5d3f7 to your computer and use it in GitHub Desktop.
Save jdevoo/71007a5e4ecf35e5d3f7 to your computer and use it in GitHub Desktop.
(ns cascalab.webgraph
(:use [cascalog api])
(:require [cascalog.cascading.tap :as tap]
[cascalog.logic.ops :as c])
(:gen-class))
(defn- parse-str [^String s]
(seq (clojure.string/split s #"\s+")))
(defmapcatfn mk-node [from to]
[[from] [to]])
(defn nodes [edges]
(<- [?node]
(edges ?from ?to)
(mk-node ?from ?to :> ?node)
(:distinct true)))
(defn node-cnt [nodes]
(first (first (??<- [?cnt]
(nodes ?node)
(c/distinct-count ?node :> ?cnt)))))
(defn out-degree [edges]
(let [vertices (nodes edges)]
(<- [?node ?out_d]
(vertices ?node)
(edges ?node !!to)
(c/!count !!to :> ?out_d))))
(defn edges-tap [path]
(let [src (tap/hfs-textline path)]
(<- [?from ?to]
(src ?line)
((filterfn [^String line] (not (.startsWith line "#"))) ?line)
(parse-str ?line :> ?from ?to))))
(defn pr-init [nodes n]
(<- [?node ?pr]
(nodes ?node)
(div 1 n :> ?pr)))
(defmapfn out-factor [r d teleport]
(if (not (zero? d)) (* (- 1 teleport) (div r d)) 0))
(defmapfn safe-add [a b]
(if a (+ a b) b))
(defn calc-pr [edges vertices n pageranks teleport]
(let [degrees (out-degree edges)
out-factors (<- [?node ?out]
(pageranks ?node ?pr)
(degrees ?node ?d)
(out-factor ?pr ?d teleport :> ?out))
outlink-terms (<- [?node ?pr-outlink]
(edges ?from ?node)
(out-factors ?from ?out)
(c/sum ?out :> ?pr-outlink))
leakage-term (/ (- 1 (first (first (??<- [?s]
(outlink-terms _ ?out)
(c/sum ?out :> ?s))))) n)]
(<- [?node ?pr_new]
(vertices ?node)
(outlink-terms ?node !!outlink-term)
(safe-add !!outlink-term leakage-term :> ?pr_new))))
(defn pr-tap [path]
(let [src (tap/hfs-textline path)]
(<- [?node ?pr]
(src ?line)
(parse-str ?line :> ?node ?pr-str)
((mapfn [d] (Double/parseDouble d)) ?pr-str :> ?pr))))
(defmapfn abs-delta [^Double a ^Double b]
(Math/abs ^Double (- a b)))
(defn delta-pr-taps [path iter]
(let [pr-old (pr-tap (str path (dec iter)))
pr-new (pr-tap (str path iter))]
(first (first (??<- [?delta]
(pr-old ?node ?old)
(pr-new ?node ?new)
(abs-delta ?old ?new :> ?tmp)
(c/sum ?tmp :> ?delta))))))
(defn pagerank [in-path out-path epsilon teleport]
(let [e (edges-tap in-path)
v (nodes e)
n (node-cnt v)
pr-path (str out-path "/pagerank/iter-")
pr0 (pr-init v n)]
(?- (hfs-textline (str pr-path 0)) pr0)
(loop [pr pr0
iter 1]
(let [pr-new (calc-pr e v n pr teleport)]
(?- (hfs-textline (str pr-path iter)) pr-new)
(if (> (delta-pr-taps pr-path iter) epsilon)
(recur
(pr-tap (str pr-path iter))
(inc iter)))))))
(defn -main [in-path out-path epsilon teleport]
(pagerank in-path out-path epsilon teleport))
;;(def e (edges-tap "resources/mmds.txt"))
;;(def v (nodes e))
;;(time (def n (node-cnt v)))
;;(def pr0 (pr-init v n))
;;(?- (stdout) pr0)
;;(?- (stdout) (out-degree e))
;;(?- (stdout) (calc-pr e v n pr0 0.8))
;;(pagerank "resources/mmds.txt" "resources" 1e-5 0.2)
;;(pagerank "resources/web-Google.txt" "resources" 1e-5 0.2)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment