Skip to content

Instantly share code, notes, and snippets.

@daemianmack
Created August 18, 2012 09:30
Show Gist options
  • Save daemianmack/3385630 to your computer and use it in GitHub Desktop.
Save daemianmack/3385630 to your computer and use it in GitHub Desktop.
org chart scrape-builder from FM
require 'rubygems'
require 'set'
require 'yaml'
require 'hpricot'
require 'mechanize'
# E-mail address find_targets submits in the "user_session[email]" login field.
USER_ACCOUNT = "dmack@federatedmedia.net"
# Login password, read from the first command-line argument (nil when omitted).
PASSWORD = ARGV[0]
# Upper bound harvest uses to stop visiting profile links.
MAX_LINKS = 200
# Value handed to sleep by harvest after each profile page is processed.
SLEEP_INTERVAL = 0.5
###########
# Helpers #
###########
module Enumerable
  # Returns an Array holding the first element seen for each distinct value
  # the block yields, in iteration order. Later elements whose block value
  # was already produced are dropped.
  def uniq_by
    seen_keys = {}
    each_with_object([]) do |item, kept|
      key = yield(item)
      next if seen_keys.key?(key)
      seen_keys[key] = true
      kept << item
    end
  end
end
# Hash that can lazily create its own entries: a stand-in for a
# default_proc on Rubies older than 1.9.
class OmniscientHash < Hash
  # Returns the value stored under +key+, first storing a new Person built
  # from +key+ when the key is not present yet.
  def produce(key)
    store(key, Person.new(key)) unless key?(key)
    fetch(key)
  end
end
# Thought it would be neat to visually weight number of descendant
# reports to get a sense of departmental distribution. End result
# not smooth curve as hoped -- report counts distributed too chunkily.
#
# Builds the global $heat_map: a list of [threshold, color] pairs ordered
# from the highest threshold down, ending with [0, "grey"]. Callers pick
# the first pair whose threshold is <= a person's descendant-report count.
# Also (re)assigns the global $colors palette.
#
# max - the largest descendant-report count to cover (an Integer).
#
# Returns the $heat_map array.
def make_heat_map(max)
  $colors = %w(magenta hotpink maroon crimson red orange yellow greenyellow steelblue cornflowerblue)
  step = max / $colors.count.to_f
  # A zero step (max == 0) is rejected by Range#step, so fall back to no
  # thresholds at all; everyone then gets the boring color below.
  ascending = step > 0 ? (1..max - 1).step(step).to_a : [] # e.g. max 20 => [1.0, 3.0, 5.0 ...]
  thresholds = ascending.map { |x| x.floor }.reverse       # e.g. max 20 => [19, 17, 15 ...]
  $heat_map = thresholds.zip($colors)
  $heat_map.push([0, "grey"]) # With zero reports, you get a boring color.
end
# Shells out twice: first runs Graphviz `dot` over graph.out to produce
# temp.svg, then runs xsltproc with the notugly stylesheet over temp.svg to
# produce graph.svg. Exit statuses are not inspected.
def render
  %x(dot -Tsvg graph.out > temp.svg)
  # The stylesheet comes from: git clone https://github.com/vidarh/diagram-tools
  %x(xsltproc diagram-tools/notugly.xsl temp.svg > graph.svg)
  # Intermediate files are deliberately left in place:
  #`rm graph.out temp.svg`
end
#########################################################################
# Screen-scrape the two worksimple pages that have direct links to each #
# FM employee. Stash reporting graph data in Person::PEOPLE. #
#########################################################################
# Visits each distinct profile link returned by find_targets (deduplicated
# by href), looks up the person and manager named on that page, and records
# the reporting edge in Person::PEOPLE. Sleeps SLEEP_INTERVAL after each
# page and stops once MAX_LINKS pages have been visited.
#
# Returns whatever the final reports_to call returns.
def harvest
  visited = 0
  find_targets.uniq_by { |x| x.href }.each do |link|
    # Check the cap before visiting so at most MAX_LINKS pages are fetched.
    if visited >= MAX_LINKS
      p 'Visited max links!'
      break
    end
    visited += 1
    this_guy, his_manager = search_for_people link
    # GOALLLLLLLLLL
    Person::PEOPLE.produce(this_guy).reports_to(his_manager)
    sleep SLEEP_INTERVAL
  end
  Person::PEOPLE.produce("Tom Davies").reports_to("Nate Perry-Thistle") # Tom's not in yet.
end
# Logs in to the worksimple site with USER_ACCOUNT / PASSWORD, follows the
# "Teams" link and then the "Next" link, and collects every link whose href
# contains "/users/" from both listing pages.
#
# Returns an Array of the collected links, minus those whose text contains
# "Profile". Always an Array, never nil.
def find_targets
  agent = Mechanize.new
  page = agent.get 'https://federatedmedia.getworksimple.com/session/new'
  login_form = page.form_with(:action => '/session') do |f|
    f.field_with(:name => "user_session[email]").value = USER_ACCOUNT
    f.field_with(:name => "user_session[password]").value = PASSWORD
    f.field_with(:name => "authenticity_token").value = f.authenticity_token
  end
  page = login_form.submit

  # to_s guards against links that carry no href at all.
  user_links = lambda { |listing| listing.links.select { |x| x.href.to_s.match "/users/" } }

  page = page.links.find { |x| x.text == "Teams" }.click # Page 1 of listing.
  targets = user_links.call(page)
  page = page.links.find { |x| x.text.match "Next" }.click # Page 2 of listing.
  targets.concat(user_links.call(page))
  # Non-bang reject: reject! returns nil when nothing is removed, which
  # would hand nil back to the caller.
  targets.reject { |x| x.text.match "Profile" } # Snip my account edit pages.
end
# Follows +link+, parses the resulting page with Hpricot and pulls out the
# person's name (from "div.name") and their manager's name (from
# "div.reviewer a.user"). Prints one line describing the pair.
#
# Returns a two-element Array: [person_name, manager_name].
def search_for_people link
  doc = Hpricot(link.click.content)

  # \nThis Guy\n => This Guy
  name_lines = doc.search("div.name").innerHTML.split("\n")
  this_guy = name_lines.last.squeeze(" ")

  # \n\nHis Manager\n => His Manager
  manager_lines = doc.search('div.reviewer a.user').text.split("\n")
  his_manager = manager_lines.last

  # Workaround Amy Yeh's zombied entry ("/users/7894").
  if his_manager == "Amy Yeh"
    his_manager = "Charlie Speight"
  end

  puts "#{this_guy}'s manager is #{his_manager}"
  [this_guy, his_manager]
end
#######################################################################
# Model for people, with each person holding a list of their reports. #
#######################################################################
# One node of the reporting graph: a name, a DOT-safe key derived from the
# name, and the list of Person objects that report to this one. Every
# instance registers itself in the shared PEOPLE registry under its name.
class Person
  PEOPLE = OmniscientHash.new

  attr_accessor :name, :pk, :reports

  # name - the person's display name (a String).
  def initialize(name)
    @name = name
    @pk = name.downcase.gsub(/[ '-]/, "_") # Punctuated names break DOT. :(
    @reports = []
    PEOPLE[name] = self
  end

  # Appends this person to the reports of the manager called +name+,
  # creating the manager's entry in PEOPLE when needed. Does nothing and
  # returns nil when +name+ is nil or false.
  def reports_to(name)
    return unless name
    PEOPLE.produce(name).reports << self
  end

  # Recurse over person's reports, return total top-down count: every
  # direct report counts once, plus everything beneath them.
  def self.count_sub_reports(person)
    person.reports.inject(0) do |total, report|
      total + 1 + count_sub_reports(report)
    end
  end
end
##########################################################################
# Generate a graphviz chart showing the reporting structure described in #
# Person::PEOPLE. The three basic DOT concerns -- points, edges and #
# boxes -- fail to parse in our top-down rendering if intermixed, #
# requiring that we sort by type. #
##########################################################################
# Walks the reporting tree below a root person, collecting DOT statements
# in three separate buckets (points, edges, boxes), and writes them in that
# order to graph.out. All of the work happens in the constructor.
class OrgChart
  # Every person is emitted under their pk plus each of these suffixes.
  # NOTE(review): the four suffixed copies are never connected by an edge;
  # they look like layout padding or leftover experimentation. Confirm
  # whether they are still wanted.
  NODE_SUFFIXES = ["", "1", "2", "3", "4"]

  # root_person - object responding to pk, name and reports; the top of the
  #               chart.
  def initialize(root_person)
    @seen = Set.new # pk strings of people already given a node
    @points, @edges, @boxes = [], [], []
    node root_person
    walk_reports_of root_person
    emit_DOT
  end

  # Records the box statements for +person+ unless that pk was already
  # handled. Leaves (no reports) are drawn as boxes, everyone else as ovals.
  #
  # Returns false when the person had been seen before, true otherwise.
  def node(person)
    return false if @seen.member? person.pk
    @seen << person.pk
    color = apply_heat_map person
    shape = person.reports.empty? ? "box" : "oval"
    NODE_SUFFIXES.each do |suffix|
      @boxes << %Q{ p#{person.pk + suffix} [ shape = #{shape}, style="filled", fillcolor="#{color}", label="#{person.name}" ] }
    end
    true
  end

  # Depth-first walk over +person+'s reports, adding a point statement for
  # the manager and an edge to each report not encountered before, then
  # descending into reports that were newly given a node.
  def walk_reports_of(person)
    person.reports.each do |r|
      # @seen stores pk strings, so membership must be tested with r.pk;
      # testing the Person itself never matches and lets duplicates through.
      unless @seen.member? r.pk
        @points << " p#{person.pk} [ shape = point ]"
        @edges << " p#{person.pk} -> p#{r.pk}"
      end
      walk_reports_of r if node r
    end
  end

  # Returns the color of the first $heat_map entry whose threshold is not
  # above the person's total descendant-report count (nil if none matches).
  def apply_heat_map(person)
    count = Person.count_sub_reports person
    _threshold, color = $heat_map.find { |threshold, _| count >= threshold }
    color
  end

  # Writes the collected statements to graph.out, one section after the
  # other: points, then edges, then boxes.
  def emit_DOT
    File.open("graph.out", "w") do |f|
      f.write "digraph orgchart {\n"
      [@points, @edges, @boxes].each do |section|
        f.write(section.join("\n") + "\n")
      end
      f.write "}\n"
    end
  end
end
#######
# Go. #
#######
# Entry point. Loads a previously saved people table from marshalled.txt,
# builds the heat map from John Battelle's total report count, generates
# the DOT file for his org chart and renders it.
#
# To scrape live data instead of loading the saved table:
#   harvest
#   make_heat_map Person.count_sub_reports(Person::PEOPLE["John Battelle"])
#   OrgChart.new Person::PEOPLE["John Battelle"]
def main
  # File.read closes the file, unlike a bare open() whose handle leaks.
  yaml_text = File.read("marshalled.txt")
  # NOTE(review): this deserializes arbitrary Ruby objects, so only feed it
  # a marshalled.txt you produced yourself. Newer Psych releases refuse
  # custom classes in plain YAML.load, hence unsafe_load where available.
  # Presumably the file holds the Person table -- confirm.
  people = YAML.respond_to?(:unsafe_load) ? YAML.unsafe_load(yaml_text) : YAML.load(yaml_text)
  root = people["John Battelle"]
  make_heat_map Person.count_sub_reports(root)
  OrgChart.new root
  render
end
# ruby graph.rb [PASSWORD]
main
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment