Skip to content

Instantly share code, notes, and snippets.

@melborne
Created September 27, 2012 06:27
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save melborne/3792505 to your computer and use it in GitHub Desktop.
Save melborne/3792505 to your computer and use it in GitHub Desktop.
Gviz sample: U.S. States
# encoding: UTF-8
require "nokogiri"
require "open-uri"
module Gviz
  # Downloads (or reads) an HTML document and extracts the rows of every
  # table carrying the "wikitable" CSS class.
  class Scraper
    # Locations matching this pattern are fetched over the network via open-uri.
    REMOTE_URL = %r{\A(?:https?|ftp)://}i

    class << self
      # Fetches the document at +path+ and returns its table rows.
      #
      # path - URL string, local file path, or any object with a public #open.
      #
      # Returns an Array of the <tr> nodes found by #parse.
      def build(path)
        parse get(path)
      end

      # Loads +url+ and parses it with Nokogiri.
      #
      # On an HTTP-level failure the error is reported on STDERR and the
      # process terminates with a non-zero status so callers (shell scripts,
      # CI) can detect the failure.
      #
      # Returns whatever Nokogiri::HTML returns for the document.
      def get(url)
        read_source(url) { |io| Nokogiri::HTML(io) }
      rescue OpenURI::HTTPError => e
        STDERR.puts "HTTP Access Error:#{e}"
        exit 1
      end

      # Collects every row of every "table.wikitable" in +html+.
      #
      # html - object responding to #css (e.g. a parsed Nokogiri document).
      #
      # Returns an Array of row nodes in document order.
      def parse(html)
        html.css("table.wikitable tr").to_a
      end

      private

      # Opens +location+ and yields the IO, closing it when the block returns.
      #
      # Kernel#open is deliberately avoided: since Ruby 3.0 open-uri no longer
      # makes it understand URLs, and it executes a shell command when handed
      # a string beginning with "|".
      def read_source(location, &block)
        if location.respond_to?(:open)
          # URI::HTTP, Pathname, ... already know how to open themselves.
          location.open(&block)
        elsif location.to_s =~ REMOTE_URL
          URI.parse(location.to_s).open(&block)
        else
          File.open(location, &block)
        end
      end
    end
  end
end
# When run directly (not required), scrape the Wikipedia list of U.S. states
# and print the heading texts followed by the cell texts of every other row.
if $PROGRAM_NAME == __FILE__
  source_url = "http://en.wikipedia.org/wiki/List_of_U.S._states"
  table_rows = Gviz::Scraper.build(source_url)
  heading_row = table_rows.first
  data_rows = table_rows.drop(1)
  p heading_row.css('th').map { |cell| cell.text }
  p data_rows.map { |row| row.css('td').map { |cell| cell.text } }
end
# Column headings for the rows stored in BODY, in column order.
# Source page: http://en.wikipedia.org/wiki/List_of_U.S._states
# NOTE(review): presumably the captured output of the scraper script's header
# row; the raw text (including "[update]" and embedded newlines) is kept as-is.
HEADER = ["Name", "IPA", "USPS", "Flag", "Statehood", "Area (sq mi)", "Population (2011[update])", "Capital", "Most populous city", "Preceding entity", "GDP\n($millions)\n"]
BODY = [["Alabama", "/ˌæləˈbæmə/", "AL", "", "01819-12-14December 14, 1819", "&1000000000013576500000052,419 sq mi (135,765 km2)", "4,802,740", "Montgomery", "Birmingham", "Alabama Territory", "174,400"], ["Alaska", "/əˈlæskə/", "AK", "", "01959-01-03January 3, 1959", "&10000000001717854000000663,267 sq mi (1,717,854 km2)", "722,718", "Juneau", "Anchorage", "Alaska Territory", "45,600"], ["Arizona", "/ˌærɪˈzoʊnə/", "AZ", "", "01912-02-14February 14, 1912", "&10000000000295254000000113,998 sq mi (295,254 km2)", "6,482,505", "Phoenix", "Phoenix", "Arizona Territory", "261,300"], ["Arkansas", "/ˈɑrkənsɔː/", "AR", "", "01836-06-15June 15, 1836", "&1000000000013700200000052,897 sq mi (137,002 km2)", "2,937,979", "Little Rock", "Little Rock", "Arkansas Territory", "105,800"], ["California", "/ˌkælɪˈfɔrnjə/", "CA", "", "01850-09-09September 9, 1850", "&10000000000423970000000163,700 sq mi (423,970 km2)", "37,691,912", "Sacramento", "Los Angeles", "Directly admitted from Mexican Cession", "1,936,400"], ["Colorado", "/ˌkɒləˈrædoʊ/", "CO", "", "01876-08-01August 1, 1876", "&10000000000269837000000104,185 sq mi (269,837 km2)", "5,116,796", "Denver", "Denver", "Colorado Territory", "259,700"],
# --- (rows for the states between Colorado and Washington omitted) ---
["Washington", "/ˈwɒʃɪŋtən/", "WA", "", "01889-11-11November 11, 1889", "&1000000000018482700000071,362 sq mi (184,827 km2)", "6,830,038", "Olympia", "Seattle", "Washington Territory", "351,100"], ["West Virginia", "/ˌwɛst vərˈdʒɪnjə/", "WV", "", "01863-06-20June 20, 1863", "&1000000000006275500000024,230 sq mi (62,755 km2)", "1,855,364", "Charleston", "Charleston", "Divided off from Virginia without the consent of that state", "66,600"], ["Wisconsin", "/wɪsˈkɒnsɪn/", "WI", "", "01848-05-29May 29, 1848", "&1000000000016963900000065,498 sq mi (169,639 km2)", "5,711,767", "Madison", "Milwaukee", "Wisconsin Territory, formed from the Northwest Territory", "251,400"], ["Wyoming", "/waɪˈoʊmɪŋ/", "WY", "", "01890-07-10July 10, 1890", "&1000000000025334800000097,818 sq mi (253,348 km2)", "568,158", "Cheyenne", "Cheyenne", "Wyoming Territory", "38,200"]]
# encoding: UTF-8
require "csv"
require_relative "usdata"
# Read the link table, discarding the CSV header row. Every remaining row is
# turned into [USPS symbol, name, [linked USPS symbols]] and the rows are sorted.
link_rows = CSV.read('uslinks.csv').drop(1)
links = link_rows.map { |usps, name, *link|
  [usps.to_sym, name, link.map { |code| code.to_sym }]
}.sort

# Reduce each scraped BODY row to
# [USPS symbol, name, capital, statehood year, area, population, GDP].
usdata = BODY.map { |name, _ipa, usps, _flag, sthood, area, pop, cap, _popcity, _pre, gdp|
  clean_name = name.sub(/\[\d+\]$/, '')   # strip a trailing footnote marker like "[12]"
  year = sthood[/\d{4}$/]                 # the cell ends with the four-digit year
  extent = area[/(?<=\()[\d,]+/]          # first number inside the parentheses
  extent = '858,927' if usps == 'AK'      # hard-coded replacement value for Alaska
  figures = [year, extent, pop, gdp].map { |text| text.delete(',').to_i }
  [usps.to_sym, clean_name, cap] + figures
}.sort

# Both lists are sorted by USPS symbol, so rows are paired by position; the
# link list of each paired row is appended as the final element.
usdata = usdata.zip(links).map { |state_row, link_row| state_row << link_row.last }
require "gviz"
# Year spans used to bucket states by statehood year; one "first-last" pair
# per line, converted below into an Array of inclusive Integer ranges.
era_table = <<EOS
1776-1790
1791-1799
1800-1819
1820-1839
1840-1859
1860-1879
1880-1899
1900-1950
1950-1959
EOS
era = era_table.each_line.map do |span|
  first_year, last_year = span.split('-').map { |part| part.to_i }
  Range.new(first_year, last_year)
end
# Returns the inclusive Range spanning the smallest and largest value found
# in one column of +data+.
#
# data - Array of rows (each row indexable with []).
# idx  - 1-based column position.
def minmax(data, idx)
  column = data.map { |row| row[idx - 1] }
  smallest, largest = column.minmax
  Range.new(smallest, largest)
end
# Reopens Integer (not Fixnum) to add #norm. The Fixnum constant was
# deprecated in Ruby 2.4 and removed in 3.2, where `class Fixnum` would
# silently create an unrelated class and leave integers without #norm. On
# older Rubies Fixnum inherits from Integer, so this works everywhere.
class Integer
  # Linearly rescales the receiver from the +current+ range onto +target+.
  #
  # current - Range the receiver is measured against (begin maps to
  #           target.begin, end maps to target.end).
  # target  - Range to map onto (default: 1..10).
  #
  # Returns the rescaled value rounded to the nearest Integer.
  def norm(current, target=1..10)
    span = current.end - current.begin
    # A zero-width source range would divide by zero (NaN/Infinity) and make
    # #round raise FloatDomainError; map everything to the low end instead.
    return target.begin.round if span.zero?

    unit = (self - current.begin) / span.to_f
    (unit * (target.end - target.begin) + target.begin).round
  end
end
# Value ranges of the area (5th) and population (6th) columns of usdata,
# used to scale node width and polygon side count.
area_minmax = minmax(usdata, 5)
pop_minmax = minmax(usdata, 6)

# Build one node per state and connect each state to its linked states.
gv = Gviz.new(:USA)
gv.graph do
  global layout: 'neato', overlap: false, label: 'U.S. states', fontsize: 92, size: 32
  nodes colorscheme: 'blues9', style: 'filled', shape: 'polygon', regular: true
  edges arrowhead: 'none'

  usdata.each do |id, name, _cap, sthood, area, pop, _gdp, link|
    # Earlier statehood eras get a higher colour number in the colour scheme.
    shade = 9 - era.index { |span| span.include?(sthood) }
    breadth = area.norm(area_minmax, 1..6)
    side_count = pop.norm(pop_minmax, 4..12)

    route id => link
    node id, label: name, fillcolor: shade, width: breadth, fontsize: 14 * breadth, sides: side_count
    # Switch to white text on the highest colour numbers.
    node id, fontcolor: 'white' if shade > 7
  end
end

gv.save(:usa, :png)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment