-
-
Save tommetge/4481169 to your computer and use it in GitHub Desktop.
#!/usr/bin/env ruby
require 'rubygems'
require 'mechanize'
require 'pp'
# Utility methods
# Adds the counts from second_pov into first_pov, in place.
#
# first_pov  - Hash of POV label => count; mutated and returned.
# second_pov - Hash of POV label => count; left untouched.
#
# Values are coerced with #to_i, so a label missing from first_pov (nil)
# counts as 0.
#
# Returns first_pov (the same object that was passed in).
def merge_povs(first_pov, second_pov)
  second_pov.each do |label, count|
    first_pov[label] = first_pov[label].to_i + count.to_i
  end
  first_pov
end
# Scraper methods
# Fetches one chapter page and counts the POV labels found in it.
#
# agent       - object whose #get(url) returns a page responding to
#               #search (main passes a Mechanize agent).
# chapter_url - URL handed to agent.get unchanged.
#
# A label is any match of /[a-zA-Z0-9].* POV/ in the combined text of the
# page's <p> elements. `.` does not cross newlines and `.*` is greedy, so a
# single line containing several " POV" occurrences produces one label that
# runs through the last of them.
#
# Returns a plain Hash of label => number of occurrences ({} when none).
def get_chapter_povs(agent, chapter_url)
  puts " Fetching #{chapter_url}..."
  chapter = agent.get(chapter_url)
  labels = chapter.search("p").text.scan(/[a-zA-Z0-9].* POV/)
  # Fold into a plain Hash (no default value) so lookups of unknown labels
  # still return nil for callers.
  labels.each_with_object({}) do |label, counts|
    counts[label] = counts.fetch(label, 0) + 1
  end
end
# Collects POV counts for every chapter linked from one book page.
#
# agent         - object whose #get(url) returns a page responding to
#                 #search (main passes a Mechanize agent).
# starting_page - site root; joined to book_url with a "/".
# book_url      - book page location relative to starting_page.
#
# Chapter links are the hrefs of "ol li a" anchors, followed by the hrefs of
# "ul li a" anchors (prologues and similar extras). Each href is passed to
# get_chapter_povs as-is.
#
# Scraping is best-effort: a chapter that raises a StandardError contributes
# nothing to the totals, but the failure is reported on stderr instead of
# being dropped silently.
#
# Returns a Hash of POV label => total count across the book's chapters.
def get_book_pov(agent, starting_page, book_url)
  puts "Fetching #{book_url}..."
  book = agent.get("#{starting_page}/#{book_url}")

  # Numbered chapters first, then prologues and friends.
  chapter_urls = book.search("ol li a").map { |link| link.attributes["href"].value }
  book.search("ul li a").each { |link| chapter_urls << link.attributes["href"].value }

  chapter_urls.each_with_object({}) do |chapter_url, book_povs|
    chapter_povs =
      begin
        get_chapter_povs(agent, chapter_url)
      rescue StandardError => e
        warn " Skipping #{chapter_url}: #{e.class}: #{e.message}"
        {}
      end
    merge_povs(book_povs, chapter_povs)
  end
end
# The real business: scraping books for POVs
# Entry point: walks every book linked from the site's start page and prints
# POV counts.
#
# Book links are the hrefs of "ol li a" anchors on the start page. For each
# book the per-book counts are pretty-printed as soon as they are available,
# and the grand total across all books is pretty-printed at the end.
def main
  agent = Mechanize.new
  starting_page = "http://encyclopaedia-wot.org"
  index = agent.get(starting_page)
  book_urls = index.search('ol li a').map { |link| link.attributes["href"].value }

  final_povs = {}
  book_urls.each do |book_url|
    book_povs = get_book_pov(agent, starting_page, book_url)
    pp book_povs
    merge_povs(final_povs, book_povs)
  end
  pp final_povs
end
# Run the scraper only when this file is executed directly, so the helper
# methods can be loaded from another script without hitting the network.
main if __FILE__ == $PROGRAM_NAME
Sorted, for easier scanning:
{"Abaldar Yulan POV"=>1,
"Adelorna Bastine POV"=>1,
"Alliandre POV"=>1,
"Almen Bunt POV"=>1,
"Alteima POV"=>1,
"Alviarin Freidhen POV"=>4,
"Alviarin POV"=>1,
"Androl POV"=>4,
"Aran'gar POV"=>2,
"Arymilla POV"=>1,
"Asmodean POV"=>1,
"Asne Zeramene POV"=>1,
"Assid Bakuun POV"=>1,
"Aviendha POV"=>13,
"Bain POV"=>1,
"Barmellin POV"=>1,
"Barriga POV"=>1,
"Bayle Domon POV"=>4,
"Bayrd POV"=>1,
"Beonin POV"=>1,
"Bertome Saighan POV"=>1,
"Bethamin Zeami POV"=>1,
"Birgitte POV"=>2,
"Cadsuane Melaidhrin POV"=>1,
"Cadsuane POV"=>15,
"Chulein POV"=>1,
"Cyndane POV"=>3,
"Dain Bornhald POV"=>2,
"Daved Hanlon POV"=>2,
"Davram Bashere POV"=>2,
"Delana Mosalaine POV"=>2,
"Demandred POV"=>5,
"Demira Eriff POV"=>2,
"Dyelin Taravin POV"=>1,
"Eamon Valda POV"=>3,
"Eben Hopwil POV"=>1,
"Egeanin Sarna POV"=>4,
"Egeanin Tamarath POV"=>1,
"Egwene POV"=>110,
"Egwene al'Vere POV"=>1,
"Elaida POV"=>3,
"Elaida a'Roihan POV"=>4,
"Elayne POV"=>66,
"Elenia Sarand POV"=>1,
"Ellorien Traemane POV"=>1,
"Elza Penfell POV"=>3,
"Ethenielle POV"=>1,
"Faile POV"=>23,
"Falendre POV"=>1,
"Falion Bhoda POV"=>2,
"Fortuona POV"=>1,
"Furyk Karede POV"=>4,
"Gabrelle POV"=>1,
"Galad Damodred POV"=>1,
"Galad POV"=>15,
"Galina Casban POV"=>5,
"Galina POV"=>2,
"Gareth Bryne POV"=>4,
"Gawyn POV"=>16,
"Geofram Bornhald POV"=>5,
"Gholam POV"=>1,
"Graendal POV"=>9,
"Hadnan Kadere POV"=>2,
"Harine POV"=>1,
"High Lady Suroth Sabelle Meldarath POV"=>1,
"Isam POV"=>1,
"Isam/Luc POV"=>1,
"Ituralde POV"=>5,
"Jaichim Carridin POV"=>4,
"Jaret Byar POV"=>1,
"Jesse Bilal POV"=>1,
"Joline Maza POV"=>1,
"Katerine Alruddin POV"=>2,
"Kennar Miraj POV"=>2,
"Lan POV"=>4,
"Leane Sharif POV"=>1,
"Leilwin POV"=>1,
"Lelaine Akashi POV"=>1,
"Liandrin POV"=>4,
"Loial POV"=>1,
"Luan Norwelyn POV"=>1,
"Maeric POV"=>1,
"Malenarin Rai POV"=>1,
"Masema Dagar POV"=>1,
"Mat POV"=>90,
"Merana Ambrey POV"=>3,
"Mesaana POV"=>2,
"Mili Skane POV"=>1,
"Min POV"=>29,
"Moghedien POV"=>7,
"Moiraine POV"=>8,
"Moiraine1 POV"=>1,
"Morgase POV"=>11,
"Morgase Trakand POV"=>1,
"Moridin POV"=>2,
"Myrelle POV"=>1,
"Nesune Bihara POV"=>2,
"Noal Charin POV"=>1,
"Nynaeve POV"=>57,
"Olver POV"=>1,
"Omni POV"=>7,
"Osan'gar POV"=>3,
"Padan Fain POV"=>10,
"Pedron Niall POV"=>5,
"Perrin POV"=>132,
"Pevara POV"=>5,
"Raefar Kisman POV"=>1,
"Rahvin POV"=>1,
"Rand POV"=>202,
"Reanne Corly POV"=>2,
"Renald Fanwar POV"=>1,
"Rhadam Asunawa POV"=>1,
"Rodel Ituralde POV"=>5,
"Romanda Cassin POV"=>2,
"Romanda POV"=>1,
"Saerin Asnobar POV"=>2,
"Sahra Covenry POV"=>1,
"Samitsu POV"=>1,
"Sammael POV"=>2,
"Sarene Nemdahl POV"=>1,
"Seaine Herimon POV"=>3,
"Seanchan POV"=>1,
"Seeker POV"=>1,
"Semirhage POV"=>2,
"Sevanna POV"=>6,
"Shaidar Haran POV"=>1,
"Shalon POV"=>2,
"Sheriam Bayanar POV"=>2,
"Sheriam POV"=>1,
"Siuan POV"=>4,
"Siuan Sanche POV"=>10,
"Sorilea POV"=>1,
"Sulin POV"=>1,
"Suroth POV"=>2,
"Talmanes POV"=>5,
"Tarna Feir POV"=>1,
"The Watcher4 POV"=>1,
"Thom Merrilin POV"=>4,
"Timna POV"=>1,
"Toveine Gazal POV"=>2,
"Tuon POV"=>6,
"Tylee Khirgan POV"=>1,
"Varek POV"=>1,
"Verin Mathwin POV"=>3,
"Verin POV"=>2,
"Vilnar Barada POV"=>1,
"Weilin Aldragoran POV"=>1,
"Yukiri POV"=>1}
{"Rand POV"=>202,
"Moiraine POV"=>8,
"Perrin POV"=>132,
"Nynaeve POV"=>57,
"Omni POV"=>7,
"Egwene POV"=>110,
"Min POV"=>29,
"Geofram Bornhald POV"=>5,
"Bayle Domon POV"=>4,
"Jaret Byar POV"=>1,
"Liandrin POV"=>4,
"Padan Fain POV"=>10,
"Thom Merrilin POV"=>4,
"Jaichim Carridin POV"=>4,
"Mat POV"=>90,
"Siuan Sanche POV"=>10,
"Verin Mathwin POV"=>3,
"Pedron Niall POV"=>5,
"Egeanin Sarna POV"=>4,
"Elayne POV"=>66,
"Seeker POV"=>1,
"Elaida a'Roihan POV"=>4,
"Dain Bornhald POV"=>2,
"Suroth POV"=>2,
"Faile POV"=>23,
"Sahra Covenry POV"=>1,
"Moiraine1 POV"=>1,
"Hadnan Kadere POV"=>2,
"Asmodean POV"=>1,
"Morgase POV"=>11,
"Gareth Bryne POV"=>4,
"Alteima POV"=>1,
"Moghedien POV"=>7,
"Rahvin POV"=>1,
"Merana Ambrey POV"=>3,
"Dyelin Taravin POV"=>1,
"Luan Norwelyn POV"=>1,
"Ellorien Traemane POV"=>1,
"Vilnar Barada POV"=>1,
"Demira Eriff POV"=>2,
"Bain POV"=>1,
"Sorilea POV"=>1,
"Myrelle POV"=>1,
"Galina Casban POV"=>5,
"Sevanna POV"=>6,
"Gawyn POV"=>16,
"Sammael POV"=>2,
"Graendal POV"=>9,
"Semirhage POV"=>2,
"Demandred POV"=>5,
"Sheriam Bayanar POV"=>2,
"Sulin POV"=>1,
"Aviendha POV"=>13,
"Katerine Alruddin POV"=>2,
"Nesune Bihara POV"=>2,
"Sarene Nemdahl POV"=>1,
"Delana Mosalaine POV"=>2,
"Eamon Valda POV"=>3,
"Rhadam Asunawa POV"=>1,
"Romanda Cassin POV"=>2,
"Lelaine Akashi POV"=>1,
"Mesaana POV"=>2,
"Osan'gar POV"=>3,
"Falion Bhoda POV"=>2,
"Seanchan POV"=>1,
"Alviarin Freidhen POV"=>4,
"Seaine Herimon POV"=>3,
"Maeric POV"=>1,
"Shaidar Haran POV"=>1,
"Joline Maza POV"=>1,
"Noal Charin POV"=>1,
"Cadsuane POV"=>15,
"The Watcher4 POV"=>1,
"Reanne Corly POV"=>2,
"Elaida POV"=>3,
"Toveine Gazal POV"=>2,
"Daved Hanlon POV"=>2,
"Moridin POV"=>2,
"Gholam POV"=>1,
"Chulein POV"=>1,
"Morgase Trakand POV"=>1,
"Cadsuane Melaidhrin POV"=>1,
"Aran'gar POV"=>2,
"Assid Bakuun POV"=>1,
"Furyk Karede POV"=>4,
"Kennar Miraj POV"=>2,
"Bertome Saighan POV"=>1,
"Varek POV"=>1,
"Davram Bashere POV"=>2,
"Abaldar Yulan POV"=>1,
"Ethenielle POV"=>1,
"Elza Penfell POV"=>3,
"Barmellin POV"=>1,
"Timna POV"=>1,
"Cyndane POV"=>3,
"Verin POV"=>2,
"Eben Hopwil POV"=>1,
"Asne Zeramene POV"=>1,
"Mili Skane POV"=>1,
"Tuon POV"=>6,
"Bethamin Zeami POV"=>1,
"Egeanin Tamarath POV"=>1,
"Raefar Kisman POV"=>1,
"Isam/Luc POV"=>1,
"Shalon POV"=>2,
"Elenia Sarand POV"=>1,
"Alviarin POV"=>1,
"Pevara POV"=>5,
"Rodel Ituralde POV"=>5,
"Gabrelle POV"=>1,
"Yukiri POV"=>1,
"Samitsu POV"=>1,
"Galina POV"=>2,
"Birgitte POV"=>2,
"Beonin POV"=>1,
"Arymilla POV"=>1,
"Loial POV"=>1,
"Weilin Aldragoran POV"=>1,
"Harine POV"=>1,
"Tarna Feir POV"=>1,
"Galad Damodred POV"=>1,
"High Lady Suroth Sabelle Meldarath POV"=>1,
"Egwene al'Vere POV"=>1,
"Adelorna Bastine POV"=>1,
"Saerin Asnobar POV"=>2,
"Jesse Bilal POV"=>1,
"Siuan POV"=>4,
"Leane Sharif POV"=>1,
"Sheriam POV"=>1,
"Romanda POV"=>1,
"Renald Fanwar POV"=>1,
"Falendre POV"=>1,
"Tylee Khirgan POV"=>1,
"Masema Dagar POV"=>1,
"Galad POV"=>15,
"Alliandre POV"=>1,
"Androl POV"=>4,
"Fortuona POV"=>1,
"Almen Bunt POV"=>1,
"Lan POV"=>4,
"Ituralde POV"=>5,
"Malenarin Rai POV"=>1,
"Olver POV"=>1,
"Barriga POV"=>1,
"Bayrd POV"=>1,
"Talmanes POV"=>5,
"Isam POV"=>1,
"Leilwin POV"=>1}