Skip to content

Instantly share code, notes, and snippets.

@benwilson512
Created September 2, 2012 13:03
Show Gist options
  • Save benwilson512/3598504 to your computer and use it in GitHub Desktop.
Save benwilson512/3598504 to your computer and use it in GitHub Desktop.
require 'rubygems'
require 'json'
require 'fileutils'
SITE = "http://www.righttofoodindia.org/"
# Load the link map from map.json in the current working directory.
# The code below reads a "uri" key from every element, so the file is expected
# to hold an array of objects.
# File.read opens, reads and closes the file in one call, so no file handle is
# left open (File.open(...).read without a block never closes it).
links = JSON.parse(File.read("map.json"))
# links.each do |link|
# filename = link["name"].downcase.gsub(" ", "_").gsub("'", "").gsub("/", "")
# category = link["uri"].split("/").first
# url = SITE + link["uri"]
# command = "curl #{url} -o ./stuff/#{filename}.html"
# `#{command}`
# end
# Build the list of distinct categories named by the link map.
# A link's category is the first "/"-separated segment of its URI. Segments
# containing "html" are dropped, which removes pages such as "index.html"
# whose first segment is the file name itself. First-seen order is kept.
first_segments = links.map { |entry| entry["uri"].split("/").first }
categories = first_segments.reject { |segment| segment.include?("html") }.uniq
# Print, for every category, the word count of the files under stuff/<category>/
# as a percentage of `total`.
values = []
# NOTE(review): hard-coded denominator, presumably the word count of the whole
# corpus measured by hand; confirm, or derive it instead of hard-coding.
# Kept as a Float so the division below is not integer division.
total = 61877.0
categories.each do |cat|
  # The last stdout line of `wc -w` carries the figure we want in its first
  # column: the grand "total" line for several files, or the single file's line.
  results = `wc -w stuff/#{cat}/*`
  # When the directory is missing or empty, wc writes nothing to stdout. Treat
  # that as 0 words instead of calling a method on nil.
  value = results.lines.last.to_s.split.first.to_f
  values << value
  puts "#{cat}: #{((value / total) * 100).round(1)}%"
end
# Recursively mirrors the entries under +root+ into a parallel tree of
# tag-stripped text files.
#
# The destination of every entry is its own path with the first occurrence of
# "stuff/" replaced by "text/".
# * Directories are recreated at the destination and descended into.
# * Files whose path contains "html" are copied line by line: lines that are
#   empty in the source are dropped, and every <...> span that opens and closes
#   on the same line is removed from the remaining lines.
# * Any other file is skipped.
#
# NOTE: a path that does not contain "stuff/" maps onto itself, so such a file
# is rewritten in place.
#
# @param root [String] directory whose entries are processed
# @return [Array<String>] the paths found directly under +root+
def process(root)
  Dir.glob("#{root}/*").each do |obj|
    # sub rather than gsub: rewriting every occurrence gave files inside a
    # nested directory named "stuff" a destination that did not match the
    # directory created for them.
    new_path = obj.sub("stuff/", "text/")
    if File.directory?(obj)
      # Ask the filesystem instead of guessing from the name, so non-HTML files
      # no longer turn into empty directories at the destination.
      FileUtils.mkdir_p(new_path)
      process(obj)
    elsif obj.include?("html")
      # Done in Ruby rather than through a shell command so paths containing
      # spaces or shell metacharacters are handled. Binary I/O keeps pages in
      # any encoding byte-for-byte intact outside the removed tags.
      kept_lines = File.binread(obj).each_line.reject { |line| line == "\n" }
      text = kept_lines.map { |line| line.gsub(/<[^>]*>/, "") }.join
      # Files sitting directly under the top-level root need the destination
      # root itself to exist before they can be written.
      FileUtils.mkdir_p(File.dirname(new_path))
      File.binwrite(new_path, text)
    end
  end
end
# Start the recursive pass at the "stuff" directory, resolved relative to the
# current working directory.
process("stuff")
[
{
"uri": "index.html",
"name": "Home Page"
},
{
"uri": "latest.html",
"name": "Latest additions"
},
{
"uri": "campaign/campaign.html",
"name": "Introduction"
},
{
"uri": "campaign/disclaimer.html",
"name": "Disclaimer"
},
{
"uri": "foundation.html",
"name": "Foundation Statement"
},
{
"uri": "campaign/secretariat.html",
"name": "About the Secretariat"
},
{
"uri": "contactus.html",
"name": "Contact Addresses"
},
{
"uri": "links/updates.html",
"name": "Campaign Updates"
},
{
"uri": "fin/fin_intro.html",
"name": "Finance and Accounts"
},
{
"uri": "links/links.html",
"name": "Useful links"
},
{
"uri": "rtowork/ega_intro.html",
"name": "Introduction"
},
{
"uri": "rtowork/ega_latest_activities.html",
"name": "Current Highlights"
},
{
"uri": "rtowork/ega_keydocs.html",
"name": "Key Documents"
},
{
"uri": "rtowork/ega_events.html",
"name": "Events"
},
{
"uri": "rtowork/ega_rozgarupdates.html",
"name": "Rozgar Updates"
},
{
"uri": "rtowork/ega_articles.html",
"name": "Articles on EGA"
},
{
"uri": "rtowork/ega_news.html",
"name": "EGA in the News"
},
{
"uri": "rtowork/ega_briefing.html",
"name": "Campaign Materials"
},
{
"uri": "rtowork/ega_archives.html",
"name": "Archives"
},
{
"uri": "mdm/mdm_intro.html",
"name": "Mid-day Meals"
},
{
"uri": "mdm/mdm_scorders.html",
"name": "Supreme Court Orders"
},
{
"uri": "mdm/mdm_comrs.html",
"name": "Commissioners' Interventions"
},
{
"uri": "mdm/mdm_events.html",
"name": "Events"
},
{
"uri": "mdm/mdm_glines.html",
"name": "Mid-day Meal Guidelines"
},
{
"uri": "mdm/mdm_surveys.html",
"name": "Field Surveys"
},
{
"uri": "mdm/mdm_articles.html",
"name": "Articles"
},
{
"uri": "mdm/mdm_campaignmaterials.html",
"name": "Campaign Materials"
},
{
"uri": "mdm/mdm_news.html",
"name": "MDMs in the news"
},
{
"uri": "right_to_food_act_intro.html",
"name": "Right to Food Act"
},
{
"uri": "right_to_food_act_events.html",
"name": "Events"
},
{
"uri": "right_to_food_act_key_docs.html",
"name": "Key Documents"
},
{
"uri": "right_to_food_act.html",
"name": "Articles"
},
{
"uri": "icds/icds_index.html",
"name": "Integrated Child Development Services ICDS"
},
{
"uri": "icds/icds_orders.html",
"name": "Supreme Court Orders on ICDS"
},
{
"uri": "icds/icds_comrs_interventions.html",
"name": "Commissioners' Interventions"
},
{
"uri": "icds/icds_comrs_reports.html",
"name": "ICDS in Commissioner's Reports"
},
{
"uri": "icds/icds_nac.html",
"name": "ICDS in the National Advisory Council"
},
{
"uri": "icds/icds_glines.html",
"name": "Official ICDS Documents"
},
{
"uri": "icds/icds_surveys.html",
"name": "Field Surveys"
},
{
"uri": "icds/icds_articles.html",
"name": "Articles"
},
{
"uri": "icds/icds_events.html",
"name": "ICDS events"
},
{
"uri": "icds/icds_news.html",
"name": "ICDS in the news"
},
{
"uri": "pds/pds_intro.html",
"name": "Public Distribution System"
},
{
"uri": "pds/pds_articles.html",
"name": "Articles"
},
{
"uri": "case/case.html",
"name": "The 'Right to Food' case"
},
{
"uri": "orders/interimorders.html",
"name": "Supreme Court Orders"
},
{
"uri": "comrs/comrs_intro.html",
"name": "Supreme Court Commissioners"
},
{
"uri": "campaign/camp_primers.html",
"name": "Primers"
},
{
"uri": "campaign/camp_postersplays.html",
"name": "Posters and Plays"
},
{
"uri": "campaign/camp_pamphlets.html",
"name": "Pamphlets"
},
{
"uri": "campaign/camp_background.html",
"name": "Background Material"
},
{
"uri": "hindi/hindi_main.html",
"name": "Hindi section"
},
{
"uri": "hindi/campaign.html",
"name": "About the Campaign"
},
{
"uri": "hindi/legal.html",
"name": "Legal Action"
},
{
"uri": "hindi/child.html",
"name": "Children's right to food"
},
{
"uri": "hindi/ega.html",
"name": "Employment Guarantee"
},
{
"uri": "hindi/pds.html",
"name": "Public Distribution System"
},
{
"uri": "hindi/primers.html",
"name": "Primers and Posters"
},
{
"uri": "links/articles_intro.html",
"name": "Articles"
},
{
"uri": "links/field_reports.html",
"name": "Field Reports"
},
{
"uri": "research/research.html",
"name": "Field Surveys"
},
{
"uri": "research/social_audits.html",
"name": "Social Audits"
},
{
"uri": "rtowork/ega_news.html",
"name": "EGA in the News"
},
{
"uri": "mdm/mdm_news.html",
"name": "MDMs in the news"
},
{
"uri": "icds/icds_news.html",
"name": "ICDS in the news"
},
{
"uri": "links/links.html",
"name": "Useful links"
},
{
"uri": "events/kolkataconvention.html",
"name": "Kolkata Convention on the Right to Food and Work"
},
{
"uri": "rtowork/ray-intro.html",
"name": "Rozgar Adhikar Yatra"
},
{
"uri": "rtowork/banner.html",
"name": "Banner project"
},
{
"uri": "rtowork/egaconvention.html",
"name": "19 September 2004 Convention on Right to Work"
},
{
"uri": "events/bhopalconvention/bhopalmeeting.html",
"name": "Bhopal Convention"
},
{
"uri": "mdm/action.html",
"name": "Action day on Mid-day Meals April 2002"
}
]
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment