anotherjesse (owner)

Revisions

gist: 75300 Download_button fork
public
Description:
Create a report of the most-tweeted Firefox extensions
Public Clone URL: git://gist.github.com/75300.git
Embed All Files: show embed
Ruby #
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
require 'rubygems'
require 'erb'
require 'hpricot'
require 'open-uri'
 
# Maps an add-on id (the digit string captured from a link) to the number of
# times a link to that add-on appears in the downloaded search pages.
links = Hash.new(0)

# Matches links to a Firefox add-on page, with an optional path segment
# (e.g. a locale) before "firefox/"; the last capture is the numeric add-on id.
addon_link = %r{href="https://addons\.mozilla\.org/([^/]*/)?firefox/addon/(\d+)}

# get 50 pages of results from backtweets
(1..50).each do |n|
  fn = "backtype-#{n}.html"
  unless File.exist?(fn)
    begin
      # Download first and only then create the cache file, so a failed
      # request never leaves an empty file that later runs mistake for a
      # cached page. URI#read comes from open-uri and, unlike Kernel#open
      # with a URL, works on both old and current Ruby versions.
      html = URI.parse("http://backtweets.com/search?q=addons.mozilla.org&page=#{n}").read
      File.open(fn, 'w') { |f| f.write html }
    rescue OpenURI::HTTPError => e
      warn "skipping search page #{n}: #{e.message}"
    end
    sleep 2 # be polite to the remote server between requests
  end
  next unless File.exist?(fn)

  # parse the pages
  data = File.read(fn)
  data.scan(addon_link).each { |d| links[d.last] += 1 }
end
 
# grab all the amo pages
# Each add-on page is cached on disk as "amo-<id>.html"; ids that already have
# a cache file are not downloaded again.
links.keys.each do |id|
  fn = "amo-#{id}.html"
  next if File.exist?(fn)

  begin
    # Download before creating the cache file so a failed request does not
    # leave an empty "amo-<id>.html" behind. URI#read (from open-uri) is used
    # because Kernel#open no longer accepts URLs on Ruby 3.0+.
    html = URI.parse("https://addons.mozilla.org/en-US/firefox/addon/#{id}").read
    File.open(fn, 'w') { |f| f.write html }
  rescue OpenURI::HTTPError => e
    # e.g. an add-on that has been removed; keep going with the others.
    warn "skipping add-on #{id}: #{e.message}"
  end
  sleep 2 # pause between requests, after the cache file has been closed
end
 
 
# create the report
# Writes one <li> per add-on to report.html, most-linked add-ons first and
# alphabetically by name within the same count.

# Bucket the [id, count] pairs of `links` by their count.
groups = links.group_by(&:last)

# The block form closes report.html even if an error interrupts the loop.
File.open('report.html', 'w') do |f|
  groups.keys.sort.reverse.each do |n|
    names = groups[n].map(&:first).map do |id|
      # Best effort: take the part of the cached page's <title> before " :: ".
      # Any failure (missing cache file, no <title>, parser error) falls back
      # to showing the id instead of aborting the report.
      name = begin
        Hpricot(File.read("amo-#{id}.html")).at('title').inner_text.split(" :: ").first
      rescue StandardError
        nil
      end
      # An empty title would give nil here, which cannot be sorted or shown.
      name = id.to_s if name.nil? || name.empty?
      [name, id]
    end

    names.sort.each do |name, id|
      # The name is text scraped from a remote page, so escape it before
      # embedding it in markup.
      f.write "<li><a href='https://addons.mozilla.org/firefox/addon/#{id}'>#{ERB::Util.html_escape(name)}</a> <a href='http://backtweets.com/search?q=addon%2F#{id}'>#{n}</a></li>\n"
    end
  end
end