Created
July 11, 2014 17:38
-
-
Save heatherm/13d2107101f2d2f25c69 to your computer and use it in GitHub Desktop.
Android Finance App Rating Scraper
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
require 'nokogiri' | |
require "net/http" | |
require "uri" | |
require 'statistics2' | |
# Lower bound of the Wilson score confidence interval for a proportion.
#
# Used to rank items by "share of positive ratings" without letting items
# that have very few ratings float to the top.
#
# @param pos [Numeric] number of positive observations
# @param n [Numeric] total number of observations
# @param confidence [Float] confidence level, e.g. 0.95
# @return [Numeric] 0 when there are no observations (n <= 0), otherwise the
#   lower bound of the interval as a Float
def ci_lower_bound(pos, n, confidence)
  # No data (or a nonsensical negative total) means no evidence at all.
  return 0 if n <= 0

  # Normal quantile for the two-sided confidence level (about 1.96 for 0.95).
  z = Statistics2.pnormaldist(1 - (1 - confidence) / 2)
  z_squared = z * z
  # 1.0 forces float division when both counts are Integers.
  phat = 1.0 * pos / n

  spread = z * Math.sqrt((phat * (1 - phat) + z_squared / (4 * n)) / n)
  (phat + z_squared / (2 * n) - spread) / (1 + z_squared / n)
end
# Reads a numeric count from the text matched by a CSS selector.
#
# The document must be passed in explicitly: a method body cannot see the
# local variables of the block that calls it.
#
# @param css_selector [String] selector for the element(s) holding the count
# @param doc [#css] parsed page to search (a Nokogiri document in this script)
# @return [Float] the matched text with "," separators removed, converted with
#   String#to_f (so non-numeric or empty text yields 0.0)
def get_rating_count(css_selector, doc)
  doc.css(css_selector).inner_text.delete(",").to_f
end

# Fetches +uri+ with GET, following redirects up to +redirects_left+ hops.
#
# @param uri [URI] address to fetch
# @param redirects_left [Integer] remaining redirect hops allowed
# @return [Net::HTTPResponse] the first non-redirect response
# @raise [RuntimeError] when the redirect budget is exhausted
def fetch_page(uri, redirects_left = 5)
  response = Net::HTTP.get_response(uri)
  location = response["location"]
  return response unless response.is_a?(Net::HTTPRedirection) && location
  raise "Too many redirects while fetching #{uri}" if redirects_left.zero?

  # Location may be relative, so resolve it against the current URI.
  fetch_page(URI.join(uri.to_s, location), redirects_left - 1)
end

# Source listing for the app paths:
# https://play.google.com/store/apps/category/FINANCE/collection/topselling_free?hl=en
# jQuery snippet that collects the card hrefs on that page into `hrefs`:
# $('.card-click-target').each(function(){hrefs.push($(this).attr('href'));})
app_paths = ["ADD SOME"]
ratings = {}

app_paths.each do |path|
  # Guard against the placeholder (or any other non-path entry) so the script
  # reports the problem instead of dying on an invalid URI.
  unless path.start_with?("/")
    warn "Skipping #{path.inspect}: expected a Play Store path starting with '/'"
    next
  end

  response = fetch_page(URI.parse("https://play.google.com#{path}"))
  doc = Nokogiri::HTML(response.body)

  app_name = doc.css(".document-title div").inner_text
  total_review_count = get_rating_count(".reviews-stats .reviews-num", doc)
  five_star_count = get_rating_count(".five .bar-number", doc)
  four_star_count = get_rating_count(".four .bar-number", doc)

  # Four- and five-star ratings count as "positive".
  positive_review_count = five_star_count + four_star_count
  ratings[app_name] = ci_lower_bound(positive_review_count, total_review_count, 0.95)
end

# Highest lower bound first.
puts ratings.sort_by { |_name, score| -score }.to_h
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment