Skip to content

Instantly share code, notes, and snippets.

@robmckinnon
Created November 15, 2008 14:19
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save robmckinnon/25249 to your computer and use it in GitHub Desktop.
Save robmckinnon/25249 to your computer and use it in GitHub Desktop.
Grabs UK fund prices.
require 'rubygems'
require 'open-uri'
require 'timeout'
require 'hpricot'
require 'activesupport'
require 'morph'
# Override Hpricot's buffer_size setting before any page is parsed.
# NOTE(review): presumably raised so pages containing very large elements or
# attributes parse without buffer errors -- confirm against the Hpricot docs.
Hpricot.buffer_size = 2621444
# Fetches +url+ with browser-like request headers and returns the response
# parsed by Hpricot.
#
# A failed fetch is retried after a linearly growing pause (4s, 6s, 8s, ...).
# The attempt counter is local to each call, so one flaky URL does not slow
# down later fetches, and the number of attempts is bounded so a permanently
# broken URL cannot loop forever.
#
# url          - String URL to fetch.
# max_attempts - Integer total number of tries before giving up (default 5).
#
# Returns whatever Hpricot() returns for the opened response.
# Raises the last error once +max_attempts+ tries have failed. Errors outside
# StandardError / Timeout::Error (e.g. Interrupt, SystemExit) are never
# retried and propagate immediately.
def open_doc(url, max_attempts = 5)
  request_headers = {
    "User-Agent" =>
      "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1)",
    "Accept" =>
      "text/xml,application/xml,application/xhtml+xml,text/html;q=0.9,text/plain;q=0.8,image/png,*/*;q=0.5",
    "Accept-Language" =>
      "en-us,en;q=0.7,en-gb;q=0.3",
    "Accept-Charset" =>
      "ISO-8859-1,utf-8;q=0.7,*;q=0.7"
  }
  attempt = 0
  begin
    attempt += 1
    open(url, request_headers) { |f| Hpricot(f) }
  # Timeout::Error is listed explicitly because on Ruby 1.8 it is not a
  # StandardError subclass.
  rescue StandardError, Timeout::Error
    raise if attempt >= max_attempts
    sleep 2 * (attempt + 1)
    retry
  end
end
# example usage:
# id = 'rufferllp/ruffer/RZBST'
# fund = Fund.new id
# fund.to_csv
# Price snapshot for one fund, scraped from http://funds.ft.com/funds/<id>.
#
# Each label/value pair found in the page's price table is passed to +morph+
# (from the Morph mixin). NOTE(review): readers such as bid_gbx, nav_gbx,
# offer_gbx, offer_ and percentage_change are not defined in this class and
# are presumably generated by Morph from the page labels -- confirm.
class Fund
  include Morph

  # Builds the snapshot. When +fund+ is nil nothing is fetched and the
  # object is left empty.
  #
  # fund - path fragment appended to the FT funds URL,
  #        e.g. 'rufferllp/ruffer/RZBST'.
  def initialize(fund = nil)
    return unless fund

    page_url = "http://funds.ft.com/funds/#{fund}"
    page = open_doc page_url
    each_entry(page) { |label, value| morph(label, value) }

    # Keep only the HH:MM:SS portion of the current UTC time.
    self.time = Time.now.utc.to_s.match(/\d\d:\d\d:\d\d/)[0]
    self.name = page.at('.FundNameHeader').inner_text
    self.url = page_url
    self.date = Date.today.to_s
    self.id_name = "#{page_url}##{date}"
  end

  # Returns bid_gbx when the page had a 'Bid(GBX)' table, nav_gbx otherwise.
  def bid_price
    if @bid_offer
      bid_gbx
    else
      nav_gbx
    end
  end

  # Returns the offer price, or '' when the page had no bid/offer table.
  # Falls back to the +offer_+ reader when +offer_gbx+ is not available.
  def offer_price
    has_offer_gbx = respond_to?(:offer_gbx)
    return '' unless @bid_offer

    has_offer_gbx ? offer_gbx : offer_
  end

  # Returns one comma-separated line: name, currency (always GBP), bid price,
  # offer price and percentage change.
  def to_csv
    fields = [name, 'GBP', bid_price, offer_price, percentage_change]
    fields.map { |field| field.to_s }.join(', ')
  end

  private

  # Yields a (label, value) pair for each two non-blank cells of every price
  # row in +doc+; hyphens in the label are replaced with underscores.
  def each_entry(doc)
    get_price_rows(doc).each do |row|
      texts = (row/'td').map { |cell| cell.inner_text.strip }
      texts = texts.reject { |text| text.blank? }
      texts.in_groups_of(2) do |label, value|
        yield label.tr('-', '_'), value
      end
    end
  end

  # Returns the rows of the 'Bid(GBX)' table, or of the 'Nav(GBX)' table when
  # no bid rows exist. Records in @bid_offer whether bid rows were found.
  def get_price_rows(doc)
    bid_rows = rows_starting('Bid(GBX)', doc)
    @bid_offer = !bid_rows.empty?
    return bid_rows if @bid_offer

    rows_starting('Nav(GBX)', doc)
  end

  # Returns every <tr> of the table containing a cell whose text is +label+.
  # The label is looked for inside a <span> first, then directly in a <td>.
  def rows_starting(label, doc)
    in_span = (doc/"table/tr/td/span/[text()='#{label}']/../../../../tr")
    return in_span unless in_span.size == 0

    (doc/"table/tr/td[text()='#{label}']/../../tr")
  end
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment