Last active
May 16, 2021 03:01
-
-
Save JohnMertz/cbefa2955382d3f1324a6290a0be868e to your computer and use it in GitHub Desktop.
Tangerine Investment Fund Scraper/Export Tool
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/perl
# This script fetches the performance data for various Tangerine Bank investment funds and exports it to:
# - CSVs called <fund_name>.csv with all historical data in the format: "YYYY-MM-DD","12.34"
# - (optional) A single JSON file with trend data: {"Fund Name":{"0":"12.34","5":"12.56",...},"Fund Name":...}
# - (optional) A PNG line chart per fund
# All data is publicly available from: https://www.tangerine.ca/en/products/investing/performance
# This is just a handy way to use the data locally for various purposes.
#
# Should be run daily Tuesday-Saturday, such as with cron:
# 0 1 * * 2-6 /path/to/tangerine.pl
use strict; | |
use warnings; | |
use LWP::UserAgent; | |
use JSON::XS; | |
use XML::Hash::XS; | |
# ---------------------------------------------------------------------------
# User configuration
# ---------------------------------------------------------------------------

# Output directory for the CSV, PNG and JSON files (keep the trailing slash;
# file names are appended to it directly).
my $data_dir = '/home/user/tangerine/';
my $json_file = "${data_dir}tangerine.json";

# PNG chart generation from the CSV data
my $gnuplot = 1;

# Left edge of the chart's x axis: six months back, as YYYY-MM-DD
# (computed by the system `date` command).
chomp(my $chart_start = `date --date='6 months ago' +%Y-%m-%d`);

# Look-back offsets, counted in prior business days, to report in the JSON file:
# 0 = yesterday, 5 =~ -7d, 21 =~ -1m, 125 =~ -6m
my @days = (0, 5, 21, 125);

# Funds to track: local name (also used for the output file names) => API ID
my %funds = (
    "Balanced"        => "F000000S68",
    "Balanced_Growth" => "F000000S6A",
    "Balanced_Income" => "F000000S66",
    "Equity_Growth"   => "F00000NNHK",
    "Dividend"        => "F00000Y1ZY",
    "Balanced_ETF"    => "F000015SOA",
    "Growth_ETF"      => "F000015SOB",
    "Equity_ETF"      => "F000015SOC",
);
# Worker objects: HTTP client plus JSON and XML decoders
my $ua = LWP::UserAgent->new();
my $json = JSON::XS->new();
my $xml = XML::Hash::XS->new();

# HTTP resources.
# $code_url returns a JSON document carrying the API access code.
# $data_url is a template: CODE and DATE are substituted below, FUND and START
# are substituted per fund later in the script.
my $code_url = "https://www.tangerine.ca/fberoot/json/msutil.json";
my $data_url = "http://api.morningstar.com/service/mf/Price/Mstarid/FUND?callback=?format=json&accesscode=CODE&startdate=START&enddate=DATE";

# Fetch API key. Without it no price request can succeed, so every failure
# here is fatal and reported with a specific message instead of a cryptic
# decode error or an undef silently substituted into the URL.
my $res = $ua->get($code_url);
if (!$res->is_success) {
    die "Failed to fetch access code from $code_url: " . $res->status_line . "\n";
}
# eval: JSON::XS->decode croaks on malformed input
my $msutil = eval { $json->decode($res->content()) };
if (ref($msutil) ne 'HASH' || ref($msutil->{'ValidAccessCode'}) ne 'HASH') {
    die "Unexpected response from $code_url: no ValidAccessCode object\n";
}
my $code = $msutil->{'ValidAccessCode'}->{'AccessCode'};
if (!defined($code) || ref($code) || $code eq '') {
    die "Unexpected response from $code_url: no AccessCode value\n";
}
$data_url =~ s/CODE/$code/g;

# Most recent end date is yesterday
my $yesterday = `date --date='1 days ago' +%Y-%m-%d`;
chomp $yesterday;
# An empty or malformed result means the `date` call failed (e.g. non-GNU date)
if ($yesterday !~ /^\d{4}-\d\d-\d\d$/) {
    die "Could not determine yesterday's date using the 'date' command\n";
}
$data_url =~ s/DATE/$yesterday/g;

# Hash for generating JSON: fund name => { day offset => value }
my %summaries = ();
# Per-fund processing. For each configured fund:
#   1. load any existing CSV history (also used for the JSON summaries),
#   2. request prices from the last stored date (or 2000-01-01) up to yesterday,
#   3. append rows that are newer than the stored history to the CSV,
#   4. record the look-back values requested in @days in %summaries,
#   5. optionally render a PNG chart with gnuplot.
# A failed fetch or a fetch without new rows is reported/skipped, but the
# summaries and chart are still produced from the stored history so the JSON
# file is not overwritten with empty entries on days without new data.
foreach my $fund (keys %funds) {
    $summaries{$fund} = {};
    my $csv_file = $data_dir.$fund.".csv";
    my $fund_url = $data_url;
    $fund_url =~ s/FUND/$funds{$fund}/g;

    my $last;    # date of the newest row already stored in the CSV
    my @all;     # complete history, oldest first: { d => date, v => value }

    # If CSV already exists, only fetch from last day; load in data for summaries later on
    if (-e $csv_file) {
        my $in;
        if (!open($in, '<', $csv_file)) {
            print STDERR "Cannot read $csv_file: $!\n";
            next;
        }
        while (my $line = <$in>) {
            # Rows look like "YYYY-MM-DD","12.34"; the value may have any
            # number of digits. Lines that do not match are ignored.
            next unless $line =~ /^"(\d{4}-\d\d-\d\d)","([^"]*)"\s*$/;
            $last = $1;
            push(@all, { 'd' => $1, 'v' => $2 });
        }
        close $in;
    }

    # No usable history (missing or empty CSV): import all data
    my $start = defined($last) ? $last : '2000-01-01';
    $fund_url =~ s/START/$start/g;

    # Fetch and decode; xml2hash may die on malformed input, hence the eval
    $res = $ua->get($fund_url);
    my $data = $res->is_success ? eval { $xml->xml2hash($res->content()) } : undef;

    # Force new data to a list: a single price decodes to a hash, several to
    # an array, none to something else entirely.
    my @fetched;
    if (ref($data) ne 'HASH' || ref($data->{data}) ne 'HASH') {
        print STDERR "Failed to fetch $fund data\n";
    } else {
        my $prices = ref($data->{data}->{Prices}) eq 'HASH'
            ? $data->{data}->{Prices}->{p}
            : undef;
        if (ref($prices) eq 'ARRAY') {
            @fetched = @$prices;
        } elsif (ref($prices) eq 'HASH') {
            @fetched = ($prices);
        }
    }

    # Keep only well-formed rows that are newer than what is already stored
    my @fresh = grep {
        ref($_) eq 'HASH'
            && defined($_->{d})
            && defined($_->{v})
            && (!defined($last) || $_->{d} gt $last)
    } @fetched;

    if (@fresh) {
        # Append to csv_file
        my $out;
        if (!open($out, '>>', $csv_file)) {
            print STDERR "Cannot append to $csv_file: $!\n";
            next;
        }
        foreach my $day (@fresh) {
            push(@all, { 'd' => $day->{d}, 'v' => $day->{v} });
            my $value = $day->{v};
            # Correct trailing zeros so every row has two decimals
            if ($value =~ /^\d+$/) {
                $value .= '.00';
            } elsif ($value =~ /^\d+\.\d$/) {
                $value .= '0';
            }
            print $out '"'.$day->{d}.'","'.$value.'"'."\n";
        }
        # Buffered write errors only surface at close
        close($out) or print STDERR "Error writing $csv_file: $!\n";
    } elsif (defined($last) && @fetched && ref($fetched[-1]) eq 'HASH'
             && defined($fetched[-1]->{d}) && $fetched[-1]->{d} eq $last) {
        print STDERR "$last is the same day as $fetched[-1]->{d}. Already have latest data.\n";
    }

    # Nothing stored and nothing fetched: no summary or chart possible
    next unless @all;

    # Add the requested look-back days to the hash for JSON export.
    # An offset that reaches past the start of the history yields undef
    # (JSON null); a negative index would otherwise wrap around to the
    # newest rows or abort the script.
    foreach my $day (@days) {
        my $idx = $#all - $day;
        $summaries{$fund}{$day} = $idx >= 0 ? $all[$idx]->{v} : undef;
    }

    # If chart generation is enabled, run gnuplot. The list form of system()
    # hands the script to gnuplot as a single argument without a shell.
    if ($gnuplot) {
        my $script = join('',
            ' set term png size 800,300;',
            ' set output "' . $data_dir . $fund . '.png";',
            ' set datafile separator ",";',
            ' unset multiplot;',
            ' set title "";',
            ' set xdata time;',
            ' set timefmt "%Y-%m-%d";',
            ' set format y "$%.2s";',
            ' set autoscale y;',
            ' set grid;',
            ' set xrange ["' . $chart_start . '":"' . $yesterday . '"];',
            ' plot "' . $data_dir . $fund . '.csv" using 1:2 with linespoints notitle',
        );
        if (system('gnuplot', '-e', $script) != 0) {
            print STDERR "gnuplot failed for $fund (status $?)\n";
        }
    }
}
# Write the collected summaries as a single JSON document. Failures are fatal
# and reported: an unchecked open/close would leave a missing or truncated
# file behind without any indication.
open(my $fh, '>', $json_file) or die "Cannot write $json_file: $!\n";
print {$fh} encode_json(\%summaries);
# Buffered write errors only surface at close
close($fh) or die "Error writing $json_file: $!\n";
exit();
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment