Skip to content

Instantly share code, notes, and snippets.

@JohnMertz
Last active May 16, 2021 03:01
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save JohnMertz/cbefa2955382d3f1324a6290a0be868e to your computer and use it in GitHub Desktop.
Save JohnMertz/cbefa2955382d3f1324a6290a0be868e to your computer and use it in GitHub Desktop.
Tangerine Investment Fund Scraper/Export Tool
#!/usr/bin/perl
# This script fetches the performance data for various Tangerine Bank investment funds and exports them to:
# - CSVs called <fund_name>.csv with all historical data in format: "YYYY-MM-DD","12.34"
# - (optional) A single JSON file with trend data keyed by business days back: {"Fund_Name":{"0":"12.34","5":"12.56",...},"Fund_Name":...}
# - (optional) PNG line chart
# All data is publicly available from: https://www.tangerine.ca/en/products/investing/performance
# This is just a handy way to use the data locally for various purposes.
#
# Should be run daily Tuesday-Saturday, such as with cron:
# 0 1 * * 2-6 /path/to/tangerine.pl
use strict;
use warnings;
use LWP::UserAgent;
use JSON::XS;
use XML::Hash::XS;
# ---- User configuration -------------------------------------------------

# Where the CSV, PNG and JSON output lives. Keep the trailing slash: file
# names are appended to this string directly.
my $data_dir  = '/home/user/tangerine/';
my $json_file = "${data_dir}tangerine.json";

# Enable generation of a PNG chart from each CSV (uses the gnuplot binary).
my $gnuplot = 1;

# Left edge of the chart's date axis, as YYYY-MM-DD. Relies on GNU date's
# --date option.
chomp(my $chart_start = qx{date --date='6 months ago' +%Y-%m-%d});

# Offsets to export to the JSON file, counted in price rows (business days)
# back from the newest one:
#   0 = yesterday, 5 =~ one week, 21 =~ one month, 125 =~ six months
my @days = (0, 5, 21, 125);

# Funds to track: local name (also used for the file names) => API fund ID.
my %funds = (
    'Balanced'        => 'F000000S68',
    'Balanced_Growth' => 'F000000S6A',
    'Balanced_Income' => 'F000000S66',
    'Equity_Growth'   => 'F00000NNHK',
    'Dividend'        => 'F00000Y1ZY',
    'Balanced_ETF'    => 'F000015SOA',
    'Growth_ETF'      => 'F000015SOB',
    'Equity_ETF'      => 'F000015SOC',
);
# Worker objects
my $ua   = LWP::UserAgent->new();
my $json = JSON::XS->new();
my $xml  = XML::Hash::XS->new();

# HTTP resources. FUND, CODE, START and DATE in $data_url are placeholders
# that get substituted below and in the per-fund loop.
my $code_url = "https://www.tangerine.ca/fberoot/json/msutil.json";
my $data_url = "http://api.morningstar.com/service/mf/Price/Mstarid/FUND?callback=?format=json&accesscode=CODE&startdate=START&enddate=DATE";

# Fetch API Key. Nothing else can work without it, so fail loudly and early
# rather than building URLs around an undefined value.
my $res = $ua->get($code_url);
die "Failed to fetch access code from $code_url: " . $res->status_line . "\n"
    unless $res->is_success;

my $msutil = eval { $json->decode($res->content()) };
die "Could not parse response from $code_url as JSON\n"
    unless ref($msutil) eq 'HASH' && ref($msutil->{'ValidAccessCode'}) eq 'HASH';

my $code = $msutil->{'ValidAccessCode'}->{'AccessCode'};
die "No access code found in response from $code_url\n"
    unless defined($code) && length($code);
$data_url =~ s/CODE/$code/g;

# Most recent end date is yesterday
my $yesterday = `date --date='1 days ago' +%Y-%m-%d`;
chomp $yesterday;
# An empty or odd value here means the date command failed (e.g. non-GNU date).
die "Could not determine yesterday's date using the date command\n"
    unless $yesterday =~ /^\d{4}-\d\d-\d\d$/;
$data_url =~ s/DATE/$yesterday/g;
# Hash for generating JSON: fund name => { business days back => price }
my %summaries = ();

# For every configured fund:
#   1. load the history already stored in <fund>.csv (if any),
#   2. fetch prices from the last stored date (or 2000-01-01) up to yesterday,
#   3. append rows newer than the last stored date to the CSV,
#   4. record the prices at the configured @days offsets for the JSON export,
#   5. optionally render <fund>.png with gnuplot.
# A fund whose download fails is reported on STDERR and left with an empty
# summary. A fund that is merely up to date still gets its summary and chart.
foreach my $fund (keys %funds) {
    $summaries{$fund} = {};
    my $csv_file = $data_dir . $fund . ".csv";
    my $fund_url = $data_url;
    $fund_url =~ s/FUND/$funds{$fund}/g;

    my $last;    # newest date already present in the CSV
    my @all;     # full history, in file order: { d => date, v => price }

    # If CSV already exists, load it so only newer days need fetching and so
    # the summaries below can look back through the stored history.
    if (-e $csv_file) {
        my $in;
        unless (open($in, '<', $csv_file)) {
            print STDERR "Cannot read $csv_file: $!\n";
            next;
        }
        while (my $line = <$in>) {
            # Rows look like "YYYY-MM-DD","12.34"; accept any number of
            # integer digits so prices below 10 or above 99 parse too.
            my ($date, $price) =
                $line =~ /^"(\d{4}-\d\d-\d\d)","(\d+(?:\.\d+)?)"/
                or next;
            $last = $date;
            push(@all, { 'd' => $date, 'v' => $price });
        }
        close($in);
    }

    # No usable history (missing or empty CSV): import everything.
    my $start = defined($last) ? $last : '2000-01-01';
    $fund_url =~ s/START/$start/g;

    my $fund_res = $ua->get($fund_url);
    my $data = $fund_res->is_success
        ? eval { $xml->xml2hash($fund_res->content()) }
        : undef;
    if (ref($data) ne 'HASH' || !defined($data->{data})) {
        print STDERR "Failed to fetch $fund data\n";
        next;
    }

    # Force new data to a list: the parsed price entry may be missing, a
    # single hash ref (one price) or an array ref (several prices).
    my $prices  = ref($data->{data}) eq 'HASH' ? $data->{data}->{Prices} : undef;
    my $entries = ref($prices) eq 'HASH' ? $prices->{p} : undef;
    my @fetched = ref($entries) eq 'ARRAY' ? @$entries
                : ref($entries) eq 'HASH'  ? ($entries)
                :                            ();

    if (@fetched && defined($last) && defined($fetched[-1]->{d})
        && $last eq $fetched[-1]->{d}) {
        print STDERR "$last is the same day as $fetched[-1]->{d}. Already have latest data.\n";
    }

    # Keep only complete rows that are newer than what is already stored.
    my @fresh = grep {
        ref($_) eq 'HASH'
            && defined($_->{d})
            && defined($_->{v})
            && (!defined($last) || $_->{d} gt $last)
    } @fetched;

    # Append to csv_file
    if (@fresh) {
        my $out;
        unless (open($out, '>>', $csv_file)) {
            print STDERR "Cannot append to $csv_file: $!\n";
            next;
        }
        foreach my $day (@fresh) {
            my $value = $day->{v};
            # Correct trailing zeros so every price has two decimals
            if ($value =~ /^\d+$/) {
                $value .= '.00';
            } elsif ($value =~ /^\d+\.\d$/) {
                $value .= '0';
            }
            print {$out} '"' . $day->{d} . '","' . $value . '"' . "\n";
            # Store the same text that went to the CSV so the summaries do
            # not change format between this run and the next.
            push(@all, { 'd' => $day->{d}, 'v' => $value });
        }
        close($out) or print STDERR "Error closing $csv_file: $!\n";
    }

    # Add the requested look-back prices to the hash for JSON export.
    # Offsets that reach past the start of the history are skipped: indexing
    # before the first element here would be a fatal error.
    foreach my $back (@days) {
        next if $back >= scalar(@all);
        $summaries{$fund}{$back} = $all[ -($back + 1) ]->{v};
    }

    # Render the chart when enabled and there is something to plot. The
    # script is handed to gnuplot as a single argument (no shell involved).
    if ($gnuplot && @all) {
        my $plot = join('',
            ' set term png size 800,300;',
            ' set output "' . $data_dir . $fund . '.png";',
            ' set datafile separator ",";',
            ' unset multiplot;',
            ' set title "";',
            ' set xdata time;',
            ' set timefmt "%Y-%m-%d";',
            ' set format y "$%.2s";',
            ' set autoscale y;',
            ' set grid;',
            ' set xrange ["' . $chart_start . '":"' . $yesterday . '"];',
            ' plot "' . $data_dir . $fund . '.csv" using 1:2 with linespoints notitle',
        );
        system('gnuplot', '-e', $plot) == 0
            or print STDERR "gnuplot failed for $fund\n";
    }
}
# Write the per-fund summaries as a single JSON document, replacing any
# previous file. Errors are fatal so a failed or truncated export is noticed
# (buffered write errors only surface at close).
open(my $fh, '>', $json_file) or die "Cannot write $json_file: $!\n";
print {$fh} encode_json(\%summaries);
close($fh) or die "Error closing $json_file: $!\n";
exit();
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment