Skip to content

Instantly share code, notes, and snippets.

@kmx
Last active October 12, 2015 12:57
Show Gist options
  • Save kmx/c46859f002b93a6c3683 to your computer and use it in GitHub Desktop.
Save kmx/c46859f002b93a6c3683 to your computer and use it in GitHub Desktop.
PDL::TS + PDL::FI
package PDL::FI;
use 5.010;
use strict;
use warnings;
require Exporter;
our @ISA = qw(Exporter);
our @EXPORT_OK = qw(
fi_absdiff fi_shift
fi_log_ret fi_net_ret fi_gross_ret fi_net_cum fi_gross_cum
fi_gross_cum1 fi_net_cum1
ts_OHLC_add_RC ts_OHLC_to_period ts_OHLC_to_period_FASTER
ts_yahoo_quotes ts_yahoo_splits_dividends
);
our %EXPORT_TAGS = (all => \@EXPORT_OK);
use Carp;
use PDL;
use PDL::Apply ':all';
sub fi_absdiff {
my ($pdl) = @_;
my $p0 = $pdl->slice("0:-2");
my $p1 = $pdl->slice("1:-1");
my $rv = PDL->new($pdl->type, 'BAD')->glue(0, $p1 - $p0); # = P[t] - P[t-1]
return $rv;
}
sub fi_shift {
my ($pdl, $lag, $fill) = @_;
my $rv = $pdl->copy;
$rv->badflag(1);
$rv .= (defined $fill ? PDL::Core::topdl($fill) : PDL->new('BAD'));
if ($lag>0) {
$rv->slice([$lag, -1]) .= $pdl->slice([0, -1-$lag]);
}
else {
$rv->slice([0, -1+$lag]) .= $pdl->slice([-$lag, -1]);
}
return $rv;
}
sub fi_log_ret { # Logarithmic Total Returns
my ($pdl) = @_;
return fi_absdiff(log($pdl)); # = log(P[t]) - log(P[t-1])
}
sub fi_net_ret { # (Arithmetic) Total (net) Returns (percentage change, -1.0 <= retval <= 1.0)
my ($pdl) = @_;
my $p0 = $pdl->slice("0:-2");
my $p1 = $pdl->slice("1:-1");
return PDL->new($pdl->type, 'BAD')->glue(0, ($p1 / $p0) - 1); # = (P[t] - P[t-1]) / P[t]
}
sub fi_gross_ret { # (Arithmetic) Total Gross Returns (vector)
my ($pdl) = @_;
my $total_returns = fi_net_ret($pdl); # Total Returns
return 1 + $total_returns; # Gross Daily Returns
}
sub fi_gross_cum { # Cumulative Gross Returns (vector)
my ($pdl) = @_;
return cumuprodover(fi_gross_ret($pdl));
}
sub fi_net_cum { # Cumulative Net Return (vector)
my ($pdl) = @_;
return fi_gross_cum($pdl) - 1;
}
sub fi_gross_cum1 { # Cumulative Gross Returns (scalar)
my ($pdl) = @_;
return exp(sum(fi_log_ret($pdl)));
}
sub fi_net_cum1 { # Cumulative Return (scalar)
my ($pdl) = @_;
return fi_gross_cum1($pdl) - 1;
}
## Time Series related functions
sub ts_OHLC_to_period {
my ($self, $period) = @_;
croak "ts_OHLC_to_period: undefined period" unless defined $period;
my @cn = $self->col_names;
my $newts;
if ($period eq '*') {
$newts = PDL::TS->new($self->idx->slice([-1, -1]));
for my $cni (0..$#cn) {
my $old = $self->coli($cni+1);
if ($cn[$cni] =~ /(open|\.O)$/i) {
$newts->col_append($cn[$cni] => PDL::Core::topdl($old->at(0))->reshape(1));
}
elsif ($cn[$cni] =~ /(high|\.H)$/i) {
$newts->col_append($cn[$cni] => PDL::Core::topdl($old->max)->reshape(1));
}
elsif ($cn[$cni] =~ /(low|\.L)$/i) {
$newts->col_append($cn[$cni] => PDL::Core::topdl($old->min)->reshape(1));
}
elsif ($cn[$cni] =~ /(volume|\.V)$/i) {
$newts->col_append($cn[$cni] => PDL::Core::topdl($old->sum)->reshape(1));
}
elsif ($cn[$cni] =~ /(return|\.R)$/i) {
$newts->col_append($cn[$cni] => PDL::Core::topdl($old->prod)->reshape(1));
}
else { # default: the last element (suitable for close, adjusted_close)
$newts->col_append($cn[$cni] => PDL::Core::topdl($old->at(-1))->reshape(1));
}
}
}
else {
my $ep = $self->idx->dt_endpoints($period);
my $new_idx = $self->idx->index($ep)->dt_align($period, 1);
$newts = PDL::TS->new($new_idx);
my $slices = $self->idx->dt_slices($period);
for my $cni (0..$#cn) {
my $old = $self->coli($cni+1);
my $newpdl;
if ($cn[$cni] =~ /(open|\.O)$/i) {
$newpdl = apply_slice($old, $slices, 'at', 0);
}
elsif ($cn[$cni] =~ /(high|\.H)$/i) {
$newpdl = apply_slice($old, $slices, 'max');
}
elsif ($cn[$cni] =~ /(low|\.L)$/i) {
$newpdl = apply_slice($old, $slices, 'min');
}
elsif ($cn[$cni] =~ /(volume|\.V)$/i) {
$newpdl = apply_slice($old, $slices, 'sum');
}
elsif ($cn[$cni] =~ /(return|\.R)$/i) {
$newpdl = apply_slice($old, $slices, 'prod');
}
else { # default: the last element (suitable for close, adjusted_close)
$newpdl = apply_slice($old, $slices, 'at', -1);
}
$newts->col_append($cn[$cni] => $newpdl);
}
}
return $newts;
}
sub ts_OHLC_add_RC {
my ($self) = @_;
my $new_idx = $self->idx->copy;
my $newts = PDL::TS->new($new_idx);
my @cn = $self->col_names;
for my $cni (0..$#cn) {
my $old = $self->coli($cni+1);
$newts->col_append($cn[$cni] => $old->copy);
if ($cn[$cni] =~ /(return|\.R)$/i) {
$newts->col_append($cn[$cni]."C" => cumuprodover($old));
}
}
return $newts;
}
sub ts_OHLC_to_period_FASTER {
my ($self, $period) = @_;
my $ep = $self->idx->dt_endpoints($period);
my $new_idx = $self->idx->index($ep)->dt_align($period, 1);
my $newts = PDL::TS->new($new_idx);
my $slices = $self->idx->dt_slices($period)->unpdl;
my @cn = $self->col_names;
for my $cni (0..$#cn) {
my $old = $self->coli($cni+1);
my @new;
if ($cn[$cni] =~ /(open|\.O)$/i) {
push @new, $old->slice($_)->at(0) for (@$slices);
}
elsif ($cn[$cni] =~ /(high|\.H)$/i) {
push @new, $old->slice($_)->max for (@$slices);
}
elsif ($cn[$cni] =~ /(low|\.L)$/i) {
push @new, $old->slice($_)->min for (@$slices);
}
elsif ($cn[$cni] =~ /(volume|\.V)$/i) {
push @new, $old->slice($_)->sum for (@$slices);
}
elsif ($cn[$cni] =~ /(return|\.R)$/i) {
push @new, $old->slice($_)->prod for (@$slices);
}
else { # default: the last element (suitable for close, adjusted_close)
push @new, $old->slice($_)->at(-1) for (@$slices);
}
$newts->col_append($cn[$cni] => pdl(\@new));
}
return $newts;
}
sub ts_yahoo_quotes {
my $options = ref $_[-1] eq 'HASH' ? pop : {};
my ($what, $from, $to, @symbols) = @_;
croak "new_yahoo_quotes: incomplete args" unless $what && $from && $to && @symbols > 0;
croak "new_yahoo_quotes: invalid what='$what'" unless $what =~ /^[OHLCVAR]+$/;
my $f = _prepare_ymd($from) or croak "new_yahoo_quotes: invalid from='$from'";
my $t = _prepare_ymd($to, 1) or croak "new_yahoo_quotes: invalid to='$to'";
warn "new_yahoo_quotes: gonna get ($f..$t) for: " . join(" ", @symbols) . "\n";
my @ts;
my $t0 = time;
my $ua = _prepare_ua($options);
my $delay = Mojo::IOLoop->delay(sub {
my ($delay, %data) = @_;
for (@symbols) {
carp "new_yahoo_quotes: no data for symbol '$_'" and next unless defined $data{$_};
my $tmp = PDL::TS->new_from_csv(\$data{$_})->sort;
$tmp->col_rename("Open", "$_.O");
$tmp->col_rename("High", "$_.H");
$tmp->col_rename("Low", "$_.L");
$tmp->col_rename("Close", "$_.C");
$tmp->col_rename("Volume", "$_.V");
$tmp->col_rename("Adj Close", "$_.A");
$tmp->col_append("$_.R" => fi_gross_ret($tmp->col("$_.A"))) if $what =~ /R/;
$tmp->col_delete("$_.O") unless $what =~ /O/;
$tmp->col_delete("$_.H") unless $what =~ /H/;
$tmp->col_delete("$_.L") unless $what =~ /L/;
$tmp->col_delete("$_.C") unless $what =~ /C/;
$tmp->col_delete("$_.V") unless $what =~ /V/;
$tmp->col_delete("$_.A") unless $what =~ /A/;
push @ts, $tmp;
}
});
for my $sym (@symbols) {
my $url = _yahoo_url("Q", $sym, $f, $t);
carp "new_yahoo_quotes: no url for '$sym'" and next unless $url;
my $cb = $delay->begin(0, 2);
$ua->get($url, sub { $cb->($sym => $_[1]->res->code eq '200' ? $_[1]->res->body : undef) });
}
$delay->wait;
warn sprintf("new_yahoo_quotes: download finished (%.2f sec)\n", time-$t0);
PDL::TS->new_from_merge('outer', @ts)->sort;
}
sub ts_yahoo_splits_dividends {
my $options = ref $_[-1] eq 'HASH' ? pop : {};
my ($what, $from, $to, @symbols) = @_;
# example: http://ichart.finance.yahoo.com/x?a=0&b=6&c=1990&d=11&e=31&f=2010&g=v&s=AAPL&z=999999
croak "new_yahoo_sd: incomplete args" unless $from && $to && @symbols > 0;
croak "new_yahoo_sd: invalid what='$what'" unless $what =~ /^[SD]+$/;
my $f = _prepare_ymd($from) or croak "new_yahoo_sd: invalid from='$from'";
my $t = _prepare_ymd($to, 1) or croak "new_yahoo_sd: invalid to='$to'";
warn "new_yahoo_sd: gonna get ($f..$t) for: " . join(" ", @symbols) . "\n";
my @ts;
my $t0 = time;
my $ua = _prepare_ua($options);
my $delay = Mojo::IOLoop->delay(sub {
my ($delay, %data) = @_;
for (@symbols) {
carp "new_yahoo_sd: no data for symbol '$_'" and next unless defined $data{$_};
if ($what =~ /D/) {
my @lines = grep { /^DIVIDEND/ } split /\n/, $data{$_};
my $csv = "date,D\n";
for (@lines) {
my (undef, $ymd, $val) = split /,/;
$ymd = _prepare_ymd($ymd);
if ($ymd && defined $val) {
$csv .= sprintf("%s,%.99g\n", $ymd, $val);
}
}
my $tmp = PDL::TS->new_from_csv(\$csv, { col_prefix=>"$_."} );
push @ts, $tmp;
}
if ($what =~ /S/) {
my @lines = grep { /^SPLIT/ } split /\n/, $data{$_};
my $csv = "date,S\n";
for (@lines) {
my (undef, $ymd, $ratio) = split /,/;
$ymd = _prepare_ymd($ymd);
if ($ymd && $ratio =~ /^([\d\.]+):([\d\.]+)$/) {
$csv .= sprintf("%s,%.99g\n", $ymd, $1 / $2);
}
}
my $tmp = PDL::TS->new_from_csv(\$csv, { col_prefix=>"$_."} );
push @ts, $tmp;
}
}
});
for my $sym (@symbols) {
my $url = _yahoo_url("S", $sym, $f, $t);
carp "new_yahoo_sd: no url for '$sym'" and next unless $url;
my $cb = $delay->begin(0, 2);
$ua->get($url, sub { $cb->($sym => $_[1]->res->code eq '200' ? $_[1]->res->body : undef) });
}
$delay->wait;
warn printf("new_yahoo_sd: download finished (%.2f sec)\n", time-$t0);
PDL::TS->new_from_merge('outer', @ts)->sort;
}
### The quarter-end shares outstanding are obtained from the SEC EDGAR database http://www.sec.gov/edgar/searchedgar/companysearch.html
# http://www.fool.com/knowledge-center/2015/09/09/how-to-calculate-common-stock-outstanding-from-a-b.aspx
# http://www.sec.gov/spotlight/xbrl/filings-and-feeds.shtml
# http://www.sec.gov/Archives/edgar/monthly/
# https://www.quandl.com/search?query=MSFT%20outstanding%20shares&type=all
# GOOD: https://ycharts.com/companies/MSFT/shares_outstanding
# GOOD: https://finance.yahoo.com/q/ks?s=MSFT
# EXACT but OLD: https://www.quandl.com/data/SEC/MSFT_COMMONSTOCKSHARESOUTSTANDING_Q-MICROSOFT-CORP-MSFT-Quarterly-Common-Stock-Shares-Outstanding
### private functions
sub _prepare_ua {
my $options = shift;
eval { require Mojo::UserAgent } or croak "FATAL: Mojo::UserAgent not installed";
my $ua = Mojo::UserAgent->new;
$ua->proxy->http($options->{proxy})->https($options->{proxy}) if $options->{proxy};
$ua->proxy->detect if $options->{proxy_detect};
return $ua;
}
sub _prepare_ymd {
my ($input, $last_day) = @_;
$input =~ s/\s//g;
my $tm;
if ($input =~ /^today(([+-])(\d+))?$/) {
$tm = Time::Moment->now_utc;
$tm = $tm->plus_days($3*1) if defined $1 && $2 eq '+';
$tm = $tm->minus_days($3*1) if defined $1 && $2 eq '-';
}
elsif ($input =~ /^\d\d\d\d$/) {
my ($m, $d) = $last_day ? (12, 31) : (1, 1);
$tm = eval { Time::Moment->new(year=>$input, month=>$m, day=>$d, hour=>0, minute=>0, second=>0) } or return undef;
}
else {
my ($y, $m, $d) = $input =~ m|^(\d\d\d\d+)[/-]?(\d\d)[/-]?(\d\d)$|;
$tm = eval { Time::Moment->new(year=>$y, month=>$m, day=>$d, hour=>0, minute=>0, second=>0) } or return undef;
}
return $tm->strftime("%Y-%m-%d");
}
sub _yahoo_url {
my ($what, $sym, $from, $to) = @_;
my ($pc, $pa, $pb) = $from =~ /^(\d\d\d\d)\-?(\d\d)\-?(\d\d)$/;
my ($pf, $pd, $pe) = $to =~ /^(\d\d\d\d)\-?(\d\d)\-?(\d\d)$/;
# 2009-09-11 (a=0&b=6&c=2009) .. 2010-03-10 (d=11&e=31&f=2010) >> http://ichart.finance.yahoo.com/table.csv?a=0&b=6&c=2009&d=11&e=31&f=2010&g=d&s=YHOO
return sprintf('http://ichart.finance.yahoo.com/table.csv?a=%02d&b=%02d&c=%04d&d=%02d&e=%02d&f=%04d&g=d&s=%s&ignore=.csv', $pa-1, $pb, $pc, $pd-1, $pe, $pf, $sym) if $what eq "Q";
return sprintf('http://ichart.finance.yahoo.com/x?a=%02d&b=%02d&c=%04d&d=%02d&e=%02d&f=%04d&g=v&s=%s&z=999999&ignore=.csv', $pa-1, $pb, $pc, $pd-1, $pe, $pf, $sym) if $what eq "S";
return undef;
}
1;
__END__
TODO:
* func: fill_badvals http://www.inside-r.org/packages/cran/xts/docs/na.locf.xts
* func: lag https://stat.ethz.ch/R-manual/R-patched/library/stats/html/lag.html + http://www.inside-r.org/packages/cran/zoo/docs/diff.zoo
INTRODUCTION:
getting data:
my $qts = ts_yahoo_quotes('CAR', '2010-01-01', '2015-09-30', qw/IBM AAPL TSLA MSFT/);
$qts->show;
package PDL::TS;
=head1 NAME
PDL::TS - PDL based Time Series
=head1 SYNOPSIS
Experimental! Be careful!
=cut
use 5.010;
use strict;
use warnings;
use Carp;
use Scalar::Util qw(blessed looks_like_number);
use PDL;
use PDL::DateTime;
use PDL::IO::CSV qw(rcsv1D wcsv1D);
use PDL::IO::DBI qw(rdbi1D);
use PDL::IO::Sereal qw(rsereal wsereal);
use Time::HiRes qw(time);
use PDL::Apply ':all';
sub new {
my ($class, $idx, @list) = @_;
croak "new: missing index" unless defined $idx;
croak "new: index has to be PDL::DateTime object" unless ref $idx eq 'PDL::DateTime';
croak "new: index has to be 1D PDL::DateTime object" unless $idx->ndims == 1;
croak "new: index has to be non-empty" unless $idx->nelem > 0;
my $self = bless { idx=>$idx, col=>[] }, $class;
$self->col_append(@list) if @list > 0;
return $self;
}
sub new_from_merge {
my ($class, $type, @list) = @_;
croak "new_from_merge: undefined merge type" unless $type;
croak "new_from_merge: invalid merge type '$type'" unless $type eq 'inner' or $type eq 'outer';
croak "new_from_merge: no data" unless @list > 0;
my $self = shift(@list)->copy;
$self = $self->merge($type, $_) for (@list);
return $self;
}
sub new_from_concat {
my ($class, @list) = @_;
croak "new_from_concat: no data" unless @list > 0;
my $self = shift(@list)->copy;
$self->concat(@list);
return $self;
}
sub new_from_csv {
my ($class, $csv_file, $options) = @_;
my $args = {};
$args->{idx_col_name} = delete $options->{idx_col_name} // '';
$args->{idx_col_id} = delete $options->{idx_col_id};
$args->{col_names} = delete $options->{col_names} // [];
$args->{col_prefix} = delete $options->{col_prefix} // '';
$options->{detect_datetime} = 1 unless defined $options->{detect_datetime};
$options->{header} = 'auto' unless defined $options->{header};
my @all_data = rcsv1D($csv_file, $options);
return PDL::TS->_new_from_1D_piddles($args, @all_data);
}
sub new_from_sereal {
my ($class, $sereal_file) = @_;
my $ts = rsereal($sereal_file);
croak "new_from_sereal: invalid object type" unless blessed $ts && $ts->isa($class);
return $ts;
}
sub new_from_dbi {
my $options = ref $_[-1] eq 'HASH' ? pop : {};
my ($class, $dbh, $sql, $bind) = @_;
my $args = {};
$args->{idx_col_name} = delete $options->{idx_col_name} // '';
$args->{idx_col_id} = delete $options->{idx_col_id};
$args->{col_names} = delete $options->{col_names} // [];
$args->{col_prefix} = delete $options->{col_prefix} // '';
$options->{detect_datetime} = 1 unless defined $options->{detect_datetime};
$options->{header} = 'auto' unless defined $options->{header};
my @all_data = rdbi1D($dbh, $sql, $bind, $options);
return PDL::TS->_new_from_1D_piddles($args, @all_data);
}
sub _new_from_1D_piddles {
my ($class, $args, @all_data) = @_;
my $idx_col_name = $args->{idx_col_name};
my $idx_col_id = $args->{idx_col_id};
my $ts_indx;
if ($idx_col_name || defined $idx_col_id) {
# try to find index column based on idx_col_id / idx_col_name options
for (0..$#all_data) {
if (defined $idx_col_id) {
if ($idx_col_id == $_) {
$ts_indx = $all_data[$_];
splice @all_data, $_, 1;
}
}
if (my $n = $all_data[$_]->hdr->{col_name}) {
if ($idx_col_name eq $n) {
$ts_indx = $all_data[$_];
splice @all_data, $_, 1;
}
}
last if defined $ts_indx;
}
croak "new_from_csv: idx column not found" unless defined $ts_indx;
}
if (!defined $ts_indx) {
# try to find index column as the first DateTime column
for (0..$#all_data) {
last if defined $ts_indx;
if (blessed($all_data[$_]) && $all_data[$_]->isa('PDL::DateTime')) {
$ts_indx = $all_data[$_];
splice @all_data, $_, 1;
}
}
}
croak "new_from_csv: cannot guess idx column" if !defined $ts_indx;
my @list;
for (0..$#all_data) {
my $cname = $args->{col_names}->[$_] // $all_data[$_]->hdr->{col_name} // sprintf("C%02d", $_+1);
push @list, $args->{col_prefix}.$cname => $all_data[$_];
}
return PDL::TS->new($ts_indx, @list);
}
### columns related functions
sub col :lvalue {
my ($self, $name) = @_;
return $self->coli($self->col_name_i($name));
}
sub coli :lvalue {
my ($self, $i) = @_;
return undef unless (defined $i) && ($i > 0) && (defined $self->{col}->[$i-1]);
return $self->{col}->[$i-1]->[1];
}
sub col_names {
my ($self, @new_names) = @_;
my @cn = map { $_->[0] } @{$self->{col}};
return @cn unless @new_names > 0;
my $nl = scalar(@new_names);
my $cl = scalar(@cn);
carp "col_names: invalid new_names length $nl (expected $cl)" if $nl != $cl;
for (0..$#cn) {
my $name = $new_names[$_];
#my $name = $self->col_name_mk_uniq($new_names[$_]); # find uniq column name
$self->{col}->[$_][0] = $name;
$self->{col}->[$_][1]->hdr->{col_name} = $name;
}
return $self;
}
sub col_name_i {
my ($self, $name) = @_;
return undef unless defined $name;
# linear search, perhaps a bit slow
if (ref $name eq 'Regexp') {
for (1..scalar(@{$self->{col}})) {
return $_ if $self->{col}->[$_-1]->[0] =~ $name;
}
}
else {
for (1..scalar(@{$self->{col}})) {
return $_ if $self->{col}->[$_-1]->[0] eq $name;
}
}
return undef;
}
sub col_move {
my ($self, $name, $new_i) = @_;
my $old_i = $self->col_name_i($name);
croak "col_move: column '$name' not found" unless defined $old_i;
return $self->coli_move($old_i, $new_i);
}
sub coli_move {
my ($self, $old_i, $new_i) = @_;
my $c = $self->coli($old_i);
croak "coli_move: bad old_i='$old_i'" unless defined $c;
croak "coli_move: bad new_i='$new_i'" unless defined $new_i && $new_i >=1 && $new_i <= $self->ncols;
my $old = $self->{col}->[$old_i-1];
my $new = $self->{col}->[$new_i-1];
splice @{$self->{col}}, $new_i, 0, $old; # inset col
splice @{$self->{col}}, $old_i-1, 1; # delete col
return $self;
}
sub col_rename {
my ($self, $name, $new_name) = @_;
croak "col_rename: bad name" unless defined $name;
croak "col_rename: bad new name" unless defined $new_name;
return $self if $new_name eq $name;
my $i = $self->col_name_i($name);
croak "col_rename: column '$name' not found" unless defined $i;
return $self->coli_rename($i, $new_name);
}
sub coli_rename {
my ($self, $i, $new_name) = @_;
croak "coli_rename: bad new name" unless defined $new_name;
croak "coli_rename: bad index" unless (defined $i) && ($i > 0) && (defined $self->{col}->[$i-1]);
$self->{col}->[$i-1]->[0] = $new_name;
$self->{col}->[$i-1]->[1]->hdr->{col_name} = $new_name;
return $self;
}
sub col_replace {
my ($self, @list) = @_;
while (@list) {
carp "col_replace: odd number of elements in column definitions" and last if @list < 2;
my ($name, $data) = (shift @list, shift @list);
croak "col_replace: bad name" unless defined $name;
my $i = $self->col_name_i($name);
croak "col_replace: column '$name' not found" unless defined $i;
$self->coli_replace($i, $data);
}
return $self;
}
sub coli_replace {
my ($self, @list) = @_;
while (@list) {
carp "coli_replace: odd number of elements in column definitions" and last if @list < 2;
my ($i, $data) = (shift @list, shift @list);
croak "coli_replace: bad data" unless defined $data && blessed $data && $data->isa('PDL');
croak "coli_replace: bad index" unless (defined $i) && ($i > 0) && (defined $self->{col}->[$i-1]);
$self->{col}->[$i-1]->[1] = $data;
}
return $self;
}
sub col_delete {
my ($self, @names) = @_;
my $counter = 0;
COLI: for my $i (reverse (1..$self->ncols)) {
my $n = $self->{col}->[$i-1]->[0];
for my $name (@names) {
if (ref $name eq 'Regexp') {
if ($n =~ $name) {
$self->coli_delete($i);
$counter++;
next COLI;
}
}
elsif ($n eq $name) {
$self->coli_delete($i);
$counter++;
next COLI;
}
}
}
croak "col_delete: column not found" unless defined $counter > 0;
return $self; #XXX-FIXME what to return: $self or deleted elements?
}
sub coli_delete {
my ($self, @indices) = @_;
for my $i (reverse sort @indices) {
croak "coli_delete: bad index" unless (defined $i) && ($i > 0) && (defined $self->{col}->[$i-1]);
my $rv = splice @{$self->{col}}, $i-1, 1;
}
return $self; #XXX-FIXME what to return: $self or deleted elements?
}
sub col_insert {
my ($self, $name, $colname, $data) = @_;
croak "col_insert: bad name" unless defined $name;
my $i = $self->col_name_i($name);
croak "col_insert: column '$name' not found" unless defined $i;
return $self->coli_insert($i, $colname, $data);
}
sub coli_insert {
my ($self, $i, $colname, $data) = @_;
croak "coli_insert: bad index" unless (defined $i) && ($i > 0) && (defined $self->{col}->[$i-1]);
croak "coli_insert: bad data" unless defined $data && blessed $data && $data->isa('PDL');
my $size = $self->idx->nelem;
my $s = $data->nelem;
my $d = $data->ndims;
croak "coli_insert: invalid data (dims=$d, expected 1D)" unless $d == 1;
croak "coli_insert: invalid data (elems=$s, max. expected $size)" unless $s <= $size;
$data = $data->append(ones($data->type, $size-$s)->setbadif(1)) if $s < $size;
splice @{$self->{col}}, $i-1, 0, [$colname, $data];
return $self;
}
sub col_name_mk_uniq {
my ($self, $name) = @_;
return $name unless defined $self->col_name_i($name);
if ($name =~ /^(.*)\((\d+)\)$/) {
# name like: "ColumnName(2)"
my ($prefix, $n) = ($1, $2);
while (1) {
$n++;
return "$prefix($n)" unless defined $self->col_name_i("$prefix($n)");
}
}
else {
my $n = 2;
while (1) {
return "$name($n)" unless defined $self->col_name_i("$name($n)");
$n++;
}
}
croak "col_name_mk_uniq: fatal error";
}
sub col_append {
my ($self, @list) = @_;
my $size = $self->idx->nelem;
while (@list) {
carp "col_append: odd number of elements in column definitions" and last if @list < 2;
my ($k, $v) = (shift @list, shift @list);
croak "col_append: invalid colname" unless defined $k && !ref $k;
if (blessed $v && $v->isa('PDL')) {
my $s = $v->nelem;
my $d = $v->ndims;
croak "col_append: invalid column '$k' (dims=$d, expected 1D)" unless $d == 1;
croak "col_append: invalid column '$k' (elems=$s, max. expected $size)" unless $s <= $size;
$v = $v->append(ones($v->type, $size-$s)->setbadif(1)) if $s < $size;
}
elsif (ref $v eq 'ARRAY') {
my $s = scalar @$v;
croak "col_append: invalid column '$k' (elems=$s, max. expected $size)" unless $s <= $size;
push @$v, ((undef) x ($size-$s)) if $s < $size;
$v = pdl($v);
}
else {
croak "col_append: invalid data for column '$k'";
}
#carp "coll_add: duplicate column name '$k'" if defined $self->col_name_i($k); # warn on duplicate col name
#$k = $self->col_name_mk_uniq($k); # find uniq col name for duplicate name
$v->hdr->{col_name} = $k;
push @{$self->{col}}, [$k => $v];
}
return $self;
}
sub ncols {
my $self = shift;
return scalar(@{$self->{col}});
}
sub nbadvals {
my ($self, @i) = @_;
my $nc = $self->ncols;
my $count = 0;
if (@i == 0) {
$count = $self->idx->nbad;
$count += $self->coli($_)->nbad for (1..$self->ncols);
}
else {
for my $coli (@i) {
if ($coli == 0) {
$count += $self->idx->nbad
}
elsif ($coli <= $nc) {
$count += $self->coli($nc)->nbad;
}
}
}
return $count;
}
sub nrows { $_[0]->idx->nelem }
sub idx { $_[0]->{idx} }
sub to_string {
my ($self, $head, $tail, $format) = @_;
my $nr = $self->nrows;
my $nc = $self->ncols;
my @c = ('idx', $self->col_names);
my @d;
my $numfmt = "%.5g";
my $space = " ";
$head //= 0;
$tail //= 0;
my $limit = $head + $tail;
# prepare
if ($limit > 0 && $nr > $limit) {
if ($tail > 0) {
$d[0] = $self->idx->slice([0,$head])->append($self->idx->slice([-$tail,-1]))->dt_unpdl($format);
}
else {
$d[0] = $self->idx->slice([0,$head])->dt_unpdl($format);
}
$d[0][$head] = "...";
}
else {
$d[0] = $self->idx->dt_unpdl($format);
}
for my $i (1 .. $nc) {
my $p = $self->coli($i);
if ($limit > 0 && $nr > $limit) {
$d[$i] = $p->slice([0,$head])->append($p->slice([-$tail,-1]))->unpdl;
$d[$i][$head] = '...';
}
else {
$d[$i] = $p->unpdl;
}
}
#summary rows
my @s;
my $i = -1;
$s[0][++$i] = "Type:"; $s[$_][$i] = $self->coli($_)->type for (1 .. $nc);
$s[0][++$i] = "Min:"; $s[$_][$i] = sprintf($numfmt, $self->coli($_)->min ) for (1 .. $nc);
#$s[0][++$i] = "Avg:"; $s[$_][$i] = sprintf($numfmt, $self->coli($_)->avg ) for (1 .. $nc);
$s[0][++$i] = "Median:"; $s[$_][$i] = sprintf($numfmt, $self->coli($_)->median) for (1 .. $nc);
#$s[0][++$i] = "Mode:"; $s[$_][$i] = sprintf($numfmt, $self->coli($_)->mode ) for (1 .. $nc);
$s[0][++$i] = "Max:"; $s[$_][$i] = sprintf($numfmt, $self->coli($_)->max ) for (1 .. $nc);
$s[0][++$i] = "Prod:"; $s[$_][$i] = sprintf($numfmt, $self->coli($_)->prod ) for (1 .. $nc);
#$s[0][++$i] = "Sum:"; $s[$_][$i] = sprintf($numfmt, $self->coli($_)->sum ) for (1 .. $nc);
$s[0][++$i] = "Bad:"; $s[$_][$i] = sprintf($numfmt, $self->coli($_)->nbad ) for (1 .. $nc);
# calc widths
my @w;
for my $i (0 .. $nc) {
my $max = length $c[$i];
for (@{$d[$i]}) {
$_ = sprintf($numfmt, $_) if looks_like_number($_);
my $lcell = length "$_";
$max = $lcell if $lcell > $max;
}
for (@{$s[$i]}) {
$_ = sprintf($numfmt, $_) if looks_like_number($_);
my $lcell = length "$_";
$max = $lcell if $lcell > $max;
}
$w[$i] = $max;
}
# index
my $i_uni = $self->idx->is_uniq ? 'yes' : 'no';
my $i_ord = $self->idx->is_increasing ? 'inc' : $self->idx->is_decreasing ? 'dec' : 'no';
my $i_reg = $self->idx->is_regular ? 'yes' : 'no';
my $i_per = $self->idx->dt_periodicity || 'none';
# print
my @out;
my $hr = join($space, map { "=" x $w[$_] } (0..$#w));
push @out, $hr;
push @out, join($space, map { sprintf("% $w[$_]s", $c[$_]) } (0..$#w));
push @out, $hr;
for my $i (0..scalar(@{$d[0]})-1) {
push @out, join($space, map { sprintf("% $w[$_]s", $d[$_][$i]) } (0..$#w));
}
push @out, $hr;
for my $i (0..scalar(@{$s[0]})-1) {
push @out, join($space, map { sprintf("% $w[$_]s", $s[$_][$i]) } (0..$#w));
}
push @out, $hr;
push @out, "Index: uniq=$i_uni sorted=$i_ord regular=$i_reg periodicity=$i_per";
if ($i_ord ne 'no') {
my $from = $self->idx->dt_at(0);
my $to = $self->idx->dt_at(-1);
push @out, "Index range: $from ... $to";
}
push @out, "$nr rows, $nc columns";
return join("\n", @out);
}
sub show {
my ($self, $limit, $format) = @_;
$limit //= 50;
my $head = int($limit/2);
print $self->to_string($head, $limit - $head, $format), "\n";
}
sub head {
my ($self, $limit, $format) = @_;
$limit //= 25;
print $self->to_string($limit, 0, $format), "\n";
}
sub tail {
my ($self, $limit, $format) = @_;
$limit //= 25;
print $self->to_string(0, $limit, $format), "\n";
}
sub group_by {
my ($self, $period, $func) = (shift, shift, shift);
croak "group_by: undefined period" unless defined $period;
my $newts;
if ($period eq '*') {
$newts = PDL::TS->new($self->idx->slice([-1, -1]));
my @cn = $self->col_names;
for my $cni (0..$#cn) {
my $newpdl = apply_over($self->coli($cni+1), $func, @_)->reshape(1);
$newts->col_append($cn[$cni] => $newpdl);
}
}
else {
my $ep = $self->idx->dt_endpoints($period);
my $new_idx = $self->idx->index($ep)->dt_align($period, 1);
$newts = PDL::TS->new($new_idx);
my $slices = $self->idx->dt_slices($period);
my @cn = $self->col_names;
for my $cni (0..$#cn) {
$newts->col_append($cn[$cni] => apply_slice($self->coli($cni+1), $slices, $func, @_));
}
}
return $newts;
}
sub group_by_FASTER {
my ($self, $period, $func) = (shift, shift, shift);
my $ep = $self->idx->dt_endpoints($period);
my $new_idx = $self->idx->index($ep)->dt_align($period, 1);
my $newts = PDL::TS->new($new_idx);
my $slices = $self->idx->dt_slices($period)->unpdl;
my @cn = $self->col_names;
for my $cni (0..$#cn) {
my $old = $self->coli($cni+1);
my @new;
if (ref $func ne 'CODE') {
push @new, $old->slice($_)->$func(@_) for (@$slices);
}
else {
push @new, $func->($old->slice($_), @_) for (@$slices);
}
$newts->col_append($cn[$cni] => pdl(\@new));
}
return $newts;
}
sub _col_apply_func {
my ($newts, $oldname, $oldpdl, $func, @fargs) = @_;
my $newpdl;
if (ref $func ne 'CODE') {
$newpdl = PDL::Core::topdl($oldpdl->$func(@_));
}
else {
$newpdl = PDL::Core::topdl($func->($oldpdl, @_));
}
$newpdl->reshape(1) if $newpdl->ndims == 0; # convert scalar piddle to 1x1 piddle
$newts->col_append($oldname => $newpdl);
}
sub apply_all {
#XXX-FIXME BROKEN!!!
my $self = shift;
my $newts = PDL::TS->new($self->idx->copy);
my @cn = $self->col_names;
_col_apply_func($newts, $cn[$_], $self->coli($_+1), @_) for (0..$#cn);
return $newts;
}
sub apply_col {
#XXX-FIXME BROKEN!!!
my ($self, $col) = (shift, shift);
my $newts = PDL::TS->new($self->idx->copy);
my @cn = $self->col_names;
$col = [ $col ] unless ref $col eq 'ARRAY';
my %wanted = map { $_ => 1 } @$col;
for my $cni (0..$#cn) {
if ($wanted{$cn[$cni]}) {
_col_apply_func($newts, $cn[$cni], $self->coli($cni+1), @_);
}
else {
$newts->col_append($cn[$cni] => $self->coli($cni+1)->copy, @_);
}
}
return $newts;
}
sub apply_coli {
#XXX-FIXME BROKEN!!!
my ($self, $coli) = (shift, shift);
my $newts = PDL::TS->new($self->idx->copy);
my @cn = $self->col_names;
$coli = [ $coli ] unless ref $coli eq 'ARRAY';
my %wanted = map { int($_) => 1 } @$coli;
for my $cni (0..$#cn) {
if ($wanted{$cni+1}) {
_col_apply_func($newts, $cn[$cni], $self->coli($cni+1), @_);
}
else {
$newts->col_append($cn[$cni] => $self->coli($cni+1)->copy, @_);
}
}
return $newts;
}
sub apply_colre {
#XXX-FIXME BROKEN!!!
my ($self, $re) = (shift, shift);
my $newts = PDL::TS->new($self->idx->copy);
croak "apply_colre: re is not an Regexp" unless ref $re eq 'Regexp';
my @cn = $self->col_names;
for my $cni (0..$#cn) {
if ($cn[$cni] =~ $re) {
_col_apply_func($newts, $cn[$cni], $self->coli($cni+1), @_);
}
else {
$newts->col_append($cn[$cni] => $self->coli($cni+1)->copy, @_);
}
}
return $newts;
}
sub select_rows {
my $self = shift;
my $newts = PDL::TS->new($self->idx->index(@_));
my @cn = $self->col_names;
$newts->col_append($cn[$_] => $self->coli($_+1)->index(@_)) for (0..$#cn);
return $newts;
}
sub slice_rows {
my $self = shift;
my $newts = PDL::TS->new($self->idx->slice(@_));
my @cn = $self->col_names;
$newts->col_append($cn[$_] => $self->coli($_+1)->slice(@_)) for (0..$#cn);
return $newts;
}
sub dice_rows {
my $self = shift;
my $newts = PDL::TS->new($self->idx->dice(@_));
my @cn = $self->col_names;
$newts->col_append($cn[$_] => $self->coli($_+1)->dice(@_)) for (0..$#cn);
return $newts;
}
sub select_colsi {
my ($self, @i) = @_;
my $new_idx = $self->idx->copy;
my $newts = PDL::TS->new($new_idx);
for (@i) {
carp "select_colsi: invalid index '$_'" and next unless defined $self->{col}->[$_-1];
$newts->col_append($self->{col}->[$_-1][0] => $self->{col}->[$_-1][1]->copy);
}
return $newts;
}
sub select_cols {
my ($self, @cols) = @_;
my $new_idx = $self->idx->copy;
my $newts = PDL::TS->new($new_idx);
for (@cols) {
my $i = $self->col_name_i($_);
carp "select_cols: col '$_' not found" and next unless defined $i;
$newts->col_append($self->{col}->[$i-1][0] => $self->{col}->[$i-1][1]->copy);
}
return $newts;
}
sub colsi_to_matrix {
my ($self, @i) = @_;
croak "colsi_to_matrix: no indices" if @i == 0;
my @pdls;
for (@i) {
my $c = $self->coli($_);
croak "colsi_to_matrix: column id='$_' not found" unless defined $c;
push @pdls, $c;
}
return cat(@pdls);
}
sub cols_to_matrix {
my ($self, @n) = @_;
my @pdls;
if (@n > 0) {
for (@n) {
my $c = $self->col($_);
croak "cols_to_matrix: column '$_' not found" unless defined $c;
push @pdls, $c;
}
}
else {
@pdls = map { $self->coli($_) } (1..$self->ncols);
}
return cat(@pdls);
}
sub copy {
my $self = shift;
my $new_idx = $self->idx->copy;
my $newts = PDL::TS->new($new_idx);
for (1..$self->ncols) {
$newts->col_append($self->{col}->[$_-1][0] => $self->{col}->[$_-1][1]->copy);
}
return $newts;
}
sub sorti {
my ($self, $coli) = shift;
my $i = defined $coli ? $self->coli($coli)->qsorti : $self->idx->qsorti;
my $new_idx = $self->idx->index($i);
my $newts = PDL::TS->new($new_idx);
for (1..$self->ncols) {
$newts->col_append($self->{col}->[$_-1][0] => $self->{col}->[$_-1][1]->index($i));
}
return $newts;
}
sub sort {
my ($self, $name) = shift;
return $self->sorti($self->col_name_i($name));
}
sub concat {
my ($self, @list) = @_;
my $nc = $self->ncols;
for my $other (@list) {
my $or = $other->nrows;
#$self->idx->append($other->idx);
$self->{idx} = $self->idx->append($other->idx); #XXX-FIXME ugly, but $self->idx->inplace->append(..) does not work
for (1..$nc) {
my $o = $other->coli($_);
if (defined $o) {
$self->{col}->[$_-1][1] = $self->coli($_)->append($o); #XXX-HACK
}
else {
my $fill = zeroes($or); $fill .= pdl('BAD');
$self->{col}->[$_-1][1] = $self->coli($_)->append($fill); #XXX-HACK
}
}
}
return $self;
}
sub merge {
my ($self, $how, $other) = @_;
croak "merge: undefined merge type" unless $how;
my $left_idx = $self->idx;
my $left_idx_of_idx = qsorti $left_idx;
my $right_idx = $other->idx;
my $right_idx_of_idx = qsorti $right_idx;
my $all_idx;
if ($how eq 'inner') {
# inner join
$all_idx = setops($left_idx, 'AND', $right_idx);
}
elsif ($how eq 'outer') {
# outer join
$all_idx = setops($left_idx, 'OR', $right_idx);
}
elsif ($how eq 'left') {
# left outer join
$all_idx = setops($left_idx, 'OR', setops($left_idx, 'AND', $right_idx));
}
elsif ($how eq 'right') {
# righ touter join
$all_idx = setops($right_idx, 'OR', setops($left_idx, 'AND', $right_idx));
}
else {
croak "merge: invalid merge type '$how'";
}
my $new_left_idx = vsearch_match($all_idx, $left_idx->index($left_idx_of_idx));
$new_left_idx = $new_left_idx->setbadif($new_left_idx < 0);
my $new_right_idx = vsearch_match($all_idx, $right_idx->index($right_idx_of_idx));
$new_right_idx = $new_right_idx->setbadif($new_right_idx < 0);
my $newts = PDL::TS->new($all_idx);
my @cn = $self->col_names;
$newts->col_append($cn[$_] => $self->coli($_+1)->index1d($new_left_idx)) for (0..$#cn);
@cn = $other->col_names;
$newts->col_append($cn[$_] => $other->coli($_+1)->index1d($new_right_idx)) for (0..$#cn);
return $newts; #XXX-FIXME
}
sub write_csv {
my ($self, $filename_or_filehandle, $options) = @_;
$options //= {};
$options->{header} = 'auto' unless defined $options->{header};
$self->idx->hdr->{col_name} //= ($self->idx->dt_periodicity || 'index'); #XXX-TODO reconsider?
my @pdls = ($self->idx);
push @pdls, $self->coli($_) for (1..$self->ncols);
wcsv1D(@pdls, $filename_or_filehandle, $options);
}
sub write_sereal {
my ($self, $filename_or_filehandle) = @_;
wsereal($self, $filename_or_filehandle);
}
1;
__END__
FINISHED:
* new
* new_from_csv
* new_from_dbi
* new_from_sereal
* new_from_merge ### XXX-TODO better name? new_from_ts? new_from_multi_ts?
* apply_all
* apply_col
* apply_coli
* apply_colre
* col_append
* col_name_i
* col_name_mk_uniq ### XXX-TODO remove?
* col_names
* col(i)
* col(i)_delete
* col(i)_insert
* col(i)_rename
* col(i)_replace
* col(i)_move
* copy
* group_by ### XXX-TODO better name? group_by_period?
* idx
* merge
* ncols
* nrows
* cols(i)_to_matrix
* select_cols(i)
* select_rows
* dice_rows
* slice_rows
* sort
* sorti
* to_string
* show
* tail
* head
* write_csv
* write_sereal
TODO:
!!!BROKEN!!! - apply_all + apply_col + apply_coli + apply_colre
**** nice-to-have
* $ts->concatenate($other_ts)
* $ts->to_array something like unpdl
**** too much work
* new_from_xlsx + write_xlsx Perhaps first implement PDL::IO::Excel, Spreadsheet::XLSX::Reader::LibXML
* overloading - does it make sense for PDL::TS?
http://www.inside-r.org/packages/cran/zoo/docs
rollapply http://www.inside-r.org/packages/cran/zoo/docs/rollapply
agregate http://www.inside-r.org/packages/cran/zoo/docs/aggregate.zoo
http://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.html
rolling_apply(arg, window, func, min_periods=None, freq=None, center=False, args=(), kwargs={}) http://pandas.pydata.org/pandas-docs/stable/generated/pandas.rolling_apply.html
groupby(by=None, axis=0, level=None, as_index=True, sort=True, group_keys=True, squeeze=False) http://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.groupby.html
apply(func, axis=0, broadcast=False, raw=False, reduce=None, args=(), **kwds) http://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.apply.html
applymap(func) http://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.applymap.html
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment