Skip to content

Instantly share code, notes, and snippets.

@rurban
Last active January 18, 2016 17:03
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save rurban/2f1eabc751ebdc3cd056 to your computer and use it in GitHub Desktop.
Save rurban/2f1eabc751ebdc3cd056 to your computer and use it in GitHub Desktop.
#!/usr/bin/perl
# Hashes candidate spellings of tennis player names with SHA-256 and compares
# the digests against two columns of data/anonymous_betting_data.csv
# (presumably the anonymised loser/winner columns -- verify against the CSV
# header).
#
# git clone https://github.com/BuzzFeedNews/2016-01-tennis-betting-analysis
# cd 2016-01-tennis-betting-analysis
use Digest::SHA 'sha256_hex';
# atp top100 2008-12-22
# NOTE: wget needs the capital -O to save the page; lower-case -o only
# redirects wget's log messages to that file.
# wget 'http://www.atpworldtour.com/en/rankings/singles?rankDate=2008-12-22' -O singles.html
# grep /overview singles.html | perl -lne'm{">(.*)<\/a>} && print $1' > players

# Candidate names: one per line of ./players. Read directly instead of
# shelling out to `cat`; line terminators (LF or CRLF) are stripped and empty
# lines are skipped.
my @n;
open my $players, '<', 'players' or die "cannot open players: $!";
while (my $line = <$players>) {
    $line =~ s/\r?\n\z//;
    push @n, $line if length $line;
}
close $players;

# %c      - candidate strings that search() has already tried
# %loser  - CSV field 12 => number of rows carrying that value
# %winner - CSV field 13 => number of rows carrying that value
my (%c,%loser, %winner);
# get all the losers hashes and skip the grep
# highest prob (the 4 players with Bonferroni significance > 95%)
# 1. (58) f16cc81d239ad735c51cc71442cda44c4d1a9323eb41018314d228c80c352e50
# 2. (235) 33367d214715ab5f5e335cd67dbc90e62983b98e5278a4eadf39c3a18124509e
# 3. (293) 6702a5de750846f45a3d977f50023c1b20156c61949f2f407b9c5b71d7d93a18
# 4. (82) 9c92af8ca1b57024bd0a39b73db8be44b25bcde4115549cd80e7ef15fc3bd516
#
# vs e.g. Martin Vassallo Arguello:
# 57964fd78b1e7efda07dc8a1f3593342d48a35fe1f823b46eaf72ea9bac81afa
# Nikolay Davydenko
# 30fa25c6f80677171a61c42c757356640222277b0da62ad048a3b7bb9777bfd8
my $csv_path = 'data/anonymous_betting_data.csv';
open my $csv, '<', $csv_path or die "cannot open $csv_path: $!";
# Stream the file row by row rather than slurping it into a list first.
while (my $row = <$csv>) {
    # Strip the line terminator so it can never end up inside the last field
    # (which would make that key differ from the bare hex digest).
    $row =~ s/\r?\n\z//;
    # Naive comma split: assumes no quoted fields containing commas -- TODO
    # confirm against the data file.
    my @s = split ',', $row;
    # Short rows simply contribute nothing instead of counting an empty key.
    $loser{$s[12]}++  if defined $s[12];
    $winner{$s[13]}++ if defined $s[13];
}
close $csv;
#print keys %loser;
# search(@candidates)
#
# Hashes every candidate string with SHA-256 and prints the candidate (as-is,
# with no separator added) once for each of %loser / %winner that contains
# its hex digest as a key.
#
# Candidates already seen in an earlier call are skipped; %c remembers them,
# so it is safe to pass a list that repeats previously tried names.
#
# Returns nothing useful; the result is the text written to the currently
# selected output handle.
sub search {
    for my $name (@_) {
        # Post-increment is false only the first time a spelling is seen.
        next if $c{$name}++;
        my $hash = sha256_hex($name);
        #print "$hash\t$name\n-----\n";
        # %loser and %winner are keyed by the hashed CSV field and hold
        # occurrence counts as values, so a hit is key membership of the
        # digest. Looking the plain name up and comparing the digest with the
        # stored count could never succeed.
        print $name if exists $loser{$hash};
        print $name if exists $winner{$hash};
    }
}
# First pass: try the names exactly as they appear in the players file.
search(@n);
#exit;
#
# Disabled experiment with transliteration variants, kept for reference:
#my @x;
#for my $s ($n) {
# $_ = $s;
# s/AVY/AWY/g;
# push @x, $_;
# s/ay/ai/;
# push @x, $_;
#}
#push @n, @x;
#@x=();
#for my $s (@n) {
# $_ = $s;
# s/ikola././;
# push @x, $_;
# s/\.//;
# push @x, $_;
#}
#push @n, @x;

# Case variants: for every name add its all-lowercase and all-uppercase form.
my @case_forms;
for my $name (@n) {
    push @case_forms, lc($name), uc($name);
}
push @n, @case_forms;

# Word-order variants. For every name so far, add four forms in this order:
#   1. last word moved to the front          ("First Last" -> "Last First")
#   2. form 1 with everything before its final word upper-cased
#   3. last word followed by the first character of the name
#   4. same as 3 with a trailing dot
# A name that does not match a pattern is added unchanged for that form.
my @reordered;
for my $name (@n) {
    (my $last_first = $name) =~ s/^(.+) (.+)$/$2 $1/;
    (my $upper_lead = $last_first) =~ s/^(.+) (.+)$/uc($1)." $2"/e;
    (my $last_initial = $name) =~ s/^(.).+ (.+)$/$2 $1/;
    (my $last_initial_dot = $name) =~ s/^(.).+ (.+)$/$2 $1./;
    push @reordered, $last_first, $upper_lead, $last_initial, $last_initial_dot;
}
push @n, @reordered;

# Finally add a copy of every candidate with a trailing newline appended.
my @with_newline;
for my $name (@n) {
    push @with_newline, "$name\n";
}
push @n, @with_newline;

# Second pass over the enlarged list.
search(@n);
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment