Skip to content

Instantly share code, notes, and snippets.

@ceekz
Last active December 17, 2015 07:09
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ceekz/5570475 to your computer and use it in GitHub Desktop.
Save ceekz/5570475 to your computer and use it in GitHub Desktop.
use List::Util;
# Compute the cosine similarity between two sparse vectors.
#
# Arguments:
#   $vector_1, $vector_2 - hash references mapping a dimension key to a
#                          numeric weight; a key missing from one vector
#                          is treated as 0.
#   $is_pearson          - optional flag; when true, each vector is centered
#                          on its mean (taken over the union of both key
#                          sets) before the cosine is computed, which gives
#                          the Pearson correlation coefficient.
#
# Returns a number. The result is 0.0 whenever either (possibly centered)
# vector has zero norm, which includes the case where both vectors are empty.
sub cosine_similarity {
    my ($vector_1, $vector_2, $is_pearson) = @_;

    # Every dimension that appears in at least one of the two vectors.
    my %union;
    $union{$_}++ for keys %{$vector_1}, keys %{$vector_2};

    # Two empty vectors have no dimensions at all; bail out before the mean
    # computation below would divide by zero.
    my $dimensions = keys %union;
    return 0.0 unless $dimensions;

    # The means are only needed for the Pearson variant. Subtracting 0 in
    # plain cosine mode leaves the values untouched.
    my ($avg_1, $avg_2) = (0, 0);
    if ($is_pearson) {
        # The leading 0 keeps sum() from returning undef when one of the
        # vectors has no entries.
        $avg_1 = List::Util::sum(0, values %{$vector_1}) / $dimensions;
        $avg_2 = List::Util::sum(0, values %{$vector_2}) / $dimensions;
    }

    my $inner_product = 0.0;
    my $norm_1        = 0.0;
    my $norm_2        = 0.0;
    for my $key (keys %union) {
        my $val_1 = ($vector_1->{$key} || 0) - $avg_1;
        my $val_2 = ($vector_2->{$key} || 0) - $avg_2;

        $inner_product += $val_1 * $val_2;
        $norm_1        += $val_1**2;
        $norm_2        += $val_2**2;
    }

    # A zero-length vector has no direction, so define the similarity as 0.
    return 0.0 unless $norm_1 && $norm_2;
    return $inner_product / (sqrt($norm_1) * sqrt($norm_2));
}
# Pearson correlation coefficient of two sparse vectors (hash references).
# Delegates to cosine_similarity() with its third argument set to 1, which
# makes that routine center both vectors on their means first.
sub correlation_coefficient {
    my ($vector_1, $vector_2) = @_;
    my $is_pearson = 1;
    return cosine_similarity($vector_1, $vector_2, $is_pearson);
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment