Skip to content

Instantly share code, notes, and snippets.

@y-tag
Created August 6, 2011 02:28
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save y-tag/1128929 to your computer and use it in GitHub Desktop.
Save y-tag/1128929 to your computer and use it in GitHub Desktop.
#!/usr/bin/perl
# Learns a linear ranking model: feature weights are tuned with
# DownhillSimplex::simplex so as to minimise (1 - mean NDCG) on a training
# file, then one score per line of a test file is printed to STDOUT.
#
# Usage: script train_file test_file [rank_num]
use strict;
use warnings;
use 5.008;
# Data::Dumper is only referenced from commented-out debugging lines in the
# code visible here.
use Data::Dumper;
# Project-local optimiser; main() calls DownhillSimplex::simplex.
use DownhillSimplex;
# All work happens in main(); the sub definitions further down are compiled
# before this call executes.
main();
# Script entry point.
#
# Reads the training file, optimises one weight per feature with
# DownhillSimplex::simplex (objective: eval_func, progress: disp_func) and
# then prints the learned linear score of every line of the test file to
# STDOUT, one score per line.
#
# Command line: train_file test_file [rank_num]
#   rank_num - NDCG cut-off, defaults to 10.
#
# Dies with a descriptive message when the training file yields no usable
# query or when the test file cannot be opened.
sub main {
    if (@ARGV < 2) {
        print STDERR "Usage: $0 train_file test_file [rank_num]\n";
        exit 1;
    }
    my $train_f  = shift @ARGV;
    my $test_f   = shift @ARGV;
    my $rank_num = @ARGV ? shift @ARGV : 10;

    my $data = read_data($train_f, $rank_num);

    # read_data drops every query whose ideal DCG is 0, so the list can be
    # empty; without this guard the dereference below dies with an obscure
    # "undefined value as an ARRAY reference" error.
    die "No usable training queries found in '$train_f'\n" unless @$data;

    # The number of weights is taken from the first candidate of the first
    # query.
    my $feature_num = @{ $data->[0]{candiate}[0]{feature} };

    # Starting point: weight 10 for every feature.
    my @vertex = (10) x $feature_num;

    my $params   = { data => $data, rank_num => $rank_num };
    my $coes     = { maxiter => 1000 };
    my $vertices = make_vertices(\@vertex);
    my ($best, $best_score)
        = DownhillSimplex::simplex($vertices, $params, $coes, \&eval_func, \&disp_func);

    #$data = read_data($test_f, $rank_num);
    #$params = { data => $data, rank_num => $rank_num};
    #my $score = eval_func($best, $params);
    ##print Dumper($best, 1 - $score);
    #print join("\t", 1 - $best_score, 1 - $score), "\n";

    open(my $fh, '<', $test_f)
        or die "Cannot open test file '$test_f': $!";
    while (my $line = <$fh>) {
        chomp $line;
        my $score = get_score_per_line($best, $line);
        print $score, "\n";
    }
    close($fh);
}
# Objective function handed to the optimiser: 1 - mean NDCG@rank_num.
#
# Parameters:
#   $vertex - array ref of feature weights.
#   $params - hash ref with
#               data     => array ref of queries; each query is a hash ref
#                           holding `candiate` (array ref of
#                           { feature => [...], rel => N }) and `z`
#                           (the normaliser the query's DCG is divided by),
#               rank_num => cut-off applied to each ranked list.
#
# Returns a number to be minimised (lower means better ranking). When there
# are no queries at all the worst loss, 1, is returned instead of dying with
# "Illegal division by zero".
sub eval_func {
    my ($vertex, $params) = @_;
    my $rank_num = $params->{rank_num};
    my $data     = $params->{data} || [];

    # Nothing to rank: treat as NDCG 0, i.e. loss 1.
    return 1 unless @$data;

    my $ndcg = 0;
    foreach my $datum (@$data) {
        # Score every candidate with the linear model.
        my @scores = ();
        foreach my $candiate (@{ $datum->{candiate} }) {
            my $score = inner_prod_array($candiate->{feature}, $vertex);
            push @scores, { score => $score, rel => $candiate->{rel} };
        }

        # Rank by descending score and keep the relevance labels of the
        # top rank_num entries.
        my @rels = map { $_->{rel} }
                   sort { $b->{score} <=> $a->{score} } @scores;
        splice(@rels, $rank_num) if @rels > $rank_num;

        $ndcg += calc_dcg(\@rels) / $datum->{z};
    }

    return 1 - $ndcg / scalar(@$data);
}
# Progress callback for the optimiser: writes "<iteration>\t<score>" to
# STDERR, where the score is read from the `score` entry of the last
# element of $vertices. $params is accepted but not used.
sub disp_func {
    my ($iter, $vertices, $params) = @_;
    #print Dumper($iter, $vertices->[0]);
    #print STDERR join("\t", $iter, $vertices->[0]{score}), "\n";
    my @fields = ($iter, $vertices->[ scalar(@$vertices) - 1 ]{score});
    print STDERR join("\t", @fields), "\n";
}
# Scores one data line with the learned weights.
#
# Parameters:
#   $vertex - array ref of feature weights.
#   $line   - one chomped, space-separated line:
#             "<rel> qid:<id> <key>:<value> <key>:<value> ...".
#
# The first two fields are ignored for scoring. Tokens that do not split
# into both a key and a value on ':' are skipped. Values are ordered by
# ascending numeric key; when a key repeats, the last value wins.
#
# Returns the inner product of the ordered values with $vertex, or 0 when
# the line carries no features (for example a blank line).
sub get_score_per_line {
    my ($vertex, $line) = @_;

    # Label and qid are not needed here; not parsing the qid also avoids an
    # "uninitialized value" warning on blank or truncated lines.
    my (undef, undef, @rest) = split(/ /, $line);

    my %value_of;
    foreach my $feature (@rest) {
        my ($k, $v) = split(/:/, $feature);
        next unless defined($k) && defined($v);
        $value_of{$k} = $v;
    }

    my @feature_array = map { $value_of{$_} }
                        sort { $a <=> $b } keys %value_of;

    # Same value the inner product would give for an empty feature list.
    return 0 unless @feature_array;

    return inner_prod_array(\@feature_array, $vertex);
}
# Dot product of two array refs. The number of terms is the length of
# $array1; $array2 is read at the same indices.
sub inner_prod_array {
    my ($array1, $array2) = @_;
    my $count = scalar @$array1;
    my $total = 0;
    $total += $array1->[$_] * $array2->[$_] for 0 .. $count - 1;
    return $total;
}
# Loads a training file and groups its lines by query id.
#
# Parameters:
#   $data_f   - path of the data file; each line is space-separated:
#               "<rel> qid:<id> <key>:<value> <key>:<value> ...".
#   $rank_num - cut-off used when computing each query's ideal DCG.
#
# Returns an array ref with one hash ref per query:
#   candiate => array ref of { rel => <rel>, feature => [values ordered by
#               ascending numeric key] },
#   z        => DCG of the query's labels sorted in descending order and
#               truncated to $rank_num entries.
# Queries whose z is 0 are dropped. The order of the queries is the hash
# order and therefore unspecified.
#
# Blank lines are ignored; non-blank lines without a relevance label or a
# "name:id" second field are skipped with a warning. Dies with the file
# name and OS error when the file cannot be opened.
sub read_data {
    my ($data_f, $rank_num) = @_;
    my $data_hash = {};

    open(my $fh, '<', $data_f)
        or die "Cannot open data file '$data_f': $!";
    while (my $line = <$fh>) {
        chomp $line;
        next unless $line =~ /\S/;    # blank line: nothing to parse

        my ($rel, $qid_field, @rest) = split(/ /, $line);
        my $qid;
        (undef, $qid) = split(/:/, $qid_field) if defined $qid_field;
        unless (defined $rel && defined $qid) {
            warn "Skipping malformed line $. of '$data_f'\n";
            next;
        }

        # Collect key:value tokens; tokens without both parts are ignored
        # and a repeated key keeps its last value.
        my %value_of;
        foreach my $feature (@rest) {
            my ($k, $v) = split(/:/, $feature);
            next unless defined($k) && defined($v);
            $value_of{$k} = $v;
        }
        my @feature_array = map { $value_of{$_} }
                            sort { $a <=> $b } keys %value_of;

        # The key spelling `candiate` is what the rest of the script reads.
        push @{ $data_hash->{$qid}{candiate} },
            { rel => $rel, feature => \@feature_array };
    }
    close($fh);

    # Per query: normaliser z = DCG of the best possible ordering.
    foreach my $qid (keys %$data_hash) {
        my @rels = sort { $b <=> $a }
                   map { $_->{rel} } @{ $data_hash->{$qid}{candiate} };
        splice(@rels, $rank_num) if @rels > $rank_num;
        my $z = calc_dcg(\@rels);
        if ($z == 0) {
            # Dividing by z later would be impossible, so drop the query.
            delete $data_hash->{$qid};
            next;
        }
        $data_hash->{$qid}{z} = $z;
    }

    return [ values %$data_hash ];
}
# Discounted cumulative gain of a ranked list of relevance labels.
# The label at 1-based position p contributes
# (2**label - 1) * log(2) / log(1 + p). Returns 0 for an empty list.
sub calc_dcg {
    my ($rels) = @_;
    my $total = 0;
    my $pos   = 0;
    for my $rel (@$rels) {
        $pos++;
        my $gain = 2**$rel - 1;
        $total += $gain * log(2) / log(1 + $pos);
    }
    return $total;
}
# Builds the starting simplex for an n-dimensional start point.
#
# Takes an array ref of n coordinates and returns an array ref of n + 1
# vertices (each an array ref of n numbers). In vertex i, coordinate i
# keeps its original value and every other coordinate is the original
# value multiplied by rand(); the final vertex (i == n) has all of its
# coordinates randomised.
sub make_vertices {
    my $vertex = shift;
    my $dim = scalar @$vertex;
    my @simplex;
    for my $row (0 .. $dim) {
        my @coords = map {
            my $col = $_;
            $col == $row ? $vertex->[$col] : $vertex->[$col] * rand();
        } 0 .. $dim - 1;
        push @simplex, \@coords;
    }
    return \@simplex;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment