Skip to content

Instantly share code, notes, and snippets.

@lh3
Last active September 19, 2016 19:04
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save lh3/dd9a9e873ce04cc6d4ac8c8f9b0e375f to your computer and use it in GitHub Desktop.
Save lh3/dd9a9e873ce04cc6d4ac8c8f9b0e375f to your computer and use it in GitHub Desktop.
#!/usr/bin/perl
use strict;
use warnings;
use Getopt::Std;
my %opts = (t=>'ct', m=>50);
getopts('t:m:3', \%opts);
my $type;
if ($opts{t} eq 'p') { # body part
$type = 0;
} elsif ($opts{t} eq 'ct') { # cell type
$type = 1;
} elsif ($opts{t} eq 'd') { # disease status
$type = 2;
} elsif ($opts{t} eq 'cl') { # cell line
$type = 3;
}
my (@a, %c);
while (<>) {
chomp;
next if $. == 1;
tr/ /_/;
my @t = split("\t");
my $key;
if ($type == 0) {
$key = $t[6];
$key = undef if $key =~ /_(and|or)_/; # skip mixed tissues
} elsif ($type == 2) {
$key = $t[8];
} elsif ($type == 1) {
my $a = $t[3];
my $b = $t[6];
my $c = $t[5];
$key = $c eq '__'? "$b:$a" : "$c:$a";
$key =~ s/__/\*/g;
$key = undef if $key eq '*:*' || $key =~ /_(and|or)_/;
} elsif ($type == 3) {
my $a = $t[4];
my $b = $t[6];
$b = '*' if $b eq '__';
$key = "$b:$a";
$key = undef if $a eq '__';
}
if ($key && $key ne '__' && $key ne '-') {
push(@a, [$t[0], $key]);
$c{$key} = 0 unless defined $c{$key};
++$c{$key};
}
}
my $id = 0;
my %h;
for my $key (sort keys %c) {
$h{$key} = $id++ if $c{$key} >= $opts{m};
}
unless (defined $opts{3}) {
print join("\t", '#sample', sort(keys %h)), "\n";
}
for my $x (@a) {
next unless defined $h{$x->[1]};
if (defined $opts{3}) {
print join("\t", $x->[0], $h{$x->[1]}, $x->[1]), "\n";
} else {
my $n = scalar(keys %h);
my @b;
for (my $i = 0; $i < $n; ++$i) {
$b[$i] = 0;
}
$b[$h{$x->[1]}] = 1;
print join("\t", $x->[0], @b), "\n";
}
}
#!/usr/bin/perl
use strict;
use warnings;
die("Usage: gtex-eval.pl <truth.y52.snd> <predict.y52.snd>\n") if @ARGV < 2;
open(TRUTH, $ARGV[0]=~/\.gz$/? "gzip -dc $ARGV[0] |" : $ARGV[0]) || die;
open(PREDICT, $ARGV[1]=~/\.gz$/? "gzip -dc $ARGV[1] |" : $ARGV[1]) || die;
my $lineno = 0;
my @tissues;
my @g = (0, 0);
my @ti = (0, 0);
while (1) {
my $t = <TRUTH>;
my $p = <PREDICT>;
last unless $t && $p;
chomp($t);
chomp($p);
++$lineno;
my @t = split("\t", $t);
my @p = split("\t", $p);
if ($lineno == 1) {
@tissues = @t;
} else {
my $start;
if (@t == 53 || @t == 38) {
my $tg = $t[1] > $t[2]? 0 : 1;
my $pg = $p[1] > $p[2]? 0 : 1;
if ($tg == $pg) {
++$g[0];
} else {
++$g[1];
print("GE\t$t[0]\t$t[1]\t$t[2]\n");
}
$start = 3;
} elsif (@t == 51 || @t == 36 || @t == 40) {
$start = 1;
} else {
die;
}
my ($max, $tt, $pt) = (-1, -1, -1);
for (my $i = $start; $i < @t; ++$i) {
($max, $tt) = ($t[$i], $i) if $max < $t[$i];
}
$max = -1;
for (my $i = $start; $i < @p; ++$i) {
($max, $pt) = ($p[$i], $i) if $max < $p[$i];
}
printf("TT\t$t[0]\t%.3f\t$tissues[$tt]\n", $p[$tt]);
if ($pt == $tt) {
++$ti[0];
} else {
++$ti[1];
printf("TE\t$t[0]\t%.3f\t%.3f\t$tissues[$tt]\t$tissues[$pt]\n", $p[$tt], $p[$pt]);
}
}
}
close(PREDICT);
close(TRUTH);
printf("GA\t%.2f%%\n", 100 * $g[0] / ($g[0] + $g[1])) if @tissues == 53 || @tissues == 38;
printf("TA\t%.2f%%\n", 100 * $ti[0] / ($ti[0] + $ti[1]));
#!/usr/bin/perl
use strict;
use warnings;
use Getopt::Std;
# ReCount download link: http://duffel.rail.bio/recount/SRP012682/SRP012682.tsv
my %opts = (c=>30);
getopts('mc:e', \%opts);
die("Usage: gtex2feat.pl [-me] [-c $opts{c}] <SRP012682.tsv>\n") if @ARGV == 0 && -t STDIN;
my (@a, %cnt, %extract);
while (<>) {
next if $. == 1;
chomp;
my @t = split("\t");
next if $t[8] eq 'TRUE';
my $t = $t[25];
$t =~ tr/ /_/;
my $g = $t[18];
my $male = ($g =~ /_male_/)? 1 : 0;
my $female = 1 - $male;
my $e = $t[30];
$e =~ tr/ /_/;
$extract{$e} = 1;
if (defined $opts{m}) {
if ($t =~ /Cerebell/) {
$t = "Cerebellum";
} elsif ($t =~ /Brain/) {
$t = "Brain";
}
}
$cnt{$t} = 0 unless defined($cnt{$t});
++$cnt{$t};
push(@a, [$t[3], $t, $male, $female, $e]) if ($. > 1 && $t ne "");
}
my $id = 0;
my %tissues;
for my $t (sort keys(%cnt)) {
$tissues{$t} = $id++ if $cnt{$t} >= $opts{c};
warn("Warning: dropped tissue '$t'\n") if $cnt{$t} < $opts{c};
}
if (defined $opts{e}) {
$id = 0;
for my $e (sort keys(%extract)) {
$extract{$e} = $id++;
}
print(join("\t", "#sample", "male", "female", sort(keys(%extract)), sort(keys(%tissues))), "\n");
} else {
print(join("\t", "#sample", "male", "female", sort(keys(%tissues))), "\n");
}
for my $x (@a) {
next unless defined($tissues{$x->[1]});
my (@b, @e);
for (my $i = 0; $i < scalar(keys(%tissues)); ++$i) {
$b[$i] = 0;
}
$b[$tissues{$x->[1]}] = 1;
if (defined $opts{e}) {
for (my $i = 0; $i < scalar(keys(%extract)); ++$i) {
$e[$i] = 0;
}
$e[$extract{$x->[4]}] = 1;
print(join("\t", $x->[0], $x->[2], $x->[3], @e, @b), "\n");
} else {
print(join("\t", $x->[0], $x->[2], $x->[3], @b), "\n");
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment