Skip to content

Instantly share code, notes, and snippets.

@alexpreynolds
Created April 29, 2014 22:36
Show Gist options
  • Save alexpreynolds/8b3b915b1b56431dc237 to your computer and use it in GitHub Desktop.
Save alexpreynolds/8b3b915b1b56431dc237 to your computer and use it in GitHub Desktop.
Converts a matrix of set memberships to an Eulergrid runall statement with cardinalities
#!/usr/bin/perl -w
use strict;
use Getopt::Long;
use Data::Dumper;
use EulerSet;
#
# sample input:
#
# GM06990.DS7748 TH1.DS7840 HepG2.DS7764 K562.DS9767 SKNSH.DS8482
# 0 0 1 0 0
# 1 0 1 0 0
# 0 0 0 1 0
#
my ($fdr);
my $optResult = GetOptions ("fdr=s" => \$fdr);
if (! defined $fdr) { die "specify --fdr=str\n"; }
#
# edit these run parameters, based on environment
#
my $eulergridScript = "/home/areynolds/proj/eulergrid/eulergrid.pl";
my $resultsDir = "/home/areynolds/proj/eulergrid/results/footprintOverlaps/012510";
my @headers = ();
my $lineCtr = 0;
my $powerSets;
my %eulerSets;
my $eulerSet;
my $maxEulerSetLength = 0;
while (my $inputLine = <STDIN>) {
chomp ($inputLine);
my @elements = split("\t", $inputLine);
if ($lineCtr == 0) {
# parse header row
foreach my $element (@elements) {
push (@headers, $element);
}
my $powerSets = powerset(@headers);
foreach my $powerSet (@{$powerSets}) {
my @subsets = @{$powerSet};
my $indicatorStr = "";
foreach my $subsetHeader (@headers) {
my %headerTable;
map { $headerTable{$_} = 1 } $subsetHeader;
my @intersection = grep ($headerTable{$_}, @subsets);
if (scalar @intersection == 0) {
$indicatorStr .= "0";
}
else {
$indicatorStr .= "1";
}
}
if (scalar @subsets > $maxEulerSetLength) {
$maxEulerSetLength = scalar @subsets;
}
$eulerSet = new EulerSet(\@subsets, $indicatorStr, scalar @subsets, 0);
$eulerSets{$indicatorStr} = $eulerSet;
}
}
else {
# parse remaining lines by joining @elements
my $elementIndicator = join("", @elements);
# increment count of euler set
my $count = $eulerSets{$elementIndicator}->getCount();
$eulerSets{$elementIndicator}->setCount($count + 1);
}
$lineCtr++;
}
# group euler sets by length
my $eulerSetRefsByLength;
foreach my $key (sort ascendingLength keys %eulerSets) {
$eulerSet = $eulerSets{$key};
my $length = $eulerSet->getLength();
if (defined $eulerSetRefsByLength->[$length]) {
my $count = scalar @{ $eulerSetRefsByLength->[$length] };
$eulerSetRefsByLength->[$length]->[$count] = $eulerSet;
}
else {
push (@{ $eulerSetRefsByLength->[$length] }, $eulerSet);
}
}
# sort within eulerSetByLength grouping
my $totalCnt = 0;
my $excEulerSetsRef;
for (my $index = 0; $index <= $maxEulerSetLength; $index++) {
# generate ref table
my %eulerSetRefs;
foreach my $eulerSetRef (@{$eulerSetRefsByLength->[$index]}) {
$eulerSetRefs{$eulerSetRef->getIndicator()} = $eulerSetRef;
}
# sort ref table in descending alphabetical key order
foreach my $eulerSetIndicatorKey (reverse sort keys(%eulerSetRefs)) {
my $ref = $eulerSetRefs{$eulerSetIndicatorKey};
my $ind = $ref->getIndicator();
my $cnt = $ref->getCount();
$totalCnt += $cnt;
print STDERR "$index\t$ind\t$cnt\n";
$excEulerSetsRef->[$index]->{$ind} = $cnt;
}
}
print STDERR "total count: $totalCnt\n";
# combine scores
my $combEulerSetsRef;
for (my $index = 1; $index <= $maxEulerSetLength; $index++) {
my $excSetRef = $excEulerSetsRef->[$index];
foreach my $excIndKey (reverse sort keys %{$excSetRef}) {
my $combCount = $excSetRef->{$excIndKey};
my @excIndElements = split("", $excIndKey);
for (my $combIndex = $index + 1; $combIndex <= $maxEulerSetLength; $combIndex++) {
my $testSetRef = $excEulerSetsRef->[$combIndex];
foreach my $testIndKey (reverse sort keys %{$testSetRef}) {
my @testIndElements = split("", $testIndKey);
my $opFlag = 0; # TRUE
for (my $testIndex = 0; $testIndex < scalar @testIndElements; $testIndex++) {
if (($excIndElements[$testIndex] == 1) && ($testIndElements[$testIndex] == 0)) {
$opFlag = 1; # FALSE
}
}
if ($opFlag == 0) {
$combCount += $testSetRef->{$testIndKey};
}
}
}
$combEulerSetsRef->[$index]->{$excIndKey} = $combCount;
print STDERR "$index\t$excIndKey\t$combCount\n";
}
}
#
# We now have excEulerSetsRef and combEulerSetsRef to make the runall.script
#
my @setNamesArray;
foreach my $header (@headers) {
my @setNameElements = split("\\.", $header);
push (@setNamesArray, $setNameElements[0]);
}
my $setNames = join(",", @setNamesArray);
my $plotTitle="Core__Footprint__Overlaps__($fdr)";
my @setCardinalitiesArray;
foreach (my $combIndex = 1; $combIndex <= $maxEulerSetLength; $combIndex++) {
foreach my $indKey (reverse sort keys %{$combEulerSetsRef->[$combIndex]}) {
push (@setCardinalitiesArray, $combEulerSetsRef->[$combIndex]->{$indKey});
}
}
my $setCardinalities = join(",", @setCardinalitiesArray);
my $setTotal = $totalCnt;
my $outputFilename = "$resultsDir/$fdr.overlaps.png";
my $offCellColor = "gray80";
my $onCellColor = "springgreen4";
my @ctsCountsArray;
foreach my $excKey (reverse sort keys %{$excEulerSetsRef->[1]}) {
push (@ctsCountsArray, $excEulerSetsRef->[1]->{$excKey});
}
my $ctsCounts = join(",", @ctsCountsArray);
print STDERR "\nmkdir -p $resultsDir; $eulergridScript --setNames=\"$setNames\" --plotTitle=\"$plotTitle\" --setCardinalities=$setCardinalities --setTotal=$setTotal --outputFilename=\"$outputFilename\" --offCellColor=\"$offCellColor\" --onCellColor=\"$onCellColor\" --ctsCounts=$ctsCounts\n";
# cf. http://perl.plover.com/LOD/199803.html
sub powerset {
return [[]] if @_ == 0;
my $first = shift;
my $pow = &powerset;
[ map { [$first, @$_ ], [ @$_] } @$pow ];
}
sub ascendingLength {
$eulerSets{$a}->getLength() <=> $eulerSets{$b}->getLength();
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment