Skip to content

Instantly share code, notes, and snippets.

@abuzarhamza
Last active August 29, 2015 14:24
Show Gist options
  • Save abuzarhamza/2de091e7a55823e23988 to your computer and use it in GitHub Desktop.
Save abuzarhamza/2de091e7a55823e23988 to your computer and use it in GitHub Desktop.
perl program to extract table
#This program parses the page http://www.edgar-wingender.de/muTF_classification-1.html
#and arranges the output in the format below:
#TF name Superclass Class Family subfamily
#HOXA7 Helix-turn-helix domains Homeo domain factors HOX-related factors HOX6-7
#c-Myc Basic domains Basic helix-loop-helix factors (bHLH) bHLH-ZIP factors Myc / Max factors
#<tr class="superclass_tr"> | <td class="superclass_descr_td" colspan="9"><i>Superclass</i>: Basic domains</td>
#<tr class="class_tr"> | <td colspan="8"><i>Class</i>: Basic leucine zipper factors (bZIP)</td>
#<tr class="family_tr"> | <td colspan="4"><i>Family</i>: Jun-related factors</td>
#<tr class="subfamily_tr"> | <td colspan="3" nowrap=""><i>Subfamily</i>: Jun factors</td>
#<tr class="genus_tr"> | <td colspan="2">c-Jun</td>
use strict;
use warnings;
use LWP::UserAgent;
use HTML::TreeBuilder;
# Fetch the transcription-factor classification page and print one line per
# factor (genus row) in the column order
#   TF name, Superclass, Class, Family, Subfamily
# separated by tabs. On an HTTP failure the status line goes to STDERR.
#
# The page markup is assumed to match the samples in the header comment of
# this file: one <tr> per hierarchy level, distinguished by its class
# attribute, with genus rows listed after the levels they belong to.
# NOTE(review): confirm against the live page; it is not visible from here.
my $agent = LWP::UserAgent->new();
my $res = $agent->get('http://www.edgar-wingender.de/muTF_classification-1.html');
if ( $res->is_success) {
    # Hierarchy levels, outermost first: row class => label that prefixes
    # the cell text (e.g. "Superclass: Basic domains").
    my @levels = (
        [ superclass_tr => 'Superclass' ],
        [ class_tr      => 'Class'      ],
        [ family_tr     => 'Family'     ],
        [ subfamily_tr  => 'Subfamily'  ],
    );
    my %level_index_of = map { $levels[$_][0] => $_ } 0 .. $#levels;

    # Name currently in effect at each level; '' means "none seen yet".
    my @current = ('') x @levels;

    # Text of an element with surrounding whitespace (including the
    # non-breaking spaces that &nbsp; decodes to) removed.
    my $clean_text = sub {
        my ($element) = @_;
        my $text = $element->as_text;
        $text =~ s/\A[\s\xA0]+//;
        $text =~ s/[\s\xA0]+\z//;
        return $text;
    };

    # decoded_content yields character strings, so encode them on output.
    binmode STDOUT, ':encoding(UTF-8)';

    # parse() must be followed by eof() so the tree is completed.
    my $tree = HTML::TreeBuilder->new();
    $tree->parse( $res->decoded_content);
    $tree->eof();

    # look_down returns matches in document order, so a single pass that
    # remembers the most recent name at each level is enough. Searching
    # *inside* a superclass row for class rows cannot work when the rows
    # are siblings of one another in the table.
    ROW:
    foreach my $row ( $tree->look_down('_tag', 'tr') ) {
        my $row_class = $row->attr('class');
        next ROW unless defined $row_class;

        if ( exists $level_index_of{$row_class} ) {
            my $level = $level_index_of{$row_class};
            my $label = $levels[$level][1];

            # The describing cell is the one whose text starts "<Label>:".
            my ($cell) = grep { $clean_text->($_) =~ /\A\Q$label\E\s*:/i }
                         $row->look_down('_tag', 'td');
            next ROW unless $cell;

            ( my $name = $clean_text->($cell) ) =~ s/\A\Q$label\E\s*:\s*//i;
            $current[$level] = $name;

            # Entering a new group invalidates every level nested below it.
            $current[$_] = '' for $level + 1 .. $#current;
        }
        elsif ( $row_class eq 'genus_tr' ) {
            # Per the sample markup, the factor name sits in the colspan="2"
            # cell -- TODO confirm this holds for every genus row.
            my $cell = $row->look_down('_tag', 'td', 'colspan', '2');
            next ROW unless $cell;

            print join( "\t", $clean_text->($cell), @current ), "\n";
        }
    }

    # Release the tree explicitly once the rows have been processed.
    $tree->delete();
}
else {
    print STDERR $res->status_line, "\n";
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment