Last active
August 29, 2015 14:24
-
-
Save abuzarhamza/2de091e7a55823e23988 to your computer and use it in GitHub Desktop.
perl program to extract table
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# This program parses the page http://www.edgar-wingender.de/muTF_classification-1.html
# and arranges the output in the format below:
# TF name Superclass Class Family subfamily
# HOXA7 Helix-turn-helix domains Homeo domain factors HOX-related factors HOX6-7
# c-Myc Basic domains Basic helix-loop-helix factors (bHLH) bHLH-ZIP factors Myc / Max factors
# Sample rows from the page, one per hierarchy level (outermost first):
# <tr class="superclass_tr"> <td class="superclass_descr_td" colspan="9"><i>Superclass</i>: Basic domains</td>
# <tr class="class_tr"> <td colspan="8"><i>Class</i>: Basic leucine zipper factors (bZIP)</td>
# <tr class="family_tr"> <td colspan="4"><i>Family</i>: Jun-related factors</td>
# <tr class="subfamily_tr"> <td colspan="3" nowrap=""><i>Subfamily</i>: Jun factors</td>
# <tr class="genus_tr"> <td colspan="2">c-Jun</td>
use strict;
use warnings;
use LWP::UserAgent;
use HTML::TreeBuilder;

# Page holding the transcription-factor classification table.
my $URL = 'http://www.edgar-wingender.de/muTF_classification-1.html';

# Hierarchy levels, outermost first. Each level is announced by a row of the
# form <tr class="<level>_tr"> whose label cell reads "<Level>: <name>".
my @LEVELS = qw(superclass class family subfamily);

# decoded_content yields character data, so encode explicitly on output.
binmode STDOUT, ':encoding(UTF-8)';

my $agent = LWP::UserAgent->new();
my $res   = $agent->get($URL);

if ( $res->is_success ) {
    my $tree = HTML::TreeBuilder->new();

    # parse_content() parses and then signals end-of-input; a bare parse()
    # without a following eof() can leave the tree unfinished.
    $tree->parse_content( $res->decoded_content );

    print_classification($tree);

    # Release the tree explicitly (needed on older HTML::TreeBuilder
    # versions, harmless on newer ones).
    $tree->delete;
}
else {
    print STDERR $res->status_line, "\n";
}

# Walk every table row of $tree in document order and print one tab-separated
# line per transcription factor:
#   TF name, Superclass, Class, Family, subfamily
# The level rows are siblings of the genus rows (each is its own <tr>), so the
# current superclass/class/family/subfamily is tracked while scanning rather
# than searched for inside a parent row.
sub print_classification {
    my ($tree) = @_;

    my %name_of = map { $_ => '' } @LEVELS;

    print join( "\t", 'TF name', 'Superclass', 'Class', 'Family', 'subfamily' ),
        "\n";

    ROW:
    for my $row ( $tree->look_down( '_tag', 'tr' ) ) {
        my $row_class = $row->attr('class');
        next ROW if !defined $row_class;

        # Tolerate rows that carry more than one CSS class.
        my %has_class = map { $_ => 1 } split ' ', $row_class;

        if ( $has_class{genus_tr} ) {
            my $tf_name = genus_name($row);
            next ROW if $tf_name eq '';
            print join( "\t", $tf_name, @name_of{@LEVELS} ), "\n";
            next ROW;
        }

        for my $depth ( 0 .. $#LEVELS ) {
            my $level = $LEVELS[$depth];
            next if !$has_class{"${level}_tr"};

            $name_of{$level} = level_name( $row, $level );

            # Entering a new group invalidates every level nested below it
            # (e.g. a family without subfamilies must not inherit the
            # previous family's subfamily).
            for my $inner ( @LEVELS[ $depth + 1 .. $#LEVELS ] ) {
                $name_of{$inner} = '';
            }
            last;
        }
    }

    return;
}

# Return the name carried by a level row, i.e. the text of the first cell that
# reads "<Level>: <name>" with the "<Level>:" label removed. Returns '' when
# no such cell exists.
sub level_name {
    my ( $row, $level ) = @_;

    for my $cell ( $row->look_down( '_tag', 'td' ) ) {
        my $text = $cell->as_trimmed_text;
        if ( $text =~ s{\A \Q$level\E \s* : \s*}{}xmsi ) {
            return $text;
        }
    }

    return '';
}

# Return the transcription-factor name of a genus row, or '' if none is found.
# NOTE(review): the only sample available is <td colspan="2">c-Jun</td>, so the
# first cell with a colspan attribute is preferred and the first non-empty
# cell is used as a fallback -- verify against the live page layout.
sub genus_name {
    my ($row) = @_;

    my @cells = $row->look_down( '_tag', 'td' );

    for my $cell (@cells) {
        next if !defined $cell->attr('colspan');
        my $text = $cell->as_trimmed_text;
        return $text if $text ne '';
    }

    for my $cell (@cells) {
        my $text = $cell->as_trimmed_text;
        return $text if $text ne '';
    }

    return '';
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment