keymon/grepxml.pl

## grepxml.pl
#!/bin/env perl
#
# This is a simple script that "greps" an XML file by element name:
# for every selected element it prints the element name followed by
# the text contained in that element.
#
# Author: Hector Rivas
#
use XML::Parser;
use Getopt::Std;

use strict;
use warnings;

# Command-line switches filled in by getopts(): -q, -1, -e, -t and -p.
my %Options;

# Parser state shared with the handle_* callbacks.  Each array is used as a
# stack whose top is element 0: the callbacks unshift when an element opens
# and shift when it closes.
our @parent             = ();     # Names of the currently open elements
our @element_content    = ();     # Text collected for each open element
our @element_subcontent = ("");   # Output already produced by closed children;
                                  # the initial "" entry receives the final output

# Initialize the parser with the three callbacks of this script.
my $parser = XML::Parser->new( Handlers => {
                                Start => \&handle_start,
                                End   => \&handle_end,
                                Char  => \&handle_char,
                                });

# Parse the options; an unknown switch is a usage error.
if (not getopts('q1etp', \%Options)) {
    print_help();
    exit 1;
}

# The first remaining argument is the file name.  Test definedness and length
# instead of truth so that a file literally named "0" is accepted.
my $filename = shift @ARGV;
if (not defined $filename or not length $filename) {
    print_help();
    exit 0;
}

# Elements to query.  Only the keys matter, so the hash is filled through a
# slice; an empty hash means that no filter was requested.
my %element_filter;
@element_filter{@ARGV} = ();

$parser->parsefile($filename);

# Print the accumulated content
print $element_subcontent[0];

# Print the usage summary to standard output.  Takes no arguments.
#
# Declared without a prototype: the script calls print_help() before this
# definition has been compiled, so an empty prototype can never be checked
# and only triggers a "called too early to check prototype" warning.
sub print_help {
    print <<HelpText;
Usage: grepxml [options] input.xml [Element1 ...]

This program greps an XML printing the node names and the Text data.
Options:
    -1  Print the first node searched and exit.
    -q  Print the data, not the node names.
    -e  Print also elements with empty content
    -t  Indent each line with one space per enclosing element
    -p  Print all the parents for each element (root.sub1.sub2.element val)

HelpText
}

# Return a copy of the argument with leading and trailing whitespace removed.
# The caller's value is not modified.
sub trim($)
{
    my ($text) = @_;
    for ($text) {
        s/^\s+//;    # leading whitespace
        s/\s+$//;    # trailing whitespace
    }
    return $text;
}

# Start-tag callback registered with the parser.
# Receives the parser object, the element name and the element's attributes;
# only the name is used.  Opens a new level on each of the three stacks
# (index 0 is the top): an empty text buffer, an empty buffer for the output
# of the children, and the element name.
sub handle_start {
    my ( undef, $tag ) = @_;

    for my $stack ( \@element_content, \@element_subcontent ) {
        unshift @{$stack}, "";
    }
    return unshift @parent, $tag;
}

# Character-data callback registered with the parser.
# Receives the parser object (unused) and a piece of text, and appends that
# text to the buffer of the innermost open element (top of @element_content).
sub handle_char {
    my ( undef, $text ) = @_;

    return $element_content[0] .= $text;
}


# End-tag callback registered with the parser: decides whether the element
# that just closed is reported and hands the resulting text to its parent.
#
# Arguments: the parser object (unused) and the name of the closing element.
#
# Pops the element's level from @parent, @element_content and
# @element_subcontent, builds at most one output line for the element and
# appends that line, followed by the lines already produced by its children,
# to the buffer that is now on top of @element_subcontent.  With -1 the first
# line of an element named on the command line is printed at once and the
# script exits with status 0.
sub handle_end {
    my( $expat, $element ) = @_;

    # Dotted path from the outermost element to this one (root.sub1.element)
    my $element_path = join(".", reverse(@parent));
    shift @parent;

    # True when the element was named on the command line, either by its
    # bare name or by its dotted path.
    my $is_selected = (exists $element_filter{$element}
                       or exists $element_filter{$element_path}) ? 1 : 0;

    # Without any filter every element is a candidate.  The declaration is
    # unconditional on purpose: "my $x = ... if COND" has undefined behaviour.
    my $print_content = (keys( %element_filter ) == 0 or $is_selected) ? 1 : 0;

    # Get the accumulated text of this element ...
    my $content = trim(shift @element_content);
    # ... and the output already produced by its children
    my $subcontent = shift @element_subcontent;

    # The line produced for this element (stays empty if it is not reported)
    my $new_content = "";

    # Test the length of the text, not its truth, so that an element whose
    # whole content is "0" is still reported.
    my $has_text = (defined $content and length($content)) ? 1 : 0;

    if (($has_text or $Options{'e'}) and $print_content) {
        # -t: one space for every element that is still open
        if ($Options{'t'}) {
            $new_content .= " " x scalar(@element_content);
        }
        # Unless -q, prefix the text with the name or, with -p, the path
        if (not $Options{'q'}) {
            $new_content .= ($Options{'p'} ? $element_path : $element) . " ";
        }
        $new_content .= "$content\n";

        # If option -1 is set and we are filtering for this element, exit.
        if ($Options{'1'} and $is_selected) {
            print $new_content;
            exit 0;
        }
    }

    # The element's own line goes before the lines of its children
    $element_subcontent[0] .= $new_content . $subcontent;
}
	#!/bin/env perl
	#
	# This is a simple script that "greps" an XML file by element name:
	# for every selected element it prints the element name followed by
	# the text contained in that element.
	#
	# Author: Hector Rivas
	#
	use XML::Parser;
	use Getopt::Std;

	use strict;
	use warnings;

	# Command-line switches filled in by getopts(): -q, -1, -e, -t and -p.
	my %Options;

	# Parser state shared with the handle_* callbacks.  Each array is used as a
	# stack whose top is element 0: the callbacks unshift when an element opens
	# and shift when it closes.
	our @parent             = ();     # Names of the currently open elements
	our @element_content    = ();     # Text collected for each open element
	our @element_subcontent = ("");   # Output already produced by closed children;
	                                  # the initial "" entry receives the final output

	# Initialize the parser with the three callbacks of this script.
	my $parser = XML::Parser->new( Handlers => {
	    Start => \&handle_start,
	    End   => \&handle_end,
	    Char  => \&handle_char,
	});

	# Parse the options; an unknown switch is a usage error.
	if (not getopts('q1etp', \%Options)) {
	    print_help();
	    exit 1;
	}

	# The first remaining argument is the file name.  Test definedness and length
	# instead of truth so that a file literally named "0" is accepted.
	my $filename = shift @ARGV;
	if (not defined $filename or not length $filename) {
	    print_help();
	    exit 0;
	}

	# Elements to query.  Only the keys matter, so the hash is filled through a
	# slice; an empty hash means that no filter was requested.
	my %element_filter;
	@element_filter{@ARGV} = ();

	$parser->parsefile($filename);

	# Print the accumulated content
	print $element_subcontent[0];

	# Print the usage summary to standard output.  Takes no arguments.
	#
	# The text is built from a list of lines instead of a here-document: every
	# line of this listing carries a leading tab, which would also prefix the
	# here-document terminator and keep Perl from ever finding it.
	# Declared without a prototype because the script calls print_help() before
	# this definition has been compiled, so a prototype could never be checked.
	sub print_help {
	    print join("\n",
	        'Usage: grepxml [options] input.xml [Element1 ...]',
	        '',
	        'This program greps an XML printing the node names and the Text data.',
	        'Options:',
	        '-1 Print the first node searched and exit.',
	        '-q Print the data, not the node names.',
	        '-e Print also elements with empty content',
	        '-t Indent each line with one space per enclosing element',
	        '-p Print all the parents for each element (root.sub1.sub2.element val)',
	        '',
	        '',    # the text ends with an empty line, as the here-document did
	    );
	}

	# Return a copy of the argument with leading and trailing whitespace removed.
	# The caller's value is not modified.
	sub trim($)
	{
	    my ($text) = @_;
	    for ($text) {
	        s/^\s+//;    # leading whitespace
	        s/\s+$//;    # trailing whitespace
	    }
	    return $text;
	}

	# Start-tag callback registered with the parser.
	# Receives the parser object, the element name and the element's attributes;
	# only the name is used.  Opens a new level on each of the three stacks
	# (index 0 is the top): an empty text buffer, an empty buffer for the output
	# of the children, and the element name.
	sub handle_start {
	    my ( undef, $tag ) = @_;

	    for my $stack ( \@element_content, \@element_subcontent ) {
	        unshift @{$stack}, "";
	    }
	    return unshift @parent, $tag;
	}

	# Character-data callback registered with the parser.
	# Receives the parser object (unused) and a piece of text, and appends that
	# text to the buffer of the innermost open element (top of @element_content).
	sub handle_char {
	    my ( undef, $text ) = @_;

	    return $element_content[0] .= $text;
	}


	# End-tag callback registered with the parser: decides whether the element
	# that just closed is reported and hands the resulting text to its parent.
	#
	# Arguments: the parser object (unused) and the name of the closing element.
	#
	# Pops the element's level from @parent, @element_content and
	# @element_subcontent, builds at most one output line for the element and
	# appends that line, followed by the lines already produced by its children,
	# to the buffer that is now on top of @element_subcontent.  With -1 the first
	# line of an element named on the command line is printed at once and the
	# script exits with status 0.
	sub handle_end {
	    my( $expat, $element ) = @_;

	    # Dotted path from the outermost element to this one (root.sub1.element)
	    my $element_path = join(".", reverse(@parent));
	    shift @parent;

	    # True when the element was named on the command line, either by its
	    # bare name or by its dotted path.
	    my $is_selected = (exists $element_filter{$element}
	                       or exists $element_filter{$element_path}) ? 1 : 0;

	    # Without any filter every element is a candidate.  The declaration is
	    # unconditional on purpose: "my $x = ... if COND" has undefined behaviour.
	    my $print_content = (keys( %element_filter ) == 0 or $is_selected) ? 1 : 0;

	    # Get the accumulated text of this element ...
	    my $content = trim(shift @element_content);
	    # ... and the output already produced by its children
	    my $subcontent = shift @element_subcontent;

	    # The line produced for this element (stays empty if it is not reported)
	    my $new_content = "";

	    # Test the length of the text, not its truth, so that an element whose
	    # whole content is "0" is still reported.
	    my $has_text = (defined $content and length($content)) ? 1 : 0;

	    if (($has_text or $Options{'e'}) and $print_content) {
	        # -t: one space for every element that is still open
	        if ($Options{'t'}) {
	            $new_content .= " " x scalar(@element_content);
	        }
	        # Unless -q, prefix the text with the name or, with -p, the path
	        if (not $Options{'q'}) {
	            $new_content .= ($Options{'p'} ? $element_path : $element) . " ";
	        }
	        $new_content .= "$content\n";

	        # If option -1 is set and we are filtering for this element, exit.
	        if ($Options{'1'} and $is_selected) {
	            print $new_content;
	            exit 0;
	        }
	    }

	    # The element's own line goes before the lines of its children
	    $element_subcontent[0] .= $new_content . $subcontent;
	}