jnbek/Mediawiki::API.pm

## Mediawiki::API.pm
# Mediawiki::API
# A Perl library to access the Mediawiki API
#
# TODO: Go through and wrap (probably) all of the 'die's in a check for dieOnError

package Mediawiki::API;

use strict;
use Data::Dumper;
use LWP::UserAgent;
use HTTP::Cookies;
use XML::Simple;
use POSIX qw(strftime);
use HTML::Entities;
use Encode;

##########################################################
## Enable a native code XML parser - makes a huge difference
$XML::Simple::PREFERRED_PARSER = "XML::Parser";
#$XML::Simple::PREFERRED_PARSER = "XML::LibXML::SAX";

###########################################################

=pod

Mediawiki::API -Provides methods to access the Mediawiki API via an object
oriented interface. Attempts be less stupid about errors.

=head1 Synopsis

 $api = Mediawiki::API->new();
 $api->base_url($newurl);
 @list = @{$api->pages_in_category($categoryTitle)};
 $api->edit_page($pageTitle, $pageContent, $editSummary);

=cut

#############################################################

=head1 Methods

=head2 Initialize the object

=over

=item $api = Mediawiki::API->new();

Create a new API object

=back

=cut

###

sub new {
  my $self = {};

  $self->{'agent'} = new LWP::UserAgent;
  $self->{'agent'}->cookie_jar(HTTP::Cookies->new());

  $self->{'baseurl'} = 'http://192.168.1.71/~mw/wiki/api.php';
  $self->{'loggedin'} = 'false';

  ## Configuration parameters
  $self->{'maxlag'} = 5;           # server-side load balancing param
  $self->{'maxRetryCount'} = 3;    # retries at the HTTP level
  $self->{'debugLevel'} = 1;       # level of verbosity for debug output
  $self->{'requestCount'} = 0;     # count total HTTP requests
  $self->{'htmlMode'} = 0;         # escape output for CGI output
  $self->{'debugXML'} = 0;         # print extra debugging for XML parsing
  $self->{'cmsort'} = 'sortkey';   # request this sort order from API
  $self->{'querylimit'} = 500;     # number of results to request per query
  $self->{'botlimit'} = 5000;      # number of results to request if bot
  $self->{'decodePrint'} = 1;      # don't UTF-8 output
  $self->{'xmlretrydelay'} = 10;   # pause after XML level failure
  $self->{'xmlretrylimit'} = 10;   # retries at XML level
  $self->{'setbotflag'} = 1;       # if the bot flag isn't set, the edit won't show up as a bot edit

  $self->{'cacheEditToken'} = 0;
  $self->{'cacheDeleteToken'} = 0;
  $self->{'cacheProtectToken'} = 0;

  $self->{'logintries'} = 5;       # delay on login throttle

  $self->{'dieOnError'} = 1;       # if this is 0, makeXmlRequest will just return instead of 'die'ing the whole script.

  bless($self);
  return $self;
}

#############################################################

=head2 Get/set configuration parameters

=over

=item $url = $api->base_url($newurl);

=item $url = $api->base_url();

Set and/or fetch the url of the
Mediawiki server.  It should be a full URL to api.php on the server.

=cut

sub base_url () {
  my $self = shift;
  my $newurl = shift;
  if ( defined $newurl)  {
    $self->{'baseurl'} = $newurl;
    $self->print(1, "A Set base URL to: $newurl");
  }

  return $self->{'baseurl'};
}

####################################

=item $level = $api->max_retries($count)

=item $level = $api->max_retries();

Set the number of times to retry the HTTP portion of an API
request. Retries can also be generated by API responses or
maxlag; this doesn't affect those.

=cut

sub max_retries  {
  my $self = shift;
  my $count = shift;
  if ( defined $count)  {
    $self->{'maxRetryCount'} = $count;
    $self->print(1, "A Set maximum retry count to: $count");
  }

  return $self->{'maxRetryCount'};
}

########################################################

=item $level = $api->html_mode($new_level)

=item $level = $api->html_mode();

HTML mode - for when the output is passed to a browser. If the level is
1, output will be in HTML. If the level is 0 (the default), the output
is in plain text.

=cut

sub html_mode  {
  my $self = shift;
  my $mode = shift;
  if ( defined $mode)  {
    $self->{'htmlMode'} = $mode;
    if ( $self->{'htmlMode'} > 0 ) {
      $self->print(1, "A Enable HTML mode");
    } else {
      $self->print(1, "A Disable HTML mode");
    }
  }

  return $self->{'htmlMode'};
}

###########################################################

### Internal function, may have no effect depending on debugging code

sub debug_xml  {
  my $self = shift;
  my $mode = shift;
  if ( defined $mode)  {
    $self->{'debugXML'} = $mode;
    if ( $self->{'debugXML'} > 0 ) {
      $self->print(1, "A Enable XML debug mode");
    } else {
      $self->print(1, "A Disable XML debug mode");
    }
  }

  return $self->{'debugXML'};
}

#######################################################

=item $level = $api->debug_level($newlevel);

=item $level = $api->debug_level();

Set the level of output, from 0 to 5. Level 1 gives minimal feedback,
level 5 is detailed for debugging.  Intermediate levels give intermediate
amounts of information.

=cut

sub debug_level {
 my $self = shift;
 my $level = shift;
 if ( defined $level) {
   $self->{'debugLevel'} = $level;
   $self->print(1,"A Set debug level to: $level");
 }
 return $self->{'debugLevel'};
}

######################################################

sub decode_print {
    my $self = shift;
    my $dPrint = shift;
    if(defined $dPrint){
        $self->{'decodePrint'} = $dPrint;
        $self->print(1,"A Set decodePrint to: $dPrint");
    }
    return $self->{'decodePrint'};
}

######################################################

=item $lag = $api->maxlag($newlag)

=item $lag = $api->maxlag()

Get and/or set the maxlag value for requests.

=cut

sub maxlag {
  my $self = shift;
  my $maxlag = shift;

  if ( defined $maxlag) {
    $self->{'maxlag'} = $maxlag;
    $self->print(1,"A Maxlag set to " . $self->{'maxlag'});
  }

  return $self->{'maxlag'};
}

####################################

=item $level = $api->cmsort($order)

=item $level = $api->cmsort();

Set the way that category member lists are sorted when they
arrive from the server. The $order parmater must be 'timestamp'
or 'sortkey'.

=cut

sub cmsort  {
  my $self = shift;
  my $order = shift;

  if ( defined $order)  {

    if ( ! ( $order eq 'sortkey' || $order eq 'timestamp') ) {
      die "cmsort parameter must be 'timestamp' or 'sortkey', not '$order'.\n";
    }

    $self->{'cmsort'} = $order;
    $self->print(1, "A Set category sort order to: $order");
  }

  return $self->{'cmsort'};
}

#############################################################

=head2 Log in

=back

=over

=item $api->login($userName, $password)

Log in to the Mediawiki server, check whether the user has a bot flag,
and set some defaults appropriately

=back

=cut

sub login {
 my $self = shift;
 my $userName = shift;
 my $userPassword = shift;
 my $tries = shift || $self->{'logintries'};
 $tries--;

 if (  $tries == 0 ) {
    die "Too many login attempts\n";
 }

 $self->print(1,"A Logging in");

 my $xml  = $self->makeXMLrequest(
                      [ 'action' => 'login',
                        'format' => 'xml',
                        'lgname' => $userName,
                        'lgpassword' => $userPassword  ]);

  if ( ! defined $xml->{'login'}
       || ! defined $xml->{'login'}->{'result'}) {

    $self->print(4, "E no login result.\n" . Dumper($xml));
    $self->handleXMLerror("login err");
  }

  my $result = $xml->{'login'}->{'result'};

  if ( $result ne 'Success' ) {
    if ( $result eq 'Throttled' || $result eq 'NeedToWait') {
      my $wait = $xml->{'login'}->{'wait'} || 10;
      $self->print(3, "R Login delayed: $result, sleeping "
                      . (2 + $wait) . " seconds\n");
      $self->print(5, Dumper($xml));

      sleep (2 + $wait);
      return $self->login($userName, $userPassword, $tries);
    }  elsif ( $result eq 'NeedToken' ) {
      my $oldxml = $xml;
         $xml  = $self->makeXMLrequest(
                      [ 'action' => 'login',
                        'format' => 'xml',
                        'lgname' => $userName,
                        'cookieprefix' => $oldxml->{'login'}->{'cookieprefix'},
                        'sessionid' => $oldxml->{'login'}->{'sessionid'},
                        'lguserid' => $oldxml->{'login'}->{'lguserid'},
                        'lgtoken' => $oldxml->{'login'}->{'token'},
                        'lgpassword' => $userPassword  ]);

       if ( ! defined $xml->{'login'}
         || ! defined $xml->{'login'}->{'result'}) {
              $self->print(4, "E no login result.\n" . Dumper($xml));
              $self->handleXMLerror("login err");
        }

        if ( $xml->{'login'}->{'result'} ne 'Success' ) {
          $self->print(5, "Login error on second phase\n");
          $self->print(5, Dumper($xml));
       }
    } else {
       $self->print(5, "Login error\n");
      $self->print(5, Dumper($xml));
      die( "Login error. Message was: '" . $xml->{'login'}->{'result'} . "'\n");
    }
  }

  $self->print(1,"R Login successful");

  foreach $_ ( 'lgusername', 'lgtoken', 'lguserid' ) {
    $self->print(5, "I\t" . $_ . " => " . $xml->{'login'}->{$_} );
    $self->{$_} = $xml->{'login'}->{$_};
  }

  $self->{'loggedin'} = 'true';

  if ( $self->is_bot() ) {
    $self->print (1,"R Logged in user has bot rights");
  }

  delete $self->{'editToken'};

}

##################################

sub login_from_file {
  my $self = shift;
  my $file = shift;
  open IN, "<$file" or die "Can't open file $file: $!\n";

  my ($a, $b, $user, $pass, $o);
  $o = $/;
  $/ = "\n";
  while ( <IN> ) {
    chomp;
    ($a, $b) = split /\s+/, $_, 2;
    if ( $a eq 'user') { $user = $b;}
    if ( $a eq 'pass') { $pass = $b;}
  }

  close IN;
  $/ = $o;

  if ( ! defined $user ) {
    die "No username to log in\n";
  }

  if ( ! defined $pass ) {
    die "No password to log in\n";
  }

  $self->login($user, $pass);

}

##############################################################

# Internal function

sub cookie_jar {
  my $self = shift;
  return $self->{'agent'}->cookie_jar();
}

##############################################################


=head2 Edit pages

=over

=item $api->edit_page($pageTitle, $pageContent, $editSummary, $params);

Edit a page.

The array reference $params allows configuration. Valid parameters listed
at http://www.mediawiki.org/wiki/API:Edit_-_Create%26Edit_pages#Token

Returns undef on success.
Returns the API.php result hash on error.

=back

=cut

sub edit_page {
  my $self = shift;
  my $pageTitle = shift;
  my $pageContent = shift;
  my $editSummary = shift;
  my $params = shift || [];

  $self->print(1,"A Editing $pageTitle");

  my $editToken;

  if ( 1 == $self->{'cacheEditToken'}
         && defined $self->{'editToken'} ) {
    $editToken = $self->{'editToken'};
    $self->print(5, "I using cached edit token: $editToken");
  } else {
    $editToken = $self->edit_token($pageTitle);
  }

  # Now, the editToken might be equal to this (eg: when creating a page).
  #if ( $editToken eq '+\\' ) { die "Bad edit token!\n"; }

  # If the bot flag isn't set, the edit won't be recorded as a bot
  # edit, so it won't be hidden even with hide-bots set.
  if($self->{'setbotflag'} != 0){
    push(@$params, 'bot'=>"1");
  }

  my $query =
      [ 'action' => 'edit',
    'token' => $editToken,
    'summary' => $editSummary,
    'text' => $pageContent,
    'title' => $pageTitle,
    'format' => 'xml',
       @$params  ];

  my $res  = $self->makeXMLrequest($query);

  $self->print(5, 'R editing response: ' . Dumper($res));

  if ( $res->{'edit'}->{'result'} eq 'Success' ) {
      return "";
  } else {
      return $res;
  }

}

############################################################
# internal function

sub edit_token {
  my $self = shift;
  my $pageTitle = shift;

  my $xml  = $self->makeXMLrequest(
                  [ 'action' => 'query',
                    'prop' => 'info',
                    'titles' => $pageTitle,
                    'intoken' => 'edit',
                    'format' => 'xml']);

  if ( ! defined $xml->{'query'}
       || ! defined $xml->{'query'}->{'pages'}
       || ! defined $xml->{'query'}->{'pages'}->{'page'}
       || ! defined $xml->{'query'}->{'pages'}->{'page'}->{'edittoken'} ) {
     $self->handleXMLerror($xml);
  }

  my $editToken = $xml->{'query'}->{'pages'}->{'page'}->{'edittoken'};
  $self->print(5, "R edit token: ... $editToken ...");

  if ( 1 == $self->{'cacheEditToken'} ) {
    $self->{'editToken'} = $editToken;
    $self->print(5, "I caching edit token");
  }

  return $editToken;
}

######################################################

=item $api->purge($pageTitles);

Purge the cache of the pages given in the pageTitles

Returns undef on success.
Returns the API.php result hash on error.

=cut
sub purge {
  my $self = shift;
  my $pageTitles = join("|", @_);

  $self->print(1,"A Purging $pageTitles");

  my $query =
      [ 'action' => 'purge',
    'titles' => $pageTitles,
    'format' => 'xml',
    ];

  my $res  = $self->makeXMLrequest($query);

  $self->print(5, 'R deletion response: ' . Dumper($res));

  if ( exists($res->{'purge'}->{'page'}->{'purged'}) ) {
      return "";
  } else {
        print "Error trying to purge cache...\n";
        print Dumper($res);
        return $res;
  }
}

######################################################

=item $api->delete_page($pageTitle, $reason);

Delete the page given by $pageTitle and provide the explanation
that is given in $reason.

Returns undef on success.
Returns the API.php result hash on error.

=cut
sub delete_page {
  my $self = shift;
  my $pageTitle = shift;
  my $reason = shift;
  my $params = shift || [];

  $self->print(1,"A Deleting $pageTitle");

  my $deleteToken;

  if ( 1 == $self->{'cacheDeleteToken'}
         && defined $self->{'deleteToken'} ) {
    $deleteToken = $self->{'deleteToken'};
    $self->print(5, "I using cached delete token: $deleteToken");
  } else {
    $deleteToken = $self->delete_token($pageTitle);
  }

  if ( $deleteToken eq '+\\' ) { die "Bad delete token!\n"; }

  my $query =
      [ 'action' => 'delete',
    'title' => $pageTitle,
    'token' => $deleteToken,
    'reason' => $reason,
    'format' => 'xml',
    @$params  ];

  my $res  = $self->makeXMLrequest($query);

  $self->print(5, 'R deletion response: ' . Dumper($res));

  if ( $res->{'delete'}->{'result'} eq 'Success' ) {
      return "";
  } else {
      return $res;
  }
}

############################################################
# internal function

# Fetches the 'delete' token which needs to be passed back to the
# delete request in delete_page().
sub delete_token {
  my $self = shift;
  my $pageTitle = shift;

  my $xml  = $self->makeXMLrequest(
                  [ 'action' => 'query',
                    'prop' => 'info',
                    'titles' => $pageTitle,
                    'intoken' => 'delete',
                    'format' => 'xml']);

  if ( ! defined $xml->{'query'}
       || ! defined $xml->{'query'}->{'pages'}
       || ! defined $xml->{'query'}->{'pages'}->{'page'}
       || ! defined $xml->{'query'}->{'pages'}->{'page'}->{'deletetoken'} ) {
     $self->handleXMLerror($xml);
  }

  my $deleteToken = $xml->{'query'}->{'pages'}->{'page'}->{'deletetoken'};
  $self->print(5, "R delete token: ... $deleteToken ...");

  if ( 1 == $self->{'cacheDeleteToken'} ) {
    $self->{'deleteToken'} = $deleteToken;
    $self->print(5, "I caching delete token");
  }

  return $deleteToken;
}


######################################################

=item $api->protect_page($pageTitle, $reason);

=item $api->protect_page($pageTitle, $reason, ['protections' => 'edit=autoconfirmed|move=sysop']);

Protect the page given by $pageTitle and provide the explanation
that is given in $reason.

This default to protects edits and moves to only be do-able for
sysops and with no expiration.  This should be kept backward compatible.

Pass in additional or different parameters to change the protections.
For example:
$api->protect_page($pageTitle, $reason, ['protections' => 'edit=autoconfirmed|move=sysop']);
would let autoconfirmed users edit the page, but only allow sysops to move the page.

Cascading protection is NOT on (and there is currently no parameter
for it in this subroutine).

Returns undef on success.
Returns the API.php result hash on error.

NOTE: If this returns "missingtitle-createonly", that means that you tried to protect/unprotect
a page that doesn't exist yet.  The only protection allowed in that case is "create".

=cut
sub protect_page {
  my $self = shift;
  my $pageTitle = shift;
  my $reason = shift;
  my $params = shift || [];

  $self->print(1,"A (Un?)protecting $pageTitle");

  my $protectToken;

  if ( 1 == $self->{'cacheProtectToken'}
         && defined $self->{'protectToken'} ) {
    $protectToken = $self->{'protectToken'};
    $self->print(5, "I using cached protect token: $protectToken");
  } else {
    $protectToken = $self->protect_token($pageTitle);
  }

  if ( $protectToken eq '+\\' ) { die "Bad protect token!\n"; }

    # If protections weren't specified, add a default (edits and moves for sysops only).
    if(! (grep $_ eq "protections", @{$params})){
        push(@{$params}, "protections");
        push(@{$params}, "edit=sysop|move=sysop");
    }

  my $query =
      [ 'action' => 'protect',
    'title' => $pageTitle,
    'token' => $protectToken,
    'reason' => $reason,
    'format' => 'xml',
    @$params  ];

  my $res  = $self->makeXMLrequest($query);

  $self->print(5, 'R protection response: ' . Dumper($res));

  if ( $res->{'protect'}->{'result'} eq 'Success' ) {
      return "";
  } else {
      return $res;
  }
}

############################################################

=item $api->unprotect_page($pageTitle, $reason);

Basically just an alias to protect-page but which defaults to no protections.

=cut
sub unprotect_page {
  my $self = shift;
  my $pageTitle = shift;
  my $reason = shift;
  my $params = shift || [];

  # If protections weren't specified, set the default of no protection.
  if(! (grep $_ eq "protections", @{$params})){ # like in_array()
    push(@{$params}, "protections");
    push(@{$params}, "edit=all|move=all");
  }

  return $self->protect_page($pageTitle, $reason, $params);
} # end unprotect_page()

############################################################
# internal function

# Fetches the 'protect' token which needs to be passed back to the
# protect request in protect_page().
sub protect_token {
  my $self = shift;
  my $pageTitle = shift;

  my $xml  = $self->makeXMLrequest(
                  [ 'action' => 'query',
                    'prop' => 'info',
                    'titles' => $pageTitle,
                    'intoken' => 'protect',
                    'format' => 'xml']);

  if ( ! defined $xml->{'query'}
       || ! defined $xml->{'query'}->{'pages'}
       || ! defined $xml->{'query'}->{'pages'}->{'page'}
       || ! defined $xml->{'query'}->{'pages'}->{'page'}->{'protecttoken'} ) {
     $self->handleXMLerror($xml, "Couldn't find protecttoken.");
  }

  my $protectToken = $xml->{'query'}->{'pages'}->{'page'}->{'protecttoken'};
  $self->print(5, "R protect token: ... $protectToken ...");

  if ( 1 == $self->{'cacheProtectToken'} ) {
    $self->{'protectToken'} = $protectToken;
    $self->print(5, "I caching protect token");
  }

  return $protectToken;
}


############################################################
#

=head2 Get lists of pages

=over

=item $articles =
    $api->pages_in_category($categoryTitle [ , $namespace])

Fetch the list of page titles in a category. Optional numeric
parameter to filter by namespace. Return $articles, an array ref.

=cut

sub pages_in_category {
  my $self = shift;
  my $categoryTitle = shift;
  my $namespace = shift;

  my $results = $self->pages_in_category_detailed($categoryTitle,$namespace);

  my @articles;

  my $result;
  foreach $result (@{$results}) {
      push @articles, $result->{'title'};
  }

  return \@articles;
}

############################################################
# Compatibility function from old framework

=item $articles = $api->fetch_backlinks_compat($pageTitle)

Fetch list of pages that link to a given page.
Returns $articles as an array reference.

=cut

sub fetch_backlinks_compat {
  my $self = shift;
  my $pageTitle = shift;

  my $results = $self->backlinks($pageTitle);

  my @articles;

  my $result;
  foreach $result (@{$results}) {
      push @articles, $result->{'title'};
  }

  return \@articles;
}

##############################################################

=item $pages = $api->backlinks($pageTitle);

Fetch the pages that link to a particular page title.
Returns a reference to an array.

=cut

sub backlinks {
  my $self = shift;
  my $pageTitle = shift;

  $self->print(1,"A Fetching backlinks for $pageTitle");

  my %queryParameters =  ( 'action' => 'query',
                           'list' => 'backlinks',
                           'bllimit' => $self->{'querylimit'},
#                           'titles' => $pageTitle,
                           'bltitle' => $pageTitle,
                           'format' => 'xml');

  if ( $self->is_bot) {
    $queryParameters{'bllimit'} = $self->{'botlimit'};
  }

  my $results
    = $self->fetchWithContinuation(\%queryParameters,
                    ['query', 'backlinks', 'bl'],
                    'bl',
                    ['query-continue', 'backlinks', 'blcontinue'],
                    'blcontinue');

  return $results;
}

################################################################

=item $articles =

$api->pages_in_category_detailed($categoryTitle [, $namespace])

Fetch the contents of a category. Optional parameter to select a
specific namespace. Returns a reference to an array of hash
references.

=cut

sub pages_in_category_detailed {
  my $self = shift;
  my $categoryTitle = shift;
  my $namespace = shift;

  $self->print(1,"A Fetching category contents for $categoryTitle");

### The behavior keeps changing with respect to whether
### the Category: prefix should be included.
  if ( $categoryTitle =~ /^Category:/) {
#    $self->print(1,"WARNING: Don't pass categories with namespace included");
#    $categoryTitle =~ s/^Category://;
  } else {
    $categoryTitle = 'Category:' . $categoryTitle;
  }

  my %queryParameters =  ( 'action' => 'query',
                           'list' => 'categorymembers',
                           'cmlimit' => $self->{'querylimit'},
                           'cmsort' => $self->{'cmsort'},
                           'cmprop' => 'ids|title|sortkey|timestamp',
                           'cmtitle' => $categoryTitle,
                           'format' => 'xml' );

  if ( defined $namespace ) {
    $queryParameters{'cmnamespace'} = $namespace;
  }

  if ( $self->is_bot) { $queryParameters{'cmlimit'} = $self->{'botlimit'}; }

  my $results
    = $self->fetchWithContinuation(\%queryParameters,
                    ['query', 'categorymembers', 'cm'],
                    'cm',
                    ['query-continue', 'categorymembers', 'cmcontinue'],
                    'cmcontinue');

  return $results;
}

#############################################################

=item $list = $api->where_embedded($templateName);

Fetch the list of pages that tranclude $templateName.
If $templateName refers to a template, it SHOULD start with "Template:".
Returns a reference to an array of hash references.

=cut

sub where_embedded {
  my $self = shift;
  my $templateTitle = shift;

  $self->print(1,"A Fetching list of pages transcluding $templateTitle");

  my %queryParameters =  ( 'action' => 'query',
                           'list' => 'embeddedin',
                           'eilimit' => $self->{'querylimit'},
                           'eititle' => $templateTitle,
                           'format' => 'xml' );

  if ( $self->is_bot) {
    $queryParameters{'eilimit'} = $self->{'botlimit'};
  }

  my $results
    = $self->fetchWithContinuation(\%queryParameters,
                    ['query', 'embeddedin', 'ei'],
                    'ei',
                    ['query-continue', 'embeddedin', 'eicontinue'],
                    'eicontinue');

  return $results;
}

#############################################################

=item $list = $api->templates_on_page($pageTitle);

Gets a list of all pages included in the provided page.
This module can be used as a generator.

Returns an array of the titles of the templates included
on the page.
NOTE: It seems other functions return references to arrays. Any
specific reason?  Regardless, this should match that format for
uniformity.  But notice then next TODO... instead of re-writing
this subroutine for that, just finish making this work for
multiple titles.

TODO: Make a version of this subroutine which takes multiple
pages at once (since the API can do that) and returns a more
complex structure for the resutls.

=cut

sub templates_on_page {
    my $self = shift;
    my $pageTitle = shift;

    $self->print(1,"A Fetching list of templates transcluded on $pageTitle");

    my %queryParameters =  ( 'action' => 'query',
                             'prop' => 'templates',
                             'titles' => $pageTitle,
                             'format' => 'xml' );

    if ( $self->is_bot) {
        $queryParameters{'eilimit'} = $self->{'botlimit'};
    }

    my $results = $self->makeXMLrequest([%queryParameters]);

    my @arr = ();
    $results = $self->child_data($results, ['query', 'pages', 'page']);
    if(exists($results->{'templates'})){ # 'leaf-node' pages won't include any templates
        my $data = $self->child_data($results, ['templates', 'tl']);

        # If there is only one result, this just returns a hash.  Wrap it in an array.
        if(ref($data) ne 'ARRAY'){
            $data = [$data];
        }

        my $result;
        foreach $result (@$data) {
            push(@arr, $result->{'title'});
        }
    }

    return @arr;
} # end templates_on_page

#############################################################

=item $list = $api->log_events($pageName, $params);

Fetch a list of log entries for the page.
Returns a reference to an array of hashes.

=cut

sub log_events {
  my $self = shift;
  my $pageTitle = shift;
  my $params  = shift || [];

  $self->print(1,"A Fetching log events for $pageTitle");


  my %queryParameters =  ( 'action' => 'query',
                           'list' => 'logevents',
                           'lelimit' => $self->{'querylimit'},
                           'format' => 'xml' ,
                            @$params);

  if ( defined $pageTitle ) {
    $queryParameters{'letitle'}  = $pageTitle;
  }

  if ( $self->is_bot) {
    $queryParameters{'lelimit'} = $self->{'botlimit'}
  }

  my $results
    = $self->fetchWithContinuation(\%queryParameters,
                    ['query', 'logevents','item'],
                    'item',
                    ['query-continue', 'logevents', 'lestart'],
                    'lestart');

  return $results;
}

#############################################################

=item $list = $api->image_embedded($imageName);

Fetch the list of pages that display the image $imageName.
The value of $imageName should NOT start with "Image:".
Returns a reference to an array of hash references.

=cut

sub image_embedded {
  my $self = shift;
  my $imageTitle = shift;

  $self->print(1,"A Fetching list of pages displaying image $imageTitle");

  my %queryParameters =  ( 'action' => 'query',
                           'list' => 'imageusage',
                           'iulimit' => $self->{'querylimit'},
                           'iutitle' => $imageTitle,
                           'format' => 'xml' );

  if ( $self->is_bot) {
    $queryParameters{'iulimit'} = $self->{'botlimit'};
  }

  my $results
    = $self->fetchWithContinuation(\%queryParameters,
                    ['query', 'imageusage', 'iu'],
                    'iu',
                    ['query-continue', 'imageusage', 'iucontinue'],
                    'iucontinue');

  return $results;
}
######################################################


######################################################

=item $text = $api->content($pageTitles);

Fetch the content (wikitext) of a page or pages.  $pageTitles
can be either a scalar, in which case it is the title of the page
to be fetched, or a reference to a list of page titles. If a single
title is passed, the text is returned. If an array reference is passed,
a hash reference is returned.

=cut

sub content {
  my $self = shift;
  my $titles = shift;

  if (ref($titles) eq "") {
     return $self->content_single($titles);
  }

  if ( scalar @$titles == 1) {
     return $self->content_single(${$titles}[0]);
  }

  $self->print(1,"A Fetching content of " . scalar @$titles . " pages");

  my $titlestr = join "|", @$titles;

  my %queryParameters =  ( 'action' => 'query',
                           'prop' => 'revisions',
                           'titles' => $titlestr,
                           'rvprop' => 'content',
                           'format' => 'xml' );

  my $results
    = $self->makeXMLrequest([%queryParameters]);

  my $arr = {};
  my $data = $self->child_data($results, ['query', 'pages', 'page']);

  my $result;

  foreach $result ( @$data) {
    $arr->{$result->{'title'}} = $result->{'revisions'}->{'rev'}->{'content'};
  }

  return $arr;
}


#########################################################

## Internal function

sub content_single {
  my $self = shift;
  my $pageTitle = shift;

  $self->print(1,"A Fetching content of $pageTitle");

  my %queryParameters =  ( 'action' => 'query',
                           'prop' => 'revisions',
                           'titles' => $pageTitle,
                           'rvprop' => 'content',
                           'format' => 'xml' );

  my $results
    = $self->makeXMLrequest([%queryParameters]);

  return $self->child_data_if_defined($results,
                       ['query', 'pages', 'page', 'revisions', 'rev', 'content'], '');
}
#########################################################

=item $text = $api->content_section($pageTitle, $secNum);

Fetch the content (wikitext) of a particular section of a page.
The lede section is #0.

=cut

sub content_section {
  my $self = shift;
  my $pageTitle = shift;
  my $section = shift;

  if ( ! ( $section =~ /^\d+$/ ) ) {
    die "Bad section: '$section'. Must be a nonnegative integer.\n";
  }

  $self->print(1,"A Fetching content of $pageTitle");

  my %queryParameters =  ( 'action' => 'query',
                           'prop' => 'revisions',
                           'titles' => $pageTitle,
                           'rvprop' => 'content',
                           'rvsection' => $section,
                           'format' => 'xml' );
  my $results
    = $self->makeXMLrequest([%queryParameters]);

  return $self->child_data_if_defined($results,
                       ['query', 'pages', 'page', 'revisions', 'rev'], '');
}


###################################################

=item $text = $api->revisions($pageTitle, $count);

Fetch the most recent $count revisions of a page.

=cut

sub revisions {
  my $self = shift;
  my $title = shift;

  my $count = shift;
  if ( ! defined $count ) {
    $count = $self->{'querylimit'};
  }

  my $what = "ids|flags|timestamp|size|comment|user";

  my $xml = $self->makeXMLrequest([ 'format' => 'xml',
                                       'action' => 'query',
                                       'prop' => 'revisions',
                                       'rvprop' => $what,
                                       'rvlimit' => $count,
                                       'titles' => encode("utf8", $title)  ],
                                     [ 'page', 'rev' ]);
  my $t = $self->child_data_if_defined($xml, ['query','pages','page']);

  # TODO: This line failed... REMOVE AFTER MORE TESTING.
  #return $self->child_data_if_defined(${$t}[0], ['revisions', 'rev']);
  return $self->child_data_if_defined($t, ['revisions', 'rev']);
}


################################################################

=item $info = $api->page_info($page);

Fetch information about a page. Returns a reference to a hash.

=cut

sub page_info {
  my $self = shift;
  my $pageTitle = shift;

  $self->print(1,"A Fetching info for $pageTitle");

  my $what = 'protection|talkid|subjectid';

  my %queryParameters =  ( 'action' => 'query',
                           'prop' => 'info',
                           'inprop' => $what,
                           'titles' => $pageTitle,
                           'format' => 'xml' );

  my $results
    = $self->makeXMLrequest([%queryParameters]);

  return $self->child_data($results,  ['query', 'pages', 'page']);
}

################################################################

=item $api->rollback_page($pageTitle);

=item $api->rollback_page( $pageTitle, ["summary" => "Bot detected that this page was vandalism."] );

http://www.mediawiki.org/wiki/API:Rollback

Rolling back a page means undoing the last series of edits by
one user. In other words, rollback keeps undoing revision after
revision until it encounters one made by someone different.

Optional parameters include summary and 'markbot' flag.

=cut
sub rollback_page {
    my $self = shift;
    my $pageTitle = shift;
    my $params = shift || [];

    $self->print(1,"A Rolling back $pageTitle");

    my $rollbackToken;
    my $lastEditor;

    # NOTE: Rollback token cannot be cached. It depends on the page-title, the
    # specific login session, and the user to rollback.
    ($rollbackToken, $lastEditor) = $self->rollback_tokenAndEditor($pageTitle);

    if ( $rollbackToken eq '+\\' ) { die "Bad rollback token!\n"; }

    my $query =
            [ 'action' => 'rollback',
            'title' => $pageTitle,
            'token' => $rollbackToken,
            'user' => $lastEditor,
            'format' => 'xml',
            @$params  ];

    my $res  = $self->makeXMLrequest($query);

    $self->print(5, 'R rollback response: ' . Dumper($res));

    if ( $res->{'rollback'}->{'revid'} ne '' ) {
        return "";
    } else {
        return $res;
    }
} # end rollback_page()

############################################################
# internal function

# Fetches the 'rollback' token which needs to be passed back to the
# rollback request in rollback_page().
# Can't cache roll
sub rollback_tokenAndEditor {
  my $self = shift;
  my $pageTitle = shift;

  my $xml  = $self->makeXMLrequest(
                  [ 'action' => 'query',
                    'prop' => 'revisions',
                    'titles' => $pageTitle,
                    'rvtoken' => 'rollback',
                    'format' => 'xml']);

  if ( ! defined $xml->{'query'}
       || ! defined $xml->{'query'}->{'pages'}
       || ! defined $xml->{'query'}->{'pages'}->{'page'}
       || ! defined $xml->{'query'}->{'pages'}->{'page'}->{'revisions'}
       || ! defined $xml->{'query'}->{'pages'}->{'page'}->{'revisions'}->{'rev'}
       || ! defined $xml->{'query'}->{'pages'}->{'page'}->{'revisions'}->{'rev'}->{'rollbacktoken'}
       || ! defined $xml->{'query'}->{'pages'}->{'page'}->{'revisions'}->{'rev'}->{'user'}) {
     $self->handleXMLerror($xml, "Couldn't find rollbacktoken and/or user.");
  }

  my $rollbackToken = $xml->{'query'}->{'pages'}->{'page'}->{'revisions'}->{'rev'}->{'rollbacktoken'};
  my $lastEditor = $xml->{'query'}->{'pages'}->{'page'}->{'revisions'}->{'rev'}->{'user'};
  $self->print(5, "R rollback token: ... $rollbackToken ...");
  $self->print(5, "R last editor: ... $lastEditor ...");

  return ($rollbackToken, $lastEditor);
} # end rollback_tokenAndEditor()

#######################################################

# Internal Function

sub fetchWithContinuation {
  my $self = shift;
  my $queryParameters = shift;
  my $dataPath = shift;
  my $dataName = shift;
  my $continuationPath = shift;
  my $continuationName = shift;

  $self->add_maxlag_param($queryParameters);

  $self->print(5, "I Query parameters:\n" . Dumper($queryParameters));

  my $xml = $self->makeXMLrequest([ %{$queryParameters}], [$dataName]);
  my @results = @{$self->child_data_if_defined($xml, $dataPath, [])};

#  $self->print(6, Dumper($xml));

  while ( defined $xml->{'query-continue'} ) {
    $self->print(5, "CONTINUE: " . Dumper($xml->{'query-continue'}));

    $queryParameters->{$continuationName} =
    encode("utf8",$self->child_data( $xml, $continuationPath,
                                     "Error in categorymembers xml"));
    $xml =$self->makeXMLrequest([ %{$queryParameters}], [$dataName]);
    @results = (@results,
                @{$self->child_data_if_defined($xml, $dataPath, [])} );

  }

  return \@results;
}

#######################################################
# Internal function

sub add_maxlag_param {
  my $self = shift;
  my $hash = shift;

  if ( defined $self->{'maxlag'} && $self->{'maxlag'} >= 0 ) {
    $hash->{'maxlag'} = $self->{'maxlag'}
  }
}

##############################################################

=item $contribs = $api->user_contribs($userName);

Fetch the list of nondeleted edits by a user. Returns a
reference to an array of hash references.

=cut

sub user_contribs {
  my $self = shift;
  my $userName = shift;
  my @results;
  $self->print(1,"A Fetching contribs for $userName");

  my %queryParameters =  ( 'action' => 'query',
                           'list' => 'usercontribs',
                           'uclimit' => $self->{'querylimit'},
                           'ucdirection' => 'older',
                           'ucuser' => $userName,
                           'format' => 'xml' );

  if ( $self->is_bot) {
    $queryParameters{'uclimit'} = $self->{'botlimit'};
  }

  $self->add_maxlag_param(\%queryParameters);

  my $res  = $self->makeHTTPrequest([ %queryParameters ]);

  my $xml = $self->parse_xml($res);

  @results =  @{$self->child_data( $xml, ['query', 'usercontribs', 'item'],
                                          "Error in usercontribs xml")};

  while ( defined $xml->{'query-continue'} ) {
    $queryParameters{'ucstart'} =
            $self->child_data( $xml, ['query-continue', 'usercontribs', 'ucstart'],
                               "Error in usercontribs xml");

    $self->print(3, "I Continue from: " . $xml->{'query-continue'}->{'usercontribs'}->{'ucstart'} );

    $res  = $self->makeHTTPrequest([%queryParameters]);

    $xml = $self->parse_xml($res);

    @results = ( @results,
                 @{$self->child_data( $xml, ['query', 'usercontribs', 'item'],
                                      "Error in usercontribs xml")});
  }

  return \@results;
}

#######################

=item $api->parse( $wikitext )

Parse a chunk of wiki code and return the HTML result.

=cut

sub parse {
  my $self = shift;
  my $content = shift;

  my $r = $self->makeXMLrequest(['action'=>'parse',
                                 'text'=>encode('utf8', $content),
                                 'format' => 'xml'
                                  ]);

  return $self->child_data($r, ['parse']);
}


##############################################################

=back

=head2 Information about the logged in user

=over

=item $api->watchlist($limit, $window);

Fetch list of pages on the user's watchlist that have been
recently edited.   Numeric parameters: $limit is maximum number
of pages to return, $window is number of hours of history to fetch.

=cut

sub watchlist {
 my $self = shift;

 $self->print(1,"A Fetching watchlist entries");

 my $timeStamp;

 my $limit = shift;
 my $window = shift;

 if ( ! defined $limit) {
   $limit = 100;
 }

 if ( ! defined $window) {
   $window = 24;
 }

 $self->print(2,"I Maximum result count: $limit\n");
 $self->print(2,"I Time window for entries: $window\n");

 my $delay = $window * 60 * 60; # window is in hours
 $timeStamp = strftime('%Y-%m-%dT%H:%M:00Z', gmtime(time() - $delay));

 my $xml  = $self->makeXMLrequest(
                  [ 'action' => 'query',
                    'list' => 'watchlist',
                    'wllimit' => $limit,
                    'wlprop' =>  'ids|title|timestamp|user|comment|flags',
                    'wlend' => $timeStamp,
                    'format' => 'xml'  ]);

  if ( ! defined $xml->{'query'}
       || ! defined $xml->{'query'}->{'watchlist'}
       || ! defined $xml->{'query'}->{'watchlist'}->{'item'} ) {
     $self->handleXMLerror($xml);
  }

#  return $xml->{'query'}->{'watchlist'}->{'item'};
   return $self->child_data($xml, ['query','watchlist','item']);
}

##############################################################

=item $properties = $api->user_properties();

Fetch the properties the server reports for the logged in
user.  Returns a references to an array.

=cut

sub user_properties {
  my $self = shift;
  my @results;
  $self->print(1,"A Fetching information about logged in user");

  my %queryParameters =  ( 'action' => 'query',
                           'meta' => 'userinfo',
                           'uiprop' => 'rights|hasmsg',
                           'format' => 'xml' );

  $self->add_maxlag_param(\%queryParameters);

  my $xml  = $self->makeXMLrequest([ %queryParameters ]);

  return $self->child_data($xml,['query','userinfo']);
}

##############################################################

=item $info = $api->site_info();

Fetch information about the MediaWiki site (namespaces,
main page, etc.)

=cut

sub site_info {
  my $self = shift;
  my @results;
  $self->print(1,"A Fetching information mediawiki site");

  my %queryParameters =  ( 'action' => 'query',
                           'meta' => 'siteinfo',
                           'siprop' => 'general|namespaces|statistics|interwikimap|dbrepllag',
                           'format' => 'xml' );

  $self->add_maxlag_param(\%queryParameters);

  my $xml  = $self->makeXMLrequest([ %queryParameters ]);

  return $self->child_data($xml,['query']);
}

##############################################################

=item $rights = $api->user_rights();

Fetch the rights (flags) the server reports for the logged
in user.  Returns a reference to an array of rights.

=cut


sub user_rights {
   my $self = shift;
   my $r = $self->user_properties();
   return $self->child_data($r, ['rights','r']);
}

#############################################################


=item $api->user_is_bot()

Returns nonzero if the logged in user has the 'bot' flag

=cut

sub user_is_bot {
  my $self = shift;
  my $rights = $self->user_rights();

  my $r;
  foreach $r ( @{$rights}) {
    if ( $r eq 'bot') {
      return 1;
    }
  }
  return 0;
}

##############################################################

=item $api->items_on_special($specialPageName,[ , $limit[, $offset]])

NOTE: THIS IS NOT YET SUPPORTED BY THE API - EXTENSIONS AND SKINS ON
THE WIKI MAY AFFECT WHETHER THIS SUBROUTINE WORKS.

Returns a list of pages that were listed in a standard special page
list.

=cut

sub items_on_special {
    my $self = shift;
    my $pageName = shift;
    my $limit = $self->{'querylimit'};
    my $offset = 0;
    my @pagesFound = ();
    if(@_){
        $limit = shift;
    }
    if(@_){
        $offset = shift;
    }

    my %queryParameters =  (
        'title' => $pageName,
        'limit' => $limit,
        'offset' => $offset,
    );

    # Since this is a regular page and not an api request, temporarily change the base_url.
    my $origBaseUrl = $self->base_url();
    my $indexBaseUrl = $origBaseUrl;
    $indexBaseUrl =~ s/api\.php/index\.php/i;
    $self->base_url($indexBaseUrl);
    my $content = $self->makeHTTPrequest([ %queryParameters ]);
    $self->base_url($origBaseUrl);

    # If using HTML5, we only want the content inside of <article>
    if($content =~ /<article[ >](.*)<\/article>/is){
        $content = $1;
    }

    if($content =~ /^.*<h1.*?<[ou]l[^>]*>(.*?)<\/[ou]l>/is){ # get the first list after the last h1 heading (anything before is likely to be the skin)
        $content = $1;
        while($content =~ /<li[^>]*><a[^>]*>(.*?)<\/a>.*?<\/li>/is){
            $content = $`.$';
            my $pageFound = $1;
            $pageFound = htmlspecialchars_decode($pageFound);
            push(@pagesFound, $pageFound);
        }
    } else {
        $self->print(1, "E ERROR: Could not find list on special page $pageName\"!");
    }

    return \@pagesFound;
} # end items_on_special

##############################################################

=back

=head2 Advanced usage and internal functions

=over

=item $api->makeXMLrequest($queryArgs [ , $arrayNames])

Makes a request to the server, parses the result, and
attempts to detect errors from the API and retry.

Optional parameter $arrayNames is used for the 'ForceArray'
parameter of XML::Simple.

=cut

sub makeXMLrequest {
  my $self = shift;
  my $args = shift;
  my $arrayNames = shift;

  my $retryCount = 0;

  my $edelay = $self->{'xmlretrydelay'};

  my $res;
  my $xml;

  while (1) {
    $retryCount++;
    if ( $retryCount > $self->{'xmlretrylimit'} ) {
        if($self->{'dieOnError'} != 0){
            die "Aborting: too many retries in getXMLrequest\n";
        } else {
            return $xml;
        }
    }

    $res = $self->makeHTTPrequest($args);

    $self->print(7, "Got result\n$res\n---\n");

    if ( length $res == 0) {
      $self->print(1,"E Error: empty XML response");
      $self->print(2,"I Query params: \n" . Dumper($args));
      $self->print(2,"I ... sleeping $edelay seconds");
      sleep $edelay;
      next;
    }

    eval {
      if ( defined $arrayNames ) {
        $xml = $self->parse_xml($res, 'ForceArray', $arrayNames);
      } else {
        $xml = $self->parse_xml($res);
      }
    };

    if ( $@ ) {
      $self->print(3, "Error parsing XML - truncated response?");
      $self->print(3, Dumper($@));
      sleep $edelay;
      next;
    }

    $self->print(6, "XML dump:");
    $self->print(6, Dumper($xml));

    last if ( ! defined $xml->{'error'} );

    if ( $xml->{'error'}->{'code'} eq 'maxlag') {
      $xml->{'error'}->{'info'} =~ /: (\d+) seconds/;
      my $lag = $1;
      if ($lag > 0) {
        $self->print(2,"E Maximum server lag exceeded");
        $self->print(3,"I Current lag $lag, limit " . $self->{'maxlag'});
      }
      sleep $lag + 1;
      $retryCount--; # this is not an error
      next;
    }

    $self->print(2,"E APR response indicates error");
    $self->print(3, "Err: " . $xml->{'error'} ->{'code'});
    $self->print(4, "Info: " . $xml->{'error'} ->{'info'});
    $self->print(4, "Details: " . Dumper($xml) . "\n");
    sleep $edelay;
  }

#  return decode_recursive($xml);
    return $xml;
} # end makeXMLrequest

######################################

=item $api->makeHTTPrequest($args)

Makes an HTTP request and returns the resulting content. This is the
most low-level access to the server. It provides error detection and
automatically retries failed attempts as appropriate. Most queries will
use a more specific method.

The $args parameter must be a reference to an array of KEY => VALUE
pairs. These are passed directly to the HTTP POST request.

=cut

sub makeHTTPrequest {
  my $self = shift;
  my $args = shift;

#  $self->{'requestCount'}++;

  my $retryCount = 0;
  my $delay = 4;

  my $res;

  while (1) {
    $self->{'requestCount'}++;

    if ( $retryCount == 0) {
      $self->print(2, "A  Making HTTP request (" . $self->{'requestCount'} . ")");
      $self->print(5, "I  Base URL: " . $self->{'baseurl'});
      my $k = 0;
      while ( $k < scalar @{$args}) {
        if ( ! defined ${$args}[$k+1] ) { ${$args}[$k+1] = ''; }
        $self->print(5, "I\t" . ${$args}[$k] . " => '"
                       . ${$args}[$k+1] . "'");
        $k += 2;
      }

    } else {
      $self->print(1,"A  Repeating request ($retryCount)");
    }

    $res = $self->{'agent'}->post($self->{'baseurl'}, $args);
    last if $res->is_success();

#    print Dumper($res);

    $self->print(1, "HTTP response code: " . $res->code() ) ;
    $self->print(5, "Dump of response: " . Dumper($res) );


    if (defined $res->header('x-squid-error')) {
      $self->print(1,"I  Squid error: " . $res->header('x-squid-error'));
    }

    $retryCount++;

    $self->print(3, "I  Sleeping for " . $delay . " seconds");

    sleep $delay;
    $delay = $delay * 2;

    if ( $retryCount > $self->{'maxRetryCount'}) {
      my $errorString =
           "Exceeded maximum number of tries for a single request.\n";
      $errorString .=
       "Final HTTP error code was " . $res->code() . " " . $res->message . "\n";
      $errorString .= "Aborting.\n";
      die($errorString);
    }
  }

  return $res->content();
}


##############################################################
# Internal function

sub child_data_is_defined {
  my $self = shift;
  my $p = shift;
  my @r = @{shift()};

  my $name;
  foreach $name ( @r) {
    if ( ! defined $p->{$name})  {
      return 0;
    }
  }
  return 1;
}

################################################################

# Internal function

sub child_data {
  my $self = shift;
  my $p = shift;
  my @r = @{shift()};
  my $errorMsg = shift;

  my $name;
  foreach $name ( @r) {
    if ( ! defined $p->{$name}) {
        $self->handleXMLerror($p, "$errorMsg; child '$name' not defined");
    }
    $p = $p->{$name}
  }

  return $p;
}

####
# Given a hash reference with nested data, follows the path provided
# in the second parameter to get the final value. If at any point the
# path is not defined as expected, then the default value (third parameter)
# is returned.
####
sub child_data_if_defined {
  my $self = shift;
  my $p = shift;
  my @r = @{shift()};
  my $default = shift;

  my $name;
  foreach $name ( @r) {
    if ( ! defined $p->{$name}) {
        return $default;
    }
    $p = $p->{$name};
  }

  return $p;
}

###################################

# Internal function

sub print {
  my $self = shift;
  my $limit = shift;
  my $message = shift;

  if ( $self->{'decodePrint'} == 1) {
    $message = decode("utf8", $message);
  }

  if ( $limit <= $self->{'debugLevel'} ) {
    print $message;
    if ( $self->{'htmlMode'} > 0) {
      print " <br/>\n";
    } else {
      print "\n";
    }
  }
}

#############################################################

# Internal method

sub dump {
  my $self = shift;
  return Dumper($self);
}


##############################################################

# Internal function

sub handleXMLerror {
  my $self = shift;
  my $xml = shift;
  my $text =  shift;

  my $error = "XML error";

  if ( defined $text) {
    $error = $error . ": " . $text;
  }

  print Dumper($xml);

  die "$error\n";
}
#######################################

### Recursively decode entities from the XML data structure

sub decode_recursive {
  my $data = shift;
  my $newdata;
  my $i;

  if ( ref($data) eq "" ) {
#    return decode_entities($data);
     return undo_htmlspecialchars($data);
  }

  if ( ref($data) eq "SCALAR") {
#    $newdata = decode_entities($$data);
    $newdata = undo_htmlspecialchars($$data);
    return \$newdata;
  } elsif ( ref($data) eq "ARRAY" ) {
    $newdata = [];
    foreach $i ( @$data) {
      push @$newdata, decode_recursive($i);
    }
    return $newdata;
  } elsif ( ref($data) eq "HASH") {
    $newdata = {};
    foreach $i ( keys %$data ) {
      $newdata->{decode_recursive($i)} = decode_recursive($data->{$i});
    }
    return $newdata;
  }

  die "Bad value $data\n";
}


#######################################################

# Internal function

sub is_bot {
  my $self = shift;

  if ( ! defined $self->{'isbot'} )  {
    if ( $self->user_is_bot() ) {
      $self->{'isbot'} = "true";
    } else {
      $self->{'isbot'} = "false";
    }
  }

  return ( $self->{'isbot'} eq 'true');
}


##############################################################
# Internal function

sub parse_xml {
  my $self = shift;
  if ( $self->debug_xml() > 0) {
    print "DEBUG_XML Parsing at " . time() . "\n";
  }
  my $xml;

  #  The API may return XML that is not valid UTF-8
  my $t = decode("utf8", $_[0]);
  $_[0] = encode("UTF-8", $t);  # this is secret code for strict UTF-8

    my $xmlString = $_[0];
    eval {
        $xmlString =~ s/(^\s+|\s+$)//g;
        $xml = XML::Simple::parse_string($xmlString);
    };
  if ( $@ ) {
    print "XML PARSING ERROR 1\n";
#    print "Code: $! \n";
# not well-formed (invalid token)
    print Dumper($xmlString);
    die;
  }

  if ( $self->debug_xml() > 0) {
    print "DEBUG_XML Finish parsing at " . time() . "\n";
  }

  return $xml;
}

##############################################
# internal function

sub undo_htmlspecialchars  {
  my $text = shift;
  my %trans = ( '&amp;' => '&',
                '&quot;' => '"',
                '&#039;' => '\'',
                '&lt;' => '<',
                '&gt;' => '>' );
  my $tran;
  foreach $tran ( keys %trans ) {
    $text =~ s/\Q$tran\E/$trans{$tran}/g;
  }
  return $text;
}

# This seems a bit imperfect.  Is there a better way to do this?
if(!defined('htmlspecialchars_decode')){
    ####
    # Reverses PHP's htmlspecialchars().  This is not the same as
    # PHP's htmlspecialchars_decode because it does not have a second
    # parameter for the type of decode to do... this undoes ALL special
    # char encoding.
    ####
    sub htmlspecialchars_decode{
        my $retVal = shift;
        $retVal =~ s/&amp;/&/ig;
        $retVal =~ s/&apos;/'/ig;
        $retVal =~ s/&lt;/</ig;
        $retVal =~ s/&gt;/>/ig;
        $retVal =~ s/&quot;/"/ig;
        $retVal =~ s/&#039;/'/ig;

        # This is from a list of latin1 encodings.
        $retVal =~ s/&nbsp;|&#160;/ /g;
        $retVal =~ s/&iexcl;|&#161;/¡/g;
        $retVal =~ s/&cent;|&#162;/¢/g;
        $retVal =~ s/&pound;|&#163;/£/g;
        $retVal =~ s/&curren;|&#164;/¤/g;
        $retVal =~ s/&yen;|&#165;/¥/g;
        $retVal =~ s/&brvbar;|&#166;/¦/g;
        $retVal =~ s/&sect;|&#167;/§/g;
        $retVal =~ s/&uml;|&#168;/¨/g;
        $retVal =~ s/&copy;|&#169;/©/g;
        $retVal =~ s/&ordf;|&#170;/ª/g;
        $retVal =~ s/&laquo;|&#171;/«/g;
        $retVal =~ s/&not;|&#172;/¬/g;
        $retVal =~ s/&shy;|&#173;//g;
        $retVal =~ s/&reg;|&#174;/®/g;
        $retVal =~ s/&macr;|&#175;/¯/g;
        $retVal =~ s/&deg;|&#176;/°/g;
        $retVal =~ s/&plusmn;|&#177;/±/g;
        $retVal =~ s/&sup2;|&#178;/²/g;
        $retVal =~ s/&sup3;|&#179;/³/g;
        $retVal =~ s/&acute;|&#180;/´/g;
        $retVal =~ s/&micro;|&#181;/µ/g;
        $retVal =~ s/&para;|&#182;/¶/g;
        $retVal =~ s/&middot;|&#183;/·/g;
        $retVal =~ s/&cedil;|&#184;/¸/g;
        $retVal =~ s/&sup1;|&#185;/¹/g;
        $retVal =~ s/&ordm;|&#186;/º/g;
        $retVal =~ s/&raquo;|&#187;/»/g;
        $retVal =~ s/&frac14;|&#188;/¼/g;
        $retVal =~ s/&frac12;|&#189;/½/g;
        $retVal =~ s/&frac34;|&#190;/¾/g;
        $retVal =~ s/&iquest;|&#191;/¿/g;
        $retVal =~ s/&Agrave;|&#192;/À/g;
        $retVal =~ s/&Aacute;|&#193;/Á/g;
        $retVal =~ s/&Acirc;|&#194;/Â/g;
        $retVal =~ s/&Atilde;|&#195;/Ã/g;
        $retVal =~ s/&Auml;|&#196;/Ä/g;
        $retVal =~ s/&Aring;|&#197;/Å/g;
        $retVal =~ s/&AElig;|&#198;/Æ/g;
        $retVal =~ s/&Ccedil;|&#199;/Ç/g;
        $retVal =~ s/&Egrave;|&#200;/È/g;
        $retVal =~ s/&Eacute;|&#201;/É/g;
        $retVal =~ s/&Ecirc;|&#202;/Ê/g;
        $retVal =~ s/&Euml;|&#203;/Ë/g;
        $retVal =~ s/&Igrave;|&#204;/Ì/g;
        $retVal =~ s/&Iacute;|&#205;/Í/g;
        $retVal =~ s/&Icirc;|&#206;/Î/g;
        $retVal =~ s/&Iuml;|&#207;/Ï/g;
        $retVal =~ s/&ETH;|&#208;/Ð/g;
        $retVal =~ s/&Ntilde;|&#209;/Ñ/g;
        $retVal =~ s/&Ograve;|&#210;/Ò/g;
        $retVal =~ s/&Oacute;|&#211;/Ó/g;
        $retVal =~ s/&Ocirc;|&#212;/Ô/g;
        $retVal =~ s/&Otilde;|&#213;/Õ/g;
        $retVal =~ s/&Ouml;|&#214;/Ö/g;
        $retVal =~ s/&times;|&#215;/×/g;
        $retVal =~ s/&Oslash;|&#216;/Ø/g;
        $retVal =~ s/&Ugrave;|&#217;/Ù/g;
        $retVal =~ s/&Uacute;|&#218;/Ú/g;
        $retVal =~ s/&Ucirc;|&#219;/Û/g;
        $retVal =~ s/&Uuml;|&#220;/Ü/g;
        $retVal =~ s/&Yacute;|&#221;/Ý/g;
        $retVal =~ s/&THORN;|&#222;/Þ/g;
        $retVal =~ s/&szlig;|&#223;/ß/g;
        $retVal =~ s/&agrave;|&#224;/à/g;
        $retVal =~ s/&aacute;|&#225;/á/g;
        $retVal =~ s/&acirc;|&#226;/â/g;
        $retVal =~ s/&atilde;|&#227;/ã/g;
        $retVal =~ s/&auml;|&#228;/ä/g;
        $retVal =~ s/&aring;|&#229;/å/g;
        $retVal =~ s/&aelig;|&#230;/æ/g;
        $retVal =~ s/&ccedil;|&#231;/ç/g;
        $retVal =~ s/&egrave;|&#232;/è/g;
        $retVal =~ s/&eacute;|&#233;/é/g;
        $retVal =~ s/&ecirc;|&#234;/ê/g;
        $retVal =~ s/&euml;|&#235;/ë/g;
        $retVal =~ s/&igrave;|&#236;/ì/g;
        $retVal =~ s/&iacute;|&#237;/í/g;
        $retVal =~ s/&icirc;|&#238;/î/g;
        $retVal =~ s/&iuml;|&#239;/ï/g;
        $retVal =~ s/&eth;|&#240;/ð/g;
        $retVal =~ s/&ntilde;|&#241;/ñ/g;
        $retVal =~ s/&ograve;|&#242;/ò/g;
        $retVal =~ s/&oacute;|&#243;/ó/g;
        $retVal =~ s/&ocirc;|&#244;/ô/g;
        $retVal =~ s/&otilde;|&#245;/õ/g;
        $retVal =~ s/&ouml;|&#246;/ö/g;
        $retVal =~ s/&divide;|&#247;/÷/g;
        $retVal =~ s/&oslash;|&#248;/ø/g;
        $retVal =~ s/&ugrave;|&#249;/ù/g;
        $retVal =~ s/&uacute;|&#250;/ú/g;
        $retVal =~ s/&ucirc;|&#251;/û/g;
        $retVal =~ s/&uuml;|&#252;/ü/g;
        $retVal =~ s/&yacute;|&#253;/ý/g;
        $retVal =~ s/&thorn;|&#254;/þ/g;
        $retVal =~ s/&yuml;|&#255;/ÿ/g;

        return $retVal;
    } # end htmlspecialchars_decode()
}

################################
# Close POD

=back

=head1 Copryright

Copyright 2008-2010 by Carl Beckhorn, Sean Colombo.

Released under GNU Public License (GPL) 2.0.

TODO:
    - It appears that this framework does not follow 301-redirects yet.  Add that.

=cut


########################################################
## Return success upon loading class
1;