Skip to content

Instantly share code, notes, and snippets.

@NewMexicoKid
Created December 6, 2016 02:41
Show Gist options
  • Save NewMexicoKid/789a3ccce6803c5a76fc39ad25ff733d to your computer and use it in GitHub Desktop.
Save NewMexicoKid/789a3ccce6803c5a76fc39ad25ff733d to your computer and use it in GitHub Desktop.
Quick perl script to retrieve word count data for a list of NaNoWriMo uids
#!/usr/bin/perl -w
#
# Create a simple wordcount history graph using the wcapi
# T. Yao, 11/08/2005
#
## HISTORY
# If the xml file is older than one hour, refresh it. Otherwise, don't do anything.
# 24 October 2012 - tyao - Updated to use the new alphabetical uids
# 2013-10-27 - tyao - updated to 2013
# 2015-10-23 - tyao - updated to 2015
#
#OLD use LWP::Simple; ## Note: LWP::Simple no longer works on the nano site; you have to use WWW:Mechanize
#
use MIME::Base64;
use WWW::Mechanize;
use FileHandle;
use Date::Calc qw(Delta_YMDHMS Today_and_Now);
my $mech;
$mech = WWW::Mechanize->new( autocheck => 0, stack_depth => 0 );
$mech->get( "http://nanowrimo.org/sign_in" );
#----------------------------------------------------------------------
# Set the password, username (mechname) and the file with uids
#----------------------------------------------------------------------
my $password = 'Fill this in with your NaNo Password';
my $mechname = 'Fill this in with your NaNo Username';
my $file = 'This is your filename that has the uids, one per line';
#----------------------------------------------------------------------
$mech->submit_form(
form_number => 1,
fields => { 'user_session[password]' => $password,
'user_session[name]' => $mechname,
'commit' => 'Login' },
);
warn "Warning: difficulty logging in\n" unless ($mech->success);
my $uid;
my $XML;
my $XMLDIR = '.'; ## Wherever you want to put the retrieved XML files
print "Reading $file\n";
my $fh = new FileHandle "$file","r";
my @uidlist = ();
if (defined $fh) {
while (<$fh>) {
chomp;
push @uidlist,$_;
}
undef $fh;
}
my ($year,$month,$day, $hour,$min,$sec) = Today_and_Now();
foreach $uid (@uidlist) {
my $outputfile = "${uid}_wchistory.xml";
if (-f "$XMLDIR/$outputfile") {
my $mtime = (stat("$XMLDIR/$outputfile"))[9];
# my ($year2,$month2,$day2, $hour2,$min2,$sec2)
my ($sec2,$min2,$hour2,$mday2,$mon2,$year2,$wday2,$yday2,$isdst2)= localtime($mtime);
my $month2 = $mon2 + 1;
$year2 = 1900 + $year2;
my $day2 = $mday2;
my ($y,$m,$d,$h,$mn,$s) = Delta_YMDHMS($year2,$month2,$day2,$hour2,$min2,$sec2,$year,$month,$day, $hour,$min,$sec);
# print "DEBUG: y=$y m=$m d=$d h=$h mn=$mn s=$s\n";
# print "DEBUG: year=$year month=$month day=$day hour=$hour min=$min sec=$sec\n";
# print "DEBUG: year=$year2 month=$month2 day=$day2 hour=$hour2 min=$min2 sec=$sec2\n";
my $hours = $s/3600 + $mn/60 + $h + $d*24 + $m*365/12*24 + $y*365*24;
if ($hours > 1 or $hours < 0) {
print "Updating uid=$uid (hours=$hours)\n";
$mech->get("http://nanowrimo.org/wordcount_api/wchistory/$uid");
warn "Can't get wchistory for $uid: ", $mech->response->status_line unless $mech->success;
my $XML = $mech->content();
writefile($XML,$uid,$outputfile) if ($XML);
} else {
print "Skipping update for uid=$uid (hours=$hours)\n";
}
} else {
print "Creating uid=$uid\n";
$mech->get("http://nanowrimo.org/wordcount_api/wchistory/$uid");
warn "Can't get wchistory for $uid: ", $mech->response->status_line unless $mech->success;
my $XML = $mech->content();
writefile($XML,$uid,$outputfile) if ($XML);
}
}
### Site stats
my ($year,$month,$day, $hour,$min,$sec) = Today_and_Now();
my $outputfile = "wcstats_wchistory.xml";
if (-f "$XMLDIR/$outputfile") {
my $mtime = (stat("$XMLDIR/$outputfile"))[9];
# my ($year2,$month2,$day2, $hour2,$min2,$sec2)
my ($sec2,$min2,$hour2,$mday2,$mon2,$year2,$wday2,$yday2,$isdst2)= localtime($mtime);
my $month2 = $mon2 + 1;
$year2 = 1900 + $year2;
my $day2 = $mday2;
my ($y,$m,$d,$h,$mn,$s) = Delta_YMDHMS($year2,$month2,$day2,$hour2,$min2,$sec2,$year,$month,$day, $hour,$min,$sec);
my $hours = $s/3600 + $mn/60 + $h + $d*24 + $m*365/12*24 + $y*365*24;
if ($hours > 1 or $hours < 0) {
print "Updating wcstats\n";
$mech->get("http://nanowrimo.org/wordcount_api/wcstats");
die "Can't get wchistory for site: ", $mech->response->status_line unless $mech->success;
my $XML = $mech->content();
writestatsfile($XML,$outputfile);
} else {
print "Skipping update for wcstats (hours=$hours)\n";
}
} else {
print "Creating wcstats\n";
$mech->get("http://nanowrimo.org/wordcount_api/wcstats");
die "Can't get wchistory for site: ", $mech->response->status_line unless $mech->success;
my $XML = $mech->content();
writestatsfile($XML,$outputfile);
}
exit 0;
sub writestatsfile {
my $XML = shift;
my $outputfile = shift;
my $fh = new FileHandle "$XMLDIR/$outputfile","w";
if (defined $fh) {
print $fh $XML;
undef $fh;
} else {
die "Unable to write to $XMLDIR/$outputfile\n";
}
#### Extract data
my %data = ();
my $min_total = 0;
my $max_total = 0;
my $wcavg_total = 0;
my $wcstd_total = 0;
my $gh = new FileHandle "$XMLDIR/wcstats.dat","w";
if (defined $gh) {
while ($XML =~ m{<wcentry>(.*?)</wcentry>}sg) {
my $STUFF = $1;
my ($wcdate,$min,$max,$wcavg,$wcstd) = extract_wcstats($STUFF);
$min_total += $min;
$max_total += $max;
$wcavg_total += $wcavg;
$wcstd_total += $wcstd;
print $gh qq{$wcdate\t$wcavg_total\t$min_total\t$max_total\t$wcstd_total\n};
}
undef $gh;
} else {
die "Unable to create ${XMLDIR}/wcstats.dat\n";
}
}
sub writefile {
my $XML = shift;
my $new_uid = shift;
my $outputfile = shift;
my $fh = new FileHandle "$XMLDIR/$outputfile","w";
if (defined $fh) {
print $fh $XML;
undef $fh;
} else {
die "Unable to write to $XMLDIR/$outputfile\n";
}
#### Extract data
my %data = ();
my $uid = extract_field($XML,'uid');
my $uname = extract_field($XML,'uname');
my $user_wordcount = extract_field($XML,'user_wordcount');
my $wctotal = 0;
while ($XML =~ m{<wcentry>(.*?)</wcentry>}sg) {
my $ENTRY = $1;
my ($wc,$wcdate) = extract_entry($ENTRY);
$wctotal += $wc;
$data{$wcdate} = $wctotal;
}
#### Create data file per uid processed
my $gh = new FileHandle "$XMLDIR/${new_uid}_data.dat","w";
if (defined $gh) {
print $gh qq{# name: $uname
# wordcount: $user_wordcount\n};
foreach my $date (sort keys %data) {
print $gh qq{$date\t$data{$date}\n};
#DEBUG print qq{DEBUG: $date\t$data{$date}\n};
}
undef $gh;
} else {
die "Unable to create ${XMLDIR}/${new_uid}_data.dat\n";
}
return;
}
sub extract_entry {
my $STUFF = shift;
my $wc;
my $wcdate;
if ($STUFF =~ m{<wc>(.*?)</wc>}s) {
$wc = $1;
} else {
$wc = '';
}
if ($STUFF =~ m{<wcdate>(.*?)</wcdate>}s) {
$wcdate = $1;
} else {
$wcdate = '';
}
return ($wc,$wcdate);
}
sub extract_wcstats {
my $STUFF = shift;
my $wcdate;
my $min;
my $max;
my $wcavg;
my $wcstd;
if ($STUFF =~ m{<wcdate>(.*?)</wcdate>}s) {
$wcdate = $1;
} else {
$wcdate = '';
}
if ($STUFF =~ m{<min>(.*?)</min>}s) {
$min = $1;
} else {
$min = '';
}
if ($STUFF =~ m{<max>(.*?)</max>}s) {
$max = $1;
} else {
$max = '';
}
if ($STUFF =~ m{<average>(.*?)</average>}s) {
$wcavg = $1;
} else {
$wcavg = '';
}
if ($STUFF =~ m{<stddev>(.*?)</stddev>}s) {
$wcstd = $1;
} else {
$wcstd = '';
}
return ($wcdate,$min,$max,$wcavg,$wcstd);
}
sub extract_field {
my $STUFF = shift;
my $field = shift;
my $value;
if ($STUFF =~ m{<$field>(.*?)</$field>}s) {
$value = $1;
} else {
$value = '';
}
return ($value);
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment