Created
November 29, 2009 22:44
-
-
Save ap/245103 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/perl
# original by towo <towo@ydal.de> under CC-BY
#
# Splits Gaim conversation logs into one text file per conversation.
#
# Usage: pass one or more log files on the command line. For every input
# file FOO.log a directory FOO is created in the current working directory
# (if it does not exist yet) and each conversation found in the log is
# written to FOO/YYYY-MM-DD.HHMMSS.txt, named after the timestamp in the
# conversation's "---- New Conversation @ ... ----" marker line.
use strict;
use warnings;

use File::Basename qw( basename );
use File::Spec::Functions qw( catfile );
use Scalar::Util qw( openhandle );

# Maps the three-letter month abbreviation used in session markers to its
# two-digit month number.
my %monthnum = qw(
    Jan 01 Feb 02 Mar 03
    Apr 04 May 05 Jun 06
    Jul 07 Aug 08 Sep 09
    Oct 10 Nov 11 Dec 12
);

foreach my $file ( @ARGV ) {
    split_gaim_log( $file );
}

# Splits a single log file into per-conversation text files.
#
# Parameters:
#   $file - path of the log file to read.
#
# Returns the number of conversation files that were opened for writing
# (0 if the file could not be processed at all). Problems are reported with
# warn() and end processing of this file only, never the whole program.
sub split_gaim_log {
    my ($file) = @_;

    my $fh;
    if ( not open( $fh, '<', $file ) ) {
        warn "Unable to open $file for reading: $!\n";
        return 0;
    }

    # The first line must be the "IM Sessions with ..." title; markup is
    # stripped first so HTML and plain-text logs are checked the same way.
    my $header = <$fh>;
    $header = '' if not defined $header;    # empty file: fails the check below
    $header =~ s!<.*?>!!g;
    if ( $header !~ m{^(<HTML><HEAD><TITLE>)?IM Sessions with .*?(</TITLE></HEAD><BODY BGCOLOR=".*?">)?$}i ) {
        warn "$file does not seem to be a Gaim conversation.\n";
        close $fh;
        return 0;
    }

    my $target = basename $file, '.log';
    unless ( -d $target or mkdir $target ) {
        warn "Could not create directory $target: $!\n";
        close $fh;
        return 0;
    }

    my $outfh;           # handle of the conversation file currently written
    my $outfile;         # its path, kept for error messages
    my $sessions = 0;

    LINE:
    while ( my $line = <$fh> ) {
        # Drop the formatting tags; everything else is copied verbatim.
        $line =~ s!</?(FONT|B|I|ALIGN|HTML|HEAD|TITLE|HR|BR|BODY|H3).*?>!!ig;

        if ( $line =~ m/^ ?---- New Conversation @ (.*) ----$/ ) {
            my $session = $1;

            # A new marker ends the previous conversation.
            _close_output( $outfh, $outfile );
            undef $outfh;

            my $stamp = _session_stamp( $session );
            if ( not defined $stamp ) {
                warn "Could not recognize session identifier: «$session»\n";
                last LINE;
            }

            $outfile = catfile $target, $stamp . '.txt';
            if ( not open( $outfh, '>', $outfile ) ) {
                warn "Could not write to $outfile: $!\n";
                undef $outfh;
                last LINE;
            }
            $sessions++;
        }

        # proper files start with a session header
        # so we should have an outfile open by now
        if ( not openhandle( $outfh ) ) {
            warn "$file starts with garbage instead of a session identifier\n";
            last LINE;
        }

        # The marker line itself is written as the first line of its file.
        if ( not print { $outfh } $line ) {
            warn "Could not write to $outfile: $!\n";
            last LINE;
        }
    }

    _close_output( $outfh, $outfile );
    close $fh;

    return $sessions;
}

# Converts the timestamp of a session marker, e.g.
# "Sun Nov  1 22:44:05 2009", into "2009-11-01.224405".
# Returns undef if the text does not have that shape or names an unknown
# month, so callers never build a file name from missing pieces.
sub _session_stamp {
    my ($session) = @_;

    my ( $mname, $day, $hr, $min, $sec, $year )
        = $session =~ m/\w{3} (\w{3}) ([0-9 ][0-9]) (\d{2}):(\d{2}):(\d{2}) (\d{4})/;

    return if not defined $year;
    return if not exists $monthnum{$mname};

    # %02d turns a space-padded day such as " 1" into "01".
    return sprintf '%s-%s-%02d.%s%s%s', $year, $monthnum{$mname}, $day, $hr, $min, $sec;
}

# Closes a conversation file if one is open. Buffered write errors only
# surface at close time, so a failing close is reported.
# Returns true if nothing was open or the close succeeded.
sub _close_output {
    my ( $fh, $name ) = @_;

    return 1 if not openhandle( $fh );
    return 1 if close $fh;

    warn "Could not write to $name: $!\n";
    return 0;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment