http://metatalk.metafilter.com/21489 - comment dump to bookmarks file
#!/usr/bin/env perl | |
use 5.010; | |
use strict; | |
use warnings; | |
use utf8::all; | |
our $VERSION = '0.01'; | |
use Getopt::Long::Descriptive; | |
use Pod::Usage; | |
my ($opt, $usage) = describe_options( | |
'%c %o', | |
[ 'in|i=s' => 'input file (default: my-mefi-comments.txt)', | |
{ default => 'my-mefi-comments.txt' } ], | |
[ 'out|o=s' => 'output file (default: bookmarks.html)', | |
{ default => 'bookmarks.html' } ], | |
[], | |
[ 'stdout' => 'output to STDOUT only' ], | |
[ 'fancy|f' => 'make bookmarks more fancy' ], | |
[], | |
[ 'help|h|?' => 'print usage message and exit' ], | |
[ 'manual|man' => 'print manua page and exit' ], | |
); | |
if ($opt->{help} ) { print $usage->text; exit } | |
if ($opt->{manual}) { pod2usage({ verbose => 2, exit => 0 }) } | |
# | |
# here for demo purposes. | |
# t title, u uri, b blurb, s strftime, n time, c sub-site | |
# Emitter variables be overridden to customize output: | |
# {page,thread,comment}_{open,close} | |
# | |
if ($opt->{fancy}) { | |
$Emitter::thread_open = <<'_EOT_'; | |
<DT><H3 FOLDED ADD_DATE="%n">%c: %t</H3> | |
<DL><p> | |
_EOT_ | |
$Emitter::comment_open = <<'_EOT_'; | |
<DT><A HREF="%u" ADD_DATE="%n" LAST_VISIT="%n" LAST_MODIFIED="%n">(%{%Y-%m-%d %T}m) %b</A> | |
_EOT_ | |
} | |
my $page = bookmarks_from_file($opt->{in}); | |
if ($opt->{fancy}) { | |
$page->title('My Fancy Bookmarks'); | |
} | |
if ( $opt->{stdout} ) { | |
print Emitter->emit_as_text($page); | |
} | |
else { | |
Emitter->emit_to_file($page, $opt->{out}); | |
} | |
exit; | |
####################################################################### | |
# | |
# create our Page{ Thread{ Comment+ }+ } structure. | |
# | |
sub bookmarks_from_file { | |
my $filename = shift; | |
open my $fh, '< :utf8 :crlf', $filename | |
or die "Can't open '$filename' for reading: $!\n"; | |
return bookmarks_from_fh($fh); | |
} | |
sub bookmarks_from_fh { | |
my $fh = shift; | |
# TODO pass in a class composed of a base Page class | |
# with additional roles composited in. | |
my $Class = shift || 'Page'; | |
local $/ = "\n-----\n"; | |
my %thread_for_comment; | |
my $page = $Class->new; | |
while (my $rec = <$fh>) { | |
chomp $rec; | |
my ($date, $url, $text) = split /\n/, $rec, 3; | |
# take a comment post and add it to the thread | |
# which itself is added to the page (of threads of comments). | |
my $comment = Comment->new( | |
timestamp => $date, | |
uri => $url, | |
text => $text, | |
); | |
my $title = $comment->title; | |
my $thread = $thread_for_comment{ $title }; | |
unless ( $thread ) { | |
# clone the post url and remove the #anchor (fragment). | |
my $thread_uri = $comment->uri->clone; | |
$thread_uri->fragment(undef); | |
$thread = Thread->new( | |
uri => $thread_uri, | |
); | |
# add thread to the page and keep track of it for later. | |
$page->add_entry($thread); | |
$thread_for_comment{ $title } = $thread; | |
} | |
$thread->add_entry($comment); | |
} | |
return $page; | |
} | |
BEGIN { | |
# | |
# classes and roles. Page{ Thread{ Comment+ }+ } | |
# | |
package Entries::Role { | |
use Moo::Role; | |
has entries => ( | |
is => 'ro', | |
default => sub { [] }, | |
); | |
sub add_entry { | |
my $self = shift; | |
push @{ $self->entries }, @_; | |
} | |
} | |
package Page { | |
use Moo; | |
with 'Entries::Role'; | |
has title => ( | |
is => 'rw', | |
default => sub { 'Bookmarks' }, | |
); | |
} | |
package URI::Role { | |
use Moo::Role; | |
use URI; | |
has uri => ( | |
is => 'ro', | |
isa => sub { | |
die "$_[0] is not a URI!" | |
unless $_[0]->isa('URI'); | |
}, | |
coerce => sub { | |
URI->new($_[0]); | |
}, | |
required => 1, | |
); | |
has title => ( | |
is => 'ro', | |
lazy => 1, | |
builder => '_build_title', | |
); | |
sub _build_title { | |
my $self = shift; | |
my $t = $self->uri->path; # /blah/the-long-title | |
$t =~ s!.*/!!; # the-long-title | |
$t =~ s!-! !g; # the long title | |
return $t; | |
} | |
} | |
package Thread { | |
use Moo; | |
with 'URI::Role'; | |
with 'Entries::Role'; | |
} | |
package Comment { | |
use Moo; | |
use Time::Piece; | |
use HTML::Strip; | |
use Text::Wrap; | |
use utf8; | |
with 'URI::Role'; | |
has timestamp => ( | |
is => 'ro', | |
isa => sub { | |
die "$_[0] is not a Time::Piece!" | |
unless $_[0]->isa('Time::Piece'); | |
}, | |
coerce => sub { | |
(my $t = $_[0]) =~ s/\.\d+$//; # remove fractional seconds. | |
Time::Piece->strptime($t, '%Y-%m-%d %T'); | |
}, | |
required => 1, | |
handles => { | |
strftime => 'strftime', | |
}, | |
); | |
has text => ( | |
is => 'ro', | |
required => 1, | |
); | |
has blurb => ( | |
is => 'ro', | |
lazy => 1, | |
builder => '_build_blurb', | |
); | |
sub _build_blurb { | |
my $self = shift; | |
# strip HTML | |
my $p = HTML::Strip->new; | |
my $b = $p->parse( $self->text ); | |
$p->eof; | |
# HTML::Strip being lame. | |
utf8::decode($b); | |
# do some smart wrapping to ~72 columns. | |
# then take the first line as our blurb. | |
$b = wrap('', '', $b); | |
$b = (split /\n/, $b)[0]; | |
return $b; | |
} | |
} | |
package Emitter { | |
use utf8::all; | |
use String::Formatter method_stringf => { | |
-as => '_stringf', | |
codes => { | |
t => 'title', | |
b => 'blurb', | |
u => 'uri', | |
m => 'strftime', | |
n => sub { time }, | |
c => sub { | |
my $h = $_[0]->uri->host; | |
$h =~ s/\..*//; | |
__PACKAGE__->sitemap($h); | |
}, | |
}, | |
}; | |
my %sitemap = ( | |
www => 'MetaFilter', | |
ask => 'AskMeFi', | |
projects => 'Projects', | |
music => 'Music', | |
jobs => 'Jobs', | |
irl => 'IRL', | |
metatalk => 'MetaTalk', | |
); | |
sub sitemap { | |
my ($class, $host) = @_; | |
return $sitemap{$host}; | |
} | |
our $page_open = <<'_EOT_'; | |
<!DOCTYPE NETSCAPE-Bookmark-file-1> | |
<!--This is an automatically generated file. | |
It will be read and overwritten. | |
Do Not Edit! --> | |
<META HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=UTF-8"> | |
<Title>%t</Title> | |
<H1>%t</H1> | |
<DL> | |
_EOT_ | |
our $page_close = <<'_EOT_'; | |
</DL> | |
_EOT_ | |
our $thread_open = <<'_EOT_'; | |
<DT><H3 FOLDED ADD_DATE="%n">%t</H3> | |
<DL><p> | |
_EOT_ | |
our $thread_close = <<'_EOT_'; | |
</DL><p> | |
_EOT_ | |
our $comment_open = <<'_EOT_'; | |
<DT><A HREF="%u" ADD_DATE="%n" LAST_VISIT="%n" LAST_MODIFIED="%n">%b</A> | |
_EOT_ | |
our $comment_close = ''; | |
sub emit_as_text { | |
my ($self, $page) = @_; | |
my $output = ""; | |
open my $fh, '>', \$output or die "Can't open a string!"; | |
$self->emit_to_fh($page, $fh); | |
# so, the logic goes: when using Perl::IO::scalar to do IO to a | |
# string, the string is treated as a File, and files are by nature | |
# a stream of *bytes*. so we need to turn that string of bytes | |
# into a true unicode thing. otherwise when we print it back out | |
# to a utf8 filehandle (stdout or a file) Perl will notice any | |
# non-ASCII (>127) characters and end up double-encoding them | |
# to utf8 again. the alternative would be to set the output | |
# filehandle to :std or :raw when outputting this string. | |
# sigh, unicode can be such a PITA. | |
utf8::decode($output); | |
return $output; | |
} | |
sub emit_to_file { | |
my ($self, $page, $filename) = @_; | |
open my $fh, '> :utf8', $filename | |
or die "Can't open '$filename' for writing $!"; | |
$self->emit_to_fh($page, $fh); | |
} | |
sub emit_to_fh { | |
my ($self, $page, $fh) = @_; | |
# open, (open, (open, close)+ close)+, close | |
print $fh _stringf $page_open, $page; | |
for my $t ( @{ $page->entries } ) { | |
print $fh _stringf $thread_open, $t; | |
for my $c ( @{ $t->entries } ) { | |
print $fh _stringf $comment_open, $c; | |
print $fh _stringf $comment_close, $c; | |
} | |
print $fh _stringf $thread_close, $t; | |
} | |
print $fh _stringf $page_close, $page; | |
} | |
} | |
} # BEGIN | |
__END__ | |
=head1 NAME | |
my-mefi-bookmarks - convert Metafilter comment dump to Netscape bookmarks file. | |
=head1 VERSION | |
0.01 | |
=head1 SYNOPSIS | |
my-mefi-bookmarks | |
my-mefi-bookmarks --fancy --out ~/public_html/mefi-bookmarks.html | |
=head1 OPTIONS | |
Some options can be abbreviated. | |
=over 4 | |
=item B<--in> I<filename> | |
Specify an alternate input file. Default: F<my-mefi-comments.txt> | |
=item B<--out> I<filename> | |
Specify an alternate output file. Default: F<bookmarks.html> | |
=item B<--stdout> | |
Output to STDOUT instead of a file. | |
=item B<--fancy> | |
Make the output a bit fancier. This is also a little demo of customization possibilities. See the source for ideas. | |
=item B<--help> | |
Show a brief help message and exit. | |
=item B<--manual> | |
Show this manual page and exit. | |
=back | |
=cut | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
This comment has been minimized.
Example fancy output: http://jklmnop.net/metafilter/21489/fancy.html