Created
March 1, 2012 12:45
-
-
Save zengargoyle/1949635 to your computer and use it in GitHub Desktop.
http://metatalk.metafilter.com/21489 - comment dump to bookmarks file
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env perl | |
use 5.010; | |
use strict; | |
use warnings; | |
use utf8::all; | |
our $VERSION = '0.01'; | |
use Getopt::Long::Descriptive; | |
use Pod::Usage; | |
my ($opt, $usage) = describe_options( | |
'%c %o', | |
[ 'in|i=s' => 'input file (default: my-mefi-comments.txt)', | |
{ default => 'my-mefi-comments.txt' } ], | |
[ 'out|o=s' => 'output file (default: bookmarks.html)', | |
{ default => 'bookmarks.html' } ], | |
[], | |
[ 'stdout' => 'output to STDOUT only' ], | |
[ 'fancy|f' => 'make bookmarks more fancy' ], | |
[], | |
[ 'help|h|?' => 'print usage message and exit' ], | |
[ 'manual|man' => 'print manua page and exit' ], | |
); | |
if ($opt->{help} ) { print $usage->text; exit } | |
if ($opt->{manual}) { pod2usage({ verbose => 2, exit => 0 }) } | |
# | |
# here for demo purposes. | |
# t title, u uri, b blurb, s strftime, n time, c sub-site | |
# Emitter variables be overridden to customize output: | |
# {page,thread,comment}_{open,close} | |
# | |
if ($opt->{fancy}) { | |
$Emitter::thread_open = <<'_EOT_'; | |
<DT><H3 FOLDED ADD_DATE="%n">%c: %t</H3> | |
<DL><p> | |
_EOT_ | |
$Emitter::comment_open = <<'_EOT_'; | |
<DT><A HREF="%u" ADD_DATE="%n" LAST_VISIT="%n" LAST_MODIFIED="%n">(%{%Y-%m-%d %T}m) %b</A> | |
_EOT_ | |
} | |
my $page = bookmarks_from_file($opt->{in}); | |
if ($opt->{fancy}) { | |
$page->title('My Fancy Bookmarks'); | |
} | |
if ( $opt->{stdout} ) { | |
print Emitter->emit_as_text($page); | |
} | |
else { | |
Emitter->emit_to_file($page, $opt->{out}); | |
} | |
exit; | |
####################################################################### | |
# | |
# create our Page{ Thread{ Comment+ }+ } structure. | |
# | |
sub bookmarks_from_file { | |
my $filename = shift; | |
open my $fh, '< :utf8 :crlf', $filename | |
or die "Can't open '$filename' for reading: $!\n"; | |
return bookmarks_from_fh($fh); | |
} | |
sub bookmarks_from_fh { | |
my $fh = shift; | |
# TODO pass in a class composed of a base Page class | |
# with additional roles composited in. | |
my $Class = shift || 'Page'; | |
local $/ = "\n-----\n"; | |
my %thread_for_comment; | |
my $page = $Class->new; | |
while (my $rec = <$fh>) { | |
chomp $rec; | |
my ($date, $url, $text) = split /\n/, $rec, 3; | |
# take a comment post and add it to the thread | |
# which itself is added to the page (of threads of comments). | |
my $comment = Comment->new( | |
timestamp => $date, | |
uri => $url, | |
text => $text, | |
); | |
my $title = $comment->title; | |
my $thread = $thread_for_comment{ $title }; | |
unless ( $thread ) { | |
# clone the post url and remove the #anchor (fragment). | |
my $thread_uri = $comment->uri->clone; | |
$thread_uri->fragment(undef); | |
$thread = Thread->new( | |
uri => $thread_uri, | |
); | |
# add thread to the page and keep track of it for later. | |
$page->add_entry($thread); | |
$thread_for_comment{ $title } = $thread; | |
} | |
$thread->add_entry($comment); | |
} | |
return $page; | |
} | |
BEGIN { | |
# | |
# classes and roles. Page{ Thread{ Comment+ }+ } | |
# | |
package Entries::Role { | |
use Moo::Role; | |
has entries => ( | |
is => 'ro', | |
default => sub { [] }, | |
); | |
sub add_entry { | |
my $self = shift; | |
push @{ $self->entries }, @_; | |
} | |
} | |
package Page { | |
use Moo; | |
with 'Entries::Role'; | |
has title => ( | |
is => 'rw', | |
default => sub { 'Bookmarks' }, | |
); | |
} | |
package URI::Role { | |
use Moo::Role; | |
use URI; | |
has uri => ( | |
is => 'ro', | |
isa => sub { | |
die "$_[0] is not a URI!" | |
unless $_[0]->isa('URI'); | |
}, | |
coerce => sub { | |
URI->new($_[0]); | |
}, | |
required => 1, | |
); | |
has title => ( | |
is => 'ro', | |
lazy => 1, | |
builder => '_build_title', | |
); | |
sub _build_title { | |
my $self = shift; | |
my $t = $self->uri->path; # /blah/the-long-title | |
$t =~ s!.*/!!; # the-long-title | |
$t =~ s!-! !g; # the long title | |
return $t; | |
} | |
} | |
package Thread { | |
use Moo; | |
with 'URI::Role'; | |
with 'Entries::Role'; | |
} | |
package Comment { | |
use Moo; | |
use Time::Piece; | |
use HTML::Strip; | |
use Text::Wrap; | |
use utf8; | |
with 'URI::Role'; | |
has timestamp => ( | |
is => 'ro', | |
isa => sub { | |
die "$_[0] is not a Time::Piece!" | |
unless $_[0]->isa('Time::Piece'); | |
}, | |
coerce => sub { | |
(my $t = $_[0]) =~ s/\.\d+$//; # remove fractional seconds. | |
Time::Piece->strptime($t, '%Y-%m-%d %T'); | |
}, | |
required => 1, | |
handles => { | |
strftime => 'strftime', | |
}, | |
); | |
has text => ( | |
is => 'ro', | |
required => 1, | |
); | |
has blurb => ( | |
is => 'ro', | |
lazy => 1, | |
builder => '_build_blurb', | |
); | |
sub _build_blurb { | |
my $self = shift; | |
# strip HTML | |
my $p = HTML::Strip->new; | |
my $b = $p->parse( $self->text ); | |
$p->eof; | |
# HTML::Strip being lame. | |
utf8::decode($b); | |
# do some smart wrapping to ~72 columns. | |
# then take the first line as our blurb. | |
$b = wrap('', '', $b); | |
$b = (split /\n/, $b)[0]; | |
return $b; | |
} | |
} | |
package Emitter { | |
use utf8::all; | |
use String::Formatter method_stringf => { | |
-as => '_stringf', | |
codes => { | |
t => 'title', | |
b => 'blurb', | |
u => 'uri', | |
m => 'strftime', | |
n => sub { time }, | |
c => sub { | |
my $h = $_[0]->uri->host; | |
$h =~ s/\..*//; | |
__PACKAGE__->sitemap($h); | |
}, | |
}, | |
}; | |
my %sitemap = ( | |
www => 'MetaFilter', | |
ask => 'AskMeFi', | |
projects => 'Projects', | |
music => 'Music', | |
jobs => 'Jobs', | |
irl => 'IRL', | |
metatalk => 'MetaTalk', | |
); | |
sub sitemap { | |
my ($class, $host) = @_; | |
return $sitemap{$host}; | |
} | |
our $page_open = <<'_EOT_'; | |
<!DOCTYPE NETSCAPE-Bookmark-file-1> | |
<!--This is an automatically generated file. | |
It will be read and overwritten. | |
Do Not Edit! --> | |
<META HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=UTF-8"> | |
<Title>%t</Title> | |
<H1>%t</H1> | |
<DL> | |
_EOT_ | |
our $page_close = <<'_EOT_'; | |
</DL> | |
_EOT_ | |
our $thread_open = <<'_EOT_'; | |
<DT><H3 FOLDED ADD_DATE="%n">%t</H3> | |
<DL><p> | |
_EOT_ | |
our $thread_close = <<'_EOT_'; | |
</DL><p> | |
_EOT_ | |
our $comment_open = <<'_EOT_'; | |
<DT><A HREF="%u" ADD_DATE="%n" LAST_VISIT="%n" LAST_MODIFIED="%n">%b</A> | |
_EOT_ | |
our $comment_close = ''; | |
sub emit_as_text { | |
my ($self, $page) = @_; | |
my $output = ""; | |
open my $fh, '>', \$output or die "Can't open a string!"; | |
$self->emit_to_fh($page, $fh); | |
# so, the logic goes: when using Perl::IO::scalar to do IO to a | |
# string, the string is treated as a File, and files are by nature | |
# a stream of *bytes*. so we need to turn that string of bytes | |
# into a true unicode thing. otherwise when we print it back out | |
# to a utf8 filehandle (stdout or a file) Perl will notice any | |
# non-ASCII (>127) characters and end up double-encoding them | |
# to utf8 again. the alternative would be to set the output | |
# filehandle to :std or :raw when outputting this string. | |
# sigh, unicode can be such a PITA. | |
utf8::decode($output); | |
return $output; | |
} | |
sub emit_to_file { | |
my ($self, $page, $filename) = @_; | |
open my $fh, '> :utf8', $filename | |
or die "Can't open '$filename' for writing $!"; | |
$self->emit_to_fh($page, $fh); | |
} | |
sub emit_to_fh { | |
my ($self, $page, $fh) = @_; | |
# open, (open, (open, close)+ close)+, close | |
print $fh _stringf $page_open, $page; | |
for my $t ( @{ $page->entries } ) { | |
print $fh _stringf $thread_open, $t; | |
for my $c ( @{ $t->entries } ) { | |
print $fh _stringf $comment_open, $c; | |
print $fh _stringf $comment_close, $c; | |
} | |
print $fh _stringf $thread_close, $t; | |
} | |
print $fh _stringf $page_close, $page; | |
} | |
} | |
} # BEGIN | |
__END__ | |
=head1 NAME | |
my-mefi-bookmarks - convert Metafilter comment dump to Netscape bookmarks file. | |
=head1 VERSION | |
0.01 | |
=head1 SYNOPSIS | |
my-mefi-bookmarks | |
my-mefi-bookmarks --fancy --out ~/public_html/mefi-bookmarks.html | |
=head1 OPTIONS | |
Some options can be abbreviated. | |
=over 4 | |
=item B<--in> I<filename> | |
Specify an alternate input file. Default: F<my-mefi-comments.txt> | |
=item B<--out> I<filename> | |
Specify an alternate output file. Default: F<bookmarks.html> | |
=item B<--stdout> | |
Output to STDOUT instead of a file. | |
=item B<--fancy> | |
Make the output a bit fancier. This is also a little demo of customization possibilities. See the source for ideas. | |
=item B<--help> | |
Show a brief help message and exit. | |
=item B<--manual> | |
Show this manual page and exit. | |
=back | |
=cut | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Example fancy output: http://jklmnop.net/metafilter/21489/fancy.html