Skip to content

Instantly share code, notes, and snippets.

@hissohathair
Created November 5, 2012 11:41
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save hissohathair/4016817 to your computer and use it in GitHub Desktop.
Save hissohathair/4016817 to your computer and use it in GitHub Desktop.
Perl code to convert RSS to Atom for importing into Blogger
<?xml version='1.0' encoding='UTF-8'?>
<feed>
<!-- The easiest way to generate this file is to create a new blog with the template you want,
export a blank Atom dump of it, and add an XML comment containing the string "INSERT HERE"
just before the closing feed tag. -->
<!-- Like this: -->
<!-- INSERT HERE -->
</feed>
#!/usr/bin/perl -w
#
# Search for the "[TODO]" strings to make your changes.
#
use warnings;
use strict;
###use criticism 'brutal';
use XML::RSS;
use XML::Atom::Entry;
use LWP::Simple;
# The feed URL is the only command-line argument.
my $url = shift(@ARGV)
    or die "Need to pass a URL as first argument";

# The downloaded RSS is cached in the current directory so that later runs
# read the local copy instead of fetching the feed again. The same name must
# be used for reading and writing, otherwise the cache is never hit.
my $cache_file = '.rss2atom-cache';

my $content = '';
if ( open( my $cache_in, '<', $cache_file ) ) {

    # Cache hit: read the whole file in one go.
    local $/ = undef;
    my $cached = <$cache_in>;
    close($cache_in);
    $content = $cached if defined $cached;
}
else {
    $content = get($url) or die "get: Cannot get $url\n";

    # Writing the cache is best effort: a failure is reported but does not
    # stop the conversion, and nothing is printed to a handle that failed
    # to open.
    if ( open( my $cache_out, '>', $cache_file ) ) {
        print {$cache_out} $content;
        close($cache_out)
            or warn "close: Cannot write cache ($!)\n";
    }
    else {
        warn "open: Cannot write cache ($!)\n";
    }
}
# Parse the RSS held in $content and build one Atom entry per RSS item,
# collecting the entries in $feed.
#
# NOTE(review): XML::Atom::Feed, XML::Atom::Person and DateTime are used
# below but have no matching `use` line in this file; presumably they get
# loaded as a side effect of the modules that are imported -- confirm.
my $rss = XML::RSS->new();
$rss->parse($content);

my $feed   = XML::Atom::Feed->new();
my $blogId = "[TODO]";    # put your blog ID here

# [TODO] fix this URL: base address of the blog, shared by every feed link
# generated below.
my $blog_url = 'http://whatswhat-fashion-news.blogspot.com';

# Starting timestamp for the entries; it is moved back one week after every
# item, so each later item in the RSS gets an older date.
my $dt = DateTime->now();

my $author = XML::Atom::Person->new;
$author->name('[TODO]');     # your name here
$author->email('[TODO]');    # your email address

my $num_posts = 0;
foreach my $item ( @{ $rss->{'items'} } ) {
    my $id    = make_id();
    my $entry = XML::Atom::Entry->new();

    # The leading newline in the id is removed again when line breaks are
    # stripped from the serialized XML further down.
    $entry->id("\ntag:blogger.com,1999:blog-$blogId.post-$id");

    # NOTE(review): the '.001-08:00' suffix is a fixed literal, not derived
    # from $dt -- confirm it matches the intended time zone.
    $entry->published( $dt->ymd('-') . "T" . $dt->hms . '.001-08:00' );
    $entry->updated( $entry->published );

    $entry->category(
        {
            scheme => 'http://schemas.google.com/g/2005#kind',
            term   => 'http://schemas.google.com/blogger/2008/kind#post'
        }
    );
    $entry->title( $item->{'title'}, { type => 'text' } );
    $entry->content( $item->{'description'}, { type => 'html' } );

    # The 'edit' and 'self' links point at the same per-post feed URL.
    my $post_feed_url = "$blog_url/feeds/$blogId/posts/default/$id";

    # Link attributes, in the order the links are added to the entry.
    my @link_specs = (
        {
            rel   => 'replies',
            type  => 'application/atom+xml',
            href  => "$blog_url/feeds/$id/comments/default",
            title => 'Post Comments',
        },
        {
            rel   => 'replies',
            type  => 'text/html',
            href  => $item->{'link'} . "#comment-form",
            title => '0 Comments',
        },
        {
            rel  => 'edit',
            type => 'application/atom+xml',
            href => $post_feed_url,
        },
        {
            rel  => 'self',
            type => 'application/atom+xml',
            href => $post_feed_url,
        },
        {
            href  => $item->{'link'},
            rel   => 'alternate',
            type  => 'text/html',
            title => $item->{'title'},
        },
    );
    foreach my $link_spec (@link_specs) {
        $entry->add_link( make_link($link_spec) );
    }

    $entry->author($author);
    $feed->add_entry($entry);

    $dt->subtract( days => 7 );
    $num_posts++;
}
print STDERR "$num_posts posts\n";
# This is the ugly part. A bunch of regexes to format the XML in the very
# perverse and unusual way Blogger seemed to need it. (Kids -- don't use
# regex to process XML!)
#
# The result, $xml, is the run of <entry> elements only; it is spliced into
# the master template in place of its "<!-- INSERT HERE -->" marker and the
# merged document is printed to STDOUT.
my $xml = '';

# With no entries the "drop everything before the first <entry>" step below
# cannot match, and the XML prolog would end up inside the master document,
# so the munging is only done when there is something to insert.
if ( $num_posts > 0 ) {
    $xml = $feed->as_xml;

    # Flatten the document onto a single line.
    $xml =~ s/^\s+//gm;
    $xml =~ s/[\n\r]+//gs;

    # Rename the date elements.
    $xml =~ s/issued>/published>/gs;
    $xml =~ s/modified>/updated>/gs;

    # Give every entry a zero comment count.
    $xml =~ s{</entry>}{<thr:total>0</thr:total></entry>}gs;

    # Keep only the entries: drop the prolog and opening feed tag, and the
    # closing feed tag.
    $xml =~ s/^.*?<entry>/<entry>/;
    $xml =~ s{</feed>}{};

    # Strip the mode attribute from every entry, not just the first one.
    $xml =~ s{ mode="xml"}{}g;

    # Switch all double quotes to single quotes. This is applied to the
    # whole string, so quotes inside entry text are changed as well.
    $xml =~ tr/"/'/;

    # Escape the markup inside each <content> element.
    $xml =~ s{(<content[^>]+>)(.*?)(</content>)}{ fix_content_tag( $1, $2, $3 ) }ge;
}

open( my $master_fh, '<', "blogger-import-master.xml" )
    or die "open: Cannot read master ($!)\n";
my $master = do { local $/ = undef; <$master_fh> };
close($master_fh);
$master = '' unless defined $master;

# Without the marker the entries would be dropped silently, so fail loudly
# instead of printing a template that contains no posts.
$master =~ s/<!-- INSERT HERE -->/$xml/
    or die "master: no <!-- INSERT HERE --> marker in blogger-import-master.xml\n";
print $master;
# Rebuild a <content> element with the angle brackets in its body escaped.
#
# Arguments:
#   $begin - text placed before the body (the opening tag), left untouched
#   $html  - the body; every '<' becomes '&lt;' and every '>' becomes '&gt;'
#   $end   - text placed after the body (the closing tag), left untouched
#
# Returns the three pieces joined back into one string. No other character
# is escaped, so entities already present in the body pass through as-is.
sub fix_content_tag
{
    my ( $begin, $html, $end ) = @_;
    my %entity_for = ( '<' => '&lt;', '>' => '&gt;' );
    $html =~ s/([<>])/$entity_for{$1}/g;
    return join '', $begin, $html, $end;
}
# Build an XML::Atom::Link from a hash reference of attributes.
#
# Each key of the hash is used as the name of a method on the new link
# object, and that method is called with the key's value (for example
# rel, type, href, title). Returns the populated link object.
sub make_link
{
    my ($attr_for) = @_;
    my $link = XML::Atom::Link->new();
    $link->$_( $attr_for->{$_} ) for keys %{$attr_for};
    return $link;
}
# Generate a random 19-digit post ID as a string.
#
# The ID is six three-digit groups, each between 100 and 999, followed by
# a single digit between 1 and 9. An example of the shape, grouped for
# readability: 818 439 446 451 821 498 8
sub make_id
{
    my @triplets    = map { 100 + int( rand(900) ) } 1 .. 6;
    my $final_digit = 1 + int( rand(9) );
    return join '', @triplets, $final_digit;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment