Created
November 5, 2012 11:41
-
-
Save hissohathair/4016817 to your computer and use it in GitHub Desktop.
Perl code to convert RSS to Atom for importing into Blogger
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?xml version='1.0' encoding='UTF-8'?>
<feed>
<!-- The easiest way to generate this file is to create a new blog with the template you want,
export an empty Atom dump of it, and add the string "INSERT HERE" inside an XML comment
just before the closing feed tag. -->
<!-- Like this: -->
<!-- INSERT HERE -->
</feed>
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/perl -w | |
# | |
# Search for the "[TODO]" strings to make your changes. | |
# | |
use warnings; | |
use strict; | |
###use criticism 'brutal'; | |
use XML::RSS; | |
use XML::Atom::Entry; | |
use LWP::Simple; | |
# The feed URL is mandatory, even when the content ends up coming from the
# local cache file.
my $url = shift @ARGV
    || die "Need to pass a URL as first argument";

# Single name for the cache file, used for both reading and writing, so a
# cache written by one run is actually found by the next one.
# Note: the cache is not keyed by URL; delete the file to force a re-fetch.
my $cache_file = '.rss2atom-cache';

my $content = '';
if ( open( my $fh, '<', $cache_file ) ) {

    # Cache hit: slurp the whole file in one read.
    $content = do { local $/; <$fh> };
    $content = '' unless defined $content;
    close($fh);
}
else {

    # Cache miss: download the feed, then try to store it for next time.
    $content = get($url) or die "get: Cannot get $url\n";

    # Caching is best-effort: a cache that cannot be written only produces
    # a warning, and nothing is printed to a handle that failed to open.
    if ( open( my $fh, '>', $cache_file ) ) {
        print {$fh} $content;
        close($fh) or warn "close: Cannot write cache ($!)\n";
    }
    else {
        warn "open: Cannot write cache ($!)\n";
    }
}
# The constructors called below belong to modules that none of this file's
# `use` lines name, so load them explicitly here instead of relying on some
# other module pulling them in as a side effect.
require DateTime;
require XML::Atom::Feed;
require XML::Atom::Person;

# Parse the RSS document that was fetched (or read from the cache).
my $rss = XML::RSS->new();
$rss->parse($content);

# Target Atom feed that collects one entry per RSS item.
my $feed = XML::Atom::Feed->new();

my $blogId = "[TODO]";    # put your blog ID here

# Publication timestamp for the first entry; the item loop moves it back
# one week after every entry.
my $dt = DateTime->now();

# Author attached to every generated entry.
my $author = XML::Atom::Person->new;
$author->name('[TODO]');     # your name here
$author->email('[TODO]');    # your email address

# Number of entries generated, reported on STDERR after the loop.
my $num_posts = 0;
# Turn every RSS item into one Atom entry and append it to $feed.
for my $item ( @{ $rss->{'items'} } ) {
    my $id    = make_id();
    my $entry = XML::Atom::Entry->new();

    # The id literal starts with a newline; the newline-stripping
    # substitution applied to the serialized feed later removes it.
    $entry->id("\ntag:blogger.com,1999:blog-$blogId.post-$id");

    # Timestamp is built from $dt with a fixed ".001-08:00" suffix.
    my $stamp = $dt->ymd('-') . "T" . $dt->hms . '.001-08:00';
    $entry->published($stamp);
    $entry->updated( $entry->published );

    # Category marking the entry as a Blogger post.
    my %post_kind = (
        scheme => 'http://schemas.google.com/g/2005#kind',
        term   => 'http://schemas.google.com/blogger/2008/kind#post',
    );
    $entry->category( {%post_kind} );

    $entry->title( $item->{'title'}, { type => 'text' } );
    $entry->content( $item->{'description'}, { type => 'html' } );

    # Link attributes, in the order the links are added to the entry.
    my @link_specs = (
        {
            rel   => 'replies',
            type  => 'application/atom+xml',
            href  => "http://whatswhat-fashion-news.blogspot.com/feeds/$id/comments/default",
            title => 'Post Comments',
        },
        {
            rel   => 'replies',
            type  => 'text/html',
            href  => $item->{'link'} . "#comment-form",
            title => '0 Comments',
        },
        {
            rel  => 'edit',
            type => 'application/atom+xml',

            # [TODO] fix this URL
            href => "http://whatswhat-fashion-news.blogspot.com/feeds/$blogId/posts/default/$id",
        },
        {
            rel  => 'self',
            type => 'application/atom+xml',

            # [TODO] fix this URL
            href => "http://whatswhat-fashion-news.blogspot.com/feeds/$blogId/posts/default/$id",
        },
        {
            href  => $item->{'link'},
            rel   => 'alternate',
            type  => 'text/html',
            title => $item->{'title'},
        },
    );
    for my $spec (@link_specs) {
        $entry->add_link( make_link($spec) );
    }

    $entry->author($author);
    $feed->add_entry($entry);

    # Each subsequent entry is dated one week earlier than the previous one.
    $dt->subtract( days => 7 );
    $num_posts++;
}
print STDERR "$num_posts posts\n";

# This is the ugly part. A bunch of regexes to format the XML in the very
# perverse and unusual way Blogger seemed to need it. (Kids -- don't use
# regex to process XML!)
#
my $xml = $feed->as_xml;
$xml =~ s/^\s+//gm;                  # drop leading whitespace on every line
$xml =~ s/[\n\r]+//gs;               # join everything onto a single line
$xml =~ s/issued>/published>/gs;     # rename <issued>   -> <published>
$xml =~ s/modified>/updated>/gs;     # rename <modified> -> <updated>

# Append a zero <thr:total> element to every entry.
$xml =~ s{</entry>}{<thr:total>0</thr:total></entry>}gs;

# Keep only the entries: cut everything before the first <entry> and the
# closing </feed>, because the result is spliced into the master template.
$xml =~ s/^.*?<entry>/<entry>/;
$xml =~ s{</feed>}{};

# NOTE(review): without /g this removes only the first ' mode="xml"'; any
# further occurrences survive as mode='xml' after the quote rewrite below.
# Confirm whether that is intended before adding /g.
$xml =~ s{ mode="xml"}{};

# Every double quote becomes a single quote (this includes quotes that
# appear inside entry text).
$xml =~ s{"}{'}g;

# Post-process the body of each <content> element.
$xml =~ s{(<content[^>]+>)(.*?)(</content>)}{fix_content_tag($1, $2, $3)}ge;

# Read the master template and splice the entries in at the marker comment.
open( my $fh, '<', "blogger-import-master.xml" )
    or die "open: Cannot read master ($!)\n";
my $master = do { local $/; <$fh> };
$master = '' unless defined $master;
close($fh);

# Without the marker the output would silently contain no entries at all,
# so say so on STDERR instead of printing the untouched template quietly.
$master =~ s/<!-- INSERT HERE -->/$xml/
    or warn "No <!-- INSERT HERE --> marker found in blogger-import-master.xml; no entries inserted\n";
print $master;
# Rebuild one <content> element with the markup in its body escaped.
#
# Arguments:
#   $begin - the opening <content ...> tag, passed through unchanged
#   $html  - the text found between the opening and closing tags
#   $end   - the closing </content> tag, passed through unchanged
#
# Returns the concatenation of the three parts, with every "<" and ">" in
# $html replaced by "&lt;" and "&gt;". Ampersands are left alone so that
# entities already present in the body are not escaped a second time.
sub fix_content_tag
{
    my ( $begin, $html, $end ) = @_;
    $html =~ s/</&lt;/g;
    $html =~ s/>/&gt;/g;
    return $begin . $html . $end;
}
# Build an XML::Atom::Link from a hash reference of attributes.
#
# Each key of the hash is used as the name of a method called on the new
# link object, with the corresponding value as its only argument.
# Returns the populated link object.
sub make_link
{
    my ($attrs) = @_;
    my $link = XML::Atom::Link->new();
    $link->$_( $attrs->{$_} ) for keys %{$attrs};
    return $link;
}
# Generate a random 19-digit post id as a string.
#
# The id is six three-digit groups, each drawn from 100..999, followed by
# one final digit drawn from 1..9.
#
# Classic ID: 818 439 446 451 821 498 8;
#             999 439 446 451 821 498 8
sub make_id
{
    my @groups      = map { 100 + int( rand(900) ) } 1 .. 6;
    my $final_digit = 1 + int( rand(9) );
    return join( '', @groups, $final_digit );
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment