Skip to content

Instantly share code, notes, and snippets.

@briandfoy
Created November 9, 2020 08:05
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save briandfoy/63eacd366f6157eca5d122c55f134e61 to your computer and use it in GitHub Desktop.
Save briandfoy/63eacd366f6157eca5d122c55f134e61 to your computer and use it in GitHub Desktop.
Turn a WordPress XML dump into a bunch of local files
#!perl
use v5.10;
use experimental qw(signatures);
use File::Path qw(make_path);
use File::Spec::Functions;
use Mojo::DOM;
use Mojo::URL;
use Mojo::Util qw(dumper);
use XML::LibXML;
# Slurp the WordPress export named by the first command-line argument.
# The file is opened with the :raw layer, so no decoding happens here and
# the bytes are handed on unchanged.
my $xml = do {
    # Fail with a usage line instead of trying to open an undefined name.
    my $file = $ARGV[0]
        // die "Usage: $0 wordpress-export.xml\n";

    open my $fh, '<:raw', $file or die "Could not open $file: $!";

    local $/;    # no record separator: a single read returns the whole file
    <$fh>;
};
# Parse the export and turn every <item> under /rss/channel into a hash
# reference. Keys are the element names with any namespace prefix removed
# (dc:creator becomes creator, content:encoded becomes encoded); values are
# the text of those elements. The extra key "categories" holds an array
# reference with one { type, nicename } hash per <category> child, filled
# from that element's "domain" and "nicename" attributes.
my $dom = XML::LibXML->load_xml( string => $xml );

my @items = map {
    my $node = $_;

    # The key is a lexical so it does not leak into the package symbol table.
    # The /r flag returns the stripped copy and leaves $_ (the full, prefixed
    # name) intact for the XPath lookup.
    my %item = map {
        my $key = s/\A\w+://r;
        ( $key => $node->findvalue( $_ ) );
    } qw(
        title link dc:creator wp:post_id wp:post_date_gmt
        wp:post_name wp:status wp:post_type content:encoded
    );

    my @categories = map {
        my $category = $_;
        my %attr_of;
        $attr_of{type}     = $category->getAttribute( 'domain' );
        $attr_of{nicename} = $category->getAttribute( 'nicename' );
        \%attr_of;
    } $node->findnodes( 'category' );

    $item{categories} = \@categories;
    \%item;
} $dom->findnodes( '/rss/channel/item' );
# Posts are written below <host>/_posts, where <host> is taken from the
# link of the first item. Stop with a clear message when there is nothing
# to derive it from, rather than building a path out of undef.
@items or die "No items found in the export\n";

my $base = Mojo::URL->new( $items[0]{link} )->host;
die "Could not find a host name in <$items[0]{link}>\n"
    unless defined $base and length $base;

# catdir, not catfile: both components name directories.
my $local_dir = catdir( $base, '_posts' );
make_path $local_dir;
# Write every non-draft item to <local_dir>/<name>.html. <name> is the path
# part of the item's link with slashes turned into dashes and the leading
# and trailing dash removed. Each file starts with an HTML comment holding
# the item's metadata, followed by the item's content.
ITEM: foreach my $item ( @items ) {
    next ITEM if $item->{status} eq 'draft';

    my $url_path = Mojo::URL->new( $item->{link} )->path;
    my $file     = $url_path =~ s|/|-|gr;
    $file =~ s/\A-|-\z//g;

    # A link whose path is empty or just "/" would give the name ".html"
    # for every such item; fall back to the post id so they stay distinct.
    $file = "post-$item->{post_id}" if $file eq '';

    my $out_path = catfile( $local_dir, "$file.html" );
    open my $fh, '>:encoding(UTF-8)', $out_path or do {
        warn "Could not open <$out_path>: $!";
        next ITEM;
    };

    # The "categories" list mixes real categories and tags; the type field
    # tells them apart.
    my @categories = map  { $_->{nicename} }
                     grep { $_->{type} eq 'category' }
                     $item->{categories}->@*;
    my @tags       = map  { $_->{nicename} }
                     grep { $_->{type} eq 'post_tag' }
                     $item->{categories}->@*;

    # <<~ strips the indentation found before the closing HERE from every
    # line, so the text lands in the file flush left.
    say {$fh} <<~"HERE";
        <!--
        title: $item->{title}
        link: $item->{link}
        author: $item->{creator}
        post_id: $item->{post_id}
        date: $item->{post_date_gmt}
        post_name: $item->{post_name}
        status: $item->{status}
        type: $item->{post_type}
        categories: @categories
        tags: @tags
        -->
        HERE
    say {$fh} $item->{encoded};

    # Buffered write errors only surface at close, so check it.
    close $fh or warn "Could not close <$out_path>: $!";
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment