Skip to content

Instantly share code, notes, and snippets.

Created August 21, 2013 12:36
Show Gist options
  • Save xtetsuji/6293910 to your computer and use it in GitHub Desktop.
Save xtetsuji/6293910 to your computer and use it in GitHub Desktop.
Converts article XML files exported from Posterous (a now-defunct blogging service) into the WordPress WXR format.
#!/usr/bin/env perl
# *.txt > wxr.xml
use strict;
use warnings;
use utf8;
use Data::Dumper;
use Data::Section::Simple 'get_data_section';
use Date::Format 'time2str';
use Date::Parse 'str2time';
use Text::Xslate;
use constant DEBUG => $ENV{DEBUG};
binmode STDOUT, ':utf8';

# Main flow: read every article file named on the command line, build one
# template item per article, and print the rendered WXR document to STDOUT.

# The arguments are expected to be the .txt files generated beforehand
# (one per article: "Key: value" header lines, a blank line, then the body).
my @files = @ARGV;
if (!@files) {
    die "$0 files.txt...";
}

# The template is looked up in this file's data section under 'wxr.xml'.
my $wxr_tmpl_content = get_data_section('wxr.xml');
my $tx = Text::Xslate->new( type => 'text' );

my %vars;
$vars{items} = [];

for my $file (@files) {
    my %data = read_data($file);
    my %item;

    # Debug hook: dumps the parsed header fields when enabled.
    for my $key (qw/Title Publish-Date Link Tags/) { # Tags may be unused?
        #warn "$key\: $data{$key}\n" if DEBUG;
    }

    $item{title}   = $data{Title};
    $item{content} = $data{Content};

    # Example header: "Publish-Date: Wed Feb 1 09:05:00 2012"
    # Parse once and refuse to continue on a missing/unparseable date rather
    # than silently emitting epoch-0 timestamps.
    my $pub_date = $data{'Publish-Date'};
    my $epoch    = defined $pub_date ? str2time($pub_date) : undef;
    defined $epoch
        or die "$file: missing or unparseable Publish-Date header\n";

    $item{post_date} = time2str('%Y-%m-%d %H:%M:%S', $epoch);
    # NOTE(review): the fixed nine-hour shift assumes the header dates are in
    # JST (UTC+9) and that this script runs in that same zone -- confirm.
    $item{post_date_gmt} = time2str('%Y-%m-%d %H:%M:%S', $epoch - 3600*9);

    # The post name is whatever follows the last '/' of the Link header.
    my $post_name = defined $data{Link} ? $data{Link} : '';
    $post_name =~ s{.*/}{};
    $item{post_name} = $post_name;

    push @{$vars{items}}, \%item;
}

print $tx->render_string($wxr_tmpl_content, \%vars);

# if ( DEBUG ) {
# for my $item (@{$vars{items}}) {
# print $item->{title} . "\n";
# }
# }
# read_data($file)
#
# Reads one article file made of "Key: value" header lines, a blank
# separator line, and a free-form body, for example:
#
#   Title: NMLノート 2012年1月版
#   Publish-Date: Wed Feb 1 09:05:00 2012
#   Link:
#   Tags: classical
#
# Parameters:
#   $file - path of the file to read (decoded with the :utf8 layer).
#
# Returns:
#   The header fields keyed by header name, plus the key 'Content' holding
#   everything after the blank line. A flat hash is returned in list context
#   and a hash reference in scalar context.
#
# Dies:
#   When the file cannot be opened.
sub read_data {
    my $file = shift;

    open my $fh, '<:utf8', $file
        or die "Cannot open '$file': $!\n";

    my %data;
    while (my $line = <$fh>) {
        # Drop the line terminator so header values do not carry a newline.
        $line =~ s/\r?\n\z//;

        # The first empty line ends the header section.
        last if $line eq '';

        # Limit of 2 keeps any further colons (e.g. in times or URLs)
        # inside the value.
        my ($key, $value) = split /:\s*/, $line, 2;
        next if !defined $key;
        $data{$key} = defined $value ? $value : '';
    }

    # Everything that remains is the article body, slurped in one read.
    $data{Content} = do { local $/; <$fh>; };
    close $fh;

    return wantarray ? %data : \%data;
}
__DATA__

@@ wxr.xml
<?xml version="1.0" encoding="UTF-8"?>
<rss version="2.0"
    xmlns:content="http://purl.org/rss/1.0/modules/content/"
    xmlns:wp="http://wordpress.org/export/1.2/"
>
<channel>
<!-- [wp:wxr_version] WXRのバージョン -->
<wp:wxr_version>1.2</wp:wxr_version>
<!-- [item] 投稿等(繰り返し) -->
: for $items -> $item {
<item>
<title><: $item.title :></title><!-- <: $item.post_name :> -->
<content:encoded><![CDATA[<: $item.content :>]]></content:encoded>
<wp:post_date><: $item.post_date :></wp:post_date>
<wp:post_date_gmt><: $item.post_date_gmt :></wp:post_date_gmt>
<wp:post_name><: $item.post_name :></wp:post_name>
<!-- [wp:postmeta] 投稿等のカスタムフィールド(繰り返し) -->
<!-- [/wp:postmeta] -->
</item>
: }
<!-- [/item] -->
</channel>
</rss>
#!/usr/bin/env perl
# ./ 12345678.xml
# for f in *.xml ; do
# ./ $f > ${f%.xml}.txt
# done
use strict;
use warnings;
use utf8;
use Date::Parse 'str2time';
use Date::Format 'time2str';
binmode STDOUT, ':utf8';

# Converts one exported article XML file into a plain-text record on STDOUT:
# "Key: value" header lines, a blank separator line, then the article body.

my $file = shift;
defined $file
    or die "Usage: $0 article.xml\n";

### read
open my $fh, '<:utf8:crlf', $file
    or die "Cannot open '$file': $!\n";
my $content = do { local $/; <$fh>; };
close $fh;

# Each field is taken from the first matching element in the document.
my ($title)    = $content =~ m{<title>(.*?)</title>}msx;
my ($pub_date) = $content =~ m{<pubDate>(.*?)</pubDate>}msx;
my ($link)     = $content =~ m{<link>(.*?)</link>}msx;
my ($content_encoded) = $content =~ m{<content:encoded><!\[CDATA\[(.*?)\]\]>}msx;
my @tags = $content =~ m{<category \s+ domain="tag" \s+ nicename="(.*?)"}gmsx;

# Re-express the publication date in the local time zone (the original
# author's note says this is meant to be JST). Stop on a missing or
# unparseable date instead of silently printing an epoch-0 timestamp.
my $epoch = defined $pub_date ? str2time($pub_date) : undef;
defined $epoch
    or die "$file: missing or unparseable <pubDate>\n";
$pub_date = scalar localtime $epoch;

# Fields that did not match become empty strings so the output stays
# well-formed and no "uninitialized" warnings are raised.
for my $field ($title, $link, $content_encoded) {
    $field = '' if !defined $field;
}

# Replace a few named entities with the literal characters. The optional
# ';' makes sure the entity terminator of "&hellip;" is consumed too.
$content_encoded =~ s/&rarr;/→/g;
$content_encoded =~ s/&larr;/←/g;
$content_encoded =~ s/&hellip;?/…/g;

### output
print <<"END_OUTPUT";
Title: $title
Publish-Date: $pub_date
Link: $link
Tags: @{[ join ", ", @tags ]}

$content_encoded
END_OUTPUT
Title: 月末やうるう年を判定して実行するかどうかcronで条件分岐
Publish-Date: Wed Feb 29 23:20:45 2012
Tags: bash, linux
<p>うるう年にブログを更新するのがアツいという世間の流行に乗せられている おがた です。</p>
<p>そういう時に、月末やうるう年を判定して、それをステータスコードを返して "&amp;&amp;" や "||" で後続のコマンドを実行するかしないか判定するコマンド "" と "" というシェルスクリプトを作ってみました。</p>
<script src=""></script>
<script src=""></script>
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment