Created
August 21, 2013 12:36
-
-
Save xtetsuji/6293910 to your computer and use it in GitHub Desktop.
Converts article XML files exported from Posterous (a now-defunct blog service) into the WordPress WXR import format.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env perl | |
# concat-wxr.pl *.txt > wxr.xml | |
use strict; | |
use warnings; | |
use utf8; | |
use Data::Dumper; | |
use Data::Section::Simple 'get_data_section'; | |
use Date::Format 'time2str'; | |
use Date::Parse 'str2time'; | |
use Text::Xslate; | |
use constant DEBUG => $ENV{DEBUG};

# Offset subtracted from the parsed Publish-Date to obtain the GMT timestamp.
# NOTE(review): this assumes the Publish-Date headers hold JST (UTC+9)
# wall-clock times, as the extractor script's comment suggests -- confirm.
use constant JST_OFFSET_SECONDS => 3600 * 9;

# Timestamp layout used for both <wp:post_date> and <wp:post_date_gmt>.
use constant WP_DATE_FORMAT => '%Y-%m-%d %H:%M:%S';

binmode STDOUT, ':utf8';

# The arguments are the .txt files produced beforehand by the extractor.
my @files = @ARGV;
if (!@files) {
    die "$0 files.txt...";
}

my $wxr_tmpl_content = get_data_section('wxr.xml');
my $tx = Text::Xslate->new( type => 'text' );

# Template variables: "items" is the list of posts rendered as <item> elements.
my %vars;
$vars{items} = [];

for my $file (@files) {
    my %data = read_data($file);

    # Publish-Date: Wed Feb  1 09:05:00 2012
    # Parse once and fail loudly: a post with a silently wrong date is worse
    # than an aborted conversion.
    my $pub_date  = $data{'Publish-Date'};
    my $pub_epoch = defined $pub_date ? str2time($pub_date) : undef;
    defined $pub_epoch
        or die "$file: missing or unparseable Publish-Date header\n";

    # Link: http://post.tetsuji.jp/nml-20121
    # The post slug is whatever follows the last slash of the link.
    my $post_name = defined $data{Link} ? $data{Link} : '';
    $post_name =~ s{.*/}{};

    # The Tags header is read by read_data() but is not used in the output.
    my %item = (
        title         => $data{Title},
        content       => $data{Content},
        post_date     => time2str(WP_DATE_FORMAT, $pub_epoch),
        post_date_gmt => time2str(WP_DATE_FORMAT, $pub_epoch - JST_OFFSET_SECONDS),
        post_name     => $post_name,
    );
    push @{ $vars{items} }, \%item;
}

print $tx->render_string($wxr_tmpl_content, \%vars);

# if ( DEBUG ) {
#     for my $item (@{$vars{items}}) {
#         print $item->{title} . "\n";
#     }
# }
# Parse one extracted article file.
#
# The file starts with "Key: value" header lines, then a blank line, then
# the article body, for example:
#
#   Title: NML note, January 2012 edition
#   Publish-Date: Wed Feb  1 09:05:00 2012
#   Link: http://post.tetsuji.jp/nml-20121
#   Tags: classical
#
# Arguments: the path of the file to read.
# Returns:   the headers plus a "Content" key holding everything after the
#            blank line -- as a flat key/value list in list context, or as
#            a hash reference in scalar context.
# Dies:      with the file name and OS error when the file cannot be opened.
sub read_data {
    my $file = shift;

    open my $fh, '<:utf8', $file
        or die "Cannot open '$file' for reading: $!\n";

    my %data;
    # A lexical line variable keeps the caller's $_ intact.
    while (my $line = <$fh>) {
        chomp $line;
        last if $line eq '';    # the first blank line ends the headers

        # Limit 2: only the first colon separates key from value, so values
        # such as URLs may contain colons themselves.
        my ($key, $value) = split /:\s*/, $line, 2;
        $data{$key} = $value;
    }

    # Slurp the remainder of the file as the article body.
    $data{Content} = do { local $/; <$fh>; };
    close $fh;

    return wantarray ? %data : \%data;
}
__DATA__ | |
@@ wxr.xml | |
<?xml version="1.0" encoding="UTF-8"?> | |
<rss version="2.0" | |
xmlns:excerpt="http://wordpress.org/export/1.0/excerpt/" | |
xmlns:content="http://purl.org/rss/1.0/modules/content/" | |
xmlns:wfw="http://wellformedweb.org/CommentAPI/" | |
xmlns:dc="http://purl.org/dc/elements/1.1/" | |
xmlns:wp="http://wordpress.org/export/1.0/"> | |
<channel> | |
<!-- [wp:wxr_version] WXRのバージョン --> | |
<wp:wxr_version>1.0</wp:wxr_version> | |
<!-- [item] 投稿等(繰り返し) --> | |
: for $items -> $item { | |
<item> | |
<title><: $item.title :></title><!-- <: $item.post_name :> --> | |
<dc:creator>xtetsuji</dc:creator> | |
<content:encoded><![CDATA[<: $item.content :>]]></content:encoded> | |
<excerpt:encoded><![CDATA[]]></excerpt:encoded> | |
<wp:post_date><: $item.post_date :></wp:post_date> | |
<wp:post_date_gmt><: $item.post_date_gmt :></wp:post_date_gmt> | |
<wp:comment_status>open</wp:comment_status> | |
<wp:ping_status>open</wp:ping_status> | |
<wp:post_name><: $item.post_name :></wp:post_name> | |
<wp:status>publish</wp:status> | |
<wp:post_type>post</wp:post_type> | |
<wp:post_password></wp:post_password> | |
<!-- [wp:postmeta] 投稿等のカスタムフィールド(繰り返し) --> | |
<!-- | |
<wp:postmeta> | |
<wp:meta_key>カスタムフィールドのキー</wp:meta_key> | |
<wp:meta_value>カスタムフィールドの値</wp:meta_value> | |
</wp:postmeta> | |
--> | |
<!-- [/wp:postmeta] --> | |
</item> | |
: } | |
<!-- [/item] --> | |
</channel> | |
</rss> |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env perl | |
# | |
# ./posterousxml-extract.pl 12345678.xml | |
# | |
# for f in *.xml ; do | |
# ./posterousxml-extract.pl $f > ${f%.xml}.txt | |
# done | |
use strict; | |
use warnings; | |
use utf8; | |
use Date::Parse 'str2time'; | |
use Date::Format 'time2str'; | |
binmode STDOUT, ':utf8';

# Input: the path of one Posterous export XML file.
my $file = shift;
if (!defined $file) {
    die "Usage: $0 article.xml\n";
}

### read
open my $fh, '<:utf8:crlf', $file
    or die "Cannot open '$file' for reading: $!\n";
my $content = do { local $/; <$fh>; };
close $fh;

# Pull the first occurrence of each field straight out of the XML text.
# The values are copied verbatim (no entity decoding).
my ($title)           = $content =~ m{<title>(.*?)</title>}msx;
my ($pub_date)        = $content =~ m{<pubDate>(.*?)</pubDate>}msx;
my ($link)            = $content =~ m{<link>(.*?)</link>}msx;
my ($content_encoded) = $content =~ m{<content:encoded><!\[CDATA\[(.*?)\]\]>}msx;

# The publish date is required downstream, so refuse to continue without it.
my $pub_epoch = defined $pub_date ? str2time($pub_date) : undef;
defined $pub_epoch
    or die "$file: missing or unparseable <pubDate>\n";
# Re-render the timestamp in the local time zone.
# NOTE(review): the original comment says "to JST", so this presumably
# expects to run with the time zone set to Japan -- confirm.
$pub_date = scalar localtime $pub_epoch;

# Fall back to an empty string (with a warning) for any other missing field
# instead of operating on undef.
for my $field (
    [ 'title'           => \$title ],
    [ 'link'            => \$link ],
    [ 'content:encoded' => \$content_encoded ],
) {
    my ($name, $value_ref) = @{$field};
    next if defined ${$value_ref};
    warn "$file: no <$name> element found; using an empty value\n";
    ${$value_ref} = '';
}

# Replace a few named entities in the body with the literal characters.
# NOTE(review): the two arrow patterns appeared as literal arrows (a no-op)
# in the reviewed copy, presumably entity names decoded by page rendering;
# confirm they were "&rarr;" and "&larr;".
$content_encoded =~ s/&rarr;/→/g;
$content_encoded =~ s/&larr;/←/g;
# The trailing ";" belongs to the entity; without it a stray ";" is left
# behind after the ellipsis.
$content_encoded =~ s/&hellip;/…/g;

my @tags = $content =~ m{<category \s+ domain="tag" \s+ nicename="(.*?)"}gmsx;

### output
# Header lines, one blank line, then the body: the blank line is what marks
# the end of the headers for the script that reads these files.
print <<END_OUTPUT;
Title: $title
Publish-Date: $pub_date
Link: $link
Tags: @{[ join ", ", @tags ]}

$content_encoded
END_OUTPUT
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Title: 月末やうるう年を判定して実行するかどうかcronで条件分岐 | |
Publish-Date: Wed Feb 29 23:20:45 2012 | |
Link: http://post.tetsuji.jp/leap-year-cron | |
Tags: bash, linux | |
<p>うるう年にブログを更新するのがアツいという世間の流行に乗せられている おがた です。</p> | |
<p>特にネタは無いなぁと思いつつ、お仕事からうるう年っぽいネタを出してみます。</p> | |
<p>シェルスクリプトを「月末に実行する」「毎月29日に実行するけど、うるう年ではない2月の場合は3月1日に実行する」といった要請があって、cronの日付指定だと難しいケースがあります。</p> | |
<p>そういう時に、月末やうるう年を判定して、それをステータスコードを返して "&&" や "||" で後続のコマンドを実行するかしないか判定するコマンド "end-of-month.sh" と "leap-year.sh" というシェルスクリプトを作ってみました。</p> | |
<p> </p> | |
<script src="https://gist.github.com/1941043.js?file=end-of-month.sh"></script> | |
<p> </p> | |
<script src="https://gist.github.com/1941043.js?file=leap-year.sh"></script> | |
<p>使い方はソースコードに書いてある通りです。</p> | |
<p>2月29日中にブログを更新したくて、中野の地下の飲み屋でビールを飲んでおでんを食べながらのブログ更新です。本当なら今日は健康診断だったので、バリウムの都合でアルコールは飲まないようにと言われているのですが。まぁ一杯だけ…;。お店の良い雰囲気の中、3月を迎えることになりそうです。でも健康的になるべく早く帰って就寝する予定です。</p> |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment