Skip to content

Instantly share code, notes, and snippets.

@argrath
Created August 26, 2018 11:27
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save argrath/9a12eead968c4f683b1af612fb1f9c8e to your computer and use it in GitHub Desktop.
Save argrath/9a12eead968c4f683b1af612fb1f9c8e to your computer and use it in GitHub Desktop.
Generate a frab-format schedule XML file (builderscon2018.xml) for builderscon tokyo 2018
use strict;
use warnings;
use JSON::PP qw(decode_json);
use XML::TreePP;
use LWP::UserAgent;
#use utf8;
use Data::Dumper;
# Placeholder GUID helper: consumes its first argument and returns nothing
# (undef in scalar context, the empty list in list context).
sub guid {
    my $id = shift @_;
    return;
}
# ---------------------------------------------------------------------
# Script body: fetch the timetable page of every conference day, convert
# the sessions found there and write them out as one schedule XML file.
# ---------------------------------------------------------------------

# Shared HTTP client; getdetail() uses it as well.
my $ua = LWP::UserAgent->new();
# Ask for Japanese pages: getdetail() matches Japanese labels in the HTML.
$ua->default_header('Accept-Language' => "ja,ja-JP");

# Value used for elements that carry no text; g2frab() uses it as well.
my $blank = { '#text' => undef };

my @dates = ('2018-09-06', '2018-09-07', '2018-09-08');
my $host = 'https://builderscon.io';

my $conference = {
    acronym => 'builderscon2018',
    title => 'Builderscon Tokyo 2018',
    start => $dates[0],
    end => $dates[-1],
    days => scalar @dates,
    # extracttable() counts one timetable row as 5 minutes.
    timeslot_duration => '00:05',
};

# Room name as reported by the session detail page => name used in the output.
my %roomcode = (
    '藤原洋記念ホール' => 'A 藤原洋記念ホール',
    '多目的教室1' => 'B 多目的教室1',
    '多目的教室2' => 'C 多目的教室2',
    '多目的教室3' => 'D 多目的教室3',
    'イベントホール' => 'E イベントホール',
    'HUB' => 'HUB',
);

my @days;
for my $day_no (0 .. $#dates) {
    my $date = $dates[$day_no];

    my $res = $ua->get($host . '/tokyo/2018/timetable?date=' . $date);
    my @srcentry;
    if ($res->is_success) {
        @srcentry = extracttable($res->content, $date);
    }
    else {
        # Keep going: the day is still emitted, just without any events.
        warn 'Failed to fetch timetable for ' . $date . ': ' . $res->status_line;
    }

    # Group the converted events by room name.
    my %room;
    for my $src (@srcentry) {
        my $e = g2frab($src);
        # An event without a room is grouped under the empty name instead
        # of using undef as a hash key.
        my $name = defined $e->{room} ? $e->{room} : '';
        push @{ $room{$name} }, $e;
    }

    my @day;
    for my $name (sort keys %room) {
        push @day, { '-name' => $name, event => $room{$name} };
    }

    push @days, {
        # Days are numbered 1..N in the order of @dates.
        '-index' => $day_no + 1,
        '-date' => $date,
        '-start' => $date . 'T10:00:00+09:00',
        '-end' => $date . 'T22:00:00+09:00',
        room => \@day,
    };
}

my $xml = {
    schedule => {
        version => 'copy',
        conference => $conference,
        day => \@days,
    }
};

my $tpp = XML::TreePP->new;
# Have the "version" element written before its siblings.
$tpp->set(first_out => ["version"]);
$tpp->writefile('builderscon2018.xml', $xml);
# Cut the session cells out of one day's timetable page.
#
# Arguments:
#   $html - HTML text of the timetable page
#   $date - date string; copied unchanged into every returned entry
#
# Returns a list of hash references, one per session cell, with the keys
# id, guid, date, startat, talktime, trackid, room, title, description,
# author, authorid and language. getdetail() is called once per session
# to fill in the room, description, author and language.
#
# Only lines between the line containing "<tbody" and the line containing
# "/tbody" are examined, and each table cell is expected on a single line.
sub extracttable {
    my ($html, $date) = @_;

    my $in_body = 0;    # true once the <tbody line has been seen
    my $col = -1;       # current column within the current row
    my $row = 0;        # current row number (1-based after the first <tr>)
    my @busy_until;     # per column: first row not covered by a rowspan
    my $time;           # content of the most recent time cell
    my @ret;

    LINE:
    for my $line (split /\n/, $html) {
        $line =~ s@\ch@@g;    # drop stray backspace characters (input glitch)

        if ($line =~ /<tbody/) {
            $in_body = 1;
            next LINE;
        }
        next LINE if !$in_body;
        last LINE if $line =~ m@/tbody@;

        if ($line =~ /<tr>/) {
            $col = 0;
            $row++;
            next LINE;
        }
        next LINE if $line =~ m@</tr>@;

        # Step over columns still occupied by a session that started in an
        # earlier row and spans down into this one.
        while (defined $busy_until[$col] && $busy_until[$col] > $row) {
            $col++;
        }

        if ($line !~ /<td/) {
            warn 'Unexpected line:' . $line;
            next LINE;
        }

        # Capture into lexicals so a failed match can never leave stale
        # values from an earlier match in use.
        my ($prop, $content) = $line =~ m@<td ([^>]*)>(.*)</td>@;
        if (!defined $prop) {
            warn 'Unparsable cell:' . $line;
            next LINE;
        }

        if ($prop =~ /time-cell/) {
            $time = $content;
            $col++;
            next LINE;
        }
        if ($prop =~ /empty/) {
            $col++;
            next LINE;
        }
        next LINE if $prop !~ /session/;

        # A cell without a rowspan attribute occupies exactly one row.
        my $timelen = $prop =~ /rowspan="(\d+)"/ ? $1 : 1;
        $busy_until[$col] = $row + $timelen;
        my $trackid = $col;
        $col++;

        # Non-greedy / negated-class captures so that further markup on the
        # same line cannot be swallowed into the URL or the title.
        my ($url, $title) =
            $content =~ m@<a class="title" href="([^"]+)">(.+?)</a>@;
        if (!defined $url) {
            warn 'Session cell without title link:' . $line;
            next LINE;
        }
        my $guid = $url =~ m@session/(.+)@ ? $1 : $url;

        my $d = getdetail($url);

        # Map the room name to its output name; a room missing from
        # %roomcode keeps its original name rather than becoming undef.
        my $room = $d->{room};
        if (defined $room && exists $roomcode{$room}) {
            $room = $roomcode{$room};
        }

        push @ret, {
            id => $guid,
            guid => $guid,
            date => $date,
            startat => $time,
            talktime => $timelen * 5,    # one table row is 5 minutes
            trackid => $trackid,
            room => $room,
            title => $title,
            description => $d->{description},
            author => $d->{author},
            authorid => $d->{authorid},
            language => $d->{language},
        };
    }
    return @ret;
}
# Fetch a session detail page and pull the speaker, description, room and
# language out of it.
#
# Arguments:
#   $url - site-relative URL of the session page (appended to $host)
#
# Returns a hash reference holding whichever of these keys were found:
#   authorurl, author, authorid, description, room, language
# The hash is empty when the page cannot be fetched. Nothing is recorded
# until the first target="_blank" link (taken as the speaker link) is seen.
sub getdetail {
    my $url = shift;
    my %ret;

    my $detail = $ua->get($host . $url);
    if (!$detail->is_success) {
        warn 'Failed to fetch ' . $host . $url . ': ' . $detail->status_line;
        return \%ret;
    }

    # Scanner state:
    #   author - still looking for the speaker link
    #   scan   - looking for the next section label
    #   desc   - collecting description lines
    #   room   - the current line should hold the room value
    #   lang   - the current line should hold the language value
    my $state = 'author';
    my $desc = '';

    LINE:
    for my $line (split /\n/, $detail->content) {
        $line =~ s@\ch@@g;    # drop stray backspace characters (input glitch)

        if ($state eq 'author') {
            # Negated-class / non-greedy captures keep a second link on the
            # same line from being swallowed into the URL or the name.
            if ($line =~ m@<a href="([^"]+)" target="_blank">(.+?)</a>@) {
                $ret{authorurl} = $1;
                $ret{author} = $2;
                # presumably a site-relative URL of the form /<section>/<id>;
                # the third "/"-separated piece is used as the id.
                my @part = split m@/@, $ret{authorurl};
                $ret{authorid} = $part[2];
                $state = 'scan';
            }
            next LINE;
        }

        if ($line =~ m@<h5>概要</h5>@) {
            $state = 'desc';
            next LINE;
        }
        if ($state eq 'desc') {
            if ($line =~ /center-align submit-btn/) {
                # This marker line ends the description section.
                $desc =~ s@^ *@@g;
                $ret{description} = $desc;
                $state = 'scan';
            }
            else {
                $desc .= $line;
            }
            next LINE;
        }

        if ($line =~ m@<td>部屋</td>@) {
            $state = 'room';
            next LINE;
        }
        if ($state eq 'room') {
            # Only record the room when the value cell really matches, so a
            # failed match cannot pick up a stale capture.
            if ($line =~ m@<td>(.+)</td>@) {
                $ret{room} = $1;
            }
            $state = 'scan';
            next LINE;
        }

        if ($line =~ m@<td>講演に使用される言語</td>@) {
            $state = 'lang';
            next LINE;
        }
        if ($state eq 'lang') {
            # Japanese unless the value cell says English.
            $ret{language} = $line =~ m@<td>英語</td>@ ? 'en' : 'ja';
            last LINE;    # the language is the last field of interest
        }
    }
    return \%ret;
}
# Convert one entry produced by extracttable() into a frab-style event.
#
# Arguments:
#   $s - hash reference with the keys id, date, startat, talktime, room,
#        title, description, author, authorid and language
#
# Returns a hash reference laid out for XML::TreePP: keys starting with
# "-" are attributes, all other keys are child elements.
sub g2frab {
    my $s = shift;
    my %e;

    $e{'-id'} = $s->{id};
    $e{date} = sprintf '%sT%s:00+09:00', $s->{date}, $s->{startat};
    $e{start} = $s->{startat};

    # talktime is a number of minutes. Render it as HH:MM so that talks of
    # an hour or more do not come out as e.g. "00:60". A missing or
    # non-numeric talktime counts as 0 minutes.
    my $minutes = 0;
    if (defined $s->{talktime} && $s->{talktime} =~ /(\d+)/) {
        $minutes = $1;
    }
    $e{duration} = sprintf('%02d:%02d', int($minutes / 60), $minutes % 60);

    $e{room} = $s->{room};
    $e{slug} = 'slag_' . $s->{id};
    $e{recording} = {
        license => $blank,
        optout => {
            '#text' => 'false'
        }
    };
    $e{title} = $s->{title};
    $e{subtitle} = $blank;
    $e{track} = $s->{room};    # the room name doubles as the track name
    $e{type} = 'lecture';
    $e{language} = $s->{language};
    $e{abstract} = $blank;
    $e{description} = $s->{description};
    $e{persons} = {
        person => {
            '-id' => $s->{authorid},
            '#text' => $s->{author},
        }
    };
    $e{links} = $blank;
    return \%e;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment