Created
October 29, 2019 06:41
-
-
Save miyagawa/35e562d6caf8fd9f736911dead7a5acc to your computer and use it in GitHub Desktop.
so-net XMLTV exporter
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env ruby
# coding: utf-8
require 'date'
require 'time'
require 'open-uri'
require 'nokogiri'
# A broadcast station (channel) listed on the So-net TV guide.
#
# +id+      - station identifier as used in the site's element ids and URLs
# +name+    - display name of the station
# +channel+ - channel number, kept as a string
class Station
  attr_accessor :id, :name, :channel

  # Returns the channel identifier used in the XMLTV output: the station id
  # followed by the guide's host name.
  def xmltv_id
    "#{id}.tv.so-net.ne.jp"
  end
end
# A single programme entry scraped from the schedule chart.
#
# +start+ and +stop+ are Time objects; +stop+ stays nil until it is known.
# +station+ is expected to respond to +id+. +replay+ and +new+ are flags
# taken from markers in the listing title.
class Program
  attr_accessor :title, :subtitle, :summary, :start, :stop, :station, :genres, :replay, :new

  # Starts with an empty genre list so consumers can always iterate +genres+,
  # even if nothing has been assigned yet.
  def initialize
    @genres = []
  end

  # Returns the URI of the programme's detail page, built from the station id
  # and the start time formatted as YYYYMMDDHHMM.
  def url
    timestamp = start.strftime("%Y%m%d%H%M")
    URI("https://tv.so-net.ne.jp/schedule/#{station.id}#{timestamp}.action")
  end
end
# A programme genre as labelled on the site, with an optional English
# category name for the XMLTV output.
class Genre
  # Maps the site's Japanese genre labels to English category names.
  # A nil value means the genre has no English equivalent here. Frozen so
  # the shared table cannot be modified at runtime.
  TRANSLATION = {
    "劇場/公演" => nil,
    "趣味/教育" => "Educational",
    "情報/ワイドショー" => nil,
    "映画" => "Movie",
    "福祉" => nil,
    "音楽" => "Music",
    "アニメ/特撮" => "Animated",
    "その他" => "Other",
    "ドラマ" => "Drama",
    "ニュース/報道" => "News",
    "スポーツ" => "Sports",
    "バラエティー" => "Talk Show",
    "ドキュメンタリー/教養" => "Documentary",
  }.freeze

  # The original (Japanese) genre label.
  attr_reader :text

  # text - the genre label exactly as it appears on the site.
  def initialize(text)
    @text = text
  end

  # Returns the English category name, or nil when the label is unknown or
  # has no translation.
  def translated
    TRANSLATION[@text]
  end
end
# Scrapes the So-net TV guide charts for one area and prints the collected
# stations and programmes as an XMLTV document on standard output.
#
# The area is taken from the AREA_ID environment variable (default '23').
class Fetcher
  # Programme detail link: captures the 6-digit station id and the 12-digit
  # start timestamp (YYYYMMDDHHMM).
  SCHEDULE_HREF = %r{/schedule/(?<station>\d{6})(?<time>\d{12})}.freeze
  # Element id of a station header cell; captures the station id.
  STATION_CELL_ID = /^cell-station-top-(\d+)/.freeze
  # Leading channel number in a station header link, e.g. " 1.".
  CHANNEL_NUMBER = /^\s*(\d+)\./.freeze
  # A run of bracketed markers followed by the rest of the line. Group 1 is
  # set only when the [再] marker occurs (treated as "replay"); group 2 is
  # the trailing text, used as the subtitle.
  TITLE_FLAGS = /(?:\[(?:字|解|デ|(再)|二|双)\])+(.*)$/.freeze
  # Leading [新] marker (sets the "new" flag).
  NEW_FLAG = /^\[新\]/.freeze
  # Subtitle wrapped in 【】 or 「」 at the end of the title.
  QUOTED_SUBTITLE = /(?:【|「)(.+?)(?:】|」)$/.freeze
  # Programme length in minutes as written on the detail page ("NN分").
  DURATION = /(\d+)分/.freeze
  # Timestamp layout used for XMLTV start/stop attributes.
  XMLTV_TIME = "%Y%m%d%H%M%S %z"

  def initialize
    @area = ENV.fetch('AREA_ID', '23') # Tokyo
    @stations = {}   # station id => Station
    @previous = {}   # station id => most recently seen Program
    @programs = []   # every Program, in scrape order
    @genre = {}      # genre id => Genre
  end

  # Returns the eight dates to fetch: today and the following seven days.
  # Date.today is read once so the list stays consecutive even if the call
  # straddles midnight.
  def dates
    today = Date.today
    (0..7).map { |offset| today + offset }
  end

  # Fetches every daily chart, resolves missing stop times and prints the
  # XMLTV document to standard output.
  def run
    dates.each do |date|
      fetch "https://tv.so-net.ne.jp/chart/#{@area}.action?head=#{date.strftime('%Y%m%d')}0000&span=24"
    end
    fixup_last_program
    puts emit_tv_xml
  end

  # Fills in the stop time of programmes that have none (no later programme
  # on the same station was seen) by reading the length in minutes from the
  # programme's detail page. Programmes whose page shows no length keep a
  # nil stop and are later omitted from the output.
  def fixup_last_program
    @programs.each do |program|
      next if program.stop

      warn "Fetching #{program.url}"
      doc = fetch_document(program.url)
      minutes = doc.css(".mainColumn dl.basicTxt dd").text[DURATION, 1]
      unless minutes
        warn "No duration found at #{program.url}"
        next
      end
      program.stop = program.start + minutes.to_i * 60
    end
  end

  # Downloads one chart page and records the stations, genres and programmes
  # found on it.
  #
  # url - chart page address (String).
  def fetch(url)
    warn "Fetching #{url}"
    doc = fetch_document(url)
    collect_stations(doc)
    collect_genres(doc)
    collect_programs(doc)
  end

  # Builds the XMLTV document from everything collected so far and returns
  # it as a String.
  def emit_tv_xml
    builder = Nokogiri::XML::Builder.new(encoding: "UTF-8") do |xml|
      xml.doc.create_internal_subset('tv', nil, "xmltv.dtd")
      xml.tv(:"source-info-url" => "https://tv.so-net.ne.jp/", :"source-info-name" => "So-Net TV") do
        @stations.each_value { |station| emit_channel(xml, station) }
        @programs.each { |program| emit_programme(xml, program) }
      end
    end
    builder.to_xml
  end

  private

  # Downloads +url+ (String or URI) and parses it as HTML. Uses URI.open,
  # since Kernel#open no longer handles URLs on Ruby 3.0+, and the block
  # form so the underlying IO is closed once parsing is done.
  def fetch_document(url)
    URI.open(url) { |io| Nokogiri::HTML(io) }
  end

  # Registers every station header cell not seen before.
  def collect_stations(doc)
    doc.css('.cell-station.cell-top').each do |cell|
      id = cell.attr('id').to_s[STATION_CELL_ID, 1]
      next if id.nil? || @stations.key?(id)

      station = Station.new
      station.id = id
      station.name = cell.attr('title')
      # Left nil when the link text has no leading channel number.
      station.channel = cell.css('a:not(.inlineArrow)').text[CHANNEL_NUMBER, 1]
      @stations[id] = station
    end
  end

  # Registers the genre filter labels, keyed by the value of their checkbox.
  def collect_genres(doc)
    doc.css('#chart-footer-genres li label').each do |label|
      input = label.css('input').first
      next unless input

      @genre[input.attr('value')] ||= Genre.new(label.text.strip)
    end
  end

  # Appends a Program for every schedule cell and closes the previous
  # programme of the same station at the new programme's start time.
  def collect_programs(doc)
    doc.css('.cell-schedule').each do |cell|
      link = cell.css('.td-schedule a.schedule-link').first
      next unless link

      match = SCHEDULE_HREF.match(link.attr('href').to_s)
      next unless match

      station_id = match[:station]
      station = @stations[station_id]
      # Without a station neither the detail URL nor the channel id can be built.
      next unless station

      start = Time.strptime(match[:time], '%Y%m%d%H%M')
      prev = @previous[station_id]
      # Defensive: an entry repeated with the same start (NOTE(review):
      # presumably a programme shown on two consecutive daily charts --
      # confirm) would otherwise give the earlier copy stop == start.
      next if prev && prev.start == start

      program = build_program(cell, link, station, start)
      prev.stop = start if prev
      @previous[station_id] = program
      @programs << program
    end
  end

  # Creates a Program from a schedule cell and its detail link.
  def build_program(cell, link, station, start)
    parsed = parse_title(link.text)
    program = Program.new
    program.summary = cell.css('.schedule-summary').text.strip
    program.start = start
    program.station = station
    program.genres = cell.attr('class').to_s.scan(/cell-genre-(\d+)/).flatten.map { |id| @genre[id] }.compact
    program.title = parsed[:title]
    program.subtitle = parsed[:subtitle]
    program.replay = parsed[:replay]
    program.new = parsed[:new]
    program
  end

  # Splits a raw listing title into its parts.
  #
  # Returns a Hash with :title (markers removed), :subtitle (String or nil),
  # :replay and :new (booleans).
  def parse_title(raw)
    info = { subtitle: nil, replay: false, new: false }
    title = raw.strip.gsub(TITLE_FLAGS) do
      info[:replay] = true if Regexp.last_match(1)
      trailing = Regexp.last_match(2).to_s.strip
      # The trailing group always matches, possibly as "", so only keep it
      # when it holds text; otherwise an empty sub-title would be emitted.
      info[:subtitle] = trailing unless trailing.empty?
      ''
    end
    title = title.sub(NEW_FLAG) do
      info[:new] = true
      ''
    end
    title = title.gsub(QUOTED_SUBTITLE) do
      info[:subtitle] = Regexp.last_match(1)
      ''
    end
    info.merge(title: title.strip)
  end

  # Display names for a station: name, "channel name" and channel, skipping
  # parts that are missing.
  def display_names(station)
    [
      station.name,
      [station.channel, station.name].compact.join(' '),
      station.channel
    ].compact.reject(&:empty?).uniq
  end

  # Writes one <channel> element.
  def emit_channel(xml, station)
    xml.channel(id: station.xmltv_id) do
      display_names(station).each { |name| xml.send(:"display-name", name) }
    end
  end

  # Writes one <programme> element; programmes without a known stop time or
  # station are left out.
  def emit_programme(xml, program)
    return unless program.stop && program.station

    xml.programme(
      start: program.start.strftime(XMLTV_TIME),
      stop: program.stop.strftime(XMLTV_TIME),
      channel: program.station.xmltv_id
    ) do
      xml.title(lang: "ja") { xml.text program.title }
      xml.send(:"sub-title", lang: "ja") { xml.text program.subtitle } if program.subtitle
      xml.desc(lang: "ja") { xml.text program.summary }
      Array(program.genres).each do |genre|
        xml.category(lang: "ja") { xml.text genre.text }
        xml.category(lang: "en") { xml.text genre.translated } if genre.translated
      end
      xml.send(:"previously-shown") if program.replay
      xml.new if program.new
    end
  end
end
# The scraped timestamps carry no zone and are parsed as local time, so pin
# the process time zone before anything is fetched.
ENV['TZ'] = 'Asia/Tokyo'
Fetcher.new.run
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment