Skip to content

Instantly share code, notes, and snippets.

@miyagawa
Created October 29, 2019 06:41
Show Gist options
  • Save miyagawa/35e562d6caf8fd9f736911dead7a5acc to your computer and use it in GitHub Desktop.
Save miyagawa/35e562d6caf8fd9f736911dead7a5acc to your computer and use it in GitHub Desktop.
so-net XMLTV exporter
#!/usr/bin/env ruby
# coding: utf-8
require 'date'
require 'open-uri'
require 'nokogiri'
# A broadcast station taken from the chart page: the site's numeric id,
# the display name and the channel number.
class Station
  # Appended to the station id to build the XMLTV channel identifier.
  XMLTV_ID_SUFFIX = ".tv.so-net.ne.jp"

  attr_accessor :id
  attr_accessor :name
  attr_accessor :channel

  # @return [String] the station id followed by ".tv.so-net.ne.jp"
  def xmltv_id
    id + XMLTV_ID_SUFFIX
  end
end
# A single programme entry. All fields are plain read/write attributes
# filled in by whoever builds the object.
class Program
  attr_accessor :title, :subtitle, :summary
  attr_accessor :start, :stop
  attr_accessor :station, :genres
  attr_accessor :replay, :new

  # Address of this programme's detail page, built from the station id and
  # the start time formatted as YYYYMMDDHHMM.
  #
  # @return [URI]
  def url
    station_id = station.id
    timestamp = start.strftime("%Y%m%d%H%M")
    URI("https://tv.so-net.ne.jp/schedule/#{station_id}#{timestamp}.action")
  end
end
# A programme genre identified by the label text used on the source site,
# with an optional English name looked up from TRANSLATION.
class Genre
  # Site genre label => English category name. A nil value (or a missing
  # key) means no English name is available for that label.
  TRANSLATION = {
    "劇場/公演" => nil,
    "趣味/教育" => "Educational",
    "情報/ワイドショー" => nil,
    "映画" => "Movie",
    "福祉" => nil,
    "音楽" => "Music",
    "アニメ/特撮" => "Animated",
    "その他" => "Other",
    "ドラマ" => "Drama",
    "ニュース/報道" => "News",
    "スポーツ" => "Sports",
    "バラエティー" => "Talk Show",
    "ドキュメンタリー/教養" => "Documentary",
  }

  # @return [String] the genre label as given to the constructor
  attr_reader :text

  # @param text [String] genre label text
  def initialize(text)
    @text = text
  end

  # @return [String, nil] English name for this genre, or nil when none is listed
  def translated
    TRANSLATION[text]
  end
end
# Scrapes the So-net TV guide chart pages and renders the result as XMLTV.
#
# State collected while fetching:
# * @stations - station id (String) => Station
# * @genre    - genre id (String) => Genre, read from the chart's genre list
# * @previous - station id => last Program seen for that station; used to
#               derive stop times from the next programme's start
# * @programs - every Program found, in the order encountered
class Fetcher
  # Station id (6 digits) and start time (12 digits) embedded in a schedule link.
  SCHEDULE_HREF = %r{/schedule/(?<station>\d{6})(?<time>\d{12})}
  # A run of bracketed markers followed by the rest of the line.
  # Capture 1 is set when one of the markers is [再]; capture 2 is the trailing text.
  TITLE_MARKERS = /(?:\[(?:字|解|デ|(再)|二|双)\])+(.*)$/
  # Leading [新] marker.
  NEW_MARKER = /^\[新\]/
  # Trailing text enclosed in 【】 or 「」, treated as the subtitle.
  BRACKETED_SUBTITLE = /(?:【|「)(.+?)(?:】|」)$/
  # Duration on a programme detail page, e.g. "30分".
  DURATION_MINUTES = /(\d+)分/

  def initialize
    @area = ENV['AREA_ID'] || '23' # Tokyo
    @stations = {}
    @previous = {}
    @programs = []
    @genre = {}
  end

  # @return [Array<Date>] today and the following seven days
  def dates
    (0..7).map { |offset| Date.today + offset }
  end

  # Fetches one 24-hour chart per date, fills in missing stop times and
  # prints the XMLTV document to standard output.
  def run
    dates.each do |date|
      fetch "https://tv.so-net.ne.jp/chart/#{@area}.action?head=#{date.strftime('%Y%m%d')}0000&span=24"
    end
    fixup_last_program
    puts emit_tv_xml
  end

  # Programmes with no following programme on the same station have no stop
  # time yet. For each of them, load the detail page and add the duration
  # found there to the start time. If no duration can be found the programme
  # keeps a nil stop (and is therefore left out of the XMLTV output).
  def fixup_last_program
    @programs.each do |program|
      next if program.stop

      warn "Fetching #{program.url}"
      # URI.open instead of Kernel#open: open-uri no longer hooks Kernel#open on Ruby 3.
      doc = Nokogiri::HTML(URI.open(program.url))
      duration = DURATION_MINUTES.match(doc.css(".mainColumn dl.basicTxt dd").text)
      unless duration
        warn "No duration found at #{program.url}"
        next
      end
      program.stop = program.start + duration[1].to_i * 60
    end
  end

  # Downloads one chart page and records the stations, genres and programmes on it.
  #
  # @param url [String] chart page address
  def fetch(url)
    warn "Fetching #{url}"
    doc = Nokogiri::HTML(URI.open(url))
    parse_stations(doc)
    parse_genres(doc)
    parse_programs(doc)
  end

  # @return [String] the XMLTV document for everything collected so far
  def emit_tv_xml
    builder = Nokogiri::XML::Builder.new(encoding: "UTF-8") do |xml|
      xml.doc.create_internal_subset('tv', nil, "xmltv.dtd")
      xml.tv(:"source-info-url" => "https://tv.so-net.ne.jp/", :"source-info-name" => "So-Net TV") do
        @stations.each_value { |station| emit_channel(xml, station) }
        @programs.each { |program| emit_programme(xml, program) }
      end
    end
    builder.to_xml
  end

  private

  # Registers every station header cell not seen on an earlier page.
  def parse_stations(doc)
    doc.css('.cell-station.cell-top').each do |cell|
      id_match = /^cell-station-top-(\d+)/.match(cell.attr('id').to_s)
      next unless id_match
      next if @stations.key?(id_match[1])

      # The link text is expected to start with "<channel>."; keep the station
      # (with a nil channel) when it does not, rather than failing on nil.
      channel_match = /^\s*(\d+)\./.match(cell.css('a:not(.inlineArrow)').text)
      station = Station.new
      station.id = id_match[1]
      station.name = cell.attr('title')
      station.channel = channel_match && channel_match[1]
      @stations[station.id] = station
    end
  end

  # Records the genre id => label pairs from the chart's genre list.
  def parse_genres(doc)
    doc.css('#chart-footer-genres li label').each do |label|
      input = label.css('input').first
      next unless input

      @genre[input.attr('value')] ||= Genre.new(label.text.strip)
    end
  end

  # Builds a Program for every schedule cell that has a recognisable link and
  # a known station, and closes the previous programme on the same station.
  def parse_programs(doc)
    doc.css('.cell-schedule').each do |cell|
      link = cell.css('.td-schedule a.schedule-link').first
      next unless link

      href = SCHEDULE_HREF.match(link.attr('href').to_s)
      next unless href

      station = @stations[href[:station]]
      unless station
        warn "Skipping programme for unknown station #{href[:station]}"
        next
      end

      program = Program.new
      program.summary = cell.css('.schedule-summary').text.strip
      program.start = Time.strptime(href[:time], '%Y%m%d%H%M')
      program.station = station
      program.genres = cell.attr('class').to_s.scan(/cell-genre-(\d+)/).flatten.map { |id| @genre[id] }.compact
      program.title = parse_title(program, link.text.strip)

      # The start of this programme is the stop of the one before it on the same station.
      previous = @previous[href[:station]]
      previous.stop = program.start if previous
      @previous[href[:station]] = program
      @programs << program
    end
  end

  # Strips markers and subtitle text from a raw link title, setting
  # program.replay, program.new and program.subtitle along the way.
  #
  # @return [String] the cleaned title
  def parse_title(program, raw)
    title = raw.gsub(TITLE_MARKERS) do
      markers = Regexp.last_match
      program.replay = true if markers[1]
      # The trailing capture always matches, possibly as ""; only a
      # non-empty remainder counts as a subtitle.
      remainder = markers[2].strip
      program.subtitle = remainder unless remainder.empty?
      ''
    end
    title = title.sub(NEW_MARKER) do
      program.new = true
      ''
    end
    title = title.gsub(BRACKETED_SUBTITLE) do
      program.subtitle = Regexp.last_match(1)
      ''
    end
    title.strip
  end

  # Writes one <channel> element with display names "name", "channel name"
  # and "channel"; parts that are nil are left out.
  def emit_channel(xml, station)
    labels = [
      station.name,
      [station.channel, station.name].compact.join(" "),
      station.channel,
    ]
    xml.channel(id: station.xmltv_id) do
      labels.compact.uniq.each { |label| xml.send(:"display-name", label) }
    end
  end

  # Writes one <programme> element; programmes without a stop time are skipped.
  def emit_programme(xml, program)
    return unless program.stop

    xml.programme(
      start: program.start.strftime("%Y%m%d%H%M%S %z"),
      stop: program.stop.strftime("%Y%m%d%H%M%S %z"),
      channel: program.station.xmltv_id,
    ) do
      xml.title(lang: "ja") { xml.text program.title }
      xml.send(:"sub-title", lang: "ja") { xml.text program.subtitle } if program.subtitle
      xml.desc(lang: "ja") { xml.text program.summary }
      program.genres.each do |genre|
        xml.category(lang: "ja") { xml.text genre.text }
        xml.category(lang: "en") { xml.text genre.translated } if genre.translated
      end
      xml.send :"previously-shown" if program.replay
      xml.new if program.new
    end
  end
end
# Scraped start times carry no zone and are parsed as local time, so pin the
# process zone. NOTE(review): presumably the site lists times in Japan
# Standard Time - confirm.
ENV['TZ'] = 'Asia/Tokyo'

# Only scrape when executed as a script, so the classes above can be loaded
# (for example from a test) without triggering network requests.
Fetcher.new.run if __FILE__ == $PROGRAM_NAME
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment