Skip to content

Instantly share code, notes, and snippets.

@Coro365
Last active May 21, 2020 11:20
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save Coro365/c5208122060f795834c1bfedf95f0aa9 to your computer and use it in GitHub Desktop.
Save Coro365/c5208122060f795834c1bfedf95f0aa9 to your computer and use it in GitHub Desktop.
Download Reuters video
require 'open-uri'
require 'fileutils'
# Natural ("human") ordering for collections of strings, so that e.g.
# "part2" sorts before "part10". Mix into (or extend) any Enumerable whose
# elements are strings.
module Natural_sort
  # Returns the elements sorted so that embedded digit runs compare by their
  # numeric value instead of character by character.
  #
  # Every digit run is zero-padded to the width of the longest digit run in
  # the whole collection, and the padded strings are compared as plain
  # strings. Elements are sorted directly (no intermediate Hash), so
  # duplicate elements are kept in the result.
  #
  # @return [Array<String>] the elements in natural order
  def natural_sort
    width = max_num_length_of(self)
    # width is nil when no element contains a digit, but in that case the
    # gsub block below never runs, so the nil is never interpolated.
    sort_by do |name|
      name.gsub(/\d+/) { |digits| format("%0#{width}d", digits.to_i) }
    end
  end

  # Length of the longest run of consecutive digits found in any of +names+.
  #
  # @param names [Enumerable<String>] strings to inspect
  # @return [Integer, nil] nil when no name contains a digit
  def max_num_length_of(names)
    names.flat_map { |name| name.scan(/\d+/).map(&:length) }.max
  end
end
# Including at the top level mixes Natural_sort into Object, which is what
# lets join_ts call natural_sort directly on the Array returned by Dir.glob.
include Natural_sort
# Joins the downloaded .ts segments into one mp4 file using ffmpeg.
#
# Segments are taken from temp_ts/ next to this script, in natural order,
# fed to ffmpeg through its concat: input and copied ('-c copy') into
# mp4/<id>-<title>.mp4. An existing output file is overwritten ('-y').
#
# @param m3u8 [Hash] provides :id and :title, which form the output file name
# @return [Boolean, nil] whatever Kernel#system returns for the ffmpeg run
def join_ts(m3u8)
  segment_glob = File.join(__dir__, 'temp_ts', '*')
  segments = Dir.glob(segment_glob).natural_sort

  output_name = [m3u8[:id], m3u8[:title]].join('-') + '.mp4'
  output_path = File.join(__dir__, 'mp4', output_name)

  system(
    '/usr/local/bin/ffmpeg', '-y',
    '-i', 'concat:' + segments.join('|'),
    '-c', 'copy',
    output_path
  )
end
# Removes the temp_ts working directory next to this script, together with
# everything inside it. rm_rf is used, so a missing directory is not an error.
def delete_temp_dir
  temp_dir = File.join(__dir__, 'temp_ts')
  FileUtils.rm_rf(temp_dir)
end
# Lists the ids of the videos already saved under mp4/ next to this script.
#
# The id is the part of each file name that precedes the first '-'.
#
# @return [Array<String>] one entry per file found in mp4/
def check_downloaded
  saved_files = Dir.glob(File.join(__dir__, 'mp4', '*'))
  saved_files.map do |path|
    file_name = File.basename(path)
    file_name.split('-').first
  end
end
# Appends one line of the form "<current time>, <recode>" to downlod.log,
# located next to this script.
#
# @param recode [String] text to log (the script passes a video id)
# @return [Integer] number of bytes written
def add_log(recode)
  entry = recode + "\n"
  line = "#{Time.now}, #{entry}"
  log_path = File.join(__dir__, 'downlod.log')
  File.open(log_path, 'a') { |log| log.write(line) }
end
# Fetches the Reuters video page and extracts every playlist it advertises.
#
# @return [Array<Array(String, String)>] unique [m3u8 url, title] pairs, in
#   the order they appear in the page
def scrap_quality_m3u8_urls
  puts('Scrap quality m3u8')
  page = URI.open('https://jp.reuters.com/video/').read
  # Capture 1: playlist url, capture 2: title of the share block that follows.
  pattern = %r["file":"(https://ajo.prod.reuters.tv/rest/v2/playlist/assets.*?\.m3u8)","share":{"title":"(.*?)","url"]
  page.scan(pattern).uniq
end
# Extracts the video id from a playlist url: the first '/'-separated
# segment that contains at least six consecutive digits.
#
# @param url [String]
# @return [String, nil] the matching segment, or nil when there is none
def to_id(url)
  url.split('/').find { |segment| segment.match?(/\d{6}/) }
end
# Downloads +url+ and stores the body at +file_path+.
#
# The block form of URI.open is used so the underlying IO is closed as soon
# as the body has been read, and the data is written in binary mode because
# the payload (.ts segments) is not text.
#
# @param file_path [String, Array<String>] destination path, or its
#   components (they are combined with File.join)
# @param url [String] location to fetch
# @return [Integer] number of bytes written
def downlod(file_path, url)
  body = URI.open(url, &:read)
  File.binwrite(File.join(file_path), body)
end
# ---- Script body ----------------------------------------------------------

# Keep only the videos that are not saved yet and whose title contains '字幕'.
saved_video_ids = check_downloaded
quality_m3u8_urls = scrap_quality_m3u8_urls.reject do |m3u8_url, title|
  saved_video_ids.include?(to_id(m3u8_url)) || !title.include?('字幕')
end

# For each remaining video, read its playlist and keep the url on its last line.
puts('Download quality m3u8')
x1080_m3u8s = quality_m3u8_urls.map do |m3u8_url, title|
  # Drop everything up to each '日', then map 「 」 、 to spaces.
  # NOTE(review): the alphanumeric ranges in both tr sets are identical here;
  # presumably the first set was meant to be full-width characters - confirm.
  clean_title = title.gsub(/(.*?日)/,'').tr('0-9a-zA-Z=!?「」 、', '0-9a-zA-Z=!? ')
  # NOTE(review): assumes the last playlist line is the wanted (1080?) variant - confirm.
  variant_url = URI.open(m3u8_url, &:read).split("\n").last
  { url: variant_url, id: to_id(m3u8_url), title: clean_title }
end

puts("Download #{x1080_m3u8s.size}video")
x1080_m3u8s.each do |x1080_m3u8|
  add_log(x1080_m3u8[:id])
  # temp_ts is deleted at the end of every iteration, so recreate it each time.
  FileUtils.mkdir_p(File.join(__dir__, 'temp_ts'))
  FileUtils.mkdir_p(File.join(__dir__, 'mp4'))

  puts("Download ts file (#{x1080_m3u8[:id]})")
  playlist_lines = URI.open(x1080_m3u8[:url], &:read).split("\n").map(&:strip)
  # Segment urls are the lines that are neither blank nor '#' tags/comments;
  # blank lines must be skipped or they would be "downloaded" as urls.
  segment_urls = playlist_lines.reject { |line| line.empty? || line.start_with?('#') }
  segment_urls.each do |ts_url|
    downlod([__dir__, 'temp_ts', File.basename(ts_url)], ts_url)
  end

  puts('Convert mp4')
  join_ts(x1080_m3u8)
  sleep 2
  delete_temp_dir
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment