Skip to content

Instantly share code, notes, and snippets.

@NewAlexandria
Last active August 30, 2023 19:17
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save NewAlexandria/c2d0ee0160631e4d0e88b0ce4b97c917 to your computer and use it in GitHub Desktop.
Save NewAlexandria/c2d0ee0160631e4d0e88b0ce4b97c917 to your computer and use it in GitHub Desktop.
POC code for extracting segment data from a HowTube.com HAR session.
#!/usr/bin/ruby
require 'json'
# Matches the segment file name inside a request URL, e.g. "segment12.ts".
seg_rx = /segment[0-9]+\.ts/
# Captures the numeric index of a downloaded segment file name.
file_num_rx = /^segment([0-9]+)/
pwd = Dir.pwd
# Toggle removal of the per-stream segment directories / intermediate joins.
clean_sources = false
clean_joins = false
# acts on the latest HAR file, if none provided
# (the regex previously used here had no capture group, so `[1]` was always
# nil and an explicitly passed HAR path was ignored)
filename = ARGV[0] if ARGV[0]&.end_with?('.har')
filename ||= Dir.glob('*.har').max_by { |f| File.mtime(f) }
abort 'no .har file given and none found in the current directory' unless filename
target_file = File.basename(filename)
puts target_file
har = JSON.parse(File.read(filename))
puts har.size
# One record per HAR entry: the request URL and the response Content-Length
# (0 when the header is absent). Header names are compared case-insensitively
# because captures may record them in lower case.
entries = har.dig('log', 'entries') || []
parts = entries.map do |e|
  headers = e.dig('response', 'headers') || []
  len_header = headers.find { |h| h['name'].to_s.casecmp?('content-length') }
  { url: e.dig('request', 'url'), length: len_header ? len_header['value'].to_i : 0 }
end
# Group by segment file name; entries whose URL is not a segment (or is
# missing) are skipped instead of raising on a failed match.
segs = parts
  .select { |s| s[:url].to_s.match?(seg_rx) }
  .group_by { |s| s[:url][seg_rx] }
puts segs.size
# video
# For every segment name, the captured entry with the largest Content-Length
# is the one downloaded in this pass.
puts 'download video'
vid_segs = segs.map { |_name, entries| entries.max_by { |v| v[:length].to_i } }
vid_urls = vid_segs.map { |s| s[:url] }
# Argument-list form of system: the URL is passed to yt-dlp verbatim and is
# never parsed by a shell (query strings with & or ? stay intact).
vid_urls.each { |url| system('yt-dlp', url) }
# Only files that carry a numeric index can be ordered; others are ignored.
vid_ts_files = Dir.children('.').select { |f| f.match?(file_num_rx) }
vid_ts_filepaths_sorted = vid_ts_files
  .sort_by { |f| f[file_num_rx, 1].to_i }
  .map { |f| File.join(pwd, f) }
puts 'join video'
# Concatenate the segments in index order into a single transport stream.
vid_joined_ts = "#{target_file}.all_video.ts"
File.open(vid_joined_ts, 'wb') do |out|
  vid_ts_filepaths_sorted.each { |path| IO.copy_stream(path, out) }
end
# Remux to mp4 without re-encoding.
system('ffmpeg', '-i', vid_joined_ts, '-acodec', 'copy', '-vcodec', 'copy', "#{target_file}.all_video.mp4")
# Archive the raw segments in a freshly emptied "<har>--video" directory so
# the working directory is clear of segment files afterwards.
vid_dir = "#{target_file}--video"
system('mkdir', '-p', vid_dir)
vid_stale = Dir.glob('*', base: vid_dir).map { |c| File.join(vid_dir, c) }
system('rm', '-rf', *vid_stale) unless vid_stale.empty?
vid_downloaded = Dir.glob('segment*')
system('mv', *vid_downloaded, "#{vid_dir}/") unless vid_downloaded.empty?
# audio
# For every segment name, the captured entry with the smallest Content-Length
# is the one downloaded in this pass.
puts 'download audio'
aud_segs = segs.map { |_name, entries| entries.min_by { |v| v[:length].to_i } }
aud_urls = aud_segs.map { |s| s[:url] }
# Argument-list form of system: the URL is passed to yt-dlp verbatim and is
# never parsed by a shell (query strings with & or ? stay intact).
aud_urls.each { |url| system('yt-dlp', url) }
# Only files that carry a numeric index can be ordered; others are ignored.
aud_ts_files = Dir.children('.').select { |f| f.match?(file_num_rx) }
aud_ts_filepaths_sorted = aud_ts_files
  .sort_by { |f| f[file_num_rx, 1].to_i }
  .map { |f| File.join(pwd, f) }
puts 'join audio'
# Concatenate the segments in index order into a single transport stream.
aud_joined_ts = "#{target_file}.all_audio.ts"
File.open(aud_joined_ts, 'wb') do |out|
  aud_ts_filepaths_sorted.each { |path| IO.copy_stream(path, out) }
end
# Remux without re-encoding, then rename the result to .mp3; only the file
# name changes, the contents are whatever ffmpeg wrote to the .mp4.
system('ffmpeg', '-i', aud_joined_ts, '-acodec', 'copy', '-vcodec', 'copy', "#{target_file}.all_audio.mp4")
system('mv', "#{target_file}.all_audio.mp4", "#{target_file}.all_audio.mp3")
# Archive the raw segments in a freshly emptied "<har>--audio" directory.
aud_dir = "#{target_file}--audio"
system('mkdir', '-p', aud_dir)
aud_stale = Dir.glob('*', base: aud_dir).map { |c| File.join(aud_dir, c) }
system('rm', '-rf', *aud_stale) unless aud_stale.empty?
aud_downloaded = Dir.glob('segment*')
system('mv', *aud_downloaded, "#{aud_dir}/") unless aud_downloaded.empty?
# join
# Mux the joined video and audio files into one mp4, copying both streams
# without re-encoding. Arguments are passed as a list so file names with
# spaces or shell metacharacters are handled correctly.
puts 'join all'
system('ffmpeg',
       '-i', "#{target_file}.all_video.mp4",
       '-i', "#{target_file}.all_audio.mp3",
       '-c', 'copy',
       "#{target_file}.g.mp4")
# clean
puts 'clean'
if clean_joins
  # Remove the intermediate per-stream joins; the muxed .g.mp4 is kept.
  %w[all_video.mp4 all_video.ts all_audio.mp3 all_audio.ts].each do |suffix|
    system('rm', '-rf', "#{target_file}.#{suffix}")
  end
end
if clean_sources
  # Remove the directories holding the raw downloaded segments.
  %w[audio video].each do |stream|
    system('rm', '-rf', "#{target_file}--#{stream}")
  end
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment