Create a gist now

Instantly share code, notes, and snippets.

What would you like to do?
WPM Counts for YouTube Videos
require 'words_counted'
require 'webvtt'
require 'time'
require 'youtube-dl.rb'
require 'yt' # JSON error with youtube-dl.rb
# Estimates words-per-minute for a YouTube video from its auto-generated
# subtitles: fetches the subtitle track (video download is skipped), counts the
# words of each newly seen caption line, then prints the overall WPM followed
# by a per-minute word count.

# Converts an "HH:MM:SS.mmm" (or "MM:SS.mmm") timestamp into seconds.
# Plain arithmetic is used instead of Time.parse, which would attach today's
# local date to the value and make the result depend on when/where this runs.
to_seconds = lambda do |timestamp|
  timestamp.to_s.split(':').reduce(0.0) { |total, part| (total * 60) + part.to_f }
end

# Window of the most recently counted lines; a line still inside the window is
# not counted again.
# NOTE(review): presumably this is here because auto-generated cues repeat the
# previous line as the captions scroll -- confirm against a sample .vtt file.
line_buffer = []
max_buffer_size = 5
num_words = 0
audio_start = nil # start of the first cue, in seconds (nil until a cue is seen)
audio_end = nil   # end of the last cue seen so far, in seconds
minute_counts = [] # index = minute of the video, value = words first seen in that minute

# YouTube video key
key = "8KkKuTCFvzI"
options = {
  'skip-download': true,
  'write-auto-sub': true,
  'output': "captions/#{key}"
}
YoutubeDL.download "https://www.youtube.com/watch?v=#{key}", options

webvtt = WebVTT.read("captions/#{key}.en.vtt")
webvtt.cues.each do |cue|
  puts cue.inspect # trace of every cue as it is processed

  cue_start = to_seconds.call(cue.start)
  current_minute = (cue_start / 60).to_i

  # Strip inline markup tags, then handle the cue one text line at a time.
  lines = cue.text.gsub(/<\/?[^>]*>/, "").split(/\n/)
  lines.each do |line|
    next if line_buffer.include?(line)

    line_buffer << line
    current_line_count = WordsCounted.count(line).token_count
    num_words += current_line_count
    # Slots for minutes that have not been written yet are nil; to_i maps nil to 0.
    minute_counts[current_minute] = minute_counts[current_minute].to_i + current_line_count
    line_buffer.shift if line_buffer.size > max_buffer_size
  end

  # Track the span covered by the cues: first start through latest end.
  audio_start ||= cue_start
  audio_end = to_seconds.call(cue.end)
end

# With no cues at all there is nothing to measure; report a zero-length stream
# instead of failing while computing the duration.
elapsed_seconds = audio_start ? audio_end - audio_start : 0.0
duration = elapsed_seconds / 60
# Avoid Infinity/NaN when the measured span is empty.
wpm = duration > 0 ? num_words / duration : 0.0

puts "Audio stream length (min): #{duration}"
puts "Word count: #{num_words}"
puts "WPM: #{wpm}"
puts Time.at(elapsed_seconds).utc.strftime("%H:%M:%S")

# One "minute, words" row per minute; minutes with no new words print 0.
minute_counts.each_with_index do |count, minute|
  puts "#{minute}, #{count.to_i}"
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment