Skip to content

Instantly share code, notes, and snippets.

@jasalt
Created October 30, 2023 06:22
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save jasalt/c9369ff616d30466aa05e76400e67d58 to your computer and use it in GitHub Desktop.
Save jasalt/c9369ff616d30466aa05e76400e67d58 to your computer and use it in GitHub Desktop.
# Convert an SRT file (e.g. from pywhispercpp) into CSV format, with each subtitle's start position also given in seconds so that a YouTube time link can be created.
# This can be useful for annotating livestream recordings in a spreadsheet based on their transcription.
# Deps (Python 3.11):
# pip install srt
import csv
import srt
def srt_to_csv(srt_file, youtube_link):
    """Convert an SRT subtitle file into a CSV file with YouTube time links.

    The output is written next to the input as ``<srt_file>.csv`` with the
    columns ``subtitle``, ``start_time``, ``end_time``, ``start_s`` and
    ``youtube_link``. ``start_s`` is the whole number of seconds between the
    start of the first subtitle and the start of the current one, and
    ``youtube_link`` is ``youtube_link`` with a ``t=<start_s>s`` parameter
    appended.

    Args:
        srt_file: Path of the UTF-8 encoded SRT file to read.
        youtube_link: Base video URL that the time parameter is appended to.

    An SRT file without any subtitles produces a CSV containing only the
    header row.
    """
    # Read and parse the whole SRT file up front.
    with open(srt_file, 'r', encoding='utf-8') as file:
        subs = list(srt.parse(file.read()))

    # Use '?' when the base URL has no query string yet (e.g. short links),
    # otherwise '&', so the generated link stays well-formed.
    separator = '&' if '?' in youtube_link else '?'

    # NOTE(review): offsets are measured from the first subtitle's start, not
    # from 0 -- if the first cue does not begin at 0 the links will be early
    # by that amount. Confirm this is intended.
    start_time = subs[0].start if subs else None

    # Create the CSV file and write the headers.
    outfile = f"{srt_file}.csv"
    with open(outfile, 'w', newline='', encoding='utf-8') as file:
        writer = csv.writer(file)
        writer.writerow(['subtitle', 'start_time', 'end_time', 'start_s', 'youtube_link'])

        # Write one row per subtitle.
        for sub in subs:
            # total_seconds() covers the full duration; the .seconds attribute
            # only holds the sub-day remainder and wraps after 24 hours.
            seconds_from_start = int((sub.start - start_time).total_seconds())
            yt_link = f"{youtube_link}{separator}t={seconds_from_start}s"
            writer.writerow([sub.content, sub.start, sub.end, seconds_from_start, yt_link])

    print(f"CSV file '{outfile}' created successfully.")
if __name__ == "__main__":
    # Run the example conversion only when executed as a script, so that
    # importing this module does not trigger any file I/O.
    srt_to_csv('video1.mp4.srt', 'https://www.youtube.com/watch?v=XXXXXXXX')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment