Skip to content

Instantly share code, notes, and snippets.

@willprice
Last active February 20, 2022 19:27
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save willprice/9175c7ad02c2597ce7060e10bca40179 to your computer and use it in GitHub Desktop.
Save willprice/9175c7ad02c2597ce7060e10bca40179 to your computer and use it in GitHub Desktop.
Convert CSV to SRT subtitles
import argparse
from pathlib import Path
import numpy as np
import pandas as pd
# Command-line interface: reads a CSV with one timestamped point per row and
# writes a CSV in which every row carries a start and a stop timestamp.
parser = argparse.ArgumentParser(
    description=(
        "Convert a CSV of timestamped points into a CSV of segments, where each "
        "segment runs from one point's timestamp to the next point's timestamp"
    ),
    formatter_class=argparse.ArgumentDefaultsHelpFormatter,
)
parser.add_argument(
    "in_csv", type=Path, help="Source CSV with one timestamped point per row"
)
parser.add_argument(
    "out_csv", type=Path, help="Destination CSV with start/stop timestamp columns"
)
parser.add_argument(
    "--timestamp-col",
    type=str,
    default="timestamp",
    help="Column holding the HH:MM:SS[.fff] timestamp of each point",
)
# The last point has no successor, so its stop time comes from this value.
parser.add_argument(
    "--duration",
    type=float,
    help="Stop time, in seconds, used for the final segment",
)
def main(args):
    """Turn a CSV of timestamped points into a CSV of start/stop segments.

    Each row's segment starts at its own timestamp and stops at the timestamp
    of the following row; the final segment stops at ``args.duration``.

    Args:
        args: parsed command-line namespace providing ``in_csv``, ``out_csv``,
            ``timestamp_col`` and ``duration``.

    Raises:
        SystemExit: if ``--duration`` was not supplied or the timestamp
            column is missing from the input CSV.
    """
    if args.duration is None:
        # Without it the last segment has no stop time; fail with a clear
        # message instead of a TypeError deep inside the formatting code.
        raise SystemExit(
            "--duration is required: it is used as the stop time of the final segment."
        )

    points = pd.read_csv(args.in_csv)
    point_timestamp_col = args.timestamp_col
    if point_timestamp_col not in points.columns:
        raise SystemExit(
            f"{point_timestamp_col!r} not present in columns ({list(points.columns)})."
        )
    start_timestamp_col = "start_" + point_timestamp_col
    stop_timestamp_col = "stop_" + point_timestamp_col

    start_times = (
        points[point_timestamp_col].apply(timestamp_to_seconds).to_numpy(dtype=float)
    )
    # Shift the start times by one row and close the last segment with the
    # duration. Trimming to len(start_times) keeps an empty input empty.
    stop_times = np.append(start_times[1:], args.duration)[: len(start_times)]

    segments = pd.DataFrame(
        {
            start_timestamp_col: [seconds_to_timestamp(s) for s in start_times],
            stop_timestamp_col: [seconds_to_timestamp(s) for s in stop_times],
        }
    )
    # Carry the remaining columns over in their original order (a set
    # difference would shuffle them from run to run).
    for col in points.columns:
        if col != point_timestamp_col:
            segments[col] = points[col].values
    segments.to_csv(args.out_csv, index=False)
def seconds_to_timestamp(total_seconds: float) -> str:
    """Convert seconds into a timestamp

    The value is rounded to whole milliseconds *before* it is split into
    hours, minutes and seconds, so a rounding carry propagates correctly
    (e.g. ``59.9996`` becomes ``00:01:00.000`` rather than ``00:00:60.000``).

    Args:
        total_seconds: time in seconds

    Returns:
        timestamp representing ``total_seconds``, formatted ``HH:MM:SS.mmm``

    Examples:
        >>> seconds_to_timestamp(1)
        '00:00:01.000'
        >>> seconds_to_timestamp(1.1)
        '00:00:01.100'
        >>> seconds_to_timestamp(60)
        '00:01:00.000'
        >>> seconds_to_timestamp(61)
        '00:01:01.000'
        >>> seconds_to_timestamp(60 * 60 + 1)
        '01:00:01.000'
        >>> seconds_to_timestamp(60 * 60 + 60 + 1)
        '01:01:01.000'
        >>> seconds_to_timestamp(1225.78500002)
        '00:20:25.785'
        >>> seconds_to_timestamp(59.9996)
        '00:01:00.000'
    """
    # int(...) also normalises numpy scalars to a plain Python int.
    total_ms = int(round(float(total_seconds) * 1000))
    hh, remainder_ms = divmod(total_ms, 60 * 60 * 1000)
    mm, remainder_ms = divmod(remainder_ms, 60 * 1000)
    ss, ms = divmod(remainder_ms, 1000)
    return "{:02d}:{:02d}:{:02d}.{:03d}".format(hh, mm, ss, ms)
def timestamp_to_seconds(timestamp: str) -> float:
    """Parse an ``HH:MM:SS[.FractionalPart]`` timestamp into seconds.

    Args:
        timestamp: colon-separated hours, minutes and seconds; the seconds
            field may carry a fractional part.

    Returns:
        The total number of seconds that ``timestamp`` represents.

    Raises:
        ValueError: if ``timestamp`` does not consist of exactly three
            colon-separated numeric fields.

    Examples:
        >>> timestamp_to_seconds("00:00:00")
        0.0
        >>> timestamp_to_seconds("00:00:05")
        5.0
        >>> timestamp_to_seconds("00:00:05.5")
        5.5
        >>> timestamp_to_seconds("00:01:05.5")
        65.5
        >>> timestamp_to_seconds("01:01:05.5")
        3665.5
    """
    seconds_per_minute = 60
    seconds_per_hour = 60 * seconds_per_minute
    hh, mm, ss = (float(field) for field in timestamp.split(":"))
    return hh * seconds_per_hour + mm * seconds_per_minute + ss
# Script entry point: parse the command line, then hand over to main().
if __name__ == "__main__":
    cli_args = parser.parse_args()
    main(cli_args)
import argparse
import sys
from dataclasses import dataclass
from pathlib import Path
from typing import List
import numpy as np
import pandas as pd
# Command-line interface for the CSV -> SRT converter.
parser = argparse.ArgumentParser(
    description="Create a SRT file from timestamped columns",
    formatter_class=argparse.ArgumentDefaultsHelpFormatter,
)
parser.add_argument("csv", type=Path, help="Source CSV")
parser.add_argument("srt", type=Path, help="Destination SRT")
parser.add_argument(
    "--force", action="store_true", help="Overwrite destination if it already exists"
)
parser.add_argument(
    "--start-timestamp-col",
    type=str,
    default="timestamp",
    help="Start timestamp column",
)
parser.add_argument(
    "--subtitle-col",
    type=str,
    default="subtitle",
    help="Column to use as contents of subtitle",
)
parser.add_argument("--stop-timestamp-col", type=str, help="Stop timestamp if present")
# Only consulted when no stop-timestamp column is given; the help text names
# the actual flag (--stop-timestamp-col) so users can find it.
parser.add_argument(
    "--duration",
    type=float,
    default=2,
    help="Duration for subtitle if --stop-timestamp-col is not specified",
)
parser.add_argument(
    "--center",
    action="store_true",
    help="Center subtitle around start time when --duration is specified instead of "
    "starting from the start timestamp.",
)
def main(args):
    """Convert a CSV of timestamped subtitles into an SRT file.

    Start times come from ``args.start_timestamp_col``. Stop times come from
    ``args.stop_timestamp_col`` when it is given; otherwise every subtitle
    lasts ``args.duration`` seconds, optionally centred on the start time
    (``args.center``).

    Args:
        args: parsed command-line namespace providing ``csv``, ``srt``,
            ``force``, ``start_timestamp_col``, ``subtitle_col``,
            ``stop_timestamp_col``, ``duration`` and ``center``.

    Raises:
        SystemExit: if the destination exists and ``--force`` was not given,
            or if a required column is missing from the CSV.
    """
    if args.srt.exists() and not args.force:
        print(f"{args.srt} already exists, quitting. Use --force to overwrite.")
        # Actually quit: without this the file was overwritten regardless.
        sys.exit(-1)

    df = pd.read_csv(args.csv)

    cols_to_check = [args.start_timestamp_col, args.subtitle_col]
    if args.stop_timestamp_col is not None:
        cols_to_check.insert(1, args.stop_timestamp_col)
    # Report every missing column before exiting, not just the first one.
    missing_cols = [col for col in cols_to_check if col not in df.columns]
    for col in missing_cols:
        print(f"{col!r} not present in columns ({df.columns}).")
    if missing_cols:
        sys.exit(-1)

    start_times = df[args.start_timestamp_col].apply(timestamp_to_seconds)
    if args.stop_timestamp_col is None:
        stop_times = start_times + args.duration
        if args.center:
            # Shift the whole window back by half its length; the start is
            # clamped at zero so no subtitle begins before the media does.
            start_times -= args.duration / 2
            start_times = start_times.clip(lower=0)
            stop_times -= args.duration / 2
    else:
        stop_times = df[args.stop_timestamp_col].apply(timestamp_to_seconds)

    subs = df[args.subtitle_col]
    subtitle_entries = [
        SubtitleEntry(
            start_timestamp=seconds_to_timestamp(start),
            stop_timestamp=seconds_to_timestamp(stop),
            subtitle=str(sub),
        )
        for start, stop, sub in zip(start_times, stop_times, subs)
    ]
    srt = subtitles_to_srt(subtitle_entries)
    # Explicit UTF-8 so non-ASCII subtitles do not depend on the platform's
    # default encoding.
    with open(args.srt, "w", encoding="utf-8") as f:
        f.write(srt)
@dataclass
class SubtitleEntry:
    """One subtitle cue: its text plus the timestamps it is shown between."""

    # Time at which the subtitle appears, already formatted as a string.
    start_timestamp: str
    # Time at which the subtitle disappears, already formatted as a string.
    stop_timestamp: str
    # Text displayed for this cue.
    subtitle: str
def subtitles_to_srt(subtitles: List["SubtitleEntry"]) -> str:
    """Render subtitle entries as the text of an SRT file.

    Each entry becomes a block of three lines (1-based sequence number,
    ``start --> stop`` time range, subtitle text); blocks are separated by
    a blank line.

    Args:
        subtitles: entries to render, in display order.

    Returns:
        The SRT document as a single string; empty if ``subtitles`` is empty.
    """
    blocks = []
    for seq_number, entry in enumerate(subtitles, start=1):
        # SRT separates seconds from milliseconds with a comma, whereas the
        # timestamps produced elsewhere in this file use a dot.
        start = entry.start_timestamp.replace(".", ",")
        stop = entry.stop_timestamp.replace(".", ",")
        blocks.append(f"{seq_number}\n{start} --> {stop}\n{entry.subtitle}\n")
    return "\n".join(blocks)
def seconds_to_timestamp(total_seconds: float) -> str:
    """Convert seconds into a timestamp

    The value is rounded to whole milliseconds *before* it is split into
    hours, minutes and seconds, so a rounding carry propagates correctly
    (e.g. ``59.9996`` becomes ``00:01:00.000`` rather than ``00:00:60.000``).

    Args:
        total_seconds: time in seconds

    Returns:
        timestamp representing ``total_seconds``, formatted ``HH:MM:SS.mmm``

    Examples:
        >>> seconds_to_timestamp(1)
        '00:00:01.000'
        >>> seconds_to_timestamp(1.1)
        '00:00:01.100'
        >>> seconds_to_timestamp(60)
        '00:01:00.000'
        >>> seconds_to_timestamp(61)
        '00:01:01.000'
        >>> seconds_to_timestamp(60 * 60 + 1)
        '01:00:01.000'
        >>> seconds_to_timestamp(60 * 60 + 60 + 1)
        '01:01:01.000'
        >>> seconds_to_timestamp(1225.78500002)
        '00:20:25.785'
        >>> seconds_to_timestamp(59.9996)
        '00:01:00.000'
    """
    # int(...) also normalises numpy scalars to a plain Python int.
    total_ms = int(round(float(total_seconds) * 1000))
    hh, remainder_ms = divmod(total_ms, 60 * 60 * 1000)
    mm, remainder_ms = divmod(remainder_ms, 60 * 1000)
    ss, ms = divmod(remainder_ms, 1000)
    return "{:02d}:{:02d}:{:02d}.{:03d}".format(hh, mm, ss, ms)
def timestamp_to_seconds(timestamp: str) -> float:
    """Parse an ``HH:MM:SS[.FractionalPart]`` timestamp into seconds.

    Args:
        timestamp: colon-separated hours, minutes and seconds; the seconds
            field may carry a fractional part.

    Returns:
        The total number of seconds that ``timestamp`` represents.

    Raises:
        ValueError: if ``timestamp`` does not consist of exactly three
            colon-separated numeric fields.

    Examples:
        >>> timestamp_to_seconds("00:00:00")
        0.0
        >>> timestamp_to_seconds("00:00:05")
        5.0
        >>> timestamp_to_seconds("00:00:05.5")
        5.5
        >>> timestamp_to_seconds("00:01:05.5")
        65.5
        >>> timestamp_to_seconds("01:01:05.5")
        3665.5
    """
    seconds_per_minute = 60
    seconds_per_hour = 60 * seconds_per_minute
    hh, mm, ss = (float(field) for field in timestamp.split(":"))
    return hh * seconds_per_hour + mm * seconds_per_minute + ss
def to_subtitles(df):
    """Build one ``SubtitleEntry`` per row of ``df``.

    Reads the ``start_time``, ``stop_time`` and ``thread`` fields of every
    row. Both times are parsed to seconds and re-formatted, which
    normalises them to the layout ``seconds_to_timestamp`` produces.

    Args:
        df: frame whose rows expose ``start_time``, ``stop_time`` and
            ``thread``.

    Returns:
        List of ``SubtitleEntry`` objects in row order.
    """

    def _normalise(raw_timestamp):
        # Round-trip through seconds to get a canonical timestamp string.
        return seconds_to_timestamp(timestamp_to_seconds(raw_timestamp))

    entries = []
    for _, row in df.iterrows():
        entries.append(
            SubtitleEntry(
                start_timestamp=_normalise(row.start_time),
                stop_timestamp=_normalise(row.stop_time),
                subtitle=row.thread,
            )
        )
    return entries
# Script entry point: parse the command line, then hand over to main().
if __name__ == "__main__":
    cli_args = parser.parse_args()
    main(cli_args)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment