# prcutler/csv_points_to_segments.py

## csv_points_to_segments.py
import argparse

from pathlib import Path

import numpy as np
import pandas as pd


# Command-line interface for the points -> segments converter.
parser = argparse.ArgumentParser(
    description="Convert a CSV of timestamped points into a CSV of start/stop segments",
    formatter_class=argparse.ArgumentDefaultsHelpFormatter,
)
parser.add_argument("in_csv", type=Path, help="Source CSV of timestamped points")
parser.add_argument("out_csv", type=Path, help="Destination CSV of segments")
parser.add_argument(
    "--timestamp-col",
    type=str,
    default="timestamp",
    help="Column holding the timestamp of each point",
)
# The final segment has no following point to end on, so its stop time must
# always be supplied; main() cannot run without it.
parser.add_argument(
    "--duration",
    type=float,
    required=True,
    help="Stop time, in seconds, of the final segment",
)


def main(args):
    """Convert a CSV of timestamped points into a CSV of start/stop segments.

    Every point becomes one segment that starts at the point's timestamp and
    stops at the timestamp of the next point; the last segment stops at
    ``args.duration`` seconds. All other columns are carried over unchanged,
    in their original order.

    Args:
        args: parsed command-line namespace providing ``in_csv``, ``out_csv``,
            ``timestamp_col`` and ``duration``.

    Raises:
        ValueError: if ``args.duration`` is ``None``.
    """
    if args.duration is None:
        # Fail early with a clear message instead of an opaque TypeError
        # raised while formatting the last stop time.
        raise ValueError(
            "--duration is required: it is the stop time (in seconds) of the "
            "final segment"
        )

    points = pd.read_csv(args.in_csv)
    point_timestamp_col = args.timestamp_col
    start_timestamp_col = "start_" + args.timestamp_col
    stop_timestamp_col = "stop_" + args.timestamp_col

    start_times = points[point_timestamp_col].apply(timestamp_to_seconds).values
    if len(start_times) > 0:
        # Each segment stops where the next one starts; the last one stops at
        # the supplied duration.
        stop_times = np.concatenate([start_times[1:], [args.duration]])
    else:
        # No points means no segments (and no dangling final stop time).
        stop_times = start_times

    segments = pd.DataFrame(
        {
            start_timestamp_col: [seconds_to_timestamp(s) for s in start_times],
            stop_timestamp_col: [seconds_to_timestamp(s) for s in stop_times],
        }
    )
    # Iterate the columns themselves (not a set) so the output column order is
    # deterministic and follows the input file.
    for col in points.columns:
        if col != point_timestamp_col:
            segments[col] = points[col].values
    segments.to_csv(args.out_csv, index=False)


def seconds_to_timestamp(total_seconds: float) -> str:
    """Convert seconds into an ``HH:MM:SS.mmm`` timestamp.

    The value is rounded to the nearest millisecond *before* it is split into
    fields, so a value such as ``59.9996`` carries into the minutes field
    instead of producing an invalid ``60.000`` seconds field.

    Args:
        total_seconds: time in seconds. A negative value is rendered as the
            timestamp of its magnitude prefixed with ``-``.

    Returns:
        timestamp representing ``total_seconds``; the hours field is
        zero-padded to two digits and grows as needed.

    Examples:
        >>> seconds_to_timestamp(1)
        '00:00:01.000'
        >>> seconds_to_timestamp(1.1)
        '00:00:01.100'
        >>> seconds_to_timestamp(60)
        '00:01:00.000'
        >>> seconds_to_timestamp(61)
        '00:01:01.000'
        >>> seconds_to_timestamp(60 * 60 + 1)
        '01:00:01.000'
        >>> seconds_to_timestamp(60 * 60  + 60 + 1)
        '01:01:01.000'
        >>> seconds_to_timestamp(1225.78500002)
        '00:20:25.785'
        >>> seconds_to_timestamp(59.9996)
        '00:01:00.000'
    """
    # Work in integer milliseconds so rounding can never leave a field out of
    # range; int() also normalises NumPy scalars to a plain Python int.
    total_ms = int(round(total_seconds * 1000))
    sign = "-" if total_ms < 0 else ""
    whole_seconds, ms = divmod(abs(total_ms), 1000)
    whole_minutes, ss = divmod(whole_seconds, 60)
    hh, mm = divmod(whole_minutes, 60)
    return f"{sign}{hh:02d}:{mm:02d}:{ss:02d}.{ms:03d}"


def timestamp_to_seconds(timestamp: str) -> float:
    """Parse an ``HH:MM:SS[.FractionalPart]`` timestamp into seconds.

    Args:
        timestamp: text with exactly three ``:``-separated numeric fields
            (hours, minutes, seconds); the seconds may carry a fraction.

    Returns:
        total number of seconds represented by ``timestamp``

    Raises:
        ValueError: if there are not exactly three fields, or a field is not
            a number.

    Examples:
        >>> timestamp_to_seconds("00:00:00")
        0.0
        >>> timestamp_to_seconds("00:00:05")
        5.0
        >>> timestamp_to_seconds("00:00:05.5")
        5.5
        >>> timestamp_to_seconds("00:01:05.5")
        65.5
        >>> timestamp_to_seconds("01:01:05.5")
        3665.5
    """
    seconds_per_minute = 60
    seconds_per_hour = 60 * seconds_per_minute

    hh, mm, ss = (float(field) for field in timestamp.split(":"))
    return hh * seconds_per_hour + mm * seconds_per_minute + ss


if __name__ == "__main__":
    # Script entry point: parse the command line, then run the conversion.
    cli_args = parser.parse_args()
    main(cli_args)

## csv_to_srt.py
import argparse
import sys

from dataclasses import dataclass
from pathlib import Path
from typing import List

import numpy as np
import pandas as pd


# Command-line interface for the CSV -> SRT converter.
parser = argparse.ArgumentParser(
    description="Create a SRT file from timestamped columns",
    formatter_class=argparse.ArgumentDefaultsHelpFormatter,
)
parser.add_argument("csv", type=Path, help="Source CSV")
parser.add_argument("srt", type=Path, help="Destination SRT")
parser.add_argument(
    "--force", action="store_true", help="Overwrite destination if it already exists"
)
parser.add_argument(
    "--start-timestamp-col",
    type=str,
    default="timestamp",
    help="Start timestamp column",
)
parser.add_argument(
    "--subtitle-col",
    type=str,
    default="subtitle",
    help="Column to use as contents of subtitle",
)
parser.add_argument(
    "--stop-timestamp-col",
    type=str,
    help="Stop timestamp column, if present",
)
# The help text names the real option (--stop-timestamp-col); the duration is
# added to the start time in seconds.
parser.add_argument(
    "--duration",
    type=float,
    default=2,
    help="Duration in seconds for subtitle if --stop-timestamp-col is not specified",
)
parser.add_argument(
    "--center",
    action="store_true",
    help="Center subtitle around start time when --duration is specified instead of "
    "starting from the start timestamp.",
)


def main(args):
    """Create an SRT file from a CSV of timestamped subtitles.

    Args:
        args: parsed command-line namespace providing ``csv``, ``srt``,
            ``force``, ``start_timestamp_col``, ``stop_timestamp_col``,
            ``subtitle_col``, ``duration`` and ``center``.

    Exits with a non-zero status (via ``sys.exit``) when the destination
    already exists and ``--force`` was not given, or when a required column is
    missing from the CSV.
    """
    if args.srt.exists() and not args.force:
        print(f"{args.srt} already exists, quitting. Use --force to overwrite.")
        # Actually quit: without this the destination was overwritten anyway.
        sys.exit(-1)

    df = pd.read_csv(args.csv)

    cols_to_check = [args.start_timestamp_col, args.subtitle_col]
    if args.stop_timestamp_col is not None:
        cols_to_check.insert(1, args.stop_timestamp_col)

    # Report every missing column before giving up, not just the first one.
    missing_cols = [col for col in cols_to_check if col not in df.columns]
    for col in missing_cols:
        print(f"{col!r} not present in columns ({df.columns}).")
    if missing_cols:
        sys.exit(-1)

    start_times = df[args.start_timestamp_col].apply(timestamp_to_seconds)
    if args.stop_timestamp_col is None:
        stop_times = start_times + args.duration
        if args.center:
            # Shift the window back by half its length, never starting
            # before time zero.
            half_duration = args.duration / 2
            start_times = (start_times - half_duration).clip(lower=0)
            stop_times = stop_times - half_duration
    else:
        stop_times = df[args.stop_timestamp_col].apply(timestamp_to_seconds)

    subs = df[args.subtitle_col]
    subtitle_entries = [
        SubtitleEntry(
            start_timestamp=seconds_to_timestamp(start),
            stop_timestamp=seconds_to_timestamp(stop),
            subtitle=str(sub),
        )
        for start, stop, sub in zip(start_times, stop_times, subs)
    ]
    srt = subtitles_to_srt(subtitle_entries)
    # Write UTF-8 explicitly so non-ASCII subtitles do not depend on (or fail
    # under) the platform's locale encoding.
    with open(args.srt, "w", encoding="utf-8") as f:
        f.write(srt)


@dataclass
class SubtitleEntry:
    """One subtitle cue: its start and stop timestamps and its text."""

    # Timestamp string written on the left of "-->" in the SRT timing line.
    start_timestamp: str
    # Timestamp string written on the right of "-->" in the SRT timing line.
    stop_timestamp: str
    # Text written below the timing line.
    subtitle: str


def subtitles_to_srt(subtitles: List["SubtitleEntry"]) -> str:
    """Render subtitle entries as the text of a SubRip (``.srt``) file.

    Each entry becomes one cue made of a 1-based sequence number, a timing
    line (``start --> stop``) and the subtitle text; cues are separated by a
    blank line.

    Args:
        subtitles: entries to render, in display order.

    Returns:
        the SRT document as a single string (empty if there are no entries).
    """
    cues = []
    for seq_number, entry in enumerate(subtitles, start=1):
        # SubRip separates seconds from milliseconds with a comma, whereas the
        # timestamps handed in here use a decimal point.
        start = entry.start_timestamp.replace(".", ",")
        stop = entry.stop_timestamp.replace(".", ",")
        cues.append(f"{seq_number}\n{start} --> {stop}\n{entry.subtitle}\n")
    # Join once instead of growing a string with += in the loop.
    return "\n".join(cues)


def seconds_to_timestamp(total_seconds: float) -> str:
    """Convert seconds into an ``HH:MM:SS.mmm`` timestamp.

    The value is rounded to the nearest millisecond *before* it is split into
    fields, so a value such as ``59.9996`` carries into the minutes field
    instead of producing an invalid ``60.000`` seconds field.

    Args:
        total_seconds: time in seconds. A negative value is rendered as the
            timestamp of its magnitude prefixed with ``-``.

    Returns:
        timestamp representing ``total_seconds``; the hours field is
        zero-padded to two digits and grows as needed.

    Examples:
        >>> seconds_to_timestamp(1)
        '00:00:01.000'
        >>> seconds_to_timestamp(1.1)
        '00:00:01.100'
        >>> seconds_to_timestamp(60)
        '00:01:00.000'
        >>> seconds_to_timestamp(61)
        '00:01:01.000'
        >>> seconds_to_timestamp(60 * 60 + 1)
        '01:00:01.000'
        >>> seconds_to_timestamp(60 * 60  + 60 + 1)
        '01:01:01.000'
        >>> seconds_to_timestamp(1225.78500002)
        '00:20:25.785'
        >>> seconds_to_timestamp(59.9996)
        '00:01:00.000'
    """
    # Work in integer milliseconds so rounding can never leave a field out of
    # range; int() also normalises NumPy scalars to a plain Python int.
    total_ms = int(round(total_seconds * 1000))
    sign = "-" if total_ms < 0 else ""
    whole_seconds, ms = divmod(abs(total_ms), 1000)
    whole_minutes, ss = divmod(whole_seconds, 60)
    hh, mm = divmod(whole_minutes, 60)
    return f"{sign}{hh:02d}:{mm:02d}:{ss:02d}.{ms:03d}"


def timestamp_to_seconds(timestamp: str) -> float:
    """Parse an ``HH:MM:SS[.FractionalPart]`` timestamp into seconds.

    Args:
        timestamp: text with exactly three ``:``-separated numeric fields
            (hours, minutes, seconds); the seconds may carry a fraction.

    Returns:
        total number of seconds represented by ``timestamp``

    Raises:
        ValueError: if there are not exactly three fields, or a field is not
            a number.

    Examples:
        >>> timestamp_to_seconds("00:00:00")
        0.0
        >>> timestamp_to_seconds("00:00:05")
        5.0
        >>> timestamp_to_seconds("00:00:05.5")
        5.5
        >>> timestamp_to_seconds("00:01:05.5")
        65.5
        >>> timestamp_to_seconds("01:01:05.5")
        3665.5
    """
    seconds_per_minute = 60
    seconds_per_hour = 60 * seconds_per_minute

    hh, mm, ss = (float(field) for field in timestamp.split(":"))
    return hh * seconds_per_hour + mm * seconds_per_minute + ss


def to_subtitles(
    df: pd.DataFrame,
    start_col: str = "start_time",
    stop_col: str = "stop_time",
    subtitle_col: str = "thread",
) -> List["SubtitleEntry"]:
    """Build subtitle entries from the rows of a DataFrame.

    Start and stop timestamps are parsed and re-formatted, so every entry
    carries them in the form produced by ``seconds_to_timestamp``.

    Args:
        df: table with one row per subtitle.
        start_col: column holding the start timestamp of each row.
        stop_col: column holding the stop timestamp of each row.
        subtitle_col: column holding the subtitle contents of each row.

    Returns:
        one ``SubtitleEntry`` per row, in row order.

    Raises:
        KeyError: if one of the named columns is not present in ``df``.
    """
    # Walk the three columns in lockstep rather than materialising a Series
    # per row with iterrows().
    return [
        SubtitleEntry(
            start_timestamp=seconds_to_timestamp(timestamp_to_seconds(start)),
            stop_timestamp=seconds_to_timestamp(timestamp_to_seconds(stop)),
            subtitle=sub,
        )
        for start, stop, sub in zip(df[start_col], df[stop_col], df[subtitle_col])
    ]


if __name__ == "__main__":
    # Script entry point: parse the command line, then build the SRT file.
    cli_args = parser.parse_args()
    main(cli_args)
	import argparse

	from pathlib import Path

	import numpy as np
	import pandas as pd


	# Command-line interface for the points -> segments converter.
	parser = argparse.ArgumentParser(
	    description="Convert a CSV of timestamped points into a CSV of start/stop segments",
	    formatter_class=argparse.ArgumentDefaultsHelpFormatter,
	)
	parser.add_argument("in_csv", type=Path, help="Source CSV of timestamped points")
	parser.add_argument("out_csv", type=Path, help="Destination CSV of segments")
	parser.add_argument(
	    "--timestamp-col",
	    type=str,
	    default="timestamp",
	    help="Column holding the timestamp of each point",
	)
	# The final segment has no following point to end on, so its stop time must
	# always be supplied; main() cannot run without it.
	parser.add_argument(
	    "--duration",
	    type=float,
	    required=True,
	    help="Stop time, in seconds, of the final segment",
	)


	def main(args):
	    """Convert a CSV of timestamped points into a CSV of start/stop segments.

	    Every point becomes one segment that starts at the point's timestamp and
	    stops at the timestamp of the next point; the last segment stops at
	    ``args.duration`` seconds. All other columns are carried over unchanged,
	    in their original order.

	    Args:
	        args: parsed command-line namespace providing ``in_csv``, ``out_csv``,
	            ``timestamp_col`` and ``duration``.

	    Raises:
	        ValueError: if ``args.duration`` is ``None``.
	    """
	    if args.duration is None:
	        # Fail early with a clear message instead of an opaque TypeError
	        # raised while formatting the last stop time.
	        raise ValueError(
	            "--duration is required: it is the stop time (in seconds) of the "
	            "final segment"
	        )

	    points = pd.read_csv(args.in_csv)
	    point_timestamp_col = args.timestamp_col
	    start_timestamp_col = "start_" + args.timestamp_col
	    stop_timestamp_col = "stop_" + args.timestamp_col

	    start_times = points[point_timestamp_col].apply(timestamp_to_seconds).values
	    if len(start_times) > 0:
	        # Each segment stops where the next one starts; the last one stops at
	        # the supplied duration.
	        stop_times = np.concatenate([start_times[1:], [args.duration]])
	    else:
	        # No points means no segments (and no dangling final stop time).
	        stop_times = start_times

	    segments = pd.DataFrame(
	        {
	            start_timestamp_col: [seconds_to_timestamp(s) for s in start_times],
	            stop_timestamp_col: [seconds_to_timestamp(s) for s in stop_times],
	        }
	    )
	    # Iterate the columns themselves (not a set) so the output column order is
	    # deterministic and follows the input file.
	    for col in points.columns:
	        if col != point_timestamp_col:
	            segments[col] = points[col].values
	    segments.to_csv(args.out_csv, index=False)


	def seconds_to_timestamp(total_seconds: float) -> str:
	    """Convert seconds into an ``HH:MM:SS.mmm`` timestamp.

	    The value is rounded to the nearest millisecond *before* it is split into
	    fields, so a value such as ``59.9996`` carries into the minutes field
	    instead of producing an invalid ``60.000`` seconds field.

	    Args:
	        total_seconds: time in seconds. A negative value is rendered as the
	            timestamp of its magnitude prefixed with ``-``.

	    Returns:
	        timestamp representing ``total_seconds``; the hours field is
	        zero-padded to two digits and grows as needed.

	    Examples:
	        >>> seconds_to_timestamp(1)
	        '00:00:01.000'
	        >>> seconds_to_timestamp(1.1)
	        '00:00:01.100'
	        >>> seconds_to_timestamp(60)
	        '00:01:00.000'
	        >>> seconds_to_timestamp(61)
	        '00:01:01.000'
	        >>> seconds_to_timestamp(60 * 60 + 1)
	        '01:00:01.000'
	        >>> seconds_to_timestamp(60 * 60 + 60 + 1)
	        '01:01:01.000'
	        >>> seconds_to_timestamp(1225.78500002)
	        '00:20:25.785'
	        >>> seconds_to_timestamp(59.9996)
	        '00:01:00.000'
	    """
	    # Work in integer milliseconds so rounding can never leave a field out of
	    # range; int() also normalises NumPy scalars to a plain Python int.
	    total_ms = int(round(total_seconds * 1000))
	    sign = "-" if total_ms < 0 else ""
	    whole_seconds, ms = divmod(abs(total_ms), 1000)
	    whole_minutes, ss = divmod(whole_seconds, 60)
	    hh, mm = divmod(whole_minutes, 60)
	    return f"{sign}{hh:02d}:{mm:02d}:{ss:02d}.{ms:03d}"


	def timestamp_to_seconds(timestamp: str) -> float:
	    """Parse an ``HH:MM:SS[.FractionalPart]`` timestamp into seconds.

	    Args:
	        timestamp: text with exactly three ``:``-separated numeric fields
	            (hours, minutes, seconds); the seconds may carry a fraction.

	    Returns:
	        total number of seconds represented by ``timestamp``

	    Raises:
	        ValueError: if there are not exactly three fields, or a field is not
	            a number.

	    Examples:
	        >>> timestamp_to_seconds("00:00:00")
	        0.0
	        >>> timestamp_to_seconds("00:00:05")
	        5.0
	        >>> timestamp_to_seconds("00:00:05.5")
	        5.5
	        >>> timestamp_to_seconds("00:01:05.5")
	        65.5
	        >>> timestamp_to_seconds("01:01:05.5")
	        3665.5
	    """
	    seconds_per_minute = 60
	    seconds_per_hour = 60 * seconds_per_minute

	    hh, mm, ss = (float(field) for field in timestamp.split(":"))
	    return hh * seconds_per_hour + mm * seconds_per_minute + ss


	if __name__ == "__main__":
	    # Script entry point: parse the command line, then run the conversion.
	    main(parser.parse_args())
	import argparse
	import sys

	from dataclasses import dataclass
	from pathlib import Path
	from typing import List

	import numpy as np
	import pandas as pd


	# Command-line interface for the CSV -> SRT converter.
	parser = argparse.ArgumentParser(
	    description="Create a SRT file from timestamped columns",
	    formatter_class=argparse.ArgumentDefaultsHelpFormatter,
	)
	parser.add_argument("csv", type=Path, help="Source CSV")
	parser.add_argument("srt", type=Path, help="Destination SRT")
	parser.add_argument(
	    "--force", action="store_true", help="Overwrite destination if it already exists"
	)
	parser.add_argument(
	    "--start-timestamp-col",
	    type=str,
	    default="timestamp",
	    help="Start timestamp column",
	)
	parser.add_argument(
	    "--subtitle-col",
	    type=str,
	    default="subtitle",
	    help="Column to use as contents of subtitle",
	)
	parser.add_argument(
	    "--stop-timestamp-col",
	    type=str,
	    help="Stop timestamp column, if present",
	)
	# The help text names the real option (--stop-timestamp-col); the duration is
	# added to the start time in seconds.
	parser.add_argument(
	    "--duration",
	    type=float,
	    default=2,
	    help="Duration in seconds for subtitle if --stop-timestamp-col is not specified",
	)
	parser.add_argument(
	    "--center",
	    action="store_true",
	    help="Center subtitle around start time when --duration is specified instead of "
	    "starting from the start timestamp.",
	)


	def main(args):
	    """Create an SRT file from a CSV of timestamped subtitles.

	    Args:
	        args: parsed command-line namespace providing ``csv``, ``srt``,
	            ``force``, ``start_timestamp_col``, ``stop_timestamp_col``,
	            ``subtitle_col``, ``duration`` and ``center``.

	    Exits with a non-zero status (via ``sys.exit``) when the destination
	    already exists and ``--force`` was not given, or when a required column is
	    missing from the CSV.
	    """
	    if args.srt.exists() and not args.force:
	        print(f"{args.srt} already exists, quitting. Use --force to overwrite.")
	        # Actually quit: without this the destination was overwritten anyway.
	        sys.exit(-1)

	    df = pd.read_csv(args.csv)

	    cols_to_check = [args.start_timestamp_col, args.subtitle_col]
	    if args.stop_timestamp_col is not None:
	        cols_to_check.insert(1, args.stop_timestamp_col)

	    # Report every missing column before giving up, not just the first one.
	    missing_cols = [col for col in cols_to_check if col not in df.columns]
	    for col in missing_cols:
	        print(f"{col!r} not present in columns ({df.columns}).")
	    if missing_cols:
	        sys.exit(-1)

	    start_times = df[args.start_timestamp_col].apply(timestamp_to_seconds)
	    if args.stop_timestamp_col is None:
	        stop_times = start_times + args.duration
	        if args.center:
	            # Shift the window back by half its length, never starting
	            # before time zero.
	            half_duration = args.duration / 2
	            start_times = (start_times - half_duration).clip(lower=0)
	            stop_times = stop_times - half_duration
	    else:
	        stop_times = df[args.stop_timestamp_col].apply(timestamp_to_seconds)

	    subs = df[args.subtitle_col]
	    subtitle_entries = [
	        SubtitleEntry(
	            start_timestamp=seconds_to_timestamp(start),
	            stop_timestamp=seconds_to_timestamp(stop),
	            subtitle=str(sub),
	        )
	        for start, stop, sub in zip(start_times, stop_times, subs)
	    ]
	    srt = subtitles_to_srt(subtitle_entries)
	    # Write UTF-8 explicitly so non-ASCII subtitles do not depend on (or fail
	    # under) the platform's locale encoding.
	    with open(args.srt, "w", encoding="utf-8") as f:
	        f.write(srt)


	@dataclass
	class SubtitleEntry:
	    """One subtitle cue: its start and stop timestamps and its text."""

	    # Timestamp string written on the left of "-->" in the SRT timing line.
	    start_timestamp: str
	    # Timestamp string written on the right of "-->" in the SRT timing line.
	    stop_timestamp: str
	    # Text written below the timing line.
	    subtitle: str


	def subtitles_to_srt(subtitles: List["SubtitleEntry"]) -> str:
	    """Render subtitle entries as the text of a SubRip (``.srt``) file.

	    Each entry becomes one cue made of a 1-based sequence number, a timing
	    line (``start --> stop``) and the subtitle text; cues are separated by a
	    blank line.

	    Args:
	        subtitles: entries to render, in display order.

	    Returns:
	        the SRT document as a single string (empty if there are no entries).
	    """
	    cues = []
	    for seq_number, entry in enumerate(subtitles, start=1):
	        # SubRip separates seconds from milliseconds with a comma, whereas the
	        # timestamps handed in here use a decimal point.
	        start = entry.start_timestamp.replace(".", ",")
	        stop = entry.stop_timestamp.replace(".", ",")
	        cues.append(f"{seq_number}\n{start} --> {stop}\n{entry.subtitle}\n")
	    # Join once instead of growing a string with += in the loop.
	    return "\n".join(cues)


	def seconds_to_timestamp(total_seconds: float) -> str:
	    """Convert seconds into an ``HH:MM:SS.mmm`` timestamp.

	    The value is rounded to the nearest millisecond *before* it is split into
	    fields, so a value such as ``59.9996`` carries into the minutes field
	    instead of producing an invalid ``60.000`` seconds field.

	    Args:
	        total_seconds: time in seconds. A negative value is rendered as the
	            timestamp of its magnitude prefixed with ``-``.

	    Returns:
	        timestamp representing ``total_seconds``; the hours field is
	        zero-padded to two digits and grows as needed.

	    Examples:
	        >>> seconds_to_timestamp(1)
	        '00:00:01.000'
	        >>> seconds_to_timestamp(1.1)
	        '00:00:01.100'
	        >>> seconds_to_timestamp(60)
	        '00:01:00.000'
	        >>> seconds_to_timestamp(61)
	        '00:01:01.000'
	        >>> seconds_to_timestamp(60 * 60 + 1)
	        '01:00:01.000'
	        >>> seconds_to_timestamp(60 * 60 + 60 + 1)
	        '01:01:01.000'
	        >>> seconds_to_timestamp(1225.78500002)
	        '00:20:25.785'
	        >>> seconds_to_timestamp(59.9996)
	        '00:01:00.000'
	    """
	    # Work in integer milliseconds so rounding can never leave a field out of
	    # range; int() also normalises NumPy scalars to a plain Python int.
	    total_ms = int(round(total_seconds * 1000))
	    sign = "-" if total_ms < 0 else ""
	    whole_seconds, ms = divmod(abs(total_ms), 1000)
	    whole_minutes, ss = divmod(whole_seconds, 60)
	    hh, mm = divmod(whole_minutes, 60)
	    return f"{sign}{hh:02d}:{mm:02d}:{ss:02d}.{ms:03d}"


	def timestamp_to_seconds(timestamp: str) -> float:
	    """Parse an ``HH:MM:SS[.FractionalPart]`` timestamp into seconds.

	    Args:
	        timestamp: text with exactly three ``:``-separated numeric fields
	            (hours, minutes, seconds); the seconds may carry a fraction.

	    Returns:
	        total number of seconds represented by ``timestamp``

	    Raises:
	        ValueError: if there are not exactly three fields, or a field is not
	            a number.

	    Examples:
	        >>> timestamp_to_seconds("00:00:00")
	        0.0
	        >>> timestamp_to_seconds("00:00:05")
	        5.0
	        >>> timestamp_to_seconds("00:00:05.5")
	        5.5
	        >>> timestamp_to_seconds("00:01:05.5")
	        65.5
	        >>> timestamp_to_seconds("01:01:05.5")
	        3665.5
	    """
	    seconds_per_minute = 60
	    seconds_per_hour = 60 * seconds_per_minute

	    hh, mm, ss = (float(field) for field in timestamp.split(":"))
	    return hh * seconds_per_hour + mm * seconds_per_minute + ss


	def to_subtitles(
	    df: pd.DataFrame,
	    start_col: str = "start_time",
	    stop_col: str = "stop_time",
	    subtitle_col: str = "thread",
	) -> List["SubtitleEntry"]:
	    """Build subtitle entries from the rows of a DataFrame.

	    Start and stop timestamps are parsed and re-formatted, so every entry
	    carries them in the form produced by ``seconds_to_timestamp``.

	    Args:
	        df: table with one row per subtitle.
	        start_col: column holding the start timestamp of each row.
	        stop_col: column holding the stop timestamp of each row.
	        subtitle_col: column holding the subtitle contents of each row.

	    Returns:
	        one ``SubtitleEntry`` per row, in row order.

	    Raises:
	        KeyError: if one of the named columns is not present in ``df``.
	    """
	    # Walk the three columns in lockstep rather than materialising a Series
	    # per row with iterrows().
	    return [
	        SubtitleEntry(
	            start_timestamp=seconds_to_timestamp(timestamp_to_seconds(start)),
	            stop_timestamp=seconds_to_timestamp(timestamp_to_seconds(stop)),
	            subtitle=sub,
	        )
	        for start, stop, sub in zip(df[start_col], df[stop_col], df[subtitle_col])
	    ]


	if __name__ == "__main__":
	    # Script entry point: parse the command line, then build the SRT file.
	    main(parser.parse_args())