Skip to content

Instantly share code, notes, and snippets.

@solace
Last active December 27, 2023 17:05
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save solace/29e9718d405324ef782f5ef38baa1cac to your computer and use it in GitHub Desktop.
Save solace/29e9718d405324ef782f5ef38baa1cac to your computer and use it in GitHub Desktop.
Interactive transcripts with YouTube and Descript. See https://askmeaboutmypodcast.substack.com/p/interactive-transcripts-with-youtube
/**
 * Renders the transcript's headings as a list of clickable chapter links.
 *
 * Each item behaves as a button: it can be focused with the keyboard and is
 * activated by click, Enter or Space.
 *
 * @param transcript Parsed transcript entries; only entries carrying a truthy
 *   `heading` property are listed.
 * @param seekTo Callback invoked with the heading's `start` value when an item
 *   is activated.
 */
export default function ShowNotes({ transcript, seekTo }) {
  const headings = transcript.filter((entry) => 'heading' in entry && entry.heading);
  return (
    <ul>
      {headings.map((heading, ix) => {
        const activate = () => seekTo(heading.start);
        return (
          <li
            key={`timeline-${ix}`}
            role="button"
            // role="button" alone does not make an element focusable.
            tabIndex={0}
            data-start={heading.start}
            onClick={activate}
            onKeyDown={(event) => {
              // Native buttons fire on Enter and Space; mirror that here.
              if (event.key === 'Enter' || event.key === ' ') {
                event.preventDefault();
                activate();
              }
            }}
          >
            {/* NOTE(review): convertSecToTime is not defined in this snippet; it must be in scope in the real module. */}
            [{convertSecToTime(heading.start)}] {heading.text}
          </li>
        );
      })}
    </ul>
  );
}
/**
 * Renders the full transcript: headings as `<h3>` elements and every speaker
 * entry as a `<dl>` whose paragraphs are split into clickable timed segments.
 *
 * @param transcript Parsed transcript entries (heading or speaker objects).
 * @param currentTime Accepted for API compatibility; not used by this component.
 * @param scrubTo Callback invoked with a segment's `start` value when the
 *   segment is clicked.
 * @param seekTo Alias for `scrubTo`, used when `scrubTo` is not supplied. The
 *   player component in this codebase passes the callback under this name.
 */
export default function Transcript({ transcript, currentTime, scrubTo, seekTo }) {
  // Callers are inconsistent about the prop name, so accept either one.
  const jumpTo = scrubTo ?? seekTo;

  return (
    <>
      {transcript.map((entry, ix) => {
        if ('heading' in entry && entry.heading) {
          return <h3 key={`heading-${ix}`}>{entry.text}</h3>;
        }

        return (
          <dl key={`paragraph-${ix}`}>
            <dt>{'speaker' in entry && entry.speaker}</dt>
            <dd>
              {'paragraphs' in entry &&
                entry.paragraphs.map((paragraph, iy) => (
                  <p key={`paragraph-${ix}-${iy}`}>
                    {paragraph.segments.map((segment, iz) => (
                      <span
                        key={`segment-${ix}-${iy}-${iz}`}
                        role="button"
                        data-start={segment.start}
                        data-end={segment.end}
                        // Optional call: a missing callback must not crash the click handler.
                        onClick={() => jumpTo?.(segment.start)}
                      >
                        {segment.text}{' '}
                      </span>
                    ))}
                  </p>
                ))}
            </dd>
          </dl>
        );
      })}
    </>
  );
}
// Trims the string, then cuts it at both edges of every bracketed timecode
// (e.g. [00:00:00]) so that timecodes and the text between them end up as
// separate, alternating items of the returned array.
function getParts(str) {
  // Zero-width match just before or just after a "[digits-and-colons]" token.
  const timecodeBoundary = /(?=\[[\d:]+?\])|(?<=\[[\d:]+?\])/g;
  const trimmed = str.trim();
  return trimmed.split(timecodeBoundary);
}
// Turns a colon-separated timecode (00:00:00, 00:00 or 00) into a number of
// seconds. Only the last three fields are used; a missing or empty field
// contributes nothing.
function convertTimeToSec(time: string): number {
  // Reverse so the fields line up as seconds, minutes, hours.
  const [seconds, minutes, hours] = time.split(':').reverse();
  const contributions = [
    seconds ? Number(seconds) : 0,
    minutes ? Number(minutes) * 60 : 0,
    hours ? Number(hours) * 60 * 60 : 0,
  ];
  return contributions.reduce((sum, value) => sum + value, 0);
}
/** A heading entry in the transcript. */
export interface Heading {
// Marker property; parseTranscript sets it to true on heading entries.
heading: boolean;
// Start time in seconds, converted from the heading's timecode.
start: number;
// Heading text that follows the timecode.
text: string;
}
/** A timed piece of speech inside a paragraph. */
export interface Segment {
// Start time in seconds, taken from the timecode preceding the text.
start: number;
// End time in seconds, taken from the following timecode when there is one.
// NOTE(review): parseTranscript stores null (not undefined) here when no
// timecode follows the text — confirm whether the type should allow null.
end?: number;
// The spoken text of this segment.
text: string;
}
/** One paragraph of speech, made up of consecutive timed segments. */
export interface Paragraph {
segments: Segment[];
}
/** A run of paragraphs attributed to one speaker. */
export interface Speaker {
// Speaker name.
// NOTE(review): parseTranscript initialises this to null until a name is
// found on the line — confirm whether the type should allow null.
speaker: string;
paragraphs: Paragraph[]
}
/** A parsed transcript: headings and speaker entries in document order. */
export type Transcript = (Heading | Speaker)[];
// Parses a markdown transcript into an array of Heading or Speaker objects.
// Speaker objects contain paragraphs split into timed segments.
// See typescript definitions above.
//
// Expected input, as implied by the parsing below:
//   - heading lines start with one or more '#', then "[timecode] title";
//   - content lines alternate "[timecode] text [timecode] text ...", where the
//     first text of a line may start with "**Name:** " to name the speaker.
//
// Consecutive lines by the same speaker (or lines without a speaker name) are
// merged into the preceding Speaker entry as additional paragraphs.
export const parseTranscript = (markdown: string): Transcript => {
  type DraftSegment = { start: number; end: number | null; text: string };
  type DraftSpeaker = { speaker: string | null; paragraphs: { segments: DraftSegment[] }[] };

  // Optional heading line followed by everything up to the next '#'.
  const sectionPattern = /(^#+ (?<heading>[^#]+?\n))?(?<content>[^#]+)/gm;
  // Optional "**Name:** " prefix followed by the spoken text.
  const speechPattern = /^\s*(\*\*(?<name>[\w\s]+):\*\* )?(?<speech>.*)$/;
  const stripBrackets = (timecode: string): string => timecode.replace(/[\[\]]/g, '');

  const transcript: Transcript = [];
  // Final segment of the previous paragraph while it still lacks an end time.
  let lastSegment: DraftSegment | undefined;

  for (const section of markdown.matchAll(sectionPattern)) {
    const heading = section.groups?.heading;
    const content = section.groups?.content;

    if (heading) {
      const parts = getParts(heading);
      const start = convertTimeToSec(stripBrackets(parts[0]));
      transcript.push({
        heading: true,
        start,
        text: decode(parts[1]),
      });
    }

    if (!content) {
      continue;
    }

    for (const line of content.split(/\r?\n/)) {
      const parts = getParts(line);
      // Skip empty lines
      if (parts.length === 1 && parts[0] === '') {
        continue;
      }

      const speaker: DraftSpeaker = { speaker: null, paragraphs: [{ segments: [] }] };
      const segments = speaker.paragraphs[0].segments;

      parts.forEach((part, ix) => {
        // Odd indices hold the text between two timecodes.
        if (ix % 2 !== 1 || part.trim().length === 0) {
          return;
        }

        const start = convertTimeToSec(stripBrackets(parts[ix - 1]));
        const end = parts[ix + 1] ? convertTimeToSec(stripBrackets(parts[ix + 1])) : null;

        // Fall back to the raw text rather than throwing if the pattern fails.
        const groups = speechPattern.exec(part)?.groups;
        if (groups?.name) {
          speaker.speaker = groups.name;
        }

        const segment: DraftSegment = {
          start,
          end,
          text: decode(groups?.speech ?? part),
        };

        // Ensure end time is set on last segments of paragraphs. This runs for
        // every new segment, so a paragraph that itself consists of a single
        // open-ended segment still closes the one before it.
        if (lastSegment) {
          const closingTime = start > lastSegment.start ? start : end;
          if (closingTime !== null) {
            lastSegment.end = closingTime;
          }
          lastSegment = undefined;
        }
        // Last segment for this paragraph, save for later.
        if (end === null) {
          lastSegment = segment;
        }

        segments.push(segment);
      });

      // The declared Speaker/Segment types do not admit the null placeholders
      // used above, hence the assertion; runtime values are unchanged.
      const entry = speaker as Speaker;
      const previous = transcript[transcript.length - 1];
      const continuesPrevious =
        previous !== undefined &&
        'speaker' in previous &&
        (speaker.speaker === null || speaker.speaker === previous.speaker);

      if (continuesPrevious) {
        (previous as Speaker).paragraphs.push(...entry.paragraphs);
      } else {
        transcript.push(entry);
      }
    }
  }

  return transcript;
};
import React, { useCallback, useEffect, useRef, useState } from 'react';
import YouTube from 'react-youtube';
import ShowNotes from '@/components/ShowNotes';
import InteractiveTranscript from '@/components/Transcript';
/**
 * Embeds a YouTube video together with its show notes and an interactive
 * transcript. Clicking a heading or transcript segment seeks the player, and
 * the player's position is polled while it is playing.
 *
 * @param youtubeVideoId Video id handed to the YouTube player component.
 * @param transcript Parsed transcript passed to both child components.
 */
export function YouTubeInteractive({ youtubeVideoId, transcript }) {
  const [currentTime, setCurrentTime] = useState(0);
  const interval = useRef<ReturnType<typeof setInterval>>();
  const player = useRef();

  const stopPolling = useCallback(() => {
    clearInterval(interval.current);
    interval.current = undefined;
  }, []);

  // Stop the timer when the component unmounts so it cannot fire afterwards.
  useEffect(() => stopPolling, [stopPolling]);

  const onPlayerStateChange = useCallback((ev) => {
    // Always drop the previous timer first so repeated PLAYING events never
    // leave more than one interval running.
    stopPolling();
    if (ev.data === YouTube.PlayerState.PLAYING) {
      interval.current = setInterval(() => {
        if (player.current) {
          setCurrentTime(player.current.getCurrentTime());
        }
      }, 500);
    }
  }, [stopPolling]);

  const onReady = useCallback((event) => {
    player.current = event.target;
  }, []);

  const seekTo = useCallback((start) => {
    if (player.current) {
      player.current.seekTo(start);
      if (player.current.getPlayerState() !== YouTube.PlayerState.PLAYING) {
        player.current.playVideo();
      }
    }
  }, []);

  return (
    <section className="mb-8">
      <YouTube
        videoId={youtubeVideoId}
        onReady={onReady}
        onStateChange={onPlayerStateChange}
      />
      <ShowNotes transcript={transcript} seekTo={seekTo}/>
      {/* The transcript component reads the callback as `scrubTo`; `seekTo` is kept for compatibility. */}
      <InteractiveTranscript
        transcript={transcript}
        seekTo={seekTo}
        scrubTo={seekTo}
        currentTime={currentTime}
      />
    </section>
  );
}
export default YouTubeInteractive;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment