-
-
Save ziedbentahar/9ecc91f8f6c6dbea0cb443b93d54e8e8 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import { storeTranscript } from "adapters/transcript-repository"; | |
import { YoutubeTranscript } from "youtube-transcript"; | |
/**
 * Entry point: downloads the transcript for a YouTube video, regroups it into
 * sentences, and hands the result to the transcript repository.
 *
 * @param event - Carries the video URL to fetch the transcript for and the
 *   request id that is passed to `storeTranscript` alongside the text.
 * @returns A promise that settles once `storeTranscript` has completed.
 */
export const handler = async (event: {
  youtubeVideoUrl: string;
  requestId: string;
}) => {
  const rawTranscript = await YoutubeTranscript.fetchTranscript(
    event.youtubeVideoUrl
  );

  // One sentence per line in the stored text.
  const transcriptText = [
    ...getSentencesFromYoutubeTranscript(rawTranscript),
  ].join("\n");

  await storeTranscript(event.requestId, transcriptText);
};
/**
 * Regroups transcript fragments into sentences.
 *
 * Fragments are accumulated in order; whenever a fragment's text ends with a
 * period, the accumulated fragments are joined with single spaces, embedded
 * newlines are replaced by spaces, and the result is yielded as one sentence.
 * Any fragments left over after the last period are yielded as a final
 * (unterminated) sentence.
 *
 * An empty transcript yields nothing, and no empty trailing sentence is
 * produced when the last fragment already ends with a period.
 *
 * @param transcript - Ordered transcript fragments; only `text` is read.
 * @returns A generator of sentence strings.
 */
function* getSentencesFromYoutubeTranscript(
  transcript: readonly { text: string }[]
): Generator<string, void, undefined> {
  // Joins buffered fragments and flattens line breaks inside them.
  const toSentence = (fragments: readonly string[]): string =>
    fragments.join(" ").replace(/\n/g, " ");

  let currentSentence: string[] = [];

  // for…of is safe on an empty array, unlike a do…while that reads index 0 first.
  for (const { text } of transcript) {
    currentSentence.push(text);
    if (text.endsWith(".")) {
      yield toSentence(currentSentence);
      currentSentence = [];
    }
  }

  // Flush the unterminated tail only if there is one, so callers never get a
  // spurious empty sentence.
  if (currentSentence.length > 0) {
    yield toSentence(currentSentence);
  }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment