Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 2 You must be signed in to fork a gist
  • Save thekie/1469ebe2622882ae8f78b11c7012946e to your computer and use it in GitHub Desktop.
Save thekie/1469ebe2622882ae8f78b11c7012946e to your computer and use it in GitHub Desktop.
The code I used in this video: https://youtu.be/lNdpu6u9ZYM
import copy
import json
import os
from openai import OpenAI
# OpenAI API client used by the transcription and summarization steps below.
# The key is taken from the OPENAI_API_KEY environment variable so the secret
# does not have to live in the source file; the placeholder is kept only as a
# fallback for people who prefer to paste their key in directly.
client = OpenAI(
    api_key=os.environ.get("OPENAI_API_KEY", "<Your api key here>")
)
# Text passed as the `prompt` argument of every transcription request.
# The leading and trailing newlines are part of the value.
prompt = (
    "\n"
    'Transcribe the following audio from videos of the youtube channel "Kie Codes" into English.'
    "\n"
)
# One {"url", "title"} record per video.
# Order matters: the rest of the script looks videos up by position, using
# the numeric file name minus one as the index (file "1" -> metadata[0]).
metadata = [
    {"url": video_url, "title": video_title}
    for video_url, video_title in (
        (
            "https://youtu.be/KdelZvfcPfk?si=IZVJVsinerSj6tN8",
            "5 Productivity Tips for Programmers",
        ),
        (
            "https://youtu.be/Hlp-9cdImSM?si=HO2B24MerwuS7B0b",
            "How to commit better with Git",
        ),
        (
            "https://youtu.be/EvpZkdkp-v0?si=4gFG9XB7Th9I42kY",
            "How to use GIT HOOKS for better COMMITS (PYTHON CODE INCLUDED)",
        ),
        (
            "https://youtu.be/T0qWZ7St_GE?si=yGyhL-9XpqVtrHr3",
            "What Programming Language should I learn first 2022?",
        ),
        (
            "https://youtu.be/211tiIqZ-58?si=mvKAcpmqVd5juaFX",
            "Learn faster to code in a new programming language",
        ),
        (
            "https://youtu.be/JF4z2u2ftv0?si=jsflpLbFDCry91nG",
            "Learn programming efficiently (5 Tips)",
        ),
        (
            "https://youtu.be/zombLkjem00?si=Fe1-2vX0LzQGVNi_",
            "5 Things I wish I knew before becoming a software engineer",
        ),
        (
            "https://youtu.be/uQj5UNhCPuo?si=VfJYXM7JPgP2zQG6",
            "Genetic Algorithms Explained By Example",
        ),
        (
            "https://youtu.be/nhT56blfRpE?si=gd9jZQsxTjtnWWVt",
            "Genetic Algorithm from Scratch in Python (tutorial with code)",
        ),
        (
            "https://youtu.be/aOsET8KapQQ?si=yC9o95_5iapNJxli",
            "Genetic Algorithm in Python generates Music (code included)",
        ),
        (
            "https://youtu.be/5y7pQaP-5Qw?si=A3b7488gx30mUpQU",
            "Why you should use Type Hints in Python - Are type hints worth it?",
        ),
        (
            "https://youtu.be/a1NLvZ5rgvQ?si=zJuviwEn9d0MsJVr",
            "How to create a pre-commit git hook for your python type checker?",
        ),
        (
            "https://youtu.be/yScuF1UgGU0?si=kH0NoV-a69KTeKYx",
            "How to use python type hinting?",
        ),
        (
            "https://youtu.be/51EoNgwoaTo?si=JgIqDZC2SbHKeHjr",
            "Neural Network from Scratch in Python",
        ),
        (
            "https://youtu.be/P8Xrj70qtyo?si=D8dsXi-sPU9FNJ72",
            "Neural Networks Matrix Math and NumPy",
        ),
        (
            "https://youtu.be/pdyyQ-w_x0I?si=BRk6Si0JJZCC1ste",
            "When did you learn to program? (Ask Kie #1)",
        ),
        (
            "https://youtu.be/GAFh2Z5VtgM?si=Ff4fFfDDpnCoKVvq",
            "Coding an NFT crypto collectible in 3 days (DAY 1)",
        ),
        (
            "https://youtu.be/75D0JjX7EZg?si=zRE4sPjnJ92OmuOJ",
            "How to implement an ERC721 Token and connect it to OpenSea (DAY 2)",
        ),
        (
            "https://youtu.be/EnIrWNFwN-U?si=VNoOUYSQgWEbmo3K",
            "Creating a dApp and migrating to the Ethereum Mainnet (DAY 3)",
        ),
        (
            "https://youtu.be/LW1i-axSoYE?si=N8g0HNtBw-U3PDb5",
            "Random NFT pictures in under 100 lines of JavaScript",
        ),
    )
]
# Transcribe the audio
#
# Every ".mp3"/".mp4" file in "audio/" must be named after the 1-based
# position of its video in `metadata` (e.g. "audio/3.mp3" -> metadata[2]).
# Transcripts are cached as "transcript/<n>.txt", so a re-run only calls the
# API for files that have not been transcribed yet.
#
# NOTE: text files are opened with the platform default encoding on purpose,
# matching the other stages of this script that read them back.
print("Transcribing audio")
result_json = copy.deepcopy(metadata)

# Create the output folders up front so the writes below cannot fail with
# FileNotFoundError after an API call has already been made.
os.makedirs("transcript", exist_ok=True)
os.makedirs("final", exist_ok=True)

# Collect the usable audio files first. Validating the index here avoids
# transcribing a file that cannot be matched to a video afterwards, and keeps
# a name like "0" from silently mapping to the last video via index -1.
audio_files = []
for filename in os.listdir("audio"):
    name, ext = os.path.splitext(filename)
    if ext not in [".mp3", ".mp4"]:
        continue
    try:
        index = int(name) - 1
    except ValueError:
        index = -1
    if not 0 <= index < len(metadata):
        print("No metadata entry for: " + filename + " -> Skipping")
        continue
    audio_files.append((index, name, filename))

# os.listdir() returns entries in arbitrary order; sort by video number so
# the combined text file is deterministic and follows the metadata order.
audio_files.sort()

sections = []
for index, name, filename in audio_files:
    title = metadata[index]["title"]
    url = metadata[index]["url"]
    transcript_path = "transcript/" + name + ".txt"

    print("Transcribing: " + filename)
    if os.path.isfile(transcript_path):
        print("File already transcribed: " + name + ".txt -> Skipping")
    else:
        # `with` guarantees the audio handle is closed even if the request fails.
        with open("audio/" + filename, "rb") as audio_file:
            transcript = client.audio.transcriptions.create(
                model="whisper-1",
                file=audio_file,
                response_format="text",
                prompt=prompt,
            )
        with open(transcript_path, "w") as f:
            f.write(transcript)

    # Always read the transcript back from disk so cached and freshly created
    # transcripts go through exactly the same path.
    with open(transcript_path, "r") as f:
        transcript = f.read()

    sections.append(
        f"\nVideo Title: {title}\n"
        f"Video URL: {url}\n"
        f"Transcript: {transcript}\n"
        "---\n"
    )
    result_json[index]["transcript"] = transcript

    # One JSON file per video with its title, url and full transcript
    with open(f"final/video-{name}.json", "w") as f:
        json.dump(
            {
                "title": title,
                "url": url,
                "transcript": transcript,
            },
            f,
            indent=2,
        )

result = "".join(sections)

# Put all transcripts into one text file with urls and titles
with open("kiecodes-transcripts.txt", "w") as f:
    f.write(result)

# Put all transcripts into one json file with urls and titles
with open("kiecodes-videos.json", "w") as f:
    json.dump(result_json, f)

# Put all transcripts into one json file with urls and titles (beautified)
with open("kiecodes-videos-beautified.json", "w") as f:
    json.dump(result_json, f, indent=2)
# Summarize the transcript
#
# Each "transcript/<name>.txt" is sent to the chat model and the answer is
# stored as "summary/<name>.txt". Existing summaries are kept, so a re-run
# only requests summaries that are still missing.
print("Summarizing transcripts")

# Instructions for the chat model; identical for every video, so defined once
# instead of being rebuilt on each loop iteration.
SUMMARY_SYSTEM_PROMPT = """
You are a youtube script summarizer, which outputs a two sentence summary of a youtube video transcript.
The host in the video is Kie and is male. The channel name is Kie Codes.
The user will only provide you with a transcript of the full video.
You only output the two sentences that summary.
"""

# Make sure the output folder exists before any API call is made, otherwise
# the first write would fail after the request has already been paid for.
os.makedirs("summary", exist_ok=True)

# Sorted so the processing order (and the log output) is reproducible.
for filename in sorted(os.listdir("transcript")):
    # Transcripts are written as ".txt"; ignore anything else in the folder
    # (e.g. OS metadata files) rather than sending it to the API.
    if os.path.splitext(filename)[1] != ".txt":
        continue

    print(f"Summarizing: {filename}")
    if os.path.isfile(f"summary/{filename}"):
        print(f"File already summarized: {filename} -> Skipping")
        continue

    with open(f"transcript/{filename}", "r") as f:
        transcript = f.read()

    response = client.chat.completions.create(
        model="gpt-3.5-turbo-16k",
        messages=[
            {
                "role": "system",
                "content": SUMMARY_SYSTEM_PROMPT,
            },
            {
                "role": "user",
                "content": transcript,
            },
        ],
    )

    summary = response.choices[0].message.content
    if summary is None:
        # The message content is optional in the API response. Do not write
        # a file in that case, so the next run retries this transcript.
        print(f"No summary returned for: {filename} -> Skipping")
        continue

    with open(f"summary/{filename}", "w") as f:
        f.write(summary)
# Create a summary file with all titles, urls and summaries of all videos
#
# Summary files are matched to `metadata` by their numeric name:
# "summary/<n>.txt" belongs to metadata[n - 1].
result_json = copy.deepcopy(metadata)

# Keep only files that map to a metadata entry. This skips stray files
# (int() would raise on e.g. ".DS_Store") and prevents a name like "0" from
# silently being attached to the last video through index -1.
summary_files = []
for summary_filename in os.listdir("summary"):
    name = os.path.splitext(summary_filename)[0]
    try:
        index = int(name) - 1
    except ValueError:
        index = -1
    if not 0 <= index < len(metadata):
        print(f"No metadata entry for: {summary_filename} -> Skipping")
        continue
    summary_files.append((index, name, summary_filename))

# os.listdir() returns entries in arbitrary order; sort by video number so
# the text file lists the videos in the same order as `metadata`.
summary_files.sort()

sections = []
for index, name, summary_filename in summary_files:
    with open(f"summary/{summary_filename}", "r") as f:
        summary = f.read()

    title = metadata[index]["title"]
    url = metadata[index]["url"]
    sections.append(
        f"\nVideo Title: {title}\n"
        f"Video URL: {url}\n"
        f"Summary: {summary}\n"
        "---\n"
    )

    result_json[index]["summary"] = summary
    # Name of the per-video JSON file that holds the full transcript
    result_json[index]["transcript_file"] = f"video-{name}.json"

result = "".join(sections)

with open("kiecodes-summary.txt", "w") as f:
    f.write(result)

with open("kiecodes-summary-beautified.json", "w") as f:
    json.dump(result_json, f, indent=2)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment