Skip to content

Instantly share code, notes, and snippets.

@jryebread
Last active February 2, 2024 01:32
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
Save jryebread/295e453914e3faf671170f6bdf2cbd83 to your computer and use it in GitHub Desktop.
import requests
from youtube_comment_downloader import *
# Instruction prompt sent to the LLM. The video topic and the collected
# comments are appended to this text at call time in process_youtube_videos().
# NOTE: this string is part of runtime behavior (it shapes the LLM output),
# so its wording — including the "atleast" typo — is left untouched here.
PROMPT_CLAUDE = "you are a writer at a product review blog. You are tasked with generating an article (WRITTEN IN VALID MARKDOWN FORMAT WITH QUOTES FROM THE ACTUAL COMMENTERS IN CASES WHERE YOU THINK APPROPRIATE) that encapsulates the essence of the topic discussed in the provided comments under a post, along with incorporating the sentiments and opinions expressed in the comments section. The article should begin with an introduction that provides a concise overview of the video topic, followed by a detailed analysis of the key points discussed therein. Ensure that the article reflects a balanced representation of the various perspectives expressed in the comments, highlighting both positive and negative sentiments where applicable. Additionally, the article should delve into any recurring themes or noteworthy insights conveyed by the commenters. Please maintain a coherent narrative throughout the article and aim for a length that sufficiently covers the breadth of the discussion while remaining engaging to readers. Aim for atleast 4000 words."
# TODO: Implement your LLM call. The author used Claude for writing quality,
# but a local OLLAMA model works too and is free.
def query_claude2(prompt: str, max_tokens: int = 50000):
    """Send *prompt* to an LLM and return the generated article text.

    Deliberate stub: currently returns None, so process_youtube_videos()
    will crash at ``f.write(article)`` until this is implemented.

    Args:
        prompt: Full instruction + topic + comments text to send.
        max_tokens: Generation budget for the LLM call (unused in the stub).
    """
    pass
def get_video_title_from_url(video_url, timeout=10):
    """Look up a video's title via the noembed.com oEmbed service.

    Args:
        video_url: Full URL of the video (any noembed-supported site).
        timeout: Seconds to wait for the HTTP response before giving up
            (new parameter; defaults keep the old call signature working).

    Returns:
        The video title, 'Title not found' if the response lacks one, or
        'Error fetching title' on a non-200 status or a network failure.
    """
    try:
        # Let requests build and percent-encode the query string instead of
        # splicing video_url into the URL by hand — the hand-built version
        # broke on URLs containing '&' or '#'. The timeout prevents the
        # whole batch run from hanging on one dead request.
        response = requests.get(
            "https://noembed.com/embed",
            params={"dataType": "json", "url": video_url},
            timeout=timeout,
        )
    except requests.RequestException:
        return 'Error fetching title'
    if response.status_code == 200:
        data = response.json()
        return data.get('title', 'Title not found')
    return 'Error fetching title'
def process_youtube_videos(video_file):
    """Generate one Markdown review article per YouTube URL in *video_file*.

    For each URL: download the popular comments, fetch the video title,
    prompt the LLM with PROMPT_CLAUDE + title + comments, and write the
    result to ``article_<title>.mdx`` in the current directory.

    Args:
        video_file: Path to a text file with one video URL per line;
            blank lines are ignored.
    """
    # readlines() keeps the trailing "\n" on every line — the original code
    # passed that newline straight into the comment downloader and the
    # noembed lookup. Strip it, and skip blank lines while we're at it.
    with open(video_file, 'r') as f:
        video_urls = [line.strip() for line in f if line.strip()]

    for video_url in video_urls:
        # Download comments for this video, most popular first.
        downloader = YoutubeCommentDownloader()
        comments = downloader.get_comments_from_url(video_url, sort_by=SORT_BY_POPULAR)

        # Collect comment texts; join once instead of quadratic += concat.
        texts = []
        for comment in comments:
            text = comment.get('text', '')
            print(text)
            texts.append(text)
        comments_string = ",".join(texts)

        video_title = get_video_title_from_url(video_url)
        print(video_title)

        # Prompt the LLM with the video topic and its comments.
        prompt = f"INSTRUCTIONS: {PROMPT_CLAUDE} Topic: {video_title}. Comments: {comments_string}"
        article = query_claude2(prompt)

        # Titles can contain characters that are illegal in filenames
        # (e.g. '/', ':'); replace anything non-alphanumeric-ish so
        # open() cannot fail or write outside the current directory.
        safe_title = "".join(
            c if c.isalnum() or c in " -_" else "_" for c in video_title
        )
        with open(f"article_{safe_title}.mdx", "w") as f:
            f.write(article)
# Script entry point: process every URL listed in video_urls.txt.
# NOTE(review): this runs on import as well — consider wrapping it in an
# `if __name__ == "__main__":` guard if this file is ever imported.
process_youtube_videos("video_urls.txt")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment