Skip to content

Instantly share code, notes, and snippets.

@CodeWithOz
Last active August 26, 2025 22:34
Show Gist options
  • Select an option

  • Save CodeWithOz/0b01d558adac703673aceca48471dca7 to your computer and use it in GitHub Desktop.

Select an option

Save CodeWithOz/0b01d558adac703673aceca48471dca7 to your computer and use it in GitHub Desktop.
Agent for cleaning up named entities in YouTube video transcripts.
...
class TavilySearchResult(TypedDict):
    """Web-search hit, reduced to the two text fields this file reads.

    Both keys are declared as required strings. Code in this file that
    consumes these records reads them with ``.get(..., "")``, so a record
    missing either key is tolerated at runtime.
    """

    # Headline/title text of the search hit.
    title: str
    # Body/snippet text of the search hit.
    content: str
class BaseVerifiedEntity(BaseModel):
    # Minimal schema holding only the canonical name of an entity.
    # NOTE(review): documented with comments instead of a docstring on
    # purpose -- a model docstring may be picked up as the schema description
    # when this model is handed to an LLM for structured output; confirm
    # before adding one.
    canonical_name: str = Field(
        description="The canonical name of the entity.",
    )
class VerifiedEntity(BaseVerifiedEntity):
    # Extends the base schema (which carries `canonical_name`) with the name
    # exactly as it was extracted, so both spellings travel together.
    # Kept as comments rather than a docstring to leave the generated schema
    # metadata unchanged.
    extracted_name: str = Field(
        description="The name extracted from the video.",
    )
...
class DemoEnrichmentAgent:
    """Agent for cleaning up named entities in YouTube video transcripts.

    Only the canonical-name verification step is shown in this excerpt; the
    ``...`` placeholders mark code that was elided from the listing.
    """

    ...
    # Chat model wrapped so that its reply is parsed into `BaseVerifiedEntity`.
    entity_verifier_llm: Runnable

    def __init__(self):
        ...
        self.entity_verifier_llm = init_chat_model(
            "gpt-4o-mini", model_provider="openai"
        ).with_structured_output(BaseVerifiedEntity)

    async def get_canonical_name(
        self, entity: NamedEntity, search_results: list[TavilySearchResult]
    ) -> VerifiedEntity:
        """Resolve the canonical name of ``entity`` from web-search results.

        Args:
            entity: The extracted entity; only ``entity.entity_name`` is read.
            search_results: Search hits about the entity. Each hit's
                ``title`` and ``content`` are joined into the LLM prompt.

        Returns:
            A ``VerifiedEntity`` pairing the extracted name with the canonical
            name. The extracted name is reused as the canonical name when
            there are no search results, or when the LLM yields no usable
            (missing or blank) canonical name.
        """
        extracted_name = entity.entity_name

        if not search_results:
            # Nothing to verify against: treat the extracted name as canonical.
            return VerifiedEntity(
                extracted_name=extracted_name,
                canonical_name=extracted_name,
            )

        # One "title\ncontent" paragraph per hit, separated by blank lines.
        results_txt = "\n\n".join(
            f'{result.get("title", "")}\n{result.get("content", "")}'
            for result in search_results
        )
        human_message = (
            f"EXTRACTED_ENTITY_NAME:\n{extracted_name}\n\n"
            f"SEARCH_RESULTS_TEXT:\n{results_txt}"
        )
        print(
            f"Getting canonical name from search results for entity {extracted_name!r}"
        )
        verifier_res: BaseVerifiedEntity = await self.entity_verifier_llm.ainvoke(
            [
                SystemMessage(
                    content=(
                        "You are a perceptive and highly skilled data extractor. "
                        "You will receive 2 inputs: "
                        "(1) an entity name that was extracted from a video transcript; "
                        "(2) a series of newline-separated search results from a web "
                        "search about that entity. "
                        "Your task is to extract the canonical name of the entity from "
                        "the search results. Your output MUST ONLY be a JSON object with ONLY "
                        "one key `canonical_name`. "
                        'For example, `{"canonical_name": "<confirmed-canonical-name>"}.`'
                    )
                ),
                HumanMessage(content=human_message),
            ]
        )

        # The structured-output runnable may hand back no parsed object (for
        # example when the model's reply could not be mapped to the schema) or
        # a blank name. Fall back to the extracted name in that case, the same
        # way the no-search-results path does, instead of raising
        # AttributeError or storing an empty canonical name.
        canonical_name = (
            verifier_res.canonical_name if verifier_res is not None else None
        )
        if not canonical_name or not canonical_name.strip():
            print(
                f"LLM returned no canonical name for entity {extracted_name!r}; "
                "keeping the extracted name"
            )
            canonical_name = extracted_name
        else:
            print(
                f"LLM verified entity {extracted_name!r} as {canonical_name!r}"
            )

        return VerifiedEntity(
            extracted_name=extracted_name,
            canonical_name=canonical_name,
        )
...
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment