Last active
August 26, 2025 22:34
-
-
Save CodeWithOz/0b01d558adac703673aceca48471dca7 to your computer and use it in GitHub Desktop.
Agent for cleaning up named entities in YouTube video transcripts.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| ... | |
# Structured verdict that the replacement reviewer LLM is asked to produce.
# (Kept as a comment rather than a docstring so the model's schema is unchanged.)
class ReplacementReviewOutcome(BaseModel):
    # Single boolean flag; the description text is part of the schema.
    replaced_fully: bool = Field(
        description="Whether the entity was replaced fully.",
    )
| ... | |
class DemoEnrichmentAgent:
    """Agent for cleaning up named entities in video transcripts.

    Only the replacement-review pieces appear in this excerpt; each ``...``
    marks code that the excerpt leaves out.
    """

    ...
    # Chat model configured to answer with a ``ReplacementReviewOutcome``.
    replacement_reviewer_llm: Runnable

    def __init__(self):
        ...
        reviewer_model = init_chat_model(
            "gpt-5-mini-2025-08-07", model_provider="openai"
        )
        self.replacement_reviewer_llm = reviewer_model.with_structured_output(
            ReplacementReviewOutcome
        )

    ...

    async def replacement_reviewer_node(self, state: AgentState):
        """Ask the reviewer LLM whether the current entity was fully replaced.

        Reads ``verified_entities``, ``replacement_loop_idx``,
        ``replacement_pass_count`` and ``updated_transcript_text`` from
        ``state`` and returns a partial state update with the keys
        ``replacement_loop_idx`` and ``replacement_pass_count``.

        NOTE(review): the value returned for ``replacement_loop_idx`` is a
        0/1 step rather than an absolute index -- presumably the graph state
        merges it additively; confirm against the ``AgentState`` definition.
        """
        entities = state.get("verified_entities", [])
        loop_idx = state.get("replacement_loop_idx", 0)
        passes = state.get("replacement_pass_count", 0)
        print(f"Current replacement loop index: {loop_idx}")
        print(f"Current replacement pass count: {passes}")

        nothing_attempted_yet = loop_idx == 0 and passes == 0
        if nothing_attempted_yet:
            # The first entity has not had a replacement pass, so there is
            # nothing to review at this point.
            print("Skipping replacement review for first entity until pass is made")
            return {"replacement_loop_idx": 0, "replacement_pass_count": 0}

        # Entity whose replacement is being checked on this visit.
        entity = entities[loop_idx]
        transcript = state.get("updated_transcript_text")
        print(f"Reviewing replacement for entity: {entity.extracted_name}")

        # Instructions for the reviewer model, followed by the three inputs
        # it needs, each sent as its own labelled message.
        instructions = (
            "You are reviewing a text replacement operation. "
            "Determine if the original entity name has been fully "
            "replaced with the canonical name in the provided text. "
            "Check for edge cases such as partial matches and "
            "special characters. "
            "ONLY output JSON with a boolean 'replaced_fully'. "
            "For example, `{'replaced_fully': true}` or `{'replaced_fully': false}`."
        )
        messages = [
            SystemMessage(content=instructions),
            HumanMessage(content="UPDATED_TRANSCRIPT_TEXT:\n\n" + transcript),
            HumanMessage(content="ENTITY_ORIGINAL_NAME:\n\n" + entity.extracted_name),
            HumanMessage(content="ENTITY_CANONICAL_NAME:\n\n" + entity.canonical_name),
        ]
        outcome: ReplacementReviewOutcome = (
            await self.replacement_reviewer_llm.ainvoke(messages)
        )
        print(f"Replacement review outcome: {outcome}")

        # Advance when the reviewer is satisfied, or once the pass count has
        # reached 2; advancing also resets the pass counter.
        if outcome.replaced_fully or passes >= 2:
            return {"replacement_loop_idx": 1, "replacement_pass_count": 0}

        # Otherwise stay on the same entity and leave the pass count as-is.
        return {"replacement_loop_idx": 0, "replacement_pass_count": passes}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment