|
### From the citymeetings.nyc talk at NYC School of Data 2024
#
# CHAPTER EXTRACTION STEP 3: WRITE TITLES AND DESCRIPTIONS
# --------------------------------------------------------
#
# I use `instructor` instead of a plaintext prompt. There is no hand-written system
# prompt -- I rely on the schema of the models to guide the LLM here.
#
# The docstring comments and descriptions all end up in the LLM system prompt. `instructor`
# does this.
#
# I also use these prompts to edit titles and descriptions in my review UI (which you can
# see in the slides for my talk).
|
|
|
import os
from typing import Type, Union

import instructor
from openai import OpenAI
from pydantic import BaseModel, Field
|
|
|
|
|
# Response schema for "question" chapters: a council member asks a question and
# receives an answer.
#
# NOTE: the class docstring and the Field descriptions below are prompt text.
# `instructor` passes them to the LLM along with the schema, so editing them
# changes model output. Keep developer notes in `#` comments like this one.
class QuestionChapterTitleAndDescription(BaseModel):
    """
    I will provide you with a portion of a transcript of a NYC city council meeting,
    which encompasses a chapter of the meeting where a council member is asking a
    question and receiving an answer.

    I will also provide you the initial title and description of the chapter.

    Your job is to edit the title and description subject to the rules provided.
    """

    # Required. The title must be phrased as one or more questions, in sentence
    # case, with acronyms spelled out.
    edited_title: str = Field(
        ...,
        description="""
        The title of the chapter MUST be phrased as a question, or multiple questions, even if
        speakers in the transcript portion are not phrasing their speech as a question.

        For example, the speaker may phrase a question as a statement, like "I would like to know
        more about the budget for the new school." You would need to phrase this as a question,
        like "What is the budget for the new school?"

        The question/questions in the title must match what is being asked or covered in the
        transcript portion provided.

        Do not capitalize the entire question, only the first word and proper nouns.

        If you use acronyms, you must always spell them out. For example:

        - This is bad: "What is the budget for ACE?"
        - This is good: "What is the budget for the Accelerated Career Education (ACE) program?"
        """,
    )
    # Required. One-sentence summary of the answer, followed by 3-5 bullet
    # points, in plain present-tense language with no editorializing.
    edited_description: str = Field(
        ...,
        description="""
        For the description, first provide a single-sentence summary of the answer given in the transcript.

        Make these sentences concise and to the point. Do not use any filler words, unnecessary adjectives or
        adverbs. The language should be plain.

        Then provide 3-5 bullet points that give more details about the answer. These bullet points should also
        be concise and to the point, with the same style as the summary.

        Try to make the summary Axios-style.

        Do not editorialize anything. Just state the facts as presented in the transcript. Do not
        say things like "The council member gave a great answer." or "This exchange highlights...", etc.

        Use present tense in describing what is happening in the chapter. For example, "The council member
        explains that the budget for the new school is $10 million." instead of "The council member explained..."

        Finally, format the description like so:

        ```
        {SINGLE_SENTENCE_SUMMARY}

        - {BULLET_POINT_1}
        ...
        - {BULLET_POINT_N}
        ```
        """,
    )
|
|
|
|
|
# Response schema for procedural chapters of a meeting.
#
# NOTE: the class docstring and the Field descriptions below are prompt text.
# `instructor` passes them to the LLM along with the schema, so editing them
# changes model output. Keep developer notes in `#` comments like this one.
class ProcedureChapterTitleAndDescription(BaseModel):
    """
    I will provide you with a portion of a transcript of a NYC city council meeting,
    which encompasses a procedural segment of the meeting.

    I will also provide you the initial title and description of the chapter.

    Your job is to edit the title and description subject to the rules provided.
    """

    # Required. "Front Matter" for the opening housekeeping segment; otherwise
    # a plain statement of the procedure.
    edited_title: str = Field(
        ...,
        description="""
        If the chapter is at the beginning of the transcript and involves council staff asking
        for attendees to silence their phones and prepare for the meeting, the title must be "Front Matter".

        Otherwise the title for the chapter must be a plain statement of the procedure that is happening.
        """,
    )

    # Required. A single sentence only; no bullet points for this chapter type.
    edited_description: str = Field(
        ...,
        description="""
        For the description, provide a single-sentence summary of the procedure, and nothing else.
        """,
    )
|
|
|
|
|
# Response schema for testimony chapters: testimony given by a meeting attendee.
#
# NOTE: the class docstring and the Field descriptions below are prompt text.
# `instructor` passes them to the LLM along with the schema, so editing them
# changes model output. Keep developer notes in `#` comments like this one.
class TestimonyChapterTitleAndDescription(BaseModel):
    """
    I will provide you with a portion of a transcript of a NYC city council meeting,
    which encompasses a testimony given by a meeting attendee.

    I will also provide you the initial title and description of the chapter.

    Your job is to edit the title and description subject to the rules provided.
    """

    # Required. Format is "<speaker> on <topic>". The example titles carry no
    # trailing period so they match that format, and the example speaker's
    # name is spelled the same way throughout.
    edited_title: str = Field(
        ...,
        description="""
        The title for the testimony must be "<speaker> on <topic>".

        <speaker> should include all details about the speaker that you have. Some examples:

        - "Manuela Frias, President of the Workers Unite Project"
        - "Manuela Frias from the Workers Unite Project", if you only have the name and the organization.
        - "Manuela Frias", if you only have the name.
        - "Workers Unite Project", if you have the organization but not the name.

        With the topic, get very specific! It is okay if the topic is long.

        If you can't infer the speaker's name, role, or organization just use "Member of the Public" and make the title as specific as you can.

        If you use acronyms, you must always spell them out. For example:

        - This is bad: "Manuela Frias on ACE"
        - This is good: "Manuela Frias on the Accelerated Career Education (ACE) program at the NYC Department of Education"
        """,
    )

    # Required. One-sentence summary of the testimony, followed by 3-5 bullet
    # points, in plain present-tense language with no editorializing.
    edited_description: str = Field(
        ...,
        description="""
        For the description, first provide a single-sentence summary of the testimony.

        Make these sentences concise and to the point. Do not use any filler words, unnecessary adjectives or
        adverbs. The language should be plain.

        Then provide 3-5 bullet points that give more details about the testimony. These bullet points should also
        be concise and to the point, with the same style as the summary.

        Try to make the summary Axios-style.

        Do not editorialize anything. Just state the facts as presented in the transcript.

        Use present tense in describing what is happening in the chapter. For example, "The council member
        explains that the budget for the new school is $10 million." instead of "The council member explained..."

        Finally, format the description like so:

        ```
        {SINGLE_SENTENCE_SUMMARY}

        - {BULLET_POINT_1}
        ...
        - {BULLET_POINT_N}
        ```
        """,
    )
|
|
|
|
|
# Response schema for chapters of standalone remarks by a council member.
#
# NOTE: the class docstring and the Field descriptions below are prompt text.
# `instructor` passes them to the LLM along with the schema, so editing them
# changes model output. Keep developer notes in `#` comments like this one.
class RemarksChapterTitleAndDescription(BaseModel):
    """
    I will provide you with a portion of a transcript of a NYC city council meeting,
    which encompasses standalone remarks by a council member.

    I will also provide you the initial title and description of the chapter.

    Your job is to edit the title and description subject to the rules provided.
    """

    # Required. Format is "Council Member {name} on {topic}". The example
    # titles carry no trailing period so they match that format.
    edited_title: str = Field(
        ...,
        description="""
        The title of the chapter must be "Council Member {name} on {topic}".

        If you use acronyms, you must always spell them out. For example:

        - This is bad: "Council Member {name} on ACE"
        - This is good: "Council Member {name} on the Accelerated Career Education (ACE) program"
        """,
    )

    # Required. One-sentence summary of the remarks, followed by 3-5 bullet
    # points, in plain present-tense language with no editorializing.
    edited_description: str = Field(
        ...,
        description="""
        For the description, first provide a single-sentence summary of the council member's remarks.

        Make these sentences concise and to the point. Do not use any filler words, unnecessary adjectives or
        adverbs. The language should be plain.

        Then provide 3-5 bullet points that give more details about the remarks. These bullet points should also
        be concise and to the point, with the same style as the summary.

        Try to make the summary Axios-style.

        Do not editorialize anything. Just state the facts as presented in the transcript.

        Use present tense in describing what is happening in the chapter. For example, "The council member
        explains that the budget for the new school is $10 million." instead of "The council member explained..."

        Finally, format the description like so:

        ```
        {SINGLE_SENTENCE_SUMMARY}

        - {BULLET_POINT_1}
        ...
        - {BULLET_POINT_N}
        ```
        """,
    )
|
|
|
|
|
def edit_title_and_description(
    current_title: str,
    current_description: str,
    transcript_portion: str,
    additional_context: str,
    chapter_model: Type[
        Union[
            QuestionChapterTitleAndDescription,
            TestimonyChapterTitleAndDescription,
            ProcedureChapterTitleAndDescription,
            RemarksChapterTitleAndDescription,
        ]
    ],
) -> Union[
    QuestionChapterTitleAndDescription,
    TestimonyChapterTitleAndDescription,
    ProcedureChapterTitleAndDescription,
    RemarksChapterTitleAndDescription,
]:
    """
    Ask OpenAI, via `instructor`, to edit a chapter's title and description.

    Two user messages are sent: the transcript portion, then the current title
    and description plus any additional context. There is no hand-written
    system message; the rules come from `chapter_model`.

    Args:
        current_title: The chapter's existing title.
        current_description: The chapter's existing description.
        transcript_portion: The transcript text covered by the chapter.
        additional_context: Extra guidance for the edit. When empty (or
            otherwise falsy), "No additional context." is sent in its place.
        chapter_model: The chapter model *class* (not an instance) to use as
            the response schema, e.g. `QuestionChapterTitleAndDescription`.

    Returns:
        An instance of `chapter_model` holding `edited_title` and
        `edited_description`.
    """

    # The API key is read from the environment on every call; a new patched
    # client is built each time.
    client = instructor.patch(OpenAI(api_key=os.getenv("OPENAI_API_KEY")))

    context_text = additional_context or "No additional context."

    return client.chat.completions.create(
        model="gpt-4-turbo-preview",
        messages=[
            {
                "role": "user",
                "content": transcript_portion,
            },
            {
                "role": "user",
                "content": f"""
                TITLE: {current_title}
                DESCRIPTION: {current_description}

                ADDITIONAL CONTEXT: {context_text}
                """,
            },
        ],
        # `response_model` takes the model class itself.
        response_model=chapter_model,
        max_retries=3,
    )