Skip to content

Instantly share code, notes, and snippets.

@corneliusroemer
Created September 25, 2023 23:40
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save corneliusroemer/5f294dbe753ef611c9800465b484d357 to your computer and use it in GitHub Desktop.
Save corneliusroemer/5f294dbe753ef611c9800465b484d357 to your computer and use it in GitHub Desktop.
Automatically generate faculty profiles using PubMed abstracts and GPT-4
import argparse
from math import e
from Bio import Entrez
import openai
def search_latest_papers(faculty_name, source="Pubmed", num_papers=5):
    """Fetch abstracts of PubMed papers authored by ``faculty_name``.

    Runs an ``esearch`` for ``"<faculty_name>[Author]"`` and then retrieves
    all matching records with a single ``efetch`` request.

    :param faculty_name: Author name used in the ``[Author]`` search term.
    :param source: Literature source. Only ``"Pubmed"`` is handled; any other
        value yields an empty list.
    :param num_papers: Maximum number of records requested from the search
        (passed as ``retmax``).
    :return: List of abstract strings, one per article that has an abstract.
        Articles without an abstract are skipped, so the list may be shorter
        than ``num_papers``.
    """
    if source != "Pubmed":
        return []

    Entrez.email = "YOUR_EMAIL@example.com"

    # NOTE(review): hits are ordered by relevance, not by publication date,
    # even though the function name says "latest" -- confirm this is intended.
    handle = Entrez.esearch(
        db="pubmed",
        term=f"{faculty_name}[Author]",
        retmax=num_papers,
        sort="relevance",
        retmode="xml",
    )
    try:
        results = Entrez.read(handle)
    finally:
        # Close the handle even if parsing fails.
        handle.close()

    paper_ids = [str(paper_id) for paper_id in results["IdList"]]
    if not paper_ids:
        # Nothing matched; avoid sending an efetch request with an empty id.
        return []

    # Fetch every record in one request instead of one request per id.
    handle = Entrez.efetch(db="pubmed", id=",".join(paper_ids), retmode="xml")
    try:
        papers = Entrez.read(handle)
    finally:
        handle.close()

    abstracts = []
    for article in papers.get("PubmedArticle", []):
        try:
            sections = article["MedlineCitation"]["Article"]["Abstract"][
                "AbstractText"
            ]
        except KeyError:
            # Record carries no abstract; skip it.
            continue
        # Structured abstracts are split into several sections; join them all
        # rather than keeping only the first one.
        text = " ".join(str(section) for section in sections).strip()
        if text:
            abstracts.append(text)
    return abstracts
def generate_summary(abstracts, name, api_key, modeltype="chat"):
    """Ask the OpenAI API to write a short faculty profile for ``name``.

    :param abstracts: Abstracts used as background; interpolated into the
        prompt as-is.
    :param name: Name of the faculty member the profile is about.
    :param api_key: OpenAI API key. When falsy, ``openai.api_key`` is left
        untouched so a key configured elsewhere is not overwritten with
        ``None``.
    :param modeltype: ``"chat"`` (default) uses ``openai.ChatCompletion`` with
        ``gpt-4``; ``"completion"`` uses ``openai.Completion`` with
        ``gpt-3.5-turbo-instruct``.
    :return: The raw response object returned by the OpenAI client (not the
        extracted text).
    :raises ValueError: If ``modeltype`` is not ``"chat"`` or ``"completion"``.
    """
    # Validate before touching the client so a bad argument fails fast.
    if modeltype not in ("chat", "completion"):
        raise ValueError(
            f"modeltype must be 'chat' or 'completion', got {modeltype!r}"
        )

    if api_key:
        openai.api_key = api_key

    if modeltype == "completion":
        return openai.Completion.create(
            prompt=f"You are the head of a university department. Please produce a faculty profile of around 150 words for {name}. It shouldn't be too technical. Useful for general educated audience. These are some recent abstracts {abstracts}. Only use these abstracts for background. It's not important what the exact research is. More about the field and general topics. Make sure you don't assume someone is a Dr. or Professor. And don't be too positive. Understatement is better, be very conservative in calling someone 'prominent'. If a paper doesn't fit the others, assume it's a different researcher and ignore. Please start now:",
            model="gpt-3.5-turbo-instruct",
            max_tokens=500,
        )

    return openai.ChatCompletion.create(
        model="gpt-4",
        messages=[
            {
                "role": "system",
                "content": "You are the head of a university department. If you don't know something, don't guess. Just say you don't know. You tend to be conservative, you don't exaggerate. You are not a salesman. You write succinctly.",
            },
            {
                "role": "user",
                "content": f"Please produce a faculty profile of around 100 words for {name}. It shouldn't be too technical. Useful for general educated audience. These are some recent abstracts {abstracts}. Only use these abstracts for background. It's not important what the exact research is. More about the field and general topics. Make sure you don't assume someone is a Dr. or Professor. Just refer to them by name. And don't be too positive. Understatement is better, be very conservative in calling someone 'prominent'. If a paper doesn't fit the others, assume it's a different researcher and ignore. Don't go into too much detail as research areas often change. Everyone knows that this is a faculty profile, definitely do no say things like '{name} is a faculty member'. Start with '{name} specializes in' or '{name} is an expert in'.",
            },
        ],
        max_tokens=500,
    )
def main():
    """Command-line entry point: fetch abstracts and print a generated profile.

    Parses the command-line arguments, retrieves abstracts for the given
    faculty name, optionally echoes them (``--verbose``), and prints the text
    of the generated profile. Exits with status 1 when no abstracts are found,
    since there would be nothing to base a profile on.
    """
    parser = argparse.ArgumentParser(
        description="Fetch the latest papers' abstracts for a faculty from PubMed."
    )
    parser.add_argument("faculty_name", help="Name of the faculty member.")
    parser.add_argument(
        "--source",
        choices=["Pubmed"],
        default="Pubmed",
        help="Source to fetch papers from. Currently supports only 'Pubmed'.",
    )
    parser.add_argument(
        "--num-papers",
        type=int,
        default=5,
        help="Number of papers to retrieve. Default is 5.",
    )
    parser.add_argument("--api-key", help="OpenAI API key.")
    parser.add_argument("--verbose", action="store_true", help="Print verbose output.")
    args = parser.parse_args()

    abstracts = search_latest_papers(args.faculty_name, args.source, args.num_papers)
    if not abstracts:
        # Without abstracts the model has no background material; stop here
        # instead of spending an API call on a profile built from nothing.
        parser.exit(
            status=1,
            message=f"No abstracts found for {args.faculty_name!r}; nothing to summarize.\n",
        )

    if args.verbose:
        for idx, abstract in enumerate(abstracts, 1):
            print(f"Abstract {idx}: {abstract}\n")

    summary = generate_summary(abstracts, args.faculty_name, args.api_key)
    # The chat response keeps the generated text under choices[0].message.content.
    extraction = summary["choices"][0]["message"]["content"]
    print(extraction)
# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment