Skip to content

Instantly share code, notes, and snippets.

@corneliusroemer
Created September 26, 2023 02:24
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save corneliusroemer/0a8fd4417ca61c479907b4601d550069 to your computer and use it in GitHub Desktop.
Save corneliusroemer/0a8fd4417ca61c479907b4601d550069 to your computer and use it in GitHub Desktop.
Automatically generate faculty profiles using PubMed abstracts and GPT-4, filtering papers for relevance based on author position
import argparse
from math import e
from Bio import Entrez
import openai
def search_latest_papers(faculty_name, source="Pubmed", num_papers=5):
    """Fetch abstracts and author names for papers matching a faculty member.

    Searches PubMed through ``Bio.Entrez`` using ``faculty_name`` as an
    ``[Author]`` term, then fetches every hit individually.

    :param faculty_name: Name inserted into the ``[Author]`` search term.
    :param source: Literature source. Only ``"Pubmed"`` is implemented; any
        other value yields an empty list.
    :param num_papers: Maximum number of search hits to request.
    :return: List of ``{"abstract": str, "authors": list[str]}`` dicts, one
        per hit that has both an abstract and an author list. ``authors``
        keeps the original author order, one entry per listed author.
    """
    if source != "Pubmed":
        return []

    Entrez.email = "YOUR_EMAIL@example.com"
    # NOTE(review): hits are ordered by relevance, not by publication date,
    # although the function name says "latest" -- confirm the intended order.
    handle = Entrez.esearch(
        db="pubmed",
        term=f"{faculty_name}[Author]",
        retmax=num_papers,
        sort="relevance",
        retmode="xml",
    )
    try:
        results = Entrez.read(handle)
    finally:
        # Close the handle even when parsing fails.
        handle.close()

    papers_info = []
    for paper_id in results["IdList"]:
        handle = Entrez.efetch(db="pubmed", id=paper_id, retmode="xml")
        try:
            papers = Entrez.read(handle)
        finally:
            handle.close()

        # Best effort: records without an abstract or an author list are
        # skipped instead of aborting the whole search.
        try:
            article = papers["PubmedArticle"][0]["MedlineCitation"]["Article"]
            sections = article["Abstract"]["AbstractText"]
            authors = article["AuthorList"]
        except (IndexError, KeyError):
            continue
        if not sections:
            continue

        # An abstract may consist of several sections; keep all of them
        # rather than only the first one.
        abstract = " ".join(str(section) for section in sections)

        # Some author entries have no "LastName" (e.g. a collective/group
        # author). Use a fallback instead of dropping the paper, and keep
        # one entry per author so that author positions stay meaningful.
        author_names = [
            str(author.get("LastName") or author.get("CollectiveName") or "")
            for author in authors
        ]
        papers_info.append({"abstract": abstract, "authors": author_names})
    return papers_info
def generate_summary(abstracts, name, api_key, model):
    """Ask the OpenAI API for a short faculty profile based on abstracts.

    :param abstracts: Abstract texts given to the model as background. The
        value is interpolated into the prompt as-is.
    :param name: Full name of the faculty member the profile is about.
    :param api_key: OpenAI API key. When ``None`` the key already configured
        on the ``openai`` module is left untouched.
    :param model: OpenAI model name. ``"gpt-3.5-turbo-instruct"`` is sent to
        the completion endpoint; every other name is sent to the chat
        endpoint.
    :return: The raw response object returned by the ``openai`` client. The
        completion and chat endpoints return differently shaped choices, so
        callers must handle both.
    """
    # Do not overwrite an already configured key with None.
    if api_key is not None:
        openai.api_key = api_key

    if model == "gpt-3.5-turbo-instruct":
        return openai.Completion.create(
            prompt=f"You are the head of a university department. Please produce a faculty profile of around 150 words for {name}. It shouldn't be too technical. Useful for general educated audience. These are some recent abstracts {abstracts}. Only use these abstracts for background. It's not important what the exact research is. More about the field and general topics. Make sure you don't assume someone is a Dr. or Professor. And don't be too positive. Understatement is better, be very conservative in calling someone 'prominent'. If a paper doesn't fit the others, assume it's a different researcher and ignore. Please start now:",
            model=model,
            max_tokens=500,
        )

    # Honour the requested model instead of always calling "gpt-4".
    return openai.ChatCompletion.create(
        model=model,
        messages=[
            {
                "role": "system",
                "content": "You are the head of a university department. If you don't know something, don't guess. Just say you don't know. You tend to be conservative, you don't exaggerate. You are not a salesman. You write succinctly.",
            },
            {
                "role": "user",
                "content": f"Please produce a faculty profile of around 100 words for {name}. It shouldn't be too technical. Useful for general educated audience. These are some recent abstracts {abstracts}. Only use these abstracts for background. It's not important what the exact research is. More about the field and general topics. Make sure you don't assume someone is a Dr. or Professor. Just refer to them by name. And don't be too positive. Understatement is better, be very conservative in calling someone 'prominent'. If a paper doesn't fit the others, assume it's a different researcher and ignore. Don't go into too much detail as research areas often change. Everyone knows that this is a faculty profile, definitely do no say things like '{name} is a faculty member'. Start with '{name} specializes in' or '{name} is an expert in'.",
            },
        ],
        max_tokens=500,
    )
def _is_relevant_position(authors, last_name, num_first, num_last):
    """Return True if ``last_name`` is among the first or last N authors."""
    # Guard the slices: ``authors[-0:]`` would select every author.
    leading = authors[:num_first] if num_first > 0 else []
    trailing = authors[-num_last:] if num_last > 0 else []
    return last_name in leading or last_name in trailing


def _completion_text(completion):
    """Extract the generated text from a chat or a plain completion response."""
    # Assumes the response supports dict-style access, as the original
    # indexing did. Chat choices carry the text under "message"/"content",
    # plain completion choices under "text".
    choice = completion["choices"][0]
    if "message" in choice:
        return choice["message"]["content"]
    return choice["text"]


def main():
    """Command-line entry point: fetch abstracts, filter them, print a profile.

    Parses the command line, retrieves papers for the given name, keeps only
    those where the last name appears among the first ``--relevant-first`` or
    last ``--relevant-last`` authors, and prints the generated profile.
    """
    parser = argparse.ArgumentParser(
        description="Fetch the latest papers' abstracts for a faculty from PubMed."
    )
    parser.add_argument("first", help="First Name of the faculty member.")
    parser.add_argument("last", help="Last Name of the faculty member.")
    parser.add_argument(
        "--source",
        choices=["Pubmed"],
        default="Pubmed",
        help="Source to fetch papers from. Currently supports only 'Pubmed'.",
    )
    parser.add_argument(
        "--num-papers",
        type=int,
        default=5,
        help="Number of papers to retrieve. Default is 5.",
    )
    parser.add_argument("--api-key", help="OpenAI API key.")
    parser.add_argument("--verbose", action="store_true", help="Print verbose output.")
    parser.add_argument("--model", help="OpenAI model to use.", default="gpt-3.5-turbo")
    # type=int is required: without it, values given on the command line are
    # strings and the author-list slicing below raises TypeError.
    parser.add_argument(
        "--relevant-first",
        type=int,
        help="Number of first author positions that are relevant.",
        default=2,
    )
    parser.add_argument(
        "--relevant-last",
        type=int,
        help="Number of last author positions that are relevant.",
        default=2,
    )
    args = parser.parse_args()

    full_name = f"{args.first} {args.last}"
    papers = search_latest_papers(full_name, args.source, args.num_papers)

    # Keep only papers where the faculty member holds a leading or trailing
    # author position.
    relevant_abstracts = []
    for paper in papers:
        relevant = _is_relevant_position(
            paper["authors"], args.last, args.relevant_first, args.relevant_last
        )
        if relevant:
            relevant_abstracts.append(paper["abstract"])
        if args.verbose:
            label = "Relevant" if relevant else "Ignoring"
            print(f"{label} abstract: {paper['abstract']}\n")

    summary = generate_summary(relevant_abstracts, full_name, args.api_key, model=args.model)
    print(_completion_text(summary))
# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment