Skip to content

Instantly share code, notes, and snippets.

@EnkrateiaLucca
Created May 28, 2023 10:24
Show Gist options
  • Save EnkrateiaLucca/38bd8a7ed55fb028f4282518fe2fd2d7 to your computer and use it in GitHub Desktop.
Save EnkrateiaLucca/38bd8a7ed55fb028f4282518fe2fd2d7 to your computer and use it in GitHub Desktop.
fetch arxiv paper by title
# 1. Fetch a paper from arXiv by title/query (e.g. in machine learning, AI, NLP, computer vision, etc.) and download its PDF.
import arxiv
import random
import time
import requests
def fetch_paper(title: str, output_path: str = "paper.pdf") -> str:
    """Search arXiv for ``title`` and download the first result's PDF.

    Args:
        title: Query string passed to the arXiv search (typically a paper
            title). Must not be empty or whitespace-only.
        output_path: File the PDF bytes are written to. Defaults to
            ``"paper.pdf"`` in the current working directory; an existing
            file at that path is overwritten.

    Returns:
        A status message of the form ``"Downloaded paper: <paper title>"``.

    Raises:
        ValueError: If ``title`` is empty or contains only whitespace.
        IndexError: If the search yields no results.
        requests.HTTPError: If the PDF download returns an HTTP error status.
    """
    # Reject blank queries up front instead of sending them to the API.
    if not title or not title.strip():
        raise ValueError("title must be a non-empty string")

    # NOTE(review): results are ordered by submission date (newest first),
    # not by relevance, so the most recent match for the query terms is
    # returned -- confirm this is the intended behavior for title lookups.
    search = arxiv.Search(
        query=title,
        max_results=1,
        sort_by=arxiv.SortCriterion.SubmittedDate,
        sort_order=arxiv.SortOrder.Descending,
    )

    # Take only the first result; fail with a clear message when there is none.
    paper = next(iter(search.results()), None)
    if paper is None:
        raise IndexError(f"No arXiv results found for query: {title!r}")

    # A timeout keeps a stalled connection from hanging forever, and
    # raise_for_status() prevents an HTTP error page being saved as a PDF.
    response = requests.get(paper.pdf_url, timeout=30)
    response.raise_for_status()

    with open(output_path, "wb") as pdf_file:
        pdf_file.write(response.content)

    return f'Downloaded paper: {paper.title}'
# Use the function: fetch_paper() requires a search string and returns a
# status message (a str), so pick a topic at random, download the matching
# paper, and print the message.
if __name__ == "__main__":
    search_topics = [
        "machine learning",
        "artificial intelligence",
        "natural language processing",
        "computer vision",
    ]
    print(fetch_paper(random.choice(search_topics)))
# Select a random page from that paper
# from langchain.document_loaders import PyPDFLoader
# loader = PyPDFLoader("./pdfs/2305.11165v1.pdf")
# pages = loader.load_and_split()
# page = random.choice(pages)
# page.page_content
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment