# EnkrateiaLucca/fetch_paper.py

## fetch_paper.py
# 1. Fetch a random paper from arXiv in one of these fields: machine learning, AI, NLP, computer vision, etc.

import arxiv
import random
import time
import requests


def fetch_paper(title):
    """
    Fetches a paper from arXiv based on the given title and downloads it.

    The PDF of the first search result is written to "paper.pdf" (relative
    to the current working directory), overwriting any existing file.

    Args:
        title (str): The title of the paper to fetch.

    Returns:
        A string containing the name of the downloaded paper.

    Raises:
        IndexError: If the search returns no results.
        requests.HTTPError: If the PDF download responds with an error status.
    """
    search = arxiv.Search(
        query=title,
        max_results=1,
        # NOTE(review): ordering by submission date yields the newest match,
        # not necessarily the most relevant one -- confirm this is intended.
        sort_by=arxiv.SortCriterion.SubmittedDate,
        sort_order=arxiv.SortOrder.Descending,
    )
    # Take the first result without assuming one exists, so an empty search
    # produces a descriptive error instead of a bare "list index out of range".
    paper = next(iter(search.results()), None)
    if paper is None:
        raise IndexError(f"No arXiv results found for query: {title!r}")

    # A timeout keeps the call from hanging forever; raise_for_status prevents
    # an HTTP error page from being saved as if it were the PDF.
    response = requests.get(paper.pdf_url, timeout=30)
    response.raise_for_status()
    with open("paper.pdf", "wb") as f:
        f.write(response.content)

    return f'Downloaded paper: {paper.title}'
# Use the function
if __name__ == "__main__":
    # fetch_paper() requires a search query and returns a status string (not
    # a dict), so pick one of the target fields at random and print the
    # message it returns. The guard keeps an import of this module from
    # triggering a download.
    topic = random.choice(
        [
            "machine learning",
            "artificial intelligence",
            "natural language processing",
            "computer vision",
        ]
    )
    print(fetch_paper(topic))

# Select a random page from that paper
# from langchain.document_loaders import PyPDFLoader
# loader = PyPDFLoader("./pdfs/2305.11165v1.pdf")
# pages = loader.load_and_split()
# page = random.choice(pages)
# page.page_content
	# 1. Fetch a random paper from arXiv in one of these fields: machine learning, AI, NLP, computer vision, etc.

	import arxiv
	import random
	import time
	import requests


	def fetch_paper(title):
		"""
		Fetches a paper from arXiv based on the given title and downloads it.

		The PDF of the first search result is written to "paper.pdf" (relative
		to the current working directory), overwriting any existing file.

		Args:
			title (str): The title of the paper to fetch.

		Returns:
			A string containing the name of the downloaded paper.

		Raises:
			IndexError: If the search returns no results.
			requests.HTTPError: If the PDF download responds with an error status.
		"""
		search = arxiv.Search(
			query=title,
			max_results=1,
			# NOTE(review): ordering by submission date yields the newest match,
			# not necessarily the most relevant one -- confirm this is intended.
			sort_by=arxiv.SortCriterion.SubmittedDate,
			sort_order=arxiv.SortOrder.Descending,
		)
		# Take the first result without assuming one exists, so an empty search
		# produces a descriptive error instead of a bare "list index out of range".
		paper = next(iter(search.results()), None)
		if paper is None:
			raise IndexError(f"No arXiv results found for query: {title!r}")

		# A timeout keeps the call from hanging forever; raise_for_status prevents
		# an HTTP error page from being saved as if it were the PDF.
		response = requests.get(paper.pdf_url, timeout=30)
		response.raise_for_status()
		with open("paper.pdf", "wb") as f:
			f.write(response.content)

		return f'Downloaded paper: {paper.title}'
	# Use the function
	if __name__ == "__main__":
		# fetch_paper() requires a search query and returns a status string (not
		# a dict), so pick one of the target fields at random and print the
		# message it returns. The guard keeps an import of this module from
		# triggering a download.
		topic = random.choice(
			[
				"machine learning",
				"artificial intelligence",
				"natural language processing",
				"computer vision",
			]
		)
		print(fetch_paper(topic))

	# Select a random page from that paper
	# from langchain.document_loaders import PyPDFLoader
	# loader = PyPDFLoader("./pdfs/2305.11165v1.pdf")
	# pages = loader.load_and_split()
	# page = random.choice(pages)
	# page.page_content