# lfoppiano/glutton.py

## glutton.py
import argparse
import os
from pathlib import Path

import requests

# Constants
# Base URL of the biblio-glutton lookup service. The value below is placeholder
# text, not a URL, and has to be replaced before the script can reach a server.
# main() appends "/service/lookup" to it, so the value should not include that path.
GLUTTON_URL = "ADD BIBLIO GLUTTON LOOKUP SERVICE"


def main(input_path: Path, output_path: Path, has_header: bool = False):
    """Look up every DOI in a text file and download its open-access PDF.

    Each non-empty line of ``input_path`` is sent as a DOI to the
    ``/service/lookup`` endpoint under ``GLUTTON_URL``. When the JSON answer
    carries a non-empty ``oaLink``, that link is handed to ``download_pdf``
    and saved as ``<output_path>/<doi with "/" replaced by "_">.pdf``.
    Lookup failures are printed and processing continues with the next DOI.

    Args:
        input_path: Text file with one DOI per line.
        output_path: Directory that receives the downloaded PDFs.
        has_header: When True, the first line of the file is skipped instead
            of being looked up as a DOI.
    """
    with open(input_path) as f:
        if has_header:
            # Consume the header row so it is not sent to the service.
            next(f, None)

        for line in f:
            # Strip all surrounding whitespace (not only "\n") so CRLF files
            # and whitespace-only lines are handled.
            doi = line.strip()
            if not doi:
                continue

            payload = {'doi': doi, 'postValidate': False}
            try:
                # The timeout keeps one unresponsive lookup from stalling the batch.
                response = requests.get(GLUTTON_URL + "/service/lookup",
                                        params=payload, timeout=30)
            except requests.RequestException as e:
                print(f"{doi}: lookup failed: {e}")
                continue

            if response.status_code != 200:
                print(f"{doi}: invalid request")
                print(response.content)
                continue

            try:
                oa_link = response.json().get('oaLink', '')
            except ValueError as e:
                # Raised when the response body is not valid JSON.
                print(f"{doi}: unreadable lookup response: {e}")
                continue

            # Nothing to download when the service knows no open-access copy.
            if not oa_link:
                continue

            # "/" in a DOI would otherwise be read as a directory separator.
            output_filename_path = os.path.join(output_path, doi.replace("/", "_")) + ".pdf"
            print(f"Download {doi} from {oa_link}")
            download_pdf(oa_link, output_filename_path)


def download_pdf(download_url: str, output_filename: str) -> None:
    """Download the file at ``download_url`` and save it as ``output_filename``.

    The body is streamed into ``output_filename + ".part"`` and renamed to
    ``output_filename`` only after the whole response has been written, so an
    HTTP error or an interrupted transfer never leaves a truncated or bogus
    file under the final name. Any failure is printed; nothing is raised.

    Args:
        download_url: URL to fetch.
        output_filename: Destination path of the saved file.
    """
    partial_filename = output_filename + ".part"
    try:
        # The context manager releases the streamed connection; the timeout
        # keeps a stalled server from blocking the caller forever.
        with requests.get(download_url, stream=True, timeout=60) as response:
            # Reject 4xx/5xx answers instead of saving an error page as a PDF.
            response.raise_for_status()
            with open(partial_filename, 'wb') as fd:
                for chunk in response.iter_content(chunk_size=8192):
                    fd.write(chunk)
        os.replace(partial_filename, output_filename)
    except Exception as e:
        # Best effort by design: report the problem and let the caller go on.
        print(f"Error downloading {output_filename}: {str(e)}")
        try:
            os.remove(partial_filename)
        except OSError:
            # No partial file was created, or it is already gone.
            pass


if __name__ == "__main__":
    # Command-line entry point: parse --input/--output and run the batch download.
    parser = argparse.ArgumentParser(
        description="Download open access PDF using Biblio-glutton with a list of DOIs")

    parser.add_argument("--input",
                        help="Input text file, one DOI per line.",
                        required=True)
    parser.add_argument("--output",
                        help="Output directory",
                        required=True)

    args = parser.parse_args()

    # Convert directly to paths; intermediate names `input`/`output` would
    # shadow the builtin input().
    input_path = Path(args.input)
    output_path = Path(args.output)

    # Create the output directory up front; without it every download fails
    # when the destination file is opened.
    output_path.mkdir(parents=True, exist_ok=True)

    main(input_path, output_path)
	import argparse
	import os
	from pathlib import Path

	import requests

	# Constants
	# Base URL of the biblio-glutton lookup service. The value below is placeholder
	# text, not a URL, and has to be replaced before the script can reach a server.
	# main() appends "/service/lookup" to it, so the value should not include that path.
	GLUTTON_URL = "ADD BIBLIO GLUTTON LOOKUP SERVICE"


	def main(input_path: Path, output_path: Path, has_header: bool = False):
	    """Look up every DOI in a text file and download its open-access PDF.

	    Each non-empty line of ``input_path`` is sent as a DOI to the
	    ``/service/lookup`` endpoint under ``GLUTTON_URL``. When the JSON answer
	    carries a non-empty ``oaLink``, that link is handed to ``download_pdf``
	    and saved as ``<output_path>/<doi with "/" replaced by "_">.pdf``.
	    Lookup failures are printed and processing continues with the next DOI.

	    Args:
	        input_path: Text file with one DOI per line.
	        output_path: Directory that receives the downloaded PDFs.
	        has_header: When True, the first line of the file is skipped instead
	            of being looked up as a DOI.
	    """
	    with open(input_path) as f:
	        if has_header:
	            # Consume the header row so it is not sent to the service.
	            next(f, None)

	        for line in f:
	            # Strip all surrounding whitespace (not only "\n") so CRLF files
	            # and whitespace-only lines are handled.
	            doi = line.strip()
	            if not doi:
	                continue

	            payload = {'doi': doi, 'postValidate': False}
	            try:
	                # The timeout keeps one unresponsive lookup from stalling the batch.
	                response = requests.get(GLUTTON_URL + "/service/lookup",
	                                        params=payload, timeout=30)
	            except requests.RequestException as e:
	                print(f"{doi}: lookup failed: {e}")
	                continue

	            if response.status_code != 200:
	                print(f"{doi}: invalid request")
	                print(response.content)
	                continue

	            try:
	                oa_link = response.json().get('oaLink', '')
	            except ValueError as e:
	                # Raised when the response body is not valid JSON.
	                print(f"{doi}: unreadable lookup response: {e}")
	                continue

	            # Nothing to download when the service knows no open-access copy.
	            if not oa_link:
	                continue

	            # "/" in a DOI would otherwise be read as a directory separator.
	            output_filename_path = os.path.join(output_path, doi.replace("/", "_")) + ".pdf"
	            print(f"Download {doi} from {oa_link}")
	            download_pdf(oa_link, output_filename_path)


	def download_pdf(download_url: str, output_filename: str) -> None:
	    """Download the file at ``download_url`` and save it as ``output_filename``.

	    The body is streamed into ``output_filename + ".part"`` and renamed to
	    ``output_filename`` only after the whole response has been written, so an
	    HTTP error or an interrupted transfer never leaves a truncated or bogus
	    file under the final name. Any failure is printed; nothing is raised.

	    Args:
	        download_url: URL to fetch.
	        output_filename: Destination path of the saved file.
	    """
	    partial_filename = output_filename + ".part"
	    try:
	        # The context manager releases the streamed connection; the timeout
	        # keeps a stalled server from blocking the caller forever.
	        with requests.get(download_url, stream=True, timeout=60) as response:
	            # Reject 4xx/5xx answers instead of saving an error page as a PDF.
	            response.raise_for_status()
	            with open(partial_filename, 'wb') as fd:
	                for chunk in response.iter_content(chunk_size=8192):
	                    fd.write(chunk)
	        os.replace(partial_filename, output_filename)
	    except Exception as e:
	        # Best effort by design: report the problem and let the caller go on.
	        print(f"Error downloading {output_filename}: {str(e)}")
	        try:
	            os.remove(partial_filename)
	        except OSError:
	            # No partial file was created, or it is already gone.
	            pass


	if __name__ == "__main__":
	    # Command-line entry point: parse --input/--output and run the batch download.
	    parser = argparse.ArgumentParser(
	        description="Download open access PDF using Biblio-glutton with a list of DOIs")

	    parser.add_argument("--input",
	                        help="Input text file, one DOI per line.",
	                        required=True)
	    parser.add_argument("--output",
	                        help="Output directory",
	                        required=True)

	    args = parser.parse_args()

	    # Convert directly to paths; intermediate names `input`/`output` would
	    # shadow the builtin input().
	    input_path = Path(args.input)
	    output_path = Path(args.output)

	    # Create the output directory up front; without it every download fails
	    # when the destination file is opened.
	    output_path.mkdir(parents=True, exist_ok=True)

	    main(input_path, output_path)