@deeplycloudy
Created June 1, 2021 22:26
DOI URLs scraped from webpages
# Here’s the URL to a journal’s search results page for papers on the Geostationary Lightning Mapper
# published from 2018 to 2021. There are several pages of results; below is an example URL for the third and final page.
# Download the webpage. Do this for each page of the search results, changing the URL and output filename:
curl "https://journals.ametsoc.org/search?access_0=all&fromDate=2018&page=3&pageSize=50&q1=geostationary+lightning+mapper&sort=relevance&toDate=2021" > page3dois.txt
# Then concatenate all three files, and save out just the DOIs linked on each page.
cat page[1-3]dois.txt | grep -Eoi '<a [^>]+>' |
grep -Eo 'href="[^\"]+"' | grep -Eo '(http|https)://doi.org/[\.0-9]+/[^/"<]+' | sort | uniq > page_dois.txt
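# Optional sanity check (not part of the original recipe): count the
# unique DOI URLs and preview a few before fetching references.
wc -l page_dois.txt
head -n 3 page_dois.txt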
# Finally, get BibTeX for each DOI using the doi.org content negotiation service
cat page_dois.txt | xargs -n1 curl -LH "Accept: text/bibliography; style=bibtex" >> saved_refs.bib
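# If firing requests back to back is too aggressive for doi.org, a gentler
# sketch replaces the xargs one-liner with a loop and a pause; the 1-second
# delay is an arbitrary choice, not from the original.
while read -r doi; do
    curl -LH "Accept: text/bibliography; style=bibtex" "$doi" >> saved_refs.bib
    sleep 1
done < page_dois.txt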
@deeplycloudy (Author):

Lines 10 and 11 should be all one line.

@deeplycloudy (Author):

Add the flags -j -L -c cookies.txt -b cookies.txt to curl to follow redirects and enable cookies, saving them to a cookies.txt file.
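For reference, here is a sketch of the first download command with those flags applied (cookies.txt is created automatically on first use):

curl -j -L -c cookies.txt -b cookies.txt "https://journals.ametsoc.org/search?access_0=all&fromDate=2018&page=3&pageSize=50&q1=geostationary+lightning+mapper&sort=relevance&toDate=2021" > page3dois.txt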
