Last active
July 14, 2021 17:48
-
-
Save rpavlik/f897800c17d14af5530a67d2efa9f1a8 to your computer and use it in GitHub Desktop.
Get the UNC CS department tech report list and convert it for Zotero import.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env bash
# Copyright 2021, Collabora, Ltd.
# SPDX-License-Identifier: MIT
#
# Download the UNC CS department tech report list (EndNote/refer-style tagged
# text) and convert it into techrep.ris for import into Zotero.
#
# Needs bibutils (end2xml, xml2ris), GNU sed and wget (as packaged on Debian).
# Import the resulting .ris file into Zotero and enjoy!
#
# References:
# https://en.wikipedia.org/wiki/EndNote#Tags_and_fields
# https://en.wikipedia.org/wiki/RIS_(file_format)#Tags

# Abort on any failing command, unset variable, or failing pipeline stage so a
# broken download or conversion never silently yields a bogus techrep.ris.
set -euo pipefail

readonly FILENAME=00-000README-TECHREP
readonly SOURCE_URL="ftp://ftp.cs.unc.edu/pub/publications/techreports/${FILENAME}"
readonly OUTPUT=techrep.ris

# Fail early, with a clear message, if a required tool is missing.
for tool in wget sed end2xml xml2ris; do
  if ! command -v "$tool" >/dev/null; then
    printf 'error: required tool not found: %s\n' "$tool" >&2
    exit 1
  fi
done

# Fetch the list only when no non-empty local copy exists.
# wget -O creates its output file even when the transfer fails, so download to
# a side file and rename it only on success; otherwise a failed run would leave
# an empty file behind that later runs would mistake for a cached download.
if [[ ! -s "$FILENAME" ]]; then
  partial="${FILENAME}.part"
  trap 'rm -f -- "$partial"' EXIT
  wget "$SOURCE_URL" -O "$partial"
  mv -- "$partial" "$FILENAME"
fi

# Input side (first sed): rewrite the FTP URL prefix to the HTTP one, copy the
# %R report number into %F and %@ lines so it makes it through the conversion,
# and append a "%0 Report" line after each %U line to flag the entry as a
# report.
# Output side (second sed): fix up how the report number is shown (ID -> SN)
# and re-format dates from "Month D YYYY//" to "Month D, YYYY". RIS tags are
# normally followed by two spaces before the hyphen; one or more are accepted
# (and preserved) so the date rewrite applies either way.
sed -r \
  -e 's_ftp://ftp.cs.unc.edu/pub/publications_http://www.cs.unc.edu_' \
  -e 's/%R (.*)/%F \1\n%@ \1/' \
  -e '/^%U/a %0 Report' \
  <"$FILENAME" \
  | end2xml \
  | xml2ris \
  | sed -r \
    -e 's/^ID /SN /' \
    -e 's_^DA( +)- ([A-Za-z]+) ([0-9]+) ([0-9]+)//_DA\1- \2 \3, \4_' \
    >"$OUTPUT"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment