# Filter NJP open content from HathiFiles
# Filter "Hathifiles" (https://www.hathitrust.org/hathifiles) to open content from NJP
# Run w/ e.g., `cat hathi_full_20151101.txt | awk -f hathi_to_tsv.awk`
# Runs once before any input is read: split input records on tabs and
# emit the header row naming the four output columns.
BEGIN {
    FS = "\t"   # input fields are tab-separated
    printf("%s\t%s\t%s\t%s\n", "title", "enum", "URI", "BBID")
}
# For every record whose volume identifier (field 1) starts with "njp" and
# whose access field (field 2) is exactly "allow", emit one tab-separated row:
#   title (field 12), enumeration/chronology (field 5),
#   a hdl.handle.net/2027 URL built from the volume identifier,
#   and the source institution record number (field 7).
# The title is written unquoted, matching the header row; a lone opening
# quote with no closing partner would cause quote-aware TSV readers to
# merge subsequent rows into a single field.
($1 ~ /^njp/) && ($2 == "allow") {
    printf "%s\t%s\thttp://hdl.handle.net/2027/%s\t%s\n", $12, $5, $1, $7
}
# Nothing to do after the last record; kept as an explicit no-op.
END { }
# Hathi file fields:
# 1 = Volume identifier
# 2 = Access
# 3 = Rights
# 4 = HathiTrust record number
# 5 = Enumeration/Chronology
# 6 = Source
# 7 = Source institution record number
# 8 = OCLC numbers
# 9 = ISBNs
# 10 = ISSNs
# 11 = LCCNs
# 12 = Title
# 13 = Imprint
# 14 = Rights determination reason code
# 15 = Date of last update
# 16 = Government Document
# 17 = Publication Date
# 18 = Publication Place
# 19 = Language
# 20 = Bibliographic Format
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment