Skip to content

Instantly share code, notes, and snippets.

@jswrenn
Created February 9, 2021 01:05
Show Gist options
  • Save jswrenn/171057e8841cd18129cfba31f8e8bf2c to your computer and use it in GitHub Desktop.
Save jswrenn/171057e8841cd18129cfba31f8e8bf2c to your computer and use it in GitHub Desktop.
#!/bin/bash
# Load GNU parallel's env_parallel support for bash so that the shell
# functions defined below can be called from parallel jobs.
# `command -v` is the builtin replacement for `which`, and the quotes keep an
# install path containing whitespace in one piece.
. "$(command -v env_parallel.bash)"
# Snapshot the names that exist right now; per the GNU parallel documentation,
# env_parallel then only copies names defined after this point into the jobs.
env_parallel --session
function enrolled_total() {
  # Reads one course JSON record on stdin.
  # Wraps .regdemog_html in a <parent> element, extracts the text of the
  # <p class="enroll_demog"> node(s) with xidel and prints the third
  # space-separated field of that text.
  # If xidel fails (its stderr is discarded), the literal string "null" is
  # emitted instead; it contains no space, so cut passes it through unchanged.
  jq -rc '"<parent>" + .regdemog_html + "</parent>"' \
    | {
      if ! xidel - -s --xpath='one-or-more(//p[@class="enroll_demog"]/text())' 2>/dev/null; then
        echo "null"
      fi
    } \
    | cut -d ' ' -f 3
}
function enrolled_max() {
  # Reads one course JSON record on stdin.
  # Wraps .seats in a <parent> element and prints the text of the
  # <span class="seats_max"> node(s) found by xidel.
  # When the pipeline fails (i.e. xidel exits non-zero; its stderr is
  # discarded), the literal string "null" is printed as a fallback.
  if ! jq -rc '"<parent>" + .seats + "</parent>"' \
    | xidel - -s --xpath='one-or-more(//span[@class="seats_max"]/text())' 2>/dev/null; then
    echo "null"
  fi
}
function enrolled_demog() {
  # Reads one course JSON record on stdin and prints, in compact form, the
  # JSON value encoded in the .regdemog_json string, or null when that field
  # has length 0 (jq's length of null is 0 as well).
  local demog_filter='if (.regdemog_json | length) > 0 then (.regdemog_json | fromjson) else null end'
  jq -c "$demog_filter"
}
function sections() {
  # Reads one course JSON record on stdin and prints a compact JSON array
  # built from the data-key attributes of the links in .all_sections.
  #
  # Each attribute value is wrapped as {"<value>} by printf, then sed turns
  # every ':' into '": ' so that a value of the form name:value becomes the
  # object {"name": value}. The final jq call slurps all objects into one array.
  local key_xpath='//a/@data-key'
  local colon_to_json='s/\:/": /g'

  jq -rc '.all_sections' \
    | xidel -s - -e "$key_xpath" \
    | xargs -I '{}' printf '{"%s}\n' '{}' \
    | sed -e "$colon_to_json" \
    | jq -c --slurp '.'
}
function books() {
  # Reads one course JSON record on stdin and prints a compact JSON object
  # mapping a group label to the list of books listed under it.
  #
  # Stage 1 (xidel): for every <tr> in .books_html emit a 5-element array:
  #   [th[1] text, td[1] text, td[2] text, td[3] text, td[4] text],
  #   each taken as the first item of (text, null).
  # Stage 2 (head/tail): drop the last and the first line of xidel's output.
  # Stage 3 (jq): fold the rows. A row whose first element is non-null starts
  #   a new, empty group named after it; any other row is appended to the
  #   current group as {title, author, isbn, cost}. cost is the part of the
  #   fourth cell before " - ", with "$" removed, converted to a number
  #   (null when the cell is null). Only the .groups object is printed.
  #
  # The two programs below are kept verbatim (hence unindented).
  local row_expr='//tr/[
(th[1]/text()[last()], null)[1],
(td[1]/text()[last()], null)[1],
(td[2]/text()[last()], null)[1],
(td[3]/text()[last()], null)[1],
(td[4]/text()[last()], null)[1]]'
  local group_filter='reduce .[] as $row ({"groups": {}, last: null};
if ($row[0] != null) then
.last |= $row[0] | .groups[.last] |= []
else
.groups[.last] += [{
title: $row[1],
author: $row[2],
isbn: $row[3],
cost: (if ($row[4] == null) then null else (($row[4] | sub("\\$" ; ""; "gs") | split(" - "))[0] | tonumber) end),
}]
end) | .groups'

  jq -rc '.books_html' \
    | xidel -s - -e "$row_expr" \
    | head -n -1 \
    | tail -n +2 \
    | jq -rc --slurp "$group_filter"
}
function instrs() {
  # Reads one course JSON record on stdin and prints a compact JSON array of
  # {id, name, email} objects, one per <div class="instructor"> found in
  # .instructordetail_html.
  #
  # xidel emits, per instructor div, an array of: the data-id attribute of
  # links carrying one, the text of those links, and the text of mailto:
  # links. Empty arrays are skipped; the first three elements of the others
  # become id, name and email. The last jq call slurps everything into a list.
  #
  # The XPath is kept verbatim (hence unindented).
  local instr_xpath='//div[@class="instructor"]/[
descendant::a[@data-id]/@data-id,
descendant::a[@data-id]/text(),
descendant::a[starts-with(@href,"mailto:")]/text()]'

  jq -rc '.instructordetail_html' \
    | xidel -s - --xpath="$instr_xpath" \
    | jq -c 'select(length > 0) | {id: .[0], name: .[1], email: .[2]}' \
    | jq -c --slurp '.'
}
function stats() {
  # Print a JSON summary object for one course record.
  #
  # Arguments: $1 - path to a JSON file holding a single course record.
  # Outputs:   the summary object on stdout (written by jq).
  #
  # The helpers enrolled_total, instrs and books read the record on stdin.
  # They are fed straight from the file rather than through an unquoted
  # `echo $CONTENTS`, which word-split and glob-expanded the JSON (a lone `*`
  # in the data became a file listing, runs of whitespace were collapsed).
  # NOTE(review): whitespace inside the record's strings is now preserved
  # exactly; confirm no helper relied on the old collapsing.
  local file="$1"
  local total_json instructors_json books_json

  total_json="$(enrolled_total "$file" < "$file")"
  instructors_json="$(instrs "$file" < "$file")"
  books_json="$(books "$file" < "$file")"

  # adjust as needed:
  # enrolled_max, enrolled_demog and sections are currently stubbed out with
  # constants. To enable one, replace its constant with the matching helper,
  # e.g.  --argjson enrolled_max "$(enrolled_max "$file" < "$file")"
  # (likewise enrolled_demog and sections). While sections is "[]",
  # other_sections is always an empty list.
  #
  # A helper that printed nothing would make --argjson reject its empty
  # argument, so empty results fall back to null.
  jq \
    --argjson enrolled_total "${total_json:-null}" \
    --argjson enrolled_max "null" \
    --argjson enrolled_demog "null" \
    --argjson instructors "${instructors_json:-null}" \
    --argjson sections "[]" \
    --argjson books "${books_json:-null}" \
    '.crn as $CourseCRN |
    { key: .key | tonumber,
      db: .srcdb | tonumber,
      crn: .crn | tonumber,
      title: .title,
      department: .code | split(" ")[0],
      code: .code | split(" ")[1],
      section: .section,
      cancelled: (.stat == "C"),
      enrollment: {
        actual: $enrolled_total,
        maximum: $enrolled_max,
        demographics: $enrolled_demog,
      },
      instructors: $instructors,
      other_sections: ([$sections | .[] | select(.crn == $CourseCRN)]),
      books: $books
    }' "$file"
}
function susp() {
  # Per-file entry point invoked by the env_parallel job ("susp {}").
  # Arguments: $1 - path to a course JSON file, forwarded to stats.
  # The argument is quoted so a path containing whitespace or glob characters
  # reaches stats as a single, unmodified word.
  stats "$1"
}
# Summarise every *.json file (case-insensitive match) under db/202010/:
# each path is handed to susp by env_parallel, 8 jobs at a time with a
# progress display, and all job output is collected in summary-202010.json.
find db/202010/ -iname '*.json' \
  | env_parallel --progress -j8 "susp {}" \
  > summary-202010.json
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment