Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
#!/bin/sh
# Pull manifest of overview metadata and output summary info.
#
# Downloads the data.json manifest from data.kcmo.org, pretty-prints it with
# Python's json.tool so that every key/value pair sits on its own line, and
# then derives line-based summary counts with grep/awk/sort/uniq.
#
# Usage:    run with no arguments; the report is written to stdout and
#           diagnostics go to stderr.
# Requires: curl, python (or python3), grep, awk, sort, uniq, mktemp.
# Exit:     0 on success, 1 if a prerequisite, the download, or the JSON
#           formatting step fails.

# No 'set -e' here: 'grep -c' exits 1 when the count is 0, which is a valid
# result for this report rather than an error.
set -u

DATA_URL='http://data.kcmo.org/data.json'

# Print a message to stderr and abort.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

# Print the 4th double-quote-delimited field (the string value in a
# pretty-printed `"key": "value"` line) of every line matching pattern $1.
field_values() {
  grep -- "$1" "$TMPFILE" | awk -F\" '{print $4}'
}

# Prefer 'python' as the original script did; fall back to 'python3' on
# systems that no longer provide an unversioned interpreter.
PYTHON=$(command -v python || command -v python3) \
  || die 'python (or python3) is required for json.tool'

# A private temp directory holds every intermediate file, so a single trap
# removes them all. The X's are kept at the end of the template because
# BSD mktemp only randomises trailing X's.
WORKDIR=$(mktemp -d "${TMPDIR:-/tmp}/data_kcmo_org.XXXXXX") \
  || die 'mktemp failed'
trap 'rm -rf -- "$WORKDIR"' EXIT
# Some shells (e.g. dash) skip the EXIT trap on a fatal signal; convert the
# signal into a normal exit so cleanup still runs.
trap 'exit 1' HUP INT TERM

RAWFILE="$WORKDIR/raw.json"
TMPFILE="$WORKDIR/data.json"
HISTFILE="$WORKDIR/modified.hist"

# Download and format as two separate steps: plain sh has no 'pipefail', so
# a curl failure inside a pipeline would go unnoticed.
# -f: fail on HTTP errors, -sS: quiet but still show errors, -L: follow
# redirects.
curl -fsSL "$DATA_URL" > "$RAWFILE" \
  || die "failed to download $DATA_URL"
"$PYTHON" -mjson.tool < "$RAWFILE" > "$TMPFILE" \
  || die "response from $DATA_URL is not valid JSON"

date

# printf instead of 'echo -n', which is not portable under /bin/sh.
printf '%s' 'Total entries: '
grep -c '"identifier":' "$TMPFILE"

printf '%s' 'Number of uploaders: '
grep -c '"mbox":' "$TMPFILE"

echo 'License info:'
field_values '"license":' | sort | uniq -c

# Build the per-value histogram of "modified" once; both reports below
# are different views of the same data.
field_values '"modified":' | sort | uniq -c > "$HISTFILE"

echo 'Number of updates over the last 5 days:'
# The histogram is sorted by value, so the tail is the five greatest values.
tail -n 5 "$HISTFILE"

echo 'Top 5 days with the largest number of updates:'
sort -rn "$HISTFILE" | head -n 5

echo 'Top 5 datasets with duplicate titles:'
# Keep only titles that occur more than once (count column > 1).
field_values '"title"' | sort | uniq -c | sort -rn | awk '$1>1' | head -n 5
@christianchristensen

This comment has been minimized.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment