Skip to content

Instantly share code, notes, and snippets.

@franloza
Created June 24, 2022 12:48
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save franloza/dcaaf90151b7b59898b34b1ddfd11dca to your computer and use it in GitHub Desktop.
Save franloza/dcaaf90151b7b59898b34b1ddfd11dca to your computer and use it in GitHub Desktop.
Bash script to download a dataset in JSON format from coches.net
#!/bin/bash
#------------------------------------------------------------------------------------
# What: Download a dataset in JSON format from coches.net
# Author: Fran Lozano <franloza.com>
# License: MIT License <https://choosealicense.com/licenses/mit/>
# Copyright (c) 2022 Fran Lozano
#------------------------------------------------------------------------------------
#------------------------------------------------------------------------------------
# SCRIPT CONFIGURATION
#------------------------------------------------------------------------------------
# Base name of this script (directory part stripped). "$0" is quoted and
# passed after "--" so paths containing spaces or a leading dash are safe.
SCRIPT_NAME=$(basename -- "$0")
VERSION=0.1
# Add your own global variables here
# Maximum number of pages to download; -1 means "no limit" (set with -l).
LIMIT=-1
#------------------------------------------------------------------------------------
# UTILITY FUNCTIONS
#------------------------------------------------------------------------------------
# Print the command-line help text to standard output.
# The first line shows the script exactly as it was invoked ($0).
Usage() {
  printf '%s\n' \
    "usage: $0 [ -l limit]" \
    'Download a dataset in JSON format from coches.net' \
    'PARAMETERS:' \
    '-l limit Limit the number of pages (100 records/page)'
}
# Request one page of search results from the coches.net search API.
#
# Arguments: $1 - page number to request (non-negative integer)
# Outputs:   the raw response body on stdout. Callers pipe it through zcat,
#            so only gzip is advertised in Accept-Encoding; a deflate or
#            brotli response could not be decoded by zcat.
# Returns:   curl's exit status, or 1 if the page number is not an integer.
Query() {
  local page=${1-}
  local payload

  # Reject anything that is not a plain integer so that the value can never
  # word-split into extra curl arguments or produce malformed JSON.
  if [[ ! "$page" =~ ^[0-9]+$ ]]; then
    printf 'Query: invalid page number: "%s"\n' "$page" >&2
    return 1
  fi
  # Force base 10 so "007" becomes 7 (leading zeros are not valid JSON).
  page=$((10#$page))

  # 100 records per page, newest publications first.
  payload='{"pagination": {"page": '"$page"' ,"size": 100},"sort": {"order": "desc", "term": "publishedDate"}}'

  curl --request POST -s \
    --url https://ms-mt--api-web.spain.advgo.net/search \
    --header 'Accept: application/json' \
    --header 'Accept-Encoding: gzip' \
    --header 'Content-Type: application/json;charset=utf-8' \
    --header 'X-Schibsted-Tenant: coches' \
    --data "$payload"
}
# Draw a 40-character progress bar on the current terminal line.
#
# Arguments: $1 - number of items completed so far
#            $2 - total number of items
# Outputs:   "\rProgress : [####----] NN% (done/total)" on stdout, without a
#            trailing newline, so the next call overwrites the same line.
ProgressBar() {
  local current=${1:-0}
  local total=${2:-0}
  local percent=0
  local filled empty bar_done bar_left

  # Guard against division by zero when the total is unknown or empty.
  if (( total > 0 )); then
    percent=$(( current * 100 / total ))
  fi
  # Clamp so the bar never grows past, or shrinks below, its 40 columns.
  if (( percent > 100 )); then
    percent=100
  elif (( percent < 0 )); then
    percent=0
  fi

  filled=$(( percent * 4 / 10 ))
  empty=$(( 40 - filled ))

  # Build runs of spaces of the right width, then swap in the bar glyphs.
  printf -v bar_done '%*s' "$filled" ''
  printf -v bar_left '%*s' "$empty" ''

  # The arguments are passed as data, never as part of the format string.
  printf '\rProgress : [%s%s] %d%% (%s/%s)' \
    "${bar_done// /#}" "${bar_left// /-}" "$percent" "$current" "$total"
}
# Parse the command-line options and update the global configuration.
#
# Globals:   LIMIT (written) - page limit taken from "-l"
# Arguments: the script's command-line arguments ("$@")
# Exits:     with status 1, after printing the problem and the usage text to
#            stderr, on an unknown option, a missing option argument, or a
#            limit that is neither -1 nor a non-negative integer.
getOptions() {
  local OPTION OPTARG
  local OPTIND=1 # always start parsing from the first argument
  local error=""

  # The leading ":" selects silent mode: getopts reports a missing argument
  # as ":" and an unknown option as "?", with the offending letter in OPTARG.
  while getopts ":l:" OPTION; do
    case "$OPTION" in
      l)
        if [[ "$OPTARG" == "-1" ]]; then
          LIMIT=-1
        elif [[ "$OPTARG" =~ ^[0-9]+$ ]]; then
          # Force base 10 so values such as "08" are not read as octal later.
          LIMIT=$((10#$OPTARG))
        else
          error="Invalid limit: $OPTARG"
        fi
        ;;
      :)
        error="Option -$OPTARG requires an argument"
        ;;
      \?)
        error="Invalid option: -$OPTARG"
        ;;
    esac

    if [[ -n "$error" ]]; then
      printf '%s\n' "$error" >&2
      Usage >&2
      exit 1
    fi
  done
}
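#------------------------------------------------------------------------------------
# MAIN
#------------------------------------------------------------------------------------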
# Download the result pages one by one and merge them into ./data.json.
#
# Globals:   LIMIT (read)           - page limit, -1 for "no limit"
#            COCHES_TMP_DIR (written) - private scratch directory, removed
#                                       on exit
# Arguments: the script's command-line arguments, forwarded to getOptions
# Outputs:   a progress bar on stdout; one compact JSON record per line in
#            data.json in the current directory
# Exits:     with status 1 if the limit is invalid, the page count cannot be
#            read, a page fails to download, or the merge fails.
main() {
  local num_pages last_page i page_file
  local limit
  local -a page_files=()

  getOptions "$@"

  # Let a failure in any stage (Query, zcat or jq) fail the whole pipeline
  # instead of being hidden by the exit status of the last stage.
  set -o pipefail

  limit=$LIMIT
  if [[ "$limit" != "-1" ]]; then
    if [[ ! "$limit" =~ ^[0-9]+$ ]]; then
      printf 'Invalid page limit: %s\n' "$limit" >&2
      exit 1
    fi
    limit=$((10#$limit))
  fi

  # Ask the first page how many pages exist in total.
  if ! num_pages=$(Query 1 | zcat - | jq -r '.meta.totalPages'); then
    printf 'Could not retrieve the number of pages\n' >&2
    exit 1
  fi
  if [[ ! "$num_pages" =~ ^[0-9]+$ ]]; then
    printf 'Unexpected number of pages: "%s"\n' "$num_pages" >&2
    exit 1
  fi
  if (( num_pages == 0 )); then
    # Nothing to download: leave an empty dataset behind.
    : > data.json
    return 0
  fi

  # Apply the limit up front so the progress bar reflects the pages that
  # will really be fetched. At least one page is always downloaded.
  last_page=$num_pages
  if (( limit != -1 && limit < last_page )); then
    last_page=$(( limit > 1 ? limit : 1 ))
  fi

  # Use a private scratch directory rather than ./tmp, so that an existing
  # directory of that name is never merged into the output or deleted.
  if ! COCHES_TMP_DIR=$(mktemp -d "${TMPDIR:-/tmp}/coches.XXXXXX"); then
    printf 'Could not create a temporary directory\n' >&2
    exit 1
  fi
  trap 'rm -rf -- "${COCHES_TMP_DIR:?}"' EXIT

  # Download all pages
  for (( i = 1; i <= last_page; i++ )); do
    printf -v page_file '%s/data.%04d.json' "$COCHES_TMP_DIR" "$i"
    ProgressBar "$i" "$last_page"
    if ! Query "$i" | zcat - | jq -cr '.items[]' > "$page_file"; then
      printf '\nFailed to download page %d\n' "$i" >&2
      exit 1
    fi
    page_files+=("$page_file")
    if (( i < last_page )); then
      sleep 0.3 # Be nice
    fi
  done
  printf '\n' # finish the progress-bar line

  # Join files (in download order) and clean up
  if ! jq -c . "${page_files[@]}" > data.json; then
    printf 'Failed to merge the downloaded pages\n' >&2
    exit 1
  fi
  rm -rf -- "${COCHES_TMP_DIR:?}"
  trap - EXIT
}
# Script entry point: run main with all command-line arguments, each passed
# through as a separate word.
main "$@"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment