Created
May 24, 2016 18:40
-
-
Save Vesihiisi/92ef5d796ab052d876da29d42e89ad9d to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
#
# Little reference to the API
# --------------------------------
# Use http://api.arbetsformedlingen.se/platsannons/soklista/yrkesomraden
# to fetch a list of all available profession areas.
# The one that best fits our needs is area 3, "data/IT".
#
# http://api.arbetsformedlingen.se/platsannons/soklista/yrkesgrupper?yrkesomradeid=3
# This will fetch the number of available jobs for each profession group within this
# profession area.
#
# http://api.arbetsformedlingen.se/platsannons/soklista/yrken?yrkesgruppid=3515
# This will fetch the number of available jobs for each profession within this
# profession group.
# In order to minimize how many times we call the API, we start off
# by fetching all the ads within the relevant profession area.
# The maximum number of rows is 10 000, and since the number of results doesn't seem to be larger
# than 3000, that's not a problem.
#
# Fetch the list of all ads in profession area 3 and store it, pretty-printed
# by jq, in ads_list.json. The script aborts if the request or the JSON
# formatting fails, so that later steps never run against a missing or
# corrupt list.
echo "Will now fetch list of all relevant ads. This may take some time."

# Download into a temporary file first: a failed request must not truncate
# an ads_list.json left over from a previous successful run.
list_raw=$(mktemp) || exit 1
if ! curl \
    --silent \
    --show-error \
    --fail \
    --header "Accept: application/json" \
    --header "Accept-Language: sv" \
    --header "From: alkw15@student.bth.se" \
    --output "$list_raw" \
    "http://api.arbetsformedlingen.se/platsannons/matchning?yrkesomradeid=3&antalrader=10000"; then
  echo "Failed to fetch the list of ads." >&2
  rm -f -- "$list_raw"
  exit 1
fi

# Only replace ads_list.json once jq has accepted the response as JSON.
if ! jq '.' "$list_raw" > "ads_list.json.tmp"; then
  echo "The list of ads is not valid JSON." >&2
  rm -f -- "$list_raw" "ads_list.json.tmp"
  exit 1
fi
mv -- "ads_list.json.tmp" "ads_list.json"
rm -f -- "$list_raw"

echo "List of all relevant ads fetched."
printf '%s' "Number of ads: "
jq '.matchningslista.antal_platsannonser' ads_list.json
printf '%s\n' "Will now download all these ads. This may take some time."
# Build the working list of ad ids, one per line, in all_ids.txt.
# --raw-output makes jq print the ids without surrounding quotation marks.
# The download step reads this file to fetch each individual ad.
#
jq --raw-output \
  '.matchningslista.matchningdata[].annonsid' \
  ads_list.json \
  > all_ids.txt
# Remove all existing ads, then download every ad listed in all_ids.txt
# into ads/<id>.json (pretty-printed by jq).
ads_dir="ads"
# Make sure the target directory exists; otherwise both the cleanup and
# every redirect below would fail.
mkdir -p -- "$ads_dir" || exit 1
# ${ads_dir:?} aborts instead of expanding to "/*" should the variable
# ever be empty.
rm -rf -- "${ads_dir:?}"/*

# Scratch file holding the raw response of the request in progress.
raw_response=$(mktemp) || exit 1
failed_count=0

# Redirect the id list into the loop (instead of piping from cat) so the
# loop runs in the current shell and failed_count survives it.
# IFS= and -r keep each line exactly as written; the || clause handles a
# last line without a trailing newline.
while IFS= read -r ad_id || [[ -n "$ad_id" ]]; do
  # A blank line would otherwise request the bare endpoint.
  [[ -n "$ad_id" ]] || continue

  # For whatever reason, some requests take a lot of time...
  # As of now, we watch the output manually and restart script
  # if it seems to take too much time.
  # Setting --max-time doesn't make sense since we then miss some files.
  # Remove --silent to have something to look at while querying.
  nameoffile="$ads_dir/$ad_id.json"
  echo "Downloading and saving ad $ad_id as file $nameoffile"

  # Start from an empty scratch file so a response without a body can
  # never be mistaken for the previous ad's data.
  : > "$raw_response"
  if curl \
      --header "Accept: application/json" \
      --header "Accept-Language: sv" \
      --header "From: alkw15@student.bth.se" \
      --silent \
      --show-error \
      --fail \
      --output "$raw_response" \
      "http://api.arbetsformedlingen.se/platsannons/$ad_id" \
    && jq '.' "$raw_response" > "$nameoffile"; then
    continue
  fi

  # Do not leave an empty or partial file behind for a failed ad.
  echo "Failed to download ad $ad_id" >&2
  rm -f -- "$nameoffile"
  failed_count=$((failed_count + 1))
done < "all_ids.txt"

rm -f -- "$raw_response"
if (( failed_count > 0 )); then
  echo "$failed_count ad(s) could not be downloaded." >&2
fi
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment