Skip to content

Instantly share code, notes, and snippets.

@stefanw
Last active December 17, 2015 19:58
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save stefanw/39375774d1d304353624 to your computer and use it in GitHub Desktop.
Save stefanw/39375774d1d304353624 to your computer and use it in GitHub Desktop.
Documentation for fix-ups of FarmSubsidy data
# Trace every command as it is executed.
# The former trailing word `verbose` was not an option: `set` treats any
# non-option operand as a positional parameter, so it silently replaced "$@"
# with the single word "verbose". Plain `set -x` keeps the tracing without
# clobbering the script's arguments.
set -x
# Download the per-country archives. Each entry is CODE:archive, where CODE
# names the local zip file and archive is the dated file name (without the
# .zip suffix) on the server.
for entry in \
  AT:at20130501 BE:be20130430 BG:bg20130427 CY:cy20130513 \
  CZ:cz20130116 DE:de20130426 DK:dk20130414 EE:ee20130312 \
  ES:es20130501 FI:fi20130312 FR:fr20130506 GB:gb20130512 \
  HU:hu20130122 IE:ie20130430 IT:it20130424 LT:lt20130413 \
  LU:lu20130430 LV:lv20130509 MT:mt20130505 NL:nl20130426 \
  PL:pl20130426 PT:pt20130511 RO:ro20130512 SE:se20130309 \
  SI:si20130511 SK:sk20130320
do
  code=${entry%%:*}      # text before the colon, e.g. AT
  snapshot=${entry#*:}   # text after the colon, e.g. at20130501
  wget -O "${code}.zip" "http://data.farmsubsidy.org/web/${snapshot}.zip"
done
# Unpack each downloaded archive into a directory named after its country
# code (AT.zip -> AT/, BE.zip -> BE/, ...).
for code in \
  AT BE BG CY CZ DE DK EE ES FI FR GB HU \
  IE IT LT LU LV MT NL PL PT RO SE SI SK
do
  unzip -d "$code" "$code.zip"
done
# Apply fixes

# BE recipients: keep the original file as recipient_old.txt and regenerate
# recipient.txt with runs of identical adjacent lines collapsed to one.
mv BE/recipient.txt BE/recipient_old.txt
uniq BE/recipient_old.txt > BE/recipient.txt
# GB payments: concatenate the three payment files into payment.txt. The
# first line of the second and third file is skipped (presumably a repeated
# header row - verify against the data).
# NOTE(review): the third input is spelled payment3.txt while the first two
# are payments1.txt / payments2.txt - confirm against the archive contents.
cp GB/payments1.txt GB/payment.txt
{
  tail -n +2 GB/payments2.txt
  tail -n +2 GB/payment3.txt
} >> GB/payment.txt

# Keep the concatenated file as payment_bad.txt and rewrite payment.txt with
# every `;"GBGB` shortened to `;"GB`.
mv GB/payment.txt GB/payment_bad.txt
sed -e 's/;"GBGB/;"GB/g' GB/payment_bad.txt > GB/payment.txt

# GB schemes: the archive's schemes.txt is kept as scheme_bad.txt. On lines
# where the first field is followed by three semicolons (second and third
# fields empty), the first field is copied into the third. Line endings are
# converted by dos2unix and the result is written to scheme.txt.
mv GB/schemes.txt GB/scheme_bad.txt
sed -e 's/^\([^;]*\);;;/\1;;\1;/g' GB/scheme_bad.txt | dos2unix > GB/scheme.txt

# Append two extra "Unknown" rows to the scheme file.
{
  printf '"GB200450302032";;"Unknown";;"GB"\n'
  printf '"GB200450302052";;"Unknown";;"GB"\n'
} >> GB/scheme.txt
# DK payments: payment.txt is payment1.txt followed by payment2.txt without
# its first line.
cp DK/payment1.txt DK/payment.txt
tail -n +2 DK/payment2.txt >> DK/payment.txt

# DK recipients: write a dos2unix-converted copy to recipient_bad.txt, then
# regenerate recipient.txt with a trailing `;"";""` replaced by `;;`.
dos2unix < DK/recipient.txt > DK/recipient_bad.txt
sed -e 's/;"";""$/;;/g' DK/recipient_bad.txt > DK/recipient.txt

# DK schemes: drop everything up to and including the first semicolon on
# each line (i.e. remove the first field).
mv DK/scheme.txt DK/scheme_bad.txt
sed -e 's/^[^;]*;\(.*\)/\1/g' DK/scheme_bad.txt > DK/scheme.txt
# NL schemes: keep the original as scheme_bad.txt. On lines whose third
# semicolon-separated field is empty, fill it with a copy of the second field.
mv NL/scheme.txt NL/scheme_bad.txt
sed -e 's/^\([^;]*\);\([^;]*\);;/\1;\2;\2;/g' NL/scheme_bad.txt > NL/scheme.txt
# LT schemes: keep the original as scheme_bad.txt and retain only the lines
# that begin with a double quote.
mv LT/scheme.txt LT/scheme_bad.txt
grep '^"' LT/scheme_bad.txt > LT/scheme.txt
# LV schemes: keep the original as scheme_bad.txt. On lines whose third
# semicolon-separated field is empty, fill it with a copy of the second field.
mv LV/scheme.txt LV/scheme_bad.txt
sed -e 's/^\([^;]*\);\([^;]*\);;/\1;\2;\2;/g' LV/scheme_bad.txt > LV/scheme.txt
# ES schemes: keep the original as scheme_bad.txt, then rebuild scheme.txt:
#   1. delete every line feed,
#   2. turn every carriage return into a line feed (so carriage returns
#      become the only record separators),
#   3. on lines whose third semicolon-separated field is empty, fill it with
#      a copy of the second field.
mv ES/scheme.txt ES/scheme_bad.txt
tr -d '\n' < ES/scheme_bad.txt \
  | tr '\r' '\n' \
  | sed -e 's/^\([^;]*\);\([^;]*\);;/\1;\2;\2;/g' > ES/scheme.txt
# SE recipients: write a dos2unix-converted copy to recipient_bad.txt, then
# regenerate recipient.txt with a trailing `;"";""` replaced by `;;`.
dos2unix < SE/recipient.txt > SE/recipient_bad.txt
sed -e 's/;"";""$/;;/g' SE/recipient_bad.txt > SE/recipient.txt

# SE schemes: keep the original as scheme_bad.txt. On lines where the first
# field is followed by three semicolons (second and third fields empty),
# copy the first field into the third.
mv SE/scheme.txt SE/scheme_bad.txt
sed -e 's/^\([^;]*\);;;/\1;;\1;/g' SE/scheme_bad.txt > SE/scheme.txt
# SI schemes: keep the original as scheme_bad.txt. On lines whose third
# semicolon-separated field is empty, fill it with a copy of the second field.
mv SI/scheme.txt SI/scheme_bad.txt
sed -e 's/^\([^;]*\);\([^;]*\);;/\1;\2;\2;/g' SI/scheme_bad.txt > SI/scheme.txt

# SI payments: keep the original as payment_bad.txt and apply two edits per
# line, in this order:
#   1. if the second field is empty, fill it with the first field's text
#      wrapped as "SI<first field>";
#   2. the data contains the country code SL (Sierra Leone) where SI is
#      expected, so every `"SL` is replaced with `"SI`.
mv SI/payment.txt SI/payment_bad.txt
sed -e 's/^\([^;]*\);;/\1;"SI\1";/g' \
    -e 's/"SL/"SI/g' \
    SI/payment_bad.txt > SI/payment.txt

# SI recipients: same SL -> SI correction, original kept as recipient_bad.txt.
mv SI/recipient.txt SI/recipient_bad.txt
sed -e 's/"SL/"SI/g' SI/recipient_bad.txt > SI/recipient.txt
# SK schemes: keep the original as scheme_bad.txt. On lines whose third
# semicolon-separated field is empty, fill it with a copy of the second field.
mv SK/scheme.txt SK/scheme_bad.txt
sed -e 's/^\([^;]*\);\([^;]*\);;/\1;\2;\2;/g' SK/scheme_bad.txt > SK/scheme.txt
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment