Skip to content

Instantly share code, notes, and snippets.

@s-leroux

s-leroux/extract

Last active Jul 6, 2017
Embed
What would you like to do?
Open source observatory data extraction and visualization.
# Extract records from saved HTML pages given as arguments.
#
# Usage: extract FILE...
#
# For each FILE, and for each link inside an element whose class contains
# "coverage-description" (only when the page also has a link whose text is
# exactly "eGovernment"), one line is written to stdout:
#
#     <basename of FILE>|<text after "on " in the field-submitted element>|<link text>|<id>
#
# The third field is treated as a country name. Records whose country field
# matches one of the excluded regions/countries are dropped; every remaining
# country gets a numeric id (predefined order first, then first-seen order).
for file in "$@"
do
    # tidy converts the page to XHTML so that xmlstarlet can query it.
    tidy -q -numeric -asxhtml \
        --show-warnings no \
        "${file}" | \
    xmlstarlet sel -T \
        -t \
        -m "//_:a[text()='eGovernment']" \
        -m "//*[contains(@class,'coverage-description')]/_:a" \
        -o "$(basename "${file}")"'|' \
        -v "substring-after(//*[contains(@class,'field-submitted')],'on ')" \
        -o '|' \
        -v "text()" \
        -n #2>/dev/null
done | \
awk '
BEGIN {
    C=1; FS=OFS="|";

    # Substring patterns for regions and countries to discard. They are
    # matched against the country field only, so a file name or date that
    # happens to contain one of these words no longer drops the record.
    # "Europe" also covers "European Union".
    SKIP = "Europe|EU Institutions|America|Asia|Ukraine|Other|Canada|Africa|India|Greenland|Fyrom|Australia|United States|Oceania";

    # Predefined ids: the order below fixes the numeric id of each country.
    A["Germany"]=C++;
    A["France"]=C++;
    A["United Kingdom"]=C++;
    A["Italy"]=C++;
    A["Spain"]=C++;
    A["Poland"]=C++;
    A["Romania"]=C++;
    A["Netherlands"]=C++;
    A["Belgium"]=C++;
    A["Greece"]=C++;
    A["Czech Republic"]=C++;
    A["Portugal"]=C++;
    A["Sweden"]=C++;
    A["Hungary"]=C++;
    A["Austria"]=C++;
    A["Switzerland"]=C++;
    A["Bulgaria"]=C++;
    A["Denmark"]=C++;
    A["Finland"]=C++;
    A["Slovakia"]=C++;
    A["Norway"]=C++;
    A["Ireland"]=C++;
    A["Croatia"]=C++;
    A["Lithuania"]=C++;
    A["Slovenia"]=C++;
    A["Albania"]=C++;
    A["Latvia"]=C++;
    A["Estonia"]=C++;
    # A["Cyprus"]=C++;
    A["Luxembourg"]=C++;
    A["Malta"]=C++;
    A["Vatican City"]=C++;
}

# Drop excluded regions and countries.
$3 ~ SKIP { next }

{
    # Countries missing from the predefined table get the next free id.
    id = A[$3];
    if (!id) {
        id = A[$3] = C++;
    }
    # Append the id as a fourth field.
    print $0,id;
}
'
#!/bin/bash
# Plot a "|"-separated data file with gnuplot.
#
# Usage: <this script> DATAFILE
#
# DATAFILE is read with "|" as the field separator. The plot uses:
#   column 2 - x value, parsed as a date in the form "%B %d, %Y"
#   column 3 - label for the y tic (ytic(3))
#   column 4 - y value
# Each record is drawn as a translucent circle with a fixed radius of 3e6
# (in x-axis units), and the window stays open after gnuplot exits
# (--persist).
#
# NOTE(review): timefmt is set twice on purpose, presumably so that the
# x range and tics are read as bare years ("%Y") before the format used by
# the data column is installed - confirm against the gnuplot version in use.

# Without a data file the here-document would expand to "plot ''" and
# gnuplot would fail with an obscure message, so fail early instead.
if [ -z "${1:-}" ]; then
    echo "usage: $0 DATAFILE" >&2
    exit 1
fi

gnuplot --persist << EOF
set style fill transparent solid 0.15 noborder
set datafile separator "|"
set xdata time
set timefmt "%Y"
set xrange ["2015":"2017"]
set xtics ("2015", "2016", "2017")
set format x "%Y"
set timefmt "%B %d, %Y"
set offset graph 0.05, graph 0.05, 0.5, 0.5
set key outside center top
plot '$1' using 2:4:(3e6):ytic(3) title "News publications on OSOR" with circles
EOF
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.