Skip to content

Instantly share code, notes, and snippets.

@s-leroux
Last active July 6, 2017 08:55
Show Gist options
  • Save s-leroux/9bf1a19f8461bcced8e2d3ce2c2403a5 to your computer and use it in GitHub Desktop.
Save s-leroux/9bf1a19f8461bcced8e2d3ce2c2403a5 to your computer and use it in GitHub Desktop.
Open source observatory data extraction and visualization.
# Build "file name|publication date|country|country id" rows from the HTML
# pages given as arguments and write them to standard output.
#
# For every page that contains an "eGovernment" link, one row is produced per
# link found under an element whose class contains "coverage-description".
# Rows whose country field names a continent, a supranational body or one of
# the other excluded regions are dropped; the remaining rows get a numeric id
# per country appended as a fourth field.
for file in "$@"
do
    # tidy converts the page to XHTML so that xmlstarlet can query it.
    #
    # The eGovernment test uses -i (if) and not an outer -m (for-each): the
    # inner paths are absolute, so an outer loop would only repeat every row
    # once per matching link instead of acting as a plain filter.
    tidy -q -numeric -asxhtml \
        --show-warnings no \
        "${file}" | \
    xmlstarlet sel -T \
        -t \
        -i "//_:a[text()='eGovernment']" \
        -m "//*[contains(@class,'coverage-description')]/_:a" \
        -o "$(basename "${file}")"'|' \
        -v "substring-after(//*[contains(@class,'field-submitted')],'on ')" \
        -o '|' \
        -v "text()" \
        -n
done |
awk '
BEGIN {
    FS = OFS = "|";

    # Countries with a fixed id, numbered from 1 in this order.
    # Cyprus is not part of the list, so it only gets an id (after the
    # fixed ones) if it shows up in the data.
    known = "Germany,France,United Kingdom,Italy,Spain,Poland,Romania,Netherlands";
    known = known ",Belgium,Greece,Czech Republic,Portugal,Sweden,Hungary,Austria";
    known = known ",Switzerland,Bulgaria,Denmark,Finland,Slovakia,Norway,Ireland";
    known = known ",Croatia,Lithuania,Slovenia,Albania,Latvia,Estonia";
    known = known ",Luxembourg,Malta,Vatican City";

    count = split(known, names, ",");
    for (i = 1; i <= count; i++) {
        A[names[i]] = i;
    }
    next_id = count + 1;
}

# Drop excluded regions. The test looks at the country field only, so a file
# name that happens to contain one of these words no longer discards the row.
# "Europe" also covers "European Union".
$3 ~ /Europe|EU Institutions|America|Asia|Ukraine|Other|Canada|Africa|India|Greenland|Fyrom|Australia|United States|Oceania/ {
    next;
}

{
    # Countries outside the fixed list get the next free id on first sight.
    if (!($3 in A)) {
        A[$3] = next_id++;
    }
    # $0 is left untouched; the id is appended after one more "|".
    print $0, A[$3];
}
'
#!/bin/bash
# Plot a "|"-separated data file with gnuplot: column 2 (a date such as
# "July 6, 2017") on the x axis, column 4 on the y axis, and column 3 as the
# y tic label.
#
# Usage: <script> DATAFILE

# Without a data file the plot command below would read: plot ''
if [ "$#" -lt 1 ]; then
    echo "usage: $0 DATAFILE" >&2
    exit 1
fi

# The here-document is unquoted on purpose: the shell substitutes the first
# argument into the plot command.
gnuplot --persist << EOF
set style fill transparent solid 0.15 noborder
set datafile separator "|"
set xdata time
# The range and tics below are written as bare years, so they are given
# while the time format is still "%Y" ...
set timefmt "%Y"
set xrange ["2015":"2017"]
set xtics ("2015", "2016", "2017")
set format x "%Y"
# ... and only then is the format switched to the one used by the dates in
# the data file.
set timefmt "%B %d, %Y"
set offset graph 0.05, graph 0.05, 0.5, 0.5
set key outside center top
# using x:y:radius with a constant radius of 3e6, expressed in x-axis units.
plot '$1' using 2:4:(3e6):ytic(3) title "News publications on OSOR" with circles
EOF
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment