Last active
July 6, 2017 08:55
-
-
Save s-leroux/9bf1a19f8461bcced8e2d3ce2c2403a5 to your computer and use it in GitHub Desktop.
Open source observatory data extraction and visualization.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
#
# Open source observatory data extraction.
#
# Usage: <this script> FILE.html...
#
# For every saved HTML page given on the command line, prints one record per
# coverage link found in the page:
#
#   <file basename>|<submission date>|<coverage text>|<numeric id>
#
# Records whose coverage is a region or a filtered-out country are dropped;
# the numeric id is a stable per-country number (see number_countries).
#
# NOTE: `set -e` / `pipefail` are deliberately not enabled. tidy exits with a
# non-zero status when it merely emits warnings and `xmlstarlet sel` does so
# when nothing matches, and both situations are expected on real pages.

#######################################
# Convert each page to XHTML with tidy and extract the raw records.
# Arguments: $@ - paths of the HTML files to process
# Outputs:   "<basename>|<date>|<coverage>" lines on stdout
#######################################
extract_records() {
  local file
  for file in "$@"; do
    # tidy produces well-formed XHTML so that xmlstarlet can query it; the
    # `_:` prefix in the XPath expressions is xmlstarlet's shortcut for the
    # document's default namespace.
    tidy -q -numeric -asxhtml \
      --show-warnings no \
      "${file}" |
      xmlstarlet sel -T \
        -t \
        -m "//_:a[text()='eGovernment']" \
        -m "//*[contains(@class,'coverage-description')]/_:a" \
        -o "${file##*/}|" \
        -v "substring-after(//*[contains(@class,'field-submitted')],'on ')" \
        -o '|' \
        -v "text()" \
        -n # append 2>/dev/null here to silence xmlstarlet diagnostics
  done
}

#######################################
# Drop records whose coverage (3rd field) names a region or an excluded
# country. Matching is a substring match, as before, but it is restricted to
# the coverage field so that a file name or date containing one of these words
# can no longer discard an otherwise valid record.
# Inputs:  "|"-separated records on stdin
# Outputs: the surviving records, unchanged, on stdout
#######################################
drop_regions() {
  awk -F'|' '
    $3 ~ /Europe|EU Institutions|America|Asia|Ukraine|Other|Canada|Africa|India|Greenland|Fyrom|Australia|United States|Oceania/ { next }
    { print }
  '
}

#######################################
# Append a numeric country id as an extra "|"-separated field.
# The countries listed below get ids 1..31 in list order; any other coverage
# value gets the next free id the first time it is seen and keeps it for the
# rest of the run.
# Inputs:  "|"-separated records on stdin (coverage in field 3)
# Outputs: "<record>|<id>" lines on stdout
#######################################
number_countries() {
  # Order matters: the position in this list is the id.
  # Cyprus was commented out of the original table and is kept out here so
  # that the existing ids do not shift; it receives a dynamic id instead.
  local known='Germany,France,United Kingdom,Italy,Spain,Poland,Romania'
  known+=',Netherlands,Belgium,Greece,Czech Republic,Portugal,Sweden'
  known+=',Hungary,Austria,Switzerland,Bulgaria,Denmark,Finland,Slovakia'
  known+=',Norway,Ireland,Croatia,Lithuania,Slovenia,Albania,Latvia'
  known+=',Estonia,Luxembourg,Malta,Vatican City'

  awk -v known="${known}" '
    BEGIN {
      FS = OFS = "|"
      count = split(known, names, ",")
      for (i = 1; i <= count; i++) {
        id_of[names[i]] = i
      }
      next_id = count + 1
    }
    {
      if (!($3 in id_of)) {
        id_of[$3] = next_id++
      }
      print $0, id_of[$3]
    }
  '
}

extract_records "$@" | drop_regions | number_countries
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
#
# Open source observatory data visualization.
#
# Usage: <this script> DATA_FILE
#
# DATA_FILE holds "|"-separated records; the plot reads column 2 as the date
# (format "%B %d, %Y"), column 4 as the y value and column 3 as the y-axis
# tick label.

#######################################
# Print the gnuplot program that draws the chart.
# Arguments: $1 - data file name as gnuplot should see it
# Outputs:   gnuplot commands on stdout
#######################################
plot_commands() {
  # The name is placed inside a gnuplot single-quoted string, where a literal
  # single quote has to be written as two single quotes.
  local sq="'"
  local data_file=${1//$sq/$sq$sq}

  # timefmt is set twice on purpose: "%Y" is in effect while the xrange and
  # xtics year strings are given, then it is switched to the date format used
  # by the data file (presumably because gnuplot parses those strings with the
  # timefmt current at that point - confirm against the gnuplot manual).
  # The third `using` entry, (3e6), is a constant circle radius.
  cat <<EOF
set style fill transparent solid 0.15 noborder
set datafile separator "|"
set xdata time
set timefmt "%Y"
set xrange ["2015":"2017"]
set xtics ("2015", "2016", "2017")
set format x "%Y"
set timefmt "%B %d, %Y"
set offset graph 0.05, graph 0.05, 0.5, 0.5
set key outside center top
plot '${data_file}' using 2:4:(3e6):ytic(3) title "News publications on OSOR" with circles
EOF
}

#######################################
# Feed the plot program to gnuplot, keeping the window open (--persist).
# Arguments: $1 - data file to plot
# Returns:   2 when no data file is given, otherwise gnuplot's exit status
#######################################
plot_osor() {
  if (( $# < 1 )) || [[ -z "$1" ]]; then
    printf 'Usage: %s DATA_FILE\n' "${0##*/}" >&2
    return 2
  fi
  # No readability test on $1: it is handed to gnuplot as-is, exactly as the
  # original here-document did.
  plot_commands "$1" | gnuplot --persist
}

plot_osor "$@"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment