-
-
Save nichtich/2917572 to your computer and use it in GitHub Desktop.
script which generates the files under http://thedatahub.org/dataset/isil-dbpedia
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
# version 20120611
# author: dr0ide
# Copyright 2011 dr0ide. The program is distributed under the terms of
# the GNU General Public License, see https://www.gnu.org/licenses/gpl-3.0.html
#
#TODO:
# 1. If http://de.dbpedia.org/ontology/wikiPageRedirects exists, then fetch it,
#    because only there resides the information whether it is "wikiPageDisambiguates",
#    e.g. http://de.dbpedia.org/resource/Bücherei, or whether said property is NOT
#    there, as in http://de.dbpedia.org/resource/Bibliothek_der_Hansestadt_Lübeck
#    The current workaround ignores every resource with a "wikiPageRedirects".
# 2. http://de.dbpedia.org/ontology/wikiPageDisambiguates is to be ignored, see
#    DE-MUS-181411 to http://de.dbpedia.org/resource/Walhalla
# configure international DBpedia
#EP_dbpedia=http://live.dbpedia.org/sparql
#DOMAIN=dbpedia.org
# configure German DBpedia
EP_dbpedia=http://de.dbpedia.org/sparql
DOMAIN=de.dbpedia.org
# BEACON header written at the top of the output file.
# TARGET follows DOMAIN and TIMESTAMP is the (UTC) time of this run, so neither
# has to be edited by hand when switching DBpedia editions or regenerating.
PREFIX="#FORMAT: BEACON
#PREFIX: http://lobid.org/organisation/
#TARGET: http://${DOMAIN}/resource/
#TIMESTAMP: $(date -u +%Y-%m-%dT%H:%M:%SZ)"
# nothing to change below this line!
EP_lobid=http://lobid.org/sparql/
# Give the user a chance to abort (Ctrl-C) before the old result is discarded.
echo "will first remove ${DOMAIN}.beacon, please press Enter"; read -r
# -f: a missing file (first run) is not an error.
rm -f -- "${DOMAIN}.beacon"
echo "$PREFIX" > "${DOMAIN}.beacon"
# Split unquoted expansions on newlines only; names contain spaces.
IFS=$'\n'
#######################################
# Count the distinct rdfs:label values of a DBpedia resource, excluding
# resources that carry a wikiPageRedirects or wikiPageDisambiguates property.
# Globals:   DOMAIN (read), EP_dbpedia (read)
# Arguments: $1 - resource name as used in the URI (spaces already replaced)
# Outputs:   the count on stdout; if the last response line does not contain
#            "value <digits> ", that line is printed unchanged
#######################################
function sparql_dbpedia(){
  local resource=$1
  # curl's own diagnostics are discarded on purpose; a failed request simply
  # yields no (or non-numeric) output, which the caller has to cope with.
  curl -H "Accept: text/turtle" --data-urlencode "query=
SELECT DISTINCT (COUNT( distinct ?o) AS ?count) WHERE {
<http://$DOMAIN/resource/$resource> rdfs:label ?o .
FILTER NOT EXISTS {<http://$DOMAIN/resource/$resource> <http://dbpedia.org/ontology/wikiPageRedirects> ?t } .
FILTER NOT EXISTS {<http://$DOMAIN/resource/$resource> <http://dbpedia.org/ontology/wikiPageDisambiguates> ?tm } .
}
" "$EP_dbpedia" 2>/dev/null \
    | tail -n 1 \
    | sed -e 's#.*value \([0-9][0-9]*\) .*#\1#'
  # The capture accepts any number of digits, so counts >= 10 are extracted
  # as well instead of leaking the raw response line to the caller.
}
#######################################
# Count the distinct subjects in the lobid organisation graph whose foaf:name
# equals the given name.
# Globals:   EP_lobid (read)
# Arguments: $1 - organisation name, embedded verbatim into the query literal
# Outputs:   the count on stdout; if the last response line does not contain
#            "value <digits> ", that line is printed unchanged
#######################################
function sparql_lobid_count(){
  local org_name=$1
  # curl's own diagnostics are discarded on purpose; the caller validates
  # whatever comes back.
  curl -H "Accept: text/plain" --data-urlencode "query=
Prefix foaf: <http://xmlns.com/foaf/0.1/>
SELECT DISTINCT (COUNT( distinct ?s) AS ?count) WHERE {
graph <http://lobid.org/organisation/> {
?s foaf:name \"$org_name\" ;
}
}
" "$EP_lobid" 2>/dev/null \
    | tail -n 1 \
    | sed -e 's#.*value \([0-9][0-9]*\) .*#\1#'
  # Multi-digit counts are captured too; previously only one character
  # between the spaces was accepted.
}
#######################################
# Look up the dct:identifier of the lobid organisation with the given
# foaf:name.
# Globals:   EP_lobid (read)
# Arguments: $1 - organisation name, embedded verbatim into the query literal
#                 (NOTE(review): callers pass text cut out of a Turtle literal,
#                 so it is assumed to be escaped already - confirm before
#                 adding escaping here)
# Outputs:   the last line of the endpoint response with all double quotes
#            removed; the caller treats a line starting with "?" as "no result"
#######################################
function sparql_lobid(){
  local org_name=$1
  # Endpoint is quoted so that characters such as "?" or "*" in the URL can
  # never be glob-expanded. curl diagnostics are discarded on purpose.
  curl -H "Accept: text/plain" --data-urlencode "query=
Prefix foaf: <http://xmlns.com/foaf/0.1/>
PREFIX dct: <http://purl.org/dc/terms/>
SELECT ?id WHERE {
graph <http://lobid.org/organisation/> {
?s foaf:name \"$org_name\" ;
dct:identifier ?id;
}
}
" "$EP_lobid" 2>/dev/null \
    | tail -n 1 \
    | sed -e 's#"##g'
}
#sorry: as long as that turtle file [http://thedatahub.org/dataset/lobid-organisations] is not open data you must use the lobid endpoint at http://lobid.org/sparql/
# For every foaf:name found in the local lobid dump: if DBpedia has a
# same-named resource (with spaces turned into underscores) and lobid knows
# exactly one organisation of that name, append "<lobid id>|<resource>" to the
# beacon file.
# The names are read on fd 3 so that nothing inside the loop can consume them,
# and line by line so that names are never glob-expanded or word-split.
while IFS= read -r name <&3; do
  [ -n "$name" ] || continue
  # DBpedia resource names use "_" where the label has a space.
  i=${name// /_}
  dbpedia_count=$(sparql_dbpedia "$i")
  lobid_count=$(sparql_lobid_count "$name")
  # A failed request yields empty or non-numeric output; skip such names
  # instead of feeding garbage into a numeric comparison.
  [[ $dbpedia_count =~ ^[0-9]+$ && $lobid_count =~ ^[0-9]+$ ]] || continue
  if [ "$dbpedia_count" -gt 0 ] && [ "$lobid_count" -eq 1 ]; then
    ID=$(sparql_lobid "$name")
    # A reply starting with "?" is the bare result header (no identifier
    # found); an empty reply means the request failed. Neither is a valid
    # BEACON source.
    if [[ -n $ID && $ID != \?* ]]; then
      printf '%s|%s\n' "$ID" "$i" >> "${DOMAIN}.beacon"
    fi
  fi
done 3< <(grep foaf:name ../../lobid-organisations1.ttl | cut -d '"' -f2)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment