Skip to content

Instantly share code, notes, and snippets.

@lobid
Created June 11, 2012 15:13
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save lobid/2910584 to your computer and use it in GitHub Desktop.
Save lobid/2910584 to your computer and use it in GitHub Desktop.
Script that generates the files published at http://thedatahub.org/dataset/isil-dbpedia
#!/bin/bash
# version 20120611
# author: Pascal Christoph, hbz
# Copyright 2011 Pascal Christoph. The program is distributed under the terms of
# the GNU General Public License, see https://www.gnu.org/licenses/gpl-3.0.html
#
#TODO:
# 1. If http://de.dbpedia.org/ontology/wikiPageRedirects exists for a resource, fetch the redirect target,
# because only there resides the information whether it has "wikiPageDisambiguates",
# e.g. http://de.dbpedia.org/resource/Bücherei
# The current workaround ignores every resource that has a "wikiPageRedirects".
# 2. Look up whether any resource has a http://de.dbpedia.org/ontology/wikiPageDisambiguates
# for the resource in question, as for DE-MUS-181411 http://de.dbpedia.org/resource/Walhalla,
# and ignore those.
# configure international DBpedia
#EP_dbpedia=http://live.dbpedia.org/sparql
#DOMAIN=dbpedia.org
# configure German DBpedia
EP_dbpedia=http://de.dbpedia.org/sparql
DOMAIN=de.dbpedia.org
# BEACON header written as the first lines of the output file.
# TARGETPREFIX follows DOMAIN automatically; change TIMESTAMP manually.
PREFIX="#FORMAT: BEACON
#PREFIX: http://lobid.org/organisation/
#TARGETPREFIX: http://${DOMAIN}/resource/
#TIMESTAMP: 2012-11-01T10:43:53Z"
# nothing to change below this line!
EP_lobid=http://lobid.org/sparql/
# Name the file that really gets removed; read waits for a full line, so ask for Enter.
printf 'will first remove %s, please press Enter\n' "${DOMAIN}.beacon"
read -r
# -f: a missing beacon file (e.g. on the very first run) is not an error.
rm -f -- "${DOMAIN}.beacon"
printf '%s\n' "$PREFIX" > "${DOMAIN}.beacon"
# Split unquoted expansions on newlines only, so values containing spaces stay in one piece.
IFS=$'\n'
#######################################
# Count the distinct rdfs:label values of a DBpedia resource, provided the
# resource has neither a wikiPageRedirects nor a wikiPageDisambiguates triple.
# Globals:   DOMAIN (read), EP_dbpedia (read)
# Arguments: $1 - local name of the resource, inserted verbatim into the IRI
#                 <http://$DOMAIN/resource/$1>
# Outputs:   the count on stdout. If the last response line holds no
#            " value <digits> " token, that line is printed unchanged
#            (nothing at all if curl produced no output).
#######################################
sparql_dbpedia() {
  local resource=$1
  # NOTE(review): rdfs: is not declared in the query, so it relies on the
  # endpoint predefining that prefix - confirm before switching endpoints.
  local query="
SELECT DISTINCT (COUNT( distinct ?o) AS ?count) WHERE {
<http://$DOMAIN/resource/$resource> rdfs:label ?o .
FILTER NOT EXISTS {<http://$DOMAIN/resource/$resource> <http://dbpedia.org/ontology/wikiPageRedirects> ?t } .
FILTER NOT EXISTS {<http://$DOMAIN/resource/$resource> <http://dbpedia.org/ontology/wikiPageDisambiguates> ?tm } .
}
"
  # curl's own diagnostics are discarded on purpose; a failed request simply
  # yields empty output for the caller to deal with.
  # [0-9][0-9]* instead of a single character, so counts of 10 or more are
  # extracted as well.
  curl -H "Accept: text/turtle" --data-urlencode "query=${query}" "$EP_dbpedia" 2>/dev/null \
    | tail -n 1 \
    | sed -e 's#.*value \([0-9][0-9]*\) .*#\1#'
}
#######################################
# Count the distinct subjects in the <http://lobid.org/organisation/> graph
# whose foaf:name equals the given name.
# Globals:   EP_lobid (read)
# Arguments: $1 - organisation name, inserted verbatim between the double
#                 quotes of a SPARQL string literal (no escaping is applied)
# Outputs:   the count on stdout. If the last response line holds no
#            " value <digits> " token, that line is printed unchanged
#            (nothing at all if curl produced no output).
#######################################
sparql_lobid_count() {
  local org_name=$1
  local query="
Prefix foaf: <http://xmlns.com/foaf/0.1/>
SELECT DISTINCT (COUNT( distinct ?s) AS ?count) WHERE {
graph <http://lobid.org/organisation/> {
?s foaf:name \"$org_name\" ;
}
}
"
  # curl's own diagnostics are discarded on purpose; a failed request simply
  # yields empty output for the caller to deal with.
  # [0-9][0-9]* instead of a single character, so counts of 10 or more are
  # extracted as well.
  curl -H "Accept: text/plain" --data-urlencode "query=${query}" "$EP_lobid" 2>/dev/null \
    | tail -n 1 \
    | sed -e 's#.*value \([0-9][0-9]*\) .*#\1#'
}
#######################################
# Ask the lobid endpoint for the dct:identifier of the organisation in the
# <http://lobid.org/organisation/> graph whose foaf:name equals the given name.
# Globals:   EP_lobid (read)
# Arguments: $1 - organisation name, inserted verbatim between the double
#                 quotes of a SPARQL string literal
# Outputs:   the last line of the endpoint's reply with every double quote
#            removed (nothing if curl produced no output)
#######################################
sparql_lobid() {
  local org_name=$1
  local lookup_query="
Prefix foaf: <http://xmlns.com/foaf/0.1/>
PREFIX dct: <http://purl.org/dc/terms/>
SELECT ?id WHERE {
graph <http://lobid.org/organisation/> {
?s foaf:name \"${org_name}\" ;
dct:identifier ?id;
}
}
"
  # curl's diagnostics are deliberately discarded; only the reply body is used.
  curl -H "Accept: text/plain" --data-urlencode "query=${lookup_query}" "$EP_lobid" 2>/dev/null \
    | tail -n 1 \
    | tr -d '"'
}
#sorry: as long as that turtle file [http://thedatahub.org/dataset/lobid-organisations] is not open data you must use the lobid endpoint at http://lobid.org/sparql/
# For every foaf:name found in the local Turtle dump, append "<id>|<resource>"
# to ${DOMAIN}.beacon when
#   - sparql_dbpedia reports at least one label for the resource named like
#     the organisation (spaces replaced by "_"), and
#   - sparql_lobid_count reports exactly one organisation with that name.
# Names are read line by line on fd 3: unlike "for name in $(...)" this never
# glob-expands a name containing *, ? or [, and it leaves stdin untouched.
while IFS= read -r -u 3 name; do
  # Blank lines carry no name.
  [[ -n "$name" ]] || continue
  resource=${name// /_}

  # An empty or non-numeric reply (e.g. a failed request) counts as "no match"
  # instead of breaking the numeric comparison.
  label_count=$(sparql_dbpedia "$resource")
  if ! [[ "$label_count" =~ ^[0-9]+$ ]] || (( 10#$label_count == 0 )); then
    continue
  fi

  # Only queried once the DBpedia side matched, which saves a request otherwise.
  org_count=$(sparql_lobid_count "$name")
  if ! [[ "$org_count" =~ ^[0-9]+$ ]] || (( 10#$org_count != 1 )); then
    continue
  fi

  id=$(sparql_lobid "$name")
  # A value starting with "?" is presumably the result header ("?id"), i.e. no
  # row came back; an empty value means the request produced nothing. Neither
  # is written out.
  if [[ -n "$id" && "$id" != \?* ]]; then
    printf '%s|%s\n' "$id" "$resource" >> "${DOMAIN}.beacon"
  fi
done 3< <(grep foaf:name ../../lobid-organisations1.ttl | cut -d '"' -f2)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment