Last active
February 7, 2017 14:04
-
-
Save nandana/7cea0f4d990696465e9b4b21e5f1485e to your computer and use it in GitHub Desktop.
RDF reification of DBpedia TQL files along with template and attribute fields
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
#
# tql2rec - emit RDF reification triples for the statements of a DBpedia TQL file.
#
# Reads the file given as the only argument line by line. For each line that
# contains a <...&template=...&property=...> term, writes six N-Triples-style
# lines to stdout describing a fresh blank node (labelled with a UUID):
#   rdf:type rdf:Statement, rdf:subject, rdf:predicate,
#   prov:wasDerivedFrom, <http://dbpedia.org/x-template> and
#   <http://dbpedia.org/x-attribute>.
# Lines without such a term are skipped silently. The statement's object is
# NOT emitted: whitespace tokenisation cannot recover literals with spaces.
#
# Usage: tql2rec <tql-file-to-be-used>
# Requires: uuidgen
# Exit status: 1 on wrong argument count or when uuidgen is unavailable/fails.

# The script expects a single tql file.
if [[ "$#" -ne 1 ]]; then
  # Diagnostics go to stderr so they never end up in redirected RDF output.
  {
    echo "tql2rec - generates rectified statements from DBpedia tql files including template and infobox attributes."
    echo "Usage: tql2rec <tql-file-to-be-used>"
  } >&2
  exit 1
fi

# Fail early instead of emitting blank nodes with an empty label.
if ! command -v uuidgen >/dev/null 2>&1; then
  echo "tql2rec: required command 'uuidgen' not found" >&2
  exit 1
fi

# Regex used to extract the source, the template and the property from the
# context term (capture groups 1, 2 and 3).
readonly regex1='.*<(.*)&template=(.*)&property=(.*)&split(.*)>'
# Sometimes the split parameter is not present.
readonly regex2='.*<(.*)&template=(.*)&property=(.*)>'

# The "|| [[ -n ... ]]" part processes a final line lacking a trailing newline.
while IFS='' read -r line || [[ -n "$line" ]]; do
  # Try the more specific pattern first; BASH_REMATCH holds the groups of
  # whichever pattern matched.
  if [[ $line =~ $regex1 ]] || [[ $line =~ $regex2 ]]; then
    src_iri=${BASH_REMATCH[1]}
    template=${BASH_REMATCH[2]}
    property=${BASH_REMATCH[3]}
  else
    # Regex didn't match: we miss data but skip the line without reifying it.
    continue
  fi

  # The first two whitespace-separated tokens are the subject and predicate.
  # `read` splits without pathname expansion, unlike an unquoted arr=($line).
  read -r subject predicate _ <<< "$line"

  # One uuid per reified statement, generated only for lines that are emitted.
  uuid=$(uuidgen) || {
    echo "tql2rec: uuidgen failed" >&2
    exit 1
  }

  # Each output line deliberately keeps the trailing space after the final dot.
  printf '%s\n' \
    "_:$uuid <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://www.w3.org/1999/02/22-rdf-syntax-ns#Statement> . " \
    "_:$uuid <http://www.w3.org/1999/02/22-rdf-syntax-ns#subject> $subject . " \
    "_:$uuid <http://www.w3.org/1999/02/22-rdf-syntax-ns#predicate> $predicate . " \
    "_:$uuid <http://www.w3.org/ns/prov#wasDerivedFrom> <$src_iri> . " \
    "_:$uuid <http://dbpedia.org/x-template> \"$template\" . " \
    "_:$uuid <http://dbpedia.org/x-attribute> \"$property\" . "
done < "$1"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment