Skip to content

Instantly share code, notes, and snippets.

@ashepherd
Last active October 10, 2018 23:18
Show Gist options
  • Save ashepherd/b83eb2bf1d6e91b6bcb38492646261ff to your computer and use it in GitHub Desktop.
Save ashepherd/b83eb2bf1d6e91b6bcb38492646261ff to your computer and use it in GitHub Desktop.
# Namespace prefixes used by every statement in this document.
# The empty prefix is the base for all resources describing dataset 1234, version 1.
@prefix : <http://data.example.org/id/dataset/1234/v1/> .
@prefix prov: <http://www.w3.org/ns/prov#> .
@prefix provone: <http://purl.dataone.org/provone/2015/01/15/ontology#> .
@prefix schema: <http://schema.org/> .
@prefix dcterms: <http://purl.org/dc/terms/> .
@prefix owl: <http://www.w3.org/2002/07/owl#> .
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
# XML Schema datatypes (xsd:string, xsd:anyURI, xsd:dateTime, xsd:integer) are
# defined in the 2001 namespace; the 2000/10 namespace is a superseded draft and
# would give every typed literal below an unrecognised datatype IRI.
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .
# pipeline-spec.yaml => prov:Plan
# pipeline step => prov:Entity (schema:SoftwareApplication)
# pipeline step params => prov:Collection
# datapackage.json => prov:Entity (generated)
# McMurdoEpifauna.csv => prov:Entity (generated)
#### Machine-Actionable ####
# Source code of the Frictionless Data pipeline framework.
# It advertises a CreateAction whose object is :pipeline-spec and whose
# target is the :docker-dpp entry point.
:frictionlessdata-datapackage-pipelines
    a prov:Entity, schema:SoftwareSourceCode ;
    schema:name "FrictionlessData DataPackage Pipelines"@en-US ;
    schema:codeRepository "https://github.com/frictionlessdata/datapackage-pipelines"^^xsd:anyURI ;
    schema:potentialAction [
        a schema:CreateAction ;
        schema:object :pipeline-spec ;
        schema:target :docker-dpp
    ] .
# Source code of the BCO-DMO pipeline processors.
# rdfs:seeAlso points at the upstream documentation section on custom processors.
# Like the upstream framework, it advertises a CreateAction on :pipeline-spec
# targeting the :docker-dpp entry point.
:bcodmo-datapackage-pipelines
    a prov:Entity, schema:SoftwareSourceCode ;
    schema:name "BCO-DMO DataPackage Pipelines"@en-US ;
    schema:codeRepository "https://github.com/BCODMO/pipeline-generator/tree/development"^^xsd:anyURI ;
    rdfs:seeAlso "https://github.com/frictionlessdata/datapackage-pipelines#custom-processors"^^xsd:anyURI ;
    schema:potentialAction [
        a schema:CreateAction ;
        schema:object :pipeline-spec ;
        schema:target :docker-dpp
    ] .
# Entry point used as the target of the CreateActions above.
# Its platform is :docker, and its application is an anonymous
# schema:SoftwareApplication described by a Dockerfile URL and a command line.
:docker-dpp
    a schema:EntryPoint ;
    schema:actionPlatform :docker ;
    schema:actionApplication [
        a schema:SoftwareApplication ;
        schema:downloadUrl "https://raw.githubusercontent.com/BCODMO/pipeline-generator/master/Dockerfile"^^xsd:anyURI ;
        # Command line that runs the lat_lon_DDM_to_DD pipeline in the container.
        schema:description "docker run -it -v `pwd`:/pipelines:rw bcodmo/datapackage-pipelines:latest run ./lat_lon_DDM_to_DD"^^xsd:string ;
        # Installing the application: an InstallAction performed with :docker
        # on the BCO-DMO processors, whose own entry point is a `docker pull`.
        schema:potentialAction [
            a schema:InstallAction ;
            schema:object :bcodmo-datapackage-pipelines ;
            schema:instrument :docker ;
            schema:target [
                a schema:EntryPoint ;
                schema:description "docker pull bcodmo/datapackage-pipelines:latest"^^xsd:string
            ]
        ]
    ] .
# Docker itself, referenced as the action platform and install instrument.
:docker
    schema:downloadUrl "https://www.docker.com"^^xsd:anyURI ;
    a schema:SoftwareApplication .
#### PROV ####
# The resource named by the empty prefix (the v1 base IRI) is declared
# as a PROV bundle, attributed to :alice, with its generation timestamp.
:
    a prov:Entity, prov:Bundle ;
    prov:generatedAtTime "2018-09-21T13:38:10Z"^^xsd:dateTime ;
    prov:wasAttributedTo :alice .
# The original, unprocessed dataset, available as a single download.
:raw-data
    a schema:Dataset, provone:Data, prov:Entity ;
    schema:distribution [
        a schema:DataDownload ;
        schema:contentUrl "https://example.org/dataset/1234/original/20180921T123456Z/McMurdoEpifauna.xlsx"^^xsd:anyURI ;
        # The download is an .xlsx workbook, so it carries the Office Open XML
        # spreadsheet media type; "application/vnd.ms-excel" is the legacy .xls type.
        schema:encodingFormat "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"^^xsd:string
    ] .
# The person responsible for this provenance record.
# She acts on behalf of :bco-dmo-office; the qualified delegation records
# the :data-manager role and the two activities the delegation covers.
:alice
    a prov:Agent, prov:Person ;
    prov:actedOnBehalfOf :bco-dmo-office ;
    prov:qualifiedDelegation [
        a prov:Delegation ;
        prov:agent :bco-dmo-office ;
        prov:hadActivity :created-pipeline-spec,
                         :executed-pipeline ;
        prov:hadRole :data-manager
    ] .
# The organization on whose behalf :alice acts.
:bco-dmo-office
    schema:name "Biological and Chemical Oceanography Data Management Office"^^xsd:string ;
    a prov:Agent, prov:Organization .
# Activity that authored the pipeline specification:
# it used :raw-data, generated :pipeline-spec, and is associated with
# :alice in the :data-manager role.
:created-pipeline-spec
    a prov:Activity, provone:Execution ;
    prov:used :raw-data ;
    prov:generated :pipeline-spec ;
    prov:wasStartedBy :alice ;
    prov:qualifiedAssociation [
        a prov:Association ;
        prov:hadRole :data-manager ;
        prov:agent :alice
    ] .
# The pipeline specification document (pipeline-spec.yaml).
# It is both a generated entity (output of :created-pipeline-spec) and the
# plan / workflow followed by the pipeline execution.
:pipeline-spec
    a schema:DigitalDocument, provone:Workflow, prov:Plan, prov:Entity ;
    prov:wasGeneratedBy :created-pipeline-spec ;
    prov:wasAttributedTo :alice ;
    schema:name "lat_lon_DDM_to_DD"^^xsd:string ;
    schema:headline "lat_lon_DDM_to_DD"^^xsd:string ;
    schema:description "Add lat & lon columns in decimal degrees (DD) given one column with lat & lon in format degrees decimal minutes (DDM)."@en-US ;
    schema:contentUrl "https://example.org/dataset/1234/v1/pipeline-spec.yaml"^^xsd:anyURI ;
    schema:encodingFormat "application/x-yaml"^^xsd:string ;
    # The steps of the pipeline, numbered in order. Every step identifier joins
    # the step number, processor package and processor name with '-'
    # (step 8 previously used '.' before "round_fields", unlike step 7).
    dcterms:hasPart
        :step-1-add-resource,
        :step-2-stream_remote_resources,
        :step-3-set_types,
        :step-4-bcodmo_pipeline_processors-add_schema_metadata,
        :step-5-bcodmo_pipeline_processors-convert_to_decimal_degrees,
        :step-6-bcodmo_pipeline_processors-convert_to_decimal_degrees,
        :step-7-bcodmo_pipeline_processors-round_fields,
        :step-8-bcodmo_pipeline_processors-round_fields,
        :step-9-dump-to_path .
# First pipeline step; its inputs are described by :step-1-add-resource-inputs.
:step-1-add-resource
    schema:supportingData :step-1-add-resource-inputs ;
    a prov:Entity, provone:Program .
# Inputs of step 1, modelled as a data feed with one element: a PROV
# collection whose members are name/value pairs ("run" and "parameters").
:step-1-add-resource-inputs
    a schema:DataFeed ;
    schema:dataFeedElement [
        a prov:Collection ;
        rdfs:comment "A single step in pipeline."@en-US ;
        prov:hadMember
            # Which processor the step runs.
            [
                a schema:PropertyValue, provone:Data, prov:Entity ;
                schema:name "run"^^xsd:string ;
                schema:value "add_resource"^^xsd:string
            ],
            # The processor's parameters, each a nested name/value pair.
            [
                a schema:PropertyValue, provone:Data, prov:Entity ;
                schema:name "parameters"^^xsd:string ;
                schema:value
                    [
                        a schema:PropertyValue ;
                        schema:name "name"^^xsd:string ;
                        schema:value "mcmurdo_epifauna"^^xsd:string
                    ],
                    [
                        a schema:PropertyValue ;
                        schema:name "url"^^xsd:string ;
                        schema:value "https://example.org/dataset/1234/original/20180921T123456Z/McMurdoEpifauna.xlsx"^^xsd:string
                    ],
                    [
                        a schema:PropertyValue ;
                        schema:name "format"^^xsd:string ;
                        schema:value "xlsx"^^xsd:string
                    ],
                    [
                        a schema:PropertyValue ;
                        schema:name "sheet"^^xsd:string ;
                        schema:value "animals"^^xsd:string
                    ],
                    [
                        a schema:PropertyValue ;
                        schema:name "headers"^^xsd:string ;
                        schema:value "1"^^xsd:integer
                    ]
            ]
    ] .
# Activity that ran the pipeline, following :pipeline-spec as its plan.
# The qualified association ties :alice, her :data-manager role and the plan together.
:executed-pipeline
    a prov:Activity, provone:Execution ;
    prov:wasStartedBy :alice ;
    prov:hadPlan :pipeline-spec ;
    prov:qualifiedAssociation [
        a prov:Association ;
        prov:agent :alice ;
        prov:hadPlan :pipeline-spec ;
        prov:hadRole :data-manager
    ] .
# The datapackage.json descriptor produced by the pipeline execution.
:frictionless-data-pkg
    a schema:DigitalDocument, provone:Data, prov:Entity ;
    prov:wasGeneratedBy :executed-pipeline ;
    prov:qualifiedGeneration [
        a prov:Generation ;
        prov:activity :executed-pipeline ;
        # A prov:Generation is an instantaneous event, so PROV-O gives it a single
        # prov:atTime; prov:startTime / prov:endTime are not PROV-O properties.
        # The instant kept here is the recorded end timestamp. The original record
        # also carried a start timestamp of 2018-09-21T13:37:53Z.
        prov:atTime "2018-09-21T13:38:10Z"^^xsd:dateTime
    ] ;
    # Media types are "type/subtype": the separator after "application" is a slash.
    schema:encodingFormat "application/vnd.datapackage+json"^^xsd:string ;
    schema:url "https://example.org/dataset/1234/v1/datapackage.json"^^xsd:anyURI .
# The processed dataset (CSV) produced by the pipeline execution.
# It is derived from both the raw data and the pipeline specification,
# with the raw data as its primary source.
:processed-data
    a schema:Dataset, provone:Data, prov:Entity ;
    prov:wasGeneratedBy :executed-pipeline ;
    prov:hadPrimarySource :raw-data ;
    prov:wasDerivedFrom :pipeline-spec ;
    prov:wasDerivedFrom :raw-data ;
    prov:qualifiedGeneration [
        a prov:Generation ;
        prov:activity :executed-pipeline ;
        # A prov:Generation is an instantaneous event, so PROV-O gives it a single
        # prov:atTime; prov:startTime / prov:endTime are not PROV-O properties.
        # The instant kept here is the recorded end timestamp. The original record
        # also carried a start timestamp of 2018-09-21T13:37:54Z.
        prov:atTime "2018-09-21T13:38:09Z"^^xsd:dateTime
    ] ;
    schema:distribution [
        a schema:DataDownload ;
        schema:contentUrl "https://example.org/dataset/1234/v1/McMurdoEpifauna.csv"^^xsd:anyURI ;
        schema:encodingFormat "text/csv"^^xsd:string
    ] .
#### ROLES ####
# Roles that can be referenced through prov:hadRole.
:data-manager
    a prov:Role .
:program-input
    a prov:Role .
:program-output
    a prov:Role .
#### ALIGNMENTS ####
# Class equivalences declared between the schema.org, ProvONE and PROV vocabularies.
schema:Dataset owl:equivalentClass provone:Data .
provone:Program owl:equivalentClass schema:SoftwareApplication .
prov:Person owl:equivalentClass schema:Person .
prov:Organization owl:equivalentClass schema:Organization .
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment