- Run (often created/edited from SMRT Link RunDesign, stored as XML)
- CollectionMetadata a Run has a list of Collections (Primary Analysis will convert each CollectionMetadata into a SubreadSet)
- PacBio DataSets (SubreadSet, ReferenceSet, etc.) These are thin XML files that contain general metadata as well as pointers to 'external resources' (e.g., BAM or Fasta files) and their companion index files.
- SMRT Link Job A general (async) unit of work to perform operations on PacBio DataSets
- **DataStoreFile** a container for an output file from a SMRT Link Job; it contains metadata such as file type, size, and path. A list of DataStoreFiles is called a DataStore. This is the core output of a SMRT Link Job.
- **Report** a general model to capture metrics (also referred to as 'Attributes'), Report Tables, and Report Plot Groups. A Report is a specific type of DataStoreFile and is used to communicate details of a SMRT Link Job to the SMRT Link UI (and web services).
Second-tier models, such as Report View Rules or Pipeline View Rules, are not discussed here.
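The core models above can be summarized with a minimal sketch. The field names here are illustrative, not the actual SMRT Link schema:

```python
from dataclasses import dataclass, field
from typing import List


@dataclass
class DataStoreFile:
    """A single output file of a SMRT Link Job (illustrative fields)."""
    uuid: str
    file_type_id: str  # e.g., "PacBio.FileTypes.JsonReport"
    path: str
    size_bytes: int


@dataclass
class DataStore:
    """The core output of a SMRT Link Job: a list of DataStoreFiles."""
    files: List[DataStoreFile] = field(default_factory=list)


@dataclass
class ReportAttribute:
    """A single named metric ('Attribute') in a Report."""
    id: str
    name: str
    value: float


@dataclass
class Report:
    """A specific DataStoreFile type carrying attributes, tables, plot groups."""
    uuid: str
    attributes: List[ReportAttribute] = field(default_factory=list)
    # tables and plot groups omitted for brevity
```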
ICS/PA takes a Run XML with a list of Collections and converts each CollectionMetadata into a SubreadSet. The SubreadSet is copied from the ICS/PA file system into customer storage on NFS (accessible by the companion SMRT Link instance), and the SubreadSet XML is imported into SMRT Link using the `import-dataset` Job type. The Reports for the SubreadSet XML emitted from the `import-dataset` job show up in RunQC as well as in DataManagement in SMRT Link.
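The handoff above can be sketched as a simple pipeline. The function names and paths are hypothetical placeholders, not the real ICS/PA or SMRT Link APIs:

```python
from typing import List


def primary_analysis(collection_metadata: str) -> str:
    """Hypothetical: convert a CollectionMetadata into a SubreadSet XML path."""
    return collection_metadata.replace(".metadata.xml", ".subreadset.xml")


def copy_to_nfs(subreadset_xml: str) -> str:
    """Hypothetical: copy the SubreadSet from ICS/PA storage to customer NFS."""
    return "/nfs/customer/" + subreadset_xml.rsplit("/", 1)[-1]


def process_run(collections: List[str]) -> List[str]:
    """One SubreadSet (and one import-dataset job) per Collection in the Run XML."""
    return [copy_to_nfs(primary_analysis(c)) for c in collections]
```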
Shown below is a sketch of the dataflow.
Simplified, the general interface of a SMRT Link Job, for DataSet type T: a Job takes T as input and produces a DataStore (T -> Job -> DataStore).
More generally: List of EntryPoint PB DataSets -> Job -> DataStore
A DataStore is a list of DataStoreFiles.
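In Python-flavored types, that interface could be written as follows. This is a sketch only; the real services speak REST/JSON, and the DataStore is simplified to a list of paths:

```python
from typing import Callable, List

# A DataStore is simplified here to a list of output file paths.
DataStore = List[str]

# The general Job interface: EntryPoint DataSet path(s) in, DataStore out.
Job = Callable[[List[str]], DataStore]


def import_dataset_job(entry_points: List[str]) -> DataStore:
    """Sketch of an import-dataset job: it emits the dataset(s) themselves
    plus a report JSON describing them (paths are illustrative)."""
    return entry_points + ["reports/import_report.json"]
```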
Each DataStoreFile can be a different file type, such as a PB DataSet, VCF, ReportJSON, or Fasta, and also contains the job id and UUID of the job that generated the DataStoreFile.
During and after SMRT Link Job execution, the DataStoreFiles are imported into the SMRT Link database. For a specific subset of file types (PB DataSet types), additional metadata is stored in the SMRT Link database. Each DataSet has metadata about its specific dataset type as well as metadata about a possible 'parent' DataSet. The DataSet 'parentage' can result from copying, merging, or analysis (the semantics are not consistent).
Each ReportJSON file contains a list of PB DataSet UUIDs in its data model. This is used to communicate which DataSets are specific to the input(s) of a given ReportJSON. Alternatively said, the EntryPoint PB DataSet(s) might not be directly used to compute the ReportJSON datastore file.
NOTE: the dotted arrow represents the relation between the Report and the source input for the task at the ReportJSON level. This is NOT captured at the SMRT Link Server level.
Accessing the Reports and the source DataSet is well defined here, since both depend only on the Job Id.
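Pulling the referenced DataSet UUIDs out of a ReportJSON might look like this. The `dataset_uuids` key follows the pbcommand-style report schema; treat the field name as an assumption:

```python
import json


def report_dataset_uuids(report_path: str) -> list:
    """Return the PB DataSet UUIDs referenced by a ReportJSON file."""
    with open(report_path) as f:
        report = json.load(f)
    # 'dataset_uuids' is assumed here; default to [] if the key is absent
    return report.get("dataset_uuids", [])
```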
I believe the Merge DataSet Job type is similar.
To perform a standard Resequencing Job, the user can run two different `import-dataset` SMRT Link Jobs, then a `pbsmrtpipe` (i.e., 'Analysis') SMRT Link Job can be performed.
Steps:
- Import SubreadSet
- Import ReferenceSet
- Run Analysis Job to run the Resequencing Analysis
(Each Job type is shown in its own box)
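The three steps can be sketched as chained job calls. The helper names are hypothetical; in practice each is a separate SMRT Link service job:

```python
def import_dataset(xml_path: str) -> str:
    """Hypothetical import-dataset job: returns the imported dataset id."""
    return "imported:" + xml_path


def run_analysis(pipeline_id: str, entry_points: list) -> list:
    """Hypothetical pbsmrtpipe job: returns its DataStore as a list of paths."""
    return [f"{pipeline_id}/{e}" for e in entry_points]


def resequencing(subreadset_xml: str, referenceset_xml: str) -> list:
    sset = import_dataset(subreadset_xml)    # Job 1: import SubreadSet
    rset = import_dataset(referenceset_xml)  # Job 2: import ReferenceSet
    return run_analysis("resequencing", [sset, rset])  # Job 3: analysis
```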
To demonstrate a larger dataflow example, consider the following case. A user would like to import SubreadSets alpha and beta, perform filtering on alpha, merge the filtered output with beta, perform a Resequencing analysis on the merged SubreadSet, and export the merged DataSet as a ZIP.
Steps:
- Import the ReferenceSet, SubreadSet alpha, and SubreadSet beta
- Create a filtered SubreadSet from SubreadSet alpha
- Create a merged SubreadSet from SubreadSet beta and the output of #2
- Create an Analysis Job using #3 and ReferenceSet from #1
- Create a DataSet XML(s) ZIP from the output of #3
This demonstrates the graph nature of the design and the composability of different SMRT Link Job types. Note that data provenance comes for free in this model.
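Because every DataStoreFile records the job that produced it, and each job records its input DataSets, provenance is a backwards walk over a DAG. A sketch using the five steps above (job and dataset names are illustrative):

```python
# Each job records its input and output dataset ids (illustrative values).
JOBS = {
    "import":   {"inputs": [],                         "outputs": ["alpha", "beta", "ref"]},
    "filter":   {"inputs": ["alpha"],                  "outputs": ["alpha.filtered"]},
    "merge":    {"inputs": ["beta", "alpha.filtered"], "outputs": ["merged"]},
    "analysis": {"inputs": ["merged", "ref"],          "outputs": ["consensus"]},
    "export":   {"inputs": ["merged"],                 "outputs": ["datasets.zip"]},
}


def provenance(dataset: str, jobs: dict) -> set:
    """Walk backwards from a dataset to every ancestor dataset."""
    producers = {out: job for job, rec in jobs.items() for out in rec["outputs"]}
    seen, stack = set(), [dataset]
    while stack:
        job = producers.get(stack.pop())
        for parent in (jobs[job]["inputs"] if job else []):
            if parent not in seen:
                seen.add(parent)
                stack.append(parent)
    return seen
```

For example, `provenance("consensus", JOBS)` recovers every upstream dataset: the merged SubreadSet, the ReferenceSet, beta, the filtered alpha, and alpha itself.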