Skip to content

Instantly share code, notes, and snippets.

@drdozer
Last active December 17, 2015 16:09
Show Gist options
  • Save drdozer/5636889 to your computer and use it in GitHub Desktop.
Save drdozer/5636889 to your computer and use it in GitHub Desktop.
SBOL data model
// Cardinality markers: each wrapper type records how many values a member may hold.
/** Zero or one value (0..1). */
type Optional[A]
/** Exactly one value (1). */
type Required[A]
/** Any number of values, possibly none (0..*). */
type Many[A]
/** A value-range restriction: an integer x such that x > 0 (x: Int => x > 0). */
type PositiveInteger
/** A whole load of DNA components, grouped together for whatever reason. */
trait Collection {
  // exactly-one members
  def uri: Required[URI]
  def displayId: Required[String]

  // zero-or-one members
  def name: Optional[String]
  def description: Optional[String]

  // zero-or-more members: the grouped components
  def components: Many[DnaComponent]
}
/** A DNA component. */
trait DnaComponent {
  // exactly-one members
  def uri: Required[URI]
  def displayId: Required[String]

  // zero-or-one members
  def name: Optional[String]
  def description: Optional[String]
  def dnaSequence: Optional[DnaSequence]

  // zero-or-more members
  def `type`: Many[URI]
  def annotations: Many[SequenceAnnotation]
}
/** A DNA sequence: a URI plus the nucleotide string, both required. */
trait DnaSequence {
  def uri: Required[URI]
  def nucleotides: Required[String]
}
// an interesting bit of a DNA component
trait SequeneAnnotation {
def uri: Required[URI]
def bioStart: Optional[PositiveInteger]
def bioEnd: Optional[PositiveInteger]
def strand: Optional[Strand]
def precedes: Many[SequenceAnnotation]
def subComponent: Required[SequenceAnnotation]
}
/** The strand of a region; a closed set with exactly two values. */
sealed trait Strand

/** Holder for the two strand values, `+` and `-`. */
object Strand {
  object `+` extends Strand
  object `-` extends Strand
}
/** Zero or one value (0..1). */
type Optional[A]
/** Exactly one value (1). */
type Required[A]
/** Any number of values, possibly none (0..*). */
type Many[A]
/** An integer x such that x > 0 (x: Int => x > 0). */
type PositiveInteger
/** Something that has a URI identifier. */
trait Identified {
  /** The identifier; exactly one is required. */
  def uri: Required[URI]
}
/** Something with some documentation attached. */
trait Described {
  // Self-type: Described can only be mixed into something that is Identified.
  this: Identified =>

  def displayId: Required[String]
  def name: Optional[String]
  def description: Optional[String]
}
/** An identified, described grouping of zero or more DNA components. */
trait Collection extends Identified with Described {
  def components: Many[DnaComponent]
}
/** An identified, described DNA component. */
trait DnaComponent extends Identified with Described {
  // zero or one sequence
  def dnaSequence: Optional[DnaSequence]

  // zero or more type URIs and annotations
  def `type`: Many[URI]
  def annotations: Many[SequenceAnnotation]
}
/** An identified DNA sequence carrying a required nucleotide string. */
trait DnaSequence extends Identified {
  def nucleotides: Required[String]
}
/**
 * An identified annotation on a DNA component.
 *
 * The trait is named `SequenceAnnotation` (the original spelling `SequeneAnnotation`
 * left every `SequenceAnnotation` reference in this model unresolved).
 */
trait SequenceAnnotation extends Identified {
  // position and strand are each independently optional in this version of the model
  def bioStart: Optional[PositiveInteger]
  def bioEnd: Optional[PositiveInteger]
  def strand: Optional[Strand]
  // presumably the annotations that this one comes before -- confirm against the SBOL spec
  def precedes: Many[SequenceAnnotation]
  // NOTE(review): the SBOL spec appears to type subComponent as a DnaComponent -- confirm
  def subComponent: Required[SequenceAnnotation]
}
/** The strand of a region; a closed set with exactly two values. */
sealed trait Strand

/** Holder for the two strand values, `+` and `-`. */
object Strand {
  object `+` extends Strand
  object `-` extends Strand
}
/** Zero or one value (0..1). */
type Optional[A]
/** Exactly one value (1). */
type Required[A]
/** Any number of values, possibly none (0..*). */
type Many[A]
/** An integer x such that x > 0 (x: Int => x > 0). */
type PositiveInteger
/** Anything that carries a required URI. */
trait Identified {
  def uri: Required[URI]
}
/** Documentation members: a required display id plus an optional name and description. */
trait Described {
  // Self-type: Described can only be mixed into something that is Identified.
  this: Identified =>

  def displayId: Required[String]
  def name: Optional[String]
  def description: Optional[String]
}
/** An identified, described grouping of zero or more DNA components. */
trait Collection extends Identified with Described {
  def components: Many[DnaComponent]
}
/** An identified, described DNA component. */
trait DnaComponent extends Identified with Described {
  // zero or one sequence
  def dnaSequence: Optional[DnaSequence]

  // zero or more type URIs and annotations
  def `type`: Many[URI]
  def annotations: Many[SequenceAnnotation]
}
/** An identified DNA sequence carrying a required nucleotide string. */
trait DnaSequence extends Identified {
  def nucleotides: Required[String]
}
/**
 * An identified annotation on a DNA component.
 *
 * The trait is named `SequenceAnnotation` (the original spelling `SequeneAnnotation`
 * left every `SequenceAnnotation` reference in this model unresolved).
 */
trait SequenceAnnotation extends Identified {
  // the location info is either all present or all absent
  def locatedAt: Optional[Region]
  // presumably the annotations that this one comes before -- confirm against the SBOL spec
  def precedes: Many[SequenceAnnotation]
  // NOTE(review): the SBOL spec appears to type subComponent as a DnaComponent -- confirm
  def subComponent: Required[SequenceAnnotation]
}
/** A region of a DNA molecule: a required start and end, and an optional strand. */
trait Region {
  def bioStart: Required[PositiveInteger]
  def bioEnd: Required[PositiveInteger]

  // The spec is silent about whether strand is required, so it is modelled as optional.
  def strand: Optional[Strand]
}
/** The strand of a region; a closed set with exactly two values. */
sealed trait Strand

/** Holder for the two strand values, `+` and `-`. */
object Strand {
  object `+` extends Strand
  object `-` extends Strand
}
/** Zero or one value (0..1). */
type Optional[A]
/** Exactly one value (1). */
type Required[A]
/** Any number of values, possibly none (0..*). */
type Many[A]
/** An integer x such that x > 0 (x: Int => x > 0). */
type PositiveInteger
/** A type L|R has values that are either instances of L or of R, but not anything else. */
type |[L, R]
/** Anything that carries a required URI. */
trait Identified {
  def uri: Required[URI]
}
/** Documentation members: a required display id plus an optional name and description. */
trait Described {
  // Self-type: Described can only be mixed into something that is Identified.
  this: Identified =>

  def displayId: Required[String]
  def name: Optional[String]
  def description: Optional[String]
}
/**
 * A grouping of components of any molecule kind (DNA, RNA or protein).
 *
 * Open question: we could have a separate collection for each type of molecule --
 * does that make sense? It was not done here (the author couldn't be bothered).
 */
trait Collection extends Identified with Described {
  def components: Many[DnaComponent | RnaComponent | ProteinComponent]
}
/** An identified, described DNA component. */
trait DnaComponent extends Identified with Described {
  // zero or one sequence
  def dnaSequence: Optional[DnaSequence]

  // zero or more type URIs and annotations
  def `type`: Many[URI]
  def annotations: Many[DnaSequenceAnnotation]
}
/** The RNA counterpart of the component type. */
trait RnaComponent extends Identified with Described {
  // zero or one sequence
  def rnaSequence: Optional[RnaSequence]

  // zero or more type URIs and annotations
  def `type`: Many[URI]
  def annotations: Many[RnaSequenceAnnotation]
}
/** The protein counterpart of the component type. */
trait ProteinComponent extends Identified with Described {
  // zero or one sequence
  def proteinSequence: Optional[ProteinSequence]

  // zero or more type URIs and annotations
  def `type`: Many[URI]
  def annotations: Many[ProteinSequenceAnnotation]
}
/** An identified DNA sequence carrying a required nucleotide string. */
trait DnaSequence extends Identified {
  def nucleotides: Required[String]
}
/** The RNA counterpart: an identified sequence carrying a required nucleotide string. */
trait RnaSequence extends Identified {
  def nucleotides: Required[String]
}
/** The protein counterpart: an identified sequence carrying a required residue string. */
trait ProteinSequence extends Identified {
  def residues: Required[String]
}
/**
 * An identified annotation on a DNA component.
 *
 * Named `DnaSequenceAnnotation` so that the references from `DnaComponent` and from
 * this trait's own members resolve (the original spelling was `DnaSequeneAnnotation`).
 */
trait DnaSequenceAnnotation extends Identified {
  def locatedAt: Optional[DnaRegion]
  // Typed to the DNA annotation: this version of the model defines no plain
  // SequenceAnnotation type, so the original reference could not resolve.
  def precedes: Many[DnaSequenceAnnotation]
  // NOTE(review): the SBOL spec appears to type subComponent as a component -- confirm
  def subComponent: Required[DnaSequenceAnnotation]
}
/**
 * The RNA counterpart of the annotation type.
 *
 * Named `RnaSequenceAnnotation` so that the references from `RnaComponent` and from
 * this trait's own members resolve (the original spelling was `RnaSequeneAnnotation`).
 */
trait RnaSequenceAnnotation extends Identified {
  def locatedAt: Optional[RnaRegion]
  // Typed to the RNA annotation: this version of the model defines no plain
  // SequenceAnnotation type, so the original reference could not resolve.
  def precedes: Many[RnaSequenceAnnotation]
  // NOTE(review): the SBOL spec appears to type subComponent as a component -- confirm
  def subComponent: Required[RnaSequenceAnnotation]
}
/**
 * The protein counterpart of the annotation type.
 *
 * Named `ProteinSequenceAnnotation` so that the references from `ProteinComponent` and
 * from this trait's own members resolve (the original spelling was
 * `ProteinSequeneAnnotation`).
 */
trait ProteinSequenceAnnotation extends Identified {
  def locatedAt: Optional[ProteinRegion]
  // Typed to the protein annotation: this version of the model defines no plain
  // SequenceAnnotation type, so the original reference could not resolve.
  def precedes: Many[ProteinSequenceAnnotation]
  // NOTE(review): the SBOL spec appears to type subComponent as a component -- confirm
  def subComponent: Required[ProteinSequenceAnnotation]
}
/** A region of a DNA molecule: a required start and end, and an optional strand. */
trait DnaRegion {
  def bioStart: Required[PositiveInteger]
  def bioEnd: Required[PositiveInteger]

  def strand: Optional[Strand]
}
/**
 * A region of an RNA molecule.
 *
 * It is not clear that this is the correct expressivity, given that we potentially
 * need to mark up hairpins and other non-regular structures.
 */
trait RnaRegion {
  def bioStart: Required[PositiveInteger]
  def bioEnd: Required[PositiveInteger]

  def strand: Optional[Strand]
}
/** A region of a protein; protein regions don't have strand information. */
trait ProteinRegion {
  def bioStart: Required[PositiveInteger]
  def bioEnd: Required[PositiveInteger]
}
/** The strand of a region; a closed set with exactly two values. */
sealed trait Strand

/** Holder for the two strand values, `+` and `-`. */
object Strand {
  object `+` extends Strand
  object `-` extends Strand
}
/** Zero or one value (0..1). */
type Optional[A]
/** Exactly one value (1). */
type Required[A]
/** Any number of values, possibly none (0..*). */
type Many[A]
/** An integer x such that x > 0 (x: Int => x > 0). */
type PositiveInteger
/** A type L|R has values that are either instances of L or of R, but not anything else. */
type |[L, R]
/** Anything that carries a required URI. */
trait Identified {
  def uri: Required[URI]
}
/** Documentation members: a required display id plus an optional name and description. */
trait Described {
  // Self-type: Described can only be mixed into something that is Identified.
  this: Identified =>

  def displayId: Required[String]
  def name: Optional[String]
  def description: Optional[String]
}
/** A grouping of components of any molecule kind (DNA, RNA or protein). */
trait Collection extends Identified with Described {
  def components: Many[DnaComponent | RnaComponent | ProteinComponent]
}
/**
 * The base type for the per-molecule component types.
 *
 * Both type members are abstract here; each concrete component trait supplies
 * the actual sequence and annotation types.
 */
trait BaseComponent extends Identified with Described {
  /** The sequence type; subtypes will refine this. */
  type Sequence <: BaseSequence

  /**
   * The annotation type; subtypes will refine this. The bound is `BaseAnnotation`,
   * the base annotation trait of this model (the original bound,
   * `BaseSequenceAnnotation`, is not defined anywhere).
   */
  type SequenceAnnotation <: BaseAnnotation

  def `type`: Many[URI]
  def sequence: Optional[Sequence] // renamed for generality
  def annotations: Many[SequenceAnnotation]
}
/** The DNA component: fixes both abstract type members of BaseComponent. */
trait DnaComponent extends BaseComponent {
  /** The actual sequence type. */
  type Sequence = DnaSequence

  /** The actual annotation type. */
  type SequenceAnnotation = DnaSequenceAnnotation
}
/** The RNA component: fixes both abstract type members of BaseComponent. */
trait RnaComponent extends BaseComponent {
  /** The actual sequence type. */
  type Sequence = RnaSequence

  /** The actual annotation type. */
  type SequenceAnnotation = RnaSequenceAnnotation
}
/** The protein component: fixes both abstract type members of BaseComponent. */
trait ProteinComponent extends BaseComponent {
  /** The actual sequence type. */
  type Sequence = ProteinSequence

  /** The actual annotation type. */
  type SequenceAnnotation = ProteinSequenceAnnotation
}
/**
 * The base sequence type.
 *
 * We probably don't want to accidentally pass DNA in to a method expecting protein,
 * so the type system is used: each molecule kind gets its own subtype.
 */
trait BaseSequence extends Identified {
  // name change needed! (this member covers what was previously `nucleotides` as well)
  def residues: Required[String]
}
/** Marker subtype of BaseSequence for DNA. */
trait DnaSequence extends BaseSequence

/** Marker subtype of BaseSequence for RNA. */
trait RnaSequence extends BaseSequence

/** Marker subtype of BaseSequence for protein. */
trait ProteinSequence extends BaseSequence
/** The base annotation type; both type members are refined by the concrete subtypes. */
trait BaseAnnotation extends Identified {
  /** The region type; again, subtypes will refine this. */
  type Region <: BaseRegion

  /** The annotation type; should be filled in with the sub-type itself. */
  type SequenceAnnotation <: BaseAnnotation

  def locatedAt: Optional[Region]
  def precedes: Many[SequenceAnnotation]
  def subComponent: Required[SequenceAnnotation]
}
/**
 * The DNA annotation: derives from BaseAnnotation and fills in the actual types.
 *
 * Named `DnaSequenceAnnotation` so that the alias below and the reference from
 * `DnaComponent` resolve (the original spelling was `DnaSequeneAnnotation`).
 */
trait DnaSequenceAnnotation extends BaseAnnotation {
  type Region = DnaRegion
  type SequenceAnnotation = DnaSequenceAnnotation
}
/**
 * The RNA annotation: derives from BaseAnnotation and fills in the actual types.
 *
 * Named `RnaSequenceAnnotation` so that the alias below and the reference from
 * `RnaComponent` resolve (the original spelling was `RnaSequeneAnnotation`).
 */
trait RnaSequenceAnnotation extends BaseAnnotation {
  type Region = RnaRegion
  type SequenceAnnotation = RnaSequenceAnnotation
}
/**
 * The protein annotation: derives from BaseAnnotation and fills in the actual types.
 *
 * Named `ProteinSequenceAnnotation` so that the alias below and the reference from
 * `ProteinComponent` resolve (the original spelling was `ProteinSequeneAnnotation`).
 */
trait ProteinSequenceAnnotation extends BaseAnnotation {
  type Region = ProteinRegion
  type SequenceAnnotation = ProteinSequenceAnnotation
}
/** Base class for regions: a required start and a required end. */
trait BaseRegion {
  def bioStart: Required[PositiveInteger]
  def bioEnd: Required[PositiveInteger]
}
/** Base class for stranded regions: a BaseRegion plus an optional strand. */
trait NucleotideRegion extends BaseRegion {
  def strand: Optional[Strand]
}
/** DNA regions are stranded (NucleotideRegion). */
trait DnaRegion extends NucleotideRegion

/** RNA regions are stranded (NucleotideRegion). */
trait RnaRegion extends NucleotideRegion

/** Protein regions extend BaseRegion directly and so have no strand member. */
trait ProteinRegion extends BaseRegion
/** The strand of a region; a closed set with exactly two values. */
sealed trait Strand

/** Holder for the two strand values, `+` and `-`. */
object Strand {
  object `+` extends Strand
  object `-` extends Strand
}
// The constraints captured by the Optional and Required types in Scala are represented
// by the @Nullable and @NotNull annotations respectively.
//
// Where the scala Many type is used, we use the Java Set interface, together with @NotNull
// to flag that the collection must be present, but may be empty.
//
// For the sake of my fingers, we've only presented the getters.
//
// Type members are encoded as generic type parameters - it isn't a wholly correct semantics, but
// it is close enough.
/** Something that has a URI. */
public interface Identified {
    /** Annotated @NotNull: the URI must be present. */
    @NotNull
    URI getURI();
}
/** An Identified thing that also carries documentation. */
public interface Described extends Identified {
    /** Annotated @NotNull: the display id must be present. */
    @NotNull
    String getDisplayId();

    /** Annotated @Nullable: the name may be absent. */
    @Nullable
    String getName();

    /** Annotated @Nullable: the description may be absent. */
    @Nullable
    String getDescription();
}
/** A described grouping of components. */
public interface Collection extends Described {
    /**
     * The grouped components. The set itself must be present but may be empty.
     * The element type uses wildcards rather than the raw BaseComponent type,
     * since BaseComponent is generic.
     */
    @NotNull
    Set<BaseComponent<?, ?>> getComponents();
}
public interface BaseComponent<Sequence super BaseSequence, SequenceAnnotation super BaseSequenceAnnotation>
extends Described {
@NotNull public Set<URI> getTypes();
@Nullable public Sequence getSequence();
@NotNull public Set<SequenceAnnotation> getAnnotations();
}
/** The DNA component: BaseComponent with the DNA sequence and annotation types. */
public interface DnaComponent extends BaseComponent<DnaSequence, DnaSequenceAnnotation> {
}

/** The RNA component: BaseComponent with the RNA sequence and annotation types. */
public interface RnaComponent extends BaseComponent<RnaSequence, RnaSequenceAnnotation> {
}

/** The protein component: BaseComponent with the protein sequence and annotation types. */
public interface ProteinComponent extends BaseComponent<ProteinSequence, ProteinSequenceAnnotation> {
}
/** The base sequence type: an Identified thing with a residue string. */
public interface BaseSequence extends Identified {
    /** Annotated @NotNull: the residues must be present. */
    @NotNull
    String getResidues();
}
/** Marker subtype of BaseSequence for DNA. */
public interface DnaSequence extends BaseSequence {
}

/** Marker subtype of BaseSequence for RNA. */
public interface RnaSequence extends BaseSequence {
}

/** Marker subtype of BaseSequence for protein. */
public interface ProteinSequence extends BaseSequence {
}
public interface BaseAnnotation<Region super BaseRegion, SequeneAnnotation super BaseAnnotation> extends Identified {
@Nullable public Region getLocatedAt();
@NotNull public Set<SequenceAnnotation> getPrecedes();
@NotNull public SequenceAnnotation getSubComponent();
}
/** The DNA annotation: BaseAnnotation with the DNA region and annotation types. */
public interface DnaSequenceAnnotation extends BaseAnnotation<DnaRegion, DnaSequenceAnnotation> {
}

/** The RNA annotation: BaseAnnotation with the RNA region and annotation types. */
public interface RnaSequenceAnnotation extends BaseAnnotation<RnaRegion, RnaSequenceAnnotation> {
}

/**
 * The protein annotation: BaseAnnotation with the protein region and annotation types.
 * (The original misspelled both BaseAnnotation and ProteinSequenceAnnotation here.)
 */
public interface ProteinSequenceAnnotation extends BaseAnnotation<ProteinRegion, ProteinSequenceAnnotation> {
}
/** Base type for regions: a start and an end, both primitive ints and so never null. */
public interface BaseRegion {
    /** Primitive type, not nullable. */
    int getBioStart();

    /** Primitive type, not nullable. */
    int getBioEnd();
}
/** A stranded region: a BaseRegion plus a strand. */
public interface NucleotideRegion extends BaseRegion {
    /** Annotated @Nullable: the strand may be absent. */
    @Nullable
    Strand getStrand();
}
/** DNA regions are stranded (NucleotideRegion). */
public interface DnaRegion extends NucleotideRegion {
}

/** RNA regions are stranded (NucleotideRegion). */
public interface RnaRegion extends NucleotideRegion {
}

/** Protein regions extend BaseRegion directly and so have no strand accessor. */
public interface ProteinRegion extends BaseRegion {
}
/** The strand of a nucleotide region: PLUS or MINUS. */
public enum Strand {
    PLUS,
    MINUS
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment