Skip to content

Instantly share code, notes, and snippets.

@lh3
Created December 4, 2017 20:30
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save lh3/9673e133748fd059498a8c4c7ab4f66d to your computer and use it in GitHub Desktop.
Save lh3/9673e133748fd059498a8c4c7ab4f66d to your computer and use it in GitHub Desktop.
-- Sequence table: one row per checksum.
CREATE TABLE seq (
    checksum TEXT,
    ac       TEXT,    -- INSDC sequence accession, when available
    len      INTEGER, -- could be of type "TEXT"; no need to implement "less than"
    seq      TEXT,
    PRIMARY KEY (checksum) -- open question from the author: what about collisions?
);

-- Secondary index for lookups by len.
CREATE INDEX seq_len ON seq (len);

-- Secondary index for lookups by accession.
-- Deliberately not UNIQUE: different checksums may have the same AC.
CREATE INDEX seq_ac ON seq (ac);
-- Sequence set: maps (set identifier, alias) to a checksum.
CREATE TABLE seq_set (
    ss_id    TEXT, -- seq-set identifier
    -- Sequence name alias in ss_name.
    -- NOTE(review): "ss_name" is not defined in this schema; presumably it
    -- means the set identified by ss_id -- confirm with the author.
    alias    TEXT,
    checksum TEXT,
    PRIMARY KEY (ss_id, alias) -- this tuple is required to be unique
);

-- Lookup by checksum; UNIQUE, so a (checksum, ss_id) pair may appear at most once.
CREATE UNIQUE INDEX ss_checksum ON seq_set (checksum, ss_id);
-- Additional properties about a sequence set: one row per ss_id.
CREATE TABLE ss_properties (
    ss_id        TEXT,
    ac           TEXT,    -- INSDC Genome or Assembly accession, if available
    primary_only BOOLEAN, -- contain primary assembly only, or not
    analysis_set BOOLEAN, -- GRC-blessed "analysis set", or not
    PRIMARY KEY (ss_id)
);

-- Lookup by accession, with ss_id as the second key column.
CREATE UNIQUE INDEX ss_ac ON ss_properties (ac, ss_id);
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment