hdonnay/diagram.d2

## diagram.d2
Notes: "" {
  grid-rows: 1
  grid-columns: 2
  grid-gap: 5
  near: top-center
  Notes: |md
    # Goals
    - Remove all "presentation-only" data into a table outside of a query path (`advisory_meta`)
    - Allow lookups to and from a-k-a names (`advisory_reference`)
    - Structure cross-referencing data (`reference`)
    - Allow storing arbitrary extension data per package (`package.ecosystem`)
    - Allow storing (near) arbitrary extension data per advisory (`attr`, `advisory_attr`)
      - "Distribution" and "repository" types are subsumed into `attr`, now
    - Trade pivot-table write traffic for potentially conflict, but low volume writes (`advisory.generation`, `advisory.added`)

    # TBD
    - Is there a better way to do architecture matching?
	- Is there a good way to GC anything beind a pivot table (`reference`, `attr`, `package`)?
  |

  Query: |`md
    # Query Paths:

    ### Lookup advisories by Ref
    Example: Given `CVE-20XX-NNNN`, look up all advisories covering it.
    ```sql
    SELECT a.namespace || '-' || a.name
    FROM
      advisory a
      JOIN advisory_reference ar ON a.id = ar.advisory
      JOIN reference r ON r.id = ar.reference
    WHERE
      r.namespace = 'CVE' AND r.name = '20XX-NNNN';
    ```

	### Get current advisories
	```sql
	SELECT a.namespace || '-' || a.name
    FROM
      advisory a
	  JOIN (SELECT id FROM run WHERE completed ORDER BY id DESC LIMIT 1) AS run ON run.id = a.generation;
	```
	A Client could also pick a generation at the start of its queries and keep a consistent view.

	### Query by some `attr`
	```sql
	SELECT a.namespace || '-' || a.name
    FROM
	  advisory_attr aa
	  JOIN advisory a ON aa.advisory = a.id
	  JOIN attr ON aa.attr = attr.id
	WHERE
	  attr.kind = "CPE" AND
	  attr.data -> 'bound' = 'cpe:/a:redhat:enterprise_linux:9::highavailability'
	```
  `|
}

GC: "" {
  near: bottom-center
  text: |md
      # GC mechanism: Advisories
      The `advisory` and `enrichment` tables work with the `result` table to support garbage collection.

      The `added` and `generation` columns are nullable foreign keys referencing a `result` row.\
      The pair represents the history and collection status of the object (see table).\
    Once the `generation` has been set to `NULL` by the `result` row being deleted,\
    it can be collected.

    Tuning the number of kept runs per updater to 1 would effectively disable garbage collection.


  |
  table1: "GC possibilities" {
    grid-rows: 3
    grid-columns: 3
    grid-gap: 0

    a: "Invariant: generation ≥ added"
    b: "added IS NULL"
    c: "added IS NOT NULL"

    d: "generation IS NULL"
    e: "🚮"
    f: "∅"

    g: "generation IS NOT NULL"
    h: "🆗"
    i: "🆗"
  }
}

Updater V1 Schema: {
  icon: https://icons.terrastruct.com/essentials%2F117-database.svg

  configuration: {
    shape: sql_table
    id: int {constraint: primary_key}
    updaters: "int[]" {constraint: foreign_key}
    "...": "..."
  }
  configuration.updaters -> updater.id

  updater: {
    shape: sql_table
    id: int {constraint: primary_key}
    name: ltree {constraint: unique}
    version: text
  }

  run: {
    shape: sql_table
    id: int {constraint: primary_key}
    ref: uuid {constraint: unique}
    configuration: int {constraint: foreign_key}
	completed: bool
  }
  run.configuration -> configuration.id

  result: {
    shape: sql_table
    id: int {constraint: primary_key}
    run: int {constraint: primary_key}
    updater: int {constraint: foreign_key}
    date: timestamp
    success: boolean
    fingerprint: jsonb
    error: text
  }
  result.run -> run.id
  result.updater -> updater.id

  enrichment: {
    shape: sql_table
    id: int {constraint: primary_key}
    added: int {constraint: foreign_key}
    generation: int {constraint: foreign_key}
    namespace: text
    name: text
    tags: "text[]"
    data: jsonb
  }
  enrichment.tooltip: "Partitioned on namespace.\nUnique over (namespace, name)."
  enrichment.added -> run.id
  enrichment.generation -> run.id

  advisory: {
    shape: sql_table
    id: int {constraint: primary_key}
    added: int {constraint: foreign_key}
    generation: int {constraint: foreign_key}
    namespace: text
    name: text
  }
  advisory.tooltip: "Partitioned on namespace.\nUnique over (namespace, name)."
  advisory.added -> run.id
  advisory.generation -> run.id

  advisory_meta: {
    shape: sql_table
    id: int {constraint: primary_key}
    advisory: int {constraint: foreign_key}
    "...": "..."
  }
  advisory_meta.advisory -> advisory.id

  reference: {
    shape: sql_table
    id: int {constraint: primary_key}
    namespace: text
    name: text
    uri: "text[]"
  }
  reference.tooltip: "Partitioned on namespace.\nUnique over (namespace, name)."

  advisory_reference: {
    shape: sql_table
    advisory: int {constraint: foreign_key}
    reference: int {constraint: foreign_key}
  }
  advisory_reference.advisory -> advisory.id
  advisory_reference.reference -> reference.id

  package: {
    shape: sql_table
    id: int {constraint: primary_key}
    name: text
    arch_in: boolean
    arch: "text[]"
    vulnerable_range: VersionRange
    version_upstream: text
    version_kind: text
    kind: text
    ecosystem: jsonb
  }

  advisory_package: {
    shape: sql_table
    advisory: int {constraint: foreign_key}
    package: int {constraint: foreign_key}
  }
  advisory_package.advisory -> advisory.id
  advisory_package.package -> package.id

  attr: {
    shape: sql_table
    id: int {constraint: primary_key}
    kind: text
    data: hstore
  }

  advisory_attr: {
    shape: sql_table
    advisory: int {constraint: foreign_key}
    attr: int {constraint: foreign_key}
  }
  advisory_attr.advisory -> advisory.id
  advisory_attr.attr -> attr.id
}
	Notes: "" {
	grid-rows: 1
	grid-columns: 2
	grid-gap: 5
	near: top-center
	Notes: \|md
	# Goals
	- Remove all "presentation-only" data into a table outside of a query path (`advisory_meta`)
	- Allow lookups to and from a-k-a names (`advisory_reference`)
	- Structure cross-referencing data (`reference`)
	- Allow storing arbitrary extension data per package (`package.ecosystem`)
	- Allow storing (near) arbitrary extension data per advisory (`attr`, `advisory_attr`)
	- "Distribution" and "repository" types are subsumed into `attr`, now
	- Trade pivot-table write traffic for potentially conflict, but low volume writes (`advisory.generation`, `advisory.added`)

	# TBD
	- Is there a better way to do architecture matching?
	- Is there a good way to GC anything beind a pivot table (`reference`, `attr`, `package`)?
	\|

	Query: \|`md
	# Query Paths:

	### Lookup advisories by Ref
	Example: Given `CVE-20XX-NNNN`, look up all advisories covering it.
	```sql
	SELECT a.namespace \|\| '-' \|\| a.name
	FROM
	advisory a
	JOIN advisory_reference ar ON a.id = ar.advisory
	JOIN reference r ON r.id = ar.reference
	WHERE
	r.namespace = 'CVE' AND r.name = '20XX-NNNN';
	```

	### Get current advisories
	```sql
	SELECT a.namespace \|\| '-' \|\| a.name
	FROM
	advisory a
	JOIN (SELECT id FROM run WHERE completed ORDER BY id DESC LIMIT 1) AS run ON run.id = a.generation;
	```
	A Client could also pick a generation at the start of its queries and keep a consistent view.

	### Query by some `attr`
	```sql
	SELECT a.namespace \|\| '-' \|\| a.name
	FROM
	advisory_attr aa
	JOIN advisory a ON aa.advisory = a.id
	JOIN attr ON aa.attr = attr.id
	WHERE
	attr.kind = "CPE" AND
	attr.data -> 'bound' = 'cpe:/a:redhat:enterprise_linux:9::highavailability'
	```
	`\|
	}

	GC: "" {
	near: bottom-center
	text: \|md
	# GC mechanism: Advisories
	The `advisory` and `enrichment` tables work with the `result` table to support garbage collection.

	The `added` and `generation` columns are nullable foreign keys referencing a `result` row.\
	The pair represents the history and collection status of the object (see table).\
	Once the `generation` has been set to `NULL` by the `result` row being deleted,\
	it can be collected.

	Tuning the number of kept runs per updater to 1 would effectively disable garbage collection.


	\|
	table1: "GC possibilities" {
	grid-rows: 3
	grid-columns: 3
	grid-gap: 0

	a: "Invariant: generation ≥ added"
	b: "added IS NULL"
	c: "added IS NOT NULL"

	d: "generation IS NULL"
	e: "🚮"
	f: "∅"

	g: "generation IS NOT NULL"
	h: "🆗"
	i: "🆗"
	}
	}

	Updater V1 Schema: {
	icon: https://icons.terrastruct.com/essentials%2F117-database.svg

	configuration: {
	shape: sql_table
	id: int {constraint: primary_key}
	updaters: "int[]" {constraint: foreign_key}
	"...": "..."
	}
	configuration.updaters -> updater.id

	updater: {
	shape: sql_table
	id: int {constraint: primary_key}
	name: ltree {constraint: unique}
	version: text
	}

	run: {
	shape: sql_table
	id: int {constraint: primary_key}
	ref: uuid {constraint: unique}
	configuration: int {constraint: foreign_key}
	completed: bool
	}
	run.configuration -> configuration.id

	result: {
	shape: sql_table
	id: int {constraint: primary_key}
	run: int {constraint: primary_key}
	updater: int {constraint: foreign_key}
	date: timestamp
	success: boolean
	fingerprint: jsonb
	error: text
	}
	result.run -> run.id
	result.updater -> updater.id

	enrichment: {
	shape: sql_table
	id: int {constraint: primary_key}
	added: int {constraint: foreign_key}
	generation: int {constraint: foreign_key}
	namespace: text
	name: text
	tags: "text[]"
	data: jsonb
	}
	enrichment.tooltip: "Partitioned on namespace.\nUnique over (namespace, name)."
	enrichment.added -> run.id
	enrichment.generation -> run.id

	advisory: {
	shape: sql_table
	id: int {constraint: primary_key}
	added: int {constraint: foreign_key}
	generation: int {constraint: foreign_key}
	namespace: text
	name: text
	}
	advisory.tooltip: "Partitioned on namespace.\nUnique over (namespace, name)."
	advisory.added -> run.id
	advisory.generation -> run.id

	advisory_meta: {
	shape: sql_table
	id: int {constraint: primary_key}
	advisory: int {constraint: foreign_key}
	"...": "..."
	}
	advisory_meta.advisory -> advisory.id

	reference: {
	shape: sql_table
	id: int {constraint: primary_key}
	namespace: text
	name: text
	uri: "text[]"
	}
	reference.tooltip: "Partitioned on namespace.\nUnique over (namespace, name)."

	advisory_reference: {
	shape: sql_table
	advisory: int {constraint: foreign_key}
	reference: int {constraint: foreign_key}
	}
	advisory_reference.advisory -> advisory.id
	advisory_reference.reference -> reference.id

	package: {
	shape: sql_table
	id: int {constraint: primary_key}
	name: text
	arch_in: boolean
	arch: "text[]"
	vulnerable_range: VersionRange
	version_upstream: text
	version_kind: text
	kind: text
	ecosystem: jsonb
	}

	advisory_package: {
	shape: sql_table
	advisory: int {constraint: foreign_key}
	package: int {constraint: foreign_key}
	}
	advisory_package.advisory -> advisory.id
	advisory_package.package -> package.id

	attr: {
	shape: sql_table
	id: int {constraint: primary_key}
	kind: text
	data: hstore
	}

	advisory_attr: {
	shape: sql_table
	advisory: int {constraint: foreign_key}
	attr: int {constraint: foreign_key}
	}
	advisory_attr.advisory -> advisory.id
	advisory_attr.attr -> attr.id
	}