Skip to content

Instantly share code, notes, and snippets.

@kinow
Created July 28, 2019 05:28
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save kinow/7c8e1a2da97c9746f0cbc868b33839c3 to your computer and use it in GitHub Desktop.
Save kinow/7c8e1a2da97c9746f0cbc868b33839c3 to your computer and use it in GitHub Desktop.
database-diagram.py
#!/usr/bin/env python3
# THIS FILE IS PART OF THE CYLC SUITE ENGINE.
# Copyright (C) 2008-2019 NIWA & British Crown (Met Office) & Contributors.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
"""database-diagram
Create a database diagram of Cylc's database, reading its schema, and adding
relationships manually.
The console output should include the markdown and the dot file used,
and a PNG image with the database diagram will be created in the working
directory, with the name cylc-database.png."""
import logging
import sqlite3
import tempfile
from contextlib import closing
import pygraphviz as pgv
from cylc.flow.rundb import CylcSuiteDAO
from eralchemy import render_er, main as eralchemy_main
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
def get_relationships():
"""These are the database relationships. * means 0..N, and ? {0, 1}.
Ideally we would have these relationships in the database model, but doing
that would require further tests to prevent regressions for users
after they update Cylc. So we are hard-coding the relationships here
for now.
Returns:
list: with relationships in the eralchemy markdown syntax.
"""
# TODO: remove this once the relationships are in the DB, and then automate
return [
["task_states", "*--?", "task_events"],
["task_states", "*--?", "task_jobs"],
["broadcast_states", "*--?", "broadcast_states_checkpoints"],
["checkpoint_id", "*--?", "broadcast_states_checkpoints"],
["checkpoint_id", "*--?", "suite_params_checkpoints"],
["checkpoint_id", "*--?", "task_pool_checkpoints"],
["suite_params", "*--?", "suite_params_checkpoints"],
["task_pool", "*--?", "task_pool_checkpoints"],
["task_pool", "*--?", "task_action_timers"],
["task_pool", "?--?", "task_late_flags"],
["task_pool", "*--?", "task_outputs"],
["task_pool", "?--?", "task_timeout_timers"]
]
def get_columns_metadata(table_name, conn):
"""Return the metadata for the columns in a table.
Args:
table_name (str): name of the DB table to retrieve the columns.
conn (sqlite3.Connection): SQLite connection.
Returns:
list: an array with the cid, name, type, notnull, default_value,
and pk value (0=not, 1=yes).
"""
with closing(conn.cursor()) as cursor:
cursor.execute(f'PRAGMA table_info({table_name})')
return cursor.fetchall()
def is_orphan(table_name, relationships):
for relationship in relationships:
if table_name in relationship:
return False
return True
def schema_to_markdown(db_name):
"""Return the database markdown schema.
Args:
db_name (str): database name
Returns:
tuple: one side with lines of a markdown file, the other with
the orphan tables
"""
lines = []
orphans = []
relationships = get_relationships()
with closing(sqlite3.connect(db_name)) as conn:
with closing(conn.cursor()) as cursor:
cursor.execute("SELECT name FROM sqlite_master WHERE type='table'")
for row in cursor:
table = row[0]
lines.append(f"[{table}]")
for column in get_columns_metadata(table, conn):
pk = "*" if column[5] > 0 else "" # 1 is normally PK
label = f"{{label: {column[2].upper()}}}"
line = f"\t{pk}{column[1]} {label}"
lines.append(line)
if is_orphan(table, relationships):
orphans.append(table)
# in the eralchemy example, relationships go at the end of the file
lines.extend([" ".join(value) for value in relationships])
return lines, orphans
def add_orphans_subgraph(file_name, orphans, append=None):
"""Add a subgraph with the orphans tables, so they appear grouped
at the bottom of the diagram.
What this function is doing is, basically, to re-organise the current
content of the file, by separating some portions of the original dot file
in lists, and then returning them in order, so that they can be rendered.
Args:
file_name (str): dot file name
orphans (list): list of orphan tables
append (list): list of table names to be appended to the subgraph
"""
# TODO: rather complicated, some extensibility work around eralchemy
# may make it way simpler later
graph_beginning = [] # dot file graph beginning
subgraph = ["subgraph orphan {rank = sink\n"] # subgraph
subgraph_append = [] # extra tables that must be appended to subgraph too
graph = [] # normal graph
relationships = [] # the relationships after the graph
if not append:
append = []
with open(file_name) as dot_file:
for line in dot_file:
if line.strip() == "}":
# we are closing the graph section ourselves later
continue
if not line.strip().startswith("\""):
# only lines in our dot file that start with " are tables
# and relationships
graph_beginning.append(line)
else:
if "--" in line:
# relationships use -- for eralchemy
relationships.append(line)
else:
# we got a table line, just need to group orphans
# and not orphans
quote_left = line.index("\"", 0) + 1
quote_right = line.index("\"", quote_left)
table = line[quote_left:quote_right]
if table in orphans:
subgraph.append(line)
elif table not in append:
graph.append(line)
else:
subgraph_append.append(line)
subgraph_append.append("}\n") # close the subgraph section
relationships.append("}\n") # close the main graph section
# combine again everything creating a new dot file
return "".join(graph_beginning +
subgraph +
subgraph_append +
graph +
relationships)
def render_dot_graph(file_name):
G = pgv.AGraph(file_name)
G.draw("cylc-database.png", prog="dot")
def main():
"""Create Cylc public database, run diagram creation tool, and
then finally adjust the generated dot file for better display layout."""
# temporary file to hold the Cylc database to be passed to eralchemy
with tempfile.NamedTemporaryFile() as tf_db:
# is_public=False triggers the creation of tables
CylcSuiteDAO(db_file_name=tf_db.name, is_public=False)
logger.info(f"Cylc database created in {tf_db.name}!")
schema, orphans = schema_to_markdown(db_name=tf_db.name)
# eralchemy needs a file that ends with .er to parse markdown
with tempfile.NamedTemporaryFile(suffix=".er") as tf_md:
markdown = "\n".join(schema)
logger.info(f"Markdown generated: ")
logger.info(markdown)
tf_md.write(markdown.encode("utf-8"))
tf_md.flush()
# this is the output of eralchemy, instead of a PNG, get a DOT file
with tempfile.NamedTemporaryFile(suffix=".dot") as tf_dot:
# TODO: monkey-patching not really elegant, but here we need
# to change the graph beginning in the dot output, and
# there is no way of doing that in eralchemy
eralchemy_main.GRAPH_BEGINNING = (
'graph {\n'
' node [label = "\\N", shape = plaintext];\n'
' edge [color = gray50, minlen = 2, style = dashed];\n'
' rankdir = "TB";\n'
' newrank="true"\n'
)
render_er(input=tf_md.name, output=tf_dot.name, mode="dot")
tf_dot.flush()
# finally modify a little bit the DOT file, by adding a
# subgraph with the orphan tables, and one linking table
# broadcast_states, which is not orphan, but balances the graph
new_file = add_orphans_subgraph(tf_dot.name, orphans,
["broadcast_states"])
logger.info("Final dot file: ")
logger.info(new_file)
tf_dot.truncate(0)
tf_dot.write(new_file.encode("utf-8"))
render_dot_graph(tf_dot.name)
if __name__ == '__main__':
main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment