CJHwong/README.md

## README.md

      
    Raw
  

              README.md
            
          
    Origin: https://news.ycombinator.com/item?id=39798115
Replace the database name by yours
mysqldump --no-data $DB_NAME --result-file myschema.sql
Download mysql2sqlite from https://github.com/dumblob/mysql2sqlite
./mysql2sqlite myschema.sql | sqlite3 mysqlite3.db
Download sqlite-schema-diagram.sql from https://gitlab.com/Screwtapello/sqlite-schema-diagram
sqlite3 ./mysqlite3.db -init sqlite-schema-diagram.sql "" > schema.dot
Install Graphviz
dot -Tsvg schema.dot > schema.svg

  
## mysql2sqlite
#!/usr/bin/awk -f

# Authors: @esperlu, @artemyk, @gkuenning, @dumblob

# FIXME detect empty input file and issue a warning

function printerr( s ){ print s | "cat >&2" }

BEGIN {
  if( ARGC != 2 ){
    printerr( \
      "USAGE:\n"\
      "       mysql2sqlite dump_mysql.sql > dump_sqlite3.sql\n" \
      "       OR\n" \
      "       mysql2sqlite dump_mysql.sql | sqlite3 sqlite.db\n" \
      "\n" \
      "NOTES:\n" \
      "       Dash in filename is not supported, because dash (-) means stdin." )
    no_END = 1
    exit 1
  }

  # Find INT_MAX supported by both this AWK (usually an ISO C signed int)
  #   and SQlite.
  # On non-8bit-based architectures, the additional bits are safely ignored.

  # 8bit (lower precision should not exist)
  s="127"
  # "63" + 0 avoids potential parser misbehavior
  if( (s + 0) "" == s ){ INT_MAX_HALF = "63" + 0 }
  # 16bit
  s="32767"
  if( (s + 0) "" == s ){ INT_MAX_HALF = "16383" + 0 }
  # 32bit
  s="2147483647"
  if( (s + 0) "" == s ){ INT_MAX_HALF = "1073741823" + 0 }
  # 64bit (as INTEGER in SQlite3)
  s="9223372036854775807"
  if( (s + 0) "" == s ){ INT_MAX_HALF = "4611686018427387904" + 0 }
#  # 128bit
#  s="170141183460469231731687303715884105728"
#  if( (s + 0) "" == s ){ INT_MAX_HALF = "85070591730234615865843651857942052864" + 0 }
#  # 256bit
#  s="57896044618658097711785492504343953926634992332820282019728792003956564819968"
#  if( (s + 0) "" == s ){ INT_MAX_HALF = "28948022309329048855892746252171976963317496166410141009864396001978282409984" + 0 }
#  # 512bit
#  s="6703903964971298549787012499102923063739682910296196688861780721860882015036773488400937149083451713845015929093243025426876941405973284973216824503042048"
#  if( (s + 0) "" == s ){ INT_MAX_HALF = "3351951982485649274893506249551461531869841455148098344430890360930441007518386744200468574541725856922507964546621512713438470702986642486608412251521024" + 0 }
#  # 1024bit
#  s="89884656743115795386465259539451236680898848947115328636715040578866337902750481566354238661203768010560056939935696678829394884407208311246423715319737062188883946712432742638151109800623047059726541476042502884419075341171231440736956555270413618581675255342293149119973622969239858152417678164812112068608"
#  if( (s + 0) "" == s ){ INT_MAX_HALF = "44942328371557897693232629769725618340449424473557664318357520289433168951375240783177119330601884005280028469967848339414697442203604155623211857659868531094441973356216371319075554900311523529863270738021251442209537670585615720368478277635206809290837627671146574559986811484619929076208839082406056034304" + 0 }
#  # higher precision probably not needed

  FS=",$"
  print "PRAGMA synchronous = OFF;"
  print "PRAGMA journal_mode = MEMORY;"
  print "BEGIN TRANSACTION;"
}

# historically 3 spaces separate non-argument local variables
function bit_to_int( str_bit,   powtwo, i, res, bit, overflow ){
  powtwo = 1
  overflow = 0
  # 011101 = 1*2^0 + 0*2^1 + 1*2^2 ...
  for( i = length( str_bit ); i > 0; --i ){
    bit = substr( str_bit, i, 1 )
    if( overflow || ( bit == 1 && res > INT_MAX_HALF ) ){
      printerr( \
        NR ": WARN Bit field overflow, number truncated (LSBs saved, MSBs ignored)." )
      break
    }
    res = res + bit * powtwo
    # no warning here as it might be the last iteration
    if( powtwo > INT_MAX_HALF ){ overflow = 1; continue }
    powtwo = powtwo * 2
  }
  return res
}

# CREATE TRIGGER statements have funny commenting. Remember we are in trigger.
/^\/\*.*(CREATE.*TRIGGER|create.*trigger)/ {
  gsub( /^.*(TRIGGER|trigger)/, "CREATE TRIGGER" )
  print
  inTrigger = 1
  next
}
# The end of CREATE TRIGGER has a stray comment terminator
/(END|end) \*\/;;/ { gsub( /\*\//, "" ); print; inTrigger = 0; next }
# The rest of triggers just get passed through
inTrigger != 0 { print; next }

# CREATE VIEW looks like a TABLE in comments
/^\/\*.*(CREATE.*TABLE|create.*table)/ {
  inView = 1
  next
}
# end of CREATE VIEW
/^(\).*(ENGINE|engine).*\*\/;)/ {
  inView = 0
  next
}
# content of CREATE VIEW
inView != 0 { next }

# skip comments
/^\/\*/ { next }

# skip PARTITION statements
/^ *[(]?(PARTITION|partition) +[^ ]+/ { next }

# print all INSERT lines
( /^ *\(/ && /\) *[,;] *$/ ) || /^(INSERT|insert|REPLACE|replace)/ {
  prev = ""

  # first replace \\ by \_ that mysqldump never generates to deal with
  # sequnces like \\n that should be translated into \n, not \<LF>.
  # After we convert all escapes we replace \_ by backslashes.
  gsub( /\\\\/, "\\_" )

  # single quotes are escaped by another single quote
  gsub( /\\'/, "''" )
  gsub( /\\n/, "\n" )
  gsub( /\\r/, "\r" )
  gsub( /\\"/, "\"" )
  gsub( /\\\032/, "\032" )  # substitute char

  gsub( /\\_/, "\\" )

  # sqlite3 is limited to 16 significant digits of precision
  while( match( $0, /0x[0-9a-fA-F]{17}/ ) ){
    hexIssue = 1
    sub( /0x[0-9a-fA-F]+/, substr( $0, RSTART, RLENGTH-1 ), $0 )
  }
  if( hexIssue ){
    printerr( \
      NR ": WARN Hex number trimmed (length longer than 16 chars)." )
    hexIssue = 0
  }
  print
  next
}

# CREATE DATABASE is not supported
/^(CREATE DATABASE|create database)/ { next }

# print the CREATE line as is and capture the table name
/^(CREATE|create)/ {
  if( $0 ~ /IF NOT EXISTS|if not exists/ || $0 ~ /TEMPORARY|temporary/ ){
    caseIssue = 1
    printerr( \
      NR ": WARN Potential case sensitivity issues with table/column naming\n" \
      "          (see INFO at the end)." )
  }
  if( match( $0, /`[^`]+/ ) ){
    tableName = substr( $0, RSTART+1, RLENGTH-1 )
  }
  aInc = 0
  prev = ""
  firstInTable = 1
  print
  next
}

# Replace `FULLTEXT KEY` (probably other `XXXXX KEY`)
/^  (FULLTEXT KEY|fulltext key)/ { gsub( /[A-Za-z ]+(KEY|key)/, "  KEY" ) }

# Get rid of field lengths in KEY lines
/ (PRIMARY |primary )?(KEY|key)/ { gsub( /\([0-9]+\)/, "" ) }

aInc == 1 && /PRIMARY KEY|primary key/ { next }

# Replace COLLATE xxx_xxxx_xx statements with COLLATE BINARY
/ (COLLATE|collate) [a-z0-9_]*/ { gsub( /(COLLATE|collate) [a-z0-9_]*/, "COLLATE BINARY" ) }

# Print all fields definition lines except the `KEY` lines.
/^  / && !/^(  (KEY|key)|\);)/ {
  if( match( $0, /[^"`]AUTO_INCREMENT|auto_increment[^"`]/) ){
    aInc = 1
    gsub( /AUTO_INCREMENT|auto_increment/, "PRIMARY KEY AUTOINCREMENT" )
  }
  gsub( /(UNIQUE KEY|unique key) (`.*`|".*") /, "UNIQUE " )
  gsub( /(CHARACTER SET|character set) [^ ]+[ ,]/, "" )
  # FIXME
  #   CREATE TRIGGER [UpdateLastTime]
  #   AFTER UPDATE
  #   ON Package
  #   FOR EACH ROW
  #   BEGIN
  #   UPDATE Package SET LastUpdate = CURRENT_TIMESTAMP WHERE ActionId = old.ActionId;
  #   END
  gsub( /(ON|on) (UPDATE|update) (CURRENT_TIMESTAMP|current_timestamp)(\(\))?/, "" )
  gsub( /(DEFAULT|default) (CURRENT_TIMESTAMP|current_timestamp)(\(\))?/, "DEFAULT current_timestamp")
  gsub( /(COLLATE|collate) [^ ]+ /, "" )
  gsub( /(ENUM|enum)[^)]+\)/, "text " )
  gsub( /(SET|set)\([^)]+\)/, "text " )
  gsub( /UNSIGNED|unsigned/, "" )
  gsub( /_utf8mb3/, "" )
  gsub( /` [^ ]*(INT|int|BIT|bit)[^ ]*/, "` integer" )
  gsub( /" [^ ]*(INT|int|BIT|bit)[^ ]*/, "\" integer" )
  ere_bit_field = "[bB]'[10]+'"
  if( match($0, ere_bit_field) ){
    sub( ere_bit_field, bit_to_int( substr( $0, RSTART +2, RLENGTH -2 -1 ) ) )
  }

  # remove USING BTREE and other suffixes for USING, for example: "UNIQUE KEY
  # `hostname_domain` (`hostname`,`domain`) USING BTREE,"
  gsub( / USING [^, ]+/, "" )

  # field comments are not supported
  gsub( / (COMMENT|comment).+$/, "" )
  # Get commas off end of line
  gsub( /,.?$/, "" )
  if( prev ){
    if( firstInTable ){
      print prev
      firstInTable = 0
    }
    else {
      print "," prev
    }
  }
  else {
    # FIXME check if this is correct in all cases
    if( match( $1,
        /(CONSTRAINT|constraint) ["].*["] (FOREIGN KEY|foreign key)/ ) ){
      print ","
    }
  }
  prev = $1
}

/ ENGINE| engine/ {
  if( prev ){
    if( firstInTable ){
      print prev
      firstInTable = 0
    }
    else {
      print "," prev
    }
  }
  prev=""
  print ");"
  next
}
# `KEY` lines are extracted from the `CREATE` block and stored in array for later print
# in a separate `CREATE KEY` command. The index name is prefixed by the table name to
# avoid a sqlite error for duplicate index name.
/^(  (KEY|key)|\);)/ {
  if( prev ){
    if( firstInTable ){
      print prev
      firstInTable = 0
    }
    else {
      print "," prev
    }
  }
  prev = ""
  if( $0 == ");" ){
    print
  }
  else {
    if( match( $0, /`[^`]+/ ) ){
      indexName = substr( $0, RSTART+1, RLENGTH-1 )
    }
    if( match( $0, /\([^()]+/ ) ){
      indexKey = substr( $0, RSTART+1, RLENGTH-1 )
    }
    # idx_ prefix to avoid name clashes (they really happen!)
    key[tableName] = key[tableName] "CREATE INDEX \"idx_" \
       tableName "_" indexName "\" ON \"" tableName "\" (" indexKey ");\n"
  }
}

END {
  if( no_END ){ exit 1}
  # print all KEY creation lines.
  for( table in key ){ printf key[table] }

  print "END TRANSACTION;"

  if( caseIssue ){
    printerr( \
      "INFO Pure sqlite identifiers are case insensitive (even if quoted\n" \
      "     or if ASCII) and doesnt cross-check TABLE and TEMPORARY TABLE\n" \
      "     identifiers. Thus expect errors like \"table T has no column named F\".")
  }
}

## sqlite-schema-diagram.sql
-- We start a GraphViz graph
SELECT '
digraph structs {
'
UNION ALL

-- Normally, GraphViz' "dot" command lays out a hierarchical graph from
-- top to bottom.  However, we aren't just laying out individual nodes,
-- each node is a vertical list of database fields.  To prevent GraphViz
-- from snaking arrows all over the place, we constrain it to draw
-- incoming references on the left of each field, and outgoing references
-- on the right.  Since that's the way references flow for each database
-- table, we tell GraphViz to lay the whole graph out left-to-right,
-- which makes its job much easier and produces prettier output.
SELECT '
rankdir="LR"
'
UNION ALL


-- By default, nodes have circles around them.  We will draw our own
-- tables below, we do not want the circles.
SELECT '
node [shape=none]
'
UNION ALL

-- This is the big query that renders a node complete with field names
-- for each table in the database.  Because we want raw GraphViz output,
-- our query returns rows with a single string field, whose value is a
-- complex calculation using SQL as a templating engine.  This is kind
-- of an abuse, but works nicely nevertheless.
SELECT
    CASE
        -- When the previous row's table name is the same as this one,
        -- do nothing.
        WHEN LAG(t.name, 1) OVER (ORDER BY t.name) = t.name THEN ''

        -- Otherwise, this is the first row of a new table, so start
        -- the node markup and add a header row.  Normally in GraphViz,
        -- the table name would *be* the label of the node, but since
        -- we're using the label to represent the entire node, we have
        -- to make our own header.
        --
        -- GraphViz does have a "record" label shape, but it seems tricky
        -- to work with and I found the HTML-style label markup easier
        -- to get working the way I wanted.
        ELSE
            t.name || ' [label=<
            <TABLE BORDER="0" CELLSPACING="0" CELLBORDER="1">
                <TR>
                    <TD COLSPAN="2"><B>' || t.name || '</B></TD>
                </TR>
            '

    -- After the header (if needed), we have rows for each field in
    -- the table.
    --
    -- The "pk" metadata field is zero for table fields that are not part
    -- of the primary key.  If the "pk" metadata field is 1 or more, that
    -- tells you that table field's order in the (potentially composite)
    -- primary key.
    --
    -- We also add ports to each of the table cells, so that we can
    -- later tell GraphViz to specifically connect the ports representing
    -- specific fields in each table, instead of connecting the tables
    -- generally.
    END || '
                <TR>
                    <TD PORT="' || i.name || '_to">' ||
                        CASE i.pk WHEN 0 THEN '&nbsp;' ELSE '🔑' END ||
                    '</TD>
                    <TD PORT="' || i.name || '_from">' || i.name || '</TD>
                </TR>
            ' ||
    CASE
        -- When the next row's table name is the same as this one,
        -- do nothing.
        WHEN LEAD(t.name, 1) OVER (ORDER BY t.name) = t.name THEN ''

        -- Otherwise, this is the last row of a database table, so end
        -- the table markup.
        ELSE '
            </TABLE>
        >];
        '
    END

-- This is how you get nice relational data out of SQLite's metadata
-- pragmas.
FROM pragma_table_list() AS t
    JOIN pragma_table_info(t.name, t.schema) AS i

WHERE
    -- SQLite has a bunch of metadata tables in each schema, which
    -- are hidden from .tables and .schema but which are reported
    -- in pragma_table_list().  They're not user-created and almost
    -- certainly user databases don't have foreign keys to them, so
    -- let's just filter them out.
    t.name NOT LIKE 'sqlite_%'

    -- Despite its name, pragma_table_list() also includes views.
    -- Since those don't store any information or have any correctness
    -- constraints, they're just distracting if you're trying to quickly
    -- understand a database's schema, so we'll filter them out too.
    AND t.type = 'table'
UNION ALL

-- Now we have all the database tables set up, we can draw the links
-- between them.  SQLite gives us the pragma_foreign_key_list() function
-- which (for a given source table) lists all the source fields that are
-- part of a foreign key reference, the target table they refer to, and
-- (if it was created with "REFERENCES table_name(column_name)" syntax,
-- the target column names too.  Unfortunately, if the reference was
-- created with "REFERENCES table_name" syntax, the pragma does *not*
-- figure out what the corresponding target fields are, so we'll also need
-- pragma_table_info() to look up the primary key(s) for the target table.
--
-- Once we have everything we need, we just do a bit more string
-- concatenation to build up the GraphViz syntax equivalent.
--
-- Note that we use the ports we defined above, as well as the directional
-- overrides :e and :w, to force GraphViz to give us a layout that's
-- likely to be readable.
SELECT

    -- We left-join every foreign key field against pragma_table_info
    -- looking for primary keys, and the target table may have a composite
    -- primary key even if the foreign key does not reference the primary
    -- key, so we may wind up with multiple results describing the same
    -- foreign key reference.  DISTINCT makes sure we only describe each
    -- reference once.
    DISTINCT

    t.name || ':' || f."from" || '_from:e -> ' ||

    -- If the constraint was created with "REFERENCES
    -- table_name(column_name)", then f.to will contain 'column_name'.
    -- Otherwise, f.to is NULL, and we need to grab the corresponding
    -- field from the primary key in i.name.
    f."table" || ':' || COALESCE(f."to", i.name) || '_to:w'

FROM pragma_table_list() AS t
    JOIN pragma_foreign_key_list(t.name, t.schema) AS f

    -- We look up all the fields in the target table, just in case
    -- pragma_foreign_key_list() doesn't tell us what the target field
    -- name is.  SQLite doesn't allow foreign-key references to cross
    -- schemas, so it's OK to use the source table's schema name to look
    -- up the target table.
    --
    -- Strictly speaking, we shouldn't need to LEFT JOIN here, a basic
    -- JOIN should do.  This works around a bug in SQLite 3.16.0 to
    -- version 3.45.1: https://sqlite.org/forum/forumpost/b1656fcb39
    LEFT JOIN pragma_table_info(f."table", t.schema) AS i

-- f.seq represents the order of fields in a source table's composite foreign key
-- reference, starting at 0.  In "FOREIGN KEY (a, b)", "a" would have
-- seq=0 and "b" would have seq=1.  i.pk represents the order of fields
-- in a primary key, where "0" means "not part of the primary key".
-- In "PRIMARY KEY (a, b)", "a" would have pk=1 and "b" would have pk=2.
-- For a foreign key reference that specifies the target field name,
-- none of this matters, but if the target field name is missing, then
-- this makes sure that each field of the foreign key reference is joined
-- with the corresponding primary key field of the target table.
WHERE f.seq + 1 = i.pk

UNION ALL

-- Lastly, we close the GraphViz graph.
SELECT '
}';
	#!/usr/bin/awk -f

	# Authors: @esperlu, @artemyk, @gkuenning, @dumblob

	# FIXME detect empty input file and issue a warning

	function printerr( s ){ print s \| "cat >&2" }

	BEGIN {
	if( ARGC != 2 ){
	printerr( \
	"USAGE:\n"\
	" mysql2sqlite dump_mysql.sql > dump_sqlite3.sql\n" \
	" OR\n" \
	" mysql2sqlite dump_mysql.sql \| sqlite3 sqlite.db\n" \
	"\n" \
	"NOTES:\n" \
	" Dash in filename is not supported, because dash (-) means stdin." )
	no_END = 1
	exit 1
	}

	# Find INT_MAX supported by both this AWK (usually an ISO C signed int)
	# and SQlite.
	# On non-8bit-based architectures, the additional bits are safely ignored.

	# 8bit (lower precision should not exist)
	s="127"
	# "63" + 0 avoids potential parser misbehavior
	if( (s + 0) "" == s ){ INT_MAX_HALF = "63" + 0 }
	# 16bit
	s="32767"
	if( (s + 0) "" == s ){ INT_MAX_HALF = "16383" + 0 }
	# 32bit
	s="2147483647"
	if( (s + 0) "" == s ){ INT_MAX_HALF = "1073741823" + 0 }
	# 64bit (as INTEGER in SQlite3)
	s="9223372036854775807"
	if( (s + 0) "" == s ){ INT_MAX_HALF = "4611686018427387904" + 0 }
	# # 128bit
	# s="170141183460469231731687303715884105728"
	# if( (s + 0) "" == s ){ INT_MAX_HALF = "85070591730234615865843651857942052864" + 0 }
	# # 256bit
	# s="57896044618658097711785492504343953926634992332820282019728792003956564819968"
	# if( (s + 0) "" == s ){ INT_MAX_HALF = "28948022309329048855892746252171976963317496166410141009864396001978282409984" + 0 }
	# # 512bit
	# s="6703903964971298549787012499102923063739682910296196688861780721860882015036773488400937149083451713845015929093243025426876941405973284973216824503042048"
	# if( (s + 0) "" == s ){ INT_MAX_HALF = "3351951982485649274893506249551461531869841455148098344430890360930441007518386744200468574541725856922507964546621512713438470702986642486608412251521024" + 0 }
	# # 1024bit
	# s="89884656743115795386465259539451236680898848947115328636715040578866337902750481566354238661203768010560056939935696678829394884407208311246423715319737062188883946712432742638151109800623047059726541476042502884419075341171231440736956555270413618581675255342293149119973622969239858152417678164812112068608"
	# if( (s + 0) "" == s ){ INT_MAX_HALF = "44942328371557897693232629769725618340449424473557664318357520289433168951375240783177119330601884005280028469967848339414697442203604155623211857659868531094441973356216371319075554900311523529863270738021251442209537670585615720368478277635206809290837627671146574559986811484619929076208839082406056034304" + 0 }
	# # higher precision probably not needed

	FS=",$"
	print "PRAGMA synchronous = OFF;"
	print "PRAGMA journal_mode = MEMORY;"
	print "BEGIN TRANSACTION;"
	}

	# historically 3 spaces separate non-argument local variables
	function bit_to_int( str_bit, powtwo, i, res, bit, overflow ){
	powtwo = 1
	overflow = 0
	# 011101 = 12^0 + 02^1 + 1*2^2 ...
	for( i = length( str_bit ); i > 0; --i ){
	bit = substr( str_bit, i, 1 )
	if( overflow \|\| ( bit == 1 && res > INT_MAX_HALF ) ){
	printerr( \
	NR ": WARN Bit field overflow, number truncated (LSBs saved, MSBs ignored)." )
	break
	}
	res = res + bit * powtwo
	# no warning here as it might be the last iteration
	if( powtwo > INT_MAX_HALF ){ overflow = 1; continue }
	powtwo = powtwo * 2
	}
	return res
	}

	# CREATE TRIGGER statements have funny commenting. Remember we are in trigger.
	/^\/\.(CREATE.TRIGGER\|create.trigger)/ {
	gsub( /^.*(TRIGGER\|trigger)/, "CREATE TRIGGER" )
	print
	inTrigger = 1
	next
	}
	# The end of CREATE TRIGGER has a stray comment terminator
	/(END\|end) \\/;;/ { gsub( /\\//, "" ); print; inTrigger = 0; next }
	# The rest of triggers just get passed through
	inTrigger != 0 { print; next }

	# CREATE VIEW looks like a TABLE in comments
	/^\/\.(CREATE.TABLE\|create.table)/ {
	inView = 1
	next
	}
	# end of CREATE VIEW
	/^(\).(ENGINE\|engine).\*\/;)/ {
	inView = 0
	next
	}
	# content of CREATE VIEW
	inView != 0 { next }

	# skip comments
	/^\/\*/ { next }

	# skip PARTITION statements
	/^ *[(]?(PARTITION\|partition) +[^ ]+/ { next }

	# print all INSERT lines
	( /^ \(/ && /\) [,;] *$/ ) \|\| /^(INSERT\|insert\|REPLACE\|replace)/ {
	prev = ""

	# first replace \\ by \_ that mysqldump never generates to deal with
	# sequnces like \\n that should be translated into \n, not \<LF>.
	# After we convert all escapes we replace \_ by backslashes.
	gsub( /\\\\/, "\\_" )

	# single quotes are escaped by another single quote
	gsub( /\\'/, "''" )
	gsub( /\\n/, "\n" )
	gsub( /\\r/, "\r" )
	gsub( /\\"/, "\"" )
	gsub( /\\\032/, "\032" ) # substitute char

	gsub( /\\_/, "\\" )

	# sqlite3 is limited to 16 significant digits of precision
	while( match( $0, /0x[0-9a-fA-F]{17}/ ) ){
	hexIssue = 1
	sub( /0x[0-9a-fA-F]+/, substr( $0, RSTART, RLENGTH-1 ), $0 )
	}
	if( hexIssue ){
	printerr( \
	NR ": WARN Hex number trimmed (length longer than 16 chars)." )
	hexIssue = 0
	}
	print
	next
	}

	# CREATE DATABASE is not supported
	/^(CREATE DATABASE\|create database)/ { next }

	# print the CREATE line as is and capture the table name
	/^(CREATE\|create)/ {
	if( $0 ~ /IF NOT EXISTS\|if not exists/ \|\| $0 ~ /TEMPORARY\|temporary/ ){
	caseIssue = 1
	printerr( \
	NR ": WARN Potential case sensitivity issues with table/column naming\n" \
	" (see INFO at the end)." )
	}
	if( match( $0, /`[^`]+/ ) ){
	tableName = substr( $0, RSTART+1, RLENGTH-1 )
	}
	aInc = 0
	prev = ""
	firstInTable = 1
	print
	next
	}

	# Replace `FULLTEXT KEY` (probably other `XXXXX KEY`)
	/^ (FULLTEXT KEY\|fulltext key)/ { gsub( /[A-Za-z ]+(KEY\|key)/, " KEY" ) }

	# Get rid of field lengths in KEY lines
	/ (PRIMARY \|primary )?(KEY\|key)/ { gsub( /\([0-9]+\)/, "" ) }

	aInc == 1 && /PRIMARY KEY\|primary key/ { next }

	# Replace COLLATE xxx_xxxx_xx statements with COLLATE BINARY
	/ (COLLATE\|collate) [a-z0-9_]/ { gsub( /(COLLATE\|collate) [a-z0-9_]/, "COLLATE BINARY" ) }

	# Print all fields definition lines except the `KEY` lines.
	/^ / && !/^( (KEY\|key)\|\);)/ {
	if( match( $0, /[^"`]AUTO_INCREMENT\|auto_increment[^"`]/) ){
	aInc = 1
	gsub( /AUTO_INCREMENT\|auto_increment/, "PRIMARY KEY AUTOINCREMENT" )
	}
	gsub( /(UNIQUE KEY\|unique key) (`.`\|".") /, "UNIQUE " )
	gsub( /(CHARACTER SET\|character set) [^ ]+[ ,]/, "" )
	# FIXME
	# CREATE TRIGGER [UpdateLastTime]
	# AFTER UPDATE
	# ON Package
	# FOR EACH ROW
	# BEGIN
	# UPDATE Package SET LastUpdate = CURRENT_TIMESTAMP WHERE ActionId = old.ActionId;
	# END
	gsub( /(ON\|on) (UPDATE\|update) (CURRENT_TIMESTAMP\|current_timestamp)(\(\))?/, "" )
	gsub( /(DEFAULT\|default) (CURRENT_TIMESTAMP\|current_timestamp)(\(\))?/, "DEFAULT current_timestamp")
	gsub( /(COLLATE\|collate) [^ ]+ /, "" )
	gsub( /(ENUM\|enum)[^)]+\)/, "text " )
	gsub( /(SET\|set)\([^)]+\)/, "text " )
	gsub( /UNSIGNED\|unsigned/, "" )
	gsub( /_utf8mb3/, "" )
	gsub( /` [^ ](INT\|int\|BIT\|bit)[^ ]/, "` integer" )
	gsub( /" [^ ](INT\|int\|BIT\|bit)[^ ]/, "\" integer" )
	ere_bit_field = "[bB]'[10]+'"
	if( match($0, ere_bit_field) ){
	sub( ere_bit_field, bit_to_int( substr( $0, RSTART +2, RLENGTH -2 -1 ) ) )
	}

	# remove USING BTREE and other suffixes for USING, for example: "UNIQUE KEY
	# `hostname_domain` (`hostname`,`domain`) USING BTREE,"
	gsub( / USING [^, ]+/, "" )

	# field comments are not supported
	gsub( / (COMMENT\|comment).+$/, "" )
	# Get commas off end of line
	gsub( /,.?$/, "" )
	if( prev ){
	if( firstInTable ){
	print prev
	firstInTable = 0
	}
	else {
	print "," prev
	}
	}
	else {
	# FIXME check if this is correct in all cases
	if( match( $1,
	/(CONSTRAINT\|constraint) ["].*["] (FOREIGN KEY\|foreign key)/ ) ){
	print ","
	}
	}
	prev = $1
	}

	/ ENGINE\| engine/ {
	if( prev ){
	if( firstInTable ){
	print prev
	firstInTable = 0
	}
	else {
	print "," prev
	}
	}
	prev=""
	print ");"
	next
	}
	# `KEY` lines are extracted from the `CREATE` block and stored in array for later print
	# in a separate `CREATE KEY` command. The index name is prefixed by the table name to
	# avoid a sqlite error for duplicate index name.
	/^( (KEY\|key)\|\);)/ {
	if( prev ){
	if( firstInTable ){
	print prev
	firstInTable = 0
	}
	else {
	print "," prev
	}
	}
	prev = ""
	if( $0 == ");" ){
	print
	}
	else {
	if( match( $0, /`[^`]+/ ) ){
	indexName = substr( $0, RSTART+1, RLENGTH-1 )
	}
	if( match( $0, /\([^()]+/ ) ){
	indexKey = substr( $0, RSTART+1, RLENGTH-1 )
	}
	# idx_ prefix to avoid name clashes (they really happen!)
	key[tableName] = key[tableName] "CREATE INDEX \"idx_" \
	tableName "_" indexName "\" ON \"" tableName "\" (" indexKey ");\n"
	}
	}

	END {
	if( no_END ){ exit 1}
	# print all KEY creation lines.
	for( table in key ){ printf key[table] }

	print "END TRANSACTION;"

	if( caseIssue ){
	printerr( \
	"INFO Pure sqlite identifiers are case insensitive (even if quoted\n" \
	" or if ASCII) and doesnt cross-check TABLE and TEMPORARY TABLE\n" \
	" identifiers. Thus expect errors like \"table T has no column named F\".")
	}
	}
	-- We start a GraphViz graph
	SELECT '
	digraph structs {
	'
	UNION ALL

	-- Normally, GraphViz' "dot" command lays out a hierarchical graph from
	-- top to bottom. However, we aren't just laying out individual nodes,
	-- each node is a vertical list of database fields. To prevent GraphViz
	-- from snaking arrows all over the place, we constrain it to draw
	-- incoming references on the left of each field, and outgoing references
	-- on the right. Since that's the way references flow for each database
	-- table, we tell GraphViz to lay the whole graph out left-to-right,
	-- which makes its job much easier and produces prettier output.
	SELECT '
	rankdir="LR"
	'
	UNION ALL


	-- By default, nodes have circles around them. We will draw our own
	-- tables below, we do not want the circles.
	SELECT '
	node [shape=none]
	'
	UNION ALL

	-- This is the big query that renders a node complete with field names
	-- for each table in the database. Because we want raw GraphViz output,
	-- our query returns rows with a single string field, whose value is a
	-- complex calculation using SQL as a templating engine. This is kind
	-- of an abuse, but works nicely nevertheless.
	SELECT
	CASE
	-- When the previous row's table name is the same as this one,
	-- do nothing.
	WHEN LAG(t.name, 1) OVER (ORDER BY t.name) = t.name THEN ''

	-- Otherwise, this is the first row of a new table, so start
	-- the node markup and add a header row. Normally in GraphViz,
	-- the table name would be the label of the node, but since
	-- we're using the label to represent the entire node, we have
	-- to make our own header.
	--
	-- GraphViz does have a "record" label shape, but it seems tricky
	-- to work with and I found the HTML-style label markup easier
	-- to get working the way I wanted.
	ELSE
	t.name \|\| ' [label=<
	<TABLE BORDER="0" CELLSPACING="0" CELLBORDER="1">
	<TR>
	<TD COLSPAN="2"><B>' \|\| t.name \|\| '</B></TD>
	</TR>
	'

	-- After the header (if needed), we have rows for each field in
	-- the table.
	--
	-- The "pk" metadata field is zero for table fields that are not part
	-- of the primary key. If the "pk" metadata field is 1 or more, that
	-- tells you that table field's order in the (potentially composite)
	-- primary key.
	--
	-- We also add ports to each of the table cells, so that we can
	-- later tell GraphViz to specifically connect the ports representing
	-- specific fields in each table, instead of connecting the tables
	-- generally.
	END \|\| '
	<TR>
	<TD PORT="' \|\| i.name \|\| '_to">' \|\|
	CASE i.pk WHEN 0 THEN ' ' ELSE '🔑' END \|\|
	'</TD>
	<TD PORT="' \|\| i.name \|\| '_from">' \|\| i.name \|\| '</TD>
	</TR>
	' \|\|
	CASE
	-- When the next row's table name is the same as this one,
	-- do nothing.
	WHEN LEAD(t.name, 1) OVER (ORDER BY t.name) = t.name THEN ''

	-- Otherwise, this is the last row of a database table, so end
	-- the table markup.
	ELSE '
	</TABLE>
	>];
	'
	END

	-- This is how you get nice relational data out of SQLite's metadata
	-- pragmas.
	FROM pragma_table_list() AS t
	JOIN pragma_table_info(t.name, t.schema) AS i

	WHERE
	-- SQLite has a bunch of metadata tables in each schema, which
	-- are hidden from .tables and .schema but which are reported
	-- in pragma_table_list(). They're not user-created and almost
	-- certainly user databases don't have foreign keys to them, so
	-- let's just filter them out.
	t.name NOT LIKE 'sqlite_%'

	-- Despite its name, pragma_table_list() also includes views.
	-- Since those don't store any information or have any correctness
	-- constraints, they're just distracting if you're trying to quickly
	-- understand a database's schema, so we'll filter them out too.
	AND t.type = 'table'
	UNION ALL

	-- Now we have all the database tables set up, we can draw the links
	-- between them. SQLite gives us the pragma_foreign_key_list() function
	-- which (for a given source table) lists all the source fields that are
	-- part of a foreign key reference, the target table they refer to, and
	-- (if it was created with "REFERENCES table_name(column_name)" syntax,
	-- the target column names too. Unfortunately, if the reference was
	-- created with "REFERENCES table_name" syntax, the pragma does not
	-- figure out what the corresponding target fields are, so we'll also need
	-- pragma_table_info() to look up the primary key(s) for the target table.
	--
	-- Once we have everything we need, we just do a bit more string
	-- concatenation to build up the GraphViz syntax equivalent.
	--
	-- Note that we use the ports we defined above, as well as the directional
	-- overrides :e and :w, to force GraphViz to give us a layout that's
	-- likely to be readable.
	SELECT

	-- We left-join every foreign key field against pragma_table_info
	-- looking for primary keys, and the target table may have a composite
	-- primary key even if the foreign key does not reference the primary
	-- key, so we may wind up with multiple results describing the same
	-- foreign key reference. DISTINCT makes sure we only describe each
	-- reference once.
	DISTINCT

	t.name \|\| ':' \|\| f."from" \|\| '_from:e -> ' \|\|

	-- If the constraint was created with "REFERENCES
	-- table_name(column_name)", then f.to will contain 'column_name'.
	-- Otherwise, f.to is NULL, and we need to grab the corresponding
	-- field from the primary key in i.name.
	f."table" \|\| ':' \|\| COALESCE(f."to", i.name) \|\| '_to:w'

	FROM pragma_table_list() AS t
	JOIN pragma_foreign_key_list(t.name, t.schema) AS f

	-- We look up all the fields in the target table, just in case
	-- pragma_foreign_key_list() doesn't tell us what the target field
	-- name is. SQLite doesn't allow foreign-key references to cross
	-- schemas, so it's OK to use the source table's schema name to look
	-- up the target table.
	--
	-- Strictly speaking, we shouldn't need to LEFT JOIN here, a basic
	-- JOIN should do. This works around a bug in SQLite 3.16.0 to
	-- version 3.45.1: https://sqlite.org/forum/forumpost/b1656fcb39
	LEFT JOIN pragma_table_info(f."table", t.schema) AS i

	-- f.seq represents the order of fields in a source table's composite foreign key
	-- reference, starting at 0. In "FOREIGN KEY (a, b)", "a" would have
	-- seq=0 and "b" would have seq=1. i.pk represents the order of fields
	-- in a primary key, where "0" means "not part of the primary key".
	-- In "PRIMARY KEY (a, b)", "a" would have pk=1 and "b" would have pk=2.
	-- For a foreign key reference that specifies the target field name,
	-- none of this matters, but if the target field name is missing, then
	-- this makes sure that each field of the foreign key reference is joined
	-- with the corresponding primary key field of the target table.
	WHERE f.seq + 1 = i.pk

	UNION ALL

	-- Lastly, we close the GraphViz graph.
	SELECT '
	}';