Dumping a table from SQL Server to SQLite

dump-to-csv
#! /usr/bin/env bash
set -e
#
# Invoke this script as: dump-to-csv <database> <schema> <table> -U sa -P 'P455word!!!' -S localhost
#
# The output will be written to stdout; redirect it to wherever you need your csv to go. Make sure
# `bcp` is in your path, in the docker images it can be found in /opt/mssql-tools/bin.
#
declare database="$1"
declare schema="$2"
declare table="$3"
declare header_query="
select column_name
from information_schema.columns
where '${database}' = table_catalog
and '${schema}' = table_schema
and '${table}' = table_name
order by ordinal_position
"
#
# A few notes here; `bcp` just writes to stdout and nothing else if no file is provided, but it
# will also print warnings and a summary of what has been done every time, which pollutes the
# output if you want to work with it as a stream. Luckily for us everything on *nix is a file and
# bcp is a little naive in this regard; using `cat` in a process substitution as the output "file"
# allows us to write to stdout without adding the extra stuff to it.
#
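# A minimal sketch of the trick in isolation (`noisy-tool` is a hypothetical stand-in for bcp):
#
#   noisy-tool >(cat) > /dev/null
#
# `cat` inherits our stdout, so whatever the tool writes to the substitution still reaches it,
# while the tool's own chatter on its stdout is discarded.
#
# Header row: quote and comma-separate the column names, then terminate the line.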
bcp "$header_query" queryout >(sed -E 's/^/"/g;s/,"$//g' | cat - <(echo '')) -c -t'' -r'","' -d "$database" "${@:4}" > /dev/null;
bcp "$schema.$table" out >(sed -z 's/\r/\\r/g;s/\n/\\n/g;s/"\\n/"\n/g' | sed 's/^/"/g' | cat -) -c -t'","' -r'"\n' -d "$database" "${@:4}" >&2

dump-to-sqlite
#! /usr/bin/env bash
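#
# Render a `bcp <query> queryout >(cat) <arguments>` one-liner and base64-encode it, so it can be
# embedded in sqlite's single-quoted `.import '|...'` argument without any quoting headaches.
#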
dump_to_sqlite__as_b64_bcp_command() {
local query="$1" bcp_arguments="$2"
echo "bcp \"${query}\" queryout >(cat) ${bcp_arguments} > /dev/null" | base64 --wrap=0
}
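#
# A quick round-trip check of the helper (decoding should print the literal bcp command):
#
#   dump_to_sqlite__as_b64_bcp_command "select 1" "-U sa -S localhost" | base64 --decode
#   # => bcp "select 1" queryout >(cat) -U sa -S localhost > /dev/null
#
# Mirror the relevant information_schema rows for one table from MSSQL into local sqlite tables,
# so the later steps can derive the DDL without talking to the server again.
#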
dump_to_sqlite__create_and_populate_information_schema_table() {
local db_file="$1" database="$2" schema="$3" table="$4"
local bcp_arguments=("-c" "-t'\t'" "-r'\n'" "${@:5}")
local create_table_information_schema_tables="
create table if not exists information_schema__columns (
table_schema text,
table_name text,
column_name text,
ordinal_position integer,
is_nullable integer,
data_type text,
primary key (table_schema, table_name, column_name)
);
create table if not exists information_schema__table_constraints (
constraint_schema text,
constraint_name text,
table_schema text,
table_name text,
constraint_type text,
primary key (constraint_schema, constraint_name, table_schema, table_name)
);
create table if not exists information_schema__key_column_usage (
constraint_schema text,
constraint_name text,
table_schema text,
table_name text,
column_name text,
primary key (constraint_schema, constraint_name, table_schema, table_name, column_name)
);
"
local select_columns="
select table_schema, table_name, column_name, ordinal_position, is_nullable, data_type
from information_schema.columns
where
table_catalog = '${database}'
and table_schema = '${schema}'
and table_name = '${table}'
";
local select_table_constraints="
select constraint_schema, constraint_name, table_schema, table_name, constraint_type
from information_schema.table_constraints
where
constraint_type = 'PRIMARY KEY'
and table_catalog = '${database}'
and table_schema = '${schema}'
and table_name = '${table}'
";
local select_key_column_usage="
select key_column_usage.constraint_schema, key_column_usage.constraint_name, key_column_usage.table_schema, key_column_usage.table_name, key_column_usage.column_name
from
information_schema.table_constraints table_constraint,
information_schema.key_column_usage
where
table_constraint.constraint_type = 'PRIMARY KEY'
and table_constraint.table_catalog = '${database}'
and table_constraint.table_schema = '${schema}'
and table_constraint.table_name = '${table}'
and table_constraint.constraint_name = key_column_usage.constraint_name
and table_constraint.constraint_catalog = key_column_usage.constraint_catalog
and table_constraint.constraint_schema = key_column_usage.constraint_schema
and table_constraint.table_catalog = key_column_usage.table_catalog
and table_constraint.table_schema = key_column_usage.table_schema
and table_constraint.table_name = key_column_usage.table_name
";
sqlite3 -cmd ".timeout 5000" -cmd ".mode tabs" "${db_file}" <<-SQL
${create_table_information_schema_tables}
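-- Each .import below runs a bcp one-liner through bash; the command is built and base64-encoded
-- while the heredoc is expanded (see dump_to_sqlite__as_b64_bcp_command) and decoded again here,
-- so the embedded quotes survive sqlite's argument parsing.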
.import '|echo "$(dump_to_sqlite__as_b64_bcp_command "${select_columns//[$'\n']/\ }" "${bcp_arguments[*]}")" | base64 --decode | bash' information_schema__columns
.import '|echo "$(dump_to_sqlite__as_b64_bcp_command "${select_table_constraints//[$'\n']/\ }" "${bcp_arguments[*]}")" | base64 --decode | bash' information_schema__table_constraints
.import '|echo "$(dump_to_sqlite__as_b64_bcp_command "${select_key_column_usage//[$'\n']/\ }" "${bcp_arguments[*]}")" | base64 --decode | bash' information_schema__key_column_usage
select printf(
'[ %s ] has [ %d ] columns'
, table_name
, count(*)
-- group_concat order is arbitrary so the output does look a little weird. At this time I can't
-- really be bothered figuring out how to fix this, although the documentation does suggest that
-- it is possible using window functions.
--, group_concat( column_name, ', ' ) over (order by ordinal_position desc)
)
from information_schema__columns
where
table_schema = '${schema}'
and table_name = '${table}'
order by ordinal_position desc
;
select printf(
'Constraint [ %s ] of type [ %s ] on [ %s ] using columns [ %s ]'
, table_constraint.constraint_name
, table_constraint.constraint_type
, table_constraint.table_name
, group_concat( key_column_usage.column_name )
)
from information_schema__table_constraints as table_constraint
join information_schema__key_column_usage as key_column_usage using (constraint_name)
where
table_constraint.table_schema = '${schema}'
and table_constraint.table_name = '${table}'
group by table_constraint.constraint_name
;
SQL
}
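#
# The `.import '|command' table` form used above pipes the command's stdout into the table. A
# self-contained sketch of the mechanism, independent of bcp (strip the leading `#   ` to run it):
#
#   sqlite3 demo.db <<EOF
#   create table t ( a text, b text );
#   .mode tabs
#   .import '|printf "1\t2\n"' t
#   select * from t;
#   EOF
#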
dump_to_sqlite__create_table() {
local db_file="$1" database="$2" schema="$3" table="$4"
#
# The type here are taken from:
# https://docs.microsoft.com/en-us/sql/t-sql/data-types/data-types-transact-sql?view=sql-server-ver15
# https://www.sqlite.org/datatype3.html
#
# And the mappings are made using common sense; any floating point type that is already lossy is
# mapped to a real, but decimal, money, etc. are mapped to blobs as these are not lossy and should
# stay that way. Maybe there should be a way to override this?
#
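# As an illustration, a hypothetical table Fuzzies with columns (Id int not null, Name nvarchar
# null) and a primary key constraint PK_Fuzzies would come out of this function as (modulo
# whitespace):
#
#   create table Fuzzies ( Id integer not null, Name text );
#   create unique index PK_Fuzzies on Fuzzies ( Id );
#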
local sqlite_columns_query="
with
type_map ( data_type, sqlite_type ) as (
values
('bigint', 'integer'),
('bit', 'integer'),
('smallint', 'integer'),
('int', 'integer'),
('tinyint', 'integer'),
('numeric', 'blob'),
('decimal', 'blob'),
('smallmoney', 'blob'),
('money', 'blob'),
('float', 'real'),
('real', 'real'),
('date', 'text'),
('datetimeoffset', 'text'),
('datetime2', 'text'),
('smalldatetime', 'text'),
('datetime', 'text'),
('time', 'text'),
('char', 'text'),
('varchar', 'text'),
('text', 'text'),
('nchar', 'text'),
('nvarchar', 'text'),
('ntext', 'text'),
('binary', 'blob'),
('varbinary', 'blob'),
('image', 'blob')
)
select
column_name,
type_map.sqlite_type,
case is_nullable
when 'YES' then ''
when 'NO' then 'not null'
end
from information_schema__columns
join type_map using (data_type)
where
table_schema = '${schema}'
and table_name = '${table}'
order by ordinal_position
;
"
local sqlite_columns="" ; sqlite_columns=$(sqlite3 "${db_file}" -noheader -cmd ".separator ' ' ','" "${sqlite_columns_query}")
local primary_key_name_query="
select constraint_name
from information_schema__table_constraints
where
constraint_type = 'PRIMARY KEY'
and table_schema = '${schema}'
and table_name = '${table}';
"
local primary_key_name=""; primary_key_name=$(sqlite3 "${db_file}" -noheader -cmd ".mode tabs" "${primary_key_name_query}")
local primary_key_columns_query="
select group_concat( column_name )
from information_schema__table_constraints as table_constraint
join information_schema__key_column_usage using (constraint_name)
where
table_constraint.constraint_type = 'PRIMARY KEY'
and table_constraint.table_schema = '${schema}'
and table_constraint.table_name = '${table}';
";
local primary_key_columns=""; primary_key_columns=$(sqlite3 "${db_file}" -noheader -cmd ".separator ' ' ','" "${primary_key_columns_query}")
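# With `.separator ' ' ','` every row, including the last, ends in the ',' row separator; the
# ${...%?} expansions below strip that trailing comma off.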
sqlite3 -cmd ".timeout 5000" "${db_file}" <<-SQL
create table ${table} ( ${sqlite_columns%?} );
create unique index ${primary_key_name} on ${table} ( ${primary_key_columns%?} );
SQL
}
dump_to_sqlite__dump_table_to_sqlite() {
local db_file="$1" database="$2" schema="$3" table="$4"
local bcp_arguments=("${@:5}")
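# Reuse the dump-to-csv script for the actual data, dropping its header row with `tail -n +2`;
# the base64 round trip again keeps the quoting intact inside sqlite's .import argument.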
sqlite3 -cmd ".timeout 5000" -cmd ".mode csv" "${db_file}" <<-SQL
.import -v '|echo "$(base64 --wrap=0 <<<"dump-to-csv '${database}' '${schema}' '${table}' ${bcp_arguments[*]} | tail -n +2")"| base64 --decode | bash' ${table}
SQL
}
dump_to_sqlite__help() {
printf '%s\n'\
'dump-to-sqlite - Dump one or more tables from MSSQL to a sqlite database ' \
' ' \
'Usage: ' \
' dump-to-sqlite [options] <sqlite db file> <tables...> ' \
' ' \
'sqlite db file: ' \
' Path to the SQLite database file ' \
' ' \
'tables: ' \
' List of tables to dump to sqlite; these must be unqualified ' \
' ' \
'Options: ' \
' -U, --username <username> Username provided to MSSQL ' \
' -P, --password <password> Password provided to MSSQL ' \
' -S, --server <address> Server where to find the instance of MSSQL. Note ' \
' that this is not a standard URI format and port- ' \
' numbers are separated using a comma. ' \
' -d, --database <database> The database name ' \
' -s, --schema <schema> The schema name ' \
' ' \
' -?, --help Shows this help message ' \
' ' \
'Examples: ' \
' dump-to-sqlite -U sa -P "P455word!!!" -S "localhost,1433" -d "databeast" -s "dbo" /tmp/fuzzies.db "Fuzzies"' \
' '
exit
}
dump_to_sqlite__main() {
local username password server database schema
local db_file tables
while (( $# )); do
echo "$1" > /dev/null;
case $1 in
"-?" | --help) dump_to_sqlite__help ;;
-U | --username) username="$2" ;;
-P | --password) password="$2" ;;
-S | --server) server="$2" ;;
-d | --database) database="$2" ;;
-s | --schema) schema="$2" ;;
*)
db_file="$1" ; shift
tables=( "$@" )
break
esac
shift 2
done
if [[ -z "$username" ]] ; then printf "[ dump-to-sqlite ] 'username' was empty\n" >&2 ; exit 1
elif [[ -z "$password" ]] ; then printf "[ dump-to-sqlite ] 'password' was empty\n" >&2 ; exit 1
elif [[ -z "$server" ]] ; then printf "[ dump-to-sqlite ] 'server' was empty\n" >&2 ; exit 1
elif [[ -z "$database" ]] ; then printf "[ dump-to-sqlite ] 'database' was empty\n" >&2 ; exit 1
elif [[ -z "$schema" ]] ; then printf "[ dump-to-sqlite ] 'schema' was empty\n" >&2 ; exit 1
elif [[ -z "$db_file" ]] ; then printf "[ dump-to-sqlite ] 'db_file' was empty\n" >&2 ; exit 1
elif [[ -z "$tables" ]] ; then printf "[ dump-to-sqlite ] 'tables' was empty\n" >&2 ; exit 1
fi
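# The literal single quotes around the password are deliberate: these arguments are later pasted
# into command strings that are executed through bash (see the base64 round trips above), so the
# quoting has to survive one extra round of evaluation.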
local bcp_arguments=("-U" "$username" "-P" "'${password}'" "-S" "$server" "-d" "$database");
for table in "${tables[@]}"; do
dump_to_sqlite__create_and_populate_information_schema_table "$db_file" "$database" "$schema" "$table" "${bcp_arguments[@]}" ;
dump_to_sqlite__create_table "$db_file" "$database" "$schema" "$table" ;
dump_to_sqlite__dump_table_to_sqlite "$db_file" "$database" "$schema" "$table" "${bcp_arguments[@]}" ;
done
}
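# Only run main when the script is executed directly; when it is sourced, the `return` succeeds
# and just the functions are defined.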
(return 0 2>/dev/null) || dump_to_sqlite__main "$@"

dump-to-tsv
#! /usr/bin/env bash
set -e
#
# Invoke this script as: dump-to-tsv <database> <schema> <table> -U sa -P 'P455word!!!' -S localhost
#
# The output will be written to stdout; redirect it to wherever you need your tsv to go. Make sure
# `bcp` is in your path, in the docker images it can be found in /opt/mssql-tools/bin.
#
declare database="$1"
declare schema="$2"
declare table="$3"
declare header_query="
select column_name
from information_schema.columns
where '${database}' = table_catalog
and '${schema}' = table_schema
and '${table}' = table_name
order by ordinal_position
"
#
# A few notes here; `bcp` just writes to stdout and nothing else if no file is provided, but it
# will also print warnings and a summary of what has been done every time, which pollutes the
# output if you want to work with it as a stream. Luckily for us everything on *nix is a file and
# bcp is a little naive in this regard; using `cat` in a process substitution as the output "file"
# allows us to write to stdout without adding the extra stuff to it.
#
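# Header: every column name is a bcp "row" here, so the tab row terminator tab-separates the names
# and the appended empty echo supplies the trailing newline. The data rows below keep bcp's tab
# field and CRLF row terminators; unlike the csv variant no quoting pass is needed.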
bcp "$header_query" queryout >(cat - <(echo '')) -c -t'' -r'\t' -d "$database" "${@:4}" > /dev/null;
bcp "$schema.$table" out >(cat - <(echo '')) -c -t'\t' -r'\r\n' -d "$database" "${@:4}" >&2