Last active
November 24, 2021 08:46
-
-
Save AndreSteenveld/f0b6d150da9efc2e41b4840ae7d1cb3b to your computer and use it in GitHub Desktop.
Dumping a table from SQL Server to SQLite
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#! /usr/bin/env bash
set -e
#
# dump-to-csv - dump one MSSQL table as CSV to stdout.
#
# Invoke this script; dump-to-csv <database> <schema> <table> -U sa -P 'P455word!!!' -S localhost
#
# Everything after the three positional arguments ("${@:4}") is handed to `bcp`
# unchanged (credentials, server, ...). The output will be written to stdout,
# redirecting to where you need your csv to go. Make sure `bcp` is in your
# path, in the docker images it can be found in /opt/mssql-tools/bin
#
declare database="$1"
declare schema="$2"
declare table="$3"
# Column names of the target table in ordinal order - used for the csv header.
# NOTE(review): the positional arguments are interpolated into the SQL
# unescaped; this is only safe while the caller controls them.
declare header_query="
	select column_name
	from information_schema.columns
	where '${database}' = table_catalog
	and '${schema}' = table_schema
	and '${table}' = table_name
	order by ordinal_position
"
#
# A few notes here; `bcp` just writes to stdout and nothing else if no file is
# provided, it will also output messages every time with warnings and a summary
# of what has been done. Which pollutes the output if you want to work with it
# as a stream. Good for us everything on *nix is a file and bcp is a little
# naive in this regard, using `cat` in a subshell as a file will allow us to
# write to stdout without adding the extra stuff to it.
#
# Header: every column name comes out as its own bcp "row" terminated by '","',
# so sed only needs to prepend the opening '"' and drop the dangling ',"' after
# the last column; `cat - <(echo '')` supplies the newline bcp never writes.
# bcp's own status messages (its stdout) are discarded via /dev/null.
bcp "$header_query" queryout >(sed -E 's/^/"/g;s/,"$//g' | cat - <(echo '')) -c -t'' -r'","' -d "$database" "${@:4}" > /dev/null;
# Data: fields are emitted as "a","b" via the -t/-r terminators. The first sed
# (-z: the stream is treated as one record) escapes CR/LF that occur inside
# values and then restores the real newline of the '"\n' row terminator; the
# second sed prepends the opening '"' of each row. bcp's status output is
# redirected to stderr so only csv data reaches stdout.
bcp "$schema.$table" out >(sed -z 's/\r/\\r/g;s/\n/\\n/g;s/"\\n/"\n/g' | sed 's/^/"/g' | cat -) -c -t'","' -r'"\n' -d "$database" "${@:4}" >&2
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#! /usr/bin/env bash | |
dump_to_sqlite__as_b64_bcp_command() {
	# Render the `bcp ... queryout` command line that streams a query's rows
	# to stdout (bcp writes the data into the >(cat) process substitution,
	# its status chatter goes to /dev/null) and emit it base64 encoded on a
	# single line, so it can be embedded inside a sqlite3 `.import '|...'`
	# pipe spec without any quoting collisions.
	#
	# $1   - the T-SQL query to run
	# $2.. - pre-rendered bcp argument words (connection flags etc).
	#        Generalized: previously only $2 was used, silently dropping any
	#        further words; accepting "${@:2}" is backward compatible with
	#        callers that pass everything as one string.
	local query="$1"
	local bcp_arguments=("${@:2}")
	echo "bcp \"${query}\" queryout >(cat) ${bcp_arguments[*]} > /dev/null" | base64 --wrap=0
}
dump_to_sqlite__create_and_populate_information_schema_table() {
	# Mirror the slices of MSSQL's information_schema that describe <table>
	# into three local sqlite tables (information_schema__columns,
	# information_schema__table_constraints, information_schema__key_column_usage).
	# dump_to_sqlite__create_table later derives the sqlite DDL from these.
	# Ends by printing a human readable summary of what was imported.
	#
	# $1 - sqlite db file   $2 - MSSQL database   $3 - schema   $4 - table
	# $5.. - extra bcp words (connection flags), already shell-quoted because
	#        they survive a base64 encode/decode/bash round trip below.
	local db_file="$1" database="$2" schema="$3" table="$4"
	# Tab field / newline row terminators so bcp's output matches sqlite's
	# ".mode tabs" importer; the terminators carry their own single quotes
	# because the argument list is re-parsed by bash after the base64 round trip.
	local bcp_arguments=("-c" "-t'\t'" "-r'\n'" "${@:5}")
	local create_table_information_schema_tables="
		create table if not exists information_schema__columns (
			table_schema text,
			table_name text,
			column_name text,
			ordinal_position integer,
			is_nullable integer,
			data_type text,
			primary key (table_schema, table_name, column_name)
		);
		create table if not exists information_schema__table_constraints (
			constraint_schema text,
			constraint_name text,
			table_schema text,
			table_name text,
			constraint_type text,
			primary key (constraint_schema, constraint_name, table_schema, table_name)
		);
		create table if not exists information_schema__key_column_usage (
			constraint_schema text,
			constraint_name text,
			table_schema text,
			table_name text,
			column_name text,
			primary key (constraint_schema, constraint_name, table_schema, table_name, column_name)
		);
	"
	# The MSSQL-side metadata queries. NOTE(review): values are interpolated
	# unescaped - acceptable only because they come from the operator's own
	# command line, not from untrusted input.
	local select_columns="
		select table_schema, table_name, column_name, ordinal_position, is_nullable, data_type
		from information_schema.columns
		where
			table_catalog = '${database}'
			and table_schema = '${schema}'
			and table_name = '${table}'
	";
	local select_table_constraints="
		select constraint_schema, constraint_name, table_schema, table_name, constraint_type
		from information_schema.table_constraints
		where
			constraint_type = 'PRIMARY KEY'
			and table_catalog = '${database}'
			and table_schema = '${schema}'
			and table_name = '${table}'
	";
	local select_key_column_usage="
		select key_column_usage.constraint_schema, key_column_usage.constraint_name, key_column_usage.table_schema, key_column_usage.table_name, key_column_usage.column_name
		from
			information_schema.table_constraints table_constraint,
			information_schema.key_column_usage
		where
			table_constraint.constraint_type = 'PRIMARY KEY'
			and table_constraint.table_catalog = '${database}'
			and table_constraint.table_schema = '${schema}'
			and table_constraint.table_name = '${table}'
			and table_constraint.constraint_name = key_column_usage.constraint_name
			and table_constraint.constraint_catalog = key_column_usage.constraint_catalog
			and table_constraint.constraint_schema = key_column_usage.constraint_schema
			and table_constraint.table_catalog = key_column_usage.table_catalog
			and table_constraint.table_schema = key_column_usage.table_schema
			and table_constraint.table_name = key_column_usage.table_name
	";
	# Each `.import '|...'` makes sqlite read rows from a pipe. The command in
	# the pipe is rendered NOW (heredoc expansion): the query is flattened to
	# one line (${var//[$'\n']/\ }) and wrapped as a base64 encoded bcp command
	# so nothing inside it can collide with the surrounding single quotes; at
	# import time sqlite decodes it and hands it to bash.
	sqlite3 -cmd ".timeout 5000" -cmd ".mode tabs" "${db_file}" <<-SQL
		${create_table_information_schema_tables}
		.import '|echo "$(dump_to_sqlite__as_b64_bcp_command "${select_columns//[$'\n']/\ }" "${bcp_arguments[*]}")" | base64 --decode | bash' information_schema__columns
		.import '|echo "$(dump_to_sqlite__as_b64_bcp_command "${select_table_constraints//[$'\n']/\ }" "${bcp_arguments[*]}")" | base64 --decode | bash' information_schema__table_constraints
		.import '|echo "$(dump_to_sqlite__as_b64_bcp_command "${select_key_column_usage//[$'\n']/\ }" "${bcp_arguments[*]}")" | base64 --decode | bash' information_schema__key_column_usage
		select printf(
			'[ %s ] has [ %d ] columns'
			, table_name
			, count(*)
			-- group_concat order is arbitrary so it does look a little weird. At this time I can't really be bothered figuring out
			-- how to fix this. Although the documentation does suggest that it is possible using window functions
			--, group_concat( column_name, ', ' ) over (order by ordinal_position desc)
		)
		from information_schema__columns
		where
			table_schema = '${schema}'
			and table_name = '${table}'
		order by ordinal_position desc
		;
		select printf(
			'Constraint [ %s ] of type [ %s ] on [ %s ] using columns [ %s ]'
			, table_constraint.constraint_name
			, table_constraint.constraint_type
			, table_constraint.table_name
			, group_concat( key_column_usage.column_name )
		)
		from information_schema__table_constraints as table_constraint
		join information_schema__key_column_usage as key_column_usage using (constraint_name)
		where
			table_constraint.table_schema = '${schema}'
			and table_constraint.table_name = '${table}'
		group by table_constraint.constraint_name
		;
	SQL
}
dump_to_sqlite__create_table() {
	# Generate and execute the sqlite DDL for <table> from the metadata
	# previously imported into the information_schema__* tables: a
	# `create table` with mapped column types, plus a unique index standing in
	# for the original primary key.
	#
	# $1 - sqlite db file   $2 - MSSQL database (unused here)   $3 - schema
	# $4 - table
	local db_file="$1" database="$2" schema="$3" table="$4"
	#
	# The types here are taken from:
	# https://docs.microsoft.com/en-us/sql/t-sql/data-types/data-types-transact-sql?view=sql-server-ver15
	# https://www.sqlite.org/datatype3.html
	#
	# And the mappings are made using common sense, for any floating point number that is lossy it is mapped
	# to a real but decimal, money, etc are mapped to blobs as these are not lossy. Maybe there should be a
	# way to override this?
	#
	local sqlite_columns_query="
		with
		type_map ( data_type, sqlite_type ) as (
			values
				('bigint', 'integer'),
				('bit', 'integer'),
				('smallint', 'integer'),
				('int', 'integer'),
				('tinyint', 'integer'),
				('numeric', 'blob'),
				('decimal', 'blob'),
				('smallmoney', 'blob'),
				('money', 'blob'),
				('float', 'real'),
				('real', 'real'),
				('date', 'text'),
				('datetimeoffset', 'text'),
				('datetime2', 'text'),
				('smalldatetime', 'text'),
				('datetime', 'text'),
				('time', 'text'),
				('char', 'text'),
				('varchar', 'text'),
				('text', 'text'),
				('nchar', 'text'),
				('nvarchar', 'text'),
				('ntext', 'text'),
				('binary', 'blob'),
				('varbinary', 'blob'),
				('image', 'blob')
		)
		select
			column_name,
			type_map.sqlite_type,
			case is_nullable
				when 'YES' then ''
				when 'NO' then 'not null'
			end
		from information_schema__columns
		join type_map using (data_type)
		where
			table_schema = '${schema}'
			and table_name = '${table}'
		order by ordinal_position
		;
	"
	# With ".separator ' ' ','" every row renders as `name type [not null]`
	# and rows are joined by (and terminated with) a comma - i.e. the column
	# list of a `create table`, with one trailing comma.
	local sqlite_columns="" ; sqlite_columns=$(sqlite3 "${db_file}" -noheader -cmd ".separator ' ' ','" "${sqlite_columns_query}")
	local primary_key_name_query="
		select constraint_name
		from information_schema__table_constraints
		where
			constraint_type = 'PRIMARY KEY'
			and table_schema = '${schema}'
			and table_name = '${table}';
	"
	local primary_key_name=""; primary_key_name=$(sqlite3 "${db_file}" -noheader -cmd ".mode tabs" "${primary_key_name_query}")
	local primary_key_columns_query="
		select group_concat( column_name )
		from information_schema__table_constraints as table_constraint
		join information_schema__key_column_usage using (constraint_name)
		where
			table_constraint.constraint_type = 'PRIMARY KEY'
			and table_constraint.table_schema = '${schema}'
			and table_constraint.table_name = '${table}';
	";
	local primary_key_columns=""; primary_key_columns=$(sqlite3 "${db_file}" -noheader -cmd ".separator ' ' ','" "${primary_key_columns_query}")
	# ${var%?} strips the trailing separator character left by the queries
	# above. NOTE(review): ${table} and ${primary_key_name} end up unquoted in
	# the DDL - assumes names that need no quoting; verify against inputs.
	sqlite3 -cmd ".timeout 5000" "${db_file}" <<-SQL
		create table ${table} ( ${sqlite_columns%?} );
		create unique index ${primary_key_name} on ${table} ( ${primary_key_columns%?} );
	SQL
}
dump_to_sqlite__dump_table_to_sqlite() {
	# Stream every row of <schema>.<table> out of MSSQL and into the sqlite
	# table of the same name, by piping the companion `dump-to-csv` script
	# into sqlite's csv importer.
	#
	# The dump-to-csv command line is base64 encoded at heredoc expansion time
	# so none of its quoting can clash with the single-quoted '|...' pipe
	# spec; sqlite decodes and runs it at import time. `tail -n +2` drops the
	# csv header row. NOTE(review): assumes `dump-to-csv` is on the PATH.
	#
	# $1 - sqlite db file   $2 - MSSQL database   $3 - schema   $4 - table
	# $5.. - extra bcp words forwarded to dump-to-csv
	local db_file="$1" database="$2" schema="$3" table="$4"
	local bcp_arguments=("${@:5}")
	sqlite3 -cmd ".timeout 5000" -cmd ".mode csv" "${db_file}" <<-SQL
		.import -v '|echo "$(base64 --wrap=0 <<<"dump-to-csv '${database}' '${schema}' '${table}' ${bcp_arguments[*]} | tail -n +2")"| base64 --decode | bash' ${table}
	SQL
}
dump_to_sqlite__help() {
	# Print the usage text to stdout and terminate the process.
	#
	# Fixes two defects in the original:
	#  * the last text line ended in a line continuation backslash, which
	#    turned the following `exit` into a printf *argument* - the literal
	#    word "exit" was appended to the help text and the function never
	#    actually exited;
	#  * user-facing typo "seperated" -> "separated" in the -S description.
	printf '%s\n'\
		'dump-to-sqlite - Dump one or more tables from MSSQL to a sqlite database ' \
		'                                                                          ' \
		'Usage:                                                                    ' \
		'    dump-to-sqlite [options] <sqlite db file> <tables...>                 ' \
		'                                                                          ' \
		'sqlite db file:                                                           ' \
		'    Path to the SQLite database file                                      ' \
		'                                                                          ' \
		'tables:                                                                   ' \
		'    List of tables to dump to sqlite, these must be unqualified           ' \
		'                                                                          ' \
		'Options:                                                                  ' \
		'    -U, --username <username>   Username provided to MSSQL               ' \
		'    -P, --password <password>   Password provided to MSSQL               ' \
		'    -S, --server <address>      Server where to find the instance of MSSQL. Note ' \
		'                                that this is not a standard URI format and port- ' \
		'                                numbers are separated using comma.        ' \
		'    -d, --database <database>   The database name                         ' \
		'    -s, --schema <schema>       The schema name                           ' \
		'                                                                          ' \
		'    -?, --help                  Shows this help message                   ' \
		'                                                                          ' \
		'Examples:                                                                 ' \
		'    dump-to-sqlite -U sa -P "P455word!!!" -S "localhost,1433" -d "databeast" -s "dbo" /tmp/fuzzies.db "Fuzzies"' \
		'                                                                          '
	exit
}
dump_to_sqlite__main() {
	# Entry point: parse the command line, validate it, then dump every
	# requested table into the sqlite database file.
	#
	# Usage (unchanged): dump_to_sqlite__main [options] <sqlite db file> <tables...>
	#
	# Fixes relative to the original:
	#  * removed the leftover debug statement `echo "$1" > /dev/null`;
	#  * `break 2` -> `break` (there is only one enclosing loop);
	#  * a value option given without its value used to make `shift 2` fail
	#    silently without consuming anything, spinning this loop forever -
	#    it is now reported as a usage error;
	#  * the tables check now tests the array length instead of only the
	#    first element.
	local username password server database schema
	local db_file
	local tables=()
	while (( $# )); do
		case $1 in
			"-?" | --help) dump_to_sqlite__help ;;
			-U | --username) username="$2" ;;
			-P | --password) password="$2" ;;
			-S | --server) server="$2" ;;
			-d | --database) database="$2" ;;
			-s | --schema) schema="$2" ;;
			*)
				# First non-option word is the db file, the rest are tables.
				db_file="$1" ; shift
				tables=( "$@" )
				break
				;;
		esac
		# Every option above consumes its value too; a failed `shift 2`
		# means the value was missing.
		shift 2 || { printf "[ dump-to-sqlite ] option '%s' is missing its value\n" "$1" >&2 ; exit 1 ; }
	done
	if   [[ -z "$username" ]] ; then printf "[ dump-to-sqlite ] 'username' was empty\n" >&2 ; exit 1
	elif [[ -z "$password" ]] ; then printf "[ dump-to-sqlite ] 'password' was empty\n" >&2 ; exit 1
	elif [[ -z "$server" ]] ; then printf "[ dump-to-sqlite ] 'server' was empty\n" >&2 ; exit 1
	elif [[ -z "$database" ]] ; then printf "[ dump-to-sqlite ] 'database' was empty\n" >&2 ; exit 1
	elif [[ -z "$schema" ]] ; then printf "[ dump-to-sqlite ] 'schema' was empty\n" >&2 ; exit 1
	elif [[ -z "$db_file" ]] ; then printf "[ dump-to-sqlite ] 'db_file' was empty\n" >&2 ; exit 1
	elif (( ${#tables[@]} == 0 )) ; then printf "[ dump-to-sqlite ] 'tables' was empty\n" >&2 ; exit 1
	fi
	# The password is wrapped in literal single quotes on purpose: these words
	# are re-parsed by bash after the base64 round trip inside the sqlite
	# `.import '|...'` pipes, and the quotes keep special characters intact.
	local bcp_arguments=("-U" "$username" "-P" "'${password}'" "-S" "$server" "-d" "$database");
	for table in "${tables[@]}"; do
		dump_to_sqlite__create_and_populate_information_schema_table "$db_file" "$database" "$schema" "$table" "${bcp_arguments[@]}" ;
		dump_to_sqlite__create_table "$db_file" "$database" "$schema" "$table" ;
		dump_to_sqlite__dump_table_to_sqlite "$db_file" "$database" "$schema" "$table" "${bcp_arguments[@]}" ;
	done
}
(return 0 2>/dev/null) || dump_to_sqlite__main "$@" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#! /usr/bin/env bash
set -e
#
# dump-to-tsv - dump one MSSQL table as tab separated values to stdout.
#
# Invoke this script; dump-to-tsv <database> <schema> <table> -U sa -P 'P455word!!!' -S localhost
#
# Everything after the three positional arguments ("${@:4}") is handed to `bcp`
# unchanged. The output will be written to stdout, redirecting to where you
# need your tsv to go. Make sure `bcp` is in your path, in the docker images
# it can be found in /opt/mssql-tools/bin
#
declare database="$1"
declare schema="$2"
declare table="$3"
# Column names of the target table in ordinal order - used for the header row.
# NOTE(review): the positional arguments are interpolated into the SQL
# unescaped; this is only safe while the caller controls them.
declare header_query="
	select column_name
	from information_schema.columns
	where '${database}' = table_catalog
	and '${schema}' = table_schema
	and '${table}' = table_name
	order by ordinal_position
"
#
# A few notes here; `bcp` just writes to stdout and nothing else if no file is
# provided, it will also output messages every time with warnings and a summary
# of what has been done. Which pollutes the output if you want to work with it
# as a stream. Good for us everything on *nix is a file and bcp is a little
# naive in this regard, using `cat` in a subshell as a file will allow us to
# write to stdout without adding the extra stuff to it.
#
# Header: each column name comes out as a bcp "row" terminated by a tab, so the
# names arrive tab separated; `cat - <(echo '')` appends the trailing newline
# bcp never writes. bcp's own status messages are discarded via /dev/null.
bcp "$header_query" queryout >(cat - <(echo '')) -c -t'' -r'\t' -d "$database" "${@:4}" > /dev/null;
# Data: tab field terminator, CRLF row terminator. The process substitution
# writes the rows to stdout while bcp's status output is redirected to stderr.
bcp "$schema.$table" out >(cat - <(echo '')) -c -t'\t' -r'\r\n' -d "$database" "${@:4}" >&2
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment