Skip to content

Instantly share code, notes, and snippets.

@vergenzt
Created April 14, 2022 13:56
Show Gist options
  • Save vergenzt/f22f1933d9433c91daead81b0061cd1f to your computer and use it in GitHub Desktop.
Save vergenzt/f22f1933d9433c91daead81b0061cd1f to your computer and use it in GitHub Desktop.
parse_database_url.py
#!/usr/bin/env python3
import argparse, os, shlex, subprocess, sys
from typing import Dict, List
from urllib.parse import ParseResult, urlparse, unquote, parse_qs
def _envvar_vals(url: ParseResult) -> Dict[str, str]:
path: List[str] = url.path.strip('/').split('/')
vals_from_url = {
'SCHEME': url.scheme if url.scheme else '',
'TYPE': url.scheme.split('+')[0] if url.scheme else '', # allow e.g. 'spark+odbc://...'
'HOST': unquote(url.hostname) if url.hostname else '',
'PORT': str(url.port) if url.port else '',
'USERNAME': unquote(url.username) if url.username else '',
'PASSWORD': unquote(url.password) if url.password else '',
'DATABASE': unquote(path[0]) if len(path) > 0 else '',
'SCHEMA': unquote(path[1]) if len(path) > 1 else '',
}
vals_from_query = {
key.upper(): vals[-1] # only keep last query string value if there are multiple
for key, vals in parse_qs(url.query).items()
if not vals_from_url.get(key.upper()) # and only if that value is not specified in the rest of the url
}
return { **vals_from_url, **vals_from_query, }
def envvars_from_url(url_str: str, envvar_prefix: str = 'DB_') -> Dict[str, str]:
url = urlparse(url_str)
vars = _envvar_vals(url)
return {
envvar_prefix + name: val
for name, val in vars.items()
if val != ''
}
def envvars_str(vars: Dict[str, str]) -> str:
assert all( name.isidentifier() for name in vars.keys() )
return '\n'.join( f'{name}={shlex.quote(val)}' for name, val in vars.items() )
if __name__ == '__main__':
parser = argparse.ArgumentParser(description='''
Parse URL parts into individual environment variables prefixed with PREFIX_.
Runs COMMAND with updated environment if provided; otherwise prints list of shell-compatible variable assignment statements.
Variable assignments for missing URL parts are omitted.
Any query parameters provided are included in output with names uppercased.
''')
parser.add_argument('-e', '--env-prefix', metavar='PREFIX_', default='DB_', help='prefix to prepend to output environment variable names (default: `%(default)s`)')
parser.add_argument('-u', '--url', help='URL to parse (default: input envvar named `[PREFIX_]URL` ). Format: [SCHEME://][USER[:PASSWORD]@]HOST[:PORT][/DATABASE[/SCHEMA]][?PARAM=VALUE[&...]]')
parser.add_argument('COMMAND', metavar='[COMMAND...]', nargs=argparse.REMAINDER, help='command line to run with the parsed URL part environment variables (default: print the envvar assignments)')
try:
args = parser.parse_args()
url = args.url or os.environ[args.env_prefix.upper() + 'URL']
except:
parser.print_help()
print()
raise
vars = envvars_from_url(url, args.env_prefix)
if args.COMMAND:
sys.exit(subprocess.call(['/usr/bin/env'] + args.COMMAND, env={ **os.environ, **vars }))
else:
print(envvars_str(vars))
#!/usr/bin/env python3
import inspect, unittest
from parse_database_url import *
class TestParseDatabaseURL(unittest.TestCase):
_cases: str = (
# dbt example
"""
redshift://myuser:pa$$word@foo.bar.baz:1234/database/schema?threads=4
DB_SCHEME=redshift
DB_TYPE=redshift
DB_HOST=foo.bar.baz
DB_PORT=1234
DB_USERNAME=myuser
DB_PASSWORD='pa$$word'
DB_DATABASE=database
DB_SCHEMA=schema
DB_THREADS=4
"""
# "wild" examples from https://github.com/jacobian/dj-database-url/blob/master/test_dj_database_url.py
+ """
mssql://uf07k1i6d8ia0v:wegauwhgeuioweg@ec2-107-21-253-135.compute-1.amazonaws.com/d8r82722r2kuvn?driver=ODBC Driver 13 for SQL Server
DB_SCHEME=mssql
DB_TYPE=mssql
DB_HOST=ec2-107-21-253-135.compute-1.amazonaws.com
DB_USERNAME=uf07k1i6d8ia0v
DB_PASSWORD=wegauwhgeuioweg
DB_DATABASE=d8r82722r2kuvn
DB_DRIVER='ODBC Driver 13 for SQL Server'
mssql://uf07k1i6d8ia0v:wegauwhgeuioweg@ec2-107-21-253-135.compute-1.amazonaws.com\\insnsnss:12345/d8r82722r2kuvn?driver=ODBC Driver 13 for SQL Server
DB_SCHEME=mssql
DB_TYPE=mssql
DB_HOST='ec2-107-21-253-135.compute-1.amazonaws.com\\insnsnss'
DB_PORT=12345
DB_USERNAME=uf07k1i6d8ia0v
DB_PASSWORD=wegauwhgeuioweg
DB_DATABASE=d8r82722r2kuvn
DB_DRIVER='ODBC Driver 13 for SQL Server'
mysql-connector://uf07k1i6d8ia0v:wegauwhgeuioweg@ec2-107-21-253-135.compute-1.amazonaws.com:5431/d8r82722r2kuvn
DB_SCHEME=mysql-connector
DB_TYPE=mysql-connector
DB_HOST=ec2-107-21-253-135.compute-1.amazonaws.com
DB_PORT=5431
DB_USERNAME=uf07k1i6d8ia0v
DB_PASSWORD=wegauwhgeuioweg
DB_DATABASE=d8r82722r2kuvn
mysql://bea6eb025ca0d8:69772142@us-cdbr-east.cleardb.com/heroku_97681db3eff7580?reconnect=true
DB_SCHEME=mysql
DB_TYPE=mysql
DB_HOST=us-cdbr-east.cleardb.com
DB_USERNAME=bea6eb025ca0d8
DB_PASSWORD=69772142
DB_DATABASE=heroku_97681db3eff7580
DB_RECONNECT=true
mysqlgis://uf07k1i6d8ia0v:wegauwhgeuioweg@ec2-107-21-253-135.compute-1.amazonaws.com:5431/d8r82722r2kuvn
DB_SCHEME=mysqlgis
DB_TYPE=mysqlgis
DB_HOST=ec2-107-21-253-135.compute-1.amazonaws.com
DB_PORT=5431
DB_USERNAME=uf07k1i6d8ia0v
DB_PASSWORD=wegauwhgeuioweg
DB_DATABASE=d8r82722r2kuvn
oracle://scott:tiger@/tnsname
DB_SCHEME=oracle
DB_TYPE=oracle
DB_USERNAME=scott
DB_PASSWORD=tiger
DB_DATABASE=tnsname
oracle://scott:tiger@oraclehost:1521/hr
DB_SCHEME=oracle
DB_TYPE=oracle
DB_HOST=oraclehost
DB_PORT=1521
DB_USERNAME=scott
DB_PASSWORD=tiger
DB_DATABASE=hr
oraclegis://scott:tiger@oraclehost:1521/hr
DB_SCHEME=oraclegis
DB_TYPE=oraclegis
DB_HOST=oraclehost
DB_PORT=1521
DB_USERNAME=scott
DB_PASSWORD=tiger
DB_DATABASE=hr
postgis://uf07k1i6d8ia0v:wegauwhgeuioweg@ec2-107-21-253-135.compute-1.amazonaws.com:5431/d8r82722r2kuvn
DB_SCHEME=postgis
DB_TYPE=postgis
DB_HOST=ec2-107-21-253-135.compute-1.amazonaws.com
DB_PORT=5431
DB_USERNAME=uf07k1i6d8ia0v
DB_PASSWORD=wegauwhgeuioweg
DB_DATABASE=d8r82722r2kuvn
postgis://uf07k1i6d8ia0v:wegauwhgeuioweg@ec2-107-21-253-135.compute-1.amazonaws.com:5431/d8r82722r2kuvn?schema=otherschema
DB_SCHEME=postgis
DB_TYPE=postgis
DB_HOST=ec2-107-21-253-135.compute-1.amazonaws.com
DB_PORT=5431
DB_USERNAME=uf07k1i6d8ia0v
DB_PASSWORD=wegauwhgeuioweg
DB_DATABASE=d8r82722r2kuvn
DB_SCHEMA=otherschema
postgis://uf07k1i6d8ia0v:wegauwhgeuioweg@ec2-107-21-253-135.compute-1.amazonaws.com:5431/d8r82722r2kuvn/otherschema
DB_SCHEME=postgis
DB_TYPE=postgis
DB_HOST=ec2-107-21-253-135.compute-1.amazonaws.com
DB_PORT=5431
DB_USERNAME=uf07k1i6d8ia0v
DB_PASSWORD=wegauwhgeuioweg
DB_DATABASE=d8r82722r2kuvn
DB_SCHEMA=otherschema
postgres://%23user:%23password@ec2-107-21-253-135.compute-1.amazonaws.com:5431/%23database
DB_SCHEME=postgres
DB_TYPE=postgres
DB_HOST=ec2-107-21-253-135.compute-1.amazonaws.com
DB_PORT=5431
DB_USERNAME='#user'
DB_PASSWORD='#password'
DB_DATABASE='#database'
postgres://%2FUsers%2Fpostgres%2FRuN/d8r82722r2kuvn
DB_SCHEME=postgres
DB_TYPE=postgres
DB_HOST=/Users/postgres/RuN
DB_DATABASE=d8r82722r2kuvn
postgres://%2Fvar%2Frun%2Fpostgresql/d8r82722r2kuvn
DB_SCHEME=postgres
DB_TYPE=postgres
DB_HOST=/var/run/postgresql
DB_DATABASE=d8r82722r2kuvn
postgres://ieRaekei9wilaim7:wegauwhgeuioweg@[2001:db8:1234::1234:5678:90af]:5431/d8r82722r2kuvn
DB_SCHEME=postgres
DB_TYPE=postgres
DB_HOST=2001:db8:1234::1234:5678:90af
DB_PORT=5431
DB_USERNAME=ieRaekei9wilaim7
DB_PASSWORD=wegauwhgeuioweg
DB_DATABASE=d8r82722r2kuvn
postgres://uf07k1i6d8ia0v:wegauwhgeuioweg@ec2-107-21-253-135.compute-1.amazonaws.com:5431/d8r82722r2kuvn
DB_SCHEME=postgres
DB_TYPE=postgres
DB_HOST=ec2-107-21-253-135.compute-1.amazonaws.com
DB_PORT=5431
DB_USERNAME=uf07k1i6d8ia0v
DB_PASSWORD=wegauwhgeuioweg
DB_DATABASE=d8r82722r2kuvn
postgres://uf07k1i6d8ia0v:wegauwhgeuioweg@ec2-107-21-253-135.compute-1.amazonaws.com:5431/d8r82722r2kuvn?schema=otherschema
DB_SCHEME=postgres
DB_TYPE=postgres
DB_HOST=ec2-107-21-253-135.compute-1.amazonaws.com
DB_PORT=5431
DB_USERNAME=uf07k1i6d8ia0v
DB_PASSWORD=wegauwhgeuioweg
DB_DATABASE=d8r82722r2kuvn
DB_SCHEMA=otherschema
redshift://uf07k1i6d8ia0v:wegauwhgeuioweg@ec2-107-21-253-135.compute-1.amazonaws.com:5439/d8r82722r2kuvn/otherschema
DB_SCHEME=redshift
DB_TYPE=redshift
DB_HOST=ec2-107-21-253-135.compute-1.amazonaws.com
DB_PORT=5439
DB_USERNAME=uf07k1i6d8ia0v
DB_PASSWORD=wegauwhgeuioweg
DB_DATABASE=d8r82722r2kuvn
DB_SCHEMA=otherschema
""")
def test_cases(self):
for case_str in inspect.cleandoc(self._cases).split('\n\n'):
url, expected_output = case_str.split('\n', 1)
with self.subTest(url=url):
self.assertEqual(envvars_str(envvars_from_url(url)), expected_output)
if __name__ == '__main__':
unittest.main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment