Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
Tutorial on using tableau-api-lib and Tableau's REST API and Metadata API to pull underlying data sources across the entire server.
import pandas as pd
from tableau_api_lib import TableauServerConnection
from tableau_api_lib.utils.querying import get_sites_dataframe
SITE_NAMES = None # if this variable is None then all sites will be queried
# uncomment the line below if defining a subset of sites
# SITE_NAMES = ['SiteA', 'SiteB', 'SiteC'] # optional variable to define a subset of sites
DB_SERVER_QUERY = """
{
databaseServers {
name
hostName
port
connectionType
isEmbedded
}
}"""
DATA_CONNECTIONS_QUERY = """
{
databases {
name
__typename
connectionType
}
}"""
TS_CONFIG = {
'my_env': {
'server': 'https://YourTableauServer.com',
'api_version': '<YOUR_API_VERSION>',
'username': '<YOUR_USERNAME>',
'password': '<YOUR_PASSWORD>',
'site_name': '<YOUR_SITE_NAME>',
'site_url': '<YOUR_SITE_CONTENT_URL>'
}
}
conn = TableauServerConnection(config_json=TS_CONFIG, env='my_env')
conn.sign_in()
sites_df = get_sites_dataframe(conn)
if any(SITE_NAMES):
sites_df = sites_df.loc[sites_df['name'].isin(SITE_NAMES)]
db_servers_df = pd.DataFrame()
data_connections_df = pd.DataFrame()
for index, site in sites_df.iterrows():
conn.switch_site(site['contentUrl'])
response_a = conn.metadata_graphql_query(DB_SERVER_QUERY)
response_b = conn.metadata_graphql_query(DATA_CONNECTIONS_QUERY)
site_db_servers_df = pd.DataFrame(response_a.json()['data']['databaseServers'])
site_data_connections_df = pd.DataFrame(response_b.json()['data']['databases'])
site_db_servers_df['site_name'] = site['name']
site_data_connections_df['site_name'] = site['name']
db_servers_df = db_servers_df.append(site_db_servers_df, sort=False, ignore_index=True)
data_connections_df = data_connections_df.append(site_data_connections_df, sort=False, ignore_index=True)
combined_db_df = data_connections_df.merge(db_servers_df, how='left', on=['name', 'connectionType', 'site_name'])
combined_db_df_unique = combined_db_df.drop(columns=['name', 'site_name']).drop_duplicates()
conn.sign_out()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment