Created
May 5, 2022 20:22
-
-
Save data-goblin/cbe5857b3ae53409a3249a76190f980b to your computer and use it in GitHub Desktop.
Python script that uses the Power BI Admin REST API to get all workspaces managed in a tenant, along with the objects they contain (users, artifacts)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
######################################################################################### | |
# Authentication - Replace string variables with your relevant values | |
######################################################################################### | |
import json, requests, pandas as pd

# azure-identity may not be pre-installed (e.g. a fresh Colab runtime).
# NOTE: the original used IPython's `!pip install` magic, which is a syntax
# error in a plain .py file; install programmatically so the script also runs
# outside a notebook.  The PyPI package name is `azure-identity`.
try:
    from azure.identity import ClientSecretCredential
except ImportError:
    import subprocess, sys
    subprocess.check_call([sys.executable, '-m', 'pip', 'install', 'azure-identity'])
    from azure.identity import ClientSecretCredential
# --------------------------------------------------------------------------------------#
# String variables: Replace with your own
tenant = 'Your-Tenant-ID'                    # Azure AD tenant (directory) ID
client = 'Your-App-Client-ID'                # App registration (service principal) client ID
client_secret = 'Your-Client-Secret-Value'   # See Note 2: Better to use key vault
api = 'https://analysis.windows.net/powerbi/api/.default'  # Power BI resource scope
# --------------------------------------------------------------------------------------#
# Generates the access token for the Service Principal
auth = ClientSecretCredential(authority = 'https://login.microsoftonline.com/',
                              tenant_id = tenant,
                              client_id = client,
                              client_secret = client_secret)
# get_token returns an AccessToken named tuple; keep only the bearer string.
access_token = auth.get_token(api).token
print('\nSuccessfully authenticated.')
######################################################################################### | |
# Note 1: This code was authored for use in a Google Colab Notebook | |
# Note 2: Check here for info on using Azure Key Vault: | |
# https://docs.microsoft.com/en-us/azure/key-vault/secrets/quick-create-python | |
# The above code should be modified to programmatically get the secret from AKV | |
######################################################################################### | |
# Admin: Get all Workspaces & containing things (users, artifacts) | |
######################################################################################### | |
base_url = 'https://api.powerbi.com/v1.0/myorg/'
header = {'Authorization': f'Bearer {access_token}'}
# The admin API doesn't work unless you provide a ?$top=n argument
# Top 5000 workspaces
topn = '$top=5000'
# Get workspaces only
admin_groups = f'{base_url}admin/groups?{topn}'
# Get workspaces with all downstream things
admin_groups_expand = f'{base_url}admin/groups?$expand=users,reports,dashboards,datasets,dataflows,workbooks&{topn}'
# HTTP GET Request
groups = requests.get(admin_groups_expand, headers=header)
# Response code (200 = Success; 401 = Unauthorized; 404 = Not Found)
print(groups)
# Rebind `groups` from Response to the parsed JSON payload (a dict with a
# 'value' list of workspaces); abort the script if the body isn't valid JSON.
try:
    groups = json.loads(groups.content)
except Exception as e:
    print(e)
    # `exit()` only exists when the site module is loaded; SystemExit always works.
    raise SystemExit(1)
######################################################################################### | |
# Custom function to flatten the dataframe containing nested json / list objects | |
######################################################################################### | |
# Reference: https://stackoverflow.com/a/61269285 | |
# Reference: https://stackoverflow.com/a/61269285
def flatten_nested_json_df(df):
    """Flatten a DataFrame whose cells hold nested JSON objects.

    Columns in which *every* cell is a dict are expanded horizontally into
    new '<col>.<key>' columns; columns in which every cell is a list are
    exploded vertically (one row per element).  The scan then repeats on
    the newly created columns until nothing nested remains.

    Parameters
    ----------
    df : pd.DataFrame
        Frame possibly containing dict / list cells (e.g. parsed REST JSON).

    Returns
    -------
    pd.DataFrame
        Fully flattened frame; the original index is kept as an extra
        column by the initial reset_index().
    """
    # DataFrame.applymap was renamed to DataFrame.map in pandas 2.1 and
    # deprecated; use whichever this pandas version provides.
    cellwise = pd.DataFrame.map if hasattr(pd.DataFrame, 'map') else pd.DataFrame.applymap

    def _columns_holding(frame, kind):
        # Column labels where every cell is an instance of `kind`.
        mask = (cellwise(frame, type) == kind).all()
        return mask[mask].index.tolist()

    df = df.reset_index()
    list_columns = _columns_holding(df, list)
    dict_columns = _columns_holding(df, dict)

    while list_columns or dict_columns:
        new_columns = []
        for col in dict_columns:
            # Expand each dict sideways into prefixed scalar columns.
            horiz_exploded = pd.json_normalize(df[col]).add_prefix(f'{col}.')
            horiz_exploded.index = df.index
            df = pd.concat([df, horiz_exploded], axis=1).drop(columns=[col])
            new_columns.extend(horiz_exploded.columns)
        for col in list_columns:
            # Explode lists vertically, duplicating the other columns per element.
            df = df.drop(columns=[col]).join(df[col].explode().to_frame())
            new_columns.append(col)
        # Only the freshly created columns can still contain nested values.
        list_columns = _columns_holding(df[new_columns], list)
        dict_columns = _columns_holding(df[new_columns], dict)
    return df
######################################################################################### | |
# Return the results in dataframes | |
######################################################################################### | |
try:
    # One normalized row (or more, for nested payloads) per workspace.
    result = pd.concat([pd.json_normalize(x) for x in groups['value']])
    print('Found', len(result['id'].dropna().unique().tolist()),
          'workspaces.\n\nInside those workspaces...')
except Exception:
    # Best-effort: an auth/permission failure upstream leaves `groups`
    # without a 'value' key; report and carry on.
    print('No workspaces found.')


def _extract_artifact(column, id_column):
    """Flatten one nested-artifact column of the module-level `result` frame.

    Explodes `result[column]` (a list-of-dicts per workspace), flattens it
    with flatten_nested_json_df, and prints the count of distinct values in
    `id_column`.  Returns the flattened frame, or None when the column is
    missing or `result` was never created — mirroring the original script's
    best-effort behaviour and its exact printed messages.
    """
    try:
        flat = flatten_nested_json_df(result[['id', column]].explode(column).dropna())
        print('Found', len(flat[id_column].dropna().unique().tolist()), column + '.')
        return flat
    except Exception:
        print('No ' + column + ' found.')
        return None


#########################################################################################
# Get each artifact type contained in those workspaces
#########################################################################################
groups_users = _extract_artifact('users', 'users.identifier')
groups_datasets = _extract_artifact('datasets', 'datasets.id')
groups_reports = _extract_artifact('reports', 'reports.id')
groups_dashboards = _extract_artifact('dashboards', 'dashboards.id')
groups_workbooks = _extract_artifact('workbooks', 'workbooks.id')
groups_dataflows = _extract_artifact('dataflows', 'dataflows.id')

#########################################################################################
# Get workspaces only
#########################################################################################
# Drop the nested artifact columns so `result` holds workspace-level scalars only.
_nested_cols = ['users', 'datasets', 'dataflows', 'workbooks', 'dashboards', 'reports']
result = result[[c for c in result.columns if c not in _nested_cols]]
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment