Created
May 5, 2022 20:22
-
-
Save data-goblin/cbe5857b3ae53409a3249a76190f980b to your computer and use it in GitHub Desktop.
Python script that uses the Power BI Admin REST API to get all workspaces managed in a tenant, along with the objects they contain (users, artifacts)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
######################################################################################### | |
# Authentication - Replace string variables with your relevant values | |
######################################################################################### | |
import json, requests, pandas as pd

# azure-identity may not be pre-installed (e.g. a fresh Colab runtime).
# NOTE: the original used IPython's `!pip install` magic, which is a syntax
# error in a plain .py file; install programmatically so the script also runs
# outside a notebook.  The PyPI package name is `azure-identity`.
try:
    from azure.identity import ClientSecretCredential
except ImportError:
    import subprocess, sys
    subprocess.check_call([sys.executable, '-m', 'pip', 'install', 'azure-identity'])
    from azure.identity import ClientSecretCredential
# --------------------------------------------------------------------------------------#
# String variables: Replace with your own
tenant = 'Your-Tenant-ID'                    # Azure AD tenant (directory) ID
client = 'Your-App-Client-ID'                # App registration (service principal) client ID
client_secret = 'Your-Client-Secret-Value'   # See Note 2: Better to use key vault
api = 'https://analysis.windows.net/powerbi/api/.default'  # Power BI resource scope
# --------------------------------------------------------------------------------------#
# Generates the access token for the Service Principal
auth = ClientSecretCredential(authority = 'https://login.microsoftonline.com/',
                              tenant_id = tenant,
                              client_id = client,
                              client_secret = client_secret)
# get_token returns an AccessToken named tuple; keep only the bearer string.
access_token = auth.get_token(api).token
print('\nSuccessfully authenticated.')
######################################################################################### | |
# Note 1: This code was authored for use in a Google Colab Notebook | |
# Note 2: Check here for info on using Azure Key Vault: | |
# https://docs.microsoft.com/en-us/azure/key-vault/secrets/quick-create-python | |
# The above code should be modified to programmatically get the secret from AKV | |
######################################################################################### | |
# Admin: Get all Workspaces & containing things (users, artifacts) | |
######################################################################################### | |
base_url = 'https://api.powerbi.com/v1.0/myorg/'
header = {'Authorization': f'Bearer {access_token}'}
# The admin API doesn't work unless you provide a ?$top=n argument
# Top 5000 workspaces
topn = '$top=5000'
# Get workspaces only
admin_groups = f'{base_url}admin/groups?{topn}'
# Get workspaces with all downstream things
admin_groups_expand = f'{base_url}admin/groups?$expand=users,reports,dashboards,datasets,dataflows,workbooks&{topn}'
# HTTP GET Request
groups = requests.get(admin_groups_expand, headers=header)
# Response code (200 = Success; 401 = Unauthorized; 404 = Not Found)
print(groups)
# Rebind `groups` from Response to the parsed JSON payload (a dict with a
# 'value' list of workspaces); abort the script if the body isn't valid JSON.
try:
    groups = json.loads(groups.content)
except Exception as e:
    print(e)
    # `exit()` only exists when the site module is loaded; SystemExit always works.
    raise SystemExit(1)
######################################################################################### | |
# Custom function to flatten the dataframe containing nested json / list objects | |
######################################################################################### | |
# Reference: https://stackoverflow.com/a/61269285 | |
# Reference: https://stackoverflow.com/a/61269285
def flatten_nested_json_df(df):
    """Flatten a DataFrame whose cells hold nested JSON objects.

    Columns in which *every* cell is a dict are expanded horizontally into
    new '<col>.<key>' columns; columns in which every cell is a list are
    exploded vertically (one row per element).  The scan then repeats on
    the newly created columns until nothing nested remains.

    Parameters
    ----------
    df : pd.DataFrame
        Frame possibly containing dict / list cells (e.g. parsed REST JSON).

    Returns
    -------
    pd.DataFrame
        Fully flattened frame; the original index is kept as an extra
        column by the initial reset_index().
    """
    # DataFrame.applymap was renamed to DataFrame.map in pandas 2.1 and
    # deprecated; use whichever this pandas version provides.
    cellwise = pd.DataFrame.map if hasattr(pd.DataFrame, 'map') else pd.DataFrame.applymap

    def _columns_holding(frame, kind):
        # Column labels where every cell is an instance of `kind`.
        mask = (cellwise(frame, type) == kind).all()
        return mask[mask].index.tolist()

    df = df.reset_index()
    list_columns = _columns_holding(df, list)
    dict_columns = _columns_holding(df, dict)

    while list_columns or dict_columns:
        new_columns = []
        for col in dict_columns:
            # Expand each dict sideways into prefixed scalar columns.
            horiz_exploded = pd.json_normalize(df[col]).add_prefix(f'{col}.')
            horiz_exploded.index = df.index
            df = pd.concat([df, horiz_exploded], axis=1).drop(columns=[col])
            new_columns.extend(horiz_exploded.columns)
        for col in list_columns:
            # Explode lists vertically, duplicating the other columns per element.
            df = df.drop(columns=[col]).join(df[col].explode().to_frame())
            new_columns.append(col)
        # Only the freshly created columns can still contain nested values.
        list_columns = _columns_holding(df[new_columns], list)
        dict_columns = _columns_holding(df[new_columns], dict)
    return df
######################################################################################### | |
# Return the results in dataframes | |
######################################################################################### | |
try:
    # One normalized row (or more, for nested payloads) per workspace.
    result = pd.concat([pd.json_normalize(x) for x in groups['value']])
    print('Found', len(result['id'].dropna().unique().tolist()),
          'workspaces.\n\nInside those workspaces...')
except Exception:
    # Best-effort: an auth/permission failure upstream leaves `groups`
    # without a 'value' key; report and carry on.
    print('No workspaces found.')


def _extract_artifact(column, id_column):
    """Flatten one nested-artifact column of the module-level `result` frame.

    Explodes `result[column]` (a list-of-dicts per workspace), flattens it
    with flatten_nested_json_df, and prints the count of distinct values in
    `id_column`.  Returns the flattened frame, or None when the column is
    missing or `result` was never created — mirroring the original script's
    best-effort behaviour and its exact printed messages.
    """
    try:
        flat = flatten_nested_json_df(result[['id', column]].explode(column).dropna())
        print('Found', len(flat[id_column].dropna().unique().tolist()), column + '.')
        return flat
    except Exception:
        print('No ' + column + ' found.')
        return None


#########################################################################################
# Get each artifact type contained in those workspaces
#########################################################################################
groups_users = _extract_artifact('users', 'users.identifier')
groups_datasets = _extract_artifact('datasets', 'datasets.id')
groups_reports = _extract_artifact('reports', 'reports.id')
groups_dashboards = _extract_artifact('dashboards', 'dashboards.id')
groups_workbooks = _extract_artifact('workbooks', 'workbooks.id')
groups_dataflows = _extract_artifact('dataflows', 'dataflows.id')

#########################################################################################
# Get workspaces only
#########################################################################################
# Drop the nested artifact columns so `result` holds workspace-level scalars only.
_nested_cols = ['users', 'datasets', 'dataflows', 'workbooks', 'dashboards', 'reports']
result = result[[c for c in result.columns if c not in _nested_cols]]
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment