@data-goblin · Created May 5, 2022
Python script to get all workspaces managed in a tenant, and the objects they contain (users, artifacts), via the Power BI Admin REST API
#########################################################################################
# Authentication - Replace string variables with your relevant values
#########################################################################################
import json, requests, pandas as pd
try:
    from azure.identity import ClientSecretCredential
except ImportError:
    !pip install azure-identity  # Colab/IPython shell magic; see Note 1
    from azure.identity import ClientSecretCredential
# --------------------------------------------------------------------------------------#
# String variables: Replace with your own
tenant = 'Your-Tenant-ID'
client = 'Your-App-Client-ID'
client_secret = 'Your-Client-Secret-Value' # See Note 2: Better to use key vault
api = 'https://analysis.windows.net/powerbi/api/.default'
# --------------------------------------------------------------------------------------#
# Generates the access token for the Service Principal
auth = ClientSecretCredential(authority='https://login.microsoftonline.com/',
                              tenant_id=tenant,
                              client_id=client,
                              client_secret=client_secret)
access_token = auth.get_token(api).token
print('\nSuccessfully authenticated.')
#########################################################################################
# Note 1: This code was authored for use in a Google Colab Notebook
# Note 2: Check here for info on using Azure Key Vault:
# https://docs.microsoft.com/en-us/azure/key-vault/secrets/quick-create-python
# The above code should be modified to programmatically get the secret from AKV
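#
# A minimal sketch of that AKV approach, assuming the azure-keyvault-secrets
# package and placeholder vault/secret names (adapt both to your environment):
#
#     from azure.identity import DefaultAzureCredential
#     from azure.keyvault.secrets import SecretClient
#     kv = SecretClient(vault_url='https://your-vault-name.vault.azure.net/',
#                       credential=DefaultAzureCredential())
#     client_secret = kv.get_secret('your-secret-name').value
#
# DefaultAzureCredential assumes a usable Azure identity is available where
# the notebook runs; in Colab you may need another azure.identity credential.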
#########################################################################################
# Admin: Get all Workspaces & containing things (users, artifacts)
#########################################################################################
base_url = 'https://api.powerbi.com/v1.0/myorg/'
header = {'Authorization': f'Bearer {access_token}'}
# The admin Groups API requires a ?$top=n parameter (n up to 5000)
topn = '$top=5000'
# Get workspaces only
admin_groups = f'{base_url}admin/groups?{topn}'
# Get workspaces with all downstream things
admin_groups_expand = f'{base_url}admin/groups?$expand=users,reports,dashboards,datasets,dataflows,workbooks&{topn}'
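# Note: $top is capped at 5000 per request. If the tenant has more workspaces
# than that, page through them with $skip. A sketch (not used below; the
# single request below assumes at most 5000 workspaces):
#
#     all_groups, skip, page_size = [], 0, 5000
#     while True:
#         page = requests.get(f'{admin_groups_expand}&$skip={skip}', headers=header)
#         value = page.json().get('value', [])
#         all_groups.extend(value)
#         if len(value) < page_size:
#             break
#         skip += page_size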
# HTTP GET Request
groups = requests.get(admin_groups_expand, headers=header)
# Response code (200 = OK; 400 = Bad Request; 401 = Unauthorized)
print(groups)
try:
    groups = json.loads(groups.content)
except Exception as e:
    print(e)
    exit()
#########################################################################################
# Custom function to flatten the dataframe containing nested json / list objects
#########################################################################################
# Reference: https://stackoverflow.com/a/61269285
def flatten_nested_json_df(df):
    df = df.reset_index()
    # search for columns to explode/flatten
    s = (df.applymap(type) == list).all()
    list_columns = s[s].index.tolist()
    s = (df.applymap(type) == dict).all()
    dict_columns = s[s].index.tolist()
    while len(list_columns) > 0 or len(dict_columns) > 0:
        new_columns = []
        for col in dict_columns:
            # flatten dicts horizontally, prefixing new columns with the source column name
            horiz_exploded = pd.json_normalize(df[col]).add_prefix(f'{col}.')
            horiz_exploded.index = df.index
            df = pd.concat([df, horiz_exploded], axis=1).drop(columns=[col])
            new_columns.extend(horiz_exploded.columns)
        for col in list_columns:
            # explode lists vertically, adding new rows
            df = df.drop(columns=[col]).join(df[col].explode().to_frame())
            new_columns.append(col)
        # check if there are still dict or list fields to flatten
        s = (df[new_columns].applymap(type) == list).all()
        list_columns = s[s].index.tolist()
        s = (df[new_columns].applymap(type) == dict).all()
        dict_columns = s[s].index.tolist()
    return df
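# Quick illustration on toy data (not part of the API flow): a column of
# dicts is flattened into prefixed scalar columns, e.g.
#
#     demo = pd.DataFrame({'id': [1, 2], 'owner': [{'name': 'a'}, {'name': 'b'}]})
#     flatten_nested_json_df(demo)
#     # -> columns: index, id, owner.name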
#########################################################################################
# Return the results in dataframes
#########################################################################################
try:
    result = pd.concat([pd.json_normalize(x) for x in groups['value']])
    print('Found', len(result['id'].dropna().unique().tolist()), 'workspaces.\n\nInside those workspaces...')
except Exception:
    print('No workspaces found.')
#########################################################################################
# Get users in those workspaces
#########################################################################################
try:
    groups_users = flatten_nested_json_df(result[['id', 'users']].explode('users').dropna())
    print('Found', len(groups_users['users.identifier'].dropna().unique().tolist()), 'users.')
except Exception:
    print('No users found.')
#########################################################################################
# Get datasets in those workspaces
#########################################################################################
try:
    groups_datasets = flatten_nested_json_df(result[['id', 'datasets']].explode('datasets').dropna())
    print('Found', len(groups_datasets['datasets.id'].dropna().unique().tolist()), 'datasets.')
except Exception:
    print('No datasets found.')
#########################################################################################
# Get reports in those workspaces
#########################################################################################
try:
    groups_reports = flatten_nested_json_df(result[['id', 'reports']].explode('reports').dropna())
    print('Found', len(groups_reports['reports.id'].dropna().unique().tolist()), 'reports.')
except Exception:
    print('No reports found.')
#########################################################################################
# Get dashboards in those workspaces
#########################################################################################
try:
    groups_dashboards = flatten_nested_json_df(result[['id', 'dashboards']].explode('dashboards').dropna())
    print('Found', len(groups_dashboards['dashboards.id'].dropna().unique().tolist()), 'dashboards.')
except Exception:
    print('No dashboards found.')
#########################################################################################
# Get workbooks in those workspaces
#########################################################################################
try:
    groups_workbooks = flatten_nested_json_df(result[['id', 'workbooks']].explode('workbooks').dropna())
    print('Found', len(groups_workbooks['workbooks.id'].dropna().unique().tolist()), 'workbooks.')
except Exception:
    print('No workbooks found.')
#########################################################################################
# Get dataflows in those workspaces
#########################################################################################
try:
    groups_dataflows = flatten_nested_json_df(result[['id', 'dataflows']].explode('dataflows').dropna())
    print('Found', len(groups_dataflows['dataflows.id'].dropna().unique().tolist()), 'dataflows.')
except Exception:
    print('No dataflows found.')
#########################################################################################
# Get workspaces only
#########################################################################################
result = result.drop(columns=['users', 'datasets', 'dataflows', 'workbooks', 'dashboards', 'reports'], errors='ignore')
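# Optionally persist the flattened tables, e.g. as CSV files (a sketch;
# adjust the paths and pick only the tables you need):
#
#     result.to_csv('workspaces.csv', index=False)
#     groups_users.to_csv('workspace_users.csv', index=False)
#     groups_datasets.to_csv('workspace_datasets.csv', index=False)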