Skip to content

Instantly share code, notes, and snippets.

@rcabr
Last active February 3, 2021 15:12
Show Gist options
  • Save rcabr/e4f63b5cd89691329334c3c2c957142f to your computer and use it in GitHub Desktop.
Creates a CSV file reporting all ACLs on the folders of an Azure Data Lake Storage Gen2 account.
<#
.SYNOPSIS
Reports the ACL entries of every folder in a Data Lake Gen2 storage account as a CSV file.

.PARAMETER SubscriptionName
Name of the Azure subscription; passed to Set-AzContext -Subscription.

.PARAMETER StorageAcctName
Name of the storage account whose file systems are scanned; passed to
New-AzStorageContext -StorageAccountName.

.PARAMETER OutputCsvFilePath
Path of the CSV file to write; passed to Export-Csv -Path.
#>
[CmdletBinding()]
param (
    [Parameter()]
    [string]
    $SubscriptionName,

    [Parameter()]
    [string]
    $StorageAcctName,

    # A stray double quote used to follow this name and left the rest of the
    # script inside an unterminated string, so the file could not be parsed.
    [Parameter()]
    [string]
    $OutputCsvFilePath
)
# Local cache of Azure AD lookups, keyed by entity (object) ID, so that each
# principal is queried from AAD at most once per run.
$ADPrincipalCache = @{}

function Lookup-ADPrincipal {
    <#
    .SYNOPSIS
    Resolves an ACL entity ID to the DisplayName and ObjectType of an Azure AD principal.

    .DESCRIPTION
    Tries Get-AzADUser, then Get-AzADGroup, then Get-AzADServicePrincipal, and
    keeps the first non-null result. Results are stored in $ADPrincipalCache
    (defined outside the function) to reduce the number of AAD queries.

    .PARAMETER EntityId
    Object ID of the principal. A null value yields a placeholder whose
    DisplayName is "Public" and whose ObjectType is null.

    .OUTPUTS
    An object with DisplayName and ObjectType properties.
    #>
    [CmdletBinding()]
    param ($EntityId)

    # Entries without an entity ID are reported as "Public"; no AAD query needed.
    if ($null -eq $EntityId) {
        return [PSCustomObject]@{
            DisplayName = "Public"
            ObjectType  = $Null
        }
    }

    # Check the cache. ContainsKey plus the indexer is used rather than
    # `$cache.$key` member access, which can resolve to Hashtable members
    # (Count, Keys, ...) instead of an entry.
    if ($ADPrincipalCache.ContainsKey($EntityId)) {
        Write-Verbose "AAD cache hit."
        return $ADPrincipalCache[$EntityId]
    }

    # Call AAD. The queries use this function's own $EntityId parameter; the
    # previous code read $aclEntry from the caller's scope, so the function
    # ignored its argument whenever it was called from anywhere else.
    Write-Verbose "AAD cache miss. Querying AAD."
    $principal = (Get-AzADUser -ObjectId $EntityId) ??
        (Get-AzADGroup -ObjectId $EntityId) ??
        (Get-AzADServicePrincipal -ObjectId $EntityId)
    $principal = $principal | Select-Object DisplayName, ObjectType

    # Indexer assignment never throws on an existing key, unlike .Add().
    $ADPrincipalCache[$EntityId] = $principal
    return $principal
}
# --- Sign in and select the subscription / storage account -------------------
Write-Verbose "Prompting for credentials"
$login = Connect-AzAccount
# NOTE(review): assumes the Connect-AzAccount result exposes .Account and
# .TenantId directly; if these print empty, they may live under .Context - confirm.
Write-Output "Connected as '$($login.Account)' in Azure tenant '$($login.TenantId)'"

Write-Verbose "Setting context to subscription '$SubscriptionName'"
Set-AzContext -Subscription $SubscriptionName

Write-Verbose "Connecting to storage account '$StorageAcctName' using account '$($login.Account)'"
$ctx = New-AzStorageContext -StorageAccountName $StorageAcctName -UseConnectedAccount

# --- Enumerate folders in every file system (a.k.a. container) ---------------
# A List is used instead of `array += item`, which copies the array on every append.
$folderList = [System.Collections.Generic.List[object]]::new()

Write-Verbose "Getting all file systems in storage account '$StorageAcctName'"
# @() keeps .Count meaningful when zero or one file system is returned.
$fileSystems = @(Get-AzDatalakeGen2FileSystem -Context $ctx | Select-Object -ExpandProperty Name)

$batchSize = 1000   # maximum number of items requested per listing call
$counter = 0
foreach ($filesystemName in $fileSystems) {
    $counter += 1
    Write-Output "($counter/$($fileSystems.Count)) Querying file system '$filesystemName' in storage account '$StorageAcctName'"

    # Page through the recursive listing until no continuation token is returned.
    $continueToken = $null
    do {
        $children = @(Get-AzDataLakeGen2ChildItem -Context $ctx -FileSystem $filesystemName -Recurse -MaxCount $batchSize -ContinuationToken $continueToken)
        if ($children.Count -le 0) { break }

        # Keep directories only, reduced to the three properties used later.
        $directories = $children |
            Where-Object -Property IsDirectory -EQ -Value $true |
            Select-Object -ExpandProperty Directory |
            Select-Object AccountName, FileSystemName, Path

        # Adding one by one means a batch without directories contributes
        # nothing (rather than risking a null element in the list).
        foreach ($directory in $directories) {
            $folderList.Add($directory)
        }

        # The token for the next page is read from the last item of this page.
        $continueToken = $children[-1].ContinuationToken
    }
    while ($null -ne $continueToken)
}
Write-Output "Found $($folderList.Count) folders in $($fileSystems.Count) file systems in storage account $StorageAcctName."

# --- Collect one output row per (folder, ACL entry) ---------------------------
Write-Output "Retrieving Access Control Lists for folders."
$foldersWithAcls = [System.Collections.Generic.List[object]]::new()
$counter = 0
foreach ($folder in $folderList) {
    $counter += 1
    Write-Output " ($counter/$($folderList.Count)) Getting ACL for file system '$($folder.FileSystemName)' folder '$($folder.Path)'"

    # A trailing backtick used to end this line; it continued the command onto
    # the following `foreach` statement and broke parsing of the script.
    $currentPath = Get-AzDataLakeGen2Item -Context $ctx -FileSystem $folder.FileSystemName -Path $folder.Path

    # Mask entries are excluded from the report.
    foreach ($aclEntry in ($currentPath.ACL | Where-Object AccessControlType -NE "Mask")) {
        # Resolve the entity ID to a display name / object type (cached).
        $p = Lookup-ADPrincipal -EntityId $aclEntry.EntityId # -Verbose
        $foldersWithAcls.Add([PSCustomObject]@{
            AccountName        = $folder.AccountName
            FileSystem         = $folder.FileSystemName
            Path               = $folder.Path
            EntityName         = $p.DisplayName
            EntityType         = $p.ObjectType
            Permissions        = $aclEntry.Permissions
            DefaultPermissions = $aclEntry.DefaultScope
            AccessControlType  = $aclEntry.AccessControlType
            EntityId           = $aclEntry.EntityId
        })
    }
}

# --- Save to CSV file ---------------------------------------------------------
Write-Output "Writing output to $OutputCsvFilePath"
$foldersWithAcls | Export-Csv -Path $OutputCsvFilePath
Write-Output "Done."
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment