Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save abhi18av/102f2986d707aec73f6f04d000d45145 to your computer and use it in GitHub Desktop.
Save abhi18av/102f2986d707aec73f6f04d000d45145 to your computer and use it in GitHub Desktop.
A script to configure (file share and blob container mounting) and create an Azure Batch pool suitable for Nextflow Tower.
from azure.identity import DefaultAzureCredential
from azure.mgmt.batch import BatchManagementClient
import os
from dotenv import load_dotenv
import sys
# Pull settings from a .env file (see the usage notes in this script) into
# the process environment before they are read below.
load_dotenv()

# Azure subscription, Batch account and storage settings. Each value is None
# when the corresponding environment variable is not defined.
AZURE_SUBSCRIPTION_ID = os.getenv('AZURE_SUBSCRIPTION_ID')
AZURE_BATCH_RESOURCE_GROUP_NAME = os.getenv('AZURE_BATCH_RESOURCE_GROUP_NAME')
AZURE_BATCH_ACCOUNT_NAME = os.getenv('AZURE_BATCH_ACCOUNT_NAME')
AZURE_BATCH_STORAGE_ACCOUNT_NAME = os.getenv('AZURE_BATCH_STORAGE_ACCOUNT_NAME')
AZURE_BATCH_STORAGE_ACCOUNT_KEY = os.getenv('AZURE_BATCH_STORAGE_ACCOUNT_KEY')
AZURE_BATCH_STORAGE_FILE_SHARE = os.getenv('AZURE_BATCH_STORAGE_FILE_SHARE')
AZURE_BATCH_STORAGE_BLOB_CONTAINER = os.getenv('AZURE_BATCH_STORAGE_BLOB_CONTAINER')

# The pool name is the first command-line argument. Exit with a usage message
# rather than an IndexError traceback when it was not supplied.
if len(sys.argv) < 2:
    sys.exit("usage: python create-batch-pool.py POOL_NAME")
AZURE_BATCH_POOL_NAME = sys.argv[1]
"""
# PREREQUISITES
pip install azure-identity
pip install azure-mgmt-batch
pip install python-dotenv
# USAGE
python create-batch-pool.py POOL_NAME
Created by combining Azure instructions with Nextflow auto-created pool json:
https://learn.microsoft.com/en-us/rest/api/batchmanagement/pool/create?tabs=Python#createpool---full-virtualmachineconfiguration
Make sure that there is a .env file in the same folder as this script. The .env file must define the environment
variables used in this script.
Before running the sample, either set the client ID, tenant ID and client secret
of the AAD application as the environment variables AZURE_CLIENT_ID, AZURE_TENANT_ID and
AZURE_CLIENT_SECRET, or authenticate via `az login`.
"""
def main(mount=True, file_share=True):
    """Create an Azure Batch pool and print the service response.

    The pool definition (CentOS container image, ``standard_d16s_v3`` nodes,
    an autoscale formula and a start task that installs azcopy) is submitted
    through the Batch management client under the name held in
    ``AZURE_BATCH_POOL_NAME``.

    Args:
        mount: When true, a storage mount is added to the pool definition.
        file_share: Only consulted when ``mount`` is true. True mounts the
            Azure file share ``AZURE_BATCH_STORAGE_FILE_SHARE``; false mounts
            the blob container ``AZURE_BATCH_STORAGE_BLOB_CONTAINER``.

    Raises:
        ValueError: If a setting needed for the requested configuration is
            missing from the environment.
    """
    # Local import keeps this function self-contained; the module's import
    # block does not provide datetime.
    from datetime import datetime, timezone

    # Fail early with a readable message instead of a TypeError from string
    # concatenation (or a rejected request) when a setting is missing.
    required = {
        "AZURE_SUBSCRIPTION_ID": AZURE_SUBSCRIPTION_ID,
        "AZURE_BATCH_RESOURCE_GROUP_NAME": AZURE_BATCH_RESOURCE_GROUP_NAME,
        "AZURE_BATCH_ACCOUNT_NAME": AZURE_BATCH_ACCOUNT_NAME,
    }
    if mount:
        required["AZURE_BATCH_STORAGE_ACCOUNT_NAME"] = AZURE_BATCH_STORAGE_ACCOUNT_NAME
        required["AZURE_BATCH_STORAGE_ACCOUNT_KEY"] = AZURE_BATCH_STORAGE_ACCOUNT_KEY
        if file_share:
            required["AZURE_BATCH_STORAGE_FILE_SHARE"] = AZURE_BATCH_STORAGE_FILE_SHARE
        else:
            required["AZURE_BATCH_STORAGE_BLOB_CONTAINER"] = AZURE_BATCH_STORAGE_BLOB_CONTAINER
    missing = [name for name, value in required.items() if not value]
    if missing:
        raise ValueError(
            "Missing required environment variable(s): " + ", ".join(missing)
        )

    client = BatchManagementClient(
        credential=DefaultAzureCredential(),
        subscription_id=AZURE_SUBSCRIPTION_ID,
    )

    # The formula measures the pool's age from an embedded timestamp so that
    # it can deploy one node during the first interval. A fixed date in the
    # past would make that branch unreachable, so stamp the actual creation
    # time (UTC, same format as the original literal).
    pool_created_at = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%S.%fZ")
    autoscale_formula = "\n".join([
        "// Get pool lifetime since creation.",
        'lifespan = time() - time("' + pool_created_at + '");',
        "interval = TimeInterval_Minute * 5;",
        "",
        "// Compute the target nodes based on pending tasks.",
        "// $PendingTasks == The sum of $ActiveTasks and $RunningTasks",
        "$samples = $PendingTasks.GetSamplePercent(interval);",
        "$tasks = $samples < 70 ? max(0, $PendingTasks.GetSample(1)) : max( $PendingTasks.GetSample(1), avg($PendingTasks.GetSample(interval)));",
        "$targetVMs = $tasks > 0 ? $tasks : max(0, $TargetDedicatedNodes/2);",
        "targetPoolSize = max(0, min($targetVMs, 60));",
        "",
        "// For first interval deploy 1 node, for other intervals scale up/down as per tasks.",
        "$TargetDedicatedNodes = lifespan < interval ? 1 : targetPoolSize;",
        "$NodeDeallocationOption = taskcompletion;",
    ])

    params = {
        "properties": {
            "deploymentConfiguration": {
                "virtualMachineConfiguration": {
                    "imageReference": {
                        "publisher": "microsoft-azure-batch",
                        "offer": "centos-container",
                        "sku": "7-8",
                        "version": "latest",
                    },
                    "nodeAgentSKUId": "batch.node.centos 7",
                    "containerConfiguration": {
                        "type": "dockerCompatible",
                        "containerImageNames": [
                            "quay.io/seqeralabs/nf-launcher:j17-22.10.3",
                        ],
                    },
                    "osDisk": {
                        "ephemeralOSDiskSettings": {
                            "placement": "CacheDisk",
                        },
                    },
                },
            },
            "vmSize": "standard_d16s_v3",
            "taskSlotsPerNode": 16,
            "taskSchedulingPolicy": {
                "nodeFillType": "Pack",
            },
            "scaleSettings": {
                "autoScale": {
                    "evaluationInterval": "PT5M",
                    "formula": autoscale_formula,
                },
            },
            "startTask": {
                # Makes the downloaded azcopy binary executable and copies it
                # into the node's shared bin directory.
                "commandLine": "bash -c \"chmod +x azcopy && mkdir $AZ_BATCH_NODE_SHARED_DIR/bin/ && cp azcopy $AZ_BATCH_NODE_SHARED_DIR/bin/\" ",
                "resourceFiles": [
                    {
                        "httpUrl": "https://nf-xpack.seqera.io/azcopy/linux_amd64_10.8.0/azcopy",
                        "filePath": "azcopy",
                    },
                ],
                "userIdentity": {
                    "autoUser": {
                        "scope": "pool",
                        "elevationLevel": "nonadmin",
                    },
                },
                "maxTaskRetryCount": 0,
                # A real boolean, as the API defines this field, rather than
                # the string "true".
                "waitForSuccess": True,
            },
        },
    }

    if mount:
        if file_share:
            mount_entry = {
                "azureFileShareConfiguration": {
                    "accountName": AZURE_BATCH_STORAGE_ACCOUNT_NAME,
                    "azureFileUrl": "https://" + AZURE_BATCH_STORAGE_ACCOUNT_NAME + ".file.core.windows.net/" + AZURE_BATCH_STORAGE_FILE_SHARE,
                    "relativeMountPath": AZURE_BATCH_STORAGE_FILE_SHARE,
                    "mountOptions": "-o vers=3.0,dir_mode=0777,file_mode=0777,sec=ntlmssp",
                    "accountKey": AZURE_BATCH_STORAGE_ACCOUNT_KEY,
                },
            }
        else:
            mount_entry = {
                "azureBlobFileSystemConfiguration": {
                    "accountName": AZURE_BATCH_STORAGE_ACCOUNT_NAME,
                    "containerName": AZURE_BATCH_STORAGE_BLOB_CONTAINER,
                    "relativeMountPath": AZURE_BATCH_STORAGE_BLOB_CONTAINER,
                    # Blob containers are mounted with blobfuse, whose options
                    # go in "blobfuseOptions"; the CIFS flags used for the
                    # file share do not apply here. allow_other mirrors the
                    # intent of the 0777 modes on the file-share mount.
                    # NOTE(review): confirm the desired blobfuse options.
                    "blobfuseOptions": "-o allow_other",
                    "accountKey": AZURE_BATCH_STORAGE_ACCOUNT_KEY,
                },
            }
        # Key spelled in the API's camelCase form, like every other key in
        # this definition.
        params["properties"]["mountConfiguration"] = [mount_entry]

    response = client.pool.create(
        resource_group_name=AZURE_BATCH_RESOURCE_GROUP_NAME,
        account_name=AZURE_BATCH_ACCOUNT_NAME,
        pool_name=AZURE_BATCH_POOL_NAME,
        parameters=params,
    )
    print(response)
# x-ms-original-file: specification/batch/resource-manager/Microsoft.Batch/stable/2022-10-01/examples/PoolCreate_SharedImageGallery.json
# Create the pool only when this file is executed as a script, using the
# default arguments of main().
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment