Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save DhyanRathore/e24943df181048dd13103107a8765498 to your computer and use it in GitHub Desktop.
Save DhyanRathore/e24943df181048dd13103107a8765498 to your computer and use it in GitHub Desktop.
Using Azure Data Factory to copy multiple files based on URL pattern over HTTP
{
"name": "pl_autoCopyCsvFiles",
"properties": {
"activities": [
{
"name": "ac_checkAllAvailableFiles",
"type": "GetMetadata",
"dependsOn": [],
"policy": {
"timeout": "7.00:00:00",
"retry": 0,
"retryIntervalInSeconds": 30,
"secureOutput": false,
"secureInput": false
},
"userProperties": [],
"typeProperties": {
"dataset": {
"referenceName": "ds_csvStgAccDest",
"type": "DatasetReference"
},
"fieldList": [
"childItems"
],
"storeSettings": {
"type": "AzureBlobFSReadSettings",
"recursive": true
},
"formatSettings": {
"type": "DelimitedTextReadSettings"
}
}
},
{
"name": "ac_setStartingFileName",
"type": "SetVariable",
"dependsOn": [
{
"activity": "ac_checkAllAvailableFiles",
"dependencyConditions": [
"Succeeded"
]
}
],
"userProperties": [],
"typeProperties": {
"variableName": "i",
"value": {
"value": "@if(empty(last(activity('ac_checkAllAvailableFiles').output.childItems)),variables('j'),formatDateTime(addDays(formatDateTime(replace(last(activity('ac_checkAllAvailableFiles').output.childItems).name,'.csv',''),'MM-dd-yyyy'),1),'MM-dd-yyyy'))",
"type": "Expression"
}
}
},
{
"name": "ac_getCsvFiles",
"type": "Until",
"dependsOn": [
{
"activity": "ac_setStartingFileName",
"dependencyConditions": [
"Succeeded"
]
}
],
"userProperties": [],
"typeProperties": {
"expression": {
"value": "@greater(int(formatDateTime(variables('i'),'yyyyMMdd')),int(formatDateTime(subtractFromTime(utcnow(),1,'Day'),'yyyyMMdd')))",
"type": "Expression"
},
"activities": [
{
"name": "ac_copyCsvFile",
"type": "Copy",
"dependsOn": [],
"policy": {
"timeout": "7.00:00:00",
"retry": 0,
"retryIntervalInSeconds": 30,
"secureOutput": false,
"secureInput": false
},
"userProperties": [],
"typeProperties": {
"source": {
"type": "DelimitedTextSource",
"storeSettings": {
"type": "HttpReadSettings",
"requestMethod": "GET"
},
"formatSettings": {
"type": "DelimitedTextReadSettings"
}
},
"sink": {
"type": "DelimitedTextSink",
"storeSettings": {
"type": "AzureBlobFSWriteSettings"
},
"formatSettings": {
"type": "DelimitedTextWriteSettings",
"quoteAllText": true,
"fileExtension": ".txt"
}
},
"enableStaging": false,
"translator": {
"type": "TabularTranslator",
"typeConversion": true,
"typeConversionSettings": {
"allowDataTruncation": true,
"treatBooleanAsNumber": false
}
}
},
"inputs": [
{
"referenceName": "ds_csvWebSrc",
"type": "DatasetReference",
"parameters": {
"fileName": {
"value": "@concat(formatDateTime(variables('i'),'MM-dd-yyyy'),'.csv')",
"type": "Expression"
}
}
}
],
"outputs": [
{
"referenceName": "ds_csvStgAccDest",
"type": "DatasetReference"
}
]
},
{
"name": "ac_incrementDateP1",
"type": "SetVariable",
"dependsOn": [
{
"activity": "ac_copyCsvFile",
"dependencyConditions": [
"Succeeded"
]
}
],
"userProperties": [],
"typeProperties": {
"variableName": "j",
"value": {
"value": "@addDays(formatDateTime(variables('i'),'MM-dd-yyyy'),1)",
"type": "Expression"
}
}
},
{
"name": "ac_incrementDateP2",
"type": "SetVariable",
"dependsOn": [
{
"activity": "ac_incrementDateP1",
"dependencyConditions": [
"Succeeded"
]
}
],
"userProperties": [],
"typeProperties": {
"variableName": "i",
"value": {
"value": "@variables('j')",
"type": "Expression"
}
}
}
],
"timeout": "7.00:00:00"
}
}
],
"variables": {
"i": {
"type": "String",
"defaultValue": "01-22-2020"
},
"j": {
"type": "String",
"defaultValue": "01-22-2020"
}
},
"annotations": [],
"lastPublishTime": "2020-09-18T17:38:57Z"
},
"type": "Microsoft.DataFactory/factories/pipelines"
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment