Last active
June 25, 2016 16:24
-
-
Save stephlocke/410ad30ca863ea5388b5a3fd2d2b1be8 to your computer and use it in GitHub Desktop.
Workflow for getting new files from s3 into blob storage
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
workflow s3toqueue
{
    <#
    .SYNOPSIS
        Copies S3 objects that are not yet in Azure blob storage into the destination container.
    .DESCRIPTION
        Lists objects in the S3 bucket whose StorageClass is STANDARD_IA, compares their keys
        with the blob names found in $refcontainer and $destcontainer, and for up to
        $processNfiles keys that exist only in S3: downloads the object to a local file,
        uploads it to $destcontainer and removes the local file.
        AWS settings (aws_accountkey, aws_secretkey, aws_region, aws_logbucket) are read from
        Azure Automation variables.
    .PARAMETER processNfiles
        Maximum number of files transferred per run.
    .PARAMETER SubscriptionName
        Not used by the workflow body; kept so existing callers keep working.
    .PARAMETER StorageAccountName
        Storage account that holds both containers.
    .PARAMETER rgname
        Resource group of the storage account and of the automation account.
    .PARAMETER refcontainer
        Container whose blobs count as "already processed".
    .PARAMETER destcontainer
        Container that receives new files; its blobs also count as "already present".
    .PARAMETER automationaccount
        Automation account that stores the AWS variables.
    .PARAMETER serviceaccount
        Name of the Automation credential asset used to sign in to Azure.
    .PARAMETER keyPrefix
        Optional S3 key prefix used to narrow the listing. Empty (default) lists the whole bucket.
    #>
    param (
        [Parameter(Mandatory=$false)]
        [Int]$processNfiles = 10,

        [Parameter(Mandatory=$false)]
        [String]$SubscriptionName = "subname",

        [Parameter(Mandatory=$false)]
        [String]$StorageAccountName = "etlstore",

        [Parameter(Mandatory=$false)]
        [String]$rgname = "censornetML",

        [Parameter(Mandatory=$false)]
        [String]$refcontainer = "archive",

        [Parameter(Mandatory=$false)]
        [String]$destcontainer = "queue",

        [Parameter(Mandatory=$false)]
        [String]$automationaccount = "poshetl",

        [Parameter(Mandatory=$false)]
        [String]$serviceaccount = "mletl_service",

        # Previously $keyPrefix was referenced but never declared; it is now an explicit,
        # optional parameter placed last so positional callers are unaffected.
        [Parameter(Mandatory=$false)]
        [String]$keyPrefix = ""
    )

    #$ErrorActionPreference = "Stop"
    $VerbosePreference = "Continue"

    #---- Starting vals --------
    #Select-AzureRmProfile -Path .\Documents\s3toblob\sllogin
    $Cred = Get-AutomationPSCredential -Name $serviceaccount
    #$Cred = Get-AzureRmAutomationCredential -Name $serviceaccount -AutomationAccountName $automationaccount -ResourceGroupName $rgname

    # Connect to Azure
    # NOTE(review): the sign-in is issued twice in the original; it may be a deliberate
    # retry/workaround, so it is preserved here - confirm and drop the second call if not needed.
    Add-AzureRmAccount -Credential $Cred | Write-Verbose
    Add-AzureRmAccount -Credential $Cred | Write-Verbose

    #---- Config --------------
    # Your account access key - must have read access to your S3 Bucket
    $accessKey = (Get-AzureRMAutomationVariable -Name "aws_accountkey" -ResourceGroupName $rgname -AutomationAccountName $automationaccount).Value
    # Your account secret access key
    $secretKey = (Get-AzureRMAutomationVariable -Name "aws_secretkey" -ResourceGroupName $rgname -AutomationAccountName $automationaccount).Value
    # The region associated with your bucket e.g. eu-west-1, us-east-1 etc. (see http://docs.aws.amazon.com/AWSEC2/latest/UserGuide/using-regions-availability-zones.html#concepts-regions)
    $region = (Get-AzureRMAutomationVariable -Name "aws_region" -ResourceGroupName $rgname -AutomationAccountName $automationaccount).Value
    # The name of your S3 Bucket
    $bucket = (Get-AzureRMAutomationVariable -Name "aws_logbucket" -ResourceGroupName $rgname -AutomationAccountName $automationaccount).Value

    # getting Azure storage account key
    $fullkeys = Get-AzureRMStorageAccountKey -StorageAccountName $StorageAccountName -ResourceGroupName $rgname
    # the script will be using primary key
    $key = $fullkeys.Key1 # this differs to the Get-AzureStorageAccountKey cmdlet's output!

    # The storage context is built inline at every call site below rather than stored in a
    # variable (the original created an unused $context here).
    # NOTE(review): presumably because workflow variables are serialized between activities - confirm.

    #---- Identify new files ---
    $objects = Get-S3Object -BucketName $bucket -KeyPrefix $keyPrefix -AccessKey $accessKey -SecretKey $secretKey -Region $region | Where-Object -Property StorageClass -EQ -Value "STANDARD_IA"
    $archive = Get-AzureStorageBlob -Container $refcontainer -Context (New-AzureStorageContext -StorageAccountName $StorageAccountName -StorageAccountKey $key)
    $queue = Get-AzureStorageBlob -Container $destcontainer -Context (New-AzureStorageContext -StorageAccountName $StorageAccountName -StorageAccountKey $key)
    $existingobjects = $archive + $queue

    # Pipe into Sort-Object: -InputObject treats the whole collection as ONE object and
    # therefore never sorts it. Sorting makes "first N" below deterministic by key.
    $s3list = ($objects | Sort-Object -Property Key).Key
    $bloblist = ($existingobjects | Sort-Object -Property Name).Name

    if ($s3list.Count -eq 0)
    {
        # Nothing listed in S3: Compare-Object would fail on a null -DifferenceObject.
        $filestoget = @()
    }
    elseif ($bloblist.Count -eq 0)
    {
        $filestoget = $s3list
    }
    else
    {
        # Keep only keys that exist in S3 ("=>" side) and not in blob storage. Without this
        # filter, names that exist only in blob storage were also scheduled for download.
        $filestoget = Compare-Object -ReferenceObject $bloblist -DifferenceObject $s3list |
            Where-Object -Property SideIndicator -EQ -Value "=>" |
            Select-Object -ExpandProperty InputObject
    }

    $filestoget = $filestoget | Select-Object -First $processNfiles

    #---- Write files to storage -------
    ForEach -parallel ($file in $filestoget)
    {
        try
        {
            # -ErrorAction Stop turns failures into terminating errors so a failed download
            # skips the upload and is reported below instead of being silently ignored.
            Copy-S3Object -Key $file -LocalFile $file -BucketName $bucket -AccessKey $accessKey -SecretKey $secretKey -Region $region -ErrorAction Stop
            # -Blob pins the blob name to the S3 key, which is what the comparison above relies on.
            Set-AzureStorageBlobContent -File $file -Blob $file -Container $destcontainer -Context (New-AzureStorageContext -StorageAccountName $StorageAccountName -StorageAccountKey $key) -ErrorAction Stop
        }
        catch
        {
            # Best effort per file: report and carry on with the remaining files.
            Write-Warning -Message "Failed to transfer '$file' from S3 to blob storage: $_"
        }
        finally
        {
            # Always clean up the local copy; it may not exist if the download failed.
            Remove-Item -Path $file -ErrorAction SilentlyContinue
        }
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment