Skip to content

Instantly share code, notes, and snippets.

@stephlocke stephlocke/s3toblob.ps1
Last active Jun 25, 2016

Embed
What would you like to do?
Workflow for copying new files from an AWS S3 bucket into Azure Blob Storage
workflow s3toqueue
{
    <#
    .SYNOPSIS
        Copies new files from an AWS S3 bucket into an Azure blob storage container.

    .DESCRIPTION
        Lists the S3 bucket, compares object keys against blobs already present in the
        archive and destination ("queue") containers, then downloads up to $processNfiles
        new objects and uploads them to the destination container in parallel.
        AWS credentials/region/bucket are read from Azure Automation variables, and the
        Azure login credential from an Automation credential asset, so this is intended
        to run inside an Azure Automation account.

    .PARAMETER processNfiles
        Maximum number of new files to transfer per run.
    .PARAMETER SubscriptionName
        Azure subscription name (currently informational; the credential login selects it).
    .PARAMETER StorageAccountName
        Target Azure storage account.
    .PARAMETER rgname
        Resource group containing the storage and automation accounts.
    .PARAMETER refcontainer
        Container whose blobs count as "already processed" (archive).
    .PARAMETER destcontainer
        Container that new files are uploaded into (queue).
    .PARAMETER automationaccount
        Azure Automation account holding the AWS variables.
    .PARAMETER serviceaccount
        Name of the Automation credential asset used to log in to Azure.
    .PARAMETER keyPrefix
        Optional S3 key prefix used to restrict the bucket listing. Previously this was
        referenced but never declared (so it was always $null); an empty default keeps
        the old behaviour of listing the whole bucket.
    #>
    param (
        [Parameter(Mandatory=$false)]
        [Int]$processNfiles = 10,
        [Parameter(Mandatory=$false)]
        [String]$SubscriptionName="subname",
        [Parameter(Mandatory=$false)]
        [String]$StorageAccountName = "etlstore",
        [Parameter(Mandatory=$false)]
        [String]$rgname="censornetML",
        [Parameter(Mandatory=$false)]
        [String]$refcontainer="archive",
        [Parameter(Mandatory=$false)]
        [String]$destcontainer="queue",
        [Parameter(Mandatory=$false)]
        [String]$automationaccount = "poshetl",
        [Parameter(Mandatory=$false)]
        [String]$serviceaccount = "mletl_service",
        [Parameter(Mandatory=$false)]
        [String]$keyPrefix = ""
    )

    $VerbosePreference = "Continue"

    #---- Connect to Azure -----------------------------------------------
    # The service credential is an Automation credential asset.
    $Cred = Get-AutomationPSCredential -Name $serviceaccount
    # Single login is sufficient (the original script called this twice).
    Add-AzureRmAccount -Credential $Cred | Write-Verbose

    #---- Config ---------------------------------------------------------
    # AWS settings come from Automation variable assets so no secrets live in source.
    # Access key must have read access to the S3 bucket.
    $accessKey = (Get-AzureRMAutomationVariable -Name "aws_accountkey" -ResourceGroupName $rgname -AutomationAccountName $automationaccount).Value
    $secretKey = (Get-AzureRMAutomationVariable -Name "aws_secretkey" -ResourceGroupName $rgname -AutomationAccountName $automationaccount).Value
    # Region associated with the bucket, e.g. eu-west-1, us-east-1
    # (see http://docs.aws.amazon.com/AWSEC2/latest/UserGuide/using-regions-availability-zones.html#concepts-regions)
    $region = (Get-AzureRMAutomationVariable -Name "aws_region" -ResourceGroupName $rgname -AutomationAccountName $automationaccount).Value
    $bucket = (Get-AzureRMAutomationVariable -Name "aws_logbucket" -ResourceGroupName $rgname -AutomationAccountName $automationaccount).Value

    # Storage account key — the script uses the primary key.
    $fullkeys = Get-AzureRMStorageAccountKey -StorageAccountName $StorageAccountName -ResourceGroupName $rgname
    $key = $fullkeys.Key1 # NB: this property differs from the Get-AzureStorageAccountKey cmdlet's output!
    $context = New-AzureStorageContext -StorageAccountName $StorageAccountName -StorageAccountKey $key

    #---- Identify new files ---------------------------------------------
    # Only STANDARD_IA objects are candidates for transfer.
    $objects = Get-S3Object -BucketName $bucket -KeyPrefix $keyPrefix -AccessKey $accessKey -SecretKey $secretKey -Region $region | Where-Object -Property StorageClass -EQ -Value "STANDARD_IA"
    # Anything already in the archive or queue containers is considered processed.
    # These sequential calls can reuse the context built above.
    $archive = Get-AzureStorageBlob -Container $refcontainer -Context $context
    $queue = Get-AzureStorageBlob -Container $destcontainer -Context $context
    $existingobjects = $archive + $queue

    $s3list = (Sort-Object -InputObject $objects -Property Key).Key
    $bloblist = (Sort-Object -InputObject $existingobjects -Property Name).Name

    if ($bloblist.Count -eq 0)
    {
        # Nothing processed yet: every S3 object is new.
        $filestoget = $s3list
    }
    else
    {
        # Keys present on only one side; in practice new S3 keys not yet in blob storage.
        $filestoget = Compare-Object -ReferenceObject $bloblist -DifferenceObject $s3list -PassThru
    }
    # Cap the batch size so a single run stays bounded.
    $filestoget = $filestoget | Select-Object -First $processNfiles

    #---- Write files to storage -----------------------------------------
    ForEach -parallel ($file in $filestoget)
    {
        try
        {
            Copy-S3Object -Key $file -LocalFile $file -BucketName $bucket -AccessKey $accessKey -SecretKey $secretKey -Region $region -ErrorAction Continue
            # The storage context is rebuilt inside the parallel activity because live
            # session objects do not reliably serialize into workflow parallel scopes.
            Set-AzureStorageBlobContent -File $file -Container $destcontainer -Context (New-AzureStorageContext -StorageAccountName $StorageAccountName -StorageAccountKey $key) -ErrorAction Continue
        }
        catch
        {
            # Best-effort per file: log the failure instead of swallowing it silently,
            # then let the remaining parallel transfers continue.
            Write-Warning ("Failed to transfer '" + $file + "': " + $_.Exception.Message)
        }
        finally
        {
            # Always remove the local temp copy, even after a failed upload.
            Remove-Item -Path $file -ErrorAction SilentlyContinue
        }
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.