Last active
March 23, 2021 09:58
-
-
Save rakisaionji/337e32978f3108a9d0de084a76acdc28 to your computer and use it in GitHub Desktop.
Compute Amazon S3 ETag for a local file. Converted from original PowerShell script at: https://gist.github.com/fireflycons/de3a5255b77d94292c5ad43c602b6d7d
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
using System; | |
partial class Program
{
    /// <summary>
    /// Computes the Amazon S3 ETag for a local file and writes it to standard output.
    /// </summary>
    /// <remarks>
    /// ETags represent a hash of the content of a file stored in S3. Comparing ETags can be
    /// used to determine whether a file in S3 is the same as one you are going to upload, or,
    /// following an upload, whether the file was successfully uploaded.
    /// <para>
    /// For a single part upload, the ETag is simply the MD5 of the whole file converted to a
    /// hex string. For multipart, concatenate the binary MD5 hash of each part, then compute
    /// the MD5 hash of the concatenated binary data, convert it to a hex string and append
    /// the number of parts to this.
    /// </para>
    /// <para>
    /// The defaults for <paramref name="minimumPartSize"/> and
    /// <paramref name="minimumSizeBeforeMultipartUpload"/> are based on the values used by
    /// Write-S3Object. Other clients (e.g. AWS CLI) may use different values, and application
    /// code may use any values as determined by the application's developers.
    /// </para>
    /// </remarks>
    /// <param name="filePath">Path to the file to compute the ETag for.</param>
    /// <param name="minimumPartSize">
    /// Minimum size in bytes of a part for multipart upload (default 5 MB).
    /// </param>
    /// <param name="minimumSizeBeforeMultipartUpload">
    /// File size in bytes above which the multipart algorithm is used (default 16 MB).
    /// </param>
    static void GetS3ETagForLocalFile
    (
        string filePath,
        long minimumPartSize = 5 * 1024 * 1024,
        long minimumSizeBeforeMultipartUpload = 16 * 1024 * 1024
    )
    {
        // The part size is raised above the minimum when needed so that the file
        // splits into at most this many parts.
        const double MaximumPartCount = 10000.0;

        // Get file size (throws if the file does not exist, before anything is opened).
        var fileSize = new System.IO.FileInfo(filePath).Length;

        using (var md5 = System.Security.Cryptography.MD5.Create())
        using (var inputStream = System.IO.File.OpenRead(filePath))
        {
            // Will hold the final hash result.
            byte[] md5Hash;
            // Counter for number of parts read so far.
            var partsRead = 0;

            // Is the file large enough to do a multipart upload?
            if (fileSize > minimumSizeBeforeMultipartUpload)
            {
                // Calculate part size; clamp to at least one byte so that a zero or
                // negative minimumPartSize can never cause a division by zero below.
                var partSize = Math.Max(
                    1L,
                    (long)Math.Max(Math.Ceiling(fileSize / MaximumPartCount), minimumPartSize));

                // Calculate number of parts (rounding up for a trailing partial part).
                var numberOfParts = (int)(fileSize / partSize);
                if (fileSize % partSize > 0)
                {
                    ++numberOfParts;
                }

                // File name for status messages.
                var filename = System.IO.Path.GetFileName(filePath);
                // Buffer to read file parts into; only needed on the multipart path.
                var buf = new byte[partSize];

                // Stream that concatenates the binary hash of each part.
                using (var hashBuffer = new System.IO.MemoryStream())
                {
                    if (Environment.UserInteractive)
                    {
                        // Show progress if running at the command line.
                        WriteProgress("Computing ETag", filename, 0);
                    }

                    // Read each part. ReadPart keeps reading until the buffer is full or
                    // the stream ends, so every part except the last is exactly partSize
                    // bytes even if the underlying stream returns short reads.
                    int bytesRead;
                    while ((bytesRead = ReadPart(inputStream, buf)) != 0)
                    {
                        if (Environment.UserInteractive && partsRead % 10 == 0)
                        {
                            // Show progress every 10 parts read if running at the command line.
                            WriteProgress("Computing ETag", $"{filename} - Part {partsRead}/{numberOfParts}", partsRead * 100 / numberOfParts);
                        }

                        ++partsRead;

                        // Hash the part and append the binary hash to the buffer.
                        var partMd5Hash = md5.ComputeHash(buf, 0, bytesRead);
                        hashBuffer.Write(partMd5Hash, 0, partMd5Hash.Length);
                    }

                    // Compute the hash of the concatenated part hashes.
                    md5Hash = md5.ComputeHash(hashBuffer.ToArray());
                }

                if (Environment.UserInteractive)
                {
                    // Remove progress bar if running at the command line.
                    WriteProgress("Computing ETag", true);
                }
            }
            else
            {
                // Single part upload - ETag is just MD5 of the whole file.
                partsRead = 1;
                md5Hash = md5.ComputeHash(inputStream);
            }

            // Build the ETag as a lower-case hex string (invariant casing: the result is
            // machine-readable and must not depend on the current culture).
            var eTag = System.BitConverter.ToString(md5Hash).Replace("-", string.Empty).ToLowerInvariant();
            if (partsRead > 1)
            {
                // For multipart ETag, append the number of parts.
                // NOTE(review): when the multipart path yields exactly one part, no suffix
                // is appended even though a hash-of-hashes was computed; this matches the
                // original behaviour - confirm against S3 if such sizes are ever used.
                eTag = eTag + $"-{partsRead}";
            }

            // Emit result.
            Console.WriteLine(eTag);
        }
    }

    /// <summary>
    /// Reads from <paramref name="stream"/> until <paramref name="buffer"/> is full or the
    /// end of the stream is reached.
    /// </summary>
    /// <param name="stream">Stream to read from.</param>
    /// <param name="buffer">Buffer to fill, starting at index 0.</param>
    /// <returns>The number of bytes placed in the buffer; 0 only at end of stream.</returns>
    static int ReadPart(System.IO.Stream stream, byte[] buffer)
    {
        var total = 0;
        while (total < buffer.Length)
        {
            var read = stream.Read(buffer, total, buffer.Length - total);
            if (read == 0)
            {
                // End of stream: return whatever was collected for the final part.
                break;
            }

            total += read;
        }

        return total;
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Working executable.
Get-S3ETagForLocalFile.exe