Skip to content

Instantly share code, notes, and snippets.

@metadaddy
Created October 14, 2023 00:25
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save metadaddy/137eae5d96981f4588760b5d9e385ee6 to your computer and use it in GitHub Desktop.
Save metadaddy/137eae5d96981f4588760b5d9e385ee6 to your computer and use it in GitHub Desktop.
Simple Python app to benchmark file uploads using the S3 PutObject API
# MIT License
#
# Copyright (c) 2023 Backblaze, Inc.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
import argparse
import os
import boto3
import boto3.session
import random
import sys
import datetime
import time
from statistics import mean, stdev
from dotenv import load_dotenv
from humanfriendly import parse_size, InvalidSize
# Nanoseconds per millisecond (nano = 1e-9, milli = 1e-3); used to convert
# the nanosecond timings to milliseconds for reporting
NANO_TO_MILLI = 1000000

# Command-line interface: three required positionals (key, size, iterations)
# followed by optional flags
parser = argparse.ArgumentParser(description='Test PutObject performance')
parser.add_argument('key', type=str, help='object key')
parser.add_argument('size', type=str, help='object size - e.g. 256K, 1MiB etc')
parser.add_argument('iterations', type=int, help='number of iterations')
parser.add_argument('--reuse', action='store_true', help='reuse connection')
parser.add_argument('--csv', action='store_true', help='CSV output')
# A bare '-e' (no value) leaves envfile as None, same as omitting the flag
parser.add_argument('-e', '--envfile', nargs='?', type=str, help='environment file')
# nargs='?' permits a bare '-d' with no value. Without an explicit const,
# argparse would store None in that case and the delay arithmetic in the
# upload loop would raise TypeError; const=10 makes a bare '-d' behave like
# the default.
parser.add_argument('-d', '--delay', nargs='?', type=float, default=10, const=10, help='delay in ms')
# A bare '-o' (no value) leaves outfile as None, i.e. output goes to stdout
parser.add_argument('-o', '--outfile', nargs='?', type=str, help='output file')
args = parser.parse_args()
# Load settings from the env file named by -e/--envfile; args.envfile is
# None when the flag was not supplied
load_dotenv(args.envfile)

# Turn the human-readable size argument (e.g. 256K, 1MiB) into a byte count.
# A value that cannot be parsed is reported on stderr together with the
# usage text, and the script exits with status 1.
try:
    num_bytes = parse_size(args.size)
except InvalidSize:
    print(f'Invalid size: {args.size}\n', file=sys.stderr)
    parser.print_help(sys.stderr)
    raise SystemExit(1)
# Choose the output destination: the file named by -o/--outfile, or stdout.
if args.outfile:
    # Write to a file
    try:
        # mode 'x' opens the file for exclusive creation,
        # failing if the file already exists
        output_fd = open(args.outfile, 'x')
    except FileExistsError:
        # The file is already there: append to it. Only this specific error
        # triggers the fallback; any other failure (bad path, permissions,
        # ...) propagates instead of being silently retried.
        output_fd = open(args.outfile, 'a')
    else:
        if args.csv:
            # Write CSV header to newly created file. This is done outside
            # the try body so a failed write is not mistaken for an
            # already-existing file.
            output_fd.write('timestamp,endpoint,iterations,block_size,reuse,delay,total,mean,stdev\n')
else:
    # Write to stdout
    output_fd = sys.stdout
# Read the target bucket once, up front: a missing BUCKET variable raises
# KeyError before any upload starts, and the lookup stays out of the timed
# region below.
bucket = os.environ['BUCKET']

# Random payload of the requested size; the same bytes are uploaded on
# every iteration
file_data = random.randbytes(num_bytes)

s3 = boto3.client('s3')

# Values recorded once per run for the report
timestamp = datetime.datetime.now(tz=datetime.timezone.utc).isoformat()
endpoint = s3.meta.endpoint_url

if not args.csv:
    print(f"Start time: {timestamp}", file=output_fd)
    print(f"Operating on endpoint: {endpoint}", file=output_fd)
    print(f"Performing {args.iterations} iterations with {num_bytes} byte blocks, {'' if args.reuse else 'not '}reusing the connection, with {args.delay} ms delay", file=output_fd)

# Elapsed time of each put_object call, in nanoseconds
times_ns = []
for i in range(args.iterations):
    if i > 0 and not args.reuse:
        # Without --reuse, every iteration after the first gets a fresh
        # client (the first one uses the client created above). Creating it
        # here rather than at the end of the loop body avoids building a
        # client that is never used after the last iteration.
        # NOTE(review): presumably a new client implies a new connection -
        # confirm against botocore's connection handling.
        s3 = boto3.client('s3')

    if not args.csv:
        # Progress indicator: one dot per upload
        print(".", end='', flush=True, file=output_fd)

    # Start the clock. perf_counter_ns() is monotonic, so the measurement
    # cannot be skewed by system clock adjustments the way time_ns() can.
    start = time.perf_counter_ns()
    s3.put_object(
        Bucket=bucket,
        Key=args.key,
        Body=file_data
    )
    # Stop the clock and save the elapsed time
    times_ns.append(time.perf_counter_ns() - start)

    # Pause between uploads; args.delay is in milliseconds
    time.sleep(args.delay / 1000)
# Summarise the per-upload timings (all in nanoseconds at this point).
total_ns = sum(times_ns)
# statistics.mean() raises StatisticsError on empty data (0 iterations);
# report 0 instead of crashing
mean_ns = mean(times_ns) if times_ns else 0
# statistics.stdev() needs at least two data points; with fewer there is no
# spread to measure, so report 0 instead of crashing
stdev_ns = stdev(times_ns, mean_ns) if len(times_ns) > 1 else 0

try:
    # Results are converted to whole milliseconds for output
    if args.csv:
        # One row matching the column order of the CSV header
        # (timestamp,endpoint,iterations,block_size,reuse,delay,total,mean,stdev)
        output_fd.write(f'{timestamp},{endpoint},{args.iterations},{num_bytes},{args.reuse},{args.delay},{round(total_ns/NANO_TO_MILLI)},{round(mean_ns/NANO_TO_MILLI)},{round(stdev_ns/NANO_TO_MILLI)}\n')
    else:
        # Leading newline ends the line of progress dots
        print(f"\nTotal time: {round(total_ns/NANO_TO_MILLI)} ms, mean = {round(mean_ns/NANO_TO_MILLI)} ms, stdev = {round(stdev_ns/NANO_TO_MILLI)} ms", file=output_fd)
finally:
    # Only close what this script opened; stdout is left alone. Closing in
    # finally releases the file even if writing the results fails.
    if args.outfile:
        output_fd.close()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment