Script to collect response times and catch errors from an S3-like service - useful for benchmarking performance over time
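The s3cmd configuration below is what test_s3api.sh points at via -c $HOME/.s3cfg; IP_OR_FQDN and FQDN are endpoint placeholders.

[default]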
access_key =
secret_key =
check_ssl_certificate = False
cloudfront_host = IP_OR_FQDN:80
host_base = s3.cloudian.com:18080
host_bucket = %(bucket)s.FQDN:80
# host_bucket = IP:80/%(bucket)s
signature_v2 = False
simpledb_host = IP_OR_FQDN:80
website_endpoint = http://%(bucket)s.FQDN:80/
use_https = False
access_token =
add_encoding_exts =
add_headers =
bucket_location = US
ca_certs_file =
cache_file =
default_mime_type = binary/octet-stream
delay_updates = False
delete_after = False
delete_after_fetch = False
delete_removed = False
dry_run = False
enable_multipart = True
encoding = UTF-8
encrypt = False
expiry_date =
expiry_days =
expiry_prefix =
follow_symlinks = False
force = False
get_continue = False
gpg_command = None
gpg_decrypt = %(gpg_command)s -d --verbose --no-use-agent --batch --yes --passphrase-fd %(passphrase_fd)s -o %(output_file)s %(input_file)s
gpg_encrypt = %(gpg_command)s -c --verbose --no-use-agent --batch --yes --passphrase-fd %(passphrase_fd)s -o %(output_file)s %(input_file)s
gpg_passphrase =
guess_mime_type = True
human_readable_sizes = False
ignore_failed_copy = False
invalidate_default_index_on_cf = False
invalidate_default_index_root_on_cf = True
invalidate_on_cf = False
list_md5 = False
log_target_prefix =
max_delete = -1
mime_type =
multipart_chunk_size_mb = 15
preserve_attrs = True
progress_meter = True
proxy_host =
proxy_port = 0
put_continue = False
recursive = False
recv_chunk = 4096
reduced_redundancy = False
restore_days = 1
send_chunk = 4096
server_side_encryption = False
skip_existing = False
socket_timeout = 300
urlencoding_mode = normal
use_mime_magic = True
verbosity = INFO
website_error =
website_index = index.html
#!/bin/bash
# test_s3api.sh
#
# PURPOSE
# We needed to determine whether our infrastructure and our installation of the S3-emulating software were sound
# This script benchmarks a handful of API operations in an endless loop and writes the results to a log file
# The log file can be ingested into a simple database for analytics
#
# DEPENDENCIES
# - s3cmd
# - python-magic (for mimetypes)
#
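# USAGE
# One way to run it (example; assumes the s3://test bucket exists and $HOME/index.html is present to upload):
#   $ nohup ./test_s3api.sh >/dev/null 2>&1 &
#   $ tail -f ~/s3api_responses.log
#   $ cp ~/s3api_responses.log s3api_responses.csv  # snapshot for the MySQL steps below
#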
# QUERYING IN MYSQL
#
# Prepare
#
# Shave off the fractional-second resolution on bench results
# $ cat s3api_responses.csv | perl -pe 's/(\d:\d{2})\.\d{2,3}/$1/' > s3api_responses.transformed.csv
# Remove timezone from occurrence timestamp
# $ cat s3api_responses.transformed.csv | perl -pe 's/\+\d{2}:\d{2}//' | sponge s3api_responses.transformed.csv
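# A hypothetical row, before and after the two transformations above:
# "2016-02-16 20:52:07+00:00",ls,"/usr/bin/s3cmd -c /home/user/.s3cfg ls s3://test/",0:00.84,0,200
# "2016-02-16 20:52:07",ls,"/usr/bin/s3cmd -c /home/user/.s3cfg ls s3://test/",0:00,0,200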
#
# Load
# create table s3api_responses (
# `id` INT UNSIGNED NULL PRIMARY KEY AUTO_INCREMENT,
# `time` TIMESTAMP NOT NULL DEFAULT 0,
# `cmdshort` VARCHAR(10) NOT NULL DEFAULT '',
# `cmdlong` VARCHAR(124) NOT NULL DEFAULT '',
# `bench` TIME NOT NULL DEFAULT 0,
# `exitcode` TINYINT(1) UNSIGNED NULL,
# `httpcode` SMALLINT UNSIGNED NULL
# ) ENGINE=INNODB;
# create index `response_time` on s3api_responses (`bench`);
# create index `composite_bench` on s3api_responses (`bench`,`httpcode`);
# load data local infile '/home/user/bench.csv'
# ignore
# into table s3api_responses
# character set 'utf8'
# fields terminated by ',' optionally enclosed by '"'
# lines terminated by '\n'
# (`time`, `cmdshort`, `cmdlong`, `bench`, `exitcode`, `httpcode`);
#
# Run analytics
#
# -- HTTP error count
# select count(1) from s3api_responses where httpcode > 500;
# -- Count of and average of long running queries (>3s)
# select count(1) as over_3sec, sec_to_time(avg(time_to_sec(bench))) as avg_long_running from s3api_responses where bench > '00:03' order by bench desc;
# -- Average runtime of all in set and runtime of largest outlier
# select sec_to_time(avg(time_to_sec(bench))) as avg_of_all, sec_to_time(max(time_to_sec(bench))) as max_runtime from s3api_responses;
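# -- One more example query against the same schema: HTTP error count per hour
# select date_format(`time`, '%Y-%m-%d %H:00') as hour, sum(httpcode >= 500) as errors, count(1) as total
#   from s3api_responses group by hour order by hour;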
# The CSV log file to write to
logfile="$HOME/s3api_responses.log"
s3cmd=`which s3cmd`
s3cmd_config="$HOME/.s3cfg"
# The parameters that will be sent to s3cmd, and thus which operations we're benchmarking in the S3 API
s3args=('ls s3://test/' 'del s3://test/index.html' "put $HOME/index.html s3://test/index.html" "get s3://test/index.html $HOME/index-get.html" 'info s3://test/index.html')
timecmd=`which time`
# Set the /usr/bin/time format to output the command executed, the real time it took and the exit code of the run
timeargs="-f \"%C\",%E,%x"
cmdheader="$timecmd $timeargs $s3cmd -c $s3cmd_config"
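# With that format, /usr/bin/time emits a line like the following on stderr (hypothetical values):
#   "/usr/bin/s3cmd -c /home/user/.s3cfg ls s3://test/",0:00.84,0
# The loop below prefixes it with a timestamp and the s3cmd subcommand and suffixes it with a pseudo HTTP code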
rm -f $HOME/index-get.html
while true ; do
  iter=0
  while [ $iter -lt ${#s3args[@]} ] ; do
    # Write the current date in UTC, enclosed in double quotes and suffixed with a comma to conform to the CSV format we're going for in the log
    echo -nE "\"$(date --rfc-3339=seconds --utc)\"," >> $logfile
    s3action=`echo -n ${s3args[$iter]} | cut -d' ' -f1`
    echo -n "$s3action," >> $logfile
    # Run our benchmark command, sink the actual output of the command to oblivion and capture the resulting timing in a variable
    line=`{ $($cmdheader ${s3args[$iter]} 1>/dev/null 2>&$err); } {err}>&1`
    echo -n "${line}," | grep -Eo '".*",.*,.*$' | tr -d "\n" >> $logfile
    # Set the fail type, if applicable (these are roughly equivalent to HTTP response codes)
    if [[ "$line" =~ An\ unexpected\ error\ has\ occurred ]] ; then
      code=600 # s3cmd fucked up
    elif [[ "$line" =~ Service.?Unavailable ]] ; then
      code=503 # S3 api unavailable
    elif [[ "$line" =~ Internal.?Server.?Error ]] ; then
      code=500 # Something bad happened
    elif [[ "$line" =~ Slow.?Down ]] ; then
      code=503 # QOS throttled us
    elif [[ "${line##*,}" != "0" ]] ; then
      code=520 # Unknown error (nonzero exit status in /usr/bin/time's %x field)
    else
      code=200 # OK
    fi
    echo $code >> $logfile
    # If the API call didn't fail, move on to the next one; otherwise repeat it
    if [[ $code -eq 200 ]] ; then
      iter=$(($iter+1))
    fi
    sleep 1
  done
  rm -f ${HOME}/index-get.html
  sleep 1
done