Integrating Nets AFCR - Automated Fraud and Chargeback Reporting
#!/bin/bash
# :set ts=2 sw=2 expandtab number autoindent|colorscheme darkblue
clear
set -u
set -o pipefail
# https://superuser.com/a/1183819
[ "systemd" == "$(ps --no-headers -o comm 1)" ] || { echo "exiting. AFCR depends on systemd for user services and timers."; exit 1; }
systemctl --user 1>/dev/null 2>&1 || { echo "exiting. issue detected with systemctl --user commands"; exit 1; }
detected_os=$(hostnamectl |grep Operating|cut -d' ' -f5-|tr '[:upper:]' '[:lower:]')
function is_debian_or_ubuntu() {
  if [[ "$detected_os" =~ ubuntu ]] || [[ "$detected_os" =~ debian ]]; then
    return 0
  fi
  return 1
}
#is_debian_or_ubuntu || { echo "exiting. unsupported os detected."; exit 1; }
[ 0 == "$UID" ] && { echo "exiting. root user detected. It is recommended to run this script with a non-root user."; exit 1; }
# https://stackoverflow.com/a/35412000
function is_variable_set() {
  declare -p "$1" &>/dev/null
}
# systemd service directives ExecStart and WorkingDirectory do not permit dynamic paths.
# Therefore the implementation expects a constant path: $HOME/nets-afcr-s3-systemd-path/data.
# Symlinks are used to provide systemd with those constant paths.
# An alternative would be a find+replace on the .service during install, but that is more complex.
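# e.g. with the defaults below the resulting layout is:
#   ~/nets-afcr-s3-systemd-path -> ~/nets-afcr-s3          (constant path referenced by the .service)
#   ~/nets-afcr-s3/data         -> /dev/shm/nets-afcr-s3   (DATA_PATH)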
SYSTEMD_AFCR_INSTALL_PATH="${HOME}/nets-afcr-s3-systemd-path"
INSTALL_PATH="${INSTALL_PATH:-${HOME}/nets-afcr-s3}"
DEFAULT_DATA_PATH=/dev/shm/nets-afcr-s3
DATA_PATH="${DATA_PATH:-${DEFAULT_DATA_PATH}}"
# remove any single trailing slash
INSTALL_PATH=${INSTALL_PATH%/}
DATA_PATH=${DATA_PATH%/}
mkdir -p "$INSTALL_PATH"
[ -d "$INSTALL_PATH" ] || { echo "exiting. install path not a dir."; exit 1; }
[ -h "${INSTALL_PATH}/data" ] && rm "${INSTALL_PATH}/data" 2>/dev/null
mkdir -p "$DATA_PATH"
[ -d "$DATA_PATH" ] || { echo "exiting. data path not found."; exit 1; }
# create symlink if DATA_PATH is not the same as the symlink path.
[ "${DATA_PATH}" != "${INSTALL_PATH}/data" ] && ln -s "${DATA_PATH}" "$INSTALL_PATH/data" 2>/dev/null
[ -e "$INSTALL_PATH/data" ] || { echo "exiting. $INSTALL_PATH/data does not exist."; exit 1; }
# create/update the systemd constant path
ln -Tfs "$INSTALL_PATH" "$SYSTEMD_AFCR_INSTALL_PATH" 2>/dev/null
# generic pause
function generic_pause() {
  read -rn1 -p 'Press CTRL+C to abort OR press any key to continue...'
  printf -- "\\n\\n"
}
dpkg_q=$(command -v dpkg-query)
apt=$(command -v apt)
dnf=$(command -v dnf)
yum=$(command -v yum)
if [ -x "$dnf" ]; then
pkg_mgr="$dnf"
elif [ -x "$yum" ]; then
pkg_mgr="$yum"
elif [ -x "$apt" ]; then
pkg_mgr="$apt"
elif false; then
: # e.g. pacman for arch
else
echo "exiting. unable to determine the system package manager."
exit 1
fi
cat <<EOF
============================================
Welcome to the AFCR data consumer installer.
============================================
Docs: https://coda.io/@ff0/afcr-automated-fraud-and-chargeback-reporting
This script is designed to be re-run as many times as needed.
It also acts like an updater; latest versions will be installed.
The latest file(s) will be downloaded from the s3 bucket to DATA_PATH.
Existing files will be downloaded/overwritten if size or modified time differ.
INSTALL_PATH=$INSTALL_PATH
DATA_PATH=$DATA_PATH
NOTE: These env vars can be set prior to installer launch.
EOF
if [ "$DEFAULT_DATA_PATH" == "$DATA_PATH" ]; then
cat <<EOF
ATTENTION: the DATA_PATH default is non-persistent shared memory.
VOLATILE: files in memory will not survive a reboot.
INFOSEC: This is a security-by-design best practice.
EOF
fi
cat <<EOF
This installer aims to be as lightweight and maintainable as possible.
It does NOT use the official aws cli util because we only need simple s3
capabilities; the aws cli is overkill and >120MB.
This installer does use the official aws boto3 python module.
This installer performs the following:
0) check for prerequisites and provide feedback on detected issues.
1) install prerequisites like python3, python3-pip and python3-venv.
2) set up a python virtual env in INSTALL_PATH for an isolated and
maintainable install.
read more about python venv here: https://docs.python.org/3/library/venv.html
3) prompt for your AWS S3 bucket details and credentials.
4) download the latest s3-helper.py from GitHub.
5) download the latest data from your bucket to the DATA_PATH (not a full sync).
6) install systemd --user services and timers for scheduled download of the
latest data.
EOF
generic_pause
clear
function is_installed() {
  if [ -x "$dpkg_q" ]; then
    # shellcheck disable=SC2016
    if "$dpkg_q" -W --showformat='${db:Status-Status}' "$1" 2>/dev/null | grep --quiet '^installed$'; then
      return 0
    fi
  elif [ -x "$dnf" ] || [ -x "$yum" ]; then
    if "$pkg_mgr" --quiet list installed "$1" 1>/dev/null 2>&1; then
      return 0
    fi
  else
    echo "exiting. cannot determine how to check for installed packages. cannot continue."; exit 1
  fi
  return 1
}
function check_sudo_is_installed() {
  sudo=$(command -v sudo)
  [ -x "$sudo" ] || { echo "exiting. sudo not found"; exit 1; }
  $sudo uptime 1>/dev/null 2>&1 || { echo "exiting. sudo rights issue detected."; exit 1; }
}
function install_prerequisites() {
  #set -x
  if is_variable_set pkg_to_install && [ "${#pkg_to_install[@]}" -gt 0 ]; then
    check_sudo_is_installed
    if [ -x "$apt" ] || [ -x "$yum" ] || [ -x "$dnf" ]; then
      if ! "$sudo" "$pkg_mgr" install "${pkg_to_install[@]}"; then
        echo "exiting. something went wrong with installing prerequisite packages."
        printf "package list: %s\n" "${pkg_to_install[*]}"
        exit 1
      fi
    elif false; then
      :
    else
      echo "exiting. cannot determine how to install packages. cannot continue."
      exit 1
    fi
  fi # end if some pkg to install
  #set +x
}
curl=$(command -v curl)
if ! [ -x "$curl" ]; then
if ! is_installed curl; then
pkg_to_install+=( curl )
fi
fi
if ! is_installed python3 ; then
pkg_to_install+=( python3 python3-pip )
elif ! is_installed python3-pip ; then
pkg_to_install+=( python3-pip )
fi
if is_debian_or_ubuntu; then
is_installed python3-venv || pkg_to_install+=( python3-venv )
fi
install_prerequisites
### # python3-venv exist in repo but not-installed.
### if ! is_installed python3-venv; then
###
### "$sudo" "$apt" install python3-pip python3-venv || { echo "exiting. something went wrong with installing python3 and python3-venv."; exit 1;}
###
#### # python3-virtualenv is not an available package, so fallback to pip
#### elif is_not_installed python3-virtualenv; then
####
#### "$sudo" "$apt" install python3-pip python3-virtualenv || { echo "exiting. something went wrong with installing python3 and python3-pip."; exit 1;}
####
### else
### echo "exiting. cannot determine prerequisite install strategy."
### fi
curl=$(command -v curl); [ -x "$curl" ] || { echo "exiting. curl not found. please install it."; exit 1; }
python=$(command -v python3); [ -x "$python" ] || { echo "exiting. python3 not found. cannot continue."; exit 1; }
"$python" -m pip help 1>/dev/null 2>&1 || { echo "exiting. something went wrong checking if python3-pip is available."; exit 1; }
"$python" -m venv -h 1>/dev/null 2>&1 || { echo "exiting. something went wrong checking if python3-venv is available."; exit 1; }
"$python" -m venv "${INSTALL_PATH}/venv" || { echo "exiting. something went wrong creating python venv."; exit 1; }
# shellcheck source=/dev/null
source "${INSTALL_PATH}/venv/bin/activate"
# now inside the venv
# execute individually in case tmp space is small/low
python3 -m pip install --upgrade pip || { echo "exiting. something went wrong installing/updating python pip."; exit 1; }
python3 -m pip install --upgrade boto3 || { echo "exiting. something went wrong installing/updating python package boto3."; exit 1; }
printf "\\nPlease enter the bucket info you have been provided:\\n\\n"
set +u
while [ -z "$BUCKET" ]; do read -rp "enter aws bucket name: " BUCKET; export BUCKET; echo; echo "value: $BUCKET"; done
while [ -z "$AWS_DEFAULT_REGION" ]; do read -rp "enter aws bucket region: " AWS_DEFAULT_REGION; export AWS_DEFAULT_REGION; echo; echo "value: $AWS_DEFAULT_REGION"; done
printf "\\nPlease copy/paste the aws credentails you have been provided:\\ninput will not be echoed.\\n\\n"
while [ -z "$AWS_ACCESS_KEY_ID" ]; do read -rsp "(no echo) enter aws access key id: " AWS_ACCESS_KEY_ID; export AWS_ACCESS_KEY_ID; printf '\nread bytes: %s\n' "$(echo -n "$AWS_ACCESS_KEY_ID"|wc -c)"; done
while [ -z "$AWS_SECRET_ACCESS_KEY" ]; do read -rsp "(no echo) enter aws secret access key: " AWS_SECRET_ACCESS_KEY; export AWS_SECRET_ACCESS_KEY; printf '\nread bytes: %s\n' "$(echo -n "$AWS_SECRET_ACCESS_KEY"|wc -c)"; done
set -u
curl --silent --show-error -L https://gist.githubusercontent.com/kyle0r/a6e29d2abf3da23e4c113c7897e5c351/raw/s3-helper.py > "${INSTALL_PATH}"/s3-helper.py
chmod +x "${INSTALL_PATH}/s3-helper.py"
[ -x "${INSTALL_PATH}/s3-helper.py" ] || { echo "exiting. s3-helper.ph not found or not executable."; exit 1; }
cd "$DATA_PATH" || { echo "exiting. could not cd to $DATA_PATH.."; exit 1; }
cat <<EOF
The base install is complete.
INSTALL_PATH=$INSTALL_PATH
DATA_PATH=$DATA_PATH
Now the systemd --user services and timers will be installed.
EOF
generic_pause
#set -x
# mkdir for systemd user service
mkdir -p ~/.config/systemd/user ~/.config/systemd/user/nets-afcr.service.d
# install systemd --user service
curl --silent --show-error -L https://gist.githubusercontent.com/kyle0r/a6e29d2abf3da23e4c113c7897e5c351/raw/nets-afcr.service > ~/.config/systemd/user/nets-afcr.service
cat <<EOF > ~/.config/systemd/user/nets-afcr.service.d/override.conf
[Service]
Environment=BUCKET=${BUCKET}
Environment=AWS_DEFAULT_REGION=${AWS_DEFAULT_REGION}
Environment=AWS_ACCESS_KEY_ID='${AWS_ACCESS_KEY_ID}'
Environment=AWS_SECRET_ACCESS_KEY='${AWS_SECRET_ACCESS_KEY}'
EOF
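# note: credentials can be rotated later by re-running this installer, or by
# editing the drop-in (systemctl --user edit nets-afcr.service) followed by a daemon-reload.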
# reload --user systemd
systemctl --user daemon-reload
clear
cat <<EOF
systemd --user service install completed.
The service will now be started; pay attention to any errors.
The following journal entry means the install and start was successful:
"Started AFCR service." or "Finished AFCR service." or similar.
EOF
generic_pause
# do a one-time functional check service start, and follow/tail the journal to verify service starts OK.
systemctl --user start nets-afcr.service ; journalctl --no-pager --user-unit nets-afcr.service
printf "\\n\\n%s\\n\\n" "if the install and service start was successful please continue."
generic_pause
# install systemd --user timer
curl --silent --show-error -L https://gist.githubusercontent.com/kyle0r/a6e29d2abf3da23e4c113c7897e5c351/raw/nets-afcr.timer > ~/.config/systemd/user/nets-afcr.timer
# reload --user systemd
systemctl --user daemon-reload
# enable the timer, so it will survive reboots
systemctl --user enable nets-afcr.timer
# start the timer
systemctl --user start nets-afcr.timer
# check the timer is configured as expected
systemctl --user --all list-timers
cat <<EOF
the systemd --user timer is now installed.
You should see it listed above. You may wish to adjust the timer schedule.
$ systemctl --user edit --full nets-afcr.timer
This script will now exit.
Tip: at your convenience reboot the system to verify the timer is active and configured as expected.
To check timers:
$ systemctl --all --user list-timers
EOF
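# nets-afcr.service - the systemd --user service unit installed by the script above
# to ~/.config/systemd/user/nets-afcr.service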
[Unit]
Description=AFCR service
[Service]
Type=oneshot
WorkingDirectory=-%h/nets-afcr-s3-systemd-path/data
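# Type=oneshot permits multiple ExecStart= lines; each one fetches the latest
# object for one report prefix. ExecStartPre recreates the (volatile) default
# DATA_PATH under /dev/shm in case it was lost to a reboot.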
ExecStartPre=/bin/mkdir -p /dev/shm/nets-afcr-s3
ExecStart=%h/nets-afcr-s3-systemd-path/venv/bin/python3 %h/nets-afcr-s3-systemd-path/s3-helper.py --log-level=WARNING --region=${AWS_DEFAULT_REGION} --bucket=${BUCKET} --command=latest --prefix=afcr/fraud/daily
ExecStart=%h/nets-afcr-s3-systemd-path/venv/bin/python3 %h/nets-afcr-s3-systemd-path/s3-helper.py --log-level=WARNING --region=${AWS_DEFAULT_REGION} --bucket=${BUCKET} --command=latest --prefix=afcr/fraud/monthly
ExecStart=%h/nets-afcr-s3-systemd-path/venv/bin/python3 %h/nets-afcr-s3-systemd-path/s3-helper.py --log-level=WARNING --region=${AWS_DEFAULT_REGION} --bucket=${BUCKET} --command=latest --prefix=afcr/cbk/daily
ExecStart=%h/nets-afcr-s3-systemd-path/venv/bin/python3 %h/nets-afcr-s3-systemd-path/s3-helper.py --log-level=WARNING --region=${AWS_DEFAULT_REGION} --bucket=${BUCKET} --command=latest --prefix=afcr/cbk/monthly
[Install]
WantedBy=default.target
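# nets-afcr.timer - the systemd --user timer unit installed to ~/.config/systemd/user/nets-afcr.timer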
[Unit]
Description=AFCR timer
Requires=nets-afcr.service
[Timer]
Unit=nets-afcr.service
# daily
OnCalendar=*-*-* 03:00:00 UTC
[Install]
WantedBy=timers.target
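# s3-helper.py - downloaded by the installer from this gist to ${INSTALL_PATH}/s3-helper.py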
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""python productivity tool for downloading files from s3 buckets"""
"""
vim settings suggestion:
:set ts=2 sw=2 expandtab number autoindent|colorscheme darkblue
"""
import os, sys
import boto3
import argparse
import logging
ENV = os.getenv
ARGS = None
ARGV = sys.argv.copy()
SCRIPT_NAME = os.path.basename(ARGV.pop(0))
COMMANDS = [\
('latest', 'download_latest_object')\
,('sync', 'sync_prefix')\
]
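# each entry maps a --command value to the name of its handler function,
# which main() looks up via globals() and calls.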
LOGLEVELS = [\
('DEBUG', logging.DEBUG), ('INFO', logging.INFO), \
('WARNING', logging.WARNING), ('ERROR', logging.ERROR), \
('CRITICAL', logging.CRITICAL) \
]
FORMAT = '%(levelname)s\t%(asctime)-15s\t%(message)s'
logger = logging.getLogger(SCRIPT_NAME)
# set the logger itself to the most verbose level and control verbosity
# per handler. The logger level caps what can reach the handlers:
# if it were set to WARNING, handlers could never see anything below WARNING.
# https://stackoverflow.com/q/17668633
logger.setLevel(logging.DEBUG)
LOG_FORMATTER = logging.Formatter(FORMAT)
# create console handler
ch = logging.StreamHandler()
ch.setLevel(logging.WARNING)
ch.setFormatter(LOG_FORMATTER)
logger.addHandler(ch)
# simple var to track current log level
currentLogLevel = logging.WARNING
def download_latest_object():
  """
  cite: https://stackoverflow.com/a/53423319
  """
  logger.info('attempting to download the latest file from s3://%s/%s' % (ARGS.bucket, ARGS.prefix) )
  get_last_modified = lambda obj: int(obj['LastModified'].strftime('%s'))
  s3 = boto3.client('s3', region_name=ARGS.region)
  paginator = s3.get_paginator( "list_objects_v2" )
  # Delimiter='/' prevents recursion
  # doc: https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/s3.html#S3.Paginator.ListObjectsV2.paginate
  page_iterator = paginator.paginate(Bucket=ARGS.bucket, Delimiter='/', Prefix=ARGS.prefix)
  for page in page_iterator:
    if "Contents" in page:
      latest_obj = [obj for obj in sorted( page["Contents"], key=get_last_modified)][-1]
  if not 'latest_obj' in locals() or latest_obj['Key'].endswith('/') or os.path.isdir(latest_obj['Key']):
    logger.warning('exiting. no object(s) detected at prefix s3://%s/%s' % (ARGS.bucket, ARGS.prefix) )
    return
  logger.info('attempting to download latest detected key: %s LastModified: %s' % (latest_obj['Key'], latest_obj['LastModified']))
  download_object_if_needed(latest_obj, s3)
def download_object(obj, s3):
  """
  download an s3 object, making dirs as required.
  a managed transfer which will perform a multipart download in multiple threads if necessary.
  ref: https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/s3.html#S3.Client.download_fileobj
  """
  dirname = os.path.dirname(obj['Key'])
  path_exists = os.path.exists(dirname)
  if not '' == dirname and not path_exists and not os.path.isdir(dirname):
    os.makedirs(dirname)
  with open(obj['Key'], 'wb') as f:
    logger.info('writing local key: %s' % obj['Key'])
    s3.download_fileobj(ARGS.bucket, obj['Key'], f)
  t = int(obj['LastModified'].strftime('%s'))
  os.utime( obj['Key'], times=(t, t) )
def download_object_if_needed(obj, s3):
  s3_key = obj['Key']
  local_exists = os.path.exists(s3_key)
  s3_mtime = int(obj['LastModified'].strftime('%s'))
  s3_size = int(obj['Size'])
  logger.info('key: %s, exists local: %s, s3 timestamp: %s, s3 size: %i' % ( s3_key, local_exists, s3_mtime, s3_size))
  if local_exists:
    local_mtime = int(os.path.getmtime(s3_key))
    local_size = int(os.path.getsize(s3_key))
    logger.info('local key: %s, local mtime: %i, local size: %i' % ( s3_key, local_mtime, local_size))
    if local_mtime == s3_mtime and local_size == s3_size:
      logger.info('local key: %s already exists with identical timestamp and size, skipping.' % s3_key)
    else:
      download_object(obj, s3)
  else:
    download_object(obj, s3)
def sync_prefix():
  """
  boto3 does not have an s3 sync method like the aws cli.
  here is a simple rsync-like implementation.
  ref: https://github.com/boto/boto3/issues/358
  logic: does s3 and local modified time and size differ? yes=overwrite no=skip.
  empty remote prefixes are not created locally.
  it would be possible to update the logic to also compare checksums
  ref: https://zihao.me/post/calculating-etag-for-aws-s3-objects/
  """
  logger.info('attempting to sync src s3://%s/%s to local dst' % (ARGS.bucket, ARGS.prefix) )
  s3 = boto3.client('s3', region_name=ARGS.region)
  paginator = s3.get_paginator( "list_objects_v2" )
  page_iterator = paginator.paginate(Bucket=ARGS.bucket, Delimiter='', Prefix=ARGS.prefix)
  for page in page_iterator:
    if "Contents" in page:
      for obj in page["Contents"]:
        if not obj['Key'].endswith('/'):
          download_object_if_needed(obj, s3)
def setLogLevel(newLevel, log_object):
  for level in LOGLEVELS:
    lvl, val = level
    if newLevel == lvl:
      log_object.setLevel(val)
      globals()['currentLogLevel'] = val
      return
def main():
  parser = argparse.ArgumentParser(
    description='''\
python productivity tool for downloading files from s3 buckets.
files are downloaded to the current working dir.
any relative prefix paths will be created automatically.
multipart objects are natively supported.
logic: does s3 and local modified time and size differ? yes=overwrite no=skip.
the "latest" command is non-recursive.
the "sync" command is recursive.
both support --prefix.
I suggest using env vars or cfg files to set the required sensitive config vars.
The script requires at least AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY or an equivalent cfg file.
cite: https://boto3.amazonaws.com/v1/documentation/api/latest/guide/configuration.html#using-environment-variables
cite: https://boto3.amazonaws.com/v1/documentation/api/latest/guide/configuration.html#using-a-configuration-file
'''
    ,epilog='''Author: Kyle M <kmiln@nets.eu>'''
    ,formatter_class=argparse.RawDescriptionHelpFormatter)
  parser.add_argument('--log-level', metavar='<level>', dest='log_level', action='store',
    help='modify the console log level.', default='WARNING',
    choices=[key for key, val in LOGLEVELS])
  parser.add_argument('--command', metavar='<command>', dest='command', action='store',
    help='the command to execute.', required=True,
    choices=[key for key, val in COMMANDS])
  parser.add_argument('--bucket', metavar='<bucket>', dest='bucket', action='store',
    help='the source s3 bucket.', required=True)
  parser.add_argument('--region', metavar='<region>', dest='region', action='store',
    help='the source s3 bucket region.', required=True)
  parser.add_argument('--prefix', metavar='<prefix>', dest='prefix', action='store',
    help='bucket key prefix.', required=False, default='')
  global ARGS
  ARGS = parser.parse_args(ARGV)
  if ARGS.log_level:
    setLogLevel(ARGS.log_level, ch)
  # normalise the prefix: strip any leading slash; otherwise ensure a trailing slash
  if ARGS.prefix.startswith('/'):
    ARGS.prefix = ARGS.prefix[1:]
  elif ARGS.prefix and not ARGS.prefix.endswith('/'):
    ARGS.prefix += '/'
  for cmd in COMMANDS:
    key, val = cmd
    if ARGS.command == key:
      globals()[val]()
      break
  logger.info('"%s" command completed. exiting.' % ARGS.command)

if __name__ == "__main__":
  main()