Skip to content

Instantly share code, notes, and snippets.

@jlinoff
Last active August 7, 2021 19:33
Show Gist options
  • Save jlinoff/847089bab1ed749533f1d9e2e17e09a1 to your computer and use it in GitHub Desktop.
Save jlinoff/847089bab1ed749533f1d9e2e17e09a1 to your computer and use it in GitHub Desktop.
jenkins tools
#!/usr/bin/env python3
# pylint: disable=line-too-long
r'''
Extract the Jenkins build data into a JSON file for insertion into a
database where it can be used for graphing in grafana.
The following environment variables control the behavior of this program.
JFN Name of the text file to write all of the job names to.
This is mainly used for debugging.
The default is not to write the job names.
Example: JFN=/tmp/alljobs.txt
EFN Name of the error file.
This JSON file contains the errors that were caught.
Default: 'errs.json'
DAYS The number of days (last N days) to process. The default is 30.
BEG Begin date in ISO 8601 format. Default is DAYS days ago.
END End date in ISO 8601 format. Default is now.
MAX Maximum number of builds per job. Useful for cases
where there are jobs with thousands of builds. The default is 0 (all).
MAX=1 retrieves the latest build for each branch.
choose MAX=1 DAYS=1825 to get the latest build searching back 5 years.
choose MAX=1 DAYS=3650 to get the latest build searching back 10 years.
NOWARN NOWARN=1 turns off warnings.
TIMEOUT Sets the timeout in seconds. The default is 300.
This will vary considerably for each site.
I typically use 7200 for my site.
PREFIX The top level prefix regex for job names. The default is '^eng'.
This is also site specific.
VERBOSE VERBOSE=1 turns on verbose mode, status messages are written
to stderr. VERBOSE=2 turns on very verbose mode.
Args:
credentials_file The JSON file containing the credentials. A dictionary with three fields:
username, password, url.
output_file The output JSON file containing the data.
Usage:
$ time VERBOSE=1 TIMEOUT=7200 ./extract.py creds.json eng-all.json # very long time
$ time VERBOSE=1 TIMEOUT=7200 MAX=1000 ./extract.py creds.json eng-all-1000.json
$ time VERBOSE=2 DAYS=60 TIMEOUT=7200 MAX=1000 ./extract.py creds.json extract-60.json
$ time VERBOSE=2 DAYS=7 TIMEOUT=7200 ./extract.py creds.json eng-7.json
$ time VERBOSE=2 PREFIX='^eng/platform' DAYS=7 TIMEOUT=7200 ./extract.py creds.json eng.json
$ time VERBOSE=1 PREFIX='^eng/platform' DAYS=7 TIMEOUT=7200 ./extract.py jcreds.json eng.json
$ time VERBOSE=1 PREFIX='^eng/platform-pipe' DAYS=30 TIMEOUT=7200 ./extract.py jcreds.json eng.json
$ time VERBOSE=1 PREFIX='^eng' DAYS=30 TIMEOUT=7200 ./extract.py jcreds.json eng-30.json
$ time VERBOSE=1 PREFIX='^eng' DAYS=60 TIMEOUT=7200 ./extract.py jcreds.json eng-60.json
$ time VERBOSE=1 PREFIX='^eng' DAYS=90 TIMEOUT=7200 ./extract.py jcreds.json eng-90.json
$ # Get the latest builds for all branches and all jobs.
$ time VERBOSE=1 PREFIX='^eng' NOWARN=1 DAYS=3650 MAX=1 TIMEOUT=7200 ./extract.py jcreds.json eng-latest.json
The output file is a JSON file with an array of build entries,
each with the following fields:
1. 'status' Status string: SUCCESS, FAILURE, ABORTED, etc.
2. 'timestamp' Timestamp in ISO 8601 format.
3. 'duration' Duration in seconds.
4. 'running' Is the build running flag.
5. 'url' The build URL.
6. 'project' The build project.
7. 'branch' The build branch.
8. 'number' The build number.
It can be further processed to insert the data into a database table.
'''
# pylint: enable=line-too-long
import datetime
import inspect
import json
import os
import re
import sys
from getpass import getpass
from typing import Tuple, TextIO, List
from jenkinsapi.jenkins import Jenkins # pylint: disable=import-error
import requests
import dateutil.parser # pylint: disable=import-error
# Module variables.
VERBOSE = int(os.getenv('VERBOSE', '0'))
JFN = os.getenv('JFN', '')
EFN = os.getenv('EFN', 'errs.json')
MAX = int(os.getenv('MAX', '0'))
NOW = datetime.datetime.utcnow()
DAYS = int(os.getenv('DAYS', '30'))
DAYS_AGO = datetime.timedelta(days=DAYS)
BEG = os.getenv('BEG', (NOW - DAYS_AGO).isoformat(timespec='seconds') + '+00:00')
END = os.getenv('END', NOW.isoformat(timespec='seconds') + '+00:00')
BEG_DATE = dateutil.parser.parse(BEG) # pylint: disable=no-member
END_DATE = dateutil.parser.parse(END) # pylint: disable=no-member
PREFIX = os.getenv('PREFIX', '^eng')
TIMEOUT = float(os.getenv('TIMEOUT', '300'))
NOWARN = bool(int(os.getenv('NOWARN', '0')))
def info(msg: str, ofp: TextIO = sys.stderr, end='\n', level=1):
'''Print a verbose info message.
Args:
msg: The message
ofp: The output file.
end: The message terminator.
level: The level of the caller in the stack.
'''
lnum = inspect.stack()[level].lineno
print(f'INFO:{lnum}: {msg}', file=ofp, end=end)
def warn(msg: str, ofp: TextIO = sys.stderr, end='\n', level=1):
'''Print a verbose warning message.
Args:
msg: The message
ofp: The output file.
end: The message terminator.
level: The level of the caller in the stack.
'''
if not NOWARN:
print('\x1b[35m', end='', file=ofp)
lnum = inspect.stack()[level].lineno
print(f'WARNING:{lnum}: {msg}', file=ofp, end=end)
print('\x1b[0m', end='', file=ofp)
def err(msg: str, ofp: TextIO = sys.stderr, end='\n', level=1):
'''Error message.
Args:
msg: The message
ofp: The output file.
end: The message terminator.
level: The level of the caller in the stack.
'''
lnum = inspect.stack()[level].lineno
print('\x1b[31m', end='', file=ofp)
print(f'ERROR:{lnum}: {msg}', file=ofp, end=end)
print('\x1b[0m', end='', file=ofp)
def get_jenkins_creds(fname: str) -> Tuple[str,str,str]:
'''
Load the jenkins credentials into a dict object
for use with get_jenkins_server function.
There are three keys: username, password and url.
Behind a firewall the credentials password is normally a token.
That is obtained by creating a global credentials user in jenkins.
If any of the fields are missing, prompt for them.
Returns:
creds: dictionary of the jenkins credentials
'''
creds = {
'username': '',
'password': '',
'url': '',
}
with open(fname) as ifp:
string = ifp.read().strip()
try:
creds = json.loads(string)
except json.decoder.JSONDecodeError as exc:
err(f'failed to decode credentials file: {fname}: {exc}')
# Prompt for missing fields.
for key in ['username', 'password', 'url']:
if key in creds and not creds[key]:
if key == 'password':
creds[key] = getpass(f'{key.capitalize()}? ')
else:
creds[key] = input(f'{key.capitalize()}? ')
# Verify no bad input.
for key in ['username', 'password', 'url']:
if key in creds and not creds[key]:
err(f'missing value for "{key}" in {fname}')
sys.exit(1)
return creds
def get_jenkins_server(creds: dict) -> Jenkins:
'''
Get the jenkins server object.
It is used for all subsequent interface calls.
The object construction loads ALL of the jobs when not in lazy mode
which can take a few minutes.
Args:
creds: The login credentials in a dict with username, password and url.
Returns:
server: The jenkins server object.
'''
url = creds['url']
if VERBOSE:
info(f'loading jenkins server data from {url}')
start = datetime.datetime.now()
server = Jenkins(username=creds['username'],
password=creds['password'],
baseurl=url,
timeout=TIMEOUT,
lazy=True)
if VERBOSE:
elapsed = datetime.datetime.now() - start
info(f'loaded jenkins server in {elapsed.total_seconds()} seconds')
return server
def get_server_jobs_names(server: Jenkins) -> List[str]:
'''Get the jenkins job names.
Args:
server: The jenkins server object.
Returns:
list: The list of job names.
'''
if VERBOSE:
info('loading server job names')
start = datetime.datetime.now()
job_names = server.keys()
if JFN:
if VERBOSE:
info(f'writing {len(job_names)} job names to "{JFN}"')
with open(JFN, 'w') as ofp:
for job_name in job_names:
ofp.write(job_name + '\n')
if VERBOSE:
elapsed = datetime.datetime.now() - start
info(f'loaded {len(job_names)} jobs names in {elapsed.total_seconds()} seconds')
return job_names
def write_failed_errors(failed: list):
'''Save the failures for later debugging.
Args:
failed: The list of failures.
'''
if failed and EFN:
err(f'{len(failed)} processing failures written to "{EFN}"\n')
with open(EFN, 'w') as efp:
efp.write(json.dumps(failed, indent=4) + '\n')
def process_jobs(server: Jenkins, ofp: TextIO): # pylint: disable=too-many-locals,too-many-branches, too-many-statements
'''Process jobs.
Args:
server: The jenkins server object.
ofp: The output file pointer.
'''
start = datetime.datetime.now()
prefix = re.compile(PREFIX)
job_names = get_server_jobs_names(server)
# don't user server.get_jobs() in the for-loop because it would
# retrieve job info objects for all jobs even the filtered ones
# which would affect performance in cases where a subset of jobs
# is desired.
failed = []
recs = {
'meta': {
'date': datetime.datetime.now().isoformat(timespec='seconds'),
'days': DAYS,
'beg': BEG,
'end': END,
'timeout': TIMEOUT,
'prefix': PREFIX,
'max': MAX,
'efn': EFN,
'jfn': JFN,
'time': 0,
'errors': [],
},
'builds': []
}
njobs = len(job_names)
for i, job_name in enumerate(job_names, start=1):
if not prefix.search(job_name):
if VERBOSE > 1:
warn(f"skipping job name that doesn't match '{PREFIX}': '{job_name}' {i} of {njobs}")
continue
if VERBOSE:
jrem = njobs - i
info(f'\x1b[1mJOB: {job_name} {i} of {njobs} ({jrem})\x1b[0m')
_top, project, branch = job_name.split('/')
# get the job information.
try:
job_instance = server.get_job(job_name)
except requests.exceptions.HTTPError as exc:
err(f'request timed out for server.get_job(job_name): {exc}')
failed.append({'type': 'job', 'name': job_name, 'exc': str(exc)})
continue
# get the build_ids
try:
build_ids = list(job_instance.get_build_ids())
if MAX:
ntrunc = len(build_ids) - MAX if len(build_ids) > MAX else 0
if ntrunc > 0:
warn(f"MAX={MAX} skipping {ntrunc} jobs out of {len(build_ids)}")
build_ids = build_ids[:MAX] # truncate
except requests.exceptions.HTTPError as exc:
err(f'request timed out for job_instance.get_build_ids(): {exc}')
failed.append({'type': 'job', 'name': job_name, 'exc': str(exc)})
continue
nbuilds = len(build_ids)
for j, build_id in enumerate(build_ids, start=1):
# get the build information
if VERBOSE:
rem = nbuilds - j
info(f'BUILD: {len(recs)+1} {job_name}/{build_id} {j} of {nbuilds} ({rem})')
try:
build = job_instance.get_build(build_id)
except requests.exceptions.HTTPError as exc:
err(f'request timed out for job_instance.get_build(build_id): {exc}')
failed.append({'type': 'job', 'name': job_name, 'build_id': build_id, 'exc': exc})
continue
if VERBOSE:
info(f'STATUS: {build.get_status()} {build.get_build_url()}')
dts = build.get_timestamp().isoformat(timespec='seconds')
status = build.get_status() if build.get_status() else 'RUNNING'
rec = {
'project': project,
'branch': branch,
'number': build_id,
'running': build.is_running(),
'status': status,
'duration': build.get_duration().total_seconds(),
'timestamp': dts,
'url': build.get_build_url()
}
if VERBOSE > 1:
info(f'DTS: {dts} {BEG} {END}')
info(json.dumps(rec, indent=4))
if dts < BEG:
if VERBOSE:
warn(f'too old - skipping {rem} builds earlier than BEG for this job: '
f'{dts} < {BEG}')
break # all subsequent builds are earlier
if dts > END:
if VERBOSE:
warn(f'too new - skipping builds later than END for this job: {dts} > {END}')
continue # do not break!! need to get the earlier dates
recs['builds'].append(rec)
ofp.write(json.dumps(recs))
elapsed = datetime.datetime.now() - start
recs['meta']['time'] = elapsed.total_seconds()
recs['meta']['errors'] = failed
if VERBOSE:
info(f'extracted {len(recs["builds"])} build records in {elapsed.total_seconds()} seconds')
write_failed_errors(failed)
def main():
'''main entry point
'''
cfile = ''
ofp = sys.stdout
if len(sys.argv) > 1:
cfile = sys.argv[1]
if len(sys.argv) > 2:
ofp = open(sys.argv[2], 'w') # pylint: disable=consider-using-with
if VERBOSE:
ofn = sys.argv[2] if len(sys.argv) > 2 else 'sys.stdout'
info('Parameters')
info(f' DAYS : {DAYS}')
info(f' BEG : {BEG}')
info(f' END : {END}')
info(f' MAX : {MAX}')
info(f' PREFIX : "{PREFIX}"')
info(f' TIMEOUT : {TIMEOUT}')
info(f' VERBOSE : {VERBOSE}')
info(f' EFN : {EFN}')
info(f' JFN : {JFN}')
info(f' ofn : "{ofn}"')
creds = get_jenkins_creds(cfile)
if VERBOSE:
info(f' username : {creds["username"]}')
info(f' url : {creds["url"]}')
server = get_jenkins_server(creds)
process_jobs(server, ofp)
if ofp != sys.stdout:
ofp.close()
if __name__ == '__main__':
main()
#!/usr/bin/env python3
'''
Convert extract format files into SQL.
Typical usage:
$ time ./convert.py extract.json builds builds.sql
$ # ^ ^ ^
$ # | | +---- output file name
$ # | +----------- SQL table name
$ # +------------------------ input file name
'''
import json
import sys
from typing import TextIO
def tdef(tname: str, meta: str, ofp: TextIO=sys.stdout):
'''Output the table definition
Args:
tname: The table name
meta: The meta table name
ofp: The output file pointer
'''
ofp.write(f'''
DROP TABLE IF EXISTS {tname} CASCADE;
CREATE TABLE IF NOT EXISTS {tname} (
id SERIAL PRIMARY KEY,
status TEXT NOT NULL,
time TIMESTAMPTZ NOT NULL,
duration DECIMAL NOT NULL,
project TEXT NOT NULL,
branch TEXT NOT NULL,
number DECIMAL NOT NULL,
url TEXT NOT NULL
);
COMMENT ON TABLE {tname} IS 'Jenkins build data';
COMMENT ON COLUMN {tname}.status IS 'Result like SUCCESS or FAILURE';
COMMENT ON COLUMN {tname}.time IS 'Build completed timestamp';
COMMENT ON COLUMN {tname}.duration IS 'Build duration in seconds';
COMMENT ON COLUMN {tname}.project IS 'Build project name';
COMMENT ON COLUMN {tname}.branch IS 'Build branch name: "PR-1"';
COMMENT ON COLUMN {tname}.number IS 'Build number';
COMMENT ON COLUMN {tname}.url IS 'Build URL';
DROP TABLE IF EXISTS {meta} CASCADE;
CREATE TABLE IF NOT EXISTS {meta} (
id SERIAL PRIMARY KEY,
key TEXT NOT NULL,
value TEXT NOT NULL
);
COMMENT ON TABLE {meta} IS 'Jenkins build meta data';
COMMENT ON COLUMN {meta}.key IS 'Key name';
COMMENT ON COLUMN {meta}.value IS 'Key value';
''')
def populate_meta(meta: str, recs: dict, ofp: TextIO=sys.stdout):
'''Populate the meta table.
Args:
meta: The meta table name
recs: The extracr JSON data
ofp: The output file pointer
'''
ofp.write(f'''
INSERT INTO {meta} (key, value)
VALUES
''')
for i, key in enumerate(recs['meta']):
if i:
ofp.write(',\n')
value = recs['meta'][key]
ofp.write(f'''\
('{key}', '{value}')\
''')
ofp.write(';\n')
def populate_builds(tname: str, recs: dict, ofp: TextIO=sys.stdout):
'''Populate the builds data
Args:
tname: The table name
recs: The extracr JSON data
ofp: The output file pointer
'''
ofp.write(f'''
INSERT INTO {tname} (status, time, duration, project, branch, number, url)
VALUES
''')
for i, rec in enumerate(recs['builds']):
status = rec['status']
timestamp = rec['timestamp']
duration = int(rec['duration'])
project = rec['project']
branch = rec['branch']
number = rec['number']
url = rec['url']
if not timestamp.endswith('+00:00'):
timestamp += '+00:00'
if i:
ofp.write(',\n')
ofp.write(f'''\
('{status}', '{timestamp}', {duration}, '{project}', '{branch}', '{number}', '{url}')\
''')
ofp.write(';\n')
def main():
'''main
'''
ifn = sys.argv[1] # input file name
tname = sys.argv[2] # table name
ofn = sys.argv[3] # output file name
meta = f'{tname}_meta'
with open(ifn) as ifp:
recs = json.loads(ifp.read())
with open(ofn, 'w') as ofp:
tdef(tname, meta, ofp)
populate_meta(meta, recs, ofp)
populate_builds(tname, recs, ofp)
if __name__ == '__main__':
main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment