#!/usr/bin/python
# Commit queue stats
#
# Requires: sudo apt-get install python-pip; sudo pip install requests requests_cache numpy
import argparse
import collections
from datetime import timedelta, datetime
import itertools
import json
import numpy
import operator
import re
import requests
import requests_cache
import time
import sys
import multiprocessing
parser = argparse.ArgumentParser()
parser.add_argument("--clear", help="clear the cache before starting", action="store_true")
parser.add_argument("--progress", help="Show progres", action="store_true")
parser.add_argument("--base-url-stats", help="print base_url matching stats", action="store_true")
parser.add_argument("--chrome", action='store_true')
parser.add_argument('--count', type=int, default=2000, help='number of issues to fetch')
args = parser.parse_args()
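
# Overall flow: page through Rietveld's /search API until --count issues whose
# base_url matches the selected repository (blink by default, chrome with
# --chrome) have been collected, fetch each matching issue with its messages,
# then replay the commit-bot messages to reconstruct every CQ attempt and
# print aggregate stats.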

def make_throttle_hook(timeout=1.0):
    def hook(response, *args, **kwargs):
        print response.url
        if not getattr(response, 'from_cache', False):
            print "sleeping"
            time.sleep(timeout)
        return response
    return hook
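
# The hook above runs on every response; it only sleeps when the response came
# from the network rather than from the requests_cache, so the throttle applies
# to real Rietveld traffic only.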

# Rietveld allows up to 1000 results per search query. I don't know if that's better for the server or not.
per_request_search_limit = min(int(args.count / 2), 1000)
# Look at the most recent --count (default 2000) entries.
target_count = args.count

# Only looking at Blink patches by default; pass --chrome for Chromium patches
# (eventually we should support switching these more generally):
if args.chrome:
    base_url_shortname = "chrome"
    target_base_url_regexps = [
        re.compile(r"^svn://svn\.chromium\.org/chrome/trunk/src"),
        re.compile(r"^https://chromium\.googlesource\.com/chromium/chromium/src(?:\.git)?(?:@[\w\-]+)?$"),
        re.compile(r"^http://src\.chromium\.org/chrome/trunk/src"),
    ]
else:
    base_url_shortname = "blink"
    target_base_url_regexps = [
        re.compile(r"^svn://svn\.chromium\.org/blink/(?:trunk/?)?$"),
        re.compile(r"^https://chromium\.googlesource\.com/chromium/blink(?:\.git)?(?:@[\w\-]+)?$"),
        re.compile(r"^http://src\.chromium\.org/blink/(?:trunk/?)?$"),
    ]

# It seems reasonable to expect that Rietveld can handle 10 queries per second from this script.
# Do not reduce this value without asking someone on the infrastructure team first.
seconds_between_requests = 0.1
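
# Responses are cached on disk via requests_cache (cache name 'cq'), so repeated
# runs only hit the network for data not already cached (or after --clear).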
requests_cache.install_cache('cq')
session = requests_cache.CachedSession()
if args.clear:
    print "Clearing cache..."
    session.cache.clear()
session.hooks = {'response': make_throttle_hook(seconds_between_requests)}

def fetch_recent_issues(limit, cursor=None):
    search_url = "https://codereview.chromium.org/search?format=json&limit=%d" % limit
    if cursor:
        search_url += "&cursor=%s" % cursor
    response = session.get(search_url)
    return response.json()

def matches_target_base_url(url):
    for regexp in target_base_url_regexps:
        match = regexp.match(url)
        if match:
            return True
    return False

def issue_patch_string(issue_id, patch_id):
    return "%s:%s" % (issue_id, patch_id)

def issue_display_url(issue_id):
    return "https://codereview.chromium.org/%s" % issue_id

def issue_url(issue_id):
    return "https://codereview.chromium.org/api/%s?messages=true" % issue_id

def date_from_iso_str(iso_str):
    # e.g. 2013-10-17 16:43:04.391480
    return datetime.strptime(iso_str, "%Y-%m-%d %H:%M:%S.%f")

def round_timedelta(delta):
    # We never care about the milliseconds when printing timedeltas:
    return timedelta(seconds=round(delta.total_seconds()))

def fetch_issue(issue_id):
    url = issue_url(issue_id)
    json = session.get(url).json()
    cache_key = session.cache._url_to_key(url)
    _, now = session.cache.get_response_and_time(cache_key)
    return json, now
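
# A CQAttempt records one trip of a single patchset through the commit queue:
# which issue/patchset, who started it, when it started and ended, and how it
# ended (its result code).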

class CQAttempt(object):
    def __init__(self):
        self.result = None
        self.patch_id = None
        self.issue_id = None
        self.author = None
        self.start_time = None
        self.end_time = None

    @property
    def issue_patch_string(self):
        return issue_patch_string(self.issue_id, self.patch_id)

    @property
    def duration(self):
        return self.end_time - self.start_time
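
# CQAnalyzer is a small state machine. process_message() walks an issue's
# messages in order; _start() marks the beginning of a CQ attempt when
# commit-bot posts its "CQ is trying da patch" message, and _end() records the
# attempt as a CQAttempt with a result code (SUCCESS, APPLY, TRY, etc.).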

class CQAnalyzer(object):
    def __init__(self):
        self.state = None
        self.issue_id = None
        self.author = None
        self.patch_id = None
        self.start_time = None
        self.attempts = []
        self.manual_commits = set()
        self.no_try_issues = set()
        # issue_id -> time since CQ start
        self.in_progress = {}

    start_regexp = re.compile("""CQ is trying da patch. Follow status at
https://chromium-status.appspot.com/cq/(?P<author>.*)/(?P<issue_id>\d+)/(?P<patch_id>\d+)""")
    committed_regexp = re.compile("Change committed as (?P<revision>\d+)")
    manual_commit_regexp = re.compile("Committed patchset #(?P<patch_id>\d+) manually as r(?P<revision>\d+)")
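
    # The regexps above match commit-bot's literal message texts: the "CQ is
    # trying da patch" start message, "Change committed as <revision>", and
    # "Committed patchset #N manually as r<revision>" for manual dcommits.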

    def _reset(self):
        self.state = None
        self.author = None
        self.patch_id = None
        self.start_time = None

    def _start(self, patch_id, start_time, author=None):
        if self.state == "START" and self.patch_id == patch_id:
            # print "Restarted for patchset %s on %s" % (patch_id, issue_display_url(self.issue_id))
            self._end("RESTART", start_time)
        self.state = "START"
        self.patch_id = patch_id
        self.author = author
        assert start_time
        self.start_time = start_time

    def _end(self, result, end_time):
        if self.state != "START":
            print "ERROR: %s for %s without start??" % (result, issue_display_url(self.issue_id))
            self._reset()
            return
        attempt = CQAttempt()
        attempt.result = result
        attempt.patch_id = self.patch_id
        attempt.issue_id = self.issue_id
        attempt.start_time = self.start_time
        attempt.end_time = end_time
        self.attempts.append(attempt)
        self._reset()

    def _start_and_end(self, result, date):
        self._start(result, date)
        self._end(result, date)

    def _manual_commit(self, patch_id):
        self.manual_commits.add(issue_patch_string(self.issue_id, patch_id))

    def process_message(self, message):
        text = message['text']
        date = date_from_iso_str(message['date'])
        match = self.manual_commit_regexp.match(text)
        if match:
            if self.start_time:
                self._end("MANUAL_COMMIT", date_from_iso_str(self.issue['modified']))
            patch_number = int(match.group('patch_id'))
            # gcl dcommit talks in terms of patch numbers 1-5, we need to map those to patchset ids:
            patch_id = self.issue['patchsets'][patch_number - 1]
            self._manual_commit(patch_id)
            return
        if message['sender'] != 'commit-bot@chromium.org':
            return
        match = self.start_regexp.match(text)
        if match:
            self._start(match.group('patch_id'), date, match.group('author'))
            return
        match = self.committed_regexp.match(text)
        if match:
            self._end("SUCCESS", date)
            return
        cq_end_messages = {
            "TRY": "Retried try job",
            "APPLY": "Failed to apply patch",
            "COMPILE": "Sorry for I got bad news for ya",
            "DESCRIPTION_CHANGED": "Commit queue rejected this change because the description",
            "REVIEWERS_CHANGED": "List of reviewers changed.",  # This is too conservative.
            "PATCH_CHANGED": "Commit queue failed due to new patchset.",  # User caused.
            # FAILED_TO_TRIGGER is a very serious failure, unclear why it happens!
            "FAILED_TO_TRIGGER": "Failed to trigger a try job on",
            # BINARY_FILE is just a bug in the CQ.
            "BINARY_FILE": "Can't process patch for file",
            # Unclear why UPDATE_STEP happens. Likely broken bots, shouldn't fail patches!
            "UPDATE_STEP": "Step \"update\" is always a major failure.",
        }
        for result, message_text in cq_end_messages.items():
            if text.startswith(message_text):
                self._end(result, date)
                return
        cq_fail_to_start_messages = {
            "NO_COMMENTS": "No comments yet.",
            "NO_LGTM": "No LGTM from a valid reviewer yet.",
        }
        for result, message_text in cq_fail_to_start_messages.items():
            if text.startswith(message_text):
                self._start_and_end(result, date)
                return
        print "Error %s has unknown message: %s" % (issue_display_url(self.issue_id), text)
        self._end("UNKNOWN", date)

    def start_issue(self, issue, now):
        # We may be dealing with cached data, so cache our concept of "now".
        self.now = now
        self.issue = issue
        self.issue_id = issue['issue']

    def end_issue(self, issue):
        issue_id = issue['issue']
        assert issue_id == self.issue_id
        if self.start_time:
            if self.issue['commit']:
                print "%s in progress? %s" % (round_timedelta(self.now - self.start_time), issue_display_url(self.issue_id))
                self._end("IN_PROGRESS", self.now)
            else:
                # There are various reasons why the commit box may have been unchecked, including that
                # the issue may be closed now. If those are not from a user, then that's a bug in rietveld
                # and out of scope for this script. For now we're just ignoring cases where the box was unchecked.
                # We could log an _end() event, but which time would we use? last_modified has at best a weak
                # correlation with the issue close or CQ checkbox.
                self._reset()
        # _reset() shouldn't be needed, but just in case.
        self._reset()
        self.issue_id = None
        self.issue = None

    def process_issue(self, issue, now):
        issue_id = issue['issue']
        if re.search('^NOTRY=[tT][rR][uU][eE]$', issue['description'], re.M):
            self.no_try_issues.add(issue_id)
        self.start_issue(issue, now)
        for message in issue['messages']:
            self.process_message(message)
            # print "%(date)s: %(text)s" % message
        self.end_issue(issue)

    # FIXME: Why am I writing my own table formatting? This must exist in python...
    def format_line(self, values, aligns, widths):
        values = map(str, values)
        args = zip(values, aligns, widths)
        return " ".join([align(value, width) for value, align, width in args])

    def print_attempts_table(self):
        result_getter = operator.attrgetter('result')
        attempts = sorted(self.attempts, key=result_getter)
        col_aligns = (str.rjust, str.ljust, str.rjust, str.rjust)
        col_widths = (5, 20, 16, 16)
        col_headers = ("count", "result", "mean duration", "median duration")
        print
        print "CQ Attempts by result (there can be more than one attempt on the same patch)"
        print self.format_line(col_headers, col_aligns, col_widths)
        print "-" * (sum(col_widths) + len(col_widths) - 1)
        for result, group_itr in itertools.groupby(attempts, result_getter):
            group = list(group_itr)
            durations = map(operator.attrgetter('duration'), group)
            mean = sum(durations, timedelta()) / len(group)
            durations_seconds = map(timedelta.total_seconds, durations)
            median_seconds = numpy.median(durations_seconds)
            median = timedelta(seconds=round(median_seconds))
            values = (len(group), result, round_timedelta(mean), median)
            print self.format_line(values, col_aligns, col_widths)
print
print "Fetching and analyzing %d most recent issues..." % target_count
analyzer = CQAnalyzer()
total_fetch_count = 0
log_every = 100
issue_count = 0
cursor = None
url_counts = collections.Counter()
matching_issue_ids = []
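
# Each /search page returns up to per_request_search_limit issues plus a cursor
# for the next page; keep paging until enough matching issues have been collected.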
while issue_count < target_count:
    search_results = fetch_recent_issues(per_request_search_limit, cursor)
    cursor = search_results['cursor']
    total_fetch_count += per_request_search_limit
    if args.base_url_stats:
        url_counts.update([result['base_url'] for result in search_results['results']])
    for result in search_results['results']:
        if not matches_target_base_url(result['base_url']):
            continue
        issue_count += 1
        if args.progress and (issue_count % log_every == 0):
            print "%d/%d" % (issue_count, target_count)
        matching_issue_ids.append(result['issue'])

# for i, issue_id in enumerate(matching_issue_ids):
#     if i % 100 == 0:
#         print 'Now fetching', i
#     issue, now = fetch_issue(issue_id)
pool = multiprocessing.Pool(100)
for issue, now in pool.map_async(fetch_issue, matching_issue_ids).get(99999):
    analyzer.process_issue(issue, now)
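
# Report: optional base_url match counts (--base-url-stats), then how many
# matching issues were found, how many tried the CQ, how many were committed
# manually or marked NOTRY, and a per-result table of attempt durations.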
if args.base_url_stats:
    print
    for url, count in url_counts.most_common():
        matches_target = matches_target_base_url(url)
        print "%s %s %s" % (matches_target, count, url)
    print

print "Found %s %s issues (in the %s most recent issues)." % (issue_count, base_url_shortname, total_fetch_count)

cq_issues = set([attempt.issue_id for attempt in analyzer.attempts])
cq_issue_count = len(cq_issues)
cq_patches = set([attempt.issue_patch_string for attempt in analyzer.attempts])
cq_patch_count = len(cq_patches)
print "%s issues (%d%%) (%d patches) tried the CQ, resulting in %d CQ attempts." % (
    cq_issue_count, 100 * cq_issue_count / issue_count, cq_patch_count, len(analyzer.attempts))

no_try_issues = len(cq_issues & analyzer.no_try_issues)
gave_up_patches = len(cq_patches & analyzer.manual_commits)
print "%s of those (%d%%) were committed manually (gave up). %s (%d%%) were marked NOTRY." % (
    gave_up_patches, 100 * gave_up_patches / cq_issue_count,
    no_try_issues, 100 * no_try_issues / cq_issue_count)
analyzer.print_attempts_table()
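
# Group attempts by (issue, patchset). A patch that eventually got a SUCCESS but
# needed more than one non-RESTART attempt implies a user had to re-check the CQ
# box after a false rejection.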
identifier_getter = operator.attrgetter('issue_patch_string')
attempts = sorted(analyzer.attempts, key=identifier_getter)
extra_user_clicks = 0
eventually_success_durations = []
eventually_success_attempts = []
for identifier, group_itr in itertools.groupby(attempts, identifier_getter):
    group = list(group_itr)
    if group[0].issue_id in analyzer.no_try_issues:
        continue
    results = map(operator.attrgetter('result'), group)
    # print "%s(%s): %s %s" % (identifier, len(group), total_time, results)
    if "SUCCESS" in results:
        # "RESTART" events may not require a user re-click?
        user_initiated = filter(lambda result: result != "RESTART", results)
        extra_user_clicks += len(user_initiated) - 1
        durations = map(operator.attrgetter('duration'), group)
        eventually_success_durations.append(sum(durations, timedelta()).total_seconds())
        eventually_success_attempts.append(len(group))

print "%s CQ starts (%d%%) were due to a previous false negative (CQ landed a patch it previously rejected)." % (
    extra_user_clicks, 100 * extra_user_clicks / len(analyzer.attempts))

print 'Patches which eventually land, percentiles:'
for i in [10, 25, 50, 75, 90, 95, 99]:
    eventually_success_durations_p = round(numpy.percentile(eventually_success_durations, i) / (60 * 60), 3)
    eventually_success_attempts_p = numpy.percentile(eventually_success_attempts, i)
    print "%d: %.3f hrs" % (i, eventually_success_durations_p)