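"""Classify failed Jenkins CI builds for tidb/tikv GitHub PRs.

The script queries InfluxDB for failed `/run-all-tests` PR builds in a time
window, scans each build's log against the regex rules defined below, and
prints the CI link together with the detected failure reasons.
"""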
import codecs
import os
import re
import json
import time
from datetime import datetime

from influxdb import InfluxDBClient

from error_log import _LOG_PATH_PAT

ci_base_link = ("https://internal.pingcap.net/idc-jenkins/blue/organizations/jenkins/"
                "{job}/detail/{job}/{build}/pipeline")
cli = InfluxDBClient(port="30409", host="172.16.4.4")
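# Regex rules for classifying build-log failures, grouped into 'generic',
# 'test', and 'env' categories.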
rule_build_issues = dict(
    generic=dict(
        plugin_ver_mismatch="[FATAL].*?plugin was built with a different version of package"
    ),
    test=dict(
        ghpr_test_fail=r"test result: FAILED",
        ut_fail_not_found_detail=r'^\[.*?\] FAIL$',
        integration_test_fail=r'level=fatal msg=',
        make_test_fail=r"make: \*.*?Error",
    ),
    # This section is also logged separately, in addition to the main DB
    env=dict(
        dns_resolve_failure=r"Could not resolve host",
        pod_vanish=r"\[get\] for kind: \[Pod\] with name: \[(.*?)\] in namespace: \[jenkins-ci\] failed",
        http_500=r"500 Internal Server Error",
        kill_signal=r"signal killed|signal interrupt|Killing processes",
        core_dumped=r"core dumped",
        rewrite_error=r"Rewrite error",
        connection_closed=r"java\.nio\.channels\.ClosedByInterruptException",
        connection_reset=r"[Cc]onnection reset",
        socket_timeout=r"java\.net\.SocketTimeoutException",
        socket_close=r"java\.net\.SocketException: Socket closed",
    )
)
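# The helpers below flatten the nested rule / count dicts; flattened keys are
# joined as "<group>_<issue>", e.g. "test_ghpr_test_fail".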
def flatten_rule_group_dict(rule_group_dict):
    return {group + "_" + k: rule_group_dict[group][k] for group in rule_group_dict for k in rule_group_dict[group]}


def flatten_count_problem_types(count_dict):
    return [count_dict[group][k] for group in count_dict for k in count_dict[group]]
def get_issue_json_desc(count_problem_dict):
    """
    Construct the JSON description string used in the SQL insert statement.
    :param count_problem_dict: nested {group: {issue: count}} dict
    :return: str
    """
    return json.dumps(count_problem_dict)
def gen_classified_issue_count(job, build, match_occr_as_count=False):
    """
    Scan the log of a single build and classify its failures.
    Returns (count_problem_types, funcs): per-category issue counts and, for
    tidb unit-test style jobs, a counter of the failing test functions.
    """
    log_path = _LOG_PATH_PAT.format(job, build)
    count_problem_types = {
        group: {issue: 0 for issue in rule_build_issues[group]}
        for group in rule_build_issues
    }
    funcs = dict()
    if not os.path.isfile(log_path):
        return count_problem_types, funcs
    with codecs.open(log_path, 'r', 'utf-8') as f:
        lines = f.readlines()
    matched_misc, matched_specific, matched_other = False, False, False
    for index, line in enumerate(lines):
        if job in ("tidb_ghpr_unit_test", "tidb_ghpr_check", "tidb_ghpr_check_2"):
            if "FAIL:" in line or "PANIC:" in line or "WARNING: DATA RACE" in line:
                if "FAIL: TestT " in line:
                    # skip TestT, the gocheck test-suite entry point
                    continue
                if "WARNING: DATA RACE" in line:
                    # the identifier of interest is reported two lines below the warning
                    fail_line = lines[index + 2].strip()
                    fail_func = ':'.join(["DATA_RACE", fail_line.split()[1]])
                    funcs[fail_func] = funcs.get(fail_func, 0) + 1
                    continue
                # Line numbers differ between PRs and would skew the statistics,
                # so keep only the file name and the failing function name.
                fail_line = line.strip()
                fail_code_pos = fail_line.split()[2].split(":")[0]
                fail_func_name = fail_line.split()[3]
                fail_func = ':'.join([fail_code_pos, fail_func_name])
                funcs[fail_func] = funcs.get(fail_func, 0) + 1
                continue
        for pname in rule_build_issues['generic']:
            if re.search(rule_build_issues['generic'][pname], line):
                matched_other = True
                matched_specific = True
                if match_occr_as_count:
                    count_problem_types['generic'][pname] += 1
                else:
                    count_problem_types['generic'][pname] = 1
        for pname in rule_build_issues['test']:
            if re.search(rule_build_issues['test'][pname], line):
                matched_other = True
                matched_specific = True
                if match_occr_as_count:
                    count_problem_types['test'][pname] += 1
                else:
                    count_problem_types['test'][pname] = 1
        for pname in rule_build_issues['env']:
            if matched_other:
                continue
            if re.search(rule_build_issues['env'][pname], line):
                if pname == 'misc':
                    matched_misc = True
                else:
                    matched_specific = True
                if match_occr_as_count:
                    count_problem_types['env'][pname] += 1
                else:
                    count_problem_types['env'][pname] = 1
    if matched_misc and matched_specific:  # revoke the 'misc' classification
        count_problem_types['env']['misc'] = 0
    return count_problem_types, funcs
def get_ci_link(job, build):
    return ci_base_link.format(job=job, build=build)
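# Query InfluxDB for failed '/run-all-tests' PR builds matching the given job
# pattern in [start_time, end_time), grouped by pull request id.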
def get_job(job, start_time, end_time):
    start = datetime.fromtimestamp(start_time).strftime('%Y-%m-%d %H:%M:%S')
    end = datetime.fromtimestamp(end_time).strftime('%Y-%m-%d %H:%M:%S')
    query = """SELECT "ghprbPullId", "job", "buildId" FROM "jenkins_ci_result"
    WHERE (time >= '{}' AND time < '{}')
    AND ("job" =~ {})
    AND ("testPurpose" != 'night-ci')
    AND ("ghprbPullId" != '0')
    AND ("ghprbPullId" != '')
    AND ("ghprbCommentBody" = '/run-all-tests')
    AND ("status" = 'FAILURE')""".format(
        start, end, job
    )
    print(query)
    res = cli.query(query, database="jenkins_ci_result")
    pr_res = {}
    for m in res.get_points(measurement="jenkins_ci_result"):
        if not pr_res.get(m["ghprbPullId"]):
            pr_res[m["ghprbPullId"]] = []
        pr_res[m["ghprbPullId"]].append({"job": m["job"], "buildId": m["buildId"]})
    for k, v in pr_res.items():
        print(k, v)
    return pr_res
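# For every failed build of every PR, classify the failure and print the
# Jenkins Blue Ocean link together with the detected reasons.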
def analyze_fail(job, start_time, end_time):
    pr_jobs = get_job(job, start_time, end_time)
    index = 0
    for pr, jobs in pr_jobs.items():
        print(f"process {index}/{len(pr_jobs)} pr {pr}")
        index += 1
        # use a distinct name so the build dict does not shadow the `job` pattern argument
        for job_build in jobs:
            cres, funcs = gen_classified_issue_count(job_build["job"], job_build["buildId"])
            if funcs:
                job_build["reason"] = list(funcs.keys())
            else:
                reason = []
                for v in cres.values():
                    for k, vv in v.items():
                        if vv > 0:
                            reason.append(k)
                job_build["reason"] = reason
            print(get_ci_link(job_build["job"], job_build["buildId"]), job_build["reason"])
if __name__ == "__main__":
    end = time.time()
    # adjust the time window as needed; 54000 seconds covers the last 15 hours
    start = end - 54000
    analyze_fail("/tidb_ghpr/", start, end)
    analyze_fail("/tikv_ghpr/", start, end)