Skip to content

Instantly share code, notes, and snippets.

@huonw
Created June 14, 2020 23:51
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save huonw/5b15172499251ce88ac42a6a926e6162 to your computer and use it in GitHub Desktop.
Save huonw/5b15172499251ce88ac42a6a926e6162 to your computer and use it in GitHub Desktop.
Find flaky tests in a Buildkite pipeline
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Analysing test failures on Buildkite, using JUnit XML artifacts\n",
"\n",
"This notebook helps one explore test failures on Buildkite CI, by looking at failures in JUnit XML files.\n",
"\n",
"It tries to be moderately generic, but has only been used under the following conditions:\n",
"\n",
"- a Buildkite API access token with read access (the notebook reads builds and their artifacts) is stored in `~/.buildkite/read_token`\n",
"- test results are recorded in JUnit XML files and uploaded as artifacts matching the glob `junit-*.xml` (see `JUNIT_RE` below)\n",
"- the JUnit XML files use `pytest`'s particular format\n",
"\n",
"## Configuration"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"org = \"stellar\"\n",
"pipeline = \"stellargraph-public\"\n",
"JUNIT_RE = re.compile(r\"^junit-.*\\.xml$\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Definitions"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"%pip install pybuildkite"
]
},
{
"cell_type": "code",
"execution_count": 224,
"metadata": {},
"outputs": [],
"source": [
"from pybuildkite.buildkite import Buildkite\n",
"import os\n",
"import requests\n",
"import xml.etree.ElementTree as ET\n",
"import re\n",
"import pandas as pd\n",
"import warnings\n",
"import numpy as np"
]
},
{
"cell_type": "code",
"execution_count": 45,
"metadata": {},
"outputs": [],
"source": [
"# read the API token from disk, then hand it to a freshly created client;\n",
"# `token` stays in scope because artifact downloads send it in their own Authorization header\n",
"with open(os.path.expanduser(\"~/.buildkite/read_token\")) as tok_file:\n",
"    token = tok_file.read().strip()\n",
"\n",
"buildkite = Buildkite()\n",
"buildkite.set_access_token(token)"
]
},
{
"cell_type": "code",
"execution_count": 103,
"metadata": {},
"outputs": [],
"source": [
"def get_builds(count):\n",
"    \"\"\"Fetch up to `count` builds of the configured pipeline, keyed by build number.\n",
"\n",
"    Pages of builds are requested one at a time, in the order the Buildkite API lists them, until\n",
"    `count` builds have been collected or there are no more pages. Progress is printed per page.\n",
"\n",
"    Returns a dict mapping each build's `number` to the build's full record. At most `count`\n",
"    builds are returned, even when the final page contains more than are still needed.\n",
"    \"\"\"\n",
"    builds = {}\n",
"    next_page = 1\n",
"    while next_page and len(builds) < count:\n",
"        print(f\"downloading page {next_page} (found {len(builds)}/{count} builds)\")\n",
"        resp = buildkite.builds().list_all_for_pipeline(org, pipeline, page=next_page, with_pagination=True)\n",
"        for build in resp.body:\n",
"            if len(builds) >= count:\n",
"                # the page held more builds than were still needed; drop the surplus\n",
"                break\n",
"            builds[build[\"number\"]] = build\n",
"        # the loop stops when this is falsy (presumably once the last page has been fetched)\n",
"        next_page = resp.next_page\n",
"    return builds"
]
},
{
"cell_type": "code",
"execution_count": 113,
"metadata": {},
"outputs": [],
"source": [
"def junit_xml_artifacts(build_number):\n",
"    \"\"\"Return the artifacts of build `build_number` whose `path` matches `JUNIT_RE`.\"\"\"\n",
"    matching = []\n",
"    for art in buildkite.artifacts().list_artifacts_for_build(org, pipeline, build_number):\n",
"        if JUNIT_RE.match(art[\"path\"]):\n",
"            matching.append(art)\n",
"    return matching"
]
},
{
"cell_type": "code",
"execution_count": 105,
"metadata": {},
"outputs": [],
"source": [
"def download_artifact(art):\n",
"    \"\"\"GET the artifact's `download_url`, authenticating with the API token, and return the response.\n",
"\n",
"    The HTTP status is not checked here; callers receive the response whatever it contains.\n",
"    \"\"\"\n",
"    url = art[\"download_url\"]\n",
"    auth_headers = {\"Authorization\": f\"Bearer {token}\"}\n",
"    return requests.get(url, headers=auth_headers)"
]
},
{
"cell_type": "code",
"execution_count": 148,
"metadata": {},
"outputs": [],
"source": [
"def check(elem, expected, context):\n",
" if elem.tag != expected:\n",
" warnings.warn(f\"found tag {elem.tag!r}, expected {expected!r} (context: {context})\")\n",
" return False\n",
" \n",
" return True\n",
"\n",
"def find_failures(xml_string, context):\n",
" testsuites = ET.fromstring(xml_string) \n",
" if not check(testsuites, \"testsuites\", context):\n",
" return\n",
"\n",
" for testsuite in testsuites:\n",
" if not check(testsuite, \"testsuite\", context):\n",
" continue\n",
" \n",
" for testcase in testsuite:\n",
" if not check(testcase, \"testcase\", context):\n",
" continue\n",
"\n",
" has_failure = any(x.tag == \"failure\" for x in testcase)\n",
" if has_failure:\n",
" yield testcase"
]
},
{
"cell_type": "code",
"execution_count": 145,
"metadata": {},
"outputs": [],
"source": [
"def summarise_testcase(testcase):\n",
" return (testcase.attrib[\"classname\"], testcase.attrib[\"name\"])"
]
},
{
"cell_type": "code",
"execution_count": 146,
"metadata": {},
"outputs": [],
"source": [
"def failures_for_build(build_number):\n",
"    \"\"\"List (build_number, classname, name) for every failing testcase in the build's JUnit artifacts.\n",
"\n",
"    Each matching artifact is downloaded and scanned in turn; a test that fails in several\n",
"    artifacts of the same build appears once per artifact.\n",
"    \"\"\"\n",
"    rows = []\n",
"    for art in junit_xml_artifacts(build_number):\n",
"        xml_text = download_artifact(art).text\n",
"        for testcase in find_failures(xml_text, f\"build number {build_number}\"):\n",
"            rows.append((build_number,) + summarise_testcase(testcase))\n",
"    return rows"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Data retrieval"
]
},
{
"cell_type": "code",
"execution_count": 133,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"downloading page 1 (found 0/1000 builds)\n",
"downloading page 2 (found 100/1000 builds)\n",
"downloading page 3 (found 200/1000 builds)\n",
"downloading page 4 (found 300/1000 builds)\n",
"downloading page 5 (found 400/1000 builds)\n",
"downloading page 6 (found 500/1000 builds)\n",
"downloading page 7 (found 600/1000 builds)\n",
"downloading page 8 (found 700/1000 builds)\n",
"downloading page 9 (found 800/1000 builds)\n",
"downloading page 10 (found 900/1000 builds)\n"
]
}
],
"source": [
"most_recent_builds = get_builds(1000)"
]
},
{
"cell_type": "code",
"execution_count": 136,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"dict_keys([4746, 4745, 4744, 4743, 4742, 4741, 4740, 4739, 4738, 4737, 4736, 4735, 4734, 4733, 4732, 4731, 4730, 4729, 4728, 4727, 4726, 4725, 4724, 4723, 4722, 4721, 4720, 4719, 4718, 4717, 4716, 4715, 4714, 4713, 4712, 4711, 4710, 4709, 4708, 4707, 4706, 4705, 4704, 4703, 4702, 4701, 4700, 4699, 4698, 4697, 4696, 4695, 4694, 4693, 4692, 4691, 4690, 4689, 4688, 4687, 4686, 4685, 4684, 4683, 4682, 4681, 4680, 4679, 4678, 4677, 4676, 4675, 4674, 4673, 4672, 4671, 4670, 4669, 4668, 4667, 4666, 4665, 4664, 4663, 4662, 4661, 4660, 4659, 4658, 4657, 4656, 4655, 4654, 4653, 4652, 4651, 4650, 4649, 4648, 4647, 4646, 4645, 4644, 4643, 4642, 4641, 4640, 4639, 4638, 4637, 4636, 4635, 4634, 4633, 4632, 4631, 4630, 4629, 4628, 4627, 4626, 4625, 4624, 4623, 4622, 4621, 4620, 4619, 4618, 4617, 4616, 4615, 4614, 4613, 4612, 4611, 4610, 4609, 4608, 4607, 4606, 4605, 4604, 4603, 4602, 4601, 4600, 4599, 4598, 4597, 4596, 4595, 4594, 4593, 4592, 4591, 4590, 4589, 4588, 4587, 4586, 4585, 4584, 4583, 4582, 4581, 4580, 4579, 4578, 4577, 4576, 4575, 4574, 4573, 4572, 4571, 4570, 4569, 4568, 4567, 4566, 4565, 4564, 4563, 4562, 4561, 4560, 4559, 4558, 4557, 4556, 4555, 4554, 4553, 4552, 4551, 4550, 4549, 4548, 4547, 4546, 4545, 4544, 4543, 4542, 4541, 4540, 4539, 4538, 4537, 4536, 4535, 4534, 4533, 4532, 4531, 4530, 4529, 4528, 4527, 4526, 4525, 4524, 4523, 4522, 4521, 4520, 4519, 4518, 4517, 4516, 4515, 4514, 4513, 4512, 4511, 4510, 4509, 4508, 4507, 4506, 4505, 4504, 4503, 4502, 4501, 4500, 4499, 4498, 4497, 4496, 4495, 4494, 4493, 4492, 4491, 4490, 4489, 4488, 4487, 4486, 4485, 4484, 4483, 4482, 4481, 4480, 4479, 4478, 4477, 4476, 4475, 4474, 4473, 4472, 4471, 4470, 4469, 4468, 4467, 4466, 4465, 4464, 4463, 4462, 4461, 4460, 4459, 4458, 4457, 4456, 4455, 4454, 4453, 4452, 4451, 4450, 4449, 4448, 4447, 4446, 4445, 4444, 4443, 4442, 4441, 4440, 4439, 4438, 4437, 4436, 4435, 4434, 4433, 4432, 4431, 4430, 4429, 4428, 4427, 4426, 4425, 4424, 4423, 4422, 4421, 4420, 4419, 4418, 4417, 4416, 
4415, 4414, 4413, 4412, 4411, 4410, 4409, 4408, 4407, 4406, 4405, 4404, 4403, 4402, 4401, 4400, 4399, 4398, 4397, 4396, 4395, 4394, 4393, 4392, 4391, 4390, 4389, 4388, 4387, 4386, 4385, 4384, 4383, 4382, 4381, 4380, 4379, 4378, 4377, 4376, 4375, 4374, 4373, 4372, 4371, 4370, 4369, 4368, 4367, 4366, 4365, 4364, 4363, 4362, 4361, 4360, 4359, 4358, 4357, 4356, 4355, 4354, 4353, 4352, 4351, 4350, 4349, 4348, 4347, 4346, 4345, 4344, 4343, 4342, 4341, 4340, 4339, 4338, 4337, 4336, 4335, 4334, 4333, 4332, 4331, 4330, 4329, 4328, 4327, 4326, 4325, 4324, 4323, 4322, 4321, 4320, 4319, 4318, 4317, 4316, 4315, 4314, 4313, 4312, 4311, 4310, 4309, 4308, 4307, 4306, 4305, 4304, 4303, 4302, 4301, 4300, 4299, 4298, 4297, 4296, 4295, 4294, 4293, 4292, 4291, 4290, 4289, 4288, 4287, 4286, 4285, 4284, 4283, 4282, 4281, 4280, 4279, 4278, 4277, 4276, 4275, 4274, 4273, 4272, 4271, 4270, 4269, 4268, 4267, 4266, 4265, 4264, 4263, 4262, 4261, 4260, 4259, 4258, 4257, 4256, 4255, 4254, 4253, 4252, 4251, 4250, 4249, 4248, 4247, 4246, 4245, 4244, 4243, 4242, 4241, 4240, 4239, 4238, 4237, 4236, 4235, 4234, 4233, 4232, 4231, 4230, 4229, 4228, 4227, 4226, 4225, 4224, 4223, 4222, 4221, 4220, 4219, 4218, 4217, 4216, 4215, 4214, 4213, 4212, 4211, 4210, 4209, 4208, 4207, 4206, 4205, 4204, 4203, 4202, 4201, 4200, 4199, 4198, 4197, 4196, 4195, 4194, 4193, 4192, 4191, 4190, 4189, 4188, 4187, 4186, 4185, 4184, 4183, 4182, 4181, 4180, 4179, 4178, 4177, 4176, 4175, 4174, 4173, 4172, 4171, 4170, 4169, 4168, 4167, 4166, 4165, 4164, 4163, 4162, 4161, 4160, 4159, 4158, 4157, 4156, 4155, 4154, 4153, 4152, 4151, 4150, 4149, 4148, 4147, 4146, 4145, 4144, 4143, 4142, 4141, 4140, 4139, 4138, 4137, 4136, 4135, 4134, 4133, 4132, 4131, 4130, 4129, 4128, 4127, 4126, 4125, 4124, 4123, 4122, 4121, 4120, 4119, 4118, 4117, 4116, 4115, 4114, 4113, 4112, 4111, 4110, 4109, 4108, 4107, 4106, 4105, 4104, 4103, 4102, 4101, 4100, 4099, 4098, 4097, 4096, 4095, 4094, 4093, 4092, 4091, 4090, 4089, 4088, 4087, 4086, 4085, 4084, 4083, 
4082, 4081, 4080, 4079, 4078, 4077, 4076, 4075, 4074, 4073, 4072, 4071, 4070, 4069, 4068, 4067, 4066, 4065, 4064, 4063, 4062, 4061, 4060, 4059, 4058, 4057, 4056, 4055, 4054, 4053, 4052, 4051, 4050, 4049, 4048, 4047, 4046, 4045, 4044, 4043, 4042, 4041, 4040, 4039, 4038, 4037, 4036, 4035, 4034, 4033, 4032, 4031, 4030, 4029, 4028, 4027, 4026, 4025, 4024, 4023, 4022, 4021, 4020, 4019, 4018, 4017, 4016, 4015, 4014, 4013, 4012, 4011, 4010, 4009, 4008, 4007, 4006, 4005, 4004, 4003, 4002, 4001, 4000, 3999, 3998, 3997, 3996, 3995, 3994, 3993, 3992, 3991, 3990, 3989, 3988, 3987, 3986, 3985, 3984, 3983, 3982, 3981, 3980, 3979, 3978, 3977, 3976, 3975, 3974, 3973, 3972, 3971, 3970, 3969, 3968, 3967, 3966, 3965, 3964, 3963, 3962, 3961, 3960, 3959, 3958, 3957, 3956, 3955, 3954, 3953, 3952, 3951, 3950, 3949, 3948, 3947, 3946, 3945, 3944, 3943, 3942, 3941, 3940, 3939, 3938, 3937, 3936, 3935, 3934, 3933, 3932, 3931, 3930, 3929, 3928, 3927, 3926, 3925, 3924, 3923, 3922, 3921, 3920, 3919, 3918, 3917, 3916, 3915, 3914, 3913, 3912, 3911, 3910, 3909, 3908, 3907, 3906, 3905, 3904, 3903, 3902, 3901, 3900, 3899, 3898, 3897, 3896, 3895, 3894, 3893, 3892, 3891, 3890, 3889, 3888, 3887, 3886, 3885, 3884, 3883, 3882, 3881, 3880, 3879, 3878, 3877, 3876, 3875, 3874, 3873, 3872, 3871, 3870, 3869, 3868, 3867, 3866, 3865, 3864, 3863, 3862, 3861, 3860, 3859, 3858, 3857, 3856, 3855, 3854, 3853, 3852, 3851, 3850, 3849, 3848, 3847, 3846, 3845, 3844, 3843, 3842, 3841, 3840, 3839, 3838, 3837, 3836, 3835, 3834, 3833, 3832, 3831, 3830, 3829, 3828, 3827, 3826, 3825, 3824, 3823, 3822, 3821, 3820, 3819, 3818, 3817, 3816, 3815, 3814, 3813, 3812, 3811, 3810, 3809, 3808, 3807, 3806, 3805, 3804, 3803, 3802, 3801, 3800, 3799, 3798, 3797, 3796, 3795, 3794, 3793, 3792, 3791, 3790, 3789, 3788, 3787, 3786, 3785, 3784, 3783, 3782, 3781, 3780, 3779, 3778, 3777, 3776, 3775, 3774, 3773, 3772, 3771, 3770, 3769, 3768, 3767, 3766, 3765, 3764, 3763, 3762, 3761, 3760, 3759, 3758, 3757, 3756, 3755, 3754, 3753, 3752, 3751, 3750, 
3749, 3748, 3747])"
]
},
"execution_count": 136,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"most_recent_builds.keys()"
]
},
{
"cell_type": "code",
"execution_count": 137,
"metadata": {},
"outputs": [],
"source": [
"from concurrent.futures import ThreadPoolExecutor\n",
"pool = ThreadPoolExecutor(max_workers=100)"
]
},
{
"cell_type": "code",
"execution_count": 149,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/Users/wil9dw/.pyenv/versions/3.6.9/lib/python3.6/site-packages/ipykernel_launcher.py:3: UserWarning: found tag 'Error', expected 'testsuites' (context: build number 4598)\n",
" This is separate from the ipykernel package so we can avoid doing imports until\n",
"/Users/wil9dw/.pyenv/versions/3.6.9/lib/python3.6/site-packages/ipykernel_launcher.py:3: UserWarning: found tag 'Error', expected 'testsuites' (context: build number 4604)\n",
" This is separate from the ipykernel package so we can avoid doing imports until\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"CPU times: user 2min 52s, sys: 21.9 s, total: 3min 14s\n",
"Wall time: 14min 22s\n"
]
}
],
"source": [
"%%time\n",
"# this takes a while: one artifact listing per build plus one download per JUnit file.\n",
"# (The note lives on its own line because `%%time` rejects any text after it on the magic line.)\n",
"raw_failures = list(pool.map(failures_for_build, most_recent_builds.keys()))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### DataFrame creation"
]
},
{
"cell_type": "code",
"execution_count": 255,
"metadata": {},
"outputs": [],
"source": [
"builds_df_all = pd.DataFrame.from_dict(most_recent_builds, orient=\"index\")\n",
"builds_df = pd.get_dummies(builds_df_all[[\"state\", \"branch\", \"commit\"]], columns=[\"state\"])"
]
},
{
"cell_type": "code",
"execution_count": 291,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>build</th>\n",
" <th>classname</th>\n",
" <th>name</th>\n",
" <th>branch</th>\n",
" <th>commit</th>\n",
" <th>state_canceled</th>\n",
" <th>state_failed</th>\n",
" <th>state_passed</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>4744</td>\n",
" <td>tests.layer.test_knowledge_graph</td>\n",
" <td>test_model_rankings[RotH]</td>\n",
" <td>feature/1569-probabilities</td>\n",
" <td>aca59902b6813fa144ddf5223f92c843543ec3b5</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>4743</td>\n",
" <td>tests.layer.test_knowledge_graph</td>\n",
" <td>test_model_rankings[RotH]</td>\n",
" <td>feature/1569-probabilities</td>\n",
" <td>aca59902b6813fa144ddf5223f92c843543ec3b5</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>4740</td>\n",
" <td>tests.test_aaa_on_gpu</td>\n",
" <td>test_on_gpu_when_requested</td>\n",
" <td>testing-branch-for-scheduled-builds-DO_NOT_DELETE</td>\n",
" <td>7deaa1daf2e07f03614f2de2c855b6138800f2b9</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>4740</td>\n",
" <td>tests.test_aaa_on_gpu</td>\n",
" <td>test_on_gpu_when_requested</td>\n",
" <td>testing-branch-for-scheduled-builds-DO_NOT_DELETE</td>\n",
" <td>7deaa1daf2e07f03614f2de2c855b6138800f2b9</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>4740</td>\n",
" <td>tests.test_aaa_on_gpu</td>\n",
" <td>test_on_gpu_when_requested</td>\n",
" <td>testing-branch-for-scheduled-builds-DO_NOT_DELETE</td>\n",
" <td>7deaa1daf2e07f03614f2de2c855b6138800f2b9</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" build classname name \\\n",
"0 4744 tests.layer.test_knowledge_graph test_model_rankings[RotH] \n",
"1 4743 tests.layer.test_knowledge_graph test_model_rankings[RotH] \n",
"2 4740 tests.test_aaa_on_gpu test_on_gpu_when_requested \n",
"3 4740 tests.test_aaa_on_gpu test_on_gpu_when_requested \n",
"4 4740 tests.test_aaa_on_gpu test_on_gpu_when_requested \n",
"\n",
" branch \\\n",
"0 feature/1569-probabilities \n",
"1 feature/1569-probabilities \n",
"2 testing-branch-for-scheduled-builds-DO_NOT_DELETE \n",
"3 testing-branch-for-scheduled-builds-DO_NOT_DELETE \n",
"4 testing-branch-for-scheduled-builds-DO_NOT_DELETE \n",
"\n",
" commit state_canceled state_failed \\\n",
"0 aca59902b6813fa144ddf5223f92c843543ec3b5 0 0 \n",
"1 aca59902b6813fa144ddf5223f92c843543ec3b5 0 1 \n",
"2 7deaa1daf2e07f03614f2de2c855b6138800f2b9 0 1 \n",
"3 7deaa1daf2e07f03614f2de2c855b6138800f2b9 0 1 \n",
"4 7deaa1daf2e07f03614f2de2c855b6138800f2b9 0 1 \n",
"\n",
" state_passed \n",
"0 1 \n",
"1 0 \n",
"2 0 \n",
"3 0 \n",
"4 0 "
]
},
"execution_count": 291,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# flatten the per-build lists into one row per test failure, then attach each build's\n",
"# branch, commit and state indicator columns by looking up the build number\n",
"failures = pd.DataFrame(\n",
"    [row for rows in raw_failures for row in rows],\n",
"    columns=(\"build\", \"classname\", \"name\"),\n",
").join(builds_df, on=\"build\")\n",
"failures.head()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Analysis\n",
"\n",
"### Descriptive stats"
]
},
{
"cell_type": "code",
"execution_count": 180,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>state</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>passed</th>\n",
" <td>591</td>\n",
" </tr>\n",
" <tr>\n",
" <th>failed</th>\n",
" <td>387</td>\n",
" </tr>\n",
" <tr>\n",
" <th>canceled</th>\n",
" <td>22</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" state\n",
"passed 591\n",
"failed 387\n",
"canceled 22"
]
},
"execution_count": 180,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# build states\n",
"builds_df_all[\"state\"].value_counts().to_frame()"
]
},
{
"cell_type": "code",
"execution_count": 190,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"167"
]
},
"execution_count": 190,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# number of distinct builds with at least one recorded test failure, whatever the build's final state\n",
"# (builds that ended up passing are included, which is what the flaky-test analysis relies on)\n",
"failures[\"build\"].nunique()"
]
},
{
"cell_type": "code",
"execution_count": 193,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>0</th>\n",
" </tr>\n",
" <tr>\n",
" <th>build</th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>4716</th>\n",
" <td>66</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4717</th>\n",
" <td>66</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4053</th>\n",
" <td>73</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4278</th>\n",
" <td>78</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3826</th>\n",
" <td>86</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3824</th>\n",
" <td>95</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4032</th>\n",
" <td>168</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4322</th>\n",
" <td>213</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4284</th>\n",
" <td>285</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4282</th>\n",
" <td>351</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" 0\n",
"build \n",
"4716 66\n",
"4717 66\n",
"4053 73\n",
"4278 78\n",
"3826 86\n",
"3824 95\n",
"4032 168\n",
"4322 213\n",
"4284 285\n",
"4282 351"
]
},
"execution_count": 193,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# the ten builds with the most recorded test failures, smallest count first\n",
"failures.groupby(\"build\").size().sort_values().tail(10).to_frame()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Flaky tests - failures in passed builds\n",
"\n",
"We can find flaky tests by looking for test failures within builds that passed. A failure recorded in a build that ultimately passed typically means the failing step was retried within that build and eventually succeeded."
]
},
{
"cell_type": "code",
"execution_count": 226,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th></th>\n",
" <th>builds</th>\n",
" <th>state_failed</th>\n",
" <th>state_passed</th>\n",
" </tr>\n",
" <tr>\n",
" <th>classname</th>\n",
" <th>name</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th rowspan=\"5\" valign=\"top\">tests.core.test_convert</th>\n",
" <th>test_columnar_convert_invalid_input</th>\n",
" <td>[4137, 4137, 4137, 4135, 4135]</td>\n",
" <td>5</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>test_columnar_convert_ndarray</th>\n",
" <td>[4486, 4486, 4486, 4485, 4485, 4485, 4284, 428...</td>\n",
" <td>23</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>test_columnar_convert_rowframe</th>\n",
" <td>[4486, 4486, 4486, 4485, 4485, 4485, 4284, 428...</td>\n",
" <td>18</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>test_columnar_convert_rowframe_ndarray_invalid</th>\n",
" <td>[4284, 4284, 4284, 4283, 4283, 4283, 4282, 428...</td>\n",
" <td>12</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>test_convert_edges_type_column[False]</th>\n",
" <td>[4248, 4248, 4248, 4247, 4247, 4247]</td>\n",
" <td>6</td>\n",
" <td>0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" builds \\\n",
"classname name \n",
"tests.core.test_convert test_columnar_convert_invalid_input [4137, 4137, 4137, 4135, 4135] \n",
" test_columnar_convert_ndarray [4486, 4486, 4486, 4485, 4485, 4485, 4284, 428... \n",
" test_columnar_convert_rowframe [4486, 4486, 4486, 4485, 4485, 4485, 4284, 428... \n",
" test_columnar_convert_rowframe_ndarray_invalid [4284, 4284, 4284, 4283, 4283, 4283, 4282, 428... \n",
" test_convert_edges_type_column[False] [4248, 4248, 4248, 4247, 4247, 4247] \n",
"\n",
" state_failed \\\n",
"classname name \n",
"tests.core.test_convert test_columnar_convert_invalid_input 5 \n",
" test_columnar_convert_ndarray 23 \n",
" test_columnar_convert_rowframe 18 \n",
" test_columnar_convert_rowframe_ndarray_invalid 12 \n",
" test_convert_edges_type_column[False] 6 \n",
"\n",
" state_passed \n",
"classname name \n",
"tests.core.test_convert test_columnar_convert_invalid_input 0 \n",
" test_columnar_convert_ndarray 0 \n",
" test_columnar_convert_rowframe 0 \n",
" test_columnar_convert_rowframe_ndarray_invalid 0 \n",
" test_convert_edges_type_column[False] 0 "
]
},
"execution_count": 226,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# per test: the build number of every recorded failure, and how many of those failures\n",
"# happened in builds that ended up failed vs passed\n",
"failures_by_state = failures.groupby([\"classname\", \"name\"]).agg(\n",
"    builds=(\"build\", list),\n",
"    state_failed=(\"state_failed\", \"sum\"),\n",
"    state_passed=(\"state_passed\", \"sum\"),\n",
")\n",
"failures_by_state.head()"
]
},
{
"cell_type": "code",
"execution_count": 227,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"state_failed 2600\n",
"state_passed 64\n",
"dtype: int64"
]
},
"execution_count": 227,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"failures_by_state[[\"state_failed\", \"state_passed\"]].sum()"
]
},
{
"cell_type": "code",
"execution_count": 228,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th></th>\n",
" <th>builds</th>\n",
" <th>state_failed</th>\n",
" <th>state_passed</th>\n",
" </tr>\n",
" <tr>\n",
" <th>classname</th>\n",
" <th>name</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th rowspan=\"2\" valign=\"top\">tests.core.test_utils</th>\n",
" <th>test_normalize_adj</th>\n",
" <td>[4237, 4237, 4148, 3967, 3837]</td>\n",
" <td>3</td>\n",
" <td>2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>test_normalized_laplacian</th>\n",
" <td>[4256, 4256]</td>\n",
" <td>0</td>\n",
" <td>2</td>\n",
" </tr>\n",
" <tr>\n",
" <th rowspan=\"2\" valign=\"top\">tests.data.test_edge_splitter.TestEdgeSplitterHeterogeneous</th>\n",
" <th>test_split_data_by_edge_type</th>\n",
" <td>[4679, 4670, 4670, 4626, 4626, 4579, 4579, 451...</td>\n",
" <td>11</td>\n",
" <td>4</td>\n",
" </tr>\n",
" <tr>\n",
" <th>test_split_data_by_edge_type_and_attribute</th>\n",
" <td>[4679, 4611, 4471, 4167, 3898, 3865, 3862, 377...</td>\n",
" <td>4</td>\n",
" <td>5</td>\n",
" </tr>\n",
" <tr>\n",
" <th rowspan=\"3\" valign=\"top\">tests.layer.test_knowledge_graph</th>\n",
" <th>test_model_rankings[RotH]</th>\n",
" <td>[4744, 4743, 4737, 4737, 4737, 4737, 4732]</td>\n",
" <td>5</td>\n",
" <td>2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>test_model_rankings[RotatE]</th>\n",
" <td>[4683, 4683, 4284, 4284, 4284, 4282, 4282, 4282]</td>\n",
" <td>6</td>\n",
" <td>2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>test_rotate</th>\n",
" <td>[4499, 4353, 4353, 4185, 4185, 4113, 4113, 411...</td>\n",
" <td>5</td>\n",
" <td>4</td>\n",
" </tr>\n",
" <tr>\n",
" <th>tests.mapper.test_node_mappers</th>\n",
" <th>test_nodemapper_isolated_nodes</th>\n",
" <td>[4735, 4630, 4384, 4342, 4330, 4284, 4284, 428...</td>\n",
" <td>13</td>\n",
" <td>4</td>\n",
" </tr>\n",
" <tr>\n",
" <th rowspan=\"4\" valign=\"top\">tests.reproducibility.test_graphsage</th>\n",
" <th>test_nai[False]</th>\n",
" <td>[4691, 4284, 4284, 4284, 4282, 4282, 4282, 4245]</td>\n",
" <td>7</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>test_nai[True]</th>\n",
" <td>[4691, 4649, 4642, 4343, 4343, 4306, 4306, 428...</td>\n",
" <td>15</td>\n",
" <td>10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>test_unsupervised[False]</th>\n",
" <td>[4730, 4730, 4728, 4728, 4726, 4706, 4691, 469...</td>\n",
" <td>31</td>\n",
" <td>17</td>\n",
" </tr>\n",
" <tr>\n",
" <th>test_unsupervised[True]</th>\n",
" <td>[4730, 4728, 4691, 4690, 4690, 4688, 4683, 468...</td>\n",
" <td>28</td>\n",
" <td>11</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" builds \\\n",
"classname name \n",
"tests.core.test_utils test_normalize_adj [4237, 4237, 4148, 3967, 3837] \n",
" test_normalized_laplacian [4256, 4256] \n",
"tests.data.test_edge_splitter.TestEdgeSplitterH... test_split_data_by_edge_type [4679, 4670, 4670, 4626, 4626, 4579, 4579, 451... \n",
" test_split_data_by_edge_type_and_attribute [4679, 4611, 4471, 4167, 3898, 3865, 3862, 377... \n",
"tests.layer.test_knowledge_graph test_model_rankings[RotH] [4744, 4743, 4737, 4737, 4737, 4737, 4732] \n",
" test_model_rankings[RotatE] [4683, 4683, 4284, 4284, 4284, 4282, 4282, 4282] \n",
" test_rotate [4499, 4353, 4353, 4185, 4185, 4113, 4113, 411... \n",
"tests.mapper.test_node_mappers test_nodemapper_isolated_nodes [4735, 4630, 4384, 4342, 4330, 4284, 4284, 428... \n",
"tests.reproducibility.test_graphsage test_nai[False] [4691, 4284, 4284, 4284, 4282, 4282, 4282, 4245] \n",
" test_nai[True] [4691, 4649, 4642, 4343, 4343, 4306, 4306, 428... \n",
" test_unsupervised[False] [4730, 4730, 4728, 4728, 4726, 4706, 4691, 469... \n",
" test_unsupervised[True] [4730, 4728, 4691, 4690, 4690, 4688, 4683, 468... \n",
"\n",
" state_failed \\\n",
"classname name \n",
"tests.core.test_utils test_normalize_adj 3 \n",
" test_normalized_laplacian 0 \n",
"tests.data.test_edge_splitter.TestEdgeSplitterH... test_split_data_by_edge_type 11 \n",
" test_split_data_by_edge_type_and_attribute 4 \n",
"tests.layer.test_knowledge_graph test_model_rankings[RotH] 5 \n",
" test_model_rankings[RotatE] 6 \n",
" test_rotate 5 \n",
"tests.mapper.test_node_mappers test_nodemapper_isolated_nodes 13 \n",
"tests.reproducibility.test_graphsage test_nai[False] 7 \n",
" test_nai[True] 15 \n",
" test_unsupervised[False] 31 \n",
" test_unsupervised[True] 28 \n",
"\n",
" state_passed \n",
"classname name \n",
"tests.core.test_utils test_normalize_adj 2 \n",
" test_normalized_laplacian 2 \n",
"tests.data.test_edge_splitter.TestEdgeSplitterH... test_split_data_by_edge_type 4 \n",
" test_split_data_by_edge_type_and_attribute 5 \n",
"tests.layer.test_knowledge_graph test_model_rankings[RotH] 2 \n",
" test_model_rankings[RotatE] 2 \n",
" test_rotate 4 \n",
"tests.mapper.test_node_mappers test_nodemapper_isolated_nodes 4 \n",
"tests.reproducibility.test_graphsage test_nai[False] 1 \n",
" test_nai[True] 10 \n",
" test_unsupervised[False] 17 \n",
" test_unsupervised[True] 11 "
]
},
"execution_count": 228,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# a test counts as flaky if at least one of its failures happened in a build that still passed\n",
"flaky = failures_by_state.loc[failures_by_state[\"state_passed\"] > 0]\n",
"flaky"
]
},
{
"cell_type": "code",
"execution_count": 252,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"- `tests.core.test_utils` `test_normalize_adj` (total = 5, in failed builds = 3, in successful builds = 2)\n",
" - https://buildkite.com/stellar/stellargraph-public/builds/3837\n",
" - https://buildkite.com/stellar/stellargraph-public/builds/3967\n",
" - https://buildkite.com/stellar/stellargraph-public/builds/4148\n",
" - https://buildkite.com/stellar/stellargraph-public/builds/4237 (2 times)\n",
"\n",
"- `tests.core.test_utils` `test_normalized_laplacian` (total = 2, in failed builds = 0, in successful builds = 2)\n",
" - https://buildkite.com/stellar/stellargraph-public/builds/4256 (2 times)\n",
"\n",
"- `tests.data.test_edge_splitter.TestEdgeSplitterHeterogeneous` `test_split_data_by_edge_type` (total = 15, in failed builds = 11, in successful builds = 4)\n",
" - https://buildkite.com/stellar/stellargraph-public/builds/3824\n",
" - https://buildkite.com/stellar/stellargraph-public/builds/4275 (2 times)\n",
" - https://buildkite.com/stellar/stellargraph-public/builds/4283\n",
" - https://buildkite.com/stellar/stellargraph-public/builds/4343 (2 times)\n",
" - https://buildkite.com/stellar/stellargraph-public/builds/4495\n",
" - https://buildkite.com/stellar/stellargraph-public/builds/4513\n",
" - https://buildkite.com/stellar/stellargraph-public/builds/4579 (2 times)\n",
" - https://buildkite.com/stellar/stellargraph-public/builds/4626 (2 times)\n",
" - https://buildkite.com/stellar/stellargraph-public/builds/4670 (2 times)\n",
" - https://buildkite.com/stellar/stellargraph-public/builds/4679\n",
"\n",
"- `tests.data.test_edge_splitter.TestEdgeSplitterHeterogeneous` `test_split_data_by_edge_type_and_attribute` (total = 9, in failed builds = 4, in successful builds = 5)\n",
" - https://buildkite.com/stellar/stellargraph-public/builds/3773 (2 times)\n",
" - https://buildkite.com/stellar/stellargraph-public/builds/3862\n",
" - https://buildkite.com/stellar/stellargraph-public/builds/3865\n",
" - https://buildkite.com/stellar/stellargraph-public/builds/3898\n",
" - https://buildkite.com/stellar/stellargraph-public/builds/4167\n",
" - https://buildkite.com/stellar/stellargraph-public/builds/4471\n",
" - https://buildkite.com/stellar/stellargraph-public/builds/4611\n",
" - https://buildkite.com/stellar/stellargraph-public/builds/4679\n",
"\n",
"- `tests.layer.test_knowledge_graph` `test_model_rankings[RotH]` (total = 7, in failed builds = 5, in successful builds = 2)\n",
" - https://buildkite.com/stellar/stellargraph-public/builds/4732\n",
" - https://buildkite.com/stellar/stellargraph-public/builds/4737 (4 times)\n",
" - https://buildkite.com/stellar/stellargraph-public/builds/4743\n",
" - https://buildkite.com/stellar/stellargraph-public/builds/4744\n",
"\n",
"- `tests.layer.test_knowledge_graph` `test_model_rankings[RotatE]` (total = 8, in failed builds = 6, in successful builds = 2)\n",
" - https://buildkite.com/stellar/stellargraph-public/builds/4282 (3 times)\n",
" - https://buildkite.com/stellar/stellargraph-public/builds/4284 (3 times)\n",
" - https://buildkite.com/stellar/stellargraph-public/builds/4683 (2 times)\n",
"\n",
"- `tests.layer.test_knowledge_graph` `test_rotate` (total = 9, in failed builds = 5, in successful builds = 4)\n",
" - https://buildkite.com/stellar/stellargraph-public/builds/4112 (2 times)\n",
" - https://buildkite.com/stellar/stellargraph-public/builds/4113 (2 times)\n",
" - https://buildkite.com/stellar/stellargraph-public/builds/4185 (2 times)\n",
" - https://buildkite.com/stellar/stellargraph-public/builds/4353 (2 times)\n",
" - https://buildkite.com/stellar/stellargraph-public/builds/4499\n",
"\n",
"- `tests.mapper.test_node_mappers` `test_nodemapper_isolated_nodes` (total = 17, in failed builds = 13, in successful builds = 4)\n",
" - https://buildkite.com/stellar/stellargraph-public/builds/3917 (2 times)\n",
" - https://buildkite.com/stellar/stellargraph-public/builds/4053 (2 times)\n",
" - https://buildkite.com/stellar/stellargraph-public/builds/4206 (2 times)\n",
" - https://buildkite.com/stellar/stellargraph-public/builds/4282 (3 times)\n",
" - https://buildkite.com/stellar/stellargraph-public/builds/4284 (3 times)\n",
" - https://buildkite.com/stellar/stellargraph-public/builds/4330\n",
" - https://buildkite.com/stellar/stellargraph-public/builds/4342\n",
" - https://buildkite.com/stellar/stellargraph-public/builds/4384\n",
" - https://buildkite.com/stellar/stellargraph-public/builds/4630\n",
" - https://buildkite.com/stellar/stellargraph-public/builds/4735\n",
"\n",
"- `tests.reproducibility.test_graphsage` `test_nai[False]` (total = 8, in failed builds = 7, in successful builds = 1)\n",
" - https://buildkite.com/stellar/stellargraph-public/builds/4245\n",
" - https://buildkite.com/stellar/stellargraph-public/builds/4282 (3 times)\n",
" - https://buildkite.com/stellar/stellargraph-public/builds/4284 (3 times)\n",
" - https://buildkite.com/stellar/stellargraph-public/builds/4691\n",
"\n",
"- `tests.reproducibility.test_graphsage` `test_nai[True]` (total = 25, in failed builds = 15, in successful builds = 10)\n",
" - https://buildkite.com/stellar/stellargraph-public/builds/3858\n",
" - https://buildkite.com/stellar/stellargraph-public/builds/3889\n",
" - https://buildkite.com/stellar/stellargraph-public/builds/3898 (2 times)\n",
" - https://buildkite.com/stellar/stellargraph-public/builds/3915\n",
" - https://buildkite.com/stellar/stellargraph-public/builds/3972\n",
" - https://buildkite.com/stellar/stellargraph-public/builds/3973\n",
" - https://buildkite.com/stellar/stellargraph-public/builds/3986 (2 times)\n",
" - https://buildkite.com/stellar/stellargraph-public/builds/4016\n",
" - https://buildkite.com/stellar/stellargraph-public/builds/4206 (2 times)\n",
" - https://buildkite.com/stellar/stellargraph-public/builds/4282 (3 times)\n",
" - https://buildkite.com/stellar/stellargraph-public/builds/4284 (3 times)\n",
" - https://buildkite.com/stellar/stellargraph-public/builds/4306 (2 times)\n",
" - https://buildkite.com/stellar/stellargraph-public/builds/4343 (2 times)\n",
" - https://buildkite.com/stellar/stellargraph-public/builds/4642\n",
" - https://buildkite.com/stellar/stellargraph-public/builds/4649\n",
" - https://buildkite.com/stellar/stellargraph-public/builds/4691\n",
"\n",
"- `tests.reproducibility.test_graphsage` `test_unsupervised[False]` (total = 48, in failed builds = 31, in successful builds = 17)\n",
" - https://buildkite.com/stellar/stellargraph-public/builds/4051\n",
" - https://buildkite.com/stellar/stellargraph-public/builds/4058\n",
" - https://buildkite.com/stellar/stellargraph-public/builds/4060\n",
" - https://buildkite.com/stellar/stellargraph-public/builds/4065\n",
" - https://buildkite.com/stellar/stellargraph-public/builds/4128\n",
" - https://buildkite.com/stellar/stellargraph-public/builds/4129\n",
" - https://buildkite.com/stellar/stellargraph-public/builds/4132\n",
" - https://buildkite.com/stellar/stellargraph-public/builds/4135\n",
" - https://buildkite.com/stellar/stellargraph-public/builds/4140\n",
" - https://buildkite.com/stellar/stellargraph-public/builds/4152\n",
" - https://buildkite.com/stellar/stellargraph-public/builds/4162\n",
" - https://buildkite.com/stellar/stellargraph-public/builds/4193\n",
" - https://buildkite.com/stellar/stellargraph-public/builds/4198\n",
" - https://buildkite.com/stellar/stellargraph-public/builds/4282 (3 times)\n",
" - https://buildkite.com/stellar/stellargraph-public/builds/4284 (3 times)\n",
" - https://buildkite.com/stellar/stellargraph-public/builds/4637 (2 times)\n",
" - https://buildkite.com/stellar/stellargraph-public/builds/4639 (2 times)\n",
" - https://buildkite.com/stellar/stellargraph-public/builds/4642 (3 times)\n",
" - https://buildkite.com/stellar/stellargraph-public/builds/4648 (2 times)\n",
" - https://buildkite.com/stellar/stellargraph-public/builds/4649 (2 times)\n",
" - https://buildkite.com/stellar/stellargraph-public/builds/4652 (2 times)\n",
" - https://buildkite.com/stellar/stellargraph-public/builds/4654 (2 times)\n",
" - https://buildkite.com/stellar/stellargraph-public/builds/4683 (2 times)\n",
" - https://buildkite.com/stellar/stellargraph-public/builds/4688\n",
" - https://buildkite.com/stellar/stellargraph-public/builds/4690 (2 times)\n",
" - https://buildkite.com/stellar/stellargraph-public/builds/4691 (3 times)\n",
" - https://buildkite.com/stellar/stellargraph-public/builds/4706\n",
" - https://buildkite.com/stellar/stellargraph-public/builds/4726\n",
" - https://buildkite.com/stellar/stellargraph-public/builds/4728 (2 times)\n",
" - https://buildkite.com/stellar/stellargraph-public/builds/4730 (2 times)\n",
"\n",
"- `tests.reproducibility.test_graphsage` `test_unsupervised[True]` (total = 39, in failed builds = 28, in successful builds = 11)\n",
" - https://buildkite.com/stellar/stellargraph-public/builds/3931\n",
" - https://buildkite.com/stellar/stellargraph-public/builds/4051\n",
" - https://buildkite.com/stellar/stellargraph-public/builds/4054\n",
" - https://buildkite.com/stellar/stellargraph-public/builds/4058\n",
" - https://buildkite.com/stellar/stellargraph-public/builds/4060\n",
" - https://buildkite.com/stellar/stellargraph-public/builds/4128\n",
" - https://buildkite.com/stellar/stellargraph-public/builds/4129 (2 times)\n",
" - https://buildkite.com/stellar/stellargraph-public/builds/4132 (2 times)\n",
" - https://buildkite.com/stellar/stellargraph-public/builds/4140\n",
" - https://buildkite.com/stellar/stellargraph-public/builds/4152\n",
" - https://buildkite.com/stellar/stellargraph-public/builds/4198\n",
" - https://buildkite.com/stellar/stellargraph-public/builds/4282 (3 times)\n",
" - https://buildkite.com/stellar/stellargraph-public/builds/4284 (3 times)\n",
" - https://buildkite.com/stellar/stellargraph-public/builds/4346\n",
" - https://buildkite.com/stellar/stellargraph-public/builds/4394\n",
" - https://buildkite.com/stellar/stellargraph-public/builds/4620 (2 times)\n",
" - https://buildkite.com/stellar/stellargraph-public/builds/4637 (2 times)\n",
" - https://buildkite.com/stellar/stellargraph-public/builds/4639\n",
" - https://buildkite.com/stellar/stellargraph-public/builds/4648 (2 times)\n",
" - https://buildkite.com/stellar/stellargraph-public/builds/4649 (2 times)\n",
" - https://buildkite.com/stellar/stellargraph-public/builds/4652\n",
" - https://buildkite.com/stellar/stellargraph-public/builds/4683 (2 times)\n",
" - https://buildkite.com/stellar/stellargraph-public/builds/4688\n",
" - https://buildkite.com/stellar/stellargraph-public/builds/4690 (2 times)\n",
" - https://buildkite.com/stellar/stellargraph-public/builds/4691\n",
" - https://buildkite.com/stellar/stellargraph-public/builds/4728\n",
" - https://buildkite.com/stellar/stellargraph-public/builds/4730\n"
]
}
],
"source": [
"# markdown output for copying into (GitHub) issues:\n",
"for ((classname, name), builds, fail, success) in flaky.itertuples():\n",
" print()\n",
" print(f\"- `{classname}` `{name}` (total = {fail + success}, in failed builds = {fail}, in successful builds = {success})\")\n",
" for b, count in pd.Series(builds).value_counts().sort_index().iteritems():\n",
" extra = \"\" if count == 1 else f\" ({count} times)\"\n",
" print(f\" - https://buildkite.com/stellar/stellargraph-public/builds/{b}{extra}\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Flaky tests - failure and pass on a single commit\n",
"\n",
"An alternative way to find a flaky test is to look for multiple builds of a single commit, where some builds failed and some passed, and then finding the tests that failed within those builds. This works best if there's not too many configuration differences between the different builds."
]
},
{
"cell_type": "code",
"execution_count": 277,
"metadata": {},
"outputs": [],
"source": [
"builds_by_commit = builds_df.reset_index().groupby(\"commit\").agg(\n",
" builds=(\"index\", list),\n",
" state_passed=(\"state_passed\", \"sum\"), \n",
" state_failed=(\"state_failed\", \"sum\")\n",
")\n",
"passed_and_failed = builds_by_commit[(builds_by_commit.state_passed > 0) & (builds_by_commit.state_failed > 0)]"
]
},
{
"cell_type": "code",
"execution_count": 293,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>builds</th>\n",
" <th>state_passed</th>\n",
" <th>state_failed</th>\n",
" </tr>\n",
" <tr>\n",
" <th>commit</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0deb275def782953fd5c1efbd5ad61a66b961204</th>\n",
" <td>[4612, 4611]</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>13eb0d1ad94e4dc442754196de691f22810d13e7</th>\n",
" <td>[4570, 4569]</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1f19ff356cef296cef0b57b4126147383b0431aa</th>\n",
" <td>[4150, 4148]</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>231b64e0ad77c27a9063cf461411438bad31d1f6</th>\n",
" <td>[4649, 4648]</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>450000411f834577d6ff0fd26e4a9eee8c5192fa</th>\n",
" <td>[3967, 3966]</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4884deeb7a1c8872eed787716cc2381596e3d7d2</th>\n",
" <td>[4546, 4537, 4532, 4531]</td>\n",
" <td>2</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6333e1024f83e859ab1952f9317670844792d1ee</th>\n",
" <td>[4501, 4495, 4491]</td>\n",
" <td>1</td>\n",
" <td>2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7774f8f1df0e394289bd6cd3de761efd78f830c4</th>\n",
" <td>[4653, 4652]</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7af1707b7bf24060ef4d9f20f209a333a9e46f34</th>\n",
" <td>[4499, 4498]</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7deaa1daf2e07f03614f2de2c855b6138800f2b9</th>\n",
" <td>[4740, 4739, 4698, 4697, 4685, 4684, 4661, 458...</td>\n",
" <td>31</td>\n",
" <td>7</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" builds \\\n",
"commit \n",
"0deb275def782953fd5c1efbd5ad61a66b961204 [4612, 4611] \n",
"13eb0d1ad94e4dc442754196de691f22810d13e7 [4570, 4569] \n",
"1f19ff356cef296cef0b57b4126147383b0431aa [4150, 4148] \n",
"231b64e0ad77c27a9063cf461411438bad31d1f6 [4649, 4648] \n",
"450000411f834577d6ff0fd26e4a9eee8c5192fa [3967, 3966] \n",
"4884deeb7a1c8872eed787716cc2381596e3d7d2 [4546, 4537, 4532, 4531] \n",
"6333e1024f83e859ab1952f9317670844792d1ee [4501, 4495, 4491] \n",
"7774f8f1df0e394289bd6cd3de761efd78f830c4 [4653, 4652] \n",
"7af1707b7bf24060ef4d9f20f209a333a9e46f34 [4499, 4498] \n",
"7deaa1daf2e07f03614f2de2c855b6138800f2b9 [4740, 4739, 4698, 4697, 4685, 4684, 4661, 458... \n",
"\n",
" state_passed state_failed \n",
"commit \n",
"0deb275def782953fd5c1efbd5ad61a66b961204 1 1 \n",
"13eb0d1ad94e4dc442754196de691f22810d13e7 1 1 \n",
"1f19ff356cef296cef0b57b4126147383b0431aa 1 1 \n",
"231b64e0ad77c27a9063cf461411438bad31d1f6 1 1 \n",
"450000411f834577d6ff0fd26e4a9eee8c5192fa 1 1 \n",
"4884deeb7a1c8872eed787716cc2381596e3d7d2 2 1 \n",
"6333e1024f83e859ab1952f9317670844792d1ee 1 2 \n",
"7774f8f1df0e394289bd6cd3de761efd78f830c4 1 1 \n",
"7af1707b7bf24060ef4d9f20f209a333a9e46f34 1 1 \n",
"7deaa1daf2e07f03614f2de2c855b6138800f2b9 31 7 "
]
},
"execution_count": 293,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"passed_and_failed.head(10)"
]
},
{
"cell_type": "code",
"execution_count": 294,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th></th>\n",
" <th>builds</th>\n",
" <th>state_failed</th>\n",
" <th>state_passed</th>\n",
" </tr>\n",
" <tr>\n",
" <th>classname</th>\n",
" <th>name</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>tests.core.test_utils</th>\n",
" <th>test_normalize_adj</th>\n",
" <td>[4148, 3967]</td>\n",
" <td>2</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th rowspan=\"2\" valign=\"top\">tests.data.test_edge_splitter.TestEdgeSplitterHeterogeneous</th>\n",
" <th>test_split_data_by_edge_type</th>\n",
" <td>[4495, 4343, 4343]</td>\n",
" <td>3</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>test_split_data_by_edge_type_and_attribute</th>\n",
" <td>[4611]</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>tests.datasets.test_datasets</th>\n",
" <th>test_dataset_download[METR_LA]</th>\n",
" <td>[4339, 4339]</td>\n",
" <td>2</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th rowspan=\"2\" valign=\"top\">tests.layer.test_knowledge_graph</th>\n",
" <th>test_model_rankings[RotH]</th>\n",
" <td>[4744, 4743, 4737, 4737, 4737, 4737]</td>\n",
" <td>5</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>test_rotate</th>\n",
" <td>[4499]</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>tests.mapper.test_node_mappers</th>\n",
" <th>test_nodemapper_isolated_nodes</th>\n",
" <td>[4630, 4384, 4342]</td>\n",
" <td>2</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th rowspan=\"4\" valign=\"top\">tests.reproducibility.test_graphsage</th>\n",
" <th>test_link_prediction[False]</th>\n",
" <td>[4726, 4649, 4495, 4062, 4062]</td>\n",
" <td>5</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>test_nai[True]</th>\n",
" <td>[4649, 4343, 4343]</td>\n",
" <td>3</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>test_unsupervised[False]</th>\n",
" <td>[4726, 4652, 4652, 4649, 4649, 4648, 4648]</td>\n",
" <td>5</td>\n",
" <td>2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>test_unsupervised[True]</th>\n",
" <td>[4652, 4649, 4649, 4648, 4648, 3931]</td>\n",
" <td>4</td>\n",
" <td>2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>tests.test_aaa_on_gpu</th>\n",
" <th>test_on_gpu_when_requested</th>\n",
" <td>[4740, 4740, 4740]</td>\n",
" <td>3</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>tests.utils.test_hyperbolic</th>\n",
" <th>test_poincare_ball_distance_self</th>\n",
" <td>[4740]</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" builds \\\n",
"classname name \n",
"tests.core.test_utils test_normalize_adj [4148, 3967] \n",
"tests.data.test_edge_splitter.TestEdgeSplitterH... test_split_data_by_edge_type [4495, 4343, 4343] \n",
" test_split_data_by_edge_type_and_attribute [4611] \n",
"tests.datasets.test_datasets test_dataset_download[METR_LA] [4339, 4339] \n",
"tests.layer.test_knowledge_graph test_model_rankings[RotH] [4744, 4743, 4737, 4737, 4737, 4737] \n",
" test_rotate [4499] \n",
"tests.mapper.test_node_mappers test_nodemapper_isolated_nodes [4630, 4384, 4342] \n",
"tests.reproducibility.test_graphsage test_link_prediction[False] [4726, 4649, 4495, 4062, 4062] \n",
" test_nai[True] [4649, 4343, 4343] \n",
" test_unsupervised[False] [4726, 4652, 4652, 4649, 4649, 4648, 4648] \n",
" test_unsupervised[True] [4652, 4649, 4649, 4648, 4648, 3931] \n",
"tests.test_aaa_on_gpu test_on_gpu_when_requested [4740, 4740, 4740] \n",
"tests.utils.test_hyperbolic test_poincare_ball_distance_self [4740] \n",
"\n",
" state_failed \\\n",
"classname name \n",
"tests.core.test_utils test_normalize_adj 2 \n",
"tests.data.test_edge_splitter.TestEdgeSplitterH... test_split_data_by_edge_type 3 \n",
" test_split_data_by_edge_type_and_attribute 1 \n",
"tests.datasets.test_datasets test_dataset_download[METR_LA] 2 \n",
"tests.layer.test_knowledge_graph test_model_rankings[RotH] 5 \n",
" test_rotate 1 \n",
"tests.mapper.test_node_mappers test_nodemapper_isolated_nodes 2 \n",
"tests.reproducibility.test_graphsage test_link_prediction[False] 5 \n",
" test_nai[True] 3 \n",
" test_unsupervised[False] 5 \n",
" test_unsupervised[True] 4 \n",
"tests.test_aaa_on_gpu test_on_gpu_when_requested 3 \n",
"tests.utils.test_hyperbolic test_poincare_ball_distance_self 1 \n",
"\n",
" state_passed \n",
"classname name \n",
"tests.core.test_utils test_normalize_adj 0 \n",
"tests.data.test_edge_splitter.TestEdgeSplitterH... test_split_data_by_edge_type 0 \n",
" test_split_data_by_edge_type_and_attribute 0 \n",
"tests.datasets.test_datasets test_dataset_download[METR_LA] 0 \n",
"tests.layer.test_knowledge_graph test_model_rankings[RotH] 1 \n",
" test_rotate 0 \n",
"tests.mapper.test_node_mappers test_nodemapper_isolated_nodes 1 \n",
"tests.reproducibility.test_graphsage test_link_prediction[False] 0 \n",
" test_nai[True] 0 \n",
" test_unsupervised[False] 2 \n",
" test_unsupervised[True] 2 \n",
"tests.test_aaa_on_gpu test_on_gpu_when_requested 0 \n",
"tests.utils.test_hyperbolic test_poincare_ball_distance_self 0 "
]
},
"execution_count": 294,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"failures_of_pass_fail = failures[failures.commit.isin(passed_and_failed.index)]\n",
"failures_of_pass_fail.groupby([\"classname\", \"name\"]).agg(builds=(\"build\", list), state_failed=(\"state_failed\", \"sum\"), state_passed=(\"state_passed\", \"sum\"))"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.9"
}
},
"nbformat": 4,
"nbformat_minor": 4
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment