Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
require 'json'
require 'net/http'
require 'optparse'
require 'set'
require 'uri'
# Command-line options: the first and last gradle-check build numbers to
# crawl. Values are coerced with OptionParser::DecimalInteger so that a
# non-numeric build number is rejected at parse time
# (OptionParser::InvalidArgument) and the build range is iterated numerically
# instead of by string succession.
options = {}
option_parser = OptionParser.new do |opt|
  opt.on('-s', '--start BUILD_NUMBER', OptionParser::DecimalInteger, 'Require start') { |o| options[:start] = o }
  opt.on('-e', '--end BUILD_NUMBER', OptionParser::DecimalInteger, 'Require end') { |o| options[:end] = o }
end
option_parser.parse!
# Fetches +url+ and returns the parsed JSON document.
#
# Returns nil (after printing a warning to stderr) when the server answers
# with a non-2xx status, the body is not valid JSON, or the request fails at
# the network level. This lets a single missing or deleted build be skipped
# instead of aborting the whole crawl.
def fetch_json(url)
  response = Net::HTTP.get_response(URI.parse(url))
  unless response.is_a?(Net::HTTPSuccess)
    warn "Skipping #{url}: HTTP #{response.code}"
    return nil
  end

  JSON.parse(response.body)
rescue JSON::ParserError, SocketError, SystemCallError, Timeout::Error => e
  warn "Skipping #{url}: #{e.class}: #{e.message}"
  nil
end

# Without both bounds there is no range to crawl; fail with a clear message
# rather than an obscure error from iterating a nil range.
abort 'Both --start and --end build numbers are required' if options[:start].nil? || options[:end].nil?

# Accept either Integer or String option values; base 10 keeps a leading
# zero from being read as octal.
first_build = Integer(options[:start].to_s, 10)
last_build = Integer(options[:end].to_s, 10)

puts "Will crawl builds from #{options[:start]} to #{options[:end]}"
all_failed_tests = []
(first_build..last_build).each do |job_number|
  base_job_uri = "https://build.ci.opensearch.org/job/gradle-check/#{job_number}"
  build = fetch_json("#{base_job_uri}/api/json")
  # UNSTABLE means the build succeeded but at least one test needed to be
  # retried. Many gradle-check runs fail when running against a PR because the
  # newly introduced code has a problem. The developer then iterates on the PR
  # until all problems are resolved. To filter out this noise we only consider
  # UNSTABLE builds, and ignore failures. It is possible for gradle-check
  # builds run against merged code to fail due to flaky tests and they would be
  # missed here. However, that is expected to be a small minority of the cases
  # and this approach should do pretty well at identifying flaky tests.
  next unless build && build['result'] == 'UNSTABLE'

  report = fetch_json("#{base_job_uri}/testReport/api/json?tree=suites[cases[status,className,name]]")
  next unless report

  # 'FAILED' means the test failed, just like the previous run.
  # 'REGRESSION' means the test failed, but previously passed.
  # See https://javadoc.jenkins.io/plugin/junit/hudson/tasks/junit/CaseResult.Status.html
  # Array() guards against a report without 'suites' or a suite without 'cases'.
  failed_cases = Array(report['suites']).flat_map do |suite|
    Array(suite['cases']).select { |c| %w[REGRESSION FAILED].include?(c['status']) }
  end
  failed_tests = failed_cases.map do |c|
    { 'name' => "#{c['className']}.#{c['name']}", 'build' => job_number }
  end
  all_failed_tests.concat(failed_tests)
end
puts '------------------'
# Tally every recorded failure by fully-qualified test name, remembering
# which builds each failure was seen in. Entries are created on first access.
tally = Hash.new { |h, name| h[name] = { 'count' => 0, 'builds' => [] } }
all_failed_tests.flatten.each do |failure|
  entry = tally[failure['name']]
  entry['count'] += 1
  entry['builds'] << failure['build']
end

# Print one line per test, most frequently failing first:
#   <count> <test name> (<comma-separated build numbers>)
ranked = tally.to_a.sort { |left, right| right[1]['count'] <=> left[1]['count'] }
ranked.each do |name, stats|
  puts "#{stats['count']} #{name} (#{stats['builds'].join(',')})"
end
@andrross
Copy link
Author

andrross commented Dec 2, 2022

$  ruby flaky-test-finder.rb --s 6556 --e 7556
Will crawl builds from 6556 to 7556
------------------
41 org.opensearch.repositories.s3.RepositoryS3ClientYamlTestSuiteIT.test {yaml=repository_s3/20_repository_permanent_credentials/Snapshot and Restore with repository-s3 using permanent credentials} (6561,6561,6561,6577,6587,6591,6591,6598,6645,6709,6711,6711,6717,6750,6751,6766,6778,6778,6779,6779,6779,6782,6879,6879,6880,6880,6952,6953,6953,7074,7074,7074,7080,7082,7082,7177,7200,7201,7224,7277,7310)
23 org.opensearch.index.ShardIndexingPressureConcurrentExecutionTests.testReplicaThreadedUpdateToShardLimitsAndRejections (6585,6681,6962,7046,7090,7095,7149,7149,7149,7158,7188,7206,7206,7253,7253,7253,7274,7274,7274,7327,7463,7483,7492)
22 org.opensearch.index.ShardIndexingPressureConcurrentExecutionTests.testCoordinatingPrimaryThreadedUpdateToShardLimitsAndRejections (6607,6616,6628,6700,6700,6720,6759,6759,6762,6828,6887,6971,6971,6975,7027,7112,7115,7168,7168,7202,7315,7315)
17 org.opensearch.cluster.allocation.AwarenessAllocationIT.testThreeZoneOneReplicaWithForceZoneValueAndLoadAwareness (6562,6601,6627,6717,6741,6908,6921,6925,7036,7047,7112,7149,7422,7447,7495,7517,7555)
11 org.opensearch.clustermanager.ClusterManagerTaskThrottlingIT.testTimeoutWhileThrottling (6556,6593,6594,6594,6598,6599,6601,6602,6602,6602,6742)
9 org.opensearch.snapshots.DedicatedClusterSnapshotRestoreIT.testIndexDeletionDuringSnapshotCreationInQueue (6790,6828,6965,7220,7256,7315,7361,7447,7543)
8 org.opensearch.cluster.service.MasterServiceTests.classMethod (6894,6894,6894,6894,7074,7074,7177,7177)
8 org.opensearch.repositories.s3.RepositoryS3ClientYamlTestSuiteIT.test {yaml=repository_s3/20_repository_permanent_credentials/Try to create repository with broken endpoint override and named client} (6589,6709,6952,6952,6953,6953,7200,7277)
7 org.opensearch.index.IndexServiceTests.testAsyncTranslogTrimTaskOnClosedIndex (6769,7062,7077,7207,7453,7464,7517)
7 org.opensearch.indices.stats.IndexStatsIT.testFilterCacheStats (6585,7154,7183,7255,7292,7300,7551)
4 org.opensearch.cluster.coordination.AwarenessAttributeDecommissionIT.testNodesRemovedAfterZoneDecommission_ClusterManagerNotInToBeDecommissionedZone (6599,6602,6731,6771)
4 org.opensearch.repositories.s3.RepositoryS3ClientYamlTestSuiteIT.test {yaml=repository_s3/20_repository_permanent_credentials/Register a repository with a non existing bucket} (6952,6953,7077,7320)
4 org.opensearch.repositories.s3.RepositoryS3ClientYamlTestSuiteIT.test {yaml=repository_s3/20_repository_permanent_credentials/Register a repository with a non existing client} (6711,6711,6711,6952)
4 org.opensearch.action.bulk.BulkIntegrationIT.testDeleteIndexWhileIndexing (6624,6635,6723,6979)
4 org.opensearch.smoketest.SmokeTestMultiNodeClientYamlTestSuiteIT.test {yaml=pit/10_basic/Delete all} (7185,7212,7231,7342)
4 org.opensearch.cluster.service.MasterServiceTests.testThrottlingForMultipleTaskTypes (6894,6894,7074,7177)
4 org.opensearch.repositories.s3.RepositoryS3ClientYamlTestSuiteIT.test {yaml=repository_s3/20_repository_permanent_credentials/Register a read-only repository with a non existing client} (6591,6591,6952,7201)
4 org.opensearch.clustermanager.ClusterManagerTaskThrottlingIT.testThrottlingForSingleNode (6593,6615,6664,6682)
3 org.opensearch.repositories.s3.RepositoryS3ClientYamlTestSuiteIT.test {yaml=repository_s3/20_repository_permanent_credentials/teardown} (6766,6953,6956)
3 org.opensearch.repositories.s3.RepositoryS3ClientYamlTestSuiteIT.test {yaml=repository_s3/20_repository_permanent_credentials/Restore a non existing snapshot} (6782,6952,7309)
3 org.opensearch.cluster.coordination.AwarenessAttributeDecommissionIT.testNodesRemovedAfterZoneDecommission_ClusterManagerInToBeDecommissionedZone (6606,6709,6895)
3 org.opensearch.index.shard.SegmentReplicationIndexShardTests.testNRTReplicaPromotedAsPrimary (6894,7091,7144)
3 org.opensearch.cluster.coordination.AwarenessAttributeDecommissionIT.testInvariantsAndLogsOnDecommissionedNodes (6738,6792,6825)
2 org.opensearch.action.admin.indices.create.ShrinkIndexIT.testShrinkIndexPrimaryTerm (6685,7406)
2 org.opensearch.gateway.QuorumGatewayIT.testQuorumRecovery (6562,7201)
2 org.opensearch.action.bulk.BulkIntegrationIT.testBulkWithWriteIndexAndRouting (6723,6979)
2 org.opensearch.action.admin.indices.create.ShrinkIndexIT.testCreateShrinkIndexToN (6685,7406)
2 org.opensearch.action.bulk.BulkIntegrationIT.testBulkWithGlobalDefaults (6723,6979)
2 org.opensearch.action.bulk.BulkIntegrationIT.testExternallySetAutoGeneratedTimestamp (6723,6979)
2 org.opensearch.repositories.s3.RepositoryS3ClientYamlTestSuiteIT.test {yaml=repository_s3/20_repository_permanent_credentials/Register a read-only repository with a non existing bucket} (6766,7076)
2 org.opensearch.action.admin.indices.create.ShrinkIndexIT.testCreateShrinkIndex (6685,7406)
2 org.opensearch.http.SearchRestCancellationIT.testAutomaticCancellationDuringFetchPhase (7167,7463)
2 org.opensearch.action.admin.cluster.node.tasks.ResourceAwareTasksTests.testTaskResourceTrackingDuringTaskCancellation (6893,7166)
2 org.opensearch.action.admin.indices.create.ShrinkIndexIT.testCreateShrinkIndexFails (6685,7406)
1 org.opensearch.action.admin.indices.create.CreateIndexIT.classMethod (7464)
1 org.opensearch.repositories.s3.S3BlobStoreRepositoryTests.testSnapshotWithLargeSegmentFiles (6589)
1 org.opensearch.repositories.s3.S3BlobStoreRepositoryTests.testDeleteBlobs (6589)
1 org.opensearch.repositories.s3.S3BlobStoreRepositoryTests.testList (6589)
1 org.opensearch.repositories.s3.S3BlobStoreRepositoryTests.testMultipleSnapshotAndRollback (6589)
1 org.opensearch.monitor.fs.FsHealthServiceTests.testFailsHealthOnHungIOBeyondHealthyTimeout (6606)
1 org.opensearch.action.admin.cluster.tasks.PendingTasksBlocksIT.testPendingTasksWithClusterNotRecoveredBlock (6653)
1 org.opensearch.index.ShardIndexingPressureIT.testShardIndexingPressureTrackingDuringBulkWrites (6667)
1 org.opensearch.action.bulk.BulkIntegrationIT.testBulkIndexCreatesMapping (6723)
1 org.opensearch.cluster.decommission.DecommissionControllerTests.testTimesOut (6747)
1 org.opensearch.repositories.s3.RepositoryS3ClientYamlTestSuiteIT.test {yaml=repository_s3/20_repository_permanent_credentials/Delete a non existing snapshot} (6758)
1 org.opensearch.persistent.PersistentTasksExecutorFullRestartIT.testFullClusterRestart (6764)
1 org.opensearch.client.PitIT.testDeleteAllAndListAllPits (6781)
1 org.opensearch.client.PitIT.testCreateAndDeletePit (6781)
1 org.opensearch.index.shard.SegmentReplicationIndexShardTests.testReplicaReceivesGenIncrease (6824)
1 org.opensearch.repositories.s3.RepositoryS3ClientYamlTestSuiteIT.test {yaml=repository_s3/20_repository_permanent_credentials/Get a non existing snapshot} (6953)
1 org.opensearch.client.ReindexIT.testReindexTask (6962)
1 org.opensearch.backwards.MixedClusterClientYamlTestSuiteIT.test {p0=search.aggregation/20_terms/string profiler via global ordinals} (6970)
1 org.opensearch.cluster.routing.allocation.decider.ConcurrentRecoveriesAllocationDeciderTests.testClusterConcurrentRecoveries (7022)
1 org.opensearch.search.aggregations.metrics.TDigestPercentilesIT.testMultiValuedFieldWithValueScriptReverse (7208)
1 org.opensearch.cluster.ClusterHealthIT.testHealthOnClusterManagerFailover (7272)
1 org.opensearch.search.SearchCancellationIT.testCancellationDuringFetchPhaseUsingRequestParameter (7318)
1 org.opensearch.indices.state.CloseWhileRelocatingShardsIT.testCloseWhileRelocatingShards (7345)
1 org.opensearch.action.admin.indices.create.SplitIndexIT.testCreateSplitIndex (7415)
1 org.opensearch.action.admin.indices.create.SplitIndexIT.testCreateSplitIndexToN (7415)
1 org.opensearch.repositories.azure.AzureBlobContainerRetriesTests.testReadBlobWithRetries (7422)
1 org.opensearch.action.admin.indices.create.CreateIndexIT.testCreateAndDeleteIndexConcurrently (7464)

@dblock
Copy link

dblock commented Dec 6, 2022

I added GitHub issue lookup and made start/end optional (when omitted, the script looks at the last 100 builds) in https://gist.github.com/dblock/335e02867099be890802be0e95d0408c/revisions

@dblock
Copy link

dblock commented Dec 6, 2022

Turns out I wrote https://github.com/dblock/gradle-checks. We can merge the two if you want.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment