Skip to content

Instantly share code, notes, and snippets.

@steveloughran
Created January 21, 2022 17:40
Show Gist options
  • Save steveloughran/7dc1e68220db67327b781b345b42c0b8 to your computer and use it in GitHub Desktop.
Save steveloughran/7dc1e68220db67327b781b345b42c0b8 to your computer and use it in GitHub Desktop.
manifest committer test runs on GCS vs ABFS

GCS TestCommitterLoadManifestsStage


220121 17:13:40.558:INFO [org.apache.hadoop.mapreduce.lib.output.committer.manifest.AbstractManifestCommitterTest] Aggregate FileSystem Statistics counters=((directories_created=34)
(files_created=20)
(files_deleted=42)
(job_stage_create_target_dirs=1)
(job_stage_load_manifests=1)
(job_stage_setup=1)
(op_create=20)
(op_create_directories=1)
(op_delete=83)
(op_get_file_status=46)
(op_get_file_status.failures=22)
(op_list_files=2)
(op_list_status=4)
(op_load_all_manifests=1)
(op_load_manifest=20)
(op_mkdirs=66)
(op_msync=1)
(op_prepare_dir_ancestors=1)
(op_rename=40)
(stream_read_bytes=279474)
(stream_read_close_operations=60)
(stream_read_operations=40)
(stream_read_operations_incomplete=40)
(stream_read_total_bytes=279474)
(stream_write_bytes=139737)
(task_stage_save_manifest=20)
(task_stage_save_task_manifest=20)
(task_stage_setup=20));

gauges=();

minimums=((job_stage_create_target_dirs.min=5251)
(job_stage_load_manifests.min=3159)
(job_stage_setup.min=2681)
(op_create_directories.min=5247)
(op_delete.min=215)
(op_get_file_status.failures.min=226)
(op_get_file_status.min=223)
(op_list_status.min=229)
(op_load_all_manifests.min=2921)
(op_load_manifest.min=355)
(op_mkdirs.min=880)
(op_msync.min=0)
(op_prepare_dir_ancestors.min=1)
(op_rename.min=8456)
(task_stage_save_manifest.min=38024)
(task_stage_save_task_manifest.min=18143)
(task_stage_setup.min=19399));

maximums=((job_stage_create_target_dirs.max=5251)
(job_stage_load_manifests.max=3159)
(job_stage_setup.max=2681)
(op_create_directories.max=5247)
(op_delete.max=13112)
(op_get_file_status.failures.max=6661)
(op_get_file_status.max=223)
(op_list_status.max=243)
(op_load_all_manifests.max=2921)
(op_load_manifest.max=2914)
(op_mkdirs.max=31417)
(op_msync.max=0)
(op_prepare_dir_ancestors.max=1)
(op_rename.max=19719)
(task_stage_save_manifest.max=53080)
(task_stage_save_task_manifest.max=32702)
(task_stage_setup.max=35467));

means=((job_stage_create_target_dirs.mean=(samples=1, sum=5251, mean=5251.0000))
(job_stage_load_manifests.mean=(samples=1, sum=3159, mean=3159.0000))
(job_stage_setup.mean=(samples=1, sum=2681, mean=2681.0000))
(op_create_directories.mean=(samples=1, sum=5247, mean=5247.0000))
(op_delete.mean=(samples=41, sum=311680, mean=7601.9512))
(op_get_file_status.failures.mean=(samples=22, sum=73803, mean=3354.6818))
(op_get_file_status.mean=(samples=1, sum=223, mean=223.0000))
(op_list_status.mean=(samples=2, sum=472, mean=236.0000))
(op_load_all_manifests.mean=(samples=1, sum=2921, mean=2921.0000))
(op_load_manifest.mean=(samples=20, sum=28962, mean=1448.1000))
(op_mkdirs.mean=(samples=32, sum=460335, mean=14385.4688))
(op_msync.mean=(samples=1, sum=0, mean=0.0000))
(op_prepare_dir_ancestors.mean=(samples=1, sum=1, mean=1.0000))
(op_rename.mean=(samples=20, sum=294766, mean=14738.3000))
(task_stage_save_manifest.mean=(samples=20, sum=947133, mean=47356.6500))
(task_stage_save_task_manifest.mean=(samples=20, sum=566356, mean=28317.8000))
(task_stage_setup.mean=(samples=20, sum=484879, mean=24243.9500)));

[INFO] Tests run: 1, Failures: 0, Errors: 0, Skipped: 0, Time elapsed: 108.232 s - in com.google.cloud.hadoop.fs.gcs.commit.TestCommitterLoadManifestsStage

ABFS tests

This test uses the default of 200 manifests; on GCS I had to cut it down to 20 because the test was taking too long.

2022-01-21 17:31:49,210 INFO  [JUnit]: manifest.AbstractManifestCommitterTest (AbstractManifestCommitterTest.java:dumpFileSystemIOStatistics(445)) - Aggregate FileSystem Statistics counters=((action_http_delete_request=402)
(action_http_delete_request.failures=200)
(action_http_get_request=202)
(action_http_head_request=207)
(action_http_head_request.failures=202)
(action_http_put_request=1013)
(bytes_received=1530857)
(bytes_sent=1530857)
(connections_made=1824)
(directories_created=213)
(files_created=200)
(get_responses=1824)
(job_stage_create_target_dirs=1)
(job_stage_load_manifests=1)
(job_stage_setup=1)
(op_create=200)
(op_create_directories=1)
(op_delete=803)
(op_get_file_status=409)
(op_get_file_status.failures=202)
(op_list_status=2)
(op_load_all_manifests=1)
(op_load_manifest=200)
(op_mkdirs=425)
(op_msync=1)
(op_open=200)
(op_prepare_dir_ancestors=1)
(op_rename=400)
(send_requests=1013)
(task_stage_save_manifest=200)
(task_stage_save_task_manifest=200)
(task_stage_setup=200));

gauges=();

minimums=((action_http_delete_request.failures.min=25)
(action_http_delete_request.min=36)
(action_http_get_request.min=24)
(action_http_head_request.failures.min=24)
(action_http_head_request.min=23)
(action_http_put_request.min=24)
(job_stage_create_target_dirs.min=197)
(job_stage_load_manifests.min=1256)
(job_stage_setup.min=292)
(op_create_directories.min=176)
(op_delete.min=26)
(op_get_file_status.failures.min=24)
(op_get_file_status.min=25)
(op_list_status.min=23)
(op_load_all_manifests.min=1165)
(op_load_manifest.min=27)
(op_mkdirs.min=26)
(op_msync.min=0)
(op_prepare_dir_ancestors.min=1)
(op_rename.min=43)
(task_stage_save_manifest.min=181)
(task_stage_save_task_manifest.min=102)
(task_stage_setup.min=54));

maximums=((action_http_delete_request.failures.max=394)
(action_http_delete_request.max=220)
(action_http_get_request.max=388)
(action_http_head_request.failures.max=504)
(action_http_head_request.max=30)
(action_http_put_request.max=980)
(job_stage_create_target_dirs.max=197)
(job_stage_load_manifests.max=1256)
(job_stage_setup.max=292)
(op_create_directories.max=176)
(op_delete.max=396)
(op_get_file_status.failures.max=505)
(op_get_file_status.max=25)
(op_list_status.max=34)
(op_load_all_manifests.max=1165)
(op_load_manifest.max=416)
(op_mkdirs.max=303)
(op_msync.max=0)
(op_prepare_dir_ancestors.max=1)
(op_rename.max=342)
(task_stage_save_manifest.max=2000)
(task_stage_save_task_manifest.max=1913)
(task_stage_setup.max=689));

means=((action_http_delete_request.failures.mean=(samples=200, sum=15281, mean=76.4050))
(action_http_delete_request.mean=(samples=202, sum=15334, mean=75.9109))
(action_http_get_request.mean=(samples=202, sum=25327, mean=125.3812))
(action_http_head_request.failures.mean=(samples=202, sum=26793, mean=132.6386))
(action_http_head_request.mean=(samples=5, sum=128, mean=25.6000))
(action_http_put_request.mean=(samples=1013, sum=89841, mean=88.6881))
(job_stage_create_target_dirs.mean=(samples=1, sum=197, mean=197.0000))
(job_stage_load_manifests.mean=(samples=1, sum=1256, mean=1256.0000))
(job_stage_setup.mean=(samples=1, sum=292, mean=292.0000))
(op_create_directories.mean=(samples=1, sum=176, mean=176.0000))
(op_delete.mean=(samples=401, sum=30698, mean=76.5536))
(op_get_file_status.failures.mean=(samples=202, sum=26911, mean=133.2228))
(op_get_file_status.mean=(samples=1, sum=25, mean=25.0000))
(op_list_status.mean=(samples=2, sum=57, mean=28.5000))
(op_load_all_manifests.mean=(samples=1, sum=1165, mean=1165.0000))
(op_load_manifest.mean=(samples=200, sum=28287, mean=141.4350))
(op_mkdirs.mean=(samples=212, sum=13913, mean=65.6274))
(op_msync.mean=(samples=1, sum=0, mean=0.0000))
(op_prepare_dir_ancestors.mean=(samples=1, sum=1, mean=1.0000))
(op_rename.mean=(samples=200, sum=16714, mean=83.5700))
(task_stage_save_manifest.mean=(samples=200, sum=119189, mean=595.9450))
(task_stage_save_task_manifest.mean=(samples=200, sum=86521, mean=432.6050))
(task_stage_setup.mean=(samples=200, sum=39727, mean=198.6350)));
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment