Skip to content

Instantly share code, notes, and snippets.

@BlueCocoa
Last active November 22, 2020 02:39
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save BlueCocoa/83333d5655a13b60400918e3a28e8a9f to your computer and use it in GitHub Desktop.
Save BlueCocoa/83333d5655a13b60400918e3a28e8a9f to your computer and use it in GitHub Desktop.
ncnn benchmark on Apple Silicon M1
$ ./benchmark/benchncnn
thread_policy_set error 46
loop_count = 4
num_threads = 8
powersave = 0
gpu_device = -1
cooling_down = 1
squeezenet min = 5.64 max = 6.24 avg = 5.88
squeezenet_int8 min = 8.93 max = 8.97 avg = 8.94
mobilenet min = 8.86 max = 8.99 avg = 8.91
mobilenet_int8 min = 18.42 max = 18.47 avg = 18.45
mobilenet_v2 min = 5.65 max = 5.93 avg = 5.80
mobilenet_v3 min = 4.76 max = 5.01 avg = 4.89
shufflenet min = 4.01 max = 4.48 avg = 4.25
shufflenet_v2 min = 4.46 max = 5.24 avg = 4.83
mnasnet min = 5.79 max = 6.28 avg = 5.99
proxylessnasnet min = 7.08 max = 7.45 avg = 7.25
efficientnet_b0 min = 8.99 max = 9.26 avg = 9.08
regnety_400m min = 9.08 max = 9.22 avg = 9.14
blazeface min = 3.09 max = 3.51 avg = 3.22
googlenet min = 28.02 max = 28.22 avg = 28.13
googlenet_int8 min = 37.95 max = 38.03 avg = 37.99
resnet18 min = 22.81 max = 22.87 avg = 22.84
resnet18_int8 min = 32.08 max = 32.10 avg = 32.09
alexnet min = 33.48 max = 33.94 avg = 33.80
vgg16 min = 81.68 max = 81.96 avg = 81.78
vgg16_int8 min = 196.31 max = 196.52 avg = 196.40
resnet50 min = 50.62 max = 50.66 avg = 50.63
resnet50_int8 min = 69.01 max = 69.21 avg = 69.10
squeezenet_ssd min = 21.53 max = 22.04 avg = 21.84
squeezenet_ssd_int8 min = 26.52 max = 26.56 avg = 26.54
mobilenet_ssd min = 22.14 max = 22.35 avg = 22.25
mobilenet_ssd_int8 min = 31.02 max = 31.11 avg = 31.07
mobilenet_yolo min = 40.97 max = 41.03 avg = 40.99
mobilenetv2_yolov3 min = 20.91 max = 21.50 avg = 21.17
yolov4-tiny min = 34.74 max = 35.24 avg = 34.96
$ ./benchncnn 4 4 0 -1 1
thread_policy_set error 46
loop_count = 4
num_threads = 4
powersave = 0
gpu_device = -1
cooling_down = 1
squeezenet min = 5.66 max = 6.17 avg = 5.86
squeezenet_int8 min = 8.96 max = 9.08 avg = 9.00
mobilenet min = 8.88 max = 8.91 avg = 8.90
mobilenet_int8 min = 18.47 max = 18.50 avg = 18.49
mobilenet_v2 min = 5.61 max = 5.90 avg = 5.72
mobilenet_v3 min = 5.06 max = 5.77 avg = 5.42
shufflenet min = 3.85 max = 4.33 avg = 4.04
shufflenet_v2 min = 4.38 max = 4.92 avg = 4.61
mnasnet min = 5.64 max = 5.92 avg = 5.76
proxylessnasnet min = 7.20 max = 7.50 avg = 7.31
efficientnet_b0 min = 9.00 max = 9.03 avg = 9.01
regnety_400m min = 9.22 max = 9.25 avg = 9.23
blazeface min = 2.74 max = 3.08 avg = 2.96
googlenet min = 28.05 max = 28.19 avg = 28.12
googlenet_int8 min = 38.02 max = 38.14 avg = 38.05
resnet18 min = 22.86 max = 23.10 avg = 22.97
resnet18_int8 min = 32.11 max = 32.16 avg = 32.14
alexnet min = 33.49 max = 33.57 avg = 33.53
vgg16 min = 81.82 max = 82.12 avg = 81.94
vgg16_int8 min = 196.14 max = 196.79 avg = 196.33
resnet50 min = 50.59 max = 50.69 avg = 50.63
resnet50_int8 min = 69.12 max = 69.46 avg = 69.23
squeezenet_ssd min = 21.54 max = 21.65 avg = 21.61
squeezenet_ssd_int8 min = 26.62 max = 26.65 avg = 26.63
mobilenet_ssd min = 22.12 max = 22.16 avg = 22.13
mobilenet_ssd_int8 min = 31.06 max = 31.20 avg = 31.14
mobilenet_yolo min = 41.01 max = 41.09 avg = 41.04
mobilenetv2_yolov3 min = 20.88 max = 21.08 avg = 20.93
yolov4-tiny min = 34.58 max = 34.65 avg = 34.62
# Will not run with the MoltenVK bundled in Vulkan SDK 1.2.135.0: the libMoltenVK.dylib that dyld finds is not built for macOS
$ ./benchmark/benchncnn 4 8 0 0 1
dyld: Library not loaded: @rpath/libMoltenVK.dylib
Referenced from: /Users/[redacted]/ncnn/ncnn/build_gpu/./benchmark/benchncnn
Reason: no suitable image found. Did find:
/Users/[redacted]/ncnn/vulkansdk-macos-1.2.135.0/MoltenVK/iOS/dynamic/libMoltenVK.dylib: mach-o, but not built for platform macOS
/Users/[redacted]/ncnn/vulkansdk-macos-1.2.135.0/MoltenVK/iOS/dynamic/libMoltenVK.dylib: stat() failed with errno=1
zsh: abort ./benchmark/benchncnn 4 8 0 1 1
# Manually built MoltenVK for arm64 macOS
$ ./benchncnn 4 8 0 0 1
[mvk-info] MoltenVK version 1.1.1. Vulkan version 1.1.154.
The following 70 Vulkan extensions are supported:
VK_KHR_16bit_storage v1
VK_KHR_8bit_storage v1
VK_KHR_bind_memory2 v1
VK_KHR_create_renderpass2 v1
VK_KHR_dedicated_allocation v3
VK_KHR_depth_stencil_resolve v1
VK_KHR_descriptor_update_template v1
VK_KHR_device_group v4
VK_KHR_device_group_creation v1
VK_KHR_driver_properties v1
VK_KHR_external_fence v1
VK_KHR_external_fence_capabilities v1
VK_KHR_external_memory v1
VK_KHR_external_memory_capabilities v1
VK_KHR_external_semaphore v1
VK_KHR_external_semaphore_capabilities v1
VK_KHR_get_memory_requirements2 v1
VK_KHR_get_physical_device_properties2 v2
VK_KHR_get_surface_capabilities2 v1
VK_KHR_image_format_list v1
VK_KHR_maintenance1 v2
VK_KHR_maintenance2 v1
VK_KHR_maintenance3 v1
VK_KHR_multiview v1
VK_KHR_portability_subset v1
VK_KHR_push_descriptor v2
VK_KHR_relaxed_block_layout v1
VK_KHR_sampler_mirror_clamp_to_edge v3
VK_KHR_sampler_ycbcr_conversion v14
VK_KHR_shader_draw_parameters v1
VK_KHR_shader_float16_int8 v1
VK_KHR_shader_subgroup_extended_types v1
VK_KHR_storage_buffer_storage_class v1
VK_KHR_surface v25
VK_KHR_swapchain v70
VK_KHR_swapchain_mutable_format v1
VK_KHR_timeline_semaphore v2
VK_KHR_uniform_buffer_standard_layout v1
VK_KHR_variable_pointers v1
VK_EXT_debug_marker v4
VK_EXT_debug_report v9
VK_EXT_debug_utils v2
VK_EXT_descriptor_indexing v2
VK_EXT_fragment_shader_interlock v1
VK_EXT_hdr_metadata v2
VK_EXT_host_query_reset v1
VK_EXT_image_robustness v1
VK_EXT_inline_uniform_block v1
VK_EXT_memory_budget v1
VK_EXT_metal_surface v1
VK_EXT_post_depth_coverage v1
VK_EXT_private_data v1
VK_EXT_robustness2 v1
VK_EXT_scalar_block_layout v1
VK_EXT_shader_stencil_export v1
VK_EXT_shader_viewport_index_layer v1
VK_EXT_swapchain_colorspace v4
VK_EXT_texel_buffer_alignment v1
VK_EXT_texture_compression_astc_hdr v1
VK_EXT_vertex_attribute_divisor v3
VK_AMD_gpu_shader_half_float v2
VK_AMD_negative_viewport_height v1
VK_AMD_shader_image_load_store_lod v1
VK_AMD_shader_trinary_minmax v1
VK_IMG_format_pvrtc v1
VK_INTEL_shader_integer_functions2 v1
VK_GOOGLE_display_timing v1
VK_MVK_macos_surface v3
VK_MVK_moltenvk v29
VK_NV_glsl_shader v1
[mvk-info] GPU device:
model: Apple M1
type: Discrete
vendorID: 0x106b
deviceID: 0xa140
pipelineCacheUUID: 00002775-0400-03EF-CE85-A96D8041B208
supports the following Metal Versions, GPU's and Feature Sets:
Metal Shading Language 2.3
GPU Family Apple 7
GPU Family Apple 6
GPU Family Apple 5
GPU Family Apple 4
GPU Family Apple 3
GPU Family Apple 2
GPU Family Apple 1
GPU Family Mac 2
GPU Family Mac 1
GPU Family Common 3
GPU Family Common 2
GPU Family Common 1
macOS GPU Family 2 v1
macOS GPU Family 1 v4
macOS GPU Family 1 v3
macOS GPU Family 1 v2
macOS GPU Family 1 v1
[mvk-info] Created VkInstance with the following 4 Vulkan extensions enabled:
VK_KHR_external_memory_capabilities v1
VK_KHR_get_physical_device_properties2 v2
VK_KHR_get_surface_capabilities2 v1
VK_KHR_surface v25
[0 Apple M1] queueC=0[1] queueG=0[1] queueT=0[1]
[0 Apple M1] bugsbn1=0 bugcopc=0 bugihfa=0
[0 Apple M1] fp16p=1 fp16s=1 fp16a=1 int8s=1 int8a=1
[0 Apple M1] subgroup=32 basic=1 vote=1 ballot=1 shuffle=1
[mvk-info] Using MTLFence for Vulkan semaphores.
[mvk-info] Created VkDevice to run on GPU Apple M1 with the following 14 Vulkan extensions enabled:
VK_KHR_16bit_storage v1
VK_KHR_8bit_storage v1
VK_KHR_bind_memory2 v1
VK_KHR_dedicated_allocation v3
VK_KHR_descriptor_update_template v1
VK_KHR_external_memory v1
VK_KHR_get_memory_requirements2 v1
VK_KHR_maintenance1 v2
VK_KHR_push_descriptor v2
VK_KHR_sampler_ycbcr_conversion v14
VK_KHR_shader_float16_int8 v1
VK_KHR_storage_buffer_storage_class v1
VK_KHR_swapchain v70
VK_EXT_memory_budget v1
thread_policy_set error 46
loop_count = 4
num_threads = 8
powersave = 0
gpu_device = 0
cooling_down = 1
squeezenet min = 2.98 max = 3.01 avg = 2.99
squeezenet_int8 min = 8.99 max = 9.02 avg = 9.00
mobilenet min = 3.54 max = 3.96 avg = 3.77
mobilenet_int8 min = 18.57 max = 18.68 avg = 18.62
mobilenet_v2 min = 4.86 max = 5.20 avg = 5.08
mobilenet_v3 min = 5.88 max = 6.07 avg = 5.97
shufflenet min = 4.38 max = 4.54 avg = 4.44
shufflenet_v2 min = 3.13 max = 4.99 avg = 4.43
mnasnet min = 3.36 max = 4.19 avg = 3.91
proxylessnasnet min = 4.47 max = 5.06 avg = 4.79
efficientnet_b0 min = 9.77 max = 10.45 avg = 10.09
regnety_400m min = 6.66 max = 7.24 avg = 6.94
blazeface min = 2.32 max = 2.47 avg = 2.38
googlenet min = 9.54 max = 10.34 avg = 10.07
googlenet_int8 min = 38.23 max = 38.27 avg = 38.25
resnet18 min = 8.61 max = 8.80 avg = 8.73
resnet18_int8 min = 32.13 max = 32.45 avg = 32.22
alexnet min = 8.74 max = 10.21 avg = 9.49
vgg16 min = 38.82 max = 39.15 avg = 38.98
vgg16_int8 min = 196.18 max = 196.55 avg = 196.33
resnet50 min = 10.44 max = 11.75 avg = 11.07
resnet50_int8 min = 69.21 max = 69.30 avg = 69.25
squeezenet_ssd min = 13.51 max = 14.85 avg = 14.02
squeezenet_ssd_int8 min = 26.57 max = 26.61 avg = 26.58
mobilenet_ssd min = 9.47 max = 9.99 avg = 9.84
mobilenet_ssd_int8 min = 31.15 max = 31.26 avg = 31.21
mobilenet_yolo min = 12.65 max = 13.33 avg = 12.93
mobilenetv2_yolov3 min = 12.50 max = 12.80 avg = 12.67
yolov4-tiny min = 17.60 max = 19.03 avg = 18.34
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment