Skip to content

Instantly share code, notes, and snippets.

@r-ryantm
Created May 24, 2020 23:06
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save r-ryantm/098c7946a2273d11bf6e5b46fa5910c3 to your computer and use it in GitHub Desktop.
Save r-ryantm/098c7946a2273d11bf6e5b46fa5910c3 to your computer and use it in GitHub Desktop.
/nix/store/wn16bmzvw299jmd75mn7rpl8sqpij1i9-libxsmm-1.15
├── bin
│   └── libxsmm_gemm_generator
├── include
│   ├── libxsmm
│   │   ├── generator_common.c
│   │   ├── generator_common.h
│   │   ├── generator_gemm_avx2_microkernel.c
│   │   ├── generator_gemm_avx2_microkernel.h
│   │   ├── generator_gemm_avx512_microkernel.c
│   │   ├── generator_gemm_avx512_microkernel.h
│   │   ├── generator_gemm_avx_microkernel.c
│   │   ├── generator_gemm_avx_microkernel.h
│   │   ├── generator_gemm.c
│   │   ├── generator_gemm_common.c
│   │   ├── generator_gemm_common.h
│   │   ├── generator_gemm_noarch.c
│   │   ├── generator_gemm_noarch.h
│   │   ├── generator_gemm_sse3_avx_avx2_avx512.c
│   │   ├── generator_gemm_sse3_avx_avx2_avx512.h
│   │   ├── generator_gemm_sse3_microkernel.c
│   │   ├── generator_gemm_sse3_microkernel.h
│   │   ├── generator_matcopy_avx_avx512.c
│   │   ├── generator_matcopy_avx_avx512.h
│   │   ├── generator_matcopy.c
│   │   ├── generator_packed_aux.h
│   │   ├── generator_packed.c
│   │   ├── generator_packed_gemm_ac_rm_avx_avx2_avx512.c
│   │   ├── generator_packed_gemm_ac_rm_avx_avx2_avx512.h
│   │   ├── generator_packed_gemm_avx_avx512.c
│   │   ├── generator_packed_gemm_avx_avx512.h
│   │   ├── generator_packed_gemm_bc_rm_avx_avx2_avx512.c
│   │   ├── generator_packed_gemm_bc_rm_avx_avx2_avx512.h
│   │   ├── generator_packed_gemmnn.h
│   │   ├── generator_packed_getrf_avx_avx512.c
│   │   ├── generator_packed_getrf_avx_avx512.h
│   │   ├── generator_packed_trmm_avx_avx512.c
│   │   ├── generator_packed_trmm_avx_avx512.h
│   │   ├── generator_packed_trsm_avx_avx512.c
│   │   ├── generator_packed_trsm_avx_avx512.h
│   │   ├── generator_packed_trsm_dmacros.h
│   │   ├── generator_packed_xct_avx2_lln.h
│   │   ├── generator_spgemm.c
│   │   ├── generator_spgemm_csc_asparse.c
│   │   ├── generator_spgemm_csc_asparse.h
│   │   ├── generator_spgemm_csc_bsparse.c
│   │   ├── generator_spgemm_csc_bsparse.h
│   │   ├── generator_spgemm_csc_bsparse_soa.c
│   │   ├── generator_spgemm_csc_bsparse_soa.h
│   │   ├── generator_spgemm_csc_csparse_soa.c
│   │   ├── generator_spgemm_csc_csparse_soa.h
│   │   ├── generator_spgemm_csc_reader.c
│   │   ├── generator_spgemm_csc_reader.h
│   │   ├── generator_spgemm_csr_asparse.c
│   │   ├── generator_spgemm_csr_asparse.h
│   │   ├── generator_spgemm_csr_asparse_reg.c
│   │   ├── generator_spgemm_csr_asparse_reg.h
│   │   ├── generator_spgemm_csr_asparse_soa.c
│   │   ├── generator_spgemm_csr_asparse_soa.h
│   │   ├── generator_spgemm_csr_bsparse_soa.c
│   │   ├── generator_spgemm_csr_bsparse_soa.h
│   │   ├── generator_spgemm_csr_reader.c
│   │   ├── generator_spgemm_csr_reader.h
│   │   ├── generator_transpose_avx_avx512.c
│   │   ├── generator_transpose_avx_avx512.h
│   │   ├── generator_transpose.c
│   │   ├── generator_x86_instructions.c
│   │   ├── generator_x86_instructions.h
│   │   ├── libxsmm_blocked_gemm.c
│   │   ├── libxsmm_blocked_gemm_types.h
│   │   ├── libxsmm_cpuid_x86.c
│   │   ├── libxsmm_diff.h
│   │   ├── libxsmm_dnn.c
│   │   ├── libxsmm_dnn_convolution_backward.c
│   │   ├── libxsmm_dnn_convolution_backward.h
│   │   ├── libxsmm_dnn_convolution.c
│   │   ├── libxsmm_dnn_convolution_forward.c
│   │   ├── libxsmm_dnn_convolution_forward.h
│   │   ├── libxsmm_dnn_convolution_weight_update.c
│   │   ├── libxsmm_dnn_convolution_weight_update.h
│   │   ├── libxsmm_dnn_elementwise.c
│   │   ├── libxsmm_dnn_elementwise.h
│   │   ├── libxsmm_dnn_fullyconnected_backward_weight_update.c
│   │   ├── libxsmm_dnn_fullyconnected_backward_weight_update.h
│   │   ├── libxsmm_dnn_fullyconnected.c
│   │   ├── libxsmm_dnn_fullyconnected_forward.c
│   │   ├── libxsmm_dnn_fullyconnected_forward.h
│   │   ├── libxsmm_dnn_fusedbatchnorm_backward.c
│   │   ├── libxsmm_dnn_fusedbatchnorm_backward.h
│   │   ├── libxsmm_dnn_fusedbatchnorm.c
│   │   ├── libxsmm_dnn_fusedbatchnorm_forward.c
│   │   ├── libxsmm_dnn_fusedbatchnorm_forward.h
│   │   ├── libxsmm_dnn_fusedgroupnorm_backward.c
│   │   ├── libxsmm_dnn_fusedgroupnorm_backward.h
│   │   ├── libxsmm_dnn_fusedgroupnorm.c
│   │   ├── libxsmm_dnn_fusedgroupnorm_forward.c
│   │   ├── libxsmm_dnn_fusedgroupnorm_forward.h
│   │   ├── libxsmm_dnn_pooling_backward.c
│   │   ├── libxsmm_dnn_pooling_backward.h
│   │   ├── libxsmm_dnn_pooling.c
│   │   ├── libxsmm_dnn_pooling_forward.c
│   │   ├── libxsmm_dnn_pooling_forward.h
│   │   ├── libxsmm_dnn_rnncell_backward_weight_update.c
│   │   ├── libxsmm_dnn_rnncell_backward_weight_update.h
│   │   ├── libxsmm_dnn_rnncell.c
│   │   ├── libxsmm_dnn_rnncell_forward.c
│   │   ├── libxsmm_dnn_rnncell_forward.h
│   │   ├── libxsmm_dnn_tensor.c
│   │   ├── libxsmm_ext_blocked_gemm.c
│   │   ├── libxsmm_ext.c
│   │   ├── libxsmm_ext_gemm.c
│   │   ├── libxsmm_ext.h
│   │   ├── libxsmm_ext_xcopy.c
│   │   ├── libxsmm_fsspmdm.c
│   │   ├── libxsmm_gemm.c
│   │   ├── libxsmm_gemm.h
│   │   ├── libxsmm_generator.c
│   │   ├── libxsmm_generator_gemm_driver.c
│   │   ├── libxsmm_hash.c
│   │   ├── libxsmm_hash.h
│   │   ├── libxsmm_main.c
│   │   ├── libxsmm_main.h
│   │   ├── libxsmm_malloc.c
│   │   ├── libxsmm_math.c
│   │   ├── libxsmm_memory.c
│   │   ├── libxsmm_mhd.c
│   │   ├── libxsmm_perf.c
│   │   ├── libxsmm_perf.h
│   │   ├── libxsmm_python.c
│   │   ├── libxsmm_rng.c
│   │   ├── libxsmm_spmdm_begin_avx2.h
│   │   ├── libxsmm_spmdm_begin_avx512.h
│   │   ├── libxsmm_spmdm_begin.h
│   │   ├── libxsmm_spmdm.c
│   │   ├── libxsmm_spmdm_end.h
│   │   ├── libxsmm_sync.c
│   │   ├── libxsmm_timer.c
│   │   ├── libxsmm_trace.c
│   │   ├── libxsmm_trace.h
│   │   ├── libxsmm_xcopy.c
│   │   ├── libxsmm_xcopy.h
│   │   ├── perf_jitdump.h
│   │   └── template
│   │   ├── libxsmm_blocked_gemm_convert_b_to_a.tpl.c
│   │   ├── libxsmm_blocked_gemm_copyin_a.tpl.c
│   │   ├── libxsmm_blocked_gemm_copyin_b.tpl.c
│   │   ├── libxsmm_blocked_gemm_copyin_c.tpl.c
│   │   ├── libxsmm_blocked_gemm_copyout_c.tpl.c
│   │   ├── libxsmm_blocked_gemm.tpl.c
│   │   ├── libxsmm_blocked_gemm_transpose_b.tpl.c
│   │   ├── libxsmm_config.h
│   │   ├── libxsmm_dnn_convolve_st_bwd_custom_custom_fallback_generic.tpl.c
│   │   ├── libxsmm_dnn_convolve_st_bwd_custom_custom_generic_bf16.tpl.c
│   │   ├── libxsmm_dnn_convolve_st_bwd_custom_custom_generic.tpl.c
│   │   ├── libxsmm_dnn_convolve_st_bwd_nhwc_custom-rsck_fallback_generic.tpl.c
│   │   ├── libxsmm_dnn_convolve_st_bwd_nhwc_custom-rsck_generic.tpl.c
│   │   ├── libxsmm_dnn_convolve_st_fwd_custom_custom_generic_bf16.tpl.c
│   │   ├── libxsmm_dnn_convolve_st_fwd_custom_custom_generic_i8i32.tpl.c
│   │   ├── libxsmm_dnn_convolve_st_fwd_custom_custom_generic_i8i8.tpl.c
│   │   ├── libxsmm_dnn_convolve_st_fwd_custom_custom_generic.tpl.c
│   │   ├── libxsmm_dnn_convolve_st_fwd_nhwc_custom-rsck_generic.tpl.c
│   │   ├── libxsmm_dnn_convolve_st_upd_custom_custom_generic_bf16.tpl.c
│   │   ├── libxsmm_dnn_convolve_st_upd_custom_custom_generic.tpl.c
│   │   ├── libxsmm_dnn_convolve_st_upd_nhwc_custom-rsck_generic.tpl.c
│   │   ├── libxsmm_dnn_fullyconnected_st_bwdupd_custom_generic.tpl.c
│   │   ├── libxsmm_dnn_fullyconnected_st_bwdupd_ncnc_kcck_generic_bf16.tpl.c
│   │   ├── libxsmm_dnn_fullyconnected_st_bwdupd_ncnc_kcck_generic.tpl.c
│   │   ├── libxsmm_dnn_fullyconnected_st_fwd_custom_generic.tpl.c
│   │   ├── libxsmm_dnn_fullyconnected_st_fwd_ncnc_kcck_generic_bf16.tpl.c
│   │   ├── libxsmm_dnn_fullyconnected_st_fwd_ncnc_kcck_generic.tpl.c
│   │   ├── libxsmm_dnn_fusedbatchnorm_st_bwd_custom_f32_bf16_c16_avx512.tpl.c
│   │   ├── libxsmm_dnn_fusedbatchnorm_st_bwd_custom_f32_bf16_c32_avx512.tpl.c
│   │   ├── libxsmm_dnn_fusedbatchnorm_st_bwd_custom_f32_bf16_c64_avx512.tpl.c
│   │   ├── libxsmm_dnn_fusedbatchnorm_st_bwd_custom_generic.tpl.c
│   │   ├── libxsmm_dnn_fusedbatchnorm_st_fwd_custom_f32_bf16_c16_avx512.tpl.c
│   │   ├── libxsmm_dnn_fusedbatchnorm_st_fwd_custom_f32_bf16_c32_avx512.tpl.c
│   │   ├── libxsmm_dnn_fusedbatchnorm_st_fwd_custom_f32_bf16_c64_avx512.tpl.c
│   │   ├── libxsmm_dnn_fusedbatchnorm_st_fwd_custom_generic.tpl.c
│   │   ├── libxsmm_dnn_fusedgroupnorm_st_bwd_custom_f32_bf16_c16_avx512.tpl.c
│   │   ├── libxsmm_dnn_fusedgroupnorm_st_bwd_custom_f32_bf16_c32_avx512.tpl.c
│   │   ├── libxsmm_dnn_fusedgroupnorm_st_bwd_custom_f32_bf16_c64_avx512.tpl.c
│   │   ├── libxsmm_dnn_fusedgroupnorm_st_bwd_custom_generic.tpl.c
│   │   ├── libxsmm_dnn_fusedgroupnorm_st_fwd_custom_f32_bf16_c16_avx512.tpl.c
│   │   ├── libxsmm_dnn_fusedgroupnorm_st_fwd_custom_f32_bf16_c32_avx512.tpl.c
│   │   ├── libxsmm_dnn_fusedgroupnorm_st_fwd_custom_f32_bf16_c64_avx512.tpl.c
│   │   ├── libxsmm_dnn_fusedgroupnorm_st_fwd_custom_generic.tpl.c
│   │   ├── libxsmm_dnn_pooling_st_bwd_custom_f32_bf16_c16_avx512.tpl.c
│   │   ├── libxsmm_dnn_pooling_st_bwd_custom_f32_bf16_c32_avx512.tpl.c
│   │   ├── libxsmm_dnn_pooling_st_bwd_custom_f32_bf16_c64_avx512.tpl.c
│   │   ├── libxsmm_dnn_pooling_st_bwd_custom_generic.tpl.c
│   │   ├── libxsmm_dnn_pooling_st_fwd_custom_f32_bf16_c16_avx512.tpl.c
│   │   ├── libxsmm_dnn_pooling_st_fwd_custom_f32_bf16_c32_avx512.tpl.c
│   │   ├── libxsmm_dnn_pooling_st_fwd_custom_f32_bf16_c64_avx512.tpl.c
│   │   ├── libxsmm_dnn_pooling_st_fwd_custom_generic.tpl.c
│   │   ├── libxsmm_dnn_rnncell_st_gru_bwdupd_nc_ck_generic.tpl.c
│   │   ├── libxsmm_dnn_rnncell_st_gru_bwdupd_nc_kcck.tpl.c
│   │   ├── libxsmm_dnn_rnncell_st_gru_fwd_nc_ck_generic.tpl.c
│   │   ├── libxsmm_dnn_rnncell_st_gru_fwd_nc_kcck.tpl.c
│   │   ├── libxsmm_dnn_rnncell_st_lstm_bwdupd_nc_ck_generic_bf16.tpl.c
│   │   ├── libxsmm_dnn_rnncell_st_lstm_bwdupd_nc_ck_generic.tpl.c
│   │   ├── libxsmm_dnn_rnncell_st_lstm_bwdupd_nc_kcck_bf16.tpl.c
│   │   ├── libxsmm_dnn_rnncell_st_lstm_bwdupd_nc_kcck_core_bf16.tpl.c
│   │   ├── libxsmm_dnn_rnncell_st_lstm_bwdupd_nc_kcck_core.tpl.c
│   │   ├── libxsmm_dnn_rnncell_st_lstm_bwdupd_nc_kcck.tpl.c
│   │   ├── libxsmm_dnn_rnncell_st_lstm_fwd_nc_ck_generic_bf16.tpl.c
│   │   ├── libxsmm_dnn_rnncell_st_lstm_fwd_nc_ck_generic.tpl.c
│   │   ├── libxsmm_dnn_rnncell_st_lstm_fwd_nc_kcck_bf16.tpl.c
│   │   ├── libxsmm_dnn_rnncell_st_lstm_fwd_nc_kcck_diffused_bf16.tpl.c
│   │   ├── libxsmm_dnn_rnncell_st_lstm_fwd_nc_kcck_diffused.tpl.c
│   │   ├── libxsmm_dnn_rnncell_st_lstm_fwd_nc_kcck_fused_bf16.tpl.c
│   │   ├── libxsmm_dnn_rnncell_st_lstm_fwd_nc_kcck_fused.tpl.c
│   │   ├── libxsmm_dnn_rnncell_st_lstm_fwd_nc_kcck.tpl.c
│   │   ├── libxsmm_dnn_rnncell_st_rnn_bwdupd_nc_ck_generic.tpl.c
│   │   ├── libxsmm_dnn_rnncell_st_rnn_bwdupd_nc_kcck.tpl.c
│   │   ├── libxsmm_dnn_rnncell_st_rnn_fwd_nc_ck_generic.tpl.c
│   │   ├── libxsmm_dnn_rnncell_st_rnn_fwd_nc_kcck.tpl.c
│   │   ├── libxsmm_dnn_rnncell_st_rnn_fwd_ncnc_kcck.tpl.c
│   │   ├── libxsmm_dnn_tensor_bias_copy_in_nchw.tpl.c
│   │   ├── libxsmm_dnn_tensor_bias_copy_out_nchw.tpl.c
│   │   ├── libxsmm_dnn_tensor_buffer_copy_in_nchw.tpl.c
│   │   ├── libxsmm_dnn_tensor_buffer_copy_out_nchw.tpl.c
│   │   ├── libxsmm_dnn_tensor_filter_copy_in_kcrs.tpl.c
│   │   ├── libxsmm_dnn_tensor_filter_copy_out_kcrs.tpl.c
│   │   ├── libxsmm_dnn_zero_rim_st_input_custom.tpl.c
│   │   ├── libxsmm_dnn_zero_rim_st_input_nhwc.tpl.c
│   │   ├── libxsmm.f
│   │   ├── libxsmm.h
│   │   ├── libxsmm_internal_gru_bwdupd_fused_eltwise_1.tpl.c
│   │   ├── libxsmm_internal_gru_bwdupd_fused_eltwise_2.tpl.c
│   │   ├── libxsmm_internal_lstm_bwdupd_fused_eltwise_reformat_bf16.tpl.c
│   │   ├── libxsmm_internal_lstm_bwdupd_fused_eltwise_reformat.tpl.c
│   │   ├── libxsmm_internal_lstm_bwdupd_fused_eltwise.tpl.c
│   │   ├── libxsmm_internal_lstm_fwd_fused_eltwise_bf16.tpl.c
│   │   ├── libxsmm_internal_lstm_fwd_fused_eltwise.tpl.c
│   │   ├── libxsmm_matdiff.tpl.c
│   │   ├── libxsmm_spmdm_compute_bfloat16_thread.tpl.c
│   │   ├── libxsmm_spmdm_compute_fp32_thread.tpl.c
│   │   ├── libxsmm_spmdm_createSparseSlice_bfloat16_thread.tpl.c
│   │   ├── libxsmm_spmdm_createSparseSlice_fp32_thread.tpl.c
│   │   └── transpose.tpl.c
│   ├── libxsmm_blocked_gemm.h
│   ├── libxsmm_config.h
│   ├── libxsmm_cpuid.h
│   ├── libxsmm_dnn_convolution.h
│   ├── libxsmm_dnn_fullyconnected.h
│   ├── libxsmm_dnn_fusedbatchnorm.h
│   ├── libxsmm_dnn_fusedgroupnorm.h
│   ├── libxsmm_dnn.h
│   ├── libxsmm_dnn_pooling.h
│   ├── libxsmm_dnn_rnncell.h
│   ├── libxsmm_dnn_tensor.h
│   ├── libxsmm.f
│   ├── libxsmm_frontend.h
│   ├── libxsmm_fsspmdm.h
│   ├── libxsmm_generator.h
│   ├── libxsmm.h
│   ├── libxsmm_intrinsics_x86.h
│   ├── libxsmm_macros.h
│   ├── libxsmm_malloc.h
│   ├── libxsmm_math.h
│   ├── libxsmm_memory.h
│   ├── libxsmm_mhd.h
│   ├── libxsmm.mod
│   ├── libxsmm_rng.h
│   ├── libxsmm_source.h
│   ├── libxsmm_spmdm.h
│   ├── libxsmm_sync.h
│   ├── libxsmm_timer.h
│   └── libxsmm_typedefs.h
├── lib
│   ├── libxsmm
│   ├── libxsmm.a
│   ├── libxsmmext.a
│   ├── libxsmmext.pc
│   ├── libxsmmf.a
│   ├── libxsmmf.pc
│   ├── libxsmmgen.a
│   ├── libxsmmnoblas.a
│   ├── libxsmmnoblas.pc
│   └── libxsmm.pc
└── share
└── libxsmm
├── build.txt
├── CONTRIBUTING.md
├── cp2k.md
├── gxm.md
├── index.md
├── libxsmm_aux.md
├── libxsmm_be.md
├── libxsmm_dl.md
├── libxsmm_fortran.md
├── libxsmm_mm.md
├── libxsmm.pdf
├── libxsmm_prof.md
├── libxsmm_samples.md
├── libxsmm_samples.pdf
├── libxsmm_tune.md
├── LICENSE.md
├── README.md
├── SECURITY.md
├── tensorflow.md
├── tensorflow.pdf
└── version.txt
7 directories, 295 files
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment