Skip to content

Instantly share code, notes, and snippets.

@xrq-phys
Last active December 11, 2020 22:17
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save xrq-phys/d0bedec20b0760a57256ea76283d67dd to your computer and use it in GitHub Desktop.
Save xrq-phys/d0bedec20b0760a57256ea76283d67dd to your computer and use it in GitHub Desktop.
Compile NumPy on M1 Chip, with Code Attached.

What

Compile NumPy on Apple Silicon M1 Chip

How

  • Create a virtual environment at a location of your choice, e.g. export VENV=$HOME/.local, then:
python3 -m venv $VENV
. $VENV/bin/activate
  • Compile and install BLIS. One can replace blis/kernels/armv8a/3/bli_gemm_armv8a_asm_d6x8.c with blis%kernels%armv8a%3%bli_gemm_armv8a_asm_d6x8.s attached.
./configure -p $VENV -t none --enable-cblas cortexa57
  • Compile and install libFLAME. autoconf insists on finding a Fortran compiler, so an Intel-based gfortran (e.g., the one from Homebrew or Anaconda) may be needed just to get through configure; gfortran is not actually used during compilation.
./configure LDFLAGS="-L$VENV/lib -lblis" F77=gfortran --prefix=$VENV --enable-dynamic-build --enable-supermatrix --enable-cblas-interfaces --enable-lapack2flame --with-cc=clang --enable-multithreading=pthreads
  • Download the NumPy source code. Replace numpy-1.19.4/numpy/distutils/ccompiler.py with the attached numpy-1.19.4%numpy%distutils%ccompiler.py. This stops the auto-detected -arch x86_64 argument from being passed to the compiler.
python setup.py build -j 4 install --prefix $VENV
.arch armv8-a
.file "bli_gemm_armv8a_asm_d6x8.c"
.text
// ---------------------------------------------------------------------------
// _bli_sgemm_armv8a_asm_8x12
//
// Single-precision 8x12 rank-k update micro-kernel (compiler output of the
// inline assembly in bli_gemm_armv8a_asm_d6x8.c, adapted for Mach-O symbols).
//
// It accumulates an 8x12 block of products in v8..v31 and then writes
//     C := beta * C + alpha * (accumulated A*B block)
// where C is not read at all when beta compares equal to 0.0.
//
// Incoming arguments (as consumed by the code below):
//   x0        k0     - number of k-steps; split into k0/4 unrolled
//                      iterations and k0%4 single-step iterations
//   x1        alpha  - pointer to the float scaling the product
//   x2        a      - pointer to the A panel, 8 floats per k-step
//   x3        b      - pointer to the B panel, 12 floats per k-step
//   x4        beta   - pointer to the float scaling the existing C
//   x5        c      - pointer to the 8x12 output block
//   x6        rs_c   - row stride of C, in elements
//   x7        cs_c   - column stride of C, in elements
//   [sp, #0]  data   - pointer to a structure; the two 8-byte values at
//                      offsets 8 and 16 are loaded and used only as
//                      prefetch addresses.
//                      NOTE(review): presumably BLIS's auxinfo_t with its
//                      a_next / b_next fields - confirm against the C source.
//
// Register use inside the #APP section:
//   x0 / x1          running A / B pointers
//   x2               address of C column 0
//   x16,x17,x15,x19..x26   addresses of C columns 1..11 (in that order)
//   x3 / x4          prefetch addresses taken from `data`
//   x5 / x6          remaining unrolled iterations / remaining single steps
//   x7 / x8          pointers to alpha / beta
//   x10 / x14        cs_c*4 / rs_c*4 (strides in bytes)
//   x13              rs_c (compared with 1 to pick the contiguous path)
//   x27              scratch pointer for the general-stride path
//   v8..v31          accumulators: column j lives in v(8+2j) (rows 0-3)
//                    and v(9+2j) (rows 4-7)
//
// Apple's arm64 ABI reserves x18 as a platform register, so the address of
// C column 3 is kept in x15 here (the original sequence used x18). x15 is a
// caller-saved temporary that nothing else in this routine references.
// ---------------------------------------------------------------------------
.align 2
.p2align 3,,7
.global _bli_sgemm_armv8a_asm_8x12
// .type bli_sgemm_armv8a_asm_8x12, %function
_bli_sgemm_armv8a_asm_8x12:
.LFB738:
.cfi_startproc
// Prologue: open a 240-byte frame and save the callee-saved registers that
// the kernel body overwrites (x19-x27 and d8-d15).
stp x19, x20, [sp, -240]!
.cfi_def_cfa_offset 240
.cfi_offset 19, -240
.cfi_offset 20, -232
// x9 := k0 / 4 and x0 := k0 % 4, both with C signed-division semantics.
// x11/x10 := the two 8-byte values at offsets 8 and 16 of the `data`
// structure (first stack argument, found at [sp, 240] after the push).
cmp x0, 0
add x9, x0, 3
ldr x10, [sp, 240]
csel x9, x9, x0, lt
negs x8, x0
and x0, x0, 3
and x8, x8, 3
asr x9, x9, 2
ldp x11, x10, [x10, 8]
csneg x0, x0, x8, mi
stp x21, x22, [sp, 16]
stp x23, x24, [sp, 32]
stp x25, x26, [sp, 48]
str x27, [sp, 64]
stp d8, d9, [sp, 80]
stp d10, d11, [sp, 96]
stp d12, d13, [sp, 112]
stp d14, d15, [sp, 128]
.cfi_offset 21, -224
.cfi_offset 22, -216
.cfi_offset 23, -208
.cfi_offset 24, -200
.cfi_offset 25, -192
.cfi_offset 26, -184
.cfi_offset 27, -176
.cfi_offset 72, -160
.cfi_offset 73, -152
.cfi_offset 74, -144
.cfi_offset 75, -136
.cfi_offset 76, -128
.cfi_offset 77, -120
.cfi_offset 78, -112
.cfi_offset 79, -104
// Spill every operand of the inline-asm block to the frame:
//   [152]=c  [160]=beta  [168]=b  [176]=a  [184]=alpha
//   [192],[200]=prefetch addresses  [208]=k0/4  [216]=k0%4
//   [224]=rs_c  [232]=cs_c
stp x5, x4, [sp, 152]
stp x3, x2, [sp, 168]
stp x1, x11, [sp, 184]
stp x10, x9, [sp, 200]
stp x0, x6, [sp, 216]
str x7, [sp, 232]
#APP
// 76 "bli_gemm_armv8a_asm_d6x8.c" 1
// Reload the operands into the registers the kernel body expects.
ldr x0,[sp, 176]
ldr x1,[sp, 168]
ldr x2,[sp, 152]
ldr x3,[sp, 192]
ldr x4,[sp, 200]
ldr x5,[sp, 208]
ldr x6,[sp, 216]
ldr x7,[sp, 184]
ldr x8,[sp, 160]
ldr x9,[sp, 232]
lsl x10,x9,#2
ldr x13,[sp, 224]
lsl x14,x13,#2
// Addresses of C columns 1..11, each one column stride (x10 bytes) apart.
add x16,x2,x10
add x17,x16,x10
add x15,x17,x10 // column 3: x15 instead of the Apple-reserved x18
add x19,x15,x10
add x20,x19,x10
add x21,x20,x10
add x22,x21,x10
add x23,x22,x10
add x24,x23,x10
add x25,x24,x10
add x26,x25,x10
// Prefetch the start of every C column.
prfm pldl1keep,[x2]
prfm pldl1keep,[x16]
prfm pldl1keep,[x17]
prfm pldl1keep,[x15]
prfm pldl1keep,[x19]
prfm pldl1keep,[x20]
prfm pldl1keep,[x21]
prfm pldl1keep,[x22]
prfm pldl1keep,[x23]
prfm pldl1keep,[x24]
prfm pldl1keep,[x25]
prfm pldl1keep,[x26]
// Zero the 24 accumulators, interleaved with prefetches of B (x1) and A (x0).
dup v8.4s, wzr
prfm PLDL1KEEP, [x1, #192]
dup v9.4s, wzr
prfm PLDL1KEEP, [x1, #256]
dup v10.4s, wzr
prfm PLDL1KEEP, [x1, #320]
dup v11.4s, wzr
dup v12.4s, wzr
dup v13.4s, wzr
dup v14.4s, wzr
prfm PLDL1KEEP, [x0, #128]
dup v15.4s, wzr
prfm PLDL1KEEP, [x0, #192]
dup v16.4s, wzr
dup v17.4s, wzr
dup v18.4s, wzr
dup v19.4s, wzr
dup v20.4s, wzr
dup v21.4s, wzr
dup v22.4s, wzr
dup v23.4s, wzr
dup v24.4s, wzr
dup v25.4s, wzr
dup v26.4s, wzr
dup v27.4s, wzr
dup v28.4s, wzr
dup v29.4s, wzr
dup v30.4s, wzr
dup v31.4s, wzr
// No unrolled iterations at all: go straight to the k0%4 remainder loop.
cmp x5,#0
beq .SCONSIDERKLEFT
// Preload the first k-step: 8 floats of A into v0/v1, 12 floats of B into
// v2/v3/v4, and advance both pointers past it.
ldr q0, [x0]
ldr q1, [x0, #16]
ldr q2, [x1]
ldr q3, [x1, #16]
ldr q4, [x1, #32]
add x0, x0, #32
add x1, x1, #48
// Exactly one unrolled iteration: it is the last one, skip the steady loop.
cmp x5,1
beq .SLASTITER
// Steady-state loop: four k-steps per pass. A alternates between v0/v1 and
// v5/v6 so the loads for the next k-step overlap the current multiplies;
// the tail of each pass loads the first k-step of the following pass.
.SLOOPKITER:
// k-step 0 (A in v0/v1)
ldr q5, [x0]
fmla v8.4s, v0.4s,v2.s[0]
fmla v9.4s, v1.4s,v2.s[0]
ldr q6, [x0, #16]
fmla v10.4s,v0.4s,v2.s[1]
fmla v11.4s,v1.4s,v2.s[1]
fmla v12.4s,v0.4s,v2.s[2]
fmla v13.4s,v1.4s,v2.s[2]
fmla v14.4s,v0.4s,v2.s[3]
fmla v15.4s,v1.4s,v2.s[3]
ldr q2, [x1]
fmla v16.4s,v0.4s,v3.s[0]
prfm PLDL1KEEP, [x1, #336]
fmla v17.4s,v1.4s,v3.s[0]
prfm PLDL1KEEP, [x1, #400]
fmla v18.4s,v0.4s,v3.s[1]
fmla v19.4s,v1.4s,v3.s[1]
prfm PLDL1KEEP, [x1, #464]
fmla v20.4s,v0.4s,v3.s[2]
fmla v21.4s,v1.4s,v3.s[2]
fmla v22.4s,v0.4s,v3.s[3]
fmla v23.4s,v1.4s,v3.s[3]
fmla v24.4s,v0.4s,v4.s[0]
fmla v26.4s,v0.4s,v4.s[1]
fmla v28.4s,v0.4s,v4.s[2]
fmla v30.4s,v0.4s,v4.s[3]
ldr q3, [x1, #16]
fmla v25.4s,v1.4s,v4.s[0]
fmla v27.4s,v1.4s,v4.s[1]
fmla v29.4s,v1.4s,v4.s[2]
fmla v31.4s,v1.4s,v4.s[3]
ldr q4, [x1, #32]
// k-step 1 (A in v5/v6)
ldr q0, [x0, #32]
fmla v8.4s,v5.4s,v2.s[0]
fmla v9.4s,v6.4s,v2.s[0]
ldr q1, [x0, #48]
fmla v10.4s,v5.4s,v2.s[1]
fmla v11.4s,v6.4s,v2.s[1]
fmla v12.4s,v5.4s,v2.s[2]
fmla v13.4s,v6.4s,v2.s[2]
fmla v14.4s,v5.4s,v2.s[3]
fmla v15.4s,v6.4s,v2.s[3]
ldr q2, [x1, #48]
fmla v16.4s,v5.4s,v3.s[0]
prfm PLDL1KEEP, [x0, #224]
fmla v17.4s,v6.4s,v3.s[0]
prfm PLDL1KEEP, [x0, #288]
fmla v18.4s,v5.4s,v3.s[1]
fmla v19.4s,v6.4s,v3.s[1]
fmla v20.4s,v5.4s,v3.s[2]
fmla v21.4s,v6.4s,v3.s[2]
fmla v22.4s,v5.4s,v3.s[3]
fmla v23.4s,v6.4s,v3.s[3]
fmla v24.4s,v5.4s,v4.s[0]
fmla v26.4s,v5.4s,v4.s[1]
fmla v28.4s,v5.4s,v4.s[2]
fmla v30.4s,v5.4s,v4.s[3]
ldr q3, [x1, #64]
fmla v25.4s,v6.4s,v4.s[0]
fmla v27.4s,v6.4s,v4.s[1]
fmla v29.4s,v6.4s,v4.s[2]
fmla v31.4s,v6.4s,v4.s[3]
ldr q4, [x1, #80]
// k-step 2 (A in v0/v1)
ldr q5, [x0, #64]
fmla v8.4s,v0.4s,v2.s[0]
fmla v9.4s,v1.4s,v2.s[0]
ldr q6, [x0, #80]
fmla v10.4s,v0.4s,v2.s[1]
fmla v11.4s,v1.4s,v2.s[1]
fmla v12.4s,v0.4s,v2.s[2]
fmla v13.4s,v1.4s,v2.s[2]
fmla v14.4s,v0.4s,v2.s[3]
fmla v15.4s,v1.4s,v2.s[3]
ldr q2, [x1, #96]
fmla v16.4s,v0.4s,v3.s[0]
fmla v17.4s,v1.4s,v3.s[0]
fmla v18.4s,v0.4s,v3.s[1]
fmla v19.4s,v1.4s,v3.s[1]
fmla v20.4s,v0.4s,v3.s[2]
fmla v21.4s,v1.4s,v3.s[2]
fmla v22.4s,v0.4s,v3.s[3]
fmla v23.4s,v1.4s,v3.s[3]
fmla v24.4s,v0.4s,v4.s[0]
fmla v26.4s,v0.4s,v4.s[1]
fmla v28.4s,v0.4s,v4.s[2]
fmla v30.4s,v0.4s,v4.s[3]
ldr q3, [x1, #112]
fmla v25.4s,v1.4s,v4.s[0]
fmla v27.4s,v1.4s,v4.s[1]
fmla v29.4s,v1.4s,v4.s[2]
fmla v31.4s,v1.4s,v4.s[3]
ldr q4, [x1, #128]
// k-step 3 (A in v5/v6); also loads k-step 0 of the next pass
ldr q0, [x0, #96]
fmla v8.4s,v5.4s,v2.s[0]
fmla v9.4s,v6.4s,v2.s[0]
ldr q1, [x0, #112]
fmla v10.4s,v5.4s,v2.s[1]
fmla v11.4s,v6.4s,v2.s[1]
fmla v12.4s,v5.4s,v2.s[2]
fmla v13.4s,v6.4s,v2.s[2]
fmla v14.4s,v5.4s,v2.s[3]
fmla v15.4s,v6.4s,v2.s[3]
ldr q2, [x1, #144]
fmla v16.4s,v5.4s,v3.s[0]
fmla v17.4s,v6.4s,v3.s[0]
fmla v18.4s,v5.4s,v3.s[1]
fmla v19.4s,v6.4s,v3.s[1]
fmla v20.4s,v5.4s,v3.s[2]
fmla v21.4s,v6.4s,v3.s[2]
fmla v22.4s,v5.4s,v3.s[3]
fmla v23.4s,v6.4s,v3.s[3]
fmla v24.4s,v5.4s,v4.s[0]
fmla v26.4s,v5.4s,v4.s[1]
fmla v28.4s,v5.4s,v4.s[2]
fmla v30.4s,v5.4s,v4.s[3]
ldr q3, [x1, #160]
fmla v25.4s,v6.4s,v4.s[0]
fmla v27.4s,v6.4s,v4.s[1]
fmla v29.4s,v6.4s,v4.s[2]
fmla v31.4s,v6.4s,v4.s[3]
ldr q4, [x1, #176]
// Advance by four k-steps (4*12 floats of B, 4*8 floats of A) and loop
// until a single unrolled iteration remains.
add x1, x1, #192
add x0, x0, #128
sub x5,x5,1
cmp x5,1
bne .SLOOPKITER
// Final unrolled iteration: same four k-steps, but nothing is loaded for a
// following pass, so only three further k-steps are fetched.
.SLASTITER:
// k-step 0 (A in v0/v1)
ldr q5, [x0]
fmla v8.4s,v0.4s,v2.s[0]
fmla v9.4s,v1.4s,v2.s[0]
ldr q6, [x0, #16]
fmla v10.4s,v0.4s,v2.s[1]
fmla v11.4s,v1.4s,v2.s[1]
fmla v12.4s,v0.4s,v2.s[2]
fmla v13.4s,v1.4s,v2.s[2]
fmla v14.4s,v0.4s,v2.s[3]
fmla v15.4s,v1.4s,v2.s[3]
ldr q2, [x1]
fmla v16.4s,v0.4s,v3.s[0]
fmla v17.4s,v1.4s,v3.s[0]
fmla v18.4s,v0.4s,v3.s[1]
fmla v19.4s,v1.4s,v3.s[1]
fmla v20.4s,v0.4s,v3.s[2]
fmla v21.4s,v1.4s,v3.s[2]
fmla v22.4s,v0.4s,v3.s[3]
fmla v23.4s,v1.4s,v3.s[3]
fmla v24.4s,v0.4s,v4.s[0]
fmla v26.4s,v0.4s,v4.s[1]
fmla v28.4s,v0.4s,v4.s[2]
fmla v30.4s,v0.4s,v4.s[3]
ldr q3, [x1, #16]
fmla v25.4s,v1.4s,v4.s[0]
fmla v27.4s,v1.4s,v4.s[1]
fmla v29.4s,v1.4s,v4.s[2]
fmla v31.4s,v1.4s,v4.s[3]
ldr q4, [x1, #32]
// k-step 1 (A in v5/v6)
ldr q0, [x0, #32]
fmla v8.4s,v5.4s,v2.s[0]
fmla v9.4s,v6.4s,v2.s[0]
ldr q1, [x0, #48]
fmla v10.4s,v5.4s,v2.s[1]
fmla v11.4s,v6.4s,v2.s[1]
fmla v12.4s,v5.4s,v2.s[2]
fmla v13.4s,v6.4s,v2.s[2]
fmla v14.4s,v5.4s,v2.s[3]
fmla v15.4s,v6.4s,v2.s[3]
ldr q2, [x1, #48]
fmla v16.4s,v5.4s,v3.s[0]
fmla v17.4s,v6.4s,v3.s[0]
fmla v18.4s,v5.4s,v3.s[1]
fmla v19.4s,v6.4s,v3.s[1]
fmla v20.4s,v5.4s,v3.s[2]
fmla v21.4s,v6.4s,v3.s[2]
fmla v22.4s,v5.4s,v3.s[3]
fmla v23.4s,v6.4s,v3.s[3]
fmla v24.4s,v5.4s,v4.s[0]
fmla v26.4s,v5.4s,v4.s[1]
fmla v28.4s,v5.4s,v4.s[2]
fmla v30.4s,v5.4s,v4.s[3]
ldr q3, [x1, #64]
fmla v25.4s,v6.4s,v4.s[0]
fmla v27.4s,v6.4s,v4.s[1]
fmla v29.4s,v6.4s,v4.s[2]
fmla v31.4s,v6.4s,v4.s[3]
ldr q4, [x1, #80]
// k-step 2 (A in v0/v1)
ldr q5, [x0, #64]
fmla v8.4s,v0.4s,v2.s[0]
fmla v9.4s,v1.4s,v2.s[0]
ldr q6, [x0, #80]
fmla v10.4s,v0.4s,v2.s[1]
fmla v11.4s,v1.4s,v2.s[1]
fmla v12.4s,v0.4s,v2.s[2]
fmla v13.4s,v1.4s,v2.s[2]
fmla v14.4s,v0.4s,v2.s[3]
fmla v15.4s,v1.4s,v2.s[3]
ldr q2, [x1, #96]
fmla v16.4s,v0.4s,v3.s[0]
fmla v17.4s,v1.4s,v3.s[0]
fmla v18.4s,v0.4s,v3.s[1]
fmla v19.4s,v1.4s,v3.s[1]
fmla v20.4s,v0.4s,v3.s[2]
fmla v21.4s,v1.4s,v3.s[2]
fmla v22.4s,v0.4s,v3.s[3]
fmla v23.4s,v1.4s,v3.s[3]
fmla v24.4s,v0.4s,v4.s[0]
fmla v26.4s,v0.4s,v4.s[1]
fmla v28.4s,v0.4s,v4.s[2]
fmla v30.4s,v0.4s,v4.s[3]
ldr q3, [x1, #112]
fmla v25.4s,v1.4s,v4.s[0]
fmla v27.4s,v1.4s,v4.s[1]
fmla v29.4s,v1.4s,v4.s[2]
fmla v31.4s,v1.4s,v4.s[3]
ldr q4, [x1, #128]
// k-step 3 (A in v5/v6); no further loads
fmla v8.4s,v5.4s,v2.s[0]
fmla v9.4s,v6.4s,v2.s[0]
fmla v10.4s,v5.4s,v2.s[1]
fmla v11.4s,v6.4s,v2.s[1]
fmla v12.4s,v5.4s,v2.s[2]
fmla v13.4s,v6.4s,v2.s[2]
fmla v14.4s,v5.4s,v2.s[3]
fmla v15.4s,v6.4s,v2.s[3]
fmla v16.4s,v5.4s,v3.s[0]
fmla v17.4s,v6.4s,v3.s[0]
fmla v18.4s,v5.4s,v3.s[1]
fmla v19.4s,v6.4s,v3.s[1]
fmla v20.4s,v5.4s,v3.s[2]
fmla v21.4s,v6.4s,v3.s[2]
fmla v22.4s,v5.4s,v3.s[3]
fmla v23.4s,v6.4s,v3.s[3]
fmla v24.4s,v5.4s,v4.s[0]
fmla v26.4s,v5.4s,v4.s[1]
fmla v28.4s,v5.4s,v4.s[2]
fmla v30.4s,v5.4s,v4.s[3]
fmla v25.4s,v6.4s,v4.s[0]
fmla v27.4s,v6.4s,v4.s[1]
fmla v29.4s,v6.4s,v4.s[2]
fmla v31.4s,v6.4s,v4.s[3]
// Step over the three k-steps fetched above (the first one was already
// skipped when it was preloaded).
add x1, x1, #144
add x0, x0, #96
// Remainder: k0 % 4 single k-steps, each loading 8 floats of A and 12 of B.
.SCONSIDERKLEFT:
cmp x6,0
beq .SPOSTACCUM
.SLOOPKLEFT:
ldr q0, [x0],#16
ldr q1, [x0],#16
ldr q2, [x1],#16
ldr q3, [x1],#16
ldr q4, [x1],#16
sub x6,x6,1
fmla v8.4s,v0.4s,v2.s[0]
fmla v9.4s,v1.4s,v2.s[0]
fmla v10.4s,v0.4s,v2.s[1]
fmla v11.4s,v1.4s,v2.s[1]
fmla v12.4s,v0.4s,v2.s[2]
fmla v13.4s,v1.4s,v2.s[2]
fmla v14.4s,v0.4s,v2.s[3]
fmla v15.4s,v1.4s,v2.s[3]
fmla v16.4s,v0.4s,v3.s[0]
fmla v17.4s,v1.4s,v3.s[0]
fmla v18.4s,v0.4s,v3.s[1]
fmla v19.4s,v1.4s,v3.s[1]
fmla v20.4s,v0.4s,v3.s[2]
fmla v21.4s,v1.4s,v3.s[2]
fmla v22.4s,v0.4s,v3.s[3]
fmla v23.4s,v1.4s,v3.s[3]
fmla v24.4s,v0.4s,v4.s[0]
fmla v26.4s,v0.4s,v4.s[1]
fmla v28.4s,v0.4s,v4.s[2]
fmla v30.4s,v0.4s,v4.s[3]
fmla v25.4s,v1.4s,v4.s[0]
fmla v27.4s,v1.4s,v4.s[1]
fmla v29.4s,v1.4s,v4.s[2]
fmla v31.4s,v1.4s,v4.s[3]
cmp x6,0
bne .SLOOPKLEFT
// Write-back. Broadcast alpha into v6 and beta into v7, then choose the
// contiguous path (rs_c == 1) or the general-stride path.
.SPOSTACCUM:
ld1r {v6.4s},[x7]
ld1r {v7.4s},[x8]
cmp x13,#1
bne .SGENSTORED
// Contiguous columns: three columns per stage, 8 floats (two q registers)
// per column. Each stage starts from zero, optionally loads and scales C by
// beta, adds alpha * accumulator, and stores.
.SCOLSTORED:
// Stage 1: columns 0-2 (x2, x16, x17)
dup v0.4s, wzr
dup v1.4s, wzr
dup v2.4s, wzr
dup v3.4s, wzr
dup v4.4s, wzr
dup v5.4s, wzr
fcmp s7,#0.0
beq .SBETAZEROCOLSTOREDS1
ldr q0, [x2]
ldr q1, [x2, #16]
ldr q2, [x16]
ldr q3, [x16, #16]
ldr q4, [x17]
ldr q5, [x17, #16]
fmul v0.4s,v0.4s,v7.s[0]
fmul v1.4s,v1.4s,v7.s[0]
fmul v2.4s,v2.4s,v7.s[0]
fmul v3.4s,v3.4s,v7.s[0]
fmul v4.4s,v4.4s,v7.s[0]
fmul v5.4s,v5.4s,v7.s[0]
.SBETAZEROCOLSTOREDS1:
fmla v0.4s,v8.4s,v6.s[0]
fmla v1.4s,v9.4s,v6.s[0]
fmla v2.4s,v10.4s,v6.s[0]
fmla v3.4s,v11.4s,v6.s[0]
fmla v4.4s,v12.4s,v6.s[0]
fmla v5.4s,v13.4s,v6.s[0]
str q0, [x2]
str q1, [x2, #16]
str q2, [x16]
str q3, [x16, #16]
str q4, [x17]
str q5, [x17, #16]
// Stage 2: columns 3-5 (x15, x19, x20). v8-v13 were consumed by stage 1
// and are reused as temporaries from here on.
dup v8.4s, wzr
dup v9.4s, wzr
dup v10.4s, wzr
dup v11.4s, wzr
dup v12.4s, wzr
dup v13.4s, wzr
fcmp s7,#0.0
beq .SBETAZEROCOLSTOREDS2
ldr q8, [x15]
ldr q9, [x15, #16]
ldr q10, [x19]
ldr q11, [x19, #16]
ldr q12, [x20]
ldr q13, [x20, #16]
fmul v8.4s, v8.4s, v7.s[0]
fmul v9.4s, v9.4s, v7.s[0]
fmul v10.4s,v10.4s,v7.s[0]
fmul v11.4s,v11.4s,v7.s[0]
fmul v12.4s,v12.4s,v7.s[0]
fmul v13.4s,v13.4s,v7.s[0]
.SBETAZEROCOLSTOREDS2:
fmla v8.4s, v14.4s,v6.s[0]
fmla v9.4s, v15.4s,v6.s[0]
fmla v10.4s,v16.4s,v6.s[0]
fmla v11.4s,v17.4s,v6.s[0]
fmla v12.4s,v18.4s,v6.s[0]
fmla v13.4s,v19.4s,v6.s[0]
str q8, [x15]
str q9, [x15, #16]
str q10, [x19]
str q11, [x19, #16]
str q12, [x20]
str q13, [x20, #16]
// Stage 3: columns 6-8 (x21, x22, x23)
dup v0.4s, wzr
dup v1.4s, wzr
dup v2.4s, wzr
dup v3.4s, wzr
dup v4.4s, wzr
dup v5.4s, wzr
fcmp s7,#0.0
beq .SBETAZEROCOLSTOREDS3
ldr q0, [x21]
ldr q1, [x21, #16]
ldr q2, [x22]
ldr q3, [x22, #16]
ldr q4, [x23]
ldr q5, [x23, #16]
fmul v0.4s,v0.4s,v7.s[0]
fmul v1.4s,v1.4s,v7.s[0]
fmul v2.4s,v2.4s,v7.s[0]
fmul v3.4s,v3.4s,v7.s[0]
fmul v4.4s,v4.4s,v7.s[0]
fmul v5.4s,v5.4s,v7.s[0]
.SBETAZEROCOLSTOREDS3:
fmla v0.4s,v20.4s,v6.s[0]
fmla v1.4s,v21.4s,v6.s[0]
fmla v2.4s,v22.4s,v6.s[0]
fmla v3.4s,v23.4s,v6.s[0]
fmla v4.4s,v24.4s,v6.s[0]
fmla v5.4s,v25.4s,v6.s[0]
str q0, [x21]
str q1, [x21, #16]
str q2, [x22]
str q3, [x22, #16]
str q4, [x23]
str q5, [x23, #16]
// Stage 4: columns 9-11 (x24, x25, x26)
dup v8.4s, wzr
dup v9.4s, wzr
dup v10.4s, wzr
dup v11.4s, wzr
dup v12.4s, wzr
dup v13.4s, wzr
fcmp s7,#0.0
beq .SBETAZEROCOLSTOREDS4
ldr q8, [x24]
ldr q9, [x24, #16]
ldr q10, [x25]
ldr q11, [x25, #16]
ldr q12, [x26]
ldr q13, [x26, #16]
fmul v8.4s, v8.4s, v7.s[0]
fmul v9.4s, v9.4s, v7.s[0]
fmul v10.4s,v10.4s,v7.s[0]
fmul v11.4s,v11.4s,v7.s[0]
fmul v12.4s,v12.4s,v7.s[0]
fmul v13.4s,v13.4s,v7.s[0]
.SBETAZEROCOLSTOREDS4:
// Prefetch the two addresses taken from the `data` structure into L2.
prfm pldl2keep,[x3]
prfm pldl2keep,[x4]
fmla v8.4s, v26.4s,v6.s[0]
fmla v9.4s, v27.4s,v6.s[0]
fmla v10.4s,v28.4s,v6.s[0]
fmla v11.4s,v29.4s,v6.s[0]
fmla v12.4s,v30.4s,v6.s[0]
fmla v13.4s,v31.4s,v6.s[0]
str q8, [x24]
str q9, [x24, #16]
str q10, [x25]
str q11, [x25, #16]
str q12, [x26]
str q13, [x26, #16]
b .SEND
// General stride: same four stages, but every element of a column is moved
// individually through x27, stepping by the row stride in bytes (x14).
.SGENSTORED:
// Stage 1: columns 0-2 (x2, x16, x17)
dup v0.4s, wzr
dup v1.4s, wzr
dup v2.4s, wzr
dup v3.4s, wzr
dup v4.4s, wzr
dup v5.4s, wzr
fcmp s7,#0.0
beq .SBETAZEROGENSTOREDS1
mov x27, x2
ld1 {v0.s}[0],[x27],x14
ld1 {v0.s}[1],[x27],x14
ld1 {v0.s}[2],[x27],x14
ld1 {v0.s}[3],[x27],x14
ld1 {v1.s}[0],[x27],x14
ld1 {v1.s}[1],[x27],x14
ld1 {v1.s}[2],[x27],x14
ld1 {v1.s}[3],[x27],x14
mov x27, x16
ld1 {v2.s}[0],[x27],x14
ld1 {v2.s}[1],[x27],x14
ld1 {v2.s}[2],[x27],x14
ld1 {v2.s}[3],[x27],x14
ld1 {v3.s}[0],[x27],x14
ld1 {v3.s}[1],[x27],x14
ld1 {v3.s}[2],[x27],x14
ld1 {v3.s}[3],[x27],x14
mov x27, x17
ld1 {v4.s}[0],[x27],x14
ld1 {v4.s}[1],[x27],x14
ld1 {v4.s}[2],[x27],x14
ld1 {v4.s}[3],[x27],x14
ld1 {v5.s}[0],[x27],x14
ld1 {v5.s}[1],[x27],x14
ld1 {v5.s}[2],[x27],x14
ld1 {v5.s}[3],[x27],x14
fmul v0.4s,v0.4s,v7.s[0]
fmul v1.4s,v1.4s,v7.s[0]
fmul v2.4s,v2.4s,v7.s[0]
fmul v3.4s,v3.4s,v7.s[0]
fmul v4.4s,v4.4s,v7.s[0]
fmul v5.4s,v5.4s,v7.s[0]
.SBETAZEROGENSTOREDS1:
fmla v0.4s, v8.4s,v6.s[0]
fmla v1.4s, v9.4s,v6.s[0]
fmla v2.4s,v10.4s,v6.s[0]
fmla v3.4s,v11.4s,v6.s[0]
fmla v4.4s,v12.4s,v6.s[0]
fmla v5.4s,v13.4s,v6.s[0]
mov x27, x2
st1 {v0.s}[0],[x27],x14
st1 {v0.s}[1],[x27],x14
st1 {v0.s}[2],[x27],x14
st1 {v0.s}[3],[x27],x14
st1 {v1.s}[0],[x27],x14
st1 {v1.s}[1],[x27],x14
st1 {v1.s}[2],[x27],x14
st1 {v1.s}[3],[x27],x14
mov x27, x16
st1 {v2.s}[0],[x27],x14
st1 {v2.s}[1],[x27],x14
st1 {v2.s}[2],[x27],x14
st1 {v2.s}[3],[x27],x14
st1 {v3.s}[0],[x27],x14
st1 {v3.s}[1],[x27],x14
st1 {v3.s}[2],[x27],x14
st1 {v3.s}[3],[x27],x14
mov x27, x17
st1 {v4.s}[0],[x27],x14
st1 {v4.s}[1],[x27],x14
st1 {v4.s}[2],[x27],x14
st1 {v4.s}[3],[x27],x14
st1 {v5.s}[0],[x27],x14
st1 {v5.s}[1],[x27],x14
st1 {v5.s}[2],[x27],x14
st1 {v5.s}[3],[x27],x14
// Stage 2: columns 3-5 (x15, x19, x20)
dup v8.4s, wzr
dup v9.4s, wzr
dup v10.4s, wzr
dup v11.4s, wzr
dup v12.4s, wzr
dup v13.4s, wzr
fcmp s7,#0.0
beq .SBETAZEROGENSTOREDS2
mov x27, x15
ld1 {v8.s}[0],[x27],x14
ld1 {v8.s}[1],[x27],x14
ld1 {v8.s}[2],[x27],x14
ld1 {v8.s}[3],[x27],x14
ld1 {v9.s}[0],[x27],x14
ld1 {v9.s}[1],[x27],x14
ld1 {v9.s}[2],[x27],x14
ld1 {v9.s}[3],[x27],x14
mov x27, x19
ld1 {v10.s}[0],[x27],x14
ld1 {v10.s}[1],[x27],x14
ld1 {v10.s}[2],[x27],x14
ld1 {v10.s}[3],[x27],x14
ld1 {v11.s}[0],[x27],x14
ld1 {v11.s}[1],[x27],x14
ld1 {v11.s}[2],[x27],x14
ld1 {v11.s}[3],[x27],x14
mov x27, x20
ld1 {v12.s}[0],[x27],x14
ld1 {v12.s}[1],[x27],x14
ld1 {v12.s}[2],[x27],x14
ld1 {v12.s}[3],[x27],x14
ld1 {v13.s}[0],[x27],x14
ld1 {v13.s}[1],[x27],x14
ld1 {v13.s}[2],[x27],x14
ld1 {v13.s}[3],[x27],x14
fmul v8.4s, v8.4s, v7.s[0]
fmul v9.4s, v9.4s, v7.s[0]
fmul v10.4s,v10.4s,v7.s[0]
fmul v11.4s,v11.4s,v7.s[0]
fmul v12.4s,v12.4s,v7.s[0]
fmul v13.4s,v13.4s,v7.s[0]
.SBETAZEROGENSTOREDS2:
fmla v8.4s, v14.4s,v6.s[0]
fmla v9.4s, v15.4s,v6.s[0]
fmla v10.4s,v16.4s,v6.s[0]
fmla v11.4s,v17.4s,v6.s[0]
fmla v12.4s,v18.4s,v6.s[0]
fmla v13.4s,v19.4s,v6.s[0]
mov x27, x15
st1 {v8.s}[0],[x27],x14
st1 {v8.s}[1],[x27],x14
st1 {v8.s}[2],[x27],x14
st1 {v8.s}[3],[x27],x14
st1 {v9.s}[0],[x27],x14
st1 {v9.s}[1],[x27],x14
st1 {v9.s}[2],[x27],x14
st1 {v9.s}[3],[x27],x14
mov x27, x19
st1 {v10.s}[0],[x27],x14
st1 {v10.s}[1],[x27],x14
st1 {v10.s}[2],[x27],x14
st1 {v10.s}[3],[x27],x14
st1 {v11.s}[0],[x27],x14
st1 {v11.s}[1],[x27],x14
st1 {v11.s}[2],[x27],x14
st1 {v11.s}[3],[x27],x14
mov x27, x20
st1 {v12.s}[0],[x27],x14
st1 {v12.s}[1],[x27],x14
st1 {v12.s}[2],[x27],x14
st1 {v12.s}[3],[x27],x14
st1 {v13.s}[0],[x27],x14
st1 {v13.s}[1],[x27],x14
st1 {v13.s}[2],[x27],x14
st1 {v13.s}[3],[x27],x14
// Stage 3: columns 6-8 (x21, x22, x23)
dup v0.4s, wzr
dup v1.4s, wzr
dup v2.4s, wzr
dup v3.4s, wzr
dup v4.4s, wzr
dup v5.4s, wzr
fcmp s7,#0.0
beq .SBETAZEROGENSTOREDS3
mov x27, x21
ld1 {v0.s}[0],[x27],x14
ld1 {v0.s}[1],[x27],x14
ld1 {v0.s}[2],[x27],x14
ld1 {v0.s}[3],[x27],x14
ld1 {v1.s}[0],[x27],x14
ld1 {v1.s}[1],[x27],x14
ld1 {v1.s}[2],[x27],x14
ld1 {v1.s}[3],[x27],x14
mov x27, x22
ld1 {v2.s}[0],[x27],x14
ld1 {v2.s}[1],[x27],x14
ld1 {v2.s}[2],[x27],x14
ld1 {v2.s}[3],[x27],x14
ld1 {v3.s}[0],[x27],x14
ld1 {v3.s}[1],[x27],x14
ld1 {v3.s}[2],[x27],x14
ld1 {v3.s}[3],[x27],x14
mov x27, x23
ld1 {v4.s}[0],[x27],x14
ld1 {v4.s}[1],[x27],x14
ld1 {v4.s}[2],[x27],x14
ld1 {v4.s}[3],[x27],x14
ld1 {v5.s}[0],[x27],x14
ld1 {v5.s}[1],[x27],x14
ld1 {v5.s}[2],[x27],x14
ld1 {v5.s}[3],[x27],x14
fmul v0.4s,v0.4s,v7.s[0]
fmul v1.4s,v1.4s,v7.s[0]
fmul v2.4s,v2.4s,v7.s[0]
fmul v3.4s,v3.4s,v7.s[0]
fmul v4.4s,v4.4s,v7.s[0]
fmul v5.4s,v5.4s,v7.s[0]
.SBETAZEROGENSTOREDS3:
fmla v0.4s,v20.4s,v6.s[0]
fmla v1.4s,v21.4s,v6.s[0]
fmla v2.4s,v22.4s,v6.s[0]
fmla v3.4s,v23.4s,v6.s[0]
fmla v4.4s,v24.4s,v6.s[0]
fmla v5.4s,v25.4s,v6.s[0]
mov x27, x21
st1 {v0.s}[0],[x27],x14
st1 {v0.s}[1],[x27],x14
st1 {v0.s}[2],[x27],x14
st1 {v0.s}[3],[x27],x14
st1 {v1.s}[0],[x27],x14
st1 {v1.s}[1],[x27],x14
st1 {v1.s}[2],[x27],x14
st1 {v1.s}[3],[x27],x14
mov x27, x22
st1 {v2.s}[0],[x27],x14
st1 {v2.s}[1],[x27],x14
st1 {v2.s}[2],[x27],x14
st1 {v2.s}[3],[x27],x14
st1 {v3.s}[0],[x27],x14
st1 {v3.s}[1],[x27],x14
st1 {v3.s}[2],[x27],x14
st1 {v3.s}[3],[x27],x14
mov x27, x23
st1 {v4.s}[0],[x27],x14
st1 {v4.s}[1],[x27],x14
st1 {v4.s}[2],[x27],x14
st1 {v4.s}[3],[x27],x14
st1 {v5.s}[0],[x27],x14
st1 {v5.s}[1],[x27],x14
st1 {v5.s}[2],[x27],x14
st1 {v5.s}[3],[x27],x14
// Stage 4: columns 9-11 (x24, x25, x26)
dup v8.4s, wzr
dup v9.4s, wzr
dup v10.4s, wzr
dup v11.4s, wzr
dup v12.4s, wzr
dup v13.4s, wzr
fcmp s7,#0.0
beq .SBETAZEROGENSTOREDS4
mov x27, x24
ld1 {v8.s}[0],[x27],x14
ld1 {v8.s}[1],[x27],x14
ld1 {v8.s}[2],[x27],x14
ld1 {v8.s}[3],[x27],x14
ld1 {v9.s}[0],[x27],x14
ld1 {v9.s}[1],[x27],x14
ld1 {v9.s}[2],[x27],x14
ld1 {v9.s}[3],[x27],x14
mov x27, x25
ld1 {v10.s}[0],[x27],x14
ld1 {v10.s}[1],[x27],x14
ld1 {v10.s}[2],[x27],x14
ld1 {v10.s}[3],[x27],x14
ld1 {v11.s}[0],[x27],x14
ld1 {v11.s}[1],[x27],x14
ld1 {v11.s}[2],[x27],x14
ld1 {v11.s}[3],[x27],x14
mov x27, x26
ld1 {v12.s}[0],[x27],x14
ld1 {v12.s}[1],[x27],x14
ld1 {v12.s}[2],[x27],x14
ld1 {v12.s}[3],[x27],x14
ld1 {v13.s}[0],[x27],x14
ld1 {v13.s}[1],[x27],x14
ld1 {v13.s}[2],[x27],x14
ld1 {v13.s}[3],[x27],x14
fmul v8.4s, v8.4s, v7.s[0]
fmul v9.4s, v9.4s, v7.s[0]
fmul v10.4s,v10.4s,v7.s[0]
fmul v11.4s,v11.4s,v7.s[0]
fmul v12.4s,v12.4s,v7.s[0]
fmul v13.4s,v13.4s,v7.s[0]
.SBETAZEROGENSTOREDS4:
// Prefetch the two addresses taken from the `data` structure into L2.
prfm pldl2keep,[x3]
prfm pldl2keep,[x4]
fmla v8.4s, v26.4s,v6.s[0]
fmla v9.4s, v27.4s,v6.s[0]
fmla v10.4s,v28.4s,v6.s[0]
fmla v11.4s,v29.4s,v6.s[0]
fmla v12.4s,v30.4s,v6.s[0]
fmla v13.4s,v31.4s,v6.s[0]
mov x27, x24
st1 {v8.s}[0],[x27],x14
st1 {v8.s}[1],[x27],x14
st1 {v8.s}[2],[x27],x14
st1 {v8.s}[3],[x27],x14
st1 {v9.s}[0],[x27],x14
st1 {v9.s}[1],[x27],x14
st1 {v9.s}[2],[x27],x14
st1 {v9.s}[3],[x27],x14
mov x27, x25
st1 {v10.s}[0],[x27],x14
st1 {v10.s}[1],[x27],x14
st1 {v10.s}[2],[x27],x14
st1 {v10.s}[3],[x27],x14
st1 {v11.s}[0],[x27],x14
st1 {v11.s}[1],[x27],x14
st1 {v11.s}[2],[x27],x14
st1 {v11.s}[3],[x27],x14
mov x27, x26
st1 {v12.s}[0],[x27],x14
st1 {v12.s}[1],[x27],x14
st1 {v12.s}[2],[x27],x14
st1 {v12.s}[3],[x27],x14
st1 {v13.s}[0],[x27],x14
st1 {v13.s}[1],[x27],x14
st1 {v13.s}[2],[x27],x14
st1 {v13.s}[3],[x27],x14
.SEND:
// 0 "" 2
#NO_APP
// Epilogue: restore the callee-saved registers and release the frame.
ldp x21, x22, [sp, 16]
ldp x23, x24, [sp, 32]
ldp x25, x26, [sp, 48]
ldr x27, [sp, 64]
ldp d8, d9, [sp, 80]
ldp d10, d11, [sp, 96]
ldp d12, d13, [sp, 112]
ldp d14, d15, [sp, 128]
ldp x19, x20, [sp], 240
.cfi_restore 20
.cfi_restore 19
.cfi_restore 78
.cfi_restore 79
.cfi_restore 76
.cfi_restore 77
.cfi_restore 74
.cfi_restore 75
.cfi_restore 72
.cfi_restore 73
.cfi_restore 27
.cfi_restore 25
.cfi_restore 26
.cfi_restore 23
.cfi_restore 24
.cfi_restore 21
.cfi_restore 22
.cfi_def_cfa_offset 0
ret
.cfi_endproc
.LFE738:
// .size bli_sgemm_armv8a_asm_8x12, .-bli_sgemm_armv8a_asm_8x12
.align 2
.p2align 3,,7
.global _bli_dgemm_armv8a_asm_6x8
// .type bli_dgemm_armv8a_asm_6x8, %function
_bli_dgemm_armv8a_asm_6x8:
.LFB739:
.cfi_startproc
stp x20, x21, [sp, -224]!
.cfi_def_cfa_offset 224
.cfi_offset 20, -224
.cfi_offset 21, -216
cmp x0, 0
add x9, x0, 3
ldr x10, [sp, 224]
csel x9, x9, x0, lt
negs x8, x0
and x0, x0, 3
and x8, x8, 3
asr x9, x9, 2
ldp x11, x10, [x10, 8]
csneg x0, x0, x8, mi
stp x22, x23, [sp, 16]
stp x24, x25, [sp, 32]
stp x26, x27, [sp, 48]
stp d8, d9, [sp, 64]
stp d10, d11, [sp, 80]
stp d12, d13, [sp, 96]
stp d14, d15, [sp, 112]
.cfi_offset 22, -208
.cfi_offset 23, -200
.cfi_offset 24, -192
.cfi_offset 25, -184
.cfi_offset 26, -176
.cfi_offset 27, -168
.cfi_offset 72, -160
.cfi_offset 73, -152
.cfi_offset 74, -144
.cfi_offset 75, -136
.cfi_offset 76, -128
.cfi_offset 77, -120
.cfi_offset 78, -112
.cfi_offset 79, -104
stp x5, x4, [sp, 136]
stp x3, x2, [sp, 152]
stp x1, x11, [sp, 168]
stp x10, x9, [sp, 184]
stp x0, x6, [sp, 200]
str x7, [sp, 216]
#APP
// 1130 "bli_gemm_armv8a_asm_d6x8.c" 1
ldr x0,[sp, 160]
ldr x1,[sp, 152]
ldr x2,[sp, 136]
ldr x3,[sp, 176]
ldr x4,[sp, 184]
ldr x5,[sp, 192]
ldr x6,[sp, 200]
ldr x7,[sp, 168]
ldr x8,[sp, 144]
ldr x9,[sp, 216]
lsl x10,x9,#3
ldr x13,[sp, 208]
lsl x14,x13,#3
add x20,x2,x10
add x21,x20,x10
add x22,x21,x10
add x23,x22,x10
add x24,x23,x10
add x25,x24,x10
add x26,x25,x10
prfm pldl1keep,[x2]
prfm pldl1keep,[x20]
prfm pldl1keep,[x21]
prfm pldl1keep,[x22]
prfm pldl1keep,[x23]
prfm pldl1keep,[x24]
prfm pldl1keep,[x25]
prfm pldl1keep,[x26]
dup v8.2d, xzr
prfm PLDL1KEEP, [x1, #256]
dup v9.2d, xzr
prfm PLDL1KEEP, [x1, #320]
dup v10.2d, xzr
prfm PLDL1KEEP, [x1, #384]
dup v11.2d, xzr
prfm PLDL1KEEP, [x1, #448]
dup v12.2d, xzr
dup v13.2d, xzr
dup v14.2d, xzr
prfm PLDL1KEEP, [x0, #192]
dup v15.2d, xzr
prfm PLDL1KEEP, [x0, #256]
dup v16.2d, xzr
prfm PLDL1KEEP, [x0, #320]
dup v17.2d, xzr
dup v18.2d, xzr
dup v19.2d, xzr
dup v20.2d, xzr
dup v21.2d, xzr
dup v22.2d, xzr
dup v23.2d, xzr
dup v24.2d, xzr
dup v25.2d, xzr
dup v26.2d, xzr
dup v27.2d, xzr
dup v28.2d, xzr
dup v29.2d, xzr
dup v30.2d, xzr
dup v31.2d, xzr
cmp x5,#0
beq .DCONSIDERKLEFT
ldr q0, [x0]
ldr q1, [x0, #16]
ldr q2, [x0, #32]
ldr q3, [x1]
ldr q4, [x1, #16]
ldr q5, [x1, #32]
ldr q6, [x1, #48]
add x0, x0, #48
add x1, x1, #64
cmp x5,1
beq .DLASTITER
DLOOP:
fmla v8.2d ,v0.2d,v3.d[0]
prfm PLDL1KEEP, [x1, #448]
fmla v9.2d ,v1.2d,v3.d[0]
prfm PLDL1KEEP, [x1, #512]
fmla v10.2d,v2.2d,v3.d[0]
prfm PLDL1KEEP, [x1, #576]
fmla v11.2d,v0.2d,v3.d[1]
fmla v12.2d,v1.2d,v3.d[1]
fmla v13.2d,v2.2d,v3.d[1]
fmla v14.2d,v0.2d,v4.d[0]
fmla v15.2d,v1.2d,v4.d[0]
fmla v16.2d,v2.2d,v4.d[0]
ldr q3, [x1]
fmla v17.2d,v0.2d,v4.d[1]
fmla v18.2d,v1.2d,v4.d[1]
fmla v19.2d,v2.2d,v4.d[1]
ldr q7, [x0, #32]
fmla v20.2d,v0.2d,v5.d[0]
fmla v21.2d,v1.2d,v5.d[0]
fmla v22.2d,v2.2d,v5.d[0]
ldr q4, [x1, #16]
fmla v23.2d,v0.2d,v5.d[1]
fmla v24.2d,v1.2d,v5.d[1]
fmla v25.2d,v2.2d,v5.d[1]
ldr q5, [x1, #32]
fmla v26.2d,v0.2d,v6.d[0]
fmla v29.2d,v0.2d,v6.d[1]
ldr q0, [x0]
fmla v27.2d,v1.2d,v6.d[0]
fmla v30.2d,v1.2d,v6.d[1]
ldr q1, [x0, #16]
fmla v28.2d,v2.2d,v6.d[0]
fmla v31.2d,v2.2d,v6.d[1]
ldr q6, [x1, #48]
fmla v8.2d ,v0.2d,v3.d[0]
prfm PLDL1KEEP, [x1, #640]
fmla v9.2d ,v1.2d,v3.d[0]
prfm PLDL1KEEP, [x0, #336]
fmla v10.2d,v7.2d,v3.d[0]
prfm PLDL1KEEP, [x0, #400]
fmla v11.2d,v0.2d,v3.d[1]
fmla v12.2d,v1.2d,v3.d[1]
fmla v13.2d,v7.2d,v3.d[1]
fmla v14.2d,v0.2d,v4.d[0]
fmla v15.2d,v1.2d,v4.d[0]
fmla v16.2d,v7.2d,v4.d[0]
ldr q3, [x1, #64]
fmla v17.2d,v0.2d,v4.d[1]
fmla v18.2d,v1.2d,v4.d[1]
fmla v19.2d,v7.2d,v4.d[1]
ldr q2, [x0, #80]
fmla v20.2d,v0.2d,v5.d[0]
fmla v21.2d,v1.2d,v5.d[0]
fmla v22.2d,v7.2d,v5.d[0]
ldr q4, [x1, #80]
fmla v23.2d,v0.2d,v5.d[1]
fmla v24.2d,v1.2d,v5.d[1]
fmla v25.2d,v7.2d,v5.d[1]
ldr q5, [x1, #96]
fmla v26.2d,v0.2d,v6.d[0]
fmla v29.2d,v0.2d,v6.d[1]
ldr q0, [x0, #48]
fmla v27.2d,v1.2d,v6.d[0]
fmla v30.2d,v1.2d,v6.d[1]
ldr q1, [x0, #64]
fmla v28.2d,v7.2d,v6.d[0]
fmla v31.2d,v7.2d,v6.d[1]
ldr q6, [x1, #112]
fmla v8.2d ,v0.2d,v3.d[0]
prfm PLDL1KEEP, [x0, #464]
fmla v9.2d ,v1.2d,v3.d[0]
fmla v10.2d,v2.2d,v3.d[0]
fmla v11.2d,v0.2d,v3.d[1]
fmla v12.2d,v1.2d,v3.d[1]
fmla v13.2d,v2.2d,v3.d[1]
fmla v14.2d,v0.2d,v4.d[0]
fmla v15.2d,v1.2d,v4.d[0]
fmla v16.2d,v2.2d,v4.d[0]
ldr q3, [x1, #128]
fmla v17.2d,v0.2d,v4.d[1]
fmla v18.2d,v1.2d,v4.d[1]
fmla v19.2d,v2.2d,v4.d[1]
ldr q7, [x0, #128]
fmla v20.2d,v0.2d,v5.d[0]
fmla v21.2d,v1.2d,v5.d[0]
fmla v22.2d,v2.2d,v5.d[0]
ldr q4, [x1, #144]
fmla v23.2d,v0.2d,v5.d[1]
fmla v24.2d,v1.2d,v5.d[1]
fmla v25.2d,v2.2d,v5.d[1]
ldr q5, [x1, #160]
fmla v26.2d,v0.2d,v6.d[0]
fmla v29.2d,v0.2d,v6.d[1]
ldr q0, [x0, #96]
fmla v27.2d,v1.2d,v6.d[0]
fmla v30.2d,v1.2d,v6.d[1]
ldr q1, [x0, #112]
fmla v28.2d,v2.2d,v6.d[0]
fmla v31.2d,v2.2d,v6.d[1]
ldr q6, [x1, #176]
fmla v8.2d ,v0.2d,v3.d[0]
fmla v9.2d ,v1.2d,v3.d[0]
fmla v10.2d,v7.2d,v3.d[0]
fmla v11.2d,v0.2d,v3.d[1]
fmla v12.2d,v1.2d,v3.d[1]
fmla v13.2d,v7.2d,v3.d[1]
ldr q3, [x1, #192]
fmla v14.2d,v0.2d,v4.d[0]
fmla v15.2d,v1.2d,v4.d[0]
fmla v16.2d,v7.2d,v4.d[0]
ldr q2, [x0, #176]
fmla v17.2d,v0.2d,v4.d[1]
fmla v18.2d,v1.2d,v4.d[1]
fmla v19.2d,v7.2d,v4.d[1]
ldr q4, [x1, #208]
fmla v20.2d,v0.2d,v5.d[0]
fmla v21.2d,v1.2d,v5.d[0]
fmla v22.2d,v7.2d,v5.d[0]
fmla v23.2d,v0.2d,v5.d[1]
fmla v24.2d,v1.2d,v5.d[1]
fmla v25.2d,v7.2d,v5.d[1]
ldr q5, [x1, #224]
fmla v26.2d,v0.2d,v6.d[0]
fmla v29.2d,v0.2d,v6.d[1]
ldr q0, [x0, #144]
fmla v27.2d,v1.2d,v6.d[0]
fmla v30.2d,v1.2d,v6.d[1]
ldr q1, [x0, #160]
fmla v28.2d,v7.2d,v6.d[0]
fmla v31.2d,v7.2d,v6.d[1]
ldr q6, [x1, #240]
add x0, x0, #192
add x1, x1, #256
sub x5,x5,1
cmp x5,1
bne DLOOP
.DLASTITER:
fmla v8.2d ,v0.2d,v3.d[0]
fmla v9.2d ,v1.2d,v3.d[0]
fmla v10.2d,v2.2d,v3.d[0]
fmla v11.2d,v0.2d,v3.d[1]
fmla v12.2d,v1.2d,v3.d[1]
fmla v13.2d,v2.2d,v3.d[1]
ldr q3, [x1]
fmla v14.2d,v0.2d,v4.d[0]
fmla v15.2d,v1.2d,v4.d[0]
fmla v16.2d,v2.2d,v4.d[0]
ldr q7, [x0, #32]
fmla v17.2d,v0.2d,v4.d[1]
fmla v18.2d,v1.2d,v4.d[1]
fmla v19.2d,v2.2d,v4.d[1]
ldr q4, [x1, #16]
fmla v20.2d,v0.2d,v5.d[0]
fmla v21.2d,v1.2d,v5.d[0]
fmla v22.2d,v2.2d,v5.d[0]
fmla v23.2d,v0.2d,v5.d[1]
fmla v24.2d,v1.2d,v5.d[1]
fmla v25.2d,v2.2d,v5.d[1]
ldr q5, [x1, #32]
fmla v26.2d,v0.2d,v6.d[0]
fmla v29.2d,v0.2d,v6.d[1]
ldr q0, [x0]
fmla v27.2d,v1.2d,v6.d[0]
fmla v30.2d,v1.2d,v6.d[1]
ldr q1, [x0, #16]
fmla v28.2d,v2.2d,v6.d[0]
fmla v31.2d,v2.2d,v6.d[1]
ldr q6, [x1, #48]
fmla v8.2d ,v0.2d,v3.d[0]
fmla v9.2d ,v1.2d,v3.d[0]
fmla v10.2d,v7.2d,v3.d[0]
fmla v11.2d,v0.2d,v3.d[1]
fmla v12.2d,v1.2d,v3.d[1]
fmla v13.2d,v7.2d,v3.d[1]
ldr q3, [x1, #64]
fmla v14.2d,v0.2d,v4.d[0]
fmla v15.2d,v1.2d,v4.d[0]
fmla v16.2d,v7.2d,v4.d[0]
ldr q2, [x0, #80]
fmla v17.2d,v0.2d,v4.d[1]
fmla v18.2d,v1.2d,v4.d[1]
fmla v19.2d,v7.2d,v4.d[1]
ldr q4, [x1, #80]
fmla v20.2d,v0.2d,v5.d[0]
fmla v21.2d,v1.2d,v5.d[0]
fmla v22.2d,v7.2d,v5.d[0]
fmla v23.2d,v0.2d,v5.d[1]
fmla v24.2d,v1.2d,v5.d[1]
fmla v25.2d,v7.2d,v5.d[1]
ldr q5, [x1, #96]
fmla v26.2d,v0.2d,v6.d[0]
fmla v29.2d,v0.2d,v6.d[1]
ldr q0, [x0, #48]
fmla v27.2d,v1.2d,v6.d[0]
fmla v30.2d,v1.2d,v6.d[1]
ldr q1, [x0, #64]
fmla v28.2d,v7.2d,v6.d[0]
fmla v31.2d,v7.2d,v6.d[1]
ldr q6, [x1, #112]
fmla v8.2d ,v0.2d,v3.d[0]
fmla v9.2d ,v1.2d,v3.d[0]
fmla v10.2d,v2.2d,v3.d[0]
fmla v11.2d,v0.2d,v3.d[1]
fmla v12.2d,v1.2d,v3.d[1]
fmla v13.2d,v2.2d,v3.d[1]
ldr q3, [x1, #128]
fmla v14.2d,v0.2d,v4.d[0]
fmla v15.2d,v1.2d,v4.d[0]
fmla v16.2d,v2.2d,v4.d[0]
ldr q7, [x0, #128]
fmla v17.2d,v0.2d,v4.d[1]
fmla v18.2d,v1.2d,v4.d[1]
fmla v19.2d,v2.2d,v4.d[1]
ldr q4, [x1, #144]
fmla v20.2d,v0.2d,v5.d[0]
fmla v21.2d,v1.2d,v5.d[0]
fmla v22.2d,v2.2d,v5.d[0]
fmla v23.2d,v0.2d,v5.d[1]
fmla v24.2d,v1.2d,v5.d[1]
fmla v25.2d,v2.2d,v5.d[1]
ldr q5, [x1, #160]
fmla v26.2d,v0.2d,v6.d[0]
fmla v29.2d,v0.2d,v6.d[1]
ldr q0, [x0, #96]
fmla v27.2d,v1.2d,v6.d[0]
fmla v30.2d,v1.2d,v6.d[1]
ldr q1, [x0, #112]
fmla v28.2d,v2.2d,v6.d[0]
fmla v31.2d,v2.2d,v6.d[1]
ldr q6, [x1, #176]
fmla v8.2d ,v0.2d,v3.d[0]
fmla v9.2d ,v1.2d,v3.d[0]
fmla v10.2d,v7.2d,v3.d[0]
fmla v11.2d,v0.2d,v3.d[1]
fmla v12.2d,v1.2d,v3.d[1]
fmla v13.2d,v7.2d,v3.d[1]
fmla v14.2d,v0.2d,v4.d[0]
fmla v15.2d,v1.2d,v4.d[0]
fmla v16.2d,v7.2d,v4.d[0]
fmla v17.2d,v0.2d,v4.d[1]
fmla v18.2d,v1.2d,v4.d[1]
fmla v19.2d,v7.2d,v4.d[1]
fmla v20.2d,v0.2d,v5.d[0]
fmla v21.2d,v1.2d,v5.d[0]
fmla v22.2d,v7.2d,v5.d[0]
fmla v23.2d,v0.2d,v5.d[1]
fmla v24.2d,v1.2d,v5.d[1]
fmla v25.2d,v7.2d,v5.d[1]
fmla v26.2d,v0.2d,v6.d[0]
add x1, x1, #192
fmla v29.2d,v0.2d,v6.d[1]
fmla v27.2d,v1.2d,v6.d[0]
fmla v30.2d,v1.2d,v6.d[1]
fmla v28.2d,v7.2d,v6.d[0]
fmla v31.2d,v7.2d,v6.d[1]
add x0, x0, #144
.DCONSIDERKLEFT:
cmp x6,0
beq .DPOSTACCUM
.DLOOPKLEFT:
ldr q0, [x0],#16
ldr q1, [x0],#16
ldr q2, [x0],#16
ldr q3, [x1],#16
ldr q4, [x1],#16
ldr q5, [x1],#16
ldr q6, [x1],#16
sub x6,x6,1
fmla v8.2d ,v0.2d,v3.d[0]
fmla v9.2d ,v1.2d,v3.d[0]
fmla v10.2d,v2.2d,v3.d[0]
fmla v11.2d,v0.2d,v3.d[1]
fmla v12.2d,v1.2d,v3.d[1]
fmla v13.2d,v2.2d,v3.d[1]
fmla v14.2d,v0.2d,v4.d[0]
fmla v15.2d,v1.2d,v4.d[0]
fmla v16.2d,v2.2d,v4.d[0]
fmla v17.2d,v0.2d,v4.d[1]
fmla v18.2d,v1.2d,v4.d[1]
fmla v19.2d,v2.2d,v4.d[1]
fmla v20.2d,v0.2d,v5.d[0]
fmla v21.2d,v1.2d,v5.d[0]
fmla v22.2d,v2.2d,v5.d[0]
fmla v23.2d,v0.2d,v5.d[1]
fmla v24.2d,v1.2d,v5.d[1]
fmla v25.2d,v2.2d,v5.d[1]
fmla v26.2d,v0.2d,v6.d[0]
fmla v29.2d,v0.2d,v6.d[1]
fmla v27.2d,v1.2d,v6.d[0]
fmla v30.2d,v1.2d,v6.d[1]
fmla v28.2d,v2.2d,v6.d[0]
fmla v31.2d,v2.2d,v6.d[1]
cmp x6,0
bne .DLOOPKLEFT
.DPOSTACCUM:
ld1r {v6.2d},[x7]
ld1r {v7.2d},[x8]
cmp x13,#1
bne .DGENSTORED
.DCOLSTORED:
dup v0.2d, xzr
dup v1.2d, xzr
dup v2.2d, xzr
dup v3.2d, xzr
dup v4.2d, xzr
dup v5.2d, xzr
fcmp d7,#0.0
beq .DBETAZEROCOLSTOREDS1
ldr q0, [x2]
ldr q1, [x2, #16]
ldr q2, [x2, #32]
ldr q3, [x20]
ldr q4, [x20, #16]
ldr q5, [x20, #32]
fmul v0.2d,v0.2d,v7.d[0]
fmul v1.2d,v1.2d,v7.d[0]
fmul v2.2d,v2.2d,v7.d[0]
fmul v3.2d,v3.2d,v7.d[0]
fmul v4.2d,v4.2d,v7.d[0]
fmul v5.2d,v5.2d,v7.d[0]
.DBETAZEROCOLSTOREDS1:
fmla v0.2d,v8.2d,v6.d[0]
fmla v1.2d,v9.2d,v6.d[0]
fmla v2.2d,v10.2d,v6.d[0]
fmla v3.2d,v11.2d,v6.d[0]
fmla v4.2d,v12.2d,v6.d[0]
fmla v5.2d,v13.2d,v6.d[0]
str q0, [x2]
str q1, [x2, #16]
str q2, [x2, #32]
str q3, [x20]
str q4, [x20, #16]
str q5, [x20, #32]
dup v8.2d, xzr
dup v9.2d, xzr
dup v10.2d, xzr
dup v11.2d, xzr
dup v12.2d, xzr
dup v13.2d, xzr
fcmp d7,#0.0
beq .DBETAZEROCOLSTOREDS2
ldr q8, [x21]
ldr q9, [x21, #16]
ldr q10, [x21, #32]
ldr q11, [x22]
ldr q12, [x22, #16]
ldr q13, [x22, #32]
fmul v8.2d, v8.2d, v7.d[0]
fmul v9.2d, v9.2d, v7.d[0]
fmul v10.2d,v10.2d,v7.d[0]
fmul v11.2d,v11.2d,v7.d[0]
fmul v12.2d,v12.2d,v7.d[0]
fmul v13.2d,v13.2d,v7.d[0]
.DBETAZEROCOLSTOREDS2:
fmla v8.2d, v14.2d,v6.d[0]
fmla v9.2d, v15.2d,v6.d[0]
fmla v10.2d,v16.2d,v6.d[0]
fmla v11.2d,v17.2d,v6.d[0]
fmla v12.2d,v18.2d,v6.d[0]
fmla v13.2d,v19.2d,v6.d[0]
str q8, [x21]
str q9, [x21, #16]
str q10, [x21, #32]
str q11, [x22]
str q12, [x22, #16]
str q13, [x22, #32]
dup v0.2d, xzr
dup v1.2d, xzr
dup v2.2d, xzr
dup v3.2d, xzr
dup v4.2d, xzr
dup v5.2d, xzr
fcmp d7,#0.0
beq .DBETAZEROCOLSTOREDS3
ldr q0, [x23]
ldr q1, [x23, #16]
ldr q2, [x23, #32]
ldr q3, [x24]
ldr q4, [x24, #16]
ldr q5, [x24, #32]
fmul v0.2d,v0.2d,v7.d[0]
fmul v1.2d,v1.2d,v7.d[0]
fmul v2.2d,v2.2d,v7.d[0]
fmul v3.2d,v3.2d,v7.d[0]
fmul v4.2d,v4.2d,v7.d[0]
fmul v5.2d,v5.2d,v7.d[0]
.DBETAZEROCOLSTOREDS3:
fmla v0.2d,v20.2d,v6.d[0]
fmla v1.2d,v21.2d,v6.d[0]
fmla v2.2d,v22.2d,v6.d[0]
fmla v3.2d,v23.2d,v6.d[0]
fmla v4.2d,v24.2d,v6.d[0]
fmla v5.2d,v25.2d,v6.d[0]
str q0, [x23]
str q1, [x23, #16]
str q2, [x23, #32]
str q3, [x24]
str q4, [x24, #16]
str q5, [x24, #32]
dup v8.2d, xzr
dup v9.2d, xzr
dup v10.2d, xzr
dup v11.2d, xzr
dup v12.2d, xzr
dup v13.2d, xzr
fcmp d7,#0.0
beq .DBETAZEROCOLSTOREDS4
ldr q8, [x25]
ldr q9, [x25, #16]
ldr q10, [x25, #32]
ldr q11, [x26]
ldr q12, [x26, #16]
ldr q13, [x26, #32]
fmul v8.2d, v8.2d, v7.d[0]
fmul v9.2d, v9.2d, v7.d[0]
fmul v10.2d,v10.2d,v7.d[0]
fmul v11.2d,v11.2d,v7.d[0]
fmul v12.2d,v12.2d,v7.d[0]
fmul v13.2d,v13.2d,v7.d[0]
.DBETAZEROCOLSTOREDS4:
prfm pldl2keep,[x3]
prfm pldl2keep,[x4]
fmla v8.2d, v26.2d,v6.d[0]
fmla v9.2d, v27.2d,v6.d[0]
fmla v10.2d,v28.2d,v6.d[0]
fmla v11.2d,v29.2d,v6.d[0]
fmla v12.2d,v30.2d,v6.d[0]
fmla v13.2d,v31.2d,v6.d[0]
str q8, [x25]
str q9, [x25, #16]
str q10, [x25, #32]
str q11, [x26]
str q12, [x26, #16]
str q13, [x26, #32]
b .DEND
.DGENSTORED:
dup v0.2d, xzr
dup v1.2d, xzr
dup v2.2d, xzr
dup v3.2d, xzr
dup v4.2d, xzr
dup v5.2d, xzr
fcmp d7,#0.0
beq .DBETAZEROGENSTOREDS1
mov x27, x2
ld1 {v0.d}[0],[x27],x14
ld1 {v0.d}[1],[x27],x14
ld1 {v1.d}[0],[x27],x14
ld1 {v1.d}[1],[x27],x14
ld1 {v2.d}[0],[x27],x14
ld1 {v2.d}[1],[x27],x14
mov x27, x20
ld1 {v3.d}[0],[x27],x14
ld1 {v3.d}[1],[x27],x14
ld1 {v4.d}[0],[x27],x14
ld1 {v4.d}[1],[x27],x14
ld1 {v5.d}[0],[x27],x14
ld1 {v5.d}[1],[x27],x14
fmul v0.2d,v0.2d,v7.d[0]
fmul v1.2d,v1.2d,v7.d[0]
fmul v2.2d,v2.2d,v7.d[0]
fmul v3.2d,v3.2d,v7.d[0]
fmul v4.2d,v4.2d,v7.d[0]
fmul v5.2d,v5.2d,v7.d[0]
.DBETAZEROGENSTOREDS1:
fmla v0.2d,v8.2d,v6.d[0]
fmla v1.2d,v9.2d,v6.d[0]
fmla v2.2d,v10.2d,v6.d[0]
fmla v3.2d,v11.2d,v6.d[0]
fmla v4.2d,v12.2d,v6.d[0]
fmla v5.2d,v13.2d,v6.d[0]
mov x27, x2
st1 {v0.d}[0],[x27],x14
st1 {v0.d}[1],[x27],x14
st1 {v1.d}[0],[x27],x14
st1 {v1.d}[1],[x27],x14
st1 {v2.d}[0],[x27],x14
st1 {v2.d}[1],[x27],x14
mov x27, x20
st1 {v3.d}[0],[x27],x14
st1 {v3.d}[1],[x27],x14
st1 {v4.d}[0],[x27],x14
st1 {v4.d}[1],[x27],x14
st1 {v5.d}[0],[x27],x14
st1 {v5.d}[1],[x27],x14
dup v8.2d, xzr
dup v9.2d, xzr
dup v10.2d, xzr
dup v11.2d, xzr
dup v12.2d, xzr
dup v13.2d, xzr
fcmp d7,#0.0
beq .DBETAZEROGENSTOREDS2
mov x27, x21
ld1 {v8.d}[0], [x27],x14
ld1 {v8.d}[1], [x27],x14
ld1 {v9.d}[0], [x27],x14
ld1 {v9.d}[1], [x27],x14
ld1 {v10.d}[0],[x27],x14
ld1 {v10.d}[1],[x27],x14
mov x27, x22
ld1 {v11.d}[0],[x27],x14
ld1 {v11.d}[1],[x27],x14
ld1 {v12.d}[0],[x27],x14
ld1 {v12.d}[1],[x27],x14
ld1 {v13.d}[0],[x27],x14
ld1 {v13.d}[1],[x27],x14
fmul v8.2d, v8.2d, v7.d[0]
fmul v9.2d, v9.2d, v7.d[0]
fmul v10.2d,v10.2d,v7.d[0]
fmul v11.2d,v11.2d,v7.d[0]
fmul v12.2d,v12.2d,v7.d[0]
fmul v13.2d,v13.2d,v7.d[0]
.DBETAZEROGENSTOREDS2:
fmla v8.2d, v14.2d,v6.d[0]
fmla v9.2d, v15.2d,v6.d[0]
fmla v10.2d,v16.2d,v6.d[0]
fmla v11.2d,v17.2d,v6.d[0]
fmla v12.2d,v18.2d,v6.d[0]
fmla v13.2d,v19.2d,v6.d[0]
mov x27, x21
st1 {v8.d}[0], [x27],x14
st1 {v8.d}[1], [x27],x14
st1 {v9.d}[0], [x27],x14
st1 {v9.d}[1], [x27],x14
st1 {v10.d}[0],[x27],x14
st1 {v10.d}[1],[x27],x14
mov x27, x22
st1 {v11.d}[0],[x27],x14
st1 {v11.d}[1],[x27],x14
st1 {v12.d}[0],[x27],x14
st1 {v12.d}[1],[x27],x14
st1 {v13.d}[0],[x27],x14
st1 {v13.d}[1],[x27],x14
dup v0.2d, xzr
dup v1.2d, xzr
dup v2.2d, xzr
dup v3.2d, xzr
dup v4.2d, xzr
dup v5.2d, xzr
fcmp d7,#0.0
beq .DBETAZEROGENSTOREDS3
mov x27, x23
ld1 {v0.d}[0],[x27],x14
ld1 {v0.d}[1],[x27],x14
ld1 {v1.d}[0],[x27],x14
ld1 {v1.d}[1],[x27],x14
ld1 {v2.d}[0],[x27],x14
ld1 {v2.d}[1],[x27],x14
mov x27, x24
ld1 {v3.d}[0],[x27],x14
ld1 {v3.d}[1],[x27],x14
ld1 {v4.d}[0],[x27],x14
ld1 {v4.d}[1],[x27],x14
ld1 {v5.d}[0],[x27],x14
ld1 {v5.d}[1],[x27],x14
fmul v0.2d,v0.2d,v7.d[0]
fmul v1.2d,v1.2d,v7.d[0]
fmul v2.2d,v2.2d,v7.d[0]
fmul v3.2d,v3.2d,v7.d[0]
fmul v4.2d,v4.2d,v7.d[0]
fmul v5.2d,v5.2d,v7.d[0]
.DBETAZEROGENSTOREDS3:
fmla v0.2d,v20.2d,v6.d[0]
fmla v1.2d,v21.2d,v6.d[0]
fmla v2.2d,v22.2d,v6.d[0]
fmla v3.2d,v23.2d,v6.d[0]
fmla v4.2d,v24.2d,v6.d[0]
fmla v5.2d,v25.2d,v6.d[0]
mov x27, x23
st1 {v0.d}[0],[x27],x14
st1 {v0.d}[1],[x27],x14
st1 {v1.d}[0],[x27],x14
st1 {v1.d}[1],[x27],x14
st1 {v2.d}[0],[x27],x14
st1 {v2.d}[1],[x27],x14
mov x27, x24
st1 {v3.d}[0],[x27],x14
st1 {v3.d}[1],[x27],x14
st1 {v4.d}[0],[x27],x14
st1 {v4.d}[1],[x27],x14
st1 {v5.d}[0],[x27],x14
st1 {v5.d}[1],[x27],x14
dup v8.2d, xzr
dup v9.2d, xzr
dup v10.2d, xzr
dup v11.2d, xzr
dup v12.2d, xzr
dup v13.2d, xzr
fcmp d7,#0.0
beq .DBETAZEROGENSTOREDS4
mov x27, x25
ld1 {v8.d}[0], [x27],x14
ld1 {v8.d}[1], [x27],x14
ld1 {v9.d}[0], [x27],x14
ld1 {v9.d}[1], [x27],x14
ld1 {v10.d}[0],[x27],x14
ld1 {v10.d}[1],[x27],x14
mov x27, x26
ld1 {v11.d}[0],[x27],x14
ld1 {v11.d}[1],[x27],x14
ld1 {v12.d}[0],[x27],x14
ld1 {v12.d}[1],[x27],x14
ld1 {v13.d}[0],[x27],x14
ld1 {v13.d}[1],[x27],x14
fmul v8.2d, v8.2d, v7.d[0]
fmul v9.2d, v9.2d, v7.d[0]
fmul v10.2d,v10.2d,v7.d[0]
fmul v11.2d,v11.2d,v7.d[0]
fmul v12.2d,v12.2d,v7.d[0]
fmul v13.2d,v13.2d,v7.d[0]
.DBETAZEROGENSTOREDS4:
prfm pldl2keep,[x3]
prfm pldl2keep,[x4]
fmla v8.2d, v26.2d,v6.d[0]
fmla v9.2d, v27.2d,v6.d[0]
fmla v10.2d,v28.2d,v6.d[0]
fmla v11.2d,v29.2d,v6.d[0]
fmla v12.2d,v30.2d,v6.d[0]
fmla v13.2d,v31.2d,v6.d[0]
mov x27, x25
st1 {v8.d}[0], [x27],x14
st1 {v8.d}[1], [x27],x14
st1 {v9.d}[0], [x27],x14
st1 {v9.d}[1], [x27],x14
st1 {v10.d}[0],[x27],x14
st1 {v10.d}[1],[x27],x14
mov x27, x26
st1 {v11.d}[0],[x27],x14
st1 {v11.d}[1],[x27],x14
st1 {v12.d}[0],[x27],x14
st1 {v12.d}[1],[x27],x14
st1 {v13.d}[0],[x27],x14
st1 {v13.d}[1],[x27],x14
.DEND:
// 0 "" 2
#NO_APP
ldp x22, x23, [sp, 16]
ldp x24, x25, [sp, 32]
ldp x26, x27, [sp, 48]
ldp d8, d9, [sp, 64]
ldp d10, d11, [sp, 80]
ldp d12, d13, [sp, 96]
ldp d14, d15, [sp, 112]
ldp x20, x21, [sp], 224
.cfi_restore 21
.cfi_restore 20
.cfi_restore 78
.cfi_restore 79
.cfi_restore 76
.cfi_restore 77
.cfi_restore 74
.cfi_restore 75
.cfi_restore 72
.cfi_restore 73
.cfi_restore 26
.cfi_restore 27
.cfi_restore 24
.cfi_restore 25
.cfi_restore 22
.cfi_restore 23
.cfi_def_cfa_offset 0
ret
.cfi_endproc
.LFE739:
// .size bli_dgemm_armv8a_asm_6x8, .-bli_dgemm_armv8a_asm_6x8
.ident "GCC: (ARM-build-5) 9.2.0"
// .section .note.GNU-stack,"",@progbits
import os
import re
import sys
import shlex
import time
import subprocess
from copy import copy
from distutils import ccompiler
from distutils.ccompiler import (
compiler_class, gen_lib_options, get_default_compiler, new_compiler,
CCompiler
)
from distutils.errors import (
DistutilsExecError, DistutilsModuleError, DistutilsPlatformError,
CompileError, UnknownFileError
)
from distutils.sysconfig import customize_compiler
from distutils.version import LooseVersion
from numpy.distutils import log
from numpy.distutils.exec_command import (
filepath_from_subprocess_output, forward_bytes_to_stdout
)
from numpy.distutils.misc_util import cyg2win32, is_sequence, mingw32, \
get_num_build_jobs, \
_commandline_dep_string
# globals for parallel build management
try:
import threading
except ImportError:
import dummy_threading as threading
_job_semaphore = None
_global_lock = threading.Lock()
_processing_files = set()
def _needs_build(obj, cc_args, extra_postargs, pp_opts):
    """
    Check if an object needs to be rebuilt based on its dependencies.

    Parameters
    ----------
    obj : str
        Path of the object file. Its dependency file is ``obj + '.d'``.
    cc_args, extra_postargs, pp_opts
        Compiler arguments; they are only forwarded to
        `_commandline_dep_string` to rebuild the command line that is
        compared with the one recorded in the dependency file.

    Returns
    -------
    bool
        True if the object has to be (re)built, False if it is up to date.
    """
    # defined in unixcompiler.py
    dep_file = obj + '.d'
    if not os.path.exists(dep_file):
        return True

    # dep_file is a makefile containing 'object: dependencies'
    # formatted like posix shell (spaces escaped, \ line continuations)
    # the last line contains the compiler commandline arguments as some
    # projects may compile an extension multiple times with different
    # arguments
    with open(dep_file, "r") as f:
        lines = f.readlines()

    # An empty dependency file records no command line at all, so there is
    # nothing to compare against: rebuild instead of failing on lines[-1].
    if not lines:
        return True

    cmdline = _commandline_dep_string(cc_args, extra_postargs, pp_opts)
    last_cmdline = lines[-1]
    if last_cmdline != cmdline:
        return True

    contents = ''.join(lines[:-1])
    deps = [x for x in shlex.split(contents, posix=True)
            if x != "\n" and not x.endswith(":")]

    try:
        t_obj = os.stat(obj).st_mtime

        # check if any of the dependencies is newer than the object
        # the dependencies includes the source used to create the object
        for f in deps:
            if os.stat(f).st_mtime > t_obj:
                return True
    except OSError:
        # no object counts as newer (shouldn't happen if dep_file exists)
        return True

    return False
def replace_method(klass, method_name, func):
    """
    Install `func` on `klass` as the attribute named `method_name`.

    The installed attribute is a thin forwarding function: it receives the
    instance as ``self`` plus any positional and keyword arguments and
    passes them on to `func` unchanged.
    """
    # Py3k does not have unbound method anymore, MethodType does not work
    setattr(klass, method_name,
            lambda self, *args, **kw: func(self, *args, **kw))
######################################################################
## Method that subclasses may redefine. But don't call this method,
## it is private to the CCompiler class and may return unexpected
## results if used elsewhere. So, you have been warned..
def CCompiler_find_executables(self):
    """
    Placeholder hook that intentionally performs no work.

    The `get_version` method calls it, and subclasses are free to override
    it. In particular it is redefined in the `FCompiler` class, where more
    documentation can be found.
    """
    return None
replace_method(CCompiler, 'find_executables', CCompiler_find_executables)
# Using customized CCompiler.spawn.
def CCompiler_spawn(self, cmd, display=None):
    """
    Execute a command in a sub-process.

    Parameters
    ----------
    cmd : str
        The command to execute.
    display : str or sequence of str, optional
        The text to add to the log file kept by `numpy.distutils`.
        If not given, `display` is equal to `cmd`.

    Returns
    -------
    None

    Raises
    ------
    DistutilsExecError
        If the command failed, i.e. the exit status was not 0.

    Notes
    -----
    A trailing ``-arch x86_64`` pair is removed in place from the
    executable lists stored on `self`. `cmd` itself is executed exactly
    as it was passed in.
    """
    # Block x86: drop a trailing "-arch x86_64" pair from each executable
    # list. Attributes that are missing, are not lists, or are too short to
    # hold an executable plus the pair are left untouched.
    for attr in ('compiler', 'compiler_so', 'linker_so', 'linker_exe'):
        argv = getattr(self, attr, None)
        if (isinstance(argv, list) and len(argv) > 2
                and argv[-2:] == ['-arch', 'x86_64']):
            del argv[-2:]

    if display is None:
        display = cmd
        if is_sequence(display):
            display = ' '.join(list(display))
    log.info(display)
    try:
        if self.verbose:
            subprocess.check_output(cmd)
        else:
            subprocess.check_output(cmd, stderr=subprocess.STDOUT)
    except subprocess.CalledProcessError as exc:
        o = exc.output
        s = exc.returncode
    except OSError:
        # OSError doesn't have the same hooks for the exception
        # output, but exec_command() historically would use an
        # empty string for EnvironmentError (base class for
        # OSError)
        o = b''
        # status previously used by exec_command() for parent
        # of OSError
        s = 127
    else:
        # use a convenience return here so that any kind of
        # caught exception will execute the default code after the
        # try / except block, which handles various exceptions
        return None

    if is_sequence(cmd):
        cmd = ' '.join(list(cmd))

    if self.verbose:
        forward_bytes_to_stdout(o)

    if re.search(b'Too many open files', o):
        msg = '\nTry rerunning setup command until build succeeds.'
    else:
        msg = ''
    raise DistutilsExecError('Command "%s" failed with exit status %d%s' %
                             (cmd, s, msg))
replace_method(CCompiler, 'spawn', CCompiler_spawn)
def CCompiler_object_filenames(self, source_filenames, strip_dir=0, output_dir=''):
    """
    Return the name of the object files for the given source files.

    Parameters
    ----------
    source_filenames : list of str
        The list of paths to source files. Paths can be either relative or
        absolute, this is handled transparently.
    strip_dir : bool, optional
        Whether to strip the directory from the returned paths. If True,
        the file name prepended by `output_dir` is returned. Default is False.
    output_dir : str, optional
        If given, this path is prepended to the returned paths to the
        object files.

    Returns
    -------
    obj_names : list of str
        The list of paths to the object files corresponding to the source
        files in `source_filenames`.

    Raises
    ------
    UnknownFileError
        If a source file has an extension not listed in
        ``self.src_extensions``.
    """
    # Block x86: drop a trailing "-arch x86_64" pair from each executable
    # list. Attributes that are missing, are not lists, or are too short to
    # hold an executable plus the pair are left untouched.
    for attr in ('compiler', 'compiler_so', 'linker_so', 'linker_exe'):
        argv = getattr(self, attr, None)
        if (isinstance(argv, list) and len(argv) > 2
                and argv[-2:] == ['-arch', 'x86_64']):
            del argv[-2:]

    if output_dir is None:
        output_dir = ''
    obj_names = []
    for src_name in source_filenames:
        base, ext = os.path.splitext(os.path.normpath(src_name))
        base = os.path.splitdrive(base)[1]  # Chop off the drive
        base = base[os.path.isabs(base):]  # If abs, chop off leading /
        if base.startswith('..'):
            # Resolve starting relative path components, middle ones
            # (if any) have been handled by os.path.normpath above.
            i = base.rfind('..')+2
            d = base[:i]
            d = os.path.basename(os.path.abspath(d))
            base = d + base[i:]
        if ext not in self.src_extensions:
            raise UnknownFileError("unknown file type '%s' (from '%s')" % (ext, src_name))
        if strip_dir:
            base = os.path.basename(base)
        obj_name = os.path.join(output_dir, base + self.obj_extension)
        obj_names.append(obj_name)
    return obj_names
replace_method(CCompiler, 'object_filenames', CCompiler_object_filenames)
def CCompiler_compile(self, sources, output_dir=None, macros=None,
                      include_dirs=None, debug=0, extra_preargs=None,
                      extra_postargs=None, depends=None):
    """
    Compile one or more source files.

    Please refer to the Python distutils API reference for more details.

    Parameters
    ----------
    sources : list of str
        A list of filenames
    output_dir : str, optional
        Path to the output directory.
    macros : list of tuples
        A list of macro definitions.
    include_dirs : list of str, optional
        The directories to add to the default include file search path for
        this compilation only.
    debug : bool, optional
        Whether or not to output debug symbols in or alongside the object
        file(s).
    extra_preargs, extra_postargs : ?
        Extra pre- and post-arguments.
    depends : list of str, optional
        A list of file names that all targets depend on.

    Returns
    -------
    objects : list of str
        A list of object file names, one per source file `sources`.

    Raises
    ------
    CompileError
        If compilation fails.
    """
    # This method is effective only with Python >=2.3 distutils.
    # Any changes here should be applied also to fcompiler.compile
    # method to support pre Python 2.3 distutils.
    global _job_semaphore

    jobs = get_num_build_jobs()

    # setup semaphore to not exceed number of compile jobs when parallelized at
    # extension level (python >= 3.5)
    with _global_lock:
        if _job_semaphore is None:
            _job_semaphore = threading.Semaphore(jobs)

    if not sources:
        return []
    from numpy.distutils.fcompiler import (FCompiler, is_f_file,
                                           has_f90_header)

    # Block x86: drop a trailing "-arch x86_64" pair from each executable
    # list before any command line is displayed or built from them.
    # Attributes that are missing, are not lists, or are too short to hold
    # an executable plus the pair are left untouched.
    for attr in ('compiler', 'compiler_so', 'linker_so', 'linker_exe'):
        argv = getattr(self, attr, None)
        if (isinstance(argv, list) and len(argv) > 2
                and argv[-2:] == ['-arch', 'x86_64']):
            del argv[-2:]

    if isinstance(self, FCompiler):
        display = []
        for fc in ['f77', 'f90', 'fix']:
            fcomp = getattr(self, 'compiler_'+fc)
            if fcomp is None:
                continue
            display.append("Fortran %s compiler: %s" % (fc, ' '.join(fcomp)))
        display = '\n'.join(display)
    else:
        ccomp = self.compiler_so
        display = "C compiler: %s\n" % (' '.join(ccomp),)
    log.info(display)
    macros, objects, extra_postargs, pp_opts, build = \
            self._setup_compile(output_dir, macros, include_dirs, sources,
                                depends, extra_postargs)
    cc_args = self._get_cc_args(pp_opts, debug, extra_preargs)

    display = "compile options: '%s'" % (' '.join(cc_args))
    if extra_postargs:
        display += "\nextra options: '%s'" % (' '.join(extra_postargs))
    log.info(display)

    def single_compile(args):
        obj, (src, ext) = args

        if not _needs_build(obj, cc_args, extra_postargs, pp_opts):
            return

        # check if we are currently already processing the same object
        # happens when using the same source in multiple extensions
        while True:
            # need explicit lock as there is no atomic check and add with GIL
            with _global_lock:
                # file not being worked on, start working
                if obj not in _processing_files:
                    _processing_files.add(obj)
                    break
            # wait for the processing to end
            time.sleep(0.1)

        try:
            # retrieve slot from our #job semaphore and build
            with _job_semaphore:
                self._compile(obj, src, ext, cc_args, extra_postargs, pp_opts)
        finally:
            # register being done processing
            with _global_lock:
                _processing_files.remove(obj)

    if isinstance(self, FCompiler):
        objects_to_build = list(build.keys())
        f77_objects, other_objects = [], []
        for obj in objects:
            if obj in objects_to_build:
                src, ext = build[obj]
                if self.compiler_type=='absoft':
                    obj = cyg2win32(obj)
                    src = cyg2win32(src)
                if is_f_file(src) and not has_f90_header(src):
                    f77_objects.append((obj, (src, ext)))
                else:
                    other_objects.append((obj, (src, ext)))

        # f77 objects can be built in parallel
        build_items = f77_objects
        # build f90 modules serial, module files are generated during
        # compilation and may be used by files later in the list so the
        # ordering is important
        for o in other_objects:
            single_compile(o)
    else:
        build_items = build.items()

    if len(build) > 1 and jobs > 1:
        # build parallel
        import multiprocessing.pool
        pool = multiprocessing.pool.ThreadPool(jobs)
        try:
            pool.map(single_compile, build_items)
        finally:
            # close the pool even when a compile job raised
            pool.close()
    else:
        # build serial
        for o in build_items:
            single_compile(o)

    # Return *all* object filenames, not just the ones we just built.
    return objects
replace_method(CCompiler, 'compile', CCompiler_compile)
def CCompiler_customize_cmd(self, cmd, ignore=()):
    """
    Customize compiler using distutils command.

    Parameters
    ----------
    cmd : class instance
        An instance inheriting from `distutils.cmd.Command`.
    ignore : sequence of str, optional
        List of `CCompiler` commands (without ``'set_'``) that should not be
        altered. Strings that are checked for are:
        ``('include_dirs', 'define', 'undef', 'libraries', 'library_dirs',
        'rpath', 'link_objects')``.

    Returns
    -------
    None
    """
    # Block x86: drop a trailing "-arch x86_64" pair from each executable
    # list. Attributes that are missing, are not lists, or are too short to
    # hold an executable plus the pair are left untouched.
    for attr in ('compiler', 'compiler_so', 'linker_so', 'linker_exe'):
        argv = getattr(self, attr, None)
        if (isinstance(argv, list) and len(argv) > 2
                and argv[-2:] == ['-arch', 'x86_64']):
            del argv[-2:]

    log.info('customize %s using %s' % (self.__class__.__name__,
                                        cmd.__class__.__name__))

    def allow(attr):
        # an option is applied only if the command sets it and the caller
        # did not ask for it to be ignored
        return getattr(cmd, attr, None) is not None and attr not in ignore

    if allow('include_dirs'):
        self.set_include_dirs(cmd.include_dirs)
    if allow('define'):
        for (name, value) in cmd.define:
            self.define_macro(name, value)
    if allow('undef'):
        for macro in cmd.undef:
            self.undefine_macro(macro)
    if allow('libraries'):
        self.set_libraries(self.libraries + cmd.libraries)
    if allow('library_dirs'):
        self.set_library_dirs(self.library_dirs + cmd.library_dirs)
    if allow('rpath'):
        self.set_runtime_library_dirs(cmd.rpath)
    if allow('link_objects'):
        self.set_link_objects(cmd.link_objects)
replace_method(CCompiler, 'customize_cmd', CCompiler_customize_cmd)
def _compiler_to_string(compiler):
    """
    Format the settings of `compiler` as aligned ``key = repr(value)`` lines.

    The keys are those of ``compiler.executables`` followed by a fixed list
    of common option names; only keys that exist as attributes on
    `compiler` are included.

    Parameters
    ----------
    compiler : object
        Compiler instance providing an ``executables`` mapping.

    Returns
    -------
    str
        One line per attribute, joined by newlines.
    """
    # Block x86: drop a trailing "-arch x86_64" pair from each executable
    # list of `compiler` (the only object in scope here) so that it does not
    # show up in the report. Attributes that are missing, are not lists, or
    # are too short to hold an executable plus the pair are left untouched.
    for attr in ('compiler', 'compiler_so', 'linker_so', 'linker_exe'):
        argv = getattr(compiler, attr, None)
        if (isinstance(argv, list) and len(argv) > 2
                and argv[-2:] == ['-arch', 'x86_64']):
            del argv[-2:]

    props = []
    mx = 0
    keys = list(compiler.executables.keys())
    for key in ['version', 'libraries', 'library_dirs',
                'object_switch', 'compile_switch',
                'include_dirs', 'define', 'undef', 'rpath', 'link_objects']:
        if key not in keys:
            keys.append(key)
    for key in keys:
        if hasattr(compiler, key):
            v = getattr(compiler, key)
            mx = max(mx, len(key))
            props.append((key, repr(v)))
    # pad every key to the longest key plus one space
    fmt = '%-' + repr(mx+1) + 's = %s'
    lines = [fmt % prop for prop in props]
    return '\n'.join(lines)
def CCompiler_show_customization(self):
    """
    Print the compiler customizations to stdout.

    Parameters
    ----------
    None

    Returns
    -------
    None

    Notes
    -----
    Printing is only done if the distutils log threshold is < 2.

    """
    # Block x86: drop a trailing "-arch x86_64" pair from each executable
    # list. Attributes that are missing, are not lists, or are too short to
    # hold an executable plus the pair are left untouched.
    for attr in ('compiler', 'compiler_so', 'linker_so', 'linker_exe'):
        argv = getattr(self, attr, None)
        if (isinstance(argv, list) and len(argv) > 2
                and argv[-2:] == ['-arch', 'x86_64']):
            del argv[-2:]

    # Best effort only: a failing version lookup must not prevent the
    # report below from being printed.
    try:
        self.get_version()
    except Exception:
        pass
    if log._global_log.threshold<2:
        print('*'*80)
        print(self.__class__)
        print(_compiler_to_string(self))
        print('*'*80)
replace_method(CCompiler, 'show_customization', CCompiler_show_customization)
def CCompiler_customize(self, dist, need_cxx=0):
    """
    Do any platform-specific customization of a compiler instance.

    This method calls `distutils.sysconfig.customize_compiler` for
    platform-specific customization, as well as optionally remove a flag
    to suppress spurious warnings in case C++ code is being compiled.

    Parameters
    ----------
    dist : object
        This parameter is not used for anything.
    need_cxx : bool, optional
        Whether or not C++ has to be compiled. If so (True), the
        ``"-Wstrict-prototypes"`` option is removed to prevent spurious
        warnings. Default is False.

    Returns
    -------
    None

    Notes
    -----
    All the default options used by distutils can be extracted with::

      from distutils import sysconfig
      sysconfig.get_config_vars('CC', 'CXX', 'OPT', 'BASECFLAGS',
                                'CCSHARED', 'LDSHARED', 'SO')

    """
    # See FCompiler.customize for suggested usage.
    log.info('customize %s' % (self.__class__.__name__))
    customize_compiler(self)

    # Block x86: drop a trailing "-arch x86_64" pair from each executable
    # list. This runs after customize_compiler() so that it also applies to
    # whatever argument lists that call has just put on the instance.
    # Attributes that are missing, are not lists, or are too short to hold
    # an executable plus the pair are left untouched.
    for attr in ('compiler', 'compiler_so', 'linker_so', 'linker_exe'):
        argv = getattr(self, attr, None)
        if (isinstance(argv, list) and len(argv) > 2
                and argv[-2:] == ['-arch', 'x86_64']):
            del argv[-2:]

    if need_cxx:
        # In general, distutils uses -Wstrict-prototypes, but this option is
        # not valid for C++ code, only for C.  Remove it if it's there to
        # avoid a spurious warning on every compilation.
        try:
            self.compiler_so.remove('-Wstrict-prototypes')
        except (AttributeError, ValueError):
            pass

        if hasattr(self, 'compiler') and 'cc' in self.compiler[0]:
            if not self.compiler_cxx:
                if self.compiler[0].startswith('gcc'):
                    a, b = 'gcc', 'g++'
                else:
                    a, b = 'cc', 'c++'
                self.compiler_cxx = [self.compiler[0].replace(a, b)]\
                                    + self.compiler[1:]
        else:
            if hasattr(self, 'compiler'):
                log.warn("#### %s #######" % (self.compiler,))
            if not hasattr(self, 'compiler_cxx'):
                log.warn('Missing compiler_cxx fix for ' + self.__class__.__name__)

    # check if compiler supports gcc style automatic dependencies
    # run on every extension so skip for known good compilers
    if hasattr(self, 'compiler') and ('gcc' in self.compiler[0] or
                                      'g++' in self.compiler[0] or
                                      'clang' in self.compiler[0]):
        self._auto_depends = True
    elif os.name == 'posix':
        import tempfile
        import shutil
        tmpdir = tempfile.mkdtemp()
        try:
            fn = os.path.join(tmpdir, "file.c")
            with open(fn, "w") as f:
                f.write("int a;\n")
            self.compile([fn], output_dir=tmpdir,
                         extra_preargs=['-MMD', '-MF', fn + '.d'])
            self._auto_depends = True
        except CompileError:
            self._auto_depends = False
        finally:
            shutil.rmtree(tmpdir)

    return
replace_method(CCompiler, 'customize', CCompiler_customize)
def simple_version_match(pat=r'[-.\d]+', ignore='', start=''):
    """
    Simple matching of version numbers, for use in CCompiler and FCompiler.

    Parameters
    ----------
    pat : str, optional
        A regular expression matching version numbers.
        Default is ``r'[-.\\d]+'``.
    ignore : str, optional
        A regular expression matching patterns to skip.
        Default is ``''``, in which case nothing is skipped.
    start : str, optional
        A regular expression matching the start of where to start looking
        for version numbers.
        Default is ``''``, in which case searching is started at the
        beginning of the version string given to `matcher`.

    Returns
    -------
    matcher : callable
        A function that is appropriate to use as the ``.version_match``
        attribute of a `CCompiler` class. `matcher` takes a single parameter,
        a version string. It returns the first match of `pat` that is not
        skipped by `ignore`, or None if `start` does not match or no such
        match exists.

    """
    def matcher(self, version_string):
        # version string may appear in the second line, so getting rid
        # of new lines:
        version_string = version_string.replace('\n', ' ')
        pos = 0
        if start:
            m = re.match(start, version_string)
            if not m:
                return None
            pos = m.end()
        while True:
            m = re.search(pat, version_string[pos:])
            if not m:
                return None
            if ignore and re.match(ignore, m.group(0)):
                # m.end() is relative to the slice that was searched, so it
                # has to be added to pos to keep advancing through the
                # full string.
                pos += m.end()
                continue
            break
        return m.group(0)
    return matcher
def CCompiler_get_version(self, force=False, ok_status=[0]):
    """
    Return compiler version, or None if compiler is not available.

    Parameters
    ----------
    force : bool, optional
        If True, force a new determination of the version, even if the
        compiler already has a version attribute. Default is False.
    ok_status : list of int, optional
        The list of status values returned by the version look-up process
        for which a version string is returned. If the status value is not
        in `ok_status`, None is returned. Default is ``[0]``.

    Returns
    -------
    version : str or None
        Version string, in the format of `distutils.version.LooseVersion`.

    """
    # Block x86: drop a trailing "-arch x86_64" pair from each executable
    # list. Attributes that are missing, are not lists, or are too short to
    # hold an executable plus the pair are left untouched.
    for attr in ('compiler', 'compiler_so', 'linker_so', 'linker_exe'):
        argv = getattr(self, attr, None)
        if (isinstance(argv, list) and len(argv) > 2
                and argv[-2:] == ['-arch', 'x86_64']):
            del argv[-2:]

    if not force and hasattr(self, 'version'):
        return self.version
    self.find_executables()
    try:
        version_cmd = self.version_cmd
    except AttributeError:
        return None
    if not version_cmd or not version_cmd[0]:
        return None
    try:
        matcher = self.version_match
    except AttributeError:
        try:
            pat = self.version_pattern
        except AttributeError:
            return None
        def matcher(version_string):
            m = re.match(pat, version_string)
            if not m:
                return None
            version = m.group('version')
            return version

    try:
        output = subprocess.check_output(version_cmd, stderr=subprocess.STDOUT)
    except subprocess.CalledProcessError as exc:
        output = exc.output
        status = exc.returncode
    except OSError:
        # match the historical returns for a parent
        # exception class caught by exec_command()
        status = 127
        output = b''
    else:
        status = 0
    # output isn't actually a filepath but we do this
    # for now to match previous distutils behavior.
    # The conversion is applied on every path: the matchers work on text,
    # so a non-zero status listed in `ok_status` must not hand them bytes.
    output = filepath_from_subprocess_output(output)

    version = None
    if status in ok_status:
        version = matcher(output)
        if version:
            version = LooseVersion(version)
    self.version = version
    return version
replace_method(CCompiler, 'get_version', CCompiler_get_version)
def CCompiler_cxx_compiler(self):
    """
    Return the C++ compiler.

    Parameters
    ----------
    None

    Returns
    -------
    cxx : class instance
        The C++ compiler, as a `CCompiler` instance. For the compiler
        types ``'msvc'``, ``'intelw'`` and ``'intelemw'`` this is `self`;
        otherwise it is a shallow copy of `self` whose ``compiler_so`` and
        ``linker_so`` use ``compiler_cxx[0]`` as the executable.

    """
    # Block x86: drop a trailing "-arch x86_64" pair from each executable
    # list. Attributes that are missing, are not lists, or are too short to
    # hold an executable plus the pair are left untouched.
    for attr in ('compiler', 'compiler_so', 'linker_so', 'linker_exe'):
        argv = getattr(self, attr, None)
        if (isinstance(argv, list) and len(argv) > 2
                and argv[-2:] == ['-arch', 'x86_64']):
            del argv[-2:]

    if self.compiler_type in ('msvc', 'intelw', 'intelemw'):
        return self

    cxx = copy(self)
    cxx.compiler_so = [cxx.compiler_cxx[0]] + cxx.compiler_so[1:]
    if sys.platform.startswith('aix') and 'ld_so_aix' in cxx.linker_so[0]:
        # AIX needs the ld_so_aix script included with Python
        cxx.linker_so = [cxx.linker_so[0], cxx.compiler_cxx[0]] \
                        + cxx.linker_so[2:]
    else:
        cxx.linker_so = [cxx.compiler_cxx[0]] + cxx.linker_so[1:]
    return cxx
replace_method(CCompiler, 'cxx_compiler', CCompiler_cxx_compiler)
compiler_class['intel'] = ('intelccompiler', 'IntelCCompiler',
"Intel C Compiler for 32-bit applications")
compiler_class['intele'] = ('intelccompiler', 'IntelItaniumCCompiler',
"Intel C Itanium Compiler for Itanium-based applications")
compiler_class['intelem'] = ('intelccompiler', 'IntelEM64TCCompiler',
"Intel C Compiler for 64-bit applications")
compiler_class['intelw'] = ('intelccompiler', 'IntelCCompilerW',
"Intel C Compiler for 32-bit applications on Windows")
compiler_class['intelemw'] = ('intelccompiler', 'IntelEM64TCCompilerW',
"Intel C Compiler for 64-bit applications on Windows")
compiler_class['pathcc'] = ('pathccompiler', 'PathScaleCCompiler',
"PathScale Compiler for SiCortex-based applications")
ccompiler._default_compilers += (('linux.*', 'intel'),
('linux.*', 'intele'),
('linux.*', 'intelem'),
('linux.*', 'pathcc'),
('nt', 'intelw'),
('nt', 'intelemw'))
if sys.platform == 'win32':
compiler_class['mingw32'] = ('mingw32ccompiler', 'Mingw32CCompiler',
"Mingw32 port of GNU C Compiler for Win32"\
"(for MSC built Python)")
if mingw32():
# On windows platforms, we want to default to mingw32 (gcc)
# because msvc can't build blitz stuff.
log.info('Setting mingw32 as default compiler for nt.')
ccompiler._default_compilers = (('nt', 'mingw32'),) \
+ ccompiler._default_compilers
_distutils_new_compiler = new_compiler
def new_compiler (plat=None,
                  compiler=None,
                  verbose=None,
                  dry_run=0,
                  force=0):
    """
    Create a compiler instance, preferring classes from numpy.distutils.

    The compiler name (the platform default when `compiler` is None) is
    looked up in `compiler_class`. The module it names is imported from
    ``numpy.distutils`` first and, if that import fails, from ``distutils``.

    Raises
    ------
    DistutilsPlatformError
        If the compiler name lookup raises KeyError.
    DistutilsModuleError
        If the module cannot be imported from either package, or the class
        cannot be found in it.
    """
    # Try first C compilers from numpy.distutils.
    if verbose is None:
        verbose = log.get_threshold() <= log.INFO
    if plat is None:
        plat = os.name

    try:
        if compiler is None:
            compiler = get_default_compiler(plat)
        module_name, class_name, _long_description = compiler_class[compiler]
    except KeyError:
        problem = "don't know how to compile C/C++ code on platform '%s'" % plat
        if compiler is not None:
            problem += " with '%s' compiler" % compiler
        raise DistutilsPlatformError(problem)

    module_name = "numpy.distutils." + module_name
    try:
        __import__(module_name)
    except ImportError as numpy_err:
        log.info('%s in numpy.distutils; trying from distutils',
                 str(numpy_err))
        # strip the leading "numpy." to fall back to the distutils module
        module_name = module_name[6:]
        try:
            __import__(module_name)
        except ImportError:
            raise DistutilsModuleError(
                "can't compile C/C++ code: unable to load module '%s'"
                % module_name)

    try:
        klass = vars(sys.modules[module_name])[class_name]
    except KeyError:
        raise DistutilsModuleError(
            ("can't compile C/C++ code: unable to find class '%s' " +
             "in module '%s'") % (class_name, module_name))

    instance = klass(None, dry_run, force)
    instance.verbose = verbose
    log.debug('new_compiler returns %s' % (klass))
    return instance
ccompiler.new_compiler = new_compiler
_distutils_gen_lib_options = gen_lib_options
def gen_lib_options(compiler, library_dirs, runtime_library_dirs, libraries):
    """
    Build the library options via distutils and flatten the result.

    Every item returned by the wrapped distutils function that
    `is_sequence` accepts is expanded in place; all other items are kept
    as they are.
    """
    # the version of this function provided by CPython allows the following
    # to return lists, which are unpacked automatically:
    # - compiler.runtime_library_dir_option
    # our version extends the behavior to:
    # - compiler.library_dir_option
    # - compiler.library_option
    # - compiler.find_library_file
    raw_opts = _distutils_gen_lib_options(compiler, library_dirs,
                                          runtime_library_dirs, libraries)
    flat_opts = []
    for opt in raw_opts:
        flat_opts += list(opt) if is_sequence(opt) else [opt]
    return flat_opts
ccompiler.gen_lib_options = gen_lib_options
# Also fix up the various compiler modules, which do
# from distutils.ccompiler import gen_lib_options
# Don't bother with mwerks, as we don't support Classic Mac.
for _cc in ['msvc9', 'msvc', '_msvc', 'bcpp', 'cygwinc', 'emxc', 'unixc']:
_m = sys.modules.get('distutils.' + _cc + 'compiler')
if _m is not None:
setattr(_m, 'gen_lib_options', gen_lib_options)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment