Created
February 16, 2020 12:22
-
-
Save ssvb/6f57b3654fc224a895d96e730196b47b to your computer and use it in GitHub Desktop.
libjpeg-turbo floating point MIPS functions
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
$ cat test.c | |
/*
 * Minimal type and constant declarations needed to compile the two routines
 * in this test file on their own.
 */

typedef short JCOEF;              /* element type of a coefficient block */
typedef unsigned char JSAMPLE;    /* element type of an image row */
typedef unsigned int JDIMENSION;  /* type of a column offset */
typedef JCOEF *JCOEFPTR;          /* useful in a couple of places */

#define GETJSAMPLE(value)  ((int)(value))  /* widen a sample to int */
#define CENTERJSAMPLE  128      /* subtracted from every sample on input */
#define FAST_FLOAT  float       /* floating-point type of the workspace */
#define DCTSIZE  8              /* rows/columns per block; must stay a macro
                                 * because it is tested with #if */
#define DCTSIZE2  64            /* DCTSIZE squared: elements per block */

typedef JSAMPLE *JSAMPROW;      /* ptr to one image row of pixel samples. */
typedef JSAMPROW *JSAMPARRAY;   /* ptr to some rows (a 2-D sample array) */
void | |
convsamp_float(JSAMPARRAY sample_data, JDIMENSION start_col, | |
FAST_FLOAT *workspace) | |
{ | |
register FAST_FLOAT *workspaceptr; | |
register JSAMPROW elemptr; | |
register int elemr; | |
workspaceptr = workspace; | |
for (elemr = 0; elemr < DCTSIZE; elemr++) { | |
elemptr = sample_data[elemr] + start_col; | |
#if DCTSIZE == 8 /* unroll the inner loop */ | |
*workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE); | |
*workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE); | |
*workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE); | |
*workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE); | |
*workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE); | |
*workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE); | |
*workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE); | |
*workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE); | |
#else | |
{ | |
register int elemc; | |
for (elemc = DCTSIZE; elemc > 0; elemc--) | |
*workspaceptr++ = (FAST_FLOAT) | |
(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE); | |
} | |
#endif | |
} | |
} | |
void | |
quantize_float(JCOEFPTR coef_block, FAST_FLOAT *divisors, | |
FAST_FLOAT *workspace) | |
{ | |
register FAST_FLOAT temp; | |
register int i; | |
register JCOEFPTR output_ptr = coef_block; | |
for (i = 0; i < DCTSIZE2; i++) { | |
/* Apply the quantization and scaling factor */ | |
temp = workspace[i] * divisors[i]; | |
/* Round to nearest integer. | |
* Since C does not specify the direction of rounding for negative | |
* quotients, we have to force the dividend positive for portability. | |
* The maximum coefficient size is +-16K (for 12-bit data), so this | |
* code should work for either 16-bit or 32-bit ints. | |
*/ | |
output_ptr[i] = (JCOEF)((int)(temp + (FAST_FLOAT)16384.5) - 16384); | |
} | |
} | |
$ mipsel-unknown-linux-gnu-gcc -O2 -c -march=mips32r2 -funroll-loops test.c && mipsel-unknown-linux-gnu-objdump -d test.o | |
test.o: file format elf32-tradlittlemips | |
Disassembly of section .text: | |
00000000 <convsamp_float>: | |
0: 8c830000 lw v1,0(a0) | |
4: 8c8a0004 lw t2,4(a0) | |
8: 00654021 addu t0,v1,a1 | |
c: 91020000 lbu v0,0(t0) | |
10: 01456821 addu t5,t2,a1 | |
14: 2447ff80 addiu a3,v0,-128 | |
18: 44870000 mtc1 a3,$f0 | |
1c: 46800060 cvt.s.w $f1,$f0 | |
20: e4c10000 swc1 $f1,0(a2) | |
24: 91090001 lbu t1,1(t0) | |
28: 252bff80 addiu t3,t1,-128 | |
2c: 448b1000 mtc1 t3,$f2 | |
30: 468010e0 cvt.s.w $f3,$f2 | |
34: e4c30004 swc1 $f3,4(a2) | |
38: 910c0002 lbu t4,2(t0) | |
3c: 258eff80 addiu t6,t4,-128 | |
40: 448e2000 mtc1 t6,$f4 | |
44: 46802160 cvt.s.w $f5,$f4 | |
48: e4c50008 swc1 $f5,8(a2) | |
4c: 910f0003 lbu t7,3(t0) | |
50: 25f8ff80 addiu t8,t7,-128 | |
54: 44983000 mtc1 t8,$f6 | |
58: 468031e0 cvt.s.w $f7,$f6 | |
5c: e4c7000c swc1 $f7,12(a2) | |
60: 91190004 lbu t9,4(t0) | |
64: 2723ff80 addiu v1,t9,-128 | |
68: 44834000 mtc1 v1,$f8 | |
6c: 46804260 cvt.s.w $f9,$f8 | |
70: e4c90010 swc1 $f9,16(a2) | |
74: 91020005 lbu v0,5(t0) | |
78: 2447ff80 addiu a3,v0,-128 | |
7c: 44875000 mtc1 a3,$f10 | |
80: 468052e0 cvt.s.w $f11,$f10 | |
84: e4cb0014 swc1 $f11,20(a2) | |
88: 91090006 lbu t1,6(t0) | |
8c: 252aff80 addiu t2,t1,-128 | |
90: 448a6000 mtc1 t2,$f12 | |
94: 46806360 cvt.s.w $f13,$f12 | |
98: e4cd0018 swc1 $f13,24(a2) | |
9c: 91080007 lbu t0,7(t0) | |
a0: 250bff80 addiu t3,t0,-128 | |
a4: 448b7000 mtc1 t3,$f14 | |
a8: 468073e0 cvt.s.w $f15,$f14 | |
ac: e4cf001c swc1 $f15,28(a2) | |
b0: 91ac0000 lbu t4,0(t5) | |
b4: 258eff80 addiu t6,t4,-128 | |
b8: 448e8000 mtc1 t6,$f16 | |
bc: 46808460 cvt.s.w $f17,$f16 | |
c0: e4d10020 swc1 $f17,32(a2) | |
c4: 91af0001 lbu t7,1(t5) | |
c8: 25f8ff80 addiu t8,t7,-128 | |
cc: 44989000 mtc1 t8,$f18 | |
d0: 468094e0 cvt.s.w $f19,$f18 | |
d4: e4d30024 swc1 $f19,36(a2) | |
d8: 91b90002 lbu t9,2(t5) | |
dc: 2723ff80 addiu v1,t9,-128 | |
e0: 44830000 mtc1 v1,$f0 | |
e4: 46800060 cvt.s.w $f1,$f0 | |
e8: e4c10028 swc1 $f1,40(a2) | |
ec: 91a20003 lbu v0,3(t5) | |
f0: 2447ff80 addiu a3,v0,-128 | |
f4: 44871000 mtc1 a3,$f2 | |
f8: 468010e0 cvt.s.w $f3,$f2 | |
fc: e4c3002c swc1 $f3,44(a2) | |
100: 91a90004 lbu t1,4(t5) | |
104: 252aff80 addiu t2,t1,-128 | |
108: 448a2000 mtc1 t2,$f4 | |
10c: 46802160 cvt.s.w $f5,$f4 | |
110: e4c50030 swc1 $f5,48(a2) | |
114: 91a80005 lbu t0,5(t5) | |
118: 250bff80 addiu t3,t0,-128 | |
11c: 448b3000 mtc1 t3,$f6 | |
120: 468031e0 cvt.s.w $f7,$f6 | |
124: e4c70034 swc1 $f7,52(a2) | |
128: 91ac0006 lbu t4,6(t5) | |
12c: 258eff80 addiu t6,t4,-128 | |
130: 448e4000 mtc1 t6,$f8 | |
134: 46804260 cvt.s.w $f9,$f8 | |
138: e4c90038 swc1 $f9,56(a2) | |
13c: 91ad0007 lbu t5,7(t5) | |
140: 8c8f0008 lw t7,8(a0) | |
144: 25b8ff80 addiu t8,t5,-128 | |
148: 44985000 mtc1 t8,$f10 | |
14c: 01e5c821 addu t9,t7,a1 | |
150: 468052e0 cvt.s.w $f11,$f10 | |
154: e4cb003c swc1 $f11,60(a2) | |
158: 93220000 lbu v0,0(t9) | |
15c: 8c83000c lw v1,12(a0) | |
160: 2447ff80 addiu a3,v0,-128 | |
164: 44876000 mtc1 a3,$f12 | |
168: 00655021 addu t2,v1,a1 | |
16c: 46806360 cvt.s.w $f13,$f12 | |
170: e4cd0040 swc1 $f13,64(a2) | |
174: 93290001 lbu t1,1(t9) | |
178: 2528ff80 addiu t0,t1,-128 | |
17c: 44887000 mtc1 t0,$f14 | |
180: 468073e0 cvt.s.w $f15,$f14 | |
184: e4cf0044 swc1 $f15,68(a2) | |
188: 932b0002 lbu t3,2(t9) | |
18c: 256cff80 addiu t4,t3,-128 | |
190: 448c8000 mtc1 t4,$f16 | |
194: 46808460 cvt.s.w $f17,$f16 | |
198: e4d10048 swc1 $f17,72(a2) | |
19c: 932e0003 lbu t6,3(t9) | |
1a0: 25cdff80 addiu t5,t6,-128 | |
1a4: 448d9000 mtc1 t5,$f18 | |
1a8: 468094e0 cvt.s.w $f19,$f18 | |
1ac: e4d3004c swc1 $f19,76(a2) | |
1b0: 932f0004 lbu t7,4(t9) | |
1b4: 25f8ff80 addiu t8,t7,-128 | |
1b8: 44980000 mtc1 t8,$f0 | |
1bc: 46800060 cvt.s.w $f1,$f0 | |
1c0: e4c10050 swc1 $f1,80(a2) | |
1c4: 93220005 lbu v0,5(t9) | |
1c8: 2443ff80 addiu v1,v0,-128 | |
1cc: 44831000 mtc1 v1,$f2 | |
1d0: 468010e0 cvt.s.w $f3,$f2 | |
1d4: e4c30054 swc1 $f3,84(a2) | |
1d8: 93270006 lbu a3,6(t9) | |
1dc: 24e9ff80 addiu t1,a3,-128 | |
1e0: 44892000 mtc1 t1,$f4 | |
1e4: 46802160 cvt.s.w $f5,$f4 | |
1e8: e4c50058 swc1 $f5,88(a2) | |
1ec: 93390007 lbu t9,7(t9) | |
1f0: 2728ff80 addiu t0,t9,-128 | |
1f4: 44883000 mtc1 t0,$f6 | |
1f8: 468031e0 cvt.s.w $f7,$f6 | |
1fc: e4c7005c swc1 $f7,92(a2) | |
200: 914b0000 lbu t3,0(t2) | |
204: 256cff80 addiu t4,t3,-128 | |
208: 448c4000 mtc1 t4,$f8 | |
20c: 46804260 cvt.s.w $f9,$f8 | |
210: e4c90060 swc1 $f9,96(a2) | |
214: 914e0001 lbu t6,1(t2) | |
218: 25cdff80 addiu t5,t6,-128 | |
21c: 448d5000 mtc1 t5,$f10 | |
220: 468052e0 cvt.s.w $f11,$f10 | |
224: e4cb0064 swc1 $f11,100(a2) | |
228: 914f0002 lbu t7,2(t2) | |
22c: 25f8ff80 addiu t8,t7,-128 | |
230: 44986000 mtc1 t8,$f12 | |
234: 46806360 cvt.s.w $f13,$f12 | |
238: e4cd0068 swc1 $f13,104(a2) | |
23c: 91420003 lbu v0,3(t2) | |
240: 2443ff80 addiu v1,v0,-128 | |
244: 44837000 mtc1 v1,$f14 | |
248: 468073e0 cvt.s.w $f15,$f14 | |
24c: e4cf006c swc1 $f15,108(a2) | |
250: 91470004 lbu a3,4(t2) | |
254: 24e9ff80 addiu t1,a3,-128 | |
258: 44898000 mtc1 t1,$f16 | |
25c: 46808460 cvt.s.w $f17,$f16 | |
260: e4d10070 swc1 $f17,112(a2) | |
264: 91590005 lbu t9,5(t2) | |
268: 2728ff80 addiu t0,t9,-128 | |
26c: 44889000 mtc1 t0,$f18 | |
270: 468094e0 cvt.s.w $f19,$f18 | |
274: e4d30074 swc1 $f19,116(a2) | |
278: 914b0006 lbu t3,6(t2) | |
27c: 8c8c0010 lw t4,16(a0) | |
280: 256eff80 addiu t6,t3,-128 | |
284: 448e0000 mtc1 t6,$f0 | |
288: 01856821 addu t5,t4,a1 | |
28c: 46800060 cvt.s.w $f1,$f0 | |
290: e4c10078 swc1 $f1,120(a2) | |
294: 914a0007 lbu t2,7(t2) | |
298: 8c820014 lw v0,20(a0) | |
29c: 254fff80 addiu t7,t2,-128 | |
2a0: 448f1000 mtc1 t7,$f2 | |
2a4: 00454821 addu t1,v0,a1 | |
2a8: 468010e0 cvt.s.w $f3,$f2 | |
2ac: e4c3007c swc1 $f3,124(a2) | |
2b0: 91b80000 lbu t8,0(t5) | |
2b4: 2703ff80 addiu v1,t8,-128 | |
2b8: 44832000 mtc1 v1,$f4 | |
2bc: 46802160 cvt.s.w $f5,$f4 | |
2c0: e4c50080 swc1 $f5,128(a2) | |
2c4: 91a70001 lbu a3,1(t5) | |
2c8: 24f9ff80 addiu t9,a3,-128 | |
2cc: 44993000 mtc1 t9,$f6 | |
2d0: 468031e0 cvt.s.w $f7,$f6 | |
2d4: e4c70084 swc1 $f7,132(a2) | |
2d8: 91a80002 lbu t0,2(t5) | |
2dc: 250bff80 addiu t3,t0,-128 | |
2e0: 448b4000 mtc1 t3,$f8 | |
2e4: 46804260 cvt.s.w $f9,$f8 | |
2e8: e4c90088 swc1 $f9,136(a2) | |
2ec: 91ac0003 lbu t4,3(t5) | |
2f0: 258eff80 addiu t6,t4,-128 | |
2f4: 448e5000 mtc1 t6,$f10 | |
2f8: 468052e0 cvt.s.w $f11,$f10 | |
2fc: e4cb008c swc1 $f11,140(a2) | |
300: 91aa0004 lbu t2,4(t5) | |
304: 254fff80 addiu t7,t2,-128 | |
308: 448f6000 mtc1 t7,$f12 | |
30c: 46806360 cvt.s.w $f13,$f12 | |
310: e4cd0090 swc1 $f13,144(a2) | |
314: 91b80005 lbu t8,5(t5) | |
318: 2702ff80 addiu v0,t8,-128 | |
31c: 44827000 mtc1 v0,$f14 | |
320: 468073e0 cvt.s.w $f15,$f14 | |
324: e4cf0094 swc1 $f15,148(a2) | |
328: 91a30006 lbu v1,6(t5) | |
32c: 2467ff80 addiu a3,v1,-128 | |
330: 44878000 mtc1 a3,$f16 | |
334: 46808460 cvt.s.w $f17,$f16 | |
338: e4d10098 swc1 $f17,152(a2) | |
33c: 91ad0007 lbu t5,7(t5) | |
340: 25b9ff80 addiu t9,t5,-128 | |
344: 44999000 mtc1 t9,$f18 | |
348: 468094e0 cvt.s.w $f19,$f18 | |
34c: e4d3009c swc1 $f19,156(a2) | |
350: 91280000 lbu t0,0(t1) | |
354: 250bff80 addiu t3,t0,-128 | |
358: 448b0000 mtc1 t3,$f0 | |
35c: 46800060 cvt.s.w $f1,$f0 | |
360: e4c100a0 swc1 $f1,160(a2) | |
364: 912c0001 lbu t4,1(t1) | |
368: 258eff80 addiu t6,t4,-128 | |
36c: 448e1000 mtc1 t6,$f2 | |
370: 468010e0 cvt.s.w $f3,$f2 | |
374: e4c300a4 swc1 $f3,164(a2) | |
378: 912a0002 lbu t2,2(t1) | |
37c: 254fff80 addiu t7,t2,-128 | |
380: 448f2000 mtc1 t7,$f4 | |
384: 46802160 cvt.s.w $f5,$f4 | |
388: e4c500a8 swc1 $f5,168(a2) | |
38c: 91380003 lbu t8,3(t1) | |
390: 2702ff80 addiu v0,t8,-128 | |
394: 44823000 mtc1 v0,$f6 | |
398: 468031e0 cvt.s.w $f7,$f6 | |
39c: e4c700ac swc1 $f7,172(a2) | |
3a0: 91230004 lbu v1,4(t1) | |
3a4: 2467ff80 addiu a3,v1,-128 | |
3a8: 44874000 mtc1 a3,$f8 | |
3ac: 46804260 cvt.s.w $f9,$f8 | |
3b0: e4c900b0 swc1 $f9,176(a2) | |
3b4: 912d0005 lbu t5,5(t1) | |
3b8: 8c990018 lw t9,24(a0) | |
3bc: 25a8ff80 addiu t0,t5,-128 | |
3c0: 44885000 mtc1 t0,$f10 | |
3c4: 8c84001c lw a0,28(a0) | |
3c8: 468052e0 cvt.s.w $f11,$f10 | |
3cc: e4cb00b4 swc1 $f11,180(a2) | |
3d0: 912b0006 lbu t3,6(t1) | |
3d4: 03256021 addu t4,t9,a1 | |
3d8: 256eff80 addiu t6,t3,-128 | |
3dc: 448e6000 mtc1 t6,$f12 | |
3e0: 00852821 addu a1,a0,a1 | |
3e4: 46806360 cvt.s.w $f13,$f12 | |
3e8: e4cd00b8 swc1 $f13,184(a2) | |
3ec: 91290007 lbu t1,7(t1) | |
3f0: 252aff80 addiu t2,t1,-128 | |
3f4: 448a7000 mtc1 t2,$f14 | |
3f8: 468073e0 cvt.s.w $f15,$f14 | |
3fc: e4cf00bc swc1 $f15,188(a2) | |
400: 918f0000 lbu t7,0(t4) | |
404: 25f8ff80 addiu t8,t7,-128 | |
408: 44988000 mtc1 t8,$f16 | |
40c: 46808460 cvt.s.w $f17,$f16 | |
410: e4d100c0 swc1 $f17,192(a2) | |
414: 91820001 lbu v0,1(t4) | |
418: 2443ff80 addiu v1,v0,-128 | |
41c: 44839000 mtc1 v1,$f18 | |
420: 468094e0 cvt.s.w $f19,$f18 | |
424: e4d300c4 swc1 $f19,196(a2) | |
428: 91870002 lbu a3,2(t4) | |
42c: 24edff80 addiu t5,a3,-128 | |
430: 448d0000 mtc1 t5,$f0 | |
434: 46800060 cvt.s.w $f1,$f0 | |
438: e4c100c8 swc1 $f1,200(a2) | |
43c: 91990003 lbu t9,3(t4) | |
440: 2728ff80 addiu t0,t9,-128 | |
444: 44881000 mtc1 t0,$f2 | |
448: 468010e0 cvt.s.w $f3,$f2 | |
44c: e4c300cc swc1 $f3,204(a2) | |
450: 918b0004 lbu t3,4(t4) | |
454: 256eff80 addiu t6,t3,-128 | |
458: 448e2000 mtc1 t6,$f4 | |
45c: 46802160 cvt.s.w $f5,$f4 | |
460: e4c500d0 swc1 $f5,208(a2) | |
464: 91890005 lbu t1,5(t4) | |
468: 2524ff80 addiu a0,t1,-128 | |
46c: 44843000 mtc1 a0,$f6 | |
470: 468031e0 cvt.s.w $f7,$f6 | |
474: e4c700d4 swc1 $f7,212(a2) | |
478: 918a0006 lbu t2,6(t4) | |
47c: 254fff80 addiu t7,t2,-128 | |
480: 448f4000 mtc1 t7,$f8 | |
484: 46804260 cvt.s.w $f9,$f8 | |
488: e4c900d8 swc1 $f9,216(a2) | |
48c: 918c0007 lbu t4,7(t4) | |
490: 2598ff80 addiu t8,t4,-128 | |
494: 44985000 mtc1 t8,$f10 | |
498: 468052e0 cvt.s.w $f11,$f10 | |
49c: e4cb00dc swc1 $f11,220(a2) | |
4a0: 90a20000 lbu v0,0(a1) | |
4a4: 2443ff80 addiu v1,v0,-128 | |
4a8: 44836000 mtc1 v1,$f12 | |
4ac: 46806360 cvt.s.w $f13,$f12 | |
4b0: e4cd00e0 swc1 $f13,224(a2) | |
4b4: 90a70001 lbu a3,1(a1) | |
4b8: 24edff80 addiu t5,a3,-128 | |
4bc: 448d7000 mtc1 t5,$f14 | |
4c0: 468073e0 cvt.s.w $f15,$f14 | |
4c4: e4cf00e4 swc1 $f15,228(a2) | |
4c8: 90b90002 lbu t9,2(a1) | |
4cc: 2728ff80 addiu t0,t9,-128 | |
4d0: 44888000 mtc1 t0,$f16 | |
4d4: 46808460 cvt.s.w $f17,$f16 | |
4d8: e4d100e8 swc1 $f17,232(a2) | |
4dc: 90ab0003 lbu t3,3(a1) | |
4e0: 256eff80 addiu t6,t3,-128 | |
4e4: 448e9000 mtc1 t6,$f18 | |
4e8: 468094e0 cvt.s.w $f19,$f18 | |
4ec: e4d300ec swc1 $f19,236(a2) | |
4f0: 90a90004 lbu t1,4(a1) | |
4f4: 2524ff80 addiu a0,t1,-128 | |
4f8: 44840000 mtc1 a0,$f0 | |
4fc: 46800060 cvt.s.w $f1,$f0 | |
500: e4c100f0 swc1 $f1,240(a2) | |
504: 90aa0005 lbu t2,5(a1) | |
508: 254fff80 addiu t7,t2,-128 | |
50c: 448f1000 mtc1 t7,$f2 | |
510: 468010e0 cvt.s.w $f3,$f2 | |
514: e4c300f4 swc1 $f3,244(a2) | |
518: 90ac0006 lbu t4,6(a1) | |
51c: 2598ff80 addiu t8,t4,-128 | |
520: 44982000 mtc1 t8,$f4 | |
524: 46802160 cvt.s.w $f5,$f4 | |
528: e4c500f8 swc1 $f5,248(a2) | |
52c: 90a50007 lbu a1,7(a1) | |
530: 24a2ff80 addiu v0,a1,-128 | |
534: 44823000 mtc1 v0,$f6 | |
538: 468031e0 cvt.s.w $f7,$f6 | |
53c: 03e00008 jr ra | |
540: e4c700fc swc1 $f7,252(a2) | |
00000544 <quantize_float>: | |
544: 3c1c0000 lui gp,0x0 | |
548: 279c0000 addiu gp,gp,0 | |
54c: 0399e021 addu gp,gp,t9 | |
550: 8f820000 lw v0,0(gp) | |
554: 24cc0100 addiu t4,a2,256 | |
558: c4400000 lwc1 $f0,0(v0) | |
55c: c4c80000 lwc1 $f8,0(a2) | |
560: c4b00000 lwc1 $f16,0(a1) | |
564: c4c70004 lwc1 $f7,4(a2) | |
568: c4af0004 lwc1 $f15,4(a1) | |
56c: c4c60008 lwc1 $f6,8(a2) | |
570: c4ae0008 lwc1 $f14,8(a1) | |
574: c4c5000c lwc1 $f5,12(a2) | |
578: c4ad000c lwc1 $f13,12(a1) | |
57c: c4c40010 lwc1 $f4,16(a2) | |
580: c4ac0010 lwc1 $f12,16(a1) | |
584: c4c30014 lwc1 $f3,20(a2) | |
588: c4ab0014 lwc1 $f11,20(a1) | |
58c: c4c20018 lwc1 $f2,24(a2) | |
590: c4aa0018 lwc1 $f10,24(a1) | |
594: c4c1001c lwc1 $f1,28(a2) | |
598: c4a9001c lwc1 $f9,28(a1) | |
59c: 4c104460 madd.s $f17,$f0,$f8,$f16 | |
5a0: 4c0f3ca0 madd.s $f18,$f0,$f7,$f15 | |
5a4: 4c0e34e0 madd.s $f19,$f0,$f6,$f14 | |
5a8: 4c0d2c20 madd.s $f16,$f0,$f5,$f13 | |
5ac: 4c0c23e0 madd.s $f15,$f0,$f4,$f12 | |
5b0: 4c0b1ba0 madd.s $f14,$f0,$f3,$f11 | |
5b4: 4c0a1360 madd.s $f13,$f0,$f2,$f10 | |
5b8: 4c090b20 madd.s $f12,$f0,$f1,$f9 | |
5bc: 46008a0d trunc.w.s $f8,$f17 | |
5c0: 460091cd trunc.w.s $f7,$f18 | |
5c4: 4600998d trunc.w.s $f6,$f19 | |
5c8: 4600814d trunc.w.s $f5,$f16 | |
5cc: 4600790d trunc.w.s $f4,$f15 | |
5d0: 460070cd trunc.w.s $f3,$f14 | |
5d4: 46006acd trunc.w.s $f11,$f13 | |
5d8: 4600608d trunc.w.s $f2,$f12 | |
5dc: 440d4000 mfc1 t5,$f8 | |
5e0: 440b3800 mfc1 t3,$f7 | |
5e4: 440a3000 mfc1 t2,$f6 | |
5e8: 44092800 mfc1 t1,$f5 | |
5ec: 44082000 mfc1 t0,$f4 | |
5f0: 44071800 mfc1 a3,$f3 | |
5f4: 44035800 mfc1 v1,$f11 | |
5f8: 440e1000 mfc1 t6,$f2 | |
5fc: 25afc000 addiu t7,t5,-16384 | |
600: 2578c000 addiu t8,t3,-16384 | |
604: 2559c000 addiu t9,t2,-16384 | |
608: 252dc000 addiu t5,t1,-16384 | |
60c: 250bc000 addiu t3,t0,-16384 | |
610: 24eac000 addiu t2,a3,-16384 | |
614: 2469c000 addiu t1,v1,-16384 | |
618: 25c2c000 addiu v0,t6,-16384 | |
61c: 24c60020 addiu a2,a2,32 | |
620: a48f0000 sh t7,0(a0) | |
624: a4980002 sh t8,2(a0) | |
628: a4990004 sh t9,4(a0) | |
62c: a48d0006 sh t5,6(a0) | |
630: a48b0008 sh t3,8(a0) | |
634: a48a000a sh t2,10(a0) | |
638: a489000c sh t1,12(a0) | |
63c: a482000e sh v0,14(a0) | |
640: 24a50020 addiu a1,a1,32 | |
644: 1586ffc5 bne t4,a2,55c <quantize_float+0x18> | |
648: 24840010 addiu a0,a0,16 | |
64c: 03e00008 jr ra | |
650: 00000000 nop |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
For comparison, the output of objdump for
jsimd_quantize_float_dspr2
from https://github.com/libjpeg-turbo/libjpeg-turbo/blob/master/simd/mips/jsimd_dspr2.S#L2816
The optimized
jsimd_quantize_float_dspr2
assembly function contains 67 instructions. GCC 6.5.0 output for quantize_float
contains 68 of mostly the same instructions in a slightly different order.