Created
May 6, 2020 17:23
-
-
Save kernigh/bdd379ca19cd42af52c0cbdaff688207 to your computer and use it in GitHub Desktop.
benchmark for https://github.com/milkytracker/MilkyTracker/pull/210
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/*
 * $ cc -O2 -o timefill timefill.c
 * Add -DUSE_ASM for PowerPC asm.
 *
 * George Koehler modified this file on 2020-05-02.  License is at
 * https://github.com/milkytracker/MilkyTracker/blob/v1.02.00/COPYING
 *
 * This is a benchmark of PowerPC asm versus C for OpenBSD.  It
 * assumes clang or gcc.  It might work on other BSDs and Linux, but
 * you might need to provide timespecsub(3) if it is missing.
 */
/*
 *  ppui/fastfill.h
 *
 *  Copyright 2009 Peter Barth
 *
 *  This file is part of Milkytracker.
 *
 *  Milkytracker is free software: you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation, either version 3 of the License, or
 *  (at your option) any later version.
 *
 *  Milkytracker is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with Milkytracker.  If not, see <http://www.gnu.org/licenses/>.
 *
 */
/*
 *  fastfill.h
 *  MilkyTracker
 *
 *  Created by Peter Barth on 28.12.07.
 *
 */
#include <sys/time.h> | |
#include <err.h> | |
#include <stdint.h> | |
#include <stdio.h> | |
#include <stdlib.h> | |
#include <time.h> | |
/* 32-bit unsigned word type used by the fill routines. */
typedef uint32_t pp_uint32;

#ifdef USE_ASM
/*
 * fill_dword_asm: hand-written PowerPC fill loop.  It is reached only
 * through the "bl" in fill_dword() below, which sets up the registers:
 *
 *   r3  = destination pointer, advanced as words are stored
 *   r4  = 32-bit value to store
 *   r5  = number of words to store
 *   r9  = constant 0 used by every compare
 *   r10 = number of 4-word groups (r5 shifted right by 2)
 *   r11 = number of leftover words (low two bits of r5)
 *
 * The relative branches "$+36" and "$+24" jump over the loop that
 * follows them (8 resp. 5 four-byte instructions plus the branch
 * itself), so no instruction may be added or removed inside the loops
 * without adjusting them.
 *
 * __asm__ is spelled with underscores so the file also builds when the
 * compiler runs in strict ISO mode, where plain "asm" is not a keyword.
 *
 * NOTE(review): srawi is an arithmetic shift, so for len >= 0x80000000
 * the group count would be negative, unlike the unsigned "len >> 2" of
 * the C path -- confirm such lengths never occur.
 */
__asm__(
    ".p2align 4\n"
    "fill_dword_asm:\n"
    "li %r9, 0\n"
    "srawi %r10, %r5, 2\n"
    "cmpw %cr7,%r10,%r9\n"
    "beq %cr7,$+36\n"           /* no full groups: skip loop 2 */
    "2:\n"
    "stw %r4,0(%r3)\n"
    "stw %r4,4(%r3)\n"
    "stw %r4,8(%r3)\n"
    "stw %r4,12(%r3)\n"
    "addi %r10,%r10,-1\n"
    "addi %r3,%r3,16\n"         // advance by 16
    "cmpw %cr7,%r10,%r9\n"
    "bne %cr7,2b\n"
    "clrlwi %r11, %r5, 30\n"
    "nop\n"                     // align loop start to 16 byte boundary
    "cmpw %cr7,%r11,%r9\n"
    "beq %cr7,$+24\n"           /* no leftover words: skip loop 1 */
    "1:\n"
    "stw %r4,0(%r3)\n"
    "addi %r11,%r11,-1\n"
    "addi %r3,%r3,4\n"          // advance by 4
    "cmpw %cr7,%r11,%r9\n"
    "bne %cr7,1b\n"
    "blr");
#endif

/*
 * Store the 32-bit value dw into len consecutive words starting at
 * buff.  A len of 0 stores nothing.
 *
 * With USE_ASM defined the work is done by fill_dword_asm; otherwise a
 * C loop writes four words per iteration and then the 0-3 leftovers.
 */
static inline void fill_dword(pp_uint32 *buff, pp_uint32 dw, pp_uint32 len)
{
#ifdef USE_ASM
    /* Pin the arguments to the registers fill_dword_asm reads. */
    register pp_uint32 *r3 __asm__("r3") = buff;
    register pp_uint32 r4 __asm__("r4") = dw;
    register pp_uint32 r5 __asm__("r5") = len;

    /*
     * The routine advances r3 and uses r9-r11 and cr7 as scratch; "bl"
     * overwrites the link register, and the stores touch memory the
     * compiler cannot see, hence the clobber list.
     */
    __asm__ volatile("bl fill_dword_asm"
        : "+r"(r3), "+r"(r4), "+r"(r5)
        :: "r9", "r10", "r11", "cr7", "lr", "memory");
#else
    pp_uint32 groups = len >> 2;    /* full groups of four words */
    pp_uint32 tail = len & 3;       /* words left after the groups */

    while (groups--) {
        buff[0] = dw;
        buff[1] = dw;
        buff[2] = dw;
        buff[3] = dw;
        buff += 4;
    }
    while (tail--)
        *buff++ = dw;
#endif
}
#ifdef USE_ASM
/*
 * fill_dword_vertical_asm: hand-written PowerPC strided store loop,
 * reached only through the "bl" in fill_dword_vertical() below.
 *
 *   r3 = destination pointer, advanced by r6 after each store
 *   r4 = 32-bit value to store
 *   r5 = number of stores still to do; must be non-zero on entry,
 *        because the loop stores first and tests afterwards
 *   r6 = byte distance added to r3 between stores
 *   r9 = constant 0 used by the compare
 *
 * __asm__ is spelled with underscores so the file also builds when the
 * compiler runs in strict ISO mode, where plain "asm" is not a keyword.
 */
__asm__(
    ".p2align 4\n"
    "fill_dword_vertical_asm:\n"
    "nop\n"                     // align loop start to 16 byte boundary
    "nop\n"                     // same
    "nop\n"                     // same
    "li %r9,0\n"
    "1:\n"
    "stw %r4,0(%r3)\n"
    "addi %r5,%r5,-1\n"
    "add %r3,%r3,%r6\n"
    "cmpw %cr7,%r5,%r9\n"
    "bne %cr7,1b\n"
    "blr");
#endif

/*
 * Store the 32-bit value dw into len words that lie pitch bytes apart,
 * starting at buff.  A len of 0 stores nothing.
 *
 * pitch should be a multiple of 4: the C path advances by pitch >> 2
 * words (dropping the low two bits) while the asm path adds pitch to
 * the address unmodified.
 */
static inline void fill_dword_vertical(pp_uint32 *buff, pp_uint32 dw, pp_uint32 len, pp_uint32 pitch)
{
    /*
     * Both implementations store before they test the count.  With a
     * zero count they would write once and then wrap around to roughly
     * 2^32 further stores, so bail out early.
     */
    if (len == 0)
        return;

#ifdef USE_ASM
    /* Pin the arguments to the registers fill_dword_vertical_asm reads. */
    register pp_uint32 *r3 __asm__("r3") = buff;
    register pp_uint32 r4 __asm__("r4") = dw;
    register pp_uint32 r5 __asm__("r5") = len;
    register pp_uint32 r6 __asm__("r6") = pitch;

    /*
     * r3 and r5 are modified by the routine, r9 and cr7 are scratch,
     * "bl" overwrites the link register, and the stores touch memory
     * the compiler cannot see.
     */
    __asm__ volatile("bl fill_dword_vertical_asm"
        : "+r"(r3), "+r"(r4), "+r"(r5), "+r"(r6)
        :: "r9", "cr7", "lr", "memory");
#else
    const pp_uint32 stride = pitch >> 2;    /* bytes -> 32-bit words */

    do {
        *buff = dw;
        buff += stride;
    } while (--len);
#endif
}
static void | |
report(const char *what, struct timespec *tp) | |
{ | |
struct timespec difference, now; | |
clock_gettime(CLOCK_MONOTONIC, &now); | |
timespecsub(&now, tp, &difference); | |
printf("%s: %lld.%09ld s\n", what, | |
(long long)difference.tv_sec, (long)difference.tv_nsec); | |
} | |
int | |
main(void) | |
{ | |
const size_t LEN = 100; | |
struct timespec t[1]; | |
pp_uint32 *buff, expect; | |
size_t i; | |
int ok; | |
if ((buff = calloc(LEN, sizeof(buff[0]))) == NULL) | |
err(1, NULL); | |
printf("USE_ASM? %s\n", | |
#ifdef USE_ASM | |
"yes" | |
#else | |
"no" | |
#endif | |
); | |
clock_gettime(CLOCK_MONOTONIC, t); | |
fill_dword(buff, 0x1a1b1c1d, LEN); | |
report("fill_dword 1", t); | |
clock_gettime(CLOCK_MONOTONIC, t); | |
fill_dword(buff, 0x2a2b2c2d, LEN); | |
report("fill_dword 2", t); | |
clock_gettime(CLOCK_MONOTONIC, t); | |
fill_dword(buff, 0x3a3b3c3d, LEN); | |
report("fill_dword 3", t); | |
clock_gettime(CLOCK_MONOTONIC, t); | |
fill_dword_vertical(buff, 0x4a4b4c4d, (LEN + 7) / 8, 32); | |
report("fdvertical 4", t); | |
clock_gettime(CLOCK_MONOTONIC, t); | |
fill_dword_vertical(buff, 0x5a5b5c5d, (LEN + 7) / 8, 32); | |
report("fdvertical 5", t); | |
clock_gettime(CLOCK_MONOTONIC, t); | |
fill_dword_vertical(buff, 0x6a6b6c6d, (LEN + 7) / 8, 32); | |
report("fdvertical 6", t); | |
ok = 1; | |
for (i = 0; i < LEN; i++) { | |
expect = i % 8 ? 0x3a3b3c3d : 0x6a6b6c6d; | |
if (buff[i] != expect) { | |
ok = 0; | |
break; | |
} | |
} | |
printf("buff is %s\n", ok ? "ok" : "*not ok*"); | |
return 0; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment