Skip to content

Instantly share code, notes, and snippets.

@kernigh
Created May 6, 2020 17:23
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save kernigh/bdd379ca19cd42af52c0cbdaff688207 to your computer and use it in GitHub Desktop.
Save kernigh/bdd379ca19cd42af52c0cbdaff688207 to your computer and use it in GitHub Desktop.
/*
* $ cc -O2 -o timefill timefill.c
* Add -DUSE_ASM for PowerPC asm.
*
* George Koehler modified this file on 2020-05-02. License is at
* https://github.com/milkytracker/MilkyTracker/blob/v1.02.00/COPYING
*
* This is a benchmark of PowerPC asm versus C for OpenBSD. It
* assumes clang or gcc. It might work on other BSDs and Linux, but
* you might need to provide timespecsub(3) if it is missing.
*/
/*
* ppui/fastfill.h
*
* Copyright 2009 Peter Barth
*
* This file is part of Milkytracker.
*
* Milkytracker is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Milkytracker is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with Milkytracker. If not, see <http://www.gnu.org/licenses/>.
*
*/
/*
* fastfill.h
* MilkyTracker
*
* Created by Peter Barth on 28.12.07.
*
*/
#include <sys/time.h>
#include <err.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
typedef uint32_t pp_uint32;
#ifdef USE_ASM
/*
 * fill_dword_asm: hand-written PowerPC routine that fill_dword() calls
 * with "bl" when USE_ASM is defined.
 *
 * Register interface (set up by the caller in fill_dword()):
 *   r3 = destination pointer, r4 = 32-bit value to store,
 *   r5 = number of dwords to store.
 * Scratch: r9 (holds 0 for the compares), r10 (count of 4-dword groups),
 * r11 (count of leftover dwords), cr7.  r3 is advanced past every word
 * stored.  Returns with blr.
 *
 * The "$+N" branch targets are byte displacements that jump over a loop
 * whose count is zero ($+36 lands on clrlwi, $+24 lands on blr).  They
 * must be recomputed if instructions are added or removed, as must the
 * nop padding that keeps both loop labels on 16-byte boundaries.
 *
 * NOTE(review): srawi is an arithmetic shift while the C fallback uses an
 * unsigned "len >> 2"; presumably the two paths only agree for
 * len < 2^31 -- confirm if very large lengths matter.
 */
asm(
".p2align 4\n"
"fill_dword_asm:\n"
"li %r9, 0\n" // r9 = 0, compared against both loop counters
"srawi %r10, %r5, 2\n" // r10 = len / 4: iterations of the unrolled loop
"cmpw %cr7,%r10,%r9\n"
"beq %cr7,$+36\n" // no full group of 4: skip the unrolled loop
"2:\n"
"stw %r4,0(%r3)\n"
"stw %r4,4(%r3)\n"
"stw %r4,8(%r3)\n"
"stw %r4,12(%r3)\n"
"addi %r10,%r10,-1\n"
"addi %r3,%r3,16\n" // advance by 16
"cmpw %cr7,%r10,%r9\n"
"bne %cr7,2b\n"
"clrlwi %r11, %r5, 30\n" // r11 = len & 3: dwords left after the groups
"nop\n" // align loop start to 16 byte boundary
"cmpw %cr7,%r11,%r9\n"
"beq %cr7,$+24\n" // nothing left over: skip straight to blr
"1:\n"
"stw %r4,0(%r3)\n"
"addi %r11,%r11,-1\n"
"addi %r3,%r3,4\n" // advance by 4
"cmpw %cr7,%r11,%r9\n"
"bne %cr7,1b\n"
"blr");
#endif
/*
 * Store the 32-bit value dw into len consecutive dwords starting at buff.
 *
 * buff: destination; must have room for len dwords.
 * dw:   value written to every dword.
 * len:  number of dwords to write (0 writes nothing).
 *
 * With USE_ASM the work is done by fill_dword_asm; otherwise a C loop
 * unrolled four stores at a time, followed by a loop for the remainder.
 */
static inline void fill_dword(pp_uint32* buff, pp_uint32 dw, pp_uint32 len)
{
#ifdef USE_ASM
    /* Pin the arguments to the registers fill_dword_asm reads. */
    register pp_uint32* dst asm ("r3") = buff;
    register pp_uint32 value asm ("r4") = dw;
    register pp_uint32 count asm ("r5") = len;

    /* The routine is reached with "bl", so lr is clobbered as well as
     * the routine's scratch registers and the memory it fills. */
    asm volatile("bl fill_dword_asm"
        : "+r"(dst), "+r"(value), "+r"(count)
        :: "r9", "r10", "r11", "cr7", "lr", "memory");
#else
    pp_uint32 groups = len >> 2;    /* full groups of four dwords */
    pp_uint32 leftover = len & 3;   /* dwords after the last full group */

    for (; groups != 0; --groups)
    {
        buff[0] = dw;
        buff[1] = dw;
        buff[2] = dw;
        buff[3] = dw;
        buff += 4;
    }

    for (; leftover != 0; --leftover)
        *buff++ = dw;
#endif
}
#ifdef USE_ASM
/*
 * fill_dword_vertical_asm: hand-written PowerPC routine that
 * fill_dword_vertical() calls with "bl" when USE_ASM is defined.
 *
 * Register interface (set up by the caller in fill_dword_vertical()):
 *   r3 = destination pointer, r4 = 32-bit value to store,
 *   r5 = number of stores, r6 = amount added to r3 after each store.
 * Scratch: r9 (holds 0 for the compare), cr7.  Returns with blr.
 *
 * The counter is tested only after a store has been made, so the loop
 * always stores at least once.  r6 is added to the address as-is, i.e.
 * as a byte offset.
 */
asm(
".p2align 4\n"
"fill_dword_vertical_asm:\n"
"nop\n" // align loop start to 16 byte boundary
"nop\n" // same
"nop\n" // same
"li %r9,0\n" // r9 = 0, compared against the remaining count
"1:\n"
"stw %r4,0(%r3)\n"
"addi %r5,%r5,-1\n"
"add %r3,%r3,%r6\n" // step to the next destination
"cmpw %cr7,%r5,%r9\n"
"bne %cr7,1b\n"
"blr");
#endif
/*
 * Store the 32-bit value dw into len dwords spaced pitch bytes apart,
 * starting at buff.
 *
 * buff:  first destination dword.
 * dw:    value written at every step.
 * len:   number of dwords to write; 0 writes nothing.
 * pitch: distance between consecutive destinations, in bytes.  The C
 *        path converts it to a dword stride with pitch >> 2, while the
 *        asm path adds pitch to the address unchanged, so the two paths
 *        agree when pitch is a multiple of 4.
 */
static inline void fill_dword_vertical(pp_uint32* buff, pp_uint32 dw, pp_uint32 len, pp_uint32 pitch)
{
    /* Both implementations below test the counter only after storing, so
     * a zero length would wrap the unsigned counter and keep storing.
     * Reject it up front. */
    if (len == 0)
        return;

#ifdef USE_ASM
    /* Pin the arguments to the registers fill_dword_vertical_asm reads. */
    register pp_uint32* r3 asm ("r3") = buff;
    register pp_uint32 r4 asm ("r4") = dw;
    register pp_uint32 r5 asm ("r5") = len;
    register pp_uint32 r6 asm ("r6") = pitch;
    asm volatile("bl fill_dword_vertical_asm"
        : "+r"(r3), "+r"(r4), "+r"(r5), "+r"(r6)
        :: "r9", "cr7", "lr", "memory");
#else
    const pp_uint32 stride = pitch >> 2;    /* bytes -> dwords */

    for (;;)
    {
        *buff = dw;
        if (--len == 0)
            break;
        /* Advance only between stores so no pointer is formed beyond
         * the last destination. */
        buff += stride;
    }
#endif
}
/*
 * Print how much monotonic time has passed since *tp.
 *
 * what: label printed in front of the measurement.
 * tp:   start time, previously obtained with
 *       clock_gettime(CLOCK_MONOTONIC, ...).
 *
 * Output has the form "<what>: <seconds>.<nanoseconds, 9 digits> s".
 * Exits via err(3) if the clock cannot be read.
 */
static void
report(const char *what, struct timespec *tp)
{
    struct timespec now;
    long long sec;
    long nsec;

    if (clock_gettime(CLOCK_MONOTONIC, &now) == -1)
        err(1, "clock_gettime");

    /* Subtract by hand instead of relying on the BSD-only timespecsub()
     * macro, borrowing one second when the nanosecond field underflows. */
    sec = (long long)now.tv_sec - (long long)tp->tv_sec;
    nsec = (long)now.tv_nsec - (long)tp->tv_nsec;
    if (nsec < 0) {
        sec--;
        nsec += 1000000000L;
    }

    printf("%s: %lld.%09ld s\n", what, sec, nsec);
}
/*
 * Benchmark driver: time three fill_dword() calls and three
 * fill_dword_vertical() calls on a small heap buffer, then verify the
 * resulting buffer contents.
 *
 * Returns 0 when the buffer holds the expected pattern and 1 when it
 * does not, so the check is visible in the exit status as well as in the
 * printed "buff is ..." line.
 */
int
main(void)
{
    const size_t LEN = 100;
    struct timespec t[1];
    pp_uint32 *buff, expect;
    size_t i;
    int ok;

    if ((buff = calloc(LEN, sizeof(buff[0]))) == NULL)
        err(1, NULL);

    printf("USE_ASM? %s\n",
#ifdef USE_ASM
        "yes"
#else
        "no"
#endif
        );

    /* Horizontal fills over the whole buffer. */
    clock_gettime(CLOCK_MONOTONIC, t);
    fill_dword(buff, 0x1a1b1c1d, LEN);
    report("fill_dword 1", t);

    clock_gettime(CLOCK_MONOTONIC, t);
    fill_dword(buff, 0x2a2b2c2d, LEN);
    report("fill_dword 2", t);

    clock_gettime(CLOCK_MONOTONIC, t);
    fill_dword(buff, 0x3a3b3c3d, LEN);
    report("fill_dword 3", t);

    /* Vertical fills: a pitch of 32 bytes touches every 8th dword, and
     * (LEN + 7) / 8 stores cover indices 0, 8, ..., 96. */
    clock_gettime(CLOCK_MONOTONIC, t);
    fill_dword_vertical(buff, 0x4a4b4c4d, (LEN + 7) / 8, 32);
    report("fdvertical 4", t);

    clock_gettime(CLOCK_MONOTONIC, t);
    fill_dword_vertical(buff, 0x5a5b5c5d, (LEN + 7) / 8, 32);
    report("fdvertical 5", t);

    clock_gettime(CLOCK_MONOTONIC, t);
    fill_dword_vertical(buff, 0x6a6b6c6d, (LEN + 7) / 8, 32);
    report("fdvertical 6", t);

    /* Every 8th dword must hold the last vertical value; all others
     * must still hold the last horizontal value. */
    ok = 1;
    for (i = 0; i < LEN; i++) {
        expect = i % 8 ? 0x3a3b3c3d : 0x6a6b6c6d;
        if (buff[i] != expect) {
            ok = 0;
            break;
        }
    }
    printf("buff is %s\n", ok ? "ok" : "*not ok*");

    free(buff);
    return ok ? 0 : 1;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment