Last-Executed Iteration (LEI) trace detection
/*
 * Data structure for trace selection using Last Executed Iteration
 * (LEI). LEI is described in "Improving Region Selection in Dynamic
 * Optimization Systems" by Hiniker, Hazelwood, and Smith in MICRO'05.
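 *
 * In this scheme, every taken branch target is recorded in a
 * fixed-size history of recent branch targets; a target that repeats
 * within that history indicates that execution has re-entered
 * recently executed code (a loop), which makes it a candidate trace
 * head. The detection primitive here is btb_insert(), which reports
 * whether a target already occurs in the history.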
 *
 * Copyright (c) 2012 Thomas Schilling
 *
 * Permission is hereby granted, free of charge, to any person
 * obtaining a copy of this software and associated documentation
 * files (the "Software"), to deal in the Software without
 * restriction, including without limitation the rights to use, copy,
 * modify, merge, publish, distribute, sublicense, and/or sell copies
 * of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#include <stdint.h>
#include <string.h>
#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
#define BRANCH_HISTORY_BUFFER_SIZE 512
#define BRANCH_HISTORY_HASH_TABLE_SIZE (3 * BRANCH_HISTORY_BUFFER_SIZE)
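
/* The hash table is three times the size of the history buffer, so
 * its load factor never exceeds 1/3 even when the history is full.
 * This keeps linear-probe chains short and, because the table is
 * strictly larger than the buffer, guarantees that probing always
 * finds an empty slot (see the lookup loop in btb_insert()).
 */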
/* #define BTB_STATS */
#ifdef BTB_STATS
# define IF_STATS(stmt) stmt
#else
# define IF_STATS(stmt)
#endif
typedef void *Target;
typedef uint64_t HashEntry;
typedef uint16_t HashRef;
typedef int bool;
#define true 1
#define false 0
/**
 * The branch history buffer is a circular buffer of the most recent
 * branch targets. For efficiency a hash table is maintained that
 * maps addresses (48 bits) to their number of occurrences in the
 * circular buffer (16 bits).
 *
 * Collision resolution is done simply using linear probing, i.e., if
 * the desired slot is full, we use linear search to find the next
 * empty slot. This leads to some amount of clustering, but is better
 * for locality (according to Derek Bruening's PhD thesis).
 *
 * The hash function simply discards the lower 3 bits of the input
 * and otherwise uses the address itself as the hash.
 *
 * The circular buffer itself only stores references into the hash
 * table (to save memory).
 */
typedef struct _BranchTargetBuffer {
  uint32_t next;   /* Insert next element here */
  uint32_t size;   /* Total no. of entries in buffer. */
  HashRef buffer[BRANCH_HISTORY_BUFFER_SIZE];
  HashEntry hash[BRANCH_HISTORY_HASH_TABLE_SIZE];
} BranchTargetBuffer;
/**
 * Initialises the given BTB.
 */
void
btb_init(BranchTargetBuffer *btb)
{
  btb->next = 0;
  btb->size = 0;
  memset(btb->buffer, 0, sizeof(*btb->buffer) * BRANCH_HISTORY_BUFFER_SIZE);
  memset(btb->hash, 0, sizeof(*btb->hash) * BRANCH_HISTORY_HASH_TABLE_SIZE);
}
static inline uint32_t
btb_hash(Target target)
{
  return (uint32_t)((uint64_t)target >> 3);
}
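
/* A HashEntry packs a 48-bit target address into the low bits and a
 * 16-bit occurrence count into the high bits:
 *
 *    63          48 47                            0
 *   +--------------+------------------------------+
 *   |    count     |        target address        |
 *   +--------------+------------------------------+
 *
 * This assumes that pointers fit in 48 bits, which holds for
 * user-space addresses on current x86-64 systems.
 */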
static inline Target
btb_hash_key(HashEntry e)
{
  return (Target)(e & (((HashEntry)1 << 48) - 1));
}

static inline uint16_t
btb_hash_value(HashEntry e)
{
  return (uint16_t)(e >> 48);
}

static inline HashEntry
btb_make_entry(Target t, uint16_t value)
{
  return (HashEntry)t | ((HashEntry)value << 48);
}

static inline HashEntry
btb_set_hash_value(HashEntry e, uint16_t value)
{
  return btb_make_entry(btb_hash_key(e), value);
}
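
/**
 * Decrements the occurrence count of the hash slot at `index`,
 * clearing the slot once the count drops to zero.
 *
 * Note: clearing a slot in a linear-probing table can break the
 * probe chain of a colliding key inserted after it, so a later
 * lookup of that key may miss and create a duplicate entry. For a
 * profiling heuristic the occasional miscount is tolerable; an exact
 * table would need tombstones or back-shift deletion.
 */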
static inline void
btb_remove_entry(BranchTargetBuffer *btb, uint32_t index)
{
  HashEntry entry = btb->hash[index];
  assert(entry != (HashEntry)0);
  uint16_t new_value = btb_hash_value(entry) - 1;
  if (new_value == 0) {
    btb->hash[index] = (HashEntry)0;
  } else {
    btb->hash[index] = btb_set_hash_value(entry, new_value);
  }
}
IF_STATS(static long total_iters = 0);
IF_STATS(static int max_iters = 0);
/**
 * Inserts the target address into the buffer.
 *
 * Returns whether the target was already in the buffer.
 */
bool
btb_insert(BranchTargetBuffer *btb, Target target)
{
  // 1. Find the target's slot in the hash table.
  uint32_t index = btb_hash(target) % BRANCH_HISTORY_HASH_TABLE_SIZE;
  bool found = false;
  IF_STATS(int iters = 0);
  HashEntry entry = (HashEntry)0;
  // As long as countof(btb->hash) > countof(btb->buffer) this loop is
  // guaranteed to terminate, since we will eventually hit an empty
  // slot.
  while (true) {
    IF_STATS(++iters);
    // TODO: See if unrolling improves performance. On average there
    // should be only 1-2 iterations.
    entry = btb->hash[index];
    if (entry == (HashEntry)0)
      break;
    Target t = btb_hash_key(entry);
    if (t == target) {
      found = true;
      break;
    }
    ++index;
    if (index >= BRANCH_HISTORY_HASH_TABLE_SIZE)
      index = 0;
  }
  IF_STATS(total_iters += iters);
  IF_STATS(if (iters > max_iters) { max_iters = iters; });

  // 2. Create a fresh entry or increment the existing occurrence count.
  if (!found) {
    btb->hash[index] = btb_make_entry(target, 1);
  } else {
    btb->hash[index] = btb_make_entry(target, btb_hash_value(entry) + 1);
  }

  // 3. Append the entry to the circular buffer.
  if (btb->size == BRANCH_HISTORY_BUFFER_SIZE) {
    // Buffer is full. Decrement reference count of element at
    // btb->next (or remove it).
    btb_remove_entry(btb, btb->buffer[btb->next]);
  } else {
    // Otherwise the buffer is not yet full and grows by one entry.
    btb->size++;
  }
  btb->buffer[btb->next] = (HashRef)index;
  btb->next++;
  if (btb->next >= BRANCH_HISTORY_BUFFER_SIZE)
    btb->next = 0;
  return found;
}
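
/* Usage sketch (hypothetical; `pc`, `target`, and consider_trace()
 * are not defined in this file): a dynamic optimizer would call
 * btb_insert() at taken branches and treat a repeated target of a
 * backward branch as a hot-loop candidate, e.g.:
 *
 *   if (btb_insert(&btb, target) && target < pc)
 *     consider_trace(target);
 */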
/**
 * Debug print the state of the BTB.
 */
void
btb_dump(BranchTargetBuffer *btb, FILE *out)
{
  int32_t index = (int32_t)(btb->next - btb->size);
  if (index < 0) index += BRANCH_HISTORY_BUFFER_SIZE;
  uint32_t s, nl;
  fprintf(out, "History (size=%u):\n ", btb->size);
  for (s = btb->size, nl = 0; s > 0; --s) {
    fprintf(out, " %p", btb_hash_key(btb->hash[btb->buffer[index]]));
    if (++index >= BRANCH_HISTORY_BUFFER_SIZE) { index = 0; }
    if (++nl >= 8) { fprintf(out, "\n "); nl = 0; }
  }
  uint32_t i;
  const char counts[] = { '.', '1', '2', '3', '4', '5', '6', '7', '8', '*' };
  fprintf(out, "\nHashTable[");
  for (i = 0, nl = 0; i < BRANCH_HISTORY_HASH_TABLE_SIZE; ++i) {
    uint16_t count = btb_hash_value(btb->hash[i]);
    if (count <= 9) fprintf(out, "%c", counts[count]);
    else fprintf(out, "%d", count);
    if (++nl >= 70) { fprintf(out, "\n "); nl = 0; }
  }
  fprintf(out, "]\n");
}
static void
btb_test1()
{
  BranchTargetBuffer btb;
  btb_init(&btb);
  long i;
  long hits = 0;
  long rounds = 300000000;
  uint64_t dummy = 0;
  for (i = 1; i < rounds + 1; ++i) {
    uint64_t r = (uint64_t)((random() % 5000) * 8);
    dummy ^= r;
    /* Comment out the next two lines to measure only the PRNG overhead. */
    bool b = btb_insert(&btb, (Target)r);
    if (b) ++hits;
  }
  /* btb_dump(&btb, stderr); */
  fprintf(stderr, "dummy = %p\n", (Target)dummy);
  fprintf(stderr, "hits = %ld / %ld (%f)\n", hits, rounds,
          (double)hits / (double)rounds);
  IF_STATS(fprintf(stderr, "iters = %ld / %ld (%f)\nmax = %d\n",
                   total_iters, rounds,
                   (double)total_iters / (double)rounds,
                   max_iters));
}
int main(int argc, char *argv[])
{
  btb_test1();
  return 0;
}