Skip to content

Instantly share code, notes, and snippets.

@anarsoul
Created April 17, 2020 02:25
Show Gist options
  • Save anarsoul/0c6e860d7b22bf943c3cb448a860d4fd to your computer and use it in GitHub Desktop.
Save anarsoul/0c6e860d7b22bf943c3cb448a860d4fd to your computer and use it in GitHub Desktop.
commit 066252ddf41448124682882fe978f817716ad92c
Author: Vasily Khoruzhick <anarsoul@gmail.com>
Date: Thu Apr 16 19:01:21 2020 -0700
lima/gpir: try to group block regs together in regalloc
GP is scalar, but load and store operate on vec4 regs. We can
load 2 vec4 regs and store 1 vec4 per instruction.
If we group our scalar regs together we may load/store more scalar
regs per instruction.
Signed-off-by: Vasily Khoruzhick <anarsoul@gmail.com>
diff --git a/src/gallium/drivers/lima/ir/gp/gpir.h b/src/gallium/drivers/lima/ir/gp/gpir.h
index 26cbb099a94..3c9231aacce 100644
--- a/src/gallium/drivers/lima/ir/gp/gpir.h
+++ b/src/gallium/drivers/lima/ir/gp/gpir.h
@@ -338,6 +338,7 @@ typedef struct gpir_block {
struct list_head list;
struct list_head node_list;
struct list_head instr_list;
+ struct list_head sort_list;
struct gpir_compiler *comp;
struct gpir_block *successors[2];
diff --git a/src/gallium/drivers/lima/ir/gp/regalloc.c b/src/gallium/drivers/lima/ir/gp/regalloc.c
index b268841839d..9377564dd66 100644
--- a/src/gallium/drivers/lima/ir/gp/regalloc.c
+++ b/src/gallium/drivers/lima/ir/gp/regalloc.c
@@ -416,6 +416,108 @@ static bool do_regalloc(struct regalloc_ctx *ctx)
return true;
}
+/* Choose the next new color from the free set.
+ *
+ * available:   bitmask of colors not handed out yet; must be non-zero.
+ * best:        colors sharing a load/store group with regs the current block
+ *              already uses; preferred when any of them is still free.
+ * group_start: fallback preference mask (lowest slot of each group).
+ *
+ * Returns the index of the lowest set bit of the first non-empty candidate
+ * set, tried in the order: available & best, available & group_start,
+ * available.
+ */
+static int pick_grouped_color(uint64_t available, uint64_t best,
+                              uint64_t group_start)
+{
+   if (available & best)
+      return ffsll(available & best) - 1;
+   if (available & group_start)
+      return ffsll(available & group_start) - 1;
+   return ffsll(available) - 1;
+}
+
+/* Permute the colors chosen by the allocator so that physical regs touched by
+ * the same block end up next to each other, letting a single load/store cover
+ * as many of them as possible.
+ *
+ * Colors below GPIR_PHYSICAL_REG_NUM are remapped; colors at or above it
+ * (value regs) keep their number. On return every
+ * ctx->registers[i].assigned_color, for i < ctx->num_nodes_and_regs, holds the
+ * remapped color.
+ *
+ * NOTE(review): the masks below presumably rely on color N meaning component
+ * N % 4 of vec4 register N / 4 - confirm against assign_regs().
+ */
+static void renumber_colors(struct regalloc_ctx *ctx)
+{
+   /* Bit N set: new color N has not been handed out yet. */
+   uint64_t available = ~0ull;
+   /* Lowest bit of every 4-color group. */
+   const uint64_t x_comp = 0x1111111111111111ull;
+   /* Lowest bit of every 2-color pair. */
+   const uint64_t xz_comp = 0x5555555555555555ull;
+   /* Old color -> new color; -1 while the old color is still unmapped. */
+   int new_colors[GPIR_PHYSICAL_REG_NUM + GPIR_VALUE_REG_NUM];
+
+   for (int i = 0; i < (GPIR_PHYSICAL_REG_NUM + GPIR_VALUE_REG_NUM); i++)
+      new_colors[i] = -1;
+
+   /* Renumber colors so we have physical regs of a block grouped together in
+    * vec4 regs, so we can load or store as many regs as possible with a single
+    * load/store.
+    */
+
+   /* Start with stores. We have 2 store blocks, store0 can store x,y
+    * components, store1 can store z,w components, so stores are grouped in
+    * pairs of colors.
+    */
+   list_for_each_entry(gpir_block, block, &ctx->comp->block_list, list) {
+      /* Pairs already used by stores of this block. */
+      uint64_t best = 0;
+      list_for_each_entry(gpir_node, node, &block->node_list, list) {
+         if (node->op != gpir_op_store_reg)
+            continue;
+
+         gpir_store_node *store = gpir_node_to_store(node);
+         unsigned old_color = ctx->registers[store->reg->index].assigned_color;
+
+         if (new_colors[old_color] != -1) {
+            /* Already remapped: just remember its pair for this block. */
+            best |= 0x3ull << (new_colors[old_color] & ~0x1);
+            continue;
+         }
+
+         /* ffsll(0) - 1 would be -1, which is both the "unmapped" marker and
+          * an invalid shift count, so running out of colors must be caught
+          * here just like in the final pass.
+          */
+         assert(available);
+
+         int new_color = pick_grouped_color(available, best, xz_comp);
+
+         new_colors[old_color] = new_color;
+         best |= 0x3ull << (new_color & ~0x1);
+         available &= ~(1ull << new_color);
+      }
+   }
+
+   /* Now process loads. We have 2 load blocks, both can load full vec4, so
+    * loads are grouped in runs of four colors.
+    */
+   list_for_each_entry(gpir_block, block, &ctx->comp->block_list, list) {
+      /* 4-color groups already used by loads of this block. */
+      uint64_t best = 0;
+      list_for_each_entry(gpir_node, node, &block->node_list, list) {
+         if (node->op != gpir_op_load_reg)
+            continue;
+
+         gpir_load_node *load = gpir_node_to_load(node);
+         unsigned old_color = ctx->registers[load->reg->index].assigned_color;
+
+         if (new_colors[old_color] != -1) {
+            /* Mapped by the store pass or an earlier load: reuse its group. */
+            best |= 0xfull << (new_colors[old_color] & ~0x3);
+            continue;
+         }
+
+         /* Same guard as in the store pass. */
+         assert(available);
+
+         int new_color = pick_grouped_color(available, best, x_comp);
+
+         new_colors[old_color] = new_color;
+         best |= 0xfull << (new_color & ~0x3);
+         available &= ~(1ull << new_color);
+      }
+   }
+
+   /* Now handle the rest. For phys regs assign first available, keep
+    * old color for value regs.
+    */
+   for (int i = 0; i < ctx->num_nodes_and_regs; i++) {
+      unsigned old_color = ctx->registers[i].assigned_color;
+
+      if (new_colors[old_color] != -1) {
+         ctx->registers[i].assigned_color = new_colors[old_color];
+         continue;
+      }
+
+      if (old_color >= GPIR_PHYSICAL_REG_NUM) {
+         /* Value-reg color: identity mapping, assigned_color stays as is. */
+         new_colors[old_color] = old_color;
+         continue;
+      }
+
+      assert(available);
+
+      int new_color = ffsll(available) - 1;
+
+      new_colors[old_color] = new_color;
+      available &= ~(1ull << new_color);
+      ctx->registers[i].assigned_color = new_color;
+   }
+}
+
static void assign_regs(struct regalloc_ctx *ctx)
{
list_for_each_entry(gpir_block, block, &ctx->comp->block_list, list) {
@@ -512,6 +614,7 @@ bool gpir_regalloc_prog(gpir_compiler *comp)
ralloc_free(ctx.mem_ctx);
return false;
}
+ renumber_colors(&ctx);
assign_regs(&ctx);
regalloc_print_result(comp);
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment