Created
April 17, 2020 02:25
-
-
Save anarsoul/0c6e860d7b22bf943c3cb448a860d4fd to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
commit 066252ddf41448124682882fe978f817716ad92c | |
Author: Vasily Khoruzhick <anarsoul@gmail.com> | |
Date: Thu Apr 16 19:01:21 2020 -0700 | |
lima/gpir: try to group block regs together in regalloc | |
The GP is scalar, but its loads and stores operate on vec4 regs. We can | |
load 2 vec4 regs and store 1 vec4 reg per instruction. | |
If we group a block's scalar regs together in the same vec4 regs, we may | |
load/store more scalar regs per instruction. | |
Signed-off-by: Vasily Khoruzhick <anarsoul@gmail.com> | |
diff --git a/src/gallium/drivers/lima/ir/gp/gpir.h b/src/gallium/drivers/lima/ir/gp/gpir.h | |
index 26cbb099a94..3c9231aacce 100644 | |
--- a/src/gallium/drivers/lima/ir/gp/gpir.h | |
+++ b/src/gallium/drivers/lima/ir/gp/gpir.h | |
@@ -338,6 +338,7 @@ typedef struct gpir_block { | |
struct list_head list; | |
struct list_head node_list; | |
struct list_head instr_list; | |
+ struct list_head sort_list; | |
struct gpir_compiler *comp; | |
struct gpir_block *successors[2]; | |
diff --git a/src/gallium/drivers/lima/ir/gp/regalloc.c b/src/gallium/drivers/lima/ir/gp/regalloc.c | |
index b268841839d..9377564dd66 100644 | |
--- a/src/gallium/drivers/lima/ir/gp/regalloc.c | |
+++ b/src/gallium/drivers/lima/ir/gp/regalloc.c | |
@@ -416,6 +416,108 @@ static bool do_regalloc(struct regalloc_ctx *ctx) | |
return true; | |
} | |
+static void renumber_colors(struct regalloc_ctx *ctx) | |
+{ | |
+ uint64_t available = ~0ull; /* bit n set => new color n has not been handed out yet */ | |
+ const uint64_t x_comp = 0x1111111111111111ull; /* bit 0 of every group of four colors */ | |
+ const uint64_t xz_comp = 0x5555555555555555ull; /* bits 0 and 2 of every group of four colors */ | |
+ int new_colors[GPIR_PHYSICAL_REG_NUM + GPIR_VALUE_REG_NUM]; /* indexed by old color; -1 = not remapped yet */ | |
+ | |
+ for (int i = 0; i < (GPIR_PHYSICAL_REG_NUM + GPIR_VALUE_REG_NUM); i++) | |
+ new_colors[i] = -1; | |
+ | |
+ /* Renumber colors so we have physical regs of a block grouped together in vec4 | |
+ * regs, so we can load or store as many regs as possible with a single load/store | |
+ * | |
+ * This is done in three passes over the already-assigned colors: | |
+ * 1. colors used by store_reg nodes, grouped in pairs of colors; | |
+ * 2. colors used by load_reg nodes, grouped in fours; | |
+ * 3. every entry of ctx->registers, which is where assigned_color is | |
+ * actually rewritten. | |
+ * The first two passes only fill in new_colors[] and clear bits in | |
+ * `available`; they do not touch ctx->registers. | |
+ */ | |
+ | |
+ /* Start with stores. We have 2 store blocks, store0 can store x,y components, | |
+ * store1 can store z,w components | |
+ * | |
+ * `best` is reset for every block and collects the pairs of colors | |
+ * (new_color & ~0x1 and its neighbour) already used by stores of that | |
+ * block, so that later stores of the same block prefer the free half of | |
+ * such a pair. Otherwise a free even-numbered color (xz_comp) is | |
+ * preferred, and as a last resort any free color is taken. */ | |
+ list_for_each_entry(gpir_block, block, &ctx->comp->block_list, list) { | |
+ uint64_t best = 0; | |
+ list_for_each_entry(gpir_node, node, &block->node_list, list) { | |
+ unsigned old_color; | |
+ if (node->op == gpir_op_store_reg) { | |
+ gpir_store_node *store = gpir_node_to_store(node); | |
+ old_color = ctx->registers[store->reg->index].assigned_color; | |
+ /* Color was remapped earlier: only remember its pair as preferred. */ | |
+ if (new_colors[old_color] != -1) { | |
+ best |= 0x3ull << (new_colors[old_color] & ~0x1); | |
+ continue; | |
+ } | |
+ } else | |
+ continue; /* only store_reg nodes are handled in this pass */ | |
+ | |
+ int new_color; | |
+ /* NOTE(review): there is no assert(available) here, unlike in the | |
+ * final pass. If `available` were 0, ffsll() would presumably return 0, | |
+ * new_color would become -1 and the shifts below would be undefined -- | |
+ * confirm this cannot happen. */ | |
+ if (available & best) | |
+ new_color = ffsll(available & best) - 1; | |
+ else if (available & xz_comp) | |
+ new_color = ffsll(available & xz_comp) - 1; | |
+ else | |
+ new_color = ffsll(available) - 1; | |
+ /* Record the mapping, prefer this pair from now on, mark the color taken. */ | |
+ new_colors[old_color] = new_color; | |
+ best |= 0x3ull << (new_color & ~0x1); | |
+ available &= ~(1ull << new_color); | |
+ } | |
+ } | |
+ | |
+ /* Now process loads. We have 2 load blocks, both can load full vec4 | |
+ * | |
+ * Same scheme as for the stores, except that the preferred group is a | |
+ * whole group of four colors (new_color & ~0x3) and the fallback prefers | |
+ * a free color at the start of a group of four (x_comp). Colors that | |
+ * were already remapped by the store pass are kept and only contribute | |
+ * their group to `best`. */ | |
+ list_for_each_entry(gpir_block, block, &ctx->comp->block_list, list) { | |
+ uint64_t best = 0; | |
+ list_for_each_entry(gpir_node, node, &block->node_list, list) { | |
+ unsigned old_color; | |
+ if (node->op == gpir_op_load_reg) { | |
+ gpir_load_node *load = gpir_node_to_load(node); | |
+ old_color = ctx->registers[load->reg->index].assigned_color; | |
+ /* Color was remapped earlier: only remember its group as preferred. */ | |
+ if (new_colors[old_color] != -1) { | |
+ best |= 0xfull << (new_colors[old_color] & ~0x3); | |
+ continue; | |
+ } | |
+ } else | |
+ continue; /* only load_reg nodes are handled in this pass */ | |
+ | |
+ int new_color; | |
+ /* NOTE(review): as in the store pass, `available` is not asserted to | |
+ * be non-zero before ffsll() is used -- confirm it cannot be 0 here. */ | |
+ if (available & best) | |
+ new_color = ffsll(available & best) - 1; | |
+ else if (available & x_comp) | |
+ new_color = ffsll(available & x_comp) - 1; | |
+ else | |
+ new_color = ffsll(available) - 1; | |
+ /* Record the mapping, prefer this group from now on, mark the color taken. */ | |
+ new_colors[old_color] = new_color; | |
+ best |= 0xfull << (new_color & ~0x3); | |
+ available &= ~(1ull << new_color); | |
+ } | |
+ } | |
+ | |
+ /* Now handle the rest. For phys regs assign first available, keep | |
+ * old color for value regs | |
+ * | |
+ * This loop visits every entry of ctx->registers. An entry whose old | |
+ * color already has a mapping (from the passes above or from an earlier | |
+ * iteration of this loop) simply gets the mapped color. Old colors | |
+ * >= GPIR_PHYSICAL_REG_NUM are mapped to themselves. Any other color is | |
+ * mapped to the first color still set in `available`. | |
+ */ | |
+ for (int i = 0; i < ctx->num_nodes_and_regs; i++) { | |
+ unsigned old_color = ctx->registers[i].assigned_color; | |
+ if (new_colors[old_color] != -1) { | |
+ ctx->registers[i].assigned_color = new_colors[old_color]; | |
+ continue; | |
+ } | |
+ if (old_color >= GPIR_PHYSICAL_REG_NUM) { | |
+ new_colors[old_color] = old_color; /* identity mapping; assigned_color already holds this value */ | |
+ continue; | |
+ } | |
+ | |
+ int new_color; | |
+ | |
+ assert(available); /* a free color must remain, otherwise ffsll() below has no bit to find */ | |
+ | |
+ new_color = ffsll(available) - 1; | |
+ new_colors[old_color] = new_color; | |
+ available &= ~(1ull << new_color); | |
+ ctx->registers[i].assigned_color = new_color; | |
+ } | |
+} | |
+ | |
static void assign_regs(struct regalloc_ctx *ctx) | |
{ | |
list_for_each_entry(gpir_block, block, &ctx->comp->block_list, list) { | |
@@ -512,6 +614,7 @@ bool gpir_regalloc_prog(gpir_compiler *comp) | |
ralloc_free(ctx.mem_ctx); | |
return false; | |
} | |
+ renumber_colors(&ctx); | |
assign_regs(&ctx); | |
regalloc_print_result(comp); |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment