1
0
mirror of https://github.com/php/php-src.git synced 2026-03-24 00:02:20 +01:00

Update IR

IR commit: 682cc0ca6761164dbcd791e5f56283f8e88537d2

Fixes GH-13286
This commit is contained in:
Dmitry Stogov
2024-01-31 22:20:10 +03:00
parent a135517376
commit c50255c626
6 changed files with 395 additions and 274 deletions

View File

@@ -1299,7 +1299,7 @@ static uint32_t ir_hashtab_hash_size(uint32_t size)
size |= (size >> 4);
size |= (size >> 8);
size |= (size >> 16);
return size + 1;
return IR_MAX(size + 1, 4);
}
static void ir_hashtab_resize(ir_hashtab *tab)

View File

@@ -1038,21 +1038,27 @@ static int32_t ir_ref_spill_slot_offset(ir_ctx *ctx, ir_ref ref, ir_reg *reg)
return IR_SPILL_POS_TO_OFFSET(offset);
}
static ir_mem ir_vreg_spill_slot(ir_ctx *ctx, ir_ref v)
{
int32_t offset;
ir_reg base;
IR_ASSERT(v > 0 && v <= ctx->vregs_count && ctx->live_intervals[v]);
offset = ctx->live_intervals[v]->stack_spill_pos;
IR_ASSERT(offset != -1);
if (ctx->live_intervals[v]->flags & IR_LIVE_INTERVAL_SPILL_SPECIAL) {
IR_ASSERT(ctx->spill_base != IR_REG_NONE);
return IR_MEM_BO(ctx->spill_base, offset);
}
base = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER;
offset = IR_SPILL_POS_TO_OFFSET(offset);
return IR_MEM_BO(base, offset);
}
static ir_mem ir_ref_spill_slot(ir_ctx *ctx, ir_ref ref)
{
ir_reg reg;
int32_t offset;
IR_ASSERT(ref >= 0);
offset = ctx->live_intervals[ctx->vregs[ref]]->stack_spill_pos;
IR_ASSERT(offset != -1);
if (ctx->live_intervals[ctx->vregs[ref]]->flags & IR_LIVE_INTERVAL_SPILL_SPECIAL) {
IR_ASSERT(ctx->spill_base != IR_REG_NONE);
reg = ctx->spill_base;
return IR_MEM_BO(reg, offset);
}
reg = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER;
return IR_MEM_BO(reg, IR_SPILL_POS_TO_OFFSET(offset));
IR_ASSERT(!IR_IS_CONST_REF(ref));
return ir_vreg_spill_slot(ctx, ctx->vregs[ref]);
}
static bool ir_is_same_spill_slot(ir_ctx *ctx, ir_ref ref, ir_mem mem)
@@ -1404,12 +1410,8 @@ static void ir_emit_store_mem_fp(ir_ctx *ctx, ir_type type, ir_mem mem, ir_reg r
}
}
static void ir_emit_store(ir_ctx *ctx, ir_type type, ir_ref dst, ir_reg reg)
static void ir_emit_store_mem(ir_ctx *ctx, ir_type type, ir_mem mem, ir_reg reg)
{
ir_mem mem;
IR_ASSERT(dst >= 0);
mem = ir_ref_spill_slot(ctx, dst);
if (IR_IS_TYPE_INT(type)) {
ir_emit_store_mem_int(ctx, type, mem, reg);
} else {
@@ -1417,6 +1419,12 @@ static void ir_emit_store(ir_ctx *ctx, ir_type type, ir_ref dst, ir_reg reg)
}
}
static void ir_emit_store(ir_ctx *ctx, ir_type type, ir_ref dst, ir_reg reg)
{
IR_ASSERT(dst >= 0);
ir_emit_store_mem(ctx, type, ir_ref_spill_slot(ctx, dst), reg);
}
static void ir_emit_mov(ir_ctx *ctx, ir_type type, ir_reg dst, ir_reg src)
{
ir_backend_data *data = ctx->data;
@@ -3558,11 +3566,7 @@ static void ir_emit_vstore(ir_ctx *ctx, ir_ref ref, ir_insn *insn)
ir_emit_load(ctx, type, op3_reg, insn->op3);
}
mem = IR_MEM_BO(fp, offset);
if (IR_IS_TYPE_INT(type)) {
ir_emit_store_mem_int(ctx, type, mem, op3_reg);
} else {
ir_emit_store_mem_fp(ctx, type, mem, op3_reg);
}
ir_emit_store_mem(ctx, type, mem, op3_reg);
}
static ir_mem ir_fuse_addr(ir_ctx *ctx, ir_ref root, ir_ref ref)
@@ -3944,7 +3948,7 @@ static void ir_emit_va_start(ir_ctx *ctx, ir_ref def, ir_insn *insn)
if (op2_reg != IR_REG_NONE) {
| str Rx(tmp_reg), [Rx(op2_reg)]
} else {
int32_t offset = ir_ref_spill_slot(ctx, insn->op2, &op2_reg);
int32_t offset = ir_ref_spill_slot_offset(ctx, insn->op2, &op2_reg);
| str Rx(tmp_reg), [Rx(op2_reg), #offset]
}
@@ -4033,7 +4037,7 @@ static void ir_emit_va_arg(ir_ctx *ctx, ir_ref def, ir_insn *insn)
if (op2_reg != IR_REG_NONE) {
| str Rx(tmp_reg), [Rx(op2_reg)]
} else {
int32_t offset = ir_ref_spill_slot(ctx, insn->op2, &op2_reg);
int32_t offset = ir_ref_spill_slot_offset(ctx, insn->op2, &op2_reg);
| str Rx(tmp_reg), [Rx(op2_reg), #offset]
}
@@ -4404,11 +4408,7 @@ static int32_t ir_emit_arguments(ir_ctx *ctx, ir_ref def, ir_insn *insn, ir_reg
} else {
/* Pass register arguments to stack (REG->MEM moves) */
if (!IR_IS_CONST_REF(arg) && src_reg != IR_REG_NONE && !IR_REG_SPILLED(src_reg)) {
if (IR_IS_TYPE_INT(type)) {
ir_emit_store_mem_int(ctx, type, IR_MEM_BO(IR_REG_STACK_POINTER, stack_offset), src_reg);
} else {
ir_emit_store_mem_fp(ctx, type, IR_MEM_BO(IR_REG_STACK_POINTER, stack_offset), src_reg);
}
ir_emit_store_mem(ctx, type, IR_MEM_BO(IR_REG_STACK_POINTER, stack_offset), src_reg);
} else {
do_pass3 = 1;
}

View File

@@ -162,39 +162,52 @@ void ir_dump_use_lists(const ir_ctx *ctx, FILE *f)
}
}
static int ir_dump_dessa_move(ir_ctx *ctx, uint8_t type, ir_ref from, ir_ref to)
static void ir_dump_dessa_moves(const ir_ctx *ctx, int b, ir_block *bb, FILE *f)
{
FILE *f = ctx->data;
int8_t reg;
uint32_t succ;
ir_block *succ_bb;
ir_use_list *use_list;
ir_ref k, i, *p, use_ref, input;
ir_insn *use_insn;
if (IR_IS_CONST_REF(from)) {
fprintf(f, "\tmov c_%d -> ", -from);
} else if (from) {
fprintf(f, "\tmov R%d", ctx->vregs[from]);
if (ctx->live_intervals && ctx->live_intervals[ctx->vregs[from]]) {
reg = ctx->live_intervals[ctx->vregs[from]]->reg;
if (reg >= 0) {
fprintf(f, " [%%%s]", ir_reg_name(reg, type));
}
}
fprintf(f, " -> ");
} else {
fprintf(f, "\tmov TMP -> ");
}
IR_ASSERT(bb->successors_count == 1);
succ = ctx->cfg_edges[bb->successors];
succ_bb = &ctx->cfg_blocks[succ];
IR_ASSERT(succ_bb->predecessors_count > 1);
use_list = &ctx->use_lists[succ_bb->start];
k = ir_phi_input_number(ctx, succ_bb, b);
if (to) {
fprintf(f, "R%d", ctx->vregs[to]);
if (ctx->live_intervals && ctx->live_intervals[ctx->vregs[to]]) {
reg = ctx->live_intervals[ctx->vregs[to]]->reg;
if (reg >= 0) {
fprintf(f, " [%%%s]", ir_reg_name(reg, type));
for (i = 0, p = &ctx->use_edges[use_list->refs]; i < use_list->count; i++, p++) {
use_ref = *p;
use_insn = &ctx->ir_base[use_ref];
if (use_insn->op == IR_PHI) {
input = ir_insn_op(use_insn, k);
if (IR_IS_CONST_REF(input)) {
fprintf(f, "\t# DESSA MOV c_%d", -input);
} else if (ctx->vregs[input] != ctx->vregs[use_ref]) {
fprintf(f, "\t# DESSA MOV d_%d {R%d}", input, ctx->vregs[input]);
} else {
continue;
}
if (ctx->regs) {
int8_t *regs = ctx->regs[use_ref];
int8_t reg = regs[k];
if (reg != IR_REG_NONE) {
fprintf(f, " {%%%s%s}", ir_reg_name(IR_REG_NUM(reg), ctx->ir_base[input].type),
(reg & (IR_REG_SPILL_LOAD|IR_REG_SPILL_SPECIAL)) ? ":load" : "");
}
}
fprintf(f, " -> d_%d {R%d}", use_ref, ctx->vregs[use_ref]);
if (ctx->regs) {
int8_t reg = ctx->regs[use_ref][0];
if (reg != IR_REG_NONE) {
fprintf(f, " {%%%s%s}", ir_reg_name(IR_REG_NUM(reg), ctx->ir_base[use_ref].type),
(reg & (IR_REG_SPILL_STORE|IR_REG_SPILL_SPECIAL)) ? ":store" : "");
}
}
fprintf(f, "\n");
}
fprintf(f, "\n");
} else {
fprintf(f, "TMP\n");
}
return 1;
}
void ir_dump_cfg(ir_ctx *ctx, FILE *f)
@@ -283,8 +296,7 @@ void ir_dump_cfg(ir_ctx *ctx, FILE *f)
}
}
if (bb->flags & IR_BB_DESSA_MOVES) {
ctx->data = f;
ir_gen_dessa_moves(ctx, b, ir_dump_dessa_move);
ir_dump_dessa_moves(ctx, b, bb, f);
}
}
fprintf(f, "}\n");
@@ -621,50 +633,7 @@ void ir_dump_codegen(const ir_ctx *ctx, FILE *f)
}
if (bb->flags & IR_BB_DESSA_MOVES) {
uint32_t succ;
ir_block *succ_bb;
ir_use_list *use_list;
ir_ref k, i, *p, use_ref, input;
ir_insn *use_insn;
IR_ASSERT(bb->successors_count == 1);
succ = ctx->cfg_edges[bb->successors];
succ_bb = &ctx->cfg_blocks[succ];
IR_ASSERT(succ_bb->predecessors_count > 1);
use_list = &ctx->use_lists[succ_bb->start];
k = ir_phi_input_number(ctx, succ_bb, b);
for (i = 0, p = &ctx->use_edges[use_list->refs]; i < use_list->count; i++, p++) {
use_ref = *p;
use_insn = &ctx->ir_base[use_ref];
if (use_insn->op == IR_PHI) {
input = ir_insn_op(use_insn, k);
if (IR_IS_CONST_REF(input)) {
fprintf(f, "\t# DESSA MOV c_%d", -input);
} else if (ctx->vregs[input] != ctx->vregs[use_ref]) {
fprintf(f, "\t# DESSA MOV d_%d {R%d}", input, ctx->vregs[input]);
} else {
continue;
}
if (ctx->regs) {
int8_t *regs = ctx->regs[use_ref];
int8_t reg = regs[k];
if (reg != IR_REG_NONE) {
fprintf(f, " {%%%s%s}", ir_reg_name(IR_REG_NUM(reg), ctx->ir_base[input].type),
(reg & (IR_REG_SPILL_LOAD|IR_REG_SPILL_SPECIAL)) ? ":load" : "");
}
}
fprintf(f, " -> d_%d {R%d}", use_ref, ctx->vregs[use_ref]);
if (ctx->regs) {
int8_t reg = ctx->regs[use_ref][0];
if (reg != IR_REG_NONE) {
fprintf(f, " {%%%s%s}", ir_reg_name(IR_REG_NUM(reg), ctx->ir_base[use_ref].type),
(reg & (IR_REG_SPILL_STORE|IR_REG_SPILL_SPECIAL)) ? ":store" : "");
}
}
fprintf(f, "\n");
}
}
ir_dump_dessa_moves(ctx, b, bb, f);
}
insn = &ctx->ir_base[bb->end];

View File

@@ -51,13 +51,11 @@ typedef struct _ir_copy {
ir_reg to;
} ir_copy;
typedef struct _ir_delayed_copy {
ir_ref input;
ir_ref output;
typedef struct _ir_dessa_copy {
ir_type type;
ir_reg from;
ir_reg to;
} ir_delayed_copy;
int32_t from; /* negative - constant ref, [0..IR_REG_NUM) - CPU reg, [IR_REG_NUM...) - virtual reg */
int32_t to; /* [0..IR_REG_NUM) - CPU reg, [IR_REG_NUM...) - virtual reg */
} ir_dessa_copy;
#if IR_REG_INT_ARGS
static const int8_t _ir_int_reg_params[IR_REG_INT_ARGS];
@@ -255,25 +253,6 @@ static int ir_get_args_regs(const ir_ctx *ctx, const ir_insn *insn, int8_t *regs
return count;
}
static bool ir_is_same_mem(const ir_ctx *ctx, ir_ref r1, ir_ref r2)
{
ir_live_interval *ival1, *ival2;
int32_t o1, o2;
if (IR_IS_CONST_REF(r1) || IR_IS_CONST_REF(r2)) {
return 0;
}
IR_ASSERT(ctx->vregs[r1] && ctx->vregs[r2]);
ival1 = ctx->live_intervals[ctx->vregs[r1]];
ival2 = ctx->live_intervals[ctx->vregs[r2]];
IR_ASSERT(ival1 && ival2);
o1 = ival1->stack_spill_pos;
o2 = ival2->stack_spill_pos;
IR_ASSERT(o1 != -1 && o2 != -1);
return o1 == o2;
}
static bool ir_is_same_mem_var(const ir_ctx *ctx, ir_ref r1, int32_t offset)
{
ir_live_interval *ival1;
@@ -479,7 +458,6 @@ static int ir_parallel_copy(ir_ctx *ctx, ir_copy *copies, int count, ir_reg tmp_
ir_reg to, from;
ir_type type;
ir_regset todo, ready, srcs;
ir_reg last_reg, last_fp_reg;
if (count == 1) {
to = copies[0].to;
@@ -529,6 +507,11 @@ static int ir_parallel_copy(ir_ctx *ctx, ir_copy *copies, int count, ir_reg tmp_
return 1;
}
/* temporary registers can't be the same as some of the destinations */
IR_ASSERT(tmp_reg == IR_REG_NONE || !IR_REGSET_IN(todo, tmp_reg));
IR_ASSERT(tmp_fp_reg == IR_REG_NONE || !IR_REGSET_IN(todo, tmp_fp_reg));
/* first we resolve all "windmill blades" - trees (this doesn't require temporary registers) */
while (ready != IR_REGSET_EMPTY) {
ir_reg r;
@@ -551,25 +534,11 @@ static int ir_parallel_copy(ir_ctx *ctx, ir_copy *copies, int count, ir_reg tmp_
return 1;
}
/* temporary registers may be the same as some of the destinations */
last_reg = IR_REG_NONE;
if (tmp_reg != IR_REG_NONE) {
IR_ASSERT(!IR_REGSET_IN(srcs, tmp_reg));
if (IR_REGSET_IN(todo, tmp_reg)) {
last_reg = tmp_reg;
IR_REGSET_EXCL(todo, tmp_reg);
}
}
last_fp_reg = IR_REG_NONE;
if (tmp_fp_reg != IR_REG_NONE) {
IR_ASSERT(!IR_REGSET_IN(srcs, tmp_fp_reg));
if (IR_REGSET_IN(todo, tmp_fp_reg)) {
last_fp_reg = tmp_fp_reg;
IR_REGSET_EXCL(todo, tmp_fp_reg);
}
}
/* at this point the sources that are the same as temporaries are already moved */
IR_ASSERT(tmp_reg == IR_REG_NONE || !IR_REGSET_IN(srcs, tmp_reg) || pred[loc[tmp_reg]] == tmp_reg);
IR_ASSERT(tmp_fp_reg == IR_REG_NONE || !IR_REGSET_IN(srcs, tmp_fp_reg) || pred[loc[tmp_fp_reg]] == tmp_fp_reg);
/* now we resolve all "windmill axles" - cycles (this requires temporary registers) */
while (todo != IR_REGSET_EMPTY) {
to = ir_regset_pop_first(&todo);
from = pred[to];
@@ -625,39 +594,271 @@ static int ir_parallel_copy(ir_ctx *ctx, ir_copy *copies, int count, ir_reg tmp_
}
}
if (last_reg != IR_REG_NONE) {
to = last_reg;
from = pred[to];
type = types[from];
from = loc[from];
if (to != from) {
IR_ASSERT(IR_IS_TYPE_INT(type));
ir_emit_mov_ext(ctx, type, to, from);
return 1;
}
static void ir_emit_dessa_move(ir_ctx *ctx, ir_type type, ir_ref to, ir_ref from, ir_reg tmp_reg, ir_reg tmp_fp_reg)
{
ir_mem mem_from, mem_to;
IR_ASSERT(from != to);
if (to < IR_REG_NUM) {
if (IR_IS_CONST_REF(from)) {
ir_emit_load(ctx, type, to, from);
} else if (from < IR_REG_NUM) {
if (IR_IS_TYPE_INT(type)) {
ir_emit_mov(ctx, type, to, from);
} else {
ir_emit_fp_mov(ctx, type, to, from);
}
} else {
mem_from = ir_vreg_spill_slot(ctx, from - IR_REG_NUM);
ir_emit_load_mem(ctx, type, to, mem_from);
}
} else {
mem_to = ir_vreg_spill_slot(ctx, to - IR_REG_NUM);
if (IR_IS_CONST_REF(from)) {
#if defined(IR_TARGET_X86) || defined(IR_TARGET_X64)
if (IR_IS_TYPE_INT(type)
&& !IR_IS_SYM_CONST(ctx->ir_base[from].op)
&& (ir_type_size[type] != 8 || IR_IS_SIGNED_32BIT(ctx->ir_base[from].val.i64))) {
ir_emit_store_mem_imm(ctx, type, mem_to, ctx->ir_base[from].val.i32);
return;
}
#endif
ir_reg tmp = IR_IS_TYPE_INT(type) ? tmp_reg : tmp_fp_reg;
IR_ASSERT(tmp != IR_REG_NONE);
ir_emit_load(ctx, type, tmp, from);
ir_emit_store_mem(ctx, type, mem_to, tmp);
} else if (from < IR_REG_NUM) {
ir_emit_store_mem(ctx, type, mem_to, from);
} else {
mem_from = ir_vreg_spill_slot(ctx, from - IR_REG_NUM);
IR_ASSERT(IR_MEM_VAL(mem_to) != IR_MEM_VAL(mem_from));
ir_reg tmp = IR_IS_TYPE_INT(type) ? tmp_reg : tmp_fp_reg;
IR_ASSERT(tmp != IR_REG_NONE);
ir_emit_load_mem(ctx, type, tmp, mem_from);
ir_emit_store_mem(ctx, type, mem_to, tmp);
}
}
}
IR_ALWAYS_INLINE void ir_dessa_resolve_cycle(ir_ctx *ctx, int32_t *pred, int32_t *loc, ir_bitset todo, ir_type type, int32_t to, ir_reg tmp_reg, ir_reg tmp_fp_reg)
{
ir_reg from;
ir_mem tmp_spill_slot;
IR_MEM_VAL(tmp_spill_slot) = 0;
IR_ASSERT(!IR_IS_CONST_REF(to));
from = pred[to];
IR_ASSERT(!IR_IS_CONST_REF(from));
IR_ASSERT(from != to);
IR_ASSERT(loc[from] == from);
if (IR_IS_TYPE_INT(type)) {
#ifdef IR_HAVE_SWAP_INT
if (pred[from] == to && to < IR_REG_NUM && from < IR_REG_NUM) {
/* a simple cycle from 2 elements */
ir_emit_swap(ctx, type, to, from);
ir_bitset_excl(todo, from);
ir_bitset_excl(todo, to);
loc[to] = from;
loc[from] = to;
return;
}
#endif
IR_ASSERT(tmp_reg != IR_REG_NONE);
IR_ASSERT(tmp_reg >= IR_REG_GP_FIRST && tmp_reg <= IR_REG_GP_LAST);
loc[to] = tmp_reg;
if (to < IR_REG_NUM) {
ir_emit_mov(ctx, type, tmp_reg, to);
} else {
ir_emit_load_mem_int(ctx, type, tmp_reg, ir_vreg_spill_slot(ctx, to - IR_REG_NUM));
}
} else {
#ifdef IR_HAVE_SWAP_FP
if (pred[from] == to && to < IR_REG_NUM && from < IR_REG_NUM) {
/* a simple cycle from 2 elements */
ir_emit_swap_fp(ctx, type, to, from);
IR_REGSET_EXCL(todo, from);
IR_REGSET_EXCL(todo, to);
loc[to] = from;
loc[from] = to;
return;
}
#endif
IR_ASSERT(tmp_fp_reg != IR_REG_NONE);
IR_ASSERT(tmp_fp_reg >= IR_REG_FP_FIRST && tmp_fp_reg <= IR_REG_FP_LAST);
loc[to] = tmp_fp_reg;
if (to < IR_REG_NUM) {
ir_emit_fp_mov(ctx, type, tmp_fp_reg, to);
} else {
ir_emit_load_mem_fp(ctx, type, tmp_fp_reg, ir_vreg_spill_slot(ctx, to - IR_REG_NUM));
}
}
if (last_fp_reg != IR_REG_NONE) {
to = last_fp_reg;
while (1) {
int32_t r;
from = pred[to];
type = types[from];
from = loc[from];
if (to != from) {
IR_ASSERT(!IR_IS_TYPE_INT(type));
ir_emit_fp_mov(ctx, type, to, from);
r = loc[from];
if (from == r && ir_bitset_in(todo, from)) {
/* Memory to memory move inside an isolated or "blocked" cycle requires an additional temporary register */
if (to >= IR_REG_NUM && r >= IR_REG_NUM) {
ir_reg tmp = IR_IS_TYPE_INT(type) ? tmp_reg : tmp_fp_reg;
if (!IR_MEM_VAL(tmp_spill_slot)) {
/* Free a register, saving it in a temporary spill slot */
tmp_spill_slot = IR_MEM_BO(IR_REG_STACK_POINTER, -16);
ir_emit_store_mem(ctx, type, tmp_spill_slot, tmp);
}
ir_emit_dessa_move(ctx, type, to, r, tmp_reg, tmp_fp_reg);
} else {
ir_emit_dessa_move(ctx, type, to, r, IR_REG_NONE, IR_REG_NONE);
}
ir_bitset_excl(todo, to);
loc[from] = to;
to = from;
} else {
break;
}
}
if (IR_MEM_VAL(tmp_spill_slot)) {
ir_emit_load_mem(ctx, type, IR_IS_TYPE_INT(type) ? tmp_reg : tmp_fp_reg, tmp_spill_slot);
}
ir_emit_dessa_move(ctx, type, to, loc[from], IR_REG_NONE, IR_REG_NONE);
ir_bitset_excl(todo, to);
loc[from] = to;
}
static int ir_dessa_parallel_copy(ir_ctx *ctx, ir_dessa_copy *copies, int count, ir_reg tmp_reg, ir_reg tmp_fp_reg)
{
int i;
int32_t *pred, *loc, to, from;
int8_t *types;
ir_type type;
uint32_t len;
ir_bitset todo, ready, srcs, visited;
if (count == 1) {
to = copies[0].to;
from = copies[0].from;
IR_ASSERT(from != to);
type = copies[0].type;
ir_emit_dessa_move(ctx, type, to, from, tmp_reg, tmp_fp_reg);
return 1;
}
len = IR_REG_NUM + ctx->vregs_count + 1;
todo = ir_bitset_malloc(len);
srcs = ir_bitset_malloc(len);
loc = ir_mem_malloc(len * 2 * sizeof(int32_t) + len * sizeof(int8_t));
pred = loc + len;
types = (int8_t*)(pred + len);
for (i = 0; i < count; i++) {
from = copies[i].from;
to = copies[i].to;
IR_ASSERT(from != to);
if (!IR_IS_CONST_REF(from)) {
ir_bitset_incl(srcs, from);
loc[from] = from;
}
pred[to] = from;
types[to] = copies[i].type;
IR_ASSERT(!ir_bitset_in(todo, to));
ir_bitset_incl(todo, to);
}
/* temporary registers can't be the same as some of the sources */
IR_ASSERT(tmp_reg == IR_REG_NONE || !ir_bitset_in(srcs, tmp_reg));
IR_ASSERT(tmp_fp_reg == IR_REG_NONE || !ir_bitset_in(srcs, tmp_fp_reg));
/* first we resolve all "windmill blades" - trees, that don't set temporary registers */
ready = ir_bitset_malloc(len);
ir_bitset_copy(ready, todo, ir_bitset_len(len));
ir_bitset_difference(ready, srcs, ir_bitset_len(len));
if (tmp_reg != IR_REG_NONE) {
ir_bitset_excl(ready, tmp_reg);
}
if (tmp_fp_reg != IR_REG_NONE) {
ir_bitset_excl(ready, tmp_fp_reg);
}
while ((to = ir_bitset_pop_first(ready, ir_bitset_len(len))) >= 0) {
ir_bitset_excl(todo, to);
type = types[to];
from = pred[to];
if (IR_IS_CONST_REF(from)) {
ir_emit_dessa_move(ctx, type, to, from, tmp_reg, tmp_fp_reg);
} else {
int32_t r = loc[from];
ir_emit_dessa_move(ctx, type, to, r, tmp_reg, tmp_fp_reg);
loc[from] = to;
if (from == r && ir_bitset_in(todo, from) && from != tmp_reg && from != tmp_fp_reg) {
ir_bitset_incl(ready, from);
}
}
}
/* then we resolve all "windmill axles" - cycles (this requires temporary registers) */
visited = ir_bitset_malloc(len);
ir_bitset_copy(ready, todo, ir_bitset_len(len));
ir_bitset_intersection(ready, srcs, ir_bitset_len(len));
while ((to = ir_bitset_first(ready, ir_bitset_len(len))) >= 0) {
ir_bitset_clear(visited, ir_bitset_len(len));
ir_bitset_incl(visited, to);
to = pred[to];
while (!IR_IS_CONST_REF(to) && ir_bitset_in(ready, to)) {
to = pred[to];
if (!IR_IS_CONST_REF(to) && ir_bitset_in(visited, to)) {
/* We found a cycle. Resolve it. */
ir_bitset_incl(visited, to);
type = types[to];
ir_dessa_resolve_cycle(ctx, pred, loc, todo, type, to, tmp_reg, tmp_fp_reg);
break;
}
ir_bitset_incl(visited, to);
}
ir_bitset_difference(ready, visited, ir_bitset_len(len));
}
/* finally we resolve remaining "windmill blades" - trees that set temporary registers */
ir_bitset_copy(ready, todo, ir_bitset_len(len));
ir_bitset_difference(ready, srcs, ir_bitset_len(len));
while ((to = ir_bitset_pop_first(ready, ir_bitset_len(len))) >= 0) {
ir_bitset_excl(todo, to);
type = types[to];
from = pred[to];
if (IR_IS_CONST_REF(from)) {
ir_emit_dessa_move(ctx, type, to, from, tmp_reg, tmp_fp_reg);
} else {
int32_t r = loc[from];
ir_emit_dessa_move(ctx, type, to, r, tmp_reg, tmp_fp_reg);
loc[from] = to;
if (from == r && ir_bitset_in(todo, from)) {
ir_bitset_incl(ready, from);
}
}
}
IR_ASSERT(ir_bitset_empty(todo, ir_bitset_len(len)));
ir_mem_free(visited);
ir_mem_free(ready);
ir_mem_free(loc);
ir_mem_free(srcs);
ir_mem_free(todo);
return 1;
}
static void ir_emit_dessa_moves(ir_ctx *ctx, int b, ir_block *bb)
{
uint32_t succ, k, n = 0, n2 = 0;
uint32_t succ, k, n = 0;
ir_block *succ_bb;
ir_use_list *use_list;
ir_ref i, *p;
ir_copy *copies;
ir_delayed_copy *copies2;
ir_dessa_copy *copies;
ir_reg tmp_reg = ctx->regs[bb->end][0];
ir_reg tmp_fp_reg = ctx->regs[bb->end][1];
@@ -668,8 +869,7 @@ static void ir_emit_dessa_moves(ir_ctx *ctx, int b, ir_block *bb)
use_list = &ctx->use_lists[succ_bb->start];
k = ir_phi_input_number(ctx, succ_bb, b);
copies = ir_mem_malloc(use_list->count * sizeof(ir_copy) + use_list->count * sizeof(ir_delayed_copy));
copies2 = (ir_delayed_copy*)(copies + use_list->count);
copies = alloca(use_list->count * sizeof(ir_dessa_copy));
for (i = 0, p = &ctx->use_edges[use_list->refs]; i < use_list->count; i++, p++) {
ir_ref ref = *p;
@@ -679,96 +879,29 @@ static void ir_emit_dessa_moves(ir_ctx *ctx, int b, ir_block *bb)
ir_ref input = ir_insn_op(insn, k);
ir_reg src = ir_get_alocated_reg(ctx, ref, k);
ir_reg dst = ctx->regs[ref][0];
ir_ref from, to;
if (dst == IR_REG_NONE) {
/* STORE to memory cannot clobber any input register (do it right now) */
if (IR_IS_CONST_REF(input)) {
IR_ASSERT(src == IR_REG_NONE);
#if defined(IR_TARGET_X86) || defined(IR_TARGET_X64)
if (IR_IS_TYPE_INT(insn->type)
&& !IR_IS_SYM_CONST(ctx->ir_base[input].op)
&& (ir_type_size[insn->type] != 8 || IR_IS_SIGNED_32BIT(ctx->ir_base[input].val.i64))) {
ir_emit_store_imm(ctx, insn->type, ref, ctx->ir_base[input].val.i32);
continue;
}
#endif
ir_reg tmp = IR_IS_TYPE_INT(insn->type) ? tmp_reg : tmp_fp_reg;
IR_ASSERT(tmp != IR_REG_NONE);
ir_emit_load(ctx, insn->type, tmp, input);
ir_emit_store(ctx, insn->type, ref, tmp);
} else if (src == IR_REG_NONE) {
if (!ir_is_same_mem(ctx, input, ref)) {
ir_reg tmp = IR_IS_TYPE_INT(insn->type) ? tmp_reg : tmp_fp_reg;
IR_ASSERT(tmp != IR_REG_NONE);
ir_emit_load(ctx, insn->type, tmp, input);
ir_emit_store(ctx, insn->type, ref, tmp);
}
} else {
if (IR_REG_SPILLED(src)) {
src = IR_REG_NUM(src);
ir_emit_load(ctx, insn->type, src, input);
if (ir_is_same_mem(ctx, input, ref)) {
continue;
}
}
ir_emit_store(ctx, insn->type, ref, src);
}
} else if (src == IR_REG_NONE) {
/* STORE of constant or memory can't be clobbered by parallel reg->reg copies (delay it) */
copies2[n2].input = input;
copies2[n2].output = ref;
copies2[n2].type = insn->type;
copies2[n2].from = src;
copies2[n2].to = dst;
n2++;
IR_ASSERT(dst == IR_REG_NONE || !IR_REG_SPILLED(dst));
if (IR_IS_CONST_REF(input)) {
from = input;
} else {
IR_ASSERT(!IR_IS_CONST_REF(input));
if (IR_REG_SPILLED(src)) {
ir_emit_load(ctx, insn->type, IR_REG_NUM(src), input);
}
if (IR_REG_SPILLED(dst) && (!IR_REG_SPILLED(src) || !ir_is_same_mem(ctx, input, ref))) {
ir_emit_store(ctx, insn->type, ref, IR_REG_NUM(src));
}
if (IR_REG_NUM(src) != IR_REG_NUM(dst)) {
/* Schedule parallel reg->reg copy */
copies[n].type = insn->type;
copies[n].from = IR_REG_NUM(src);
copies[n].to = IR_REG_NUM(dst);
n++;
}
from = (src != IR_REG_NONE && !IR_REG_SPILLED(src)) ?
(ir_ref)src : (ir_ref)(IR_REG_NUM + ctx->vregs[input]);
}
to = (dst != IR_REG_NONE) ?
(ir_ref)dst : (ir_ref)(IR_REG_NUM + ctx->vregs[ref]);
if (to != from) {
copies[n].type = insn->type;
copies[n].from = from;
copies[n].to = to;
n++;
}
}
}
if (n > 0) {
ir_parallel_copy(ctx, copies, n, tmp_reg, tmp_fp_reg);
ir_dessa_parallel_copy(ctx, copies, n, tmp_reg, tmp_fp_reg);
}
for (n = 0; n < n2; n++) {
ir_ref input = copies2[n].input;
ir_ref ref = copies2[n].output;
ir_type type = copies2[n].type;
ir_reg dst = copies2[n].to;
IR_ASSERT(dst != IR_REG_NONE);
if (IR_IS_CONST_REF(input)) {
ir_emit_load(ctx, type, IR_REG_NUM(dst), input);
} else {
IR_ASSERT(copies2[n].from == IR_REG_NONE);
if (IR_REG_SPILLED(dst) && ir_is_same_mem(ctx, input, ref)) {
/* avoid LOAD and STORE to the same memory */
continue;
}
ir_emit_load(ctx, type, IR_REG_NUM(dst), input);
}
if (IR_REG_SPILLED(dst)) {
ir_emit_store(ctx, type, ref, IR_REG_NUM(dst));
}
}
ir_mem_free(copies);
}
int ir_match(ir_ctx *ctx)

View File

@@ -2021,12 +2021,22 @@ int ir_compute_dessa_moves(ir_ctx *ctx)
return 1;
}
/*
* Parallel copy sequentialization algorithm
*
* The implementation is based on algorithm 1 described in
* "Revisiting Out-of-SSA Translation for Correctness, Code Quality and Efficiency",
* Benoit Boissinot, Alain Darte, Fabrice Rastello, Benoit Dupont de Dinechin, Christophe Guillon.
* 2009 International Symposium on Code Generation and Optimization, Seattle, WA, USA, 2009,
* pp. 114-125, doi: 10.1109/CGO.2009.19.
*/
int ir_gen_dessa_moves(ir_ctx *ctx, uint32_t b, emit_copy_t emit_copy)
{
uint32_t succ, k, n = 0;
ir_block *bb, *succ_bb;
ir_use_list *use_list;
ir_ref *loc, *pred, i, *p, ref, input;
ir_ref *loc, *pred, *src, *dst, i, *p, ref, input;
ir_ref s, d;
ir_insn *insn;
uint32_t len;
ir_bitset todo, ready;
@@ -2044,10 +2054,12 @@ int ir_gen_dessa_moves(ir_ctx *ctx, uint32_t b, emit_copy_t emit_copy)
k = ir_phi_input_number(ctx, succ_bb, b);
loc = ir_mem_malloc(ctx->insns_count * 2 * sizeof(ir_ref));
pred = loc + ctx->insns_count;
len = ir_bitset_len(ctx->insns_count);
todo = ir_bitset_malloc(ctx->insns_count);
loc = ir_mem_malloc((ctx->vregs_count + 1) * 4 * sizeof(ir_ref));
pred = loc + ctx->vregs_count + 1;
src = pred + ctx->vregs_count + 1;
dst = src + ctx->vregs_count + 1;
len = ir_bitset_len(ctx->vregs_count + 1);
todo = ir_bitset_malloc(ctx->vregs_count + 1);
for (i = 0, p = &ctx->use_edges[use_list->refs]; i < use_list->count; i++, p++) {
ref = *p;
@@ -2057,21 +2069,28 @@ int ir_gen_dessa_moves(ir_ctx *ctx, uint32_t b, emit_copy_t emit_copy)
if (IR_IS_CONST_REF(input)) {
have_constants = 1;
} else if (ctx->vregs[input] != ctx->vregs[ref]) {
loc[ref] = pred[input] = 0;
ir_bitset_incl(todo, ref);
s = ctx->vregs[input];
d = ctx->vregs[ref];
src[s] = input;
dst[d] = ref;
loc[d] = pred[s] = 0;
ir_bitset_incl(todo, d);
n++;
}
}
}
if (n > 0) {
ready = ir_bitset_malloc(ctx->insns_count);
IR_BITSET_FOREACH(todo, len, ref) {
src[0] = dst[0] = 0;
ready = ir_bitset_malloc(ctx->vregs_count + 1);
IR_BITSET_FOREACH(todo, len, d) {
ref = dst[d];
insn = &ctx->ir_base[ref];
IR_ASSERT(insn->op == IR_PHI);
input = ir_insn_op(insn, k);
loc[input] = input;
pred[ref] = input;
s = ctx->vregs[input];
loc[s] = s;
pred[d] = s;
} IR_BITSET_FOREACH_END();
IR_BITSET_FOREACH(todo, len, i) {
@@ -2086,9 +2105,10 @@ int ir_gen_dessa_moves(ir_ctx *ctx, uint32_t b, emit_copy_t emit_copy)
while ((b = ir_bitset_pop_first(ready, len)) >= 0) {
a = pred[b];
c = loc[a];
emit_copy(ctx, ctx->ir_base[b].type, c, b);
emit_copy(ctx, ctx->ir_base[dst[b]].type, src[c], dst[b]);
ir_bitset_excl(todo, b);
loc[a] = b;
src[b] = dst[b];
if (a == c && pred[a]) {
ir_bitset_incl(ready, a);
}
@@ -2098,7 +2118,7 @@ int ir_gen_dessa_moves(ir_ctx *ctx, uint32_t b, emit_copy_t emit_copy)
break;
}
IR_ASSERT(b != loc[pred[b]]);
emit_copy(ctx, ctx->ir_base[b].type, b, 0);
emit_copy(ctx, ctx->ir_base[src[b]].type, src[b], 0);
loc[b] = 0;
ir_bitset_incl(ready, b);
}

View File

@@ -2560,15 +2560,15 @@ static int32_t ir_ref_spill_slot_offset(ir_ctx *ctx, ir_ref ref, ir_reg *reg)
return IR_SPILL_POS_TO_OFFSET(offset);
}
static ir_mem ir_ref_spill_slot(ir_ctx *ctx, ir_ref ref)
static ir_mem ir_vreg_spill_slot(ir_ctx *ctx, ir_ref v)
{
int32_t offset;
ir_reg base;
IR_ASSERT(ref >= 0 && ctx->vregs[ref] && ctx->live_intervals[ctx->vregs[ref]]);
offset = ctx->live_intervals[ctx->vregs[ref]]->stack_spill_pos;
IR_ASSERT(v > 0 && v <= ctx->vregs_count && ctx->live_intervals[v]);
offset = ctx->live_intervals[v]->stack_spill_pos;
IR_ASSERT(offset != -1);
if (ctx->live_intervals[ctx->vregs[ref]]->flags & IR_LIVE_INTERVAL_SPILL_SPECIAL) {
if (ctx->live_intervals[v]->flags & IR_LIVE_INTERVAL_SPILL_SPECIAL) {
IR_ASSERT(ctx->spill_base != IR_REG_NONE);
return IR_MEM_BO(ctx->spill_base, offset);
}
@@ -2577,6 +2577,12 @@ static ir_mem ir_ref_spill_slot(ir_ctx *ctx, ir_ref ref)
return IR_MEM_BO(base, offset);
}
static ir_mem ir_ref_spill_slot(ir_ctx *ctx, ir_ref ref)
{
IR_ASSERT(!IR_IS_CONST_REF(ref));
return ir_vreg_spill_slot(ctx, ctx->vregs[ref]);
}
static bool ir_is_same_spill_slot(ir_ctx *ctx, ir_ref ref, ir_mem mem)
{
ir_mem m = ir_ref_spill_slot(ctx, ref);
@@ -2814,13 +2820,6 @@ static void ir_emit_store(ir_ctx *ctx, ir_type type, ir_ref dst, ir_reg reg)
ir_emit_store_mem(ctx, type, ir_ref_spill_slot(ctx, dst), reg);
}
static void ir_emit_store_imm(ir_ctx *ctx, ir_type type, ir_ref dst, int32_t imm)
{
IR_ASSERT(dst >= 0);
IR_ASSERT(IR_IS_TYPE_INT(type));
ir_emit_store_mem_imm(ctx, type, ir_ref_spill_slot(ctx, dst), imm);
}
static void ir_emit_mov(ir_ctx *ctx, ir_type type, ir_reg dst, ir_reg src)
{
ir_backend_data *data = ctx->data;