diff --git a/ext/opcache/jit/ir/ir_aarch64.dasc b/ext/opcache/jit/ir/ir_aarch64.dasc index b96d4746119..772eea7a5d7 100644 --- a/ext/opcache/jit/ir/ir_aarch64.dasc +++ b/ext/opcache/jit/ir/ir_aarch64.dasc @@ -1383,9 +1383,16 @@ static void ir_load_local_addr(ir_ctx *ctx, ir_reg reg, ir_ref src) ir_backend_data *data = ctx->data; dasm_State **Dst = &data->dasm_state; ir_reg base = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER; - int32_t offset = IR_SPILL_POS_TO_OFFSET(ctx->ir_base[src].op3); + ir_insn *var_insn; + int32_t offset; IR_ASSERT(ir_rule(ctx, src) == IR_STATIC_ALLOCA); + var_insn = &ctx->ir_base[src]; + if (var_insn->op == IR_VADDR) { + var_insn = &ctx->ir_base[var_insn->op1]; + } + IR_ASSERT(var_insn->op == IR_VAR || var_insn->op == IR_ALLOCA); + offset = IR_SPILL_POS_TO_OFFSET(var_insn->op3); if (aarch64_may_encode_imm12(offset)) { | add Rx(reg), Rx(base), #offset } else { @@ -5680,10 +5687,15 @@ static void ir_allocate_unique_spill_slots(ir_ctx *ctx) ir_reg reg = ir_get_free_reg(constraints.tmp_regs[n].type, available); ir_ref *ops = insn->ops; IR_REGSET_EXCL(available, reg); - if (constraints.tmp_regs[n].num > 0 - && IR_IS_CONST_REF(ops[constraints.tmp_regs[n].num])) { - /* rematerialization */ - reg |= IR_REG_SPILL_LOAD; + if (constraints.tmp_regs[n].num > 0) { + if (IR_IS_CONST_REF(ops[constraints.tmp_regs[n].num])) { + /* rematerialization */ + reg |= IR_REG_SPILL_LOAD; + } else if (ctx->ir_base[ops[constraints.tmp_regs[n].num]].op == IR_ALLOCA || + ctx->ir_base[ops[constraints.tmp_regs[n].num]].op == IR_VADDR) { + /* local address rematerialization */ + reg |= IR_REG_SPILL_LOAD; + } } ctx->regs[i][constraints.tmp_regs[n].num] = reg; } else if (constraints.tmp_regs[n].reg == IR_REG_SCRATCH) { diff --git a/ext/opcache/jit/ir/ir_gcm.c b/ext/opcache/jit/ir/ir_gcm.c index 12103a174d0..be8744ef198 100644 --- a/ext/opcache/jit/ir/ir_gcm.c +++ b/ext/opcache/jit/ir/ir_gcm.c @@ -1059,7 +1059,7 @@ restart: if (ctx->flags & 
IR_DEBUG_SCHEDULE) { fprintf(stderr, "After Schedule\n"); for (i = 1; i != 0; i = _next[i]) { - fprintf(stderr, "%d -> %d\n", i, _blocks[i]); + fprintf(stderr, "%d -> %d (%d)\n", i, _blocks[i], _xlat[i]); } } #endif @@ -1328,11 +1328,13 @@ restart: new_ctx.cfg_edges = ctx->cfg_edges; ctx->cfg_blocks = NULL; ctx->cfg_edges = NULL; + ir_code_buffer *saved_code_buffer = ctx->code_buffer; ir_free(ctx); IR_ASSERT(new_ctx.consts_count == new_ctx.consts_limit); IR_ASSERT(new_ctx.insns_count == new_ctx.insns_limit); memcpy(ctx, &new_ctx, sizeof(ir_ctx)); + ctx->code_buffer = saved_code_buffer; ctx->flags2 |= IR_LINEAR; ir_mem_free(_next); diff --git a/ext/opcache/jit/ir/ir_private.h b/ext/opcache/jit/ir/ir_private.h index f980b86b893..e28897e0dc4 100644 --- a/ext/opcache/jit/ir/ir_private.h +++ b/ext/opcache/jit/ir/ir_private.h @@ -1013,8 +1013,10 @@ IR_ALWAYS_INLINE uint32_t ir_insn_len(const ir_insn *insn) #define IR_HAS_FP_RET_SLOT (1<<10) #define IR_16B_FRAME_ALIGNMENT (1<<11) +/* Temporary: MEM2SSA -> SCCP */ +#define IR_MEM2SSA_VARS (1<<25) + /* Temporary: SCCP -> CFG */ -#define IR_SCCP_DONE (1<<25) #define IR_CFG_REACHABLE (1<<26) /* Temporary: Dominators -> Loops */ diff --git a/ext/opcache/jit/ir/ir_sccp.c b/ext/opcache/jit/ir/ir_sccp.c index 680e86c5086..996847d58a7 100644 --- a/ext/opcache/jit/ir/ir_sccp.c +++ b/ext/opcache/jit/ir/ir_sccp.c @@ -12,27 +12,48 @@ #include "ir.h" #include "ir_private.h" +#define IR_COMBO_COPY_PROPAGATION 1 + #define IR_TOP IR_UNUSED #define IR_BOTTOM IR_LAST_OP #define IR_MAKE_TOP(ref) do {IR_ASSERT(ref > 0); _values[ref].optx = IR_TOP;} while (0) #define IR_MAKE_BOTTOM(ref) do {IR_ASSERT(ref > 0); _values[ref].optx = IR_BOTTOM;} while (0) -#define IR_IS_TOP(ref) (ref >= 0 && _values[ref].optx == IR_TOP) -#define IR_IS_BOTTOM(ref) (ref >= 0 && _values[ref].optx == IR_BOTTOM) -#define IR_IS_FEASIBLE(ref) (ref >= 0 && _values[ref].optx != IR_TOP) +#define IR_IS_TOP(ref) (ref >= 0 && _values[ref].op == IR_TOP) +#define 
IR_IS_BOTTOM(ref) (ref >= 0 && _values[ref].op == IR_BOTTOM) +#define IR_IS_REACHABLE(ref) _ir_is_reachable_ctrl(ctx, _values, ref) +#define IR_IS_CONST(ref) (IR_IS_CONST_REF(ref) || IR_IS_CONST_OP(_values[ref].op)) -#define IR_COMBO_COPY_PROPAGATION 1 +IR_ALWAYS_INLINE bool _ir_is_reachable_ctrl(ir_ctx *ctx, ir_insn *_values, ir_ref ref) +{ + IR_ASSERT(!IR_IS_CONST_REF(ref)); + IR_ASSERT(ir_op_flags[ctx->ir_base[ref].op] & IR_OP_FLAG_CONTROL); + return _values[ref].op != IR_TOP; /* BOTTOM, IF or MERGE */ +} #if IR_COMBO_COPY_PROPAGATION -IR_ALWAYS_INLINE ir_ref ir_sccp_identity(ir_insn *_values, ir_ref a) +IR_ALWAYS_INLINE ir_ref ir_sccp_identity(ir_ctx *ctx, ir_insn *_values, ir_ref a) { if (a > 0 && _values[a].op == IR_COPY) { - a = _values[a].op1; - IR_ASSERT(a < 0 || _values[a].op != IR_COPY); /* this may be a copy of symbolic constant */ + do { + a = _values[a].op1; + } while (a > 0 && _values[a].op == IR_COPY); + IR_ASSERT(a < 0 || _values[a].op == IR_BOTTOM); + IR_ASSERT(a > 0 || IR_IS_SYM_CONST(ctx->ir_base[a].op)); } return a; } + +static void ir_sccp_add_identity(ir_ctx *ctx, ir_insn *_values, ir_ref src, ir_ref dst, ir_type type) +{ + IR_ASSERT(dst > 0 && _values[dst].op != IR_BOTTOM && _values[dst].op != IR_COPY); + IR_ASSERT((src > 0 && (_values[src].op == IR_BOTTOM || _values[src].op == IR_COPY)) + || (src < 0 && IR_IS_SYM_CONST(ctx->ir_base[src].op))); + IR_ASSERT(ir_sccp_identity(ctx, _values, src) != dst); + _values[dst].optx = IR_OPT(IR_COPY, type); + _values[dst].op1 = src; +} #endif static ir_ref ir_sccp_fold(ir_ctx *ctx, ir_insn *_values, ir_ref res, uint32_t opt, ir_ref op1, ir_ref op2, ir_ref op3) @@ -40,9 +61,9 @@ static ir_ref ir_sccp_fold(ir_ctx *ctx, ir_insn *_values, ir_ref res, uint32_t o ir_insn *op1_insn, *op2_insn, *op3_insn, *insn; #if IR_COMBO_COPY_PROPAGATION - op1 = ir_sccp_identity(_values, op1); - op2 = ir_sccp_identity(_values, op2); - op3 = ir_sccp_identity(_values, op3); + op1 = ir_sccp_identity(ctx, _values, op1); + op2 = 
ir_sccp_identity(ctx, _values, op2); + op3 = ir_sccp_identity(ctx, _values, op3); #endif restart: @@ -58,33 +79,31 @@ restart: op3 = ctx->fold_insn.op3; goto restart; case IR_FOLD_DO_EMIT: - IR_MAKE_BOTTOM(res); - return 1; + goto make_bottom; case IR_FOLD_DO_COPY: op1 = ctx->fold_insn.op1; -#if IR_COMBO_COPY_PROPAGATION - op1 = ir_sccp_identity(_values, op1); -#endif insn = (op1 > 0 && IR_IS_CONST_OP(_values[op1].op)) ? _values + op1 : ctx->ir_base + op1; if (IR_IS_CONST_OP(insn->op)) { /* pass */ -#if IR_COMBO_COPY_PROPAGATION - } else if (_values[res].optx == IR_TOP) { - _values[res].optx = IR_OPT(IR_COPY, insn->type); - _values[res].op1 = op1; - return 1; - } else if (_values[res].op == IR_COPY && _values[res].op1 == op1) { - return 0; /* not changed */ } else { - IR_ASSERT(_values[res].optx != IR_BOTTOM); - /* we don't check for widening */ - _values[res].optx = IR_OPT(IR_COPY, insn->type); - _values[res].op1 = op1; +#if IR_COMBO_COPY_PROPAGATION + if (_values[res].op == IR_TOP) { + /* pass to new copy */ + } else if (_values[res].op == IR_COPY) { + if (ir_sccp_identity(ctx, _values, _values[res].op1) == ir_sccp_identity(ctx, _values, op1)) { + return 0; /* not changed */ + } else { + goto make_bottom; + } + } else { + IR_ASSERT(_values[res].op != IR_BOTTOM); + /* we don't check for widening */ + } + /* create new COPY */ + ir_sccp_add_identity(ctx, _values, op1, res, insn->type); return 1; #else - } else { - IR_MAKE_BOTTOM(res); - return 1; + goto make_bottom; #endif } break; @@ -100,11 +119,13 @@ restart: _values[res].optx = IR_OPT(insn->type, insn->type); _values[res].val.u64 = insn->val.u64; return 1; - } else if (_values[res].opt != IR_OPT(insn->type, insn->type) || _values[res].val.u64 != insn->val.u64) { - IR_MAKE_BOTTOM(res); - return 1; + } else if (_values[res].opt == IR_OPT(insn->type, insn->type) && _values[res].val.u64 == insn->val.u64) { + return 0; /* not changed */ } - return 0; /* not changed */ + +make_bottom: + IR_MAKE_BOTTOM(res); + return 
1; } static bool ir_sccp_meet_phi(ir_ctx *ctx, ir_insn *_values, ir_ref i, ir_insn *insn, ir_bitqueue *worklist) @@ -112,14 +133,16 @@ static bool ir_sccp_meet_phi(ir_ctx *ctx, ir_insn *_values, ir_ref i, ir_insn *i ir_ref j, n, input, *merge_input, *p; ir_insn *v, *new_const = NULL; #if IR_COMBO_COPY_PROPAGATION - ir_ref new_copy; + ir_ref new_copy = IR_UNUSED; + ir_ref new_copy_identity = IR_UNUSED; + ir_ref phi_identity = ir_sccp_identity(ctx, _values, i); #endif - if (!IR_IS_FEASIBLE(insn->op1)) { + if (!IR_IS_REACHABLE(insn->op1)) { return 0; } n = insn->inputs_count; - if (n > 3 && _values[i].optx == IR_TOP) { + if (n > 3 && _values[i].op == IR_TOP) { for (j = 0; j < (n>>2); j++) { _values[i+j+1].optx = IR_BOTTOM; /* keep the tail of a long multislot instruction */ } @@ -129,7 +152,7 @@ static bool ir_sccp_meet_phi(ir_ctx *ctx, ir_insn *_values, ir_ref i, ir_insn *i merge_input = ctx->ir_base[insn->op1].ops + 1; for (; --n > 0; p++, merge_input++) { IR_ASSERT(*merge_input > 0); - if (_values[*merge_input].optx == IR_TOP) { + if (!IR_IS_REACHABLE(*merge_input)) { continue; } @@ -150,25 +173,30 @@ static bool ir_sccp_meet_phi(ir_ctx *ctx, ir_insn *_values, ir_ref i, ir_insn *i #if IR_COMBO_COPY_PROPAGATION } else if (v->op == IR_COPY) { input = v->op1; - IR_ASSERT(input < 0 || _values[input].op != IR_COPY); + new_copy_identity = ir_sccp_identity(ctx, _values, input); + if (new_copy_identity == phi_identity) { + new_copy_identity = IR_UNUSED; + continue; + } new_copy = input; goto next; +#endif } else if (v->op == IR_BOTTOM) { - new_copy = input; +#if IR_COMBO_COPY_PROPAGATION + if (input == phi_identity) { + continue; + } + new_copy = new_copy_identity = input; goto next; #else - } else if (v->op == IR_BOTTOM) { - IR_MAKE_BOTTOM(i); - return 1; + goto make_bottom; #endif } } - new_copy = IR_UNUSED; new_const = v; goto next; } - IR_ASSERT(_values[i].optx == IR_TOP); return 0; next: @@ -177,12 +205,17 @@ next: /* for all live merge inputs */ for (; --n > 0; p++, 
merge_input++) { IR_ASSERT(*merge_input > 0); - if (_values[*merge_input].optx == IR_TOP) { + if (!IR_IS_REACHABLE(*merge_input)) { continue; } input = *p; if (IR_IS_CONST_REF(input)) { +#if IR_COMBO_COPY_PROPAGATION + if (new_copy) { + goto make_bottom; + } +#endif v = &ctx->ir_base[input]; } else if (input == i) { continue; @@ -197,58 +230,55 @@ next: continue; #if IR_COMBO_COPY_PROPAGATION } else if (v->op == IR_COPY) { - input = v->op1; - IR_ASSERT(input < 0 || _values[input].op != IR_COPY); - if (new_copy == input) { + ir_ref identity = ir_sccp_identity(ctx, _values, v->op1); + + if (identity == phi_identity || identity == new_copy_identity) { continue; - } else { - IR_MAKE_BOTTOM(i); - return 1; } - } else if (v->op == IR_BOTTOM) { - if (new_copy == input) { - continue; - } else { - IR_MAKE_BOTTOM(i); - return 1; - } -#else - } else if (v->op == IR_BOTTOM) { - IR_MAKE_BOTTOM(i); - return 1; + goto make_bottom; #endif + } else if (v->op == IR_BOTTOM) { +#if IR_COMBO_COPY_PROPAGATION + if (input == phi_identity || input == new_copy_identity) { + continue; + } +#endif + goto make_bottom; } } if (!new_const || new_const->opt != v->opt || new_const->val.u64 != v->val.u64) { - IR_MAKE_BOTTOM(i); - return 1; + goto make_bottom; } } #if IR_COMBO_COPY_PROPAGATION if (new_copy) { - if (_values[i].op == IR_COPY && _values[i].op1 == new_copy) { - return 0; /* not changed */ + if (_values[i].op == IR_COPY) { + if (phi_identity == new_copy_identity) { + return 0; /* not changed */ + } else { + goto make_bottom; + } } else { - IR_ASSERT(_values[i].optx != IR_BOTTOM); + IR_ASSERT(_values[i].op != IR_BOTTOM); /* we don't check for widening */ - _values[i].optx = IR_OPT(IR_COPY, ctx->ir_base[new_copy].type); - _values[i].op1 = new_copy; + ir_sccp_add_identity(ctx, _values, new_copy, i, insn->type); return 1; } } #endif - if (_values[i].optx == IR_TOP) { + if (_values[i].op == IR_TOP) { _values[i].optx = new_const->opt; _values[i].val.u64 = new_const->val.u64; return 1; } else 
if (_values[i].opt == new_const->opt && _values[i].val.u64 == new_const->val.u64) { return 0; - } else { - IR_MAKE_BOTTOM(i); - return 1; } + +make_bottom: + IR_MAKE_BOTTOM(i); + return 1; } static bool ir_is_dead_load_ex(ir_ctx *ctx, ir_ref ref, uint32_t flags, ir_insn *insn) @@ -285,25 +315,6 @@ static bool ir_is_dead(ir_ctx *ctx, ir_ref ref) return 0; } -static ir_ref ir_find1(ir_ctx *ctx, uint32_t optx, ir_ref op1) -{ - IR_ASSERT(!IR_IS_CONST_REF(op1)); - - ir_use_list *use_list = &ctx->use_lists[op1]; - ir_ref *p, n = use_list->count; - - for (p = ctx->use_edges + use_list->refs; n > 0; p++, n--) { - ir_ref use = *p; - ir_insn *use_insn = &ctx->ir_base[use]; - - if (use_insn->optx == optx) { - IR_ASSERT(use_insn->op1 == op1); - return use; - } - } - return IR_UNUSED; -} - static bool ir_sccp_is_true(ir_ctx *ctx, ir_insn *_values, ir_ref a) { ir_insn *v = IR_IS_CONST_REF(a) ? &ctx->ir_base[a] : &_values[a]; @@ -321,6 +332,307 @@ static bool ir_sccp_is_equal(ir_ctx *ctx, ir_insn *_values, ir_ref a, ir_ref b) return v1->val.u64 == v2->val.u64; } +#ifdef IR_SCCP_TRACE +static void ir_sccp_trace_val(ir_ctx *ctx, ir_insn *_values, ir_ref i) +{ + if (IR_IS_BOTTOM(i)) { + fprintf(stderr, "BOTTOM"); + } else if (IR_IS_CONST_OP(_values[i].op) || IR_IS_SYM_CONST(_values[i].op)) { + fprintf(stderr, "CONST("); + ir_print_const(ctx, &_values[i], stderr, true); + fprintf(stderr, ")"); +#if IR_COMBO_COPY_PROPAGATION + } else if (_values[i].op == IR_COPY) { + fprintf(stderr, "COPY(%d)", _values[i].op1); +#endif + } else if (IR_IS_TOP(i)) { + fprintf(stderr, "TOP"); + } else if (_values[i].op == IR_IF) { + fprintf(stderr, "IF(%d)", _values[i].op1); + } else if (_values[i].op == IR_MERGE) { + fprintf(stderr, "MERGE(%d)", _values[i].op1); + } else { + fprintf(stderr, "%d", _values[i].op); + } +} + +static void ir_sccp_trace_start(ir_ctx *ctx, ir_insn *_values, ir_ref i) +{ + fprintf(stderr, "%d. 
", i); + ir_sccp_trace_val(ctx, _values, i); +} + +static void ir_sccp_trace_end(ir_ctx *ctx, ir_insn *_values, ir_ref i) +{ + fprintf(stderr, " -> "); + ir_sccp_trace_val(ctx, _values, i); + fprintf(stderr, "\n"); +} +#else +# define ir_sccp_trace_start(c, v, i) +# define ir_sccp_trace_end(c, v, i) +#endif + +static void ir_sccp_analyze(ir_ctx *ctx, ir_insn *_values, ir_bitqueue *worklist, ir_bitqueue *iter_worklist) +{ + ir_ref i, j, n, *p, use; + ir_use_list *use_list; + ir_insn *insn, *use_insn; + uint32_t flags; + + /* A bit modified SCCP algorithm of M. N. Wegman and F. K. Zadeck */ + worklist->pos = 0; + ir_bitset_incl(worklist->set, 1); + for (; (i = ir_bitqueue_pop(worklist)) >= 0; ir_sccp_trace_end(ctx, _values, i)) { + IR_ASSERT(_values[i].op != IR_BOTTOM); + ir_sccp_trace_start(ctx, _values, i); + insn = &ctx->ir_base[i]; + flags = ir_op_flags[insn->op]; + if (flags & IR_OP_FLAG_DATA) { + if (ctx->use_lists[i].count == 0) { + /* dead code */ + continue; + } else if (insn->op == IR_PHI) { + if (!ir_sccp_meet_phi(ctx, _values, i, insn, worklist)) { + continue; + } + } else if (EXPECTED(IR_IS_FOLDABLE_OP(insn->op))) { + bool may_benefit = 0; + bool has_top = 0; + + if ((ctx->flags2 & IR_MEM2SSA_VARS) || _values[i].op != IR_TOP) { + may_benefit = 1; + } + + IR_ASSERT(!IR_OP_HAS_VAR_INPUTS(flags)); + n = IR_INPUT_EDGES_COUNT(flags); + for (p = insn->ops + 1; n > 0; p++, n--) { + ir_ref input = *p; + if (input > 0) { + if (_values[input].op == IR_TOP) { + has_top = 1; + /* do backward propagaton only once */ + if (!_values[input].op1) { + _values[input].op1 = 1; + ir_bitqueue_add(worklist, input); + } + } else if (_values[input].op != IR_BOTTOM) { + /* Perform folding only if some of direct inputs + * is going to be replaced by a constant or copy. + * This approach may miss some folding optimizations + * dependent on indirect inputs. e.g. reassociation. 
+ */ + may_benefit = 1; + } + } + } + if (has_top) { + continue; + } + if (!may_benefit) { + IR_MAKE_BOTTOM(i); + if (insn->op == IR_FP2FP || insn->op == IR_FP2INT || insn->op == IR_TRUNC + || insn->op == IR_ZEXT || insn->op == IR_SEXT || insn->op == IR_EQ || insn->op == IR_NE) { + ir_bitqueue_add(iter_worklist, i); + } + } else if (!ir_sccp_fold(ctx, _values, i, insn->opt, insn->op1, insn->op2, insn->op3)) { + /* not changed */ + continue; + } else if (_values[i].op == IR_BOTTOM) { + insn = &ctx->ir_base[i]; + if (insn->op == IR_FP2FP || insn->op == IR_FP2INT || insn->op == IR_TRUNC + || insn->op == IR_ZEXT || insn->op == IR_SEXT || insn->op == IR_EQ || insn->op == IR_NE) { + ir_bitqueue_add(iter_worklist, i); + } + } + } else { + IR_MAKE_BOTTOM(i); + } + } else if (flags & IR_OP_FLAG_BB_START) { + if (insn->op == IR_MERGE || insn->op == IR_BEGIN) { + ir_bitqueue_add(iter_worklist, i); + } + if (insn->op == IR_MERGE || insn->op == IR_LOOP_BEGIN) { + ir_ref unfeasible_inputs = 0; + + n = insn->inputs_count; + if (n > 3 && _values[i].op == IR_TOP) { + for (j = 0; j < (n>>2); j++) { + _values[i+j+1].optx = IR_BOTTOM; /* keep the tail of a long multislot instruction */ + } + } + for (p = insn->ops + 1; n > 0; p++, n--) { + ir_ref input = *p; + IR_ASSERT(input > 0); + if (!IR_IS_REACHABLE(input)) { + unfeasible_inputs++; + } + } + if (unfeasible_inputs == 0) { + IR_MAKE_BOTTOM(i); + } else if (_values[i].op != IR_MERGE || _values[i].op1 != unfeasible_inputs) { + _values[i].optx = IR_MERGE; + _values[i].op1 = unfeasible_inputs; + } else { + continue; + } + } else { + IR_ASSERT(insn->op == IR_START || IR_IS_REACHABLE(insn->op1)); + IR_MAKE_BOTTOM(i); + } + } else { + IR_ASSERT(insn->op1 > 0); + if (!IR_IS_REACHABLE(insn->op1)) { + /* control input is not feasible */ + continue; + } + if (insn->op == IR_IF) { + if (IR_IS_TOP(insn->op2)) { + /* do backward propagation only once */ + if (!_values[insn->op2].op1) { + _values[insn->op2].op1 = 1; + ir_bitqueue_add(worklist, 
insn->op2); + } + continue; + } + if (IR_IS_CONST(insn->op2)) { + bool b = ir_sccp_is_true(ctx, _values, insn->op2); + use_list = &ctx->use_lists[i]; + IR_ASSERT(use_list->count == 2); + p = &ctx->use_edges[use_list->refs]; + use = *p; + use_insn = &ctx->ir_base[use]; + IR_ASSERT(use_insn->op == IR_IF_TRUE || use_insn->op == IR_IF_FALSE); + if ((use_insn->op == IR_IF_TRUE) != b) { + use = *(p+1); + IR_ASSERT(ctx->ir_base[use].op == IR_IF_TRUE || ctx->ir_base[use].op == IR_IF_FALSE); + } + if (_values[i].op == IR_TOP) { + _values[i].optx = IR_IF; + _values[i].op1 = use; + ir_bitqueue_add(worklist, use); + continue; + } else if (_values[i].op == IR_IF && _values[i].op1 == use) { + continue; + } + } + IR_MAKE_BOTTOM(i); + } else if (insn->op == IR_SWITCH) { + if (IR_IS_TOP(insn->op2)) { + /* do backward propagation only once */ + if (!_values[insn->op2].op1) { + _values[insn->op2].op1 = 1; + ir_bitqueue_add(worklist, insn->op2); + } + continue; + } + if (IR_IS_CONST(insn->op2)) { + ir_ref use_case = IR_UNUSED; + + use_list = &ctx->use_lists[i]; + n = use_list->count; + for (j = 0, p = &ctx->use_edges[use_list->refs]; j < n; j++, p++) { + use = *p; + IR_ASSERT(use > 0); + use_insn = &ctx->ir_base[use]; + if (use_insn->op == IR_CASE_VAL) { + if (ir_sccp_is_equal(ctx, _values, insn->op2, use_insn->op2)) { + use_case = use; + break; + } + } else if (use_insn->op == IR_CASE_DEFAULT) { + use_case = use; + } + } + if (use_case) { + use_insn = &ctx->ir_base[use_case]; + if (_values[i].op == IR_TOP) { + _values[i].optx = IR_IF; + _values[i].op1 = use_case; + ir_bitqueue_add(worklist, use_case); + continue; + } else if (_values[i].op == IR_IF || _values[i].op1 == use_case) { + continue; + } + } + } + IR_MAKE_BOTTOM(i); + } else if (ir_is_dead_load_ex(ctx, i, flags, insn)) { + /* schedule dead load elimination */ + ir_bitqueue_add(iter_worklist, i); + IR_MAKE_BOTTOM(i); + } else { + IR_MAKE_BOTTOM(i); + + /* control, call, load and store instructions may have unprocessed inputs 
*/ + n = IR_INPUT_EDGES_COUNT(flags); + if (IR_OP_HAS_VAR_INPUTS(flags) && (n = insn->inputs_count) > 3) { + for (j = 0; j < (n>>2); j++) { + _values[i+j+1].optx = IR_BOTTOM; /* keep the tail of a long multislot instruction */ + } + for (j = 2, p = insn->ops + j; j <= n; j++, p++) { + IR_ASSERT(IR_OPND_KIND(flags, j) == IR_OPND_DATA); + use = *p; + if (use > 0 && UNEXPECTED(_values[use].op == IR_TOP)) { + ir_bitqueue_add(worklist, use); + } + } + } else if (n >= 2) { + IR_ASSERT(IR_OPND_KIND(flags, 2) == IR_OPND_DATA); + use = insn->op2; + if (use > 0 && UNEXPECTED(_values[use].op == IR_TOP)) { + ir_bitqueue_add(worklist, use); + } + if (n > 2) { + IR_ASSERT(n == 3); + IR_ASSERT(IR_OPND_KIND(flags, 3) == IR_OPND_DATA); + use = insn->op3; + if (use > 0 && UNEXPECTED(_values[use].op == IR_TOP)) { + ir_bitqueue_add(worklist, use); + } + } + } + } + } + use_list = &ctx->use_lists[i]; + n = use_list->count; + for (p = &ctx->use_edges[use_list->refs]; n > 0; p++, n--) { + use = *p; + if (_values[use].op != IR_BOTTOM) { + ir_bitqueue_add(worklist, use); + } + } + } + +#ifdef IR_DEBUG + if (ctx->flags & IR_DEBUG_SCCP) { + for (i = 1; i < ctx->insns_count; i++) { + if (IR_IS_CONST_OP(_values[i].op) || IR_IS_SYM_CONST(_values[i].op)) { + fprintf(stderr, "%d. CONST(", i); + ir_print_const(ctx, &_values[i], stderr, true); + fprintf(stderr, ")\n"); +#if IR_COMBO_COPY_PROPAGATION + } else if (_values[i].op == IR_COPY) { + fprintf(stderr, "%d. COPY(%d)\n", i, _values[i].op1); +#endif + } else if (IR_IS_TOP(i)) { + fprintf(stderr, "%d. TOP\n", i); + } else if (_values[i].op == IR_IF) { + fprintf(stderr, "%d. IF(%d)\n", i, _values[i].op1); + } else if (_values[i].op == IR_MERGE) { + fprintf(stderr, "%d. MERGE(%d)\n", i, _values[i].op1); + } else if (!IR_IS_BOTTOM(i)) { + fprintf(stderr, "%d. 
%d\n", i, _values[i].op); + } + } + } +#endif +} + +/**********************/ +/* SCCP transformation */ +/**********************/ + static void ir_sccp_make_nop(ir_ctx *ctx, ir_ref ref) { ir_ref j, n, *p; @@ -358,31 +670,6 @@ static void ir_sccp_remove_insn(ir_ctx *ctx, ir_insn *_values, ir_ref ref, ir_bi } } -static void ir_sccp_remove_insn2(ir_ctx *ctx, ir_ref ref, ir_bitqueue *worklist) -{ - ir_ref j, n, *p; - ir_insn *insn; - - CLEAR_USES(ref); - insn = &ctx->ir_base[ref]; - n = insn->inputs_count; - insn->opt = IR_NOP; /* keep "inputs_count" */ - for (j = 1, p = insn->ops + j; j <= n; j++, p++) { - ir_ref input = *p; - *p = IR_UNUSED; - if (input > 0) { - ir_use_list_remove_all(ctx, input, ref); - if (ir_is_dead(ctx, input)) { - /* schedule DCE */ - ir_bitqueue_add(worklist, input); - } else if (ctx->ir_base[input].op == IR_PHI && ctx->use_lists[input].count == 1) { - /* try to optimize PHI into ABS/MIN/MAX/COND */ - ir_bitqueue_add(worklist, ctx->ir_base[input].op1); - } - } - } -} - static void ir_sccp_replace_insn(ir_ctx *ctx, ir_insn *_values, ir_ref ref, ir_ref new_ref, ir_bitqueue *worklist) { ir_ref j, n, *p, use, i; @@ -429,7 +716,7 @@ static void ir_sccp_replace_insn(ir_ctx *ctx, ir_insn *_values, ir_ref ref, ir_r for (j = 0; j < n; j++, p++) { use = *p; /* we may skip nodes that are going to be removed by SCCP (TOP, CONST and COPY) */ - if (_values[use].optx == IR_BOTTOM) { + if (_values[use].op == IR_BOTTOM) { insn = &ctx->ir_base[use]; i = ir_insn_find_op(insn, ref); IR_ASSERT(i > 0); @@ -448,155 +735,6 @@ static void ir_sccp_replace_insn(ir_ctx *ctx, ir_insn *_values, ir_ref ref, ir_r CLEAR_USES(ref); } -static void ir_sccp_replace_insn2(ir_ctx *ctx, ir_ref ref, ir_ref new_ref, ir_bitqueue *worklist) -{ - ir_ref i, j, n, *p, use; - ir_insn *insn; - ir_use_list *use_list; - - IR_ASSERT(ref != new_ref); - - insn = &ctx->ir_base[ref]; - n = insn->inputs_count; - insn->opt = IR_NOP; /* keep "inputs_count" */ - for (j = 1, p = insn->ops + 1; j <= n; 
j++, p++) { - ir_ref input = *p; - *p = IR_UNUSED; - if (input > 0) { - ir_use_list_remove_all(ctx, input, ref); - if (ir_is_dead(ctx, input)) { - /* schedule DCE */ - ir_bitqueue_add(worklist, input); - } else if (ctx->ir_base[input].op == IR_PHI && ctx->use_lists[input].count == 1) { - /* try to optimize PHI into ABS/MIN/MAX/COND */ - ir_bitqueue_add(worklist, input); - } - } - } - - use_list = &ctx->use_lists[ref]; - n = use_list->count; - p = &ctx->use_edges[use_list->refs]; - if (new_ref <= 0) { - /* constant or IR_UNUSED */ - for (; n; p++, n--) { - use = *p; - IR_ASSERT(use != ref); - insn = &ctx->ir_base[use]; - i = ir_insn_find_op(insn, ref); - IR_ASSERT(i > 0); - ir_insn_set_op(insn, i, new_ref); - /* schedule folding */ - ir_bitqueue_add(worklist, use); - } - } else { - for (j = 0; j < n; j++, p++) { - use = *p; - IR_ASSERT(use != ref); - insn = &ctx->ir_base[use]; - i = ir_insn_find_op(insn, ref); - IR_ASSERT(i > 0); - ir_insn_set_op(insn, i, new_ref); - if (ir_use_list_add(ctx, new_ref, use)) { - /* restore after reallocation */ - use_list = &ctx->use_lists[ref]; - n = use_list->count; - p = &ctx->use_edges[use_list->refs + j]; - } - /* schedule folding */ - ir_bitqueue_add(worklist, use); - } - } - CLEAR_USES(ref); -} - -static void ir_sccp_fold2(ir_ctx *ctx, ir_ref ref, ir_bitqueue *worklist) -{ - uint32_t opt; - ir_ref op1, op2, op3; - ir_insn *op1_insn, *op2_insn, *op3_insn, *insn; - - insn = &ctx->ir_base[ref]; - opt = insn->opt; - op1 = insn->op1; - op2 = insn->op2; - op3 = insn->op3; - -restart: - op1_insn = ctx->ir_base + op1; - op2_insn = ctx->ir_base + op2; - op3_insn = ctx->ir_base + op3; - - switch (ir_folding(ctx, opt, op1, op2, op3, op1_insn, op2_insn, op3_insn)) { - case IR_FOLD_DO_RESTART: - opt = ctx->fold_insn.optx; - op1 = ctx->fold_insn.op1; - op2 = ctx->fold_insn.op2; - op3 = ctx->fold_insn.op3; - goto restart; - case IR_FOLD_DO_EMIT: - insn = &ctx->ir_base[ref]; - if (insn->opt != ctx->fold_insn.opt - || insn->op1 != 
ctx->fold_insn.op1 - || insn->op2 != ctx->fold_insn.op2 - || insn->op3 != ctx->fold_insn.op3) { - - ir_use_list *use_list; - ir_ref n, j, *p, use; - - insn->optx = ctx->fold_insn.opt; - IR_ASSERT(!IR_OP_HAS_VAR_INPUTS(ir_op_flags[opt & IR_OPT_OP_MASK])); - insn->inputs_count = IR_INPUT_EDGES_COUNT(ir_op_flags[opt & IR_OPT_OP_MASK]); - if (insn->op1 != ctx->fold_insn.op1) { - if (insn->op1 > 0) { - ir_use_list_remove_one(ctx, insn->op1, ref); - } - if (ctx->fold_insn.op1 > 0) { - ir_use_list_add(ctx, ctx->fold_insn.op1, ref); - } - } - if (insn->op2 != ctx->fold_insn.op2) { - if (insn->op2 > 0) { - ir_use_list_remove_one(ctx, insn->op2, ref); - } - if (ctx->fold_insn.op2 > 0) { - ir_use_list_add(ctx, ctx->fold_insn.op2, ref); - } - } - if (insn->op3 != ctx->fold_insn.op3) { - if (insn->op3 > 0) { - ir_use_list_remove_one(ctx, insn->op3, ref); - } - if (ctx->fold_insn.op3 > 0) { - ir_use_list_add(ctx, ctx->fold_insn.op3, ref); - } - } - insn->op1 = ctx->fold_insn.op1; - insn->op2 = ctx->fold_insn.op2; - insn->op3 = ctx->fold_insn.op3; - - use_list = &ctx->use_lists[ref]; - n = use_list->count; - for (j = 0, p = &ctx->use_edges[use_list->refs]; j < n; j++, p++) { - use = *p; - ir_bitqueue_add(worklist, use); - } - } - break; - case IR_FOLD_DO_COPY: - op1 = ctx->fold_insn.op1; - ir_sccp_replace_insn2(ctx, ref, op1, worklist); - break; - case IR_FOLD_DO_CONST: - op1 = ir_const(ctx, ctx->fold_insn.val, ctx->fold_insn.type); - ir_sccp_replace_insn2(ctx, ref, op1, worklist); - break; - default: - IR_ASSERT(0); - break; - } -} - static void ir_sccp_remove_if(ir_ctx *ctx, ir_insn *_values, ir_ref ref, ir_ref dst) { ir_ref next; @@ -634,44 +772,71 @@ static void ir_sccp_remove_unfeasible_merge_inputs(ir_ctx *ctx, ir_insn *_values /* remove MERGE completely */ for (j = 1; j <= n; j++) { ir_ref input = ir_insn_op(insn, j); - if (input && IR_IS_FEASIBLE(input)) { + if (input && IR_IS_REACHABLE(input)) { ir_insn *input_insn = &ctx->ir_base[input]; IR_ASSERT(input_insn->op == 
IR_END || input_insn->op == IR_LOOP_END|| input_insn->op == IR_IJMP || input_insn->op == IR_UNREACHABLE); if (input_insn->op == IR_END || input_insn->op == IR_LOOP_END) { - if (input < ref) { - ir_ref prev, next = IR_UNUSED; - ir_insn *next_insn = NULL; + ir_ref prev, next = IR_UNUSED; + ir_insn *next_insn = NULL; - prev = input_insn->op1; - use_list = &ctx->use_lists[ref]; - if (use_list->count == 1) { - next = ctx->use_edges[use_list->refs]; - next_insn = &ctx->ir_base[next]; - } else { - for (k = 0, p = &ctx->use_edges[use_list->refs]; k < use_list->count; k++, p++) { - use = *p; - use_insn = &ctx->ir_base[use]; - IR_ASSERT((use_insn->op != IR_PHI) && "PHI must be already removed"); - if (ir_op_flags[use_insn->op] & IR_OP_FLAG_CONTROL) { - IR_ASSERT(!next); - next = use; - next_insn = use_insn; - } else if (use_insn->op != IR_NOP) { - IR_ASSERT(use_insn->op1 == ref); - IR_ASSERT(use_insn->op == IR_VAR); - ir_ref region = prev; - while (!IR_IS_BB_START(ctx->ir_base[region].op)) { - region = ctx->ir_base[region].op1; + prev = input_insn->op1; + use_list = &ctx->use_lists[ref]; + if (use_list->count == 1) { + next = ctx->use_edges[use_list->refs]; + next_insn = &ctx->ir_base[next]; + } else { + k = 0; + p = &ctx->use_edges[use_list->refs]; + while (k < use_list->count) { + use = *p; + use_insn = &ctx->ir_base[use]; +#if IR_COMBO_COPY_PROPAGATION + IR_ASSERT((use_insn->op != IR_PHI) && "PHI must be already removed"); +#else + if (use_insn->op == IR_PHI) { + /* Convert PHI into COPY */ + ir_ref i, n = use_insn->inputs_count; + + for (i = 2; i <= n; i++) { + if (i != j + 1) { + ir_ref from = ir_insn_op(use_insn, i); + if (from > 0) { + ir_use_list_remove_one(ctx, from, use); + } + ir_insn_set_op(use_insn, i, IR_UNUSED); } - use_insn->op1 = region; - ir_use_list_add(ctx, region, use); - p = &ctx->use_edges[use_list->refs + k]; } + use_insn->optx = IR_OPTX(IR_COPY, use_insn->type, 1); + use_insn->op1 = ir_insn_op(use_insn, j + 1); + ir_insn_set_op(use_insn, j + 1, 
IR_UNUSED); + ir_use_list_remove_one(ctx, ref, use); + p = &ctx->use_edges[use_list->refs + k]; + continue; } +#endif + if (ir_op_flags[use_insn->op] & IR_OP_FLAG_CONTROL) { + IR_ASSERT(!next); + next = use; + next_insn = use_insn; + } else if (use_insn->op != IR_NOP) { + IR_ASSERT(use_insn->op1 == ref); + IR_ASSERT(use_insn->op == IR_VAR); + ir_ref region = prev; + while (!IR_IS_BB_START(ctx->ir_base[region].op)) { + region = ctx->ir_base[region].op1; + } + use_insn->op1 = region; + ir_use_list_add(ctx, region, use); + p = &ctx->use_edges[use_list->refs + k]; + } + k++; + p++; } - IR_ASSERT(prev && next); + } + IR_ASSERT(prev && next); + if (prev < next) { /* remove MERGE and input END from double linked control list */ next_insn->op1 = prev; ir_use_list_replace_one(ctx, prev, input, next); @@ -752,6 +917,266 @@ static void ir_sccp_remove_unfeasible_merge_inputs(ir_ctx *ctx, ir_insn *_values } } +static void ir_sccp_transform(ir_ctx *ctx, ir_insn *_values, ir_bitqueue *worklist, ir_bitqueue *iter_worklist) +{ + ir_ref i, j; + ir_insn *value; + + for (i = 1, value = _values + i; i < ctx->insns_count; value++, i++) { + if (value->op == IR_BOTTOM) { + continue; + } else if (IR_IS_CONST_OP(value->op)) { + /* replace instruction by constant */ + j = ir_const(ctx, value->val, value->type); + ir_sccp_replace_insn(ctx, _values, i, j, iter_worklist); + } else if (IR_IS_SYM_CONST(value->op)) { + /* replace instruction by constant */ + j = ir_const_ex(ctx, value->val, value->type, value->optx); + ir_sccp_replace_insn(ctx, _values, i, j, iter_worklist); +#if IR_COMBO_COPY_PROPAGATION + } else if (value->op == IR_COPY) { + ir_sccp_replace_insn(ctx, _values, i, value->op1, iter_worklist); +#endif + } else if (value->op == IR_TOP) { + /* remove unreachable instruction */ + ir_insn *insn = &ctx->ir_base[i]; + + if (insn->op == IR_NOP) { + /* already removed */ + } else if (ir_op_flags[insn->op] & (IR_OP_FLAG_DATA|IR_OP_FLAG_MEM)) { + if (insn->op != IR_PARAM && (insn->op != 
IR_VAR || _values[insn->op1].op == IR_TOP)) { + ir_sccp_remove_insn(ctx, _values, i, iter_worklist); + } + } else { + if (ir_op_flags[insn->op] & IR_OP_FLAG_TERMINATOR) { + /* remove from terminators list */ + ir_ref prev = ctx->ir_base[1].op1; + if (prev == i) { + ctx->ir_base[1].op1 = insn->op3; + } else { + while (prev) { + if (ctx->ir_base[prev].op3 == i) { + ctx->ir_base[prev].op3 = insn->op3; + break; + } + prev = ctx->ir_base[prev].op3; + } + } + } + ir_sccp_replace_insn(ctx, _values, i, IR_UNUSED, iter_worklist); + } + } else if (value->op == IR_IF) { + /* remove one way IF/SWITCH */ + ir_sccp_remove_if(ctx, _values, i, value->op1); + } else if (value->op == IR_MERGE) { + /* schedule merge to remove unfeasible MERGE inputs */ + ir_bitqueue_add(worklist, i); + } + } + + while ((i = ir_bitqueue_pop(worklist)) >= 0) { + IR_ASSERT(_values[i].op == IR_MERGE); + ir_sccp_remove_unfeasible_merge_inputs(ctx, _values, i, _values[i].op1); + } +} + +/***************************/ +/* Iterative Optimizations */ +/***************************/ + +static ir_ref ir_find1(ir_ctx *ctx, uint32_t optx, ir_ref op1) +{ + IR_ASSERT(!IR_IS_CONST_REF(op1)); + + ir_use_list *use_list = &ctx->use_lists[op1]; + ir_ref *p, n = use_list->count; + + for (p = ctx->use_edges + use_list->refs; n > 0; p++, n--) { + ir_ref use = *p; + ir_insn *use_insn = &ctx->ir_base[use]; + + if (use_insn->optx == optx) { + IR_ASSERT(use_insn->op1 == op1); + return use; + } + } + return IR_UNUSED; +} + +static void ir_iter_remove_insn(ir_ctx *ctx, ir_ref ref, ir_bitqueue *worklist) +{ + ir_ref j, n, *p; + ir_insn *insn; + + CLEAR_USES(ref); + insn = &ctx->ir_base[ref]; + n = insn->inputs_count; + insn->opt = IR_NOP; /* keep "inputs_count" */ + for (j = 1, p = insn->ops + j; j <= n; j++, p++) { + ir_ref input = *p; + *p = IR_UNUSED; + if (input > 0) { + ir_use_list_remove_all(ctx, input, ref); + if (ir_is_dead(ctx, input)) { + /* schedule DCE */ + ir_bitqueue_add(worklist, input); + } else if 
(ctx->ir_base[input].op == IR_PHI && ctx->use_lists[input].count == 1) { + /* try to optimize PHI into ABS/MIN/MAX/COND */ + ir_bitqueue_add(worklist, ctx->ir_base[input].op1); + } + } + } +} + +static void ir_iter_replace_insn(ir_ctx *ctx, ir_ref ref, ir_ref new_ref, ir_bitqueue *worklist) +{ + ir_ref i, j, n, *p, use; + ir_insn *insn; + ir_use_list *use_list; + + IR_ASSERT(ref != new_ref); + + insn = &ctx->ir_base[ref]; + n = insn->inputs_count; + insn->opt = IR_NOP; /* keep "inputs_count" */ + for (j = 1, p = insn->ops + 1; j <= n; j++, p++) { + ir_ref input = *p; + *p = IR_UNUSED; + if (input > 0) { + ir_use_list_remove_all(ctx, input, ref); + if (ir_is_dead(ctx, input)) { + /* schedule DCE */ + ir_bitqueue_add(worklist, input); + } else if (ctx->ir_base[input].op == IR_PHI && ctx->use_lists[input].count == 1) { + /* try to optimize PHI into ABS/MIN/MAX/COND */ + ir_bitqueue_add(worklist, input); + } + } + } + + use_list = &ctx->use_lists[ref]; + n = use_list->count; + p = &ctx->use_edges[use_list->refs]; + if (new_ref <= 0) { + /* constant or IR_UNUSED */ + for (; n; p++, n--) { + use = *p; + IR_ASSERT(use != ref); + insn = &ctx->ir_base[use]; + i = ir_insn_find_op(insn, ref); + IR_ASSERT(i > 0); + ir_insn_set_op(insn, i, new_ref); + /* schedule folding */ + ir_bitqueue_add(worklist, use); + } + } else { + for (j = 0; j < n; j++, p++) { + use = *p; + IR_ASSERT(use != ref); + insn = &ctx->ir_base[use]; + i = ir_insn_find_op(insn, ref); + IR_ASSERT(i > 0); + ir_insn_set_op(insn, i, new_ref); + if (ir_use_list_add(ctx, new_ref, use)) { + /* restore after reallocation */ + use_list = &ctx->use_lists[ref]; + n = use_list->count; + p = &ctx->use_edges[use_list->refs + j]; + } + /* schedule folding */ + ir_bitqueue_add(worklist, use); + } + } + CLEAR_USES(ref); +} + +static void ir_iter_fold(ir_ctx *ctx, ir_ref ref, ir_bitqueue *worklist) +{ + uint32_t opt; + ir_ref op1, op2, op3; + ir_insn *op1_insn, *op2_insn, *op3_insn, *insn; + + insn = &ctx->ir_base[ref]; + opt 
= insn->opt; + op1 = insn->op1; + op2 = insn->op2; + op3 = insn->op3; + +restart: + op1_insn = ctx->ir_base + op1; + op2_insn = ctx->ir_base + op2; + op3_insn = ctx->ir_base + op3; + + switch (ir_folding(ctx, opt, op1, op2, op3, op1_insn, op2_insn, op3_insn)) { + case IR_FOLD_DO_RESTART: + opt = ctx->fold_insn.optx; + op1 = ctx->fold_insn.op1; + op2 = ctx->fold_insn.op2; + op3 = ctx->fold_insn.op3; + goto restart; + case IR_FOLD_DO_EMIT: + insn = &ctx->ir_base[ref]; + if (insn->opt != ctx->fold_insn.opt + || insn->op1 != ctx->fold_insn.op1 + || insn->op2 != ctx->fold_insn.op2 + || insn->op3 != ctx->fold_insn.op3) { + + ir_use_list *use_list; + ir_ref n, j, *p, use; + + insn->optx = ctx->fold_insn.opt; + IR_ASSERT(!IR_OP_HAS_VAR_INPUTS(ir_op_flags[opt & IR_OPT_OP_MASK])); + insn->inputs_count = IR_INPUT_EDGES_COUNT(ir_op_flags[opt & IR_OPT_OP_MASK]); + if (insn->op1 != ctx->fold_insn.op1) { + if (insn->op1 > 0) { + ir_use_list_remove_one(ctx, insn->op1, ref); + } + if (ctx->fold_insn.op1 > 0) { + ir_use_list_add(ctx, ctx->fold_insn.op1, ref); + } + } + if (insn->op2 != ctx->fold_insn.op2) { + if (insn->op2 > 0) { + ir_use_list_remove_one(ctx, insn->op2, ref); + } + if (ctx->fold_insn.op2 > 0) { + ir_use_list_add(ctx, ctx->fold_insn.op2, ref); + } + } + if (insn->op3 != ctx->fold_insn.op3) { + if (insn->op3 > 0) { + ir_use_list_remove_one(ctx, insn->op3, ref); + } + if (ctx->fold_insn.op3 > 0) { + ir_use_list_add(ctx, ctx->fold_insn.op3, ref); + } + } + insn->op1 = ctx->fold_insn.op1; + insn->op2 = ctx->fold_insn.op2; + insn->op3 = ctx->fold_insn.op3; + + use_list = &ctx->use_lists[ref]; + n = use_list->count; + for (j = 0, p = &ctx->use_edges[use_list->refs]; j < n; j++, p++) { + use = *p; + ir_bitqueue_add(worklist, use); + } + } + break; + case IR_FOLD_DO_COPY: + op1 = ctx->fold_insn.op1; + ir_iter_replace_insn(ctx, ref, op1, worklist); + break; + case IR_FOLD_DO_CONST: + op1 = ir_const(ctx, ctx->fold_insn.val, ctx->fold_insn.type); + ir_iter_replace_insn(ctx, 
ref, op1, worklist); + break; + default: + IR_ASSERT(0); + break; + } +} + static bool ir_may_promote_d2f(ir_ctx *ctx, ir_ref ref) { ir_insn *insn = &ctx->ir_base[ref]; @@ -1201,7 +1626,7 @@ static bool ir_try_promote_ext(ir_ctx *ctx, ir_ref ext_ref, ir_insn *insn, ir_bi } } - ir_sccp_replace_insn2(ctx, ext_ref, ref, worklist); + ir_iter_replace_insn(ctx, ext_ref, ref, worklist); phi_insn = &ctx->ir_base[ref]; if (IR_IS_CONST_REF(phi_insn->op2) @@ -2213,343 +2638,19 @@ static void ir_optimize_merge(ir_ctx *ctx, ir_ref merge_ref, ir_insn *merge, ir_ } } -int ir_sccp(ir_ctx *ctx) +static void ir_iter_opt(ir_ctx *ctx, ir_bitqueue *worklist) { - ir_ref i, j, n, *p, use; - ir_use_list *use_list; - ir_insn *insn, *use_insn, *value; - uint32_t flags; - ir_bitqueue worklist, worklist2; - ir_insn *_values = ir_mem_calloc(ctx->insns_count, sizeof(ir_insn)); + ir_ref i; + ir_insn *insn; - ctx->flags2 |= IR_OPT_IN_SCCP; - - /* A bit modified SCCP algorithm of M. N. Wegman and F. K. Zadeck */ - ir_bitqueue_init(&worklist2, ctx->insns_count); - ir_bitqueue_init(&worklist, ctx->insns_count); - worklist.pos = 0; - ir_bitset_incl(worklist.set, 1); - while ((i = ir_bitqueue_pop(&worklist)) >= 0) { - insn = &ctx->ir_base[i]; - flags = ir_op_flags[insn->op]; - if (flags & IR_OP_FLAG_DATA) { - if (ctx->use_lists[i].count == 0) { - /* dead code */ - continue; - } else if (insn->op == IR_PHI) { - if (!ir_sccp_meet_phi(ctx, _values, i, insn, &worklist)) { - continue; - } - } else if (EXPECTED(IR_IS_FOLDABLE_OP(insn->op))) { - bool may_benefit = 0; - bool has_top = 0; - - IR_ASSERT(!IR_OP_HAS_VAR_INPUTS(flags)); - n = IR_INPUT_EDGES_COUNT(flags); - for (p = insn->ops + 1; n > 0; p++, n--) { - ir_ref input = *p; - if (input > 0) { - if (_values[input].optx == IR_TOP) { - has_top = 1; - /* do backward propagaton only once */ - if (!_values[input].op1) { - _values[input].op1 = 1; - ir_bitqueue_add(&worklist, input); - } - } else if (_values[input].optx != IR_BOTTOM) { - /* Perform folding 
only if some of direct inputs - * is going to be replaced by a constant or copy. - * This approach may miss some folding optimizations - * dependent on indirect inputs. e.g. reassociation. - */ - may_benefit = 1; - } - } - } - if (has_top) { - continue; - } - if (!may_benefit) { - IR_MAKE_BOTTOM(i); - if (insn->op == IR_FP2FP || insn->op == IR_FP2INT || insn->op == IR_TRUNC - || insn->op == IR_ZEXT || insn->op == IR_SEXT || insn->op == IR_EQ || insn->op == IR_NE) { - ir_bitqueue_add(&worklist2, i); - } - } else if (!ir_sccp_fold(ctx, _values, i, insn->opt, insn->op1, insn->op2, insn->op3)) { - /* not changed */ - continue; - } else if (_values[i].optx == IR_BOTTOM) { - insn = &ctx->ir_base[i]; - if (insn->op == IR_FP2FP || insn->op == IR_FP2INT || insn->op == IR_TRUNC - || insn->op == IR_ZEXT || insn->op == IR_SEXT || insn->op == IR_EQ || insn->op == IR_NE) { - ir_bitqueue_add(&worklist2, i); - } - } - } else { - IR_MAKE_BOTTOM(i); - } - } else if (flags & IR_OP_FLAG_BB_START) { - if (insn->op == IR_MERGE || insn->op == IR_BEGIN) { - ir_bitqueue_add(&worklist2, i); - } - if (insn->op == IR_MERGE || insn->op == IR_LOOP_BEGIN) { - ir_ref unfeasible_inputs = 0; - - n = insn->inputs_count; - if (n > 3 && _values[i].optx == IR_TOP) { - for (j = 0; j < (n>>2); j++) { - _values[i+j+1].optx = IR_BOTTOM; /* keep the tail of a long multislot instruction */ - } - } - for (p = insn->ops + 1; n > 0; p++, n--) { - ir_ref input = *p; - IR_ASSERT(input > 0); - if (_values[input].optx == IR_TOP) { - unfeasible_inputs++; - } - } - if (unfeasible_inputs == 0) { - IR_MAKE_BOTTOM(i); - } else if (_values[i].op1 != unfeasible_inputs) { - _values[i].optx = IR_MERGE; - _values[i].op1 = unfeasible_inputs; - } else { - continue; - } - } else { - IR_ASSERT(insn->op == IR_START || IR_IS_FEASIBLE(insn->op1)); - IR_MAKE_BOTTOM(i); - } - } else { - IR_ASSERT(insn->op1 > 0); - if (_values[insn->op1].optx == IR_TOP) { - /* control inpt is not feasible */ - continue; - } - if (insn->op == IR_IF) { 
- if (IR_IS_TOP(insn->op2)) { - /* do backward propagaton only once */ - if (!_values[insn->op2].op1) { - _values[insn->op2].op1 = 1; - ir_bitqueue_add(&worklist, insn->op2); - } - continue; - } - if (!IR_IS_BOTTOM(insn->op2) -#if IR_COMBO_COPY_PROPAGATION - && (IR_IS_CONST_REF(insn->op2) || _values[insn->op2].op != IR_COPY) -#endif - ) { - bool b = ir_sccp_is_true(ctx, _values, insn->op2); - use_list = &ctx->use_lists[i]; - IR_ASSERT(use_list->count == 2); - p = &ctx->use_edges[use_list->refs]; - use = *p; - use_insn = &ctx->ir_base[use]; - IR_ASSERT(use_insn->op == IR_IF_TRUE || use_insn->op == IR_IF_FALSE); - if ((use_insn->op == IR_IF_TRUE) != b) { - use = *(p+1); - IR_ASSERT(ctx->ir_base[use].op == IR_IF_TRUE || ctx->ir_base[use].op == IR_IF_FALSE); - } - if (_values[i].optx == IR_TOP) { - _values[i].optx = IR_IF; - _values[i].op1 = use; - } else if (_values[i].optx != IR_IF || _values[i].op1 != use) { - IR_MAKE_BOTTOM(i); - } - if (!IR_IS_BOTTOM(use)) { - ir_bitqueue_add(&worklist, use); - } - continue; - } - IR_MAKE_BOTTOM(i); - } else if (insn->op == IR_SWITCH) { - if (IR_IS_TOP(insn->op2)) { - /* do backward propagaton only once */ - if (!_values[insn->op2].op1) { - _values[insn->op2].op1 = 1; - ir_bitqueue_add(&worklist, insn->op2); - } - continue; - } - if (!IR_IS_BOTTOM(insn->op2) -#if IR_COMBO_COPY_PROPAGATION - && (IR_IS_CONST_REF(insn->op2) || _values[insn->op2].op != IR_COPY) -#endif - ) { - ir_ref use_case = IR_UNUSED; - - use_list = &ctx->use_lists[i]; - n = use_list->count; - for (j = 0, p = &ctx->use_edges[use_list->refs]; j < n; j++, p++) { - use = *p; - IR_ASSERT(use > 0); - use_insn = &ctx->ir_base[use]; - if (use_insn->op == IR_CASE_VAL) { - if (ir_sccp_is_equal(ctx, _values, insn->op2, use_insn->op2)) { - use_case = use; - break; - } - } else if (use_insn->op == IR_CASE_DEFAULT) { - use_case = use; - } - } - if (use_case) { - use_insn = &ctx->ir_base[use_case]; - if (_values[i].optx == IR_TOP) { - _values[i].optx = IR_IF; - _values[i].op1 = 
use_case; - } else if (_values[i].optx != IR_IF || _values[i].op1 != use_case) { - IR_MAKE_BOTTOM(i); - } - if (!IR_IS_BOTTOM(use_case)) { - ir_bitqueue_add(&worklist, use_case); - } - } - if (!IR_IS_BOTTOM(i)) { - continue; - } - } - IR_MAKE_BOTTOM(i); - } else if (ir_is_dead_load_ex(ctx, i, flags, insn)) { - /* dead load */ - _values[i].optx = IR_LOAD; - } else { - IR_MAKE_BOTTOM(i); - - /* control, call, load and store instructions may have unprocessed inputs */ - n = IR_INPUT_EDGES_COUNT(flags); - if (IR_OP_HAS_VAR_INPUTS(flags) && (n = insn->inputs_count) > 3) { - for (j = 0; j < (n>>2); j++) { - _values[i+j+1].optx = IR_BOTTOM; /* keep the tail of a long multislot instruction */ - } - for (j = 2, p = insn->ops + j; j <= n; j++, p++) { - IR_ASSERT(IR_OPND_KIND(flags, j) == IR_OPND_DATA); - use = *p; - if (use > 0 && UNEXPECTED(_values[use].optx == IR_TOP)) { - ir_bitqueue_add(&worklist, use); - } - } - } else if (n >= 2) { - IR_ASSERT(IR_OPND_KIND(flags, 2) == IR_OPND_DATA); - use = insn->op2; - if (use > 0 && UNEXPECTED(_values[use].optx == IR_TOP)) { - ir_bitqueue_add(&worklist, use); - } - if (n > 2) { - IR_ASSERT(n == 3); - IR_ASSERT(IR_OPND_KIND(flags, 3) == IR_OPND_DATA); - use = insn->op3; - if (use > 0 && UNEXPECTED(_values[use].optx == IR_TOP)) { - ir_bitqueue_add(&worklist, use); - } - } - } - } - } - use_list = &ctx->use_lists[i]; - n = use_list->count; - for (p = &ctx->use_edges[use_list->refs]; n > 0; p++, n--) { - use = *p; - if (_values[use].optx != IR_BOTTOM) { - ir_bitqueue_add(&worklist, use); - } - } - } - -#ifdef IR_DEBUG - if (ctx->flags & IR_DEBUG_SCCP) { - for (i = 1; i < ctx->insns_count; i++) { - if (IR_IS_CONST_OP(_values[i].op) || IR_IS_SYM_CONST(_values[i].op)) { - fprintf(stderr, "%d. CONST(", i); - ir_print_const(ctx, &_values[i], stderr, true); - fprintf(stderr, ")\n"); -#if IR_COMBO_COPY_PROPAGATION - } else if (_values[i].op == IR_COPY) { - fprintf(stderr, "%d. 
COPY(%d)\n", i, _values[i].op1); -#endif - } else if (IR_IS_TOP(i)) { - fprintf(stderr, "%d. TOP\n", i); - } else if (_values[i].op == IR_IF) { - fprintf(stderr, "%d. IF(%d)\n", i, _values[i].op1); - } else if (_values[i].op == IR_MERGE) { - fprintf(stderr, "%d. MERGE(%d)\n", i, _values[i].op1); - } else if (!IR_IS_BOTTOM(i)) { - fprintf(stderr, "%d. %d\n", i, _values[i].op); - } - } - } -#endif - - for (i = 1, value = _values + i; i < ctx->insns_count; value++, i++) { - if (value->op == IR_BOTTOM) { - continue; - } else if (IR_IS_CONST_OP(value->op)) { - /* replace instruction by constant */ - j = ir_const(ctx, value->val, value->type); - ir_sccp_replace_insn(ctx, _values, i, j, &worklist2); - } else if (IR_IS_SYM_CONST(value->op)) { - /* replace instruction by constant */ - j = ir_const_ex(ctx, value->val, value->type, value->optx); - ir_sccp_replace_insn(ctx, _values, i, j, &worklist2); -#if IR_COMBO_COPY_PROPAGATION - } else if (value->op == IR_COPY) { - ir_sccp_replace_insn(ctx, _values, i, value->op1, &worklist2); -#endif - } else if (value->op == IR_TOP) { - /* remove unreachable instruction */ - insn = &ctx->ir_base[i]; - if (insn->op == IR_NOP) { - /* already removed */ - } else if (ir_op_flags[insn->op] & (IR_OP_FLAG_DATA|IR_OP_FLAG_MEM)) { - if (insn->op != IR_PARAM && (insn->op != IR_VAR || _values[insn->op1].op == IR_TOP)) { - ir_sccp_remove_insn(ctx, _values, i, &worklist2); - } - } else { - if (ir_op_flags[insn->op] & IR_OP_FLAG_TERMINATOR) { - /* remove from terminators list */ - ir_ref prev = ctx->ir_base[1].op1; - if (prev == i) { - ctx->ir_base[1].op1 = insn->op3; - } else { - while (prev) { - if (ctx->ir_base[prev].op3 == i) { - ctx->ir_base[prev].op3 = insn->op3; - break; - } - prev = ctx->ir_base[prev].op3; - } - } - } - ir_sccp_replace_insn(ctx, _values, i, IR_UNUSED, &worklist2); - } - } else if (value->op == IR_IF) { - /* remove one way IF/SWITCH */ - ir_sccp_remove_if(ctx, _values, i, value->op1); - } else if (value->op == IR_MERGE) { - /* 
schedule merge to remove unfeasible MERGE inputs */ - ir_bitqueue_add(&worklist, i); - } else if (value->op == IR_LOAD) { - /* schedule dead load elimination */ - ir_bitqueue_add(&worklist2, i); - } - } - - while ((i = ir_bitqueue_pop(&worklist)) >= 0) { - IR_ASSERT(_values[i].op == IR_MERGE); - ir_sccp_remove_unfeasible_merge_inputs(ctx, _values, i, _values[i].op1); - } - - ctx->flags2 |= IR_CFG_REACHABLE; - - while ((i = ir_bitqueue_pop(&worklist2)) >= 0) { + while ((i = ir_bitqueue_pop(worklist)) >= 0) { insn = &ctx->ir_base[i]; if (IR_IS_FOLDABLE_OP(insn->op)) { if (ctx->use_lists[i].count == 0) { if (insn->op == IR_PHI) { - ir_bitqueue_add(&worklist2, insn->op1); + ir_bitqueue_add(worklist, insn->op1); } - ir_sccp_remove_insn2(ctx, i, &worklist2); + ir_iter_remove_insn(ctx, i, worklist); } else { insn = &ctx->ir_base[i]; switch (insn->op) { @@ -2558,14 +2659,14 @@ int ir_sccp(ir_ctx *ctx) if (ir_may_promote_d2f(ctx, insn->op1)) { ir_ref ref = ir_promote_d2f(ctx, insn->op1, i); insn->op1 = ref; - ir_sccp_replace_insn2(ctx, i, ref, &worklist2); + ir_iter_replace_insn(ctx, i, ref, worklist); break; } } else { if (ir_may_promote_f2d(ctx, insn->op1)) { ir_ref ref = ir_promote_f2d(ctx, insn->op1, i); insn->op1 = ref; - ir_sccp_replace_insn2(ctx, i, ref, &worklist2); + ir_iter_replace_insn(ctx, i, ref, worklist); break; } } @@ -2585,13 +2686,13 @@ int ir_sccp(ir_ctx *ctx) if (ir_may_promote_i2i(ctx, insn->type, insn->op1)) { ir_ref ref = ir_promote_i2i(ctx, insn->type, insn->op1, i); insn->op1 = ref; - ir_sccp_replace_insn2(ctx, i, ref, &worklist2); + ir_iter_replace_insn(ctx, i, ref, worklist); break; } goto folding; case IR_SEXT: case IR_ZEXT: - if (ir_try_promote_ext(ctx, i, insn, &worklist2)) { + if (ir_try_promote_ext(ctx, i, insn, worklist)) { break; } goto folding; @@ -2599,7 +2700,7 @@ int ir_sccp(ir_ctx *ctx) break; default: folding: - ir_sccp_fold2(ctx, i, &worklist2); + ir_iter_fold(ctx, i, worklist); break; } } @@ -2609,10 +2710,10 @@ folding: } else if 
(insn->op == IR_BEGIN) { if (ctx->ir_base[insn->op1].op == IR_END && ctx->use_lists[i].count == 1) { - ir_merge_blocks(ctx, insn->op1, i, &worklist2); + ir_merge_blocks(ctx, insn->op1, i, worklist); } } else if (insn->op == IR_MERGE) { - ir_optimize_merge(ctx, i, insn, &worklist2); + ir_optimize_merge(ctx, i, insn, worklist); } } else if (ir_is_dead_load(ctx, i)) { ir_ref next = ctx->use_edges[ctx->use_lists[i].refs]; @@ -2621,16 +2722,34 @@ folding: ctx->ir_base[next].op1 = insn->op1; ir_use_list_replace_one(ctx, insn->op1, i, next); insn->op1 = IR_UNUSED; - ir_sccp_remove_insn2(ctx, i, &worklist2); + ir_iter_remove_insn(ctx, i, worklist); } } +} + +int ir_sccp(ir_ctx *ctx) +{ + ir_bitqueue sccp_worklist, iter_worklist; + ir_insn *_values; + + ctx->flags2 |= IR_OPT_IN_SCCP; + ir_bitqueue_init(&iter_worklist, ctx->insns_count); + ir_bitqueue_init(&sccp_worklist, ctx->insns_count); + _values = ir_mem_calloc(ctx->insns_count, sizeof(ir_insn)); + + ir_sccp_analyze(ctx, _values, &sccp_worklist, &iter_worklist); + ir_sccp_transform(ctx, _values, &sccp_worklist, &iter_worklist); ir_mem_free(_values); - ir_bitqueue_free(&worklist); - ir_bitqueue_free(&worklist2); + ir_bitqueue_free(&sccp_worklist); + + ctx->flags2 |= IR_CFG_REACHABLE; + + ir_iter_opt(ctx, &iter_worklist); + + ir_bitqueue_free(&iter_worklist); ctx->flags2 &= ~IR_OPT_IN_SCCP; - ctx->flags2 |= IR_SCCP_DONE; return 1; } diff --git a/ext/opcache/jit/ir/ir_x86.dasc b/ext/opcache/jit/ir/ir_x86.dasc index 58c6ed40f7d..dce15b5be3b 100644 --- a/ext/opcache/jit/ir/ir_x86.dasc +++ b/ext/opcache/jit/ir/ir_x86.dasc @@ -3090,9 +3090,16 @@ static void ir_load_local_addr(ir_ctx *ctx, ir_reg reg, ir_ref src) ir_backend_data *data = ctx->data; dasm_State **Dst = &data->dasm_state; ir_reg base = (ctx->flags & IR_USE_FRAME_POINTER) ? 
IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER; - int32_t offset = IR_SPILL_POS_TO_OFFSET(ctx->ir_base[src].op3); + ir_insn *var_insn; + int32_t offset; IR_ASSERT(ir_rule(ctx, src) == IR_STATIC_ALLOCA); + var_insn = &ctx->ir_base[src]; + if (var_insn->op == IR_VADDR) { + var_insn = &ctx->ir_base[var_insn->op1]; + } + IR_ASSERT(var_insn->op == IR_VAR || var_insn->op == IR_ALLOCA); + offset = IR_SPILL_POS_TO_OFFSET(var_insn->op3); if (offset == 0) { | mov Ra(reg), Ra(base) } else { @@ -7569,7 +7576,11 @@ static void ir_emit_vaddr(ir_ctx *ctx, ir_ref def, ir_insn *insn) mem = ir_var_spill_slot(ctx, insn->op1); fp = IR_MEM_BASE(mem); offset = IR_MEM_OFFSET(mem); - | lea Ra(def_reg), aword [Ra(fp)+offset] + if (offset == 0) { + | mov Ra(def_reg), Ra(fp) + } else { + | lea Ra(def_reg), aword [Ra(fp)+offset] + } if (IR_REG_SPILLED(ctx->regs[def][0])) { ir_emit_store(ctx, type, def, def_reg); } @@ -10237,10 +10248,15 @@ static void ir_allocate_unique_spill_slots(ir_ctx *ctx) ir_reg reg = ir_get_free_reg(constraints.tmp_regs[n].type, available); ir_ref *ops = insn->ops; IR_REGSET_EXCL(available, reg); - if (constraints.tmp_regs[n].num > 0 - && IR_IS_CONST_REF(ops[constraints.tmp_regs[n].num])) { - /* rematerialization */ - reg |= IR_REG_SPILL_LOAD; + if (constraints.tmp_regs[n].num > 0) { + if (IR_IS_CONST_REF(ops[constraints.tmp_regs[n].num])) { + /* rematerialization */ + reg |= IR_REG_SPILL_LOAD; + } else if (ctx->ir_base[ops[constraints.tmp_regs[n].num]].op == IR_ALLOCA || + ctx->ir_base[ops[constraints.tmp_regs[n].num]].op == IR_VADDR) { + /* local address rematerialization */ + reg |= IR_REG_SPILL_LOAD; + } } ctx->regs[i][constraints.tmp_regs[n].num] = reg; } else if (constraints.tmp_regs[n].reg == IR_REG_SCRATCH) {