From 793ddc7c8aa6ff47b8f2da70d961ed77755c67e9 Mon Sep 17 00:00:00 2001 From: Dmitry Stogov Date: Wed, 21 Feb 2024 23:36:52 +0300 Subject: [PATCH] Update IR IR commit: 0b557c0e4578cbfdbf8017f4adac335d795156dc --- ext/opcache/jit/ir/ir_aarch64.dasc | 27 ++++-- ext/opcache/jit/ir/ir_cfg.c | 99 +++++++++++++++++-- ext/opcache/jit/ir/ir_fold.h | 20 ++++ ext/opcache/jit/ir/ir_private.h | 1 + ext/opcache/jit/ir/ir_ra.c | 70 +++++++++++--- ext/opcache/jit/ir/ir_sccp.c | 150 +++++++++-------------------- ext/opcache/jit/ir/ir_x86.dasc | 32 +++--- 7 files changed, 248 insertions(+), 151 deletions(-) diff --git a/ext/opcache/jit/ir/ir_aarch64.dasc b/ext/opcache/jit/ir/ir_aarch64.dasc index 2c82712a9b2..def338dd338 100644 --- a/ext/opcache/jit/ir/ir_aarch64.dasc +++ b/ext/opcache/jit/ir/ir_aarch64.dasc @@ -700,7 +700,7 @@ static uint32_t ir_match_insn(ir_ctx *ctx, ir_ref ref) } else if (IR_IS_CONST_REF(insn->op1)) { // const } else if (op2_insn->val.i64 == 0) { - return IR_COPY_INT; + // return IR_COPY_INT; } } binop_int: @@ -721,7 +721,7 @@ binop_fp: } else if (op2_insn->val.u64 == 0) { // 0 } else if (op2_insn->val.u64 == 1) { - return IR_COPY_INT; + // return IR_COPY_INT; } else if (IR_IS_POWER_OF_TWO(op2_insn->val.u64)) { return IR_MUL_PWR2; } @@ -747,7 +747,7 @@ binop_fp: } else if (IR_IS_CONST_REF(insn->op1)) { // const } else if (op2_insn->val.u64 == 1) { - return IR_COPY_INT; + // return IR_COPY_INT; } else if (IR_IS_POWER_OF_TWO(op2_insn->val.u64)) { if (IR_IS_TYPE_UNSIGNED(insn->type)) { return IR_DIV_PWR2; @@ -798,7 +798,7 @@ binop_fp: } else if (IR_IS_CONST_REF(insn->op1)) { // const } else if (op2_insn->val.i64 == 0) { - return IR_COPY_INT; + // return IR_COPY_INT; } else if (op2_insn->val.i64 == -1) { // -1 } @@ -814,7 +814,7 @@ binop_fp: } else if (op2_insn->val.i64 == 0) { // 0 } else if (op2_insn->val.i64 == -1) { - return IR_COPY_INT; + // return IR_COPY_INT; } } goto binop_int; @@ -837,7 +837,7 @@ binop_fp: } else if (IR_IS_CONST_REF(insn->op1)) { // const } else if (op2_insn->val.u64 == 0) { - return IR_COPY_INT; + // return IR_COPY_INT; } else if (ir_type_size[insn->type] >= 4) { if (op2_insn->val.u64 == 1) { // lea [op1*2] @@ -863,7 +863,7 @@ binop_fp: } else if (IR_IS_CONST_REF(insn->op1)) { // const } else if (op2_insn->val.u64 == 0) { - return IR_COPY_INT; + // return IR_COPY_INT; } } return IR_SHIFT_CONST; @@ -880,11 +880,20 @@ binop_fp: // case IR_COND: case IR_COPY: if (IR_IS_TYPE_INT(insn->type)) { - return IR_COPY_INT; + return IR_COPY_INT | IR_MAY_REUSE; } else { - return IR_COPY_FP; + return IR_COPY_FP | IR_MAY_REUSE; } break; + case IR_TRUNC: + case IR_PROTO: + return insn->op | IR_MAY_REUSE; + case IR_BITCAST: + if (IR_IS_TYPE_INT(insn->type) && IR_IS_TYPE_INT(ctx->ir_base[insn->op1].type)) { + return insn->op | IR_MAY_REUSE; + } else { + return insn->op; + } case IR_CALL: ctx->flags2 |= IR_HAS_CALLS; return IR_CALL; diff --git a/ext/opcache/jit/ir/ir_cfg.c b/ext/opcache/jit/ir/ir_cfg.c index 3fa06b03e55..651c811bc43 100644 --- a/ext/opcache/jit/ir/ir_cfg.c +++ b/ext/opcache/jit/ir/ir_cfg.c @@ -210,6 +210,13 @@ static ir_ref ir_try_remove_empty_diamond(ir_ctx *ctx, ir_ref ref, ir_insn *insn } } +static bool ir_is_zero(ir_ctx *ctx, ir_ref ref) +{ + return IR_IS_CONST_REF(ref) + && !IR_IS_SYM_CONST(ctx->ir_base[ref].op) + && ctx->ir_base[ref].val.u32 == 0; +} + static ir_ref ir_optimize_phi(ir_ctx *ctx, ir_ref merge_ref, ir_insn *merge, ir_ref ref, ir_insn *insn) { IR_ASSERT(insn->inputs_count == 3); @@ -237,11 +244,15 @@ static ir_ref ir_optimize_phi(ir_ctx *ctx, ir_ref merge_ref, ir_insn *merge, ir_ if (IR_IS_TYPE_FP(type)) { is_cmp = (cond->op == IR_LT || cond->op == IR_LE || cond->op == IR_GT || cond->op == IR_GE || cond->op == IR_ULT || cond->op == IR_ULE || cond->op == IR_UGT || cond->op == IR_UGE); + is_less = (cond->op == IR_LT || cond->op == IR_LE || + cond->op == IR_ULT || cond->op == IR_ULE); } else if (IR_IS_TYPE_SIGNED(type)) { is_cmp = (cond->op == IR_LT || cond->op == IR_LE || cond->op == IR_GT || cond->op == IR_GE); + is_less = (cond->op == IR_LT || cond->op == IR_LE); } else { IR_ASSERT(IR_IS_TYPE_UNSIGNED(type)); is_cmp = (cond->op == IR_ULT || cond->op == IR_ULE || cond->op == IR_UGT || cond->op == IR_UGE); + is_less = (cond->op == IR_ULT || cond->op == IR_ULE); } if (is_cmp @@ -277,15 +288,6 @@ static ir_ref ir_optimize_phi(ir_ctx *ctx, ir_ref merge_ref, ir_insn *merge, ir_ IR_ASSERT(ctx->use_lists[start1_ref].count == 1); IR_ASSERT(ctx->use_lists[start2_ref].count == 1); - if (IR_IS_TYPE_FP(type)) { - is_less = (cond->op == IR_LT || cond->op == IR_LE || - cond->op == IR_ULT || cond->op == IR_ULE); - } else if (IR_IS_TYPE_SIGNED(type)) { - is_less = (cond->op == IR_LT || cond->op == IR_LE); - } else { - IR_ASSERT(IR_IS_TYPE_UNSIGNED(type)); - is_less = (cond->op == IR_ULT || cond->op == IR_ULE); - } insn->op = ( (is_less ? cond->op1 : cond->op2) == @@ -318,6 +320,85 @@ static ir_ref ir_optimize_phi(ir_ctx *ctx, ir_ref merge_ref, ir_insn *merge, ir_ MAKE_NOP(end2); CLEAR_USES(end2_ref); MAKE_NOP(merge); CLEAR_USES(merge_ref); + return next_ref; + } else if (is_cmp + && ((ctx->ir_base[insn->op2].op == IR_NEG + && ctx->use_lists[insn->op2].count == 1 + && ctx->ir_base[insn->op2].op1 == insn->op3 + && ((cond->op1 == insn->op3 + && ir_is_zero(ctx, cond->op2) + && is_less == (start1->op == IR_IF_TRUE)) + || (cond->op2 == insn->op3 + && ir_is_zero(ctx, cond->op1) + && is_less != (start1->op == IR_IF_TRUE)))) + || (ctx->ir_base[insn->op3].op == IR_NEG + && ctx->use_lists[insn->op3].count == 1 + && ctx->ir_base[insn->op3].op1 == insn->op2 + && ((cond->op1 == insn->op2 + && ir_is_zero(ctx, cond->op2) + && is_less != (start1->op == IR_IF_TRUE)) + || (cond->op2 == insn->op2 + && ir_is_zero(ctx, cond->op1) + && is_less == (start1->op == IR_IF_TRUE)))))) { + /* ABS + * + * prev prev + * | LT(A, 0) | + * | / | + * IF | + * | \ | + * | +-----+ | + * | IF_FALSE | + * IF_TRUE | => | + * | END | + * END / | + * | +---+ | + * | / | + * MERGE | + * | \ | + * | PHI(A, NEG(A)) | ABS(A) + * next next + */ + ir_ref neg_ref; + ir_ref next_ref = ctx->use_edges[ctx->use_lists[merge_ref].refs]; + ir_insn *next; + + if (next_ref == ref) { + next_ref = ctx->use_edges[ctx->use_lists[merge_ref].refs + 1]; + } + next = &ctx->ir_base[next_ref]; + + IR_ASSERT(ctx->use_lists[start1_ref].count == 1); + IR_ASSERT(ctx->use_lists[start2_ref].count == 1); + + insn->op = IR_ABS; + insn->inputs_count = 1; + if (ctx->ir_base[insn->op2].op == IR_NEG) { + neg_ref = insn->op2; + insn->op1 = insn->op3; + } else { + neg_ref = insn->op3; + insn->op1 = insn->op2; + } + insn->op2 = IR_UNUSED; + insn->op3 = IR_UNUSED; + + next->op1 = root->op1; + ir_use_list_replace(ctx, root->op1, root_ref, next_ref); + ir_use_list_remove_all(ctx, root->op2, root_ref); + if (!IR_IS_CONST_REF(insn->op1)) { + ir_use_list_remove_all(ctx, insn->op1, cond_ref); + } + + MAKE_NOP(cond); CLEAR_USES(cond_ref); + MAKE_NOP(root); CLEAR_USES(root_ref); + MAKE_NOP(start1); CLEAR_USES(start1_ref); + MAKE_NOP(start2); CLEAR_USES(start2_ref); + MAKE_NOP(end1); CLEAR_USES(end1_ref); + MAKE_NOP(end2); CLEAR_USES(end2_ref); + MAKE_NOP(merge); CLEAR_USES(merge_ref); + MAKE_NOP(&ctx->ir_base[neg_ref]); CLEAR_USES(neg_ref); + return next_ref; #if 0 } else { diff --git a/ext/opcache/jit/ir/ir_fold.h b/ext/opcache/jit/ir/ir_fold.h index f2ab22cb74a..ae3af72f09f 100644 --- a/ext/opcache/jit/ir/ir_fold.h +++ b/ext/opcache/jit/ir/ir_fold.h @@ -2257,6 +2257,26 @@ IR_FOLD(TRUNC(SEXT)) IR_FOLD_NEXT; } +IR_FOLD(TRUNC(BITCAST)) +IR_FOLD(ZEXT(BITCAST)) +IR_FOLD(SEXT(BITCAST)) +IR_FOLD(BITCAST(BITCAST)) +{ + if (IR_IS_TYPE_INT(op1_insn->type)) { + op1 = op1_insn->op1; + IR_FOLD_RESTART; + } + IR_FOLD_NEXT; +} + +//IR_FOLD(TRUNC(TRUNC)) +IR_FOLD(ZEXT(ZEXT)) +IR_FOLD(SEXT(SEXT)) +{ + op1 = op1_insn->op1; + IR_FOLD_RESTART; +} + IR_FOLD(TRUNC(AND)) { if (IR_IS_CONST_REF(op1_insn->op2)) { diff --git a/ext/opcache/jit/ir/ir_private.h b/ext/opcache/jit/ir/ir_private.h index a0001aed30e..f3b6a8d839f 100644 --- a/ext/opcache/jit/ir/ir_private.h +++ b/ext/opcache/jit/ir/ir_private.h @@ -1219,6 +1219,7 @@ IR_ALWAYS_INLINE int8_t ir_get_alocated_reg(const ir_ctx *ctx, ir_ref ref, int o #define IR_SIMPLE (1U<<29) /* Insn doesn't have any target constraints */ #define IR_FUSED_REG (1U<<28) /* Register assignemnt may be stored in ctx->fused_regs instead of ctx->regs */ #define IR_MAY_SWAP (1U<<27) /* Allow swapping operands for better register allocation */ +#define IR_MAY_REUSE (1U<<26) /* Result may reuse register of the source */ #define IR_RULE_MASK 0xff diff --git a/ext/opcache/jit/ir/ir_ra.c b/ext/opcache/jit/ir/ir_ra.c index d8d94316a63..4860dae0ca0 100644 --- a/ext/opcache/jit/ir/ir_ra.c +++ b/ext/opcache/jit/ir/ir_ra.c @@ -1535,6 +1535,33 @@ static ir_live_pos ir_vregs_overlap(ir_ctx *ctx, uint32_t r1, uint32_t r2) return ir_ivals_overlap(&ival1->range, &ival2->range); } +static bool ir_ivals_inside(ir_live_range *parent, ir_live_range *child) +{ + do { + while (parent && parent->end < child->start) { + parent = parent->next; + } + if (!parent || parent->start > child->start || parent->end < child->end) { + return 0; + } + child = child->next; + } while (child); + return 1; +} + +static bool ir_vregs_inside(ir_ctx *ctx, uint32_t parent, uint32_t child) +{ + ir_live_interval *child_ival = ctx->live_intervals[child]; + ir_live_interval *parent_ival = ctx->live_intervals[parent]; + +#if 0 + if (child_ival->end >= parent_ival->end) { + return 0; + } +#endif + return ir_ivals_inside(&parent_ival->range, &child_ival->range); +} + static void ir_vregs_join(ir_ctx *ctx, uint32_t r1, uint32_t r2) { ir_live_interval *ival = ctx->live_intervals[r2]; @@ -1922,29 +1949,33 @@ int ir_coalesce(ir_ctx *ctx) if (ctx->rules) { /* try to swap operands of commutative instructions for better register allocation */ - for (b = 1, bb = &ctx->cfg_blocks[1]; b <= ctx->cfg_blocks_count; b++, bb++) { - ir_ref i; + uint32_t *rule = ctx->rules + 1; + ir_ref i; - IR_ASSERT(!(bb->flags & IR_BB_UNREACHABLE)); - i = bb->end; - - /* skip last instruction */ - i = ctx->prev_ref[i]; - - while (i != bb->start) { - if (ctx->rules[i] & IR_MAY_SWAP) { - insn = &ctx->ir_base[i]; + for (i = 1; i < ctx->insns_count; rule++, i++) { + if ((*rule) & (IR_MAY_SWAP|IR_MAY_REUSE)) { + insn = &ctx->ir_base[i]; + IR_ASSERT(ctx->vregs[i]); + if ((*rule) & IR_MAY_SWAP) { IR_ASSERT(ir_op_flags[insn->op] & IR_OP_FLAG_COMMUTATIVE); - if (ctx->vregs[i] - && ctx->live_intervals[ctx->vregs[i]]->use_pos + if (ctx->live_intervals[ctx->vregs[i]]->use_pos && (ctx->live_intervals[ctx->vregs[i]]->use_pos->flags & IR_DEF_REUSES_OP1_REG) && insn->op2 > 0 && insn->op1 > 0 && insn->op1 != insn->op2) { ir_try_swap_operands(ctx, i, insn); } + } else { + IR_ASSERT((*rule) & IR_MAY_REUSE); + if (insn->op1 > 0 + && ctx->vregs[insn->op1] + && ctx->vregs[i] != ctx->vregs[insn->op1]) { + if (ir_vregs_inside(ctx, ctx->vregs[insn->op1], ctx->vregs[i])) { + ir_vregs_coalesce(ctx, ctx->vregs[i], ctx->vregs[insn->op1], i, insn->op1); + compact = 1; + } + } } - i = ctx->prev_ref[i]; } } } @@ -3807,6 +3838,17 @@ static void assign_regs(ir_ctx *ctx) ref = IR_LIVE_POS_TO_REF(use_pos->pos); // TODO: Insert spill loads and stores in optimal positions (resolution) if (use_pos->op_num == 0) { + if ((ctx->ir_base[ref].op == IR_COPY + || ctx->ir_base[ref].op == IR_BITCAST + || ctx->ir_base[ref].op == IR_TRUNC) + && !IR_IS_CONST_REF(ctx->ir_base[ref].op1) + && ctx->vregs[ctx->ir_base[ref].op1] == (uint32_t)i) { + /* register reuse */ + ir_set_alocated_reg(ctx, ref, use_pos->op_num, reg); + prev_use_ref = ref; + use_pos = use_pos->next; + continue; + } ir_bitset_clear(available, ir_bitset_len(ctx->cfg_blocks_count + 1)); if (ctx->ir_base[ref].op == IR_PHI) { /* Spilled PHI var is passed through memory */ diff --git a/ext/opcache/jit/ir/ir_sccp.c b/ext/opcache/jit/ir/ir_sccp.c index 85bdcb2351c..0a25476e762 100644 --- a/ext/opcache/jit/ir/ir_sccp.c +++ b/ext/opcache/jit/ir/ir_sccp.c @@ -688,26 +688,7 @@ static void ir_sccp_remove_unfeasible_merge_inputs(ir_ctx *ctx, ir_insn *_values } } -static void ir_replace_inputs(ir_ctx *ctx, ir_ref ref, ir_ref input, ir_ref new_input) -{ - ir_use_list *use_list = &ctx->use_lists[ref]; - ir_ref n = use_list->count; - ir_ref *p = &ctx->use_edges[use_list->refs]; - - for (; n; p++, n--) { - ir_ref use = *p; - ir_insn *insn = &ctx->ir_base[use]; - ir_ref k, l = insn->inputs_count; - - for (k = 1; k <= l; k++) { - if (ir_insn_op(insn, k) == input) { - ir_insn_set_op(insn, k, new_input); - } - } - } -} - -static bool ir_may_promote_d2f_op(ir_ctx *ctx, ir_ref ref) +static bool ir_may_promote_d2f(ir_ctx *ctx, ir_ref ref) { ir_insn *insn = &ctx->ir_base[ref]; @@ -723,7 +704,7 @@ static bool ir_may_promote_d2f_op(ir_ctx *ctx, ir_ref ref) case IR_NEG: case IR_ABS: return ctx->use_lists[ref].count == 1 && - ir_may_promote_d2f_op(ctx, insn->op1); + ir_may_promote_d2f(ctx, insn->op1); case IR_ADD: case IR_SUB: case IR_MUL: @@ -731,8 +712,8 @@ static bool ir_may_promote_d2f_op(ir_ctx *ctx, ir_ref ref) case IR_MIN: case IR_MAX: return ctx->use_lists[ref].count == 1 && - ir_may_promote_d2f_op(ctx, insn->op1) && - ir_may_promote_d2f_op(ctx, insn->op2); + ir_may_promote_d2f(ctx, insn->op1) && + ir_may_promote_d2f(ctx, insn->op2); default: break; } @@ -740,7 +721,7 @@ static bool ir_may_promote_d2f_op(ir_ctx *ctx, ir_ref ref) return 0; } -static bool ir_may_promote_f2d_op(ir_ctx *ctx, ir_ref ref) +static bool ir_may_promote_f2d(ir_ctx *ctx, ir_ref ref) { ir_insn *insn = &ctx->ir_base[ref]; @@ -756,7 +737,7 @@ static bool ir_may_promote_f2d_op(ir_ctx *ctx, ir_ref ref) case IR_NEG: case IR_ABS: return ctx->use_lists[ref].count == 1 && - ir_may_promote_f2d_op(ctx, insn->op1); + ir_may_promote_f2d(ctx, insn->op1); case IR_ADD: case IR_SUB: case IR_MUL: @@ -764,8 +745,8 @@ static bool ir_may_promote_f2d_op(ir_ctx *ctx, ir_ref ref) case IR_MIN: case IR_MAX: return ctx->use_lists[ref].count == 1 && - ir_may_promote_f2d_op(ctx, insn->op1) && - ir_may_promote_f2d_op(ctx, insn->op2); + ir_may_promote_f2d(ctx, insn->op1) && + ir_may_promote_f2d(ctx, insn->op2); default: break; } @@ -773,7 +754,7 @@ static bool ir_may_promote_f2d_op(ir_ctx *ctx, ir_ref ref) return 0; } -static ir_ref ir_promote_d2f_op(ir_ctx *ctx, ir_ref ref, ir_ref use) +static ir_ref ir_promote_d2f(ir_ctx *ctx, ir_ref ref, ir_ref use) { ir_insn *insn = &ctx->ir_base[ref]; @@ -799,7 +780,7 @@ static ir_ref ir_promote_d2f_op(ir_ctx *ctx, ir_ref ref, ir_ref use) // return ref; case IR_NEG: case IR_ABS: - insn->op1 = ir_promote_d2f_op(ctx, insn->op1, ref); + insn->op1 = ir_promote_d2f(ctx, insn->op1, ref); insn->type = IR_FLOAT; return ref; case IR_ADD: @@ -809,10 +790,10 @@ static ir_ref ir_promote_d2f_op(ir_ctx *ctx, ir_ref ref, ir_ref use) case IR_MIN: case IR_MAX: if (insn->op1 == insn->op2) { - insn->op2 = insn->op1 = ir_promote_d2f_op(ctx, insn->op1, ref); + insn->op2 = insn->op1 = ir_promote_d2f(ctx, insn->op1, ref); } else { - insn->op1 = ir_promote_d2f_op(ctx, insn->op1, ref); - insn->op2 = ir_promote_d2f_op(ctx, insn->op2, ref); + insn->op1 = ir_promote_d2f(ctx, insn->op1, ref); + insn->op2 = ir_promote_d2f(ctx, insn->op2, ref); } insn->type = IR_FLOAT; return ref; @@ -824,7 +805,7 @@ static ir_ref ir_promote_d2f_op(ir_ctx *ctx, ir_ref ref, ir_ref use) return ref; } -static ir_ref ir_promote_f2d_op(ir_ctx *ctx, ir_ref ref, ir_ref use) +static ir_ref ir_promote_f2d(ir_ctx *ctx, ir_ref ref, ir_ref use) { ir_insn *insn = &ctx->ir_base[ref]; @@ -850,7 +831,7 @@ static ir_ref ir_promote_f2d_op(ir_ctx *ctx, ir_ref ref, ir_ref use) return ref; case IR_NEG: case IR_ABS: - insn->op1 = ir_promote_f2d_op(ctx, insn->op1, ref); + insn->op1 = ir_promote_f2d(ctx, insn->op1, ref); insn->type = IR_DOUBLE; return ref; case IR_ADD: @@ -860,10 +841,10 @@ static ir_ref ir_promote_f2d_op(ir_ctx *ctx, ir_ref ref, ir_ref use) case IR_MIN: case IR_MAX: if (insn->op1 == insn->op2) { - insn->op2 = insn->op1 = ir_promote_f2d_op(ctx, insn->op1, ref); + insn->op2 = insn->op1 = ir_promote_f2d(ctx, insn->op1, ref); } else { - insn->op1 = ir_promote_f2d_op(ctx, insn->op1, ref); - insn->op2 = ir_promote_f2d_op(ctx, insn->op2, ref); + insn->op1 = ir_promote_f2d(ctx, insn->op1, ref); + insn->op2 = ir_promote_f2d(ctx, insn->op2, ref); } insn->type = IR_DOUBLE; return ref; @@ -875,43 +856,7 @@ static ir_ref ir_promote_f2d_op(ir_ctx *ctx, ir_ref ref, ir_ref use) return ref; } -static void ir_promote_d2f(ir_ctx *ctx, ir_ref ref, ir_insn *insn) -{ - if (ir_may_promote_d2f_op(ctx, insn->op1)) { - ir_ref new_ref = ir_promote_d2f_op(ctx, insn->op1, ref); - if (insn->op1 == new_ref) { - ir_replace_inputs(ctx, ref, ref, insn->op1); - ctx->use_lists[insn->op1] = ctx->use_lists[ref]; - ctx->use_lists[ref].count = 0; - ctx->use_lists[ref].refs = 0; - insn->optx = IR_NOP; - insn->op1 = IR_UNUSED; - } else { - insn->optx = IR_OPTX(IR_COPY, IR_FLOAT, 1); - insn->op1 = new_ref; - } - } -} - -static void ir_promote_f2d(ir_ctx *ctx, ir_ref ref, ir_insn *insn) -{ - if (ir_may_promote_f2d_op(ctx, insn->op1)) { - ir_ref new_ref = ir_promote_f2d_op(ctx, insn->op1, ref); - if (insn->op1 == new_ref) { - ir_replace_inputs(ctx, ref, ref, insn->op1); - ctx->use_lists[insn->op1] = ctx->use_lists[ref]; - ctx->use_lists[ref].count = 0; - ctx->use_lists[ref].refs = 0; - insn->optx = IR_NOP; - insn->op1 = IR_UNUSED; - } else { - insn->optx = IR_OPTX(IR_COPY, IR_DOUBLE, 1); - insn->op1 = new_ref; - } - } -} - -static bool ir_may_promote_i2i_op(ir_ctx *ctx, ir_type type, ir_ref ref) +static bool ir_may_promote_i2i(ir_ctx *ctx, ir_type type, ir_ref ref) { ir_insn *insn = &ctx->ir_base[ref]; @@ -926,7 +871,7 @@ static bool ir_may_promote_i2i_op(ir_ctx *ctx, ir_type type, ir_ref ref) case IR_ABS: case IR_NOT: return ctx->use_lists[ref].count == 1 && - ir_may_promote_i2i_op(ctx, type, insn->op1); + ir_may_promote_i2i(ctx, type, insn->op1); case IR_ADD: case IR_SUB: case IR_MUL: @@ -937,8 +882,8 @@ static bool ir_may_promote_i2i_op(ir_ctx *ctx, ir_type type, ir_ref ref) case IR_AND: case IR_XOR: return ctx->use_lists[ref].count == 1 && - ir_may_promote_i2i_op(ctx, type, insn->op1) && - ir_may_promote_i2i_op(ctx, type, insn->op2); + ir_may_promote_i2i(ctx, type, insn->op1) && + ir_may_promote_i2i(ctx, type, insn->op2); default: break; } @@ -946,7 +891,7 @@ static bool ir_may_promote_i2i_op(ir_ctx *ctx, ir_type type, ir_ref ref) return 0; } -static ir_ref ir_promote_i2i_op(ir_ctx *ctx, ir_type type, ir_ref ref, ir_ref use) +static ir_ref ir_promote_i2i(ir_ctx *ctx, ir_type type, ir_ref ref, ir_ref use) { ir_insn *insn = &ctx->ir_base[ref]; @@ -970,7 +915,7 @@ static ir_ref ir_promote_i2i_op(ir_ctx *ctx, ir_type type, ir_ref ref, ir_ref us case IR_NEG: case IR_ABS: case IR_NOT: - insn->op1 = ir_promote_i2i_op(ctx, type, insn->op1, ref); + insn->op1 = ir_promote_i2i(ctx, type, insn->op1, ref); insn->type = type; return ref; case IR_ADD: @@ -983,10 +928,10 @@ static ir_ref ir_promote_i2i_op(ir_ctx *ctx, ir_type type, ir_ref ref, ir_ref us case IR_AND: case IR_XOR: if (insn->op1 == insn->op2) { - insn->op2 = insn->op1 = ir_promote_i2i_op(ctx, type, insn->op1, ref); + insn->op2 = insn->op1 = ir_promote_i2i(ctx, type, insn->op1, ref); } else { - insn->op1 = ir_promote_i2i_op(ctx, type, insn->op1, ref); - insn->op2 = ir_promote_i2i_op(ctx, type, insn->op2, ref); + insn->op1 = ir_promote_i2i(ctx, type, insn->op1, ref); + insn->op2 = ir_promote_i2i(ctx, type, insn->op2, ref); } insn->type = type; return ref; @@ -998,24 +943,6 @@ static ir_ref ir_promote_i2i_op(ir_ctx *ctx, ir_type type, ir_ref ref, ir_ref us return ref; } -static void ir_promote_trunc(ir_ctx *ctx, ir_ref ref, ir_insn *insn) -{ - if (ir_may_promote_i2i_op(ctx, insn->type, insn->op1)) { - ir_ref new_ref = ir_promote_i2i_op(ctx, insn->type, insn->op1, ref); - if (insn->op1 == new_ref) { - ir_replace_inputs(ctx, ref, ref, insn->op1); - ctx->use_lists[insn->op1] = ctx->use_lists[ref]; - ctx->use_lists[ref].count = 0; - ctx->use_lists[ref].refs = 0; - insn->optx = IR_NOP; - insn->op1 = IR_UNUSED; - } else { - insn->optx = IR_OPTX(IR_COPY, insn->type, 1); - insn->op1 = new_ref; - } - } -} - int ir_sccp(ir_ctx *ctx) { ir_ref i, j, n, *p, use; @@ -1343,24 +1270,33 @@ int ir_sccp(ir_ctx *ctx) switch (insn->op) { case IR_FP2FP: if (insn->type == IR_FLOAT) { - ir_promote_d2f(ctx, i, insn); + if (ir_may_promote_d2f(ctx, insn->op1)) { + ir_ref ref = ir_promote_d2f(ctx, insn->op1, i); + ir_sccp_replace_insn2(ctx, i, ref, &worklist2); + } } else { - ir_promote_f2d(ctx, i, insn); + if (ir_may_promote_f2d(ctx, insn->op1)) { + ir_ref ref = ir_promote_f2d(ctx, insn->op1, i); + ir_sccp_replace_insn2(ctx, i, ref, &worklist2); + } } break; case IR_FP2INT: if (ctx->ir_base[insn->op1].type == IR_DOUBLE) { - if (ir_may_promote_d2f_op(ctx, insn->op1)) { - insn->op1 = ir_promote_d2f_op(ctx, insn->op1, i); + if (ir_may_promote_d2f(ctx, insn->op1)) { + insn->op1 = ir_promote_d2f(ctx, insn->op1, i); } } else { - if (ir_may_promote_f2d_op(ctx, insn->op1)) { - insn->op1 = ir_promote_f2d_op(ctx, insn->op1, i); + if (ir_may_promote_f2d(ctx, insn->op1)) { + insn->op1 = ir_promote_f2d(ctx, insn->op1, i); } } break; case IR_TRUNC: - ir_promote_trunc(ctx, i, insn); + if (ir_may_promote_i2i(ctx, insn->type, insn->op1)) { + ir_ref ref = ir_promote_i2i(ctx, insn->type, insn->op1, i); + ir_sccp_replace_insn2(ctx, i, ref, &worklist2); + } break; default: ir_sccp_fold2(ctx, i, &worklist2); diff --git a/ext/opcache/jit/ir/ir_x86.dasc b/ext/opcache/jit/ir/ir_x86.dasc index e5032c02603..7deb3557d9a 100644 --- a/ext/opcache/jit/ir/ir_x86.dasc +++ b/ext/opcache/jit/ir/ir_x86.dasc @@ -1737,7 +1737,7 @@ static uint32_t ir_match_insn(ir_ctx *ctx, ir_ref ref) } /* pass */ } else if (op2_insn->val.i64 == 0) { - return IR_COPY_INT; + // return IR_COPY_INT; } else if ((ir_type_size[insn->type] >= 4 && insn->op == IR_ADD && IR_IS_SIGNED_32BIT(op2_insn->val.i64)) || (ir_type_size[insn->type] >= 4 && insn->op == IR_SUB && IR_IS_SIGNED_NEG_32BIT(op2_insn->val.i64))) { lea: @@ -1878,7 +1878,7 @@ binop_fp: } else if (op2_insn->val.u64 == 0) { // 0 } else if (op2_insn->val.u64 == 1) { - return IR_COPY_INT; + // return IR_COPY_INT; } else if (ir_type_size[insn->type] >= 4 && (op2_insn->val.u64 == 2 || op2_insn->val.u64 == 4 || op2_insn->val.u64 == 8)) { /* MUL(X, 2|4|8) => LEA [X*2|4|8] */ @@ -1940,7 +1940,7 @@ binop_fp: } else if (IR_IS_CONST_REF(insn->op1)) { // const } else if (op2_insn->val.u64 == 1) { - return IR_COPY_INT; + // return IR_COPY_INT; } else if (IR_IS_POWER_OF_TWO(op2_insn->val.u64)) { /* DIV(X, PWR2) => SHR */ if (IR_IS_TYPE_UNSIGNED(insn->type)) { @@ -2004,7 +2004,7 @@ binop_fp: } else if (IR_IS_CONST_REF(insn->op1)) { // const } else if (op2_insn->val.i64 == 0) { - return IR_COPY_INT; + // return IR_COPY_INT; } else if (op2_insn->val.i64 == -1) { // -1 } @@ -2020,7 +2020,7 @@ binop_fp: } else if (op2_insn->val.i64 == 0) { // 0 } else if (op2_insn->val.i64 == -1) { - return IR_COPY_INT; + // return IR_COPY_INT; } } goto binop_int; @@ -2043,7 +2043,7 @@ binop_fp: } else if (IR_IS_CONST_REF(insn->op1)) { // const } else if (op2_insn->val.u64 == 0) { - return IR_COPY_INT; + // return IR_COPY_INT; } else if (ir_type_size[insn->type] >= 4) { if (op2_insn->val.u64 == 1) { // lea [op1*2] @@ -2069,7 +2069,7 @@ binop_fp: } else if (IR_IS_CONST_REF(insn->op1)) { // const } else if (op2_insn->val.u64 == 0) { - return IR_COPY_INT; + // return IR_COPY_INT; } } return IR_SHIFT_CONST; @@ -2086,9 +2086,9 @@ binop_fp: // case IR_COND: case IR_COPY: if (IR_IS_TYPE_INT(insn->type)) { - return IR_COPY_INT; + return IR_COPY_INT | IR_MAY_REUSE; } else { - return IR_COPY_FP; + return IR_COPY_FP | IR_MAY_REUSE; } break; case IR_CALL: @@ -2556,13 +2556,21 @@ store_int: return insn->op; case IR_SEXT: case IR_ZEXT: - case IR_TRUNC: - case IR_BITCAST: case IR_FP2INT: case IR_FP2FP: - case IR_PROTO: ir_match_fuse_load(ctx, insn->op1, ref); return insn->op; + case IR_TRUNC: + case IR_PROTO: + ir_match_fuse_load(ctx, insn->op1, ref); + return insn->op | IR_MAY_REUSE; + case IR_BITCAST: + ir_match_fuse_load(ctx, insn->op1, ref); + if (IR_IS_TYPE_INT(insn->type) && IR_IS_TYPE_INT(ctx->ir_base[insn->op1].type)) { + return insn->op | IR_MAY_REUSE; + } else { + return insn->op; + } case IR_CTLZ: case IR_CTTZ: ir_match_fuse_load(ctx, insn->op1, ref);