From 2b9840894da4a178585c74ad3b2bfb119890d902 Mon Sep 17 00:00:00 2001 From: Dmitry Stogov Date: Thu, 27 Mar 2025 22:24:46 +0300 Subject: [PATCH] Update IR IR commit: dd228777b67334d8ed51de44f427d66d4ac99c08 --- ext/opcache/jit/ir/ir_fold.h | 35 +++++++++++++++- ext/opcache/jit/ir/ir_sccp.c | 57 +++++++++++++------------ ext/opcache/jit/ir/ir_x86.dasc | 76 ++++++++++++++++++++++++++++++---- 3 files changed, 130 insertions(+), 38 deletions(-) diff --git a/ext/opcache/jit/ir/ir_fold.h b/ext/opcache/jit/ir/ir_fold.h index c7745a8c687..78f3ca0c01e 100644 --- a/ext/opcache/jit/ir/ir_fold.h +++ b/ext/opcache/jit/ir/ir_fold.h @@ -1859,8 +1859,39 @@ IR_FOLD(SUB(ADD, ADD)) } // IR_FOLD(SUB(NEG, CONST)) TODO: -a - b => -b - a -// IR_FOLD(MUL(NEG, CONST)) TODO: -a * b => a * -b -// IR_FOLD(DIV(NEG, CONST)) TODO: -a / b => a / -b + +IR_FOLD(MUL(NEG, C_I8)) +IR_FOLD(MUL(NEG, C_I16)) +IR_FOLD(MUL(NEG, C_I32)) +IR_FOLD(MUL(NEG, C_I64)) +IR_FOLD(DIV(NEG, C_I8)) +IR_FOLD(DIV(NEG, C_I16)) +IR_FOLD(DIV(NEG, C_I32)) +IR_FOLD(DIV(NEG, C_I64)) +{ + op1 = op1_insn->op1; + val.i64 = -op2_insn->val.i64; + op2 = ir_const(ctx, val, op2_insn->type); + IR_FOLD_RESTART; +} + +IR_FOLD(MUL(NEG, C_FLOAT)) +IR_FOLD(DIV(NEG, C_FLOAT)) +{ + op1 = op1_insn->op1; + val.f = -op2_insn->val.f; + op2 = ir_const(ctx, val, op2_insn->type); + IR_FOLD_RESTART; +} + +IR_FOLD(MUL(NEG, C_DOUBLE)) +IR_FOLD(DIV(NEG, C_DOUBLE)) +{ + op1 = op1_insn->op1; + val.d = -op2_insn->val.d; + op2 = ir_const(ctx, val, op2_insn->type); + IR_FOLD_RESTART; +} IR_FOLD(MUL(_, C_U8)) IR_FOLD(MUL(_, C_U16)) diff --git a/ext/opcache/jit/ir/ir_sccp.c b/ext/opcache/jit/ir/ir_sccp.c index af039aaef82..8480861f91f 100644 --- a/ext/opcache/jit/ir/ir_sccp.c +++ b/ext/opcache/jit/ir/ir_sccp.c @@ -1517,7 +1517,7 @@ static bool ir_may_promote_f2d(ir_ctx *ctx, ir_ref ref) return 0; } -static ir_ref ir_promote_d2f(ir_ctx *ctx, ir_ref ref, ir_ref use) +static ir_ref ir_promote_d2f(ir_ctx *ctx, ir_ref ref, ir_ref use, ir_bitqueue *worklist) { ir_insn *insn = &ctx->ir_base[ref]; uint32_t count; @@ -1526,6 +1526,7 @@ static ir_ref ir_promote_d2f(ir_ctx *ctx, ir_ref ref, ir_ref use) if (IR_IS_CONST_REF(ref)) { return ir_const_float(ctx, (float)insn->val.d); } else { + ir_bitqueue_add(worklist, ref); switch (insn->op) { case IR_FP2FP: count = ctx->use_lists[ref].count; @@ -1555,7 +1556,7 @@ static ir_ref ir_promote_d2f(ir_ctx *ctx, ir_ref ref, ir_ref use) // return ref; case IR_NEG: case IR_ABS: - insn->op1 = ir_promote_d2f(ctx, insn->op1, ref); + insn->op1 = ir_promote_d2f(ctx, insn->op1, ref, worklist); insn->type = IR_FLOAT; return ref; case IR_ADD: @@ -1565,10 +1566,10 @@ static ir_ref ir_promote_d2f(ir_ctx *ctx, ir_ref ref, ir_ref use) case IR_MIN: case IR_MAX: if (insn->op1 == insn->op2) { - insn->op2 = insn->op1 = ir_promote_d2f(ctx, insn->op1, ref); + insn->op2 = insn->op1 = ir_promote_d2f(ctx, insn->op1, ref, worklist); } else { - insn->op1 = ir_promote_d2f(ctx, insn->op1, ref); - insn->op2 = ir_promote_d2f(ctx, insn->op2, ref); + insn->op1 = ir_promote_d2f(ctx, insn->op1, ref, worklist); + insn->op2 = ir_promote_d2f(ctx, insn->op2, ref, worklist); } insn->type = IR_FLOAT; return ref; @@ -1580,7 +1581,7 @@ static ir_ref ir_promote_d2f(ir_ctx *ctx, ir_ref ref, ir_ref use) return ref; } -static ir_ref ir_promote_f2d(ir_ctx *ctx, ir_ref ref, ir_ref use) +static ir_ref ir_promote_f2d(ir_ctx *ctx, ir_ref ref, ir_ref use, ir_bitqueue *worklist) { ir_insn *insn = &ctx->ir_base[ref]; uint32_t count; @@ -1590,6 +1591,7 @@ static ir_ref ir_promote_f2d(ir_ctx *ctx, ir_ref ref, ir_ref use) if (IR_IS_CONST_REF(ref)) { return ir_const_double(ctx, (double)insn->val.f); } else { + ir_bitqueue_add(worklist, ref); switch (insn->op) { case IR_FP2FP: count = ctx->use_lists[ref].count; @@ -1628,7 +1630,7 @@ static ir_ref ir_promote_f2d(ir_ctx *ctx, ir_ref ref, ir_ref use) return ref; case IR_NEG: case IR_ABS: - insn->op1 = ir_promote_f2d(ctx, insn->op1, ref); + insn->op1 = ir_promote_f2d(ctx, insn->op1, ref, worklist); insn->type = IR_DOUBLE; return ref; case IR_ADD: @@ -1638,10 +1640,10 @@ static ir_ref ir_promote_f2d(ir_ctx *ctx, ir_ref ref, ir_ref use) case IR_MIN: case IR_MAX: if (insn->op1 == insn->op2) { - insn->op2 = insn->op1 = ir_promote_f2d(ctx, insn->op1, ref); + insn->op2 = insn->op1 = ir_promote_f2d(ctx, insn->op1, ref, worklist); } else { - insn->op1 = ir_promote_f2d(ctx, insn->op1, ref); - insn->op2 = ir_promote_f2d(ctx, insn->op2, ref); + insn->op1 = ir_promote_f2d(ctx, insn->op1, ref, worklist); + insn->op2 = ir_promote_f2d(ctx, insn->op2, ref, worklist); } insn->type = IR_DOUBLE; return ref; @@ -1707,7 +1709,7 @@ static bool ir_may_promote_trunc(ir_ctx *ctx, ir_type type, ir_ref ref) } } } - for (p = insn->ops + 1, n = insn->inputs_count - 1; n > 0; p++, n--) { + for (p = insn->ops + 2, n = insn->inputs_count - 1; n > 0; p++, n--) { input = *p; if (input != ref) { if (!ir_may_promote_trunc(ctx, type, input)) { @@ -1723,7 +1725,7 @@ static bool ir_may_promote_trunc(ir_ctx *ctx, ir_type type, ir_ref ref) return 0; } -static ir_ref ir_promote_i2i(ir_ctx *ctx, ir_type type, ir_ref ref, ir_ref use) +static ir_ref ir_promote_i2i(ir_ctx *ctx, ir_type type, ir_ref ref, ir_ref use, ir_bitqueue *worklist) { ir_insn *insn = &ctx->ir_base[ref]; uint32_t count; @@ -1732,6 +1734,7 @@ static ir_ref ir_promote_i2i(ir_ctx *ctx, ir_type type, ir_ref ref, ir_ref use) if (IR_IS_CONST_REF(ref)) { return ir_const(ctx, insn->val, type); } else { + ir_bitqueue_add(worklist, ref); switch (insn->op) { case IR_ZEXT: case IR_SEXT: @@ -1776,7 +1779,7 @@ static ir_ref ir_promote_i2i(ir_ctx *ctx, ir_type type, ir_ref ref, ir_ref use) case IR_NEG: case IR_ABS: case IR_NOT: - insn->op1 = ir_promote_i2i(ctx, type, insn->op1, ref); + insn->op1 = ir_promote_i2i(ctx, type, insn->op1, ref, worklist); insn->type = type; return ref; case IR_ADD: @@ -1789,10 +1792,10 @@ static ir_ref ir_promote_i2i(ir_ctx *ctx, ir_type type, ir_ref ref, ir_ref use) case IR_XOR: case IR_SHL: if (insn->op1 == insn->op2) { - insn->op2 = insn->op1 = ir_promote_i2i(ctx, type, insn->op1, ref); + insn->op2 = insn->op1 = ir_promote_i2i(ctx, type, insn->op1, ref, worklist); } else { - insn->op1 = ir_promote_i2i(ctx, type, insn->op1, ref); - insn->op2 = ir_promote_i2i(ctx, type, insn->op2, ref); + insn->op1 = ir_promote_i2i(ctx, type, insn->op1, ref, worklist); + insn->op2 = ir_promote_i2i(ctx, type, insn->op2, ref, worklist); } insn->type = type; return ref; @@ -1804,18 +1807,18 @@ static ir_ref ir_promote_i2i(ir_ctx *ctx, ir_type type, ir_ref ref, ir_ref use) // TODO: ??? case IR_COND: if (insn->op2 == insn->op3) { - insn->op3 = insn->op2 = ir_promote_i2i(ctx, type, insn->op2, ref); + insn->op3 = insn->op2 = ir_promote_i2i(ctx, type, insn->op2, ref, worklist); } else { - insn->op2 = ir_promote_i2i(ctx, type, insn->op2, ref); - insn->op3 = ir_promote_i2i(ctx, type, insn->op3, ref); + insn->op2 = ir_promote_i2i(ctx, type, insn->op2, ref, worklist); + insn->op3 = ir_promote_i2i(ctx, type, insn->op3, ref, worklist); } insn->type = type; return ref; case IR_PHI: - for (p = insn->ops + 1, n = insn->inputs_count - 1; n > 0; p++, n--) { + for (p = insn->ops + 2, n = insn->inputs_count - 1; n > 0; p++, n--) { input = *p; if (input != ref) { - *p = ir_promote_i2i(ctx, type, input, ref); + *p = ir_promote_i2i(ctx, type, input, ref, worklist); } } insn->type = type; @@ -1906,7 +1909,7 @@ static uint32_t _ir_estimated_control(ir_ctx *ctx, ir_ref val) } IR_ASSERT(ir_op_flags[insn->op] & IR_OP_FLAG_DATA); - if (IR_OPND_KIND(ir_op_flags[insn->op], 1) & IR_OPND_CONTROL_DEP) { + if (IR_OPND_KIND(ir_op_flags[insn->op], 1) == IR_OPND_CONTROL_DEP) { return insn->op1; } @@ -3479,14 +3482,14 @@ void ir_iter_opt(ir_ctx *ctx, ir_bitqueue *worklist) case IR_FP2FP: if (insn->type == IR_FLOAT) { if (ir_may_promote_d2f(ctx, insn->op1)) { - ir_ref ref = ir_promote_d2f(ctx, insn->op1, i); + ir_ref ref = ir_promote_d2f(ctx, insn->op1, i, worklist); insn->op1 = ref; ir_iter_replace_insn(ctx, i, ref, worklist); break; } } else { if (ir_may_promote_f2d(ctx, insn->op1)) { - ir_ref ref = ir_promote_f2d(ctx, insn->op1, i); + ir_ref ref = ir_promote_f2d(ctx, insn->op1, i, worklist); insn->op1 = ref; ir_iter_replace_insn(ctx, i, ref, worklist); break; @@ -3496,17 +3499,17 @@ void ir_iter_opt(ir_ctx *ctx, ir_bitqueue *worklist) case IR_FP2INT: if (ctx->ir_base[insn->op1].type == IR_DOUBLE) { if (ir_may_promote_d2f(ctx, insn->op1)) { - insn->op1 = ir_promote_d2f(ctx, insn->op1, i); + insn->op1 = ir_promote_d2f(ctx, insn->op1, i, worklist); } } else { if (ir_may_promote_f2d(ctx, insn->op1)) { - insn->op1 = ir_promote_f2d(ctx, insn->op1, i); + insn->op1 = ir_promote_f2d(ctx, insn->op1, i, worklist); } } goto folding; case IR_TRUNC: if (ir_may_promote_trunc(ctx, insn->type, insn->op1)) { - ir_ref ref = ir_promote_i2i(ctx, insn->type, insn->op1, i); + ir_ref ref = ir_promote_i2i(ctx, insn->type, insn->op1, i, worklist); insn->op1 = ref; ir_iter_replace_insn(ctx, i, ref, worklist); break; diff --git a/ext/opcache/jit/ir/ir_x86.dasc b/ext/opcache/jit/ir/ir_x86.dasc index 3b6cf156ad9..ad785d3b891 100644 --- a/ext/opcache/jit/ir/ir_x86.dasc +++ b/ext/opcache/jit/ir/ir_x86.dasc @@ -1003,6 +1003,8 @@ const char *ir_reg_name(int8_t reg, ir_type type) _(LEA_SI_B) \ _(LEA_B_SI_O) \ _(LEA_SI_B_O) \ + _(LEA_SYM_O) \ + _(LEA_O_SYM) \ _(INC) \ _(DEC) \ _(MUL_PWR2) \ @@ -1065,6 +1067,9 @@ const char *ir_reg_name(int8_t reg, ir_type type) _(SSE_TRUNC) \ _(SSE_NEARBYINT) \ +#define IR_LEA_FIRST IR_LEA_OB +#define IR_LEA_LAST IR_LEA_O_SYM + #define IR_RULE_ENUM(name) IR_ ## name, #define IR_STATIC_ALLOCA (IR_SKIPPED | IR_FUSED | IR_SIMPLE | IR_ALLOCA) @@ -1584,7 +1589,7 @@ static void ir_match_fuse_addr(ir_ctx *ctx, ir_ref addr_ref) if (!rule) { ctx->rules[addr_ref] = rule = ir_match_insn(ctx, addr_ref); } - if (rule >= IR_LEA_OB && rule <= IR_LEA_SI_B_O) { + if (rule >= IR_LEA_FIRST && rule <= IR_LEA_LAST) { ir_use_list *use_list; ir_ref j; @@ -1972,6 +1977,19 @@ static uint32_t ir_match_insn(ir_ctx *ctx, ir_ref ref) if ((ctx->flags & IR_OPT_CODEGEN) && IR_IS_CONST_REF(insn->op2)) { op2_insn = &ctx->ir_base[insn->op2]; if (IR_IS_CONST_REF(insn->op1)) { + ir_insn *op1_insn = &ctx->ir_base[insn->op1]; + + if (insn->op == IR_ADD + && IR_IS_SYM_CONST(op1_insn->op) + && !IR_IS_SYM_CONST(op2_insn->op) + && IR_IS_SIGNED_32BIT((intptr_t)ir_sym_val(ctx, op1_insn) + (intptr_t)op2_insn->val.i64)) { + return IR_LEA_SYM_O; + } else if (insn->op == IR_ADD + && IR_IS_SYM_CONST(op2_insn->op) + && !IR_IS_SYM_CONST(op1_insn->op) + && IR_IS_SIGNED_32BIT((intptr_t)ir_sym_val(ctx, op2_insn) + (intptr_t)op1_insn->val.i64)) { + return IR_LEA_O_SYM; + } // const // TODO: add support for sym+offset ??? } else if (IR_IS_SYM_CONST(op2_insn->op)) { @@ -3264,7 +3282,11 @@ static void ir_emit_mov_ext(ir_ctx *ctx, ir_type type, ir_reg dst, ir_reg src) | ASM_REG_REG_OP mov, type, dst, src } else if (ir_type_size[type] == 2) { if (IR_IS_TYPE_SIGNED(type)) { - | movsx Rd(dst), Rw(src) + if (dst == IR_REG_RAX && src == IR_REG_RAX) { + | cwde + } else { + | movsx Rd(dst), Rw(src) + } } else { | movzx Rd(dst), Rw(src) } @@ -3311,8 +3333,8 @@ static ir_mem ir_fuse_addr(ir_ctx *ctx, ir_ref root, ir_ref ref) ir_reg base_reg = IR_REG_NONE, index_reg; int32_t offset = 0, scale; - IR_ASSERT(((rule & IR_RULE_MASK) >= IR_LEA_OB && - (rule & IR_RULE_MASK) <= IR_LEA_SI_B_O) || + IR_ASSERT(((rule & IR_RULE_MASK) >= IR_LEA_FIRST && + (rule & IR_RULE_MASK) <= IR_LEA_LAST) || rule == IR_STATIC_ALLOCA); switch (rule & IR_RULE_MASK) { default: @@ -3498,6 +3520,22 @@ static ir_mem ir_fuse_addr(ir_ctx *ctx, ir_ref root, ir_ref ref) op1_insn = &ctx->ir_base[op1_insn->op1]; scale = ctx->ir_base[op1_insn->op2].val.i32; break; + case IR_LEA_SYM_O: + op1_insn = &ctx->ir_base[insn->op1]; + op2_insn = &ctx->ir_base[insn->op2]; + offset = (intptr_t)ir_sym_val(ctx, op1_insn) + (intptr_t)op2_insn->val.i64; + base_reg_ref = index_reg_ref = IR_UNUSED; + scale = 1; + offset_insn = NULL; + break; + case IR_LEA_O_SYM: + op1_insn = &ctx->ir_base[insn->op1]; + op2_insn = &ctx->ir_base[insn->op2]; + offset = (intptr_t)ir_sym_val(ctx, op2_insn) + (intptr_t)op1_insn->val.i64; + base_reg_ref = index_reg_ref = IR_UNUSED; + scale = 1; + offset_insn = NULL; + break; case IR_ALLOCA: offset = IR_SPILL_POS_TO_OFFSET(insn->op3); base_reg = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER; @@ -5186,7 +5224,7 @@ static void ir_emit_mul_div_mod(ir_ctx *ctx, ir_ref def, ir_insn *insn) } else if (ir_type_size[type] == 2) { | cwd } else { - | movsx ax, al + | cbw } if (op2_reg != IR_REG_NONE) { | ASM_REG_OP idiv, type, op2_reg @@ -6813,7 +6851,11 @@ static void ir_emit_sext(ir_ctx *ctx, ir_ref def, ir_insn *insn) } if (ir_type_size[src_type] == 1) { if (ir_type_size[dst_type] == 2) { - | movsx Rw(def_reg), Rb(op1_reg) + if (def_reg == IR_REG_RAX && op1_reg == IR_REG_RAX) { + | cbw + } else { + | movsx Rw(def_reg), Rb(op1_reg) + } } else if (ir_type_size[dst_type] == 4) { | movsx Rd(def_reg), Rb(op1_reg) } else { @@ -6825,7 +6867,11 @@ static void ir_emit_sext(ir_ctx *ctx, ir_ref def, ir_insn *insn) } } else if (ir_type_size[src_type] == 2) { if (ir_type_size[dst_type] == 4) { - | movsx Rd(def_reg), Rw(op1_reg) + if (def_reg == IR_REG_RAX && op1_reg == IR_REG_RAX) { + | cwde + } else { + | movsx Rd(def_reg), Rw(op1_reg) + } } else { IR_ASSERT(ir_type_size[dst_type] == 8); IR_ASSERT(sizeof(void*) == 8); @@ -6838,7 +6884,11 @@ static void ir_emit_sext(ir_ctx *ctx, ir_ref def, ir_insn *insn) IR_ASSERT(ir_type_size[dst_type] == 8); IR_ASSERT(sizeof(void*) == 8); |.if X64 - | movsxd Rq(def_reg), Rd(op1_reg) + if (def_reg == IR_REG_RAX && op1_reg == IR_REG_RAX) { + | cdqe + } else { + | movsxd Rq(def_reg), Rd(op1_reg) + } |.endif } } else if (IR_IS_CONST_REF(insn->op1)) { @@ -7203,6 +7253,8 @@ static void ir_emit_int2fp(ir_ctx *ctx, ir_ref def, ir_insn *insn) |.else || if (ir_type_size[src_type] == 1) { | movsx Rd(op1_reg), Rb(op1_reg) +|| } else if (op1_reg == IR_REG_RAX) { + | cwde || } else { | movsx Rd(op1_reg), Rw(op1_reg) || } @@ -8502,7 +8554,11 @@ static void ir_emit_switch(ir_ctx *ctx, uint32_t b, ir_ref def, ir_insn *insn) case 4: |.if X64 if (IR_IS_TYPE_SIGNED(type)) { - | movsxd Ra(op2_reg), Rd(op2_reg) + if (op2_reg == IR_REG_RAX) { + | cdqe + } else { + | movsxd Ra(op2_reg), Rd(op2_reg) + } } else { | mov Rd(op2_reg), Rd(op2_reg) } @@ -10603,6 +10659,8 @@ void *ir_emit_code(ir_ctx *ctx, size_t *size_ptr) case IR_LEA_SI_B: case IR_LEA_B_SI_O: case IR_LEA_SI_B_O: + case IR_LEA_SYM_O: + case IR_LEA_O_SYM: ir_emit_lea(ctx, i, insn->type); break; case IR_MUL_PWR2: