diff --git a/ext/opcache/jit/ir/ir.h b/ext/opcache/jit/ir/ir.h index 6c8cb7db944..832b5c2a9e2 100644 --- a/ext/opcache/jit/ir/ir.h +++ b/ext/opcache/jit/ir/ir.h @@ -541,6 +541,7 @@ void ir_strtab_free(ir_strtab *strtab); # define IR_DEBUG_GCM_SPLIT (1<<28) # define IR_DEBUG_SCHEDULE (1<<29) # define IR_DEBUG_RA (1<<30) +# define IR_DEBUG_BB_SCHEDULE (1U<<31) #endif typedef struct _ir_ctx ir_ctx; diff --git a/ext/opcache/jit/ir/ir_aarch64.dasc b/ext/opcache/jit/ir/ir_aarch64.dasc index 777c16f0fc6..3ac9b5bfc72 100644 --- a/ext/opcache/jit/ir/ir_aarch64.dasc +++ b/ext/opcache/jit/ir/ir_aarch64.dasc @@ -1010,7 +1010,7 @@ binop_fp: return IR_RETURN_FP; } case IR_IF: - if (ir_in_same_block(ctx, insn->op2) && ctx->use_lists[insn->op2].count == 1) { + if (!IR_IS_CONST_REF(insn->op2) && ctx->use_lists[insn->op2].count == 1) { op2_insn = &ctx->ir_base[insn->op2]; if (op2_insn->op >= IR_EQ && op2_insn->op <= IR_UGT) { if (IR_IS_TYPE_INT(ctx->ir_base[op2_insn->op1].type)) { @@ -1020,7 +1020,7 @@ binop_fp: ctx->rules[insn->op2] = IR_FUSED | IR_CMP_FP; return IR_CMP_AND_BRANCH_FP; } - } else if (op2_insn->op == IR_OVERFLOW) { + } else if (op2_insn->op == IR_OVERFLOW && ir_in_same_block(ctx, insn->op2)) { ctx->rules[insn->op2] = IR_FUSED | IR_SIMPLE | IR_OVERFLOW; return IR_OVERFLOW_AND_BRANCH; } @@ -1033,7 +1033,7 @@ binop_fp: } case IR_GUARD: case IR_GUARD_NOT: - if (ir_in_same_block(ctx, insn->op2) && ctx->use_lists[insn->op2].count == 1) { + if (!IR_IS_CONST_REF(insn->op2) && ctx->use_lists[insn->op2].count == 1) { op2_insn = &ctx->ir_base[insn->op2]; if (op2_insn->op >= IR_EQ && op2_insn->op <= IR_UGT // TODO: register allocator may clobber operands of CMP before they are used in the GUARD_CMP @@ -1047,7 +1047,7 @@ binop_fp: ctx->rules[insn->op2] = IR_FUSED | IR_CMP_FP; return IR_GUARD_CMP_FP; } - } else if (op2_insn->op == IR_OVERFLOW) { + } else if (op2_insn->op == IR_OVERFLOW && ir_in_same_block(ctx, insn->op2)) { ctx->rules[insn->op2] = IR_FUSED | IR_SIMPLE | 
IR_OVERFLOW; return IR_GUARD_OVERFLOW; } diff --git a/ext/opcache/jit/ir/ir_cfg.c b/ext/opcache/jit/ir/ir_cfg.c index 355c53fb04c..81ecc09c9ea 100644 --- a/ext/opcache/jit/ir/ir_cfg.c +++ b/ext/opcache/jit/ir/ir_cfg.c @@ -1151,13 +1151,11 @@ static void ir_insert_chain_before(ir_chain *chains, uint32_t c, uint32_t before } #ifndef IR_DEBUG_BB_SCHEDULE_GRAPH -# define IR_DEBUG_BB_SCHEDULE_GRAPH 0 -#endif -#ifndef IR_DEBUG_BB_SCHEDULE_EDGES -# define IR_DEBUG_BB_SCHEDULE_EDGES 0 -#endif -#ifndef IR_DEBUG_BB_SCHEDULE_CHAINS -# define IR_DEBUG_BB_SCHEDULE_CHAINS 0 +# ifdef IR_DEBUG +# define IR_DEBUG_BB_SCHEDULE_GRAPH 1 +# else +# define IR_DEBUG_BB_SCHEDULE_GRAPH 0 +# endif #endif #if IR_DEBUG_BB_SCHEDULE_GRAPH @@ -1210,20 +1208,17 @@ static void ir_dump_cfg_freq_graph(ir_ctx *ctx, float *bb_freq, uint32_t edges_c } #endif -#if IR_DEBUG_BB_SCHEDULE_EDGES +#ifdef IR_DEBUG static void ir_dump_edges(ir_ctx *ctx, uint32_t edges_count, ir_edge_info *edges) { uint32_t i; fprintf(stderr, "Edges:\n"); for (i = 0; i < edges_count; i++) { - fprintf(stderr, "\tBB%d -> BB%d [label=\"%0.3f\"]\n", edges[i].from, edges[i].to, edges[i].freq); + fprintf(stderr, "\tBB%d -> BB%d %0.3f\n", edges[i].from, edges[i].to, edges[i].freq); } - fprintf(stderr, "}\n"); } -#endif -#if IR_DEBUG_BB_SCHEDULE_CHAINS static void ir_dump_chains(ir_ctx *ctx, ir_chain *chains) { uint32_t b, tail, i; @@ -1507,8 +1502,10 @@ restart: /* 2. Sort EDGEs according to their frequencies */ qsort(edges, edges_count, sizeof(ir_edge_info), ir_edge_info_cmp); -#if IR_DEBUG_BB_SCHEDULE_EDGES - ir_dump_edges(ctx, edges_count, edges); +#ifdef IR_DEBUG + if (ctx->flags & IR_DEBUG_BB_SCHEDULE) { + ir_dump_edges(ctx, edges_count, edges); + } #endif /* 3. 
Process EDGEs in the decreasing frequency order and join the connected chains */ @@ -1555,13 +1552,17 @@ restart: } #if IR_DEBUG_BB_SCHEDULE_GRAPH - ir_dump_cfg_freq_graph(ctx, bb_freq, edges_count, edges, chains); + if (ctx->flags & IR_DEBUG_BB_SCHEDULE) { + ir_dump_cfg_freq_graph(ctx, bb_freq, edges_count, edges, chains); + } #endif ir_mem_free(bb_freq); -#if IR_DEBUG_BB_SCHEDULE_CHAINS - ir_dump_chains(ctx, chains); +#ifdef IR_DEBUG + if (ctx->flags & IR_DEBUG_BB_SCHEDULE) { + ir_dump_chains(ctx, chains); + } #endif /* 4. Merge empty entry blocks */ @@ -1585,8 +1586,10 @@ restart: } } -#if IR_DEBUG_BB_SCHEDULE_CHAINS - ir_dump_chains(ctx, chains); +#ifdef IR_DEBUG + if (ctx->flags & IR_DEBUG_BB_SCHEDULE) { + ir_dump_chains(ctx, chains); + } #endif } @@ -1619,8 +1622,10 @@ restart: } } -#if IR_DEBUG_BB_SCHEDULE_CHAINS - ir_dump_chains(ctx, chains); +#ifdef IR_DEBUG + if (ctx->flags & IR_DEBUG_BB_SCHEDULE) { + ir_dump_chains(ctx, chains); + } #endif /* 7. Form a final BB order */ diff --git a/ext/opcache/jit/ir/ir_gcm.c b/ext/opcache/jit/ir/ir_gcm.c index adbe77a3f5d..9c2697c4740 100644 --- a/ext/opcache/jit/ir/ir_gcm.c +++ b/ext/opcache/jit/ir/ir_gcm.c @@ -84,6 +84,7 @@ static uint32_t ir_gcm_select_best_block(ir_ctx *ctx, ir_ref ref, uint32_t lca) return lca; } +#if 0 /* This is not necessary anymore. Conditions may be fused with IF across BBs. */ if (ctx->ir_base[ref].op >= IR_EQ && ctx->ir_base[ref].op <= IR_UGT) { ir_use_list *use_list = &ctx->use_lists[ref]; @@ -96,6 +97,7 @@ static uint32_t ir_gcm_select_best_block(ir_ctx *ctx, ir_ref ref, uint32_t lca) } } } +#endif flags = (bb->flags & IR_BB_LOOP_HEADER) ? 
bb->flags : ctx->cfg_blocks[bb->loop_header].flags; if ((flags & IR_BB_LOOP_WITH_ENTRY) @@ -487,9 +489,19 @@ static void ir_gcm_schedule_late(ir_ctx *ctx, ir_ref ref, uint32_t b) b = ir_gcm_select_best_block(ctx, ref, lca); ctx->cfg_map[ref] = b; - if (ctx->ir_base[ref + 1].op == IR_OVERFLOW) { - /* OVERFLOW is a projection and must be scheduled together with previous ADD/SUB/MUL_OV */ - ctx->cfg_map[ref + 1] = b; + + /* OVERFLOW is a projection of ADD/SUB/MUL_OV and must be scheduled into the same block */ + if (ctx->ir_base[ref].op >= IR_ADD_OV && ctx->ir_base[ref].op <= IR_MUL_OV) { + ir_use_list *use_list = &ctx->use_lists[ref]; + ir_ref n, *p, use; + + for (n = use_list->count, p = &ctx->use_edges[use_list->refs]; n > 0; p++, n--) { + use = *p; + if (ctx->ir_base[use].op == IR_OVERFLOW) { + ctx->cfg_map[use] = b; + break; + } + } } } } diff --git a/ext/opcache/jit/ir/ir_private.h b/ext/opcache/jit/ir/ir_private.h index ec8f8779b07..8f8db7b4a3d 100644 --- a/ext/opcache/jit/ir/ir_private.h +++ b/ext/opcache/jit/ir/ir_private.h @@ -582,6 +582,17 @@ IR_ALWAYS_INLINE void ir_bitqueue_init(ir_bitqueue *q, uint32_t n) q->set = ir_bitset_malloc(n); } +IR_ALWAYS_INLINE void ir_bitqueue_grow(ir_bitqueue *q, uint32_t n) +{ + uint32_t len = ir_bitset_len(n); + IR_ASSERT(len >= q->len); + if (len > q->len) { + q->set = ir_mem_realloc(q->set, len * (IR_BITSET_BITS / 8)); + memset(q->set + q->len, 0, (len - q->len) * (IR_BITSET_BITS / 8)); + q->len = len; + } +} + IR_ALWAYS_INLINE void ir_bitqueue_free(ir_bitqueue *q) { ir_mem_free(q->set); diff --git a/ext/opcache/jit/ir/ir_ra.c b/ext/opcache/jit/ir/ir_ra.c index 789e9c5612d..8105aea68e8 100644 --- a/ext/opcache/jit/ir/ir_ra.c +++ b/ext/opcache/jit/ir/ir_ra.c @@ -2115,7 +2115,7 @@ int ir_gen_dessa_moves(ir_ctx *ctx, uint32_t b, emit_copy_t emit_copy) ir_insn *insn; uint32_t len; ir_bitset todo, ready; - bool have_constants = 0; + bool have_constants_or_addresses = 0; bb = &ctx->cfg_blocks[b]; if (!(bb->flags & 
IR_BB_DESSA_MOVES)) { @@ -2141,8 +2141,8 @@ int ir_gen_dessa_moves(ir_ctx *ctx, uint32_t b, emit_copy_t emit_copy) insn = &ctx->ir_base[ref]; if (insn->op == IR_PHI) { input = ir_insn_op(insn, k); - if (IR_IS_CONST_REF(input)) { - have_constants = 1; + if (IR_IS_CONST_REF(input) || !ctx->vregs[input]) { + have_constants_or_addresses = 1; } else if (ctx->vregs[input] != ctx->vregs[ref]) { s = ctx->vregs[input]; d = ctx->vregs[ref]; @@ -2204,13 +2204,13 @@ int ir_gen_dessa_moves(ir_ctx *ctx, uint32_t b, emit_copy_t emit_copy) ir_mem_free(todo); ir_mem_free(loc); - if (have_constants) { + if (have_constants_or_addresses) { for (i = 0, p = &ctx->use_edges[use_list->refs]; i < use_list->count; i++, p++) { ref = *p; insn = &ctx->ir_base[ref]; if (insn->op == IR_PHI) { input = ir_insn_op(insn, k); - if (IR_IS_CONST_REF(input)) { + if (IR_IS_CONST_REF(input) || !ctx->vregs[input]) { emit_copy(ctx, insn->type, input, ref); } } diff --git a/ext/opcache/jit/ir/ir_sccp.c b/ext/opcache/jit/ir/ir_sccp.c index 547ae79fe5d..c5665873aa9 100644 --- a/ext/opcache/jit/ir/ir_sccp.c +++ b/ext/opcache/jit/ir/ir_sccp.c @@ -347,7 +347,8 @@ static void ir_sccp_remove_insn(ir_ctx *ctx, ir_insn *_values, ir_ref ref, ir_bi for (j = 1, p = insn->ops + j; j <= n; j++, p++) { ir_ref input = *p; *p = IR_UNUSED; - if (input > 0 && _values[input].op == IR_BOTTOM) { + /* we may skip nodes that are going to be removed by SCCP (TOP, CONST and COPY) */ + if (input > 0 && _values[input].op > IR_COPY) { ir_use_list_remove_all(ctx, input, ref); if (ir_is_dead(ctx, input)) { /* schedule DCE */ @@ -396,13 +397,12 @@ static void ir_sccp_replace_insn(ir_ctx *ctx, ir_insn *_values, ir_ref ref, ir_r for (j = 1, p = insn->ops + 1; j <= n; j++, p++) { ir_ref input = *p; *p = IR_UNUSED; - if (input > 0) { + /* we may skip nodes that are going to be removed by SCCP (TOP, CONST and COPY) */ + if (input > 0 && _values[input].op > IR_COPY) { ir_use_list_remove_all(ctx, input, ref); - if (_values[input].op == IR_BOTTOM) 
{ - if (ir_is_dead(ctx, input)) { - /* schedule DCE */ - ir_bitqueue_add(worklist, input); - } + if (ir_is_dead(ctx, input)) { + /* schedule DCE */ + ir_bitqueue_add(worklist, input); } } } @@ -429,8 +429,9 @@ static void ir_sccp_replace_insn(ir_ctx *ctx, ir_insn *_values, ir_ref ref, ir_r } } #endif - /* schedule folding */ - if (worklist && _values[use].op == IR_BOTTOM) { + /* we may skip nodes that are going to be removed by SCCP (TOP, CONST and COPY) */ + if (worklist && _values[use].op > IR_COPY) { + /* schedule folding */ ir_bitqueue_add(worklist, use); } } @@ -1067,7 +1068,7 @@ static ir_ref ir_ext_const(ir_ctx *ctx, ir_insn *val_insn, ir_op op, ir_type typ return ir_const(ctx, new_val, type); } -static ir_ref ir_ext_ref(ir_ctx *ctx, ir_ref var_ref, ir_ref src_ref, ir_op op, ir_type type) +static ir_ref ir_ext_ref(ir_ctx *ctx, ir_ref var_ref, ir_ref src_ref, ir_op op, ir_type type, ir_bitqueue *worklist) { uint32_t optx = IR_OPTX(op, type, 1); ir_ref ref; @@ -1079,6 +1080,7 @@ static ir_ref ir_ext_ref(ir_ctx *ctx, ir_ref var_ref, ir_ref src_ref, ir_op op, if (!IR_IS_CONST_REF(src_ref)) { ir_use_list_remove_one(ctx, src_ref, var_ref); } + ir_bitqueue_add(worklist, ref); return ref; } } @@ -1091,6 +1093,8 @@ static ir_ref ir_ext_ref(ir_ctx *ctx, ir_ref var_ref, ir_ref src_ref, ir_op op, if (!IR_IS_CONST_REF(src_ref)) { ir_use_list_replace_one(ctx, src_ref, var_ref, ref); } + ir_bitqueue_grow(worklist, ref + 1); + ir_bitqueue_add(worklist, ref); return ref; } @@ -1162,8 +1166,7 @@ static bool ir_try_promote_ext(ir_ctx *ctx, ir_ref ext_ref, ir_insn *insn, ir_bi && !IR_IS_SYM_CONST(ctx->ir_base[use_insn->op1].op)) { ctx->ir_base[use].op1 = ir_ext_const(ctx, &ctx->ir_base[use_insn->op1], op, type); } else { - ctx->ir_base[use].op1 = ir_ext_ref(ctx, use, use_insn->op1, op, type); - ir_bitqueue_add(worklist, ctx->ir_base[use].op1); + ctx->ir_base[use].op1 = ir_ext_ref(ctx, use, use_insn->op1, op, type, worklist); } } if (use_insn->op2 != ref) { @@ -1171,8 +1174,7 @@ 
static bool ir_try_promote_ext(ir_ctx *ctx, ir_ref ext_ref, ir_insn *insn, ir_bi && !IR_IS_SYM_CONST(ctx->ir_base[use_insn->op2].op)) { ctx->ir_base[use].op2 = ir_ext_const(ctx, &ctx->ir_base[use_insn->op2], op, type); } else { - ctx->ir_base[use].op2 = ir_ext_ref(ctx, use, use_insn->op2, op, type); - ir_bitqueue_add(worklist, ctx->ir_base[use].op2); + ctx->ir_base[use].op2 = ir_ext_ref(ctx, use, use_insn->op2, op, type, worklist); } } } @@ -1185,8 +1187,7 @@ static bool ir_try_promote_ext(ir_ctx *ctx, ir_ref ext_ref, ir_insn *insn, ir_bi && !IR_IS_SYM_CONST(ctx->ir_base[phi_insn->op2].op)) { ctx->ir_base[ref].op2 = ir_ext_const(ctx, &ctx->ir_base[phi_insn->op2], op, type); } else { - ctx->ir_base[ref].op2 = ir_ext_ref(ctx, ref, phi_insn->op2, op, type); - ir_bitqueue_add(worklist, ctx->ir_base[ref].op2); + ctx->ir_base[ref].op2 = ir_ext_ref(ctx, ref, phi_insn->op2, op, type, worklist); } return 1; diff --git a/ext/opcache/jit/ir/ir_x86.dasc b/ext/opcache/jit/ir/ir_x86.dasc index 94cb5fd9f9a..df48daaeeea 100644 --- a/ext/opcache/jit/ir/ir_x86.dasc +++ b/ext/opcache/jit/ir/ir_x86.dasc @@ -1586,6 +1586,69 @@ static void ir_match_fuse_addr(ir_ctx *ctx, ir_ref addr_ref) } } +static bool ir_match_may_fuse_SI(ir_ctx *ctx, ir_ref ref, ir_ref use) +{ + ir_insn *op2_insn, *insn = &ctx->ir_base[use]; + + if (insn->op == IR_ADD) { + if (insn->op1 == ref) { + if (IR_IS_CONST_REF(insn->op2)) { + op2_insn = &ctx->ir_base[insn->op2]; + if (IR_IS_SYM_CONST(op2_insn->op)) { + if (ir_may_fuse_addr(ctx, op2_insn)) { + return 1; // LEA_SI_O + } + } else if (IR_IS_SIGNED_32BIT(op2_insn->val.i64)) { + return 1; // LEA_SI_O + } + } else if (insn->op2 != ref) { + return 1; // LEA_SI_B or LEA_SI_OB + } + } else if (insn->op2 == ref && insn->op1 != insn->op2) { + return 1; // LEA_B_SI or LEA_OB_SI + } + } + return 0; +} + +static bool ir_match_fuse_addr_all_useges(ir_ctx *ctx, ir_ref ref) +{ + uint32_t rule = ctx->rules[ref]; + ir_use_list *use_list; + ir_ref n, *p, use; + + if (rule == 
(IR_FUSED | IR_SIMPLE | IR_LEA_SI)) { + return 1; + } else if (!rule) { + ir_insn *insn = &ctx->ir_base[ref]; + + IR_ASSERT(IR_IS_TYPE_INT(insn->type) && ir_type_size[insn->type] >= 4); + if (insn->op == IR_MUL + && IR_IS_CONST_REF(insn->op2)) { + insn = &ctx->ir_base[insn->op2]; + if (!IR_IS_SYM_CONST(insn->op) + && (insn->val.u64 == 2 || insn->val.u64 == 4 || insn->val.u64 == 8)) { + ctx->rules[ref] = IR_LEA_SI; + + use_list = &ctx->use_lists[ref]; + n = use_list->count; + IR_ASSERT(n > 1); + p = &ctx->use_edges[use_list->refs]; + for (; n > 0; p++, n--) { + use = *p; + if (!ir_match_may_fuse_SI(ctx, ref, use)) { + return 0; + } + } + + return 1; + } + } + } + + return 0; +} + /* A naive check if there is a STORE or CALL between this LOAD and the fusion root */ static bool ir_match_has_mem_deps(ir_ctx *ctx, ir_ref ref, ir_ref root) { @@ -1895,13 +1958,13 @@ static uint32_t ir_match_insn(ir_ctx *ctx, ir_ref ref) } else if ((ir_type_size[insn->type] >= 4 && insn->op == IR_ADD && IR_IS_SIGNED_32BIT(op2_insn->val.i64)) || (ir_type_size[insn->type] >= 4 && insn->op == IR_SUB && IR_IS_SIGNED_NEG_32BIT(op2_insn->val.i64))) { lea: - if (ctx->use_lists[insn->op1].count == 1) { + if (ctx->use_lists[insn->op1].count == 1 || ir_match_fuse_addr_all_useges(ctx, insn->op1)) { uint32_t rule = ctx->rules[insn->op1]; if (!rule) { ctx->rules[insn->op1] = rule = ir_match_insn(ctx, insn->op1); } - if (rule == IR_LEA_SI) { + if (rule == IR_LEA_SI || rule == (IR_FUSED | IR_SIMPLE | IR_LEA_SI)) { /* z = MUL(Y, 2|4|8) ... ADD(z, imm32) => SKIP ... 
LEA [Y*2|4|8+im32] */ ctx->rules[insn->op1] = IR_FUSED | IR_SIMPLE | IR_LEA_SI; return IR_LEA_SI_O; @@ -1938,19 +2001,19 @@ lea: } } else if ((ctx->flags & IR_OPT_CODEGEN) && insn->op == IR_ADD && ir_type_size[insn->type] >= 4) { if (insn->op1 != insn->op2) { - if (ctx->use_lists[insn->op1].count == 1) { + if (ctx->use_lists[insn->op1].count == 1 || ir_match_fuse_addr_all_useges(ctx, insn->op1)) { uint32_t rule =ctx->rules[insn->op1]; if (!rule) { ctx->rules[insn->op1] = rule = ir_match_insn(ctx, insn->op1); } if (rule == IR_LEA_OB) { ctx->rules[insn->op1] = IR_FUSED | IR_SIMPLE | IR_LEA_OB; - if (ctx->use_lists[insn->op2].count == 1) { + if (ctx->use_lists[insn->op2].count == 1 || ir_match_fuse_addr_all_useges(ctx, insn->op2)) { rule = ctx->rules[insn->op2]; if (!rule) { ctx->rules[insn->op2] = rule = ir_match_insn(ctx, insn->op2); } - if (rule == IR_LEA_SI) { + if (rule == IR_LEA_SI || rule == (IR_FUSED | IR_SIMPLE | IR_LEA_SI)) { /* x = ADD(X, imm32) ... y = MUL(Y, 2|4|8) ... ADD(x, y) => SKIP ... SKIP ... LEA */ ctx->rules[insn->op2] = IR_FUSED | IR_SIMPLE | IR_LEA_SI; return IR_LEA_OB_SI; @@ -1958,7 +2021,7 @@ lea: } /* x = ADD(X, imm32) ... ADD(x, Y) => SKIP ... LEA */ return IR_LEA_OB_I; - } else if (rule == IR_LEA_SI) { + } else if (rule == IR_LEA_SI || rule == (IR_FUSED | IR_SIMPLE | IR_LEA_SI)) { ctx->rules[insn->op1] = IR_FUSED | IR_SIMPLE | IR_LEA_SI; if (ctx->use_lists[insn->op2].count == 1) { rule = ctx->rules[insn->op2]; @@ -1975,7 +2038,7 @@ lea: return IR_LEA_SI_B; } } - if (ctx->use_lists[insn->op2].count == 1) { + if (ctx->use_lists[insn->op2].count == 1 || ir_match_fuse_addr_all_useges(ctx, insn->op2)) { uint32_t rule = ctx->rules[insn->op2]; if (!rule) { ctx->rules[insn->op2] = rule = ir_match_insn(ctx, insn->op2); @@ -1984,7 +2047,7 @@ lea: ctx->rules[insn->op2] = IR_FUSED | IR_SIMPLE | IR_LEA_OB; /* x = ADD(X, imm32) ... ADD(Y, x) => SKIP ... 
LEA */ return IR_LEA_I_OB; - } else if (rule == IR_LEA_SI) { + } else if (rule == IR_LEA_SI || rule == (IR_FUSED | IR_SIMPLE | IR_LEA_SI)) { ctx->rules[insn->op2] = IR_FUSED | IR_SIMPLE | IR_LEA_SI; /* x = MUL(X, 2|4|8) ... ADD(Y, x) => SKIP ... LEA */ return IR_LEA_B_SI; @@ -2497,7 +2560,7 @@ store_int: return IR_RETURN_FP; } case IR_IF: - if (ir_in_same_block(ctx, insn->op2) && ctx->use_lists[insn->op2].count == 1) { + if (!IR_IS_CONST_REF(insn->op2) && ctx->use_lists[insn->op2].count == 1) { op2_insn = &ctx->ir_base[insn->op2]; if (op2_insn->op >= IR_EQ && op2_insn->op <= IR_UGT) { if (IR_IS_TYPE_INT(ctx->ir_base[op2_insn->op1].type)) { @@ -2545,15 +2608,14 @@ store_int: ir_match_fuse_load_test_int(ctx, op2_insn, ref); ctx->rules[insn->op2] = IR_FUSED | IR_TEST_INT; return IR_TEST_AND_BRANCH_INT; - } else if (op2_insn->op == IR_OVERFLOW) { + } else if (op2_insn->op == IR_OVERFLOW && ir_in_same_block(ctx, insn->op2)) { /* c = OVERFLOW(_) ... IF(c) => SKIP_OVERFLOW ... OVERFLOW_AND_BRANCH */ ctx->rules[insn->op2] = IR_FUSED | IR_SIMPLE | IR_OVERFLOW; return IR_OVERFLOW_AND_BRANCH; } } if (IR_IS_TYPE_INT(ctx->ir_base[insn->op2].type)) { - if (insn->op2 == ref - 1 /* previous instruction */ - && ir_in_same_block(ctx, insn->op2)) { + if (insn->op2 == ref - 1) { /* previous instruction */ op2_insn = &ctx->ir_base[insn->op2]; if (op2_insn->op == IR_ADD || op2_insn->op == IR_SUB || @@ -2575,7 +2637,6 @@ store_int: } else if ((ctx->flags & IR_OPT_CODEGEN) && insn->op1 == ref - 1 /* previous instruction */ && insn->op2 == ref - 2 /* previous instruction */ - && ir_in_same_block(ctx, insn->op2) && ctx->use_lists[insn->op2].count == 2 && IR_IS_TYPE_INT(ctx->ir_base[insn->op2].type)) { ir_insn *store_insn = &ctx->ir_base[insn->op1]; @@ -2626,7 +2687,7 @@ store_int: break; } case IR_COND: - if (ir_in_same_block(ctx, insn->op1) && ctx->use_lists[insn->op1].count == 1) { + if (!IR_IS_CONST_REF(insn->op1) && ctx->use_lists[insn->op1].count == 1) { ir_insn *op1_insn = 
&ctx->ir_base[insn->op1]; if (op1_insn->op >= IR_EQ && op1_insn->op <= IR_UGT) { @@ -2644,7 +2705,7 @@ store_int: return IR_COND; case IR_GUARD: case IR_GUARD_NOT: - if (ir_in_same_block(ctx, insn->op2) && ctx->use_lists[insn->op2].count == 1) { + if (!IR_IS_CONST_REF(insn->op2) && ctx->use_lists[insn->op2].count == 1) { op2_insn = &ctx->ir_base[insn->op2]; if (op2_insn->op >= IR_EQ && op2_insn->op <= IR_UGT // TODO: register allocator may clobber operands of CMP before they are used in the GUARD_CMP @@ -2734,7 +2795,7 @@ store_int: ir_match_fuse_load_test_int(ctx, op2_insn, ref); ctx->rules[insn->op2] = IR_FUSED | IR_TEST_INT; return IR_GUARD_TEST_INT; - } else if (op2_insn->op == IR_OVERFLOW) { + } else if (op2_insn->op == IR_OVERFLOW && ir_in_same_block(ctx, insn->op2)) { /* c = OVERFLOW(_) ... GUARD(c) => SKIP_OVERFLOW ... GUARD_OVERFLOW */ ctx->rules[insn->op2] = IR_FUSED | IR_SIMPLE | IR_OVERFLOW; return IR_GUARD_OVERFLOW;