diff --git a/ext/opcache/jit/ir/ir.c b/ext/opcache/jit/ir/ir.c
index e90a5e80bf0..a9f55cc0e46 100644
--- a/ext/opcache/jit/ir/ir.c
+++ b/ext/opcache/jit/ir/ir.c
@@ -803,9 +803,7 @@ ir_ref ir_proto(ir_ctx *ctx, uint8_t flags, ir_type ret_type, uint32_t params_co
 	proto->flags = flags;
 	proto->ret_type = ret_type;
 	proto->params_count = params_count;
-	if (params_count) {
-		memcpy(proto->param_types, param_types, params_count);
-	}
+	memcpy(proto->param_types, param_types, params_count);
 
 	return ir_strl(ctx, (const char *)proto, offsetof(ir_proto_t, param_types) + params_count);
 }
diff --git a/ext/opcache/jit/ir/ir.h b/ext/opcache/jit/ir/ir.h
index be8779e0194..ec5e57129c9 100644
--- a/ext/opcache/jit/ir/ir.h
+++ b/ext/opcache/jit/ir/ir.h
@@ -854,9 +854,6 @@ void ir_gdb_unregister_all(void);
 bool ir_gdb_present(void);
 
 /* IR load API (implementation in ir_load.c) */
-#define IR_RESOLVE_SYM_ADD_THUNK (1<<0)
-#define IR_RESOLVE_SYM_SILENT (1<<1)
-
 struct _ir_loader {
 	uint32_t default_func_flags;
 	bool (*init_module) (ir_loader *loader, const char *name, const char *filename, const char *target);
@@ -873,7 +870,7 @@ struct _ir_loader {
 	bool (*sym_data_end) (ir_loader *loader, uint32_t flags);
 	bool (*func_init) (ir_loader *loader, ir_ctx *ctx, const char *name);
 	bool (*func_process) (ir_loader *loader, ir_ctx *ctx, const char *name);
-	void*(*resolve_sym_name) (ir_loader *loader, const char *name, uint32_t flags);
+	void*(*resolve_sym_name) (ir_loader *loader, const char *name, bool add_thunk);
 	bool (*has_sym) (ir_loader *loader, const char *name);
 	bool (*add_sym) (ir_loader *loader, const char *name, void *addr);
 };
diff --git a/ext/opcache/jit/ir/ir_aarch64.dasc b/ext/opcache/jit/ir/ir_aarch64.dasc
index 1d927cc8c72..772eea7a5d7 100644
--- a/ext/opcache/jit/ir/ir_aarch64.dasc
+++ b/ext/opcache/jit/ir/ir_aarch64.dasc
@@ -4366,15 +4366,11 @@ static void ir_emit_va_arg(ir_ctx *ctx, ir_ref def, ir_insn *insn)
 	ir_backend_data *data = ctx->data;
 	dasm_State **Dst = &data->dasm_state;
 	ir_type type = insn->type;
-	ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]);
+	ir_reg def_reg = ctx->regs[def][0];
 	ir_reg op2_reg = ctx->regs[def][2];
 	ir_reg tmp_reg = ctx->regs[def][3];
 	int32_t offset;
 
-	if (ctx->use_lists[def].count == 1) {
-		/* dead load */
-		return;
-	}
 	IR_ASSERT(def_reg != IR_REG_NONE && tmp_reg != IR_REG_NONE);
 	if (op2_reg != IR_REG_NONE) {
 		if (IR_REG_SPILLED(op2_reg)) {
@@ -4398,15 +4394,11 @@ static void ir_emit_va_arg(ir_ctx *ctx, ir_ref def, ir_insn *insn)
 	ir_backend_data *data = ctx->data;
 	dasm_State **Dst = &data->dasm_state;
 	ir_type type = insn->type;
-	ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]);
+	ir_reg def_reg = ctx->regs[def][0];
 	ir_reg op2_reg = ctx->regs[def][2];
 	ir_reg tmp_reg = ctx->regs[def][3];
 	int32_t offset;
 
-	if (ctx->use_lists[def].count == 1) {
-		/* dead load */
-		return;
-	}
 	IR_ASSERT(def_reg != IR_REG_NONE && tmp_reg != IR_REG_NONE);
 	if (op2_reg != IR_REG_NONE) {
 		if (IR_REG_SPILLED(op2_reg)) {
@@ -4943,28 +4935,6 @@ static void ir_emit_tailcall(ir_ctx *ctx, ir_ref def, ir_insn *insn)
 		return;
 	}
 
-	/* Move op2 to a tmp register before epilogue if it's in
-	 * used_preserved_regs, because it will be overridden.
-	 */
-
-	ir_reg op2_reg = IR_REG_NONE;
-	if (!IR_IS_CONST_REF(insn->op2)) {
-		op2_reg = ctx->regs[def][2];
-		IR_ASSERT(op2_reg != IR_REG_NONE);
-
-		if (IR_REG_SPILLED(op2_reg)) {
-			op2_reg = IR_REG_INT_TMP;
-			ir_emit_load(ctx, IR_ADDR, op2_reg, insn->op2);
-		} else if (IR_REGSET_IN((ir_regset)ctx->used_preserved_regs, IR_REG_NUM(op2_reg))) {
-			ir_reg orig_op2_reg = op2_reg;
-			op2_reg = IR_REG_INT_TMP;
-
-			ir_type type = ctx->ir_base[insn->op2].type;
-			|	ASM_REG_REG_OP mov, type, op2_reg, IR_REG_NUM(orig_op2_reg)
-		} else {
-			op2_reg = IR_REG_NUM(op2_reg);
-		}
-	}
-
 	ir_emit_epilogue(ctx);
 
 	if (IR_IS_CONST_REF(insn->op2)) {
@@ -4977,8 +4947,13 @@ static void ir_emit_tailcall(ir_ctx *ctx, ir_ref def, ir_insn *insn)
 			|	br Rx(IR_REG_INT_TMP)
 		}
 	} else {
+		ir_reg op2_reg = ctx->regs[def][2];
+
+		IR_ASSERT(op2_reg != IR_REG_NONE);
-		IR_ASSERT(!IR_REGSET_IN((ir_regset)ctx->used_preserved_regs, op2_reg));
+		if (IR_REG_SPILLED(op2_reg)) {
+			op2_reg = IR_REG_NUM(op2_reg);
+			ir_emit_load(ctx, IR_ADDR, op2_reg, insn->op2);
+		}
 		|	br Rx(op2_reg)
 	}
 }
diff --git a/ext/opcache/jit/ir/ir_cfg.c b/ext/opcache/jit/ir/ir_cfg.c
index 34375b0a3b5..01532c8ea3e 100644
--- a/ext/opcache/jit/ir/ir_cfg.c
+++ b/ext/opcache/jit/ir/ir_cfg.c
@@ -244,6 +244,7 @@ int ir_build_cfg(ir_ctx *ctx)
 		_blocks[start] = b;
 		_blocks[end] = b;
 		IR_ASSERT(IR_IS_BB_START(insn->op));
+		IR_ASSERT(end > start);
 		bb->start = start;
 		bb->end = end;
 		bb->successors = count;
@@ -582,6 +583,7 @@ static int ir_remove_unreachable_blocks(ir_ctx *ctx)
 	return 1;
 }
 
+#if 0
 static void compute_postnum(const ir_ctx *ctx, uint32_t *cur, uint32_t b)
 {
 	uint32_t i, *p;
@@ -605,42 +607,34 @@ static void compute_postnum(const ir_ctx *ctx, uint32_t *cur, uint32_t b)
 
 /* Computes dominator tree using algorithm from "A Simple, Fast Dominance Algorithm" by
  * Cooper, Harvey and Kennedy. */
-static int ir_build_dominators_tree_slow(ir_ctx *ctx)
+int ir_build_dominators_tree(ir_ctx *ctx)
 {
 	uint32_t blocks_count, b, postnum;
 	ir_block *blocks, *bb;
 	uint32_t *edges;
 	bool changed;
 
-	blocks = ctx->cfg_blocks;
-	edges = ctx->cfg_edges;
-	blocks_count = ctx->cfg_blocks_count;
-
-	/* Clear the dominators tree */
-	for (b = 0, bb = &blocks[0]; b <= blocks_count; b++, bb++) {
-		bb->idom = 0;
-		bb->dom_depth = 0;
-		bb->dom_child = 0;
-		bb->dom_next_child = 0;
-	}
-
 	ctx->flags2 &= ~IR_NO_LOOPS;
 
 	postnum = 1;
 	compute_postnum(ctx, &postnum, 1);
 
-	/* Find immediate dominators by iterative fixed-point algorithm */
+	/* Find immediate dominators */
+	blocks = ctx->cfg_blocks;
+	edges = ctx->cfg_edges;
+	blocks_count = ctx->cfg_blocks_count;
 	blocks[1].idom = 1;
 	do {
 		changed = 0;
 		/* Iterating in Reverse Post Order */
 		for (b = 2, bb = &blocks[2]; b <= blocks_count; b++, bb++) {
 			IR_ASSERT(!(bb->flags & IR_BB_UNREACHABLE));
-			IR_ASSERT(bb->predecessors_count > 0);
 			if (bb->predecessors_count == 1) {
 				uint32_t pred_b = edges[bb->predecessors];
 
-				if (blocks[pred_b].idom > 0 && bb->idom != pred_b) {
+				if (blocks[pred_b].idom <= 0) {
+					//IR_ASSERT("Wrong blocks order: BB is before its single predecessor");
+				} else if (bb->idom != pred_b) {
 					bb->idom = pred_b;
 					changed = 1;
 				}
@@ -686,37 +680,39 @@ static int ir_build_dominators_tree_slow(ir_ctx *ctx)
 			}
 		}
 	} while (changed);
-
-	/* Build dominators tree */
 	blocks[1].idom = 0;
 	blocks[1].dom_depth = 0;
+
+	/* Construct dominators tree */
 	for (b = 2, bb = &blocks[2]; b <= blocks_count; b++, bb++) {
-		uint32_t idom = bb->idom;
-		ir_block *idom_bb = &blocks[idom];
+		IR_ASSERT(!(bb->flags & IR_BB_UNREACHABLE));
+		if (bb->idom > 0) {
+			ir_block *idom_bb = &blocks[bb->idom];
 
-		bb->dom_depth = idom_bb->dom_depth + 1;
-		/* Sort by block number to traverse children in pre-order */
-		if (idom_bb->dom_child == 0) {
-			idom_bb->dom_child = b;
-		} else if (b < idom_bb->dom_child) {
-			bb->dom_next_child = idom_bb->dom_child;
-			idom_bb->dom_child = b;
-		} else {
-			int child = idom_bb->dom_child;
-			ir_block *child_bb = &blocks[child];
+			bb->dom_depth = idom_bb->dom_depth + 1;
+			/* Sort by block number to traverse children in pre-order */
+			if (idom_bb->dom_child == 0) {
+				idom_bb->dom_child = b;
+			} else if (b < idom_bb->dom_child) {
+				bb->dom_next_child = idom_bb->dom_child;
+				idom_bb->dom_child = b;
+			} else {
+				int child = idom_bb->dom_child;
+				ir_block *child_bb = &blocks[child];
 
-			while (child_bb->dom_next_child > 0 && b > child_bb->dom_next_child) {
-				child = child_bb->dom_next_child;
-				child_bb = &blocks[child];
+				while (child_bb->dom_next_child > 0 && b > child_bb->dom_next_child) {
+					child = child_bb->dom_next_child;
+					child_bb = &blocks[child];
+				}
+				bb->dom_next_child = child_bb->dom_next_child;
+				child_bb->dom_next_child = b;
 			}
-			bb->dom_next_child = child_bb->dom_next_child;
-			child_bb->dom_next_child = b;
 		}
 	}
 
 	return 1;
 }
-
+#else
 /* A single pass modification of "A Simple, Fast Dominance Algorithm" by
  * Cooper, Harvey and Kennedy, that relays on IR block ordering.
  * It may fallback to the general slow fixed-point algorithm. */
@@ -751,11 +747,7 @@ int ir_build_dominators_tree(ir_ctx *ctx)
 			if (UNEXPECTED(idom >= b)) {
 				/* In rare cases, LOOP_BEGIN.op1 may be a back-edge. Skip back-edges. */
 				ctx->flags2 &= ~IR_NO_LOOPS;
-//				IR_ASSERT(k > 1 && "Wrong blocks order: BB is before its single predecessor");
-				if (UNEXPECTED(k <= 1)) {
-					ir_list_free(&worklist);
-					return ir_build_dominators_tree_slow(ctx);
-				}
+				IR_ASSERT(k > 1 && "Wrong blocks order: BB is before its single predecessor");
 				ir_list_push(&worklist, idom);
 				while (1) {
 					k--;
@@ -950,6 +942,7 @@ static int ir_build_dominators_tree_iterative(ir_ctx *ctx)
 
 	return 1;
 }
+#endif
 
 static bool ir_dominates(const ir_block *blocks, uint32_t b1, uint32_t b2)
 {
@@ -965,7 +958,7 @@ static bool ir_dominates(const ir_block *blocks, uint32_t b1, uint32_t b2)
 
 int ir_find_loops(ir_ctx *ctx)
 {
-	uint32_t b, j, n, count;
+	uint32_t i, j, n, count;
 	uint32_t *entry_times, *exit_times, *sorted_blocks, time = 1;
 	ir_block *blocks = ctx->cfg_blocks;
 	uint32_t *edges = ctx->cfg_edges;
@@ -990,13 +983,13 @@ int ir_find_loops(ir_ctx *ctx)
 		int child;
 
 next:
-		b = ir_worklist_peek(&work);
-		if (!entry_times[b]) {
-			entry_times[b] = time++;
+		i = ir_worklist_peek(&work);
+		if (!entry_times[i]) {
+			entry_times[i] = time++;
 		}
 
-		/* Visit blocks immediately dominated by "b". */
-		bb = &blocks[b];
+		/* Visit blocks immediately dominated by i. */
+		bb = &blocks[i];
 		for (child = bb->dom_child; child > 0; child = blocks[child].dom_next_child) {
 			if (ir_worklist_push(&work, child)) {
 				goto next;
@@ -1006,17 +999,17 @@ next:
 		/* Visit join edges. */
 		if (bb->successors_count) {
 			uint32_t *p = edges + bb->successors;
-			for (j = 0; j < bb->successors_count; j++, p++) {
+			for (j = 0; j < bb->successors_count; j++,p++) {
 				uint32_t succ = *p;
 
-				if (blocks[succ].idom == b) {
+				if (blocks[succ].idom == i) {
 					continue;
 				} else if (ir_worklist_push(&work, succ)) {
 					goto next;
 				}
 			}
 		}
-		exit_times[b] = time++;
+		exit_times[i] = time++;
 		ir_worklist_pop(&work);
 	}
 
@@ -1025,7 +1018,7 @@ next:
 	j = 1;
 	n = 2;
 	while (j != n) {
-		uint32_t i = j;
+		i = j;
 		j = n;
 		for (; i < j; i++) {
 			int child;
@@ -1037,82 +1030,9 @@ next:
 	count = n;
 
 	/* Identify loops. See Sreedhar et al, "Identifying Loops Using DJ Graphs". */
-	uint32_t prev_dom_depth = blocks[sorted_blocks[n - 1]].dom_depth;
-	uint32_t prev_irreducible = 0;
 	while (n > 1) {
-		b = sorted_blocks[--n];
-		ir_block *bb = &blocks[b];
-
-		IR_ASSERT(bb->dom_depth <= prev_dom_depth);
-		if (UNEXPECTED(prev_irreducible) && bb->dom_depth != prev_dom_depth) {
-			/* process delyed irreducible loops */
-			do {
-				b = sorted_blocks[prev_irreducible];
-				bb = &blocks[b];
-				if ((bb->flags & IR_BB_IRREDUCIBLE_LOOP) && !bb->loop_depth) {
-					/* process irreducible loop */
-					uint32_t hdr = b;
-
-					bb->loop_depth = 1;
-					if (ctx->ir_base[bb->start].op == IR_MERGE) {
-						ctx->ir_base[bb->start].op = IR_LOOP_BEGIN;
-					}
-
-					/* find the closing edge(s) of the irreucible loop */
-					IR_ASSERT(bb->predecessors_count > 1);
-					uint32_t *p = &edges[bb->predecessors];
-					j = bb->predecessors_count;
-					do {
-						uint32_t pred = *p;
-
-						if (entry_times[pred] > entry_times[b] && exit_times[pred] < exit_times[b]) {
-							if (!ir_worklist_len(&work)) {
-								ir_bitset_clear(work.visited, ir_bitset_len(ir_worklist_capasity(&work)));
-							}
-							blocks[pred].loop_header = 0; /* support for merged loops */
-							ir_worklist_push(&work, pred);
-						}
-						p++;
-					} while (--j);
-					IR_ASSERT(ir_worklist_len(&work) != 0);
-
-					/* collect members of the irreducible loop */
-					while (ir_worklist_len(&work)) {
-						b = ir_worklist_pop(&work);
-						if (b != hdr) {
-							ir_block *bb = &blocks[b];
-							bb->loop_header = hdr;
-							if (bb->predecessors_count) {
-								uint32_t *p = &edges[bb->predecessors];
-								uint32_t n = bb->predecessors_count;
-								do {
-									uint32_t pred = *p;
-									while (blocks[pred].loop_header > 0) {
-										pred = blocks[pred].loop_header;
-									}
-									if (pred != hdr) {
-										if (entry_times[pred] > entry_times[hdr] && exit_times[pred] < exit_times[hdr]) {
-											/* "pred" is a descendant of "hdr" */
-											ir_worklist_push(&work, pred);
-										} else {
-											/* another entry to the irreducible loop */
-											bb->flags |= IR_BB_IRREDUCIBLE_LOOP;
-											if (ctx->ir_base[bb->start].op == IR_MERGE) {
-												ctx->ir_base[bb->start].op = IR_LOOP_BEGIN;
-											}
-										}
-									}
-									p++;
-								} while (--n);
-							}
-						}
-					}
-				}
-			} while (--prev_irreducible != n);
-			prev_irreducible = 0;
-			b = sorted_blocks[n];
-			bb = &blocks[b];
-		}
+		i = sorted_blocks[--n];
+		ir_block *bb = &blocks[i];
 
 		if (bb->predecessors_count > 1) {
 			bool irreducible = 0;
@@ -1127,7 +1047,7 @@ next:
 
 				if (bb->idom != pred) {
 					/* In a loop back-edge (back-join edge), the successor dominates the predecessor. */
-					if (ir_dominates(blocks, b, pred)) {
+					if (ir_dominates(blocks, i, pred)) {
 						if (!ir_worklist_len(&work)) {
 							ir_bitset_clear(work.visited, ir_bitset_len(ir_worklist_capasity(&work)));
 						}
@@ -1136,9 +1056,8 @@ next:
 					} else {
 						/* Otherwise it's a cross-join edge. See if it's a branch to an ancestor on the
 						 * DJ spanning tree. */
-						if (entry_times[pred] > entry_times[b] && exit_times[pred] < exit_times[b]) {
+						if (entry_times[pred] > entry_times[i] && exit_times[pred] < exit_times[i]) {
 							irreducible = 1;
-							break;
 						}
 					}
 				}
@@ -1146,55 +1065,46 @@ next:
 			} while (--j);
 
 			if (UNEXPECTED(irreducible)) {
-				bb->flags |= IR_BB_LOOP_HEADER | IR_BB_IRREDUCIBLE_LOOP;
-				ctx->flags2 |= IR_CFG_HAS_LOOPS | IR_IRREDUCIBLE_CFG;
-				/* Remember the position of the first irreducible loop to process all the irreducible loops
-				 * after the reducible loops with the same dominator tree depth
-				 */
-				if (!prev_irreducible) {
-					prev_irreducible = n;
+				// TODO: Support for irreducible loops ???
+				bb->flags |= IR_BB_IRREDUCIBLE_LOOP;
+				ctx->flags2 |= IR_IRREDUCIBLE_CFG;
+				while (ir_worklist_len(&work)) {
+					ir_worklist_pop(&work);
 				}
-				ir_list_clear(&work.l);
 			} else if (ir_worklist_len(&work)) {
-				/* collect members of the reducible loop */
-				uint32_t hdr = b;
-
 				bb->flags |= IR_BB_LOOP_HEADER;
 				ctx->flags2 |= IR_CFG_HAS_LOOPS;
 				bb->loop_depth = 1;
-				if (ctx->ir_base[bb->start].op == IR_MERGE) {
-					ctx->ir_base[bb->start].op = IR_LOOP_BEGIN;
-				}
 				while (ir_worklist_len(&work)) {
-					b = ir_worklist_pop(&work);
-					if (b != hdr) {
-						ir_block *bb = &blocks[b];
-						bb->loop_header = hdr;
+					j = ir_worklist_pop(&work);
+					while (blocks[j].loop_header > 0) {
+						j = blocks[j].loop_header;
+					}
+					if (j != i) {
+						ir_block *bb = &blocks[j];
+						if (bb->idom == 0 && j != 1) {
+							/* Ignore blocks that are unreachable or only abnormally reachable. */
+							continue;
+						}
+						bb->loop_header = i;
 						if (bb->predecessors_count) {
 							uint32_t *p = &edges[bb->predecessors];
-							uint32_t n = bb->predecessors_count;
+							j = bb->predecessors_count;
 							do {
-								uint32_t pred = *p;
-								while (blocks[pred].loop_header > 0) {
-									pred = blocks[pred].loop_header;
-								}
-								if (pred != hdr) {
-									ir_worklist_push(&work, pred);
-								}
+								ir_worklist_push(&work, *p);
 								p++;
-							} while (--n);
+							} while (--j);
 						}
 					}
 				}
 			}
 		}
 	}
-	IR_ASSERT(!prev_irreducible);
 
 	if (ctx->flags2 & IR_CFG_HAS_LOOPS) {
 		for (n = 1; n < count; n++) {
-			b = sorted_blocks[n];
-			ir_block *bb = &blocks[b];
+			i = sorted_blocks[n];
+			ir_block *bb = &blocks[i];
 			if (bb->loop_header > 0) {
 				ir_block *loop = &blocks[bb->loop_header];
 				uint32_t loop_depth = loop->loop_depth;
@@ -1479,7 +1389,7 @@ restart:
 					goto restart;
 				}
 			} else if (b != predecessor && ctx->cfg_blocks[predecessor].loop_header != b) {
-				/* not a loop back-edge */
+				ir_dump_cfg(ctx, stderr);
 				IR_ASSERT(b == predecessor || ctx->cfg_blocks[predecessor].loop_header == b);
 			}
 		}
diff --git a/ext/opcache/jit/ir/ir_check.c b/ext/opcache/jit/ir/ir_check.c
index a791baef5db..f12b4776fa1 100644
--- a/ext/opcache/jit/ir/ir_check.c
+++ b/ext/opcache/jit/ir/ir_check.c
@@ -213,16 +213,11 @@ bool ir_check(const ir_ctx *ctx)
 							ok = 0;
 						}
 					}
-					if ((ctx->flags2 & IR_LINEAR)
-					 && use >= i
-					 && !(insn->op == IR_LOOP_BEGIN)) {
-						fprintf(stderr, "ir_base[%d].ops[%d] invalid forward reference (%d)\n", i, j, use);
-						ok = 0;
-					}
 					break;
 				case IR_OPND_CONTROL_DEP:
 					if ((ctx->flags2 & IR_LINEAR)
-					 && use >= i) {
+					 && use >= i
+					 && !(insn->op == IR_LOOP_BEGIN)) {
 						fprintf(stderr, "ir_base[%d].ops[%d] invalid forward reference (%d)\n", i, j, use);
 						ok = 0;
 					} else if (insn->op == IR_PHI) {
diff --git a/ext/opcache/jit/ir/ir_emit.c b/ext/opcache/jit/ir/ir_emit.c
index fab9f56228d..c82655daf48 100644
--- a/ext/opcache/jit/ir/ir_emit.c
+++ b/ext/opcache/jit/ir/ir_emit.c
@@ -309,7 +309,7 @@ static void* ir_sym_addr(ir_ctx *ctx, const ir_insn *addr_insn)
 	const char *name = ir_get_str(ctx, addr_insn->val.name);
 	void *addr = (ctx->loader && ctx->loader->resolve_sym_name) ?
-		ctx->loader->resolve_sym_name(ctx->loader, name, IR_RESOLVE_SYM_SILENT) :
+		ctx->loader->resolve_sym_name(ctx->loader, name, 0) :
 		ir_resolve_sym_name(name);
 
 	return addr;
@@ -320,7 +320,7 @@ static void* ir_sym_val(ir_ctx *ctx, const ir_insn *addr_insn)
 	const char *name = ir_get_str(ctx, addr_insn->val.name);
 	void *addr = (ctx->loader && ctx->loader->resolve_sym_name) ?
-		ctx->loader->resolve_sym_name(ctx->loader, name, addr_insn->op == IR_FUNC ? IR_RESOLVE_SYM_ADD_THUNK : 0) :
+		ctx->loader->resolve_sym_name(ctx->loader, name, addr_insn->op == IR_FUNC) :
 		ir_resolve_sym_name(name);
 
 	IR_ASSERT(addr);
diff --git a/ext/opcache/jit/ir/ir_fold.h b/ext/opcache/jit/ir/ir_fold.h
index 90112214d0c..88539e52ab0 100644
--- a/ext/opcache/jit/ir/ir_fold.h
+++ b/ext/opcache/jit/ir/ir_fold.h
@@ -1909,9 +1909,7 @@ IR_FOLD(SUB(_, SUB))
 IR_FOLD(SUB(ADD, ADD))
 {
 	if (IR_IS_TYPE_INT(IR_OPT_TYPE(opt))) {
-		if (op1 == op2) {
-			IR_FOLD_CONST_U(0);
-		} else if (op1_insn->op1 == op2_insn->op1) {
+		if (op1_insn->op1 == op2_insn->op1) {
 			/* (a + b) - (a + c) => b - c */
 			op1 = op1_insn->op2;
 			op2 = op2_insn->op2;
diff --git a/ext/opcache/jit/ir/ir_gcm.c b/ext/opcache/jit/ir/ir_gcm.c
index 0d8a6c2d760..8bd6be5d10a 100644
--- a/ext/opcache/jit/ir/ir_gcm.c
+++ b/ext/opcache/jit/ir/ir_gcm.c
@@ -785,139 +785,6 @@ IR_ALWAYS_INLINE ir_ref ir_count_constant(ir_ref *_xlat, ir_ref ref)
 	return 0;
 }
 
-IR_ALWAYS_INLINE bool ir_is_good_bb_order(ir_ctx *ctx, uint32_t b, ir_block *bb, ir_ref start)
-{
-	ir_insn *insn = &ctx->ir_base[start];
-	uint32_t n = insn->inputs_count;
-	ir_ref *p = insn->ops + 1;
-
-	if (n == 1) {
-		return *p < start;
-	} else {
-		IR_ASSERT(n > 1);
-		for (; n > 0; p++, n--) {
-			ir_ref input = *p;
-			if (input < start) {
-				/* ordered */
-			} else if ((bb->flags & IR_BB_LOOP_HEADER)
-					&& (ctx->cfg_map[input] == b || ctx->cfg_blocks[ctx->cfg_map[input]].loop_header == b)) {
-				/* back-edge of reducible loop */
-			} else if ((bb->flags & IR_BB_IRREDUCIBLE_LOOP)
-					&& (ctx->cfg_blocks[ctx->cfg_map[input]].loop_header == ctx->cfg_blocks[b].loop_header)) {
-				/* closing edge of irreducible loop */
-			} else {
-				return 0;
-			}
-		}
-		return 1;
-	}
-}
-
-static IR_NEVER_INLINE void ir_fix_bb_order(ir_ctx *ctx, ir_ref *_prev, ir_ref *_next)
-{
-	uint32_t b, succ, count, *q, *xlat;
-	ir_block *bb;
-	ir_ref ref, n, prev;
-	ir_worklist worklist;
-	ir_block *new_blocks;
-
-#if 0
-	for (b = 1, bb = ctx->cfg_blocks + 1; b <= ctx->cfg_blocks_count; b++, bb++) {
-		if (!ir_is_good_bb_order(ctx, b, bb, bb->start)) {
-			goto fix;
-		}
-	}
-	return;
-
-fix:
-#endif
-	count = ctx->cfg_blocks_count + 1;
-	new_blocks = ir_mem_malloc(count * sizeof(ir_block));
-	xlat = ir_mem_malloc(count * sizeof(uint32_t));
-	ir_worklist_init(&worklist, count);
-	ir_worklist_push(&worklist, 1);
-	while (ir_worklist_len(&worklist) != 0) {
-next:
-		b = ir_worklist_peek(&worklist);
-		bb = &ctx->cfg_blocks[b];
-		n = bb->successors_count;
-		if (n == 1) {
-			succ = ctx->cfg_edges[bb->successors];
-			if (ir_worklist_push(&worklist, succ)) {
-				goto next;
-			}
-		} else if (n > 1) {
-			uint32_t best = 0;
-			uint32_t best_loop_depth = 0;
-
-			q = ctx->cfg_edges + bb->successors + n;
-			do {
-				q--;
-				succ = *q;
-				if (ir_bitset_in(worklist.visited, succ)) {
-					/* already processed */
-				} else if ((ctx->cfg_blocks[succ].flags & IR_BB_LOOP_HEADER)
-						&& (succ == b || ctx->cfg_blocks[b].loop_header == succ)) {
-					/* back-edge of reducible loop */
-				} else if ((ctx->cfg_blocks[succ].flags & IR_BB_IRREDUCIBLE_LOOP)
-						&& (ctx->cfg_blocks[succ].loop_header == ctx->cfg_blocks[b].loop_header)) {
-					/* closing edge of irreducible loop */
-				} else if (!best) {
-					best = succ;
-					best_loop_depth = ctx->cfg_blocks[best].loop_depth;
-				} else if (ctx->cfg_blocks[succ].loop_depth < best_loop_depth) {
-					/* prefer deeper loop */
-					best = succ;
-					best_loop_depth = ctx->cfg_blocks[best].loop_depth;
-				}
-				n--;
-			} while (n > 0);
-			if (best) {
-				ir_worklist_push(&worklist, best);
-				goto next;
-			}
-		}
-		ir_worklist_pop(&worklist);
-		count--;
-		new_blocks[count] = *bb;
-		xlat[b] = count;
-	}
-	IR_ASSERT(count == 1);
-	xlat[0] = 0;
-	ir_worklist_free(&worklist);
-
-	prev = 0;
-	for (b = 1, bb = new_blocks + 1; b <= ctx->cfg_blocks_count; b++, bb++) {
-		bb->idom = xlat[bb->idom];
-		bb->loop_header = xlat[bb->loop_header];
-		n = bb->successors_count;
-		if (n > 0) {
-			for (q = ctx->cfg_edges + bb->successors; n > 0; q++, n--) {
-				*q = xlat[*q];
-			}
-		}
-		n = bb->predecessors_count;
-		if (n > 0) {
-			for (q = ctx->cfg_edges + bb->predecessors; n > 0; q++, n--) {
-				*q = xlat[*q];
-			}
-		}
-		_next[prev] = bb->start;
-		_prev[bb->start] = prev;
-		prev = bb->end;
-	}
-	_next[0] = 0;
-	_next[prev] = 0;
-
-	for (ref = 2; ref < ctx->insns_count; ref++) {
-		ctx->cfg_map[ref] = xlat[ctx->cfg_map[ref]];
-	}
-	ir_mem_free(xlat);
-
-	ir_mem_free(ctx->cfg_blocks);
-	ctx->cfg_blocks = new_blocks;
-}
-
 int ir_schedule(ir_ctx *ctx)
 {
 	ir_ctx new_ctx;
@@ -933,7 +800,6 @@ int ir_schedule(ir_ctx *ctx)
 	ir_block *bb;
 	ir_insn *insn, *new_insn;
 	ir_use_list *lists, *use_list, *new_list;
-	bool bad_bb_order = 0;
 
 	/* Create a double-linked list of nodes ordered by BB, respecting BB->start and BB->end */
 	IR_ASSERT(_blocks[1] == 1);
@@ -952,50 +818,27 @@ int ir_schedule(ir_ctx *ctx)
 		} else if (b > prev_b) {
 			bb = &ctx->cfg_blocks[b];
 			if (i == bb->start) {
-				if (bb->end > bb->start) {
-					prev_b = b;
-					prev_b_end = bb->end;
-					/* add to the end of the list */
-					_next[j] = i;
-					_prev[i] = j;
-					j = i;
-				} else {
-					prev_b = 0;
-					prev_b_end = 0;
-					k = bb->end;
-					while (_blocks[_prev[k]] == b) {
-						k = _prev[k];
-					}
-					/* insert before "k" */
-					_prev[i] = _prev[k];
-					_next[i] = k;
-					_next[_prev[k]] = i;
-					_prev[k] = i;
-				}
-				if (!ir_is_good_bb_order(ctx, b, bb, i)) {
-					bad_bb_order = 1;
-				}
-			} else if (i != bb->end) {
-				/* move down late (see the following loop) */
-				_next[i] = _move_down;
-				_move_down = i;
-			} else {
-				IR_ASSERT(bb->start > bb->end);
-				prev_b = 0;
-				prev_b_end = 0;
+				IR_ASSERT(bb->end > bb->start);
+				prev_b = b;
+				prev_b_end = bb->end;
+				_prev[bb->end] = 0;
 				/* add to the end of the list */
 				_next[j] = i;
 				_prev[i] = j;
 				j = i;
+			} else {
+				IR_ASSERT(i != bb->end);
+				/* move down late (see the following loop) */
+				_next[i] = _move_down;
+				_move_down = i;
 			}
 		} else if (b) {
 			bb = &ctx->cfg_blocks[b];
 			IR_ASSERT(i != bb->start);
-			if (i > bb->end) {
+			if (_prev[bb->end]) {
 				/* move up, insert before the end of the already scheduled BB */
 				k = bb->end;
 			} else {
-				IR_ASSERT(i > bb->start);
 				/* move up, insert at the end of the block */
 				k = ctx->cfg_blocks[b + 1].start;
 			}
@@ -1040,10 +883,6 @@ int ir_schedule(ir_ctx *ctx)
 	}
 #endif
 
-	if (bad_bb_order) {
-		ir_fix_bb_order(ctx, _prev, _next);
-	}
-
 	_xlat = ir_mem_calloc((ctx->consts_count + ctx->insns_count), sizeof(ir_ref));
 	_xlat += ctx->consts_count;
 	_xlat[IR_TRUE] = IR_TRUE;
diff --git a/ext/opcache/jit/ir/ir_private.h b/ext/opcache/jit/ir/ir_private.h
index ac952e402f5..69a0101d24e 100644
--- a/ext/opcache/jit/ir/ir_private.h
+++ b/ext/opcache/jit/ir/ir_private.h
@@ -62,7 +62,7 @@
 #define IR_MAX(a, b) (((a) > (b)) ? (a) : (b))
 #define IR_MIN(a, b) (((a) < (b)) ? (a) : (b))
 
-#define IR_IS_POWER_OF_TWO(x) ((x) && (!((x) & ((x) - 1))))
+#define IR_IS_POWER_OF_TWO(x) (!((x) & ((x) - 1)))
 
 #define IR_LOG2(x) ir_ntzl(x)
 
diff --git a/ext/opcache/jit/ir/ir_save.c b/ext/opcache/jit/ir/ir_save.c
index 595f2d9d6a2..b12cc267af6 100644
--- a/ext/opcache/jit/ir/ir_save.c
+++ b/ext/opcache/jit/ir/ir_save.c
@@ -140,9 +140,6 @@ void ir_save(const ir_ctx *ctx, uint32_t save_flags, FILE *f)
 				fprintf(f, ", loop=BB%d(%d)", bb->loop_header, bb->loop_depth);
 			}
 		}
-		if (bb->flags & IR_BB_IRREDUCIBLE_LOOP) {
-			fprintf(f, ", IRREDUCIBLE");
-		}
 		if (bb->predecessors_count) {
 			uint32_t i;
 
diff --git a/ext/opcache/jit/ir/ir_sccp.c b/ext/opcache/jit/ir/ir_sccp.c
index 221a86a5ad8..2e006516df8 100644
--- a/ext/opcache/jit/ir/ir_sccp.c
+++ b/ext/opcache/jit/ir/ir_sccp.c
@@ -1732,20 +1732,7 @@ static ir_ref ir_promote_i2i(ir_ctx *ctx, ir_type type, ir_ref ref, ir_ref use,
 	ir_ref *p, n, input;
 
 	if (IR_IS_CONST_REF(ref)) {
-		ir_val val;
-
-		switch (type) {
-			case IR_I8:  val.i64 = insn->val.i8;  break;
-			case IR_U8:  val.u64 = insn->val.u8;  break;
-			case IR_I16: val.i64 = insn->val.i16; break;
-			case IR_U16: val.u64 = insn->val.u16; break;
-			case IR_I32: val.i64 = insn->val.i32; break;
-			case IR_U32: val.u64 = insn->val.u32; break;
-			case IR_CHAR:val.i64 = insn->val.i8;  break;
-			case IR_BOOL:val.u64 = insn->val.u8 != 0; break;
-			default: IR_ASSERT(0); val.u64 = 0;
-		}
-		return ir_const(ctx, val, type);
+		return ir_const(ctx, insn->val, type);
 	} else {
 		ir_bitqueue_add(worklist, ref);
 		switch (insn->op) {
diff --git a/ext/opcache/jit/ir/ir_x86.dasc b/ext/opcache/jit/ir/ir_x86.dasc
index d56cb8645e1..76602c2b4bc 100644
--- a/ext/opcache/jit/ir/ir_x86.dasc
+++ b/ext/opcache/jit/ir/ir_x86.dasc
@@ -6868,24 +6868,7 @@ static void ir_emit_return_fp(ir_ctx *ctx, ir_ref ref, ir_insn *insn)
 	ir_backend_data *data = ctx->data;
 	dasm_State **Dst = &data->dasm_state;
 
-	if (IR_IS_CONST_REF(insn->op2)) {
-		ir_insn *value = &ctx->ir_base[insn->op2];
-
-		if ((type == IR_FLOAT && value->val.f == 0.0) || (type == IR_DOUBLE && value->val.d == 0.0)) {
-			|	fldz
-		} else if ((type == IR_FLOAT && value->val.f == 1.0) || (type == IR_DOUBLE && value->val.d == 1.0)) {
-			|	fld1
-		} else {
-			int label = ir_const_label(ctx, insn->op2);
-
-			if (type == IR_DOUBLE) {
-				|	fld qword [=>label]
-			} else {
-				IR_ASSERT(type == IR_FLOAT);
-				|	fld dword [=>label]
-			}
-		}
-	} else if (op2_reg == IR_REG_NONE || IR_REG_SPILLED(op2_reg)) {
+	if (op2_reg == IR_REG_NONE || IR_REG_SPILLED(op2_reg)) {
 		ir_reg fp;
 		int32_t offset = ir_ref_spill_slot_offset(ctx, insn->op2, &fp);
 
@@ -8459,15 +8442,11 @@ static void ir_emit_va_arg(ir_ctx *ctx, ir_ref def, ir_insn *insn)
 	ir_backend_data *data = ctx->data;
 	dasm_State **Dst = &data->dasm_state;
 	ir_type type = insn->type;
-	ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]);
+	ir_reg def_reg = ctx->regs[def][0];
 	ir_reg op2_reg = ctx->regs[def][2];
 	ir_reg tmp_reg = ctx->regs[def][3];
 	int32_t offset;
 
-	if (ctx->use_lists[def].count == 1) {
-		/* dead load */
-		return;
-	}
 	IR_ASSERT(def_reg != IR_REG_NONE && tmp_reg != IR_REG_NONE);
 	if (op2_reg != IR_REG_NONE) {
 		if (IR_REG_SPILLED(op2_reg)) {
@@ -8492,15 +8471,11 @@ static void ir_emit_va_arg(ir_ctx *ctx, ir_ref def, ir_insn *insn)
 	ir_backend_data *data = ctx->data;
 	dasm_State **Dst = &data->dasm_state;
 	ir_type type = insn->type;
-	ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]);
+	ir_reg def_reg = ctx->regs[def][0];
 	ir_reg op2_reg = ctx->regs[def][2];
 	ir_reg tmp_reg = ctx->regs[def][3];
 	int32_t offset;
 
-	if (ctx->use_lists[def].count == 1) {
-		/* dead load */
-		return;
-	}
 	IR_ASSERT(def_reg != IR_REG_NONE && tmp_reg != IR_REG_NONE);
 	if (op2_reg != IR_REG_NONE) {
 		if (IR_REG_SPILLED(op2_reg)) {
@@ -9246,58 +9221,6 @@ static void ir_emit_tailcall(ir_ctx *ctx, ir_ref def, ir_insn *insn)
 		return;
 	}
 
-	/* Move op2 to a tmp register before epilogue if it's in
-	 * used_preserved_regs, because it will be overridden. */
-
-	ir_reg op2_reg = IR_REG_NONE;
-	ir_mem mem = IR_MEM_B(IR_REG_NONE);
-	if (!IR_IS_CONST_REF(insn->op2)) {
-		op2_reg = ctx->regs[def][2];
-
-		ir_regset preserved_regs = (ir_regset)ctx->used_preserved_regs | IR_REGSET(IR_REG_STACK_POINTER);
-		if (ctx->flags & IR_USE_FRAME_POINTER) {
-			preserved_regs |= IR_REGSET(IR_REG_FRAME_POINTER);
-		}
-
-		bool is_spill_slot = op2_reg != IR_REG_NONE
-			&& IR_REG_SPILLED(op2_reg)
-			&& ctx->vregs[insn->op2];
-
-		if (op2_reg != IR_REG_NONE && !is_spill_slot) {
-			if (IR_REGSET_IN(preserved_regs, IR_REG_NUM(op2_reg))) {
-				ir_ref orig_op2_reg = op2_reg;
-				op2_reg = IR_REG_RAX;
-
-				if (IR_REG_SPILLED(orig_op2_reg)) {
-					ir_emit_load(ctx, IR_ADDR, op2_reg, insn->op2);
-				} else {
-					ir_type type = ctx->ir_base[insn->op2].type;
-					|	ASM_REG_REG_OP mov, type, op2_reg, IR_REG_NUM(orig_op2_reg)
-				}
-			} else {
-				op2_reg = IR_REG_NUM(op2_reg);
-			}
-		} else {
-			if (ir_rule(ctx, insn->op2) & IR_FUSED) {
-				IR_ASSERT(op2_reg == IR_REG_NONE);
-				mem = ir_fuse_load(ctx, def, insn->op2);
-			} else {
-				mem = ir_ref_spill_slot(ctx, insn->op2);
-			}
-			ir_reg base = IR_MEM_BASE(mem);
-			ir_reg index = IR_MEM_INDEX(mem);
-			if ((base != IR_REG_NONE && IR_REGSET_IN(preserved_regs, base)) ||
-					(index != IR_REG_NONE && IR_REGSET_IN(preserved_regs, index))) {
-				op2_reg = IR_REG_RAX;
-
-				ir_type type = ctx->ir_base[insn->op2].type;
-				ir_emit_load_mem_int(ctx, type, op2_reg, mem);
-			} else {
-				op2_reg = IR_REG_NONE;
-			}
-		}
-	}
-
 	ir_emit_epilogue(ctx);
 
 	if (IR_IS_CONST_REF(insn->op2)) {
@@ -9323,10 +9246,22 @@ static void ir_emit_tailcall(ir_ctx *ctx, ir_ref def, ir_insn *insn)
 			|.endif
 		}
 	} else {
+		ir_reg op2_reg = ctx->regs[def][2];
+
 		if (op2_reg != IR_REG_NONE) {
-			IR_ASSERT(!IR_REGSET_IN((ir_regset)ctx->used_preserved_regs, op2_reg));
+			if (IR_REG_SPILLED(op2_reg)) {
+				op2_reg = IR_REG_NUM(op2_reg);
+				ir_emit_load(ctx, IR_ADDR, op2_reg, insn->op2);
+			}
 			|	jmp Ra(op2_reg)
 		} else {
+			ir_mem mem;
+
+			if (ir_rule(ctx, insn->op2) & IR_FUSED) {
+				mem = ir_fuse_load(ctx, def, insn->op2);
+			} else {
+				mem = ir_ref_spill_slot(ctx, insn->op2);
+			}
 			|	ASM_TMEM_OP jmp, aword, mem
 		}
 	}