diff --git a/ext/opcache/jit/ir/ir.c b/ext/opcache/jit/ir/ir.c index 9b14217bb22..3adeb5ab601 100644 --- a/ext/opcache/jit/ir/ir.c +++ b/ext/opcache/jit/ir/ir.c @@ -1237,6 +1237,96 @@ void ir_build_def_use_lists(ir_ctx *ctx) } #endif +/* Remove every occurrence of "ref" from the use list of "from": kept entries are compacted in place and the freed tail slots are set to IR_UNUSED. */ +void ir_use_list_remove_all(ir_ctx *ctx, ir_ref from, ir_ref ref) +{ + ir_ref j, n, *p, *q, use; + ir_use_list *use_list = &ctx->use_lists[from]; + ir_ref skip = 0; + + n = use_list->count; + for (j = 0, p = q = &ctx->use_edges[use_list->refs]; j < n; j++, p++) { + use = *p; + if (use == ref) { + skip++; + } else { + if (p != q) { + *q = use; + } + q++; + } + } + if (skip) { + use_list->count -= skip; + do { + *q = IR_UNUSED; + q++; + } while (--skip); + } +} + +/* Remove only the first occurrence of "ref" from the use list of "from": the following entries are shifted down by one slot and the freed last slot is set to IR_UNUSED. Does nothing when "ref" is not in the list. */ +void ir_use_list_remove_one(ir_ctx *ctx, ir_ref from, ir_ref ref) +{ + ir_ref j, n, *p; + ir_use_list *use_list = &ctx->use_lists[from]; + + n = use_list->count; + j = 0; + p = &ctx->use_edges[use_list->refs]; + while (j < n) { + if (*p == ref) { + use_list->count--; + j++; + while (j < n) { + *p = *(p+1); + p++; + j++; + } + *p = IR_UNUSED; + break; + } + p++; /* advance the cursor together with the index, otherwise only the first slot is ever compared */ + j++; + } +} + +/* Replace the first occurrence of "use" in the use list of "ref" with "new_use"; the list is left unchanged when "use" is not found. */ +void ir_use_list_replace(ir_ctx *ctx, ir_ref ref, ir_ref use, ir_ref new_use) +{ + ir_use_list *use_list = &ctx->use_lists[ref]; + ir_ref i, n, *p; + + n = use_list->count; + for (i = 0, p = &ctx->use_edges[use_list->refs]; i < n; i++, p++) { + if (*p == use) { + *p = new_use; + break; + } + } +} + +bool ir_use_list_add(ir_ctx *ctx, ir_ref to, ir_ref ref) +{ + ir_use_list *use_list = &ctx->use_lists[to]; + ir_ref n = use_list->refs + use_list->count; + + if (n < ctx->use_edges_count && ctx->use_edges[n] == IR_UNUSED) { + ctx->use_edges[n] = ref; + use_list->count++; + return 0; + } else { + /* Reallocate the whole edges buffer (this is inefficient) */ + ctx->use_edges = ir_mem_realloc(ctx->use_edges, (ctx->use_edges_count + use_list->count + 1) * sizeof(ir_ref)); + memcpy(ctx->use_edges + ctx->use_edges_count, ctx->use_edges + use_list->refs, use_list->count * sizeof(ir_ref)); + use_list->refs = 
ctx->use_edges_count; + ctx->use_edges[use_list->refs + use_list->count] = ref; + use_list->count++; + ctx->use_edges_count += use_list->count; + return 1; + } +} + /* Helper Data Types */ void ir_array_grow(ir_array *a, uint32_t size) { diff --git a/ext/opcache/jit/ir/ir_aarch64.dasc b/ext/opcache/jit/ir/ir_aarch64.dasc index 5f309f87d3f..2c82712a9b2 100644 --- a/ext/opcache/jit/ir/ir_aarch64.dasc +++ b/ext/opcache/jit/ir/ir_aarch64.dasc @@ -5690,7 +5690,7 @@ void *ir_emit_code(ir_ctx *ctx, size_t *size_ptr) while (i <= bb->end) { if (!((*rule) & (IR_FUSED|IR_SKIPPED))) - switch (*rule) { + switch ((*rule) & IR_RULE_MASK) { case IR_VAR: case IR_PARAM: case IR_PI: diff --git a/ext/opcache/jit/ir/ir_cfg.c b/ext/opcache/jit/ir/ir_cfg.c index 8890dfc82c6..0343c8932aa 100644 --- a/ext/opcache/jit/ir/ir_cfg.c +++ b/ext/opcache/jit/ir/ir_cfg.c @@ -8,6 +8,48 @@ #include "ir.h" #include "ir_private.h" +#define MAKE_NOP(_insn) do { \ + ir_insn *__insn = _insn; \ + __insn->optx = IR_NOP; \ + __insn->op1 = __insn->op2 = __insn->op3 = IR_UNUSED; \ + } while (0) + +#define CLEAR_USES(_ref) do { \ + ir_use_list *__use_list = &ctx->use_lists[_ref]; \ + __use_list->count = 0; \ + __use_list->refs = 0; \ + } while (0) + +#define SWAP_REFS(_ref1, _ref2) do { \ + ir_ref _tmp = _ref1; \ + _ref1 = _ref2; \ + _ref2 = _tmp; \ + } while (0) + +#define SWAP_INSNS(_insn1, _insn2) do { \ + ir_insn *_tmp = _insn1; \ + _insn1 = _insn2; \ + _insn2 = _tmp; \ + } while (0) + +static void ir_get_true_false_refs(const ir_ctx *ctx, ir_ref if_ref, ir_ref *if_true_ref, ir_ref *if_false_ref) +{ + ir_use_list *use_list = &ctx->use_lists[if_ref]; + ir_ref *p = &ctx->use_edges[use_list->refs]; + + IR_ASSERT(use_list->count == 2); + if (ctx->ir_base[*p].op == IR_IF_TRUE) { + IR_ASSERT(ctx->ir_base[*(p + 1)].op == IR_IF_FALSE); + *if_true_ref = *p; + *if_false_ref = *(p + 1); + } else { + IR_ASSERT(ctx->ir_base[*p].op == IR_IF_FALSE); + IR_ASSERT(ctx->ir_base[*(p + 1)].op == IR_IF_TRUE); + *if_false_ref = 
*p; + *if_true_ref = *(p + 1); + } +} + static ir_ref _ir_merge_blocks(ir_ctx *ctx, ir_ref end, ir_ref begin) { ir_ref prev, next; @@ -46,6 +88,726 @@ static ir_ref _ir_merge_blocks(ir_ctx *ctx, ir_ref end, ir_ref begin) return next; } +static ir_ref ir_try_remove_empty_diamond(ir_ctx *ctx, ir_ref ref, ir_insn *insn) +{ + if (insn->inputs_count == 2) { + ir_ref end1_ref = insn->op1, end2_ref = insn->op2; + ir_insn *end1 = &ctx->ir_base[end1_ref]; + ir_insn *end2 = &ctx->ir_base[end2_ref]; + + if (end1->op != IR_END || end2->op != IR_END) { + return IR_UNUSED; + } + + ir_ref start1_ref = end1->op1, start2_ref = end2->op1; + ir_insn *start1 = &ctx->ir_base[start1_ref]; + ir_insn *start2 = &ctx->ir_base[start2_ref]; + + if (start1->op1 != start2->op1) { + return IR_UNUSED; + } + + ir_ref root_ref = start1->op1; + ir_insn *root = &ctx->ir_base[root_ref]; + + if (root->op != IR_IF + && !(root->op == IR_SWITCH && ctx->use_lists[root_ref].count == 2)) { + return IR_UNUSED; + } + + /* Empty Diamond + * + * prev prev + * | condition | condition + * | / | + * IF | + * | \ | + * | +-----+ | + * | IF_FALSE | + * IF_TRUE | => | + * | END | + * END / | + * | +---+ | + * | / | + * MERGE | + * | | + * next next + */ + + ir_ref next_ref = ctx->use_edges[ctx->use_lists[ref].refs]; + ir_insn *next = &ctx->ir_base[next_ref]; + + IR_ASSERT(ctx->use_lists[start1_ref].count == 1); + IR_ASSERT(ctx->use_lists[start2_ref].count == 1); + + next->op1 = root->op1; + ir_use_list_replace(ctx, root->op1, root_ref, next_ref); + if (!IR_IS_CONST_REF(root->op2)) { + ir_use_list_remove_all(ctx, root->op2, root_ref); + } + + MAKE_NOP(root); CLEAR_USES(root_ref); + MAKE_NOP(start1); CLEAR_USES(start1_ref); + MAKE_NOP(start2); CLEAR_USES(start2_ref); + MAKE_NOP(end1); CLEAR_USES(end1_ref); + MAKE_NOP(end2); CLEAR_USES(end2_ref); + MAKE_NOP(insn); CLEAR_USES(ref); + + return next_ref; + } else { + ir_ref i, count = insn->inputs_count, *ops = insn->ops + 1; + ir_ref root_ref = IR_UNUSED; + + for (i = 0; i 
< count; i++) { + ir_ref end_ref, start_ref; + ir_insn *end, *start; + + end_ref = ops[i]; + end = &ctx->ir_base[end_ref]; + if (end->op != IR_END) { + return IR_UNUSED; + } + start_ref = end->op1; + start = &ctx->ir_base[start_ref]; + if (start->op != IR_CASE_VAL && start->op != IR_CASE_DEFAULT) { + return IR_UNUSED; + } + IR_ASSERT(ctx->use_lists[start_ref].count == 1); + if (!root_ref) { + root_ref = start->op1; + if (ctx->use_lists[root_ref].count != count) { + return IR_UNUSED; + } + } else if (start->op1 != root_ref) { + return IR_UNUSED; + } + } + + /* Empty N-Diamond */ + ir_ref next_ref = ctx->use_edges[ctx->use_lists[ref].refs]; + ir_insn *next = &ctx->ir_base[next_ref]; + ir_insn *root = &ctx->ir_base[root_ref]; + + next->op1 = root->op1; + ir_use_list_replace(ctx, root->op1, root_ref, next_ref); + if (!IR_IS_CONST_REF(root->op2)) { /* skip a constant operand, exactly as the two-input case above does */ + ir_use_list_remove_all(ctx, root->op2, root_ref); + } + + MAKE_NOP(root); CLEAR_USES(root_ref); + + for (i = 0; i < count; i++) { + ir_ref end_ref = ops[i]; + ir_insn *end = &ctx->ir_base[end_ref]; + ir_ref start_ref = end->op1; + ir_insn *start = &ctx->ir_base[start_ref]; + + MAKE_NOP(start); CLEAR_USES(start_ref); + MAKE_NOP(end); CLEAR_USES(end_ref); + } + + MAKE_NOP(insn); CLEAR_USES(ref); + + return next_ref; + } +} + +/* Try to turn a PHI ("ref"/"insn") over an empty IF diamond ending in "merge" into MIN/MAX of the compared values. "merge" must have exactly two uses and the PHI three inputs (both asserted). Returns the remaining use of "merge" when the diamond was removed, IR_UNUSED when the pattern does not match. */ +static ir_ref ir_optimize_phi(ir_ctx *ctx, ir_ref merge_ref, ir_insn *merge, ir_ref ref, ir_insn *insn) +{ + IR_ASSERT(insn->inputs_count == 3); + IR_ASSERT(ctx->use_lists[merge_ref].count == 2); + + ir_ref end1_ref = merge->op1, end2_ref = merge->op2; + ir_insn *end1 = &ctx->ir_base[end1_ref]; + ir_insn *end2 = &ctx->ir_base[end2_ref]; + + if (end1->op == IR_END && end2->op == IR_END) { + ir_ref start1_ref = end1->op1, start2_ref = end2->op1; + ir_insn *start1 = &ctx->ir_base[start1_ref]; + ir_insn *start2 = &ctx->ir_base[start2_ref]; + + if (start1->op1 == start2->op1) { + ir_ref root_ref = start1->op1; + ir_insn *root = &ctx->ir_base[root_ref]; + + if (root->op == IR_IF && !IR_IS_CONST_REF(root->op2) && ctx->use_lists[root->op2].count == 1) { + ir_ref cond_ref = 
root->op2; + ir_insn *cond = &ctx->ir_base[cond_ref]; + ir_type type = insn->type; + bool is_cmp, is_less; + + if (IR_IS_TYPE_FP(type)) { + is_cmp = (cond->op == IR_LT || cond->op == IR_LE || cond->op == IR_GT || cond->op == IR_GE || + cond->op == IR_ULT || cond->op == IR_ULE || cond->op == IR_UGT || cond->op == IR_UGE); + } else if (IR_IS_TYPE_SIGNED(type)) { + is_cmp = (cond->op == IR_LT || cond->op == IR_LE || cond->op == IR_GT || cond->op == IR_GE); + } else if (IR_IS_TYPE_UNSIGNED(type)) { + is_cmp = (cond->op == IR_ULT || cond->op == IR_ULE || cond->op == IR_UGT || cond->op == IR_UGE); + } + + if (is_cmp + && ((insn->op2 == cond->op1 && insn->op3 == cond->op2) + || (insn->op2 == cond->op2 && insn->op3 == cond->op1))) { + /* MAX/MIN + * + * prev prev + * | LT(A, B) | + * | / | + * IF | + * | \ | + * | +-----+ | + * | IF_FALSE | + * IF_TRUE | => | + * | END | + * END / | + * | +---+ | + * | / | + * MERGE | + * | \ | + * | PHI(A, B) | MIN(A, B) + * next next + */ + ir_ref next_ref = ctx->use_edges[ctx->use_lists[merge_ref].refs]; + ir_insn *next; + + if (next_ref == ref) { + next_ref = ctx->use_edges[ctx->use_lists[merge_ref].refs + 1]; + } + next = &ctx->ir_base[next_ref]; + + IR_ASSERT(ctx->use_lists[start1_ref].count == 1); + IR_ASSERT(ctx->use_lists[start2_ref].count == 1); + + if (IR_IS_TYPE_FP(type)) { + is_less = (cond->op == IR_LT || cond->op == IR_LE || + cond->op == IR_ULT || cond->op == IR_ULE); + } else if (IR_IS_TYPE_SIGNED(type)) { + is_less = (cond->op == IR_LT || cond->op == IR_LE); + } else if (IR_IS_TYPE_UNSIGNED(type)) { + is_less = (cond->op == IR_ULT || cond->op == IR_ULE); + } + insn->op = ( + (is_less ? cond->op1 : cond->op2) + == + ((start1->op == IR_IF_TRUE) ? insn->op2 : insn->op3) + ) ? 
IR_MIN : IR_MAX; + insn->inputs_count = 2; + if (insn->op2 > insn->op3) { + insn->op1 = insn->op2; + insn->op2 = insn->op3; + } else { + insn->op1 = insn->op3; + } + insn->op3 = IR_UNUSED; + + next->op1 = root->op1; + ir_use_list_replace(ctx, root->op1, root_ref, next_ref); + ir_use_list_remove_all(ctx, root->op2, root_ref); + if (!IR_IS_CONST_REF(insn->op1)) { + ir_use_list_remove_all(ctx, insn->op1, cond_ref); + } + if (!IR_IS_CONST_REF(insn->op2)) { + ir_use_list_remove_all(ctx, insn->op2, cond_ref); + } + + MAKE_NOP(cond); CLEAR_USES(cond_ref); + MAKE_NOP(root); CLEAR_USES(root_ref); + MAKE_NOP(start1); CLEAR_USES(start1_ref); + MAKE_NOP(start2); CLEAR_USES(start2_ref); + MAKE_NOP(end1); CLEAR_USES(end1_ref); + MAKE_NOP(end2); CLEAR_USES(end2_ref); + MAKE_NOP(merge); CLEAR_USES(merge_ref); + + return next_ref; +#if 0 + } else { + /* COND + * + * prev prev + * | cond | + * | / | + * IF | + * | \ | + * | +-----+ | + * | IF_FALSE | + * IF_TRUE | => | + * | END | + * END / | + * | +---+ | + * | / | + * MERGE | + * | \ | + * | PHI(A, B) | COND(cond, A, B) + * next next + */ + ir_ref next_ref = ctx->use_edges[ctx->use_lists[merge_ref].refs]; + ir_insn *next; + + if (next_ref == ref) { + next_ref = ctx->use_edges[ctx->use_lists[merge_ref].refs + 1]; + } + next = &ctx->ir_base[next_ref]; + + IR_ASSERT(ctx->use_lists[start1_ref].count == 1); + IR_ASSERT(ctx->use_lists[start2_ref].count == 1); + + insn->op = IR_COND; + insn->inputs_count = 3; + insn->op1 = cond_ref; + if (start1->op == IR_IF_FALSE) { + SWAP_REFS(insn->op2, insn->op3); + } + + next->op1 = root->op1; + ir_use_list_replace(ctx, cond_ref, root_ref, ref); + ir_use_list_replace(ctx, root->op1, root_ref, next_ref); + ir_use_list_remove_all(ctx, root->op2, root_ref); + + MAKE_NOP(root); CLEAR_USES(root_ref); + MAKE_NOP(start1); CLEAR_USES(start1_ref); + MAKE_NOP(start2); CLEAR_USES(start2_ref); + MAKE_NOP(end1); CLEAR_USES(end1_ref); + MAKE_NOP(end2); CLEAR_USES(end2_ref); + MAKE_NOP(merge); 
CLEAR_USES(merge_ref); + + return next_ref; +#endif + } + } + } + } + + return IR_UNUSED; +} + +static bool ir_cmp_is_true(ir_op op, ir_insn *op1, ir_insn *op2) +{ + IR_ASSERT(op1->type == op2->type); + if (IR_IS_TYPE_INT(op1->type)) { + if (op == IR_EQ) { + return op1->val.u64 == op2->val.u64; + } else if (op == IR_NE) { + return op1->val.u64 != op2->val.u64; + } else if (op == IR_LT) { + if (IR_IS_TYPE_SIGNED(op1->type)) { + return op1->val.i64 < op2->val.i64; + } else { + return op1->val.u64 < op2->val.u64; + } + } else if (op == IR_GE) { + if (IR_IS_TYPE_SIGNED(op1->type)) { + return op1->val.i64 >= op2->val.i64; + } else { + return op1->val.u64 >= op2->val.u64; + } + } else if (op == IR_LE) { + if (IR_IS_TYPE_SIGNED(op1->type)) { + return op1->val.i64 <= op2->val.i64; + } else { + return op1->val.u64 <= op2->val.u64; + } + } else if (op == IR_GT) { + if (IR_IS_TYPE_SIGNED(op1->type)) { + return op1->val.i64 > op2->val.i64; + } else { + return op1->val.u64 > op2->val.u64; + } + } else if (op == IR_ULT) { + return op1->val.u64 < op2->val.u64; + } else if (op == IR_UGE) { + return op1->val.u64 >= op2->val.u64; + } else if (op == IR_ULE) { + return op1->val.u64 <= op2->val.u64; + } else if (op == IR_UGT) { + return op1->val.u64 > op2->val.u64; + } else { + IR_ASSERT(0); + return 0; + } + } else if (op1->type == IR_DOUBLE) { + if (op == IR_EQ) { + return op1->val.d == op2->val.d; + } else if (op == IR_NE) { + return op1->val.d != op2->val.d; + } else if (op == IR_LT) { + return op1->val.d < op2->val.d; + } else if (op == IR_GE) { + return op1->val.d >= op2->val.d; + } else if (op == IR_LE) { + return op1->val.d <= op2->val.d; + } else if (op == IR_GT) { + return op1->val.d > op2->val.d; + } else if (op == IR_ULT) { + return !(op1->val.d >= op2->val.d); + } else if (op == IR_UGE) { + return !(op1->val.d < op2->val.d); + } else if (op == IR_ULE) { + return !(op1->val.d > op2->val.d); + } else if (op == IR_UGT) { + return !(op1->val.d <= op2->val.d); + } else { + 
IR_ASSERT(0); + return 0; + } + } else { + IR_ASSERT(op1->type == IR_FLOAT); + if (op == IR_EQ) { + return op1->val.f == op2->val.f; + } else if (op == IR_NE) { + return op1->val.f != op2->val.f; + } else if (op == IR_LT) { + return op1->val.f < op2->val.f; + } else if (op == IR_GE) { + return op1->val.f >= op2->val.f; + } else if (op == IR_LE) { + return op1->val.f <= op2->val.f; + } else if (op == IR_GT) { + return op1->val.f > op2->val.f; + } else if (op == IR_ULT) { + return !(op1->val.f >= op2->val.f); + } else if (op == IR_UGE) { + return !(op1->val.f < op2->val.f); + } else if (op == IR_ULE) { + return !(op1->val.f > op2->val.f); + } else if (op == IR_UGT) { + return !(op1->val.f <= op2->val.f); + } else { + IR_ASSERT(0); + return 0; + } + } +} + +static ir_ref ir_try_split_if(ir_ctx *ctx, ir_ref ref, ir_insn *insn) +{ + ir_ref cond_ref = insn->op2; + ir_insn *cond = &ctx->ir_base[cond_ref]; + + if (cond->op == IR_PHI + && cond->inputs_count == 3 + && cond->op1 == insn->op1 + && ((IR_IS_CONST_REF(cond->op2) && !IR_IS_SYM_CONST(ctx->ir_base[cond->op2].op)) + || (IR_IS_CONST_REF(cond->op3) && !IR_IS_SYM_CONST(ctx->ir_base[cond->op3].op)))) { + ir_ref merge_ref = insn->op1; + ir_insn *merge = &ctx->ir_base[merge_ref]; + + if (ctx->use_lists[merge_ref].count == 2) { + ir_ref end1_ref = merge->op1, end2_ref = merge->op2; + ir_insn *end1 = &ctx->ir_base[end1_ref]; + ir_insn *end2 = &ctx->ir_base[end2_ref]; + + if (end1->op == IR_END && end2->op == IR_END) { + ir_ref if_true_ref, if_false_ref; + ir_insn *if_true, *if_false; + ir_op op = IR_IF_FALSE; + + ir_get_true_false_refs(ctx, ref, &if_true_ref, &if_false_ref); + + if (!IR_IS_CONST_REF(cond->op2) || IR_IS_SYM_CONST(ctx->ir_base[cond->op2].op)) { + IR_ASSERT(IR_IS_CONST_REF(cond->op3)); + SWAP_REFS(cond->op2, cond->op3); + SWAP_REFS(merge->op1, merge->op2); + SWAP_REFS(end1_ref, end2_ref); + SWAP_INSNS(end1, end2); + } + if (ir_const_is_true(&ctx->ir_base[cond->op2])) { + SWAP_REFS(if_true_ref, if_false_ref); + 
op = IR_IF_TRUE; + } + if_true = &ctx->ir_base[if_true_ref]; + if_false = &ctx->ir_base[if_false_ref]; + + /* Simple IF Split + * + * | | | | + * | END | IF(X) + * END / END / \ + * | +---+ | +--+ + + * | / | / | + * MERGE | IF_FALSE | + * | \ | | | + * | PHI(false, X) | | | + * | / | | | + * IF => | END | + * | \ | | | + * | +------+ | | | + * | IF_TRUE | | IF_TRUE + * IF_FALSE | MERGE + * | | + */ + ir_use_list_remove_all(ctx, merge_ref, cond_ref); + ir_use_list_remove_all(ctx, ref, if_true_ref); + if (!IR_IS_CONST_REF(cond->op3)) { + /* cond->op3 may itself be a constant (both PHI inputs constant); skip the use-list update then, as ir_try_split_if_cmp() does */ + ir_use_list_replace(ctx, cond->op3, cond_ref, end2_ref); + } + ir_use_list_replace(ctx, end1_ref, merge_ref, if_false_ref); + ir_use_list_add(ctx, end2_ref, if_true_ref); + + end2->optx = IR_OPTX(IR_IF, IR_VOID, 2); + end2->op2 = cond->op3; + + merge->optx = IR_OPTX(op, IR_VOID, 1); + merge->op1 = end2_ref; + merge->op2 = IR_UNUSED; + + MAKE_NOP(cond); + CLEAR_USES(cond_ref); + + insn->optx = IR_OPTX(IR_END, IR_VOID, 1); + insn->op1 = merge_ref; + insn->op2 = IR_UNUSED; + + if_true->op1 = end2_ref; + + if_false->optx = IR_OPTX(IR_MERGE, IR_VOID, 2); + if_false->op1 = end1_ref; + if_false->op2 = ref; + + return ref; + } + } + } + + return IR_UNUSED; +} + +/* Try to split an IF ("ref"/"insn") whose condition is CMP(PHI(C1, X), C2), where the PHI belongs to the two-input MERGE feeding the IF. Returns IR_UNUSED when the pattern does not match; otherwise the graph has been rewritten and the result is either "ref" or IR_NULL. */ +static ir_ref ir_try_split_if_cmp(ir_ctx *ctx, ir_worklist *worklist, ir_ref ref, ir_insn *insn) +{ + ir_ref cond_ref = insn->op2; + ir_insn *cond = &ctx->ir_base[cond_ref]; + + if (cond->op >= IR_EQ && cond->op <= IR_UGT + && IR_IS_CONST_REF(cond->op2) + && !IR_IS_SYM_CONST(ctx->ir_base[cond->op2].op) + && ctx->use_lists[insn->op2].count == 1) { + ir_ref phi_ref = cond->op1; + ir_insn *phi = &ctx->ir_base[phi_ref]; + + if (phi->op == IR_PHI + && phi->inputs_count == 3 + && phi->op1 == insn->op1 + && ctx->use_lists[phi_ref].count == 1 + && ((IR_IS_CONST_REF(phi->op2) && !IR_IS_SYM_CONST(ctx->ir_base[phi->op2].op)) + || (IR_IS_CONST_REF(phi->op3) && !IR_IS_SYM_CONST(ctx->ir_base[phi->op3].op)))) { + ir_ref merge_ref = insn->op1; + ir_insn *merge = &ctx->ir_base[merge_ref]; + + if (ctx->use_lists[merge_ref].count == 2) { + 
ir_ref end1_ref = merge->op1, end2_ref = merge->op2; + ir_insn *end1 = &ctx->ir_base[end1_ref]; + ir_insn *end2 = &ctx->ir_base[end2_ref]; + + if (end1->op == IR_END && end2->op == IR_END) { + ir_ref if_true_ref, if_false_ref; + ir_insn *if_true, *if_false; + ir_op op = IR_IF_FALSE; + + ir_get_true_false_refs(ctx, ref, &if_true_ref, &if_false_ref); + + if (!IR_IS_CONST_REF(phi->op2) || IR_IS_SYM_CONST(ctx->ir_base[phi->op2].op)) { + IR_ASSERT(IR_IS_CONST_REF(phi->op3)); + SWAP_REFS(phi->op2, phi->op3); + SWAP_REFS(merge->op1, merge->op2); + SWAP_REFS(end1_ref, end2_ref); + SWAP_INSNS(end1, end2); + } + if (ir_cmp_is_true(cond->op, &ctx->ir_base[phi->op2], &ctx->ir_base[cond->op2])) { + SWAP_REFS(if_true_ref, if_false_ref); + op = IR_IF_TRUE; + } + if_true = &ctx->ir_base[if_true_ref]; + if_false = &ctx->ir_base[if_false_ref]; + + if (IR_IS_CONST_REF(phi->op3) && !IR_IS_SYM_CONST(ctx->ir_base[phi->op3].op)) { + if (ir_cmp_is_true(cond->op, &ctx->ir_base[phi->op3], &ctx->ir_base[cond->op2]) ^ (op == IR_IF_TRUE)) { + /* IF Split + * + * | | | | + * | END | END + * END / END | + * | +---+ | | + * | / | | + * MERGE | | + * | \ | | + * | PHI(C1, X) | | + * | | | | + * | CMP(_, C2) | | + * | / | | + * IF => | | + * | \ | | + * | +------+ | | + * | IF_TRUE | BEGIN + * IF_FALSE | BEGIN | + * | | + */ + + ir_use_list_replace(ctx, end1_ref, merge_ref, if_false_ref); + ir_use_list_replace(ctx, end2_ref, merge_ref, if_true_ref); + + MAKE_NOP(merge); CLEAR_USES(merge_ref); + MAKE_NOP(phi); CLEAR_USES(phi_ref); + MAKE_NOP(cond); CLEAR_USES(cond_ref); + MAKE_NOP(insn); CLEAR_USES(ref); + + if_false->optx = IR_OPTX(IR_BEGIN, IR_VOID, 1); + if_false->op1 = end1_ref; + + if_true->optx = IR_OPTX(IR_BEGIN, IR_VOID, 1); + if_true->op1 = end2_ref; + + ir_worklist_push(worklist, end1_ref); + ir_worklist_push(worklist, end2_ref); + + return IR_NULL; + } else { + /* IF Split + * + * | | | | + * | END | END + * END / END | + * | +---+ | | + * | / | | + * MERGE | | + * | \ | | + * | PHI(C1, 
X) | | + * | | | + + * | CMP(_, C2) | / + * | / | / + * IF => | / + * | \ | / + * | +------+ | / + * | IF_TRUE | / BEGIN(unreachable) + * IF_FALSE | MERGE | + * | | + */ + + ir_use_list_replace(ctx, end1_ref, merge_ref, if_false_ref); + ir_use_list_replace(ctx, end2_ref, merge_ref, if_false_ref); + + MAKE_NOP(merge); CLEAR_USES(merge_ref); + MAKE_NOP(phi); CLEAR_USES(phi_ref); + MAKE_NOP(cond); CLEAR_USES(cond_ref); + MAKE_NOP(insn); CLEAR_USES(ref); + + if_false->optx = IR_OPTX(IR_MERGE, IR_VOID, 2); + if_false->op1 = end1_ref; + if_false->op2 = end2_ref; + + if_true->optx = IR_BEGIN; + if_true->op1 = IR_UNUSED; + + ctx->flags2 &= ~IR_SCCP_DONE; + + ir_worklist_push(worklist, end1_ref); + ir_worklist_push(worklist, end2_ref); + + return IR_NULL; + } + } else { + /* IF Split + * + * | | | | + * | END | IF<----+ + * END / END / \ | + * | +---+ | +--+ + | + * | / | / | | + * MERGE | IF_FALSE | | + * | \ | | | | + * | PHI(C1, X) | | | | + * | | | | | | + * | CMP(_, C2) | | | CMP(X, C2) + * | / | | | + * IF => | END | + * | \ | | | + * | +------+ | | | + * | IF_TRUE | | IF_TRUE + * IF_FALSE | MERGE + * | | + */ + + ir_use_list_remove_all(ctx, merge_ref, phi_ref); + ir_use_list_remove_all(ctx, ref, if_true_ref); + if (!IR_IS_CONST_REF(phi->op3)) { + ir_use_list_replace(ctx, phi->op3, phi_ref, insn->op2); + } + ir_use_list_replace(ctx, end1_ref, merge_ref, if_false_ref); + ir_use_list_replace(ctx, cond_ref, ref, end2_ref); + ir_use_list_add(ctx, end2_ref, if_true_ref); + + end2->optx = IR_OPTX(IR_IF, IR_VOID, 2); + end2->op2 = insn->op2; + + merge->optx = IR_OPTX(op, IR_VOID, 1); + merge->op1 = end2_ref; + merge->op2 = IR_UNUSED; + + cond->op1 = phi->op3; + MAKE_NOP(phi); + CLEAR_USES(phi_ref); + + insn->optx = IR_OPTX(IR_END, IR_VOID, 1); + insn->op1 = merge_ref; + insn->op2 = IR_UNUSED; + + if_true->op1 = end2_ref; + + if_false->optx = IR_OPTX(IR_MERGE, IR_VOID, 2); + if_false->op1 = end1_ref; + if_false->op2 = ref; + + ir_worklist_push(worklist, end1_ref); + + return 
ref; + } + } + } + } + } + + return IR_UNUSED; +} + +static ir_ref ir_optimize_merge(ir_ctx *ctx, ir_worklist *worklist, ir_ref merge_ref, ir_insn *merge) +{ + ir_use_list *use_list = &ctx->use_lists[merge_ref]; + + if (use_list->count == 1) { + return ir_try_remove_empty_diamond(ctx, merge_ref, merge); + } else if (use_list->count == 2) { + if (merge->inputs_count == 2) { + ir_ref phi_ref = ctx->use_edges[use_list->refs]; + ir_insn *phi = &ctx->ir_base[phi_ref]; + + ir_ref next_ref = ctx->use_edges[use_list->refs + 1]; + ir_insn *next = &ctx->ir_base[next_ref]; + IR_ASSERT(next->op != IR_PHI); + + if (phi->op == IR_PHI) { + if (next->op == IR_IF && next->op1 == merge_ref && ctx->use_lists[phi_ref].count == 1) { + if (next->op2 == phi_ref) { + ir_ref ref = ir_try_split_if(ctx, next_ref, next); + if (ref) { + return ref; + } + } else { + ir_insn *cmp = &ctx->ir_base[next->op2]; + + if (cmp->op >= IR_EQ && cmp->op <= IR_UGT + && cmp->op1 == phi_ref + && IR_IS_CONST_REF(cmp->op2) + && !IR_IS_SYM_CONST(ctx->ir_base[cmp->op2].op) + && ctx->use_lists[next->op2].count == 1) { + ir_ref ref = ir_try_split_if_cmp(ctx, worklist, next_ref, next); + if (ref) { + return ref; + } + } + } + } + return ir_optimize_phi(ctx, merge_ref, merge, phi_ref, phi); + } + } + } + + return IR_UNUSED; +} + IR_ALWAYS_INLINE void _ir_add_successors(const ir_ctx *ctx, ir_ref ref, ir_worklist *worklist) { ir_use_list *use_list = &ctx->use_lists[ref]; @@ -122,10 +884,14 @@ int ir_build_cfg(ir_ctx *ctx) ref = ctx->ir_base[ref].op3; } +next: while (ir_worklist_len(&worklist)) { ref = ir_worklist_pop(&worklist); insn = &ctx->ir_base[ref]; + if (insn->op == IR_NOP) { + continue; + } IR_ASSERT(IR_IS_BB_END(insn->op)); /* Remember BB end */ end = ref; @@ -143,13 +909,24 @@ int ir_build_cfg(ir_ctx *ctx) while (1) { insn = &ctx->ir_base[ref]; if (IR_IS_BB_START(insn->op)) { - if (insn->op == IR_BEGIN - && (ctx->flags & IR_OPT_CFG) - && ctx->ir_base[insn->op1].op == IR_END - && ctx->use_lists[ref].count == 
1) { - ref = _ir_merge_blocks(ctx, insn->op1, ref); - ref = ctx->ir_base[ref].op1; - continue; + if (ctx->flags & IR_OPT_CFG) { + if (insn->op == IR_BEGIN) { + if (ctx->ir_base[insn->op1].op == IR_END + && ctx->use_lists[ref].count == 1) { + ref = _ir_merge_blocks(ctx, insn->op1, ref); + ref = ctx->ir_base[ref].op1; + continue; + } + } else if (insn->op == IR_MERGE) { + ir_ref prev = ir_optimize_merge(ctx, &worklist, ref, insn); + if (prev) { + if (prev == IR_NULL) { + goto next; + } + ref = ctx->ir_base[prev].op1; + continue; + } + } } break; } @@ -180,6 +957,9 @@ int ir_build_cfg(ir_ctx *ctx) ref = ir_worklist_pop(&worklist); insn = &ctx->ir_base[ref]; + if (insn->op == IR_NOP) { + continue; + } IR_ASSERT(IR_IS_BB_START(insn->op)); /* Remember BB start */ start = ref; @@ -233,10 +1013,14 @@ next_successor: /* SCCP already removed UNREACHABKE blocks, otherwise all blocks are marked as UNREACHABLE first */ bb_init_falgs = (ctx->flags2 & IR_SCCP_DONE) ? 0 : IR_BB_UNREACHABLE; IR_BITSET_FOREACH(bb_starts, len, start) { + insn = &ctx->ir_base[start]; + if (insn->op == IR_NOP) { + _blocks[start] = 0; + continue; + } end = _blocks[start]; _blocks[start] = b; _blocks[end] = b; - insn = &ctx->ir_base[start]; IR_ASSERT(IR_IS_BB_START(insn->op)); IR_ASSERT(end > start); bb->start = start; @@ -277,6 +1061,7 @@ next_successor: b++; bb++; } IR_BITSET_FOREACH_END(); + bb_count = b - 1; IR_ASSERT(count == edges_count * 2); ir_mem_free(bb_starts); @@ -363,27 +1148,6 @@ static void ir_remove_predecessor(ir_ctx *ctx, ir_block *bb, uint32_t from) bb->predecessors_count = n; } -static void ir_remove_from_use_list(ir_ctx *ctx, ir_ref from, ir_ref ref) -{ - ir_ref j, n, *p, *q, use; - ir_use_list *use_list = &ctx->use_lists[from]; - ir_ref skip = 0; - - n = use_list->count; - for (j = 0, p = q = &ctx->use_edges[use_list->refs]; j < n; j++, p++) { - use = *p; - if (use == ref) { - skip++; - } else { - if (p != q) { - *q = use; - } - q++; - } - } - use_list->count -= skip; -} - static 
void ir_remove_merge_input(ir_ctx *ctx, ir_ref merge, ir_ref from) { ir_ref i, j, n, k, *p, use; @@ -425,13 +1189,13 @@ static void ir_remove_merge_input(ir_ctx *ctx, ir_ref merge, ir_ref from) if (ir_bitset_in(life_inputs, j - 1)) { use_insn->op1 = ir_insn_op(use_insn, j); } else if (input > 0) { - ir_remove_from_use_list(ctx, input, use); + ir_use_list_remove_all(ctx, input, use); } } use_insn->op = IR_COPY; use_insn->op2 = IR_UNUSED; use_insn->op3 = IR_UNUSED; - ir_remove_from_use_list(ctx, merge, use); + ir_use_list_remove_all(ctx, merge, use); } } } @@ -456,7 +1220,7 @@ static void ir_remove_merge_input(ir_ctx *ctx, ir_ref merge, ir_ref from) } i++; } else if (input > 0) { - ir_remove_from_use_list(ctx, input, use); + ir_use_list_remove_all(ctx, input, use); } } } @@ -464,7 +1228,7 @@ static void ir_remove_merge_input(ir_ctx *ctx, ir_ref merge, ir_ref from) } } ir_mem_free(life_inputs); - ir_remove_from_use_list(ctx, from, merge); + ir_use_list_remove_all(ctx, from, merge); } /* CFG constructed after SCCP pass doesn't have unreachable BBs, otherwise they should be removed */ diff --git a/ext/opcache/jit/ir/ir_dump.c b/ext/opcache/jit/ir/ir_dump.c index b4641ab381a..47c2205d0c5 100644 --- a/ext/opcache/jit/ir/ir_dump.c +++ b/ext/opcache/jit/ir/ir_dump.c @@ -607,7 +607,7 @@ void ir_dump_codegen(const ir_ctx *ctx, FILE *f) } if (ctx->rules) { uint32_t rule = ctx->rules[i]; - uint32_t id = rule & ~(IR_FUSED_REG|IR_FUSED|IR_SKIPPED|IR_SIMPLE); + uint32_t id = rule & IR_RULE_MASK; if (id < IR_LAST_OP) { fprintf(f, " # RULE(%s", ir_op_name[id]); diff --git a/ext/opcache/jit/ir/ir_fold.h b/ext/opcache/jit/ir/ir_fold.h index 708d98e2323..f2ab22cb74a 100644 --- a/ext/opcache/jit/ir/ir_fold.h +++ b/ext/opcache/jit/ir/ir_fold.h @@ -288,13 +288,13 @@ IR_FOLD(ADD(C_U16, C_U16)) IR_FOLD(ADD(C_U32, C_U32)) { - IR_ASSERT(IR_OPT_TYPE(opt) == op1_insn->type); + IR_ASSERT(IR_OPT_TYPE(opt) == op1_insn->type || (sizeof(void*) == 4 && IR_OPT_TYPE(opt) == IR_ADDR)); 
IR_FOLD_CONST_U(op1_insn->val.u32 + op2_insn->val.u32); } IR_FOLD(ADD(C_U64, C_U64)) { - IR_ASSERT(IR_OPT_TYPE(opt) == op1_insn->type); + IR_ASSERT(IR_OPT_TYPE(opt) == op1_insn->type || (sizeof(void*) == 8 && IR_OPT_TYPE(opt) == IR_ADDR)); IR_FOLD_CONST_U(op1_insn->val.u64 + op2_insn->val.u64); } @@ -318,13 +318,13 @@ IR_FOLD(ADD(C_I16, C_I16)) IR_FOLD(ADD(C_I32, C_I32)) { - IR_ASSERT(IR_OPT_TYPE(opt) == op1_insn->type); + IR_ASSERT(IR_OPT_TYPE(opt) == op1_insn->type || (sizeof(void*) == 4 && IR_OPT_TYPE(opt) == IR_ADDR)); IR_FOLD_CONST_I(op1_insn->val.i32 + op2_insn->val.i32); } IR_FOLD(ADD(C_I64, C_I64)) { - IR_ASSERT(IR_OPT_TYPE(opt) == op1_insn->type); + IR_ASSERT(IR_OPT_TYPE(opt) == op1_insn->type || (sizeof(void*) == 8 && IR_OPT_TYPE(opt) == IR_ADDR)); IR_FOLD_CONST_I(op1_insn->val.i64 + op2_insn->val.i64); } @@ -1478,6 +1478,32 @@ IR_FOLD(SUB_OV(_, C_ADDR)) IR_FOLD_NEXT; } +IR_FOLD(ADD(C_U8, _)) +IR_FOLD(ADD(C_U16, _)) +IR_FOLD(ADD(C_U32, _)) +IR_FOLD(ADD(C_U64, _)) +IR_FOLD(ADD(C_I8, _)) +IR_FOLD(ADD(C_I16, _)) +IR_FOLD(ADD(C_I32, _)) +IR_FOLD(ADD(C_I64, _)) +IR_FOLD(ADD(C_ADDR, _)) +IR_FOLD(ADD_OV(C_U8, _)) +IR_FOLD(ADD_OV(C_U16, _)) +IR_FOLD(ADD_OV(C_U32, _)) +IR_FOLD(ADD_OV(C_U64, _)) +IR_FOLD(ADD_OV(C_I8, _)) +IR_FOLD(ADD_OV(C_I16, _)) +IR_FOLD(ADD_OV(C_I32, _)) +IR_FOLD(ADD_OV(C_I64, _)) +IR_FOLD(ADD_OV(C_ADDR, _)) +{ + if (op1_insn->val.u64 == 0) { + /* 0 + a => a */ + IR_FOLD_COPY(op2); + } + IR_FOLD_NEXT; +} + IR_FOLD(SUB(C_I8, _)) IR_FOLD(SUB(C_I16, _)) IR_FOLD(SUB(C_I32, _)) @@ -1628,13 +1654,14 @@ IR_FOLD(MUL(_, C_U8)) IR_FOLD(MUL(_, C_U16)) IR_FOLD(MUL(_, C_U32)) IR_FOLD(MUL(_, C_U64)) +IR_FOLD(MUL(_, C_ADDR)) { if (op2_insn->val.u64 == 0) { /* a * 0 => 0 */ IR_FOLD_COPY(op2); } else if (op2_insn->val.u64 == 1) { IR_FOLD_COPY(op1); - } else if (op2_insn->val.u64 == 2) { + } else if (op2_insn->val.u64 == 2 && IR_OPT_TYPE(opt) != IR_ADDR) { opt = IR_ADD | (opt & IR_OPT_TYPE_MASK); op2 = op1; IR_FOLD_RESTART; @@ -1667,6 +1694,51 @@ IR_FOLD(MUL(_, 
C_I64)) IR_FOLD_NEXT; } +IR_FOLD(MUL(C_U8, _)) +IR_FOLD(MUL(C_U16, _)) +IR_FOLD(MUL(C_U32, _)) +IR_FOLD(MUL(C_U64, _)) +IR_FOLD(MUL(C_ADDR, _)) +{ + if (op1_insn->val.u64 == 0) { + /* 0 * a => 0 */ + IR_FOLD_COPY(op1); + } else if (op1_insn->val.u64 == 1) { + IR_FOLD_COPY(op2); + } else if (op1_insn->val.u64 == 2 && IR_OPT_TYPE(opt) != IR_ADDR) { + opt = IR_ADD | (opt & IR_OPT_TYPE_MASK); + op1 = op2; + IR_FOLD_RESTART; + } + IR_FOLD_NEXT; +} + +IR_FOLD(MUL(C_I8, _)) +IR_FOLD(MUL(C_I16, _)) +IR_FOLD(MUL(C_I32, _)) +IR_FOLD(MUL(C_I64, _)) +{ + if (op1_insn->val.i64 == 0) { + /* 0 * a => 0 */ + IR_FOLD_COPY(op1); + } else if (op1_insn->val.i64 == 1) { + /* 1 * a => a */ + IR_FOLD_COPY(op2); + } else if (op1_insn->val.i64 == 2) { + /* 2 * a => a + a */ + opt = IR_ADD | (opt & IR_OPT_TYPE_MASK); + op1 = op2; + IR_FOLD_RESTART; + } else if (op1_insn->val.i64 == -1) { + /* -1 * a => -a */ + opt = IR_NEG | (opt & IR_OPT_TYPE_MASK); + op1 = op2; + op2 = IR_UNUSED; + IR_FOLD_RESTART; + } + IR_FOLD_NEXT; +} + IR_FOLD(MUL(_, C_DOUBLE)) { if (op2_insn->val.d == 1.0) { diff --git a/ext/opcache/jit/ir/ir_private.h b/ext/opcache/jit/ir/ir_private.h index d73bcd07ac9..a0001aed30e 100644 --- a/ext/opcache/jit/ir/ir_private.h +++ b/ext/opcache/jit/ir/ir_private.h @@ -921,6 +921,11 @@ struct _ir_use_list { ir_ref count; }; +void ir_use_list_remove_all(ir_ctx *ctx, ir_ref from, ir_ref use); +void ir_use_list_remove_one(ir_ctx *ctx, ir_ref from, ir_ref use); +void ir_use_list_replace(ir_ctx *ctx, ir_ref ref, ir_ref use, ir_ref new_use); +bool ir_use_list_add(ir_ctx *ctx, ir_ref to, ir_ref new_use); + /*** IR Basic Blocks info ***/ #define IR_IS_BB_START(op) \ ((ir_op_flags[op] & IR_OP_FLAG_BB_START) != 0) @@ -1213,6 +1218,7 @@ IR_ALWAYS_INLINE int8_t ir_get_alocated_reg(const ir_ctx *ctx, ir_ref ref, int o #define IR_SKIPPED (1U<<30) /* Insn is skipped (code is not generated) */ #define IR_SIMPLE (1U<<29) /* Insn doesn't have any target constraints */ #define IR_FUSED_REG (1U<<28) /* 
Register assignemnt may be stored in ctx->fused_regs instead of ctx->regs */ +#define IR_MAY_SWAP (1U<<27) /* Allow swapping operands for better register allocation */ #define IR_RULE_MASK 0xff diff --git a/ext/opcache/jit/ir/ir_ra.c b/ext/opcache/jit/ir/ir_ra.c index d94108171da..d8d94316a63 100644 --- a/ext/opcache/jit/ir/ir_ra.c +++ b/ext/opcache/jit/ir/ir_ra.c @@ -1883,25 +1883,27 @@ int ir_coalesce(ir_ctx *ctx) compact = 1; } else { #if 1 - ir_insn *input_insn = &ctx->ir_base[input]; + if (ctx->rules && (ctx->rules[input] & IR_MAY_SWAP)) { + ir_insn *input_insn = &ctx->ir_base[input]; - if ((ir_op_flags[input_insn->op] & IR_OP_FLAG_COMMUTATIVE) - && input_insn->op2 == use - && input_insn->op1 != use - && (ctx->live_intervals[v1]->use_pos->flags & IR_DEF_REUSES_OP1_REG) - && ctx->live_intervals[v2]->end == IR_USE_LIVE_POS_FROM_REF(input)) { - ir_live_range *r = &ctx->live_intervals[v2]->range; + IR_ASSERT(ir_op_flags[input_insn->op] & IR_OP_FLAG_COMMUTATIVE); + if (input_insn->op2 == use + && input_insn->op1 != use + && (ctx->live_intervals[v1]->use_pos->flags & IR_DEF_REUSES_OP1_REG) + && ctx->live_intervals[v2]->end == IR_USE_LIVE_POS_FROM_REF(input)) { + ir_live_range *r = &ctx->live_intervals[v2]->range; - while (r->next) { - r = r->next; + while (r->next) { + r = r->next; + } + r->end = IR_LOAD_LIVE_POS_FROM_REF(input); + ctx->live_intervals[v2]->end = IR_LOAD_LIVE_POS_FROM_REF(input); + ir_swap_operands(ctx, input, input_insn); + IR_ASSERT(!ir_vregs_overlap(ctx, v1, v2)); + ir_vregs_coalesce(ctx, v1, v2, input, use); + compact = 1; + continue; } - r->end = IR_LOAD_LIVE_POS_FROM_REF(input); - ctx->live_intervals[v2]->end = IR_LOAD_LIVE_POS_FROM_REF(input); - ir_swap_operands(ctx, input, input_insn); - IR_ASSERT(!ir_vregs_overlap(ctx, v1, v2)); - ir_vregs_coalesce(ctx, v1, v2, input, use); - compact = 1; - continue; } #endif ir_add_phi_move(ctx, b, input, use); @@ -1930,15 +1932,17 @@ int ir_coalesce(ir_ctx *ctx) i = ctx->prev_ref[i]; while (i != bb->start) 
{ - insn = &ctx->ir_base[i]; - if ((ir_op_flags[insn->op] & IR_OP_FLAG_COMMUTATIVE) - && ctx->vregs[i] - && ctx->live_intervals[ctx->vregs[i]]->use_pos - && (ctx->live_intervals[ctx->vregs[i]]->use_pos->flags & IR_DEF_REUSES_OP1_REG) - && insn->op2 > 0 - && insn->op1 > 0 - && insn->op1 != insn->op2) { - ir_try_swap_operands(ctx, i, insn); + if (ctx->rules[i] & IR_MAY_SWAP) { + insn = &ctx->ir_base[i]; + IR_ASSERT(ir_op_flags[insn->op] & IR_OP_FLAG_COMMUTATIVE); + if (ctx->vregs[i] + && ctx->live_intervals[ctx->vregs[i]]->use_pos + && (ctx->live_intervals[ctx->vregs[i]]->use_pos->flags & IR_DEF_REUSES_OP1_REG) + && insn->op2 > 0 + && insn->op1 > 0 + && insn->op1 != insn->op2) { + ir_try_swap_operands(ctx, i, insn); + } } i = ctx->prev_ref[i]; } diff --git a/ext/opcache/jit/ir/ir_sccp.c b/ext/opcache/jit/ir/ir_sccp.c index 983f9bfd939..85bdcb2351c 100644 --- a/ext/opcache/jit/ir/ir_sccp.c +++ b/ext/opcache/jit/ir/ir_sccp.c @@ -29,7 +29,7 @@ IR_ALWAYS_INLINE ir_ref ir_sccp_identity(ir_insn *_values, ir_ref a) { if (a > 0 && _values[a].op == IR_COPY) { a = _values[a].op1; - IR_ASSERT(a > 0 && _values[a].op != IR_COPY); + IR_ASSERT(a < 0 || _values[a].op != IR_COPY); /* this may be a copy of symbolic constant */ } return a; } @@ -150,7 +150,7 @@ static bool ir_sccp_meet_phi(ir_ctx *ctx, ir_insn *_values, ir_ref i, ir_insn *i #if IR_COMBO_COPY_PROPAGATION } else if (v->op == IR_COPY) { input = v->op1; - IR_ASSERT(input > 0 && _values[input].op != IR_COPY); + IR_ASSERT(input < 0 || _values[input].op != IR_COPY); new_copy = input; goto next; } else if (v->op == IR_BOTTOM) { @@ -198,7 +198,7 @@ next: #if IR_COMBO_COPY_PROPAGATION } else if (v->op == IR_COPY) { input = v->op1; - IR_ASSERT(input > 0 && _values[input].op != IR_COPY); + IR_ASSERT(input < 0 || _values[input].op != IR_COPY); if (new_copy == input) { continue; } else { @@ -266,87 +266,6 @@ static bool ir_sccp_is_equal(ir_ctx *ctx, ir_insn *_values, ir_ref a, ir_ref b) return v1->val.u64 == v2->val.u64; } -static 
void ir_sccp_remove_from_use_list(ir_ctx *ctx, ir_ref from, ir_ref ref) -{ - ir_ref j, n, *p, *q, use; - ir_use_list *use_list = &ctx->use_lists[from]; - ir_ref skip = 0; - - n = use_list->count; - for (j = 0, p = q = &ctx->use_edges[use_list->refs]; j < n; j++, p++) { - use = *p; - if (use == ref) { - skip++; - } else { - if (p != q) { - *q = use; - } - q++; - } - } - use_list->count -= skip; -#if IR_COMBO_COPY_PROPAGATION - if (skip) { - do { - *q = IR_UNUSED; - q++; - } while (--skip); - } -#endif -} - -static void ir_sccp_remove_from_use_list_1(ir_ctx *ctx, ir_ref from, ir_ref ref) -{ - ir_ref j, n, *p; - ir_use_list *use_list = &ctx->use_lists[from]; - - n = use_list->count; - j = 0; - p = &ctx->use_edges[use_list->refs]; - while (j < n) { - if (*p == ref) { - break; - } - j++; - } - - if (j < n) { - use_list->count--; - j++; - while (j < n) { - *p = *(p+1); - p++; - j++; - } -#if IR_COMBO_COPY_PROPAGATION - *p = IR_UNUSED; -#endif - } -} - -#if IR_COMBO_COPY_PROPAGATION -static int ir_sccp_add_to_use_list(ir_ctx *ctx, ir_ref to, ir_ref ref) -{ - ir_use_list *use_list = &ctx->use_lists[to]; - ir_ref n = use_list->refs + use_list->count; - - if (n < ctx->use_edges_count && ctx->use_edges[n] == IR_UNUSED) { - ctx->use_edges[n] = ref; - use_list->count++; - return 0; - } else { - /* Reallocate the whole edges buffer (this is inefficient) */ - ctx->use_edges = ir_mem_realloc(ctx->use_edges, (ctx->use_edges_count + use_list->count + 1) * sizeof(ir_ref)); - memcpy(ctx->use_edges + ctx->use_edges_count, ctx->use_edges + use_list->refs, use_list->count * sizeof(ir_ref)); - use_list->refs = ctx->use_edges_count; - ctx->use_edges[use_list->refs + use_list->count] = ref; - use_list->count++; - ctx->use_edges_count += use_list->count; - return 1; - } -} -#endif - static void ir_sccp_make_nop(ir_ctx *ctx, ir_ref ref) { ir_ref j, n, *p; @@ -380,7 +299,34 @@ static void ir_sccp_remove_insn(ir_ctx *ctx, ir_insn *_values, ir_ref ref, ir_bi ir_ref input = *p; *p = IR_UNUSED; if 
(input > 0 && _values[input].op == IR_BOTTOM) { - ir_sccp_remove_from_use_list(ctx, input, ref); + ir_use_list_remove_all(ctx, input, ref); + /* schedule DCE */ + if ((IR_IS_FOLDABLE_OP(ctx->ir_base[input].op) && ctx->use_lists[input].count == 0) + || ((ir_op_flags[ctx->ir_base[input].op] & (IR_OP_FLAG_MEM|IR_OP_FLAG_MEM_MASK)) == (IR_OP_FLAG_MEM|IR_OP_FLAG_MEM_LOAD) + && ctx->use_lists[input].count == 1)) { + ir_bitqueue_add(worklist, input); + } + } + } +} + +static void ir_sccp_remove_insn2(ir_ctx *ctx, ir_ref ref, ir_bitqueue *worklist) +{ + ir_ref j, n, *p; + ir_use_list *use_list = &ctx->use_lists[ref]; + ir_insn *insn; + + use_list->refs = 0; + use_list->count = 0; + + insn = &ctx->ir_base[ref]; + n = insn->inputs_count; + insn->opt = IR_NOP; /* keep "inputs_count" */ + for (j = 1, p = insn->ops + j; j <= n; j++, p++) { + ir_ref input = *p; + *p = IR_UNUSED; + if (input > 0) { + ir_use_list_remove_all(ctx, input, ref); /* schedule DCE */ if ((IR_IS_FOLDABLE_OP(ctx->ir_base[input].op) && ctx->use_lists[input].count == 0) || ((ir_op_flags[ctx->ir_base[input].op] & (IR_OP_FLAG_MEM|IR_OP_FLAG_MEM_MASK)) == (IR_OP_FLAG_MEM|IR_OP_FLAG_MEM_LOAD) @@ -406,7 +352,7 @@ static void ir_sccp_replace_insn(ir_ctx *ctx, ir_insn *_values, ir_ref ref, ir_r ir_ref input = *p; *p = IR_UNUSED; if (input > 0) { - ir_sccp_remove_from_use_list(ctx, input, ref); + ir_use_list_remove_all(ctx, input, ref); /* schedule DCE */ if (worklist && ((IR_IS_FOLDABLE_OP(ctx->ir_base[input].op) && ctx->use_lists[input].count == 0) @@ -431,7 +377,7 @@ static void ir_sccp_replace_insn(ir_ctx *ctx, ir_insn *_values, ir_ref ref, ir_r } #if IR_COMBO_COPY_PROPAGATION if (new_ref > 0 && IR_IS_BOTTOM(use)) { - if (ir_sccp_add_to_use_list(ctx, new_ref, use)) { + if (ir_use_list_add(ctx, new_ref, use)) { /* restore after reallocation */ use_list = &ctx->use_lists[ref]; n = use_list->count; @@ -450,7 +396,61 @@ static void ir_sccp_replace_insn(ir_ctx *ctx, ir_insn *_values, ir_ref ref, ir_r use_list->count 
= 0; } -static void ir_sccp_fold2(ir_ctx *ctx, ir_insn *_values, ir_ref ref, ir_bitqueue *worklist) +static void ir_sccp_replace_insn2(ir_ctx *ctx, ir_ref ref, ir_ref new_ref, ir_bitqueue *worklist) +{ + ir_ref j, n, *p, use, k, l; + ir_insn *insn; + ir_use_list *use_list; + + IR_ASSERT(ref != new_ref); + + insn = &ctx->ir_base[ref]; + n = insn->inputs_count; + insn->opt = IR_NOP; /* keep "inputs_count" */ + for (j = 1, p = insn->ops + 1; j <= n; j++, p++) { + ir_ref input = *p; + *p = IR_UNUSED; + if (input > 0) { + ir_use_list_remove_all(ctx, input, ref); + /* schedule DCE */ + if ((IR_IS_FOLDABLE_OP(ctx->ir_base[input].op) && ctx->use_lists[input].count == 0) + || ((ir_op_flags[ctx->ir_base[input].op] & (IR_OP_FLAG_MEM|IR_OP_FLAG_MEM_MASK)) == (IR_OP_FLAG_MEM|IR_OP_FLAG_MEM_LOAD) + && ctx->use_lists[input].count == 1)) { + ir_bitqueue_add(worklist, input); + } + } + } + + use_list = &ctx->use_lists[ref]; + n = use_list->count; + for (j = 0, p = &ctx->use_edges[use_list->refs]; j < n; j++, p++) { + use = *p; + insn = &ctx->ir_base[use]; + l = insn->inputs_count; + for (k = 1; k <= l; k++) { + if (ir_insn_op(insn, k) == ref) { + ir_insn_set_op(insn, k, new_ref); + } + } +#if IR_COMBO_COPY_PROPAGATION + if (new_ref > 0) { + if (ir_use_list_add(ctx, new_ref, use)) { + /* restore after reallocation */ + use_list = &ctx->use_lists[ref]; + n = use_list->count; + p = &ctx->use_edges[use_list->refs + j]; + } + } +#endif + /* schedule folding */ + ir_bitqueue_add(worklist, use); + } + + use_list->refs = 0; + use_list->count = 0; +} + +static void ir_sccp_fold2(ir_ctx *ctx, ir_ref ref, ir_bitqueue *worklist) { uint32_t opt; ir_ref op1, op2, op3; @@ -489,26 +489,26 @@ restart: insn->inputs_count = IR_INPUT_EDGES_COUNT(ir_op_flags[opt & IR_OPT_OP_MASK]); if (insn->op1 != ctx->fold_insn.op1) { if (!IR_IS_CONST_REF(insn->op1) && insn->op1 != ctx->fold_insn.op2 && insn->op1 != ctx->fold_insn.op3) { - ir_sccp_remove_from_use_list(ctx, insn->op1, ref); + 
ir_use_list_remove_all(ctx, insn->op1, ref); } if (!IR_IS_CONST_REF(ctx->fold_insn.op1) && ctx->fold_insn.op1 != insn->op2 && ctx->fold_insn.op1 != insn->op3) { - ir_sccp_add_to_use_list(ctx, ctx->fold_insn.op1, ref); + ir_use_list_add(ctx, ctx->fold_insn.op1, ref); } } if (insn->op2 != ctx->fold_insn.op2) { if (!IR_IS_CONST_REF(insn->op2) && insn->op2 != ctx->fold_insn.op1 && insn->op2 != ctx->fold_insn.op3) { - ir_sccp_remove_from_use_list(ctx, insn->op2, ref); + ir_use_list_remove_all(ctx, insn->op2, ref); } if (!IR_IS_CONST_REF(ctx->fold_insn.op2) && ctx->fold_insn.op2 != insn->op1 && ctx->fold_insn.op2 != insn->op3) { - ir_sccp_add_to_use_list(ctx, ctx->fold_insn.op2, ref); + ir_use_list_add(ctx, ctx->fold_insn.op2, ref); } } if (insn->op3 != ctx->fold_insn.op3) { if (!IR_IS_CONST_REF(insn->op3) && insn->op3 != ctx->fold_insn.op1 && insn->op3 != ctx->fold_insn.op2) { - ir_sccp_remove_from_use_list(ctx, insn->op3, ref); + ir_use_list_remove_all(ctx, insn->op3, ref); } if (!IR_IS_CONST_REF(ctx->fold_insn.op3) && ctx->fold_insn.op3 != insn->op1 && ctx->fold_insn.op3 != insn->op2) { - ir_sccp_add_to_use_list(ctx, ctx->fold_insn.op3, ref); + ir_use_list_add(ctx, ctx->fold_insn.op3, ref); } } insn->op1 = ctx->fold_insn.op1; @@ -519,19 +519,17 @@ restart: n = use_list->count; for (j = 0, p = &ctx->use_edges[use_list->refs]; j < n; j++, p++) { use = *p; - if (_values[use].op == IR_BOTTOM) { - ir_bitqueue_add(worklist, use); - } + ir_bitqueue_add(worklist, use); } } break; case IR_FOLD_DO_COPY: op1 = ctx->fold_insn.op1; - ir_sccp_replace_insn(ctx, _values, ref, op1, worklist); + ir_sccp_replace_insn2(ctx, ref, op1, worklist); break; case IR_FOLD_DO_CONST: op1 = ir_const(ctx, ctx->fold_insn.val, ctx->fold_insn.type); - ir_sccp_replace_insn(ctx, _values, ref, op1, worklist); + ir_sccp_replace_insn2(ctx, ref, op1, worklist); break; default: IR_ASSERT(0); @@ -539,20 +537,6 @@ restart: } } -static void ir_sccp_replace_use(ir_ctx *ctx, ir_ref ref, ir_ref use, ir_ref new_use) 
-{ - ir_use_list *use_list = &ctx->use_lists[ref]; - ir_ref i, n, *p; - - n = use_list->count; - for (i = 0, p = &ctx->use_edges[use_list->refs]; i < n; i++, p++) { - if (*p == use) { - *p = new_use; - break; - } - } -} - static void ir_sccp_remove_if(ir_ctx *ctx, ir_insn *_values, ir_ref ref, ir_ref dst) { ir_ref j, n, *p, use, next; @@ -568,7 +552,7 @@ static void ir_sccp_remove_if(ir_ctx *ctx, ir_insn *_values, ir_ref ref, ir_ref next_insn = &ctx->ir_base[next]; /* remove IF and IF_TRUE/FALSE from double linked control list */ next_insn->op1 = insn->op1; - ir_sccp_replace_use(ctx, insn->op1, ref, next); + ir_use_list_replace(ctx, insn->op1, ref, next); /* remove IF and IF_TRUE/FALSE instructions */ ir_sccp_make_nop(ctx, ref); ir_sccp_make_nop(ctx, use); @@ -618,7 +602,7 @@ static void ir_sccp_remove_unfeasible_merge_inputs(ir_ctx *ctx, ir_insn *_values } else { IR_ASSERT(use_insn->op1 == ref); use_insn->op1 = prev; - ir_sccp_add_to_use_list(ctx, prev, use); + ir_use_list_add(ctx, prev, use); p = &ctx->use_edges[use_list->refs + k]; } } @@ -626,7 +610,7 @@ static void ir_sccp_remove_unfeasible_merge_inputs(ir_ctx *ctx, ir_insn *_values IR_ASSERT(prev && next); /* remove MERGE and input END from double linked control list */ next_insn->op1 = prev; - ir_sccp_replace_use(ctx, prev, input, next); + ir_use_list_replace(ctx, prev, input, next); /* remove MERGE and input END instructions */ ir_sccp_make_nop(ctx, ref); ir_sccp_make_nop(ctx, input); @@ -689,7 +673,7 @@ static void ir_sccp_remove_unfeasible_merge_inputs(ir_ctx *ctx, ir_insn *_values } i++; } else if (!IR_IS_CONST_REF(input)) { - ir_sccp_remove_from_use_list_1(ctx, input, use); + ir_use_list_remove_one(ctx, input, use); } } while (i <= n) { @@ -704,18 +688,347 @@ static void ir_sccp_remove_unfeasible_merge_inputs(ir_ctx *ctx, ir_insn *_values } } +static void ir_replace_inputs(ir_ctx *ctx, ir_ref ref, ir_ref input, ir_ref new_input) +{ + ir_use_list *use_list = &ctx->use_lists[ref]; + ir_ref n = 
use_list->count; + ir_ref *p = &ctx->use_edges[use_list->refs]; + + for (; n; p++, n--) { + ir_ref use = *p; + ir_insn *insn = &ctx->ir_base[use]; + ir_ref k, l = insn->inputs_count; + + for (k = 1; k <= l; k++) { + if (ir_insn_op(insn, k) == input) { + ir_insn_set_op(insn, k, new_input); + } + } + } +} + +static bool ir_may_promote_d2f_op(ir_ctx *ctx, ir_ref ref) +{ + ir_insn *insn = &ctx->ir_base[ref]; + + IR_ASSERT(insn->type == IR_DOUBLE); + if (IR_IS_CONST_REF(ref)) { + return !IR_IS_SYM_CONST(insn->op) && insn->val.d == (double)(float)insn->val.d; + } else { + switch (insn->op) { + case IR_FP2FP: + return 1; +// case IR_INT2FP: +// return ctx->use_lists[ref].count == 1; + case IR_NEG: + case IR_ABS: + return ctx->use_lists[ref].count == 1 && + ir_may_promote_d2f_op(ctx, insn->op1); + case IR_ADD: + case IR_SUB: + case IR_MUL: + case IR_DIV: + case IR_MIN: + case IR_MAX: + return ctx->use_lists[ref].count == 1 && + ir_may_promote_d2f_op(ctx, insn->op1) && + ir_may_promote_d2f_op(ctx, insn->op2); + default: + break; + } + } + return 0; +} + +static bool ir_may_promote_f2d_op(ir_ctx *ctx, ir_ref ref) +{ + ir_insn *insn = &ctx->ir_base[ref]; + + IR_ASSERT(insn->type == IR_FLOAT); + if (IR_IS_CONST_REF(ref)) { + return !IR_IS_SYM_CONST(insn->op) && insn->val.f == (float)(double)insn->val.f; + } else { + switch (insn->op) { + case IR_FP2FP: + return 1; + case IR_INT2FP: + return ctx->use_lists[ref].count == 1; + case IR_NEG: + case IR_ABS: + return ctx->use_lists[ref].count == 1 && + ir_may_promote_f2d_op(ctx, insn->op1); + case IR_ADD: + case IR_SUB: + case IR_MUL: +// case IR_DIV: + case IR_MIN: + case IR_MAX: + return ctx->use_lists[ref].count == 1 && + ir_may_promote_f2d_op(ctx, insn->op1) && + ir_may_promote_f2d_op(ctx, insn->op2); + default: + break; + } + } + return 0; +} + +static ir_ref ir_promote_d2f_op(ir_ctx *ctx, ir_ref ref, ir_ref use) +{ + ir_insn *insn = &ctx->ir_base[ref]; + + IR_ASSERT(insn->type == IR_DOUBLE); + if (IR_IS_CONST_REF(ref)) { + 
return ir_const_float(ctx, (float)insn->val.d); + } else { + switch (insn->op) { + case IR_FP2FP: + ir_use_list_remove_all(ctx, ref, use); + if (ctx->use_lists[ref].count == 0) { + ir_use_list_replace(ctx, insn->op1, ref, use); + ref = insn->op1; + insn->optx = IR_NOP; + insn->op1 = IR_UNUSED; + return ref; + } else { + ir_use_list_add(ctx, insn->op1, use); + } + return insn->op1; +// case IR_INT2FP: +// insn->type = IR_FLOAT; +// return ref; + case IR_NEG: + case IR_ABS: + insn->op1 = ir_promote_d2f_op(ctx, insn->op1, ref); + insn->type = IR_FLOAT; + return ref; + case IR_ADD: + case IR_SUB: + case IR_MUL: + case IR_DIV: + case IR_MIN: + case IR_MAX: + if (insn->op1 == insn->op2) { + insn->op2 = insn->op1 = ir_promote_d2f_op(ctx, insn->op1, ref); + } else { + insn->op1 = ir_promote_d2f_op(ctx, insn->op1, ref); + insn->op2 = ir_promote_d2f_op(ctx, insn->op2, ref); + } + insn->type = IR_FLOAT; + return ref; + default: + break; + } + } + IR_ASSERT(0); + return ref; +} + +static ir_ref ir_promote_f2d_op(ir_ctx *ctx, ir_ref ref, ir_ref use) +{ + ir_insn *insn = &ctx->ir_base[ref]; + + IR_ASSERT(insn->type == IR_FLOAT); + if (IR_IS_CONST_REF(ref)) { + return ir_const_double(ctx, (double)insn->val.f); + } else { + switch (insn->op) { + case IR_FP2FP: + ir_use_list_remove_all(ctx, ref, use); + if (ctx->use_lists[ref].count == 0) { + ir_use_list_replace(ctx, insn->op1, ref, use); + ref = insn->op1; + insn->optx = IR_NOP; + insn->op1 = IR_UNUSED; + return ref; + } else { + ir_use_list_add(ctx, insn->op1, use); + } + return insn->op1; + case IR_INT2FP: + insn->type = IR_DOUBLE; + return ref; + case IR_NEG: + case IR_ABS: + insn->op1 = ir_promote_f2d_op(ctx, insn->op1, ref); + insn->type = IR_DOUBLE; + return ref; + case IR_ADD: + case IR_SUB: + case IR_MUL: +// case IR_DIV: + case IR_MIN: + case IR_MAX: + if (insn->op1 == insn->op2) { + insn->op2 = insn->op1 = ir_promote_f2d_op(ctx, insn->op1, ref); + } else { + insn->op1 = ir_promote_f2d_op(ctx, insn->op1, ref); + insn->op2 
= ir_promote_f2d_op(ctx, insn->op2, ref); + } + insn->type = IR_DOUBLE; + return ref; + default: + break; + } + } + IR_ASSERT(0); + return ref; +} + +static void ir_promote_d2f(ir_ctx *ctx, ir_ref ref, ir_insn *insn) +{ + if (ir_may_promote_d2f_op(ctx, insn->op1)) { + ir_ref new_ref = ir_promote_d2f_op(ctx, insn->op1, ref); + if (insn->op1 == new_ref) { + ir_replace_inputs(ctx, ref, ref, insn->op1); + ctx->use_lists[insn->op1] = ctx->use_lists[ref]; + ctx->use_lists[ref].count = 0; + ctx->use_lists[ref].refs = 0; + insn->optx = IR_NOP; + insn->op1 = IR_UNUSED; + } else { + insn->optx = IR_OPTX(IR_COPY, IR_FLOAT, 1); + insn->op1 = new_ref; + } + } +} + +static void ir_promote_f2d(ir_ctx *ctx, ir_ref ref, ir_insn *insn) +{ + if (ir_may_promote_f2d_op(ctx, insn->op1)) { + ir_ref new_ref = ir_promote_f2d_op(ctx, insn->op1, ref); + if (insn->op1 == new_ref) { + ir_replace_inputs(ctx, ref, ref, insn->op1); + ctx->use_lists[insn->op1] = ctx->use_lists[ref]; + ctx->use_lists[ref].count = 0; + ctx->use_lists[ref].refs = 0; + insn->optx = IR_NOP; + insn->op1 = IR_UNUSED; + } else { + insn->optx = IR_OPTX(IR_COPY, IR_DOUBLE, 1); + insn->op1 = new_ref; + } + } +} + +static bool ir_may_promote_i2i_op(ir_ctx *ctx, ir_type type, ir_ref ref) +{ + ir_insn *insn = &ctx->ir_base[ref]; + + if (IR_IS_CONST_REF(ref)) { + return !IR_IS_SYM_CONST(insn->op); + } else { + switch (insn->op) { + case IR_ZEXT: + case IR_SEXT: + return ctx->ir_base[insn->op1].type == type; + case IR_NEG: + case IR_ABS: + case IR_NOT: + return ctx->use_lists[ref].count == 1 && + ir_may_promote_i2i_op(ctx, type, insn->op1); + case IR_ADD: + case IR_SUB: + case IR_MUL: +// case IR_DIV: + case IR_MIN: + case IR_MAX: + case IR_OR: + case IR_AND: + case IR_XOR: + return ctx->use_lists[ref].count == 1 && + ir_may_promote_i2i_op(ctx, type, insn->op1) && + ir_may_promote_i2i_op(ctx, type, insn->op2); + default: + break; + } + } + return 0; +} + +static ir_ref ir_promote_i2i_op(ir_ctx *ctx, ir_type type, ir_ref ref, 
ir_ref use) +{ + ir_insn *insn = &ctx->ir_base[ref]; + + if (IR_IS_CONST_REF(ref)) { + return ir_const(ctx, insn->val, type); + } else { + switch (insn->op) { + case IR_ZEXT: + case IR_SEXT: + ir_use_list_remove_all(ctx, ref, use); + if (ctx->use_lists[ref].count == 0) { + ir_use_list_replace(ctx, insn->op1, ref, use); + ref = insn->op1; + insn->optx = IR_NOP; + insn->op1 = IR_UNUSED; + return ref; + } else { + ir_use_list_add(ctx, insn->op1, use); + } + return insn->op1; + case IR_NEG: + case IR_ABS: + case IR_NOT: + insn->op1 = ir_promote_i2i_op(ctx, type, insn->op1, ref); + insn->type = type; + return ref; + case IR_ADD: + case IR_SUB: + case IR_MUL: +// case IR_DIV: + case IR_MIN: + case IR_MAX: + case IR_OR: + case IR_AND: + case IR_XOR: + if (insn->op1 == insn->op2) { + insn->op2 = insn->op1 = ir_promote_i2i_op(ctx, type, insn->op1, ref); + } else { + insn->op1 = ir_promote_i2i_op(ctx, type, insn->op1, ref); + insn->op2 = ir_promote_i2i_op(ctx, type, insn->op2, ref); + } + insn->type = type; + return ref; + default: + break; + } + } + IR_ASSERT(0); + return ref; +} + +static void ir_promote_trunc(ir_ctx *ctx, ir_ref ref, ir_insn *insn) +{ + if (ir_may_promote_i2i_op(ctx, insn->type, insn->op1)) { + ir_ref new_ref = ir_promote_i2i_op(ctx, insn->type, insn->op1, ref); + if (insn->op1 == new_ref) { + ir_replace_inputs(ctx, ref, ref, insn->op1); + ctx->use_lists[insn->op1] = ctx->use_lists[ref]; + ctx->use_lists[ref].count = 0; + ctx->use_lists[ref].refs = 0; + insn->optx = IR_NOP; + insn->op1 = IR_UNUSED; + } else { + insn->optx = IR_OPTX(IR_COPY, insn->type, 1); + insn->op1 = new_ref; + } + } +} + int ir_sccp(ir_ctx *ctx) { ir_ref i, j, n, *p, use; ir_use_list *use_list; ir_insn *insn, *use_insn, *value; uint32_t flags; - ir_bitqueue worklist; + ir_bitqueue worklist, worklist2; ir_insn *_values = ir_mem_calloc(ctx->insns_count, sizeof(ir_insn)); ctx->flags2 |= IR_OPT_IN_SCCP; /* A bit modified SCCP algorithm of M. N. Wegman and F. K. 
Zadeck */ + ir_bitqueue_init(&worklist2, ctx->insns_count); ir_bitqueue_init(&worklist, ctx->insns_count); worklist.pos = 0; ir_bitset_incl(worklist.set, 1); @@ -761,6 +1074,9 @@ int ir_sccp(ir_ctx *ctx) } if (!may_benefit) { IR_MAKE_BOTTOM(i); + if (insn->op == IR_FP2FP || insn->op == IR_FP2INT || insn->op == IR_TRUNC) { + ir_bitqueue_add(&worklist2, i); + } } else if (!ir_sccp_fold(ctx, _values, i, insn->opt, insn->op1, insn->op2, insn->op3)) { /* not changed */ continue; @@ -966,21 +1282,21 @@ int ir_sccp(ir_ctx *ctx) } else if (IR_IS_CONST_OP(value->op)) { /* replace instruction by constant */ j = ir_const(ctx, value->val, value->type); - ir_sccp_replace_insn(ctx, _values, i, j, &worklist); + ir_sccp_replace_insn(ctx, _values, i, j, &worklist2); } else if (IR_IS_SYM_CONST(value->op)) { /* replace instruction by constant */ j = ir_const_ex(ctx, value->val, value->type, value->optx); - ir_sccp_replace_insn(ctx, _values, i, j, &worklist); + ir_sccp_replace_insn(ctx, _values, i, j, &worklist2); #if IR_COMBO_COPY_PROPAGATION } else if (value->op == IR_COPY) { - ir_sccp_replace_insn(ctx, _values, i, value->op1, &worklist); + ir_sccp_replace_insn(ctx, _values, i, value->op1, &worklist2); #endif } else if (value->op == IR_TOP) { /* remove unreachable instruction */ insn = &ctx->ir_base[i]; if (ir_op_flags[insn->op] & (IR_OP_FLAG_DATA|IR_OP_FLAG_MEM)) { if (insn->op != IR_PARAM && insn->op != IR_VAR) { - ir_sccp_remove_insn(ctx, _values, i, &worklist); + ir_sccp_remove_insn(ctx, _values, i, &worklist2); } } else { if (ir_op_flags[insn->op] & IR_OP_FLAG_TERMINATOR) { @@ -1008,38 +1324,66 @@ int ir_sccp(ir_ctx *ctx) ir_bitqueue_add(&worklist, i); } else if (value->op == IR_LOAD) { /* schedule dead load elimination */ - ir_bitqueue_add(&worklist, i); + ir_bitqueue_add(&worklist2, i); } } while ((i = ir_bitqueue_pop(&worklist)) >= 0) { - if (_values[i].op == IR_MERGE) { - ir_sccp_remove_unfeasible_merge_inputs(ctx, _values, i, _values[i].op1); - } else { - insn = 
&ctx->ir_base[i]; - if (IR_IS_FOLDABLE_OP(insn->op)) { - if (ctx->use_lists[i].count == 0) { - ir_sccp_remove_insn(ctx, _values, i, &worklist); - } else { - ir_sccp_fold2(ctx, _values, i, &worklist); - } - } else if (((ir_op_flags[insn->op] & (IR_OP_FLAG_MEM|IR_OP_FLAG_MEM_MASK)) == (IR_OP_FLAG_MEM|IR_OP_FLAG_MEM_LOAD) - || insn->op == IR_ALLOCA) - && ctx->use_lists[i].count == 1) { - /* dead load */ - ir_ref next = ctx->use_edges[ctx->use_lists[i].refs]; + IR_ASSERT(_values[i].op == IR_MERGE); + ir_sccp_remove_unfeasible_merge_inputs(ctx, _values, i, _values[i].op1); + } - /* remove LOAD from double linked control list */ - ctx->ir_base[next].op1 = insn->op1; - ir_sccp_replace_use(ctx, insn->op1, i, next); - insn->op1 = IR_UNUSED; - ir_sccp_remove_insn(ctx, _values, i, &worklist); + while ((i = ir_bitqueue_pop(&worklist2)) >= 0) { + insn = &ctx->ir_base[i]; + if (IR_IS_FOLDABLE_OP(insn->op)) { + if (ctx->use_lists[i].count == 0) { + ir_sccp_remove_insn2(ctx, i, &worklist2); + } else { + insn = &ctx->ir_base[i]; + switch (insn->op) { + case IR_FP2FP: + if (insn->type == IR_FLOAT) { + ir_promote_d2f(ctx, i, insn); + } else { + ir_promote_f2d(ctx, i, insn); + } + break; + case IR_FP2INT: + if (ctx->ir_base[insn->op1].type == IR_DOUBLE) { + if (ir_may_promote_d2f_op(ctx, insn->op1)) { + insn->op1 = ir_promote_d2f_op(ctx, insn->op1, i); + } + } else { + if (ir_may_promote_f2d_op(ctx, insn->op1)) { + insn->op1 = ir_promote_f2d_op(ctx, insn->op1, i); + } + } + break; + case IR_TRUNC: + ir_promote_trunc(ctx, i, insn); + break; + default: + ir_sccp_fold2(ctx, i, &worklist2); + break; + } } + } else if (((ir_op_flags[insn->op] & (IR_OP_FLAG_MEM|IR_OP_FLAG_MEM_MASK)) == (IR_OP_FLAG_MEM|IR_OP_FLAG_MEM_LOAD) + || insn->op == IR_ALLOCA) + && ctx->use_lists[i].count == 1) { + /* dead load */ + ir_ref next = ctx->use_edges[ctx->use_lists[i].refs]; + + /* remove LOAD from double linked control list */ + ctx->ir_base[next].op1 = insn->op1; + ir_use_list_replace(ctx, insn->op1, i, 
next); + insn->op1 = IR_UNUSED; + ir_sccp_remove_insn2(ctx, i, &worklist2); } } ir_mem_free(_values); ir_bitqueue_free(&worklist); + ir_bitqueue_free(&worklist2); ctx->flags2 &= ~IR_OPT_IN_SCCP; ctx->flags2 |= IR_SCCP_DONE; diff --git a/ext/opcache/jit/ir/ir_x86.dasc b/ext/opcache/jit/ir/ir_x86.dasc index 5db848b95ff..e5032c02603 100644 --- a/ext/opcache/jit/ir/ir_x86.dasc +++ b/ext/opcache/jit/ir/ir_x86.dasc @@ -1454,12 +1454,12 @@ static bool ir_match_try_revert_lea_to_add(ir_ctx *ctx, ir_ref ref) if (insn->op1 == insn->op2) { /* pass */ } else if (ir_match_try_fuse_load(ctx, insn->op2, ref)) { - ctx->rules[ref] = IR_BINOP_INT; + ctx->rules[ref] = IR_BINOP_INT | IR_MAY_SWAP; return 1; } else if (ir_match_try_fuse_load(ctx, insn->op1, ref)) { /* swap for better load fusion */ ir_swap_ops(insn); - ctx->rules[ref] = IR_BINOP_INT; + ctx->rules[ref] = IR_BINOP_INT | IR_MAY_SWAP; return 1; } return 0; @@ -1708,10 +1708,11 @@ static uint32_t ir_match_insn(ir_ctx *ctx, ir_ref ref) /* v = BINOP(_, _); CMP(v, 0) => BINOP; SETCC */ if (ir_op_flags[op1_insn->op] & IR_OP_FLAG_COMMUTATIVE) { ir_match_fuse_load_commutative_int(ctx, op1_insn, ref); + ctx->rules[insn->op1] = IR_BINOP_INT | IR_MAY_SWAP; } else { ir_match_fuse_load(ctx, op1_insn->op2, ref); + ctx->rules[insn->op1] = IR_BINOP_INT; } - ctx->rules[insn->op1] = IR_BINOP_INT; return IR_SETCC_INT; } } @@ -1842,21 +1843,27 @@ lea: binop_int: if (ir_op_flags[insn->op] & IR_OP_FLAG_COMMUTATIVE) { ir_match_fuse_load_commutative_int(ctx, insn, ref); + return IR_BINOP_INT | IR_MAY_SWAP; } else { ir_match_fuse_load(ctx, insn->op2, ref); + return IR_BINOP_INT; } - return IR_BINOP_INT; } else { binop_fp: if (ir_op_flags[insn->op] & IR_OP_FLAG_COMMUTATIVE) { ir_match_fuse_load_commutative_fp(ctx, insn, ref); + if (ctx->mflags & IR_X86_AVX) { + return IR_BINOP_AVX; + } else { + return IR_BINOP_SSE2 | IR_MAY_SWAP; + } } else { ir_match_fuse_load(ctx, insn->op2, ref); - } - if (ctx->mflags & IR_X86_AVX) { - return IR_BINOP_AVX; - } 
else { - return IR_BINOP_SSE2; + if (ctx->mflags & IR_X86_AVX) { + return IR_BINOP_AVX; + } else { + return IR_BINOP_SSE2; + } } } break; @@ -2071,7 +2078,7 @@ binop_fp: case IR_MIN: case IR_MAX: if (IR_IS_TYPE_INT(insn->type)) { - return IR_MIN_MAX_INT; + return IR_MIN_MAX_INT | IR_MAY_SWAP; } else { goto binop_fp; } @@ -2124,7 +2131,7 @@ store_int: if (!rule) { ctx->rules[insn->op3] = rule = ir_match_insn(ctx, insn->op3); } - if ((rule == IR_BINOP_INT && op_insn->op != IR_MUL) || rule == IR_LEA_OB || rule == IR_LEA_IB) { + if (((rule & IR_RULE_MASK) == IR_BINOP_INT && op_insn->op != IR_MUL) || rule == IR_LEA_OB || rule == IR_LEA_IB) { if (insn->op1 == op_insn->op1 && ctx->ir_base[op_insn->op1].op == load_op && ctx->ir_base[op_insn->op1].op2 == insn->op2 @@ -2338,10 +2345,11 @@ store_int: /* v = BINOP(_, _); c = CMP(v, 0) ... IF(c) => BINOP; SKIP_CMP ... JCC */ if (ir_op_flags[op1_insn->op] & IR_OP_FLAG_COMMUTATIVE) { ir_match_fuse_load_commutative_int(ctx, op1_insn, ref); + ctx->rules[op2_insn->op1] = IR_BINOP_INT | IR_MAY_SWAP; } else { ir_match_fuse_load(ctx, op1_insn->op2, ref); + ctx->rules[op2_insn->op1] = IR_BINOP_INT; } - ctx->rules[op2_insn->op1] = IR_BINOP_INT; ctx->rules[insn->op2] = IR_FUSED | IR_CMP_INT; return IR_JCC_INT; } @@ -2381,10 +2389,11 @@ store_int: /* v = BINOP(_, _); IF(v) => BINOP; JCC */ if (ir_op_flags[op2_insn->op] & IR_OP_FLAG_COMMUTATIVE) { ir_match_fuse_load_commutative_int(ctx, op2_insn, ref); + ctx->rules[insn->op2] = IR_BINOP_INT | IR_MAY_SWAP; } else { ir_match_fuse_load(ctx, op2_insn->op2, ref); + ctx->rules[insn->op2] = IR_BINOP_INT; } - ctx->rules[insn->op2] = IR_BINOP_INT; return IR_JCC_INT; } } else if ((ctx->flags & IR_OPT_CODEGEN) @@ -2462,11 +2471,12 @@ store_int: (op2_insn->op == IR_EQ || op2_insn->op == IR_NE))) { if (ir_op_flags[op1_insn->op] & IR_OP_FLAG_COMMUTATIVE) { ir_match_fuse_load_commutative_int(ctx, op1_insn, ref); + ctx->rules[op2_insn->op1] = IR_BINOP_INT | IR_MAY_SWAP; } else { ir_match_fuse_load(ctx, 
op1_insn->op2, ref); + ctx->rules[op2_insn->op1] = IR_BINOP_INT; } /* v = BINOP(_, _); c = CMP(v, 0) ... IF(c) => BINOP; SKIP_CMP ... GUARD_JCC */ - ctx->rules[op2_insn->op1] = IR_BINOP_INT; ctx->rules[insn->op2] = IR_FUSED | IR_CMP_INT; return IR_GUARD_JCC_INT; } @@ -6396,16 +6406,16 @@ static void ir_emit_fp2int(ir_ctx *ctx, ir_ref def, ir_insn *insn) if (!dst64) { if (src_type == IR_DOUBLE) { if (ctx->mflags & IR_X86_AVX) { - | vcvtsd2si Rd(def_reg), xmm(op1_reg-IR_REG_FP_FIRST) + | vcvttsd2si Rd(def_reg), xmm(op1_reg-IR_REG_FP_FIRST) } else { - | cvtsd2si Rd(def_reg), xmm(op1_reg-IR_REG_FP_FIRST) + | cvttsd2si Rd(def_reg), xmm(op1_reg-IR_REG_FP_FIRST) } } else { IR_ASSERT(src_type == IR_FLOAT); if (ctx->mflags & IR_X86_AVX) { - | vcvtss2si Rd(def_reg), xmm(op1_reg-IR_REG_FP_FIRST) + | vcvttss2si Rd(def_reg), xmm(op1_reg-IR_REG_FP_FIRST) } else { - | cvtss2si Rd(def_reg), xmm(op1_reg-IR_REG_FP_FIRST) + | cvttss2si Rd(def_reg), xmm(op1_reg-IR_REG_FP_FIRST) } } } else { @@ -6413,16 +6423,16 @@ static void ir_emit_fp2int(ir_ctx *ctx, ir_ref def, ir_insn *insn) |.if X64 if (src_type == IR_DOUBLE) { if (ctx->mflags & IR_X86_AVX) { - | vcvtsd2si Rq(def_reg), xmm(op1_reg-IR_REG_FP_FIRST) + | vcvttsd2si Rq(def_reg), xmm(op1_reg-IR_REG_FP_FIRST) } else { - | cvtsd2si Rq(def_reg), xmm(op1_reg-IR_REG_FP_FIRST) + | cvttsd2si Rq(def_reg), xmm(op1_reg-IR_REG_FP_FIRST) } } else { IR_ASSERT(src_type == IR_FLOAT); if (ctx->mflags & IR_X86_AVX) { - | vcvtss2si Rq(def_reg), xmm(op1_reg-IR_REG_FP_FIRST) + | vcvttss2si Rq(def_reg), xmm(op1_reg-IR_REG_FP_FIRST) } else { - | cvtss2si Rq(def_reg), xmm(op1_reg-IR_REG_FP_FIRST) + | cvttss2si Rq(def_reg), xmm(op1_reg-IR_REG_FP_FIRST) } } |.endif @@ -6433,16 +6443,16 @@ static void ir_emit_fp2int(ir_ctx *ctx, ir_ref def, ir_insn *insn) if (!dst64) { if (src_type == IR_DOUBLE) { if (ctx->mflags & IR_X86_AVX) { - | vcvtsd2si Rd(def_reg), qword [=>label] + | vcvttsd2si Rd(def_reg), qword [=>label] } else { - | cvtsd2si Rd(def_reg), qword 
[=>label] + | cvttsd2si Rd(def_reg), qword [=>label] } } else { IR_ASSERT(src_type == IR_FLOAT); if (ctx->mflags & IR_X86_AVX) { - | vcvtss2si Rd(def_reg), dword [=>label] + | vcvttss2si Rd(def_reg), dword [=>label] } else { - | cvtss2si Rd(def_reg), dword [=>label] + | cvttss2si Rd(def_reg), dword [=>label] } } } else { @@ -6450,16 +6460,16 @@ static void ir_emit_fp2int(ir_ctx *ctx, ir_ref def, ir_insn *insn) |.if X64 if (src_type == IR_DOUBLE) { if (ctx->mflags & IR_X86_AVX) { - | vcvtsd2si Rq(def_reg), qword [=>label] + | vcvttsd2si Rq(def_reg), qword [=>label] } else { - | cvtsd2si Rq(def_reg), qword [=>label] + | cvttsd2si Rq(def_reg), qword [=>label] } } else { IR_ASSERT(src_type == IR_FLOAT); if (ctx->mflags & IR_X86_AVX) { - | vcvtss2si Rq(def_reg), dword [=>label] + | vcvttss2si Rq(def_reg), dword [=>label] } else { - | cvtss2si Rq(def_reg), dword [=>label] + | cvttss2si Rq(def_reg), dword [=>label] } } |.endif @@ -6476,16 +6486,16 @@ static void ir_emit_fp2int(ir_ctx *ctx, ir_ref def, ir_insn *insn) if (!dst64) { if (src_type == IR_DOUBLE) { if (ctx->mflags & IR_X86_AVX) { - | ASM_TXT_TMEM_OP vcvtsd2si, Rd(def_reg), qword, mem + | ASM_TXT_TMEM_OP vcvttsd2si, Rd(def_reg), qword, mem } else { - | ASM_TXT_TMEM_OP cvtsd2si, Rd(def_reg), qword, mem + | ASM_TXT_TMEM_OP cvttsd2si, Rd(def_reg), qword, mem } } else { IR_ASSERT(src_type == IR_FLOAT); if (ctx->mflags & IR_X86_AVX) { - | ASM_TXT_TMEM_OP vcvtss2si, Rd(def_reg), dword, mem + | ASM_TXT_TMEM_OP vcvttss2si, Rd(def_reg), dword, mem } else { - | ASM_TXT_TMEM_OP cvtss2si, Rd(def_reg), dword, mem + | ASM_TXT_TMEM_OP cvttss2si, Rd(def_reg), dword, mem } } } else { @@ -6493,16 +6503,16 @@ static void ir_emit_fp2int(ir_ctx *ctx, ir_ref def, ir_insn *insn) |.if X64 if (src_type == IR_DOUBLE) { if (ctx->mflags & IR_X86_AVX) { - | ASM_TXT_TMEM_OP vcvtsd2si, Rq(def_reg), qword, mem + | ASM_TXT_TMEM_OP vcvttsd2si, Rq(def_reg), qword, mem } else { - | ASM_TXT_TMEM_OP cvtsd2si, Rq(def_reg), qword, mem + | 
ASM_TXT_TMEM_OP cvttsd2si, Rq(def_reg), qword, mem } } else { IR_ASSERT(src_type == IR_FLOAT); if (ctx->mflags & IR_X86_AVX) { - | ASM_TXT_TMEM_OP vcvtss2si, Rq(def_reg), dword, mem + | ASM_TXT_TMEM_OP vcvttss2si, Rq(def_reg), dword, mem } else { - | ASM_TXT_TMEM_OP cvtss2si, Rq(def_reg), dword, mem + | ASM_TXT_TMEM_OP cvttss2si, Rq(def_reg), dword, mem } } |.endif @@ -9334,7 +9344,7 @@ void *ir_emit_code(ir_ctx *ctx, size_t *size_ptr) while (i <= bb->end) { if (!((*rule) & (IR_FUSED|IR_SKIPPED))) - switch (*rule) { + switch ((*rule) & IR_RULE_MASK) { case IR_VAR: case IR_PARAM: case IR_PI: