From 191430dc3db3d32156761d378310037352ef288c Mon Sep 17 00:00:00 2001 From: Dmitry Stogov Date: Tue, 14 Oct 2025 23:21:49 +0300 Subject: [PATCH] Update IR IR commit: 5a81104e650ebd7ac24eb63d4dff67db723a5278 --- ext/opcache/jit/ir/ir.h | 2 + ext/opcache/jit/ir/ir_aarch64.dasc | 64 +++++++++++----- ext/opcache/jit/ir/ir_check.c | 1 + ext/opcache/jit/ir/ir_fold.h | 104 +++++++++++++++++++++++++ ext/opcache/jit/ir/ir_gcm.c | 2 + ext/opcache/jit/ir/ir_sccp.c | 37 +++++++-- ext/opcache/jit/ir/ir_x86.dasc | 118 +++++++++++++++++++++-------- 7 files changed, 268 insertions(+), 60 deletions(-) diff --git a/ext/opcache/jit/ir/ir.h b/ext/opcache/jit/ir/ir.h index 93ed4d3163e..8fcfbffa7d6 100644 --- a/ext/opcache/jit/ir/ir.h +++ b/ext/opcache/jit/ir/ir.h @@ -257,6 +257,8 @@ typedef enum _ir_type { _(UGE, d2, def, def, ___) /* unsigned greater or equal */ \ _(ULE, d2, def, def, ___) /* unsigned less or equal */ \ _(UGT, d2, def, def, ___) /* unsigned greater */ \ + _(ORDERED, d2, def, def, ___) /* both operands are not NAN */ \ + _(UNORDERED, d2, def, def, ___) /* one of operands is NAN */ \ \ /* arithmetic ops */ \ _(ADD, d2C, def, def, ___) /* addition */ \ diff --git a/ext/opcache/jit/ir/ir_aarch64.dasc b/ext/opcache/jit/ir/ir_aarch64.dasc index d0edb33e8b3..12c3694d469 100644 --- a/ext/opcache/jit/ir/ir_aarch64.dasc +++ b/ext/opcache/jit/ir/ir_aarch64.dasc @@ -747,6 +747,9 @@ static uint32_t ir_match_insn(ir_ctx *ctx, ir_ref ref) return IR_CMP_FP; } break; + case IR_ORDERED: + case IR_UNORDERED: + return IR_CMP_FP; case IR_ADD: case IR_SUB: if (IR_IS_TYPE_INT(insn->type)) { @@ -1043,7 +1046,7 @@ binop_fp: case IR_IF: if (!IR_IS_CONST_REF(insn->op2) && ctx->use_lists[insn->op2].count == 1) { op2_insn = &ctx->ir_base[insn->op2]; - if (op2_insn->op >= IR_EQ && op2_insn->op <= IR_UGT) { + if (op2_insn->op >= IR_EQ && op2_insn->op <= IR_UNORDERED) { if (IR_IS_TYPE_INT(ctx->ir_base[op2_insn->op1].type)) { ctx->rules[insn->op2] = IR_FUSED | IR_CMP_INT; return IR_CMP_AND_BRANCH_INT; @@ -1066,7 +1069,7 @@ binop_fp: case IR_GUARD_NOT: if (!IR_IS_CONST_REF(insn->op2) && ctx->use_lists[insn->op2].count == 1) { op2_insn = &ctx->ir_base[insn->op2]; - if (op2_insn->op >= IR_EQ && op2_insn->op <= IR_UGT + if (op2_insn->op >= IR_EQ && op2_insn->op <= IR_UNORDERED // TODO: register allocator may clobber operands of CMP before they are used in the GUARD_CMP && (insn->op2 == ref - 1 || (insn->op2 == ctx->prev_ref[ref] - 1 @@ -1110,6 +1113,9 @@ binop_fp: ctx->flags2 |= IR_HAS_VA_ARG_GP|IR_HAS_VA_ARG_FP; } } + } else { + /* va_list may escape */ + ctx->flags2 |= IR_HAS_VA_ARG_GP|IR_HAS_VA_ARG_FP; } return IR_VA_START; case IR_VA_END: @@ -2991,6 +2997,12 @@ static void ir_emit_cmp_fp(ir_ctx *ctx, ir_ref def, ir_insn *insn) case IR_UGT: | cset Rw(def_reg), hi break; + case IR_ORDERED: + | cset Rw(def_reg), vc + break; + case IR_UNORDERED: + | cset Rw(def_reg), vs + break; } if (IR_REG_SPILLED(ctx->regs[def][0])) { ir_emit_store(ctx, insn->type, def, def_reg); @@ -3065,7 +3077,7 @@ static void ir_emit_jcc(ir_ctx *ctx, uint32_t b, ir_ref def, ir_insn *insn, uint ir_get_true_false_blocks(ctx, b, &true_block, &false_block); if (true_block == next_block) { /* swap to avoid unconditional JMP */ - if (int_cmp || op == IR_EQ || op == IR_NE) { + if (int_cmp || op == IR_EQ || op == IR_NE || op == IR_ORDERED || op == IR_UNORDERED) { op ^= 1; // reverse } else { op ^= 5; // reverse @@ -3145,6 +3157,11 @@ static void ir_emit_jcc(ir_ctx *ctx, uint32_t b, ir_ref def, ir_insn *insn, uint case IR_UGT: | bhi =>true_block break; + case IR_ORDERED: + | bvc =>true_block + break; + case IR_UNORDERED: + | bvs =>true_block // case IR_ULT: fprintf(stderr, "\tjb .LL%d\n", true_block); break; // case IR_UGE: fprintf(stderr, "\tjae .LL%d\n", true_block); break; // case IR_ULE: fprintf(stderr, "\tjbe .LL%d\n", true_block); break; @@ -4462,11 +4479,7 @@ static void ir_emit_va_arg(ir_ctx *ctx, ir_ref def, ir_insn *insn) ir_reg tmp_reg = ctx->regs[def][3]; int32_t offset; - if (ctx->use_lists[def].count == 1) { - /* dead load */ - return; - } - IR_ASSERT(def_reg != IR_REG_NONE && tmp_reg != IR_REG_NONE); + IR_ASSERT((def_reg != IR_REG_NONE || ctx->use_lists[def].count == 1) && tmp_reg != IR_REG_NONE); if (op2_reg != IR_REG_NONE) { if (IR_REG_SPILLED(op2_reg)) { op2_reg = IR_REG_NUM(op2_reg); @@ -4479,10 +4492,12 @@ static void ir_emit_va_arg(ir_ctx *ctx, ir_ref def, ir_insn *insn) offset = ir_local_offset(ctx, &ctx->ir_base[insn->op2]); } | ldr Rx(tmp_reg), [Rx(op2_reg), #offset] - ir_emit_load_mem(ctx, type, def_reg, IR_MEM_BO(tmp_reg, 0)); + if (def_reg != IR_REG_NONE) { + ir_emit_load_mem(ctx, type, def_reg, IR_MEM_BO(tmp_reg, 0)); + } | add Rx(tmp_reg), Rx(tmp_reg), #IR_MAX(ir_type_size[type], sizeof(void*)) | str Rx(tmp_reg), [Rx(op2_reg), #offset] - if (IR_REG_SPILLED(ctx->regs[def][0])) { + if (def_reg != IR_REG_NONE && IR_REG_SPILLED(ctx->regs[def][0])) { ir_emit_store(ctx, type, def, def_reg); } #else @@ -4494,11 +4509,7 @@ static void ir_emit_va_arg(ir_ctx *ctx, ir_ref def, ir_insn *insn) ir_reg tmp_reg = ctx->regs[def][3]; int32_t offset; - if (ctx->use_lists[def].count == 1) { - /* dead load */ - return; - } - IR_ASSERT(def_reg != IR_REG_NONE && tmp_reg != IR_REG_NONE); + IR_ASSERT((def_reg != IR_REG_NONE || ctx->use_lists[def].count == 1) && tmp_reg != IR_REG_NONE); if (op2_reg != IR_REG_NONE) { if (IR_REG_SPILLED(op2_reg)) { op2_reg = IR_REG_NUM(op2_reg); @@ -4517,13 +4528,17 @@ static void ir_emit_va_arg(ir_ctx *ctx, ir_ref def, ir_insn *insn) | ldr Rx(IR_REG_INT_TMP), [Rx(op2_reg), #(offset+offsetof(ir_va_list, gr_top))] | sxtw Rx(tmp_reg), Rw(tmp_reg) | add Rx(IR_REG_INT_TMP), Rx(tmp_reg), Rx(IR_REG_INT_TMP) - | ldr Rx(def_reg), [Rx(IR_REG_INT_TMP)] + if (def_reg != IR_REG_NONE) { + | ldr Rx(def_reg), [Rx(IR_REG_INT_TMP)] + } | add Rw(tmp_reg), Rw(tmp_reg), #sizeof(void*) | str Rw(tmp_reg), [Rx(op2_reg), #(offset+offsetof(ir_va_list, gr_offset))] | b >2 |1: | ldr Rx(tmp_reg), [Rx(op2_reg), #(offset+offsetof(ir_va_list, stack))] - | ldr Rx(def_reg), [Rx(tmp_reg)] + if (def_reg != IR_REG_NONE) { + | ldr Rx(def_reg), [Rx(tmp_reg)] + } | add Rx(tmp_reg), Rx(tmp_reg), #sizeof(void*) | str Rx(tmp_reg), [Rx(op2_reg), #(offset+offsetof(ir_va_list, stack))] |2: @@ -4534,18 +4549,22 @@ static void ir_emit_va_arg(ir_ctx *ctx, ir_ref def, ir_insn *insn) | ldr Rx(IR_REG_INT_TMP), [Rx(op2_reg), #(offset+offsetof(ir_va_list, vr_top))] | sxtw Rx(tmp_reg), Rw(tmp_reg) | add Rx(IR_REG_INT_TMP), Rx(tmp_reg), Rx(IR_REG_INT_TMP) - | ldr Rd(def_reg-IR_REG_FP_FIRST), [Rx(IR_REG_INT_TMP)] + if (def_reg != IR_REG_NONE) { + | ldr Rd(def_reg-IR_REG_FP_FIRST), [Rx(IR_REG_INT_TMP)] + } | add Rw(tmp_reg), Rw(tmp_reg), #16 | str Rw(tmp_reg), [Rx(op2_reg), #(offset+offsetof(ir_va_list, vr_offset))] | b >2 |1: | ldr Rx(tmp_reg), [Rx(op2_reg), #(offset+offsetof(ir_va_list, stack))] - | ldr Rd(def_reg-IR_REG_FP_FIRST), [Rx(tmp_reg)] + if (def_reg != IR_REG_NONE) { + | ldr Rd(def_reg-IR_REG_FP_FIRST), [Rx(tmp_reg)] + } | add Rx(tmp_reg), Rx(tmp_reg), #sizeof(void*) | str Rx(tmp_reg), [Rx(op2_reg), #(offset+offsetof(ir_va_list, stack))] |2: } - if (IR_REG_SPILLED(ctx->regs[def][0])) { + if (def_reg != IR_REG_NONE && IR_REG_SPILLED(ctx->regs[def][0])) { ir_emit_store(ctx, type, def, def_reg); } #endif @@ -5378,6 +5397,11 @@ static void ir_emit_guard_jcc(ir_ctx *ctx, uint8_t op, void *addr, bool int_cmp) case IR_GT: | bgt &addr break; + case IR_ORDERED: + | bvc &addr + break; + case IR_UNORDERED: + | bvs &addr // case IR_ULT: fprintf(stderr, "\tjb .LL%d\n", true_block); break; // case IR_UGE: fprintf(stderr, "\tjae .LL%d\n", true_block); break; // case IR_ULE: fprintf(stderr, "\tjbe .LL%d\n", true_block); break; diff --git a/ext/opcache/jit/ir/ir_check.c b/ext/opcache/jit/ir/ir_check.c index 6a687b84cc2..c25a984aefc 100644 --- a/ext/opcache/jit/ir/ir_check.c +++ b/ext/opcache/jit/ir/ir_check.c @@ -36,6 +36,7 @@ void ir_consistency_check(void) IR_ASSERT((IR_UGT ^ 3) == IR_ULT); IR_ASSERT((IR_ULE ^ 3) == IR_UGE); IR_ASSERT((IR_UGE ^ 3) == IR_ULE); + IR_ASSERT((IR_ORDERED ^ 1) == IR_UNORDERED); IR_ASSERT(IR_ADD + 1 == IR_SUB); } diff --git a/ext/opcache/jit/ir/ir_fold.h b/ext/opcache/jit/ir/ir_fold.h index 286932503d1..74f7818d747 100644 --- a/ext/opcache/jit/ir/ir_fold.h +++ b/ext/opcache/jit/ir/ir_fold.h @@ -274,6 +274,106 @@ IR_FOLD(UGT(C_FLOAT, C_FLOAT)) IR_FOLD_BOOL(!(op1_insn->val.f <= op2_insn->val.f)); } +IR_FOLD(ORDERED(C_FLOAT, C_FLOAT)) +{ + IR_FOLD_BOOL(!isnan(op1_insn->val.f) && !isnan(op2_insn->val.f)); +} + +IR_FOLD(ORDERED(C_DOUBLE, C_DOUBLE)) +{ + IR_FOLD_BOOL(!isnan(op1_insn->val.d) && !isnan(op2_insn->val.d)); +} + +IR_FOLD(UNORDERED(C_FLOAT, C_FLOAT)) +{ + IR_FOLD_BOOL(isnan(op1_insn->val.f) || isnan(op2_insn->val.f)); +} + +IR_FOLD(UNORDERED(C_DOUBLE, C_DOUBLE)) +{ + IR_FOLD_BOOL(isnan(op1_insn->val.d) || isnan(op2_insn->val.d)); +} + +IR_FOLD(EQ(_, C_FLOAT)) +IR_FOLD(LT(_, C_FLOAT)) +IR_FOLD(GE(_, C_FLOAT)) +IR_FOLD(LE(_, C_FLOAT)) +IR_FOLD(GT(_, C_FLOAT)) +{ + if (isnan(op2_insn->val.f)) { + IR_FOLD_COPY(IR_FALSE); + } + IR_FOLD_NEXT; +} + +IR_FOLD(NE(_, C_FLOAT)) +{ + if (isnan(op2_insn->val.f)) { + IR_FOLD_COPY(IR_TRUE); + } + IR_FOLD_NEXT; +} + +IR_FOLD(ORDERED(_, C_FLOAT)) +{ + if (isnan(op2_insn->val.f)) { + IR_FOLD_COPY(IR_FALSE); + } else { + op2 = op1; + IR_FOLD_RESTART; + } +} + +IR_FOLD(UNORDERED(_, C_FLOAT)) +{ + if (isnan(op2_insn->val.f)) { + IR_FOLD_COPY(IR_TRUE); + } else { + op2 = op1; + IR_FOLD_RESTART; + } +} + +IR_FOLD(EQ(_, C_DOUBLE)) +IR_FOLD(LT(_, C_DOUBLE)) +IR_FOLD(GE(_, C_DOUBLE)) +IR_FOLD(LE(_, C_DOUBLE)) +IR_FOLD(GT(_, C_DOUBLE)) +{ + if (isnan(op2_insn->val.d)) { + IR_FOLD_COPY(IR_FALSE); + } + IR_FOLD_NEXT; +} + +IR_FOLD(NE(_, C_DOUBLE)) +{ + if (isnan(op2_insn->val.d)) { + IR_FOLD_COPY(IR_TRUE); + } + IR_FOLD_NEXT; +} + +IR_FOLD(ORDERED(_, C_DOUBLE)) +{ + if (isnan(op2_insn->val.d)) { + IR_FOLD_COPY(IR_FALSE); + } else { + op2 = op1; + IR_FOLD_RESTART; + } +} + +IR_FOLD(UNORDERED(_, C_DOUBLE)) +{ + if (isnan(op2_insn->val.d)) { + IR_FOLD_COPY(IR_TRUE); + } else { + op2 = op1; + IR_FOLD_RESTART; + } +} + IR_FOLD(ADD(C_U8, C_U8)) { IR_ASSERT(IR_OPT_TYPE(opt) == op1_insn->type); @@ -1645,6 +1745,8 @@ IR_FOLD(NOT(ULT)) IR_FOLD(NOT(UGE)) IR_FOLD(NOT(ULE)) IR_FOLD(NOT(UGT)) +IR_FOLD(NOT(ORDERED)) +IR_FOLD(NOT(UNORDERED)) { if (IR_IS_TYPE_INT(ctx->ir_base[op1_insn->op1].type)) { opt = op1_insn->opt ^ 1; @@ -3182,6 +3284,8 @@ IR_FOLD(ADD(SHR, SHL)) /* Swap operands (move lower ref to op2) for better CSE */ IR_FOLD(MUL(_, _)) +IR_FOLD(ORDERED(_, _)) +IR_FOLD(UNORDERED(_, _)) IR_FOLD_NAMED(swap_ops) { if (op1 < op2) { /* move lower ref to op2 */ diff --git a/ext/opcache/jit/ir/ir_gcm.c b/ext/opcache/jit/ir/ir_gcm.c index c170fa47476..043e1e7bdd8 100644 --- a/ext/opcache/jit/ir/ir_gcm.c +++ b/ext/opcache/jit/ir/ir_gcm.c @@ -1379,6 +1379,8 @@ restart: switch (new_insn->op) { case IR_EQ: case IR_NE: + case IR_ORDERED: + case IR_UNORDERED: case IR_ADD: case IR_MUL: case IR_ADD_OV: diff --git a/ext/opcache/jit/ir/ir_sccp.c b/ext/opcache/jit/ir/ir_sccp.c index c90baab7ffa..45df92ec2be 100644 --- a/ext/opcache/jit/ir/ir_sccp.c +++ b/ext/opcache/jit/ir/ir_sccp.c @@ -12,6 +12,8 @@ #include "ir.h" #include "ir_private.h" +#include + #define IR_COMBO_COPY_PROPAGATION 1 #define IR_TOP IR_UNUSED @@ -420,11 +422,12 @@ static bool ir_is_dead_load_ex(ir_ctx *ctx, ir_ref ref, uint32_t flags, ir_insn static bool ir_is_dead_load(ir_ctx *ctx, ir_ref ref) { if (ctx->use_lists[ref].count == 1) { - uint32_t flags = ir_op_flags[ctx->ir_base[ref].op]; + ir_insn *insn = &ctx->ir_base[ref]; + uint32_t flags = ir_op_flags[insn->op]; if ((flags & (IR_OP_FLAG_MEM|IR_OP_FLAG_MEM_MASK)) == (IR_OP_FLAG_MEM|IR_OP_FLAG_MEM_LOAD)) { return 1; - } else if (ctx->ir_base[ref].op == IR_ALLOCA) { + } else if (insn->op == IR_ALLOCA || insn->op == IR_BLOCK_BEGIN) { return 1; } } @@ -2808,6 +2811,10 @@ static bool ir_cmp_is_true(ir_op op, ir_insn *op1, ir_insn *op2) return !(op1->val.d > op2->val.d); } else if (op == IR_UGT) { return !(op1->val.d <= op2->val.d); + } else if (op == IR_ORDERED) { + return !isnan(op1->val.d) && !isnan(op2->val.d); + } else if (op == IR_UNORDERED) { + return isnan(op1->val.d) || isnan(op2->val.d); } else { IR_ASSERT(0); return 0; @@ -2834,6 +2841,10 @@ static bool ir_cmp_is_true(ir_op op, ir_insn *op1, ir_insn *op2) return !(op1->val.f > op2->val.f); } else if (op == IR_UGT) { return !(op1->val.f <= op2->val.f); + } else if (op == IR_ORDERED) { + return !isnan(op1->val.f) && !isnan(op2->val.f); + } else if (op == IR_UNORDERED) { + return isnan(op1->val.f) || isnan(op2->val.f); } else { IR_ASSERT(0); return 0; @@ -3465,9 +3476,18 @@ static void ir_iter_optimize_guard(ir_ctx *ctx, ir_ref ref, ir_insn *insn, ir_bi remove_guard: prev = insn->op1; next = ir_next_control(ctx, ref); + if (ctx->ir_base[prev].op == IR_SNAPSHOT) { + ir_ref snapshot = prev; + prev = ctx->ir_base[prev].op1; + ir_use_list_remove_one(ctx, snapshot, ref); + ir_use_list_remove_one(ctx, ref, next); + ir_use_list_replace_one(ctx, prev, snapshot, next); + ir_iter_remove_insn(ctx, snapshot, worklist); + } else { + ir_use_list_remove_one(ctx, ref, next); + ir_use_list_replace_one(ctx, prev, ref, next); + } ctx->ir_base[next].op1 = prev; - ir_use_list_remove_one(ctx, ref, next); - ir_use_list_replace_one(ctx, prev, ref, next); insn->op1 = IR_UNUSED; if (!IR_IS_CONST_REF(insn->op2)) { @@ -3478,9 +3498,12 @@ remove_guard: } } - if (insn->op3) { - /* SNAPSHOT */ - ir_iter_remove_insn(ctx, insn->op3, worklist); + if (!IR_IS_CONST_REF(insn->op3)) { + ir_use_list_remove_one(ctx, insn->op3, ref); + if (ir_is_dead(ctx, insn->op3)) { + /* schedule DCE */ + ir_bitqueue_add(worklist, insn->op3); + } } MAKE_NOP(insn); diff --git a/ext/opcache/jit/ir/ir_x86.dasc b/ext/opcache/jit/ir/ir_x86.dasc index a12c660376d..42e4eee7da0 100644 --- a/ext/opcache/jit/ir/ir_x86.dasc +++ b/ext/opcache/jit/ir/ir_x86.dasc @@ -1881,7 +1881,8 @@ static void ir_match_fuse_load_cmp_fp(ir_ctx *ctx, ir_insn *insn, ir_ref root) /* pass */ } else if ((IR_IS_CONST_REF(insn->op1) && !IR_IS_FP_ZERO(ctx->ir_base[insn->op1])) || ir_match_try_fuse_load(ctx, insn->op1, root)) { ir_swap_ops(insn); - if (insn->op != IR_EQ && insn->op != IR_NE) { + if (insn->op != IR_EQ && insn->op != IR_NE + && insn->op != IR_ORDERED && insn->op != IR_UNORDERED) { insn->op ^= 3; } } @@ -1908,7 +1909,8 @@ static void ir_match_fuse_load_cmp_fp_br(ir_ctx *ctx, ir_insn *insn, ir_ref root /* pass */ } else if ((IR_IS_CONST_REF(insn->op1) && !IR_IS_FP_ZERO(ctx->ir_base[insn->op1])) || ir_match_try_fuse_load(ctx, insn->op1, root)) { ir_swap_ops(insn); - if (insn->op != IR_EQ && insn->op != IR_NE) { + if (insn->op != IR_EQ && insn->op != IR_NE + && insn->op != IR_ORDERED && insn->op != IR_UNORDERED) { insn->op ^= 3; } } @@ -2035,6 +2037,10 @@ static uint32_t ir_match_insn(ir_ctx *ctx, ir_ref ref) return IR_CMP_FP; } break; + case IR_ORDERED: + case IR_UNORDERED: + ir_match_fuse_load_cmp_fp(ctx, insn, ref); + return IR_CMP_FP; case IR_ADD: case IR_SUB: if (IR_IS_TYPE_INT(insn->type)) { @@ -2694,7 +2700,7 @@ store_int: case IR_IF: if (!IR_IS_CONST_REF(insn->op2) && ctx->use_lists[insn->op2].count == 1) { op2_insn = &ctx->ir_base[insn->op2]; - if (op2_insn->op >= IR_EQ && op2_insn->op <= IR_UGT) { + if (op2_insn->op >= IR_EQ && op2_insn->op <= IR_UNORDERED) { if (IR_IS_TYPE_INT(ctx->ir_base[op2_insn->op1].type)) { if (IR_IS_CONST_REF(op2_insn->op2) && !IR_IS_SYM_CONST(ctx->ir_base[op2_insn->op2].op) @@ -2823,7 +2829,7 @@ store_int: if (!IR_IS_CONST_REF(insn->op1) && ctx->use_lists[insn->op1].count == 1) { ir_insn *op1_insn = &ctx->ir_base[insn->op1]; - if (op1_insn->op >= IR_EQ && op1_insn->op <= IR_UGT) { + if (op1_insn->op >= IR_EQ && op1_insn->op <= IR_UNORDERED) { if (IR_IS_TYPE_INT(ctx->ir_base[op1_insn->op1].type)) { ir_match_fuse_load_cmp_int(ctx, op1_insn, ref); ctx->rules[insn->op1] = IR_FUSED | IR_CMP_INT; @@ -2840,7 +2846,7 @@ store_int: case IR_GUARD_NOT: if (!IR_IS_CONST_REF(insn->op2) && ctx->use_lists[insn->op2].count == 1) { op2_insn = &ctx->ir_base[insn->op2]; - if (op2_insn->op >= IR_EQ && op2_insn->op <= IR_UGT + if (op2_insn->op >= IR_EQ && op2_insn->op <= IR_UNORDERED // TODO: register allocator may clobber operands of CMP before they are used in the GUARD_CMP && (insn->op2 == ref - 1 || (insn->op2 == ctx->prev_ref[ref] - 1 @@ -2993,6 +2999,9 @@ store_int: ctx->flags2 |= IR_HAS_VA_ARG_GP|IR_HAS_VA_ARG_FP; } } + } else { + /* va_list may escape */ + ctx->flags2 |= IR_HAS_VA_ARG_GP|IR_HAS_VA_ARG_FP; } return IR_VA_START; case IR_VA_END: @@ -6187,6 +6196,12 @@ static void ir_emit_cmp_fp(ir_ctx *ctx, ir_ref def, ir_insn *insn) | mov Rd(tmp_reg), 1 | cmova Rd(def_reg), Rd(tmp_reg) break; + case IR_ORDERED: + | setnp Rb(def_reg) + break; + case IR_UNORDERED: + | setp Rb(def_reg) + break; } if (IR_REG_SPILLED(ctx->regs[def][0])) { ir_emit_store(ctx, insn->type, def, def_reg); @@ -6226,7 +6241,7 @@ static void ir_emit_jcc(ir_ctx *ctx, uint32_t b, ir_ref def, ir_insn *insn, uint ir_get_true_false_blocks(ctx, b, &true_block, &false_block); if (true_block == next_block) { /* swap to avoid unconditional JMP */ - if (int_cmp || op == IR_EQ || op == IR_NE) { + if (int_cmp || op == IR_EQ || op == IR_NE || op == IR_ORDERED || op == IR_UNORDERED) { op ^= 1; // reverse } else { op ^= 5; // reverse @@ -6338,6 +6353,12 @@ static void ir_emit_jcc(ir_ctx *ctx, uint32_t b, ir_ref def, ir_insn *insn, uint | jp =>true_block | ja =>true_block break; + case IR_ORDERED: + | jnp =>true_block + break; + case IR_UNORDERED: + | jp =>true_block + break; } } if (false_block) { @@ -6856,6 +6877,12 @@ static void ir_emit_cond_cmp_fp(ir_ctx *ctx, ir_ref def, ir_insn *insn) | jp >1 | jbe >2 break; + case IR_ORDERED: + | jp >2 + break; + case IR_UNORDERED: + | jnp >2 + break; } |1: @@ -8536,11 +8563,7 @@ static void ir_emit_va_arg(ir_ctx *ctx, ir_ref def, ir_insn *insn) ir_reg tmp_reg = ctx->regs[def][3]; int32_t offset; - if (ctx->use_lists[def].count == 1) { - /* dead load */ - return; - } - IR_ASSERT(def_reg != IR_REG_NONE && tmp_reg != IR_REG_NONE); + IR_ASSERT((def_reg != IR_REG_NONE || ctx->use_lists[def].count == 1) && tmp_reg != IR_REG_NONE); if (op2_reg != IR_REG_NONE) { if (IR_REG_SPILLED(op2_reg)) { op2_reg = IR_REG_NUM(op2_reg); @@ -8554,27 +8577,34 @@ static void ir_emit_va_arg(ir_ctx *ctx, ir_ref def, ir_insn *insn) } | mov Ra(tmp_reg), aword [Ra(op2_reg)+offset] #ifdef _WIN64 - ir_emit_load_mem(ctx, type, def_reg, IR_MEM_B(tmp_reg)); + if (def_reg != IR_REG_NONE) { + ir_emit_load_mem(ctx, type, def_reg, IR_MEM_B(tmp_reg)); + } | add Ra(tmp_reg), IR_MAX(ir_type_size[type], sizeof(void*)) #else if (!insn->op3) { - ir_emit_load_mem(ctx, type, def_reg, IR_MEM_B(tmp_reg)); + if (def_reg != IR_REG_NONE) { + ir_emit_load_mem(ctx, type, def_reg, IR_MEM_B(tmp_reg)); + } | add Ra(tmp_reg), IR_MAX(ir_type_size[type], sizeof(void*)) } else { - IR_ASSERT(type == IR_ADDR); - int align = 1U << (insn->op3 & 0x7); int size = (uint32_t)insn->op3 >> 3; - if (align > (int)sizeof(void*)) { - | add Ra(tmp_reg), (align-1) - | and Ra(tmp_reg), ~(align-1) + if (def_reg != IR_REG_NONE) { + IR_ASSERT(type == IR_ADDR); + int align = 1U << (insn->op3 & 0x7); + + if (align > (int)sizeof(void*)) { + | add Ra(tmp_reg), (align-1) + | and Ra(tmp_reg), ~(align-1) + } + | mov Ra(def_reg), Ra(tmp_reg) } - | mov Ra(def_reg), Ra(tmp_reg) | add Ra(tmp_reg), IR_ALIGNED_SIZE(size, sizeof(void*)) } #endif | mov aword [Ra(op2_reg)+offset], Ra(tmp_reg) - if (IR_REG_SPILLED(ctx->regs[def][0])) { + if (def_reg && IR_REG_SPILLED(ctx->regs[def][0])) { ir_emit_store(ctx, type, def, def_reg); } #elif defined(IR_TARGET_X64) @@ -8587,11 +8617,7 @@ static void ir_emit_va_arg(ir_ctx *ctx, ir_ref def, ir_insn *insn) ir_reg tmp_reg = ctx->regs[def][3]; int32_t offset; - if (ctx->use_lists[def].count == 1) { - /* dead load */ - return; - } - IR_ASSERT(def_reg != IR_REG_NONE&& tmp_reg != IR_REG_NONE); + IR_ASSERT((def_reg != IR_REG_NONE || ctx->use_lists[def].count == 1) && tmp_reg != IR_REG_NONE); if (op2_reg != IR_REG_NONE) { if (IR_REG_SPILLED(op2_reg)) { op2_reg = IR_REG_NUM(op2_reg); @@ -8614,7 +8640,9 @@ static void ir_emit_va_arg(ir_ctx *ctx, ir_ref def, ir_insn *insn) | add Ra(tmp_reg), (align-1) | and Ra(tmp_reg), ~(align-1) } - | mov Ra(def_reg), Ra(tmp_reg) + if (def_reg != IR_REG_NONE) { + | mov Ra(def_reg), Ra(tmp_reg) + } | add Ra(tmp_reg), IR_ALIGNED_SIZE(size, sizeof(void*)) | mov aword [Ra(op2_reg)+(offset+offsetof(ir_va_list, overflow_arg_area))], Ra(tmp_reg) } else if (IR_IS_TYPE_INT(type)) { @@ -8630,10 +8658,12 @@ static void ir_emit_va_arg(ir_ctx *ctx, ir_ref def, ir_insn *insn) | add Ra(tmp_reg), sizeof(void*) | mov aword [Ra(op2_reg)+(offset+offsetof(ir_va_list, overflow_arg_area))], Ra(tmp_reg) |2: - if (ir_type_size[type] == 8) { - | mov Rq(def_reg), qword [Ra(tmp_reg)-sizeof(void*)] - } else { - | mov Rd(def_reg), dword [Ra(tmp_reg)-sizeof(void*)] + if (def_reg != IR_REG_NONE) { + if (ir_type_size[type] == 8) { + | mov Rq(def_reg), qword [Ra(tmp_reg)-sizeof(void*)] + } else { + | mov Rd(def_reg), dword [Ra(tmp_reg)-sizeof(void*)] + } } } else { | mov Rd(tmp_reg), dword [Ra(op2_reg)+(offset+offsetof(ir_va_list, fp_offset))] @@ -8642,16 +8672,20 @@ static void ir_emit_va_arg(ir_ctx *ctx, ir_ref def, ir_insn *insn) | add Rd(tmp_reg), 16 | mov dword [Ra(op2_reg)+(offset+offsetof(ir_va_list, fp_offset))], Rd(tmp_reg) | add Ra(tmp_reg), aword [Ra(op2_reg)+(offset+offsetof(ir_va_list, reg_save_area))] - ir_emit_load_mem_fp(ctx, type, def_reg, IR_MEM_BO(tmp_reg, -16)); + if (def_reg != IR_REG_NONE) { + ir_emit_load_mem_fp(ctx, type, def_reg, IR_MEM_BO(tmp_reg, -16)); + } | jmp >2 |1: | mov Ra(tmp_reg), aword [Ra(op2_reg)+(offset+offsetof(ir_va_list, overflow_arg_area))] - ir_emit_load_mem_fp(ctx, type, def_reg, IR_MEM_BO(tmp_reg, 0)); + if (def_reg != IR_REG_NONE) { + ir_emit_load_mem_fp(ctx, type, def_reg, IR_MEM_BO(tmp_reg, 0)); + } | add Ra(tmp_reg), 8 | mov aword [Ra(op2_reg)+(offset+offsetof(ir_va_list, overflow_arg_area))], Ra(tmp_reg) |2: } - if (IR_REG_SPILLED(ctx->regs[def][0])) { + if (def_reg != IR_REG_NONE && IR_REG_SPILLED(ctx->regs[def][0])) { ir_emit_store(ctx, type, def, def_reg); } |.endif @@ -9789,6 +9823,12 @@ static bool ir_emit_guard_jcc(ir_ctx *ctx, uint32_t b, ir_ref def, uint32_t next | jp &addr | jbe =>target break; + case IR_ORDERED: + | jnp =>target + break; + case IR_UNORDERED: + | jp =>target + break; } } | jmp &addr @@ -9868,6 +9908,12 @@ static bool ir_emit_guard_jcc(ir_ctx *ctx, uint32_t b, ir_ref def, uint32_t next | jp &addr | jbe &target_addr break; + case IR_ORDERED: + | jnp &target_addr + break; + case IR_UNORDERED: + | jp &target_addr + break; } } | jmp &addr @@ -9947,6 +9993,12 @@ static bool ir_emit_guard_jcc(ir_ctx *ctx, uint32_t b, ir_ref def, uint32_t next case IR_GT: | ja &addr break; + case IR_ORDERED: + | jp &addr + break; + case IR_UNORDERED: + | jnp &addr + break; // case IR_ULT: fprintf(stderr, "\tjb .LL%d\n", true_block); break; // case IR_UGE: fprintf(stderr, "\tjae .LL%d\n", true_block); break; // case IR_ULE: fprintf(stderr, "\tjbe .LL%d\n", true_block); break;