From dd4189da83baad76526be0aa8dd60daeaf1a35d3 Mon Sep 17 00:00:00 2001 From: Dmitry Stogov Date: Wed, 8 Oct 2025 23:36:58 +0300 Subject: [PATCH] Update IR IR commit: 62d48607eb3ae5a9d1240115e9e4bdb3decdcadf --- ext/opcache/jit/ir/ir.h | 1 + ext/opcache/jit/ir/ir_aarch64.dasc | 43 +++++++++++++++-- ext/opcache/jit/ir/ir_check.c | 69 +++++++++++++++++++++++++-- ext/opcache/jit/ir/ir_emit.c | 6 +++ ext/opcache/jit/ir/ir_fold.h | 59 +++++++++++++++++++++-- ext/opcache/jit/ir/ir_private.h | 2 + ext/opcache/jit/ir/ir_sccp.c | 75 +++++++++++++----------------- ext/opcache/jit/ir/ir_x86.dasc | 70 ++++++++++++++++++++++++---- 8 files changed, 261 insertions(+), 64 deletions(-) diff --git a/ext/opcache/jit/ir/ir.h b/ext/opcache/jit/ir/ir.h index 52cbc06b153..93ed4d3163e 100644 --- a/ext/opcache/jit/ir/ir.h +++ b/ext/opcache/jit/ir/ir.h @@ -1032,6 +1032,7 @@ IR_ALWAYS_INLINE void *ir_jit_compile(ir_ctx *ctx, int opt_level, size_t *size) #define IR_ERROR_UNSUPPORTED_CODE_RULE 3 #define IR_ERROR_LINK 4 #define IR_ERROR_ENCODE 5 +#define IR_ERROR_TOO_LARGE 6 /* IR Memmory Allocation */ #ifndef ir_mem_malloc diff --git a/ext/opcache/jit/ir/ir_aarch64.dasc b/ext/opcache/jit/ir/ir_aarch64.dasc index 2caabb3ebeb..d0edb33e8b3 100644 --- a/ext/opcache/jit/ir/ir_aarch64.dasc +++ b/ext/opcache/jit/ir/ir_aarch64.dasc @@ -658,16 +658,35 @@ int ir_get_target_constraints(ir_ctx *ctx, ir_ref ref, ir_target_constraints *co flags = IR_OP2_MUST_BE_IN_REG; constraints->tmp_regs[0] = IR_TMP_REG(3, IR_ADDR, IR_LOAD_SUB_REF, IR_DEF_SUB_REF); n = 1; + insn = &ctx->ir_base[ref]; + if (IR_IS_CONST_REF(insn->op2)) { + constraints->tmp_regs[1] = IR_TMP_REG(2, IR_ADDR, IR_LOAD_SUB_REF, IR_DEF_SUB_REF); + n = 2; + } break; case IR_VA_ARG: flags = IR_USE_MUST_BE_IN_REG | IR_OP2_MUST_BE_IN_REG; constraints->tmp_regs[0] = IR_TMP_REG(3, IR_ADDR, IR_LOAD_SUB_REF, IR_SAVE_SUB_REF); n = 1; + insn = &ctx->ir_base[ref]; + if (IR_IS_CONST_REF(insn->op2)) { + constraints->tmp_regs[1] = IR_TMP_REG(2, IR_ADDR, IR_LOAD_SUB_REF, IR_DEF_SUB_REF); + n = 2; + } break; case IR_VA_COPY: flags = IR_OP2_MUST_BE_IN_REG | IR_OP3_MUST_BE_IN_REG; constraints->tmp_regs[0] = IR_TMP_REG(1, IR_ADDR, IR_LOAD_SUB_REF, IR_DEF_SUB_REF); n = 1; + insn = &ctx->ir_base[ref]; + if (IR_IS_CONST_REF(insn->op2)) { + constraints->tmp_regs[n] = IR_TMP_REG(2, IR_ADDR, IR_LOAD_SUB_REF, IR_DEF_SUB_REF); + n++; + } + if (IR_IS_CONST_REF(insn->op3)) { + constraints->tmp_regs[n] = IR_TMP_REG(3, IR_ADDR, IR_LOAD_SUB_REF, IR_DEF_SUB_REF); + n++; + } break; } constraints->tmps_count = n; @@ -6143,6 +6162,14 @@ void *ir_emit_code(ir_ctx *ctx, size_t *size_ptr) dasm_setup(&data.dasm_state, dasm_actions); /* labels for each block + for each constant + rodata label + jmp_table label + for each entry + exit_table label */ dasm_growpc(&data.dasm_state, ctx->cfg_blocks_count + 1 + ctx->consts_count + 1 + 1 + 1 + ctx->entries_count + 1); + if (data.dasm_state->status != DASM_S_OK) { + IR_ASSERT(data.dasm_state->status == DASM_S_NOMEM); + dasm_free(&data.dasm_state); + ctx->data = NULL; + ctx->status = IR_ERROR_TOO_LARGE; + return NULL; + } + data.emit_constants = ir_bitset_malloc(ctx->consts_count); if (!(ctx->flags & IR_SKIP_PROLOGUE)) { @@ -6509,12 +6536,20 @@ void *ir_emit_code(ir_ctx *ctx, size_t *size_ptr) return NULL; } - ret = dasm_link(&data.dasm_state, size_ptr); - if (ret != DASM_S_OK) { - IR_ASSERT(0); + if (data.dasm_state->status != DASM_S_OK) { + IR_ASSERT(data.dasm_state->status == DASM_S_NOMEM); dasm_free(&data.dasm_state); ctx->data = NULL; - ctx->status = IR_ERROR_LINK; + ctx->status = IR_ERROR_TOO_LARGE; + return NULL; + } + + ret = dasm_link(&data.dasm_state, size_ptr); + if (ret != DASM_S_OK) { + IR_ASSERT(ret == DASM_S_NOMEM); + dasm_free(&data.dasm_state); + ctx->data = NULL; + ctx->status = (ret == DASM_S_NOMEM) ? IR_ERROR_TOO_LARGE : IR_ERROR_LINK; return NULL; } size = *size_ptr; diff --git a/ext/opcache/jit/ir/ir_check.c b/ext/opcache/jit/ir/ir_check.c index a791baef5db..6a687b84cc2 100644 --- a/ext/opcache/jit/ir/ir_check.c +++ b/ext/opcache/jit/ir/ir_check.c @@ -40,12 +40,41 @@ void ir_consistency_check(void) IR_ASSERT(IR_ADD + 1 == IR_SUB); } -static bool ir_check_use_list(const ir_ctx *ctx, ir_ref from, ir_ref to) +typedef struct { + ir_arena *arena; + ir_bitset *use_set; + ir_bitset *input_set; +} ir_check_ctx; + +static bool ir_check_use_list(ir_check_ctx *check_ctx, const ir_ctx *ctx, ir_ref from, ir_ref to) { ir_ref n, *p; ir_use_list *use_list = &ctx->use_lists[from]; n = use_list->count; + if (n > 16) { + /* Avoid quadratic complexity by maintaining a temporary bit-set */ + ir_bitset set; + + if (!check_ctx->use_set || !(set = check_ctx->use_set[from])) { + if (!check_ctx->arena) { + check_ctx->arena = ir_arena_create(sizeof(ir_arena) + + ctx->insns_count * sizeof(ir_bitset) + + ir_bitset_len(ctx->insns_count) * sizeof(ir_bitset_base_t)); + } + if (!check_ctx->use_set) { + check_ctx->use_set = ir_arena_alloc(&check_ctx->arena, ctx->insns_count * sizeof(ir_bitset)); + memset(check_ctx->use_set, 0, ctx->insns_count * sizeof(ir_bitset)); + } + check_ctx->use_set[from] = set = (ir_bitset)ir_arena_alloc(&check_ctx->arena, + ir_bitset_len(ctx->insns_count) * sizeof(ir_bitset_base_t)); + memset(set, 0, ir_bitset_len(ctx->insns_count) * sizeof(ir_bitset_base_t)); + for (p = &ctx->use_edges[use_list->refs]; n > 0; p++, n--) { + ir_bitset_incl(set, *p); + } + } + return ir_bitset_in(set, to); + } for (p = &ctx->use_edges[use_list->refs]; n > 0; p++, n--) { if (*p == to) { return 1; @@ -54,12 +83,35 @@ static bool ir_check_use_list(const ir_ctx *ctx, ir_ref from, ir_ref to) return 0; } -static bool ir_check_input_list(const ir_ctx *ctx, ir_ref from, ir_ref to) +static bool ir_check_input_list(ir_check_ctx *check_ctx, const ir_ctx *ctx, ir_ref from, ir_ref to) { ir_insn *insn = &ctx->ir_base[to]; ir_ref n, j, *p; n = ir_input_edges_count(ctx, insn); + if (n > 16) { + /* Avoid quadratic complexity by maintaining a temporary bit-set */ + ir_bitset set; + + if (!check_ctx->input_set || !(set = check_ctx->input_set[to])) { + if (!check_ctx->arena) { + check_ctx->arena = ir_arena_create(sizeof(ir_arena) + + ctx->insns_count * sizeof(ir_bitset) + + ir_bitset_len(ctx->insns_count) * sizeof(ir_bitset_base_t)); + } + if (!check_ctx->input_set) { + check_ctx->input_set = ir_arena_alloc(&check_ctx->arena, ctx->insns_count * sizeof(ir_bitset)); + memset(check_ctx->input_set, 0, ctx->insns_count * sizeof(ir_bitset)); + } + check_ctx->input_set[to] = set = (ir_bitset)ir_arena_alloc(&check_ctx->arena, + ir_bitset_len(ctx->insns_count) * sizeof(ir_bitset_base_t)); + memset(set, 0, ir_bitset_len(ctx->insns_count) * sizeof(ir_bitset_base_t)); + for (j = 1, p = insn->ops + 1; j <= n; j++, p++) { + if (*p > 0) ir_bitset_incl(set, *p); + } + } + return ir_bitset_in(set, from); + } for (j = 1, p = insn->ops + 1; j <= n; j++, p++) { if (*p == from) { return 1; @@ -93,6 +145,11 @@ bool ir_check(const ir_ctx *ctx) ir_type type; uint32_t flags; bool ok = 1; + ir_check_ctx check_ctx; + + check_ctx.arena = NULL; + check_ctx.use_set = NULL; + check_ctx.input_set = NULL; for (i = IR_UNUSED + 1, insn = ctx->ir_base + i; i < ctx->insns_count;) { if (insn->op >= IR_LAST_OP) { @@ -255,7 +312,7 @@ bool ir_check(const ir_ctx *ctx) } if (ctx->use_lists && use > 0 - && !ir_check_use_list(ctx, use, i)) { + && !ir_check_use_list(&check_ctx, ctx, use, i)) { fprintf(stderr, "ir_base[%d].ops[%d] is not in use list (%d)\n", i, j, use); ok = 0; } @@ -313,7 +370,7 @@ bool ir_check(const ir_ctx *ctx) for (p = &ctx->use_edges[use_list->refs]; n > 0; p++, n--) { use = *p; - if (!ir_check_input_list(ctx, i, use)) { + if (!ir_check_input_list(&check_ctx, ctx, i, use)) { fprintf(stderr, "ir_base[%d] is in use list of ir_base[%d]\n", use, i); ok = 0; } @@ -393,6 +450,10 @@ bool ir_check(const ir_ctx *ctx) insn += n; } + if (check_ctx.arena) { + ir_arena_free(check_ctx.arena); + } + // if (!ok) { // ir_dump_codegen(ctx, stderr); // } diff --git a/ext/opcache/jit/ir/ir_emit.c b/ext/opcache/jit/ir/ir_emit.c index c9e65229c39..7a10da1322a 100644 --- a/ext/opcache/jit/ir/ir_emit.c +++ b/ext/opcache/jit/ir/ir_emit.c @@ -32,8 +32,14 @@ do { \ size_t _sz = (sz), _need = (need); \ if (_sz < _need) { \ + size_t _limit = sizeof(t) * DASM_SEC2POS(1); \ + if (_need > _limit) { \ + Dst_REF->status = DASM_S_NOMEM; \ + return; \ + } \ if (_sz < 16) _sz = 16; \ while (_sz < _need) _sz += _sz; \ + if (_sz > _limit) _sz = _limit; \ (p) = (t *)ir_mem_realloc((p), _sz); \ (sz) = _sz; \ } \ diff --git a/ext/opcache/jit/ir/ir_fold.h b/ext/opcache/jit/ir/ir_fold.h index 7ae6ca539da..286932503d1 100644 --- a/ext/opcache/jit/ir/ir_fold.h +++ b/ext/opcache/jit/ir/ir_fold.h @@ -538,7 +538,8 @@ IR_FOLD(DIV(C_ADDR, C_ADDR)) IR_FOLD(DIV(C_I8, C_I8)) { IR_ASSERT(IR_OPT_TYPE(opt) == op1_insn->type); - if (op2_insn->val.i64 == 0) { + if (op2_insn->val.i64 == 0 + || (op2_insn->val.i64 == -1 && op1_insn->val.u8 == 0x80)) { /* division by zero */ IR_FOLD_EMIT; } @@ -548,7 +549,8 @@ IR_FOLD(DIV(C_I8, C_I8)) IR_FOLD(DIV(C_I16, C_I16)) { IR_ASSERT(IR_OPT_TYPE(opt) == op1_insn->type); - if (op2_insn->val.i64 == 0) { + if (op2_insn->val.i64 == 0 + || (op2_insn->val.i64 == -1 && op1_insn->val.u16 == 0x8000)) { /* division by zero */ IR_FOLD_EMIT; } @@ -558,7 +560,8 @@ IR_FOLD(DIV(C_I16, C_I16)) IR_FOLD(DIV(C_I32, C_I32)) { IR_ASSERT(IR_OPT_TYPE(opt) == op1_insn->type); - if (op2_insn->val.i64 == 0) { + if (op2_insn->val.i64 == 0 + || (op2_insn->val.i64 == -1 && op1_insn->val.u32 == 0x80000000)) { /* division by zero */ IR_FOLD_EMIT; } @@ -568,7 +571,8 @@ IR_FOLD(DIV(C_I32, C_I32)) IR_FOLD(DIV(C_I64, C_I64)) { IR_ASSERT(IR_OPT_TYPE(opt) == op1_insn->type); - if (op2_insn->val.i64 == 0) { + if (op2_insn->val.i64 == 0 + || (op2_insn->val.i64 == -1 && op1_insn->val.u64 == 0x8000000000000000)) { /* division by zero */ IR_FOLD_EMIT; } @@ -615,12 +619,27 @@ IR_FOLD(MOD(C_I64, C_I64)) } IR_FOLD(NEG(C_I8)) +{ + IR_ASSERT(IR_OPT_TYPE(opt) == op1_insn->type); + IR_FOLD_CONST_I((int8_t)(0 - op1_insn->val.u8)); +} + IR_FOLD(NEG(C_I16)) +{ + IR_ASSERT(IR_OPT_TYPE(opt) == op1_insn->type); + IR_FOLD_CONST_I((int16_t)(0 -op1_insn->val.u16)); +} + IR_FOLD(NEG(C_I32)) +{ + IR_ASSERT(IR_OPT_TYPE(opt) == op1_insn->type); + IR_FOLD_CONST_I((int32_t)(0 - op1_insn->val.u32)); +} + IR_FOLD(NEG(C_I64)) { IR_ASSERT(IR_OPT_TYPE(opt) == op1_insn->type); - IR_FOLD_CONST_I(-op1_insn->val.u64); + IR_FOLD_CONST_I(0 - op1_insn->val.u64); } IR_FOLD(NEG(C_DOUBLE)) @@ -1841,6 +1860,12 @@ IR_FOLD(ADD(SUB, _)) if (IR_IS_TYPE_INT(IR_OPT_TYPE(opt))) { if (op1_insn->op2 == op2) { /* (a - b) + b => a */ + if (ctx->ir_base[op1_insn->op1].type != IR_OPT_TYPE(opt)) { + opt = IR_BITCAST | (opt & IR_OPT_TYPE_MASK); + op1 = op1_insn->op1; + op2 = IR_UNUSED; + IR_FOLD_RESTART; + } IR_FOLD_COPY(op1_insn->op1); } } @@ -1852,6 +1877,12 @@ IR_FOLD(ADD(_, SUB)) if (IR_IS_TYPE_INT(IR_OPT_TYPE(opt))) { if (op2_insn->op2 == op1) { /* a + (b - a) => b */ + if (ctx->ir_base[op2_insn->op1].type != IR_OPT_TYPE(opt)) { + opt = IR_BITCAST | (opt & IR_OPT_TYPE_MASK); + op1 = op2_insn->op1; + op2 = IR_UNUSED; + IR_FOLD_RESTART; + } IR_FOLD_COPY(op2_insn->op1); } } @@ -1863,9 +1894,21 @@ IR_FOLD(SUB(ADD, _)) if (IR_IS_TYPE_INT(IR_OPT_TYPE(opt))) { if (op1_insn->op1 == op2) { /* (a + b) - a => b */ + if (ctx->ir_base[op1_insn->op2].type != IR_OPT_TYPE(opt)) { + opt = IR_BITCAST | (opt & IR_OPT_TYPE_MASK); + op1 = op1_insn->op2; + op2 = IR_UNUSED; + IR_FOLD_RESTART; + } IR_FOLD_COPY(op1_insn->op2); } else if (op1_insn->op2 == op2) { /* (a + b) - a => b */ + if (ctx->ir_base[op1_insn->op1].type != IR_OPT_TYPE(opt)) { + opt = IR_BITCAST | (opt & IR_OPT_TYPE_MASK); + op1 = op1_insn->op1; + op2 = IR_UNUSED; + IR_FOLD_RESTART; + } IR_FOLD_COPY(op1_insn->op1); } } @@ -1911,6 +1954,12 @@ IR_FOLD(SUB(_, SUB)) if (IR_IS_TYPE_INT(IR_OPT_TYPE(opt))) { if (op2_insn->op1 == op1) { /* a - (a - b) => b */ + if (ctx->ir_base[op2_insn->op2].type != IR_OPT_TYPE(opt)) { + opt = IR_BITCAST | (opt & IR_OPT_TYPE_MASK); + op1 = op2_insn->op2; + op2 = IR_UNUSED; + IR_FOLD_RESTART; + } IR_FOLD_COPY(op2_insn->op2); } } diff --git a/ext/opcache/jit/ir/ir_private.h b/ext/opcache/jit/ir/ir_private.h index 369b4c34e37..2f457cbc993 100644 --- a/ext/opcache/jit/ir/ir_private.h +++ b/ext/opcache/jit/ir/ir_private.h @@ -238,6 +238,7 @@ IR_ALWAYS_INLINE ir_arena* ir_arena_create(size_t size) IR_ASSERT(size >= IR_ALIGNED_SIZE(sizeof(ir_arena), 8)); arena = (ir_arena*)ir_mem_malloc(size); + if (UNEXPECTED(!arena))return NULL; arena->ptr = (char*) arena + IR_ALIGNED_SIZE(sizeof(ir_arena), 8); arena->end = (char*) arena + size; arena->prev = NULL; @@ -267,6 +268,7 @@ IR_ALWAYS_INLINE void* ir_arena_alloc(ir_arena **arena_ptr, size_t size) (size_t)(arena->end - (char*) arena); ir_arena *new_arena = (ir_arena*)ir_mem_malloc(arena_size); + if (UNEXPECTED(!new_arena)) return NULL; ptr = (char*) new_arena + IR_ALIGNED_SIZE(sizeof(ir_arena), 8); new_arena->ptr = (char*) new_arena + IR_ALIGNED_SIZE(sizeof(ir_arena), 8) + size; new_arena->end = (char*) new_arena + arena_size; diff --git a/ext/opcache/jit/ir/ir_sccp.c b/ext/opcache/jit/ir/ir_sccp.c index 48659cd4bd7..c90baab7ffa 100644 --- a/ext/opcache/jit/ir/ir_sccp.c +++ b/ext/opcache/jit/ir/ir_sccp.c @@ -583,6 +583,15 @@ static IR_NEVER_INLINE void ir_sccp_analyze(ir_ctx *ctx, ir_insn *_values, ir_bi } } else { IR_MAKE_BOTTOM_EX(i); + n = IR_INPUT_EDGES_COUNT(flags); + for (p = insn->ops + 1; n > 0; p++, n--) { + ir_ref input = *p; + if (input > 0) { + if (_values[input].op == IR_TOP) { + ir_sccp_add_input(ctx, _values, worklist, input); + } + } + } } } else if (flags & IR_OP_FLAG_BB_START) { if (insn->op == IR_MERGE || insn->op == IR_LOOP_BEGIN || insn->op == IR_BEGIN) { @@ -717,52 +726,32 @@ static IR_NEVER_INLINE void ir_sccp_analyze(ir_ctx *ctx, ir_insn *_values, ir_bi ir_bitqueue_add(iter_worklist, i); IR_MAKE_BOTTOM(i); } else { - if (_values[i].op == IR_TOP) { - bool has_top = 0; - - /* control, call, load and store instructions may have unprocessed inputs */ - n = IR_INPUT_EDGES_COUNT(flags); - if (IR_OP_HAS_VAR_INPUTS(flags) && (n = insn->inputs_count) > 3) { - for (j = 0; j < (n>>2); j++) { - _values[i+j+1].optx = IR_BOTTOM; /* keep the tail of a long multislot instruction */ - } - for (j = 2, p = insn->ops + j; j <= n; j++, p++) { - IR_ASSERT(IR_OPND_KIND(flags, j) == IR_OPND_DATA); - use = *p; - if (use > 0 && _values[use].op == IR_TOP) { - has_top = 1; - ir_sccp_add_input(ctx, _values, worklist, use); - } - } - } else if (n >= 2) { - IR_ASSERT(IR_OPND_KIND(flags, 2) == IR_OPND_DATA); - use = insn->op2; + /* control, call, load and store instructions may have unprocessed inputs */ + n = IR_INPUT_EDGES_COUNT(flags); + if (IR_OP_HAS_VAR_INPUTS(flags) && (n = insn->inputs_count) > 3) { + for (j = 0; j < (n>>2); j++) { + _values[i+j+1].optx = IR_BOTTOM; /* keep the tail of a long multislot instruction */ + } + for (j = 2, p = insn->ops + j; j <= n; j++, p++) { + IR_ASSERT(IR_OPND_KIND(flags, j) == IR_OPND_DATA); + use = *p; if (use > 0 && _values[use].op == IR_TOP) { - has_top = 1; ir_sccp_add_input(ctx, _values, worklist, use); } - if (n > 2) { - IR_ASSERT(n == 3); - IR_ASSERT(IR_OPND_KIND(flags, 3) == IR_OPND_DATA); - use = insn->op3; - if (use > 0 && _values[use].op == IR_TOP) { - has_top = 1; - ir_sccp_add_input(ctx, _values, worklist, use); - } - } } - - if (has_top && !(flags & IR_OP_FLAG_BB_END)) { - use = ir_next_control(ctx, i); - if (_values[use].op == IR_TOP) { - has_top = 1; - /* do forward control propagaton only once */ - if (!_values[use].op1) { - _values[use].op1 = 1; - ir_bitqueue_add(worklist, use); - } + } else if (n >= 2) { + IR_ASSERT(IR_OPND_KIND(flags, 2) == IR_OPND_DATA); + use = insn->op2; + if (use > 0 && _values[use].op == IR_TOP) { + ir_sccp_add_input(ctx, _values, worklist, use); + } + if (n > 2) { + IR_ASSERT(n == 3); + IR_ASSERT(IR_OPND_KIND(flags, 3) == IR_OPND_DATA); + use = insn->op3; + if (use > 0 && _values[use].op == IR_TOP) { + ir_sccp_add_input(ctx, _values, worklist, use); } - continue; } } IR_MAKE_BOTTOM(i); @@ -1325,8 +1314,8 @@ static ir_ref ir_iter_find_cse(ir_ctx *ctx, ir_ref ref, uint32_t opt, ir_ref op1 } } } else if (n < 2) { - IR_ASSERT(n == 1); - if (!IR_IS_CONST_REF(op1)) { + if (op1 > 0) { + IR_ASSERT(n == 1); use_list = &ctx->use_lists[op1]; n = use_list->count; for (p = ctx->use_edges + use_list->refs; n > 0; p++, n--) { diff --git a/ext/opcache/jit/ir/ir_x86.dasc b/ext/opcache/jit/ir/ir_x86.dasc index 781c8e5269c..a12c660376d 100644 --- a/ext/opcache/jit/ir/ir_x86.dasc +++ b/ext/opcache/jit/ir/ir_x86.dasc @@ -1238,6 +1238,9 @@ op2_const: if (IR_IS_CONST_REF(insn->op2) && insn->op1 != insn->op2) { constraints->tmp_regs[n] = IR_TMP_REG(2, insn->type, IR_LOAD_SUB_REF, IR_DEF_SUB_REF); n++; + } else if (ir_rule(ctx, insn->op2) == IR_STATIC_ALLOCA) { + constraints->tmp_regs[n] = IR_TMP_REG(2, IR_ADDR, IR_LOAD_SUB_REF, IR_DEF_SUB_REF); + n = 1; } break; case IR_CMP_INT: @@ -1564,16 +1567,35 @@ op2_const: flags = IR_OP2_MUST_BE_IN_REG; constraints->tmp_regs[0] = IR_TMP_REG(3, IR_ADDR, IR_LOAD_SUB_REF, IR_DEF_SUB_REF); n = 1; + insn = &ctx->ir_base[ref]; + if (IR_IS_CONST_REF(insn->op2)) { + constraints->tmp_regs[1] = IR_TMP_REG(2, IR_ADDR, IR_LOAD_SUB_REF, IR_DEF_SUB_REF); + n = 2; + } break; case IR_VA_ARG: flags = IR_USE_MUST_BE_IN_REG | IR_OP2_MUST_BE_IN_REG; constraints->tmp_regs[0] = IR_TMP_REG(3, IR_ADDR, IR_LOAD_SUB_REF, IR_SAVE_SUB_REF); n = 1; + insn = &ctx->ir_base[ref]; + if (IR_IS_CONST_REF(insn->op2)) { + constraints->tmp_regs[1] = IR_TMP_REG(2, IR_ADDR, IR_LOAD_SUB_REF, IR_DEF_SUB_REF); + n = 2; + } break; case IR_VA_COPY: flags = IR_OP2_MUST_BE_IN_REG | IR_OP3_MUST_BE_IN_REG; constraints->tmp_regs[0] = IR_TMP_REG(1, IR_ADDR, IR_LOAD_SUB_REF, IR_DEF_SUB_REF); n = 1; + insn = &ctx->ir_base[ref]; + if (IR_IS_CONST_REF(insn->op2)) { + constraints->tmp_regs[n] = IR_TMP_REG(2, IR_ADDR, IR_LOAD_SUB_REF, IR_DEF_SUB_REF); + n++; + } + if (IR_IS_CONST_REF(insn->op3)) { + constraints->tmp_regs[n] = IR_TMP_REG(3, IR_ADDR, IR_LOAD_SUB_REF, IR_DEF_SUB_REF); + n++; + } break; case IR_SSE_SQRT: case IR_SSE_RINT: @@ -3573,7 +3595,13 @@ static ir_mem ir_fuse_addr(ir_ctx *ctx, ir_ref root, ir_ref ref) case IR_LEA_B_SI_O: offset_insn = insn; op1_insn = &ctx->ir_base[insn->op1]; - base_reg_ref = insn->op1 * sizeof(ir_ref) + 1; + if (ir_rule(ctx, op1_insn->op1) == IR_STATIC_ALLOCA) { + offset = ir_local_offset(ctx, &ctx->ir_base[op1_insn->op1]); + base_reg = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER; + base_reg_ref = IR_UNUSED; + } else { + base_reg_ref = insn->op1 * sizeof(ir_ref) + 1; + } index_reg_ref = op1_insn->op2 * sizeof(ir_ref) + 1; op2_insn = &ctx->ir_base[op1_insn->op2]; scale = ctx->ir_base[op2_insn->op2].val.i32; @@ -3582,7 +3610,13 @@ static ir_mem ir_fuse_addr(ir_ctx *ctx, ir_ref root, ir_ref ref) offset_insn = insn; op1_insn = &ctx->ir_base[insn->op1]; index_reg_ref = op1_insn->op1 * sizeof(ir_ref) + 1; - base_reg_ref = insn->op1 * sizeof(ir_ref) + 2; + if (ir_rule(ctx, op1_insn->op2) == IR_STATIC_ALLOCA) { + offset = ir_local_offset(ctx, &ctx->ir_base[op1_insn->op2]); + base_reg = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER; + base_reg_ref = IR_UNUSED; + } else { + base_reg_ref = insn->op1 * sizeof(ir_ref) + 2; + } op1_insn = &ctx->ir_base[op1_insn->op1]; scale = ctx->ir_base[op1_insn->op2].val.i32; break; @@ -3620,7 +3654,7 @@ static ir_mem ir_fuse_addr(ir_ctx *ctx, ir_ref root, ir_ref ref) offset += (int64_t)(intptr_t)(addr); } else { if (offset_insn->op == IR_SUB) { - offset = -addr_insn->val.i32; + offset -= addr_insn->val.i32; } else { offset += addr_insn->val.i32; } @@ -8596,7 +8630,11 @@ static void ir_emit_va_arg(ir_ctx *ctx, ir_ref def, ir_insn *insn) | add Ra(tmp_reg), sizeof(void*) | mov aword [Ra(op2_reg)+(offset+offsetof(ir_va_list, overflow_arg_area))], Ra(tmp_reg) |2: - | mov Ra(def_reg), aword [Ra(tmp_reg)-sizeof(void*)] + if (ir_type_size[type] == 8) { + | mov Rq(def_reg), qword [Ra(tmp_reg)-sizeof(void*)] + } else { + | mov Rd(def_reg), dword [Ra(tmp_reg)-sizeof(void*)] + } } else { | mov Rd(tmp_reg), dword [Ra(op2_reg)+(offset+offsetof(ir_va_list, fp_offset))] | cmp Rd(tmp_reg), sizeof(void*) * IR_REG_INT_ARGS + 16 * IR_REG_FP_ARGS @@ -11019,6 +11057,14 @@ void *ir_emit_code(ir_ctx *ctx, size_t *size_ptr) dasm_setup(&data.dasm_state, dasm_actions); /* labels for each block + for each constant + rodata label + jmp_table label + for each entry */ dasm_growpc(&data.dasm_state, ctx->cfg_blocks_count + 1 + ctx->consts_count + 1 + 1 + 1 + ctx->entries_count); + if (data.dasm_state->status != DASM_S_OK) { + IR_ASSERT(data.dasm_state->status == DASM_S_NOMEM); + dasm_free(&data.dasm_state); + ctx->data = NULL; + ctx->status = IR_ERROR_TOO_LARGE; + return NULL; + } + data.emit_constants = ir_bitset_malloc(ctx->consts_count); if ((ctx->flags & IR_GEN_ENDBR) && (ctx->flags & IR_START_BR_TARGET)) { @@ -11515,12 +11561,20 @@ next_block:; return NULL; } - ret = dasm_link(&data.dasm_state, size_ptr); - if (ret != DASM_S_OK) { - IR_ASSERT(0); + if (data.dasm_state->status != DASM_S_OK) { + IR_ASSERT(data.dasm_state->status == DASM_S_NOMEM); dasm_free(&data.dasm_state); ctx->data = NULL; - ctx->status = IR_ERROR_LINK; + ctx->status = IR_ERROR_TOO_LARGE; + return NULL; + } + + ret = dasm_link(&data.dasm_state, size_ptr); + if (ret != DASM_S_OK) { + IR_ASSERT(ret == DASM_S_NOMEM); + dasm_free(&data.dasm_state); + ctx->data = NULL; + ctx->status = (ret == DASM_S_NOMEM) ? IR_ERROR_TOO_LARGE : IR_ERROR_LINK; return NULL; } size = *size_ptr;