From 29a70828b9d12e4c9d1c2b4cda1bf73ea10c1d8e Mon Sep 17 00:00:00 2001 From: Dmitry Stogov Date: Wed, 29 Nov 2023 12:27:05 +0300 Subject: [PATCH] Update IR IR commit: 81047af575864c5dfd5fda62394ada46cc9cae27 --- ext/opcache/jit/ir/ir.c | 14 +- ext/opcache/jit/ir/ir.h | 5 +- ext/opcache/jit/ir/ir_aarch64.dasc | 293 +++++++++++++++++++++- ext/opcache/jit/ir/ir_aarch64.h | 10 + ext/opcache/jit/ir/ir_private.h | 5 + ext/opcache/jit/ir/ir_ra.c | 8 +- ext/opcache/jit/ir/ir_x86.dasc | 374 ++++++++++++++++++++++++++++- ext/opcache/jit/ir/ir_x86.h | 7 + 8 files changed, 698 insertions(+), 18 deletions(-) diff --git a/ext/opcache/jit/ir/ir.c b/ext/opcache/jit/ir/ir.c index 521d33a2f6c..6c1c4e0c983 100644 --- a/ext/opcache/jit/ir/ir.c +++ b/ext/opcache/jit/ir/ir.c @@ -1424,13 +1424,23 @@ int ir_mem_unmap(void *ptr, size_t size) int ir_mem_protect(void *ptr, size_t size) { - mprotect(ptr, size, PROT_READ | PROT_EXEC); + if (mprotect(ptr, size, PROT_READ | PROT_EXEC) != 0) { +#ifdef IR_DEBUG + fprintf(stderr, "mprotect() failed\n"); +#endif + return 0; + } return 1; } int ir_mem_unprotect(void *ptr, size_t size) { - mprotect(ptr, size, PROT_READ | PROT_WRITE); + if (mprotect(ptr, size, PROT_READ | PROT_WRITE) != 0) { +#ifdef IR_DEBUG + fprintf(stderr, "mprotect() failed\n"); +#endif + return 0; + } return 1; } diff --git a/ext/opcache/jit/ir/ir.h b/ext/opcache/jit/ir/ir.h index a09bf82ec33..669f94e0438 100644 --- a/ext/opcache/jit/ir/ir.h +++ b/ext/opcache/jit/ir/ir.h @@ -564,6 +564,10 @@ struct _ir_ctx { int32_t fixed_stack_frame_size; /* fixed stack allocated by generated code for spills and registers save/restore */ int32_t fixed_call_stack_size; /* fixed preallocated stack for parameter passing (default 0) */ uint64_t fixed_save_regset; /* registers that always saved/restored in prologue/epilogue */ + uint32_t locals_area_size; + uint32_t gp_reg_params; + uint32_t fp_reg_params; + int32_t param_stack_size; ir_live_interval **live_intervals; ir_arena *arena; ir_live_range *unused_ranges; @@ -581,7 +585,6 @@ struct _ir_ctx { int32_t call_stack_size; /* stack for parameter passing (used by register allocator and code generator) */ uint64_t used_preserved_regs; #ifdef IR_TARGET_X86 - int32_t param_stack_size; int32_t ret_slot; #endif uint32_t rodata_offset; diff --git a/ext/opcache/jit/ir/ir_aarch64.dasc b/ext/opcache/jit/ir/ir_aarch64.dasc index 76b03ebd51d..709b7afe15e 100644 --- a/ext/opcache/jit/ir/ir_aarch64.dasc +++ b/ext/opcache/jit/ir/ir_aarch64.dasc @@ -524,6 +524,16 @@ int ir_get_target_constraints(const ir_ctx *ctx, ir_ref ref, ir_target_constrain case IR_SNAPSHOT: flags = 0; break; + case IR_VA_START: + flags = IR_OP1_MUST_BE_IN_REG; + constraints->tmp_regs[0] = IR_TMP_REG(3, IR_ADDR, IR_LOAD_SUB_REF, IR_DEF_SUB_REF); + n = 1; + break; + case IR_VA_ARG: + flags = IR_USE_MUST_BE_IN_REG | IR_OP1_MUST_BE_IN_REG; + constraints->tmp_regs[0] = IR_TMP_REG(3, IR_ADDR, IR_LOAD_SUB_REF, IR_SAVE_SUB_REF); + n = 1; + break; } constraints->tmps_count = n; @@ -875,6 +885,48 @@ binop_fp: } } return insn->op; + case IR_VA_START: + ctx->flags2 |= IR_HAS_VA_START; + if (ctx->ir_base[insn->op2].op == IR_ALLOCA) { + ir_use_list *use_list = &ctx->use_lists[insn->op2]; + ir_ref *p, n = use_list->count; + for (p = &ctx->use_edges[use_list->refs]; n > 0; p++, n--) { + ir_insn *use_insn = &ctx->ir_base[*p]; + if (use_insn->op == IR_VA_START || use_insn->op == IR_VA_END) { + } else if (use_insn->op == IR_VA_COPY) { + if (use_insn->op3 == insn->op2) { + ctx->flags2 |= IR_HAS_VA_COPY; + } + } else if (use_insn->op == 
IR_VA_ARG) { + if (use_insn->op2 == insn->op2) { + if (IR_IS_TYPE_INT(use_insn->type)) { + ctx->flags2 |= IR_HAS_VA_ARG_GP; + } else { + IR_ASSERT(IR_IS_TYPE_FP(use_insn->type)); + ctx->flags2 |= IR_HAS_VA_ARG_FP; + } + } + } else if (*p > ref) { + /* direct va_list access */ + ctx->flags2 |= IR_HAS_VA_ARG_GP|IR_HAS_VA_ARG_FP; + } + } + } + return IR_VA_START; + case IR_VA_END: + return IR_SKIPPED | IR_NOP; + case IR_VADDR: + if (ctx->use_lists[ref].count > 0) { + ir_use_list *use_list = &ctx->use_lists[ref]; + ir_ref *p, n = use_list->count; + + for (p = &ctx->use_edges[use_list->refs]; n > 0; p++, n--) { + if (ctx->ir_base[*p].op != IR_VA_END) { + return IR_VADDR; + } + } + } + return IR_SKIPPED | IR_NOP; default: break; } @@ -1266,15 +1318,17 @@ static void ir_emit_prologue(ir_ctx *ctx) } } if (ctx->used_preserved_regs) { + ir_reg fp; int offset; uint32_t i; ir_reg prev = IR_REG_NONE; - ir_reg fp = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER; ir_regset used_preserved_regs = (ir_regset)ctx->used_preserved_regs; if (ctx->flags & IR_USE_FRAME_POINTER) { + fp = IR_REG_FRAME_POINTER; offset = ctx->stack_frame_size + sizeof(void*) * 2; } else { + fp = IR_REG_STACK_POINTER; offset = ctx->stack_frame_size + ctx->call_stack_size; } for (i = 0; i < IR_REG_NUM; i++) { @@ -1309,6 +1363,53 @@ static void ir_emit_prologue(ir_ctx *ctx) } } } + if ((ctx->flags & IR_VARARG_FUNC) && (ctx->flags2 & IR_HAS_VA_START)) { +#ifndef __APPLE__ + const int8_t *int_reg_params = _ir_int_reg_params; + const int8_t *fp_reg_params = _ir_fp_reg_params; + ir_reg fp; + int offset; + int i; + + if (ctx->flags & IR_USE_FRAME_POINTER) { + fp = IR_REG_FRAME_POINTER; + + offset = ctx->locals_area_size + sizeof(void*) * 2; + } else { + fp = IR_REG_STACK_POINTER; + offset = ctx->locals_area_size + ctx->call_stack_size; + } + + if ((ctx->flags2 & (IR_HAS_VA_ARG_GP|IR_HAS_VA_COPY)) && ctx->gp_reg_params < IR_REG_INT_ARGS) { + ir_reg prev = IR_REG_NONE; + + /* skip named args */ + offset += sizeof(void*) * ctx->gp_reg_params; + for (i = ctx->gp_reg_params; i < IR_REG_INT_ARGS; i++) { + if (prev != IR_REG_NONE) { + | stp Rx(prev), Rx(int_reg_params[i]), [Rx(fp), #offset] + prev = IR_REG_NONE; + offset += sizeof(void*) * 2; + } else { + prev = int_reg_params[i]; + } + } + if (prev != IR_REG_NONE) { + | str Rx(prev), [Rx(fp), #offset] + offset += sizeof(void*); + } + } + if ((ctx->flags2 & (IR_HAS_VA_ARG_FP|IR_HAS_VA_COPY)) && ctx->fp_reg_params < IR_REG_FP_ARGS) { + /* skip named args */ + offset += 16 * ctx->fp_reg_params; + for (i = ctx->fp_reg_params; i < IR_REG_FP_ARGS; i++) { + // TODO: Rd->Rq stur->str ???
+ | str Rd(fp_reg_params[i]-IR_REG_FP_FIRST), [Rx(fp), #offset] + offset += 16; + } + } +#endif + } } static void ir_emit_epilogue(ir_ctx *ctx) @@ -3543,14 +3644,183 @@ static void ir_emit_frame_addr(ir_ctx *ctx, ir_ref def) static void ir_emit_va_start(ir_ctx *ctx, ir_ref def, ir_insn *insn) { +#ifdef __APPLE__ + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + ir_reg fp; + int arg_area_offset; + ir_reg op2_reg = ctx->regs[def][2]; + ir_reg tmp_reg = ctx->regs[def][3]; + + IR_ASSERT(tmp_reg != IR_REG_NONE); + if (op2_reg != IR_REG_NONE && IR_REG_SPILLED(op2_reg)) { + op2_reg = IR_REG_NUM(op2_reg); + ir_emit_load(ctx, IR_ADDR, op2_reg, insn->op2); + } + + if (ctx->flags & IR_USE_FRAME_POINTER) { + fp = IR_REG_FRAME_POINTER; + arg_area_offset = ctx->stack_frame_size + sizeof(void*) * 2 + ctx->param_stack_size; + } else { + fp = IR_REG_STACK_POINTER; + arg_area_offset = ctx->call_stack_size + ctx->stack_frame_size + ctx->param_stack_size; + } + | add Rx(tmp_reg), Rx(fp), #arg_area_offset + if (op2_reg != IR_REG_NONE) { + | str Rx(tmp_reg), [Rx(op2_reg)] + } else { + int32_t offset = ir_ref_spill_slot(ctx, insn->op2, &op2_reg); + + | str Rx(tmp_reg), [Rx(op2_reg), #offset] + } +#else + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + ir_reg fp; + int reg_save_area_offset; + int overflow_arg_area_offset; + ir_reg op2_reg = ctx->regs[def][2]; + ir_reg tmp_reg = ctx->regs[def][3]; + + IR_ASSERT(op2_reg != IR_REG_NONE && tmp_reg != IR_REG_NONE); + if (IR_REG_SPILLED(op2_reg)) { + op2_reg = IR_REG_NUM(op2_reg); + ir_emit_load(ctx, IR_ADDR, op2_reg, insn->op2); + } + + if (ctx->flags & IR_USE_FRAME_POINTER) { + fp = IR_REG_FRAME_POINTER; + reg_save_area_offset = ctx->locals_area_size + sizeof(void*) * 2; + overflow_arg_area_offset = ctx->stack_frame_size + sizeof(void*) * 2 + ctx->param_stack_size; + } else { + fp = IR_REG_STACK_POINTER; + reg_save_area_offset = ctx->locals_area_size + ctx->call_stack_size; + overflow_arg_area_offset = ctx->call_stack_size + ctx->stack_frame_size + ctx->param_stack_size; + } + + /* Set va_list.stack */ + | add Rx(tmp_reg), Rx(fp), #overflow_arg_area_offset + | str Rx(tmp_reg), [Rx(op2_reg), #offsetof(ir_va_list, stack)] + if ((ctx->flags2 & (IR_HAS_VA_ARG_GP|IR_HAS_VA_COPY)) && ctx->gp_reg_params < IR_REG_INT_ARGS) { + reg_save_area_offset += sizeof(void*) * IR_REG_INT_ARGS; + /* Set va_list.gr_top */ + if (overflow_arg_area_offset != reg_save_area_offset) { + | add Rx(tmp_reg), Rx(fp), #reg_save_area_offset + } + | str Rx(tmp_reg), [Rx(op2_reg), #offsetof(ir_va_list, gr_top)] + /* Set va_list.gr_offset */ + | movn Rw(tmp_reg), #~(0 - (sizeof(void*) * (IR_REG_INT_ARGS - ctx->gp_reg_params))) + | str Rw(tmp_reg), [Rx(op2_reg), #offsetof(ir_va_list, gr_offset)] + } else { + /* Set va_list.gr_offset */ + | str wzr, [Rx(op2_reg), #offsetof(ir_va_list, gr_offset)] + } + if ((ctx->flags2 & (IR_HAS_VA_ARG_FP|IR_HAS_VA_COPY)) && ctx->fp_reg_params < IR_REG_FP_ARGS) { + reg_save_area_offset += 16 * IR_REG_FP_ARGS; + /* Set va_list.vr_top */ + if (overflow_arg_area_offset != reg_save_area_offset) { + | add Rx(tmp_reg), Rx(fp), #reg_save_area_offset + } + | str Rx(tmp_reg), [Rx(op2_reg), #offsetof(ir_va_list, vr_top)] + /* Set va_list.vr_offset */ + | movn Rw(tmp_reg), #~(0 - (16 * (IR_REG_FP_ARGS - ctx->fp_reg_params))) + | str Rw(tmp_reg), [Rx(op2_reg), #offsetof(ir_va_list, vr_offset)] + } else { + /* Set va_list.vr_offset */ + | str wzr, [Rx(op2_reg), #offsetof(ir_va_list, vr_offset)] + } +#endif } static void 
ir_emit_va_copy(ir_ctx *ctx, ir_ref def, ir_insn *insn) { + IR_ASSERT(0 && "NIY va_copy"); } static void ir_emit_va_arg(ir_ctx *ctx, ir_ref def, ir_insn *insn) { +#ifdef __APPLE__ + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + ir_type type = insn->type; + ir_reg def_reg = ctx->regs[def][0]; + ir_reg op2_reg = ctx->regs[def][2]; + ir_reg tmp_reg = ctx->regs[def][3]; + + IR_ASSERT(def_reg != IR_REG_NONE && tmp_reg != IR_REG_NONE); + if (op2_reg != IR_REG_NONE && IR_REG_SPILLED(op2_reg)) { + op2_reg = IR_REG_NUM(op2_reg); + ir_emit_load(ctx, IR_ADDR, op2_reg, insn->op2); + } + | ldr Rx(tmp_reg), [Rx(op2_reg)] + if (IR_IS_TYPE_INT(type)) { + ir_emit_load_mem_int(ctx, type, def_reg, tmp_reg, 0); + } else { + ir_emit_load_mem_fp(ctx, type, def_reg, tmp_reg, 0); + } + | add Rx(tmp_reg), Rx(tmp_reg), #IR_MAX(ir_type_size[type], sizeof(void*)) + if (op2_reg != IR_REG_NONE) { + | str Rx(tmp_reg), [Rx(op2_reg)] + } else { + int32_t offset = ir_ref_spill_slot(ctx, insn->op2, &op2_reg); + + | str Rx(tmp_reg), [Rx(op2_reg), #offset] + } + if (IR_REG_SPILLED(ctx->regs[def][0])) { + ir_emit_store(ctx, type, def, def_reg); + } +#else + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + ir_type type = insn->type; + ir_reg def_reg = ctx->regs[def][0]; + ir_reg op2_reg = ctx->regs[def][2]; + ir_reg tmp_reg = ctx->regs[def][3]; + + IR_ASSERT(def_reg != IR_REG_NONE && op2_reg != IR_REG_NONE && tmp_reg != IR_REG_NONE); + if (IR_REG_SPILLED(op2_reg)) { + op2_reg = IR_REG_NUM(op2_reg); + ir_emit_load(ctx, IR_ADDR, op2_reg, insn->op2); + } + if (IR_IS_TYPE_INT(type)) { + | ldr Rw(tmp_reg), [Rx(op2_reg), #offsetof(ir_va_list, gr_offset)] + | cmp Rw(tmp_reg), wzr + | bge >1 + | ldr Rx(IR_REG_INT_TMP), [Rx(op2_reg), #offsetof(ir_va_list, gr_top)] + | sxtw Rx(tmp_reg), Rw(tmp_reg) + | add Rx(IR_REG_INT_TMP), Rx(tmp_reg), Rx(IR_REG_INT_TMP) + | ldr Rx(def_reg), [Rx(IR_REG_INT_TMP)] + | add Rw(tmp_reg), Rw(tmp_reg), #sizeof(void*) + | str Rw(tmp_reg), [Rx(op2_reg), #offsetof(ir_va_list, gr_offset)] + | b >2 + |1: + | ldr Rx(tmp_reg), [Rx(op2_reg), #offsetof(ir_va_list, stack)] + | ldr Rx(def_reg), [Rx(tmp_reg)] + | add Rx(tmp_reg), Rx(tmp_reg), #sizeof(void*) + | str Rx(tmp_reg), [Rx(op2_reg), #offsetof(ir_va_list, stack)] + |2: + } else { + | ldr Rw(tmp_reg), [Rx(op2_reg), #offsetof(ir_va_list, vr_offset)] + | cmp Rw(tmp_reg), wzr + | bge >1 + | ldr Rx(IR_REG_INT_TMP), [Rx(op2_reg), #offsetof(ir_va_list, vr_top)] + | sxtw Rx(tmp_reg), Rw(tmp_reg) + | add Rx(IR_REG_INT_TMP), Rx(tmp_reg), Rx(IR_REG_INT_TMP) + | ldr Rd(def_reg-IR_REG_FP_FIRST), [Rx(IR_REG_INT_TMP)] + | add Rw(tmp_reg), Rw(tmp_reg), #16 + | str Rw(tmp_reg), [Rx(op2_reg), #offsetof(ir_va_list, vr_offset)] + | b >2 + |1: + | ldr Rx(tmp_reg), [Rx(op2_reg), #offsetof(ir_va_list, stack)] + | ldr Rd(def_reg-IR_REG_FP_FIRST), [Rx(tmp_reg)] + | add Rx(tmp_reg), Rx(tmp_reg), #sizeof(void*) + | str Rx(tmp_reg), [Rx(op2_reg), #offsetof(ir_va_list, stack)] + |2: + } + if (IR_REG_SPILLED(ctx->regs[def][0])) { + ir_emit_store(ctx, type, def, def_reg); + } +#endif } static void ir_emit_switch(ir_ctx *ctx, uint32_t b, ir_ref def, ir_insn *insn) @@ -4724,6 +4994,7 @@ static void ir_fix_param_spills(ir_ctx *ctx) const int8_t *int_reg_params = _ir_int_reg_params; const int8_t *fp_reg_params = _ir_fp_reg_params; int32_t stack_offset = 0; + int32_t param_stack_size = 0; if (ctx->flags & IR_USE_FRAME_POINTER) { /* skip old frame pointer and return address */ @@ -4759,17 +5030,22 @@ static void ir_fix_param_spills(ir_ctx 
*ctx) && ival->stack_spill_pos == -1 && (ival->next || ival->reg == IR_REG_NONE)) { ival->stack_spill_pos = stack_offset; - ctx->regs[use][0] = IR_REG_NONE; } } if (sizeof(void*) == 8) { stack_offset += sizeof(void*); + param_stack_size += sizeof(void*); } else { stack_offset += IR_MAX(sizeof(void*), ir_type_size[insn->type]); + param_stack_size += IR_MAX(sizeof(void*), ir_type_size[insn->type]); } } } } + + ctx->gp_reg_params = IR_MIN(int_param_num, int_reg_params_count); + ctx->fp_reg_params = IR_MIN(fp_param_num, fp_reg_params_count); + ctx->param_stack_size = param_stack_size; } static void ir_allocate_unique_spill_slots(ir_ctx *ctx) @@ -4977,6 +5253,8 @@ void ir_fix_stack_frame(ir_ctx *ctx) { uint32_t additional_size = 0; + ctx->locals_area_size = ctx->stack_frame_size; + if (ctx->used_preserved_regs) { ir_regset used_preserved_regs = (ir_regset)ctx->used_preserved_regs; ir_reg reg; @@ -4987,6 +5265,15 @@ void ir_fix_stack_frame(ir_ctx *ctx) } IR_REGSET_FOREACH_END(); } + if ((ctx->flags & IR_VARARG_FUNC) && (ctx->flags2 & IR_HAS_VA_START)) { + if ((ctx->flags2 & (IR_HAS_VA_ARG_GP|IR_HAS_VA_COPY)) && ctx->gp_reg_params < IR_REG_INT_ARGS) { + additional_size += sizeof(void*) * IR_REG_INT_ARGS; + } + if ((ctx->flags2 & (IR_HAS_VA_ARG_FP|IR_HAS_VA_COPY)) && ctx->fp_reg_params < IR_REG_FP_ARGS) { + additional_size += 16 * IR_REG_FP_ARGS; + } + } + ctx->stack_frame_size = IR_ALIGNED_SIZE(ctx->stack_frame_size, sizeof(void*)); ctx->stack_frame_size += additional_size; ctx->stack_frame_alignment = 0; @@ -5321,7 +5608,7 @@ void *ir_emit_code(ir_ctx *ctx, size_t *size_ptr) ir_emit_tls(ctx, i, insn); break; case IR_TRAP: - | brk; + | brk break; default: IR_ASSERT(0 && "NIY rule/instruction"); diff --git a/ext/opcache/jit/ir/ir_aarch64.h b/ext/opcache/jit/ir/ir_aarch64.h index efd88c7f878..c699ee2afa8 100644 --- a/ext/opcache/jit/ir/ir_aarch64.h +++ b/ext/opcache/jit/ir/ir_aarch64.h @@ -152,6 +152,16 @@ enum _ir_reg { (IR_REGSET_INTERVAL(IR_REG_X19, IR_REG_X30) \ | IR_REGSET_INTERVAL(IR_REG_V8, IR_REG_V15)) +#ifndef __APPLE__ +typedef struct _ir_va_list { + void *stack; + void *gr_top; + void *vr_top; + int32_t gr_offset; + int32_t vr_offset; +} ir_va_list; +#endif + typedef struct _ir_tmp_reg { union { uint8_t num; diff --git a/ext/opcache/jit/ir/ir_private.h b/ext/opcache/jit/ir/ir_private.h index 5010793c957..d8dfe3f9301 100644 --- a/ext/opcache/jit/ir/ir_private.h +++ b/ext/opcache/jit/ir/ir_private.h @@ -883,6 +883,11 @@ IR_ALWAYS_INLINE uint32_t ir_insn_len(const ir_insn *insn) #define IR_HAS_CALLS (1<<3) #define IR_OPT_IN_SCCP (1<<4) #define IR_LINEAR (1<<5) +#define IR_HAS_VA_START (1<<6) +#define IR_HAS_VA_COPY (1<<7) +#define IR_HAS_VA_ARG_GP (1<<8) +#define IR_HAS_VA_ARG_FP (1<<9) +#define IR_HAS_FP_RET_SLOT (1<<10) /* Temporary: SCCP -> CFG */ #define IR_SCCP_DONE (1<<25) diff --git a/ext/opcache/jit/ir/ir_ra.c b/ext/opcache/jit/ir/ir_ra.c index 916653bdc99..f400127f291 100644 --- a/ext/opcache/jit/ir/ir_ra.c +++ b/ext/opcache/jit/ir/ir_ra.c @@ -3598,7 +3598,9 @@ static int ir_linear_scan(ir_ctx *ctx) } #ifdef IR_TARGET_X86 - if (ctx->ret_type == IR_FLOAT || ctx->ret_type == IR_DOUBLE) { + if (ctx->flags2 & IR_HAS_FP_RET_SLOT) { + ctx->ret_slot = ir_allocate_spill_slot(ctx, IR_DOUBLE, &data); + } else if (ctx->ret_type == IR_FLOAT || ctx->ret_type == IR_DOUBLE) { ctx->ret_slot = ir_allocate_spill_slot(ctx, ctx->ret_type, &data); } else { ctx->ret_slot = -1; @@ -3733,6 +3735,10 @@ static void assign_regs(ir_ctx *ctx) if (ctx->ir_base[ref].op == IR_PHI) { /* Spilled PHI var is passed 
through memory */ reg = IR_REG_NONE; + } else if (ctx->ir_base[ref].op == IR_PARAM + && (ival->flags & IR_LIVE_INTERVAL_MEM_PARAM)) { + /* Stack PARAM var is passed through memory */ + reg = IR_REG_NONE; } else { uint32_t use_b = ctx->cfg_map[ref]; diff --git a/ext/opcache/jit/ir/ir_x86.dasc b/ext/opcache/jit/ir/ir_x86.dasc index 68f2933a729..7e7eeab2baf 100644 --- a/ext/opcache/jit/ir/ir_x86.dasc +++ b/ext/opcache/jit/ir/ir_x86.dasc @@ -877,6 +877,16 @@ op2_const: case IR_SNAPSHOT: flags = 0; break; + case IR_VA_START: + flags = IR_OP1_MUST_BE_IN_REG; + constraints->tmp_regs[0] = IR_TMP_REG(3, IR_ADDR, IR_LOAD_SUB_REF, IR_DEF_SUB_REF); + n = 1; + break; + case IR_VA_ARG: + flags = IR_USE_MUST_BE_IN_REG | IR_OP1_MUST_BE_IN_REG; + constraints->tmp_regs[0] = IR_TMP_REG(3, IR_ADDR, IR_LOAD_SUB_REF, IR_SAVE_SUB_REF); + n = 1; + break; } constraints->tmps_count = n; @@ -1424,6 +1434,11 @@ binop_fp: break; case IR_CALL: ctx->flags2 |= IR_HAS_CALLS; +#ifndef IR_REG_FP_RET1 + if (IR_IS_TYPE_FP(insn->type)) { + ctx->flags2 |= IR_HAS_FP_RET_SLOT; + } +#endif IR_FALLTHROUGH; case IR_TAILCALL: if (ir_in_same_block(ctx, insn->op2)) { @@ -1921,6 +1936,48 @@ store_int: case IR_CTPOP: ir_match_fuse_load(ctx, insn->op1, ref); return (ctx->mflags & IR_X86_BMI1) ? IR_BIT_COUNT : IR_CTPOP; + case IR_VA_START: + ctx->flags2 |= IR_HAS_VA_START; + if ((ctx->ir_base[insn->op2].op == IR_ALLOCA) || (ctx->ir_base[insn->op2].op == IR_VADDR)) { + ir_use_list *use_list = &ctx->use_lists[insn->op2]; + ir_ref *p, n = use_list->count; + for (p = &ctx->use_edges[use_list->refs]; n > 0; p++, n--) { + ir_insn *use_insn = &ctx->ir_base[*p]; + if (use_insn->op == IR_VA_START || use_insn->op == IR_VA_END) { + } else if (use_insn->op == IR_VA_COPY) { + if (use_insn->op3 == insn->op2) { + ctx->flags2 |= IR_HAS_VA_COPY; + } + } else if (use_insn->op == IR_VA_ARG) { + if (use_insn->op2 == insn->op2) { + if (IR_IS_TYPE_INT(use_insn->type)) { + ctx->flags2 |= IR_HAS_VA_ARG_GP; + } else { + IR_ASSERT(IR_IS_TYPE_FP(use_insn->type)); + ctx->flags2 |= IR_HAS_VA_ARG_FP; + } + } + } else if (*p > ref) { + /* direct va_list access */ + ctx->flags2 |= IR_HAS_VA_ARG_GP|IR_HAS_VA_ARG_FP; + } + } + } + return IR_VA_START; + case IR_VA_END: + return IR_SKIPPED | IR_NOP; + case IR_VADDR: + if (ctx->use_lists[ref].count > 0) { + ir_use_list *use_list = &ctx->use_lists[ref]; + ir_ref *p, n = use_list->count; + + for (p = &ctx->use_edges[use_list->refs]; n > 0; p++, n--) { + if (ctx->ir_base[*p].op != IR_VA_END) { + return IR_VADDR; + } + } + } + return IR_SKIPPED | IR_NOP; default: break; } @@ -2224,25 +2281,24 @@ static void ir_emit_prologue(ir_ctx *ctx) } } if (ctx->used_preserved_regs) { + ir_reg fp; int offset; uint32_t i; ir_regset used_preserved_regs = (ir_regset)ctx->used_preserved_regs; if (ctx->flags & IR_USE_FRAME_POINTER) { + fp = IR_REG_FRAME_POINTER; offset = 0; } else { + fp = IR_REG_STACK_POINTER; offset = ctx->stack_frame_size + ctx->call_stack_size; } for (i = 0; i < IR_REG_NUM; i++) { if (IR_REGSET_IN(used_preserved_regs, i)) { if (i < IR_REG_FP_FIRST) { - ir_reg fp = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER; - offset -= sizeof(void*); | mov aword [Ra(fp)+offset], Ra(i) } else { - ir_reg fp = (ctx->flags & IR_USE_FRAME_POINTER) ?
IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER; - offset -= sizeof(void*); if (ctx->mflags & IR_X86_AVX) { | vmovsd qword [Ra(fp)+offset], xmm(i-IR_REG_FP_FIRST) @@ -2253,6 +2309,59 @@ static void ir_emit_prologue(ir_ctx *ctx) } } } + if ((ctx->flags & IR_VARARG_FUNC) && (ctx->flags2 & IR_HAS_VA_START)) { + ir_reg fp; + int offset; + +#if defined(_WIN64) + if (ctx->flags & IR_USE_FRAME_POINTER) { + fp = IR_REG_FRAME_POINTER; + offset = sizeof(void*) * 2; + } else { + fp = IR_REG_STACK_POINTER; + offset = ctx->stack_frame_size + ctx->call_stack_size + sizeof(void*); + } + | mov [Ra(fp)+offset], Ra(IR_REG_INT_ARG1) + | mov [Ra(fp)+offset+8], Ra(IR_REG_INT_ARG2) + | mov [Ra(fp)+offset+16], Ra(IR_REG_INT_ARG3) + | mov [Ra(fp)+offset+24], Ra(IR_REG_INT_ARG4) +#elif defined(IR_TARGET_X64) +|.if X64 + const int8_t *int_reg_params = _ir_int_reg_params; + const int8_t *fp_reg_params = _ir_fp_reg_params; + uint32_t i; + + if (ctx->flags & IR_USE_FRAME_POINTER) { + fp = IR_REG_FRAME_POINTER; + + offset = -(ctx->stack_frame_size - ctx->stack_frame_alignment - ctx->locals_area_size); + } else { + fp = IR_REG_STACK_POINTER; + offset = ctx->locals_area_size + ctx->call_stack_size; + } + + if ((ctx->flags2 & (IR_HAS_VA_ARG_GP|IR_HAS_VA_COPY)) && ctx->gp_reg_params < IR_REG_INT_ARGS) { + /* skip named args */ + offset += sizeof(void*) * ctx->gp_reg_params; + for (i = ctx->gp_reg_params; i < IR_REG_INT_ARGS; i++) { + | mov qword [Ra(fp)+offset], Rq(int_reg_params[i]) + offset += sizeof(void*); + } + } + if ((ctx->flags2 & (IR_HAS_VA_ARG_FP|IR_HAS_VA_COPY)) && ctx->fp_reg_params < IR_REG_FP_ARGS) { + | test al, al + | je >1 + /* skip named args */ + offset += 16 * ctx->fp_reg_params; + for (i = ctx->fp_reg_params; i < IR_REG_FP_ARGS; i++) { + | movaps [Ra(fp)+offset], xmm(fp_reg_params[i]-IR_REG_FP_FIRST) + offset += 16; + } + |1: + } +|.endif +#endif + } } static void ir_emit_epilogue(ir_ctx *ctx) @@ -6245,6 +6354,188 @@ static void ir_emit_frame_addr(ir_ctx *ctx, ir_ref def) } } +static void ir_emit_va_start(ir_ctx *ctx, ir_ref def, ir_insn *insn) +{ +#if defined(_WIN64) || defined(IR_TARGET_X86) + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + ir_reg fp; + int arg_area_offset; + ir_reg op2_reg = ctx->regs[def][2]; + ir_reg tmp_reg = ctx->regs[def][3]; + + IR_ASSERT(tmp_reg != IR_REG_NONE); + if (op2_reg != IR_REG_NONE && IR_REG_SPILLED(op2_reg)) { + op2_reg = IR_REG_NUM(op2_reg); + ir_emit_load(ctx, IR_ADDR, op2_reg, insn->op2); + } + + if (ctx->flags & IR_USE_FRAME_POINTER) { + fp = IR_REG_FRAME_POINTER; + arg_area_offset = sizeof(void*) * 2 + ctx->param_stack_size; + } else { + fp = IR_REG_STACK_POINTER; + arg_area_offset = ctx->stack_frame_size + ctx->call_stack_size + sizeof(void*) + ctx->param_stack_size; + } + | lea Ra(tmp_reg), aword [Ra(fp)+arg_area_offset] + if (op2_reg != IR_REG_NONE) { + | mov aword [Ra(op2_reg)], Ra(tmp_reg) + } else { + int32_t offset = ir_ref_spill_slot(ctx, insn->op2, &op2_reg); + + | mov aword [Ra(op2_reg)+offset], Ra(tmp_reg) + } +#elif defined(IR_TARGET_X64) +|.if X64 + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + ir_reg fp; + int reg_save_area_offset; + int overflow_arg_area_offset; + ir_reg op2_reg = ctx->regs[def][2]; + ir_reg tmp_reg = ctx->regs[def][3]; + bool have_reg_save_area = 0; + + IR_ASSERT(op2_reg != IR_REG_NONE && tmp_reg != IR_REG_NONE); + if (IR_REG_SPILLED(op2_reg)) { + op2_reg = IR_REG_NUM(op2_reg); + ir_emit_load(ctx, IR_ADDR, op2_reg, insn->op2); + } + + if (ctx->flags & IR_USE_FRAME_POINTER) 
{ + fp = IR_REG_FRAME_POINTER; + reg_save_area_offset = -(ctx->stack_frame_size - ctx->stack_frame_alignment - ctx->locals_area_size); + overflow_arg_area_offset = sizeof(void*) * 2 + ctx->param_stack_size; + } else { + fp = IR_REG_STACK_POINTER; + reg_save_area_offset = ctx->locals_area_size + ctx->call_stack_size; + overflow_arg_area_offset = ctx->stack_frame_size + ctx->call_stack_size + sizeof(void*) + ctx->param_stack_size; + } + + if ((ctx->flags2 & (IR_HAS_VA_ARG_GP|IR_HAS_VA_COPY)) && ctx->gp_reg_params < IR_REG_INT_ARGS) { + | lea Ra(tmp_reg), aword [Ra(fp)+reg_save_area_offset] + have_reg_save_area = 1; + /* Set va_list.gp_offset */ + | mov dword [Ra(op2_reg)+offsetof(ir_va_list, gp_offset)], sizeof(void*) * ctx->gp_reg_params + } else { + reg_save_area_offset -= sizeof(void*) * IR_REG_INT_ARGS; + /* Set va_list.gp_offset */ + | mov dword [Ra(op2_reg)+offsetof(ir_va_list, gp_offset)], sizeof(void*) * IR_REG_INT_ARGS + } + if ((ctx->flags2 & (IR_HAS_VA_ARG_FP|IR_HAS_VA_COPY)) && ctx->fp_reg_params < IR_REG_FP_ARGS) { + if (!have_reg_save_area) { + | lea Ra(tmp_reg), aword [Ra(fp)+reg_save_area_offset] + have_reg_save_area = 1; + } + /* Set va_list.fp_offset */ + | mov dword [Ra(op2_reg)+offsetof(ir_va_list, fp_offset)], sizeof(void*) * IR_REG_INT_ARGS + 16 * ctx->fp_reg_params + } else { + /* Set va_list.fp_offset */ + | mov dword [Ra(op2_reg)+offsetof(ir_va_list, fp_offset)], sizeof(void*) * IR_REG_INT_ARGS + 16 * IR_REG_FP_ARGS + } + if (have_reg_save_area) { + /* Set va_list.reg_save_area */ + | mov qword [Ra(op2_reg)+offsetof(ir_va_list, reg_save_area)], Ra(tmp_reg) + } + | lea Ra(tmp_reg), aword [Ra(fp)+overflow_arg_area_offset] + /* Set va_list.overflow_arg_area */ + | mov qword [Ra(op2_reg)+offsetof(ir_va_list, overflow_arg_area)], Ra(tmp_reg) +|.endif +#else + IR_ASSERT(0 && "NIY va_start"); +#endif +} + +static void ir_emit_va_copy(ir_ctx *ctx, ir_ref def, ir_insn *insn) +{ + IR_ASSERT(0 && "NIY va_copy"); +} + +static void ir_emit_va_arg(ir_ctx *ctx, ir_ref def, ir_insn *insn) +{ +#if defined(_WIN64) || defined(IR_TARGET_X86) + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + ir_type type = insn->type; + ir_reg def_reg = ctx->regs[def][0]; + ir_reg op2_reg = ctx->regs[def][2]; + ir_reg tmp_reg = ctx->regs[def][3]; + + IR_ASSERT(def_reg != IR_REG_NONE && tmp_reg != IR_REG_NONE); + if (op2_reg != IR_REG_NONE && IR_REG_SPILLED(op2_reg)) { + op2_reg = IR_REG_NUM(op2_reg); + ir_emit_load(ctx, IR_ADDR, op2_reg, insn->op2); + } + | mov Ra(tmp_reg), aword [Ra(op2_reg)] + if (IR_IS_TYPE_INT(type)) { + ir_emit_load_mem_int(ctx, type, def_reg, tmp_reg, 0); + } else { + ir_emit_load_mem_fp(ctx, type, def_reg, tmp_reg, 0); + } + | add Ra(tmp_reg), IR_MAX(ir_type_size[type], sizeof(void*)) + if (op2_reg != IR_REG_NONE) { + | mov aword [Ra(op2_reg)], Ra(tmp_reg) + } else { + int32_t offset = ir_ref_spill_slot(ctx, insn->op2, &op2_reg); + + | mov aword [Ra(op2_reg)+offset], Ra(tmp_reg) + } + if (IR_REG_SPILLED(ctx->regs[def][0])) { + ir_emit_store(ctx, type, def, def_reg); + } +#elif defined(IR_TARGET_X64) +|.if X64 + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + ir_type type = insn->type; + ir_reg def_reg = ctx->regs[def][0]; + ir_reg op2_reg = ctx->regs[def][2]; + ir_reg tmp_reg = ctx->regs[def][3]; + + IR_ASSERT(def_reg != IR_REG_NONE && op2_reg != IR_REG_NONE && tmp_reg != IR_REG_NONE); + if (IR_REG_SPILLED(op2_reg)) { + op2_reg = IR_REG_NUM(op2_reg); + ir_emit_load(ctx, IR_ADDR, op2_reg, insn->op2); + } + if 
(IR_IS_TYPE_INT(type)) { + | mov Rd(tmp_reg), dword [Ra(op2_reg)+offsetof(ir_va_list, gp_offset)] + | cmp Rd(tmp_reg), sizeof(void*)*IR_REG_INT_ARGS + | jge >1 + | add Rd(tmp_reg), sizeof(void*) + | mov dword [Ra(op2_reg)+offsetof(ir_va_list, gp_offset)], Rd(tmp_reg) + | add Ra(tmp_reg), aword [Ra(op2_reg)+offsetof(ir_va_list, reg_save_area)] + | jmp >2 + |1: + | mov Ra(tmp_reg), aword [Ra(op2_reg)+offsetof(ir_va_list, overflow_arg_area)] + | add Ra(tmp_reg), sizeof(void*) + | mov aword [Ra(op2_reg)+offsetof(ir_va_list, overflow_arg_area)], Ra(tmp_reg) + |2: + | mov Ra(def_reg), aword [Ra(tmp_reg)-sizeof(void*)] + } else { + | mov Rd(tmp_reg), dword [Ra(op2_reg)+offsetof(ir_va_list, fp_offset)] + | cmp Rd(tmp_reg), sizeof(void*) * IR_REG_INT_ARGS + 16 * IR_REG_FP_ARGS + | jge >1 + | add Rd(tmp_reg), 16 + | mov dword [Ra(op2_reg)+offsetof(ir_va_list, fp_offset)], Rd(tmp_reg) + | add Ra(tmp_reg), aword [Ra(op2_reg)+offsetof(ir_va_list, reg_save_area)] + ir_emit_load_mem_fp(ctx, type, def_reg, tmp_reg, -16); + | jmp >2 + |1: + | mov Ra(tmp_reg), aword [Ra(op2_reg)+offsetof(ir_va_list, overflow_arg_area)] + ir_emit_load_mem_fp(ctx, type, def_reg, tmp_reg, 0); + | add Ra(tmp_reg), 8 + | mov aword [Ra(op2_reg)+offsetof(ir_va_list, overflow_arg_area)], Ra(tmp_reg) + |2: + } + if (IR_REG_SPILLED(ctx->regs[def][0])) { + ir_emit_store(ctx, type, def, def_reg); + } +|.endif +#else + IR_ASSERT(0 && "NIY va_arg"); +#endif +} + static void ir_emit_switch(ir_ctx *ctx, uint32_t b, ir_ref def, ir_insn *insn) { ir_backend_data *data = ctx->data; @@ -7154,7 +7445,35 @@ static void ir_emit_call(ir_ctx *ctx, ir_ref def, ir_insn *insn) ir_emit_store(ctx, insn->type, def, IR_REG_FP_RET1); } #else - IR_ASSERT(0); // TODO: float/double return value + if (ctx->use_lists[def].count > 1) { + int32_t offset; + ir_reg fp; + + if (def_reg == IR_REG_NONE) { + offset = ir_ref_spill_slot(ctx, def, &fp); + if (insn->type == IR_DOUBLE) { + | fstp qword [Ra(fp)+offset] + } else { + IR_ASSERT(insn->type == IR_FLOAT); + | fstp dword [Ra(fp)+offset] + } + } else { + offset = ctx->ret_slot; + IR_ASSERT(offset != -1); + offset = IR_SPILL_POS_TO_OFFSET(offset); + fp = (ctx->flags & IR_USE_FRAME_POINTER) ? 
IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER; + if (insn->type == IR_DOUBLE) { + | fstp qword [Ra(fp)+offset] + } else { + IR_ASSERT(insn->type == IR_FLOAT); + | fstp dword [Ra(fp)+offset] + } + ir_emit_load_mem_fp(ctx, insn->type, def_reg, fp, offset); + if (IR_REG_SPILLED(ctx->regs[def][0])) { + ir_emit_store(ctx, insn->type, def, def_reg); + } + } + } #endif } } @@ -8258,7 +8577,6 @@ static void ir_fix_param_spills(ir_ctx *ctx) && ival->stack_spill_pos == -1 && (ival->next || ival->reg == IR_REG_NONE)) { ival->stack_spill_pos = stack_start + stack_offset; - ctx->regs[use][0] = IR_REG_NONE; } } if (sizeof(void*) == 8) { @@ -8270,11 +8588,13 @@ static void ir_fix_param_spills(ir_ctx *ctx) } } -#ifdef IR_TARGET_X86 - if (sizeof(void*) == 4 && (ctx->flags & IR_FASTCALL_FUNC)) { - ctx->param_stack_size = stack_offset; - } +#ifdef _WIN64 + /* WIN64 uses shadow area for registers */ + stack_offset += IR_MIN(int_param_num, int_reg_params_count) * sizeof(void*); #endif + ctx->gp_reg_params = IR_MIN(int_param_num, int_reg_params_count); + ctx->fp_reg_params = IR_MIN(fp_param_num, fp_reg_params_count); + ctx->param_stack_size = stack_offset; } static void ir_allocate_unique_spill_slots(ir_ctx *ctx) @@ -8291,7 +8611,9 @@ static void ir_allocate_unique_spill_slots(ir_ctx *ctx) ir_reg reg; #ifndef IR_REG_FP_RET1 - if (ctx->ret_type == IR_FLOAT || ctx->ret_type == IR_DOUBLE) { + if (ctx->flags2 & IR_HAS_FP_RET_SLOT) { + ctx->ret_slot = ir_allocate_spill_slot(ctx, IR_DOUBLE, &data->ra_data); + } else if (ctx->ret_type == IR_FLOAT || ctx->ret_type == IR_DOUBLE) { ctx->ret_slot = ir_allocate_spill_slot(ctx, ctx->ret_type, &data->ra_data); } else { ctx->ret_slot = -1; @@ -8323,6 +8645,13 @@ static void ir_allocate_unique_spill_slots(ir_ctx *ctx) case IR_LOOP_BEGIN: case IR_LOOP_END: break; +#ifndef IR_REG_FP_RET1 + case IR_CALL: + if (ctx->ret_slot == -1 && (insn->type == IR_FLOAT || insn->type == IR_DOUBLE)) { + ctx->ret_slot = ir_allocate_spill_slot(ctx, IR_DOUBLE, &data->ra_data); + } +#endif + IR_FALLTHROUGH; default: def_flags = ir_get_target_constraints(ctx, i, &constraints); if (ctx->rules @@ -8495,6 +8824,8 @@ void ir_fix_stack_frame(ir_ctx *ctx) { uint32_t additional_size = 0; + ctx->locals_area_size = ctx->stack_frame_size; + if (ctx->used_preserved_regs) { ir_regset used_preserved_regs = (ir_regset)ctx->used_preserved_regs; ir_reg reg; @@ -8505,6 +8836,17 @@ void ir_fix_stack_frame(ir_ctx *ctx) } IR_REGSET_FOREACH_END(); } +#if defined(IR_TARGET_X64) && !defined(_WIN64) + if ((ctx->flags & IR_VARARG_FUNC) && (ctx->flags2 & IR_HAS_VA_START)) { + if ((ctx->flags2 & (IR_HAS_VA_ARG_GP|IR_HAS_VA_COPY)) && ctx->gp_reg_params < IR_REG_INT_ARGS) { + additional_size += sizeof(void*) * IR_REG_INT_ARGS; + } + if ((ctx->flags2 & (IR_HAS_VA_ARG_FP|IR_HAS_VA_COPY)) && ctx->fp_reg_params < IR_REG_FP_ARGS) { + additional_size += 16 * IR_REG_FP_ARGS; + } + } +#endif + ctx->stack_frame_size = IR_ALIGNED_SIZE(ctx->stack_frame_size, sizeof(void*)); ctx->stack_frame_size += additional_size; ctx->stack_frame_alignment = 0; @@ -8653,6 +8995,7 @@ void *ir_emit_code(ir_ctx *ctx, size_t *size_ptr) case IR_PI: case IR_PHI: case IR_SNAPSHOT: + case IR_VA_END: break; case IR_LEA_OB: { @@ -9156,6 +9499,15 @@ void *ir_emit_code(ir_ctx *ctx, size_t *size_ptr) case IR_ALLOCA: ir_emit_alloca(ctx, i, insn); break; + case IR_VA_START: + ir_emit_va_start(ctx, i, insn); + break; + case IR_VA_COPY: + ir_emit_va_copy(ctx, i, insn); + break; + case IR_VA_ARG: + ir_emit_va_arg(ctx, i, insn); + break; case IR_AFREE: ir_emit_afree(ctx, i,
insn); break; diff --git a/ext/opcache/jit/ir/ir_x86.h b/ext/opcache/jit/ir/ir_x86.h index ff4b767b2eb..4b86c291bdf 100644 --- a/ext/opcache/jit/ir/ir_x86.h +++ b/ext/opcache/jit/ir/ir_x86.h @@ -178,6 +178,13 @@ enum _ir_reg { | IR_REGSET(IR_REG_RBP) \ | IR_REGSET_INTERVAL(IR_REG_R12, IR_REG_R15)) +typedef struct _ir_va_list { + uint32_t gp_offset; + uint32_t fp_offset; + void *overflow_arg_area; + void *reg_save_area; +} ir_va_list; + #elif defined(IR_TARGET_X86) # define IR_REG_INT_RET1 IR_REG_RAX
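
Note for reviewers: the ir_va_list structures added in ir_x86.h and ir_aarch64.h mirror the va_list layouts defined by the System V x86-64 psABI and the AArch64 AAPCS64, and the new IR_VA_START/IR_VA_ARG/IR_VA_END handling covers the usual <stdarg.h> pattern on top of them. A minimal, illustrative C sketch of the kind of variadic function whose IR would go through these code paths (sum_ints is a made-up example, not part of the patch or the IR test suite):

#include <stdarg.h>
#include <stdio.h>

static int sum_ints(int count, ...)
{
	va_list ap;
	int i, sum = 0;

	va_start(ap, count);        /* corresponds to IR_VA_START: fills gp_offset/fp_offset,
	                               reg_save_area and overflow_arg_area on SysV x86-64,
	                               or the stack/gr_top/vr_top/gr_offset/vr_offset fields
	                               on AArch64 */
	for (i = 0; i < count; i++) {
		sum += va_arg(ap, int); /* corresponds to IR_VA_ARG: reads from the register
		                           save area while the saved offset allows, then falls
		                           back to the overflow (stack) area */
	}
	va_end(ap);                 /* corresponds to IR_VA_END, matched as IR_SKIPPED | IR_NOP */
	return sum;
}

int main(void)
{
	printf("%d\n", sum_ints(3, 1, 2, 3)); /* prints 6 */
	return 0;
}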