1
0
mirror of https://github.com/php/php-src.git synced 2026-03-24 00:02:20 +01:00

Update IR

IR commit: 503018483d8333a3cfb25ab89a1eadefbee665bc
This commit is contained in:
Dmitry Stogov
2025-09-22 19:31:06 +03:00
parent 32c919b474
commit ef202cc4b7
14 changed files with 625 additions and 299 deletions

View File

@@ -1,6 +1,7 @@
MIT License
Copyright (c) 2022 Zend by Perforce
Copyright (c) 2025 Dmitry Stogov
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal

View File

@@ -227,6 +227,7 @@ void ir_print_const(const ir_ctx *ctx, const ir_insn *insn, FILE *f, bool quoted
#define ir_op_flag_d0 ir_op_flag_d
#define ir_op_flag_d1 (ir_op_flag_d | 1 | (1 << IR_OP_FLAG_OPERANDS_SHIFT))
#define ir_op_flag_d1X1 (ir_op_flag_d | 1 | (2 << IR_OP_FLAG_OPERANDS_SHIFT))
#define ir_op_flag_d1X2 (ir_op_flag_d | 1 | (3 << IR_OP_FLAG_OPERANDS_SHIFT))
#define ir_op_flag_d2 (ir_op_flag_d | 2 | (2 << IR_OP_FLAG_OPERANDS_SHIFT))
#define ir_op_flag_d2C (ir_op_flag_d | IR_OP_FLAG_COMMUTATIVE | 2 | (2 << IR_OP_FLAG_OPERANDS_SHIFT))
#define ir_op_flag_d3 (ir_op_flag_d | 3 | (3 << IR_OP_FLAG_OPERANDS_SHIFT))
@@ -270,6 +271,7 @@ void ir_print_const(const ir_ctx *ctx, const ir_insn *insn, FILE *f, bool quoted
#define ir_op_flag_s3 (ir_op_flag_s | 3 | (3 << IR_OP_FLAG_OPERANDS_SHIFT))
#define ir_op_flag_x1 (IR_OP_FLAG_CONTROL|IR_OP_FLAG_MEM|IR_OP_FLAG_MEM_CALL | 1 | (1 << IR_OP_FLAG_OPERANDS_SHIFT))
#define ir_op_flag_x2 (IR_OP_FLAG_CONTROL|IR_OP_FLAG_MEM|IR_OP_FLAG_MEM_CALL | 2 | (2 << IR_OP_FLAG_OPERANDS_SHIFT))
#define ir_op_flag_x2X1 (IR_OP_FLAG_CONTROL|IR_OP_FLAG_MEM|IR_OP_FLAG_MEM_CALL | 2 | (3 << IR_OP_FLAG_OPERANDS_SHIFT))
#define ir_op_flag_x3 (IR_OP_FLAG_CONTROL|IR_OP_FLAG_MEM|IR_OP_FLAG_MEM_CALL | 3 | (3 << IR_OP_FLAG_OPERANDS_SHIFT))
#define ir_op_flag_xN (IR_OP_FLAG_CONTROL|IR_OP_FLAG_MEM|IR_OP_FLAG_MEM_CALL | IR_OP_FLAG_VAR_INPUTS)
#define ir_op_flag_a1 (IR_OP_FLAG_CONTROL|IR_OP_FLAG_MEM|IR_OP_FLAG_MEM_ALLOC | 1 | (1 << IR_OP_FLAG_OPERANDS_SHIFT))
@@ -392,6 +394,8 @@ void ir_init(ir_ctx *ctx, uint32_t flags, ir_ref consts_limit, ir_ref insns_limi
ctx->insns_limit = insns_limit;
ctx->consts_count = -(IR_TRUE - 1);
ctx->consts_limit = consts_limit;
ctx->const_hash = ctx->_const_hash;
ctx->const_hash_mask = IR_CONST_HASH_SIZE - 1;
ctx->fold_cse_limit = IR_UNUSED + 1;
ctx->flags = flags;
@@ -414,6 +418,9 @@ void ir_free(ir_ctx *ctx)
{
ir_insn *buf = ctx->ir_base - ctx->consts_limit;
ir_mem_free(buf);
if (ctx->value_params) {
ir_mem_free(ctx->value_params);
}
if (ctx->strtab.data) {
ir_strtab_free(&ctx->strtab);
}
@@ -468,6 +475,10 @@ void ir_free(ir_ctx *ctx)
ir_list_free((ir_list*)ctx->osr_entry_loads);
ir_mem_free(ctx->osr_entry_loads);
}
if (ctx->const_hash_mask != IR_CONST_HASH_SIZE - 1) {
ir_mem_free(ctx->const_hash);
}
}
ir_ref ir_unique_const_addr(ir_ctx *ctx, uintptr_t addr)
@@ -479,72 +490,64 @@ ir_ref ir_unique_const_addr(ir_ctx *ctx, uintptr_t addr)
insn->val.u64 = addr;
/* don't insert into constants chain */
insn->prev_const = IR_UNUSED;
#if 0
insn->prev_const = ctx->prev_const_chain[IR_ADDR];
ctx->prev_const_chain[IR_ADDR] = ref;
#endif
#if 0
ir_insn *prev_insn, *next_insn;
ir_ref next;
prev_insn = NULL;
next = ctx->prev_const_chain[IR_ADDR];
while (next) {
next_insn = &ctx->ir_base[next];
if (UNEXPECTED(next_insn->val.u64 >= addr)) {
break;
}
prev_insn = next_insn;
next = next_insn->prev_const;
}
if (prev_insn) {
insn->prev_const = prev_insn->prev_const;
prev_insn->prev_const = ref;
} else {
insn->prev_const = ctx->prev_const_chain[IR_ADDR];
ctx->prev_const_chain[IR_ADDR] = ref;
}
#endif
return ref;
}
/* Hash a constant by folding the high half of its 64-bit value into the
 * low half and mixing in the opcode/type word, so constants that differ
 * only in type or opcode land in different buckets. */
IR_ALWAYS_INLINE uintptr_t ir_const_hash(ir_val val, uint32_t optx)
{
	uint64_t folded = val.u64 ^ (val.u64 >> 32);

	return folded ^ optx;
}
/* Grow the constants hash table (double its capacity) and re-insert every
 * constant currently stored in the constants buffer.  Chaining goes through
 * insn->prev_const, so each insertion simply pushes onto the bucket head. */
static IR_NEVER_INLINE void ir_const_hash_rehash(ir_ctx *ctx)
{
	ir_ref i;

	/* Free the old table unless it is still the embedded _const_hash[] */
	if (ctx->const_hash_mask != IR_CONST_HASH_SIZE - 1) {
		ir_mem_free(ctx->const_hash);
	}

	/* Mask is always 2^k - 1; doubling keeps that invariant */
	ctx->const_hash_mask = (ctx->const_hash_mask + 1) * 2 - 1;
	ctx->const_hash = ir_mem_calloc(ctx->const_hash_mask + 1, sizeof(ir_ref));

	/* Constants occupy negative refs; walk them all and rehash */
	for (i = IR_TRUE - 1; i > -ctx->consts_count; i--) {
		ir_insn *c = &ctx->ir_base[i];
		uintptr_t slot = ir_const_hash(c->val, c->optx) & ctx->const_hash_mask;

		c->prev_const = ctx->const_hash[slot];
		ctx->const_hash[slot] = i;
	}
}
ir_ref ir_const_ex(ir_ctx *ctx, ir_val val, uint8_t type, uint32_t optx)
{
ir_insn *insn, *prev_insn;
ir_insn *insn;
ir_ref ref, prev;
uintptr_t hash;
if (type == IR_BOOL) {
return val.u64 ? IR_TRUE : IR_FALSE;
} else if (type == IR_ADDR && val.u64 == 0) {
return IR_NULL;
}
prev_insn = NULL;
ref = ctx->prev_const_chain[type];
hash = ir_const_hash(val, optx) & ctx->const_hash_mask;
ref = ctx->const_hash[hash];
while (ref) {
insn = &ctx->ir_base[ref];
if (UNEXPECTED(insn->val.u64 >= val.u64)) {
if (insn->val.u64 == val.u64) {
if (insn->optx == optx) {
return ref;
}
} else {
break;
}
if (insn->val.u64 == val.u64 && insn->optx == optx) {
return ref;
}
prev_insn = insn;
ref = insn->prev_const;
}
if (prev_insn) {
prev = prev_insn->prev_const;
prev_insn->prev_const = -ctx->consts_count;
} else {
prev = ctx->prev_const_chain[type];
ctx->prev_const_chain[type] = -ctx->consts_count;
if ((uintptr_t)ctx->consts_count > ctx->const_hash_mask) {
ir_const_hash_rehash(ctx);
hash = ir_const_hash(val, optx) & ctx->const_hash_mask;
}
prev = ctx->const_hash[hash];
ctx->const_hash[hash] = -ctx->consts_count;
ref = ir_next_const(ctx);
insn = &ctx->ir_base[ref];
insn->prev_const = prev;
@@ -2092,10 +2095,10 @@ IR_ALWAYS_INLINE ir_ref ir_find_aliasing_vload_i(ir_ctx *ctx, ir_ref ref, ir_typ
if (insn->type == type) {
return ref; /* load forwarding (L2L) */
} else if (ir_type_size[insn->type] == ir_type_size[type]) {
return ir_fold1(ctx, IR_OPT(IR_BITCAST, type), ref); /* load forwarding with bitcast (L2L) */
return ref; /* load forwarding with bitcast (L2L) */
} else if (ir_type_size[insn->type] > ir_type_size[type]
&& IR_IS_TYPE_INT(type) && IR_IS_TYPE_INT(insn->type)) {
return ir_fold1(ctx, IR_OPT(IR_TRUNC, type), ref); /* partial load forwarding (L2L) */
return ref; /* partial load forwarding (L2L) */
}
}
} else if (insn->op == IR_VSTORE) {
@@ -2105,10 +2108,10 @@ IR_ALWAYS_INLINE ir_ref ir_find_aliasing_vload_i(ir_ctx *ctx, ir_ref ref, ir_typ
if (type2 == type) {
return insn->op3; /* store forwarding (S2L) */
} else if (ir_type_size[type2] == ir_type_size[type]) {
return ir_fold1(ctx, IR_OPT(IR_BITCAST, type), insn->op3); /* store forwarding with bitcast (S2L) */
return insn->op3; /* store forwarding with bitcast (S2L) */
} else if (ir_type_size[type2] > ir_type_size[type]
&& IR_IS_TYPE_INT(type) && IR_IS_TYPE_INT(type2)) {
return ir_fold1(ctx, IR_OPT(IR_TRUNC, type), insn->op3); /* partial store forwarding (S2L) */
return insn->op3; /* partial store forwarding (S2L) */
} else {
break;
}
@@ -3214,6 +3217,13 @@ ir_ref _ir_VA_ARG(ir_ctx *ctx, ir_type type, ir_ref list)
return ctx->control = ir_emit2(ctx, IR_OPT(IR_VA_ARG, type), ctx->control, list);
}
/* Emit a VA_ARG instruction with an explicit size in op3 (extended form
 * used for by-value struct arguments).  The size must fit a non-negative
 * ir_ref, hence the 0x7fffffff bound. */
ir_ref _ir_VA_ARG_EX(ir_ctx *ctx, ir_type type, ir_ref list, size_t size)
{
	ir_ref result;

	IR_ASSERT(ctx->control);
	IR_ASSERT(size <= 0x7fffffff);
	result = ir_emit3(ctx, IR_OPT(IR_VA_ARG, type), ctx->control, list, (ir_ref)size);
	ctx->control = result;
	return result;
}
ir_ref _ir_BLOCK_BEGIN(ir_ctx *ctx)
{
IR_ASSERT(ctx->control);

View File

@@ -310,6 +310,8 @@ typedef enum _ir_type {
_(PHI, pN, reg, def, def) /* SSA Phi function */ \
_(COPY, d1X1, def, opt, ___) /* COPY (last foldable op) */ \
_(PI, p2, reg, def, ___) /* e-SSA Pi constraint ??? */ \
_(ARGVAL, d1X2, def, num, num) /* pass struct arg by value */ \
/* (op2 - size, op3 - align) */ \
/* (USE, RENAME) */ \
\
/* data ops */ \
@@ -343,7 +345,8 @@ typedef enum _ir_type {
_(VA_START, x2, src, def, ___) /* va_start(va_list) */ \
_(VA_END, x2, src, def, ___) /* va_end(va_list) */ \
_(VA_COPY, x3, src, def, def) /* va_copy(dst, stc) */ \
_(VA_ARG, x2, src, def, ___) /* va_arg(va_list) */ \
_(VA_ARG, x2X1, src, def, opt) /* va_arg(va_list) */ \
/* op3 - (size<<3)+log2(align) */ \
\
/* guards */ \
_(GUARD, c3, src, def, def) /* IF without second successor */ \
@@ -583,12 +586,22 @@ typedef struct _ir_code_buffer {
void *pos;
} ir_code_buffer;
/* Descriptor for a function parameter that is a struct passed by value. */
typedef struct {
int size;   /* size of the struct in bytes */
int align;  /* alignment; non-zero marks the parameter as by-value struct */
int offset; /* stack offset of the incoming value — TODO confirm against backend */
} ir_value_param;
#define IR_CONST_HASH_SIZE 64
struct _ir_ctx {
ir_insn *ir_base; /* two directional array - instructions grow down, constants grow up */
ir_ref insns_count; /* number of instructions stored in instructions buffer */
ir_ref insns_limit; /* size of allocated instructions buffer (it's extended when overflow) */
ir_ref consts_count; /* number of constants stored in constants buffer */
ir_ref consts_limit; /* size of allocated constants buffer (it's extended when overflow) */
uintptr_t const_hash_mask;
ir_ref *const_hash;
uint32_t flags; /* IR context flags (see IR_* defines above) */
uint32_t flags2; /* IR context private flags (see IR_* defines in ir_private.h) */
ir_type ret_type; /* Function return type */
@@ -596,6 +609,7 @@ struct _ir_ctx {
int32_t status; /* non-zero error code (see IR_ERROR_... macros), app may use negative codes */
ir_ref fold_cse_limit; /* CSE finds identical insns backward from "insn_count" to "fold_cse_limit" */
ir_insn fold_insn; /* temporary storage for folding engine */
ir_value_param *value_params; /* information about "by-val" struct parameters */
ir_hashtab *binding;
ir_use_list *use_lists; /* def->use lists for each instruction */
ir_ref *use_edges; /* the actual uses: use = ctx->use_edges[ctx->use_lists[def].refs + n] */
@@ -655,7 +669,7 @@ struct _ir_ctx {
ir_loader *loader;
ir_strtab strtab;
ir_ref prev_insn_chain[IR_LAST_FOLDABLE_OP + 1];
ir_ref prev_const_chain[IR_LAST_TYPE];
ir_ref _const_hash[IR_CONST_HASH_SIZE];
};
/* Basic IR Construction API (implementation in ir.c) */
@@ -896,6 +910,7 @@ int ir_load_llvm_asm(ir_loader *loader, const char *filename);
#define IR_SAVE_SAFE_NAMES (1<<5) /* add '@' prefix to symbol names */
void ir_print_proto(const ir_ctx *ctx, ir_ref proto, FILE *f);
void ir_print_proto_ex(uint8_t flags, ir_type ret_type, uint32_t params_count, const uint8_t *param_types, FILE *f);
void ir_save(const ir_ctx *ctx, uint32_t save_flags, FILE *f);
/* IR debug dump API (implementation in ir_dump.c) */

View File

@@ -574,6 +574,10 @@ int ir_get_target_constraints(ir_ctx *ctx, ir_ref ref, ir_target_constraints *co
constraints->tmp_regs[n] = IR_TMP_REG(3, IR_ADDR, IR_LOAD_SUB_REF, IR_DEF_SUB_REF);
n++;
break;
case IR_ARGVAL:
constraints->tmp_regs[0] = IR_SCRATCH_REG(IR_REG_SCRATCH, IR_DEF_SUB_REF - IR_SUB_REFS_COUNT, IR_USE_SUB_REF);
n = 1;
break;
case IR_CALL:
insn = &ctx->ir_base[ref];
constraints->def_reg = (IR_IS_TYPE_INT(insn->type)) ? IR_REG_INT_RET1 : IR_REG_FP_RET1;
@@ -1103,6 +1107,8 @@ binop_fp:
}
}
return IR_SKIPPED | IR_NOP;
case IR_ARGVAL:
return IR_FUSED | IR_ARGVAL;
case IR_NOP:
return IR_SKIPPED | IR_NOP;
default:
@@ -1386,6 +1392,12 @@ static void ir_emit_load_mem(ir_ctx *ctx, ir_type type, ir_reg reg, ir_mem mem)
}
}
/* Return the frame offset of a stack-allocated local (VAR/ALLOCA/VADDR).
 * op3 of such instructions holds a spill position, which
 * IR_SPILL_POS_TO_OFFSET converts to a byte offset from the frame/stack
 * pointer.  The ctx argument is currently unused but kept for symmetry
 * with the other backend helpers. */
static int32_t ir_local_offset(ir_ctx *ctx, ir_insn *insn)
{
IR_ASSERT(insn->op == IR_VAR || insn->op == IR_ALLOCA || insn->op == IR_VADDR);
return IR_SPILL_POS_TO_OFFSET(insn->op3);
}
static void ir_load_local_addr(ir_ctx *ctx, ir_reg reg, ir_ref src)
{
ir_backend_data *data = ctx->data;
@@ -1399,8 +1411,7 @@ static void ir_load_local_addr(ir_ctx *ctx, ir_reg reg, ir_ref src)
if (var_insn->op == IR_VADDR) {
var_insn = &ctx->ir_base[var_insn->op1];
}
IR_ASSERT(var_insn->op == IR_VAR || var_insn->op == IR_ALLOCA);
offset = IR_SPILL_POS_TO_OFFSET(var_insn->op3);
offset = ir_local_offset(ctx, var_insn);
if (aarch64_may_encode_imm12(offset)) {
| add Rx(reg), Rx(base), #offset
} else {
@@ -3858,7 +3869,7 @@ static ir_mem ir_fuse_addr(ir_ctx *ctx, ir_ref root, ir_ref ref)
IR_ASSERT(!IR_IS_SYM_CONST(ctx->ir_base[addr_insn->op2].op));
if (ir_rule(ctx, addr_insn->op1) == IR_STATIC_ALLOCA) {
reg = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER;
offset = IR_SPILL_POS_TO_OFFSET(ctx->ir_base[addr_insn->op1].op3);
offset = ir_local_offset(ctx, &ctx->ir_base[addr_insn->op1]);
offset += ctx->ir_base[addr_insn->op2].val.i32;
return IR_MEM_BO(reg, offset);
} else {
@@ -3876,7 +3887,7 @@ static ir_mem ir_fuse_addr(ir_ctx *ctx, ir_ref root, ir_ref ref)
} else {
IR_ASSERT(addr_insn->op == IR_ALLOCA || addr_insn->op == IR_VADDR);
reg = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER;
offset = IR_SPILL_POS_TO_OFFSET(ctx->ir_base[ref].op3);
offset = ir_local_offset(ctx, &ctx->ir_base[ref]);
return IR_MEM_BO(reg, offset);
}
}
@@ -4265,7 +4276,7 @@ static void ir_emit_va_start(ir_ctx *ctx, ir_ref def, ir_insn *insn)
} else {
IR_ASSERT(ir_rule(ctx, insn->op2) == IR_STATIC_ALLOCA);
op2_reg = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER;
offset = IR_SPILL_POS_TO_OFFSET(ctx->ir_base[insn->op2].op3);
offset = ir_local_offset(ctx, &ctx->ir_base[insn->op2]);
}
if (ctx->flags & IR_USE_FRAME_POINTER) {
@@ -4297,7 +4308,7 @@ static void ir_emit_va_start(ir_ctx *ctx, ir_ref def, ir_insn *insn)
} else {
IR_ASSERT(ir_rule(ctx, insn->op2) == IR_STATIC_ALLOCA);
op2_reg = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER;
offset = IR_SPILL_POS_TO_OFFSET(ctx->ir_base[insn->op2].op3);
offset = ir_local_offset(ctx, &ctx->ir_base[insn->op2]);
}
if (ctx->flags & IR_USE_FRAME_POINTER) {
@@ -4364,7 +4375,7 @@ static void ir_emit_va_copy(ir_ctx *ctx, ir_ref def, ir_insn *insn)
} else {
IR_ASSERT(ir_rule(ctx, insn->op2) == IR_STATIC_ALLOCA);
op2_reg = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER;
op2_offset = IR_SPILL_POS_TO_OFFSET(ctx->ir_base[insn->op2].op3);
op2_offset = ir_local_offset(ctx, &ctx->ir_base[insn->op2]);
}
if (op3_reg != IR_REG_NONE) {
if (IR_REG_SPILLED(op3_reg)) {
@@ -4375,7 +4386,7 @@ static void ir_emit_va_copy(ir_ctx *ctx, ir_ref def, ir_insn *insn)
} else {
IR_ASSERT(ir_rule(ctx, insn->op3) == IR_STATIC_ALLOCA);
op3_reg = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER;
op3_offset = IR_SPILL_POS_TO_OFFSET(ctx->ir_base[insn->op3].op3);
op3_offset = ir_local_offset(ctx, &ctx->ir_base[insn->op3]);
}
| ldr Rx(tmp_reg), [Rx(op3_reg), #op3_offset]
| str Rx(tmp_reg), [Rx(op2_reg), #op2_offset]
@@ -4397,7 +4408,7 @@ static void ir_emit_va_copy(ir_ctx *ctx, ir_ref def, ir_insn *insn)
} else {
IR_ASSERT(ir_rule(ctx, insn->op2) == IR_STATIC_ALLOCA);
op2_reg = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER;
op2_offset = IR_SPILL_POS_TO_OFFSET(ctx->ir_base[insn->op2].op3);
op2_offset = ir_local_offset(ctx, &ctx->ir_base[insn->op2]);
}
if (op3_reg != IR_REG_NONE) {
if (IR_REG_SPILLED(op3_reg)) {
@@ -4408,7 +4419,7 @@ static void ir_emit_va_copy(ir_ctx *ctx, ir_ref def, ir_insn *insn)
} else {
IR_ASSERT(ir_rule(ctx, insn->op3) == IR_STATIC_ALLOCA);
op3_reg = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER;
op3_offset = IR_SPILL_POS_TO_OFFSET(ctx->ir_base[insn->op3].op3);
op3_offset = ir_local_offset(ctx, &ctx->ir_base[insn->op3]);
}
| ldr Rx(tmp_reg), [Rx(op3_reg), #op3_offset]
| str Rx(tmp_reg), [Rx(op2_reg), #op2_offset]
@@ -4446,7 +4457,7 @@ static void ir_emit_va_arg(ir_ctx *ctx, ir_ref def, ir_insn *insn)
} else {
IR_ASSERT(ir_rule(ctx, insn->op2) == IR_STATIC_ALLOCA);
op2_reg = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER;
offset = IR_SPILL_POS_TO_OFFSET(ctx->ir_base[insn->op2].op3);
offset = ir_local_offset(ctx, &ctx->ir_base[insn->op2]);
}
| ldr Rx(tmp_reg), [Rx(op2_reg), #offset]
ir_emit_load_mem(ctx, type, def_reg, IR_MEM_BO(tmp_reg, 0));
@@ -4478,7 +4489,7 @@ static void ir_emit_va_arg(ir_ctx *ctx, ir_ref def, ir_insn *insn)
} else {
IR_ASSERT(ir_rule(ctx, insn->op2) == IR_STATIC_ALLOCA);
op2_reg = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER;
offset = IR_SPILL_POS_TO_OFFSET(ctx->ir_base[insn->op2].op3);
offset = ir_local_offset(ctx, &ctx->ir_base[insn->op2]);
}
if (IR_IS_TYPE_INT(type)) {
| ldr Rw(tmp_reg), [Rx(op2_reg), #(offset+offsetof(ir_va_list, gr_offset))]
@@ -4741,7 +4752,7 @@ static void ir_emit_switch(ir_ctx *ctx, uint32_t b, ir_ref def, ir_insn *insn)
}
}
static int32_t ir_call_used_stack(ir_ctx *ctx, ir_insn *insn)
static int32_t ir_call_used_stack(ir_ctx *ctx, ir_insn *insn, int32_t *copy_stack_ptr)
{
int j, n;
ir_type type;
@@ -4749,7 +4760,7 @@ static int32_t ir_call_used_stack(ir_ctx *ctx, ir_insn *insn)
int fp_param = 0;
int int_reg_params_count = IR_REG_INT_ARGS;
int fp_reg_params_count = IR_REG_FP_ARGS;
int32_t used_stack = 0;
int32_t used_stack = 0, copy_stack = 0;
#ifdef __APPLE__
const ir_proto_t *proto = ir_call_proto(ctx, insn);
int last_named_input = (proto && (proto->flags & IR_VARARG_FUNC)) ? proto->params_count + 2 : insn->inputs_count;
@@ -4757,7 +4768,16 @@ static int32_t ir_call_used_stack(ir_ctx *ctx, ir_insn *insn)
n = insn->inputs_count;
for (j = 3; j <= n; j++) {
type = ctx->ir_base[ir_insn_op(insn, j)].type;
ir_insn *arg = &ctx->ir_base[ir_insn_op(insn, j)];
type = arg->type;
if (arg->op == IR_ARGVAL) {
int size = arg->op2;
int align = arg->op3;
copy_stack += size;
align = IR_MAX((int)sizeof(void*), align);
copy_stack = IR_ALIGNED_SIZE(copy_stack, align);
type = IR_ADDR;
}
#ifdef __APPLE__
if (j > last_named_input) {
used_stack += IR_MAX(sizeof(void*), ir_type_size[type]);
@@ -4777,7 +4797,9 @@ static int32_t ir_call_used_stack(ir_ctx *ctx, ir_insn *insn)
}
}
return used_stack;
copy_stack = IR_ALIGNED_SIZE(copy_stack, 16);
*copy_stack_ptr = copy_stack;
return used_stack + copy_stack;
}
static int32_t ir_emit_arguments(ir_ctx *ctx, ir_ref def, ir_insn *insn, ir_reg tmp_reg)
@@ -4796,7 +4818,7 @@ static int32_t ir_emit_arguments(ir_ctx *ctx, ir_ref def, ir_insn *insn, ir_reg
int fp_reg_params_count = IR_REG_FP_ARGS;
const int8_t *int_reg_params = _ir_int_reg_params;
const int8_t *fp_reg_params = _ir_fp_reg_params;
int32_t used_stack, stack_offset = 0;
int32_t used_stack, copy_stack = 0, stack_offset = 0, copy_stack_offset = 0;
ir_copy *copies;
bool do_pass3 = 0;
/* For temporaries we may use any scratch registers except for registers used for parameters */
@@ -4815,7 +4837,7 @@ static int32_t ir_emit_arguments(ir_ctx *ctx, ir_ref def, ir_insn *insn, ir_reg
// TODO: support for preallocated stack
used_stack = 0;
} else {
used_stack = ir_call_used_stack(ctx, insn);
used_stack = ir_call_used_stack(ctx, insn, &copy_stack);
/* Stack must be 16 byte aligned */
used_stack = IR_ALIGNED_SIZE(used_stack, 16);
if (ctx->fixed_call_stack_size && used_stack <= ctx->fixed_call_stack_size) {
@@ -4838,6 +4860,48 @@ static int32_t ir_emit_arguments(ir_ctx *ctx, ir_ref def, ir_insn *insn, ir_reg
int last_named_input = (proto && (proto->flags & IR_VARARG_FUNC)) ? proto->params_count + 2 : insn->inputs_count;
#endif
if (copy_stack) {
/* Copy struct arguments */
for (j = 3; j <= n; j++) {
arg = ir_insn_op(insn, j);
src_reg = ir_get_alocated_reg(ctx, def, j);
arg_insn = &ctx->ir_base[arg];
type = arg_insn->type;
if (arg_insn->op == IR_ARGVAL) {
/* make a stack copy */
void *addr = memcpy;
int size = arg_insn->op2;
int align = arg_insn->op3;
copy_stack_offset += size;
align = IR_MAX((int)sizeof(void*), align);
copy_stack_offset = IR_ALIGNED_SIZE(copy_stack_offset, align);
src_reg = ctx->regs[arg][1];
| add Rx(IR_REG_INT_ARG1), sp, #(used_stack - copy_stack_offset)
if (src_reg != IR_REG_NONE) {
if (IR_REG_SPILLED(src_reg)) {
src_reg = IR_REG_NUM(src_reg);
ir_emit_load(ctx, IR_ADDR, src_reg, arg_insn->op1);
}
| mov Rx(IR_REG_INT_ARG2), Rx(src_reg)
} else {
ir_emit_load(ctx, IR_ADDR, IR_REG_INT_ARG2, arg_insn->op1);
}
ir_emit_load_imm_int(ctx, IR_ADDR, IR_REG_INT_ARG3, size);
if (aarch64_may_use_b(ctx->code_buffer, addr)) {
| bl &addr
} else {
ir_emit_load_imm_int(ctx, IR_ADDR, IR_REG_INT_TMP, (intptr_t)addr);
| blr Rx(IR_REG_INT_TMP)
}
}
}
copy_stack_offset = 0;
}
/* 1. move all register arguments that should be passed through stack
* and collect arguments that should be passed through registers */
copies = ir_mem_malloc((n - 2) * sizeof(ir_copy));
@@ -4846,8 +4910,13 @@ static int32_t ir_emit_arguments(ir_ctx *ctx, ir_ref def, ir_insn *insn, ir_reg
src_reg = ir_get_alocated_reg(ctx, def, j);
arg_insn = &ctx->ir_base[arg];
type = arg_insn->type;
#ifdef __APPLE__
if (j > last_named_input) {
if (arg_insn->op == IR_ARGVAL) {
do_pass3 = 1;
continue;
}
dst_reg = IR_REG_NONE; /* pass argument through stack */
} else
#endif
@@ -4858,6 +4927,10 @@ static int32_t ir_emit_arguments(ir_ctx *ctx, ir_ref def, ir_insn *insn, ir_reg
dst_reg = IR_REG_NONE; /* pass argument through stack */
}
int_param++;
if (arg_insn->op == IR_ARGVAL) {
do_pass3 = 1;
continue;
}
} else {
IR_ASSERT(IR_IS_TYPE_FP(type));
if (fp_param < fp_reg_params_count) {
@@ -4914,6 +4987,31 @@ static int32_t ir_emit_arguments(ir_ctx *ctx, ir_ref def, ir_insn *insn, ir_reg
src_reg = ir_get_alocated_reg(ctx, def, j);
arg_insn = &ctx->ir_base[arg];
type = arg_insn->type;
if (arg_insn->op == IR_ARGVAL) {
/* pass pointer to the copy on stack */
int size = arg_insn->op2;
int align = arg_insn->op3;
copy_stack_offset += size;
align = IR_MAX((int)sizeof(void*), align);
copy_stack_offset = IR_ALIGNED_SIZE(copy_stack_offset, align);
#ifdef __APPLE__
if (j > last_named_input) {
| add Rx(tmp_reg), sp, #(used_stack - copy_stack_offset)
ir_emit_store_mem_int(ctx, IR_ADDR, IR_MEM_BO(IR_REG_STACK_POINTER, stack_offset), tmp_reg);
} else
#endif
if (int_param < int_reg_params_count) {
dst_reg = int_reg_params[int_param];
| add Rx(dst_reg), sp, #(used_stack - copy_stack_offset)
} else {
| add Rx(tmp_reg), sp, #(used_stack - copy_stack_offset)
ir_emit_store_mem_int(ctx, IR_ADDR, IR_MEM_BO(IR_REG_STACK_POINTER, stack_offset), tmp_reg);
stack_offset += sizeof(void*);
}
int_param++;
continue;
}
#ifdef __APPLE__
if (j > last_named_input) {
dst_reg = IR_REG_NONE; /* pass argument through stack */
@@ -5896,7 +5994,8 @@ static void ir_preallocate_call_stack(ir_ctx *ctx)
for (i = 1, insn = ctx->ir_base + 1; i < ctx->insns_count;) {
if (insn->op == IR_CALL) {
call_stack_size = ir_call_used_stack(ctx, insn);
int32_t copy_stack;
call_stack_size = ir_call_used_stack(ctx, insn, &copy_stack);
if (call_stack_size > peak_call_stack_size) {
peak_call_stack_size = call_stack_size;
}

View File

@@ -586,6 +586,7 @@ extern "C" {
#define ir_VA_END(_list) _ir_VA_END(_ir_CTX, _list)
#define ir_VA_COPY(_dst, _src) _ir_VA_COPY(_ir_CTX, _dst, _src)
#define ir_VA_ARG(_list, _type) _ir_VA_ARG(_ir_CTX, _type, _list)
#define ir_VA_ARG_EX(_list, _type, size) _ir_VA_ARG_EX(_ir_CTX, _type, _list, size)
#define ir_START() _ir_START(_ir_CTX)
#define ir_ENTRY(_src, _num) _ir_ENTRY(_ir_CTX, (_src), (_num))
@@ -661,6 +662,7 @@ void _ir_VA_START(ir_ctx *ctx, ir_ref list);
void _ir_VA_END(ir_ctx *ctx, ir_ref list);
void _ir_VA_COPY(ir_ctx *ctx, ir_ref dst, ir_ref src);
ir_ref _ir_VA_ARG(ir_ctx *ctx, ir_type type, ir_ref list);
ir_ref _ir_VA_ARG_EX(ir_ctx *ctx, ir_type type, ir_ref list, size_t size);
void _ir_START(ir_ctx *ctx);
void _ir_ENTRY(ir_ctx *ctx, ir_ref src, ir_ref num);
void _ir_BEGIN(ir_ctx *ctx, ir_ref src);

View File

@@ -605,7 +605,7 @@ static void compute_postnum(const ir_ctx *ctx, uint32_t *cur, uint32_t b)
/* Computes dominator tree using algorithm from "A Simple, Fast Dominance Algorithm" by
* Cooper, Harvey and Kennedy. */
static int ir_build_dominators_tree_slow(ir_ctx *ctx)
static IR_NEVER_INLINE int ir_build_dominators_tree_slow(ir_ctx *ctx)
{
uint32_t blocks_count, b, postnum;
ir_block *blocks, *bb;
@@ -690,28 +690,13 @@ static int ir_build_dominators_tree_slow(ir_ctx *ctx)
/* Build dominators tree */
blocks[1].idom = 0;
blocks[1].dom_depth = 0;
for (b = 2, bb = &blocks[2]; b <= blocks_count; b++, bb++) {
uint32_t idom = bb->idom;
ir_block *idom_bb = &blocks[idom];
/* Construct children lists sorted by block number */
for (b = blocks_count, bb = &blocks[b]; b >= 2; b--, bb--) {
ir_block *idom_bb = &blocks[bb->idom];
bb->dom_depth = 0;
/* Sort by block number to traverse children in pre-order */
if (idom_bb->dom_child == 0) {
idom_bb->dom_child = b;
} else if (b < idom_bb->dom_child) {
bb->dom_next_child = idom_bb->dom_child;
idom_bb->dom_child = b;
} else {
int child = idom_bb->dom_child;
ir_block *child_bb = &blocks[child];
while (child_bb->dom_next_child > 0 && b > child_bb->dom_next_child) {
child = child_bb->dom_next_child;
child_bb = &blocks[child];
}
bb->dom_next_child = child_bb->dom_next_child;
child_bb->dom_next_child = b;
}
bb->dom_next_child = idom_bb->dom_child;
idom_bb->dom_child = b;
}
/* Recalculate dom_depth for all blocks */
@@ -769,6 +754,7 @@ int ir_build_dominators_tree(ir_ctx *ctx)
ctx->flags2 &= ~IR_NO_LOOPS;
// IR_ASSERT(k > 1 && "Wrong blocks order: BB is before its single predecessor");
if (UNEXPECTED(k <= 1)) {
slow_case:
ir_list_free(&worklist);
return ir_build_dominators_tree_slow(ctx);
}
@@ -780,7 +766,9 @@ int ir_build_dominators_tree(ir_ctx *ctx)
if (idom < b) {
break;
}
IR_ASSERT(k > 0);
if (UNEXPECTED(k == 0)) {
goto slow_case;
}
ir_list_push(&worklist, idom);
}
}
@@ -808,25 +796,14 @@ int ir_build_dominators_tree(ir_ctx *ctx)
}
bb->idom = idom;
idom_bb = &blocks[idom];
bb->dom_depth = idom_bb->dom_depth + 1;
/* Sort by block number to traverse children in pre-order */
if (idom_bb->dom_child == 0) {
idom_bb->dom_child = b;
} else if (b < idom_bb->dom_child) {
bb->dom_next_child = idom_bb->dom_child;
idom_bb->dom_child = b;
} else {
int child = idom_bb->dom_child;
ir_block *child_bb = &blocks[child];
while (child_bb->dom_next_child > 0 && b > child_bb->dom_next_child) {
child = child_bb->dom_next_child;
child_bb = &blocks[child];
}
bb->dom_next_child = child_bb->dom_next_child;
child_bb->dom_next_child = b;
}
/* Construct children lists sorted by block number */
for (b = blocks_count, bb = &blocks[b]; b >= 2; b--, bb--) {
ir_block *idom_bb = &blocks[bb->idom];
bb->dom_next_child = idom_bb->dom_child;
idom_bb->dom_child = b;
}
blocks[1].idom = 0;
@@ -945,23 +922,13 @@ static int ir_build_dominators_tree_iterative(ir_ctx *ctx)
ir_block *idom_bb = &blocks[idom];
bb->dom_depth = idom_bb->dom_depth + 1;
/* Sort by block number to traverse children in pre-order */
if (idom_bb->dom_child == 0) {
idom_bb->dom_child = b;
} else if (b < idom_bb->dom_child) {
bb->dom_next_child = idom_bb->dom_child;
idom_bb->dom_child = b;
} else {
int child = idom_bb->dom_child;
ir_block *child_bb = &blocks[child];
while (child_bb->dom_next_child > 0 && b > child_bb->dom_next_child) {
child = child_bb->dom_next_child;
child_bb = &blocks[child];
}
bb->dom_next_child = child_bb->dom_next_child;
child_bb->dom_next_child = b;
}
/* Construct children lists sorted by block number */
for (b = blocks_count, bb = &blocks[b]; b >= 2; b--, bb--) {
ir_block *idom_bb = &blocks[bb->idom];
bb->dom_next_child = idom_bb->dom_child;
idom_bb->dom_child = b;
}
return 1;

View File

@@ -660,6 +660,12 @@ void ir_dump_codegen(const ir_ctx *ctx, FILE *f)
}
if (first) {
fprintf(f, ";");
} else if (ctx->value_params
&& insn->op == IR_PARAM
&& ctx->value_params[insn->op3 - 1].align) {
fprintf(f, ") ByVal(%d, %d);",
ctx->value_params[insn->op3 - 1].size,
ctx->value_params[insn->op3 - 1].align);
} else {
fprintf(f, ");");
}

View File

@@ -167,11 +167,24 @@ static ir_reg ir_get_param_reg(const ir_ctx *ctx, ir_ref ref)
if (insn->op == IR_PARAM) {
if (IR_IS_TYPE_INT(insn->type)) {
if (use == ref) {
#if defined(IR_TARGET_X64) || defined(IR_TARGET_X86)
if (ctx->value_params && ctx->value_params[insn->op3 - 1].align) {
/* struct passed by value on stack */
return IR_REG_NONE;
} else
#endif
if (int_param < int_reg_params_count) {
return int_reg_params[int_param];
} else {
return IR_REG_NONE;
}
#if defined(IR_TARGET_X64) || defined(IR_TARGET_X86)
} else {
if (ctx->value_params && ctx->value_params[insn->op3 - 1].align) {
/* struct passed by value on stack */
continue;
}
#endif
}
int_param++;
#ifdef _WIN64
@@ -222,9 +235,12 @@ static int ir_get_args_regs(const ir_ctx *ctx, const ir_insn *insn, int8_t *regs
n = insn->inputs_count;
n = IR_MIN(n, IR_MAX_REG_ARGS + 2);
for (j = 3; j <= n; j++) {
type = ctx->ir_base[ir_insn_op(insn, j)].type;
ir_insn *arg = &ctx->ir_base[ir_insn_op(insn, j)];
type = arg->type;
if (IR_IS_TYPE_INT(type)) {
if (int_param < int_reg_params_count) {
if (arg->op == IR_ARGVAL) {
continue;
} else if (int_param < int_reg_params_count) {
regs[j] = int_reg_params[int_param];
count = j + 1;
} else {

View File

@@ -1703,6 +1703,11 @@ IR_FOLD(SUB_OV(_, C_ADDR))
{
if (op2_insn->val.u64 == 0) {
/* a +/- 0 => a */
if (op1_insn->type != IR_OPT_TYPE(opt)) {
opt = IR_BITCAST | (opt & IR_OPT_TYPE_MASK);
op2 = IR_UNUSED;
IR_FOLD_RESTART;
}
IR_FOLD_COPY(op1);
}
IR_FOLD_NEXT;
@@ -1721,6 +1726,12 @@ IR_FOLD(ADD(C_ADDR, _))
{
if (op1_insn->val.u64 == 0) {
/* 0 + a => a */
if (op2_insn->type != IR_OPT_TYPE(opt)) {
opt = IR_BITCAST | (opt & IR_OPT_TYPE_MASK);
op1 = op2;
op2 = IR_UNUSED;
IR_FOLD_RESTART;
}
IR_FOLD_COPY(op2);
}
IR_FOLD_NEXT;
@@ -2927,7 +2938,7 @@ IR_FOLD(SUB(C_ADDR, SUB))
/* c1 - (x - c2) => (c1 + c2) - x */
val.u64 = op1_insn->val.u64 + ctx->ir_base[op2_insn->op2].val.u64;
op2 = op2_insn->op1;
op1 = ir_const(ctx, val, op1_insn->op1);
op1 = ir_const(ctx, val, op1_insn->type);
IR_FOLD_RESTART;
} else if (IR_IS_CONST_REF(op2_insn->op1) && !IR_IS_SYM_CONST(ctx->ir_base[op2_insn->op1].op)) {
/* c1 - (c2 - x) => x + (c1 - c2) */

View File

@@ -792,21 +792,26 @@ IR_ALWAYS_INLINE bool ir_is_good_bb_order(ir_ctx *ctx, uint32_t b, ir_block *bb,
ir_ref *p = insn->ops + 1;
if (n == 1) {
return *p < start;
return ctx->cfg_map[*p] < b;
} else {
IR_ASSERT(n > 1);
for (; n > 0; p++, n--) {
ir_ref input = *p;
if (input < start) {
/* ordered */
} else if ((bb->flags & IR_BB_LOOP_HEADER)
&& (ctx->cfg_map[input] == b || ctx->cfg_blocks[ctx->cfg_map[input]].loop_header == b)) {
/* back-edge of reducible loop */
} else if ((bb->flags & IR_BB_IRREDUCIBLE_LOOP)
&& (ctx->cfg_blocks[ctx->cfg_map[input]].loop_header == ctx->cfg_blocks[b].loop_header)) {
/* closing edge of irreducible loop */
} else {
return 0;
if (!IR_IS_CONST_REF(input)) {
uint32_t input_b = ctx->cfg_map[input];
if (input_b < b) {
/* ordered */
} else if ((bb->flags & IR_BB_LOOP_HEADER)
&& (input_b == b || ctx->cfg_blocks[input_b].loop_header == b)) {
/* back-edge of reducible loop */
} else if ((bb->flags & IR_BB_IRREDUCIBLE_LOOP)
&& (ctx->cfg_blocks[input_b].loop_header == bb->loop_header)) {
/* closing edge of irreducible loop */
} else {
return 0;
}
}
}
return 1;
@@ -925,121 +930,54 @@ int ir_schedule(ir_ctx *ctx)
ir_ref *_xlat;
ir_ref *edges;
ir_ref prev_b_end;
uint32_t b, prev_b;
uint32_t b;
uint32_t *_blocks = ctx->cfg_map;
ir_ref *_next = ir_mem_malloc(ctx->insns_count * sizeof(ir_ref));
ir_ref *_prev = ir_mem_malloc(ctx->insns_count * sizeof(ir_ref));
ir_ref _move_down = 0;
ir_block *bb;
ir_insn *insn, *new_insn;
ir_use_list *lists, *use_list, *new_list;
bool bad_bb_order = 0;
/* Create a double-linked list of nodes ordered by BB, respecting BB->start and BB->end */
IR_ASSERT(_blocks[1] == 1);
prev_b = 1;
prev_b_end = ctx->cfg_blocks[1].end;
/* link BB boundaries */
_prev[1] = 0;
_prev[prev_b_end] = 0;
for (i = 2, j = 1; i < ctx->insns_count; i++) {
b = _blocks[i];
IR_ASSERT((int32_t)b >= 0);
if (b == prev_b && i <= prev_b_end) {
/* add to the end of the list */
_next[j] = i;
_prev[i] = j;
j = i;
} else if (b > prev_b) {
bb = &ctx->cfg_blocks[b];
if (i == bb->start) {
if (bb->end > bb->start) {
prev_b = b;
prev_b_end = bb->end;
/* add to the end of the list */
_next[j] = i;
_prev[i] = j;
j = i;
} else {
prev_b = 0;
prev_b_end = 0;
k = bb->end;
while (_blocks[_prev[k]] == b) {
k = _prev[k];
}
/* insert before "k" */
_prev[i] = _prev[k];
_next[i] = k;
_next[_prev[k]] = i;
_prev[k] = i;
}
if (!ir_is_good_bb_order(ctx, b, bb, i)) {
bad_bb_order = 1;
}
} else if (i != bb->end) {
/* move down late (see the following loop) */
_next[i] = _move_down;
_move_down = i;
} else {
prev_b = 0;
prev_b_end = 0;
if (bb->start > bb->end) {
/* add to the end of the list */
_next[j] = i;
_prev[i] = j;
j = i;
} else {
k = bb->start;
while (_blocks[_next[k]] == b) {
k = _next[k];
}
/* insert after "k" */
_next[i] = _next[k];
_prev[i] = k;
_prev[_next[k]] = i;
_next[k] = i;
}
}
} else if (b) {
bb = &ctx->cfg_blocks[b];
IR_ASSERT(i != bb->start);
if (i > bb->end) {
/* move up, insert before the end of the already scheduled BB */
k = bb->end;
} else {
IR_ASSERT(i > bb->start);
/* move up, insert at the end of the block */
k = ctx->cfg_blocks[b + 1].start;
}
/* insert before "k" */
_prev[i] = _prev[k];
_next[i] = k;
_next[_prev[k]] = i;
_prev[k] = i;
prev_b_end = ctx->cfg_blocks[1].end;
_next[1] = prev_b_end;
_prev[prev_b_end] = 1;
for (b = 2, bb = ctx->cfg_blocks + 2; b <= ctx->cfg_blocks_count; b++, bb++) {
_next[prev_b_end] = bb->start;
_prev[bb->start] = prev_b_end;
_next[bb->start] = bb->end;
_prev[bb->end] = bb->start;
prev_b_end = bb->end;
if (!ir_is_good_bb_order(ctx, b, bb, bb->start)) {
bad_bb_order = 1;
}
}
_next[j] = 0;
_next[prev_b_end] = 0;
while (_move_down) {
i = _move_down;
_move_down = _next[i];
/* insert intermediate BB nodes */
for (i = 2, j = 1; i < ctx->insns_count; i++) {
b = _blocks[i];
if (!b) continue;
bb = &ctx->cfg_blocks[b];
k = _next[bb->start];
if (bb->flags & (IR_BB_HAS_PHI|IR_BB_HAS_PI|IR_BB_HAS_PARAM|IR_BB_HAS_VAR)) {
/* insert after the start of the block and all PARAM, VAR, PI, PHI */
insn = &ctx->ir_base[k];
while (insn->op == IR_PHI || insn->op == IR_PARAM || insn->op == IR_VAR || insn->op == IR_PI) {
k = _next[k];
insn = &ctx->ir_base[k];
}
if (i != bb->start && i != bb->end) {
/* insert before "end" */
ir_ref n = bb->end;
ir_ref p = _prev[n];
_prev[i] = p;
_next[i] = n;
_next[p] = i;
_prev[n] = i;
}
}
/* insert before "k" */
_prev[i] = _prev[k];
_next[i] = k;
_next[_prev[k]] = i;
_prev[k] = i;
if (bad_bb_order) {
ir_fix_bb_order(ctx, _prev, _next);
}
#ifdef IR_DEBUG
@@ -1051,10 +989,6 @@ int ir_schedule(ir_ctx *ctx)
}
#endif
if (bad_bb_order) {
ir_fix_bb_order(ctx, _prev, _next);
}
_xlat = ir_mem_calloc((ctx->consts_count + ctx->insns_count), sizeof(ir_ref));
_xlat += ctx->consts_count;
_xlat[IR_TRUE] = IR_TRUE;
@@ -1168,7 +1102,11 @@ int ir_schedule(ir_ctx *ctx)
if (end->op == IR_IF) {
/* Move condition closer to IF */
input = end->op2;
if (input > 0 && _blocks[input] == b && !_xlat[input] && _prev[j] != input) {
if (input > 0
&& _blocks[input] == b
&& !_xlat[input]
&& _prev[j] != input
&& (!(ir_op_flags[ctx->ir_base[input].op] & IR_OP_FLAG_CONTROL) || end->op1 == input)) {
if (input == i) {
i = _next[i];
insn = &ctx->ir_base[i];
@@ -1188,6 +1126,7 @@ int ir_schedule(ir_ctx *ctx)
ir_ref n, j, *p, input;
restart:
IR_ASSERT(_blocks[i] == b);
n = insn->inputs_count;
for (j = n, p = insn->ops + 1; j > 0; p++, j--) {
input = *p;
@@ -1221,6 +1160,7 @@ restart:
}
_xlat[i] = insns_count;
insns_count += ir_insn_inputs_to_len(n);
IR_ASSERT(_next[i] != IR_UNUSED);
i = _next[i];
insn = &ctx->ir_base[i];
}
@@ -1274,6 +1214,7 @@ restart:
new_ctx.insns_count = insns_count;
new_ctx.flags2 = ctx->flags2;
new_ctx.ret_type = ctx->ret_type;
new_ctx.value_params = ctx->value_params;
new_ctx.mflags = ctx->mflags;
new_ctx.spill_base = ctx->spill_base;
new_ctx.fixed_stack_red_zone = ctx->fixed_stack_red_zone;
@@ -1511,6 +1452,7 @@ restart:
new_ctx.cfg_edges = ctx->cfg_edges;
ctx->cfg_blocks = NULL;
ctx->cfg_edges = NULL;
ctx->value_params = NULL;
ir_code_buffer *saved_code_buffer = ctx->code_buffer;
ir_free(ctx);

View File

@@ -1193,7 +1193,7 @@ static void ir_add_fusion_ranges(ir_ctx *ctx, ir_ref ref, ir_ref input, ir_block
n = IR_INPUT_EDGES_COUNT(flags);
j = 1;
p = insn->ops + j;
if (flags & IR_OP_FLAG_CONTROL) {
if (flags & (IR_OP_FLAG_CONTROL|IR_OP_FLAG_PINNED)) {
j++;
p++;
}
@@ -1340,7 +1340,7 @@ int ir_compute_live_ranges(ir_ctx *ctx)
|| (ctx->rules[ref] & IR_RULE_MASK) == IR_ALLOCA)
&& ctx->use_lists[ref].count > 0) {
insn = &ctx->ir_base[ref];
if (insn->op != IR_VADDR) {
if (insn->op != IR_VADDR && insn->op != IR_PARAM) {
insn->op3 = ctx->vars;
ctx->vars = ref;
}
@@ -1630,6 +1630,10 @@ static void ir_vregs_join(ir_ctx *ctx, uint32_t r1, uint32_t r2)
if (ctx->ir_base[IR_LIVE_POS_TO_REF(ctx->live_intervals[r1]->use_pos->pos)].op != IR_VLOAD) {
ctx->live_intervals[r1]->flags &= ~IR_LIVE_INTERVAL_MEM_LOAD;
}
if (ival->flags & IR_LIVE_INTERVAL_MEM_PARAM) {
IR_ASSERT(!(ctx->live_intervals[r1]->flags & IR_LIVE_INTERVAL_MEM_PARAM));
ctx->live_intervals[r1]->flags |= IR_LIVE_INTERVAL_MEM_PARAM;
}
ctx->live_intervals[r2] = NULL;
// TODO: remember to reuse ???

View File

@@ -10,34 +10,38 @@
void ir_print_proto(const ir_ctx *ctx, ir_ref func_proto, FILE *f)
{
ir_ref j;
if (func_proto) {
const ir_proto_t *proto = (const ir_proto_t *)ir_get_str(ctx, func_proto);
fprintf(f, "(");
if (proto->params_count > 0) {
fprintf(f, "%s", ir_type_cname[proto->param_types[0]]);
for (j = 1; j < proto->params_count; j++) {
fprintf(f, ", %s", ir_type_cname[proto->param_types[j]]);
}
if (proto->flags & IR_VARARG_FUNC) {
fprintf(f, ", ...");
}
} else if (proto->flags & IR_VARARG_FUNC) {
fprintf(f, "...");
}
fprintf(f, "): %s", ir_type_cname[proto->ret_type]);
if (proto->flags & IR_FASTCALL_FUNC) {
fprintf(f, " __fastcall");
} else if (proto->flags & IR_BUILTIN_FUNC) {
fprintf(f, " __builtin");
}
ir_print_proto_ex(proto->flags, proto->ret_type, proto->params_count, proto->param_types, f);
} else {
fprintf(f, "(): int32_t");
}
}
/* Print a function prototype "(t1, t2, ...): ret [__fastcall|__builtin]" to "f".
 * "flags" is a combination of IR_VARARG_FUNC/IR_FASTCALL_FUNC/IR_BUILTIN_FUNC,
 * "param_types" holds "params_count" ir_type bytes. */
void ir_print_proto_ex(uint8_t flags, ir_type ret_type, uint32_t params_count, const uint8_t *param_types, FILE *f)
{
	uint32_t i;

	fputc('(', f);
	for (i = 0; i < params_count; i++) {
		/* comma-separate everything after the first parameter */
		fprintf(f, (i == 0) ? "%s" : ", %s", ir_type_cname[param_types[i]]);
	}
	if (flags & IR_VARARG_FUNC) {
		/* "..." follows a comma only when named parameters precede it */
		fprintf(f, (params_count > 0) ? ", ..." : "...");
	}
	fprintf(f, "): %s", ir_type_cname[ret_type]);
	if (flags & IR_FASTCALL_FUNC) {
		fprintf(f, " __fastcall");
	} else if (flags & IR_BUILTIN_FUNC) {
		fprintf(f, " __builtin");
	}
}
static void ir_save_dessa_moves(const ir_ctx *ctx, int b, ir_block *bb, FILE *f)
{
uint32_t succ;
@@ -280,6 +284,12 @@ void ir_save(const ir_ctx *ctx, uint32_t save_flags, FILE *f)
}
if (first) {
fprintf(f, ";");
} else if (ctx->value_params
&& insn->op == IR_PARAM
&& ctx->value_params[insn->op3 - 1].align) {
fprintf(f, ") ByVal(%d, %d);",
ctx->value_params[insn->op3 - 1].size,
ctx->value_params[insn->op3 - 1].align);
} else {
fprintf(f, ");");
}

View File

@@ -1875,6 +1875,7 @@ static ir_ref ir_ext_const(ir_ctx *ctx, ir_insn *val_insn, ir_op op, ir_type typ
case IR_I8:
case IR_U8:
case IR_BOOL:
case IR_CHAR:
if (op == IR_SEXT) {
new_val.i64 = (int64_t)val_insn->val.i8;
} else {
@@ -1928,7 +1929,7 @@ static ir_ref ir_ext_ref(ir_ctx *ctx, ir_ref var_ref, ir_ref src_ref, ir_op op,
return ref;
}
static uint32_t _ir_estimated_control(ir_ctx *ctx, ir_ref val)
static uint32_t _ir_estimated_control(ir_ctx *ctx, ir_ref val, ir_ref loop)
{
ir_insn *insn;
ir_ref n, *p, input, result, ctrl;
@@ -1953,7 +1954,8 @@ static uint32_t _ir_estimated_control(ir_ctx *ctx, ir_ref val)
result = 1;
for (; n > 0; p++, n--) {
input = *p;
ctrl = _ir_estimated_control(ctx, input);
ctrl = _ir_estimated_control(ctx, input, loop);
if (ctrl >= loop) return ctrl;
if (ctrl > result) { // TODO: check dominance depth instead of order
result = ctrl;
}
@@ -1963,7 +1965,7 @@ static uint32_t _ir_estimated_control(ir_ctx *ctx, ir_ref val)
static bool ir_is_loop_invariant(ir_ctx *ctx, ir_ref ref, ir_ref loop)
{
ref = _ir_estimated_control(ctx, ref);
ref = _ir_estimated_control(ctx, ref, loop);
return ref < loop; // TODO: check dominance instead of order
}

View File

@@ -1387,6 +1387,12 @@ op2_const:
constraints->tmp_regs[n] = IR_TMP_REG(3, IR_ADDR, IR_LOAD_SUB_REF, IR_DEF_SUB_REF);
n++;
break;
case IR_ARGVAL:
constraints->tmp_regs[0] = IR_SCRATCH_REG(IR_REG_RSI, IR_DEF_SUB_REF - IR_SUB_REFS_COUNT, IR_USE_SUB_REF);
constraints->tmp_regs[1] = IR_SCRATCH_REG(IR_REG_RDI, IR_DEF_SUB_REF - IR_SUB_REFS_COUNT, IR_USE_SUB_REF);
constraints->tmp_regs[2] = IR_SCRATCH_REG(IR_REG_RCX, IR_DEF_SUB_REF - IR_SUB_REFS_COUNT, IR_USE_SUB_REF);
n = 3;
break;
case IR_CALL:
insn = &ctx->ir_base[ref];
if (IR_IS_TYPE_INT(insn->type)) {
@@ -2431,6 +2437,11 @@ binop_fp:
case IR_VAR:
return IR_SKIPPED | IR_VAR;
case IR_PARAM:
#ifndef _WIN64
if (ctx->value_params && ctx->value_params[insn->op3 - 1].align) {
return IR_STATIC_ALLOCA;
}
#endif
return ctx->use_lists[ref].count > 0 ? IR_PARAM : IR_SKIPPED | IR_PARAM;
case IR_ALLOCA:
/* alloca() may be used only in functions */
@@ -2976,6 +2987,8 @@ store_int:
}
}
return IR_SKIPPED | IR_NOP;
case IR_ARGVAL:
return IR_FUSED | IR_ARGVAL;
case IR_NOP:
return IR_SKIPPED | IR_NOP;
default:
@@ -3153,6 +3166,17 @@ static void ir_emit_load_mem(ir_ctx *ctx, ir_type type, ir_reg reg, ir_mem mem)
}
}
/* Return the frame offset of a stack-resident local.
 * For IR_PARAM this is a by-value (struct) parameter whose home location is
 * recorded in ctx->value_params (op3 is the 1-based parameter number);
 * for IR_VAR/IR_ALLOCA/IR_VADDR the spill position is stored in op3 itself. */
static int32_t ir_local_offset(ir_ctx *ctx, ir_insn *insn)
{
	if (insn->op == IR_PARAM) {
		IR_ASSERT(ctx->value_params && ctx->value_params[insn->op3 - 1].align);
		return IR_SPILL_POS_TO_OFFSET(ctx->value_params[insn->op3 - 1].offset);
	}
	IR_ASSERT(insn->op == IR_VAR || insn->op == IR_ALLOCA || insn->op == IR_VADDR);
	return IR_SPILL_POS_TO_OFFSET(insn->op3);
}
static void ir_load_local_addr(ir_ctx *ctx, ir_reg reg, ir_ref src)
{
ir_backend_data *data = ctx->data;
@@ -3166,8 +3190,7 @@ static void ir_load_local_addr(ir_ctx *ctx, ir_reg reg, ir_ref src)
if (var_insn->op == IR_VADDR) {
var_insn = &ctx->ir_base[var_insn->op1];
}
IR_ASSERT(var_insn->op == IR_VAR || var_insn->op == IR_ALLOCA);
offset = IR_SPILL_POS_TO_OFFSET(var_insn->op3);
offset = ir_local_offset(ctx, var_insn);
if (offset == 0) {
| mov Ra(reg), Ra(base)
} else {
@@ -3385,7 +3408,7 @@ static ir_mem ir_fuse_addr(ir_ctx *ctx, ir_ref root, ir_ref ref)
case IR_LEA_OB:
offset_insn = insn;
if (ir_rule(ctx, insn->op1) == IR_STATIC_ALLOCA) {
offset = IR_SPILL_POS_TO_OFFSET(ctx->ir_base[insn->op1].op3);
offset = ir_local_offset(ctx, &ctx->ir_base[insn->op1]);
base_reg = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER;
base_reg_ref = IR_UNUSED;
} else {
@@ -3407,12 +3430,12 @@ static ir_mem ir_fuse_addr(ir_ctx *ctx, ir_ref root, ir_ref ref)
break;
case IR_LEA_IB:
if (ir_rule(ctx, insn->op1) == IR_STATIC_ALLOCA) {
offset = IR_SPILL_POS_TO_OFFSET(ctx->ir_base[insn->op1].op3);
offset = ir_local_offset(ctx, &ctx->ir_base[insn->op1]);
base_reg = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER;
base_reg_ref = IR_UNUSED;
index_reg_ref = ref * sizeof(ir_ref) + 2;
} else if (ir_rule(ctx, insn->op2) == IR_STATIC_ALLOCA) {
offset = IR_SPILL_POS_TO_OFFSET(ctx->ir_base[insn->op2].op3);
offset = ir_local_offset(ctx, &ctx->ir_base[insn->op2]);
base_reg = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER;
base_reg_ref = IR_UNUSED;
index_reg_ref = ref * sizeof(ir_ref) + 1;
@@ -3428,12 +3451,12 @@ static ir_mem ir_fuse_addr(ir_ctx *ctx, ir_ref root, ir_ref ref)
offset_insn = op1_insn;
scale = 1;
if (ir_rule(ctx, insn->op2) == IR_STATIC_ALLOCA) {
offset = IR_SPILL_POS_TO_OFFSET(ctx->ir_base[insn->op2].op3);
offset = ir_local_offset(ctx, &ctx->ir_base[insn->op2]);
base_reg = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER;
base_reg_ref = IR_UNUSED;
index_reg_ref = insn->op1 * sizeof(ir_ref) + 1;
} else if (ir_rule(ctx, op1_insn->op1) == IR_STATIC_ALLOCA) {
offset = IR_SPILL_POS_TO_OFFSET(ctx->ir_base[op1_insn->op1].op3);
offset = ir_local_offset(ctx, &ctx->ir_base[op1_insn->op1]);
base_reg = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER;
base_reg_ref = IR_UNUSED;
index_reg_ref = ref * sizeof(ir_ref) + 2;
@@ -3447,12 +3470,12 @@ static ir_mem ir_fuse_addr(ir_ctx *ctx, ir_ref root, ir_ref ref)
offset_insn = op2_insn;
scale = 1;
if (ir_rule(ctx, insn->op1) == IR_STATIC_ALLOCA) {
offset = IR_SPILL_POS_TO_OFFSET(ctx->ir_base[insn->op1].op3);
offset = ir_local_offset(ctx, &ctx->ir_base[insn->op1]);
base_reg = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER;
base_reg_ref = IR_UNUSED;
index_reg_ref = insn->op2 * sizeof(ir_ref) + 1;
} else if (ir_rule(ctx, op2_insn->op1) == IR_STATIC_ALLOCA) {
offset = IR_SPILL_POS_TO_OFFSET(ctx->ir_base[op2_insn->op1].op3);
offset = ir_local_offset(ctx, &ctx->ir_base[op2_insn->op1]);
base_reg = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER;
base_reg_ref = IR_UNUSED;
index_reg_ref = ref * sizeof(ir_ref) + 1;
@@ -3479,12 +3502,12 @@ static ir_mem ir_fuse_addr(ir_ctx *ctx, ir_ref root, ir_ref ref)
offset_insn = insn;
scale = 1;
if (ir_rule(ctx, op1_insn->op2) == IR_STATIC_ALLOCA) {
offset = IR_SPILL_POS_TO_OFFSET(ctx->ir_base[op1_insn->op2].op3);
offset = ir_local_offset(ctx, &ctx->ir_base[op1_insn->op2]);
base_reg = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER;
base_reg_ref = IR_UNUSED;
index_reg_ref = insn->op1 * sizeof(ir_ref) + 1;
} else if (ir_rule(ctx, op1_insn->op1) == IR_STATIC_ALLOCA) {
offset = IR_SPILL_POS_TO_OFFSET(ctx->ir_base[op1_insn->op1].op3);
offset = ir_local_offset(ctx, &ctx->ir_base[op1_insn->op1]);
base_reg = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER;
base_reg_ref = IR_UNUSED;
index_reg_ref = insn->op1 * sizeof(ir_ref) + 2;
@@ -3500,7 +3523,7 @@ static ir_mem ir_fuse_addr(ir_ctx *ctx, ir_ref root, ir_ref ref)
op2_insn = &ctx->ir_base[insn->op2];
scale = ctx->ir_base[op2_insn->op2].val.i32;
if (ir_rule(ctx, op1_insn->op1) == IR_STATIC_ALLOCA) {
offset = IR_SPILL_POS_TO_OFFSET(ctx->ir_base[op1_insn->op1].op3);
offset = ir_local_offset(ctx, &ctx->ir_base[op1_insn->op1]);
base_reg = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER;
base_reg_ref = IR_UNUSED;
} else {
@@ -3514,7 +3537,7 @@ static ir_mem ir_fuse_addr(ir_ctx *ctx, ir_ref root, ir_ref ref)
op2_insn = &ctx->ir_base[insn->op2];
offset_insn = op2_insn;
if (ir_rule(ctx, op2_insn->op1) == IR_STATIC_ALLOCA) {
offset = IR_SPILL_POS_TO_OFFSET(ctx->ir_base[op2_insn->op1].op3);
offset = ir_local_offset(ctx, &ctx->ir_base[op2_insn->op1]);
base_reg = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER;
base_reg_ref = IR_UNUSED;
} else {
@@ -3523,7 +3546,7 @@ static ir_mem ir_fuse_addr(ir_ctx *ctx, ir_ref root, ir_ref ref)
break;
case IR_LEA_B_SI:
if (ir_rule(ctx, insn->op1) == IR_STATIC_ALLOCA) {
offset = IR_SPILL_POS_TO_OFFSET(ctx->ir_base[insn->op1].op3);
offset = ir_local_offset(ctx, &ctx->ir_base[insn->op1]);
base_reg = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER;
base_reg_ref = IR_UNUSED;
} else {
@@ -3537,7 +3560,7 @@ static ir_mem ir_fuse_addr(ir_ctx *ctx, ir_ref root, ir_ref ref)
case IR_LEA_SI_B:
index_reg_ref = insn->op1 * sizeof(ir_ref) + 1;
if (ir_rule(ctx, insn->op2) == IR_STATIC_ALLOCA) {
offset = IR_SPILL_POS_TO_OFFSET(ctx->ir_base[insn->op2].op3);
offset = ir_local_offset(ctx, &ctx->ir_base[insn->op2]);
base_reg = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER;
base_reg_ref = IR_UNUSED;
} else {
@@ -3580,7 +3603,7 @@ static ir_mem ir_fuse_addr(ir_ctx *ctx, ir_ref root, ir_ref ref)
offset_insn = NULL;
break;
case IR_ALLOCA:
offset = IR_SPILL_POS_TO_OFFSET(insn->op3);
offset = ir_local_offset(ctx, insn);
base_reg = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER;
base_reg_ref = index_reg_ref = IR_UNUSED;
scale = 1;
@@ -8306,7 +8329,7 @@ static void ir_emit_va_start(ir_ctx *ctx, ir_ref def, ir_insn *insn)
} else {
IR_ASSERT(ir_rule(ctx, insn->op2) == IR_STATIC_ALLOCA);
op2_reg = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER;
offset = IR_SPILL_POS_TO_OFFSET(ctx->ir_base[insn->op2].op3);
offset = ir_local_offset(ctx, &ctx->ir_base[insn->op2]);
}
if (ctx->flags & IR_USE_FRAME_POINTER) {
@@ -8340,7 +8363,7 @@ static void ir_emit_va_start(ir_ctx *ctx, ir_ref def, ir_insn *insn)
} else {
IR_ASSERT(ir_rule(ctx, insn->op2) == IR_STATIC_ALLOCA);
op2_reg = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER;
offset = IR_SPILL_POS_TO_OFFSET(ctx->ir_base[insn->op2].op3);
offset = ir_local_offset(ctx, &ctx->ir_base[insn->op2]);
}
if (ctx->flags & IR_USE_FRAME_POINTER) {
@@ -8407,7 +8430,7 @@ static void ir_emit_va_copy(ir_ctx *ctx, ir_ref def, ir_insn *insn)
} else {
IR_ASSERT(ir_rule(ctx, insn->op2) == IR_STATIC_ALLOCA);
op2_reg = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER;
op2_offset = IR_SPILL_POS_TO_OFFSET(ctx->ir_base[insn->op2].op3);
op2_offset = ir_local_offset(ctx, &ctx->ir_base[insn->op2]);
}
if (op3_reg != IR_REG_NONE) {
if (IR_REG_SPILLED(op3_reg)) {
@@ -8418,7 +8441,7 @@ static void ir_emit_va_copy(ir_ctx *ctx, ir_ref def, ir_insn *insn)
} else {
IR_ASSERT(ir_rule(ctx, insn->op3) == IR_STATIC_ALLOCA);
op3_reg = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER;
op3_offset = IR_SPILL_POS_TO_OFFSET(ctx->ir_base[insn->op3].op3);
op3_offset = ir_local_offset(ctx, &ctx->ir_base[insn->op3]);
}
| mov Ra(tmp_reg), aword [Ra(op3_reg)+op3_offset]
| mov aword [Ra(op2_reg)+op2_offset], Ra(tmp_reg)
@@ -8441,7 +8464,7 @@ static void ir_emit_va_copy(ir_ctx *ctx, ir_ref def, ir_insn *insn)
} else {
IR_ASSERT(ir_rule(ctx, insn->op2) == IR_STATIC_ALLOCA);
op2_reg = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER;
op2_offset = IR_SPILL_POS_TO_OFFSET(ctx->ir_base[insn->op2].op3);
op2_offset = ir_local_offset(ctx, &ctx->ir_base[insn->op2]);
}
if (op3_reg != IR_REG_NONE) {
if (IR_REG_SPILLED(op3_reg)) {
@@ -8452,7 +8475,7 @@ static void ir_emit_va_copy(ir_ctx *ctx, ir_ref def, ir_insn *insn)
} else {
IR_ASSERT(ir_rule(ctx, insn->op3) == IR_STATIC_ALLOCA);
op3_reg = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER;
op3_offset = IR_SPILL_POS_TO_OFFSET(ctx->ir_base[insn->op3].op3);
op3_offset = ir_local_offset(ctx, &ctx->ir_base[insn->op3]);
}
| mov Rd(tmp_reg), dword [Ra(op3_reg)+(op3_offset+offsetof(ir_va_list, gp_offset))]
| mov dword [Ra(op2_reg)+(op2_offset+offsetof(ir_va_list, gp_offset))], Rd(tmp_reg)
@@ -8493,11 +8516,29 @@ static void ir_emit_va_arg(ir_ctx *ctx, ir_ref def, ir_insn *insn)
} else {
IR_ASSERT(ir_rule(ctx, insn->op2) == IR_STATIC_ALLOCA);
op2_reg = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER;
offset = IR_SPILL_POS_TO_OFFSET(ctx->ir_base[insn->op2].op3);
offset = ir_local_offset(ctx, &ctx->ir_base[insn->op2]);
}
| mov Ra(tmp_reg), aword [Ra(op2_reg)+offset]
#ifdef _WIN64
ir_emit_load_mem(ctx, type, def_reg, IR_MEM_B(tmp_reg));
| add Ra(tmp_reg), IR_MAX(ir_type_size[type], sizeof(void*))
#else
if (!insn->op3) {
ir_emit_load_mem(ctx, type, def_reg, IR_MEM_B(tmp_reg));
| add Ra(tmp_reg), IR_MAX(ir_type_size[type], sizeof(void*))
} else {
IR_ASSERT(type == IR_ADDR);
int align = 1U << (insn->op3 & 0x7);
int size = (uint32_t)insn->op3 >> 3;
if (align > (int)sizeof(void*)) {
| add Ra(tmp_reg), (align-1)
| and Ra(tmp_reg), ~(align-1)
}
| mov Ra(def_reg), Ra(tmp_reg)
| add Ra(tmp_reg), IR_ALIGNED_SIZE(size, sizeof(void*))
}
#endif
| mov aword [Ra(op2_reg)+offset], Ra(tmp_reg)
if (IR_REG_SPILLED(ctx->regs[def][0])) {
ir_emit_store(ctx, type, def, def_reg);
@@ -8526,9 +8567,23 @@ static void ir_emit_va_arg(ir_ctx *ctx, ir_ref def, ir_insn *insn)
} else {
IR_ASSERT(ir_rule(ctx, insn->op2) == IR_STATIC_ALLOCA);
op2_reg = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER;
offset = IR_SPILL_POS_TO_OFFSET(ctx->ir_base[insn->op2].op3);
offset = ir_local_offset(ctx, &ctx->ir_base[insn->op2]);
}
if (IR_IS_TYPE_INT(type)) {
if (insn->op3) {
/* long struct argument */
IR_ASSERT(type == IR_ADDR);
int align = 1U << (insn->op3 & 0x7);
int size = (uint32_t)insn->op3 >> 3;
| mov Ra(tmp_reg), aword [Ra(op2_reg)+(offset+offsetof(ir_va_list, overflow_arg_area))]
if (align > (int)sizeof(void*)) {
| add Ra(tmp_reg), (align-1)
| and Ra(tmp_reg), ~(align-1)
}
| mov Ra(def_reg), Ra(tmp_reg)
| add Ra(tmp_reg), IR_ALIGNED_SIZE(size, sizeof(void*))
| mov aword [Ra(op2_reg)+(offset+offsetof(ir_va_list, overflow_arg_area))], Ra(tmp_reg)
} else if (IR_IS_TYPE_INT(type)) {
| mov Rd(tmp_reg), dword [Ra(op2_reg)+(offset+offsetof(ir_va_list, gp_offset))]
| cmp Rd(tmp_reg), sizeof(void*)*IR_REG_INT_ARGS
| jge >1
@@ -8847,7 +8902,7 @@ static void ir_emit_switch(ir_ctx *ctx, uint32_t b, ir_ref def, ir_insn *insn)
}
}
static int32_t ir_call_used_stack(ir_ctx *ctx, ir_insn *insn)
static int32_t ir_call_used_stack(ir_ctx *ctx, ir_insn *insn, int *copy_stack_ptr)
{
int j, n;
ir_type type;
@@ -8856,6 +8911,9 @@ static int32_t ir_call_used_stack(ir_ctx *ctx, ir_insn *insn)
int int_reg_params_count = IR_REG_INT_ARGS;
int fp_reg_params_count = IR_REG_FP_ARGS;
int32_t used_stack = 0;
#ifdef _WIN64
int32_t copy_stack = 0;
#endif
#ifdef IR_HAVE_FASTCALL
if (sizeof(void*) == 4 && ir_is_fastcall(ctx, insn)) {
@@ -8866,8 +8924,26 @@ static int32_t ir_call_used_stack(ir_ctx *ctx, ir_insn *insn)
n = insn->inputs_count;
for (j = 3; j <= n; j++) {
type = ctx->ir_base[ir_insn_op(insn, j)].type;
ir_insn *arg = &ctx->ir_base[ir_insn_op(insn, j)];
type = arg->type;
if (IR_IS_TYPE_INT(type)) {
if (arg->op == IR_ARGVAL) {
int size = arg->op2;
int align = arg->op3;
#ifdef _WIN64
copy_stack += size;
align = IR_MAX((int)sizeof(void*), align);
copy_stack = IR_ALIGNED_SIZE(copy_stack, align);
type = IR_ADDR;
#else
align = IR_MAX((int)sizeof(void*), align);
used_stack = IR_ALIGNED_SIZE(used_stack, align);
used_stack += size;
used_stack = IR_ALIGNED_SIZE(used_stack, sizeof(void*));
continue;
#endif
}
if (int_param >= int_reg_params_count) {
used_stack += IR_MAX(sizeof(void*), ir_type_size[type]);
}
@@ -8892,6 +8968,14 @@ static int32_t ir_call_used_stack(ir_ctx *ctx, ir_insn *insn)
/* Reserved "home space" or "shadow store" for register arguments (used in Windows64 ABI) */
used_stack += IR_SHADOW_ARGS;
#ifdef _WIN64
copy_stack = IR_ALIGNED_SIZE(copy_stack, 16);
used_stack += copy_stack;
*copy_stack_ptr = copy_stack;
#else
*copy_stack_ptr = 0;
#endif
return used_stack;
}
@@ -8911,7 +8995,7 @@ static int32_t ir_emit_arguments(ir_ctx *ctx, ir_ref def, ir_insn *insn, ir_reg
int fp_reg_params_count = IR_REG_FP_ARGS;
const int8_t *int_reg_params = _ir_int_reg_params;
const int8_t *fp_reg_params = _ir_fp_reg_params;
int32_t used_stack, stack_offset = IR_SHADOW_ARGS;
int32_t used_stack, copy_stack = 0, stack_offset = IR_SHADOW_ARGS;
ir_copy *copies;
bool do_pass3 = 0;
/* For temporaries we may use any scratch registers except for registers used for parameters */
@@ -8942,9 +9026,13 @@ static int32_t ir_emit_arguments(ir_ctx *ctx, ir_ref def, ir_insn *insn, ir_reg
#endif
) {
// TODO: support for preallocated stack
#ifdef _WIN64
used_stack = ir_call_used_stack(ctx, insn, &copy_stack);
#else
used_stack = 0;
#endif
} else {
used_stack = ir_call_used_stack(ctx, insn);
used_stack = ir_call_used_stack(ctx, insn, &copy_stack);
if (IR_SHADOW_ARGS
&& insn->op == IR_TAILCALL
&& used_stack == IR_SHADOW_ARGS) {
@@ -8967,6 +9055,46 @@ static int32_t ir_emit_arguments(ir_ctx *ctx, ir_ref def, ir_insn *insn, ir_reg
}
}
#ifdef _WIN64
|.if X64
if (copy_stack) {
/* Copy struct arguments */
int copy_stack_offset = 0;
for (j = 3; j <= n; j++) {
arg = ir_insn_op(insn, j);
src_reg = ir_get_alocated_reg(ctx, def, j);
arg_insn = &ctx->ir_base[arg];
type = arg_insn->type;
if (arg_insn->op == IR_ARGVAL) {
/* make a stack copy */
int size = arg_insn->op2;
int align = arg_insn->op3;
copy_stack_offset += size;
align = IR_MAX((int)sizeof(void*), align);
copy_stack_offset = IR_ALIGNED_SIZE(copy_stack_offset, align);
src_reg = ctx->regs[arg][1];
| lea rdi, [rsp + (used_stack - copy_stack_offset)]
if (src_reg != IR_REG_NONE) {
if (IR_REG_SPILLED(src_reg)) {
src_reg = IR_REG_NUM(src_reg);
ir_emit_load(ctx, IR_ADDR, src_reg, arg_insn->op1);
}
| mov rsi, Ra(src_reg)
} else {
ir_emit_load(ctx, IR_ADDR, IR_REG_RSI, arg_insn->op1);
}
ir_emit_load_imm_int(ctx, IR_ADDR, IR_REG_RCX, size);
| rep; movsb
}
}
}
|.endif
#endif
/* 1. move all register arguments that should be passed through stack
* and collect arguments that should be passed through registers */
copies = ir_mem_malloc((n - 2) * sizeof(ir_copy));
@@ -8976,6 +9104,55 @@ static int32_t ir_emit_arguments(ir_ctx *ctx, ir_ref def, ir_insn *insn, ir_reg
arg_insn = &ctx->ir_base[arg];
type = arg_insn->type;
if (IR_IS_TYPE_INT(type)) {
#ifndef _WIN64
if (arg_insn->op == IR_ARGVAL) {
int size = arg_insn->op2;
int align = arg_insn->op3;
align = IR_MAX((int)sizeof(void*), align);
stack_offset = IR_ALIGNED_SIZE(stack_offset, align);
if (size) {
src_reg = ctx->regs[arg][1];
if (src_reg != IR_REG_NONE) {
if (IR_REG_SPILLED(src_reg)) {
src_reg = IR_REG_NUM(src_reg);
ir_emit_load(ctx, IR_ADDR, src_reg, arg_insn->op1);
}
if (src_reg != IR_REG_RSI) {
|.if X64
| mov rsi, Ra(src_reg)
|.else
| mov esi, Ra(src_reg)
|.endif
}
} else {
ir_emit_load(ctx, IR_ADDR, IR_REG_RSI, arg_insn->op1);
}
if (stack_offset == 0) {
|.if X64
| mov rdi, rsp
|.else
| mov edi, esp
|.endif
} else {
|.if X64
| lea rdi, [rsp+stack_offset]
|.else
| lea edi, [esp+stack_offset]
|.endif
}
|.if X64
| mov rcx, size
| rep; movsb
|.else
| mov ecx, size
| rep; movsb
|.endif
}
stack_offset += size;
stack_offset = IR_ALIGNED_SIZE(stack_offset, sizeof(void*));
continue;
}
#endif
if (int_param < int_reg_params_count) {
dst_reg = int_reg_params[int_param];
} else {
@@ -8985,6 +9162,10 @@ static int32_t ir_emit_arguments(ir_ctx *ctx, ir_ref def, ir_insn *insn, ir_reg
#ifdef _WIN64
/* WIN64 calling convention uses a common counter for int and fp registers */
fp_param++;
if (arg_insn->op == IR_ARGVAL) {
do_pass3 = 3;
continue;
}
#endif
} else {
IR_ASSERT(IR_IS_TYPE_FP(type));
@@ -9037,6 +9218,10 @@ static int32_t ir_emit_arguments(ir_ctx *ctx, ir_ref def, ir_insn *insn, ir_reg
/* 3. move the remaining memory and immediate values */
if (do_pass3) {
#ifdef _WIN64
int copy_stack_offset = 0;
#endif
stack_offset = IR_SHADOW_ARGS;
int_param = 0;
fp_param = 0;
@@ -9046,6 +9231,37 @@ static int32_t ir_emit_arguments(ir_ctx *ctx, ir_ref def, ir_insn *insn, ir_reg
arg_insn = &ctx->ir_base[arg];
type = arg_insn->type;
if (IR_IS_TYPE_INT(type)) {
if (arg_insn->op == IR_ARGVAL) {
int size = arg_insn->op2;
int align = arg_insn->op3;
#ifndef _WIN64
align = IR_MAX((int)sizeof(void*), align);
stack_offset = IR_ALIGNED_SIZE(stack_offset, align);
stack_offset += size;
stack_offset = IR_ALIGNED_SIZE(stack_offset, sizeof(void*));
continue;
#else
|.if X64
/* pass pointer to the copy on stack */
copy_stack_offset += size;
align = IR_MAX((int)sizeof(void*), align);
copy_stack_offset = IR_ALIGNED_SIZE(copy_stack_offset, align);
if (int_param < int_reg_params_count) {
dst_reg = int_reg_params[int_param];
| lea Ra(dst_reg), [rsp + (used_stack - copy_stack_offset)]
} else {
| lea Ra(tmp_reg), [rsp + (used_stack - copy_stack_offset)]
ir_emit_store_mem_int(ctx, IR_ADDR, IR_MEM_BO(IR_REG_STACK_POINTER, stack_offset), tmp_reg);
stack_offset += sizeof(void*);
}
int_param++;
/* WIN64 calling convention uses a common counter for int and fp registers */
fp_param++;
continue;
|.endif
#endif
}
if (int_param < int_reg_params_count) {
dst_reg = int_reg_params[int_param];
} else {
@@ -9161,6 +9377,9 @@ static int32_t ir_emit_arguments(ir_ctx *ctx, ir_ref def, ir_insn *insn, ir_reg
}
}
}
if (insn->op == IR_CALL && (ctx->flags & IR_PREALLOCATED_STACK)) {
used_stack = 0;
}
#endif
#ifdef IR_REG_VARARG_FP_REGS
/* set hidden argument to specify the number of vector registers used */
@@ -10230,7 +10449,16 @@ static void ir_emit_load_params(ir_ctx *ctx)
insn = &ctx->ir_base[use];
if (insn->op == IR_PARAM) {
if (IR_IS_TYPE_INT(insn->type)) {
if (int_param_num < int_reg_params_count) {
if (ctx->value_params && ctx->value_params[insn->op3 - 1].align) {
/* struct passed by value on stack */
size_t align = ctx->value_params[insn->op3 - 1].align;
align = IR_MAX(sizeof(void*), align);
stack_offset = IR_ALIGNED_SIZE(stack_offset, align);
stack_offset += ctx->value_params[insn->op3 - 1].size;
stack_offset = IR_ALIGNED_SIZE(stack_offset, sizeof(void*));
continue;
} else if (int_param_num < int_reg_params_count) {
src_reg = int_reg_params[int_param_num];
} else {
src_reg = IR_REG_NONE;
@@ -10358,6 +10586,19 @@ static void ir_fix_param_spills(ir_ctx *ctx)
insn = &ctx->ir_base[use];
if (insn->op == IR_PARAM) {
if (IR_IS_TYPE_INT(insn->type)) {
#ifndef _WIN64
if (ctx->value_params && ctx->value_params[insn->op3 - 1].align) {
/* struct passed by value on stack */
size_t align = ctx->value_params[insn->op3 - 1].align;
align = IR_MAX(sizeof(void*), align);
stack_offset = IR_ALIGNED_SIZE(stack_offset, align);
ctx->value_params[insn->op3 - 1].offset = stack_start + stack_offset;
stack_offset += ctx->value_params[insn->op3 - 1].size;
stack_offset = IR_ALIGNED_SIZE(stack_offset, sizeof(void*));
continue;
}
#endif
if (int_param_num < int_reg_params_count) {
src_reg = int_reg_params[int_param_num];
} else {
@@ -10618,13 +10859,13 @@ static void ir_allocate_unique_spill_slots(ir_ctx *ctx)
static void ir_preallocate_call_stack(ir_ctx *ctx)
{
int call_stack_size, peak_call_stack_size = 0;
int call_stack_size, copy_stack, peak_call_stack_size = 0;
ir_ref i, n;
ir_insn *insn;
for (i = 1, insn = ctx->ir_base + 1; i < ctx->insns_count;) {
if (insn->op == IR_CALL) {
call_stack_size = ir_call_used_stack(ctx, insn);
call_stack_size = ir_call_used_stack(ctx, insn, &copy_stack);
if (call_stack_size > peak_call_stack_size
#ifdef IR_HAVE_FASTCALL
&& !ir_is_fastcall(ctx, insn) /* fast call functions restore stack pointer */