1
0
mirror of https://github.com/php/php-src.git synced 2026-03-24 00:02:20 +01:00
IR commit: a098f9ed6c2f1c2852d6c0921283212aafb4afed
This commit is contained in:
Dmitry Stogov
2026-02-10 01:34:09 +03:00
committed by GitHub
parent 19ee3e6697
commit dd9421d825
13 changed files with 1099 additions and 677 deletions

View File

@@ -858,7 +858,7 @@ ir_ref ir_emit3(ir_ctx *ctx, uint32_t opt, ir_ref op1, ir_ref op2, ir_ref op3)
static ir_ref _ir_fold_cse(ir_ctx *ctx, uint32_t opt, ir_ref op1, ir_ref op2, ir_ref op3)
{
ir_ref ref = ctx->prev_insn_chain[opt & IR_OPT_OP_MASK];
ir_insn *insn;
const ir_insn *insn;
if (ref) {
ir_ref limit = ctx->fold_cse_limit;
@@ -954,7 +954,8 @@ IR_ALWAYS_INLINE ir_ref _ir_fold_cast(ir_ctx *ctx, ir_ref ref, ir_type type)
* ANY and UNUSED ops are represented by 0
*/
ir_ref ir_folding(ir_ctx *ctx, uint32_t opt, ir_ref op1, ir_ref op2, ir_ref op3, ir_insn *op1_insn, ir_insn *op2_insn, ir_insn *op3_insn)
ir_ref ir_folding(ir_ctx *ctx, uint32_t opt, ir_ref op1, ir_ref op2, ir_ref op3,
const ir_insn *op1_insn, const ir_insn *op2_insn, const ir_insn *op3_insn)
{
uint8_t op;
ir_ref ref;
@@ -1136,9 +1137,9 @@ void ir_set_op(ir_ctx *ctx, ir_ref ref, int32_t n, ir_ref val)
ir_insn_set_op(insn, n, val);
}
ir_ref ir_get_op(ir_ctx *ctx, ir_ref ref, int32_t n)
ir_ref ir_get_op(const ir_ctx *ctx, ir_ref ref, int32_t n)
{
ir_insn *insn = &ctx->ir_base[ref];
const ir_insn *insn = &ctx->ir_base[ref];
#ifdef IR_DEBUG
if (n > 3) {
@@ -2025,7 +2026,7 @@ static ir_alias ir_check_aliasing(ir_ctx *ctx, ir_ref addr1, ir_ref addr2)
ir_alias ir_check_partial_aliasing(const ir_ctx *ctx, ir_ref addr1, ir_ref addr2, ir_type type1, ir_type type2)
{
ir_insn *insn1, *insn2;
const ir_insn *insn1, *insn2;
ir_ref base1, base2, off1, off2;
/* this must already be checked */
@@ -2117,9 +2118,9 @@ ir_alias ir_check_partial_aliasing(const ir_ctx *ctx, ir_ref addr1, ir_ref addr2
return IR_MAY_ALIAS;
}
IR_ALWAYS_INLINE ir_ref ir_find_aliasing_load_i(ir_ctx *ctx, ir_ref ref, ir_type type, ir_ref addr, ir_ref limit)
IR_ALWAYS_INLINE ir_ref ir_find_aliasing_load_i(const ir_ctx *ctx, ir_ref ref, ir_type type, ir_ref addr, ir_ref limit)
{
ir_insn *insn;
const ir_insn *insn;
uint32_t modified_regset = 0;
while (ref > limit) {
@@ -2159,7 +2160,7 @@ IR_ALWAYS_INLINE ir_ref ir_find_aliasing_load_i(ir_ctx *ctx, ir_ref ref, ir_type
} else if (insn->op == IR_RSTORE) {
modified_regset |= (1 << insn->op3);
} else if (insn->op == IR_CALL) {
ir_insn *func = &ctx->ir_base[insn->op2];
const ir_insn *func = &ctx->ir_base[insn->op2];
ir_ref func_proto;
const ir_proto_t *proto;
@@ -2186,14 +2187,14 @@ IR_ALWAYS_INLINE ir_ref ir_find_aliasing_load_i(ir_ctx *ctx, ir_ref ref, ir_type
return IR_UNUSED;
}
ir_ref ir_find_aliasing_load(ir_ctx *ctx, ir_ref ref, ir_type type, ir_ref addr)
ir_ref ir_find_aliasing_load(const ir_ctx *ctx, ir_ref ref, ir_type type, ir_ref addr)
{
return ir_find_aliasing_load_i(ctx, ref, type, addr, (addr > 0 && addr < ref) ? addr : 1);
}
IR_ALWAYS_INLINE ir_ref ir_find_aliasing_vload_i(ir_ctx *ctx, ir_ref ref, ir_type type, ir_ref var)
IR_ALWAYS_INLINE ir_ref ir_find_aliasing_vload_i(const ir_ctx *ctx, ir_ref ref, ir_type type, ir_ref var)
{
ir_insn *insn;
const ir_insn *insn;
while (ref > var) {
insn = &ctx->ir_base[ref];
@@ -2224,7 +2225,7 @@ IR_ALWAYS_INLINE ir_ref ir_find_aliasing_vload_i(ir_ctx *ctx, ir_ref ref, ir_typ
}
}
} else if (insn->op == IR_CALL) {
ir_insn *func = &ctx->ir_base[insn->op2];
const ir_insn *func = &ctx->ir_base[insn->op2];
ir_ref func_proto;
const ir_proto_t *proto;
@@ -2251,7 +2252,7 @@ IR_ALWAYS_INLINE ir_ref ir_find_aliasing_vload_i(ir_ctx *ctx, ir_ref ref, ir_typ
return IR_UNUSED;
}
ir_ref ir_find_aliasing_vload(ir_ctx *ctx, ir_ref ref, ir_type type, ir_ref var)
ir_ref ir_find_aliasing_vload(const ir_ctx *ctx, ir_ref ref, ir_type type, ir_ref var)
{
return ir_find_aliasing_vload_i(ctx, ref, type, var);
}
@@ -2547,12 +2548,12 @@ void _ir_BEGIN(ir_ctx *ctx, ir_ref src)
}
}
static ir_ref _ir_fold_condition(ir_ctx *ctx, ir_ref ref)
static ir_ref _ir_fold_condition(const ir_ctx *ctx, ir_ref ref)
{
ir_insn *insn = &ctx->ir_base[ref];
const ir_insn *insn = &ctx->ir_base[ref];
if (insn->op == IR_NE && IR_IS_CONST_REF(insn->op2)) {
ir_insn *op2_insn = &ctx->ir_base[insn->op2];
const ir_insn *op2_insn = &ctx->ir_base[insn->op2];
if (IR_IS_TYPE_INT(op2_insn->type) && op2_insn->val.u64 == 0) {
ref = insn->op1;
@@ -2565,7 +2566,7 @@ static ir_ref _ir_fold_condition(ir_ctx *ctx, ir_ref ref)
ref = insn->op1;
insn = &ctx->ir_base[ref];
} else if (insn->op == IR_EQ && insn->op2 == IR_NULL) {
ir_insn *op1_insn = &ctx->ir_base[insn->op1];
const ir_insn *op1_insn = &ctx->ir_base[insn->op1];
if (op1_insn->op == IR_ALLOCA || op1_insn->op == IR_VADDR) {
return IR_FALSE;
}
@@ -2577,10 +2578,10 @@ static ir_ref _ir_fold_condition(ir_ctx *ctx, ir_ref ref)
return ref;
}
IR_ALWAYS_INLINE ir_ref ir_check_dominating_predicates_i(ir_ctx *ctx, ir_ref ref, ir_ref condition, ir_ref limit)
IR_ALWAYS_INLINE ir_ref ir_check_dominating_predicates_i(const ir_ctx *ctx, ir_ref ref, ir_ref condition, ir_ref limit)
{
ir_insn *prev = NULL;
ir_insn *insn;
const ir_insn *prev = NULL;
const ir_insn *insn;
while (ref > limit) {
insn = &ctx->ir_base[ref];
@@ -2610,7 +2611,7 @@ IR_ALWAYS_INLINE ir_ref ir_check_dominating_predicates_i(ir_ctx *ctx, ir_ref ref
return condition;
}
ir_ref ir_check_dominating_predicates(ir_ctx *ctx, ir_ref ref, ir_ref condition)
ir_ref ir_check_dominating_predicates(const ir_ctx *ctx, ir_ref ref, ir_ref condition)
{
IR_ASSERT(!IR_IS_CONST_REF(condition));
return ir_check_dominating_predicates_i(ctx, ref, condition, (condition < ref) ? condition : 1);
@@ -2751,7 +2752,7 @@ void _ir_MERGE_LIST(ir_ctx *ctx, ir_ref list)
/* count inputs count */
do {
ir_insn *insn = &ctx->ir_base[ref];
const ir_insn *insn = &ctx->ir_base[ref];
IR_ASSERT(insn->op == IR_END);
ref = insn->op2;
@@ -2781,8 +2782,10 @@ void _ir_MERGE_LIST(ir_ctx *ctx, ir_ref list)
ir_ref _ir_PHI_LIST(ir_ctx *ctx, ir_ref list)
{
ir_insn *merge, *end;
ir_ref phi, *ops, i;
const ir_insn *merge;
const ir_ref *ops;
ir_insn *end;
ir_ref phi, i;
ir_type type;
if (list == IR_UNUSED) {
@@ -3246,7 +3249,8 @@ ir_ref _ir_VLOAD(ir_ctx *ctx, ir_type type, ir_ref var)
if (EXPECTED(ctx->flags & IR_OPT_FOLDING)) {
ref = ir_find_aliasing_vload_i(ctx, ctx->control, type, var);
if (ref) {
ir_insn *insn = &ctx->ir_base[ref];
const ir_insn *insn = &ctx->ir_base[ref];
if (insn->type == type) {
return ref;
} else if (ir_type_size[insn->type] == ir_type_size[type]) {
@@ -3312,7 +3316,8 @@ ir_ref _ir_LOAD(ir_ctx *ctx, ir_type type, ir_ref addr)
}
ref = ir_find_aliasing_load_i(ctx, ctx->control, type, addr, (addr > 0) ? addr : 1);
if (ref) {
ir_insn *insn = &ctx->ir_base[ref];
const ir_insn *insn = &ctx->ir_base[ref];
if (insn->type == type) {
return ref;
} else if (ir_type_size[insn->type] == ir_type_size[type]) {

View File

@@ -569,8 +569,6 @@ void ir_strtab_free(ir_strtab *strtab);
#define IR_OPT_CFG (1<<21) /* merge BBs, by remove END->BEGIN nodes during CFG construction */
#define IR_OPT_MEM2SSA (1<<22)
#define IR_OPT_CODEGEN (1<<23)
#define IR_GEN_NATIVE (1<<24)
#define IR_GEN_CODE (1<<25)
/* debug related */
#ifdef IR_DEBUG
@@ -771,7 +769,7 @@ ir_ref ir_emit3(ir_ctx *ctx, uint32_t opt, ir_ref op1, ir_ref op2, ir_ref op3);
ir_ref ir_emit_N(ir_ctx *ctx, uint32_t opt, int32_t count);
void ir_set_op(ir_ctx *ctx, ir_ref ref, int32_t n, ir_ref val);
ir_ref ir_get_op(ir_ctx *ctx, ir_ref ref, int32_t n);
ir_ref ir_get_op(const ir_ctx *ctx, ir_ref ref, int32_t n);
IR_ALWAYS_INLINE void ir_set_op1(ir_ctx *ctx, ir_ref ref, ir_ref val)
{
@@ -865,13 +863,13 @@ int ir_reg_alloc(ir_ctx *ctx);
int ir_regs_number(void);
bool ir_reg_is_int(int32_t reg);
const char *ir_reg_name(int8_t reg, ir_type type);
int32_t ir_get_spill_slot_offset(ir_ctx *ctx, ir_ref ref);
int32_t ir_get_spill_slot_offset(const ir_ctx *ctx, ir_ref ref);
/* Target CPU instruction selection and code generation (see ir_x86.c) */
int ir_match(ir_ctx *ctx);
void *ir_emit_code(ir_ctx *ctx, size_t *size);
bool ir_needs_thunk(ir_code_buffer *code_buffer, void *addr);
bool ir_needs_thunk(const ir_code_buffer *code_buffer, void *addr);
void *ir_emit_thunk(ir_code_buffer *code_buffer, void *addr, size_t *size_ptr);
void ir_fix_thunk(void *thunk_entry, void *addr);
@@ -947,13 +945,14 @@ int ir_load_llvm_asm(ir_loader *loader, const char *filename);
#define IR_SAVE_REGS (1<<4) /* add info about selected registers */
#define IR_SAVE_SAFE_NAMES (1<<5) /* add '@' prefix to symbol names */
void ir_print_func_proto(const ir_ctx *ctx, const char *name, bool prefix, FILE *f);
void ir_print_proto(const ir_ctx *ctx, ir_ref proto, FILE *f);
void ir_print_proto_ex(uint8_t flags, ir_type ret_type, uint32_t params_count, const uint8_t *param_types, FILE *f);
void ir_save(const ir_ctx *ctx, uint32_t save_flags, FILE *f);
/* IR debug dump API (implementation in ir_dump.c) */
void ir_dump(const ir_ctx *ctx, FILE *f);
void ir_dump_dot(const ir_ctx *ctx, const char *name, FILE *f);
void ir_dump_dot(const ir_ctx *ctx, const char *name, const char *comments, FILE *f);
void ir_dump_use_lists(const ir_ctx *ctx, FILE *f);
void ir_dump_cfg(ir_ctx *ctx, FILE *f);
void ir_dump_cfg_map(const ir_ctx *ctx, FILE *f);

View File

@@ -60,7 +60,7 @@ IR_ALWAYS_INLINE ir_mem IR_MEM(ir_reg base, int32_t offset, ir_reg index, int32_
#define ADR_IMM (1<<20) // signed imm21
#define ADRP_IMM (1LL<<32) // signed imm21 * 4096
static bool aarch64_may_use_b(ir_code_buffer *code_buffer, const void *addr)
static bool aarch64_may_use_b(const ir_code_buffer *code_buffer, const void *addr)
{
if (code_buffer) {
if (addr >= code_buffer->start && (char*)addr < (char*)code_buffer->end) {
@@ -824,6 +824,34 @@ static void ir_match_fuse_addr(ir_ctx *ctx, ir_ref addr_ref, ir_type type)
}
}
/* Return true iff "ref" is a comparison node (IR_EQ..IR_UNORDERED) whose every
 * user is a control node that can fuse the comparison directly (IF / GUARD /
 * GUARD_NOT).  Used by instruction selection to allow fusing a comparison that
 * has more than one use, as long as all uses are fusable. */
static bool all_usages_are_fusable(ir_ctx *ctx, ir_ref ref)
{
	const ir_insn *node = &ctx->ir_base[ref];
	const ir_use_list *use_list;
	const ir_ref *edge;
	ir_ref remaining;

	/* Only comparison nodes are candidates for fusion */
	if (node->op < IR_EQ || node->op > IR_UNORDERED) {
		return 0;
	}

	use_list = &ctx->use_lists[ref];
	remaining = use_list->count;
	if (remaining == 0) {
		/* a node without users is not considered fusable */
		return 0;
	}

	/* Every user must be a node that fuses its condition operand */
	for (edge = ctx->use_edges + use_list->refs; remaining > 0; edge++, remaining--) {
		const ir_insn *user = &ctx->ir_base[*edge];

		if (user->op != IR_IF
		 && user->op != IR_GUARD
		 && user->op != IR_GUARD_NOT) {
			return 0;
		}
	}
	return 1;
}
static uint32_t ir_match_insn(ir_ctx *ctx, ir_ref ref)
{
ir_insn *op2_insn;
@@ -1145,7 +1173,7 @@ binop_fp:
return IR_RETURN_FP;
}
case IR_IF:
if (!IR_IS_CONST_REF(insn->op2) && ctx->use_lists[insn->op2].count == 1) {
if (!IR_IS_CONST_REF(insn->op2) && (ctx->use_lists[insn->op2].count == 1 || all_usages_are_fusable(ctx, insn->op2))) {
op2_insn = &ctx->ir_base[insn->op2];
if (op2_insn->op >= IR_EQ && op2_insn->op <= IR_UNORDERED) {
if (IR_IS_TYPE_INT(ctx->ir_base[op2_insn->op1].type)) {
@@ -1168,13 +1196,13 @@ binop_fp:
}
case IR_GUARD:
case IR_GUARD_NOT:
if (!IR_IS_CONST_REF(insn->op2) && ctx->use_lists[insn->op2].count == 1) {
if (!IR_IS_CONST_REF(insn->op2) && (ctx->use_lists[insn->op2].count == 1 || all_usages_are_fusable(ctx, insn->op2))) {
op2_insn = &ctx->ir_base[insn->op2];
if (op2_insn->op >= IR_EQ && op2_insn->op <= IR_UNORDERED
if (op2_insn->op >= IR_EQ && op2_insn->op <= IR_UNORDERED) {
// TODO: register allocator may clobber operands of CMP before they are used in the GUARD_CMP
&& (insn->op2 == ref - 1 ||
(insn->op2 == ctx->prev_ref[ref] - 1
&& ctx->ir_base[ctx->prev_ref[ref]].op == IR_SNAPSHOT))) {
//??? && (insn->op2 == ref - 1 ||
//??? (insn->op2 == ctx->prev_ref[ref] - 1
//??? && ctx->ir_base[ctx->prev_ref[ref]].op == IR_SNAPSHOT))) {
if (IR_IS_TYPE_INT(ctx->ir_base[op2_insn->op1].type)) {
ctx->rules[insn->op2] = IR_FUSED | IR_CMP_INT;
return IR_GUARD_CMP_INT;
@@ -3084,7 +3112,7 @@ static void ir_emit_cmp_int(ir_ctx *ctx, ir_ref def, ir_insn *insn)
}
}
static ir_op ir_emit_cmp_fp_common(ir_ctx *ctx, ir_ref cmp_ref, ir_insn *cmp_insn)
static ir_op ir_emit_cmp_fp_common(ir_ctx *ctx, ir_ref root, ir_ref cmp_ref, ir_insn *cmp_insn)
{
ir_backend_data *data = ctx->data;
dasm_State **Dst = &data->dasm_state;
@@ -3093,16 +3121,12 @@ static ir_op ir_emit_cmp_fp_common(ir_ctx *ctx, ir_ref cmp_ref, ir_insn *cmp_ins
ir_ref op1, op2;
ir_reg op1_reg, op2_reg;
if (op == IR_LT || op == IR_LE) {
/* swap operands to avoid P flag check */
op ^= 3;
op1 = cmp_insn->op2;
op2 = cmp_insn->op1;
op1_reg = ctx->regs[cmp_ref][2];
op2_reg = ctx->regs[cmp_ref][1];
op1 = cmp_insn->op1;
op2 = cmp_insn->op2;
if (UNEXPECTED(ctx->rules[cmp_ref] & IR_FUSED_REG)) {
op1_reg = ir_get_fused_reg(ctx, root, cmp_ref * sizeof(ir_ref) + 1);
op2_reg = ir_get_fused_reg(ctx, root, cmp_ref * sizeof(ir_ref) + 2);
} else {
op1 = cmp_insn->op1;
op2 = cmp_insn->op2;
op1_reg = ctx->regs[cmp_ref][1];
op2_reg = ctx->regs[cmp_ref][2];
}
@@ -3131,7 +3155,7 @@ static void ir_emit_cmp_fp(ir_ctx *ctx, ir_ref def, ir_insn *insn)
{
ir_backend_data *data = ctx->data;
dasm_State **Dst = &data->dasm_state;
ir_op op = ir_emit_cmp_fp_common(ctx, def, insn);
ir_op op = ir_emit_cmp_fp_common(ctx, def, def, insn);
ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]);
//??? ir_reg tmp_reg = ctx->regs[def][3]; // TODO: take into account vs flag
@@ -3348,8 +3372,15 @@ static void ir_emit_cmp_and_branch_int(ir_ctx *ctx, uint32_t b, ir_ref def, ir_i
ir_type type = ctx->ir_base[cmp_insn->op1].type;
ir_ref op1 = cmp_insn->op1;
ir_ref op2 = cmp_insn->op2;
ir_reg op1_reg = ctx->regs[insn->op2][1];
ir_reg op2_reg = ctx->regs[insn->op2][2];
ir_reg op1_reg, op2_reg;
if (UNEXPECTED(ctx->rules[insn->op2] & IR_FUSED_REG)) {
op1_reg = ir_get_fused_reg(ctx, def, insn->op2 * sizeof(ir_ref) + 1);
op2_reg = ir_get_fused_reg(ctx, def, insn->op2 * sizeof(ir_ref) + 2);
} else {
op1_reg = ctx->regs[insn->op2][1];
op2_reg = ctx->regs[insn->op2][2];
}
if (op1_reg != IR_REG_NONE && IR_REG_SPILLED(op1_reg)) {
op1_reg = IR_REG_NUM(op1_reg);
@@ -3390,7 +3421,7 @@ static void ir_emit_cmp_and_branch_int(ir_ctx *ctx, uint32_t b, ir_ref def, ir_i
static void ir_emit_cmp_and_branch_fp(ir_ctx *ctx, uint32_t b, ir_ref def, ir_insn *insn, uint32_t next_block)
{
ir_op op = ir_emit_cmp_fp_common(ctx, insn->op2, &ctx->ir_base[insn->op2]);
ir_op op = ir_emit_cmp_fp_common(ctx, def, insn->op2, &ctx->ir_base[insn->op2]);
ir_emit_jcc(ctx, b, def, insn, next_block, op, 0);
}
@@ -3459,14 +3490,14 @@ static void ir_emit_cond(ir_ctx *ctx, ir_ref def, ir_insn *insn)
op3_reg = op2_reg;
}
}
if (op3 != op2 && IR_REG_SPILLED(op3_reg)) {
if (IR_REG_SPILLED(op3_reg)) {
op3_reg = IR_REG_NUM(op3_reg);
ir_emit_load(ctx, type, op3_reg, op3);
if (op1 == op2) {
if (op1 == op3) {
op1_reg = op3_reg;
}
}
if (op1 != op2 && op1 != op3 && IR_REG_SPILLED(op1_reg)) {
if (IR_REG_SPILLED(op1_reg)) {
op1_reg = IR_REG_NUM(op1_reg);
ir_emit_load(ctx, op1_type, op1_reg, op1);
}
@@ -5682,9 +5713,16 @@ static void ir_emit_guard_cmp_int(ir_ctx *ctx, uint32_t b, ir_ref def, ir_insn *
ir_type type = ctx->ir_base[cmp_insn->op1].type;
ir_ref op1 = cmp_insn->op1;
ir_ref op2 = cmp_insn->op2;
ir_reg op1_reg = ctx->regs[insn->op2][1];
ir_reg op2_reg = ctx->regs[insn->op2][2];
void *addr;
ir_reg op1_reg, op2_reg;
if (UNEXPECTED(ctx->rules[insn->op2] & IR_FUSED_REG)) {
op1_reg = ir_get_fused_reg(ctx, def, insn->op2 * sizeof(ir_ref) + 1);
op2_reg = ir_get_fused_reg(ctx, def, insn->op2 * sizeof(ir_ref) + 2);
} else {
op1_reg = ctx->regs[insn->op2][1];
op2_reg = ctx->regs[insn->op2][2];
}
if (op1_reg != IR_REG_NONE && IR_REG_SPILLED(op1_reg)) {
op1_reg = IR_REG_NUM(op1_reg);
@@ -5738,7 +5776,7 @@ static void ir_emit_guard_cmp_int(ir_ctx *ctx, uint32_t b, ir_ref def, ir_insn *
static void ir_emit_guard_cmp_fp(ir_ctx *ctx, uint32_t b, ir_ref def, ir_insn *insn)
{
ir_op op = ir_emit_cmp_fp_common(ctx, insn->op2, &ctx->ir_base[insn->op2]);
ir_op op = ir_emit_cmp_fp_common(ctx, def, insn->op2, &ctx->ir_base[insn->op2]);
void *addr = ir_jmp_addr(ctx, insn, &ctx->ir_base[insn->op3]);
if (insn->op == IR_GUARD) {
@@ -7143,7 +7181,7 @@ static int ir_add_veneer(dasm_State *Dst, void *buffer, uint32_t ins, int *b, ui
return n;
}
bool ir_needs_thunk(ir_code_buffer *code_buffer, void *addr)
bool ir_needs_thunk(const ir_code_buffer *code_buffer, void *addr)
{
return !aarch64_may_use_b(code_buffer, addr);
}

View File

@@ -77,12 +77,86 @@ void ir_reset_cfg(ir_ctx *ctx)
}
}
static void ir_remove_phis_inputs(ir_ctx *ctx, ir_use_list *use_list, int new_inputs_count, ir_bitset life_inputs)
{
ir_ref i, j, n, k, *p, *q, use;
ir_insn *use_insn;
if (new_inputs_count == 1) {
for (k = use_list->count, p = q = &ctx->use_edges[use_list->refs]; k > 0; p++, k--) {
use = *p;
use_insn = &ctx->ir_base[use];
if (use_insn->op == IR_PHI) {
/* Convert PHI to COPY */
n = use_insn->inputs_count;
i = 2;
for (j = 2; j <= n; j++) {
ir_ref input = ir_insn_op(use_insn, j);
if (ir_bitset_in(life_inputs, j - 1)) {
use_insn->op1 = ir_insn_op(use_insn, j);
} else if (input > 0) {
ir_use_list_remove_one(ctx, input, use);
}
}
use_insn->op = IR_COPY;
use_insn->inputs_count = 1;
for (j = 2; j <= n; j++) {
ir_insn_set_op(use_insn, j, IR_UNUSED);
}
continue;
}
/*compact use list */
if (p != q){
*q = use;
}
q++;
}
if (p != q) {
use_list->count -= (p - q);
do {
*q = IR_UNUSED; /* clenu-op the removed tail */
q++;
} while (p != q);
}
} else {
for (k = use_list->count, p = &ctx->use_edges[use_list->refs]; k > 0; p++, k--) {
use = *p;
use_insn = &ctx->ir_base[use];
if (use_insn->op == IR_PHI) {
n = use_insn->inputs_count;
i = 2;
for (j = 2; j <= n; j++) {
ir_ref input = ir_insn_op(use_insn, j);
if (ir_bitset_in(life_inputs, j - 1)) {
IR_ASSERT(input);
if (i != j) {
ir_insn_set_op(use_insn, i, input);
}
i++;
} else if (input > 0) {
ir_use_list_remove_one(ctx, input, use);
}
}
use_insn->inputs_count = i - 1;
for (j = i; j <= n; j++) {
ir_insn_set_op(use_insn, j, IR_UNUSED);
}
}
}
}
}
static uint32_t IR_NEVER_INLINE ir_cfg_remove_dead_inputs(ir_ctx *ctx, uint32_t *_blocks, ir_block *blocks, uint32_t bb_count)
{
uint32_t b, count = 0;
ir_block *bb = blocks + 1;
ir_insn *insn;
ir_ref i, j, n, *ops, input;
ir_bitset life_inputs = NULL;
for (b = 1; b <= bb_count; b++, bb++) {
bb->successors = count;
@@ -96,12 +170,27 @@ static uint32_t IR_NEVER_INLINE ir_cfg_remove_dead_inputs(ir_ctx *ctx, uint32_t
for (i = 1, j = 1; i <= n; i++) {
input = ops[i];
if (_blocks[input]) {
if (life_inputs) {
ir_bitset_incl(life_inputs, i);
}
if (i != j) {
ops[j] = ops[i];
}
j++;
} else if (input > 0) {
ir_use_list_remove_one(ctx, input, bb->start);
} else {
if (ctx->use_lists[bb->start].count > 1) {
/* Some inputs of this MERGE are deleted and we have to update the depended PHIs */
if (!life_inputs) {
int k;
life_inputs = ir_bitset_malloc(n + 1);
for (k = 1; k < i; k++) {
ir_bitset_incl(life_inputs, k);
}
}
}
if (input > 0) {
ir_use_list_remove_one(ctx, input, bb->start);
}
}
}
j--;
@@ -115,6 +204,10 @@ static uint32_t IR_NEVER_INLINE ir_cfg_remove_dead_inputs(ir_ctx *ctx, uint32_t
for (;j <= n; j++) {
ops[j] = IR_UNUSED;
}
if (life_inputs) {
ir_remove_phis_inputs(ctx, &ctx->use_lists[bb->start], insn->inputs_count, life_inputs);
ir_mem_free(life_inputs);
}
}
}
count += bb->predecessors_count;
@@ -375,8 +468,7 @@ static void ir_remove_predecessor(ir_ctx *ctx, ir_block *bb, uint32_t from)
static void ir_remove_merge_input(ir_ctx *ctx, ir_ref merge, ir_ref from)
{
ir_ref i, j, n, k, *p, *q, use;
ir_insn *use_insn;
ir_ref i, j, n;
ir_use_list *use_list;
ir_bitset life_inputs;
ir_insn *insn = &ctx->ir_base[merge];
@@ -402,80 +494,14 @@ static void ir_remove_merge_input(ir_ctx *ctx, ir_ref merge, ir_ref from)
}
if (i == 1) {
insn->op = IR_BEGIN;
insn->inputs_count = 1;
use_list = &ctx->use_lists[merge];
if (use_list->count > 1) {
n++;
for (k = use_list->count, p = q = &ctx->use_edges[use_list->refs]; k > 0; p++, k--) {
use = *p;
use_insn = &ctx->ir_base[use];
if (use_insn->op == IR_PHI) {
/* Convert PHI to COPY */
i = 2;
for (j = 2; j <= n; j++) {
ir_ref input = ir_insn_op(use_insn, j);
if (ir_bitset_in(life_inputs, j - 1)) {
use_insn->op1 = ir_insn_op(use_insn, j);
} else if (input > 0) {
ir_use_list_remove_one(ctx, input, use);
}
}
use_insn->op = IR_COPY;
use_insn->inputs_count = 1;
for (j = 2; j <= n; j++) {
ir_insn_set_op(use_insn, j, IR_UNUSED);
}
continue;
}
/*compact use list */
if (p != q){
*q = use;
}
q++;
}
if (p != q) {
use_list->count -= (p - q);
do {
*q = IR_UNUSED; /* clean up the removed tail */
q++;
} while (p != q);
}
}
} else {
insn->inputs_count = i;
use_list = &ctx->use_lists[merge];
if (use_list->count > 1) {
n++;
for (k = use_list->count, p = &ctx->use_edges[use_list->refs]; k > 0; p++, k--) {
use = *p;
use_insn = &ctx->ir_base[use];
if (use_insn->op == IR_PHI) {
i = 2;
for (j = 2; j <= n; j++) {
ir_ref input = ir_insn_op(use_insn, j);
if (ir_bitset_in(life_inputs, j - 1)) {
IR_ASSERT(input);
if (i != j) {
ir_insn_set_op(use_insn, i, input);
}
i++;
} else if (input > 0) {
ir_use_list_remove_one(ctx, input, use);
}
}
use_insn->inputs_count = i - 1;
for (j = i; j <= n; j++) {
ir_insn_set_op(use_insn, j, IR_UNUSED);
}
}
}
}
}
insn->inputs_count = i;
use_list = &ctx->use_lists[merge];
if (use_list->count > 1) {
ir_remove_phis_inputs(ctx, use_list, i, life_inputs);
}
ir_mem_free(life_inputs);
ir_use_list_remove_all(ctx, from, merge);
}

View File

@@ -60,7 +60,7 @@ void ir_dump(const ir_ctx *ctx, FILE *f)
}
}
void ir_dump_dot(const ir_ctx *ctx, const char *name, FILE *f)
void ir_dump_dot(const ir_ctx *ctx, const char *name, const char *comments, FILE *f)
{
int DATA_WEIGHT = 0;
int CONTROL_WEIGHT = 5;
@@ -70,6 +70,13 @@ void ir_dump_dot(const ir_ctx *ctx, const char *name, FILE *f)
uint32_t flags;
fprintf(f, "digraph %s {\n", name);
fprintf(f, "\tlabelloc=t;\n");
fprintf(f, "\tlabel=\"");
ir_print_func_proto(ctx, name, 0, f);
if (comments) {
fprintf(f, " # %s", comments);
}
fprintf(f, "\"\n");
fprintf(f, "\trankdir=TB;\n");
for (i = 1 - ctx->consts_count, insn = ctx->ir_base + i; i < IR_UNUSED; i++, insn++) {
fprintf(f, "\tc%d [label=\"C%d: CONST %s(", -i, -i, ir_type_name[insn->type]);

View File

@@ -971,7 +971,7 @@ int ir_match(ir_ctx *ctx)
return 1;
}
int32_t ir_get_spill_slot_offset(ir_ctx *ctx, ir_ref ref)
int32_t ir_get_spill_slot_offset(const ir_ctx *ctx, ir_ref ref)
{
int32_t offset;

View File

@@ -3439,5 +3439,84 @@ IR_FOLD(COND(_, _)) // TODO: COND(_, _, _)
if (op2 == op3) {
IR_FOLD_COPY(op2);
}
if (op1_insn->type == IR_BOOL) {
if (op2 == IR_TRUE) {
if (op3 == IR_FALSE) {
/* a ? true : false => a */
IR_FOLD_COPY(op1);
} else {
/* a ? true : b => a | b */
opt = IR_OPT(IR_OR, IR_BOOL);
op2 = op3;
op3 = IR_UNUSED;
IR_FOLD_RESTART;
}
} else if (op3 == IR_FALSE) {
/* a ? b : false => a & b */
opt = IR_OPT(IR_AND, IR_BOOL);
op3 = IR_UNUSED;
IR_FOLD_RESTART;
} else if (op2 == IR_FALSE) {
if (op3 == IR_TRUE) {
/* a ? false : true => !a */
opt = IR_OPT(IR_NOT, IR_BOOL);
op2 = IR_UNUSED;
op3 = IR_UNUSED;
IR_FOLD_RESTART;
}
} else if (IR_IS_TYPE_INT(IR_OPT_TYPE(opt))
&& IR_IS_CONST_REF(op2)
&& IR_IS_CONST_REF(op3)
&& op2_insn->val.u64 == 1
&& op3_insn->val.u64 == 0) {
if (ir_type_size[IR_OPT_TYPE(opt)] > 1) {
/* a ? 1 : 0 => ZEXT(a) */
opt = IR_OPT(IR_ZEXT, IR_OPT_TYPE(opt));
} else {
/* a ? 1 : 0 => BITCAST(a) */
opt = IR_OPT(IR_BITCAST, IR_OPT_TYPE(opt));
}
op2 = IR_UNUSED;
op3 = IR_UNUSED;
IR_FOLD_RESTART;
}
} else if (IR_IS_TYPE_INT(op1_insn->type)) {
if (op2 == IR_TRUE) {
if (op3 == IR_FALSE) {
opt = IR_OPT(IR_NE, IR_BOOL);
val.u64 = 0;
op2 = ir_const(ctx, val, op1_insn->type);
op3 = IR_UNUSED;
IR_FOLD_RESTART;
}
} else if (op2 == IR_FALSE) {
if (op3 == IR_TRUE) {
opt = IR_OPT(IR_EQ, IR_BOOL);
val.u64 = 0;
op2 = ir_const(ctx, val, op1_insn->type);
op3 = IR_UNUSED;
IR_FOLD_RESTART;
}
}
}
if (op1_insn->op == IR_NE) {
if (IR_IS_CONST_REF(op1_insn->op2)
&& IR_IS_TYPE_INT(ctx->ir_base[op1_insn->op2].type)
&& ctx->ir_base[op1_insn->op2].val.u64 == 0) {
op1 = op1_insn->op1;
IR_FOLD_RESTART;
}
} else if (op1_insn->op == IR_EQ) {
if (IR_IS_CONST_REF(op1_insn->op2)
&& IR_IS_TYPE_INT(ctx->ir_base[op1_insn->op2].type)
&& ctx->ir_base[op1_insn->op2].val.u64 == 0) {
op1 = op1_insn->op1;
SWAP_REFS(op2, op3);
IR_FOLD_RESTART;
}
}
IR_FOLD_NEXT;
}

View File

@@ -262,7 +262,7 @@ static bool ir_split_partially_dead_node(ir_ctx *ctx, ir_ref ref, uint32_t b)
#endif
/* 1.2. Iteratively check the predecessors of already found TOTALLY_USEFUL blocks and
* add them into TOTALLY_USEFUL set if all of their sucessors are already there.
* add them into TOTALLY_USEFUL set if all of their successors are already there.
*/
IR_SPARSE_SET_FOREACH(&data->totally_useful, i) {
_push_predecessors(ctx, &ctx->cfg_blocks[i], data);
@@ -788,7 +788,7 @@ IR_ALWAYS_INLINE ir_ref ir_count_constant(ir_ref *_xlat, ir_ref ref)
IR_ALWAYS_INLINE bool ir_is_good_bb_order(ir_ctx *ctx, uint32_t b, ir_block *bb, ir_ref start)
{
ir_insn *insn = &ctx->ir_base[start];
ir_insn *insn = &ctx->ir_base[start];
uint32_t n = insn->inputs_count;
ir_ref *p = insn->ops + 1;
@@ -924,25 +924,120 @@ next:
ctx->cfg_blocks = new_blocks;
}
#if IR_DEBUG
/* Debug helper: print to stderr the chain of instruction refs of one basic
 * block, following the "_next" linked list from "start" up to and including
 * "end", prefixed by "label" (e.g. "INITIAL" / "FINAL").
 * "ctx" and "b" are currently unused but kept for a uniform debug signature. */
static void ir_schedule_print_list(const ir_ctx *ctx, uint32_t b, const ir_ref *_next,
		ir_ref start, ir_ref end, const char *label)
{
	ir_ref ref;

	fprintf(stderr, " %s [%d", label, start);
	for (ref = _next[start]; ref != end; ref = _next[ref]) {
		fprintf(stderr, ",%d", ref);
	}
	/* loop exits with ref == end, so "end" is printed here */
	fprintf(stderr, ",%d]\n", ref);
}
#endif
/* Simple Stable Topological Sort
 *
 * Reorder the instructions of basic block "b" (the "_next"/"_prev"
 * doubly-linked schedule list between "ref" and "end", "end" excluded) so
 * that every instruction comes after all of its same-block inputs, while
 * keeping the original order as stable as possible.  As instructions are
 * finalized, their new reference is recorded in "_xlat" and "insns_count"
 * advances by the instruction's length; referenced constants not yet seen
 * are tallied into "consts_count".
 *
 * Precondition: _xlat[x] == 0 for every not-yet-scheduled instruction
 * (see the !_xlat[input] checks below).
 */
static void ir_schedule_topsort(const ir_ctx *ctx, uint32_t b, const ir_block *bb,
		ir_ref *_xlat, ir_ref *_next, ir_ref *_prev,
		ir_ref ref, ir_ref end,
		ir_ref *insns_count, ir_ref *consts_count)
{
	ir_ref i = ref;
	const ir_insn *insn;

	if (bb->successors_count > 1) {
		ir_ref input, j = bb->end;
		/* NOTE: shadows the "end" parameter for the rest of this block */
		ir_insn *end = &ctx->ir_base[j];

		if (end->op == IR_IF) {
			/* Move condition closer to IF */
			input = end->op2;
			if (input > 0
			 && ctx->cfg_map[input] == b
			 && !_xlat[input]
			 && _prev[j] != input
			 && (!(ir_op_flags[ctx->ir_base[input].op] & IR_OP_FLAG_CONTROL) || end->op1 == input)) {
				if (input == i) {
					/* the condition was the first unscheduled node: skip it */
					i = _next[i];
					/* NOTE(review): dead store - "insn" is reassigned at the
					 * top of the while loop below */
					insn = &ctx->ir_base[i];
				}
				/* remove "input" */
				_prev[_next[input]] = _prev[input];
				_next[_prev[input]] = _next[input];
				/* insert before "j" */
				_prev[input] = _prev[j];
				_next[input] = j;
				_next[_prev[j]] = input;
				_prev[j] = input;
			}
		}
	}

	while (i != end) {
		ir_ref n, j, input;
		const ir_ref *p;

restart:
		IR_ASSERT(ctx->cfg_map[i] == b);
		insn = &ctx->ir_base[i];
		n = insn->inputs_count;
		/* verify all inputs of "i" are already scheduled */
		for (j = n, p = insn->ops + 1; j > 0; p++, j--) {
			input = *p;
			if (!_xlat[input]) {
				/* input is not scheduled yet */
				if (input > 0) {
					if (ctx->cfg_map[input] == b) {
						/* "input" should be before "i" to satisfy dependency */
#ifdef IR_DEBUG
						if (ctx->flags & IR_DEBUG_SCHEDULE) {
							fprintf(stderr, "Wrong dependency %d:%d -> %d\n", b, input, i);
						}
#endif
						/* remove "input" */
						_prev[_next[input]] = _prev[input];
						_next[_prev[input]] = _next[input];
						/* insert before "i" */
						_prev[input] = _prev[i];
						_next[input] = i;
						_next[_prev[i]] = input;
						_prev[i] = input;
						/* restart from "input" */
						i = input;
						goto restart;
					}
				} else if (input < IR_TRUE) {
					/* first reference to this constant: account for it */
					*consts_count += ir_count_constant(_xlat, input);
				}
			}
		}
		/* all inputs scheduled: assign the new reference to "i" */
		_xlat[i] = *insns_count;
		*insns_count += ir_insn_inputs_to_len(n);
		IR_ASSERT(_next[i] != IR_UNUSED);
		i = _next[i];
	}
}
int ir_schedule(ir_ctx *ctx)
{
ir_ctx new_ctx;
ir_ref i, j, k, n, *p, *q, ref, new_ref, prev_ref, insns_count, consts_count, use_edges_count;
ir_ref *_xlat;
ir_ref *edges;
ir_ref prev_b_end;
uint32_t b;
uint32_t *_blocks = ctx->cfg_map;
ir_ref *_next = ir_mem_malloc(ctx->insns_count * sizeof(ir_ref));
ir_ref *_prev = ir_mem_malloc(ctx->insns_count * sizeof(ir_ref));
ir_block *bb;
ir_insn *insn, *new_insn;
ir_insn *insn, *new_insn, *base;
ir_use_list *lists, *use_list, *new_list;
bool bad_bb_order = 0;
/* Create a double-linked list of nodes ordered by BB, respecting BB->start and BB->end */
IR_ASSERT(_blocks[1] == 1);
IR_ASSERT(ctx->cfg_map[1] == 1);
/* link BB boundaries */
_prev[1] = 0;
@@ -950,30 +1045,34 @@ int ir_schedule(ir_ctx *ctx)
_next[1] = prev_b_end;
_prev[prev_b_end] = 1;
for (b = 2, bb = ctx->cfg_blocks + 2; b <= ctx->cfg_blocks_count; b++, bb++) {
_next[prev_b_end] = bb->start;
_prev[bb->start] = prev_b_end;
_next[bb->start] = bb->end;
_prev[bb->end] = bb->start;
prev_b_end = bb->end;
if (!ir_is_good_bb_order(ctx, b, bb, bb->start)) {
ir_ref start = bb->start;
ir_ref end = bb->end;
_next[prev_b_end] = start;
_prev[start] = prev_b_end;
_next[start] = end;
_prev[end] = start;
prev_b_end = end;
if (!ir_is_good_bb_order(ctx, b, bb, start)) {
bad_bb_order = 1;
}
}
_next[prev_b_end] = 0;
/* insert intermediate BB nodes */
for (i = 2, j = 1; i < ctx->insns_count; i++) {
b = _blocks[i];
use_edges_count = ctx->use_lists[1].count;
for (i = 2, use_list = &ctx->use_lists[i]; i < ctx->insns_count; use_list++, i++) {
b = ctx->cfg_map[i];
if (!b) continue;
use_edges_count += use_list->count;
bb = &ctx->cfg_blocks[b];
if (i != bb->start && i != bb->end) {
/* insert before "end" */
ir_ref n = bb->end;
ir_ref p = _prev[n];
_prev[i] = p;
_next[i] = n;
_next[p] = i;
_prev[n] = i;
ir_ref next = bb->end;
ir_ref prev = _prev[next];
_prev[i] = prev;
_next[i] = next;
_next[prev] = i;
_prev[next] = i;
}
}
@@ -981,15 +1080,6 @@ int ir_schedule(ir_ctx *ctx)
ir_fix_bb_order(ctx, _prev, _next);
}
#ifdef IR_DEBUG
if (ctx->flags & IR_DEBUG_SCHEDULE) {
fprintf(stderr, "Before Schedule\n");
for (i = 1; i != 0; i = _next[i]) {
fprintf(stderr, "%d -> %d\n", i, _blocks[i]);
}
}
#endif
_xlat = ir_mem_calloc((ctx->consts_count + ctx->insns_count), sizeof(ir_ref));
_xlat += ctx->consts_count;
_xlat[IR_TRUE] = IR_TRUE;
@@ -999,10 +1089,17 @@ int ir_schedule(ir_ctx *ctx)
insns_count = 1;
consts_count = -(IR_TRUE - 1);
/* Topological sort according dependencies inside each basic block */
/* Schedule instructions inside each BB (now just topological sort according to dependencies) */
for (b = 1, bb = ctx->cfg_blocks + 1; b <= ctx->cfg_blocks_count; b++, bb++) {
ir_ref start;
#ifdef IR_DEBUG
if (ctx->flags & IR_DEBUG_SCHEDULE) {
fprintf(stderr, "BB%d\n", b);
ir_schedule_print_list(ctx, b, _next, bb->start, bb->end, "INITIAL");
}
#endif
IR_ASSERT(!(bb->flags & IR_BB_UNREACHABLE));
/* Schedule BB start */
start = i = bb->start;
@@ -1062,8 +1159,8 @@ int ir_schedule(ir_ctx *ctx)
for (p = &ctx->use_edges[use_list->refs]; count > 0; p++, count--) {
ir_ref use = *p;
ir_insn *use_insn = &ctx->ir_base[use];
if (!_xlat[use] && (_blocks[use] || use_insn->op == IR_PARAM)) {
IR_ASSERT(_blocks[use] == b || use_insn->op == IR_PARAM);
if (!_xlat[use] && ctx->cfg_map[use]) {
IR_ASSERT(ctx->cfg_map[use] == b);
if (use_insn->op == IR_PARAM
|| use_insn->op == IR_VAR
|| use_insn->op == IR_PI
@@ -1100,76 +1197,20 @@ int ir_schedule(ir_ctx *ctx)
insn = &ctx->ir_base[i];
}
}
if (bb->successors_count > 1) {
ir_ref input, j = bb->end;
ir_insn *end = &ctx->ir_base[j];
if (end->op == IR_IF) {
/* Move condition closer to IF */
input = end->op2;
if (input > 0
&& _blocks[input] == b
&& !_xlat[input]
&& _prev[j] != input
&& (!(ir_op_flags[ctx->ir_base[input].op] & IR_OP_FLAG_CONTROL) || end->op1 == input)) {
if (input == i) {
i = _next[i];
insn = &ctx->ir_base[i];
}
/* remove "input" */
_prev[_next[input]] = _prev[input];
_next[_prev[input]] = _next[input];
/* insert before "j" */
_prev[input] = _prev[j];
_next[input] = j;
_next[_prev[j]] = input;
_prev[j] = input;
}
}
if (i != bb->end) {
ir_schedule_topsort(ctx, b, bb, _xlat, _next, _prev, i, bb->end, &insns_count, &consts_count);
}
while (i != bb->end) {
ir_ref n, j, *p, input;
restart:
IR_ASSERT(_blocks[i] == b);
n = insn->inputs_count;
for (j = n, p = insn->ops + 1; j > 0; p++, j--) {
input = *p;
if (!_xlat[input]) {
/* input is not scheduled yet */
if (input > 0) {
if (_blocks[input] == b) {
/* "input" should be before "i" to satisfy dependency */
#ifdef IR_DEBUG
if (ctx->flags & IR_DEBUG_SCHEDULE) {
fprintf(stderr, "Wrong dependency %d:%d -> %d\n", b, input, i);
}
#endif
/* remove "input" */
_prev[_next[input]] = _prev[input];
_next[_prev[input]] = _next[input];
/* insert before "i" */
_prev[input] = _prev[i];
_next[input] = i;
_next[_prev[i]] = input;
_prev[i] = input;
/* restart from "input" */
i = input;
insn = &ctx->ir_base[i];
goto restart;
}
} else if (input < IR_TRUE) {
consts_count += ir_count_constant(_xlat, input);
}
}
}
_xlat[i] = insns_count;
insns_count += ir_insn_inputs_to_len(n);
IR_ASSERT(_next[i] != IR_UNUSED);
i = _next[i];
insn = &ctx->ir_base[i];
if (ctx->flags & IR_DEBUG_SCHEDULE) {
ir_schedule_print_list(ctx, b, _next, start, bb->end, " FINAL");
}
#endif
/* Schedule BB end */
i = bb->end;
insn = &ctx->ir_base[i];
_xlat[i] = bb->end = insns_count;
insns_count++;
if (IR_INPUT_EDGES_COUNT(ir_op_flags[insn->op]) == 2) {
@@ -1179,15 +1220,6 @@ restart:
}
}
#ifdef IR_DEBUG
if (ctx->flags & IR_DEBUG_SCHEDULE) {
fprintf(stderr, "After Schedule\n");
for (i = 1; i != 0; i = _next[i]) {
fprintf(stderr, "%d -> %d (%d)\n", i, _blocks[i], _xlat[i]);
}
}
#endif
#if 1
/* Check if scheduling didn't make any modifications */
if (consts_count == ctx->consts_count && insns_count == ctx->insns_count) {
@@ -1215,113 +1247,55 @@ restart:
ir_mem_free(_prev);
ir_init(&new_ctx, ctx->flags, consts_count, insns_count);
new_ctx.insns_count = insns_count;
new_ctx.flags2 = ctx->flags2;
new_ctx.ret_type = ctx->ret_type;
new_ctx.value_params = ctx->value_params;
new_ctx.mflags = ctx->mflags;
new_ctx.spill_base = ctx->spill_base;
new_ctx.fixed_stack_red_zone = ctx->fixed_stack_red_zone;
new_ctx.fixed_stack_frame_size = ctx->fixed_stack_frame_size;
new_ctx.fixed_call_stack_size = ctx->fixed_call_stack_size;
new_ctx.fixed_regset = ctx->fixed_regset;
new_ctx.fixed_save_regset = ctx->fixed_save_regset;
new_ctx.entries_count = ctx->entries_count;
#if defined(IR_TARGET_AARCH64)
new_ctx.deoptimization_exits = ctx->deoptimization_exits;
new_ctx.get_exit_addr = ctx->get_exit_addr;
new_ctx.get_veneer = ctx->get_veneer;
new_ctx.set_veneer = ctx->set_veneer;
#endif
new_ctx.loader = ctx->loader;
uint32_t *map = ir_mem_calloc(insns_count, sizeof(uint32_t));
_prev = ir_mem_malloc(insns_count * sizeof(ir_ref));
lists = ir_mem_malloc(insns_count * sizeof(ir_use_list));
ir_ref *use_edges = edges = ir_mem_malloc(use_edges_count * sizeof(ir_ref));
base = ir_mem_malloc((consts_count + insns_count) * sizeof(ir_insn));
base += consts_count;
/* Copy constants */
if (consts_count == ctx->consts_count) {
new_ctx.consts_count = consts_count;
ref = 1 - consts_count;
insn = &ctx->ir_base[ref];
new_insn = &new_ctx.ir_base[ref];
memcpy(new_insn, insn, sizeof(ir_insn) * (IR_TRUE - ref));
if (ctx->strtab.data) {
while (ref != IR_TRUE) {
if (new_insn->op == IR_FUNC_ADDR) {
if (new_insn->proto) {
size_t len;
const char *proto = ir_get_strl(ctx, new_insn->proto, &len);
new_insn->proto = ir_strl(&new_ctx, proto, len);
}
} else if (new_insn->op == IR_FUNC) {
size_t len;
const char *name = ir_get_strl(ctx, new_insn->val.name, &len);
new_insn->val.u64 = ir_strl(&new_ctx, name, len);
if (new_insn->proto) {
const char *proto = ir_get_strl(ctx, new_insn->proto, &len);
new_insn->proto = ir_strl(&new_ctx, proto, len);
}
} else if (new_insn->op == IR_SYM || new_insn->op == IR_STR || new_insn->op == IR_LABEL) {
size_t len;
const char *str = ir_get_strl(ctx, new_insn->val.name, &len);
new_insn->val.u64 = ir_strl(&new_ctx, str, len);
}
new_insn++;
ref++;
}
if (ctx->consts_count == consts_count) {
memcpy(base - consts_count + 1, ctx->ir_base - consts_count + 1, sizeof(ir_insn) * consts_count);
for (j = -consts_count + 1; j < IR_TRUE; j++) {
_xlat[j] = j;
}
} else {
new_ref = -new_ctx.consts_count;
new_insn = &new_ctx.ir_base[new_ref];
for (ref = IR_TRUE - 1, insn = &ctx->ir_base[ref]; ref > -ctx->consts_count; insn--, ref--) {
if (!_xlat[ref]) {
continue;
ir_insn *src = ctx->ir_base - ctx->consts_count + 1;
ir_insn *dst = base - consts_count + 1;
i = -ctx->consts_count + 1;
j = -consts_count + 1;
while (i < IR_TRUE) {
if (_xlat[i]) {
*dst = *src;
dst->prev_const = 0;
_xlat[i] = j;
dst++;
j++;
}
new_insn->optx = insn->optx;
new_insn->prev_const = 0;
if (insn->op == IR_FUNC_ADDR) {
new_insn->val.u64 = insn->val.u64;
if (insn->proto) {
size_t len;
const char *proto = ir_get_strl(ctx, insn->proto, &len);
new_insn->proto = ir_strl(&new_ctx, proto, len);
} else {
new_insn->proto = 0;
}
} else if (insn->op == IR_FUNC) {
size_t len;
const char *name = ir_get_strl(ctx, insn->val.name, &len);
new_insn->val.u64 = ir_strl(&new_ctx, name, len);
if (insn->proto) {
const char *proto = ir_get_strl(ctx, insn->proto, &len);
new_insn->proto = ir_strl(&new_ctx, proto, len);
} else {
new_insn->proto = 0;
}
} else if (insn->op == IR_SYM || insn->op == IR_STR || insn->op == IR_LABEL) {
size_t len;
const char *str = ir_get_strl(ctx, insn->val.name, &len);
new_insn->val.u64 = ir_strl(&new_ctx, str, len);
} else {
new_insn->val.u64 = insn->val.u64;
}
_xlat[ref] = new_ref;
new_ref--;
new_insn--;
src++;
i++;
}
new_ctx.consts_count = -new_ref;
IR_ASSERT(j == IR_TRUE);
base[IR_TRUE].optx = IR_OPT(IR_C_BOOL, IR_BOOL);
base[IR_TRUE].val.u64 = 1;
base[IR_FALSE].optx = IR_OPT(IR_C_BOOL, IR_BOOL);
base[IR_FALSE].val.u64 = 0;
base[IR_NULL].optx = IR_OPT(IR_C_ADDR, IR_ADDR);
base[IR_NULL].val.u64 = 0;
MAKE_NOP(&base[IR_UNUSED]);
}
new_ctx.cfg_map = ir_mem_calloc(ctx->insns_count, sizeof(uint32_t));
new_ctx.prev_ref = _prev = ir_mem_malloc(insns_count * sizeof(ir_ref));
new_ctx.use_lists = lists = ir_mem_malloc(insns_count * sizeof(ir_use_list));
new_ctx.use_edges = edges = ir_mem_malloc(ctx->use_edges_count * sizeof(ir_ref));
/* Copy instructions, use lists and use edges */
#ifdef IR_DEBUG
ir_ref orig_use_edges_count = use_edges_count;
#endif
prev_ref = 0;
use_edges_count = 0;
for (i = 1; i != 0; i = _next[i]) {
new_ref = _xlat[i];
new_ctx.cfg_map[new_ref] = _blocks[i];
map[new_ref] = ctx->cfg_map[i];
_prev[new_ref] = prev_ref;
prev_ref = new_ref;
@@ -1330,7 +1304,7 @@ restart:
k = 0;
if (n == 1) {
ref = ctx->use_edges[use_list->refs];
if (_xlat[ref]) {
if (EXPECTED(_xlat[ref])) {
*edges = _xlat[ref];
edges++;
k = 1;
@@ -1339,7 +1313,7 @@ restart:
p = &ctx->use_edges[use_list->refs];
while (n--) {
ref = *p;
if (_xlat[ref]) {
if (EXPECTED(_xlat[ref])) {
*edges = _xlat[ref];
edges++;
k++;
@@ -1353,7 +1327,7 @@ restart:
new_list->count = k;
insn = &ctx->ir_base[i];
new_insn = &new_ctx.ir_base[new_ref];
new_insn = &base[new_ref];
new_insn->optx = insn->optx;
n = new_insn->inputs_count;
@@ -1365,11 +1339,7 @@ restart:
break;
case 1:
new_insn->op1 = _xlat[insn->op1];
if (new_insn->op == IR_PARAM || new_insn->op == IR_VAR || new_insn->op == IR_PROTO) {
size_t len;
const char *str = ir_get_strl(ctx, insn->op2, &len);
new_insn->op2 = ir_strl(&new_ctx, str, len);
} else if (new_insn->op == IR_BEGIN && insn->op2) {
if (new_insn->op == IR_BEGIN && insn->op2) {
new_insn->op2 = _xlat[insn->op2];
} else {
new_insn->op2 = insn->op2;
@@ -1428,12 +1398,12 @@ restart:
}
/* Update list of terminators (IR_OPND_CONTROL_REF) */
insn = &new_ctx.ir_base[1];
insn = &base[1];
ref = insn->op1;
if (ref) {
insn->op1 = ref = _xlat[ref];
while (1) {
insn = &new_ctx.ir_base[ref];
insn = &base[ref];
ref = insn->op3;
if (!ref) {
break;
@@ -1442,37 +1412,34 @@ restart:
}
}
IR_ASSERT(ctx->use_edges_count >= use_edges_count);
new_ctx.use_edges_count = use_edges_count;
new_ctx.use_edges = ir_mem_realloc(new_ctx.use_edges, use_edges_count * sizeof(ir_ref));
if (ctx->binding) {
ir_xlat_binding(ctx, _xlat);
new_ctx.binding = ctx->binding;
ctx->binding = NULL;
}
_xlat -= ctx->consts_count;
ir_mem_free(_xlat);
new_ctx.cfg_blocks_count = ctx->cfg_blocks_count;
new_ctx.cfg_edges_count = ctx->cfg_edges_count;
new_ctx.cfg_blocks = ctx->cfg_blocks;
new_ctx.cfg_edges = ctx->cfg_edges;
ctx->cfg_blocks = NULL;
ctx->cfg_edges = NULL;
ctx->value_params = NULL;
ir_code_buffer *saved_code_buffer = ctx->code_buffer;
ir_free(ctx);
IR_ASSERT(new_ctx.consts_count == new_ctx.consts_limit);
IR_ASSERT(new_ctx.insns_count == new_ctx.insns_limit);
memcpy(ctx, &new_ctx, sizeof(ir_ctx));
ctx->code_buffer = saved_code_buffer;
ctx->flags2 |= IR_LINEAR;
ir_mem_free(_next);
/* Switch to new IR buffer */
ir_mem_free(ctx->ir_base - ctx->consts_limit);
ctx->ir_base = base;
ctx->insns_count = ctx->insns_limit = insns_count;
ctx->consts_count = ctx->consts_limit = consts_count;
ir_mem_free(ctx->use_lists);
ir_mem_free(ctx->use_edges);
IR_ASSERT(orig_use_edges_count >= use_edges_count);
ctx->use_lists = lists;
ctx->use_edges = use_edges;
ctx->use_edges_count = use_edges_count;
ir_mem_free(ctx->cfg_map);
ctx->cfg_map = map;
ctx->prev_ref = _prev;
ctx->flags2 |= IR_LINEAR;
return 1;
}

View File

@@ -908,7 +908,7 @@ IR_ALWAYS_INLINE bool ir_const_is_true(const ir_insn *v)
return 0;
}
IR_ALWAYS_INLINE bool ir_ref_is_true(ir_ctx *ctx, ir_ref ref)
IR_ALWAYS_INLINE bool ir_ref_is_true(const ir_ctx *ctx, ir_ref ref)
{
if (ref == IR_TRUE) {
return 1;
@@ -1096,6 +1096,7 @@ void ir_replace(ir_ctx *ctx, ir_ref ref, ir_ref new_ref);
void ir_update_op(ir_ctx *ctx, ir_ref ref, uint32_t idx, ir_ref new_val);
/*** Iterative Optimization ***/
void ir_iter_add_uses(ir_ctx *ctx, ir_ref ref, ir_bitqueue *worklist);
void ir_iter_replace(ir_ctx *ctx, ir_ref ref, ir_ref new_ref, ir_bitqueue *worklist);
void ir_iter_update_op(ir_ctx *ctx, ir_ref ref, uint32_t idx, ir_ref new_val, ir_bitqueue *worklist);
void ir_iter_opt(ir_ctx *ctx, ir_bitqueue *worklist);
@@ -1179,16 +1180,17 @@ typedef enum _ir_fold_action {
IR_FOLD_DO_CONST
} ir_fold_action;
ir_ref ir_folding(ir_ctx *ctx, uint32_t opt, ir_ref op1, ir_ref op2, ir_ref op3, ir_insn *op1_insn, ir_insn *op2_insn, ir_insn *op3_insn);
ir_ref ir_folding(ir_ctx *ctx, uint32_t opt, ir_ref op1, ir_ref op2, ir_ref op3,
const ir_insn *op1_insn, const ir_insn *op2_insn, const ir_insn *op3_insn);
/*** Alias Analysis (see ir.c) ***/
ir_ref ir_find_aliasing_load(ir_ctx *ctx, ir_ref ref, ir_type type, ir_ref addr);
ir_ref ir_find_aliasing_vload(ir_ctx *ctx, ir_ref ref, ir_type type, ir_ref var);
ir_ref ir_find_aliasing_load(const ir_ctx *ctx, ir_ref ref, ir_type type, ir_ref addr);
ir_ref ir_find_aliasing_vload(const ir_ctx *ctx, ir_ref ref, ir_type type, ir_ref var);
ir_ref ir_find_aliasing_store(ir_ctx *ctx, ir_ref ref, ir_ref addr, ir_ref val);
ir_ref ir_find_aliasing_vstore(ir_ctx *ctx, ir_ref ref, ir_ref addr, ir_ref val);
/*** Predicates (see ir.c) ***/
ir_ref ir_check_dominating_predicates(ir_ctx *ctx, ir_ref ref, ir_ref condition);
ir_ref ir_check_dominating_predicates(const ir_ctx *ctx, ir_ref ref, ir_ref condition);
/*** IR Live Info ***/
typedef ir_ref ir_live_pos;
@@ -1468,9 +1470,7 @@ int ir_get_target_constraints(ir_ctx *ctx, ir_ref ref, ir_target_constraints *co
void ir_fix_stack_frame(ir_ctx *ctx);
/* Utility */
ir_type ir_get_return_type(ir_ctx *ctx);
const ir_proto_t *ir_call_proto(const ir_ctx *ctx, const ir_insn *insn);
void ir_print_call_conv(uint32_t flags, FILE *f);
//#define IR_BITSET_LIVENESS

View File

@@ -3761,14 +3761,13 @@ static void ir_set_fused_reg(ir_ctx *ctx, ir_ref root, ir_ref ref_and_op, int8_t
{
char key[10];
IR_ASSERT(reg != IR_REG_NONE);
if (!ctx->fused_regs) {
ctx->fused_regs = ir_mem_malloc(sizeof(ir_strtab));
ir_strtab_init(ctx->fused_regs, 8, 128);
}
memcpy(key, &root, sizeof(ir_ref));
memcpy(key + 4, &ref_and_op, sizeof(ir_ref));
ir_strtab_lookup(ctx->fused_regs, key, 8, 0x10000000 | reg);
ir_strtab_lookup(ctx->fused_regs, key, 8, 0x10000000 | (uint8_t)reg);
}
static void assign_regs(ir_ctx *ctx)
@@ -3874,93 +3873,88 @@ static void assign_regs(ir_ctx *ctx)
}
prev_use_ref = ref;
}
} else if ((!prev_use_ref || ctx->cfg_map[prev_use_ref] != ctx->cfg_map[ref])
&& needs_spill_reload(ctx, ival, ctx->cfg_map[ref], available)) {
if (!(use_pos->flags & IR_USE_MUST_BE_IN_REG)
&& use_pos->hint != reg
// && ctx->ir_base[ref].op != IR_CALL
// && ctx->ir_base[ref].op != IR_TAILCALL) {
&& ctx->ir_base[ref].op != IR_SNAPSHOT
&& !needs_spill_load(ctx, ival, use_pos)) {
/* fuse spill load (valid only when register is not reused) */
reg = IR_REG_NONE;
if (use_pos->next
&& use_pos->op_num == 1
&& use_pos->next->pos == use_pos->pos
&& !(use_pos->next->flags & IR_USE_MUST_BE_IN_REG)) {
/* Support for R2 = BINOP(R1, R1) */
if (use_pos->hint_ref < 0) {
ref = -use_pos->hint_ref;
} else {
if ((!prev_use_ref || ctx->cfg_map[prev_use_ref] != ctx->cfg_map[ref])
&& needs_spill_reload(ctx, ival, ctx->cfg_map[ref], available)) {
if (!(use_pos->flags & IR_USE_MUST_BE_IN_REG)
&& use_pos->hint != reg
// && ctx->ir_base[ref].op != IR_CALL
// && ctx->ir_base[ref].op != IR_TAILCALL) {
&& ctx->ir_base[ref].op != IR_SNAPSHOT
&& !needs_spill_load(ctx, ival, use_pos)) {
/* fuse spill load (valid only when register is not reused) */
reg = IR_REG_NONE;
if (use_pos->next
&& use_pos->op_num == 1
&& use_pos->next->pos == use_pos->pos
&& !(use_pos->next->flags & IR_USE_MUST_BE_IN_REG)) {
/* Support for R2 = BINOP(R1, R1) */
if (use_pos->hint_ref < 0) {
ref = -use_pos->hint_ref;
}
ir_set_alocated_reg(ctx, ref, use_pos->op_num, reg);
use_pos = use_pos->next;
}
} else {
if (top_ival->flags & IR_LIVE_INTERVAL_SPILL_SPECIAL) {
reg |= IR_REG_SPILL_SPECIAL;
} else {
reg |= IR_REG_SPILL_LOAD;
}
if (ctx->ir_base[ref].op != IR_SNAPSHOT && !(use_pos->flags & IR_PHI_USE)) {
uint32_t use_b = ctx->cfg_map[ref];
if (ir_ival_covers(ival, IR_SAVE_LIVE_POS_FROM_REF(ctx->cfg_blocks[use_b].end))) {
ir_bitset_incl(available, use_b);
}
prev_use_ref = ref;
}
ir_set_alocated_reg(ctx, ref, use_pos->op_num, reg);
use_pos = use_pos->next;
}
} else {
if (top_ival->flags & IR_LIVE_INTERVAL_SPILL_SPECIAL) {
reg |= IR_REG_SPILL_SPECIAL;
} else {
reg |= IR_REG_SPILL_LOAD;
}
if (ctx->ir_base[ref].op != IR_SNAPSHOT && !(use_pos->flags & IR_PHI_USE)) {
uint32_t use_b = ctx->cfg_map[ref];
/* reuse register without spill load */
}
if (ir_ival_covers(ival, IR_SAVE_LIVE_POS_FROM_REF(ctx->cfg_blocks[use_b].end))) {
ir_bitset_incl(available, use_b);
if (use_pos->hint_ref < 0) {
if (use_pos->flags & IR_PHI_USE) {
IR_ASSERT(use_pos->hint_ref < 0);
IR_ASSERT(ctx->vregs[-use_pos->hint_ref]);
IR_ASSERT(ctx->live_intervals[ctx->vregs[-use_pos->hint_ref]]);
if (ctx->live_intervals[ctx->vregs[-use_pos->hint_ref]]->flags & IR_LIVE_INTERVAL_SPILLED) {
/* Spilled PHI var is passed through memory */
reg = IR_REG_NONE;
}
prev_use_ref = ref;
}
}
if (use_pos->hint_ref < 0
&& (old_reg = ir_get_alocated_reg(ctx, -use_pos->hint_ref, use_pos->op_num)) != IR_REG_NONE) {
if (top_ival->flags & IR_LIVE_INTERVAL_SPILL_SPECIAL) {
reg |= IR_REG_SPILL_SPECIAL;
} else {
reg |= IR_REG_SPILL_LOAD;
}
if (reg != old_reg) {
IR_ASSERT(ctx->rules[-use_pos->hint_ref] & IR_FUSED);
ctx->rules[-use_pos->hint_ref] |= IR_FUSED_REG;
ir_set_fused_reg(ctx, ref, -use_pos->hint_ref * sizeof(ir_ref) + use_pos->op_num, reg);
use_pos = use_pos->next;
continue;
old_reg = ir_get_alocated_reg(ctx, -use_pos->hint_ref, use_pos->op_num);
if ((old_reg != IR_REG_NONE && reg != old_reg) || reg == IR_REG_NONE) {
ctx->rules[-use_pos->hint_ref] |= IR_FUSED_REG;
ir_set_fused_reg(ctx, ref, -use_pos->hint_ref * sizeof(ir_ref) + use_pos->op_num, reg);
use_pos = use_pos->next;
continue;
}
}
ref = -use_pos->hint_ref;
}
} else if (use_pos->flags & IR_PHI_USE) {
IR_ASSERT(use_pos->hint_ref < 0);
IR_ASSERT(ctx->vregs[-use_pos->hint_ref]);
IR_ASSERT(ctx->live_intervals[ctx->vregs[-use_pos->hint_ref]]);
if (ctx->live_intervals[ctx->vregs[-use_pos->hint_ref]]->flags & IR_LIVE_INTERVAL_SPILLED) {
/* Spilled PHI var is passed through memory */
reg = IR_REG_NONE;
}
} else if (use_pos->hint_ref < 0
&& (old_reg = ir_get_alocated_reg(ctx, -use_pos->hint_ref, use_pos->op_num)) != IR_REG_NONE) {
if (reg != old_reg) {
IR_ASSERT(ctx->rules[-use_pos->hint_ref] & IR_FUSED);
ctx->rules[-use_pos->hint_ref] |= IR_FUSED_REG;
ir_set_fused_reg(ctx, ref, -use_pos->hint_ref * sizeof(ir_ref) + use_pos->op_num, reg);
use_pos = use_pos->next;
continue;
}
} else {
/* reuse register without spill load */
}
if (use_pos->hint_ref < 0) {
ref = -use_pos->hint_ref;
}
ir_set_alocated_reg(ctx, ref, use_pos->op_num, reg);
use_pos = use_pos->next;
}
} else if (!(top_ival->flags & IR_LIVE_INTERVAL_SPILL_SPECIAL)) {
} else {
use_pos = ival->use_pos;
while (use_pos) {
ref = IR_LIVE_POS_TO_REF(use_pos->pos);
if (ctx->ir_base[ref].op == IR_SNAPSHOT) {
if (ctx->ir_base[ref].op == IR_SNAPSHOT
&& !(top_ival->flags & IR_LIVE_INTERVAL_SPILL_SPECIAL)) {
IR_ASSERT(use_pos->hint_ref >= 0);
/* A reference to a CPU spill slot */
reg = IR_REG_SPILL_STORE | IR_REG_STACK_POINTER;
ir_set_alocated_reg(ctx, ref, use_pos->op_num, reg);
} else if (use_pos->hint_ref < 0 && !(use_pos->flags & IR_PHI_USE)) {
IR_ASSERT(ctx->rules[-use_pos->hint_ref] & IR_FUSED);
ctx->rules[-use_pos->hint_ref] |= IR_FUSED_REG;
ir_set_fused_reg(ctx, ref, -use_pos->hint_ref * sizeof(ir_ref) + use_pos->op_num, IR_REG_NONE);
}
use_pos = use_pos->next;
}

View File

@@ -18,7 +18,7 @@ void ir_print_proto(const ir_ctx *ctx, ir_ref func_proto, FILE *f)
}
}
void ir_print_call_conv(uint32_t flags, FILE *f)
static void ir_print_call_conv(uint32_t flags, FILE *f)
{
switch (flags & IR_CALL_CONV_MASK) {
case IR_CC_BUILTIN:
@@ -75,6 +75,38 @@ void ir_print_proto_ex(uint8_t flags, ir_type ret_type, uint32_t params_count, c
}
}
/* Print the textual "func" declaration for the function held in "ctx" to "f",
 * e.g. "static func @name(int32, int32, ...): int32 __pure".
 *
 * ctx    - IR context of the function being printed
 * name   - function name to print
 * prefix - when true, prepend '@' before the name
 * f      - output stream
 *
 * Parameter types are recovered by scanning IR_PARAM instructions starting
 * at ref 2; the loop assumes they form a contiguous run there (consistent
 * with how this scan is written — TODO confirm against the IR builder). */
void ir_print_func_proto(const ir_ctx *ctx, const char *name, bool prefix, FILE *f)
{
	if (ctx->flags & IR_STATIC) {
		fprintf(f, "static ");
	}
	fprintf(f, "func %s%s(",
		prefix ? "@" : "",
		name);
	if (ctx->ir_base[2].op == IR_PARAM) {
		const ir_insn *insn = &ctx->ir_base[2];

		fprintf(f, "%s", ir_type_cname[insn->type]);
		insn++;
		while (insn->op == IR_PARAM) {
			fprintf(f, ", %s", ir_type_cname[insn->type]);
			insn++; /* was "insn++;;" — stray empty statement removed */
		}
		if (ctx->flags & IR_VARARG_FUNC) {
			fprintf(f, ", ...");
		}
	} else if (ctx->flags & IR_VARARG_FUNC) {
		fprintf(f, "...");
	}
	/* ret_type of (ir_type)-1 means "not set"; print it as void */
	fprintf(f, "): %s", ir_type_cname[ctx->ret_type != (ir_type)-1 ? ctx->ret_type : IR_VOID]);
	ir_print_call_conv(ctx->flags, f);
	if (ctx->flags & IR_CONST_FUNC) {
		fprintf(f, " __const");
	} else if (ctx->flags & IR_PURE_FUNC) {
		fprintf(f, " __pure");
	}
}
static void ir_save_dessa_moves(const ir_ctx *ctx, int b, ir_block *bb, FILE *f)
{
uint32_t succ;

View File

@@ -19,7 +19,6 @@
#define IR_TOP IR_UNUSED
#define IR_BOTTOM IR_LAST_OP
#define IR_MAKE_TOP(ref) do {IR_ASSERT(ref > 0); _values[ref].optx = IR_TOP;} while (0)
#define IR_MAKE_BOTTOM(ref) do {IR_ASSERT(ref > 0); _values[ref].optx = IR_BOTTOM;} while (0)
#define IR_IS_TOP(ref) (ref >= 0 && _values[ref].op == IR_TOP)
@@ -27,17 +26,57 @@
#define IR_IS_REACHABLE(ref) _ir_is_reachable_ctrl(ctx, _values, ref)
#define IR_IS_CONST(ref) (IR_IS_CONST_REF(ref) || IR_IS_CONST_OP(_values[ref].op))
IR_ALWAYS_INLINE bool _ir_is_reachable_ctrl(ir_ctx *ctx, ir_insn *_values, ir_ref ref)
/* Per-ref lattice value used by the SCCP (sparse conditional constant
 * propagation) solver. The layout deliberately overlays an ir_insn (the
 * "insn" member) so a constant lattice value can be handed to code that
 * expects an ir_insn, while the named members below give meaningful
 * aliases for the op1/op2/op3 slots in the non-constant states. */
typedef struct {
	union {
		struct {
			IR_STRUCT_LOHI(
				union {
					IR_STRUCT_LOHI(
						union {
							IR_STRUCT_LOHI(
								uint8_t op, /* {IR_TOP - unreachable, IR_BOTTOM - reachable} for control */
								            /* {IR_TOP | IR_COPY() | IR_CONST() | IR_BOTTOM} for data */
								            /* {IR_TOP | IR_MERGE() | IR_BOTTOM} for IR_MERGE */
								            /* {IR_TOP | IR_IF() | IR_BOTTOM} for IR_IF and IR_SWITCH */
								uint8_t type
							);
							uint16_t opt;
						},
						uint16_t _space_1 /* padding; keeps optx overlay aligned */
					);
					uint32_t optx; /* combined op/type/opt word, mirrors ir_insn.optx */
				},
				union { /* aliases for the op1 slot, one per lattice state */
					ir_ref copy;              /* identity for IR_COPY */
					ir_ref unfeasible_inputs; /* number of unfeasible inputs for IR_MERGE */
					ir_ref single_output;     /* reachable output for IR_IF */
					ir_ref visited;           /* for IR_TOP */
				}
			);
			union { /* aliases for the op2/op3 slots */
				struct {
					ir_ref next; /* double-linked identities list for IR_COPY */
					ir_ref prev; /* double-linked identities list for IR_COPY */
				};
				ir_val val; /* constant value for IR_CONST */
			};
		};
		ir_insn insn; /* constant insn for IR_CONST */
	};
} ir_sccp_val;
/* Return true when the control node "ref" has been marked reachable by the
 * SCCP solver, i.e. its lattice value moved off IR_TOP (to IR_BOTTOM, or to
 * the intermediate IR_IF/IR_MERGE states). Only valid for control nodes. */
IR_ALWAYS_INLINE bool _ir_is_reachable_ctrl(const ir_ctx *ctx, const ir_sccp_val *_values, ir_ref ref)
{
	IR_ASSERT(!IR_IS_CONST_REF(ref));
	IR_ASSERT(ir_op_flags[ctx->ir_base[ref].op] & IR_OP_FLAG_CONTROL);
	return _values[ref].op != IR_TOP; /* BOTTOM, IF or MERGE */
}
IR_ALWAYS_INLINE void ir_sccp_add_uses(ir_ctx *ctx, ir_insn *_values, ir_bitqueue *worklist, ir_ref ref)
IR_ALWAYS_INLINE void ir_sccp_add_uses(const ir_ctx *ctx, const ir_sccp_val *_values, ir_bitqueue *worklist, ir_ref ref)
{
ir_use_list *use_list;
ir_ref n, *p, use;
const ir_use_list *use_list;
const ir_ref *p;
ir_ref n, use;
IR_ASSERT(!IR_IS_CONST_REF(ref));
use_list = &ctx->use_lists[ref];
@@ -50,23 +89,23 @@ IR_ALWAYS_INLINE void ir_sccp_add_uses(ir_ctx *ctx, ir_insn *_values, ir_bitqueu
}
}
IR_ALWAYS_INLINE void ir_sccp_add_input(ir_ctx *ctx, ir_insn *_values, ir_bitqueue *worklist, ir_ref ref)
IR_ALWAYS_INLINE void ir_sccp_add_input(const ir_ctx *ctx, ir_sccp_val *_values, ir_bitqueue *worklist, ir_ref ref)
{
IR_ASSERT(!IR_IS_CONST_REF(ref));
IR_ASSERT(_values[ref].op == IR_TOP);
/* do backward propagation only once */
if (!_values[ref].op1) {
_values[ref].op1 = 1;
if (!_values[ref].visited) {
_values[ref].visited = 1;
ir_bitqueue_add(worklist, ref);
}
}
#if IR_COMBO_COPY_PROPAGATION
IR_ALWAYS_INLINE ir_ref ir_sccp_identity(ir_ctx *ctx, ir_insn *_values, ir_ref a)
IR_ALWAYS_INLINE ir_ref ir_sccp_identity(const ir_ctx *ctx, const ir_sccp_val *_values, ir_ref a)
{
if (a > 0 && _values[a].op == IR_COPY) {
do {
a = _values[a].op1;
a = _values[a].copy;
IR_ASSERT(a > 0);
} while (_values[a].op == IR_COPY);
IR_ASSERT(_values[a].op == IR_BOTTOM);
@@ -75,7 +114,7 @@ IR_ALWAYS_INLINE ir_ref ir_sccp_identity(ir_ctx *ctx, ir_insn *_values, ir_ref a
}
#if 0
static void CHECK_LIST(ir_insn *_values, ir_ref ref)
static void CHECK_LIST(ir_sccp_val *_values, ir_ref ref)
{
ir_ref member = _values[ref].op2;
while (member != ref) {
@@ -88,44 +127,44 @@ static void CHECK_LIST(ir_insn *_values, ir_ref ref)
# define CHECK_LIST(_values, ref)
#endif
static void ir_sccp_add_identity(ir_ctx *ctx, ir_insn *_values, ir_ref src, ir_ref dst)
static void ir_sccp_add_identity(const ir_ctx *ctx, ir_sccp_val *_values, ir_ref src, ir_ref dst)
{
IR_ASSERT(dst > 0 && _values[dst].op != IR_BOTTOM && _values[dst].op != IR_COPY);
IR_ASSERT((src > 0 && (_values[src].op == IR_BOTTOM || _values[src].op == IR_COPY)));
IR_ASSERT(ir_sccp_identity(ctx, _values, src) != dst);
_values[dst].optx = IR_COPY;
_values[dst].op1 = src;
_values[dst].copy = src;
if (_values[src].op == IR_BOTTOM) {
/* initialize empty double-linked list */
if (_values[src].op1 != src) {
_values[src].op1 = src;
_values[src].op2 = src;
_values[src].op3 = src;
if (_values[src].copy != src) {
_values[src].copy = src;
_values[src].next = src;
_values[src].prev = src;
}
} else {
src = ir_sccp_identity(ctx, _values, src);
}
/* insert into circular double-linked list */
ir_ref prev = _values[src].op3;
_values[dst].op2 = src;
_values[dst].op3 = prev;
_values[src].op3 = dst;
_values[prev].op2 = dst;
ir_ref prev = _values[src].prev;
_values[dst].next = src;
_values[dst].prev = prev;
_values[src].prev = dst;
_values[prev].next = dst;
CHECK_LIST(_values, dst);
}
static void ir_sccp_split_partition(ir_ctx *ctx, ir_insn *_values, ir_bitqueue *worklist, ir_ref ref)
static void ir_sccp_split_partition(const ir_ctx *ctx, ir_sccp_val *_values, ir_bitqueue *worklist, ir_ref ref)
{
ir_ref member, head, tail, next, prev;
CHECK_LIST(_values, ref);
IR_MAKE_BOTTOM(ref);
_values[ref].op1 = ref;
_values[ref].copy = ref;
member = _values[ref].op2;
member = _values[ref].next;
head = tail = IR_UNUSED;
while (member != ref) {
if (_values[member].op != IR_BOTTOM) {
@@ -133,19 +172,19 @@ static void ir_sccp_split_partition(ir_ctx *ctx, ir_insn *_values, ir_bitqueue *
}
ir_sccp_add_uses(ctx, _values, worklist, member);
next = _values[member].op2;
next = _values[member].next;
if (ir_sccp_identity(ctx, _values, member) == ref) {
/* remove "member" from the old circular double-linked list */
prev = _values[member].op3;
_values[prev].op2 = next;
_values[next].op3 = prev;
prev = _values[member].prev;
_values[prev].next = next;
_values[next].prev = prev;
/* insert "member" into the new double-linked list */
if (!head) {
head = tail = member;
} else {
_values[tail].op2 = member;
_values[member].op3 = tail;
_values[tail].next = member;
_values[member].prev = tail;
tail = member;
}
}
@@ -153,26 +192,26 @@ static void ir_sccp_split_partition(ir_ctx *ctx, ir_insn *_values, ir_bitqueue *
}
/* remove "ref" from the old circular double-linked list */
next = _values[ref].op2;
prev = _values[ref].op3;
_values[prev].op2 = next;
_values[next].op3 = prev;
next = _values[ref].next;
prev = _values[ref].prev;
_values[prev].next = next;
_values[next].prev = prev;
CHECK_LIST(_values, next);
/* close the new circle */
if (head) {
_values[ref].op2 = head;
_values[ref].op3 = tail;
_values[tail].op2 = ref;
_values[head].op3 = ref;
_values[ref].next = head;
_values[ref].prev = tail;
_values[tail].next = ref;
_values[head].prev = ref;
} else {
_values[ref].op2 = ref;
_values[ref].op3 = ref;
_values[ref].next = ref;
_values[ref].prev = ref;
}
CHECK_LIST(_values, ref);
}
IR_ALWAYS_INLINE void ir_sccp_make_bottom_ex(ir_ctx *ctx, ir_insn *_values, ir_bitqueue *worklist, ir_ref ref)
IR_ALWAYS_INLINE void ir_sccp_make_bottom_ex(const ir_ctx *ctx, ir_sccp_val *_values, ir_bitqueue *worklist, ir_ref ref)
{
if (_values[ref].op == IR_COPY) {
ir_sccp_split_partition(ctx, _values, worklist, ref);
@@ -187,7 +226,7 @@ IR_ALWAYS_INLINE void ir_sccp_make_bottom_ex(ir_ctx *ctx, ir_insn *_values, ir_b
# define IR_MAKE_BOTTOM_EX(ref) IR_MAKE_BOTTOM(ref)
#endif
IR_ALWAYS_INLINE bool ir_sccp_meet_const(ir_ctx *ctx, ir_insn *_values, ir_bitqueue *worklist, ir_ref ref, ir_insn *val_insn)
IR_ALWAYS_INLINE bool ir_sccp_meet_const(const ir_ctx *ctx, ir_sccp_val *_values, ir_bitqueue *worklist, ir_ref ref, const ir_insn *val_insn)
{
IR_ASSERT(IR_IS_CONST_OP(val_insn->op) || IR_IS_SYM_CONST(val_insn->op));
@@ -207,46 +246,51 @@ IR_ALWAYS_INLINE bool ir_sccp_meet_const(ir_ctx *ctx, ir_insn *_values, ir_bitqu
return 1;
}
IR_ALWAYS_INLINE bool ir_sccp_meet(ir_ctx *ctx, ir_insn *_values, ir_bitqueue *worklist, ir_ref ref, ir_ref val)
IR_ALWAYS_INLINE bool ir_sccp_meet_copy(const ir_ctx *ctx, ir_sccp_val *_values, ir_bitqueue *worklist, ir_ref ref, ir_ref val)
{
ir_ref val_identity = ir_sccp_identity(ctx, _values, val);
ir_insn *val_insn;
if (IR_IS_CONST_REF(val_identity)) {
val_insn = &ctx->ir_base[val_identity];
#if IR_COMBO_COPY_PROPAGATION
if (_values[ref].op == IR_COPY) {
/* COPY(OLD_VAL) meet COPY(NEW_VAL) =>
* (IDENTITY(OLD_VAL) == IDENTITY(NEW_VAL)) ? COPY(OLD_VAL) : BOTTOM */
if (ir_sccp_identity(ctx, _values, ref) == ir_sccp_identity(ctx, _values, val)) {
return 0; /* not changed */
}
ir_sccp_split_partition(ctx, _values, worklist, ref);
return 1;
} else {
val_insn = &_values[val_identity];
IR_ASSERT(_values[ref].op != IR_BOTTOM);
/* TOP meet COPY(NEW_VAL) -> COPY(NEW_VAL) */
/* OLD_CONST meet COPY(NEW_VAL) -> COPY(NEW_VAL) */
ir_sccp_add_identity(ctx, _values, val, ref);
return 1;
}
#endif
IR_MAKE_BOTTOM(ref);
return 1;
}
#if 0
IR_ALWAYS_INLINE bool ir_sccp_meet(const ir_ctx *ctx, ir_sccp_val *_values, ir_bitqueue *worklist, ir_ref ref, ir_ref val)
{
const ir_insn *val_insn;
if (IR_IS_CONST_REF(val)) {
val_insn = &ctx->ir_base[val];
} else {
val_insn = &_values[val].insn;
if (!IR_IS_CONST_OP(val_insn->op) && !IR_IS_SYM_CONST(val_insn->op)) {
#if IR_COMBO_COPY_PROPAGATION
if (_values[ref].op == IR_COPY) {
/* COPY(OLD_VAL) meet COPY(NEW_VAL) =>
* (IDENTITY(OLD_VAL) == IDENTITY(NEW_VAL)) ? COPY(OLD_VAL) : BOTTOM */
if (ir_sccp_identity(ctx, _values, ref) == val_identity) {
return 0; /* not changed */
}
ir_sccp_split_partition(ctx, _values, worklist, ref);
return 1;
} else {
IR_ASSERT(_values[ref].op != IR_BOTTOM);
/* TOP meet COPY(NEW_VAL) -> COPY(NEW_VAL) */
/* OLD_CONST meet COPY(NEW_VAL) -> COPY(NEW_VAL) */
ir_sccp_add_identity(ctx, _values, val, ref);
return 1;
}
#endif
IR_MAKE_BOTTOM(ref);
return 1;
return ir_sccp_meet_copy(ctx, _values, worklist, ref, val);
}
}
return ir_sccp_meet_const(ctx, _values, worklist, ref, val_insn);
}
#endif
static ir_ref ir_sccp_fold(ir_ctx *ctx, ir_insn *_values, ir_bitqueue *worklist, ir_ref ref, ir_insn *insn)
static ir_ref ir_sccp_fold(const ir_ctx *ctx, ir_sccp_val *_values, ir_bitqueue *worklist, ir_ref ref, const ir_insn *insn)
{
ir_insn *op1_insn, *op2_insn, *op3_insn;
const ir_insn *op1_insn, *op2_insn, *op3_insn;
ir_ref op1, op2, op3, copy;
uint32_t opt = insn->opt;
@@ -255,11 +299,11 @@ static ir_ref ir_sccp_fold(ir_ctx *ctx, ir_insn *_values, ir_bitqueue *worklist,
op3 = ir_sccp_identity(ctx, _values, insn->op3);
restart:
op1_insn = (op1 > 0 && IR_IS_CONST_OP(_values[op1].op)) ? _values + op1 : ctx->ir_base + op1;
op2_insn = (op2 > 0 && IR_IS_CONST_OP(_values[op2].op)) ? _values + op2 : ctx->ir_base + op2;
op3_insn = (op3 > 0 && IR_IS_CONST_OP(_values[op3].op)) ? _values + op3 : ctx->ir_base + op3;
op1_insn = (op1 > 0 && IR_IS_CONST_OP(_values[op1].op)) ? &_values[op1].insn : ctx->ir_base + op1;
op2_insn = (op2 > 0 && IR_IS_CONST_OP(_values[op2].op)) ? &_values[op2].insn : ctx->ir_base + op2;
op3_insn = (op3 > 0 && IR_IS_CONST_OP(_values[op3].op)) ? &_values[op3].insn : ctx->ir_base + op3;
switch (ir_folding(ctx, opt, op1, op2, op3, op1_insn, op2_insn, op3_insn)) {
switch (ir_folding((ir_ctx*)ctx, opt, op1, op2, op3, op1_insn, op2_insn, op3_insn)) {
case IR_FOLD_DO_RESTART:
opt = ctx->fold_insn.optx;
op1 = ctx->fold_insn.op1;
@@ -272,19 +316,30 @@ restart:
return 1;
case IR_FOLD_DO_COPY:
copy = ctx->fold_insn.op1;
return ir_sccp_meet(ctx, _values, worklist, ref, copy);
if (IR_IS_CONST_REF(copy)) {
insn = &ctx->ir_base[copy];
} else {
insn = &_values[copy].insn;
if (!IR_IS_CONST_OP(insn->op) && !IR_IS_SYM_CONST(insn->op)) {
return ir_sccp_meet_copy(ctx, _values, worklist, ref, copy);
}
}
goto meet_const;
case IR_FOLD_DO_CONST:
return ir_sccp_meet_const(ctx, _values, worklist, ref, &ctx->fold_insn);
insn = &ctx->fold_insn;
meet_const:
return ir_sccp_meet_const(ctx, _values, worklist, ref, insn);
default:
IR_ASSERT(0);
return 0;
}
}
static bool ir_sccp_analyze_phi(ir_ctx *ctx, ir_insn *_values, ir_bitqueue *worklist, ir_ref i, ir_insn *insn)
static bool ir_sccp_analyze_phi(const ir_ctx *ctx, ir_sccp_val *_values, ir_bitqueue *worklist, ir_ref i, const ir_insn *insn)
{
ir_ref j, n, input, *merge_input, *p;
ir_insn *v, *new_const = NULL;
ir_ref j, n, input;
const ir_ref *merge_input, *p;
const ir_insn *v, *new_const = NULL;
#if IR_COMBO_COPY_PROPAGATION
ir_ref new_copy = IR_UNUSED;
ir_ref new_copy_identity = IR_UNUSED;
@@ -315,7 +370,7 @@ static bool ir_sccp_analyze_phi(ir_ctx *ctx, ir_insn *_values, ir_bitqueue *work
} else if (input == i) {
continue;
} else {
v = &_values[input];
v = &_values[input].insn;
if (v->op == IR_TOP) {
ir_sccp_add_input(ctx, _values, worklist, input);
continue;
@@ -369,7 +424,7 @@ next:
} else if (input == i) {
continue;
} else {
v = &_values[input];
v = &_values[input].insn;
if (v->op == IR_TOP) {
ir_sccp_add_input(ctx, _values, worklist, input);
continue;
@@ -398,7 +453,9 @@ next:
#if IR_COMBO_COPY_PROPAGATION
if (new_copy) {
return ir_sccp_meet(ctx, _values, worklist, i, new_copy);
IR_ASSERT(!IR_IS_CONST_REF(new_copy));
IR_ASSERT(!IR_IS_CONST_OP(_values[new_copy].op) && !IR_IS_SYM_CONST(_values[new_copy].op));
return ir_sccp_meet_copy(ctx, _values, worklist, i, new_copy);
}
#endif
@@ -409,7 +466,7 @@ make_bottom:
return 1;
}
static bool ir_is_dead_load_ex(ir_ctx *ctx, ir_ref ref, uint32_t flags, ir_insn *insn)
static bool ir_is_dead_load_ex(const ir_ctx *ctx, ir_ref ref, uint32_t flags, const ir_insn *insn)
{
if ((flags & (IR_OP_FLAG_MEM|IR_OP_FLAG_MEM_MASK)) == (IR_OP_FLAG_MEM|IR_OP_FLAG_MEM_LOAD)) {
return ctx->use_lists[ref].count == 1;
@@ -419,10 +476,10 @@ static bool ir_is_dead_load_ex(ir_ctx *ctx, ir_ref ref, uint32_t flags, ir_insn
return 0;
}
static bool ir_is_dead_load(ir_ctx *ctx, ir_ref ref)
static bool ir_is_dead_load(const ir_ctx *ctx, ir_ref ref)
{
if (ctx->use_lists[ref].count == 1) {
ir_insn *insn = &ctx->ir_base[ref];
const ir_insn *insn = &ctx->ir_base[ref];
uint32_t flags = ir_op_flags[insn->op];
if ((flags & (IR_OP_FLAG_MEM|IR_OP_FLAG_MEM_MASK)) == (IR_OP_FLAG_MEM|IR_OP_FLAG_MEM_LOAD)) {
@@ -434,7 +491,7 @@ static bool ir_is_dead_load(ir_ctx *ctx, ir_ref ref)
return 0;
}
static bool ir_is_dead(ir_ctx *ctx, ir_ref ref)
static bool ir_is_dead(const ir_ctx *ctx, ir_ref ref)
{
if (ctx->use_lists[ref].count == 0) {
return IR_IS_FOLDABLE_OP(ctx->ir_base[ref].op);
@@ -444,28 +501,28 @@ static bool ir_is_dead(ir_ctx *ctx, ir_ref ref)
return 0;
}
static bool ir_sccp_is_true(ir_ctx *ctx, ir_insn *_values, ir_ref a)
static bool ir_sccp_is_true(const ir_ctx *ctx, const ir_sccp_val *_values, ir_ref a)
{
ir_insn *v = IR_IS_CONST_REF(a) ? &ctx->ir_base[a] : &_values[a];
const ir_insn *v = IR_IS_CONST_REF(a) ? &ctx->ir_base[a] : &_values[a].insn;
return ir_const_is_true(v);
}
static bool ir_sccp_is_equal(ir_ctx *ctx, ir_insn *_values, ir_ref a, ir_ref b)
static bool ir_sccp_is_equal(const ir_ctx *ctx, const ir_sccp_val *_values, ir_ref a, ir_ref b)
{
ir_insn *v1 = IR_IS_CONST_REF(a) ? &ctx->ir_base[a] : &_values[a];
ir_insn *v2 = IR_IS_CONST_REF(b) ? &ctx->ir_base[b] : &_values[b];
const ir_insn *v1 = IR_IS_CONST_REF(a) ? &ctx->ir_base[a] : &_values[a].insn;
const ir_insn *v2 = IR_IS_CONST_REF(b) ? &ctx->ir_base[b] : &_values[b].insn;
IR_ASSERT(!IR_IS_SYM_CONST(v1->op));
IR_ASSERT(!IR_IS_SYM_CONST(v2->op));
return v1->val.u64 == v2->val.u64;
}
static bool ir_sccp_in_range(ir_ctx *ctx, ir_insn *_values, ir_ref a, ir_ref b, ir_ref c)
static bool ir_sccp_in_range(const ir_ctx *ctx, const ir_sccp_val *_values, ir_ref a, ir_ref b, ir_ref c)
{
ir_insn *v1 = IR_IS_CONST_REF(a) ? &ctx->ir_base[a] : &_values[a];
ir_insn *v2 = IR_IS_CONST_REF(b) ? &ctx->ir_base[b] : &_values[b];
ir_insn *v3 = IR_IS_CONST_REF(c) ? &ctx->ir_base[c] : &_values[c];
const ir_insn *v1 = IR_IS_CONST_REF(a) ? &ctx->ir_base[a] : &_values[a].insn;
const ir_insn *v2 = IR_IS_CONST_REF(b) ? &ctx->ir_base[b] : &_values[b].insn;
const ir_insn *v3 = IR_IS_CONST_REF(c) ? &ctx->ir_base[c] : &_values[c].insn;
IR_ASSERT(!IR_IS_SYM_CONST(v1->op));
IR_ASSERT(!IR_IS_SYM_CONST(v2->op));
@@ -478,13 +535,13 @@ static bool ir_sccp_in_range(ir_ctx *ctx, ir_insn *_values, ir_ref a, ir_ref b,
}
#ifdef IR_SCCP_TRACE
static void ir_sccp_trace_val(ir_ctx *ctx, ir_insn *_values, ir_ref i)
static void ir_sccp_trace_val(const ir_ctx *ctx, const ir_sccp_val *_values, ir_ref i)
{
if (IR_IS_BOTTOM(i)) {
fprintf(stderr, "BOTTOM");
} else if (IR_IS_CONST_OP(_values[i].op) || IR_IS_SYM_CONST(_values[i].op)) {
fprintf(stderr, "CONST(");
ir_print_const(ctx, &_values[i], stderr, true);
ir_print_const(ctx, &_values[i].insn, stderr, true);
fprintf(stderr, ")");
#if IR_COMBO_COPY_PROPAGATION
} else if (_values[i].op == IR_COPY) {
@@ -501,13 +558,13 @@ static void ir_sccp_trace_val(ir_ctx *ctx, ir_insn *_values, ir_ref i)
}
}
static void ir_sccp_trace_start(ir_ctx *ctx, ir_insn *_values, ir_ref i)
static void ir_sccp_trace_start(const ir_ctx *ctx, const ir_sccp_val *_values, ir_ref i)
{
fprintf(stderr, "%d. ", i);
ir_sccp_trace_val(ctx, _values, i);
}
static void ir_sccp_trace_end(ir_ctx *ctx, ir_insn *_values, ir_ref i)
static void ir_sccp_trace_end(const ir_ctx *ctx, const ir_sccp_val *_values, ir_ref i)
{
fprintf(stderr, " -> ");
ir_sccp_trace_val(ctx, _values, i);
@@ -518,11 +575,12 @@ static void ir_sccp_trace_end(ir_ctx *ctx, ir_insn *_values, ir_ref i)
# define ir_sccp_trace_end(c, v, i)
#endif
static IR_NEVER_INLINE void ir_sccp_analyze(ir_ctx *ctx, ir_insn *_values, ir_bitqueue *worklist, ir_bitqueue *iter_worklist)
static IR_NEVER_INLINE void ir_sccp_analyze(const ir_ctx *ctx, ir_sccp_val *_values, ir_bitqueue *worklist, ir_bitqueue *iter_worklist)
{
ir_ref i, j, n, *p, use;
ir_use_list *use_list;
ir_insn *insn, *use_insn;
ir_ref i, j, n, use;
const ir_ref *p;
const ir_use_list *use_list;
const ir_insn *insn, *use_insn;
uint32_t flags;
/* A bit modified SCCP algorithm of M. N. Wegman and F. K. Zadeck */
@@ -610,7 +668,7 @@ static IR_NEVER_INLINE void ir_sccp_analyze(ir_ctx *ctx, ir_insn *_values, ir_bi
}
}
for (p = insn->ops + 1; n > 0; p++, n--) {
ir_ref input = *p;
const ir_ref input = *p;
IR_ASSERT(input > 0);
if (!IR_IS_REACHABLE(input)) {
unfeasible_inputs++;
@@ -618,9 +676,9 @@ static IR_NEVER_INLINE void ir_sccp_analyze(ir_ctx *ctx, ir_insn *_values, ir_bi
}
if (unfeasible_inputs == 0) {
IR_MAKE_BOTTOM(i);
} else if (_values[i].op != IR_MERGE || _values[i].op1 != unfeasible_inputs) {
} else if (_values[i].op != IR_MERGE || _values[i].unfeasible_inputs != unfeasible_inputs) {
_values[i].optx = IR_MERGE;
_values[i].op1 = unfeasible_inputs;
_values[i].unfeasible_inputs = unfeasible_inputs;
} else {
continue;
}
@@ -674,10 +732,10 @@ static IR_NEVER_INLINE void ir_sccp_analyze(ir_ctx *ctx, ir_insn *_values, ir_bi
}
if (_values[i].op == IR_TOP) {
_values[i].optx = IR_IF;
_values[i].op1 = use;
_values[i].single_output = use;
ir_bitqueue_add(worklist, use);
continue;
} else if (_values[i].op == IR_IF && _values[i].op1 == use) {
} else if (_values[i].op == IR_IF && _values[i].single_output == use) {
continue;
}
}
@@ -715,10 +773,10 @@ static IR_NEVER_INLINE void ir_sccp_analyze(ir_ctx *ctx, ir_insn *_values, ir_bi
use_insn = &ctx->ir_base[use_case];
if (_values[i].op == IR_TOP) {
_values[i].optx = IR_IF;
_values[i].op1 = use_case;
_values[i].single_output = use_case;
ir_bitqueue_add(worklist, use_case);
continue;
} else if (_values[i].op == IR_IF || _values[i].op1 == use_case) {
} else if (_values[i].op == IR_IF || _values[i].single_output == use_case) {
continue;
}
}
@@ -768,18 +826,20 @@ static IR_NEVER_INLINE void ir_sccp_analyze(ir_ctx *ctx, ir_insn *_values, ir_bi
for (i = 1; i < ctx->insns_count; i++) {
if (IR_IS_CONST_OP(_values[i].op) || IR_IS_SYM_CONST(_values[i].op)) {
fprintf(stderr, "%d. CONST(", i);
ir_print_const(ctx, &_values[i], stderr, true);
ir_print_const(ctx, &_values[i].insn, stderr, true);
fprintf(stderr, ")\n");
#if IR_COMBO_COPY_PROPAGATION
} else if (_values[i].op == IR_COPY) {
fprintf(stderr, "%d. COPY(%d)\n", i, _values[i].op1);
fprintf(stderr, "%d. COPY(%d)\n", i, _values[i].copy);
#endif
} else if (IR_IS_TOP(i)) {
fprintf(stderr, "%d. TOP\n", i);
if (ctx->ir_base[i].op != IR_TOP) {
fprintf(stderr, "%d. TOP\n", i);
}
} else if (_values[i].op == IR_IF) {
fprintf(stderr, "%d. IF(%d)\n", i, _values[i].op1);
fprintf(stderr, "%d. IF(%d)\n", i, _values[i].single_output);
} else if (_values[i].op == IR_MERGE) {
fprintf(stderr, "%d. MERGE(%d)\n", i, _values[i].op1);
fprintf(stderr, "%d. MERGE(%d)\n", i, _values[i].unfeasible_inputs);
} else if (!IR_IS_BOTTOM(i)) {
fprintf(stderr, "%d. %d\n", i, _values[i].op);
}
@@ -806,7 +866,7 @@ static void ir_sccp_make_nop(ir_ctx *ctx, ir_ref ref)
}
}
static void ir_sccp_remove_insn(ir_ctx *ctx, ir_insn *_values, ir_ref ref, ir_bitqueue *worklist)
static void ir_sccp_remove_insn(ir_ctx *ctx, const ir_sccp_val *_values, ir_ref ref, ir_bitqueue *worklist)
{
ir_ref j, n, *p;
ir_insn *insn;
@@ -829,7 +889,7 @@ static void ir_sccp_remove_insn(ir_ctx *ctx, ir_insn *_values, ir_ref ref, ir_bi
}
}
static void ir_sccp_replace_insn(ir_ctx *ctx, ir_insn *_values, ir_ref ref, ir_ref new_ref, ir_bitqueue *worklist)
static void ir_sccp_replace_insn(ir_ctx *ctx, const ir_sccp_val *_values, ir_ref ref, ir_ref new_ref, ir_bitqueue *worklist)
{
ir_ref j, n, *p, use, i;
ir_insn *insn;
@@ -907,7 +967,7 @@ static void ir_sccp_replace_insn(ir_ctx *ctx, ir_insn *_values, ir_ref ref, ir_r
CLEAR_USES(ref);
}
static void ir_sccp_remove_if(ir_ctx *ctx, ir_insn *_values, ir_ref ref, ir_ref dst)
static void ir_sccp_remove_if(ir_ctx *ctx, const ir_sccp_val *_values, ir_ref ref, ir_ref dst)
{
ir_ref next;
ir_insn *insn, *next_insn;
@@ -1054,10 +1114,10 @@ static bool ir_sccp_remove_unfeasible_merge_inputs(ir_ctx *ctx, ir_ref ref, ir_i
return 1;
}
static IR_NEVER_INLINE void ir_sccp_transform(ir_ctx *ctx, ir_insn *_values, ir_bitqueue *worklist, ir_bitqueue *iter_worklist)
static IR_NEVER_INLINE void ir_sccp_transform(ir_ctx *ctx, const ir_sccp_val *_values, ir_bitqueue *worklist, ir_bitqueue *iter_worklist)
{
ir_ref i, j;
ir_insn *value;
const ir_sccp_val *value;
for (i = 1, value = _values + i; i < ctx->insns_count; value++, i++) {
if (value->op == IR_BOTTOM) {
@@ -1072,7 +1132,7 @@ static IR_NEVER_INLINE void ir_sccp_transform(ir_ctx *ctx, ir_insn *_values, ir_
ir_sccp_replace_insn(ctx, _values, i, j, iter_worklist);
#if IR_COMBO_COPY_PROPAGATION
} else if (value->op == IR_COPY) {
ir_sccp_replace_insn(ctx, _values, i, ir_sccp_identity(ctx, _values, value->op1), iter_worklist);
ir_sccp_replace_insn(ctx, _values, i, ir_sccp_identity(ctx, _values, value->copy), iter_worklist);
#endif
} else if (value->op == IR_TOP) {
/* remove unreachable instruction */
@@ -1104,7 +1164,7 @@ static IR_NEVER_INLINE void ir_sccp_transform(ir_ctx *ctx, ir_insn *_values, ir_
}
} else if (value->op == IR_IF) {
/* remove one way IF/SWITCH */
ir_sccp_remove_if(ctx, _values, i, value->op1);
ir_sccp_remove_if(ctx, _values, i, value->single_output);
} else if (value->op == IR_MERGE) {
/* schedule merge to remove unfeasible MERGE inputs */
ir_bitqueue_add(worklist, i);
@@ -1121,6 +1181,16 @@ static IR_NEVER_INLINE void ir_sccp_transform(ir_ctx *ctx, ir_insn *_values, ir_
/* Iterative Optimizations */
/***************************/
void ir_iter_add_uses(ir_ctx *ctx, ir_ref ref, ir_bitqueue *worklist)
{
ir_use_list *use_list = &ctx->use_lists[ref];
ir_ref *p, n = use_list->count;
for (p = &ctx->use_edges[use_list->refs]; n > 0; p++, n--) {
ir_bitqueue_add(worklist, *p);
}
}
/* Modification of some instruction may open new optimization oprtunities for other
* instructions that use this one.
*
@@ -1132,16 +1202,16 @@ static IR_NEVER_INLINE void ir_sccp_transform(ir_ctx *ctx, ir_insn *_values, ir_
*
* TODO: Think abput a more general solution ???
*/
static void ir_iter_add_related_uses(ir_ctx *ctx, ir_ref ref, ir_bitqueue *worklist)
static void ir_iter_add_related_uses(const ir_ctx *ctx, ir_ref ref, ir_bitqueue *worklist)
{
ir_insn *insn = &ctx->ir_base[ref];
const ir_insn *insn = &ctx->ir_base[ref];
if (insn->op == IR_ADD || insn->op == IR_SUB) {
ir_use_list *use_list = &ctx->use_lists[ref];
const ir_use_list *use_list = &ctx->use_lists[ref];
if (use_list->count == 1) {
ir_ref use = ctx->use_edges[use_list->refs];
ir_insn *use_insn = &ctx->ir_base[ref];
const ir_insn *use_insn = &ctx->ir_base[ref];
if (use_insn->op == IR_ADD || use_insn->op == IR_SUB) {
ir_bitqueue_add(worklist, use);
@@ -1266,16 +1336,17 @@ void ir_iter_update_op(ir_ctx *ctx, ir_ref ref, uint32_t idx, ir_ref new_val, ir
}
}
static ir_ref ir_iter_find_cse1(ir_ctx *ctx, uint32_t optx, ir_ref op1)
static ir_ref ir_iter_find_cse1(const ir_ctx *ctx, uint32_t optx, ir_ref op1)
{
IR_ASSERT(!IR_IS_CONST_REF(op1));
ir_use_list *use_list = &ctx->use_lists[op1];
ir_ref *p, n = use_list->count;
const ir_use_list *use_list = &ctx->use_lists[op1];
const ir_ref *p;
ir_ref n = use_list->count;
for (p = ctx->use_edges + use_list->refs; n > 0; p++, n--) {
ir_ref use = *p;
ir_insn *use_insn = &ctx->ir_base[use];
const ir_insn *use_insn = &ctx->ir_base[use];
if (use_insn->optx == optx) {
IR_ASSERT(use_insn->op1 == op1);
@@ -1285,12 +1356,13 @@ static ir_ref ir_iter_find_cse1(ir_ctx *ctx, uint32_t optx, ir_ref op1)
return IR_UNUSED;
}
static ir_ref ir_iter_find_cse(ir_ctx *ctx, ir_ref ref, uint32_t opt, ir_ref op1, ir_ref op2, ir_ref op3, ir_bitqueue *worklist)
static ir_ref ir_iter_find_cse(const ir_ctx *ctx, ir_ref ref, uint32_t opt, ir_ref op1, ir_ref op2, ir_ref op3, ir_bitqueue *worklist)
{
uint32_t n = IR_INPUT_EDGES_COUNT(ir_op_flags[opt & IR_OPT_OP_MASK]);
ir_use_list *use_list = NULL;
ir_ref *p, use;
ir_insn *use_insn;
const ir_use_list *use_list = NULL;
const ir_ref *p;
ir_ref use;
const ir_insn *use_insn;
if (n == 2) {
if (!IR_IS_CONST_REF(op1)) {
@@ -1373,7 +1445,8 @@ static void ir_iter_fold(ir_ctx *ctx, ir_ref ref, ir_bitqueue *worklist)
{
uint32_t opt;
ir_ref op1, op2, op3, copy;
ir_insn *op1_insn, *op2_insn, *op3_insn, *insn;
const ir_insn *op1_insn, *op2_insn, *op3_insn;
ir_insn *insn;
insn = &ctx->ir_base[ref];
opt = insn->opt;
@@ -1408,9 +1481,6 @@ restart:
|| insn->op2 != ctx->fold_insn.op2
|| insn->op3 != ctx->fold_insn.op3) {
ir_use_list *use_list;
ir_ref n, j, *p, use;
insn->optx = ctx->fold_insn.opt;
IR_ASSERT(!IR_OP_HAS_VAR_INPUTS(ir_op_flags[opt & IR_OPT_OP_MASK]));
insn->inputs_count = IR_INPUT_EDGES_COUNT(ir_op_flags[opt & IR_OPT_OP_MASK]);
@@ -1442,12 +1512,7 @@ restart:
insn->op2 = ctx->fold_insn.op2;
insn->op3 = ctx->fold_insn.op3;
use_list = &ctx->use_lists[ref];
n = use_list->count;
for (j = 0, p = &ctx->use_edges[use_list->refs]; j < n; j++, p++) {
use = *p;
ir_bitqueue_add(worklist, use);
}
ir_iter_add_uses(ctx, ref, worklist);
}
break;
case IR_FOLD_DO_COPY:
@@ -1464,9 +1529,9 @@ restart:
}
}
static bool ir_may_promote_d2f(ir_ctx *ctx, ir_ref ref)
static bool ir_may_promote_d2f(const ir_ctx *ctx, ir_ref ref)
{
ir_insn *insn = &ctx->ir_base[ref];
const ir_insn *insn = &ctx->ir_base[ref];
IR_ASSERT(insn->type == IR_DOUBLE);
if (IR_IS_CONST_REF(ref)) {
@@ -1497,9 +1562,9 @@ static bool ir_may_promote_d2f(ir_ctx *ctx, ir_ref ref)
return 0;
}
static bool ir_may_promote_f2d(ir_ctx *ctx, ir_ref ref)
static bool ir_may_promote_f2d(const ir_ctx *ctx, ir_ref ref)
{
ir_insn *insn = &ctx->ir_base[ref];
const ir_insn *insn = &ctx->ir_base[ref];
IR_ASSERT(insn->type == IR_FLOAT);
if (IR_IS_CONST_REF(ref)) {
@@ -1668,10 +1733,11 @@ static ir_ref ir_promote_f2d(ir_ctx *ctx, ir_ref ref, ir_ref use, ir_bitqueue *w
return ref;
}
static bool ir_may_promote_trunc(ir_ctx *ctx, ir_type type, ir_ref ref)
static bool ir_may_promote_trunc(const ir_ctx *ctx, ir_type type, ir_ref ref)
{
ir_insn *insn = &ctx->ir_base[ref];
ir_ref *p, n, input;
const ir_insn *insn = &ctx->ir_base[ref];
const ir_ref *p;
ir_ref n, input;
if (IR_IS_CONST_REF(ref)) {
return !IR_IS_SYM_CONST(insn->op);
@@ -1777,6 +1843,7 @@ static ir_ref ir_promote_i2i(ir_ctx *ctx, ir_type type, ir_ref ref, ir_ref use,
}
}
insn->type = type;
ir_iter_add_uses(ctx, ref, worklist);
return ref;
}
@@ -1857,7 +1924,7 @@ static ir_ref ir_promote_i2i(ir_ctx *ctx, ir_type type, ir_ref ref, ir_ref use,
return ref;
}
static ir_ref ir_ext_const(ir_ctx *ctx, ir_insn *val_insn, ir_op op, ir_type type)
static ir_ref ir_ext_const(ir_ctx *ctx, const ir_insn *val_insn, ir_op op, ir_type type)
{
ir_val new_val;
@@ -1921,10 +1988,11 @@ static ir_ref ir_ext_ref(ir_ctx *ctx, ir_ref var_ref, ir_ref src_ref, ir_op op,
return ref;
}
static uint32_t _ir_estimated_control(ir_ctx *ctx, ir_ref val, ir_ref loop)
static uint32_t _ir_estimated_control(const ir_ctx *ctx, ir_ref val, ir_ref loop)
{
ir_insn *insn;
ir_ref n, *p, input, result, ctrl;
const ir_insn *insn;
const ir_ref *p;
ir_ref n, input, result, ctrl;
if (IR_IS_CONST_REF(val)) {
return 1; /* IR_START */
@@ -1955,18 +2023,18 @@ static uint32_t _ir_estimated_control(ir_ctx *ctx, ir_ref val, ir_ref loop)
return result;
}
static bool ir_is_loop_invariant(ir_ctx *ctx, ir_ref ref, ir_ref loop)
static bool ir_is_loop_invariant(const ir_ctx *ctx, ir_ref ref, ir_ref loop)
{
ref = _ir_estimated_control(ctx, ref, loop);
return ref < loop; // TODO: check dominance instead of order
}
static bool ir_is_cheaper_ext(ir_ctx *ctx, ir_ref ref, ir_ref loop, ir_ref ext_ref, ir_op op)
static bool ir_is_cheaper_ext(const ir_ctx *ctx, ir_ref ref, ir_ref loop, ir_ref ext_ref, ir_op op)
{
if (IR_IS_CONST_REF(ref)) {
return 1;
} else {
ir_insn *insn = &ctx->ir_base[ref];
const ir_insn *insn = &ctx->ir_base[ref];
if (insn->op == IR_LOAD) {
if (ir_is_loop_invariant(ctx, ref, loop)) {
@@ -1982,7 +2050,7 @@ static bool ir_is_cheaper_ext(ir_ctx *ctx, ir_ref ref, ir_ref loop, ir_ref ext_r
for (p = &ctx->use_edges[use_list->refs], n = use_list->count; n > 0; p++, n--) {
use = *p;
if (use != ext_ref) {
ir_insn *use_insn = &ctx->ir_base[use];
const ir_insn *use_insn = &ctx->ir_base[use];
if (use_insn->op != op
&& (!(ir_op_flags[use_insn->op] & (IR_OP_FLAG_CONTROL|IR_OP_FLAG_MEM))
@@ -2018,7 +2086,7 @@ static bool ir_try_promote_induction_var_ext(ir_ctx *ctx, ir_ref ext_ref, ir_ref
if (use == op_ref || use == ext_ref) {
continue;
} else {
ir_insn *use_insn = &ctx->ir_base[use];
const ir_insn *use_insn = &ctx->ir_base[use];
if (use_insn->op >= IR_EQ && use_insn->op <= IR_UGT) {
if (use_insn->op1 == phi_ref) {
@@ -2057,7 +2125,7 @@ static bool ir_try_promote_induction_var_ext(ir_ctx *ctx, ir_ref ext_ref, ir_ref
if (use == phi_ref || use == ext_ref) {
continue;
} else {
ir_insn *use_insn = &ctx->ir_base[use];
const ir_insn *use_insn = &ctx->ir_base[use];
if (use_insn->op >= IR_EQ && use_insn->op <= IR_UGT) {
if (use_insn->op1 == phi_ref) {
@@ -2194,7 +2262,7 @@ static bool ir_try_promote_induction_var_ext(ir_ctx *ctx, ir_ref ext_ref, ir_ref
}
static bool ir_try_promote_ext(ir_ctx *ctx, ir_ref ext_ref, ir_insn *insn, ir_bitqueue *worklist)
{
{
ir_ref ref = insn->op1;
/* Check for simple induction variable in the form: x2 = PHI(loop, x1, x3); x3 = ADD(x2, _); */
@@ -2445,7 +2513,7 @@ static bool ir_try_remove_empty_diamond(ir_ctx *ctx, ir_ref ref, ir_insn *insn,
}
}
static bool ir_is_zero(ir_ctx *ctx, ir_ref ref)
static bool ir_is_zero(const ir_ctx *ctx, ir_ref ref)
{
return IR_IS_CONST_REF(ref)
&& !IR_IS_SYM_CONST(ctx->ir_base[ref].op)
@@ -2470,7 +2538,7 @@ static bool ir_optimize_phi(ir_ctx *ctx, ir_ref merge_ref, ir_insn *merge, ir_re
ir_ref root_ref = start1->op1;
ir_insn *root = &ctx->ir_base[root_ref];
if (root->op == IR_IF && !IR_IS_CONST_REF(root->op2) && ctx->use_lists[root->op2].count == 1) {
if (root->op == IR_IF && !IR_IS_CONST_REF(root->op2)) {
ir_ref cond_ref = root->op2;
ir_insn *cond = &ctx->ir_base[cond_ref];
ir_type type = insn->type;
@@ -2550,7 +2618,11 @@ static bool ir_optimize_phi(ir_ctx *ctx, ir_ref merge_ref, ir_insn *merge, ir_re
ir_use_list_remove_all(ctx, insn->op2, cond_ref);
}
MAKE_NOP(cond); CLEAR_USES(cond_ref);
if (ctx->use_lists[cond_ref].count == 1) {
MAKE_NOP(cond); CLEAR_USES(cond_ref);
} else {
ir_use_list_remove_one(ctx, cond_ref, root_ref);
}
MAKE_NOP(root); CLEAR_USES(root_ref);
MAKE_NOP(start1); CLEAR_USES(start1_ref);
MAKE_NOP(start2); CLEAR_USES(start2_ref);
@@ -2636,7 +2708,11 @@ static bool ir_optimize_phi(ir_ctx *ctx, ir_ref merge_ref, ir_insn *merge, ir_re
ir_use_list_remove_all(ctx, insn->op1, cond_ref);
}
MAKE_NOP(cond); CLEAR_USES(cond_ref);
if (ctx->use_lists[cond_ref].count == 1) {
MAKE_NOP(cond); CLEAR_USES(cond_ref);
} else {
ir_use_list_remove_one(ctx, cond_ref, root_ref);
}
MAKE_NOP(root); CLEAR_USES(root_ref);
MAKE_NOP(start1); CLEAR_USES(start1_ref);
MAKE_NOP(start2); CLEAR_USES(start2_ref);
@@ -2650,8 +2726,7 @@ static bool ir_optimize_phi(ir_ctx *ctx, ir_ref merge_ref, ir_insn *merge, ir_re
}
return 1;
#if 0
} else {
} else if (cond->op != IR_OVERFLOW && insn->op2 <= cond_ref && insn->op3 <= cond_ref) {
/* COND
*
* prev prev
@@ -2705,12 +2780,12 @@ static bool ir_optimize_phi(ir_ctx *ctx, ir_ref merge_ref, ir_insn *merge, ir_re
MAKE_NOP(end2); CLEAR_USES(end2_ref);
MAKE_NOP(merge); CLEAR_USES(merge_ref);
ir_bitqueue_add(worklist, ref);
if (ctx->ir_base[next->op1].op == IR_BEGIN || ctx->ir_base[next->op1].op == IR_MERGE) {
ir_bitqueue_add(worklist, next->op1);
}
return 1;
#endif
}
}
}
@@ -2719,7 +2794,7 @@ static bool ir_optimize_phi(ir_ctx *ctx, ir_ref merge_ref, ir_insn *merge, ir_re
return 0;
}
static bool ir_cmp_is_true(ir_op op, ir_insn *op1, ir_insn *op2)
static bool ir_cmp_is_true(ir_op op, const ir_insn *op1, const ir_insn *op2)
{
IR_ASSERT(op1->type == op2->type);
if (IR_IS_TYPE_INT(op1->type)) {
@@ -3246,7 +3321,7 @@ static void ir_iter_optimize_merge(ir_ctx *ctx, ir_ref merge_ref, ir_insn *merge
}
}
static ir_ref ir_find_ext_use(ir_ctx *ctx, ir_ref ref)
static ir_ref ir_find_ext_use(const ir_ctx *ctx, ir_ref ref)
{
ir_use_list *use_list = &ctx->use_lists[ref];
ir_ref *p, n, use;
@@ -3628,6 +3703,7 @@ remove_aliased_load:
insn->op1 = val;
insn->op2 = IR_UNUSED;
ir_bitqueue_add(worklist, i);
ir_iter_add_uses(ctx, i, worklist);
}
}
} else if (insn->op == IR_STORE) {
@@ -3677,11 +3753,11 @@ remove_bitcast:
int ir_sccp(ir_ctx *ctx)
{
ir_bitqueue sccp_worklist, iter_worklist;
ir_insn *_values;
ir_sccp_val *_values;
ir_bitqueue_init(&iter_worklist, ctx->insns_count);
ir_bitqueue_init(&sccp_worklist, ctx->insns_count);
_values = ir_mem_calloc(ctx->insns_count, sizeof(ir_insn));
_values = ir_mem_calloc(ctx->insns_count, sizeof(ir_sccp_val));
ctx->flags2 |= IR_OPT_IN_SCCP;
ir_sccp_analyze(ctx, _values, &sccp_worklist, &iter_worklist);

View File

@@ -1167,6 +1167,7 @@ const ir_call_conv_dsc ir_call_conv_x86_fastcall = {
_(CMP_AND_BRANCH_FP) \
_(TEST_AND_BRANCH_INT) \
_(JCC_INT) \
_(COND_TEST_INT) \
_(COND_CMP_INT) \
_(COND_CMP_FP) \
_(GUARD_CMP_INT) \
@@ -1405,6 +1406,7 @@ op2_const:
}
IR_FALLTHROUGH;
case IR_COND_CMP_INT:
case IR_COND_TEST_INT:
insn = &ctx->ir_base[ref];
if (IR_IS_TYPE_INT(insn->type)) {
if (IR_IS_CONST_REF(insn->op3) || ir_rule(ctx, insn->op3) == IR_STATIC_ALLOCA) {
@@ -2125,6 +2127,34 @@ static uint32_t ir_match_builtin_call(ir_ctx *ctx, const ir_insn *func)
return 0;
}
static bool all_usages_are_fusable(ir_ctx *ctx, ir_ref ref)
{
ir_insn *insn = &ctx->ir_base[ref];
if (insn->op >= IR_EQ && insn->op <= IR_UNORDERED) {
ir_use_list *use_list = &ctx->use_lists[ref];
ir_ref n = use_list->count;
if (n > 0) {
ir_ref *p = ctx->use_edges + use_list->refs;
do {
insn = &ctx->ir_base[*p];
if (insn->op != IR_IF
&& insn->op != IR_GUARD
&& insn->op != IR_GUARD_NOT
&& (insn->op != IR_COND || insn->op2 == ref || insn->op3 == ref)) {
return 0;
}
p++;
n--;
} while (n);
return 1;
}
}
return 0;
}
static uint32_t ir_match_insn(ir_ctx *ctx, ir_ref ref)
{
ir_insn *op2_insn;
@@ -2877,7 +2907,7 @@ store_int:
return IR_RETURN_FP;
}
case IR_IF:
if (!IR_IS_CONST_REF(insn->op2) && ctx->use_lists[insn->op2].count == 1) {
if (!IR_IS_CONST_REF(insn->op2) && (ctx->use_lists[insn->op2].count == 1 || all_usages_are_fusable(ctx, insn->op2))) {
op2_insn = &ctx->ir_base[insn->op2];
if (op2_insn->op >= IR_EQ && op2_insn->op <= IR_UNORDERED) {
if (IR_IS_TYPE_INT(ctx->ir_base[op2_insn->op1].type)) {
@@ -2889,7 +2919,9 @@ store_int:
if (op1_insn->op == IR_AND && ctx->use_lists[op2_insn->op1].count == 1) {
/* v = AND(_, _); c = CMP(v, 0) ... IF(c) => SKIP_TEST; SKIP ... TEST_AND_BRANCH */
ir_match_fuse_load_test_int(ctx, op1_insn, ref);
if (ctx->use_lists[insn->op2].count == 1) {
ir_match_fuse_load_test_int(ctx, op1_insn, ref);
}
ctx->rules[op2_insn->op1] = IR_FUSED | IR_TEST_INT;
ctx->rules[insn->op2] = IR_FUSED | IR_SIMPLE | IR_NOP;
return IR_TEST_AND_BRANCH_INT;
@@ -2901,10 +2933,14 @@ store_int:
op2_insn->op == IR_LT || op2_insn->op == IR_GE)))) {
/* v = BINOP(_, _); c = CMP(v, 0) ... IF(c) => BINOP; SKIP_CMP ... JCC */
if (ir_op_flags[op1_insn->op] & IR_OP_FLAG_COMMUTATIVE) {
ir_match_fuse_load_commutative_int(ctx, op1_insn, ref);
if (ctx->use_lists[insn->op2].count == 1) {
ir_match_fuse_load_commutative_int(ctx, op1_insn, ref);
}
ctx->rules[op2_insn->op1] = IR_BINOP_INT | IR_MAY_SWAP;
} else {
ir_match_fuse_load(ctx, op1_insn->op2, ref);
if (ctx->use_lists[insn->op2].count == 1) {
ir_match_fuse_load(ctx, op1_insn->op2, ref);
}
ctx->rules[op2_insn->op1] = IR_BINOP_INT;
}
ctx->rules[insn->op2] = IR_FUSED | IR_CMP_INT;
@@ -2912,12 +2948,16 @@ store_int:
}
}
/* c = CMP(_, _) ... IF(c) => SKIP_CMP ... CMP_AND_BRANCH */
ir_match_fuse_load_cmp_int(ctx, op2_insn, ref);
if (ctx->use_lists[insn->op2].count == 1) {
ir_match_fuse_load_cmp_int(ctx, op2_insn, ref);
}
ctx->rules[insn->op2] = IR_FUSED | IR_CMP_INT;
return IR_CMP_AND_BRANCH_INT;
} else {
/* c = CMP(_, _) ... IF(c) => SKIP_CMP ... CMP_AND_BRANCH */
ir_match_fuse_load_cmp_fp_br(ctx, op2_insn, ref);
if (ctx->use_lists[insn->op2].count == 1) {
ir_match_fuse_load_cmp_fp_br(ctx, op2_insn, ref);
}
ctx->rules[insn->op2] = IR_FUSED | IR_CMP_FP;
return IR_CMP_AND_BRANCH_FP;
}
@@ -3005,31 +3045,43 @@ store_int:
break;
}
case IR_COND:
if (!IR_IS_CONST_REF(insn->op1) && ctx->use_lists[insn->op1].count == 1) {
if (!IR_IS_CONST_REF(insn->op1) && (ctx->use_lists[insn->op1].count == 1 || all_usages_are_fusable(ctx, insn->op1))) {
ir_insn *op1_insn = &ctx->ir_base[insn->op1];
if (op1_insn->op >= IR_EQ && op1_insn->op <= IR_UNORDERED) {
if (IR_IS_TYPE_INT(ctx->ir_base[op1_insn->op1].type)) {
ir_match_fuse_load_cmp_int(ctx, op1_insn, ref);
if (ctx->use_lists[insn->op1].count == 1) {
ir_match_fuse_load_cmp_int(ctx, op1_insn, ref);
}
ctx->rules[insn->op1] = IR_FUSED | IR_CMP_INT;
return IR_COND_CMP_INT;
} else {
ir_match_fuse_load_cmp_fp_br(ctx, op1_insn, ref);
if (ctx->use_lists[insn->op1].count == 1) {
ir_match_fuse_load_cmp_fp_br(ctx, op1_insn, ref);
}
ctx->rules[insn->op1] = IR_FUSED | IR_CMP_FP;
return IR_COND_CMP_FP;
}
} else if (op1_insn->op == IR_AND) {
/* c = AND(_, _) ... IF(c) => SKIP_TEST ... TEST_AND_BRANCH */
ir_match_fuse_load_test_int(ctx, op1_insn, ref);
ctx->rules[insn->op1] = IR_FUSED | IR_TEST_INT;
return IR_COND_TEST_INT;
}
}
if (IR_IS_TYPE_INT(ctx->ir_base[insn->op1].type)) {
ir_match_fuse_load(ctx, insn->op1, ref);
}
return IR_COND;
case IR_GUARD:
case IR_GUARD_NOT:
if (!IR_IS_CONST_REF(insn->op2) && ctx->use_lists[insn->op2].count == 1) {
if (!IR_IS_CONST_REF(insn->op2) && (ctx->use_lists[insn->op2].count == 1 || all_usages_are_fusable(ctx, insn->op2))) {
op2_insn = &ctx->ir_base[insn->op2];
if (op2_insn->op >= IR_EQ && op2_insn->op <= IR_UNORDERED
if (op2_insn->op >= IR_EQ && op2_insn->op <= IR_UNORDERED) {
// TODO: register allocator may clobber operands of CMP before they are used in the GUARD_CMP
&& (insn->op2 == ref - 1 ||
(insn->op2 == ctx->prev_ref[ref] - 1
&& ctx->ir_base[ctx->prev_ref[ref]].op == IR_SNAPSHOT))) {
//??? && (insn->op2 == ref - 1 ||
//??? (insn->op2 == ctx->prev_ref[ref] - 1
//??? && ctx->ir_base[ctx->prev_ref[ref]].op == IR_SNAPSHOT))) {
if (IR_IS_TYPE_INT(ctx->ir_base[op2_insn->op1].type)) {
if (IR_IS_CONST_REF(op2_insn->op2)
&& !IR_IS_SYM_CONST(ctx->ir_base[op2_insn->op2].op)
@@ -3043,10 +3095,14 @@ store_int:
(op2_insn->op == IR_EQ || op2_insn->op == IR_NE ||
op2_insn->op == IR_LT || op2_insn->op == IR_GE))) {
if (ir_op_flags[op1_insn->op] & IR_OP_FLAG_COMMUTATIVE) {
ir_match_fuse_load_commutative_int(ctx, op1_insn, ref);
if (ctx->use_lists[insn->op2].count == 1) {
ir_match_fuse_load_commutative_int(ctx, op1_insn, ref);
}
ctx->rules[op2_insn->op1] = IR_BINOP_INT | IR_MAY_SWAP;
} else {
ir_match_fuse_load(ctx, op1_insn->op2, ref);
if (ctx->use_lists[insn->op2].count == 1) {
ir_match_fuse_load(ctx, op1_insn->op2, ref);
}
ctx->rules[op2_insn->op1] = IR_BINOP_INT;
}
/* v = BINOP(_, _); c = CMP(v, 0) ... IF(c) => BINOP; SKIP_CMP ... GUARD_JCC */
@@ -3054,6 +3110,7 @@ store_int:
return IR_GUARD_JCC_INT;
}
} else if ((ctx->flags & IR_OPT_CODEGEN)
&& ctx->use_lists[insn->op2].count == 1
&& op2_insn->op1 == insn->op2 - 2 /* before previous instruction */
&& ir_in_same_block(ctx, op2_insn->op1)
&& ctx->use_lists[op2_insn->op1].count == 2) {
@@ -3101,12 +3158,16 @@ store_int:
}
}
/* c = CMP(_, _) ... GUARD(c) => SKIP_CMP ... GUARD_CMP */
ir_match_fuse_load_cmp_int(ctx, op2_insn, ref);
if (ctx->use_lists[insn->op2].count == 1) {
ir_match_fuse_load_cmp_int(ctx, op2_insn, ref);
}
ctx->rules[insn->op2] = IR_FUSED | IR_CMP_INT;
return IR_GUARD_CMP_INT;
} else {
/* c = CMP(_, _) ... GUARD(c) => SKIP_CMP ... GUARD_CMP */
ir_match_fuse_load_cmp_fp_br(ctx, op2_insn, ref);
if (ctx->use_lists[insn->op2].count == 1) {
ir_match_fuse_load_cmp_fp_br(ctx, op2_insn, ref);
}
ctx->rules[insn->op2] = IR_FUSED | IR_CMP_FP;
return IR_GUARD_CMP_FP;
}
@@ -6051,8 +6112,15 @@ static void ir_emit_cmp_int_common2(ir_ctx *ctx, ir_ref root, ir_ref ref, ir_ins
ir_type type = ctx->ir_base[cmp_insn->op1].type;
ir_ref op1 = cmp_insn->op1;
ir_ref op2 = cmp_insn->op2;
ir_reg op1_reg = ctx->regs[ref][1];
ir_reg op2_reg = ctx->regs[ref][2];
ir_reg op1_reg, op2_reg;
if (UNEXPECTED(ctx->rules[ref] & IR_FUSED_REG)) {
op1_reg = ir_get_fused_reg(ctx, root, ref * sizeof(ir_ref) + 1);
op2_reg = ir_get_fused_reg(ctx, root, ref * sizeof(ir_ref) + 2);
} else {
op1_reg = ctx->regs[ref][1];
op2_reg = ctx->regs[ref][2];
}
if (op1_reg != IR_REG_NONE && IR_REG_SPILLED(op1_reg)) {
op1_reg = IR_REG_NUM(op1_reg);
@@ -6218,8 +6286,15 @@ static void ir_emit_test_int_common(ir_ctx *ctx, ir_ref root, ir_ref ref, ir_op
ir_type type = binop_insn->type;
ir_ref op1 = binop_insn->op1;
ir_ref op2 = binop_insn->op2;
ir_reg op1_reg = ctx->regs[ref][1];
ir_reg op2_reg = ctx->regs[ref][2];
ir_reg op1_reg, op2_reg;
if (UNEXPECTED(ctx->rules[ref] & IR_FUSED_REG)) {
op1_reg = ir_get_fused_reg(ctx, root, ref * sizeof(ir_ref) + 1);
op2_reg = ir_get_fused_reg(ctx, root, ref * sizeof(ir_ref) + 2);
} else {
op1_reg = ctx->regs[ref][1];
op2_reg = ctx->regs[ref][2];
}
IR_ASSERT(binop_insn->op == IR_AND);
if (op1_reg != IR_REG_NONE) {
@@ -6329,8 +6404,13 @@ static ir_op ir_emit_cmp_fp_common(ir_ctx *ctx, ir_ref root, ir_ref cmp_ref, ir_
op1 = cmp_insn->op1;
op2 = cmp_insn->op2;
op1_reg = ctx->regs[cmp_ref][1];
op2_reg = ctx->regs[cmp_ref][2];
if (UNEXPECTED(ctx->rules[cmp_ref] & IR_FUSED_REG)) {
op1_reg = ir_get_fused_reg(ctx, root, cmp_ref * sizeof(ir_ref) + 1);
op2_reg = ir_get_fused_reg(ctx, root, cmp_ref * sizeof(ir_ref) + 2);
} else {
op1_reg = ctx->regs[cmp_ref][1];
op2_reg = ctx->regs[cmp_ref][2];
}
if (op1_reg == IR_REG_NONE && op2_reg != IR_REG_NONE && (op == IR_EQ || op == IR_NE)) {
ir_reg tmp_reg;
@@ -6603,8 +6683,15 @@ static void ir_emit_cmp_and_branch_int(ir_ctx *ctx, uint32_t b, ir_ref def, ir_i
ir_type type = ctx->ir_base[cmp_insn->op1].type;
ir_ref op1 = cmp_insn->op1;
ir_ref op2 = cmp_insn->op2;
ir_reg op1_reg = ctx->regs[insn->op2][1];
ir_reg op2_reg = ctx->regs[insn->op2][2];
ir_reg op1_reg, op2_reg;
if (UNEXPECTED(ctx->rules[insn->op2] & IR_FUSED_REG)) {
op1_reg = ir_get_fused_reg(ctx, def, insn->op2 * sizeof(ir_ref) + 1);
op2_reg = ir_get_fused_reg(ctx, def, insn->op2 * sizeof(ir_ref) + 2);
} else {
op1_reg = ctx->regs[insn->op2][1];
op2_reg = ctx->regs[insn->op2][2];
}
if (op1_reg != IR_REG_NONE && IR_REG_SPILLED(op1_reg)) {
op1_reg = IR_REG_NUM(op1_reg);
@@ -6735,37 +6822,24 @@ static void ir_emit_cond(ir_ctx *ctx, ir_ref def, ir_insn *insn)
IR_ASSERT(def_reg != IR_REG_NONE);
if (op2 != op3) {
if (op2_reg != IR_REG_NONE && IR_REG_SPILLED(op2_reg)) {
op2_reg = IR_REG_NUM(op2_reg);
ir_emit_load(ctx, type, op2_reg, op2);
if (op1 == op2) {
op1_reg = op2_reg;
}
}
if (op3_reg != IR_REG_NONE && IR_REG_SPILLED(op3_reg)) {
op3_reg = IR_REG_NUM(op3_reg);
ir_emit_load(ctx, type, op3_reg, op3);
if (op1 == op2) {
op1_reg = op3_reg;
}
}
} else if (op2_reg != IR_REG_NONE && IR_REG_SPILLED(op2_reg)) {
if (op2_reg != IR_REG_NONE && IR_REG_SPILLED(op2_reg)) {
op2_reg = IR_REG_NUM(op2_reg);
ir_emit_load(ctx, type, op2_reg, op2);
op3_reg = op2_reg;
if (op1 == op2) {
op1_reg = op2_reg;
}
} else if (op3_reg != IR_REG_NONE && IR_REG_SPILLED(op3_reg)) {
op3_reg = IR_REG_NUM(op3_reg);
ir_emit_load(ctx, type, op3_reg, op3);
op2_reg = op3_reg;
if (op1 == op3) {
op1_reg = op3_reg;
if (op3 == op2) {
op3_reg = op2_reg;
}
}
if (op1_reg != IR_REG_NONE && op1 != op2 && op1 != op3 && IR_REG_SPILLED(op1_reg)) {
if (op3_reg != IR_REG_NONE && IR_REG_SPILLED(op3_reg)) {
op3_reg = IR_REG_NUM(op3_reg);
ir_emit_load(ctx, type, op3_reg, op3);
if (op1 == op3) {
op1_reg = op2_reg;
}
}
if (op1_reg != IR_REG_NONE && IR_REG_SPILLED(op1_reg)) {
op1_reg = IR_REG_NUM(op1_reg);
ir_emit_load(ctx, op1_type, op1_reg, op1);
}
@@ -6774,7 +6848,13 @@ static void ir_emit_cond(ir_ctx *ctx, ir_ref def, ir_insn *insn)
if (op1_reg != IR_REG_NONE) {
| ASM_REG_REG_OP test, op1_type, op1_reg, op1_reg
} else {
ir_mem mem = ir_ref_spill_slot(ctx, op1);
ir_mem mem;
if (ir_rule(ctx, insn->op1) & IR_FUSED) {
mem = ir_fuse_load(ctx, def, insn->op1);
} else {
mem = ir_ref_spill_slot(ctx, insn->op1);
}
| ASM_MEM_IMM_OP cmp, op1_type, mem, 0
}
@@ -6864,6 +6944,115 @@ static void ir_emit_cond(ir_ctx *ctx, ir_ref def, ir_insn *insn)
}
}
/* Emit native code for a COND instruction whose condition (op1) was fused
 * into an integer zero-test (the COND_TEST_INT rule):
 *
 *     def = (op1 != 0) ? op2 : op3
 *
 * The flags produced by the `test` below must survive until the final
 * `cmovcc` (integer result) or conditional jump (FP result), so every
 * move emitted in between must be flag-preserving — see the
 * "prevent xor and flags clobbering" notes on the constant paths.
 */
static void ir_emit_cond_test_int(ir_ctx *ctx, ir_ref def, ir_insn *insn)
{
	ir_backend_data *data = ctx->data;
	dasm_State **Dst = &data->dasm_state;
	ir_type type = insn->type;
	ir_ref op2 = insn->op2;
	ir_ref op3 = insn->op3;
	/* regs[def][n] holds the register allocated for operand n, possibly
	 * tagged with the spill flag (IR_REG_SPILLED); [0] is the result. */
	ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]);
	ir_reg op2_reg = ctx->regs[def][2];
	ir_reg op3_reg = ctx->regs[def][3];

	/* Reload any spilled source operands. When op2 and op3 are the same
	 * ref, load it once and share the register for both. */
	if (op2 != op3) {
		if (op2_reg != IR_REG_NONE && IR_REG_SPILLED(op2_reg)) {
			op2_reg = IR_REG_NUM(op2_reg);
			ir_emit_load(ctx, type, op2_reg, op2);
		}
		if (op3_reg != IR_REG_NONE && IR_REG_SPILLED(op3_reg)) {
			op3_reg = IR_REG_NUM(op3_reg);
			ir_emit_load(ctx, type, op3_reg, op3);
		}
	} else if (op2_reg != IR_REG_NONE && IR_REG_SPILLED(op2_reg)) {
		op2_reg = IR_REG_NUM(op2_reg);
		ir_emit_load(ctx, type, op2_reg, op2);
		op3_reg = op2_reg;
	} else if (op3_reg != IR_REG_NONE && IR_REG_SPILLED(op3_reg)) {
		op3_reg = IR_REG_NUM(op3_reg);
		ir_emit_load(ctx, type, op3_reg, op3);
		op2_reg = op3_reg;
	}

	/* Emit `test op1, op1` (or the memory form) setting ZF for op1 == 0. */
	ir_emit_test_int_common(ctx, def, insn->op1, IR_NE);

	if (IR_IS_TYPE_INT(type)) {
		/* Integer result: materialize one arm in def_reg, then cmovcc the
		 * other arm in.  `eq` flags that the roles were swapped (op3 ended
		 * up in def_reg), reversing the cmov condition. */
		bool eq = 0;

		if (op3_reg != IR_REG_NONE) {
			if (op3_reg == def_reg) {
				/* def_reg already holds op3; cmov op2 in when op1 != 0. */
				IR_ASSERT(op2_reg != IR_REG_NONE);
				op3_reg = op2_reg;
				eq = 1; // reverse
			} else {
				/* Put op2 into def_reg first (flag-preserving moves only). */
				if (op2_reg != IR_REG_NONE) {
					if (def_reg != op2_reg) {
//					if (IR_IS_TYPE_INT(type)) {
						ir_emit_mov(ctx, type, def_reg, op2_reg);
//					} else {
//						ir_emit_fp_mov(ctx, type, def_reg, op2_reg);
//					}
					}
				} else if (IR_IS_CONST_REF(op2) && !IR_IS_SYM_CONST(ctx->ir_base[op2].op)) {
					/* prevent "xor" and flags clobbering */
					ir_emit_mov_imm_int(ctx, type, def_reg, ctx->ir_base[op2].val.i64);
				} else {
					/* NOTE(review): assumes ir_emit_load_ex does not clobber
					 * flags here — verify against its definition. */
					ir_emit_load_ex(ctx, type, def_reg, op2, def);
				}
			}
		} else {
			/* op3 has no register: load it into def_reg and cmov op2
			 * (which must be in a register) with the reversed condition. */
			IR_ASSERT(op2_reg != IR_REG_NONE && op2_reg != def_reg);
			if (IR_IS_CONST_REF(op3) && !IR_IS_SYM_CONST(ctx->ir_base[op3].op)) {
				/* prevent "xor" and flags clobbering */
				ir_emit_mov_imm_int(ctx, type, def_reg, ctx->ir_base[op3].val.i64);
			} else {
				ir_emit_load_ex(ctx, type, def_reg, op3, def);
			}
			op3_reg = op2_reg;
			eq = 1; // reverse
		}
		if (eq) {
			/* Swapped: def_reg holds op3; take op2 when op1 != 0 (ZF clear). */
			| ASM_REG_REG_OP2 cmovne, type, def_reg, op3_reg
		} else {
			/* def_reg holds op2; take op3 when op1 == 0 (ZF set). */
			| ASM_REG_REG_OP2 cmove, type, def_reg, op3_reg
		}
	} else {
		/* FP (or other non-int) result: no cmov, branch over the two arms. */
		| jne >2
		|1:
		if (op2_reg != IR_REG_NONE) {
			if (def_reg != op2_reg) {
				if (IR_IS_TYPE_INT(type)) {
					ir_emit_mov(ctx, type, def_reg, op2_reg);
				} else {
					ir_emit_fp_mov(ctx, type, def_reg, op2_reg);
				}
			}
		} else {
			ir_emit_load_ex(ctx, type, def_reg, op2, def);
		}
		| jmp >3
		|2:
		if (op3_reg != IR_REG_NONE) {
			if (def_reg != op3_reg) {
				if (IR_IS_TYPE_INT(type)) {
					ir_emit_mov(ctx, type, def_reg, op3_reg);
				} else {
					ir_emit_fp_mov(ctx, type, def_reg, op3_reg);
				}
			}
		} else {
			ir_emit_load_ex(ctx, type, def_reg, op3, def);
		}
		|3:
	}
	/* Spill the result back to its stack slot if the allocator asked for it. */
	if (IR_REG_SPILLED(ctx->regs[def][0])) {
		ir_emit_store(ctx, type, def, def_reg);
	}
}
static void ir_emit_cond_cmp_int(ir_ctx *ctx, ir_ref def, ir_insn *insn)
{
ir_backend_data *data = ctx->data;
@@ -10454,9 +10643,16 @@ static bool ir_emit_guard_cmp_int(ir_ctx *ctx, uint32_t b, ir_ref def, ir_insn *
ir_type type = ctx->ir_base[cmp_insn->op1].type;
ir_ref op1 = cmp_insn->op1;
ir_ref op2 = cmp_insn->op2;
ir_reg op1_reg = ctx->regs[insn->op2][1];
ir_reg op2_reg = ctx->regs[insn->op2][2];
void *addr;
ir_reg op1_reg, op2_reg;
if (UNEXPECTED(ctx->rules[insn->op2] & IR_FUSED_REG)) {
op1_reg = ir_get_fused_reg(ctx, def, insn->op2 * sizeof(ir_ref) + 1);
op2_reg = ir_get_fused_reg(ctx, def, insn->op2 * sizeof(ir_ref) + 2);
} else {
op1_reg = ctx->regs[insn->op2][1];
op2_reg = ctx->regs[insn->op2][2];
}
if (op1_reg != IR_REG_NONE && IR_REG_SPILLED(op1_reg)) {
op1_reg = IR_REG_NUM(op1_reg);
@@ -11714,6 +11910,9 @@ void *ir_emit_code(ir_ctx *ctx, size_t *size_ptr)
case IR_COND:
ir_emit_cond(ctx, i, insn);
break;
case IR_COND_TEST_INT:
ir_emit_cond_test_int(ctx, i, insn);
break;
case IR_COND_CMP_INT:
ir_emit_cond_cmp_int(ctx, i, insn);
break;
@@ -12180,7 +12379,7 @@ const void *ir_emit_exitgroup(uint32_t first_exit_point, uint32_t exit_points_pe
return entry;
}
bool ir_needs_thunk(ir_code_buffer *code_buffer, void *addr)
bool ir_needs_thunk(const ir_code_buffer *code_buffer, void *addr)
{
return sizeof(void*) == 8 && !IR_MAY_USE_32BIT_ADDR(code_buffer, addr);
}