1
0
mirror of https://github.com/php/php-src.git synced 2026-03-24 00:02:20 +01:00

Update IR

IR commit: 1d3df9f7dd82fe49001e714a4c31962387b526f6
This commit is contained in:
Dmitry Stogov
2024-03-13 21:22:50 +03:00
parent 8793f9938b
commit 3bf4bcc775
7 changed files with 120 additions and 46 deletions

View File

@@ -15,6 +15,10 @@
#define MAX_RULES 2048
#define MAX_SLOTS (MAX_RULES * 4)
#define USE_SEMI_PERFECT_HASH 1
#define USE_SHL_HASH 1
#define USE_ROL_HASH 0
static ir_strtab strtab;
void print_hash(uint32_t *mask, uint32_t count)
@@ -28,12 +32,14 @@ void print_hash(uint32_t *mask, uint32_t count)
printf("};\n\n");
}
#if 0
#if USE_SHL_HASH
static uint32_t hash_shl2(uint32_t mask, uint32_t r1, uint32_t r2)
{
return ((mask << r1) - mask) << r2;
}
#else
#endif
#if USE_ROL_HASH
#define ir_rol(x, n) (((x)<<(n)) | ((x)>>(-(int)(n)&(8*sizeof(x)-1))))
#define ir_ror(x, n) (((x)<<(-(int)(n)&(8*sizeof(x)-1))) | ((x)>>(n)))
@@ -50,29 +56,64 @@ int find_hash(uint32_t *mask, uint32_t count)
uint32_t n, r1, r2, i, h;
for (n = (count | 1); n < MAX_SLOTS; n += 2) {
#if USE_SEMI_PERFECT_HASH
int semi_perfect = 0;
#endif
for (r1 = 0; r1 < 31; r1++) {
for (r2 = 0; r2 < 32; r2++) {
#if 0
#if USE_SHL_HASH
memset(hash, 0, n * sizeof(uint32_t));
for (i = 0; i < count; i++) {
h = hash_shl2(mask[i] & 0x1fffff, r1, r2) % n;
if (hash[h]) break; /* collision */
if (hash[h]) {
#if USE_SEMI_PERFECT_HASH
h++;
if (!hash[h]) {
hash[h] = mask[i];
semi_perfect = 1;
continue;
}
#endif
break; /* collision */
}
hash[h] = mask[i];
}
if (i == count) {
print_hash(hash, n);
#if USE_SEMI_PERFECT_HASH
if (semi_perfect) {
printf("#define IR_FOLD_SEMI_PERFECT_HASH\n\n");
}
#endif
printf("static uint32_t _ir_fold_hashkey(uint32_t h)\n{\n\treturn (((h << %d) - h) << %d) %% %d;\n}\n", r1, r2, n);
return 1;
}
#else
#endif
#if USE_ROL_HASH
memset(hash, 0, n * sizeof(uint32_t));
for (i = 0; i < count; i++) {
h = hash_rol2(mask[i] & 0x1fffff, r1, r2) % n;
if (hash[h]) break; /* collision */
if (hash[h]) {
#if USE_SEMI_PERFECT_HASH
h++;
if (!hash[h]) {
hash[h] = mask[i];
semi_perfect = 1;
continue;
}
#endif
break; /* collision */
}
hash[h] = mask[i];
}
if (i == count) {
print_hash(hash, n);
#if USE_SEMI_PERFECT_HASH
if (semi_perfect) {
printf("#define IR_FOLD_SEMI_PERFECT_HASH\n\n");
}
#endif
printf("static uint32_t _ir_fold_hashkey(uint32_t h)\n{\nreturn ir_rol32((ir_rol32(h, %d) - h), %d) %% %d;\n}\n", r1, r2, n);
return 1;
}

View File

@@ -926,7 +926,11 @@ restart:
uint32_t k = key & any;
uint32_t h = _ir_fold_hashkey(k);
uint32_t fh = _ir_fold_hash[h];
if (IR_FOLD_KEY(fh) == k /*|| (fh = _ir_fold_hash[h+1], (fh & 0x1fffff) == k)*/) {
if (IR_FOLD_KEY(fh) == k
#ifdef IR_FOLD_SEMI_PERFECT_HASH
|| (fh = _ir_fold_hash[h+1], (fh & 0x1fffff) == k)
#endif
) {
switch (IR_FOLD_RULE(fh)) {
#include "ir_fold.h"
default:
@@ -1287,6 +1291,7 @@ void ir_use_list_remove_one(ir_ctx *ctx, ir_ref from, ir_ref ref)
*p = IR_UNUSED;
break;
}
p++;
j++;
}
}

View File

@@ -4309,7 +4309,7 @@ static void ir_emit_switch(ir_ctx *ctx, uint32_t b, ir_ref def, ir_insn *insn)
}
/* Generate a table jmp or a sequence of calls */
if ((max.i64-min.i64) < count * 8) {
if (count > 2 && (max.i64-min.i64) < count * 8) {
int *labels = ir_mem_malloc(sizeof(int) * (max.i64 - min.i64 + 1));
for (i = 0; i <= (max.i64 - min.i64); i++) {

View File

@@ -2027,13 +2027,10 @@ static int ir_schedule_blocks_bottom_up(ir_ctx *ctx)
ir_chain *chains;
ir_bitqueue worklist;
ir_bitset visited;
uint32_t *empty, count;
#ifdef IR_DEBUG
uint32_t empty_count = 0;
#endif
uint32_t *schedule_end, count;
ctx->cfg_schedule = ir_mem_malloc(sizeof(uint32_t) * (ctx->cfg_blocks_count + 2));
empty = ctx->cfg_schedule + ctx->cfg_blocks_count;
schedule_end = ctx->cfg_schedule + ctx->cfg_blocks_count;
/* 1. Create initial chains for each BB */
chains = ir_mem_malloc(sizeof(ir_chain) * (ctx->cfg_blocks_count + 1));
@@ -2083,11 +2080,8 @@ restart:
/* move empty blocks to the end */
IR_ASSERT(chains[b].head == b);
chains[b].head = 0;
#ifdef IR_DEBUG
empty_count++;
#endif
*empty = b;
empty--;
*schedule_end = b;
schedule_end--;
if (successor > b) {
bb_freq[successor] += bb_freq[b];
@@ -2168,14 +2162,22 @@ restart:
} else {
prob1 = prob2 = 50;
}
IR_ASSERT(edges_count < max_edges_count);
freq = bb_freq[b] * (float)prob1 / (float)probN;
if (successor1 > b) {
IR_ASSERT(!ir_bitset_in(visited, successor1));
bb_freq[successor1] += freq;
ir_bitqueue_add(&worklist, successor1);
}
do {
freq = bb_freq[b] * (float)prob1 / (float)probN;
if (successor1 > b) {
IR_ASSERT(!ir_bitset_in(visited, successor1));
bb_freq[successor1] += freq;
if (successor1_bb->successors_count == 0 && insn1->op2 == 1) {
/* move cold block without successors to the end */
IR_ASSERT(chains[successor1].head == successor1);
chains[successor1].head = 0;
*schedule_end = successor1;
schedule_end--;
break;
} else {
ir_bitqueue_add(&worklist, successor1);
}
}
/* try to join edges early to reduce number of edges and the cost of their sorting */
if (prob1 > prob2
&& (successor1_bb->flags & (IR_BB_START|IR_BB_ENTRY|IR_BB_EMPTY)) != IR_BB_EMPTY) {
@@ -2187,19 +2189,28 @@ restart:
if (!IR_DEBUG_BB_SCHEDULE_GRAPH) break;
}
successor1 = _ir_skip_empty_blocks(ctx, successor1);
IR_ASSERT(edges_count < max_edges_count);
edges[edges_count].from = b;
edges[edges_count].to = successor1;
edges[edges_count].freq = freq;
edges_count++;
} while (0);
IR_ASSERT(edges_count < max_edges_count);
freq = bb_freq[b] * (float)prob2 / (float)probN;
if (successor2 > b) {
IR_ASSERT(!ir_bitset_in(visited, successor2));
bb_freq[successor2] += freq;
ir_bitqueue_add(&worklist, successor2);
}
do {
freq = bb_freq[b] * (float)prob2 / (float)probN;
if (successor2 > b) {
IR_ASSERT(!ir_bitset_in(visited, successor2));
bb_freq[successor2] += freq;
if (successor2_bb->successors_count == 0 && insn2->op2 == 1) {
/* move cold block without successors to the end */
IR_ASSERT(chains[successor2].head == successor2);
chains[successor2].head = 0;
*schedule_end = successor2;
schedule_end--;
break;
} else {
ir_bitqueue_add(&worklist, successor2);
}
}
if (prob2 > prob1
&& (successor2_bb->flags & (IR_BB_START|IR_BB_ENTRY|IR_BB_EMPTY)) != IR_BB_EMPTY) {
uint32_t src = chains[b].next;
@@ -2210,6 +2221,7 @@ restart:
if (!IR_DEBUG_BB_SCHEDULE_GRAPH) break;
}
successor2 = _ir_skip_empty_blocks(ctx, successor2);
IR_ASSERT(edges_count < max_edges_count);
edges[edges_count].from = b;
edges[edges_count].to = successor2;
edges[edges_count].freq = freq;
@@ -2242,7 +2254,6 @@ restart:
} else {
prob = 100 / bb->successors_count;
}
IR_ASSERT(edges_count < max_edges_count);
freq = bb_freq[b] * (float)prob / 100.0f;
if (successor > b) {
IR_ASSERT(!ir_bitset_in(visited, successor));
@@ -2250,6 +2261,7 @@ restart:
ir_bitqueue_add(&worklist, successor);
}
successor = _ir_skip_empty_blocks(ctx, successor);
IR_ASSERT(edges_count < max_edges_count);
edges[edges_count].from = b;
edges[edges_count].to = successor;
edges[edges_count].freq = freq;
@@ -2383,7 +2395,7 @@ restart:
}
}
IR_ASSERT(count + empty_count == ctx->cfg_blocks_count);
IR_ASSERT(ctx->cfg_schedule + count == schedule_end);
ctx->cfg_schedule[ctx->cfg_blocks_count + 1] = 0;
ir_mem_free(edges);
@@ -2401,17 +2413,14 @@ static int ir_schedule_blocks_top_down(ir_ctx *ctx)
uint32_t b, best_successor, last_non_empty;
ir_block *bb, *best_successor_bb;
ir_insn *insn;
uint32_t *list, *empty;
uint32_t *list, *schedule_end;
uint32_t count = 0;
#ifdef IR_DEBUG
uint32_t empty_count = 0;
#endif
ir_bitqueue_init(&blocks, ctx->cfg_blocks_count + 1);
blocks.pos = 0;
list = ir_mem_malloc(sizeof(uint32_t) * (ctx->cfg_blocks_count + 2));
list[ctx->cfg_blocks_count + 1] = 0;
empty = list + ctx->cfg_blocks_count;
schedule_end = list + ctx->cfg_blocks_count;
for (b = 1; b <= ctx->cfg_blocks_count; b++) {
ir_bitset_incl(blocks.set, b);
}
@@ -2431,11 +2440,8 @@ static int ir_schedule_blocks_top_down(ir_ctx *ctx)
}
if ((bb->flags & (IR_BB_START|IR_BB_ENTRY|IR_BB_EMPTY)) == IR_BB_EMPTY) {
/* move empty blocks to the end */
#ifdef IR_DEBUG
empty_count++;
#endif
*empty = b;
empty--;
*schedule_end = b;
schedule_end--;
} else {
count++;
list[count] = b;
@@ -2520,7 +2526,7 @@ static int ir_schedule_blocks_top_down(ir_ctx *ctx)
} while (1);
}
IR_ASSERT(count + empty_count == ctx->cfg_blocks_count);
IR_ASSERT(list + count == schedule_end);
ctx->cfg_schedule = list;
ir_bitqueue_free(&blocks);

View File

@@ -2508,6 +2508,7 @@ IR_FOLD(MUL(MUL, C_I8))
IR_FOLD(MUL(MUL, C_I16))
IR_FOLD(MUL(MUL, C_I32))
IR_FOLD(MUL(MUL, C_I64))
IR_FOLD(MUL(MUL, C_ADDR))
{
if (IR_IS_CONST_REF(op1_insn->op2) && !IR_IS_SYM_CONST(ctx->ir_base[op1_insn->op2].op)) {
/* (x * c1) * c2 => x * (c1 * c2) */
@@ -2527,6 +2528,7 @@ IR_FOLD(AND(AND, C_I8))
IR_FOLD(AND(AND, C_I16))
IR_FOLD(AND(AND, C_I32))
IR_FOLD(AND(AND, C_I64))
IR_FOLD(AND(AND, C_ADDR))
{
if (IR_IS_CONST_REF(op1_insn->op2) && !IR_IS_SYM_CONST(ctx->ir_base[op1_insn->op2].op)) {
/* (x & c1) & c2 => x & (c1 & c2) */
@@ -2546,6 +2548,7 @@ IR_FOLD(OR(OR, C_I8))
IR_FOLD(OR(OR, C_I16))
IR_FOLD(OR(OR, C_I32))
IR_FOLD(OR(OR, C_I64))
IR_FOLD(OR(OR, C_ADDR))
{
if (IR_IS_CONST_REF(op1_insn->op2) && !IR_IS_SYM_CONST(ctx->ir_base[op1_insn->op2].op)) {
/* (x | c1) | c2 => x | (c1 | c2) */
@@ -2565,6 +2568,7 @@ IR_FOLD(XOR(XOR, C_I8))
IR_FOLD(XOR(XOR, C_I16))
IR_FOLD(XOR(XOR, C_I32))
IR_FOLD(XOR(XOR, C_I64))
IR_FOLD(XOR(XOR, C_ADDR))
{
if (IR_IS_CONST_REF(op1_insn->op2) && !IR_IS_SYM_CONST(ctx->ir_base[op1_insn->op2].op)) {
/* (x ^ c1) ^ c2 => x ^ (c1 ^ c2) */

View File

@@ -1554,6 +1554,10 @@ static bool ir_vregs_inside(ir_ctx *ctx, uint32_t parent, uint32_t child)
ir_live_interval *child_ival = ctx->live_intervals[child];
ir_live_interval *parent_ival = ctx->live_intervals[parent];
if ((child_ival->flags | parent_ival->flags) & IR_LIVE_INTERVAL_COALESCED) {
// TODO: Support valid cases with already coalesced "parent_ival"
return 0;
}
#if 0
if (child_ival->end >= parent_ival->end) {
return 0;
@@ -1629,6 +1633,13 @@ static void ir_vregs_coalesce(ir_ctx *ctx, uint32_t v1, uint32_t v2, ir_ref from
uint16_t f1 = ctx->live_intervals[v1]->flags;
uint16_t f2 = ctx->live_intervals[v2]->flags;
#if 0
if (ctx->binding) {
ir_ref b1 = ir_binding_find(ctx, from);
ir_ref b2 = ir_binding_find(ctx, to);
IR_ASSERT(b1 == b2);
}
#endif
if ((f1 & IR_LIVE_INTERVAL_COALESCED) && !(f2 & IR_LIVE_INTERVAL_COALESCED)) {
ir_vregs_join(ctx, v1, v2);
ctx->vregs[to] = v1;
@@ -1971,6 +1982,13 @@ int ir_coalesce(ir_ctx *ctx)
&& ctx->vregs[insn->op1]
&& ctx->vregs[i] != ctx->vregs[insn->op1]) {
if (ir_vregs_inside(ctx, ctx->vregs[insn->op1], ctx->vregs[i])) {
if (ctx->binding) {
ir_ref b1 = ir_binding_find(ctx, i);
ir_ref b2 = ir_binding_find(ctx, insn->op1);
if (b1 != b2) {
continue;
}
}
ir_vregs_coalesce(ctx, ctx->vregs[i], ctx->vregs[insn->op1], i, insn->op1);
compact = 1;
}

View File

@@ -7746,7 +7746,7 @@ static void ir_emit_switch(ir_ctx *ctx, uint32_t b, ir_ref def, ir_insn *insn)
}
/* Generate a table jmp or a sequence of calls */
if ((max.i64-min.i64) < count * 8) {
if (count > 2 && (max.i64-min.i64) < count * 8) {
int *labels = ir_mem_malloc(sizeof(int) * (size_t)(max.i64 - min.i64 + 1));
for (i = 0; i <= (max.i64 - min.i64); i++) {