diff --git a/ext/opcache/jit/ir/gen_ir_fold_hash.c b/ext/opcache/jit/ir/gen_ir_fold_hash.c index 52205297438..efd656f5b7a 100644 --- a/ext/opcache/jit/ir/gen_ir_fold_hash.c +++ b/ext/opcache/jit/ir/gen_ir_fold_hash.c @@ -15,6 +15,10 @@ #define MAX_RULES 2048 #define MAX_SLOTS (MAX_RULES * 4) +#define USE_SEMI_PERFECT_HASH 1 +#define USE_SHL_HASH 1 +#define USE_ROL_HASH 0 + static ir_strtab strtab; void print_hash(uint32_t *mask, uint32_t count) @@ -28,12 +32,14 @@ void print_hash(uint32_t *mask, uint32_t count) printf("};\n\n"); } -#if 0 +#if USE_SHL_HASH static uint32_t hash_shl2(uint32_t mask, uint32_t r1, uint32_t r2) { return ((mask << r1) - mask) << r2; } -#else +#endif + +#if USE_ROL_HASH #define ir_rol(x, n) (((x)<<(n)) | ((x)>>(-(int)(n)&(8*sizeof(x)-1)))) #define ir_ror(x, n) (((x)<<(-(int)(n)&(8*sizeof(x)-1))) | ((x)>>(n))) @@ -50,29 +56,64 @@ int find_hash(uint32_t *mask, uint32_t count) uint32_t n, r1, r2, i, h; for (n = (count | 1); n < MAX_SLOTS; n += 2) { +#if USE_SEMI_PERFECT_HASH + int semi_perfect = 0; +#endif + for (r1 = 0; r1 < 31; r1++) { for (r2 = 0; r2 < 32; r2++) { -#if 0 +#if USE_SHL_HASH memset(hash, 0, n * sizeof(uint32_t)); for (i = 0; i < count; i++) { h = hash_shl2(mask[i] & 0x1fffff, r1, r2) % n; - if (hash[h]) break; /* collision */ + if (hash[h]) { +#if USE_SEMI_PERFECT_HASH + h++; + if (!hash[h]) { + hash[h] = mask[i]; + semi_perfect = 1; + continue; + } +#endif + break; /* collision */ + } hash[h] = mask[i]; } if (i == count) { print_hash(hash, n); +#if USE_SEMI_PERFECT_HASH + if (semi_perfect) { + printf("#define IR_FOLD_SEMI_PERFECT_HASH\n\n"); + } +#endif printf("static uint32_t _ir_fold_hashkey(uint32_t h)\n{\n\treturn (((h << %d) - h) << %d) %% %d;\n}\n", r1, r2, n); return 1; } -#else +#endif +#if USE_ROL_HASH memset(hash, 0, n * sizeof(uint32_t)); for (i = 0; i < count; i++) { h = hash_rol2(mask[i] & 0x1fffff, r1, r2) % n; - if (hash[h]) break; /* collision */ + if (hash[h]) { +#if USE_SEMI_PERFECT_HASH + h++; + if 
(!hash[h]) { + hash[h] = mask[i]; + semi_perfect = 1; + continue; + } +#endif + break; /* collision */ + } hash[h] = mask[i]; } if (i == count) { print_hash(hash, n); +#if USE_SEMI_PERFECT_HASH + if (semi_perfect) { + printf("#define IR_FOLD_SEMI_PERFECT_HASH\n\n"); + } +#endif printf("static uint32_t _ir_fold_hashkey(uint32_t h)\n{\nreturn ir_rol32((ir_rol32(h, %d) - h), %d) %% %d;\n}\n", r1, r2, n); return 1; } diff --git a/ext/opcache/jit/ir/ir.c b/ext/opcache/jit/ir/ir.c index 2db9b569806..d4e3314f1b1 100644 --- a/ext/opcache/jit/ir/ir.c +++ b/ext/opcache/jit/ir/ir.c @@ -926,7 +926,11 @@ restart: uint32_t k = key & any; uint32_t h = _ir_fold_hashkey(k); uint32_t fh = _ir_fold_hash[h]; - if (IR_FOLD_KEY(fh) == k /*|| (fh = _ir_fold_hash[h+1], (fh & 0x1fffff) == k)*/) { + if (IR_FOLD_KEY(fh) == k +#ifdef IR_FOLD_SEMI_PERFECT_HASH + || (fh = _ir_fold_hash[h+1], (fh & 0x1fffff) == k) +#endif + ) { switch (IR_FOLD_RULE(fh)) { #include "ir_fold.h" default: @@ -1287,6 +1291,7 @@ void ir_use_list_remove_one(ir_ctx *ctx, ir_ref from, ir_ref ref) *p = IR_UNUSED; break; } + p++; j++; } } diff --git a/ext/opcache/jit/ir/ir_aarch64.dasc b/ext/opcache/jit/ir/ir_aarch64.dasc index 891fc1c4e03..08634a91015 100644 --- a/ext/opcache/jit/ir/ir_aarch64.dasc +++ b/ext/opcache/jit/ir/ir_aarch64.dasc @@ -4309,7 +4309,7 @@ static void ir_emit_switch(ir_ctx *ctx, uint32_t b, ir_ref def, ir_insn *insn) } /* Generate a table jmp or a sequence of calls */ - if ((max.i64-min.i64) < count * 8) { + if (count > 2 && (max.i64-min.i64) < count * 8) { int *labels = ir_mem_malloc(sizeof(int) * (max.i64 - min.i64 + 1)); for (i = 0; i <= (max.i64 - min.i64); i++) { diff --git a/ext/opcache/jit/ir/ir_cfg.c b/ext/opcache/jit/ir/ir_cfg.c index 20600c97abe..824cdb61c93 100644 --- a/ext/opcache/jit/ir/ir_cfg.c +++ b/ext/opcache/jit/ir/ir_cfg.c @@ -2027,13 +2027,10 @@ static int ir_schedule_blocks_bottom_up(ir_ctx *ctx) ir_chain *chains; ir_bitqueue worklist; ir_bitset visited; - uint32_t *empty, count; 
-#ifdef IR_DEBUG - uint32_t empty_count = 0; -#endif + uint32_t *schedule_end, count; ctx->cfg_schedule = ir_mem_malloc(sizeof(uint32_t) * (ctx->cfg_blocks_count + 2)); - empty = ctx->cfg_schedule + ctx->cfg_blocks_count; + schedule_end = ctx->cfg_schedule + ctx->cfg_blocks_count; /* 1. Create initial chains for each BB */ chains = ir_mem_malloc(sizeof(ir_chain) * (ctx->cfg_blocks_count + 1)); @@ -2083,11 +2080,8 @@ restart: /* move empty blocks to the end */ IR_ASSERT(chains[b].head == b); chains[b].head = 0; -#ifdef IR_DEBUG - empty_count++; -#endif - *empty = b; - empty--; + *schedule_end = b; + schedule_end--; if (successor > b) { bb_freq[successor] += bb_freq[b]; @@ -2168,14 +2162,22 @@ restart: } else { prob1 = prob2 = 50; } - IR_ASSERT(edges_count < max_edges_count); - freq = bb_freq[b] * (float)prob1 / (float)probN; - if (successor1 > b) { - IR_ASSERT(!ir_bitset_in(visited, successor1)); - bb_freq[successor1] += freq; - ir_bitqueue_add(&worklist, successor1); - } do { + freq = bb_freq[b] * (float)prob1 / (float)probN; + if (successor1 > b) { + IR_ASSERT(!ir_bitset_in(visited, successor1)); + bb_freq[successor1] += freq; + if (successor1_bb->successors_count == 0 && insn1->op2 == 1) { + /* move cold block without successors to the end */ + IR_ASSERT(chains[successor1].head == successor1); + chains[successor1].head = 0; + *schedule_end = successor1; + schedule_end--; + break; + } else { + ir_bitqueue_add(&worklist, successor1); + } + } /* try to join edges early to reduce number of edges and the cost of their sorting */ if (prob1 > prob2 && (successor1_bb->flags & (IR_BB_START|IR_BB_ENTRY|IR_BB_EMPTY)) != IR_BB_EMPTY) { @@ -2187,19 +2189,28 @@ restart: if (!IR_DEBUG_BB_SCHEDULE_GRAPH) break; } successor1 = _ir_skip_empty_blocks(ctx, successor1); + IR_ASSERT(edges_count < max_edges_count); edges[edges_count].from = b; edges[edges_count].to = successor1; edges[edges_count].freq = freq; edges_count++; } while (0); - IR_ASSERT(edges_count < max_edges_count); - 
freq = bb_freq[b] * (float)prob2 / (float)probN; - if (successor2 > b) { - IR_ASSERT(!ir_bitset_in(visited, successor2)); - bb_freq[successor2] += freq; - ir_bitqueue_add(&worklist, successor2); - } do { + freq = bb_freq[b] * (float)prob2 / (float)probN; + if (successor2 > b) { + IR_ASSERT(!ir_bitset_in(visited, successor2)); + bb_freq[successor2] += freq; + if (successor2_bb->successors_count == 0 && insn2->op2 == 1) { + /* move cold block without successors to the end */ + IR_ASSERT(chains[successor2].head == successor2); + chains[successor2].head = 0; + *schedule_end = successor2; + schedule_end--; + break; + } else { + ir_bitqueue_add(&worklist, successor2); + } + } if (prob2 > prob1 && (successor2_bb->flags & (IR_BB_START|IR_BB_ENTRY|IR_BB_EMPTY)) != IR_BB_EMPTY) { uint32_t src = chains[b].next; @@ -2210,6 +2221,7 @@ restart: if (!IR_DEBUG_BB_SCHEDULE_GRAPH) break; } successor2 = _ir_skip_empty_blocks(ctx, successor2); + IR_ASSERT(edges_count < max_edges_count); edges[edges_count].from = b; edges[edges_count].to = successor2; edges[edges_count].freq = freq; @@ -2242,7 +2254,6 @@ restart: } else { prob = 100 / bb->successors_count; } - IR_ASSERT(edges_count < max_edges_count); freq = bb_freq[b] * (float)prob / 100.0f; if (successor > b) { IR_ASSERT(!ir_bitset_in(visited, successor)); @@ -2250,6 +2261,7 @@ restart: ir_bitqueue_add(&worklist, successor); } successor = _ir_skip_empty_blocks(ctx, successor); + IR_ASSERT(edges_count < max_edges_count); edges[edges_count].from = b; edges[edges_count].to = successor; edges[edges_count].freq = freq; @@ -2383,7 +2395,7 @@ restart: } } - IR_ASSERT(count + empty_count == ctx->cfg_blocks_count); + IR_ASSERT(ctx->cfg_schedule + count == schedule_end); ctx->cfg_schedule[ctx->cfg_blocks_count + 1] = 0; ir_mem_free(edges); @@ -2401,17 +2413,14 @@ static int ir_schedule_blocks_top_down(ir_ctx *ctx) uint32_t b, best_successor, last_non_empty; ir_block *bb, *best_successor_bb; ir_insn *insn; - uint32_t *list, *empty; + uint32_t 
*list, *schedule_end; uint32_t count = 0; -#ifdef IR_DEBUG - uint32_t empty_count = 0; -#endif ir_bitqueue_init(&blocks, ctx->cfg_blocks_count + 1); blocks.pos = 0; list = ir_mem_malloc(sizeof(uint32_t) * (ctx->cfg_blocks_count + 2)); list[ctx->cfg_blocks_count + 1] = 0; - empty = list + ctx->cfg_blocks_count; + schedule_end = list + ctx->cfg_blocks_count; for (b = 1; b <= ctx->cfg_blocks_count; b++) { ir_bitset_incl(blocks.set, b); } @@ -2431,11 +2440,8 @@ static int ir_schedule_blocks_top_down(ir_ctx *ctx) } if ((bb->flags & (IR_BB_START|IR_BB_ENTRY|IR_BB_EMPTY)) == IR_BB_EMPTY) { /* move empty blocks to the end */ -#ifdef IR_DEBUG - empty_count++; -#endif - *empty = b; - empty--; + *schedule_end = b; + schedule_end--; } else { count++; list[count] = b; @@ -2520,7 +2526,7 @@ static int ir_schedule_blocks_top_down(ir_ctx *ctx) } while (1); } - IR_ASSERT(count + empty_count == ctx->cfg_blocks_count); + IR_ASSERT(list + count == schedule_end); ctx->cfg_schedule = list; ir_bitqueue_free(&blocks); diff --git a/ext/opcache/jit/ir/ir_fold.h b/ext/opcache/jit/ir/ir_fold.h index 8a50641e5ea..9ff0f6d9db1 100644 --- a/ext/opcache/jit/ir/ir_fold.h +++ b/ext/opcache/jit/ir/ir_fold.h @@ -2508,6 +2508,7 @@ IR_FOLD(MUL(MUL, C_I8)) IR_FOLD(MUL(MUL, C_I16)) IR_FOLD(MUL(MUL, C_I32)) IR_FOLD(MUL(MUL, C_I64)) +IR_FOLD(MUL(MUL, C_ADDR)) { if (IR_IS_CONST_REF(op1_insn->op2) && !IR_IS_SYM_CONST(ctx->ir_base[op1_insn->op2].op)) { /* (x * c1) * c2 => x * (c1 * c2) */ @@ -2527,6 +2528,7 @@ IR_FOLD(AND(AND, C_I8)) IR_FOLD(AND(AND, C_I16)) IR_FOLD(AND(AND, C_I32)) IR_FOLD(AND(AND, C_I64)) +IR_FOLD(AND(AND, C_ADDR)) { if (IR_IS_CONST_REF(op1_insn->op2) && !IR_IS_SYM_CONST(ctx->ir_base[op1_insn->op2].op)) { /* (x & c1) & c2 => x & (c1 & c2) */ @@ -2546,6 +2548,7 @@ IR_FOLD(OR(OR, C_I8)) IR_FOLD(OR(OR, C_I16)) IR_FOLD(OR(OR, C_I32)) IR_FOLD(OR(OR, C_I64)) +IR_FOLD(OR(OR, C_ADDR)) { if (IR_IS_CONST_REF(op1_insn->op2) && !IR_IS_SYM_CONST(ctx->ir_base[op1_insn->op2].op)) { /* (x | c1) | c2 => x | 
(c1 | c2) */ @@ -2565,6 +2568,7 @@ IR_FOLD(XOR(XOR, C_I8)) IR_FOLD(XOR(XOR, C_I16)) IR_FOLD(XOR(XOR, C_I32)) IR_FOLD(XOR(XOR, C_I64)) +IR_FOLD(XOR(XOR, C_ADDR)) { if (IR_IS_CONST_REF(op1_insn->op2) && !IR_IS_SYM_CONST(ctx->ir_base[op1_insn->op2].op)) { /* (x ^ c1) ^ c2 => x ^ (c1 ^ c2) */ diff --git a/ext/opcache/jit/ir/ir_ra.c b/ext/opcache/jit/ir/ir_ra.c index 4860dae0ca0..44d00e27cb3 100644 --- a/ext/opcache/jit/ir/ir_ra.c +++ b/ext/opcache/jit/ir/ir_ra.c @@ -1554,6 +1554,10 @@ static bool ir_vregs_inside(ir_ctx *ctx, uint32_t parent, uint32_t child) ir_live_interval *child_ival = ctx->live_intervals[child]; ir_live_interval *parent_ival = ctx->live_intervals[parent]; + if ((child_ival->flags | parent_ival->flags) & IR_LIVE_INTERVAL_COALESCED) { + // TODO: Support valid cases with already coalesced "parent_ival" + return 0; + } #if 0 if (child_ival->end >= parent_ival->end) { return 0; } @@ -1629,6 +1633,13 @@ static void ir_vregs_coalesce(ir_ctx *ctx, uint32_t v1, uint32_t v2, ir_ref from uint16_t f1 = ctx->live_intervals[v1]->flags; uint16_t f2 = ctx->live_intervals[v2]->flags; +#if 0 + if (ctx->binding) { + ir_ref b1 = ir_binding_find(ctx, from); + ir_ref b2 = ir_binding_find(ctx, to); + IR_ASSERT(b1 == b2); + } +#endif if ((f1 & IR_LIVE_INTERVAL_COALESCED) && !(f2 & IR_LIVE_INTERVAL_COALESCED)) { ir_vregs_join(ctx, v1, v2); ctx->vregs[to] = v1; @@ -1971,6 +1982,13 @@ int ir_coalesce(ir_ctx *ctx) && ctx->vregs[insn->op1] && ctx->vregs[i] != ctx->vregs[insn->op1]) { if (ir_vregs_inside(ctx, ctx->vregs[insn->op1], ctx->vregs[i])) { + if (ctx->binding) { + ir_ref b1 = ir_binding_find(ctx, i); + ir_ref b2 = ir_binding_find(ctx, insn->op1); + if (b1 != b2) { + continue; + } + } ir_vregs_coalesce(ctx, ctx->vregs[i], ctx->vregs[insn->op1], i, insn->op1); compact = 1; } diff --git a/ext/opcache/jit/ir/ir_x86.dasc b/ext/opcache/jit/ir/ir_x86.dasc index c853ec2c929..9afd8ace1d3 100644 --- a/ext/opcache/jit/ir/ir_x86.dasc +++ b/ext/opcache/jit/ir/ir_x86.dasc @@ -7746,7
+7746,7 @@ static void ir_emit_switch(ir_ctx *ctx, uint32_t b, ir_ref def, ir_insn *insn) } /* Generate a table jmp or a seqence of calls */ - if ((max.i64-min.i64) < count * 8) { + if (count > 2 && (max.i64-min.i64) < count * 8) { int *labels = ir_mem_malloc(sizeof(int) * (size_t)(max.i64 - min.i64 + 1)); for (i = 0; i <= (max.i64 - min.i64); i++) {