From caf102dfaef1ef4191c76a4502c6b4fef20cba46 Mon Sep 17 00:00:00 2001
From: Dmitry Stogov
Date: Mon, 23 Oct 2023 10:15:52 +0300
Subject: [PATCH] A new PHP JIT implementation based on IR JIT framework (#12079)

* IR update
* Use folding to allow constant folding and common subexpression elimination
* Implement IR JIT for INIT_FCALL, INIT_FCALL_BY_NAME and INIT_NS_FCALL_BY_NAME
* Implement IR JIT for SEND_VAL and SEND_VAL_EX
* Implement IR JIT for SEND_REF
* Implement IR JIT for SEND_VAR* instructions (incomplete - a few test failures)
* Implement IR JIT for CHECK_FUNC_ARG
* Implement IR JIT for CHECK_UNDEF_ARGS
* Implement IR JIT for ROPE_INIT, ROPE_ADD and ROPE_END
* Implement IR JIT for FREE, FE_FREE, ECHO, STRLEN and COUNT
* Implement IR JIT for IN_ARRAY
* Implement IR JIT support for separate VM stack overflow check
* Implement IR JIT for INIT_DYNAMIC_CALL
* Implement IR JIT for INIT_METHOD_CALL
* Fix IR JIT for IN_ARRAY and COUNT
* Implement IR JIT for VERIFY_RETURN_TYPE
* Force C compiler to store preserved registers to allow the JIT to use them
* Implement IR JIT for DO_FCALL, DO_UCALL, DO_ICALL and DO_FCALL_BY_NAME
* Implement IR JIT for FETCH_CONSTANT
* Fix (reverse) guard conditions
* Implement IR JIT for RECV and RECV_INIT
* Implement IR JIT for RETURN
* Implement IR JIT for BIND_GLOBAL
* Fix guard for: int++ => double
* Fix exception handling
* Allow deoptimization of zval type only (if some register is spilled by the IR engine)
* Fix overflow handling
* Implement IR JIT for FE_RESET_R and FE_FETCH_R
* Eliminate extra temporary register
* Better register usage
* Implement IR JIT for FETCH_DIM_* and ISSET_DIM
* Implement IR JIT for ASSIGN_DIM and ASSIGN_DIM_OP
* cleanup
* Generate IR that produces better x86[_64] code
* Allow trace register allocation for live ranges terminated before entering a called function
* Remove following END->BEGIN nodes during IR construction
* Remove useless (duplicate) guard
* Avoid useless exception check
* Prevent duplicate store
* Eliminate repeated re-assignment of stack zval types
* Enable combination of some instructions with the following SEND_VAL for IR JIT
* Avoid generation of useless RLOADs
* Eliminate refcounting in a sequence of FETCH_DIM_R
* Fix assertion
* Remove ZREG_ZVAL_ADDREF flag from an element of abstract stack
* Implement IR JIT for FETCH_OBJ_*
* Implement IR JIT for ASSIGN_OBJ
* Implement IR JIT for ASSIGN_OBJ_OP
* cleanup
* Implement IR JIT for (PRE/POST)_(INC/DEC)_OBJ
* ws
* cleanup
* Fix IR JIT for constructor call
* Fix opcache.jit=1201 IR JIT.
  With opcache.jit=1201 we still have to generate code for follow and target
  basic blocks with a single exiting VM instruction. We may just omit the
  entry point.
* Fix IR construction for the case when both IF targets are the same
* Avoid PHP LEAVE code duplication in function IR JIT.
* Reload operands from memory on overflow (this improves hot code)
* Implement IR JIT for SWITCH_LONG, SWITCH_STRING and MATCH
* Initialize result to IS_UNDEF
* Fix JIT integration with observer (Zend/tests/gh10346.phpt failure)
* Fix incorrect compilation of FE_FETCH with predicted empty array
* Fix register allocation
* Use sign extension instead of zero extension
* Fix trace register allocator
* cleanup
* Fix address sanitizer warning
* Calculate JIT trace prologue size on startup (to avoid magic constants).
* Add checks for merge arrays overflow (this should be refactored using lists)
* Cache TLS access to perform the corresponding read once per basic block
* cleanup unused variable
* Fix IR JIT support for CLANG build (CALL VM without global register variables)
* Fix IR JIT for CALL VM with global register variables
* Allow %rbp usage in JIT for CALL VM (we save and restore it in prologue/epilogue anyway)
* cleanup
* Allocate enough fixed stack to keep preserved registers
* We don't have to care about x29 and x30
* cleanup (JMPZ/NZ_EX work fine)
* Revert "cleanup (JMPZ/NZ_EX work fine)"
  This reverts commit cf8dd74a040e225d290d8ac4f5e33df638e6f8b8.
* Don't allocate register for PHP variables that are loaded from memory and used once
* Eliminate redundant deoptimization stores
* cleanup
* cleanup
* cleanup
* Optimization for constant comparison
* Cleanup and elimination of dead deoptimization stores
* Eliminate duplicate constant loading
* Set proper initial SP offset info for GDB backtraces
  This doesn't take into account the following SP/FP modifications
* Add spill stores
* Remove low limit on number of deoptimization constants
* Emit dead code only when it's really necessary for the IR graph
* cleanup
* cleanup
* Prefer loading long constants from memory (instead of loading an immediate value)
* Register disasm labels using macros (add missing helpers)
* Make the IR framework care about GUARD JMP reordering
* Avoid reloading
* Improve register allocation for IR tracing JIT
* Add comment
* Fix deoptimization on result type guard of FETCH_DIM_R and FETCH_OBJ_R
* If the HYBRID VM can't provide stack space for JIT code in the "red zone", the JIT has to reserve stack space itself
* Dump IR for stubs only if disassembling of stubs is requested
* Revert "Dump IR for stubs only if disassembling of stubs is requested"
  This reverts commit d8b56bec129bc23c2b16f1f3c6367190181b6fdb.
* Dump IR for stubs only if disassembling of stubs is requested (another approach)
* Improve overflow deoptimization for ADD(_,1) and SUB(_,1)
  Now we deoptimize to the next instruction, load the constant result, and
  remove op1 from the SNAPSHOT
* Switch to IR Builder API
* Switch to new IR builder macros
* Fix jit_set_Z_TYPE_INFO() call. op3 is a simple constant (not an ir_ref).
* Generate better code
* Enable empty ENTRY block merging
* Improve code generated for array separation/creation before an update (ASSIGN_DIM, ASSIGN_DIM_OP, etc.)
* Fix incorrect deletion of PHI source (op1 is used for control link)
* Load constant once
* cleanup
* Improve control-flow to avoid two IS_ARRAY checks for REFERENCEs
* Update comments
* cleanup
* Cleanup comments
* Fix AArch64 build (disable stack adjustment auto-detection)
* Add filename and line number to closure names
* Reserve stack for parameter passing
* Increase size of CPU stack reserved for JIT-ed code
* Fix address sanitizer warnings
* Cleanup: introduce OPTIMIZE_FOR_SIZE macro (disabled by default)
* Port 08e759120690520e99f9f2d38afeb21bcd1de197 to IR JIT
  Fix (at least part of) GH-10635: ARM64 function JIT causes impossible assertion
* cleanup
* Preload constant and use tests that may be compiled into better code
* Convert helpers to stubs
* Introduce a helper data structure (ir_refs) to collect references for subsequent use in (MERGE/PHI)_N
* Use ir_refs
* Improve code generated by zend_jit_zval_copy_deref()
* Use "cold" attribute to influence IR block scheduler and achieve better code layout
* Keep info collected by recursion analyzer
* Use HTTPS URL to allow fetching without an SSH key
* Update IR
* Update IR
* Add IR JIT support for Windows (Win64 support is incomplete)
* Update IR
* Update IR
* Fix support for Windows ZTS build
* Fix stack alignment
* Cleanup ir_ctx.control usage
* Fixed support for irreducible (incomplete) and merged loops
* Revert "Fixed support for irreducible (incomplete) and merged loops"
  This reverts commit 672b5b89f47e8b81745fb73c86e0bcb0937daf16.
* Generate better code for RECV entries
* Use simpler and more efficient checks
* Switch to new ENTRY node concept
* Limit register usage across the OSR ENTRY point
* Update MEM type only if we write to memory
* Use LOOP_END without a reference edge
* Use new ir_init() prototype
* Delay LOAD for better LOAD fusion
* Fix RECV/RECV_INIT compilation with opcache.jit=1235
* Properly compile fake closures (they may be called as regular functions)
* Fix rebase
* Fix rebase and add --with-capstone support for IR JIT
* Replace zend_uchar -> uint8_t
* IR JIT support for delayed destructor for zend_assign_to_typed_ref/prop
* Handle zend_execute_internal in IR JIT
* Fix readonly+clone IR JIT issues
* Switch to ir_ctx.mflags
* Cleanup "inputs_count" access
* Disable CSE for nodes bound to PHP local variables
  The stack slots for temporary variables may be reused, and in case of
  spilling this may cause clobbering of the value.
  (ext/standard/tests/strings/htmlentities20.phpt on x86 with tracing JIT)
* Fix deoptimization code when linking traces
  See ext/zlib/tests/bug75273.phpt failure
* Fix missing type store
  This fixes ext/openssl/tests/openssl_error_string_basic_openssl3.phpt
* Fix tracing JIT for overflowing INC/DEC
  Fixes tests/lang/operators/preinc_basiclong_64bit.phpt
* Remove ir_remove_unreachable_blocks() call.
  Now it's called by ir_build_cfg(), when necessary.
* IR JIT: Fixed inaccurate range inference usage for UNDEF/NULL/FALSE
* IR JIT: Fixed GH-11127 (JIT fault)
* Avoid allocation of unused exit point
* Don't record already stored PHP variables in SNAPSHOTs
* Delay variable load
* Disable CSE across ENTRY
* Fixed disabling CSE
* Fix deoptimization
* Fixed deoptimization
* Disable incorrect register allocation
* Fix JIT for IDENTICAL+JMPZ_EX
* Add comments
* Fixed missed type stores
* IR JIT: added support for CLDEMOTE
* Fixed incorrect constant usage
* Disable compilation of PHP functions with irreducible CFG
* Fixed liveness check
* Fixed code for constant conditional jump
* Add type store to avoid use-after-free
* Fixed liveness analysis
* Generate SNAPSHOT for virtual method calls
* More accurate search for statically inferred info about a trace SSA variable
* Fix incorrect use of result type_info
* Fix JMPZ/NZ_EX support and missing type store
* Fixed trace type inference and missing type store
* Store type of unused CV to prevent a possible use-after-free later
* Fixed deoptimization info
* Fixed stack layout
* Implemented support for veneers on AArch64
* Disable CSE to avoid over-optimization
* Don't bind nodes for TMP PHP variables
* Re-enable CSE for temporary variables as we don't bind them anymore
* Switch to CPU stack spill slots
* Add codegen info dump
* Initialize CV variables through FP (this enables some folding optimizations)
* Use zero-extension that can be eliminated
* Avoid generation of dead PHIs
* Increase preallocated spill stack size
* Enable IR-based JIT by default
* Fixed build with --disable-opcache-jit
* Use explicit type conversion & force loading values into registers
* Fix IR build
* Checkout submodules in GitHub Actions
* Fixed Windows build
* Fixed Windows build
* Fixed reattach to IR JIT SHM
* Update IR
* Checkout submodules in nightly CI
* Fix macOS ZTS in IR JIT
* Update IR
* Fixed incorrect register allocation
* Fixed incorrect code generation
* Fixed tracing JIT for BIND_INIT_STATIC_OR_JMP
* Update README
* Typos
* Revert JIT disabling for run-tests.php workers
* Fixed code review issues
* Update IR
* Update IR
* Update IR
* Allow exit_point duplication when the deoptimization info differs because of spilling
* Use bound spill slots for CV (once again)
* Improve error handling
* Removed IR submodule
* Remove IR submodule from workflows
* Embed IR
  IR commit: 8977307f4e96ee03847d7f2eb809b3080f9ed662
* Add .gitignore
* Fixed according to feedback
* Force C code to save preserved registers only for the HYBRID VM
* Update IR
  IR commit: a2f8452b3d35a756cba38924f5c51a48a7207494
* cleanup
* Replace ZEND_ASSERT(0) by ZEND_UNREACHABLE()
* Update IR and remove unused IR files
  IR commit: 399a38771393c202a741336643118991290b4b1b
* Fixed inconsistency between IR code-generation and register-allocation
* Update IR
  IR commit: 86685504274b0c71d9985b3c926dccaca2cacf9b
* Update ir_PHI*() according to IR construction API changes
* Fixed 32-bit build
* Update IR
  IR commit: d0686408e20cd8c8640e37ed52ab81403a2383cb
* Support for ir_TAILCALL() prototype changes
* Update IR
  IR commit: d72ae866e09d17e879378767aceb91d51894818c
* Fixed incorrect extension (ZEXT->SEXT)
* Fix SSA dominance
* Update IR
  IR commit: d60d92516dc5f89b93cdf1df7a54141e83226b07
* Fixed support for ir_ctx.ret_type
---
 Zend/zend_vm_execute.h | 16 +-
 Zend/zend_vm_execute.skl | 10 +
 Zend/zend_vm_gen.php | 8 +-
 Zend/zend_vm_opcodes.h | 2 +-
 build/Makefile.global | 5 +
 ext/opcache/config.m4 | 70 +-
 ext/opcache/config.w32 | 43 +-
 ext/opcache/jit/Makefile.frag | 27 +
 ext/opcache/jit/Makefile.frag.w32 | 46 +
 ext/opcache/jit/README-IR.md | 32 +
 ext/opcache/jit/ir/.gitignore | 22 +
 ext/opcache/jit/ir/LICENSE | 21 +
 ext/opcache/jit/ir/README | 2 +
 ext/opcache/jit/ir/dynasm/dasm_arm.h | 461 +
 ext/opcache/jit/ir/dynasm/dasm_arm.lua | 1125 ++
 ext/opcache/jit/ir/dynasm/dasm_arm64.h | 570 +
 ext/opcache/jit/ir/dynasm/dasm_arm64.lua | 1219 ++
 ext/opcache/jit/ir/dynasm/dasm_mips.h | 424 +
 ext/opcache/jit/ir/dynasm/dasm_mips.lua | 1181 ++
 ext/opcache/jit/ir/dynasm/dasm_mips64.lua | 12 +
 ext/opcache/jit/ir/dynasm/dasm_ppc.h | 423 +
 ext/opcache/jit/ir/dynasm/dasm_ppc.lua | 1919 +++
 ext/opcache/jit/ir/dynasm/dasm_proto.h | 83 +
 ext/opcache/jit/ir/dynasm/dasm_x64.lua | 12 +
 ext/opcache/jit/ir/dynasm/dasm_x86.h | 546 +
 ext/opcache/jit/ir/dynasm/dasm_x86.lua | 2390 +++
 ext/opcache/jit/ir/dynasm/dynasm.lua | 1095 ++
 ext/opcache/jit/ir/dynasm/minilua.c | 7770 +++++++++
 ext/opcache/jit/ir/gen_ir_fold_hash.c | 261 +
 ext/opcache/jit/ir/ir.c | 2322 +++
 ext/opcache/jit/ir/ir.h | 924 ++
 ext/opcache/jit/ir/ir_aarch64.dasc | 5564 +++++++
 ext/opcache/jit/ir/ir_aarch64.h | 173 +
 ext/opcache/jit/ir/ir_builder.h | 639 +
 ext/opcache/jit/ir/ir_cfg.c | 1219 ++
 ext/opcache/jit/ir/ir_check.c | 381 +
 ext/opcache/jit/ir/ir_disasm.c | 832 +
 ext/opcache/jit/ir/ir_dump.c | 713 +
 ext/opcache/jit/ir/ir_elf.h | 101 +
 ext/opcache/jit/ir/ir_emit.c | 608 +
 ext/opcache/jit/ir/ir_fold.h | 2129 +++
 ext/opcache/jit/ir/ir_gcm.c | 897 ++
 ext/opcache/jit/ir/ir_gdb.c | 642 +
 ext/opcache/jit/ir/ir_patch.c | 270 +
 ext/opcache/jit/ir/ir_perf.c | 266 +
 ext/opcache/jit/ir/ir_php.h | 37 +
 ext/opcache/jit/ir/ir_private.h | 1206 ++
 ext/opcache/jit/ir/ir_ra.c | 3870 +++++
 ext/opcache/jit/ir/ir_save.c | 128 +
 ext/opcache/jit/ir/ir_sccp.c | 885 ++
 ext/opcache/jit/ir/ir_strtab.c | 227 +
 ext/opcache/jit/ir/ir_x86.dasc | 9056 +++++++++++
 ext/opcache/jit/ir/ir_x86.h | 226 +
 ext/opcache/jit/ir/y.txt | 6 +
 ext/opcache/jit/zend_jit.c | 833 +-
 ext/opcache/jit/zend_jit.h | 32 +
 ext/opcache/jit/zend_jit_internal.h | 140 +-
 ext/opcache/jit/zend_jit_ir.c | 16531 ++++++++++++++++++++
 ext/opcache/jit/zend_jit_trace.c | 1786 ++-
 ext/opcache/jit/zend_jit_vm_helpers.c | 2 +
 60 files changed, 71906 insertions(+), 534 deletions(-)
 create mode 100644 ext/opcache/jit/README-IR.md
 create mode 100644 ext/opcache/jit/ir/.gitignore
 create mode 100644 ext/opcache/jit/ir/LICENSE
 create mode 100644 ext/opcache/jit/ir/README
 create mode 100644 ext/opcache/jit/ir/dynasm/dasm_arm.h
 create mode 100644 ext/opcache/jit/ir/dynasm/dasm_arm.lua
 create mode 100644 ext/opcache/jit/ir/dynasm/dasm_arm64.h
 create mode 100644 ext/opcache/jit/ir/dynasm/dasm_arm64.lua
 create mode 100644 ext/opcache/jit/ir/dynasm/dasm_mips.h
 create mode 100644 ext/opcache/jit/ir/dynasm/dasm_mips.lua
 create mode 100644 ext/opcache/jit/ir/dynasm/dasm_mips64.lua
 create mode 100644 ext/opcache/jit/ir/dynasm/dasm_ppc.h
 create mode 100644 ext/opcache/jit/ir/dynasm/dasm_ppc.lua
 create mode 100644 ext/opcache/jit/ir/dynasm/dasm_proto.h
 create mode 100644 ext/opcache/jit/ir/dynasm/dasm_x64.lua
 create mode 100644 ext/opcache/jit/ir/dynasm/dasm_x86.h
 create mode 100644 ext/opcache/jit/ir/dynasm/dasm_x86.lua
 create mode 100644 ext/opcache/jit/ir/dynasm/dynasm.lua
 create mode 100644 ext/opcache/jit/ir/dynasm/minilua.c
 create mode 100644 ext/opcache/jit/ir/gen_ir_fold_hash.c
 create mode 100644 ext/opcache/jit/ir/ir.c
 create mode 100644 ext/opcache/jit/ir/ir.h
 create mode 100644 ext/opcache/jit/ir/ir_aarch64.dasc
 create mode 100644 ext/opcache/jit/ir/ir_aarch64.h
 create mode 100644
ext/opcache/jit/ir/ir_builder.h create mode 100644 ext/opcache/jit/ir/ir_cfg.c create mode 100644 ext/opcache/jit/ir/ir_check.c create mode 100644 ext/opcache/jit/ir/ir_disasm.c create mode 100644 ext/opcache/jit/ir/ir_dump.c create mode 100644 ext/opcache/jit/ir/ir_elf.h create mode 100644 ext/opcache/jit/ir/ir_emit.c create mode 100644 ext/opcache/jit/ir/ir_fold.h create mode 100644 ext/opcache/jit/ir/ir_gcm.c create mode 100644 ext/opcache/jit/ir/ir_gdb.c create mode 100644 ext/opcache/jit/ir/ir_patch.c create mode 100644 ext/opcache/jit/ir/ir_perf.c create mode 100644 ext/opcache/jit/ir/ir_php.h create mode 100644 ext/opcache/jit/ir/ir_private.h create mode 100644 ext/opcache/jit/ir/ir_ra.c create mode 100644 ext/opcache/jit/ir/ir_save.c create mode 100644 ext/opcache/jit/ir/ir_sccp.c create mode 100644 ext/opcache/jit/ir/ir_strtab.c create mode 100644 ext/opcache/jit/ir/ir_x86.dasc create mode 100644 ext/opcache/jit/ir/ir_x86.h create mode 100644 ext/opcache/jit/ir/y.txt create mode 100644 ext/opcache/jit/zend_jit_ir.c diff --git a/Zend/zend_vm_execute.h b/Zend/zend_vm_execute.h index ccfa626f901..c352902bbe9 100644 --- a/Zend/zend_vm_execute.h +++ b/Zend/zend_vm_execute.h @@ -53444,14 +53444,14 @@ ZEND_API void execute_ex(zend_execute_data *ex) #if defined(ZEND_VM_IP_GLOBAL_REG) || defined(ZEND_VM_FP_GLOBAL_REG) struct { +#ifdef ZEND_VM_HYBRID_JIT_RED_ZONE_SIZE + char hybrid_jit_red_zone[ZEND_VM_HYBRID_JIT_RED_ZONE_SIZE]; +#endif #ifdef ZEND_VM_IP_GLOBAL_REG const zend_op *orig_opline; #endif #ifdef ZEND_VM_FP_GLOBAL_REG zend_execute_data *orig_execute_data; -#ifdef ZEND_VM_HYBRID_JIT_RED_ZONE_SIZE - char hybrid_jit_red_zone[ZEND_VM_HYBRID_JIT_RED_ZONE_SIZE]; -#endif #endif } vm_stack_data; #endif @@ -56960,6 +56960,16 @@ ZEND_API void execute_ex(zend_execute_data *ex) } #endif +#if (ZEND_VM_KIND == ZEND_VM_KIND_HYBRID) + /* Force C compiler to store preserved registers to allow JIT using them */ +# if defined(__GNUC__) && defined(__i386__) + __asm__ __volatile__ (""::: "ebx"); +# elif defined(__GNUC__) && defined(__x86_64__) + __asm__ __volatile__ (""::: "rbx","r12","r13"); +# elif defined(__GNUC__) && defined(__aarch64__) + __asm__ __volatile__ (""::: "x19","x20","x21","x22","x23","x24","x25","x26"); +# endif +#endif LOAD_OPLINE(); ZEND_VM_LOOP_INTERRUPT_CHECK(); diff --git a/Zend/zend_vm_execute.skl b/Zend/zend_vm_execute.skl index 717d4ffd3e8..65aa52962bd 100644 --- a/Zend/zend_vm_execute.skl +++ b/Zend/zend_vm_execute.skl @@ -13,6 +13,16 @@ ZEND_API void {%EXECUTOR_NAME%}_ex(zend_execute_data *ex) {%INTERNAL_LABELS%} +#if (ZEND_VM_KIND == ZEND_VM_KIND_HYBRID) + /* Force C compiler to store preserved registers to allow JIT using them */ +# if defined(__GNUC__) && defined(__i386__) + __asm__ __volatile__ (""::: "ebx"); +# elif defined(__GNUC__) && defined(__x86_64__) + __asm__ __volatile__ (""::: "rbx","r12","r13"); +# elif defined(__GNUC__) && defined(__aarch64__) + __asm__ __volatile__ (""::: "x19","x20","x21","x22","x23","x24","x25","x26"); +# endif +#endif LOAD_OPLINE(); ZEND_VM_LOOP_INTERRUPT_CHECK(); diff --git a/Zend/zend_vm_gen.php b/Zend/zend_vm_gen.php index 79b76538712..978c13dc6b0 100755 --- a/Zend/zend_vm_gen.php +++ b/Zend/zend_vm_gen.php @@ -2046,14 +2046,14 @@ function gen_executor($f, $skl, $spec, $kind, $executor_name, $initializer_name) } else { out($f,"#if defined(ZEND_VM_IP_GLOBAL_REG) || defined(ZEND_VM_FP_GLOBAL_REG)\n"); out($f,$m[1]."struct {\n"); + out($f,"#ifdef ZEND_VM_HYBRID_JIT_RED_ZONE_SIZE\n"); + out($f,$m[1]."\tchar 
hybrid_jit_red_zone[ZEND_VM_HYBRID_JIT_RED_ZONE_SIZE];\n"); + out($f,"#endif\n"); out($f,"#ifdef ZEND_VM_IP_GLOBAL_REG\n"); out($f,$m[1]."\tconst zend_op *orig_opline;\n"); out($f,"#endif\n"); out($f,"#ifdef ZEND_VM_FP_GLOBAL_REG\n"); out($f,$m[1]."\tzend_execute_data *orig_execute_data;\n"); - out($f,"#ifdef ZEND_VM_HYBRID_JIT_RED_ZONE_SIZE\n"); - out($f,$m[1]."\tchar hybrid_jit_red_zone[ZEND_VM_HYBRID_JIT_RED_ZONE_SIZE];\n"); - out($f,"#endif\n"); out($f,"#endif\n"); out($f,$m[1]."} vm_stack_data;\n"); out($f,"#endif\n"); @@ -2339,7 +2339,7 @@ function gen_vm_opcodes_header( $str .= "\n"; $str .= "#if (ZEND_VM_KIND == ZEND_VM_KIND_HYBRID) && !defined(__SANITIZE_ADDRESS__)\n"; $str .= "# if ((defined(i386) && !defined(__PIC__)) || defined(__x86_64__) || defined(_M_X64))\n"; - $str .= "# define ZEND_VM_HYBRID_JIT_RED_ZONE_SIZE 16\n"; + $str .= "# define ZEND_VM_HYBRID_JIT_RED_ZONE_SIZE 48\n"; $str .= "# endif\n"; $str .= "#endif\n"; $str .= "\n"; diff --git a/Zend/zend_vm_opcodes.h b/Zend/zend_vm_opcodes.h index 5531accbf0c..17453f0aed6 100644 --- a/Zend/zend_vm_opcodes.h +++ b/Zend/zend_vm_opcodes.h @@ -36,7 +36,7 @@ #if (ZEND_VM_KIND == ZEND_VM_KIND_HYBRID) && !defined(__SANITIZE_ADDRESS__) # if ((defined(i386) && !defined(__PIC__)) || defined(__x86_64__) || defined(_M_X64)) -# define ZEND_VM_HYBRID_JIT_RED_ZONE_SIZE 16 +# define ZEND_VM_HYBRID_JIT_RED_ZONE_SIZE 48 # endif #endif diff --git a/build/Makefile.global b/build/Makefile.global index dee5fa5ecde..14d90a1f3da 100644 --- a/build/Makefile.global +++ b/build/Makefile.global @@ -122,6 +122,11 @@ clean: rm -f ext/opcache/jit/zend_jit_x86.c rm -f ext/opcache/jit/zend_jit_arm64.c rm -f ext/opcache/minilua + rm -f ext/opcache/jit/ir/gen_ir_fold_hash + rm -f ext/opcache/jit/ir/minilua + rm -f ext/opcache/jit/ir/ir_fold_hash.h + rm -f ext/opcache/jit/ir/ir_emit_x86.h + rm -f ext/opcache/jit/ir/ir_emit_aarch64.h distclean: clean rm -f Makefile config.cache config.log config.status Makefile.objects Makefile.fragments libtool main/php_config.h main/internal_functions_cli.c main/internal_functions.c Zend/zend_dtrace_gen.h Zend/zend_dtrace_gen.h.bak Zend/zend_config.h diff --git a/ext/opcache/config.m4 b/ext/opcache/config.m4 index 58e9b2643f9..4fa403337b0 100644 --- a/ext/opcache/config.m4 +++ b/ext/opcache/config.m4 @@ -24,6 +24,13 @@ PHP_ARG_WITH([capstone],, [no], [no]) +PHP_ARG_ENABLE([opcache-jit-ir], + [whether to enable JIT based on IR framework], + [AS_HELP_STRING([--disable-opcache-jit-ir], + [Disable JIT based on IR framework (use old JIT)])], + [yes], + [no]) + if test "$PHP_OPCACHE" != "no"; then dnl Always build as shared extension @@ -44,7 +51,7 @@ if test "$PHP_OPCACHE" != "no"; then esac fi - if test "$PHP_OPCACHE_JIT" = "yes"; then + if test "$PHP_OPCACHE_JIT" = "yes" -a "$PHP_OPCACHE_JIT_IR" = "no" ; then AC_DEFINE(HAVE_JIT, 1, [Define to enable JIT]) ZEND_JIT_SRC="jit/zend_jit.c jit/zend_jit_gdb.c jit/zend_jit_vm_helpers.c" @@ -86,6 +93,62 @@ if test "$PHP_OPCACHE" != "no"; then PHP_SUBST(DASM_FLAGS) PHP_SUBST(DASM_ARCH) + + JIT_CFLAGS= + + elif test "$PHP_OPCACHE_JIT" = "yes" -a "$PHP_OPCACHE_JIT_IR" = "yes"; then + AC_DEFINE(HAVE_JIT, 1, [Define to enable JIT]) + AC_DEFINE(ZEND_JIT_IR, 1, [Use JIT IR framework]) + ZEND_JIT_SRC="jit/zend_jit.c jit/zend_jit_vm_helpers.c jit/ir/ir.c jit/ir/ir_strtab.c \ + jit/ir/ir_cfg.c jit/ir/ir_sccp.c jit/ir/ir_gcm.c jit/ir/ir_ra.c jit/ir/ir_save.c \ + jit/ir/ir_dump.c jit/ir/ir_gdb.c jit/ir/ir_perf.c jit/ir/ir_check.c \ + jit/ir/ir_patch.c jit/ir/ir_emit.c" + + dnl Find out which ABI we 
are using. + case $host_alias in + x86_64-*-darwin*) + IR_TARGET=IR_TARGET_X64 + DASM_FLAGS="-D X64APPLE=1 -D X64=1" + DASM_ARCH="x86" + ;; + x86_64*) + IR_TARGET=IR_TARGET_X64 + DASM_FLAGS="-D X64=1" + DASM_ARCH="x86" + ;; + i[[34567]]86*) + IR_TARGET=IR_TARGET_X86 + DASM_ARCH="x86" + ;; + x86*) + IR_TARGET=IR_TARGET_X86 + DASM_ARCH="x86" + ;; + aarch64*) + IR_TARGET=IR_TARGET_AARCH64 + DASM_ARCH="aarch64" + ;; + esac + + AS_IF([test x"$with_capstone" = "xyes"],[ + PKG_CHECK_MODULES([CAPSTONE],[capstone >= 3.0.0],[ + AC_DEFINE([HAVE_CAPSTONE], [1], [Capstone is available]) + PHP_EVAL_LIBLINE($CAPSTONE_LIBS, OPCACHE_SHARED_LIBADD) + PHP_EVAL_INCLINE($CAPSTONE_CFLAGS) + ZEND_JIT_SRC+=" jit/ir/ir_disasm.c" + ],[ + AC_MSG_ERROR([capstone >= 3.0 required but not found]) + ]) + ]) + + PHP_SUBST(IR_TARGET) + PHP_SUBST(DASM_FLAGS) + PHP_SUBST(DASM_ARCH) + + JIT_CFLAGS="-I@ext_builddir@/jit/ir -D${IR_TARGET} -DIR_PHP" + if test "$ZEND_DEBUG" = "yes"; then + JIT_CFLAGS="${JIT_CFLAGS} -DIR_DEBUG" + fi fi AC_CHECK_FUNCS([mprotect memfd_create shm_create_largepage]) @@ -310,7 +373,7 @@ int main(void) { shared_alloc_mmap.c \ shared_alloc_posix.c \ $ZEND_JIT_SRC, - shared,,"-Wno-implicit-fallthrough -DZEND_ENABLE_STATIC_TSRMLS_CACHE=1",,yes) + shared,,"-Wno-implicit-fallthrough -DZEND_ENABLE_STATIC_TSRMLS_CACHE=1 ${JIT_CFLAGS}",,yes) PHP_ADD_EXTENSION_DEP(opcache, pcre) @@ -320,6 +383,9 @@ int main(void) { if test "$PHP_OPCACHE_JIT" = "yes"; then PHP_ADD_BUILD_DIR([$ext_builddir/jit], 1) + if test "$PHP_OPCACHE_JIT_IR" = "yes"; then + PHP_ADD_BUILD_DIR([$ext_builddir/jit/ir], 1) + fi PHP_ADD_MAKEFILE_FRAGMENT($ext_srcdir/jit/Makefile.frag) fi PHP_SUBST(OPCACHE_SHARED_LIBADD) diff --git a/ext/opcache/config.w32 b/ext/opcache/config.w32 index 764a2edaab1..da60492b59f 100644 --- a/ext/opcache/config.w32 +++ b/ext/opcache/config.w32 @@ -5,6 +5,8 @@ if (PHP_OPCACHE != "no") { ARG_ENABLE("opcache-jit", "whether to enable JIT", "yes"); + ARG_ENABLE("opcache-jit-ir", "whether to enable JIT based on IR framework", "yes"); + ZEND_EXTENSION('opcache', "\ ZendAccelerator.c \ zend_accelerator_blacklist.c \ @@ -18,7 +20,7 @@ if (PHP_OPCACHE != "no") { zend_shared_alloc.c \ shared_alloc_win32.c", true, "/DZEND_ENABLE_STATIC_TSRMLS_CACHE=1"); - if (PHP_OPCACHE_JIT == "yes") { + if (PHP_OPCACHE_JIT == "yes" && PHP_OPCACHE_JIT_IR == "no") { if (CHECK_HEADER_ADD_INCLUDE("dynasm/dasm_x86.h", "CFLAGS_OPCACHE", PHP_OPCACHE + ";ext\\opcache\\jit")) { var dasm_flags = (X64 ? "-D X64=1" : "") + (X64 ? " -D X64WIN=1" : "") + " -D WIN=1"; if (PHP_ZTS == "yes") { @@ -37,6 +39,45 @@ if (PHP_OPCACHE != "no") { } else { WARNING("JIT not enabled, headers not found"); } + } else if (PHP_OPCACHE_JIT == "yes" && PHP_OPCACHE_JIT_IR == "yes") { + if (CHECK_HEADER_ADD_INCLUDE("ir/ir.h", "CFLAGS_OPCACHE", PHP_OPCACHE + ";ext\\opcache\\jit")) { + var dasm_flags = (X64 ? "-D X64=1" : "") + (X64 ? " -D X64WIN=1" : "") + " -D WIN=1"; + var ir_target = (X64 ? 
"IR_TARGET_X64" : "IR_TARGET_X86"); + var ir_src = "ir_strtab.c ir_cfg.c ir_sccp.c ir_gcm.c ir_ra.c ir_save.c \ + ir_dump.c ir_check.c ir_patch.c"; + + DEFINE("IR_TARGET", ir_target); + DEFINE("DASM_FLAGS", dasm_flags); + DEFINE("DASM_ARCH", "x86"); + + AC_DEFINE('HAVE_JIT', 1, 'Define to enable JIT'); + AC_DEFINE('ZEND_JIT_IR', 1, 'Use JIT IR framework'); + + ADD_FLAG("CFLAGS_OPCACHE", "/I \"ext\\opcache\\jit\\ir\" /D "+ir_target+" /D IR_PHP"); + if (PHP_DEBUG == "yes") { + ADD_FLAG("CFLAGS_OPCACHE", "/D IR_DEBUG"); + } + + if (CHECK_HEADER_ADD_INCLUDE("capstone\\capstone.h", "CFLAGS_OPCACHE", PHP_OPCACHE+ ";" + PHP_PHP_BUILD + "\\include") && + CHECK_LIB("capstone.lib", "opcache", PHP_OPCACHE)) { + AC_DEFINE('HAVE_CAPSTONE', 1, 'capstone support enabled'); + ir_src += " ir_disasm.c"; + } + + ADD_MAKEFILE_FRAGMENT(configure_module_dirname + "\\jit\\Makefile.frag.w32"); + + ADD_SOURCES(configure_module_dirname + "\\jit", + "zend_jit.c zend_jit_vm_helpers.c", + "opcache", "ext\\opcache\\jit"); + ADD_SOURCES(configure_module_dirname + "\\jit\\ir", + "ir.c", "opcache", "ext\\opcache\\jit\\ir"); + ADD_SOURCES(configure_module_dirname + "\\jit\\ir", + "ir_emit.c", "opcache", "ext\\opcache\\jit\\ir"); + ADD_SOURCES(configure_module_dirname + "\\jit\\ir", + ir_src, "opcache", "ext\\opcache\\jit\\ir"); + } else { + WARNING("JIT not enabled, headers not found"); + } } ADD_FLAG('CFLAGS_OPCACHE', "/I " + configure_module_dirname); diff --git a/ext/opcache/jit/Makefile.frag b/ext/opcache/jit/Makefile.frag index f9ae2e0cf4b..07a826764ed 100644 --- a/ext/opcache/jit/Makefile.frag +++ b/ext/opcache/jit/Makefile.frag @@ -1,4 +1,29 @@ +ifdef IR_TARGET +# New IR based JIT +$(builddir)/jit/ir/minilua: $(srcdir)/jit/ir/dynasm/minilua.c + $(BUILD_CC) $(srcdir)/jit/ir/dynasm/minilua.c -lm -o $@ +$(builddir)/jit/ir/ir_emit_$(DASM_ARCH).h: $(srcdir)/jit/ir/ir_$(DASM_ARCH).dasc $(srcdir)/jit/ir/dynasm/*.lua $(builddir)/jit/ir/minilua + $(builddir)/jit/ir/minilua $(srcdir)/jit/ir/dynasm/dynasm.lua $(DASM_FLAGS) -o $@ $(srcdir)/jit/ir/ir_$(DASM_ARCH).dasc + +$(builddir)/jit/ir/ir_emit.lo: \ + $(srcdir)/jit/ir/ir_emit.c $(builddir)/jit/ir/ir_emit_$(DASM_ARCH).h + +$(builddir)/jit/ir/gen_ir_fold_hash: $(srcdir)/jit/ir/gen_ir_fold_hash.c $(srcdir)/jit/ir/ir_strtab.c + $(BUILD_CC) -D${IR_TARGET} -DIR_PHP -DIR_PHP_MM=0 -o $@ $< + +$(builddir)/jit/ir/ir_fold_hash.h: $(builddir)/jit/ir/gen_ir_fold_hash $(srcdir)/jit/ir/ir_fold.h $(srcdir)/jit/ir/ir.h + $(builddir)/jit/ir/gen_ir_fold_hash < $(srcdir)/jit/ir/ir_fold.h > $(builddir)/jit/ir/ir_fold_hash.h + +$(builddir)/jit/ir/ir.lo: \ + $(builddir)/jit/ir/ir_fold_hash.h + +$(builddir)/jit/zend_jit.lo: \ + $(srcdir)/jit/zend_jit_helpers.c \ + $(srcdir)/jit/zend_jit_ir.c + +else +# Old DynAsm based JIT $(builddir)/minilua: $(srcdir)/jit/dynasm/minilua.c $(BUILD_CC) $(srcdir)/jit/dynasm/minilua.c -lm -o $@ @@ -15,6 +40,8 @@ $(builddir)/jit/zend_jit.lo: \ $(srcdir)/jit/zend_jit_trace.c \ $(srcdir)/jit/zend_elf.c +endif + # For non-GNU make, jit/zend_jit.lo and ./jit/zend_jit.lo are considered distinct targets. # Use this workaround to allow building from inside ext/opcache. 
jit/zend_jit.lo: $(builddir)/jit/zend_jit.lo
diff --git a/ext/opcache/jit/Makefile.frag.w32 b/ext/opcache/jit/Makefile.frag.w32
index a9533e98edc..f2c1995fc79 100644
--- a/ext/opcache/jit/Makefile.frag.w32
+++ b/ext/opcache/jit/Makefile.frag.w32
@@ -1,3 +1,48 @@
+!if "$(IR_TARGET)" != ""
+# New IR based JIT
+
+$(BUILD_DIR)\\minilua.exe: ext\opcache\jit\ir\dynasm\minilua.c
+	@if exist $(BUILD_DIR)\\minilua.exe del $(BUILD_DIR)\\minilua.exe
+	$(PHP_CL) /Fo$(BUILD_DIR)\ /Fd$(BUILD_DIR)\ /Fp$(BUILD_DIR)\ /FR$(BUILD_DIR) /Fe$(BUILD_DIR)\minilua.exe ext\opcache\jit\ir\dynasm\minilua.c
+
+ext\opcache\jit\ir\ir_emit_x86.h: ext\opcache\jit\ir\ir_x86.dasc $(BUILD_DIR)\\minilua.exe
+	@if exist ext\opcache\jit\ir\ir_emit_x86.h del ext\opcache\jit\ir\ir_emit_x86.h
+	$(BUILD_DIR)\\minilua.exe ext/opcache/jit/ir/dynasm/dynasm.lua $(DASM_FLAGS) -o $@ ext/opcache/jit/ir/ir_x86.dasc
+
+$(BUILD_DIR)\\gen_ir_fold_hash.exe: ext\opcache\jit\ir\gen_ir_fold_hash.c ext\opcache\jit\ir\ir_strtab.c
+	@if exist $(BUILD_DIR)\\gen_ir_fold_hash.exe del $(BUILD_DIR)\\gen_ir_fold_hash.exe
+	$(PHP_CL) /D $(IR_TARGET) /Fo$(BUILD_DIR)\ /Fd$(BUILD_DIR)\ /Fp$(BUILD_DIR)\ /Fe$(BUILD_DIR)\\gen_ir_fold_hash.exe ext\opcache\jit\ir\gen_ir_fold_hash.c
+
+ext\opcache\jit\ir\ir_fold_hash.h: $(BUILD_DIR)\\gen_ir_fold_hash.exe ext\opcache\jit\ir\ir_fold.h ext\opcache\jit\ir\ir.h
+	@if exist ext\opcache\jit\ir\ir_fold_hash.h del ext\opcache\jit\ir\ir_fold_hash.h
+	$(BUILD_DIR)\\gen_ir_fold_hash.exe < ext\opcache\jit\ir\ir_fold.h > ext\opcache\jit\ir\ir_fold_hash.h
+
+$(BUILD_DIR)\ext\opcache\jit\ir\ir_ra.obj: \
+	ext\opcache\jit\ir\ir.h \
+	ext\opcache\jit\ir\ir_private.h \
+	ext\opcache\jit\ir\ir_x86.h
+
+$(BUILD_DIR)\ext\opcache\jit\ir\ir_emit.obj: \
+	ext\opcache\jit\ir\ir.h \
+	ext\opcache\jit\ir\ir_private.h \
+	ext\opcache\jit\ir\ir_x86.h \
+	ext\opcache\jit\ir\ir_emit_x86.h
+
+$(BUILD_DIR)\ext\opcache\jit\ir\ir.obj: \
+	ext\opcache\jit\ir\ir.h \
+	ext\opcache\jit\ir\ir_private.h \
+	ext\opcache\jit\ir\ir_fold.h \
+	ext\opcache\jit\ir\ir_fold_hash.h
+
+$(BUILD_DIR)\ext\opcache\jit\zend_jit.obj: \
+	ext\opcache\jit\zend_jit_ir.c \
+	ext\opcache\jit\zend_jit_helpers.c \
+	ext\opcache\jit\ir\ir.h \
+	ext\opcache\jit\ir\ir_builder.h
+
+!else
+# Old DynAsm based JIT
+
 $(BUILD_DIR)\\minilua.exe: ext\opcache\jit\dynasm\minilua.c
 	@if exist $(BUILD_DIR)\\minilua.exe del $(BUILD_DIR)\\minilua.exe
 	$(PHP_CL) /Fo$(BUILD_DIR)\ /Fd$(BUILD_DIR)\ /Fp$(BUILD_DIR)\ /FR$(BUILD_DIR) /Fe$(BUILD_DIR)\minilua.exe ext\opcache\jit\dynasm\minilua.c
@@ -14,3 +59,4 @@ $(BUILD_DIR)\ext\opcache\jit\zend_jit.obj: \
 	ext/opcache/jit/zend_jit_perf_dump.c \
 	ext/opcache/jit/zend_jit_trace.c \
 	ext/opcache/jit/zend_jit_vtune.c
+!endif
diff --git a/ext/opcache/jit/README-IR.md b/ext/opcache/jit/README-IR.md
new file mode 100644
index 00000000000..b547792e493
--- /dev/null
+++ b/ext/opcache/jit/README-IR.md
@@ -0,0 +1,32 @@
+New JIT implementation
+======================
+
+This branch provides a new JIT implementation based on [IR - Lightweight
+JIT Compilation Framework](https://github.com/dstogov/ir).
+
+As opposed to the PHP 8.* JIT approach that generates native code directly from
+PHP byte-code, this implementation generates an intermediate representation (IR)
+and delegates all lower-level tasks to the IR Framework. IR is to the JIT what
+an AST is to a compiler.
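To make the "generates IR" step concrete, here is a minimal, editorial sketch of building a tiny function with the IR construction macros. It is not part of this patch: the names (`ir_START`, `ir_PARAM`, `ir_SUB_I32`, `ir_RETURN`) and the implicit-context convention follow the IR project's public examples and `ir_builder.h`, and may differ in detail between IR versions.

```c
/* Hypothetical sketch: build IR for
 *   int32_t my_sub(int32_t x, int32_t y) { return x - y; }
 * The ir_builder.h macros emit nodes into an implicit ir_ctx
 * (by default a variable named "ctx" in scope). */
#include "ir.h"
#include "ir_builder.h"

static void gen_my_sub(ir_ctx *ctx)
{
	ir_START();                              /* open the function's control flow */
	ir_ref x   = ir_PARAM(IR_I32, "x", 1);   /* incoming parameter #1 */
	ir_ref y   = ir_PARAM(IR_I32, "y", 2);   /* incoming parameter #2 */
	ir_ref res = ir_SUB_I32(x, y);           /* pure data node, subject to folding */
	ir_RETURN(res);                          /* close the control flow */
}
```

The PHP-specific part (zend_jit_ir.c) only builds such graphs from VM opcodes; folding/SCCP (ir_sccp.c), global code motion (ir_gcm.c), register allocation (ir_ra.c), and code emission (ir_emit.c) then run inside the framework.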
+Key benefits of the new JIT implementation:
+- Usage of IR opens possibilities for better optimization and register
+  allocation (the resulting native code is more efficient)
+- PHP doesn't have to care about most low-level details (different CPUs,
+  calling conventions, TLS details, etc.)
+- It's much easier to implement support for new targets (e.g. RISC-V)
+- The IR framework is going to be developed separately from PHP and may accept
+  contributions from other projects (new optimizations, improvements, bug fixes)
+
+Disadvantages:
+- JIT compilation becomes slower (this is almost invisible for the tracing
+  JIT, but function JIT compilation of WordPress becomes 4 times slower)
+
+The necessary part of the IR Framework is embedded into php-src, so the PR
+doesn't introduce new dependencies.
+
+The new JIT implementation successfully passes all CI workflows, but it's still
+not mature and may cause failures. To reduce risks, this patch doesn't remove
+the old JIT implementation (which is the same as the PHP 8.3 JIT). It's possible
+to build PHP with the old JIT by configuring with **--disable-opcache-jit-ir**.
+In the future the old implementation should be removed.
diff --git a/ext/opcache/jit/ir/.gitignore b/ext/opcache/jit/ir/.gitignore
new file mode 100644
index 00000000000..7a37a4fd059
--- /dev/null
+++ b/ext/opcache/jit/ir/.gitignore
@@ -0,0 +1,22 @@
+*.o
+*.log
+*.dot
+*.pdf
+ir_fold_hash.h
+ir_emit_x86.h
+ir_emit_aarch64.h
+minilua
+gen_ir_fold_hash
+ir_test
+tester
+ir
+b.c
+
+tests/**/*.diff
+tests/**/*.exp
+tests/**/*.ir
+tests/**/*.out
+tests/**/*.log
+
+win32/vcpkg
+win32/build_*
diff --git a/ext/opcache/jit/ir/LICENSE b/ext/opcache/jit/ir/LICENSE
new file mode 100644
index 00000000000..c43a12a770f
--- /dev/null
+++ b/ext/opcache/jit/ir/LICENSE
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2022 Zend by Perforce
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
diff --git a/ext/opcache/jit/ir/README b/ext/opcache/jit/ir/README
new file mode 100644
index 00000000000..68288d21bfc
--- /dev/null
+++ b/ext/opcache/jit/ir/README
@@ -0,0 +1,2 @@
+This directory contains an embedded version of the IR Framework.
+See the full version at https://github.com/dstogov/ir
diff --git a/ext/opcache/jit/ir/dynasm/dasm_arm.h b/ext/opcache/jit/ir/dynasm/dasm_arm.h
new file mode 100644
index 00000000000..ebcf4ac0ec1
--- /dev/null
+++ b/ext/opcache/jit/ir/dynasm/dasm_arm.h
@@ -0,0 +1,461 @@
+/*
+** DynASM ARM encoding engine.
+** Copyright (C) 2005-2021 Mike Pall. All rights reserved.
+** Released under the MIT license.
See dynasm.lua for full copyright notice. +*/ + +#include +#include +#include +#include + +#define DASM_ARCH "arm" + +#ifndef DASM_EXTERN +#define DASM_EXTERN(a,b,c,d) 0 +#endif + +/* Action definitions. */ +enum { + DASM_STOP, DASM_SECTION, DASM_ESC, DASM_REL_EXT, + /* The following actions need a buffer position. */ + DASM_ALIGN, DASM_REL_LG, DASM_LABEL_LG, + /* The following actions also have an argument. */ + DASM_REL_PC, DASM_LABEL_PC, + DASM_IMM, DASM_IMM12, DASM_IMM16, DASM_IMML8, DASM_IMML12, DASM_IMMV8, + DASM__MAX +}; + +/* Maximum number of section buffer positions for a single dasm_put() call. */ +#define DASM_MAXSECPOS 25 + +/* DynASM encoder status codes. Action list offset or number are or'ed in. */ +#define DASM_S_OK 0x00000000 +#define DASM_S_NOMEM 0x01000000 +#define DASM_S_PHASE 0x02000000 +#define DASM_S_MATCH_SEC 0x03000000 +#define DASM_S_RANGE_I 0x11000000 +#define DASM_S_RANGE_SEC 0x12000000 +#define DASM_S_RANGE_LG 0x13000000 +#define DASM_S_RANGE_PC 0x14000000 +#define DASM_S_RANGE_REL 0x15000000 +#define DASM_S_UNDEF_LG 0x21000000 +#define DASM_S_UNDEF_PC 0x22000000 + +/* Macros to convert positions (8 bit section + 24 bit index). */ +#define DASM_POS2IDX(pos) ((pos)&0x00ffffff) +#define DASM_POS2BIAS(pos) ((pos)&0xff000000) +#define DASM_SEC2POS(sec) ((sec)<<24) +#define DASM_POS2SEC(pos) ((pos)>>24) +#define DASM_POS2PTR(D, pos) (D->sections[DASM_POS2SEC(pos)].rbuf + (pos)) + +/* Action list type. */ +typedef const unsigned int *dasm_ActList; + +/* Per-section structure. */ +typedef struct dasm_Section { + int *rbuf; /* Biased buffer pointer (negative section bias). */ + int *buf; /* True buffer pointer. */ + size_t bsize; /* Buffer size in bytes. */ + int pos; /* Biased buffer position. */ + int epos; /* End of biased buffer position - max single put. */ + int ofs; /* Byte offset into section. */ +} dasm_Section; + +/* Core structure holding the DynASM encoding state. */ +struct dasm_State { + size_t psize; /* Allocated size of this structure. */ + dasm_ActList actionlist; /* Current actionlist pointer. */ + int *lglabels; /* Local/global chain/pos ptrs. */ + size_t lgsize; + int *pclabels; /* PC label chains/pos ptrs. */ + size_t pcsize; + void **globals; /* Array of globals (bias -10). */ + dasm_Section *section; /* Pointer to active section. */ + size_t codesize; /* Total size of all code sections. */ + int maxsection; /* 0 <= sectionidx < maxsection. */ + int status; /* Status code. */ + dasm_Section sections[1]; /* All sections. Alloc-extended. */ +}; + +/* The size of the core structure depends on the max. number of sections. */ +#define DASM_PSZ(ms) (sizeof(dasm_State)+(ms-1)*sizeof(dasm_Section)) + + +/* Initialize DynASM state. */ +void dasm_init(Dst_DECL, int maxsection) +{ + dasm_State *D; + size_t psz = 0; + int i; + Dst_REF = NULL; + DASM_M_GROW(Dst, struct dasm_State, Dst_REF, psz, DASM_PSZ(maxsection)); + D = Dst_REF; + D->psize = psz; + D->lglabels = NULL; + D->lgsize = 0; + D->pclabels = NULL; + D->pcsize = 0; + D->globals = NULL; + D->maxsection = maxsection; + for (i = 0; i < maxsection; i++) { + D->sections[i].buf = NULL; /* Need this for pass3. */ + D->sections[i].rbuf = D->sections[i].buf - DASM_SEC2POS(i); + D->sections[i].bsize = 0; + D->sections[i].epos = 0; /* Wrong, but is recalculated after resize. */ + } +} + +/* Free DynASM state. 
*/ +void dasm_free(Dst_DECL) +{ + dasm_State *D = Dst_REF; + int i; + for (i = 0; i < D->maxsection; i++) + if (D->sections[i].buf) + DASM_M_FREE(Dst, D->sections[i].buf, D->sections[i].bsize); + if (D->pclabels) DASM_M_FREE(Dst, D->pclabels, D->pcsize); + if (D->lglabels) DASM_M_FREE(Dst, D->lglabels, D->lgsize); + DASM_M_FREE(Dst, D, D->psize); +} + +/* Setup global label array. Must be called before dasm_setup(). */ +void dasm_setupglobal(Dst_DECL, void **gl, unsigned int maxgl) +{ + dasm_State *D = Dst_REF; + D->globals = gl - 10; /* Negative bias to compensate for locals. */ + DASM_M_GROW(Dst, int, D->lglabels, D->lgsize, (10+maxgl)*sizeof(int)); +} + +/* Grow PC label array. Can be called after dasm_setup(), too. */ +void dasm_growpc(Dst_DECL, unsigned int maxpc) +{ + dasm_State *D = Dst_REF; + size_t osz = D->pcsize; + DASM_M_GROW(Dst, int, D->pclabels, D->pcsize, maxpc*sizeof(int)); + memset((void *)(((unsigned char *)D->pclabels)+osz), 0, D->pcsize-osz); +} + +/* Setup encoder. */ +void dasm_setup(Dst_DECL, const void *actionlist) +{ + dasm_State *D = Dst_REF; + int i; + D->actionlist = (dasm_ActList)actionlist; + D->status = DASM_S_OK; + D->section = &D->sections[0]; + memset((void *)D->lglabels, 0, D->lgsize); + if (D->pclabels) memset((void *)D->pclabels, 0, D->pcsize); + for (i = 0; i < D->maxsection; i++) { + D->sections[i].pos = DASM_SEC2POS(i); + D->sections[i].ofs = 0; + } +} + + +#ifdef DASM_CHECKS +#define CK(x, st) \ + do { if (!(x)) { \ + D->status = DASM_S_##st|(p-D->actionlist-1); return; } } while (0) +#define CKPL(kind, st) \ + do { if ((size_t)((char *)pl-(char *)D->kind##labels) >= D->kind##size) { \ + D->status = DASM_S_RANGE_##st|(p-D->actionlist-1); return; } } while (0) +#else +#define CK(x, st) ((void)0) +#define CKPL(kind, st) ((void)0) +#endif + +static int dasm_imm12(unsigned int n) +{ + int i; + for (i = 0; i < 16; i++, n = (n << 2) | (n >> 30)) + if (n <= 255) return (int)(n + (i << 8)); + return -1; +} + +/* Pass 1: Store actions and args, link branches/labels, estimate offsets. */ +void dasm_put(Dst_DECL, int start, ...) +{ + va_list ap; + dasm_State *D = Dst_REF; + dasm_ActList p = D->actionlist + start; + dasm_Section *sec = D->section; + int pos = sec->pos, ofs = sec->ofs; + int *b; + + if (pos >= sec->epos) { + DASM_M_GROW(Dst, int, sec->buf, sec->bsize, + sec->bsize + 2*DASM_MAXSECPOS*sizeof(int)); + sec->rbuf = sec->buf - DASM_POS2BIAS(pos); + sec->epos = (int)sec->bsize/sizeof(int) - DASM_MAXSECPOS+DASM_POS2BIAS(pos); + } + + b = sec->rbuf; + b[pos++] = start; + + va_start(ap, start); + while (1) { + unsigned int ins = *p++; + unsigned int action = (ins >> 16); + if (action >= DASM__MAX) { + ofs += 4; + } else { + int *pl, n = action >= DASM_REL_PC ? va_arg(ap, int) : 0; + switch (action) { + case DASM_STOP: goto stop; + case DASM_SECTION: + n = (ins & 255); CK(n < D->maxsection, RANGE_SEC); + D->section = &D->sections[n]; goto stop; + case DASM_ESC: p++; ofs += 4; break; + case DASM_REL_EXT: break; + case DASM_ALIGN: ofs += (ins & 255); b[pos++] = ofs; break; + case DASM_REL_LG: + n = (ins & 2047) - 10; pl = D->lglabels + n; + /* Bkwd rel or global. */ + if (n >= 0) { CK(n>=10||*pl<0, RANGE_LG); CKPL(lg, LG); goto putrel; } + pl += 10; n = *pl; + if (n < 0) n = 0; /* Start new chain for fwd rel if label exists. */ + goto linkrel; + case DASM_REL_PC: + pl = D->pclabels + n; CKPL(pc, PC); + putrel: + n = *pl; + if (n < 0) { /* Label exists. Get label pos and store it. 
*/ + b[pos] = -n; + } else { + linkrel: + b[pos] = n; /* Else link to rel chain, anchored at label. */ + *pl = pos; + } + pos++; + break; + case DASM_LABEL_LG: + pl = D->lglabels + (ins & 2047) - 10; CKPL(lg, LG); goto putlabel; + case DASM_LABEL_PC: + pl = D->pclabels + n; CKPL(pc, PC); + putlabel: + n = *pl; /* n > 0: Collapse rel chain and replace with label pos. */ + while (n > 0) { int *pb = DASM_POS2PTR(D, n); n = *pb; *pb = pos; + } + *pl = -pos; /* Label exists now. */ + b[pos++] = ofs; /* Store pass1 offset estimate. */ + break; + case DASM_IMM: + case DASM_IMM16: +#ifdef DASM_CHECKS + CK((n & ((1<<((ins>>10)&31))-1)) == 0, RANGE_I); + if ((ins & 0x8000)) + CK(((n + (1<<(((ins>>5)&31)-1)))>>((ins>>5)&31)) == 0, RANGE_I); + else + CK((n>>((ins>>5)&31)) == 0, RANGE_I); +#endif + b[pos++] = n; + break; + case DASM_IMMV8: + CK((n & 3) == 0, RANGE_I); + n >>= 2; + /* fallthrough */ + case DASM_IMML8: + case DASM_IMML12: + CK(n >= 0 ? ((n>>((ins>>5)&31)) == 0) : + (((-n)>>((ins>>5)&31)) == 0), RANGE_I); + b[pos++] = n; + break; + case DASM_IMM12: + CK(dasm_imm12((unsigned int)n) != -1, RANGE_I); + b[pos++] = n; + break; + } + } + } +stop: + va_end(ap); + sec->pos = pos; + sec->ofs = ofs; +} +#undef CK + +/* Pass 2: Link sections, shrink aligns, fix label offsets. */ +int dasm_link(Dst_DECL, size_t *szp) +{ + dasm_State *D = Dst_REF; + int secnum; + int ofs = 0; + +#ifdef DASM_CHECKS + *szp = 0; + if (D->status != DASM_S_OK) return D->status; + { + int pc; + for (pc = 0; pc*sizeof(int) < D->pcsize; pc++) + if (D->pclabels[pc] > 0) return DASM_S_UNDEF_PC|pc; + } +#endif + + { /* Handle globals not defined in this translation unit. */ + int idx; + for (idx = 10; idx*sizeof(int) < D->lgsize; idx++) { + int n = D->lglabels[idx]; + /* Undefined label: Collapse rel chain and replace with marker (< 0). */ + while (n > 0) { int *pb = DASM_POS2PTR(D, n); n = *pb; *pb = -idx; } + } + } + + /* Combine all code sections. No support for data sections (yet). */ + for (secnum = 0; secnum < D->maxsection; secnum++) { + dasm_Section *sec = D->sections + secnum; + int *b = sec->rbuf; + int pos = DASM_SEC2POS(secnum); + int lastpos = sec->pos; + + while (pos != lastpos) { + dasm_ActList p = D->actionlist + b[pos++]; + while (1) { + unsigned int ins = *p++; + unsigned int action = (ins >> 16); + switch (action) { + case DASM_STOP: case DASM_SECTION: goto stop; + case DASM_ESC: p++; break; + case DASM_REL_EXT: break; + case DASM_ALIGN: ofs -= (b[pos++] + ofs) & (ins & 255); break; + case DASM_REL_LG: case DASM_REL_PC: pos++; break; + case DASM_LABEL_LG: case DASM_LABEL_PC: b[pos++] += ofs; break; + case DASM_IMM: case DASM_IMM12: case DASM_IMM16: + case DASM_IMML8: case DASM_IMML12: case DASM_IMMV8: pos++; break; + } + } + stop: (void)0; + } + ofs += sec->ofs; /* Next section starts right after current section. */ + } + + D->codesize = ofs; /* Total size of all code sections */ + *szp = ofs; + return DASM_S_OK; +} + +#ifdef DASM_CHECKS +#define CK(x, st) \ + do { if (!(x)) return DASM_S_##st|(p-D->actionlist-1); } while (0) +#else +#define CK(x, st) ((void)0) +#endif + +/* Pass 3: Encode sections. */ +int dasm_encode(Dst_DECL, void *buffer) +{ + dasm_State *D = Dst_REF; + char *base = (char *)buffer; + unsigned int *cp = (unsigned int *)buffer; + int secnum; + + /* Encode all code sections. No support for data sections (yet). 
*/ + for (secnum = 0; secnum < D->maxsection; secnum++) { + dasm_Section *sec = D->sections + secnum; + int *b = sec->buf; + int *endb = sec->rbuf + sec->pos; + + while (b != endb) { + dasm_ActList p = D->actionlist + *b++; + while (1) { + unsigned int ins = *p++; + unsigned int action = (ins >> 16); + int n = (action >= DASM_ALIGN && action < DASM__MAX) ? *b++ : 0; + switch (action) { + case DASM_STOP: case DASM_SECTION: goto stop; + case DASM_ESC: *cp++ = *p++; break; + case DASM_REL_EXT: + n = DASM_EXTERN(Dst, (unsigned char *)cp, (ins&2047), !(ins&2048)); + goto patchrel; + case DASM_ALIGN: + ins &= 255; while ((((char *)cp - base) & ins)) *cp++ = 0xe1a00000; + break; + case DASM_REL_LG: + if (n < 0) { + n = (int)((ptrdiff_t)D->globals[-n] - (ptrdiff_t)cp - 4); + goto patchrel; + } + /* fallthrough */ + case DASM_REL_PC: + CK(n >= 0, UNDEF_PC); + n = *DASM_POS2PTR(D, n) - (int)((char *)cp - base) - 4; + patchrel: + if ((ins & 0x800) == 0) { + CK((n & 3) == 0 && ((n+0x02000000) >> 26) == 0, RANGE_REL); + cp[-1] |= ((n >> 2) & 0x00ffffff); + } else if ((ins & 0x1000)) { + CK((n & 3) == 0 && -256 <= n && n <= 256, RANGE_REL); + goto patchimml8; + } else if ((ins & 0x2000) == 0) { + CK((n & 3) == 0 && -4096 <= n && n <= 4096, RANGE_REL); + goto patchimml; + } else { + CK((n & 3) == 0 && -1020 <= n && n <= 1020, RANGE_REL); + n >>= 2; + goto patchimml; + } + break; + case DASM_LABEL_LG: + ins &= 2047; if (ins >= 20) D->globals[ins-10] = (void *)(base + n); + break; + case DASM_LABEL_PC: break; + case DASM_IMM: + cp[-1] |= ((n>>((ins>>10)&31)) & ((1<<((ins>>5)&31))-1)) << (ins&31); + break; + case DASM_IMM12: + cp[-1] |= dasm_imm12((unsigned int)n); + break; + case DASM_IMM16: + cp[-1] |= ((n & 0xf000) << 4) | (n & 0x0fff); + break; + case DASM_IMML8: patchimml8: + cp[-1] |= n >= 0 ? (0x00800000 | (n & 0x0f) | ((n & 0xf0) << 4)) : + ((-n & 0x0f) | ((-n & 0xf0) << 4)); + break; + case DASM_IMML12: case DASM_IMMV8: patchimml: + cp[-1] |= n >= 0 ? (0x00800000 | n) : (-n); + break; + default: *cp++ = ins; break; + } + } + stop: (void)0; + } + } + + if (base + D->codesize != (char *)cp) /* Check for phase errors. */ + return DASM_S_PHASE; + return DASM_S_OK; +} +#undef CK + +/* Get PC label offset. */ +int dasm_getpclabel(Dst_DECL, unsigned int pc) +{ + dasm_State *D = Dst_REF; + if (pc*sizeof(int) < D->pcsize) { + int pos = D->pclabels[pc]; + if (pos < 0) return *DASM_POS2PTR(D, -pos); + if (pos > 0) return -1; /* Undefined. */ + } + return -2; /* Unused or out of range. */ +} + +#ifdef DASM_CHECKS +/* Optional sanity checker to call between isolated encoding steps. */ +int dasm_checkstep(Dst_DECL, int secmatch) +{ + dasm_State *D = Dst_REF; + if (D->status == DASM_S_OK) { + int i; + for (i = 1; i <= 9; i++) { + if (D->lglabels[i] > 0) { D->status = DASM_S_UNDEF_LG|i; break; } + D->lglabels[i] = 0; + } + } + if (D->status == DASM_S_OK && secmatch >= 0 && + D->section != &D->sections[secmatch]) + D->status = DASM_S_MATCH_SEC|(D->section-D->sections); + return D->status; +} +#endif + diff --git a/ext/opcache/jit/ir/dynasm/dasm_arm.lua b/ext/opcache/jit/ir/dynasm/dasm_arm.lua new file mode 100644 index 00000000000..0c775ae2687 --- /dev/null +++ b/ext/opcache/jit/ir/dynasm/dasm_arm.lua @@ -0,0 +1,1125 @@ +------------------------------------------------------------------------------ +-- DynASM ARM module. +-- +-- Copyright (C) 2005-2021 Mike Pall. All rights reserved. +-- See dynasm.lua for full copyright notice. 
+------------------------------------------------------------------------------ + +-- Module information: +local _info = { + arch = "arm", + description = "DynASM ARM module", + version = "1.5.0", + vernum = 10500, + release = "2021-05-02", + author = "Mike Pall", + license = "MIT", +} + +-- Exported glue functions for the arch-specific module. +local _M = { _info = _info } + +-- Cache library functions. +local type, tonumber, pairs, ipairs = type, tonumber, pairs, ipairs +local assert, setmetatable, rawget = assert, setmetatable, rawget +local _s = string +local sub, format, byte, char = _s.sub, _s.format, _s.byte, _s.char +local match, gmatch, gsub = _s.match, _s.gmatch, _s.gsub +local concat, sort, insert = table.concat, table.sort, table.insert +local bit = bit or require("bit") +local band, shl, shr, sar = bit.band, bit.lshift, bit.rshift, bit.arshift +local ror, tohex = bit.ror, bit.tohex + +-- Inherited tables and callbacks. +local g_opt, g_arch +local wline, werror, wfatal, wwarn + +-- Action name list. +-- CHECK: Keep this in sync with the C code! +local action_names = { + "STOP", "SECTION", "ESC", "REL_EXT", + "ALIGN", "REL_LG", "LABEL_LG", + "REL_PC", "LABEL_PC", "IMM", "IMM12", "IMM16", "IMML8", "IMML12", "IMMV8", +} + +-- Maximum number of section buffer positions for dasm_put(). +-- CHECK: Keep this in sync with the C code! +local maxsecpos = 25 -- Keep this low, to avoid excessively long C lines. + +-- Action name -> action number. +local map_action = {} +for n,name in ipairs(action_names) do + map_action[name] = n-1 +end + +-- Action list buffer. +local actlist = {} + +-- Argument list for next dasm_put(). Start with offset 0 into action list. +local actargs = { 0 } + +-- Current number of section buffer positions for dasm_put(). +local secpos = 1 + +------------------------------------------------------------------------------ + +-- Dump action names and numbers. +local function dumpactions(out) + out:write("DynASM encoding engine action codes:\n") + for n,name in ipairs(action_names) do + local num = map_action[name] + out:write(format(" %-10s %02X %d\n", name, num, num)) + end + out:write("\n") +end + +-- Write action list buffer as a huge static C array. +local function writeactions(out, name) + local nn = #actlist + if nn == 0 then nn = 1; actlist[0] = map_action.STOP end + out:write("static const unsigned int ", name, "[", nn, "] = {\n") + for i = 1,nn-1 do + assert(out:write("0x", tohex(actlist[i]), ",\n")) + end + assert(out:write("0x", tohex(actlist[nn]), "\n};\n\n")) +end + +------------------------------------------------------------------------------ + +-- Add word to action list. +local function wputxw(n) + assert(n >= 0 and n <= 0xffffffff and n % 1 == 0, "word out of range") + actlist[#actlist+1] = n +end + +-- Add action to list with optional arg. Advance buffer pos, too. +local function waction(action, val, a, num) + local w = assert(map_action[action], "bad action name `"..action.."'") + wputxw(w * 0x10000 + (val or 0)) + if a then actargs[#actargs+1] = a end + if a or num then secpos = secpos + (num or 1) end +end + +-- Flush action list (intervening C code or buffer pos overflow). +local function wflush(term) + if #actlist == actargs[1] then return end -- Nothing to flush. + if not term then waction("STOP") end -- Terminate action list. + wline(format("dasm_put(Dst, %s);", concat(actargs, ", ")), true) + actargs = { #actlist } -- Actionlist offset is 1st arg to next dasm_put(). + secpos = 1 -- The actionlist offset occupies a buffer position, too. 
+end + +-- Put escaped word. +local function wputw(n) + if n <= 0x000fffff then waction("ESC") end + wputxw(n) +end + +-- Reserve position for word. +local function wpos() + local pos = #actlist+1 + actlist[pos] = "" + return pos +end + +-- Store word to reserved position. +local function wputpos(pos, n) + assert(n >= 0 and n <= 0xffffffff and n % 1 == 0, "word out of range") + if n <= 0x000fffff then + insert(actlist, pos+1, n) + n = map_action.ESC * 0x10000 + end + actlist[pos] = n +end + +------------------------------------------------------------------------------ + +-- Global label name -> global label number. With auto assignment on 1st use. +local next_global = 20 +local map_global = setmetatable({}, { __index = function(t, name) + if not match(name, "^[%a_][%w_]*$") then werror("bad global label") end + local n = next_global + if n > 2047 then werror("too many global labels") end + next_global = n + 1 + t[name] = n + return n +end}) + +-- Dump global labels. +local function dumpglobals(out, lvl) + local t = {} + for name, n in pairs(map_global) do t[n] = name end + out:write("Global labels:\n") + for i=20,next_global-1 do + out:write(format(" %s\n", t[i])) + end + out:write("\n") +end + +-- Write global label enum. +local function writeglobals(out, prefix) + local t = {} + for name, n in pairs(map_global) do t[n] = name end + out:write("enum {\n") + for i=20,next_global-1 do + out:write(" ", prefix, t[i], ",\n") + end + out:write(" ", prefix, "_MAX\n};\n") +end + +-- Write global label names. +local function writeglobalnames(out, name) + local t = {} + for name, n in pairs(map_global) do t[n] = name end + out:write("static const char *const ", name, "[] = {\n") + for i=20,next_global-1 do + out:write(" \"", t[i], "\",\n") + end + out:write(" (const char *)0\n};\n") +end + +------------------------------------------------------------------------------ + +-- Extern label name -> extern label number. With auto assignment on 1st use. +local next_extern = 0 +local map_extern_ = {} +local map_extern = setmetatable({}, { __index = function(t, name) + -- No restrictions on the name for now. + local n = next_extern + if n > 2047 then werror("too many extern labels") end + next_extern = n + 1 + t[name] = n + map_extern_[n] = name + return n +end}) + +-- Dump extern labels. +local function dumpexterns(out, lvl) + out:write("Extern labels:\n") + for i=0,next_extern-1 do + out:write(format(" %s\n", map_extern_[i])) + end + out:write("\n") +end + +-- Write extern label names. +local function writeexternnames(out, name) + out:write("static const char *const ", name, "[] = {\n") + for i=0,next_extern-1 do + out:write(" \"", map_extern_[i], "\",\n") + end + out:write(" (const char *)0\n};\n") +end + +------------------------------------------------------------------------------ + +-- Arch-specific maps. + +-- Ext. register name -> int. name. +local map_archdef = { sp = "r13", lr = "r14", pc = "r15", } + +-- Int. register name -> ext. name. +local map_reg_rev = { r13 = "sp", r14 = "lr", r15 = "pc", } + +local map_type = {} -- Type name -> { ctype, reg } +local ctypenum = 0 -- Type number (for Dt... macros). + +-- Reverse defines for registers. 
+function _M.revdef(s) + return map_reg_rev[s] or s +end + +local map_shift = { lsl = 0, lsr = 1, asr = 2, ror = 3, } + +local map_cond = { + eq = 0, ne = 1, cs = 2, cc = 3, mi = 4, pl = 5, vs = 6, vc = 7, + hi = 8, ls = 9, ge = 10, lt = 11, gt = 12, le = 13, al = 14, + hs = 2, lo = 3, +} + +------------------------------------------------------------------------------ + +-- Template strings for ARM instructions. +local map_op = { + -- Basic data processing instructions. + and_3 = "e0000000DNPs", + eor_3 = "e0200000DNPs", + sub_3 = "e0400000DNPs", + rsb_3 = "e0600000DNPs", + add_3 = "e0800000DNPs", + adc_3 = "e0a00000DNPs", + sbc_3 = "e0c00000DNPs", + rsc_3 = "e0e00000DNPs", + tst_2 = "e1100000NP", + teq_2 = "e1300000NP", + cmp_2 = "e1500000NP", + cmn_2 = "e1700000NP", + orr_3 = "e1800000DNPs", + mov_2 = "e1a00000DPs", + bic_3 = "e1c00000DNPs", + mvn_2 = "e1e00000DPs", + + and_4 = "e0000000DNMps", + eor_4 = "e0200000DNMps", + sub_4 = "e0400000DNMps", + rsb_4 = "e0600000DNMps", + add_4 = "e0800000DNMps", + adc_4 = "e0a00000DNMps", + sbc_4 = "e0c00000DNMps", + rsc_4 = "e0e00000DNMps", + tst_3 = "e1100000NMp", + teq_3 = "e1300000NMp", + cmp_3 = "e1500000NMp", + cmn_3 = "e1700000NMp", + orr_4 = "e1800000DNMps", + mov_3 = "e1a00000DMps", + bic_4 = "e1c00000DNMps", + mvn_3 = "e1e00000DMps", + + lsl_3 = "e1a00000DMws", + lsr_3 = "e1a00020DMws", + asr_3 = "e1a00040DMws", + ror_3 = "e1a00060DMws", + rrx_2 = "e1a00060DMs", + + -- Multiply and multiply-accumulate. + mul_3 = "e0000090NMSs", + mla_4 = "e0200090NMSDs", + umaal_4 = "e0400090DNMSs", -- v6 + mls_4 = "e0600090DNMSs", -- v6T2 + umull_4 = "e0800090DNMSs", + umlal_4 = "e0a00090DNMSs", + smull_4 = "e0c00090DNMSs", + smlal_4 = "e0e00090DNMSs", + + -- Halfword multiply and multiply-accumulate. + smlabb_4 = "e1000080NMSD", -- v5TE + smlatb_4 = "e10000a0NMSD", -- v5TE + smlabt_4 = "e10000c0NMSD", -- v5TE + smlatt_4 = "e10000e0NMSD", -- v5TE + smlawb_4 = "e1200080NMSD", -- v5TE + smulwb_3 = "e12000a0NMS", -- v5TE + smlawt_4 = "e12000c0NMSD", -- v5TE + smulwt_3 = "e12000e0NMS", -- v5TE + smlalbb_4 = "e1400080NMSD", -- v5TE + smlaltb_4 = "e14000a0NMSD", -- v5TE + smlalbt_4 = "e14000c0NMSD", -- v5TE + smlaltt_4 = "e14000e0NMSD", -- v5TE + smulbb_3 = "e1600080NMS", -- v5TE + smultb_3 = "e16000a0NMS", -- v5TE + smulbt_3 = "e16000c0NMS", -- v5TE + smultt_3 = "e16000e0NMS", -- v5TE + + -- Miscellaneous data processing instructions. + clz_2 = "e16f0f10DM", -- v5T + rev_2 = "e6bf0f30DM", -- v6 + rev16_2 = "e6bf0fb0DM", -- v6 + revsh_2 = "e6ff0fb0DM", -- v6 + sel_3 = "e6800fb0DNM", -- v6 + usad8_3 = "e780f010NMS", -- v6 + usada8_4 = "e7800010NMSD", -- v6 + rbit_2 = "e6ff0f30DM", -- v6T2 + movw_2 = "e3000000DW", -- v6T2 + movt_2 = "e3400000DW", -- v6T2 + -- Note: the X encodes width-1, not width. + sbfx_4 = "e7a00050DMvX", -- v6T2 + ubfx_4 = "e7e00050DMvX", -- v6T2 + -- Note: the X encodes the msb field, not the width. + bfc_3 = "e7c0001fDvX", -- v6T2 + bfi_4 = "e7c00010DMvX", -- v6T2 + + -- Packing and unpacking instructions. 
+ pkhbt_3 = "e6800010DNM", pkhbt_4 = "e6800010DNMv", -- v6 + pkhtb_3 = "e6800050DNM", pkhtb_4 = "e6800050DNMv", -- v6 + sxtab_3 = "e6a00070DNM", sxtab_4 = "e6a00070DNMv", -- v6 + sxtab16_3 = "e6800070DNM", sxtab16_4 = "e6800070DNMv", -- v6 + sxtah_3 = "e6b00070DNM", sxtah_4 = "e6b00070DNMv", -- v6 + sxtb_2 = "e6af0070DM", sxtb_3 = "e6af0070DMv", -- v6 + sxtb16_2 = "e68f0070DM", sxtb16_3 = "e68f0070DMv", -- v6 + sxth_2 = "e6bf0070DM", sxth_3 = "e6bf0070DMv", -- v6 + uxtab_3 = "e6e00070DNM", uxtab_4 = "e6e00070DNMv", -- v6 + uxtab16_3 = "e6c00070DNM", uxtab16_4 = "e6c00070DNMv", -- v6 + uxtah_3 = "e6f00070DNM", uxtah_4 = "e6f00070DNMv", -- v6 + uxtb_2 = "e6ef0070DM", uxtb_3 = "e6ef0070DMv", -- v6 + uxtb16_2 = "e6cf0070DM", uxtb16_3 = "e6cf0070DMv", -- v6 + uxth_2 = "e6ff0070DM", uxth_3 = "e6ff0070DMv", -- v6 + + -- Saturating instructions. + qadd_3 = "e1000050DMN", -- v5TE + qsub_3 = "e1200050DMN", -- v5TE + qdadd_3 = "e1400050DMN", -- v5TE + qdsub_3 = "e1600050DMN", -- v5TE + -- Note: the X for ssat* encodes sat_imm-1, not sat_imm. + ssat_3 = "e6a00010DXM", ssat_4 = "e6a00010DXMp", -- v6 + usat_3 = "e6e00010DXM", usat_4 = "e6e00010DXMp", -- v6 + ssat16_3 = "e6a00f30DXM", -- v6 + usat16_3 = "e6e00f30DXM", -- v6 + + -- Parallel addition and subtraction. + sadd16_3 = "e6100f10DNM", -- v6 + sasx_3 = "e6100f30DNM", -- v6 + ssax_3 = "e6100f50DNM", -- v6 + ssub16_3 = "e6100f70DNM", -- v6 + sadd8_3 = "e6100f90DNM", -- v6 + ssub8_3 = "e6100ff0DNM", -- v6 + qadd16_3 = "e6200f10DNM", -- v6 + qasx_3 = "e6200f30DNM", -- v6 + qsax_3 = "e6200f50DNM", -- v6 + qsub16_3 = "e6200f70DNM", -- v6 + qadd8_3 = "e6200f90DNM", -- v6 + qsub8_3 = "e6200ff0DNM", -- v6 + shadd16_3 = "e6300f10DNM", -- v6 + shasx_3 = "e6300f30DNM", -- v6 + shsax_3 = "e6300f50DNM", -- v6 + shsub16_3 = "e6300f70DNM", -- v6 + shadd8_3 = "e6300f90DNM", -- v6 + shsub8_3 = "e6300ff0DNM", -- v6 + uadd16_3 = "e6500f10DNM", -- v6 + uasx_3 = "e6500f30DNM", -- v6 + usax_3 = "e6500f50DNM", -- v6 + usub16_3 = "e6500f70DNM", -- v6 + uadd8_3 = "e6500f90DNM", -- v6 + usub8_3 = "e6500ff0DNM", -- v6 + uqadd16_3 = "e6600f10DNM", -- v6 + uqasx_3 = "e6600f30DNM", -- v6 + uqsax_3 = "e6600f50DNM", -- v6 + uqsub16_3 = "e6600f70DNM", -- v6 + uqadd8_3 = "e6600f90DNM", -- v6 + uqsub8_3 = "e6600ff0DNM", -- v6 + uhadd16_3 = "e6700f10DNM", -- v6 + uhasx_3 = "e6700f30DNM", -- v6 + uhsax_3 = "e6700f50DNM", -- v6 + uhsub16_3 = "e6700f70DNM", -- v6 + uhadd8_3 = "e6700f90DNM", -- v6 + uhsub8_3 = "e6700ff0DNM", -- v6 + + -- Load/store instructions. 
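+  -- Editor's note (illustrative, not from upstream DynASM): the "L" letter
+  -- in the templates below is handled by parse_load() and accepts the usual
+  -- ARM addressing modes, e.g.:
+  --   | ldr r0, [r1]              -- plain base
+  --   | ldr r0, [r1, #4]          -- immediate offset
+  --   | ldr r0, [r1, #4]!         -- pre-indexed with writeback
+  --   | ldr r0, [r1], #4          -- post-indexed
+  --   | ldr r0, [r1, r2, lsl #2]  -- register offset with shift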
+ str_2 = "e4000000DL", str_3 = "e4000000DL", str_4 = "e4000000DL", + strb_2 = "e4400000DL", strb_3 = "e4400000DL", strb_4 = "e4400000DL", + ldr_2 = "e4100000DL", ldr_3 = "e4100000DL", ldr_4 = "e4100000DL", + ldrb_2 = "e4500000DL", ldrb_3 = "e4500000DL", ldrb_4 = "e4500000DL", + strh_2 = "e00000b0DL", strh_3 = "e00000b0DL", + ldrh_2 = "e01000b0DL", ldrh_3 = "e01000b0DL", + ldrd_2 = "e00000d0DL", ldrd_3 = "e00000d0DL", -- v5TE + ldrsb_2 = "e01000d0DL", ldrsb_3 = "e01000d0DL", + strd_2 = "e00000f0DL", strd_3 = "e00000f0DL", -- v5TE + ldrsh_2 = "e01000f0DL", ldrsh_3 = "e01000f0DL", + + ldm_2 = "e8900000oR", ldmia_2 = "e8900000oR", ldmfd_2 = "e8900000oR", + ldmda_2 = "e8100000oR", ldmfa_2 = "e8100000oR", + ldmdb_2 = "e9100000oR", ldmea_2 = "e9100000oR", + ldmib_2 = "e9900000oR", ldmed_2 = "e9900000oR", + stm_2 = "e8800000oR", stmia_2 = "e8800000oR", stmfd_2 = "e8800000oR", + stmda_2 = "e8000000oR", stmfa_2 = "e8000000oR", + stmdb_2 = "e9000000oR", stmea_2 = "e9000000oR", + stmib_2 = "e9800000oR", stmed_2 = "e9800000oR", + pop_1 = "e8bd0000R", push_1 = "e92d0000R", + + -- Branch instructions. + b_1 = "ea000000B", + bl_1 = "eb000000B", + blx_1 = "e12fff30C", + bx_1 = "e12fff10M", + + -- Miscellaneous instructions. + nop_0 = "e1a00000", + mrs_1 = "e10f0000D", + bkpt_1 = "e1200070K", -- v5T + svc_1 = "ef000000T", swi_1 = "ef000000T", + ud_0 = "e7f001f0", + + -- VFP instructions. + ["vadd.f32_3"] = "ee300a00dnm", + ["vadd.f64_3"] = "ee300b00Gdnm", + ["vsub.f32_3"] = "ee300a40dnm", + ["vsub.f64_3"] = "ee300b40Gdnm", + ["vmul.f32_3"] = "ee200a00dnm", + ["vmul.f64_3"] = "ee200b00Gdnm", + ["vnmul.f32_3"] = "ee200a40dnm", + ["vnmul.f64_3"] = "ee200b40Gdnm", + ["vmla.f32_3"] = "ee000a00dnm", + ["vmla.f64_3"] = "ee000b00Gdnm", + ["vmls.f32_3"] = "ee000a40dnm", + ["vmls.f64_3"] = "ee000b40Gdnm", + ["vnmla.f32_3"] = "ee100a40dnm", + ["vnmla.f64_3"] = "ee100b40Gdnm", + ["vnmls.f32_3"] = "ee100a00dnm", + ["vnmls.f64_3"] = "ee100b00Gdnm", + ["vdiv.f32_3"] = "ee800a00dnm", + ["vdiv.f64_3"] = "ee800b00Gdnm", + + ["vabs.f32_2"] = "eeb00ac0dm", + ["vabs.f64_2"] = "eeb00bc0Gdm", + ["vneg.f32_2"] = "eeb10a40dm", + ["vneg.f64_2"] = "eeb10b40Gdm", + ["vsqrt.f32_2"] = "eeb10ac0dm", + ["vsqrt.f64_2"] = "eeb10bc0Gdm", + ["vcmp.f32_2"] = "eeb40a40dm", + ["vcmp.f64_2"] = "eeb40b40Gdm", + ["vcmpe.f32_2"] = "eeb40ac0dm", + ["vcmpe.f64_2"] = "eeb40bc0Gdm", + ["vcmpz.f32_1"] = "eeb50a40d", + ["vcmpz.f64_1"] = "eeb50b40Gd", + ["vcmpze.f32_1"] = "eeb50ac0d", + ["vcmpze.f64_1"] = "eeb50bc0Gd", + + vldr_2 = "ed100a00dl|ed100b00Gdl", + vstr_2 = "ed000a00dl|ed000b00Gdl", + vldm_2 = "ec900a00or", + vldmia_2 = "ec900a00or", + vldmdb_2 = "ed100a00or", + vpop_1 = "ecbd0a00r", + vstm_2 = "ec800a00or", + vstmia_2 = "ec800a00or", + vstmdb_2 = "ed000a00or", + vpush_1 = "ed2d0a00r", + + ["vmov.f32_2"] = "eeb00a40dm|eeb00a00dY", -- #imm is VFPv3 only + ["vmov.f64_2"] = "eeb00b40Gdm|eeb00b00GdY", -- #imm is VFPv3 only + vmov_2 = "ee100a10Dn|ee000a10nD", + vmov_3 = "ec500a10DNm|ec400a10mDN|ec500b10GDNm|ec400b10GmDN", + + vmrs_0 = "eef1fa10", + vmrs_1 = "eef10a10D", + vmsr_1 = "eee10a10D", + + ["vcvt.s32.f32_2"] = "eebd0ac0dm", + ["vcvt.s32.f64_2"] = "eebd0bc0dGm", + ["vcvt.u32.f32_2"] = "eebc0ac0dm", + ["vcvt.u32.f64_2"] = "eebc0bc0dGm", + ["vcvtr.s32.f32_2"] = "eebd0a40dm", + ["vcvtr.s32.f64_2"] = "eebd0b40dGm", + ["vcvtr.u32.f32_2"] = "eebc0a40dm", + ["vcvtr.u32.f64_2"] = "eebc0b40dGm", + ["vcvt.f32.s32_2"] = "eeb80ac0dm", + ["vcvt.f64.s32_2"] = "eeb80bc0GdFm", + ["vcvt.f32.u32_2"] = "eeb80a40dm", + ["vcvt.f64.u32_2"] = "eeb80b40GdFm", + 
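+  -- Editor's note (illustrative, not from upstream DynASM): each template
+  -- is eight hex digits of base opcode followed by one letter per operand,
+  -- and "|" separates alternative encodings that .template__ tries in
+  -- order. In "ed100a00dl|ed100b00Gdl" (vldr_2 above), the first template
+  -- is the f32 form ("d" = FP destination, "l" = VFP load address) and the
+  -- second the f64 form ("G" switches the register parser to doubles).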
["vcvt.f32.f64_2"] = "eeb70bc0dGm", + ["vcvt.f64.f32_2"] = "eeb70ac0GdFm", + + -- VFPv4 only: + ["vfma.f32_3"] = "eea00a00dnm", + ["vfma.f64_3"] = "eea00b00Gdnm", + ["vfms.f32_3"] = "eea00a40dnm", + ["vfms.f64_3"] = "eea00b40Gdnm", + ["vfnma.f32_3"] = "ee900a40dnm", + ["vfnma.f64_3"] = "ee900b40Gdnm", + ["vfnms.f32_3"] = "ee900a00dnm", + ["vfnms.f64_3"] = "ee900b00Gdnm", + + -- NYI: Advanced SIMD instructions. + + -- NYI: I have no need for these instructions right now: + -- swp, swpb, strex, ldrex, strexd, ldrexd, strexb, ldrexb, strexh, ldrexh + -- msr, nopv6, yield, wfe, wfi, sev, dbg, bxj, smc, srs, rfe + -- cps, setend, pli, pld, pldw, clrex, dsb, dmb, isb + -- stc, ldc, mcr, mcr2, mrc, mrc2, mcrr, mcrr2, mrrc, mrrc2, cdp, cdp2 +} + +-- Add mnemonics for "s" variants. +do + local t = {} + for k,v in pairs(map_op) do + if sub(v, -1) == "s" then + local v2 = sub(v, 1, 2)..char(byte(v, 3)+1)..sub(v, 4, -2) + t[sub(k, 1, -3).."s"..sub(k, -2)] = v2 + end + end + for k,v in pairs(t) do + map_op[k] = v + end +end + +------------------------------------------------------------------------------ + +local function parse_gpr(expr) + local tname, ovreg = match(expr, "^([%w_]+):(r1?[0-9])$") + local tp = map_type[tname or expr] + if tp then + local reg = ovreg or tp.reg + if not reg then + werror("type `"..(tname or expr).."' needs a register override") + end + expr = reg + end + local r = match(expr, "^r(1?[0-9])$") + if r then + r = tonumber(r) + if r <= 15 then return r, tp end + end + werror("bad register name `"..expr.."'") +end + +local function parse_gpr_pm(expr) + local pm, expr2 = match(expr, "^([+-]?)(.*)$") + return parse_gpr(expr2), (pm == "-") +end + +local function parse_vr(expr, tp) + local t, r = match(expr, "^([sd])([0-9]+)$") + if t == tp then + r = tonumber(r) + if r <= 31 then + if t == "s" then return shr(r, 1), band(r, 1) end + return band(r, 15), shr(r, 4) + end + end + werror("bad register name `"..expr.."'") +end + +local function parse_reglist(reglist) + reglist = match(reglist, "^{%s*([^}]*)}$") + if not reglist then werror("register list expected") end + local rr = 0 + for p in gmatch(reglist..",", "%s*([^,]*),") do + local rbit = shl(1, parse_gpr(gsub(p, "%s+$", ""))) + if band(rr, rbit) ~= 0 then + werror("duplicate register `"..p.."'") + end + rr = rr + rbit + end + return rr +end + +local function parse_vrlist(reglist) + local ta, ra, tb, rb = match(reglist, + "^{%s*([sd])([0-9]+)%s*%-%s*([sd])([0-9]+)%s*}$") + ra, rb = tonumber(ra), tonumber(rb) + if ta and ta == tb and ra and rb and ra <= 31 and rb <= 31 and ra <= rb then + local nr = rb+1 - ra + if ta == "s" then + return shl(shr(ra,1),12)+shl(band(ra,1),22) + nr + else + return shl(band(ra,15),12)+shl(shr(ra,4),22) + nr*2 + 0x100 + end + end + werror("register list expected") +end + +local function parse_imm(imm, bits, shift, scale, signed) + imm = match(imm, "^#(.*)$") + if not imm then werror("expected immediate operand") end + local n = tonumber(imm) + if n then + local m = sar(n, scale) + if shl(m, scale) == n then + if signed then + local s = sar(m, bits-1) + if s == 0 then return shl(m, shift) + elseif s == -1 then return shl(m + shl(1, bits), shift) end + else + if sar(m, bits) == 0 then return shl(m, shift) end + end + end + werror("out of range immediate `"..imm.."'") + else + waction("IMM", (signed and 32768 or 0)+scale*1024+bits*32+shift, imm) + return 0 + end +end + +local function parse_imm12(imm) + local n = tonumber(imm) + if n then + local m = band(n) + for i=0,-15,-1 do + if shr(m, 8) == 0 then 
return m + shl(band(i, 15), 8) end + m = ror(m, 2) + end + werror("out of range immediate `"..imm.."'") + else + waction("IMM12", 0, imm) + return 0 + end +end + +local function parse_imm16(imm) + imm = match(imm, "^#(.*)$") + if not imm then werror("expected immediate operand") end + local n = tonumber(imm) + if n then + if shr(n, 16) == 0 then return band(n, 0x0fff) + shl(band(n, 0xf000), 4) end + werror("out of range immediate `"..imm.."'") + else + waction("IMM16", 32*16, imm) + return 0 + end +end + +local function parse_imm_load(imm, ext) + local n = tonumber(imm) + if n then + if ext then + if n >= -255 and n <= 255 then + local up = 0x00800000 + if n < 0 then n = -n; up = 0 end + return shl(band(n, 0xf0), 4) + band(n, 0x0f) + up + end + else + if n >= -4095 and n <= 4095 then + if n >= 0 then return n+0x00800000 end + return -n + end + end + werror("out of range immediate `"..imm.."'") + else + waction(ext and "IMML8" or "IMML12", 32768 + shl(ext and 8 or 12, 5), imm) + return 0 + end +end + +local function parse_shift(shift, gprok) + if shift == "rrx" then + return 3 * 32 + else + local s, s2 = match(shift, "^(%S+)%s*(.*)$") + s = map_shift[s] + if not s then werror("expected shift operand") end + if sub(s2, 1, 1) == "#" then + return parse_imm(s2, 5, 7, 0, false) + shl(s, 5) + else + if not gprok then werror("expected immediate shift operand") end + return shl(parse_gpr(s2), 8) + shl(s, 5) + 16 + end + end +end + +local function parse_label(label, def) + local prefix = sub(label, 1, 2) + -- =>label (pc label reference) + if prefix == "=>" then + return "PC", 0, sub(label, 3) + end + -- ->name (global label reference) + if prefix == "->" then + return "LG", map_global[sub(label, 3)] + end + if def then + -- [1-9] (local label definition) + if match(label, "^[1-9]$") then + return "LG", 10+tonumber(label) + end + else + -- [<>][1-9] (local label reference) + local dir, lnum = match(label, "^([<>])([1-9])$") + if dir then -- Fwd: 1-9, Bkwd: 11-19. + return "LG", lnum + (dir == ">" and 0 or 10) + end + -- extern label (extern label reference) + local extname = match(label, "^extern%s+(%S+)$") + if extname then + return "EXT", map_extern[extname] + end + end + werror("bad label `"..label.."'") +end + +local function parse_load(params, nparams, n, op) + local oplo = band(op, 255) + local ext, ldrd = (oplo ~= 0), (oplo == 208) + local d + if (ldrd or oplo == 240) then + d = band(shr(op, 12), 15) + if band(d, 1) ~= 0 then werror("odd destination register") end + end + local pn = params[n] + local p1, wb = match(pn, "^%[%s*(.-)%s*%](!?)$") + local p2 = params[n+1] + if not p1 then + if not p2 then + if match(pn, "^[<>=%-]") or match(pn, "^extern%s+") then + local mode, n, s = parse_label(pn, false) + waction("REL_"..mode, n + (ext and 0x1800 or 0x0800), s, 1) + return op + 15 * 65536 + 0x01000000 + (ext and 0x00400000 or 0) + end + local reg, tailr = match(pn, "^([%w_:]+)%s*(.*)$") + if reg and tailr ~= "" then + local d, tp = parse_gpr(reg) + if tp then + waction(ext and "IMML8" or "IMML12", 32768 + 32*(ext and 8 or 12), + format(tp.ctypefmt, tailr)) + return op + shl(d, 16) + 0x01000000 + (ext and 0x00400000 or 0) + end + end + end + werror("expected address operand") + end + if wb == "!" then op = op + 0x00200000 end + if p2 then + if wb == "!" 
then werror("bad use of '!'") end + local p3 = params[n+2] + op = op + shl(parse_gpr(p1), 16) + local imm = match(p2, "^#(.*)$") + if imm then + local m = parse_imm_load(imm, ext) + if p3 then werror("too many parameters") end + op = op + m + (ext and 0x00400000 or 0) + else + local m, neg = parse_gpr_pm(p2) + if ldrd and (m == d or m-1 == d) then werror("register conflict") end + op = op + m + (neg and 0 or 0x00800000) + (ext and 0 or 0x02000000) + if p3 then op = op + parse_shift(p3) end + end + else + local p1a, p2 = match(p1, "^([^,%s]*)%s*(.*)$") + op = op + shl(parse_gpr(p1a), 16) + 0x01000000 + if p2 ~= "" then + local imm = match(p2, "^,%s*#(.*)$") + if imm then + local m = parse_imm_load(imm, ext) + op = op + m + (ext and 0x00400000 or 0) + else + local p2a, p3 = match(p2, "^,%s*([^,%s]*)%s*,?%s*(.*)$") + local m, neg = parse_gpr_pm(p2a) + if ldrd and (m == d or m-1 == d) then werror("register conflict") end + op = op + m + (neg and 0 or 0x00800000) + (ext and 0 or 0x02000000) + if p3 ~= "" then + if ext then werror("too many parameters") end + op = op + parse_shift(p3) + end + end + else + if wb == "!" then werror("bad use of '!'") end + op = op + (ext and 0x00c00000 or 0x00800000) + end + end + return op +end + +local function parse_vload(q) + local reg, imm = match(q, "^%[%s*([^,%s]*)%s*(.*)%]$") + if reg then + local d = shl(parse_gpr(reg), 16) + if imm == "" then return d end + imm = match(imm, "^,%s*#(.*)$") + if imm then + local n = tonumber(imm) + if n then + if n >= -1020 and n <= 1020 and n%4 == 0 then + return d + (n >= 0 and n/4+0x00800000 or -n/4) + end + werror("out of range immediate `"..imm.."'") + else + waction("IMMV8", 32768 + 32*8, imm) + return d + end + end + else + if match(q, "^[<>=%-]") or match(q, "^extern%s+") then + local mode, n, s = parse_label(q, false) + waction("REL_"..mode, n + 0x2800, s, 1) + return 15 * 65536 + end + local reg, tailr = match(q, "^([%w_:]+)%s*(.*)$") + if reg and tailr ~= "" then + local d, tp = parse_gpr(reg) + if tp then + waction("IMMV8", 32768 + 32*8, format(tp.ctypefmt, tailr)) + return shl(d, 16) + end + end + end + werror("expected address operand") +end + +------------------------------------------------------------------------------ + +-- Handle opcodes defined with template strings. +local function parse_template(params, template, nparams, pos) + local op = tonumber(sub(template, 1, 8), 16) + local n = 1 + local vr = "s" + + -- Process each character. + for p in gmatch(sub(template, 9), ".") do + local q = params[n] + if p == "D" then + op = op + shl(parse_gpr(q), 12); n = n + 1 + elseif p == "N" then + op = op + shl(parse_gpr(q), 16); n = n + 1 + elseif p == "S" then + op = op + shl(parse_gpr(q), 8); n = n + 1 + elseif p == "M" then + op = op + parse_gpr(q); n = n + 1 + elseif p == "d" then + local r,h = parse_vr(q, vr); op = op+shl(r,12)+shl(h,22); n = n + 1 + elseif p == "n" then + local r,h = parse_vr(q, vr); op = op+shl(r,16)+shl(h,7); n = n + 1 + elseif p == "m" then + local r,h = parse_vr(q, vr); op = op+r+shl(h,5); n = n + 1 + elseif p == "P" then + local imm = match(q, "^#(.*)$") + if imm then + op = op + parse_imm12(imm) + 0x02000000 + else + op = op + parse_gpr(q) + end + n = n + 1 + elseif p == "p" then + op = op + parse_shift(q, true); n = n + 1 + elseif p == "L" then + op = parse_load(params, nparams, n, op) + elseif p == "l" then + op = op + parse_vload(q) + elseif p == "B" then + local mode, n, s = parse_label(q, false) + waction("REL_"..mode, n, s, 1) + elseif p == "C" then -- blx gpr vs. blx label. 
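+      -- Editor's note (not from upstream DynASM): "C" accepts either a GPR
+      -- (register-indirect blx, kept in the conditional e12fff30 encoding)
+      -- or a label; for a label the opcode is rewritten to the
+      -- unconditional immediate form 0xfa000000 and a relocation action is
+      -- emitted instead.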
+ if match(q, "^([%w_]+):(r1?[0-9])$") or match(q, "^r(1?[0-9])$") then + op = op + parse_gpr(q) + else + if op < 0xe0000000 then werror("unconditional instruction") end + local mode, n, s = parse_label(q, false) + waction("REL_"..mode, n, s, 1) + op = 0xfa000000 + end + elseif p == "F" then + vr = "s" + elseif p == "G" then + vr = "d" + elseif p == "o" then + local r, wb = match(q, "^([^!]*)(!?)$") + op = op + shl(parse_gpr(r), 16) + (wb == "!" and 0x00200000 or 0) + n = n + 1 + elseif p == "R" then + op = op + parse_reglist(q); n = n + 1 + elseif p == "r" then + op = op + parse_vrlist(q); n = n + 1 + elseif p == "W" then + op = op + parse_imm16(q); n = n + 1 + elseif p == "v" then + op = op + parse_imm(q, 5, 7, 0, false); n = n + 1 + elseif p == "w" then + local imm = match(q, "^#(.*)$") + if imm then + op = op + parse_imm(q, 5, 7, 0, false); n = n + 1 + else + op = op + shl(parse_gpr(q), 8) + 16 + end + elseif p == "X" then + op = op + parse_imm(q, 5, 16, 0, false); n = n + 1 + elseif p == "Y" then + local imm = tonumber(match(q, "^#(.*)$")); n = n + 1 + if not imm or shr(imm, 8) ~= 0 then + werror("bad immediate operand") + end + op = op + shl(band(imm, 0xf0), 12) + band(imm, 0x0f) + elseif p == "K" then + local imm = tonumber(match(q, "^#(.*)$")); n = n + 1 + if not imm or shr(imm, 16) ~= 0 then + werror("bad immediate operand") + end + op = op + shl(band(imm, 0xfff0), 4) + band(imm, 0x000f) + elseif p == "T" then + op = op + parse_imm(q, 24, 0, 0, false); n = n + 1 + elseif p == "s" then + -- Ignored. + else + assert(false) + end + end + wputpos(pos, op) +end + +map_op[".template__"] = function(params, template, nparams) + if not params then return template:gsub("%x%x%x%x%x%x%x%x", "") end + + -- Limit number of section buffer positions used by a single dasm_put(). + -- A single opcode needs a maximum of 3 positions. + if secpos+3 > maxsecpos then wflush() end + local pos = wpos() + local lpos, apos, spos = #actlist, #actargs, secpos + + local ok, err + for t in gmatch(template, "[^|]+") do + ok, err = pcall(parse_template, params, t, nparams, pos) + if ok then return end + secpos = spos + actlist[lpos+1] = nil + actlist[lpos+2] = nil + actlist[lpos+3] = nil + actargs[apos+1] = nil + actargs[apos+2] = nil + actargs[apos+3] = nil + end + error(err, 0) +end + +------------------------------------------------------------------------------ + +-- Pseudo-opcode to mark the position where the action list is to be emitted. +map_op[".actionlist_1"] = function(params) + if not params then return "cvar" end + local name = params[1] -- No syntax check. You get to keep the pieces. + wline(function(out) writeactions(out, name) end) +end + +-- Pseudo-opcode to mark the position where the global enum is to be emitted. +map_op[".globals_1"] = function(params) + if not params then return "prefix" end + local prefix = params[1] -- No syntax check. You get to keep the pieces. + wline(function(out) writeglobals(out, prefix) end) +end + +-- Pseudo-opcode to mark the position where the global names are to be emitted. +map_op[".globalnames_1"] = function(params) + if not params then return "cvar" end + local name = params[1] -- No syntax check. You get to keep the pieces. + wline(function(out) writeglobalnames(out, name) end) +end + +-- Pseudo-opcode to mark the position where the extern names are to be emitted. +map_op[".externnames_1"] = function(params) + if not params then return "cvar" end + local name = params[1] -- No syntax check. You get to keep the pieces. 
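+  -- Editor's note (not from upstream DynASM): wline() only queues the
+  -- closure; it is invoked later, when the DynASM core writes the collected
+  -- output lines to the generated C file.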
+ wline(function(out) writeexternnames(out, name) end) +end + +------------------------------------------------------------------------------ + +-- Label pseudo-opcode (converted from trailing colon form). +map_op[".label_1"] = function(params) + if not params then return "[1-9] | ->global | =>pcexpr" end + if secpos+1 > maxsecpos then wflush() end + local mode, n, s = parse_label(params[1], true) + if mode == "EXT" then werror("bad label definition") end + waction("LABEL_"..mode, n, s, 1) +end + +------------------------------------------------------------------------------ + +-- Pseudo-opcodes for data storage. +map_op[".long_*"] = function(params) + if not params then return "imm..." end + for _,p in ipairs(params) do + local n = tonumber(p) + if not n then werror("bad immediate `"..p.."'") end + if n < 0 then n = n + 2^32 end + wputw(n) + if secpos+2 > maxsecpos then wflush() end + end +end + +-- Alignment pseudo-opcode. +map_op[".align_1"] = function(params) + if not params then return "numpow2" end + if secpos+1 > maxsecpos then wflush() end + local align = tonumber(params[1]) + if align then + local x = align + -- Must be a power of 2 in the range (2 ... 256). + for i=1,8 do + x = x / 2 + if x == 1 then + waction("ALIGN", align-1, nil, 1) -- Action byte is 2**n-1. + return + end + end + end + werror("bad alignment") +end + +------------------------------------------------------------------------------ + +-- Pseudo-opcode for (primitive) type definitions (map to C types). +map_op[".type_3"] = function(params, nparams) + if not params then + return nparams == 2 and "name, ctype" or "name, ctype, reg" + end + local name, ctype, reg = params[1], params[2], params[3] + if not match(name, "^[%a_][%w_]*$") then + werror("bad type name `"..name.."'") + end + local tp = map_type[name] + if tp then + werror("duplicate type `"..name.."'") + end + -- Add #type to defines. A bit unclean to put it in map_archdef. + map_archdef["#"..name] = "sizeof("..ctype..")" + -- Add new type and emit shortcut define. + local num = ctypenum + 1 + map_type[name] = { + ctype = ctype, + ctypefmt = format("Dt%X(%%s)", num), + reg = reg, + } + wline(format("#define Dt%X(_V) (int)(ptrdiff_t)&(((%s *)0)_V)", num, ctype)) + ctypenum = num +end +map_op[".type_2"] = map_op[".type_3"] + +-- Dump type definitions. +local function dumptypes(out, lvl) + local t = {} + for name in pairs(map_type) do t[#t+1] = name end + sort(t) + out:write("Type definitions:\n") + for _,name in ipairs(t) do + local tp = map_type[name] + local reg = tp.reg or "" + out:write(format(" %-20s %-20s %s\n", name, tp.ctype, reg)) + end + out:write("\n") +end + +------------------------------------------------------------------------------ + +-- Set the current section. +function _M.section(num) + waction("SECTION", num) + wflush(true) -- SECTION is a terminal action. +end + +------------------------------------------------------------------------------ + +-- Dump architecture description. +function _M.dumparch(out) + out:write(format("DynASM %s version %s, released %s\n\n", + _info.arch, _info.version, _info.release)) + dumpactions(out) +end + +-- Dump all user defined elements. +function _M.dumpdef(out, lvl) + dumptypes(out, lvl) + dumpglobals(out, lvl) + dumpexterns(out, lvl) +end + +------------------------------------------------------------------------------ + +-- Pass callbacks from/to the DynASM core. +function _M.passcb(wl, we, wf, ww) + wline, werror, wfatal, wwarn = wl, we, wf, ww + return wflush +end + +-- Setup the arch-specific module. 
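+-- Editor's note (illustrative, not from upstream DynASM): conditional
+-- mnemonic variants are synthesized lazily by the __index metamethod
+-- installed in _M.mergemaps() below: a lookup of "addeq_3" splits into
+-- "add".."eq".."_3", maps "eq" to condition code 0 and rewrites the add_3
+-- template "e0800000DNPs" to "00800000DNPs" (condition nibble patched).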
+function _M.setup(arch, opt)
+  g_arch, g_opt = arch, opt
+end
+
+-- Merge the core maps and the arch-specific maps.
+function _M.mergemaps(map_coreop, map_def)
+  setmetatable(map_op, { __index = function(t, k)
+    local v = map_coreop[k]
+    if v then return v end
+    local k1, cc, k2 = match(k, "^(.-)(..)([._].*)$")
+    local cv = map_cond[cc]
+    if cv then
+      local v = rawget(t, k1..k2)
+      if type(v) == "string" then
+        local scv = format("%x", cv)
+        return gsub(scv..sub(v, 2), "|e", "|"..scv)
+      end
+    end
+  end })
+  setmetatable(map_def, { __index = map_archdef })
+  return map_op, map_def
+end
+
+return _M
+
+------------------------------------------------------------------------------
+
diff --git a/ext/opcache/jit/ir/dynasm/dasm_arm64.h b/ext/opcache/jit/ir/dynasm/dasm_arm64.h
new file mode 100644
index 00000000000..9a8a39a2586
--- /dev/null
+++ b/ext/opcache/jit/ir/dynasm/dasm_arm64.h
@@ -0,0 +1,570 @@
+/*
+** DynASM ARM64 encoding engine.
+** Copyright (C) 2005-2021 Mike Pall. All rights reserved.
+** Released under the MIT license. See dynasm.lua for full copyright notice.
+*/
+
+#include <stddef.h>
+#include <stdarg.h>
+#include <string.h>
+#include <stdlib.h>
+
+#define DASM_ARCH "arm64"
+
+#ifndef DASM_EXTERN
+#define DASM_EXTERN(a,b,c,d) 0
+#endif
+
+/* Action definitions. */
+enum {
+  DASM_STOP, DASM_SECTION, DASM_ESC, DASM_REL_EXT,
+  /* The following actions need a buffer position. */
+  DASM_ALIGN, DASM_REL_LG, DASM_LABEL_LG,
+  /* The following actions also have an argument. */
+  DASM_REL_PC, DASM_LABEL_PC, DASM_REL_A,
+  DASM_IMM, DASM_IMM6, DASM_IMM12, DASM_IMM13W, DASM_IMM13X, DASM_IMML,
+  DASM_IMMV, DASM_VREG,
+  DASM__MAX
+};
+
+/* Maximum number of section buffer positions for a single dasm_put() call. */
+#define DASM_MAXSECPOS 25
+
+/* DynASM encoder status codes. Action list offset or number are or'ed in. */
+#define DASM_S_OK 0x00000000
+#define DASM_S_NOMEM 0x01000000
+#define DASM_S_PHASE 0x02000000
+#define DASM_S_MATCH_SEC 0x03000000
+#define DASM_S_RANGE_I 0x11000000
+#define DASM_S_RANGE_SEC 0x12000000
+#define DASM_S_RANGE_LG 0x13000000
+#define DASM_S_RANGE_PC 0x14000000
+#define DASM_S_RANGE_REL 0x15000000
+#define DASM_S_RANGE_VREG 0x16000000
+#define DASM_S_UNDEF_LG 0x21000000
+#define DASM_S_UNDEF_PC 0x22000000
+
+/* Macros to convert positions (8 bit section + 24 bit index). */
+#define DASM_POS2IDX(pos) ((pos)&0x00ffffff)
+#define DASM_POS2BIAS(pos) ((pos)&0xff000000)
+#define DASM_SEC2POS(sec) ((sec)<<24)
+#define DASM_POS2SEC(pos) ((pos)>>24)
+#define DASM_POS2PTR(D, pos) (D->sections[DASM_POS2SEC(pos)].rbuf + (pos))
+
+/* Action list type. */
+typedef const unsigned int *dasm_ActList;
+
+/* Per-section structure. */
+typedef struct dasm_Section {
+  int *rbuf; /* Biased buffer pointer (negative section bias). */
+  int *buf; /* True buffer pointer. */
+  size_t bsize; /* Buffer size in bytes. */
+  int pos; /* Biased buffer position. */
+  int epos; /* End of biased buffer position - max single put. */
+  int ofs; /* Byte offset into section. */
+} dasm_Section;
+
+/* Core structure holding the DynASM encoding state. */
+struct dasm_State {
+  size_t psize; /* Allocated size of this structure. */
+  dasm_ActList actionlist; /* Current actionlist pointer. */
+  int *lglabels; /* Local/global chain/pos ptrs. */
+  size_t lgsize;
+  int *pclabels; /* PC label chains/pos ptrs. */
+  size_t pcsize;
+  void **globals; /* Array of globals (bias -10). */
+  dasm_Section *section; /* Pointer to active section. */
+  size_t codesize; /* Total size of all code sections. */
+  int maxsection; /* 0 <= sectionidx < maxsection.
*/ + int status; /* Status code. */ + dasm_Section sections[1]; /* All sections. Alloc-extended. */ +}; + +/* The size of the core structure depends on the max. number of sections. */ +#define DASM_PSZ(ms) (sizeof(dasm_State)+(ms-1)*sizeof(dasm_Section)) + + +/* Initialize DynASM state. */ +void dasm_init(Dst_DECL, int maxsection) +{ + dasm_State *D; + size_t psz = 0; + int i; + Dst_REF = NULL; + DASM_M_GROW(Dst, struct dasm_State, Dst_REF, psz, DASM_PSZ(maxsection)); + D = Dst_REF; + D->psize = psz; + D->lglabels = NULL; + D->lgsize = 0; + D->pclabels = NULL; + D->pcsize = 0; + D->globals = NULL; + D->maxsection = maxsection; + for (i = 0; i < maxsection; i++) { + D->sections[i].buf = NULL; /* Need this for pass3. */ + D->sections[i].rbuf = D->sections[i].buf - DASM_SEC2POS(i); + D->sections[i].bsize = 0; + D->sections[i].epos = 0; /* Wrong, but is recalculated after resize. */ + } +} + +/* Free DynASM state. */ +void dasm_free(Dst_DECL) +{ + dasm_State *D = Dst_REF; + int i; + for (i = 0; i < D->maxsection; i++) + if (D->sections[i].buf) + DASM_M_FREE(Dst, D->sections[i].buf, D->sections[i].bsize); + if (D->pclabels) DASM_M_FREE(Dst, D->pclabels, D->pcsize); + if (D->lglabels) DASM_M_FREE(Dst, D->lglabels, D->lgsize); + DASM_M_FREE(Dst, D, D->psize); +} + +/* Setup global label array. Must be called before dasm_setup(). */ +void dasm_setupglobal(Dst_DECL, void **gl, unsigned int maxgl) +{ + dasm_State *D = Dst_REF; + D->globals = gl - 10; /* Negative bias to compensate for locals. */ + DASM_M_GROW(Dst, int, D->lglabels, D->lgsize, (10+maxgl)*sizeof(int)); +} + +/* Grow PC label array. Can be called after dasm_setup(), too. */ +void dasm_growpc(Dst_DECL, unsigned int maxpc) +{ + dasm_State *D = Dst_REF; + size_t osz = D->pcsize; + DASM_M_GROW(Dst, int, D->pclabels, D->pcsize, maxpc*sizeof(int)); + memset((void *)(((unsigned char *)D->pclabels)+osz), 0, D->pcsize-osz); +} + +/* Setup encoder. 
*/ +void dasm_setup(Dst_DECL, const void *actionlist) +{ + dasm_State *D = Dst_REF; + int i; + D->actionlist = (dasm_ActList)actionlist; + D->status = DASM_S_OK; + D->section = &D->sections[0]; + memset((void *)D->lglabels, 0, D->lgsize); + if (D->pclabels) memset((void *)D->pclabels, 0, D->pcsize); + for (i = 0; i < D->maxsection; i++) { + D->sections[i].pos = DASM_SEC2POS(i); + D->sections[i].ofs = 0; + } +} + + +#ifdef DASM_CHECKS +#define CK(x, st) \ + do { if (!(x)) { \ + D->status = DASM_S_##st|(p-D->actionlist-1); return; } } while (0) +#define CKPL(kind, st) \ + do { if ((size_t)((char *)pl-(char *)D->kind##labels) >= D->kind##size) { \ + D->status = DASM_S_RANGE_##st|(p-D->actionlist-1); return; } } while (0) +#else +#define CK(x, st) ((void)0) +#define CKPL(kind, st) ((void)0) +#endif + +static int dasm_imm12(unsigned int n) +{ + if ((n >> 12) == 0) + return n; + else if ((n & 0xff000fff) == 0) + return (n >> 12) | 0x1000; + else + return -1; +} + +static int dasm_ffs(unsigned long long x) +{ + int n = -1; + while (x) { x >>= 1; n++; } + return n; +} + +static int dasm_imm13(int lo, int hi) +{ + int inv = 0, w = 64, s = 0xfff, xa, xb; + unsigned long long n = (((unsigned long long)hi) << 32) | (unsigned int)lo; + unsigned long long m = 1ULL, a, b, c; + if (n & 1) { n = ~n; inv = 1; } + a = n & -n; b = (n+a)&-(n+a); c = (n+a-b)&-(n+a-b); + xa = dasm_ffs(a); xb = dasm_ffs(b); + if (c) { + w = dasm_ffs(c) - xa; + if (w == 32) m = 0x0000000100000001UL; + else if (w == 16) m = 0x0001000100010001UL; + else if (w == 8) m = 0x0101010101010101UL; + else if (w == 4) m = 0x1111111111111111UL; + else if (w == 2) m = 0x5555555555555555UL; + else return -1; + s = (-2*w & 0x3f) - 1; + } else if (!a) { + return -1; + } else if (xb == -1) { + xb = 64; + } + if ((b-a) * m != n) return -1; + if (inv) { + return ((w - xb) << 6) | (s+w+xa-xb); + } else { + return ((w - xa) << 6) | (s+xb-xa); + } + return -1; +} + +/* Pass 1: Store actions and args, link branches/labels, estimate offsets. */ +void dasm_put(Dst_DECL, int start, ...) +{ + va_list ap; + dasm_State *D = Dst_REF; + dasm_ActList p = D->actionlist + start; + dasm_Section *sec = D->section; + int pos = sec->pos, ofs = sec->ofs; + int *b; + + if (pos >= sec->epos) { + DASM_M_GROW(Dst, int, sec->buf, sec->bsize, + sec->bsize + 2*DASM_MAXSECPOS*sizeof(int)); + sec->rbuf = sec->buf - DASM_POS2BIAS(pos); + sec->epos = (int)sec->bsize/sizeof(int) - DASM_MAXSECPOS+DASM_POS2BIAS(pos); + } + + b = sec->rbuf; + b[pos++] = start; + + va_start(ap, start); + while (1) { + unsigned int ins = *p++; + unsigned int action = (ins >> 16); + if (action >= DASM__MAX) { + ofs += 4; + } else { + int *pl, n = action >= DASM_REL_PC ? va_arg(ap, int) : 0; + switch (action) { + case DASM_STOP: goto stop; + case DASM_SECTION: + n = (ins & 255); CK(n < D->maxsection, RANGE_SEC); + D->section = &D->sections[n]; goto stop; + case DASM_ESC: p++; ofs += 4; break; + case DASM_REL_EXT: if ((ins & 0x8000)) ofs += 8; break; + case DASM_ALIGN: ofs += (ins & 255); b[pos++] = ofs; break; + case DASM_REL_LG: + n = (ins & 2047) - 10; pl = D->lglabels + n; + /* Bkwd rel or global. */ + if (n >= 0) { CK(n>=10||*pl<0, RANGE_LG); CKPL(lg, LG); goto putrel; } + pl += 10; n = *pl; + if (n < 0) n = 0; /* Start new chain for fwd rel if label exists. */ + goto linkrel; + case DASM_REL_PC: + pl = D->pclabels + n; CKPL(pc, PC); + putrel: + n = *pl; + if (n < 0) { /* Label exists. Get label pos and store it. 
*/
+        b[pos] = -n;
+      } else {
+      linkrel:
+        b[pos] = n; /* Else link to rel chain, anchored at label. */
+        *pl = pos;
+      }
+      pos++;
+      if ((ins & 0x8000)) ofs += 8;
+      break;
+      case DASM_REL_A:
+        b[pos++] = n;
+        b[pos++] = va_arg(ap, int);
+        break;
+      case DASM_LABEL_LG:
+        pl = D->lglabels + (ins & 2047) - 10; CKPL(lg, LG); goto putlabel;
+      case DASM_LABEL_PC:
+        pl = D->pclabels + n; CKPL(pc, PC);
+      putlabel:
+        n = *pl; /* n > 0: Collapse rel chain and replace with label pos. */
+        while (n > 0) { int *pb = DASM_POS2PTR(D, n); n = *pb; *pb = pos;
+        }
+        *pl = -pos; /* Label exists now. */
+        b[pos++] = ofs; /* Store pass1 offset estimate. */
+        break;
+      case DASM_IMM:
+        CK((n & ((1<<((ins>>10)&31))-1)) == 0, RANGE_I);
+        n >>= ((ins>>10)&31);
+#ifdef DASM_CHECKS
+        if ((ins & 0x8000))
+          CK(((n + (1<<(((ins>>5)&31)-1)))>>((ins>>5)&31)) == 0, RANGE_I);
+        else
+          CK((n>>((ins>>5)&31)) == 0, RANGE_I);
+#endif
+        b[pos++] = n;
+        break;
+      case DASM_IMM6:
+        CK((n >> 6) == 0, RANGE_I);
+        b[pos++] = n;
+        break;
+      case DASM_IMM12:
+        CK(dasm_imm12((unsigned int)n) != -1, RANGE_I);
+        b[pos++] = n;
+        break;
+      case DASM_IMM13W:
+        CK(dasm_imm13(n, n) != -1, RANGE_I);
+        b[pos++] = n;
+        break;
+      case DASM_IMM13X: {
+        int m = va_arg(ap, int);
+        CK(dasm_imm13(n, m) != -1, RANGE_I);
+        b[pos++] = n;
+        b[pos++] = m;
+        break;
+      }
+      case DASM_IMML: {
+#ifdef DASM_CHECKS
+        int scale = (ins & 3);
+        CK((!(n & ((1<<scale)-1)) && (unsigned int)(n>>scale) < 4096) ||
+           (unsigned int)(n+256) < 512, RANGE_I);
+#endif
+        b[pos++] = n;
+        break;
+      }
+      case DASM_IMMV:
+        ofs += 4;
+        b[pos++] = n;
+        break;
+      case DASM_VREG:
+        CK(n < 32, RANGE_VREG);
+        b[pos++] = n;
+        break;
+      }
+    }
+  }
+stop:
+  va_end(ap);
+  sec->pos = pos;
+  sec->ofs = ofs;
+}
+#undef CK
+
+/* Pass 2: Link sections, shrink aligns, fix label offsets. */
+int dasm_link(Dst_DECL, size_t *szp)
+{
+  dasm_State *D = Dst_REF;
+  int secnum;
+  int ofs = 0;
+
+#ifdef DASM_CHECKS
+  *szp = 0;
+  if (D->status != DASM_S_OK) return D->status;
+  {
+    int pc;
+    for (pc = 0; pc*sizeof(int) < D->pcsize; pc++)
+      if (D->pclabels[pc] > 0) return DASM_S_UNDEF_PC|pc;
+  }
+#endif
+
+  { /* Handle globals not defined in this translation unit. */
+    int idx;
+    for (idx = 10; idx*sizeof(int) < D->lgsize; idx++) {
+      int n = D->lglabels[idx];
+      /* Undefined label: Collapse rel chain and replace with marker (< 0). */
+      while (n > 0) { int *pb = DASM_POS2PTR(D, n); n = *pb; *pb = -idx; }
+    }
+  }
+
+  /* Combine all code sections. No support for data sections (yet). */
+  for (secnum = 0; secnum < D->maxsection; secnum++) {
+    dasm_Section *sec = D->sections + secnum;
+    int *b = sec->rbuf;
+    int pos = DASM_SEC2POS(secnum);
+    int lastpos = sec->pos;
+
+    while (pos != lastpos) {
+      dasm_ActList p = D->actionlist + b[pos++];
+      while (1) {
+        unsigned int ins = *p++;
+        unsigned int action = (ins >> 16);
+        switch (action) {
+        case DASM_STOP: case DASM_SECTION: goto stop;
+        case DASM_ESC: p++; break;
+        case DASM_REL_EXT: break;
+        case DASM_ALIGN: ofs -= (b[pos++] + ofs) & (ins & 255); break;
+        case DASM_REL_LG: case DASM_REL_PC: pos++; break;
+        case DASM_LABEL_LG: case DASM_LABEL_PC: b[pos++] += ofs; break;
+        case DASM_IMM: case DASM_IMM6: case DASM_IMM12: case DASM_IMM13W:
+        case DASM_IMML: case DASM_IMMV: case DASM_VREG: pos++; break;
+        case DASM_IMM13X: case DASM_REL_A: pos += 2; break;
+        }
+      }
+      stop: (void)0;
+    }
+    ofs += sec->ofs; /* Next section starts right after current section.
*/
+  }
+
+  D->codesize = ofs; /* Total size of all code sections */
+  *szp = ofs;
+  return DASM_S_OK;
+}
+
+#ifdef DASM_ADD_VENEER
+#define CK_REL(x, o) \
+  do { if (!(x) && !(n = DASM_ADD_VENEER(D, buffer, ins, b, cp, o))) \
+    return DASM_S_RANGE_REL|(p-D->actionlist-1); \
+  } while (0)
+#else
+#define CK_REL(x, o) CK(x, RANGE_REL)
+#endif
+
+#ifdef DASM_CHECKS
+#define CK(x, st) \
+  do { if (!(x)) return DASM_S_##st|(p-D->actionlist-1); } while (0)
+#else
+#define CK(x, st) ((void)0)
+#endif
+
+/* Pass 3: Encode sections. */
+int dasm_encode(Dst_DECL, void *buffer)
+{
+  dasm_State *D = Dst_REF;
+  char *base = (char *)buffer;
+  unsigned int *cp = (unsigned int *)buffer;
+  int secnum;
+
+  /* Encode all code sections. No support for data sections (yet). */
+  for (secnum = 0; secnum < D->maxsection; secnum++) {
+    dasm_Section *sec = D->sections + secnum;
+    int *b = sec->buf;
+    int *endb = sec->rbuf + sec->pos;
+
+    while (b != endb) {
+      dasm_ActList p = D->actionlist + *b++;
+      while (1) {
+        unsigned int ins = *p++;
+        unsigned int action = (ins >> 16);
+        int n = (action >= DASM_ALIGN && action < DASM__MAX) ? *b++ : 0;
+        switch (action) {
+        case DASM_STOP: case DASM_SECTION: goto stop;
+        case DASM_ESC: *cp++ = *p++; break;
+        case DASM_REL_EXT:
+          n = DASM_EXTERN(Dst, (unsigned char *)cp, (ins&2047), !(ins&2048));
+          goto patchrel;
+        case DASM_ALIGN:
+          ins &= 255; while ((((char *)cp - base) & ins)) *cp++ = 0xd503201f;
+          break;
+        case DASM_REL_LG:
+          if (n < 0) {
+            ptrdiff_t na = (ptrdiff_t)D->globals[-n] - (ptrdiff_t)cp + 4;
+            n = (int)na;
+            CK_REL((ptrdiff_t)n == na, na);
+            goto patchrel;
+          }
+          /* fallthrough */
+        case DASM_REL_PC:
+          CK(n >= 0, UNDEF_PC);
+          n = *DASM_POS2PTR(D, n) - (int)((char *)cp - base) + 4;
+        patchrel:
+          if (!(ins & 0xf800)) { /* B, BL */
+            CK_REL((n & 3) == 0 && ((n+0x08000000) >> 28) == 0, n);
+            cp[-1] |= ((n >> 2) & 0x03ffffff);
+          } else if ((ins & 0x800)) { /* B.cond, CBZ, CBNZ, LDR* literal */
+            CK_REL((n & 3) == 0 && ((n+0x00100000) >> 21) == 0, n);
+            cp[-1] |= ((n << 3) & 0x00ffffe0);
+          } else if ((ins & 0x3000) == 0x2000) { /* ADR */
+            CK_REL(((n+0x00100000) >> 21) == 0, n);
+            cp[-1] |= ((n << 3) & 0x00ffffe0) | ((n & 3) << 29);
+          } else if ((ins & 0x3000) == 0x3000) { /* ADRP */
+            cp[-1] |= ((n >> 9) & 0x00ffffe0) | (((n >> 12) & 3) << 29);
+          } else if ((ins & 0x1000)) { /* TBZ, TBNZ */
+            CK_REL((n & 3) == 0 && ((n+0x00008000) >> 16) == 0, n);
+            cp[-1] |= ((n << 3) & 0x0007ffe0);
+          } else if ((ins & 0x8000)) { /* absolute */
+            cp[0] = (unsigned int)((ptrdiff_t)cp - 4 + n);
+            cp[1] = (unsigned int)(((ptrdiff_t)cp - 4 + n) >> 32);
+            cp += 2;
+          }
+          break;
+        case DASM_REL_A: {
+          ptrdiff_t na = (((ptrdiff_t)(*b++) << 32) | (unsigned int)n);
+          if ((ins & 0x3000) == 0x3000) { /* ADRP */
+            ins &= ~0x1000;
+            na = (na >> 12) - (((ptrdiff_t)cp - 4) >> 12);
+          } else {
+            na = na - (ptrdiff_t)cp + 4;
+          }
+          n = (int)na;
+          CK_REL((ptrdiff_t)n == na, na);
+          goto patchrel;
+        }
+        case DASM_LABEL_LG:
+          ins &= 2047; if (ins >= 20) D->globals[ins-10] = (void *)(base + n);
+          break;
+        case DASM_LABEL_PC: break;
+        case DASM_IMM:
+          cp[-1] |= (n & ((1<<((ins>>5)&31))-1)) << (ins&31);
+          break;
+        case DASM_IMM6:
+          cp[-1] |= ((n&31) << 19) | ((n&32) << 26);
+          break;
+        case DASM_IMM12:
+          cp[-1] |= (dasm_imm12((unsigned int)n) << 10);
+          break;
+        case DASM_IMM13W:
+          cp[-1] |= (dasm_imm13(n, n) << 10);
+          break;
+        case DASM_IMM13X:
+          cp[-1] |= (dasm_imm13(n, *b++) << 10);
+          break;
+        case DASM_IMML: {
+          int scale = (ins & 3);
+          cp[-1] |= (!(n & ((1<<scale)-1)) && (unsigned int)(n>>scale) < 4096) ?
+ ((n << (10-scale)) | 0x01000000) : ((n & 511) << 12); + break; + } + case DASM_IMMV: + *cp++ = n; + break; + case DASM_VREG: + cp[-1] |= (n & 0x1f) << (ins & 0x1f); + break; + default: *cp++ = ins; break; + } + } + stop: (void)0; + } + } + + if (base + D->codesize != (char *)cp) /* Check for phase errors. */ + return DASM_S_PHASE; + return DASM_S_OK; +} +#undef CK + +/* Get PC label offset. */ +int dasm_getpclabel(Dst_DECL, unsigned int pc) +{ + dasm_State *D = Dst_REF; + if (pc*sizeof(int) < D->pcsize) { + int pos = D->pclabels[pc]; + if (pos < 0) return *DASM_POS2PTR(D, -pos); + if (pos > 0) return -1; /* Undefined. */ + } + return -2; /* Unused or out of range. */ +} + +#ifdef DASM_CHECKS +/* Optional sanity checker to call between isolated encoding steps. */ +int dasm_checkstep(Dst_DECL, int secmatch) +{ + dasm_State *D = Dst_REF; + if (D->status == DASM_S_OK) { + int i; + for (i = 1; i <= 9; i++) { + if (D->lglabels[i] > 0) { D->status = DASM_S_UNDEF_LG|i; break; } + D->lglabels[i] = 0; + } + } + if (D->status == DASM_S_OK && secmatch >= 0 && + D->section != &D->sections[secmatch]) + D->status = DASM_S_MATCH_SEC|(D->section-D->sections); + return D->status; +} +#endif + diff --git a/ext/opcache/jit/ir/dynasm/dasm_arm64.lua b/ext/opcache/jit/ir/dynasm/dasm_arm64.lua new file mode 100644 index 00000000000..7e9c4cbf22e --- /dev/null +++ b/ext/opcache/jit/ir/dynasm/dasm_arm64.lua @@ -0,0 +1,1219 @@ +------------------------------------------------------------------------------ +-- DynASM ARM64 module. +-- +-- Copyright (C) 2005-2021 Mike Pall. All rights reserved. +-- See dynasm.lua for full copyright notice. +------------------------------------------------------------------------------ + +-- Module information: +local _info = { + arch = "arm", + description = "DynASM ARM64 module", + version = "1.5.0", + vernum = 10500, + release = "2021-05-02", + author = "Mike Pall", + license = "MIT", +} + +-- Exported glue functions for the arch-specific module. +local _M = { _info = _info } + +-- Cache library functions. +local type, tonumber, pairs, ipairs = type, tonumber, pairs, ipairs +local assert, setmetatable, rawget = assert, setmetatable, rawget +local _s = string +local format, byte, char = _s.format, _s.byte, _s.char +local match, gmatch, gsub = _s.match, _s.gmatch, _s.gsub +local concat, sort, insert = table.concat, table.sort, table.insert +local bit = bit or require("bit") +local band, shl, shr, sar = bit.band, bit.lshift, bit.rshift, bit.arshift +local ror, tohex, tobit = bit.ror, bit.tohex, bit.tobit + +-- Inherited tables and callbacks. +local g_opt, g_arch +local wline, werror, wfatal, wwarn + +-- Action name list. +-- CHECK: Keep this in sync with the C code! +local action_names = { + "STOP", "SECTION", "ESC", "REL_EXT", + "ALIGN", "REL_LG", "LABEL_LG", + "REL_PC", "LABEL_PC", "REL_A", + "IMM", "IMM6", "IMM12", "IMM13W", "IMM13X", "IMML", "IMMV", + "VREG", +} + +-- Maximum number of section buffer positions for dasm_put(). +-- CHECK: Keep this in sync with the C code! +local maxsecpos = 25 -- Keep this low, to avoid excessively long C lines. + +-- Action name -> action number. +local map_action = {} +for n,name in ipairs(action_names) do + map_action[name] = n-1 +end + +-- Action list buffer. +local actlist = {} + +-- Argument list for next dasm_put(). Start with offset 0 into action list. +local actargs = { 0 } + +-- Current number of section buffer positions for dasm_put(). 
+local secpos = 1 + +------------------------------------------------------------------------------ + +-- Dump action names and numbers. +local function dumpactions(out) + out:write("DynASM encoding engine action codes:\n") + for n,name in ipairs(action_names) do + local num = map_action[name] + out:write(format(" %-10s %02X %d\n", name, num, num)) + end + out:write("\n") +end + +-- Write action list buffer as a huge static C array. +local function writeactions(out, name) + local nn = #actlist + if nn == 0 then nn = 1; actlist[0] = map_action.STOP end + out:write("static const unsigned int ", name, "[", nn, "] = {\n") + for i = 1,nn-1 do + assert(out:write("0x", tohex(actlist[i]), ",\n")) + end + assert(out:write("0x", tohex(actlist[nn]), "\n};\n\n")) +end + +------------------------------------------------------------------------------ + +-- Add word to action list. +local function wputxw(n) + assert(n >= 0 and n <= 0xffffffff and n % 1 == 0, "word out of range") + actlist[#actlist+1] = n +end + +-- Add action to list with optional arg. Advance buffer pos, too. +local function waction(action, val, a, num) + local w = assert(map_action[action], "bad action name `"..action.."'") + wputxw(w * 0x10000 + (val or 0)) + if a then actargs[#actargs+1] = a end + if a or num then secpos = secpos + (num or 1) end +end + +-- Flush action list (intervening C code or buffer pos overflow). +local function wflush(term) + if #actlist == actargs[1] then return end -- Nothing to flush. + if not term then waction("STOP") end -- Terminate action list. + wline(format("dasm_put(Dst, %s);", concat(actargs, ", ")), true) + actargs = { #actlist } -- Actionlist offset is 1st arg to next dasm_put(). + secpos = 1 -- The actionlist offset occupies a buffer position, too. +end + +-- Put escaped word. +local function wputw(n) + if n <= 0x000fffff then waction("ESC") end + wputxw(n) +end + +-- Reserve position for word. +local function wpos() + local pos = #actlist+1 + actlist[pos] = "" + return pos +end + +-- Store word to reserved position. +local function wputpos(pos, n) + assert(n >= 0 and n <= 0xffffffff and n % 1 == 0, "word out of range") + if n <= 0x000fffff then + insert(actlist, pos+1, n) + n = map_action.ESC * 0x10000 + end + actlist[pos] = n +end + +------------------------------------------------------------------------------ + +-- Global label name -> global label number. With auto assignment on 1st use. +local next_global = 20 +local map_global = setmetatable({}, { __index = function(t, name) + if not match(name, "^[%a_][%w_]*$") then werror("bad global label") end + local n = next_global + if n > 2047 then werror("too many global labels") end + next_global = n + 1 + t[name] = n + return n +end}) + +-- Dump global labels. +local function dumpglobals(out, lvl) + local t = {} + for name, n in pairs(map_global) do t[n] = name end + out:write("Global labels:\n") + for i=20,next_global-1 do + out:write(format(" %s\n", t[i])) + end + out:write("\n") +end + +-- Write global label enum. +local function writeglobals(out, prefix) + local t = {} + for name, n in pairs(map_global) do t[n] = name end + out:write("enum {\n") + for i=20,next_global-1 do + out:write(" ", prefix, t[i], ",\n") + end + out:write(" ", prefix, "_MAX\n};\n") +end + +-- Write global label names. 
+local function writeglobalnames(out, name) + local t = {} + for name, n in pairs(map_global) do t[n] = name end + out:write("static const char *const ", name, "[] = {\n") + for i=20,next_global-1 do + out:write(" \"", t[i], "\",\n") + end + out:write(" (const char *)0\n};\n") +end + +------------------------------------------------------------------------------ + +-- Extern label name -> extern label number. With auto assignment on 1st use. +local next_extern = 0 +local map_extern_ = {} +local map_extern = setmetatable({}, { __index = function(t, name) + -- No restrictions on the name for now. + local n = next_extern + if n > 2047 then werror("too many extern labels") end + next_extern = n + 1 + t[name] = n + map_extern_[n] = name + return n +end}) + +-- Dump extern labels. +local function dumpexterns(out, lvl) + out:write("Extern labels:\n") + for i=0,next_extern-1 do + out:write(format(" %s\n", map_extern_[i])) + end + out:write("\n") +end + +-- Write extern label names. +local function writeexternnames(out, name) + out:write("static const char *const ", name, "[] = {\n") + for i=0,next_extern-1 do + out:write(" \"", map_extern_[i], "\",\n") + end + out:write(" (const char *)0\n};\n") +end + +------------------------------------------------------------------------------ + +-- Arch-specific maps. + +-- Ext. register name -> int. name. +local map_archdef = { xzr = "@x31", wzr = "@w31", lr = "x30", } + +-- Int. register name -> ext. name. +local map_reg_rev = { ["@x31"] = "xzr", ["@w31"] = "wzr", x30 = "lr", } + +local map_type = {} -- Type name -> { ctype, reg } +local ctypenum = 0 -- Type number (for Dt... macros). + +-- Reverse defines for registers. +function _M.revdef(s) + return map_reg_rev[s] or s +end + +local map_shift = { lsl = 0, lsr = 1, asr = 2, } + +local map_extend = { + uxtb = 0, uxth = 1, uxtw = 2, uxtx = 3, + sxtb = 4, sxth = 5, sxtw = 6, sxtx = 7, +} + +local map_cond = { + eq = 0, ne = 1, cs = 2, cc = 3, mi = 4, pl = 5, vs = 6, vc = 7, + hi = 8, ls = 9, ge = 10, lt = 11, gt = 12, le = 13, al = 14, + hs = 2, lo = 3, +} + +------------------------------------------------------------------------------ + +local parse_reg_type + +local function parse_reg(expr, shift, no_vreg) + if not expr then werror("expected register name") end + local tname, ovreg = match(expr, "^([%w_]+):(@?%l%d+)$") + if not tname then + tname, ovreg = match(expr, "^([%w_]+):(R[xwqdshb]%b())$") + end + local tp = map_type[tname or expr] + if tp then + local reg = ovreg or tp.reg + if not reg then + werror("type `"..(tname or expr).."' needs a register override") + end + expr = reg + end + local ok31, rt, r = match(expr, "^(@?)([xwqdshb])([123]?[0-9])$") + if r then + r = tonumber(r) + if r <= 30 or (r == 31 and ok31 ~= "" or (rt ~= "w" and rt ~= "x")) then + if not parse_reg_type then + parse_reg_type = rt + elseif parse_reg_type ~= rt then + werror("register size mismatch") + end + return shl(r, shift), tp + end + end + local vrt, vreg = match(expr, "^R([xwqdshb])(%b())$") + if vreg then + if not parse_reg_type then + parse_reg_type = vrt + elseif parse_reg_type ~= vrt then + werror("register size mismatch") + end + if not no_vreg then waction("VREG", shift, vreg) end + return 0 + end + werror("bad register name `"..expr.."'") +end + +local function parse_reg_base(expr) + if expr == "sp" then return 0x3e0 end + local base, tp = parse_reg(expr, 5) + if parse_reg_type ~= "x" then werror("bad register type") end + parse_reg_type = false + return base, tp +end + +local parse_ctx = {} + +local loadenv = 
setfenv and function(s) + local code = loadstring(s, "") + if code then setfenv(code, parse_ctx) end + return code +end or function(s) + return load(s, "", nil, parse_ctx) +end + +-- Try to parse simple arithmetic, too, since some basic ops are aliases. +local function parse_number(n) + local x = tonumber(n) + if x then return x end + local code = loadenv("return "..n) + if code then + local ok, y = pcall(code) + if ok and type(y) == "number" then return y end + end + return nil +end + +local function parse_imm(imm, bits, shift, scale, signed) + imm = match(imm, "^#(.*)$") + if not imm then werror("expected immediate operand") end + local n = parse_number(imm) + if n then + local m = sar(n, scale) + if shl(m, scale) == n then + if signed then + local s = sar(m, bits-1) + if s == 0 then return shl(m, shift) + elseif s == -1 then return shl(m + shl(1, bits), shift) end + else + if sar(m, bits) == 0 then return shl(m, shift) end + end + end + werror("out of range immediate `"..imm.."'") + else + waction("IMM", (signed and 32768 or 0)+scale*1024+bits*32+shift, imm) + return 0 + end +end + +local function parse_imm12(imm) + imm = match(imm, "^#(.*)$") + if not imm then werror("expected immediate operand") end + local n = parse_number(imm) + if n then + if shr(n, 12) == 0 then + return shl(n, 10) + elseif band(n, 0xff000fff) == 0 then + return shr(n, 2) + 0x00400000 + end + werror("out of range immediate `"..imm.."'") + else + waction("IMM12", 0, imm) + return 0 + end +end + +local function parse_imm13(imm) + imm = match(imm, "^#(.*)$") + if not imm then werror("expected immediate operand") end + local n = parse_number(imm) + local r64 = parse_reg_type == "x" + if n and n % 1 == 0 and n >= 0 and n <= 0xffffffff then + local inv = false + if band(n, 1) == 1 then n = bit.bnot(n); inv = true end + local t = {} + for i=1,32 do t[i] = band(n, 1); n = shr(n, 1) end + local b = table.concat(t) + b = b..(r64 and (inv and "1" or "0"):rep(32) or b) + local p0, p1, p0a, p1a = b:match("^(0+)(1+)(0*)(1*)") + if p0 then + local w = p1a == "" and (r64 and 64 or 32) or #p1+#p0a + if band(w, w-1) == 0 and b == b:sub(1, w):rep(64/w) then + local s = band(-2*w, 0x3f) - 1 + if w == 64 then s = s + 0x1000 end + if inv then + return shl(w-#p1-#p0, 16) + shl(s+w-#p1, 10) + else + return shl(w-#p0, 16) + shl(s+#p1, 10) + end + end + end + werror("out of range immediate `"..imm.."'") + elseif r64 then + waction("IMM13X", 0, format("(unsigned int)(%s)", imm)) + actargs[#actargs+1] = format("(unsigned int)((unsigned long long)(%s)>>32)", imm) + return 0 + else + waction("IMM13W", 0, imm) + return 0 + end +end + +local function parse_imm6(imm) + imm = match(imm, "^#(.*)$") + if not imm then werror("expected immediate operand") end + local n = parse_number(imm) + if n then + if n >= 0 and n <= 63 then + return shl(band(n, 0x1f), 19) + (n >= 32 and 0x80000000 or 0) + end + werror("out of range immediate `"..imm.."'") + else + waction("IMM6", 0, imm) + return 0 + end +end + +local function parse_imm_load(imm, scale) + local n = parse_number(imm) + if n then + local m = sar(n, scale) + if shl(m, scale) == n and m >= 0 and m < 0x1000 then + return shl(m, 10) + 0x01000000 -- Scaled, unsigned 12 bit offset. + elseif n >= -256 and n < 256 then + return shl(band(n, 511), 12) -- Unscaled, signed 9 bit offset. 
+ end + werror("out of range immediate `"..imm.."'") + else + waction("IMML", scale, imm) + return 0 + end +end + +local function parse_fpimm(imm) + imm = match(imm, "^#(.*)$") + if not imm then werror("expected immediate operand") end + local n = parse_number(imm) + if n then + local m, e = math.frexp(n) + local s, e2 = 0, band(e-2, 7) + if m < 0 then m = -m; s = 0x00100000 end + m = m*32-16 + if m % 1 == 0 and m >= 0 and m <= 15 and sar(shl(e2, 29), 29)+2 == e then + return s + shl(e2, 17) + shl(m, 13) + end + werror("out of range immediate `"..imm.."'") + else + werror("NYI fpimm action") + end +end + +local function parse_shift(expr) + local s, s2 = match(expr, "^(%S+)%s*(.*)$") + s = map_shift[s] + if not s then werror("expected shift operand") end + return parse_imm(s2, 6, 10, 0, false) + shl(s, 22) +end + +local function parse_lslx16(expr) + local n = match(expr, "^lsl%s*#(%d+)$") + n = tonumber(n) + if not n then werror("expected shift operand") end + if band(n, parse_reg_type == "x" and 0xffffffcf or 0xffffffef) ~= 0 then + werror("bad shift amount") + end + return shl(n, 17) +end + +local function parse_extend(expr) + local s, s2 = match(expr, "^(%S+)%s*(.*)$") + if s == "lsl" then + s = parse_reg_type == "x" and 3 or 2 + else + s = map_extend[s] + end + if not s then werror("expected extend operand") end + return (s2 == "" and 0 or parse_imm(s2, 3, 10, 0, false)) + shl(s, 13) +end + +local function parse_cond(expr, inv) + local c = map_cond[expr] + if not c then werror("expected condition operand") end + return shl(bit.bxor(c, inv), 12) +end + +local function parse_load(params, nparams, n, op) + if params[n+2] then werror("too many operands") end + local scale = shr(op, 30) + local pn, p2 = params[n], params[n+1] + local p1, wb = match(pn, "^%[%s*(.-)%s*%](!?)$") + if not p1 then + if not p2 then + local reg, tailr = match(pn, "^([%w_:]+)%s*(.*)$") + if reg and tailr ~= "" then + local base, tp = parse_reg_base(reg) + if tp then + waction("IMML", scale, format(tp.ctypefmt, tailr)) + return op + base + end + end + end + werror("expected address operand") + end + if p2 then + if wb == "!" then werror("bad use of '!'") end + op = op + parse_reg_base(p1) + parse_imm(p2, 9, 12, 0, true) + 0x400 + elseif wb == "!" then + local p1a, p2a = match(p1, "^([^,%s]*)%s*,%s*(.*)$") + if not p1a then werror("bad use of '!'") end + op = op + parse_reg_base(p1a) + parse_imm(p2a, 9, 12, 0, true) + 0xc00 + else + local p1a, p2a = match(p1, "^([^,%s]*)%s*(.*)$") + op = op + parse_reg_base(p1a) + if p2a ~= "" then + local imm = match(p2a, "^,%s*#(.*)$") + if imm then + op = op + parse_imm_load(imm, scale) + else + local p2b, p3b, p3s = match(p2a, "^,%s*([^,%s]*)%s*,?%s*(%S*)%s*(.*)$") + op = op + parse_reg(p2b, 16) + 0x00200800 + if parse_reg_type ~= "x" and parse_reg_type ~= "w" then + werror("bad index register type") + end + if p3b == "" then + if parse_reg_type ~= "x" then werror("bad index register type") end + op = op + 0x6000 + else + if p3s == "" or p3s == "#0" then + elseif p3s == "#"..scale then + op = op + 0x1000 + else + werror("bad scale") + end + if parse_reg_type == "x" then + if p3b == "lsl" and p3s ~= "" then op = op + 0x6000 + elseif p3b == "sxtx" then op = op + 0xe000 + else + werror("bad extend/shift specifier") + end + else + if p3b == "uxtw" then op = op + 0x4000 + elseif p3b == "sxtw" then op = op + 0xc000 + else + werror("bad extend/shift specifier") + end + end + end + end + else + if wb == "!" 
then werror("bad use of '!'") end + op = op + 0x01000000 + end + end + return op +end + +local function parse_load_pair(params, nparams, n, op) + if params[n+2] then werror("too many operands") end + local pn, p2 = params[n], params[n+1] + local scale = shr(op, 30) == 0 and 2 or 3 + local p1, wb = match(pn, "^%[%s*(.-)%s*%](!?)$") + if not p1 then + if not p2 then + local reg, tailr = match(pn, "^([%w_:]+)%s*(.*)$") + if reg and tailr ~= "" then + local base, tp = parse_reg_base(reg) + if tp then + waction("IMM", 32768+7*32+15+scale*1024, format(tp.ctypefmt, tailr)) + return op + base + 0x01000000 + end + end + end + werror("expected address operand") + end + if p2 then + if wb == "!" then werror("bad use of '!'") end + op = op + 0x00800000 + else + local p1a, p2a = match(p1, "^([^,%s]*)%s*,%s*(.*)$") + if p1a then p1, p2 = p1a, p2a else p2 = "#0" end + op = op + (wb == "!" and 0x01800000 or 0x01000000) + end + return op + parse_reg_base(p1) + parse_imm(p2, 7, 15, scale, true) +end + +local function parse_label(label, def) + local prefix = label:sub(1, 2) + -- =>label (pc label reference) + if prefix == "=>" then + return "PC", 0, label:sub(3) + end + -- ->name (global label reference) + if prefix == "->" then + return "LG", map_global[label:sub(3)] + end + if def then + -- [1-9] (local label definition) + if match(label, "^[1-9]$") then + return "LG", 10+tonumber(label) + end + else + -- [<>][1-9] (local label reference) + local dir, lnum = match(label, "^([<>])([1-9])$") + if dir then -- Fwd: 1-9, Bkwd: 11-19. + return "LG", lnum + (dir == ">" and 0 or 10) + end + -- extern label (extern label reference) + local extname = match(label, "^extern%s+(%S+)$") + if extname then + return "EXT", map_extern[extname] + end + -- &expr (pointer) + if label:sub(1, 1) == "&" then + return "A", 0, format("(ptrdiff_t)(%s)", label:sub(2)) + end + end +end + +local function branch_type(op) + if band(op, 0x7c000000) == 0x14000000 then return 0 -- B, BL + elseif shr(op, 24) == 0x54 or band(op, 0x7e000000) == 0x34000000 or + band(op, 0x3b000000) == 0x18000000 then + return 0x800 -- B.cond, CBZ, CBNZ, LDR* literal + elseif band(op, 0x7e000000) == 0x36000000 then return 0x1000 -- TBZ, TBNZ + elseif band(op, 0x9f000000) == 0x10000000 then return 0x2000 -- ADR + elseif band(op, 0x9f000000) == band(0x90000000) then return 0x3000 -- ADRP + else + assert(false, "unknown branch type") + end +end + +------------------------------------------------------------------------------ + +local map_op, op_template + +local function op_alias(opname, f) + return function(params, nparams) + if not params then return "-> "..opname:sub(1, -3) end + f(params, nparams) + op_template(params, map_op[opname], nparams) + end +end + +local function alias_bfx(p) + p[4] = "#("..p[3]:sub(2)..")+("..p[4]:sub(2)..")-1" +end + +local function alias_bfiz(p) + parse_reg(p[1], 0, true) + if parse_reg_type == "w" then + p[3] = "#(32-("..p[3]:sub(2).."))%32" + p[4] = "#("..p[4]:sub(2)..")-1" + else + p[3] = "#(64-("..p[3]:sub(2).."))%64" + p[4] = "#("..p[4]:sub(2)..")-1" + end +end + +local alias_lslimm = op_alias("ubfm_4", function(p) + parse_reg(p[1], 0, true) + local sh = p[3]:sub(2) + if parse_reg_type == "w" then + p[3] = "#(32-("..sh.."))%32" + p[4] = "#31-("..sh..")" + else + p[3] = "#(64-("..sh.."))%64" + p[4] = "#63-("..sh..")" + end +end) + +-- Template strings for ARM instructions. +map_op = { + -- Basic data processing instructions. 
+ add_3 = "0b000000DNMg|11000000pDpNIg|8b206000pDpNMx", + add_4 = "0b000000DNMSg|0b200000DNMXg|8b200000pDpNMXx|8b200000pDpNxMwX", + adds_3 = "2b000000DNMg|31000000DpNIg|ab206000DpNMx", + adds_4 = "2b000000DNMSg|2b200000DNMXg|ab200000DpNMXx|ab200000DpNxMwX", + cmn_2 = "2b00001fNMg|3100001fpNIg|ab20601fpNMx", + cmn_3 = "2b00001fNMSg|2b20001fNMXg|ab20001fpNMXx|ab20001fpNxMwX", + + sub_3 = "4b000000DNMg|51000000pDpNIg|cb206000pDpNMx", + sub_4 = "4b000000DNMSg|4b200000DNMXg|cb200000pDpNMXx|cb200000pDpNxMwX", + subs_3 = "6b000000DNMg|71000000DpNIg|eb206000DpNMx", + subs_4 = "6b000000DNMSg|6b200000DNMXg|eb200000DpNMXx|eb200000DpNxMwX", + cmp_2 = "6b00001fNMg|7100001fpNIg|eb20601fpNMx", + cmp_3 = "6b00001fNMSg|6b20001fNMXg|eb20001fpNMXx|eb20001fpNxMwX", + + neg_2 = "4b0003e0DMg", + neg_3 = "4b0003e0DMSg", + negs_2 = "6b0003e0DMg", + negs_3 = "6b0003e0DMSg", + + adc_3 = "1a000000DNMg", + adcs_3 = "3a000000DNMg", + sbc_3 = "5a000000DNMg", + sbcs_3 = "7a000000DNMg", + ngc_2 = "5a0003e0DMg", + ngcs_2 = "7a0003e0DMg", + + and_3 = "0a000000DNMg|12000000pDNig", + and_4 = "0a000000DNMSg", + orr_3 = "2a000000DNMg|32000000pDNig", + orr_4 = "2a000000DNMSg", + eor_3 = "4a000000DNMg|52000000pDNig", + eor_4 = "4a000000DNMSg", + ands_3 = "6a000000DNMg|72000000DNig", + ands_4 = "6a000000DNMSg", + tst_2 = "6a00001fNMg|7200001fNig", + tst_3 = "6a00001fNMSg", + + bic_3 = "0a200000DNMg", + bic_4 = "0a200000DNMSg", + orn_3 = "2a200000DNMg", + orn_4 = "2a200000DNMSg", + eon_3 = "4a200000DNMg", + eon_4 = "4a200000DNMSg", + bics_3 = "6a200000DNMg", + bics_4 = "6a200000DNMSg", + + movn_2 = "12800000DWg", + movn_3 = "12800000DWRg", + movz_2 = "52800000DWg", + movz_3 = "52800000DWRg", + movk_2 = "72800000DWg", + movk_3 = "72800000DWRg", + + -- TODO: this doesn't cover all valid immediates for mov reg, #imm. + mov_2 = "2a0003e0DMg|52800000DW|320003e0pDig|11000000pDpNg", + mov_3 = "2a0003e0DMSg", + mvn_2 = "2a2003e0DMg", + mvn_3 = "2a2003e0DMSg", + + adr_2 = "10000000DBx", + adrp_2 = "90000000DBx", + + csel_4 = "1a800000DNMCg", + csinc_4 = "1a800400DNMCg", + csinv_4 = "5a800000DNMCg", + csneg_4 = "5a800400DNMCg", + cset_2 = "1a9f07e0Dcg", + csetm_2 = "5a9f03e0Dcg", + cinc_3 = "1a800400DNmcg", + cinv_3 = "5a800000DNmcg", + cneg_3 = "5a800400DNmcg", + + ccmn_4 = "3a400000NMVCg|3a400800N5VCg", + ccmp_4 = "7a400000NMVCg|7a400800N5VCg", + + madd_4 = "1b000000DNMAg", + msub_4 = "1b008000DNMAg", + mul_3 = "1b007c00DNMg", + mneg_3 = "1b00fc00DNMg", + + smaddl_4 = "9b200000DxNMwAx", + smsubl_4 = "9b208000DxNMwAx", + smull_3 = "9b207c00DxNMw", + smnegl_3 = "9b20fc00DxNMw", + smulh_3 = "9b407c00DNMx", + umaddl_4 = "9ba00000DxNMwAx", + umsubl_4 = "9ba08000DxNMwAx", + umull_3 = "9ba07c00DxNMw", + umnegl_3 = "9ba0fc00DxNMw", + umulh_3 = "9bc07c00DNMx", + + udiv_3 = "1ac00800DNMg", + sdiv_3 = "1ac00c00DNMg", + + -- Bit operations. 
+ sbfm_4 = "13000000DN12w|93400000DN12x", + bfm_4 = "33000000DN12w|b3400000DN12x", + ubfm_4 = "53000000DN12w|d3400000DN12x", + extr_4 = "13800000DNM2w|93c00000DNM2x", + + sxtb_2 = "13001c00DNw|93401c00DNx", + sxth_2 = "13003c00DNw|93403c00DNx", + sxtw_2 = "93407c00DxNw", + uxtb_2 = "53001c00DNw", + uxth_2 = "53003c00DNw", + + sbfx_4 = op_alias("sbfm_4", alias_bfx), + bfxil_4 = op_alias("bfm_4", alias_bfx), + ubfx_4 = op_alias("ubfm_4", alias_bfx), + sbfiz_4 = op_alias("sbfm_4", alias_bfiz), + bfi_4 = op_alias("bfm_4", alias_bfiz), + ubfiz_4 = op_alias("ubfm_4", alias_bfiz), + + lsl_3 = function(params, nparams) + if params and params[3]:byte() == 35 then + return alias_lslimm(params, nparams) + else + return op_template(params, "1ac02000DNMg", nparams) + end + end, + lsr_3 = "1ac02400DNMg|53007c00DN1w|d340fc00DN1x", + asr_3 = "1ac02800DNMg|13007c00DN1w|9340fc00DN1x", + ror_3 = "1ac02c00DNMg|13800000DNm2w|93c00000DNm2x", + + clz_2 = "5ac01000DNg", + cls_2 = "5ac01400DNg", + rbit_2 = "5ac00000DNg", + rev_2 = "5ac00800DNw|dac00c00DNx", + rev16_2 = "5ac00400DNg", + rev32_2 = "dac00800DNx", + + -- Loads and stores. + ["strb_*"] = "38000000DwL", + ["ldrb_*"] = "38400000DwL", + ["ldrsb_*"] = "38c00000DwL|38800000DxL", + ["strh_*"] = "78000000DwL", + ["ldrh_*"] = "78400000DwL", + ["ldrsh_*"] = "78c00000DwL|78800000DxL", + ["str_*"] = "b8000000DwL|f8000000DxL|bc000000DsL|fc000000DdL", + ["ldr_*"] = "18000000DwB|58000000DxB|1c000000DsB|5c000000DdB|b8400000DwL|f8400000DxL|bc400000DsL|fc400000DdL", + ["ldrsw_*"] = "98000000DxB|b8800000DxL", + -- NOTE: ldur etc. are handled by ldr et al. + + ["stp_*"] = "28000000DAwP|a8000000DAxP|2c000000DAsP|6c000000DAdP", + ["ldp_*"] = "28400000DAwP|a8400000DAxP|2c400000DAsP|6c400000DAdP", + ["ldpsw_*"] = "68400000DAxP", + + -- Branches. + b_1 = "14000000B", + bl_1 = "94000000B", + blr_1 = "d63f0000Nx", + br_1 = "d61f0000Nx", + ret_0 = "d65f03c0", + ret_1 = "d65f0000Nx", + -- b.cond is added below. + cbz_2 = "34000000DBg", + cbnz_2 = "35000000DBg", + tbz_3 = "36000000DTBw|36000000DTBx", + tbnz_3 = "37000000DTBw|37000000DTBx", + + -- Miscellaneous instructions. + -- TODO: hlt, hvc, smc, svc, eret, dcps[123], drps, mrs, msr + -- TODO: sys, sysl, ic, dc, at, tlbi + -- TODO: hint, yield, wfe, wfi, sev, sevl + -- TODO: clrex, dsb, dmb, isb + nop_0 = "d503201f", + brk_0 = "d4200000", + brk_1 = "d4200000W", + + -- Floating point instructions. + fmov_2 = "1e204000DNf|1e260000DwNs|1e270000DsNw|9e660000DxNd|9e670000DdNx|1e201000DFf", + fabs_2 = "1e20c000DNf", + fneg_2 = "1e214000DNf", + fsqrt_2 = "1e21c000DNf", + + fcvt_2 = "1e22c000DdNs|1e624000DsNd", + + -- TODO: half-precision and fixed-point conversions. 
+ fcvtas_2 = "1e240000DwNs|9e240000DxNs|1e640000DwNd|9e640000DxNd", + fcvtau_2 = "1e250000DwNs|9e250000DxNs|1e650000DwNd|9e650000DxNd", + fcvtms_2 = "1e300000DwNs|9e300000DxNs|1e700000DwNd|9e700000DxNd", + fcvtmu_2 = "1e310000DwNs|9e310000DxNs|1e710000DwNd|9e710000DxNd", + fcvtns_2 = "1e200000DwNs|9e200000DxNs|1e600000DwNd|9e600000DxNd", + fcvtnu_2 = "1e210000DwNs|9e210000DxNs|1e610000DwNd|9e610000DxNd", + fcvtps_2 = "1e280000DwNs|9e280000DxNs|1e680000DwNd|9e680000DxNd", + fcvtpu_2 = "1e290000DwNs|9e290000DxNs|1e690000DwNd|9e690000DxNd", + fcvtzs_2 = "1e380000DwNs|9e380000DxNs|1e780000DwNd|9e780000DxNd", + fcvtzu_2 = "1e390000DwNs|9e390000DxNs|1e790000DwNd|9e790000DxNd", + + scvtf_2 = "1e220000DsNw|9e220000DsNx|1e620000DdNw|9e620000DdNx", + ucvtf_2 = "1e230000DsNw|9e230000DsNx|1e630000DdNw|9e630000DdNx", + + frintn_2 = "1e244000DNf", + frintp_2 = "1e24c000DNf", + frintm_2 = "1e254000DNf", + frintz_2 = "1e25c000DNf", + frinta_2 = "1e264000DNf", + frintx_2 = "1e274000DNf", + frinti_2 = "1e27c000DNf", + + fadd_3 = "1e202800DNMf", + fsub_3 = "1e203800DNMf", + fmul_3 = "1e200800DNMf", + fnmul_3 = "1e208800DNMf", + fdiv_3 = "1e201800DNMf", + + fmadd_4 = "1f000000DNMAf", + fmsub_4 = "1f008000DNMAf", + fnmadd_4 = "1f200000DNMAf", + fnmsub_4 = "1f208000DNMAf", + + fmax_3 = "1e204800DNMf", + fmaxnm_3 = "1e206800DNMf", + fmin_3 = "1e205800DNMf", + fminnm_3 = "1e207800DNMf", + + fcmp_2 = "1e202000NMf|1e202008NZf", + fcmpe_2 = "1e202010NMf|1e202018NZf", + + fccmp_4 = "1e200400NMVCf", + fccmpe_4 = "1e200410NMVCf", + + fcsel_4 = "1e200c00DNMCf", + + -- TODO: crc32*, aes*, sha*, pmull + -- TODO: SIMD instructions. +} + +for cond,c in pairs(map_cond) do + map_op["b"..cond.."_1"] = tohex(0x54000000+c).."B" +end + +------------------------------------------------------------------------------ + +-- Handle opcodes defined with template strings. +local function parse_template(params, template, nparams, pos) + local op = tonumber(template:sub(1, 8), 16) + local n = 1 + local rtt = {} + + parse_reg_type = false + + -- Process each character. 
+ for p in gmatch(template:sub(9), ".") do + local q = params[n] + if p == "D" then + op = op + parse_reg(q, 0); n = n + 1 + elseif p == "N" then + op = op + parse_reg(q, 5); n = n + 1 + elseif p == "M" then + op = op + parse_reg(q, 16); n = n + 1 + elseif p == "A" then + op = op + parse_reg(q, 10); n = n + 1 + elseif p == "m" then + op = op + parse_reg(params[n-1], 16) + + elseif p == "p" then + if q == "sp" then params[n] = "@x31" end + elseif p == "g" then + if parse_reg_type == "x" then + op = op + 0x80000000 + elseif parse_reg_type ~= "w" then + werror("bad register type") + end + parse_reg_type = false + elseif p == "f" then + if parse_reg_type == "d" then + op = op + 0x00400000 + elseif parse_reg_type ~= "s" then + werror("bad register type") + end + parse_reg_type = false + elseif p == "x" or p == "w" or p == "d" or p == "s" then + if parse_reg_type ~= p then + werror("register size mismatch") + end + parse_reg_type = false + + elseif p == "L" then + op = parse_load(params, nparams, n, op) + elseif p == "P" then + op = parse_load_pair(params, nparams, n, op) + + elseif p == "B" then + local mode, v, s = parse_label(q, false); n = n + 1 + if not mode then werror("bad label `"..q.."'") end + local m = branch_type(op) + if mode == "A" then + waction("REL_"..mode, v+m, format("(unsigned int)(%s)", s)) + actargs[#actargs+1] = format("(unsigned int)((%s)>>32)", s) + else + waction("REL_"..mode, v+m, s, 1) + end + + elseif p == "I" then + op = op + parse_imm12(q); n = n + 1 + elseif p == "i" then + op = op + parse_imm13(q); n = n + 1 + elseif p == "W" then + op = op + parse_imm(q, 16, 5, 0, false); n = n + 1 + elseif p == "T" then + op = op + parse_imm6(q); n = n + 1 + elseif p == "1" then + op = op + parse_imm(q, 6, 16, 0, false); n = n + 1 + elseif p == "2" then + op = op + parse_imm(q, 6, 10, 0, false); n = n + 1 + elseif p == "5" then + op = op + parse_imm(q, 5, 16, 0, false); n = n + 1 + elseif p == "V" then + op = op + parse_imm(q, 4, 0, 0, false); n = n + 1 + elseif p == "F" then + op = op + parse_fpimm(q); n = n + 1 + elseif p == "Z" then + if q ~= "#0" and q ~= "#0.0" then werror("expected zero immediate") end + n = n + 1 + + elseif p == "S" then + op = op + parse_shift(q); n = n + 1 + elseif p == "X" then + op = op + parse_extend(q); n = n + 1 + elseif p == "R" then + op = op + parse_lslx16(q); n = n + 1 + elseif p == "C" then + op = op + parse_cond(q, 0); n = n + 1 + elseif p == "c" then + op = op + parse_cond(q, 1); n = n + 1 + + else + assert(false) + end + end + wputpos(pos, op) +end + +function op_template(params, template, nparams) + if not params then return template:gsub("%x%x%x%x%x%x%x%x", "") end + + -- Limit number of section buffer positions used by a single dasm_put(). + -- A single opcode needs a maximum of 4 positions. + if secpos+4 > maxsecpos then wflush() end + local pos = wpos() + local lpos, apos, spos = #actlist, #actargs, secpos + + local ok, err + for t in gmatch(template, "[^|]+") do + ok, err = pcall(parse_template, params, t, nparams, pos) + if ok then return end + secpos = spos + actlist[lpos+1] = nil + actlist[lpos+2] = nil + actlist[lpos+3] = nil + actlist[lpos+4] = nil + actargs[apos+1] = nil + actargs[apos+2] = nil + actargs[apos+3] = nil + actargs[apos+4] = nil + end + error(err, 0) +end + +map_op[".template__"] = op_template + +------------------------------------------------------------------------------ + +-- Pseudo-opcode to mark the position where the action list is to be emitted. 
+map_op[".actionlist_1"] = function(params) + if not params then return "cvar" end + local name = params[1] -- No syntax check. You get to keep the pieces. + wline(function(out) writeactions(out, name) end) +end + +-- Pseudo-opcode to mark the position where the global enum is to be emitted. +map_op[".globals_1"] = function(params) + if not params then return "prefix" end + local prefix = params[1] -- No syntax check. You get to keep the pieces. + wline(function(out) writeglobals(out, prefix) end) +end + +-- Pseudo-opcode to mark the position where the global names are to be emitted. +map_op[".globalnames_1"] = function(params) + if not params then return "cvar" end + local name = params[1] -- No syntax check. You get to keep the pieces. + wline(function(out) writeglobalnames(out, name) end) +end + +-- Pseudo-opcode to mark the position where the extern names are to be emitted. +map_op[".externnames_1"] = function(params) + if not params then return "cvar" end + local name = params[1] -- No syntax check. You get to keep the pieces. + wline(function(out) writeexternnames(out, name) end) +end + +------------------------------------------------------------------------------ + +-- Label pseudo-opcode (converted from trailing colon form). +map_op[".label_1"] = function(params) + if not params then return "[1-9] | ->global | =>pcexpr" end + if secpos+1 > maxsecpos then wflush() end + local mode, n, s = parse_label(params[1], true) + if not mode or mode == "EXT" then werror("bad label definition") end + waction("LABEL_"..mode, n, s, 1) +end + +------------------------------------------------------------------------------ + +-- Pseudo-opcodes for data storage. +local function op_data(params) + if not params then return "imm..." end + local sz = params.op == ".long" and 4 or 8 + for _,p in ipairs(params) do + local imm = parse_number(p) + if imm then + local n = tobit(imm) + if n == imm or (n < 0 and n + 2^32 == imm) then + wputw(n < 0 and n + 2^32 or n) + if sz == 8 then + wputw(imm < 0 and 0xffffffff or 0) + end + elseif sz == 4 then + werror("bad immediate `"..p.."'") + else + imm = nil + end + end + if not imm then + local mode, v, s = parse_label(p, false) + if sz == 4 then + if mode then werror("label does not fit into .long") end + waction("IMMV", 0, p) + elseif mode and mode ~= "A" then + waction("REL_"..mode, v+0x8000, s, 1) + else + if mode == "A" then p = s end + waction("IMMV", 0, format("(unsigned int)(%s)", p)) + waction("IMMV", 0, format("(unsigned int)((unsigned long long)(%s)>>32)", p)) + end + end + if secpos+2 > maxsecpos then wflush() end + end +end +map_op[".long_*"] = op_data +map_op[".quad_*"] = op_data +map_op[".addr_*"] = op_data + +-- Alignment pseudo-opcode. +map_op[".align_1"] = function(params) + if not params then return "numpow2" end + if secpos+1 > maxsecpos then wflush() end + local align = tonumber(params[1]) + if align then + local x = align + -- Must be a power of 2 in the range (2 ... 256). + for i=1,8 do + x = x / 2 + if x == 1 then + waction("ALIGN", align-1, nil, 1) -- Action byte is 2**n-1. + return + end + end + end + werror("bad alignment") +end + +------------------------------------------------------------------------------ + +-- Pseudo-opcode for (primitive) type definitions (map to C types). 
+map_op[".type_3"] = function(params, nparams) + if not params then + return nparams == 2 and "name, ctype" or "name, ctype, reg" + end + local name, ctype, reg = params[1], params[2], params[3] + if not match(name, "^[%a_][%w_]*$") then + werror("bad type name `"..name.."'") + end + local tp = map_type[name] + if tp then + werror("duplicate type `"..name.."'") + end + -- Add #type to defines. A bit unclean to put it in map_archdef. + map_archdef["#"..name] = "sizeof("..ctype..")" + -- Add new type and emit shortcut define. + local num = ctypenum + 1 + map_type[name] = { + ctype = ctype, + ctypefmt = format("Dt%X(%%s)", num), + reg = reg, + } + wline(format("#define Dt%X(_V) (int)(ptrdiff_t)&(((%s *)0)_V)", num, ctype)) + ctypenum = num +end +map_op[".type_2"] = map_op[".type_3"] + +-- Dump type definitions. +local function dumptypes(out, lvl) + local t = {} + for name in pairs(map_type) do t[#t+1] = name end + sort(t) + out:write("Type definitions:\n") + for _,name in ipairs(t) do + local tp = map_type[name] + local reg = tp.reg or "" + out:write(format(" %-20s %-20s %s\n", name, tp.ctype, reg)) + end + out:write("\n") +end + +------------------------------------------------------------------------------ + +-- Set the current section. +function _M.section(num) + waction("SECTION", num) + wflush(true) -- SECTION is a terminal action. +end + +------------------------------------------------------------------------------ + +-- Dump architecture description. +function _M.dumparch(out) + out:write(format("DynASM %s version %s, released %s\n\n", + _info.arch, _info.version, _info.release)) + dumpactions(out) +end + +-- Dump all user defined elements. +function _M.dumpdef(out, lvl) + dumptypes(out, lvl) + dumpglobals(out, lvl) + dumpexterns(out, lvl) +end + +------------------------------------------------------------------------------ + +-- Pass callbacks from/to the DynASM core. +function _M.passcb(wl, we, wf, ww) + wline, werror, wfatal, wwarn = wl, we, wf, ww + return wflush +end + +-- Setup the arch-specific module. +function _M.setup(arch, opt) + g_arch, g_opt = arch, opt +end + +-- Merge the core maps and the arch-specific maps. +function _M.mergemaps(map_coreop, map_def) + setmetatable(map_op, { __index = map_coreop }) + setmetatable(map_def, { __index = map_archdef }) + return map_op, map_def +end + +return _M + +------------------------------------------------------------------------------ + diff --git a/ext/opcache/jit/ir/dynasm/dasm_mips.h b/ext/opcache/jit/ir/dynasm/dasm_mips.h new file mode 100644 index 00000000000..b99b56b0e9a --- /dev/null +++ b/ext/opcache/jit/ir/dynasm/dasm_mips.h @@ -0,0 +1,424 @@ +/* +** DynASM MIPS encoding engine. +** Copyright (C) 2005-2021 Mike Pall. All rights reserved. +** Released under the MIT license. See dynasm.lua for full copyright notice. +*/ + +#include +#include +#include +#include + +#define DASM_ARCH "mips" + +#ifndef DASM_EXTERN +#define DASM_EXTERN(a,b,c,d) 0 +#endif + +/* Action definitions. */ +enum { + DASM_STOP, DASM_SECTION, DASM_ESC, DASM_REL_EXT, + /* The following actions need a buffer position. */ + DASM_ALIGN, DASM_REL_LG, DASM_LABEL_LG, + /* The following actions also have an argument. */ + DASM_REL_PC, DASM_LABEL_PC, DASM_IMM, DASM_IMMS, + DASM__MAX +}; + +/* Maximum number of section buffer positions for a single dasm_put() call. */ +#define DASM_MAXSECPOS 25 + +/* DynASM encoder status codes. Action list offset or number are or'ed in. 
*/ +#define DASM_S_OK 0x00000000 +#define DASM_S_NOMEM 0x01000000 +#define DASM_S_PHASE 0x02000000 +#define DASM_S_MATCH_SEC 0x03000000 +#define DASM_S_RANGE_I 0x11000000 +#define DASM_S_RANGE_SEC 0x12000000 +#define DASM_S_RANGE_LG 0x13000000 +#define DASM_S_RANGE_PC 0x14000000 +#define DASM_S_RANGE_REL 0x15000000 +#define DASM_S_UNDEF_LG 0x21000000 +#define DASM_S_UNDEF_PC 0x22000000 + +/* Macros to convert positions (8 bit section + 24 bit index). */ +#define DASM_POS2IDX(pos) ((pos)&0x00ffffff) +#define DASM_POS2BIAS(pos) ((pos)&0xff000000) +#define DASM_SEC2POS(sec) ((sec)<<24) +#define DASM_POS2SEC(pos) ((pos)>>24) +#define DASM_POS2PTR(D, pos) (D->sections[DASM_POS2SEC(pos)].rbuf + (pos)) + +/* Action list type. */ +typedef const unsigned int *dasm_ActList; + +/* Per-section structure. */ +typedef struct dasm_Section { + int *rbuf; /* Biased buffer pointer (negative section bias). */ + int *buf; /* True buffer pointer. */ + size_t bsize; /* Buffer size in bytes. */ + int pos; /* Biased buffer position. */ + int epos; /* End of biased buffer position - max single put. */ + int ofs; /* Byte offset into section. */ +} dasm_Section; + +/* Core structure holding the DynASM encoding state. */ +struct dasm_State { + size_t psize; /* Allocated size of this structure. */ + dasm_ActList actionlist; /* Current actionlist pointer. */ + int *lglabels; /* Local/global chain/pos ptrs. */ + size_t lgsize; + int *pclabels; /* PC label chains/pos ptrs. */ + size_t pcsize; + void **globals; /* Array of globals (bias -10). */ + dasm_Section *section; /* Pointer to active section. */ + size_t codesize; /* Total size of all code sections. */ + int maxsection; /* 0 <= sectionidx < maxsection. */ + int status; /* Status code. */ + dasm_Section sections[1]; /* All sections. Alloc-extended. */ +}; + +/* The size of the core structure depends on the max. number of sections. */ +#define DASM_PSZ(ms) (sizeof(dasm_State)+(ms-1)*sizeof(dasm_Section)) + + +/* Initialize DynASM state. */ +void dasm_init(Dst_DECL, int maxsection) +{ + dasm_State *D; + size_t psz = 0; + int i; + Dst_REF = NULL; + DASM_M_GROW(Dst, struct dasm_State, Dst_REF, psz, DASM_PSZ(maxsection)); + D = Dst_REF; + D->psize = psz; + D->lglabels = NULL; + D->lgsize = 0; + D->pclabels = NULL; + D->pcsize = 0; + D->globals = NULL; + D->maxsection = maxsection; + for (i = 0; i < maxsection; i++) { + D->sections[i].buf = NULL; /* Need this for pass3. */ + D->sections[i].rbuf = D->sections[i].buf - DASM_SEC2POS(i); + D->sections[i].bsize = 0; + D->sections[i].epos = 0; /* Wrong, but is recalculated after resize. */ + } +} + +/* Free DynASM state. */ +void dasm_free(Dst_DECL) +{ + dasm_State *D = Dst_REF; + int i; + for (i = 0; i < D->maxsection; i++) + if (D->sections[i].buf) + DASM_M_FREE(Dst, D->sections[i].buf, D->sections[i].bsize); + if (D->pclabels) DASM_M_FREE(Dst, D->pclabels, D->pcsize); + if (D->lglabels) DASM_M_FREE(Dst, D->lglabels, D->lgsize); + DASM_M_FREE(Dst, D, D->psize); +} + +/* Setup global label array. Must be called before dasm_setup(). */ +void dasm_setupglobal(Dst_DECL, void **gl, unsigned int maxgl) +{ + dasm_State *D = Dst_REF; + D->globals = gl - 10; /* Negative bias to compensate for locals. */ + DASM_M_GROW(Dst, int, D->lglabels, D->lgsize, (10+maxgl)*sizeof(int)); +} + +/* Grow PC label array. Can be called after dasm_setup(), too. 
*/ +void dasm_growpc(Dst_DECL, unsigned int maxpc) +{ + dasm_State *D = Dst_REF; + size_t osz = D->pcsize; + DASM_M_GROW(Dst, int, D->pclabels, D->pcsize, maxpc*sizeof(int)); + memset((void *)(((unsigned char *)D->pclabels)+osz), 0, D->pcsize-osz); +} + +/* Setup encoder. */ +void dasm_setup(Dst_DECL, const void *actionlist) +{ + dasm_State *D = Dst_REF; + int i; + D->actionlist = (dasm_ActList)actionlist; + D->status = DASM_S_OK; + D->section = &D->sections[0]; + memset((void *)D->lglabels, 0, D->lgsize); + if (D->pclabels) memset((void *)D->pclabels, 0, D->pcsize); + for (i = 0; i < D->maxsection; i++) { + D->sections[i].pos = DASM_SEC2POS(i); + D->sections[i].ofs = 0; + } +} + + +#ifdef DASM_CHECKS +#define CK(x, st) \ + do { if (!(x)) { \ + D->status = DASM_S_##st|(p-D->actionlist-1); return; } } while (0) +#define CKPL(kind, st) \ + do { if ((size_t)((char *)pl-(char *)D->kind##labels) >= D->kind##size) { \ + D->status = DASM_S_RANGE_##st|(p-D->actionlist-1); return; } } while (0) +#else +#define CK(x, st) ((void)0) +#define CKPL(kind, st) ((void)0) +#endif + +/* Pass 1: Store actions and args, link branches/labels, estimate offsets. */ +void dasm_put(Dst_DECL, int start, ...) +{ + va_list ap; + dasm_State *D = Dst_REF; + dasm_ActList p = D->actionlist + start; + dasm_Section *sec = D->section; + int pos = sec->pos, ofs = sec->ofs; + int *b; + + if (pos >= sec->epos) { + DASM_M_GROW(Dst, int, sec->buf, sec->bsize, + sec->bsize + 2*DASM_MAXSECPOS*sizeof(int)); + sec->rbuf = sec->buf - DASM_POS2BIAS(pos); + sec->epos = (int)sec->bsize/sizeof(int) - DASM_MAXSECPOS+DASM_POS2BIAS(pos); + } + + b = sec->rbuf; + b[pos++] = start; + + va_start(ap, start); + while (1) { + unsigned int ins = *p++; + unsigned int action = (ins >> 16) - 0xff00; + if (action >= DASM__MAX) { + ofs += 4; + } else { + int *pl, n = action >= DASM_REL_PC ? va_arg(ap, int) : 0; + switch (action) { + case DASM_STOP: goto stop; + case DASM_SECTION: + n = (ins & 255); CK(n < D->maxsection, RANGE_SEC); + D->section = &D->sections[n]; goto stop; + case DASM_ESC: p++; ofs += 4; break; + case DASM_REL_EXT: break; + case DASM_ALIGN: ofs += (ins & 255); b[pos++] = ofs; break; + case DASM_REL_LG: + n = (ins & 2047) - 10; pl = D->lglabels + n; + /* Bkwd rel or global. */ + if (n >= 0) { CK(n>=10||*pl<0, RANGE_LG); CKPL(lg, LG); goto putrel; } + pl += 10; n = *pl; + if (n < 0) n = 0; /* Start new chain for fwd rel if label exists. */ + goto linkrel; + case DASM_REL_PC: + pl = D->pclabels + n; CKPL(pc, PC); + putrel: + n = *pl; + if (n < 0) { /* Label exists. Get label pos and store it. */ + b[pos] = -n; + } else { + linkrel: + b[pos] = n; /* Else link to rel chain, anchored at label. */ + *pl = pos; + } + pos++; + break; + case DASM_LABEL_LG: + pl = D->lglabels + (ins & 2047) - 10; CKPL(lg, LG); goto putlabel; + case DASM_LABEL_PC: + pl = D->pclabels + n; CKPL(pc, PC); + putlabel: + n = *pl; /* n > 0: Collapse rel chain and replace with label pos. */ + while (n > 0) { int *pb = DASM_POS2PTR(D, n); n = *pb; *pb = pos; + } + *pl = -pos; /* Label exists now. */ + b[pos++] = ofs; /* Store pass1 offset estimate. 
*/ + break; + case DASM_IMM: case DASM_IMMS: +#ifdef DASM_CHECKS + CK((n & ((1<<((ins>>10)&31))-1)) == 0, RANGE_I); +#endif + n >>= ((ins>>10)&31); +#ifdef DASM_CHECKS + if (ins & 0x8000) + CK(((n + (1<<(((ins>>5)&31)-1)))>>((ins>>5)&31)) == 0, RANGE_I); + else + CK((n>>((ins>>5)&31)) == 0, RANGE_I); +#endif + b[pos++] = n; + break; + } + } + } +stop: + va_end(ap); + sec->pos = pos; + sec->ofs = ofs; +} +#undef CK + +/* Pass 2: Link sections, shrink aligns, fix label offsets. */ +int dasm_link(Dst_DECL, size_t *szp) +{ + dasm_State *D = Dst_REF; + int secnum; + int ofs = 0; + +#ifdef DASM_CHECKS + *szp = 0; + if (D->status != DASM_S_OK) return D->status; + { + int pc; + for (pc = 0; pc*sizeof(int) < D->pcsize; pc++) + if (D->pclabels[pc] > 0) return DASM_S_UNDEF_PC|pc; + } +#endif + + { /* Handle globals not defined in this translation unit. */ + int idx; + for (idx = 10; idx*sizeof(int) < D->lgsize; idx++) { + int n = D->lglabels[idx]; + /* Undefined label: Collapse rel chain and replace with marker (< 0). */ + while (n > 0) { int *pb = DASM_POS2PTR(D, n); n = *pb; *pb = -idx; } + } + } + + /* Combine all code sections. No support for data sections (yet). */ + for (secnum = 0; secnum < D->maxsection; secnum++) { + dasm_Section *sec = D->sections + secnum; + int *b = sec->rbuf; + int pos = DASM_SEC2POS(secnum); + int lastpos = sec->pos; + + while (pos != lastpos) { + dasm_ActList p = D->actionlist + b[pos++]; + while (1) { + unsigned int ins = *p++; + unsigned int action = (ins >> 16) - 0xff00; + switch (action) { + case DASM_STOP: case DASM_SECTION: goto stop; + case DASM_ESC: p++; break; + case DASM_REL_EXT: break; + case DASM_ALIGN: ofs -= (b[pos++] + ofs) & (ins & 255); break; + case DASM_REL_LG: case DASM_REL_PC: pos++; break; + case DASM_LABEL_LG: case DASM_LABEL_PC: b[pos++] += ofs; break; + case DASM_IMM: case DASM_IMMS: pos++; break; + } + } + stop: (void)0; + } + ofs += sec->ofs; /* Next section starts right after current section. */ + } + + D->codesize = ofs; /* Total size of all code sections */ + *szp = ofs; + return DASM_S_OK; +} + +#ifdef DASM_CHECKS +#define CK(x, st) \ + do { if (!(x)) return DASM_S_##st|(p-D->actionlist-1); } while (0) +#else +#define CK(x, st) ((void)0) +#endif + +/* Pass 3: Encode sections. */ +int dasm_encode(Dst_DECL, void *buffer) +{ + dasm_State *D = Dst_REF; + char *base = (char *)buffer; + unsigned int *cp = (unsigned int *)buffer; + int secnum; + + /* Encode all code sections. No support for data sections (yet). */ + for (secnum = 0; secnum < D->maxsection; secnum++) { + dasm_Section *sec = D->sections + secnum; + int *b = sec->buf; + int *endb = sec->rbuf + sec->pos; + + while (b != endb) { + dasm_ActList p = D->actionlist + *b++; + while (1) { + unsigned int ins = *p++; + unsigned int action = (ins >> 16) - 0xff00; + int n = (action >= DASM_ALIGN && action < DASM__MAX) ? 
*b++ : 0; + switch (action) { + case DASM_STOP: case DASM_SECTION: goto stop; + case DASM_ESC: *cp++ = *p++; break; + case DASM_REL_EXT: + n = DASM_EXTERN(Dst, (unsigned char *)cp, (ins & 2047), 1); + goto patchrel; + case DASM_ALIGN: + ins &= 255; while ((((char *)cp - base) & ins)) *cp++ = 0x60000000; + break; + case DASM_REL_LG: + if (n < 0) { + n = (int)((ptrdiff_t)D->globals[-n] - (ptrdiff_t)cp); + goto patchrel; + } + /* fallthrough */ + case DASM_REL_PC: + CK(n >= 0, UNDEF_PC); + n = *DASM_POS2PTR(D, n); + if (ins & 2048) + n = (n + (int)(size_t)base) & 0x0fffffff; + else + n = n - (int)((char *)cp - base); + patchrel: { + unsigned int e = 16 + ((ins >> 12) & 15); + CK((n & 3) == 0 && + ((n + ((ins & 2048) ? 0 : (1<<(e+1)))) >> (e+2)) == 0, RANGE_REL); + cp[-1] |= ((n>>2) & ((1<<e)-1)); + } + break; + case DASM_LABEL_LG: + ins &= 2047; if (ins >= 20) D->globals[ins-10] = (void *)(base + n); + break; + case DASM_LABEL_PC: break; + case DASM_IMMS: + cp[-1] |= ((n>>3) & 4); n &= 0x1f; + /* fallthrough */ + case DASM_IMM: + cp[-1] |= (n & ((1<<((ins>>5)&31))-1)) << (ins&31); + break; + default: *cp++ = ins; break; + } + } + stop: (void)0; + } + } + + if (base + D->codesize != (char *)cp) /* Check for phase errors. */ + return DASM_S_PHASE; + return DASM_S_OK; +} +#undef CK + +/* Get PC label offset. */ +int dasm_getpclabel(Dst_DECL, unsigned int pc) +{ + dasm_State *D = Dst_REF; + if (pc*sizeof(int) < D->pcsize) { + int pos = D->pclabels[pc]; + if (pos < 0) return *DASM_POS2PTR(D, -pos); + if (pos > 0) return -1; /* Undefined. */ + } + return -2; /* Unused or out of range. */ +} + +#ifdef DASM_CHECKS +/* Optional sanity checker to call between isolated encoding steps. */ +int dasm_checkstep(Dst_DECL, int secmatch) +{ + dasm_State *D = Dst_REF; + if (D->status == DASM_S_OK) { + int i; + for (i = 1; i <= 9; i++) { + if (D->lglabels[i] > 0) { D->status = DASM_S_UNDEF_LG|i; break; } + D->lglabels[i] = 0; + } + } + if (D->status == DASM_S_OK && secmatch >= 0 && + D->section != &D->sections[secmatch]) + D->status = DASM_S_MATCH_SEC|(D->section-D->sections); + return D->status; +} +#endif + diff --git a/ext/opcache/jit/ir/dynasm/dasm_mips.lua b/ext/opcache/jit/ir/dynasm/dasm_mips.lua new file mode 100644 index 00000000000..591470157c4 --- /dev/null +++ b/ext/opcache/jit/ir/dynasm/dasm_mips.lua @@ -0,0 +1,1181 @@ +------------------------------------------------------------------------------ +-- DynASM MIPS32/MIPS64 module. +-- +-- Copyright (C) 2005-2021 Mike Pall. All rights reserved. +-- See dynasm.lua for full copyright notice. +------------------------------------------------------------------------------ + +local mips64 = mips64 +local mipsr6 = _map_def.MIPSR6 + +-- Module information: +local _info = { + arch = mips64 and "mips64" or "mips", + description = "DynASM MIPS32/MIPS64 module", + version = "1.5.0", + vernum = 10500, + release = "2021-05-02", + author = "Mike Pall", + license = "MIT", +} + +-- Exported glue functions for the arch-specific module. +local _M = { _info = _info } + +-- Cache library functions. +local type, tonumber, pairs, ipairs = type, tonumber, pairs, ipairs +local assert, setmetatable = assert, setmetatable +local _s = string +local sub, format, byte, char = _s.sub, _s.format, _s.byte, _s.char +local match, gmatch = _s.match, _s.gmatch +local concat, sort = table.concat, table.sort +local bit = bit or require("bit") +local band, shl, shr, sar = bit.band, bit.lshift, bit.rshift, bit.arshift +local tohex = bit.tohex + +-- Inherited tables and callbacks.
+local g_opt, g_arch +local wline, werror, wfatal, wwarn + +-- Action name list. +-- CHECK: Keep this in sync with the C code! +local action_names = { + "STOP", "SECTION", "ESC", "REL_EXT", + "ALIGN", "REL_LG", "LABEL_LG", + "REL_PC", "LABEL_PC", "IMM", "IMMS", +} + +-- Maximum number of section buffer positions for dasm_put(). +-- CHECK: Keep this in sync with the C code! +local maxsecpos = 25 -- Keep this low, to avoid excessively long C lines. + +-- Action name -> action number. +local map_action = {} +for n,name in ipairs(action_names) do + map_action[name] = n-1 +end + +-- Action list buffer. +local actlist = {} + +-- Argument list for next dasm_put(). Start with offset 0 into action list. +local actargs = { 0 } + +-- Current number of section buffer positions for dasm_put(). +local secpos = 1 + +------------------------------------------------------------------------------ + +-- Dump action names and numbers. +local function dumpactions(out) + out:write("DynASM encoding engine action codes:\n") + for n,name in ipairs(action_names) do + local num = map_action[name] + out:write(format(" %-10s %02X %d\n", name, num, num)) + end + out:write("\n") +end + +-- Write action list buffer as a huge static C array. +local function writeactions(out, name) + local nn = #actlist + if nn == 0 then nn = 1; actlist[0] = map_action.STOP end + out:write("static const unsigned int ", name, "[", nn, "] = {\n") + for i = 1,nn-1 do + assert(out:write("0x", tohex(actlist[i]), ",\n")) + end + assert(out:write("0x", tohex(actlist[nn]), "\n};\n\n")) +end + +------------------------------------------------------------------------------ + +-- Add word to action list. +local function wputxw(n) + assert(n >= 0 and n <= 0xffffffff and n % 1 == 0, "word out of range") + actlist[#actlist+1] = n +end + +-- Add action to list with optional arg. Advance buffer pos, too. +local function waction(action, val, a, num) + local w = assert(map_action[action], "bad action name `"..action.."'") + wputxw(0xff000000 + w * 0x10000 + (val or 0)) + if a then actargs[#actargs+1] = a end + if a or num then secpos = secpos + (num or 1) end +end + +-- Flush action list (intervening C code or buffer pos overflow). +local function wflush(term) + if #actlist == actargs[1] then return end -- Nothing to flush. + if not term then waction("STOP") end -- Terminate action list. + wline(format("dasm_put(Dst, %s);", concat(actargs, ", ")), true) + actargs = { #actlist } -- Actionlist offset is 1st arg to next dasm_put(). + secpos = 1 -- The actionlist offset occupies a buffer position, too. +end + +-- Put escaped word. +local function wputw(n) + if n >= 0xff000000 then waction("ESC") end + wputxw(n) +end + +-- Reserve position for word. +local function wpos() + local pos = #actlist+1 + actlist[pos] = "" + return pos +end + +-- Store word to reserved position. +local function wputpos(pos, n) + assert(n >= 0 and n <= 0xffffffff and n % 1 == 0, "word out of range") + actlist[pos] = n +end + +------------------------------------------------------------------------------ + +-- Global label name -> global label number. With auto assignment on 1st use. +local next_global = 20 +local map_global = setmetatable({}, { __index = function(t, name) + if not match(name, "^[%a_][%w_]*$") then werror("bad global label") end + local n = next_global + if n > 2047 then werror("too many global labels") end + next_global = n + 1 + t[name] = n + return n +end}) + +-- Dump global labels. 
+local function dumpglobals(out, lvl) + local t = {} + for name, n in pairs(map_global) do t[n] = name end + out:write("Global labels:\n") + for i=20,next_global-1 do + out:write(format(" %s\n", t[i])) + end + out:write("\n") +end + +-- Write global label enum. +local function writeglobals(out, prefix) + local t = {} + for name, n in pairs(map_global) do t[n] = name end + out:write("enum {\n") + for i=20,next_global-1 do + out:write(" ", prefix, t[i], ",\n") + end + out:write(" ", prefix, "_MAX\n};\n") +end + +-- Write global label names. +local function writeglobalnames(out, name) + local t = {} + for name, n in pairs(map_global) do t[n] = name end + out:write("static const char *const ", name, "[] = {\n") + for i=20,next_global-1 do + out:write(" \"", t[i], "\",\n") + end + out:write(" (const char *)0\n};\n") +end + +------------------------------------------------------------------------------ + +-- Extern label name -> extern label number. With auto assignment on 1st use. +local next_extern = 0 +local map_extern_ = {} +local map_extern = setmetatable({}, { __index = function(t, name) + -- No restrictions on the name for now. + local n = next_extern + if n > 2047 then werror("too many extern labels") end + next_extern = n + 1 + t[name] = n + map_extern_[n] = name + return n +end}) + +-- Dump extern labels. +local function dumpexterns(out, lvl) + out:write("Extern labels:\n") + for i=0,next_extern-1 do + out:write(format(" %s\n", map_extern_[i])) + end + out:write("\n") +end + +-- Write extern label names. +local function writeexternnames(out, name) + out:write("static const char *const ", name, "[] = {\n") + for i=0,next_extern-1 do + out:write(" \"", map_extern_[i], "\",\n") + end + out:write(" (const char *)0\n};\n") +end + +------------------------------------------------------------------------------ + +-- Arch-specific maps. +local map_archdef = { sp="r29", ra="r31" } -- Ext. register name -> int. name. + +local map_type = {} -- Type name -> { ctype, reg } +local ctypenum = 0 -- Type number (for Dt... macros). + +-- Reverse defines for registers. +function _M.revdef(s) + if s == "r29" then return "sp" + elseif s == "r31" then return "ra" end + return s +end + +------------------------------------------------------------------------------ + +-- Template strings for MIPS instructions. +local map_op = { + -- First-level opcodes. + j_1 = "08000000J", + jal_1 = "0c000000J", + b_1 = "10000000B", + beqz_2 = "10000000SB", + beq_3 = "10000000STB", + bnez_2 = "14000000SB", + bne_3 = "14000000STB", + blez_2 = "18000000SB", + bgtz_2 = "1c000000SB", + li_2 = "24000000TI", + addiu_3 = "24000000TSI", + slti_3 = "28000000TSI", + sltiu_3 = "2c000000TSI", + andi_3 = "30000000TSU", + lu_2 = "34000000TU", + ori_3 = "34000000TSU", + xori_3 = "38000000TSU", + lui_2 = "3c000000TU", + daddiu_3 = mips64 and "64000000TSI", + ldl_2 = mips64 and "68000000TO", + ldr_2 = mips64 and "6c000000TO", + lb_2 = "80000000TO", + lh_2 = "84000000TO", + lw_2 = "8c000000TO", + lbu_2 = "90000000TO", + lhu_2 = "94000000TO", + lwu_2 = mips64 and "9c000000TO", + sb_2 = "a0000000TO", + sh_2 = "a4000000TO", + sw_2 = "ac000000TO", + lwc1_2 = "c4000000HO", + ldc1_2 = "d4000000HO", + ld_2 = mips64 and "dc000000TO", + swc1_2 = "e4000000HO", + sdc1_2 = "f4000000HO", + sd_2 = mips64 and "fc000000TO", + + -- Opcode SPECIAL. 
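+  -- Reading guide (informal): as in the other DynASM modules, the first
+  -- eight hex digits form the base opcode word and the trailing letters
+  -- are operand fields filled in by ".template__" below (D, T and S are
+  -- GPRs at bits 11, 16 and 21; "=" reuses the previous operand for a
+  -- second field). For example, `add r1, r2, r3` with "00000020DST"
+  -- encodes to 0x00000020 + (1<<11) + (2<<21) + (3<<16) = 0x00430820.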
+ nop_0 = "00000000", + sll_3 = "00000000DTA", + sextw_2 = "00000000DT", + srl_3 = "00000002DTA", + rotr_3 = "00200002DTA", + sra_3 = "00000003DTA", + sllv_3 = "00000004DTS", + srlv_3 = "00000006DTS", + rotrv_3 = "00000046DTS", + drotrv_3 = mips64 and "00000056DTS", + srav_3 = "00000007DTS", + jalr_1 = "0000f809S", + jalr_2 = "00000009DS", + syscall_0 = "0000000c", + syscall_1 = "0000000cY", + break_0 = "0000000d", + break_1 = "0000000dY", + sync_0 = "0000000f", + dsllv_3 = mips64 and "00000014DTS", + dsrlv_3 = mips64 and "00000016DTS", + dsrav_3 = mips64 and "00000017DTS", + add_3 = "00000020DST", + move_2 = mips64 and "00000025DS" or "00000021DS", + addu_3 = "00000021DST", + sub_3 = "00000022DST", + negu_2 = mips64 and "0000002fDT" or "00000023DT", + subu_3 = "00000023DST", + and_3 = "00000024DST", + or_3 = "00000025DST", + xor_3 = "00000026DST", + not_2 = "00000027DS", + nor_3 = "00000027DST", + slt_3 = "0000002aDST", + sltu_3 = "0000002bDST", + dadd_3 = mips64 and "0000002cDST", + daddu_3 = mips64 and "0000002dDST", + dsub_3 = mips64 and "0000002eDST", + dsubu_3 = mips64 and "0000002fDST", + tge_2 = "00000030ST", + tge_3 = "00000030STZ", + tgeu_2 = "00000031ST", + tgeu_3 = "00000031STZ", + tlt_2 = "00000032ST", + tlt_3 = "00000032STZ", + tltu_2 = "00000033ST", + tltu_3 = "00000033STZ", + teq_2 = "00000034ST", + teq_3 = "00000034STZ", + tne_2 = "00000036ST", + tne_3 = "00000036STZ", + dsll_3 = mips64 and "00000038DTa", + dsrl_3 = mips64 and "0000003aDTa", + drotr_3 = mips64 and "0020003aDTa", + dsra_3 = mips64 and "0000003bDTa", + dsll32_3 = mips64 and "0000003cDTA", + dsrl32_3 = mips64 and "0000003eDTA", + drotr32_3 = mips64 and "0020003eDTA", + dsra32_3 = mips64 and "0000003fDTA", + + -- Opcode REGIMM. + bltz_2 = "04000000SB", + bgez_2 = "04010000SB", + bltzl_2 = "04020000SB", + bgezl_2 = "04030000SB", + bal_1 = "04110000B", + synci_1 = "041f0000O", + + -- Opcode SPECIAL3. + ext_4 = "7c000000TSAM", -- Note: last arg is msbd = size-1 + dextm_4 = mips64 and "7c000001TSAM", -- Args: pos | size-1-32 + dextu_4 = mips64 and "7c000002TSAM", -- Args: pos-32 | size-1 + dext_4 = mips64 and "7c000003TSAM", -- Args: pos | size-1 + zextw_2 = mips64 and "7c00f803TS", + ins_4 = "7c000004TSAM", -- Note: last arg is msb = pos+size-1 + dinsm_4 = mips64 and "7c000005TSAM", -- Args: pos | pos+size-33 + dinsu_4 = mips64 and "7c000006TSAM", -- Args: pos-32 | pos+size-33 + dins_4 = mips64 and "7c000007TSAM", -- Args: pos | pos+size-1 + wsbh_2 = "7c0000a0DT", + dsbh_2 = mips64 and "7c0000a4DT", + dshd_2 = mips64 and "7c000164DT", + seb_2 = "7c000420DT", + seh_2 = "7c000620DT", + rdhwr_2 = "7c00003bTD", + + -- Opcode COP0. + mfc0_2 = "40000000TD", + mfc0_3 = "40000000TDW", + dmfc0_2 = mips64 and "40200000TD", + dmfc0_3 = mips64 and "40200000TDW", + mtc0_2 = "40800000TD", + mtc0_3 = "40800000TDW", + dmtc0_2 = mips64 and "40a00000TD", + dmtc0_3 = mips64 and "40a00000TDW", + rdpgpr_2 = "41400000DT", + di_0 = "41606000", + di_1 = "41606000T", + ei_0 = "41606020", + ei_1 = "41606020T", + wrpgpr_2 = "41c00000DT", + tlbr_0 = "42000001", + tlbwi_0 = "42000002", + tlbwr_0 = "42000006", + tlbp_0 = "42000008", + eret_0 = "42000018", + deret_0 = "4200001f", + wait_0 = "42000020", + + -- Opcode COP1. 
+ mfc1_2 = "44000000TG", + dmfc1_2 = mips64 and "44200000TG", + cfc1_2 = "44400000TG", + mfhc1_2 = "44600000TG", + mtc1_2 = "44800000TG", + dmtc1_2 = mips64 and "44a00000TG", + ctc1_2 = "44c00000TG", + mthc1_2 = "44e00000TG", + + ["add.s_3"] = "46000000FGH", + ["sub.s_3"] = "46000001FGH", + ["mul.s_3"] = "46000002FGH", + ["div.s_3"] = "46000003FGH", + ["sqrt.s_2"] = "46000004FG", + ["abs.s_2"] = "46000005FG", + ["mov.s_2"] = "46000006FG", + ["neg.s_2"] = "46000007FG", + ["round.l.s_2"] = "46000008FG", + ["trunc.l.s_2"] = "46000009FG", + ["ceil.l.s_2"] = "4600000aFG", + ["floor.l.s_2"] = "4600000bFG", + ["round.w.s_2"] = "4600000cFG", + ["trunc.w.s_2"] = "4600000dFG", + ["ceil.w.s_2"] = "4600000eFG", + ["floor.w.s_2"] = "4600000fFG", + ["recip.s_2"] = "46000015FG", + ["rsqrt.s_2"] = "46000016FG", + ["cvt.d.s_2"] = "46000021FG", + ["cvt.w.s_2"] = "46000024FG", + ["cvt.l.s_2"] = "46000025FG", + ["add.d_3"] = "46200000FGH", + ["sub.d_3"] = "46200001FGH", + ["mul.d_3"] = "46200002FGH", + ["div.d_3"] = "46200003FGH", + ["sqrt.d_2"] = "46200004FG", + ["abs.d_2"] = "46200005FG", + ["mov.d_2"] = "46200006FG", + ["neg.d_2"] = "46200007FG", + ["round.l.d_2"] = "46200008FG", + ["trunc.l.d_2"] = "46200009FG", + ["ceil.l.d_2"] = "4620000aFG", + ["floor.l.d_2"] = "4620000bFG", + ["round.w.d_2"] = "4620000cFG", + ["trunc.w.d_2"] = "4620000dFG", + ["ceil.w.d_2"] = "4620000eFG", + ["floor.w.d_2"] = "4620000fFG", + ["recip.d_2"] = "46200015FG", + ["rsqrt.d_2"] = "46200016FG", + ["cvt.s.d_2"] = "46200020FG", + ["cvt.w.d_2"] = "46200024FG", + ["cvt.l.d_2"] = "46200025FG", + ["cvt.s.w_2"] = "46800020FG", + ["cvt.d.w_2"] = "46800021FG", + ["cvt.s.l_2"] = "46a00020FG", + ["cvt.d.l_2"] = "46a00021FG", +} + +if mipsr6 then -- Instructions added with MIPSR6. + + for k,v in pairs({ + + -- Add immediate to upper bits. + aui_3 = "3c000000TSI", + daui_3 = mips64 and "74000000TSI", + dahi_2 = mips64 and "04060000SI", + dati_2 = mips64 and "041e0000SI", + + -- TODO: addiupc, auipc, aluipc, lwpc, lwupc, ldpc. + + -- Compact branches. + blezalc_2 = "18000000TB", -- rt != 0. + bgezalc_2 = "18000000T=SB", -- rt != 0. + bgtzalc_2 = "1c000000TB", -- rt != 0. + bltzalc_2 = "1c000000T=SB", -- rt != 0. + + blezc_2 = "58000000TB", -- rt != 0. + bgezc_2 = "58000000T=SB", -- rt != 0. + bgec_3 = "58000000STB", -- rs != rt. + blec_3 = "58000000TSB", -- rt != rs. + + bgtzc_2 = "5c000000TB", -- rt != 0. + bltzc_2 = "5c000000T=SB", -- rt != 0. + bltc_3 = "5c000000STB", -- rs != rt. + bgtc_3 = "5c000000TSB", -- rt != rs. + + bgeuc_3 = "18000000STB", -- rs != rt. + bleuc_3 = "18000000TSB", -- rt != rs. + bltuc_3 = "1c000000STB", -- rs != rt. + bgtuc_3 = "1c000000TSB", -- rt != rs. + + beqzalc_2 = "20000000TB", -- rt != 0. + bnezalc_2 = "60000000TB", -- rt != 0. + beqc_3 = "20000000STB", -- rs < rt. + bnec_3 = "60000000STB", -- rs < rt. + bovc_3 = "20000000STB", -- rs >= rt. + bnvc_3 = "60000000STB", -- rs >= rt. + + beqzc_2 = "d8000000SK", -- rs != 0. + bnezc_2 = "f8000000SK", -- rs != 0. + jic_2 = "d8000000TI", + jialc_2 = "f8000000TI", + bc_1 = "c8000000L", + balc_1 = "e8000000L", + + -- Opcode SPECIAL. 
+ jr_1 = "00000009S", + sdbbp_0 = "0000000e", + sdbbp_1 = "0000000eY", + lsa_4 = "00000005DSTA", + dlsa_4 = mips64 and "00000015DSTA", + seleqz_3 = "00000035DST", + selnez_3 = "00000037DST", + clz_2 = "00000050DS", + clo_2 = "00000051DS", + dclz_2 = mips64 and "00000052DS", + dclo_2 = mips64 and "00000053DS", + mul_3 = "00000098DST", + muh_3 = "000000d8DST", + mulu_3 = "00000099DST", + muhu_3 = "000000d9DST", + div_3 = "0000009aDST", + mod_3 = "000000daDST", + divu_3 = "0000009bDST", + modu_3 = "000000dbDST", + dmul_3 = mips64 and "0000009cDST", + dmuh_3 = mips64 and "000000dcDST", + dmulu_3 = mips64 and "0000009dDST", + dmuhu_3 = mips64 and "000000ddDST", + ddiv_3 = mips64 and "0000009eDST", + dmod_3 = mips64 and "000000deDST", + ddivu_3 = mips64 and "0000009fDST", + dmodu_3 = mips64 and "000000dfDST", + + -- Opcode SPECIAL3. + align_4 = "7c000220DSTA", + dalign_4 = mips64 and "7c000224DSTA", + bitswap_2 = "7c000020DT", + dbitswap_2 = mips64 and "7c000024DT", + + -- Opcode COP1. + bc1eqz_2 = "45200000HB", + bc1nez_2 = "45a00000HB", + + ["sel.s_3"] = "46000010FGH", + ["seleqz.s_3"] = "46000014FGH", + ["selnez.s_3"] = "46000017FGH", + ["maddf.s_3"] = "46000018FGH", + ["msubf.s_3"] = "46000019FGH", + ["rint.s_2"] = "4600001aFG", + ["class.s_2"] = "4600001bFG", + ["min.s_3"] = "4600001cFGH", + ["mina.s_3"] = "4600001dFGH", + ["max.s_3"] = "4600001eFGH", + ["maxa.s_3"] = "4600001fFGH", + ["cmp.af.s_3"] = "46800000FGH", + ["cmp.un.s_3"] = "46800001FGH", + ["cmp.or.s_3"] = "46800011FGH", + ["cmp.eq.s_3"] = "46800002FGH", + ["cmp.une.s_3"] = "46800012FGH", + ["cmp.ueq.s_3"] = "46800003FGH", + ["cmp.ne.s_3"] = "46800013FGH", + ["cmp.lt.s_3"] = "46800004FGH", + ["cmp.ult.s_3"] = "46800005FGH", + ["cmp.le.s_3"] = "46800006FGH", + ["cmp.ule.s_3"] = "46800007FGH", + ["cmp.saf.s_3"] = "46800008FGH", + ["cmp.sun.s_3"] = "46800009FGH", + ["cmp.sor.s_3"] = "46800019FGH", + ["cmp.seq.s_3"] = "4680000aFGH", + ["cmp.sune.s_3"] = "4680001aFGH", + ["cmp.sueq.s_3"] = "4680000bFGH", + ["cmp.sne.s_3"] = "4680001bFGH", + ["cmp.slt.s_3"] = "4680000cFGH", + ["cmp.sult.s_3"] = "4680000dFGH", + ["cmp.sle.s_3"] = "4680000eFGH", + ["cmp.sule.s_3"] = "4680000fFGH", + + ["sel.d_3"] = "46200010FGH", + ["seleqz.d_3"] = "46200014FGH", + ["selnez.d_3"] = "46200017FGH", + ["maddf.d_3"] = "46200018FGH", + ["msubf.d_3"] = "46200019FGH", + ["rint.d_2"] = "4620001aFG", + ["class.d_2"] = "4620001bFG", + ["min.d_3"] = "4620001cFGH", + ["mina.d_3"] = "4620001dFGH", + ["max.d_3"] = "4620001eFGH", + ["maxa.d_3"] = "4620001fFGH", + ["cmp.af.d_3"] = "46a00000FGH", + ["cmp.un.d_3"] = "46a00001FGH", + ["cmp.or.d_3"] = "46a00011FGH", + ["cmp.eq.d_3"] = "46a00002FGH", + ["cmp.une.d_3"] = "46a00012FGH", + ["cmp.ueq.d_3"] = "46a00003FGH", + ["cmp.ne.d_3"] = "46a00013FGH", + ["cmp.lt.d_3"] = "46a00004FGH", + ["cmp.ult.d_3"] = "46a00005FGH", + ["cmp.le.d_3"] = "46a00006FGH", + ["cmp.ule.d_3"] = "46a00007FGH", + ["cmp.saf.d_3"] = "46a00008FGH", + ["cmp.sun.d_3"] = "46a00009FGH", + ["cmp.sor.d_3"] = "46a00019FGH", + ["cmp.seq.d_3"] = "46a0000aFGH", + ["cmp.sune.d_3"] = "46a0001aFGH", + ["cmp.sueq.d_3"] = "46a0000bFGH", + ["cmp.sne.d_3"] = "46a0001bFGH", + ["cmp.slt.d_3"] = "46a0000cFGH", + ["cmp.sult.d_3"] = "46a0000dFGH", + ["cmp.sle.d_3"] = "46a0000eFGH", + ["cmp.sule.d_3"] = "46a0000fFGH", + + }) do map_op[k] = v end + +else -- Instructions removed by MIPSR6. + + for k,v in pairs({ + -- Traps, don't use. + addi_3 = "20000000TSI", + daddi_3 = mips64 and "60000000TSI", + + -- Branch on likely, don't use. 
+ beqzl_2 = "50000000SB", + beql_3 = "50000000STB", + bnezl_2 = "54000000SB", + bnel_3 = "54000000STB", + blezl_2 = "58000000SB", + bgtzl_2 = "5c000000SB", + + lwl_2 = "88000000TO", + lwr_2 = "98000000TO", + swl_2 = "a8000000TO", + sdl_2 = mips64 and "b0000000TO", + sdr_2 = mips64 and "b1000000TO", + swr_2 = "b8000000TO", + cache_2 = "bc000000NO", + ll_2 = "c0000000TO", + pref_2 = "cc000000NO", + sc_2 = "e0000000TO", + scd_2 = mips64 and "f0000000TO", + + -- Opcode SPECIAL. + movf_2 = "00000001DS", + movf_3 = "00000001DSC", + movt_2 = "00010001DS", + movt_3 = "00010001DSC", + jr_1 = "00000008S", + movz_3 = "0000000aDST", + movn_3 = "0000000bDST", + mfhi_1 = "00000010D", + mthi_1 = "00000011S", + mflo_1 = "00000012D", + mtlo_1 = "00000013S", + mult_2 = "00000018ST", + multu_2 = "00000019ST", + div_3 = "0000001aST", + divu_3 = "0000001bST", + ddiv_3 = mips64 and "0000001eST", + ddivu_3 = mips64 and "0000001fST", + dmult_2 = mips64 and "0000001cST", + dmultu_2 = mips64 and "0000001dST", + + -- Opcode REGIMM. + tgei_2 = "04080000SI", + tgeiu_2 = "04090000SI", + tlti_2 = "040a0000SI", + tltiu_2 = "040b0000SI", + teqi_2 = "040c0000SI", + tnei_2 = "040e0000SI", + bltzal_2 = "04100000SB", + bgezal_2 = "04110000SB", + bltzall_2 = "04120000SB", + bgezall_2 = "04130000SB", + + -- Opcode SPECIAL2. + madd_2 = "70000000ST", + maddu_2 = "70000001ST", + mul_3 = "70000002DST", + msub_2 = "70000004ST", + msubu_2 = "70000005ST", + clz_2 = "70000020D=TS", + clo_2 = "70000021D=TS", + dclz_2 = mips64 and "70000024D=TS", + dclo_2 = mips64 and "70000025D=TS", + sdbbp_0 = "7000003f", + sdbbp_1 = "7000003fY", + + -- Opcode COP1. + bc1f_1 = "45000000B", + bc1f_2 = "45000000CB", + bc1t_1 = "45010000B", + bc1t_2 = "45010000CB", + bc1fl_1 = "45020000B", + bc1fl_2 = "45020000CB", + bc1tl_1 = "45030000B", + bc1tl_2 = "45030000CB", + + ["movf.s_2"] = "46000011FG", + ["movf.s_3"] = "46000011FGC", + ["movt.s_2"] = "46010011FG", + ["movt.s_3"] = "46010011FGC", + ["movz.s_3"] = "46000012FGT", + ["movn.s_3"] = "46000013FGT", + ["cvt.ps.s_3"] = "46000026FGH", + ["c.f.s_2"] = "46000030GH", + ["c.f.s_3"] = "46000030VGH", + ["c.un.s_2"] = "46000031GH", + ["c.un.s_3"] = "46000031VGH", + ["c.eq.s_2"] = "46000032GH", + ["c.eq.s_3"] = "46000032VGH", + ["c.ueq.s_2"] = "46000033GH", + ["c.ueq.s_3"] = "46000033VGH", + ["c.olt.s_2"] = "46000034GH", + ["c.olt.s_3"] = "46000034VGH", + ["c.ult.s_2"] = "46000035GH", + ["c.ult.s_3"] = "46000035VGH", + ["c.ole.s_2"] = "46000036GH", + ["c.ole.s_3"] = "46000036VGH", + ["c.ule.s_2"] = "46000037GH", + ["c.ule.s_3"] = "46000037VGH", + ["c.sf.s_2"] = "46000038GH", + ["c.sf.s_3"] = "46000038VGH", + ["c.ngle.s_2"] = "46000039GH", + ["c.ngle.s_3"] = "46000039VGH", + ["c.seq.s_2"] = "4600003aGH", + ["c.seq.s_3"] = "4600003aVGH", + ["c.ngl.s_2"] = "4600003bGH", + ["c.ngl.s_3"] = "4600003bVGH", + ["c.lt.s_2"] = "4600003cGH", + ["c.lt.s_3"] = "4600003cVGH", + ["c.nge.s_2"] = "4600003dGH", + ["c.nge.s_3"] = "4600003dVGH", + ["c.le.s_2"] = "4600003eGH", + ["c.le.s_3"] = "4600003eVGH", + ["c.ngt.s_2"] = "4600003fGH", + ["c.ngt.s_3"] = "4600003fVGH", + ["movf.d_2"] = "46200011FG", + ["movf.d_3"] = "46200011FGC", + ["movt.d_2"] = "46210011FG", + ["movt.d_3"] = "46210011FGC", + ["movz.d_3"] = "46200012FGT", + ["movn.d_3"] = "46200013FGT", + ["c.f.d_2"] = "46200030GH", + ["c.f.d_3"] = "46200030VGH", + ["c.un.d_2"] = "46200031GH", + ["c.un.d_3"] = "46200031VGH", + ["c.eq.d_2"] = "46200032GH", + ["c.eq.d_3"] = "46200032VGH", + ["c.ueq.d_2"] = "46200033GH", + ["c.ueq.d_3"] = "46200033VGH", + ["c.olt.d_2"] = 
"46200034GH", + ["c.olt.d_3"] = "46200034VGH", + ["c.ult.d_2"] = "46200035GH", + ["c.ult.d_3"] = "46200035VGH", + ["c.ole.d_2"] = "46200036GH", + ["c.ole.d_3"] = "46200036VGH", + ["c.ule.d_2"] = "46200037GH", + ["c.ule.d_3"] = "46200037VGH", + ["c.sf.d_2"] = "46200038GH", + ["c.sf.d_3"] = "46200038VGH", + ["c.ngle.d_2"] = "46200039GH", + ["c.ngle.d_3"] = "46200039VGH", + ["c.seq.d_2"] = "4620003aGH", + ["c.seq.d_3"] = "4620003aVGH", + ["c.ngl.d_2"] = "4620003bGH", + ["c.ngl.d_3"] = "4620003bVGH", + ["c.lt.d_2"] = "4620003cGH", + ["c.lt.d_3"] = "4620003cVGH", + ["c.nge.d_2"] = "4620003dGH", + ["c.nge.d_3"] = "4620003dVGH", + ["c.le.d_2"] = "4620003eGH", + ["c.le.d_3"] = "4620003eVGH", + ["c.ngt.d_2"] = "4620003fGH", + ["c.ngt.d_3"] = "4620003fVGH", + ["add.ps_3"] = "46c00000FGH", + ["sub.ps_3"] = "46c00001FGH", + ["mul.ps_3"] = "46c00002FGH", + ["abs.ps_2"] = "46c00005FG", + ["mov.ps_2"] = "46c00006FG", + ["neg.ps_2"] = "46c00007FG", + ["movf.ps_2"] = "46c00011FG", + ["movf.ps_3"] = "46c00011FGC", + ["movt.ps_2"] = "46c10011FG", + ["movt.ps_3"] = "46c10011FGC", + ["movz.ps_3"] = "46c00012FGT", + ["movn.ps_3"] = "46c00013FGT", + ["cvt.s.pu_2"] = "46c00020FG", + ["cvt.s.pl_2"] = "46c00028FG", + ["pll.ps_3"] = "46c0002cFGH", + ["plu.ps_3"] = "46c0002dFGH", + ["pul.ps_3"] = "46c0002eFGH", + ["puu.ps_3"] = "46c0002fFGH", + ["c.f.ps_2"] = "46c00030GH", + ["c.f.ps_3"] = "46c00030VGH", + ["c.un.ps_2"] = "46c00031GH", + ["c.un.ps_3"] = "46c00031VGH", + ["c.eq.ps_2"] = "46c00032GH", + ["c.eq.ps_3"] = "46c00032VGH", + ["c.ueq.ps_2"] = "46c00033GH", + ["c.ueq.ps_3"] = "46c00033VGH", + ["c.olt.ps_2"] = "46c00034GH", + ["c.olt.ps_3"] = "46c00034VGH", + ["c.ult.ps_2"] = "46c00035GH", + ["c.ult.ps_3"] = "46c00035VGH", + ["c.ole.ps_2"] = "46c00036GH", + ["c.ole.ps_3"] = "46c00036VGH", + ["c.ule.ps_2"] = "46c00037GH", + ["c.ule.ps_3"] = "46c00037VGH", + ["c.sf.ps_2"] = "46c00038GH", + ["c.sf.ps_3"] = "46c00038VGH", + ["c.ngle.ps_2"] = "46c00039GH", + ["c.ngle.ps_3"] = "46c00039VGH", + ["c.seq.ps_2"] = "46c0003aGH", + ["c.seq.ps_3"] = "46c0003aVGH", + ["c.ngl.ps_2"] = "46c0003bGH", + ["c.ngl.ps_3"] = "46c0003bVGH", + ["c.lt.ps_2"] = "46c0003cGH", + ["c.lt.ps_3"] = "46c0003cVGH", + ["c.nge.ps_2"] = "46c0003dGH", + ["c.nge.ps_3"] = "46c0003dVGH", + ["c.le.ps_2"] = "46c0003eGH", + ["c.le.ps_3"] = "46c0003eVGH", + ["c.ngt.ps_2"] = "46c0003fGH", + ["c.ngt.ps_3"] = "46c0003fVGH", + + -- Opcode COP1X. 
+ lwxc1_2 = "4c000000FX", + ldxc1_2 = "4c000001FX", + luxc1_2 = "4c000005FX", + swxc1_2 = "4c000008FX", + sdxc1_2 = "4c000009FX", + suxc1_2 = "4c00000dFX", + prefx_2 = "4c00000fMX", + ["alnv.ps_4"] = "4c00001eFGHS", + ["madd.s_4"] = "4c000020FRGH", + ["madd.d_4"] = "4c000021FRGH", + ["madd.ps_4"] = "4c000026FRGH", + ["msub.s_4"] = "4c000028FRGH", + ["msub.d_4"] = "4c000029FRGH", + ["msub.ps_4"] = "4c00002eFRGH", + ["nmadd.s_4"] = "4c000030FRGH", + ["nmadd.d_4"] = "4c000031FRGH", + ["nmadd.ps_4"] = "4c000036FRGH", + ["nmsub.s_4"] = "4c000038FRGH", + ["nmsub.d_4"] = "4c000039FRGH", + ["nmsub.ps_4"] = "4c00003eFRGH", + + }) do map_op[k] = v end + +end + +------------------------------------------------------------------------------ + +local function parse_gpr(expr) + local tname, ovreg = match(expr, "^([%w_]+):(r[1-3]?[0-9])$") + local tp = map_type[tname or expr] + if tp then + local reg = ovreg or tp.reg + if not reg then + werror("type `"..(tname or expr).."' needs a register override") + end + expr = reg + end + local r = match(expr, "^r([1-3]?[0-9])$") + if r then + r = tonumber(r) + if r <= 31 then return r, tp end + end + werror("bad register name `"..expr.."'") +end + +local function parse_fpr(expr) + local r = match(expr, "^f([1-3]?[0-9])$") + if r then + r = tonumber(r) + if r <= 31 then return r end + end + werror("bad register name `"..expr.."'") +end + +local function parse_imm(imm, bits, shift, scale, signed, action) + local n = tonumber(imm) + if n then + local m = sar(n, scale) + if shl(m, scale) == n then + if signed then + local s = sar(m, bits-1) + if s == 0 then return shl(m, shift) + elseif s == -1 then return shl(m + shl(1, bits), shift) end + else + if sar(m, bits) == 0 then return shl(m, shift) end + end + end + werror("out of range immediate `"..imm.."'") + elseif match(imm, "^[rf]([1-3]?[0-9])$") or + match(imm, "^([%w_]+):([rf][1-3]?[0-9])$") then + werror("expected immediate operand, got register") + else + waction(action or "IMM", + (signed and 32768 or 0)+shl(scale, 10)+shl(bits, 5)+shift, imm) + return 0 + end +end + +local function parse_disp(disp) + local imm, reg = match(disp, "^(.*)%(([%w_:]+)%)$") + if imm then + local r = shl(parse_gpr(reg), 21) + local extname = match(imm, "^extern%s+(%S+)$") + if extname then + waction("REL_EXT", map_extern[extname], nil, 1) + return r + else + return r + parse_imm(imm, 16, 0, 0, true) + end + end + local reg, tailr = match(disp, "^([%w_:]+)%s*(.*)$") + if reg and tailr ~= "" then + local r, tp = parse_gpr(reg) + if tp then + waction("IMM", 32768+16*32, format(tp.ctypefmt, tailr)) + return shl(r, 21) + end + end + werror("bad displacement `"..disp.."'") +end + +local function parse_index(idx) + local rt, rs = match(idx, "^(.*)%(([%w_:]+)%)$") + if rt then + rt = parse_gpr(rt) + rs = parse_gpr(rs) + return shl(rt, 16) + shl(rs, 21) + end + werror("bad index `"..idx.."'") +end + +local function parse_label(label, def) + local prefix = sub(label, 1, 2) + -- =>label (pc label reference) + if prefix == "=>" then + return "PC", 0, sub(label, 3) + end + -- ->name (global label reference) + if prefix == "->" then + return "LG", map_global[sub(label, 3)] + end + if def then + -- [1-9] (local label definition) + if match(label, "^[1-9]$") then + return "LG", 10+tonumber(label) + end + else + -- [<>][1-9] (local label reference) + local dir, lnum = match(label, "^([<>])([1-9])$") + if dir then -- Fwd: 1-9, Bkwd: 11-19. 
+ return "LG", lnum + (dir == ">" and 0 or 10) + end + -- extern label (extern label reference) + local extname = match(label, "^extern%s+(%S+)$") + if extname then + return "EXT", map_extern[extname] + end + end + werror("bad label `"..label.."'") +end + +------------------------------------------------------------------------------ + +-- Handle opcodes defined with template strings. +map_op[".template__"] = function(params, template, nparams) + if not params then return sub(template, 9) end + local op = tonumber(sub(template, 1, 8), 16) + local n = 1 + + -- Limit number of section buffer positions used by a single dasm_put(). + -- A single opcode needs a maximum of 2 positions (ins/ext). + if secpos+2 > maxsecpos then wflush() end + local pos = wpos() + + -- Process each character. + for p in gmatch(sub(template, 9), ".") do + if p == "D" then + op = op + shl(parse_gpr(params[n]), 11); n = n + 1 + elseif p == "T" then + op = op + shl(parse_gpr(params[n]), 16); n = n + 1 + elseif p == "S" then + op = op + shl(parse_gpr(params[n]), 21); n = n + 1 + elseif p == "F" then + op = op + shl(parse_fpr(params[n]), 6); n = n + 1 + elseif p == "G" then + op = op + shl(parse_fpr(params[n]), 11); n = n + 1 + elseif p == "H" then + op = op + shl(parse_fpr(params[n]), 16); n = n + 1 + elseif p == "R" then + op = op + shl(parse_fpr(params[n]), 21); n = n + 1 + elseif p == "I" then + op = op + parse_imm(params[n], 16, 0, 0, true); n = n + 1 + elseif p == "U" then + op = op + parse_imm(params[n], 16, 0, 0, false); n = n + 1 + elseif p == "O" then + op = op + parse_disp(params[n]); n = n + 1 + elseif p == "X" then + op = op + parse_index(params[n]); n = n + 1 + elseif p == "B" or p == "J" or p == "K" or p == "L" then + local mode, m, s = parse_label(params[n], false) + if p == "J" then m = m + 0xa800 + elseif p == "K" then m = m + 0x5000 + elseif p == "L" then m = m + 0xa000 end + waction("REL_"..mode, m, s, 1) + n = n + 1 + elseif p == "A" then + op = op + parse_imm(params[n], 5, 6, 0, false); n = n + 1 + elseif p == "a" then + local m = parse_imm(params[n], 6, 6, 0, false, "IMMS"); n = n + 1 + op = op + band(m, 0x7c0) + band(shr(m, 9), 4) + elseif p == "M" then + op = op + parse_imm(params[n], 5, 11, 0, false); n = n + 1 + elseif p == "N" then + op = op + parse_imm(params[n], 5, 16, 0, false); n = n + 1 + elseif p == "C" then + op = op + parse_imm(params[n], 3, 18, 0, false); n = n + 1 + elseif p == "V" then + op = op + parse_imm(params[n], 3, 8, 0, false); n = n + 1 + elseif p == "W" then + op = op + parse_imm(params[n], 3, 0, 0, false); n = n + 1 + elseif p == "Y" then + op = op + parse_imm(params[n], 20, 6, 0, false); n = n + 1 + elseif p == "Z" then + op = op + parse_imm(params[n], 10, 6, 0, false); n = n + 1 + elseif p == "=" then + n = n - 1 -- Re-use previous parameter for next template char. + else + assert(false) + end + end + wputpos(pos, op) +end + +------------------------------------------------------------------------------ + +-- Pseudo-opcode to mark the position where the action list is to be emitted. +map_op[".actionlist_1"] = function(params) + if not params then return "cvar" end + local name = params[1] -- No syntax check. You get to keep the pieces. + wline(function(out) writeactions(out, name) end) +end + +-- Pseudo-opcode to mark the position where the global enum is to be emitted. +map_op[".globals_1"] = function(params) + if not params then return "prefix" end + local prefix = params[1] -- No syntax check. You get to keep the pieces. 
+ wline(function(out) writeglobals(out, prefix) end) +end + +-- Pseudo-opcode to mark the position where the global names are to be emitted. +map_op[".globalnames_1"] = function(params) + if not params then return "cvar" end + local name = params[1] -- No syntax check. You get to keep the pieces. + wline(function(out) writeglobalnames(out, name) end) +end + +-- Pseudo-opcode to mark the position where the extern names are to be emitted. +map_op[".externnames_1"] = function(params) + if not params then return "cvar" end + local name = params[1] -- No syntax check. You get to keep the pieces. + wline(function(out) writeexternnames(out, name) end) +end + +------------------------------------------------------------------------------ + +-- Label pseudo-opcode (converted from trailing colon form). +map_op[".label_1"] = function(params) + if not params then return "[1-9] | ->global | =>pcexpr" end + if secpos+1 > maxsecpos then wflush() end + local mode, n, s = parse_label(params[1], true) + if mode == "EXT" then werror("bad label definition") end + waction("LABEL_"..mode, n, s, 1) +end + +------------------------------------------------------------------------------ + +-- Pseudo-opcodes for data storage. +map_op[".long_*"] = function(params) + if not params then return "imm..." end + for _,p in ipairs(params) do + local n = tonumber(p) + if not n then werror("bad immediate `"..p.."'") end + if n < 0 then n = n + 2^32 end + wputw(n) + if secpos+2 > maxsecpos then wflush() end + end +end + +-- Alignment pseudo-opcode. +map_op[".align_1"] = function(params) + if not params then return "numpow2" end + if secpos+1 > maxsecpos then wflush() end + local align = tonumber(params[1]) + if align then + local x = align + -- Must be a power of 2 in the range (2 ... 256). + for i=1,8 do + x = x / 2 + if x == 1 then + waction("ALIGN", align-1, nil, 1) -- Action byte is 2**n-1. + return + end + end + end + werror("bad alignment") +end + +------------------------------------------------------------------------------ + +-- Pseudo-opcode for (primitive) type definitions (map to C types). +map_op[".type_3"] = function(params, nparams) + if not params then + return nparams == 2 and "name, ctype" or "name, ctype, reg" + end + local name, ctype, reg = params[1], params[2], params[3] + if not match(name, "^[%a_][%w_]*$") then + werror("bad type name `"..name.."'") + end + local tp = map_type[name] + if tp then + werror("duplicate type `"..name.."'") + end + -- Add #type to defines. A bit unclean to put it in map_archdef. + map_archdef["#"..name] = "sizeof("..ctype..")" + -- Add new type and emit shortcut define. + local num = ctypenum + 1 + map_type[name] = { + ctype = ctype, + ctypefmt = format("Dt%X(%%s)", num), + reg = reg, + } + wline(format("#define Dt%X(_V) (int)(ptrdiff_t)&(((%s *)0)_V)", num, ctype)) + ctypenum = num +end +map_op[".type_2"] = map_op[".type_3"] + +-- Dump type definitions. +local function dumptypes(out, lvl) + local t = {} + for name in pairs(map_type) do t[#t+1] = name end + sort(t) + out:write("Type definitions:\n") + for _,name in ipairs(t) do + local tp = map_type[name] + local reg = tp.reg or "" + out:write(format(" %-20s %-20s %s\n", name, tp.ctype, reg)) + end + out:write("\n") +end + +------------------------------------------------------------------------------ + +-- Set the current section. +function _M.section(num) + waction("SECTION", num) + wflush(true) -- SECTION is a terminal action. 
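+  -- wflush(true) suppresses the implicit STOP: a SECTION action already
+  -- terminates the current action run in the C engine.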
+end
+
+------------------------------------------------------------------------------
+
+-- Dump architecture description.
+function _M.dumparch(out)
+  out:write(format("DynASM %s version %s, released %s\n\n",
+    _info.arch, _info.version, _info.release))
+  dumpactions(out)
+end
+
+-- Dump all user defined elements.
+function _M.dumpdef(out, lvl)
+  dumptypes(out, lvl)
+  dumpglobals(out, lvl)
+  dumpexterns(out, lvl)
+end
+
+------------------------------------------------------------------------------
+
+-- Pass callbacks from/to the DynASM core.
+function _M.passcb(wl, we, wf, ww)
+  wline, werror, wfatal, wwarn = wl, we, wf, ww
+  return wflush
+end
+
+-- Setup the arch-specific module.
+function _M.setup(arch, opt)
+  g_arch, g_opt = arch, opt
+end
+
+-- Merge the core maps and the arch-specific maps.
+function _M.mergemaps(map_coreop, map_def)
+  setmetatable(map_op, { __index = map_coreop })
+  setmetatable(map_def, { __index = map_archdef })
+  return map_op, map_def
+end
+
+return _M
+
+------------------------------------------------------------------------------
+
diff --git a/ext/opcache/jit/ir/dynasm/dasm_mips64.lua b/ext/opcache/jit/ir/dynasm/dasm_mips64.lua
new file mode 100644
index 00000000000..8ab5d33a208
--- /dev/null
+++ b/ext/opcache/jit/ir/dynasm/dasm_mips64.lua
@@ -0,0 +1,12 @@
+------------------------------------------------------------------------------
+-- DynASM MIPS64 module.
+--
+-- Copyright (C) 2005-2021 Mike Pall. All rights reserved.
+-- See dynasm.lua for full copyright notice.
+------------------------------------------------------------------------------
+-- This module just sets 64 bit mode for the combined MIPS/MIPS64 module.
+-- All the interesting stuff is there.
+------------------------------------------------------------------------------
+
+mips64 = true -- Using a global is an ugly, but effective solution.
+return require("dasm_mips")
diff --git a/ext/opcache/jit/ir/dynasm/dasm_ppc.h b/ext/opcache/jit/ir/dynasm/dasm_ppc.h
new file mode 100644
index 00000000000..35264f2eb93
--- /dev/null
+++ b/ext/opcache/jit/ir/dynasm/dasm_ppc.h
@@ -0,0 +1,423 @@
+/*
+** DynASM PPC/PPC64 encoding engine.
+** Copyright (C) 2005-2021 Mike Pall. All rights reserved.
+** Released under the MIT license. See dynasm.lua for full copyright notice.
+*/
+
+#include <stddef.h>
+#include <stdarg.h>
+#include <string.h>
+#include <stdlib.h>
+
+#define DASM_ARCH "ppc"
+
+#ifndef DASM_EXTERN
+#define DASM_EXTERN(a,b,c,d) 0
+#endif
+
+/* Action definitions. */
+enum {
+  DASM_STOP, DASM_SECTION, DASM_ESC, DASM_REL_EXT,
+  /* The following actions need a buffer position. */
+  DASM_ALIGN, DASM_REL_LG, DASM_LABEL_LG,
+  /* The following actions also have an argument. */
+  DASM_REL_PC, DASM_LABEL_PC, DASM_IMM, DASM_IMMSH,
+  DASM__MAX
+};
+
+/* Maximum number of section buffer positions for a single dasm_put() call. */
+#define DASM_MAXSECPOS 25
+
+/* DynASM encoder status codes. Action list offset or number are or'ed in. */
+#define DASM_S_OK 0x00000000
+#define DASM_S_NOMEM 0x01000000
+#define DASM_S_PHASE 0x02000000
+#define DASM_S_MATCH_SEC 0x03000000
+#define DASM_S_RANGE_I 0x11000000
+#define DASM_S_RANGE_SEC 0x12000000
+#define DASM_S_RANGE_LG 0x13000000
+#define DASM_S_RANGE_PC 0x14000000
+#define DASM_S_RANGE_REL 0x15000000
+#define DASM_S_UNDEF_LG 0x21000000
+#define DASM_S_UNDEF_PC 0x22000000
+
+/* Macros to convert positions (8 bit section + 24 bit index).
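+** For example, DASM_SEC2POS(2) yields 0x02000000; for pos = 0x02000010,
+** DASM_POS2SEC() extracts section 2 and DASM_POS2IDX() the index 0x10,
+** while DASM_POS2PTR() maps the position to its slot in the section buffer.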
*/ +#define DASM_POS2IDX(pos) ((pos)&0x00ffffff) +#define DASM_POS2BIAS(pos) ((pos)&0xff000000) +#define DASM_SEC2POS(sec) ((sec)<<24) +#define DASM_POS2SEC(pos) ((pos)>>24) +#define DASM_POS2PTR(D, pos) (D->sections[DASM_POS2SEC(pos)].rbuf + (pos)) + +/* Action list type. */ +typedef const unsigned int *dasm_ActList; + +/* Per-section structure. */ +typedef struct dasm_Section { + int *rbuf; /* Biased buffer pointer (negative section bias). */ + int *buf; /* True buffer pointer. */ + size_t bsize; /* Buffer size in bytes. */ + int pos; /* Biased buffer position. */ + int epos; /* End of biased buffer position - max single put. */ + int ofs; /* Byte offset into section. */ +} dasm_Section; + +/* Core structure holding the DynASM encoding state. */ +struct dasm_State { + size_t psize; /* Allocated size of this structure. */ + dasm_ActList actionlist; /* Current actionlist pointer. */ + int *lglabels; /* Local/global chain/pos ptrs. */ + size_t lgsize; + int *pclabels; /* PC label chains/pos ptrs. */ + size_t pcsize; + void **globals; /* Array of globals (bias -10). */ + dasm_Section *section; /* Pointer to active section. */ + size_t codesize; /* Total size of all code sections. */ + int maxsection; /* 0 <= sectionidx < maxsection. */ + int status; /* Status code. */ + dasm_Section sections[1]; /* All sections. Alloc-extended. */ +}; + +/* The size of the core structure depends on the max. number of sections. */ +#define DASM_PSZ(ms) (sizeof(dasm_State)+(ms-1)*sizeof(dasm_Section)) + + +/* Initialize DynASM state. */ +void dasm_init(Dst_DECL, int maxsection) +{ + dasm_State *D; + size_t psz = 0; + int i; + Dst_REF = NULL; + DASM_M_GROW(Dst, struct dasm_State, Dst_REF, psz, DASM_PSZ(maxsection)); + D = Dst_REF; + D->psize = psz; + D->lglabels = NULL; + D->lgsize = 0; + D->pclabels = NULL; + D->pcsize = 0; + D->globals = NULL; + D->maxsection = maxsection; + for (i = 0; i < maxsection; i++) { + D->sections[i].buf = NULL; /* Need this for pass3. */ + D->sections[i].rbuf = D->sections[i].buf - DASM_SEC2POS(i); + D->sections[i].bsize = 0; + D->sections[i].epos = 0; /* Wrong, but is recalculated after resize. */ + } +} + +/* Free DynASM state. */ +void dasm_free(Dst_DECL) +{ + dasm_State *D = Dst_REF; + int i; + for (i = 0; i < D->maxsection; i++) + if (D->sections[i].buf) + DASM_M_FREE(Dst, D->sections[i].buf, D->sections[i].bsize); + if (D->pclabels) DASM_M_FREE(Dst, D->pclabels, D->pcsize); + if (D->lglabels) DASM_M_FREE(Dst, D->lglabels, D->lgsize); + DASM_M_FREE(Dst, D, D->psize); +} + +/* Setup global label array. Must be called before dasm_setup(). */ +void dasm_setupglobal(Dst_DECL, void **gl, unsigned int maxgl) +{ + dasm_State *D = Dst_REF; + D->globals = gl - 10; /* Negative bias to compensate for locals. */ + DASM_M_GROW(Dst, int, D->lglabels, D->lgsize, (10+maxgl)*sizeof(int)); +} + +/* Grow PC label array. Can be called after dasm_setup(), too. */ +void dasm_growpc(Dst_DECL, unsigned int maxpc) +{ + dasm_State *D = Dst_REF; + size_t osz = D->pcsize; + DASM_M_GROW(Dst, int, D->pclabels, D->pcsize, maxpc*sizeof(int)); + memset((void *)(((unsigned char *)D->pclabels)+osz), 0, D->pcsize-osz); +} + +/* Setup encoder. 
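+** Resets all label chains and per-section positions, so one dasm_State can
+** be reused for several encoding runs; per the note above, dasm_setupglobal()
+** must already have been called, since the lglabels array is cleared here.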
*/ +void dasm_setup(Dst_DECL, const void *actionlist) +{ + dasm_State *D = Dst_REF; + int i; + D->actionlist = (dasm_ActList)actionlist; + D->status = DASM_S_OK; + D->section = &D->sections[0]; + memset((void *)D->lglabels, 0, D->lgsize); + if (D->pclabels) memset((void *)D->pclabels, 0, D->pcsize); + for (i = 0; i < D->maxsection; i++) { + D->sections[i].pos = DASM_SEC2POS(i); + D->sections[i].ofs = 0; + } +} + + +#ifdef DASM_CHECKS +#define CK(x, st) \ + do { if (!(x)) { \ + D->status = DASM_S_##st|(p-D->actionlist-1); return; } } while (0) +#define CKPL(kind, st) \ + do { if ((size_t)((char *)pl-(char *)D->kind##labels) >= D->kind##size) { \ + D->status = DASM_S_RANGE_##st|(p-D->actionlist-1); return; } } while (0) +#else +#define CK(x, st) ((void)0) +#define CKPL(kind, st) ((void)0) +#endif + +/* Pass 1: Store actions and args, link branches/labels, estimate offsets. */ +void dasm_put(Dst_DECL, int start, ...) +{ + va_list ap; + dasm_State *D = Dst_REF; + dasm_ActList p = D->actionlist + start; + dasm_Section *sec = D->section; + int pos = sec->pos, ofs = sec->ofs; + int *b; + + if (pos >= sec->epos) { + DASM_M_GROW(Dst, int, sec->buf, sec->bsize, + sec->bsize + 2*DASM_MAXSECPOS*sizeof(int)); + sec->rbuf = sec->buf - DASM_POS2BIAS(pos); + sec->epos = (int)sec->bsize/sizeof(int) - DASM_MAXSECPOS+DASM_POS2BIAS(pos); + } + + b = sec->rbuf; + b[pos++] = start; + + va_start(ap, start); + while (1) { + unsigned int ins = *p++; + unsigned int action = (ins >> 16); + if (action >= DASM__MAX) { + ofs += 4; + } else { + int *pl, n = action >= DASM_REL_PC ? va_arg(ap, int) : 0; + switch (action) { + case DASM_STOP: goto stop; + case DASM_SECTION: + n = (ins & 255); CK(n < D->maxsection, RANGE_SEC); + D->section = &D->sections[n]; goto stop; + case DASM_ESC: p++; ofs += 4; break; + case DASM_REL_EXT: break; + case DASM_ALIGN: ofs += (ins & 255); b[pos++] = ofs; break; + case DASM_REL_LG: + n = (ins & 2047) - 10; pl = D->lglabels + n; + /* Bkwd rel or global. */ + if (n >= 0) { CK(n>=10||*pl<0, RANGE_LG); CKPL(lg, LG); goto putrel; } + pl += 10; n = *pl; + if (n < 0) n = 0; /* Start new chain for fwd rel if label exists. */ + goto linkrel; + case DASM_REL_PC: + pl = D->pclabels + n; CKPL(pc, PC); + putrel: + n = *pl; + if (n < 0) { /* Label exists. Get label pos and store it. */ + b[pos] = -n; + } else { + linkrel: + b[pos] = n; /* Else link to rel chain, anchored at label. */ + *pl = pos; + } + pos++; + break; + case DASM_LABEL_LG: + pl = D->lglabels + (ins & 2047) - 10; CKPL(lg, LG); goto putlabel; + case DASM_LABEL_PC: + pl = D->pclabels + n; CKPL(pc, PC); + putlabel: + n = *pl; /* n > 0: Collapse rel chain and replace with label pos. */ + while (n > 0) { int *pb = DASM_POS2PTR(D, n); n = *pb; *pb = pos; + } + *pl = -pos; /* Label exists now. */ + b[pos++] = ofs; /* Store pass1 offset estimate. */ + break; + case DASM_IMM: +#ifdef DASM_CHECKS + CK((n & ((1<<((ins>>10)&31))-1)) == 0, RANGE_I); +#endif + n >>= ((ins>>10)&31); +#ifdef DASM_CHECKS + if (ins & 0x8000) + CK(((n + (1<<(((ins>>5)&31)-1)))>>((ins>>5)&31)) == 0, RANGE_I); + else + CK((n>>((ins>>5)&31)) == 0, RANGE_I); +#endif + b[pos++] = n; + break; + case DASM_IMMSH: + CK((n >> 6) == 0, RANGE_I); + b[pos++] = n; + break; + } + } + } +stop: + va_end(ap); + sec->pos = pos; + sec->ofs = ofs; +} +#undef CK + +/* Pass 2: Link sections, shrink aligns, fix label offsets. 
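+** Globals referenced but never defined here get their relocation chains
+** collapsed into negative markers, which pass 3 then resolves through
+** D->globals[] (see the DASM_REL_LG case in dasm_encode() below).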
*/ +int dasm_link(Dst_DECL, size_t *szp) +{ + dasm_State *D = Dst_REF; + int secnum; + int ofs = 0; + +#ifdef DASM_CHECKS + *szp = 0; + if (D->status != DASM_S_OK) return D->status; + { + int pc; + for (pc = 0; pc*sizeof(int) < D->pcsize; pc++) + if (D->pclabels[pc] > 0) return DASM_S_UNDEF_PC|pc; + } +#endif + + { /* Handle globals not defined in this translation unit. */ + int idx; + for (idx = 10; idx*sizeof(int) < D->lgsize; idx++) { + int n = D->lglabels[idx]; + /* Undefined label: Collapse rel chain and replace with marker (< 0). */ + while (n > 0) { int *pb = DASM_POS2PTR(D, n); n = *pb; *pb = -idx; } + } + } + + /* Combine all code sections. No support for data sections (yet). */ + for (secnum = 0; secnum < D->maxsection; secnum++) { + dasm_Section *sec = D->sections + secnum; + int *b = sec->rbuf; + int pos = DASM_SEC2POS(secnum); + int lastpos = sec->pos; + + while (pos != lastpos) { + dasm_ActList p = D->actionlist + b[pos++]; + while (1) { + unsigned int ins = *p++; + unsigned int action = (ins >> 16); + switch (action) { + case DASM_STOP: case DASM_SECTION: goto stop; + case DASM_ESC: p++; break; + case DASM_REL_EXT: break; + case DASM_ALIGN: ofs -= (b[pos++] + ofs) & (ins & 255); break; + case DASM_REL_LG: case DASM_REL_PC: pos++; break; + case DASM_LABEL_LG: case DASM_LABEL_PC: b[pos++] += ofs; break; + case DASM_IMM: case DASM_IMMSH: pos++; break; + } + } + stop: (void)0; + } + ofs += sec->ofs; /* Next section starts right after current section. */ + } + + D->codesize = ofs; /* Total size of all code sections */ + *szp = ofs; + return DASM_S_OK; +} + +#ifdef DASM_CHECKS +#define CK(x, st) \ + do { if (!(x)) return DASM_S_##st|(p-D->actionlist-1); } while (0) +#else +#define CK(x, st) ((void)0) +#endif + +/* Pass 3: Encode sections. */ +int dasm_encode(Dst_DECL, void *buffer) +{ + dasm_State *D = Dst_REF; + char *base = (char *)buffer; + unsigned int *cp = (unsigned int *)buffer; + int secnum; + + /* Encode all code sections. No support for data sections (yet). */ + for (secnum = 0; secnum < D->maxsection; secnum++) { + dasm_Section *sec = D->sections + secnum; + int *b = sec->buf; + int *endb = sec->rbuf + sec->pos; + + while (b != endb) { + dasm_ActList p = D->actionlist + *b++; + while (1) { + unsigned int ins = *p++; + unsigned int action = (ins >> 16); + int n = (action >= DASM_ALIGN && action < DASM__MAX) ? *b++ : 0; + switch (action) { + case DASM_STOP: case DASM_SECTION: goto stop; + case DASM_ESC: *cp++ = *p++; break; + case DASM_REL_EXT: + n = DASM_EXTERN(Dst, (unsigned char *)cp, (ins & 2047), 1) - 4; + goto patchrel; + case DASM_ALIGN: + ins &= 255; while ((((char *)cp - base) & ins)) *cp++ = 0x60000000; + break; + case DASM_REL_LG: + if (n < 0) { + n = (int)((ptrdiff_t)D->globals[-n] - (ptrdiff_t)cp); + goto patchrel; + } + /* fallthrough */ + case DASM_REL_PC: + CK(n >= 0, UNDEF_PC); + n = *DASM_POS2PTR(D, n) - (int)((char *)cp - base); + patchrel: + CK((n & 3) == 0 && + (((n+4) + ((ins & 2048) ? 0x00008000 : 0x02000000)) >> + ((ins & 2048) ? 16 : 26)) == 0, RANGE_REL); + cp[-1] |= ((n+4) & ((ins & 2048) ? 0x0000fffc: 0x03fffffc)); + break; + case DASM_LABEL_LG: + ins &= 2047; if (ins >= 20) D->globals[ins-10] = (void *)(base + n); + break; + case DASM_LABEL_PC: break; + case DASM_IMM: + cp[-1] |= (n & ((1<<((ins>>5)&31))-1)) << (ins&31); + break; + case DASM_IMMSH: + cp[-1] |= (ins & 1) ? 
((n&31)<<11)|((n&32)>>4) : ((n&31)<<6)|(n&32); + break; + default: *cp++ = ins; break; + } + } + stop: (void)0; + } + } + + if (base + D->codesize != (char *)cp) /* Check for phase errors. */ + return DASM_S_PHASE; + return DASM_S_OK; +} +#undef CK + +/* Get PC label offset. */ +int dasm_getpclabel(Dst_DECL, unsigned int pc) +{ + dasm_State *D = Dst_REF; + if (pc*sizeof(int) < D->pcsize) { + int pos = D->pclabels[pc]; + if (pos < 0) return *DASM_POS2PTR(D, -pos); + if (pos > 0) return -1; /* Undefined. */ + } + return -2; /* Unused or out of range. */ +} + +#ifdef DASM_CHECKS +/* Optional sanity checker to call between isolated encoding steps. */ +int dasm_checkstep(Dst_DECL, int secmatch) +{ + dasm_State *D = Dst_REF; + if (D->status == DASM_S_OK) { + int i; + for (i = 1; i <= 9; i++) { + if (D->lglabels[i] > 0) { D->status = DASM_S_UNDEF_LG|i; break; } + D->lglabels[i] = 0; + } + } + if (D->status == DASM_S_OK && secmatch >= 0 && + D->section != &D->sections[secmatch]) + D->status = DASM_S_MATCH_SEC|(D->section-D->sections); + return D->status; +} +#endif + diff --git a/ext/opcache/jit/ir/dynasm/dasm_ppc.lua b/ext/opcache/jit/ir/dynasm/dasm_ppc.lua new file mode 100644 index 00000000000..ee2afb2ecfa --- /dev/null +++ b/ext/opcache/jit/ir/dynasm/dasm_ppc.lua @@ -0,0 +1,1919 @@ +------------------------------------------------------------------------------ +-- DynASM PPC/PPC64 module. +-- +-- Copyright (C) 2005-2021 Mike Pall. All rights reserved. +-- See dynasm.lua for full copyright notice. +-- +-- Support for various extensions contributed by Caio Souza Oliveira. +------------------------------------------------------------------------------ + +-- Module information: +local _info = { + arch = "ppc", + description = "DynASM PPC module", + version = "1.5.0", + vernum = 10500, + release = "2021-05-02", + author = "Mike Pall", + license = "MIT", +} + +-- Exported glue functions for the arch-specific module. +local _M = { _info = _info } + +-- Cache library functions. +local type, tonumber, pairs, ipairs = type, tonumber, pairs, ipairs +local assert, setmetatable = assert, setmetatable +local _s = string +local sub, format, byte, char = _s.sub, _s.format, _s.byte, _s.char +local match, gmatch = _s.match, _s.gmatch +local concat, sort = table.concat, table.sort +local bit = bit or require("bit") +local band, shl, shr, sar = bit.band, bit.lshift, bit.rshift, bit.arshift +local tohex = bit.tohex + +-- Inherited tables and callbacks. +local g_opt, g_arch +local wline, werror, wfatal, wwarn + +-- Action name list. +-- CHECK: Keep this in sync with the C code! +local action_names = { + "STOP", "SECTION", "ESC", "REL_EXT", + "ALIGN", "REL_LG", "LABEL_LG", + "REL_PC", "LABEL_PC", "IMM", "IMMSH" +} + +-- Maximum number of section buffer positions for dasm_put(). +-- CHECK: Keep this in sync with the C code! +local maxsecpos = 25 -- Keep this low, to avoid excessively long C lines. + +-- Action name -> action number. +local map_action = {} +for n,name in ipairs(action_names) do + map_action[name] = n-1 +end + +-- Action list buffer. +local actlist = {} + +-- Argument list for next dasm_put(). Start with offset 0 into action list. +local actargs = { 0 } + +-- Current number of section buffer positions for dasm_put(). +local secpos = 1 + +------------------------------------------------------------------------------ + +-- Dump action names and numbers. 
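+-- (Each action word carries the action code in its high 16 bits and an
+-- optional value in the low 16 bits; compare waction() below and the
+-- "ins >> 16" decoding in the C engine.)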
+local function dumpactions(out) + out:write("DynASM encoding engine action codes:\n") + for n,name in ipairs(action_names) do + local num = map_action[name] + out:write(format(" %-10s %02X %d\n", name, num, num)) + end + out:write("\n") +end + +-- Write action list buffer as a huge static C array. +local function writeactions(out, name) + local nn = #actlist + if nn == 0 then nn = 1; actlist[0] = map_action.STOP end + out:write("static const unsigned int ", name, "[", nn, "] = {\n") + for i = 1,nn-1 do + assert(out:write("0x", tohex(actlist[i]), ",\n")) + end + assert(out:write("0x", tohex(actlist[nn]), "\n};\n\n")) +end + +------------------------------------------------------------------------------ + +-- Add word to action list. +local function wputxw(n) + assert(n >= 0 and n <= 0xffffffff and n % 1 == 0, "word out of range") + actlist[#actlist+1] = n +end + +-- Add action to list with optional arg. Advance buffer pos, too. +local function waction(action, val, a, num) + local w = assert(map_action[action], "bad action name `"..action.."'") + wputxw(w * 0x10000 + (val or 0)) + if a then actargs[#actargs+1] = a end + if a or num then secpos = secpos + (num or 1) end +end + +-- Flush action list (intervening C code or buffer pos overflow). +local function wflush(term) + if #actlist == actargs[1] then return end -- Nothing to flush. + if not term then waction("STOP") end -- Terminate action list. + wline(format("dasm_put(Dst, %s);", concat(actargs, ", ")), true) + actargs = { #actlist } -- Actionlist offset is 1st arg to next dasm_put(). + secpos = 1 -- The actionlist offset occupies a buffer position, too. +end + +-- Put escaped word. +local function wputw(n) + if n <= 0xffffff then waction("ESC") end + wputxw(n) +end + +-- Reserve position for word. +local function wpos() + local pos = #actlist+1 + actlist[pos] = "" + return pos +end + +-- Store word to reserved position. +local function wputpos(pos, n) + assert(n >= 0 and n <= 0xffffffff and n % 1 == 0, "word out of range") + actlist[pos] = n +end + +------------------------------------------------------------------------------ + +-- Global label name -> global label number. With auto assignment on 1st use. +local next_global = 20 +local map_global = setmetatable({}, { __index = function(t, name) + if not match(name, "^[%a_][%w_]*$") then werror("bad global label") end + local n = next_global + if n > 2047 then werror("too many global labels") end + next_global = n + 1 + t[name] = n + return n +end}) + +-- Dump global labels. +local function dumpglobals(out, lvl) + local t = {} + for name, n in pairs(map_global) do t[n] = name end + out:write("Global labels:\n") + for i=20,next_global-1 do + out:write(format(" %s\n", t[i])) + end + out:write("\n") +end + +-- Write global label enum. +local function writeglobals(out, prefix) + local t = {} + for name, n in pairs(map_global) do t[n] = name end + out:write("enum {\n") + for i=20,next_global-1 do + out:write(" ", prefix, t[i], ",\n") + end + out:write(" ", prefix, "_MAX\n};\n") +end + +-- Write global label names. +local function writeglobalnames(out, name) + local t = {} + for name, n in pairs(map_global) do t[n] = name end + out:write("static const char *const ", name, "[] = {\n") + for i=20,next_global-1 do + out:write(" \"", t[i], "\",\n") + end + out:write(" (const char *)0\n};\n") +end + +------------------------------------------------------------------------------ + +-- Extern label name -> extern label number. With auto assignment on 1st use. 
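+-- Extern labels have no position at preprocessing time; dasm_encode()
+-- resolves them through the embedder-supplied DASM_EXTERN() macro.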
+local next_extern = 0 +local map_extern_ = {} +local map_extern = setmetatable({}, { __index = function(t, name) + -- No restrictions on the name for now. + local n = next_extern + if n > 2047 then werror("too many extern labels") end + next_extern = n + 1 + t[name] = n + map_extern_[n] = name + return n +end}) + +-- Dump extern labels. +local function dumpexterns(out, lvl) + out:write("Extern labels:\n") + for i=0,next_extern-1 do + out:write(format(" %s\n", map_extern_[i])) + end + out:write("\n") +end + +-- Write extern label names. +local function writeexternnames(out, name) + out:write("static const char *const ", name, "[] = {\n") + for i=0,next_extern-1 do + out:write(" \"", map_extern_[i], "\",\n") + end + out:write(" (const char *)0\n};\n") +end + +------------------------------------------------------------------------------ + +-- Arch-specific maps. +local map_archdef = { sp = "r1" } -- Ext. register name -> int. name. + +local map_type = {} -- Type name -> { ctype, reg } +local ctypenum = 0 -- Type number (for Dt... macros). + +-- Reverse defines for registers. +function _M.revdef(s) + if s == "r1" then return "sp" end + return s +end + +local map_cond = { + lt = 0, gt = 1, eq = 2, so = 3, + ge = 4, le = 5, ne = 6, ns = 7, +} + +------------------------------------------------------------------------------ + +local map_op, op_template + +local function op_alias(opname, f) + return function(params, nparams) + if not params then return "-> "..opname:sub(1, -3) end + f(params, nparams) + op_template(params, map_op[opname], nparams) + end +end + +-- Template strings for PPC instructions. +map_op = { + tdi_3 = "08000000ARI", + twi_3 = "0c000000ARI", + mulli_3 = "1c000000RRI", + subfic_3 = "20000000RRI", + cmplwi_3 = "28000000XRU", + cmplwi_2 = "28000000-RU", + cmpldi_3 = "28200000XRU", + cmpldi_2 = "28200000-RU", + cmpwi_3 = "2c000000XRI", + cmpwi_2 = "2c000000-RI", + cmpdi_3 = "2c200000XRI", + cmpdi_2 = "2c200000-RI", + addic_3 = "30000000RRI", + ["addic._3"] = "34000000RRI", + addi_3 = "38000000RR0I", + li_2 = "38000000RI", + la_2 = "38000000RD", + addis_3 = "3c000000RR0I", + lis_2 = "3c000000RI", + lus_2 = "3c000000RU", + bc_3 = "40000000AAK", + bcl_3 = "40000001AAK", + bdnz_1 = "42000000K", + bdz_1 = "42400000K", + sc_0 = "44000000", + b_1 = "48000000J", + bl_1 = "48000001J", + rlwimi_5 = "50000000RR~AAA.", + rlwinm_5 = "54000000RR~AAA.", + rlwnm_5 = "5c000000RR~RAA.", + ori_3 = "60000000RR~U", + nop_0 = "60000000", + oris_3 = "64000000RR~U", + xori_3 = "68000000RR~U", + xoris_3 = "6c000000RR~U", + ["andi._3"] = "70000000RR~U", + ["andis._3"] = "74000000RR~U", + lwz_2 = "80000000RD", + lwzu_2 = "84000000RD", + lbz_2 = "88000000RD", + lbzu_2 = "8c000000RD", + stw_2 = "90000000RD", + stwu_2 = "94000000RD", + stb_2 = "98000000RD", + stbu_2 = "9c000000RD", + lhz_2 = "a0000000RD", + lhzu_2 = "a4000000RD", + lha_2 = "a8000000RD", + lhau_2 = "ac000000RD", + sth_2 = "b0000000RD", + sthu_2 = "b4000000RD", + lmw_2 = "b8000000RD", + stmw_2 = "bc000000RD", + lfs_2 = "c0000000FD", + lfsu_2 = "c4000000FD", + lfd_2 = "c8000000FD", + lfdu_2 = "cc000000FD", + stfs_2 = "d0000000FD", + stfsu_2 = "d4000000FD", + stfd_2 = "d8000000FD", + stfdu_2 = "dc000000FD", + ld_2 = "e8000000RD", -- NYI: displacement must be divisible by 4. 
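+  -- (ld/ldu/lwa are DS-form loads: the two low bits of the displacement
+  -- field hold the extended opcode, hence the alignment restriction above.)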
+ ldu_2 = "e8000001RD", + lwa_2 = "e8000002RD", + std_2 = "f8000000RD", + stdu_2 = "f8000001RD", + + subi_3 = op_alias("addi_3", function(p) p[3] = "-("..p[3]..")" end), + subis_3 = op_alias("addis_3", function(p) p[3] = "-("..p[3]..")" end), + subic_3 = op_alias("addic_3", function(p) p[3] = "-("..p[3]..")" end), + ["subic._3"] = op_alias("addic._3", function(p) p[3] = "-("..p[3]..")" end), + + rotlwi_3 = op_alias("rlwinm_5", function(p) + p[4] = "0"; p[5] = "31" + end), + rotrwi_3 = op_alias("rlwinm_5", function(p) + p[3] = "32-("..p[3]..")"; p[4] = "0"; p[5] = "31" + end), + rotlw_3 = op_alias("rlwnm_5", function(p) + p[4] = "0"; p[5] = "31" + end), + slwi_3 = op_alias("rlwinm_5", function(p) + p[5] = "31-("..p[3]..")"; p[4] = "0" + end), + srwi_3 = op_alias("rlwinm_5", function(p) + p[4] = p[3]; p[3] = "32-("..p[3]..")"; p[5] = "31" + end), + clrlwi_3 = op_alias("rlwinm_5", function(p) + p[4] = p[3]; p[3] = "0"; p[5] = "31" + end), + clrrwi_3 = op_alias("rlwinm_5", function(p) + p[5] = "31-("..p[3]..")"; p[3] = "0"; p[4] = "0" + end), + + -- Primary opcode 4: + mulhhwu_3 = "10000010RRR.", + machhwu_3 = "10000018RRR.", + mulhhw_3 = "10000050RRR.", + nmachhw_3 = "1000005cRRR.", + machhwsu_3 = "10000098RRR.", + machhws_3 = "100000d8RRR.", + nmachhws_3 = "100000dcRRR.", + mulchwu_3 = "10000110RRR.", + macchwu_3 = "10000118RRR.", + mulchw_3 = "10000150RRR.", + macchw_3 = "10000158RRR.", + nmacchw_3 = "1000015cRRR.", + macchwsu_3 = "10000198RRR.", + macchws_3 = "100001d8RRR.", + nmacchws_3 = "100001dcRRR.", + mullhw_3 = "10000350RRR.", + maclhw_3 = "10000358RRR.", + nmaclhw_3 = "1000035cRRR.", + maclhwsu_3 = "10000398RRR.", + maclhws_3 = "100003d8RRR.", + nmaclhws_3 = "100003dcRRR.", + machhwuo_3 = "10000418RRR.", + nmachhwo_3 = "1000045cRRR.", + machhwsuo_3 = "10000498RRR.", + machhwso_3 = "100004d8RRR.", + nmachhwso_3 = "100004dcRRR.", + macchwuo_3 = "10000518RRR.", + macchwo_3 = "10000558RRR.", + nmacchwo_3 = "1000055cRRR.", + macchwsuo_3 = "10000598RRR.", + macchwso_3 = "100005d8RRR.", + nmacchwso_3 = "100005dcRRR.", + maclhwo_3 = "10000758RRR.", + nmaclhwo_3 = "1000075cRRR.", + maclhwsuo_3 = "10000798RRR.", + maclhwso_3 = "100007d8RRR.", + nmaclhwso_3 = "100007dcRRR.", + + vaddubm_3 = "10000000VVV", + vmaxub_3 = "10000002VVV", + vrlb_3 = "10000004VVV", + vcmpequb_3 = "10000006VVV", + vmuloub_3 = "10000008VVV", + vaddfp_3 = "1000000aVVV", + vmrghb_3 = "1000000cVVV", + vpkuhum_3 = "1000000eVVV", + vmhaddshs_4 = "10000020VVVV", + vmhraddshs_4 = "10000021VVVV", + vmladduhm_4 = "10000022VVVV", + vmsumubm_4 = "10000024VVVV", + vmsummbm_4 = "10000025VVVV", + vmsumuhm_4 = "10000026VVVV", + vmsumuhs_4 = "10000027VVVV", + vmsumshm_4 = "10000028VVVV", + vmsumshs_4 = "10000029VVVV", + vsel_4 = "1000002aVVVV", + vperm_4 = "1000002bVVVV", + vsldoi_4 = "1000002cVVVP", + vpermxor_4 = "1000002dVVVV", + vmaddfp_4 = "1000002eVVVV~", + vnmsubfp_4 = "1000002fVVVV~", + vaddeuqm_4 = "1000003cVVVV", + vaddecuq_4 = "1000003dVVVV", + vsubeuqm_4 = "1000003eVVVV", + vsubecuq_4 = "1000003fVVVV", + vadduhm_3 = "10000040VVV", + vmaxuh_3 = "10000042VVV", + vrlh_3 = "10000044VVV", + vcmpequh_3 = "10000046VVV", + vmulouh_3 = "10000048VVV", + vsubfp_3 = "1000004aVVV", + vmrghh_3 = "1000004cVVV", + vpkuwum_3 = "1000004eVVV", + vadduwm_3 = "10000080VVV", + vmaxuw_3 = "10000082VVV", + vrlw_3 = "10000084VVV", + vcmpequw_3 = "10000086VVV", + vmulouw_3 = "10000088VVV", + vmuluwm_3 = "10000089VVV", + vmrghw_3 = "1000008cVVV", + vpkuhus_3 = "1000008eVVV", + vaddudm_3 = "100000c0VVV", + vmaxud_3 = "100000c2VVV", + vrld_3 = 
"100000c4VVV", + vcmpeqfp_3 = "100000c6VVV", + vcmpequd_3 = "100000c7VVV", + vpkuwus_3 = "100000ceVVV", + vadduqm_3 = "10000100VVV", + vmaxsb_3 = "10000102VVV", + vslb_3 = "10000104VVV", + vmulosb_3 = "10000108VVV", + vrefp_2 = "1000010aV-V", + vmrglb_3 = "1000010cVVV", + vpkshus_3 = "1000010eVVV", + vaddcuq_3 = "10000140VVV", + vmaxsh_3 = "10000142VVV", + vslh_3 = "10000144VVV", + vmulosh_3 = "10000148VVV", + vrsqrtefp_2 = "1000014aV-V", + vmrglh_3 = "1000014cVVV", + vpkswus_3 = "1000014eVVV", + vaddcuw_3 = "10000180VVV", + vmaxsw_3 = "10000182VVV", + vslw_3 = "10000184VVV", + vmulosw_3 = "10000188VVV", + vexptefp_2 = "1000018aV-V", + vmrglw_3 = "1000018cVVV", + vpkshss_3 = "1000018eVVV", + vmaxsd_3 = "100001c2VVV", + vsl_3 = "100001c4VVV", + vcmpgefp_3 = "100001c6VVV", + vlogefp_2 = "100001caV-V", + vpkswss_3 = "100001ceVVV", + vadduhs_3 = "10000240VVV", + vminuh_3 = "10000242VVV", + vsrh_3 = "10000244VVV", + vcmpgtuh_3 = "10000246VVV", + vmuleuh_3 = "10000248VVV", + vrfiz_2 = "1000024aV-V", + vsplth_3 = "1000024cVV3", + vupkhsh_2 = "1000024eV-V", + vminuw_3 = "10000282VVV", + vminud_3 = "100002c2VVV", + vcmpgtud_3 = "100002c7VVV", + vrfim_2 = "100002caV-V", + vcmpgtsb_3 = "10000306VVV", + vcfux_3 = "1000030aVVA~", + vaddshs_3 = "10000340VVV", + vminsh_3 = "10000342VVV", + vsrah_3 = "10000344VVV", + vcmpgtsh_3 = "10000346VVV", + vmulesh_3 = "10000348VVV", + vcfsx_3 = "1000034aVVA~", + vspltish_2 = "1000034cVS", + vupkhpx_2 = "1000034eV-V", + vaddsws_3 = "10000380VVV", + vminsw_3 = "10000382VVV", + vsraw_3 = "10000384VVV", + vcmpgtsw_3 = "10000386VVV", + vmulesw_3 = "10000388VVV", + vctuxs_3 = "1000038aVVA~", + vspltisw_2 = "1000038cVS", + vminsd_3 = "100003c2VVV", + vsrad_3 = "100003c4VVV", + vcmpbfp_3 = "100003c6VVV", + vcmpgtsd_3 = "100003c7VVV", + vctsxs_3 = "100003caVVA~", + vupklpx_2 = "100003ceV-V", + vsububm_3 = "10000400VVV", + ["bcdadd._4"] = "10000401VVVy.", + vavgub_3 = "10000402VVV", + vand_3 = "10000404VVV", + ["vcmpequb._3"] = "10000406VVV", + vmaxfp_3 = "1000040aVVV", + vsubuhm_3 = "10000440VVV", + ["bcdsub._4"] = "10000441VVVy.", + vavguh_3 = "10000442VVV", + vandc_3 = "10000444VVV", + ["vcmpequh._3"] = "10000446VVV", + vminfp_3 = "1000044aVVV", + vpkudum_3 = "1000044eVVV", + vsubuwm_3 = "10000480VVV", + vavguw_3 = "10000482VVV", + vor_3 = "10000484VVV", + ["vcmpequw._3"] = "10000486VVV", + vpmsumw_3 = "10000488VVV", + ["vcmpeqfp._3"] = "100004c6VVV", + ["vcmpequd._3"] = "100004c7VVV", + vpkudus_3 = "100004ceVVV", + vavgsb_3 = "10000502VVV", + vavgsh_3 = "10000542VVV", + vorc_3 = "10000544VVV", + vbpermq_3 = "1000054cVVV", + vpksdus_3 = "1000054eVVV", + vavgsw_3 = "10000582VVV", + vsld_3 = "100005c4VVV", + ["vcmpgefp._3"] = "100005c6VVV", + vpksdss_3 = "100005ceVVV", + vsububs_3 = "10000600VVV", + mfvscr_1 = "10000604V--", + vsum4ubs_3 = "10000608VVV", + vsubuhs_3 = "10000640VVV", + mtvscr_1 = "10000644--V", + ["vcmpgtuh._3"] = "10000646VVV", + vsum4shs_3 = "10000648VVV", + vupkhsw_2 = "1000064eV-V", + vsubuws_3 = "10000680VVV", + vshasigmaw_4 = "10000682VVYp", + veqv_3 = "10000684VVV", + vsum2sws_3 = "10000688VVV", + vmrgow_3 = "1000068cVVV", + vshasigmad_4 = "100006c2VVYp", + vsrd_3 = "100006c4VVV", + ["vcmpgtud._3"] = "100006c7VVV", + vupklsw_2 = "100006ceV-V", + vupkslw_2 = "100006ceV-V", + vsubsbs_3 = "10000700VVV", + vclzb_2 = "10000702V-V", + vpopcntb_2 = "10000703V-V", + ["vcmpgtsb._3"] = "10000706VVV", + vsum4sbs_3 = "10000708VVV", + vsubshs_3 = "10000740VVV", + vclzh_2 = "10000742V-V", + vpopcnth_2 = "10000743V-V", + ["vcmpgtsh._3"] = "10000746VVV", + vsubsws_3 
= "10000780VVV", + vclzw_2 = "10000782V-V", + vpopcntw_2 = "10000783V-V", + ["vcmpgtsw._3"] = "10000786VVV", + vsumsws_3 = "10000788VVV", + vmrgew_3 = "1000078cVVV", + vclzd_2 = "100007c2V-V", + vpopcntd_2 = "100007c3V-V", + ["vcmpbfp._3"] = "100007c6VVV", + ["vcmpgtsd._3"] = "100007c7VVV", + + -- Primary opcode 19: + mcrf_2 = "4c000000XX", + isync_0 = "4c00012c", + crnor_3 = "4c000042CCC", + crnot_2 = "4c000042CC=", + crandc_3 = "4c000102CCC", + crxor_3 = "4c000182CCC", + crclr_1 = "4c000182C==", + crnand_3 = "4c0001c2CCC", + crand_3 = "4c000202CCC", + creqv_3 = "4c000242CCC", + crset_1 = "4c000242C==", + crorc_3 = "4c000342CCC", + cror_3 = "4c000382CCC", + crmove_2 = "4c000382CC=", + bclr_2 = "4c000020AA", + bclrl_2 = "4c000021AA", + bcctr_2 = "4c000420AA", + bcctrl_2 = "4c000421AA", + bctar_2 = "4c000460AA", + bctarl_2 = "4c000461AA", + blr_0 = "4e800020", + blrl_0 = "4e800021", + bctr_0 = "4e800420", + bctrl_0 = "4e800421", + + -- Primary opcode 31: + cmpw_3 = "7c000000XRR", + cmpw_2 = "7c000000-RR", + cmpd_3 = "7c200000XRR", + cmpd_2 = "7c200000-RR", + tw_3 = "7c000008ARR", + lvsl_3 = "7c00000cVRR", + subfc_3 = "7c000010RRR.", + subc_3 = "7c000010RRR~.", + mulhdu_3 = "7c000012RRR.", + addc_3 = "7c000014RRR.", + mulhwu_3 = "7c000016RRR.", + isel_4 = "7c00001eRRRC", + isellt_3 = "7c00001eRRR", + iselgt_3 = "7c00005eRRR", + iseleq_3 = "7c00009eRRR", + mfcr_1 = "7c000026R", + mfocrf_2 = "7c100026RG", + mtcrf_2 = "7c000120GR", + mtocrf_2 = "7c100120GR", + lwarx_3 = "7c000028RR0R", + ldx_3 = "7c00002aRR0R", + lwzx_3 = "7c00002eRR0R", + slw_3 = "7c000030RR~R.", + cntlzw_2 = "7c000034RR~", + sld_3 = "7c000036RR~R.", + and_3 = "7c000038RR~R.", + cmplw_3 = "7c000040XRR", + cmplw_2 = "7c000040-RR", + cmpld_3 = "7c200040XRR", + cmpld_2 = "7c200040-RR", + lvsr_3 = "7c00004cVRR", + subf_3 = "7c000050RRR.", + sub_3 = "7c000050RRR~.", + lbarx_3 = "7c000068RR0R", + ldux_3 = "7c00006aRR0R", + dcbst_2 = "7c00006c-RR", + lwzux_3 = "7c00006eRR0R", + cntlzd_2 = "7c000074RR~", + andc_3 = "7c000078RR~R.", + td_3 = "7c000088ARR", + lvewx_3 = "7c00008eVRR", + mulhd_3 = "7c000092RRR.", + addg6s_3 = "7c000094RRR", + mulhw_3 = "7c000096RRR.", + dlmzb_3 = "7c00009cRR~R.", + ldarx_3 = "7c0000a8RR0R", + dcbf_2 = "7c0000ac-RR", + lbzx_3 = "7c0000aeRR0R", + lvx_3 = "7c0000ceVRR", + neg_2 = "7c0000d0RR.", + lharx_3 = "7c0000e8RR0R", + lbzux_3 = "7c0000eeRR0R", + popcntb_2 = "7c0000f4RR~", + not_2 = "7c0000f8RR~%.", + nor_3 = "7c0000f8RR~R.", + stvebx_3 = "7c00010eVRR", + subfe_3 = "7c000110RRR.", + sube_3 = "7c000110RRR~.", + adde_3 = "7c000114RRR.", + stdx_3 = "7c00012aRR0R", + ["stwcx._3"] = "7c00012dRR0R.", + stwx_3 = "7c00012eRR0R", + prtyw_2 = "7c000134RR~", + stvehx_3 = "7c00014eVRR", + stdux_3 = "7c00016aRR0R", + ["stqcx._3"] = "7c00016dR:R0R.", + stwux_3 = "7c00016eRR0R", + prtyd_2 = "7c000174RR~", + stvewx_3 = "7c00018eVRR", + subfze_2 = "7c000190RR.", + addze_2 = "7c000194RR.", + ["stdcx._3"] = "7c0001adRR0R.", + stbx_3 = "7c0001aeRR0R", + stvx_3 = "7c0001ceVRR", + subfme_2 = "7c0001d0RR.", + mulld_3 = "7c0001d2RRR.", + addme_2 = "7c0001d4RR.", + mullw_3 = "7c0001d6RRR.", + dcbtst_2 = "7c0001ec-RR", + stbux_3 = "7c0001eeRR0R", + bpermd_3 = "7c0001f8RR~R", + lvepxl_3 = "7c00020eVRR", + add_3 = "7c000214RRR.", + lqarx_3 = "7c000228R:R0R", + dcbt_2 = "7c00022c-RR", + lhzx_3 = "7c00022eRR0R", + cdtbcd_2 = "7c000234RR~", + eqv_3 = "7c000238RR~R.", + lvepx_3 = "7c00024eVRR", + eciwx_3 = "7c00026cRR0R", + lhzux_3 = "7c00026eRR0R", + cbcdtd_2 = "7c000274RR~", + xor_3 = "7c000278RR~R.", + mfspefscr_1 = "7c0082a6R", + 
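+  -- (mfspefscr/mfxer/mflr/mfctr are mfspr with the SPR number pre-encoded
+  -- in the opcode (XER=1, LR=8, CTR=9); mtspefscr/mtxer/mtlr/mtctr below
+  -- do the same for mtspr.)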
mfxer_1 = "7c0102a6R", + mflr_1 = "7c0802a6R", + mfctr_1 = "7c0902a6R", + lwax_3 = "7c0002aaRR0R", + lhax_3 = "7c0002aeRR0R", + mftb_1 = "7c0c42e6R", + mftbu_1 = "7c0d42e6R", + lvxl_3 = "7c0002ceVRR", + lwaux_3 = "7c0002eaRR0R", + lhaux_3 = "7c0002eeRR0R", + popcntw_2 = "7c0002f4RR~", + divdeu_3 = "7c000312RRR.", + divweu_3 = "7c000316RRR.", + sthx_3 = "7c00032eRR0R", + orc_3 = "7c000338RR~R.", + ecowx_3 = "7c00036cRR0R", + sthux_3 = "7c00036eRR0R", + or_3 = "7c000378RR~R.", + mr_2 = "7c000378RR~%.", + divdu_3 = "7c000392RRR.", + divwu_3 = "7c000396RRR.", + mtspefscr_1 = "7c0083a6R", + mtxer_1 = "7c0103a6R", + mtlr_1 = "7c0803a6R", + mtctr_1 = "7c0903a6R", + dcbi_2 = "7c0003ac-RR", + nand_3 = "7c0003b8RR~R.", + dsn_2 = "7c0003c6-RR", + stvxl_3 = "7c0003ceVRR", + divd_3 = "7c0003d2RRR.", + divw_3 = "7c0003d6RRR.", + popcntd_2 = "7c0003f4RR~", + cmpb_3 = "7c0003f8RR~R.", + mcrxr_1 = "7c000400X", + lbdx_3 = "7c000406RRR", + subfco_3 = "7c000410RRR.", + subco_3 = "7c000410RRR~.", + addco_3 = "7c000414RRR.", + ldbrx_3 = "7c000428RR0R", + lswx_3 = "7c00042aRR0R", + lwbrx_3 = "7c00042cRR0R", + lfsx_3 = "7c00042eFR0R", + srw_3 = "7c000430RR~R.", + srd_3 = "7c000436RR~R.", + lhdx_3 = "7c000446RRR", + subfo_3 = "7c000450RRR.", + subo_3 = "7c000450RRR~.", + lfsux_3 = "7c00046eFR0R", + lwdx_3 = "7c000486RRR", + lswi_3 = "7c0004aaRR0A", + sync_0 = "7c0004ac", + lwsync_0 = "7c2004ac", + ptesync_0 = "7c4004ac", + lfdx_3 = "7c0004aeFR0R", + lddx_3 = "7c0004c6RRR", + nego_2 = "7c0004d0RR.", + lfdux_3 = "7c0004eeFR0R", + stbdx_3 = "7c000506RRR", + subfeo_3 = "7c000510RRR.", + subeo_3 = "7c000510RRR~.", + addeo_3 = "7c000514RRR.", + stdbrx_3 = "7c000528RR0R", + stswx_3 = "7c00052aRR0R", + stwbrx_3 = "7c00052cRR0R", + stfsx_3 = "7c00052eFR0R", + sthdx_3 = "7c000546RRR", + ["stbcx._3"] = "7c00056dRRR", + stfsux_3 = "7c00056eFR0R", + stwdx_3 = "7c000586RRR", + subfzeo_2 = "7c000590RR.", + addzeo_2 = "7c000594RR.", + stswi_3 = "7c0005aaRR0A", + ["sthcx._3"] = "7c0005adRRR", + stfdx_3 = "7c0005aeFR0R", + stddx_3 = "7c0005c6RRR", + subfmeo_2 = "7c0005d0RR.", + mulldo_3 = "7c0005d2RRR.", + addmeo_2 = "7c0005d4RR.", + mullwo_3 = "7c0005d6RRR.", + dcba_2 = "7c0005ec-RR", + stfdux_3 = "7c0005eeFR0R", + stvepxl_3 = "7c00060eVRR", + addo_3 = "7c000614RRR.", + lhbrx_3 = "7c00062cRR0R", + lfdpx_3 = "7c00062eF:RR", + sraw_3 = "7c000630RR~R.", + srad_3 = "7c000634RR~R.", + lfddx_3 = "7c000646FRR", + stvepx_3 = "7c00064eVRR", + srawi_3 = "7c000670RR~A.", + sradi_3 = "7c000674RR~H.", + eieio_0 = "7c0006ac", + lfiwax_3 = "7c0006aeFR0R", + divdeuo_3 = "7c000712RRR.", + divweuo_3 = "7c000716RRR.", + sthbrx_3 = "7c00072cRR0R", + stfdpx_3 = "7c00072eF:RR", + extsh_2 = "7c000734RR~.", + stfddx_3 = "7c000746FRR", + divdeo_3 = "7c000752RRR.", + divweo_3 = "7c000756RRR.", + extsb_2 = "7c000774RR~.", + divduo_3 = "7c000792RRR.", + divwou_3 = "7c000796RRR.", + icbi_2 = "7c0007ac-RR", + stfiwx_3 = "7c0007aeFR0R", + extsw_2 = "7c0007b4RR~.", + divdo_3 = "7c0007d2RRR.", + divwo_3 = "7c0007d6RRR.", + dcbz_2 = "7c0007ec-RR", + + ["tbegin._1"] = "7c00051d1", + ["tbegin._0"] = "7c00051d", + ["tend._1"] = "7c00055dY", + ["tend._0"] = "7c00055d", + ["tendall._0"] = "7e00055d", + tcheck_1 = "7c00059cX", + ["tsr._1"] = "7c0005dd1", + ["tsuspend._0"] = "7c0005dd", + ["tresume._0"] = "7c2005dd", + ["tabortwc._3"] = "7c00061dARR", + ["tabortdc._3"] = "7c00065dARR", + ["tabortwci._3"] = "7c00069dARS", + ["tabortdci._3"] = "7c0006ddARS", + ["tabort._1"] = "7c00071d-R-", + ["treclaim._1"] = "7c00075d-R", + ["trechkpt._0"] = "7c0007dd", + + lxsiwzx_3 = 
"7c000018QRR", + lxsiwax_3 = "7c000098QRR", + mfvsrd_2 = "7c000066-Rq", + mfvsrwz_2 = "7c0000e6-Rq", + stxsiwx_3 = "7c000118QRR", + mtvsrd_2 = "7c000166QR", + mtvsrwa_2 = "7c0001a6QR", + lxvdsx_3 = "7c000298QRR", + lxsspx_3 = "7c000418QRR", + lxsdx_3 = "7c000498QRR", + stxsspx_3 = "7c000518QRR", + stxsdx_3 = "7c000598QRR", + lxvw4x_3 = "7c000618QRR", + lxvd2x_3 = "7c000698QRR", + stxvw4x_3 = "7c000718QRR", + stxvd2x_3 = "7c000798QRR", + + -- Primary opcode 30: + rldicl_4 = "78000000RR~HM.", + rldicr_4 = "78000004RR~HM.", + rldic_4 = "78000008RR~HM.", + rldimi_4 = "7800000cRR~HM.", + rldcl_4 = "78000010RR~RM.", + rldcr_4 = "78000012RR~RM.", + + rotldi_3 = op_alias("rldicl_4", function(p) + p[4] = "0" + end), + rotrdi_3 = op_alias("rldicl_4", function(p) + p[3] = "64-("..p[3]..")"; p[4] = "0" + end), + rotld_3 = op_alias("rldcl_4", function(p) + p[4] = "0" + end), + sldi_3 = op_alias("rldicr_4", function(p) + p[4] = "63-("..p[3]..")" + end), + srdi_3 = op_alias("rldicl_4", function(p) + p[4] = p[3]; p[3] = "64-("..p[3]..")" + end), + clrldi_3 = op_alias("rldicl_4", function(p) + p[4] = p[3]; p[3] = "0" + end), + clrrdi_3 = op_alias("rldicr_4", function(p) + p[4] = "63-("..p[3]..")"; p[3] = "0" + end), + + -- Primary opcode 56: + lq_2 = "e0000000R:D", -- NYI: displacement must be divisible by 8. + + -- Primary opcode 57: + lfdp_2 = "e4000000F:D", -- NYI: displacement must be divisible by 4. + + -- Primary opcode 59: + fdivs_3 = "ec000024FFF.", + fsubs_3 = "ec000028FFF.", + fadds_3 = "ec00002aFFF.", + fsqrts_2 = "ec00002cF-F.", + fres_2 = "ec000030F-F.", + fmuls_3 = "ec000032FF-F.", + frsqrtes_2 = "ec000034F-F.", + fmsubs_4 = "ec000038FFFF~.", + fmadds_4 = "ec00003aFFFF~.", + fnmsubs_4 = "ec00003cFFFF~.", + fnmadds_4 = "ec00003eFFFF~.", + fcfids_2 = "ec00069cF-F.", + fcfidus_2 = "ec00079cF-F.", + + dadd_3 = "ec000004FFF.", + dqua_4 = "ec000006FFFZ.", + dmul_3 = "ec000044FFF.", + drrnd_4 = "ec000046FFFZ.", + dscli_3 = "ec000084FF6.", + dquai_4 = "ec000086SF~FZ.", + dscri_3 = "ec0000c4FF6.", + drintx_4 = "ec0000c61F~FZ.", + dcmpo_3 = "ec000104XFF", + dtstex_3 = "ec000144XFF", + dtstdc_3 = "ec000184XF6", + dtstdg_3 = "ec0001c4XF6", + drintn_4 = "ec0001c61F~FZ.", + dctdp_2 = "ec000204F-F.", + dctfix_2 = "ec000244F-F.", + ddedpd_3 = "ec000284ZF~F.", + dxex_2 = "ec0002c4F-F.", + dsub_3 = "ec000404FFF.", + ddiv_3 = "ec000444FFF.", + dcmpu_3 = "ec000504XFF", + dtstsf_3 = "ec000544XFF", + drsp_2 = "ec000604F-F.", + dcffix_2 = "ec000644F-F.", + denbcd_3 = "ec000684YF~F.", + diex_3 = "ec0006c4FFF.", + + -- Primary opcode 60: + xsaddsp_3 = "f0000000QQQ", + xsmaddasp_3 = "f0000008QQQ", + xxsldwi_4 = "f0000010QQQz", + xsrsqrtesp_2 = "f0000028Q-Q", + xssqrtsp_2 = "f000002cQ-Q", + xxsel_4 = "f0000030QQQQ", + xssubsp_3 = "f0000040QQQ", + xsmaddmsp_3 = "f0000048QQQ", + xxpermdi_4 = "f0000050QQQz", + xsresp_2 = "f0000068Q-Q", + xsmulsp_3 = "f0000080QQQ", + xsmsubasp_3 = "f0000088QQQ", + xxmrghw_3 = "f0000090QQQ", + xsdivsp_3 = "f00000c0QQQ", + xsmsubmsp_3 = "f00000c8QQQ", + xsadddp_3 = "f0000100QQQ", + xsmaddadp_3 = "f0000108QQQ", + xscmpudp_3 = "f0000118XQQ", + xscvdpuxws_2 = "f0000120Q-Q", + xsrdpi_2 = "f0000124Q-Q", + xsrsqrtedp_2 = "f0000128Q-Q", + xssqrtdp_2 = "f000012cQ-Q", + xssubdp_3 = "f0000140QQQ", + xsmaddmdp_3 = "f0000148QQQ", + xscmpodp_3 = "f0000158XQQ", + xscvdpsxws_2 = "f0000160Q-Q", + xsrdpiz_2 = "f0000164Q-Q", + xsredp_2 = "f0000168Q-Q", + xsmuldp_3 = "f0000180QQQ", + xsmsubadp_3 = "f0000188QQQ", + xxmrglw_3 = "f0000190QQQ", + xsrdpip_2 = "f00001a4Q-Q", + xstsqrtdp_2 = "f00001a8X-Q", + 
xsrdpic_2 = "f00001acQ-Q", + xsdivdp_3 = "f00001c0QQQ", + xsmsubmdp_3 = "f00001c8QQQ", + xsrdpim_2 = "f00001e4Q-Q", + xstdivdp_3 = "f00001e8XQQ", + xvaddsp_3 = "f0000200QQQ", + xvmaddasp_3 = "f0000208QQQ", + xvcmpeqsp_3 = "f0000218QQQ", + xvcvspuxws_2 = "f0000220Q-Q", + xvrspi_2 = "f0000224Q-Q", + xvrsqrtesp_2 = "f0000228Q-Q", + xvsqrtsp_2 = "f000022cQ-Q", + xvsubsp_3 = "f0000240QQQ", + xvmaddmsp_3 = "f0000248QQQ", + xvcmpgtsp_3 = "f0000258QQQ", + xvcvspsxws_2 = "f0000260Q-Q", + xvrspiz_2 = "f0000264Q-Q", + xvresp_2 = "f0000268Q-Q", + xvmulsp_3 = "f0000280QQQ", + xvmsubasp_3 = "f0000288QQQ", + xxspltw_3 = "f0000290QQg~", + xvcmpgesp_3 = "f0000298QQQ", + xvcvuxwsp_2 = "f00002a0Q-Q", + xvrspip_2 = "f00002a4Q-Q", + xvtsqrtsp_2 = "f00002a8X-Q", + xvrspic_2 = "f00002acQ-Q", + xvdivsp_3 = "f00002c0QQQ", + xvmsubmsp_3 = "f00002c8QQQ", + xvcvsxwsp_2 = "f00002e0Q-Q", + xvrspim_2 = "f00002e4Q-Q", + xvtdivsp_3 = "f00002e8XQQ", + xvadddp_3 = "f0000300QQQ", + xvmaddadp_3 = "f0000308QQQ", + xvcmpeqdp_3 = "f0000318QQQ", + xvcvdpuxws_2 = "f0000320Q-Q", + xvrdpi_2 = "f0000324Q-Q", + xvrsqrtedp_2 = "f0000328Q-Q", + xvsqrtdp_2 = "f000032cQ-Q", + xvsubdp_3 = "f0000340QQQ", + xvmaddmdp_3 = "f0000348QQQ", + xvcmpgtdp_3 = "f0000358QQQ", + xvcvdpsxws_2 = "f0000360Q-Q", + xvrdpiz_2 = "f0000364Q-Q", + xvredp_2 = "f0000368Q-Q", + xvmuldp_3 = "f0000380QQQ", + xvmsubadp_3 = "f0000388QQQ", + xvcmpgedp_3 = "f0000398QQQ", + xvcvuxwdp_2 = "f00003a0Q-Q", + xvrdpip_2 = "f00003a4Q-Q", + xvtsqrtdp_2 = "f00003a8X-Q", + xvrdpic_2 = "f00003acQ-Q", + xvdivdp_3 = "f00003c0QQQ", + xvmsubmdp_3 = "f00003c8QQQ", + xvcvsxwdp_2 = "f00003e0Q-Q", + xvrdpim_2 = "f00003e4Q-Q", + xvtdivdp_3 = "f00003e8XQQ", + xsnmaddasp_3 = "f0000408QQQ", + xxland_3 = "f0000410QQQ", + xscvdpsp_2 = "f0000424Q-Q", + xscvdpspn_2 = "f000042cQ-Q", + xsnmaddmsp_3 = "f0000448QQQ", + xxlandc_3 = "f0000450QQQ", + xsrsp_2 = "f0000464Q-Q", + xsnmsubasp_3 = "f0000488QQQ", + xxlor_3 = "f0000490QQQ", + xscvuxdsp_2 = "f00004a0Q-Q", + xsnmsubmsp_3 = "f00004c8QQQ", + xxlxor_3 = "f00004d0QQQ", + xscvsxdsp_2 = "f00004e0Q-Q", + xsmaxdp_3 = "f0000500QQQ", + xsnmaddadp_3 = "f0000508QQQ", + xxlnor_3 = "f0000510QQQ", + xscvdpuxds_2 = "f0000520Q-Q", + xscvspdp_2 = "f0000524Q-Q", + xscvspdpn_2 = "f000052cQ-Q", + xsmindp_3 = "f0000540QQQ", + xsnmaddmdp_3 = "f0000548QQQ", + xxlorc_3 = "f0000550QQQ", + xscvdpsxds_2 = "f0000560Q-Q", + xsabsdp_2 = "f0000564Q-Q", + xscpsgndp_3 = "f0000580QQQ", + xsnmsubadp_3 = "f0000588QQQ", + xxlnand_3 = "f0000590QQQ", + xscvuxddp_2 = "f00005a0Q-Q", + xsnabsdp_2 = "f00005a4Q-Q", + xsnmsubmdp_3 = "f00005c8QQQ", + xxleqv_3 = "f00005d0QQQ", + xscvsxddp_2 = "f00005e0Q-Q", + xsnegdp_2 = "f00005e4Q-Q", + xvmaxsp_3 = "f0000600QQQ", + xvnmaddasp_3 = "f0000608QQQ", + ["xvcmpeqsp._3"] = "f0000618QQQ", + xvcvspuxds_2 = "f0000620Q-Q", + xvcvdpsp_2 = "f0000624Q-Q", + xvminsp_3 = "f0000640QQQ", + xvnmaddmsp_3 = "f0000648QQQ", + ["xvcmpgtsp._3"] = "f0000658QQQ", + xvcvspsxds_2 = "f0000660Q-Q", + xvabssp_2 = "f0000664Q-Q", + xvcpsgnsp_3 = "f0000680QQQ", + xvnmsubasp_3 = "f0000688QQQ", + ["xvcmpgesp._3"] = "f0000698QQQ", + xvcvuxdsp_2 = "f00006a0Q-Q", + xvnabssp_2 = "f00006a4Q-Q", + xvnmsubmsp_3 = "f00006c8QQQ", + xvcvsxdsp_2 = "f00006e0Q-Q", + xvnegsp_2 = "f00006e4Q-Q", + xvmaxdp_3 = "f0000700QQQ", + xvnmaddadp_3 = "f0000708QQQ", + ["xvcmpeqdp._3"] = "f0000718QQQ", + xvcvdpuxds_2 = "f0000720Q-Q", + xvcvspdp_2 = "f0000724Q-Q", + xvmindp_3 = "f0000740QQQ", + xvnmaddmdp_3 = "f0000748QQQ", + ["xvcmpgtdp._3"] = "f0000758QQQ", + xvcvdpsxds_2 = "f0000760Q-Q", + xvabsdp_2 = 
"f0000764Q-Q", + xvcpsgndp_3 = "f0000780QQQ", + xvnmsubadp_3 = "f0000788QQQ", + ["xvcmpgedp._3"] = "f0000798QQQ", + xvcvuxddp_2 = "f00007a0Q-Q", + xvnabsdp_2 = "f00007a4Q-Q", + xvnmsubmdp_3 = "f00007c8QQQ", + xvcvsxddp_2 = "f00007e0Q-Q", + xvnegdp_2 = "f00007e4Q-Q", + + -- Primary opcode 61: + stfdp_2 = "f4000000F:D", -- NYI: displacement must be divisible by 4. + + -- Primary opcode 62: + stq_2 = "f8000002R:D", -- NYI: displacement must be divisible by 8. + + -- Primary opcode 63: + fdiv_3 = "fc000024FFF.", + fsub_3 = "fc000028FFF.", + fadd_3 = "fc00002aFFF.", + fsqrt_2 = "fc00002cF-F.", + fsel_4 = "fc00002eFFFF~.", + fre_2 = "fc000030F-F.", + fmul_3 = "fc000032FF-F.", + frsqrte_2 = "fc000034F-F.", + fmsub_4 = "fc000038FFFF~.", + fmadd_4 = "fc00003aFFFF~.", + fnmsub_4 = "fc00003cFFFF~.", + fnmadd_4 = "fc00003eFFFF~.", + fcmpu_3 = "fc000000XFF", + fcpsgn_3 = "fc000010FFF.", + fcmpo_3 = "fc000040XFF", + mtfsb1_1 = "fc00004cA", + fneg_2 = "fc000050F-F.", + mcrfs_2 = "fc000080XX", + mtfsb0_1 = "fc00008cA", + fmr_2 = "fc000090F-F.", + frsp_2 = "fc000018F-F.", + fctiw_2 = "fc00001cF-F.", + fctiwz_2 = "fc00001eF-F.", + ftdiv_2 = "fc000100X-F.", + fctiwu_2 = "fc00011cF-F.", + fctiwuz_2 = "fc00011eF-F.", + mtfsfi_2 = "fc00010cAA", -- NYI: upshift. + fnabs_2 = "fc000110F-F.", + ftsqrt_2 = "fc000140X-F.", + fabs_2 = "fc000210F-F.", + frin_2 = "fc000310F-F.", + friz_2 = "fc000350F-F.", + frip_2 = "fc000390F-F.", + frim_2 = "fc0003d0F-F.", + mffs_1 = "fc00048eF.", + -- NYI: mtfsf, mtfsb0, mtfsb1. + fctid_2 = "fc00065cF-F.", + fctidz_2 = "fc00065eF-F.", + fmrgow_3 = "fc00068cFFF", + fcfid_2 = "fc00069cF-F.", + fctidu_2 = "fc00075cF-F.", + fctiduz_2 = "fc00075eF-F.", + fmrgew_3 = "fc00078cFFF", + fcfidu_2 = "fc00079cF-F.", + + daddq_3 = "fc000004F:F:F:.", + dquaq_4 = "fc000006F:F:F:Z.", + dmulq_3 = "fc000044F:F:F:.", + drrndq_4 = "fc000046F:F:F:Z.", + dscliq_3 = "fc000084F:F:6.", + dquaiq_4 = "fc000086SF:~F:Z.", + dscriq_3 = "fc0000c4F:F:6.", + drintxq_4 = "fc0000c61F:~F:Z.", + dcmpoq_3 = "fc000104XF:F:", + dtstexq_3 = "fc000144XF:F:", + dtstdcq_3 = "fc000184XF:6", + dtstdgq_3 = "fc0001c4XF:6", + drintnq_4 = "fc0001c61F:~F:Z.", + dctqpq_2 = "fc000204F:-F:.", + dctfixq_2 = "fc000244F:-F:.", + ddedpdq_3 = "fc000284ZF:~F:.", + dxexq_2 = "fc0002c4F:-F:.", + dsubq_3 = "fc000404F:F:F:.", + ddivq_3 = "fc000444F:F:F:.", + dcmpuq_3 = "fc000504XF:F:", + dtstsfq_3 = "fc000544XF:F:", + drdpq_2 = "fc000604F:-F:.", + dcffixq_2 = "fc000644F:-F:.", + denbcdq_3 = "fc000684YF:~F:.", + diexq_3 = "fc0006c4F:FF:.", + + -- Primary opcode 4, SPE APU extension: + evaddw_3 = "10000200RRR", + evaddiw_3 = "10000202RAR~", + evsubw_3 = "10000204RRR~", + evsubiw_3 = "10000206RAR~", + evabs_2 = "10000208RR", + evneg_2 = "10000209RR", + evextsb_2 = "1000020aRR", + evextsh_2 = "1000020bRR", + evrndw_2 = "1000020cRR", + evcntlzw_2 = "1000020dRR", + evcntlsw_2 = "1000020eRR", + brinc_3 = "1000020fRRR", + evand_3 = "10000211RRR", + evandc_3 = "10000212RRR", + evxor_3 = "10000216RRR", + evor_3 = "10000217RRR", + evmr_2 = "10000217RR=", + evnor_3 = "10000218RRR", + evnot_2 = "10000218RR=", + eveqv_3 = "10000219RRR", + evorc_3 = "1000021bRRR", + evnand_3 = "1000021eRRR", + evsrwu_3 = "10000220RRR", + evsrws_3 = "10000221RRR", + evsrwiu_3 = "10000222RRA", + evsrwis_3 = "10000223RRA", + evslw_3 = "10000224RRR", + evslwi_3 = "10000226RRA", + evrlw_3 = "10000228RRR", + evsplati_2 = "10000229RS", + evrlwi_3 = "1000022aRRA", + evsplatfi_2 = "1000022bRS", + evmergehi_3 = "1000022cRRR", + evmergelo_3 = "1000022dRRR", + evcmpgtu_3 = "10000230XRR", + 
evcmpgtu_2 = "10000230-RR", + evcmpgts_3 = "10000231XRR", + evcmpgts_2 = "10000231-RR", + evcmpltu_3 = "10000232XRR", + evcmpltu_2 = "10000232-RR", + evcmplts_3 = "10000233XRR", + evcmplts_2 = "10000233-RR", + evcmpeq_3 = "10000234XRR", + evcmpeq_2 = "10000234-RR", + evsel_4 = "10000278RRRW", + evsel_3 = "10000278RRR", + evfsadd_3 = "10000280RRR", + evfssub_3 = "10000281RRR", + evfsabs_2 = "10000284RR", + evfsnabs_2 = "10000285RR", + evfsneg_2 = "10000286RR", + evfsmul_3 = "10000288RRR", + evfsdiv_3 = "10000289RRR", + evfscmpgt_3 = "1000028cXRR", + evfscmpgt_2 = "1000028c-RR", + evfscmplt_3 = "1000028dXRR", + evfscmplt_2 = "1000028d-RR", + evfscmpeq_3 = "1000028eXRR", + evfscmpeq_2 = "1000028e-RR", + evfscfui_2 = "10000290R-R", + evfscfsi_2 = "10000291R-R", + evfscfuf_2 = "10000292R-R", + evfscfsf_2 = "10000293R-R", + evfsctui_2 = "10000294R-R", + evfsctsi_2 = "10000295R-R", + evfsctuf_2 = "10000296R-R", + evfsctsf_2 = "10000297R-R", + evfsctuiz_2 = "10000298R-R", + evfsctsiz_2 = "1000029aR-R", + evfststgt_3 = "1000029cXRR", + evfststgt_2 = "1000029c-RR", + evfststlt_3 = "1000029dXRR", + evfststlt_2 = "1000029d-RR", + evfststeq_3 = "1000029eXRR", + evfststeq_2 = "1000029e-RR", + efsadd_3 = "100002c0RRR", + efssub_3 = "100002c1RRR", + efsabs_2 = "100002c4RR", + efsnabs_2 = "100002c5RR", + efsneg_2 = "100002c6RR", + efsmul_3 = "100002c8RRR", + efsdiv_3 = "100002c9RRR", + efscmpgt_3 = "100002ccXRR", + efscmpgt_2 = "100002cc-RR", + efscmplt_3 = "100002cdXRR", + efscmplt_2 = "100002cd-RR", + efscmpeq_3 = "100002ceXRR", + efscmpeq_2 = "100002ce-RR", + efscfd_2 = "100002cfR-R", + efscfui_2 = "100002d0R-R", + efscfsi_2 = "100002d1R-R", + efscfuf_2 = "100002d2R-R", + efscfsf_2 = "100002d3R-R", + efsctui_2 = "100002d4R-R", + efsctsi_2 = "100002d5R-R", + efsctuf_2 = "100002d6R-R", + efsctsf_2 = "100002d7R-R", + efsctuiz_2 = "100002d8R-R", + efsctsiz_2 = "100002daR-R", + efststgt_3 = "100002dcXRR", + efststgt_2 = "100002dc-RR", + efststlt_3 = "100002ddXRR", + efststlt_2 = "100002dd-RR", + efststeq_3 = "100002deXRR", + efststeq_2 = "100002de-RR", + efdadd_3 = "100002e0RRR", + efdsub_3 = "100002e1RRR", + efdcfuid_2 = "100002e2R-R", + efdcfsid_2 = "100002e3R-R", + efdabs_2 = "100002e4RR", + efdnabs_2 = "100002e5RR", + efdneg_2 = "100002e6RR", + efdmul_3 = "100002e8RRR", + efddiv_3 = "100002e9RRR", + efdctuidz_2 = "100002eaR-R", + efdctsidz_2 = "100002ebR-R", + efdcmpgt_3 = "100002ecXRR", + efdcmpgt_2 = "100002ec-RR", + efdcmplt_3 = "100002edXRR", + efdcmplt_2 = "100002ed-RR", + efdcmpeq_3 = "100002eeXRR", + efdcmpeq_2 = "100002ee-RR", + efdcfs_2 = "100002efR-R", + efdcfui_2 = "100002f0R-R", + efdcfsi_2 = "100002f1R-R", + efdcfuf_2 = "100002f2R-R", + efdcfsf_2 = "100002f3R-R", + efdctui_2 = "100002f4R-R", + efdctsi_2 = "100002f5R-R", + efdctuf_2 = "100002f6R-R", + efdctsf_2 = "100002f7R-R", + efdctuiz_2 = "100002f8R-R", + efdctsiz_2 = "100002faR-R", + efdtstgt_3 = "100002fcXRR", + efdtstgt_2 = "100002fc-RR", + efdtstlt_3 = "100002fdXRR", + efdtstlt_2 = "100002fd-RR", + efdtsteq_3 = "100002feXRR", + efdtsteq_2 = "100002fe-RR", + evlddx_3 = "10000300RR0R", + evldd_2 = "10000301R8", + evldwx_3 = "10000302RR0R", + evldw_2 = "10000303R8", + evldhx_3 = "10000304RR0R", + evldh_2 = "10000305R8", + evlwhex_3 = "10000310RR0R", + evlwhe_2 = "10000311R4", + evlwhoux_3 = "10000314RR0R", + evlwhou_2 = "10000315R4", + evlwhosx_3 = "10000316RR0R", + evlwhos_2 = "10000317R4", + evstddx_3 = "10000320RR0R", + evstdd_2 = "10000321R8", + evstdwx_3 = "10000322RR0R", + evstdw_2 = "10000323R8", + evstdhx_3 = "10000324RR0R", + 
evstdh_2 = "10000325R8", + evstwhex_3 = "10000330RR0R", + evstwhe_2 = "10000331R4", + evstwhox_3 = "10000334RR0R", + evstwho_2 = "10000335R4", + evstwwex_3 = "10000338RR0R", + evstwwe_2 = "10000339R4", + evstwwox_3 = "1000033cRR0R", + evstwwo_2 = "1000033dR4", + evmhessf_3 = "10000403RRR", + evmhossf_3 = "10000407RRR", + evmheumi_3 = "10000408RRR", + evmhesmi_3 = "10000409RRR", + evmhesmf_3 = "1000040bRRR", + evmhoumi_3 = "1000040cRRR", + evmhosmi_3 = "1000040dRRR", + evmhosmf_3 = "1000040fRRR", + evmhessfa_3 = "10000423RRR", + evmhossfa_3 = "10000427RRR", + evmheumia_3 = "10000428RRR", + evmhesmia_3 = "10000429RRR", + evmhesmfa_3 = "1000042bRRR", + evmhoumia_3 = "1000042cRRR", + evmhosmia_3 = "1000042dRRR", + evmhosmfa_3 = "1000042fRRR", + evmwhssf_3 = "10000447RRR", + evmwlumi_3 = "10000448RRR", + evmwhumi_3 = "1000044cRRR", + evmwhsmi_3 = "1000044dRRR", + evmwhsmf_3 = "1000044fRRR", + evmwssf_3 = "10000453RRR", + evmwumi_3 = "10000458RRR", + evmwsmi_3 = "10000459RRR", + evmwsmf_3 = "1000045bRRR", + evmwhssfa_3 = "10000467RRR", + evmwlumia_3 = "10000468RRR", + evmwhumia_3 = "1000046cRRR", + evmwhsmia_3 = "1000046dRRR", + evmwhsmfa_3 = "1000046fRRR", + evmwssfa_3 = "10000473RRR", + evmwumia_3 = "10000478RRR", + evmwsmia_3 = "10000479RRR", + evmwsmfa_3 = "1000047bRRR", + evmra_2 = "100004c4RR", + evdivws_3 = "100004c6RRR", + evdivwu_3 = "100004c7RRR", + evmwssfaa_3 = "10000553RRR", + evmwumiaa_3 = "10000558RRR", + evmwsmiaa_3 = "10000559RRR", + evmwsmfaa_3 = "1000055bRRR", + evmwssfan_3 = "100005d3RRR", + evmwumian_3 = "100005d8RRR", + evmwsmian_3 = "100005d9RRR", + evmwsmfan_3 = "100005dbRRR", + evmergehilo_3 = "1000022eRRR", + evmergelohi_3 = "1000022fRRR", + evlhhesplatx_3 = "10000308RR0R", + evlhhesplat_2 = "10000309R2", + evlhhousplatx_3 = "1000030cRR0R", + evlhhousplat_2 = "1000030dR2", + evlhhossplatx_3 = "1000030eRR0R", + evlhhossplat_2 = "1000030fR2", + evlwwsplatx_3 = "10000318RR0R", + evlwwsplat_2 = "10000319R4", + evlwhsplatx_3 = "1000031cRR0R", + evlwhsplat_2 = "1000031dR4", + evaddusiaaw_2 = "100004c0RR", + evaddssiaaw_2 = "100004c1RR", + evsubfusiaaw_2 = "100004c2RR", + evsubfssiaaw_2 = "100004c3RR", + evaddumiaaw_2 = "100004c8RR", + evaddsmiaaw_2 = "100004c9RR", + evsubfumiaaw_2 = "100004caRR", + evsubfsmiaaw_2 = "100004cbRR", + evmheusiaaw_3 = "10000500RRR", + evmhessiaaw_3 = "10000501RRR", + evmhessfaaw_3 = "10000503RRR", + evmhousiaaw_3 = "10000504RRR", + evmhossiaaw_3 = "10000505RRR", + evmhossfaaw_3 = "10000507RRR", + evmheumiaaw_3 = "10000508RRR", + evmhesmiaaw_3 = "10000509RRR", + evmhesmfaaw_3 = "1000050bRRR", + evmhoumiaaw_3 = "1000050cRRR", + evmhosmiaaw_3 = "1000050dRRR", + evmhosmfaaw_3 = "1000050fRRR", + evmhegumiaa_3 = "10000528RRR", + evmhegsmiaa_3 = "10000529RRR", + evmhegsmfaa_3 = "1000052bRRR", + evmhogumiaa_3 = "1000052cRRR", + evmhogsmiaa_3 = "1000052dRRR", + evmhogsmfaa_3 = "1000052fRRR", + evmwlusiaaw_3 = "10000540RRR", + evmwlssiaaw_3 = "10000541RRR", + evmwlumiaaw_3 = "10000548RRR", + evmwlsmiaaw_3 = "10000549RRR", + evmheusianw_3 = "10000580RRR", + evmhessianw_3 = "10000581RRR", + evmhessfanw_3 = "10000583RRR", + evmhousianw_3 = "10000584RRR", + evmhossianw_3 = "10000585RRR", + evmhossfanw_3 = "10000587RRR", + evmheumianw_3 = "10000588RRR", + evmhesmianw_3 = "10000589RRR", + evmhesmfanw_3 = "1000058bRRR", + evmhoumianw_3 = "1000058cRRR", + evmhosmianw_3 = "1000058dRRR", + evmhosmfanw_3 = "1000058fRRR", + evmhegumian_3 = "100005a8RRR", + evmhegsmian_3 = "100005a9RRR", + evmhegsmfan_3 = "100005abRRR", + evmhogumian_3 = "100005acRRR", + evmhogsmian_3 = 
"100005adRRR", + evmhogsmfan_3 = "100005afRRR", + evmwlusianw_3 = "100005c0RRR", + evmwlssianw_3 = "100005c1RRR", + evmwlumianw_3 = "100005c8RRR", + evmwlsmianw_3 = "100005c9RRR", + + -- NYI: Book E instructions. +} + +-- Add mnemonics for "." variants. +do + local t = {} + for k,v in pairs(map_op) do + if type(v) == "string" and sub(v, -1) == "." then + local v2 = sub(v, 1, 7)..char(byte(v, 8)+1)..sub(v, 9, -2) + t[sub(k, 1, -3).."."..sub(k, -2)] = v2 + end + end + for k,v in pairs(t) do + map_op[k] = v + end +end + +-- Add more branch mnemonics. +for cond,c in pairs(map_cond) do + local b1 = "b"..cond + local c1 = shl(band(c, 3), 16) + (c < 4 and 0x01000000 or 0) + -- bX[l] + map_op[b1.."_1"] = tohex(0x40800000 + c1).."K" + map_op[b1.."y_1"] = tohex(0x40a00000 + c1).."K" + map_op[b1.."l_1"] = tohex(0x40800001 + c1).."K" + map_op[b1.."_2"] = tohex(0x40800000 + c1).."-XK" + map_op[b1.."y_2"] = tohex(0x40a00000 + c1).."-XK" + map_op[b1.."l_2"] = tohex(0x40800001 + c1).."-XK" + -- bXlr[l] + map_op[b1.."lr_0"] = tohex(0x4c800020 + c1) + map_op[b1.."lrl_0"] = tohex(0x4c800021 + c1) + map_op[b1.."ctr_0"] = tohex(0x4c800420 + c1) + map_op[b1.."ctrl_0"] = tohex(0x4c800421 + c1) + -- bXctr[l] + map_op[b1.."lr_1"] = tohex(0x4c800020 + c1).."-X" + map_op[b1.."lrl_1"] = tohex(0x4c800021 + c1).."-X" + map_op[b1.."ctr_1"] = tohex(0x4c800420 + c1).."-X" + map_op[b1.."ctrl_1"] = tohex(0x4c800421 + c1).."-X" +end + +------------------------------------------------------------------------------ + +local function parse_gpr(expr) + local tname, ovreg = match(expr, "^([%w_]+):(r[1-3]?[0-9])$") + local tp = map_type[tname or expr] + if tp then + local reg = ovreg or tp.reg + if not reg then + werror("type `"..(tname or expr).."' needs a register override") + end + expr = reg + end + local r = match(expr, "^r([1-3]?[0-9])$") + if r then + r = tonumber(r) + if r <= 31 then return r, tp end + end + werror("bad register name `"..expr.."'") +end + +local function parse_fpr(expr) + local r = match(expr, "^f([1-3]?[0-9])$") + if r then + r = tonumber(r) + if r <= 31 then return r end + end + werror("bad register name `"..expr.."'") +end + +local function parse_vr(expr) + local r = match(expr, "^v([1-3]?[0-9])$") + if r then + r = tonumber(r) + if r <= 31 then return r end + end + werror("bad register name `"..expr.."'") +end + +local function parse_vs(expr) + local r = match(expr, "^vs([1-6]?[0-9])$") + if r then + r = tonumber(r) + if r <= 63 then return r end + end + werror("bad register name `"..expr.."'") +end + +local function parse_cr(expr) + local r = match(expr, "^cr([0-7])$") + if r then return tonumber(r) end + werror("bad condition register name `"..expr.."'") +end + +local function parse_cond(expr) + local r, cond = match(expr, "^4%*cr([0-7])%+(%w%w)$") + if r then + r = tonumber(r) + local c = map_cond[cond] + if c and c < 4 then return r*4+c end + end + werror("bad condition bit name `"..expr.."'") +end + +local parse_ctx = {} + +local loadenv = setfenv and function(s) + local code = loadstring(s, "") + if code then setfenv(code, parse_ctx) end + return code +end or function(s) + return load(s, "", nil, parse_ctx) +end + +-- Try to parse simple arithmetic, too, since some basic ops are aliases. 
+local function parse_number(n) + local x = tonumber(n) + if x then return x end + local code = loadenv("return "..n) + if code then + local ok, y = pcall(code) + if ok then return y end + end + return nil +end + +local function parse_imm(imm, bits, shift, scale, signed) + local n = parse_number(imm) + if n then + local m = sar(n, scale) + if shl(m, scale) == n then + if signed then + local s = sar(m, bits-1) + if s == 0 then return shl(m, shift) + elseif s == -1 then return shl(m + shl(1, bits), shift) end + else + if sar(m, bits) == 0 then return shl(m, shift) end + end + end + werror("out of range immediate `"..imm.."'") + elseif match(imm, "^[rfv]([1-3]?[0-9])$") or + match(imm, "^vs([1-6]?[0-9])$") or + match(imm, "^([%w_]+):(r[1-3]?[0-9])$") then + werror("expected immediate operand, got register") + else + waction("IMM", (signed and 32768 or 0)+scale*1024+bits*32+shift, imm) + return 0 + end +end + +local function parse_shiftmask(imm, isshift) + local n = parse_number(imm) + if n then + if shr(n, 6) == 0 then + local lsb = band(n, 31) + local msb = n - lsb + return isshift and (shl(lsb, 11)+shr(msb, 4)) or (shl(lsb, 6)+msb) + end + werror("out of range immediate `"..imm.."'") + elseif match(imm, "^r([1-3]?[0-9])$") or + match(imm, "^([%w_]+):(r[1-3]?[0-9])$") then + werror("expected immediate operand, got register") + else + waction("IMMSH", isshift and 1 or 0, imm) + return 0; + end +end + +local function parse_disp(disp) + local imm, reg = match(disp, "^(.*)%(([%w_:]+)%)$") + if imm then + local r = parse_gpr(reg) + if r == 0 then werror("cannot use r0 in displacement") end + return shl(r, 16) + parse_imm(imm, 16, 0, 0, true) + end + local reg, tailr = match(disp, "^([%w_:]+)%s*(.*)$") + if reg and tailr ~= "" then + local r, tp = parse_gpr(reg) + if r == 0 then werror("cannot use r0 in displacement") end + if tp then + waction("IMM", 32768+16*32, format(tp.ctypefmt, tailr)) + return shl(r, 16) + end + end + werror("bad displacement `"..disp.."'") +end + +local function parse_u5disp(disp, scale) + local imm, reg = match(disp, "^(.*)%(([%w_:]+)%)$") + if imm then + local r = parse_gpr(reg) + if r == 0 then werror("cannot use r0 in displacement") end + return shl(r, 16) + parse_imm(imm, 5, 11, scale, false) + end + local reg, tailr = match(disp, "^([%w_:]+)%s*(.*)$") + if reg and tailr ~= "" then + local r, tp = parse_gpr(reg) + if r == 0 then werror("cannot use r0 in displacement") end + if tp then + waction("IMM", scale*1024+5*32+11, format(tp.ctypefmt, tailr)) + return shl(r, 16) + end + end + werror("bad displacement `"..disp.."'") +end + +local function parse_label(label, def) + local prefix = sub(label, 1, 2) + -- =>label (pc label reference) + if prefix == "=>" then + return "PC", 0, sub(label, 3) + end + -- ->name (global label reference) + if prefix == "->" then + return "LG", map_global[sub(label, 3)] + end + if def then + -- [1-9] (local label definition) + if match(label, "^[1-9]$") then + return "LG", 10+tonumber(label) + end + else + -- [<>][1-9] (local label reference) + local dir, lnum = match(label, "^([<>])([1-9])$") + if dir then -- Fwd: 1-9, Bkwd: 11-19. + return "LG", lnum + (dir == ">" and 0 or 10) + end + -- extern label (extern label reference) + local extname = match(label, "^extern%s+(%S+)$") + if extname then + return "EXT", map_extern[extname] + end + end + werror("bad label `"..label.."'") +end + +------------------------------------------------------------------------------ + +-- Handle opcodes defined with template strings. 
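+-- (For instance, a template such as "7c000214RRR" (the PowerPC `add')
+-- yields base opcode 0x7c000214 and packs three GPR operands at shift
+-- amounts 21, 16 and 11: each `R' below steps `rs' down by 5 from 26.)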
+op_template = function(params, template, nparams) + if not params then return sub(template, 9) end + local op = tonumber(sub(template, 1, 8), 16) + local n, rs = 1, 26 + + -- Limit number of section buffer positions used by a single dasm_put(). + -- A single opcode needs a maximum of 3 positions (rlwinm). + if secpos+3 > maxsecpos then wflush() end + local pos = wpos() + + -- Process each character. + for p in gmatch(sub(template, 9), ".") do + if p == "R" then + rs = rs - 5; op = op + shl(parse_gpr(params[n]), rs); n = n + 1 + elseif p == "F" then + rs = rs - 5; op = op + shl(parse_fpr(params[n]), rs); n = n + 1 + elseif p == "V" then + rs = rs - 5; op = op + shl(parse_vr(params[n]), rs); n = n + 1 + elseif p == "Q" then + local vs = parse_vs(params[n]); n = n + 1; rs = rs - 5 + local sh = rs == 6 and 2 or 3 + band(shr(rs, 1), 3) + op = op + shl(band(vs, 31), rs) + shr(band(vs, 32), sh) + elseif p == "q" then + local vs = parse_vs(params[n]); n = n + 1 + op = op + shl(band(vs, 31), 21) + shr(band(vs, 32), 5) + elseif p == "A" then + rs = rs - 5; op = op + parse_imm(params[n], 5, rs, 0, false); n = n + 1 + elseif p == "S" then + rs = rs - 5; op = op + parse_imm(params[n], 5, rs, 0, true); n = n + 1 + elseif p == "I" then + op = op + parse_imm(params[n], 16, 0, 0, true); n = n + 1 + elseif p == "U" then + op = op + parse_imm(params[n], 16, 0, 0, false); n = n + 1 + elseif p == "D" then + op = op + parse_disp(params[n]); n = n + 1 + elseif p == "2" then + op = op + parse_u5disp(params[n], 1); n = n + 1 + elseif p == "4" then + op = op + parse_u5disp(params[n], 2); n = n + 1 + elseif p == "8" then + op = op + parse_u5disp(params[n], 3); n = n + 1 + elseif p == "C" then + rs = rs - 5; op = op + shl(parse_cond(params[n]), rs); n = n + 1 + elseif p == "X" then + rs = rs - 5; op = op + shl(parse_cr(params[n]), rs+2); n = n + 1 + elseif p == "1" then + rs = rs - 5; op = op + parse_imm(params[n], 1, rs, 0, false); n = n + 1 + elseif p == "g" then + rs = rs - 5; op = op + parse_imm(params[n], 2, rs, 0, false); n = n + 1 + elseif p == "3" then + rs = rs - 5; op = op + parse_imm(params[n], 3, rs, 0, false); n = n + 1 + elseif p == "P" then + rs = rs - 5; op = op + parse_imm(params[n], 4, rs, 0, false); n = n + 1 + elseif p == "p" then + op = op + parse_imm(params[n], 4, rs, 0, false); n = n + 1 + elseif p == "6" then + rs = rs - 6; op = op + parse_imm(params[n], 6, rs, 0, false); n = n + 1 + elseif p == "Y" then + rs = rs - 5; op = op + parse_imm(params[n], 1, rs+4, 0, false); n = n + 1 + elseif p == "y" then + rs = rs - 5; op = op + parse_imm(params[n], 1, rs+3, 0, false); n = n + 1 + elseif p == "Z" then + rs = rs - 5; op = op + parse_imm(params[n], 2, rs+3, 0, false); n = n + 1 + elseif p == "z" then + rs = rs - 5; op = op + parse_imm(params[n], 2, rs+2, 0, false); n = n + 1 + elseif p == "W" then + op = op + parse_cr(params[n]); n = n + 1 + elseif p == "G" then + op = op + parse_imm(params[n], 8, 12, 0, false); n = n + 1 + elseif p == "H" then + op = op + parse_shiftmask(params[n], true); n = n + 1 + elseif p == "M" then + op = op + parse_shiftmask(params[n], false); n = n + 1 + elseif p == "J" or p == "K" then + local mode, m, s = parse_label(params[n], false) + if p == "K" then m = m + 2048 end + waction("REL_"..mode, m, s, 1) + n = n + 1 + elseif p == "0" then + if band(shr(op, rs), 31) == 0 then werror("cannot use r0") end + elseif p == "=" or p == "%" then + local t = band(shr(op, p == "%" and rs+5 or rs), 31) + rs = rs - 5 + op = op + shl(t, rs) + elseif p == "~" then + local mm = shl(31, 
rs) + local lo = band(op, mm) + local hi = band(op, shl(mm, 5)) + op = op - lo - hi + shl(lo, 5) + shr(hi, 5) + elseif p == ":" then + if band(shr(op, rs), 1) ~= 0 then werror("register pair expected") end + elseif p == "-" then + rs = rs - 5 + elseif p == "." then + -- Ignored. + else + assert(false) + end + end + wputpos(pos, op) +end + +map_op[".template__"] = op_template + +------------------------------------------------------------------------------ + +-- Pseudo-opcode to mark the position where the action list is to be emitted. +map_op[".actionlist_1"] = function(params) + if not params then return "cvar" end + local name = params[1] -- No syntax check. You get to keep the pieces. + wline(function(out) writeactions(out, name) end) +end + +-- Pseudo-opcode to mark the position where the global enum is to be emitted. +map_op[".globals_1"] = function(params) + if not params then return "prefix" end + local prefix = params[1] -- No syntax check. You get to keep the pieces. + wline(function(out) writeglobals(out, prefix) end) +end + +-- Pseudo-opcode to mark the position where the global names are to be emitted. +map_op[".globalnames_1"] = function(params) + if not params then return "cvar" end + local name = params[1] -- No syntax check. You get to keep the pieces. + wline(function(out) writeglobalnames(out, name) end) +end + +-- Pseudo-opcode to mark the position where the extern names are to be emitted. +map_op[".externnames_1"] = function(params) + if not params then return "cvar" end + local name = params[1] -- No syntax check. You get to keep the pieces. + wline(function(out) writeexternnames(out, name) end) +end + +------------------------------------------------------------------------------ + +-- Label pseudo-opcode (converted from trailing colon form). +map_op[".label_1"] = function(params) + if not params then return "[1-9] | ->global | =>pcexpr" end + if secpos+1 > maxsecpos then wflush() end + local mode, n, s = parse_label(params[1], true) + if mode == "EXT" then werror("bad label definition") end + waction("LABEL_"..mode, n, s, 1) +end + +------------------------------------------------------------------------------ + +-- Pseudo-opcodes for data storage. +map_op[".long_*"] = function(params) + if not params then return "imm..." end + for _,p in ipairs(params) do + local n = tonumber(p) + if not n then werror("bad immediate `"..p.."'") end + if n < 0 then n = n + 2^32 end + wputw(n) + if secpos+2 > maxsecpos then wflush() end + end +end + +-- Alignment pseudo-opcode. +map_op[".align_1"] = function(params) + if not params then return "numpow2" end + if secpos+1 > maxsecpos then wflush() end + local align = tonumber(params[1]) + if align then + local x = align + -- Must be a power of 2 in the range (2 ... 256). + for i=1,8 do + x = x / 2 + if x == 1 then + waction("ALIGN", align-1, nil, 1) -- Action byte is 2**n-1. + return + end + end + end + werror("bad alignment") +end + +------------------------------------------------------------------------------ + +-- Pseudo-opcode for (primitive) type definitions (map to C types). +map_op[".type_3"] = function(params, nparams) + if not params then + return nparams == 2 and "name, ctype" or "name, ctype, reg" + end + local name, ctype, reg = params[1], params[2], params[3] + if not match(name, "^[%a_][%w_]*$") then + werror("bad type name `"..name.."'") + end + local tp = map_type[name] + if tp then + werror("duplicate type `"..name.."'") + end + -- Add #type to defines. A bit unclean to put it in map_archdef. 
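+  -- (Illustrative: after a directive like `.type frame, myframe, r1', an
+  -- operand `frame->next' resolves to a displacement off r1, and `#frame'
+  -- expands to sizeof(myframe); `frame' and `myframe' are placeholder names.)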
+ map_archdef["#"..name] = "sizeof("..ctype..")" + -- Add new type and emit shortcut define. + local num = ctypenum + 1 + map_type[name] = { + ctype = ctype, + ctypefmt = format("Dt%X(%%s)", num), + reg = reg, + } + wline(format("#define Dt%X(_V) (int)(ptrdiff_t)&(((%s *)0)_V)", num, ctype)) + ctypenum = num +end +map_op[".type_2"] = map_op[".type_3"] + +-- Dump type definitions. +local function dumptypes(out, lvl) + local t = {} + for name in pairs(map_type) do t[#t+1] = name end + sort(t) + out:write("Type definitions:\n") + for _,name in ipairs(t) do + local tp = map_type[name] + local reg = tp.reg or "" + out:write(format(" %-20s %-20s %s\n", name, tp.ctype, reg)) + end + out:write("\n") +end + +------------------------------------------------------------------------------ + +-- Set the current section. +function _M.section(num) + waction("SECTION", num) + wflush(true) -- SECTION is a terminal action. +end + +------------------------------------------------------------------------------ + +-- Dump architecture description. +function _M.dumparch(out) + out:write(format("DynASM %s version %s, released %s\n\n", + _info.arch, _info.version, _info.release)) + dumpactions(out) +end + +-- Dump all user defined elements. +function _M.dumpdef(out, lvl) + dumptypes(out, lvl) + dumpglobals(out, lvl) + dumpexterns(out, lvl) +end + +------------------------------------------------------------------------------ + +-- Pass callbacks from/to the DynASM core. +function _M.passcb(wl, we, wf, ww) + wline, werror, wfatal, wwarn = wl, we, wf, ww + return wflush +end + +-- Setup the arch-specific module. +function _M.setup(arch, opt) + g_arch, g_opt = arch, opt +end + +-- Merge the core maps and the arch-specific maps. +function _M.mergemaps(map_coreop, map_def) + setmetatable(map_op, { __index = map_coreop }) + setmetatable(map_def, { __index = map_archdef }) + return map_op, map_def +end + +return _M + +------------------------------------------------------------------------------ + diff --git a/ext/opcache/jit/ir/dynasm/dasm_proto.h b/ext/opcache/jit/ir/dynasm/dasm_proto.h new file mode 100644 index 00000000000..8914596adf5 --- /dev/null +++ b/ext/opcache/jit/ir/dynasm/dasm_proto.h @@ -0,0 +1,83 @@ +/* +** DynASM encoding engine prototypes. +** Copyright (C) 2005-2021 Mike Pall. All rights reserved. +** Released under the MIT license. See dynasm.lua for full copyright notice. +*/ + +#ifndef _DASM_PROTO_H +#define _DASM_PROTO_H + +#include +#include + +#define DASM_IDENT "DynASM 1.5.0" +#define DASM_VERSION 10500 /* 1.5.0 */ + +#ifndef Dst_DECL +#define Dst_DECL dasm_State **Dst +#endif + +#ifndef Dst_REF +#define Dst_REF (*Dst) +#endif + +#ifndef DASM_FDEF +#define DASM_FDEF extern +#endif + +#ifndef DASM_M_GROW +#define DASM_M_GROW(ctx, t, p, sz, need) \ + do { \ + size_t _sz = (sz), _need = (need); \ + if (_sz < _need) { \ + if (_sz < 16) _sz = 16; \ + while (_sz < _need) _sz += _sz; \ + (p) = (t *)realloc((p), _sz); \ + if ((p) == NULL) exit(1); \ + (sz) = _sz; \ + } \ + } while(0) +#endif + +#ifndef DASM_M_FREE +#define DASM_M_FREE(ctx, p, sz) free(p) +#endif + +/* Internal DynASM encoder state. */ +typedef struct dasm_State dasm_State; + + +/* Initialize and free DynASM state. */ +DASM_FDEF void dasm_init(Dst_DECL, int maxsection); +DASM_FDEF void dasm_free(Dst_DECL); + +/* Setup global array. Must be called before dasm_setup(). */ +DASM_FDEF void dasm_setupglobal(Dst_DECL, void **gl, unsigned int maxgl); + +/* Grow PC label array. Can be called after dasm_setup(), too. 
+DASM_FDEF void dasm_growpc(Dst_DECL, unsigned int maxpc);
+
+/* Setup encoder. */
+DASM_FDEF void dasm_setup(Dst_DECL, const void *actionlist);
+
+/* Feed encoder with actions. Calls are generated by pre-processor. */
+DASM_FDEF void dasm_put(Dst_DECL, int start, ...);
+
+/* Link sections and return the resulting size. */
+DASM_FDEF int dasm_link(Dst_DECL, size_t *szp);
+
+/* Encode sections into buffer. */
+DASM_FDEF int dasm_encode(Dst_DECL, void *buffer);
+
+/* Get PC label offset. */
+DASM_FDEF int dasm_getpclabel(Dst_DECL, unsigned int pc);
+
+#ifdef DASM_CHECKS
+/* Optional sanity checker to call between isolated encoding steps. */
+DASM_FDEF int dasm_checkstep(Dst_DECL, int secmatch);
+#else
+#define dasm_checkstep(a, b) 0
+#endif
+
+
+#endif /* _DASM_PROTO_H */
diff --git a/ext/opcache/jit/ir/dynasm/dasm_x64.lua b/ext/opcache/jit/ir/dynasm/dasm_x64.lua
new file mode 100644
index 00000000000..2c0a0e8681f
--- /dev/null
+++ b/ext/opcache/jit/ir/dynasm/dasm_x64.lua
@@ -0,0 +1,12 @@
+------------------------------------------------------------------------------
+-- DynASM x64 module.
+--
+-- Copyright (C) 2005-2021 Mike Pall. All rights reserved.
+-- See dynasm.lua for full copyright notice.
+------------------------------------------------------------------------------
+-- This module just sets 64 bit mode for the combined x86/x64 module.
+-- All the interesting stuff is there.
+------------------------------------------------------------------------------
+
+x64 = true -- Using a global is an ugly, but effective solution.
+return require("dasm_x86")
diff --git a/ext/opcache/jit/ir/dynasm/dasm_x86.h b/ext/opcache/jit/ir/dynasm/dasm_x86.h
new file mode 100644
index 00000000000..a2b46cc951e
--- /dev/null
+++ b/ext/opcache/jit/ir/dynasm/dasm_x86.h
@@ -0,0 +1,546 @@
+/*
+** DynASM x86 encoding engine.
+** Copyright (C) 2005-2021 Mike Pall. All rights reserved.
+** Released under the MIT license. See dynasm.lua for full copyright notice.
+*/
+
+#include <stddef.h>
+#include <stdarg.h>
+#include <string.h>
+#include <stdlib.h>
+
+#define DASM_ARCH "x86"
+
+#ifndef DASM_EXTERN
+#define DASM_EXTERN(a,b,c,d) 0
+#endif
+
+/* Action definitions. DASM_STOP must be 255. */
+enum {
+  DASM_DISP = 233,
+  DASM_IMM_S, DASM_IMM_B, DASM_IMM_W, DASM_IMM_D, DASM_IMM_WB, DASM_IMM_DB,
+  DASM_VREG, DASM_SPACE, DASM_SETLABEL, DASM_REL_A, DASM_REL_LG, DASM_REL_PC,
+  DASM_IMM_LG, DASM_IMM_PC, DASM_LABEL_LG, DASM_LABEL_PC, DASM_ALIGN,
+  DASM_EXTERN, DASM_ESC, DASM_MARK, DASM_SECTION, DASM_STOP
+};
+
+/* Maximum number of section buffer positions for a single dasm_put() call. */
+#define DASM_MAXSECPOS 25
+
+/* DynASM encoder status codes. Action list offset or number are or'ed in. */
+#define DASM_S_OK 0x00000000
+#define DASM_S_NOMEM 0x01000000
+#define DASM_S_PHASE 0x02000000
+#define DASM_S_MATCH_SEC 0x03000000
+#define DASM_S_RANGE_I 0x11000000
+#define DASM_S_RANGE_SEC 0x12000000
+#define DASM_S_RANGE_LG 0x13000000
+#define DASM_S_RANGE_PC 0x14000000
+#define DASM_S_RANGE_VREG 0x15000000
+#define DASM_S_UNDEF_L 0x21000000
+#define DASM_S_UNDEF_PC 0x22000000
+
+/* Macros to convert positions (8 bit section + 24 bit index). */
+#define DASM_POS2IDX(pos) ((pos)&0x00ffffff)
+#define DASM_POS2BIAS(pos) ((pos)&0xff000000)
+#define DASM_SEC2POS(sec) ((sec)<<24)
+#define DASM_POS2SEC(pos) ((pos)>>24)
+#define DASM_POS2PTR(D, pos) (D->sections[DASM_POS2SEC(pos)].rbuf + (pos))
+
+/* Action list type. */
+typedef const unsigned char *dasm_ActList;
+
+/* Per-section structure.
*/ +typedef struct dasm_Section { + int *rbuf; /* Biased buffer pointer (negative section bias). */ + int *buf; /* True buffer pointer. */ + size_t bsize; /* Buffer size in bytes. */ + int pos; /* Biased buffer position. */ + int epos; /* End of biased buffer position - max single put. */ + int ofs; /* Byte offset into section. */ +} dasm_Section; + +/* Core structure holding the DynASM encoding state. */ +struct dasm_State { + size_t psize; /* Allocated size of this structure. */ + dasm_ActList actionlist; /* Current actionlist pointer. */ + int *lglabels; /* Local/global chain/pos ptrs. */ + size_t lgsize; + int *pclabels; /* PC label chains/pos ptrs. */ + size_t pcsize; + void **globals; /* Array of globals (bias -10). */ + dasm_Section *section; /* Pointer to active section. */ + size_t codesize; /* Total size of all code sections. */ + int maxsection; /* 0 <= sectionidx < maxsection. */ + int status; /* Status code. */ + dasm_Section sections[1]; /* All sections. Alloc-extended. */ +}; + +/* The size of the core structure depends on the max. number of sections. */ +#define DASM_PSZ(ms) (sizeof(dasm_State)+(ms-1)*sizeof(dasm_Section)) + +/* Perform potentially overflowing pointer operations in a way that avoids UB. */ +#define DASM_PTR_SUB(p1, off) ((void *) ((uintptr_t) (p1) - sizeof(*p1) * (uintptr_t) (off))) +#define DASM_PTR_ADD(p1, off) ((void *) ((uintptr_t) (p1) + sizeof(*p1) * (uintptr_t) (off))) + +/* Initialize DynASM state. */ +void dasm_init(Dst_DECL, int maxsection) +{ + dasm_State *D; + size_t psz = 0; + int i; + Dst_REF = NULL; + DASM_M_GROW(Dst, struct dasm_State, Dst_REF, psz, DASM_PSZ(maxsection)); + D = Dst_REF; + D->psize = psz; + D->lglabels = NULL; + D->lgsize = 0; + D->pclabels = NULL; + D->pcsize = 0; + D->globals = NULL; + D->maxsection = maxsection; + for (i = 0; i < maxsection; i++) { + D->sections[i].buf = NULL; /* Need this for pass3. */ + D->sections[i].rbuf = DASM_PTR_SUB(D->sections[i].buf, DASM_SEC2POS(i)); + D->sections[i].bsize = 0; + D->sections[i].epos = 0; /* Wrong, but is recalculated after resize. */ + } +} + +/* Free DynASM state. */ +void dasm_free(Dst_DECL) +{ + dasm_State *D = Dst_REF; + int i; + for (i = 0; i < D->maxsection; i++) + if (D->sections[i].buf) + DASM_M_FREE(Dst, D->sections[i].buf, D->sections[i].bsize); + if (D->pclabels) DASM_M_FREE(Dst, D->pclabels, D->pcsize); + if (D->lglabels) DASM_M_FREE(Dst, D->lglabels, D->lgsize); + DASM_M_FREE(Dst, D, D->psize); +} + +/* Setup global label array. Must be called before dasm_setup(). */ +void dasm_setupglobal(Dst_DECL, void **gl, unsigned int maxgl) +{ + dasm_State *D = Dst_REF; + D->globals = gl - 10; /* Negative bias to compensate for locals. */ + DASM_M_GROW(Dst, int, D->lglabels, D->lgsize, (10+maxgl)*sizeof(int)); +} + +/* Grow PC label array. Can be called after dasm_setup(), too. */ +void dasm_growpc(Dst_DECL, unsigned int maxpc) +{ + dasm_State *D = Dst_REF; + size_t osz = D->pcsize; + DASM_M_GROW(Dst, int, D->pclabels, D->pcsize, maxpc*sizeof(int)); + memset((void *)(((unsigned char *)D->pclabels)+osz), 0, D->pcsize-osz); +} + +/* Setup encoder. 
*/ +void dasm_setup(Dst_DECL, const void *actionlist) +{ + dasm_State *D = Dst_REF; + int i; + D->actionlist = (dasm_ActList)actionlist; + D->status = DASM_S_OK; + D->section = &D->sections[0]; + memset((void *)D->lglabels, 0, D->lgsize); + if (D->pclabels) memset((void *)D->pclabels, 0, D->pcsize); + for (i = 0; i < D->maxsection; i++) { + D->sections[i].pos = DASM_SEC2POS(i); + D->sections[i].ofs = 0; + } +} + + +#ifdef DASM_CHECKS +#define CK(x, st) \ + do { if (!(x)) { \ + D->status = DASM_S_##st|(int)(p-D->actionlist-1); return; } } while (0) +#define CKPL(kind, st) \ + do { if ((size_t)((char *)pl-(char *)D->kind##labels) >= D->kind##size) { \ + D->status=DASM_S_RANGE_##st|(int)(p-D->actionlist-1); return; } } while (0) +#else +#define CK(x, st) ((void)0) +#define CKPL(kind, st) ((void)0) +#endif + +/* Pass 1: Store actions and args, link branches/labels, estimate offsets. */ +void dasm_put(Dst_DECL, int start, ...) +{ + va_list ap; + dasm_State *D = Dst_REF; + dasm_ActList p = D->actionlist + start; + dasm_Section *sec = D->section; + int pos = sec->pos, ofs = sec->ofs, mrm = -1; + int *b; + + if (pos >= sec->epos) { + DASM_M_GROW(Dst, int, sec->buf, sec->bsize, + sec->bsize + 2*DASM_MAXSECPOS*sizeof(int)); + sec->rbuf = sec->buf - DASM_POS2BIAS(pos); + sec->epos = (int)sec->bsize/sizeof(int) - DASM_MAXSECPOS+DASM_POS2BIAS(pos); + } + + b = sec->rbuf; + b[pos++] = start; + + va_start(ap, start); + while (1) { + int action = *p++; + while (action < DASM_DISP) { + ofs++; + action = *p++; + } + if (action <= DASM_REL_A) { + int n = va_arg(ap, int); + b[pos++] = n; + switch (action) { + case DASM_DISP: + if (n == 0) { if (mrm < 0) mrm = p[-2]; if ((mrm&7) != 5) break; } + /* fallthrough */ + case DASM_IMM_DB: if ((((unsigned)n+128)&-256) == 0) goto ob; /* fallthrough */ + case DASM_REL_A: /* Assumes ptrdiff_t is int. !x64 */ + case DASM_IMM_D: ofs += 4; break; + case DASM_IMM_S: CK(((n+128)&-256) == 0, RANGE_I); goto ob; + case DASM_IMM_B: CK((n&-256) == 0, RANGE_I); ob: ofs++; break; + case DASM_IMM_WB: if (((n+128)&-256) == 0) goto ob; /* fallthrough */ + case DASM_IMM_W: CK((n&-65536) == 0, RANGE_I); ofs += 2; break; + case DASM_SPACE: p++; ofs += n; break; + case DASM_SETLABEL: b[pos-2] = -0x40000000; break; /* Neg. label ofs. */ + case DASM_VREG: CK((n&-16) == 0 && (n != 4 || (*p>>5) != 2), RANGE_VREG); + if (*p < 0x40 && p[1] == DASM_DISP) mrm = n; + if (*p < 0x20 && (n&7) == 4) ofs++; + switch ((*p++ >> 3) & 3) { + case 3: n |= b[pos-3]; /* fallthrough */ + case 2: n |= b[pos-2]; /* fallthrough */ + case 1: if (n <= 7) { b[pos-1] |= 0x10; ofs--; } + } + continue; + } + mrm = -1; + } else { + int *pl, n; + switch (action) { + case DASM_REL_LG: + case DASM_IMM_LG: + n = *p++; pl = D->lglabels + n; + /* Bkwd rel or global. */ + if (n <= 246) { CK(n>=10||*pl<0, RANGE_LG); CKPL(lg, LG); goto putrel; } + pl -= 246; n = *pl; + if (n < 0) n = 0; /* Start new chain for fwd rel if label exists. */ + goto linkrel; + case DASM_REL_PC: + case DASM_IMM_PC: pl = D->pclabels + va_arg(ap, int); CKPL(pc, PC); + putrel: + n = *pl; + if (n < 0) { /* Label exists. Get label pos and store it. */ + b[pos] = -n; + } else { + linkrel: + b[pos] = n; /* Else link to rel chain, anchored at label. */ + *pl = pos; + } + pos++; + ofs += 4; /* Maximum offset needed. */ + if (action == DASM_REL_LG || action == DASM_REL_PC) { + b[pos++] = ofs; /* Store pass1 offset estimate. 
*/ + } else if (sizeof(ptrdiff_t) == 8) { + ofs += 4; + } + break; + case DASM_LABEL_LG: pl = D->lglabels + *p++; CKPL(lg, LG); goto putlabel; + case DASM_LABEL_PC: pl = D->pclabels + va_arg(ap, int); CKPL(pc, PC); + putlabel: + n = *pl; /* n > 0: Collapse rel chain and replace with label pos. */ + while (n > 0) { int *pb = DASM_POS2PTR(D, n); n = *pb; *pb = pos; } + *pl = -pos; /* Label exists now. */ + b[pos++] = ofs; /* Store pass1 offset estimate. */ + break; + case DASM_ALIGN: + ofs += *p++; /* Maximum alignment needed (arg is 2**n-1). */ + b[pos++] = ofs; /* Store pass1 offset estimate. */ + break; + case DASM_EXTERN: p += 2; ofs += 4; break; + case DASM_ESC: p++; ofs++; break; + case DASM_MARK: mrm = p[-2]; break; + case DASM_SECTION: + n = *p; CK(n < D->maxsection, RANGE_SEC); D->section = &D->sections[n]; + case DASM_STOP: goto stop; + } + } + } +stop: + va_end(ap); + sec->pos = pos; + sec->ofs = ofs; +} +#undef CK + +/* Pass 2: Link sections, shrink branches/aligns, fix label offsets. */ +int dasm_link(Dst_DECL, size_t *szp) +{ + dasm_State *D = Dst_REF; + int secnum; + int ofs = 0; + +#ifdef DASM_CHECKS + *szp = 0; + if (D->status != DASM_S_OK) return D->status; + { + int pc; + for (pc = 0; pc*sizeof(int) < D->pcsize; pc++) + if (D->pclabels[pc] > 0) return DASM_S_UNDEF_PC|pc; + } +#endif + + { /* Handle globals not defined in this translation unit. */ + int idx; + for (idx = 10; idx*sizeof(int) < D->lgsize; idx++) { + int n = D->lglabels[idx]; + /* Undefined label: Collapse rel chain and replace with marker (< 0). */ + while (n > 0) { int *pb = DASM_POS2PTR(D, n); n = *pb; *pb = -idx; } + } + } + + /* Combine all code sections. No support for data sections (yet). */ + for (secnum = 0; secnum < D->maxsection; secnum++) { + dasm_Section *sec = D->sections + secnum; + int *b = sec->rbuf; + int pos = DASM_SEC2POS(secnum); + int lastpos = sec->pos; + + while (pos != lastpos) { + dasm_ActList p = D->actionlist + b[pos++]; + int op = 0; + while (1) { + int action = *p++; + while (action < DASM_DISP) { + op = action; + action = *p++; + } + switch (action) { + case DASM_REL_LG: p++; + /* fallthrough */ + case DASM_REL_PC: { + int shrink = op == 0xe9 ? 3 : ((op&0xf0) == 0x80 ? 4 : 0); + if (shrink) { /* Shrinkable branch opcode? */ + int lofs, lpos = b[pos]; + if (lpos < 0) goto noshrink; /* Ext global? */ + lofs = *DASM_POS2PTR(D, lpos); + if (lpos > pos) { /* Fwd label: add cumulative section offsets. */ + int i; + for (i = secnum; i < DASM_POS2SEC(lpos); i++) + lofs += D->sections[i].ofs; + } else { + lofs -= ofs; /* Bkwd label: unfix offset. */ + } + lofs -= b[pos+1]; /* Short branch ok? */ + if (lofs >= -128-shrink && lofs <= 127) ofs -= shrink; /* Yes. */ + else { noshrink: shrink = 0; } /* No, cannot shrink op. */ + } + b[pos+1] = shrink; + pos += 2; + break; + } + /* fallthrough */ + case DASM_SPACE: case DASM_IMM_LG: case DASM_VREG: p++; + case DASM_DISP: case DASM_IMM_S: case DASM_IMM_B: case DASM_IMM_W: + case DASM_IMM_D: case DASM_IMM_WB: case DASM_IMM_DB: + case DASM_SETLABEL: case DASM_REL_A: case DASM_IMM_PC: pos++; break; + case DASM_LABEL_LG: p++; + /* fallthrough */ + case DASM_LABEL_PC: b[pos++] += ofs; break; /* Fix label offset. */ + case DASM_ALIGN: ofs -= (b[pos++]+ofs)&*p++; break; /* Adjust ofs. */ + case DASM_EXTERN: p += 2; break; + case DASM_ESC: op = *p++; break; + case DASM_MARK: break; + case DASM_SECTION: case DASM_STOP: goto stop; + } + } + stop: (void)0; + } + ofs += sec->ofs; /* Next section starts right after current section. 
*/ + } + + D->codesize = ofs; /* Total size of all code sections */ + *szp = ofs; + return DASM_S_OK; +} + +#define dasmb(x) *cp++ = (unsigned char)(x) +#ifndef DASM_ALIGNED_WRITES +typedef IR_SET_ALIGNED(1, unsigned short unaligned_short); +typedef IR_SET_ALIGNED(1, unsigned int unaligned_int); +typedef IR_SET_ALIGNED(1, unsigned long long unaligned_long_long); +#define dasmw(x) \ + do { *((unaligned_short *)cp) = (unsigned short)(x); cp+=2; } while (0) +#define dasmd(x) \ + do { *((unaligned_int *)cp) = (unsigned int)(x); cp+=4; } while (0) +#define dasmq(x) \ + do { *((unaligned_long_long *)cp) = (unsigned long long)(x); cp+=8; } while (0) +#else +#define dasmw(x) do { dasmb(x); dasmb((x)>>8); } while (0) +#define dasmd(x) do { dasmw(x); dasmw((x)>>16); } while (0) +#define dasmq(x) do { dasmd(x); dasmd((x)>>32); } while (0) +#endif +static unsigned char *dasma_(unsigned char *cp, ptrdiff_t x) +{ + if (sizeof(ptrdiff_t) == 8) + dasmq((unsigned long long)x); + else + dasmd((unsigned int)x); + return cp; +} +#define dasma(x) (cp = dasma_(cp, (x))) + +/* Pass 3: Encode sections. */ +int dasm_encode(Dst_DECL, void *buffer) +{ + dasm_State *D = Dst_REF; + unsigned char *base = (unsigned char *)buffer; + unsigned char *cp = base; + int secnum; + + /* Encode all code sections. No support for data sections (yet). */ + for (secnum = 0; secnum < D->maxsection; secnum++) { + dasm_Section *sec = D->sections + secnum; + int *b = sec->buf; + int *endb = DASM_PTR_ADD(sec->rbuf, sec->pos); + + while (b != endb) { + dasm_ActList p = D->actionlist + *b++; + unsigned char *mark = NULL; + while (1) { + int n; + int action = *p++; + while (action < DASM_DISP) { + *cp++ = action; + action = *p++; + } + if (action >= DASM_ALIGN) { + switch (action) { + case DASM_ALIGN: + b++; + n = *p++; + while (((cp-base) & n)) *cp++ = 0x90; /* nop */ + continue; + case DASM_EXTERN: n = DASM_EXTERN(Dst, cp, p[1], *p); p += 2; goto wd; + case DASM_MARK: mark = cp; continue; + case DASM_ESC: action = *p++; *cp++ = action; continue; + case DASM_SECTION: case DASM_STOP: goto stop; + } + } + n = *b++; + switch (action) { + case DASM_DISP: if (!mark) mark = cp; { + unsigned char *mm = mark; + if (*p != DASM_IMM_DB && *p != DASM_IMM_WB) mark = NULL; + if (n == 0) { int mrm = mm[-1]&7; if (mrm == 4) mrm = mm[0]&7; + if (mrm != 5) { mm[-1] -= 0x80; break; } } + if ((((unsigned)n+128) & -256) != 0) goto wd; else mm[-1] -= 0x40; + } + /* fallthrough */ + case DASM_IMM_S: case DASM_IMM_B: wb: dasmb(n); break; + case DASM_IMM_DB: if ((((unsigned)n+128)&-256) == 0) { + db: if (!mark) mark = cp; mark[-2] += 2; mark = NULL; goto wb; + } else mark = NULL; + /* fallthrough */ + case DASM_IMM_D: wd: dasmd(n); break; + case DASM_IMM_WB: if ((((unsigned)n+128)&-256) == 0) goto db; else mark = NULL; + /* fallthrough */ + case DASM_IMM_W: dasmw(n); break; + case DASM_VREG: { + int t = *p++; + unsigned char *ex = cp - (t&7); + if ((n & 8) && t < 0xa0) { + if (*ex & 0x80) ex[1] ^= 0x20 << (t>>6); else *ex ^= 1 << (t>>6); + n &= 7; + } else if (n & 0x10) { + if (*ex & 0x80) { + *ex = 0xc5; ex[1] = (ex[1] & 0x80) | ex[2]; ex += 2; + } + while (++ex < cp) ex[-1] = *ex; + if (mark) mark--; + cp--; + n &= 7; + } + if (t >= 0xc0) n <<= 4; + else if (t >= 0x40) n <<= 3; + else if (n == 4 && t < 0x20) { cp[-1] ^= n; *cp++ = 0x20; } + cp[-1] ^= n; + break; + } + case DASM_REL_LG: p++; if (n >= 0) goto rel_pc; + b++; n = (int)(ptrdiff_t)D->globals[-n]; + /* fallthrough */ + case DASM_REL_A: rel_a: + n -= (unsigned int)(ptrdiff_t)(cp+4); goto wd; /* !x64 */ + 
case DASM_REL_PC: rel_pc: { + int shrink = *b++; + int *pb = DASM_POS2PTR(D, n); if (*pb < 0) { n = pb[1]; goto rel_a; } + n = *pb - ((int)(cp-base) + 4-shrink); + if (shrink == 0) goto wd; + if (shrink == 4) { cp--; cp[-1] = *cp-0x10; } else cp[-1] = 0xeb; + goto wb; + } + case DASM_IMM_LG: + p++; + if (n < 0) { dasma((ptrdiff_t)D->globals[-n]); break; } + /* fallthrough */ + case DASM_IMM_PC: { + int *pb = DASM_POS2PTR(D, n); + dasma(*pb < 0 ? (ptrdiff_t)pb[1] : (*pb + (ptrdiff_t)base)); + break; + } + case DASM_LABEL_LG: { + int idx = *p++; + if (idx >= 10) + D->globals[idx] = (void *)(base + (*p == DASM_SETLABEL ? *b : n)); + break; + } + case DASM_LABEL_PC: case DASM_SETLABEL: break; + case DASM_SPACE: { int fill = *p++; while (n--) *cp++ = fill; break; } + } + } + stop: (void)0; + } + } + + if (base + D->codesize != cp) /* Check for phase errors. */ + return DASM_S_PHASE; + return DASM_S_OK; +} + +/* Get PC label offset. */ +int dasm_getpclabel(Dst_DECL, unsigned int pc) +{ + dasm_State *D = Dst_REF; + if (pc*sizeof(int) < D->pcsize) { + int pos = D->pclabels[pc]; + if (pos < 0) return *DASM_POS2PTR(D, -pos); + if (pos > 0) return -1; /* Undefined. */ + } + return -2; /* Unused or out of range. */ +} + +#ifdef DASM_CHECKS +/* Optional sanity checker to call between isolated encoding steps. */ +int dasm_checkstep(Dst_DECL, int secmatch) +{ + dasm_State *D = Dst_REF; + if (D->status == DASM_S_OK) { + int i; + for (i = 1; i <= 9; i++) { + if (D->lglabels[i] > 0) { D->status = DASM_S_UNDEF_L|i; break; } + D->lglabels[i] = 0; + } + } + if (D->status == DASM_S_OK && secmatch >= 0 && + D->section != &D->sections[secmatch]) + D->status = DASM_S_MATCH_SEC|(int)(D->section-D->sections); + return D->status; +} +#endif + diff --git a/ext/opcache/jit/ir/dynasm/dasm_x86.lua b/ext/opcache/jit/ir/dynasm/dasm_x86.lua new file mode 100644 index 00000000000..d5eea69e485 --- /dev/null +++ b/ext/opcache/jit/ir/dynasm/dasm_x86.lua @@ -0,0 +1,2390 @@ +------------------------------------------------------------------------------ +-- DynASM x86/x64 module. +-- +-- Copyright (C) 2005-2021 Mike Pall. All rights reserved. +-- See dynasm.lua for full copyright notice. +------------------------------------------------------------------------------ + +local x64 = x64 + +-- Module information: +local _info = { + arch = x64 and "x64" or "x86", + description = "DynASM x86/x64 module", + version = "1.5.0", + vernum = 10500, + release = "2021-05-02", + author = "Mike Pall", + license = "MIT", +} + +-- Exported glue functions for the arch-specific module. +local _M = { _info = _info } + +-- Cache library functions. +local type, tonumber, pairs, ipairs = type, tonumber, pairs, ipairs +local assert, unpack, setmetatable = assert, unpack or table.unpack, setmetatable +local _s = string +local sub, format, byte, char = _s.sub, _s.format, _s.byte, _s.char +local find, match, gmatch, gsub = _s.find, _s.match, _s.gmatch, _s.gsub +local concat, sort, remove = table.concat, table.sort, table.remove +local bit = bit or require("bit") +local band, bxor, shl, shr = bit.band, bit.bxor, bit.lshift, bit.rshift + +-- Inherited tables and callbacks. +local g_opt, g_arch +local wline, werror, wfatal, wwarn + +-- Action name list. +-- CHECK: Keep this in sync with the C code! 
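+-- (With 23 action names, actfirst below computes 256-23 = 233, which must
+-- equal DASM_DISP in the C enum; STOP then lands on the required value 255.)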
+local action_names = { + -- int arg, 1 buffer pos: + "DISP", "IMM_S", "IMM_B", "IMM_W", "IMM_D", "IMM_WB", "IMM_DB", + -- action arg (1 byte), int arg, 1 buffer pos (reg/num): + "VREG", "SPACE", + -- ptrdiff_t arg, 1 buffer pos (address): !x64 + "SETLABEL", "REL_A", + -- action arg (1 byte) or int arg, 2 buffer pos (link, offset): + "REL_LG", "REL_PC", + -- action arg (1 byte) or int arg, 1 buffer pos (link): + "IMM_LG", "IMM_PC", + -- action arg (1 byte) or int arg, 1 buffer pos (offset): + "LABEL_LG", "LABEL_PC", + -- action arg (1 byte), 1 buffer pos (offset): + "ALIGN", + -- action args (2 bytes), no buffer pos. + "EXTERN", + -- action arg (1 byte), no buffer pos. + "ESC", + -- no action arg, no buffer pos. + "MARK", + -- action arg (1 byte), no buffer pos, terminal action: + "SECTION", + -- no args, no buffer pos, terminal action: + "STOP" +} + +-- Maximum number of section buffer positions for dasm_put(). +-- CHECK: Keep this in sync with the C code! +local maxsecpos = 25 -- Keep this low, to avoid excessively long C lines. + +-- Action name -> action number (dynamically generated below). +local map_action = {} +-- First action number. Everything below does not need to be escaped. +local actfirst = 256-#action_names + +-- Action list buffer and string (only used to remove dupes). +local actlist = {} +local actstr = "" + +-- Argument list for next dasm_put(). Start with offset 0 into action list. +local actargs = { 0 } + +-- Current number of section buffer positions for dasm_put(). +local secpos = 1 + +-- VREG kind encodings, pre-shifted by 5 bits. +local map_vreg = { + ["modrm.rm.m"] = 0x00, + ["modrm.rm.r"] = 0x20, + ["opcode"] = 0x20, + ["sib.base"] = 0x20, + ["sib.index"] = 0x40, + ["modrm.reg"] = 0x80, + ["vex.v"] = 0xa0, + ["imm.hi"] = 0xc0, +} + +-- Current number of VREG actions contributing to REX/VEX shrinkage. +local vreg_shrink_count = 0 + +------------------------------------------------------------------------------ + +-- Compute action numbers for action names. +for n,name in ipairs(action_names) do + local num = actfirst + n - 1 + map_action[name] = num +end + +-- Dump action names and numbers. +local function dumpactions(out) + out:write("DynASM encoding engine action codes:\n") + for n,name in ipairs(action_names) do + local num = map_action[name] + out:write(format(" %-10s %02X %d\n", name, num, num)) + end + out:write("\n") +end + +-- Write action list buffer as a huge static C array. +local function writeactions(out, name) + local nn = #actlist + local last = actlist[nn] or 255 + actlist[nn] = nil -- Remove last byte. + if nn == 0 then nn = 1 end + out:write("static const unsigned char ", name, "[", nn, "] = {\n") + local s = " " + for n,b in ipairs(actlist) do + s = s..b.."," + if #s >= 75 then + assert(out:write(s, "\n")) + s = " " + end + end + out:write(s, last, "\n};\n\n") -- Add last byte back. +end + +------------------------------------------------------------------------------ + +-- Add byte to action list. +local function wputxb(n) + assert(n >= 0 and n <= 255 and n % 1 == 0, "byte out of range") + actlist[#actlist+1] = n +end + +-- Add action to list with optional arg. Advance buffer pos, too. +local function waction(action, a, num) + wputxb(assert(map_action[action], "bad action name `"..action.."'")) + if a then actargs[#actargs+1] = a end + if a or num then secpos = secpos + (num or 1) end +end + +-- Optionally add a VREG action. 
+local function wvreg(kind, vreg, psz, sk, defer) + if not vreg then return end + waction("VREG", vreg) + local b = assert(map_vreg[kind], "bad vreg kind `"..vreg.."'") + if b < (sk or 0) then + vreg_shrink_count = vreg_shrink_count + 1 + end + if not defer then + b = b + vreg_shrink_count * 8 + vreg_shrink_count = 0 + end + wputxb(b + (psz or 0)) +end + +-- Add call to embedded DynASM C code. +local function wcall(func, args) + wline(format("dasm_%s(Dst, %s);", func, concat(args, ", ")), true) +end + +-- Delete duplicate action list chunks. A tad slow, but so what. +local function dedupechunk(offset) + local al, as = actlist, actstr + local chunk = char(unpack(al, offset+1, #al)) + local orig = find(as, chunk, 1, true) + if orig then + actargs[1] = orig-1 -- Replace with original offset. + for i=offset+1,#al do al[i] = nil end -- Kill dupe. + else + actstr = as..chunk + end +end + +-- Flush action list (intervening C code or buffer pos overflow). +local function wflush(term) + local offset = actargs[1] + if #actlist == offset then return end -- Nothing to flush. + if not term then waction("STOP") end -- Terminate action list. + dedupechunk(offset) + wcall("put", actargs) -- Add call to dasm_put(). + actargs = { #actlist } -- Actionlist offset is 1st arg to next dasm_put(). + secpos = 1 -- The actionlist offset occupies a buffer position, too. +end + +-- Put escaped byte. +local function wputb(n) + if n >= actfirst then waction("ESC") end -- Need to escape byte. + wputxb(n) +end + +------------------------------------------------------------------------------ + +-- Global label name -> global label number. With auto assignment on 1st use. +local next_global = 10 +local map_global = setmetatable({}, { __index = function(t, name) + if not match(name, "^[%a_][%w_@]*$") then werror("bad global label") end + local n = next_global + if n > 246 then werror("too many global labels") end + next_global = n + 1 + t[name] = n + return n +end}) + +-- Dump global labels. +local function dumpglobals(out, lvl) + local t = {} + for name, n in pairs(map_global) do t[n] = name end + out:write("Global labels:\n") + for i=10,next_global-1 do + out:write(format(" %s\n", t[i])) + end + out:write("\n") +end + +-- Write global label enum. +local function writeglobals(out, prefix) + local t = {} + for name, n in pairs(map_global) do t[n] = name end + out:write("enum {\n") + for i=10,next_global-1 do + out:write(" ", prefix, gsub(t[i], "@.*", ""), ",\n") + end + out:write(" ", prefix, "_MAX\n};\n") +end + +-- Write global label names. +local function writeglobalnames(out, name) + local t = {} + for name, n in pairs(map_global) do t[n] = name end + out:write("static const char *const ", name, "[] = {\n") + for i=10,next_global-1 do + out:write(" \"", t[i], "\",\n") + end + out:write(" (const char *)0\n};\n") +end + +------------------------------------------------------------------------------ + +-- Extern label name -> extern label number. With auto assignment on 1st use. +local next_extern = -1 +local map_extern = setmetatable({}, { __index = function(t, name) + -- No restrictions on the name for now. + local n = next_extern + if n < -256 then werror("too many extern labels") end + next_extern = n - 1 + t[name] = n + return n +end}) + +-- Dump extern labels. 
+local function dumpexterns(out, lvl) + local t = {} + for name, n in pairs(map_extern) do t[-n] = name end + out:write("Extern labels:\n") + for i=1,-next_extern-1 do + out:write(format(" %s\n", t[i])) + end + out:write("\n") +end + +-- Write extern label names. +local function writeexternnames(out, name) + local t = {} + for name, n in pairs(map_extern) do t[-n] = name end + out:write("static const char *const ", name, "[] = {\n") + for i=1,-next_extern-1 do + out:write(" \"", t[i], "\",\n") + end + out:write(" (const char *)0\n};\n") +end + +------------------------------------------------------------------------------ + +-- Arch-specific maps. +local map_archdef = {} -- Ext. register name -> int. name. +local map_reg_rev = {} -- Int. register name -> ext. name. +local map_reg_num = {} -- Int. register name -> register number. +local map_reg_opsize = {} -- Int. register name -> operand size. +local map_reg_valid_base = {} -- Int. register name -> valid base register? +local map_reg_valid_index = {} -- Int. register name -> valid index register? +local map_reg_needrex = {} -- Int. register name -> need rex vs. no rex. +local reg_list = {} -- Canonical list of int. register names. + +local map_type = {} -- Type name -> { ctype, reg } +local ctypenum = 0 -- Type number (for _PTx macros). + +local addrsize = x64 and "q" or "d" -- Size for address operands. + +-- Helper functions to fill register maps. +local function mkrmap(sz, cl, names) + local cname = format("@%s", sz) + reg_list[#reg_list+1] = cname + map_archdef[cl] = cname + map_reg_rev[cname] = cl + map_reg_num[cname] = -1 + map_reg_opsize[cname] = sz + if sz == addrsize or sz == "d" then + map_reg_valid_base[cname] = true + map_reg_valid_index[cname] = true + end + if names then + for n,name in ipairs(names) do + local iname = format("@%s%x", sz, n-1) + reg_list[#reg_list+1] = iname + map_archdef[name] = iname + map_reg_rev[iname] = name + map_reg_num[iname] = n-1 + map_reg_opsize[iname] = sz + if sz == "b" and n > 4 then map_reg_needrex[iname] = false end + if sz == addrsize or sz == "d" then + map_reg_valid_base[iname] = true + map_reg_valid_index[iname] = true + end + end + end + for i=0,(x64 and sz ~= "f") and 15 or 7 do + local needrex = sz == "b" and i > 3 + local iname = format("@%s%x%s", sz, i, needrex and "R" or "") + if needrex then map_reg_needrex[iname] = true end + local name + if sz == "o" or sz == "y" then name = format("%s%d", cl, i) + elseif sz == "f" then name = format("st%d", i) + else name = format("r%d%s", i, sz == addrsize and "" or sz) end + map_archdef[name] = iname + if not map_reg_rev[iname] then + reg_list[#reg_list+1] = iname + map_reg_rev[iname] = name + map_reg_num[iname] = i + map_reg_opsize[iname] = sz + if sz == addrsize or sz == "d" then + map_reg_valid_base[iname] = true + map_reg_valid_index[iname] = true + end + end + end + reg_list[#reg_list+1] = "" +end + +-- Integer registers (qword, dword, word and byte sized). +if x64 then + mkrmap("q", "Rq", {"rax", "rcx", "rdx", "rbx", "rsp", "rbp", "rsi", "rdi"}) +end +mkrmap("d", "Rd", {"eax", "ecx", "edx", "ebx", "esp", "ebp", "esi", "edi"}) +mkrmap("w", "Rw", {"ax", "cx", "dx", "bx", "sp", "bp", "si", "di"}) +mkrmap("b", "Rb", {"al", "cl", "dl", "bl", "ah", "ch", "dh", "bh"}) +map_reg_valid_index[map_archdef.esp] = false +if x64 then map_reg_valid_index[map_archdef.rsp] = false end +if x64 then map_reg_needrex[map_archdef.Rb] = true end +map_archdef["Ra"] = "@"..addrsize + +-- FP registers (internally tword sized, but use "f" as operand size). 
+mkrmap("f", "Rf") + +-- SSE registers (oword sized, but qword and dword accessible). +mkrmap("o", "xmm") + +-- AVX registers (yword sized, but oword, qword and dword accessible). +mkrmap("y", "ymm") + +-- Operand size prefixes to codes. +local map_opsize = { + byte = "b", word = "w", dword = "d", qword = "q", oword = "o", yword = "y", + tword = "t", aword = addrsize, +} + +-- Operand size code to number. +local map_opsizenum = { + b = 1, w = 2, d = 4, q = 8, o = 16, y = 32, t = 10, +} + +-- Operand size code to name. +local map_opsizename = { + b = "byte", w = "word", d = "dword", q = "qword", o = "oword", y = "yword", + t = "tword", f = "fpword", +} + +-- Valid index register scale factors. +local map_xsc = { + ["1"] = 0, ["2"] = 1, ["4"] = 2, ["8"] = 3, +} + +-- Condition codes. +local map_cc = { + o = 0, no = 1, b = 2, nb = 3, e = 4, ne = 5, be = 6, nbe = 7, + s = 8, ns = 9, p = 10, np = 11, l = 12, nl = 13, le = 14, nle = 15, + c = 2, nae = 2, nc = 3, ae = 3, z = 4, nz = 5, na = 6, a = 7, + pe = 10, po = 11, nge = 12, ge = 13, ng = 14, g = 15, +} + + +-- Reverse defines for registers. +function _M.revdef(s) + return gsub(s, "@%w+", map_reg_rev) +end + +-- Dump register names and numbers +local function dumpregs(out) + out:write("Register names, sizes and internal numbers:\n") + for _,reg in ipairs(reg_list) do + if reg == "" then + out:write("\n") + else + local name = map_reg_rev[reg] + local num = map_reg_num[reg] + local opsize = map_opsizename[map_reg_opsize[reg]] + out:write(format(" %-5s %-8s %s\n", name, opsize, + num < 0 and "(variable)" or num)) + end + end +end + +------------------------------------------------------------------------------ + +-- Put action for label arg (IMM_LG, IMM_PC, REL_LG, REL_PC). +local function wputlabel(aprefix, imm, num) + if type(imm) == "number" then + if imm < 0 then + waction("EXTERN") + wputxb(aprefix == "IMM_" and 0 or 1) + imm = -imm-1 + else + waction(aprefix.."LG", nil, num); + end + wputxb(imm) + else + waction(aprefix.."PC", imm, num) + end +end + +-- Put signed byte or arg. +local function wputsbarg(n) + if type(n) == "number" then + if n < -128 or n > 127 then + werror("signed immediate byte out of range") + end + if n < 0 then n = n + 256 end + wputb(n) + else waction("IMM_S", n) end +end + +-- Put unsigned byte or arg. +local function wputbarg(n) + if type(n) == "number" then + if n < 0 or n > 255 then + werror("unsigned immediate byte out of range") + end + wputb(n) + else waction("IMM_B", n) end +end + +-- Put unsigned word or arg. +local function wputwarg(n) + if type(n) == "number" then + if shr(n, 16) ~= 0 then + werror("unsigned immediate word out of range") + end + wputb(band(n, 255)); wputb(shr(n, 8)); + else waction("IMM_W", n) end +end + +-- Put signed or unsigned dword or arg. +local function wputdarg(n) + local tn = type(n) + if tn == "number" then + wputb(band(n, 255)) + wputb(band(shr(n, 8), 255)) + wputb(band(shr(n, 16), 255)) + wputb(shr(n, 24)) + elseif tn == "table" then + wputlabel("IMM_", n[1], 1) + else + waction("IMM_D", n) + end +end + +-- Put signed or unsigned qword or arg. +local function wputqarg(n) + local tn = type(n) + if tn == "number" then -- This is only used for numbers from -2^31..2^32-1. 
+ wputb(band(n, 255)) + wputb(band(shr(n, 8), 255)) + wputb(band(shr(n, 16), 255)) + wputb(shr(n, 24)) + local sign = n < 0 and 255 or 0 + wputb(sign); wputb(sign); wputb(sign); wputb(sign) + else + waction("IMM_D", format("(unsigned int)(%s)", n)) + waction("IMM_D", format("(unsigned int)((unsigned long long)(%s)>>32)", n)) + end +end + +-- Put operand-size dependent number or arg (defaults to dword). +local function wputszarg(sz, n) + if not sz or sz == "d" or sz == "q" then wputdarg(n) + elseif sz == "w" then wputwarg(n) + elseif sz == "b" then wputbarg(n) + elseif sz == "s" then wputsbarg(n) + else werror("bad operand size") end +end + +-- Put multi-byte opcode with operand-size dependent modifications. +local function wputop(sz, op, rex, vex, vregr, vregxb) + local psz, sk = 0, nil + if vex then + local tail + if vex.m == 1 and band(rex, 11) == 0 then + if x64 and vregxb then + sk = map_vreg["modrm.reg"] + else + wputb(0xc5) + tail = shl(bxor(band(rex, 4), 4), 5) + psz = 3 + end + end + if not tail then + wputb(0xc4) + wputb(shl(bxor(band(rex, 7), 7), 5) + vex.m) + tail = shl(band(rex, 8), 4) + psz = 4 + end + local reg, vreg = 0, nil + if vex.v then + reg = vex.v.reg + if not reg then werror("bad vex operand") end + if reg < 0 then reg = 0; vreg = vex.v.vreg end + end + if sz == "y" or vex.l then tail = tail + 4 end + wputb(tail + shl(bxor(reg, 15), 3) + vex.p) + wvreg("vex.v", vreg) + rex = 0 + if op >= 256 then werror("bad vex opcode") end + else + if rex ~= 0 then + if not x64 then werror("bad operand size") end + elseif (vregr or vregxb) and x64 then + rex = 0x10 + sk = map_vreg["vex.v"] + end + end + local r + if sz == "w" then wputb(102) end + -- Needs >32 bit numbers, but only for crc32 eax, word [ebx] + if op >= 4294967296 then r = op%4294967296 wputb((op-r)/4294967296) op = r end + if op >= 16777216 then wputb(shr(op, 24)); op = band(op, 0xffffff) end + if op >= 65536 then + if rex ~= 0 then + local opc3 = band(op, 0xffff00) + if opc3 == 0x0f3a00 or opc3 == 0x0f3800 then + wputb(64 + band(rex, 15)); rex = 0; psz = 2 + end + end + wputb(shr(op, 16)); op = band(op, 0xffff); psz = psz + 1 + end + if op >= 256 then + local b = shr(op, 8) + if b == 15 and rex ~= 0 then wputb(64 + band(rex, 15)); rex = 0; psz = 2 end + wputb(b); op = band(op, 255); psz = psz + 1 + end + if rex ~= 0 then wputb(64 + band(rex, 15)); psz = 2 end + if sz == "b" then op = op - 1 end + wputb(op) + return psz, sk +end + +-- Put ModRM or SIB formatted byte. +local function wputmodrm(m, s, rm, vs, vrm) + assert(m < 4 and s < 16 and rm < 16, "bad modrm operands") + wputb(shl(m, 6) + shl(band(s, 7), 3) + band(rm, 7)) +end + +-- Put ModRM/SIB plus optional displacement. +local function wputmrmsib(t, imark, s, vsreg, psz, sk) + local vreg, vxreg + local reg, xreg = t.reg, t.xreg + if reg and reg < 0 then reg = 0; vreg = t.vreg end + if xreg and xreg < 0 then xreg = 0; vxreg = t.vxreg end + if s < 0 then s = 0 end + + -- Register mode. + if sub(t.mode, 1, 1) == "r" then + wputmodrm(3, s, reg) + wvreg("modrm.reg", vsreg, psz+1, sk, vreg) + wvreg("modrm.rm.r", vreg, psz+1, sk) + return + end + + local disp = t.disp + local tdisp = type(disp) + -- No base register? + if not reg then + local riprel = false + if xreg then + -- Indexed mode with index register only. 
+ -- [xreg*xsc+disp] -> (0, s, esp) (xsc, xreg, ebp) + wputmodrm(0, s, 4) + if imark == "I" then waction("MARK") end + wvreg("modrm.reg", vsreg, psz+1, sk, vxreg) + wputmodrm(t.xsc, xreg, 5) + wvreg("sib.index", vxreg, psz+2, sk) + else + -- Pure 32 bit displacement. + if x64 and tdisp ~= "table" then + wputmodrm(0, s, 4) -- [disp] -> (0, s, esp) (0, esp, ebp) + wvreg("modrm.reg", vsreg, psz+1, sk) + if imark == "I" then waction("MARK") end + wputmodrm(0, 4, 5) + else + riprel = x64 + wputmodrm(0, s, 5) -- [disp|rip-label] -> (0, s, ebp) + wvreg("modrm.reg", vsreg, psz+1, sk) + if imark == "I" then waction("MARK") end + end + end + if riprel then -- Emit rip-relative displacement. + if match("UWSiI", imark) then + werror("NYI: rip-relative displacement followed by immediate") + end + -- The previous byte in the action buffer cannot be 0xe9 or 0x80-0x8f. + wputlabel("REL_", disp[1], 2) + else + wputdarg(disp) + end + return + end + + local m + if tdisp == "number" then -- Check displacement size at assembly time. + if disp == 0 and band(reg, 7) ~= 5 then -- [ebp] -> [ebp+0] (in SIB, too) + if not vreg then m = 0 end -- Force DISP to allow [Rd(5)] -> [ebp+0] + elseif disp >= -128 and disp <= 127 then m = 1 + else m = 2 end + elseif tdisp == "table" then + m = 2 + end + + -- Index register present or esp as base register: need SIB encoding. + if xreg or band(reg, 7) == 4 then + wputmodrm(m or 2, s, 4) -- ModRM. + if m == nil or imark == "I" then waction("MARK") end + wvreg("modrm.reg", vsreg, psz+1, sk, vxreg or vreg) + wputmodrm(t.xsc or 0, xreg or 4, reg) -- SIB. + wvreg("sib.index", vxreg, psz+2, sk, vreg) + wvreg("sib.base", vreg, psz+2, sk) + else + wputmodrm(m or 2, s, reg) -- ModRM. + if (imark == "I" and (m == 1 or m == 2)) or + (m == nil and (vsreg or vreg)) then waction("MARK") end + wvreg("modrm.reg", vsreg, psz+1, sk, vreg) + wvreg("modrm.rm.m", vreg, psz+1, sk) + end + + -- Put displacement. + if m == 1 then wputsbarg(disp) + elseif m == 2 then wputdarg(disp) + elseif m == nil then waction("DISP", disp) end +end + +------------------------------------------------------------------------------ + +-- Return human-readable operand mode string. +local function opmodestr(op, args) + local m = {} + for i=1,#args do + local a = args[i] + m[#m+1] = sub(a.mode, 1, 1)..(a.opsize or "?") + end + return op.." "..concat(m, ",") +end + +-- Convert number to valid integer or nil. +local function toint(expr, isqword) + local n = tonumber(expr) + if n then + if n % 1 ~= 0 then + werror("not an integer number `"..expr.."'") + elseif isqword then + if n < -2147483648 or n > 2147483647 then + n = nil -- Handle it as an expression to avoid precision loss. + end + elseif n < -2147483648 or n > 4294967295 then + werror("bad integer number `"..expr.."'") + end + return n + end +end + +-- Parse immediate expression. +local function immexpr(expr) + -- &expr (pointer) + if sub(expr, 1, 1) == "&" then + return "iPJ", format("(ptrdiff_t)(%s)", sub(expr,2)) + end + + local prefix = sub(expr, 1, 2) + -- =>expr (pc label reference) + if prefix == "=>" then + return "iJ", sub(expr, 3) + end + -- ->name (global label reference) + if prefix == "->" then + return "iJ", map_global[sub(expr, 3)] + end + + -- [<>][1-9] (local label reference) + local dir, lnum = match(expr, "^([<>])([1-9])$") + if dir then -- Fwd: 247-255, Bkwd: 1-9. 
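+    -- (Global labels are auto-assigned 10..246, so these ranges are free.)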
+ return "iJ", lnum + (dir == ">" and 246 or 0) + end + + local extname = match(expr, "^extern%s+(%S+)$") + if extname then + return "iJ", map_extern[extname] + end + + -- expr (interpreted as immediate) + return "iI", expr +end + +-- Parse displacement expression: +-num, +-expr, +-opsize*num +local function dispexpr(expr) + local disp = expr == "" and 0 or toint(expr) + if disp then return disp end + local c, dispt = match(expr, "^([+-])%s*(.+)$") + if c == "+" then + expr = dispt + elseif not c then + werror("bad displacement expression `"..expr.."'") + end + local opsize, tailops = match(dispt, "^(%w+)%s*%*%s*(.+)$") + local ops, imm = map_opsize[opsize], toint(tailops) + if ops and imm then + if c == "-" then imm = -imm end + return imm*map_opsizenum[ops] + end + local mode, iexpr = immexpr(dispt) + if mode == "iJ" then + if c == "-" then werror("cannot invert label reference") end + return { iexpr } + end + return expr -- Need to return original signed expression. +end + +-- Parse register or type expression. +local function rtexpr(expr) + if not expr then return end + local tname, ovreg = match(expr, "^([%w_]+):(@[%w_]+)$") + local tp = map_type[tname or expr] + if tp then + local reg = ovreg or tp.reg + local rnum = map_reg_num[reg] + if not rnum then + werror("type `"..(tname or expr).."' needs a register override") + end + if not map_reg_valid_base[reg] then + werror("bad base register override `"..(map_reg_rev[reg] or reg).."'") + end + return reg, rnum, tp + end + return expr, map_reg_num[expr] +end + +-- Parse operand and return { mode, opsize, reg, xreg, xsc, disp, imm }. +local function parseoperand(param, isqword) + local t = {} + + local expr = param + local opsize, tailops = match(param, "^(%w+)%s*(.+)$") + if opsize then + t.opsize = map_opsize[opsize] + if t.opsize then expr = tailops end + end + + local br = match(expr, "^%[%s*(.-)%s*%]$") + repeat + if br then + t.mode = "xm" + + -- [disp] + t.disp = toint(br) + if t.disp then + t.mode = x64 and "xm" or "xmO" + break + end + + -- [reg...] + local tp + local reg, tailr = match(br, "^([@%w_:]+)%s*(.*)$") + reg, t.reg, tp = rtexpr(reg) + if not t.reg then + -- [expr] + t.mode = x64 and "xm" or "xmO" + t.disp = dispexpr("+"..br) + break + end + + if t.reg == -1 then + t.vreg, tailr = match(tailr, "^(%b())(.*)$") + if not t.vreg then werror("bad variable register expression") end + end + + -- [xreg*xsc] or [xreg*xsc+-disp] or [xreg*xsc+-expr] + local xsc, tailsc = match(tailr, "^%*%s*([1248])%s*(.*)$") + if xsc then + if not map_reg_valid_index[reg] then + werror("bad index register `"..map_reg_rev[reg].."'") + end + t.xsc = map_xsc[xsc] + t.xreg = t.reg + t.vxreg = t.vreg + t.reg = nil + t.vreg = nil + t.disp = dispexpr(tailsc) + break + end + if not map_reg_valid_base[reg] then + werror("bad base register `"..map_reg_rev[reg].."'") + end + + -- [reg] or [reg+-disp] + t.disp = toint(tailr) or (tailr == "" and 0) + if t.disp then break end + + -- [reg+xreg...] + local xreg, tailx = match(tailr, "^%+%s*([@%w_:]+)%s*(.*)$") + xreg, t.xreg, tp = rtexpr(xreg) + if not t.xreg then + -- [reg+-expr] + t.disp = dispexpr(tailr) + break + end + if not map_reg_valid_index[xreg] then + werror("bad index register `"..map_reg_rev[xreg].."'") + end + + if t.xreg == -1 then + t.vxreg, tailx = match(tailx, "^(%b())(.*)$") + if not t.vxreg then werror("bad variable register expression") end + end + + -- [reg+xreg*xsc...] 
+ local xsc, tailsc = match(tailx, "^%*%s*([1248])%s*(.*)$") + if xsc then + t.xsc = map_xsc[xsc] + tailx = tailsc + end + + -- [...] or [...+-disp] or [...+-expr] + t.disp = dispexpr(tailx) + else + -- imm or opsize*imm + local imm = toint(expr, isqword) + if not imm and sub(expr, 1, 1) == "*" and t.opsize then + imm = toint(sub(expr, 2)) + if imm then + imm = imm * map_opsizenum[t.opsize] + t.opsize = nil + end + end + if imm then + if t.opsize then werror("bad operand size override") end + local m = "i" + if imm == 1 then m = m.."1" end + if imm >= 4294967168 and imm <= 4294967295 then imm = imm-4294967296 end + if imm >= -128 and imm <= 127 then m = m.."S" end + t.imm = imm + t.mode = m + break + end + + local tp + local reg, tailr = match(expr, "^([@%w_:]+)%s*(.*)$") + reg, t.reg, tp = rtexpr(reg) + if t.reg then + if t.reg == -1 then + t.vreg, tailr = match(tailr, "^(%b())(.*)$") + if not t.vreg then werror("bad variable register expression") end + end + -- reg + if tailr == "" then + if t.opsize then werror("bad operand size override") end + t.opsize = map_reg_opsize[reg] + if t.opsize == "f" then + t.mode = t.reg == 0 and "fF" or "f" + else + if reg == "@w4" or (x64 and reg == "@d4") then + wwarn("bad idea, try again with `"..(x64 and "rsp'" or "esp'")) + end + t.mode = t.reg == 0 and "rmR" or (reg == "@b1" and "rmC" or "rm") + end + t.needrex = map_reg_needrex[reg] + break + end + + -- type[idx], type[idx].field, type->field -> [reg+offset_expr] + if not tp then werror("bad operand `"..param.."'") end + t.mode = "xm" + t.disp = format(tp.ctypefmt, tailr) + else + t.mode, t.imm = immexpr(expr) + if sub(t.mode, -1) == "J" then + if t.opsize and t.opsize ~= addrsize then + werror("bad operand size override") + end + t.opsize = addrsize + end + end + end + until true + return t +end + +------------------------------------------------------------------------------ +-- x86 Template String Description +-- =============================== +-- +-- Each template string is a list of [match:]pattern pairs, +-- separated by "|". The first match wins. No match means a +-- bad or unsupported combination of operand modes or sizes. +-- +-- The match part and the ":" is omitted if the operation has +-- no operands. Otherwise the first N characters are matched +-- against the mode strings of each of the N operands. +-- +-- The mode string for each operand type is (see parseoperand()): +-- Integer register: "rm", +"R" for eax, ax, al, +"C" for cl +-- FP register: "f", +"F" for st0 +-- Index operand: "xm", +"O" for [disp] (pure offset) +-- Immediate: "i", +"S" for signed 8 bit, +"1" for 1, +-- +"I" for arg, +"P" for pointer +-- Any: +"J" for valid jump targets +-- +-- So a match character "m" (mixed) matches both an integer register +-- and an index operand (to be encoded with the ModRM/SIB scheme). +-- But "r" matches only a register and "x" only an index operand +-- (e.g. for FP memory access operations). +-- +-- The operand size match string starts right after the mode match +-- characters and ends before the ":". "dwb" or "qdwb" is assumed, if empty. +-- The effective data size of the operation is matched against this list. +-- +-- If only the regular "b", "w", "d", "q", "t" operand sizes are +-- present, then all operands must be the same size. Unspecified sizes +-- are ignored, but at least one operand must have a size or the pattern +-- won't match (use the "byte", "word", "dword", "qword", "tword" +-- operand size overrides. E.g.: mov dword [eax], 1). 
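+--
+-- E.g. "rmqdw" (as used by imul_2 below) matches `imul rax, rbx`
+-- with common size "q", but not `imul rax, bx`, which mixes "q"
+-- and "w" operands.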
+-- +-- If the list has a "1" or "2" prefix, the operand size is taken +-- from the respective operand and any other operand sizes are ignored. +-- If the list contains only ".", all operand sizes are ignored. +-- If the list has a "/" prefix, the concatenated (mixed) operand sizes +-- are compared to the match. +-- +-- E.g. "rrdw" matches for either two dword registers or two word +-- registers. "Fx2dq" matches an st0 operand plus an index operand +-- pointing to a dword (float) or qword (double). +-- +-- Every character after the ":" is part of the pattern string: +-- Hex chars are accumulated to form the opcode (left to right). +-- "n" disables the standard opcode mods +-- (otherwise: -1 for "b", o16 prefix for "w", rex.w for "q") +-- "X" Force REX.W. +-- "r"/"R" adds the reg. number from the 1st/2nd operand to the opcode. +-- "m"/"M" generates ModRM/SIB from the 1st/2nd operand. +-- The spare 3 bits are either filled with the last hex digit or +-- the result from a previous "r"/"R". The opcode is restored. +-- "u" Use VEX encoding, vvvv unused. +-- "v"/"V" Use VEX encoding, vvvv from 1st/2nd operand (the operand is +-- removed from the list used by future characters). +-- "w" Use VEX encoding, vvvv from 3rd operand. +-- "L" Force VEX.L +-- +-- All of the following characters force a flush of the opcode: +-- "o"/"O" stores a pure 32 bit disp (offset) from the 1st/2nd operand. +-- "s" stores a 4 bit immediate from the last register operand, +-- followed by 4 zero bits. +-- "S" stores a signed 8 bit immediate from the last operand. +-- "U" stores an unsigned 8 bit immediate from the last operand. +-- "W" stores an unsigned 16 bit immediate from the last operand. +-- "i" stores an operand sized immediate from the last operand. +-- "I" dito, but generates an action code to optionally modify +-- the opcode (+2) for a signed 8 bit immediate. +-- "J" generates one of the REL action codes from the last operand. +-- +------------------------------------------------------------------------------ + +-- Template strings for x86 instructions. Ordered by first opcode byte. +-- Unimplemented opcodes (deliberate omissions) are marked with *. +local map_op = { + -- 00-05: add... + -- 06: *push es + -- 07: *pop es + -- 08-0D: or... + -- 0E: *push cs + -- 0F: two byte opcode prefix + -- 10-15: adc... + -- 16: *push ss + -- 17: *pop ss + -- 18-1D: sbb... + -- 1E: *push ds + -- 1F: *pop ds + -- 20-25: and... + es_0 = "26", + -- 27: *daa + -- 28-2D: sub... + cs_0 = "2E", + -- 2F: *das + -- 30-35: xor... + ss_0 = "36", + -- 37: *aaa + -- 38-3D: cmp... + ds_0 = "3E", + -- 3F: *aas + inc_1 = x64 and "m:FF0m" or "rdw:40r|m:FF0m", + dec_1 = x64 and "m:FF1m" or "rdw:48r|m:FF1m", + push_1 = (x64 and "rq:n50r|rw:50r|mq:nFF6m|mw:FF6m" or + "rdw:50r|mdw:FF6m").."|S.:6AS|ib:n6Ai|i.:68i", + pop_1 = x64 and "rq:n58r|rw:58r|mq:n8F0m|mw:8F0m" or "rdw:58r|mdw:8F0m", + -- 60: *pusha, *pushad, *pushaw + -- 61: *popa, *popad, *popaw + -- 62: *bound rdw,x + -- 63: x86: *arpl mw,rw + movsxd_2 = x64 and "rm/qd:63rM", + fs_0 = "64", + gs_0 = "65", + o16_0 = "66", + a16_0 = not x64 and "67" or nil, + a32_0 = x64 and "67", + -- 68: push idw + -- 69: imul rdw,mdw,idw + -- 6A: push ib + -- 6B: imul rdw,mdw,S + -- 6C: *insb + -- 6D: *insd, *insw + -- 6E: *outsb + -- 6F: *outsd, *outsw + -- 70-7F: jcc lb + -- 80: add... mb,i + -- 81: add... mdw,i + -- 82: *undefined + -- 83: add... 
mdw,S
+ test_2 = "mr:85Rm|rm:85rM|Ri:A9ri|mi:F70mi",
+ -- 86: xchg rb,mb
+ -- 87: xchg rdw,mdw
+ -- 88: mov mb,r
+ -- 89: mov mdw,r
+ -- 8A: mov r,mb
+ -- 8B: mov r,mdw
+ -- 8C: *mov mdw,seg
+ lea_2 = "rx1dq:8DrM",
+ -- 8E: *mov seg,mdw
+ -- 8F: pop mdw
+ nop_0 = "90",
+ xchg_2 = "Rrqdw:90R|rRqdw:90r|rm:87rM|mr:87Rm",
+ cbw_0 = "6698",
+ cwde_0 = "98",
+ cdqe_0 = "4898",
+ cwd_0 = "6699",
+ cdq_0 = "99",
+ cqo_0 = "4899",
+ -- 9A: *call iw:idw
+ wait_0 = "9B",
+ fwait_0 = "9B",
+ pushf_0 = "9C",
+ pushfd_0 = not x64 and "9C",
+ pushfq_0 = x64 and "9C",
+ popf_0 = "9D",
+ popfd_0 = not x64 and "9D",
+ popfq_0 = x64 and "9D",
+ sahf_0 = "9E",
+ lahf_0 = "9F",
+ mov_2 = "OR:A3o|RO:A1O|mr:89Rm|rm:8BrM|rib:nB0ri|ridw:B8ri|mi:C70mi",
+ movsb_0 = "A4",
+ movsw_0 = "66A5",
+ movsd_0 = "A5",
+ cmpsb_0 = "A6",
+ cmpsw_0 = "66A7",
+ cmpsd_0 = "A7",
+ -- A8: test Rb,i
+ -- A9: test Rdw,i
+ stosb_0 = "AA",
+ stosw_0 = "66AB",
+ stosd_0 = "AB",
+ lodsb_0 = "AC",
+ lodsw_0 = "66AD",
+ lodsd_0 = "AD",
+ scasb_0 = "AE",
+ scasw_0 = "66AF",
+ scasd_0 = "AF",
+ -- B0-B7: mov rb,i
+ -- B8-BF: mov rdw,i
+ -- C0: rol... mb,i
+ -- C1: rol... mdw,i
+ ret_1 = "i.:nC2W",
+ ret_0 = "C3",
+ -- C4: *les rdw,mq
+ -- C5: *lds rdw,mq
+ -- C6: mov mb,i
+ -- C7: mov mdw,i
+ -- C8: *enter iw,ib
+ leave_0 = "C9",
+ -- CA: *retf iw
+ -- CB: *retf
+ int3_0 = "CC",
+ int_1 = "i.:nCDU",
+ into_0 = "CE",
+ -- CF: *iret
+ -- D0: rol... mb,1
+ -- D1: rol... mdw,1
+ -- D2: rol... mb,cl
+ -- D3: rol... mdw,cl
+ -- D4: *aam ib
+ -- D5: *aad ib
+ -- D6: *salc
+ -- D7: *xlat
+ -- D8-DF: floating point ops
+ -- E0: *loopne
+ -- E1: *loope
+ -- E2: *loop
+ -- E3: *jcxz, *jecxz
+ -- E4: *in Rb,ib
+ -- E5: *in Rdw,ib
+ -- E6: *out ib,Rb
+ -- E7: *out ib,Rdw
+ call_1 = x64 and "mq:nFF2m|J.:E8nJ" or "md:FF2m|J.:E8J",
+ jmp_1 = x64 and "mq:nFF4m|J.:E9nJ" or "md:FF4m|J.:E9J", -- short: EB
+ -- EA: *jmp iw:idw
+ -- EB: jmp ib
+ -- EC: *in Rb,dx
+ -- ED: *in Rdw,dx
+ -- EE: *out dx,Rb
+ -- EF: *out dx,Rdw
+ lock_0 = "F0",
+ int1_0 = "F1",
+ repne_0 = "F2",
+ repnz_0 = "F2",
+ rep_0 = "F3",
+ repe_0 = "F3",
+ repz_0 = "F3",
+ endbr32_0 = "F30F1EFB",
+ endbr64_0 = "F30F1EFA",
+ -- F4: *hlt
+ cmc_0 = "F5",
+ -- F6: test... mb,i; div... mb
+ -- F7: test... mdw,i; div... mdw
+ clc_0 = "F8",
+ stc_0 = "F9",
+ -- FA: *cli
+ cld_0 = "FC",
+ std_0 = "FD",
+ -- FE: inc... mb
+ -- FF: inc... mdw
+
+ -- misc ops
+ not_1 = "m:F72m",
+ neg_1 = "m:F73m",
+ mul_1 = "m:F74m",
+ imul_1 = "m:F75m",
+ div_1 = "m:F76m",
+ idiv_1 = "m:F77m",
+
+ imul_2 = "rmqdw:0FAFrM|rIqdw:69rmI|rSqdw:6BrmS|riqdw:69rmi",
+ imul_3 = "rmIqdw:69rMI|rmSqdw:6BrMS|rmiqdw:69rMi",
+
+ movzx_2 = "rm/db:0FB6rM|rm/qb:|rm/wb:0FB6rM|rm/dw:0FB7rM|rm/qw:",
+ movsx_2 = "rm/db:0FBErM|rm/qb:|rm/wb:0FBErM|rm/dw:0FBFrM|rm/qw:",
+
+ bswap_1 = "rqd:0FC8r",
+ bsf_2 = "rmqdw:0FBCrM",
+ bsr_2 = "rmqdw:0FBDrM",
+ bt_2 = "mrqdw:0FA3Rm|miqdw:0FBA4mU",
+ btc_2 = "mrqdw:0FBBRm|miqdw:0FBA7mU",
+ btr_2 = "mrqdw:0FB3Rm|miqdw:0FBA6mU",
+ bts_2 = "mrqdw:0FABRm|miqdw:0FBA5mU",
+
+ shld_3 = "mriqdw:0FA4RmU|mrC/qq:0FA5Rm|mrC/dd:|mrC/ww:",
+ shrd_3 = "mriqdw:0FACRmU|mrC/qq:0FADRm|mrC/dd:|mrC/ww:",
+
+ rdtsc_0 = "0F31", -- P1+
+ rdpmc_0 = "0F33", -- P6+
+ cpuid_0 = "0FA2", -- P1+
+
+ -- floating point ops
+ fst_1 = "ff:DDD0r|xd:D92m|xq:nDD2m",
+ fstp_1 = "ff:DDD8r|xd:D93m|xq:nDD3m|xt:DB7m",
+ fld_1 = "ff:D9C0r|xd:D90m|xq:nDD0m|xt:DB5m",
+
+ fpop_0 = "DDD8", -- Alias for fstp st0.
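+ -- For illustration: `fst st1` selects the "ff:DDD0r" template above;
+ -- the trailing "r" adds the st-register number to the last opcode
+ -- byte, emitting DD D1.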
+ + fist_1 = "xw:nDF2m|xd:DB2m", + fistp_1 = "xw:nDF3m|xd:DB3m|xq:nDF7m", + fild_1 = "xw:nDF0m|xd:DB0m|xq:nDF5m", + + fxch_0 = "D9C9", + fxch_1 = "ff:D9C8r", + fxch_2 = "fFf:D9C8r|Fff:D9C8R", + + fucom_1 = "ff:DDE0r", + fucom_2 = "Fff:DDE0R", + fucomp_1 = "ff:DDE8r", + fucomp_2 = "Fff:DDE8R", + fucomi_1 = "ff:DBE8r", -- P6+ + fucomi_2 = "Fff:DBE8R", -- P6+ + fucomip_1 = "ff:DFE8r", -- P6+ + fucomip_2 = "Fff:DFE8R", -- P6+ + fcomi_1 = "ff:DBF0r", -- P6+ + fcomi_2 = "Fff:DBF0R", -- P6+ + fcomip_1 = "ff:DFF0r", -- P6+ + fcomip_2 = "Fff:DFF0R", -- P6+ + fucompp_0 = "DAE9", + fcompp_0 = "DED9", + + fldenv_1 = "x.:D94m", + fnstenv_1 = "x.:D96m", + fstenv_1 = "x.:9BD96m", + fldcw_1 = "xw:nD95m", + fstcw_1 = "xw:n9BD97m", + fnstcw_1 = "xw:nD97m", + fstsw_1 = "Rw:n9BDFE0|xw:n9BDD7m", + fnstsw_1 = "Rw:nDFE0|xw:nDD7m", + fclex_0 = "9BDBE2", + fnclex_0 = "DBE2", + + fnop_0 = "D9D0", + -- D9D1-D9DF: unassigned + + fchs_0 = "D9E0", + fabs_0 = "D9E1", + -- D9E2: unassigned + -- D9E3: unassigned + ftst_0 = "D9E4", + fxam_0 = "D9E5", + -- D9E6: unassigned + -- D9E7: unassigned + fld1_0 = "D9E8", + fldl2t_0 = "D9E9", + fldl2e_0 = "D9EA", + fldpi_0 = "D9EB", + fldlg2_0 = "D9EC", + fldln2_0 = "D9ED", + fldz_0 = "D9EE", + -- D9EF: unassigned + + f2xm1_0 = "D9F0", + fyl2x_0 = "D9F1", + fptan_0 = "D9F2", + fpatan_0 = "D9F3", + fxtract_0 = "D9F4", + fprem1_0 = "D9F5", + fdecstp_0 = "D9F6", + fincstp_0 = "D9F7", + fprem_0 = "D9F8", + fyl2xp1_0 = "D9F9", + fsqrt_0 = "D9FA", + fsincos_0 = "D9FB", + frndint_0 = "D9FC", + fscale_0 = "D9FD", + fsin_0 = "D9FE", + fcos_0 = "D9FF", + + -- SSE, SSE2 + andnpd_2 = "rmo:660F55rM", + andnps_2 = "rmo:0F55rM", + andpd_2 = "rmo:660F54rM", + andps_2 = "rmo:0F54rM", + clflush_1 = "x.:0FAE7m", + cmppd_3 = "rmio:660FC2rMU", + cmpps_3 = "rmio:0FC2rMU", + cmpsd_3 = "rrio:F20FC2rMU|rxi/oq:", + cmpss_3 = "rrio:F30FC2rMU|rxi/od:", + comisd_2 = "rro:660F2FrM|rx/oq:", + comiss_2 = "rro:0F2FrM|rx/od:", + cvtdq2pd_2 = "rro:F30FE6rM|rx/oq:", + cvtdq2ps_2 = "rmo:0F5BrM", + cvtpd2dq_2 = "rmo:F20FE6rM", + cvtpd2ps_2 = "rmo:660F5ArM", + cvtpi2pd_2 = "rx/oq:660F2ArM", + cvtpi2ps_2 = "rx/oq:0F2ArM", + cvtps2dq_2 = "rmo:660F5BrM", + cvtps2pd_2 = "rro:0F5ArM|rx/oq:", + cvtsd2si_2 = "rr/do:F20F2DrM|rr/qo:|rx/dq:|rxq:", + cvtsd2ss_2 = "rro:F20F5ArM|rx/oq:", + cvtsi2sd_2 = "rm/od:F20F2ArM|rm/oq:F20F2ArXM", + cvtsi2ss_2 = "rm/od:F30F2ArM|rm/oq:F30F2ArXM", + cvtss2sd_2 = "rro:F30F5ArM|rx/od:", + cvtss2si_2 = "rr/do:F30F2DrM|rr/qo:|rxd:|rx/qd:", + cvttpd2dq_2 = "rmo:660FE6rM", + cvttps2dq_2 = "rmo:F30F5BrM", + cvttsd2si_2 = "rr/do:F20F2CrM|rr/qo:|rx/dq:|rxq:", + cvttss2si_2 = "rr/do:F30F2CrM|rr/qo:|rxd:|rx/qd:", + fxsave_1 = "x.:0FAE0m", + fxrstor_1 = "x.:0FAE1m", + ldmxcsr_1 = "xd:0FAE2m", + lfence_0 = "0FAEE8", + maskmovdqu_2 = "rro:660FF7rM", + mfence_0 = "0FAEF0", + movapd_2 = "rmo:660F28rM|mro:660F29Rm", + movaps_2 = "rmo:0F28rM|mro:0F29Rm", + movd_2 = "rm/od:660F6ErM|rm/oq:660F6ErXM|mr/do:660F7ERm|mr/qo:", + movdqa_2 = "rmo:660F6FrM|mro:660F7FRm", + movdqu_2 = "rmo:F30F6FrM|mro:F30F7FRm", + movhlps_2 = "rro:0F12rM", + movhpd_2 = "rx/oq:660F16rM|xr/qo:n660F17Rm", + movhps_2 = "rx/oq:0F16rM|xr/qo:n0F17Rm", + movlhps_2 = "rro:0F16rM", + movlpd_2 = "rx/oq:660F12rM|xr/qo:n660F13Rm", + movlps_2 = "rx/oq:0F12rM|xr/qo:n0F13Rm", + movmskpd_2 = "rr/do:660F50rM", + movmskps_2 = "rr/do:0F50rM", + movntdq_2 = "xro:660FE7Rm", + movnti_2 = "xrqd:0FC3Rm", + movntpd_2 = "xro:660F2BRm", + movntps_2 = "xro:0F2BRm", + movq_2 = "rro:F30F7ErM|rx/oq:|xr/qo:n660FD6Rm", + movsd_2 = "rro:F20F10rM|rx/oq:|xr/qo:nF20F11Rm", + 
movss_2 = "rro:F30F10rM|rx/od:|xr/do:F30F11Rm", + movupd_2 = "rmo:660F10rM|mro:660F11Rm", + movups_2 = "rmo:0F10rM|mro:0F11Rm", + orpd_2 = "rmo:660F56rM", + orps_2 = "rmo:0F56rM", + pause_0 = "F390", + pextrw_3 = "rri/do:660FC5rMU|xri/wo:660F3A15nRmU", -- Mem op: SSE4.1 only. + pinsrw_3 = "rri/od:660FC4rMU|rxi/ow:", + pmovmskb_2 = "rr/do:660FD7rM", + prefetchnta_1 = "xb:n0F180m", + prefetcht0_1 = "xb:n0F181m", + prefetcht1_1 = "xb:n0F182m", + prefetcht2_1 = "xb:n0F183m", + pshufd_3 = "rmio:660F70rMU", + pshufhw_3 = "rmio:F30F70rMU", + pshuflw_3 = "rmio:F20F70rMU", + pslld_2 = "rmo:660FF2rM|rio:660F726mU", + pslldq_2 = "rio:660F737mU", + psllq_2 = "rmo:660FF3rM|rio:660F736mU", + psllw_2 = "rmo:660FF1rM|rio:660F716mU", + psrad_2 = "rmo:660FE2rM|rio:660F724mU", + psraw_2 = "rmo:660FE1rM|rio:660F714mU", + psrld_2 = "rmo:660FD2rM|rio:660F722mU", + psrldq_2 = "rio:660F733mU", + psrlq_2 = "rmo:660FD3rM|rio:660F732mU", + psrlw_2 = "rmo:660FD1rM|rio:660F712mU", + rcpps_2 = "rmo:0F53rM", + rcpss_2 = "rro:F30F53rM|rx/od:", + rsqrtps_2 = "rmo:0F52rM", + rsqrtss_2 = "rmo:F30F52rM", + sfence_0 = "0FAEF8", + shufpd_3 = "rmio:660FC6rMU", + shufps_3 = "rmio:0FC6rMU", + stmxcsr_1 = "xd:0FAE3m", + ucomisd_2 = "rro:660F2ErM|rx/oq:", + ucomiss_2 = "rro:0F2ErM|rx/od:", + unpckhpd_2 = "rmo:660F15rM", + unpckhps_2 = "rmo:0F15rM", + unpcklpd_2 = "rmo:660F14rM", + unpcklps_2 = "rmo:0F14rM", + xorpd_2 = "rmo:660F57rM", + xorps_2 = "rmo:0F57rM", + + -- SSE3 ops + fisttp_1 = "xw:nDF1m|xd:DB1m|xq:nDD1m", + addsubpd_2 = "rmo:660FD0rM", + addsubps_2 = "rmo:F20FD0rM", + haddpd_2 = "rmo:660F7CrM", + haddps_2 = "rmo:F20F7CrM", + hsubpd_2 = "rmo:660F7DrM", + hsubps_2 = "rmo:F20F7DrM", + lddqu_2 = "rxo:F20FF0rM", + movddup_2 = "rmo:F20F12rM", + movshdup_2 = "rmo:F30F16rM", + movsldup_2 = "rmo:F30F12rM", + + -- SSSE3 ops + pabsb_2 = "rmo:660F381CrM", + pabsd_2 = "rmo:660F381ErM", + pabsw_2 = "rmo:660F381DrM", + palignr_3 = "rmio:660F3A0FrMU", + phaddd_2 = "rmo:660F3802rM", + phaddsw_2 = "rmo:660F3803rM", + phaddw_2 = "rmo:660F3801rM", + phsubd_2 = "rmo:660F3806rM", + phsubsw_2 = "rmo:660F3807rM", + phsubw_2 = "rmo:660F3805rM", + pmaddubsw_2 = "rmo:660F3804rM", + pmulhrsw_2 = "rmo:660F380BrM", + pshufb_2 = "rmo:660F3800rM", + psignb_2 = "rmo:660F3808rM", + psignd_2 = "rmo:660F380ArM", + psignw_2 = "rmo:660F3809rM", + + -- SSE4.1 ops + blendpd_3 = "rmio:660F3A0DrMU", + blendps_3 = "rmio:660F3A0CrMU", + blendvpd_3 = "rmRo:660F3815rM", + blendvps_3 = "rmRo:660F3814rM", + dppd_3 = "rmio:660F3A41rMU", + dpps_3 = "rmio:660F3A40rMU", + extractps_3 = "mri/do:660F3A17RmU|rri/qo:660F3A17RXmU", + insertps_3 = "rrio:660F3A41rMU|rxi/od:", + movntdqa_2 = "rxo:660F382ArM", + mpsadbw_3 = "rmio:660F3A42rMU", + packusdw_2 = "rmo:660F382BrM", + pblendvb_3 = "rmRo:660F3810rM", + pblendw_3 = "rmio:660F3A0ErMU", + pcmpeqq_2 = "rmo:660F3829rM", + pextrb_3 = "rri/do:660F3A14nRmU|rri/qo:|xri/bo:", + pextrd_3 = "mri/do:660F3A16RmU", + pextrq_3 = "mri/qo:660F3A16RmU", + -- pextrw is SSE2, mem operand is SSE4.1 only + phminposuw_2 = "rmo:660F3841rM", + pinsrb_3 = "rri/od:660F3A20nrMU|rxi/ob:", + pinsrd_3 = "rmi/od:660F3A22rMU", + pinsrq_3 = "rmi/oq:660F3A22rXMU", + pmaxsb_2 = "rmo:660F383CrM", + pmaxsd_2 = "rmo:660F383DrM", + pmaxud_2 = "rmo:660F383FrM", + pmaxuw_2 = "rmo:660F383ErM", + pminsb_2 = "rmo:660F3838rM", + pminsd_2 = "rmo:660F3839rM", + pminud_2 = "rmo:660F383BrM", + pminuw_2 = "rmo:660F383ArM", + pmovsxbd_2 = "rro:660F3821rM|rx/od:", + pmovsxbq_2 = "rro:660F3822rM|rx/ow:", + pmovsxbw_2 = "rro:660F3820rM|rx/oq:", + pmovsxdq_2 = 
"rro:660F3825rM|rx/oq:", + pmovsxwd_2 = "rro:660F3823rM|rx/oq:", + pmovsxwq_2 = "rro:660F3824rM|rx/od:", + pmovzxbd_2 = "rro:660F3831rM|rx/od:", + pmovzxbq_2 = "rro:660F3832rM|rx/ow:", + pmovzxbw_2 = "rro:660F3830rM|rx/oq:", + pmovzxdq_2 = "rro:660F3835rM|rx/oq:", + pmovzxwd_2 = "rro:660F3833rM|rx/oq:", + pmovzxwq_2 = "rro:660F3834rM|rx/od:", + pmuldq_2 = "rmo:660F3828rM", + pmulld_2 = "rmo:660F3840rM", + ptest_2 = "rmo:660F3817rM", + roundpd_3 = "rmio:660F3A09rMU", + roundps_3 = "rmio:660F3A08rMU", + roundsd_3 = "rrio:660F3A0BrMU|rxi/oq:", + roundss_3 = "rrio:660F3A0ArMU|rxi/od:", + + -- SSE4.2 ops + crc32_2 = "rmqd:F20F38F1rM|rm/dw:66F20F38F1rM|rm/db:F20F38F0rM|rm/qb:", + pcmpestri_3 = "rmio:660F3A61rMU", + pcmpestrm_3 = "rmio:660F3A60rMU", + pcmpgtq_2 = "rmo:660F3837rM", + pcmpistri_3 = "rmio:660F3A63rMU", + pcmpistrm_3 = "rmio:660F3A62rMU", + popcnt_2 = "rmqdw:F30FB8rM", + + -- SSE4a + extrq_2 = "rro:660F79rM", + extrq_3 = "riio:660F780mUU", + insertq_2 = "rro:F20F79rM", + insertq_4 = "rriio:F20F78rMUU", + lzcnt_2 = "rmqdw:F30FBDrM", + movntsd_2 = "xr/qo:nF20F2BRm", + movntss_2 = "xr/do:F30F2BRm", + -- popcnt is also in SSE4.2 + + -- AES-NI + aesdec_2 = "rmo:660F38DErM", + aesdeclast_2 = "rmo:660F38DFrM", + aesenc_2 = "rmo:660F38DCrM", + aesenclast_2 = "rmo:660F38DDrM", + aesimc_2 = "rmo:660F38DBrM", + aeskeygenassist_3 = "rmio:660F3ADFrMU", + pclmulqdq_3 = "rmio:660F3A44rMU", + + -- AVX FP ops + vaddsubpd_3 = "rrmoy:660FVD0rM", + vaddsubps_3 = "rrmoy:F20FVD0rM", + vandpd_3 = "rrmoy:660FV54rM", + vandps_3 = "rrmoy:0FV54rM", + vandnpd_3 = "rrmoy:660FV55rM", + vandnps_3 = "rrmoy:0FV55rM", + vblendpd_4 = "rrmioy:660F3AV0DrMU", + vblendps_4 = "rrmioy:660F3AV0CrMU", + vblendvpd_4 = "rrmroy:660F3AV4BrMs", + vblendvps_4 = "rrmroy:660F3AV4ArMs", + vbroadcastf128_2 = "rx/yo:660F38u1ArM", + vcmppd_4 = "rrmioy:660FVC2rMU", + vcmpps_4 = "rrmioy:0FVC2rMU", + vcmpsd_4 = "rrrio:F20FVC2rMU|rrxi/ooq:", + vcmpss_4 = "rrrio:F30FVC2rMU|rrxi/ood:", + vcomisd_2 = "rro:660Fu2FrM|rx/oq:", + vcomiss_2 = "rro:0Fu2FrM|rx/od:", + vcvtdq2pd_2 = "rro:F30FuE6rM|rx/oq:|rm/yo:", + vcvtdq2ps_2 = "rmoy:0Fu5BrM", + vcvtpd2dq_2 = "rmoy:F20FuE6rM", + vcvtpd2ps_2 = "rmoy:660Fu5ArM", + vcvtps2dq_2 = "rmoy:660Fu5BrM", + vcvtps2pd_2 = "rro:0Fu5ArM|rx/oq:|rm/yo:", + vcvtsd2si_2 = "rr/do:F20Fu2DrM|rx/dq:|rr/qo:|rxq:", + vcvtsd2ss_3 = "rrro:F20FV5ArM|rrx/ooq:", + vcvtsi2sd_3 = "rrm/ood:F20FV2ArM|rrm/ooq:F20FVX2ArM", + vcvtsi2ss_3 = "rrm/ood:F30FV2ArM|rrm/ooq:F30FVX2ArM", + vcvtss2sd_3 = "rrro:F30FV5ArM|rrx/ood:", + vcvtss2si_2 = "rr/do:F30Fu2DrM|rxd:|rr/qo:|rx/qd:", + vcvttpd2dq_2 = "rmo:660FuE6rM|rm/oy:660FuLE6rM", + vcvttps2dq_2 = "rmoy:F30Fu5BrM", + vcvttsd2si_2 = "rr/do:F20Fu2CrM|rx/dq:|rr/qo:|rxq:", + vcvttss2si_2 = "rr/do:F30Fu2CrM|rxd:|rr/qo:|rx/qd:", + vdppd_4 = "rrmio:660F3AV41rMU", + vdpps_4 = "rrmioy:660F3AV40rMU", + vextractf128_3 = "mri/oy:660F3AuL19RmU", + vextractps_3 = "mri/do:660F3Au17RmU", + vhaddpd_3 = "rrmoy:660FV7CrM", + vhaddps_3 = "rrmoy:F20FV7CrM", + vhsubpd_3 = "rrmoy:660FV7DrM", + vhsubps_3 = "rrmoy:F20FV7DrM", + vinsertf128_4 = "rrmi/yyo:660F3AV18rMU", + vinsertps_4 = "rrrio:660F3AV21rMU|rrxi/ood:", + vldmxcsr_1 = "xd:0FuAE2m", + vmaskmovps_3 = "rrxoy:660F38V2CrM|xrroy:660F38V2ERm", + vmaskmovpd_3 = "rrxoy:660F38V2DrM|xrroy:660F38V2FRm", + vmovapd_2 = "rmoy:660Fu28rM|mroy:660Fu29Rm", + vmovaps_2 = "rmoy:0Fu28rM|mroy:0Fu29Rm", + vmovd_2 = "rm/od:660Fu6ErM|rm/oq:660FuX6ErM|mr/do:660Fu7ERm|mr/qo:", + vmovq_2 = "rro:F30Fu7ErM|rx/oq:|xr/qo:660FuD6Rm", + vmovddup_2 = "rmy:F20Fu12rM|rro:|rx/oq:", + vmovhlps_3 
= "rrro:0FV12rM", + vmovhpd_2 = "xr/qo:660Fu17Rm", + vmovhpd_3 = "rrx/ooq:660FV16rM", + vmovhps_2 = "xr/qo:0Fu17Rm", + vmovhps_3 = "rrx/ooq:0FV16rM", + vmovlhps_3 = "rrro:0FV16rM", + vmovlpd_2 = "xr/qo:660Fu13Rm", + vmovlpd_3 = "rrx/ooq:660FV12rM", + vmovlps_2 = "xr/qo:0Fu13Rm", + vmovlps_3 = "rrx/ooq:0FV12rM", + vmovmskpd_2 = "rr/do:660Fu50rM|rr/dy:660FuL50rM", + vmovmskps_2 = "rr/do:0Fu50rM|rr/dy:0FuL50rM", + vmovntpd_2 = "xroy:660Fu2BRm", + vmovntps_2 = "xroy:0Fu2BRm", + vmovsd_2 = "rx/oq:F20Fu10rM|xr/qo:F20Fu11Rm", + vmovsd_3 = "rrro:F20FV10rM", + vmovshdup_2 = "rmoy:F30Fu16rM", + vmovsldup_2 = "rmoy:F30Fu12rM", + vmovss_2 = "rx/od:F30Fu10rM|xr/do:F30Fu11Rm", + vmovss_3 = "rrro:F30FV10rM", + vmovupd_2 = "rmoy:660Fu10rM|mroy:660Fu11Rm", + vmovups_2 = "rmoy:0Fu10rM|mroy:0Fu11Rm", + vorpd_3 = "rrmoy:660FV56rM", + vorps_3 = "rrmoy:0FV56rM", + vpermilpd_3 = "rrmoy:660F38V0DrM|rmioy:660F3Au05rMU", + vpermilps_3 = "rrmoy:660F38V0CrM|rmioy:660F3Au04rMU", + vperm2f128_4 = "rrmiy:660F3AV06rMU", + vptestpd_2 = "rmoy:660F38u0FrM", + vptestps_2 = "rmoy:660F38u0ErM", + vrcpps_2 = "rmoy:0Fu53rM", + vrcpss_3 = "rrro:F30FV53rM|rrx/ood:", + vrsqrtps_2 = "rmoy:0Fu52rM", + vrsqrtss_3 = "rrro:F30FV52rM|rrx/ood:", + vroundpd_3 = "rmioy:660F3Au09rMU", + vroundps_3 = "rmioy:660F3Au08rMU", + vroundsd_4 = "rrrio:660F3AV0BrMU|rrxi/ooq:", + vroundss_4 = "rrrio:660F3AV0ArMU|rrxi/ood:", + vshufpd_4 = "rrmioy:660FVC6rMU", + vshufps_4 = "rrmioy:0FVC6rMU", + vsqrtps_2 = "rmoy:0Fu51rM", + vsqrtss_2 = "rro:F30Fu51rM|rx/od:", + vsqrtpd_2 = "rmoy:660Fu51rM", + vsqrtsd_2 = "rro:F20Fu51rM|rx/oq:", + vstmxcsr_1 = "xd:0FuAE3m", + vucomisd_2 = "rro:660Fu2ErM|rx/oq:", + vucomiss_2 = "rro:0Fu2ErM|rx/od:", + vunpckhpd_3 = "rrmoy:660FV15rM", + vunpckhps_3 = "rrmoy:0FV15rM", + vunpcklpd_3 = "rrmoy:660FV14rM", + vunpcklps_3 = "rrmoy:0FV14rM", + vxorpd_3 = "rrmoy:660FV57rM", + vxorps_3 = "rrmoy:0FV57rM", + vzeroall_0 = "0FuL77", + vzeroupper_0 = "0Fu77", + + -- AVX2 FP ops + vbroadcastss_2 = "rx/od:660F38u18rM|rx/yd:|rro:|rr/yo:", + vbroadcastsd_2 = "rx/yq:660F38u19rM|rr/yo:", + -- *vgather* (!vsib) + vpermpd_3 = "rmiy:660F3AuX01rMU", + vpermps_3 = "rrmy:660F38V16rM", + + -- AVX, AVX2 integer ops + -- In general, xmm requires AVX, ymm requires AVX2. 
+ vaesdec_3 = "rrmo:660F38VDErM", + vaesdeclast_3 = "rrmo:660F38VDFrM", + vaesenc_3 = "rrmo:660F38VDCrM", + vaesenclast_3 = "rrmo:660F38VDDrM", + vaesimc_2 = "rmo:660F38uDBrM", + vaeskeygenassist_3 = "rmio:660F3AuDFrMU", + vlddqu_2 = "rxoy:F20FuF0rM", + vmaskmovdqu_2 = "rro:660FuF7rM", + vmovdqa_2 = "rmoy:660Fu6FrM|mroy:660Fu7FRm", + vmovdqu_2 = "rmoy:F30Fu6FrM|mroy:F30Fu7FRm", + vmovntdq_2 = "xroy:660FuE7Rm", + vmovntdqa_2 = "rxoy:660F38u2ArM", + vmpsadbw_4 = "rrmioy:660F3AV42rMU", + vpabsb_2 = "rmoy:660F38u1CrM", + vpabsd_2 = "rmoy:660F38u1ErM", + vpabsw_2 = "rmoy:660F38u1DrM", + vpackusdw_3 = "rrmoy:660F38V2BrM", + vpalignr_4 = "rrmioy:660F3AV0FrMU", + vpblendvb_4 = "rrmroy:660F3AV4CrMs", + vpblendw_4 = "rrmioy:660F3AV0ErMU", + vpclmulqdq_4 = "rrmio:660F3AV44rMU", + vpcmpeqq_3 = "rrmoy:660F38V29rM", + vpcmpestri_3 = "rmio:660F3Au61rMU", + vpcmpestrm_3 = "rmio:660F3Au60rMU", + vpcmpgtq_3 = "rrmoy:660F38V37rM", + vpcmpistri_3 = "rmio:660F3Au63rMU", + vpcmpistrm_3 = "rmio:660F3Au62rMU", + vpextrb_3 = "rri/do:660F3Au14nRmU|rri/qo:|xri/bo:", + vpextrw_3 = "rri/do:660FuC5rMU|xri/wo:660F3Au15nRmU", + vpextrd_3 = "mri/do:660F3Au16RmU", + vpextrq_3 = "mri/qo:660F3Au16RmU", + vphaddw_3 = "rrmoy:660F38V01rM", + vphaddd_3 = "rrmoy:660F38V02rM", + vphaddsw_3 = "rrmoy:660F38V03rM", + vphminposuw_2 = "rmo:660F38u41rM", + vphsubw_3 = "rrmoy:660F38V05rM", + vphsubd_3 = "rrmoy:660F38V06rM", + vphsubsw_3 = "rrmoy:660F38V07rM", + vpinsrb_4 = "rrri/ood:660F3AV20rMU|rrxi/oob:", + vpinsrw_4 = "rrri/ood:660FVC4rMU|rrxi/oow:", + vpinsrd_4 = "rrmi/ood:660F3AV22rMU", + vpinsrq_4 = "rrmi/ooq:660F3AVX22rMU", + vpmaddubsw_3 = "rrmoy:660F38V04rM", + vpmaxsb_3 = "rrmoy:660F38V3CrM", + vpmaxsd_3 = "rrmoy:660F38V3DrM", + vpmaxuw_3 = "rrmoy:660F38V3ErM", + vpmaxud_3 = "rrmoy:660F38V3FrM", + vpminsb_3 = "rrmoy:660F38V38rM", + vpminsd_3 = "rrmoy:660F38V39rM", + vpminuw_3 = "rrmoy:660F38V3ArM", + vpminud_3 = "rrmoy:660F38V3BrM", + vpmovmskb_2 = "rr/do:660FuD7rM|rr/dy:660FuLD7rM", + vpmovsxbw_2 = "rroy:660F38u20rM|rx/oq:|rx/yo:", + vpmovsxbd_2 = "rroy:660F38u21rM|rx/od:|rx/yq:", + vpmovsxbq_2 = "rroy:660F38u22rM|rx/ow:|rx/yd:", + vpmovsxwd_2 = "rroy:660F38u23rM|rx/oq:|rx/yo:", + vpmovsxwq_2 = "rroy:660F38u24rM|rx/od:|rx/yq:", + vpmovsxdq_2 = "rroy:660F38u25rM|rx/oq:|rx/yo:", + vpmovzxbw_2 = "rroy:660F38u30rM|rx/oq:|rx/yo:", + vpmovzxbd_2 = "rroy:660F38u31rM|rx/od:|rx/yq:", + vpmovzxbq_2 = "rroy:660F38u32rM|rx/ow:|rx/yd:", + vpmovzxwd_2 = "rroy:660F38u33rM|rx/oq:|rx/yo:", + vpmovzxwq_2 = "rroy:660F38u34rM|rx/od:|rx/yq:", + vpmovzxdq_2 = "rroy:660F38u35rM|rx/oq:|rx/yo:", + vpmuldq_3 = "rrmoy:660F38V28rM", + vpmulhrsw_3 = "rrmoy:660F38V0BrM", + vpmulld_3 = "rrmoy:660F38V40rM", + vpshufb_3 = "rrmoy:660F38V00rM", + vpshufd_3 = "rmioy:660Fu70rMU", + vpshufhw_3 = "rmioy:F30Fu70rMU", + vpshuflw_3 = "rmioy:F20Fu70rMU", + vpsignb_3 = "rrmoy:660F38V08rM", + vpsignw_3 = "rrmoy:660F38V09rM", + vpsignd_3 = "rrmoy:660F38V0ArM", + vpslldq_3 = "rrioy:660Fv737mU", + vpsllw_3 = "rrmoy:660FVF1rM|rrioy:660Fv716mU", + vpslld_3 = "rrmoy:660FVF2rM|rrioy:660Fv726mU", + vpsllq_3 = "rrmoy:660FVF3rM|rrioy:660Fv736mU", + vpsraw_3 = "rrmoy:660FVE1rM|rrioy:660Fv714mU", + vpsrad_3 = "rrmoy:660FVE2rM|rrioy:660Fv724mU", + vpsrldq_3 = "rrioy:660Fv733mU", + vpsrlw_3 = "rrmoy:660FVD1rM|rrioy:660Fv712mU", + vpsrld_3 = "rrmoy:660FVD2rM|rrioy:660Fv722mU", + vpsrlq_3 = "rrmoy:660FVD3rM|rrioy:660Fv732mU", + vptest_2 = "rmoy:660F38u17rM", + + -- AVX2 integer ops + vbroadcasti128_2 = "rx/yo:660F38u5ArM", + vinserti128_4 = "rrmi/yyo:660F3AV38rMU", + vextracti128_3 = 
"mri/oy:660F3AuL39RmU", + vpblendd_4 = "rrmioy:660F3AV02rMU", + vpbroadcastb_2 = "rro:660F38u78rM|rx/ob:|rr/yo:|rx/yb:", + vpbroadcastw_2 = "rro:660F38u79rM|rx/ow:|rr/yo:|rx/yw:", + vpbroadcastd_2 = "rro:660F38u58rM|rx/od:|rr/yo:|rx/yd:", + vpbroadcastq_2 = "rro:660F38u59rM|rx/oq:|rr/yo:|rx/yq:", + vpermd_3 = "rrmy:660F38V36rM", + vpermq_3 = "rmiy:660F3AuX00rMU", + -- *vpgather* (!vsib) + vperm2i128_4 = "rrmiy:660F3AV46rMU", + vpmaskmovd_3 = "rrxoy:660F38V8CrM|xrroy:660F38V8ERm", + vpmaskmovq_3 = "rrxoy:660F38VX8CrM|xrroy:660F38VX8ERm", + vpsllvd_3 = "rrmoy:660F38V47rM", + vpsllvq_3 = "rrmoy:660F38VX47rM", + vpsravd_3 = "rrmoy:660F38V46rM", + vpsrlvd_3 = "rrmoy:660F38V45rM", + vpsrlvq_3 = "rrmoy:660F38VX45rM", + + -- Intel ADX + adcx_2 = "rmqd:660F38F6rM", + adox_2 = "rmqd:F30F38F6rM", + + -- BMI1 + andn_3 = "rrmqd:0F38VF2rM", + bextr_3 = "rmrqd:0F38wF7rM", + blsi_2 = "rmqd:0F38vF33m", + blsmsk_2 = "rmqd:0F38vF32m", + blsr_2 = "rmqd:0F38vF31m", + tzcnt_2 = "rmqdw:F30FBCrM", + + -- BMI2 + bzhi_3 = "rmrqd:0F38wF5rM", + mulx_3 = "rrmqd:F20F38VF6rM", + pdep_3 = "rrmqd:F20F38VF5rM", + pext_3 = "rrmqd:F30F38VF5rM", + rorx_3 = "rmSqd:F20F3AuF0rMS", + sarx_3 = "rmrqd:F30F38wF7rM", + shrx_3 = "rmrqd:F20F38wF7rM", + shlx_3 = "rmrqd:660F38wF7rM", + + -- FMA3 + vfmaddsub132pd_3 = "rrmoy:660F38VX96rM", + vfmaddsub132ps_3 = "rrmoy:660F38V96rM", + vfmaddsub213pd_3 = "rrmoy:660F38VXA6rM", + vfmaddsub213ps_3 = "rrmoy:660F38VA6rM", + vfmaddsub231pd_3 = "rrmoy:660F38VXB6rM", + vfmaddsub231ps_3 = "rrmoy:660F38VB6rM", + + vfmsubadd132pd_3 = "rrmoy:660F38VX97rM", + vfmsubadd132ps_3 = "rrmoy:660F38V97rM", + vfmsubadd213pd_3 = "rrmoy:660F38VXA7rM", + vfmsubadd213ps_3 = "rrmoy:660F38VA7rM", + vfmsubadd231pd_3 = "rrmoy:660F38VXB7rM", + vfmsubadd231ps_3 = "rrmoy:660F38VB7rM", + + vfmadd132pd_3 = "rrmoy:660F38VX98rM", + vfmadd132ps_3 = "rrmoy:660F38V98rM", + vfmadd132sd_3 = "rrro:660F38VX99rM|rrx/ooq:", + vfmadd132ss_3 = "rrro:660F38V99rM|rrx/ood:", + vfmadd213pd_3 = "rrmoy:660F38VXA8rM", + vfmadd213ps_3 = "rrmoy:660F38VA8rM", + vfmadd213sd_3 = "rrro:660F38VXA9rM|rrx/ooq:", + vfmadd213ss_3 = "rrro:660F38VA9rM|rrx/ood:", + vfmadd231pd_3 = "rrmoy:660F38VXB8rM", + vfmadd231ps_3 = "rrmoy:660F38VB8rM", + vfmadd231sd_3 = "rrro:660F38VXB9rM|rrx/ooq:", + vfmadd231ss_3 = "rrro:660F38VB9rM|rrx/ood:", + + vfmsub132pd_3 = "rrmoy:660F38VX9ArM", + vfmsub132ps_3 = "rrmoy:660F38V9ArM", + vfmsub132sd_3 = "rrro:660F38VX9BrM|rrx/ooq:", + vfmsub132ss_3 = "rrro:660F38V9BrM|rrx/ood:", + vfmsub213pd_3 = "rrmoy:660F38VXAArM", + vfmsub213ps_3 = "rrmoy:660F38VAArM", + vfmsub213sd_3 = "rrro:660F38VXABrM|rrx/ooq:", + vfmsub213ss_3 = "rrro:660F38VABrM|rrx/ood:", + vfmsub231pd_3 = "rrmoy:660F38VXBArM", + vfmsub231ps_3 = "rrmoy:660F38VBArM", + vfmsub231sd_3 = "rrro:660F38VXBBrM|rrx/ooq:", + vfmsub231ss_3 = "rrro:660F38VBBrM|rrx/ood:", + + vfnmadd132pd_3 = "rrmoy:660F38VX9CrM", + vfnmadd132ps_3 = "rrmoy:660F38V9CrM", + vfnmadd132sd_3 = "rrro:660F38VX9DrM|rrx/ooq:", + vfnmadd132ss_3 = "rrro:660F38V9DrM|rrx/ood:", + vfnmadd213pd_3 = "rrmoy:660F38VXACrM", + vfnmadd213ps_3 = "rrmoy:660F38VACrM", + vfnmadd213sd_3 = "rrro:660F38VXADrM|rrx/ooq:", + vfnmadd213ss_3 = "rrro:660F38VADrM|rrx/ood:", + vfnmadd231pd_3 = "rrmoy:660F38VXBCrM", + vfnmadd231ps_3 = "rrmoy:660F38VBCrM", + vfnmadd231sd_3 = "rrro:660F38VXBDrM|rrx/ooq:", + vfnmadd231ss_3 = "rrro:660F38VBDrM|rrx/ood:", + + vfnmsub132pd_3 = "rrmoy:660F38VX9ErM", + vfnmsub132ps_3 = "rrmoy:660F38V9ErM", + vfnmsub132sd_3 = "rrro:660F38VX9FrM|rrx/ooq:", + vfnmsub132ss_3 = "rrro:660F38V9FrM|rrx/ood:", + 
vfnmsub213pd_3 = "rrmoy:660F38VXAErM", + vfnmsub213ps_3 = "rrmoy:660F38VAErM", + vfnmsub213sd_3 = "rrro:660F38VXAFrM|rrx/ooq:", + vfnmsub213ss_3 = "rrro:660F38VAFrM|rrx/ood:", + vfnmsub231pd_3 = "rrmoy:660F38VXBErM", + vfnmsub231ps_3 = "rrmoy:660F38VBErM", + vfnmsub231sd_3 = "rrro:660F38VXBFrM|rrx/ooq:", + vfnmsub231ss_3 = "rrro:660F38VBFrM|rrx/ood:", +} + +------------------------------------------------------------------------------ + +-- Arithmetic ops. +for name,n in pairs{ add = 0, ["or"] = 1, adc = 2, sbb = 3, + ["and"] = 4, sub = 5, xor = 6, cmp = 7 } do + local n8 = shl(n, 3) + map_op[name.."_2"] = format( + "mr:%02XRm|rm:%02XrM|mI1qdw:81%XmI|mS1qdw:83%XmS|Ri1qdwb:%02Xri|mi1qdwb:81%Xmi", + 1+n8, 3+n8, n, n, 5+n8, n) +end + +-- Shift ops. +for name,n in pairs{ rol = 0, ror = 1, rcl = 2, rcr = 3, + shl = 4, shr = 5, sar = 7, sal = 4 } do + map_op[name.."_2"] = format("m1:D1%Xm|mC1qdwb:D3%Xm|mi:C1%XmU", n, n, n) +end + +-- Conditional ops. +for cc,n in pairs(map_cc) do + map_op["j"..cc.."_1"] = format("J.:n0F8%XJ", n) -- short: 7%X + map_op["set"..cc.."_1"] = format("mb:n0F9%X2m", n) + map_op["cmov"..cc.."_2"] = format("rmqdw:0F4%XrM", n) -- P6+ +end + +-- FP arithmetic ops. +for name,n in pairs{ add = 0, mul = 1, com = 2, comp = 3, + sub = 4, subr = 5, div = 6, divr = 7 } do + local nc = 0xc0 + shl(n, 3) + local nr = nc + (n < 4 and 0 or (n % 2 == 0 and 8 or -8)) + local fn = "f"..name + map_op[fn.."_1"] = format("ff:D8%02Xr|xd:D8%Xm|xq:nDC%Xm", nc, n, n) + if n == 2 or n == 3 then + map_op[fn.."_2"] = format("Fff:D8%02XR|Fx2d:D8%XM|Fx2q:nDC%XM", nc, n, n) + else + map_op[fn.."_2"] = format("Fff:D8%02XR|fFf:DC%02Xr|Fx2d:D8%XM|Fx2q:nDC%XM", nc, nr, n, n) + map_op[fn.."p_1"] = format("ff:DE%02Xr", nr) + map_op[fn.."p_2"] = format("fFf:DE%02Xr", nr) + end + map_op["fi"..name.."_1"] = format("xd:DA%Xm|xw:nDE%Xm", n, n) +end + +-- FP conditional moves. +for cc,n in pairs{ b=0, e=1, be=2, u=3, nb=4, ne=5, nbe=6, nu=7 } do + local nc = 0xdac0 + shl(band(n, 3), 3) + shl(band(n, 4), 6) + map_op["fcmov"..cc.."_1"] = format("ff:%04Xr", nc) -- P6+ + map_op["fcmov"..cc.."_2"] = format("Fff:%04XR", nc) -- P6+ +end + +-- SSE / AVX FP arithmetic ops. +for name,n in pairs{ sqrt = 1, add = 8, mul = 9, + sub = 12, min = 13, div = 14, max = 15 } do + map_op[name.."ps_2"] = format("rmo:0F5%XrM", n) + map_op[name.."ss_2"] = format("rro:F30F5%XrM|rx/od:", n) + map_op[name.."pd_2"] = format("rmo:660F5%XrM", n) + map_op[name.."sd_2"] = format("rro:F20F5%XrM|rx/oq:", n) + if n ~= 1 then + map_op["v"..name.."ps_3"] = format("rrmoy:0FV5%XrM", n) + map_op["v"..name.."ss_3"] = format("rrro:F30FV5%XrM|rrx/ood:", n) + map_op["v"..name.."pd_3"] = format("rrmoy:660FV5%XrM", n) + map_op["v"..name.."sd_3"] = format("rrro:F20FV5%XrM|rrx/ooq:", n) + end +end + +-- SSE2 / AVX / AVX2 integer arithmetic ops (66 0F leaf). 
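+-- E.g. the paddb = 0xFC entry below expands to the templates
+-- paddb_2 = "rmo:660FFCrM" and vpaddb_3 = "rrmoy:660FVFCrM".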
+for name,n in pairs{ + paddb = 0xFC, paddw = 0xFD, paddd = 0xFE, paddq = 0xD4, + paddsb = 0xEC, paddsw = 0xED, packssdw = 0x6B, + packsswb = 0x63, packuswb = 0x67, paddusb = 0xDC, + paddusw = 0xDD, pand = 0xDB, pandn = 0xDF, pavgb = 0xE0, + pavgw = 0xE3, pcmpeqb = 0x74, pcmpeqd = 0x76, + pcmpeqw = 0x75, pcmpgtb = 0x64, pcmpgtd = 0x66, + pcmpgtw = 0x65, pmaddwd = 0xF5, pmaxsw = 0xEE, + pmaxub = 0xDE, pminsw = 0xEA, pminub = 0xDA, + pmulhuw = 0xE4, pmulhw = 0xE5, pmullw = 0xD5, + pmuludq = 0xF4, por = 0xEB, psadbw = 0xF6, psubb = 0xF8, + psubw = 0xF9, psubd = 0xFA, psubq = 0xFB, psubsb = 0xE8, + psubsw = 0xE9, psubusb = 0xD8, psubusw = 0xD9, + punpckhbw = 0x68, punpckhwd = 0x69, punpckhdq = 0x6A, + punpckhqdq = 0x6D, punpcklbw = 0x60, punpcklwd = 0x61, + punpckldq = 0x62, punpcklqdq = 0x6C, pxor = 0xEF +} do + map_op[name.."_2"] = format("rmo:660F%02XrM", n) + map_op["v"..name.."_3"] = format("rrmoy:660FV%02XrM", n) +end + +------------------------------------------------------------------------------ + +local map_vexarg = { u = false, v = 1, V = 2, w = 3 } + +-- Process pattern string. +local function dopattern(pat, args, sz, op, needrex) + local digit, addin, vex + local opcode = 0 + local szov = sz + local narg = 1 + local rex = 0 + + -- Limit number of section buffer positions used by a single dasm_put(). + -- A single opcode needs a maximum of 6 positions. + if secpos+6 > maxsecpos then wflush() end + + -- Process each character. + for c in gmatch(pat.."|", ".") do + if match(c, "%x") then -- Hex digit. + digit = byte(c) - 48 + if digit > 48 then digit = digit - 39 + elseif digit > 16 then digit = digit - 7 end + opcode = opcode*16 + digit + addin = nil + elseif c == "n" then -- Disable operand size mods for opcode. + szov = nil + elseif c == "X" then -- Force REX.W. + rex = 8 + elseif c == "L" then -- Force VEX.L. + vex.l = true + elseif c == "r" then -- Merge 1st operand regno. into opcode. + addin = args[1]; opcode = opcode + (addin.reg % 8) + if narg < 2 then narg = 2 end + elseif c == "R" then -- Merge 2nd operand regno. into opcode. + addin = args[2]; opcode = opcode + (addin.reg % 8) + narg = 3 + elseif c == "m" or c == "M" then -- Encode ModRM/SIB. + local s + if addin then + s = addin.reg + opcode = opcode - band(s, 7) -- Undo regno opcode merge. + else + s = band(opcode, 15) -- Undo last digit. + opcode = shr(opcode, 4) + end + local nn = c == "m" and 1 or 2 + local t = args[nn] + if narg <= nn then narg = nn + 1 end + if szov == "q" and rex == 0 then rex = rex + 8 end + if t.reg and t.reg > 7 then rex = rex + 1 end + if t.xreg and t.xreg > 7 then rex = rex + 2 end + if s > 7 then rex = rex + 4 end + if needrex then rex = rex + 16 end + local psz, sk = wputop(szov, opcode, rex, vex, s < 0, t.vreg or t.vxreg) + opcode = nil + local imark = sub(pat, -1) -- Force a mark (ugly). + -- Put ModRM/SIB with regno/last digit as spare. + wputmrmsib(t, imark, s, addin and addin.vreg, psz, sk) + addin = nil + elseif map_vexarg[c] ~= nil then -- Encode using VEX prefix + local b = band(opcode, 255); opcode = shr(opcode, 8) + local m = 1 + if b == 0x38 then m = 2 + elseif b == 0x3a then m = 3 end + if m ~= 1 then b = band(opcode, 255); opcode = shr(opcode, 8) end + if b ~= 0x0f then + werror("expected `0F', `0F38', or `0F3A' to precede `"..c.. 
+ "' in pattern `"..pat.."' for `"..op.."'") + end + local v = map_vexarg[c] + if v then v = remove(args, v) end + b = band(opcode, 255) + local p = 0 + if b == 0x66 then p = 1 + elseif b == 0xf3 then p = 2 + elseif b == 0xf2 then p = 3 end + if p ~= 0 then opcode = shr(opcode, 8) end + if opcode ~= 0 then wputop(nil, opcode, 0); opcode = 0 end + vex = { m = m, p = p, v = v } + else + if opcode then -- Flush opcode. + if szov == "q" and rex == 0 then rex = rex + 8 end + if needrex then rex = rex + 16 end + if addin and addin.reg == -1 then + local psz, sk = wputop(szov, opcode - 7, rex, vex, true) + wvreg("opcode", addin.vreg, psz, sk) + else + if addin and addin.reg > 7 then rex = rex + 1 end + wputop(szov, opcode, rex, vex) + end + opcode = nil + end + if c == "|" then break end + if c == "o" then -- Offset (pure 32 bit displacement). + wputdarg(args[1].disp); if narg < 2 then narg = 2 end + elseif c == "O" then + wputdarg(args[2].disp); narg = 3 + else + -- Anything else is an immediate operand. + local a = args[narg] + narg = narg + 1 + local mode, imm = a.mode, a.imm + if mode == "iJ" and not match(x64 and "J" or "iIJ", c) then + werror("bad operand size for label") + end + if c == "S" then + wputsbarg(imm) + elseif c == "U" then + wputbarg(imm) + elseif c == "W" then + wputwarg(imm) + elseif c == "i" or c == "I" then + if mode == "iJ" then + wputlabel("IMM_", imm, 1) + elseif mode == "iI" and c == "I" then + waction(sz == "w" and "IMM_WB" or "IMM_DB", imm) + else + wputszarg(sz, imm) + end + elseif c == "J" then + if mode == "iPJ" then + waction("REL_A", imm) -- !x64 (secpos) + else + wputlabel("REL_", imm, 2) + end + elseif c == "s" then + local reg = a.reg + if reg < 0 then + wputb(0) + wvreg("imm.hi", a.vreg) + else + wputb(shl(reg, 4)) + end + else + werror("bad char `"..c.."' in pattern `"..pat.."' for `"..op.."'") + end + end + end + end +end + +------------------------------------------------------------------------------ + +-- Mapping of operand modes to short names. Suppress output with '#'. +local map_modename = { + r = "reg", R = "eax", C = "cl", x = "mem", m = "mrm", i = "imm", + f = "stx", F = "st0", J = "lbl", ["1"] = "1", + I = "#", S = "#", O = "#", +} + +-- Return a table/string showing all possible operand modes. +local function templatehelp(template, nparams) + if nparams == 0 then return "" end + local t = {} + for tm in gmatch(template, "[^%|]+") do + local s = map_modename[sub(tm, 1, 1)] + s = s..gsub(sub(tm, 2, nparams), ".", function(c) + return ", "..map_modename[c] + end) + if not match(s, "#") then t[#t+1] = s end + end + return t +end + +-- Match operand modes against mode match part of template. +local function matchtm(tm, args) + for i=1,#args do + if not match(args[i].mode, sub(tm, i, i)) then return end + end + return true +end + +-- Handle opcodes defined with template strings. +map_op[".template__"] = function(params, template, nparams) + if not params then return templatehelp(template, nparams) end + local args = {} + + -- Zero-operand opcodes have no match part. + if #params == 0 then + dopattern(template, args, "d", params.op, nil) + return + end + + -- Determine common operand size (coerce undefined size) or flag as mixed. 
+ local sz, szmix, needrex + for i,p in ipairs(params) do + args[i] = parseoperand(p) + local nsz = args[i].opsize + if nsz then + if sz and sz ~= nsz then szmix = true else sz = nsz end + end + local nrex = args[i].needrex + if nrex ~= nil then + if needrex == nil then + needrex = nrex + elseif needrex ~= nrex then + werror("bad mix of byte-addressable registers") + end + end + end + + -- Try all match:pattern pairs (separated by '|'). + local gotmatch, lastpat + for tm in gmatch(template, "[^%|]+") do + -- Split off size match (starts after mode match) and pattern string. + local szm, pat = match(tm, "^(.-):(.*)$", #args+1) + if pat == "" then pat = lastpat else lastpat = pat end + if matchtm(tm, args) then + local prefix = sub(szm, 1, 1) + if prefix == "/" then -- Exactly match leading operand sizes. + for i = #szm,1,-1 do + if i == 1 then + dopattern(pat, args, sz, params.op, needrex) -- Process pattern. + return + elseif args[i-1].opsize ~= sub(szm, i, i) then + break + end + end + else -- Match common operand size. + local szp = sz + if szm == "" then szm = x64 and "qdwb" or "dwb" end -- Default sizes. + if prefix == "1" then szp = args[1].opsize; szmix = nil + elseif prefix == "2" then szp = args[2].opsize; szmix = nil end + if not szmix and (prefix == "." or match(szm, szp or "#")) then + dopattern(pat, args, szp, params.op, needrex) -- Process pattern. + return + end + end + gotmatch = true + end + end + + local msg = "bad operand mode" + if gotmatch then + if szmix then + msg = "mixed operand size" + else + msg = sz and "bad operand size" or "missing operand size" + end + end + + werror(msg.." in `"..opmodestr(params.op, args).."'") +end + +------------------------------------------------------------------------------ + +-- x64-specific opcode for 64 bit immediates and displacements. +if x64 then + function map_op.mov64_2(params) + if not params then return { "reg, imm", "reg, [disp]", "[disp], reg" } end + if secpos+2 > maxsecpos then wflush() end + local opcode, op64, sz, rex, vreg + local op64 = match(params[1], "^%[%s*(.-)%s*%]$") + if op64 then + local a = parseoperand(params[2]) + if a.mode ~= "rmR" then werror("bad operand mode") end + sz = a.opsize + rex = sz == "q" and 8 or 0 + opcode = 0xa3 + else + op64 = match(params[2], "^%[%s*(.-)%s*%]$") + local a = parseoperand(params[1]) + if op64 then + if a.mode ~= "rmR" then werror("bad operand mode") end + sz = a.opsize + rex = sz == "q" and 8 or 0 + opcode = 0xa1 + else + if sub(a.mode, 1, 1) ~= "r" or a.opsize ~= "q" then + werror("bad operand mode") + end + op64 = params[2] + if a.reg == -1 then + vreg = a.vreg + opcode = 0xb8 + else + opcode = 0xb8 + band(a.reg, 7) + end + rex = a.reg > 7 and 9 or 8 + end + end + local psz, sk = wputop(sz, opcode, rex, nil, vreg) + wvreg("opcode", vreg, psz, sk) + waction("IMM_D", format("(unsigned int)(%s)", op64)) + waction("IMM_D", format("(unsigned int)((%s)>>32)", op64)) + end +end + +------------------------------------------------------------------------------ + +-- Pseudo-opcodes for data storage. +local function op_data(params) + if not params then return "imm..." 
end + local sz = sub(params.op, 2, 2) + if sz == "l" then sz = "d" elseif sz == "a" then sz = addrsize end + for _,p in ipairs(params) do + local a = parseoperand(p, sz == "q") + if sub(a.mode, 1, 1) ~= "i" or (a.opsize and a.opsize ~= sz) then + werror("bad mode or size in `"..p.."'") + end + if a.mode == "iJ" then + wputlabel("IMM_", a.imm, 1) + elseif sz == "q" then + wputqarg(a.imm) + else + wputszarg(sz, a.imm) + end + if secpos+2 > maxsecpos then wflush() end + end +end + +map_op[".byte_*"] = op_data +map_op[".sbyte_*"] = op_data +map_op[".word_*"] = op_data +map_op[".dword_*"] = op_data +map_op[".qword_*"] = op_data +map_op[".aword_*"] = op_data +map_op[".long_*"] = op_data +map_op[".quad_*"] = op_data +map_op[".addr_*"] = op_data + +------------------------------------------------------------------------------ + +-- Pseudo-opcode to mark the position where the action list is to be emitted. +map_op[".actionlist_1"] = function(params) + if not params then return "cvar" end + local name = params[1] -- No syntax check. You get to keep the pieces. + wline(function(out) writeactions(out, name) end) +end + +-- Pseudo-opcode to mark the position where the global enum is to be emitted. +map_op[".globals_1"] = function(params) + if not params then return "prefix" end + local prefix = params[1] -- No syntax check. You get to keep the pieces. + wline(function(out) writeglobals(out, prefix) end) +end + +-- Pseudo-opcode to mark the position where the global names are to be emitted. +map_op[".globalnames_1"] = function(params) + if not params then return "cvar" end + local name = params[1] -- No syntax check. You get to keep the pieces. + wline(function(out) writeglobalnames(out, name) end) +end + +-- Pseudo-opcode to mark the position where the extern names are to be emitted. +map_op[".externnames_1"] = function(params) + if not params then return "cvar" end + local name = params[1] -- No syntax check. You get to keep the pieces. + wline(function(out) writeexternnames(out, name) end) +end + +------------------------------------------------------------------------------ + +-- Label pseudo-opcode (converted from trailing colon form). +map_op[".label_2"] = function(params) + if not params then return "[1-9] | ->global | =>pcexpr [, addr]" end + if secpos+2 > maxsecpos then wflush() end + local a = parseoperand(params[1]) + local mode, imm = a.mode, a.imm + if type(imm) == "number" and (mode == "iJ" or (imm >= 1 and imm <= 9)) then + -- Local label (1: ... 9:) or global label (->global:). + waction("LABEL_LG", nil, 1) + wputxb(imm) + elseif mode == "iJ" then + -- PC label (=>pcexpr:). + waction("LABEL_PC", imm) + else + werror("bad label definition") + end + -- SETLABEL must immediately follow LABEL_LG/LABEL_PC. + local addr = params[2] + if addr then + local a = parseoperand(addr) + if a.mode == "iPJ" then + waction("SETLABEL", a.imm) + else + werror("bad label assignment") + end + end +end +map_op[".label_1"] = map_op[".label_2"] + +------------------------------------------------------------------------------ + +-- Alignment pseudo-opcode. +map_op[".align_1"] = function(params) + if not params then return "numpow2" end + if secpos+1 > maxsecpos then wflush() end + local align = tonumber(params[1]) or map_opsizenum[map_opsize[params[1]]] + if align then + local x = align + -- Must be a power of 2 in the range (2 ... 256). + for i=1,8 do + x = x / 2 + if x == 1 then + waction("ALIGN", nil, 1) + wputxb(align-1) -- Action byte is 2**n-1. 
+ return + end + end + end + werror("bad alignment") +end + +-- Spacing pseudo-opcode. +map_op[".space_2"] = function(params) + if not params then return "num [, filler]" end + if secpos+1 > maxsecpos then wflush() end + waction("SPACE", params[1]) + local fill = params[2] + if fill then + fill = tonumber(fill) + if not fill or fill < 0 or fill > 255 then werror("bad filler") end + end + wputxb(fill or 0) +end +map_op[".space_1"] = map_op[".space_2"] + +------------------------------------------------------------------------------ + +-- Pseudo-opcode for (primitive) type definitions (map to C types). +map_op[".type_3"] = function(params, nparams) + if not params then + return nparams == 2 and "name, ctype" or "name, ctype, reg" + end + local name, ctype, reg = params[1], params[2], params[3] + if not match(name, "^[%a_][%w_]*$") then + werror("bad type name `"..name.."'") + end + local tp = map_type[name] + if tp then + werror("duplicate type `"..name.."'") + end + if reg and not map_reg_valid_base[reg] then + werror("bad base register `"..(map_reg_rev[reg] or reg).."'") + end + -- Add #type to defines. A bit unclean to put it in map_archdef. + map_archdef["#"..name] = "sizeof("..ctype..")" + -- Add new type and emit shortcut define. + local num = ctypenum + 1 + map_type[name] = { + ctype = ctype, + ctypefmt = format("Dt%X(%%s)", num), + reg = reg, + } + wline(format("#define Dt%X(_V) (int)(ptrdiff_t)&(((%s *)0)_V)", num, ctype)) + ctypenum = num +end +map_op[".type_2"] = map_op[".type_3"] + +-- Dump type definitions. +local function dumptypes(out, lvl) + local t = {} + for name in pairs(map_type) do t[#t+1] = name end + sort(t) + out:write("Type definitions:\n") + for _,name in ipairs(t) do + local tp = map_type[name] + local reg = tp.reg and map_reg_rev[tp.reg] or "" + out:write(format(" %-20s %-20s %s\n", name, tp.ctype, reg)) + end + out:write("\n") +end + +------------------------------------------------------------------------------ + +-- Set the current section. +function _M.section(num) + waction("SECTION") + wputxb(num) + wflush(true) -- SECTION is a terminal action. +end + +------------------------------------------------------------------------------ + +-- Dump architecture description. +function _M.dumparch(out) + out:write(format("DynASM %s version %s, released %s\n\n", + _info.arch, _info.version, _info.release)) + dumpregs(out) + dumpactions(out) +end + +-- Dump all user defined elements. +function _M.dumpdef(out, lvl) + dumptypes(out, lvl) + dumpglobals(out, lvl) + dumpexterns(out, lvl) +end + +------------------------------------------------------------------------------ + +-- Pass callbacks from/to the DynASM core. +function _M.passcb(wl, we, wf, ww) + wline, werror, wfatal, wwarn = wl, we, wf, ww + return wflush +end + +-- Setup the arch-specific module. +function _M.setup(arch, opt) + g_arch, g_opt = arch, opt +end + +-- Merge the core maps and the arch-specific maps. +function _M.mergemaps(map_coreop, map_def) + setmetatable(map_op, { __index = map_coreop }) + setmetatable(map_def, { __index = map_archdef }) + return map_op, map_def +end + +return _M + +------------------------------------------------------------------------------ + diff --git a/ext/opcache/jit/ir/dynasm/dynasm.lua b/ext/opcache/jit/ir/dynasm/dynasm.lua new file mode 100644 index 00000000000..2583295fce3 --- /dev/null +++ b/ext/opcache/jit/ir/dynasm/dynasm.lua @@ -0,0 +1,1095 @@ +------------------------------------------------------------------------------ +-- DynASM. 
A dynamic assembler for code generation engines. +-- Originally designed and implemented for LuaJIT. +-- +-- Copyright (C) 2005-2021 Mike Pall. All rights reserved. +-- See below for full copyright notice. +------------------------------------------------------------------------------ + +-- Application information. +local _info = { + name = "DynASM", + description = "A dynamic assembler for code generation engines", + version = "1.5.0", + vernum = 10500, + release = "2021-05-02", + author = "Mike Pall", + url = "https://luajit.org/dynasm.html", + license = "MIT", + copyright = [[ +Copyright (C) 2005-2021 Mike Pall. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice shall be +included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +[ MIT license: https://www.opensource.org/licenses/mit-license.php ] +]], +} + +-- Cache library functions. +local type, pairs, ipairs = type, pairs, ipairs +local pcall, error, assert = pcall, error, assert +local _s = string +local sub, match, gmatch, gsub = _s.sub, _s.match, _s.gmatch, _s.gsub +local format, rep, upper = _s.format, _s.rep, _s.upper +local _t = table +local insert, remove, concat, sort = _t.insert, _t.remove, _t.concat, _t.sort +local exit = os.exit +local io = io +local stdin, stdout, stderr = io.stdin, io.stdout, io.stderr + +------------------------------------------------------------------------------ + +-- Program options. +local g_opt = {} + +-- Global state for current file. +local g_fname, g_curline, g_indent, g_lineno, g_synclineno, g_arch +local g_errcount = 0 + +-- Write buffer for output file. +local g_wbuffer, g_capbuffer + +------------------------------------------------------------------------------ + +-- Write an output line (or callback function) to the buffer. +local function wline(line, needindent) + local buf = g_capbuffer or g_wbuffer + buf[#buf+1] = needindent and g_indent..line or line + g_synclineno = g_synclineno + 1 +end + +-- Write assembler line as a comment, if requested. +local function wcomment(aline) + if g_opt.comment then + wline(g_opt.comment..aline..g_opt.endcomment, true) + end +end + +-- Resync CPP line numbers. +local function wsync() + if g_synclineno ~= g_lineno and g_opt.cpp then + wline("#line "..g_lineno..' "'..g_fname..'"') + g_synclineno = g_lineno + end +end + +-- Dummy action flush function. Replaced with arch-specific function later. +local function wflush(term) +end + +-- Dump all buffered output lines. 
+local function wdumplines(out, buf) + for _,line in ipairs(buf) do + if type(line) == "string" then + assert(out:write(line, "\n")) + else + -- Special callback to dynamically insert lines after end of processing. + line(out) + end + end +end + +------------------------------------------------------------------------------ + +-- Emit an error. Processing continues with next statement. +local function werror(msg) + error(format("%s:%s: error: %s:\n%s", g_fname, g_lineno, msg, g_curline), 0) +end + +-- Emit a fatal error. Processing stops. +local function wfatal(msg) + g_errcount = "fatal" + werror(msg) +end + +-- Print a warning. Processing continues. +local function wwarn(msg) + stderr:write(format("%s:%s: warning: %s:\n%s\n", + g_fname, g_lineno, msg, g_curline)) +end + +-- Print caught error message. But suppress excessive errors. +local function wprinterr(...) + if type(g_errcount) == "number" then + -- Regular error. + g_errcount = g_errcount + 1 + if g_errcount < 21 then -- Seems to be a reasonable limit. + stderr:write(...) + elseif g_errcount == 21 then + stderr:write(g_fname, + ":*: warning: too many errors (suppressed further messages).\n") + end + else + -- Fatal error. + stderr:write(...) + return true -- Stop processing. + end +end + +------------------------------------------------------------------------------ + +-- Map holding all option handlers. +local opt_map = {} +local opt_current + +-- Print error and exit with error status. +local function opterror(...) + stderr:write("dynasm.lua: ERROR: ", ...) + stderr:write("\n") + exit(1) +end + +-- Get option parameter. +local function optparam(args) + local argn = args.argn + local p = args[argn] + if not p then + opterror("missing parameter for option `", opt_current, "'.") + end + args.argn = argn + 1 + return p +end + +------------------------------------------------------------------------------ + +-- Core pseudo-opcodes. +local map_coreop = {} +-- Dummy opcode map. Replaced by arch-specific map. +local map_op = {} + +-- Forward declarations. +local dostmt +local readfile + +------------------------------------------------------------------------------ + +-- Map for defines (initially empty, chains to arch-specific map). +local map_def = {} + +-- Pseudo-opcode to define a substitution. +map_coreop[".define_2"] = function(params, nparams) + if not params then return nparams == 1 and "name" or "name, subst" end + local name, def = params[1], params[2] or "1" + if not match(name, "^[%a_][%w_]*$") then werror("bad or duplicate define") end + map_def[name] = def +end +map_coreop[".define_1"] = map_coreop[".define_2"] + +-- Define a substitution on the command line. +function opt_map.D(args) + local namesubst = optparam(args) + local name, subst = match(namesubst, "^([%a_][%w_]*)=(.*)$") + if name then + map_def[name] = subst + elseif match(namesubst, "^[%a_][%w_]*$") then + map_def[namesubst] = "1" + else + opterror("bad define") + end +end + +-- Undefine a substitution on the command line. +function opt_map.U(args) + local name = optparam(args) + if match(name, "^[%a_][%w_]*$") then + map_def[name] = nil + else + opterror("bad define") + end +end + +-- Helper for definesubst. +local gotsubst + +local function definesubst_one(word) + local subst = map_def[word] + if subst then gotsubst = word; return subst else return word end +end + +-- Iteratively substitute defines. +local function definesubst(stmt) + -- Limit number of iterations. 
+ for i=1,100 do + gotsubst = false + stmt = gsub(stmt, "#?[%w_]+", definesubst_one) + if not gotsubst then break end + end + if gotsubst then wfatal("recursive define involving `"..gotsubst.."'") end + return stmt +end + +-- Dump all defines. +local function dumpdefines(out, lvl) + local t = {} + for name in pairs(map_def) do + t[#t+1] = name + end + sort(t) + out:write("Defines:\n") + for _,name in ipairs(t) do + local subst = map_def[name] + if g_arch then subst = g_arch.revdef(subst) end + out:write(format(" %-20s %s\n", name, subst)) + end + out:write("\n") +end + +------------------------------------------------------------------------------ + +-- Support variables for conditional assembly. +local condlevel = 0 +local condstack = {} + +-- Evaluate condition with a Lua expression. Substitutions already performed. +local function cond_eval(cond) + local func, err + if setfenv then + func, err = loadstring("return "..cond, "=expr") + else + -- No globals. All unknown identifiers evaluate to nil. + func, err = load("return "..cond, "=expr", "t", {}) + end + if func then + if setfenv then + setfenv(func, {}) -- No globals. All unknown identifiers evaluate to nil. + end + local ok, res = pcall(func) + if ok then + if res == 0 then return false end -- Oh well. + return not not res + end + err = res + end + wfatal("bad condition: "..err) +end + +-- Skip statements until next conditional pseudo-opcode at the same level. +local function stmtskip() + local dostmt_save = dostmt + local lvl = 0 + dostmt = function(stmt) + local op = match(stmt, "^%s*(%S+)") + if op == ".if" then + lvl = lvl + 1 + elseif lvl ~= 0 then + if op == ".endif" then lvl = lvl - 1 end + elseif op == ".elif" or op == ".else" or op == ".endif" then + dostmt = dostmt_save + dostmt(stmt) + end + end +end + +-- Pseudo-opcodes for conditional assembly. +map_coreop[".if_1"] = function(params) + if not params then return "condition" end + local lvl = condlevel + 1 + local res = cond_eval(params[1]) + condlevel = lvl + condstack[lvl] = res + if not res then stmtskip() end +end + +map_coreop[".elif_1"] = function(params) + if not params then return "condition" end + if condlevel == 0 then wfatal(".elif without .if") end + local lvl = condlevel + local res = condstack[lvl] + if res then + if res == "else" then wfatal(".elif after .else") end + else + res = cond_eval(params[1]) + if res then + condstack[lvl] = res + return + end + end + stmtskip() +end + +map_coreop[".else_0"] = function(params) + if condlevel == 0 then wfatal(".else without .if") end + local lvl = condlevel + local res = condstack[lvl] + condstack[lvl] = "else" + if res then + if res == "else" then wfatal(".else after .else") end + stmtskip() + end +end + +map_coreop[".endif_0"] = function(params) + local lvl = condlevel + if lvl == 0 then wfatal(".endif without .if") end + condlevel = lvl - 1 +end + +-- Check for unfinished conditionals. +local function checkconds() + if g_errcount ~= "fatal" and condlevel ~= 0 then + wprinterr(g_fname, ":*: error: unbalanced conditional\n") + end +end + +------------------------------------------------------------------------------ + +-- Search for a file in the given path and open it for reading. +local function pathopen(path, name) + local dirsep = package and match(package.path, "\\") and "\\" or "/" + for _,p in ipairs(path) do + local fullname = p == "" and name or p..dirsep..name + local fin = io.open(fullname, "r") + if fin then + g_fname = fullname + return fin + end + end +end + +-- Include a file. 
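+-- The named file is searched on the -I include path (most recently added
+-- directory first, with the bare name tried last) and processed by a
+-- recursive call to readfile().  Typical use in a .dasc file (file name
+-- illustrative):
+--   |.include helpers.dasc
+-- Afterwards g_synclineno is invalidated, so the next output line forces
+-- a fresh #line directive for the including file.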
+map_coreop[".include_1"] = function(params) + if not params then return "filename" end + local name = params[1] + -- Save state. Ugly, I know. but upvalues are fast. + local gf, gl, gcl, gi = g_fname, g_lineno, g_curline, g_indent + -- Read the included file. + local fatal = readfile(pathopen(g_opt.include, name) or + wfatal("include file `"..name.."' not found")) + -- Restore state. + g_synclineno = -1 + g_fname, g_lineno, g_curline, g_indent = gf, gl, gcl, gi + if fatal then wfatal("in include file") end +end + +-- Make .include and conditionals initially available, too. +map_op[".include_1"] = map_coreop[".include_1"] +map_op[".if_1"] = map_coreop[".if_1"] +map_op[".elif_1"] = map_coreop[".elif_1"] +map_op[".else_0"] = map_coreop[".else_0"] +map_op[".endif_0"] = map_coreop[".endif_0"] + +------------------------------------------------------------------------------ + +-- Support variables for macros. +local mac_capture, mac_lineno, mac_name +local mac_active = {} +local mac_list = {} + +-- Pseudo-opcode to define a macro. +map_coreop[".macro_*"] = function(mparams) + if not mparams then return "name [, params...]" end + -- Split off and validate macro name. + local name = remove(mparams, 1) + if not name then werror("missing macro name") end + if not (match(name, "^[%a_][%w_%.]*$") or match(name, "^%.[%w_%.]*$")) then + wfatal("bad macro name `"..name.."'") + end + -- Validate macro parameter names. + local mdup = {} + for _,mp in ipairs(mparams) do + if not match(mp, "^[%a_][%w_]*$") then + wfatal("bad macro parameter name `"..mp.."'") + end + if mdup[mp] then wfatal("duplicate macro parameter name `"..mp.."'") end + mdup[mp] = true + end + -- Check for duplicate or recursive macro definitions. + local opname = name.."_"..#mparams + if map_op[opname] or map_op[name.."_*"] then + wfatal("duplicate macro `"..name.."' ("..#mparams.." parameters)") + end + if mac_capture then wfatal("recursive macro definition") end + + -- Enable statement capture. + local lines = {} + mac_lineno = g_lineno + mac_name = name + mac_capture = function(stmt) -- Statement capture function. + -- Stop macro definition with .endmacro pseudo-opcode. + if not match(stmt, "^%s*.endmacro%s*$") then + lines[#lines+1] = stmt + return + end + mac_capture = nil + mac_lineno = nil + mac_name = nil + mac_list[#mac_list+1] = opname + -- Add macro-op definition. + map_op[opname] = function(params) + if not params then return mparams, lines end + -- Protect against recursive macro invocation. + if mac_active[opname] then wfatal("recursive macro invocation") end + mac_active[opname] = true + -- Setup substitution map. + local subst = {} + for i,mp in ipairs(mparams) do subst[mp] = params[i] end + local mcom + if g_opt.maccomment and g_opt.comment then + mcom = " MACRO "..name.." ("..#mparams..")" + wcomment("{"..mcom) + end + -- Loop through all captured statements + for _,stmt in ipairs(lines) do + -- Substitute macro parameters. + local st = gsub(stmt, "[%w_]+", subst) + st = definesubst(st) + st = gsub(st, "%s*%.%.%s*", "") -- Token paste a..b. + if mcom and sub(st, 1, 1) ~= "|" then wcomment(st) end + -- Emit statement. Use a protected call for better diagnostics. + local ok, err = pcall(dostmt, st) + if not ok then + -- Add the captured statement to the error. + wprinterr(err, "\n", g_indent, "| ", stmt, + "\t[MACRO ", name, " (", #mparams, ")]\n") + end + end + if mcom then wcomment("}"..mcom) end + mac_active[opname] = nil + end + end +end + +-- An .endmacro pseudo-opcode outside of a macro definition is an error. 
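+-- (Inside a definition the capture function above consumes .endmacro, so
+-- the handler below only runs when there was no matching .macro.)  For
+-- reference, a complete macro definition and invocation (names
+-- illustrative):
+--   |.macro checknil, reg, target
+--   | cmp reg, edx
+--   | je target
+--   |.endmacro
+--   | checknil eax, ->vmeta_get
+-- expands to the cmp/je pair with reg and target substituted as whole
+-- words; the token-paste step (a `..` b) further lets a macro build
+-- identifiers such as ins_..op from a parameter op.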
+map_coreop[".endmacro_0"] = function(params) + wfatal(".endmacro without .macro") +end + +-- Dump all macros and their contents (with -PP only). +local function dumpmacros(out, lvl) + sort(mac_list) + out:write("Macros:\n") + for _,opname in ipairs(mac_list) do + local name = sub(opname, 1, -3) + local params, lines = map_op[opname]() + out:write(format(" %-20s %s\n", name, concat(params, ", "))) + if lvl > 1 then + for _,line in ipairs(lines) do + out:write(" |", line, "\n") + end + out:write("\n") + end + end + out:write("\n") +end + +-- Check for unfinished macro definitions. +local function checkmacros() + if mac_capture then + wprinterr(g_fname, ":", mac_lineno, + ": error: unfinished .macro `", mac_name ,"'\n") + end +end + +------------------------------------------------------------------------------ + +-- Support variables for captures. +local cap_lineno, cap_name +local cap_buffers = {} +local cap_used = {} + +-- Start a capture. +map_coreop[".capture_1"] = function(params) + if not params then return "name" end + wflush() + local name = params[1] + if not match(name, "^[%a_][%w_]*$") then + wfatal("bad capture name `"..name.."'") + end + if cap_name then + wfatal("already capturing to `"..cap_name.."' since line "..cap_lineno) + end + cap_name = name + cap_lineno = g_lineno + -- Create or continue a capture buffer and start the output line capture. + local buf = cap_buffers[name] + if not buf then buf = {}; cap_buffers[name] = buf end + g_capbuffer = buf + g_synclineno = 0 +end + +-- Stop a capture. +map_coreop[".endcapture_0"] = function(params) + wflush() + if not cap_name then wfatal(".endcapture without a valid .capture") end + cap_name = nil + cap_lineno = nil + g_capbuffer = nil + g_synclineno = 0 +end + +-- Dump a capture buffer. +map_coreop[".dumpcapture_1"] = function(params) + if not params then return "name" end + wflush() + local name = params[1] + if not match(name, "^[%a_][%w_]*$") then + wfatal("bad capture name `"..name.."'") + end + cap_used[name] = true + wline(function(out) + local buf = cap_buffers[name] + if buf then wdumplines(out, buf) end + end) + g_synclineno = 0 +end + +-- Dump all captures and their buffers (with -PP only). +local function dumpcaptures(out, lvl) + out:write("Captures:\n") + for name,buf in pairs(cap_buffers) do + out:write(format(" %-20s %4s)\n", name, "("..#buf)) + if lvl > 1 then + local bar = rep("=", 76) + out:write(" ", bar, "\n") + for _,line in ipairs(buf) do + out:write(" ", line, "\n") + end + out:write(" ", bar, "\n\n") + end + end + out:write("\n") +end + +-- Check for unfinished or unused captures. +local function checkcaptures() + if cap_name then + wprinterr(g_fname, ":", cap_lineno, + ": error: unfinished .capture `", cap_name,"'\n") + return + end + for name in pairs(cap_buffers) do + if not cap_used[name] then + wprinterr(g_fname, ":*: error: missing .dumpcapture ", name ,"\n") + end + end +end + +------------------------------------------------------------------------------ + +-- Sections names. +local map_sections = {} + +-- Pseudo-opcode to define code sections. +-- TODO: Data sections, BSS sections. Needs extra C code and API. +map_coreop[".section_*"] = function(params) + if not params then return "name..." 
end + if #map_sections > 0 then werror("duplicate section definition") end + wflush() + for sn,name in ipairs(params) do + local opname = "."..name.."_0" + if not match(name, "^[%a][%w_]*$") or + map_op[opname] or map_op["."..name.."_*"] then + werror("bad section name `"..name.."'") + end + map_sections[#map_sections+1] = name + wline(format("#define DASM_SECTION_%s\t%d", upper(name), sn-1)) + map_op[opname] = function(params) g_arch.section(sn-1) end + end + wline(format("#define DASM_MAXSECTION\t\t%d", #map_sections)) +end + +-- Dump all sections. +local function dumpsections(out, lvl) + out:write("Sections:\n") + for _,name in ipairs(map_sections) do + out:write(format(" %s\n", name)) + end + out:write("\n") +end + +------------------------------------------------------------------------------ + +-- Replacement for customized Lua, which lacks the package library. +local prefix = "" +if not require then + function require(name) + local fp = assert(io.open(prefix..name..".lua")) + local s = fp:read("*a") + assert(fp:close()) + return assert(loadstring(s, "@"..name..".lua"))() + end +end + +-- Load architecture-specific module. +local function loadarch(arch) + if not match(arch, "^[%w_]+$") then return "bad arch name" end + _G._map_def = map_def + local ok, m_arch = pcall(require, "dasm_"..arch) + if not ok then return "cannot load module: "..m_arch end + g_arch = m_arch + wflush = m_arch.passcb(wline, werror, wfatal, wwarn) + m_arch.setup(arch, g_opt) + map_op, map_def = m_arch.mergemaps(map_coreop, map_def) +end + +-- Dump architecture description. +function opt_map.dumparch(args) + local name = optparam(args) + if not g_arch then + local err = loadarch(name) + if err then opterror(err) end + end + + local t = {} + for name in pairs(map_coreop) do t[#t+1] = name end + for name in pairs(map_op) do t[#t+1] = name end + sort(t) + + local out = stdout + local _arch = g_arch._info + out:write(format("%s version %s, released %s, %s\n", + _info.name, _info.version, _info.release, _info.url)) + g_arch.dumparch(out) + + local pseudo = true + out:write("Pseudo-Opcodes:\n") + for _,sname in ipairs(t) do + local name, nparam = match(sname, "^(.+)_([0-9%*])$") + if name then + if pseudo and sub(name, 1, 1) ~= "." then + out:write("\nOpcodes:\n") + pseudo = false + end + local f = map_op[sname] + local s + if nparam ~= "*" then nparam = nparam + 0 end + if nparam == 0 then + s = "" + elseif type(f) == "string" then + s = map_op[".template__"](nil, f, nparam) + else + s = f(nil, nparam) + end + if type(s) == "table" then + for _,s2 in ipairs(s) do + out:write(format(" %-12s %s\n", name, s2)) + end + else + out:write(format(" %-12s %s\n", name, s)) + end + end + end + out:write("\n") + exit(0) +end + +-- Pseudo-opcode to set the architecture. +-- Only initially available (map_op is replaced when called). +map_op[".arch_1"] = function(params) + if not params then return "name" end + local err = loadarch(params[1]) + if err then wfatal(err) end + wline(format("#if DASM_VERSION != %d", _info.vernum)) + wline('#error "Version mismatch between DynASM and included encoding engine"') + wline("#endif") +end + +-- Dummy .arch pseudo-opcode to improve the error report. +map_coreop[".arch_1"] = function(params) + if not params then return "name" end + wfatal("duplicate .arch statement") +end + +------------------------------------------------------------------------------ + +-- Dummy pseudo-opcode. Don't confuse '.nop' with 'nop'. 
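+-- (Startup protocol: .arch must appear before the first real opcode; it
+-- loads dasm_<arch>.lua, replaces map_op and emits a version handshake,
+-- and .section then defines one pseudo-opcode per declared section.  A
+-- file beginning with
+--   |.arch x64
+--   |.section code
+-- thus produces C output containing a DASM_VERSION check against 10500,
+--   #define DASM_SECTION_CODE 0
+--   #define DASM_MAXSECTION 1
+-- keeping this frontend and the embedded encoding engine in lockstep.)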
+map_coreop[".nop_*"] = function(params) + if not params then return "[ignored...]" end +end + +-- Pseudo-opcodes to raise errors. +map_coreop[".error_1"] = function(params) + if not params then return "message" end + werror(params[1]) +end + +map_coreop[".fatal_1"] = function(params) + if not params then return "message" end + wfatal(params[1]) +end + +-- Dump all user defined elements. +local function dumpdef(out) + local lvl = g_opt.dumpdef + if lvl == 0 then return end + dumpsections(out, lvl) + dumpdefines(out, lvl) + if g_arch then g_arch.dumpdef(out, lvl) end + dumpmacros(out, lvl) + dumpcaptures(out, lvl) +end + +------------------------------------------------------------------------------ + +-- Helper for splitstmt. +local splitlvl + +local function splitstmt_one(c) + if c == "(" then + splitlvl = ")"..splitlvl + elseif c == "[" then + splitlvl = "]"..splitlvl + elseif c == "{" then + splitlvl = "}"..splitlvl + elseif c == ")" or c == "]" or c == "}" then + if sub(splitlvl, 1, 1) ~= c then werror("unbalanced (), [] or {}") end + splitlvl = sub(splitlvl, 2) + elseif splitlvl == "" then + return " \0 " + end + return c +end + +-- Split statement into (pseudo-)opcode and params. +local function splitstmt(stmt) + -- Convert label with trailing-colon into .label statement. + local label = match(stmt, "^%s*(.+):%s*$") + if label then return ".label", {label} end + + -- Split at commas and equal signs, but obey parentheses and brackets. + splitlvl = "" + stmt = gsub(stmt, "[,%(%)%[%]{}]", splitstmt_one) + if splitlvl ~= "" then werror("unbalanced () or []") end + + -- Split off opcode. + local op, other = match(stmt, "^%s*([^%s%z]+)%s*(.*)$") + if not op then werror("bad statement syntax") end + + -- Split parameters. + local params = {} + for p in gmatch(other, "%s*(%Z+)%z?") do + params[#params+1] = gsub(p, "%s+$", "") + end + if #params > 16 then werror("too many parameters") end + + params.op = op + return op, params +end + +-- Process a single statement. +dostmt = function(stmt) + -- Ignore empty statements. + if match(stmt, "^%s*$") then return end + + -- Capture macro defs before substitution. + if mac_capture then return mac_capture(stmt) end + stmt = definesubst(stmt) + + -- Emit C code without parsing the line. + if sub(stmt, 1, 1) == "|" then + local tail = sub(stmt, 2) + wflush() + if sub(tail, 1, 2) == "//" then wcomment(tail) else wline(tail, true) end + return + end + + -- Split into (pseudo-)opcode and params. + local op, params = splitstmt(stmt) + + -- Get opcode handler (matching # of parameters or generic handler). + local f = map_op[op.."_"..#params] or map_op[op.."_*"] + if not f then + if not g_arch then wfatal("first statement must be .arch") end + -- Improve error report. + for i=0,9 do + if map_op[op.."_"..i] then + werror("wrong number of parameters for `"..op.."'") + end + end + werror("unknown statement `"..op.."'") + end + + -- Call opcode handler or special handler for template strings. + if type(f) == "string" then + map_op[".template__"](params, f) + else + f(params) + end +end + +-- Process a single line. +local function doline(line) + if g_opt.flushline then wflush() end + + -- Assembler line? + local indent, aline = match(line, "^(%s*)%|(.*)$") + if not aline then + -- No, plain C code line, need to flush first. + wflush() + wsync() + wline(line, false) + return + end + + g_indent = indent -- Remember current line indentation. + + -- Emit C code (even from macros). Avoids echo and line parsing. 
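+-- Three classes of input lines reach this point in doline() (names
+-- illustrative):
+--   int x;           plain C, flushed and copied through unchanged
+--   | mov eax, ecx   assembler statements, split and fed to dostmt()
+--   ||DEF(x, 1);     C code emitted even when replayed from a macro
+-- Statement splitting honours nesting: in e.g.
+--   | ldr CARG1, [BASE, #16]
+-- the comma inside [] does not separate parameters, while top-level
+-- commas and a trailing `label:` form are recognized by splitstmt() above.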
+ if sub(aline, 1, 1) == "|" then + if not mac_capture then + wsync() + elseif g_opt.comment then + wsync() + wcomment(aline) + end + dostmt(aline) + return + end + + -- Echo assembler line as a comment. + if g_opt.comment then + wsync() + wcomment(aline) + end + + -- Strip assembler comments. + aline = gsub(aline, "//.*$", "") + + -- Split line into statements at semicolons. + if match(aline, ";") then + for stmt in gmatch(aline, "[^;]+") do dostmt(stmt) end + else + dostmt(aline) + end +end + +------------------------------------------------------------------------------ + +-- Write DynASM header. +local function dasmhead(out) + out:write(format([[ +/* +** This file has been pre-processed with DynASM. +** %s +** DynASM version %s, DynASM %s version %s +** DO NOT EDIT! The original file is in "%s". +*/ + +]], _info.url, + _info.version, g_arch._info.arch, g_arch._info.version, + g_fname)) +end + +-- Read input file. +readfile = function(fin) + g_indent = "" + g_lineno = 0 + g_synclineno = -1 + + -- Process all lines. + for line in fin:lines() do + g_lineno = g_lineno + 1 + g_curline = line + local ok, err = pcall(doline, line) + if not ok and wprinterr(err, "\n") then return true end + end + wflush() + + -- Close input file. + assert(fin == stdin or fin:close()) +end + +-- Write output file. +local function writefile(outfile) + local fout + + -- Open output file. + if outfile == nil or outfile == "-" then + fout = stdout + else + fout = assert(io.open(outfile, "w")) + end + + -- Write all buffered lines + wdumplines(fout, g_wbuffer) + + -- Close output file. + assert(fout == stdout or fout:close()) + + -- Optionally dump definitions. + dumpdef(fout == stdout and stderr or stdout) +end + +-- Translate an input file to an output file. +local function translate(infile, outfile) + g_wbuffer = {} + g_indent = "" + g_lineno = 0 + g_synclineno = -1 + + -- Put header. + wline(dasmhead) + + -- Read input file. + local fin + if infile == "-" then + g_fname = "(stdin)" + fin = stdin + else + g_fname = infile + fin = assert(io.open(infile, "r")) + end + readfile(fin) + + -- Check for errors. + if not g_arch then + wprinterr(g_fname, ":*: error: missing .arch directive\n") + end + checkconds() + checkmacros() + checkcaptures() + + if g_errcount ~= 0 then + stderr:write(g_fname, ":*: info: ", g_errcount, " error", + (type(g_errcount) == "number" and g_errcount > 1) and "s" or "", + " in input file -- no output file generated.\n") + dumpdef(stderr) + exit(1) + end + + -- Write output file. + writefile(outfile) +end + +------------------------------------------------------------------------------ + +-- Print help text. +function opt_map.help() + stdout:write("DynASM -- ", _info.description, ".\n") + stdout:write("DynASM ", _info.version, " ", _info.release, " ", _info.url, "\n") + stdout:write[[ + +Usage: dynasm [OPTION]... INFILE.dasc|- + + -h, --help Display this help text. + -V, --version Display version and copyright information. + + -o, --outfile FILE Output file name (default is stdout). + -I, --include DIR Add directory to the include search path. + + -c, --ccomment Use /* */ comments for assembler lines. + -C, --cppcomment Use // comments for assembler lines (default). + -N, --nocomment Suppress assembler lines in output. + -M, --maccomment Show macro expansions as comments (default off). + + -L, --nolineno Suppress CPP line number information in output. + -F, --flushline Flush action list for every line. + + -D NAME[=SUBST] Define a substitution. + -U NAME Undefine a substitution. 
+ + -P, --dumpdef Dump defines, macros, etc. Repeat for more output. + -A, --dumparch ARCH Load architecture ARCH and dump description. +]] + exit(0) +end + +-- Print version information. +function opt_map.version() + stdout:write(format("%s version %s, released %s\n%s\n\n%s", + _info.name, _info.version, _info.release, _info.url, _info.copyright)) + exit(0) +end + +-- Misc. options. +function opt_map.outfile(args) g_opt.outfile = optparam(args) end +function opt_map.include(args) insert(g_opt.include, 1, optparam(args)) end +function opt_map.ccomment() g_opt.comment = "/*|"; g_opt.endcomment = " */" end +function opt_map.cppcomment() g_opt.comment = "//|"; g_opt.endcomment = "" end +function opt_map.nocomment() g_opt.comment = false end +function opt_map.maccomment() g_opt.maccomment = true end +function opt_map.nolineno() g_opt.cpp = false end +function opt_map.flushline() g_opt.flushline = true end +function opt_map.dumpdef() g_opt.dumpdef = g_opt.dumpdef + 1 end + +------------------------------------------------------------------------------ + +-- Short aliases for long options. +local opt_alias = { + h = "help", ["?"] = "help", V = "version", + o = "outfile", I = "include", + c = "ccomment", C = "cppcomment", N = "nocomment", M = "maccomment", + L = "nolineno", F = "flushline", + P = "dumpdef", A = "dumparch", +} + +-- Parse single option. +local function parseopt(opt, args) + opt_current = #opt == 1 and "-"..opt or "--"..opt + local f = opt_map[opt] or opt_map[opt_alias[opt]] + if not f then + opterror("unrecognized option `", opt_current, "'. Try `--help'.\n") + end + f(args) +end + +-- Parse arguments. +local function parseargs(args) + -- Default options. + g_opt.comment = "//|" + g_opt.endcomment = "" + g_opt.cpp = true + g_opt.dumpdef = 0 + g_opt.include = { "" } + + -- Process all option arguments. + args.argn = 1 + repeat + local a = args[args.argn] + if not a then break end + local lopt, opt = match(a, "^%-(%-?)(.+)") + if not opt then break end + args.argn = args.argn + 1 + if lopt == "" then + -- Loop through short options. + for o in gmatch(opt, ".") do parseopt(o, args) end + else + -- Long option. + parseopt(opt, args) + end + until false + + -- Check for proper number of arguments. + local nargs = #args - args.argn + 1 + if nargs ~= 1 then + if nargs == 0 then + if g_opt.dumpdef > 0 then return dumpdef(stdout) end + end + opt_map.help() + end + + -- Translate a single input file to a single output file + -- TODO: Handle multiple files? + translate(args[args.argn], g_opt.outfile) +end + +------------------------------------------------------------------------------ + +-- Add the directory dynasm.lua resides in to the Lua module search path. +local arg = arg +if arg and arg[0] then + prefix = match(arg[0], "^(.*[/\\])") + if package and prefix then package.path = prefix.."?.lua;"..package.path end +end + +-- Start DynASM. +parseargs{...} + +------------------------------------------------------------------------------ + diff --git a/ext/opcache/jit/ir/dynasm/minilua.c b/ext/opcache/jit/ir/dynasm/minilua.c new file mode 100644 index 00000000000..a8d7c305e10 --- /dev/null +++ b/ext/opcache/jit/ir/dynasm/minilua.c @@ -0,0 +1,7770 @@ +/* This is a heavily customized and minimized copy of Lua 5.1.5. */ +/* It's only used to build LuaJIT. It does NOT have all standard functions! */ +/****************************************************************************** +* Copyright (C) 1994-2012 Lua.org, PUC-Rio. All rights reserved. 
+*
+* Permission is hereby granted, free of charge, to any person obtaining
+* a copy of this software and associated documentation files (the
+* "Software"), to deal in the Software without restriction, including
+* without limitation the rights to use, copy, modify, merge, publish,
+* distribute, sublicense, and/or sell copies of the Software, and to
+* permit persons to whom the Software is furnished to do so, subject to
+* the following conditions:
+*
+* The above copyright notice and this permission notice shall be
+* included in all copies or substantial portions of the Software.
+*
+* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+******************************************************************************/
+#ifdef _MSC_VER
+typedef unsigned __int64 U64;
+#else
+typedef unsigned long long U64;
+#endif
+int _CRT_glob = 0;
+#include <stddef.h>
+#include <stdarg.h>
+#include <limits.h>
+#include <math.h>
+#include <ctype.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <setjmp.h>
+#include <errno.h>
+#include <time.h>
+typedef enum{
+TM_INDEX,
+TM_NEWINDEX,
+TM_GC,
+TM_MODE,
+TM_EQ,
+TM_ADD,
+TM_SUB,
+TM_MUL,
+TM_DIV,
+TM_MOD,
+TM_POW,
+TM_UNM,
+TM_LEN,
+TM_LT,
+TM_LE,
+TM_CONCAT,
+TM_CALL,
+TM_N
+}TMS;
+enum OpMode{iABC,iABx,iAsBx};
+typedef enum{
+OP_MOVE,
+OP_LOADK,
+OP_LOADBOOL,
+OP_LOADNIL,
+OP_GETUPVAL,
+OP_GETGLOBAL,
+OP_GETTABLE,
+OP_SETGLOBAL,
+OP_SETUPVAL,
+OP_SETTABLE,
+OP_NEWTABLE,
+OP_SELF,
+OP_ADD,
+OP_SUB,
+OP_MUL,
+OP_DIV,
+OP_MOD,
+OP_POW,
+OP_UNM,
+OP_NOT,
+OP_LEN,
+OP_CONCAT,
+OP_JMP,
+OP_EQ,
+OP_LT,
+OP_LE,
+OP_TEST,
+OP_TESTSET,
+OP_CALL,
+OP_TAILCALL,
+OP_RETURN,
+OP_FORLOOP,
+OP_FORPREP,
+OP_TFORLOOP,
+OP_SETLIST,
+OP_CLOSE,
+OP_CLOSURE,
+OP_VARARG
+}OpCode;
+enum OpArgMask{
+OpArgN,
+OpArgU,
+OpArgR,
+OpArgK
+};
+typedef enum{
+VVOID,
+VNIL,
+VTRUE,
+VFALSE,
+VK,
+VKNUM,
+VLOCAL,
+VUPVAL,
+VGLOBAL,
+VINDEXED,
+VJMP,
+VRELOCABLE,
+VNONRELOC,
+VCALL,
+VVARARG
+}expkind;
+enum RESERVED{
+TK_AND=257,TK_BREAK,
+TK_DO,TK_ELSE,TK_ELSEIF,TK_END,TK_FALSE,TK_FOR,TK_FUNCTION,
+TK_IF,TK_IN,TK_LOCAL,TK_NIL,TK_NOT,TK_OR,TK_REPEAT,
+TK_RETURN,TK_THEN,TK_TRUE,TK_UNTIL,TK_WHILE,
+TK_CONCAT,TK_DOTS,TK_EQ,TK_GE,TK_LE,TK_NE,TK_NUMBER,
+TK_NAME,TK_STRING,TK_EOS
+};
+typedef enum BinOpr{
+OPR_ADD,OPR_SUB,OPR_MUL,OPR_DIV,OPR_MOD,OPR_POW,
+OPR_CONCAT,
+OPR_NE,OPR_EQ,
+OPR_LT,OPR_LE,OPR_GT,OPR_GE,
+OPR_AND,OPR_OR,
+OPR_NOBINOPR
+}BinOpr;
+typedef enum UnOpr{OPR_MINUS,OPR_NOT,OPR_LEN,OPR_NOUNOPR}UnOpr;
+#define LUA_QL(x)"'"x"'"
+#define luai_apicheck(L,o){(void)L;}
+#define lua_number2str(s,n)sprintf((s),"%.14g",(n))
+#define lua_str2number(s,p)strtod((s),(p))
+#define luai_numadd(a,b)((a)+(b))
+#define luai_numsub(a,b)((a)-(b))
+#define luai_nummul(a,b)((a)*(b))
+#define luai_numdiv(a,b)((a)/(b))
+#define luai_nummod(a,b)((a)-floor((a)/(b))*(b))
+#define luai_numpow(a,b)(pow(a,b))
+#define luai_numunm(a)(-(a))
+#define luai_numeq(a,b)((a)==(b))
+#define luai_numlt(a,b)((a)<(b))
+#define luai_numle(a,b)((a)<=(b))
+#define luai_numisnan(a)(!luai_numeq((a),(a)))
+#define lua_number2int(i,d)((i)=(int)(d))
+#define lua_number2integer(i,d)((i)=(lua_Integer)(d))
+#define LUAI_THROW(L,c)longjmp((c)->b,1)
+#define LUAI_TRY(L,c,a)if(setjmp((c)->b)==0){a}
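+/* Type tags appear as bare integers throughout this minified copy, the
+** lua.h names having been stripped: 0 nil, 1 boolean, 2 lightuserdata,
+** 3 number, 4 string, 5 table, 6 function, 7 userdata, 8 thread; (8+1)
+** proto and (8+2) upval are internal collectable types, and (8+3) marks
+** dead keys in tables.  So lua_isfunction() below literally tests
+** lua_type()==6, and setsvalue() stores a collectable pointer with tt=4. */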
+#define lua_pclose(L,file)((void)((void)L,file),0) +#define lua_upvalueindex(i)((-10002)-(i)) +typedef struct lua_State lua_State; +typedef int(*lua_CFunction)(lua_State*L); +typedef const char*(*lua_Reader)(lua_State*L,void*ud,size_t*sz); +typedef void*(*lua_Alloc)(void*ud,void*ptr,size_t osize,size_t nsize); +typedef double lua_Number; +typedef ptrdiff_t lua_Integer; +static void lua_settop(lua_State*L,int idx); +static int lua_type(lua_State*L,int idx); +static const char* lua_tolstring(lua_State*L,int idx,size_t*len); +static size_t lua_objlen(lua_State*L,int idx); +static void lua_pushlstring(lua_State*L,const char*s,size_t l); +static void lua_pushcclosure(lua_State*L,lua_CFunction fn,int n); +static void lua_createtable(lua_State*L,int narr,int nrec); +static void lua_setfield(lua_State*L,int idx,const char*k); +#define lua_pop(L,n)lua_settop(L,-(n)-1) +#define lua_newtable(L)lua_createtable(L,0,0) +#define lua_pushcfunction(L,f)lua_pushcclosure(L,(f),0) +#define lua_strlen(L,i)lua_objlen(L,(i)) +#define lua_isfunction(L,n)(lua_type(L,(n))==6) +#define lua_istable(L,n)(lua_type(L,(n))==5) +#define lua_isnil(L,n)(lua_type(L,(n))==0) +#define lua_isboolean(L,n)(lua_type(L,(n))==1) +#define lua_isnone(L,n)(lua_type(L,(n))==(-1)) +#define lua_isnoneornil(L,n)(lua_type(L,(n))<=0) +#define lua_pushliteral(L,s)lua_pushlstring(L,""s,(sizeof(s)/sizeof(char))-1) +#define lua_setglobal(L,s)lua_setfield(L,(-10002),(s)) +#define lua_tostring(L,i)lua_tolstring(L,(i),NULL) +typedef struct lua_Debug lua_Debug; +typedef void(*lua_Hook)(lua_State*L,lua_Debug*ar); +struct lua_Debug{ +int event; +const char*name; +const char*namewhat; +const char*what; +const char*source; +int currentline; +int nups; +int linedefined; +int lastlinedefined; +char short_src[60]; +int i_ci; +}; +typedef unsigned int lu_int32; +typedef size_t lu_mem; +typedef ptrdiff_t l_mem; +typedef unsigned char lu_byte; +#define IntPoint(p)((unsigned int)(lu_mem)(p)) +typedef union{double u;void*s;long l;}L_Umaxalign; +typedef double l_uacNumber; +#define check_exp(c,e)(e) +#define UNUSED(x)((void)(x)) +#define cast(t,exp)((t)(exp)) +#define cast_byte(i)cast(lu_byte,(i)) +#define cast_num(i)cast(lua_Number,(i)) +#define cast_int(i)cast(int,(i)) +typedef lu_int32 Instruction; +#define condhardstacktests(x)((void)0) +typedef union GCObject GCObject; +typedef struct GCheader{ +GCObject*next;lu_byte tt;lu_byte marked; +}GCheader; +typedef union{ +GCObject*gc; +void*p; +lua_Number n; +int b; +}Value; +typedef struct lua_TValue{ +Value value;int tt; +}TValue; +#define ttisnil(o)(ttype(o)==0) +#define ttisnumber(o)(ttype(o)==3) +#define ttisstring(o)(ttype(o)==4) +#define ttistable(o)(ttype(o)==5) +#define ttisfunction(o)(ttype(o)==6) +#define ttisboolean(o)(ttype(o)==1) +#define ttisuserdata(o)(ttype(o)==7) +#define ttisthread(o)(ttype(o)==8) +#define ttislightuserdata(o)(ttype(o)==2) +#define ttype(o)((o)->tt) +#define gcvalue(o)check_exp(iscollectable(o),(o)->value.gc) +#define pvalue(o)check_exp(ttislightuserdata(o),(o)->value.p) +#define nvalue(o)check_exp(ttisnumber(o),(o)->value.n) +#define rawtsvalue(o)check_exp(ttisstring(o),&(o)->value.gc->ts) +#define tsvalue(o)(&rawtsvalue(o)->tsv) +#define rawuvalue(o)check_exp(ttisuserdata(o),&(o)->value.gc->u) +#define uvalue(o)(&rawuvalue(o)->uv) +#define clvalue(o)check_exp(ttisfunction(o),&(o)->value.gc->cl) +#define hvalue(o)check_exp(ttistable(o),&(o)->value.gc->h) +#define bvalue(o)check_exp(ttisboolean(o),(o)->value.b) +#define thvalue(o)check_exp(ttisthread(o),&(o)->value.gc->th) 
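+/* A TValue is a tagged union: the Value payload plus the int tag tt read
+** by ttype().  Since check_exp(c,e) compiles to plain (e) here, the
+** accessor macros above carry no runtime checks.  Sketch of the usual
+** read pattern, using only names defined above:
+**   const TValue *o = ...;
+**   if (ttisnumber(o))      { lua_Number d = nvalue(o);   ... }
+**   else if (ttisstring(o)) { TString *s = rawtsvalue(o); ... }
+*/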
+#define l_isfalse(o)(ttisnil(o)||(ttisboolean(o)&&bvalue(o)==0)) +#define checkconsistency(obj) +#define checkliveness(g,obj) +#define setnilvalue(obj)((obj)->tt=0) +#define setnvalue(obj,x){TValue*i_o=(obj);i_o->value.n=(x);i_o->tt=3;} +#define setbvalue(obj,x){TValue*i_o=(obj);i_o->value.b=(x);i_o->tt=1;} +#define setsvalue(L,obj,x){TValue*i_o=(obj);i_o->value.gc=cast(GCObject*,(x));i_o->tt=4;checkliveness(G(L),i_o);} +#define setuvalue(L,obj,x){TValue*i_o=(obj);i_o->value.gc=cast(GCObject*,(x));i_o->tt=7;checkliveness(G(L),i_o);} +#define setthvalue(L,obj,x){TValue*i_o=(obj);i_o->value.gc=cast(GCObject*,(x));i_o->tt=8;checkliveness(G(L),i_o);} +#define setclvalue(L,obj,x){TValue*i_o=(obj);i_o->value.gc=cast(GCObject*,(x));i_o->tt=6;checkliveness(G(L),i_o);} +#define sethvalue(L,obj,x){TValue*i_o=(obj);i_o->value.gc=cast(GCObject*,(x));i_o->tt=5;checkliveness(G(L),i_o);} +#define setptvalue(L,obj,x){TValue*i_o=(obj);i_o->value.gc=cast(GCObject*,(x));i_o->tt=(8+1);checkliveness(G(L),i_o);} +#define setobj(L,obj1,obj2){const TValue*o2=(obj2);TValue*o1=(obj1);o1->value=o2->value;o1->tt=o2->tt;checkliveness(G(L),o1);} +#define setttype(obj,tt)(ttype(obj)=(tt)) +#define iscollectable(o)(ttype(o)>=4) +typedef TValue*StkId; +typedef union TString{ +L_Umaxalign dummy; +struct{ +GCObject*next;lu_byte tt;lu_byte marked; +lu_byte reserved; +unsigned int hash; +size_t len; +}tsv; +}TString; +#define getstr(ts)cast(const char*,(ts)+1) +#define svalue(o)getstr(rawtsvalue(o)) +typedef union Udata{ +L_Umaxalign dummy; +struct{ +GCObject*next;lu_byte tt;lu_byte marked; +struct Table*metatable; +struct Table*env; +size_t len; +}uv; +}Udata; +typedef struct Proto{ +GCObject*next;lu_byte tt;lu_byte marked; +TValue*k; +Instruction*code; +struct Proto**p; +int*lineinfo; +struct LocVar*locvars; +TString**upvalues; +TString*source; +int sizeupvalues; +int sizek; +int sizecode; +int sizelineinfo; +int sizep; +int sizelocvars; +int linedefined; +int lastlinedefined; +GCObject*gclist; +lu_byte nups; +lu_byte numparams; +lu_byte is_vararg; +lu_byte maxstacksize; +}Proto; +typedef struct LocVar{ +TString*varname; +int startpc; +int endpc; +}LocVar; +typedef struct UpVal{ +GCObject*next;lu_byte tt;lu_byte marked; +TValue*v; +union{ +TValue value; +struct{ +struct UpVal*prev; +struct UpVal*next; +}l; +}u; +}UpVal; +typedef struct CClosure{ +GCObject*next;lu_byte tt;lu_byte marked;lu_byte isC;lu_byte nupvalues;GCObject*gclist;struct Table*env; +lua_CFunction f; +TValue upvalue[1]; +}CClosure; +typedef struct LClosure{ +GCObject*next;lu_byte tt;lu_byte marked;lu_byte isC;lu_byte nupvalues;GCObject*gclist;struct Table*env; +struct Proto*p; +UpVal*upvals[1]; +}LClosure; +typedef union Closure{ +CClosure c; +LClosure l; +}Closure; +#define iscfunction(o)(ttype(o)==6&&clvalue(o)->c.isC) +typedef union TKey{ +struct{ +Value value;int tt; +struct Node*next; +}nk; +TValue tvk; +}TKey; +typedef struct Node{ +TValue i_val; +TKey i_key; +}Node; +typedef struct Table{ +GCObject*next;lu_byte tt;lu_byte marked; +lu_byte flags; +lu_byte lsizenode; +struct Table*metatable; +TValue*array; +Node*node; +Node*lastfree; +GCObject*gclist; +int sizearray; +}Table; +#define lmod(s,size)(check_exp((size&(size-1))==0,(cast(int,(s)&((size)-1))))) +#define twoto(x)((size_t)1<<(x)) +#define sizenode(t)(twoto((t)->lsizenode)) +static const TValue luaO_nilobject_; +#define ceillog2(x)(luaO_log2((x)-1)+1) +static int luaO_log2(unsigned int x); +#define gfasttm(g,et,e)((et)==NULL?NULL:((et)->flags&(1u<<(e)))?NULL:luaT_gettm(et,e,(g)->tmname[e])) 
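+/* gfasttm()/fasttm() below implement a negative cache: bit e set in a
+** metatable's flags byte means a lookup of tmname[e] already came back
+** nil, so the hash lookup can be skipped on the fast path.  luaT_gettm()
+** (further down) sets the bit on such a miss, and storing a key into the
+** table resets flags, re-enabling lookups; in practice only the early
+** events (through TM_EQ) are probed this way. */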
+#define fasttm(l,et,e)gfasttm(G(l),et,e) +static const TValue*luaT_gettm(Table*events,TMS event,TString*ename); +#define luaM_reallocv(L,b,on,n,e)((cast(size_t,(n)+1)<=((size_t)(~(size_t)0)-2)/(e))?luaM_realloc_(L,(b),(on)*(e),(n)*(e)):luaM_toobig(L)) +#define luaM_freemem(L,b,s)luaM_realloc_(L,(b),(s),0) +#define luaM_free(L,b)luaM_realloc_(L,(b),sizeof(*(b)),0) +#define luaM_freearray(L,b,n,t)luaM_reallocv(L,(b),n,0,sizeof(t)) +#define luaM_malloc(L,t)luaM_realloc_(L,NULL,0,(t)) +#define luaM_new(L,t)cast(t*,luaM_malloc(L,sizeof(t))) +#define luaM_newvector(L,n,t)cast(t*,luaM_reallocv(L,NULL,0,n,sizeof(t))) +#define luaM_growvector(L,v,nelems,size,t,limit,e)if((nelems)+1>(size))((v)=cast(t*,luaM_growaux_(L,v,&(size),sizeof(t),limit,e))) +#define luaM_reallocvector(L,v,oldn,n,t)((v)=cast(t*,luaM_reallocv(L,v,oldn,n,sizeof(t)))) +static void*luaM_realloc_(lua_State*L,void*block,size_t oldsize, +size_t size); +static void*luaM_toobig(lua_State*L); +static void*luaM_growaux_(lua_State*L,void*block,int*size, +size_t size_elem,int limit, +const char*errormsg); +typedef struct Zio ZIO; +#define char2int(c)cast(int,cast(unsigned char,(c))) +#define zgetc(z)(((z)->n--)>0?char2int(*(z)->p++):luaZ_fill(z)) +typedef struct Mbuffer{ +char*buffer; +size_t n; +size_t buffsize; +}Mbuffer; +#define luaZ_initbuffer(L,buff)((buff)->buffer=NULL,(buff)->buffsize=0) +#define luaZ_buffer(buff)((buff)->buffer) +#define luaZ_sizebuffer(buff)((buff)->buffsize) +#define luaZ_bufflen(buff)((buff)->n) +#define luaZ_resetbuffer(buff)((buff)->n=0) +#define luaZ_resizebuffer(L,buff,size)(luaM_reallocvector(L,(buff)->buffer,(buff)->buffsize,size,char),(buff)->buffsize=size) +#define luaZ_freebuffer(L,buff)luaZ_resizebuffer(L,buff,0) +struct Zio{ +size_t n; +const char*p; +lua_Reader reader; +void*data; +lua_State*L; +}; +static int luaZ_fill(ZIO*z); +struct lua_longjmp; +#define gt(L)(&L->l_gt) +#define registry(L)(&G(L)->l_registry) +typedef struct stringtable{ +GCObject**hash; +lu_int32 nuse; +int size; +}stringtable; +typedef struct CallInfo{ +StkId base; +StkId func; +StkId top; +const Instruction*savedpc; +int nresults; +int tailcalls; +}CallInfo; +#define curr_func(L)(clvalue(L->ci->func)) +#define ci_func(ci)(clvalue((ci)->func)) +#define f_isLua(ci)(!ci_func(ci)->c.isC) +#define isLua(ci)(ttisfunction((ci)->func)&&f_isLua(ci)) +typedef struct global_State{ +stringtable strt; +lua_Alloc frealloc; +void*ud; +lu_byte currentwhite; +lu_byte gcstate; +int sweepstrgc; +GCObject*rootgc; +GCObject**sweepgc; +GCObject*gray; +GCObject*grayagain; +GCObject*weak; +GCObject*tmudata; +Mbuffer buff; +lu_mem GCthreshold; +lu_mem totalbytes; +lu_mem estimate; +lu_mem gcdept; +int gcpause; +int gcstepmul; +lua_CFunction panic; +TValue l_registry; +struct lua_State*mainthread; +UpVal uvhead; +struct Table*mt[(8+1)]; +TString*tmname[TM_N]; +}global_State; +struct lua_State{ +GCObject*next;lu_byte tt;lu_byte marked; +lu_byte status; +StkId top; +StkId base; +global_State*l_G; +CallInfo*ci; +const Instruction*savedpc; +StkId stack_last; +StkId stack; +CallInfo*end_ci; +CallInfo*base_ci; +int stacksize; +int size_ci; +unsigned short nCcalls; +unsigned short baseCcalls; +lu_byte hookmask; +lu_byte allowhook; +int basehookcount; +int hookcount; +lua_Hook hook; +TValue l_gt; +TValue env; +GCObject*openupval; +GCObject*gclist; +struct lua_longjmp*errorJmp; +ptrdiff_t errfunc; +}; +#define G(L)(L->l_G) +union GCObject{ +GCheader gch; +union TString ts; +union Udata u; +union Closure cl; +struct Table h; +struct Proto p; +struct UpVal uv; 
+struct lua_State th; +}; +#define rawgco2ts(o)check_exp((o)->gch.tt==4,&((o)->ts)) +#define gco2ts(o)(&rawgco2ts(o)->tsv) +#define rawgco2u(o)check_exp((o)->gch.tt==7,&((o)->u)) +#define gco2u(o)(&rawgco2u(o)->uv) +#define gco2cl(o)check_exp((o)->gch.tt==6,&((o)->cl)) +#define gco2h(o)check_exp((o)->gch.tt==5,&((o)->h)) +#define gco2p(o)check_exp((o)->gch.tt==(8+1),&((o)->p)) +#define gco2uv(o)check_exp((o)->gch.tt==(8+2),&((o)->uv)) +#define ngcotouv(o)check_exp((o)==NULL||(o)->gch.tt==(8+2),&((o)->uv)) +#define gco2th(o)check_exp((o)->gch.tt==8,&((o)->th)) +#define obj2gco(v)(cast(GCObject*,(v))) +static void luaE_freethread(lua_State*L,lua_State*L1); +#define pcRel(pc,p)(cast(int,(pc)-(p)->code)-1) +#define getline_(f,pc)(((f)->lineinfo)?(f)->lineinfo[pc]:0) +#define resethookcount(L)(L->hookcount=L->basehookcount) +static void luaG_typeerror(lua_State*L,const TValue*o, +const char*opname); +static void luaG_runerror(lua_State*L,const char*fmt,...); +#define luaD_checkstack(L,n)if((char*)L->stack_last-(char*)L->top<=(n)*(int)sizeof(TValue))luaD_growstack(L,n);else condhardstacktests(luaD_reallocstack(L,L->stacksize-5-1)); +#define incr_top(L){luaD_checkstack(L,1);L->top++;} +#define savestack(L,p)((char*)(p)-(char*)L->stack) +#define restorestack(L,n)((TValue*)((char*)L->stack+(n))) +#define saveci(L,p)((char*)(p)-(char*)L->base_ci) +#define restoreci(L,n)((CallInfo*)((char*)L->base_ci+(n))) +typedef void(*Pfunc)(lua_State*L,void*ud); +static int luaD_poscall(lua_State*L,StkId firstResult); +static void luaD_reallocCI(lua_State*L,int newsize); +static void luaD_reallocstack(lua_State*L,int newsize); +static void luaD_growstack(lua_State*L,int n); +static void luaD_throw(lua_State*L,int errcode); +static void*luaM_growaux_(lua_State*L,void*block,int*size,size_t size_elems, +int limit,const char*errormsg){ +void*newblock; +int newsize; +if(*size>=limit/2){ +if(*size>=limit) +luaG_runerror(L,errormsg); +newsize=limit; +} +else{ +newsize=(*size)*2; +if(newsize<4) +newsize=4; +} +newblock=luaM_reallocv(L,block,*size,newsize,size_elems); +*size=newsize; +return newblock; +} +static void*luaM_toobig(lua_State*L){ +luaG_runerror(L,"memory allocation error: block too big"); +return NULL; +} +static void*luaM_realloc_(lua_State*L,void*block,size_t osize,size_t nsize){ +global_State*g=G(L); +block=(*g->frealloc)(g->ud,block,osize,nsize); +if(block==NULL&&nsize>0) +luaD_throw(L,4); +g->totalbytes=(g->totalbytes-osize)+nsize; +return block; +} +#define resetbits(x,m)((x)&=cast(lu_byte,~(m))) +#define setbits(x,m)((x)|=(m)) +#define testbits(x,m)((x)&(m)) +#define bitmask(b)(1<<(b)) +#define bit2mask(b1,b2)(bitmask(b1)|bitmask(b2)) +#define l_setbit(x,b)setbits(x,bitmask(b)) +#define resetbit(x,b)resetbits(x,bitmask(b)) +#define testbit(x,b)testbits(x,bitmask(b)) +#define set2bits(x,b1,b2)setbits(x,(bit2mask(b1,b2))) +#define reset2bits(x,b1,b2)resetbits(x,(bit2mask(b1,b2))) +#define test2bits(x,b1,b2)testbits(x,(bit2mask(b1,b2))) +#define iswhite(x)test2bits((x)->gch.marked,0,1) +#define isblack(x)testbit((x)->gch.marked,2) +#define isgray(x)(!isblack(x)&&!iswhite(x)) +#define otherwhite(g)(g->currentwhite^bit2mask(0,1)) +#define isdead(g,v)((v)->gch.marked&otherwhite(g)&bit2mask(0,1)) +#define changewhite(x)((x)->gch.marked^=bit2mask(0,1)) +#define gray2black(x)l_setbit((x)->gch.marked,2) +#define valiswhite(x)(iscollectable(x)&&iswhite(gcvalue(x))) +#define luaC_white(g)cast(lu_byte,(g)->currentwhite&bit2mask(0,1)) +#define 
luaC_checkGC(L){condhardstacktests(luaD_reallocstack(L,L->stacksize-5-1));if(G(L)->totalbytes>=G(L)->GCthreshold)luaC_step(L);} +#define luaC_barrier(L,p,v){if(valiswhite(v)&&isblack(obj2gco(p)))luaC_barrierf(L,obj2gco(p),gcvalue(v));} +#define luaC_barriert(L,t,v){if(valiswhite(v)&&isblack(obj2gco(t)))luaC_barrierback(L,t);} +#define luaC_objbarrier(L,p,o){if(iswhite(obj2gco(o))&&isblack(obj2gco(p)))luaC_barrierf(L,obj2gco(p),obj2gco(o));} +#define luaC_objbarriert(L,t,o){if(iswhite(obj2gco(o))&&isblack(obj2gco(t)))luaC_barrierback(L,t);} +static void luaC_step(lua_State*L); +static void luaC_link(lua_State*L,GCObject*o,lu_byte tt); +static void luaC_linkupval(lua_State*L,UpVal*uv); +static void luaC_barrierf(lua_State*L,GCObject*o,GCObject*v); +static void luaC_barrierback(lua_State*L,Table*t); +#define sizestring(s)(sizeof(union TString)+((s)->len+1)*sizeof(char)) +#define sizeudata(u)(sizeof(union Udata)+(u)->len) +#define luaS_new(L,s)(luaS_newlstr(L,s,strlen(s))) +#define luaS_newliteral(L,s)(luaS_newlstr(L,""s,(sizeof(s)/sizeof(char))-1)) +#define luaS_fix(s)l_setbit((s)->tsv.marked,5) +static TString*luaS_newlstr(lua_State*L,const char*str,size_t l); +#define tostring(L,o)((ttype(o)==4)||(luaV_tostring(L,o))) +#define tonumber(o,n)(ttype(o)==3||(((o)=luaV_tonumber(o,n))!=NULL)) +#define equalobj(L,o1,o2)(ttype(o1)==ttype(o2)&&luaV_equalval(L,o1,o2)) +static int luaV_equalval(lua_State*L,const TValue*t1,const TValue*t2); +static const TValue*luaV_tonumber(const TValue*obj,TValue*n); +static int luaV_tostring(lua_State*L,StkId obj); +static void luaV_execute(lua_State*L,int nexeccalls); +static void luaV_concat(lua_State*L,int total,int last); +static const TValue luaO_nilobject_={{NULL},0}; +static int luaO_int2fb(unsigned int x){ +int e=0; +while(x>=16){ +x=(x+1)>>1; +e++; +} +if(x<8)return x; +else return((e+1)<<3)|(cast_int(x)-8); +} +static int luaO_fb2int(int x){ +int e=(x>>3)&31; +if(e==0)return x; +else return((x&7)+8)<<(e-1); +} +static int luaO_log2(unsigned int x){ +static const lu_byte log_2[256]={ +0,1,2,2,3,3,3,3,4,4,4,4,4,4,4,4,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, +6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6, +7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, +7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, +8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, +8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, +8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, +8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8 +}; +int l=-1; +while(x>=256){l+=8;x>>=8;} +return l+log_2[x]; +} +static int luaO_rawequalObj(const TValue*t1,const TValue*t2){ +if(ttype(t1)!=ttype(t2))return 0; +else switch(ttype(t1)){ +case 0: +return 1; +case 3: +return luai_numeq(nvalue(t1),nvalue(t2)); +case 1: +return bvalue(t1)==bvalue(t2); +case 2: +return pvalue(t1)==pvalue(t2); +default: +return gcvalue(t1)==gcvalue(t2); +} +} +static int luaO_str2d(const char*s,lua_Number*result){ +char*endptr; +*result=lua_str2number(s,&endptr); +if(endptr==s)return 0; +if(*endptr=='x'||*endptr=='X') +*result=cast_num(strtoul(s,&endptr,16)); +if(*endptr=='\0')return 1; +while(isspace(cast(unsigned char,*endptr)))endptr++; +if(*endptr!='\0')return 0; +return 1; +} +static void pushstr(lua_State*L,const char*str){ +setsvalue(L,L->top,luaS_new(L,str)); +incr_top(L); +} +static const char*luaO_pushvfstring(lua_State*L,const char*fmt,va_list argp){ +int n=1; +pushstr(L,""); +for(;;){ +const char*e=strchr(fmt,'%'); 
+if(e==NULL)break;
+setsvalue(L,L->top,luaS_newlstr(L,fmt,e-fmt));
+incr_top(L);
+switch(*(e+1)){
+case's':{
+const char*s=va_arg(argp,char*);
+if(s==NULL)s="(null)";
+pushstr(L,s);
+break;
+}
+case'c':{
+char buff[2];
+buff[0]=cast(char,va_arg(argp,int));
+buff[1]='\0';
+pushstr(L,buff);
+break;
+}
+case'd':{
+setnvalue(L->top,cast_num(va_arg(argp,int)));
+incr_top(L);
+break;
+}
+case'f':{
+setnvalue(L->top,cast_num(va_arg(argp,l_uacNumber)));
+incr_top(L);
+break;
+}
+case'p':{
+char buff[4*sizeof(void*)+8];
+sprintf(buff,"%p",va_arg(argp,void*));
+pushstr(L,buff);
+break;
+}
+case'%':{
+pushstr(L,"%");
+break;
+}
+default:{
+char buff[3];
+buff[0]='%';
+buff[1]=*(e+1);
+buff[2]='\0';
+pushstr(L,buff);
+break;
+}
+}
+n+=2;
+fmt=e+2;
+}
+pushstr(L,fmt);
+luaV_concat(L,n+1,cast_int(L->top-L->base)-1);
+L->top-=n;
+return svalue(L->top-1);
+}
+static const char*luaO_pushfstring(lua_State*L,const char*fmt,...){
+const char*msg;
+va_list argp;
+va_start(argp,fmt);
+msg=luaO_pushvfstring(L,fmt,argp);
+va_end(argp);
+return msg;
+}
+static void luaO_chunkid(char*out,const char*source,size_t bufflen){
+if(*source=='='){
+strncpy(out,source+1,bufflen);
+out[bufflen-1]='\0';
+}
+else{
+if(*source=='@'){
+size_t l;
+source++;
+bufflen-=sizeof(" '...' ");
+l=strlen(source);
+strcpy(out,"");
+if(l>bufflen){
+source+=(l-bufflen);
+strcat(out,"...");
+}
+strcat(out,source);
+}
+else{
+size_t len=strcspn(source,"\n\r");
+bufflen-=sizeof(" [string \"...\"] ");
+if(len>bufflen)len=bufflen;
+strcpy(out,"[string \"");
+if(source[len]!='\0'){
+strncat(out,source,len);
+strcat(out,"...");
+}
+else
+strcat(out,source);
+strcat(out,"\"]");
+}
+}
+}
+#define gnode(t,i)(&(t)->node[i])
+#define gkey(n)(&(n)->i_key.nk)
+#define gval(n)(&(n)->i_val)
+#define gnext(n)((n)->i_key.nk.next)
+#define key2tval(n)(&(n)->i_key.tvk)
+static TValue*luaH_setnum(lua_State*L,Table*t,int key);
+static const TValue*luaH_getstr(Table*t,TString*key);
+static TValue*luaH_set(lua_State*L,Table*t,const TValue*key);
+static const char*const luaT_typenames[]={
+"nil","boolean","userdata","number",
+"string","table","function","userdata","thread",
+"proto","upval"
+};
+static void luaT_init(lua_State*L){
+static const char*const luaT_eventname[]={
+"__index","__newindex",
+"__gc","__mode","__eq",
+"__add","__sub","__mul","__div","__mod",
+"__pow","__unm","__len","__lt","__le",
+"__concat","__call"
+};
+int i;
+for(i=0;i<TM_N;i++){
+G(L)->tmname[i]=luaS_new(L,luaT_eventname[i]);
+luaS_fix(G(L)->tmname[i]);
+}
+}
+static const TValue*luaT_gettm(Table*events,TMS event,TString*ename){
+const TValue*tm=luaH_getstr(events,ename);
+if(ttisnil(tm)){
+events->flags|=cast_byte(1u<<event);
+return NULL;
+}
+else return tm;
+}
+static const TValue*luaT_gettmbyobj(lua_State*L,const TValue*o,TMS event){
+Table*mt;
+switch(ttype(o)){
+case 5:
+mt=hvalue(o)->metatable;
+break;
+case 7:
+mt=uvalue(o)->metatable;
+break;
+default:
+mt=G(L)->mt[ttype(o)];
+}
+return(mt?luaH_getstr(mt,G(L)->tmname[event]):(&luaO_nilobject_));
+}
+#define sizeCclosure(n)(cast(int,sizeof(CClosure))+cast(int,sizeof(TValue)*((n)-1)))
+#define sizeLclosure(n)(cast(int,sizeof(LClosure))+cast(int,sizeof(TValue*)*((n)-1)))
+static Closure*luaF_newCclosure(lua_State*L,int nelems,Table*e){
+Closure*c=cast(Closure*,luaM_malloc(L,sizeCclosure(nelems)));
+luaC_link(L,obj2gco(c),6);
+c->c.isC=1;
+c->c.env=e;
+c->c.nupvalues=cast_byte(nelems);
+return c;
+}
+static Closure*luaF_newLclosure(lua_State*L,int nelems,Table*e){
+Closure*c=cast(Closure*,luaM_malloc(L,sizeLclosure(nelems)));
+luaC_link(L,obj2gco(c),6);
+c->l.isC=0;
+c->l.env=e;
+c->l.nupvalues=cast_byte(nelems);
+while(nelems--)c->l.upvals[nelems]=NULL;
+return c;
+}
+static UpVal*luaF_newupval(lua_State*L){
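+/* Upvalues come in two states: open (v points at a live stack slot, the
+** UpVal is linked in L->openupval and in the global uvhead list) and
+** closed (v points at the embedded u.value copy).  This constructor
+** returns one born closed; luaF_findupval() below reuses or creates open
+** ones, and luaF_close() migrates them to the closed state when their
+** stack slots go out of scope. */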
+UpVal*uv=luaM_new(L,UpVal);
+luaC_link(L,obj2gco(uv),(8+2));
+uv->v=&uv->u.value;
+setnilvalue(uv->v);
+return uv;
+}
+static UpVal*luaF_findupval(lua_State*L,StkId level){
+global_State*g=G(L);
+GCObject**pp=&L->openupval;
+UpVal*p;
+UpVal*uv;
+while(*pp!=NULL&&(p=ngcotouv(*pp))->v>=level){
+if(p->v==level){
+if(isdead(g,obj2gco(p)))
+changewhite(obj2gco(p));
+return p;
+}
+pp=&p->next;
+}
+uv=luaM_new(L,UpVal);
+uv->tt=(8+2);
+uv->marked=luaC_white(g);
+uv->v=level;
+uv->next=*pp;
+*pp=obj2gco(uv);
+uv->u.l.prev=&g->uvhead;
+uv->u.l.next=g->uvhead.u.l.next;
+uv->u.l.next->u.l.prev=uv;
+g->uvhead.u.l.next=uv;
+return uv;
+}
+static void unlinkupval(UpVal*uv){
+uv->u.l.next->u.l.prev=uv->u.l.prev;
+uv->u.l.prev->u.l.next=uv->u.l.next;
+}
+static void luaF_freeupval(lua_State*L,UpVal*uv){
+if(uv->v!=&uv->u.value)
+unlinkupval(uv);
+luaM_free(L,uv);
+}
+static void luaF_close(lua_State*L,StkId level){
+UpVal*uv;
+global_State*g=G(L);
+while(L->openupval!=NULL&&(uv=ngcotouv(L->openupval))->v>=level){
+GCObject*o=obj2gco(uv);
+L->openupval=uv->next;
+if(isdead(g,o))
+luaF_freeupval(L,uv);
+else{
+unlinkupval(uv);
+setobj(L,&uv->u.value,uv->v);
+uv->v=&uv->u.value;
+luaC_linkupval(L,uv);
+}
+}
+}
+static Proto*luaF_newproto(lua_State*L){
+Proto*f=luaM_new(L,Proto);
+luaC_link(L,obj2gco(f),(8+1));
+f->k=NULL;
+f->sizek=0;
+f->p=NULL;
+f->sizep=0;
+f->code=NULL;
+f->sizecode=0;
+f->sizelineinfo=0;
+f->sizeupvalues=0;
+f->nups=0;
+f->upvalues=NULL;
+f->numparams=0;
+f->is_vararg=0;
+f->maxstacksize=0;
+f->lineinfo=NULL;
+f->sizelocvars=0;
+f->locvars=NULL;
+f->linedefined=0;
+f->lastlinedefined=0;
+f->source=NULL;
+return f;
+}
+static void luaF_freeproto(lua_State*L,Proto*f){
+luaM_freearray(L,f->code,f->sizecode,Instruction);
+luaM_freearray(L,f->p,f->sizep,Proto*);
+luaM_freearray(L,f->k,f->sizek,TValue);
+luaM_freearray(L,f->lineinfo,f->sizelineinfo,int);
+luaM_freearray(L,f->locvars,f->sizelocvars,struct LocVar);
+luaM_freearray(L,f->upvalues,f->sizeupvalues,TString*);
+luaM_free(L,f);
+}
+static void luaF_freeclosure(lua_State*L,Closure*c){
+int size=(c->c.isC)?sizeCclosure(c->c.nupvalues):
+sizeLclosure(c->l.nupvalues);
+luaM_freemem(L,c,size);
+}
+#define MASK1(n,p)((~((~(Instruction)0)<<n))<<p)
+#define MASK0(n,p)(~MASK1(n,p))
+#define GET_OPCODE(i)(cast(OpCode,((i)>>0)&MASK1(6,0)))
+#define SET_OPCODE(i,o)((i)=(((i)&MASK0(6,0))|((cast(Instruction,o)<<0)&MASK1(6,0))))
+#define GETARG_A(i)(cast(int,((i)>>(0+6))&MASK1(8,0)))
+#define SETARG_A(i,u)((i)=(((i)&MASK0(8,(0+6)))|((cast(Instruction,u)<<(0+6))&MASK1(8,(0+6)))))
+#define GETARG_B(i)(cast(int,((i)>>(((0+6)+8)+9))&MASK1(9,0)))
+#define SETARG_B(i,b)((i)=(((i)&MASK0(9,(((0+6)+8)+9)))|((cast(Instruction,b)<<(((0+6)+8)+9))&MASK1(9,(((0+6)+8)+9)))))
+#define GETARG_C(i)(cast(int,((i)>>((0+6)+8))&MASK1(9,0)))
+#define SETARG_C(i,b)((i)=(((i)&MASK0(9,((0+6)+8)))|((cast(Instruction,b)<<((0+6)+8))&MASK1(9,((0+6)+8)))))
+#define GETARG_Bx(i)(cast(int,((i)>>((0+6)+8))&MASK1((9+9),0)))
+#define SETARG_Bx(i,b)((i)=(((i)&MASK0((9+9),((0+6)+8)))|((cast(Instruction,b)<<((0+6)+8))&MASK1((9+9),((0+6)+8)))))
+#define GETARG_sBx(i)(GETARG_Bx(i)-(((1<<(9+9))-1)>>1))
+#define SETARG_sBx(i,b)SETARG_Bx((i),cast(unsigned int,(b)+(((1<<(9+9))-1)>>1)))
+#define CREATE_ABC(o,a,b,c)((cast(Instruction,o)<<0)|(cast(Instruction,a)<<(0+6))|(cast(Instruction,b)<<(((0+6)+8)+9))|(cast(Instruction,c)<<((0+6)+8)))
+#define CREATE_ABx(o,a,bc)((cast(Instruction,o)<<0)|(cast(Instruction,a)<<(0+6))|(cast(Instruction,bc)<<((0+6)+8)))
+#define ISK(x)((x)&(1<<(9-1)))
+#define INDEXK(r)((int)(r)&~(1<<(9-1)))
+#define RKASK(x)((x)|(1<<(9-1)))
+static const
lu_byte luaP_opmodes[(cast(int,OP_VARARG)+1)]; +#define getBMode(m)(cast(enum OpArgMask,(luaP_opmodes[m]>>4)&3)) +#define getCMode(m)(cast(enum OpArgMask,(luaP_opmodes[m]>>2)&3)) +#define testTMode(m)(luaP_opmodes[m]&(1<<7)) +typedef struct expdesc{ +expkind k; +union{ +struct{int info,aux;}s; +lua_Number nval; +}u; +int t; +int f; +}expdesc; +typedef struct upvaldesc{ +lu_byte k; +lu_byte info; +}upvaldesc; +struct BlockCnt; +typedef struct FuncState{ +Proto*f; +Table*h; +struct FuncState*prev; +struct LexState*ls; +struct lua_State*L; +struct BlockCnt*bl; +int pc; +int lasttarget; +int jpc; +int freereg; +int nk; +int np; +short nlocvars; +lu_byte nactvar; +upvaldesc upvalues[60]; +unsigned short actvar[200]; +}FuncState; +static Proto*luaY_parser(lua_State*L,ZIO*z,Mbuffer*buff, +const char*name); +struct lua_longjmp{ +struct lua_longjmp*previous; +jmp_buf b; +volatile int status; +}; +static void luaD_seterrorobj(lua_State*L,int errcode,StkId oldtop){ +switch(errcode){ +case 4:{ +setsvalue(L,oldtop,luaS_newliteral(L,"not enough memory")); +break; +} +case 5:{ +setsvalue(L,oldtop,luaS_newliteral(L,"error in error handling")); +break; +} +case 3: +case 2:{ +setobj(L,oldtop,L->top-1); +break; +} +} +L->top=oldtop+1; +} +static void restore_stack_limit(lua_State*L){ +if(L->size_ci>20000){ +int inuse=cast_int(L->ci-L->base_ci); +if(inuse+1<20000) +luaD_reallocCI(L,20000); +} +} +static void resetstack(lua_State*L,int status){ +L->ci=L->base_ci; +L->base=L->ci->base; +luaF_close(L,L->base); +luaD_seterrorobj(L,status,L->base); +L->nCcalls=L->baseCcalls; +L->allowhook=1; +restore_stack_limit(L); +L->errfunc=0; +L->errorJmp=NULL; +} +static void luaD_throw(lua_State*L,int errcode){ +if(L->errorJmp){ +L->errorJmp->status=errcode; +LUAI_THROW(L,L->errorJmp); +} +else{ +L->status=cast_byte(errcode); +if(G(L)->panic){ +resetstack(L,errcode); +G(L)->panic(L); +} +exit(EXIT_FAILURE); +} +} +static int luaD_rawrunprotected(lua_State*L,Pfunc f,void*ud){ +struct lua_longjmp lj; +lj.status=0; +lj.previous=L->errorJmp; +L->errorJmp=&lj; +LUAI_TRY(L,&lj, +(*f)(L,ud); +); +L->errorJmp=lj.previous; +return lj.status; +} +static void correctstack(lua_State*L,TValue*oldstack){ +CallInfo*ci; +GCObject*up; +L->top=(L->top-oldstack)+L->stack; +for(up=L->openupval;up!=NULL;up=up->gch.next) +gco2uv(up)->v=(gco2uv(up)->v-oldstack)+L->stack; +for(ci=L->base_ci;ci<=L->ci;ci++){ +ci->top=(ci->top-oldstack)+L->stack; +ci->base=(ci->base-oldstack)+L->stack; +ci->func=(ci->func-oldstack)+L->stack; +} +L->base=(L->base-oldstack)+L->stack; +} +static void luaD_reallocstack(lua_State*L,int newsize){ +TValue*oldstack=L->stack; +int realsize=newsize+1+5; +luaM_reallocvector(L,L->stack,L->stacksize,realsize,TValue); +L->stacksize=realsize; +L->stack_last=L->stack+newsize; +correctstack(L,oldstack); +} +static void luaD_reallocCI(lua_State*L,int newsize){ +CallInfo*oldci=L->base_ci; +luaM_reallocvector(L,L->base_ci,L->size_ci,newsize,CallInfo); +L->size_ci=newsize; +L->ci=(L->ci-oldci)+L->base_ci; +L->end_ci=L->base_ci+L->size_ci-1; +} +static void luaD_growstack(lua_State*L,int n){ +if(n<=L->stacksize) +luaD_reallocstack(L,2*L->stacksize); +else +luaD_reallocstack(L,L->stacksize+n); +} +static CallInfo*growCI(lua_State*L){ +if(L->size_ci>20000) +luaD_throw(L,5); +else{ +luaD_reallocCI(L,2*L->size_ci); +if(L->size_ci>20000) +luaG_runerror(L,"stack overflow"); +} +return++L->ci; +} +static StkId adjust_varargs(lua_State*L,Proto*p,int actual){ +int i; +int nfixargs=p->numparams; +Table*htab=NULL; +StkId base,fixed; 
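+/* Vararg calls reshape the frame: missing fixed parameters are padded
+** with nil, then the named parameters are copied above the varargs so
+** the callee's base points at the copies (the originals are nil-ed out).
+** htab is always NULL in this build, which omits the 5.0-style arg
+** table; the if(htab) tail below is the vestige of that option. */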
+for(;actual<nfixargs;++actual)
+setnilvalue(L->top++);
+fixed=L->top-actual;
+base=L->top;
+for(i=0;i<nfixargs;i++){
+setobj(L,L->top++,fixed+i);
+setnilvalue(fixed+i);
+}
+if(htab){
+sethvalue(L,L->top++,htab);
+}
+return base;
+}
+static StkId tryfuncTM(lua_State*L,StkId func){
+const TValue*tm=luaT_gettmbyobj(L,func,TM_CALL);
+StkId p;
+ptrdiff_t funcr=savestack(L,func);
+if(!ttisfunction(tm))
+luaG_typeerror(L,func,"call");
+for(p=L->top;p>func;p--)setobj(L,p,p-1);
+incr_top(L);
+func=restorestack(L,funcr);
+setobj(L,func,tm);
+return func;
+}
+#define inc_ci(L)((L->ci==L->end_ci)?growCI(L):(condhardstacktests(luaD_reallocCI(L,L->size_ci)),++L->ci))
+static int luaD_precall(lua_State*L,StkId func,int nresults){
+LClosure*cl;
+ptrdiff_t funcr;
+if(!ttisfunction(func))
+func=tryfuncTM(L,func);
+funcr=savestack(L,func);
+cl=&clvalue(func)->l;
+L->ci->savedpc=L->savedpc;
+if(!cl->isC){
+CallInfo*ci;
+StkId st,base;
+Proto*p=cl->p;
+luaD_checkstack(L,p->maxstacksize);
+func=restorestack(L,funcr);
+if(!p->is_vararg){
+base=func+1;
+if(L->top>base+p->numparams)
+L->top=base+p->numparams;
+}
+else{
+int nargs=cast_int(L->top-func)-1;
+base=adjust_varargs(L,p,nargs);
+func=restorestack(L,funcr);
+}
+ci=inc_ci(L);
+ci->func=func;
+L->base=ci->base=base;
+ci->top=L->base+p->maxstacksize;
+L->savedpc=p->code;
+ci->tailcalls=0;
+ci->nresults=nresults;
+for(st=L->top;st<ci->top;st++)
+setnilvalue(st);
+L->top=ci->top;
+return 0;
+}
+else{
+CallInfo*ci;
+int n;
+luaD_checkstack(L,20);
+ci=inc_ci(L);
+ci->func=restorestack(L,funcr);
+L->base=ci->base=ci->func+1;
+ci->top=L->top+20;
+ci->nresults=nresults;
+n=(*curr_func(L)->c.f)(L);
+if(n<0)
+return 2;
+else{
+luaD_poscall(L,L->top-n);
+return 1;
+}
+}
+}
+static int luaD_poscall(lua_State*L,StkId firstResult){
+StkId res;
+int wanted,i;
+CallInfo*ci;
+ci=L->ci--;
+res=ci->func;
+wanted=ci->nresults;
+L->base=(ci-1)->base;
+L->savedpc=(ci-1)->savedpc;
+for(i=wanted;i!=0&&firstResult<L->top;i--)
+setobj(L,res++,firstResult++);
+while(i-->0)
+setnilvalue(res++);
+L->top=res;
+return(wanted-(-1));
+}
+static void luaD_call(lua_State*L,StkId func,int nResults){
+if(++L->nCcalls>=200){
+if(L->nCcalls==200)
+luaG_runerror(L,"C stack overflow");
+else if(L->nCcalls>=(200+(200>>3)))
+luaD_throw(L,5);
+}
+if(luaD_precall(L,func,nResults)==0)
+luaV_execute(L,1);
+L->nCcalls--;
+luaC_checkGC(L);
+}
+static int luaD_pcall(lua_State*L,Pfunc func,void*u,
+ptrdiff_t old_top,ptrdiff_t ef){
+int status;
+unsigned short oldnCcalls=L->nCcalls;
+ptrdiff_t old_ci=saveci(L,L->ci);
+lu_byte old_allowhooks=L->allowhook;
+ptrdiff_t old_errfunc=L->errfunc;
+L->errfunc=ef;
+status=luaD_rawrunprotected(L,func,u);
+if(status!=0){
+StkId oldtop=restorestack(L,old_top);
+luaF_close(L,oldtop);
+luaD_seterrorobj(L,status,oldtop);
+L->nCcalls=oldnCcalls;
+L->ci=restoreci(L,old_ci);
+L->base=L->ci->base;
+L->savedpc=L->ci->savedpc;
+L->allowhook=old_allowhooks;
+restore_stack_limit(L);
+}
+L->errfunc=old_errfunc;
+return status;
+}
+struct SParser{
+ZIO*z;
+Mbuffer buff;
+const char*name;
+};
+static void f_parser(lua_State*L,void*ud){
+int i;
+Proto*tf;
+Closure*cl;
+struct SParser*p=cast(struct SParser*,ud);
+luaC_checkGC(L);
+tf=luaY_parser(L,p->z,
+&p->buff,p->name);
+cl=luaF_newLclosure(L,tf->nups,hvalue(gt(L)));
+cl->l.p=tf;
+for(i=0;i<tf->nups;i++)
+cl->l.upvals[i]=luaF_newupval(L);
+setclvalue(L,L->top,cl);
+incr_top(L);
+}
+static int luaD_protectedparser(lua_State*L,ZIO*z,const char*name){
+struct SParser p;
+int status;
+p.z=z;p.name=name;
+luaZ_initbuffer(L,&p.buff);
+status=luaD_pcall(L,f_parser,&p,savestack(L,L->top),L->errfunc);
+luaZ_freebuffer(L,&p.buff); +return status; +} +static void luaS_resize(lua_State*L,int newsize){ +GCObject**newhash; +stringtable*tb; +int i; +if(G(L)->gcstate==2) +return; +newhash=luaM_newvector(L,newsize,GCObject*); +tb=&G(L)->strt; +for(i=0;isize;i++){ +GCObject*p=tb->hash[i]; +while(p){ +GCObject*next=p->gch.next; +unsigned int h=gco2ts(p)->hash; +int h1=lmod(h,newsize); +p->gch.next=newhash[h1]; +newhash[h1]=p; +p=next; +} +} +luaM_freearray(L,tb->hash,tb->size,TString*); +tb->size=newsize; +tb->hash=newhash; +} +static TString*newlstr(lua_State*L,const char*str,size_t l, +unsigned int h){ +TString*ts; +stringtable*tb; +if(l+1>(((size_t)(~(size_t)0)-2)-sizeof(TString))/sizeof(char)) +luaM_toobig(L); +ts=cast(TString*,luaM_malloc(L,(l+1)*sizeof(char)+sizeof(TString))); +ts->tsv.len=l; +ts->tsv.hash=h; +ts->tsv.marked=luaC_white(G(L)); +ts->tsv.tt=4; +ts->tsv.reserved=0; +memcpy(ts+1,str,l*sizeof(char)); +((char*)(ts+1))[l]='\0'; +tb=&G(L)->strt; +h=lmod(h,tb->size); +ts->tsv.next=tb->hash[h]; +tb->hash[h]=obj2gco(ts); +tb->nuse++; +if(tb->nuse>cast(lu_int32,tb->size)&&tb->size<=(INT_MAX-2)/2) +luaS_resize(L,tb->size*2); +return ts; +} +static TString*luaS_newlstr(lua_State*L,const char*str,size_t l){ +GCObject*o; +unsigned int h=cast(unsigned int,l); +size_t step=(l>>5)+1; +size_t l1; +for(l1=l;l1>=step;l1-=step) +h=h^((h<<5)+(h>>2)+cast(unsigned char,str[l1-1])); +for(o=G(L)->strt.hash[lmod(h,G(L)->strt.size)]; +o!=NULL; +o=o->gch.next){ +TString*ts=rawgco2ts(o); +if(ts->tsv.len==l&&(memcmp(str,getstr(ts),l)==0)){ +if(isdead(G(L),o))changewhite(o); +return ts; +} +} +return newlstr(L,str,l,h); +} +static Udata*luaS_newudata(lua_State*L,size_t s,Table*e){ +Udata*u; +if(s>((size_t)(~(size_t)0)-2)-sizeof(Udata)) +luaM_toobig(L); +u=cast(Udata*,luaM_malloc(L,s+sizeof(Udata))); +u->uv.marked=luaC_white(G(L)); +u->uv.tt=7; +u->uv.len=s; +u->uv.metatable=NULL; +u->uv.env=e; +u->uv.next=G(L)->mainthread->next; +G(L)->mainthread->next=obj2gco(u); +return u; +} +#define hashpow2(t,n)(gnode(t,lmod((n),sizenode(t)))) +#define hashstr(t,str)hashpow2(t,(str)->tsv.hash) +#define hashboolean(t,p)hashpow2(t,p) +#define hashmod(t,n)(gnode(t,((n)%((sizenode(t)-1)|1)))) +#define hashpointer(t,p)hashmod(t,IntPoint(p)) +static const Node dummynode_={ +{{NULL},0}, +{{{NULL},0,NULL}} +}; +static Node*hashnum(const Table*t,lua_Number n){ +unsigned int a[cast_int(sizeof(lua_Number)/sizeof(int))]; +int i; +if(luai_numeq(n,0)) +return gnode(t,0); +memcpy(a,&n,sizeof(a)); +for(i=1;isizearray) +return i-1; +else{ +Node*n=mainposition(t,key); +do{ +if(luaO_rawequalObj(key2tval(n),key)|| +(ttype(gkey(n))==(8+3)&&iscollectable(key)&& +gcvalue(gkey(n))==gcvalue(key))){ +i=cast_int(n-gnode(t,0)); +return i+t->sizearray; +} +else n=gnext(n); +}while(n); +luaG_runerror(L,"invalid key to "LUA_QL("next")); +return 0; +} +} +static int luaH_next(lua_State*L,Table*t,StkId key){ +int i=findindex(L,t,key); +for(i++;isizearray;i++){ +if(!ttisnil(&t->array[i])){ +setnvalue(key,cast_num(i+1)); +setobj(L,key+1,&t->array[i]); +return 1; +} +} +for(i-=t->sizearray;i<(int)sizenode(t);i++){ +if(!ttisnil(gval(gnode(t,i)))){ +setobj(L,key,key2tval(gnode(t,i))); +setobj(L,key+1,gval(gnode(t,i))); +return 1; +} +} +return 0; +} +static int computesizes(int nums[],int*narray){ +int i; +int twotoi; +int a=0; +int na=0; +int n=0; +for(i=0,twotoi=1;twotoi/2<*narray;i++,twotoi*=2){ +if(nums[i]>0){ +a+=nums[i]; +if(a>twotoi/2){ +n=twotoi; +na=a; +} +} +if(a==*narray)break; +} +*narray=n; +return na; +} +static int countint(const 
TValue*key,int*nums){ +int k=arrayindex(key); +if(0t->sizearray){ +lim=t->sizearray; +if(i>lim) +break; +} +for(;i<=lim;i++){ +if(!ttisnil(&t->array[i-1])) +lc++; +} +nums[lg]+=lc; +ause+=lc; +} +return ause; +} +static int numusehash(const Table*t,int*nums,int*pnasize){ +int totaluse=0; +int ause=0; +int i=sizenode(t); +while(i--){ +Node*n=&t->node[i]; +if(!ttisnil(gval(n))){ +ause+=countint(key2tval(n),nums); +totaluse++; +} +} +*pnasize+=ause; +return totaluse; +} +static void setarrayvector(lua_State*L,Table*t,int size){ +int i; +luaM_reallocvector(L,t->array,t->sizearray,size,TValue); +for(i=t->sizearray;iarray[i]); +t->sizearray=size; +} +static void setnodevector(lua_State*L,Table*t,int size){ +int lsize; +if(size==0){ +t->node=cast(Node*,(&dummynode_)); +lsize=0; +} +else{ +int i; +lsize=ceillog2(size); +if(lsize>(32-2)) +luaG_runerror(L,"table overflow"); +size=twoto(lsize); +t->node=luaM_newvector(L,size,Node); +for(i=0;ilsizenode=cast_byte(lsize); +t->lastfree=gnode(t,size); +} +static void resize(lua_State*L,Table*t,int nasize,int nhsize){ +int i; +int oldasize=t->sizearray; +int oldhsize=t->lsizenode; +Node*nold=t->node; +if(nasize>oldasize) +setarrayvector(L,t,nasize); +setnodevector(L,t,nhsize); +if(nasizesizearray=nasize; +for(i=nasize;iarray[i])) +setobj(L,luaH_setnum(L,t,i+1),&t->array[i]); +} +luaM_reallocvector(L,t->array,oldasize,nasize,TValue); +} +for(i=twoto(oldhsize)-1;i>=0;i--){ +Node*old=nold+i; +if(!ttisnil(gval(old))) +setobj(L,luaH_set(L,t,key2tval(old)),gval(old)); +} +if(nold!=(&dummynode_)) +luaM_freearray(L,nold,twoto(oldhsize),Node); +} +static void luaH_resizearray(lua_State*L,Table*t,int nasize){ +int nsize=(t->node==(&dummynode_))?0:sizenode(t); +resize(L,t,nasize,nsize); +} +static void rehash(lua_State*L,Table*t,const TValue*ek){ +int nasize,na; +int nums[(32-2)+1]; +int i; +int totaluse; +for(i=0;i<=(32-2);i++)nums[i]=0; +nasize=numusearray(t,nums); +totaluse=nasize; +totaluse+=numusehash(t,nums,&nasize); +nasize+=countint(ek,nums); +totaluse++; +na=computesizes(nums,&nasize); +resize(L,t,nasize,totaluse-na); +} +static Table*luaH_new(lua_State*L,int narray,int nhash){ +Table*t=luaM_new(L,Table); +luaC_link(L,obj2gco(t),5); +t->metatable=NULL; +t->flags=cast_byte(~0); +t->array=NULL; +t->sizearray=0; +t->lsizenode=0; +t->node=cast(Node*,(&dummynode_)); +setarrayvector(L,t,narray); +setnodevector(L,t,nhash); +return t; +} +static void luaH_free(lua_State*L,Table*t){ +if(t->node!=(&dummynode_)) +luaM_freearray(L,t->node,sizenode(t),Node); +luaM_freearray(L,t->array,t->sizearray,TValue); +luaM_free(L,t); +} +static Node*getfreepos(Table*t){ +while(t->lastfree-->t->node){ +if(ttisnil(gkey(t->lastfree))) +return t->lastfree; +} +return NULL; +} +static TValue*newkey(lua_State*L,Table*t,const TValue*key){ +Node*mp=mainposition(t,key); +if(!ttisnil(gval(mp))||mp==(&dummynode_)){ +Node*othern; +Node*n=getfreepos(t); +if(n==NULL){ +rehash(L,t,key); +return luaH_set(L,t,key); +} +othern=mainposition(t,key2tval(mp)); +if(othern!=mp){ +while(gnext(othern)!=mp)othern=gnext(othern); +gnext(othern)=n; +*n=*mp; +gnext(mp)=NULL; +setnilvalue(gval(mp)); +} +else{ +gnext(n)=gnext(mp); +gnext(mp)=n; +mp=n; +} +} +gkey(mp)->value=key->value;gkey(mp)->tt=key->tt; +luaC_barriert(L,t,key); +return gval(mp); +} +static const TValue*luaH_getnum(Table*t,int key){ +if(cast(unsigned int,key)-1sizearray)) +return&t->array[key-1]; +else{ +lua_Number nk=cast_num(key); +Node*n=hashnum(t,nk); +do{ +if(ttisnumber(gkey(n))&&luai_numeq(nvalue(gkey(n)),nk)) +return gval(n); +else 
n=gnext(n); +}while(n); +return(&luaO_nilobject_); +} +} +static const TValue*luaH_getstr(Table*t,TString*key){ +Node*n=hashstr(t,key); +do{ +if(ttisstring(gkey(n))&&rawtsvalue(gkey(n))==key) +return gval(n); +else n=gnext(n); +}while(n); +return(&luaO_nilobject_); +} +static const TValue*luaH_get(Table*t,const TValue*key){ +switch(ttype(key)){ +case 0:return(&luaO_nilobject_); +case 4:return luaH_getstr(t,rawtsvalue(key)); +case 3:{ +int k; +lua_Number n=nvalue(key); +lua_number2int(k,n); +if(luai_numeq(cast_num(k),nvalue(key))) +return luaH_getnum(t,k); +} +default:{ +Node*n=mainposition(t,key); +do{ +if(luaO_rawequalObj(key2tval(n),key)) +return gval(n); +else n=gnext(n); +}while(n); +return(&luaO_nilobject_); +} +} +} +static TValue*luaH_set(lua_State*L,Table*t,const TValue*key){ +const TValue*p=luaH_get(t,key); +t->flags=0; +if(p!=(&luaO_nilobject_)) +return cast(TValue*,p); +else{ +if(ttisnil(key))luaG_runerror(L,"table index is nil"); +else if(ttisnumber(key)&&luai_numisnan(nvalue(key))) +luaG_runerror(L,"table index is NaN"); +return newkey(L,t,key); +} +} +static TValue*luaH_setnum(lua_State*L,Table*t,int key){ +const TValue*p=luaH_getnum(t,key); +if(p!=(&luaO_nilobject_)) +return cast(TValue*,p); +else{ +TValue k; +setnvalue(&k,cast_num(key)); +return newkey(L,t,&k); +} +} +static TValue*luaH_setstr(lua_State*L,Table*t,TString*key){ +const TValue*p=luaH_getstr(t,key); +if(p!=(&luaO_nilobject_)) +return cast(TValue*,p); +else{ +TValue k; +setsvalue(L,&k,key); +return newkey(L,t,&k); +} +} +static int unbound_search(Table*t,unsigned int j){ +unsigned int i=j; +j++; +while(!ttisnil(luaH_getnum(t,j))){ +i=j; +j*=2; +if(j>cast(unsigned int,(INT_MAX-2))){ +i=1; +while(!ttisnil(luaH_getnum(t,i)))i++; +return i-1; +} +} +while(j-i>1){ +unsigned int m=(i+j)/2; +if(ttisnil(luaH_getnum(t,m)))j=m; +else i=m; +} +return i; +} +static int luaH_getn(Table*t){ +unsigned int j=t->sizearray; +if(j>0&&ttisnil(&t->array[j-1])){ +unsigned int i=0; +while(j-i>1){ +unsigned int m=(i+j)/2; +if(ttisnil(&t->array[m-1]))j=m; +else i=m; +} +return i; +} +else if(t->node==(&dummynode_)) +return j; +else return unbound_search(t,j); +} +#define makewhite(g,x)((x)->gch.marked=cast_byte(((x)->gch.marked&cast_byte(~(bitmask(2)|bit2mask(0,1))))|luaC_white(g))) +#define white2gray(x)reset2bits((x)->gch.marked,0,1) +#define black2gray(x)resetbit((x)->gch.marked,2) +#define stringmark(s)reset2bits((s)->tsv.marked,0,1) +#define isfinalized(u)testbit((u)->marked,3) +#define markfinalized(u)l_setbit((u)->marked,3) +#define markvalue(g,o){checkconsistency(o);if(iscollectable(o)&&iswhite(gcvalue(o)))reallymarkobject(g,gcvalue(o));} +#define markobject(g,t){if(iswhite(obj2gco(t)))reallymarkobject(g,obj2gco(t));} +#define setthreshold(g)(g->GCthreshold=(g->estimate/100)*g->gcpause) +static void removeentry(Node*n){ +if(iscollectable(gkey(n))) +setttype(gkey(n),(8+3)); +} +static void reallymarkobject(global_State*g,GCObject*o){ +white2gray(o); +switch(o->gch.tt){ +case 4:{ +return; +} +case 7:{ +Table*mt=gco2u(o)->metatable; +gray2black(o); +if(mt)markobject(g,mt); +markobject(g,gco2u(o)->env); +return; +} +case(8+2):{ +UpVal*uv=gco2uv(o); +markvalue(g,uv->v); +if(uv->v==&uv->u.value) +gray2black(o); +return; +} +case 6:{ +gco2cl(o)->c.gclist=g->gray; +g->gray=o; +break; +} +case 5:{ +gco2h(o)->gclist=g->gray; +g->gray=o; +break; +} +case 8:{ +gco2th(o)->gclist=g->gray; +g->gray=o; +break; +} +case(8+1):{ +gco2p(o)->gclist=g->gray; +g->gray=o; +break; +} +default:; +} +} +static void marktmu(global_State*g){ 
+GCObject*u=g->tmudata; +if(u){ +do{ +u=u->gch.next; +makewhite(g,u); +reallymarkobject(g,u); +}while(u!=g->tmudata); +} +} +static size_t luaC_separateudata(lua_State*L,int all){ +global_State*g=G(L); +size_t deadmem=0; +GCObject**p=&g->mainthread->next; +GCObject*curr; +while((curr=*p)!=NULL){ +if(!(iswhite(curr)||all)||isfinalized(gco2u(curr))) +p=&curr->gch.next; +else if(fasttm(L,gco2u(curr)->metatable,TM_GC)==NULL){ +markfinalized(gco2u(curr)); +p=&curr->gch.next; +} +else{ +deadmem+=sizeudata(gco2u(curr)); +markfinalized(gco2u(curr)); +*p=curr->gch.next; +if(g->tmudata==NULL) +g->tmudata=curr->gch.next=curr; +else{ +curr->gch.next=g->tmudata->gch.next; +g->tmudata->gch.next=curr; +g->tmudata=curr; +} +} +} +return deadmem; +} +static int traversetable(global_State*g,Table*h){ +int i; +int weakkey=0; +int weakvalue=0; +const TValue*mode; +if(h->metatable) +markobject(g,h->metatable); +mode=gfasttm(g,h->metatable,TM_MODE); +if(mode&&ttisstring(mode)){ +weakkey=(strchr(svalue(mode),'k')!=NULL); +weakvalue=(strchr(svalue(mode),'v')!=NULL); +if(weakkey||weakvalue){ +h->marked&=~(bitmask(3)|bitmask(4)); +h->marked|=cast_byte((weakkey<<3)| +(weakvalue<<4)); +h->gclist=g->weak; +g->weak=obj2gco(h); +} +} +if(weakkey&&weakvalue)return 1; +if(!weakvalue){ +i=h->sizearray; +while(i--) +markvalue(g,&h->array[i]); +} +i=sizenode(h); +while(i--){ +Node*n=gnode(h,i); +if(ttisnil(gval(n))) +removeentry(n); +else{ +if(!weakkey)markvalue(g,gkey(n)); +if(!weakvalue)markvalue(g,gval(n)); +} +} +return weakkey||weakvalue; +} +static void traverseproto(global_State*g,Proto*f){ +int i; +if(f->source)stringmark(f->source); +for(i=0;i<f->sizek;i++) +markvalue(g,&f->k[i]); +for(i=0;i<f->sizeupvalues;i++){ +if(f->upvalues[i]) +stringmark(f->upvalues[i]); +} +for(i=0;i<f->sizep;i++){ +if(f->p[i]) +markobject(g,f->p[i]); +} +for(i=0;i<f->sizelocvars;i++){ +if(f->locvars[i].varname) +stringmark(f->locvars[i].varname); +} +} +static void traverseclosure(global_State*g,Closure*cl){ +markobject(g,cl->c.env); +if(cl->c.isC){ +int i; +for(i=0;i<cl->c.nupvalues;i++) +markvalue(g,&cl->c.upvalue[i]); +} +else{ +int i; +markobject(g,cl->l.p); +for(i=0;i<cl->l.nupvalues;i++) +markobject(g,cl->l.upvals[i]); +} +} +static void checkstacksizes(lua_State*L,StkId max){ +int ci_used=cast_int(L->ci-L->base_ci); +int s_used=cast_int(max-L->stack); +if(L->size_ci>20000) +return; +if(4*ci_used<L->size_ci&&2*8<L->size_ci) +luaD_reallocCI(L,L->size_ci/2); +condhardstacktests(luaD_reallocCI(L,ci_used+1)); +if(4*s_used<L->stacksize&& +2*((2*20)+5)<L->stacksize) +luaD_reallocstack(L,L->stacksize/2); +condhardstacktests(luaD_reallocstack(L,s_used)); +} +static void traversestack(global_State*g,lua_State*l){ +StkId o,lim; +CallInfo*ci; +markvalue(g,gt(l)); +lim=l->top; +for(ci=l->base_ci;ci<=l->ci;ci++){ +if(lim<ci->top)lim=ci->top; +} +for(o=l->stack;o<l->top;o++) +markvalue(g,o); +for(;o<=lim;o++) +setnilvalue(o); +checkstacksizes(l,lim); +} +static l_mem propagatemark(global_State*g){ +GCObject*o=g->gray; +gray2black(o); +switch(o->gch.tt){ +case 5:{ +Table*h=gco2h(o); +g->gray=h->gclist; +if(traversetable(g,h)) +black2gray(o); +return sizeof(Table)+sizeof(TValue)*h->sizearray+ +sizeof(Node)*sizenode(h); +} +case 6:{ +Closure*cl=gco2cl(o); +g->gray=cl->c.gclist; +traverseclosure(g,cl); +return(cl->c.isC)?sizeCclosure(cl->c.nupvalues): +sizeLclosure(cl->l.nupvalues); +} +case 8:{ +lua_State*th=gco2th(o); +g->gray=th->gclist; +th->gclist=g->grayagain; +g->grayagain=o; +black2gray(o); +traversestack(g,th); +return sizeof(lua_State)+sizeof(TValue)*th->stacksize+
+sizeof(CallInfo)*th->size_ci; +} +case(8+1):{ +Proto*p=gco2p(o); +g->gray=p->gclist; +traverseproto(g,p); +return sizeof(Proto)+sizeof(Instruction)*p->sizecode+ +sizeof(Proto*)*p->sizep+ +sizeof(TValue)*p->sizek+ +sizeof(int)*p->sizelineinfo+ +sizeof(LocVar)*p->sizelocvars+ +sizeof(TString*)*p->sizeupvalues; +} +default:return 0; +} +} +static size_t propagateall(global_State*g){ +size_t m=0; +while(g->gray)m+=propagatemark(g); +return m; +} +static int iscleared(const TValue*o,int iskey){ +if(!iscollectable(o))return 0; +if(ttisstring(o)){ +stringmark(rawtsvalue(o)); +return 0; +} +return iswhite(gcvalue(o))|| +(ttisuserdata(o)&&(!iskey&&isfinalized(uvalue(o)))); +} +static void cleartable(GCObject*l){ +while(l){ +Table*h=gco2h(l); +int i=h->sizearray; +if(testbit(h->marked,4)){ +while(i--){ +TValue*o=&h->array[i]; +if(iscleared(o,0)) +setnilvalue(o); +} +} +i=sizenode(h); +while(i--){ +Node*n=gnode(h,i); +if(!ttisnil(gval(n))&& +(iscleared(key2tval(n),1)||iscleared(gval(n),0))){ +setnilvalue(gval(n)); +removeentry(n); +} +} +l=h->gclist; +} +} +static void freeobj(lua_State*L,GCObject*o){ +switch(o->gch.tt){ +case(8+1):luaF_freeproto(L,gco2p(o));break; +case 6:luaF_freeclosure(L,gco2cl(o));break; +case(8+2):luaF_freeupval(L,gco2uv(o));break; +case 5:luaH_free(L,gco2h(o));break; +case 8:{ +luaE_freethread(L,gco2th(o)); +break; +} +case 4:{ +G(L)->strt.nuse--; +luaM_freemem(L,o,sizestring(gco2ts(o))); +break; +} +case 7:{ +luaM_freemem(L,o,sizeudata(gco2u(o))); +break; +} +default:; +} +} +#define sweepwholelist(L,p)sweeplist(L,p,((lu_mem)(~(lu_mem)0)-2)) +static GCObject**sweeplist(lua_State*L,GCObject**p,lu_mem count){ +GCObject*curr; +global_State*g=G(L); +int deadmask=otherwhite(g); +while((curr=*p)!=NULL&&count-->0){ +if(curr->gch.tt==8) +sweepwholelist(L,&gco2th(curr)->openupval); +if((curr->gch.marked^bit2mask(0,1))&deadmask){ +makewhite(g,curr); +p=&curr->gch.next; +} +else{ +*p=curr->gch.next; +if(curr==g->rootgc) +g->rootgc=curr->gch.next; +freeobj(L,curr); +} +} +return p; +} +static void checkSizes(lua_State*L){ +global_State*g=G(L); +if(g->strt.nuse<cast(lu_int32,g->strt.size/4)&& +g->strt.size>32*2) +luaS_resize(L,g->strt.size/2); +if(luaZ_sizebuffer(&g->buff)>32*2){ +size_t newsize=luaZ_sizebuffer(&g->buff)/2; +luaZ_resizebuffer(L,&g->buff,newsize); +} +} +static void GCTM(lua_State*L){ +global_State*g=G(L); +GCObject*o=g->tmudata->gch.next; +Udata*udata=rawgco2u(o); +const TValue*tm; +if(o==g->tmudata) +g->tmudata=NULL; +else +g->tmudata->gch.next=udata->uv.next; +udata->uv.next=g->mainthread->next; +g->mainthread->next=o; +makewhite(g,o); +tm=fasttm(L,udata->uv.metatable,TM_GC); +if(tm!=NULL){ +lu_byte oldah=L->allowhook; +lu_mem oldt=g->GCthreshold; +L->allowhook=0; +g->GCthreshold=2*g->totalbytes; +setobj(L,L->top,tm); +setuvalue(L,L->top+1,udata); +L->top+=2; +luaD_call(L,L->top-2,0); +L->allowhook=oldah; +g->GCthreshold=oldt; +} +} +static void luaC_callGCTM(lua_State*L){ +while(G(L)->tmudata) +GCTM(L); +} +static void luaC_freeall(lua_State*L){ +global_State*g=G(L); +int i; +g->currentwhite=bit2mask(0,1)|bitmask(6); +sweepwholelist(L,&g->rootgc); +for(i=0;i<g->strt.size;i++) +sweepwholelist(L,&g->strt.hash[i]); +} +static void markmt(global_State*g){ +int i; +for(i=0;i<(8+1);i++) +if(g->mt[i])markobject(g,g->mt[i]); +} +static void markroot(lua_State*L){ +global_State*g=G(L); +g->gray=NULL; +g->grayagain=NULL; +g->weak=NULL; +markobject(g,g->mainthread); +markvalue(g,gt(g->mainthread)); +markvalue(g,registry(L)); +markmt(g); +g->gcstate=1; +} +static void remarkupvals(global_State*g){
+UpVal*uv; +for(uv=g->uvhead.u.l.next;uv!=&g->uvhead;uv=uv->u.l.next){ +if(isgray(obj2gco(uv))) +markvalue(g,uv->v); +} +} +static void atomic(lua_State*L){ +global_State*g=G(L); +size_t udsize; +remarkupvals(g); +propagateall(g); +g->gray=g->weak; +g->weak=NULL; +markobject(g,L); +markmt(g); +propagateall(g); +g->gray=g->grayagain; +g->grayagain=NULL; +propagateall(g); +udsize=luaC_separateudata(L,0); +marktmu(g); +udsize+=propagateall(g); +cleartable(g->weak); +g->currentwhite=cast_byte(otherwhite(g)); +g->sweepstrgc=0; +g->sweepgc=&g->rootgc; +g->gcstate=2; +g->estimate=g->totalbytes-udsize; +} +static l_mem singlestep(lua_State*L){ +global_State*g=G(L); +switch(g->gcstate){ +case 0:{ +markroot(L); +return 0; +} +case 1:{ +if(g->gray) +return propagatemark(g); +else{ +atomic(L); +return 0; +} +} +case 2:{ +lu_mem old=g->totalbytes; +sweepwholelist(L,&g->strt.hash[g->sweepstrgc++]); +if(g->sweepstrgc>=g->strt.size) +g->gcstate=3; +g->estimate-=old-g->totalbytes; +return 10; +} +case 3:{ +lu_mem old=g->totalbytes; +g->sweepgc=sweeplist(L,g->sweepgc,40); +if(*g->sweepgc==NULL){ +checkSizes(L); +g->gcstate=4; +} +g->estimate-=old-g->totalbytes; +return 40*10; +} +case 4:{ +if(g->tmudata){ +GCTM(L); +if(g->estimate>100) +g->estimate-=100; +return 100; +} +else{ +g->gcstate=0; +g->gcdept=0; +return 0; +} +} +default:return 0; +} +} +static void luaC_step(lua_State*L){ +global_State*g=G(L); +l_mem lim=(1024u/100)*g->gcstepmul; +if(lim==0) +lim=(((lu_mem)(~(lu_mem)0)-2)-1)/2; +g->gcdept+=g->totalbytes-g->GCthreshold; +do{ +lim-=singlestep(L); +if(g->gcstate==0) +break; +}while(lim>0); +if(g->gcstate!=0){ +if(g->gcdept<1024u) +g->GCthreshold=g->totalbytes+1024u; +else{ +g->gcdept-=1024u; +g->GCthreshold=g->totalbytes; +} +} +else{ +setthreshold(g); +} +} +static void luaC_barrierf(lua_State*L,GCObject*o,GCObject*v){ +global_State*g=G(L); +if(g->gcstate==1) +reallymarkobject(g,v); +else +makewhite(g,o); +} +static void luaC_barrierback(lua_State*L,Table*t){ +global_State*g=G(L); +GCObject*o=obj2gco(t); +black2gray(o); +t->gclist=g->grayagain; +g->grayagain=o; +} +static void luaC_link(lua_State*L,GCObject*o,lu_byte tt){ +global_State*g=G(L); +o->gch.next=g->rootgc; +g->rootgc=o; +o->gch.marked=luaC_white(g); +o->gch.tt=tt; +} +static void luaC_linkupval(lua_State*L,UpVal*uv){ +global_State*g=G(L); +GCObject*o=obj2gco(uv); +o->gch.next=g->rootgc; +g->rootgc=o; +if(isgray(o)){ +if(g->gcstate==1){ +gray2black(o); +luaC_barrier(L,uv,uv->v); +} +else{ +makewhite(g,o); +} +} +} +typedef union{ +lua_Number r; +TString*ts; +}SemInfo; +typedef struct Token{ +int token; +SemInfo seminfo; +}Token; +typedef struct LexState{ +int current; +int linenumber; +int lastline; +Token t; +Token lookahead; +struct FuncState*fs; +struct lua_State*L; +ZIO*z; +Mbuffer*buff; +TString*source; +char decpoint; +}LexState; +static void luaX_init(lua_State*L); +static void luaX_lexerror(LexState*ls,const char*msg,int token); +#define state_size(x)(sizeof(x)+0) +#define fromstate(l)(cast(lu_byte*,(l))-0) +#define tostate(l)(cast(lua_State*,cast(lu_byte*,l)+0)) +typedef struct LG{ +lua_State l; +global_State g; +}LG; +static void stack_init(lua_State*L1,lua_State*L){ +L1->base_ci=luaM_newvector(L,8,CallInfo); +L1->ci=L1->base_ci; +L1->size_ci=8; +L1->end_ci=L1->base_ci+L1->size_ci-1; +L1->stack=luaM_newvector(L,(2*20)+5,TValue); +L1->stacksize=(2*20)+5; +L1->top=L1->stack; +L1->stack_last=L1->stack+(L1->stacksize-5)-1; +L1->ci->func=L1->top; +setnilvalue(L1->top++); +L1->base=L1->ci->base=L1->top; +L1->ci->top=L1->top+20; +} 
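Note (not part of the patch): the bare numbers that markroot(), atomic() and singlestep() above assign to g->gcstate are Lua 5.1's incremental-GC phases, with the symbolic names stripped by the squashing script that produced minilua.c. For readability, the original lgc.h constants are:

/* Lua 5.1 lgc.h GC phases; the squashed code above uses the literals. */
#define GCSpause        0  /* markroot() marks the root set, then state becomes 1 */
#define GCSpropagate    1  /* propagatemark() drains the gray list, then atomic() runs */
#define GCSsweepstring  2  /* sweep one string-table hash chain per step */
#define GCSsweep        3  /* sweeplist() frees up to 40 objects per step */
#define GCSfinalize     4  /* GCTM() runs one userdata __gc finalizer per step */

luaC_step() keeps calling singlestep() until either the work credit it returns exhausts the lim budget or the collector cycles back to GCSpause.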
+static void freestack(lua_State*L,lua_State*L1){ +luaM_freearray(L,L1->base_ci,L1->size_ci,CallInfo); +luaM_freearray(L,L1->stack,L1->stacksize,TValue); +} +static void f_luaopen(lua_State*L,void*ud){ +global_State*g=G(L); +UNUSED(ud); +stack_init(L,L); +sethvalue(L,gt(L),luaH_new(L,0,2)); +sethvalue(L,registry(L),luaH_new(L,0,2)); +luaS_resize(L,32); +luaT_init(L); +luaX_init(L); +luaS_fix(luaS_newliteral(L,"not enough memory")); +g->GCthreshold=4*g->totalbytes; +} +static void preinit_state(lua_State*L,global_State*g){ +G(L)=g; +L->stack=NULL; +L->stacksize=0; +L->errorJmp=NULL; +L->hook=NULL; +L->hookmask=0; +L->basehookcount=0; +L->allowhook=1; +resethookcount(L); +L->openupval=NULL; +L->size_ci=0; +L->nCcalls=L->baseCcalls=0; +L->status=0; +L->base_ci=L->ci=NULL; +L->savedpc=NULL; +L->errfunc=0; +setnilvalue(gt(L)); +} +static void close_state(lua_State*L){ +global_State*g=G(L); +luaF_close(L,L->stack); +luaC_freeall(L); +luaM_freearray(L,G(L)->strt.hash,G(L)->strt.size,TString*); +luaZ_freebuffer(L,&g->buff); +freestack(L,L); +(*g->frealloc)(g->ud,fromstate(L),state_size(LG),0); +} +static void luaE_freethread(lua_State*L,lua_State*L1){ +luaF_close(L1,L1->stack); +freestack(L,L1); +luaM_freemem(L,fromstate(L1),state_size(lua_State)); +} +static lua_State*lua_newstate(lua_Alloc f,void*ud){ +int i; +lua_State*L; +global_State*g; +void*l=(*f)(ud,NULL,0,state_size(LG)); +if(l==NULL)return NULL; +L=tostate(l); +g=&((LG*)L)->g; +L->next=NULL; +L->tt=8; +g->currentwhite=bit2mask(0,5); +L->marked=luaC_white(g); +set2bits(L->marked,5,6); +preinit_state(L,g); +g->frealloc=f; +g->ud=ud; +g->mainthread=L; +g->uvhead.u.l.prev=&g->uvhead; +g->uvhead.u.l.next=&g->uvhead; +g->GCthreshold=0; +g->strt.size=0; +g->strt.nuse=0; +g->strt.hash=NULL; +setnilvalue(registry(L)); +luaZ_initbuffer(L,&g->buff); +g->panic=NULL; +g->gcstate=0; +g->rootgc=obj2gco(L); +g->sweepstrgc=0; +g->sweepgc=&g->rootgc; +g->gray=NULL; +g->grayagain=NULL; +g->weak=NULL; +g->tmudata=NULL; +g->totalbytes=sizeof(LG); +g->gcpause=200; +g->gcstepmul=200; +g->gcdept=0; +for(i=0;i<(8+1);i++)g->mt[i]=NULL; +if(luaD_rawrunprotected(L,f_luaopen,NULL)!=0){ +close_state(L); +L=NULL; +} +else +{} +return L; +} +static void callallgcTM(lua_State*L,void*ud){ +UNUSED(ud); +luaC_callGCTM(L); +} +static void lua_close(lua_State*L){ +L=G(L)->mainthread; +luaF_close(L,L->stack); +luaC_separateudata(L,1); +L->errfunc=0; +do{ +L->ci=L->base_ci; +L->base=L->top=L->ci->base; +L->nCcalls=L->baseCcalls=0; +}while(luaD_rawrunprotected(L,callallgcTM,NULL)!=0); +close_state(L); +} +#define getcode(fs,e)((fs)->f->code[(e)->u.s.info]) +#define luaK_codeAsBx(fs,o,A,sBx)luaK_codeABx(fs,o,A,(sBx)+(((1<<(9+9))-1)>>1)) +#define luaK_setmultret(fs,e)luaK_setreturns(fs,e,(-1)) +static int luaK_codeABx(FuncState*fs,OpCode o,int A,unsigned int Bx); +static int luaK_codeABC(FuncState*fs,OpCode o,int A,int B,int C); +static void luaK_setreturns(FuncState*fs,expdesc*e,int nresults); +static void luaK_patchtohere(FuncState*fs,int list); +static void luaK_concat(FuncState*fs,int*l1,int l2); +static int currentpc(lua_State*L,CallInfo*ci){ +if(!isLua(ci))return-1; +if(ci==L->ci) +ci->savedpc=L->savedpc; +return pcRel(ci->savedpc,ci_func(ci)->l.p); +} +static int currentline(lua_State*L,CallInfo*ci){ +int pc=currentpc(L,ci); +if(pc<0) +return-1; +else +return getline_(ci_func(ci)->l.p,pc); +} +static int lua_getstack(lua_State*L,int level,lua_Debug*ar){ +int status; +CallInfo*ci; +for(ci=L->ci;level>0&&ci>L->base_ci;ci--){ +level--; +if(f_isLua(ci)) +level-=ci->tailcalls; 
+} +if(level==0&&ci>L->base_ci){ +status=1; +ar->i_ci=cast_int(ci-L->base_ci); +} +else if(level<0){ +status=1; +ar->i_ci=0; +} +else status=0; +return status; +} +static Proto*getluaproto(CallInfo*ci){ +return(isLua(ci)?ci_func(ci)->l.p:NULL); +} +static void funcinfo(lua_Debug*ar,Closure*cl){ +if(cl->c.isC){ +ar->source="=[C]"; +ar->linedefined=-1; +ar->lastlinedefined=-1; +ar->what="C"; +} +else{ +ar->source=getstr(cl->l.p->source); +ar->linedefined=cl->l.p->linedefined; +ar->lastlinedefined=cl->l.p->lastlinedefined; +ar->what=(ar->linedefined==0)?"main":"Lua"; +} +luaO_chunkid(ar->short_src,ar->source,60); +} +static void info_tailcall(lua_Debug*ar){ +ar->name=ar->namewhat=""; +ar->what="tail"; +ar->lastlinedefined=ar->linedefined=ar->currentline=-1; +ar->source="=(tail call)"; +luaO_chunkid(ar->short_src,ar->source,60); +ar->nups=0; +} +static void collectvalidlines(lua_State*L,Closure*f){ +if(f==NULL||f->c.isC){ +setnilvalue(L->top); +} +else{ +Table*t=luaH_new(L,0,0); +int*lineinfo=f->l.p->lineinfo; +int i; +for(i=0;i<f->l.p->sizelineinfo;i++) +setbvalue(luaH_setnum(L,t,lineinfo[i]),1); +sethvalue(L,L->top,t); +} +incr_top(L); +} +static int auxgetinfo(lua_State*L,const char*what,lua_Debug*ar, +Closure*f,CallInfo*ci){ +int status=1; +if(f==NULL){ +info_tailcall(ar); +return status; +} +for(;*what;what++){ +switch(*what){ +case'S':{ +funcinfo(ar,f); +break; +} +case'l':{ +ar->currentline=(ci)?currentline(L,ci):-1; +break; +} +case'u':{ +ar->nups=f->c.nupvalues; +break; +} +case'n':{ +ar->namewhat=(ci)?NULL:NULL; +if(ar->namewhat==NULL){ +ar->namewhat=""; +ar->name=NULL; +} +break; +} +case'L': +case'f': +break; +default:status=0; +} +} +return status; +} +static int lua_getinfo(lua_State*L,const char*what,lua_Debug*ar){ +int status; +Closure*f=NULL; +CallInfo*ci=NULL; +if(*what=='>'){ +StkId func=L->top-1; +luai_apicheck(L,ttisfunction(func)); +what++; +f=clvalue(func); +L->top--; +} +else if(ar->i_ci!=0){ +ci=L->base_ci+ar->i_ci; +f=clvalue(ci->func); +} +status=auxgetinfo(L,what,ar,f,ci); +if(strchr(what,'f')){ +if(f==NULL)setnilvalue(L->top); +else setclvalue(L,L->top,f); +incr_top(L); +} +if(strchr(what,'L')) +collectvalidlines(L,f); +return status; +} +static int isinstack(CallInfo*ci,const TValue*o){ +StkId p; +for(p=ci->base;p<ci->top;p++) +if(o==p)return 1; +return 0; +} +static void luaG_typeerror(lua_State*L,const TValue*o,const char*op){ +const char*name=NULL; +const char*t=luaT_typenames[ttype(o)]; +const char*kind=(isinstack(L->ci,o))?
+NULL: +NULL; +if(kind) +luaG_runerror(L,"attempt to %s %s "LUA_QL("%s")" (a %s value)", +op,kind,name,t); +else +luaG_runerror(L,"attempt to %s a %s value",op,t); +} +static void luaG_concaterror(lua_State*L,StkId p1,StkId p2){ +if(ttisstring(p1)||ttisnumber(p1))p1=p2; +luaG_typeerror(L,p1,"concatenate"); +} +static void luaG_aritherror(lua_State*L,const TValue*p1,const TValue*p2){ +TValue temp; +if(luaV_tonumber(p1,&temp)==NULL) +p2=p1; +luaG_typeerror(L,p2,"perform arithmetic on"); +} +static int luaG_ordererror(lua_State*L,const TValue*p1,const TValue*p2){ +const char*t1=luaT_typenames[ttype(p1)]; +const char*t2=luaT_typenames[ttype(p2)]; +if(t1[2]==t2[2]) +luaG_runerror(L,"attempt to compare two %s values",t1); +else +luaG_runerror(L,"attempt to compare %s with %s",t1,t2); +return 0; +} +static void addinfo(lua_State*L,const char*msg){ +CallInfo*ci=L->ci; +if(isLua(ci)){ +char buff[60]; +int line=currentline(L,ci); +luaO_chunkid(buff,getstr(getluaproto(ci)->source),60); +luaO_pushfstring(L,"%s:%d: %s",buff,line,msg); +} +} +static void luaG_errormsg(lua_State*L){ +if(L->errfunc!=0){ +StkId errfunc=restorestack(L,L->errfunc); +if(!ttisfunction(errfunc))luaD_throw(L,5); +setobj(L,L->top,L->top-1); +setobj(L,L->top-1,errfunc); +incr_top(L); +luaD_call(L,L->top-2,1); +} +luaD_throw(L,2); +} +static void luaG_runerror(lua_State*L,const char*fmt,...){ +va_list argp; +va_start(argp,fmt); +addinfo(L,luaO_pushvfstring(L,fmt,argp)); +va_end(argp); +luaG_errormsg(L); +} +static int luaZ_fill(ZIO*z){ +size_t size; +lua_State*L=z->L; +const char*buff; +buff=z->reader(L,z->data,&size); +if(buff==NULL||size==0)return(-1); +z->n=size-1; +z->p=buff; +return char2int(*(z->p++)); +} +static void luaZ_init(lua_State*L,ZIO*z,lua_Reader reader,void*data){ +z->L=L; +z->reader=reader; +z->data=data; +z->n=0; +z->p=NULL; +} +static char*luaZ_openspace(lua_State*L,Mbuffer*buff,size_t n){ +if(n>buff->buffsize){ +if(n<32)n=32; +luaZ_resizebuffer(L,buff,n); +} +return buff->buffer; +} +#define opmode(t,a,b,c,m)(((t)<<7)|((a)<<6)|((b)<<4)|((c)<<2)|(m)) +static const lu_byte luaP_opmodes[(cast(int,OP_VARARG)+1)]={ +opmode(0,1,OpArgR,OpArgN,iABC) +,opmode(0,1,OpArgK,OpArgN,iABx) +,opmode(0,1,OpArgU,OpArgU,iABC) +,opmode(0,1,OpArgR,OpArgN,iABC) +,opmode(0,1,OpArgU,OpArgN,iABC) +,opmode(0,1,OpArgK,OpArgN,iABx) +,opmode(0,1,OpArgR,OpArgK,iABC) +,opmode(0,0,OpArgK,OpArgN,iABx) +,opmode(0,0,OpArgU,OpArgN,iABC) +,opmode(0,0,OpArgK,OpArgK,iABC) +,opmode(0,1,OpArgU,OpArgU,iABC) +,opmode(0,1,OpArgR,OpArgK,iABC) +,opmode(0,1,OpArgK,OpArgK,iABC) +,opmode(0,1,OpArgK,OpArgK,iABC) +,opmode(0,1,OpArgK,OpArgK,iABC) +,opmode(0,1,OpArgK,OpArgK,iABC) +,opmode(0,1,OpArgK,OpArgK,iABC) +,opmode(0,1,OpArgK,OpArgK,iABC) +,opmode(0,1,OpArgR,OpArgN,iABC) +,opmode(0,1,OpArgR,OpArgN,iABC) +,opmode(0,1,OpArgR,OpArgN,iABC) +,opmode(0,1,OpArgR,OpArgR,iABC) +,opmode(0,0,OpArgR,OpArgN,iAsBx) +,opmode(1,0,OpArgK,OpArgK,iABC) +,opmode(1,0,OpArgK,OpArgK,iABC) +,opmode(1,0,OpArgK,OpArgK,iABC) +,opmode(1,1,OpArgR,OpArgU,iABC) +,opmode(1,1,OpArgR,OpArgU,iABC) +,opmode(0,1,OpArgU,OpArgU,iABC) +,opmode(0,1,OpArgU,OpArgU,iABC) +,opmode(0,0,OpArgU,OpArgN,iABC) +,opmode(0,1,OpArgR,OpArgN,iAsBx) +,opmode(0,1,OpArgR,OpArgN,iAsBx) +,opmode(1,0,OpArgN,OpArgU,iABC) +,opmode(0,0,OpArgU,OpArgU,iABC) +,opmode(0,0,OpArgN,OpArgN,iABC) +,opmode(0,1,OpArgU,OpArgN,iABx) +,opmode(0,1,OpArgU,OpArgN,iABC) +}; +#define next(ls)(ls->current=zgetc(ls->z)) +#define currIsNewline(ls)(ls->current=='\n'||ls->current=='\r') +static const char*const luaX_tokens[]={ 
+"and","break","do","else","elseif", +"end","false","for","function","if", +"in","local","nil","not","or","repeat", +"return","then","true","until","while", +"..","...","==",">=","<=","~=", +"<number>","<name>","<string>","<eof>", +NULL +}; +#define save_and_next(ls)(save(ls,ls->current),next(ls)) +static void save(LexState*ls,int c){ +Mbuffer*b=ls->buff; +if(b->n+1>b->buffsize){ +size_t newsize; +if(b->buffsize>=((size_t)(~(size_t)0)-2)/2) +luaX_lexerror(ls,"lexical element too long",0); +newsize=b->buffsize*2; +luaZ_resizebuffer(ls->L,b,newsize); +} +b->buffer[b->n++]=cast(char,c); +} +static void luaX_init(lua_State*L){ +int i; +for(i=0;i<(cast(int,TK_WHILE-257+1));i++){ +TString*ts=luaS_new(L,luaX_tokens[i]); +luaS_fix(ts); +ts->tsv.reserved=cast_byte(i+1); +} +} +static const char*luaX_token2str(LexState*ls,int token){ +if(token<257){ +return(iscntrl(token))?luaO_pushfstring(ls->L,"char(%d)",token): +luaO_pushfstring(ls->L,"%c",token); +} +else +return luaX_tokens[token-257]; +} +static const char*txtToken(LexState*ls,int token){ +switch(token){ +case TK_NAME: +case TK_STRING: +case TK_NUMBER: +save(ls,'\0'); +return luaZ_buffer(ls->buff); +default: +return luaX_token2str(ls,token); +} +} +static void luaX_lexerror(LexState*ls,const char*msg,int token){ +char buff[80]; +luaO_chunkid(buff,getstr(ls->source),80); +msg=luaO_pushfstring(ls->L,"%s:%d: %s",buff,ls->linenumber,msg); +if(token) +luaO_pushfstring(ls->L,"%s near "LUA_QL("%s"),msg,txtToken(ls,token)); +luaD_throw(ls->L,3); +} +static void luaX_syntaxerror(LexState*ls,const char*msg){ +luaX_lexerror(ls,msg,ls->t.token); +} +static TString*luaX_newstring(LexState*ls,const char*str,size_t l){ +lua_State*L=ls->L; +TString*ts=luaS_newlstr(L,str,l); +TValue*o=luaH_setstr(L,ls->fs->h,ts); +if(ttisnil(o)){ +setbvalue(o,1); +luaC_checkGC(L); +} +return ts; +} +static void inclinenumber(LexState*ls){ +int old=ls->current; +next(ls); +if(currIsNewline(ls)&&ls->current!=old) +next(ls); +if(++ls->linenumber>=(INT_MAX-2)) +luaX_syntaxerror(ls,"chunk has too many lines"); +} +static void luaX_setinput(lua_State*L,LexState*ls,ZIO*z,TString*source){ +ls->decpoint='.'; +ls->L=L; +ls->lookahead.token=TK_EOS; +ls->z=z; +ls->fs=NULL; +ls->linenumber=1; +ls->lastline=1; +ls->source=source; +luaZ_resizebuffer(ls->L,ls->buff,32); +next(ls); +} +static int check_next(LexState*ls,const char*set){ +if(!strchr(set,ls->current)) +return 0; +save_and_next(ls); +return 1; +} +static void buffreplace(LexState*ls,char from,char to){ +size_t n=luaZ_bufflen(ls->buff); +char*p=luaZ_buffer(ls->buff); +while(n--) +if(p[n]==from)p[n]=to; +} +static void read_numeral(LexState*ls,SemInfo*seminfo){ +do{ +save_and_next(ls); +}while(isdigit(ls->current)||ls->current=='.'); +if(check_next(ls,"Ee")) +check_next(ls,"+-"); +while(isalnum(ls->current)||ls->current=='_') +save_and_next(ls); +save(ls,'\0'); +buffreplace(ls,'.',ls->decpoint); +if(!luaO_str2d(luaZ_buffer(ls->buff),&seminfo->r)) +luaX_lexerror(ls,"malformed number",TK_NUMBER); +} +static int skip_sep(LexState*ls){ +int count=0; +int s=ls->current; +save_and_next(ls); +while(ls->current=='='){ +save_and_next(ls); +count++; +} +return(ls->current==s)?count:(-count)-1; +} +static void read_long_string(LexState*ls,SemInfo*seminfo,int sep){ +int cont=0; +(void)(cont); +save_and_next(ls); +if(currIsNewline(ls)) +inclinenumber(ls); +for(;;){ +switch(ls->current){ +case(-1): +luaX_lexerror(ls,(seminfo)?"unfinished long string": +"unfinished long comment",TK_EOS); +break; +case']':{ +if(skip_sep(ls)==sep){ +save_and_next(ls); +goto endloop; +}
+break; +} +case'\n': +case'\r':{ +save(ls,'\n'); +inclinenumber(ls); +if(!seminfo)luaZ_resetbuffer(ls->buff); +break; +} +default:{ +if(seminfo)save_and_next(ls); +else next(ls); +} +} +}endloop: +if(seminfo) +seminfo->ts=luaX_newstring(ls,luaZ_buffer(ls->buff)+(2+sep), +luaZ_bufflen(ls->buff)-2*(2+sep)); +} +static void read_string(LexState*ls,int del,SemInfo*seminfo){ +save_and_next(ls); +while(ls->current!=del){ +switch(ls->current){ +case(-1): +luaX_lexerror(ls,"unfinished string",TK_EOS); +continue; +case'\n': +case'\r': +luaX_lexerror(ls,"unfinished string",TK_STRING); +continue; +case'\\':{ +int c; +next(ls); +switch(ls->current){ +case'a':c='\a';break; +case'b':c='\b';break; +case'f':c='\f';break; +case'n':c='\n';break; +case'r':c='\r';break; +case't':c='\t';break; +case'v':c='\v';break; +case'\n': +case'\r':save(ls,'\n');inclinenumber(ls);continue; +case(-1):continue; +default:{ +if(!isdigit(ls->current)) +save_and_next(ls); +else{ +int i=0; +c=0; +do{ +c=10*c+(ls->current-'0'); +next(ls); +}while(++i<3&&isdigit(ls->current)); +if(c>UCHAR_MAX) +luaX_lexerror(ls,"escape sequence too large",TK_STRING); +save(ls,c); +} +continue; +} +} +save(ls,c); +next(ls); +continue; +} +default: +save_and_next(ls); +} +} +save_and_next(ls); +seminfo->ts=luaX_newstring(ls,luaZ_buffer(ls->buff)+1, +luaZ_bufflen(ls->buff)-2); +} +static int llex(LexState*ls,SemInfo*seminfo){ +luaZ_resetbuffer(ls->buff); +for(;;){ +switch(ls->current){ +case'\n': +case'\r':{ +inclinenumber(ls); +continue; +} +case'-':{ +next(ls); +if(ls->current!='-')return'-'; +next(ls); +if(ls->current=='['){ +int sep=skip_sep(ls); +luaZ_resetbuffer(ls->buff); +if(sep>=0){ +read_long_string(ls,NULL,sep); +luaZ_resetbuffer(ls->buff); +continue; +} +} +while(!currIsNewline(ls)&&ls->current!=(-1)) +next(ls); +continue; +} +case'[':{ +int sep=skip_sep(ls); +if(sep>=0){ +read_long_string(ls,seminfo,sep); +return TK_STRING; +} +else if(sep==-1)return'['; +else luaX_lexerror(ls,"invalid long string delimiter",TK_STRING); +} +case'=':{ +next(ls); +if(ls->current!='=')return'='; +else{next(ls);return TK_EQ;} +} +case'<':{ +next(ls); +if(ls->current!='=')return'<'; +else{next(ls);return TK_LE;} +} +case'>':{ +next(ls); +if(ls->current!='=')return'>'; +else{next(ls);return TK_GE;} +} +case'~':{ +next(ls); +if(ls->current!='=')return'~'; +else{next(ls);return TK_NE;} +} +case'"': +case'\'':{ +read_string(ls,ls->current,seminfo); +return TK_STRING; +} +case'.':{ +save_and_next(ls); +if(check_next(ls,".")){ +if(check_next(ls,".")) +return TK_DOTS; +else return TK_CONCAT; +} +else if(!isdigit(ls->current))return'.'; +else{ +read_numeral(ls,seminfo); +return TK_NUMBER; +} +} +case(-1):{ +return TK_EOS; +} +default:{ +if(isspace(ls->current)){ +next(ls); +continue; +} +else if(isdigit(ls->current)){ +read_numeral(ls,seminfo); +return TK_NUMBER; +} +else if(isalpha(ls->current)||ls->current=='_'){ +TString*ts; +do{ +save_and_next(ls); +}while(isalnum(ls->current)||ls->current=='_'); +ts=luaX_newstring(ls,luaZ_buffer(ls->buff), +luaZ_bufflen(ls->buff)); +if(ts->tsv.reserved>0) +return ts->tsv.reserved-1+257; +else{ +seminfo->ts=ts; +return TK_NAME; +} +} +else{ +int c=ls->current; +next(ls); +return c; +} +} +} +} +} +static void luaX_next(LexState*ls){ +ls->lastline=ls->linenumber; +if(ls->lookahead.token!=TK_EOS){ +ls->t=ls->lookahead; +ls->lookahead.token=TK_EOS; +} +else +ls->t.token=llex(ls,&ls->t.seminfo); +} +static void luaX_lookahead(LexState*ls){ +ls->lookahead.token=llex(ls,&ls->lookahead.seminfo); +} +#define hasjumps(e)((e)->t!=(e)->f) 
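Note (not part of the patch): skip_sep() above encodes Lua's long-bracket levels. It counts the '=' signs between two '[' (or two ']') and returns that count, or (-count)-1 when the second bracket is missing; llex() uses the sign to distinguish "[==[" (level 2, closed only by "]==]") from an ordinary '[' token. A minimal standalone sketch of the same return convention, with a hypothetical count_sep name:

#include <stdio.h>

/* count_sep: illustration of skip_sep()'s return convention above. */
static int count_sep(const char *s) {
    int count = 0;
    char open = *s++;                 /* first bracket, '[' or ']' */
    while (*s == '=') { s++; count++; }
    return (*s == open) ? count : (-count) - 1;
}

int main(void) {
    printf("%d\n", count_sep("[==["));  /* 2: long bracket of level 2 */
    printf("%d\n", count_sep("["));     /* -1: just an ordinary '[' token */
    return 0;
}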
+static int isnumeral(expdesc*e){ +return(e->k==VKNUM&&e->t==(-1)&&e->f==(-1)); +} +static void luaK_nil(FuncState*fs,int from,int n){ +Instruction*previous; +if(fs->pc>fs->lasttarget){ +if(fs->pc==0){ +if(from>=fs->nactvar) +return; +} +else{ +previous=&fs->f->code[fs->pc-1]; +if(GET_OPCODE(*previous)==OP_LOADNIL){ +int pfrom=GETARG_A(*previous); +int pto=GETARG_B(*previous); +if(pfrom<=from&&from<=pto+1){ +if(from+n-1>pto) +SETARG_B(*previous,from+n-1); +return; +} +} +} +} +luaK_codeABC(fs,OP_LOADNIL,from,from+n-1,0); +} +static int luaK_jump(FuncState*fs){ +int jpc=fs->jpc; +int j; +fs->jpc=(-1); +j=luaK_codeAsBx(fs,OP_JMP,0,(-1)); +luaK_concat(fs,&j,jpc); +return j; +} +static void luaK_ret(FuncState*fs,int first,int nret){ +luaK_codeABC(fs,OP_RETURN,first,nret+1,0); +} +static int condjump(FuncState*fs,OpCode op,int A,int B,int C){ +luaK_codeABC(fs,op,A,B,C); +return luaK_jump(fs); +} +static void fixjump(FuncState*fs,int pc,int dest){ +Instruction*jmp=&fs->f->code[pc]; +int offset=dest-(pc+1); +if(abs(offset)>(((1<<(9+9))-1)>>1)) +luaX_syntaxerror(fs->ls,"control structure too long"); +SETARG_sBx(*jmp,offset); +} +static int luaK_getlabel(FuncState*fs){ +fs->lasttarget=fs->pc; +return fs->pc; +} +static int getjump(FuncState*fs,int pc){ +int offset=GETARG_sBx(fs->f->code[pc]); +if(offset==(-1)) +return(-1); +else +return(pc+1)+offset; +} +static Instruction*getjumpcontrol(FuncState*fs,int pc){ +Instruction*pi=&fs->f->code[pc]; +if(pc>=1&&testTMode(GET_OPCODE(*(pi-1)))) +return pi-1; +else +return pi; +} +static int need_value(FuncState*fs,int list){ +for(;list!=(-1);list=getjump(fs,list)){ +Instruction i=*getjumpcontrol(fs,list); +if(GET_OPCODE(i)!=OP_TESTSET)return 1; +} +return 0; +} +static int patchtestreg(FuncState*fs,int node,int reg){ +Instruction*i=getjumpcontrol(fs,node); +if(GET_OPCODE(*i)!=OP_TESTSET) +return 0; +if(reg!=((1<<8)-1)&&reg!=GETARG_B(*i)) +SETARG_A(*i,reg); +else +*i=CREATE_ABC(OP_TEST,GETARG_B(*i),0,GETARG_C(*i)); +return 1; +} +static void removevalues(FuncState*fs,int list){ +for(;list!=(-1);list=getjump(fs,list)) +patchtestreg(fs,list,((1<<8)-1)); +} +static void patchlistaux(FuncState*fs,int list,int vtarget,int reg, +int dtarget){ +while(list!=(-1)){ +int next=getjump(fs,list); +if(patchtestreg(fs,list,reg)) +fixjump(fs,list,vtarget); +else +fixjump(fs,list,dtarget); +list=next; +} +} +static void dischargejpc(FuncState*fs){ +patchlistaux(fs,fs->jpc,fs->pc,((1<<8)-1),fs->pc); +fs->jpc=(-1); +} +static void luaK_patchlist(FuncState*fs,int list,int target){ +if(target==fs->pc) +luaK_patchtohere(fs,list); +else{ +patchlistaux(fs,list,target,((1<<8)-1),target); +} +} +static void luaK_patchtohere(FuncState*fs,int list){ +luaK_getlabel(fs); +luaK_concat(fs,&fs->jpc,list); +} +static void luaK_concat(FuncState*fs,int*l1,int l2){ +if(l2==(-1))return; +else if(*l1==(-1)) +*l1=l2; +else{ +int list=*l1; +int next; +while((next=getjump(fs,list))!=(-1)) +list=next; +fixjump(fs,list,l2); +} +} +static void luaK_checkstack(FuncState*fs,int n){ +int newstack=fs->freereg+n; +if(newstack>fs->f->maxstacksize){ +if(newstack>=250) +luaX_syntaxerror(fs->ls,"function or expression too complex"); +fs->f->maxstacksize=cast_byte(newstack); +} +} +static void luaK_reserveregs(FuncState*fs,int n){ +luaK_checkstack(fs,n); +fs->freereg+=n; +} +static void freereg(FuncState*fs,int reg){ +if(!ISK(reg)&&reg>=fs->nactvar){ +fs->freereg--; +} +} +static void freeexp(FuncState*fs,expdesc*e){ +if(e->k==VNONRELOC) +freereg(fs,e->u.s.info); +} +static int addk(FuncState*fs,TValue*k,TValue*v){
+lua_State*L=fs->L; +TValue*idx=luaH_set(L,fs->h,k); +Proto*f=fs->f; +int oldsize=f->sizek; +if(ttisnumber(idx)){ +return cast_int(nvalue(idx)); +} +else{ +setnvalue(idx,cast_num(fs->nk)); +luaM_growvector(L,f->k,fs->nk,f->sizek,TValue, +((1<<(9+9))-1),"constant table overflow"); +while(oldsize<f->sizek)setnilvalue(&f->k[oldsize++]); +setobj(L,&f->k[fs->nk],v); +luaC_barrier(L,f,v); +return fs->nk++; +} +} +static int luaK_stringK(FuncState*fs,TString*s){ +TValue o; +setsvalue(fs->L,&o,s); +return addk(fs,&o,&o); +} +static int luaK_numberK(FuncState*fs,lua_Number r){ +TValue o; +setnvalue(&o,r); +return addk(fs,&o,&o); +} +static int boolK(FuncState*fs,int b){ +TValue o; +setbvalue(&o,b); +return addk(fs,&o,&o); +} +static int nilK(FuncState*fs){ +TValue k,v; +setnilvalue(&v); +sethvalue(fs->L,&k,fs->h); +return addk(fs,&k,&v); +} +static void luaK_setreturns(FuncState*fs,expdesc*e,int nresults){ +if(e->k==VCALL){ +SETARG_C(getcode(fs,e),nresults+1); +} +else if(e->k==VVARARG){ +SETARG_B(getcode(fs,e),nresults+1); +SETARG_A(getcode(fs,e),fs->freereg); +luaK_reserveregs(fs,1); +} +} +static void luaK_setoneret(FuncState*fs,expdesc*e){ +if(e->k==VCALL){ +e->k=VNONRELOC; +e->u.s.info=GETARG_A(getcode(fs,e)); +} +else if(e->k==VVARARG){ +SETARG_B(getcode(fs,e),2); +e->k=VRELOCABLE; +} +} +static void luaK_dischargevars(FuncState*fs,expdesc*e){ +switch(e->k){ +case VLOCAL:{ +e->k=VNONRELOC; +break; +} +case VUPVAL:{ +e->u.s.info=luaK_codeABC(fs,OP_GETUPVAL,0,e->u.s.info,0); +e->k=VRELOCABLE; +break; +} +case VGLOBAL:{ +e->u.s.info=luaK_codeABx(fs,OP_GETGLOBAL,0,e->u.s.info); +e->k=VRELOCABLE; +break; +} +case VINDEXED:{ +freereg(fs,e->u.s.aux); +freereg(fs,e->u.s.info); +e->u.s.info=luaK_codeABC(fs,OP_GETTABLE,0,e->u.s.info,e->u.s.aux); +e->k=VRELOCABLE; +break; +} +case VVARARG: +case VCALL:{ +luaK_setoneret(fs,e); +break; +} +default:break; +} +} +static int code_label(FuncState*fs,int A,int b,int jump){ +luaK_getlabel(fs); +return luaK_codeABC(fs,OP_LOADBOOL,A,b,jump); +} +static void discharge2reg(FuncState*fs,expdesc*e,int reg){ +luaK_dischargevars(fs,e); +switch(e->k){ +case VNIL:{ +luaK_nil(fs,reg,1); +break; +} +case VFALSE:case VTRUE:{ +luaK_codeABC(fs,OP_LOADBOOL,reg,e->k==VTRUE,0); +break; +} +case VK:{ +luaK_codeABx(fs,OP_LOADK,reg,e->u.s.info); +break; +} +case VKNUM:{ +luaK_codeABx(fs,OP_LOADK,reg,luaK_numberK(fs,e->u.nval)); +break; +} +case VRELOCABLE:{ +Instruction*pc=&getcode(fs,e); +SETARG_A(*pc,reg); +break; +} +case VNONRELOC:{ +if(reg!=e->u.s.info) +luaK_codeABC(fs,OP_MOVE,reg,e->u.s.info,0); +break; +} +default:{ +return; +} +} +e->u.s.info=reg; +e->k=VNONRELOC; +} +static void discharge2anyreg(FuncState*fs,expdesc*e){ +if(e->k!=VNONRELOC){ +luaK_reserveregs(fs,1); +discharge2reg(fs,e,fs->freereg-1); +} +} +static void exp2reg(FuncState*fs,expdesc*e,int reg){ +discharge2reg(fs,e,reg); +if(e->k==VJMP) +luaK_concat(fs,&e->t,e->u.s.info); +if(hasjumps(e)){ +int final; +int p_f=(-1); +int p_t=(-1); +if(need_value(fs,e->t)||need_value(fs,e->f)){ +int fj=(e->k==VJMP)?(-1):luaK_jump(fs); +p_f=code_label(fs,reg,0,1); +p_t=code_label(fs,reg,1,0); +luaK_patchtohere(fs,fj); +} +final=luaK_getlabel(fs); +patchlistaux(fs,e->f,final,reg,p_f); +patchlistaux(fs,e->t,final,reg,p_t); +} +e->f=e->t=(-1); +e->u.s.info=reg; +e->k=VNONRELOC; +} +static void luaK_exp2nextreg(FuncState*fs,expdesc*e){ +luaK_dischargevars(fs,e); +freeexp(fs,e); +luaK_reserveregs(fs,1); +exp2reg(fs,e,fs->freereg-1); +} +static int luaK_exp2anyreg(FuncState*fs,expdesc*e){ +luaK_dischargevars(fs,e);
+if(e->k==VNONRELOC){ +if(!hasjumps(e))return e->u.s.info; +if(e->u.s.info>=fs->nactvar){ +exp2reg(fs,e,e->u.s.info); +return e->u.s.info; +} +} +luaK_exp2nextreg(fs,e); +return e->u.s.info; +} +static void luaK_exp2val(FuncState*fs,expdesc*e){ +if(hasjumps(e)) +luaK_exp2anyreg(fs,e); +else +luaK_dischargevars(fs,e); +} +static int luaK_exp2RK(FuncState*fs,expdesc*e){ +luaK_exp2val(fs,e); +switch(e->k){ +case VKNUM: +case VTRUE: +case VFALSE: +case VNIL:{ +if(fs->nk<=((1<<(9-1))-1)){ +e->u.s.info=(e->k==VNIL)?nilK(fs): +(e->k==VKNUM)?luaK_numberK(fs,e->u.nval): +boolK(fs,(e->k==VTRUE)); +e->k=VK; +return RKASK(e->u.s.info); +} +else break; +} +case VK:{ +if(e->u.s.info<=((1<<(9-1))-1)) +return RKASK(e->u.s.info); +else break; +} +default:break; +} +return luaK_exp2anyreg(fs,e); +} +static void luaK_storevar(FuncState*fs,expdesc*var,expdesc*ex){ +switch(var->k){ +case VLOCAL:{ +freeexp(fs,ex); +exp2reg(fs,ex,var->u.s.info); +return; +} +case VUPVAL:{ +int e=luaK_exp2anyreg(fs,ex); +luaK_codeABC(fs,OP_SETUPVAL,e,var->u.s.info,0); +break; +} +case VGLOBAL:{ +int e=luaK_exp2anyreg(fs,ex); +luaK_codeABx(fs,OP_SETGLOBAL,e,var->u.s.info); +break; +} +case VINDEXED:{ +int e=luaK_exp2RK(fs,ex); +luaK_codeABC(fs,OP_SETTABLE,var->u.s.info,var->u.s.aux,e); +break; +} +default:{ +break; +} +} +freeexp(fs,ex); +} +static void luaK_self(FuncState*fs,expdesc*e,expdesc*key){ +int func; +luaK_exp2anyreg(fs,e); +freeexp(fs,e); +func=fs->freereg; +luaK_reserveregs(fs,2); +luaK_codeABC(fs,OP_SELF,func,e->u.s.info,luaK_exp2RK(fs,key)); +freeexp(fs,key); +e->u.s.info=func; +e->k=VNONRELOC; +} +static void invertjump(FuncState*fs,expdesc*e){ +Instruction*pc=getjumpcontrol(fs,e->u.s.info); +SETARG_A(*pc,!(GETARG_A(*pc))); +} +static int jumponcond(FuncState*fs,expdesc*e,int cond){ +if(e->k==VRELOCABLE){ +Instruction ie=getcode(fs,e); +if(GET_OPCODE(ie)==OP_NOT){ +fs->pc--; +return condjump(fs,OP_TEST,GETARG_B(ie),0,!cond); +} +} +discharge2anyreg(fs,e); +freeexp(fs,e); +return condjump(fs,OP_TESTSET,((1<<8)-1),e->u.s.info,cond); +} +static void luaK_goiftrue(FuncState*fs,expdesc*e){ +int pc; +luaK_dischargevars(fs,e); +switch(e->k){ +case VK:case VKNUM:case VTRUE:{ +pc=(-1); +break; +} +case VJMP:{ +invertjump(fs,e); +pc=e->u.s.info; +break; +} +default:{ +pc=jumponcond(fs,e,0); +break; +} +} +luaK_concat(fs,&e->f,pc); +luaK_patchtohere(fs,e->t); +e->t=(-1); +} +static void luaK_goiffalse(FuncState*fs,expdesc*e){ +int pc; +luaK_dischargevars(fs,e); +switch(e->k){ +case VNIL:case VFALSE:{ +pc=(-1); +break; +} +case VJMP:{ +pc=e->u.s.info; +break; +} +default:{ +pc=jumponcond(fs,e,1); +break; +} +} +luaK_concat(fs,&e->t,pc); +luaK_patchtohere(fs,e->f); +e->f=(-1); +} +static void codenot(FuncState*fs,expdesc*e){ +luaK_dischargevars(fs,e); +switch(e->k){ +case VNIL:case VFALSE:{ +e->k=VTRUE; +break; +} +case VK:case VKNUM:case VTRUE:{ +e->k=VFALSE; +break; +} +case VJMP:{ +invertjump(fs,e); +break; +} +case VRELOCABLE: +case VNONRELOC:{ +discharge2anyreg(fs,e); +freeexp(fs,e); +e->u.s.info=luaK_codeABC(fs,OP_NOT,0,e->u.s.info,0); +e->k=VRELOCABLE; +break; +} +default:{ +break; +} +} +{int temp=e->f;e->f=e->t;e->t=temp;} +removevalues(fs,e->f); +removevalues(fs,e->t); +} +static void luaK_indexed(FuncState*fs,expdesc*t,expdesc*k){ +t->u.s.aux=luaK_exp2RK(fs,k); +t->k=VINDEXED; +} +static int constfolding(OpCode op,expdesc*e1,expdesc*e2){ +lua_Number v1,v2,r; +if(!isnumeral(e1)||!isnumeral(e2))return 0; +v1=e1->u.nval; +v2=e2->u.nval; +switch(op){ +case OP_ADD:r=luai_numadd(v1,v2);break; +case 
OP_SUB:r=luai_numsub(v1,v2);break; +case OP_MUL:r=luai_nummul(v1,v2);break; +case OP_DIV: +if(v2==0)return 0; +r=luai_numdiv(v1,v2);break; +case OP_MOD: +if(v2==0)return 0; +r=luai_nummod(v1,v2);break; +case OP_POW:r=luai_numpow(v1,v2);break; +case OP_UNM:r=luai_numunm(v1);break; +case OP_LEN:return 0; +default:r=0;break; +} +if(luai_numisnan(r))return 0; +e1->u.nval=r; +return 1; +} +static void codearith(FuncState*fs,OpCode op,expdesc*e1,expdesc*e2){ +if(constfolding(op,e1,e2)) +return; +else{ +int o2=(op!=OP_UNM&&op!=OP_LEN)?luaK_exp2RK(fs,e2):0; +int o1=luaK_exp2RK(fs,e1); +if(o1>o2){ +freeexp(fs,e1); +freeexp(fs,e2); +} +else{ +freeexp(fs,e2); +freeexp(fs,e1); +} +e1->u.s.info=luaK_codeABC(fs,op,0,o1,o2); +e1->k=VRELOCABLE; +} +} +static void codecomp(FuncState*fs,OpCode op,int cond,expdesc*e1, +expdesc*e2){ +int o1=luaK_exp2RK(fs,e1); +int o2=luaK_exp2RK(fs,e2); +freeexp(fs,e2); +freeexp(fs,e1); +if(cond==0&&op!=OP_EQ){ +int temp; +temp=o1;o1=o2;o2=temp; +cond=1; +} +e1->u.s.info=condjump(fs,op,cond,o1,o2); +e1->k=VJMP; +} +static void luaK_prefix(FuncState*fs,UnOpr op,expdesc*e){ +expdesc e2; +e2.t=e2.f=(-1);e2.k=VKNUM;e2.u.nval=0; +switch(op){ +case OPR_MINUS:{ +if(!isnumeral(e)) +luaK_exp2anyreg(fs,e); +codearith(fs,OP_UNM,e,&e2); +break; +} +case OPR_NOT:codenot(fs,e);break; +case OPR_LEN:{ +luaK_exp2anyreg(fs,e); +codearith(fs,OP_LEN,e,&e2); +break; +} +default:; +} +} +static void luaK_infix(FuncState*fs,BinOpr op,expdesc*v){ +switch(op){ +case OPR_AND:{ +luaK_goiftrue(fs,v); +break; +} +case OPR_OR:{ +luaK_goiffalse(fs,v); +break; +} +case OPR_CONCAT:{ +luaK_exp2nextreg(fs,v); +break; +} +case OPR_ADD:case OPR_SUB:case OPR_MUL:case OPR_DIV: +case OPR_MOD:case OPR_POW:{ +if(!isnumeral(v))luaK_exp2RK(fs,v); +break; +} +default:{ +luaK_exp2RK(fs,v); +break; +} +} +} +static void luaK_posfix(FuncState*fs,BinOpr op,expdesc*e1,expdesc*e2){ +switch(op){ +case OPR_AND:{ +luaK_dischargevars(fs,e2); +luaK_concat(fs,&e2->f,e1->f); +*e1=*e2; +break; +} +case OPR_OR:{ +luaK_dischargevars(fs,e2); +luaK_concat(fs,&e2->t,e1->t); +*e1=*e2; +break; +} +case OPR_CONCAT:{ +luaK_exp2val(fs,e2); +if(e2->k==VRELOCABLE&&GET_OPCODE(getcode(fs,e2))==OP_CONCAT){ +freeexp(fs,e1); +SETARG_B(getcode(fs,e2),e1->u.s.info); +e1->k=VRELOCABLE;e1->u.s.info=e2->u.s.info; +} +else{ +luaK_exp2nextreg(fs,e2); +codearith(fs,OP_CONCAT,e1,e2); +} +break; +} +case OPR_ADD:codearith(fs,OP_ADD,e1,e2);break; +case OPR_SUB:codearith(fs,OP_SUB,e1,e2);break; +case OPR_MUL:codearith(fs,OP_MUL,e1,e2);break; +case OPR_DIV:codearith(fs,OP_DIV,e1,e2);break; +case OPR_MOD:codearith(fs,OP_MOD,e1,e2);break; +case OPR_POW:codearith(fs,OP_POW,e1,e2);break; +case OPR_EQ:codecomp(fs,OP_EQ,1,e1,e2);break; +case OPR_NE:codecomp(fs,OP_EQ,0,e1,e2);break; +case OPR_LT:codecomp(fs,OP_LT,1,e1,e2);break; +case OPR_LE:codecomp(fs,OP_LE,1,e1,e2);break; +case OPR_GT:codecomp(fs,OP_LT,0,e1,e2);break; +case OPR_GE:codecomp(fs,OP_LE,0,e1,e2);break; +default:; +} +} +static void luaK_fixline(FuncState*fs,int line){ +fs->f->lineinfo[fs->pc-1]=line; +} +static int luaK_code(FuncState*fs,Instruction i,int line){ +Proto*f=fs->f; +dischargejpc(fs); +luaM_growvector(fs->L,f->code,fs->pc,f->sizecode,Instruction, +(INT_MAX-2),"code size overflow"); +f->code[fs->pc]=i; +luaM_growvector(fs->L,f->lineinfo,fs->pc,f->sizelineinfo,int, +(INT_MAX-2),"code size overflow"); +f->lineinfo[fs->pc]=line; +return fs->pc++; +} +static int luaK_codeABC(FuncState*fs,OpCode o,int a,int b,int c){ +return luaK_code(fs,CREATE_ABC(o,a,b,c),fs->ls->lastline); +} +static int 
luaK_codeABx(FuncState*fs,OpCode o,int a,unsigned int bc){ +return luaK_code(fs,CREATE_ABx(o,a,bc),fs->ls->lastline); +} +static void luaK_setlist(FuncState*fs,int base,int nelems,int tostore){ +int c=(nelems-1)/50+1; +int b=(tostore==(-1))?0:tostore; +if(c<=((1<<9)-1)) +luaK_codeABC(fs,OP_SETLIST,base,b,c); +else{ +luaK_codeABC(fs,OP_SETLIST,base,b,0); +luaK_code(fs,cast(Instruction,c),fs->ls->lastline); +} +fs->freereg=base+1; +} +#define hasmultret(k)((k)==VCALL||(k)==VVARARG) +#define getlocvar(fs,i)((fs)->f->locvars[(fs)->actvar[i]]) +#define luaY_checklimit(fs,v,l,m)if((v)>(l))errorlimit(fs,l,m) +typedef struct BlockCnt{ +struct BlockCnt*previous; +int breaklist; +lu_byte nactvar; +lu_byte upval; +lu_byte isbreakable; +}BlockCnt; +static void chunk(LexState*ls); +static void expr(LexState*ls,expdesc*v); +static void anchor_token(LexState*ls){ +if(ls->t.token==TK_NAME||ls->t.token==TK_STRING){ +TString*ts=ls->t.seminfo.ts; +luaX_newstring(ls,getstr(ts),ts->tsv.len); +} +} +static void error_expected(LexState*ls,int token){ +luaX_syntaxerror(ls, +luaO_pushfstring(ls->L,LUA_QL("%s")" expected",luaX_token2str(ls,token))); +} +static void errorlimit(FuncState*fs,int limit,const char*what){ +const char*msg=(fs->f->linedefined==0)? +luaO_pushfstring(fs->L,"main function has more than %d %s",limit,what): +luaO_pushfstring(fs->L,"function at line %d has more than %d %s", +fs->f->linedefined,limit,what); +luaX_lexerror(fs->ls,msg,0); +} +static int testnext(LexState*ls,int c){ +if(ls->t.token==c){ +luaX_next(ls); +return 1; +} +else return 0; +} +static void check(LexState*ls,int c){ +if(ls->t.token!=c) +error_expected(ls,c); +} +static void checknext(LexState*ls,int c){ +check(ls,c); +luaX_next(ls); +} +#define check_condition(ls,c,msg){if(!(c))luaX_syntaxerror(ls,msg);} +static void check_match(LexState*ls,int what,int who,int where){ +if(!testnext(ls,what)){ +if(where==ls->linenumber) +error_expected(ls,what); +else{ +luaX_syntaxerror(ls,luaO_pushfstring(ls->L, +LUA_QL("%s")" expected (to close "LUA_QL("%s")" at line %d)", +luaX_token2str(ls,what),luaX_token2str(ls,who),where)); +} +} +} +static TString*str_checkname(LexState*ls){ +TString*ts; +check(ls,TK_NAME); +ts=ls->t.seminfo.ts; +luaX_next(ls); +return ts; +} +static void init_exp(expdesc*e,expkind k,int i){ +e->f=e->t=(-1); +e->k=k; +e->u.s.info=i; +} +static void codestring(LexState*ls,expdesc*e,TString*s){ +init_exp(e,VK,luaK_stringK(ls->fs,s)); +} +static void checkname(LexState*ls,expdesc*e){ +codestring(ls,e,str_checkname(ls)); +} +static int registerlocalvar(LexState*ls,TString*varname){ +FuncState*fs=ls->fs; +Proto*f=fs->f; +int oldsize=f->sizelocvars; +luaM_growvector(ls->L,f->locvars,fs->nlocvars,f->sizelocvars, +LocVar,SHRT_MAX,"too many local variables"); +while(oldsize<f->sizelocvars)f->locvars[oldsize++].varname=NULL; +f->locvars[fs->nlocvars].varname=varname; +luaC_objbarrier(ls->L,f,varname); +return fs->nlocvars++; +} +#define new_localvarliteral(ls,v,n)new_localvar(ls,luaX_newstring(ls,""v,(sizeof(v)/sizeof(char))-1),n) +static void new_localvar(LexState*ls,TString*name,int n){ +FuncState*fs=ls->fs; +luaY_checklimit(fs,fs->nactvar+n+1,200,"local variables"); +fs->actvar[fs->nactvar+n]=cast(unsigned short,registerlocalvar(ls,name)); +} +static void adjustlocalvars(LexState*ls,int nvars){ +FuncState*fs=ls->fs; +fs->nactvar=cast_byte(fs->nactvar+nvars); +for(;nvars;nvars--){ +getlocvar(fs,fs->nactvar-nvars).startpc=fs->pc; +} +} +static void removevars(LexState*ls,int tolevel){ +FuncState*fs=ls->fs;
+while(fs->nactvar>tolevel) +getlocvar(fs,--fs->nactvar).endpc=fs->pc; +} +static int indexupvalue(FuncState*fs,TString*name,expdesc*v){ +int i; +Proto*f=fs->f; +int oldsize=f->sizeupvalues; +for(i=0;i<f->nups;i++){ +if(fs->upvalues[i].k==v->k&&fs->upvalues[i].info==v->u.s.info){ +return i; +} +} +luaY_checklimit(fs,f->nups+1,60,"upvalues"); +luaM_growvector(fs->L,f->upvalues,f->nups,f->sizeupvalues, +TString*,(INT_MAX-2),""); +while(oldsize<f->sizeupvalues)f->upvalues[oldsize++]=NULL; +f->upvalues[f->nups]=name; +luaC_objbarrier(fs->L,f,name); +fs->upvalues[f->nups].k=cast_byte(v->k); +fs->upvalues[f->nups].info=cast_byte(v->u.s.info); +return f->nups++; +} +static int searchvar(FuncState*fs,TString*n){ +int i; +for(i=fs->nactvar-1;i>=0;i--){ +if(n==getlocvar(fs,i).varname) +return i; +} +return-1; +} +static void markupval(FuncState*fs,int level){ +BlockCnt*bl=fs->bl; +while(bl&&bl->nactvar>level)bl=bl->previous; +if(bl)bl->upval=1; +} +static int singlevaraux(FuncState*fs,TString*n,expdesc*var,int base){ +if(fs==NULL){ +init_exp(var,VGLOBAL,((1<<8)-1)); +return VGLOBAL; +} +else{ +int v=searchvar(fs,n); +if(v>=0){ +init_exp(var,VLOCAL,v); +if(!base) +markupval(fs,v); +return VLOCAL; +} +else{ +if(singlevaraux(fs->prev,n,var,0)==VGLOBAL) +return VGLOBAL; +var->u.s.info=indexupvalue(fs,n,var); +var->k=VUPVAL; +return VUPVAL; +} +} +} +static void singlevar(LexState*ls,expdesc*var){ +TString*varname=str_checkname(ls); +FuncState*fs=ls->fs; +if(singlevaraux(fs,varname,var,1)==VGLOBAL) +var->u.s.info=luaK_stringK(fs,varname); +} +static void adjust_assign(LexState*ls,int nvars,int nexps,expdesc*e){ +FuncState*fs=ls->fs; +int extra=nvars-nexps; +if(hasmultret(e->k)){ +extra++; +if(extra<0)extra=0; +luaK_setreturns(fs,e,extra); +if(extra>1)luaK_reserveregs(fs,extra-1); +} +else{ +if(e->k!=VVOID)luaK_exp2nextreg(fs,e); +if(extra>0){ +int reg=fs->freereg; +luaK_reserveregs(fs,extra); +luaK_nil(fs,reg,extra); +} +} +} +static void enterlevel(LexState*ls){ +if(++ls->L->nCcalls>200) +luaX_lexerror(ls,"chunk has too many syntax levels",0); +} +#define leavelevel(ls)((ls)->L->nCcalls--) +static void enterblock(FuncState*fs,BlockCnt*bl,lu_byte isbreakable){ +bl->breaklist=(-1); +bl->isbreakable=isbreakable; +bl->nactvar=fs->nactvar; +bl->upval=0; +bl->previous=fs->bl; +fs->bl=bl; +} +static void leaveblock(FuncState*fs){ +BlockCnt*bl=fs->bl; +fs->bl=bl->previous; +removevars(fs->ls,bl->nactvar); +if(bl->upval) +luaK_codeABC(fs,OP_CLOSE,bl->nactvar,0,0); +fs->freereg=fs->nactvar; +luaK_patchtohere(fs,bl->breaklist); +} +static void pushclosure(LexState*ls,FuncState*func,expdesc*v){ +FuncState*fs=ls->fs; +Proto*f=fs->f; +int oldsize=f->sizep; +int i; +luaM_growvector(ls->L,f->p,fs->np,f->sizep,Proto*, +((1<<(9+9))-1),"constant table overflow"); +while(oldsize<f->sizep)f->p[oldsize++]=NULL; +f->p[fs->np++]=func->f; +luaC_objbarrier(ls->L,f,func->f); +init_exp(v,VRELOCABLE,luaK_codeABx(fs,OP_CLOSURE,0,fs->np-1)); +for(i=0;i<func->f->nups;i++){ +OpCode o=(func->upvalues[i].k==VLOCAL)?OP_MOVE:OP_GETUPVAL; +luaK_codeABC(fs,o,0,func->upvalues[i].info,0); +} +} +static void open_func(LexState*ls,FuncState*fs){ +lua_State*L=ls->L; +Proto*f=luaF_newproto(L); +fs->f=f; +fs->prev=ls->fs; +fs->ls=ls; +fs->L=L; +ls->fs=fs; +fs->pc=0; +fs->lasttarget=-1; +fs->jpc=(-1); +fs->freereg=0; +fs->nk=0; +fs->np=0; +fs->nlocvars=0; +fs->nactvar=0; +fs->bl=NULL; +f->source=ls->source; +f->maxstacksize=2; +fs->h=luaH_new(L,0,0); +sethvalue(L,L->top,fs->h); +incr_top(L); +setptvalue(L,L->top,f); +incr_top(L); +} +static void
close_func(LexState*ls){ +lua_State*L=ls->L; +FuncState*fs=ls->fs; +Proto*f=fs->f; +removevars(ls,0); +luaK_ret(fs,0,0); +luaM_reallocvector(L,f->code,f->sizecode,fs->pc,Instruction); +f->sizecode=fs->pc; +luaM_reallocvector(L,f->lineinfo,f->sizelineinfo,fs->pc,int); +f->sizelineinfo=fs->pc; +luaM_reallocvector(L,f->k,f->sizek,fs->nk,TValue); +f->sizek=fs->nk; +luaM_reallocvector(L,f->p,f->sizep,fs->np,Proto*); +f->sizep=fs->np; +luaM_reallocvector(L,f->locvars,f->sizelocvars,fs->nlocvars,LocVar); +f->sizelocvars=fs->nlocvars; +luaM_reallocvector(L,f->upvalues,f->sizeupvalues,f->nups,TString*); +f->sizeupvalues=f->nups; +ls->fs=fs->prev; +if(fs)anchor_token(ls); +L->top-=2; +} +static Proto*luaY_parser(lua_State*L,ZIO*z,Mbuffer*buff,const char*name){ +struct LexState lexstate; +struct FuncState funcstate; +lexstate.buff=buff; +luaX_setinput(L,&lexstate,z,luaS_new(L,name)); +open_func(&lexstate,&funcstate); +funcstate.f->is_vararg=2; +luaX_next(&lexstate); +chunk(&lexstate); +check(&lexstate,TK_EOS); +close_func(&lexstate); +return funcstate.f; +} +static void field(LexState*ls,expdesc*v){ +FuncState*fs=ls->fs; +expdesc key; +luaK_exp2anyreg(fs,v); +luaX_next(ls); +checkname(ls,&key); +luaK_indexed(fs,v,&key); +} +static void yindex(LexState*ls,expdesc*v){ +luaX_next(ls); +expr(ls,v); +luaK_exp2val(ls->fs,v); +checknext(ls,']'); +} +struct ConsControl{ +expdesc v; +expdesc*t; +int nh; +int na; +int tostore; +}; +static void recfield(LexState*ls,struct ConsControl*cc){ +FuncState*fs=ls->fs; +int reg=ls->fs->freereg; +expdesc key,val; +int rkkey; +if(ls->t.token==TK_NAME){ +luaY_checklimit(fs,cc->nh,(INT_MAX-2),"items in a constructor"); +checkname(ls,&key); +} +else +yindex(ls,&key); +cc->nh++; +checknext(ls,'='); +rkkey=luaK_exp2RK(fs,&key); +expr(ls,&val); +luaK_codeABC(fs,OP_SETTABLE,cc->t->u.s.info,rkkey,luaK_exp2RK(fs,&val)); +fs->freereg=reg; +} +static void closelistfield(FuncState*fs,struct ConsControl*cc){ +if(cc->v.k==VVOID)return; +luaK_exp2nextreg(fs,&cc->v); +cc->v.k=VVOID; +if(cc->tostore==50){ +luaK_setlist(fs,cc->t->u.s.info,cc->na,cc->tostore); +cc->tostore=0; +} +} +static void lastlistfield(FuncState*fs,struct ConsControl*cc){ +if(cc->tostore==0)return; +if(hasmultret(cc->v.k)){ +luaK_setmultret(fs,&cc->v); +luaK_setlist(fs,cc->t->u.s.info,cc->na,(-1)); +cc->na--; +} +else{ +if(cc->v.k!=VVOID) +luaK_exp2nextreg(fs,&cc->v); +luaK_setlist(fs,cc->t->u.s.info,cc->na,cc->tostore); +} +} +static void listfield(LexState*ls,struct ConsControl*cc){ +expr(ls,&cc->v); +luaY_checklimit(ls->fs,cc->na,(INT_MAX-2),"items in a constructor"); +cc->na++; +cc->tostore++; +} +static void constructor(LexState*ls,expdesc*t){ +FuncState*fs=ls->fs; +int line=ls->linenumber; +int pc=luaK_codeABC(fs,OP_NEWTABLE,0,0,0); +struct ConsControl cc; +cc.na=cc.nh=cc.tostore=0; +cc.t=t; +init_exp(t,VRELOCABLE,pc); +init_exp(&cc.v,VVOID,0); +luaK_exp2nextreg(ls->fs,t); +checknext(ls,'{'); +do{ +if(ls->t.token=='}')break; +closelistfield(fs,&cc); +switch(ls->t.token){ +case TK_NAME:{ +luaX_lookahead(ls); +if(ls->lookahead.token!='=') +listfield(ls,&cc); +else +recfield(ls,&cc); +break; +} +case'[':{ +recfield(ls,&cc); +break; +} +default:{ +listfield(ls,&cc); +break; +} +} +}while(testnext(ls,',')||testnext(ls,';')); +check_match(ls,'}','{',line); +lastlistfield(fs,&cc); +SETARG_B(fs->f->code[pc],luaO_int2fb(cc.na)); +SETARG_C(fs->f->code[pc],luaO_int2fb(cc.nh)); +} +static void parlist(LexState*ls){ +FuncState*fs=ls->fs; +Proto*f=fs->f; +int nparams=0; +f->is_vararg=0; +if(ls->t.token!=')'){ +do{ 
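+/* a parameter is either a name or '...', which ends the parameter list */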
+switch(ls->t.token){
+case TK_NAME:{
+new_localvar(ls,str_checkname(ls),nparams++);
+break;
+}
+case TK_DOTS:{
+luaX_next(ls);
+f->is_vararg|=2;
+break;
+}
+default:luaX_syntaxerror(ls,"<name> or "LUA_QL("...")" expected");
+}
+}while(!f->is_vararg&&testnext(ls,','));
+}
+adjustlocalvars(ls,nparams);
+f->numparams=cast_byte(fs->nactvar-(f->is_vararg&1));
+luaK_reserveregs(fs,fs->nactvar);
+}
+static void body(LexState*ls,expdesc*e,int needself,int line){
+FuncState new_fs;
+open_func(ls,&new_fs);
+new_fs.f->linedefined=line;
+checknext(ls,'(');
+if(needself){
+new_localvarliteral(ls,"self",0);
+adjustlocalvars(ls,1);
+}
+parlist(ls);
+checknext(ls,')');
+chunk(ls);
+new_fs.f->lastlinedefined=ls->linenumber;
+check_match(ls,TK_END,TK_FUNCTION,line);
+close_func(ls);
+pushclosure(ls,&new_fs,e);
+}
+static int explist1(LexState*ls,expdesc*v){
+int n=1;
+expr(ls,v);
+while(testnext(ls,',')){
+luaK_exp2nextreg(ls->fs,v);
+expr(ls,v);
+n++;
+}
+return n;
+}
+static void funcargs(LexState*ls,expdesc*f){
+FuncState*fs=ls->fs;
+expdesc args;
+int base,nparams;
+int line=ls->linenumber;
+switch(ls->t.token){
+case'(':{
+if(line!=ls->lastline)
+luaX_syntaxerror(ls,"ambiguous syntax (function call x new statement)");
+luaX_next(ls);
+if(ls->t.token==')')
+args.k=VVOID;
+else{
+explist1(ls,&args);
+luaK_setmultret(fs,&args);
+}
+check_match(ls,')','(',line);
+break;
+}
+case'{':{
+constructor(ls,&args);
+break;
+}
+case TK_STRING:{
+codestring(ls,&args,ls->t.seminfo.ts);
+luaX_next(ls);
+break;
+}
+default:{
+luaX_syntaxerror(ls,"function arguments expected");
+return;
+}
+}
+base=f->u.s.info;
+if(hasmultret(args.k))
+nparams=(-1);
+else{
+if(args.k!=VVOID)
+luaK_exp2nextreg(fs,&args);
+nparams=fs->freereg-(base+1);
+}
+init_exp(f,VCALL,luaK_codeABC(fs,OP_CALL,base,nparams+1,2));
+luaK_fixline(fs,line);
+fs->freereg=base+1;
+}
+static void prefixexp(LexState*ls,expdesc*v){
+switch(ls->t.token){
+case'(':{
+int line=ls->linenumber;
+luaX_next(ls);
+expr(ls,v);
+check_match(ls,')','(',line);
+luaK_dischargevars(ls->fs,v);
+return;
+}
+case TK_NAME:{
+singlevar(ls,v);
+return;
+}
+default:{
+luaX_syntaxerror(ls,"unexpected symbol");
+return;
+}
+}
+}
+static void primaryexp(LexState*ls,expdesc*v){
+FuncState*fs=ls->fs;
+prefixexp(ls,v);
+for(;;){
+switch(ls->t.token){
+case'.':{
+field(ls,v);
+break;
+}
+case'[':{
+expdesc key;
+luaK_exp2anyreg(fs,v);
+yindex(ls,&key);
+luaK_indexed(fs,v,&key);
+break;
+}
+case':':{
+expdesc key;
+luaX_next(ls);
+checkname(ls,&key);
+luaK_self(fs,v,&key);
+funcargs(ls,v);
+break;
+}
+case'(':case TK_STRING:case'{':{
+luaK_exp2nextreg(fs,v);
+funcargs(ls,v);
+break;
+}
+default:return;
+}
+}
+}
+static void simpleexp(LexState*ls,expdesc*v){
+switch(ls->t.token){
+case TK_NUMBER:{
+init_exp(v,VKNUM,0);
+v->u.nval=ls->t.seminfo.r;
+break;
+}
+case TK_STRING:{
+codestring(ls,v,ls->t.seminfo.ts);
+break;
+}
+case TK_NIL:{
+init_exp(v,VNIL,0);
+break;
+}
+case TK_TRUE:{
+init_exp(v,VTRUE,0);
+break;
+}
+case TK_FALSE:{
+init_exp(v,VFALSE,0);
+break;
+}
+case TK_DOTS:{
+FuncState*fs=ls->fs;
+check_condition(ls,fs->f->is_vararg,
+"cannot use "LUA_QL("...")" outside a vararg function");
+fs->f->is_vararg&=~4;
+init_exp(v,VVARARG,luaK_codeABC(fs,OP_VARARG,0,1,0));
+break;
+}
+case'{':{
+constructor(ls,v);
+return;
+}
+case TK_FUNCTION:{
+luaX_next(ls);
+body(ls,v,0,ls->linenumber);
+return;
+}
+default:{
+primaryexp(ls,v);
+return;
+}
+}
+luaX_next(ls);
+}
+static UnOpr getunopr(int op){
+switch(op){
+case TK_NOT:return OPR_NOT;
+case'-':return OPR_MINUS;
+case'#':return OPR_LEN;
+default:return OPR_NOUNOPR; +} +} +static BinOpr getbinopr(int op){ +switch(op){ +case'+':return OPR_ADD; +case'-':return OPR_SUB; +case'*':return OPR_MUL; +case'/':return OPR_DIV; +case'%':return OPR_MOD; +case'^':return OPR_POW; +case TK_CONCAT:return OPR_CONCAT; +case TK_NE:return OPR_NE; +case TK_EQ:return OPR_EQ; +case'<':return OPR_LT; +case TK_LE:return OPR_LE; +case'>':return OPR_GT; +case TK_GE:return OPR_GE; +case TK_AND:return OPR_AND; +case TK_OR:return OPR_OR; +default:return OPR_NOBINOPR; +} +} +static const struct{ +lu_byte left; +lu_byte right; +}priority[]={ +{6,6},{6,6},{7,7},{7,7},{7,7}, +{10,9},{5,4}, +{3,3},{3,3}, +{3,3},{3,3},{3,3},{3,3}, +{2,2},{1,1} +}; +static BinOpr subexpr(LexState*ls,expdesc*v,unsigned int limit){ +BinOpr op; +UnOpr uop; +enterlevel(ls); +uop=getunopr(ls->t.token); +if(uop!=OPR_NOUNOPR){ +luaX_next(ls); +subexpr(ls,v,8); +luaK_prefix(ls->fs,uop,v); +} +else simpleexp(ls,v); +op=getbinopr(ls->t.token); +while(op!=OPR_NOBINOPR&&priority[op].left>limit){ +expdesc v2; +BinOpr nextop; +luaX_next(ls); +luaK_infix(ls->fs,op,v); +nextop=subexpr(ls,&v2,priority[op].right); +luaK_posfix(ls->fs,op,v,&v2); +op=nextop; +} +leavelevel(ls); +return op; +} +static void expr(LexState*ls,expdesc*v){ +subexpr(ls,v,0); +} +static int block_follow(int token){ +switch(token){ +case TK_ELSE:case TK_ELSEIF:case TK_END: +case TK_UNTIL:case TK_EOS: +return 1; +default:return 0; +} +} +static void block(LexState*ls){ +FuncState*fs=ls->fs; +BlockCnt bl; +enterblock(fs,&bl,0); +chunk(ls); +leaveblock(fs); +} +struct LHS_assign{ +struct LHS_assign*prev; +expdesc v; +}; +static void check_conflict(LexState*ls,struct LHS_assign*lh,expdesc*v){ +FuncState*fs=ls->fs; +int extra=fs->freereg; +int conflict=0; +for(;lh;lh=lh->prev){ +if(lh->v.k==VINDEXED){ +if(lh->v.u.s.info==v->u.s.info){ +conflict=1; +lh->v.u.s.info=extra; +} +if(lh->v.u.s.aux==v->u.s.info){ +conflict=1; +lh->v.u.s.aux=extra; +} +} +} +if(conflict){ +luaK_codeABC(fs,OP_MOVE,fs->freereg,v->u.s.info,0); +luaK_reserveregs(fs,1); +} +} +static void assignment(LexState*ls,struct LHS_assign*lh,int nvars){ +expdesc e; +check_condition(ls,VLOCAL<=lh->v.k&&lh->v.k<=VINDEXED, +"syntax error"); +if(testnext(ls,',')){ +struct LHS_assign nv; +nv.prev=lh; +primaryexp(ls,&nv.v); +if(nv.v.k==VLOCAL) +check_conflict(ls,lh,&nv.v); +luaY_checklimit(ls->fs,nvars,200-ls->L->nCcalls, +"variables in assignment"); +assignment(ls,&nv,nvars+1); +} +else{ +int nexps; +checknext(ls,'='); +nexps=explist1(ls,&e); +if(nexps!=nvars){ +adjust_assign(ls,nvars,nexps,&e); +if(nexps>nvars) +ls->fs->freereg-=nexps-nvars; +} +else{ +luaK_setoneret(ls->fs,&e); +luaK_storevar(ls->fs,&lh->v,&e); +return; +} +} +init_exp(&e,VNONRELOC,ls->fs->freereg-1); +luaK_storevar(ls->fs,&lh->v,&e); +} +static int cond(LexState*ls){ +expdesc v; +expr(ls,&v); +if(v.k==VNIL)v.k=VFALSE; +luaK_goiftrue(ls->fs,&v); +return v.f; +} +static void breakstat(LexState*ls){ +FuncState*fs=ls->fs; +BlockCnt*bl=fs->bl; +int upval=0; +while(bl&&!bl->isbreakable){ +upval|=bl->upval; +bl=bl->previous; +} +if(!bl) +luaX_syntaxerror(ls,"no loop to break"); +if(upval) +luaK_codeABC(fs,OP_CLOSE,bl->nactvar,0,0); +luaK_concat(fs,&bl->breaklist,luaK_jump(fs)); +} +static void whilestat(LexState*ls,int line){ +FuncState*fs=ls->fs; +int whileinit; +int condexit; +BlockCnt bl; +luaX_next(ls); +whileinit=luaK_getlabel(fs); +condexit=cond(ls); +enterblock(fs,&bl,1); +checknext(ls,TK_DO); +block(ls); +luaK_patchlist(fs,luaK_jump(fs),whileinit); +check_match(ls,TK_END,TK_WHILE,line); 
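+/* close the loop block and land the false-condition jump here */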
+leaveblock(fs); +luaK_patchtohere(fs,condexit); +} +static void repeatstat(LexState*ls,int line){ +int condexit; +FuncState*fs=ls->fs; +int repeat_init=luaK_getlabel(fs); +BlockCnt bl1,bl2; +enterblock(fs,&bl1,1); +enterblock(fs,&bl2,0); +luaX_next(ls); +chunk(ls); +check_match(ls,TK_UNTIL,TK_REPEAT,line); +condexit=cond(ls); +if(!bl2.upval){ +leaveblock(fs); +luaK_patchlist(ls->fs,condexit,repeat_init); +} +else{ +breakstat(ls); +luaK_patchtohere(ls->fs,condexit); +leaveblock(fs); +luaK_patchlist(ls->fs,luaK_jump(fs),repeat_init); +} +leaveblock(fs); +} +static int exp1(LexState*ls){ +expdesc e; +int k; +expr(ls,&e); +k=e.k; +luaK_exp2nextreg(ls->fs,&e); +return k; +} +static void forbody(LexState*ls,int base,int line,int nvars,int isnum){ +BlockCnt bl; +FuncState*fs=ls->fs; +int prep,endfor; +adjustlocalvars(ls,3); +checknext(ls,TK_DO); +prep=isnum?luaK_codeAsBx(fs,OP_FORPREP,base,(-1)):luaK_jump(fs); +enterblock(fs,&bl,0); +adjustlocalvars(ls,nvars); +luaK_reserveregs(fs,nvars); +block(ls); +leaveblock(fs); +luaK_patchtohere(fs,prep); +endfor=(isnum)?luaK_codeAsBx(fs,OP_FORLOOP,base,(-1)): +luaK_codeABC(fs,OP_TFORLOOP,base,0,nvars); +luaK_fixline(fs,line); +luaK_patchlist(fs,(isnum?endfor:luaK_jump(fs)),prep+1); +} +static void fornum(LexState*ls,TString*varname,int line){ +FuncState*fs=ls->fs; +int base=fs->freereg; +new_localvarliteral(ls,"(for index)",0); +new_localvarliteral(ls,"(for limit)",1); +new_localvarliteral(ls,"(for step)",2); +new_localvar(ls,varname,3); +checknext(ls,'='); +exp1(ls); +checknext(ls,','); +exp1(ls); +if(testnext(ls,',')) +exp1(ls); +else{ +luaK_codeABx(fs,OP_LOADK,fs->freereg,luaK_numberK(fs,1)); +luaK_reserveregs(fs,1); +} +forbody(ls,base,line,1,1); +} +static void forlist(LexState*ls,TString*indexname){ +FuncState*fs=ls->fs; +expdesc e; +int nvars=0; +int line; +int base=fs->freereg; +new_localvarliteral(ls,"(for generator)",nvars++); +new_localvarliteral(ls,"(for state)",nvars++); +new_localvarliteral(ls,"(for control)",nvars++); +new_localvar(ls,indexname,nvars++); +while(testnext(ls,',')) +new_localvar(ls,str_checkname(ls),nvars++); +checknext(ls,TK_IN); +line=ls->linenumber; +adjust_assign(ls,3,explist1(ls,&e),&e); +luaK_checkstack(fs,3); +forbody(ls,base,line,nvars-3,0); +} +static void forstat(LexState*ls,int line){ +FuncState*fs=ls->fs; +TString*varname; +BlockCnt bl; +enterblock(fs,&bl,1); +luaX_next(ls); +varname=str_checkname(ls); +switch(ls->t.token){ +case'=':fornum(ls,varname,line);break; +case',':case TK_IN:forlist(ls,varname);break; +default:luaX_syntaxerror(ls,LUA_QL("=")" or "LUA_QL("in")" expected"); +} +check_match(ls,TK_END,TK_FOR,line); +leaveblock(fs); +} +static int test_then_block(LexState*ls){ +int condexit; +luaX_next(ls); +condexit=cond(ls); +checknext(ls,TK_THEN); +block(ls); +return condexit; +} +static void ifstat(LexState*ls,int line){ +FuncState*fs=ls->fs; +int flist; +int escapelist=(-1); +flist=test_then_block(ls); +while(ls->t.token==TK_ELSEIF){ +luaK_concat(fs,&escapelist,luaK_jump(fs)); +luaK_patchtohere(fs,flist); +flist=test_then_block(ls); +} +if(ls->t.token==TK_ELSE){ +luaK_concat(fs,&escapelist,luaK_jump(fs)); +luaK_patchtohere(fs,flist); +luaX_next(ls); +block(ls); +} +else +luaK_concat(fs,&escapelist,flist); +luaK_patchtohere(fs,escapelist); +check_match(ls,TK_END,TK_IF,line); +} +static void localfunc(LexState*ls){ +expdesc v,b; +FuncState*fs=ls->fs; +new_localvar(ls,str_checkname(ls),0); +init_exp(&v,VLOCAL,fs->freereg); +luaK_reserveregs(fs,1); +adjustlocalvars(ls,1); +body(ls,&b,0,ls->linenumber); 
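+/* store the closure; the local was activated first so the body can recurse */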
+luaK_storevar(fs,&v,&b); +getlocvar(fs,fs->nactvar-1).startpc=fs->pc; +} +static void localstat(LexState*ls){ +int nvars=0; +int nexps; +expdesc e; +do{ +new_localvar(ls,str_checkname(ls),nvars++); +}while(testnext(ls,',')); +if(testnext(ls,'=')) +nexps=explist1(ls,&e); +else{ +e.k=VVOID; +nexps=0; +} +adjust_assign(ls,nvars,nexps,&e); +adjustlocalvars(ls,nvars); +} +static int funcname(LexState*ls,expdesc*v){ +int needself=0; +singlevar(ls,v); +while(ls->t.token=='.') +field(ls,v); +if(ls->t.token==':'){ +needself=1; +field(ls,v); +} +return needself; +} +static void funcstat(LexState*ls,int line){ +int needself; +expdesc v,b; +luaX_next(ls); +needself=funcname(ls,&v); +body(ls,&b,needself,line); +luaK_storevar(ls->fs,&v,&b); +luaK_fixline(ls->fs,line); +} +static void exprstat(LexState*ls){ +FuncState*fs=ls->fs; +struct LHS_assign v; +primaryexp(ls,&v.v); +if(v.v.k==VCALL) +SETARG_C(getcode(fs,&v.v),1); +else{ +v.prev=NULL; +assignment(ls,&v,1); +} +} +static void retstat(LexState*ls){ +FuncState*fs=ls->fs; +expdesc e; +int first,nret; +luaX_next(ls); +if(block_follow(ls->t.token)||ls->t.token==';') +first=nret=0; +else{ +nret=explist1(ls,&e); +if(hasmultret(e.k)){ +luaK_setmultret(fs,&e); +if(e.k==VCALL&&nret==1){ +SET_OPCODE(getcode(fs,&e),OP_TAILCALL); +} +first=fs->nactvar; +nret=(-1); +} +else{ +if(nret==1) +first=luaK_exp2anyreg(fs,&e); +else{ +luaK_exp2nextreg(fs,&e); +first=fs->nactvar; +} +} +} +luaK_ret(fs,first,nret); +} +static int statement(LexState*ls){ +int line=ls->linenumber; +switch(ls->t.token){ +case TK_IF:{ +ifstat(ls,line); +return 0; +} +case TK_WHILE:{ +whilestat(ls,line); +return 0; +} +case TK_DO:{ +luaX_next(ls); +block(ls); +check_match(ls,TK_END,TK_DO,line); +return 0; +} +case TK_FOR:{ +forstat(ls,line); +return 0; +} +case TK_REPEAT:{ +repeatstat(ls,line); +return 0; +} +case TK_FUNCTION:{ +funcstat(ls,line); +return 0; +} +case TK_LOCAL:{ +luaX_next(ls); +if(testnext(ls,TK_FUNCTION)) +localfunc(ls); +else +localstat(ls); +return 0; +} +case TK_RETURN:{ +retstat(ls); +return 1; +} +case TK_BREAK:{ +luaX_next(ls); +breakstat(ls); +return 1; +} +default:{ +exprstat(ls); +return 0; +} +} +} +static void chunk(LexState*ls){ +int islast=0; +enterlevel(ls); +while(!islast&&!block_follow(ls->t.token)){ +islast=statement(ls); +testnext(ls,';'); +ls->fs->freereg=ls->fs->nactvar; +} +leavelevel(ls); +} +static const TValue*luaV_tonumber(const TValue*obj,TValue*n){ +lua_Number num; +if(ttisnumber(obj))return obj; +if(ttisstring(obj)&&luaO_str2d(svalue(obj),&num)){ +setnvalue(n,num); +return n; +} +else +return NULL; +} +static int luaV_tostring(lua_State*L,StkId obj){ +if(!ttisnumber(obj)) +return 0; +else{ +char s[32]; +lua_Number n=nvalue(obj); +lua_number2str(s,n); +setsvalue(L,obj,luaS_new(L,s)); +return 1; +} +} +static void callTMres(lua_State*L,StkId res,const TValue*f, +const TValue*p1,const TValue*p2){ +ptrdiff_t result=savestack(L,res); +setobj(L,L->top,f); +setobj(L,L->top+1,p1); +setobj(L,L->top+2,p2); +luaD_checkstack(L,3); +L->top+=3; +luaD_call(L,L->top-3,1); +res=restorestack(L,result); +L->top--; +setobj(L,res,L->top); +} +static void callTM(lua_State*L,const TValue*f,const TValue*p1, +const TValue*p2,const TValue*p3){ +setobj(L,L->top,f); +setobj(L,L->top+1,p1); +setobj(L,L->top+2,p2); +setobj(L,L->top+3,p3); +luaD_checkstack(L,4); +L->top+=4; +luaD_call(L,L->top-4,0); +} +static void luaV_gettable(lua_State*L,const TValue*t,TValue*key,StkId val){ +int loop; +for(loop=0;loop<100;loop++){ +const TValue*tm; +if(ttistable(t)){ +Table*h=hvalue(t); 
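+/* raw lookup first; __index is consulted only when the result is nil */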
+const TValue*res=luaH_get(h,key); +if(!ttisnil(res)|| +(tm=fasttm(L,h->metatable,TM_INDEX))==NULL){ +setobj(L,val,res); +return; +} +} +else if(ttisnil(tm=luaT_gettmbyobj(L,t,TM_INDEX))) +luaG_typeerror(L,t,"index"); +if(ttisfunction(tm)){ +callTMres(L,val,tm,t,key); +return; +} +t=tm; +} +luaG_runerror(L,"loop in gettable"); +} +static void luaV_settable(lua_State*L,const TValue*t,TValue*key,StkId val){ +int loop; +TValue temp; +for(loop=0;loop<100;loop++){ +const TValue*tm; +if(ttistable(t)){ +Table*h=hvalue(t); +TValue*oldval=luaH_set(L,h,key); +if(!ttisnil(oldval)|| +(tm=fasttm(L,h->metatable,TM_NEWINDEX))==NULL){ +setobj(L,oldval,val); +h->flags=0; +luaC_barriert(L,h,val); +return; +} +} +else if(ttisnil(tm=luaT_gettmbyobj(L,t,TM_NEWINDEX))) +luaG_typeerror(L,t,"index"); +if(ttisfunction(tm)){ +callTM(L,tm,t,key,val); +return; +} +setobj(L,&temp,tm); +t=&temp; +} +luaG_runerror(L,"loop in settable"); +} +static int call_binTM(lua_State*L,const TValue*p1,const TValue*p2, +StkId res,TMS event){ +const TValue*tm=luaT_gettmbyobj(L,p1,event); +if(ttisnil(tm)) +tm=luaT_gettmbyobj(L,p2,event); +if(ttisnil(tm))return 0; +callTMres(L,res,tm,p1,p2); +return 1; +} +static const TValue*get_compTM(lua_State*L,Table*mt1,Table*mt2, +TMS event){ +const TValue*tm1=fasttm(L,mt1,event); +const TValue*tm2; +if(tm1==NULL)return NULL; +if(mt1==mt2)return tm1; +tm2=fasttm(L,mt2,event); +if(tm2==NULL)return NULL; +if(luaO_rawequalObj(tm1,tm2)) +return tm1; +return NULL; +} +static int call_orderTM(lua_State*L,const TValue*p1,const TValue*p2, +TMS event){ +const TValue*tm1=luaT_gettmbyobj(L,p1,event); +const TValue*tm2; +if(ttisnil(tm1))return-1; +tm2=luaT_gettmbyobj(L,p2,event); +if(!luaO_rawequalObj(tm1,tm2)) +return-1; +callTMres(L,L->top,tm1,p1,p2); +return!l_isfalse(L->top); +} +static int l_strcmp(const TString*ls,const TString*rs){ +const char*l=getstr(ls); +size_t ll=ls->tsv.len; +const char*r=getstr(rs); +size_t lr=rs->tsv.len; +for(;;){ +int temp=strcoll(l,r); +if(temp!=0)return temp; +else{ +size_t len=strlen(l); +if(len==lr) +return(len==ll)?0:1; +else if(len==ll) +return-1; +len++; +l+=len;ll-=len;r+=len;lr-=len; +} +} +} +static int luaV_lessthan(lua_State*L,const TValue*l,const TValue*r){ +int res; +if(ttype(l)!=ttype(r)) +return luaG_ordererror(L,l,r); +else if(ttisnumber(l)) +return luai_numlt(nvalue(l),nvalue(r)); +else if(ttisstring(l)) +return l_strcmp(rawtsvalue(l),rawtsvalue(r))<0; +else if((res=call_orderTM(L,l,r,TM_LT))!=-1) +return res; +return luaG_ordererror(L,l,r); +} +static int lessequal(lua_State*L,const TValue*l,const TValue*r){ +int res; +if(ttype(l)!=ttype(r)) +return luaG_ordererror(L,l,r); +else if(ttisnumber(l)) +return luai_numle(nvalue(l),nvalue(r)); +else if(ttisstring(l)) +return l_strcmp(rawtsvalue(l),rawtsvalue(r))<=0; +else if((res=call_orderTM(L,l,r,TM_LE))!=-1) +return res; +else if((res=call_orderTM(L,r,l,TM_LT))!=-1) +return!res; +return luaG_ordererror(L,l,r); +} +static int luaV_equalval(lua_State*L,const TValue*t1,const TValue*t2){ +const TValue*tm; +switch(ttype(t1)){ +case 0:return 1; +case 3:return luai_numeq(nvalue(t1),nvalue(t2)); +case 1:return bvalue(t1)==bvalue(t2); +case 2:return pvalue(t1)==pvalue(t2); +case 7:{ +if(uvalue(t1)==uvalue(t2))return 1; +tm=get_compTM(L,uvalue(t1)->metatable,uvalue(t2)->metatable, +TM_EQ); +break; +} +case 5:{ +if(hvalue(t1)==hvalue(t2))return 1; +tm=get_compTM(L,hvalue(t1)->metatable,hvalue(t2)->metatable,TM_EQ); +break; +} +default:return gcvalue(t1)==gcvalue(t2); +} +if(tm==NULL)return 0; 
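+/* both operands agreed on an __eq handler; its result decides equality */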
+callTMres(L,L->top,tm,t1,t2);
+return!l_isfalse(L->top);
+}
+static void luaV_concat(lua_State*L,int total,int last){
+do{
+StkId top=L->base+last+1;
+int n=2;
+if(!(ttisstring(top-2)||ttisnumber(top-2))||!tostring(L,top-1)){
+if(!call_binTM(L,top-2,top-1,top-2,TM_CONCAT))
+luaG_concaterror(L,top-2,top-1);
+}else if(tsvalue(top-1)->len==0)
+(void)tostring(L,top-2);
+else{
+size_t tl=tsvalue(top-1)->len;
+char*buffer;
+int i;
+for(n=1;n<total&&tostring(L,top-n-1);n++){
+size_t l=tsvalue(top-n-1)->len;
+if(l>=((size_t)(~(size_t)0)-2)-tl)luaG_runerror(L,"string length overflow");
+tl+=l;
+}
+buffer=luaZ_openspace(L,&G(L)->buff,tl);
+tl=0;
+for(i=n;i>0;i--){
+size_t l=tsvalue(top-i)->len;
+memcpy(buffer+tl,svalue(top-i),l);
+tl+=l;
+}
+setsvalue(L,top-n,luaS_newlstr(L,buffer,tl));
+}
+total-=n-1;
+last-=n-1;
+}while(total>1);
+}
+static void Arith(lua_State*L,StkId ra,const TValue*rb,
+const TValue*rc,TMS op){
+TValue tempb,tempc;
+const TValue*b,*c;
+if((b=luaV_tonumber(rb,&tempb))!=NULL&&
+(c=luaV_tonumber(rc,&tempc))!=NULL){
+lua_Number nb=nvalue(b),nc=nvalue(c);
+switch(op){
+case TM_ADD:setnvalue(ra,luai_numadd(nb,nc));break;
+case TM_SUB:setnvalue(ra,luai_numsub(nb,nc));break;
+case TM_MUL:setnvalue(ra,luai_nummul(nb,nc));break;
+case TM_DIV:setnvalue(ra,luai_numdiv(nb,nc));break;
+case TM_MOD:setnvalue(ra,luai_nummod(nb,nc));break;
+case TM_POW:setnvalue(ra,luai_numpow(nb,nc));break;
+case TM_UNM:setnvalue(ra,luai_numunm(nb));break;
+default:break;
+}
+}
+else if(!call_binTM(L,rb,rc,ra,op))
+luaG_aritherror(L,rb,rc);
+}
+#define runtime_check(L,c){if(!(c))break;}
+#define RA(i)(base+GETARG_A(i))
+#define RB(i)check_exp(getBMode(GET_OPCODE(i))==OpArgR,base+GETARG_B(i))
+#define RKB(i)check_exp(getBMode(GET_OPCODE(i))==OpArgK,ISK(GETARG_B(i))?k+INDEXK(GETARG_B(i)):base+GETARG_B(i))
+#define RKC(i)check_exp(getCMode(GET_OPCODE(i))==OpArgK,ISK(GETARG_C(i))?k+INDEXK(GETARG_C(i)):base+GETARG_C(i))
+#define KBx(i)check_exp(getBMode(GET_OPCODE(i))==OpArgK,k+GETARG_Bx(i))
+#define dojump(L,pc,i){(pc)+=(i);}
+#define Protect(x){L->savedpc=pc;{x;};base=L->base;}
+#define arith_op(op,tm){TValue*rb=RKB(i);TValue*rc=RKC(i);if(ttisnumber(rb)&&ttisnumber(rc)){lua_Number nb=nvalue(rb),nc=nvalue(rc);setnvalue(ra,op(nb,nc));}else Protect(Arith(L,ra,rb,rc,tm));}
+static void luaV_execute(lua_State*L,int nexeccalls){
+LClosure*cl;
+StkId base;
+TValue*k;
+const Instruction*pc;
+reentry:
+pc=L->savedpc;
+cl=&clvalue(L->ci->func)->l;
+base=L->base;
+k=cl->p->k;
+for(;;){
+const Instruction i=*pc++;
+StkId ra;
+ra=RA(i);
+switch(GET_OPCODE(i)){
+case OP_MOVE:{
+setobj(L,ra,RB(i));
+continue;
+}
+case OP_LOADK:{
+setobj(L,ra,KBx(i));
+continue;
+}
+case OP_LOADBOOL:{
+setbvalue(ra,GETARG_B(i));
+if(GETARG_C(i))pc++;
+continue;
+}
+case OP_LOADNIL:{
+TValue*rb=RB(i);
+do{
+setnilvalue(rb--);
+}while(rb>=ra);
+continue;
+}
+case OP_GETUPVAL:{
+int b=GETARG_B(i);
+setobj(L,ra,cl->upvals[b]->v);
+continue;
+}
+case OP_GETGLOBAL:{
+TValue g;
+TValue*rb=KBx(i);
+sethvalue(L,&g,cl->env);
+Protect(luaV_gettable(L,&g,rb,ra));
+continue;
+}
+case OP_GETTABLE:{
+Protect(luaV_gettable(L,RB(i),RKC(i),ra));
+continue;
+}
+case OP_SETGLOBAL:{
+TValue g;
+sethvalue(L,&g,cl->env);
+Protect(luaV_settable(L,&g,KBx(i),ra));
+continue;
+}
+case OP_SETUPVAL:{
+UpVal*uv=cl->upvals[GETARG_B(i)];
+setobj(L,uv->v,ra);
+luaC_barrier(L,uv,ra);
+continue;
+}
+case OP_SETTABLE:{
+Protect(luaV_settable(L,ra,RKB(i),RKC(i)));
+continue;
+}
+case OP_NEWTABLE:{
+int b=GETARG_B(i);
+int c=GETARG_C(i);
+sethvalue(L,ra,luaH_new(L,luaO_fb2int(b),luaO_fb2int(c)));
+Protect(luaC_checkGC(L));
+continue;
+}
+case 
OP_SELF:{
+StkId rb=RB(i);
+setobj(L,ra+1,rb);
+Protect(luaV_gettable(L,rb,RKC(i),ra));
+continue;
+}
+case OP_ADD:{
+arith_op(luai_numadd,TM_ADD);
+continue;
+}
+case OP_SUB:{
+arith_op(luai_numsub,TM_SUB);
+continue;
+}
+case OP_MUL:{
+arith_op(luai_nummul,TM_MUL);
+continue;
+}
+case OP_DIV:{
+arith_op(luai_numdiv,TM_DIV);
+continue;
+}
+case OP_MOD:{
+arith_op(luai_nummod,TM_MOD);
+continue;
+}
+case OP_POW:{
+arith_op(luai_numpow,TM_POW);
+continue;
+}
+case OP_UNM:{
+TValue*rb=RB(i);
+if(ttisnumber(rb)){
+lua_Number nb=nvalue(rb);
+setnvalue(ra,luai_numunm(nb));
+}
+else{
+Protect(Arith(L,ra,rb,rb,TM_UNM));
+}
+continue;
+}
+case OP_NOT:{
+int res=l_isfalse(RB(i));
+setbvalue(ra,res);
+continue;
+}
+case OP_LEN:{
+const TValue*rb=RB(i);
+switch(ttype(rb)){
+case 5:{
+setnvalue(ra,cast_num(luaH_getn(hvalue(rb))));
+break;
+}
+case 4:{
+setnvalue(ra,cast_num(tsvalue(rb)->len));
+break;
+}
+default:{
+Protect(
+if(!call_binTM(L,rb,(&luaO_nilobject_),ra,TM_LEN))
+luaG_typeerror(L,rb,"get length of");
+)
+}
+}
+continue;
+}
+case OP_CONCAT:{
+int b=GETARG_B(i);
+int c=GETARG_C(i);
+Protect(luaV_concat(L,c-b+1,c);luaC_checkGC(L));
+setobj(L,RA(i),base+b);
+continue;
+}
+case OP_JMP:{
+dojump(L,pc,GETARG_sBx(i));
+continue;
+}
+case OP_EQ:{
+TValue*rb=RKB(i);
+TValue*rc=RKC(i);
+Protect(
+if(equalobj(L,rb,rc)==GETARG_A(i))
+dojump(L,pc,GETARG_sBx(*pc));
+)
+pc++;
+continue;
+}
+case OP_LT:{
+Protect(
+if(luaV_lessthan(L,RKB(i),RKC(i))==GETARG_A(i))
+dojump(L,pc,GETARG_sBx(*pc));
+)
+pc++;
+continue;
+}
+case OP_LE:{
+Protect(
+if(lessequal(L,RKB(i),RKC(i))==GETARG_A(i))
+dojump(L,pc,GETARG_sBx(*pc));
+)
+pc++;
+continue;
+}
+case OP_TEST:{
+if(l_isfalse(ra)!=GETARG_C(i))
+dojump(L,pc,GETARG_sBx(*pc));
+pc++;
+continue;
+}
+case OP_TESTSET:{
+TValue*rb=RB(i);
+if(l_isfalse(rb)!=GETARG_C(i)){
+setobj(L,ra,rb);
+dojump(L,pc,GETARG_sBx(*pc));
+}
+pc++;
+continue;
+}
+case OP_CALL:{
+int b=GETARG_B(i);
+int nresults=GETARG_C(i)-1;
+if(b!=0)L->top=ra+b;
+L->savedpc=pc;
+switch(luaD_precall(L,ra,nresults)){
+case 0:{
+nexeccalls++;
+goto reentry;
+}
+case 1:{
+if(nresults>=0)L->top=L->ci->top;
+base=L->base;
+continue;
+}
+default:{
+return;
+}
+}
+}
+case OP_TAILCALL:{
+int b=GETARG_B(i);
+if(b!=0)L->top=ra+b;
+L->savedpc=pc;
+switch(luaD_precall(L,ra,(-1))){
+case 0:{
+CallInfo*ci=L->ci-1;
+int aux;
+StkId func=ci->func;
+StkId pfunc=(ci+1)->func;
+if(L->openupval)luaF_close(L,ci->base);
+L->base=ci->base=ci->func+((ci+1)->base-pfunc);
+for(aux=0;pfunc+aux<L->top;aux++)
+setobj(L,func+aux,pfunc+aux);
+ci->top=L->top=func+aux;
+ci->savedpc=L->savedpc;
+ci->tailcalls++;
+L->ci--;
+goto reentry;
+}
+case 1:{
+base=L->base;
+continue;
+}
+default:{
+return;
+}
+}
+}
+case OP_RETURN:{
+int b=GETARG_B(i);
+if(b!=0)L->top=ra+b-1;
+if(L->openupval)luaF_close(L,base);
+L->savedpc=pc;
+b=luaD_poscall(L,ra);
+if(--nexeccalls==0)
+return;
+else{
+if(b)L->top=L->ci->top;
+goto reentry;
+}
+}
+case OP_FORLOOP:{
+lua_Number step=nvalue(ra+2);
+lua_Number idx=luai_numadd(nvalue(ra),step);
+lua_Number limit=nvalue(ra+1);
+if(luai_numlt(0,step)?luai_numle(idx,limit)
+:luai_numle(limit,idx)){
+dojump(L,pc,GETARG_sBx(i));
+setnvalue(ra,idx);
+setnvalue(ra+3,idx);
+}
+continue;
+}
+case OP_FORPREP:{
+const TValue*init=ra;
+const TValue*plimit=ra+1;
+const TValue*pstep=ra+2;
+L->savedpc=pc;
+if(!tonumber(init,ra))
+luaG_runerror(L,LUA_QL("for")" initial value must be a number");
+else if(!tonumber(plimit,ra+1))
+luaG_runerror(L,LUA_QL("for")" limit must be a number");
+else if(!tonumber(pstep,ra+2))
+luaG_runerror(L,LUA_QL("for")" step must be a number"); +setnvalue(ra,luai_numsub(nvalue(ra),nvalue(pstep))); +dojump(L,pc,GETARG_sBx(i)); +continue; +} +case OP_TFORLOOP:{ +StkId cb=ra+3; +setobj(L,cb+2,ra+2); +setobj(L,cb+1,ra+1); +setobj(L,cb,ra); +L->top=cb+3; +Protect(luaD_call(L,cb,GETARG_C(i))); +L->top=L->ci->top; +cb=RA(i)+3; +if(!ttisnil(cb)){ +setobj(L,cb-1,cb); +dojump(L,pc,GETARG_sBx(*pc)); +} +pc++; +continue; +} +case OP_SETLIST:{ +int n=GETARG_B(i); +int c=GETARG_C(i); +int last; +Table*h; +if(n==0){ +n=cast_int(L->top-ra)-1; +L->top=L->ci->top; +} +if(c==0)c=cast_int(*pc++); +runtime_check(L,ttistable(ra)); +h=hvalue(ra); +last=((c-1)*50)+n; +if(last>h->sizearray) +luaH_resizearray(L,h,last); +for(;n>0;n--){ +TValue*val=ra+n; +setobj(L,luaH_setnum(L,h,last--),val); +luaC_barriert(L,h,val); +} +continue; +} +case OP_CLOSE:{ +luaF_close(L,ra); +continue; +} +case OP_CLOSURE:{ +Proto*p; +Closure*ncl; +int nup,j; +p=cl->p->p[GETARG_Bx(i)]; +nup=p->nups; +ncl=luaF_newLclosure(L,nup,cl->env); +ncl->l.p=p; +for(j=0;jl.upvals[j]=cl->upvals[GETARG_B(*pc)]; +else{ +ncl->l.upvals[j]=luaF_findupval(L,base+GETARG_B(*pc)); +} +} +setclvalue(L,ra,ncl); +Protect(luaC_checkGC(L)); +continue; +} +case OP_VARARG:{ +int b=GETARG_B(i)-1; +int j; +CallInfo*ci=L->ci; +int n=cast_int(ci->base-ci->func)-cl->p->numparams-1; +if(b==(-1)){ +Protect(luaD_checkstack(L,n)); +ra=RA(i); +b=n; +L->top=ra+n; +} +for(j=0;jbase-n+j); +} +else{ +setnilvalue(ra+j); +} +} +continue; +} +} +} +} +#define api_checknelems(L,n)luai_apicheck(L,(n)<=(L->top-L->base)) +#define api_checkvalidindex(L,i)luai_apicheck(L,(i)!=(&luaO_nilobject_)) +#define api_incr_top(L){luai_apicheck(L,L->topci->top);L->top++;} +static TValue*index2adr(lua_State*L,int idx){ +if(idx>0){ +TValue*o=L->base+(idx-1); +luai_apicheck(L,idx<=L->ci->top-L->base); +if(o>=L->top)return cast(TValue*,(&luaO_nilobject_)); +else return o; +} +else if(idx>(-10000)){ +luai_apicheck(L,idx!=0&&-idx<=L->top-L->base); +return L->top+idx; +} +else switch(idx){ +case(-10000):return registry(L); +case(-10001):{ +Closure*func=curr_func(L); +sethvalue(L,&L->env,func->c.env); +return&L->env; +} +case(-10002):return gt(L); +default:{ +Closure*func=curr_func(L); +idx=(-10002)-idx; +return(idx<=func->c.nupvalues) +?&func->c.upvalue[idx-1] +:cast(TValue*,(&luaO_nilobject_)); +} +} +} +static Table*getcurrenv(lua_State*L){ +if(L->ci==L->base_ci) +return hvalue(gt(L)); +else{ +Closure*func=curr_func(L); +return func->c.env; +} +} +static int lua_checkstack(lua_State*L,int size){ +int res=1; +if(size>8000||(L->top-L->base+size)>8000) +res=0; +else if(size>0){ +luaD_checkstack(L,size); +if(L->ci->toptop+size) +L->ci->top=L->top+size; +} +return res; +} +static lua_CFunction lua_atpanic(lua_State*L,lua_CFunction panicf){ +lua_CFunction old; +old=G(L)->panic; +G(L)->panic=panicf; +return old; +} +static int lua_gettop(lua_State*L){ +return cast_int(L->top-L->base); +} +static void lua_settop(lua_State*L,int idx){ +if(idx>=0){ +luai_apicheck(L,idx<=L->stack_last-L->base); +while(L->topbase+idx) +setnilvalue(L->top++); +L->top=L->base+idx; +} +else{ +luai_apicheck(L,-(idx+1)<=(L->top-L->base)); +L->top+=idx+1; +} +} +static void lua_remove(lua_State*L,int idx){ +StkId p; +p=index2adr(L,idx); +api_checkvalidindex(L,p); +while(++ptop)setobj(L,p-1,p); +L->top--; +} +static void lua_insert(lua_State*L,int idx){ +StkId p; +StkId q; +p=index2adr(L,idx); +api_checkvalidindex(L,p); +for(q=L->top;q>p;q--)setobj(L,q,q-1); +setobj(L,p,L->top); +} +static void lua_replace(lua_State*L,int 
idx){ +StkId o; +if(idx==(-10001)&&L->ci==L->base_ci) +luaG_runerror(L,"no calling environment"); +api_checknelems(L,1); +o=index2adr(L,idx); +api_checkvalidindex(L,o); +if(idx==(-10001)){ +Closure*func=curr_func(L); +luai_apicheck(L,ttistable(L->top-1)); +func->c.env=hvalue(L->top-1); +luaC_barrier(L,func,L->top-1); +} +else{ +setobj(L,o,L->top-1); +if(idx<(-10002)) +luaC_barrier(L,curr_func(L),L->top-1); +} +L->top--; +} +static void lua_pushvalue(lua_State*L,int idx){ +setobj(L,L->top,index2adr(L,idx)); +api_incr_top(L); +} +static int lua_type(lua_State*L,int idx){ +StkId o=index2adr(L,idx); +return(o==(&luaO_nilobject_))?(-1):ttype(o); +} +static const char*lua_typename(lua_State*L,int t){ +UNUSED(L); +return(t==(-1))?"no value":luaT_typenames[t]; +} +static int lua_iscfunction(lua_State*L,int idx){ +StkId o=index2adr(L,idx); +return iscfunction(o); +} +static int lua_isnumber(lua_State*L,int idx){ +TValue n; +const TValue*o=index2adr(L,idx); +return tonumber(o,&n); +} +static int lua_isstring(lua_State*L,int idx){ +int t=lua_type(L,idx); +return(t==4||t==3); +} +static int lua_rawequal(lua_State*L,int index1,int index2){ +StkId o1=index2adr(L,index1); +StkId o2=index2adr(L,index2); +return(o1==(&luaO_nilobject_)||o2==(&luaO_nilobject_))?0 +:luaO_rawequalObj(o1,o2); +} +static int lua_lessthan(lua_State*L,int index1,int index2){ +StkId o1,o2; +int i; +o1=index2adr(L,index1); +o2=index2adr(L,index2); +i=(o1==(&luaO_nilobject_)||o2==(&luaO_nilobject_))?0 +:luaV_lessthan(L,o1,o2); +return i; +} +static lua_Number lua_tonumber(lua_State*L,int idx){ +TValue n; +const TValue*o=index2adr(L,idx); +if(tonumber(o,&n)) +return nvalue(o); +else +return 0; +} +static lua_Integer lua_tointeger(lua_State*L,int idx){ +TValue n; +const TValue*o=index2adr(L,idx); +if(tonumber(o,&n)){ +lua_Integer res; +lua_Number num=nvalue(o); +lua_number2integer(res,num); +return res; +} +else +return 0; +} +static int lua_toboolean(lua_State*L,int idx){ +const TValue*o=index2adr(L,idx); +return!l_isfalse(o); +} +static const char*lua_tolstring(lua_State*L,int idx,size_t*len){ +StkId o=index2adr(L,idx); +if(!ttisstring(o)){ +if(!luaV_tostring(L,o)){ +if(len!=NULL)*len=0; +return NULL; +} +luaC_checkGC(L); +o=index2adr(L,idx); +} +if(len!=NULL)*len=tsvalue(o)->len; +return svalue(o); +} +static size_t lua_objlen(lua_State*L,int idx){ +StkId o=index2adr(L,idx); +switch(ttype(o)){ +case 4:return tsvalue(o)->len; +case 7:return uvalue(o)->len; +case 5:return luaH_getn(hvalue(o)); +case 3:{ +size_t l; +l=(luaV_tostring(L,o)?tsvalue(o)->len:0); +return l; +} +default:return 0; +} +} +static lua_CFunction lua_tocfunction(lua_State*L,int idx){ +StkId o=index2adr(L,idx); +return(!iscfunction(o))?NULL:clvalue(o)->c.f; +} +static void*lua_touserdata(lua_State*L,int idx){ +StkId o=index2adr(L,idx); +switch(ttype(o)){ +case 7:return(rawuvalue(o)+1); +case 2:return pvalue(o); +default:return NULL; +} +} +static void lua_pushnil(lua_State*L){ +setnilvalue(L->top); +api_incr_top(L); +} +static void lua_pushnumber(lua_State*L,lua_Number n){ +setnvalue(L->top,n); +api_incr_top(L); +} +static void lua_pushinteger(lua_State*L,lua_Integer n){ +setnvalue(L->top,cast_num(n)); +api_incr_top(L); +} +static void lua_pushlstring(lua_State*L,const char*s,size_t len){ +luaC_checkGC(L); +setsvalue(L,L->top,luaS_newlstr(L,s,len)); +api_incr_top(L); +} +static void lua_pushstring(lua_State*L,const char*s){ +if(s==NULL) +lua_pushnil(L); +else +lua_pushlstring(L,s,strlen(s)); +} +static const char*lua_pushvfstring(lua_State*L,const char*fmt, +va_list 
argp){ +const char*ret; +luaC_checkGC(L); +ret=luaO_pushvfstring(L,fmt,argp); +return ret; +} +static const char*lua_pushfstring(lua_State*L,const char*fmt,...){ +const char*ret; +va_list argp; +luaC_checkGC(L); +va_start(argp,fmt); +ret=luaO_pushvfstring(L,fmt,argp); +va_end(argp); +return ret; +} +static void lua_pushcclosure(lua_State*L,lua_CFunction fn,int n){ +Closure*cl; +luaC_checkGC(L); +api_checknelems(L,n); +cl=luaF_newCclosure(L,n,getcurrenv(L)); +cl->c.f=fn; +L->top-=n; +while(n--) +setobj(L,&cl->c.upvalue[n],L->top+n); +setclvalue(L,L->top,cl); +api_incr_top(L); +} +static void lua_pushboolean(lua_State*L,int b){ +setbvalue(L->top,(b!=0)); +api_incr_top(L); +} +static int lua_pushthread(lua_State*L){ +setthvalue(L,L->top,L); +api_incr_top(L); +return(G(L)->mainthread==L); +} +static void lua_gettable(lua_State*L,int idx){ +StkId t; +t=index2adr(L,idx); +api_checkvalidindex(L,t); +luaV_gettable(L,t,L->top-1,L->top-1); +} +static void lua_getfield(lua_State*L,int idx,const char*k){ +StkId t; +TValue key; +t=index2adr(L,idx); +api_checkvalidindex(L,t); +setsvalue(L,&key,luaS_new(L,k)); +luaV_gettable(L,t,&key,L->top); +api_incr_top(L); +} +static void lua_rawget(lua_State*L,int idx){ +StkId t; +t=index2adr(L,idx); +luai_apicheck(L,ttistable(t)); +setobj(L,L->top-1,luaH_get(hvalue(t),L->top-1)); +} +static void lua_rawgeti(lua_State*L,int idx,int n){ +StkId o; +o=index2adr(L,idx); +luai_apicheck(L,ttistable(o)); +setobj(L,L->top,luaH_getnum(hvalue(o),n)); +api_incr_top(L); +} +static void lua_createtable(lua_State*L,int narray,int nrec){ +luaC_checkGC(L); +sethvalue(L,L->top,luaH_new(L,narray,nrec)); +api_incr_top(L); +} +static int lua_getmetatable(lua_State*L,int objindex){ +const TValue*obj; +Table*mt=NULL; +int res; +obj=index2adr(L,objindex); +switch(ttype(obj)){ +case 5: +mt=hvalue(obj)->metatable; +break; +case 7: +mt=uvalue(obj)->metatable; +break; +default: +mt=G(L)->mt[ttype(obj)]; +break; +} +if(mt==NULL) +res=0; +else{ +sethvalue(L,L->top,mt); +api_incr_top(L); +res=1; +} +return res; +} +static void lua_getfenv(lua_State*L,int idx){ +StkId o; +o=index2adr(L,idx); +api_checkvalidindex(L,o); +switch(ttype(o)){ +case 6: +sethvalue(L,L->top,clvalue(o)->c.env); +break; +case 7: +sethvalue(L,L->top,uvalue(o)->env); +break; +case 8: +setobj(L,L->top,gt(thvalue(o))); +break; +default: +setnilvalue(L->top); +break; +} +api_incr_top(L); +} +static void lua_settable(lua_State*L,int idx){ +StkId t; +api_checknelems(L,2); +t=index2adr(L,idx); +api_checkvalidindex(L,t); +luaV_settable(L,t,L->top-2,L->top-1); +L->top-=2; +} +static void lua_setfield(lua_State*L,int idx,const char*k){ +StkId t; +TValue key; +api_checknelems(L,1); +t=index2adr(L,idx); +api_checkvalidindex(L,t); +setsvalue(L,&key,luaS_new(L,k)); +luaV_settable(L,t,&key,L->top-1); +L->top--; +} +static void lua_rawset(lua_State*L,int idx){ +StkId t; +api_checknelems(L,2); +t=index2adr(L,idx); +luai_apicheck(L,ttistable(t)); +setobj(L,luaH_set(L,hvalue(t),L->top-2),L->top-1); +luaC_barriert(L,hvalue(t),L->top-1); +L->top-=2; +} +static void lua_rawseti(lua_State*L,int idx,int n){ +StkId o; +api_checknelems(L,1); +o=index2adr(L,idx); +luai_apicheck(L,ttistable(o)); +setobj(L,luaH_setnum(L,hvalue(o),n),L->top-1); +luaC_barriert(L,hvalue(o),L->top-1); +L->top--; +} +static int lua_setmetatable(lua_State*L,int objindex){ +TValue*obj; +Table*mt; +api_checknelems(L,1); +obj=index2adr(L,objindex); +api_checkvalidindex(L,obj); +if(ttisnil(L->top-1)) +mt=NULL; +else{ +luai_apicheck(L,ttistable(L->top-1)); +mt=hvalue(L->top-1); +} 
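+/* attach the metatable to the object, or to the per-type default table */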
+switch(ttype(obj)){ +case 5:{ +hvalue(obj)->metatable=mt; +if(mt) +luaC_objbarriert(L,hvalue(obj),mt); +break; +} +case 7:{ +uvalue(obj)->metatable=mt; +if(mt) +luaC_objbarrier(L,rawuvalue(obj),mt); +break; +} +default:{ +G(L)->mt[ttype(obj)]=mt; +break; +} +} +L->top--; +return 1; +} +static int lua_setfenv(lua_State*L,int idx){ +StkId o; +int res=1; +api_checknelems(L,1); +o=index2adr(L,idx); +api_checkvalidindex(L,o); +luai_apicheck(L,ttistable(L->top-1)); +switch(ttype(o)){ +case 6: +clvalue(o)->c.env=hvalue(L->top-1); +break; +case 7: +uvalue(o)->env=hvalue(L->top-1); +break; +case 8: +sethvalue(L,gt(thvalue(o)),hvalue(L->top-1)); +break; +default: +res=0; +break; +} +if(res)luaC_objbarrier(L,gcvalue(o),hvalue(L->top-1)); +L->top--; +return res; +} +#define adjustresults(L,nres){if(nres==(-1)&&L->top>=L->ci->top)L->ci->top=L->top;} +#define checkresults(L,na,nr)luai_apicheck(L,(nr)==(-1)||(L->ci->top-L->top>=(nr)-(na))) +static void lua_call(lua_State*L,int nargs,int nresults){ +StkId func; +api_checknelems(L,nargs+1); +checkresults(L,nargs,nresults); +func=L->top-(nargs+1); +luaD_call(L,func,nresults); +adjustresults(L,nresults); +} +struct CallS{ +StkId func; +int nresults; +}; +static void f_call(lua_State*L,void*ud){ +struct CallS*c=cast(struct CallS*,ud); +luaD_call(L,c->func,c->nresults); +} +static int lua_pcall(lua_State*L,int nargs,int nresults,int errfunc){ +struct CallS c; +int status; +ptrdiff_t func; +api_checknelems(L,nargs+1); +checkresults(L,nargs,nresults); +if(errfunc==0) +func=0; +else{ +StkId o=index2adr(L,errfunc); +api_checkvalidindex(L,o); +func=savestack(L,o); +} +c.func=L->top-(nargs+1); +c.nresults=nresults; +status=luaD_pcall(L,f_call,&c,savestack(L,c.func),func); +adjustresults(L,nresults); +return status; +} +static int lua_load(lua_State*L,lua_Reader reader,void*data, +const char*chunkname){ +ZIO z; +int status; +if(!chunkname)chunkname="?"; +luaZ_init(L,&z,reader,data); +status=luaD_protectedparser(L,&z,chunkname); +return status; +} +static int lua_error(lua_State*L){ +api_checknelems(L,1); +luaG_errormsg(L); +return 0; +} +static int lua_next(lua_State*L,int idx){ +StkId t; +int more; +t=index2adr(L,idx); +luai_apicheck(L,ttistable(t)); +more=luaH_next(L,hvalue(t),L->top-1); +if(more){ +api_incr_top(L); +} +else +L->top-=1; +return more; +} +static void lua_concat(lua_State*L,int n){ +api_checknelems(L,n); +if(n>=2){ +luaC_checkGC(L); +luaV_concat(L,n,cast_int(L->top-L->base)-1); +L->top-=(n-1); +} +else if(n==0){ +setsvalue(L,L->top,luaS_newlstr(L,"",0)); +api_incr_top(L); +} +} +static void*lua_newuserdata(lua_State*L,size_t size){ +Udata*u; +luaC_checkGC(L); +u=luaS_newudata(L,size,getcurrenv(L)); +setuvalue(L,L->top,u); +api_incr_top(L); +return u+1; +} +#define luaL_getn(L,i)((int)lua_objlen(L,i)) +#define luaL_setn(L,i,j)((void)0) +typedef struct luaL_Reg{ +const char*name; +lua_CFunction func; +}luaL_Reg; +static void luaI_openlib(lua_State*L,const char*libname, +const luaL_Reg*l,int nup); +static int luaL_argerror(lua_State*L,int numarg,const char*extramsg); +static const char* luaL_checklstring(lua_State*L,int numArg, +size_t*l); +static const char* luaL_optlstring(lua_State*L,int numArg, +const char*def,size_t*l); +static lua_Integer luaL_checkinteger(lua_State*L,int numArg); +static lua_Integer luaL_optinteger(lua_State*L,int nArg, +lua_Integer def); +static int luaL_error(lua_State*L,const char*fmt,...); +static const char* luaL_findtable(lua_State*L,int idx, +const char*fname,int szhint); +#define 
luaL_argcheck(L,cond,numarg,extramsg)((void)((cond)||luaL_argerror(L,(numarg),(extramsg)))) +#define luaL_checkstring(L,n)(luaL_checklstring(L,(n),NULL)) +#define luaL_optstring(L,n,d)(luaL_optlstring(L,(n),(d),NULL)) +#define luaL_checkint(L,n)((int)luaL_checkinteger(L,(n))) +#define luaL_optint(L,n,d)((int)luaL_optinteger(L,(n),(d))) +#define luaL_typename(L,i)lua_typename(L,lua_type(L,(i))) +#define luaL_getmetatable(L,n)(lua_getfield(L,(-10000),(n))) +#define luaL_opt(L,f,n,d)(lua_isnoneornil(L,(n))?(d):f(L,(n))) +typedef struct luaL_Buffer{ +char*p; +int lvl; +lua_State*L; +char buffer[BUFSIZ]; +}luaL_Buffer; +#define luaL_addchar(B,c)((void)((B)->p<((B)->buffer+BUFSIZ)||luaL_prepbuffer(B)),(*(B)->p++=(char)(c))) +#define luaL_addsize(B,n)((B)->p+=(n)) +static char* luaL_prepbuffer(luaL_Buffer*B); +static int luaL_argerror(lua_State*L,int narg,const char*extramsg){ +lua_Debug ar; +if(!lua_getstack(L,0,&ar)) +return luaL_error(L,"bad argument #%d (%s)",narg,extramsg); +lua_getinfo(L,"n",&ar); +if(strcmp(ar.namewhat,"method")==0){ +narg--; +if(narg==0) +return luaL_error(L,"calling "LUA_QL("%s")" on bad self (%s)", +ar.name,extramsg); +} +if(ar.name==NULL) +ar.name="?"; +return luaL_error(L,"bad argument #%d to "LUA_QL("%s")" (%s)", +narg,ar.name,extramsg); +} +static int luaL_typerror(lua_State*L,int narg,const char*tname){ +const char*msg=lua_pushfstring(L,"%s expected, got %s", +tname,luaL_typename(L,narg)); +return luaL_argerror(L,narg,msg); +} +static void tag_error(lua_State*L,int narg,int tag){ +luaL_typerror(L,narg,lua_typename(L,tag)); +} +static void luaL_where(lua_State*L,int level){ +lua_Debug ar; +if(lua_getstack(L,level,&ar)){ +lua_getinfo(L,"Sl",&ar); +if(ar.currentline>0){ +lua_pushfstring(L,"%s:%d: ",ar.short_src,ar.currentline); +return; +} +} +lua_pushliteral(L,""); +} +static int luaL_error(lua_State*L,const char*fmt,...){ +va_list argp; +va_start(argp,fmt); +luaL_where(L,1); +lua_pushvfstring(L,fmt,argp); +va_end(argp); +lua_concat(L,2); +return lua_error(L); +} +static int luaL_newmetatable(lua_State*L,const char*tname){ +lua_getfield(L,(-10000),tname); +if(!lua_isnil(L,-1)) +return 0; +lua_pop(L,1); +lua_newtable(L); +lua_pushvalue(L,-1); +lua_setfield(L,(-10000),tname); +return 1; +} +static void*luaL_checkudata(lua_State*L,int ud,const char*tname){ +void*p=lua_touserdata(L,ud); +if(p!=NULL){ +if(lua_getmetatable(L,ud)){ +lua_getfield(L,(-10000),tname); +if(lua_rawequal(L,-1,-2)){ +lua_pop(L,2); +return p; +} +} +} +luaL_typerror(L,ud,tname); +return NULL; +} +static void luaL_checkstack(lua_State*L,int space,const char*mes){ +if(!lua_checkstack(L,space)) +luaL_error(L,"stack overflow (%s)",mes); +} +static void luaL_checktype(lua_State*L,int narg,int t){ +if(lua_type(L,narg)!=t) +tag_error(L,narg,t); +} +static void luaL_checkany(lua_State*L,int narg){ +if(lua_type(L,narg)==(-1)) +luaL_argerror(L,narg,"value expected"); +} +static const char*luaL_checklstring(lua_State*L,int narg,size_t*len){ +const char*s=lua_tolstring(L,narg,len); +if(!s)tag_error(L,narg,4); +return s; +} +static const char*luaL_optlstring(lua_State*L,int narg, +const char*def,size_t*len){ +if(lua_isnoneornil(L,narg)){ +if(len) +*len=(def?strlen(def):0); +return def; +} +else return luaL_checklstring(L,narg,len); +} +static lua_Number luaL_checknumber(lua_State*L,int narg){ +lua_Number d=lua_tonumber(L,narg); +if(d==0&&!lua_isnumber(L,narg)) +tag_error(L,narg,3); +return d; +} +static lua_Integer luaL_checkinteger(lua_State*L,int narg){ +lua_Integer d=lua_tointeger(L,narg); 
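+/* lua_tointeger yields 0 on failure, so 0 triggers an explicit type check */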
+if(d==0&&!lua_isnumber(L,narg))
+tag_error(L,narg,3);
+return d;
+}
+static lua_Integer luaL_optinteger(lua_State*L,int narg,
+lua_Integer def){
+return luaL_opt(L,luaL_checkinteger,narg,def);
+}
+static int luaL_getmetafield(lua_State*L,int obj,const char*event){
+if(!lua_getmetatable(L,obj))
+return 0;
+lua_pushstring(L,event);
+lua_rawget(L,-2);
+if(lua_isnil(L,-1)){
+lua_pop(L,2);
+return 0;
+}
+else{
+lua_remove(L,-2);
+return 1;
+}
+}
+static void luaL_register(lua_State*L,const char*libname,
+const luaL_Reg*l){
+luaI_openlib(L,libname,l,0);
+}
+static int libsize(const luaL_Reg*l){
+int size=0;
+for(;l->name;l++)size++;
+return size;
+}
+static void luaI_openlib(lua_State*L,const char*libname,
+const luaL_Reg*l,int nup){
+if(libname){
+int size=libsize(l);
+luaL_findtable(L,(-10000),"_LOADED",1);
+lua_getfield(L,-1,libname);
+if(!lua_istable(L,-1)){
+lua_pop(L,1);
+if(luaL_findtable(L,(-10002),libname,size)!=NULL)
+luaL_error(L,"name conflict for module "LUA_QL("%s"),libname);
+lua_pushvalue(L,-1);
+lua_setfield(L,-3,libname);
+}
+lua_remove(L,-2);
+lua_insert(L,-(nup+1));
+}
+for(;l->name;l++){
+int i;
+for(i=0;i<nup;i++)
+lua_pushvalue(L,-nup);
+lua_pushcclosure(L,l->func,nup);
+lua_setfield(L,-(nup+2),l->name);
+}
+lua_pop(L,nup);
+}
+static const char*luaL_findtable(lua_State*L,int idx,
+const char*fname,int szhint){
+const char*e;
+lua_pushvalue(L,idx);
+do{
+e=strchr(fname,'.');
+if(e==NULL)e=fname+strlen(fname);
+lua_pushlstring(L,fname,e-fname);
+lua_rawget(L,-2);
+if(lua_isnil(L,-1)){
+lua_pop(L,1);
+lua_createtable(L,0,(*e=='.'?1:szhint));
+lua_pushlstring(L,fname,e-fname);
+lua_pushvalue(L,-2);
+lua_settable(L,-4);
+}
+else if(!lua_istable(L,-1)){
+lua_pop(L,2);
+return fname;
+}
+lua_remove(L,-2);
+fname=e+1;
+}while(*e=='.');
+return NULL;
+}
+#define bufflen(B)((B)->p-(B)->buffer)
+#define bufffree(B)((size_t)(BUFSIZ-bufflen(B)))
+static int emptybuffer(luaL_Buffer*B){
+size_t l=bufflen(B);
+if(l==0)return 0;
+else{
+lua_pushlstring(B->L,B->buffer,l);
+B->p=B->buffer;
+B->lvl++;
+return 1;
+}
+}
+static void adjuststack(luaL_Buffer*B){
+if(B->lvl>1){
+lua_State*L=B->L;
+int toget=1;
+size_t toplen=lua_strlen(L,-1);
+do{
+size_t l=lua_strlen(L,-(toget+1));
+if(B->lvl-toget+1>=(20/2)||toplen>l){
+toplen+=l;
+toget++;
+}
+else break;
+}while(toget<B->lvl);
+lua_concat(L,toget);
+B->lvl=B->lvl-toget+1;
+}
+}
+static char*luaL_prepbuffer(luaL_Buffer*B){
+if(emptybuffer(B))
+adjuststack(B);
+return B->buffer;
+}
+static void luaL_addlstring(luaL_Buffer*B,const char*s,size_t l){
+while(l--)
+luaL_addchar(B,*s++);
+}
+static void luaL_pushresult(luaL_Buffer*B){
+emptybuffer(B);
+lua_concat(B->L,B->lvl);
+B->lvl=1;
+}
+static void luaL_addvalue(luaL_Buffer*B){
+lua_State*L=B->L;
+size_t vl;
+const char*s=lua_tolstring(L,-1,&vl);
+if(vl<=bufffree(B)){
+memcpy(B->p,s,vl);
+B->p+=vl;
+lua_pop(L,1);
+}
+else{
+if(emptybuffer(B))
+lua_insert(L,-2);
+B->lvl++;
+adjuststack(B);
+}
+}
+static void luaL_buffinit(lua_State*L,luaL_Buffer*B){
+B->L=L;
+B->p=B->buffer;
+B->lvl=0;
+}
+typedef struct LoadF{
+int extraline;
+FILE*f;
+char buff[BUFSIZ];
+}LoadF;
+static const char*getF(lua_State*L,void*ud,size_t*size){
+LoadF*lf=(LoadF*)ud;
+(void)L;
+if(lf->extraline){
+lf->extraline=0;
+*size=1;
+return"\n";
+}
+if(feof(lf->f))return NULL;
+*size=fread(lf->buff,1,sizeof(lf->buff),lf->f);
+return(*size>0)?lf->buff:NULL;
+}
+static int errfile(lua_State*L,const char*what,int fnameindex){
+const char*serr=strerror(errno);
+const char*filename=lua_tostring(L,fnameindex)+1;
+lua_pushfstring(L,"cannot %s %s: %s",what,filename,serr);
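+/* drop the filename entry; the formatted message remains on the stack */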
+lua_remove(L,fnameindex); +return(5+1); +} +static int luaL_loadfile(lua_State*L,const char*filename){ +LoadF lf; +int status,readstatus; +int c; +int fnameindex=lua_gettop(L)+1; +lf.extraline=0; +if(filename==NULL){ +lua_pushliteral(L,"=stdin"); +lf.f=stdin; +} +else{ +lua_pushfstring(L,"@%s",filename); +lf.f=fopen(filename,"r"); +if(lf.f==NULL)return errfile(L,"open",fnameindex); +} +c=getc(lf.f); +if(c=='#'){ +lf.extraline=1; +while((c=getc(lf.f))!=EOF&&c!='\n'); +if(c=='\n')c=getc(lf.f); +} +if(c=="\033Lua"[0]&&filename){ +lf.f=freopen(filename,"rb",lf.f); +if(lf.f==NULL)return errfile(L,"reopen",fnameindex); +while((c=getc(lf.f))!=EOF&&c!="\033Lua"[0]); +lf.extraline=0; +} +ungetc(c,lf.f); +status=lua_load(L,getF,&lf,lua_tostring(L,-1)); +readstatus=ferror(lf.f); +if(filename)fclose(lf.f); +if(readstatus){ +lua_settop(L,fnameindex); +return errfile(L,"read",fnameindex); +} +lua_remove(L,fnameindex); +return status; +} +typedef struct LoadS{ +const char*s; +size_t size; +}LoadS; +static const char*getS(lua_State*L,void*ud,size_t*size){ +LoadS*ls=(LoadS*)ud; +(void)L; +if(ls->size==0)return NULL; +*size=ls->size; +ls->size=0; +return ls->s; +} +static int luaL_loadbuffer(lua_State*L,const char*buff,size_t size, +const char*name){ +LoadS ls; +ls.s=buff; +ls.size=size; +return lua_load(L,getS,&ls,name); +} +static void*l_alloc(void*ud,void*ptr,size_t osize,size_t nsize){ +(void)ud; +(void)osize; +if(nsize==0){ +free(ptr); +return NULL; +} +else +return realloc(ptr,nsize); +} +static int panic(lua_State*L){ +(void)L; +fprintf(stderr,"PANIC: unprotected error in call to Lua API (%s)\n", +lua_tostring(L,-1)); +return 0; +} +static lua_State*luaL_newstate(void){ +lua_State*L=lua_newstate(l_alloc,NULL); +if(L)lua_atpanic(L,&panic); +return L; +} +static int luaB_tonumber(lua_State*L){ +int base=luaL_optint(L,2,10); +if(base==10){ +luaL_checkany(L,1); +if(lua_isnumber(L,1)){ +lua_pushnumber(L,lua_tonumber(L,1)); +return 1; +} +} +else{ +const char*s1=luaL_checkstring(L,1); +char*s2; +unsigned long n; +luaL_argcheck(L,2<=base&&base<=36,2,"base out of range"); +n=strtoul(s1,&s2,base); +if(s1!=s2){ +while(isspace((unsigned char)(*s2)))s2++; +if(*s2=='\0'){ +lua_pushnumber(L,(lua_Number)n); +return 1; +} +} +} +lua_pushnil(L); +return 1; +} +static int luaB_error(lua_State*L){ +int level=luaL_optint(L,2,1); +lua_settop(L,1); +if(lua_isstring(L,1)&&level>0){ +luaL_where(L,level); +lua_pushvalue(L,1); +lua_concat(L,2); +} +return lua_error(L); +} +static int luaB_setmetatable(lua_State*L){ +int t=lua_type(L,2); +luaL_checktype(L,1,5); +luaL_argcheck(L,t==0||t==5,2, +"nil or table expected"); +if(luaL_getmetafield(L,1,"__metatable")) +luaL_error(L,"cannot change a protected metatable"); +lua_settop(L,2); +lua_setmetatable(L,1); +return 1; +} +static void getfunc(lua_State*L,int opt){ +if(lua_isfunction(L,1))lua_pushvalue(L,1); +else{ +lua_Debug ar; +int level=opt?luaL_optint(L,1,1):luaL_checkint(L,1); +luaL_argcheck(L,level>=0,1,"level must be non-negative"); +if(lua_getstack(L,level,&ar)==0) +luaL_argerror(L,1,"invalid level"); +lua_getinfo(L,"f",&ar); +if(lua_isnil(L,-1)) +luaL_error(L,"no function environment for tail call at level %d", +level); +} +} +static int luaB_setfenv(lua_State*L){ +luaL_checktype(L,2,5); +getfunc(L,0); +lua_pushvalue(L,2); +if(lua_isnumber(L,1)&&lua_tonumber(L,1)==0){ +lua_pushthread(L); +lua_insert(L,-2); +lua_setfenv(L,-2); +return 0; +} +else if(lua_iscfunction(L,-2)||lua_setfenv(L,-2)==0) +luaL_error(L, +LUA_QL("setfenv")" cannot change environment of given object"); 
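+/* on success the target function is left on the stack as the result */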
+return 1;
+}
+static int luaB_rawget(lua_State*L){
+luaL_checktype(L,1,5);
+luaL_checkany(L,2);
+lua_settop(L,2);
+lua_rawget(L,1);
+return 1;
+}
+static int luaB_type(lua_State*L){
+luaL_checkany(L,1);
+lua_pushstring(L,luaL_typename(L,1));
+return 1;
+}
+static int luaB_next(lua_State*L){
+luaL_checktype(L,1,5);
+lua_settop(L,2);
+if(lua_next(L,1))
+return 2;
+else{
+lua_pushnil(L);
+return 1;
+}
+}
+static int luaB_pairs(lua_State*L){
+luaL_checktype(L,1,5);
+lua_pushvalue(L,lua_upvalueindex(1));
+lua_pushvalue(L,1);
+lua_pushnil(L);
+return 3;
+}
+static int ipairsaux(lua_State*L){
+int i=luaL_checkint(L,2);
+luaL_checktype(L,1,5);
+i++;
+lua_pushinteger(L,i);
+lua_rawgeti(L,1,i);
+return(lua_isnil(L,-1))?0:2;
+}
+static int luaB_ipairs(lua_State*L){
+luaL_checktype(L,1,5);
+lua_pushvalue(L,lua_upvalueindex(1));
+lua_pushvalue(L,1);
+lua_pushinteger(L,0);
+return 3;
+}
+static int load_aux(lua_State*L,int status){
+if(status==0)
+return 1;
+else{
+lua_pushnil(L);
+lua_insert(L,-2);
+return 2;
+}
+}
+static int luaB_loadstring(lua_State*L){
+size_t l;
+const char*s=luaL_checklstring(L,1,&l);
+const char*chunkname=luaL_optstring(L,2,s);
+return load_aux(L,luaL_loadbuffer(L,s,l,chunkname));
+}
+static int luaB_loadfile(lua_State*L){
+const char*fname=luaL_optstring(L,1,NULL);
+return load_aux(L,luaL_loadfile(L,fname));
+}
+static int luaB_assert(lua_State*L){
+luaL_checkany(L,1);
+if(!lua_toboolean(L,1))
+return luaL_error(L,"%s",luaL_optstring(L,2,"assertion failed!"));
+return lua_gettop(L);
+}
+static int luaB_unpack(lua_State*L){
+int i,e,n;
+luaL_checktype(L,1,5);
+i=luaL_optint(L,2,1);
+e=luaL_opt(L,luaL_checkint,3,luaL_getn(L,1));
+if(i>e)return 0;
+n=e-i+1;
+if(n<=0||!lua_checkstack(L,n))
+return luaL_error(L,"too many results to unpack");
+lua_rawgeti(L,1,i);
+while(i++<e)
+lua_rawgeti(L,1,i);
+return n;
+}
+#define aux_getn(L,n)(luaL_checktype(L,n,5),luaL_getn(L,n))
+static int tinsert(lua_State*L){
+int e=aux_getn(L,1)+1;
+int pos;
+switch(lua_gettop(L)){
+case 2:{
+pos=e;
+break;
+}
+case 3:{
+int i;
+pos=luaL_checkint(L,2);
+if(pos>e)e=pos;
+for(i=e;i>pos;i--){
+lua_rawgeti(L,1,i-1);
+lua_rawseti(L,1,i);
+}
+break;
+}
+default:{
+return luaL_error(L,"wrong number of arguments to "LUA_QL("insert"));
+}
+}
+luaL_setn(L,1,e);
+lua_rawseti(L,1,pos);
+return 0;
+}
+static int tremove(lua_State*L){
+int e=aux_getn(L,1);
+int pos=luaL_optint(L,2,e);
+if(!(1<=pos&&pos<=e))
+return 0;
+luaL_setn(L,1,e-1);
+lua_rawgeti(L,1,pos);
+for(;pos<e;pos++){
+lua_rawgeti(L,1,pos+1);
+lua_rawseti(L,1,pos);
+}
+lua_pushnil(L);
+lua_rawseti(L,1,e);
+return 1;
+}
+static void set2(lua_State*L,int i,int j){
+lua_rawseti(L,1,i);
+lua_rawseti(L,1,j);
+}
+static int sort_comp(lua_State*L,int a,int b){
+if(!lua_isnil(L,2)){
+int res;
+lua_pushvalue(L,2);
+lua_pushvalue(L,a-1);
+lua_pushvalue(L,b-2);
+lua_call(L,2,1);
+res=lua_toboolean(L,-1);
+lua_pop(L,1);
+return res;
+}
+else
+return lua_lessthan(L,a,b);
+}
+static void auxsort(lua_State*L,int l,int u){
+while(l<u){
+int i,j;
+lua_rawgeti(L,1,l);
+lua_rawgeti(L,1,u);
+if(sort_comp(L,-1,-2))
+set2(L,l,u);
+else
+lua_pop(L,2);
+if(u-l==1)break;
+i=(l+u)/2;
+lua_rawgeti(L,1,i);
+lua_rawgeti(L,1,l);
+if(sort_comp(L,-2,-1))
+set2(L,i,l);
+else{
+lua_pop(L,1);
+lua_rawgeti(L,1,u);
+if(sort_comp(L,-1,-2))
+set2(L,i,u);
+else
+lua_pop(L,2);
+}
+if(u-l==2)break;
+lua_rawgeti(L,1,i);
+lua_pushvalue(L,-1);
+lua_rawgeti(L,1,u-1);
+set2(L,i,u-1);
+i=l;j=u-1;
+for(;;){
+while(lua_rawgeti(L,1,++i),sort_comp(L,-1,-2)){
+if(i>u)luaL_error(L,"invalid order function for sorting");
+lua_pop(L,1);
+}
+while(lua_rawgeti(L,1,--j),sort_comp(L,-3,-1)){
+if(j<l)luaL_error(L,"invalid order function for sorting");
+lua_pop(L,1);
+}
+if(j<i){
+lua_pop(L,3);
+break;
+}
+set2(L,i,j);
+}
+lua_rawgeti(L,1,u-1);
+lua_rawgeti(L,1,i);
+set2(L,u-1,i);
+if(i-l<u-i){
+j=l;i=i-1;l=i+2;
+}
+else{
+j=i+1;i=u;u=j-2;
+}
+auxsort(L,j,i);
+}
+}
+static int sort(lua_State*L){
+int n=aux_getn(L,1);
+luaL_checkstack(L,40,"");
+if(!lua_isnoneornil(L,2))
+luaL_checktype(L,2,6);
+lua_settop(L,2);
+auxsort(L,1,n);
+return 0;
+}
+static int read_number(lua_State*L,FILE*f){
+lua_Number d;
+if(fscanf(f,"%lf",&d)==1){
+lua_pushnumber(L,d);
+return 1;
+}
+else return 0;
+}
+static int test_eof(lua_State*L,FILE*f){
+int c=getc(f);
+ungetc(c,f);
+lua_pushlstring(L,NULL,0);
+return(c!=EOF);
+}
+static int read_line(lua_State*L,FILE*f){
+luaL_Buffer b;
+luaL_buffinit(L,&b);
+for(;;){
+size_t l;
+char*p=luaL_prepbuffer(&b);
+if(fgets(p,BUFSIZ,f)==NULL){
+luaL_pushresult(&b);
+return(lua_strlen(L,-1)>0);
+}
+l=strlen(p);
+if(l==0||p[l-1]!='\n')
+luaL_addsize(&b,l);
+else{
+luaL_addsize(&b,l-1);
+luaL_pushresult(&b);
+return 1;
+}
+}
+}
+static int read_chars(lua_State*L,FILE*f,size_t n){
+size_t rlen;
+size_t nr;
+luaL_Buffer b;
+luaL_buffinit(L,&b);
+rlen=BUFSIZ;
+do{
+char*p=luaL_prepbuffer(&b);
+if(rlen>n)rlen=n;
+nr=fread(p,sizeof(char),rlen,f);
+luaL_addsize(&b,nr);
+n-=nr;
+}while(n>0&&nr==rlen);
+luaL_pushresult(&b);
+return(n==0||lua_objlen(L,-1)>0);
+}
+static int g_read(lua_State*L,FILE*f,int first){
+int nargs=lua_gettop(L)-1;
+int success;
+int n;
+clearerr(f);
+if(nargs==0){
+success=read_line(L,f);
+n=first+1;
+}
+else{
+luaL_checkstack(L,nargs+20,"too many arguments");
+success=1;
+for(n=first;nargs--&&success;n++){
+if(lua_type(L,n)==3){
+size_t l=(size_t)lua_tointeger(L,n);
+success=(l==0)?test_eof(L,f):read_chars(L,f,l);
+}
+else{
+const char*p=lua_tostring(L,n);
+luaL_argcheck(L,p&&p[0]=='*',n,"invalid option");
+switch(p[1]){
+case'n':
+success=read_number(L,f);
+break;
+case'l':
+success=read_line(L,f);
+break;
+case'a':
+read_chars(L,f,~((size_t)0));
+success=1;
+break;
+default:
+return luaL_argerror(L,n,"invalid format");
+}
+}
+}
+}
+if(ferror(f))
+return 
pushresult(L,0,NULL); +if(!success){ +lua_pop(L,1); +lua_pushnil(L); +} +return n-first; +} +static int io_read(lua_State*L){ +return g_read(L,getiofile(L,1),1); +} +static int f_read(lua_State*L){ +return g_read(L,tofile(L),2); +} +static int io_readline(lua_State*L){ +FILE*f=*(FILE**)lua_touserdata(L,lua_upvalueindex(1)); +int success; +if(f==NULL) +luaL_error(L,"file is already closed"); +success=read_line(L,f); +if(ferror(f)) +return luaL_error(L,"%s",strerror(errno)); +if(success)return 1; +else{ +if(lua_toboolean(L,lua_upvalueindex(2))){ +lua_settop(L,0); +lua_pushvalue(L,lua_upvalueindex(1)); +aux_close(L); +} +return 0; +} +} +static int g_write(lua_State*L,FILE*f,int arg){ +int nargs=lua_gettop(L)-1; +int status=1; +for(;nargs--;arg++){ +if(lua_type(L,arg)==3){ +status=status&& +fprintf(f,"%.14g",lua_tonumber(L,arg))>0; +} +else{ +size_t l; +const char*s=luaL_checklstring(L,arg,&l); +status=status&&(fwrite(s,sizeof(char),l,f)==l); +} +} +return pushresult(L,status,NULL); +} +static int io_write(lua_State*L){ +return g_write(L,getiofile(L,2),1); +} +static int f_write(lua_State*L){ +return g_write(L,tofile(L),2); +} +static int io_flush(lua_State*L){ +return pushresult(L,fflush(getiofile(L,2))==0,NULL); +} +static int f_flush(lua_State*L){ +return pushresult(L,fflush(tofile(L))==0,NULL); +} +static const luaL_Reg iolib[]={ +{"close",io_close}, +{"flush",io_flush}, +{"input",io_input}, +{"lines",io_lines}, +{"open",io_open}, +{"output",io_output}, +{"read",io_read}, +{"type",io_type}, +{"write",io_write}, +{NULL,NULL} +}; +static const luaL_Reg flib[]={ +{"close",io_close}, +{"flush",f_flush}, +{"lines",f_lines}, +{"read",f_read}, +{"write",f_write}, +{"__gc",io_gc}, +{NULL,NULL} +}; +static void createmeta(lua_State*L){ +luaL_newmetatable(L,"FILE*"); +lua_pushvalue(L,-1); +lua_setfield(L,-2,"__index"); +luaL_register(L,NULL,flib); +} +static void createstdfile(lua_State*L,FILE*f,int k,const char*fname){ +*newfile(L)=f; +if(k>0){ +lua_pushvalue(L,-1); +lua_rawseti(L,(-10001),k); +} +lua_pushvalue(L,-2); +lua_setfenv(L,-2); +lua_setfield(L,-3,fname); +} +static void newfenv(lua_State*L,lua_CFunction cls){ +lua_createtable(L,0,1); +lua_pushcfunction(L,cls); +lua_setfield(L,-2,"__close"); +} +static int luaopen_io(lua_State*L){ +createmeta(L); +newfenv(L,io_fclose); +lua_replace(L,(-10001)); +luaL_register(L,"io",iolib); +newfenv(L,io_noclose); +createstdfile(L,stdin,1,"stdin"); +createstdfile(L,stdout,2,"stdout"); +createstdfile(L,stderr,0,"stderr"); +lua_pop(L,1); +lua_getfield(L,-1,"popen"); +newfenv(L,io_pclose); +lua_setfenv(L,-2); +lua_pop(L,1); +return 1; +} +static int os_pushresult(lua_State*L,int i,const char*filename){ +int en=errno; +if(i){ +lua_pushboolean(L,1); +return 1; +} +else{ +lua_pushnil(L); +lua_pushfstring(L,"%s: %s",filename,strerror(en)); +lua_pushinteger(L,en); +return 3; +} +} +static int os_remove(lua_State*L){ +const char*filename=luaL_checkstring(L,1); +return os_pushresult(L,remove(filename)==0,filename); +} +static int os_exit(lua_State*L){ +exit(luaL_optint(L,1,EXIT_SUCCESS)); +} +static const luaL_Reg syslib[]={ +{"exit",os_exit}, +{"remove",os_remove}, +{NULL,NULL} +}; +static int luaopen_os(lua_State*L){ +luaL_register(L,"os",syslib); +return 1; +} +#define uchar(c)((unsigned char)(c)) +static ptrdiff_t posrelat(ptrdiff_t pos,size_t len){ +if(pos<0)pos+=(ptrdiff_t)len+1; +return(pos>=0)?pos:0; +} +static int str_sub(lua_State*L){ +size_t l; +const char*s=luaL_checklstring(L,1,&l); +ptrdiff_t start=posrelat(luaL_checkinteger(L,2),l); +ptrdiff_t 
end=posrelat(luaL_optinteger(L,3,-1),l); +if(start<1)start=1; +if(end>(ptrdiff_t)l)end=(ptrdiff_t)l; +if(start<=end) +lua_pushlstring(L,s+start-1,end-start+1); +else lua_pushliteral(L,""); +return 1; +} +static int str_lower(lua_State*L){ +size_t l; +size_t i; +luaL_Buffer b; +const char*s=luaL_checklstring(L,1,&l); +luaL_buffinit(L,&b); +for(i=0;i0) +luaL_addlstring(&b,s,l); +luaL_pushresult(&b); +return 1; +} +static int str_byte(lua_State*L){ +size_t l; +const char*s=luaL_checklstring(L,1,&l); +ptrdiff_t posi=posrelat(luaL_optinteger(L,2,1),l); +ptrdiff_t pose=posrelat(luaL_optinteger(L,3,posi),l); +int n,i; +if(posi<=0)posi=1; +if((size_t)pose>l)pose=l; +if(posi>pose)return 0; +n=(int)(pose-posi+1); +if(posi+n<=pose) +luaL_error(L,"string slice too long"); +luaL_checkstack(L,n,"string slice too long"); +for(i=0;i=ms->level||ms->capture[l].len==(-1)) +return luaL_error(ms->L,"invalid capture index"); +return l; +} +static int capture_to_close(MatchState*ms){ +int level=ms->level; +for(level--;level>=0;level--) +if(ms->capture[level].len==(-1))return level; +return luaL_error(ms->L,"invalid pattern capture"); +} +static const char*classend(MatchState*ms,const char*p){ +switch(*p++){ +case'%':{ +if(*p=='\0') +luaL_error(ms->L,"malformed pattern (ends with "LUA_QL("%%")")"); +return p+1; +} +case'[':{ +if(*p=='^')p++; +do{ +if(*p=='\0') +luaL_error(ms->L,"malformed pattern (missing "LUA_QL("]")")"); +if(*(p++)=='%'&&*p!='\0') +p++; +}while(*p!=']'); +return p+1; +} +default:{ +return p; +} +} +} +static int match_class(int c,int cl){ +int res; +switch(tolower(cl)){ +case'a':res=isalpha(c);break; +case'c':res=iscntrl(c);break; +case'd':res=isdigit(c);break; +case'l':res=islower(c);break; +case'p':res=ispunct(c);break; +case's':res=isspace(c);break; +case'u':res=isupper(c);break; +case'w':res=isalnum(c);break; +case'x':res=isxdigit(c);break; +case'z':res=(c==0);break; +default:return(cl==c); +} +return(islower(cl)?res:!res); +} +static int matchbracketclass(int c,const char*p,const char*ec){ +int sig=1; +if(*(p+1)=='^'){ +sig=0; +p++; +} +while(++pL,"unbalanced pattern"); +if(*s!=*p)return NULL; +else{ +int b=*p; +int e=*(p+1); +int cont=1; +while(++ssrc_end){ +if(*s==e){ +if(--cont==0)return s+1; +} +else if(*s==b)cont++; +} +} +return NULL; +} +static const char*max_expand(MatchState*ms,const char*s, +const char*p,const char*ep){ +ptrdiff_t i=0; +while((s+i)src_end&&singlematch(uchar(*(s+i)),p,ep)) +i++; +while(i>=0){ +const char*res=match(ms,(s+i),ep+1); +if(res)return res; +i--; +} +return NULL; +} +static const char*min_expand(MatchState*ms,const char*s, +const char*p,const char*ep){ +for(;;){ +const char*res=match(ms,s,ep+1); +if(res!=NULL) +return res; +else if(ssrc_end&&singlematch(uchar(*s),p,ep)) +s++; +else return NULL; +} +} +static const char*start_capture(MatchState*ms,const char*s, +const char*p,int what){ +const char*res; +int level=ms->level; +if(level>=32)luaL_error(ms->L,"too many captures"); +ms->capture[level].init=s; +ms->capture[level].len=what; +ms->level=level+1; +if((res=match(ms,s,p))==NULL) +ms->level--; +return res; +} +static const char*end_capture(MatchState*ms,const char*s, +const char*p){ +int l=capture_to_close(ms); +const char*res; +ms->capture[l].len=s-ms->capture[l].init; +if((res=match(ms,s,p))==NULL) +ms->capture[l].len=(-1); +return res; +} +static const char*match_capture(MatchState*ms,const char*s,int l){ +size_t len; +l=check_capture(ms,l); +len=ms->capture[l].len; +if((size_t)(ms->src_end-s)>=len&& +memcmp(ms->capture[l].init,s,len)==0) +return 
s+len; +else return NULL; +} +static const char*match(MatchState*ms,const char*s,const char*p){ +init: +switch(*p){ +case'(':{ +if(*(p+1)==')') +return start_capture(ms,s,p+2,(-2)); +else +return start_capture(ms,s,p+1,(-1)); +} +case')':{ +return end_capture(ms,s,p+1); +} +case'%':{ +switch(*(p+1)){ +case'b':{ +s=matchbalance(ms,s,p+2); +if(s==NULL)return NULL; +p+=4;goto init; +} +case'f':{ +const char*ep;char previous; +p+=2; +if(*p!='[') +luaL_error(ms->L,"missing "LUA_QL("[")" after " +LUA_QL("%%f")" in pattern"); +ep=classend(ms,p); +previous=(s==ms->src_init)?'\0':*(s-1); +if(matchbracketclass(uchar(previous),p,ep-1)|| +!matchbracketclass(uchar(*s),p,ep-1))return NULL; +p=ep;goto init; +} +default:{ +if(isdigit(uchar(*(p+1)))){ +s=match_capture(ms,s,uchar(*(p+1))); +if(s==NULL)return NULL; +p+=2;goto init; +} +goto dflt; +} +} +} +case'\0':{ +return s; +} +case'$':{ +if(*(p+1)=='\0') +return(s==ms->src_end)?s:NULL; +else goto dflt; +} +default:dflt:{ +const char*ep=classend(ms,p); +int m=ssrc_end&&singlematch(uchar(*s),p,ep); +switch(*ep){ +case'?':{ +const char*res; +if(m&&((res=match(ms,s+1,ep+1))!=NULL)) +return res; +p=ep+1;goto init; +} +case'*':{ +return max_expand(ms,s,p,ep); +} +case'+':{ +return(m?max_expand(ms,s+1,p,ep):NULL); +} +case'-':{ +return min_expand(ms,s,p,ep); +} +default:{ +if(!m)return NULL; +s++;p=ep;goto init; +} +} +} +} +} +static const char*lmemfind(const char*s1,size_t l1, +const char*s2,size_t l2){ +if(l2==0)return s1; +else if(l2>l1)return NULL; +else{ +const char*init; +l2--; +l1=l1-l2; +while(l1>0&&(init=(const char*)memchr(s1,*s2,l1))!=NULL){ +init++; +if(memcmp(init,s2+1,l2)==0) +return init-1; +else{ +l1-=init-s1; +s1=init; +} +} +return NULL; +} +} +static void push_onecapture(MatchState*ms,int i,const char*s, +const char*e){ +if(i>=ms->level){ +if(i==0) +lua_pushlstring(ms->L,s,e-s); +else +luaL_error(ms->L,"invalid capture index"); +} +else{ +ptrdiff_t l=ms->capture[i].len; +if(l==(-1))luaL_error(ms->L,"unfinished capture"); +if(l==(-2)) +lua_pushinteger(ms->L,ms->capture[i].init-ms->src_init+1); +else +lua_pushlstring(ms->L,ms->capture[i].init,l); +} +} +static int push_captures(MatchState*ms,const char*s,const char*e){ +int i; +int nlevels=(ms->level==0&&s)?1:ms->level; +luaL_checkstack(ms->L,nlevels,"too many captures"); +for(i=0;il1)init=(ptrdiff_t)l1; +if(find&&(lua_toboolean(L,4)|| +strpbrk(p,"^$*+?.([%-")==NULL)){ +const char*s2=lmemfind(s+init,l1-init,p,l2); +if(s2){ +lua_pushinteger(L,s2-s+1); +lua_pushinteger(L,s2-s+l2); +return 2; +} +} +else{ +MatchState ms; +int anchor=(*p=='^')?(p++,1):0; +const char*s1=s+init; +ms.L=L; +ms.src_init=s; +ms.src_end=s+l1; +do{ +const char*res; +ms.level=0; +if((res=match(&ms,s1,p))!=NULL){ +if(find){ +lua_pushinteger(L,s1-s+1); +lua_pushinteger(L,res-s); +return push_captures(&ms,NULL,0)+2; +} +else +return push_captures(&ms,s1,res); +} +}while(s1++L,3,&l); +for(i=0;iL; +switch(lua_type(L,3)){ +case 3: +case 4:{ +add_s(ms,b,s,e); +return; +} +case 6:{ +int n; +lua_pushvalue(L,3); +n=push_captures(ms,s,e); +lua_call(L,n,1); +break; +} +case 5:{ +push_onecapture(ms,0,s,e); +lua_gettable(L,3); +break; +} +} +if(!lua_toboolean(L,-1)){ +lua_pop(L,1); +lua_pushlstring(L,s,e-s); +} +else if(!lua_isstring(L,-1)) +luaL_error(L,"invalid replacement value (a %s)",luaL_typename(L,-1)); +luaL_addvalue(b); +} +static int str_gsub(lua_State*L){ +size_t srcl; +const char*src=luaL_checklstring(L,1,&srcl); +const char*p=luaL_checkstring(L,2); +int tr=lua_type(L,3); +int max_s=luaL_optint(L,4,srcl+1); +int 
anchor=(*p=='^')?(p++,1):0; +int n=0; +MatchState ms; +luaL_Buffer b; +luaL_argcheck(L,tr==3||tr==4|| +tr==6||tr==5,3, +"string/function/table expected"); +luaL_buffinit(L,&b); +ms.L=L; +ms.src_init=src; +ms.src_end=src+srcl; +while(nsrc) +src=e; +else if(src=sizeof("-+ #0")) +luaL_error(L,"invalid format (repeated flags)"); +if(isdigit(uchar(*p)))p++; +if(isdigit(uchar(*p)))p++; +if(*p=='.'){ +p++; +if(isdigit(uchar(*p)))p++; +if(isdigit(uchar(*p)))p++; +} +if(isdigit(uchar(*p))) +luaL_error(L,"invalid format (width or precision too long)"); +*(form++)='%'; +strncpy(form,strfrmt,p-strfrmt+1); +form+=p-strfrmt+1; +*form='\0'; +return p; +} +static void addintlen(char*form){ +size_t l=strlen(form); +char spec=form[l-1]; +strcpy(form+l-1,"l"); +form[l+sizeof("l")-2]=spec; +form[l+sizeof("l")-1]='\0'; +} +static int str_format(lua_State*L){ +int top=lua_gettop(L); +int arg=1; +size_t sfl; +const char*strfrmt=luaL_checklstring(L,arg,&sfl); +const char*strfrmt_end=strfrmt+sfl; +luaL_Buffer b; +luaL_buffinit(L,&b); +while(strfrmttop) +luaL_argerror(L,arg,"no value"); +strfrmt=scanformat(L,strfrmt,form); +switch(*strfrmt++){ +case'c':{ +sprintf(buff,form,(int)luaL_checknumber(L,arg)); +break; +} +case'd':case'i':{ +addintlen(form); +sprintf(buff,form,(long)luaL_checknumber(L,arg)); +break; +} +case'o':case'u':case'x':case'X':{ +addintlen(form); +sprintf(buff,form,(unsigned long)luaL_checknumber(L,arg)); +break; +} +case'e':case'E':case'f': +case'g':case'G':{ +sprintf(buff,form,(double)luaL_checknumber(L,arg)); +break; +} +case'q':{ +addquoted(L,&b,arg); +continue; +} +case's':{ +size_t l; +const char*s=luaL_checklstring(L,arg,&l); +if(!strchr(form,'.')&&l>=100){ +lua_pushvalue(L,arg); +luaL_addvalue(&b); +continue; +} +else{ +sprintf(buff,form,s); +break; +} +} +default:{ +return luaL_error(L,"invalid option "LUA_QL("%%%c")" to " +LUA_QL("format"),*(strfrmt-1)); +} +} +luaL_addlstring(&b,buff,strlen(buff)); +} +} +luaL_pushresult(&b); +return 1; +} +static const luaL_Reg strlib[]={ +{"byte",str_byte}, +{"char",str_char}, +{"find",str_find}, +{"format",str_format}, +{"gmatch",gmatch}, +{"gsub",str_gsub}, +{"lower",str_lower}, +{"match",str_match}, +{"rep",str_rep}, +{"sub",str_sub}, +{"upper",str_upper}, +{NULL,NULL} +}; +static void createmetatable(lua_State*L){ +lua_createtable(L,0,1); +lua_pushliteral(L,""); +lua_pushvalue(L,-2); +lua_setmetatable(L,-2); +lua_pop(L,1); +lua_pushvalue(L,-2); +lua_setfield(L,-2,"__index"); +lua_pop(L,1); +} +static int luaopen_string(lua_State*L){ +luaL_register(L,"string",strlib); +createmetatable(L); +return 1; +} +static const luaL_Reg lualibs[]={ +{"",luaopen_base}, +{"table",luaopen_table}, +{"io",luaopen_io}, +{"os",luaopen_os}, +{"string",luaopen_string}, +{NULL,NULL} +}; +static void luaL_openlibs(lua_State*L){ +const luaL_Reg*lib=lualibs; +for(;lib->func;lib++){ +lua_pushcfunction(L,lib->func); +lua_pushstring(L,lib->name); +lua_call(L,1,0); +} +} +typedef unsigned int UB; +static UB barg(lua_State*L,int idx){ +union{lua_Number n;U64 b;}bn; +bn.n=lua_tonumber(L,idx)+6755399441055744.0; +if(bn.n==0.0&&!lua_isnumber(L,idx))luaL_typerror(L,idx,"number"); +return(UB)bn.b; +} +#define BRET(b)lua_pushnumber(L,(lua_Number)(int)(b));return 1; +static int tobit(lua_State*L){ +BRET(barg(L,1))} +static int bnot(lua_State*L){ +BRET(~barg(L,1))} +static int band(lua_State*L){ +int i;UB b=barg(L,1);for(i=lua_gettop(L);i>1;i--)b&=barg(L,i);BRET(b)} +static int bor(lua_State*L){ +int i;UB b=barg(L,1);for(i=lua_gettop(L);i>1;i--)b|=barg(L,i);BRET(b)} +static int 
+static int bxor(lua_State*L){
+int i;UB b=barg(L,1);for(i=lua_gettop(L);i>1;i--)b^=barg(L,i);BRET(b)}
+static int lshift(lua_State*L){
+UB b=barg(L,1),n=barg(L,2)&31;BRET(b<<n)}
+static int rshift(lua_State*L){
+UB b=barg(L,1),n=barg(L,2)&31;BRET(b>>n)}
+static int arshift(lua_State*L){
+UB b=barg(L,1),n=barg(L,2)&31;BRET((int)b>>n)}
+static int rol(lua_State*L){
+UB b=barg(L,1),n=barg(L,2)&31;BRET((b<<n)|(b>>(32-n)))}
+static int ror(lua_State*L){
+UB b=barg(L,1),n=barg(L,2)&31;BRET((b>>n)|(b<<(32-n)))}
+static int bswap(lua_State*L){
+UB b=barg(L,1);b=(b>>24)|((b>>8)&0xff00)|((b&0xff00)<<8)|(b<<24);BRET(b)}
+static int tohex(lua_State*L){
+UB b=barg(L,1);
+int n=lua_isnone(L,2)?8:(int)barg(L,2);
+const char*hexdigits="0123456789abcdef";
+char buf[8];
+int i;
+if(n<0){n=-n;hexdigits="0123456789ABCDEF";}
+if(n>8)n=8;
+for(i=(int)n;--i>=0;){buf[i]=hexdigits[b&15];b>>=4;}
+lua_pushlstring(L,buf,(size_t)n);
+return 1;
+}
+static const struct luaL_Reg bitlib[]={
+{"tobit",tobit},
+{"bnot",bnot},
+{"band",band},
+{"bor",bor},
+{"bxor",bxor},
+{"lshift",lshift},
+{"rshift",rshift},
+{"arshift",arshift},
+{"rol",rol},
+{"ror",ror},
+{"bswap",bswap},
+{"tohex",tohex},
+{NULL,NULL}
+};
+int main(int argc,char**argv){
+lua_State*L=luaL_newstate();
+int i;
+luaL_openlibs(L);
+luaL_register(L,"bit",bitlib);
+if(argc<2)return sizeof(void*);
+lua_createtable(L,0,1);
+lua_pushstring(L,argv[1]);
+lua_rawseti(L,-2,0);
+lua_setglobal(L,"arg");
+if(luaL_loadfile(L,argv[1]))
+goto err;
+for(i=2;i<argc;i++)
+lua_pushstring(L,argv[i]);
+if(lua_pcall(L,argc-2,0,0)){
+err:
+fprintf(stderr,"Error: %s\n",lua_tostring(L,-1));
+return 1;
+}
+return 0;
+}
diff --git a/ext/opcache/jit/ir/gen_ir_fold_hash.c b/ext/opcache/jit/ir/gen_ir_fold_hash.c
new file mode 100644
--- /dev/null
+++ b/ext/opcache/jit/ir/gen_ir_fold_hash.c
+/*
+ * IR - Lightweight JIT Compilation Framework
+ * (Folding engine generator)
+ * Copyright (C) 2022 Zend by Perforce.
+ * Authors: Dmitry Stogov
+ *
+ * Based on Mike Pall's implementation for LuaJIT.
+ */
+
+#include "ir.h"
+#include <stdio.h>
+
+#include "ir_strtab.c"
+
+#define MAX_RULES 2048
+#define MAX_SLOTS (MAX_RULES * 4)
+
+static ir_strtab strtab;
+
+void print_hash(uint32_t *mask, uint32_t count)
+{
+	uint32_t i;
+
+	printf("static const uint32_t _ir_fold_hash[%d] = {\n", count);
+	for (i = 0; i < count; i++) {
+		printf("\t0x%08x,\n", mask[i]);
+	}
+	printf("};\n\n");
+}
+
+static uint32_t hash_shl2(uint32_t mask, uint32_t r1, uint32_t r2)
+{
+	return ((mask << r1) - mask) << r2;
+}
+
+#if 0
+#define ir_rol(x, n) (((x)<<(n)) | ((x)>>(-(int)(n)&(8*sizeof(x)-1))))
+#define ir_ror(x, n) (((x)<<(-(int)(n)&(8*sizeof(x)-1))) | ((x)>>(n)))
+
+static uint32_t hash_rol2(uint32_t mask, uint32_t r1, uint32_t r2)
+{
+	return ir_rol((ir_rol(mask, r1) - mask), r2);
+}
+#endif
+
+/* Find a perfect hash function */
+int find_hash(uint32_t *mask, uint32_t count)
+{
+	uint32_t hash[MAX_SLOTS];
+	uint32_t n, r1, r2, i, h;
+
+	for (n = (count | 1); n < MAX_SLOTS; n += 2) {
+		for (r1 = 0; r1 < 31; r1++) {
+			for (r2 = 0; r2 < 32; r2++) {
+				memset(hash, 0, n * sizeof(uint32_t));
+				for (i = 0; i < count; i++) {
+					h = hash_shl2(mask[i] & 0x1fffff, r1, r2) % n;
+					if (hash[h]) break; /* collision */
+					hash[h] = mask[i];
+				}
+				if (i == count) {
+					print_hash(hash, n);
+					printf("static uint32_t _ir_fold_hashkey(uint32_t h)\n{\n\treturn (((h << %d) - h) << %d) %% %d;\n}\n", r1, r2, n);
+					return 1;
+				}
+#if 0
+				memset(hash, 0, n * sizeof(uint32_t));
+				for (i = 0; i < count; i++) {
+					h = hash_rol2(mask[i] & 0x1fffff, r1, r2) % n;
+					if (hash[h]) break; /* collision */
+					hash[h] = mask[i];
+				}
+				if (i == count) {
+					print_hash(hash, n);
+					printf("static uint32_t _ir_fold_hashkey(uint32_t h)\n{\nreturn 0; /*rol2(%u,%u,%u)*/\n}\n", r1, r2, n);
+					return 1;
+				}
+#endif
+			}
+		}
+	}
+
+	hash[0] = 0;
+	print_hash(hash, 1);
+	printf("static uint32_t _ir_fold_hashkey(uint32_t h)\n{\n\treturn 0;\n}\n");
+	return 0;
+}
+
+static int find_op(const char *s, size_t len)
+{
+	return ir_strtab_find(&strtab, s, (uint8_t)len) - 1;
+}
+
+static int parse_rule(const char *buf)
+{
+	const char *p = buf + 
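find_hash() above brute-forces a table size n and a shift pair (r1, r2) until hash_shl2(key & 0x1fffff, r1, r2) % n places every 21-bit rule key in a distinct slot, then prints the table and the key function as C source. A toy instance of that generated output (made-up keys 1 and 2, n = 5, r1 = 3, r2 = 1), just to show the shape and the intended lookup; the real table is the emitted ir_fold_hash.h:

	static const uint32_t toy_fold_hash[5] = {
		0x00000000, 0x00000000, 0x00000000,
		0x00200002,	/* key 2 -> rule 1 */
		0x00000001,	/* key 1 -> rule 0 */
	};

	static uint32_t toy_fold_hashkey(uint32_t h)
	{
		return (((h << 3) - h) << 1) % 5;	/* hash_shl2(h, 3, 1) % 5 */
	}

	static int toy_find_rule(uint32_t key)	/* key = op | (op1 << 7) | (op2 << 14) */
	{
		uint32_t fh = toy_fold_hash[toy_fold_hashkey(key & 0x1fffff)];
		return ((fh & 0x1fffff) == key) ? (int)(fh >> 21) : -1;	/* rule id, or miss */
	}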
sizeof("IR_FOLD(") - 1; + const char *q; + int op, mask; + + while (*p == ' ' || *p == '\t') { + p++; + } + if (*p < 'A' || *p > 'Z') { + return 0; + } + q = p + 1; + while ((*q >= 'A' && *q <= 'Z') + || (*q >= '0' && *q <= '9') + || *q == '_') { + q++; + } + op = find_op(p, q - p); + if (op < 0) { + return 0; + } + mask = op; + + while (*q == ' ' || *q == '\t') { + q++; + } + if (*q == ')') { + return mask; /* unused operands */ + } else if (*q != '(') { + return 0; + } + + p = q + 1; + while (*p == ' ' || *p == '\t') { + p++; + } + if (*p == '_') { + q = p + 1; + } else if (*p >= 'A' && *p <= 'Z') { + q = p + 1; + while ((*q >= 'A' && *q <= 'Z') + || (*q >= '0' && *q <= '9') + || *q == '_') { + q++; + } + op = find_op(p, q - p); + if (op < 0) { + return 0; + } + mask |= op << 7; + } else { + return 0; + } + + while (*q == ' ' || *q == '\t') { + q++; + } + if (*q == ')') { + return mask; /* unused op2 */ + } else if (*q != ',') { + return 0; + } + + p = q + 1; + while (*p == ' ' || *p == '\t') { + p++; + } + if (*p == '_') { + q = p + 1; + } else if (*p >= 'A' && *p <= 'Z') { + q = p + 1; + while ((*q >= 'A' && *q <= 'Z') + || (*q >= '0' && *q <= '9') + || *q == '_') { + q++; + } + op = find_op(p, q - p); + if (op < 0) { + return 0; + } + mask |= op << 14; + } else { + return 0; + } + + while (*q == ' ' || *q == '\t') { + q++; + } + if (*q != ')') { + return 0; + } + + q++; + while (*q == ' ' || *q == '\t') { + q++; + } + if (*q != ')') { + return 0; + } + + return mask; +} + +int main() +{ + char buf[4096]; + FILE *f = stdin; + int line = 0; + int rules = 0; + int i; + uint32_t mask[MAX_RULES]; + uint32_t rule[MAX_RULES]; + + ir_strtab_init(&strtab, IR_LAST_OP, 0); + +#define IR_OP_ADD(name, flags, op1, op2, op3) \ + ir_strtab_lookup(&strtab, #name, sizeof(#name) - 1, IR_ ## name + 1); + + IR_OPS(IR_OP_ADD) + + while (fgets(buf, sizeof(buf) - 1, f)) { + size_t len = strlen(buf); + if (len > 0 && (buf[len - 1] == '\r' || buf[len - 1] == '\n')) { + buf[len - 1] = 0; + len--; + line++; + } + if (len >= sizeof("IR_FOLD(")-1 + && memcmp(buf, "IR_FOLD(", sizeof("IR_FOLD(")-1) == 0) { + if (rules >= MAX_RULES) { + fprintf(stderr, "ERROR: Too many rules\n"); + return 1; + } + i = parse_rule(buf); + if (!i) { + fprintf(stderr, "ERROR: Incorrect '%s' rule on line %d\n", buf, line); + return 1; + } + // TODO: few masks may share the same rule ??? + rule[rules] = line; + mask[rules] = i | (rules << 21); + rules++; + } + } + ir_strtab_free(&strtab); + +#if 0 + for (i = 0; i < rules; i++) { + printf("0x%08x\n", mask[i]); + } +#endif + + printf("/* This file is generated from \"ir_fold.h\". Do not edit! */\n\n"); + printf("typedef enum _ir_fold_rule_id {\n"); + for (i = 0; i < rules; i++) { + printf("\tIR_RULE_%d,\n", rule[i]); + } + printf("\t_IR_RULE_LAST\n"); + printf("} ir_fold_rule_id;\n\n"); + + if (!find_hash(mask, rules)) { + fprintf(stderr, "ERROR: Cannot find a good hash function\n"); + return 1; + } + + return 0; +} diff --git a/ext/opcache/jit/ir/ir.c b/ext/opcache/jit/ir/ir.c new file mode 100644 index 00000000000..d41c3803e51 --- /dev/null +++ b/ext/opcache/jit/ir/ir.c @@ -0,0 +1,2322 @@ +/* + * IR - Lightweight JIT Compilation Framework + * (IR construction, folding, utilities) + * Copyright (C) 2022 Zend by Perforce. + * Authors: Dmitry Stogov + * + * The logical IR representation is based on Cliff Click's Sea of Nodes. + * See: C. Click, M. Paleczny. 
"A Simple Graph-Based Intermediate + * Representation" In ACM SIGPLAN Workshop on Intermediate Representations + * (IR '95), pages 35-49, Jan. 1995. + * + * The physical IR representation is based on Mike Pall's LuaJIT IR. + * See: M. Pall. "LuaJIT 2.0 intellectual property disclosure and research + * opportunities" November 2009 http://lua-users.org/lists/lua-l/2009-11/msg00089.html + */ + +#ifndef _GNU_SOURCE +# define _GNU_SOURCE +#endif + +#ifndef _WIN32 +# include +#else +# define WIN32_LEAN_AND_MEAN +# include +#endif + +#include "ir.h" +#include "ir_private.h" + +#include + +#ifdef HAVE_VALGRIND +# include +#endif + +#define IR_TYPE_FLAGS(name, type, field, flags) ((flags)|sizeof(type)), +#define IR_TYPE_NAME(name, type, field, flags) #name, +#define IR_TYPE_CNAME(name, type, field, flags) #type, +#define IR_TYPE_SIZE(name, type, field, flags) sizeof(type), +#define IR_OP_NAME(name, flags, op1, op2, op3) #name, + +const uint8_t ir_type_flags[IR_LAST_TYPE] = { + 0, + IR_TYPES(IR_TYPE_FLAGS) +}; + +const char *ir_type_name[IR_LAST_TYPE] = { + "void", + IR_TYPES(IR_TYPE_NAME) +}; + +const uint8_t ir_type_size[IR_LAST_TYPE] = { + 0, + IR_TYPES(IR_TYPE_SIZE) +}; + +const char *ir_type_cname[IR_LAST_TYPE] = { + "void", + IR_TYPES(IR_TYPE_CNAME) +}; + +const char *ir_op_name[IR_LAST_OP] = { + IR_OPS(IR_OP_NAME) +#ifdef IR_PHP + IR_PHP_OPS(IR_OP_NAME) +#endif +}; + +void ir_print_const(const ir_ctx *ctx, const ir_insn *insn, FILE *f, bool quoted) +{ + if (insn->op == IR_FUNC || insn->op == IR_SYM) { + fprintf(f, "%s", ir_get_str(ctx, insn->val.i32)); + return; + } else if (insn->op == IR_STR) { + if (quoted) { + fprintf(f, "\"%s\"", ir_get_str(ctx, insn->val.i32)); + } else { + fprintf(f, "%s", ir_get_str(ctx, insn->val.i32)); + } + return; + } + IR_ASSERT(IR_IS_CONST_OP(insn->op) || insn->op == IR_FUNC_ADDR); + switch (insn->type) { + case IR_BOOL: + fprintf(f, "%u", insn->val.b); + break; + case IR_U8: + fprintf(f, "%u", insn->val.u8); + break; + case IR_U16: + fprintf(f, "%u", insn->val.u16); + break; + case IR_U32: + fprintf(f, "%u", insn->val.u32); + break; + case IR_U64: + fprintf(f, "%" PRIu64, insn->val.u64); + break; + case IR_ADDR: + if (insn->val.addr) { + fprintf(f, "0x%" PRIxPTR, insn->val.addr); + } else { + fprintf(f, "0"); + } + break; + case IR_CHAR: + if (insn->val.c == '\\') { + fprintf(f, "'\\\\'"); + } else if (insn->val.c >= ' ') { + fprintf(f, "'%c'", insn->val.c); + } else if (insn->val.c == '\t') { + fprintf(f, "'\\t'"); + } else if (insn->val.c == '\r') { + fprintf(f, "'\\r'"); + } else if (insn->val.c == '\n') { + fprintf(f, "'\\n'"); + } else if (insn->val.c == '\0') { + fprintf(f, "'\\0'"); + } else { + fprintf(f, "%u", insn->val.c); + } + break; + case IR_I8: + fprintf(f, "%d", insn->val.i8); + break; + case IR_I16: + fprintf(f, "%d", insn->val.i16); + break; + case IR_I32: + fprintf(f, "%d", insn->val.i32); + break; + case IR_I64: + fprintf(f, "%" PRIi64, insn->val.i64); + break; + case IR_DOUBLE: + if (isnan(insn->val.d)) { + fprintf(f, "nan"); + } else { + fprintf(f, "%g", insn->val.d); + } + break; + case IR_FLOAT: + if (isnan(insn->val.f)) { + fprintf(f, "nan"); + } else { + fprintf(f, "%g", insn->val.f); + } + break; + default: + IR_ASSERT(0); + break; + } +} + +#define ir_op_flag_v 0 +#define ir_op_flag_v0X3 (0 | (3 << IR_OP_FLAG_OPERANDS_SHIFT)) +#define ir_op_flag_d IR_OP_FLAG_DATA +#define ir_op_flag_d0 ir_op_flag_d +#define ir_op_flag_d1 (ir_op_flag_d | 1 | (1 << IR_OP_FLAG_OPERANDS_SHIFT)) +#define ir_op_flag_d1X1 (ir_op_flag_d | 1 | (2 << 
IR_OP_FLAG_OPERANDS_SHIFT)) +#define ir_op_flag_d2 (ir_op_flag_d | 2 | (2 << IR_OP_FLAG_OPERANDS_SHIFT)) +#define ir_op_flag_d2C (ir_op_flag_d | IR_OP_FLAG_COMMUTATIVE | 2 | (2 << IR_OP_FLAG_OPERANDS_SHIFT)) +#define ir_op_flag_d3 (ir_op_flag_d | 3 | (3 << IR_OP_FLAG_OPERANDS_SHIFT)) +#define ir_op_flag_r IR_OP_FLAG_DATA // "d" and "r" are the same now +#define ir_op_flag_r0 ir_op_flag_r +#define ir_op_flag_p (IR_OP_FLAG_DATA | IR_OP_FLAG_PINNED) +#define ir_op_flag_p1 (ir_op_flag_p | 1 | (1 << IR_OP_FLAG_OPERANDS_SHIFT)) +#define ir_op_flag_p1X1 (ir_op_flag_p | 1 | (2 << IR_OP_FLAG_OPERANDS_SHIFT)) +#define ir_op_flag_p1X2 (ir_op_flag_p | 1 | (3 << IR_OP_FLAG_OPERANDS_SHIFT)) +#define ir_op_flag_p2 (ir_op_flag_p | 2 | (2 << IR_OP_FLAG_OPERANDS_SHIFT)) +#define ir_op_flag_pN (ir_op_flag_p | IR_OP_FLAG_VAR_INPUTS) +#define ir_op_flag_c IR_OP_FLAG_CONTROL +#define ir_op_flag_c1X2 (ir_op_flag_c | 1 | (3 << IR_OP_FLAG_OPERANDS_SHIFT)) +#define ir_op_flag_c3 (ir_op_flag_c | 3 | (3 << IR_OP_FLAG_OPERANDS_SHIFT)) +#define ir_op_flag_S (IR_OP_FLAG_CONTROL|IR_OP_FLAG_BB_START) +#define ir_op_flag_S0X1 (ir_op_flag_S | 0 | (1 << IR_OP_FLAG_OPERANDS_SHIFT)) +#define ir_op_flag_S1 (ir_op_flag_S | 1 | (1 << IR_OP_FLAG_OPERANDS_SHIFT)) +#define ir_op_flag_S1X1 (ir_op_flag_S | 1 | (2 << IR_OP_FLAG_OPERANDS_SHIFT)) +#define ir_op_flag_S2 (ir_op_flag_S | 2 | (2 << IR_OP_FLAG_OPERANDS_SHIFT)) +#define ir_op_flag_S2X1 (ir_op_flag_S | 2 | (3 << IR_OP_FLAG_OPERANDS_SHIFT)) +#define ir_op_flag_SN (ir_op_flag_S | IR_OP_FLAG_VAR_INPUTS) +#define ir_op_flag_E (IR_OP_FLAG_CONTROL|IR_OP_FLAG_BB_END) +#define ir_op_flag_E1 (ir_op_flag_E | 1 | (1 << IR_OP_FLAG_OPERANDS_SHIFT)) +#define ir_op_flag_E2 (ir_op_flag_E | 2 | (2 << IR_OP_FLAG_OPERANDS_SHIFT)) +#define ir_op_flag_T (IR_OP_FLAG_CONTROL|IR_OP_FLAG_BB_END|IR_OP_FLAG_TERMINATOR) +#define ir_op_flag_T2X1 (ir_op_flag_T | 2 | (3 << IR_OP_FLAG_OPERANDS_SHIFT)) +#define ir_op_flag_T1X2 (ir_op_flag_T | 1 | (3 << IR_OP_FLAG_OPERANDS_SHIFT)) +#define ir_op_flag_l (IR_OP_FLAG_CONTROL|IR_OP_FLAG_MEM|IR_OP_FLAG_MEM_LOAD) +#define ir_op_flag_l0 ir_op_flag_l +#define ir_op_flag_l1 (ir_op_flag_l | 1 | (1 << IR_OP_FLAG_OPERANDS_SHIFT)) +#define ir_op_flag_l1X1 (ir_op_flag_l | 1 | (2 << IR_OP_FLAG_OPERANDS_SHIFT)) +#define ir_op_flag_l1X2 (ir_op_flag_l | 1 | (3 << IR_OP_FLAG_OPERANDS_SHIFT)) +#define ir_op_flag_l2 (ir_op_flag_l | 2 | (2 << IR_OP_FLAG_OPERANDS_SHIFT)) +#define ir_op_flag_l3 (ir_op_flag_l | 3 | (3 << IR_OP_FLAG_OPERANDS_SHIFT)) +#define ir_op_flag_s (IR_OP_FLAG_CONTROL|IR_OP_FLAG_MEM|IR_OP_FLAG_MEM_STORE) +#define ir_op_flag_s0 ir_op_flag_s +#define ir_op_flag_s1 (ir_op_flag_s | 1 | (1 << IR_OP_FLAG_OPERANDS_SHIFT)) +#define ir_op_flag_s2 (ir_op_flag_s | 2 | (2 << IR_OP_FLAG_OPERANDS_SHIFT)) +#define ir_op_flag_s2X1 (ir_op_flag_s | 2 | (3 << IR_OP_FLAG_OPERANDS_SHIFT)) +#define ir_op_flag_s3 (ir_op_flag_s | 3 | (3 << IR_OP_FLAG_OPERANDS_SHIFT)) +#define ir_op_flag_x1 (IR_OP_FLAG_CONTROL|IR_OP_FLAG_MEM|IR_OP_FLAG_MEM_CALL | 1 | (1 << IR_OP_FLAG_OPERANDS_SHIFT)) +#define ir_op_flag_x2 (IR_OP_FLAG_CONTROL|IR_OP_FLAG_MEM|IR_OP_FLAG_MEM_CALL | 2 | (2 << IR_OP_FLAG_OPERANDS_SHIFT)) +#define ir_op_flag_xN (IR_OP_FLAG_CONTROL|IR_OP_FLAG_MEM|IR_OP_FLAG_MEM_CALL | IR_OP_FLAG_VAR_INPUTS) +#define ir_op_flag_a2 (IR_OP_FLAG_CONTROL|IR_OP_FLAG_MEM|IR_OP_FLAG_MEM_ALLOC | 2 | (2 << IR_OP_FLAG_OPERANDS_SHIFT)) + +#define ir_op_kind____ IR_OPND_UNUSED +#define ir_op_kind_def IR_OPND_DATA +#define ir_op_kind_ref IR_OPND_DATA +#define ir_op_kind_src IR_OPND_CONTROL +#define 
ir_op_kind_reg IR_OPND_CONTROL_DEP +#define ir_op_kind_ret IR_OPND_CONTROL_REF +#define ir_op_kind_str IR_OPND_STR +#define ir_op_kind_num IR_OPND_NUM +#define ir_op_kind_fld IR_OPND_STR +#define ir_op_kind_var IR_OPND_DATA +#define ir_op_kind_prb IR_OPND_PROB +#define ir_op_kind_opt IR_OPND_PROB + +#define _IR_OP_FLAGS(name, flags, op1, op2, op3) \ + IR_OP_FLAGS(ir_op_flag_ ## flags, ir_op_kind_ ## op1, ir_op_kind_ ## op2, ir_op_kind_ ## op3), + +const uint32_t ir_op_flags[IR_LAST_OP] = { + IR_OPS(_IR_OP_FLAGS) +#ifdef IR_PHP + IR_PHP_OPS(_IR_OP_FLAGS) +#endif +}; + +static void ir_grow_bottom(ir_ctx *ctx) +{ + ir_insn *buf = ctx->ir_base - ctx->consts_limit; + ir_ref old_consts_limit = ctx->consts_limit; + + if (ctx->consts_limit < 1024 * 4) { + ctx->consts_limit *= 2; + } else if (ctx->consts_limit < 1024 * 4 * 2) { + ctx->consts_limit = 1024 * 4 * 2; + } else { + ctx->consts_limit += 1024 * 4; + } + buf = ir_mem_realloc(buf, (ctx->consts_limit + ctx->insns_limit) * sizeof(ir_insn)); + memmove(buf + (ctx->consts_limit - old_consts_limit), + buf, + (old_consts_limit + ctx->insns_count) * sizeof(ir_insn)); + ctx->ir_base = buf + ctx->consts_limit; +} + +static ir_ref ir_next_const(ir_ctx *ctx) +{ + ir_ref ref = ctx->consts_count; + + if (UNEXPECTED(ref >= ctx->consts_limit)) { + ir_grow_bottom(ctx); + } + ctx->consts_count = ref + 1; + return -ref; +} + +static void ir_grow_top(ir_ctx *ctx) +{ + ir_insn *buf = ctx->ir_base - ctx->consts_limit; + + if (ctx->insns_limit < 1024 * 4) { + ctx->insns_limit *= 2; + } else if (ctx->insns_limit < 1024 * 4 * 2) { + ctx->insns_limit = 1024 * 4 * 2; + } else { + ctx->insns_limit += 1024 * 4; + } + buf = ir_mem_realloc(buf, (ctx->consts_limit + ctx->insns_limit) * sizeof(ir_insn)); + ctx->ir_base = buf + ctx->consts_limit; +} + +static ir_ref ir_next_insn(ir_ctx *ctx) +{ + ir_ref ref = ctx->insns_count; + + if (UNEXPECTED(ref >= ctx->insns_limit)) { + ir_grow_top(ctx); + } + ctx->insns_count = ref + 1; + return ref; +} + +void ir_truncate(ir_ctx *ctx) +{ + ir_insn *buf = ir_mem_malloc((ctx->consts_count + ctx->insns_count) * sizeof(ir_insn)); + + memcpy(buf, ctx->ir_base - ctx->consts_count, (ctx->consts_count + ctx->insns_count) * sizeof(ir_insn)); + ir_mem_free(ctx->ir_base - ctx->consts_limit); + ctx->insns_limit = ctx->insns_count; + ctx->consts_limit = ctx->consts_count; + ctx->ir_base = buf + ctx->consts_limit; +} + +void ir_init(ir_ctx *ctx, uint32_t flags, ir_ref consts_limit, ir_ref insns_limit) +{ + ir_insn *buf; + + IR_ASSERT(consts_limit >= IR_CONSTS_LIMIT_MIN); + IR_ASSERT(insns_limit >= IR_INSNS_LIMIT_MIN); + + memset(ctx, 0, sizeof(ir_ctx)); + + ctx->insns_count = IR_UNUSED + 1; + ctx->insns_limit = insns_limit; + ctx->consts_count = -(IR_TRUE - 1); + ctx->consts_limit = consts_limit; + ctx->fold_cse_limit = IR_UNUSED + 1; + ctx->flags = flags; + + ctx->spill_base = -1; + ctx->fixed_stack_frame_size = -1; + + buf = ir_mem_malloc((consts_limit + insns_limit) * sizeof(ir_insn)); + ctx->ir_base = buf + consts_limit; + + ctx->ir_base[IR_UNUSED].optx = IR_NOP; + ctx->ir_base[IR_NULL].optx = IR_OPT(IR_C_ADDR, IR_ADDR); + ctx->ir_base[IR_NULL].val.u64 = 0; + ctx->ir_base[IR_FALSE].optx = IR_OPT(IR_C_BOOL, IR_BOOL); + ctx->ir_base[IR_FALSE].val.u64 = 0; + ctx->ir_base[IR_TRUE].optx = IR_OPT(IR_C_BOOL, IR_BOOL); + ctx->ir_base[IR_TRUE].val.u64 = 1; +} + +void ir_free(ir_ctx *ctx) +{ + ir_insn *buf = ctx->ir_base - ctx->consts_limit; + ir_mem_free(buf); + if (ctx->strtab.data) { + ir_strtab_free(&ctx->strtab); + } + if (ctx->binding) { + 
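The growth helpers above maintain one contiguous allocation addressed from the middle: constants occupy negative refs below ir_base and grow downwards (ir_grow_bottom reallocs and memmoves them), while instructions occupy positive refs and grow upwards (ir_grow_top only reallocs). A sketch of the layout (a live ir_ctx *ctx is assumed):

	/*
	 *   buf                            ir_base                        end
	 *    |<------- consts_limit ------->|<------- insns_limit ------->|
	 *    | ... [-2] [-1]                | [0]=NOP [1] [2] ...         |
	 *          constants (ref < 0)        instructions (ref > 0)
	 */
	static void toy_addressing(ir_ctx *ctx)
	{
		ir_insn *c = &ctx->ir_base[-3];	/* constant with ref == -3 */
		ir_insn *i = &ctx->ir_base[42];	/* instruction with ref == 42 */
		(void)c; (void)i;
	}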
ir_hashtab_free(ctx->binding); + ir_mem_free(ctx->binding); + } + if (ctx->use_lists) { + ir_mem_free(ctx->use_lists); + } + if (ctx->use_edges) { + ir_mem_free(ctx->use_edges); + } + if (ctx->cfg_blocks) { + ir_mem_free(ctx->cfg_blocks); + } + if (ctx->cfg_edges) { + ir_mem_free(ctx->cfg_edges); + } + if (ctx->cfg_map) { + ir_mem_free(ctx->cfg_map); + } + if (ctx->rules) { + ir_mem_free(ctx->rules); + } + if (ctx->vregs) { + ir_mem_free(ctx->vregs); + } + if (ctx->live_intervals) { + ir_mem_free(ctx->live_intervals); + } + if (ctx->arena) { + ir_arena_free(ctx->arena); + } + if (ctx->regs) { + ir_mem_free(ctx->regs); + } + if (ctx->prev_ref) { + ir_mem_free(ctx->prev_ref); + } + if (ctx->entries) { + ir_mem_free(ctx->entries); + } + if (ctx->osr_entry_loads) { + ir_list_free((ir_list*)ctx->osr_entry_loads); + ir_mem_free(ctx->osr_entry_loads); + } +} + +ir_ref ir_unique_const_addr(ir_ctx *ctx, uintptr_t addr) +{ + ir_ref ref = ir_next_const(ctx); + ir_insn *insn = &ctx->ir_base[ref]; + + insn->optx = IR_OPT(IR_ADDR, IR_ADDR); + insn->val.u64 = addr; + /* don't insert into constants chain */ + insn->prev_const = IR_UNUSED; +#if 0 + insn->prev_const = ctx->prev_const_chain[IR_ADDR]; + ctx->prev_const_chain[IR_ADDR] = ref; +#endif +#if 0 + ir_insn *prev_insn, *next_insn; + ir_ref next; + + prev_insn = NULL; + next = ctx->prev_const_chain[IR_ADDR]; + while (next) { + next_insn = &ctx->ir_base[next]; + if (UNEXPECTED(next_insn->val.u64 >= addr)) { + break; + } + prev_insn = next_insn; + next = next_insn->prev_const; + } + + if (prev_insn) { + insn->prev_const = prev_insn->prev_const; + prev_insn->prev_const = ref; + } else { + insn->prev_const = ctx->prev_const_chain[IR_ADDR]; + ctx->prev_const_chain[IR_ADDR] = ref; + } +#endif + + return ref; +} + +static IR_NEVER_INLINE ir_ref ir_const_ex(ir_ctx *ctx, ir_val val, uint8_t type, uint32_t optx) +{ + ir_insn *insn, *prev_insn; + ir_ref ref, prev; + + if (type == IR_BOOL) { + return val.u64 ? 
IR_TRUE : IR_FALSE; + } else if (type == IR_ADDR && val.u64 == 0) { + return IR_NULL; + } + prev_insn = NULL; + ref = ctx->prev_const_chain[type]; + while (ref) { + insn = &ctx->ir_base[ref]; + if (UNEXPECTED(insn->val.u64 >= val.u64)) { + if (insn->val.u64 == val.u64 && insn->optx == optx) { + return ref; + } else { + break; + } + } + prev_insn = insn; + ref = insn->prev_const; + } + + if (prev_insn) { + prev = prev_insn->prev_const; + prev_insn->prev_const = -ctx->consts_count; + } else { + prev = ctx->prev_const_chain[type]; + ctx->prev_const_chain[type] = -ctx->consts_count; + } + + ref = ir_next_const(ctx); + insn = &ctx->ir_base[ref]; + insn->prev_const = prev; + + insn->optx = optx; + insn->val.u64 = val.u64; + + return ref; +} + +ir_ref ir_const(ir_ctx *ctx, ir_val val, uint8_t type) +{ + return ir_const_ex(ctx, val, type, IR_OPT(type, type)); +} + +ir_ref ir_const_i8(ir_ctx *ctx, int8_t c) +{ + ir_val val; + val.i64 = c; + return ir_const(ctx, val, IR_I8); +} + +ir_ref ir_const_i16(ir_ctx *ctx, int16_t c) +{ + ir_val val; + val.i64 = c; + return ir_const(ctx, val, IR_I16); +} + +ir_ref ir_const_i32(ir_ctx *ctx, int32_t c) +{ + ir_val val; + val.i64 = c; + return ir_const(ctx, val, IR_I32); +} + +ir_ref ir_const_i64(ir_ctx *ctx, int64_t c) +{ + ir_val val; + val.i64 = c; + return ir_const(ctx, val, IR_I64); +} + +ir_ref ir_const_u8(ir_ctx *ctx, uint8_t c) +{ + ir_val val; + val.u64 = c; + return ir_const(ctx, val, IR_U8); +} + +ir_ref ir_const_u16(ir_ctx *ctx, uint16_t c) +{ + ir_val val; + val.u64 = c; + return ir_const(ctx, val, IR_U16); +} + +ir_ref ir_const_u32(ir_ctx *ctx, uint32_t c) +{ + ir_val val; + val.u64 = c; + return ir_const(ctx, val, IR_U32); +} + +ir_ref ir_const_u64(ir_ctx *ctx, uint64_t c) +{ + ir_val val; + val.u64 = c; + return ir_const(ctx, val, IR_U64); +} + +ir_ref ir_const_bool(ir_ctx *ctx, bool c) +{ + return (c) ? 
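All of the typed ir_const_* constructors funnel into ir_const_ex(), which walks one value-sorted chain per type (the prev_const links) so that equal constants intern to a single ref. A usage sketch:

	static void toy_interning(ir_ctx *ctx)
	{
		ir_ref a = ir_const_i32(ctx, 42);
		ir_ref b = ir_const_i32(ctx, 42);	/* found on the IR_I32 chain: b == a */
		ir_ref t = ir_const_bool(ctx, 1);	/* booleans short-circuit to IR_TRUE */
		(void)a; (void)b; (void)t;
	}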
IR_TRUE : IR_FALSE; +} + +ir_ref ir_const_char(ir_ctx *ctx, char c) +{ + ir_val val; + val.i64 = c; + return ir_const(ctx, val, IR_CHAR); +} + +ir_ref ir_const_float(ir_ctx *ctx, float c) +{ + ir_val val; + val.u32_hi = 0; + val.f = c; + return ir_const(ctx, val, IR_FLOAT); +} + +ir_ref ir_const_double(ir_ctx *ctx, double c) +{ + ir_val val; + val.d = c; + return ir_const(ctx, val, IR_DOUBLE); +} + +ir_ref ir_const_addr(ir_ctx *ctx, uintptr_t c) +{ + if (c == 0) { + return IR_NULL; + } + ir_val val; + val.u64 = c; + return ir_const(ctx, val, IR_ADDR); +} + +ir_ref ir_const_func_addr(ir_ctx *ctx, uintptr_t c, uint16_t flags) +{ + if (c == 0) { + return IR_NULL; + } + ir_val val; + val.u64 = c; + return ir_const_ex(ctx, val, IR_ADDR, IR_OPTX(IR_FUNC_ADDR, IR_ADDR, flags)); +} + +ir_ref ir_const_func(ir_ctx *ctx, ir_ref str, uint16_t flags) +{ + ir_val val; + val.addr = str; + return ir_const_ex(ctx, val, IR_ADDR, IR_OPTX(IR_FUNC, IR_ADDR, flags)); +} + +ir_ref ir_const_sym(ir_ctx *ctx, ir_ref str) +{ + ir_val val; + val.addr = str; + return ir_const_ex(ctx, val, IR_ADDR, IR_OPTX(IR_SYM, IR_ADDR, 0)); +} + +ir_ref ir_const_str(ir_ctx *ctx, ir_ref str) +{ + ir_val val; + val.addr = str; + return ir_const_ex(ctx, val, IR_ADDR, IR_OPTX(IR_STR, IR_ADDR, 0)); +} + +ir_ref ir_str(ir_ctx *ctx, const char *s) +{ + size_t len; + + if (!ctx->strtab.data) { + ir_strtab_init(&ctx->strtab, 64, 4096); + } + len = strlen(s); + IR_ASSERT(len <= 0xffffffff); + return ir_strtab_lookup(&ctx->strtab, s, (uint32_t)len, ir_strtab_count(&ctx->strtab) + 1); +} + +ir_ref ir_strl(ir_ctx *ctx, const char *s, size_t len) +{ + if (!ctx->strtab.data) { + ir_strtab_init(&ctx->strtab, 64, 4096); + } + IR_ASSERT(len <= 0xffffffff); + return ir_strtab_lookup(&ctx->strtab, s, (uint32_t)len, ir_strtab_count(&ctx->strtab) + 1); +} + +const char *ir_get_str(const ir_ctx *ctx, ir_ref idx) +{ + IR_ASSERT(ctx->strtab.data); + return ir_strtab_str(&ctx->strtab, idx - 1); +} + +/* IR construction */ +ir_ref ir_emit(ir_ctx *ctx, uint32_t opt, ir_ref op1, ir_ref op2, ir_ref op3) +{ + ir_ref ref = ir_next_insn(ctx); + ir_insn *insn = &ctx->ir_base[ref]; + + insn->optx = opt; + insn->op1 = op1; + insn->op2 = op2; + insn->op3 = op3; + + return ref; +} + +ir_ref ir_emit0(ir_ctx *ctx, uint32_t opt) +{ + return ir_emit(ctx, opt, IR_UNUSED, IR_UNUSED, IR_UNUSED); +} + +ir_ref ir_emit1(ir_ctx *ctx, uint32_t opt, ir_ref op1) +{ + return ir_emit(ctx, opt, op1, IR_UNUSED, IR_UNUSED); +} + +ir_ref ir_emit2(ir_ctx *ctx, uint32_t opt, ir_ref op1, ir_ref op2) +{ + return ir_emit(ctx, opt, op1, op2, IR_UNUSED); +} + +ir_ref ir_emit3(ir_ctx *ctx, uint32_t opt, ir_ref op1, ir_ref op2, ir_ref op3) +{ + return ir_emit(ctx, opt, op1, op2, op3); +} + +static ir_ref _ir_fold_cse(ir_ctx *ctx, uint32_t opt, ir_ref op1, ir_ref op2, ir_ref op3) +{ + ir_ref ref = ctx->prev_insn_chain[opt & IR_OPT_OP_MASK]; + ir_insn *insn; + + if (ref) { + ir_ref limit = ctx->fold_cse_limit; + + if (op1 > limit) { + limit = op1; + } + if (op2 > limit) { + limit = op2; + } + if (op3 > limit) { + limit = op3; + } + while (ref >= limit) { + insn = &ctx->ir_base[ref]; + if (insn->opt == opt && insn->op1 == op1 && insn->op2 == op2 && insn->op3 == op3) { + return ref; + } + if (!insn->prev_insn_offset) { + break; + } + ref = ref - (ir_ref)(uint32_t)insn->prev_insn_offset; + } + } + + return IR_UNUSED; +} + +#define IR_FOLD(X) IR_FOLD1(X, __LINE__) +#define IR_FOLD1(X, Y) IR_FOLD2(X, Y) +#define IR_FOLD2(X, Y) case IR_RULE_ ## Y: + +#define IR_FOLD_ERROR(msg) do { \ + IR_ASSERT(0 && 
(msg)); \
+		goto ir_fold_emit; \
+	} while (0)
+
+#define IR_FOLD_CONST_U(_val) do { \
+		val.u64 = (_val); \
+		goto ir_fold_const; \
+	} while (0)
+
+#define IR_FOLD_CONST_I(_val) do { \
+		val.i64 = (_val); \
+		goto ir_fold_const; \
+	} while (0)
+
+#define IR_FOLD_CONST_D(_val) do { \
+		val.d = (_val); \
+		goto ir_fold_const; \
+	} while (0)
+
+#define IR_FOLD_CONST_F(_val) do { \
+		val.f = (_val); \
+		goto ir_fold_const; \
+	} while (0)
+
+#define IR_FOLD_COPY(op) do { \
+		ref = (op); \
+		goto ir_fold_copy; \
+	} while (0)
+
+#define IR_FOLD_BOOL(cond) \
+	IR_FOLD_COPY((cond) ? IR_TRUE : IR_FALSE)
+
+#define IR_FOLD_NAMED(name)    ir_fold_ ## name:
+#define IR_FOLD_DO_NAMED(name) goto ir_fold_ ## name
+#define IR_FOLD_RESTART        goto ir_fold_restart
+#define IR_FOLD_CSE            goto ir_fold_cse
+#define IR_FOLD_EMIT           goto ir_fold_emit
+#define IR_FOLD_NEXT           break
+
+#include "ir_fold_hash.h"
+
+#define IR_FOLD_RULE(x) ((x) >> 21)
+#define IR_FOLD_KEY(x)  ((x) & 0x1fffff)
+
+/*
+ * key = insn->op | (insn->op1->op << 7) | (insn->op2->op << 14)
+ *
+ * ANY and UNUSED ops are represented by 0
+ */
+
+ir_ref ir_folding(ir_ctx *ctx, uint32_t opt, ir_ref op1, ir_ref op2, ir_ref op3, ir_insn *op1_insn, ir_insn *op2_insn, ir_insn *op3_insn)
+{
+	uint8_t op;
+	ir_ref ref;
+	ir_val val;
+	uint32_t key, any;
+	(void) op3_insn;
+
+restart:
+	key = (opt & IR_OPT_OP_MASK) + ((uint32_t)op1_insn->op << 7) + ((uint32_t)op2_insn->op << 14);
+	any = 0x1fffff;
+	do {
+		uint32_t k = key & any;
+		uint32_t h = _ir_fold_hashkey(k);
+		uint32_t fh = _ir_fold_hash[h];
+		if (IR_FOLD_KEY(fh) == k /*|| (fh = _ir_fold_hash[h+1], (fh & 0x1fffff) == k)*/) {
+			switch (IR_FOLD_RULE(fh)) {
+#include "ir_fold.h"
+				default:
+					break;
+			}
+		}
+		if (any == 0x7f) {
+			/* All patterns are checked. Pass on to CSE. 
*/ + goto ir_fold_cse; + } + /* op2/op1/op op2/_/op _/op1/op _/_/op + * 0x1fffff -> 0x1fc07f -> 0x003fff -> 0x00007f + * from masks to bis: 11 -> 10 -> 01 -> 00 + * + * a b => x y + * 1 1 1 0 + * 1 0 0 1 + * 0 1 0 0 + * + * x = a & b; y = !b + */ + any = ((any & (any << 7)) & 0x1fc000) | (~any & 0x3f80) | 0x7f; + } while (1); + +ir_fold_restart: + if (!(ctx->flags & IR_OPT_IN_SCCP)) { + op1_insn = ctx->ir_base + op1; + op2_insn = ctx->ir_base + op2; + op3_insn = ctx->ir_base + op3; + goto restart; + } else { + ctx->fold_insn.optx = opt; + ctx->fold_insn.op1 = op1; + ctx->fold_insn.op2 = op2; + ctx->fold_insn.op3 = op3; + return IR_FOLD_DO_RESTART; + } +ir_fold_cse: + if (!(ctx->flags & IR_OPT_IN_SCCP)) { + /* Local CSE */ + ref = _ir_fold_cse(ctx, opt, op1, op2, op3); + if (ref) { + return ref; + } + + ref = ir_emit(ctx, opt, op1, op2, op3); + + /* Update local CSE chain */ + op = opt & IR_OPT_OP_MASK; + ir_ref prev = ctx->prev_insn_chain[op]; + ir_insn *insn = ctx->ir_base + ref; + if (!prev || ref - prev > 0xffff) { + /* can't fit into 16-bit */ + insn->prev_insn_offset = 0; + } else { + insn->prev_insn_offset = ref - prev; + } + ctx->prev_insn_chain[op] = ref; + + return ref; + } +ir_fold_emit: + if (!(ctx->flags & IR_OPT_IN_SCCP)) { + return ir_emit(ctx, opt, op1, op2, op3); + } else { + ctx->fold_insn.optx = opt; + ctx->fold_insn.op1 = op1; + ctx->fold_insn.op2 = op2; + ctx->fold_insn.op3 = op3; + return IR_FOLD_DO_EMIT; + } +ir_fold_copy: + if (!(ctx->flags & IR_OPT_IN_SCCP)) { + return ref; + } else { + ctx->fold_insn.op1 = ref; + return IR_FOLD_DO_COPY; + } +ir_fold_const: + if (!(ctx->flags & IR_OPT_IN_SCCP)) { + return ir_const(ctx, val, IR_OPT_TYPE(opt)); + } else { + ctx->fold_insn.type = IR_OPT_TYPE(opt); + ctx->fold_insn.val.u64 = val.u64; + return IR_FOLD_DO_CONST; + } +} + +ir_ref ir_fold(ir_ctx *ctx, uint32_t opt, ir_ref op1, ir_ref op2, ir_ref op3) +{ + if (UNEXPECTED(!(ctx->flags & IR_OPT_FOLDING))) { + if ((opt & IR_OPT_OP_MASK) == IR_PHI) { + opt |= (3 << IR_OPT_INPUTS_SHIFT); + } + return ir_emit(ctx, opt, op1, op2, op3); + } + return ir_folding(ctx, opt, op1, op2, op3, ctx->ir_base + op1, ctx->ir_base + op2, ctx->ir_base + op3); +} + +ir_ref ir_fold0(ir_ctx *ctx, uint32_t opt) +{ + return ir_fold(ctx, opt, IR_UNUSED, IR_UNUSED, IR_UNUSED); +} + +ir_ref ir_fold1(ir_ctx *ctx, uint32_t opt, ir_ref op1) +{ + return ir_fold(ctx, opt, op1, IR_UNUSED, IR_UNUSED); +} + +ir_ref ir_fold2(ir_ctx *ctx, uint32_t opt, ir_ref op1, ir_ref op2) +{ + return ir_fold(ctx, opt, op1, op2, IR_UNUSED); +} + +ir_ref ir_fold3(ir_ctx *ctx, uint32_t opt, ir_ref op1, ir_ref op2, ir_ref op3) +{ + return ir_fold(ctx, opt, op1, op2, op3); +} + +ir_ref ir_emit_N(ir_ctx *ctx, uint32_t opt, int32_t count) +{ + int i; + ir_ref *p, ref = ctx->insns_count; + ir_insn *insn; + + IR_ASSERT(count >= 0); + while (UNEXPECTED(ref + count/4 >= ctx->insns_limit)) { + ir_grow_top(ctx); + } + ctx->insns_count = ref + 1 + count/4; + + insn = &ctx->ir_base[ref]; + insn->optx = opt | (count << IR_OPT_INPUTS_SHIFT); + for (i = 1, p = insn->ops + i; i <= (count|3); i++, p++) { + *p = IR_UNUSED; + } + + return ref; +} + +void ir_set_op(ir_ctx *ctx, ir_ref ref, int32_t n, ir_ref val) +{ + ir_insn *insn = &ctx->ir_base[ref]; + +#ifdef IR_DEBUG + if (n > 3) { + int32_t count; + + IR_ASSERT(IR_OP_HAS_VAR_INPUTS(ir_op_flags[insn->op])); + count = insn->inputs_count; + IR_ASSERT(n <= count); + } +#endif + ir_insn_set_op(insn, n, val); +} + +ir_ref ir_param(ir_ctx *ctx, ir_type type, ir_ref region, const char *name, int pos) +{ 
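The `any` mask walk above retries the table lookup with progressively generalized operands before giving up. Reusing the toy encoding from the earlier sketch (key 0x0d1a05 for ADD(C_I32, C_I32)), the probe sequence is:

	/*
	 *   key & 0x1fffff = 0x0d1a05   rules for ADD(C_I32, C_I32)
	 *   key & 0x1fc07f = 0x0d0005   rules for ADD(_,     C_I32)
	 *   key & 0x003fff = 0x001a05   rules for ADD(C_I32, _)
	 *   key & 0x00007f = 0x000005   rules for ADD(_,     _)
	 *
	 * When no rule fires, pure expressions still get local CSE, so emitting
	 * the same expression twice yields one node:
	 */
	static ir_ref toy_cse(ir_ctx *ctx, ir_ref x, ir_ref y)
	{
		ir_ref a = ir_fold2(ctx, IR_OPT(IR_ADD, IR_I32), x, y);
		ir_ref b = ir_fold2(ctx, IR_OPT(IR_ADD, IR_I32), x, y);
		/* b == a, found via prev_insn_chain[] and the 16-bit prev_insn_offset links */
		return b;
	}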
+ return ir_emit(ctx, IR_OPT(IR_PARAM, type), region, ir_str(ctx, name), pos); +} + +ir_ref ir_var(ir_ctx *ctx, ir_type type, ir_ref region, const char *name) +{ + return ir_emit(ctx, IR_OPT(IR_VAR, type), region, ir_str(ctx, name), IR_UNUSED); +} + +ir_ref ir_bind(ir_ctx *ctx, ir_ref var, ir_ref def) +{ + if (IR_IS_CONST_REF(def)) { + return def; + } + if (!ctx->binding) { + ctx->binding = ir_mem_malloc(sizeof(ir_hashtab));; + ir_hashtab_init(ctx->binding, 16); + } + /* Node may be bound to some special spill slot (using negative "var") */ + IR_ASSERT(var < 0); + if (!ir_hashtab_add(ctx->binding, def, var)) { + /* Add a copy with different binding */ + def = ir_emit2(ctx, IR_OPT(IR_COPY, ctx->ir_base[def].type), def, 1); + ir_hashtab_add(ctx->binding, def, var); + } + return def; +} + +/* Batch construction of def->use edges */ +#if 0 +void ir_build_def_use_lists(ir_ctx *ctx) +{ + ir_ref n, i, j, *p, def; + ir_insn *insn; + uint32_t edges_count; + ir_use_list *lists = ir_mem_calloc(ctx->insns_count, sizeof(ir_use_list)); + ir_ref *edges; + ir_use_list *use_list; + + for (i = IR_UNUSED + 1, insn = ctx->ir_base + i; i < ctx->insns_count;) { + uint32_t flags = ir_op_flags[insn->op]; + + if (UNEXPECTED(IR_OP_HAS_VAR_INPUTS(flags))) { + n = insn->inputs_count; + } else { + n = insn->inputs_count = IR_INPUT_EDGES_COUNT(flags); + } + for (j = n, p = insn->ops + 1; j > 0; j--, p++) { + def = *p; + if (def > 0) { + lists[def].count++; + } + } + n = ir_insn_inputs_to_len(n); + i += n; + insn += n; + } + + edges_count = 0; + for (i = IR_UNUSED + 1, use_list = &lists[i]; i < ctx->insns_count; i++, use_list++) { + use_list->refs = edges_count; + edges_count += use_list->count; + use_list->count = 0; + } + + edges = ir_mem_malloc(edges_count * sizeof(ir_ref)); + for (i = IR_UNUSED + 1, insn = ctx->ir_base + i; i < ctx->insns_count;) { + n = insn->inputs_count; + for (j = n, p = insn->ops + 1; j > 0; j--, p++) { + def = *p; + if (def > 0) { + use_list = &lists[def]; + edges[use_list->refs + use_list->count++] = i; + } + } + n = ir_insn_inputs_to_len(n); + i += n; + insn += n; + } + + ctx->use_edges = edges; + ctx->use_edges_count = edges_count; + ctx->use_lists = lists; +} +#else +void ir_build_def_use_lists(ir_ctx *ctx) +{ + ir_ref n, i, j, *p, def; + ir_insn *insn; + size_t linked_lists_size, linked_lists_top = 0, edges_count = 0; + ir_use_list *lists = ir_mem_calloc(ctx->insns_count, sizeof(ir_use_list)); + ir_ref *edges; + ir_use_list *use_list; + ir_ref *linked_lists; + + linked_lists_size = IR_ALIGNED_SIZE(ctx->insns_count, 1024); + linked_lists = ir_mem_malloc(linked_lists_size * sizeof(ir_ref)); + for (i = IR_UNUSED + 1, insn = ctx->ir_base + i; i < ctx->insns_count;) { + uint32_t flags = ir_op_flags[insn->op]; + + if (UNEXPECTED(IR_OP_HAS_VAR_INPUTS(flags))) { + n = insn->inputs_count; + } else { + n = insn->inputs_count = IR_INPUT_EDGES_COUNT(flags); + } + for (j = n, p = insn->ops + 1; j > 0; j--, p++) { + def = *p; + if (def > 0) { + use_list = &lists[def]; + edges_count++; + if (!use_list->refs) { + /* store a single "use" directly in "refs" using a positive number */ + use_list->refs = i; + use_list->count = 1; + } else { + if (UNEXPECTED(linked_lists_top >= linked_lists_size)) { + linked_lists_size += 1024; + linked_lists = ir_mem_realloc(linked_lists, linked_lists_size * sizeof(ir_ref)); + } + /* form a linked list of "uses" (like in binsort) */ + linked_lists[linked_lists_top] = i; /* store the "use" */ + linked_lists[linked_lists_top + 1] = use_list->refs; /* store list next */ + 
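The first pass of ir_build_def_use_lists() stores uses without knowing per-def counts up front: a single use goes inline into use_list->refs as a positive ref, and further uses spill into linked_lists[] as (use, next) pairs addressed through a negative head; the second pass, just below, unwinds each list into a contiguous slice of use_edges[]. Iterating the finished structure looks like:

	static void toy_iterate_uses(const ir_ctx *ctx, ir_ref def)
	{
		const ir_use_list *ul = &ctx->use_lists[def];
		ir_ref j;

		for (j = 0; j < ul->count; j++) {
			ir_ref use = ctx->use_edges[ul->refs + j];	/* an instruction using "def" */
			(void)use;
		}
	}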
use_list->refs = -(linked_lists_top + 1); /* store a head of the list using a negative number */ + linked_lists_top += 2; + use_list->count++; + } + } + } + n = ir_insn_inputs_to_len(n); + i += n; + insn += n; + } + + ctx->use_edges_count = edges_count; + edges = ir_mem_malloc(edges_count * sizeof(ir_ref)); + for (use_list = lists + ctx->insns_count - 1; use_list != lists; use_list--) { + n = use_list->refs; + if (n) { + /* transform linked list to plain array */ + while (n < 0) { + n = -n; + edges[--edges_count] = linked_lists[n - 1]; + n = linked_lists[n]; + } + IR_ASSERT(n > 0); + edges[--edges_count] = n; + use_list->refs = edges_count; + } + } + + ctx->use_edges = edges; + ctx->use_lists = lists; + ir_mem_free(linked_lists); +} +#endif + +/* Helper Data Types */ +void ir_array_grow(ir_array *a, uint32_t size) +{ + IR_ASSERT(size > a->size); + a->refs = ir_mem_realloc(a->refs, size * sizeof(ir_ref)); + a->size = size; +} + +void ir_array_insert(ir_array *a, uint32_t i, ir_ref val) +{ + IR_ASSERT(i < a->size); + if (a->refs[a->size - 1]) { + ir_array_grow(a, a->size + 1); + } + memmove(a->refs + i + 1, a->refs + i, (a->size - i - 1) * sizeof(ir_ref)); + a->refs[i] = val; +} + +void ir_array_remove(ir_array *a, uint32_t i) +{ + IR_ASSERT(i < a->size); + memmove(a->refs + i, a->refs + i + 1, (a->size - i - 1) * sizeof(ir_ref)); + a->refs[a->size - 1] = IR_UNUSED; +} + +void ir_list_insert(ir_list *l, uint32_t i, ir_ref val) +{ + IR_ASSERT(i < l->len); + if (l->len >= l->a.size) { + ir_array_grow(&l->a, l->a.size + 1); + } + memmove(l->a.refs + i + 1, l->a.refs + i, (l->len - i) * sizeof(ir_ref)); + l->a.refs[i] = val; + l->len++; +} + +void ir_list_remove(ir_list *l, uint32_t i) +{ + IR_ASSERT(i < l->len); + memmove(l->a.refs + i, l->a.refs + i + 1, (l->len - i) * sizeof(ir_ref)); + l->len--; +} + +bool ir_list_contains(const ir_list *l, ir_ref val) +{ + uint32_t i; + + for (i = 0; i < l->len; i++) { + if (ir_array_at(&l->a, i) == val) { + return 1; + } + } + return 0; +} + +static uint32_t ir_hashtab_hash_size(uint32_t size) +{ + size -= 1; + size |= (size >> 1); + size |= (size >> 2); + size |= (size >> 4); + size |= (size >> 8); + size |= (size >> 16); + return size + 1; +} + +static void ir_hashtab_resize(ir_hashtab *tab) +{ + uint32_t old_hash_size = (uint32_t)(-(int32_t)tab->mask); + char *old_data = tab->data; + uint32_t size = tab->size * 2; + uint32_t hash_size = ir_hashtab_hash_size(size); + char *data = ir_mem_malloc(hash_size * sizeof(uint32_t) + size * sizeof(ir_hashtab_bucket)); + ir_hashtab_bucket *p; + uint32_t pos, i; + + memset(data, -1, hash_size * sizeof(uint32_t)); + tab->data = data + (hash_size * sizeof(uint32_t)); + tab->mask = (uint32_t)(-(int32_t)hash_size); + tab->size = size; + + memcpy(tab->data, old_data, tab->count * sizeof(ir_hashtab_bucket)); + ir_mem_free(old_data - (old_hash_size * sizeof(uint32_t))); + + i = tab->count; + pos = 0; + p = (ir_hashtab_bucket*)tab->data; + do { + uint32_t key = p->key | tab->mask; + p->next = ((uint32_t*)tab->data)[(int32_t)key]; + ((uint32_t*)tab->data)[(int32_t)key] = pos; + pos += sizeof(ir_hashtab_bucket); + p++; + } while (--i); +} + +void ir_hashtab_init(ir_hashtab *tab, uint32_t size) +{ + IR_ASSERT(size > 0); + uint32_t hash_size = ir_hashtab_hash_size(size); + char *data = ir_mem_malloc(hash_size * sizeof(uint32_t) + size * sizeof(ir_hashtab_bucket)); + memset(data, -1, hash_size * sizeof(uint32_t)); + tab->data = (data + (hash_size * sizeof(uint32_t))); + tab->mask = (uint32_t)(-(int32_t)hash_size); + tab->size = 
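ir_hashtab places its hash slots immediately *before* tab->data and its buckets at and after it: since mask = -(int32_t)hash_size, the expression key | tab->mask folds any key into the range [-hash_size, -1], so one pointer serves both arrays. Assuming that layout:

	static uint32_t toy_first_bucket_pos(const ir_hashtab *tab, uint32_t key)
	{
		/* slots live at ((uint32_t*)tab->data)[-hash_size .. -1] */
		return ((uint32_t*)tab->data)[(int32_t)(key | tab->mask)];
	}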
size; + tab->count = 0; + tab->pos = 0; +} + +void ir_hashtab_free(ir_hashtab *tab) +{ + uint32_t hash_size = (uint32_t)(-(int32_t)tab->mask); + char *data = (char*)tab->data - (hash_size * sizeof(uint32_t)); + ir_mem_free(data); + tab->data = NULL; +} + +ir_ref ir_hashtab_find(const ir_hashtab *tab, uint32_t key) +{ + const char *data = (const char*)tab->data; + uint32_t pos = ((uint32_t*)data)[(int32_t)(key | tab->mask)]; + ir_hashtab_bucket *p; + + while (pos != IR_INVALID_IDX) { + p = (ir_hashtab_bucket*)(data + pos); + if (p->key == key) { + return p->val; + } + pos = p->next; + } + return IR_INVALID_VAL; +} + +bool ir_hashtab_add(ir_hashtab *tab, uint32_t key, ir_ref val) +{ + char *data = (char*)tab->data; + uint32_t pos = ((uint32_t*)data)[(int32_t)(key | tab->mask)]; + ir_hashtab_bucket *p; + + while (pos != IR_INVALID_IDX) { + p = (ir_hashtab_bucket*)(data + pos); + if (p->key == key) { + return p->val == val; + } + pos = p->next; + } + + if (UNEXPECTED(tab->count >= tab->size)) { + ir_hashtab_resize(tab); + data = tab->data; + } + + pos = tab->pos; + tab->pos += sizeof(ir_hashtab_bucket); + tab->count++; + p = (ir_hashtab_bucket*)(data + pos); + p->key = key; + p->val = val; + key |= tab->mask; + p->next = ((uint32_t*)data)[(int32_t)key]; + ((uint32_t*)data)[(int32_t)key] = pos; + return 1; +} + +static int ir_hashtab_key_cmp(const void *b1, const void *b2) +{ + return ((ir_hashtab_bucket*)b1)->key - ((ir_hashtab_bucket*)b2)->key; +} + +void ir_hashtab_key_sort(ir_hashtab *tab) +{ + ir_hashtab_bucket *p; + uint32_t hash_size, pos, i; + + if (!tab->count) { + return; + } + + qsort(tab->data, tab->count, sizeof(ir_hashtab_bucket), ir_hashtab_key_cmp); + + hash_size = ir_hashtab_hash_size(tab->size); + memset((char*)tab->data - (hash_size * sizeof(uint32_t)), -1, hash_size * sizeof(uint32_t)); + + i = tab->count; + pos = 0; + p = (ir_hashtab_bucket*)tab->data; + do { + uint32_t key = p->key | tab->mask; + p->next = ((uint32_t*)tab->data)[(int32_t)key]; + ((uint32_t*)tab->data)[(int32_t)key] = pos; + pos += sizeof(ir_hashtab_bucket); + p++; + } while (--i); +} + +static void ir_addrtab_resize(ir_hashtab *tab) +{ + uint32_t old_hash_size = (uint32_t)(-(int32_t)tab->mask); + char *old_data = tab->data; + uint32_t size = tab->size * 2; + uint32_t hash_size = ir_hashtab_hash_size(size); + char *data = ir_mem_malloc(hash_size * sizeof(uint32_t) + size * sizeof(ir_addrtab_bucket)); + ir_addrtab_bucket *p; + uint32_t pos, i; + + memset(data, -1, hash_size * sizeof(uint32_t)); + tab->data = data + (hash_size * sizeof(uint32_t)); + tab->mask = (uint32_t)(-(int32_t)hash_size); + tab->size = size; + + memcpy(tab->data, old_data, tab->count * sizeof(ir_addrtab_bucket)); + ir_mem_free(old_data - (old_hash_size * sizeof(uint32_t))); + + i = tab->count; + pos = 0; + p = (ir_addrtab_bucket*)tab->data; + do { + uint32_t key = (uint32_t)p->key | tab->mask; + p->next = ((uint32_t*)tab->data)[(int32_t)key]; + ((uint32_t*)tab->data)[(int32_t)key] = pos; + pos += sizeof(ir_addrtab_bucket); + p++; + } while (--i); +} + +void ir_addrtab_init(ir_hashtab *tab, uint32_t size) +{ + IR_ASSERT(size > 0); + uint32_t hash_size = ir_hashtab_hash_size(size); + char *data = ir_mem_malloc(hash_size * sizeof(uint32_t) + size * sizeof(ir_addrtab_bucket)); + memset(data, -1, hash_size * sizeof(uint32_t)); + tab->data = (data + (hash_size * sizeof(uint32_t))); + tab->mask = (uint32_t)(-(int32_t)hash_size); + tab->size = size; + tab->count = 0; + tab->pos = 0; +} + +void ir_addrtab_free(ir_hashtab *tab) +{ + uint32_t 
hash_size = (uint32_t)(-(int32_t)tab->mask); + char *data = (char*)tab->data - (hash_size * sizeof(uint32_t)); + ir_mem_free(data); + tab->data = NULL; +} + +ir_ref ir_addrtab_find(const ir_hashtab *tab, uint64_t key) +{ + const char *data = (const char*)tab->data; + uint32_t pos = ((uint32_t*)data)[(int32_t)(key | tab->mask)]; + ir_addrtab_bucket *p; + + while (pos != IR_INVALID_IDX) { + p = (ir_addrtab_bucket*)(data + pos); + if (p->key == key) { + return p->val; + } + pos = p->next; + } + return IR_INVALID_VAL; +} + +bool ir_addrtab_add(ir_hashtab *tab, uint64_t key, ir_ref val) +{ + char *data = (char*)tab->data; + uint32_t pos = ((uint32_t*)data)[(int32_t)(key | tab->mask)]; + ir_addrtab_bucket *p; + + while (pos != IR_INVALID_IDX) { + p = (ir_addrtab_bucket*)(data + pos); + if (p->key == key) { + return p->val == val; + } + pos = p->next; + } + + if (UNEXPECTED(tab->count >= tab->size)) { + ir_addrtab_resize(tab); + data = tab->data; + } + + pos = tab->pos; + tab->pos += sizeof(ir_addrtab_bucket); + tab->count++; + p = (ir_addrtab_bucket*)(data + pos); + p->key = key; + p->val = val; + key |= tab->mask; + p->next = ((uint32_t*)data)[(int32_t)key]; + ((uint32_t*)data)[(int32_t)key] = pos; + return 1; +} + +/* Memory API */ +#ifdef _WIN32 +void *ir_mem_mmap(size_t size) +{ + void *ret; + +#ifdef _M_X64 + DWORD size_hi = size >> 32, size_lo = size & 0xffffffff; +#else + DWORD size_hi = 0, size_lo = size; +#endif + + HANDLE h = CreateFileMapping(INVALID_HANDLE_VALUE, NULL, PAGE_EXECUTE_READWRITE, size_hi, size_lo, NULL); + + ret = MapViewOfFile(h, FILE_MAP_READ | FILE_MAP_WRITE | FILE_MAP_EXECUTE, 0, 0, size); + if (!ret) { + CloseHandle(h); + } + + return ret; +} + +int ir_mem_unmap(void *ptr, size_t size) +{ + /* XXX file handle is leaked. */ + UnmapViewOfFile(ptr); + return 1; +} + +int ir_mem_protect(void *ptr, size_t size) +{ + return 1; +} + +int ir_mem_unprotect(void *ptr, size_t size) +{ + return 1; +} + +int ir_mem_flush(void *ptr, size_t size) +{ + return 1; +} +#else +void *ir_mem_mmap(size_t size) +{ + void *ret = mmap(NULL, size, PROT_EXEC, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + if (ret == MAP_FAILED) { + ret = NULL; + } + return ret; +} + +int ir_mem_unmap(void *ptr, size_t size) +{ + munmap(ptr, size); + return 1; +} + +int ir_mem_protect(void *ptr, size_t size) +{ + mprotect(ptr, size, PROT_READ | PROT_EXEC); + return 1; +} + +int ir_mem_unprotect(void *ptr, size_t size) +{ + mprotect(ptr, size, PROT_READ | PROT_WRITE); + return 1; +} + +int ir_mem_flush(void *ptr, size_t size) +{ +#if ((defined(__GNUC__) && ZEND_GCC_VERSION >= 4003) || __has_builtin(__builtin___clear_cache)) + __builtin___clear_cache((char*)(ptr), (char*)(ptr) + size); +#endif +#ifdef HAVE_VALGRIND + VALGRIND_DISCARD_TRANSLATIONS(ptr, size); +#endif + return 1; +} +#endif + +/* Alias Analyses */ +typedef enum _ir_alias { + IR_MAY_ALIAS = -1, + IR_NO_ALIAS = 0, + IR_MUST_ALIAS = 1, +} ir_alias; + +#if 0 +static ir_alias ir_check_aliasing(ir_ctx *ctx, ir_ref addr1, ir_ref addr2) +{ + ir_insn *insn1, *insn2; + + if (addr1 == addr2) { + return IR_MUST_ALIAS; + } + + insn1 = &ctx->ir_base[addr1]; + insn2 = &ctx->ir_base[addr2]; + if (insn1->op == IR_ADD && IR_IS_CONST_REF(insn1->op2)) { + if (insn1->op1 == addr2) { + uintptr_t offset1 = ctx->ir_base[insn1->op2].val.u64; + return (offset1 != 0) ? 
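On POSIX targets the executable region is mapped PROT_EXEC-only, so a writer is expected to bracket code emission with the unprotect/protect pair and flush the instruction cache afterwards. A plausible call sequence using only the functions above (size assumed page-aligned):

	static void *toy_install_code(const void *code, size_t size)
	{
		void *buf = ir_mem_mmap(size);

		if (buf) {
			ir_mem_unprotect(buf, size);	/* make it writable */
			memcpy(buf, code, size);	/* ... emit machine code ... */
			ir_mem_protect(buf, size);	/* back to read + execute */
			ir_mem_flush(buf, size);	/* sync the I-cache where required */
		}
		return buf;
	}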
IR_MUST_ALIAS : IR_NO_ALIAS;
+		} else if (insn2->op == IR_ADD && IR_IS_CONST_REF(insn1->op2) && insn1->op1 == insn2->op1) {
+			if (insn1->op2 == insn2->op2) {
+				return IR_MUST_ALIAS;
+			} else if (IR_IS_CONST_REF(insn1->op2) && IR_IS_CONST_REF(insn2->op2)) {
+				uintptr_t offset1 = ctx->ir_base[insn1->op2].val.u64;
+				uintptr_t offset2 = ctx->ir_base[insn2->op2].val.u64;
+
+				return (offset1 == offset2) ? IR_MUST_ALIAS : IR_NO_ALIAS;
+			}
+		}
+	} else if (insn2->op == IR_ADD && IR_IS_CONST_REF(insn2->op2)) {
+		if (insn2->op1 == addr1) {
+			uintptr_t offset2 = ctx->ir_base[insn2->op2].val.u64;
+
+			return (offset2 != 0) ? IR_MUST_ALIAS : IR_NO_ALIAS;
+		}
+	}
+	return IR_MAY_ALIAS;
+}
+#endif
+
+static ir_alias ir_check_partial_aliasing(const ir_ctx *ctx, ir_ref addr1, ir_ref addr2, ir_type type1, ir_type type2)
+{
+	ir_insn *insn1, *insn2;
+
+	/* this must already be checked */
+	IR_ASSERT(addr1 != addr2);
+
+	insn1 = &ctx->ir_base[addr1];
+	insn2 = &ctx->ir_base[addr2];
+	if (insn1->op == IR_ADD && IR_IS_CONST_REF(insn1->op2)) {
+		if (insn1->op1 == addr2) {
+			uintptr_t offset1 = ctx->ir_base[insn1->op2].val.addr;
+			uintptr_t size2 = ir_type_size[type2];
+
+			return (offset1 < size2) ? IR_MUST_ALIAS : IR_NO_ALIAS;
+		} else if (insn2->op == IR_ADD && IR_IS_CONST_REF(insn1->op2) && insn1->op1 == insn2->op1) {
+			if (insn1->op2 == insn2->op2) {
+				return IR_MUST_ALIAS;
+			} else if (IR_IS_CONST_REF(insn1->op2) && IR_IS_CONST_REF(insn2->op2)) {
+				uintptr_t offset1 = ctx->ir_base[insn1->op2].val.addr;
+				uintptr_t offset2 = ctx->ir_base[insn2->op2].val.addr;
+
+				if (offset1 == offset2) {
+					return IR_MUST_ALIAS;
+				} else if (type1 == type2) {
+					return IR_NO_ALIAS;
+				} else {
+					/* check for partial intersection */
+					uintptr_t size1 = ir_type_size[type1];
+					uintptr_t size2 = ir_type_size[type2];
+
+					if (offset1 > offset2) {
+						return offset1 < offset2 + size2 ? IR_MUST_ALIAS : IR_NO_ALIAS;
+					} else {
+						return offset2 < offset1 + size1 ? IR_MUST_ALIAS : IR_NO_ALIAS;
+					}
+				}
+			}
+		}
+	} else if (insn2->op == IR_ADD && IR_IS_CONST_REF(insn2->op2)) {
+		if (insn2->op1 == addr1) {
+			uintptr_t offset2 = ctx->ir_base[insn2->op2].val.addr;
+			uintptr_t size1 = ir_type_size[type1];
+
+			return (offset2 < size1) ? IR_MUST_ALIAS : IR_NO_ALIAS;
+		}
+	}
+	return IR_MAY_ALIAS;
+}
+
+static ir_ref ir_find_aliasing_load(ir_ctx *ctx, ir_ref ref, ir_type type, ir_ref addr)
+{
+	ir_ref limit = (addr > 0) ? 
addr : 1;
+	ir_insn *insn;
+	uint32_t modified_regset = 0;
+
+	while (ref > limit) {
+		insn = &ctx->ir_base[ref];
+		if (insn->op == IR_LOAD) {
+			if (insn->type == type && insn->op2 == addr) {
+				return ref; /* load forwarding (L2L) */
+			}
+		} else if (insn->op == IR_STORE) {
+			ir_type type2 = ctx->ir_base[insn->op3].type;
+
+			if (insn->op2 == addr) {
+				if (type2 == type) {
+					ref = insn->op3;
+					insn = &ctx->ir_base[ref];
+					if (insn->op == IR_RLOAD && (modified_regset & (1 << insn->op2))) {
+						/* anti-dependency */
+						return IR_UNUSED;
+					}
+					return ref; /* store forwarding (S2L) */
+				} else if (IR_IS_TYPE_INT(type) && ir_type_size[type2] > ir_type_size[type]) {
+					return ir_fold1(ctx, IR_OPT(IR_TRUNC, type), insn->op3); /* partial store forwarding (S2L) */
+				} else {
+					return IR_UNUSED;
+				}
+			} else if (ir_check_partial_aliasing(ctx, addr, insn->op2, type, type2) != IR_NO_ALIAS) {
+				return IR_UNUSED;
+			}
+		} else if (insn->op == IR_RSTORE) {
+			modified_regset |= (1 << insn->op3);
+		} else if (insn->op >= IR_START || insn->op == IR_CALL) {
+			return IR_UNUSED;
+		}
+		ref = insn->op1;
+	}
+	return IR_UNUSED;
+}
+
+/* IR Construction API */
+
+ir_ref _ir_PARAM(ir_ctx *ctx, ir_type type, const char* name, ir_ref num)
+{
+	IR_ASSERT(ctx->control);
+	IR_ASSERT(ctx->ir_base[ctx->control].op == IR_START);
+	IR_ASSERT(ctx->insns_count == num + 1);
+	return ir_param(ctx, type, ctx->control, name, num);
+}
+
+ir_ref _ir_VAR(ir_ctx *ctx, ir_type type, const char* name)
+{
+//	IR_ASSERT(ctx->control);
+//	IR_ASSERT(IR_IS_BB_START(ctx->ir_base[ctx->control].op));
+//	TODO: VAR may be inserted after some "memory" instruction
+	ir_ref ref = ctx->control;
+
+	while (1) {
+		IR_ASSERT(ctx->control);
+		if (IR_IS_BB_START(ctx->ir_base[ref].op)) {
+			break;
+		}
+		ref = ctx->ir_base[ref].op1;
+	}
+	return ir_var(ctx, type, ref, name);
+}
+
+ir_ref _ir_PHI_2(ir_ctx *ctx, ir_type type, ir_ref src1, ir_ref src2)
+{
+	IR_ASSERT(ctx->control);
+	IR_ASSERT(ctx->ir_base[ctx->control].op == IR_MERGE || ctx->ir_base[ctx->control].op == IR_LOOP_BEGIN);
+	if (src1 == src2 && src1 != IR_UNUSED) {
+		return src1;
+	}
+	return ir_emit3(ctx, IR_OPTX(IR_PHI, type, 3), ctx->control, src1, src2);
+}
+
+ir_ref _ir_PHI_N(ir_ctx *ctx, ir_type type, ir_ref n, ir_ref *inputs)
+{
+	IR_ASSERT(ctx->control);
+	IR_ASSERT(n > 0);
+	if (n == 1) {
+		return inputs[0];
+	} else {
+		ir_ref i;
+		ir_ref ref = inputs[0];
+
+		IR_ASSERT(ctx->ir_base[ctx->control].op == IR_MERGE || ctx->ir_base[ctx->control].op == IR_LOOP_BEGIN);
+		if (ref != IR_UNUSED) {
+			for (i = 1; i < n; i++) {
+				if (inputs[i] != ref) {
+					break;
+				}
+			}
+			if (i == n) {
+				/* all the same */
+				return ref;
+			}
+		}
+
+		ref = ir_emit_N(ctx, IR_OPT(IR_PHI, type), n + 1);
+		ir_set_op(ctx, ref, 1, ctx->control);
+		for (i = 0; i < n; i++) {
+			ir_set_op(ctx, ref, i + 2, inputs[i]);
+		}
+		return ref;
+	}
+}
+
+void _ir_PHI_SET_OP(ir_ctx *ctx, ir_ref phi, ir_ref pos, ir_ref src)
+{
+	ir_insn *insn = &ctx->ir_base[phi];
+	ir_ref *ops = insn->ops;
+
+	IR_ASSERT(insn->op == IR_PHI);
+	IR_ASSERT(ctx->ir_base[insn->op1].op == IR_MERGE || ctx->ir_base[insn->op1].op == IR_LOOP_BEGIN);
+	IR_ASSERT(pos > 0 && pos < insn->inputs_count);
+	pos++; /* op1 is used for control */
+	ops[pos] = src;
+}
+
+void _ir_START(ir_ctx *ctx)
+{
+	IR_ASSERT(!ctx->control);
+	IR_ASSERT(ctx->insns_count == 1);
+	ctx->control = ir_emit0(ctx, IR_START);
+}
+
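+/* Construction sketch (editor's illustration): building "x = cond ? a : b"
+ * with the raw builder functions in this file. Assumes ctx was prepared
+ * with ir_init() and that cond, a and b are existing refs (e.g. constants
+ * created via ir_const_i32(), which are position independent):
+ *
+ *     _ir_START(ctx);
+ *     ir_ref if_ref = _ir_IF(ctx, cond);
+ *     _ir_IF_TRUE(ctx, if_ref);
+ *     ir_ref end1 = _ir_END(ctx);
+ *     _ir_IF_FALSE(ctx, if_ref);
+ *     ir_ref end2 = _ir_END(ctx);
+ *     _ir_MERGE_2(ctx, end1, end2);
+ *     ir_ref x = _ir_PHI_2(ctx, IR_I32, a, b);
+ *     _ir_RETURN(ctx, x);
+ */
+
+void _ir_ENTRY(ir_ctx *ctx, ir_ref src, ir_ref num)
+{
+	IR_ASSERT(!ctx->control);
+	/* fake control edge */
+	IR_ASSERT((ir_op_flags[ctx->ir_base[src].op] &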
IR_OP_FLAG_TERMINATOR)
+		|| ctx->ir_base[src].op == IR_END
+		|| ctx->ir_base[src].op == IR_LOOP_END); /* return from a recursive call */
+	ctx->control = ir_emit2(ctx, IR_ENTRY, src, num);
+}
+
+void _ir_BEGIN(ir_ctx *ctx, ir_ref src)
+{
+	IR_ASSERT(!ctx->control);
+	if (src
+	 && src + 1 == ctx->insns_count
+	 && ctx->ir_base[src].op == IR_END) {
+		/* merge with the last END */
+		ctx->control = ctx->ir_base[src].op1;
+		ctx->insns_count--;
+	} else {
+		ctx->control = ir_emit1(ctx, IR_BEGIN, src);
+	}
+}
+
+ir_ref _ir_IF(ir_ctx *ctx, ir_ref condition)
+{
+	ir_ref if_ref;
+
+	IR_ASSERT(ctx->control);
+	if_ref = ir_emit2(ctx, IR_IF, ctx->control, condition);
+	ctx->control = IR_UNUSED;
+	return if_ref;
+}
+
+void _ir_IF_TRUE(ir_ctx *ctx, ir_ref if_ref)
+{
+	IR_ASSERT(!ctx->control);
+	IR_ASSERT(if_ref);
+	IR_ASSERT(ctx->ir_base[if_ref].op == IR_IF);
+	ctx->control = ir_emit1(ctx, IR_IF_TRUE, if_ref);
+}
+
+void _ir_IF_TRUE_cold(ir_ctx *ctx, ir_ref if_ref)
+{
+	IR_ASSERT(!ctx->control);
+	IR_ASSERT(if_ref);
+	IR_ASSERT(ctx->ir_base[if_ref].op == IR_IF);
+	/* op2 is used as an indicator of low-probability branch */
+	ctx->control = ir_emit2(ctx, IR_IF_TRUE, if_ref, 1);
+}
+
+void _ir_IF_FALSE(ir_ctx *ctx, ir_ref if_ref)
+{
+	IR_ASSERT(!ctx->control);
+	IR_ASSERT(if_ref);
+	IR_ASSERT(ctx->ir_base[if_ref].op == IR_IF);
+	ctx->control = ir_emit1(ctx, IR_IF_FALSE, if_ref);
+}
+
+void _ir_IF_FALSE_cold(ir_ctx *ctx, ir_ref if_ref)
+{
+	IR_ASSERT(!ctx->control);
+	IR_ASSERT(if_ref);
+	IR_ASSERT(ctx->ir_base[if_ref].op == IR_IF);
+	/* op2 is used as an indicator of low-probability branch */
+	ctx->control = ir_emit2(ctx, IR_IF_FALSE, if_ref, 1);
+}
+
+ir_ref _ir_END(ir_ctx *ctx)
+{
+	ir_ref ref;
+
+	IR_ASSERT(ctx->control);
+	ref = ir_emit1(ctx, IR_END, ctx->control);
+	ctx->control = IR_UNUSED;
+	return ref;
+}
+
+void _ir_MERGE_2(ir_ctx *ctx, ir_ref src1, ir_ref src2)
+{
+	IR_ASSERT(!ctx->control);
+	ctx->control = ir_emit2(ctx, IR_OPTX(IR_MERGE, IR_VOID, 2), src1, src2);
+}
+
+void _ir_MERGE_N(ir_ctx *ctx, ir_ref n, ir_ref *inputs)
+{
+	IR_ASSERT(!ctx->control);
+	IR_ASSERT(n > 0);
+	if (n == 1) {
+		_ir_BEGIN(ctx, inputs[0]);
+	} else {
+		ir_ref *ops;
+
+		ctx->control = ir_emit_N(ctx, IR_MERGE, n);
+		ops = ctx->ir_base[ctx->control].ops;
+		while (n) {
+			n--;
+			ops[n + 1] = inputs[n];
+		}
+	}
+}
+
+void _ir_MERGE_SET_OP(ir_ctx *ctx, ir_ref merge, ir_ref pos, ir_ref src)
+{
+	ir_insn *insn = &ctx->ir_base[merge];
+	ir_ref *ops = insn->ops;
+
+	IR_ASSERT(insn->op == IR_MERGE || insn->op == IR_LOOP_BEGIN);
+	IR_ASSERT(pos > 0 && pos <= insn->inputs_count);
+	ops[pos] = src;
+}
+
+ir_ref _ir_END_LIST(ir_ctx *ctx, ir_ref list)
+{
+	ir_ref ref;
+
+	IR_ASSERT(ctx->control);
+	IR_ASSERT(!list || ctx->ir_base[list].op == IR_END);
+	/* create a linked list of END nodes with the same destination through END.op2 */
+	ref = ir_emit2(ctx, IR_END, ctx->control, list);
+	ctx->control = IR_UNUSED;
+	return ref;
+}
+
+void _ir_MERGE_LIST(ir_ctx *ctx, ir_ref list)
+{
+	ir_ref ref = list;
+
+	if (list != IR_UNUSED) {
+		uint32_t n = 0;
+
+		IR_ASSERT(!ctx->control);
+
+		/* count the number of inputs */
+		do {
+			ir_insn *insn = &ctx->ir_base[ref];
+
+			IR_ASSERT(insn->op == IR_END);
+			ref = insn->op2;
+			n++;
+		} while (ref != IR_UNUSED);
+
+
+		/* create MERGE node */
+		IR_ASSERT(n > 0);
+		if (n == 1) {
+			ctx->ir_base[list].op2 = IR_UNUSED;
+			_ir_BEGIN(ctx, list);
+		} else {
+			ctx->control = ir_emit_N(ctx, IR_MERGE, n);
+			ref = list;
+			while (n) {
+				ir_insn *insn = &ctx->ir_base[ref];
+
+				ir_set_op(ctx, ctx->control, n, ref);
+				ref =
insn->op2;
+				insn->op2 = IR_UNUSED;
+				n--;
+			}
+		}
+	}
+}
+
+ir_ref _ir_LOOP_BEGIN(ir_ctx *ctx, ir_ref src1)
+{
+	IR_ASSERT(!ctx->control);
+	ctx->control = ir_emit2(ctx, IR_OPTX(IR_LOOP_BEGIN, IR_VOID, 2), src1, IR_UNUSED);
+	return ctx->control;
+}
+
+ir_ref _ir_LOOP_END(ir_ctx *ctx)
+{
+	ir_ref ref;
+
+	IR_ASSERT(ctx->control);
+	ref = ir_emit1(ctx, IR_LOOP_END, ctx->control);
+	ctx->control = IR_UNUSED;
+	return ref;
+}
+
+ir_ref _ir_CALL(ir_ctx *ctx, ir_type type, ir_ref func)
+{
+	IR_ASSERT(ctx->control);
+	return ctx->control = ir_emit2(ctx, IR_OPTX(IR_CALL, type, 2), ctx->control, func);
+}
+
+ir_ref _ir_CALL_1(ir_ctx *ctx, ir_type type, ir_ref func, ir_ref arg1)
+{
+	IR_ASSERT(ctx->control);
+	return ctx->control = ir_emit3(ctx, IR_OPTX(IR_CALL, type, 3), ctx->control, func, arg1);
+}
+
+ir_ref _ir_CALL_2(ir_ctx *ctx, ir_type type, ir_ref func, ir_ref arg1, ir_ref arg2)
+{
+	ir_ref call;
+
+	IR_ASSERT(ctx->control);
+	call = ir_emit_N(ctx, IR_OPT(IR_CALL, type), 4);
+	ir_set_op(ctx, call, 1, ctx->control);
+	ir_set_op(ctx, call, 2, func);
+	ir_set_op(ctx, call, 3, arg1);
+	ir_set_op(ctx, call, 4, arg2);
+	ctx->control = call;
+	return call;
+}
+
+ir_ref _ir_CALL_3(ir_ctx *ctx, ir_type type, ir_ref func, ir_ref arg1, ir_ref arg2, ir_ref arg3)
+{
+	ir_ref call;
+
+	IR_ASSERT(ctx->control);
+	call = ir_emit_N(ctx, IR_OPT(IR_CALL, type), 5);
+	ir_set_op(ctx, call, 1, ctx->control);
+	ir_set_op(ctx, call, 2, func);
+	ir_set_op(ctx, call, 3, arg1);
+	ir_set_op(ctx, call, 4, arg2);
+	ir_set_op(ctx, call, 5, arg3);
+	ctx->control = call;
+	return call;
+}
+
+ir_ref _ir_CALL_4(ir_ctx *ctx, ir_type type, ir_ref func, ir_ref arg1, ir_ref arg2, ir_ref arg3, ir_ref arg4)
+{
+	ir_ref call;
+
+	IR_ASSERT(ctx->control);
+	call = ir_emit_N(ctx, IR_OPT(IR_CALL, type), 6);
+	ir_set_op(ctx, call, 1, ctx->control);
+	ir_set_op(ctx, call, 2, func);
+	ir_set_op(ctx, call, 3, arg1);
+	ir_set_op(ctx, call, 4, arg2);
+	ir_set_op(ctx, call, 5, arg3);
+	ir_set_op(ctx, call, 6, arg4);
+	ctx->control = call;
+	return call;
+}
+
+ir_ref _ir_CALL_5(ir_ctx *ctx, ir_type type, ir_ref func, ir_ref arg1, ir_ref arg2, ir_ref arg3, ir_ref arg4, ir_ref arg5)
+{
+	ir_ref call;
+
+	IR_ASSERT(ctx->control);
+	call = ir_emit_N(ctx, IR_OPT(IR_CALL, type), 7);
+	ir_set_op(ctx, call, 1, ctx->control);
+	ir_set_op(ctx, call, 2, func);
+	ir_set_op(ctx, call, 3, arg1);
+	ir_set_op(ctx, call, 4, arg2);
+	ir_set_op(ctx, call, 5, arg3);
+	ir_set_op(ctx, call, 6, arg4);
+	ir_set_op(ctx, call, 7, arg5);
+	ctx->control = call;
+	return call;
+}
+
+ir_ref _ir_CALL_N(ir_ctx *ctx, ir_type type, ir_ref func, uint32_t count, ir_ref *args)
+{
+	ir_ref call;
+	uint32_t i;
+
+	IR_ASSERT(ctx->control);
+	call = ir_emit_N(ctx, IR_OPT(IR_CALL, type), count + 2);
+	ir_set_op(ctx, call, 1, ctx->control);
+	ir_set_op(ctx, call, 2, func);
+	for (i = 0; i < count; i++) {
+		ir_set_op(ctx, call, i + 3, args[i]);
+	}
+	ctx->control = call;
+	return call;
+}
+
+void _ir_UNREACHABLE(ir_ctx *ctx)
+{
+	IR_ASSERT(ctx->control);
+	ctx->control = ir_emit3(ctx, IR_UNREACHABLE, ctx->control, IR_UNUSED, ctx->ir_base[1].op1);
+	ctx->ir_base[1].op1 = ctx->control;
+	ctx->control = IR_UNUSED;
+}
+
+void _ir_TAILCALL(ir_ctx *ctx, ir_type type, ir_ref func)
+{
+	IR_ASSERT(ctx->control);
+	if (ctx->ret_type == (ir_type)-1) {
+		ctx->ret_type = type;
+	}
+	IR_ASSERT(ctx->ret_type == type && "conflicting return type");
+	ctx->control = ir_emit2(ctx, IR_OPTX(IR_TAILCALL, type, 2), ctx->control, func);
+	_ir_UNREACHABLE(ctx);
+}
+
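+/* Usage sketch (editor's illustration): emitting a call to a C helper that
+ * takes two arguments. "helper" is assumed to be a constant created with
+ * ir_const_func_addr(), and arg1/arg2 are existing refs:
+ *
+ *     ir_ref ret = _ir_CALL_2(ctx, IR_I64, helper, arg1, arg2);
+ *
+ * The call is linked into the control chain (ctx->control), so later loads
+ * cannot be forwarded or reordered across it - see ir_find_aliasing_load()
+ * above, which stops its backward scan at IR_CALL. */
+
+void _ir_TAILCALL_1(ir_ctx *ctx, ir_type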
type, ir_ref func, ir_ref arg1)
+{
+	IR_ASSERT(ctx->control);
+	if (ctx->ret_type == (ir_type)-1) {
+		ctx->ret_type = type;
+	}
+	IR_ASSERT(ctx->ret_type == type && "conflicting return type");
+	ctx->control = ir_emit3(ctx, IR_OPTX(IR_TAILCALL, type, 3), ctx->control, func, arg1);
+	_ir_UNREACHABLE(ctx);
+}
+
+void _ir_TAILCALL_2(ir_ctx *ctx, ir_type type, ir_ref func, ir_ref arg1, ir_ref arg2)
+{
+	ir_ref call;
+
+	IR_ASSERT(ctx->control);
+	if (ctx->ret_type == (ir_type)-1) {
+		ctx->ret_type = type;
+	}
+	IR_ASSERT(ctx->ret_type == type && "conflicting return type");
+	call = ir_emit_N(ctx, IR_OPT(IR_TAILCALL, type), 4);
+	ir_set_op(ctx, call, 1, ctx->control);
+	ir_set_op(ctx, call, 2, func);
+	ir_set_op(ctx, call, 3, arg1);
+	ir_set_op(ctx, call, 4, arg2);
+	ctx->control = call;
+	_ir_UNREACHABLE(ctx);
+}
+
+void _ir_TAILCALL_3(ir_ctx *ctx, ir_type type, ir_ref func, ir_ref arg1, ir_ref arg2, ir_ref arg3)
+{
+	ir_ref call;
+
+	IR_ASSERT(ctx->control);
+	if (ctx->ret_type == (ir_type)-1) {
+		ctx->ret_type = type;
+	}
+	IR_ASSERT(ctx->ret_type == type && "conflicting return type");
+	call = ir_emit_N(ctx, IR_OPT(IR_TAILCALL, type), 5);
+	ir_set_op(ctx, call, 1, ctx->control);
+	ir_set_op(ctx, call, 2, func);
+	ir_set_op(ctx, call, 3, arg1);
+	ir_set_op(ctx, call, 4, arg2);
+	ir_set_op(ctx, call, 5, arg3);
+	ctx->control = call;
+	_ir_UNREACHABLE(ctx);
+}
+
+void _ir_TAILCALL_4(ir_ctx *ctx, ir_type type, ir_ref func, ir_ref arg1, ir_ref arg2, ir_ref arg3, ir_ref arg4)
+{
+	ir_ref call;
+
+	IR_ASSERT(ctx->control);
+	if (ctx->ret_type == (ir_type)-1) {
+		ctx->ret_type = type;
+	}
+	IR_ASSERT(ctx->ret_type == type && "conflicting return type");
+	call = ir_emit_N(ctx, IR_OPT(IR_TAILCALL, type), 6);
+	ir_set_op(ctx, call, 1, ctx->control);
+	ir_set_op(ctx, call, 2, func);
+	ir_set_op(ctx, call, 3, arg1);
+	ir_set_op(ctx, call, 4, arg2);
+	ir_set_op(ctx, call, 5, arg3);
+	ir_set_op(ctx, call, 6, arg4);
+	ctx->control = call;
+	_ir_UNREACHABLE(ctx);
+}
+
+void _ir_TAILCALL_5(ir_ctx *ctx, ir_type type, ir_ref func, ir_ref arg1, ir_ref arg2, ir_ref arg3, ir_ref arg4, ir_ref arg5)
+{
+	ir_ref call;
+
+	IR_ASSERT(ctx->control);
+	if (ctx->ret_type == (ir_type)-1) {
+		ctx->ret_type = type;
+	}
+	IR_ASSERT(ctx->ret_type == type && "conflicting return type");
+	call = ir_emit_N(ctx, IR_OPT(IR_TAILCALL, type), 7);
+	ir_set_op(ctx, call, 1, ctx->control);
+	ir_set_op(ctx, call, 2, func);
+	ir_set_op(ctx, call, 3, arg1);
+	ir_set_op(ctx, call, 4, arg2);
+	ir_set_op(ctx, call, 5, arg3);
+	ir_set_op(ctx, call, 6, arg4);
+	ir_set_op(ctx, call, 7, arg5);
+	ctx->control = call;
+	_ir_UNREACHABLE(ctx);
+}
+
+void _ir_TAILCALL_N(ir_ctx *ctx, ir_type type, ir_ref func, uint32_t count, ir_ref *args)
+{
+	ir_ref call;
+	uint32_t i;
+
+	IR_ASSERT(ctx->control);
+	if (ctx->ret_type == (ir_type)-1) {
+		ctx->ret_type = type;
+	}
+	IR_ASSERT(ctx->ret_type == type && "conflicting return type");
+	call = ir_emit_N(ctx, IR_OPT(IR_TAILCALL, type), count + 2);
+	ir_set_op(ctx, call, 1, ctx->control);
+	ir_set_op(ctx, call, 2, func);
+	for (i = 0; i < count; i++) {
+		ir_set_op(ctx, call, i + 3, args[i]);
+	}
+	ctx->control = call;
+	_ir_UNREACHABLE(ctx);
+}
+
+ir_ref _ir_SWITCH(ir_ctx *ctx, ir_ref val)
+{
+	ir_ref ref;
+
+	IR_ASSERT(ctx->control);
+	ref = ir_emit2(ctx, IR_SWITCH, ctx->control, val);
+	ctx->control = IR_UNUSED;
+	return ref;
+}
+
+void _ir_CASE_VAL(ir_ctx *ctx, ir_ref switch_ref, ir_ref val)
+{
+	IR_ASSERT(!ctx->control);
+	ctx->control = ir_emit2(ctx, IR_CASE_VAL, switch_ref, val);
+}
+
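+/* Usage sketch (editor's illustration): a two-way switch built with these
+ * helpers (_ir_CASE_DEFAULT is defined just below):
+ *
+ *     ir_ref sw = _ir_SWITCH(ctx, val);
+ *     _ir_CASE_VAL(ctx, sw, ir_const_i32(ctx, 1));
+ *     // ... body of case 1 ...
+ *     ir_ref end1 = _ir_END(ctx);
+ *     _ir_CASE_DEFAULT(ctx, sw);
+ *     // ... default body ...
+ *     ir_ref end2 = _ir_END(ctx);
+ *     _ir_MERGE_2(ctx, end1, end2);
+ */
+
+void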
_ir_CASE_DEFAULT(ir_ctx *ctx, ir_ref switch_ref)
+{
+	IR_ASSERT(!ctx->control);
+	ctx->control = ir_emit1(ctx, IR_CASE_DEFAULT, switch_ref);
+}
+
+void _ir_RETURN(ir_ctx *ctx, ir_ref val)
+{
+	ir_type type = (val != IR_UNUSED) ? ctx->ir_base[val].type : IR_VOID;
+
+	IR_ASSERT(ctx->control);
+	if (ctx->ret_type == (ir_type)-1) {
+		ctx->ret_type = type;
+	}
+	IR_ASSERT(ctx->ret_type == type && "conflicting return type");
+	ctx->control = ir_emit3(ctx, IR_RETURN, ctx->control, val, ctx->ir_base[1].op1);
+	ctx->ir_base[1].op1 = ctx->control;
+	ctx->control = IR_UNUSED;
+}
+
+void _ir_IJMP(ir_ctx *ctx, ir_ref addr)
+{
+	IR_ASSERT(ctx->control);
+	ctx->control = ir_emit3(ctx, IR_IJMP, ctx->control, addr, ctx->ir_base[1].op1);
+	ctx->ir_base[1].op1 = ctx->control;
+	ctx->control = IR_UNUSED;
+}
+
+ir_ref _ir_ADD_OFFSET(ir_ctx *ctx, ir_ref addr, uintptr_t offset)
+{
+	if (offset) {
+		addr = ir_fold2(ctx, IR_OPT(IR_ADD, IR_ADDR), addr, ir_const_addr(ctx, offset));
+	}
+	return addr;
+}
+
+void _ir_GUARD(ir_ctx *ctx, ir_ref condition, ir_ref addr)
+{
+	IR_ASSERT(ctx->control);
+	if (condition == IR_TRUE) {
+		return;
+	} else {
+		ir_ref ref = ctx->control;
+		ir_insn *insn;
+
+		while (ref > condition) {
+			insn = &ctx->ir_base[ref];
+			if (insn->op == IR_GUARD) {
+				if (insn->op2 == condition) {
+					return;
+				}
+			} else if (insn->op == IR_GUARD_NOT) {
+				if (insn->op2 == condition) {
+					condition = IR_FALSE;
+					break;
+				}
+			} else if (insn->op >= IR_START) {
+				break;
+			}
+			ref = insn->op1;
+		}
+	}
+	if (ctx->snapshot_create) {
+		ctx->snapshot_create(ctx, addr);
+	}
+	ctx->control = ir_emit3(ctx, IR_GUARD, ctx->control, condition, addr);
+}
+
+void _ir_GUARD_NOT(ir_ctx *ctx, ir_ref condition, ir_ref addr)
+{
+	IR_ASSERT(ctx->control);
+	if (condition == IR_FALSE) {
+		return;
+	} else {
+		ir_ref ref = ctx->control;
+		ir_insn *insn;
+
+		while (ref > condition) {
+			insn = &ctx->ir_base[ref];
+			if (insn->op == IR_GUARD_NOT) {
+				if (insn->op2 == condition) {
+					return;
+				}
+			} else if (insn->op == IR_GUARD) {
+				if (insn->op2 == condition) {
+					condition = IR_TRUE;
+					break;
+				}
+			} else if (insn->op >= IR_START) {
+				break;
+			}
+			ref = insn->op1;
+		}
+	}
+	if (ctx->snapshot_create) {
+		ctx->snapshot_create(ctx, addr);
+	}
+	ctx->control = ir_emit3(ctx, IR_GUARD_NOT, ctx->control, condition, addr);
+}
+
+ir_ref _ir_SNAPSHOT(ir_ctx *ctx, ir_ref n)
+{
+	ir_ref snapshot;
+
+	IR_ASSERT(ctx->control);
+	snapshot = ir_emit_N(ctx, IR_SNAPSHOT, 1 + n); /* op1 is used for control */
+	ctx->ir_base[snapshot].op1 = ctx->control;
+	ctx->control = snapshot;
+	return snapshot;
+}
+
+void _ir_SNAPSHOT_SET_OP(ir_ctx *ctx, ir_ref snapshot, ir_ref pos, ir_ref val)
+{
+	ir_insn *insn = &ctx->ir_base[snapshot];
+	ir_ref *ops = insn->ops;
+
+	IR_ASSERT(val < snapshot);
+	IR_ASSERT(insn->op == IR_SNAPSHOT);
+	pos++; /* op1 is used for control */
+	IR_ASSERT(pos > 1 && pos <= insn->inputs_count);
+	ops[pos] = val;
+}
+
+ir_ref _ir_EXITCALL(ir_ctx *ctx, ir_ref func)
+{
+	IR_ASSERT(ctx->control);
+	return ctx->control = ir_emit2(ctx, IR_OPT(IR_EXITCALL, IR_I32), ctx->control, func);
+}
+
+ir_ref _ir_ALLOCA(ir_ctx *ctx, ir_ref size)
+{
+	IR_ASSERT(ctx->control);
+	return ctx->control = ir_emit2(ctx, IR_OPT(IR_ALLOCA, IR_ADDR), ctx->control, size);
+}
+
+void _ir_AFREE(ir_ctx *ctx, ir_ref size)
+{
+	IR_ASSERT(ctx->control);
+	ctx->control = ir_emit2(ctx, IR_AFREE, ctx->control, size);
+}
+
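+/* Usage sketch (editor's illustration): a speculative type check, the
+ * pattern a tracing JIT builds with the guard API above. Here "type_ref"
+ * is assumed to be a U8 value holding a zval type tag, "4" stands for an
+ * assumed IS_LONG tag, and "exit_addr" is an assumed side-exit address:
+ *
+ *     ir_ref is_long = ir_fold2(ctx, IR_OPT(IR_EQ, IR_BOOL),
+ *                               type_ref, ir_const_u8(ctx, 4));
+ *     _ir_GUARD(ctx, is_long, ir_const_addr(ctx, (uintptr_t)exit_addr));
+ *
+ * If the guard fails at run time, execution deoptimizes to exit_addr.
+ * Note that _ir_GUARD() also scans back along the control chain and drops
+ * guards already dominated by an identical one. */
+
+ir_ref _ir_VLOAD(ir_ctx *ctx, ir_type type, ir_ref var)
+{
+	IR_ASSERT(ctx->control);
+	return ctx->control = ir_emit2(ctx, IR_OPT(IR_VLOAD, type),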
ctx->control, var);
+}
+
+void _ir_VSTORE(ir_ctx *ctx, ir_ref var, ir_ref val)
+{
+	IR_ASSERT(ctx->control);
+	ctx->control = ir_emit3(ctx, IR_VSTORE, ctx->control, var, val);
+}
+
+ir_ref _ir_TLS(ir_ctx *ctx, ir_ref index, ir_ref offset)
+{
+	IR_ASSERT(ctx->control);
+	return ctx->control = ir_emit3(ctx, IR_OPT(IR_TLS, IR_ADDR), ctx->control, index, offset);
+}
+
+ir_ref _ir_RLOAD(ir_ctx *ctx, ir_type type, ir_ref reg)
+{
+	IR_ASSERT(ctx->control);
+	return ctx->control = ir_emit2(ctx, IR_OPT(IR_RLOAD, type), ctx->control, reg);
+}
+
+void _ir_RSTORE(ir_ctx *ctx, ir_ref reg, ir_ref val)
+{
+	IR_ASSERT(ctx->control);
+	ctx->control = ir_emit3(ctx, IR_RSTORE, ctx->control, val, reg);
+}
+
+ir_ref _ir_LOAD(ir_ctx *ctx, ir_type type, ir_ref addr)
+{
+	ir_ref ref = ir_find_aliasing_load(ctx, ctx->control, type, addr);
+
+	IR_ASSERT(ctx->control);
+	if (!ref) {
+		ctx->control = ref = ir_emit2(ctx, IR_OPT(IR_LOAD, type), ctx->control, addr);
+	}
+	return ref;
+}
+
+void _ir_STORE(ir_ctx *ctx, ir_ref addr, ir_ref val)
+{
+	ir_ref limit = (addr > 0) ? addr : 1;
+	ir_ref ref = ctx->control;
+	ir_ref prev = IR_UNUSED;
+	ir_insn *insn;
+	ir_type type = ctx->ir_base[val].type;
+	ir_type type2;
+	bool guarded = 0;
+
+	IR_ASSERT(ctx->control);
+	while (ref > limit) {
+		insn = &ctx->ir_base[ref];
+		if (insn->op == IR_STORE) {
+			if (insn->op2 == addr) {
+				if (ctx->ir_base[insn->op3].type == type) {
+					if (insn->op3 == val) {
+						return;
+					} else {
+						if (!guarded) {
+							if (prev) {
+								ctx->ir_base[prev].op1 = insn->op1;
+							} else {
+								ctx->control = insn->op1;
+							}
+							insn->optx = IR_NOP;
+							insn->op1 = IR_NOP;
+							insn->op2 = IR_NOP;
+							insn->op3 = IR_NOP;
+						}
+						break;
+					}
+				} else {
+					break;
+				}
+			} else {
+				type2 = ctx->ir_base[insn->op3].type;
+				goto check_aliasing;
+			}
+		} else if (insn->op == IR_LOAD) {
+			if (insn->op2 == addr) {
+				break;
+			}
+			type2 = insn->type;
+check_aliasing:
+			if (ir_check_partial_aliasing(ctx, addr, insn->op2, type, type2) != IR_NO_ALIAS) {
+				break;
+			}
+		} else if (insn->op == IR_GUARD || insn->op == IR_GUARD_NOT) {
+			guarded = 1;
+		} else if (insn->op >= IR_START || insn->op == IR_CALL) {
+			break;
+		}
+		prev = ref;
+		ref = insn->op1;
+	}
+	ctx->control = ir_emit3(ctx, IR_STORE, ctx->control, addr, val);
+}
diff --git a/ext/opcache/jit/ir/ir.h b/ext/opcache/jit/ir/ir.h
new file mode 100644
index 00000000000..18ac9e7a33f
--- /dev/null
+++ b/ext/opcache/jit/ir/ir.h
@@ -0,0 +1,924 @@
+/*
+ * IR - Lightweight JIT Compilation Framework
+ * (Public API)
+ * Copyright (C) 2022 Zend by Perforce.
+ * Authors: Dmitry Stogov
+ */
+
+#ifndef IR_H
+#define IR_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <stdbool.h>
+#include <string.h>
+
+#define IR_VERSION "0.0.1"
+
+#ifdef _WIN32
+/* TODO Handle ARM, too. */
+# if defined(_M_X64)
+#  define __SIZEOF_SIZE_T__ 8
+# elif defined(_M_IX86)
+#  define __SIZEOF_SIZE_T__ 4
+# endif
+/* Only little endian is supported for any arch on Windows,
+   so just assume the same for all.
*/ +# define __ORDER_LITTLE_ENDIAN__ 1 +# define __BYTE_ORDER__ __ORDER_LITTLE_ENDIAN__ +# ifndef __has_builtin +# define __has_builtin(arg) (0) +# endif +#endif + +#if defined(IR_TARGET_X86) +# define IR_TARGET "x86" +#elif defined(IR_TARGET_X64) +# ifdef _WIN64 +# define IR_TARGET "Windows-x86_64" /* 64-bit Windows use different ABI and calling convention */ +# else +# define IR_TARGET "x86_64" +# endif +#elif defined(IR_TARGET_AARCH64) +# define IR_TARGET "aarch64" +#else +# error "Unknown IR target" +#endif + +#if defined(__SIZEOF_SIZE_T__) +# if __SIZEOF_SIZE_T__ == 8 +# define IR_64 1 +# elif __SIZEOF_SIZE_T__ != 4 +# error "Unknown addr size" +# endif +#else +# error "Unknown addr size" +#endif + +#if defined(__BYTE_ORDER__) +# if defined(__ORDER_LITTLE_ENDIAN__) +# if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ +# define IR_STRUCT_LOHI(lo, hi) struct {lo; hi;} +# endif +# endif +# if defined(__ORDER_BIG_ENDIAN__) +# if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ +# define IR_STRUCT_LOHI(lo, hi) struct {hi; lo;} +# endif +# endif +#endif +#ifndef IR_STRUCT_LOHI +# error "Unknown byte order" +#endif + +#ifdef __has_attribute +# if __has_attribute(always_inline) +# define IR_ALWAYS_INLINE static inline __attribute__((always_inline)) +# endif +# if __has_attribute(noinline) +# define IR_NEVER_INLINE __attribute__((noinline)) +# endif +#else +# define __has_attribute(x) 0 +#endif + +#ifndef IR_ALWAYS_INLINE +# define IR_ALWAYS_INLINE static inline +#endif +#ifndef IR_NEVER_INLINE +# define IR_NEVER_INLINE +#endif + +#ifdef IR_PHP +# include "ir_php.h" +#endif + +/* IR Type flags (low 4 bits are used for type size) */ +#define IR_TYPE_SIGNED (1<<4) +#define IR_TYPE_UNSIGNED (1<<5) +#define IR_TYPE_FP (1<<6) +#define IR_TYPE_SPECIAL (1<<7) +#define IR_TYPE_BOOL (IR_TYPE_SPECIAL|IR_TYPE_UNSIGNED) +#define IR_TYPE_ADDR (IR_TYPE_SPECIAL|IR_TYPE_UNSIGNED) +#define IR_TYPE_CHAR (IR_TYPE_SPECIAL|IR_TYPE_SIGNED) + +/* List of IR types */ +#define IR_TYPES(_) \ + _(BOOL, bool, b, IR_TYPE_BOOL) \ + _(U8, uint8_t, u8, IR_TYPE_UNSIGNED) \ + _(U16, uint16_t, u16, IR_TYPE_UNSIGNED) \ + _(U32, uint32_t, u32, IR_TYPE_UNSIGNED) \ + _(U64, uint64_t, u64, IR_TYPE_UNSIGNED) \ + _(ADDR, uintptr_t, addr, IR_TYPE_ADDR) \ + _(CHAR, char, c, IR_TYPE_CHAR) \ + _(I8, int8_t, i8, IR_TYPE_SIGNED) \ + _(I16, int16_t, i16, IR_TYPE_SIGNED) \ + _(I32, int32_t, i32, IR_TYPE_SIGNED) \ + _(I64, int64_t, i64, IR_TYPE_SIGNED) \ + _(DOUBLE, double, d, IR_TYPE_FP) \ + _(FLOAT, float, f, IR_TYPE_FP) \ + +#define IR_IS_TYPE_UNSIGNED(t) ((t) < IR_CHAR) +#define IR_IS_TYPE_SIGNED(t) ((t) >= IR_CHAR && (t) < IR_DOUBLE) +#define IR_IS_TYPE_INT(t) ((t) < IR_DOUBLE) +#define IR_IS_TYPE_FP(t) ((t) >= IR_DOUBLE) + +#define IR_TYPE_ENUM(name, type, field, flags) IR_ ## name, + +typedef enum _ir_type { + IR_VOID, + IR_TYPES(IR_TYPE_ENUM) + IR_LAST_TYPE +} ir_type; + +/* List of IR opcodes + * ================== + * + * Each instruction is described by a type (opcode, flags, op1_type, op2_type, op3_type) + * + * flags + * ----- + * v - void + * d - data IR_OP_FLAG_DATA + * r - ref IR_OP_FLAG_DATA alias + * p - pinned IR_OP_FLAG_DATA + IR_OP_FLAG_PINNED + * c - control IR_OP_FLAG_CONTROL + * S - control IR_OP_FLAG_CONTROL + IR_OP_FLAG_BB_START + * E - control IR_OP_FLAG_CONTROL + IR_OP_FLAG_BB_END + * T - control IR_OP_FLAG_CONTROL + IR_OP_FLAG_BB_END + IR_OP_FLAG_TERMINATOR + * l - load IR_OP_FLAG_MEM + IR_OP_FLAG_MEM_LOAD + * s - store IR_OP_FLAG_MEM + IR_OP_FLAG_STORE + * x - call IR_OP_FLAG_MEM + IR_OP_FLAG_CALL + * a - alloc IR_OP_FLAG_MEM + 
IR_OP_FLAG_ALLOC
+ * 0-3  - number of input edges
+ * N    - number of arguments is defined in the insn->inputs_count (MERGE, PHI, CALL)
+ * X1-X3 - number of extra data ops
+ * C    - commutative operation ("d2C" => IR_OP_FLAG_DATA + IR_OP_FLAG_COMMUTATIVE)
+ *
+ * operand types
+ * -------------
+ * ___ - unused
+ * def - reference to a definition op (data-flow use-def dependency edge)
+ * ref - memory reference (data-flow use-def dependency edge)
+ * var - variable reference (data-flow use-def dependency edge)
+ * arg - argument reference CALL/TAILCALL/CARG->CARG
+ * src - reference to a previous control region (IF, IF_TRUE, IF_FALSE, MERGE, LOOP_BEGIN, LOOP_END, RETURN)
+ * reg - data-control dependency on region (PHI, VAR, PARAM)
+ * ret - reference to a previous RETURN instruction (RETURN)
+ * str - string: variable/argument name (VAR, PARAM, CALL, TAILCALL)
+ * num - number: argument number (PARAM)
+ * prb - branch probability 1-99 (0 - unspecified): (IF_TRUE, IF_FALSE, CASE_VAL, CASE_DEFAULT)
+ * opt - optional number
+ *
+ * The order of IR opcodes is carefully selected for efficient folding.
+ * - foldable instructions go first
+ * - NOP is never used (code 0 is used as ANY pattern)
+ * - CONST is the most often used instruction (encoded with 1 bit)
+ * - equality inversion:  EQ <-> NE                         => op ^= 1
+ * - comparison inversion: [U]LT <-> [U]GT, [U]LE <-> [U]GE => op ^= 3
+ */
+
+#define IR_OPS(_) \
+	/* special op (must be the first !!!)                               */ \
+	_(NOP,          v,    ___, ___, ___) /* empty instruction           */ \
+	\
+	/* constants reference                                              */ \
+	_(C_BOOL,       r0,   ___, ___, ___) /* constant                    */ \
+	_(C_U8,         r0,   ___, ___, ___) /* constant                    */ \
+	_(C_U16,        r0,   ___, ___, ___) /* constant                    */ \
+	_(C_U32,        r0,   ___, ___, ___) /* constant                    */ \
+	_(C_U64,        r0,   ___, ___, ___) /* constant                    */ \
+	_(C_ADDR,       r0,   ___, ___, ___) /* constant                    */ \
+	_(C_CHAR,       r0,   ___, ___, ___) /* constant                    */ \
+	_(C_I8,         r0,   ___, ___, ___) /* constant                    */ \
+	_(C_I16,        r0,   ___, ___, ___) /* constant                    */ \
+	_(C_I32,        r0,   ___, ___, ___) /* constant                    */ \
+	_(C_I64,        r0,   ___, ___, ___) /* constant                    */ \
+	_(C_DOUBLE,     r0,   ___, ___, ___) /* constant                    */ \
+	_(C_FLOAT,      r0,   ___, ___, ___) /* constant                    */ \
+	\
+	/* equality ops  */                                                    \
+	_(EQ,           d2C,  def, def, ___) /* equal                       */ \
+	_(NE,           d2C,  def, def, ___) /* not equal                   */ \
+	\
+	/* comparison ops (order matters, LT's opcode must be a multiple of 4 !!!)
*/ \ + _(LT, d2, def, def, ___) /* less */ \ + _(GE, d2, def, def, ___) /* greater or equal */ \ + _(LE, d2, def, def, ___) /* less or equal */ \ + _(GT, d2, def, def, ___) /* greater */ \ + _(ULT, d2, def, def, ___) /* unsigned less */ \ + _(UGE, d2, def, def, ___) /* unsigned greater or equal */ \ + _(ULE, d2, def, def, ___) /* unsigned less or equal */ \ + _(UGT, d2, def, def, ___) /* unsigned greater */ \ + \ + /* arithmetic ops */ \ + _(ADD, d2C, def, def, ___) /* addition */ \ + _(SUB, d2, def, def, ___) /* subtraction (must be ADD+1) */ \ + _(MUL, d2C, def, def, ___) /* multiplication */ \ + _(DIV, d2, def, def, ___) /* division */ \ + _(MOD, d2, def, def, ___) /* modulo */ \ + _(NEG, d1, def, ___, ___) /* change sign */ \ + _(ABS, d1, def, ___, ___) /* absolute value */ \ + /* (LDEXP, MIN, MAX, FPMATH) */ \ + \ + /* type conversion ops */ \ + _(SEXT, d1, def, ___, ___) /* sign extension */ \ + _(ZEXT, d1, def, ___, ___) /* zero extension */ \ + _(TRUNC, d1, def, ___, ___) /* truncates to int type */ \ + _(BITCAST, d1, def, ___, ___) /* binary representation */ \ + _(INT2FP, d1, def, ___, ___) /* int to float conversion */ \ + _(FP2INT, d1, def, ___, ___) /* float to int conversion */ \ + _(FP2FP, d1, def, ___, ___) /* float to float conversion */ \ + \ + /* overflow-check */ \ + _(ADD_OV, d2C, def, def, ___) /* addition */ \ + _(SUB_OV, d2, def, def, ___) /* subtraction */ \ + _(MUL_OV, d2C, def, def, ___) /* multiplication */ \ + _(OVERFLOW, d1, def, ___, ___) /* overflow check add/sub/mul */ \ + \ + /* bitwise and shift ops */ \ + _(NOT, d1, def, ___, ___) /* bitwise NOT */ \ + _(OR, d2C, def, def, ___) /* bitwise OR */ \ + _(AND, d2C, def, def, ___) /* bitwise AND */ \ + _(XOR, d2C, def, def, ___) /* bitwise XOR */ \ + _(SHL, d2, def, def, ___) /* logic shift left */ \ + _(SHR, d2, def, def, ___) /* logic shift right */ \ + _(SAR, d2, def, def, ___) /* arithmetic shift right */ \ + _(ROL, d2, def, def, ___) /* rotate left */ \ + _(ROR, d2, def, def, ___) /* rotate right */ \ + _(BSWAP, d1, def, ___, ___) /* byte swap */ \ + \ + /* branch-less conditional ops */ \ + _(MIN, d2C, def, def, ___) /* min(op1, op2) */ \ + _(MAX, d2C, def, def, ___) /* max(op1, op2) */ \ + _(COND, d3, def, def, def) /* op1 ? op2 : op3 */ \ + \ + /* data-flow and miscellaneous ops */ \ + _(PHI, pN, reg, def, def) /* SSA Phi function */ \ + _(COPY, d1X1, def, opt, ___) /* COPY (last foldable op) */ \ + _(PI, p2, reg, def, ___) /* e-SSA Pi constraint ??? */ \ + /* (USE, RENAME) */ \ + \ + /* data ops */ \ + _(PARAM, p1X2, reg, str, num) /* incoming parameter proj. */ \ + _(VAR, p1X1, reg, str, ___) /* local variable */ \ + _(FUNC_ADDR, r0, ___, ___, ___) /* constant func ref */ \ + _(FUNC, r0, ___, ___, ___) /* constant func ref */ \ + _(SYM, r0, ___, ___, ___) /* constant symbol ref */ \ + _(STR, r0, ___, ___, ___) /* constant str ref */ \ + \ + /* call ops */ \ + _(CALL, xN, src, def, def) /* CALL(src, func, args...) 
*/ \ + _(TAILCALL, xN, src, def, def) /* CALL+RETURN */ \ + \ + /* memory reference and load/store ops */ \ + _(ALLOCA, a2, src, def, ___) /* alloca(def) */ \ + _(AFREE, a2, src, def, ___) /* revert alloca(def) */ \ + _(VADDR, d1, var, ___, ___) /* load address of local var */ \ + _(VLOAD, l2, src, var, ___) /* load value of local var */ \ + _(VSTORE, s3, src, var, def) /* store value to local var */ \ + _(RLOAD, l1X2, src, num, opt) /* load value from register */ \ + _(RSTORE, s2X1, src, def, num) /* store value into register */ \ + _(LOAD, l2, src, ref, ___) /* load from memory */ \ + _(STORE, s3, src, ref, def) /* store to memory */ \ + _(TLS, l1X2, src, num, num) /* thread local variable */ \ + _(TRAP, x1, src, ___, ___) /* DebugBreak */ \ + /* memory reference ops (A, H, U, S, TMP, STR, NEW, X, V) ??? */ \ + \ + /* guards */ \ + _(GUARD, c3, src, def, def) /* IF without second successor */ \ + _(GUARD_NOT , c3, src, def, def) /* IF without second successor */ \ + \ + /* deoptimization */ \ + _(SNAPSHOT, xN, src, def, def) /* SNAPSHOT(src, args...) */ \ + \ + /* control-flow nodes */ \ + _(START, S0X1, ret, ___, ___) /* function start */ \ + _(ENTRY, S1X1, src, num, ___) /* entry with a fake src edge */ \ + _(BEGIN, S1, src, ___, ___) /* block start */ \ + _(IF_TRUE, S1X1, src, prb, ___) /* IF TRUE proj. */ \ + _(IF_FALSE, S1X1, src, prb, ___) /* IF FALSE proj. */ \ + _(CASE_VAL, S2X1, src, def, prb) /* switch proj. */ \ + _(CASE_DEFAULT, S1X1, src, prb, ___) /* switch proj. */ \ + _(MERGE, SN, src, src, src) /* control merge */ \ + _(LOOP_BEGIN, SN, src, src, src) /* loop start */ \ + _(END, E1, src, ___, ___) /* block end */ \ + _(LOOP_END, E1, src, ___, ___) /* loop end */ \ + _(IF, E2, src, def, ___) /* conditional control split */ \ + _(SWITCH, E2, src, def, ___) /* multi-way control split */ \ + _(RETURN, T2X1, src, def, ret) /* function return */ \ + _(IJMP, T2X1, src, def, ret) /* computed goto */ \ + _(UNREACHABLE, T1X2, src, ___, ret) /* unreachable (tailcall, etc) */ \ + \ + /* deoptimization helper */ \ + _(EXITCALL, x2, src, def, ___) /* save CPU regs and call op2 */ \ + + +#define IR_OP_ENUM(name, flags, op1, op2, op3) IR_ ## name, + +typedef enum _ir_op { + IR_OPS(IR_OP_ENUM) +#ifdef IR_PHP + IR_PHP_OPS(IR_OP_ENUM) +#endif + IR_LAST_OP +} ir_op; + +/* IR Opcode and Type Union */ +#define IR_OPT_OP_MASK 0x00ff +#define IR_OPT_TYPE_MASK 0xff00 +#define IR_OPT_TYPE_SHIFT 8 +#define IR_OPT_INPUTS_SHIFT 16 + +#define IR_OPT(op, type) ((uint16_t)(op) | ((uint16_t)(type) << IR_OPT_TYPE_SHIFT)) +#define IR_OPTX(op, type, n) ((uint32_t)(op) | ((uint32_t)(type) << IR_OPT_TYPE_SHIFT) | ((uint32_t)(n) << IR_OPT_INPUTS_SHIFT)) +#define IR_OPT_TYPE(opt) (((opt) & IR_OPT_TYPE_MASK) >> IR_OPT_TYPE_SHIFT) + +/* IR References */ +typedef int32_t ir_ref; + +#define IR_IS_CONST_REF(ref) ((ref) < 0) + +/* IR Constant Value */ +#define IR_UNUSED 0 +#define IR_NULL (-1) +#define IR_FALSE (-2) +#define IR_TRUE (-3) +#define IR_LAST_FOLDABLE_OP IR_COPY + +#define IR_CONSTS_LIMIT_MIN (-(IR_TRUE - 1)) +#define IR_INSNS_LIMIT_MIN (IR_UNUSED + 1) + + +#ifndef IR_64 +# define ADDR_MEMBER uintptr_t addr; +#else +# define ADDR_MEMBER +#endif +typedef union _ir_val { + double d; + uint64_t u64; + int64_t i64; +#ifdef IR_64 + uintptr_t addr; +#endif + IR_STRUCT_LOHI( + union { + uint32_t u32; + int32_t i32; + float f; + ADDR_MEMBER + IR_STRUCT_LOHI( + union { + uint16_t u16; + int16_t i16; + IR_STRUCT_LOHI( + union { + uint8_t u8; + int8_t i8; + bool b; + char c; + }, + uint8_t u8_hi + ); + }, + uint16_t 
u16_hi
+					);
+				},
+				uint32_t u32_hi
+			);
+} ir_val;
+#undef ADDR_MEMBER
+
+/* IR constant flags */
+#define IR_CONST_EMIT          (1<<0)
+#define IR_CONST_FASTCALL_FUNC (1<<1)
+#define IR_CONST_VARARG_FUNC   (1<<2)
+
+/* IR Instruction */
+typedef struct _ir_insn {
+	IR_STRUCT_LOHI(
+		union {
+			IR_STRUCT_LOHI(
+				union {
+					IR_STRUCT_LOHI(
+						uint8_t op,
+						uint8_t type
+					);
+					uint16_t opt;
+				},
+				union {
+					uint16_t inputs_count;       /* number of input control edges for MERGE, PHI, CALL, TAILCALL */
+					uint16_t prev_insn_offset;   /* 16-bit backward offset from current instruction for CSE */
+					uint16_t const_flags;        /* flag to emit constant in rodata section */
+				}
+			);
+			uint32_t optx;
+			ir_ref ops[1];
+		},
+		union {
+			ir_ref op1;
+			ir_ref prev_const;
+		}
+	);
+	union {
+		IR_STRUCT_LOHI(
+			ir_ref op2,
+			ir_ref op3
+		);
+		ir_val val;
+	};
+} ir_insn;
+
+/* IR Hash Tables API (private) */
+typedef struct _ir_hashtab ir_hashtab;
+
+/* IR String Tables API (implementation in ir_strtab.c) */
+typedef struct _ir_strtab {
+	void       *data;
+	uint32_t    mask;
+	uint32_t    size;
+	uint32_t    count;
+	uint32_t    pos;
+	char       *buf;
+	uint32_t    buf_size;
+	uint32_t    buf_top;
+} ir_strtab;
+
+#define ir_strtab_count(strtab) (strtab)->count
+
+typedef void (*ir_strtab_apply_t)(const char *str, uint32_t len, ir_ref val);
+
+void ir_strtab_init(ir_strtab *strtab, uint32_t count, uint32_t buf_size);
+ir_ref ir_strtab_lookup(ir_strtab *strtab, const char *str, uint32_t len, ir_ref val);
+ir_ref ir_strtab_find(const ir_strtab *strtab, const char *str, uint32_t len);
+ir_ref ir_strtab_update(ir_strtab *strtab, const char *str, uint32_t len, ir_ref val);
+const char *ir_strtab_str(const ir_strtab *strtab, ir_ref idx);
+void ir_strtab_apply(const ir_strtab *strtab, ir_strtab_apply_t func);
+void ir_strtab_free(ir_strtab *strtab);
+
+/* IR Context Flags */
+#define IR_FUNCTION            (1<<0) /* Generate a function. */
+#define IR_FASTCALL_FUNC       (1<<1) /* Generate a function with fastcall calling convention, x86 32-bit only. */
+#define IR_VARARG_FUNC         (1<<2)
+#define IR_STATIC              (1<<3)
+#define IR_EXTERN              (1<<4)
+#define IR_CONST               (1<<5)
+
+#define IR_SKIP_PROLOGUE       (1<<6) /* Don't generate function prologue.
*/
+#define IR_USE_FRAME_POINTER   (1<<7)
+#define IR_PREALLOCATED_STACK  (1<<8)
+#define IR_HAS_ALLOCA          (1<<9)
+#define IR_HAS_CALLS           (1<<10)
+#define IR_NO_STACK_COMBINE    (1<<11)
+#define IR_START_BR_TARGET     (1<<12)
+#define IR_ENTRY_BR_TARGET     (1<<13)
+#define IR_GEN_ENDBR           (1<<14)
+#define IR_MERGE_EMPTY_ENTRIES (1<<15)
+
+#define IR_CFG_HAS_LOOPS       (1<<16)
+#define IR_IRREDUCIBLE_CFG     (1<<17)
+
+#define IR_OPT_FOLDING         (1<<18)
+#define IR_OPT_CFG             (1<<19) /* merge BBs by removing END->BEGIN nodes during CFG construction */
+#define IR_OPT_CODEGEN         (1<<20)
+#define IR_OPT_IN_SCCP         (1<<21)
+#define IR_LINEAR              (1<<22)
+#define IR_GEN_NATIVE          (1<<23)
+#define IR_GEN_CODE            (1<<24) /* C or LLVM */
+
+/* Temporary: SCCP -> CFG */
+#define IR_SCCP_DONE           (1<<25)
+
+/* Temporary: Dominators -> Loops */
+#define IR_NO_LOOPS            (1<<25)
+
+/* Temporary: Live Ranges */
+#define IR_LR_HAVE_DESSA_MOVES (1<<25)
+
+/* Temporary: Register Allocator */
+#define IR_RA_HAVE_SPLITS      (1<<25)
+#define IR_RA_HAVE_SPILLS      (1<<26)
+
+/* debug related */
+#ifdef IR_DEBUG
+# define IR_DEBUG_SCCP         (1<<27)
+# define IR_DEBUG_GCM          (1<<28)
+# define IR_DEBUG_SCHEDULE     (1<<29)
+# define IR_DEBUG_RA           (1<<30)
+#endif
+
+typedef struct _ir_ctx           ir_ctx;
+typedef struct _ir_use_list      ir_use_list;
+typedef struct _ir_block         ir_block;
+typedef struct _ir_arena         ir_arena;
+typedef struct _ir_live_interval ir_live_interval;
+typedef struct _ir_live_range    ir_live_range;
+typedef struct _ir_loader        ir_loader;
+typedef int8_t ir_regs[4];
+
+typedef void (*ir_snapshot_create_t)(ir_ctx *ctx, ir_ref addr);
+
+#if defined(IR_TARGET_AARCH64)
+typedef const void *(*ir_get_exit_addr_t)(uint32_t exit_num);
+typedef const void *(*ir_get_veneer_t)(ir_ctx *ctx, const void *addr);
+typedef bool (*ir_set_veneer_t)(ir_ctx *ctx, const void *addr, const void *veneer);
+#endif
+
+struct _ir_ctx {
+	ir_insn           *ir_base;                 /* bidirectional array - instructions grow down, constants grow up */
+	ir_ref             insns_count;             /* number of instructions stored in instructions buffer */
+	ir_ref             insns_limit;             /* size of allocated instructions buffer (extended on overflow) */
+	ir_ref             consts_count;            /* number of constants stored in constants buffer */
+	ir_ref             consts_limit;            /* size of allocated constants buffer (extended on overflow) */
+	uint32_t           flags;                   /* IR context flags (see IR_* defines above) */
+	ir_type            ret_type;                /* Function return type */
+	uint32_t           mflags;                  /* CPU specific flags (see IR_X86_... macros below) */
+	int32_t            status;                  /* non-zero error code (see IR_ERROR_...
macros), app may use negative codes */
+	ir_ref             fold_cse_limit;          /* CSE finds identical insns backward from "insn_count" to "fold_cse_limit" */
+	ir_insn            fold_insn;               /* temporary storage for folding engine */
+	ir_hashtab        *binding;
+	ir_use_list       *use_lists;               /* def->use lists for each instruction */
+	ir_ref            *use_edges;               /* the actual uses: use = ctx->use_edges[ctx->use_lists[def].refs + n] */
+	ir_ref             use_edges_count;         /* number of elements in use_edges[] array */
+	uint32_t           cfg_blocks_count;        /* number of elements in cfg_blocks[] array */
+	uint32_t           cfg_edges_count;         /* number of elements in cfg_edges[] array */
+	ir_block          *cfg_blocks;              /* list of basic blocks (starts from 1) */
+	uint32_t          *cfg_edges;               /* the actual basic blocks predecessors and successors edges */
+	uint32_t          *cfg_map;                 /* map of instructions to basic block number */
+	uint32_t          *rules;                   /* array of target specific code-generation rules (for each instruction) */
+	uint32_t          *vregs;
+	ir_ref             vregs_count;
+	int32_t            spill_base;              /* base register for special spill area (e.g. PHP VM frame pointer) */
+	uint64_t           fixed_regset;            /* fixed registers, excluded for regular register allocation */
+	int32_t            fixed_stack_red_zone;    /* reusable stack allocated by caller (default 0) */
+	int32_t            fixed_stack_frame_size;  /* fixed stack allocated by generated code for spills and registers save/restore */
+	int32_t            fixed_call_stack_size;   /* fixed preallocated stack for parameter passing (default 0) */
+	uint64_t           fixed_save_regset;       /* registers that always saved/restored in prologue/epilogue */
+	ir_live_interval **live_intervals;
+	ir_arena          *arena;
+	ir_live_range     *unused_ranges;
+	ir_regs           *regs;
+	ir_ref            *prev_ref;
+	union {
+		void          *data;
+		ir_ref         control;                 /* used by IR construction API (see ir_builder.h) */
+		ir_ref         bb_start;                /* used by target CPU instruction matcher */
+		ir_ref         vars;                    /* list of VARs (used by register allocator) */
+	};
+	ir_snapshot_create_t   snapshot_create;
+	int32_t            stack_frame_alignment;
+	int32_t            stack_frame_size;        /* spill stack frame size (used by register allocator and code generator) */
+	int32_t            call_stack_size;         /* stack for parameter passing (used by register allocator and code generator) */
+	uint64_t           used_preserved_regs;
+#ifdef IR_TARGET_X86
+	int32_t            param_stack_size;
+	int32_t            ret_slot;
+#endif
+	uint32_t           rodata_offset;
+	uint32_t           jmp_table_offset;
+	uint32_t           entries_count;
+	uint32_t          *entries;                 /* array of ENTRY blocks */
+	void              *osr_entry_loads;
+	void              *code_buffer;
+	size_t             code_buffer_size;
+#if defined(IR_TARGET_AARCH64)
+	int32_t            deoptimization_exits;
+	int32_t            veneers_size;
+	uint32_t           code_size;
+	ir_get_exit_addr_t get_exit_addr;
+	ir_get_veneer_t    get_veneer;
+	ir_set_veneer_t    set_veneer;
+#endif
+	ir_loader         *loader;
+	ir_strtab          strtab;
+	ir_ref             prev_insn_chain[IR_LAST_FOLDABLE_OP + 1];
+	ir_ref             prev_const_chain[IR_LAST_TYPE];
+};
+
+/* Basic IR Construction API (implementation in ir.c) */
+void ir_init(ir_ctx *ctx, uint32_t flags, ir_ref consts_limit, ir_ref insns_limit);
+void ir_free(ir_ctx *ctx);
+void ir_truncate(ir_ctx *ctx);
+
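+/* Lifecycle sketch (editor's illustration): the minimal create/use/destroy
+ * sequence for a context, using only the API declared here:
+ *
+ *     ir_ctx ctx;
+ *     ir_init(&ctx, IR_FUNCTION | IR_OPT_FOLDING, IR_CONSTS_LIMIT_MIN, IR_INSNS_LIMIT_MIN);
+ *     ir_ref forty_two = ir_const_i32(&ctx, 42);  // constants get negative refs
+ *     // ... build the function body, compile it ...
+ *     ir_free(&ctx);
+ */
+
+ir_ref ir_const(ir_ctx *ctx, ir_val val, uint8_t type);
+ir_ref ir_const_i8(ir_ctx *ctx, int8_t c);
+ir_ref ir_const_i16(ir_ctx *ctx, int16_t c);
+ir_ref ir_const_i32(ir_ctx *ctx, int32_t c);
+ir_ref ir_const_i64(ir_ctx *ctx, int64_t c);
+ir_ref ir_const_u8(ir_ctx *ctx, uint8_t c);
+ir_ref ir_const_u16(ir_ctx *ctx, uint16_t c);
+ir_ref ir_const_u32(ir_ctx *ctx, uint32_t c);
+ir_ref ir_const_u64(ir_ctx *ctx, uint64_t c);
+ir_ref ir_const_bool(ir_ctx *ctx, bool c);
+ir_ref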
ir_const_char(ir_ctx *ctx, char c); +ir_ref ir_const_float(ir_ctx *ctx, float c); +ir_ref ir_const_double(ir_ctx *ctx, double c); +ir_ref ir_const_addr(ir_ctx *ctx, uintptr_t c); +ir_ref ir_const_func_addr(ir_ctx *ctx, uintptr_t c, uint16_t flags); + +ir_ref ir_const_func(ir_ctx *ctx, ir_ref str, uint16_t flags); +ir_ref ir_const_sym(ir_ctx *ctx, ir_ref str); +ir_ref ir_const_str(ir_ctx *ctx, ir_ref str); + +ir_ref ir_unique_const_addr(ir_ctx *ctx, uintptr_t c); + +void ir_print_const(const ir_ctx *ctx, const ir_insn *insn, FILE *f, bool quoted); + +ir_ref ir_str(ir_ctx *ctx, const char *s); +ir_ref ir_strl(ir_ctx *ctx, const char *s, size_t len); +const char *ir_get_str(const ir_ctx *ctx, ir_ref idx); + +ir_ref ir_emit(ir_ctx *ctx, uint32_t opt, ir_ref op1, ir_ref op2, ir_ref op3); + +ir_ref ir_emit0(ir_ctx *ctx, uint32_t opt); +ir_ref ir_emit1(ir_ctx *ctx, uint32_t opt, ir_ref op1); +ir_ref ir_emit2(ir_ctx *ctx, uint32_t opt, ir_ref op1, ir_ref op2); +ir_ref ir_emit3(ir_ctx *ctx, uint32_t opt, ir_ref op1, ir_ref op2, ir_ref op3); + +ir_ref ir_emit_N(ir_ctx *ctx, uint32_t opt, int32_t count); +void ir_set_op(ir_ctx *ctx, ir_ref ref, int32_t n, ir_ref val); + +IR_ALWAYS_INLINE void ir_set_op1(ir_ctx *ctx, ir_ref ref, ir_ref val) +{ + ctx->ir_base[ref].op1 = val; +} + +IR_ALWAYS_INLINE void ir_set_op2(ir_ctx *ctx, ir_ref ref, ir_ref val) +{ + ctx->ir_base[ref].op2 = val; +} + +IR_ALWAYS_INLINE void ir_set_op3(ir_ctx *ctx, ir_ref ref, ir_ref val) +{ + ctx->ir_base[ref].op3 = val; +} + +IR_ALWAYS_INLINE ir_ref ir_insn_op(const ir_insn *insn, int32_t n) +{ + const ir_ref *p = insn->ops + n; + return *p; +} + +IR_ALWAYS_INLINE void ir_insn_set_op(ir_insn *insn, int32_t n, ir_ref val) +{ + ir_ref *p = insn->ops + n; + *p = val; +} + +ir_ref ir_fold(ir_ctx *ctx, uint32_t opt, ir_ref op1, ir_ref op2, ir_ref op3); + +ir_ref ir_fold0(ir_ctx *ctx, uint32_t opt); +ir_ref ir_fold1(ir_ctx *ctx, uint32_t opt, ir_ref op1); +ir_ref ir_fold2(ir_ctx *ctx, uint32_t opt, ir_ref op1, ir_ref op2); +ir_ref ir_fold3(ir_ctx *ctx, uint32_t opt, ir_ref op1, ir_ref op2, ir_ref op3); + +ir_ref ir_param(ir_ctx *ctx, ir_type type, ir_ref region, const char *name, int pos); +ir_ref ir_var(ir_ctx *ctx, ir_type type, ir_ref region, const char *name); +ir_ref ir_bind(ir_ctx *ctx, ir_ref var, ir_ref def); + +/* Def -> Use lists */ +void ir_build_def_use_lists(ir_ctx *ctx); + +/* CFG - Control Flow Graph (implementation in ir_cfg.c) */ +int ir_build_cfg(ir_ctx *ctx); +int ir_remove_unreachable_blocks(ir_ctx *ctx); +int ir_build_dominators_tree(ir_ctx *ctx); +int ir_find_loops(ir_ctx *ctx); +int ir_schedule_blocks(ir_ctx *ctx); +void ir_build_prev_refs(ir_ctx *ctx); + +/* SCCP - Sparse Conditional Constant Propagation (implementation in ir_sccp.c) */ +int ir_sccp(ir_ctx *ctx); + +/* GCM - Global Code Motion and scheduling (implementation in ir_gcm.c) */ +int ir_gcm(ir_ctx *ctx); +int ir_schedule(ir_ctx *ctx); + +/* Liveness & Register Allocation (implementation in ir_ra.c) */ +#define IR_REG_NONE -1 +#define IR_REG_SPILL_LOAD (1<<6) +#define IR_REG_SPILL_STORE (1<<6) +#define IR_REG_SPILL_SPECIAL (1<<7) +#define IR_REG_SPILLED(r) \ + ((r) & (IR_REG_SPILL_LOAD|IR_REG_SPILL_STORE|IR_REG_SPILL_SPECIAL)) +#define IR_REG_NUM(r) \ + ((int8_t)((r) == IR_REG_NONE ? 
IR_REG_NONE : ((r) & ~(IR_REG_SPILL_LOAD|IR_REG_SPILL_STORE|IR_REG_SPILL_SPECIAL)))) + +int ir_assign_virtual_registers(ir_ctx *ctx); +int ir_compute_live_ranges(ir_ctx *ctx); +int ir_coalesce(ir_ctx *ctx); +int ir_compute_dessa_moves(ir_ctx *ctx); +int ir_reg_alloc(ir_ctx *ctx); + +int ir_regs_number(void); +bool ir_reg_is_int(int32_t reg); +const char *ir_reg_name(int8_t reg, ir_type type); +int32_t ir_get_spill_slot_offset(ir_ctx *ctx, ir_ref ref); + +/* Target CPU instruction selection and code generation (see ir_x86.c) */ +int ir_match(ir_ctx *ctx); +void *ir_emit_code(ir_ctx *ctx, size_t *size); + +/* Target address resolution (implementation in ir_emit.c) */ +void *ir_resolve_sym_name(const char *name); + +/* Target CPU disassembler (implementation in ir_disasm.c) */ +int ir_disasm_init(void); +void ir_disasm_free(void); +void ir_disasm_add_symbol(const char *name, uint64_t addr, uint64_t size); +const char* ir_disasm_find_symbol(uint64_t addr, int64_t *offset); +int ir_disasm(const char *name, + const void *start, + size_t size, + bool asm_addr, + ir_ctx *ctx, + FILE *f); + +/* Linux perf interface (implementation in ir_perf.c) */ +int ir_perf_jitdump_open(void); +int ir_perf_jitdump_close(void); +int ir_perf_jitdump_register(const char *name, const void *start, size_t size); +void ir_perf_map_register(const char *name, const void *start, size_t size); + +/* GDB JIT interface (implementation in ir_gdb.c) */ +int ir_gdb_register(const char *name, + const void *start, + size_t size, + uint32_t sp_offset, + uint32_t sp_adjustment); +void ir_gdb_unregister_all(void); +bool ir_gdb_present(void); + +/* IR load API (implementation in ir_load.c) */ +struct _ir_loader { + uint32_t default_func_flags; + bool (*init_module) (ir_loader *loader, const char *name, const char *filename, const char *target); + bool (*external_sym_dcl) (ir_loader *loader, const char *name, uint32_t flags); + bool (*external_func_dcl) (ir_loader *loader, const char *name, + uint32_t flags, ir_type ret_type, uint32_t params_count, ir_type *param_types); + bool (*forward_func_dcl) (ir_loader *loader, const char *name, + uint32_t flags, ir_type ret_type, uint32_t params_count, ir_type *param_types); + bool (*sym_dcl) (ir_loader *loader, const char *name, uint32_t flags, size_t size, bool has_data); + bool (*sym_data) (ir_loader *loader, ir_type type, uint32_t count, const void *data); + bool (*sym_data_end) (ir_loader *loader); + bool (*func_init) (ir_loader *loader, ir_ctx *ctx, const char *name); + bool (*func_process) (ir_loader *loader, ir_ctx *ctx, const char *name); + void*(*resolve_sym_name) (ir_loader *loader, const char *name); +}; + +void ir_loader_init(void); +void ir_loader_free(void); +int ir_load(ir_loader *loader, FILE *f); + +/* IR LLVM load API (implementation in ir_load_llvm.c) */ +int ir_load_llvm_bitcode(ir_loader *loader, const char *filename); +int ir_load_llvm_asm(ir_loader *loader, const char *filename); + +/* IR save API (implementation in ir_save.c) */ +void ir_save(const ir_ctx *ctx, FILE *f); + +/* IR debug dump API (implementation in ir_dump.c) */ +void ir_dump(const ir_ctx *ctx, FILE *f); +void ir_dump_dot(const ir_ctx *ctx, FILE *f); +void ir_dump_use_lists(const ir_ctx *ctx, FILE *f); +void ir_dump_cfg(ir_ctx *ctx, FILE *f); +void ir_dump_cfg_map(const ir_ctx *ctx, FILE *f); +void ir_dump_live_ranges(const ir_ctx *ctx, FILE *f); +void ir_dump_codegen(const ir_ctx *ctx, FILE *f); + +/* IR to C conversion (implementation in ir_emit_c.c) */ +int ir_emit_c(ir_ctx *ctx, const char *name, FILE 
*f);
+void ir_emit_c_func_decl(const char *name, uint32_t flags, ir_type ret_type, uint32_t params_count, ir_type *param_types, FILE *f);
+
+/* IR to LLVM conversion (implementation in ir_emit_llvm.c) */
+int ir_emit_llvm(ir_ctx *ctx, const char *name, FILE *f);
+void ir_emit_llvm_func_decl(const char *name, uint32_t flags, ir_type ret_type, uint32_t params_count, ir_type *param_types, FILE *f);
+
+/* IR verification API (implementation in ir_check.c) */
+bool ir_check(const ir_ctx *ctx);
+void ir_consistency_check(void);
+
+/* Code patching (implementation in ir_patch.c) */
+int ir_patch(const void *code, size_t size, uint32_t jmp_table_size, const void *from_addr, const void *to_addr);
+
+/* CPU information (implementation in ir_cpuinfo.c) */
+#if defined(IR_TARGET_X86) || defined(IR_TARGET_X64)
+# define IR_X86_SSE2  (1<<0)
+# define IR_X86_SSE3  (1<<1)
+# define IR_X86_SSSE3 (1<<2)
+# define IR_X86_SSE41 (1<<3)
+# define IR_X86_SSE42 (1<<4)
+# define IR_X86_AVX   (1<<5)
+# define IR_X86_AVX2  (1<<6)
+#endif
+
+uint32_t ir_cpuinfo(void);
+
+/* Deoptimization helpers */
+const void *ir_emit_exitgroup(uint32_t first_exit_point, uint32_t exit_points_per_group, const void *exit_addr, void *code_buffer, size_t code_buffer_size, size_t *size_ptr);
+
+/* A reference IR JIT compiler */
+IR_ALWAYS_INLINE void *ir_jit_compile(ir_ctx *ctx, int opt_level, size_t *size)
+{
+	if (opt_level == 0) {
+		if (ctx->flags & IR_OPT_FOLDING) {
+			// IR_ASSERT(0 && "IR_OPT_FOLDING is incompatible with -O0");
+			return NULL;
+		}
+		ctx->flags &= ~(IR_OPT_CFG | IR_OPT_CODEGEN);
+
+		ir_build_def_use_lists(ctx);
+
+		if (!ir_build_cfg(ctx)
+		 || !ir_match(ctx)
+		 || !ir_assign_virtual_registers(ctx)
+		 || !ir_compute_dessa_moves(ctx)) {
+			return NULL;
+		}
+
+		return ir_emit_code(ctx, size);
+	} else if (opt_level == 1 || opt_level == 2) {
+		if (!(ctx->flags & IR_OPT_FOLDING)) {
+			// IR_ASSERT(0 && "IR_OPT_FOLDING must be set in ir_init() for -O1 and -O2");
+			return NULL;
+		}
+		ctx->flags |= IR_OPT_CFG | IR_OPT_CODEGEN;
+
+		ir_build_def_use_lists(ctx);
+
+		if (opt_level == 2
+		 && !ir_sccp(ctx)) {
+			return NULL;
+		}
+
+		if (!ir_build_cfg(ctx)
+		 || !ir_build_dominators_tree(ctx)
+		 || !ir_find_loops(ctx)
+		 || !ir_gcm(ctx)
+		 || !ir_schedule(ctx)
+		 || !ir_match(ctx)
+		 || !ir_assign_virtual_registers(ctx)
+		 || !ir_compute_live_ranges(ctx)
+		 || !ir_coalesce(ctx)
+		 || !ir_reg_alloc(ctx)
+		 || !ir_schedule_blocks(ctx)) {
+			return NULL;
+		}
+
+		return ir_emit_code(ctx, size);
+	} else {
+		// IR_ASSERT(0 && "wrong optimization level");
+		return NULL;
+	}
+}
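+
+/* Usage sketch (editor's illustration): compiling and running a finished
+ * context with the reference pipeline above. Assumes the IR was built with
+ * IR_OPT_FOLDING set (required for -O1/-O2) and that the function returns
+ * an int32:
+ *
+ *     size_t size;
+ *     typedef int32_t (*fn_t)(void);
+ *     fn_t fn = (fn_t)ir_jit_compile(&ctx, 2, &size);
+ *     if (fn) {
+ *         int32_t result = fn();
+ *     }
+ */
+
+#define IR_ERROR_CODE_MEM_OVERFLOW          1
+#define IR_ERROR_FIXED_STACK_FRAME_OVERFLOW 2
+#define IR_ERROR_UNSUPPORTED_CODE_RULE      3
+#define IR_ERROR_LINK                       4
+#define IR_ERROR_ENCODE                     5
+
+/* IR Memory Allocation */
+#ifndef ir_mem_malloc
+# define ir_mem_malloc   malloc
+#endif
+#ifndef ir_mem_calloc
+# define ir_mem_calloc   calloc
+#endif
+#ifndef ir_mem_realloc
+# define ir_mem_realloc  realloc
+#endif
+#ifndef ir_mem_free
+# define ir_mem_free     free
+#endif
+
+#ifndef ir_mem_pmalloc
+# define ir_mem_pmalloc  malloc
+#endif
+#ifndef ir_mem_pcalloc
+# define ir_mem_pcalloc  calloc
+#endif
+#ifndef ir_mem_prealloc
+# define ir_mem_prealloc realloc
+#endif
+#ifndef ir_mem_pfree
+# define ir_mem_pfree    free
+#endif
+
+void *ir_mem_mmap(size_t size);
+int ir_mem_unmap(void *ptr, size_t size);
+int ir_mem_protect(void *ptr, size_t size);
+int ir_mem_unprotect(void *ptr, size_t size);
+int ir_mem_flush(void *ptr, size_t size);
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+#endif /* IR_H */
diff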
--git a/ext/opcache/jit/ir/ir_aarch64.dasc b/ext/opcache/jit/ir/ir_aarch64.dasc
new file mode 100644
index 00000000000..c4752bf6d77
--- /dev/null
+++ b/ext/opcache/jit/ir/ir_aarch64.dasc
@@ -0,0 +1,5564 @@
+/*
+ * IR - Lightweight JIT Compilation Framework
+ * (Aarch64 native code generator based on DynAsm)
+ * Copyright (C) 2022 Zend by Perforce.
+ * Authors: Dmitry Stogov
+ */
+
+|.arch arm64
+
+|.actionlist dasm_actions
+|.globals ir_lb
+|.section code, cold_code, rodata, jmp_table
+
+#define IR_SPILL_POS_TO_OFFSET(offset) \
+	((ctx->flags & IR_USE_FRAME_POINTER) ? \
+		((offset) + (int32_t)sizeof(void*) * 2) : \
+		((offset) + ctx->call_stack_size))
+
+#define B_IMM    (1<<27)   // signed imm26 * 4
+#define ADR_IMM  (1<<20)   // signed imm21
+#define ADRP_IMM (1LL<<32) // signed imm21 * 4096
+
+static bool aarch64_may_use_b(ir_ctx *ctx, const void *addr)
+{
+	if (ctx->code_buffer) {
+		if (addr >= ctx->code_buffer && (char*)addr < (char*)ctx->code_buffer + ctx->code_buffer_size) {
+			return (ctx->code_buffer_size < B_IMM);
+		} else if ((char*)addr >= (char*)ctx->code_buffer + ctx->code_buffer_size) {
+			return (((char*)addr - (char*)ctx->code_buffer) < B_IMM);
+		} else if (addr < ctx->code_buffer) {
+			return (((char*)(ctx->code_buffer + ctx->code_buffer_size) - (char*)addr) < B_IMM);
+		}
+	}
+	return 1; //???
+}
+
+static bool aarch64_may_use_adr(ir_ctx *ctx, const void *addr)
+{
+	if (ctx->code_buffer) {
+		if (addr >= ctx->code_buffer && (char*)addr < (char*)ctx->code_buffer + ctx->code_buffer_size) {
+			return ( ctx->code_buffer_size < ADR_IMM);
+		} else if ((char*)addr >= (char*)ctx->code_buffer + ctx->code_buffer_size) {
+			return (((char*)addr - (char*)ctx->code_buffer) < ADR_IMM);
+		} else if (addr < ctx->code_buffer) {
+			return (((char*)(ctx->code_buffer + ctx->code_buffer_size) - (char*)addr) < ADR_IMM);
+		}
+	}
+	return 0;
+}
+
+static bool aarch64_may_use_adrp(ir_ctx *ctx, const void *addr)
+{
+	if (ctx->code_buffer) {
+		if (addr >= ctx->code_buffer && (char*)addr < (char*)ctx->code_buffer + ctx->code_buffer_size) {
+			return ( ctx->code_buffer_size < ADRP_IMM);
+		} else if ((char*)addr >= (char*)ctx->code_buffer + ctx->code_buffer_size) {
+			return (((char*)addr - (char*)ctx->code_buffer) < ADRP_IMM);
+		} else if (addr < ctx->code_buffer) {
+			return (((char*)(ctx->code_buffer + ctx->code_buffer_size) - (char*)addr) < ADRP_IMM);
+		}
+	}
+	return 0;
+}
+
+/* Determine whether "val" falls into two allowed ranges:
+ *   Range 1: [0, 0xfff]
+ *   Range 2: LSL #12 to Range 1
+ * Used to guard the immediate encoding for add/adds/sub/subs/cmp/cmn instructions. */
+static bool aarch64_may_encode_imm12(const int64_t val)
+{
+	return (val >= 0 && (val <= 0xfff || !(val & 0xffffffffff000fff)));
+}
+
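+/* Worked example (editor's illustration): 0xfff encodes directly (Range 1),
+ * and 0x123000 encodes as 0x123 with LSL #12 (Range 2, no bits set in the
+ * mask 0xffffffffff000fff). 0x1001 does not encode, because it needs bits
+ * both below and above bit 12. */
+
+/* Determine whether an immediate value can be encoded as the immediate operand of logical instructions.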
*/ +static bool aarch64_may_encode_logical_imm(uint64_t value, uint32_t type_size) +{ + /* fast path: power of two */ + if (value > 0 && !(value & (value - 1))) { + return 1; + } + + if (type_size == 8) { + if (dasm_imm13((uint32_t)value, (uint32_t)(value >> 32)) != -1) { + return 1; + } + } else { + if (dasm_imm13((uint32_t)value, (uint32_t)value) != -1) { + return 1; + } + } + + return 0; +} + +static bool aarch64_may_encode_addr_offset(int64_t offset, uint32_t type_size) +{ + return (uintptr_t)(offset) % type_size == 0 && (uintptr_t)(offset) < 0xfff * type_size; +} + +|.macro ASM_REG_REG_OP, op, type, dst, src +|| if (ir_type_size[type] == 8) { +| op Rx(dst), Rx(src) +|| } else { +| op Rw(dst), Rw(src) +|| } +|.endmacro + +|.macro ASM_REG_REG_REG_OP, op, type, dst, src1, src2 +|| if (ir_type_size[type] == 8) { +| op Rx(dst), Rx(src1), Rx(src2) +|| } else { +| op Rw(dst), Rw(src1), Rw(src2) +|| } +|.endmacro + +|.macro ASM_REG_REG_REG_REG_OP, op, type, dst, src1, src2, src3 +|| if (ir_type_size[type] == 8) { +| op Rx(dst), Rx(src1), Rx(src2), Rx(src3) +|| } else { +| op Rw(dst), Rw(src1), Rw(src2), Rw(src3); +|| } +|.endmacro + +|.macro ASM_REG_REG_IMM_OP, op, type, dst, src1, val +|| if (ir_type_size[type] == 8) { +| op Rx(dst), Rx(src1), #val +|| } else { +| op Rw(dst), Rw(src1), #val +|| } +|.endmacro + +|.macro ASM_REG_IMM_OP, op, type, reg, val +|| if (ir_type_size[type] == 8) { +| op Rx(reg), #val +|| } else { +| op Rw(reg), #val +|| } +|.endmacro + +|.macro ASM_FP_REG_IMM_OP, op, type, reg, val +|| if (type == IR_DOUBLE) { +| op Rd(reg-IR_REG_FP_FIRST), #val +|| } else { +|| IR_ASSERT(type == IR_FLOAT); +| op Rs(reg-IR_REG_FP_FIRST), #val +|| } +|.endmacro + +|.macro ASM_FP_REG_REG_REG_OP, op, type, dst, src1, src2 +|| if (type == IR_DOUBLE) { +| op Rd(dst-IR_REG_FP_FIRST), Rd(src1-IR_REG_FP_FIRST), Rd(src2-IR_REG_FP_FIRST) +|| } else { +|| IR_ASSERT(type == IR_FLOAT); +| op Rs(dst-IR_REG_FP_FIRST), Rs(src1-IR_REG_FP_FIRST), Rs(src2-IR_REG_FP_FIRST) +|| } +|.endmacro + +typedef struct _ir_backend_data { + ir_reg_alloc_data ra_data; + uint32_t dessa_from_block; + dasm_State *dasm_state; + int rodata_label, jmp_table_label; +} ir_backend_data; + +#define IR_GP_REG_NAME(code, name64, name32) \ + #name64, +#define IR_GP_REG_NAME32(code, name64, name32) \ + #name32, +#define IR_FP_REG_NAME(code, name64, name32, name16, name8) \ + #name64, +#define IR_FP_REG_NAME32(code, name64, name32, name16, name8) \ + #name32, + +static const char *_ir_reg_name[IR_REG_NUM] = { + IR_GP_REGS(IR_GP_REG_NAME) + IR_FP_REGS(IR_FP_REG_NAME) +}; + +static const char *_ir_reg_name32[IR_REG_NUM] = { + IR_GP_REGS(IR_GP_REG_NAME32) + IR_FP_REGS(IR_FP_REG_NAME32) +}; + +/* Calling Convention */ +static const int8_t _ir_int_reg_params[IR_REG_INT_ARGS] = { + IR_REG_INT_ARG1, + IR_REG_INT_ARG2, + IR_REG_INT_ARG3, + IR_REG_INT_ARG4, + IR_REG_INT_ARG5, + IR_REG_INT_ARG6, + IR_REG_INT_ARG7, + IR_REG_INT_ARG8, +}; + +static const int8_t _ir_fp_reg_params[IR_REG_FP_ARGS] = { + IR_REG_FP_ARG1, + IR_REG_FP_ARG2, + IR_REG_FP_ARG3, + IR_REG_FP_ARG4, + IR_REG_FP_ARG5, + IR_REG_FP_ARG6, + IR_REG_FP_ARG7, + IR_REG_FP_ARG8, +}; + +const char *ir_reg_name(int8_t reg, ir_type type) +{ + if (reg >= IR_REG_NUM) { + if (reg == IR_REG_SCRATCH) { + return "SCRATCH"; + } else { + IR_ASSERT(reg == IR_REG_ALL); + return "ALL"; + } + } + IR_ASSERT(reg >= 0 && reg < IR_REG_NUM); + if (type == IR_VOID) { + type = (reg < IR_REG_FP_FIRST) ? 
IR_ADDR : IR_DOUBLE; + } + if (ir_type_size[type] == 8) { + return _ir_reg_name[reg]; + } else { + return _ir_reg_name32[reg]; + } +} + +#define IR_RULES(_) \ + _(CMP_INT) \ + _(CMP_FP) \ + _(MUL_PWR2) \ + _(DIV_PWR2) \ + _(MOD_PWR2) \ + _(OP_INT) \ + _(OP_FP) \ + _(BINOP_INT) \ + _(BINOP_FP) \ + _(SHIFT) \ + _(SHIFT_CONST) \ + _(COPY_INT) \ + _(COPY_FP) \ + _(CMP_AND_BRANCH_INT) \ + _(CMP_AND_BRANCH_FP) \ + _(GUARD_CMP_INT) \ + _(GUARD_CMP_FP) \ + _(GUARD_OVERFLOW) \ + _(OVERFLOW_AND_BRANCH) \ + _(MIN_MAX_INT) \ + _(REG_BINOP_INT) \ + _(LOAD_INT) \ + _(LOAD_FP) \ + _(STORE_INT) \ + _(STORE_FP) \ + _(IF_INT) \ + _(RETURN_VOID) \ + _(RETURN_INT) \ + _(RETURN_FP) \ + +#define IR_RULE_ENUM(name) IR_ ## name, + +enum _ir_rule { + IR_FIRST_RULE = IR_LAST_OP, + IR_RULES(IR_RULE_ENUM) + IR_LAST_RULE +}; + +#define IR_RULE_NAME(name) #name, +const char *ir_rule_name[IR_LAST_OP] = { + NULL, + IR_RULES(IR_RULE_NAME) + NULL +}; + +/* register allocation */ +int ir_get_target_constraints(const ir_ctx *ctx, ir_ref ref, ir_target_constraints *constraints) +{ + uint32_t rule = ir_rule(ctx, ref); + const ir_insn *insn; + int n = 0; + int flags = IR_USE_MUST_BE_IN_REG | IR_OP1_MUST_BE_IN_REG | IR_OP2_MUST_BE_IN_REG | IR_OP3_MUST_BE_IN_REG; + + constraints->def_reg = IR_REG_NONE; + constraints->hints_count = 0; + switch (rule & IR_RULE_MASK) { + case IR_BINOP_INT: + insn = &ctx->ir_base[ref]; + n = 0; + if (IR_IS_CONST_REF(insn->op1)) { + constraints->tmp_regs[n] = IR_TMP_REG(1, insn->type, IR_LOAD_SUB_REF, IR_DEF_SUB_REF); + n++; + } + if (IR_IS_CONST_REF(insn->op2) && insn->op1 != insn->op2) { + const ir_insn *val_insn = &ctx->ir_base[insn->op2]; + switch (insn->op) { + case IR_ADD: + case IR_ADD_OV: + case IR_SUB: + case IR_SUB_OV: + if (!aarch64_may_encode_imm12(val_insn->val.u64)) { + constraints->tmp_regs[n] = IR_TMP_REG(2, insn->type, IR_LOAD_SUB_REF, IR_DEF_SUB_REF); + n++; + } + break; + case IR_MUL_OV: + constraints->tmp_regs[n] = IR_TMP_REG(2, insn->type, IR_LOAD_SUB_REF, IR_SAVE_SUB_REF); + n++; + break; + case IR_AND: + case IR_OR: + case IR_XOR: + if (!aarch64_may_encode_logical_imm(val_insn->val.u64, ir_type_size[insn->type])) { + constraints->tmp_regs[n] = IR_TMP_REG(2, insn->type, IR_LOAD_SUB_REF, IR_DEF_SUB_REF); + n++; + } + break; + case IR_MUL: + case IR_DIV: + case IR_MOD: + constraints->tmp_regs[n] = IR_TMP_REG(2, insn->type, IR_LOAD_SUB_REF, IR_DEF_SUB_REF); + n++; + break; + } + } + if (insn->op == IR_MOD) { + constraints->tmp_regs[n] = IR_TMP_REG(3, insn->type, IR_LOAD_SUB_REF, IR_DEF_SUB_REF); + n++; + } else if (insn->op == IR_MUL_OV && (ir_type_size[insn->type] == 8 || IR_IS_TYPE_SIGNED(insn->type))) { + constraints->tmp_regs[n] = IR_TMP_REG(3, insn->type, IR_LOAD_SUB_REF, IR_SAVE_SUB_REF); + n++; + } + break; + case IR_SEXT: + case IR_ZEXT: + flags = IR_USE_MUST_BE_IN_REG | IR_OP1_SHOULD_BE_IN_REG; + IR_FALLTHROUGH; + case IR_MUL_PWR2: + case IR_DIV_PWR2: + case IR_MOD_PWR2: + case IR_SHIFT: + case IR_SHIFT_CONST: + case IR_OP_INT: + case IR_OP_FP: + case IR_INT2FP: + case IR_FP2INT: + case IR_FP2FP: + insn = &ctx->ir_base[ref]; + n = 0; + if (IR_IS_CONST_REF(insn->op1)) { + constraints->tmp_regs[n] = IR_TMP_REG(1, insn->type, IR_LOAD_SUB_REF, IR_DEF_SUB_REF); + n++; + } + if (rule == IR_SHIFT && insn->op == IR_ROL) { + constraints->tmp_regs[n] = IR_TMP_REG(3, insn->type, IR_LOAD_SUB_REF, IR_DEF_SUB_REF); + n++; + } + break; + case IR_BINOP_FP: + case IR_MIN_MAX_INT: + insn = &ctx->ir_base[ref]; + n = 0; + if (IR_IS_CONST_REF(insn->op1)) { + constraints->tmp_regs[n] = 
IR_TMP_REG(1, insn->type, IR_LOAD_SUB_REF, IR_DEF_SUB_REF); + n++; + } + if (IR_IS_CONST_REF(insn->op2) && insn->op1 != insn->op2) { + constraints->tmp_regs[n] = IR_TMP_REG(2, insn->type, IR_LOAD_SUB_REF, IR_DEF_SUB_REF); + n++; + } + break; + case IR_CMP_INT: + insn = &ctx->ir_base[ref]; + n = 0; + if (IR_IS_CONST_REF(insn->op1)) { + constraints->tmp_regs[n] = IR_TMP_REG(1, insn->type, IR_LOAD_SUB_REF, IR_DEF_SUB_REF); + n++; + } + if (IR_IS_CONST_REF(insn->op2) && insn->op1 != insn->op2) { + insn = &ctx->ir_base[insn->op2]; + if (!aarch64_may_encode_imm12(insn->val.u64)) { + constraints->tmp_regs[n] = IR_TMP_REG(2, insn->type, IR_LOAD_SUB_REF, IR_DEF_SUB_REF); + n++; + } + } + break; + case IR_CMP_FP: + insn = &ctx->ir_base[ref]; + n = 0; + if (IR_IS_CONST_REF(insn->op1)) { + const ir_insn *val_insn = &ctx->ir_base[insn->op1]; + constraints->tmp_regs[n] = IR_TMP_REG(1, val_insn->type, IR_LOAD_SUB_REF, IR_DEF_SUB_REF); + n++; + } + if (IR_IS_CONST_REF(insn->op2) && insn->op1 != insn->op2) { + const ir_insn *val_insn = &ctx->ir_base[insn->op2]; + constraints->tmp_regs[n] = IR_TMP_REG(2, val_insn->type, IR_LOAD_SUB_REF, IR_DEF_SUB_REF); + n++; + } + break; + case IR_VSTORE: + insn = &ctx->ir_base[ref]; + if (IR_IS_CONST_REF(insn->op3)) { + insn = &ctx->ir_base[insn->op3]; + constraints->tmp_regs[0] = IR_TMP_REG(3, insn->type, IR_LOAD_SUB_REF, IR_DEF_SUB_REF); + n = 1; + } + break; + case IR_LOAD_FP: + insn = &ctx->ir_base[ref]; + n = 0; + if (IR_IS_CONST_REF(insn->op2)) { + IR_ASSERT(ctx->ir_base[insn->op2].type == IR_ADDR); + constraints->tmp_regs[n] = IR_TMP_REG(2, IR_ADDR, IR_LOAD_SUB_REF, IR_DEF_SUB_REF); + n++; + } + break; + case IR_STORE_INT: + case IR_STORE_FP: + insn = &ctx->ir_base[ref]; + n = 0; + if (IR_IS_CONST_REF(insn->op2)) { + IR_ASSERT(ctx->ir_base[insn->op2].type == IR_ADDR); + constraints->tmp_regs[n] = IR_TMP_REG(2, IR_ADDR, IR_LOAD_SUB_REF, IR_DEF_SUB_REF); + n++; + } + if (IR_IS_CONST_REF(insn->op3)) { + insn = &ctx->ir_base[insn->op3]; + if (!IR_IS_TYPE_INT(insn->type) || insn->val.i64 != 0) { + constraints->tmp_regs[n] = IR_TMP_REG(3, insn->type, IR_LOAD_SUB_REF, IR_DEF_SUB_REF); + n++; + } + } + break; + case IR_SWITCH: + insn = &ctx->ir_base[ref]; + n = 0; + if (IR_IS_CONST_REF(insn->op2)) { + insn = &ctx->ir_base[insn->op2]; + constraints->tmp_regs[n] = IR_TMP_REG(2, insn->type, IR_LOAD_SUB_REF, IR_DEF_SUB_REF); + n++; + } else { + insn = &ctx->ir_base[insn->op2]; + constraints->tmp_regs[n] = IR_TMP_REG(1, insn->type, IR_LOAD_SUB_REF, IR_DEF_SUB_REF); + n++; + } + constraints->tmp_regs[n] = IR_TMP_REG(3, IR_ADDR, IR_LOAD_SUB_REF, IR_DEF_SUB_REF); + n++; + break; + case IR_CALL: + insn = &ctx->ir_base[ref]; + constraints->def_reg = (IR_IS_TYPE_INT(insn->type)) ? 
IR_REG_INT_RET1 : IR_REG_FP_RET1; + constraints->tmp_regs[0] = IR_SCRATCH_REG(IR_REG_SCRATCH, IR_USE_SUB_REF, IR_DEF_SUB_REF); + n = 1; + IR_FALLTHROUGH; + case IR_TAILCALL: + insn = &ctx->ir_base[ref]; + if (insn->inputs_count > 2) { + constraints->hints[2] = IR_REG_NONE; + constraints->hints_count = ir_get_args_regs(ctx, insn, constraints->hints); + if (!IR_IS_CONST_REF(insn->op2)) { + constraints->tmp_regs[n] = IR_TMP_REG(1, IR_ADDR, IR_LOAD_SUB_REF, IR_USE_SUB_REF); + n++; + } + } + flags = IR_USE_SHOULD_BE_IN_REG | IR_OP2_SHOULD_BE_IN_REG | IR_OP3_SHOULD_BE_IN_REG; + break; + case IR_COPY_INT: + case IR_COPY_FP: + flags = IR_DEF_REUSES_OP1_REG | IR_USE_MUST_BE_IN_REG; + break; + case IR_PARAM: + constraints->def_reg = ir_get_param_reg(ctx, ref); + flags = 0; + break; + case IR_PI: + case IR_PHI: + flags = IR_USE_SHOULD_BE_IN_REG; + break; + case IR_RLOAD: + constraints->def_reg = ctx->ir_base[ref].op2; + flags = IR_USE_SHOULD_BE_IN_REG; + break; + case IR_EXITCALL: + constraints->def_reg = IR_REG_INT_RET1; + break; + case IR_TRUNC: + case IR_BITCAST: + flags = IR_USE_MUST_BE_IN_REG | IR_OP1_SHOULD_BE_IN_REG; + break; + case IR_RSTORE: + flags = IR_OP3_SHOULD_BE_IN_REG; + break; + case IR_RETURN_INT: + flags = IR_OP2_SHOULD_BE_IN_REG; + constraints->hints[2] = IR_REG_INT_RET1; + constraints->hints_count = 3; + break; + case IR_RETURN_FP: + flags = IR_OP2_SHOULD_BE_IN_REG; + constraints->hints[2] = IR_REG_FP_RET1; + constraints->hints_count = 3; + break; + case IR_SNAPSHOT: + flags = 0; + break; + } + constraints->tmps_count = n; + + return flags; +} + +/* instruction selection */ +static void ir_match_fuse_addr(ir_ctx *ctx, ir_ref addr_ref, ir_type type) +{ + if (!IR_IS_CONST_REF(addr_ref)) { + ir_insn *addr_insn = &ctx->ir_base[addr_ref]; + + if (addr_insn->op == IR_ADD + && !IR_IS_CONST_REF(addr_insn->op1) + && IR_IS_CONST_REF(addr_insn->op2) // TODO: temporary workaround + && aarch64_may_encode_addr_offset(ctx->ir_base[addr_insn->op2].val.i64, ir_type_size[type])) { + ir_use_list *use_list = &ctx->use_lists[addr_ref]; + ir_ref j = use_list->count; + + if (j > 1) { + /* check if address is used only in LOAD and STORE */ + ir_ref *p = &ctx->use_edges[use_list->refs]; + + do { + ir_insn *insn = &ctx->ir_base[*p]; + if (insn->op != IR_LOAD && (insn->op != IR_STORE || insn->op3 == addr_ref)) { + return; + } + p++; + } while (--j); + } + ctx->rules[addr_ref] = IR_FUSED | IR_SIMPLE | addr_insn->op; + } + } +} + +static uint32_t ir_match_insn(ir_ctx *ctx, ir_ref ref) +{ + ir_insn *op2_insn; + ir_insn *insn = &ctx->ir_base[ref]; + + switch (insn->op) { + case IR_EQ: + case IR_NE: + case IR_LT: + case IR_GE: + case IR_LE: + case IR_GT: + case IR_ULT: + case IR_UGE: + case IR_ULE: + case IR_UGT: + if (IR_IS_TYPE_INT(ctx->ir_base[insn->op1].type)) { + return IR_CMP_INT; + } else { + return IR_CMP_FP; + } + break; + case IR_ADD: + case IR_SUB: + if (IR_IS_TYPE_INT(insn->type)) { + if ((ctx->flags & IR_OPT_CODEGEN) && IR_IS_CONST_REF(insn->op2)) { + op2_insn = &ctx->ir_base[insn->op2]; + if (IR_IS_CONST_REF(insn->op1)) { + // const + } else if (op2_insn->val.i64 == 0) { + return IR_COPY_INT; + } + } +binop_int: + return IR_BINOP_INT; + } else { +binop_fp: + return IR_BINOP_FP; + } + break; + case IR_MUL: + if (IR_IS_TYPE_INT(insn->type)) { + if ((ctx->flags & IR_OPT_CODEGEN) && IR_IS_CONST_REF(insn->op2)) { + op2_insn = &ctx->ir_base[insn->op2]; + if (IR_IS_CONST_REF(insn->op1)) { + // const + } else if (op2_insn->val.u64 == 0) { + // 0 + } else if (op2_insn->val.u64 == 1) { + return IR_COPY_INT; 
+ } else if (IR_IS_POWER_OF_TWO(op2_insn->val.u64)) { + return IR_MUL_PWR2; + } + } + return IR_BINOP_INT; + } else { + goto binop_fp; + } + break; + case IR_ADD_OV: + case IR_SUB_OV: + IR_ASSERT(IR_IS_TYPE_INT(insn->type)); + goto binop_int; + case IR_MUL_OV: + IR_ASSERT(IR_IS_TYPE_INT(insn->type)); + goto binop_int; + case IR_DIV: + if (IR_IS_TYPE_INT(insn->type)) { + if ((ctx->flags & IR_OPT_CODEGEN) && IR_IS_CONST_REF(insn->op2)) { + op2_insn = &ctx->ir_base[insn->op2]; + if (IR_IS_CONST_REF(insn->op1)) { + // const + } else if (op2_insn->val.u64 == 1) { + return IR_COPY_INT; + } else if (IR_IS_POWER_OF_TWO(op2_insn->val.u64)) { + return IR_DIV_PWR2; + } + } + return IR_BINOP_INT; + } else { + goto binop_fp; + } + break; + case IR_MOD: + if ((ctx->flags & IR_OPT_CODEGEN) && IR_IS_CONST_REF(insn->op2)) { + op2_insn = &ctx->ir_base[insn->op2]; + if (IR_IS_CONST_REF(insn->op1)) { + // const + } else if (IR_IS_TYPE_UNSIGNED(insn->type) && IR_IS_POWER_OF_TWO(op2_insn->val.u64)) { + return IR_MOD_PWR2; + } + } + return IR_BINOP_INT; + case IR_BSWAP: + case IR_NOT: + IR_ASSERT(IR_IS_TYPE_INT(insn->type)); + return IR_OP_INT; + case IR_NEG: + case IR_ABS: + if (IR_IS_TYPE_INT(insn->type)) { + return IR_OP_INT; + } else { + return IR_OP_FP; + } + case IR_OR: + if ((ctx->flags & IR_OPT_CODEGEN) && IR_IS_CONST_REF(insn->op2)) { + op2_insn = &ctx->ir_base[insn->op2]; + if (IR_IS_CONST_REF(insn->op1)) { + // const + } else if (op2_insn->val.i64 == 0) { + return IR_COPY_INT; + } else if (op2_insn->val.i64 == -1) { + // -1 + } + } + goto binop_int; + case IR_AND: + if ((ctx->flags & IR_OPT_CODEGEN) && IR_IS_CONST_REF(insn->op2)) { + op2_insn = &ctx->ir_base[insn->op2]; + if (IR_IS_CONST_REF(insn->op1)) { + // const + } else if (op2_insn->val.i64 == 0) { + // 0 + } else if (op2_insn->val.i64 == -1) { + return IR_COPY_INT; + } + } + goto binop_int; + case IR_XOR: + if ((ctx->flags & IR_OPT_CODEGEN) && IR_IS_CONST_REF(insn->op2)) { + op2_insn = &ctx->ir_base[insn->op2]; + if (IR_IS_CONST_REF(insn->op1)) { + // const + } + } + goto binop_int; + case IR_SHL: + if (IR_IS_CONST_REF(insn->op2)) { + if (ctx->flags & IR_OPT_CODEGEN) { + op2_insn = &ctx->ir_base[insn->op2]; + if (IR_IS_CONST_REF(insn->op1)) { + // const + } else if (op2_insn->val.u64 == 0) { + return IR_COPY_INT; + } else if (ir_type_size[insn->type] >= 4) { + if (op2_insn->val.u64 == 1) { + // lea [op1*2] + } else if (op2_insn->val.u64 == 2) { + // lea [op1*4] + } else if (op2_insn->val.u64 == 3) { + // lea [op1*8] + } + } + } + return IR_SHIFT_CONST; + } + return IR_SHIFT; + case IR_SHR: + case IR_SAR: + case IR_ROL: + case IR_ROR: + if (IR_IS_CONST_REF(insn->op2)) { + if (ctx->flags & IR_OPT_CODEGEN) { + op2_insn = &ctx->ir_base[insn->op2]; + if (IR_IS_CONST_REF(insn->op1)) { + // const + } else if (op2_insn->val.u64 == 0) { + return IR_COPY_INT; + } + } + return IR_SHIFT_CONST; + } + return IR_SHIFT; + case IR_MIN: + case IR_MAX: + if (IR_IS_TYPE_INT(insn->type)) { + return IR_MIN_MAX_INT; + } else { + goto binop_fp; + } + break; +// case IR_COND: + case IR_COPY: + if (IR_IS_TYPE_INT(insn->type)) { + return IR_COPY_INT; + } else { + return IR_COPY_FP; + } + break; + case IR_CALL: + ctx->flags |= IR_HAS_CALLS; + return IR_CALL; + case IR_VAR: + return IR_SKIPPED | IR_VAR; + case IR_PARAM: + return ctx->use_lists[ref].count > 0 ? 
IR_PARAM : IR_SKIPPED | IR_PARAM; + case IR_ALLOCA: + if (ctx->flags & IR_FUNCTION) { + ctx->flags |= IR_USE_FRAME_POINTER | IR_HAS_ALLOCA; + } + return IR_ALLOCA; + case IR_LOAD: + ir_match_fuse_addr(ctx, insn->op2, insn->type); + if (IR_IS_TYPE_INT(insn->type)) { + return IR_LOAD_INT; + } else { + return IR_LOAD_FP; + } + break; + case IR_STORE: + ir_match_fuse_addr(ctx, insn->op2, ctx->ir_base[insn->op3].type); + if (IR_IS_TYPE_INT(ctx->ir_base[insn->op3].type)) { + return IR_STORE_INT; + } else { + return IR_STORE_FP; + } + break; + case IR_RLOAD: + if (IR_REGSET_IN(IR_REGSET_UNION((ir_regset)ctx->fixed_regset, IR_REGSET_FIXED), insn->op2)) { + return IR_SKIPPED | IR_RLOAD; + } + return IR_RLOAD; + case IR_RSTORE: + if (IR_IS_TYPE_INT(ctx->ir_base[insn->op2].type)) { + if ((ctx->flags & IR_OPT_CODEGEN) && ir_in_same_block(ctx, insn->op2) && ctx->use_lists[insn->op2].count == 1) { + ir_insn *op_insn = &ctx->ir_base[insn->op2]; + + if (!ctx->rules[insn->op2]) { + ctx->rules[insn->op2] = ir_match_insn(ctx, insn->op2); + } + if (ctx->rules[insn->op2] == IR_BINOP_INT) { + if (ctx->ir_base[op_insn->op1].op == IR_RLOAD + && ctx->ir_base[op_insn->op1].op2 == insn->op3) { + ctx->rules[insn->op2] = IR_FUSED | IR_BINOP_INT; + ctx->rules[op_insn->op1] = IR_SKIPPED | IR_RLOAD; + return IR_REG_BINOP_INT; + } else if ((ir_op_flags[op_insn->op] & IR_OP_FLAG_COMMUTATIVE) + && ctx->ir_base[op_insn->op2].op == IR_RLOAD + && ctx->ir_base[op_insn->op2].op2 == insn->op3) { + ir_ref tmp = op_insn->op1; + op_insn->op1 = op_insn->op2; + op_insn->op2 = tmp; + ctx->rules[insn->op2] = IR_FUSED | IR_BINOP_INT; + ctx->rules[op_insn->op1] = IR_SKIPPED | IR_RLOAD; + return IR_REG_BINOP_INT; + } + } + } + } + return IR_RSTORE; + case IR_START: + case IR_BEGIN: + case IR_IF_TRUE: + case IR_IF_FALSE: + case IR_CASE_VAL: + case IR_CASE_DEFAULT: + case IR_MERGE: + case IR_LOOP_BEGIN: + case IR_UNREACHABLE: + return IR_SKIPPED | insn->op; + case IR_RETURN: + if (!insn->op2) { + return IR_RETURN_VOID; + } else if (IR_IS_TYPE_INT(ctx->ir_base[insn->op2].type)) { + return IR_RETURN_INT; + } else { + return IR_RETURN_FP; + } + case IR_IF: + if (ir_in_same_block(ctx, insn->op2) && ctx->use_lists[insn->op2].count == 1) { + op2_insn = &ctx->ir_base[insn->op2]; + if (op2_insn->op >= IR_EQ && op2_insn->op <= IR_UGT) { + if (IR_IS_TYPE_INT(ctx->ir_base[op2_insn->op1].type)) { + ctx->rules[insn->op2] = IR_FUSED | IR_CMP_INT; + return IR_CMP_AND_BRANCH_INT; + } else { + ctx->rules[insn->op2] = IR_FUSED | IR_CMP_FP; + return IR_CMP_AND_BRANCH_FP; + } + } else if (op2_insn->op == IR_OVERFLOW) { + ctx->rules[insn->op2] = IR_FUSED | IR_SIMPLE | IR_OVERFLOW; + return IR_OVERFLOW_AND_BRANCH; + } + } + if (IR_IS_TYPE_INT(ctx->ir_base[insn->op2].type)) { + return IR_IF_INT; + } else { + IR_ASSERT(0 && "NIY IR_IF_FP"); + break; + } + case IR_GUARD: + case IR_GUARD_NOT: + if (ir_in_same_block(ctx, insn->op2) && ctx->use_lists[insn->op2].count == 1) { + op2_insn = &ctx->ir_base[insn->op2]; + if (op2_insn->op >= IR_EQ && op2_insn->op <= IR_UGT + // TODO: register allocator may clobber operands of CMP before they are used in the GUARD_CMP + && (insn->op2 == ref - 1 || + (insn->op2 == ctx->prev_ref[ref] - 1 + && ctx->ir_base[ctx->prev_ref[ref]].op == IR_SNAPSHOT))) { + if (IR_IS_TYPE_INT(ctx->ir_base[op2_insn->op1].type)) { + ctx->rules[insn->op2] = IR_FUSED | IR_CMP_INT; + return IR_GUARD_CMP_INT; + } else { + ctx->rules[insn->op2] = IR_FUSED | IR_CMP_FP; + return IR_GUARD_CMP_FP; + } + } else if (op2_insn->op == IR_OVERFLOW) { + 
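+				/* Fuse the OVERFLOW check into the guard: the preceding
+				 * ADD_OV/SUB_OV/MUL_OV already left the overflow state in the
+				 * condition flags, so the guard can branch on them directly
+				 * instead of materializing a boolean result first. */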
ctx->rules[insn->op2] = IR_FUSED | IR_SIMPLE | IR_OVERFLOW; + return IR_GUARD_OVERFLOW; + } + } + return insn->op; + default: + break; + } + + return insn->op; +} + +static void ir_match_insn2(ir_ctx *ctx, ir_ref ref, uint32_t rule) +{ +} + +/* code generation */ +static int32_t ir_ref_spill_slot(ir_ctx *ctx, ir_ref ref, ir_reg *reg) +{ + int32_t offset; + + IR_ASSERT(ref >= 0); + offset = ctx->live_intervals[ctx->vregs[ref]]->stack_spill_pos; + IR_ASSERT(offset != -1); + if (ctx->live_intervals[ctx->vregs[ref]]->flags & IR_LIVE_INTERVAL_SPILL_SPECIAL) { + IR_ASSERT(ctx->spill_base != IR_REG_NONE); + *reg = ctx->spill_base; + return offset; + } + *reg = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER; + return IR_SPILL_POS_TO_OFFSET(offset); +} + +static bool ir_is_same_spill_slot(ir_ctx *ctx, ir_ref ref, ir_reg reg, int32_t offset) +{ + ir_reg fp; + + return ir_ref_spill_slot(ctx, ref, &fp) == offset && reg == fp; +} + +static int32_t ir_var_spill_slot(ir_ctx *ctx, ir_ref ref, ir_reg *reg) +{ + ir_insn *var_insn = &ctx->ir_base[ref]; + + IR_ASSERT(var_insn->op == IR_VAR); + *reg = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER; + return IR_SPILL_POS_TO_OFFSET(var_insn->op3); +} + +static bool ir_may_avoid_spill_load(ir_ctx *ctx, ir_ref ref, ir_ref use) +{ + ir_live_interval *ival; + + IR_ASSERT(ctx->vregs[ref] && ctx->live_intervals[ctx->vregs[ref]]); + ival = ctx->live_intervals[ctx->vregs[ref]]; + while (ival) { + ir_use_pos *use_pos = ival->use_pos; + while (use_pos) { + if (IR_LIVE_POS_TO_REF(use_pos->pos) == use) { + return !use_pos->next || use_pos->next->op_num == 0; + } + use_pos = use_pos->next; + } + ival = ival->next; + } + return 0; +} + +static void ir_emit_load_imm_int(ir_ctx *ctx, ir_type type, ir_reg reg, int64_t val) +{ + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + + IR_ASSERT(IR_IS_TYPE_INT(type)); + if (ir_type_size[type] == 8) { + if (val == 0) { + if (reg != IR_REG_ZR) { + | mov Rx(reg), xzr + } + } else if (((uint64_t)(val)) <= 0xffff) { + | movz Rx(reg), #((uint64_t)(val)) + } else if (~((uint64_t)(val)) <= 0xffff) { + | movn Rx(reg), #(~((uint64_t)(val))) + } else if ((uint64_t)(val) & 0xffff) { + | movz Rx(reg), #((uint64_t)(val) & 0xffff) + if (((uint64_t)(val) >> 16) & 0xffff) { + | movk Rx(reg), #(((uint64_t)(val) >> 16) & 0xffff), lsl #16 + } + if (((uint64_t)(val) >> 32) & 0xffff) { + | movk Rx(reg), #(((uint64_t)(val) >> 32) & 0xffff), lsl #32 + } + if ((((uint64_t)(val) >> 48) & 0xffff)) { + | movk Rx(reg), #(((uint64_t)(val) >> 48) & 0xffff), lsl #48 + } + } else if (((uint64_t)(val) >> 16) & 0xffff) { + | movz Rx(reg), #(((uint64_t)(val) >> 16) & 0xffff), lsl #16 + if (((uint64_t)(val) >> 32) & 0xffff) { + | movk Rx(reg), #(((uint64_t)(val) >> 32) & 0xffff), lsl #32 + } + if ((((uint64_t)(val) >> 48) & 0xffff)) { + | movk Rx(reg), #(((uint64_t)(val) >> 48) & 0xffff), lsl #48 + } + } else if (((uint64_t)(val) >> 32) & 0xffff) { + | movz Rx(reg), #(((uint64_t)(val) >> 32) & 0xffff), lsl #32 + if ((((uint64_t)(val) >> 48) & 0xffff)) { + | movk Rx(reg), #(((uint64_t)(val) >> 48) & 0xffff), lsl #48 + } + } else { + | movz Rx(reg), #(((uint64_t)(val) >> 48) & 0xffff), lsl #48 + } + } else { + if (val == 0) { + if (reg != IR_REG_ZR) { + | mov Rw(reg), wzr + } + } else if (((uint64_t)(val)) <= 0xffff) { + | movz Rw(reg), #((uint64_t)(val)) + } else if (~((uint64_t)(val)) <= 0xffff) { + | movn Rw(reg), #(~((uint64_t)(val))) + } else if ((uint64_t)(val) & 0xffff) { 
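+			/* Compose the constant 16 bits at a time: movz writes one
+			 * half-word and clears the rest of the register, then movk
+			 * patches the upper half-word in when it is non-zero. */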
+ | movz Rw(reg), #((uint64_t)(val) & 0xffff) + if (((uint64_t)(val) >> 16) & 0xffff) { + | movk Rw(reg), #(((uint64_t)(val) >> 16) & 0xffff), lsl #16 + } + } else if (((uint64_t)(val) >> 16) & 0xffff) { + | movz Rw(reg), #(((uint64_t)(val) >> 16) & 0xffff), lsl #16 + } + } +} + +static void ir_emit_load_mem_int(ir_ctx *ctx, ir_type type, ir_reg reg, ir_reg base_reg, int32_t offset) +{ + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + + if (aarch64_may_encode_addr_offset(offset, ir_type_size[type])) { + switch (ir_type_size[type]) { + default: + IR_ASSERT(0); + case 8: + | ldr Rx(reg), [Rx(base_reg), #offset] + break; + case 4: + | ldr Rw(reg), [Rx(base_reg), #offset] + break; + case 2: + if (IR_IS_TYPE_SIGNED(type)) { + | ldrsh Rw(reg), [Rx(base_reg), #offset] + } else { + | ldrh Rw(reg), [Rx(base_reg), #offset] + } + break; + case 1: + if (IR_IS_TYPE_SIGNED(type)) { + | ldrsb Rw(reg), [Rx(base_reg), #offset] + } else { + | ldrb Rw(reg), [Rx(base_reg), #offset] + } + break; + } + } else { + ir_reg tmp_reg = IR_REG_INT_TMP; /* reserved temporary register */ + + ir_emit_load_imm_int(ctx, IR_ADDR, tmp_reg, offset); + switch (ir_type_size[type]) { + default: + IR_ASSERT(0); + case 8: + | ldr Rx(reg), [Rx(base_reg), Rx(tmp_reg)] + break; + case 4: + | ldr Rw(reg), [Rx(base_reg), Rx(tmp_reg)] + break; + case 2: + if (IR_IS_TYPE_SIGNED(type)) { + | ldrsh Rw(reg), [Rx(base_reg), Rx(tmp_reg)] + } else { + | ldrh Rw(reg), [Rx(base_reg), Rx(tmp_reg)] + } + break; + case 1: + if (IR_IS_TYPE_SIGNED(type)) { + | ldrsb Rw(reg), [Rx(base_reg), Rx(tmp_reg)] + } else { + | ldrb Rw(reg), [Rx(base_reg), Rx(tmp_reg)] + } + break; + } + } +} + +static void ir_emit_load_imm_fp(ir_ctx *ctx, ir_type type, ir_reg reg, ir_ref src) +{ + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + ir_insn *insn = &ctx->ir_base[src]; + int label; + + if (type == IR_FLOAT && insn->val.u32 == 0) { + | fmov Rs(reg-IR_REG_FP_FIRST), wzr + } else if (type == IR_DOUBLE && insn->val.u64 == 0) { + | fmov Rd(reg-IR_REG_FP_FIRST), xzr + } else { + label = ctx->cfg_blocks_count - src; + insn->const_flags |= IR_CONST_EMIT; + if (type == IR_DOUBLE) { + | ldr Rd(reg-IR_REG_FP_FIRST), =>label + } else { + IR_ASSERT(type == IR_FLOAT); + | ldr Rs(reg-IR_REG_FP_FIRST), =>label + } + } +} + +static void ir_emit_load_mem_fp(ir_ctx *ctx, ir_type type, ir_reg reg, ir_reg base_reg, int32_t offset) +{ + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + + if (aarch64_may_encode_addr_offset(offset, ir_type_size[type])) { + if (type == IR_DOUBLE) { + | ldr Rd(reg-IR_REG_FP_FIRST), [Rx(base_reg), #offset] + } else { + IR_ASSERT(type == IR_FLOAT); + | ldr Rs(reg-IR_REG_FP_FIRST), [Rx(base_reg), #offset] + } + } else { + ir_reg tmp_reg = IR_REG_INT_TMP; /* reserved temporary register */ + + ir_emit_load_imm_int(ctx, IR_ADDR, tmp_reg, offset); + if (type == IR_DOUBLE) { + | ldr Rd(reg-IR_REG_FP_FIRST), [Rx(base_reg), Rx(tmp_reg)] + } else { + IR_ASSERT(type == IR_FLOAT); + | ldr Rs(reg-IR_REG_FP_FIRST), [Rx(base_reg), Rx(tmp_reg)] + } + } +} + +static void ir_emit_load(ir_ctx *ctx, ir_type type, ir_reg reg, ir_ref src) +{ + int32_t offset; + ir_reg fp; + + if (IR_IS_CONST_REF(src)) { + if (IR_IS_TYPE_INT(type)) { + ir_insn *insn = &ctx->ir_base[src]; + + IR_ASSERT(insn->op != IR_STR && insn->op != IR_SYM && insn->op != IR_FUNC); + ir_emit_load_imm_int(ctx, type, reg, insn->val.i64); + } else { + ir_emit_load_imm_fp(ctx, type, reg, src); + } + } else { + offset = 
ir_ref_spill_slot(ctx, src, &fp); + if (IR_IS_TYPE_INT(type)) { + ir_emit_load_mem_int(ctx, type, reg, fp, offset); + } else { + ir_emit_load_mem_fp(ctx, type, reg, fp, offset); + } + } +} + +static void ir_emit_store_mem_int(ir_ctx *ctx, ir_type type, ir_reg base_reg, int32_t offset, ir_reg reg) +{ + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + + if (aarch64_may_encode_addr_offset(offset, ir_type_size[type])) { + switch (ir_type_size[type]) { + default: + IR_ASSERT(0); + case 8: + | str Rx(reg), [Rx(base_reg), #offset] + break; + case 4: + | str Rw(reg), [Rx(base_reg), #offset] + break; + case 2: + | strh Rw(reg), [Rx(base_reg), #offset] + break; + case 1: + | strb Rw(reg), [Rx(base_reg), #offset] + break; + } + } else { + ir_reg tmp_reg = IR_REG_INT_TMP; /* reserved temporary register */ + + ir_emit_load_imm_int(ctx, IR_ADDR, tmp_reg, offset); + switch (ir_type_size[type]) { + default: + IR_ASSERT(0); + case 8: + | str Rx(reg), [Rx(base_reg), Rx(tmp_reg)] + break; + case 4: + | str Rw(reg), [Rx(base_reg), Rx(tmp_reg)] + break; + case 2: + | strh Rw(reg), [Rx(base_reg), Rx(tmp_reg)] + break; + case 1: + | strb Rw(reg), [Rx(base_reg), Rx(tmp_reg)] + break; + } + } +} + +static void ir_emit_store_mem_fp(ir_ctx *ctx, ir_type type, ir_reg base_reg, int32_t offset, ir_reg reg) +{ + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + + if (aarch64_may_encode_addr_offset(offset, ir_type_size[type])) { + if (type == IR_DOUBLE) { + | str Rd(reg-IR_REG_FP_FIRST), [Rx(base_reg), #offset] + } else { + IR_ASSERT(type == IR_FLOAT); + | str Rs(reg-IR_REG_FP_FIRST), [Rx(base_reg), #offset] + } + } else { + ir_reg tmp_reg = IR_REG_INT_TMP; /* reserved temporary register */ + + ir_emit_load_imm_int(ctx, IR_ADDR, tmp_reg, offset); + if (type == IR_DOUBLE) { + | str Rd(reg-IR_REG_FP_FIRST), [Rx(base_reg), Rx(tmp_reg)] + } else { + IR_ASSERT(type == IR_FLOAT); + | str Rs(reg-IR_REG_FP_FIRST), [Rx(base_reg), Rx(tmp_reg)] + } + } +} + +static void ir_emit_store(ir_ctx *ctx, ir_type type, ir_ref dst, ir_reg reg) +{ + int32_t offset; + ir_reg fp; + + IR_ASSERT(dst >= 0); + offset = ir_ref_spill_slot(ctx, dst, &fp); + if (IR_IS_TYPE_INT(type)) { + ir_emit_store_mem_int(ctx, type, fp, offset, reg); + } else { + ir_emit_store_mem_fp(ctx, type, fp, offset, reg); + } +} + +static void ir_emit_mov(ir_ctx *ctx, ir_type type, ir_reg dst, ir_reg src) +{ + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + + if (ir_type_size[type] == 8) { + if (dst == IR_REG_STACK_POINTER) { + | mov sp, Rx(src) + } else if (src == IR_REG_STACK_POINTER) { + | mov Rx(dst), sp + } else { + | mov Rx(dst), Rx(src) + } + } else { + | mov Rw(dst), Rw(src) + } +} + +static void ir_emit_fp_mov(ir_ctx *ctx, ir_type type, ir_reg dst, ir_reg src) +{ + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + + if (ir_type_size[type] == 8) { + | fmov Rd(dst-IR_REG_FP_FIRST), Rd(src-IR_REG_FP_FIRST) + } else { + | fmov Rs(dst-IR_REG_FP_FIRST), Rs(src-IR_REG_FP_FIRST) + } +} + +static void ir_emit_prologue(ir_ctx *ctx) +{ + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + + if (ctx->flags & IR_USE_FRAME_POINTER) { + | stp x29, x30, [sp, # (-(ctx->stack_frame_size+16))]! 
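+		/* The pre-indexed stp bumps sp down by the whole frame size and
+		 * stores the caller's x29/x30 at the new stack top; the mov below
+		 * then establishes x29 as this frame's frame pointer. */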
+ | mov x29, sp + if (ctx->call_stack_size) { + | sub sp, sp, #(ctx->call_stack_size) + } + } else if (ctx->stack_frame_size + ctx->call_stack_size) { + if (ctx->fixed_stack_red_zone) { + IR_ASSERT(ctx->stack_frame_size + ctx->call_stack_size <= ctx->fixed_stack_red_zone); + } else { + | sub sp, sp, #(ctx->stack_frame_size + ctx->call_stack_size) + } + } + if (ctx->used_preserved_regs) { + int offset; + uint32_t i; + ir_reg prev = IR_REG_NONE; + ir_reg fp = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER; + ir_regset used_preserved_regs = (ir_regset)ctx->used_preserved_regs; + + if (ctx->flags & IR_USE_FRAME_POINTER) { + offset = ctx->stack_frame_size + sizeof(void*) * 2; + } else { + offset = ctx->stack_frame_size + ctx->call_stack_size; + } + for (i = 0; i < IR_REG_NUM; i++) { + if (IR_REGSET_IN(used_preserved_regs, i)) { + if (prev == IR_REG_NONE) { + prev = i; + } else if (i < IR_REG_FP_FIRST) { + offset -= sizeof(void*) * 2; + | stp Rx(prev), Rx(i), [Rx(fp), #offset] + prev = IR_REG_NONE; + } else { + if (prev < IR_REG_FP_FIRST) { + offset -= sizeof(void*); + | str Rx(prev), [Rx(fp), #offset] + offset -= sizeof(void*); + | str Rd(i-IR_REG_FP_FIRST), [Rx(fp), #offset] + } else { + offset -= sizeof(void*) * 2; + | stp Rd(prev-IR_REG_FP_FIRST), Rd(i-IR_REG_FP_FIRST), [Rx(fp), #offset] + } + prev = IR_REG_NONE; + } + } + } + if (prev != IR_REG_NONE) { + if (prev < IR_REG_FP_FIRST) { + offset -= sizeof(void*); + | str Rx(prev), [Rx(fp), #offset] + } else { + offset -= sizeof(void*); + | str Rd(prev-IR_REG_FP_FIRST), [Rx(fp), #offset] + } + } + } +} + +static void ir_emit_epilogue(ir_ctx *ctx) +{ + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + + if (ctx->used_preserved_regs) { + int offset; + uint32_t i; + ir_reg prev = IR_REG_NONE; + ir_reg fp = (ctx->flags & IR_USE_FRAME_POINTER) ? 
IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER; + ir_regset used_preserved_regs = (ir_regset)ctx->used_preserved_regs; + + if (ctx->flags & IR_USE_FRAME_POINTER) { + offset = ctx->stack_frame_size + sizeof(void*) * 2; + } else { + offset = ctx->stack_frame_size + ctx->call_stack_size; + } + for (i = 0; i < IR_REG_NUM; i++) { + if (IR_REGSET_IN(used_preserved_regs, i)) { + if (prev == IR_REG_NONE) { + prev = i; + } else if (i < IR_REG_FP_FIRST) { + offset -= sizeof(void*) * 2; + | ldp Rx(prev), Rx(i), [Rx(fp), #offset] + prev = IR_REG_NONE; + } else { + if (prev < IR_REG_FP_FIRST) { + offset -= sizeof(void*); + | ldr Rx(prev), [Rx(fp), #offset] + offset -= sizeof(void*); + | ldr Rd(i-IR_REG_FP_FIRST), [Rx(fp), #offset] + } else { + offset -= sizeof(void*) * 2; + | ldp Rd(prev-IR_REG_FP_FIRST), Rd(i-IR_REG_FP_FIRST), [Rx(fp), #offset] + } + prev = IR_REG_NONE; + } + } + } + if (prev != IR_REG_NONE) { + if (prev < IR_REG_FP_FIRST) { + offset -= sizeof(void*); + | ldr Rx(prev), [Rx(fp), #offset] + } else { + offset -= sizeof(void*); + | ldr Rd(prev-IR_REG_FP_FIRST), [Rx(fp), #offset] + } + } + } + + if (ctx->flags & IR_USE_FRAME_POINTER) { + if (ctx->call_stack_size || (ctx->flags & IR_HAS_ALLOCA)) { + | mov sp, x29 + } + | ldp x29, x30, [sp], # (ctx->stack_frame_size+16) + } else if (ctx->stack_frame_size + ctx->call_stack_size) { + if (ctx->fixed_stack_red_zone) { + IR_ASSERT(ctx->stack_frame_size + ctx->call_stack_size <= ctx->fixed_stack_red_zone); + } else { + | add sp, sp, #(ctx->stack_frame_size + ctx->call_stack_size) + } + } +} + +static void ir_emit_binop_int(ir_ctx *ctx, ir_ref def, ir_insn *insn) +{ + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + ir_type type = insn->type; + ir_ref op1 = insn->op1; + ir_ref op2 = insn->op2; + ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); + ir_reg op1_reg = ctx->regs[def][1]; + ir_reg op2_reg = ctx->regs[def][2]; + ir_reg tmp_reg; + + IR_ASSERT(def_reg != IR_REG_NONE && op1_reg != IR_REG_NONE); + + if (IR_REG_SPILLED(op1_reg) || IR_IS_CONST_REF(op1)) { + op1_reg = IR_REG_NUM(op1_reg); + ir_emit_load(ctx, type, op1_reg, op1); + } + if (op2_reg != IR_REG_NONE) { + if (IR_REG_SPILLED(op2_reg) || IR_IS_CONST_REF(op2)) { + op2_reg = IR_REG_NUM(op2_reg); + if (op1 != op2) { + ir_emit_load(ctx, type, op2_reg, op2); + } + } + switch (insn->op) { + default: + IR_ASSERT(0 && "NIY binary op"); + case IR_ADD: + | ASM_REG_REG_REG_OP add, type, def_reg, op1_reg, op2_reg + break; + case IR_ADD_OV: + | ASM_REG_REG_REG_OP adds, type, def_reg, op1_reg, op2_reg + break; + case IR_SUB: + | ASM_REG_REG_REG_OP sub, type, def_reg, op1_reg, op2_reg + break; + case IR_SUB_OV: + | ASM_REG_REG_REG_OP subs, type, def_reg, op1_reg, op2_reg + break; + case IR_MUL: + | ASM_REG_REG_REG_OP mul, type, def_reg, op1_reg, op2_reg + break; + case IR_MUL_OV: + if (ir_type_size[type] == 8) { + if (IR_IS_TYPE_SIGNED(type)) { + tmp_reg = ctx->regs[def][3]; + IR_ASSERT(tmp_reg != IR_REG_NONE); + | smulh Rx(tmp_reg), Rx(op1_reg), Rx(op2_reg) + | mul Rx(def_reg), Rx(op1_reg), Rx(op2_reg) + | cmp Rx(tmp_reg), Rx(def_reg), asr #63 + } else { + tmp_reg = ctx->regs[def][3]; + IR_ASSERT(tmp_reg != IR_REG_NONE); + | umulh Rx(tmp_reg), Rx(op1_reg), Rx(op2_reg) + | mul Rx(def_reg), Rx(op1_reg), Rx(op2_reg) + | cmp Rx(tmp_reg), xzr + } + } else { + if (IR_IS_TYPE_SIGNED(type)) { + tmp_reg = ctx->regs[def][3]; + IR_ASSERT(tmp_reg != IR_REG_NONE); + | smull Rx(def_reg), Rw(op1_reg), Rw(op2_reg) + | asr Rx(tmp_reg), Rx(def_reg), #32 + | cmp Rx(tmp_reg), Rx(def_reg), asr #31 + 
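+					/* smull left the full 64-bit product in def_reg; the
+					 * compare above sets NE iff bits 63..31 are not all
+					 * copies of the sign bit, i.e. iff the product does not
+					 * fit in int32. */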
} else { + | umull Rx(def_reg), Rw(op1_reg), Rw(op2_reg) + | cmp xzr, Rx(def_reg), lsr #32 + } + } + break; + case IR_DIV: + if (IR_IS_TYPE_SIGNED(type)) { + | ASM_REG_REG_REG_OP sdiv, type, def_reg, op1_reg, op2_reg + } else { + | ASM_REG_REG_REG_OP udiv, type, def_reg, op1_reg, op2_reg + } + break; + case IR_MOD: + tmp_reg = ctx->regs[def][3]; + IR_ASSERT(tmp_reg != IR_REG_NONE); + if (IR_IS_TYPE_SIGNED(type)) { + | ASM_REG_REG_REG_OP sdiv, type, tmp_reg, op1_reg, op2_reg + | ASM_REG_REG_REG_REG_OP msub, type, def_reg, tmp_reg, op2_reg, op1_reg + } else { + | ASM_REG_REG_REG_OP udiv, type, tmp_reg, op1_reg, op2_reg + | ASM_REG_REG_REG_REG_OP msub, type, def_reg, tmp_reg, op2_reg, op1_reg + } + break; + case IR_OR: + | ASM_REG_REG_REG_OP orr, type, def_reg, op1_reg, op2_reg + break; + case IR_AND: + | ASM_REG_REG_REG_OP and, type, def_reg, op1_reg, op2_reg + break; + case IR_XOR: + | ASM_REG_REG_REG_OP eor, type, def_reg, op1_reg, op2_reg + break; + } + } else { + IR_ASSERT(IR_IS_CONST_REF(op2)); + int32_t val = ctx->ir_base[op2].val.i32; + switch (insn->op) { + default: + IR_ASSERT(0 && "NIY binary op"); + case IR_ADD: + | ASM_REG_REG_IMM_OP add, type, def_reg, op1_reg, val + break; + case IR_ADD_OV: + | ASM_REG_REG_IMM_OP adds, type, def_reg, op1_reg, val + break; + case IR_SUB: + | ASM_REG_REG_IMM_OP sub, type, def_reg, op1_reg, val + break; + case IR_SUB_OV: + | ASM_REG_REG_IMM_OP subs, type, def_reg, op1_reg, val + break; + case IR_OR: + if (ir_type_size[type] == 8) { + uint64_t val = ctx->ir_base[op2].val.u64; + | ASM_REG_REG_IMM_OP orr, type, def_reg, op1_reg, val + } else { + | ASM_REG_REG_IMM_OP orr, type, def_reg, op1_reg, val + } + break; + case IR_AND: + if (ir_type_size[type] == 8) { + uint64_t val = ctx->ir_base[op2].val.u64; + | ASM_REG_REG_IMM_OP and, type, def_reg, op1_reg, val + } else { + | ASM_REG_REG_IMM_OP and, type, def_reg, op1_reg, val + } + break; + case IR_XOR: + if (ir_type_size[type] == 8) { + uint64_t val = ctx->ir_base[op2].val.u64; + | ASM_REG_REG_IMM_OP eor, type, def_reg, op1_reg, val + } else { + | ASM_REG_REG_IMM_OP eor, type, def_reg, op1_reg, val + } + break; + } + } + if (IR_REG_SPILLED(ctx->regs[def][0])) { + ir_emit_store(ctx, type, def, def_reg); + } +} + +static void ir_emit_min_max_int(ir_ctx *ctx, ir_ref def, ir_insn *insn) +{ + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + ir_type type = insn->type; + ir_ref op1 = insn->op1; + ir_ref op2 = insn->op2; + ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); + ir_reg op1_reg = ctx->regs[def][1]; + ir_reg op2_reg = ctx->regs[def][2]; + + IR_ASSERT(def_reg != IR_REG_NONE && op1_reg != IR_REG_NONE && op2_reg != IR_REG_NONE); + + if (IR_REG_SPILLED(op1_reg) || IR_IS_CONST_REF(op1)) { + op1_reg = IR_REG_NUM(op1_reg); + ir_emit_load(ctx, type, op1_reg, op1); + } + if (IR_REG_SPILLED(op2_reg) || IR_IS_CONST_REF(op2)) { + op2_reg = IR_REG_NUM(op2_reg); + ir_emit_load(ctx, type, op2_reg, op2); + } + + if (op1 == op2) { + return; + } + + if (ir_type_size[type] == 8) { + | cmp Rx(op1_reg), Rx(op2_reg) + if (insn->op == IR_MIN) { + if (IR_IS_TYPE_SIGNED(type)) { + | csel Rx(def_reg), Rx(op1_reg), Rx(op2_reg), le + } else { + | csel Rx(def_reg), Rx(op1_reg), Rx(op2_reg), ls + } + } else { + IR_ASSERT(insn->op == IR_MAX); + if (IR_IS_TYPE_SIGNED(type)) { + | csel Rx(def_reg), Rx(op1_reg), Rx(op2_reg), ge + } else { + | csel Rx(def_reg), Rx(op1_reg), Rx(op2_reg), hs + } + } + } else { + | cmp Rw(op1_reg), Rw(op2_reg) + if (insn->op == IR_MIN) { + if (IR_IS_TYPE_SIGNED(type)) { + | csel 
Rw(def_reg), Rw(op1_reg), Rw(op2_reg), le + } else { + | csel Rw(def_reg), Rw(op1_reg), Rw(op2_reg), ls + } + } else { + IR_ASSERT(insn->op == IR_MAX); + if (IR_IS_TYPE_SIGNED(type)) { + | csel Rw(def_reg), Rw(op1_reg), Rw(op2_reg), ge + } else { + | csel Rw(def_reg), Rw(op1_reg), Rw(op2_reg), hs + } + } + } + + if (IR_REG_SPILLED(ctx->regs[def][0])) { + ir_emit_store(ctx, type, def, def_reg); + } +} + +static void ir_emit_overflow(ir_ctx *ctx, ir_ref def, ir_insn *insn) +{ + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); + ir_insn *math_insn = &ctx->ir_base[insn->op1]; + ir_type type = math_insn->type; + + IR_ASSERT(def_reg != IR_REG_NONE); + IR_ASSERT(IR_IS_TYPE_INT(type)); + if (math_insn->op == IR_MUL_OV) { + | cset Rw(def_reg), ne + } else if (IR_IS_TYPE_SIGNED(type)) { + | cset Rw(def_reg), vs + } else { + | cset Rw(def_reg), cs + } + if (IR_REG_SPILLED(ctx->regs[def][0])) { + ir_emit_store(ctx, insn->type, def, def_reg); + } +} + +static void ir_emit_overflow_and_branch(ir_ctx *ctx, uint32_t b, ir_ref def, ir_insn *insn) +{ + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + ir_insn *overflow_insn = &ctx->ir_base[insn->op2]; + ir_insn *math_insn = &ctx->ir_base[overflow_insn->op1]; + ir_type type = math_insn->type; + uint32_t true_block, false_block, next_block; + bool reverse = 0; + + ir_get_true_false_blocks(ctx, b, &true_block, &false_block, &next_block); + if (true_block == next_block) { + reverse = 1; + true_block = false_block; + false_block = 0; + } else if (false_block == next_block) { + false_block = 0; + } + + if (math_insn->op == IR_MUL_OV) { + if (reverse) { + | beq =>true_block + } else { + | bne =>true_block + } + } else if (IR_IS_TYPE_SIGNED(type)) { + if (reverse) { + | bvc =>true_block + } else { + | bvs =>true_block + } + } else { + if (reverse) { + | bcc =>true_block + } else { + | bcs =>true_block + } + } + if (false_block) { + | b =>false_block + } +} + +static void ir_emit_reg_binop_int(ir_ctx *ctx, ir_ref def, ir_insn *insn) +{ + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + ir_insn *op_insn = &ctx->ir_base[insn->op2]; + ir_type type = op_insn->type; + ir_ref op2 = op_insn->op2; + ir_reg op2_reg = ctx->regs[insn->op2][2]; + ir_reg reg; + + IR_ASSERT(insn->op == IR_RSTORE); + reg = insn->op3; + + if (op2_reg == IR_REG_NONE) { + ir_val *val = &ctx->ir_base[op2].val; + + IR_ASSERT(IR_IS_CONST_REF(op2)); + switch (op_insn->op) { + default: + IR_ASSERT(0 && "NIY binary op"); + case IR_ADD: + | ASM_REG_REG_IMM_OP add, type, reg, reg, val->i32 + break; + case IR_SUB: + | ASM_REG_REG_IMM_OP sub, type, reg, reg, val->i32 + break; + case IR_OR: + | ASM_REG_REG_IMM_OP orr, type, reg, reg, val->i32 + break; + case IR_AND: + | ASM_REG_REG_IMM_OP and, type, reg, reg, val->i32 + break; + case IR_XOR: + | ASM_REG_REG_IMM_OP eor, type, reg, reg, val->i32 + break; + } + } else { + if (IR_REG_SPILLED(op2_reg) || IR_IS_CONST_REF(op2)) { + op2_reg = IR_REG_NUM(op2_reg); + ir_emit_load(ctx, type, op2_reg, op2); + } + switch (op_insn->op) { + default: + IR_ASSERT(0 && "NIY binary op"); + case IR_ADD: + | ASM_REG_REG_REG_OP add, type, reg, reg, op2_reg + break; + case IR_SUB: + | ASM_REG_REG_REG_OP sub, type, reg, reg, op2_reg + break; + case IR_MUL: + | ASM_REG_REG_REG_OP mul, type, reg, reg, op2_reg + break; + case IR_OR: + | ASM_REG_REG_REG_OP orr, type, reg, reg, op2_reg + break; + case IR_AND: + | ASM_REG_REG_REG_OP and, type, reg, reg, op2_reg 
+ break; + case IR_XOR: + | ASM_REG_REG_REG_OP eor, type, reg, reg, op2_reg + break; + } + } +} + +static void ir_emit_mul_div_mod_pwr2(ir_ctx *ctx, ir_ref def, ir_insn *insn) +{ + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + ir_type type = insn->type; + ir_ref op1 = insn->op1; + ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); + ir_reg op1_reg = ctx->regs[def][1]; + + IR_ASSERT(def_reg != IR_REG_NONE && op1_reg != IR_REG_NONE); + + if (IR_REG_SPILLED(op1_reg) || IR_IS_CONST_REF(op1)) { + op1_reg = IR_REG_NUM(op1_reg); + ir_emit_load(ctx, type, op1_reg, op1); + } + if (insn->op == IR_MUL) { + uint32_t shift = IR_LOG2(ctx->ir_base[insn->op2].val.u64); + if (shift == 1) { + | ASM_REG_REG_REG_OP add, insn->type, def_reg, op1_reg, op1_reg + } else { + | ASM_REG_REG_IMM_OP lsl, insn->type, def_reg, op1_reg, shift + } + } else if (insn->op == IR_DIV) { + uint32_t shift = IR_LOG2(ctx->ir_base[insn->op2].val.u64); + | ASM_REG_REG_IMM_OP lsr, insn->type, def_reg, op1_reg, shift + } else { + IR_ASSERT(insn->op == IR_MOD); + uint64_t mask = ctx->ir_base[insn->op2].val.u64 - 1; + | ASM_REG_REG_IMM_OP and, insn->type, def_reg, op1_reg, mask + } + if (IR_REG_SPILLED(ctx->regs[def][0])) { + ir_emit_store(ctx, type, def, def_reg); + } +} + +static void ir_emit_shift(ir_ctx *ctx, ir_ref def, ir_insn *insn) +{ + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + ir_type type = insn->type; + ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); + ir_reg op1_reg = ctx->regs[def][1]; + ir_reg op2_reg = ctx->regs[def][2]; + ir_reg tmp_reg; + + IR_ASSERT(def_reg != IR_REG_NONE && op1_reg != IR_REG_NONE && op2_reg != IR_REG_NONE); + if (IR_REG_SPILLED(op1_reg) || IR_IS_CONST_REF(insn->op1)) { + op1_reg = IR_REG_NUM(op1_reg); + ir_emit_load(ctx, type, op1_reg, insn->op1); + } + if (IR_REG_SPILLED(op2_reg)) { + op2_reg = IR_REG_NUM(op2_reg); + ir_emit_load(ctx, type, op2_reg, insn->op2); + } + switch (insn->op) { + default: + IR_ASSERT(0); + case IR_SHL: + | ASM_REG_REG_REG_OP lsl, type, def_reg, op1_reg, op2_reg + break; + case IR_SHR: + | ASM_REG_REG_REG_OP lsr, type, def_reg, op1_reg, op2_reg + break; + case IR_SAR: + | ASM_REG_REG_REG_OP asr, type, def_reg, op1_reg, op2_reg + break; + case IR_ROL: + tmp_reg = ctx->regs[def][3]; + IR_ASSERT(tmp_reg != IR_REG_NONE); + if (ir_type_size[type] == 8) { + | neg Rx(tmp_reg), Rx(op2_reg) + | ror Rx(def_reg), Rx(op1_reg), Rx(tmp_reg) + } else { + | neg Rw(tmp_reg), Rw(op2_reg) + | ror Rw(def_reg), Rw(op1_reg), Rw(tmp_reg) + } + break; + case IR_ROR: + | ASM_REG_REG_REG_OP ror, type, def_reg, op1_reg, op2_reg + break; + } + if (IR_REG_SPILLED(ctx->regs[def][0])) { + ir_emit_store(ctx, type, def, def_reg); + } +} + +static void ir_emit_shift_const(ir_ctx *ctx, ir_ref def, ir_insn *insn) +{ + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + uint32_t shift = ctx->ir_base[insn->op2].val.u64; + ir_type type = insn->type; + ir_ref op1 = insn->op1; + ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); + ir_reg op1_reg = ctx->regs[def][1]; + + IR_ASSERT(def_reg != IR_REG_NONE && op1_reg != IR_REG_NONE); + + if (IR_REG_SPILLED(op1_reg) || IR_IS_CONST_REF(op1)) { + op1_reg = IR_REG_NUM(op1_reg); + ir_emit_load(ctx, type, op1_reg, op1); + } + switch (insn->op) { + default: + IR_ASSERT(0); + case IR_SHL: + | ASM_REG_REG_IMM_OP lsl, type, def_reg, op1_reg, shift + break; + case IR_SHR: + | ASM_REG_REG_IMM_OP lsr, type, def_reg, op1_reg, shift + break; + case IR_SAR: + | ASM_REG_REG_IMM_OP asr, type, def_reg, 
op1_reg, shift
+			break;
+		case IR_ROL:
+			if (ir_type_size[type] == 8) {
+				shift = (64 - shift) % 64;
+				| ror Rx(def_reg), Rx(op1_reg), #shift
+			} else {
+				shift = (32 - shift) % 32;
+				| ror Rw(def_reg), Rw(op1_reg), #shift
+			}
+			break;
+		case IR_ROR:
+			| ASM_REG_REG_IMM_OP ror, type, def_reg, op1_reg, shift
+			break;
+	}
+	if (IR_REG_SPILLED(ctx->regs[def][0])) {
+		ir_emit_store(ctx, type, def, def_reg);
+	}
+}
+
+static void ir_emit_op_int(ir_ctx *ctx, ir_ref def, ir_insn *insn)
+{
+	ir_backend_data *data = ctx->data;
+	dasm_State **Dst = &data->dasm_state;
+	ir_type type = insn->type;
+	ir_ref op1 = insn->op1;
+	ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]);
+	ir_reg op1_reg = ctx->regs[def][1];
+
+	IR_ASSERT(def_reg != IR_REG_NONE && op1_reg != IR_REG_NONE);
+
+	if (IR_REG_SPILLED(op1_reg) || IR_IS_CONST_REF(op1)) {
+		op1_reg = IR_REG_NUM(op1_reg);
+		ir_emit_load(ctx, type, op1_reg, op1);
+	}
+	if (insn->op == IR_NOT) {
+		if (insn->type == IR_BOOL) {
+			| ASM_REG_IMM_OP cmp, type, op1_reg, 0
+			| cset Rw(def_reg), eq
+		} else {
+			| ASM_REG_REG_OP mvn, insn->type, def_reg, op1_reg
+		}
+	} else if (insn->op == IR_NEG) {
+		| ASM_REG_REG_OP neg, insn->type, def_reg, op1_reg
+	} else if (insn->op == IR_ABS) {
+		if (ir_type_size[type] == 8) {
+			| cmp Rx(op1_reg), #0
+			| cneg Rx(def_reg), Rx(op1_reg), lt
+		} else {
+			| cmp Rw(op1_reg), #0
+			| cneg Rw(def_reg), Rw(op1_reg), lt
+		}
+	} else {
+		IR_ASSERT(insn->op == IR_BSWAP);
+		| ASM_REG_REG_OP rev, insn->type, def_reg, op1_reg
+	}
+	if (IR_REG_SPILLED(ctx->regs[def][0])) {
+		ir_emit_store(ctx, type, def, def_reg);
+	}
+}
+
+static void ir_emit_op_fp(ir_ctx *ctx, ir_ref def, ir_insn *insn)
+{
+	ir_backend_data *data = ctx->data;
+	dasm_State **Dst = &data->dasm_state;
+	ir_type type = insn->type;
+	ir_ref op1 = insn->op1;
+	ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]);
+	ir_reg op1_reg = ctx->regs[def][1];
+
+	IR_ASSERT(def_reg != IR_REG_NONE && op1_reg != IR_REG_NONE);
+
+	if (IR_REG_SPILLED(op1_reg) || IR_IS_CONST_REF(op1)) {
+		op1_reg = IR_REG_NUM(op1_reg);
+		ir_emit_load(ctx, type, op1_reg, op1);
+	}
+	if (insn->op == IR_NEG) {
+		if (type == IR_DOUBLE) {
+			| fneg Rd(def_reg-IR_REG_FP_FIRST), Rd(op1_reg-IR_REG_FP_FIRST)
+		} else {
+			IR_ASSERT(type == IR_FLOAT);
+			| fneg Rs(def_reg-IR_REG_FP_FIRST), Rs(op1_reg-IR_REG_FP_FIRST)
+		}
+	} else {
+		IR_ASSERT(insn->op == IR_ABS);
+		if (type == IR_DOUBLE) {
+			| fabs Rd(def_reg-IR_REG_FP_FIRST), Rd(op1_reg-IR_REG_FP_FIRST)
+		} else {
+			IR_ASSERT(type == IR_FLOAT);
+			| fabs Rs(def_reg-IR_REG_FP_FIRST), Rs(op1_reg-IR_REG_FP_FIRST)
+		}
+	}
+	if (IR_REG_SPILLED(ctx->regs[def][0])) {
+		ir_emit_store(ctx, insn->type, def, def_reg);
+	}
+}
+
+static void ir_emit_binop_fp(ir_ctx *ctx, ir_ref def, ir_insn *insn)
+{
+	ir_backend_data *data = ctx->data;
+	dasm_State **Dst = &data->dasm_state;
+	ir_type type = insn->type;
+	ir_ref op1 = insn->op1;
+	ir_ref op2 = insn->op2;
+	ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]);
+	ir_reg op1_reg = ctx->regs[def][1];
+	ir_reg op2_reg = ctx->regs[def][2];
+
+	IR_ASSERT(def_reg != IR_REG_NONE && op1_reg != IR_REG_NONE && op2_reg != IR_REG_NONE);
+	if (IR_REG_SPILLED(op1_reg) || IR_IS_CONST_REF(op1)) {
+		op1_reg = IR_REG_NUM(op1_reg);
+		ir_emit_load(ctx, type, op1_reg, op1);
+	}
+	if (IR_REG_SPILLED(op2_reg) || IR_IS_CONST_REF(op2)) {
+		op2_reg = IR_REG_NUM(op2_reg);
+		if (op1 != op2) {
+			ir_emit_load(ctx, type, op2_reg, op2);
+		}
+	}
+	switch (insn->op) {
+		default:
+			IR_ASSERT(0 && "NIY binary op");
+		case IR_ADD:
+			| ASM_FP_REG_REG_REG_OP fadd, type, def_reg, op1_reg,
op2_reg + break; + case IR_SUB: + | ASM_FP_REG_REG_REG_OP fsub, type, def_reg, op1_reg, op2_reg + break; + case IR_MUL: + | ASM_FP_REG_REG_REG_OP fmul, type, def_reg, op1_reg, op2_reg + break; + case IR_DIV: + | ASM_FP_REG_REG_REG_OP fdiv, type, def_reg, op1_reg, op2_reg + break; + case IR_MIN: + | ASM_FP_REG_REG_REG_OP fmin, type, def_reg, op1_reg, op2_reg + break; + case IR_MAX: + | ASM_FP_REG_REG_REG_OP fmax, type, def_reg, op1_reg, op2_reg + break; + } + if (IR_REG_SPILLED(ctx->regs[def][0])) { + ir_emit_store(ctx, insn->type, def, def_reg); + } +} + +static void ir_emit_cmp_int_common(ir_ctx *ctx, ir_type type, ir_reg op1_reg, ir_ref op1, ir_reg op2_reg, ir_ref op2) +{ + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + + IR_ASSERT(op1_reg != IR_REG_NONE); + if (op2_reg != IR_REG_NONE) { + if (ir_type_size[type] == 8) { + | cmp Rx(op1_reg), Rx(op2_reg) + } else { + | cmp Rw(op1_reg), Rw(op2_reg) + } + } else { + IR_ASSERT(IR_IS_CONST_REF(op2)); + int32_t val = ctx->ir_base[op2].val.i32; + + if (ir_type_size[type] == 8) { + | cmp Rx(op1_reg), #val + } else { + | cmp Rw(op1_reg), #val + } + } +} + +static void ir_emit_cmp_int(ir_ctx *ctx, ir_ref def, ir_insn *insn) +{ + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + ir_type type = ctx->ir_base[insn->op1].type; + ir_op op = insn->op; + ir_ref op1 = insn->op1; + ir_ref op2 = insn->op2; + ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); + ir_reg op1_reg = ctx->regs[def][1]; + ir_reg op2_reg = ctx->regs[def][2]; + + IR_ASSERT(def_reg != IR_REG_NONE && op1_reg != IR_REG_NONE); + if (IR_REG_SPILLED(op1_reg) || IR_IS_CONST_REF(op1)) { + op1_reg = IR_REG_NUM(op1_reg); + ir_emit_load(ctx, type, op1_reg, op1); + } + if (op2_reg != IR_REG_NONE) { + if (IR_REG_SPILLED(op2_reg)) { + op2_reg = IR_REG_NUM(op2_reg); + if (op1 != op2) { + ir_emit_load(ctx, type, op2_reg, op2); + } + } + if (IR_IS_CONST_REF(op2)) { + ir_emit_load(ctx, type, op2_reg, op2); + } + } + if (IR_IS_CONST_REF(insn->op2) && ctx->ir_base[insn->op2].val.u64 == 0) { + if (op == IR_ULT) { + /* always false */ + ir_emit_load_imm_int(ctx, IR_BOOL, def_reg, 0); + if (IR_REG_SPILLED(ctx->regs[def][0])) { + ir_emit_store(ctx, insn->type, def, def_reg); + } + return; + } else if (op == IR_UGE) { + /* always true */ + ir_emit_load_imm_int(ctx, IR_BOOL, def_reg, 1); + if (IR_REG_SPILLED(ctx->regs[def][0])) { + ir_emit_store(ctx, insn->type, def, def_reg); + } + return; + } else if (op == IR_ULE) { + op = IR_EQ; + } else if (op == IR_UGT) { + op = IR_NE; + } + } + ir_emit_cmp_int_common(ctx, type, op1_reg, op1, op2_reg, op2); + switch (op) { + default: + IR_ASSERT(0 && "NIY binary op"); + case IR_EQ: + | cset Rw(def_reg), eq + break; + case IR_NE: + | cset Rw(def_reg), ne + break; + case IR_LT: + | cset Rw(def_reg), lt + break; + case IR_GE: + | cset Rw(def_reg), ge + break; + case IR_LE: + | cset Rw(def_reg), le + break; + case IR_GT: + | cset Rw(def_reg), gt + break; + case IR_ULT: + | cset Rw(def_reg), lo + break; + case IR_UGE: + | cset Rw(def_reg), hs + break; + case IR_ULE: + | cset Rw(def_reg), ls + break; + case IR_UGT: + | cset Rw(def_reg), hi + break; + } + if (IR_REG_SPILLED(ctx->regs[def][0])) { + ir_emit_store(ctx, insn->type, def, def_reg); + } +} + +static ir_op ir_emit_cmp_fp_common(ir_ctx *ctx, ir_ref cmp_ref, ir_insn *cmp_insn) +{ + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + ir_type type = ctx->ir_base[cmp_insn->op1].type; + ir_op op = cmp_insn->op; + ir_ref op1, op2; + ir_reg op1_reg, 
op2_reg; + + if (op == IR_LT || op == IR_LE) { + /* swap operands to avoid P flag check */ + op ^= 3; + op1 = cmp_insn->op2; + op2 = cmp_insn->op1; + op1_reg = ctx->regs[cmp_ref][2]; + op2_reg = ctx->regs[cmp_ref][1]; + } else { + op1 = cmp_insn->op1; + op2 = cmp_insn->op2; + op1_reg = ctx->regs[cmp_ref][1]; + op2_reg = ctx->regs[cmp_ref][2]; + } + + IR_ASSERT(op1_reg != IR_REG_NONE && op2_reg != IR_REG_NONE); + if (IR_REG_SPILLED(op1_reg) || IR_IS_CONST_REF(op1)) { + op1_reg = IR_REG_NUM(op1_reg); + ir_emit_load(ctx, type, op1_reg, op1); + } + if (IR_REG_SPILLED(op2_reg) || IR_IS_CONST_REF(op2)) { + op2_reg = IR_REG_NUM(op2_reg); + if (op1 != op2) { + ir_emit_load(ctx, type, op2_reg, op2); + } + } + if (type == IR_DOUBLE) { + | fcmp Rd(op1_reg-IR_REG_FP_FIRST), Rd(op2_reg-IR_REG_FP_FIRST) + } else { + IR_ASSERT(type == IR_FLOAT); + | fcmp Rs(op1_reg-IR_REG_FP_FIRST), Rs(op2_reg-IR_REG_FP_FIRST) + } + return op; +} + +static void ir_emit_cmp_fp(ir_ctx *ctx, ir_ref def, ir_insn *insn) +{ + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + ir_op op = ir_emit_cmp_fp_common(ctx, def, insn); + ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); +//??? ir_reg tmp_reg = ctx->regs[def][3]; // TODO: take into account vs flag + + IR_ASSERT(def_reg != IR_REG_NONE); + switch (op) { + default: + IR_ASSERT(0 && "NIY binary op"); + case IR_EQ: + | cset Rw(def_reg), eq + break; + case IR_NE: + | cset Rw(def_reg), ne + break; + case IR_LT: + | cset Rw(def_reg), mi + break; + case IR_GE: + | cset Rw(def_reg), ge + break; + case IR_LE: + | cset Rw(def_reg), ls + break; + case IR_GT: + | cset Rw(def_reg), gt + break; + } + if (IR_REG_SPILLED(ctx->regs[def][0])) { + ir_emit_store(ctx, insn->type, def, def_reg); + } +} + +static void ir_emit_jmp_true(ir_ctx *ctx, uint32_t b, ir_ref def) +{ + uint32_t true_block, false_block, next_block; + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + + ir_get_true_false_blocks(ctx, b, &true_block, &false_block, &next_block); + if (true_block != next_block) { + | b =>true_block + } +} + +static void ir_emit_jmp_false(ir_ctx *ctx, uint32_t b, ir_ref def) +{ + uint32_t true_block, false_block, next_block; + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + + ir_get_true_false_blocks(ctx, b, &true_block, &false_block, &next_block); + if (false_block != next_block) { + | b =>false_block + } +} + +static void ir_emit_jz(ir_ctx *ctx, uint8_t op, uint32_t b, ir_type type, ir_reg reg) +{ + uint32_t true_block, false_block, next_block; + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + + ir_get_true_false_blocks(ctx, b, &true_block, &false_block, &next_block); + if (true_block == next_block) { + IR_ASSERT(op < IR_LT); + op ^= 1; // reverse + true_block = false_block; + false_block = 0; + } else if (false_block == next_block) { + false_block = 0; + } + + if (op == IR_EQ) { + if (ir_type_size[type] == 8) { + | cbz Rx(reg), =>true_block + } else { + | cbz Rw(reg), =>true_block + } + } else { + IR_ASSERT(op == IR_NE); + if (ir_type_size[type] == 8) { + | cbnz Rx(reg), =>true_block + } else { + | cbnz Rw(reg), =>true_block + } + } + if (false_block) { + | b =>false_block + } +} + +static void ir_emit_jcc(ir_ctx *ctx, uint8_t op, uint32_t b, ir_ref def, ir_insn *insn, bool int_cmp) +{ + uint32_t true_block, false_block, next_block; + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + + ir_get_true_false_blocks(ctx, b, &true_block, &false_block, &next_block); + 
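+	/* IR keeps the comparison opcodes in negation pairs (EQ/NE, LT/GE, LE/GT,
+	 * ULT/UGE, ULE/UGT), which is what lets "op ^= 1" below flip a comparison
+	 * to its inverse when the branch targets are swapped. */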
if (true_block == next_block) { + /* swap to avoid unconditional JMP */ + op ^= 1; // reverse + true_block = false_block; + false_block = 0; + } else if (false_block == next_block) { + false_block = 0; + } + + if (int_cmp) { + switch (op) { + default: + IR_ASSERT(0 && "NIY binary op"); + case IR_EQ: + | beq =>true_block + break; + case IR_NE: + | bne =>true_block + break; + case IR_LT: + | blt =>true_block + break; + case IR_GE: + | bge =>true_block + break; + case IR_LE: + | ble =>true_block + break; + case IR_GT: + | bgt =>true_block + break; + case IR_ULT: + | blo =>true_block + break; + case IR_UGE: + | bhs =>true_block + break; + case IR_ULE: + | bls =>true_block + break; + case IR_UGT: + | bhi =>true_block + break; + } + } else { + switch (op) { + default: + IR_ASSERT(0 && "NIY binary op"); + case IR_EQ: + | beq =>true_block + break; + case IR_NE: + | bne =>true_block + break; + case IR_LT: + | bmi =>true_block + break; + case IR_GE: + | bge =>true_block + break; + case IR_LE: + | bls =>true_block + break; + case IR_GT: + | bgt =>true_block + break; +// case IR_ULT: fprintf(stderr, "\tjb .LL%d\n", true_block); break; +// case IR_UGE: fprintf(stderr, "\tjae .LL%d\n", true_block); break; +// case IR_ULE: fprintf(stderr, "\tjbe .LL%d\n", true_block); break; +// case IR_UGT: fprintf(stderr, "\tja .LL%d\n", true_block); break; + } + } + if (false_block) { + | b =>false_block + } +} + +static void ir_emit_cmp_and_branch_int(ir_ctx *ctx, uint32_t b, ir_ref def, ir_insn *insn) +{ + ir_insn *cmp_insn = &ctx->ir_base[insn->op2]; + ir_op op = cmp_insn->op; + ir_type type = ctx->ir_base[cmp_insn->op1].type; + ir_ref op1 = cmp_insn->op1; + ir_ref op2 = cmp_insn->op2; + ir_reg op1_reg = ctx->regs[insn->op2][1]; + ir_reg op2_reg = ctx->regs[insn->op2][2]; + + if (op1_reg != IR_REG_NONE && IR_REG_SPILLED(op1_reg)) { + op1_reg = IR_REG_NUM(op1_reg); + ir_emit_load(ctx, type, op1_reg, op1); + } + if (op1_reg != IR_REG_NONE && IR_IS_CONST_REF(op1)) { + ir_emit_load(ctx, type, op1_reg, op1); + } + if (op2_reg != IR_REG_NONE) { + if (IR_REG_SPILLED(op2_reg)) { + op2_reg = IR_REG_NUM(op2_reg); + if (op1 != op2) { + ir_emit_load(ctx, type, op2_reg, op2); + } + } + if (IR_IS_CONST_REF(op2)) { + ir_emit_load(ctx, type, op2_reg, op2); + } + } + if (IR_IS_CONST_REF(op2) && ctx->ir_base[op2].val.u64 == 0) { + if (op == IR_ULT) { + /* always false */ + ir_emit_jmp_false(ctx, b, def); + return; + } else if (op == IR_UGE) { + /* always true */ + ir_emit_jmp_true(ctx, b, def); + return; + } else if (op == IR_ULE) { + op = IR_EQ; + } else if (op == IR_UGT) { + op = IR_NE; + } + if (op1_reg != IR_REG_NONE && (op == IR_EQ || op == IR_NE)) { + ir_emit_jz(ctx, op, b, type, op1_reg); + return; + } + } + ir_emit_cmp_int_common(ctx, type, op1_reg, op1, op2_reg, op2); + ir_emit_jcc(ctx, op, b, def, insn, 1); +} + +static void ir_emit_cmp_and_branch_fp(ir_ctx *ctx, uint32_t b, ir_ref def, ir_insn *insn) +{ + ir_op op = ir_emit_cmp_fp_common(ctx, insn->op2, &ctx->ir_base[insn->op2]); + ir_emit_jcc(ctx, op, b, def, insn, 0); +} + +static void ir_emit_if_int(ir_ctx *ctx, uint32_t b, ir_ref def, ir_insn *insn) +{ + ir_type type = ctx->ir_base[insn->op2].type; + ir_reg op2_reg = ctx->regs[def][2]; + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + + if (IR_IS_CONST_REF(insn->op2)) { + uint32_t true_block, false_block, next_block; + + ir_get_true_false_blocks(ctx, b, &true_block, &false_block, &next_block); + if (ir_const_is_true(&ctx->ir_base[insn->op2])) { + if (true_block != next_block) { + | b 
=>true_block + } + } else { + if (false_block != next_block) { + | b =>false_block + } + } + return; + } + IR_ASSERT(op2_reg != IR_REG_NONE); + if (IR_REG_SPILLED(op2_reg) || IR_IS_CONST_REF(insn->op2)) { + op2_reg = IR_REG_NUM(op2_reg); + ir_emit_load(ctx, type, op2_reg, insn->op2); + } + | ASM_REG_IMM_OP cmp, type, op2_reg, 0 + ir_emit_jcc(ctx, IR_NE, b, def, insn, 1); +} + +static void ir_emit_cond(ir_ctx *ctx, ir_ref def, ir_insn *insn) +{ + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + ir_type type = insn->type; + ir_ref op1 = insn->op1; + ir_ref op2 = insn->op2; + ir_ref op3 = insn->op3; + ir_type op1_type = ctx->ir_base[op1].type; + ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); + ir_reg op1_reg = ctx->regs[def][1]; + ir_reg op2_reg = ctx->regs[def][2]; + ir_reg op3_reg = ctx->regs[def][3]; + + IR_ASSERT(def_reg != IR_REG_NONE); + + if (IR_REG_SPILLED(op2_reg) || IR_IS_CONST_REF(op2)) { + op2_reg = IR_REG_NUM(op2_reg); + ir_emit_load(ctx, type, op2_reg, op2); + if (op1 == op2) { + op1_reg = op2_reg; + } + if (op3 == op2) { + op3_reg = op2_reg; + } + } + if (op3 != op2 && (IR_REG_SPILLED(op3_reg) || IR_IS_CONST_REF(op3))) { + op3_reg = IR_REG_NUM(op3_reg); + ir_emit_load(ctx, type, op3_reg, op3); + if (op1 == op3) { + op1_reg = op3_reg; + } + } + if (op1 != op2 && op1 != op3 && (IR_REG_SPILLED(op1_reg) || IR_IS_CONST_REF(op1))) { + op1_reg = IR_REG_NUM(op1_reg); + ir_emit_load(ctx, op1_type, op1_reg, op1); + } + + if (IR_IS_TYPE_INT(op1_type)) { + | ASM_REG_IMM_OP cmp, op1_type, op1_reg, 0 + } else { + | ASM_FP_REG_IMM_OP fcmp, op1_type, op1_reg, 0.0 + } + + if (IR_IS_TYPE_INT(type)) { + if (ir_type_size[type] == 8) { + | csel Rx(def_reg), Rx(op2_reg), Rx(op3_reg), eq + } else { + | csel Rw(def_reg), Rw(op2_reg), Rw(op3_reg), eq + } + } else { + if (type == IR_DOUBLE) { + | fcsel Rd(def_reg-IR_REG_FP_FIRST), Rd(op2_reg-IR_REG_FP_FIRST), Rd(op3_reg-IR_REG_FP_FIRST), eq + } else { + | fcsel Rs(def_reg-IR_REG_FP_FIRST), Rs(op2_reg-IR_REG_FP_FIRST), Rs(op3_reg-IR_REG_FP_FIRST), eq + } + } + + if (IR_REG_SPILLED(ctx->regs[def][0])) { + ir_emit_store(ctx, type, def, def_reg); + } +} + +static void ir_emit_return_void(ir_ctx *ctx) +{ + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + + ir_emit_epilogue(ctx); + | ret +} + +static void ir_emit_return_int(ir_ctx *ctx, ir_ref ref, ir_insn *insn) +{ + ir_reg op2_reg = ctx->regs[ref][2]; + + if (op2_reg != IR_REG_INT_RET1) { + ir_type type = ctx->ir_base[insn->op2].type; + + if (op2_reg != IR_REG_NONE && !IR_REG_SPILLED(op2_reg)) { + ir_emit_mov(ctx, type, IR_REG_INT_RET1, op2_reg); + } else { + ir_emit_load(ctx, type, IR_REG_INT_RET1, insn->op2); + } + } + ir_emit_return_void(ctx); +} + +static void ir_emit_return_fp(ir_ctx *ctx, ir_ref ref, ir_insn *insn) +{ + ir_reg op2_reg = ctx->regs[ref][2]; + ir_type type = ctx->ir_base[insn->op2].type; + + if (op2_reg != IR_REG_FP_RET1) { + if (op2_reg != IR_REG_NONE && !IR_REG_SPILLED(op2_reg)) { + ir_emit_fp_mov(ctx, type, IR_REG_FP_RET1, op2_reg); + } else { + ir_emit_load(ctx, type, IR_REG_FP_RET1, insn->op2); + } + } + ir_emit_return_void(ctx); +} + +static void ir_emit_sext(ir_ctx *ctx, ir_ref def, ir_insn *insn) +{ + ir_type dst_type = insn->type; + ir_type src_type = ctx->ir_base[insn->op1].type; + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); + ir_reg op1_reg = ctx->regs[def][1]; + + IR_ASSERT(IR_IS_TYPE_INT(src_type)); + IR_ASSERT(IR_IS_TYPE_INT(dst_type)); + 
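+ /* Two code paths below (comment added for clarity): a register-to-register sxtb/sxth/sxtw when the source is in a register, or a sign-extending ldrsb/ldrsh/ldrsw straight from the spill slot when it is not; constant operands are expected to have been folded before code generation. */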
IR_ASSERT(ir_type_size[dst_type] > ir_type_size[src_type]); + IR_ASSERT(def_reg != IR_REG_NONE); + if ((op1_reg != IR_REG_NONE) && (IR_REG_SPILLED(op1_reg) || IR_IS_CONST_REF(insn->op1))) { + op1_reg = IR_REG_NUM(op1_reg); + ir_emit_load(ctx, src_type, op1_reg, insn->op1); + } + + if (op1_reg != IR_REG_NONE) { + if (ir_type_size[src_type] == 1) { + if (ir_type_size[dst_type] == 2) { + | sxtb Rw(def_reg), Rw(op1_reg) + } else if (ir_type_size[dst_type] == 4) { + | sxtb Rw(def_reg), Rw(op1_reg) + } else { + IR_ASSERT(ir_type_size[dst_type] == 8); + | sxtb Rx(def_reg), Rx(op1_reg) + } + } else if (ir_type_size[src_type] == 2) { + if (ir_type_size[dst_type] == 4) { + | sxth Rw(def_reg), Rw(op1_reg) + } else { + IR_ASSERT(ir_type_size[dst_type] == 8); + | sxth Rx(def_reg), Rx(op1_reg) + } + } else { + IR_ASSERT(ir_type_size[src_type] == 4); + IR_ASSERT(ir_type_size[dst_type] == 8); + | sxtw Rx(def_reg), Rw(op1_reg) + } + } else if (IR_IS_CONST_REF(insn->op1)) { + IR_ASSERT(0); + } else { + ir_reg fp; + int32_t offset = ir_ref_spill_slot(ctx, insn->op1, &fp); + + if (ir_type_size[src_type] == 1) { + if (ir_type_size[dst_type] == 2) { + | ldrsb Rw(def_reg), [Rx(fp), #offset] + } else if (ir_type_size[dst_type] == 4) { + | ldrsb Rw(def_reg), [Rx(fp), #offset] + } else { + IR_ASSERT(ir_type_size[dst_type] == 8); + | ldrsb Rx(def_reg), [Rx(fp), #offset] + } + } else if (ir_type_size[src_type] == 2) { + if (ir_type_size[dst_type] == 4) { + | ldrsh Rw(def_reg), [Rx(fp), #offset] + } else { + IR_ASSERT(ir_type_size[dst_type] == 8); + | ldrsh Rx(def_reg), [Rx(fp), #offset] + } + } else { + IR_ASSERT(ir_type_size[src_type] == 4); + IR_ASSERT(ir_type_size[dst_type] == 8); + | ldrsw Rx(def_reg), [Rx(fp), #offset] + } + } + if (IR_REG_SPILLED(ctx->regs[def][0])) { + ir_emit_store(ctx, dst_type, def, def_reg); + } +} + +static void ir_emit_zext(ir_ctx *ctx, ir_ref def, ir_insn *insn) +{ + ir_type dst_type = insn->type; + ir_type src_type = ctx->ir_base[insn->op1].type; + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); + ir_reg op1_reg = ctx->regs[def][1]; + + IR_ASSERT(IR_IS_TYPE_INT(src_type)); + IR_ASSERT(IR_IS_TYPE_INT(dst_type)); + IR_ASSERT(ir_type_size[dst_type] > ir_type_size[src_type]); + IR_ASSERT(def_reg != IR_REG_NONE); + if ((op1_reg != IR_REG_NONE) && (IR_REG_SPILLED(op1_reg) || IR_IS_CONST_REF(insn->op1))) { + op1_reg = IR_REG_NUM(op1_reg); + ir_emit_load(ctx, src_type, op1_reg, insn->op1); + } + + if (op1_reg != IR_REG_NONE) { + if (ir_type_size[src_type] == 1) { + | uxtb Rw(def_reg), Rw(op1_reg) + } else if (ir_type_size[src_type] == 2) { + | uxth Rw(def_reg), Rw(op1_reg) + } else { + | mov Rw(def_reg), Rw(op1_reg) + } + } else if (IR_IS_CONST_REF(insn->op1)) { + IR_ASSERT(0); + } else { + ir_reg fp; + int32_t offset = ir_ref_spill_slot(ctx, insn->op1, &fp); + + if (ir_type_size[src_type] == 1) { + | ldrb Rw(def_reg), [Rx(fp), #offset] + } else if (ir_type_size[src_type] == 2) { + | ldrh Rw(def_reg), [Rx(fp), #offset] + } else { + IR_ASSERT(ir_type_size[src_type] == 4); + IR_ASSERT(ir_type_size[dst_type] == 8); + | ldr Rw(def_reg), [Rx(fp), #offset] + } + } + if (IR_REG_SPILLED(ctx->regs[def][0])) { + ir_emit_store(ctx, dst_type, def, def_reg); + } +} + +static void ir_emit_trunc(ir_ctx *ctx, ir_ref def, ir_insn *insn) +{ + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + ir_type dst_type = insn->type; + ir_type src_type = ctx->ir_base[insn->op1].type; + ir_reg def_reg = 
IR_REG_NUM(ctx->regs[def][0]); + ir_reg op1_reg = ctx->regs[def][1]; + + IR_ASSERT(IR_IS_TYPE_INT(src_type)); + IR_ASSERT(IR_IS_TYPE_INT(dst_type)); + IR_ASSERT(ir_type_size[dst_type] < ir_type_size[src_type]); + IR_ASSERT(def_reg != IR_REG_NONE); + if (op1_reg != IR_REG_NONE && IR_REG_SPILLED(op1_reg)) { + op1_reg = IR_REG_NUM(op1_reg); + ir_emit_load(ctx, src_type, op1_reg, insn->op1); + } + if (op1_reg != IR_REG_NONE) { + if (ir_type_size[dst_type] == 1) { + | and Rw(def_reg), Rw(op1_reg), #0xff + } else if (ir_type_size[dst_type] == 2) { + | and Rw(def_reg), Rw(op1_reg), #0xffff + } else if (op1_reg != def_reg) { + ir_emit_mov(ctx, dst_type, def_reg, op1_reg); + } + } else { + ir_emit_load(ctx, dst_type, def_reg, insn->op1); + } + if (IR_REG_SPILLED(ctx->regs[def][0])) { + ir_emit_store(ctx, dst_type, def, def_reg); + } +} + +static void ir_emit_bitcast(ir_ctx *ctx, ir_ref def, ir_insn *insn) +{ + ir_type dst_type = insn->type; + ir_type src_type = ctx->ir_base[insn->op1].type; + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); + ir_reg op1_reg = ctx->regs[def][1]; + + IR_ASSERT(ir_type_size[dst_type] == ir_type_size[src_type]); + IR_ASSERT(def_reg != IR_REG_NONE); + if (op1_reg != IR_REG_NONE && IR_REG_SPILLED(op1_reg)) { + op1_reg = IR_REG_NUM(op1_reg); + ir_emit_load(ctx, src_type, op1_reg, insn->op1); + } + if (IR_IS_TYPE_INT(src_type) && IR_IS_TYPE_INT(dst_type)) { + if (op1_reg != IR_REG_NONE) { + if (IR_REG_SPILLED(op1_reg)) { + op1_reg = IR_REG_NUM(op1_reg); + ir_emit_load(ctx, src_type, op1_reg, insn->op1); + } + if (op1_reg != def_reg) { + ir_emit_mov(ctx, dst_type, def_reg, op1_reg); + } + } else { + ir_emit_load(ctx, dst_type, def_reg, insn->op1); + } + } else if (IR_IS_TYPE_FP(src_type) && IR_IS_TYPE_FP(dst_type)) { + if (op1_reg != IR_REG_NONE) { + if (IR_REG_SPILLED(op1_reg)) { + op1_reg = IR_REG_NUM(op1_reg); + ir_emit_load(ctx, src_type, op1_reg, insn->op1); + } + if (op1_reg != def_reg) { + ir_emit_fp_mov(ctx, dst_type, def_reg, op1_reg); + } + } else { + ir_emit_load(ctx, dst_type, def_reg, insn->op1); + } + } else if (IR_IS_TYPE_FP(src_type)) { + IR_ASSERT(IR_IS_TYPE_INT(dst_type)); + if (op1_reg != IR_REG_NONE) { + if (IR_REG_SPILLED(op1_reg)) { + op1_reg = IR_REG_NUM(op1_reg); + ir_emit_load(ctx, src_type, op1_reg, insn->op1); + } + if (src_type == IR_DOUBLE) { + | fmov Rx(def_reg), Rd(op1_reg-IR_REG_FP_FIRST) + } else { + IR_ASSERT(src_type == IR_FLOAT); + | fmov Rw(def_reg), Rs(op1_reg-IR_REG_FP_FIRST) + } + } else if (IR_IS_CONST_REF(insn->op1)) { + IR_ASSERT(0); //??? + } else { + ir_reg fp; + int32_t offset = ir_ref_spill_slot(ctx, insn->op1, &fp); + + if (src_type == IR_DOUBLE) { + | ldr Rx(def_reg), [Rx(fp), #offset] + } else { + IR_ASSERT(src_type == IR_FLOAT); + | ldr Rw(def_reg), [Rx(fp), #offset] + } + } + } else if (IR_IS_TYPE_FP(dst_type)) { + IR_ASSERT(IR_IS_TYPE_INT(src_type)); + if (op1_reg != IR_REG_NONE) { + if (IR_REG_SPILLED(op1_reg)) { + op1_reg = IR_REG_NUM(op1_reg); + ir_emit_load(ctx, src_type, op1_reg, insn->op1); + } + if (dst_type == IR_DOUBLE) { + | fmov Rd(def_reg-IR_REG_FP_FIRST), Rx(op1_reg) + } else { + IR_ASSERT(dst_type == IR_FLOAT); + | fmov Rs(def_reg-IR_REG_FP_FIRST), Rw(op1_reg) + } + } else if (IR_IS_CONST_REF(insn->op1)) { + IR_ASSERT(0); //??? 
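+ /* bitcast of a constant is not expected to reach the back end */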
+ } else { + ir_reg fp; + int32_t offset = ir_ref_spill_slot(ctx, insn->op1, &fp); + + if (dst_type == IR_DOUBLE) { + | ldr Rd(def_reg-IR_REG_FP_FIRST), [Rx(fp), #offset] + } else { + IR_ASSERT(dst_type == IR_FLOAT); + | ldr Rs(def_reg-IR_REG_FP_FIRST), [Rx(fp), #offset] + } + } + } + if (IR_REG_SPILLED(ctx->regs[def][0])) { + ir_emit_store(ctx, dst_type, def, def_reg); + } +} + +static void ir_emit_int2fp(ir_ctx *ctx, ir_ref def, ir_insn *insn) +{ + ir_type dst_type = insn->type; + ir_type src_type = ctx->ir_base[insn->op1].type; + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); + ir_reg op1_reg = ctx->regs[def][1]; + + IR_ASSERT(IR_IS_TYPE_INT(src_type)); + IR_ASSERT(IR_IS_TYPE_FP(dst_type)); + IR_ASSERT(def_reg != IR_REG_NONE && op1_reg != IR_REG_NONE); + if (IR_REG_SPILLED(op1_reg) || IR_IS_CONST_REF(insn->op1)) { + op1_reg = IR_REG_NUM(op1_reg); + ir_emit_load(ctx, src_type, op1_reg, insn->op1); + } + + if (ir_type_size[src_type] == 8) { + if (IR_IS_TYPE_SIGNED(src_type)) { + if (dst_type == IR_DOUBLE) { + | scvtf Rd(def_reg-IR_REG_FP_FIRST), Rx(op1_reg) + } else { + IR_ASSERT(dst_type == IR_FLOAT); + | scvtf Rs(def_reg-IR_REG_FP_FIRST), Rx(op1_reg) + } + } else { + if (dst_type == IR_DOUBLE) { + | ucvtf Rd(def_reg-IR_REG_FP_FIRST), Rx(op1_reg) + } else { + IR_ASSERT(dst_type == IR_FLOAT); + | ucvtf Rs(def_reg-IR_REG_FP_FIRST), Rx(op1_reg) + } + } + } else { + if (IR_IS_TYPE_SIGNED(src_type)) { + if (dst_type == IR_DOUBLE) { + | scvtf Rd(def_reg-IR_REG_FP_FIRST), Rw(op1_reg) + } else { + IR_ASSERT(dst_type == IR_FLOAT); + | scvtf Rs(def_reg-IR_REG_FP_FIRST), Rw(op1_reg) + } + } else { + if (dst_type == IR_DOUBLE) { + | ucvtf Rd(def_reg-IR_REG_FP_FIRST), Rw(op1_reg) + } else { + IR_ASSERT(dst_type == IR_FLOAT); + | ucvtf Rs(def_reg-IR_REG_FP_FIRST), Rw(op1_reg) + } + } + } + if (IR_REG_SPILLED(ctx->regs[def][0])) { + ir_emit_store(ctx, dst_type, def, def_reg); + } +} + +static void ir_emit_fp2int(ir_ctx *ctx, ir_ref def, ir_insn *insn) +{ + ir_type dst_type = insn->type; + ir_type src_type = ctx->ir_base[insn->op1].type; + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); + ir_reg op1_reg = ctx->regs[def][1]; + + IR_ASSERT(IR_IS_TYPE_FP(src_type)); + IR_ASSERT(IR_IS_TYPE_INT(dst_type)); + IR_ASSERT(def_reg != IR_REG_NONE && op1_reg != IR_REG_NONE); + if (IR_REG_SPILLED(op1_reg) || IR_IS_CONST_REF(insn->op1)) { + op1_reg = IR_REG_NUM(op1_reg); + ir_emit_load(ctx, src_type, op1_reg, insn->op1); + } + if (ir_type_size[dst_type] == 8) { + if (IR_IS_TYPE_SIGNED(dst_type)) { + if (src_type == IR_DOUBLE) { + | fcvtzs Rx(def_reg), Rd(op1_reg-IR_REG_FP_FIRST) + } else { + IR_ASSERT(src_type == IR_FLOAT); + | fcvtzs Rx(def_reg), Rs(op1_reg-IR_REG_FP_FIRST) + } + } else { + if (src_type == IR_DOUBLE) { + | fcvtzu Rx(def_reg), Rd(op1_reg-IR_REG_FP_FIRST) + } else { + IR_ASSERT(src_type == IR_FLOAT); + | fcvtzu Rx(def_reg), Rs(op1_reg-IR_REG_FP_FIRST) + } + } + } else { + if (IR_IS_TYPE_SIGNED(dst_type)) { + if (src_type == IR_DOUBLE) { + | fcvtzs Rw(def_reg), Rd(op1_reg-IR_REG_FP_FIRST) + } else { + IR_ASSERT(src_type == IR_FLOAT); + | fcvtzs Rw(def_reg), Rs(op1_reg-IR_REG_FP_FIRST) + } + } else { + if (src_type == IR_DOUBLE) { + | fcvtzu Rw(def_reg), Rd(op1_reg-IR_REG_FP_FIRST) + } else { + IR_ASSERT(src_type == IR_FLOAT); + | fcvtzu Rw(def_reg), Rs(op1_reg-IR_REG_FP_FIRST) + } + } + } + if (IR_REG_SPILLED(ctx->regs[def][0])) { + ir_emit_store(ctx, dst_type, def, def_reg); + } 
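+ /* Note (added for clarity): AArch64 fcvtzs/fcvtzu saturate out-of-range values and convert NaN to zero, rather than producing the x86-style "integer indefinite" result. */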
+} + +static void ir_emit_fp2fp(ir_ctx *ctx, ir_ref def, ir_insn *insn) +{ + ir_type dst_type = insn->type; + ir_type src_type = ctx->ir_base[insn->op1].type; + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); + ir_reg op1_reg = ctx->regs[def][1]; + + IR_ASSERT(IR_IS_TYPE_FP(src_type)); + IR_ASSERT(IR_IS_TYPE_FP(dst_type)); + IR_ASSERT(def_reg != IR_REG_NONE && op1_reg != IR_REG_NONE); + if (IR_REG_SPILLED(op1_reg) || IR_IS_CONST_REF(insn->op1)) { + op1_reg = IR_REG_NUM(op1_reg); + ir_emit_load(ctx, src_type, op1_reg, insn->op1); + } + if (src_type == dst_type) { + if (op1_reg != def_reg) { + ir_emit_fp_mov(ctx, dst_type, def_reg, op1_reg); + } + } else if (src_type == IR_DOUBLE) { + | fcvt Rs(def_reg-IR_REG_FP_FIRST), Rd(op1_reg-IR_REG_FP_FIRST) + } else { + IR_ASSERT(src_type == IR_FLOAT); + | fcvt Rd(def_reg-IR_REG_FP_FIRST), Rs(op1_reg-IR_REG_FP_FIRST) + } + if (IR_REG_SPILLED(ctx->regs[def][0])) { + ir_emit_store(ctx, dst_type, def, def_reg); + } +} + +static void ir_emit_copy_int(ir_ctx *ctx, ir_ref def, ir_insn *insn) +{ + ir_ref type = insn->type; + ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); + ir_reg op1_reg = ctx->regs[def][1]; + + IR_ASSERT(def_reg != IR_REG_NONE || op1_reg != IR_REG_NONE); + if (op1_reg != IR_REG_NONE && IR_REG_SPILLED(op1_reg)) { + op1_reg = IR_REG_NUM(op1_reg); + ir_emit_load(ctx, type, op1_reg, insn->op1); + } + if (def_reg == op1_reg) { + /* same reg */ + } else if (def_reg != IR_REG_NONE && op1_reg != IR_REG_NONE) { + ir_emit_mov(ctx, type, def_reg, op1_reg); + } else if (def_reg != IR_REG_NONE) { + ir_emit_load(ctx, type, def_reg, insn->op1); + } else if (op1_reg != IR_REG_NONE) { + ir_emit_store(ctx, type, def, op1_reg); + } else { + IR_ASSERT(0); + } + if (def_reg != IR_REG_NONE && IR_REG_SPILLED(ctx->regs[def][0])) { + ir_emit_store(ctx, type, def, def_reg); + } +} + +static void ir_emit_copy_fp(ir_ctx *ctx, ir_ref def, ir_insn *insn) +{ + ir_type type = insn->type; + ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); + ir_reg op1_reg = ctx->regs[def][1]; + + IR_ASSERT(def_reg != IR_REG_NONE || op1_reg != IR_REG_NONE); + if (op1_reg != IR_REG_NONE && IR_REG_SPILLED(op1_reg)) { + op1_reg = IR_REG_NUM(op1_reg); + ir_emit_load(ctx, type, op1_reg, insn->op1); + } + if (def_reg == op1_reg) { + /* same reg */ + } else if (def_reg != IR_REG_NONE && op1_reg != IR_REG_NONE) { + ir_emit_fp_mov(ctx, type, def_reg, op1_reg); + } else if (def_reg != IR_REG_NONE) { + ir_emit_load(ctx, type, def_reg, insn->op1); + } else if (op1_reg != IR_REG_NONE) { + ir_emit_store(ctx, type, def, op1_reg); + } else { + IR_ASSERT(0); + } + if (def_reg != IR_REG_NONE && IR_REG_SPILLED(ctx->regs[def][0])) { + ir_emit_store(ctx, type, def, def_reg); + } +} + +static void ir_emit_vaddr(ir_ctx *ctx, ir_ref def, ir_insn *insn) +{ + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + ir_ref type = insn->type; + ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); + int32_t offset; + ir_reg fp; + + IR_ASSERT(def_reg != IR_REG_NONE); + offset = ir_var_spill_slot(ctx, insn->op1, &fp); + | add Rx(def_reg), Rx(fp), #offset + if (IR_REG_SPILLED(ctx->regs[def][0])) { + ir_emit_store(ctx, type, def, def_reg); + } +} + +static void ir_emit_vload(ir_ctx *ctx, ir_ref def, ir_insn *insn) +{ + ir_insn *var_insn = &ctx->ir_base[insn->op2]; + ir_ref type = insn->type; + ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); + ir_reg fp; + int32_t offset; + + IR_ASSERT(var_insn->op == IR_VAR); + fp = (ctx->flags & 
IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER; + offset = IR_SPILL_POS_TO_OFFSET(var_insn->op3); + if (def_reg == IR_REG_NONE && ir_is_same_mem_var(ctx, def, var_insn->op3)) { + return; // fake load + } + IR_ASSERT(def_reg != IR_REG_NONE); + if (IR_IS_TYPE_INT(type)) { + ir_emit_load_mem_int(ctx, type, def_reg, fp, offset); + } else { + ir_emit_load_mem_fp(ctx, type, def_reg, fp, offset); + } + if (IR_REG_SPILLED(ctx->regs[def][0])) { + ir_emit_store(ctx, type, def, def_reg); + } +} + +static void ir_emit_vstore(ir_ctx *ctx, ir_ref ref, ir_insn *insn) +{ + ir_insn *var_insn = &ctx->ir_base[insn->op2]; + ir_insn *val_insn = &ctx->ir_base[insn->op3]; + ir_ref type = val_insn->type; + ir_reg op3_reg = ctx->regs[ref][3]; + ir_reg fp; + int32_t offset; + + IR_ASSERT(var_insn->op == IR_VAR); + fp = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER; + offset = IR_SPILL_POS_TO_OFFSET(var_insn->op3); + IR_ASSERT(op3_reg != IR_REG_NONE); + if (IR_REG_SPILLED(op3_reg) && ir_is_same_mem_var(ctx, insn->op3, var_insn->op3)) { + return; // fake store + } + if (IR_REG_SPILLED(op3_reg) || IR_IS_CONST_REF(insn->op3)) { + op3_reg = IR_REG_NUM(op3_reg); + ir_emit_load(ctx, type, op3_reg, insn->op3); + } + if (IR_IS_TYPE_INT(type)) { + ir_emit_store_mem_int(ctx, type, fp, offset, op3_reg); + } else { + ir_emit_store_mem_fp(ctx, type, fp, offset, op3_reg); + } +} + +static int32_t ir_fuse_addr(ir_ctx *ctx, ir_ref ref, ir_reg *preg1, ir_reg *preg2) +{ + ir_insn *addr_insn = &ctx->ir_base[ref]; + ir_reg reg; + + IR_ASSERT(addr_insn->op == IR_ADD); + IR_ASSERT(!IR_IS_CONST_REF(addr_insn->op1) && IR_IS_CONST_REF(addr_insn->op2)); + reg = ctx->regs[ref][1]; + if (IR_REG_SPILLED(reg)) { + reg = IR_REG_NUM(reg); + ir_emit_load(ctx, IR_ADDR, reg, addr_insn->op1); + } + *preg1 = reg; + *preg2 = IR_REG_NONE; // TODO: ??? 
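+ /* The fused address is ADD(base, const): the base register is returned through *preg1 and the constant displacement as the function result. */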
+ return ctx->ir_base[addr_insn->op2].val.i32; +} + +static void ir_emit_load_int(ir_ctx *ctx, ir_ref def, ir_insn *insn) +{ + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + ir_ref type = insn->type; + ir_reg op2_reg = ctx->regs[def][2]; + ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); + + if (ctx->use_lists[def].count == 1) { + /* dead load */ + return; + } + IR_ASSERT(def_reg != IR_REG_NONE); + if (!IR_IS_CONST_REF(insn->op2) && (ir_rule(ctx, insn->op2) & IR_FUSED)) { + ir_reg op1_reg; + int32_t offset = ir_fuse_addr(ctx, insn->op2, &op1_reg, &op2_reg); + + if (op2_reg == IR_REG_NONE) { + if (IR_REG_SPILLED(ctx->regs[def][0]) && ir_is_same_spill_slot(ctx, def, op1_reg, offset)) { + if (!ir_may_avoid_spill_load(ctx, def, def)) { + ir_emit_load_mem_int(ctx, type, def_reg, op1_reg, offset); + } + /* avoid load to the same location (valid only when register is not reused) */ + return; + } + ir_emit_load_mem_int(ctx, type, def_reg, op1_reg, offset); + } else { + switch (ir_type_size[type]) { + default: + IR_ASSERT(0); + case 8: + | ldr Rx(def_reg), [Rx(op1_reg), Rx(op2_reg)] + break; + case 4: + | ldr Rw(def_reg), [Rx(op1_reg), Rx(op2_reg)] + break; + case 2: + if (IR_IS_TYPE_SIGNED(type)) { + | ldrsh Rw(def_reg), [Rx(op1_reg), Rx(op2_reg)] + } else { + | ldrh Rw(def_reg), [Rx(op1_reg), Rx(op2_reg)] + } + break; + case 1: + if (IR_IS_TYPE_SIGNED(type)) { + | ldrsb Rw(def_reg), [Rx(op1_reg), Rx(op2_reg)] + } else { + | ldrb Rw(def_reg), [Rx(op1_reg), Rx(op2_reg)] + } + break; + } + } + } else { + if (op2_reg == IR_REG_NONE) { + op2_reg = def_reg; + } + if (IR_REG_SPILLED(op2_reg) || IR_IS_CONST_REF(insn->op2)) { + op2_reg = IR_REG_NUM(op2_reg); + IR_ASSERT(ctx->ir_base[insn->op2].type == IR_ADDR); + ir_emit_load(ctx, IR_ADDR, op2_reg, insn->op2); + } + ir_emit_load_mem_int(ctx, type, def_reg, op2_reg, 0); + } + if (IR_REG_SPILLED(ctx->regs[def][0])) { + ir_emit_store(ctx, type, def, def_reg); + } +} + +static void ir_emit_load_fp(ir_ctx *ctx, ir_ref def, ir_insn *insn) +{ + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + ir_ref type = insn->type; + ir_reg op2_reg = ctx->regs[def][2]; + ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); + + if (ctx->use_lists[def].count == 1) { + /* dead load */ + return; + } + IR_ASSERT(def_reg != IR_REG_NONE); + if (!IR_IS_CONST_REF(insn->op2) && (ir_rule(ctx, insn->op2) & IR_FUSED)) { + ir_reg op1_reg; + int32_t offset = ir_fuse_addr(ctx, insn->op2, &op1_reg, &op2_reg); + + if (op2_reg == IR_REG_NONE) { + if (IR_REG_SPILLED(ctx->regs[def][0]) && ir_is_same_spill_slot(ctx, def, op1_reg, offset)) { + if (!ir_may_avoid_spill_load(ctx, def, def)) { + ir_emit_load_mem_fp(ctx, type, def_reg, op1_reg, offset); + } + /* avoid load to the same location (valid only when register is not reused) */ + return; + } + ir_emit_load_mem_fp(ctx, type, def_reg, op1_reg, offset); + } else { + if (type == IR_DOUBLE) { + | ldr Rd(def_reg-IR_REG_FP_FIRST), [Rx(op1_reg), Rx(op2_reg)] + } else { + IR_ASSERT(type == IR_FLOAT); + | ldr Rs(def_reg-IR_REG_FP_FIRST), [Rx(op1_reg), Rx(op2_reg)] + } + } + } else { + if (op2_reg != IR_REG_NONE && IR_REG_SPILLED(op2_reg)) { + op2_reg = IR_REG_NUM(op2_reg); + ir_emit_load(ctx, type, op2_reg, insn->op2); + } + if (op2_reg == IR_REG_NONE) { + op2_reg = def_reg; + IR_ASSERT(ctx->ir_base[insn->op2].type == IR_ADDR); + ir_emit_load(ctx, IR_ADDR, op2_reg, insn->op2); + } + ir_emit_load_mem_fp(ctx, type, def_reg, op2_reg, 0); + } + if (IR_REG_SPILLED(ctx->regs[def][0])) { + ir_emit_store(ctx, 
type, def, def_reg); + } +} + +static void ir_emit_store_int(ir_ctx *ctx, ir_ref ref, ir_insn *insn) +{ + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + ir_insn *val_insn = &ctx->ir_base[insn->op3]; + ir_ref type = val_insn->type; + ir_reg op2_reg = ctx->regs[ref][2]; + ir_reg op3_reg = ctx->regs[ref][3]; + + if (!IR_IS_CONST_REF(insn->op2) && (ir_rule(ctx, insn->op2) & IR_FUSED)) { + ir_reg op1_reg; + int32_t offset = ir_fuse_addr(ctx, insn->op2, &op1_reg, &op2_reg); + + if (op2_reg == IR_REG_NONE) { + if (!IR_IS_CONST_REF(insn->op3) && IR_REG_SPILLED(op3_reg) && ir_is_same_spill_slot(ctx, insn->op3, op1_reg, offset)) { + if (!ir_may_avoid_spill_load(ctx, insn->op3, ref)) { + op3_reg = IR_REG_NUM(op3_reg); + ir_emit_load(ctx, type, op3_reg, insn->op3); + } + /* avoid store to the same location */ + return; + } + if (op3_reg == IR_REG_NONE) { + IR_ASSERT(IR_IS_CONST_REF(insn->op3) && ctx->ir_base[insn->op3].val.i64 == 0); + op3_reg = IR_REG_ZR; + } else if (IR_REG_SPILLED(op3_reg) || IR_IS_CONST_REF(insn->op3)) { + op3_reg = IR_REG_NUM(op3_reg); + ir_emit_load(ctx, type, op3_reg, insn->op3); + } + ir_emit_store_mem_int(ctx, type, op1_reg, offset, op3_reg); + } else { + if (op3_reg == IR_REG_NONE) { + IR_ASSERT(IR_IS_CONST_REF(insn->op3) && ctx->ir_base[insn->op3].val.i64 == 0); + op3_reg = IR_REG_ZR; + } else if (IR_REG_SPILLED(op3_reg) || IR_IS_CONST_REF(insn->op3)) { + op3_reg = IR_REG_NUM(op3_reg); + ir_emit_load(ctx, type, op3_reg, insn->op3); + } + switch (ir_type_size[type]) { + default: + IR_ASSERT(0); + case 8: + | str Rx(op3_reg), [Rx(op1_reg), Rx(op2_reg)] + break; + case 4: + | str Rw(op3_reg), [Rx(op1_reg), Rx(op2_reg)] + break; + case 2: + | strh Rw(op3_reg), [Rx(op1_reg), Rx(op2_reg)] + break; + case 1: + | strb Rw(op3_reg), [Rx(op1_reg), Rx(op2_reg)] + break; + } + } + } else { + IR_ASSERT(op2_reg != IR_REG_NONE); + if (IR_REG_SPILLED(op2_reg) || IR_IS_CONST_REF(insn->op2)) { + op2_reg = IR_REG_NUM(op2_reg); + IR_ASSERT(ctx->ir_base[insn->op2].type == IR_ADDR); + ir_emit_load(ctx, IR_ADDR, op2_reg, insn->op2); + } + if (op3_reg == IR_REG_NONE) { + IR_ASSERT(IR_IS_CONST_REF(insn->op3) && ctx->ir_base[insn->op3].val.i64 == 0); + op3_reg = IR_REG_ZR; + } else if (IR_REG_SPILLED(op3_reg) || IR_IS_CONST_REF(insn->op3)) { + op3_reg = IR_REG_NUM(op3_reg); + ir_emit_load(ctx, type, op3_reg, insn->op3); + } + ir_emit_store_mem_int(ctx, type, op2_reg, 0, op3_reg); + } +} + +static void ir_emit_store_fp(ir_ctx *ctx, ir_ref ref, ir_insn *insn) +{ + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + ir_ref type = ctx->ir_base[insn->op3].type; + ir_reg op2_reg = ctx->regs[ref][2]; + ir_reg op3_reg = ctx->regs[ref][3]; + + IR_ASSERT(op3_reg != IR_REG_NONE); + if (!IR_IS_CONST_REF(insn->op2) && (ir_rule(ctx, insn->op2) & IR_FUSED)) { + ir_reg op1_reg; + int32_t offset = ir_fuse_addr(ctx, insn->op2, &op1_reg, &op2_reg); + + if (op2_reg == IR_REG_NONE) { + if (!IR_IS_CONST_REF(insn->op3) && IR_REG_SPILLED(op3_reg) && ir_is_same_spill_slot(ctx, insn->op3, op1_reg, offset)) { + if (!ir_may_avoid_spill_load(ctx, insn->op3, ref)) { + op3_reg = IR_REG_NUM(op3_reg); + ir_emit_load(ctx, type, op3_reg, insn->op3); + } + /* avoid store to the same location */ + return; + } + if (IR_REG_SPILLED(op3_reg) || IR_IS_CONST_REF(insn->op3)) { + op3_reg = IR_REG_NUM(op3_reg); + ir_emit_load(ctx, type, op3_reg, insn->op3); + } + ir_emit_store_mem_fp(ctx, type, op1_reg, offset, op3_reg); + } else { + if (IR_REG_SPILLED(op3_reg) || 
IR_IS_CONST_REF(insn->op3)) { + op3_reg = IR_REG_NUM(op3_reg); + ir_emit_load(ctx, type, op3_reg, insn->op3); + } + if (type == IR_DOUBLE) { + | str Rd(op3_reg-IR_REG_FP_FIRST), [Rx(op1_reg), Rx(op2_reg)] + } else { + IR_ASSERT(type == IR_FLOAT); + | str Rs(op3_reg-IR_REG_FP_FIRST), [Rx(op1_reg), Rx(op2_reg)] + } + } + } else { + if (IR_REG_SPILLED(op2_reg) || IR_IS_CONST_REF(insn->op2)) { + op2_reg = IR_REG_NUM(op2_reg); + IR_ASSERT(ctx->ir_base[insn->op2].type == IR_ADDR); + ir_emit_load(ctx, IR_ADDR, op2_reg, insn->op2); + } + if (IR_REG_SPILLED(op3_reg) || IR_IS_CONST_REF(insn->op3)) { + op3_reg = IR_REG_NUM(op3_reg); + ir_emit_load(ctx, type, op3_reg, insn->op3); + } + ir_emit_store_mem_fp(ctx, type, op2_reg, 0, op3_reg); + } +} + +static void ir_emit_rload(ir_ctx *ctx, ir_ref def, ir_insn *insn) +{ + ir_reg src_reg = insn->op2; + ir_type type = insn->type; + + if (IR_REGSET_IN(IR_REGSET_UNION((ir_regset)ctx->fixed_regset, IR_REGSET_FIXED), src_reg)) { + if (ctx->vregs[def] + && ctx->live_intervals[ctx->vregs[def]] + && ctx->live_intervals[ctx->vregs[def]]->stack_spill_pos != -1) { + ir_emit_store(ctx, type, def, src_reg); + } + } else { + ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); + + if (def_reg == IR_REG_NONE) { + /* op3 is used as a flag that the value is already stored in memory. + * If op3 is set we don't have to store the value once again (in case of spilling) + */ + if (!insn->op3 || !ir_is_same_spill_slot(ctx, def, ctx->spill_base, insn->op3)) { + ir_emit_store(ctx, type, def, src_reg); + } + } else { + if (src_reg != def_reg) { + if (IR_IS_TYPE_INT(type)) { + ir_emit_mov(ctx, type, def_reg, src_reg); + } else { + IR_ASSERT(IR_IS_TYPE_FP(type)); + ir_emit_fp_mov(ctx, type, def_reg, src_reg); + } + } + if (IR_REG_SPILLED(ctx->regs[def][0]) + && (!insn->op3 || !ir_is_same_spill_slot(ctx, def, ctx->spill_base, insn->op3))) { + ir_emit_store(ctx, type, def, def_reg); + } + } + } +} + +static void ir_emit_rstore(ir_ctx *ctx, ir_ref ref, ir_insn *insn) +{ + ir_ref type = ctx->ir_base[insn->op2].type; + ir_reg op2_reg = ctx->regs[ref][2]; + ir_reg dst_reg = insn->op3; + + if (op2_reg != IR_REG_NONE) { + if (IR_REG_SPILLED(op2_reg)) { + op2_reg = IR_REG_NUM(op2_reg); + ir_emit_load(ctx, type, op2_reg, insn->op2); + } + if (op2_reg != dst_reg) { + if (IR_IS_TYPE_INT(type)) { + ir_emit_mov(ctx, type, dst_reg, op2_reg); + } else { + IR_ASSERT(IR_IS_TYPE_FP(type)); + ir_emit_fp_mov(ctx, type, dst_reg, op2_reg); + } + } + } else { + ir_emit_load(ctx, type, dst_reg, insn->op2); + } +} + +static void ir_emit_alloca(ir_ctx *ctx, ir_ref def, ir_insn *insn) +{ + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); + + if (IR_IS_CONST_REF(insn->op2)) { + ir_insn *val = &ctx->ir_base[insn->op2]; + int32_t size = val->val.i32; + + IR_ASSERT(IR_IS_TYPE_INT(val->type)); + IR_ASSERT(IR_IS_TYPE_UNSIGNED(val->type) || val->val.i64 > 0); + + if (ctx->flags & IR_HAS_CALLS) { + /* Stack must be 16 byte aligned */ + size = IR_ALIGNED_SIZE(size, 16); + } else { + size = IR_ALIGNED_SIZE(size, 8); + } + | sub sp, sp, #size + if (!(ctx->flags & IR_USE_FRAME_POINTER)) { + ctx->call_stack_size += size; + } + } else { + int32_t alignment = (ctx->flags & IR_HAS_CALLS) ? 
16 : 8; + ir_reg op2_reg = ctx->regs[def][2]; + ir_type type = ctx->ir_base[insn->op2].type; + + IR_ASSERT(ctx->flags & IR_FUNCTION); + IR_ASSERT(def_reg != IR_REG_NONE && op2_reg != IR_REG_NONE); + if (IR_REG_SPILLED(op2_reg)) { + op2_reg = IR_REG_NUM(op2_reg); + ir_emit_load(ctx, type, op2_reg, insn->op2); + } + | add Rx(def_reg), Rx(op2_reg), #(alignment-1) + | and Rx(def_reg), Rx(def_reg), #(~(alignment-1)) + | sub sp, sp, Rx(def_reg); + } + if (def_reg != IR_REG_NONE) { + | mov Rx(def_reg), sp + if (IR_REG_SPILLED(ctx->regs[def][0])) { + ir_emit_store(ctx, insn->type, def, def_reg); + } + } else { + ir_emit_store(ctx, IR_ADDR, def, IR_REG_STACK_POINTER); + } +} + +static void ir_emit_afree(ir_ctx *ctx, ir_ref def, ir_insn *insn) +{ + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + + if (IR_IS_CONST_REF(insn->op2)) { + ir_insn *val = &ctx->ir_base[insn->op2]; + int32_t size = val->val.i32; + + IR_ASSERT(IR_IS_TYPE_INT(val->type)); + IR_ASSERT(IR_IS_TYPE_UNSIGNED(val->type) || val->val.i64 > 0); + + if (ctx->flags & IR_HAS_CALLS) { + /* Stack must be 16 byte aligned */ + size = IR_ALIGNED_SIZE(size, 16); + } else { + size = IR_ALIGNED_SIZE(size, 8); + } + | add sp, sp, #size + if (!(ctx->flags & IR_USE_FRAME_POINTER)) { + ctx->call_stack_size -= size; + } + } else { +// int32_t alignment = (ctx->flags & IR_HAS_CALLS) ? 16 : 8; + ir_reg op2_reg = ctx->regs[def][2]; + ir_type type = ctx->ir_base[insn->op2].type; + + IR_ASSERT(ctx->flags & IR_FUNCTION); + IR_ASSERT(op2_reg != IR_REG_NONE); + if (IR_REG_SPILLED(op2_reg)) { + op2_reg = IR_REG_NUM(op2_reg); + ir_emit_load(ctx, type, op2_reg, insn->op2); + } + + // TODO: alignment + + | add sp, sp, Rx(op2_reg); + } +} + +static void ir_emit_switch(ir_ctx *ctx, uint32_t b, ir_ref def, ir_insn *insn) +{ + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + ir_type type; + ir_block *bb; + ir_insn *use_insn, *val; + uint32_t n, *p, use_block; + int i; + int label, default_label = 0; + int count = 0; + ir_val min, max; + ir_reg op1_reg, op2_reg, tmp_reg; + + type = ctx->ir_base[insn->op2].type; + if (IR_IS_TYPE_SIGNED(type)) { + min.u64 = 0x7fffffffffffffff; + max.u64 = 0x8000000000000000; + } else { + min.u64 = 0xffffffffffffffff; + max.u64 = 0x0; + } + + bb = &ctx->cfg_blocks[b]; + p = &ctx->cfg_edges[bb->successors]; + for (n = bb->successors_count; n != 0; p++, n--) { + use_block = *p; + use_insn = &ctx->ir_base[ctx->cfg_blocks[use_block].start]; + if (use_insn->op == IR_CASE_VAL) { + val = &ctx->ir_base[use_insn->op2]; + if (IR_IS_TYPE_SIGNED(type)) { + IR_ASSERT(IR_IS_TYPE_SIGNED(val->type)); + min.i64 = IR_MIN(min.i64, val->val.i64); + max.i64 = IR_MAX(max.i64, val->val.i64); + } else { + IR_ASSERT(!IR_IS_TYPE_SIGNED(val->type)); + min.u64 = (int64_t)IR_MIN(min.u64, val->val.u64); + max.u64 = (int64_t)IR_MAX(max.u64, val->val.u64); + } + count++; + } else { + IR_ASSERT(use_insn->op == IR_CASE_DEFAULT); + default_label = ir_skip_empty_target_blocks(ctx, use_block); + } + } + + op1_reg = ctx->regs[def][1]; + op2_reg = ctx->regs[def][2]; + tmp_reg = ctx->regs[def][3]; + + IR_ASSERT(op2_reg != IR_REG_NONE && tmp_reg != IR_REG_NONE); + if (IR_REG_SPILLED(op2_reg)) { + op2_reg = IR_REG_NUM(op2_reg); + ir_emit_load(ctx, type, op2_reg, insn->op2); + } else if (IR_IS_CONST_REF(insn->op2)) { + ir_emit_load(ctx, type, op2_reg, insn->op2); + } + + /* Generate a table jump or a sequence of comparisons */ + if ((max.i64-min.i64) < count * 8) { + int *labels = ir_mem_malloc(sizeof(int) * (max.i64 - min.i64 + 
1)); + + for (i = 0; i <= (max.i64 - min.i64); i++) { + labels[i] = default_label; + } + p = &ctx->cfg_edges[bb->successors]; + for (n = bb->successors_count; n != 0; p++, n--) { + use_block = *p; + use_insn = &ctx->ir_base[ctx->cfg_blocks[use_block].start]; + if (use_insn->op == IR_CASE_VAL) { + val = &ctx->ir_base[use_insn->op2]; + label = ir_skip_empty_target_blocks(ctx, use_block); + labels[val->val.i64 - min.i64] = label; + } + } + + if (aarch64_may_encode_imm12(max.i64)) { + | ASM_REG_IMM_OP cmp, type, op2_reg, max.i64 + } else { + ir_emit_load_imm_int(ctx, type, tmp_reg, max.i64); + | ASM_REG_REG_OP cmp, type, op2_reg, tmp_reg + } + if (IR_IS_TYPE_SIGNED(type)) { + | bgt =>default_label + } else { + | bhi =>default_label + } + + if (op1_reg == IR_REG_NONE) { + op1_reg = op2_reg; + } + if (aarch64_may_encode_imm12(min.i64)) { + | ASM_REG_REG_IMM_OP subs, type, op1_reg, op2_reg, min.i64 + } else { + ir_emit_load_imm_int(ctx, type, tmp_reg, min.i64); + | ASM_REG_REG_REG_OP subs, type, op1_reg, op2_reg, tmp_reg + } + if (IR_IS_TYPE_SIGNED(type)) { + | blt =>default_label + } else { + | blo =>default_label + } + | adr Rx(tmp_reg), >1 + | ldr Rx(tmp_reg), [Rx(tmp_reg), Rx(op1_reg), lsl #3] + | br Rx(tmp_reg) + |.jmp_table + if (!data->jmp_table_label) { + data->jmp_table_label = ctx->cfg_blocks_count + ctx->consts_count + 3; + |=>data->jmp_table_label: + } + |.align 8 + |1: + for (i = 0; i <= (max.i64 - min.i64); i++) { + int b = labels[i]; + ir_block *bb = &ctx->cfg_blocks[b]; + ir_insn *insn = &ctx->ir_base[bb->end]; + + if (insn->op == IR_IJMP && IR_IS_CONST_REF(insn->op2)) { + ir_ref prev = ctx->prev_ref[bb->end]; + if (prev != bb->start && ctx->ir_base[prev].op == IR_SNAPSHOT) { + prev = ctx->prev_ref[prev]; + } + if (prev == bb->start) { + void *addr = ir_jmp_addr(ctx, insn, &ctx->ir_base[insn->op2]); + + | .addr &addr + if (ctx->ir_base[bb->start].op != IR_CASE_DEFAULT) { + bb->flags |= IR_BB_EMPTY; + } + continue; + } + } + | .addr =>b + } + |.code + ir_mem_free(labels); + } else { + p = &ctx->cfg_edges[bb->successors]; + for (n = bb->successors_count; n != 0; p++, n--) { + use_block = *p; + use_insn = &ctx->ir_base[ctx->cfg_blocks[use_block].start]; + if (use_insn->op == IR_CASE_VAL) { + val = &ctx->ir_base[use_insn->op2]; + label = ir_skip_empty_target_blocks(ctx, use_block); + if (aarch64_may_encode_imm12(val->val.i64)) { + | ASM_REG_IMM_OP cmp, type, op2_reg, val->val.i64 + } else { + ir_emit_load_imm_int(ctx, type, tmp_reg, val->val.i64); + | ASM_REG_REG_OP cmp, type, op2_reg, tmp_reg + + } + | beq =>label + } + } + if (default_label) { + | b =>default_label + } + } +} + +static int ir_parallel_copy(ir_ctx *ctx, ir_copy *copies, int count, ir_reg tmp_reg, ir_reg tmp_fp_reg) +{ + int i; + int8_t *pred, *loc, *types; + ir_reg to, from_reg, c; + ir_type type; + ir_regset todo, ready; + ir_reg last_reg = IR_REG_NONE, last_fp_reg = IR_REG_NONE; + + loc = ir_mem_malloc(IR_REG_NUM * 3 * sizeof(int8_t)); + pred = loc + IR_REG_NUM; + types = pred + IR_REG_NUM; + memset(loc, IR_REG_NONE, IR_REG_NUM * 2 * sizeof(int8_t)); + todo = IR_REGSET_EMPTY; + ready = IR_REGSET_EMPTY; + + for (i = 0; i < count; i++) { + from_reg = copies[i].from; + to = copies[i].to; + if (from_reg != to) { + loc[from_reg] = from_reg; + pred[to] = from_reg; + types[from_reg] = copies[i].type; + if (to == tmp_reg) { + IR_ASSERT(last_reg == IR_REG_NONE); + last_reg = to; + } else if (to == tmp_fp_reg) { + IR_ASSERT(last_fp_reg == IR_REG_NONE); + last_fp_reg = to; + } else { + IR_ASSERT(!IR_REGSET_IN(todo, to)); + 
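+ /* Classic parallel-move sequentialization (comment added for clarity): moves whose destination is not needed as a source of a pending move are emitted first (the "ready" set); any remaining cycles are broken through tmp_reg / tmp_fp_reg. */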
IR_REGSET_INCL(todo, to); + } + } + } + + IR_REGSET_FOREACH(todo, i) { + if (loc[i] == IR_REG_NONE) { + IR_REGSET_INCL(ready, i); + } + } IR_REGSET_FOREACH_END(); + + while (1) { + while (ready != IR_REGSET_EMPTY) { + to = ir_regset_pop_first(&ready); + from_reg = pred[to]; + c = loc[from_reg]; + type = types[from_reg]; + if (IR_IS_TYPE_INT(type)) { + ir_emit_mov(ctx, type, to, c); + } else { + ir_emit_fp_mov(ctx, type, to, c); + } + IR_REGSET_EXCL(todo, to); + loc[from_reg] = to; + if (from_reg == c && pred[from_reg] != IR_REG_NONE) { + IR_REGSET_INCL(ready, from_reg); + } + } + + if (todo == IR_REGSET_EMPTY) { + break; + } + to = ir_regset_pop_first(&todo); + from_reg = pred[to]; + IR_ASSERT(to != loc[from_reg]); + type = types[from_reg]; + if (IR_IS_TYPE_INT(type)) { + IR_ASSERT(tmp_reg != IR_REG_NONE); + IR_ASSERT(tmp_reg >= IR_REG_GP_FIRST && tmp_reg <= IR_REG_GP_LAST); + ir_emit_mov(ctx, type, tmp_reg, to); + loc[to] = tmp_reg; + } else { + IR_ASSERT(tmp_fp_reg != IR_REG_NONE); + IR_ASSERT(tmp_fp_reg >= IR_REG_FP_FIRST && tmp_fp_reg <= IR_REG_FP_LAST); + ir_emit_fp_mov(ctx, type, tmp_fp_reg, to); + loc[to] = tmp_fp_reg; + } + IR_REGSET_INCL(ready, to); + } + + if (last_reg != IR_REG_NONE) { + to = last_reg; + from_reg = pred[to]; + c = loc[from_reg]; + if (to != c) { + type = types[from_reg]; + IR_ASSERT(IR_IS_TYPE_INT(type)); + ir_emit_mov(ctx, type, to, c); + } + } + + if (last_fp_reg != IR_REG_NONE) { + to = last_fp_reg; + from_reg = pred[to]; + c = loc[from_reg]; + if (to != c) { + type = types[from_reg]; + IR_ASSERT(!IR_IS_TYPE_INT(type)); + ir_emit_fp_mov(ctx, type, to, c); + } + } + + ir_mem_free(loc); + + return 1; +} + +static int32_t ir_call_used_stack(ir_ctx *ctx, ir_insn *insn) +{ + int j, n; + ir_type type; + int int_param = 0; + int fp_param = 0; + int int_reg_params_count = IR_REG_INT_ARGS; + int fp_reg_params_count = IR_REG_FP_ARGS; + int32_t used_stack = 0; + + n = insn->inputs_count; + for (j = 3; j <= n; j++) { + type = ctx->ir_base[ir_insn_op(insn, j)].type; + if (IR_IS_TYPE_INT(type)) { + if (int_param >= int_reg_params_count) { + used_stack += IR_MAX(sizeof(void*), ir_type_size[type]); + } + int_param++; + } else { + IR_ASSERT(IR_IS_TYPE_FP(type)); + if (fp_param >= fp_reg_params_count) { + used_stack += IR_MAX(sizeof(void*), ir_type_size[type]); + } + fp_param++; + } + } + + return used_stack; +} + +static int32_t ir_emit_arguments(ir_ctx *ctx, ir_ref def, ir_insn *insn, ir_reg tmp_reg) +{ + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + int j, n; + ir_ref arg; + ir_insn *arg_insn; + uint8_t type; + ir_reg src_reg, dst_reg; + int int_param = 0; + int fp_param = 0; + int count = 0; + int int_reg_params_count = IR_REG_INT_ARGS; + int fp_reg_params_count = IR_REG_FP_ARGS; + const int8_t *int_reg_params = _ir_int_reg_params; + const int8_t *fp_reg_params = _ir_fp_reg_params; + int32_t used_stack, stack_offset = 0; + ir_copy *copies; + bool do_pass3 = 0; + /* For temporaries we may use any scratch registers except for registers used for parameters */ + ir_reg tmp_fp_reg = IR_REG_FP_LAST; /* Temporary register for FP loads and swap */ + + n = insn->inputs_count; + if (n < 3) { + return 0; + } + + if (tmp_reg == IR_REG_NONE) { + tmp_reg = IR_REG_IP0; + } + + if (insn->op == IR_CALL && (ctx->flags & IR_PREALLOCATED_STACK)) { + // TODO: support for preallocated stack + used_stack = 0; + } else { + used_stack = ir_call_used_stack(ctx, insn); + /* Stack must be 16 byte aligned */ + used_stack = IR_ALIGNED_SIZE(used_stack, 16); + if 
(ctx->fixed_call_stack_size && used_stack <= ctx->fixed_call_stack_size) { + used_stack = 0; + } else { + ctx->call_stack_size += used_stack; + if (used_stack) { + | sub sp, sp, #used_stack + } + } + } + + /* 1. move all register arguments that should be passed through stack + * and collect arguments that should be passed through registers */ + copies = ir_mem_malloc((n - 2) * sizeof(ir_copy)); + for (j = 3; j <= n; j++) { + arg = ir_insn_op(insn, j); + src_reg = ir_get_alocated_reg(ctx, def, j); + arg_insn = &ctx->ir_base[arg]; + type = arg_insn->type; + if (IR_IS_TYPE_INT(type)) { + if (int_param < int_reg_params_count) { + dst_reg = int_reg_params[int_param]; + } else { + dst_reg = IR_REG_NONE; /* pass argument through stack */ + } + int_param++; + } else { + IR_ASSERT(IR_IS_TYPE_FP(type)); + if (fp_param < fp_reg_params_count) { + dst_reg = fp_reg_params[fp_param]; + } else { + dst_reg = IR_REG_NONE; /* pass argument through stack */ + } + fp_param++; + } + if (dst_reg != IR_REG_NONE) { + if (IR_IS_CONST_REF(arg) || src_reg == IR_REG_NONE) { + /* delay CONST->REG and MEM->REG moves to third pass */ + do_pass3 = 1; + } else { + IR_ASSERT(src_reg != IR_REG_NONE); + if (IR_REG_SPILLED(src_reg)) { + src_reg = IR_REG_NUM(src_reg); + ir_emit_load(ctx, type, src_reg, arg); + } + if (src_reg != dst_reg) { + /* delay REG->REG moves to second pass */ + copies[count].type = type; + copies[count].from = src_reg; + copies[count].to = dst_reg; + count++; + } + } + } else { + /* Pass register arguments to stack (REG->MEM moves) */ + if (!IR_IS_CONST_REF(arg) && src_reg != IR_REG_NONE && !IR_REG_SPILLED(src_reg)) { + if (IR_IS_TYPE_INT(type)) { + ir_emit_store_mem_int(ctx, type, IR_REG_STACK_POINTER, stack_offset, src_reg); + } else { + ir_emit_store_mem_fp(ctx, type, IR_REG_STACK_POINTER, stack_offset, src_reg); + } + } else { + do_pass3 = 1; + } + stack_offset += IR_MAX(sizeof(void*), ir_type_size[type]); + } + } + + /* 2. move all arguments that should be passed from one register to another (REG->REG movs) */ + if (count) { + ir_parallel_copy(ctx, copies, count, tmp_reg, tmp_fp_reg); + } + ir_mem_free(copies); + + /* 3. 
move the remaining memory and immediate values */ + if (do_pass3) { + stack_offset = 0; + int_param = 0; + fp_param = 0; + for (j = 3; j <= n; j++) { + arg = ir_insn_op(insn, j); + src_reg = ir_get_alocated_reg(ctx, def, j); + arg_insn = &ctx->ir_base[arg]; + type = arg_insn->type; + if (IR_IS_TYPE_INT(type)) { + if (int_param < int_reg_params_count) { + dst_reg = int_reg_params[int_param]; + } else { + dst_reg = IR_REG_NONE; /* argument already passed through stack */ + } + int_param++; + } else { + IR_ASSERT(IR_IS_TYPE_FP(type)); + if (fp_param < fp_reg_params_count) { + dst_reg = fp_reg_params[fp_param]; + } else { + dst_reg = IR_REG_NONE; /* argument already passed through stack */ + } + fp_param++; + } + if (dst_reg != IR_REG_NONE) { + if (IR_IS_CONST_REF(arg) || src_reg == IR_REG_NONE) { + if (IR_IS_TYPE_INT(type)) { + if (IR_IS_CONST_REF(arg)) { + if (type == IR_ADDR) { + ir_insn *val_insn = &ctx->ir_base[arg]; + + if (val_insn->op == IR_STR) { + int label = ctx->cfg_blocks_count - arg; + + val_insn->const_flags |= IR_CONST_EMIT; + | adr Rx(dst_reg), =>label + continue; + } + IR_ASSERT(val_insn->op == IR_ADDR || val_insn->op == IR_FUNC_ADDR); + } else if (ir_type_size[type] == 1) { + type = IR_ADDR; + } + } + ir_emit_load(ctx, type, dst_reg, arg); + } else { + ir_emit_load(ctx, type, dst_reg, arg); + } + } + } else { + if (IR_IS_TYPE_INT(type)) { + if (IR_IS_CONST_REF(arg)) { + ir_insn *val_insn = &ctx->ir_base[arg]; + + if (val_insn->op == IR_STR) { + int label = ctx->cfg_blocks_count - arg; + + val_insn->const_flags |= IR_CONST_EMIT; + IR_ASSERT(tmp_reg != IR_REG_NONE); + | adr Rx(tmp_reg), =>label + | str Rx(tmp_reg), [sp, #stack_offset] + } else if (val_insn->op == IR_FUNC || val_insn->op == IR_SYM) { + IR_ASSERT(0 && "sym"); + } else { + IR_ASSERT(tmp_reg != IR_REG_NONE); + ir_emit_load_imm_int(ctx, type, tmp_reg, val_insn->val.i64); + | str Rx(tmp_reg), [sp, #stack_offset] + } + } else if (src_reg == IR_REG_NONE) { + IR_ASSERT(tmp_reg != IR_REG_NONE); + ir_emit_load(ctx, type, tmp_reg, arg); + ir_emit_store_mem_int(ctx, type, IR_REG_STACK_POINTER, stack_offset, tmp_reg); + } else if (IR_REG_SPILLED(src_reg)) { + src_reg = IR_REG_NUM(src_reg); + ir_emit_load(ctx, type, src_reg, arg); + ir_emit_store_mem_int(ctx, type, IR_REG_STACK_POINTER, stack_offset, src_reg); + } + } else { + if (IR_IS_CONST_REF(arg)) { + ir_emit_load(ctx, type, tmp_fp_reg, arg); + ir_emit_store_mem_fp(ctx, IR_DOUBLE, IR_REG_STACK_POINTER, stack_offset, tmp_fp_reg); + } else if (src_reg == IR_REG_NONE) { + IR_ASSERT(tmp_fp_reg != IR_REG_NONE); + ir_emit_load(ctx, type, tmp_fp_reg, arg); + ir_emit_store_mem_fp(ctx, IR_DOUBLE, IR_REG_STACK_POINTER, stack_offset, tmp_fp_reg); + } else if (IR_REG_SPILLED(src_reg)) { + src_reg = IR_REG_NUM(src_reg); + ir_emit_load(ctx, type, src_reg, arg); + ir_emit_store_mem_fp(ctx, type, IR_REG_STACK_POINTER, stack_offset, src_reg); + } + } + stack_offset += IR_MAX(sizeof(void*), ir_type_size[type]); + } + } + } + return used_stack; +} + +static void ir_emit_call(ir_ctx *ctx, ir_ref def, ir_insn *insn) +{ + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + ir_reg def_reg; + int32_t used_stack = ir_emit_arguments(ctx, def, insn, ctx->regs[def][1]); + + if (IR_IS_CONST_REF(insn->op2)) { + ir_insn *addr_insn = &ctx->ir_base[insn->op2]; + void *addr; + + IR_ASSERT(addr_insn->type == IR_ADDR); + if (addr_insn->op == IR_FUNC) { + addr = (ctx->loader && ctx->loader->resolve_sym_name) ? 
+ ctx->loader->resolve_sym_name(ctx->loader, ir_get_str(ctx, addr_insn->val.i32)) : + ir_resolve_sym_name(ir_get_str(ctx, addr_insn->val.i32)); + } else { + IR_ASSERT(addr_insn->op == IR_ADDR || addr_insn->op == IR_FUNC_ADDR); + addr = (void*)addr_insn->val.addr; + } + if (aarch64_may_use_b(ctx, addr)) { + | bl &addr + } else { + ir_emit_load_imm_int(ctx, IR_ADDR, IR_REG_INT_TMP, (intptr_t)addr); + | blr Rx(IR_REG_INT_TMP) + } + } else { + ir_reg op2_reg = ctx->regs[def][2]; + + IR_ASSERT(op2_reg != IR_REG_NONE); + if (IR_REG_SPILLED(op2_reg)) { + op2_reg = IR_REG_NUM(op2_reg); + ir_emit_load(ctx, IR_ADDR, op2_reg, insn->op2); + } + | blr Rx(op2_reg) + } + + if (used_stack) { + | add sp, sp, #used_stack + ctx->call_stack_size -= used_stack; + } + + if (insn->type != IR_VOID) { + if (IR_IS_TYPE_INT(insn->type)) { + def_reg = IR_REG_NUM(ctx->regs[def][0]); + if (def_reg != IR_REG_NONE) { + if (def_reg != IR_REG_INT_RET1) { + ir_emit_mov(ctx, insn->type, def_reg, IR_REG_INT_RET1); + } + if (IR_REG_SPILLED(ctx->regs[def][0])) { + ir_emit_store(ctx, insn->type, def, def_reg); + } + } else if (ctx->use_lists[def].count > 1) { + ir_emit_store(ctx, insn->type, def, IR_REG_INT_RET1); + } + } else { + IR_ASSERT(IR_IS_TYPE_FP(insn->type)); + def_reg = IR_REG_NUM(ctx->regs[def][0]); + if (def_reg != IR_REG_NONE) { + if (def_reg != IR_REG_FP_RET1) { + ir_emit_fp_mov(ctx, insn->type, def_reg, IR_REG_FP_RET1); + } + if (IR_REG_SPILLED(ctx->regs[def][0])) { + ir_emit_store(ctx, insn->type, def, def_reg); + } + } else if (ctx->use_lists[def].count > 1) { + ir_emit_store(ctx, insn->type, def, IR_REG_FP_RET1); + } + } + } +} + +static void ir_emit_tailcall(ir_ctx *ctx, ir_ref def, ir_insn *insn) +{ + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + int32_t used_stack = ir_emit_arguments(ctx, def, insn, ctx->regs[def][1]); + + if (used_stack != 0) { + ir_emit_call(ctx, def, insn); + ir_emit_return_void(ctx); + return; + } + + ir_emit_epilogue(ctx); + + if (IR_IS_CONST_REF(insn->op2)) { + ir_insn *addr_insn = &ctx->ir_base[insn->op2]; + void *addr; + + IR_ASSERT(addr_insn->type == IR_ADDR); + if (addr_insn->op == IR_FUNC) { + addr = (ctx->loader && ctx->loader->resolve_sym_name) ? 
+ ctx->loader->resolve_sym_name(ctx->loader, ir_get_str(ctx, addr_insn->val.i32)) : + ir_resolve_sym_name(ir_get_str(ctx, addr_insn->val.i32)); + } else { + IR_ASSERT(addr_insn->op == IR_ADDR || addr_insn->op == IR_FUNC_ADDR); + addr = (void*)addr_insn->val.addr; + } + + if (aarch64_may_use_b(ctx, addr)) { + | b &addr + } else { + ir_emit_load_imm_int(ctx, IR_ADDR, IR_REG_INT_TMP, (intptr_t)addr); + | br Rx(IR_REG_INT_TMP) + } + } else { + ir_reg op2_reg = ctx->regs[def][2]; + + IR_ASSERT(op2_reg != IR_REG_NONE); + if (IR_REG_SPILLED(op2_reg)) { + op2_reg = IR_REG_NUM(op2_reg); + ir_emit_load(ctx, IR_ADDR, op2_reg, insn->op2); + } + | br Rx(op2_reg) + } +} + +static void ir_emit_ijmp(ir_ctx *ctx, ir_ref def, ir_insn *insn) +{ + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + ir_reg op2_reg = ctx->regs[def][2]; + + if (op2_reg != IR_REG_NONE) { + if (IR_REG_SPILLED(op2_reg)) { + op2_reg = IR_REG_NUM(op2_reg); + ir_emit_load(ctx, IR_ADDR, op2_reg, insn->op2); + } + | br Rx(op2_reg) + } else if (IR_IS_CONST_REF(insn->op2)) { + void *addr = ir_jmp_addr(ctx, insn, &ctx->ir_base[insn->op2]); + + if (aarch64_may_use_b(ctx, addr)) { + | b &addr + } else { + ir_emit_load_imm_int(ctx, IR_ADDR, IR_REG_INT_TMP, (intptr_t)addr); + | br Rx(IR_REG_INT_TMP) + } + } else { + IR_ASSERT(0); + } +} + +static void ir_emit_guard(ir_ctx *ctx, ir_ref def, ir_insn *insn) +{ + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + ir_reg op2_reg = ctx->regs[def][2]; + ir_type type = ctx->ir_base[insn->op2].type; + + IR_ASSERT(IR_IS_TYPE_INT(type)); + if (IR_IS_CONST_REF(insn->op2)) { + bool is_true = ir_ref_is_true(ctx, insn->op2); + + if ((insn->op == IR_GUARD && !is_true) || (insn->op == IR_GUARD_NOT && is_true)) { + if (IR_IS_CONST_REF(insn->op3)) { + void *addr = ir_jmp_addr(ctx, insn, &ctx->ir_base[insn->op3]); + + if (aarch64_may_use_b(ctx, addr)) { + | b &addr + } else { + ir_emit_load_imm_int(ctx, IR_ADDR, IR_REG_INT_TMP, (intptr_t)addr); + | br Rx(IR_REG_INT_TMP) + } + } else { + IR_ASSERT(0); + } + } + return; + } + + IR_ASSERT(op2_reg != IR_REG_NONE); + if (IR_REG_SPILLED(op2_reg)) { + op2_reg = IR_REG_NUM(op2_reg); + ir_emit_load(ctx, type, op2_reg, insn->op2); + } + + if (IR_IS_CONST_REF(insn->op3)) { + void *addr = ir_jmp_addr(ctx, insn, &ctx->ir_base[insn->op3]); + + if (insn->op == IR_GUARD) { + if (ir_type_size[type] == 8) { + | cbz Rx(op2_reg), &addr + } else { + | cbz Rw(op2_reg), &addr + } + } else { + if (ir_type_size[type] == 8) { + | cbnz Rx(op2_reg), &addr + } else { + | cbnz Rw(op2_reg), &addr + } + } + } else { + IR_ASSERT(0); + } +} + +static void ir_emit_guard_jz(ir_ctx *ctx, uint8_t op, void *addr, ir_type type, ir_reg reg) +{ + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + + if (op == IR_EQ) { + if (ir_type_size[type] == 8) { + | cbnz Rx(reg), &addr + } else { + | cbnz Rw(reg), &addr + } + } else { + IR_ASSERT(op == IR_NE); + if (ir_type_size[type] == 8) { + | cbz Rx(reg), &addr + } else { + | cbz Rw(reg), &addr + } + } +} + +static void ir_emit_guard_jcc(ir_ctx *ctx, uint8_t op, void *addr, bool int_cmp) +{ + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + + if (int_cmp) { + switch (op) { + default: + IR_ASSERT(0 && "NIY binary op"); + case IR_EQ: + | beq &addr + break; + case IR_NE: + | bne &addr + break; + case IR_LT: + | blt &addr + break; + case IR_GE: + | bge &addr + break; + case IR_LE: + | ble &addr + break; + case IR_GT: + | bgt &addr + break; + case IR_ULT: + | blo 
&addr + break; + case IR_UGE: + | bhs &addr + break; + case IR_ULE: + | bls &addr + break; + case IR_UGT: + | bhi &addr + break; + } + } else { + switch (op) { + default: + IR_ASSERT(0 && "NIY binary op"); + case IR_EQ: + | beq &addr + break; + case IR_NE: + | bne &addr + break; + case IR_LT: + | bmi &addr + break; + case IR_GE: + | bge &addr + break; + case IR_LE: + | bls &addr + break; + case IR_GT: + | bgt &addr + break; +// case IR_ULT: fprintf(stderr, "\tjb .LL%d\n", true_block); break; +// case IR_UGE: fprintf(stderr, "\tjae .LL%d\n", true_block); break; +// case IR_ULE: fprintf(stderr, "\tjbe .LL%d\n", true_block); break; +// case IR_UGT: fprintf(stderr, "\tja .LL%d\n", true_block); break; + } + } +} + +static void ir_emit_guard_cmp_int(ir_ctx *ctx, uint32_t b, ir_ref def, ir_insn *insn) +{ + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + ir_insn *cmp_insn = &ctx->ir_base[insn->op2]; + ir_op op = cmp_insn->op; + ir_type type = ctx->ir_base[cmp_insn->op1].type; + ir_ref op1 = cmp_insn->op1; + ir_ref op2 = cmp_insn->op2; + ir_reg op1_reg = ctx->regs[insn->op2][1]; + ir_reg op2_reg = ctx->regs[insn->op2][2]; + void *addr; + + if (op1_reg != IR_REG_NONE && (IR_REG_SPILLED(op1_reg) || IR_IS_CONST_REF(op1))) { + op1_reg = IR_REG_NUM(op1_reg); + ir_emit_load(ctx, type, op1_reg, op1); + } + if (op2_reg != IR_REG_NONE && (IR_REG_SPILLED(op2_reg) || IR_IS_CONST_REF(op2))) { + op2_reg = IR_REG_NUM(op2_reg); + if (op1 != op2) { + ir_emit_load(ctx, type, op2_reg, op2); + } + } + + addr = ir_jmp_addr(ctx, insn, &ctx->ir_base[insn->op3]); + + if (IR_IS_CONST_REF(op2) && ctx->ir_base[op2].val.u64 == 0) { + if (op == IR_ULT) { + /* always false */ + if (aarch64_may_use_b(ctx, addr)) { + | b &addr + } else { + ir_emit_load_imm_int(ctx, IR_ADDR, IR_REG_INT_TMP, (intptr_t)addr); + | br Rx(IR_REG_INT_TMP) + } + return; + } else if (op == IR_UGE) { + /* always true */ + return; + } else if (op == IR_ULE) { + op = IR_EQ; + } else if (op == IR_UGT) { + op = IR_NE; + } + if (op1_reg != IR_REG_NONE && (op == IR_EQ || op == IR_NE)) { + if (insn->op == IR_GUARD_NOT) { + op ^= 1; // reverse + } + ir_emit_guard_jz(ctx, op, addr, type, op1_reg); + return; + } + } + ir_emit_cmp_int_common(ctx, type, op1_reg, op1, op2_reg, op2); + + if (insn->op == IR_GUARD) { + op ^= 1; // reverse + } + + ir_emit_guard_jcc(ctx, op, addr, 1); +} + +static void ir_emit_guard_cmp_fp(ir_ctx *ctx, uint32_t b, ir_ref def, ir_insn *insn) +{ + ir_op op = ir_emit_cmp_fp_common(ctx, insn->op2, &ctx->ir_base[insn->op2]); + void *addr = ir_jmp_addr(ctx, insn, &ctx->ir_base[insn->op3]); + + if (insn->op == IR_GUARD) { + op ^= 1; // reverse + } + ir_emit_guard_jcc(ctx, op, addr, 0); +} + +static void ir_emit_guard_overflow(ir_ctx *ctx, ir_ref def, ir_insn *insn) +{ + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + ir_insn *overflow_insn = &ctx->ir_base[insn->op2]; + ir_insn *math_insn = &ctx->ir_base[overflow_insn->op1]; + ir_type type = math_insn->type; + void *addr = ir_jmp_addr(ctx, insn, &ctx->ir_base[insn->op3]); + + IR_ASSERT(IR_IS_TYPE_INT(type)); + if (math_insn->op == IR_MUL_OV) { + if (insn->op == IR_GUARD) { + | beq &addr + } else { + | bne &addr + } + } else if (IR_IS_TYPE_SIGNED(type)) { + if (insn->op == IR_GUARD) { + | bvc &addr + } else { + | bvs &addr + } + } else { + if (insn->op == IR_GUARD) { + | bcc &addr + } else { + | bcs &addr + } + } +} + +static void ir_emit_tls(ir_ctx *ctx, ir_ref def, ir_insn *insn) +{ + ir_backend_data *data = ctx->data; + dasm_State **Dst = 
&data->dasm_state; + uint32_t code; + ir_reg reg = IR_REG_NUM(ctx->regs[def][0]); + + if (ctx->use_lists[def].count == 1) { + /* dead load */ + return; + } + +||#ifdef __APPLE__ +|| code = 0xd53bd060 | reg; // TODO: hard-coded: mrs reg, tpidrro_el0 +| .long code +| and Rx(reg), Rx(reg), #0xfffffffffffffff8 +|//??? MEM_ACCESS_64_WITH_UOFFSET_64 ldr, Rx(reg), Rx(reg), #insn->op2, TMP1 +|//??? MEM_ACCESS_64_WITH_UOFFSET_64 ldr, Rx(reg), Rx(reg), #insn->op3, TMP1 +||#else +|| code = 0xd53bd040 | reg; // TODO: hard-coded: mrs reg, tpidr_el0 +| .long code +||//??? IR_ASSERT(insn->op2 <= LDR_STR_PIMM64); +| ldr Rx(reg), [Rx(reg), #insn->op2] +||#endif + if (IR_REG_SPILLED(ctx->regs[def][0])) { + ir_emit_store(ctx, IR_ADDR, def, reg); + } +} + +static void ir_emit_exitcall(ir_ctx *ctx, ir_ref def, ir_insn *insn) +{ + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); + + IR_ASSERT(def_reg != IR_REG_NONE); + + | stp d30, d31, [sp, #-16]! + | stp d28, d29, [sp, #-16]! + | stp d26, d27, [sp, #-16]! + | stp d24, d25, [sp, #-16]! + | stp d22, d23, [sp, #-16]! + | stp d20, d21, [sp, #-16]! + | stp d18, d19, [sp, #-16]! + | stp d16, d17, [sp, #-16]! + | stp d14, d15, [sp, #-16]! + | stp d12, d13, [sp, #-16]! + | stp d10, d11, [sp, #-16]! + | stp d8, d9, [sp, #-16]! + | stp d6, d7, [sp, #-16]! + | stp d4, d5, [sp, #-16]! + | stp d2, d3, [sp, #-16]! + | stp d0, d1, [sp, #-16]! + + | str x30, [sp, #-16]! + | stp x28, x29, [sp, #-16]! + | stp x26, x27, [sp, #-16]! + | stp x24, x25, [sp, #-16]! + | stp x22, x23, [sp, #-16]! + | stp x20, x21, [sp, #-16]! + | stp x18, x19, [sp, #-16]! + | stp x16, x17, [sp, #-16]! + | stp x14, x15, [sp, #-16]! + | stp x12, x13, [sp, #-16]! + | stp x10, x11, [sp, #-16]! + | stp x8, x9, [sp, #-16]! + | stp x6, x7, [sp, #-16]! + | stp x4, x5, [sp, #-16]! + | stp x2, x3, [sp, #-16]! + | stp x0, x1, [sp, #-16]! + + | mov Rx(IR_REG_INT_ARG2), sp + | add Rx(IR_REG_INT_ARG1), Rx(IR_REG_INT_ARG2), #(32*8+32*8) + | str Rx(IR_REG_INT_ARG1), [sp, #(31*8)] + | mov Rx(IR_REG_INT_ARG1), Rx(IR_REG_INT_TMP) + + if (IR_IS_CONST_REF(insn->op2)) { + ir_insn *addr_insn = &ctx->ir_base[insn->op2]; + void *addr; + + IR_ASSERT(addr_insn->type == IR_ADDR); + if (addr_insn->op == IR_FUNC) { + addr = (ctx->loader && ctx->loader->resolve_sym_name) ? + ctx->loader->resolve_sym_name(ctx->loader, ir_get_str(ctx, addr_insn->val.i32)) : + ir_resolve_sym_name(ir_get_str(ctx, addr_insn->val.i32)); + } else { + IR_ASSERT(addr_insn->op == IR_ADDR || addr_insn->op == IR_FUNC_ADDR); + addr = (void*)addr_insn->val.addr; + } + + if (aarch64_may_use_b(ctx, addr)) { + | bl &addr + } else { + ir_emit_load_imm_int(ctx, IR_ADDR, IR_REG_INT_TMP, (intptr_t)addr); + | blr Rx(IR_REG_INT_TMP) + } + } else { + IR_ASSERT(0); + } + + | add sp, sp, #(32*8+32*8) + + if (def_reg != IR_REG_INT_RET1) { + ir_emit_mov(ctx, insn->type, def_reg, IR_REG_INT_RET1); + } + if (IR_REG_SPILLED(ctx->regs[def][0])) { + ir_emit_store(ctx, insn->type, def, def_reg); + } +} + +static void ir_emit_param_move(ir_ctx *ctx, uint8_t type, ir_reg from_reg, ir_reg to_reg, ir_ref to, int32_t offset) +{ + ir_reg fp = (ctx->flags & IR_USE_FRAME_POINTER) ? 
IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER; + + IR_ASSERT(from_reg != IR_REG_NONE || to_reg != IR_REG_NONE); + + if (IR_IS_TYPE_INT(type)) { + if (from_reg != IR_REG_NONE) { + if (to_reg != IR_REG_NONE) { + ir_emit_mov(ctx, type, to_reg, from_reg); + } else { + ir_emit_store(ctx, type, to, from_reg); + } + } else { + ir_emit_load_mem_int(ctx, type, to_reg, fp, offset); + } + } else { + if (from_reg != IR_REG_NONE) { + if (to_reg != IR_REG_NONE) { + ir_emit_fp_mov(ctx, type, to_reg, from_reg); + } else { + ir_emit_store(ctx, type, to, from_reg); + } + } else { + ir_emit_load_mem_fp(ctx, type, to_reg, fp, offset); + } + } +} + +static void ir_emit_load_params(ir_ctx *ctx) +{ + ir_use_list *use_list = &ctx->use_lists[1]; + ir_insn *insn; + ir_ref i, n, *p, use; + int int_param_num = 0; + int fp_param_num = 0; + ir_reg src_reg; + ir_reg dst_reg; + // TODO: Calling convention specific + int int_reg_params_count = IR_REG_INT_ARGS; + int fp_reg_params_count = IR_REG_FP_ARGS; + const int8_t *int_reg_params = _ir_int_reg_params; + const int8_t *fp_reg_params = _ir_fp_reg_params; + int32_t stack_offset = 0; + + if (ctx->flags & IR_USE_FRAME_POINTER) { + stack_offset = sizeof(void*) * 2; /* skip old frame pointer and return address */ + } else { + stack_offset = sizeof(void*) + ctx->stack_frame_size + ctx->call_stack_size; /* skip return address */ + } + n = use_list->count; + for (i = 0, p = &ctx->use_edges[use_list->refs]; i < n; i++, p++) { + use = *p; + insn = &ctx->ir_base[use]; + if (insn->op == IR_PARAM) { + if (IR_IS_TYPE_INT(insn->type)) { + if (int_param_num < int_reg_params_count) { + src_reg = int_reg_params[int_param_num]; + } else { + src_reg = IR_REG_NONE; + } + int_param_num++; + } else { + if (fp_param_num < fp_reg_params_count) { + src_reg = fp_reg_params[fp_param_num]; + } else { + src_reg = IR_REG_NONE; + } + fp_param_num++; + } + if (ctx->vregs[use]) { + dst_reg = IR_REG_NUM(ctx->regs[use][0]); + IR_ASSERT(src_reg != IR_REG_NONE || dst_reg != IR_REG_NONE || + stack_offset == ctx->live_intervals[ctx->vregs[use]]->stack_spill_pos + + ((ctx->flags & IR_USE_FRAME_POINTER) ? 
-ctx->stack_frame_size : ctx->call_stack_size)); + if (src_reg != dst_reg) { + ir_emit_param_move(ctx, insn->type, src_reg, dst_reg, use, stack_offset); + } + if (dst_reg != IR_REG_NONE && IR_REG_SPILLED(ctx->regs[use][0])) { + ir_emit_store(ctx, insn->type, use, dst_reg); + } + } + if (src_reg == IR_REG_NONE) { + if (sizeof(void*) == 8) { + stack_offset += sizeof(void*); + } else { + stack_offset += IR_MAX(sizeof(void*), ir_type_size[insn->type]); + } + } + } + } +} + +static ir_reg ir_get_free_reg(ir_type type, ir_regset available) +{ + if (IR_IS_TYPE_INT(type)) { + available = IR_REGSET_INTERSECTION(available, IR_REGSET_GP); + } else { + IR_ASSERT(IR_IS_TYPE_FP(type)); + available = IR_REGSET_INTERSECTION(available, IR_REGSET_FP); + } + IR_ASSERT(!IR_REGSET_IS_EMPTY(available)); + return IR_REGSET_FIRST(available); +} + +static int ir_fix_dessa_tmps(ir_ctx *ctx, uint8_t type, ir_ref from, ir_ref to) +{ + ir_backend_data *data = ctx->data; + ir_ref ref = ctx->cfg_blocks[data->dessa_from_block].end; + + if (to == 0) { + if (IR_IS_TYPE_INT(type)) { + if (ctx->regs[ref][0] == IR_REG_NONE) { + ctx->regs[ref][0] = IR_REG_X0; + } + } else { + IR_ASSERT(IR_IS_TYPE_FP(type)); + if (ctx->regs[ref][1] == IR_REG_NONE) { + ctx->regs[ref][1] = IR_REG_V0; + } + } + } else if (from != 0) { + if (IR_IS_TYPE_INT(type)) { + if (ctx->regs[ref][0] == IR_REG_NONE) { + ctx->regs[ref][0] = IR_REG_X0; + } + } else { + IR_ASSERT(IR_IS_TYPE_FP(type)); + if (ctx->regs[ref][1] == IR_REG_NONE) { + ctx->regs[ref][1] = IR_REG_V0; + } + } + } + return 1; +} + +static void ir_fix_param_spills(ir_ctx *ctx) +{ + ir_use_list *use_list = &ctx->use_lists[1]; + ir_insn *insn; + ir_ref i, n, *p, use; + int int_param_num = 0; + int fp_param_num = 0; + ir_reg src_reg; + // TODO: Calling convention specific + int int_reg_params_count = IR_REG_INT_ARGS; + int fp_reg_params_count = IR_REG_FP_ARGS; + const int8_t *int_reg_params = _ir_int_reg_params; + const int8_t *fp_reg_params = _ir_fp_reg_params; + int32_t stack_offset = 0; + + if (ctx->flags & IR_USE_FRAME_POINTER) { + /* skip old frame pointer and return address */ + stack_offset = sizeof(void*) * 2 + (ctx->stack_frame_size - ctx->stack_frame_alignment); + } else { + /* skip return address */ + stack_offset = sizeof(void*) + ctx->stack_frame_size; + } + n = use_list->count; + for (i = 0, p = &ctx->use_edges[use_list->refs]; i < n; i++, p++) { + use = *p; + insn = &ctx->ir_base[use]; + if (insn->op == IR_PARAM) { + if (IR_IS_TYPE_INT(insn->type)) { + if (int_param_num < int_reg_params_count) { + src_reg = int_reg_params[int_param_num]; + } else { + src_reg = IR_REG_NONE; + } + int_param_num++; + } else { + if (fp_param_num < fp_reg_params_count) { + src_reg = fp_reg_params[fp_param_num]; + } else { + src_reg = IR_REG_NONE; + } + fp_param_num++; + } + if (src_reg == IR_REG_NONE) { + if (ctx->vregs[use]) { + ir_live_interval *ival = ctx->live_intervals[ctx->vregs[use]]; + if ((ival->flags & IR_LIVE_INTERVAL_MEM_PARAM) + && ival->stack_spill_pos == -1 + && (ival->next || ival->reg == IR_REG_NONE)) { + ival->stack_spill_pos = stack_offset; + ctx->regs[use][0] = IR_REG_NONE; + } + } + if (sizeof(void*) == 8) { + stack_offset += sizeof(void*); + } else { + stack_offset += IR_MAX(sizeof(void*), ir_type_size[insn->type]); + } + } + } + } +} + +static void ir_allocate_unique_spill_slots(ir_ctx *ctx) +{ + uint32_t b; + ir_block *bb; + ir_insn *insn; + ir_ref i, n, j, *p; + uint32_t *rule, insn_flags; + ir_backend_data *data = ctx->data; + ir_regset available = 0; + ir_target_constraints 
constraints;
+	uint32_t def_flags;
+	ir_reg reg;
+
+	ctx->regs = ir_mem_malloc(sizeof(ir_regs) * ctx->insns_count);
+	memset(ctx->regs, IR_REG_NONE, sizeof(ir_regs) * ctx->insns_count);
+
+	/* vregs + tmp + fixed + SCRATCH + ALL */
+	ctx->live_intervals = ir_mem_calloc(ctx->vregs_count + 1 + IR_REG_NUM + 2, sizeof(ir_live_interval*));
+
+	if (!ctx->arena) {
+		ctx->arena = ir_arena_create(16 * 1024);
+	}
+
+	for (b = 1, bb = ctx->cfg_blocks + b; b <= ctx->cfg_blocks_count; b++, bb++) {
+		IR_ASSERT(!(bb->flags & IR_BB_UNREACHABLE));
+		for (i = bb->start, insn = ctx->ir_base + i, rule = ctx->rules + i; i <= bb->end;) {
+			switch (ctx->rules ? *rule : insn->op) {
+				case IR_START:
+				case IR_BEGIN:
+				case IR_END:
+				case IR_IF_TRUE:
+				case IR_IF_FALSE:
+				case IR_CASE_VAL:
+				case IR_CASE_DEFAULT:
+				case IR_MERGE:
+				case IR_LOOP_BEGIN:
+				case IR_LOOP_END:
+					break;
+				default:
+					def_flags = ir_get_target_constraints(ctx, i, &constraints);
+					if (ctx->rules
+					 && *rule != IR_CMP_AND_BRANCH_INT
+					 && *rule != IR_CMP_AND_BRANCH_FP
+					 && *rule != IR_GUARD_CMP_INT
+					 && *rule != IR_GUARD_CMP_FP) {
+						available = IR_REGSET_SCRATCH;
+					}
+					if (ctx->vregs[i]) {
+						reg = constraints.def_reg;
+						if (reg != IR_REG_NONE && IR_REGSET_IN(available, reg)) {
+							IR_REGSET_EXCL(available, reg);
+							ctx->regs[i][0] = reg | IR_REG_SPILL_STORE;
+						} else if (def_flags & IR_USE_MUST_BE_IN_REG) {
+							if (insn->op == IR_VLOAD
+							 && ctx->live_intervals[ctx->vregs[i]]
+							 && ctx->live_intervals[ctx->vregs[i]]->stack_spill_pos != -1) {
+								/* pass */
+							} else if (insn->op != IR_PARAM) {
+								reg = ir_get_free_reg(insn->type, available);
+								IR_REGSET_EXCL(available, reg);
+								ctx->regs[i][0] = reg | IR_REG_SPILL_STORE;
+							}
+						}
+						if (!ctx->live_intervals[ctx->vregs[i]]) {
+							ir_live_interval *ival = ir_arena_alloc(&ctx->arena, sizeof(ir_live_interval));
+							memset(ival, 0, sizeof(ir_live_interval));
+							ctx->live_intervals[ctx->vregs[i]] = ival;
+							ival->type = insn->type;
+							ival->reg = IR_REG_NONE;
+							ival->vreg = ctx->vregs[i];
+							ival->stack_spill_pos = -1;
+							if (insn->op == IR_PARAM && reg == IR_REG_NONE) {
+								ival->flags |= IR_LIVE_INTERVAL_MEM_PARAM;
+							} else {
+								ival->stack_spill_pos = ir_allocate_spill_slot(ctx, ival->type, &data->ra_data);
+							}
+						} else if (insn->op == IR_PARAM) {
+							IR_ASSERT(0 && "unexpected PARAM");
+							return;
+						}
+					} else if (insn->op == IR_VAR) {
+						ir_use_list *use_list = &ctx->use_lists[i];
+						ir_ref n = use_list->count;
+
+						if (n > 0) {
+							int32_t stack_spill_pos = insn->op3 = ir_allocate_spill_slot(ctx, insn->type, &data->ra_data);
+							ir_ref i, *p, use;
+							ir_insn *use_insn;
+
+							for (i = 0, p = &ctx->use_edges[use_list->refs]; i < n; i++, p++) {
+								use = *p;
+								use_insn = &ctx->ir_base[use];
+								if (use_insn->op == IR_VLOAD) {
+									if (ctx->vregs[use]
+									 && !ctx->live_intervals[ctx->vregs[use]]) {
+										ir_live_interval *ival = ir_arena_alloc(&ctx->arena, sizeof(ir_live_interval));
+										memset(ival, 0, sizeof(ir_live_interval));
+										ctx->live_intervals[ctx->vregs[use]] = ival;
+										ival->type = insn->type;
+										ival->reg = IR_REG_NONE;
+										ival->vreg = ctx->vregs[use];
+										ival->stack_spill_pos = stack_spill_pos;
+									}
+								} else if (use_insn->op == IR_VSTORE) {
+									if (!IR_IS_CONST_REF(use_insn->op3)
+									 && ctx->vregs[use_insn->op3]
+									 && !ctx->live_intervals[ctx->vregs[use_insn->op3]]) {
+										ir_live_interval *ival = ir_arena_alloc(&ctx->arena, sizeof(ir_live_interval));
+										memset(ival, 0, sizeof(ir_live_interval));
+										ctx->live_intervals[ctx->vregs[use_insn->op3]] = ival;
+										ival->type = insn->type;
+										ival->reg = IR_REG_NONE;
+										ival->vreg = ctx->vregs[use_insn->op3];
+
ival->stack_spill_pos = stack_spill_pos; + } + } + } + } + } + + insn_flags = ir_op_flags[insn->op]; + n = constraints.tmps_count; + if (n) { + do { + n--; + if (constraints.tmp_regs[n].type) { + ir_reg reg = ir_get_free_reg(constraints.tmp_regs[n].type, available); + IR_REGSET_EXCL(available, reg); + ctx->regs[i][constraints.tmp_regs[n].num] = reg; + } else if (constraints.tmp_regs[n].reg == IR_REG_SCRATCH) { + available = IR_REGSET_DIFFERENCE(available, IR_REGSET_SCRATCH); + } else { + IR_REGSET_EXCL(available, constraints.tmp_regs[n].reg); + } + } while (n); + } + n = insn->inputs_count; + for (j = 1, p = insn->ops + 1; j <= n; j++, p++) { + ir_ref input = *p; + if (IR_OPND_KIND(insn_flags, j) == IR_OPND_DATA && input > 0 && ctx->vregs[input]) { + if ((def_flags & IR_DEF_REUSES_OP1_REG) && j == 1) { + ir_reg reg = IR_REG_NUM(ctx->regs[i][0]); + ctx->regs[i][1] = reg | IR_REG_SPILL_LOAD; + } else { + uint8_t use_flags = IR_USE_FLAGS(def_flags, j); + ir_reg reg = (j < constraints.hints_count) ? constraints.hints[j] : IR_REG_NONE; + + if (reg != IR_REG_NONE && IR_REGSET_IN(available, reg)) { + IR_REGSET_EXCL(available, reg); + ctx->regs[i][j] = reg | IR_REG_SPILL_LOAD; + } else if (j > 1 && input == insn->op1 && ctx->regs[i][1] != IR_REG_NONE) { + ctx->regs[i][j] = ctx->regs[i][1]; + } else if (use_flags & IR_USE_MUST_BE_IN_REG) { + reg = ir_get_free_reg(ctx->ir_base[input].type, available); + IR_REGSET_EXCL(available, reg); + ctx->regs[i][j] = reg | IR_REG_SPILL_LOAD; + } + } + } + } + break; + } + n = ir_insn_len(insn); + i += n; + insn += n; + rule += n; + } + if (bb->flags & IR_BB_DESSA_MOVES) { + data->dessa_from_block = b; + ir_gen_dessa_moves(ctx, b, ir_fix_dessa_tmps); + } + } + + ctx->used_preserved_regs = ctx->fixed_save_regset; + ctx->flags |= IR_NO_STACK_COMBINE; + ir_fix_stack_frame(ctx); +} + +static void ir_preallocate_call_stack(ir_ctx *ctx) +{ + int call_stack_size, peak_call_stack_size = 0; + ir_ref i, n; + ir_insn *insn; + + for (i = 1, insn = ctx->ir_base + 1; i < ctx->insns_count;) { + if (insn->op == IR_CALL) { + call_stack_size = ir_call_used_stack(ctx, insn); + if (call_stack_size > peak_call_stack_size) { + peak_call_stack_size = call_stack_size; + } + } + n = ir_insn_len(insn); + i += n; + insn += n; + } + if (peak_call_stack_size) { + ctx->call_stack_size = peak_call_stack_size; + ctx->flags |= IR_PREALLOCATED_STACK; + } +} + +void ir_fix_stack_frame(ir_ctx *ctx) +{ + uint32_t additional_size = 0; + + if (ctx->used_preserved_regs) { + ir_regset used_preserved_regs = (ir_regset)ctx->used_preserved_regs; + ir_reg reg; + (void) reg; + + IR_REGSET_FOREACH(used_preserved_regs, reg) { + additional_size += sizeof(void*); + } IR_REGSET_FOREACH_END(); + } + + ctx->stack_frame_size = IR_ALIGNED_SIZE(ctx->stack_frame_size, sizeof(void*)); + ctx->stack_frame_size += additional_size; + ctx->stack_frame_alignment = 0; + ctx->call_stack_size = 0; + + if ((ctx->flags & IR_HAS_CALLS) && !(ctx->flags & IR_FUNCTION)) { + while (IR_ALIGNED_SIZE(ctx->stack_frame_size, 16) != ctx->stack_frame_size) { + ctx->stack_frame_size += sizeof(void*); + ctx->stack_frame_alignment += sizeof(void*); + } + } else if (ctx->flags & IR_HAS_CALLS) { + ctx->flags |= IR_USE_FRAME_POINTER; + /* Stack must be 16 byte aligned */ + if (!(ctx->flags & IR_FUNCTION)) { + while (IR_ALIGNED_SIZE(ctx->stack_frame_size, 16) != ctx->stack_frame_size) { + ctx->stack_frame_size += sizeof(void*); + ctx->stack_frame_alignment += sizeof(void*); + } + } else if (ctx->flags & IR_USE_FRAME_POINTER) { + while 
(IR_ALIGNED_SIZE(ctx->stack_frame_size + sizeof(void*) * 2, 16) != ctx->stack_frame_size + sizeof(void*) * 2) {
+				ctx->stack_frame_size += sizeof(void*);
+				ctx->stack_frame_alignment += sizeof(void*);
+			}
+		} else {
+			if (!(ctx->flags & IR_NO_STACK_COMBINE)) {
+				ir_preallocate_call_stack(ctx);
+			}
+			while (IR_ALIGNED_SIZE(ctx->stack_frame_size + ctx->call_stack_size, 16) !=
+					ctx->stack_frame_size + ctx->call_stack_size) {
+				ctx->stack_frame_size += sizeof(void*);
+				ctx->stack_frame_alignment += sizeof(void*);
+			}
+		}
+	}
+
+	ir_fix_param_spills(ctx);
+}
+
+static void* dasm_labels[ir_lb_MAX];
+
+/* Veneers support (TODO: avoid global variable usage) */
+static ir_ctx *ir_current_ctx;
+
+void *ir_emit_code(ir_ctx *ctx, size_t *size_ptr)
+{
+	uint32_t b, n, target;
+	ir_block *bb;
+	ir_ref i;
+	ir_insn *insn;
+	uint32_t *rule;
+	ir_backend_data data;
+	dasm_State **Dst;
+	int ret;
+	void *entry;
+	size_t size;
+
+	data.ra_data.unused_slot_4 = 0;
+	data.ra_data.unused_slot_2 = 0;
+	data.ra_data.unused_slot_1 = 0;
+	data.ra_data.handled = NULL;
+	data.rodata_label = 0;
+	data.jmp_table_label = 0;
+	ctx->data = &data;
+
+	if (!ctx->live_intervals) {
+		ctx->stack_frame_size = 0;
+		ctx->stack_frame_alignment = 0;
+		ctx->call_stack_size = 0;
+		ctx->used_preserved_regs = 0;
+		ir_allocate_unique_spill_slots(ctx);
+	}
+
+	if (ctx->fixed_stack_frame_size != -1) {
+		if (ctx->fixed_stack_red_zone) {
+			IR_ASSERT(ctx->fixed_stack_red_zone == ctx->fixed_stack_frame_size + ctx->fixed_call_stack_size);
+		}
+		if (ctx->stack_frame_size > ctx->fixed_stack_frame_size) {
+			// TODO: report error to caller
+#ifdef IR_DEBUG_MESSAGES
+			fprintf(stderr, "IR Compilation Aborted: ctx->stack_frame_size > ctx->fixed_stack_frame_size at %s:%d\n",
+				__FILE__, __LINE__);
+#endif
+			ctx->data = NULL;
+			ctx->status = IR_ERROR_FIXED_STACK_FRAME_OVERFLOW;
+			return NULL;
+		}
+		ctx->stack_frame_size = ctx->fixed_stack_frame_size;
+		ctx->call_stack_size = ctx->fixed_call_stack_size;
+		ctx->stack_frame_alignment = 0;
+	}
+
+	Dst = &data.dasm_state;
+	data.dasm_state = NULL;
+	dasm_init(&data.dasm_state, DASM_MAXSECTION);
+	dasm_setupglobal(&data.dasm_state, dasm_labels, ir_lb_MAX);
+	dasm_setup(&data.dasm_state, dasm_actions);
+	/* labels for each block + for each constant + rodata label + jmp_table label + for each entry */
+	dasm_growpc(&data.dasm_state, ctx->cfg_blocks_count + 1 + ctx->consts_count + 1 + 1 + 1 + ctx->entries_count);
+
+	if (!(ctx->flags & IR_SKIP_PROLOGUE)) {
+		ir_emit_prologue(ctx);
+	}
+	if (ctx->flags & IR_FUNCTION) {
+		ir_emit_load_params(ctx);
+	}
+
+	for (b = 1, bb = ctx->cfg_blocks + b; b <= ctx->cfg_blocks_count; b++, bb++) {
+		IR_ASSERT(!(bb->flags & IR_BB_UNREACHABLE));
+		if ((bb->flags & (IR_BB_START|IR_BB_ENTRY|IR_BB_EMPTY)) == IR_BB_EMPTY) {
+			continue;
+		}
+		|=>b:
+
+		i = bb->start;
+		insn = ctx->ir_base + i;
+		if (bb->flags & IR_BB_ENTRY) {
+			uint32_t label = ctx->cfg_blocks_count + ctx->consts_count + 4 + insn->op3;
+
+			|=>label:
+			ir_emit_prologue(ctx);
+			ctx->entries[insn->op3] = i;
+		}
+
+		/* skip first instruction */
+		n = ir_insn_len(insn);
+		i += n;
+		insn += n;
+		rule = ctx->rules + i;
+
+		while (i <= bb->end) {
+			if (!((*rule) & (IR_FUSED|IR_SKIPPED)))
+			switch (*rule) {
+				case IR_VAR:
+				case IR_PARAM:
+				case IR_PI:
+				case IR_PHI:
+				case IR_SNAPSHOT:
+					break;
+				case IR_MUL_PWR2:
+				case IR_DIV_PWR2:
+				case IR_MOD_PWR2:
+					ir_emit_mul_div_mod_pwr2(ctx, i, insn);
+					break;
+				case IR_SHIFT:
+					ir_emit_shift(ctx, i, insn);
+					break;
+				case IR_SHIFT_CONST:
+					ir_emit_shift_const(ctx, i, insn);
+					break;
+ case IR_OP_INT: + ir_emit_op_int(ctx, i, insn); + break; + case IR_OP_FP: + ir_emit_op_fp(ctx, i, insn); + break; + case IR_BINOP_INT: + ir_emit_binop_int(ctx, i, insn); + break; + case IR_BINOP_FP: + ir_emit_binop_fp(ctx, i, insn); + break; + case IR_CMP_INT: + ir_emit_cmp_int(ctx, i, insn); + break; + case IR_CMP_FP: + ir_emit_cmp_fp(ctx, i, insn); + break; + case IR_SEXT: + ir_emit_sext(ctx, i, insn); + break; + case IR_ZEXT: + ir_emit_zext(ctx, i, insn); + break; + case IR_TRUNC: + ir_emit_trunc(ctx, i, insn); + break; + case IR_BITCAST: + ir_emit_bitcast(ctx, i, insn); + break; + case IR_INT2FP: + ir_emit_int2fp(ctx, i, insn); + break; + case IR_FP2INT: + ir_emit_fp2int(ctx, i, insn); + break; + case IR_FP2FP: + ir_emit_fp2fp(ctx, i, insn); + break; + case IR_COPY_INT: + ir_emit_copy_int(ctx, i, insn); + break; + case IR_COPY_FP: + ir_emit_copy_fp(ctx, i, insn); + break; + case IR_CMP_AND_BRANCH_INT: + ir_emit_cmp_and_branch_int(ctx, b, i, insn); + break; + case IR_CMP_AND_BRANCH_FP: + ir_emit_cmp_and_branch_fp(ctx, b, i, insn); + break; + case IR_GUARD_CMP_INT: + ir_emit_guard_cmp_int(ctx, b, i, insn); + break; + case IR_GUARD_CMP_FP: + ir_emit_guard_cmp_fp(ctx, b, i, insn); + break; + case IR_IF_INT: + ir_emit_if_int(ctx, b, i, insn); + break; + case IR_COND: + ir_emit_cond(ctx, i, insn); + break; + case IR_SWITCH: + ir_emit_switch(ctx, b, i, insn); + break; + case IR_MIN_MAX_INT: + ir_emit_min_max_int(ctx, i, insn); + break; + case IR_OVERFLOW: + ir_emit_overflow(ctx, i, insn); + break; + case IR_OVERFLOW_AND_BRANCH: + ir_emit_overflow_and_branch(ctx, b, i, insn); + break; + case IR_END: + case IR_LOOP_END: + if (bb->flags & IR_BB_OSR_ENTRY_LOADS) { + ir_emit_osr_entry_loads(ctx, b, bb); + } + if (bb->flags & IR_BB_DESSA_MOVES) { + ir_emit_dessa_moves(ctx, b, bb); + } + do { + ir_ref succ = ctx->cfg_edges[bb->successors]; + + if (UNEXPECTED(bb->successors_count == 2)) { + if (ctx->cfg_blocks[succ].flags & IR_BB_ENTRY) { + succ = ctx->cfg_edges[bb->successors + 1]; + } else { + IR_ASSERT(ctx->cfg_blocks[ctx->cfg_edges[bb->successors + 1]].flags & IR_BB_ENTRY); + } + } else { + IR_ASSERT(bb->successors_count == 1); + } + target = ir_skip_empty_target_blocks(ctx, succ); + if (b == ctx->cfg_blocks_count || target != ir_skip_empty_next_blocks(ctx, b + 1)) { + | b =>target + } + } while (0); + break; + case IR_RETURN_VOID: + ir_emit_return_void(ctx); + break; + case IR_RETURN_INT: + ir_emit_return_int(ctx, i, insn); + break; + case IR_RETURN_FP: + ir_emit_return_fp(ctx, i, insn); + break; + case IR_CALL: + ir_emit_call(ctx, i, insn); + break; + case IR_TAILCALL: + ir_emit_tailcall(ctx, i, insn); + break; + case IR_IJMP: + ir_emit_ijmp(ctx, i, insn); + break; + case IR_REG_BINOP_INT: + ir_emit_reg_binop_int(ctx, i, insn); + break; + case IR_VADDR: + ir_emit_vaddr(ctx, i, insn); + break; + case IR_VLOAD: + ir_emit_vload(ctx, i, insn); + break; + case IR_VSTORE: + ir_emit_vstore(ctx, i, insn); + break; + case IR_RLOAD: + ir_emit_rload(ctx, i, insn); + break; + case IR_RSTORE: + ir_emit_rstore(ctx, i, insn); + break; + case IR_LOAD_INT: + ir_emit_load_int(ctx, i, insn); + break; + case IR_LOAD_FP: + ir_emit_load_fp(ctx, i, insn); + break; + case IR_STORE_INT: + ir_emit_store_int(ctx, i, insn); + break; + case IR_STORE_FP: + ir_emit_store_fp(ctx, i, insn); + break; + case IR_ALLOCA: + ir_emit_alloca(ctx, i, insn); + break; + case IR_AFREE: + ir_emit_afree(ctx, i, insn); + break; + case IR_EXITCALL: + ir_emit_exitcall(ctx, i, insn); + break; + case IR_GUARD: + case IR_GUARD_NOT: + 
ir_emit_guard(ctx, i, insn);
+					break;
+				case IR_GUARD_OVERFLOW:
+					ir_emit_guard_overflow(ctx, i, insn);
+					break;
+				case IR_TLS:
+					ir_emit_tls(ctx, i, insn);
+					break;
+				default:
+					IR_ASSERT(0 && "NIY rule/instruction");
+					dasm_free(&data.dasm_state);
+					ctx->data = NULL;
+					ctx->status = IR_ERROR_UNSUPPORTED_CODE_RULE;
+					return NULL;
+			}
+			n = ir_insn_len(insn);
+			i += n;
+			insn += n;
+			rule += n;
+		}
+	}
+
+	if (ctx->deoptimization_exits) {
+		for (i = 0; i < ctx->deoptimization_exits; i++) {
+			const void *exit_addr = ctx->get_exit_addr(i);
+
+			if (!exit_addr) {
+				ctx->data = NULL;
+				return 0;
+			}
+			| b &exit_addr
+		}
+	}
+
+	if (data.rodata_label) {
+		|.rodata
+	}
+	for (i = IR_UNUSED + 1, insn = ctx->ir_base - i; i < ctx->consts_count; i++, insn--) {
+		if (insn->const_flags & IR_CONST_EMIT) {
+			if (IR_IS_TYPE_FP(insn->type)) {
+				int label = ctx->cfg_blocks_count + i;
+
+				if (!data.rodata_label) {
+					data.rodata_label = ctx->cfg_blocks_count + ctx->consts_count + 2;
+
+					|.rodata
+					|=>data.rodata_label:
+				}
+				if (insn->type == IR_DOUBLE) {
+					|.align 8
+					|=>label:
+					|.long insn->val.u32, insn->val.u32_hi
+				} else {
+					IR_ASSERT(insn->type == IR_FLOAT);
+					|.align 4
+					|=>label:
+					|.long insn->val.u32
+				}
+			} else if (insn->op == IR_STR) {
+				int label = ctx->cfg_blocks_count + i;
+				const char *str = ir_get_str(ctx, insn->val.i32);
+				int i = 0;
+
+				if (!data.rodata_label) {
+					data.rodata_label = ctx->cfg_blocks_count + ctx->consts_count + 2;
+
+					|.rodata
+					|=>data.rodata_label:
+				}
+				|.align 8
+				|=>label:
+				while (1) {
+					char c;
+					uint32_t w = 0;
+					int j;
+
+					for (j = 0; j < 4; j++) {
+						c = str[i];
+						if (!c) {
+							break;
+						} else if (c == '\\') {
+							if (str[i+1] == '\\') {
+								i++;
+								c = '\\';
+							} else if (str[i+1] == '\'') {
+								i++;
+								c = '\'';
+							} else if (str[i+1] == '"') {
+								i++;
+								c = '"';
+							} else if (str[i+1] == 'a') {
+								i++;
+								c = '\a';
+							} else if (str[i+1] == 'b') {
+								i++;
+								c = '\b';
+							} else if (str[i+1] == 'e') {
+								i++;
+								c = 27; /* '\e'; */
+							} else if (str[i+1] == 'f') {
+								i++;
+								c = '\f';
+							} else if (str[i+1] == 'n') {
+								i++;
+								c = '\n';
+							} else if (str[i+1] == 'r') {
+								i++;
+								c = '\r';
+							} else if (str[i+1] == 't') {
+								i++;
+								c = '\t';
+							} else if (str[i+1] == 'v') {
+								i++;
+								c = '\v';
+							} else if (str[i+1] == '?') {
+								i++;
+								c = 0x3f;
+							}
+						}
+						w |= c << (8 * j);
+						i++;
+					}
+					| .long w
+					if (!c) {
+						break;
+					}
+				}
+
+			} else {
+				IR_ASSERT(0);
+			}
+		}
+	}
+	if (data.rodata_label) {
+		|.code
+	}
+
+	if (ctx->status) {
+		dasm_free(&data.dasm_state);
+		ctx->data = NULL;
+		return NULL;
+	}
+
+	ret = dasm_link(&data.dasm_state, size_ptr);
+	if (ret != DASM_S_OK) {
+		IR_ASSERT(0);
+		dasm_free(&data.dasm_state);
+		ctx->data = NULL;
+		ctx->status = IR_ERROR_LINK;
+		return NULL;
+	}
+	size = *size_ptr;
+
+	if (ctx->code_buffer != NULL) {
+		if (IR_ALIGNED_SIZE(size, 16) > ctx->code_buffer_size) {
+			dasm_free(&data.dasm_state);
+			ctx->data = NULL;
+			ctx->status = IR_ERROR_CODE_MEM_OVERFLOW;
+			return NULL;
+		}
+		entry = ctx->code_buffer;
+		IR_ASSERT((uintptr_t)entry % 16 == 0);
+	} else {
+		entry = ir_mem_mmap(size);
+		if (!entry) {
+			dasm_free(&data.dasm_state);
+			ctx->data = NULL;
+			ctx->status = IR_ERROR_CODE_MEM_OVERFLOW;
+			return NULL;
+		}
+		ir_mem_unprotect(entry, size);
+	}
+
+	ir_current_ctx = ctx;
+	ctx->veneers_size = 0;
+	if (data.jmp_table_label) {
+		ctx->code_size = dasm_getpclabel(&data.dasm_state, data.jmp_table_label);
+	} else if (data.rodata_label) {
+		ctx->code_size = dasm_getpclabel(&data.dasm_state, data.rodata_label);
+	} else {
+		ctx->code_size = size;
+	}
+
+	ret = dasm_encode(&data.dasm_state, entry);
+	if (ret != DASM_S_OK) {
+		IR_ASSERT(0);
+		dasm_free(&data.dasm_state);
+		if (ctx->code_buffer == NULL) {
+			ir_mem_unmap(entry, size);
+		}
+		ctx->data = NULL;
+		ctx->status = IR_ERROR_ENCODE;
+		return NULL;
+	}
+
+	if (data.jmp_table_label) {
+		uint32_t offset = dasm_getpclabel(&data.dasm_state, data.jmp_table_label);
+		ctx->jmp_table_offset = offset;
+	} else {
+		ctx->jmp_table_offset = 0;
+	}
+	if (data.rodata_label) {
+		uint32_t offset = dasm_getpclabel(&data.dasm_state, data.rodata_label);
+		ctx->rodata_offset = offset;
+	} else {
+		ctx->rodata_offset = 0;
+	}
+
+	if (ctx->entries_count) {
+		/* For all entries */
+		i = ctx->entries_count;
+		do {
+			ir_insn *insn = &ctx->ir_base[ctx->entries[--i]];
+			uint32_t offset = dasm_getpclabel(&data.dasm_state, ctx->cfg_blocks_count + ctx->consts_count + 4 + insn->op3);
+			insn->op3 = offset;
+		} while (i != 0);
+	}
+
+	dasm_free(&data.dasm_state);
+
+	*size_ptr += ctx->veneers_size;
+
+	ir_mem_flush(entry, size);
+
+	if (ctx->code_buffer == NULL) {
+		ir_mem_protect(entry, size);
+	}
+
+	ctx->data = NULL;
+	return entry;
+}
+
+const void *ir_emit_exitgroup(uint32_t first_exit_point, uint32_t exit_points_per_group, const void *exit_addr, void *code_buffer, size_t code_buffer_size, size_t *size_ptr)
+{
+	void *entry;
+	size_t size;
+	uint32_t i;
+	dasm_State **Dst, *dasm_state;
+	int ret;
+
+	/* IR_ASSERT(aarch64_may_use_b(ctx, exit_addr)) */
+	IR_ASSERT(code_buffer);
+	if ((char*)exit_addr >= (char*)code_buffer && (char*)exit_addr < (char*)code_buffer + code_buffer_size) {
+		IR_ASSERT(code_buffer_size < B_IMM);
+	} else if ((char*)exit_addr >= (char*)code_buffer + code_buffer_size) {
+		IR_ASSERT(((char*)exit_addr - (char*)code_buffer) < B_IMM);
+	} else if ((char*)exit_addr < (char*)code_buffer) {
+		IR_ASSERT(((((char*)(code_buffer)) + code_buffer_size) - (char*)exit_addr) < B_IMM);
+	} else {
+		IR_ASSERT(0);
+	}
+
+	Dst = &dasm_state;
+	dasm_state = NULL;
+	dasm_init(&dasm_state, DASM_MAXSECTION);
+	dasm_setupglobal(&dasm_state, dasm_labels, ir_lb_MAX);
+	dasm_setup(&dasm_state, dasm_actions);
+
+	| bl >2
+	|1:
+	for (i = 1; i < exit_points_per_group; i++) {
+		| bl >2
+	}
+	|2:
+	| adr Rx(IR_REG_INT_TMP), <1
+	| sub Rx(IR_REG_INT_TMP), lr, Rx(IR_REG_INT_TMP)
+	| lsr Rx(IR_REG_INT_TMP), Rx(IR_REG_INT_TMP), #2
+	if (first_exit_point) {
+		| add Rx(IR_REG_INT_TMP), Rx(IR_REG_INT_TMP), #first_exit_point
+	}
+	| b &exit_addr
+
+	ret = dasm_link(&dasm_state, &size);
+	if (ret != DASM_S_OK) {
+		IR_ASSERT(0);
+		dasm_free(&dasm_state);
+		return NULL;
+	}
+
+	if (code_buffer != NULL) {
+		if (IR_ALIGNED_SIZE(size, 16) > code_buffer_size) {
+			dasm_free(&dasm_state);
+			return NULL;
+		}
+		entry = code_buffer;
+		IR_ASSERT((uintptr_t)entry % 16 == 0);
+	} else {
+		entry = ir_mem_mmap(size);
+		ir_mem_unprotect(entry, size);
+	}
+
+	ir_current_ctx = NULL;
+	ret = dasm_encode(&dasm_state, entry);
+	if (ret != DASM_S_OK) {
+		IR_ASSERT(0);
+		dasm_free(&dasm_state);
+		if (code_buffer == NULL) {
+			ir_mem_unmap(entry, size);
+		}
+		return NULL;
+	}
+
+	dasm_free(&dasm_state);
+
+	ir_mem_flush(entry, size);
+
+	if (code_buffer == NULL) {
+		ir_mem_protect(entry, size);
+	}
+
+	*size_ptr = size;
+	return entry;
+}
+
+static int ir_add_veneer(dasm_State *Dst, void *buffer, uint32_t ins, int *b, uint32_t *cp, ptrdiff_t offset)
+{
+	ir_ctx *ctx = ir_current_ctx;
+	const void *addr, *veneer = NULL;
+	ptrdiff_t na;
+	int n, m;
+
+	IR_ASSERT(ctx && ctx->code_buffer);
+
+
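	/*
	 * Background for the range checks that follow: AArch64 B/BL branches
	 * carry a signed 26-bit word offset (+/-128MB), B.cond/CBZ/CBNZ and
	 * LDR-literal a 19-bit one (+/-1MB), and TBZ/TBNZ a 14-bit one
	 * (+/-32KB). Each test below biases the byte offset by half the range
	 * so that any in-range value clears the high bits. A sketch of the
	 * B/BL predicate (the helper name is hypothetical, for illustration
	 * only):
	 *
	 *     static bool fits_b_imm26(ptrdiff_t n) {
	 *         // word-aligned and within [-2^27, 2^27) bytes
	 *         return (n & 3) == 0 && ((n + 0x08000000) >> 28) == 0;
	 *     }
	 */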
if ((ins >> 16) == DASM_REL_A) { + addr = (void*)((((ptrdiff_t)(*(b-1))) << 32) | (unsigned int)(*(b-2))); + if (ctx->get_veneer) { + veneer = ctx->get_veneer(ctx, addr); + } + } else { + IR_ASSERT(0 && "too long jmp distance"); + return 0; + } + + if (veneer) { + na = (ptrdiff_t)veneer - (ptrdiff_t)cp + 4; + n = (int)na; + + /* check if we can jump to veneer */ + if ((ptrdiff_t)n != na) { + /* pass */ + } else if (!(ins & 0xf800)) { /* B, BL */ + if ((n & 3) == 0 && ((n+0x08000000) >> 28) == 0) { + return n; + } + } else if ((ins & 0x800)) { /* B.cond, CBZ, CBNZ, LDR* literal */ + if ((n & 3) == 0 && ((n+0x00100000) >> 21) == 0) { + return n; + } + } else if ((ins & 0x3000) == 0x2000) { /* ADR */ + /* pass */ + } else if ((ins & 0x3000) == 0x3000) { /* ADRP */ + /* pass */ + } else if ((ins & 0x1000)) { /* TBZ, TBNZ */ + if ((n & 3) == 0 && ((n+0x00008000) >> 16) == 0) { + return n; + } + } + } + + veneer = (char*)buffer + (Dst->codesize + ctx->veneers_size); + if (veneer > (void*)((char*)ctx->code_buffer + ctx->code_buffer_size)) { + IR_ASSERT(0 && "too long jmp distance" && "jit buffer overflow"); + return 0; /* jit_buffer_size overflow */ + } + + na = (ptrdiff_t)veneer - (ptrdiff_t)cp + 4; + n = (int)na; + + /* check if we can jump to veneer */ + if ((ptrdiff_t)n != na) { + IR_ASSERT(0 && "too long jmp distance"); + return 0; + } else if (!(ins & 0xf800)) { /* B, BL */ + if ((n & 3) != 0 || ((n+0x08000000) >> 28) != 0) { + IR_ASSERT(0 && "too long jmp distance"); + return 0; + } + } else if ((ins & 0x800)) { /* B.cond, CBZ, CBNZ, LDR* literal */ + if ((n & 3) != 0 || ((n+0x00100000) >> 21) != 0) { + IR_ASSERT(0 && "too long jmp distance"); + return 0; + } + } else if ((ins & 0x3000) == 0x2000) { /* ADR */ + IR_ASSERT(0 && "too long jmp distance"); + return 0; + } else if ((ins & 0x3000) == 0x3000) { /* ADRP */ + IR_ASSERT(0 && "too long jmp distance"); + return 0; + } else if ((ins & 0x1000)) { /* TBZ, TBNZ */ + if ((n & 3) != 0 || ((n+0x00008000) >> 16) != 0) { + IR_ASSERT(0 && "too long jmp distance"); + return 0; + } + } else if ((ins & 0x8000)) { /* absolute */ + IR_ASSERT(0 && "too long jmp distance"); + return 0; + } else { + IR_ASSERT(0 && "too long jmp distance"); + return 0; + } + + /* check if we can use B to jump from veneer */ + na = (ptrdiff_t)cp + offset - (ptrdiff_t)veneer - 4; + m = (int)na; + if ((ptrdiff_t)m != na) { + IR_ASSERT(0 && "too long jmp distance"); + return 0; + } else if ((m & 3) != 0 || ((m+0x08000000) >> 28) != 0) { + IR_ASSERT(0 && "too long jmp distance"); + return 0; + } + + if (!ctx->set_veneer || !ctx->set_veneer(ctx, addr, veneer)) { + IR_ASSERT(0 && "too long jmp distance"); + return 0; + } + + /* generate B instruction */ + *(uint32_t*)veneer = 0x14000000 | ((m >> 2) & 0x03ffffff); + ctx->veneers_size += 4; + + return n; +} diff --git a/ext/opcache/jit/ir/ir_aarch64.h b/ext/opcache/jit/ir/ir_aarch64.h new file mode 100644 index 00000000000..4c36f7e56f6 --- /dev/null +++ b/ext/opcache/jit/ir/ir_aarch64.h @@ -0,0 +1,173 @@ +/* + * IR - Lightweight JIT Compilation Framework + * (Aarch64 CPU specific definitions) + * Copyright (C) 2022 Zend by Perforce. 
+ * Authors: Dmitry Stogov
+ */
+
+#ifndef IR_AARCH64_H
+#define IR_AARCH64_H
+
+#define IR_GP_REGS(_) \
+	_(X0, x0, w0) \
+	_(X1, x1, w1) \
+	_(X2, x2, w2) \
+	_(X3, x3, w3) \
+	_(X4, x4, w4) \
+	_(X5, x5, w5) \
+	_(X6, x6, w6) \
+	_(X7, x7, w7) \
+	_(X8, x8, w8) \
+	_(X9, x9, w9) \
+	_(X10, x10, w10) \
+	_(X11, x11, w11) \
+	_(X12, x12, w12) \
+	_(X13, x13, w13) \
+	_(X14, x14, w14) \
+	_(X15, x15, w15) \
+	_(X16, x16, w16) \
+	_(X17, x17, w17) \
+	_(X18, x18, w18) \
+	_(X19, x19, w19) \
+	_(X20, x20, w20) \
+	_(X21, x21, w21) \
+	_(X22, x22, w22) \
+	_(X23, x23, w23) \
+	_(X24, x24, w24) \
+	_(X25, x25, w25) \
+	_(X26, x26, w26) \
+	_(X27, x27, w27) \
+	_(X28, x28, w28) \
+	_(X29, x29, w29) \
+	_(X30, x30, w30) \
+	_(X31, x31, w31) \
+
+# define IR_FP_REGS(_) \
+	_(V0, d0, s0, h0, b0) \
+	_(V1, d1, s1, h1, b1) \
+	_(V2, d2, s2, h2, b2) \
+	_(V3, d3, s3, h3, b3) \
+	_(V4, d4, s4, h4, b4) \
+	_(V5, d5, s5, h5, b5) \
+	_(V6, d6, s6, h6, b6) \
+	_(V7, d7, s7, h7, b7) \
+	_(V8, d8, s8, h8, b8) \
+	_(V9, d9, s9, h9, b9) \
+	_(V10, d10, s10, h10, b10) \
+	_(V11, d11, s11, h11, b11) \
+	_(V12, d12, s12, h12, b12) \
+	_(V13, d13, s13, h13, b13) \
+	_(V14, d14, s14, h14, b14) \
+	_(V15, d15, s15, h15, b15) \
+	_(V16, d16, s16, h16, b16) \
+	_(V17, d17, s17, h17, b17) \
+	_(V18, d18, s18, h18, b18) \
+	_(V19, d19, s19, h19, b19) \
+	_(V20, d20, s20, h20, b20) \
+	_(V21, d21, s21, h21, b21) \
+	_(V22, d22, s22, h22, b22) \
+	_(V23, d23, s23, h23, b23) \
+	_(V24, d24, s24, h24, b24) \
+	_(V25, d25, s25, h25, b25) \
+	_(V26, d26, s26, h26, b26) \
+	_(V27, d27, s27, h27, b27) \
+	_(V28, d28, s28, h28, b28) \
+	_(V29, d29, s29, h29, b29) \
+	_(V30, d30, s30, h30, b30) \
+	_(V31, d31, s31, h31, b31) \
+
+#define IR_GP_REG_ENUM(code, name64, name32) \
+	IR_REG_ ## code,
+
+#define IR_FP_REG_ENUM(code, name64, name32, name16, name8) \
+	IR_REG_ ## code,
+
+enum _ir_reg {
+	_IR_REG_NONE = -1,
+	IR_GP_REGS(IR_GP_REG_ENUM)
+	IR_FP_REGS(IR_FP_REG_ENUM)
+	IR_REG_NUM,
+};
+
+#define IR_REG_GP_FIRST IR_REG_X0
+#define IR_REG_FP_FIRST IR_REG_V0
+#define IR_REG_GP_LAST (IR_REG_FP_FIRST - 1)
+#define IR_REG_FP_LAST (IR_REG_NUM - 1)
+#define IR_REG_SCRATCH (IR_REG_NUM) /* special name for regset */
+#define IR_REG_ALL (IR_REG_NUM + 1) /* special name for regset */
+
+#define IR_REGSET_64BIT 1
+
+#define IR_REG_INT_TMP IR_REG_X17 /* reserved temporary register used by code-generator */
+
+#define IR_REG_STACK_POINTER \
+	IR_REG_X31
+#define IR_REG_FRAME_POINTER \
+	IR_REG_X29
+#define IR_REGSET_FIXED \
+	( IR_REGSET(IR_REG_INT_TMP) \
+	| IR_REGSET(IR_REG_X18) /* platform specific register */ \
+	| IR_REGSET_INTERVAL(IR_REG_X29, IR_REG_X31))
+#define IR_REGSET_GP \
+	IR_REGSET_DIFFERENCE(IR_REGSET_INTERVAL(IR_REG_GP_FIRST, IR_REG_GP_LAST), IR_REGSET_FIXED)
+#define IR_REGSET_FP \
+	IR_REGSET_DIFFERENCE(IR_REGSET_INTERVAL(IR_REG_FP_FIRST, IR_REG_FP_LAST), IR_REGSET_FIXED)
+
+#define IR_REG_IP0 IR_REG_X16
+#define IR_REG_IP1 IR_REG_X17
+#define IR_REG_PR IR_REG_X18
+#define IR_REG_LR IR_REG_X30
+#define IR_REG_ZR IR_REG_X31
+
+/* Calling Convention */
+#define IR_REG_INT_RET1 IR_REG_X0
+#define IR_REG_FP_RET1 IR_REG_V0
+#define IR_REG_INT_ARGS 8
+#define IR_REG_FP_ARGS 8
+#define IR_REG_INT_ARG1 IR_REG_X0
+#define IR_REG_INT_ARG2 IR_REG_X1
+#define IR_REG_INT_ARG3 IR_REG_X2
+#define IR_REG_INT_ARG4 IR_REG_X3
+#define IR_REG_INT_ARG5 IR_REG_X4
+#define IR_REG_INT_ARG6 IR_REG_X5
+#define IR_REG_INT_ARG7 IR_REG_X6
+#define IR_REG_INT_ARG8 IR_REG_X7
+#define IR_REG_FP_ARG1 IR_REG_V0
+#define IR_REG_FP_ARG2 IR_REG_V1
+#define IR_REG_FP_ARG3 IR_REG_V2 +#define IR_REG_FP_ARG4 IR_REG_V3 +#define IR_REG_FP_ARG5 IR_REG_V4 +#define IR_REG_FP_ARG6 IR_REG_V5 +#define IR_REG_FP_ARG7 IR_REG_V6 +#define IR_REG_FP_ARG8 IR_REG_V7 +#define IR_MAX_REG_ARGS 16 +#define IR_SHADOW_ARGS 0 + +# define IR_REGSET_SCRATCH \ + (IR_REGSET_INTERVAL(IR_REG_X0, IR_REG_X18) \ + | IR_REGSET_INTERVAL(IR_REG_V0, IR_REG_V7) \ + | IR_REGSET_INTERVAL(IR_REG_V16, IR_REG_V31)) + +# define IR_REGSET_PRESERVED \ + (IR_REGSET_INTERVAL(IR_REG_X19, IR_REG_X30) \ + | IR_REGSET_INTERVAL(IR_REG_V8, IR_REG_V15)) + +typedef struct _ir_tmp_reg { + union { + uint8_t num; + int8_t reg; + }; + uint8_t type; + uint8_t start; + uint8_t end; +} ir_tmp_reg; + +struct _ir_target_constraints { + int8_t def_reg; + uint8_t tmps_count; + uint8_t hints_count; + ir_tmp_reg tmp_regs[3]; + int8_t hints[IR_MAX_REG_ARGS + 3]; +}; + +#endif /* IR_AARCH64_H */ diff --git a/ext/opcache/jit/ir/ir_builder.h b/ext/opcache/jit/ir/ir_builder.h new file mode 100644 index 00000000000..c7d5abf5e46 --- /dev/null +++ b/ext/opcache/jit/ir/ir_builder.h @@ -0,0 +1,639 @@ +/* + * IR - Lightweight JIT Compilation Framework + * (IR Construction API) + * Copyright (C) 2022 Zend by Perforce. + * Authors: Dmitry Stogov + */ + +#ifndef IR_BUILDER_H +#define IR_BUILDER_H + +#ifdef __cplusplus +extern "C" { +#endif + +/* _ir_CTX may be redefined by the user */ +#define _ir_CTX ctx + +#define ir_NOP() ir_emit0(_ir_CTX, IR_NOP) + +#define ir_CONST_BOOL(_val) ir_const_bool(_ir_CTX, (_val)) +#define ir_CONST_U8(_val) ir_const_u8(_ir_CTX, (_val)) +#define ir_CONST_U16(_val) ir_const_u16(_ir_CTX, (_val)) +#define ir_CONST_U32(_val) ir_const_u32(_ir_CTX, (_val)) +#define ir_CONST_U64(_val) ir_const_u64(_ir_CTX, (_val)) +#define ir_CONST_ADDR(_val) ir_const_addr(_ir_CTX, (uintptr_t)(_val)) +#define ir_CONST_CHAR(_val) ir_const_char(_ir_CTX, (_val)) +#define ir_CONST_I8(_val) ir_const_i8(_ir_CTX, (_val)) +#define ir_CONST_I16(_val) ir_const_i16(_ir_CTX, (_val)) +#define ir_CONST_I32(_val) ir_const_i32(_ir_CTX, (_val)) +#define ir_CONST_I64(_val) ir_const_i64(_ir_CTX, (_val)) +#define ir_CONST_DOUBLE(_val) ir_const_double(_ir_CTX, (_val)) +#define ir_CONST_FLOAT(_val) ir_const_float(_ir_CTX, (_val)) + +#define ir_CMP_OP(_op, _op1, _op2) ir_fold2(_ir_CTX, IR_OPT((_op), IR_BOOL), (_op1), (_op2)) + +#define ir_UNARY_OP(_op, _type, _op1) ir_fold1(_ir_CTX, IR_OPT((_op), (_type)), (_op1)) +#define ir_UNARY_OP_B(_op, _op1) ir_fold1(_ir_CTX, IR_OPT((_op), IR_BOOL), (_op1)) +#define ir_UNARY_OP_U8(_op, _op1) ir_fold1(_ir_CTX, IR_OPT((_op), IR_U8), (_op1)) +#define ir_UNARY_OP_U16(_op, _op1) ir_fold1(_ir_CTX, IR_OPT((_op), IR_U16), (_op1)) +#define ir_UNARY_OP_U32(_op, _op1) ir_fold1(_ir_CTX, IR_OPT((_op), IR_U32), (_op1)) +#define ir_UNARY_OP_U64(_op, _op1) ir_fold1(_ir_CTX, IR_OPT((_op), IR_U64), (_op1)) +#define ir_UNARY_OP_A(_op, _op1) ir_fold1(_ir_CTX, IR_OPT((_op), IR_ADDR), (_op1)) +#define ir_UNARY_OP_C(_op, _op1) ir_fold1(_ir_CTX, IR_OPT((_op), IR_CHAR), (_op1)) +#define ir_UNARY_OP_I8(_op, _op1) ir_fold1(_ir_CTX, IR_OPT((_op), IR_I8), (_op1)) +#define ir_UNARY_OP_I16(_op, _op1) ir_fold1(_ir_CTX, IR_OPT((_op), IR_I16), (_op1)) +#define ir_UNARY_OP_I32(_op, _op1) ir_fold1(_ir_CTX, IR_OPT((_op), IR_I32), (_op1)) +#define ir_UNARY_OP_I64(_op, _op1) ir_fold1(_ir_CTX, IR_OPT((_op), IR_I64), (_op1)) +#define ir_UNARY_OP_D(_op, _op1) ir_fold1(_ir_CTX, IR_OPT((_op), IR_DOUBLE), (_op1)) +#define ir_UNARY_OP_F(_op, _op1) ir_fold1(_ir_CTX, IR_OPT((_op), IR_FLOAT), (_op1)) + +#define ir_BINARY_OP(_op, _t, _op1, 
_op2) ir_fold2(_ir_CTX, IR_OPT((_op), (_t)), (_op1), (_op2)) +#define ir_BINARY_OP_B(_op, _op1, _op2) ir_fold2(_ir_CTX, IR_OPT((_op), IR_BOOL), (_op1), (_op2)) +#define ir_BINARY_OP_U8(_op, _op1, _op2) ir_fold2(_ir_CTX, IR_OPT((_op), IR_U8), (_op1), (_op2)) +#define ir_BINARY_OP_U16(_op, _op1, _op2) ir_fold2(_ir_CTX, IR_OPT((_op), IR_U16), (_op1), (_op2)) +#define ir_BINARY_OP_U32(_op, _op1, _op2) ir_fold2(_ir_CTX, IR_OPT((_op), IR_U32), (_op1), (_op2)) +#define ir_BINARY_OP_U64(_op, _op1, _op2) ir_fold2(_ir_CTX, IR_OPT((_op), IR_U64), (_op1), (_op2)) +#define ir_BINARY_OP_A(_op, _op1, _op2) ir_fold2(_ir_CTX, IR_OPT((_op), IR_ADDR), (_op1), (_op2)) +#define ir_BINARY_OP_C(_op, _op1, _op2) ir_fold2(_ir_CTX, IR_OPT((_op), IR_CHAR), (_op1), (_op2)) +#define ir_BINARY_OP_I8(_op, _op1, _op2) ir_fold2(_ir_CTX, IR_OPT((_op), IR_I8), (_op1), (_op2)) +#define ir_BINARY_OP_I16(_op, _op1, _op2) ir_fold2(_ir_CTX, IR_OPT((_op), IR_I16), (_op1), (_op2)) +#define ir_BINARY_OP_I32(_op, _op1, _op2) ir_fold2(_ir_CTX, IR_OPT((_op), IR_I32), (_op1), (_op2)) +#define ir_BINARY_OP_I64(_op, _op1, _op2) ir_fold2(_ir_CTX, IR_OPT((_op), IR_I64), (_op1), (_op2)) +#define ir_BINARY_OP_D(_op, _op1, _op2) ir_fold2(_ir_CTX, IR_OPT((_op), IR_DOUBLE), (_op1), (_op2)) +#define ir_BINARY_OP_F(_op, _op1, _op2) ir_fold2(_ir_CTX, IR_OPT((_op), IR_FLOAT), (_op1), (_op2)) + +#define ir_EQ(_op1, _op2) ir_CMP_OP(IR_EQ, (_op1), (_op2)) +#define ir_NE(_op1, _op2) ir_CMP_OP(IR_NE, (_op1), (_op2)) + +#define ir_LT(_op1, _op2) ir_CMP_OP(IR_LT, (_op1), (_op2)) +#define ir_GE(_op1, _op2) ir_CMP_OP(IR_GE, (_op1), (_op2)) +#define ir_LE(_op1, _op2) ir_CMP_OP(IR_LE, (_op1), (_op2)) +#define ir_GT(_op1, _op2) ir_CMP_OP(IR_GT, (_op1), (_op2)) + +#define ir_ULT(_op1, _op2) ir_CMP_OP(IR_ULT, (_op1), (_op2)) +#define ir_UGE(_op1, _op2) ir_CMP_OP(IR_UGE, (_op1), (_op2)) +#define ir_ULE(_op1, _op2) ir_CMP_OP(IR_ULE, (_op1), (_op2)) +#define ir_UGT(_op1, _op2) ir_CMP_OP(IR_UGT, (_op1), (_op2)) + +#define ir_ADD(_type, _op1, _op2) ir_BINARY_OP(IR_ADD, (_type), (_op1), (_op2)) +#define ir_ADD_U8(_op1, _op2) ir_BINARY_OP_U8(IR_ADD, (_op1), (_op2)) +#define ir_ADD_U16(_op1, _op2) ir_BINARY_OP_U16(IR_ADD, (_op1), (_op2)) +#define ir_ADD_U32(_op1, _op2) ir_BINARY_OP_U32(IR_ADD, (_op1), (_op2)) +#define ir_ADD_U64(_op1, _op2) ir_BINARY_OP_U64(IR_ADD, (_op1), (_op2)) +#define ir_ADD_A(_op1, _op2) ir_BINARY_OP_A(IR_ADD, (_op1), (_op2)) +#define ir_ADD_C(_op1, _op2) ir_BINARY_OP_C(IR_ADD, (_op1), (_op2)) +#define ir_ADD_I8(_op1, _op2) ir_BINARY_OP_I8(IR_ADD, (_op1), (_op2)) +#define ir_ADD_I16(_op1, _op2) ir_BINARY_OP_I16(IR_ADD, (_op1), (_op2)) +#define ir_ADD_I32(_op1, _op2) ir_BINARY_OP_I32(IR_ADD, (_op1), (_op2)) +#define ir_ADD_I64(_op1, _op2) ir_BINARY_OP_I64(IR_ADD, (_op1), (_op2)) +#define ir_ADD_D(_op1, _op2) ir_BINARY_OP_D(IR_ADD, (_op1), (_op2)) +#define ir_ADD_F(_op1, _op2) ir_BINARY_OP_F(IR_ADD, (_op1), (_op2)) + +#define ir_SUB(_type, _op1, _op2) ir_BINARY_OP(IR_SUB, (_type), (_op1), (_op2)) +#define ir_SUB_U8(_op1, _op2) ir_BINARY_OP_U8(IR_SUB, (_op1), (_op2)) +#define ir_SUB_U16(_op1, _op2) ir_BINARY_OP_U16(IR_SUB, (_op1), (_op2)) +#define ir_SUB_U32(_op1, _op2) ir_BINARY_OP_U32(IR_SUB, (_op1), (_op2)) +#define ir_SUB_U64(_op1, _op2) ir_BINARY_OP_U64(IR_SUB, (_op1), (_op2)) +#define ir_SUB_A(_op1, _op2) ir_BINARY_OP_A(IR_SUB, (_op1), (_op2)) +#define ir_SUB_C(_op1, _op2) ir_BINARY_OP_C(IR_SUB, (_op1), (_op2)) +#define ir_SUB_I8(_op1, _op2) ir_BINARY_OP_I8(IR_SUB, (_op1), (_op2)) +#define ir_SUB_I16(_op1, _op2) ir_BINARY_OP_I16(IR_SUB, (_op1), 
(_op2))
+#define ir_SUB_I32(_op1, _op2) ir_BINARY_OP_I32(IR_SUB, (_op1), (_op2))
+#define ir_SUB_I64(_op1, _op2) ir_BINARY_OP_I64(IR_SUB, (_op1), (_op2))
+#define ir_SUB_D(_op1, _op2) ir_BINARY_OP_D(IR_SUB, (_op1), (_op2))
+#define ir_SUB_F(_op1, _op2) ir_BINARY_OP_F(IR_SUB, (_op1), (_op2))
+
+#define ir_MUL(_type, _op1, _op2) ir_BINARY_OP(IR_MUL, (_type), (_op1), (_op2))
+#define ir_MUL_U8(_op1, _op2) ir_BINARY_OP_U8(IR_MUL, (_op1), (_op2))
+#define ir_MUL_U16(_op1, _op2) ir_BINARY_OP_U16(IR_MUL, (_op1), (_op2))
+#define ir_MUL_U32(_op1, _op2) ir_BINARY_OP_U32(IR_MUL, (_op1), (_op2))
+#define ir_MUL_U64(_op1, _op2) ir_BINARY_OP_U64(IR_MUL, (_op1), (_op2))
+#define ir_MUL_A(_op1, _op2) ir_BINARY_OP_A(IR_MUL, (_op1), (_op2))
+#define ir_MUL_C(_op1, _op2) ir_BINARY_OP_C(IR_MUL, (_op1), (_op2))
+#define ir_MUL_I8(_op1, _op2) ir_BINARY_OP_I8(IR_MUL, (_op1), (_op2))
+#define ir_MUL_I16(_op1, _op2) ir_BINARY_OP_I16(IR_MUL, (_op1), (_op2))
+#define ir_MUL_I32(_op1, _op2) ir_BINARY_OP_I32(IR_MUL, (_op1), (_op2))
+#define ir_MUL_I64(_op1, _op2) ir_BINARY_OP_I64(IR_MUL, (_op1), (_op2))
+#define ir_MUL_D(_op1, _op2) ir_BINARY_OP_D(IR_MUL, (_op1), (_op2))
+#define ir_MUL_F(_op1, _op2) ir_BINARY_OP_F(IR_MUL, (_op1), (_op2))
+
+#define ir_DIV(_type, _op1, _op2) ir_BINARY_OP(IR_DIV, (_type), (_op1), (_op2))
+#define ir_DIV_U8(_op1, _op2) ir_BINARY_OP_U8(IR_DIV, (_op1), (_op2))
+#define ir_DIV_U16(_op1, _op2) ir_BINARY_OP_U16(IR_DIV, (_op1), (_op2))
+#define ir_DIV_U32(_op1, _op2) ir_BINARY_OP_U32(IR_DIV, (_op1), (_op2))
+#define ir_DIV_U64(_op1, _op2) ir_BINARY_OP_U64(IR_DIV, (_op1), (_op2))
+#define ir_DIV_A(_op1, _op2) ir_BINARY_OP_A(IR_DIV, (_op1), (_op2))
+#define ir_DIV_C(_op1, _op2) ir_BINARY_OP_C(IR_DIV, (_op1), (_op2))
+#define ir_DIV_I8(_op1, _op2) ir_BINARY_OP_I8(IR_DIV, (_op1), (_op2))
+#define ir_DIV_I16(_op1, _op2) ir_BINARY_OP_I16(IR_DIV, (_op1), (_op2))
+#define ir_DIV_I32(_op1, _op2) ir_BINARY_OP_I32(IR_DIV, (_op1), (_op2))
+#define ir_DIV_I64(_op1, _op2) ir_BINARY_OP_I64(IR_DIV, (_op1), (_op2))
+#define ir_DIV_D(_op1, _op2) ir_BINARY_OP_D(IR_DIV, (_op1), (_op2))
+#define ir_DIV_F(_op1, _op2) ir_BINARY_OP_F(IR_DIV, (_op1), (_op2))
+
+#define ir_MOD(_type, _op1, _op2) ir_BINARY_OP(IR_MOD, (_type), (_op1), (_op2))
+#define ir_MOD_U8(_op1, _op2) ir_BINARY_OP_U8(IR_MOD, (_op1), (_op2))
+#define ir_MOD_U16(_op1, _op2) ir_BINARY_OP_U16(IR_MOD, (_op1), (_op2))
+#define ir_MOD_U32(_op1, _op2) ir_BINARY_OP_U32(IR_MOD, (_op1), (_op2))
+#define ir_MOD_U64(_op1, _op2) ir_BINARY_OP_U64(IR_MOD, (_op1), (_op2))
+#define ir_MOD_A(_op1, _op2) ir_BINARY_OP_A(IR_MOD, (_op1), (_op2))
+#define ir_MOD_C(_op1, _op2) ir_BINARY_OP_C(IR_MOD, (_op1), (_op2))
+#define ir_MOD_I8(_op1, _op2) ir_BINARY_OP_I8(IR_MOD, (_op1), (_op2))
+#define ir_MOD_I16(_op1, _op2) ir_BINARY_OP_I16(IR_MOD, (_op1), (_op2))
+#define ir_MOD_I32(_op1, _op2) ir_BINARY_OP_I32(IR_MOD, (_op1), (_op2))
+#define ir_MOD_I64(_op1, _op2) ir_BINARY_OP_I64(IR_MOD, (_op1), (_op2))
+
+#define ir_NEG(_type, _op1) ir_UNARY_OP(IR_NEG, (_type), (_op1))
+#define ir_NEG_C(_op1) ir_UNARY_OP_C(IR_NEG, (_op1))
+#define ir_NEG_I8(_op1) ir_UNARY_OP_I8(IR_NEG, (_op1))
+#define ir_NEG_I16(_op1) ir_UNARY_OP_I16(IR_NEG, (_op1))
+#define ir_NEG_I32(_op1) ir_UNARY_OP_I32(IR_NEG, (_op1))
+#define ir_NEG_I64(_op1) ir_UNARY_OP_I64(IR_NEG, (_op1))
+#define ir_NEG_D(_op1) ir_UNARY_OP_D(IR_NEG, (_op1))
+#define ir_NEG_F(_op1) ir_UNARY_OP_F(IR_NEG, (_op1))
+
+#define ir_ABS(_type, _op1) ir_UNARY_OP(IR_ABS, (_type), (_op1))
+#define ir_ABS_C(_op1) ir_UNARY_OP_C(IR_ABS, (_op1))
+#define ir_ABS_I8(_op1) ir_UNARY_OP_I8(IR_ABS, (_op1)) +#define ir_ABS_I16(_op1) ir_UNARY_OP_I16(IR_ABS, (_op1)) +#define ir_ABS_I32(_op1) ir_UNARY_OP_I32(IR_ABS, (_op1)) +#define ir_ABS_I64(_op1) ir_UNARY_OP_I64(IR_ABS, (_op1)) +#define ir_ABS_D(_op1) ir_UNARY_OP_D(IR_ABS, (_op1)) +#define ir_ABS_F(_op1) ir_UNARY_OP_F(IR_ABS, (_op1)) + +#define ir_SEXT(_type, _op1) ir_UNARY_OP(IR_SEXT, (_type), (_op1)) +#define ir_SEXT_U8(_op1) ir_UNARY_OP_U8(IR_SEXT, (_op1)) +#define ir_SEXT_U16(_op1) ir_UNARY_OP_U16(IR_SEXT, (_op1)) +#define ir_SEXT_U32(_op1) ir_UNARY_OP_U32(IR_SEXT, (_op1)) +#define ir_SEXT_U64(_op1) ir_UNARY_OP_U64(IR_SEXT, (_op1)) +#define ir_SEXT_A(_op1) ir_UNARY_OP_A(IR_SEXT, (_op1)) +#define ir_SEXT_C(_op1) ir_UNARY_OP_C(IR_SEXT, (_op1)) +#define ir_SEXT_I8(_op1) ir_UNARY_OP_I8(IR_SEXT, (_op1)) +#define ir_SEXT_I16(_op1) ir_UNARY_OP_I16(IR_SEXT, (_op1)) +#define ir_SEXT_I32(_op1) ir_UNARY_OP_I32(IR_SEXT, (_op1)) +#define ir_SEXT_I64(_op1) ir_UNARY_OP_I64(IR_SEXT, (_op1)) + +#define ir_ZEXT(_type, _op1) ir_UNARY_OP(IR_ZEXT, (_type), (_op1)) +#define ir_ZEXT_U8(_op1) ir_UNARY_OP_U8(IR_ZEXT, (_op1)) +#define ir_ZEXT_U16(_op1) ir_UNARY_OP_U16(IR_ZEXT, (_op1)) +#define ir_ZEXT_U32(_op1) ir_UNARY_OP_U32(IR_ZEXT, (_op1)) +#define ir_ZEXT_U64(_op1) ir_UNARY_OP_U64(IR_ZEXT, (_op1)) +#define ir_ZEXT_A(_op1) ir_UNARY_OP_A(IR_ZEXT, (_op1)) +#define ir_ZEXT_C(_op1) ir_UNARY_OP_C(IR_ZEXT, (_op1)) +#define ir_ZEXT_I8(_op1) ir_UNARY_OP_I8(IR_ZEXT, (_op1)) +#define ir_ZEXT_I16(_op1) ir_UNARY_OP_I16(IR_ZEXT, (_op1)) +#define ir_ZEXT_I32(_op1) ir_UNARY_OP_I32(IR_ZEXT, (_op1)) +#define ir_ZEXT_I64(_op1) ir_UNARY_OP_I64(IR_ZEXT, (_op1)) + +#define ir_TRUNC(_type, _op1) ir_UNARY_OP(IR_TRUNC, (_type), (_op1)) +#define ir_TRUNC_U8(_op1) ir_UNARY_OP_U8(IR_TRUNC, (_op1)) +#define ir_TRUNC_U16(_op1) ir_UNARY_OP_U16(IR_TRUNC, (_op1)) +#define ir_TRUNC_U32(_op1) ir_UNARY_OP_U32(IR_TRUNC, (_op1)) +#define ir_TRUNC_U64(_op1) ir_UNARY_OP_U64(IR_TRUNC, (_op1)) +#define ir_TRUNC_A(_op1) ir_UNARY_OP_A(IR_TRUNC, (_op1)) +#define ir_TRUNC_C(_op1) ir_UNARY_OP_C(IR_TRUNC, (_op1)) +#define ir_TRUNC_I8(_op1) ir_UNARY_OP_I8(IR_TRUNC, (_op1)) +#define ir_TRUNC_I16(_op1) ir_UNARY_OP_I16(IR_TRUNC, (_op1)) +#define ir_TRUNC_I32(_op1) ir_UNARY_OP_I32(IR_TRUNC, (_op1)) +#define ir_TRUNC_I64(_op1) ir_UNARY_OP_I64(IR_TRUNC, (_op1)) + +#define ir_BITCAST(_type, _op1) ir_UNARY_OP(IR_BITCAST, (_type), (_op1)) +#define ir_BITCAST_U8(_op1) ir_UNARY_OP_U8(IR_BITCAST, (_op1)) +#define ir_BITCAST_U16(_op1) ir_UNARY_OP_U16(IR_BITCAST, (_op1)) +#define ir_BITCAST_U32(_op1) ir_UNARY_OP_U32(IR_BITCAST, (_op1)) +#define ir_BITCAST_U64(_op1) ir_UNARY_OP_U64(IR_BITCAST, (_op1)) +#define ir_BITCAST_A(_op1) ir_UNARY_OP_A(IR_BITCAST, (_op1)) +#define ir_BITCAST_C(_op1) ir_UNARY_OP_C(IR_BITCAST, (_op1)) +#define ir_BITCAST_I8(_op1) ir_UNARY_OP_I8(IR_BITCAST, (_op1)) +#define ir_BITCAST_I16(_op1) ir_UNARY_OP_I16(IR_BITCAST, (_op1)) +#define ir_BITCAST_I32(_op1) ir_UNARY_OP_I32(IR_BITCAST, (_op1)) +#define ir_BITCAST_I64(_op1) ir_UNARY_OP_I64(IR_BITCAST, (_op1)) +#define ir_BITCAST_D(_op1) ir_UNARY_OP_D(IR_BITCAST, (_op1)) +#define ir_BITCAST_F(_op1) ir_UNARY_OP_F(IR_BITCAST, (_op1)) + +#define ir_INT2FP(_type, _op1) ir_UNARY_OP(IR_INT2FP, (_type), (_op1)) +#define ir_INT2D(_op1) ir_UNARY_OP_D(IR_INT2FP, (_op1)) +#define ir_INT2F(_op1) ir_UNARY_OP_F(IR_INT2FP, (_op1)) + +#define ir_FP2INT(_type, _op1) ir_UNARY_OP(IR_FP2INT, (_type), (_op1)) +#define ir_FP2U8(_op1) ir_UNARY_OP_U8(IR_FP2INT, (_op1)) +#define ir_FP2U16(_op1) ir_UNARY_OP_U16(IR_FP2INT, 
(_op1)) +#define ir_FP2U32(_op1) ir_UNARY_OP_U32(IR_FP2INT, (_op1)) +#define ir_FP2U64(_op1) ir_UNARY_OP_U64(IR_FP2INT, (_op1)) +#define ir_FP2I8(_op1) ir_UNARY_OP_I8(IR_FP2INT, (_op1)) +#define ir_FP2I16(_op1) ir_UNARY_OP_I16(IR_FP2INT, (_op1)) +#define ir_FP2I32(_op1) ir_UNARY_OP_I32(IR_FP2INT, (_op1)) +#define ir_FP2I64(_op1) ir_UNARY_OP_I64(IR_FP2INT, (_op1)) + +#define ir_FP2FP(_type, _op1) ir_UNARY_OP(IR_FP2FP, (_type), (_op1)) +#define ir_F2D(_op1) ir_UNARY_OP_D(IR_FP2FP, (_op1)) +#define ir_D2F(_op1) ir_UNARY_OP_F(IR_FP2FP, (_op1)) + +#define ir_ADD_OV(_type, _op1, _op2) ir_BINARY_OP(IR_ADD_OV, (_type), (_op1), (_op2)) +#define ir_ADD_OV_U8(_op1, _op2) ir_BINARY_OP_U8(IR_ADD_OV, (_op1), (_op2)) +#define ir_ADD_OV_U16(_op1, _op2) ir_BINARY_OP_U16(IR_ADD_OV, (_op1), (_op2)) +#define ir_ADD_OV_U32(_op1, _op2) ir_BINARY_OP_U32(IR_ADD_OV, (_op1), (_op2)) +#define ir_ADD_OV_U64(_op1, _op2) ir_BINARY_OP_U64(IR_ADD_OV, (_op1), (_op2)) +#define ir_ADD_OV_A(_op1, _op2) ir_BINARY_OP_A(IR_ADD_OV, (_op1), (_op2)) +#define ir_ADD_OV_C(_op1, _op2) ir_BINARY_OP_C(IR_ADD_OV, (_op1), (_op2)) +#define ir_ADD_OV_I8(_op1, _op2) ir_BINARY_OP_I8(IR_ADD_OV, (_op1), (_op2)) +#define ir_ADD_OV_I16(_op1, _op2) ir_BINARY_OP_I16(IR_ADD_OV, (_op1), (_op2)) +#define ir_ADD_OV_I32(_op1, _op2) ir_BINARY_OP_I32(IR_ADD_OV, (_op1), (_op2)) +#define ir_ADD_OV_I64(_op1, _op2) ir_BINARY_OP_I64(IR_ADD_OV, (_op1), (_op2)) + +#define ir_SUB_OV(_type, _op1, _op2) ir_BINARY_OP(IR_SUB_OV, (_type), (_op1), (_op2)) +#define ir_SUB_OV_U8(_op1, _op2) ir_BINARY_OP_U8(IR_SUB_OV, (_op1), (_op2)) +#define ir_SUB_OV_U16(_op1, _op2) ir_BINARY_OP_U16(IR_SUB_OV, (_op1), (_op2)) +#define ir_SUB_OV_U32(_op1, _op2) ir_BINARY_OP_U32(IR_SUB_OV, (_op1), (_op2)) +#define ir_SUB_OV_U64(_op1, _op2) ir_BINARY_OP_U64(IR_SUB_OV, (_op1), (_op2)) +#define ir_SUB_OV_A(_op1, _op2) ir_BINARY_OP_A(IR_SUB_OV, (_op1), (_op2)) +#define ir_SUB_OV_C(_op1, _op2) ir_BINARY_OP_C(IR_SUB_OV, (_op1), (_op2)) +#define ir_SUB_OV_I8(_op1, _op2) ir_BINARY_OP_I8(IR_SUB_OV, (_op1), (_op2)) +#define ir_SUB_OV_I16(_op1, _op2) ir_BINARY_OP_I16(IR_SUB_OV, (_op1), (_op2)) +#define ir_SUB_OV_I32(_op1, _op2) ir_BINARY_OP_I32(IR_SUB_OV, (_op1), (_op2)) +#define ir_SUB_OV_I64(_op1, _op2) ir_BINARY_OP_I64(IR_SUB_OV, (_op1), (_op2)) + +#define ir_MUL_OV(_type, _op1, _op2) ir_BINARY_OP(IR_MUL_OV, (_type), (_op1), (_op2)) +#define ir_MUL_OV_U8(_op1, _op2) ir_BINARY_OP_U8(IR_MUL_OV, (_op1), (_op2)) +#define ir_MUL_OV_U16(_op1, _op2) ir_BINARY_OP_U16(IR_MUL_OV, (_op1), (_op2)) +#define ir_MUL_OV_U32(_op1, _op2) ir_BINARY_OP_U32(IR_MUL_OV, (_op1), (_op2)) +#define ir_MUL_OV_U64(_op1, _op2) ir_BINARY_OP_U64(IR_MUL_OV, (_op1), (_op2)) +#define ir_MUL_OV_A(_op1, _op2) ir_BINARY_OP_A(IR_MUL_OV, (_op1), (_op2)) +#define ir_MUL_OV_C(_op1, _op2) ir_BINARY_OP_C(IR_MUL_OV, (_op1), (_op2)) +#define ir_MUL_OV_I8(_op1, _op2) ir_BINARY_OP_I8(IR_MUL_OV, (_op1), (_op2)) +#define ir_MUL_OV_I16(_op1, _op2) ir_BINARY_OP_I16(IR_MUL_OV, (_op1), (_op2)) +#define ir_MUL_OV_I32(_op1, _op2) ir_BINARY_OP_I32(IR_MUL_OV, (_op1), (_op2)) +#define ir_MUL_OV_I64(_op1, _op2) ir_BINARY_OP_I64(IR_MUL_OV, (_op1), (_op2)) + +#define ir_OVERFLOW(_op1) ir_fold1(_ir_CTX, IR_OPT(IR_OVERFLOW, IR_BOOL), (_op1)) + +#define ir_NOT(_type, _op1) ir_UNARY_OP(IR_NOT, (_type), (_op1)) +#define ir_NOT_B(_op1) ir_UNARY_OP_B(IR_NOT, (_op1)) +#define ir_NOT_U8(_op1) ir_UNARY_OP_U8(IR_NOT, (_op1)) +#define ir_NOT_U16(_op1) ir_UNARY_OP_U16(IR_NOT, (_op1)) +#define ir_NOT_U32(_op1) ir_UNARY_OP_U32(IR_NOT, (_op1)) +#define ir_NOT_U64(_op1) 
ir_UNARY_OP_U64(IR_NOT, (_op1)) +#define ir_NOT_A(_op1) ir_UNARY_OP_A(IR_NOT, (_op1)) +#define ir_NOT_C(_op1) ir_UNARY_OP_C(IR_NOT, (_op1)) +#define ir_NOT_I8(_op1) ir_UNARY_OP_I8(IR_NOT, (_op1)) +#define ir_NOT_I16(_op1) ir_UNARY_OP_I16(IR_NOT, (_op1)) +#define ir_NOT_I32(_op1) ir_UNARY_OP_I32(IR_NOT, (_op1)) +#define ir_NOT_I64(_op1) ir_UNARY_OP_I64(IR_NOT, (_op1)) + +#define ir_OR(_type, _op1, _op2) ir_BINARY_OP(IR_OR, (_type), (_op1), (_op2)) +#define ir_OR_B(_op1, _op2) ir_BINARY_OP_B(IR_OR, (_op1), (_op2)) +#define ir_OR_U8(_op1, _op2) ir_BINARY_OP_U8(IR_OR, (_op1), (_op2)) +#define ir_OR_U16(_op1, _op2) ir_BINARY_OP_U16(IR_OR, (_op1), (_op2)) +#define ir_OR_U32(_op1, _op2) ir_BINARY_OP_U32(IR_OR, (_op1), (_op2)) +#define ir_OR_U64(_op1, _op2) ir_BINARY_OP_U64(IR_OR, (_op1), (_op2)) +#define ir_OR_A(_op1, _op2) ir_BINARY_OP_A(IR_OR, (_op1), (_op2)) +#define ir_OR_C(_op1, _op2) ir_BINARY_OP_C(IR_OR, (_op1), (_op2)) +#define ir_OR_I8(_op1, _op2) ir_BINARY_OP_I8(IR_OR, (_op1), (_op2)) +#define ir_OR_I16(_op1, _op2) ir_BINARY_OP_I16(IR_OR, (_op1), (_op2)) +#define ir_OR_I32(_op1, _op2) ir_BINARY_OP_I32(IR_OR, (_op1), (_op2)) +#define ir_OR_I64(_op1, _op2) ir_BINARY_OP_I64(IR_OR, (_op1), (_op2)) + +#define ir_AND(_type, _op1, _op2) ir_BINARY_OP(IR_AND, (_type), (_op1), (_op2)) +#define ir_AND_B(_op1, _op2) ir_BINARY_OP_B(IR_AND, (_op1), (_op2)) +#define ir_AND_U8(_op1, _op2) ir_BINARY_OP_U8(IR_AND, (_op1), (_op2)) +#define ir_AND_U16(_op1, _op2) ir_BINARY_OP_U16(IR_AND, (_op1), (_op2)) +#define ir_AND_U32(_op1, _op2) ir_BINARY_OP_U32(IR_AND, (_op1), (_op2)) +#define ir_AND_U64(_op1, _op2) ir_BINARY_OP_U64(IR_AND, (_op1), (_op2)) +#define ir_AND_A(_op1, _op2) ir_BINARY_OP_A(IR_AND, (_op1), (_op2)) +#define ir_AND_C(_op1, _op2) ir_BINARY_OP_C(IR_AND, (_op1), (_op2)) +#define ir_AND_I8(_op1, _op2) ir_BINARY_OP_I8(IR_AND, (_op1), (_op2)) +#define ir_AND_I16(_op1, _op2) ir_BINARY_OP_I16(IR_AND, (_op1), (_op2)) +#define ir_AND_I32(_op1, _op2) ir_BINARY_OP_I32(IR_AND, (_op1), (_op2)) +#define ir_AND_I64(_op1, _op2) ir_BINARY_OP_I64(IR_AND, (_op1), (_op2)) + +#define ir_XOR(_type, _op1, _op2) ir_BINARY_OP(IR_XOR, (_type), (_op1), (_op2)) +#define ir_XOR_B(_op1, _op2) ir_BINARY_OP_B(IR_XOR, (_op1), (_op2)) +#define ir_XOR_U8(_op1, _op2) ir_BINARY_OP_U8(IR_XOR, (_op1), (_op2)) +#define ir_XOR_U16(_op1, _op2) ir_BINARY_OP_U16(IR_XOR, (_op1), (_op2)) +#define ir_XOR_U32(_op1, _op2) ir_BINARY_OP_U32(IR_XOR, (_op1), (_op2)) +#define ir_XOR_U64(_op1, _op2) ir_BINARY_OP_U64(IR_XOR, (_op1), (_op2)) +#define ir_XOR_A(_op1, _op2) ir_BINARY_OP_A(IR_XOR, (_op1), (_op2)) +#define ir_XOR_C(_op1, _op2) ir_BINARY_OP_C(IR_XOR, (_op1), (_op2)) +#define ir_XOR_I8(_op1, _op2) ir_BINARY_OP_I8(IR_XOR, (_op1), (_op2)) +#define ir_XOR_I16(_op1, _op2) ir_BINARY_OP_I16(IR_XOR, (_op1), (_op2)) +#define ir_XOR_I32(_op1, _op2) ir_BINARY_OP_I32(IR_XOR, (_op1), (_op2)) +#define ir_XOR_I64(_op1, _op2) ir_BINARY_OP_I64(IR_XOR, (_op1), (_op2)) + +#define ir_SHL(_type, _op1, _op2) ir_BINARY_OP(IR_SHL, (_type), (_op1), (_op2)) +#define ir_SHL_U8(_op1, _op2) ir_BINARY_OP_U8(IR_SHL, (_op1), (_op2)) +#define ir_SHL_U16(_op1, _op2) ir_BINARY_OP_U16(IR_SHL, (_op1), (_op2)) +#define ir_SHL_U32(_op1, _op2) ir_BINARY_OP_U32(IR_SHL, (_op1), (_op2)) +#define ir_SHL_U64(_op1, _op2) ir_BINARY_OP_U64(IR_SHL, (_op1), (_op2)) +#define ir_SHL_A(_op1, _op2) ir_BINARY_OP_A(IR_SHL, (_op1), (_op2)) +#define ir_SHL_C(_op1, _op2) ir_BINARY_OP_C(IR_SHL, (_op1), (_op2)) +#define ir_SHL_I8(_op1, _op2) ir_BINARY_OP_I8(IR_SHL, (_op1), (_op2)) +#define 
ir_SHL_I16(_op1, _op2) ir_BINARY_OP_I16(IR_SHL, (_op1), (_op2)) +#define ir_SHL_I32(_op1, _op2) ir_BINARY_OP_I32(IR_SHL, (_op1), (_op2)) +#define ir_SHL_I64(_op1, _op2) ir_BINARY_OP_I64(IR_SHL, (_op1), (_op2)) + +#define ir_SHR(_type, _op1, _op2) ir_BINARY_OP(IR_SHR, (_type), (_op1), (_op2)) +#define ir_SHR_U8(_op1, _op2) ir_BINARY_OP_U8(IR_SHR, (_op1), (_op2)) +#define ir_SHR_U16(_op1, _op2) ir_BINARY_OP_U16(IR_SHR, (_op1), (_op2)) +#define ir_SHR_U32(_op1, _op2) ir_BINARY_OP_U32(IR_SHR, (_op1), (_op2)) +#define ir_SHR_U64(_op1, _op2) ir_BINARY_OP_U64(IR_SHR, (_op1), (_op2)) +#define ir_SHR_A(_op1, _op2) ir_BINARY_OP_A(IR_SHR, (_op1), (_op2)) +#define ir_SHR_C(_op1, _op2) ir_BINARY_OP_C(IR_SHR, (_op1), (_op2)) +#define ir_SHR_I8(_op1, _op2) ir_BINARY_OP_I8(IR_SHR, (_op1), (_op2)) +#define ir_SHR_I16(_op1, _op2) ir_BINARY_OP_I16(IR_SHR, (_op1), (_op2)) +#define ir_SHR_I32(_op1, _op2) ir_BINARY_OP_I32(IR_SHR, (_op1), (_op2)) +#define ir_SHR_I64(_op1, _op2) ir_BINARY_OP_I64(IR_SHR, (_op1), (_op2)) + +#define ir_SAR(_type, _op1, _op2) ir_BINARY_OP(IR_SAR, (_type), (_op1), (_op2)) +#define ir_SAR_U8(_op1, _op2) ir_BINARY_OP_U8(IR_SAR, (_op1), (_op2)) +#define ir_SAR_U16(_op1, _op2) ir_BINARY_OP_U16(IR_SAR, (_op1), (_op2)) +#define ir_SAR_U32(_op1, _op2) ir_BINARY_OP_U32(IR_SAR, (_op1), (_op2)) +#define ir_SAR_U64(_op1, _op2) ir_BINARY_OP_U64(IR_SAR, (_op1), (_op2)) +#define ir_SAR_A(_op1, _op2) ir_BINARY_OP_A(IR_SAR, (_op1), (_op2)) +#define ir_SAR_C(_op1, _op2) ir_BINARY_OP_C(IR_SAR, (_op1), (_op2)) +#define ir_SAR_I8(_op1, _op2) ir_BINARY_OP_I8(IR_SAR, (_op1), (_op2)) +#define ir_SAR_I16(_op1, _op2) ir_BINARY_OP_I16(IR_SAR, (_op1), (_op2)) +#define ir_SAR_I32(_op1, _op2) ir_BINARY_OP_I32(IR_SAR, (_op1), (_op2)) +#define ir_SAR_I64(_op1, _op2) ir_BINARY_OP_I64(IR_SAR, (_op1), (_op2)) + +#define ir_ROL(_type, _op1, _op2) ir_BINARY_OP(IR_ROL, (_type), (_op1), (_op2)) +#define ir_ROL_U8(_op1, _op2) ir_BINARY_OP_U8(IR_ROL, (_op1), (_op2)) +#define ir_ROL_U16(_op1, _op2) ir_BINARY_OP_U16(IR_ROL, (_op1), (_op2)) +#define ir_ROL_U32(_op1, _op2) ir_BINARY_OP_U32(IR_ROL, (_op1), (_op2)) +#define ir_ROL_U64(_op1, _op2) ir_BINARY_OP_U64(IR_ROL, (_op1), (_op2)) +#define ir_ROL_A(_op1, _op2) ir_BINARY_OP_A(IR_ROL, (_op1), (_op2)) +#define ir_ROL_C(_op1, _op2) ir_BINARY_OP_C(IR_ROL, (_op1), (_op2)) +#define ir_ROL_I8(_op1, _op2) ir_BINARY_OP_I8(IR_ROL, (_op1), (_op2)) +#define ir_ROL_I16(_op1, _op2) ir_BINARY_OP_I16(IR_ROL, (_op1), (_op2)) +#define ir_ROL_I32(_op1, _op2) ir_BINARY_OP_I32(IR_ROL, (_op1), (_op2)) +#define ir_ROL_I64(_op1, _op2) ir_BINARY_OP_I64(IR_ROL, (_op1), (_op2)) + +#define ir_ROR(_type, _op1, _op2) ir_BINARY_OP(IR_ROR, (_type), (_op1), (_op2)) +#define ir_ROR_U8(_op1, _op2) ir_BINARY_OP_U8(IR_ROR, (_op1), (_op2)) +#define ir_ROR_U16(_op1, _op2) ir_BINARY_OP_U16(IR_ROR, (_op1), (_op2)) +#define ir_ROR_U32(_op1, _op2) ir_BINARY_OP_U32(IR_ROR, (_op1), (_op2)) +#define ir_ROR_U64(_op1, _op2) ir_BINARY_OP_U64(IR_ROR, (_op1), (_op2)) +#define ir_ROR_A(_op1, _op2) ir_BINARY_OP_A(IR_ROR, (_op1), (_op2)) +#define ir_ROR_C(_op1, _op2) ir_BINARY_OP_C(IR_ROR, (_op1), (_op2)) +#define ir_ROR_I8(_op1, _op2) ir_BINARY_OP_I8(IR_ROR, (_op1), (_op2)) +#define ir_ROR_I16(_op1, _op2) ir_BINARY_OP_I16(IR_ROR, (_op1), (_op2)) +#define ir_ROR_I32(_op1, _op2) ir_BINARY_OP_I32(IR_ROR, (_op1), (_op2)) +#define ir_ROR_I64(_op1, _op2) ir_BINARY_OP_I64(IR_ROR, (_op1), (_op2)) + +#define ir_BSWAP(_type, _op1) ir_UNARY_OP(IR_BSWAP, (_type), (_op1)) +#define ir_BSWAP_U16(_op1) ir_UNARY_OP_U16(IR_BSWAP, (_op1)) 
+#define ir_BSWAP_U32(_op1) ir_UNARY_OP_U32(IR_BSWAP, (_op1)) +#define ir_BSWAP_U64(_op1) ir_UNARY_OP_U64(IR_BSWAP, (_op1)) +#define ir_BSWAP_A(_op1) ir_UNARY_OP_A(IR_BSWAP, (_op1)) +#define ir_BSWAP_I16(_op1) ir_UNARY_OP_I16(IR_BSWAP, (_op1)) +#define ir_BSWAP_I32(_op1) ir_UNARY_OP_I32(IR_BSWAP, (_op1)) +#define ir_BSWAP_I64(_op1) ir_UNARY_OP_I64(IR_BSWAP, (_op1)) + +#define ir_MIN(_type, _op1, _op2) ir_BINARY_OP(IR_MIN, (_type), (_op1), (_op2)) +#define ir_MIN_U8(_op1, _op2) ir_BINARY_OP_U8(IR_MIN, (_op1), (_op2)) +#define ir_MIN_U16(_op1, _op2) ir_BINARY_OP_U16(IR_MIN, (_op1), (_op2)) +#define ir_MIN_U32(_op1, _op2) ir_BINARY_OP_U32(IR_MIN, (_op1), (_op2)) +#define ir_MIN_U64(_op1, _op2) ir_BINARY_OP_U64(IR_MIN, (_op1), (_op2)) +#define ir_MIN_A(_op1, _op2) ir_BINARY_OP_A(IR_MIN, (_op1), (_op2)) +#define ir_MIN_C(_op1, _op2) ir_BINARY_OP_C(IR_MIN, (_op1), (_op2)) +#define ir_MIN_I8(_op1, _op2) ir_BINARY_OP_I8(IR_MIN, (_op1), (_op2)) +#define ir_MIN_I16(_op1, _op2) ir_BINARY_OP_I16(IR_MIN, (_op1), (_op2)) +#define ir_MIN_I32(_op1, _op2) ir_BINARY_OP_I32(IR_MIN, (_op1), (_op2)) +#define ir_MIN_I64(_op1, _op2) ir_BINARY_OP_I64(IR_MIN, (_op1), (_op2)) +#define ir_MIN_D(_op1, _op2) ir_BINARY_OP_D(IR_MIN, (_op1), (_op2)) +#define ir_MIN_F(_op1, _op2) ir_BINARY_OP_F(IR_MIN, (_op1), (_op2)) + +#define ir_MAX(_type, _op1, _op2) ir_BINARY_OP(IR_MAX, (_type), (_op1), (_op2)) +#define ir_MAX_U8(_op1, _op2) ir_BINARY_OP_U8(IR_MAX, (_op1), (_op2)) +#define ir_MAX_U16(_op1, _op2) ir_BINARY_OP_U16(IR_MAX, (_op1), (_op2)) +#define ir_MAX_U32(_op1, _op2) ir_BINARY_OP_U32(IR_MAX, (_op1), (_op2)) +#define ir_MAX_U64(_op1, _op2) ir_BINARY_OP_U64(IR_MAX, (_op1), (_op2)) +#define ir_MAX_A(_op1, _op2) ir_BINARY_OP_A(IR_MAX, (_op1), (_op2)) +#define ir_MAX_C(_op1, _op2) ir_BINARY_OP_C(IR_MAX, (_op1), (_op2)) +#define ir_MAX_I8(_op1, _op2) ir_BINARY_OP_I8(IR_MAX, (_op1), (_op2)) +#define ir_MAX_I16(_op1, _op2) ir_BINARY_OP_I16(IR_MAX, (_op1), (_op2)) +#define ir_MAX_I32(_op1, _op2) ir_BINARY_OP_I32(IR_MAX, (_op1), (_op2)) +#define ir_MAX_I64(_op1, _op2) ir_BINARY_OP_I64(IR_MAX, (_op1), (_op2)) +#define ir_MAX_D(_op1, _op2) ir_BINARY_OP_D(IR_MAX, (_op1), (_op2)) +#define ir_MAX_F(_op1, _op2) ir_BINARY_OP_F(IR_MAX, (_op1), (_op2)) + +#define ir_COND(_type, _op1, _op2, _op3) ir_fold3(_ir_CTX, IR_OPT(IR_COND, (_type)), (_op1), (_op2), (_op3)) +#define ir_COND_U8(_op1, _op2, _op3) ir_fold3(_ir_CTX, IR_OPT(IR_COND, IR_U8), (_op1), (_op2), (_op3)) +#define ir_COND_U16(_op1, _op2, _op3) ir_fold3(_ir_CTX, IR_OPT(IR_COND, IR_U16), (_op1), (_op2), (_op3)) +#define ir_COND_U32(_op1, _op2, _op3) ir_fold3(_ir_CTX, IR_OPT(IR_COND, IR_U32), (_op1), (_op2), (_op3)) +#define ir_COND_U64(_op1, _op2, _op3) ir_fold3(_ir_CTX, IR_OPT(IR_COND, IR_U64), (_op1), (_op2), (_op3)) +#define ir_COND_A(_op1, _op2, _op3) ir_fold3(_ir_CTX, IR_OPT(IR_COND, IR_ADDR), (_op1), (_op2), (_op3)) +#define ir_COND_C(_op1, _op2, _op3) ir_fold3(_ir_CTX, IR_OPT(IR_COND, IR_CHAR), (_op1), (_op2), (_op3)) +#define ir_COND_I8(_op1, _op2, _op3) ir_fold3(_ir_CTX, IR_OPT(IR_COND, IR_I8), (_op1), (_op2), (_op3)) +#define ir_COND_I16(_op1, _op2, _op3) ir_fold3(_ir_CTX, IR_OPT(IR_COND, IR_I16), (_op1), (_op2), (_op3)) +#define ir_COND_I32(_op1, _op2, _op3) ir_fold3(_ir_CTX, IR_OPT(IR_COND, IR_I32), (_op1), (_op2), (_op3)) +#define ir_COND_I64(_op1, _op2, _op3) ir_fold3(_ir_CTX, IR_OPT(IR_COND, IR_I64), (_op1), (_op2), (_op3)) +#define ir_COND_D(_op1, _op2, _op3) ir_fold3(_ir_CTX, IR_OPT(IR_COND, IR_DOUBLE), (_op1), (_op2), (_op3)) +#define ir_COND_F(_op1, _op2, 
_op3) ir_fold3(_ir_CTX, IR_OPT(IR_COND, IR_FLOAT), (_op1), (_op2), (_op3)) + +#define ir_PHI_2(type, _src1, _src2) _ir_PHI_2(_ir_CTX, type, (_src1), (_src2)) +#define ir_PHI_N(type, _n, _inputs) _ir_PHI_N(_ir_CTX, type, (_n), (_inputs)) +#define ir_PHI_SET_OP(_ref, _pos, _src) _ir_PHI_SET_OP(_ir_CTX, (_ref), (_pos), (_src)) + +#define ir_COPY(_type, _op1) ir_UNARY_OP(IR_COPY, (_type), (_op1)) +#define ir_COPY_B(_op1) ir_UNARY_OP_B(IR_COPY, (_op1)) +#define ir_COPY_U8(_op1) ir_UNARY_OP_U8(IR_COPY, (_op1)) +#define ir_COPY_U16(_op1) ir_UNARY_OP_U16(IR_COPY, (_op1)) +#define ir_COPY_U32(_op1) ir_UNARY_OP_U32(IR_COPY, (_op1)) +#define ir_COPY_U64(_op1) ir_UNARY_OP_U64(IR_COPY, (_op1)) +#define ir_COPY_A(_op1) ir_UNARY_OP_A(IR_COPY, (_op1)) +#define ir_COPY_C(_op1) ir_UNARY_OP_C(IR_COPY, (_op1)) +#define ir_COPY_I8(_op1) ir_UNARY_OP_I8(IR_COPY, (_op1)) +#define ir_COPY_I16(_op1) ir_UNARY_OP_I16(IR_COPY, (_op1)) +#define ir_COPY_I32(_op1) ir_UNARY_OP_I32(IR_COPY, (_op1)) +#define ir_COPY_I64(_op1) ir_UNARY_OP_I64(IR_COPY, (_op1)) +#define ir_COPY_D(_op1) ir_UNARY_OP_D(IR_COPY, (_op1)) +#define ir_COPY_F(_op1) ir_UNARY_OP_F(IR_COPY, (_op1)) + +/* Helper to add address with a constant offset */ +#define ir_ADD_OFFSET(_addr, _offset) _ir_ADD_OFFSET(_ir_CTX, (_addr), (_offset)) + +/* Unfoldable variant of COPY */ +#define ir_HARD_COPY(_type, _op1) ir_BINARY_OP(IR_COPY, (_type), (_op1), 1) +#define ir_HARD_COPY_B(_op1) ir_BINARY_OP_B(IR_COPY, (_op1), 1) +#define ir_HARD_COPY_U8(_op1) ir_BINARY_OP_U8(IR_COPY, (_op1), 1) +#define ir_HARD_COPY_U16(_op1) ir_BINARY_OP_U16(IR_COPY, (_op1), 1) +#define ir_HARD_COPY_U32(_op1) ir_BINARY_OP_U32(IR_COPY, (_op1), 1) +#define ir_HARD_COPY_U64(_op1) ir_BINARY_OP_U64(IR_COPY, (_op1), 1) +#define ir_HARD_COPY_A(_op1) ir_BINARY_OP_A(IR_COPY, (_op1), 1) +#define ir_HARD_COPY_C(_op1) ir_BINARY_OP_C(IR_COPY, (_op1), 1) +#define ir_HARD_COPY_I8(_op1) ir_BINARY_OP_I8(IR_COPY, (_op1), 1) +#define ir_HARD_COPY_I16(_op1) ir_BINARY_OP_I16(IR_COPY, (_op1), 1) +#define ir_HARD_COPY_I32(_op1) ir_BINARY_OP_I32(IR_COPY, (_op1), 1) +#define ir_HARD_COPY_I64(_op1) ir_BINARY_OP_I64(IR_COPY, (_op1), 1) +#define ir_HARD_COPY_D(_op1) ir_BINARY_OP_D(IR_COPY, (_op1), 1) +#define ir_HARD_COPY_F(_op1) ir_BINARY_OP_F(IR_COPY, (_op1), 1) + +#define ir_PARAM(_type, _name, _num) _ir_PARAM(_ir_CTX, (_type), (_name), (_num)) +#define ir_VAR(_type, _name) _ir_VAR(_ir_CTX, (_type), (_name)) + +#define ir_CALL(type, func) _ir_CALL(_ir_CTX, type, func) +#define ir_CALL_1(type, func, a1) _ir_CALL_1(_ir_CTX, type, func, a1) +#define ir_CALL_2(type, func, a1, a2) _ir_CALL_2(_ir_CTX, type, func, a1, a2) +#define ir_CALL_3(type, func, a1, a2, a3) _ir_CALL_3(_ir_CTX, type, func, a1, a2, a3) +#define ir_CALL_4(type, func, a1, a2, a3, a4) _ir_CALL_4(_ir_CTX, type, func, a1, a2, a3, a4) +#define ir_CALL_5(type, func, a1, a2, a3, a4, a5) _ir_CALL_5(_ir_CTX, type, func, a1, a2, a3, a4, a5) +#define ir_CALL_N(type, func, count, args) _ir_CALL_N(_ir_CTX, type, func, count, args) + +#define ir_TAILCALL(type, func) _ir_TAILCALL(_ir_CTX, type, func) +#define ir_TAILCALL_1(type, func, a1) _ir_TAILCALL_1(_ir_CTX, type, func, a1) +#define ir_TAILCALL_2(type, func, a1, a2) _ir_TAILCALL_2(_ir_CTX, type, func, a1, a2) +#define ir_TAILCALL_3(type, func, a1, a2, a3) _ir_TAILCALL_3(_ir_CTX, type, func, a1, a2, a3) +#define ir_TAILCALL_4(type, func, a1, a2, a3, a4) _ir_TAILCALL_4(_ir_CTX, type, func, a1, a2, a3, a4) +#define ir_TAILCALL_5(type, func, a1, a2, a3, a4, a5) _ir_TAILCALL_5(_ir_CTX, type, func, a1, a2, a3, a4, a5) 
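+/*
+ * Usage sketch: the fixed-arity ir_CALL_1()..ir_CALL_5()/ir_TAILCALL_5()
+ * helpers cover up to five arguments; the *_N variants (next) take an
+ * explicit count and an ir_ref array instead. E.g., assuming "helper" is
+ * a C function whose address was registered as a constant reference and
+ * "arg0"/"arg1" are existing ir_refs:
+ *
+ *   ir_ref fn      = ir_CONST_ADDR(helper);
+ *   ir_ref args[2] = { arg0, arg1 };
+ *   ir_ref ret     = ir_CALL_N(IR_I32, fn, 2, args); // same effect as ir_CALL_2(IR_I32, fn, arg0, arg1)
+ *
+ * CALL nodes are linked into the current control flow through the implicit
+ * _ir_CTX, so they must be emitted inside an open basic block.
+ */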
+#define ir_TAILCALL_N(type, func, count, args) _ir_TAILCALL_N(_ir_CTX, type, func, count, args) + +#define ir_ALLOCA(_size) _ir_ALLOCA(_ir_CTX, (_size)) +#define ir_AFREE(_size) _ir_AFREE(_ir_CTX, (_size)) +#define ir_VADDR(_var) ir_emit1(_ir_CTX, IR_OPT(IR_VADDR, IR_ADDR), (_var)) +#define ir_VLOAD(_type, _var) _ir_VLOAD(_ir_CTX, (_type), (_var)) +#define ir_VLOAD_B(_var) _ir_VLOAD(_ir_CTX, IR_BOOL, (_var)) +#define ir_VLOAD_U8(_var) _ir_VLOAD(_ir_CTX, IR_U8, (_var)) +#define ir_VLOAD_U16(_var) _ir_VLOAD(_ir_CTX, IR_U16, (_var)) +#define ir_VLOAD_U32(_var) _ir_VLOAD(_ir_CTX, IR_U32, (_var)) +#define ir_VLOAD_U64(_var) _ir_VLOAD(_ir_CTX, IR_U64, (_var)) +#define ir_VLOAD_A(_var) _ir_VLOAD(_ir_CTX, IR_ADDR, (_var)) +#define ir_VLOAD_C(_var) _ir_VLOAD(_ir_CTX, IR_CHAR, (_var)) +#define ir_VLOAD_I8(_var) _ir_VLOAD(_ir_CTX, IR_I8, (_var)) +#define ir_VLOAD_I16(_var) _ir_VLOAD(_ir_CTX, IR_I16, (_var)) +#define ir_VLOAD_I32(_var) _ir_VLOAD(_ir_CTX, IR_I32, (_var)) +#define ir_VLOAD_I64(_var) _ir_VLOAD(_ir_CTX, IR_I64, (_var)) +#define ir_VLOAD_D(_var) _ir_VLOAD(_ir_CTX, IR_DOUBLE, (_var)) +#define ir_VLOAD_F(_var) _ir_VLOAD(_ir_CTX, IR_FLOAT, (_var)) +#define ir_VSTORE(_var, _val) _ir_VSTORE(_ir_CTX, (_var), (_val)) +#define ir_RLOAD(_type, _reg) _ir_RLOAD(_ir_CTX, (_type), (_reg)) +#define ir_RLOAD_B(_reg) _ir_RLOAD(_ir_CTX, IR_BOOL, (_reg)) +#define ir_RLOAD_U8(_reg) _ir_RLOAD(_ir_CTX, IR_U8, (_reg)) +#define ir_RLOAD_U16(_reg) _ir_RLOAD(_ir_CTX, IR_U16, (_reg)) +#define ir_RLOAD_U32(_reg) _ir_RLOAD(_ir_CTX, IR_U32, (_reg)) +#define ir_RLOAD_U64(_reg) _ir_RLOAD(_ir_CTX, IR_U64, (_reg)) +#define ir_RLOAD_A(_reg) _ir_RLOAD(_ir_CTX, IR_ADDR, (_reg)) +#define ir_RLOAD_C(_reg) _ir_RLOAD(_ir_CTX, IR_CHAR, (_reg)) +#define ir_RLOAD_I8(_reg) _ir_RLOAD(_ir_CTX, IR_I8, (_reg)) +#define ir_RLOAD_I16(_reg) _ir_RLOAD(_ir_CTX, IR_I16, (_reg)) +#define ir_RLOAD_I32(_reg) _ir_RLOAD(_ir_CTX, IR_I32, (_reg)) +#define ir_RLOAD_I64(_reg) _ir_RLOAD(_ir_CTX, IR_I64, (_reg)) +#define ir_RLOAD_D(_reg) _ir_RLOAD(_ir_CTX, IR_DOUBLE, (_reg)) +#define ir_RLOAD_F(_reg) _ir_RLOAD(_ir_CTX, IR_FLOAT, (_reg)) +#define ir_RSTORE(_reg, _val) _ir_RSTORE(_ir_CTX, (_reg), (_val)) +#define ir_LOAD(_type, _addr) _ir_LOAD(_ir_CTX, (_type), (_addr)) +#define ir_LOAD_B(_addr) _ir_LOAD(_ir_CTX, IR_BOOL, (_addr)) +#define ir_LOAD_U8(_addr) _ir_LOAD(_ir_CTX, IR_U8, (_addr)) +#define ir_LOAD_U16(_addr) _ir_LOAD(_ir_CTX, IR_U16, (_addr)) +#define ir_LOAD_U32(_addr) _ir_LOAD(_ir_CTX, IR_U32, (_addr)) +#define ir_LOAD_U64(_addr) _ir_LOAD(_ir_CTX, IR_U64, (_addr)) +#define ir_LOAD_A(_addr) _ir_LOAD(_ir_CTX, IR_ADDR, (_addr)) +#define ir_LOAD_C(_addr) _ir_LOAD(_ir_CTX, IR_CHAR, (_addr)) +#define ir_LOAD_I8(_addr) _ir_LOAD(_ir_CTX, IR_I8, (_addr)) +#define ir_LOAD_I16(_addr) _ir_LOAD(_ir_CTX, IR_I16, (_addr)) +#define ir_LOAD_I32(_addr) _ir_LOAD(_ir_CTX, IR_I32, (_addr)) +#define ir_LOAD_I64(_addr) _ir_LOAD(_ir_CTX, IR_I64, (_addr)) +#define ir_LOAD_D(_addr) _ir_LOAD(_ir_CTX, IR_DOUBLE, (_addr)) +#define ir_LOAD_F(_addr) _ir_LOAD(_ir_CTX, IR_FLOAT, (_addr)) +#define ir_STORE(_addr, _val) _ir_STORE(_ir_CTX, (_addr), (_val)) +#define ir_TLS(_index, _offset) _ir_TLS(_ir_CTX, (_index), (_offset)) +#define ir_TRAP() do {_ir_CTX->control = ir_emit1(_ir_CTX, IR_TRAP, _ir_CTX->control);} while (0) + +#define ir_START() _ir_START(_ir_CTX) +#define ir_ENTRY(_src, _num) _ir_ENTRY(_ir_CTX, (_src), (_num)) +#define ir_BEGIN(_src) _ir_BEGIN(_ir_CTX, (_src)) +#define ir_IF(_condition) _ir_IF(_ir_CTX, (_condition)) +#define ir_IF_TRUE(_if) _ir_IF_TRUE(_ir_CTX, 
(_if)) +#define ir_IF_TRUE_cold(_if) _ir_IF_TRUE_cold(_ir_CTX, (_if)) +#define ir_IF_FALSE(_if) _ir_IF_FALSE(_ir_CTX, (_if)) +#define ir_IF_FALSE_cold(_if) _ir_IF_FALSE_cold(_ir_CTX, (_if)) +#define ir_END() _ir_END(_ir_CTX) +#define ir_MERGE_2(_src1, _src2) _ir_MERGE_2(_ir_CTX, (_src1), (_src2)) +#define ir_MERGE_N(_n, _inputs) _ir_MERGE_N(_ir_CTX, (_n), (_inputs)) +#define ir_MERGE_SET_OP(_ref, _pos, _src) _ir_MERGE_SET_OP(_ir_CTX, (_ref), (_pos), (_src)) +#define ir_LOOP_BEGIN(_src1) _ir_LOOP_BEGIN(_ir_CTX, (_src1)) +#define ir_LOOP_END() _ir_LOOP_END(_ir_CTX) +#define ir_SWITCH(_val) _ir_SWITCH(_ir_CTX, (_val)) +#define ir_CASE_VAL(_switch, _val) _ir_CASE_VAL(_ir_CTX, (_switch), (_val)) +#define ir_CASE_DEFAULT(_switch) _ir_CASE_DEFAULT(_ir_CTX, (_switch)) +#define ir_RETURN(_val) _ir_RETURN(_ir_CTX, (_val)) +#define ir_IJMP(_addr) _ir_IJMP(_ir_CTX, (_addr)) +#define ir_UNREACHABLE() _ir_UNREACHABLE(_ir_CTX) + +#define ir_GUARD(_condition, _addr) _ir_GUARD(_ir_CTX, (_condition), (_addr)) +#define ir_GUARD_NOT(_condition, _addr) _ir_GUARD_NOT(_ir_CTX, (_condition), (_addr)) + +#define ir_SNAPSHOT(_n) _ir_SNAPSHOT(_ir_CTX, (_n)) +#define ir_SNAPSHOT_SET_OP(_s, _pos, _v) _ir_SNAPSHOT_SET_OP(_ir_CTX, (_s), (_pos), (_v)) + +#define ir_EXITCALL(_func) _ir_EXITCALL(_ir_CTX, (_func)) + +#define ir_END_list(_list) do { _list = _ir_END_LIST(_ir_CTX, _list); } while (0) +#define ir_MERGE_list(_list) _ir_MERGE_LIST(_ir_CTX, (_list)) + +#define ir_MERGE_WITH(_src2) do {ir_ref end = ir_END(); ir_MERGE_2(end, _src2);} while (0) +#define ir_MERGE_WITH_EMPTY_TRUE(_if) do {ir_ref end = ir_END(); ir_IF_TRUE(_if); ir_MERGE_2(end, ir_END());} while (0) +#define ir_MERGE_WITH_EMPTY_FALSE(_if) do {ir_ref end = ir_END(); ir_IF_FALSE(_if); ir_MERGE_2(end, ir_END());} while (0) + +ir_ref _ir_ADD_OFFSET(ir_ctx *ctx, ir_ref addr, uintptr_t offset); +ir_ref _ir_PHI_2(ir_ctx *ctx, ir_type type, ir_ref src1, ir_ref src2); +ir_ref _ir_PHI_N(ir_ctx *ctx, ir_type type, ir_ref n, ir_ref *inputs); +void _ir_PHI_SET_OP(ir_ctx *ctx, ir_ref phi, ir_ref pos, ir_ref src); +ir_ref _ir_PARAM(ir_ctx *ctx, ir_type type, const char* name, ir_ref num); +ir_ref _ir_VAR(ir_ctx *ctx, ir_type type, const char* name); +ir_ref _ir_CALL(ir_ctx *ctx, ir_type type, ir_ref func); +ir_ref _ir_CALL_1(ir_ctx *ctx, ir_type type, ir_ref func, ir_ref arg1); +ir_ref _ir_CALL_2(ir_ctx *ctx, ir_type type, ir_ref func, ir_ref arg1, ir_ref arg2); +ir_ref _ir_CALL_3(ir_ctx *ctx, ir_type type, ir_ref func, ir_ref arg1, ir_ref arg2, ir_ref arg3); +ir_ref _ir_CALL_4(ir_ctx *ctx, ir_type type, ir_ref func, ir_ref arg1, ir_ref arg2, ir_ref arg3, ir_ref arg4); +ir_ref _ir_CALL_5(ir_ctx *ctx, ir_type type, ir_ref func, ir_ref arg1, ir_ref arg2, ir_ref arg3, ir_ref arg4, ir_ref arg5); +ir_ref _ir_CALL_N(ir_ctx *ctx, ir_type type, ir_ref func, uint32_t count, ir_ref *args); +void _ir_TAILCALL(ir_ctx *ctx, ir_type type, ir_ref func); +void _ir_TAILCALL_1(ir_ctx *ctx, ir_type type, ir_ref func, ir_ref arg1); +void _ir_TAILCALL_2(ir_ctx *ctx, ir_type type, ir_ref func, ir_ref arg1, ir_ref arg2); +void _ir_TAILCALL_3(ir_ctx *ctx, ir_type type, ir_ref func, ir_ref arg1, ir_ref arg2, ir_ref arg3); +void _ir_TAILCALL_4(ir_ctx *ctx, ir_type type, ir_ref func, ir_ref arg1, ir_ref arg2, ir_ref arg3, ir_ref arg4); +void _ir_TAILCALL_5(ir_ctx *ctx, ir_type type, ir_ref func, ir_ref arg1, ir_ref arg2, ir_ref arg3, ir_ref arg4, ir_ref arg5); +void _ir_TAILCALL_N(ir_ctx *ctx, ir_type type, ir_ref func, uint32_t count, ir_ref *args); +ir_ref _ir_ALLOCA(ir_ctx *ctx, ir_ref 
size); +void _ir_AFREE(ir_ctx *ctx, ir_ref size); +ir_ref _ir_VLOAD(ir_ctx *ctx, ir_type type, ir_ref var); +void _ir_VSTORE(ir_ctx *ctx, ir_ref var, ir_ref val); +ir_ref _ir_RLOAD(ir_ctx *ctx, ir_type type, ir_ref reg); +void _ir_RSTORE(ir_ctx *ctx, ir_ref reg, ir_ref val); +ir_ref _ir_LOAD(ir_ctx *ctx, ir_type type, ir_ref addr); +void _ir_STORE(ir_ctx *ctx, ir_ref addr, ir_ref val); +void _ir_START(ir_ctx *ctx); +void _ir_ENTRY(ir_ctx *ctx, ir_ref src, ir_ref num); +void _ir_BEGIN(ir_ctx *ctx, ir_ref src); +ir_ref _ir_END(ir_ctx *ctx); +ir_ref _ir_END_LIST(ir_ctx *ctx, ir_ref list); +ir_ref _ir_IF(ir_ctx *ctx, ir_ref condition); +void _ir_IF_TRUE(ir_ctx *ctx, ir_ref if_ref); +void _ir_IF_TRUE_cold(ir_ctx *ctx, ir_ref if_ref); +void _ir_IF_FALSE(ir_ctx *ctx, ir_ref if_ref); +void _ir_IF_FALSE_cold(ir_ctx *ctx, ir_ref if_ref); +void _ir_MERGE_2(ir_ctx *ctx, ir_ref src1, ir_ref src2); +void _ir_MERGE_N(ir_ctx *ctx, ir_ref n, ir_ref *inputs); +void _ir_MERGE_SET_OP(ir_ctx *ctx, ir_ref merge, ir_ref pos, ir_ref src); +void _ir_MERGE_LIST(ir_ctx *ctx, ir_ref list); +ir_ref _ir_LOOP_BEGIN(ir_ctx *ctx, ir_ref src1); +ir_ref _ir_LOOP_END(ir_ctx *ctx); +ir_ref _ir_TLS(ir_ctx *ctx, ir_ref index, ir_ref offset); +void _ir_UNREACHABLE(ir_ctx *ctx); +ir_ref _ir_SWITCH(ir_ctx *ctx, ir_ref val); +void _ir_CASE_VAL(ir_ctx *ctx, ir_ref switch_ref, ir_ref val); +void _ir_CASE_DEFAULT(ir_ctx *ctx, ir_ref switch_ref); +void _ir_RETURN(ir_ctx *ctx, ir_ref val); +void _ir_IJMP(ir_ctx *ctx, ir_ref addr); +void _ir_GUARD(ir_ctx *ctx, ir_ref condition, ir_ref addr); +void _ir_GUARD_NOT(ir_ctx *ctx, ir_ref condition, ir_ref addr); +ir_ref _ir_SNAPSHOT(ir_ctx *ctx, ir_ref n); +void _ir_SNAPSHOT_SET_OP(ir_ctx *ctx, ir_ref snapshot, ir_ref pos, ir_ref val); +ir_ref _ir_EXITCALL(ir_ctx *ctx, ir_ref func); + +#ifdef __cplusplus +} /* extern "C" */ +#endif + +#endif /* IR_BUILDER_H */ diff --git a/ext/opcache/jit/ir/ir_cfg.c b/ext/opcache/jit/ir/ir_cfg.c new file mode 100644 index 00000000000..b886319beb4 --- /dev/null +++ b/ext/opcache/jit/ir/ir_cfg.c @@ -0,0 +1,1219 @@ +/* + * IR - Lightweight JIT Compilation Framework + * (CFG - Control Flow Graph) + * Copyright (C) 2022 Zend by Perforce. 
+ * Authors: Dmitry Stogov + */ + +#include "ir.h" +#include "ir_private.h" + +static ir_ref _ir_merge_blocks(ir_ctx *ctx, ir_ref end, ir_ref begin) +{ + ir_ref prev, next; + ir_use_list *use_list; + ir_ref n, *p; + + IR_ASSERT(ctx->ir_base[begin].op == IR_BEGIN); + IR_ASSERT(ctx->ir_base[end].op == IR_END); + IR_ASSERT(ctx->ir_base[begin].op1 == end); + IR_ASSERT(ctx->use_lists[end].count == 1); + + prev = ctx->ir_base[end].op1; + + use_list = &ctx->use_lists[begin]; + IR_ASSERT(use_list->count == 1); + next = ctx->use_edges[use_list->refs]; + + /* remove BEGIN and END */ + ctx->ir_base[begin].op = IR_NOP; + ctx->ir_base[begin].op1 = IR_UNUSED; + ctx->use_lists[begin].count = 0; + ctx->ir_base[end].op = IR_NOP; + ctx->ir_base[end].op1 = IR_UNUSED; + ctx->use_lists[end].count = 0; + + /* connect their predecessor and successor */ + ctx->ir_base[next].op1 = prev; + use_list = &ctx->use_lists[prev]; + n = use_list->count; + for (p = &ctx->use_edges[use_list->refs]; n > 0; p++, n--) { + if (*p == end) { + *p = next; + } + } + + return next; +} + +IR_ALWAYS_INLINE void _ir_add_successors(const ir_ctx *ctx, ir_ref ref, ir_worklist *worklist) +{ + ir_use_list *use_list = &ctx->use_lists[ref]; + ir_ref *p, use, n = use_list->count; + + if (n < 2) { + if (n == 1) { + use = ctx->use_edges[use_list->refs]; + IR_ASSERT(ir_op_flags[ctx->ir_base[use].op] & IR_OP_FLAG_CONTROL); + ir_worklist_push(worklist, use); + } + } else { + p = &ctx->use_edges[use_list->refs]; + if (n == 2) { + use = *p; + IR_ASSERT(ir_op_flags[ctx->ir_base[use].op] & IR_OP_FLAG_CONTROL); + ir_worklist_push(worklist, use); + use = *(p + 1); + IR_ASSERT(ir_op_flags[ctx->ir_base[use].op] & IR_OP_FLAG_CONTROL); + ir_worklist_push(worklist, use); + } else { + for (; n > 0; p++, n--) { + use = *p; + IR_ASSERT(ir_op_flags[ctx->ir_base[use].op] & IR_OP_FLAG_CONTROL); + ir_worklist_push(worklist, use); + } + } + } +} + +IR_ALWAYS_INLINE void _ir_add_predecessors(const ir_insn *insn, ir_worklist *worklist) +{ + ir_ref n, ref; + const ir_ref *p; + + if (insn->op == IR_MERGE || insn->op == IR_LOOP_BEGIN) { + n = insn->inputs_count; + for (p = insn->ops + 1; n > 0; p++, n--) { + ref = *p; + IR_ASSERT(ref); + ir_worklist_push(worklist, ref); + } + } else if (insn->op != IR_START) { + if (EXPECTED(insn->op1)) { + ir_worklist_push(worklist, insn->op1); + } + } +} + +int ir_build_cfg(ir_ctx *ctx) +{ + ir_ref n, *p, ref, start, end, next; + uint32_t b; + ir_insn *insn; + ir_worklist worklist; + uint32_t bb_init_falgs; + uint32_t count, bb_count = 0; + uint32_t edges_count = 0; + ir_block *blocks, *bb; + uint32_t *_blocks, *edges; + ir_use_list *use_list; + uint32_t len = ir_bitset_len(ctx->insns_count); + ir_bitset bb_starts = ir_mem_calloc(len * 2, IR_BITSET_BITS / 8); + ir_bitset bb_leaks = bb_starts + len; + _blocks = ir_mem_calloc(ctx->insns_count, sizeof(uint32_t)); + ir_worklist_init(&worklist, ctx->insns_count); + + /* First try to perform backward DFS search starting from "stop" nodes */ + + /* Add all "stop" nodes */ + ref = ctx->ir_base[1].op1; + while (ref) { + ir_worklist_push(&worklist, ref); + ref = ctx->ir_base[ref].op3; + } + + while (ir_worklist_len(&worklist)) { + ref = ir_worklist_pop(&worklist); + insn = &ctx->ir_base[ref]; + + IR_ASSERT(IR_IS_BB_END(insn->op)); + /* Remember BB end */ + end = ref; + /* Some successors of IF and SWITCH nodes may be inaccessible by backward DFS */ + use_list = &ctx->use_lists[end]; + n = use_list->count; + if (n > 1) { + for (p = &ctx->use_edges[use_list->refs]; n > 0; p++, n--) { + /* Remember 
possible inaccessible successors */ + ir_bitset_incl(bb_leaks, *p); + } + } + /* Skip control nodes until BB start */ + ref = insn->op1; + while (1) { + insn = &ctx->ir_base[ref]; + if (IR_IS_BB_START(insn->op)) { + if (insn->op == IR_BEGIN + && (ctx->flags & IR_OPT_CFG) + && ctx->ir_base[insn->op1].op == IR_END + && ctx->use_lists[ref].count == 1) { + ref = _ir_merge_blocks(ctx, insn->op1, ref); + ref = ctx->ir_base[ref].op1; + continue; + } + break; + } + ref = insn->op1; // follow connected control blocks until BB start + } + /* Mark BB Start */ + bb_count++; + _blocks[ref] = end; + ir_bitset_incl(bb_starts, ref); + /* Add predecessors */ + _ir_add_predecessors(insn, &worklist); + } + + /* Backward DFS may miss some branches that end in infinite loops. */ + /* Try forward DFS (in most cases all nodes have already been processed). */ + + /* START node may be inaccessible from "stop" nodes */ + ir_bitset_incl(bb_leaks, 1); + + /* Add the not yet processed START and the successors of IF and SWITCH */ + IR_BITSET_FOREACH_DIFFERENCE(bb_leaks, bb_starts, len, start) { + ir_worklist_push(&worklist, start); + } IR_BITSET_FOREACH_END(); + + if (ir_worklist_len(&worklist)) { + ir_bitset_union(worklist.visited, bb_starts, len); + do { + ref = ir_worklist_pop(&worklist); + insn = &ctx->ir_base[ref]; + + IR_ASSERT(IR_IS_BB_START(insn->op)); + /* Remember BB start */ + start = ref; + /* Skip control nodes until BB end */ + while (1) { + use_list = &ctx->use_lists[ref]; + n = use_list->count; + next = IR_UNUSED; + for (p = &ctx->use_edges[use_list->refs]; n > 0; p++, n--) { + next = *p; + insn = &ctx->ir_base[next]; + if ((ir_op_flags[insn->op] & IR_OP_FLAG_CONTROL) && insn->op1 == ref) { + break; + } + } + IR_ASSERT(next != IR_UNUSED); + ref = next; +next_successor: + if (IR_IS_BB_END(insn->op)) { + if (insn->op == IR_END && (ctx->flags & IR_OPT_CFG)) { + use_list = &ctx->use_lists[ref]; + IR_ASSERT(use_list->count == 1); + next = ctx->use_edges[use_list->refs]; + + if (ctx->ir_base[next].op == IR_BEGIN + && ctx->use_lists[next].count == 1) { + ref = _ir_merge_blocks(ctx, ref, next); + insn = &ctx->ir_base[ref]; + goto next_successor; + } + } + break; + } + } + /* Mark BB Start */ + bb_count++; + _blocks[start] = ref; + ir_bitset_incl(bb_starts, start); + /* Add successors */ + _ir_add_successors(ctx, ref, &worklist); + } while (ir_worklist_len(&worklist)); + } + + IR_ASSERT(bb_count > 0); + + /* Create the array of basic blocks and count successor/predecessor edges for each BB */ + blocks = ir_mem_malloc((bb_count + 1) * sizeof(ir_block)); + b = 1; + bb = blocks + 1; + count = 0; + /* SCCP already removed UNREACHABLE blocks, otherwise all blocks are marked as UNREACHABLE first */ + bb_init_falgs = (ctx->flags & IR_SCCP_DONE) ? 
0 : IR_BB_UNREACHABLE; + IR_BITSET_FOREACH(bb_starts, len, start) { + end = _blocks[start]; + _blocks[start] = b; + _blocks[end] = b; + insn = &ctx->ir_base[start]; + IR_ASSERT(IR_IS_BB_START(insn->op)); + IR_ASSERT(end > start); + bb->start = start; + bb->end = end; + bb->successors = count; + count += ctx->use_lists[end].count; + bb->successors_count = 0; + bb->predecessors = count; + bb->dom_parent = 0; + bb->dom_depth = 0; + bb->dom_child = 0; + bb->dom_next_child = 0; + bb->loop_header = 0; + bb->loop_depth = 0; + if (insn->op == IR_START) { + bb->flags = IR_BB_START; + bb->predecessors_count = 0; + } else { + bb->flags = bb_init_falgs; + if (insn->op == IR_MERGE || insn->op == IR_LOOP_BEGIN) { + n = insn->inputs_count; + bb->predecessors_count = n; + edges_count += n; + count += n; + } else if (EXPECTED(insn->op1)) { + if (insn->op == IR_ENTRY) { + bb->flags |= IR_BB_ENTRY; + ctx->entries_count++; + } + bb->predecessors_count = 1; + edges_count++; + count++; + } else { + IR_ASSERT(insn->op == IR_BEGIN); /* start of unreachable block */ + bb->predecessors_count = 0; + } + } + b++; + bb++; + } IR_BITSET_FOREACH_END(); + IR_ASSERT(count == edges_count * 2); + ir_mem_free(bb_starts); + + /* Create an array of successor/predecessors control edges */ + edges = ir_mem_malloc(edges_count * 2 * sizeof(uint32_t)); + bb = blocks + 1; + for (b = 1; b <= bb_count; b++, bb++) { + insn = &ctx->ir_base[bb->start]; + if (bb->predecessors_count > 1) { + uint32_t *q = edges + bb->predecessors; + n = insn->inputs_count; + for (p = insn->ops + 1; n > 0; p++, q++, n--) { + ref = *p; + IR_ASSERT(ref); + ir_ref pred_b = _blocks[ref]; + ir_block *pred_bb = &blocks[pred_b]; + *q = pred_b; + edges[pred_bb->successors + pred_bb->successors_count++] = b; + } + } else if (bb->predecessors_count == 1) { + ref = insn->op1; + IR_ASSERT(ref); + IR_ASSERT(IR_OPND_KIND(ir_op_flags[insn->op], 1) == IR_OPND_CONTROL); + ir_ref pred_b = _blocks[ref]; + ir_block *pred_bb = &blocks[pred_b]; + edges[bb->predecessors] = pred_b; + edges[pred_bb->successors + pred_bb->successors_count++] = b; + } + } + + ctx->cfg_blocks_count = bb_count; + ctx->cfg_edges_count = edges_count * 2; + ctx->cfg_blocks = blocks; + ctx->cfg_edges = edges; + ctx->cfg_map = _blocks; + + if (!(ctx->flags & IR_SCCP_DONE)) { + uint32_t reachable_count = 0; + + /* Mark reachable blocks */ + ir_worklist_clear(&worklist); + ir_worklist_push(&worklist, 1); + while (ir_worklist_len(&worklist) != 0) { + uint32_t *p; + + reachable_count++; + b = ir_worklist_pop(&worklist); + bb = &blocks[b]; + bb->flags &= ~IR_BB_UNREACHABLE; + n = bb->successors_count; + if (n > 1) { + for (p = edges + bb->successors; n > 0; p++, n--) { + ir_worklist_push(&worklist, *p); + } + } else if (n == 1) { + ir_worklist_push(&worklist, edges[bb->successors]); + } + } + if (reachable_count != ctx->cfg_blocks_count) { + ir_remove_unreachable_blocks(ctx); + } + } + + ir_worklist_free(&worklist); + + return 1; +} + +static void ir_remove_predecessor(ir_ctx *ctx, ir_block *bb, uint32_t from) +{ + uint32_t i, *p, *q, n = 0; + + p = q = &ctx->cfg_edges[bb->predecessors]; + for (i = 0; i < bb->predecessors_count; i++, p++) { + if (*p != from) { + if (p != q) { + *q = *p; + } + q++; + n++; + } + } + IR_ASSERT(n != bb->predecessors_count); + bb->predecessors_count = n; +} + +static void ir_remove_from_use_list(ir_ctx *ctx, ir_ref from, ir_ref ref) +{ + ir_ref j, n, *p, *q, use; + ir_use_list *use_list = &ctx->use_lists[from]; + ir_ref skip = 0; + + n = use_list->count; + for (j = 0, p = q = 
&ctx->use_edges[use_list->refs]; j < n; j++, p++) { + use = *p; + if (use == ref) { + skip++; + } else { + if (p != q) { + *q = use; + } + q++; + } + } + use_list->count -= skip; +} + +static void ir_remove_merge_input(ir_ctx *ctx, ir_ref merge, ir_ref from) +{ + ir_ref i, j, n, k, *p, use; + ir_insn *use_insn; + ir_use_list *use_list; + ir_bitset life_inputs; + ir_insn *insn = &ctx->ir_base[merge]; + + IR_ASSERT(insn->op == IR_MERGE || insn->op == IR_LOOP_BEGIN); + n = insn->inputs_count; + i = 1; + life_inputs = ir_bitset_malloc(n + 1); + for (j = 1; j <= n; j++) { + ir_ref input = ir_insn_op(insn, j); + + if (input != from) { + if (i != j) { + ir_insn_set_op(insn, i, input); + } + ir_bitset_incl(life_inputs, j); + i++; + } + } + i--; + if (i == 1) { + insn->op = IR_BEGIN; + insn->inputs_count = 0; + use_list = &ctx->use_lists[merge]; + if (use_list->count > 1) { + for (k = 0, p = &ctx->use_edges[use_list->refs]; k < use_list->count; k++, p++) { + use = *p; + use_insn = &ctx->ir_base[use]; + if (use_insn->op == IR_PHI) { + /* Convert PHI to COPY */ + i = 2; + for (j = 2; j <= n; j++) { + ir_ref input = ir_insn_op(use_insn, j); + + if (ir_bitset_in(life_inputs, j - 1)) { + use_insn->op1 = ir_insn_op(use_insn, j); + } else if (input > 0) { + ir_remove_from_use_list(ctx, input, use); + } + } + use_insn->op = IR_COPY; + use_insn->op2 = IR_UNUSED; + use_insn->op3 = IR_UNUSED; + ir_remove_from_use_list(ctx, merge, use); + } + } + } + } else { + insn->inputs_count = i; + + n++; + use_list = &ctx->use_lists[merge]; + if (use_list->count > 1) { + for (k = 0, p = &ctx->use_edges[use_list->refs]; k < use_list->count; k++, p++) { + use = *p; + use_insn = &ctx->ir_base[use]; + if (use_insn->op == IR_PHI) { + i = 2; + for (j = 2; j <= n; j++) { + ir_ref input = ir_insn_op(use_insn, j); + + if (ir_bitset_in(life_inputs, j - 1)) { + IR_ASSERT(input); + if (i != j) { + ir_insn_set_op(use_insn, i, input); + } + i++; + } else if (input > 0) { + ir_remove_from_use_list(ctx, input, use); + } + } + } + } + } + } + ir_mem_free(life_inputs); + ir_remove_from_use_list(ctx, from, merge); +} + +/* CFG constructed after SCCP pass doesn't have unreachable BBs, otherwise they should be removed */ +int ir_remove_unreachable_blocks(ir_ctx *ctx) +{ + uint32_t b, *p, i; + uint32_t unreachable_count = 0; + uint32_t bb_count = ctx->cfg_blocks_count; + ir_block *bb = ctx->cfg_blocks + 1; + + for (b = 1; b <= bb_count; b++, bb++) { + if (bb->flags & IR_BB_UNREACHABLE) { +#if 0 + do {if (!unreachable_count) ir_dump_cfg(ctx, stderr);} while(0); +#endif + if (bb->successors_count) { + for (i = 0, p = &ctx->cfg_edges[bb->successors]; i < bb->successors_count; i++, p++) { + ir_block *succ_bb = &ctx->cfg_blocks[*p]; + + if (!(succ_bb->flags & IR_BB_UNREACHABLE)) { + ir_remove_predecessor(ctx, succ_bb, b); + ir_remove_merge_input(ctx, succ_bb->start, bb->end); + } + } + } else { + ir_ref prev, ref = bb->end; + ir_insn *insn = &ctx->ir_base[ref]; + + IR_ASSERT(ir_op_flags[insn->op] & IR_OP_FLAG_TERMINATOR); + /* remove from terminators list */ + prev = ctx->ir_base[1].op1; + if (prev == ref) { + ctx->ir_base[1].op1 = insn->op3; + } else { + while (prev) { + if (ctx->ir_base[prev].op3 == ref) { + ctx->ir_base[prev].op3 = insn->op3; + break; + } + prev = ctx->ir_base[prev].op3; + } + } + } + ctx->cfg_map[bb->start] = 0; + ctx->cfg_map[bb->end] = 0; + unreachable_count++; + } + } + + if (unreachable_count) { + ir_block *dst_bb; + uint32_t n = 1; + uint32_t *edges; + + dst_bb = bb = ctx->cfg_blocks + 1; + for (b = 1; b <= bb_count; b++, 
bb++) { + if (!(bb->flags & IR_BB_UNREACHABLE)) { + if (dst_bb != bb) { + memcpy(dst_bb, bb, sizeof(ir_block)); + ctx->cfg_map[dst_bb->start] = n; + ctx->cfg_map[dst_bb->end] = n; + } + dst_bb->successors_count = 0; + dst_bb++; + n++; + } + } + ctx->cfg_blocks_count = bb_count = n - 1; + + /* Rebuild successor/predecessors control edges */ + edges = ctx->cfg_edges; + bb = ctx->cfg_blocks + 1; + for (b = 1; b <= bb_count; b++, bb++) { + ir_insn *insn = &ctx->ir_base[bb->start]; + ir_ref *p, ref; + + n = bb->predecessors_count; + if (n > 1) { + uint32_t *q = edges + bb->predecessors; + + IR_ASSERT(n == insn->inputs_count); + for (p = insn->ops + 1; n > 0; p++, q++, n--) { + ref = *p; + IR_ASSERT(ref); + ir_ref pred_b = ctx->cfg_map[ref]; + ir_block *pred_bb = &ctx->cfg_blocks[pred_b]; + *q = pred_b; + edges[pred_bb->successors + pred_bb->successors_count++] = b; + } + } else if (n == 1) { + ref = insn->op1; + IR_ASSERT(ref); + IR_ASSERT(IR_OPND_KIND(ir_op_flags[insn->op], 1) == IR_OPND_CONTROL); + ir_ref pred_b = ctx->cfg_map[ref]; + ir_block *pred_bb = &ctx->cfg_blocks[pred_b]; + edges[bb->predecessors] = pred_b; + edges[pred_bb->successors + pred_bb->successors_count++] = b; + } + } + } + + return 1; +} + +#if 0 +static void compute_postnum(const ir_ctx *ctx, uint32_t *cur, uint32_t b) +{ + uint32_t i, *p; + ir_block *bb = &ctx->cfg_blocks[b]; + + if (bb->postnum != 0) { + return; + } + + if (bb->successors_count) { + bb->postnum = -1; /* Marker for "currently visiting" */ + p = ctx->cfg_edges + bb->successors; + i = bb->successors_count; + do { + compute_postnum(ctx, cur, *p); + p++; + } while (--i); + } + bb->postnum = (*cur)++; +} + +/* Computes dominator tree using algorithm from "A Simple, Fast Dominance Algorithm" by + * Cooper, Harvey and Kennedy. 
 */ +int ir_build_dominators_tree(ir_ctx *ctx) +{ + uint32_t blocks_count, b, postnum; + ir_block *blocks, *bb; + uint32_t *edges; + bool changed; + + ctx->flags &= ~IR_NO_LOOPS; + + postnum = 1; + compute_postnum(ctx, &postnum, 1); + + /* Find immediate dominators */ + blocks = ctx->cfg_blocks; + edges = ctx->cfg_edges; + blocks_count = ctx->cfg_blocks_count; + blocks[1].idom = 1; + do { + changed = 0; + /* Iterating in Reverse Post Order */ + for (b = 2, bb = &blocks[2]; b <= blocks_count; b++, bb++) { + IR_ASSERT(!(bb->flags & IR_BB_UNREACHABLE)); + if (bb->predecessors_count == 1) { + uint32_t pred_b = edges[bb->predecessors]; + + IR_ASSERT(blocks[pred_b].idom > 0); + if (bb->idom != pred_b) { + bb->idom = pred_b; + changed = 1; + } + } else if (bb->predecessors_count) { + uint32_t idom = 0; + uint32_t k = bb->predecessors_count; + uint32_t *p = edges + bb->predecessors; + + do { + uint32_t pred_b = *p; + ir_block *pred_bb = &blocks[pred_b]; + ir_block *idom_bb; + + if (pred_bb->idom > 0) { + idom = pred_b; + idom_bb = &blocks[idom]; + + while (--k > 0) { + pred_b = *(++p); + pred_bb = &blocks[pred_b]; + if (pred_bb->idom > 0) { + while (idom != pred_b) { + while (pred_bb->postnum < idom_bb->postnum) { + pred_b = pred_bb->idom; + pred_bb = &blocks[pred_b]; + } + while (idom_bb->postnum < pred_bb->postnum) { + idom = idom_bb->idom; + idom_bb = &blocks[idom]; + } + } + } + } + + if (bb->idom != idom) { + bb->idom = idom; + changed = 1; + } + break; + } + p++; + } while (--k > 0); + } + } + } while (changed); + blocks[1].idom = 0; + blocks[1].dom_depth = 0; + + /* Construct the dominator tree */ + for (b = 2, bb = &blocks[2]; b <= blocks_count; b++, bb++) { + IR_ASSERT(!(bb->flags & IR_BB_UNREACHABLE)); + if (bb->idom > 0) { + ir_block *idom_bb = &blocks[bb->idom]; + + bb->dom_depth = idom_bb->dom_depth + 1; + /* Sort by block number to traverse children in pre-order */ + if (idom_bb->dom_child == 0) { + idom_bb->dom_child = b; + } else if (b < idom_bb->dom_child) { + bb->dom_next_child = idom_bb->dom_child; + idom_bb->dom_child = b; + } else { + int child = idom_bb->dom_child; + ir_block *child_bb = &blocks[child]; + + while (child_bb->dom_next_child > 0 && b > child_bb->dom_next_child) { + child = child_bb->dom_next_child; + child_bb = &blocks[child]; + } + bb->dom_next_child = child_bb->dom_next_child; + child_bb->dom_next_child = b; + } + } + } + + return 1; +} +#else +/* A single-pass modification of "A Simple, Fast Dominance Algorithm" by + * Cooper, Harvey and Kennedy, that relies on IR block ordering */ +int ir_build_dominators_tree(ir_ctx *ctx) +{ + uint32_t blocks_count, b; + ir_block *blocks, *bb; + uint32_t *edges; + + ctx->flags |= IR_NO_LOOPS; + + /* Find immediate dominators */ + blocks = ctx->cfg_blocks; + edges = ctx->cfg_edges; + blocks_count = ctx->cfg_blocks_count; + blocks[1].idom = 1; + blocks[1].dom_depth = 0; + + /* Iterating in Reverse Post Order */ + for (b = 2, bb = &blocks[2]; b <= blocks_count; b++, bb++) { + IR_ASSERT(!(bb->flags & IR_BB_UNREACHABLE)); + IR_ASSERT(bb->predecessors_count > 0); + uint32_t k = bb->predecessors_count; + uint32_t *p = edges + bb->predecessors; + uint32_t idom = *p; + ir_block *idom_bb; + + if (UNEXPECTED(idom > b)) { + /* In rare cases, LOOP_BEGIN.op1 may be a back-edge. Skip back-edges. 
*/ + ctx->flags &= ~IR_NO_LOOPS; + while (1) { + k--; + p++; + idom = *p; + if (idom < b) { + break; + } + IR_ASSERT(k > 0); + } + } + IR_ASSERT(blocks[idom].idom > 0); + + while (--k > 0) { + uint32_t pred_b = *(++p); + + if (pred_b < b) { + IR_ASSERT(blocks[pred_b].idom > 0); + while (idom != pred_b) { + while (pred_b > idom) { + pred_b = blocks[pred_b].idom; + } + while (idom > pred_b) { + idom = blocks[idom].idom; + } + } + } else { + ctx->flags &= ~IR_NO_LOOPS; + } + } + bb->idom = idom; + idom_bb = &blocks[idom]; + + bb->dom_depth = idom_bb->dom_depth + 1; + /* Sort by block number to traverse children in pre-order */ + if (idom_bb->dom_child == 0) { + idom_bb->dom_child = b; + } else if (b < idom_bb->dom_child) { + bb->dom_next_child = idom_bb->dom_child; + idom_bb->dom_child = b; + } else { + int child = idom_bb->dom_child; + ir_block *child_bb = &blocks[child]; + + while (child_bb->dom_next_child > 0 && b > child_bb->dom_next_child) { + child = child_bb->dom_next_child; + child_bb = &blocks[child]; + } + bb->dom_next_child = child_bb->dom_next_child; + child_bb->dom_next_child = b; + } + } + + blocks[1].idom = 0; + + return 1; +} +#endif + +static bool ir_dominates(const ir_block *blocks, uint32_t b1, uint32_t b2) +{ + uint32_t b1_depth = blocks[b1].dom_depth; + const ir_block *bb2 = &blocks[b2]; + + while (bb2->dom_depth > b1_depth) { + b2 = bb2->dom_parent; + bb2 = &blocks[b2]; + } + return b1 == b2; +} + +int ir_find_loops(ir_ctx *ctx) +{ + uint32_t i, j, n, count; + uint32_t *entry_times, *exit_times, *sorted_blocks, time = 1; + ir_block *blocks = ctx->cfg_blocks; + uint32_t *edges = ctx->cfg_edges; + ir_worklist work; + + if (ctx->flags & IR_NO_LOOPS) { + return 1; + } + + /* We don't materialize the DJ spanning tree explicitly, as we are only interested in ancestor + * queries. These are implemented by checking entry/exit times of the DFS search. */ + ir_worklist_init(&work, ctx->cfg_blocks_count + 1); + entry_times = ir_mem_malloc((ctx->cfg_blocks_count + 1) * 3 * sizeof(uint32_t)); + exit_times = entry_times + ctx->cfg_blocks_count + 1; + sorted_blocks = exit_times + ctx->cfg_blocks_count + 1; + + memset(entry_times, 0, (ctx->cfg_blocks_count + 1) * sizeof(uint32_t)); + + ir_worklist_push(&work, 1); + while (ir_worklist_len(&work)) { + ir_block *bb; + int child; + +next: + i = ir_worklist_peek(&work); + if (!entry_times[i]) { + entry_times[i] = time++; + } + + /* Visit blocks immediately dominated by i. */ + bb = &blocks[i]; + for (child = bb->dom_child; child > 0; child = blocks[child].dom_next_child) { + if (ir_worklist_push(&work, child)) { + goto next; + } + } + + /* Visit join edges. */ + if (bb->successors_count) { + uint32_t *p = edges + bb->successors; + for (j = 0; j < bb->successors_count; j++,p++) { + uint32_t succ = *p; + + if (blocks[succ].idom == i) { + continue; + } else if (ir_worklist_push(&work, succ)) { + goto next; + } + } + } + exit_times[i] = time++; + ir_worklist_pop(&work); + } + + /* Sort blocks by level, which is the opposite order in which we want to process them */ + sorted_blocks[1] = 1; + j = 1; + n = 2; + while (j != n) { + i = j; + j = n; + for (; i < j; i++) { + int child; + for (child = blocks[sorted_blocks[i]].dom_child; child > 0; child = blocks[child].dom_next_child) { + sorted_blocks[n++] = child; + } + } + } + count = n; + + /* Identify loops. See Sreedhar et al, "Identifying Loops Using DJ Graphs". 
*/ + while (n > 1) { + i = sorted_blocks[--n]; + ir_block *bb = &blocks[i]; + + if (bb->predecessors_count > 1) { + bool irreducible = 0; + uint32_t *p = &edges[bb->predecessors]; + + j = bb->predecessors_count; + do { + uint32_t pred = *p; + + /* A join edge is one for which the predecessor does not + immediately dominate the successor. */ + if (bb->idom != pred) { + /* In a loop back-edge (back-join edge), the successor dominates + the predecessor. */ + if (ir_dominates(blocks, i, pred)) { + if (!ir_worklist_len(&work)) { + ir_bitset_clear(work.visited, ir_bitset_len(ir_worklist_capasity(&work))); + } + blocks[pred].loop_header = 0; /* support for merged loops */ + ir_worklist_push(&work, pred); + } else { + /* Otherwise it's a cross-join edge. See if it's a branch + to an ancestor on the DJ spanning tree. */ + if (entry_times[pred] > entry_times[i] && exit_times[pred] < exit_times[i]) { + irreducible = 1; + } + } + } + p++; + } while (--j); + + if (UNEXPECTED(irreducible)) { + // TODO: Support for irreducible loops ??? + bb->flags |= IR_BB_IRREDUCIBLE_LOOP; + ctx->flags |= IR_IRREDUCIBLE_CFG; + while (ir_worklist_len(&work)) { + ir_worklist_pop(&work); + } + } else if (ir_worklist_len(&work)) { + bb->flags |= IR_BB_LOOP_HEADER; + ctx->flags |= IR_CFG_HAS_LOOPS; + bb->loop_depth = 1; + while (ir_worklist_len(&work)) { + j = ir_worklist_pop(&work); + while (blocks[j].loop_header > 0) { + j = blocks[j].loop_header; + } + if (j != i) { + ir_block *bb = &blocks[j]; + if (bb->idom == 0 && j != 1) { + /* Ignore blocks that are unreachable or only abnormally reachable. */ + continue; + } + bb->loop_header = i; + if (bb->predecessors_count) { + uint32_t *p = &edges[bb->predecessors]; + j = bb->predecessors_count; + do { + ir_worklist_push(&work, *p); + p++; + } while (--j); + } + } + } + } + } + } + + if (ctx->flags & IR_CFG_HAS_LOOPS) { + for (n = 1; n < count; n++) { + i = sorted_blocks[n]; + ir_block *bb = &blocks[i]; + if (bb->loop_header > 0) { + ir_block *loop = &blocks[bb->loop_header]; + uint32_t loop_depth = loop->loop_depth; + + if (bb->flags & IR_BB_LOOP_HEADER) { + loop_depth++; + } + bb->loop_depth = loop_depth; + if (bb->flags & (IR_BB_ENTRY|IR_BB_LOOP_WITH_ENTRY)) { + loop->flags |= IR_BB_LOOP_WITH_ENTRY; + } + } + } + } + + ir_mem_free(entry_times); + ir_worklist_free(&work); + + return 1; +} + +/* A variation of "Top-down Positioning" algorithm described by + * Karl Pettis and Robert C. 
Hansen "Profile Guided Code Positioning" + * + * TODO: Switch to "Bottom-up Positioning" algorithm + */ +int ir_schedule_blocks(ir_ctx *ctx) +{ + ir_bitqueue blocks; + uint32_t b, best_successor, j, last_non_empty; + ir_block *bb, *best_successor_bb; + ir_insn *insn; + uint32_t *list, *map; + uint32_t count = 0; + bool reorder = 0; + + ir_bitqueue_init(&blocks, ctx->cfg_blocks_count + 1); + blocks.pos = 0; + list = ir_mem_malloc(sizeof(uint32_t) * (ctx->cfg_blocks_count + 1) * 2); + map = list + (ctx->cfg_blocks_count + 1); + for (b = 1; b <= ctx->cfg_blocks_count; b++) { + ir_bitset_incl(blocks.set, b); + } + + while ((b = ir_bitqueue_pop(&blocks)) != (uint32_t)-1) { + bb = &ctx->cfg_blocks[b]; + /* Start trace */ + last_non_empty = 0; + do { + if (UNEXPECTED(bb->flags & IR_BB_PREV_EMPTY_ENTRY) && ir_bitqueue_in(&blocks, b - 1)) { + /* Schedule the previous empty ENTRY block before this one */ + uint32_t predecessor = b - 1; + + ir_bitqueue_del(&blocks, predecessor); + count++; + list[count] = predecessor; + map[predecessor] = count; + if (predecessor != count) { + reorder = 1; + } + } + count++; + list[count] = b; + map[b] = count; + if (b != count) { + reorder = 1; + } + if (!(bb->flags & IR_BB_EMPTY)) { + last_non_empty = b; + } + best_successor_bb = NULL; + if (bb->successors_count == 1) { + best_successor = ctx->cfg_edges[bb->successors]; + if (ir_bitqueue_in(&blocks, best_successor)) { + best_successor_bb = &ctx->cfg_blocks[best_successor]; + } + } else if (bb->successors_count > 1) { + uint32_t prob, best_successor_prob; + uint32_t *p, successor; + ir_block *successor_bb; + + for (b = 0, p = &ctx->cfg_edges[bb->successors]; b < bb->successors_count; b++, p++) { + successor = *p; + if (ir_bitqueue_in(&blocks, successor)) { + successor_bb = &ctx->cfg_blocks[successor]; + insn = &ctx->ir_base[successor_bb->start]; + if (insn->op == IR_IF_TRUE || insn->op == IR_IF_FALSE) { + prob = insn->op2; + if (!prob) { + prob = 100 / bb->successors_count; + if (!(successor_bb->flags & IR_BB_EMPTY)) { + prob++; + } + } + } else if (insn->op == IR_CASE_DEFAULT) { + prob = insn->op2; + if (!prob) { + prob = 100 / bb->successors_count; + } + } else if (insn->op == IR_CASE_VAL) { + prob = insn->op3; + if (!prob) { + prob = 100 / bb->successors_count; + } + } else if (insn->op == IR_ENTRY) { + if ((ctx->flags & IR_MERGE_EMPTY_ENTRIES) && (successor_bb->flags & IR_BB_EMPTY)) { + prob = 99; /* prefer empty ENTRY block to go first */ + } else { + prob = 1; + } + } else { + prob = 100 / bb->successors_count; + } + if (!best_successor_bb + || successor_bb->loop_depth > best_successor_bb->loop_depth + || prob > best_successor_prob) { + best_successor = successor; + best_successor_bb = successor_bb; + best_successor_prob = prob; + } + } + } + } + if (!best_successor_bb) { + /* Try to continue trace using the other successor of the last IF */ + if ((bb->flags & IR_BB_EMPTY) && last_non_empty) { + bb = &ctx->cfg_blocks[last_non_empty]; + if (bb->successors_count == 2 && ctx->ir_base[bb->end].op == IR_IF) { + b = ctx->cfg_edges[bb->successors]; + + if (!ir_bitqueue_in(&blocks, b)) { + b = ctx->cfg_edges[bb->successors + 1]; + } + if (ir_bitqueue_in(&blocks, b)) { + bb = &ctx->cfg_blocks[b]; + ir_bitqueue_del(&blocks, b); + continue; + } + } + } + /* End trace */ + break; + } + b = best_successor; + bb = best_successor_bb; + ir_bitqueue_del(&blocks, b); + } while (1); + } + + if (reorder) { + ir_block *cfg_blocks = ir_mem_malloc(sizeof(ir_block) * (ctx->cfg_blocks_count + 1)); + + memset(ctx->cfg_blocks, 0, 
sizeof(ir_block)); + for (b = 1, bb = cfg_blocks + 1; b <= count; b++, bb++) { + *bb = ctx->cfg_blocks[list[b]]; + if (bb->dom_parent > 0) { + bb->dom_parent = map[bb->dom_parent]; + } + if (bb->dom_child > 0) { + bb->dom_child = map[bb->dom_child]; + } + if (bb->dom_next_child > 0) { + bb->dom_next_child = map[bb->dom_next_child]; + } + if (bb->loop_header > 0) { + bb->loop_header = map[bb->loop_header]; + } + } + for (j = 0; j < ctx->cfg_edges_count; j++) { + if (ctx->cfg_edges[j] > 0) { + ctx->cfg_edges[j] = map[ctx->cfg_edges[j]]; + } + } + ir_mem_free(ctx->cfg_blocks); + ctx->cfg_blocks = cfg_blocks; + + if (ctx->osr_entry_loads) { + ir_list *list = (ir_list*)ctx->osr_entry_loads; + uint32_t pos = 0, count; + + while (1) { + b = ir_list_at(list, pos); + if (b == 0) { + break; + } + ir_list_set(list, pos, map[b]); + pos++; + count = ir_list_at(list, pos); + pos += count + 1; + } + } + + if (ctx->cfg_map) { + ir_ref i; + + for (i = IR_UNUSED + 1; i < ctx->insns_count; i++) { + ctx->cfg_map[i] = map[ctx->cfg_map[i]]; + } + } + } + + ir_mem_free(list); + ir_bitqueue_free(&blocks); + + return 1; +} + +/* JMP target optimisation */ +uint32_t ir_skip_empty_target_blocks(const ir_ctx *ctx, uint32_t b) +{ + ir_block *bb; + + while (1) { + bb = &ctx->cfg_blocks[b]; + + if ((bb->flags & (IR_BB_START|IR_BB_ENTRY|IR_BB_EMPTY)) == IR_BB_EMPTY) { + b = ctx->cfg_edges[bb->successors]; + } else { + break; + } + } + return b; +} + +uint32_t ir_skip_empty_next_blocks(const ir_ctx *ctx, uint32_t b) +{ + ir_block *bb; + + while (1) { + if (b > ctx->cfg_blocks_count) { + return 0; + } + + bb = &ctx->cfg_blocks[b]; + + if ((bb->flags & (IR_BB_START|IR_BB_EMPTY)) == IR_BB_EMPTY) { + b++; + } else { + break; + } + } + return b; +} + +void ir_get_true_false_blocks(const ir_ctx *ctx, uint32_t b, uint32_t *true_block, uint32_t *false_block, uint32_t *next_block) +{ + ir_block *bb; + uint32_t *p, use_block; + + *true_block = 0; + *false_block = 0; + bb = &ctx->cfg_blocks[b]; + IR_ASSERT(ctx->ir_base[bb->end].op == IR_IF); + IR_ASSERT(bb->successors_count == 2); + p = &ctx->cfg_edges[bb->successors]; + use_block = *p; + if (ctx->ir_base[ctx->cfg_blocks[use_block].start].op == IR_IF_TRUE) { + *true_block = ir_skip_empty_target_blocks(ctx, use_block); + use_block = *(p+1); + IR_ASSERT(ctx->ir_base[ctx->cfg_blocks[use_block].start].op == IR_IF_FALSE); + *false_block = ir_skip_empty_target_blocks(ctx, use_block); + } else { + IR_ASSERT(ctx->ir_base[ctx->cfg_blocks[use_block].start].op == IR_IF_FALSE); + *false_block = ir_skip_empty_target_blocks(ctx, use_block); + use_block = *(p+1); + IR_ASSERT(ctx->ir_base[ctx->cfg_blocks[use_block].start].op == IR_IF_TRUE); + *true_block = ir_skip_empty_target_blocks(ctx, use_block); + } + IR_ASSERT(*true_block && *false_block); + *next_block = b == ctx->cfg_blocks_count ? 0 : ir_skip_empty_next_blocks(ctx, b + 1); +} diff --git a/ext/opcache/jit/ir/ir_check.c b/ext/opcache/jit/ir/ir_check.c new file mode 100644 index 00000000000..1993ee13605 --- /dev/null +++ b/ext/opcache/jit/ir/ir_check.c @@ -0,0 +1,381 @@ +/* + * IR - Lightweight JIT Compilation Framework + * (IR verification) + * Copyright (C) 2022 Zend by Perforce. 
+ * Authors: Dmitry Stogov + */ + +#include "ir.h" +#include "ir_private.h" + +void ir_consistency_check(void) +{ + IR_ASSERT(IR_UNUSED == 0); + IR_ASSERT(IR_NOP == 0); + + IR_ASSERT((int)IR_BOOL == (int)IR_C_BOOL); + IR_ASSERT((int)IR_U8 == (int)IR_C_U8); + IR_ASSERT((int)IR_U16 == (int)IR_C_U16); + IR_ASSERT((int)IR_U32 == (int)IR_C_U32); + IR_ASSERT((int)IR_U64 == (int)IR_C_U64); + IR_ASSERT((int)IR_ADDR == (int)IR_C_ADDR); + IR_ASSERT((int)IR_CHAR == (int)IR_C_CHAR); + IR_ASSERT((int)IR_I8 == (int)IR_C_I8); + IR_ASSERT((int)IR_I16 == (int)IR_C_I16); + IR_ASSERT((int)IR_I32 == (int)IR_C_I32); + IR_ASSERT((int)IR_I64 == (int)IR_C_I64); + IR_ASSERT((int)IR_DOUBLE == (int)IR_C_DOUBLE); + IR_ASSERT((int)IR_FLOAT == (int)IR_C_FLOAT); + + IR_ASSERT((IR_EQ ^ 1) == IR_NE); + IR_ASSERT((IR_LT ^ 3) == IR_GT); + IR_ASSERT((IR_GT ^ 3) == IR_LT); + IR_ASSERT((IR_LE ^ 3) == IR_GE); + IR_ASSERT((IR_GE ^ 3) == IR_LE); + IR_ASSERT((IR_ULT ^ 3) == IR_UGT); + IR_ASSERT((IR_UGT ^ 3) == IR_ULT); + IR_ASSERT((IR_ULE ^ 3) == IR_UGE); + IR_ASSERT((IR_UGE ^ 3) == IR_ULE); + + IR_ASSERT(IR_ADD + 1 == IR_SUB); +} + +static bool ir_check_use_list(const ir_ctx *ctx, ir_ref from, ir_ref to) +{ + ir_ref n, j, *p; + ir_use_list *use_list = &ctx->use_lists[from]; + + n = use_list->count; + for (j = 0, p = &ctx->use_edges[use_list->refs]; j < n; j++, p++) { + if (*p == to) { + return 1; + } + } + return 0; +} + +static bool ir_check_input_list(const ir_ctx *ctx, ir_ref from, ir_ref to) +{ + ir_insn *insn = &ctx->ir_base[to]; + ir_ref n, j, *p; + + n = ir_input_edges_count(ctx, insn); + for (j = 1, p = insn->ops + 1; j <= n; j++, p++) { + if (*p == from) { + return 1; + } + } + return 0; +} + +static bool ir_check_domination(const ir_ctx *ctx, ir_ref def, ir_ref use) +{ + uint32_t b1 = ctx->cfg_map[def]; + uint32_t b2 = ctx->cfg_map[use]; + ir_block *blocks = ctx->cfg_blocks; + uint32_t b1_depth = blocks[b1].dom_depth; + const ir_block *bb2 = &blocks[b2]; + + if (b1 == b2) { + return def < use; + } + while (bb2->dom_depth > b1_depth) { + b2 = bb2->dom_parent; + bb2 = &blocks[b2]; + } + return b1 == b2; +} + +bool ir_check(const ir_ctx *ctx) +{ + ir_ref i, j, n, *p, use; + ir_insn *insn, *use_insn; + ir_type type; + uint32_t flags; + bool ok = 1; + + for (i = IR_UNUSED + 1, insn = ctx->ir_base + i; i < ctx->insns_count;) { + flags = ir_op_flags[insn->op]; + n = ir_input_edges_count(ctx, insn); + for (j = 1, p = insn->ops + 1; j <= n; j++, p++) { + use = *p; + if (use != IR_UNUSED) { + if (IR_IS_CONST_REF(use)) { + if (use >= ctx->consts_count) { + fprintf(stderr, "ir_base[%d].ops[%d] constant reference (%d) is out of range\n", i, j, use); + ok = 0; + } + } else { + if (use >= ctx->insns_count) { + fprintf(stderr, "ir_base[%d].ops[%d] insn reference (%d) is out of range\n", i, j, use); + ok = 0; + } + use_insn = &ctx->ir_base[use]; + switch (IR_OPND_KIND(flags, j)) { + case IR_OPND_DATA: + if (!(ir_op_flags[use_insn->op] & IR_OP_FLAG_DATA)) { + if (!(ir_op_flags[use_insn->op] & IR_OP_FLAG_MEM) + || use_insn->type == IR_VOID) { + fprintf(stderr, "ir_base[%d].ops[%d] reference (%d) must be DATA\n", i, j, use); + ok = 0; + } + } + if (use >= i + && !(insn->op == IR_PHI + && (!(ctx->flags & IR_LINEAR) || ctx->ir_base[insn->op1].op == IR_LOOP_BEGIN))) { + fprintf(stderr, "ir_base[%d].ops[%d] invalid forward reference (%d)\n", i, j, use); + ok = 0; + } + if (flags & IR_OP_FLAG_DATA) { + switch (insn->op) { + case IR_COND: + if (j == 1) { + break; + } + IR_FALLTHROUGH; + case IR_ADD: + case IR_SUB: + case IR_MUL: + case IR_DIV: + 
case IR_MOD: + case IR_NEG: + case IR_ABS: + case IR_ADD_OV: + case IR_SUB_OV: + case IR_MUL_OV: + case IR_NOT: + case IR_OR: + case IR_AND: + case IR_XOR: + case IR_SHL: + case IR_SHR: + case IR_SAR: + case IR_ROL: + case IR_ROR: + case IR_BSWAP: + case IR_MIN: + case IR_MAX: + case IR_PHI: + case IR_COPY: + case IR_PI: + if (insn->type != use_insn->type) { + if (j == 2 + && (insn->op == IR_SHL + || insn->op == IR_SHR + || insn->op == IR_SAR + || insn->op == IR_ROL + || insn->op == IR_ROR) + && ir_type_size[use_insn->type] < ir_type_size[insn->type]) { + /* second argument of SHIFT may be incompatible with result */ + break; + } + if (insn->op == IR_NOT && insn->type == IR_BOOL) { + /* boolean not */ + break; + } + if (sizeof(void*) == 8) { + if (insn->type == IR_ADDR && (use_insn->type == IR_U64 || use_insn->type == IR_I64)) { + break; + } + } else { + if (insn->type == IR_ADDR && (use_insn->type == IR_U32 || use_insn->type == IR_I32)) { + break; + } + } + fprintf(stderr, "ir_base[%d].ops[%d] (%d) type is incompatible with result type (%d != %d)\n", + i, j, use, use_insn->type, insn->type); + ok = 0; + } + break; + } + } + if ((ctx->flags & IR_LINEAR) + && ctx->cfg_map + && insn->op != IR_PHI + && !ir_check_domination(ctx, use, i)) { + fprintf(stderr, "ir_base[%d].ops[%d] -> %d, %d doesn't dominate %d\n", i, j, use, use, i); + ok = 0; + } + break; + case IR_OPND_CONTROL: + if (flags & IR_OP_FLAG_BB_START) { + if (!(ir_op_flags[use_insn->op] & IR_OP_FLAG_BB_END)) { + fprintf(stderr, "ir_base[%d].ops[%d] reference (%d) must be BB_END\n", i, j, use); + ok = 0; + } + } else { + if (ir_op_flags[use_insn->op] & IR_OP_FLAG_BB_END) { + fprintf(stderr, "ir_base[%d].ops[%d] reference (%d) must not be BB_END\n", i, j, use); + ok = 0; + } + } + break; + case IR_OPND_CONTROL_DEP: + if (use >= i + && !(insn->op == IR_LOOP_BEGIN)) { + fprintf(stderr, "ir_base[%d].ops[%d] invalid forward reference (%d)\n", i, j, use); + ok = 0; + } else if (insn->op == IR_PHI) { + ir_insn *merge_insn = &ctx->ir_base[insn->op1]; + if (merge_insn->op != IR_MERGE && merge_insn->op != IR_LOOP_BEGIN) { + fprintf(stderr, "ir_base[%d].ops[%d] reference (%d) must be MERGE or LOOP_BEGIN\n", i, j, use); + ok = 0; + } + } + break; + case IR_OPND_CONTROL_REF: + if (!(ir_op_flags[use_insn->op] & IR_OP_FLAG_CONTROL)) { + fprintf(stderr, "ir_base[%d].ops[%d] reference (%d) must be CONTROL\n", i, j, use); + ok = 0; + } + break; + default: + fprintf(stderr, "ir_base[%d].ops[%d] reference (%d) of unsupported kind\n", i, j, use); + ok = 0; + } + } + } else if ((insn->op == IR_RETURN || insn->op == IR_UNREACHABLE) && j == 2) { + /* pass (function returns void) */ + } else if (insn->op == IR_BEGIN && j == 1) { + /* pass (start of unreachable basic block) */ + } else if (IR_OPND_KIND(flags, j) != IR_OPND_CONTROL_REF + && (insn->op != IR_SNAPSHOT || j == 1)) { + fprintf(stderr, "ir_base[%d].ops[%d] missing reference (%d)\n", i, j, use); + ok = 0; + } + if (ctx->use_lists + && use > 0 + && !ir_check_use_list(ctx, use, i)) { + fprintf(stderr, "ir_base[%d].ops[%d] is not in use list (%d)\n", i, j, use); + ok = 0; + } + } + + switch (insn->op) { + case IR_PHI: + if (insn->inputs_count != ctx->ir_base[insn->op1].inputs_count + 1) { + fprintf(stderr, "ir_base[%d] inconsistent PHI inputs_count (%d != %d)\n", + i, insn->inputs_count, ctx->ir_base[insn->op1].inputs_count + 1); + ok = 0; + } + break; + case IR_LOAD: + case IR_STORE: + type = ctx->ir_base[insn->op2].type; + if (type != IR_ADDR + && (!IR_IS_TYPE_INT(type) || ir_type_size[type] != 
ir_type_size[IR_ADDR])) { + fprintf(stderr, "ir_base[%d].op2 must have ADDR type (%s)\n", + i, ir_type_name[type]); + ok = 0; + } + break; + case IR_VLOAD: + case IR_VSTORE: + if (ctx->ir_base[insn->op2].op != IR_VAR) { + fprintf(stderr, "ir_base[%d].op2 must be 'VAR' (%s)\n", + i, ir_op_name[ctx->ir_base[insn->op2].op]); + ok = 0; + } + break; + case IR_RETURN: + if (ctx->ret_type != (insn->op2 ? ctx->ir_base[insn->op2].type : IR_VOID)) { + fprintf(stderr, "ir_base[%d].type incompatible return type\n", i); + ok = 0; + } + break; + case IR_TAILCALL: + if (ctx->ret_type != insn->type) { + fprintf(stderr, "ir_base[%d].type incompatible return type\n", i); + ok = 0; + } + break; + } + + if (ctx->use_lists) { + ir_use_list *use_list = &ctx->use_lists[i]; + ir_ref count; + + for (j = 0, p = &ctx->use_edges[use_list->refs]; j < use_list->count; j++, p++) { + use = *p; + if (!ir_check_input_list(ctx, i, use)) { + fprintf(stderr, "ir_base[%d] is in use list of ir_base[%d]\n", use, i); + ok = 0; + } + } + + if ((flags & IR_OP_FLAG_CONTROL) && !(flags & IR_OP_FLAG_MEM)) { + switch (insn->op) { + case IR_SWITCH: + /* may have many successors */ + if (use_list->count < 1) { + fprintf(stderr, "ir_base[%d].op (SWITCH) must have at least 1 successor (%d)\n", i, use_list->count); + ok = 0; + } + break; + case IR_IF: + if (use_list->count != 2) { + fprintf(stderr, "ir_base[%d].op (IF) must have 2 successors (%d)\n", i, use_list->count); + ok = 0; + } + break; + case IR_UNREACHABLE: + case IR_RETURN: + if (use_list->count == 1) { + /* UNREACHABLE and RETURN may be linked with the following ENTRY by a fake edge */ + if (ctx->ir_base[ctx->use_edges[use_list->refs]].op == IR_ENTRY) { + break; + } + } + IR_FALLTHROUGH; + case IR_IJMP: + if (use_list->count != 0) { + fprintf(stderr, "ir_base[%d].op (%s) must not have successors (%d)\n", + i, ir_op_name[insn->op], use_list->count); + ok = 0; + } + break; + default: + /* skip data references */ + count = use_list->count; + for (j = 0, p = &ctx->use_edges[use_list->refs]; j < use_list->count; j++, p++) { + use = *p; + if (!(ir_op_flags[ctx->ir_base[use].op] & IR_OP_FLAG_CONTROL)) { + count--; + } + } + if (count != 1) { + if (insn->op == IR_CALL && count == 2) { + /* result of CALL may be used as data in control instruction */ + break; + } + if ((insn->op == IR_LOOP_END || insn->op == IR_END) && count == 2) { + /* LOOP_END/END may be linked with the following ENTRY by a fake edge */ + if (ctx->ir_base[ctx->use_edges[use_list->refs]].op == IR_ENTRY) { + count--; + } + if (ctx->ir_base[ctx->use_edges[use_list->refs + 1]].op == IR_ENTRY) { + count--; + } + if (count == 1) { + break; + } + } + fprintf(stderr, "ir_base[%d].op (%s) must have 1 successor (%d)\n", + i, ir_op_name[insn->op], count); + ok = 0; + } + break; + } + } + } + n = ir_insn_inputs_to_len(n); + i += n; + insn += n; + } + +// if (!ok) { +// ir_dump_codegen(ctx, stderr); +// } + IR_ASSERT(ok); + return ok; +} diff --git a/ext/opcache/jit/ir/ir_disasm.c b/ext/opcache/jit/ir/ir_disasm.c new file mode 100644 index 00000000000..70ee738fd6d --- /dev/null +++ b/ext/opcache/jit/ir/ir_disasm.c @@ -0,0 +1,832 @@ +/* + * IR - Lightweight JIT Compilation Framework + * (Disassembler based on libcapstone) + * Copyright (C) 2022 Zend by Perforce. 
+ * Authors: Dmitry Stogov
+ */
+
+#ifndef _GNU_SOURCE
+# define _GNU_SOURCE
+#endif
+
+#ifndef _WIN32
+# include <dlfcn.h>
+# include <fcntl.h>
+# include <unistd.h>
+#endif
+
+#include "ir.h"
+#include "ir_private.h"
+
+#ifndef _WIN32
+# include "ir_elf.h"
+#endif
+
+#include <capstone/capstone.h>
+#define HAVE_CAPSTONE_ITER
+
+typedef struct _ir_sym_node {
+	uint64_t             addr;
+	uint64_t             end;
+	struct _ir_sym_node *parent;
+	struct _ir_sym_node *child[2];
+	unsigned char        info;
+	char                 name[1];
+} ir_sym_node;
+
+static ir_sym_node *_symbols = NULL;
+
+static void ir_syms_rotateleft(ir_sym_node *p)
+{
+	ir_sym_node *r = p->child[1];
+	p->child[1] = r->child[0];
+	if (r->child[0]) {
+		r->child[0]->parent = p;
+	}
+	r->parent = p->parent;
+	if (p->parent == NULL) {
+		_symbols = r;
+	} else if (p->parent->child[0] == p) {
+		p->parent->child[0] = r;
+	} else {
+		p->parent->child[1] = r;
+	}
+	r->child[0] = p;
+	p->parent = r;
+}
+
+static void ir_syms_rotateright(ir_sym_node *p)
+{
+	ir_sym_node *l = p->child[0];
+	p->child[0] = l->child[1];
+	if (l->child[1]) {
+		l->child[1]->parent = p;
+	}
+	l->parent = p->parent;
+	if (p->parent == NULL) {
+		_symbols = l;
+	} else if (p->parent->child[1] == p) {
+		p->parent->child[1] = l;
+	} else {
+		p->parent->child[0] = l;
+	}
+	l->child[1] = p;
+	p->parent = l;
+}
+
+void ir_disasm_add_symbol(const char *name,
+                          uint64_t    addr,
+                          uint64_t    size)
+{
+	ir_sym_node *sym;
+	size_t len = strlen(name);
+
+	sym = ir_mem_pmalloc(sizeof(ir_sym_node) + len + 1);
+	if (!sym) {
+		return;
+	}
+	sym->addr = addr;
+	sym->end  = (addr + size - 1);
+	memcpy((char*)&sym->name, name, len + 1);
+	sym->parent = sym->child[0] = sym->child[1] = NULL;
+	sym->info = 1;
+	if (_symbols) {
+		ir_sym_node *node = _symbols;
+
+		/* insert it into rbtree */
+		do {
+			if (sym->addr > node->addr) {
+				IR_ASSERT(sym->addr > (node->end));
+				if (node->child[1]) {
+					node = node->child[1];
+				} else {
+					node->child[1] = sym;
+					sym->parent = node;
+					break;
+				}
+			} else if (sym->addr < node->addr) {
+				if (node->child[0]) {
+					node = node->child[0];
+				} else {
+					node->child[0] = sym;
+					sym->parent = node;
+					break;
+				}
+			} else {
+				IR_ASSERT(sym->addr == node->addr);
+				if (strcmp(name, node->name) == 0 && sym->end < node->end) {
+					/* reduce size of the existing symbol */
+					node->end = sym->end;
+				}
+				ir_mem_pfree(sym);
+				return;
+			}
+		} while (1);
+
+		/* fix rbtree after inserting */
+		while (sym && sym != _symbols && sym->parent->info == 1) {
+			if (sym->parent == sym->parent->parent->child[0]) {
+				node = sym->parent->parent->child[1];
+				if (node && node->info == 1) {
+					sym->parent->info = 0;
+					node->info = 0;
+					sym->parent->parent->info = 1;
+					sym = sym->parent->parent;
+				} else {
+					if (sym == sym->parent->child[1]) {
+						sym = sym->parent;
+						ir_syms_rotateleft(sym);
+					}
+					sym->parent->info = 0;
+					sym->parent->parent->info = 1;
+					ir_syms_rotateright(sym->parent->parent);
+				}
+			} else {
+				node = sym->parent->parent->child[0];
+				if (node && node->info == 1) {
+					sym->parent->info = 0;
+					node->info = 0;
+					sym->parent->parent->info = 1;
+					sym = sym->parent->parent;
+				} else {
+					if (sym == sym->parent->child[0]) {
+						sym = sym->parent;
+						ir_syms_rotateright(sym);
+					}
+					sym->parent->info = 0;
+					sym->parent->parent->info = 1;
+					ir_syms_rotateleft(sym->parent->parent);
+				}
+			}
+		}
+	} else {
+		_symbols = sym;
+	}
+	_symbols->info = 0;
+}
+
+static void ir_disasm_destroy_symbols(ir_sym_node *n)
+{
+	if (n) {
+		if (n->child[0]) {
+			ir_disasm_destroy_symbols(n->child[0]);
+		}
+		if (n->child[1]) {
+			ir_disasm_destroy_symbols(n->child[1]);
+		}
+		ir_mem_pfree(n);
+	}
+}
+
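+/*
+ * Note: the symbols registered above form a red-black tree ordered by the
+ * closed address ranges [addr, addr + size - 1], so the lookup below resolves
+ * an arbitrary address in O(log n).  A usage sketch (the helper name and the
+ * 64-byte size are made-up illustration values, not part of this file):
+ *
+ *   extern void my_helper(void);
+ *   ir_disasm_add_symbol("my_helper", (uint64_t)(uintptr_t)my_helper, 64);
+ *
+ *   int64_t off;
+ *   const char *name = ir_disasm_find_symbol(addr, &off);
+ *   // name == "my_helper" and off == addr - (uint64_t)(uintptr_t)my_helper
+ *   // for any addr inside the registered range; NULL otherwise.
+ */
+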
+const char* ir_disasm_find_symbol(uint64_t addr, int64_t *offset) +{ + ir_sym_node *node = _symbols; + while (node) { + if (addr < node->addr) { + node = node->child[0]; + } else if (addr > node->end) { + node = node->child[1]; + } else { + *offset = addr - node->addr; + return node->name; + } + } + return NULL; +} + +static uint64_t ir_disasm_branch_target(csh cs, const cs_insn *insn) +{ + unsigned int i; + +#if defined(IR_TARGET_X86) || defined(IR_TARGET_X64) + if (cs_insn_group(cs, insn, X86_GRP_JUMP)) { + for (i = 0; i < insn->detail->x86.op_count; i++) { + if (insn->detail->x86.operands[i].type == X86_OP_IMM) { + return insn->detail->x86.operands[i].imm; + } + } + } +#elif defined(IR_TARGET_AARCH64) + if (cs_insn_group(cs, insn, ARM64_GRP_JUMP) + || insn->id == ARM64_INS_BL + || insn->id == ARM64_INS_ADR) { + for (i = 0; i < insn->detail->arm64.op_count; i++) { + if (insn->detail->arm64.operands[i].type == ARM64_OP_IMM) + return insn->detail->arm64.operands[i].imm; + } + } +#endif + + return 0; +} + +static uint64_t ir_disasm_rodata_reference(csh cs, const cs_insn *insn) +{ +#if defined(IR_TARGET_X86) + unsigned int i; + + for (i = 0; i < insn->detail->x86.op_count; i++) { + if (insn->detail->x86.operands[i].type == X86_OP_MEM + && insn->detail->x86.operands[i].mem.base == X86_REG_INVALID + && insn->detail->x86.operands[i].mem.segment == X86_REG_INVALID + && insn->detail->x86.operands[i].mem.index == X86_REG_INVALID + && insn->detail->x86.operands[i].mem.scale == 1) { + return (uint32_t)insn->detail->x86.operands[i].mem.disp; + } + } + if (cs_insn_group(cs, insn, X86_GRP_JUMP)) { + for (i = 0; i < insn->detail->x86.op_count; i++) { + if (insn->detail->x86.operands[i].type == X86_OP_MEM + && insn->detail->x86.operands[i].mem.disp) { + return (uint32_t)insn->detail->x86.operands[i].mem.disp; + } + } + } + if (insn->id == X86_INS_MOV + && insn->detail->x86.op_count == 2 + && insn->detail->x86.operands[0].type == X86_OP_IMM + && insn->detail->x86.operands[0].size == sizeof(void*)) { + return (uint32_t)insn->detail->x86.operands[0].imm; + } +#elif defined(IR_TARGET_X64) + unsigned int i; + + for (i = 0; i < insn->detail->x86.op_count; i++) { + if (insn->detail->x86.operands[i].type == X86_OP_MEM + && insn->detail->x86.operands[i].mem.base == X86_REG_RIP + && insn->detail->x86.operands[i].mem.segment == X86_REG_INVALID + // TODO: support for index and scale + && insn->detail->x86.operands[i].mem.index == X86_REG_INVALID + && insn->detail->x86.operands[i].mem.scale == 1) { + return insn->detail->x86.operands[i].mem.disp + insn->address + insn->size; + } + } +#elif defined(IR_TARGET_AARCH64) + unsigned int i; + + if (insn->id == ARM64_INS_ADR + || insn->id == ARM64_INS_LDRB + || insn->id == ARM64_INS_LDR + || insn->id == ARM64_INS_LDRH + || insn->id == ARM64_INS_LDRSB + || insn->id == ARM64_INS_LDRSH + || insn->id == ARM64_INS_LDRSW + || insn->id == ARM64_INS_STRB + || insn->id == ARM64_INS_STR + || insn->id == ARM64_INS_STRH) { + for (i = 0; i < insn->detail->arm64.op_count; i++) { + if (insn->detail->arm64.operands[i].type == ARM64_OP_IMM) + return insn->detail->arm64.operands[i].imm; + } + } + return 0; +#endif + + return 0; +} + +static const char* ir_disasm_resolver(uint64_t addr, + int64_t *offset) +{ +#ifndef _WIN32 + const char *name; + void *a = (void*)(uintptr_t)(addr); + Dl_info info; + + name = ir_disasm_find_symbol(addr, offset); + if (name) { + return name; + } + + if (dladdr(a, &info) + && info.dli_sname != NULL + && info.dli_saddr == a) { + *offset = 0; + return 
info.dli_sname; + } +#else + const char *name; + name = ir_disasm_find_symbol(addr, offset); + if (name) { + return name; + } +#endif + + return NULL; +} + +int ir_disasm(const char *name, + const void *start, + size_t size, + bool asm_addr, + ir_ctx *ctx, + FILE *f) +{ + size_t orig_size = size; + const void *orig_end = (void *)((char *)start + size); + const void *end; + ir_hashtab labels; + int32_t l, n; + uint64_t addr; + csh cs; + cs_insn *insn; +# ifdef HAVE_CAPSTONE_ITER + const uint8_t *cs_code; + size_t cs_size; + uint64_t cs_addr; +# else + size_t count, i; +# endif + const char *sym; + int64_t offset = 0; + char *p, *q, *r; + uint32_t rodata_offset = 0; + uint32_t jmp_table_offset = 0; + ir_hashtab_bucket *b; + int32_t entry; + cs_err ret; + +# if defined(IR_TARGET_X86) || defined(IR_TARGET_X64) +# ifdef IR_TARGET_X64 + ret = cs_open(CS_ARCH_X86, CS_MODE_64, &cs); + if (ret != CS_ERR_OK) { + fprintf(stderr, "cs_open(CS_ARCH_X86, CS_MODE_64, ...) failed; [%d] %s\n", ret, cs_strerror(ret)); + return 0; + } +# else + ret = cs_open(CS_ARCH_X86, CS_MODE_32, &cs); + if (ret != CS_ERR_OK) { + fprintf(stderr, "cs_open(CS_ARCH_X86, CS_MODE_32, ...) failed; [%d] %s\n", ret, cs_strerror(ret)); + return 0; + } +# endif + cs_option(cs, CS_OPT_DETAIL, CS_OPT_ON); +# if DISASM_INTEL_SYNTAX + cs_option(cs, CS_OPT_SYNTAX, CS_OPT_SYNTAX_INTEL); +# else + cs_option(cs, CS_OPT_SYNTAX, CS_OPT_SYNTAX_ATT); +# endif +# elif defined(IR_TARGET_AARCH64) + ret = cs_open(CS_ARCH_ARM64, CS_MODE_ARM, &cs); + if (ret != CS_ERR_OK) { + fprintf(stderr, "cs_open(CS_ARCH_ARM64, CS_MODE_ARM, ...) failed; [%d] %s\n", ret, cs_strerror(ret)); + return 0; + } + cs_option(cs, CS_OPT_DETAIL, CS_OPT_ON); + cs_option(cs, CS_OPT_SYNTAX, CS_OPT_SYNTAX_ATT); +# endif + + if (name) { + fprintf(f, "%s:\n", name); + } + + ir_hashtab_init(&labels, 32); + + if (ctx) { + if (ctx->entries_count) { + int i = ctx->entries_count; + do { + ir_insn *insn = &ctx->ir_base[ctx->entries[--i]]; + ir_hashtab_add(&labels, insn->op3, insn->op2); + } while (i != 0); + } + + rodata_offset = ctx->rodata_offset; + if (rodata_offset) { + if (size > rodata_offset) { + size = rodata_offset; + } + } + jmp_table_offset = ctx->jmp_table_offset; + if (jmp_table_offset) { + uint32_t n; + uintptr_t *p; + + IR_ASSERT(orig_size - jmp_table_offset <= 0xffffffff); + n = (uint32_t)(orig_size - jmp_table_offset); + if (size > jmp_table_offset) { + size = jmp_table_offset; + } + while (n > 0 && IR_ALIGNED_SIZE(n, sizeof(void*)) != n) { + jmp_table_offset++; + n--; + } + IR_ASSERT(n > 0 && n % sizeof(void*) == 0 && jmp_table_offset % sizeof(void*) == 0); + p = (uintptr_t*)((char*)start + jmp_table_offset); + while (n > 0) { + if (*p) { + if ((uintptr_t)*p >= (uintptr_t)start && (uintptr_t)*p < (uintptr_t)orig_end) { + ir_hashtab_add(&labels, (uint32_t)((uintptr_t)*p - (uintptr_t)start), -1); + } + } + p++; + n -= sizeof(void*); + } + } + } + end = (void *)((char *)start + size); + +# ifdef HAVE_CAPSTONE_ITER + cs_code = start; + cs_size = (uint8_t*)end - (uint8_t*)start; + cs_addr = (uint64_t)(uintptr_t)cs_code; + insn = cs_malloc(cs); + while (cs_disasm_iter(cs, &cs_code, &cs_size, &cs_addr, insn)) { + if ((addr = ir_disasm_branch_target(cs, insn)) +# else + count = cs_disasm(cs, start, (uint8_t*)end - (uint8_t*)start, (uintptr_t)start, 0, &insn); + for (i = 0; i < count; i++) { + if ((addr = ir_disasm_branch_target(cs, &(insn[i]))) +# endif + && (addr >= (uint64_t)(uintptr_t)start && addr < (uint64_t)(uintptr_t)end)) { + ir_hashtab_add(&labels, 
(uint32_t)((uintptr_t)addr - (uintptr_t)start), -1);
+# ifdef HAVE_CAPSTONE_ITER
+		} else if ((addr = ir_disasm_rodata_reference(cs, insn))) {
+# else
+		} else if ((addr = ir_disasm_rodata_reference(cs, &(insn[i])))) {
+# endif
+			if (addr >= (uint64_t)(uintptr_t)end && addr < (uint64_t)(uintptr_t)orig_end) {
+				ir_hashtab_add(&labels, (uint32_t)((uintptr_t)addr - (uintptr_t)start), -1);
+			}
+		}
+	}
+
+	ir_hashtab_key_sort(&labels);
+
+	/* renumber labels */
+	l = 0;
+	n = labels.count;
+	b = labels.data;
+	while (n > 0) {
+		if (b->val < 0) {
+			b->val = --l;
+		}
+		b++;
+		n--;
+	}
+
+# ifdef HAVE_CAPSTONE_ITER
+	cs_code = start;
+	cs_size = (uint8_t*)end - (uint8_t*)start;
+	cs_addr = (uint64_t)(uintptr_t)cs_code;
+	while (cs_disasm_iter(cs, &cs_code, &cs_size, &cs_addr, insn)) {
+		entry = ir_hashtab_find(&labels, (uint32_t)((uintptr_t)insn->address - (uintptr_t)start));
+# else
+	for (i = 0; i < count; i++) {
+		entry = ir_hashtab_find(&labels, (uint32_t)((uintptr_t)insn->address - (uintptr_t)start));
+# endif
+		if (entry != (ir_ref)IR_INVALID_VAL) {
+			if (entry >= 0) {
+				fprintf(f, ".ENTRY_%d:\n", entry);
+			} else {
+				fprintf(f, ".L%d:\n", -entry);
+			}
+		}
+
+# ifdef HAVE_CAPSTONE_ITER
+		if (asm_addr) {
+			fprintf(f, " %" PRIx64 ":", insn->address);
+		}
+		p = insn->op_str;
+#if defined(IR_TARGET_X64) && (CS_API_MAJOR < 5)
+		/* Fix capstone MOVD/MOVQ disassembly mismatch */
+		if (insn->id == X86_INS_MOVQ && strcmp(insn->mnemonic, "movd") == 0) {
+			insn->mnemonic[3] = 'q';
+		}
+#endif
+		if (strlen(p) == 0) {
+			fprintf(f, "\t%s\n", insn->mnemonic);
+			continue;
+		} else {
+			fprintf(f, "\t%s ", insn->mnemonic);
+		}
+# else
+		if (asm_addr) {
+			fprintf(f, " %" PRIx64 ":", insn[i].address);
+		}
+		p = insn[i].op_str;
+		if (strlen(p) == 0) {
+			fprintf(f, "\t%s\n", insn[i].mnemonic);
+			continue;
+		} else {
+			fprintf(f, "\t%s ", insn[i].mnemonic);
+		}
+# endif
+		/* Try to replace the target addresses with symbols */
+#if defined(IR_TARGET_X64)
+# ifdef HAVE_CAPSTONE_ITER
+		if ((addr = ir_disasm_rodata_reference(cs, insn))) {
+# else
+		if ((addr = ir_disasm_rodata_reference(cs, &(insn[i])))) {
+# endif
+			if (addr >= (uint64_t)(uintptr_t)end && addr < (uint64_t)(uintptr_t)orig_end) {
+				entry = ir_hashtab_find(&labels, (uint32_t)((uintptr_t)addr - (uintptr_t)start));
+				if (entry != (ir_ref)IR_INVALID_VAL) {
+					r = q = strstr(p, "(%rip)");
+					if (r && r > p) {
+						r--;
+						while (r > p && ((*r >= '0' && *r <= '9') || (*r >= 'a' && *r <= 'f') || (*r >= 'A' && *r <= 'F'))) {
+							r--;
+						}
+						if (r > p && *r == 'x' && *(r - 1) == '0') {
+							r -= 2;
+						}
+						if (r > p) {
+							fwrite(p, 1, r - p, f);
+						}
+						if (entry >= 0) {
+							fprintf(f, ".ENTRY_%d%s\n", entry, q);
+						} else {
+							fprintf(f, ".L%d%s\n", -entry, q);
+						}
+						continue;
+					}
+				}
+			}
+		}
+#endif
+#if defined(IR_TARGET_AARCH64)
+		while ((q = strstr(p, "#0x")) != NULL) {
+			r = q + 3;
+#else
+		while ((q = strstr(p, "0x")) != NULL) {
+			r = q + 2;
+#endif
+			addr = 0;
+			while (1) {
+				if (*r >= '0' && *r <= '9') {
+					addr = addr * 16 + (*r - '0');
+				} else if (*r >= 'A' && *r <= 'F') {
+					addr = addr * 16 + (*r - 'A' + 10);
+				} else if (*r >= 'a' && *r <= 'f') {
+					addr = addr * 16 + (*r - 'a' + 10);
+				} else {
+					break;
+				}
+				r++;
+			}
+			if (p != q && *(q-1) == '-') {
+				q--;
+				addr = (uint32_t)(-(int64_t)addr);
+			}
+			if (addr >= (uint64_t)(uintptr_t)start && addr < (uint64_t)(uintptr_t)orig_end) {
+				entry = ir_hashtab_find(&labels, (uint32_t)((uintptr_t)addr - (uintptr_t)start));
+				if (entry != (ir_ref)IR_INVALID_VAL) {
+					fwrite(p, 1, q - p, f);
+					if (entry >= 0) {
+						fprintf(f,
".ENTRY_%d", entry); + } else { + fprintf(f, ".L%d", -entry); + } + } else if (r > p) { + fwrite(p, 1, r - p, f); + } + } else if ((sym = ir_disasm_resolver(addr, &offset))) { +#if defined(IR_TARGET_X86) || defined(IR_TARGET_X64) + if (offset && p != q && *(q-1) == '$') { + if (r > p) { + fwrite(p, 1, r - p, f); + } + p = r; + continue; + } +#endif + if (q > p) { + fwrite(p, 1, q - p, f); + } + fputs(sym, f); + if (offset != 0) { + if (offset > 0) { + fprintf(f, "+0x%" PRIx64, offset); + } else { + fprintf(f, "-0x%" PRIx64, -offset); + } + } + } else if (r > p) { + fwrite(p, 1, r - p, f); + } + p = r; + } + fprintf(f, "%s\n", p); + } +# ifdef HAVE_CAPSTONE_ITER + cs_free(insn, 1); +# else + cs_free(insn, count); +# endif + + if (rodata_offset || jmp_table_offset) { + fprintf(f, ".rodata\n"); + } + if (rodata_offset) { + const unsigned char *p = (unsigned char*)start + rodata_offset; + uint32_t n = jmp_table_offset ? + (uint32_t)(jmp_table_offset - rodata_offset) : + (uint32_t)(orig_size - rodata_offset); + uint32_t j; + + while (n > 0) { + entry = ir_hashtab_find(&labels, (uint32_t)((uintptr_t)p - (uintptr_t)start)); + if (entry != (ir_ref)IR_INVALID_VAL) { + if (entry >= 0) { + fprintf(f, ".ENTRY_%d:\n", entry); + } else { + fprintf(f, ".L%d:\n", -entry); + } + } + fprintf(f, "\t.db 0x%02x", (int)*p); + p++; + n--; + j = 15; + while (n > 0 && j > 0) { + entry = ir_hashtab_find(&labels, (uint32_t)((uintptr_t)p - (uintptr_t)start)); + if (entry != (ir_ref)IR_INVALID_VAL) { + break; + } + fprintf(f, ", 0x%02x", (int)*p); + p++; + n--; + j--; + } + fprintf(f, "\n"); + } + } + if (jmp_table_offset) { + uintptr_t *p = (uintptr_t*)(unsigned char*)start + jmp_table_offset; + uint32_t n = (uint32_t)(orig_size - jmp_table_offset); + + fprintf(f, ".align %d\n", (int)sizeof(void*)); + + p = (uintptr_t*)((char*)start + jmp_table_offset); + while (n > 0) { + entry = ir_hashtab_find(&labels, (uint32_t)((uintptr_t)p - (uintptr_t)start)); + if (entry != (ir_ref)IR_INVALID_VAL) { + if (entry >= 0) { + fprintf(f, ".ENTRY_%d:\n", entry); + } else { + fprintf(f, ".L%d:\n", -entry); + } + } + if (*p) { + if ((uintptr_t)*p >= (uintptr_t)start && (uintptr_t)*p < (uintptr_t)orig_end) { + entry = ir_hashtab_find(&labels, (uint32_t)(*p - (uintptr_t)start)); + IR_ASSERT(entry != (ir_ref)IR_INVALID_VAL); + if (entry >= 0) { + if (sizeof(void*) == 8) { + fprintf(f, "\t.qword .ENTRY_%d\n", entry); + } else { + fprintf(f, "\t.dword .ENTRY_%d\n", entry); + } + } else { + if (sizeof(void*) == 8) { + fprintf(f, "\t.qword .L%d\n", -entry); + } else { + fprintf(f, "\t.dword .L%d\n", -entry); + } + } + } else { + int64_t offset; + const char *name = ir_disasm_find_symbol(*p, &offset); + + if (name && offset == 0) { + if (sizeof(void*) == 8) { + fprintf(f, "\t.qword %s\n", name); + } else { + fprintf(f, "\t.dword %s\n", name); + } + } else { + if (sizeof(void*) == 8) { + fprintf(f, "\t.qword 0x%0llx\n", (long long)*p); + } else { + fprintf(f, "\t.dword 0x%0x\n", (int)*p); + } + } + } + } else { + if (sizeof(void*) == 8) { + fprintf(f, "\t.qword 0\n"); + } else { + fprintf(f, "\t.dword 0\n"); + } + } + p++; + n -= sizeof(void*); + } + } + + fprintf(f, "\n"); + + ir_hashtab_free(&labels); + + cs_close(&cs); + + return 1; +} + +#ifndef _WIN32 +static void* ir_elf_read_sect(int fd, ir_elf_sectheader *sect) +{ + void *s = ir_mem_malloc(sect->size); + + if (lseek(fd, sect->ofs, SEEK_SET) < 0) { + ir_mem_free(s); + return NULL; + } + if (read(fd, s, sect->size) != (ssize_t)sect->size) { + ir_mem_free(s); + return NULL; + } + + return 
s;
+}
+
+static void ir_elf_load_symbols(void)
+{
+	ir_elf_header hdr;
+	ir_elf_sectheader sect;
+	int i;
+#if defined(__linux__)
+	int fd = open("/proc/self/exe", O_RDONLY);
+#elif defined(__NetBSD__)
+	int fd = open("/proc/curproc/exe", O_RDONLY);
+#elif defined(__FreeBSD__) || defined(__DragonFly__)
+	char path[PATH_MAX];
+	size_t pathlen = sizeof(path);
+	int mib[4] = {CTL_KERN, KERN_PROC, KERN_PROC_PATHNAME, -1};
+	if (sysctl(mib, 4, path, &pathlen, NULL, 0) == -1) {
+		return;
+	}
+	int fd = open(path, O_RDONLY);
+#elif defined(__sun)
+	int fd = open("/proc/self/path/a.out", O_RDONLY);
+#elif defined(__HAIKU__)
+	char path[PATH_MAX];
+	if (find_path(B_APP_IMAGE_SYMBOL, B_FIND_PATH_IMAGE_PATH,
+			NULL, path, sizeof(path)) != B_OK) {
+		return;
+	}
+
+	int fd = open(path, O_RDONLY);
+#else
+	// To be completed eventually for other ELF platforms.
+	// (Apple targets use Mach-O rather than ELF, so they are skipped here.)
+	int fd = -1;
+#endif
+
+	if (fd >= 0) {
+		if (read(fd, &hdr, sizeof(hdr)) == sizeof(hdr)
+		 && hdr.emagic[0] == '\177'
+		 && hdr.emagic[1] == 'E'
+		 && hdr.emagic[2] == 'L'
+		 && hdr.emagic[3] == 'F'
+		 && lseek(fd, hdr.shofs, SEEK_SET) >= 0) {
+			for (i = 0; i < hdr.shnum; i++) {
+				if (read(fd, &sect, sizeof(sect)) == sizeof(sect)
+				 && sect.type == ELFSECT_TYPE_SYMTAB) {
+					uint32_t n, count = sect.size / sizeof(ir_elf_symbol);
+					ir_elf_symbol *syms = ir_elf_read_sect(fd, &sect);
+					char *str_tbl;
+
+					if (syms) {
+						if (lseek(fd, hdr.shofs + sect.link * sizeof(sect), SEEK_SET) >= 0
+						 && read(fd, &sect, sizeof(sect)) == sizeof(sect)
+						 && (str_tbl = (char*)ir_elf_read_sect(fd, &sect)) != NULL) {
+							for (n = 0; n < count; n++) {
+								if (syms[n].name
+								 && (ELFSYM_TYPE(syms[n].info) == ELFSYM_TYPE_FUNC
+									/*|| ELFSYM_TYPE(syms[n].info) == ELFSYM_TYPE_DATA*/)
+								 && (ELFSYM_BIND(syms[n].info) == ELFSYM_BIND_LOCAL
+									/*|| ELFSYM_BIND(syms[n].info) == ELFSYM_BIND_GLOBAL*/)) {
+									ir_disasm_add_symbol(str_tbl + syms[n].name, syms[n].value, syms[n].size);
+								}
+							}
+							ir_mem_free(str_tbl);
+						}
+						ir_mem_free(syms);
+					}
+					if (lseek(fd, hdr.shofs + (i + 1) * sizeof(sect), SEEK_SET) < 0) {
+						break;
+					}
+				}
+			}
+		}
+		close(fd);
+	}
+}
+#endif
+
+int ir_disasm_init(void)
+{
+#ifndef _WIN32
+	ir_elf_load_symbols();
+#endif
+	return 1;
+}
+
+void ir_disasm_free(void)
+{
+	if (_symbols) {
+		ir_disasm_destroy_symbols(_symbols);
+		_symbols = NULL;
+	}
+}
diff --git a/ext/opcache/jit/ir/ir_dump.c b/ext/opcache/jit/ir/ir_dump.c
new file mode 100644
index 00000000000..06c1bf65f33
--- /dev/null
+++ b/ext/opcache/jit/ir/ir_dump.c
@@ -0,0 +1,713 @@
+/*
+ * IR - Lightweight JIT Compilation Framework
+ * (debug dumps)
+ * Copyright (C) 2022 Zend by Perforce.
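+ *
+ * ir_dump() prints the raw linear IR, ir_dump_dot() emits a Graphviz graph,
+ * and ir_dump_use_lists(), ir_dump_cfg(), ir_dump_cfg_map(),
+ * ir_dump_live_ranges() and ir_dump_codegen() dump the later pipeline stages.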
+ * Authors: Dmitry Stogov + */ + +#include "ir.h" +#include "ir_private.h" + +void ir_dump(const ir_ctx *ctx, FILE *f) +{ + ir_ref i, j, n, ref, *p; + ir_insn *insn; + uint32_t flags; + + for (i = 1 - ctx->consts_count, insn = ctx->ir_base + i; i < IR_UNUSED; i++, insn++) { + fprintf(f, "%05d %s %s(", i, ir_op_name[insn->op], ir_type_name[insn->type]); + ir_print_const(ctx, insn, f, true); + fprintf(f, ")\n"); + } + + for (i = IR_UNUSED + 1, insn = ctx->ir_base + i; i < ctx->insns_count; i++, insn++) { + flags = ir_op_flags[insn->op]; + fprintf(f, "%05d %s", i, ir_op_name[insn->op]); + if ((flags & IR_OP_FLAG_DATA) || ((flags & IR_OP_FLAG_MEM) && insn->type != IR_VOID)) { + fprintf(f, " %s", ir_type_name[insn->type]); + } + n = ir_operands_count(ctx, insn); + for (j = 1, p = insn->ops + 1; j <= 3; j++, p++) { + ref = *p; + if (ref) { + fprintf(f, " %05d", ref); + } + } + if (n > 3) { + n -= 3; + do { + i++; + insn++; + fprintf(f, "\n%05d", i); + for (j = 0; j < 4; j++, p++) { + ref = *p; + if (ref) { + fprintf(f, " %05d", ref); + } + } + n -= 4; + } while (n > 0); + } + fprintf(f, "\n"); + } +} + +void ir_dump_dot(const ir_ctx *ctx, FILE *f) +{ + int DATA_WEIGHT = 0; + int CONTROL_WEIGHT = 5; + int REF_WEIGHT = 4; + ir_ref i, j, n, ref, *p; + ir_insn *insn; + uint32_t flags; + + fprintf(f, "digraph ir {\n"); + fprintf(f, "\trankdir=TB;\n"); + for (i = 1 - ctx->consts_count, insn = ctx->ir_base + i; i < IR_UNUSED; i++, insn++) { + fprintf(f, "\tc%d [label=\"C%d: CONST %s(", -i, -i, ir_type_name[insn->type]); + /* FIXME(tony): We still cannot handle strings with escaped double quote inside */ + ir_print_const(ctx, insn, f, false); + fprintf(f, ")\",style=filled,fillcolor=yellow];\n"); + } + + for (i = IR_UNUSED + 1, insn = ctx->ir_base + i; i < ctx->insns_count;) { + flags = ir_op_flags[insn->op]; + if (flags & IR_OP_FLAG_CONTROL) { + if (insn->op == IR_START) { + fprintf(f, "\t{rank=min; n%d [label=\"%d: %s\",shape=box,style=\"rounded,filled\",fillcolor=red];}\n", i, i, ir_op_name[insn->op]); + } else if (insn->op == IR_ENTRY) { + fprintf(f, "\t{n%d [label=\"%d: %s\",shape=box,style=\"rounded,filled\",fillcolor=red];}\n", i, i, ir_op_name[insn->op]); + } else if (flags & IR_OP_FLAG_TERMINATOR) { + fprintf(f, "\t{rank=max; n%d [label=\"%d: %s\",shape=box,style=\"rounded,filled\",fillcolor=red];}\n", i, i, ir_op_name[insn->op]); + } else if (flags & IR_OP_FLAG_MEM) { + fprintf(f, "\tn%d [label=\"%d: %s\",shape=box,style=filled,fillcolor=pink];\n", i, i, ir_op_name[insn->op]); + } else { + fprintf(f, "\tn%d [label=\"%d: %s\",shape=box,style=filled,fillcolor=lightcoral];\n", i, i, ir_op_name[insn->op]); + } + } else if (flags & IR_OP_FLAG_DATA) { + if (IR_OPND_KIND(flags, 1) == IR_OPND_DATA) { + /* not a leaf */ + fprintf(f, "\tn%d [label=\"%d: %s\"", i, i, ir_op_name[insn->op]); + fprintf(f, ",shape=diamond,style=filled,fillcolor=deepskyblue];\n"); + } else { + if (insn->op == IR_PARAM) { + fprintf(f, "\tn%d [label=\"%d: %s %s \\\"%s\\\"\",style=filled,fillcolor=lightblue];\n", + i, i, ir_op_name[insn->op], ir_type_name[insn->type], ir_get_str(ctx, insn->op2)); + } else if (insn->op == IR_VAR) { + fprintf(f, "\tn%d [label=\"%d: %s %s \\\"%s\\\"\"];\n", i, i, ir_op_name[insn->op], ir_type_name[insn->type], ir_get_str(ctx, insn->op2)); + } else { + fprintf(f, "\tn%d [label=\"%d: %s %s\",style=filled,fillcolor=deepskyblue];\n", i, i, ir_op_name[insn->op], ir_type_name[insn->type]); + } + } + } + n = ir_operands_count(ctx, insn); + for (j = 1, p = insn->ops + 1; j <= n; j++, p++) { + ref = *p; + 
if (ref) { + switch (IR_OPND_KIND(flags, j)) { + case IR_OPND_DATA: + if (IR_IS_CONST_REF(ref)) { + fprintf(f, "\tc%d -> n%d [color=blue,weight=%d];\n", -ref, i, DATA_WEIGHT); + } else if (insn->op == IR_PHI + && ctx->ir_base[insn->op1].op == IR_LOOP_BEGIN + && ctx->ir_base[ir_insn_op(&ctx->ir_base[insn->op1], j - 1)].op == IR_LOOP_END) { + fprintf(f, "\tn%d -> n%d [color=blue,dir=back];\n", i, ref); + } else { + fprintf(f, "\tn%d -> n%d [color=blue,weight=%d];\n", ref, i, DATA_WEIGHT); + } + break; + case IR_OPND_CONTROL: + if (insn->op == IR_LOOP_BEGIN && ctx->ir_base[ref].op == IR_LOOP_END) { + fprintf(f, "\tn%d -> n%d [style=bold,color=red,dir=back];\n", i, ref); + } else if (insn->op == IR_ENTRY) { + fprintf(f, "\tn%d -> n%d [style=bold,color=red,style=dashed,weight=%d];\n", ref, i, CONTROL_WEIGHT); + } else { + fprintf(f, "\tn%d -> n%d [style=bold,color=red,weight=%d];\n", ref, i, CONTROL_WEIGHT); + } + break; + case IR_OPND_CONTROL_DEP: + case IR_OPND_CONTROL_REF: + fprintf(f, "\tn%d -> n%d [style=dashed,dir=back,weight=%d];\n", ref, i, REF_WEIGHT); + break; + } + } + } + n = ir_insn_inputs_to_len(n); + i += n; + insn += n; + } + fprintf(f, "}\n"); +} + +void ir_dump_use_lists(const ir_ctx *ctx, FILE *f) +{ + ir_ref i, j, n, *p; + ir_use_list *list; + + if (ctx->use_lists) { + fprintf(f, "{ # Use Lists\n"); + for (i = 1, list = &ctx->use_lists[1]; i < ctx->insns_count; i++, list++) { + n = list->count; + if (n > 0) { + p = &ctx->use_edges[list->refs]; + fprintf(f, "%05d(%d): [%05d", i, n, *p); + p++; + for (j = 1; j < n; j++, p++) { + fprintf(f, ", %05d", *p); + } + fprintf(f, "]\n"); + } + } + fprintf(f, "}\n"); + } +} + +static int ir_dump_dessa_move(ir_ctx *ctx, uint8_t type, ir_ref from, ir_ref to) +{ + FILE *f = ctx->data; + int8_t reg; + + if (IR_IS_CONST_REF(from)) { + fprintf(f, "\tmov c_%d -> ", -from); + } else if (from) { + fprintf(f, "\tmov R%d", ctx->vregs[from]); + if (ctx->live_intervals && ctx->live_intervals[ctx->vregs[from]]) { + reg = ctx->live_intervals[ctx->vregs[from]]->reg; + if (reg >= 0) { + fprintf(f, " [%%%s]", ir_reg_name(reg, type)); + } + } + fprintf(f, " -> "); + } else { + fprintf(f, "\tmov TMP -> "); + } + + if (to) { + fprintf(f, "R%d", ctx->vregs[to]); + if (ctx->live_intervals && ctx->live_intervals[ctx->vregs[to]]) { + reg = ctx->live_intervals[ctx->vregs[to]]->reg; + if (reg >= 0) { + fprintf(f, " [%%%s]", ir_reg_name(reg, type)); + } + } + fprintf(f, "\n"); + } else { + fprintf(f, "TMP\n"); + } + return 1; +} + +void ir_dump_cfg(ir_ctx *ctx, FILE *f) +{ + if (ctx->cfg_blocks) { + uint32_t b, i, bb_count = ctx->cfg_blocks_count; + ir_block *bb = ctx->cfg_blocks + 1; + + fprintf(f, "{ # CFG\n"); + for (b = 1; b <= bb_count; b++, bb++) { + fprintf(f, "BB%d:\n", b); + fprintf(f, "\tstart=%d\n", bb->start); + fprintf(f, "\tend=%d\n", bb->end); + if (bb->successors_count) { + fprintf(f, "\tsuccessors(%d) [BB%d", bb->successors_count, ctx->cfg_edges[bb->successors]); + for (i = 1; i < bb->successors_count; i++) { + fprintf(f, ", BB%d", ctx->cfg_edges[bb->successors + i]); + } + fprintf(f, "]\n"); + } + if (bb->predecessors_count) { + fprintf(f, "\tpredecessors(%d) [BB%d", bb->predecessors_count, ctx->cfg_edges[bb->predecessors]); + for (i = 1; i < bb->predecessors_count; i++) { + fprintf(f, ", BB%d", ctx->cfg_edges[bb->predecessors + i]); + } + fprintf(f, "]\n"); + } + if (bb->dom_parent > 0) { + fprintf(f, "\tdom_parent=BB%d\n", bb->dom_parent); + } + fprintf(f, "\tdom_depth=%d\n", bb->dom_depth); + if (bb->dom_child > 0) { + int child = 
bb->dom_child;
+				fprintf(f, "\tdom_children [BB%d", child);
+				child = ctx->cfg_blocks[child].dom_next_child;
+				while (child > 0) {
+					fprintf(f, ", BB%d", child);
+					child = ctx->cfg_blocks[child].dom_next_child;
+				}
+				fprintf(f, "]\n");
+			}
+			if (bb->flags & IR_BB_ENTRY) {
+				fprintf(f, "\tENTRY\n");
+			}
+			if (bb->flags & IR_BB_UNREACHABLE) {
+				fprintf(f, "\tUNREACHABLE\n");
+			}
+			if (bb->flags & IR_BB_LOOP_HEADER) {
+				if (bb->flags & IR_BB_LOOP_WITH_ENTRY) {
+					fprintf(f, "\tLOOP_HEADER, LOOP_WITH_ENTRY\n");
+				} else {
+					fprintf(f, "\tLOOP_HEADER\n");
+				}
+			}
+			if (bb->flags & IR_BB_IRREDUCIBLE_LOOP) {
+				fprintf(f, "\tIRREDUCIBLE_LOOP\n");
+			}
+			if (bb->loop_header > 0) {
+				fprintf(f, "\tloop_header=BB%d\n", bb->loop_header);
+			}
+			if (bb->loop_depth != 0) {
+				fprintf(f, "\tloop_depth=%d\n", bb->loop_depth);
+			}
+			if (bb->flags & IR_BB_OSR_ENTRY_LOADS) {
+				ir_list *list = (ir_list*)ctx->osr_entry_loads;
+				uint32_t pos = 0, i, count;
+
+				IR_ASSERT(list);
+				while (1) {
+					i = ir_list_at(list, pos);
+					if (b == i) {
+						break;
+					}
+					IR_ASSERT(i != 0); /* end marker */
+					pos++;
+					count = ir_list_at(list, pos);
+					pos += count + 1;
+				}
+				pos++;
+				count = ir_list_at(list, pos);
+				pos++;
+
+				for (i = 0; i < count; i++, pos++) {
+					ir_ref ref = ir_list_at(list, pos);
+					fprintf(f, "\tOSR_ENTRY_LOAD=d_%d\n", ref);
+				}
+			}
+			if (bb->flags & IR_BB_DESSA_MOVES) {
+				ctx->data = f;
+				ir_gen_dessa_moves(ctx, b, ir_dump_dessa_move);
+			}
+		}
+		fprintf(f, "}\n");
+	}
+}
+
+void ir_dump_cfg_map(const ir_ctx *ctx, FILE *f)
+{
+	ir_ref i;
+	uint32_t *_blocks = ctx->cfg_map;
+
+	if (_blocks) {
+		fprintf(f, "{ # CFG map (insn -> bb)\n");
+		for (i = IR_UNUSED + 1; i < ctx->insns_count; i++) {
+			fprintf(f, "%d -> %d\n", i, _blocks[i]);
+		}
+		fprintf(f, "}\n");
+	}
+}
+
+void ir_dump_live_ranges(const ir_ctx *ctx, FILE *f)
+{
+	ir_ref i, j, n;
+
+	if (!ctx->live_intervals) {
+		return;
+	}
+	fprintf(f, "{ # LIVE-RANGES (vregs_count=%d)\n", ctx->vregs_count);
+	for (i = 0; i <= ctx->vregs_count; i++) {
+		ir_live_interval *ival = ctx->live_intervals[i];
+
+		if (ival) {
+			ir_live_range *p;
+			ir_use_pos *use_pos;
+
+			if (i == 0) {
+				fprintf(f, "TMP");
+			} else {
+				for (j = 1; j < ctx->insns_count; j++) {
+					if (ctx->vregs[j] == (uint32_t)i) {
+						break;
+					}
+				}
+				fprintf(f, "R%d (d_%d", i, j);
+				for (j++; j < ctx->insns_count; j++) {
+					if (ctx->vregs[j] == (uint32_t)i) {
+						fprintf(f, ", d_%d", j);
+					}
+				}
+				fprintf(f, ")");
+				if (ival->stack_spill_pos != -1) {
+					if (ival->flags & IR_LIVE_INTERVAL_SPILL_SPECIAL) {
+						IR_ASSERT(ctx->spill_base >= 0);
+						fprintf(f, " [SPILL=0x%x(%%%s)]", ival->stack_spill_pos, ir_reg_name(ctx->spill_base, IR_ADDR));
+					} else {
+						fprintf(f, " [SPILL=0x%x]", ival->stack_spill_pos);
+					}
+				}
+			}
+			if (ival->next) {
+				fprintf(f, "\n\t");
+			} else if (ival->reg != IR_REG_NONE) {
+				fprintf(f, " ");
+			}
+			do {
+				if (ival->reg != IR_REG_NONE) {
+					fprintf(f, "[%%%s]", ir_reg_name(ival->reg, ival->type));
+				}
+				p = &ival->range;
+				fprintf(f, ": [%d.%d-%d.%d)",
+					IR_LIVE_POS_TO_REF(p->start), IR_LIVE_POS_TO_SUB_REF(p->start),
+					IR_LIVE_POS_TO_REF(p->end), IR_LIVE_POS_TO_SUB_REF(p->end));
+				if (i == 0) {
+					/* This is a TMP register */
+					if (ival->tmp_ref == IR_LIVE_POS_TO_REF(p->start)) {
+						fprintf(f, "/%d", ival->tmp_op_num);
+					} else {
+						fprintf(f, "/%d.%d", ival->tmp_ref, ival->tmp_op_num);
+					}
+				} else {
+					p = p->next;
+					while (p) {
+						fprintf(f, ", [%d.%d-%d.%d)",
+							IR_LIVE_POS_TO_REF(p->start), IR_LIVE_POS_TO_SUB_REF(p->start),
+							IR_LIVE_POS_TO_REF(p->end), IR_LIVE_POS_TO_SUB_REF(p->end));
+						p = p->next;
} + } + use_pos = ival->use_pos; + while (use_pos) { + if (use_pos->flags & IR_PHI_USE) { + IR_ASSERT(use_pos->op_num > 0); + fprintf(f, ", PHI_USE(%d.%d, phi=d_%d/%d)", + IR_LIVE_POS_TO_REF(use_pos->pos), IR_LIVE_POS_TO_SUB_REF(use_pos->pos), + -use_pos->hint_ref, use_pos->op_num); + } else if (use_pos->flags & IR_FUSED_USE) { + fprintf(f, ", USE(%d.%d/%d.%d", + IR_LIVE_POS_TO_REF(use_pos->pos), IR_LIVE_POS_TO_SUB_REF(use_pos->pos), + -use_pos->hint_ref, use_pos->op_num); + if (use_pos->hint >= 0) { + fprintf(f, ", hint=%%%s", ir_reg_name(use_pos->hint, ival->type)); + } + fprintf(f, ")"); + if (use_pos->flags & IR_USE_MUST_BE_IN_REG) { + fprintf(f, "!"); + } + } else { + if (!use_pos->op_num) { + fprintf(f, ", DEF(%d.%d", + IR_LIVE_POS_TO_REF(use_pos->pos), IR_LIVE_POS_TO_SUB_REF(use_pos->pos)); + } else { + fprintf(f, ", USE(%d.%d/%d", + IR_LIVE_POS_TO_REF(use_pos->pos), IR_LIVE_POS_TO_SUB_REF(use_pos->pos), + use_pos->op_num); + } + if (use_pos->hint >= 0) { + fprintf(f, ", hint=%%%s", ir_reg_name(use_pos->hint, ival->type)); + } + if (use_pos->hint_ref) { + fprintf(f, ", hint=R%d", ctx->vregs[use_pos->hint_ref]); + } + fprintf(f, ")"); + if (use_pos->flags & IR_USE_MUST_BE_IN_REG) { + fprintf(f, "!"); + } + } + use_pos = use_pos->next; + } + if (ival->next) { + fprintf(f, "\n\t"); + } + ival = ival->next; + } while (ival); + fprintf(f, "\n"); + } + } +#if 1 + n = ctx->vregs_count + ir_regs_number() + 2; + for (i = ctx->vregs_count + 1; i <= n; i++) { + ir_live_interval *ival = ctx->live_intervals[i]; + + if (ival) { + ir_live_range *p = &ival->range; + fprintf(f, "[%%%s] : [%d.%d-%d.%d)", + ir_reg_name(ival->reg, ival->type), + IR_LIVE_POS_TO_REF(p->start), IR_LIVE_POS_TO_SUB_REF(p->start), + IR_LIVE_POS_TO_REF(p->end), IR_LIVE_POS_TO_SUB_REF(p->end)); + p = p->next; + while (p) { + fprintf(f, ", [%d.%d-%d.%d)", + IR_LIVE_POS_TO_REF(p->start), IR_LIVE_POS_TO_SUB_REF(p->start), + IR_LIVE_POS_TO_REF(p->end), IR_LIVE_POS_TO_SUB_REF(p->end)); + p = p->next; + } + fprintf(f, "\n"); + } + } +#endif + fprintf(f, "}\n"); +} + +void ir_dump_codegen(const ir_ctx *ctx, FILE *f) +{ + ir_ref i, j, n, ref, *p; + ir_insn *insn; + uint32_t flags, b; + ir_block *bb; + bool first; + + fprintf(f, "{\n"); + for (i = IR_UNUSED + 1, insn = ctx->ir_base - i; i < ctx->consts_count; i++, insn--) { + fprintf(f, "\t%s c_%d = ", ir_type_cname[insn->type], i); + if (insn->op == IR_FUNC) { + if (!insn->const_flags) { + fprintf(f, "func(%s)", ir_get_str(ctx, insn->val.i32)); + } else { + fprintf(f, "func(%s, %d)", ir_get_str(ctx, insn->val.i32), insn->const_flags); + } + } else if (insn->op == IR_SYM) { + fprintf(f, "sym(%s)", ir_get_str(ctx, insn->val.i32)); + } else if (insn->op == IR_FUNC_ADDR) { + fprintf(f, "func_addr("); + ir_print_const(ctx, insn, f, true); + if (insn->const_flags) { + fprintf(f, ", %d", insn->const_flags); + } + fprintf(f, ")"); + } else { + ir_print_const(ctx, insn, f, true); + } + fprintf(f, ";\n"); + } + + for (b = 1, bb = ctx->cfg_blocks + b; b <= ctx->cfg_blocks_count; b++, bb++) { + fprintf(f, "#BB%d:\n", b); + + for (i = bb->start, insn = ctx->ir_base + i; i <= bb->end;) { + flags = ir_op_flags[insn->op]; + if (flags & IR_OP_FLAG_CONTROL) { + if (!(flags & IR_OP_FLAG_MEM) || insn->type == IR_VOID) { + fprintf(f, "\tl_%d = ", i); + } else { + fprintf(f, "\t%s d_%d", ir_type_cname[insn->type], i); + if (ctx->vregs && ctx->vregs[i]) { + fprintf(f, " {R%d}", ctx->vregs[i]); + } + if (ctx->regs) { + int8_t reg = ctx->regs[i][0]; + if (reg != IR_REG_NONE) { + fprintf(f, " {%%%s%s}", 
ir_reg_name(IR_REG_NUM(reg), insn->type), + (reg & (IR_REG_SPILL_STORE|IR_REG_SPILL_SPECIAL)) ? ":store" : ""); + } + } + fprintf(f, ", l_%d = ", i); + } + } else { + fprintf(f, "\t"); + if (flags & IR_OP_FLAG_DATA) { + fprintf(f, "%s d_%d", ir_type_cname[insn->type], i); + if (ctx->vregs && ctx->vregs[i]) { + fprintf(f, " {R%d}", ctx->vregs[i]); + } + if (ctx->regs) { + int8_t reg = ctx->regs[i][0]; + if (reg != IR_REG_NONE) { + fprintf(f, " {%%%s%s}", ir_reg_name(IR_REG_NUM(reg), insn->type), + (reg & (IR_REG_SPILL_STORE|IR_REG_SPILL_SPECIAL)) ? ":store" : ""); + } + } + fprintf(f, " = "); + } + } + fprintf(f, "%s", ir_op_name[insn->op]); + n = ir_operands_count(ctx, insn); + if ((insn->op == IR_MERGE || insn->op == IR_LOOP_BEGIN) && n != 2) { + fprintf(f, "/%d", n); + } else if ((insn->op == IR_CALL || insn->op == IR_TAILCALL) && n != 2) { + fprintf(f, "/%d", n - 2); + } else if (insn->op == IR_PHI && n != 3) { + fprintf(f, "/%d", n - 1); + } else if (insn->op == IR_SNAPSHOT) { + fprintf(f, "/%d", n - 1); + } + first = 1; + for (j = 1, p = insn->ops + 1; j <= n; j++, p++) { + uint32_t opnd_kind = IR_OPND_KIND(flags, j); + + ref = *p; + if (ref) { + switch (opnd_kind) { + case IR_OPND_DATA: + if (IR_IS_CONST_REF(ref)) { + fprintf(f, "%sc_%d", first ? "(" : ", ", -ref); + } else { + fprintf(f, "%sd_%d", first ? "(" : ", ", ref); + } + if (ctx->vregs && ref > 0 && ctx->vregs[ref]) { + fprintf(f, " {R%d}", ctx->vregs[ref]); + } + if (ctx->regs) { + int8_t *regs = ctx->regs[i]; + int8_t reg = regs[j]; + if (reg != IR_REG_NONE) { + fprintf(f, " {%%%s%s}", ir_reg_name(IR_REG_NUM(reg), ctx->ir_base[ref].type), + (reg & (IR_REG_SPILL_LOAD|IR_REG_SPILL_SPECIAL)) ? ":load" : ""); + } + } + first = 0; + break; + case IR_OPND_CONTROL: + case IR_OPND_CONTROL_DEP: + case IR_OPND_CONTROL_REF: + fprintf(f, "%sl_%d", first ? "(" : ", ", ref); + first = 0; + break; + case IR_OPND_STR: + fprintf(f, "%s\"%s\"", first ? "(" : ", ", ir_get_str(ctx, ref)); + first = 0; + break; + case IR_OPND_PROB: + if (ref == 0) { + break; + } + IR_FALLTHROUGH; + case IR_OPND_NUM: + fprintf(f, "%s%d", first ? "(" : ", ", ref); + first = 0; + break; + } + } else if (opnd_kind == IR_OPND_NUM) { + fprintf(f, "%s%d", first ? "(" : ", ", ref); + first = 0; + } else if (IR_IS_REF_OPND_KIND(opnd_kind) && j != n) { + fprintf(f, "%snull", first ? 
"(" : ", "); + first = 0; + } + } + if (first) { + fprintf(f, ";"); + } else { + fprintf(f, ");"); + } + if (((flags & IR_OP_FLAG_DATA) || ((flags & IR_OP_FLAG_MEM) && insn->type != IR_VOID)) && ctx->binding) { + ir_ref var = ir_binding_find(ctx, i); + if (var) { + IR_ASSERT(var < 0); + fprintf(f, " # BIND(0x%x);", -var); + } + } + if (ctx->rules) { + uint32_t rule = ctx->rules[i]; + uint32_t id = rule & ~(IR_FUSED|IR_SKIPPED|IR_SIMPLE); + + if (id < IR_LAST_OP) { + fprintf(f, " # RULE(%s", ir_op_name[id]); + } else { + IR_ASSERT(id > IR_LAST_OP /*&& id < IR_LAST_RULE*/); + fprintf(f, " # RULE(%s", ir_rule_name[id - IR_LAST_OP]); + } + if (rule & IR_FUSED) { + fprintf(f, ":FUSED"); + } + if (rule & IR_SKIPPED) { + fprintf(f, ":SKIPPED"); + } + if (rule & IR_SIMPLE) { + fprintf(f, ":SIMPLE"); + } + fprintf(f, ")"); + } + fprintf(f, "\n"); + n = ir_insn_inputs_to_len(n); + i += n; + insn += n; + } + + if (bb->flags & IR_BB_DESSA_MOVES) { + uint32_t succ; + ir_block *succ_bb; + ir_use_list *use_list; + ir_ref k, i, *p, use_ref, input; + ir_insn *use_insn; + + IR_ASSERT(bb->successors_count == 1); + succ = ctx->cfg_edges[bb->successors]; + succ_bb = &ctx->cfg_blocks[succ]; + IR_ASSERT(succ_bb->predecessors_count > 1); + use_list = &ctx->use_lists[succ_bb->start]; + k = ir_phi_input_number(ctx, succ_bb, b); + + for (i = 0, p = &ctx->use_edges[use_list->refs]; i < use_list->count; i++, p++) { + use_ref = *p; + use_insn = &ctx->ir_base[use_ref]; + if (use_insn->op == IR_PHI) { + input = ir_insn_op(use_insn, k); + if (IR_IS_CONST_REF(input)) { + fprintf(f, "\t# DESSA MOV c_%d", -input); + } else if (ctx->vregs[input] != ctx->vregs[use_ref]) { + fprintf(f, "\t# DESSA MOV d_%d {R%d}", input, ctx->vregs[input]); + } else { + continue; + } + if (ctx->regs) { + int8_t *regs = ctx->regs[use_ref]; + int8_t reg = regs[k]; + if (reg != IR_REG_NONE) { + fprintf(f, " {%%%s%s}", ir_reg_name(IR_REG_NUM(reg), ctx->ir_base[input].type), + (reg & (IR_REG_SPILL_LOAD|IR_REG_SPILL_SPECIAL)) ? ":load" : ""); + } + } + fprintf(f, " -> d_%d {R%d}", use_ref, ctx->vregs[use_ref]); + if (ctx->regs) { + int8_t reg = ctx->regs[use_ref][0]; + if (reg != IR_REG_NONE) { + fprintf(f, " {%%%s%s}", ir_reg_name(IR_REG_NUM(reg), ctx->ir_base[use_ref].type), + (reg & (IR_REG_SPILL_STORE|IR_REG_SPILL_SPECIAL)) ? 
":store" : ""); + } + } + fprintf(f, "\n"); + } + } + } + + insn = &ctx->ir_base[bb->end]; + if (insn->op == IR_END || insn->op == IR_LOOP_END) { + uint32_t succ; + + if (bb->successors_count == 1) { + succ = ctx->cfg_edges[bb->successors]; + } else { + /* END may have a fake control edge to ENTRY */ + IR_ASSERT(bb->successors_count == 2); + succ = ctx->cfg_edges[bb->successors]; + if (ctx->ir_base[ctx->cfg_blocks[succ].start].op == IR_ENTRY) { + succ = ctx->cfg_edges[bb->successors + 1]; +#ifdef IR_DEBUG + } else { + uint32_t fake_succ = ctx->cfg_edges[bb->successors + 1]; + IR_ASSERT(ctx->ir_base[ctx->cfg_blocks[fake_succ].start].op == IR_ENTRY); +#endif + } + } + if (succ != b + 1) { + fprintf(f, "\t# GOTO BB%d\n", succ); + } + } else if (insn->op == IR_IF) { + uint32_t true_block, false_block, *p; + + p = &ctx->cfg_edges[bb->successors]; + true_block = *p; + if (ctx->ir_base[ctx->cfg_blocks[true_block].start].op == IR_IF_TRUE) { + false_block = *(p+1); + IR_ASSERT(ctx->ir_base[ctx->cfg_blocks[false_block].start].op == IR_IF_FALSE); + } else { + false_block = true_block; + IR_ASSERT(ctx->ir_base[ctx->cfg_blocks[false_block].start].op == IR_IF_FALSE); + true_block = *(p+1); + IR_ASSERT(ctx->ir_base[ctx->cfg_blocks[true_block].start].op == IR_IF_TRUE); + } + fprintf(f, "\t# IF_TRUE BB%d, IF_FALSE BB%d\n", true_block, false_block); + } else if (insn->op == IR_SWITCH) { + fprintf(f, "\t# SWITCH ...\n"); + } + } + fprintf(f, "}\n"); +} diff --git a/ext/opcache/jit/ir/ir_elf.h b/ext/opcache/jit/ir/ir_elf.h new file mode 100644 index 00000000000..961789a7b4a --- /dev/null +++ b/ext/opcache/jit/ir/ir_elf.h @@ -0,0 +1,101 @@ +/* + * IR - Lightweight JIT Compilation Framework + * (ELF header definitions) + * Copyright (C) 2022 Zend by Perforce. + * Authors: Dmitry Stogov + */ + +#ifndef IR_ELF +#define IR_ELF + +#if defined(IR_TARGET_X64) || defined(IR_TARGET_AARCH64) +# define ELF64 +#else +# undef ELF64 +#endif + +typedef struct _ir_elf_header { + uint8_t emagic[4]; + uint8_t eclass; + uint8_t eendian; + uint8_t eversion; + uint8_t eosabi; + uint8_t eabiversion; + uint8_t epad[7]; + uint16_t type; + uint16_t machine; + uint32_t version; + uintptr_t entry; + uintptr_t phofs; + uintptr_t shofs; + uint32_t flags; + uint16_t ehsize; + uint16_t phentsize; + uint16_t phnum; + uint16_t shentsize; + uint16_t shnum; + uint16_t shstridx; +} ir_elf_header; + +typedef struct ir_elf_sectheader { + uint32_t name; + uint32_t type; + uintptr_t flags; + uintptr_t addr; + uintptr_t ofs; + uintptr_t size; + uint32_t link; + uint32_t info; + uintptr_t align; + uintptr_t entsize; +} ir_elf_sectheader; + +#define ELFSECT_IDX_ABS 0xfff1 + +enum { + ELFSECT_TYPE_PROGBITS = 1, + ELFSECT_TYPE_SYMTAB = 2, + ELFSECT_TYPE_STRTAB = 3, + ELFSECT_TYPE_NOBITS = 8, + ELFSECT_TYPE_DYNSYM = 11, +}; + +#define ELFSECT_FLAGS_WRITE (1 << 0) +#define ELFSECT_FLAGS_ALLOC (1 << 1) +#define ELFSECT_FLAGS_EXEC (1 << 2) +#define ELFSECT_FLAGS_TLS (1 << 10) + +typedef struct ir_elf_symbol { +#ifdef ELF64 + uint32_t name; + uint8_t info; + uint8_t other; + uint16_t sectidx; + uintptr_t value; + uint64_t size; +#else + uint32_t name; + uintptr_t value; + uint32_t size; + uint8_t info; + uint8_t other; + uint16_t sectidx; +#endif +} ir_elf_symbol; + +#define ELFSYM_BIND(info) ((info) >> 4) +#define ELFSYM_TYPE(info) ((info) & 0xf) +#define ELFSYM_INFO(bind, type) (((bind) << 4) | (type)) + +enum { + ELFSYM_TYPE_DATA = 2, + ELFSYM_TYPE_FUNC = 2, + ELFSYM_TYPE_FILE = 4, +}; + +enum { + ELFSYM_BIND_LOCAL = 0, + ELFSYM_BIND_GLOBAL = 1, +}; + 
+#endif
diff --git a/ext/opcache/jit/ir/ir_emit.c b/ext/opcache/jit/ir/ir_emit.c
new file mode 100644
index 00000000000..d6de65cda70
--- /dev/null
+++ b/ext/opcache/jit/ir/ir_emit.c
@@ -0,0 +1,608 @@
+/*
+ * IR - Lightweight JIT Compilation Framework
+ * (Native code generator based on DynAsm)
+ * Copyright (C) 2022 Zend by Perforce.
+ * Authors: Dmitry Stogov
+ */
+
+#include "ir.h"
+
+#if defined(IR_TARGET_X86) || defined(IR_TARGET_X64)
+# include "ir_x86.h"
+#elif defined(IR_TARGET_AARCH64)
+# include "ir_aarch64.h"
+#else
+# error "Unknown IR target"
+#endif
+
+#include "ir_private.h"
+#ifndef _WIN32
+# include <dlfcn.h>
+#else
+# define WIN32_LEAN_AND_MEAN
+# include <windows.h>
+# include <psapi.h>
+#endif
+
+#define DASM_M_GROW(ctx, t, p, sz, need) \
+	do { \
+		size_t _sz = (sz), _need = (need); \
+		if (_sz < _need) { \
+			if (_sz < 16) _sz = 16; \
+			while (_sz < _need) _sz += _sz; \
+			(p) = (t *)ir_mem_realloc((p), _sz); \
+			(sz) = _sz; \
+		} \
+	} while(0)
+
+#define DASM_M_FREE(ctx, p, sz) ir_mem_free(p)
+
+#if IR_DEBUG
+# define DASM_CHECKS
+#endif
+
+typedef struct _ir_copy {
+	ir_type type;
+	ir_reg  from;
+	ir_reg  to;
+} ir_copy;
+
+typedef struct _ir_delayed_copy {
+	ir_ref  input;
+	ir_ref  output;
+	ir_type type;
+	ir_reg  from;
+	ir_reg  to;
+} ir_delayed_copy;
+
+#if IR_REG_INT_ARGS
+static const int8_t _ir_int_reg_params[IR_REG_INT_ARGS];
+#else
+static const int8_t *_ir_int_reg_params;
+#endif
+#if IR_REG_FP_ARGS
+static const int8_t _ir_fp_reg_params[IR_REG_FP_ARGS];
+#else
+static const int8_t *_ir_fp_reg_params;
+#endif
+
+#ifdef IR_HAVE_FASTCALL
+static const int8_t _ir_int_fc_reg_params[IR_REG_INT_FCARGS];
+static const int8_t *_ir_fp_fc_reg_params;
+
+bool ir_is_fastcall(const ir_ctx *ctx, const ir_insn *insn)
+{
+	if (sizeof(void*) == 4) {
+		if (IR_IS_CONST_REF(insn->op2)) {
+			return (ctx->ir_base[insn->op2].const_flags & IR_CONST_FASTCALL_FUNC) != 0;
+		} else if (ctx->ir_base[insn->op2].op == IR_BITCAST) {
+			return (ctx->ir_base[insn->op2].op2 & IR_CONST_FASTCALL_FUNC) != 0;
+		}
+		return 0;
+	}
+	return 0;
+}
+#else
+bool ir_is_fastcall(const ir_ctx *ctx, const ir_insn *insn)
+{
+	return 0;
+}
+#endif
+
+bool ir_is_vararg(const ir_ctx *ctx, ir_insn *insn)
+{
+	if (IR_IS_CONST_REF(insn->op2)) {
+		return (ctx->ir_base[insn->op2].const_flags & IR_CONST_VARARG_FUNC) != 0;
+	} else if (ctx->ir_base[insn->op2].op == IR_BITCAST) {
+		return (ctx->ir_base[insn->op2].op2 & IR_CONST_VARARG_FUNC) != 0;
+	}
+	return 0;
+}
+
+IR_ALWAYS_INLINE uint32_t ir_rule(const ir_ctx *ctx, ir_ref ref)
+{
+	IR_ASSERT(!IR_IS_CONST_REF(ref));
+	return ctx->rules[ref];
+}
+
+IR_ALWAYS_INLINE bool ir_in_same_block(ir_ctx *ctx, ir_ref ref)
+{
+	return ref > ctx->bb_start;
+}
+
+
+static ir_reg ir_get_param_reg(const ir_ctx *ctx, ir_ref ref)
+{
+	ir_use_list *use_list = &ctx->use_lists[1];
+	int i;
+	ir_ref use, *p;
+	ir_insn *insn;
+	int int_param = 0;
+	int fp_param = 0;
+	int int_reg_params_count = IR_REG_INT_ARGS;
+	int fp_reg_params_count = IR_REG_FP_ARGS;
+	const int8_t *int_reg_params = _ir_int_reg_params;
+	const int8_t *fp_reg_params = _ir_fp_reg_params;
+
+#ifdef IR_HAVE_FASTCALL
+	if (sizeof(void*) == 4 && (ctx->flags & IR_FASTCALL_FUNC)) {
+		int_reg_params_count = IR_REG_INT_FCARGS;
+		fp_reg_params_count = IR_REG_FP_FCARGS;
+		int_reg_params = _ir_int_fc_reg_params;
+		fp_reg_params = _ir_fp_fc_reg_params;
+	}
+#endif
+
+	for (i = 0, p = &ctx->use_edges[use_list->refs]; i < use_list->count; i++, p++) {
+		use = *p;
+		insn = &ctx->ir_base[use];
+		if (insn->op == IR_PARAM) {
+			if (IR_IS_TYPE_INT(insn->type)) {
+				if (use ==
ref) {
+					if (int_param < int_reg_params_count) {
+						return int_reg_params[int_param];
+					} else {
+						return IR_REG_NONE;
+					}
+				}
+				int_param++;
+#ifdef _WIN64
+				/* The WIN64 calling convention uses a common counter for int and fp registers */
+				fp_param++;
+#endif
+			} else {
+				IR_ASSERT(IR_IS_TYPE_FP(insn->type));
+				if (use == ref) {
+					if (fp_param < fp_reg_params_count) {
+						return fp_reg_params[fp_param];
+					} else {
+						return IR_REG_NONE;
+					}
+				}
+				fp_param++;
+#ifdef _WIN64
+				/* The WIN64 calling convention uses a common counter for int and fp registers */
+				int_param++;
+#endif
+			}
+		}
+	}
+	return IR_REG_NONE;
+}
+
+static int ir_get_args_regs(const ir_ctx *ctx, const ir_insn *insn, int8_t *regs)
+{
+	int j, n;
+	ir_type type;
+	int int_param = 0;
+	int fp_param = 0;
+	int count = 0;
+	int int_reg_params_count = IR_REG_INT_ARGS;
+	int fp_reg_params_count = IR_REG_FP_ARGS;
+	const int8_t *int_reg_params = _ir_int_reg_params;
+	const int8_t *fp_reg_params = _ir_fp_reg_params;
+
+#ifdef IR_HAVE_FASTCALL
+	if (sizeof(void*) == 4 && ir_is_fastcall(ctx, insn)) {
+		int_reg_params_count = IR_REG_INT_FCARGS;
+		fp_reg_params_count = IR_REG_FP_FCARGS;
+		int_reg_params = _ir_int_fc_reg_params;
+		fp_reg_params = _ir_fp_fc_reg_params;
+	}
+#endif
+
+	n = insn->inputs_count;
+	n = IR_MIN(n, IR_MAX_REG_ARGS + 2);
+	for (j = 3; j <= n; j++) {
+		type = ctx->ir_base[ir_insn_op(insn, j)].type;
+		if (IR_IS_TYPE_INT(type)) {
+			if (int_param < int_reg_params_count) {
+				regs[j] = int_reg_params[int_param];
+				count = j + 1;
+			} else {
+				regs[j] = IR_REG_NONE;
+			}
+			int_param++;
+#ifdef _WIN64
+			/* The WIN64 calling convention uses a common counter for int and fp registers */
+			fp_param++;
+#endif
+		} else {
+			IR_ASSERT(IR_IS_TYPE_FP(type));
+			if (fp_param < fp_reg_params_count) {
+				regs[j] = fp_reg_params[fp_param];
+				count = j + 1;
+			} else {
+				regs[j] = IR_REG_NONE;
+			}
+			fp_param++;
+#ifdef _WIN64
+			/* The WIN64 calling convention uses a common counter for int and fp registers */
+			int_param++;
+#endif
+		}
+	}
+	return count;
+}
+
+static bool ir_is_same_mem(const ir_ctx *ctx, ir_ref r1, ir_ref r2)
+{
+	ir_live_interval *ival1, *ival2;
+	int32_t o1, o2;
+
+	if (IR_IS_CONST_REF(r1) || IR_IS_CONST_REF(r2)) {
+		return 0;
+	}
+
+	IR_ASSERT(ctx->vregs[r1] && ctx->vregs[r2]);
+	ival1 = ctx->live_intervals[ctx->vregs[r1]];
+	ival2 = ctx->live_intervals[ctx->vregs[r2]];
+	IR_ASSERT(ival1 && ival2);
+	o1 = ival1->stack_spill_pos;
+	o2 = ival2->stack_spill_pos;
+	IR_ASSERT(o1 != -1 && o2 != -1);
+	return o1 == o2;
+}
+
+static bool ir_is_same_mem_var(const ir_ctx *ctx, ir_ref r1, int32_t offset)
+{
+	ir_live_interval *ival1;
+	int32_t o1;
+
+	if (IR_IS_CONST_REF(r1)) {
+		return 0;
+	}
+
+	IR_ASSERT(ctx->vregs[r1]);
+	ival1 = ctx->live_intervals[ctx->vregs[r1]];
+	IR_ASSERT(ival1);
+	o1 = ival1->stack_spill_pos;
+	IR_ASSERT(o1 != -1);
+	return o1 == offset;
+}
+
+void *ir_resolve_sym_name(const char *name)
+{
+	void *handle = NULL;
+	void *addr;
+
+#ifndef _WIN32
+# ifdef RTLD_DEFAULT
+	handle = RTLD_DEFAULT;
+# endif
+	addr = dlsym(handle, name);
+#else
+	HMODULE mods[256];
+	DWORD cbNeeded;
+	uint32_t i = 0;
+
+	/* Quick workaround to prevent *.irt test failures */
+	// TODO: try to find a general solution ???
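+	/* Note (an assumption, not part of the original explanation): the module
+	 * scan below may fail to resolve plain C runtime functions such as printf,
+	 * so the static fallback keeps the *.irt tests deterministic. */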
+ if (strcmp(name, "printf") == 0) { + return (void*)printf; + } + + addr = NULL; + + EnumProcessModules(GetCurrentProcess(), mods, sizeof(mods), &cbNeeded); + + while(i < (cbNeeded / sizeof(HMODULE))) { + addr = GetProcAddress(mods[i], name); + if (addr) { + return addr; + } + i++; + } +#endif + IR_ASSERT(addr != NULL); + return addr; +} + +#ifdef IR_SNAPSHOT_HANDLER_DCL + IR_SNAPSHOT_HANDLER_DCL(); +#endif + +static void *ir_jmp_addr(ir_ctx *ctx, ir_insn *insn, ir_insn *addr_insn) +{ + void *addr; + + IR_ASSERT(addr_insn->type == IR_ADDR); + if (addr_insn->op == IR_FUNC) { + addr = (ctx->loader && ctx->loader->resolve_sym_name) ? + ctx->loader->resolve_sym_name(ctx->loader, ir_get_str(ctx, addr_insn->val.i32)) : + ir_resolve_sym_name(ir_get_str(ctx, addr_insn->val.i32)); + } else { + IR_ASSERT(addr_insn->op == IR_ADDR || addr_insn->op == IR_FUNC_ADDR); + addr = (void*)addr_insn->val.addr; + } +#ifdef IR_SNAPSHOT_HANDLER + if (ctx->ir_base[insn->op1].op == IR_SNAPSHOT) { + addr = IR_SNAPSHOT_HANDLER(ctx, insn->op1, &ctx->ir_base[insn->op1], addr); + } +#endif + return addr; +} + +#if defined(__GNUC__) +# pragma GCC diagnostic push +# pragma GCC diagnostic ignored "-Warray-bounds" +# pragma GCC diagnostic ignored "-Wimplicit-fallthrough" +#endif + +#if defined(IR_TARGET_X86) || defined(IR_TARGET_X64) +# include "dynasm/dasm_proto.h" +# include "dynasm/dasm_x86.h" +#elif defined(IR_TARGET_AARCH64) +# include "dynasm/dasm_proto.h" +static int ir_add_veneer(dasm_State *Dst, void *buffer, uint32_t ins, int *b, uint32_t *cp, ptrdiff_t offset); +# define DASM_ADD_VENEER ir_add_veneer +# include "dynasm/dasm_arm64.h" +#else +# error "Unknown IR target" +#endif + +#if defined(__GNUC__) +# pragma GCC diagnostic pop +#endif + + +/* Forward Declarations */ +static void ir_emit_osr_entry_loads(ir_ctx *ctx, int b, ir_block *bb); +static void ir_emit_dessa_moves(ir_ctx *ctx, int b, ir_block *bb); + +#if defined(IR_TARGET_X86) || defined(IR_TARGET_X64) +# include "ir_emit_x86.h" +#elif defined(IR_TARGET_AARCH64) +# include "ir_emit_aarch64.h" +#else +# error "Unknown IR target" +#endif + +static IR_NEVER_INLINE void ir_emit_osr_entry_loads(ir_ctx *ctx, int b, ir_block *bb) +{ + ir_list *list = (ir_list*)ctx->osr_entry_loads; + int pos = 0, count, i; + ir_ref ref; + + IR_ASSERT(ctx->binding); + IR_ASSERT(list); + while (1) { + i = ir_list_at(list, pos); + if (b == i) { + break; + } + IR_ASSERT(i != 0); /* end marker */ + pos++; + count = ir_list_at(list, pos); + pos += count + 1; + } + pos++; + count = ir_list_at(list, pos); + pos++; + + for (i = 0; i < count; i++, pos++) { + ref = ir_list_at(list, pos); + IR_ASSERT(ref >= 0 && ctx->vregs[ref] && ctx->live_intervals[ctx->vregs[ref]]); + if (!(ctx->live_intervals[ctx->vregs[ref]]->flags & IR_LIVE_INTERVAL_SPILLED)) { + /* not spilled */ + ir_reg reg = ctx->live_intervals[ctx->vregs[ref]]->reg; + ir_type type = ctx->ir_base[ref].type; + int32_t offset = -ir_binding_find(ctx, ref); + + IR_ASSERT(offset > 0); + if (IR_IS_TYPE_INT(type)) { + ir_emit_load_mem_int(ctx, type, reg, ctx->spill_base, offset); + } else { + ir_emit_load_mem_fp(ctx, type, reg, ctx->spill_base, offset); + } + } else { + IR_ASSERT(ctx->live_intervals[ctx->vregs[ref]]->flags & IR_LIVE_INTERVAL_SPILL_SPECIAL); + } + } +} + +static void ir_emit_dessa_moves(ir_ctx *ctx, int b, ir_block *bb) +{ + uint32_t succ, k, n = 0, n2 = 0; + ir_block *succ_bb; + ir_use_list *use_list; + ir_ref i, *p; + ir_copy *copies; + ir_delayed_copy *copies2; + ir_reg tmp_reg = ctx->regs[bb->end][0]; + ir_reg 
tmp_fp_reg = ctx->regs[bb->end][1]; + + IR_ASSERT(bb->successors_count == 1); + succ = ctx->cfg_edges[bb->successors]; + succ_bb = &ctx->cfg_blocks[succ]; + IR_ASSERT(succ_bb->predecessors_count > 1); + use_list = &ctx->use_lists[succ_bb->start]; + k = ir_phi_input_number(ctx, succ_bb, b); + + copies = ir_mem_malloc(use_list->count * sizeof(ir_copy) + use_list->count * sizeof(ir_delayed_copy)); + copies2 = (ir_delayed_copy*)(copies + use_list->count); + + for (i = 0, p = &ctx->use_edges[use_list->refs]; i < use_list->count; i++, p++) { + ir_ref ref = *p; + ir_insn *insn = &ctx->ir_base[ref]; + + if (insn->op == IR_PHI) { + ir_ref input = ir_insn_op(insn, k); + ir_reg src = ir_get_alocated_reg(ctx, ref, k); + ir_reg dst = ctx->regs[ref][0]; + + if (dst == IR_REG_NONE) { + /* STORE to memory cannot clobber any input register (do it right now) */ + if (IR_IS_CONST_REF(input)) { + IR_ASSERT(src == IR_REG_NONE); +#if defined(IR_TARGET_X86) || defined(IR_TARGET_X64) + if (IR_IS_TYPE_INT(insn->type) + && (ir_type_size[insn->type] != 8 || IR_IS_SIGNED_32BIT(ctx->ir_base[input].val.i64))) { + ir_emit_store_imm(ctx, insn->type, ref, ctx->ir_base[input].val.i32); + continue; + } +#endif + ir_reg tmp = IR_IS_TYPE_INT(insn->type) ? tmp_reg : tmp_fp_reg; + + IR_ASSERT(tmp != IR_REG_NONE); + ir_emit_load(ctx, insn->type, tmp, input); + ir_emit_store(ctx, insn->type, ref, tmp); + } else if (src == IR_REG_NONE) { + if (!ir_is_same_mem(ctx, input, ref)) { + ir_reg tmp = IR_IS_TYPE_INT(insn->type) ? tmp_reg : tmp_fp_reg; + + IR_ASSERT(tmp != IR_REG_NONE); + ir_emit_load(ctx, insn->type, tmp, input); + ir_emit_store(ctx, insn->type, ref, tmp); + } + } else { + if (IR_REG_SPILLED(src)) { + src = IR_REG_NUM(src); + ir_emit_load(ctx, insn->type, src, input); + if (ir_is_same_mem(ctx, input, ref)) { + continue; + } + } + ir_emit_store(ctx, insn->type, ref, src); + } + } else if (src == IR_REG_NONE) { + /* STORE of constant or memory can't be clobbered by parallel reg->reg copies (delay it) */ + copies2[n2].input = input; + copies2[n2].output = ref; + copies2[n2].type = insn->type; + copies2[n2].from = src; + copies2[n2].to = dst; + n2++; + } else { + IR_ASSERT(!IR_IS_CONST_REF(input)); + if (IR_REG_SPILLED(src)) { + ir_emit_load(ctx, insn->type, IR_REG_NUM(src), input); + } + if (IR_REG_SPILLED(dst) && (!IR_REG_SPILLED(src) || !ir_is_same_mem(ctx, input, ref))) { + ir_emit_store(ctx, insn->type, ref, IR_REG_NUM(src)); + } + if (IR_REG_NUM(src) != IR_REG_NUM(dst)) { + /* Schedule parallel reg->reg copy */ + copies[n].type = insn->type; + copies[n].from = IR_REG_NUM(src); + copies[n].to = IR_REG_NUM(dst); + n++; + } + } + } + } + + if (n > 0) { + ir_parallel_copy(ctx, copies, n, tmp_reg, tmp_fp_reg); + } + + for (n = 0; n < n2; n++) { + ir_ref input = copies2[n].input; + ir_ref ref = copies2[n].output; + ir_type type = copies2[n].type; + ir_reg dst = copies2[n].to; + + IR_ASSERT(dst != IR_REG_NONE); + if (IR_IS_CONST_REF(input)) { + ir_emit_load(ctx, type, IR_REG_NUM(dst), input); + } else { + IR_ASSERT(copies2[n].from == IR_REG_NONE); + if (IR_REG_SPILLED(dst) && ir_is_same_mem(ctx, input, ref)) { + /* avoid LOAD and STORE to the same memory */ + continue; + } + ir_emit_load(ctx, type, IR_REG_NUM(dst), input); + } + if (IR_REG_SPILLED(dst)) { + ir_emit_store(ctx, type, ref, IR_REG_NUM(dst)); + } + } + + ir_mem_free(copies); +} + +int ir_match(ir_ctx *ctx) +{ + uint32_t b; + ir_ref start, ref, *prev_ref; + ir_block *bb; + ir_insn *insn; + uint32_t entries_count = 0; + + ctx->rules = ir_mem_calloc(ctx->insns_count, 
sizeof(uint32_t)); + + prev_ref = ctx->prev_ref; + if (!prev_ref) { + ir_build_prev_refs(ctx); + prev_ref = ctx->prev_ref; + } + + if (ctx->entries_count) { + ctx->entries = ir_mem_malloc(ctx->entries_count * sizeof(ir_ref)); + } + + for (b = ctx->cfg_blocks_count, bb = ctx->cfg_blocks + b; b > 0; b--, bb--) { + IR_ASSERT(!(bb->flags & IR_BB_UNREACHABLE)); + start = bb->start; + if (UNEXPECTED(bb->flags & IR_BB_ENTRY)) { + IR_ASSERT(entries_count < ctx->entries_count); + insn = &ctx->ir_base[start]; + IR_ASSERT(insn->op == IR_ENTRY); + insn->op3 = entries_count; + ctx->entries[entries_count] = b; + entries_count++; + } + ctx->rules[start] = IR_SKIPPED | IR_NOP; + ref = bb->end; + if (bb->successors_count == 1) { + insn = &ctx->ir_base[ref]; + if (insn->op == IR_END || insn->op == IR_LOOP_END) { + ctx->rules[ref] = insn->op; + ref = prev_ref[ref]; + if (ref == start) { + if (EXPECTED(!(bb->flags & IR_BB_ENTRY))) { + bb->flags |= IR_BB_EMPTY; + } else if (ctx->flags & IR_MERGE_EMPTY_ENTRIES) { + bb->flags |= IR_BB_EMPTY; + if (ctx->cfg_edges[bb->successors] == b + 1) { + (bb + 1)->flags |= IR_BB_PREV_EMPTY_ENTRY; + } + } + continue; + } + } + } + + ctx->bb_start = start; /* bb_start is used by matcher to avoid fusion of insns from different blocks */ + + while (ref != start) { + uint32_t rule = ctx->rules[ref]; + + if (!rule) { + ctx->rules[ref] = rule = ir_match_insn(ctx, ref); + } + ir_match_insn2(ctx, ref, rule); + ref = prev_ref[ref]; + } + } + + if (ctx->entries_count) { + ctx->entries_count = entries_count; + if (!entries_count) { + ir_mem_free(ctx->entries); + ctx->entries = NULL; + } + } + + return 1; +} + +int32_t ir_get_spill_slot_offset(ir_ctx *ctx, ir_ref ref) +{ + int32_t offset; + + IR_ASSERT(ref >= 0 && ctx->vregs[ref] && ctx->live_intervals[ctx->vregs[ref]]); + offset = ctx->live_intervals[ctx->vregs[ref]]->stack_spill_pos; + IR_ASSERT(offset != -1); + return IR_SPILL_POS_TO_OFFSET(offset); +} diff --git a/ext/opcache/jit/ir/ir_fold.h b/ext/opcache/jit/ir/ir_fold.h new file mode 100644 index 00000000000..d6053286508 --- /dev/null +++ b/ext/opcache/jit/ir/ir_fold.h @@ -0,0 +1,2129 @@ +/* + * IR - Lightweight JIT Compilation Framework + * (Folding engine rules) + * Copyright (C) 2022 Zend by Perforce. + * Authors: Dmitry Stogov + * + * Based on Mike Pall's implementation for LuaJIT. 
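+ *
+ * Each IR_FOLD(OP(type1, type2)) line below registers one folding rule; a run
+ * of consecutive IR_FOLD() lines shares the single C body that follows, and
+ * helpers such as IR_FOLD_BOOL() and IR_FOLD_CONST_U() replace the matched
+ * node with the computed constant.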
+ */
+
+/* Constant Folding */
+IR_FOLD(EQ(C_BOOL, C_BOOL))
+IR_FOLD(EQ(C_U8, C_U8))
+IR_FOLD(EQ(C_U16, C_U16))
+IR_FOLD(EQ(C_U32, C_U32))
+IR_FOLD(EQ(C_U64, C_U64))
+IR_FOLD(EQ(C_ADDR, C_ADDR))
+IR_FOLD(EQ(C_CHAR, C_CHAR))
+IR_FOLD(EQ(C_I8, C_I8))
+IR_FOLD(EQ(C_I16, C_I16))
+IR_FOLD(EQ(C_I32, C_I32))
+IR_FOLD(EQ(C_I64, C_I64))
+{
+	IR_FOLD_BOOL(op1_insn->val.u64 == op2_insn->val.u64);
+}
+
+IR_FOLD(EQ(C_DOUBLE, C_DOUBLE))
+{
+	IR_FOLD_BOOL(op1_insn->val.d == op2_insn->val.d);
+}
+
+IR_FOLD(EQ(C_FLOAT, C_FLOAT))
+{
+	IR_FOLD_BOOL(op1_insn->val.f == op2_insn->val.f);
+}
+
+IR_FOLD(NE(C_BOOL, C_BOOL))
+IR_FOLD(NE(C_U8, C_U8))
+IR_FOLD(NE(C_U16, C_U16))
+IR_FOLD(NE(C_U32, C_U32))
+IR_FOLD(NE(C_U64, C_U64))
+IR_FOLD(NE(C_ADDR, C_ADDR))
+IR_FOLD(NE(C_CHAR, C_CHAR))
+IR_FOLD(NE(C_I8, C_I8))
+IR_FOLD(NE(C_I16, C_I16))
+IR_FOLD(NE(C_I32, C_I32))
+IR_FOLD(NE(C_I64, C_I64))
+{
+	IR_FOLD_BOOL(op1_insn->val.u64 != op2_insn->val.u64);
+}
+
+IR_FOLD(NE(C_DOUBLE, C_DOUBLE))
+{
+	IR_FOLD_BOOL(op1_insn->val.d != op2_insn->val.d);
+}
+
+IR_FOLD(NE(C_FLOAT, C_FLOAT))
+{
+	IR_FOLD_BOOL(op1_insn->val.f != op2_insn->val.f);
+}
+
+IR_FOLD(LT(C_BOOL, C_BOOL))
+IR_FOLD(LT(C_U8, C_U8))
+IR_FOLD(LT(C_U16, C_U16))
+IR_FOLD(LT(C_U32, C_U32))
+IR_FOLD(LT(C_U64, C_U64))
+IR_FOLD(LT(C_ADDR, C_ADDR))
+{
+	IR_FOLD_BOOL(op1_insn->val.u64 < op2_insn->val.u64);
+}
+
+IR_FOLD(LT(C_CHAR, C_CHAR))
+IR_FOLD(LT(C_I8, C_I8))
+IR_FOLD(LT(C_I16, C_I16))
+IR_FOLD(LT(C_I32, C_I32))
+IR_FOLD(LT(C_I64, C_I64))
+{
+	IR_FOLD_BOOL(op1_insn->val.i64 < op2_insn->val.i64);
+}
+
+IR_FOLD(LT(C_DOUBLE, C_DOUBLE))
+{
+	IR_FOLD_BOOL(op1_insn->val.d < op2_insn->val.d);
+}
+
+IR_FOLD(LT(C_FLOAT, C_FLOAT))
+{
+	IR_FOLD_BOOL(op1_insn->val.f < op2_insn->val.f);
+}
+
+IR_FOLD(GE(C_BOOL, C_BOOL))
+IR_FOLD(GE(C_U8, C_U8))
+IR_FOLD(GE(C_U16, C_U16))
+IR_FOLD(GE(C_U32, C_U32))
+IR_FOLD(GE(C_U64, C_U64))
+IR_FOLD(GE(C_ADDR, C_ADDR))
+{
+	IR_FOLD_BOOL(op1_insn->val.u64 >= op2_insn->val.u64);
+}
+
+IR_FOLD(GE(C_CHAR, C_CHAR))
+IR_FOLD(GE(C_I8, C_I8))
+IR_FOLD(GE(C_I16, C_I16))
+IR_FOLD(GE(C_I32, C_I32))
+IR_FOLD(GE(C_I64, C_I64))
+{
+	IR_FOLD_BOOL(op1_insn->val.i64 >= op2_insn->val.i64);
+}
+
+IR_FOLD(GE(C_DOUBLE, C_DOUBLE))
+{
+	IR_FOLD_BOOL(op1_insn->val.d >= op2_insn->val.d);
+}
+
+IR_FOLD(GE(C_FLOAT, C_FLOAT))
+{
+	IR_FOLD_BOOL(op1_insn->val.f >= op2_insn->val.f);
+}
+
+IR_FOLD(LE(C_BOOL, C_BOOL))
+IR_FOLD(LE(C_U8, C_U8))
+IR_FOLD(LE(C_U16, C_U16))
+IR_FOLD(LE(C_U32, C_U32))
+IR_FOLD(LE(C_U64, C_U64))
+IR_FOLD(LE(C_ADDR, C_ADDR))
+{
+	IR_FOLD_BOOL(op1_insn->val.u64 <= op2_insn->val.u64);
+}
+
+IR_FOLD(LE(C_CHAR, C_CHAR))
+IR_FOLD(LE(C_I8, C_I8))
+IR_FOLD(LE(C_I16, C_I16))
+IR_FOLD(LE(C_I32, C_I32))
+IR_FOLD(LE(C_I64, C_I64))
+{
+	IR_FOLD_BOOL(op1_insn->val.i64 <= op2_insn->val.i64);
+}
+
+IR_FOLD(LE(C_DOUBLE, C_DOUBLE))
+{
+	IR_FOLD_BOOL(op1_insn->val.d <= op2_insn->val.d);
+}
+
+IR_FOLD(LE(C_FLOAT, C_FLOAT))
+{
+	IR_FOLD_BOOL(op1_insn->val.f <= op2_insn->val.f);
+}
+
+IR_FOLD(GT(C_BOOL, C_BOOL))
+IR_FOLD(GT(C_U8, C_U8))
+IR_FOLD(GT(C_U16, C_U16))
+IR_FOLD(GT(C_U32, C_U32))
+IR_FOLD(GT(C_U64, C_U64))
+IR_FOLD(GT(C_ADDR, C_ADDR))
+{
+	IR_FOLD_BOOL(op1_insn->val.u64 > op2_insn->val.u64);
+}
+
+IR_FOLD(GT(C_CHAR, C_CHAR))
+IR_FOLD(GT(C_I8, C_I8))
+IR_FOLD(GT(C_I16, C_I16))
+IR_FOLD(GT(C_I32, C_I32))
+IR_FOLD(GT(C_I64, C_I64))
+{
+	IR_FOLD_BOOL(op1_insn->val.i64 > op2_insn->val.i64);
+}
+
+IR_FOLD(GT(C_DOUBLE, C_DOUBLE))
+{
+	IR_FOLD_BOOL(op1_insn->val.d > op2_insn->val.d);
+}
+
+IR_FOLD(GT(C_FLOAT, C_FLOAT))
+{
+	IR_FOLD_BOOL(op1_insn->val.f >
op2_insn->val.f); +} + +IR_FOLD(ULT(C_BOOL, C_BOOL)) +IR_FOLD(ULT(C_U8, C_U8)) +IR_FOLD(ULT(C_U16, C_U16)) +IR_FOLD(ULT(C_U32, C_U32)) +IR_FOLD(ULT(C_U64, C_U64)) +IR_FOLD(ULT(C_ADDR, C_ADDR)) +IR_FOLD(ULT(C_CHAR, C_CHAR)) +IR_FOLD(ULT(C_I8, C_I8)) +IR_FOLD(ULT(C_I16, C_I16)) +IR_FOLD(ULT(C_I32, C_I32)) +IR_FOLD(ULT(C_I64, C_I64)) +{ + IR_FOLD_BOOL(op1_insn->val.u64 < op2_insn->val.u64); +} + +IR_FOLD(ULT(C_DOUBLE, C_DOUBLE)) +{ + IR_FOLD_BOOL(!(op1_insn->val.d >= op2_insn->val.d)); +} + +IR_FOLD(ULT(C_FLOAT, C_FLOAT)) +{ + IR_FOLD_BOOL(!(op1_insn->val.f >= op2_insn->val.f)); +} + +IR_FOLD(UGE(C_BOOL, C_BOOL)) +IR_FOLD(UGE(C_U8, C_U8)) +IR_FOLD(UGE(C_U16, C_U16)) +IR_FOLD(UGE(C_U32, C_U32)) +IR_FOLD(UGE(C_U64, C_U64)) +IR_FOLD(UGE(C_ADDR, C_ADDR)) +IR_FOLD(UGE(C_CHAR, C_CHAR)) +IR_FOLD(UGE(C_I8, C_I8)) +IR_FOLD(UGE(C_I16, C_I16)) +IR_FOLD(UGE(C_I32, C_I32)) +IR_FOLD(UGE(C_I64, C_I64)) +{ + IR_FOLD_BOOL(op1_insn->val.u64 >= op2_insn->val.u64); +} + +IR_FOLD(UGE(C_DOUBLE, C_DOUBLE)) +{ + IR_FOLD_BOOL(!(op1_insn->val.d < op2_insn->val.d)); +} + +IR_FOLD(UGE(C_FLOAT, C_FLOAT)) +{ + IR_FOLD_BOOL(!(op1_insn->val.f < op2_insn->val.f)); +} + +IR_FOLD(ULE(C_BOOL, C_BOOL)) +IR_FOLD(ULE(C_U8, C_U8)) +IR_FOLD(ULE(C_U16, C_U16)) +IR_FOLD(ULE(C_U32, C_U32)) +IR_FOLD(ULE(C_U64, C_U64)) +IR_FOLD(ULE(C_ADDR, C_ADDR)) +IR_FOLD(ULE(C_CHAR, C_CHAR)) +IR_FOLD(ULE(C_I8, C_I8)) +IR_FOLD(ULE(C_I16, C_I16)) +IR_FOLD(ULE(C_I32, C_I32)) +IR_FOLD(ULE(C_I64, C_I64)) +{ + IR_FOLD_BOOL(op1_insn->val.u64 <= op2_insn->val.u64); +} + +IR_FOLD(ULE(C_DOUBLE, C_DOUBLE)) +{ + IR_FOLD_BOOL(!(op1_insn->val.d > op2_insn->val.d)); +} + +IR_FOLD(ULE(C_FLOAT, C_FLOAT)) +{ + IR_FOLD_BOOL(!(op1_insn->val.f > op2_insn->val.f)); +} + +IR_FOLD(UGT(C_BOOL, C_BOOL)) +IR_FOLD(UGT(C_U8, C_U8)) +IR_FOLD(UGT(C_U16, C_U16)) +IR_FOLD(UGT(C_U32, C_U32)) +IR_FOLD(UGT(C_U64, C_U64)) +IR_FOLD(UGT(C_ADDR, C_ADDR)) +IR_FOLD(UGT(C_CHAR, C_CHAR)) +IR_FOLD(UGT(C_I8, C_I8)) +IR_FOLD(UGT(C_I16, C_I16)) +IR_FOLD(UGT(C_I32, C_I32)) +IR_FOLD(UGT(C_I64, C_I64)) +{ + IR_FOLD_BOOL(op1_insn->val.u64 > op2_insn->val.u64); +} + +IR_FOLD(UGT(C_DOUBLE, C_DOUBLE)) +{ + IR_FOLD_BOOL(!(op1_insn->val.d <= op2_insn->val.d)); +} + +IR_FOLD(UGT(C_FLOAT, C_FLOAT)) +{ + IR_FOLD_BOOL(!(op1_insn->val.f <= op2_insn->val.f)); +} + +IR_FOLD(ADD(C_U8, C_U8)) +IR_FOLD(ADD(C_U16, C_U16)) +IR_FOLD(ADD(C_U32, C_U32)) +IR_FOLD(ADD(C_U64, C_U64)) +IR_FOLD(ADD(C_ADDR, C_ADDR)) +{ + IR_ASSERT(IR_OPT_TYPE(opt) == op1_insn->type); + IR_FOLD_CONST_U(op1_insn->val.u64 + op2_insn->val.u64); +} + +IR_FOLD(ADD(C_I8, C_I8)) +IR_FOLD(ADD(C_I16, C_I16)) +IR_FOLD(ADD(C_I32, C_I32)) +IR_FOLD(ADD(C_I64, C_I64)) +{ + IR_ASSERT(IR_OPT_TYPE(opt) == op1_insn->type); + IR_FOLD_CONST_I(op1_insn->val.i64 + op2_insn->val.i64); +} + +IR_FOLD(ADD(C_DOUBLE, C_DOUBLE)) +{ + IR_ASSERT(IR_OPT_TYPE(opt) == op1_insn->type); + IR_FOLD_CONST_D(op1_insn->val.d + op2_insn->val.d); +} + +IR_FOLD(ADD(C_FLOAT, C_FLOAT)) +{ + IR_ASSERT(IR_OPT_TYPE(opt) == op1_insn->type); + IR_FOLD_CONST_F(op1_insn->val.f + op2_insn->val.f); +} + +IR_FOLD(SUB(C_U8, C_U8)) +IR_FOLD(SUB(C_U16, C_U16)) +IR_FOLD(SUB(C_U32, C_U32)) +IR_FOLD(SUB(C_U64, C_U64)) +IR_FOLD(SUB(C_ADDR, C_ADDR)) +{ + IR_ASSERT(IR_OPT_TYPE(opt) == op1_insn->type); + IR_FOLD_CONST_U(op1_insn->val.u64 - op2_insn->val.u64); +} + +IR_FOLD(SUB(C_I8, C_I8)) +IR_FOLD(SUB(C_I16, C_I16)) +IR_FOLD(SUB(C_I32, C_I32)) +IR_FOLD(SUB(C_I64, C_I64)) +{ + IR_ASSERT(IR_OPT_TYPE(opt) == op1_insn->type); + IR_FOLD_CONST_I(op1_insn->val.i64 - op2_insn->val.i64); +} + +IR_FOLD(SUB(C_DOUBLE, 
C_DOUBLE)) +{ + IR_ASSERT(IR_OPT_TYPE(opt) == op1_insn->type); + IR_FOLD_CONST_D(op1_insn->val.d - op2_insn->val.d); +} + +IR_FOLD(SUB(C_FLOAT, C_FLOAT)) +{ + IR_ASSERT(IR_OPT_TYPE(opt) == op1_insn->type); + IR_FOLD_CONST_F(op1_insn->val.f - op2_insn->val.f); +} + +IR_FOLD(MUL(C_U8, C_U8)) +IR_FOLD(MUL(C_U16, C_U16)) +IR_FOLD(MUL(C_U32, C_U32)) +IR_FOLD(MUL(C_U64, C_U64)) +IR_FOLD(MUL(C_ADDR, C_ADDR)) +{ + IR_ASSERT(IR_OPT_TYPE(opt) == op1_insn->type); + IR_FOLD_CONST_U(op1_insn->val.u64 * op2_insn->val.u64); +} + +IR_FOLD(MUL(C_I8, C_I8)) +IR_FOLD(MUL(C_I16, C_I16)) +IR_FOLD(MUL(C_I32, C_I32)) +IR_FOLD(MUL(C_I64, C_I64)) +{ + IR_ASSERT(IR_OPT_TYPE(opt) == op1_insn->type); + IR_FOLD_CONST_I(op1_insn->val.i64 * op2_insn->val.i64); +} + +IR_FOLD(MUL(C_DOUBLE, C_DOUBLE)) +{ + IR_ASSERT(IR_OPT_TYPE(opt) == op1_insn->type); + IR_FOLD_CONST_D(op1_insn->val.d * op2_insn->val.d); +} + +IR_FOLD(MUL(C_FLOAT, C_FLOAT)) +{ + IR_ASSERT(IR_OPT_TYPE(opt) == op1_insn->type); + IR_FOLD_CONST_F(op1_insn->val.f * op2_insn->val.f); +} + +IR_FOLD(DIV(C_U8, C_U8)) +IR_FOLD(DIV(C_U16, C_U16)) +IR_FOLD(DIV(C_U32, C_U32)) +IR_FOLD(DIV(C_U64, C_U64)) +IR_FOLD(DIV(C_ADDR, C_ADDR)) +{ + IR_ASSERT(IR_OPT_TYPE(opt) == op1_insn->type); + if (op2_insn->val.u64 == 0) { + /* division by zero */ + IR_FOLD_EMIT; + } + IR_FOLD_CONST_U(op1_insn->val.u64 / op2_insn->val.u64); +} + +IR_FOLD(DIV(C_I8, C_I8)) +IR_FOLD(DIV(C_I16, C_I16)) +IR_FOLD(DIV(C_I32, C_I32)) +IR_FOLD(DIV(C_I64, C_I64)) +{ + IR_ASSERT(IR_OPT_TYPE(opt) == op1_insn->type); + if (op2_insn->val.i64 == 0) { + /* division by zero */ + IR_FOLD_EMIT; + } + IR_FOLD_CONST_I(op1_insn->val.i64 / op2_insn->val.i64); +} + +IR_FOLD(DIV(C_DOUBLE, C_DOUBLE)) +{ + IR_ASSERT(IR_OPT_TYPE(opt) == op1_insn->type); + IR_FOLD_CONST_D(op1_insn->val.d / op2_insn->val.d); +} + +IR_FOLD(DIV(C_FLOAT, C_FLOAT)) +{ + IR_ASSERT(IR_OPT_TYPE(opt) == op1_insn->type); + IR_FOLD_CONST_F(op1_insn->val.f / op2_insn->val.f); +} + +IR_FOLD(MOD(C_U8, C_U8)) +IR_FOLD(MOD(C_U16, C_U16)) +IR_FOLD(MOD(C_U32, C_U32)) +IR_FOLD(MOD(C_U64, C_U64)) +IR_FOLD(MOD(C_ADDR, C_ADDR)) +{ + IR_ASSERT(IR_OPT_TYPE(opt) == op1_insn->type); + if (op2_insn->val.u64 == 0) { + /* division by zero */ + IR_FOLD_EMIT; + } + IR_FOLD_CONST_U(op1_insn->val.u64 % op2_insn->val.u64); +} + +IR_FOLD(MOD(C_I8, C_I8)) +IR_FOLD(MOD(C_I16, C_I16)) +IR_FOLD(MOD(C_I32, C_I32)) +IR_FOLD(MOD(C_I64, C_I64)) +{ + IR_ASSERT(IR_OPT_TYPE(opt) == op1_insn->type); + if (op2_insn->val.i64 == 0) { + /* division by zero */ + IR_FOLD_EMIT; + } + IR_FOLD_CONST_I(op1_insn->val.i64 % op2_insn->val.i64); +} + +IR_FOLD(NEG(C_I8)) +IR_FOLD(NEG(C_I16)) +IR_FOLD(NEG(C_I32)) +IR_FOLD(NEG(C_I64)) +{ + IR_ASSERT(IR_OPT_TYPE(opt) == op1_insn->type); + IR_FOLD_CONST_I(-op1_insn->val.i64); +} + +IR_FOLD(NEG(C_DOUBLE)) +{ + IR_ASSERT(IR_OPT_TYPE(opt) == op1_insn->type); + IR_FOLD_CONST_D(-op1_insn->val.d); +} + +IR_FOLD(NEG(C_FLOAT)) +{ + IR_ASSERT(IR_OPT_TYPE(opt) == op1_insn->type); + IR_FOLD_CONST_F(-op1_insn->val.f); +} + +IR_FOLD(ABS(C_I8)) +IR_FOLD(ABS(C_I16)) +IR_FOLD(ABS(C_I32)) +IR_FOLD(ABS(C_I64)) +{ + IR_ASSERT(IR_OPT_TYPE(opt) == op1_insn->type); + if (op1_insn->val.i64 >= 0) { + IR_FOLD_COPY(op1); + } else { + IR_FOLD_CONST_I(-op1_insn->val.i64); + } +} + +IR_FOLD(ABS(C_DOUBLE)) +{ + IR_ASSERT(IR_OPT_TYPE(opt) == op1_insn->type); + IR_FOLD_CONST_D(fabs(op1_insn->val.d)); +} + +IR_FOLD(ABS(C_FLOAT)) +{ + IR_ASSERT(IR_OPT_TYPE(opt) == op1_insn->type); + IR_FOLD_CONST_F(fabsf(op1_insn->val.f)); +} + +IR_FOLD(ADD_OV(C_U8, C_U8)) +IR_FOLD(ADD_OV(C_U16, 
C_U16))
+IR_FOLD(ADD_OV(C_U32, C_U32))
+IR_FOLD(ADD_OV(C_U64, C_U64))
+{
+	ir_type type = IR_OPT_TYPE(opt);
+	uint64_t max = ((uint64_t)0xffffffffffffffff) >> (64 - ir_type_size[type] * 8);
+	IR_ASSERT(type == op1_insn->type);
+	if (op1_insn->val.u64 > max - op2_insn->val.u64) {
+		IR_FOLD_NEXT;
+	}
+	IR_FOLD_CONST_U(op1_insn->val.u64 + op2_insn->val.u64);
+}
+
+IR_FOLD(ADD_OV(C_I8, C_I8))
+IR_FOLD(ADD_OV(C_I16, C_I16))
+IR_FOLD(ADD_OV(C_I32, C_I32))
+IR_FOLD(ADD_OV(C_I64, C_I64))
+{
+	ir_type type = IR_OPT_TYPE(opt);
+	int64_t max = ((uint64_t)0x7fffffffffffffff) >> (64 - ir_type_size[type] * 8);
+	int64_t min = - max - 1;
+	IR_ASSERT(type == op1_insn->type);
+	if ((op2_insn->val.i64 > 0 && op1_insn->val.i64 > max - op2_insn->val.i64)
+	 || (op2_insn->val.i64 < 0 && op1_insn->val.i64 < min - op2_insn->val.i64)) {
+		IR_FOLD_NEXT;
+	}
+	IR_FOLD_CONST_I(op1_insn->val.i64 + op2_insn->val.i64);
+}
+
+IR_FOLD(SUB_OV(C_U8, C_U8))
+IR_FOLD(SUB_OV(C_U16, C_U16))
+IR_FOLD(SUB_OV(C_U32, C_U32))
+IR_FOLD(SUB_OV(C_U64, C_U64))
+{
+	IR_ASSERT(IR_OPT_TYPE(opt) == op1_insn->type);
+	if (op2_insn->val.u64 > op1_insn->val.u64) {
+		IR_FOLD_NEXT;
+	}
+	IR_FOLD_CONST_U(op1_insn->val.u64 - op2_insn->val.u64);
+}
+
+IR_FOLD(SUB_OV(C_I8, C_I8))
+IR_FOLD(SUB_OV(C_I16, C_I16))
+IR_FOLD(SUB_OV(C_I32, C_I32))
+IR_FOLD(SUB_OV(C_I64, C_I64))
+{
+	ir_type type = IR_OPT_TYPE(opt);
+	int64_t max = ((uint64_t)0x7fffffffffffffff) >> (64 - ir_type_size[type] * 8);
+	int64_t min = - max - 1;
+	IR_ASSERT(type == op1_insn->type);
+	if ((op2_insn->val.i64 > 0 && op1_insn->val.i64 < min + op2_insn->val.i64)
+	 || (op2_insn->val.i64 < 0 && op1_insn->val.i64 > max + op2_insn->val.i64)) {
+		IR_FOLD_NEXT;
+	}
+	IR_FOLD_CONST_I(op1_insn->val.i64 - op2_insn->val.i64);
+}
+
+IR_FOLD(MUL_OV(C_U8, C_U8))
+IR_FOLD(MUL_OV(C_U16, C_U16))
+IR_FOLD(MUL_OV(C_U32, C_U32))
+IR_FOLD(MUL_OV(C_U64, C_U64))
+{
+	ir_type type = IR_OPT_TYPE(opt);
+	uint64_t max = ((uint64_t)0xffffffffffffffff) >> (64 - ir_type_size[type] * 8);
+	uint64_t res;
+	IR_ASSERT(IR_OPT_TYPE(opt) == op1_insn->type);
+	res = op1_insn->val.u64 * op2_insn->val.u64;
+	if ((op1_insn->val.u64 != 0 && res / op1_insn->val.u64 != op2_insn->val.u64) || res > max) {
+		/* overflow (including wrap-around of the 64-bit intermediate product) */
+		IR_FOLD_NEXT;
+	}
+	IR_FOLD_CONST_U(res);
+}
+
+IR_FOLD(MUL_OV(C_I8, C_I8))
+IR_FOLD(MUL_OV(C_I16, C_I16))
+IR_FOLD(MUL_OV(C_I32, C_I32))
+IR_FOLD(MUL_OV(C_I64, C_I64))
+{
+	ir_type type = IR_OPT_TYPE(opt);
+	int64_t max = ((uint64_t)0x7fffffffffffffff) >> (64 - ir_type_size[type] * 8);
+	int64_t min = - max - 1;
+	int64_t res;
+	IR_ASSERT(IR_OPT_TYPE(opt) == op1_insn->type);
+	res = op1_insn->val.i64 * op2_insn->val.i64;
+	if ((op1_insn->val.i64 != 0 && res / op1_insn->val.i64 != op2_insn->val.i64) || res < min || res > max) {
+		/* overflow */
+		IR_FOLD_NEXT;
+	}
+	IR_FOLD_CONST_I(res);
+}
+
+IR_FOLD(OVERFLOW(_))
+{
+	if (op1_insn->op != IR_ADD_OV && op1_insn->op != IR_SUB_OV && op1_insn->op != IR_MUL_OV) {
+		IR_FOLD_COPY(IR_FALSE);
+	}
+	IR_FOLD_NEXT;
+}
+
+IR_FOLD(NOT(C_BOOL))
+{
+	IR_ASSERT(IR_OPT_TYPE(opt) == op1_insn->type);
+	IR_FOLD_BOOL(!op1_insn->val.u64);
+}
+
+IR_FOLD(NOT(C_U8))
+IR_FOLD(NOT(C_CHAR))
+IR_FOLD(NOT(C_I8))
+{
+	IR_ASSERT(IR_OPT_TYPE(opt) == op1_insn->type);
+	IR_FOLD_CONST_U(~op1_insn->val.u8);
+}
+
+IR_FOLD(NOT(C_U16))
+IR_FOLD(NOT(C_I16))
+{
+	IR_ASSERT(IR_OPT_TYPE(opt) == op1_insn->type);
+	IR_FOLD_CONST_U(~op1_insn->val.u16);
+}
+
+IR_FOLD(NOT(C_U32))
+IR_FOLD(NOT(C_I32))
+{
+	IR_ASSERT(IR_OPT_TYPE(opt) == op1_insn->type);
+	IR_FOLD_CONST_U(~op1_insn->val.u32);
+}
+
+IR_FOLD(NOT(C_U64))
+IR_FOLD(NOT(C_I64))
+{
IR_ASSERT(IR_OPT_TYPE(opt) == op1_insn->type); + IR_FOLD_CONST_U(~op1_insn->val.u64); +} + +IR_FOLD(OR(C_BOOL, C_BOOL)) +{ + IR_ASSERT(IR_OPT_TYPE(opt) == op1_insn->type); + IR_FOLD_BOOL(op1_insn->val.b || op2_insn->val.b); +} + +IR_FOLD(OR(C_U8, C_U8)) +IR_FOLD(OR(C_CHAR, C_CHAR)) +IR_FOLD(OR(C_I8, C_I8)) +{ + IR_ASSERT(IR_OPT_TYPE(opt) == op1_insn->type); + IR_FOLD_CONST_U(op1_insn->val.u8 | op2_insn->val.u8); +} + +IR_FOLD(OR(C_U16, C_U16)) +IR_FOLD(OR(C_I16, C_I16)) +{ + IR_ASSERT(IR_OPT_TYPE(opt) == op1_insn->type); + IR_FOLD_CONST_U(op1_insn->val.u16 | op2_insn->val.u16); +} + +IR_FOLD(OR(C_U32, C_U32)) +IR_FOLD(OR(C_I32, C_I32)) +{ + IR_ASSERT(IR_OPT_TYPE(opt) == op1_insn->type); + IR_FOLD_CONST_U(op1_insn->val.u32 | op2_insn->val.u32); +} + +IR_FOLD(OR(C_U64, C_U64)) +IR_FOLD(OR(C_I64, C_I64)) +{ + IR_ASSERT(IR_OPT_TYPE(opt) == op1_insn->type); + IR_FOLD_CONST_U(op1_insn->val.u64 | op2_insn->val.u64); +} + +IR_FOLD(AND(C_BOOL, C_BOOL)) +{ + IR_ASSERT(IR_OPT_TYPE(opt) == op1_insn->type); + IR_FOLD_BOOL(op1_insn->val.b && op2_insn->val.b); +} + +IR_FOLD(AND(C_U8, C_U8)) +IR_FOLD(AND(C_CHAR, C_CHAR)) +IR_FOLD(AND(C_I8, C_I8)) +{ + IR_ASSERT(IR_OPT_TYPE(opt) == op1_insn->type); + IR_FOLD_CONST_U(op1_insn->val.u8 & op2_insn->val.u8); +} + +IR_FOLD(AND(C_U16, C_U16)) +IR_FOLD(AND(C_I16, C_I16)) +{ + IR_ASSERT(IR_OPT_TYPE(opt) == op1_insn->type); + IR_FOLD_CONST_U(op1_insn->val.u16 & op2_insn->val.u16); +} + +IR_FOLD(AND(C_U32, C_U32)) +IR_FOLD(AND(C_I32, C_I32)) +{ + IR_ASSERT(IR_OPT_TYPE(opt) == op1_insn->type); + IR_FOLD_CONST_U(op1_insn->val.u32 & op2_insn->val.u32); +} + +IR_FOLD(AND(C_U64, C_U64)) +IR_FOLD(AND(C_I64, C_I64)) +{ + IR_ASSERT(IR_OPT_TYPE(opt) == op1_insn->type); + IR_FOLD_CONST_U(op1_insn->val.u64 & op2_insn->val.u64); +} + +IR_FOLD(XOR(C_BOOL, C_BOOL)) +{ + IR_ASSERT(IR_OPT_TYPE(opt) == op1_insn->type); + IR_FOLD_BOOL(op1_insn->val.b != op2_insn->val.b); +} + +IR_FOLD(XOR(C_U8, C_U8)) +IR_FOLD(XOR(C_CHAR, C_CHAR)) +IR_FOLD(XOR(C_I8, C_I8)) +{ + IR_ASSERT(IR_OPT_TYPE(opt) == op1_insn->type); + IR_FOLD_CONST_U(op1_insn->val.u8 ^ op2_insn->val.u8); +} + +IR_FOLD(XOR(C_U16, C_U16)) +IR_FOLD(XOR(C_I16, C_I16)) +{ + IR_ASSERT(IR_OPT_TYPE(opt) == op1_insn->type); + IR_FOLD_CONST_U(op1_insn->val.u16 ^ op2_insn->val.u16); +} + +IR_FOLD(XOR(C_U32, C_U32)) +IR_FOLD(XOR(C_I32, C_I32)) +{ + IR_ASSERT(IR_OPT_TYPE(opt) == op1_insn->type); + IR_FOLD_CONST_U(op1_insn->val.u32 ^ op2_insn->val.u32); +} + +IR_FOLD(XOR(C_U64, C_U64)) +IR_FOLD(XOR(C_I64, C_I64)) +{ + IR_ASSERT(IR_OPT_TYPE(opt) == op1_insn->type); + IR_FOLD_CONST_U(op1_insn->val.u64 ^ op2_insn->val.u64); +} + +IR_FOLD(SHL(C_U8, C_U8)) +IR_FOLD(SHL(C_CHAR, C_CHAR)) +IR_FOLD(SHL(C_I8, C_I8)) +{ + IR_ASSERT(IR_OPT_TYPE(opt) == op1_insn->type); + IR_FOLD_CONST_U(op1_insn->val.u8 << op2_insn->val.u8); +} + +IR_FOLD(SHL(C_U16, C_U16)) +IR_FOLD(SHL(C_I16, C_I16)) +{ + IR_ASSERT(IR_OPT_TYPE(opt) == op1_insn->type); + IR_FOLD_CONST_U(op1_insn->val.u16 << op2_insn->val.u16); +} + +IR_FOLD(SHL(C_U32, C_U32)) +IR_FOLD(SHL(C_I32, C_I32)) +{ + IR_ASSERT(IR_OPT_TYPE(opt) == op1_insn->type); + IR_FOLD_CONST_U(op1_insn->val.u32 << op2_insn->val.u32); +} + +IR_FOLD(SHL(C_U64, C_U64)) +IR_FOLD(SHL(C_I64, C_I64)) +{ + IR_ASSERT(IR_OPT_TYPE(opt) == op1_insn->type); + IR_FOLD_CONST_U(op1_insn->val.u64 << op2_insn->val.u64); +} + +IR_FOLD(SHR(C_U8, C_U8)) +IR_FOLD(SHR(C_CHAR, C_CHAR)) +IR_FOLD(SHR(C_I8, C_I8)) +{ + IR_ASSERT(IR_OPT_TYPE(opt) == op1_insn->type); + IR_FOLD_CONST_U(op1_insn->val.u8 >> op2_insn->val.u8); +} + +IR_FOLD(SHR(C_U16, 
C_U16))
+IR_FOLD(SHR(C_I16, C_I16))
+{
+	IR_ASSERT(IR_OPT_TYPE(opt) == op1_insn->type);
+	IR_FOLD_CONST_U(op1_insn->val.u16 >> op2_insn->val.u16);
+}
+
+IR_FOLD(SHR(C_U32, C_U32))
+IR_FOLD(SHR(C_I32, C_I32))
+{
+	IR_ASSERT(IR_OPT_TYPE(opt) == op1_insn->type);
+	IR_FOLD_CONST_U(op1_insn->val.u32 >> op2_insn->val.u32);
+}
+
+IR_FOLD(SHR(C_U64, C_U64))
+IR_FOLD(SHR(C_I64, C_I64))
+{
+	IR_ASSERT(IR_OPT_TYPE(opt) == op1_insn->type);
+	IR_FOLD_CONST_U(op1_insn->val.u64 >> op2_insn->val.u64);
+}
+
+IR_FOLD(SAR(C_U8, C_U8))
+IR_FOLD(SAR(C_CHAR, C_CHAR))
+IR_FOLD(SAR(C_I8, C_I8))
+{
+	IR_ASSERT(IR_OPT_TYPE(opt) == op1_insn->type);
+	IR_FOLD_CONST_I(op1_insn->val.i8 >> op2_insn->val.i8);
+}
+
+IR_FOLD(SAR(C_U16, C_U16))
+IR_FOLD(SAR(C_I16, C_I16))
+{
+	IR_ASSERT(IR_OPT_TYPE(opt) == op1_insn->type);
+	IR_FOLD_CONST_I(op1_insn->val.i16 >> op2_insn->val.i16);
+}
+
+IR_FOLD(SAR(C_U32, C_U32))
+IR_FOLD(SAR(C_I32, C_I32))
+{
+	IR_ASSERT(IR_OPT_TYPE(opt) == op1_insn->type);
+	IR_FOLD_CONST_I(op1_insn->val.i32 >> op2_insn->val.i32);
+}
+
+IR_FOLD(SAR(C_U64, C_U64))
+IR_FOLD(SAR(C_I64, C_I64))
+{
+	IR_ASSERT(IR_OPT_TYPE(opt) == op1_insn->type);
+	IR_FOLD_CONST_I(op1_insn->val.i64 >> op2_insn->val.i64);
+}
+
+IR_FOLD(ROL(C_U8, C_U8))
+IR_FOLD(ROL(C_CHAR, C_CHAR))
+IR_FOLD(ROL(C_I8, C_I8))
+{
+	IR_ASSERT(IR_OPT_TYPE(opt) == op1_insn->type);
+	IR_FOLD_CONST_U(ir_rol8(op1_insn->val.u8, op2_insn->val.u8));
+}
+
+IR_FOLD(ROL(C_U16, C_U16))
+IR_FOLD(ROL(C_I16, C_I16))
+{
+	IR_ASSERT(IR_OPT_TYPE(opt) == op1_insn->type);
+	IR_FOLD_CONST_U(ir_rol16(op1_insn->val.u16, op2_insn->val.u16));
+}
+
+IR_FOLD(ROL(C_U32, C_U32))
+IR_FOLD(ROL(C_I32, C_I32))
+{
+	IR_ASSERT(IR_OPT_TYPE(opt) == op1_insn->type);
+	IR_FOLD_CONST_U(ir_rol32(op1_insn->val.u32, op2_insn->val.u32));
+}
+
+IR_FOLD(ROL(C_U64, C_U64))
+IR_FOLD(ROL(C_I64, C_I64))
+{
+	IR_ASSERT(IR_OPT_TYPE(opt) == op1_insn->type);
+	IR_FOLD_CONST_U(ir_rol64(op1_insn->val.u64, op2_insn->val.u64));
+}
+
+IR_FOLD(ROR(C_U8, C_U8))
+IR_FOLD(ROR(C_CHAR, C_CHAR))
+IR_FOLD(ROR(C_I8, C_I8))
+{
+	IR_ASSERT(IR_OPT_TYPE(opt) == op1_insn->type);
+	IR_FOLD_CONST_U(ir_ror8(op1_insn->val.u8, op2_insn->val.u8));
+}
+
+IR_FOLD(ROR(C_U16, C_U16))
+IR_FOLD(ROR(C_I16, C_I16))
+{
+	IR_ASSERT(IR_OPT_TYPE(opt) == op1_insn->type);
+	IR_FOLD_CONST_U(ir_ror16(op1_insn->val.u16, op2_insn->val.u16));
+}
+
+IR_FOLD(ROR(C_U32, C_U32))
+IR_FOLD(ROR(C_I32, C_I32))
+{
+	IR_ASSERT(IR_OPT_TYPE(opt) == op1_insn->type);
+	IR_FOLD_CONST_U(ir_ror32(op1_insn->val.u32, op2_insn->val.u32));
+}
+
+IR_FOLD(ROR(C_U64, C_U64))
+IR_FOLD(ROR(C_I64, C_I64))
+{
+	IR_ASSERT(IR_OPT_TYPE(opt) == op1_insn->type);
+	IR_FOLD_CONST_U(ir_ror64(op1_insn->val.u64, op2_insn->val.u64));
+}
+
+//IR_FOLD(BSWAP(CONST))
+//TODO: bswap
+
+IR_FOLD(MIN(C_BOOL, C_BOOL))
+IR_FOLD(MIN(C_U8, C_U8))
+IR_FOLD(MIN(C_U16, C_U16))
+IR_FOLD(MIN(C_U32, C_U32))
+IR_FOLD(MIN(C_U64, C_U64))
+IR_FOLD(MIN(C_ADDR, C_ADDR))
+{
+	IR_FOLD_COPY(op1_insn->val.u64 <= op2_insn->val.u64 ? op1 : op2);
+}
+
+IR_FOLD(MIN(C_CHAR, C_CHAR))
+IR_FOLD(MIN(C_I8, C_I8))
+IR_FOLD(MIN(C_I16, C_I16))
+IR_FOLD(MIN(C_I32, C_I32))
+IR_FOLD(MIN(C_I64, C_I64))
+{
+	IR_FOLD_COPY(op1_insn->val.i64 <= op2_insn->val.i64 ? op1 : op2);
+}
+
+IR_FOLD(MIN(C_DOUBLE, C_DOUBLE))
+{
+	IR_FOLD_COPY(op1_insn->val.d <= op2_insn->val.d ? op1 : op2);
+}
+
+IR_FOLD(MIN(C_FLOAT, C_FLOAT))
+{
+	IR_FOLD_COPY(op1_insn->val.f <= op2_insn->val.f ? op1 : op2);
+}
+
+IR_FOLD(MAX(C_BOOL, C_BOOL))
+IR_FOLD(MAX(C_U8, C_U8))
+IR_FOLD(MAX(C_U16, C_U16))
+IR_FOLD(MAX(C_U32, C_U32))
+IR_FOLD(MAX(C_U64, C_U64))
+IR_FOLD(MAX(C_ADDR, C_ADDR))
+{
+	IR_FOLD_COPY(op1_insn->val.u64 >= op2_insn->val.u64 ? op1 : op2);
+}
+
+IR_FOLD(MAX(C_CHAR, C_CHAR))
+IR_FOLD(MAX(C_I8, C_I8))
+IR_FOLD(MAX(C_I16, C_I16))
+IR_FOLD(MAX(C_I32, C_I32))
+IR_FOLD(MAX(C_I64, C_I64))
+{
+	IR_FOLD_COPY(op1_insn->val.i64 >= op2_insn->val.i64 ? op1 : op2);
+}
+
+IR_FOLD(MAX(C_DOUBLE, C_DOUBLE))
+{
+	IR_FOLD_COPY(op1_insn->val.d >= op2_insn->val.d ? op1 : op2);
+}
+
+IR_FOLD(MAX(C_FLOAT, C_FLOAT))
+{
+	IR_FOLD_COPY(op1_insn->val.f >= op2_insn->val.f ? op1 : op2);
+}
+
+IR_FOLD(SEXT(C_I8))
+IR_FOLD(SEXT(C_U8))
+IR_FOLD(SEXT(C_BOOL))
+{
+	IR_ASSERT(IR_IS_TYPE_INT(IR_OPT_TYPE(opt)));
+	IR_ASSERT(ir_type_size[IR_OPT_TYPE(opt)] > ir_type_size[op1_insn->type]);
+	IR_FOLD_CONST_I((int64_t)op1_insn->val.i8);
+}
+
+IR_FOLD(SEXT(C_I16))
+IR_FOLD(SEXT(C_U16))
+{
+	IR_ASSERT(IR_IS_TYPE_INT(IR_OPT_TYPE(opt)));
+	IR_ASSERT(ir_type_size[IR_OPT_TYPE(opt)] > ir_type_size[op1_insn->type]);
+	IR_FOLD_CONST_I((int64_t)op1_insn->val.i16);
+}
+
+IR_FOLD(SEXT(C_I32))
+IR_FOLD(SEXT(C_U32))
+{
+	IR_ASSERT(IR_IS_TYPE_INT(IR_OPT_TYPE(opt)));
+	IR_ASSERT(ir_type_size[IR_OPT_TYPE(opt)] > ir_type_size[op1_insn->type]);
+	IR_FOLD_CONST_I((int64_t)op1_insn->val.i32);
+}
+
+IR_FOLD(ZEXT(C_I8))
+IR_FOLD(ZEXT(C_U8))
+IR_FOLD(ZEXT(C_BOOL))
+{
+	IR_ASSERT(IR_IS_TYPE_INT(IR_OPT_TYPE(opt)));
+	IR_ASSERT(ir_type_size[IR_OPT_TYPE(opt)] > ir_type_size[op1_insn->type]);
+	IR_FOLD_CONST_U((uint64_t)op1_insn->val.u8);
+}
+
+IR_FOLD(ZEXT(C_I16))
+IR_FOLD(ZEXT(C_U16))
+{
+	IR_ASSERT(IR_IS_TYPE_INT(IR_OPT_TYPE(opt)));
+	IR_ASSERT(ir_type_size[IR_OPT_TYPE(opt)] > ir_type_size[op1_insn->type]);
+	IR_FOLD_CONST_U((uint64_t)op1_insn->val.u16);
+}
+
+IR_FOLD(ZEXT(C_I32))
+IR_FOLD(ZEXT(C_U32))
+{
+	IR_ASSERT(IR_IS_TYPE_INT(IR_OPT_TYPE(opt)));
+	IR_ASSERT(ir_type_size[IR_OPT_TYPE(opt)] > ir_type_size[op1_insn->type]);
+	IR_FOLD_CONST_U((uint64_t)op1_insn->val.u32);
+}
+
+IR_FOLD(TRUNC(C_I16))
+IR_FOLD(TRUNC(C_I32))
+IR_FOLD(TRUNC(C_I64))
+IR_FOLD(TRUNC(C_U16))
+IR_FOLD(TRUNC(C_U32))
+IR_FOLD(TRUNC(C_U64))
+{
+	IR_ASSERT(IR_IS_TYPE_INT(IR_OPT_TYPE(opt)));
+	IR_ASSERT(ir_type_size[IR_OPT_TYPE(opt)] < ir_type_size[op1_insn->type]);
+	switch (IR_OPT_TYPE(opt)) {
+		default:
+			IR_ASSERT(0);
+		case IR_I8:
+			IR_FOLD_CONST_I(op1_insn->val.i8);
+		case IR_I16:
+			IR_FOLD_CONST_I(op1_insn->val.i16);
+		case IR_I32:
+			IR_FOLD_CONST_I(op1_insn->val.i32);
+		case IR_U8:
+			IR_FOLD_CONST_U(op1_insn->val.u8);
+		case IR_U16:
+			IR_FOLD_CONST_U(op1_insn->val.u16);
+		case IR_U32:
+			IR_FOLD_CONST_U(op1_insn->val.u32);
+	}
+}
+
+
+IR_FOLD(BITCAST(C_I8))
+IR_FOLD(BITCAST(C_I16))
+IR_FOLD(BITCAST(C_I32))
+IR_FOLD(BITCAST(C_I64))
+IR_FOLD(BITCAST(C_U8))
+IR_FOLD(BITCAST(C_U16))
+IR_FOLD(BITCAST(C_U32))
+IR_FOLD(BITCAST(C_U64))
+IR_FOLD(BITCAST(C_FLOAT))
+IR_FOLD(BITCAST(C_DOUBLE))
+IR_FOLD(BITCAST(C_BOOL))
+IR_FOLD(BITCAST(C_CHAR))
+IR_FOLD(BITCAST(C_ADDR))
+{
+	IR_ASSERT(ir_type_size[IR_OPT_TYPE(opt)] == ir_type_size[op1_insn->type]);
+	switch (IR_OPT_TYPE(opt)) {
+		default:
+			IR_ASSERT(0);
+		case IR_I8:
+			IR_FOLD_CONST_I(op1_insn->val.i8);
+		case IR_I16:
+			IR_FOLD_CONST_I(op1_insn->val.i16);
+		case IR_I32:
+			IR_FOLD_CONST_I(op1_insn->val.i32);
+		case IR_I64:
+			IR_FOLD_CONST_I(op1_insn->val.i64);
+		case IR_U8:
+			IR_FOLD_CONST_U(op1_insn->val.u8);
+		case IR_U16:
+			IR_FOLD_CONST_U(op1_insn->val.u16);
+		case IR_U32:
+			IR_FOLD_CONST_U(op1_insn->val.u32);
+		case
IR_U64: + IR_FOLD_CONST_U(op1_insn->val.u64); + case IR_FLOAT: + IR_FOLD_CONST_F(op1_insn->val.f); + case IR_DOUBLE: + IR_FOLD_CONST_D(op1_insn->val.d); + case IR_CHAR: + IR_FOLD_CONST_I(op1_insn->val.c); + case IR_ADDR: + IR_FOLD_CONST_U(op1_insn->val.addr); + } +} + +IR_FOLD(INT2FP(C_I8)) +IR_FOLD(INT2FP(C_I16)) +IR_FOLD(INT2FP(C_I32)) +IR_FOLD(INT2FP(C_I64)) +{ + if (IR_OPT_TYPE(opt) == IR_DOUBLE) { + IR_FOLD_CONST_D((double)op1_insn->val.i64); + } else { + IR_ASSERT(IR_OPT_TYPE(opt) == IR_FLOAT); + IR_FOLD_CONST_F((float)op1_insn->val.i64); + } +} + +IR_FOLD(INT2FP(C_U8)) +IR_FOLD(INT2FP(C_U16)) +IR_FOLD(INT2FP(C_U32)) +IR_FOLD(INT2FP(C_U64)) +{ + if (IR_OPT_TYPE(opt) == IR_DOUBLE) { + IR_FOLD_CONST_D((double)op1_insn->val.u64); + } else { + IR_ASSERT(IR_OPT_TYPE(opt) == IR_FLOAT); + IR_FOLD_CONST_F((float)op1_insn->val.u64); + } +} + +IR_FOLD(FP2INT(C_FLOAT)) +{ + IR_ASSERT(IR_IS_TYPE_INT(IR_OPT_TYPE(opt))); + switch (IR_OPT_TYPE(opt)) { + default: + IR_ASSERT(0); + case IR_I8: + IR_FOLD_CONST_I((int8_t)op1_insn->val.f); + case IR_I16: + IR_FOLD_CONST_I((int16_t)op1_insn->val.f); + case IR_I32: + IR_FOLD_CONST_I((int32_t)op1_insn->val.f); + case IR_I64: + IR_FOLD_CONST_I((int64_t)op1_insn->val.f); + case IR_U8: + IR_FOLD_CONST_U((uint8_t)op1_insn->val.f); + case IR_U16: + IR_FOLD_CONST_U((uint16_t)op1_insn->val.f); + case IR_U32: + IR_FOLD_CONST_U((uint32_t)op1_insn->val.f); + case IR_U64: + IR_FOLD_CONST_U((uint64_t)op1_insn->val.f); + } +} + +IR_FOLD(FP2INT(C_DOUBLE)) +{ + IR_ASSERT(IR_IS_TYPE_INT(IR_OPT_TYPE(opt))); + switch (IR_OPT_TYPE(opt)) { + default: + IR_ASSERT(0); + case IR_I8: + IR_FOLD_CONST_I((int8_t)op1_insn->val.d); + case IR_I16: + IR_FOLD_CONST_I((int16_t)op1_insn->val.d); + case IR_I32: + IR_FOLD_CONST_I((int32_t)op1_insn->val.d); + case IR_I64: + IR_FOLD_CONST_I((int64_t)op1_insn->val.d); + case IR_U8: + IR_FOLD_CONST_U((uint8_t)op1_insn->val.d); + case IR_U16: + IR_FOLD_CONST_U((uint16_t)op1_insn->val.d); + case IR_U32: + IR_FOLD_CONST_U((uint32_t)op1_insn->val.d); + case IR_U64: + IR_FOLD_CONST_U((uint64_t)op1_insn->val.d); + } +} + +IR_FOLD(FP2FP(C_FLOAT)) +{ + if (IR_OPT_TYPE(opt) == IR_DOUBLE) { + IR_FOLD_CONST_D((double)op1_insn->val.f); + } else { + IR_ASSERT(IR_OPT_TYPE(opt) == IR_FLOAT); + IR_FOLD_COPY(op1); + } +} + +IR_FOLD(FP2FP(C_DOUBLE)) +{ + if (IR_OPT_TYPE(opt) == IR_DOUBLE) { + IR_FOLD_COPY(op1); + } else { + IR_ASSERT(IR_OPT_TYPE(opt) == IR_FLOAT); + IR_FOLD_CONST_F((float)op1_insn->val.d); + } +} + +// TODO: constant functions (e.g. 
sin, cos)
+
+/* Copy Propagation */
+IR_FOLD(COPY(_))
+{
+	IR_ASSERT(IR_OPT_TYPE(opt) == op1_insn->type);
+	if (!op2) {
+		IR_FOLD_COPY(op1);
+	}
+	/* skip CSE */
+	IR_FOLD_EMIT;
+}
+
+IR_FOLD(PHI(_, _)) // TODO: PHI(_, _, _)
+{
+	if (op2 == op3 && op3 != IR_UNUSED) {
+		IR_FOLD_COPY(op2);
+	}
+	/* skip CSE */
+	opt = opt | (3 << IR_OPT_INPUTS_SHIFT);
+	IR_FOLD_EMIT;
+}
+
+IR_FOLD(COND(C_BOOL, _)) // TODO: COND(CONST, _, _)
+IR_FOLD(COND(C_U8, _))
+IR_FOLD(COND(C_U16, _))
+IR_FOLD(COND(C_U32, _))
+IR_FOLD(COND(C_U64, _))
+IR_FOLD(COND(C_ADDR, _))
+IR_FOLD(COND(C_CHAR, _))
+IR_FOLD(COND(C_I8, _))
+IR_FOLD(COND(C_I16, _))
+IR_FOLD(COND(C_I32, _))
+IR_FOLD(COND(C_I64, _))
+IR_FOLD(COND(C_DOUBLE, _))
+IR_FOLD(COND(C_FLOAT, _))
+{
+	if (ir_const_is_true(op1_insn)) {
+		IR_FOLD_COPY(op2);
+	} else {
+		IR_FOLD_COPY(op3);
+	}
+}
+
+/* Algebraic simplifications */
+IR_FOLD(ABS(ABS))
+{
+	/* abs(x = abs(y)) => x */
+	IR_FOLD_COPY(op1);
+}
+
+IR_FOLD(ABS(NEG))
+{
+	/* abs(neg(y)) => abs(y) */
+	op1 = op1_insn->op1;
+	IR_FOLD_RESTART;
+}
+
+IR_FOLD(NEG(NEG))
+IR_FOLD(NOT(NOT))
+IR_FOLD(BSWAP(BSWAP))
+{
+	/* f(f(y)) => y */
+	IR_FOLD_COPY(op1_insn->op1);
+}
+
+IR_FOLD(ADD(_, C_U8))
+IR_FOLD(ADD(_, C_U16))
+IR_FOLD(ADD(_, C_U32))
+IR_FOLD(ADD(_, C_U64))
+IR_FOLD(ADD(_, C_I8))
+IR_FOLD(ADD(_, C_I16))
+IR_FOLD(ADD(_, C_I32))
+IR_FOLD(ADD(_, C_I64))
+IR_FOLD(ADD(_, C_ADDR))
+IR_FOLD(SUB(_, C_U8))
+IR_FOLD(SUB(_, C_U16))
+IR_FOLD(SUB(_, C_U32))
+IR_FOLD(SUB(_, C_U64))
+IR_FOLD(SUB(_, C_I8))
+IR_FOLD(SUB(_, C_I16))
+IR_FOLD(SUB(_, C_I32))
+IR_FOLD(SUB(_, C_I64))
+IR_FOLD(SUB(_, C_ADDR))
+IR_FOLD(ADD_OV(_, C_U8))
+IR_FOLD(ADD_OV(_, C_U16))
+IR_FOLD(ADD_OV(_, C_U32))
+IR_FOLD(ADD_OV(_, C_U64))
+IR_FOLD(ADD_OV(_, C_I8))
+IR_FOLD(ADD_OV(_, C_I16))
+IR_FOLD(ADD_OV(_, C_I32))
+IR_FOLD(ADD_OV(_, C_I64))
+IR_FOLD(ADD_OV(_, C_ADDR))
+IR_FOLD(SUB_OV(_, C_U8))
+IR_FOLD(SUB_OV(_, C_U16))
+IR_FOLD(SUB_OV(_, C_U32))
+IR_FOLD(SUB_OV(_, C_U64))
+IR_FOLD(SUB_OV(_, C_I8))
+IR_FOLD(SUB_OV(_, C_I16))
+IR_FOLD(SUB_OV(_, C_I32))
+IR_FOLD(SUB_OV(_, C_I64))
+IR_FOLD(SUB_OV(_, C_ADDR))
+{
+	if (op2_insn->val.u64 == 0) {
+		/* a +/- 0 => a */
+		IR_FOLD_COPY(op1);
+	}
+	IR_FOLD_NEXT;
+}
+
+IR_FOLD(SUB(C_I8, _))
+IR_FOLD(SUB(C_I16, _))
+IR_FOLD(SUB(C_I32, _))
+IR_FOLD(SUB(C_I64, _))
+{
+	if (op1_insn->val.u64 == 0) {
+		/* 0 - a => -a (invalid for +0.0) */
+		opt = IR_NEG | (opt & IR_OPT_TYPE_MASK);
+		op1 = op2;
+		op2 = IR_UNUSED;
+		IR_FOLD_RESTART;
+	}
+	IR_FOLD_NEXT;
+}
+
+IR_FOLD(ADD(NEG, _))
+{
+	/* (-a) + b => b - a */
+	opt++; /* ADD -> SUB */
+	op1 = op2;
+	op2 = op1_insn->op1;
+	IR_FOLD_RESTART;
+}
+
+IR_FOLD(ADD(_, NEG))
+IR_FOLD(SUB(_, NEG))
+{
+	/* a + (-b) => a - b */
+	opt ^= 1; /* ADD <-> SUB */
+	op2 = op2_insn->op1;
+	IR_FOLD_RESTART;
+}
+
+IR_FOLD(ADD(SUB, _))
+{
+	if (IR_IS_TYPE_INT(IR_OPT_TYPE(opt))) {
+		if (op1_insn->op2 == op2) {
+			/* (a - b) + b => a */
+			IR_FOLD_COPY(op1_insn->op1);
+		}
+	}
+	IR_FOLD_NEXT;
+}
+
+IR_FOLD(ADD(_, SUB))
+{
+	if (IR_IS_TYPE_INT(IR_OPT_TYPE(opt))) {
+		if (op2_insn->op2 == op1) {
+			/* a + (b - a) => b */
+			IR_FOLD_COPY(op2_insn->op1);
+		}
+	}
+	IR_FOLD_NEXT;
+}
+
+IR_FOLD(SUB(ADD, _))
+{
+	if (IR_IS_TYPE_INT(IR_OPT_TYPE(opt))) {
+		if (op1_insn->op1 == op2) {
+			/* (a + b) - a => b */
+			IR_FOLD_COPY(op1_insn->op2);
+		} else if (op1_insn->op2 == op2) {
+			/* (a + b) - b => a */
+			IR_FOLD_COPY(op1_insn->op1);
+		}
+	}
+	IR_FOLD_NEXT;
+}
+
+IR_FOLD(SUB(_, ADD))
+{
+	if (IR_IS_TYPE_INT(IR_OPT_TYPE(opt))) {
+		if (op2_insn->op1 == op1) {
+			/* a - (a + b) => -b */
+			opt = IR_NEG | (opt &
IR_OPT_TYPE_MASK); + op1 = op2_insn->op2; + op2 = IR_UNUSED; + IR_FOLD_RESTART; + } else if (op2_insn->op2 == op1) { + /* b - (a + b) => -a */ + opt = IR_NEG | (opt & IR_OPT_TYPE_MASK); + op1 = op2_insn->op1; + op2 = IR_UNUSED; + IR_FOLD_RESTART; + } + } + IR_FOLD_NEXT; +} + +IR_FOLD(SUB(SUB, _)) +{ + if (IR_IS_TYPE_INT(IR_OPT_TYPE(opt))) { + if (op1_insn->op1 == op2) { + /* (a - b) - a => -b */ + opt = IR_NEG | (opt & IR_OPT_TYPE_MASK); + op1 = op1_insn->op2; + op2 = IR_UNUSED; + IR_FOLD_RESTART; + } + } + IR_FOLD_NEXT; +} + +IR_FOLD(SUB(_, SUB)) +{ + if (IR_IS_TYPE_INT(IR_OPT_TYPE(opt))) { + if (op2_insn->op1 == op1) { + /* a - (a - b) => b */ + IR_FOLD_COPY(op2_insn->op2); + } + } + IR_FOLD_NEXT; +} + +IR_FOLD(SUB(ADD, ADD)) +{ + if (IR_IS_TYPE_INT(IR_OPT_TYPE(opt))) { + if (op1_insn->op1 == op2_insn->op1) { + /* (a + b) - (a + c) => b - c */ + op1 = op1_insn->op2; + op2 = op2_insn->op2; + IR_FOLD_RESTART; + } else if (op1_insn->op1 == op2_insn->op2) { + /* (a + b) - (c + a) => b - c */ + op1 = op1_insn->op2; + op2 = op2_insn->op1; + IR_FOLD_RESTART; + } else if (op1_insn->op2 == op2_insn->op1) { + /* (a + b) - (b + c) => a - c */ + op1 = op1_insn->op1; + op2 = op2_insn->op2; + IR_FOLD_RESTART; + } else if (op1_insn->op2 == op2_insn->op2) { + /* (a + b) - (c + b) => a - c */ + op1 = op1_insn->op1; + op2 = op2_insn->op1; + IR_FOLD_RESTART; + } + } + IR_FOLD_NEXT; +} + +// IR_FOLD(SUB(NEG, CONST)) TODO: -a - b => -b - a +// IR_FOLD(MUL(NEG, CONST)) TODO: -a * b => a * -b +// IR_FOLD(DIV(NEG, CONST)) TODO: -a / b => a / -b + +IR_FOLD(MUL(_, C_U8)) +IR_FOLD(MUL(_, C_U16)) +IR_FOLD(MUL(_, C_U32)) +IR_FOLD(MUL(_, C_U64)) +{ + if (op2_insn->val.u64 == 0) { + /* a * 0 => 0 */ + IR_FOLD_COPY(op2); + } else if (op2_insn->val.u64 == 1) { + IR_FOLD_COPY(op1); + } else if (op2_insn->val.u64 == 2) { + opt = IR_ADD | (opt & IR_OPT_TYPE_MASK); + op2 = op1; + IR_FOLD_RESTART; + } + IR_FOLD_NEXT; +} + +IR_FOLD(MUL(_, C_I8)) +IR_FOLD(MUL(_, C_I16)) +IR_FOLD(MUL(_, C_I32)) +IR_FOLD(MUL(_, C_I64)) +{ + if (op2_insn->val.i64 == 0) { + /* a * 0 => 0 */ + IR_FOLD_COPY(op2); + } else if (op2_insn->val.i64 == 1) { + /* a * 1 => a */ + IR_FOLD_COPY(op1); + } else if (op2_insn->val.i64 == 2) { + /* a * 2 => a + a */ + opt = IR_ADD | (opt & IR_OPT_TYPE_MASK); + op2 = op1; + IR_FOLD_RESTART; + } else if (op2_insn->val.i64 == -1) { + /* a * -1 => -a */ + opt = IR_NEG | (opt & IR_OPT_TYPE_MASK); + op2 = IR_UNUSED; + IR_FOLD_RESTART; + } + IR_FOLD_NEXT; +} + +IR_FOLD(MUL(_, C_DOUBLE)) +{ + if (op2_insn->val.d == 1.0) { + /* a * 1.0 => a */ + IR_FOLD_COPY(op1); + } else if (op2_insn->val.d == 2.0) { + /* a * 2.0 => a + a */ + opt = IR_ADD | (opt & IR_OPT_TYPE_MASK); + op2 = op1; + IR_FOLD_RESTART; + } else if (op2_insn->val.d == -1.0) { + /* a * -1.0 => -a */ + opt = IR_NEG | (opt & IR_OPT_TYPE_MASK); + op2 = IR_UNUSED; + IR_FOLD_RESTART; + } + IR_FOLD_NEXT; +} + +IR_FOLD(MUL(_, C_FLOAT)) +{ + if (op2_insn->val.f == 1.0) { + /* a * 1.0 => a */ + IR_FOLD_COPY(op1); + } else if (op2_insn->val.f == 2.0) { + /* a * 2.0 => a + a */ + opt = IR_ADD | (opt & IR_OPT_TYPE_MASK); + op2 = op1; + IR_FOLD_RESTART; + } else if (op2_insn->val.f == -1.0) { + /* a * -1.0 => -a */ + opt = IR_NEG | (opt & IR_OPT_TYPE_MASK); + op2 = IR_UNUSED; + IR_FOLD_RESTART; + } + IR_FOLD_NEXT; +} + +IR_FOLD(DIV(_, C_U8)) +IR_FOLD(DIV(_, C_U16)) +IR_FOLD(DIV(_, C_U32)) +IR_FOLD(DIV(_, C_U64)) +{ + if (op2_insn->val.u64 == 1) { + IR_FOLD_COPY(op1); + } + IR_FOLD_NEXT; +} + +IR_FOLD(DIV(_, C_I8)) +IR_FOLD(DIV(_, C_I16)) +IR_FOLD(DIV(_, C_I32)) 
+IR_FOLD(DIV(_, C_I64)) +{ + if (op2_insn->val.i64 == 1) { + /* a / 1 => a */ + IR_FOLD_COPY(op1); + } else if (op2_insn->val.i64 == -1) { + /* a / -1 => -a */ + opt = IR_NEG | (opt & IR_OPT_TYPE_MASK); + op2 = IR_UNUSED; + IR_FOLD_RESTART; + } + IR_FOLD_NEXT; +} + +IR_FOLD(DIV(_, C_DOUBLE)) +{ + if (op2_insn->val.d == 1.0) { + /* a / 1.0 => a */ + IR_FOLD_COPY(op1); + } else if (op2_insn->val.d == -1.0) { + /* a / -1.0 => -a */ + opt = IR_NEG | (opt & IR_OPT_TYPE_MASK); + op2 = IR_UNUSED; + IR_FOLD_RESTART; + } + IR_FOLD_NEXT; +} + +IR_FOLD(DIV(_, C_FLOAT)) +{ + if (op2_insn->val.f == 1.0) { + /* a / 1.0 => a */ + IR_FOLD_COPY(op1); + } else if (op2_insn->val.f == -1.0) { + /* a / -1.0 => -a */ + opt = IR_NEG | (opt & IR_OPT_TYPE_MASK); + op2 = IR_UNUSED; + IR_FOLD_RESTART; + } + IR_FOLD_NEXT; +} + +IR_FOLD(MUL(NEG, NEG)) +IR_FOLD(DIV(NEG, NEG)) +{ + op1 = op1_insn->op1; + op2 = op2_insn->op1; + IR_FOLD_RESTART; +} + +IR_FOLD(AND(_, C_BOOL)) +{ + IR_FOLD_COPY(op2_insn->val.b ? op1 : op2); +} + +IR_FOLD(AND(_, C_U8)) +IR_FOLD(AND(_, C_I8)) +IR_FOLD(AND(_, C_CHAR)) +{ + if (op2_insn->val.i8 == 0) { + /* a & 0 => 0 */ + IR_FOLD_COPY(op2); + } else if (op2_insn->val.i8 == -1) { + IR_FOLD_COPY(op1); + } + IR_FOLD_NEXT; +} + +IR_FOLD(AND(_, C_U16)) +IR_FOLD(AND(_, C_I16)) +{ + if (op2_insn->val.i16 == 0) { + /* a & 0 => 0 */ + IR_FOLD_COPY(op2); + } else if (op2_insn->val.i16 == -1) { + IR_FOLD_COPY(op1); + } + IR_FOLD_NEXT; +} + +IR_FOLD(AND(_, C_U32)) +IR_FOLD(AND(_, C_I32)) +{ + if (op2_insn->val.i32 == 0) { + /* a & 0 => 0 */ + IR_FOLD_COPY(op2); + } else if (op2_insn->val.i32 == -1) { + IR_FOLD_COPY(op1); + } + IR_FOLD_NEXT; +} + +IR_FOLD(AND(_, C_U64)) +IR_FOLD(AND(_, C_I64)) +{ + if (op2_insn->val.i64 == 0) { + /* a & 0 => 0 */ + IR_FOLD_COPY(op2); + } else if (op2_insn->val.i64 == -1) { + IR_FOLD_COPY(op1); + } + IR_FOLD_NEXT; +} + +IR_FOLD(OR(_, C_BOOL)) +{ + IR_FOLD_COPY(op2_insn->val.b ? 
op2 : op1);
+}
+
+IR_FOLD(OR(_, C_U8))
+IR_FOLD(OR(_, C_I8))
+IR_FOLD(OR(_, C_CHAR))
+{
+	if (op2_insn->val.i8 == -1) {
+		/* a | -1 => -1 */
+		IR_FOLD_COPY(op2);
+	} else if (op2_insn->val.i8 == 0) {
+		IR_FOLD_COPY(op1);
+	}
+	IR_FOLD_NEXT;
+}
+
+IR_FOLD(OR(_, C_U16))
+IR_FOLD(OR(_, C_I16))
+{
+	if (op2_insn->val.i16 == -1) {
+		/* a | -1 => -1 */
+		IR_FOLD_COPY(op2);
+	} else if (op2_insn->val.i16 == 0) {
+		IR_FOLD_COPY(op1);
+	}
+	IR_FOLD_NEXT;
+}
+
+IR_FOLD(OR(_, C_U32))
+IR_FOLD(OR(_, C_I32))
+{
+	if (op2_insn->val.i32 == -1) {
+		/* a | -1 => -1 */
+		IR_FOLD_COPY(op2);
+	} else if (op2_insn->val.i32 == 0) {
+		IR_FOLD_COPY(op1);
+	}
+	IR_FOLD_NEXT;
+}
+
+IR_FOLD(OR(_, C_U64))
+IR_FOLD(OR(_, C_I64))
+{
+	if (op2_insn->val.i64 == -1) {
+		/* a | -1 => -1 */
+		IR_FOLD_COPY(op2);
+	} else if (op2_insn->val.i64 == 0) {
+		IR_FOLD_COPY(op1);
+	}
+	IR_FOLD_NEXT;
+}
+
+IR_FOLD(XOR(_, C_BOOL))
+{
+	if (!op2_insn->val.b) {
+		/* a ^ 0 => a */
+		IR_FOLD_COPY(op1);
+	} else {
+		/* a ^ 1 => !a */
+		opt = IR_NOT | (opt & IR_OPT_TYPE_MASK);
+		op2 = IR_UNUSED;
+		IR_FOLD_RESTART;
+	}
+}
+
+IR_FOLD(XOR(_, C_U8))
+IR_FOLD(XOR(_, C_I8))
+IR_FOLD(XOR(_, C_CHAR))
+{
+	if (op2_insn->val.i8 == 0) {
+		/* a ^ 0 => a */
+		IR_FOLD_COPY(op1);
+	} else if (op2_insn->val.i8 == -1) {
+		/* a ^ -1 => ~a */
+		opt = IR_NOT | (opt & IR_OPT_TYPE_MASK);
+		op2 = IR_UNUSED;
+		IR_FOLD_RESTART;
+	}
+	IR_FOLD_NEXT;
+}
+
+IR_FOLD(XOR(_, C_U16))
+IR_FOLD(XOR(_, C_I16))
+{
+	if (op2_insn->val.i16 == 0) {
+		/* a ^ 0 => a */
+		IR_FOLD_COPY(op1);
+	} else if (op2_insn->val.i16 == -1) {
+		/* a ^ -1 => ~a */
+		opt = IR_NOT | (opt & IR_OPT_TYPE_MASK);
+		op2 = IR_UNUSED;
+		IR_FOLD_RESTART;
+	}
+	IR_FOLD_NEXT;
+}
+
+IR_FOLD(XOR(_, C_U32))
+IR_FOLD(XOR(_, C_I32))
+{
+	if (op2_insn->val.i32 == 0) {
+		/* a ^ 0 => a */
+		IR_FOLD_COPY(op1);
+	} else if (op2_insn->val.i32 == -1) {
+		/* a ^ -1 => ~a */
+		opt = IR_NOT | (opt & IR_OPT_TYPE_MASK);
+		op2 = IR_UNUSED;
+		IR_FOLD_RESTART;
+	}
+	IR_FOLD_NEXT;
+}
+
+IR_FOLD(XOR(_, C_U64))
+IR_FOLD(XOR(_, C_I64))
+{
+	if (op2_insn->val.i64 == 0) {
+		/* a ^ 0 => a */
+		IR_FOLD_COPY(op1);
+	} else if (op2_insn->val.i64 == -1) {
+		/* a ^ -1 => ~a */
+		opt = IR_NOT | (opt & IR_OPT_TYPE_MASK);
+		op2 = IR_UNUSED;
+		IR_FOLD_RESTART;
+	}
+	IR_FOLD_NEXT;
+}
+
+IR_FOLD(SHL(_, C_U8))
+IR_FOLD(SHL(_, C_U16))
+IR_FOLD(SHL(_, C_U32))
+IR_FOLD(SHL(_, C_U64))
+IR_FOLD(SHL(_, C_I8))
+IR_FOLD(SHL(_, C_I16))
+IR_FOLD(SHL(_, C_I32))
+IR_FOLD(SHL(_, C_I64))
+{
+	if (op2_insn->val.u64 == 0) {
+		/* a << 0 => a */
+		IR_FOLD_COPY(op1);
+	} else if (op2_insn->val.u64 == 1) {
+		/* a << 1 => a + a */
+		opt = IR_ADD | (opt & IR_OPT_TYPE_MASK);
+		op2 = op1;
+		IR_FOLD_RESTART;
+	}
+	IR_FOLD_NEXT;
+}
+
+IR_FOLD(SHR(_, C_U8))
+IR_FOLD(SHR(_, C_U16))
+IR_FOLD(SHR(_, C_U32))
+IR_FOLD(SHR(_, C_U64))
+IR_FOLD(SHR(_, C_I8))
+IR_FOLD(SHR(_, C_I16))
+IR_FOLD(SHR(_, C_I32))
+IR_FOLD(SHR(_, C_I64))
+IR_FOLD(SAR(_, C_U8))
+IR_FOLD(SAR(_, C_U16))
+IR_FOLD(SAR(_, C_U32))
+IR_FOLD(SAR(_, C_U64))
+IR_FOLD(SAR(_, C_I8))
+IR_FOLD(SAR(_, C_I16))
+IR_FOLD(SAR(_, C_I32))
+IR_FOLD(SAR(_, C_I64))
+IR_FOLD(ROL(_, C_U8))
+IR_FOLD(ROL(_, C_U16))
+IR_FOLD(ROL(_, C_U32))
+IR_FOLD(ROL(_, C_U64))
+IR_FOLD(ROL(_, C_I8))
+IR_FOLD(ROL(_, C_I16))
+IR_FOLD(ROL(_, C_I32))
+IR_FOLD(ROL(_, C_I64))
+IR_FOLD(ROR(_, C_U8))
+IR_FOLD(ROR(_, C_U16))
+IR_FOLD(ROR(_, C_U32))
+IR_FOLD(ROR(_, C_U64))
+IR_FOLD(ROR(_, C_I8))
+IR_FOLD(ROR(_, C_I16))
+IR_FOLD(ROR(_, C_I32))
+IR_FOLD(ROR(_, C_I64))
+{
+	if (op2_insn->val.u64 == 0) {
+		/* a >> 0 => a (likewise for SAR/ROL/ROR) */
+		IR_FOLD_COPY(op1);
+	}
+	IR_FOLD_NEXT;
+}
+
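+/*
+ * Worked example (an illustrative sketch only; "x" is a hypothetical I32
+ * value, not a rule): each rewrite above re-enters the matcher via
+ * IR_FOLD_RESTART with the updated (opt, op1, op2) triple, so the
+ * simplifications compose during IR construction:
+ *
+ *   SHL(x, 1)        => ADD(x, x)            (a << 1 => a + a)
+ *   XOR(x, -1)       => NOT(x)               (a ^ -1 => ~a)
+ *   XOR(NOT(x), -1)  => NOT(NOT(x)) => x     (involution f(f(y)) => y)
+ *
+ * This is why a double bitwise negation built through the folding API
+ * cancels out without the second NOT node ever being emitted.
+ */
+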
+IR_FOLD(SHL(C_U8, _)) +IR_FOLD(SHL(C_U16, _)) +IR_FOLD(SHL(C_U32, _)) +IR_FOLD(SHL(C_U64, _)) +IR_FOLD(SHL(C_I8, _)) +IR_FOLD(SHL(C_I16, _)) +IR_FOLD(SHL(C_I32, _)) +IR_FOLD(SHL(C_I64, _)) +IR_FOLD(SHR(C_U8, _)) +IR_FOLD(SHR(C_U16, _)) +IR_FOLD(SHR(C_U32, _)) +IR_FOLD(SHR(C_U64, _)) +IR_FOLD(SHR(C_I8, _)) +IR_FOLD(SHR(C_I16, _)) +IR_FOLD(SHR(C_I32, _)) +IR_FOLD(SHR(C_I64, _)) +{ + if (op1_insn->val.u64 == 0) { + /* 0 << a => 0 */ + IR_FOLD_COPY(op1); + } + IR_FOLD_NEXT; +} + +IR_FOLD(SAR(C_U8, _)) +IR_FOLD(SAR(C_I8, _)) +IR_FOLD(ROL(C_U8, _)) +IR_FOLD(ROL(C_I8, _)) +IR_FOLD(ROR(C_U8, _)) +IR_FOLD(ROR(C_I8, _)) +{ + if (op1_insn->val.i8 == 0 || op1_insn->val.i8 == -1) { + IR_FOLD_COPY(op1); + } + IR_FOLD_NEXT; +} + +IR_FOLD(SAR(C_U16, _)) +IR_FOLD(SAR(C_I16, _)) +IR_FOLD(ROL(C_U16, _)) +IR_FOLD(ROL(C_I16, _)) +IR_FOLD(ROR(C_U16, _)) +IR_FOLD(ROR(C_I16, _)) +{ + if (op1_insn->val.i16 == 0 || op1_insn->val.i16 == -1) { + IR_FOLD_COPY(op1); + } + IR_FOLD_NEXT; +} + +IR_FOLD(SAR(C_U32, _)) +IR_FOLD(SAR(C_I32, _)) +IR_FOLD(ROL(C_U32, _)) +IR_FOLD(ROL(C_I32, _)) +IR_FOLD(ROR(C_U32, _)) +IR_FOLD(ROR(C_I32, _)) +{ + if (op1_insn->val.i32 == 0 || op1_insn->val.i32 == -1) { + IR_FOLD_COPY(op1); + } + IR_FOLD_NEXT; +} + +IR_FOLD(SAR(C_U64, _)) +IR_FOLD(SAR(C_I64, _)) +IR_FOLD(ROL(C_U64, _)) +IR_FOLD(ROL(C_I64, _)) +IR_FOLD(ROR(C_U64, _)) +IR_FOLD(ROR(C_I64, _)) +{ + if (op1_insn->val.i64 == 0 || op1_insn->val.i64 == -1) { + IR_FOLD_COPY(op1); + } + IR_FOLD_NEXT; +} + +// TODO: conversions + +// TODO: Reassociation +IR_FOLD(ADD(ADD, C_U8)) +IR_FOLD(ADD(ADD, C_U16)) +IR_FOLD(ADD(ADD, C_U32)) +IR_FOLD(ADD(ADD, C_U64)) +IR_FOLD(ADD(ADD, C_ADDR)) +{ + if (IR_IS_CONST_REF(op1_insn->op2)) { + /* (x + c1) + c2 => x + (c1 + c2) */ + val.u64 = ctx->ir_base[op1_insn->op2].val.u64 + op2_insn->val.u64; + op1 = op1_insn->op1; + op2 = ir_const(ctx, val, IR_OPT_TYPE(opt)); + IR_FOLD_RESTART; + } + IR_FOLD_NEXT; +} + +IR_FOLD(ADD(ADD, C_I8)) +IR_FOLD(ADD(ADD, C_I16)) +IR_FOLD(ADD(ADD, C_I32)) +IR_FOLD(ADD(ADD, C_I64)) +{ + if (IR_IS_CONST_REF(op1_insn->op2)) { + /* (x + c1) + c2 => x + (c1 + c2) */ + val.i64 = ctx->ir_base[op1_insn->op2].val.i64 + op2_insn->val.i64; + op1 = op1_insn->op1; + op2 = ir_const(ctx, val, IR_OPT_TYPE(opt)); + IR_FOLD_RESTART; + } + IR_FOLD_NEXT; +} + +IR_FOLD(MUL(MUL, C_U8)) +IR_FOLD(MUL(MUL, C_U16)) +IR_FOLD(MUL(MUL, C_U32)) +IR_FOLD(MUL(MUL, C_U64)) +{ + if (IR_IS_CONST_REF(op1_insn->op2)) { + /* (x * c1) * c2 => x * (c1 * c2) */ + val.u64 = ctx->ir_base[op1_insn->op2].val.u64 * op2_insn->val.u64; + op1 = op1_insn->op1; + op2 = ir_const(ctx, val, IR_OPT_TYPE(opt)); + IR_FOLD_RESTART; + } + IR_FOLD_NEXT; +} + +IR_FOLD(MUL(MUL, C_I8)) +IR_FOLD(MUL(MUL, C_I16)) +IR_FOLD(MUL(MUL, C_I32)) +IR_FOLD(MUL(MUL, C_I64)) +{ + if (IR_IS_CONST_REF(op1_insn->op2)) { + /* (x * c1) * c2 => x * (c1 * c2) */ + val.i64 = ctx->ir_base[op1_insn->op2].val.i64 * op2_insn->val.i64; + op1 = op1_insn->op1; + op2 = ir_const(ctx, val, IR_OPT_TYPE(opt)); + IR_FOLD_RESTART; + } + IR_FOLD_NEXT; +} + +IR_FOLD(AND(AND, C_U8)) +IR_FOLD(AND(AND, C_U16)) +IR_FOLD(AND(AND, C_U32)) +IR_FOLD(AND(AND, C_U64)) +IR_FOLD(AND(AND, C_I8)) +IR_FOLD(AND(AND, C_I16)) +IR_FOLD(AND(AND, C_I32)) +IR_FOLD(AND(AND, C_I64)) +{ + if (IR_IS_CONST_REF(op1_insn->op2)) { + /* (x & c1) & c2 => x & (c1 & c2) */ + val.u64 = ctx->ir_base[op1_insn->op2].val.u64 & op2_insn->val.u64; + op1 = op1_insn->op1; + op2 = ir_const(ctx, val, IR_OPT_TYPE(opt)); + IR_FOLD_RESTART; + } + IR_FOLD_NEXT; +} + +IR_FOLD(OR(OR, C_U8)) +IR_FOLD(OR(OR, C_U16)) 
+IR_FOLD(OR(OR, C_U32)) +IR_FOLD(OR(OR, C_U64)) +IR_FOLD(OR(OR, C_I8)) +IR_FOLD(OR(OR, C_I16)) +IR_FOLD(OR(OR, C_I32)) +IR_FOLD(OR(OR, C_I64)) +{ + if (IR_IS_CONST_REF(op1_insn->op2)) { + /* (x | c1) | c2 => x | (c1 | c2) */ + val.u64 = ctx->ir_base[op1_insn->op2].val.u64 | op2_insn->val.u64; + op1 = op1_insn->op1; + op2 = ir_const(ctx, val, IR_OPT_TYPE(opt)); + IR_FOLD_RESTART; + } + IR_FOLD_NEXT; +} + +IR_FOLD(XOR(XOR, C_U8)) +IR_FOLD(XOR(XOR, C_U16)) +IR_FOLD(XOR(XOR, C_U32)) +IR_FOLD(XOR(XOR, C_U64)) +IR_FOLD(XOR(XOR, C_I8)) +IR_FOLD(XOR(XOR, C_I16)) +IR_FOLD(XOR(XOR, C_I32)) +IR_FOLD(XOR(XOR, C_I64)) +{ + if (IR_IS_CONST_REF(op1_insn->op2)) { + /* (x ^ c1) ^ c2 => x ^ (c1 ^ c2) */ + val.u64 = ctx->ir_base[op1_insn->op2].val.u64 ^ op2_insn->val.u64; + op1 = op1_insn->op1; + op2 = ir_const(ctx, val, IR_OPT_TYPE(opt)); + IR_FOLD_RESTART; + } + IR_FOLD_NEXT; +} + +IR_FOLD(AND(AND, _)) +IR_FOLD(OR(OR, _)) +IR_FOLD(MIN(MIN, _)) +IR_FOLD(MAX(MAX, _)) +{ + if (op1_insn->op1 == op2 || op1_insn->op2 == op2) { + IR_FOLD_COPY(op2); + } + IR_FOLD_NEXT; +} + +IR_FOLD(XOR(XOR, _)) +{ + if (op1_insn->op1 == op2) { + IR_FOLD_COPY(op1_insn->op2); + } else if (op1_insn->op2 == op2) { + IR_FOLD_COPY(op1_insn->op1); + } + IR_FOLD_NEXT; +} + +/* Swap operands (move lower ref to op2) for better CSE */ +IR_FOLD(ADD(_, _)) +IR_FOLD(MUL(_, _)) +IR_FOLD_NAMED(swap_ops) +{ + if (op1 < op2) { /* move lower ref to op2 */ + ir_ref tmp = op1; + op1 = op2; + op2 = tmp; + IR_FOLD_RESTART; + } + IR_FOLD_NEXT; +} + +IR_FOLD(ADD_OV(_, _)) +IR_FOLD(MUL_OV(_, _)) +{ + if (op1 < op2) { /* move lower ref to op2 */ + ir_ref tmp = op1; + op1 = op2; + op2 = tmp; + IR_FOLD_RESTART; + } + /* skip CSE ??? */ + IR_FOLD_EMIT; +} + +IR_FOLD(SUB(_, _)) +{ + if (IR_IS_TYPE_INT(IR_OPT_TYPE(opt)) && op1 == op2) { + IR_FOLD_CONST_U(0); + } + IR_FOLD_NEXT; +} + +IR_FOLD(SUB_OV(_, _)) +{ + if (op1 == op2) { + IR_FOLD_CONST_U(0); + } + /* skip CSE ??? 
*/
+	IR_FOLD_EMIT;
+}
+
+/* Binary operations with op1 == op2 */
+IR_FOLD(AND(_, _))
+IR_FOLD(OR(_, _))
+IR_FOLD(MIN(_, _))
+IR_FOLD(MAX(_, _))
+{
+	/* a & a => a (likewise for OR/MIN/MAX) */
+	if (op1 == op2) {
+		IR_FOLD_COPY(op1);
+	}
+	IR_FOLD_DO_NAMED(swap_ops);
+}
+
+IR_FOLD(XOR(_, _))
+{
+	/* a xor a => 0 */
+	if (op1 == op2) {
+		IR_FOLD_CONST_U(0);
+	}
+	IR_FOLD_DO_NAMED(swap_ops);
+}
+
+IR_FOLD(EQ(_, _))
+IR_FOLD(NE(_, _))
+{
+	if (op1 != op2) {
+		IR_FOLD_DO_NAMED(swap_ops);
+	} else if (IR_IS_TYPE_INT(op1_insn->type)) {
+		/* a == a => true */
+		IR_FOLD_BOOL((opt & IR_OPT_OP_MASK) == IR_EQ);
+	}
+	IR_FOLD_NEXT;
+}
+
+IR_FOLD(LT(_, _))
+IR_FOLD(GE(_, _))
+IR_FOLD(LE(_, _))
+IR_FOLD(GT(_, _))
+{
+	if (op1 == op2) {
+		if (IR_IS_TYPE_INT(op1_insn->type)) {
+			/* a >= a => true (the two low opcode bits differ) */
+			IR_FOLD_BOOL((opt ^ (opt >> 1)) & 1);
+		}
+	} else if (op1 < op2) { /* move lower ref to op2 */
+		ir_ref tmp = op1;
+		op1 = op2;
+		op2 = tmp;
+		opt ^= 3; /* [U]LT <-> [U]GT, [U]LE <-> [U]GE */
+		IR_FOLD_RESTART;
+	}
+	IR_FOLD_NEXT;
+}
+
+IR_FOLD(ULT(_, _))
+IR_FOLD(UGE(_, _))
+IR_FOLD(ULE(_, _))
+IR_FOLD(UGT(_, _))
+{
+	if (op1 == op2) {
+		/* a >= a => true (the two low opcode bits differ) */
+		IR_FOLD_BOOL((opt ^ (opt >> 1)) & 1);
+	} else if (op1 < op2) { /* move lower ref to op2 */
+		ir_ref tmp = op1;
+		op1 = op2;
+		op2 = tmp;
+		opt ^= 3; /* [U]LT <-> [U]GT, [U]LE <-> [U]GE */
+	}
+	IR_FOLD_NEXT;
+}
+
+IR_FOLD(COND(_, _)) // TODO: COND(_, _, _)
+{
+	if (op2 == op3) {
+		IR_FOLD_COPY(op2);
+	}
+	IR_FOLD_NEXT;
+}
diff --git a/ext/opcache/jit/ir/ir_gcm.c b/ext/opcache/jit/ir/ir_gcm.c
new file mode 100644
index 00000000000..694271a57c2
--- /dev/null
+++ b/ext/opcache/jit/ir/ir_gcm.c
@@ -0,0 +1,897 @@
+/*
+ * IR - Lightweight JIT Compilation Framework
+ * (GCM - Global Code Motion and Scheduler)
+ * Copyright (C) 2022 Zend by Perforce.
+ * Authors: Dmitry Stogov
+ *
+ * The GCM algorithm is based on Cliff Click's publication
+ * See: C. Click. "Global code motion, global value numbering" Submitted to PLDI'95.
+ * Each node is first scheduled as early as its inputs allow, then sunk to the
+ * latest block that dominates all of its uses, preferring shallow loop nesting.
+ */
+
+#include "ir.h"
+#include "ir_private.h"
+
+static int32_t ir_gcm_schedule_early(ir_ctx *ctx, int32_t *_blocks, ir_ref ref, ir_list *queue_rest)
+{
+	ir_ref n, *p, input;
+	ir_insn *insn;
+	uint32_t dom_depth;
+	int32_t b, result;
+	bool reschedule_late = 1;
+
+	insn = &ctx->ir_base[ref];
+
+	IR_ASSERT(insn->op != IR_PARAM && insn->op != IR_VAR);
+	IR_ASSERT(insn->op != IR_PHI && insn->op != IR_PI);
+
+	result = 1;
+	dom_depth = 0;
+
+	n = insn->inputs_count;
+	for (p = insn->ops + 1; n > 0; p++, n--) {
+		input = *p;
+		if (input > 0) {
+			b = _blocks[input];
+			if (b == 0) {
+				b = ir_gcm_schedule_early(ctx, _blocks, input, queue_rest);
+			} else if (b < 0) {
+				b = -b;
+			}
+			if (dom_depth < ctx->cfg_blocks[b].dom_depth) {
+				dom_depth = ctx->cfg_blocks[b].dom_depth;
+				result = b;
+			}
+			reschedule_late = 0;
+		}
+	}
+	_blocks[ref] = -result;
+
+	if (UNEXPECTED(reschedule_late)) {
+		/* Floating nodes that don't depend on other nodes
+		 * (e.g. only on constants) have to be scheduled to the
+		 * last common ancestor. Otherwise they always go to the
+		 * first block.
+ */ + ir_list_push_unchecked(queue_rest, ref); + } + return result; +} + +/* Last Common Ancestor */ +static int32_t ir_gcm_find_lca(ir_ctx *ctx, int32_t b1, int32_t b2) +{ + uint32_t dom_depth; + + dom_depth = ctx->cfg_blocks[b2].dom_depth; + while (ctx->cfg_blocks[b1].dom_depth > dom_depth) { + b1 = ctx->cfg_blocks[b1].dom_parent; + } + dom_depth = ctx->cfg_blocks[b1].dom_depth; + while (ctx->cfg_blocks[b2].dom_depth > dom_depth) { + b2 = ctx->cfg_blocks[b2].dom_parent; + } + while (b1 != b2) { + b1 = ctx->cfg_blocks[b1].dom_parent; + b2 = ctx->cfg_blocks[b2].dom_parent; + } + return b2; +} + +static void ir_gcm_schedule_late(ir_ctx *ctx, int32_t *_blocks, ir_ref ref) +{ + ir_ref n, *p, use; + ir_insn *insn; + ir_use_list *use_list; + + IR_ASSERT(_blocks[ref] < 0); + _blocks[ref] = -_blocks[ref]; + use_list = &ctx->use_lists[ref]; + n = use_list->count; + if (n) { + int32_t lca, b; + + insn = &ctx->ir_base[ref]; + IR_ASSERT(insn->op != IR_PARAM && insn->op != IR_VAR); + IR_ASSERT(insn->op != IR_PHI && insn->op != IR_PI); + + lca = 0; + for (p = &ctx->use_edges[use_list->refs]; n > 0; p++, n--) { + use = *p; + b = _blocks[use]; + if (!b) { + continue; + } else if (b < 0) { + ir_gcm_schedule_late(ctx, _blocks, use); + b = _blocks[use]; + IR_ASSERT(b != 0); + } + insn = &ctx->ir_base[use]; + if (insn->op == IR_PHI) { + ir_ref *p = insn->ops + 2; /* PHI data inputs */ + ir_ref *q = ctx->ir_base[insn->op1].ops + 1; /* MERGE inputs */ + ir_ref n = insn->inputs_count - 1; + + for (;n > 0; p++, q++, n--) { + if (*p == ref) { + b = _blocks[*q]; + lca = !lca ? b : ir_gcm_find_lca(ctx, lca, b); + } + } + } else { + lca = !lca ? b : ir_gcm_find_lca(ctx, lca, b); + } + } + IR_ASSERT(lca != 0 && "No Common Ancestor"); + b = lca; + + if (b != _blocks[ref]) { + ir_block *bb = &ctx->cfg_blocks[b]; + uint32_t loop_depth = bb->loop_depth; + + if (loop_depth) { + uint32_t flags; + + use_list = &ctx->use_lists[ref]; + if (use_list->count == 1) { + use = ctx->use_edges[use_list->refs]; + insn = &ctx->ir_base[use]; + if (insn->op == IR_IF || insn->op == IR_GUARD || insn->op == IR_GUARD_NOT) { + _blocks[ref] = b; + return; + } + } + + flags = (bb->flags & IR_BB_LOOP_HEADER) ? bb->flags : ctx->cfg_blocks[bb->loop_header].flags; + if ((flags & IR_BB_LOOP_WITH_ENTRY) + && !(ctx->binding && ir_binding_find(ctx, ref))) { + /* Don't move loop invariant code across an OSR ENTRY if we can't restore it */ + } else { + do { + lca = bb->dom_parent; + bb = &ctx->cfg_blocks[lca]; + if (bb->loop_depth < loop_depth) { + if (!bb->loop_depth) { + b = lca; + break; + } + flags = (bb->flags & IR_BB_LOOP_HEADER) ? 
bb->flags : ctx->cfg_blocks[bb->loop_header].flags; + if ((flags & IR_BB_LOOP_WITH_ENTRY) + && !(ctx->binding && ir_binding_find(ctx, ref))) { + break; + } + loop_depth = bb->loop_depth; + b = lca; + } + } while (lca != _blocks[ref]); + } + } + _blocks[ref] = b; + if (ctx->ir_base[ref + 1].op == IR_OVERFLOW) { + /* OVERFLOW is a projection and must be scheduled together with previous ADD/SUB/MUL_OV */ + _blocks[ref + 1] = b; + } + } + } +} + +static void ir_gcm_schedule_rest(ir_ctx *ctx, int32_t *_blocks, ir_ref ref) +{ + ir_ref n, *p, use; + ir_insn *insn; + + IR_ASSERT(_blocks[ref] < 0); + _blocks[ref] = -_blocks[ref]; + n = ctx->use_lists[ref].count; + if (n) { + uint32_t lca; + int32_t b; + + insn = &ctx->ir_base[ref]; + IR_ASSERT(insn->op != IR_PARAM && insn->op != IR_VAR); + IR_ASSERT(insn->op != IR_PHI && insn->op != IR_PI); + + lca = 0; + for (p = &ctx->use_edges[ctx->use_lists[ref].refs]; n > 0; p++, n--) { + use = *p; + b = _blocks[use]; + if (!b) { + continue; + } else if (b < 0) { + ir_gcm_schedule_late(ctx, _blocks, use); + b = _blocks[use]; + IR_ASSERT(b != 0); + } + insn = &ctx->ir_base[use]; + if (insn->op == IR_PHI) { + ir_ref *p = insn->ops + 2; /* PHI data inputs */ + ir_ref *q = ctx->ir_base[insn->op1].ops + 1; /* MERGE inputs */ + + ir_ref n = insn->inputs_count - 1; + + for (;n > 0; p++, q++, n--) { + if (*p == ref) { + b = _blocks[*q]; + lca = !lca ? b : ir_gcm_find_lca(ctx, lca, b); + } + } + } else { + lca = !lca ? b : ir_gcm_find_lca(ctx, lca, b); + } + } + IR_ASSERT(lca != 0 && "No Common Ancestor"); + b = lca; + _blocks[ref] = b; + if (ctx->ir_base[ref + 1].op == IR_OVERFLOW) { + /* OVERFLOW is a projection and must be scheduled together with previous ADD/SUB/MUL_OV */ + _blocks[ref + 1] = b; + } + } +} + +int ir_gcm(ir_ctx *ctx) +{ + ir_ref k, n, *p, ref; + ir_block *bb; + ir_list queue_early; + ir_list queue_late; + ir_list queue_rest; + int32_t *_blocks, b; + ir_insn *insn, *use_insn; + ir_use_list *use_list; + + IR_ASSERT(ctx->cfg_map); + _blocks = (int32_t*)ctx->cfg_map; + + ir_list_init(&queue_early, ctx->insns_count); + + if (ctx->cfg_blocks_count == 1) { + ref = ctx->cfg_blocks[1].end; + do { + insn = &ctx->ir_base[ref]; + _blocks[ref] = 1; /* pin to block */ + if (insn->inputs_count > 1) { + /* insn has input data edges */ + ir_list_push_unchecked(&queue_early, ref); + } + ref = insn->op1; /* control predecessor */ + } while (ref != 1); /* IR_START */ + _blocks[1] = 1; /* pin to block */ + + use_list = &ctx->use_lists[1]; + n = use_list->count; + for (p = &ctx->use_edges[use_list->refs]; n > 0; n--, p++) { + ref = *p; + use_insn = &ctx->ir_base[ref]; + if (use_insn->op == IR_PARAM || use_insn->op == IR_VAR) { + ctx->cfg_blocks[1].flags |= (use_insn->op == IR_PARAM) ? 
IR_BB_HAS_PARAM : IR_BB_HAS_VAR;
+				_blocks[ref] = 1; /* pin to block */
+			}
+		}
+
+		/* Place all live nodes to the first block */
+		while (ir_list_len(&queue_early)) {
+			ref = ir_list_pop(&queue_early);
+			insn = &ctx->ir_base[ref];
+			n = insn->inputs_count;
+			for (p = insn->ops + 1; n > 0; p++, n--) {
+				ref = *p;
+				if (ref > 0 && _blocks[ref] == 0) {
+					_blocks[ref] = 1;
+					ir_list_push_unchecked(&queue_early, ref);
+				}
+			}
+		}
+
+		ir_list_free(&queue_early);
+
+		return 1;
+	}
+
+	ir_list_init(&queue_late, ctx->insns_count);
+
+	/* pin and collect control and control-dependent (PARAM, VAR, PHI, PI) instructions */
+	b = ctx->cfg_blocks_count;
+	for (bb = ctx->cfg_blocks + b; b > 0; bb--, b--) {
+		IR_ASSERT(!(bb->flags & IR_BB_UNREACHABLE));
+		ref = bb->end;
+
+		/* process the last instruction of the block */
+		insn = &ctx->ir_base[ref];
+		_blocks[ref] = b; /* pin to block */
+		if (insn->inputs_count > 1) {
+			/* insn has input data edges */
+			ir_list_push_unchecked(&queue_early, ref);
+		}
+		ref = insn->op1; /* control predecessor */
+
+		while (ref != bb->start) {
+			insn = &ctx->ir_base[ref];
+			_blocks[ref] = b; /* pin to block */
+			if (insn->inputs_count > 1) {
+				/* insn has input data edges */
+				ir_list_push_unchecked(&queue_early, ref);
+			}
+			if (insn->type != IR_VOID) {
+				IR_ASSERT(ir_op_flags[insn->op] & IR_OP_FLAG_MEM);
+				ir_list_push_unchecked(&queue_late, ref);
+			}
+			ref = insn->op1; /* control predecessor */
+		}
+
+		/* process the first instruction of the block */
+		_blocks[ref] = b; /* pin to block */
+
+		use_list = &ctx->use_lists[ref];
+		n = use_list->count;
+		if (n > 1) {
+			for (p = &ctx->use_edges[use_list->refs]; n > 0; n--, p++) {
+				ref = *p;
+				use_insn = &ctx->ir_base[ref];
+				if (use_insn->op == IR_PHI || use_insn->op == IR_PI) {
+					bb->flags |= (use_insn->op == IR_PHI) ?
IR_BB_HAS_PHI : IR_BB_HAS_PI; + if (EXPECTED(ctx->use_lists[ref].count != 0)) { + _blocks[ref] = b; /* pin to block */ + ir_list_push_unchecked(&queue_early, ref); + ir_list_push_unchecked(&queue_late, ref); + } + } else if (use_insn->op == IR_PARAM) { + bb->flags |= IR_BB_HAS_PARAM; + _blocks[ref] = b; /* pin to block */ + if (EXPECTED(ctx->use_lists[ref].count != 0)) { + ir_list_push_unchecked(&queue_late, ref); + } + } else if (use_insn->op == IR_VAR) { + bb->flags |= IR_BB_HAS_VAR; + _blocks[ref] = b; /* pin to block */ + if (EXPECTED(ctx->use_lists[ref].count != 0)) { + /* This is necessary only for VADDR */ + ir_list_push_unchecked(&queue_late, ref); + } + } + } + } + } + + ir_list_init(&queue_rest, ctx->insns_count); + + n = ir_list_len(&queue_early); + while (n > 0) { + n--; + ref = ir_list_at(&queue_early, n); + insn = &ctx->ir_base[ref]; + k = insn->inputs_count - 1; + for (p = insn->ops + 2; k > 0; p++, k--) { + ref = *p; + if (ref > 0 && _blocks[ref] == 0) { + ir_gcm_schedule_early(ctx, _blocks, ref, &queue_rest); + } + } + } + +#ifdef IR_DEBUG + if (ctx->flags & IR_DEBUG_GCM) { + fprintf(stderr, "GCM Schedule Early\n"); + for (n = 1; n < ctx->insns_count; n++) { + fprintf(stderr, "%d -> %d\n", n, _blocks[n]); + } + } +#endif + + n = ir_list_len(&queue_late); + while (n > 0) { + n--; + ref = ir_list_at(&queue_late, n); + use_list = &ctx->use_lists[ref]; + k = use_list->count; + for (p = &ctx->use_edges[use_list->refs]; k > 0; p++, k--) { + ref = *p; + if (_blocks[ref] < 0) { + ir_gcm_schedule_late(ctx, _blocks, ref); + } + } + } + + n = ir_list_len(&queue_rest); + while (n > 0) { + n--; + ref = ir_list_at(&queue_rest, n); + ir_gcm_schedule_rest(ctx, _blocks, ref); + } + + ir_list_free(&queue_early); + ir_list_free(&queue_late); + ir_list_free(&queue_rest); + +#ifdef IR_DEBUG + if (ctx->flags & IR_DEBUG_GCM) { + fprintf(stderr, "GCM Schedule Late\n"); + for (n = 1; n < ctx->insns_count; n++) { + fprintf(stderr, "%d -> %d\n", n, _blocks[n]); + } + } +#endif + + return 1; +} + +static void ir_xlat_binding(ir_ctx *ctx, ir_ref *_xlat) +{ + uint32_t n1, n2, pos; + ir_ref key; + ir_hashtab_bucket *b1, *b2; + ir_hashtab *binding = ctx->binding; + uint32_t hash_size = (uint32_t)(-(int32_t)binding->mask); + + memset((char*)binding->data - (hash_size * sizeof(uint32_t)), -1, hash_size * sizeof(uint32_t)); + n1 = binding->count; + n2 = 0; + pos = 0; + b1 = binding->data; + b2 = binding->data; + while (n1 > 0) { + key = b1->key; + IR_ASSERT(key < ctx->insns_count); + if (_xlat[key]) { + key = _xlat[key]; + b2->key = key; + if (b1->val > 0) { + IR_ASSERT(_xlat[b1->val]); + b2->val = _xlat[b1->val]; + } else { + b2->val = b1->val; + } + key |= binding->mask; + b2->next = ((uint32_t*)binding->data)[key]; + ((uint32_t*)binding->data)[key] = pos; + pos += sizeof(ir_hashtab_bucket); + b2++; + n2++; + } + b1++; + n1--; + } + binding->count = n2; +} + +IR_ALWAYS_INLINE ir_ref ir_count_constant(ir_ref *_xlat, ir_ref ref) +{ + if (!_xlat[ref]) { + _xlat[ref] = ref; /* this is only a "used constant" marker */ + return 1; + } + return 0; +} + +int ir_schedule(ir_ctx *ctx) +{ + ir_ctx new_ctx; + ir_ref i, j, k, n, *p, *q, ref, new_ref, prev_ref, insns_count, consts_count, use_edges_count; + ir_ref *_xlat; + ir_ref *edges; + uint32_t b, prev_b; + uint32_t *_blocks = ctx->cfg_map; + ir_ref *_next = ir_mem_malloc(ctx->insns_count * sizeof(ir_ref)); + ir_ref *_prev = ir_mem_malloc(ctx->insns_count * sizeof(ir_ref)); + ir_ref _move_down = 0; + ir_block *bb; + ir_insn *insn, *new_insn; + ir_use_list *lists, 
*use_list, *new_list; + + /* Create a double-linked list of nodes ordered by BB, respecting BB->start and BB->end */ + prev_b = _blocks[1]; + IR_ASSERT(prev_b); + _prev[1] = 0; + _prev[ctx->cfg_blocks[1].end] = 0; + for (i = 2, j = 1; i < ctx->insns_count; i++) { + b = _blocks[i]; + IR_ASSERT((int32_t)b >= 0); + if (b == prev_b) { + /* add to the end of the list */ + _next[j] = i; + _prev[i] = j; + j = i; + } else if (b > prev_b) { + bb = &ctx->cfg_blocks[b]; + if (i == bb->start) { + IR_ASSERT(bb->end > bb->start); + prev_b = b; + _prev[bb->end] = 0; + /* add to the end of the list */ + _next[j] = i; + _prev[i] = j; + j = i; + } else { + IR_ASSERT(i != bb->end); + /* move down late (see the following loop) */ + _next[i] = _move_down; + _move_down = i; + } + } else if (b) { + bb = &ctx->cfg_blocks[b]; + IR_ASSERT(i != bb->start); + if (_prev[bb->end]) { + /* move up, insert before the end of the already scheduled BB */ + k = bb->end; + } else { + /* move up, insert at the end of the block */ + k = ctx->cfg_blocks[b + 1].start; + } + /* insert before "k" */ + _prev[i] = _prev[k]; + _next[i] = k; + _next[_prev[k]] = i; + _prev[k] = i; + } + } + _next[j] = 0; + + while (_move_down) { + i = _move_down; + _move_down = _next[i]; + b = _blocks[i]; + bb = &ctx->cfg_blocks[b]; + k = _next[bb->start]; + + if (bb->flags & (IR_BB_HAS_PHI|IR_BB_HAS_PI|IR_BB_HAS_PARAM|IR_BB_HAS_VAR)) { + /* insert after the start of the block and all PARAM, VAR, PI, PHI */ + insn = &ctx->ir_base[k]; + while (insn->op == IR_PHI || insn->op == IR_PARAM || insn->op == IR_VAR || insn->op == IR_PI) { + k = _next[k]; + insn = &ctx->ir_base[k]; + } + } + + /* insert before "k" */ + _prev[i] = _prev[k]; + _next[i] = k; + _next[_prev[k]] = i; + _prev[k] = i; + } + +#ifdef IR_DEBUG + if (ctx->flags & IR_DEBUG_SCHEDULE) { + fprintf(stderr, "Before Schedule\n"); + for (i = 1; i != 0; i = _next[i]) { + fprintf(stderr, "%d -> %d\n", i, _blocks[i]); + } + } +#endif + + _xlat = ir_mem_calloc((ctx->consts_count + ctx->insns_count), sizeof(ir_ref)); + _xlat += ctx->consts_count; + _xlat[IR_TRUE] = IR_TRUE; + _xlat[IR_FALSE] = IR_FALSE; + _xlat[IR_NULL] = IR_NULL; + _xlat[IR_UNUSED] = IR_UNUSED; + insns_count = 1; + consts_count = -(IR_TRUE - 1); + + /* Topological sort according dependencies inside each basic block */ + for (b = 1, bb = ctx->cfg_blocks + 1; b <= ctx->cfg_blocks_count; b++, bb++) { + IR_ASSERT(!(bb->flags & IR_BB_UNREACHABLE)); + /* Schedule BB start */ + i = bb->start; + _xlat[i] = bb->start = insns_count; + insn = &ctx->ir_base[i]; + if (insn->op == IR_CASE_VAL) { + IR_ASSERT(insn->op2 < IR_TRUE); + consts_count += ir_count_constant(_xlat, insn->op2); + } + n = insn->inputs_count; + insns_count += ir_insn_inputs_to_len(n); + i = _next[i]; + insn = &ctx->ir_base[i]; + if (bb->flags & (IR_BB_HAS_PHI|IR_BB_HAS_PI|IR_BB_HAS_PARAM|IR_BB_HAS_VAR)) { + /* Schedule PARAM, VAR, PI */ + while (insn->op == IR_PARAM || insn->op == IR_VAR || insn->op == IR_PI) { + _xlat[i] = insns_count; + insns_count += 1; + i = _next[i]; + insn = &ctx->ir_base[i]; + } + /* Schedule PHIs */ + while (insn->op == IR_PHI) { + ir_ref j, *p, input; + + _xlat[i] = insns_count; + /* Reuse "n" from MERGE and skip first input */ + insns_count += ir_insn_inputs_to_len(n + 1); + for (j = n, p = insn->ops + 2; j > 0; p++, j--) { + input = *p; + if (input < IR_TRUE) { + consts_count += ir_count_constant(_xlat, input); + } + } + i = _next[i]; + insn = &ctx->ir_base[i]; + } + } + while (i != bb->end) { + ir_ref n, j, *p, input; + +restart: + n = 
insn->inputs_count; + for (j = n, p = insn->ops + 1; j > 0; p++, j--) { + input = *p; + if (!_xlat[input]) { + /* input is not scheduled yet */ + if (input > 0) { + if (_blocks[input] == b) { + /* "input" should be before "i" to satisfy dependency */ +#ifdef IR_DEBUG + if (ctx->flags & IR_DEBUG_SCHEDULE) { + fprintf(stderr, "Wrong dependency %d:%d -> %d\n", b, input, i); + } +#endif + /* remove "input" */ + _prev[_next[input]] = _prev[input]; + _next[_prev[input]] = _next[input]; + /* insert before "i" */ + _prev[input] = _prev[i]; + _next[input] = i; + _next[_prev[i]] = input; + _prev[i] = input; + /* restart from "input" */ + i = input; + insn = &ctx->ir_base[i]; + goto restart; + } + } else if (input < IR_TRUE) { + consts_count += ir_count_constant(_xlat, input); + } + } + } + _xlat[i] = insns_count; + insns_count += ir_insn_inputs_to_len(n); + i = _next[i]; + insn = &ctx->ir_base[i]; + } + /* Schedule BB end */ + _xlat[i] = bb->end = insns_count; + insns_count++; + if (IR_INPUT_EDGES_COUNT(ir_op_flags[insn->op]) == 2) { + if (insn->op2 < IR_TRUE) { + consts_count += ir_count_constant(_xlat, insn->op2); + } + } + } + +#ifdef IR_DEBUG + if (ctx->flags & IR_DEBUG_SCHEDULE) { + fprintf(stderr, "After Schedule\n"); + for (i = 1; i != 0; i = _next[i]) { + fprintf(stderr, "%d -> %d\n", i, _blocks[i]); + } + } +#endif + +#if 1 + /* Check if scheduling didn't make any modifications */ + if (consts_count == ctx->consts_count && insns_count == ctx->insns_count) { + bool changed = 0; + + for (i = 1; i != 0; i = _next[i]) { + if (_xlat[i] != i) { + changed = 1; + break; + } + } + if (!changed) { + _xlat -= ctx->consts_count; + ir_mem_free(_xlat); + ir_mem_free(_next); + + ctx->prev_ref = _prev; + ctx->flags |= IR_LINEAR; + ir_truncate(ctx); + + return 1; + } + } +#endif + + ir_mem_free(_prev); + + ir_init(&new_ctx, ctx->flags, consts_count, insns_count); + new_ctx.insns_count = insns_count; + new_ctx.ret_type = ctx->ret_type; + new_ctx.mflags = ctx->mflags; + new_ctx.spill_base = ctx->spill_base; + new_ctx.fixed_stack_red_zone = ctx->fixed_stack_red_zone; + new_ctx.fixed_stack_frame_size = ctx->fixed_stack_frame_size; + new_ctx.fixed_call_stack_size = ctx->fixed_call_stack_size; + new_ctx.fixed_regset = ctx->fixed_regset; + new_ctx.fixed_save_regset = ctx->fixed_save_regset; + new_ctx.entries_count = ctx->entries_count; +#if defined(IR_TARGET_AARCH64) + new_ctx.deoptimization_exits = ctx->deoptimization_exits; + new_ctx.get_exit_addr = ctx->get_exit_addr; + new_ctx.get_veneer = ctx->get_veneer; + new_ctx.set_veneer = ctx->set_veneer; +#endif + new_ctx.loader = ctx->loader; + + /* Copy constants */ + if (consts_count == ctx->consts_count) { + new_ctx.consts_count = consts_count; + ref = 1 - consts_count; + insn = &ctx->ir_base[ref]; + new_insn = &new_ctx.ir_base[ref]; + + memcpy(new_insn, insn, sizeof(ir_insn) * (IR_TRUE - ref)); + if (ctx->strtab.data) { + while (ref != IR_TRUE) { + if (new_insn->op == IR_FUNC || new_insn->op == IR_SYM || new_insn->op == IR_STR) { + new_insn->val.addr = ir_str(&new_ctx, ir_get_str(ctx, new_insn->val.i32)); + } + new_insn++; + ref++; + } + } + } else { + new_ref = -new_ctx.consts_count; + new_insn = &new_ctx.ir_base[new_ref]; + for (ref = IR_TRUE - 1, insn = &ctx->ir_base[ref]; ref > -ctx->consts_count; insn--, ref--) { + if (!_xlat[ref]) { + continue; + } + new_insn->optx = insn->optx; + new_insn->prev_const = 0; + if (insn->op == IR_FUNC || insn->op == IR_SYM || insn->op == IR_STR) { + new_insn->val.addr = ir_str(&new_ctx, ir_get_str(ctx, insn->val.i32)); + } else 
{ + new_insn->val.u64 = insn->val.u64; + } + _xlat[ref] = new_ref; + new_ref--; + new_insn--; + } + new_ctx.consts_count = -new_ref; + } + + new_ctx.cfg_map = ir_mem_calloc(ctx->insns_count, sizeof(uint32_t)); + new_ctx.prev_ref = _prev = ir_mem_malloc(insns_count * sizeof(ir_ref)); + new_ctx.use_lists = lists = ir_mem_malloc(insns_count * sizeof(ir_use_list)); + new_ctx.use_edges = edges = ir_mem_malloc(ctx->use_edges_count * sizeof(ir_ref)); + + /* Copy instructions, use lists and use edges */ + prev_ref = 0; + use_edges_count = 0; + for (i = 1; i != 0; i = _next[i]) { + new_ref = _xlat[i]; + new_ctx.cfg_map[new_ref] = _blocks[i]; + _prev[new_ref] = prev_ref; + prev_ref = new_ref; + + use_list = &ctx->use_lists[i]; + n = use_list->count; + k = 0; + if (n == 1) { + ref = ctx->use_edges[use_list->refs]; + if (_xlat[ref]) { + *edges = _xlat[ref]; + edges++; + k = 1; + } + } else { + p = &ctx->use_edges[use_list->refs]; + while (n--) { + ref = *p; + if (_xlat[ref]) { + *edges = _xlat[ref]; + edges++; + k++; + } + p++; + } + } + new_list = &lists[new_ref]; + new_list->refs = use_edges_count; + use_edges_count += k; + new_list->count = k; + + insn = &ctx->ir_base[i]; + new_insn = &new_ctx.ir_base[new_ref]; + + new_insn->optx = insn->optx; + n = new_insn->inputs_count; + switch (n) { + case 0: + new_insn->op1 = insn->op1; + new_insn->op2 = insn->op2; + new_insn->op3 = insn->op3; + break; + case 1: + new_insn->op1 = _xlat[insn->op1]; + if (new_insn->op == IR_PARAM || insn->op == IR_VAR) { + new_insn->op2 = ir_str(&new_ctx, ir_get_str(ctx, insn->op2)); + } else { + new_insn->op2 = insn->op2; + } + new_insn->op3 = insn->op3; + break; + case 2: + new_insn->op1 = _xlat[insn->op1]; + new_insn->op2 = _xlat[insn->op2]; + new_insn->op3 = insn->op3; + break; + case 3: + new_insn->op1 = _xlat[insn->op1]; + new_insn->op2 = _xlat[insn->op2]; + new_insn->op3 = _xlat[insn->op3]; + break; + default: + for (j = n, p = insn->ops + 1, q = new_insn->ops + 1; j > 0; p++, q++, j--) { + *q = _xlat[*p]; + } + break; + } + } + + /* Update list of terminators (IR_OPND_CONTROL_REF) */ + insn = &new_ctx.ir_base[1]; + ref = insn->op1; + if (ref) { + insn->op1 = ref = _xlat[ref]; + while (1) { + insn = &new_ctx.ir_base[ref]; + ref = insn->op3; + if (!ref) { + break; + } + insn->op3 = ref = _xlat[ref]; + } + } + + IR_ASSERT(ctx->use_edges_count >= use_edges_count); + new_ctx.use_edges_count = use_edges_count; + new_ctx.use_edges = ir_mem_realloc(new_ctx.use_edges, use_edges_count * sizeof(ir_ref)); + + if (ctx->binding) { + ir_xlat_binding(ctx, _xlat); + new_ctx.binding = ctx->binding; + ctx->binding = NULL; + } + + _xlat -= ctx->consts_count; + ir_mem_free(_xlat); + + new_ctx.cfg_blocks_count = ctx->cfg_blocks_count; + new_ctx.cfg_edges_count = ctx->cfg_edges_count; + new_ctx.cfg_blocks = ctx->cfg_blocks; + new_ctx.cfg_edges = ctx->cfg_edges; + ctx->cfg_blocks = NULL; + ctx->cfg_edges = NULL; + + ir_free(ctx); + IR_ASSERT(new_ctx.consts_count == new_ctx.consts_limit); + IR_ASSERT(new_ctx.insns_count == new_ctx.insns_limit); + memcpy(ctx, &new_ctx, sizeof(ir_ctx)); + ctx->flags |= IR_LINEAR; + + ir_mem_free(_next); + + return 1; +} + +void ir_build_prev_refs(ir_ctx *ctx) +{ + uint32_t b; + ir_block *bb; + ir_ref i, n, prev; + ir_insn *insn; + + ctx->prev_ref = ir_mem_malloc(ctx->insns_count * sizeof(ir_ref)); + prev = 0; + for (b = 1, bb = ctx->cfg_blocks + b; b <= ctx->cfg_blocks_count; b++, bb++) { + IR_ASSERT(!(bb->flags & IR_BB_UNREACHABLE)); + for (i = bb->start, insn = ctx->ir_base + i; i < bb->end;) { + 
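/* prev_ref[i] records the reference of the instruction preceding "i" in the linear order, so a basic block can be walked backwards in O(1) per step. */ + 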
ctx->prev_ref[i] = prev; + n = ir_insn_len(insn); + prev = i; + i += n; + insn += n; + } + ctx->prev_ref[i] = prev; + } +} diff --git a/ext/opcache/jit/ir/ir_gdb.c b/ext/opcache/jit/ir/ir_gdb.c new file mode 100644 index 00000000000..8c2781d6038 --- /dev/null +++ b/ext/opcache/jit/ir/ir_gdb.c @@ -0,0 +1,642 @@ +/* + * IR - Lightweight JIT Compilation Framework + * (GDB interface) + * Copyright (C) 2022 Zend by Perforce. + * Authors: Dmitry Stogov + * + * Based on Mike Pall's implementation of GDB interface for LuaJIT. + */ + +#include +#include +#include +#include + +#ifdef __FreeBSD__ +# include +# include +# include +# include +#endif + +#include "ir.h" +#include "ir_private.h" +#include "ir_elf.h" + +/* DWARF definitions. */ +#define DW_CIE_VERSION 1 + +/* CFA (Canonical frame address) */ +enum { + DW_CFA_nop = 0x0, + DW_CFA_offset_extended = 0x5, + DW_CFA_def_cfa = 0xc, + DW_CFA_def_cfa_offset = 0xe, + DW_CFA_offset_extended_sf = 0x11, + DW_CFA_advance_loc = 0x40, + DW_CFA_offset = 0x80 +}; + +enum { + DW_EH_PE_udata4 = 0x03, + DW_EH_PE_textrel = 0x20 +}; + +enum { + DW_TAG_compile_unit = 0x11 +}; + +enum { + DW_children_no = 0, + DW_children_yes = 1 +}; + +enum { + DW_AT_name = 0x03, + DW_AT_stmt_list = 0x10, + DW_AT_low_pc = 0x11, + DW_AT_high_pc = 0x12 +}; + +enum { + DW_FORM_addr = 0x01, + DW_FORM_data4 = 0x06, + DW_FORM_string = 0x08 +}; + +enum { + DW_LNS_extended_op = 0, + DW_LNS_copy = 1, + DW_LNS_advance_pc = 2, + DW_LNS_advance_line = 3 +}; + +enum { + DW_LNE_end_sequence = 1, + DW_LNE_set_address = 2 +}; + +enum { +#if defined(IR_TARGET_X86) + DW_REG_AX, DW_REG_CX, DW_REG_DX, DW_REG_BX, + DW_REG_SP, DW_REG_BP, DW_REG_SI, DW_REG_DI, + DW_REG_RA, +#elif defined(IR_TARGET_X64) + /* Yes, the order is strange, but correct. */ + DW_REG_AX, DW_REG_DX, DW_REG_CX, DW_REG_BX, + DW_REG_SI, DW_REG_DI, DW_REG_BP, DW_REG_SP, + DW_REG_8, DW_REG_9, DW_REG_10, DW_REG_11, + DW_REG_12, DW_REG_13, DW_REG_14, DW_REG_15, + DW_REG_RA, +#elif defined(IR_TARGET_AARCH64) + DW_REG_SP = 31, + DW_REG_RA = 30, + DW_REG_X29 = 29, +#else +#error "Unsupported target architecture" +#endif +}; + +enum { + GDBJIT_SECT_NULL, + GDBJIT_SECT_text, + GDBJIT_SECT_eh_frame, + GDBJIT_SECT_shstrtab, + GDBJIT_SECT_strtab, + GDBJIT_SECT_symtab, + GDBJIT_SECT_debug_info, + GDBJIT_SECT_debug_abbrev, + GDBJIT_SECT_debug_line, + GDBJIT_SECT__MAX +}; + +enum { + GDBJIT_SYM_UNDEF, + GDBJIT_SYM_FILE, + GDBJIT_SYM_FUNC, + GDBJIT_SYM__MAX +}; + +typedef struct _ir_gdbjit_obj { + ir_elf_header hdr; + ir_elf_sectheader sect[GDBJIT_SECT__MAX]; + ir_elf_symbol sym[GDBJIT_SYM__MAX]; + uint8_t space[4096]; +} ir_gdbjit_obj; + +static const ir_elf_header ir_elfhdr_template = { + .emagic = { 0x7f, 'E', 'L', 'F' }, +#ifdef ELF64 + .eclass = 2, +#else + .eclass = 1, +#endif +#ifdef WORDS_BIGENDIAN + .eendian = 2, +#else + .eendian = 1, +#endif + .eversion = 1, +#if defined(Linux) + .eosabi = 0, /* TODO: Nope, it's not 3. ??? 
*/ +#elif defined(__FreeBSD__) + .eosabi = 9, +#elif defined(__OpenBSD__) + .eosabi = 12, +#elif defined(__NetBSD__) + .eosabi = 2, +#elif defined(__DragonFly__) + .eosabi = 0, +#elif (defined(__sun__) && defined(__svr4__)) + .eosabi = 6, +#else + .eosabi = 0, +#endif + .eabiversion = 0, + .epad = { 0, 0, 0, 0, 0, 0, 0 }, + .type = 1, +#if defined(IR_TARGET_X86) + .machine = 3, +#elif defined(IR_TARGET_X64) + .machine = 62, +#elif defined(IR_TARGET_AARCH64) + .machine = 183, +#else +# error "Unsupported target architecture" +#endif + .version = 1, + .entry = 0, + .phofs = 0, + .shofs = offsetof(ir_gdbjit_obj, sect), + .flags = 0, + .ehsize = sizeof(ir_elf_header), + .phentsize = 0, + .phnum = 0, + .shentsize = sizeof(ir_elf_sectheader), + .shnum = GDBJIT_SECT__MAX, + .shstridx = GDBJIT_SECT_shstrtab +}; + +/* Context for generating the ELF object for the GDB JIT API. */ +typedef struct _ir_gdbjit_ctx { + uint8_t *p; /* Pointer to next address in obj.space. */ + uint8_t *startp; /* Pointer to start address in obj.space. */ + uintptr_t mcaddr; /* Machine code address. */ + uint32_t szmcode; /* Size of machine code. */ + int32_t lineno; /* Starting line number. */ + const char *name; /* JIT function name */ + const char *filename; /* Starting file name. */ + size_t objsize; /* Final size of ELF object. */ + ir_gdbjit_obj obj; /* In-memory ELF object. */ +} ir_gdbjit_ctx; + +/* Add a zero-terminated string */ +static uint32_t ir_gdbjit_strz(ir_gdbjit_ctx *ctx, const char *str) +{ + uint8_t *p = ctx->p; + uint32_t ofs = (uint32_t)(p - ctx->startp); + do { + *p++ = (uint8_t)*str; + } while (*str++); + ctx->p = p; + return ofs; +} + +/* Add a ULEB128 value */ +static void ir_gdbjit_uleb128(ir_gdbjit_ctx *ctx, uint32_t v) +{ + uint8_t *p = ctx->p; + for (; v >= 0x80; v >>= 7) + *p++ = (uint8_t)((v & 0x7f) | 0x80); + *p++ = (uint8_t)v; + ctx->p = p; +} + +/* Add a SLEB128 value */ +static void ir_gdbjit_sleb128(ir_gdbjit_ctx *ctx, int32_t v) +{ + uint8_t *p = ctx->p; + for (; (uint32_t)(v+0x40) >= 0x80; v >>= 7) + *p++ = (uint8_t)((v & 0x7f) | 0x80); + *p++ = (uint8_t)(v & 0x7f); + ctx->p = p; +} + +static void ir_gdbjit_secthdr(ir_gdbjit_ctx *ctx) +{ + ir_elf_sectheader *sect; + + *ctx->p++ = '\0'; + +#define SECTDEF(id, tp, al) \ + sect = &ctx->obj.sect[GDBJIT_SECT_##id]; \ + sect->name = ir_gdbjit_strz(ctx, "." 
#id); \ + sect->type = ELFSECT_TYPE_##tp; \ + sect->align = (al) + + SECTDEF(text, NOBITS, 16); + sect->flags = ELFSECT_FLAGS_ALLOC|ELFSECT_FLAGS_EXEC; + sect->addr = ctx->mcaddr; + sect->ofs = 0; + sect->size = ctx->szmcode; + + SECTDEF(eh_frame, PROGBITS, sizeof(uintptr_t)); + sect->flags = ELFSECT_FLAGS_ALLOC; + + SECTDEF(shstrtab, STRTAB, 1); + SECTDEF(strtab, STRTAB, 1); + + SECTDEF(symtab, SYMTAB, sizeof(uintptr_t)); + sect->ofs = offsetof(ir_gdbjit_obj, sym); + sect->size = sizeof(ctx->obj.sym); + sect->link = GDBJIT_SECT_strtab; + sect->entsize = sizeof(ir_elf_symbol); + sect->info = GDBJIT_SYM_FUNC; + + SECTDEF(debug_info, PROGBITS, 1); + SECTDEF(debug_abbrev, PROGBITS, 1); + SECTDEF(debug_line, PROGBITS, 1); + +#undef SECTDEF +} + +static void ir_gdbjit_symtab(ir_gdbjit_ctx *ctx) +{ + ir_elf_symbol *sym; + + *ctx->p++ = '\0'; + + sym = &ctx->obj.sym[GDBJIT_SYM_FILE]; + sym->name = ir_gdbjit_strz(ctx, "JIT code"); + sym->sectidx = ELFSECT_IDX_ABS; + sym->info = ELFSYM_INFO(ELFSYM_BIND_LOCAL, ELFSYM_TYPE_FILE); + + sym = &ctx->obj.sym[GDBJIT_SYM_FUNC]; + sym->name = ir_gdbjit_strz(ctx, ctx->name); + sym->sectidx = GDBJIT_SECT_text; + sym->value = 0; + sym->size = ctx->szmcode; + sym->info = ELFSYM_INFO(ELFSYM_BIND_GLOBAL, ELFSYM_TYPE_FUNC); +} + +typedef IR_SET_ALIGNED(1, uint16_t unaligned_uint16_t); +typedef IR_SET_ALIGNED(1, uint32_t unaligned_uint32_t); +typedef IR_SET_ALIGNED(1, uintptr_t unaligned_uintptr_t); + +#define SECTALIGN(p, a) \ + ((p) = (uint8_t *)(((uintptr_t)(p) + ((a)-1)) & ~(uintptr_t)((a)-1))) + +/* Shortcuts to generate DWARF structures. */ +#define DB(x) (*p++ = (x)) +#define DI8(x) (*(int8_t *)p = (x), p++) +#define DU16(x) (*(unaligned_uint16_t *)p = (x), p += 2) +#define DU32(x) (*(unaligned_uint32_t *)p = (x), p += 4) +#define DADDR(x) (*(unaligned_uintptr_t *)p = (x), p += sizeof(uintptr_t)) +#define DUV(x) (ctx->p = p, ir_gdbjit_uleb128(ctx, (x)), p = ctx->p) +#define DSV(x) (ctx->p = p, ir_gdbjit_sleb128(ctx, (x)), p = ctx->p) +#define DSTR(str) (ctx->p = p, ir_gdbjit_strz(ctx, (str)), p = ctx->p) +#define DALIGNNOP(s) while ((uintptr_t)p & ((s)-1)) *p++ = DW_CFA_nop +#define DSECT(name, stmt) \ + { unaligned_uint32_t *szp_##name = (uint32_t *)p; p += 4; stmt \ + *szp_##name = (uint32_t)((p-(uint8_t *)szp_##name)-4); } + +static void ir_gdbjit_ehframe(ir_gdbjit_ctx *ctx, uint32_t sp_offset, uint32_t sp_adjustment) +{ + uint8_t *p = ctx->p; + uint8_t *framep = p; + + /* DWARF EH CIE (Common Information Entry) */ + DSECT(CIE, + DU32(0); /* CIE ID. */ + DB(DW_CIE_VERSION); /* Version */ + DSTR("zR"); /* Augmentation String. */ + DUV(1); /* Code alignment factor. */ + DSV(-(int32_t)sizeof(uintptr_t)); /* Data alignment factor. */ + DB(DW_REG_RA); /* Return address register. */ + DB(1); DB(DW_EH_PE_textrel|DW_EH_PE_udata4); /* Augmentation data. */ +#if defined(IR_TARGET_X86) || defined(IR_TARGET_X64) + DB(DW_CFA_def_cfa); DUV(DW_REG_SP); DUV(sizeof(uintptr_t)); + DB(DW_CFA_offset|DW_REG_RA); DUV(1); +#elif defined(IR_TARGET_AARCH64) + DB(DW_CFA_def_cfa); DUV(DW_REG_SP); DUV(0); +#endif + DALIGNNOP(sizeof(uintptr_t)); + ) + + /* DWARF EH FDE (Frame Description Entry). */ + DSECT(FDE, + DU32((uint32_t)(p-framep)); /* Offset to CIE Pointer. */ + DU32(0); /* Machine code offset relative to .text. */ + DU32(ctx->szmcode); /* Machine code length. */ + DB(0); /* Augmentation data. 
*/ + DB(DW_CFA_def_cfa_offset); DUV(sp_offset); +#if defined(IR_TARGET_AARCH64) + if (sp_offset) { + if (sp_adjustment && sp_adjustment < sp_offset) { + DB(DW_CFA_offset|DW_REG_X29); DUV(sp_adjustment / sizeof(uintptr_t)); + DB(DW_CFA_offset|DW_REG_RA); DUV((sp_adjustment / sizeof(uintptr_t)) - 1); + } else { + DB(DW_CFA_offset|DW_REG_X29); DUV(sp_offset / sizeof(uintptr_t)); + DB(DW_CFA_offset|DW_REG_RA); DUV((sp_offset / sizeof(uintptr_t)) - 1); + } + } +#endif + if (sp_adjustment && sp_adjustment > sp_offset) { + DB(DW_CFA_advance_loc|1); DB(DW_CFA_def_cfa_offset); DUV(sp_adjustment); +#if defined(IR_TARGET_AARCH64) + if (!sp_offset) { + DB(DW_CFA_offset|DW_REG_X29); DUV(sp_adjustment / sizeof(uintptr_t)); + DB(DW_CFA_offset|DW_REG_RA); DUV((sp_adjustment / sizeof(uintptr_t)) - 1); + } +#endif + } + DALIGNNOP(sizeof(uintptr_t)); + ) + + ctx->p = p; +} + +static void ir_gdbjit_debuginfo(ir_gdbjit_ctx *ctx) +{ + uint8_t *p = ctx->p; + + DSECT(info, + DU16(2); /* DWARF version. */ + DU32(0); /* Abbrev offset. */ + DB(sizeof(uintptr_t)); /* Pointer size. */ + + DUV(1); /* Abbrev #1: DW_TAG_compile_unit. */ + DSTR(ctx->filename); /* DW_AT_name. */ + DADDR(ctx->mcaddr); /* DW_AT_low_pc. */ + DADDR(ctx->mcaddr + ctx->szmcode); /* DW_AT_high_pc. */ + DU32(0); /* DW_AT_stmt_list. */ + ); + + ctx->p = p; +} + +static void ir_gdbjit_debugabbrev(ir_gdbjit_ctx *ctx) +{ + uint8_t *p = ctx->p; + + /* Abbrev #1: DW_TAG_compile_unit. */ + DUV(1); + DUV(DW_TAG_compile_unit); + DB(DW_children_no); + DUV(DW_AT_name); + DUV(DW_FORM_string); + DUV(DW_AT_low_pc); + DUV(DW_FORM_addr); + DUV(DW_AT_high_pc); + DUV(DW_FORM_addr); + DUV(DW_AT_stmt_list); + DUV(DW_FORM_data4); + DB(0); + DB(0); + + ctx->p = p; +} + +#define DLNE(op, s) (DB(DW_LNS_extended_op), DUV(1+(s)), DB((op))) + +static void ir_gdbjit_debugline(ir_gdbjit_ctx *ctx) +{ + uint8_t *p = ctx->p; + + DSECT(line, + DU16(2); /* DWARF version. */ + DSECT(header, + DB(1); /* Minimum instruction length. */ + DB(1); /* is_stmt. */ + DI8(0); /* Line base for special opcodes. */ + DB(2); /* Line range for special opcodes. */ + DB(3+1); /* Opcode base at DW_LNS_advance_line+1. */ + DB(0); DB(1); DB(1); /* Standard opcode lengths. */ + /* Directory table. */ + DB(0); + /* File name table. */ + DSTR(ctx->filename); DUV(0); DUV(0); DUV(0); + DB(0); + ); + DLNE(DW_LNE_set_address, sizeof(uintptr_t)); + DADDR(ctx->mcaddr); + if (ctx->lineno) (DB(DW_LNS_advance_line), DSV(ctx->lineno-1)); + DB(DW_LNS_copy); + DB(DW_LNS_advance_pc); DUV(ctx->szmcode); + DLNE(DW_LNE_end_sequence, 0); + ); + + ctx->p = p; +} + + +#undef DLNE + +/* Undef shortcuts. */ +#undef DB +#undef DI8 +#undef DU16 +#undef DU32 +#undef DADDR +#undef DUV +#undef DSV +#undef DSTR +#undef DALIGNNOP +#undef DSECT + +typedef void (*ir_gdbjit_initf) (ir_gdbjit_ctx *ctx); + +static void ir_gdbjit_initsect(ir_gdbjit_ctx *ctx, int sect) +{ + ctx->startp = ctx->p; + ctx->obj.sect[sect].ofs = (uintptr_t)((char *)ctx->p - (char *)&ctx->obj); +} + +static void ir_gdbjit_initsect_done(ir_gdbjit_ctx *ctx, int sect) +{ + ctx->obj.sect[sect].size = (uintptr_t)(ctx->p - ctx->startp); +} + +static void ir_gdbjit_buildobj(ir_gdbjit_ctx *ctx, uint32_t sp_offset, uint32_t sp_adjustment) +{ + ir_gdbjit_obj *obj = &ctx->obj; + + /* Fill in ELF header and clear structures. */ + memcpy(&obj->hdr, &ir_elfhdr_template, sizeof(ir_elf_header)); + memset(&obj->sect, 0, sizeof(ir_elf_sectheader) * GDBJIT_SECT__MAX); + memset(&obj->sym, 0, sizeof(ir_elf_symbol) * GDBJIT_SYM__MAX); + + /* Initialize sections. 
*/ + ctx->p = obj->space; + ir_gdbjit_initsect(ctx, GDBJIT_SECT_shstrtab); ir_gdbjit_secthdr(ctx); ir_gdbjit_initsect_done(ctx, GDBJIT_SECT_shstrtab); + ir_gdbjit_initsect(ctx, GDBJIT_SECT_strtab); ir_gdbjit_symtab(ctx); ir_gdbjit_initsect_done(ctx, GDBJIT_SECT_strtab); + ir_gdbjit_initsect(ctx, GDBJIT_SECT_debug_info); ir_gdbjit_debuginfo(ctx); ir_gdbjit_initsect_done(ctx, GDBJIT_SECT_debug_info); + ir_gdbjit_initsect(ctx, GDBJIT_SECT_debug_abbrev); ir_gdbjit_debugabbrev(ctx); ir_gdbjit_initsect_done(ctx, GDBJIT_SECT_debug_abbrev); + ir_gdbjit_initsect(ctx, GDBJIT_SECT_debug_line); ir_gdbjit_debugline(ctx); ir_gdbjit_initsect_done(ctx, GDBJIT_SECT_debug_line); + SECTALIGN(ctx->p, sizeof(uintptr_t)); + ir_gdbjit_initsect(ctx, GDBJIT_SECT_eh_frame); ir_gdbjit_ehframe(ctx, sp_offset, sp_adjustment); ir_gdbjit_initsect_done(ctx, GDBJIT_SECT_eh_frame); + ctx->objsize = (size_t)((char *)ctx->p - (char *)obj); + + IR_ASSERT(ctx->objsize < sizeof(ir_gdbjit_obj)); +} + +enum { + IR_GDBJIT_NOACTION, + IR_GDBJIT_REGISTER, + IR_GDBJIT_UNREGISTER +}; + +typedef struct _ir_gdbjit_code_entry { + struct _ir_gdbjit_code_entry *next_entry; + struct _ir_gdbjit_code_entry *prev_entry; + const char *symfile_addr; + uint64_t symfile_size; +} ir_gdbjit_code_entry; + +typedef struct _ir_gdbjit_descriptor { + uint32_t version; + uint32_t action_flag; + struct _ir_gdbjit_code_entry *relevant_entry; + struct _ir_gdbjit_code_entry *first_entry; +} ir_gdbjit_descriptor; + +ir_gdbjit_descriptor __jit_debug_descriptor = { + 1, IR_GDBJIT_NOACTION, NULL, NULL +}; + +#ifdef IR_EXTERNAL_GDB_ENTRY +void __jit_debug_register_code(void); +#else +IR_NEVER_INLINE void __jit_debug_register_code(void) +{ + __asm__ __volatile__(""); +} +#endif + +static bool ir_gdb_register_code(const void *object, size_t size) +{ + ir_gdbjit_code_entry *entry; + + entry = malloc(sizeof(ir_gdbjit_code_entry) + size); + if (entry == NULL) { + return 0; + } + + entry->symfile_addr = ((char*)entry) + sizeof(ir_gdbjit_code_entry); + entry->symfile_size = size; + + memcpy((char *)entry->symfile_addr, object, size); + + entry->prev_entry = NULL; + entry->next_entry = __jit_debug_descriptor.first_entry; + + if (entry->next_entry) { + entry->next_entry->prev_entry = entry; + } + __jit_debug_descriptor.first_entry = entry; + + /* Notify GDB */ + __jit_debug_descriptor.relevant_entry = entry; + __jit_debug_descriptor.action_flag = IR_GDBJIT_REGISTER; + __jit_debug_register_code(); + + return 1; +} + +void ir_gdb_unregister_all(void) +{ + ir_gdbjit_code_entry *entry; + + __jit_debug_descriptor.action_flag = IR_GDBJIT_UNREGISTER; + while ((entry = __jit_debug_descriptor.first_entry)) { + __jit_debug_descriptor.first_entry = entry->next_entry; + if (entry->next_entry) { + entry->next_entry->prev_entry = NULL; + } + /* Notify GDB */ + __jit_debug_descriptor.relevant_entry = entry; + __jit_debug_register_code(); + + free(entry); + } +} + +bool ir_gdb_present(void) +{ + bool ret = 0; +#if defined(__linux__) /* netbsd while having this procfs part, does not hold the tracer pid */ + int fd = open("/proc/self/status", O_RDONLY); + + if (fd > 0) { + char buf[1024]; + ssize_t n = read(fd, buf, sizeof(buf) - 1); + char *s; + pid_t pid; + + if (n > 0) { + buf[n] = 0; + s = strstr(buf, "TracerPid:"); + if (s) { + s += sizeof("TracerPid:") - 1; + while (*s == ' ' || *s == '\t') { + s++; + } + pid = atoi(s); + if (pid) { + char out[1024]; + sprintf(buf, "/proc/%d/exe", (int)pid); + if (readlink(buf, out, sizeof(out) - 1) > 0) { + if (strstr(out, "gdb")) { + ret = 1; + } + 
} + } + } + } + + close(fd); + } +#elif defined(__FreeBSD__) + struct kinfo_proc *proc = kinfo_getproc(getpid()); + + if (proc) { + if ((proc->ki_flag & P_TRACED) != 0) { + struct kinfo_proc *dbg = kinfo_getproc(proc->ki_tracer); + + ret = (dbg && strstr(dbg->ki_comm, "gdb")); + } + } +#endif + + return ret; +} + +int ir_gdb_register(const char *name, + const void *start, + size_t size, + uint32_t sp_offset, + uint32_t sp_adjustment) +{ + ir_gdbjit_ctx ctx; + + ctx.mcaddr = (uintptr_t)start; + ctx.szmcode = (uint32_t)size; + ctx.name = name; + ctx.filename = "unknown"; + ctx.lineno = 0; + + ir_gdbjit_buildobj(&ctx, sp_offset, sp_adjustment); + + return ir_gdb_register_code(&ctx.obj, ctx.objsize); +} + +void ir_gdb_init(void) +{ + /* This might enable registration of all JIT-ed code, but unfortunately, + * in case of many functions, this takes enormous time. */ + if (ir_gdb_present()) { +#if 0 + _debug |= IR_DEBUG_GDB; +#endif + } +} diff --git a/ext/opcache/jit/ir/ir_patch.c b/ext/opcache/jit/ir/ir_patch.c new file mode 100644 index 00000000000..39e08eb46a5 --- /dev/null +++ b/ext/opcache/jit/ir/ir_patch.c @@ -0,0 +1,270 @@ +/* + * IR - Lightweight JIT Compilation Framework + * (Native code patcher) + * Copyright (C) 2022 Zend by Perforce. + * Authors: Dmitry Stogov + * + * Based on Mike Pall's implementation for LuaJIT. + */ + +#include "ir.h" +#include "ir_private.h" + +#if defined(IR_TARGET_X86) || defined(IR_TARGET_X64) +static uint32_t _asm_x86_inslen(const uint8_t* p) +{ + static const uint8_t map_op1[256] = { + 0x92,0x92,0x92,0x92,0x52,0x45,0x51,0x51,0x92,0x92,0x92,0x92,0x52,0x45,0x51,0x20, + 0x92,0x92,0x92,0x92,0x52,0x45,0x51,0x51,0x92,0x92,0x92,0x92,0x52,0x45,0x51,0x51, + 0x92,0x92,0x92,0x92,0x52,0x45,0x10,0x51,0x92,0x92,0x92,0x92,0x52,0x45,0x10,0x51, + 0x92,0x92,0x92,0x92,0x52,0x45,0x10,0x51,0x92,0x92,0x92,0x92,0x52,0x45,0x10,0x51, +#ifdef IR_TARGET_X64 + 0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x14,0x14,0x14,0x14,0x14,0x14,0x14,0x14, +#else + 0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51, +#endif + 0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51, + 0x51,0x51,0x92,0x92,0x10,0x10,0x12,0x11,0x45,0x86,0x52,0x93,0x51,0x51,0x51,0x51, + 0x52,0x52,0x52,0x52,0x52,0x52,0x52,0x52,0x52,0x52,0x52,0x52,0x52,0x52,0x52,0x52, + 0x93,0x86,0x93,0x93,0x92,0x92,0x92,0x92,0x92,0x92,0x92,0x92,0x92,0x92,0x92,0x92, + 0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x47,0x51,0x51,0x51,0x51,0x51, +#ifdef IR_TARGET_X64 + 0x59,0x59,0x59,0x59,0x51,0x51,0x51,0x51,0x52,0x45,0x51,0x51,0x51,0x51,0x51,0x51, +#else + 0x55,0x55,0x55,0x55,0x51,0x51,0x51,0x51,0x52,0x45,0x51,0x51,0x51,0x51,0x51,0x51, +#endif + 0x52,0x52,0x52,0x52,0x52,0x52,0x52,0x52,0x05,0x05,0x05,0x05,0x05,0x05,0x05,0x05, + 0x93,0x93,0x53,0x51,0x70,0x71,0x93,0x86,0x54,0x51,0x53,0x51,0x51,0x52,0x51,0x51, + 0x92,0x92,0x92,0x92,0x52,0x52,0x51,0x51,0x92,0x92,0x92,0x92,0x92,0x92,0x92,0x92, + 0x52,0x52,0x52,0x52,0x52,0x52,0x52,0x52,0x45,0x45,0x47,0x52,0x51,0x51,0x51,0x51, + 0x10,0x51,0x10,0x10,0x51,0x51,0x63,0x66,0x51,0x51,0x51,0x51,0x51,0x51,0x92,0x92 + }; + static const uint8_t map_op2[256] = { + 0x93,0x93,0x93,0x93,0x52,0x52,0x52,0x52,0x52,0x52,0x51,0x52,0x51,0x93,0x52,0x94, + 0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93, + 0x53,0x53,0x53,0x53,0x53,0x53,0x53,0x53,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93, + 0x52,0x52,0x52,0x52,0x52,0x52,0x52,0x52,0x34,0x51,0x35,0x51,0x51,0x51,0x51,0x51, + 
0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93, + 0x53,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93, + 0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93, + 0x94,0x54,0x54,0x54,0x93,0x93,0x93,0x52,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93, + 0x46,0x46,0x46,0x46,0x46,0x46,0x46,0x46,0x46,0x46,0x46,0x46,0x46,0x46,0x46,0x46, + 0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93, + 0x52,0x52,0x52,0x93,0x94,0x93,0x51,0x51,0x52,0x52,0x52,0x93,0x94,0x93,0x93,0x93, + 0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x94,0x93,0x93,0x93,0x93,0x93, + 0x93,0x93,0x94,0x93,0x94,0x94,0x94,0x93,0x52,0x52,0x52,0x52,0x52,0x52,0x52,0x52, + 0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93, + 0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93, + 0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x52 + }; + uint32_t result = 0; + uint32_t prefixes = 0; + uint32_t x = map_op1[*p]; + + for (;;) { + switch (x >> 4) { + case 0: + return result + x + (prefixes & 4); + case 1: + prefixes |= x; + x = map_op1[*++p]; + result++; + break; + case 2: + x = map_op2[*++p]; + break; + case 3: + p++; + goto mrm; + case 4: + result -= (prefixes & 2); + /* fallthrough */ + case 5: + return result + (x & 15); + case 6: /* Group 3. */ + if (p[1] & 0x38) { + x = 2; + } else if ((prefixes & 2) && (x == 0x66)) { + x = 4; + } + goto mrm; + case 7: /* VEX c4/c5. */ +#ifdef IR_TARGET_X86 + if (p[1] < 0xc0) { + x = 2; + goto mrm; + } +#endif + if (x == 0x70) { + x = *++p & 0x1f; + result++; + if (x >= 2) { + p += 2; + result += 2; + goto mrm; + } + } + p++; + result++; + x = map_op2[*++p]; + break; + case 8: + result -= (prefixes & 2); + /* fallthrough */ + case 9: +mrm: + /* ModR/M and possibly SIB. 
*/ + result += (x & 15); + x = *++p; + switch (x >> 6) { + case 0: + if ((x & 7) == 5) { + return result + 4; + } + break; + case 1: + result++; + break; + case 2: + result += 4; + break; + case 3: + return result; + } + if ((x & 7) == 4) { + result++; + if (x < 0x40 && (p[1] & 7) == 5) { + result += 4; + } + } + return result; + } + } +} + +typedef IR_SET_ALIGNED(1, uint16_t unaligned_uint16_t); +typedef IR_SET_ALIGNED(1, int32_t unaligned_int32_t); + +static int ir_patch_code(const void *code, size_t size, const void *from_addr, const void *to_addr) +{ + int ret = 0; + uint8_t *p, *end; + + p = (uint8_t*)code; + end = p + size - 4; + while (p < end) { + if ((*(unaligned_uint16_t*)p & 0xf0ff) == 0x800f && p + *(unaligned_int32_t*)(p+2) == (uint8_t*)from_addr - 6) { + *(unaligned_int32_t*)(p+2) = ((uint8_t*)to_addr - (p + 6)); + ret++; + } else if (*p == 0xe9 && p + *(unaligned_int32_t*)(p+1) == (uint8_t*)from_addr - 5) { + *(unaligned_int32_t*)(p+1) = ((uint8_t*)to_addr - (p + 5)); + ret++; + } + p += _asm_x86_inslen(p); + } + if (ret) { + ir_mem_flush((void*)code, size); + } + return ret; +} + +#elif defined(IR_TARGET_AARCH64) + +static int ir_patch_code(const void *code, size_t size, const void *from_addr, const void *to_addr) +{ + int ret = 0; + uint8_t *p, *end; + const void *veneer = NULL; + ptrdiff_t delta; + + end = (uint8_t*)code; + p = end + size; + while (p > end) { + uint32_t *ins_ptr; + uint32_t ins; + + p -= 4; + ins_ptr = (uint32_t*)p; + ins = *ins_ptr; + if ((ins & 0xfc000000u) == 0x14000000u) { + // B (imm26:0..25) + delta = (uint32_t*)from_addr - ins_ptr; + if (((ins ^ (uint32_t)delta) & 0x01ffffffu) == 0) { + delta = (uint32_t*)to_addr - ins_ptr; + if (((delta + 0x02000000) >> 26) != 0) { + abort(); // branch target out of range + } + *ins_ptr = (ins & 0xfc000000u) | ((uint32_t)delta & 0x03ffffffu); + ret++; + if (!veneer) { + veneer = p; + } + } + } else if ((ins & 0xff000000u) == 0x54000000u || + (ins & 0x7e000000u) == 0x34000000u) { + // B.cond, CBZ, CBNZ (imm19:5..23) + delta = (uint32_t*)from_addr - ins_ptr; + if (((ins ^ ((uint32_t)delta << 5)) & 0x00ffffe0u) == 0) { + delta = (uint32_t*)to_addr - ins_ptr; + if (((delta + 0x40000) >> 19) != 0) { + if (veneer) { + delta = (uint32_t*)veneer - ins_ptr; + if (((delta + 0x40000) >> 19) != 0) { + abort(); // branch target out of range + } + } else { + abort(); // branch target out of range + } + } + *ins_ptr = (ins & 0xff00001fu) | (((uint32_t)delta & 0x7ffffu) << 5); + ret++; + } + } else if ((ins & 0x7e000000u) == 0x36000000u) { + // TBZ, TBNZ (imm14:5..18) + delta = (uint32_t*)from_addr - ins_ptr; + if (((ins ^ ((uint32_t)delta << 5)) & 0x0007ffe0u) == 0) { + delta = (uint32_t*)to_addr - ins_ptr; + if (((delta + 0x2000) >> 14) != 0) { + if (veneer) { + delta = (uint32_t*)veneer - ins_ptr; + if (((delta + 0x2000) >> 14) != 0) { + abort(); // branch target out of range + } + } else { + abort(); // branch target out of range + } + } + *ins_ptr = (ins & 0xfff8001fu) | (((uint32_t)delta & 0x3fffu) << 5); + ret++; + } + } + } + + if (ret) { + ir_mem_flush((void*)code, size); + } + + return ret; +} +#endif + +int ir_patch(const void *code, size_t size, uint32_t jmp_table_size, const void *from_addr, const void *to_addr) +{ + int ret = 0; + + if (jmp_table_size) { + const void **jmp_slot = (const void **)((char*)code + IR_ALIGNED_SIZE(size, sizeof(void*))); + + do { + if (*jmp_slot == from_addr) { + *jmp_slot = to_addr; + ret++; + } + jmp_slot++; + } while (--jmp_table_size); + } + + ret += ir_patch_code(code, size, 
from_addr, to_addr);
+
+	return ret;
+}
diff --git a/ext/opcache/jit/ir/ir_perf.c b/ext/opcache/jit/ir/ir_perf.c
new file mode 100644
index 00000000000..5eac3006e2a
--- /dev/null
+++ b/ext/opcache/jit/ir/ir_perf.c
@@ -0,0 +1,266 @@
+/*
+ * IR - Lightweight JIT Compilation Framework
+ * (Linux perf interface)
+ * Copyright (C) 2022 Zend by Perforce.
+ * Authors: Dmitry Stogov
+ *
+ * 1) Profile using perf-<pid>.map
+ *    perf record ./prog
+ *    perf report
+ *
+ * 2) Profile using jit-<pid>.dump
+ *    perf record -k 1 ./prog
+ *    perf inject -j -i perf.data -o perf.data.jitted
+ *    perf report -i perf.data.jitted
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/mman.h>
+
+#if defined(__linux__)
+#include <sys/syscall.h>
+#elif defined(__darwin__)
+# include <pthread.h>
+#elif defined(__FreeBSD__)
+# include <sys/thr.h>
+# include <sys/sysctl.h>
+#elif defined(__NetBSD__)
+# include <lwp.h>
+#elif defined(__DragonFly__)
+# include <sys/lwp.h>
+# include <sys/sysctl.h>
+#elif defined(__sun)
+// avoiding thread.h inclusion as it conflicts with VTune's types.
+extern unsigned int thr_self(void);
+#elif defined(__HAIKU__)
+#include <FindDirectory.h>
+#endif
+
+#include "ir.h"
+#include "ir_elf.h"
+
+#define IR_PERF_JITDUMP_HEADER_MAGIC   0x4A695444
+#define IR_PERF_JITDUMP_HEADER_VERSION 1
+
+#define IR_PERF_JITDUMP_RECORD_LOAD       0
+#define IR_PERF_JITDUMP_RECORD_MOVE       1
+#define IR_PERF_JITDUMP_RECORD_DEBUG_INFO 2
+#define IR_PERF_JITDUMP_RECORD_CLOSE      3
+#define IR_PERF_JITDUMP_UNWINDING_INFO    4
+
+#define ALIGN8(size)   (((size) + 7) & ~7)
+#define PADDING8(size) (ALIGN8(size) - (size))
+
+typedef struct ir_perf_jitdump_header {
+	uint32_t magic;
+	uint32_t version;
+	uint32_t size;
+	uint32_t elf_mach_target;
+	uint32_t reserved;
+	uint32_t process_id;
+	uint64_t time_stamp;
+	uint64_t flags;
+} ir_perf_jitdump_header;
+
+typedef struct _ir_perf_jitdump_record {
+	uint32_t event;
+	uint32_t size;
+	uint64_t time_stamp;
+} ir_perf_jitdump_record;
+
+typedef struct _ir_perf_jitdump_load_record {
+	ir_perf_jitdump_record hdr;
+	uint32_t process_id;
+	uint32_t thread_id;
+	uint64_t vma;
+	uint64_t code_address;
+	uint64_t code_size;
+	uint64_t code_id;
+} ir_perf_jitdump_load_record;
+
+static int jitdump_fd = -1;
+static void *jitdump_mem = MAP_FAILED;
+
+static uint64_t ir_perf_timestamp(void)
+{
+	struct timespec ts;
+
+	if (clock_gettime(CLOCK_MONOTONIC, &ts) != 0) {
+		return 0;
+	}
+	return ((uint64_t)ts.tv_sec * 1000000000) + ts.tv_nsec;
+}
+
+int ir_perf_jitdump_open(void)
+{
+	char filename[64];
+	int fd, ret;
+	ir_elf_header elf_hdr;
+	ir_perf_jitdump_header jit_hdr;
+
+	sprintf(filename, "/tmp/jit-%d.dump", getpid());
+	if (!ir_perf_timestamp()) {
+		return 0;
+	}
+
+#if defined(__linux__)
+	fd = open("/proc/self/exe", O_RDONLY);
+#elif defined(__NetBSD__)
+	fd = open("/proc/curproc/exe", O_RDONLY);
+#elif defined(__FreeBSD__) || defined(__DragonFly__)
+	char path[PATH_MAX];
+	size_t pathlen = sizeof(path);
+	int mib[4] = {CTL_KERN, KERN_PROC, KERN_PROC_PATHNAME, -1};
+	if (sysctl(mib, 4, path, &pathlen, NULL, 0) == -1) {
+		return 0;
+	}
+	fd = open(path, O_RDONLY);
+#elif defined(__sun)
+	fd = open("/proc/self/path/a.out", O_RDONLY);
+#elif defined(__HAIKU__)
+	char path[PATH_MAX];
+	if (find_path(B_APP_IMAGE_SYMBOL, B_FIND_PATH_IMAGE_PATH,
+	              NULL, path, sizeof(path)) != B_OK) {
+		return 0;
+	}
+
+	fd = open(path, O_RDONLY);
+#else
+	fd = -1;
+#endif
+	if (fd < 0) {
+		return 0;
+	}
+
+	ret = read(fd, &elf_hdr, sizeof(elf_hdr));
+	close(fd);
+
+	if (ret != sizeof(elf_hdr) ||
+	    elf_hdr.emagic[0] != 0x7f ||
+	    elf_hdr.emagic[1] != 'E' ||
+	    elf_hdr.emagic[2] != 'L' ||
+	    elf_hdr.emagic[3] != 
'F') { + return 0; + } + + jitdump_fd = open(filename, O_CREAT | O_TRUNC | O_RDWR, 0666); + if (jitdump_fd < 0) { + return 0; + } + + jitdump_mem = mmap(NULL, + sysconf(_SC_PAGESIZE), + PROT_READ|PROT_EXEC, + MAP_PRIVATE, jitdump_fd, 0); + + if (jitdump_mem == MAP_FAILED) { + close(jitdump_fd); + jitdump_fd = -1; + return 0; + } + + memset(&jit_hdr, 0, sizeof(jit_hdr)); + jit_hdr.magic = IR_PERF_JITDUMP_HEADER_MAGIC; + jit_hdr.version = IR_PERF_JITDUMP_HEADER_VERSION; + jit_hdr.size = sizeof(jit_hdr); + jit_hdr.elf_mach_target = elf_hdr.machine; + jit_hdr.process_id = getpid(); + jit_hdr.time_stamp = ir_perf_timestamp(); + jit_hdr.flags = 0; + if (write(jitdump_fd, &jit_hdr, sizeof(jit_hdr)) != sizeof(jit_hdr)) { + return 0; + } + return 1; +} + +int ir_perf_jitdump_close(void) +{ + int ret = 1; + + if (jitdump_fd >= 0) { + ir_perf_jitdump_record rec; + + rec.event = IR_PERF_JITDUMP_RECORD_CLOSE; + rec.size = sizeof(rec); + rec.time_stamp = ir_perf_timestamp(); + if (write(jitdump_fd, &rec, sizeof(rec)) != sizeof(rec)) { + ret = 0; + } + close(jitdump_fd); + + if (jitdump_mem != MAP_FAILED) { + munmap(jitdump_mem, sysconf(_SC_PAGESIZE)); + } + } + return ret; +} + +int ir_perf_jitdump_register(const char *name, const void *start, size_t size) +{ + if (jitdump_fd >= 0) { + static uint64_t id = 1; + ir_perf_jitdump_load_record rec; + size_t len = strlen(name); + uint32_t thread_id = 0; +#if defined(__linux__) + thread_id = syscall(SYS_gettid); +#elif defined(__darwin__) + uint64_t thread_id_u64; + pthread_threadid_np(NULL, &thread_id_u64); + thread_id = (uint32_t) thread_id_u64; +#elif defined(__FreeBSD__) + long tid; + thr_self(&tid); + thread_id = (uint32_t)tid; +#elif defined(__OpenBSD__) + thread_id = getthrid(); +#elif defined(__NetBSD__) + thread_id = _lwp_self(); +#elif defined(__DragonFly__) + thread_id = lwp_gettid(); +#elif defined(__sun) + thread_id = thr_self(); +#endif + + memset(&rec, 0, sizeof(rec)); + rec.hdr.event = IR_PERF_JITDUMP_RECORD_LOAD; + rec.hdr.size = sizeof(rec) + len + 1 + size; + rec.hdr.time_stamp = ir_perf_timestamp(); + rec.process_id = getpid(); + rec.thread_id = thread_id; + rec.vma = (uint64_t)(uintptr_t)start; + rec.code_address = (uint64_t)(uintptr_t)start; + rec.code_size = (uint64_t)size; + rec.code_id = id++; + + if (write(jitdump_fd, &rec, sizeof(rec)) != sizeof(rec) + || write(jitdump_fd, name, len + 1) < 0 + || write(jitdump_fd, start, size) < 0) { + return 0; + } + } + return 1; +} + +void ir_perf_map_register(const char *name, const void *start, size_t size) +{ + static FILE *fp = NULL; + + if (!fp) { + char filename[64]; + + sprintf(filename, "/tmp/perf-%d.map", getpid()); + fp = fopen(filename, "w"); + if (!fp) { + return; + } + setlinebuf(fp); + } + fprintf(fp, "%zx %zx %s\n", (size_t)(uintptr_t)start, size, name); +} diff --git a/ext/opcache/jit/ir/ir_php.h b/ext/opcache/jit/ir/ir_php.h new file mode 100644 index 00000000000..d26f78c99bc --- /dev/null +++ b/ext/opcache/jit/ir/ir_php.h @@ -0,0 +1,37 @@ +/* + * IR - Lightweight JIT Compilation Framework + * (IR/PHP integration) + * Copyright (C) 2022 Zend by Perforce. 
+ * Authors: Dmitry Stogov
+ */
+
+#ifndef IR_PHP_H
+#define IR_PHP_H
+
+#define IR_PHP_OPS(_)
+
+
+#define IR_SNAPSHOT_HANDLER_DCL() \
+	void *zend_jit_snapshot_handler(ir_ctx *ctx, ir_ref snapshot_ref, ir_insn *snapshot, void *addr)
+
+#define IR_SNAPSHOT_HANDLER(ctx, ref, insn, addr) \
+	zend_jit_snapshot_handler(ctx, ref, insn, addr)
+
+#ifndef IR_PHP_MM
+# define IR_PHP_MM 1
+#endif
+
+#if IR_PHP_MM
+# include "zend.h"
+
+# define ir_mem_malloc  emalloc
+# define ir_mem_calloc  ecalloc
+# define ir_mem_realloc erealloc
+# define ir_mem_free    efree
+#endif
+
+#if defined(IR_TARGET_AARCH64)
+# define IR_EXTERNAL_GDB_ENTRY
+#endif
+
+#endif /* IR_PHP_H */
diff --git a/ext/opcache/jit/ir/ir_private.h b/ext/opcache/jit/ir/ir_private.h
new file mode 100644
index 00000000000..0f6267bd585
--- /dev/null
+++ b/ext/opcache/jit/ir/ir_private.h
@@ -0,0 +1,1206 @@
+/*
+ * IR - Lightweight JIT Compilation Framework
+ * (Common data structures and non public definitions)
+ * Copyright (C) 2022 Zend by Perforce.
+ * Authors: Dmitry Stogov
+ */
+
+#ifndef IR_PRIVATE_H
+#define IR_PRIVATE_H
+#include <string.h>
+#include <stdlib.h>
+
+#ifdef IR_DEBUG
+# include <assert.h>
+# define IR_ASSERT(x) assert(x)
+#else
+# define IR_ASSERT(x)
+#endif
+
+#ifdef _WIN32
+# include <intrin.h>
+# ifdef _M_X64
+# pragma intrinsic(_BitScanForward64)
+# pragma intrinsic(_BitScanReverse64)
+# endif
+# pragma intrinsic(_BitScanForward)
+# pragma intrinsic(_BitScanReverse)
+#endif
+
+#ifdef __has_builtin
+# if __has_builtin(__builtin_expect)
+# define EXPECTED(condition)   __builtin_expect(!!(condition), 1)
+# define UNEXPECTED(condition) __builtin_expect(!!(condition), 0)
+# endif
+# if __has_attribute(__aligned__)
+# define IR_SET_ALIGNED(alignment, decl) decl __attribute__ ((__aligned__ (alignment)))
+# endif
+# if __has_attribute(__fallthrough__)
+# define IR_FALLTHROUGH __attribute__((__fallthrough__))
+# endif
+#elif defined(_WIN32)
+# define IR_SET_ALIGNED(alignment, decl) __declspec(align(alignment)) decl
+#else /* GCC prior to 10 or non-clang/msvc compilers */
+#define __has_builtin(x) 0
+#endif
+#ifndef EXPECTED
+# define EXPECTED(condition)   (condition)
+# define UNEXPECTED(condition) (condition)
+#endif
+#ifndef IR_SET_ALIGNED
+# define IR_SET_ALIGNED(alignment, decl) decl
+#endif
+#ifndef IR_FALLTHROUGH
+# define IR_FALLTHROUGH ((void)0)
+#endif
+
+/*** Helper routines ***/
+
+#define IR_ALIGNED_SIZE(size, alignment) \
+	(((size) + ((alignment) - 1)) & ~((alignment) - 1))
+
+#define IR_MAX(a, b) (((a) > (b)) ? (a) : (b))
+#define IR_MIN(a, b) (((a) < (b)) ? 
(a) : (b)) + +#define IR_IS_POWER_OF_TWO(x) (!((x) & ((x) - 1))) + +#define IR_LOG2(x) ir_ntzl(x) + +IR_ALWAYS_INLINE uint8_t ir_rol8(uint8_t op1, uint8_t op2) +{ + return (op1 << op2) | (op1 >> (8 - op2)); +} + +IR_ALWAYS_INLINE uint16_t ir_rol16(uint16_t op1, uint16_t op2) +{ + return (op1 << op2) | (op1 >> (16 - op2)); +} + +IR_ALWAYS_INLINE uint32_t ir_rol32(uint32_t op1, uint32_t op2) +{ + return (op1 << op2) | (op1 >> (32 - op2)); +} + +IR_ALWAYS_INLINE uint64_t ir_rol64(uint64_t op1, uint64_t op2) +{ + return (op1 << op2) | (op1 >> (64 - op2)); +} + +IR_ALWAYS_INLINE uint8_t ir_ror8(uint8_t op1, uint8_t op2) +{ + return (op1 >> op2) | (op1 << (8 - op2)); +} + +IR_ALWAYS_INLINE uint16_t ir_ror16(uint16_t op1, uint16_t op2) +{ + return (op1 >> op2) | (op1 << (16 - op2)); +} + +IR_ALWAYS_INLINE uint32_t ir_ror32(uint32_t op1, uint32_t op2) +{ + return (op1 >> op2) | (op1 << (32 - op2)); +} + +IR_ALWAYS_INLINE uint64_t ir_ror64(uint64_t op1, uint64_t op2) +{ + return (op1 >> op2) | (op1 << (64 - op2)); +} + +/* Number of trailing zero bits (0x01 -> 0; 0x40 -> 6; 0x00 -> LEN) */ +IR_ALWAYS_INLINE uint32_t ir_ntz(uint32_t num) +{ +#if (defined(__GNUC__) || __has_builtin(__builtin_ctz)) + return __builtin_ctz(num); +#elif defined(_WIN32) + uint32_t index; + + if (!_BitScanForward(&index, num)) { + /* undefined behavior */ + return 32; + } + + return index; +#else + int n; + + if (num == 0) return 32; + + n = 1; + if ((num & 0x0000ffff) == 0) {n += 16; num = num >> 16;} + if ((num & 0x000000ff) == 0) {n += 8; num = num >> 8;} + if ((num & 0x0000000f) == 0) {n += 4; num = num >> 4;} + if ((num & 0x00000003) == 0) {n += 2; num = num >> 2;} + return n - (num & 1); +#endif +} + +/* Number of trailing zero bits (0x01 -> 0; 0x40 -> 6; 0x00 -> LEN) */ +IR_ALWAYS_INLINE uint32_t ir_ntzl(uint64_t num) +{ +#if (defined(__GNUC__) || __has_builtin(__builtin_ctzl)) + return __builtin_ctzl(num); +#elif defined(_WIN64) + unsigned long index; + + if (!_BitScanForward64(&index, num)) { + /* undefined behavior */ + return 64; + } + + return (uint32_t) index; +#else + uint32_t n; + + if (num == 0) return 64; + + n = 1; + if ((num & 0xffffffff) == 0) {n += 32; num = num >> 32;} + if ((num & 0x0000ffff) == 0) {n += 16; num = num >> 16;} + if ((num & 0x000000ff) == 0) {n += 8; num = num >> 8;} + if ((num & 0x0000000f) == 0) {n += 4; num = num >> 4;} + if ((num & 0x00000003) == 0) {n += 2; num = num >> 2;} + return n - (uint32_t)(num & 1); +#endif +} + +/* Number of leading zero bits (Undefined for zero) */ +IR_ALWAYS_INLINE int ir_nlz(uint32_t num) +{ +#if (defined(__GNUC__) || __has_builtin(__builtin_clz)) + return __builtin_clz(num); +#elif defined(_WIN32) + uint32_t index; + + if (!_BitScanReverse(&index, num)) { + /* undefined behavior */ + return 32; + } + + return (int) (32 - 1) - index; +#else + uint32_t x; + uint32_t n; + + n = 32; + x = num >> 16; if (x != 0) {n -= 16; num = x;} + x = num >> 8; if (x != 0) {n -= 8; num = x;} + x = num >> 4; if (x != 0) {n -= 4; num = x;} + x = num >> 2; if (x != 0) {n -= 2; num = x;} + x = num >> 1; if (x != 0) return n - 2; + return n - num; +#endif +} + +IR_ALWAYS_INLINE int ir_nlzl(uint64_t num) +{ +#if (defined(__GNUC__) || __has_builtin(__builtin_clzll)) + return __builtin_clzll(num); +#elif defined(_WIN64) + unsigned long index; + + if (!_BitScanReverse64(&index, num)) { + /* undefined behavior */ + return 64; + } + + return (int) (64 - 1) - index; +#else + uint64_t x; + uint32_t n; + + n = 64; + x = num >> 32; if (x != 0) {n -= 32; num = x;} + x = num >> 16; if 
(x != 0) {n -= 16; num = x;} + x = num >> 8; if (x != 0) {n -= 8; num = x;} + x = num >> 4; if (x != 0) {n -= 4; num = x;} + x = num >> 2; if (x != 0) {n -= 2; num = x;} + x = num >> 1; if (x != 0) return n - 2; + return n - (uint32_t)num; +#endif +} + +/*** Helper data types ***/ + +/* Arena */ +struct _ir_arena { + char *ptr; + char *end; + ir_arena *prev; +}; + +IR_ALWAYS_INLINE ir_arena* ir_arena_create(size_t size) +{ + ir_arena *arena; + + IR_ASSERT(size >= IR_ALIGNED_SIZE(sizeof(ir_arena), 8)); + arena = (ir_arena*)ir_mem_malloc(size); + arena->ptr = (char*) arena + IR_ALIGNED_SIZE(sizeof(ir_arena), 8); + arena->end = (char*) arena + size; + arena->prev = NULL; + return arena; +} + +IR_ALWAYS_INLINE void ir_arena_free(ir_arena *arena) +{ + do { + ir_arena *prev = arena->prev; + ir_mem_free(arena); + arena = prev; + } while (arena); +} + +IR_ALWAYS_INLINE void* ir_arena_alloc(ir_arena **arena_ptr, size_t size) +{ + ir_arena *arena = *arena_ptr; + char *ptr = arena->ptr; + + size = IR_ALIGNED_SIZE(size, 8); + + if (EXPECTED(size <= (size_t)(arena->end - ptr))) { + arena->ptr = ptr + size; + } else { + size_t arena_size = + UNEXPECTED((size + IR_ALIGNED_SIZE(sizeof(ir_arena), 8)) > (size_t)(arena->end - (char*) arena)) ? + (size + IR_ALIGNED_SIZE(sizeof(ir_arena), 8)) : + (size_t)(arena->end - (char*) arena); + ir_arena *new_arena = (ir_arena*)ir_mem_malloc(arena_size); + + ptr = (char*) new_arena + IR_ALIGNED_SIZE(sizeof(ir_arena), 8); + new_arena->ptr = (char*) new_arena + IR_ALIGNED_SIZE(sizeof(ir_arena), 8) + size; + new_arena->end = (char*) new_arena + arena_size; + new_arena->prev = arena; + *arena_ptr = new_arena; + } + + return (void*) ptr; +} + +IR_ALWAYS_INLINE void* ir_arena_checkpoint(ir_arena *arena) +{ + return arena->ptr; +} + +IR_ALWAYS_INLINE void ir_release(ir_arena **arena_ptr, void *checkpoint) +{ + ir_arena *arena = *arena_ptr; + + while (UNEXPECTED((char*)checkpoint > arena->end) || + UNEXPECTED((char*)checkpoint <= (char*)arena)) { + ir_arena *prev = arena->prev; + ir_mem_free(arena); + *arena_ptr = arena = prev; + } + IR_ASSERT((char*)checkpoint > (char*)arena && (char*)checkpoint <= arena->end); + arena->ptr = (char*)checkpoint; +} + +/* Bitsets */ +#if defined(IR_TARGET_X86) +# define IR_BITSET_BITS 32 +# define IR_BITSET_ONE 1U +# define ir_bitset_base_t uint32_t +# define ir_bitset_ntz ir_ntz +#else +# define IR_BITSET_BITS 64 +# ifdef _M_X64 /* MSVC*/ +# define IR_BITSET_ONE 1ui64 +# else +# define IR_BITSET_ONE 1UL +# endif +# define ir_bitset_base_t uint64_t +# define ir_bitset_ntz ir_ntzl +#endif + +typedef ir_bitset_base_t *ir_bitset; + +IR_ALWAYS_INLINE uint32_t ir_bitset_len(uint32_t n) +{ + return (n + (IR_BITSET_BITS - 1)) / IR_BITSET_BITS; +} + +IR_ALWAYS_INLINE ir_bitset ir_bitset_malloc(uint32_t n) +{ + return ir_mem_calloc(ir_bitset_len(n), IR_BITSET_BITS / 8); +} + +IR_ALWAYS_INLINE void ir_bitset_incl(ir_bitset set, uint32_t n) +{ + set[n / IR_BITSET_BITS] |= IR_BITSET_ONE << (n % IR_BITSET_BITS); +} + +IR_ALWAYS_INLINE void ir_bitset_excl(ir_bitset set, uint32_t n) +{ + set[n / IR_BITSET_BITS] &= ~(IR_BITSET_ONE << (n % IR_BITSET_BITS)); +} + +IR_ALWAYS_INLINE bool ir_bitset_in(const ir_bitset set, uint32_t n) +{ + return (set[(n / IR_BITSET_BITS)] & (IR_BITSET_ONE << (n % IR_BITSET_BITS))) != 0; +} + +IR_ALWAYS_INLINE void ir_bitset_clear(ir_bitset set, uint32_t len) +{ + memset(set, 0, len * (IR_BITSET_BITS / 8)); +} + +IR_ALWAYS_INLINE void ir_bitset_fill(ir_bitset set, uint32_t len) +{ + memset(set, 0xff, len * (IR_BITSET_BITS / 8)); +} + 
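+/* Usage sketch (illustrative only, not used by the framework itself): a
+ * bitset is a bare array of ir_bitset_base_t words sized by ir_bitset_len(),
+ * so a typical lifecycle with the helpers above looks like:
+ *
+ *   ir_bitset set = ir_bitset_malloc(n);     // n bits, zero-initialized
+ *   ir_bitset_incl(set, 42);                 // set bit 42
+ *   if (ir_bitset_in(set, 42)) {             // test bit 42
+ *       ir_bitset_excl(set, 42);             // clear bit 42
+ *   }
+ *   ir_bitset_clear(set, ir_bitset_len(n));  // bulk ops take word counts
+ *   ir_mem_free(set);
+ *
+ * Note that the "len" argument of the bulk operations below is the word
+ * count returned by ir_bitset_len(n), not the number of bits n. */
+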
+IR_ALWAYS_INLINE bool ir_bitset_empty(const ir_bitset set, uint32_t len) +{ + uint32_t i; + for (i = 0; i < len; i++) { + if (set[i]) { + return 0; + } + } + return 1; +} + +IR_ALWAYS_INLINE bool ir_bitset_equal(const ir_bitset set1, const ir_bitset set2, uint32_t len) +{ + return memcmp(set1, set2, len * (IR_BITSET_BITS / 8)) == 0; +} + +IR_ALWAYS_INLINE void ir_bitset_copy(ir_bitset set1, const ir_bitset set2, uint32_t len) +{ + memcpy(set1, set2, len * (IR_BITSET_BITS / 8)); +} + +IR_ALWAYS_INLINE void ir_bitset_intersection(ir_bitset set1, const ir_bitset set2, uint32_t len) +{ + uint32_t i; + + for (i = 0; i < len; i++) { + set1[i] &= set2[i]; + } +} + +IR_ALWAYS_INLINE void ir_bitset_union(ir_bitset set1, const ir_bitset set2, uint32_t len) +{ + uint32_t i; + + for (i = 0; i < len; i++) { + set1[i] |= set2[i]; + } +} + +IR_ALWAYS_INLINE void ir_bitset_difference(ir_bitset set1, const ir_bitset set2, uint32_t len) +{ + uint32_t i; + + for (i = 0; i < len; i++) { + set1[i] = set1[i] & ~set2[i]; + } +} + +IR_ALWAYS_INLINE bool ir_bitset_is_subset(const ir_bitset set1, const ir_bitset set2, uint32_t len) +{ + uint32_t i; + + for (i = 0; i < len; i++) { + if (set1[i] & ~set2[i]) { + return 0; + } + } + return 1; +} + +IR_ALWAYS_INLINE int ir_bitset_first(const ir_bitset set, uint32_t len) +{ + uint32_t i; + + for (i = 0; i < len; i++) { + if (set[i]) { + return IR_BITSET_BITS * i + ir_bitset_ntz(set[i]); + } + } + return -1; /* empty set */ +} + +IR_ALWAYS_INLINE int ir_bitset_last(const ir_bitset set, uint32_t len) +{ + uint32_t i = len; + + while (i > 0) { + i--; + if (set[i]) { + uint32_t j = IR_BITSET_BITS * i - 1; + ir_bitset_base_t x = set[i]; + do { + x = x >> 1; + j++; + } while (x != 0); + return j; + } + } + return -1; /* empty set */ +} + +IR_ALWAYS_INLINE int ir_bitset_pop_first(ir_bitset set, uint32_t len) +{ + uint32_t i; + + for (i = 0; i < len; i++) { + ir_bitset_base_t x = set[i]; + + if (x) { + int bit = IR_BITSET_BITS * i + ir_bitset_ntz(x); + set[i] = x & (x - 1); + return bit; + } + } + return -1; /* empty set */ +} + +#define IR_BITSET_FOREACH(set, len, bit) do { \ + ir_bitset _set = (set); \ + uint32_t _i, _len = (len); \ + for (_i = 0; _i < _len; _set++, _i++) { \ + ir_bitset_base_t _x = *_set; \ + while (_x) { \ + (bit) = IR_BITSET_BITS * _i + ir_bitset_ntz(_x); \ + _x &= _x - 1; + +#define IR_BITSET_FOREACH_DIFFERENCE(set1, set2, len, bit) do { \ + ir_bitset _set1 = (set1); \ + ir_bitset _set2 = (set2); \ + uint32_t _i, _len = (len); \ + for (_i = 0; _i < _len; _i++) { \ + ir_bitset_base_t _x = _set1[_i] & ~_set2[_i]; \ + while (_x) { \ + (bit) = IR_BITSET_BITS * _i + ir_bitset_ntz(_x); \ + _x &= _x - 1; + +#define IR_BITSET_FOREACH_END() \ + } \ + } \ +} while (0) + +/* Bit Queue */ +typedef struct _ir_bitqueue { + uint32_t len; + uint32_t pos; + ir_bitset set; +} ir_bitqueue; + +IR_ALWAYS_INLINE void ir_bitqueue_init(ir_bitqueue *q, uint32_t n) +{ + q->len = ir_bitset_len(n); + q->pos = q->len - 1; + q->set = ir_bitset_malloc(n); +} + +IR_ALWAYS_INLINE void ir_bitqueue_free(ir_bitqueue *q) +{ + ir_mem_free(q->set); +} + +IR_ALWAYS_INLINE void ir_bitqueue_clear(ir_bitqueue *q) +{ + q->pos = q->len - 1; + ir_bitset_clear(q->set, q->len); +} + +IR_ALWAYS_INLINE int ir_bitqueue_pop(ir_bitqueue *q) +{ + uint32_t i = q->pos; + ir_bitset_base_t x, *p = q->set + i; + do { + x = *p; + if (x) { + int bit = IR_BITSET_BITS * i + ir_bitset_ntz(x); + *p = x & (x - 1); + q->pos = i; + return bit; + } + p++; + i++; + } while (i < q->len); + q->pos = q->len - 1; + return -1; 
/* empty set */ +} + +IR_ALWAYS_INLINE void ir_bitqueue_add(ir_bitqueue *q, uint32_t n) +{ + uint32_t i = n / IR_BITSET_BITS; + q->set[i] |= IR_BITSET_ONE << (n % IR_BITSET_BITS); + if (i < q->pos) { + q->pos = i; + } +} + +IR_ALWAYS_INLINE void ir_bitqueue_del(ir_bitqueue *q, uint32_t n) +{ + ir_bitset_excl(q->set, n); +} + +IR_ALWAYS_INLINE bool ir_bitqueue_in(const ir_bitqueue *q, uint32_t n) +{ + return ir_bitset_in(q->set, n); +} + +/* Dynamic array of numeric references */ +typedef struct _ir_array { + ir_ref *refs; + uint32_t size; +} ir_array; + +void ir_array_grow(ir_array *a, uint32_t size); +void ir_array_insert(ir_array *a, uint32_t i, ir_ref val); +void ir_array_remove(ir_array *a, uint32_t i); + +IR_ALWAYS_INLINE void ir_array_init(ir_array *a, uint32_t size) +{ + a->refs = ir_mem_malloc(size * sizeof(ir_ref)); + a->size = size; +} + +IR_ALWAYS_INLINE void ir_array_free(ir_array *a) +{ + ir_mem_free(a->refs); + a->refs = NULL; + a->size = 0; +} + +IR_ALWAYS_INLINE uint32_t ir_array_size(const ir_array *a) +{ + return a->size; +} + +IR_ALWAYS_INLINE ir_ref ir_array_get(const ir_array *a, uint32_t i) +{ + return (i < a->size) ? a->refs[i] : IR_UNUSED; +} + +IR_ALWAYS_INLINE ir_ref ir_array_at(const ir_array *a, uint32_t i) +{ + IR_ASSERT(i < a->size); + return a->refs[i]; +} + +IR_ALWAYS_INLINE void ir_array_set(ir_array *a, uint32_t i, ir_ref val) +{ + if (i >= a->size) { + ir_array_grow(a, i + 1); + } + a->refs[i] = val; +} + +IR_ALWAYS_INLINE void ir_array_set_unchecked(ir_array *a, uint32_t i, ir_ref val) +{ + IR_ASSERT(i < a->size); + a->refs[i] = val; +} + +/* List/Stack of numeric references */ +typedef struct _ir_list { + ir_array a; + uint32_t len; +} ir_list; + +bool ir_list_contains(const ir_list *l, ir_ref val); +void ir_list_insert(ir_list *l, uint32_t i, ir_ref val); +void ir_list_remove(ir_list *l, uint32_t i); + +IR_ALWAYS_INLINE void ir_list_init(ir_list *l, uint32_t size) +{ + ir_array_init(&l->a, size); + l->len = 0; +} + +IR_ALWAYS_INLINE void ir_list_free(ir_list *l) +{ + ir_array_free(&l->a); + l->len = 0; +} + +IR_ALWAYS_INLINE void ir_list_clear(ir_list *l) +{ + l->len = 0; +} + +IR_ALWAYS_INLINE uint32_t ir_list_len(const ir_list *l) +{ + return l->len; +} + +IR_ALWAYS_INLINE uint32_t ir_list_capasity(const ir_list *l) +{ + return ir_array_size(&l->a); +} + +IR_ALWAYS_INLINE void ir_list_push(ir_list *l, ir_ref val) +{ + ir_array_set(&l->a, l->len++, val); +} + +IR_ALWAYS_INLINE void ir_list_push_unchecked(ir_list *l, ir_ref val) +{ + ir_array_set_unchecked(&l->a, l->len++, val); +} + +IR_ALWAYS_INLINE ir_ref ir_list_pop(ir_list *l) +{ + IR_ASSERT(l->len > 0); + return ir_array_at(&l->a, --l->len); +} + +IR_ALWAYS_INLINE ir_ref ir_list_peek(const ir_list *l) +{ + IR_ASSERT(l->len > 0); + return ir_array_at(&l->a, l->len - 1); +} + +IR_ALWAYS_INLINE ir_ref ir_list_at(const ir_list *l, uint32_t i) +{ + IR_ASSERT(i < l->len); + return ir_array_at(&l->a, i); +} + +IR_ALWAYS_INLINE void ir_list_set(ir_list *l, uint32_t i, ir_ref val) +{ + IR_ASSERT(i < l->len); + ir_array_set_unchecked(&l->a, i, val); +} + +/* Worklist (unique list) */ +typedef struct _ir_worklist { + ir_list l; + ir_bitset visited; +} ir_worklist; + +IR_ALWAYS_INLINE void ir_worklist_init(ir_worklist *w, uint32_t size) +{ + ir_list_init(&w->l, size); + w->visited = ir_bitset_malloc(size); +} + +IR_ALWAYS_INLINE void ir_worklist_free(ir_worklist *w) +{ + ir_list_free(&w->l); + ir_mem_free(w->visited); +} + +IR_ALWAYS_INLINE uint32_t ir_worklist_len(const ir_worklist *w) +{ + return 
ir_list_len(&w->l); +} + +IR_ALWAYS_INLINE uint32_t ir_worklist_capasity(const ir_worklist *w) +{ + return ir_list_capasity(&w->l); +} + +IR_ALWAYS_INLINE void ir_worklist_clear(ir_worklist *w) +{ + ir_list_clear(&w->l); + ir_bitset_clear(w->visited, ir_bitset_len(ir_worklist_capasity(w))); +} + +IR_ALWAYS_INLINE bool ir_worklist_push(ir_worklist *w, ir_ref val) +{ + IR_ASSERT(val >= 0 && (uint32_t)val < ir_worklist_capasity(w)); + if (ir_bitset_in(w->visited, val)) { + return 0; + } + ir_bitset_incl(w->visited, val); + IR_ASSERT(ir_list_len(&w->l) < ir_list_capasity(&w->l)); + ir_list_push_unchecked(&w->l, val); + return 1; +} + +IR_ALWAYS_INLINE ir_ref ir_worklist_pop(ir_worklist *w) +{ + return ir_list_pop(&w->l); +} + +IR_ALWAYS_INLINE ir_ref ir_worklist_peek(const ir_worklist *w) +{ + return ir_list_peek(&w->l); +} + +/* IR Hash Table */ +#define IR_INVALID_IDX 0xffffffff +#define IR_INVALID_VAL 0x80000000 + +typedef struct _ir_hashtab_bucket { + uint32_t key; + ir_ref val; + uint32_t next; +} ir_hashtab_bucket; + +typedef struct _ir_hashtab { + void *data; + uint32_t mask; + uint32_t size; + uint32_t count; + uint32_t pos; +} ir_hashtab; + +void ir_hashtab_init(ir_hashtab *tab, uint32_t size); +void ir_hashtab_free(ir_hashtab *tab); +ir_ref ir_hashtab_find(const ir_hashtab *tab, uint32_t key); +bool ir_hashtab_add(ir_hashtab *tab, uint32_t key, ir_ref val); +void ir_hashtab_key_sort(ir_hashtab *tab); + +/* IR Addr Table */ +typedef struct _ir_addrtab_bucket { + uint64_t key; + ir_ref val; + uint32_t next; +} ir_addrtab_bucket; + +void ir_addrtab_init(ir_hashtab *tab, uint32_t size); +void ir_addrtab_free(ir_hashtab *tab); +ir_ref ir_addrtab_find(const ir_hashtab *tab, uint64_t key); +bool ir_addrtab_add(ir_hashtab *tab, uint64_t key, ir_ref val); + +/*** IR OP info ***/ +extern const uint8_t ir_type_flags[IR_LAST_TYPE]; +extern const char *ir_type_name[IR_LAST_TYPE]; +extern const char *ir_type_cname[IR_LAST_TYPE]; +extern const uint8_t ir_type_size[IR_LAST_TYPE]; +extern const uint32_t ir_op_flags[IR_LAST_OP]; +extern const char *ir_op_name[IR_LAST_OP]; + +#define IR_IS_CONST_OP(op) ((op) > IR_NOP && (op) <= IR_C_FLOAT) +#define IR_IS_FOLDABLE_OP(op) ((op) <= IR_LAST_FOLDABLE_OP) + +IR_ALWAYS_INLINE bool ir_const_is_true(const ir_insn *v) +{ + + if (v->type == IR_BOOL) { + return v->val.b; + } else if (IR_IS_TYPE_INT(v->type)) { + return v->val.i64 != 0; + } else if (v->type == IR_DOUBLE) { + return v->val.d != 0.0; + } else { + IR_ASSERT(v->type == IR_FLOAT); + return v->val.f != 0.0; + } + return 0; +} + +IR_ALWAYS_INLINE bool ir_ref_is_true(ir_ctx *ctx, ir_ref ref) +{ + if (ref == IR_TRUE) { + return 1; + } else if (ref == IR_FALSE) { + return 0; + } else { + IR_ASSERT(IR_IS_CONST_REF(ref)); + return ir_const_is_true(&ctx->ir_base[ref]); + } +} + +/* IR OP flags */ +#define IR_OP_FLAG_OPERANDS_SHIFT 3 + +#define IR_OP_FLAG_EDGES_MASK 0x03 +#define IR_OP_FLAG_VAR_INPUTS 0x04 +#define IR_OP_FLAG_OPERANDS_MASK 0x18 +#define IR_OP_FLAG_MEM_MASK ((1<<6)|(1<<7)) + +#define IR_OP_FLAG_DATA (1<<8) +#define IR_OP_FLAG_CONTROL (1<<9) +#define IR_OP_FLAG_MEM (1<<10) +#define IR_OP_FLAG_COMMUTATIVE (1<<11) +#define IR_OP_FLAG_BB_START (1<<12) +#define IR_OP_FLAG_BB_END (1<<13) +#define IR_OP_FLAG_TERMINATOR (1<<14) +#define IR_OP_FLAG_PINNED (1<<15) + +#define IR_OP_FLAG_MEM_LOAD ((0<<6)|(0<<7)) +#define IR_OP_FLAG_MEM_STORE ((0<<6)|(1<<7)) +#define IR_OP_FLAG_MEM_CALL ((1<<6)|(0<<7)) +#define IR_OP_FLAG_MEM_ALLOC ((1<<6)|(1<<7)) +#define IR_OP_FLAG_MEM_MASK ((1<<6)|(1<<7)) + +#define 
IR_OPND_UNUSED 0x0 +#define IR_OPND_DATA 0x1 +#define IR_OPND_CONTROL 0x2 +#define IR_OPND_CONTROL_DEP 0x3 +#define IR_OPND_CONTROL_REF 0x4 +#define IR_OPND_STR 0x5 +#define IR_OPND_NUM 0x6 +#define IR_OPND_PROB 0x7 + +#define IR_OP_FLAGS(op_flags, op1_flags, op2_flags, op3_flags) \ + ((op_flags) | ((op1_flags) << 20) | ((op2_flags) << 24) | ((op3_flags) << 28)) + +#define IR_INPUT_EDGES_COUNT(flags) (flags & IR_OP_FLAG_EDGES_MASK) +#define IR_OPERANDS_COUNT(flags) ((flags & IR_OP_FLAG_OPERANDS_MASK) >> IR_OP_FLAG_OPERANDS_SHIFT) + +#define IR_OP_HAS_VAR_INPUTS(flags) ((flags) & IR_OP_FLAG_VAR_INPUTS) + +#define IR_OPND_KIND(flags, i) \ + (((flags) >> (16 + (4 * (((i) > 3) ? 3 : (i))))) & 0xf) + +#define IR_IS_REF_OPND_KIND(kind) \ + ((kind) >= IR_OPND_DATA && (kind) <= IR_OPND_CONTROL_REF) + +IR_ALWAYS_INLINE ir_ref ir_operands_count(const ir_ctx *ctx, const ir_insn *insn) +{ + uint32_t flags = ir_op_flags[insn->op]; + uint32_t n = IR_OPERANDS_COUNT(flags); + + if (UNEXPECTED(IR_OP_HAS_VAR_INPUTS(flags))) { + /* MERGE, PHI, CALL, etc */ + n = insn->inputs_count; + } + return n; +} + +IR_ALWAYS_INLINE ir_ref ir_input_edges_count(const ir_ctx *ctx, const ir_insn *insn) +{ + uint32_t flags = ir_op_flags[insn->op]; + uint32_t n = IR_INPUT_EDGES_COUNT(flags); + if (UNEXPECTED(IR_OP_HAS_VAR_INPUTS(flags))) { + /* MERGE, PHI, CALL, etc */ + n = insn->inputs_count; + } + return n; +} + +IR_ALWAYS_INLINE uint32_t ir_insn_inputs_to_len(uint32_t inputs_count) +{ + return 1 + (inputs_count >> 2); +} + +IR_ALWAYS_INLINE uint32_t ir_insn_len(const ir_insn *insn) +{ + return ir_insn_inputs_to_len(insn->inputs_count); +} + +/*** IR Binding ***/ +IR_ALWAYS_INLINE ir_ref ir_binding_find(const ir_ctx *ctx, ir_ref ref) +{ + ir_ref var = ir_hashtab_find(ctx->binding, ref); + return (var != (ir_ref)IR_INVALID_VAL) ? 
var : 0;
+}
+
+/*** IR Use Lists ***/
+struct _ir_use_list {
+ ir_ref refs; /* index in ir_ctx->use_edges[] array */
+ ir_ref count;
+};
+
+/*** IR Basic Blocks info ***/
+#define IR_IS_BB_START(op) \
+ ((ir_op_flags[op] & IR_OP_FLAG_BB_START) != 0)
+
+#define IR_IS_BB_MERGE(op) \
+ ((op) == IR_MERGE || (op) == IR_LOOP_BEGIN)
+
+#define IR_IS_BB_END(op) \
+ ((ir_op_flags[op] & IR_OP_FLAG_BB_END) != 0)
+
+#define IR_BB_UNREACHABLE (1<<0)
+#define IR_BB_START (1<<1)
+#define IR_BB_ENTRY (1<<2)
+#define IR_BB_LOOP_HEADER (1<<3)
+#define IR_BB_IRREDUCIBLE_LOOP (1<<4)
+#define IR_BB_DESSA_MOVES (1<<5) /* translation out of SSA requires MOVEs */
+#define IR_BB_EMPTY (1<<6)
+#define IR_BB_PREV_EMPTY_ENTRY (1<<7)
+#define IR_BB_OSR_ENTRY_LOADS (1<<8) /* OSR Entry-point with register LOADs */
+#define IR_BB_LOOP_WITH_ENTRY (1<<9) /* set together with LOOP_HEADER if there is an ENTRY in the loop */
+
+/* The following flags are set by GCM */
+#define IR_BB_HAS_PHI (1<<10)
+#define IR_BB_HAS_PI (1<<11)
+#define IR_BB_HAS_PARAM (1<<12)
+#define IR_BB_HAS_VAR (1<<13)
+
+
+struct _ir_block {
+ uint32_t flags;
+ ir_ref start; /* index of first instruction */
+ ir_ref end; /* index of last instruction */
+ uint32_t successors; /* index in ir_ctx->cfg_edges[] array */
+ uint32_t successors_count;
+ uint32_t predecessors; /* index in ir_ctx->cfg_edges[] array */
+ uint32_t predecessors_count;
+ union {
+ uint32_t dom_parent; /* immediate dominator block */
+ uint32_t idom; /* immediate dominator block */
+ };
+ union {
+ uint32_t dom_depth; /* depth from the root of the dominators tree */
+ uint32_t postnum; /* used temporarily during tree construction */
+ };
+ uint32_t dom_child; /* first dominated block */
+ uint32_t dom_next_child; /* next dominated block (linked list) */
+ uint32_t loop_header;
+ uint32_t loop_depth;
+};
+
+uint32_t ir_skip_empty_target_blocks(const ir_ctx *ctx, uint32_t b);
+uint32_t ir_skip_empty_next_blocks(const ir_ctx *ctx, uint32_t b);
+void ir_get_true_false_blocks(const ir_ctx *ctx, uint32_t b, uint32_t *true_block, uint32_t *false_block, uint32_t *next_block);
+
+IR_ALWAYS_INLINE uint32_t ir_phi_input_number(const ir_ctx *ctx, const ir_block *bb, uint32_t from)
+{
+ uint32_t n, *p;
+
+ for (n = 0, p = &ctx->cfg_edges[bb->predecessors]; n < bb->predecessors_count; p++, n++) {
+ if (*p == from) {
+ return n + 2; /* first input is a reference to MERGE */
+ }
+ }
+ IR_ASSERT(0);
+ return 0;
+}
+
+/*** Folding Engine (see ir.c and ir_fold.h) ***/
+typedef enum _ir_fold_action {
+ IR_FOLD_DO_RESTART,
+ IR_FOLD_DO_CSE,
+ IR_FOLD_DO_EMIT,
+ IR_FOLD_DO_COPY,
+ IR_FOLD_DO_CONST
+} ir_fold_action;
+
+ir_ref ir_folding(ir_ctx *ctx, uint32_t opt, ir_ref op1, ir_ref op2, ir_ref op3, ir_insn *op1_insn, ir_insn *op2_insn, ir_insn *op3_insn);
+
+/*** IR Live Info ***/
+typedef ir_ref ir_live_pos;
+typedef struct _ir_use_pos ir_use_pos;
+
+#define IR_SUB_REFS_COUNT 4
+
+#define IR_LOAD_SUB_REF 0
+#define IR_USE_SUB_REF 1
+#define IR_DEF_SUB_REF 2
+#define IR_SAVE_SUB_REF 3
+
+#define IR_LIVE_POS_TO_REF(pos) ((pos) / IR_SUB_REFS_COUNT)
+#define IR_LIVE_POS_TO_SUB_REF(pos) ((pos) % IR_SUB_REFS_COUNT)
+
+#define IR_LIVE_POS_FROM_REF(ref) ((ref) * IR_SUB_REFS_COUNT)
+
+#define IR_START_LIVE_POS_FROM_REF(ref) ((ref) * IR_SUB_REFS_COUNT)
+#define IR_LOAD_LIVE_POS_FROM_REF(ref) ((ref) * IR_SUB_REFS_COUNT + IR_LOAD_SUB_REF)
+#define IR_USE_LIVE_POS_FROM_REF(ref) ((ref) * IR_SUB_REFS_COUNT + IR_USE_SUB_REF)
+#define IR_DEF_LIVE_POS_FROM_REF(ref) ((ref) * IR_SUB_REFS_COUNT + IR_DEF_SUB_REF)
+#define IR_SAVE_LIVE_POS_FROM_REF(ref) ((ref) * IR_SUB_REFS_COUNT + IR_SAVE_SUB_REF)
+#define IR_END_LIVE_POS_FROM_REF(ref) ((ref) * IR_SUB_REFS_COUNT + IR_SUB_REFS_COUNT)
+
+/* ir_use_pos.flags bits */
+#define IR_USE_MUST_BE_IN_REG (1<<0)
+#define IR_USE_SHOULD_BE_IN_REG (1<<1)
+#define IR_DEF_REUSES_OP1_REG (1<<2)
+#define IR_DEF_CONFLICTS_WITH_INPUT_REGS (1<<3)
+
+#define IR_FUSED_USE (1<<6)
+#define IR_PHI_USE (1<<7)
+
+#define IR_OP1_MUST_BE_IN_REG (1<<8)
+#define IR_OP1_SHOULD_BE_IN_REG (1<<9)
+#define IR_OP2_MUST_BE_IN_REG (1<<10)
+#define IR_OP2_SHOULD_BE_IN_REG (1<<11)
+#define IR_OP3_MUST_BE_IN_REG (1<<12)
+#define IR_OP3_SHOULD_BE_IN_REG (1<<13)
+
+#define IR_USE_FLAGS(def_flags, op_num) (((def_flags) >> (6 + (IR_MIN((op_num), 3) * 2))) & 3)
+
+struct _ir_use_pos {
+ uint16_t op_num; /* 0 - means result */
+ int8_t hint;
+ uint8_t flags;
+ ir_ref hint_ref; /* negative references are used for FUSION and PHI */
+ ir_live_pos pos;
+ ir_use_pos *next;
+};
+
+struct _ir_live_range {
+ ir_live_pos start; /* inclusive */
+ ir_live_pos end; /* exclusive */
+ ir_live_range *next;
+};
+
+/* ir_live_interval.flags bits (two low bits are reserved for temporary register number) */
+#define IR_LIVE_INTERVAL_FIXED (1<<0)
+#define IR_LIVE_INTERVAL_TEMP (1<<1)
+#define IR_LIVE_INTERVAL_HAS_HINT_REGS (1<<2)
+#define IR_LIVE_INTERVAL_HAS_HINT_REFS (1<<3)
+#define IR_LIVE_INTERVAL_MEM_PARAM (1<<4)
+#define IR_LIVE_INTERVAL_MEM_LOAD (1<<5)
+#define IR_LIVE_INTERVAL_COALESCED (1<<6)
+#define IR_LIVE_INTERVAL_SPILL_SPECIAL (1<<7) /* spill slot is pre-allocated in a special area (see ir_ctx.spill_reserved_base) */
+#define IR_LIVE_INTERVAL_SPILLED (1<<8)
+#define IR_LIVE_INTERVAL_SPLIT_CHILD (1<<9)
+
+struct _ir_live_interval {
+ uint8_t type;
+ int8_t reg;
+ uint16_t flags;
+ union {
+ int32_t vreg;
+ int32_t tmp_ref;
+ };
+ union {
+ int32_t stack_spill_pos;
+ ir_ref tmp_op_num;
+ };
+ ir_live_pos end; /* end of the last live range (cache of ival.range.{next->}end) */
+ ir_live_range range;
+ ir_live_range *current_range;
+ ir_use_pos *use_pos;
+ ir_live_interval *next;
+ ir_live_interval *list_next; /* linked list of active, inactive or unhandled intervals */
+};
+
+typedef int (*emit_copy_t)(ir_ctx *ctx, uint8_t type, ir_ref from, ir_ref to);
+
+int ir_gen_dessa_moves(ir_ctx *ctx, uint32_t b, emit_copy_t emit_copy);
+
+#if defined(IR_REGSET_64BIT)
+
+/*typedef enum _ir_reg ir_reg;*/
+typedef int8_t ir_reg;
+
+/*** Register Sets ***/
+#if IR_REGSET_64BIT
+typedef uint64_t ir_regset;
+#else
+typedef uint32_t ir_regset;
+#endif
+
+#define IR_REGSET_EMPTY 0
+
+#define IR_REGSET_IS_EMPTY(regset) \
+ (regset == IR_REGSET_EMPTY)
+
+#define IR_REGSET_IS_SINGLETON(regset) \
+ (regset && !(regset & (regset - 1)))
+
+#if IR_REGSET_64BIT
+# define IR_REGSET(reg) \
+ (1ull << (reg))
+#else
+# define IR_REGSET(reg) \
+ (1u << (reg))
+#endif
+
+#if IR_REGSET_64BIT
+# define IR_REGSET_INTERVAL(reg1, reg2) \
+ (((1ull << ((reg2) - (reg1) + 1)) - 1) << (reg1))
+#else
+# define IR_REGSET_INTERVAL(reg1, reg2) \
+ (((1u << ((reg2) - (reg1) + 1)) - 1) << (reg1))
+#endif
+
+#define IR_REGSET_IN(regset, reg) \
+ (((regset) & IR_REGSET(reg)) != 0)
+
+#define IR_REGSET_INCL(regset, reg) \
+ (regset) |= IR_REGSET(reg)
+
+#define IR_REGSET_EXCL(regset, reg) \
+ (regset) &= ~IR_REGSET(reg)
+
+#define IR_REGSET_UNION(set1, set2) \
+ ((set1) | (set2))
+
+#define IR_REGSET_INTERSECTION(set1, set2) \
+ ((set1) & (set2))
+
+#define IR_REGSET_DIFFERENCE(set1, set2) \
+ ((set1) & ~(set2))
+
+#if IR_REGSET_64BIT
+# define IR_REGSET_FIRST(set) ((ir_reg)ir_ntzl(set))
+# define IR_REGSET_LAST(set) ((ir_reg)(ir_nlzl(set)^63))
+#else
+# define IR_REGSET_FIRST(set) ((ir_reg)ir_ntz(set))
+# define IR_REGSET_LAST(set) ((ir_reg)(ir_nlz(set)^31))
+#endif
+
+IR_ALWAYS_INLINE ir_reg ir_regset_pop_first(ir_regset *set)
+{
+ ir_reg reg;
+
+ IR_ASSERT(!IR_REGSET_IS_EMPTY(*set));
+ reg = IR_REGSET_FIRST(*set);
+ *set = (*set) & ((*set) - 1);
+ return reg;
+}
+
+#define IR_REGSET_FOREACH(set, reg) \
+ do { \
+ ir_regset _tmp = (set); \
+ while (!IR_REGSET_IS_EMPTY(_tmp)) { \
+ reg = ir_regset_pop_first(&_tmp);
+
+#define IR_REGSET_FOREACH_END() \
+ } \
+ } while (0)
+
+#endif /* defined(IR_REGSET_64BIT) */
+
+/*** IR Register Allocation ***/
+/* Flags for ctx->regs[][] (low bits are used for register number itself) */
+typedef struct _ir_reg_alloc_data {
+ int32_t unused_slot_4;
+ int32_t unused_slot_2;
+ int32_t unused_slot_1;
+ ir_live_interval **handled;
+} ir_reg_alloc_data;
+
+int32_t ir_allocate_spill_slot(ir_ctx *ctx, ir_type type, ir_reg_alloc_data *data);
+
+IR_ALWAYS_INLINE void ir_set_alocated_reg(ir_ctx *ctx, ir_ref ref, int op_num, int8_t reg)
+{
+ int8_t *regs = ctx->regs[ref];
+
+ if (op_num > 0) {
+ /* regs[] is not limited by the declared boundary 4, the real boundary is checked below */
+ IR_ASSERT(op_num <= IR_MAX(3, ctx->ir_base[ref].inputs_count));
+ }
+ regs[op_num] = reg;
+}
+
+IR_ALWAYS_INLINE int8_t ir_get_alocated_reg(const ir_ctx *ctx, ir_ref ref, int op_num)
+{
+ int8_t *regs = ctx->regs[ref];
+
+ /* regs[] is not limited by the declared boundary 4, the real boundary is checked below */
+ IR_ASSERT(op_num <= IR_MAX(3, ctx->ir_base[ref].inputs_count));
+ return regs[op_num];
+}
+
+/*** IR Target Interface ***/
+
+/* ctx->rules[] flags */
+#define IR_FUSED (1U<<31) /* Insn is fused into others (code is generated as part of the fusion root) */
+#define IR_SKIPPED (1U<<30) /* Insn is skipped (code is not generated) */
+#define IR_SIMPLE (1U<<29) /* Insn doesn't have any target constraints */
+
+#define IR_RULE_MASK 0xff
+
+extern const char *ir_rule_name[];
+
+typedef struct _ir_target_constraints ir_target_constraints;
+
+#define IR_TMP_REG(_num, _type, _start, _end) \
+ (ir_tmp_reg){.num=(_num), .type=(_type), .start=(_start), .end=(_end)}
+#define IR_SCRATCH_REG(_reg, _start, _end) \
+ (ir_tmp_reg){.reg=(_reg), .type=IR_VOID, .start=(_start), .end=(_end)}
+
+int ir_get_target_constraints(const ir_ctx *ctx, ir_ref ref, ir_target_constraints *constraints);
+
+void ir_fix_stack_frame(ir_ctx *ctx);
+
+/* Utility */
+ir_type ir_get_return_type(ir_ctx *ctx);
+bool ir_is_fastcall(const ir_ctx *ctx, const ir_insn *insn);
+bool ir_is_vararg(const ir_ctx *ctx, ir_insn *insn);
+
+//#define IR_BITSET_LIVENESS
+
+#endif /* IR_PRIVATE_H */
diff --git a/ext/opcache/jit/ir/ir_ra.c b/ext/opcache/jit/ir/ir_ra.c
new file mode 100644
index 00000000000..d3b9ac134a9
--- /dev/null
+++ b/ext/opcache/jit/ir/ir_ra.c
@@ -0,0 +1,3870 @@
+/*
+ * IR - Lightweight JIT Compilation Framework
+ * (RA - Register Allocation, Liveness, Coalescing, SSA Deconstruction)
+ * Copyright (C) 2022 Zend by Perforce.
+ * Authors: Dmitry Stogov
+ *
+ * See: "Linear Scan Register Allocation on SSA Form", Christian Wimmer and
+ * Michael Franz, CGO'10 (2010)
+ * See: "Optimized Interval Splitting in a Linear Scan Register Allocator",
+ * Christian Wimmer VEE'05 (2005)
+ */
+
+#ifndef _GNU_SOURCE
+# define _GNU_SOURCE
+#endif
+
+#include <stdlib.h>
+#include "ir.h"
+
+#if defined(IR_TARGET_X86) || defined(IR_TARGET_X64)
+# include "ir_x86.h"
+#elif defined(IR_TARGET_AARCH64)
+# include "ir_aarch64.h"
+#else
+# error "Unknown IR target"
+#endif
+
+#include "ir_private.h"
+
+int ir_regs_number(void)
+{
+ return IR_REG_NUM;
+}
+
+bool ir_reg_is_int(int32_t reg)
+{
+ IR_ASSERT(reg >= 0 && reg < IR_REG_NUM);
+ return reg >= IR_REG_GP_FIRST && reg <= IR_REG_GP_LAST;
+}
+
+static int ir_assign_virtual_registers_slow(ir_ctx *ctx)
+{
+ uint32_t *vregs;
+ uint32_t vregs_count = 0;
+ uint32_t b;
+ ir_ref i, n;
+ ir_block *bb;
+ ir_insn *insn;
+ uint32_t flags;
+
+ /* Assign unique virtual register to each data node */
+ vregs = ir_mem_calloc(ctx->insns_count, sizeof(ir_ref));
+ n = 1;
+ for (b = 1, bb = ctx->cfg_blocks + b; b <= ctx->cfg_blocks_count; b++, bb++) {
+ IR_ASSERT(!(bb->flags & IR_BB_UNREACHABLE));
+ i = bb->start;
+
+ /* skip first instruction */
+ insn = ctx->ir_base + i;
+ n = ir_insn_len(insn);
+ i += n;
+ insn += n;
+ while (i < bb->end) {
+ flags = ir_op_flags[insn->op];
+ if (((flags & IR_OP_FLAG_DATA) && insn->op != IR_VAR && (insn->op != IR_PARAM || ctx->use_lists[i].count > 0))
+ || ((flags & IR_OP_FLAG_MEM) && ctx->use_lists[i].count > 1)) {
+ if (!ctx->rules || !(ctx->rules[i] & (IR_FUSED|IR_SKIPPED))) {
+ vregs[i] = ++vregs_count;
+ }
+ }
+ n = ir_insn_len(insn);
+ i += n;
+ insn += n;
+ }
+ }
+ ctx->vregs_count = vregs_count;
+ ctx->vregs = vregs;
+
+ return 1;
+}
+
+int ir_assign_virtual_registers(ir_ctx *ctx)
+{
+ uint32_t *vregs;
+ uint32_t vregs_count = 0;
+ ir_ref i;
+ ir_insn *insn;
+
+ if (!ctx->rules) {
+ return ir_assign_virtual_registers_slow(ctx);
+ }
+
+ /* Assign unique virtual register to each rule that needs it */
+ vregs = ir_mem_malloc(ctx->insns_count * sizeof(ir_ref));
+
+ for (i = 1, insn = &ctx->ir_base[1]; i < ctx->insns_count; i++, insn++) {
+ uint32_t v = 0;
+
+ if (ctx->rules[i] && !(ctx->rules[i] & (IR_FUSED|IR_SKIPPED))) {
+ uint32_t flags = ir_op_flags[insn->op];
+
+ if ((flags & IR_OP_FLAG_DATA)
+ || ((flags & IR_OP_FLAG_MEM) && ctx->use_lists[i].count > 1)) {
+ v = ++vregs_count;
+ }
+ }
+ vregs[i] = v;
+ }
+
+ ctx->vregs_count = vregs_count;
+ ctx->vregs = vregs;
+
+ return 1;
+}
+
+/* Lifetime intervals construction */
+
+static ir_live_interval *ir_new_live_range(ir_ctx *ctx, int v, ir_live_pos start, ir_live_pos end)
+{
+ ir_live_interval *ival = ir_arena_alloc(&ctx->arena, sizeof(ir_live_interval));
+
+ ival->type = IR_VOID;
+ ival->reg = IR_REG_NONE;
+ ival->flags = 0;
+ ival->vreg = v;
+ ival->stack_spill_pos = -1; // not allocated
+ ival->range.start = start;
+ ival->range.end = ival->end = end;
+ ival->range.next = NULL;
+ ival->use_pos = NULL;
+ ival->next = NULL;
+
+ ctx->live_intervals[v] = ival;
+ return ival;
+}
+
+static ir_live_interval *ir_add_live_range(ir_ctx *ctx, int v, ir_live_pos start, ir_live_pos end)
+{
+ ir_live_interval *ival = ctx->live_intervals[v];
+ ir_live_range *p, *q;
+
+ if (!ival) {
+ return ir_new_live_range(ctx, v, start, end);
+ }
+
+ p = &ival->range;
+ if (end >= p->start) {
+ ir_live_range *prev = NULL;
+
+ do {
+ if (p->end >= start) {
+ if (start < p->start) {
+ p->start = start;
+ }
+ if (end > p->end) {
+ /* merge with next */
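+ /* e.g. if the interval already holds [10,20) and [24,30), adding
+ [15,26) extends the first range and the loop below absorbs the
+ second, leaving a single [10,30); absorbed ir_live_range records
+ are recycled through ctx->unused_ranges */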
+ ir_live_range *next = p->next; + + p->end = end; + while (next && p->end >= next->start) { + if (next->end > p->end) { + p->end = next->end; + } + p->next = next->next; + /* remember in the "unused_ranges" list */ + next->next = ctx->unused_ranges; + ctx->unused_ranges = next; + next = p->next; + } + if (!p->next) { + ival->end = p->end; + } + } + return ival; + } + prev = p; + p = prev->next; + } while (p && end >= p->start); + if (!p) { + ival->end = end; + } + if (prev) { + if (ctx->unused_ranges) { + /* reuse */ + q = ctx->unused_ranges; + ctx->unused_ranges = q->next; + } else { + q = ir_arena_alloc(&ctx->arena, sizeof(ir_live_range)); + } + prev->next = q; + q->start = start; + q->end = end; + q->next = p; + return ival; + } + } + + if (ctx->unused_ranges) { + /* reuse */ + q = ctx->unused_ranges; + ctx->unused_ranges = q->next; + } else { + q = ir_arena_alloc(&ctx->arena, sizeof(ir_live_range)); + } + q->start = p->start; + q->end = p->end; + q->next = p->next; + p->start = start; + p->end = end; + p->next = q; + return ival; +} + +IR_ALWAYS_INLINE ir_live_interval *ir_add_prev_live_range(ir_ctx *ctx, int v, ir_live_pos start, ir_live_pos end) +{ + ir_live_interval *ival = ctx->live_intervals[v]; + + if (ival && ival->range.start == end) { + ival->range.start = start; + return ival; + } + return ir_add_live_range(ctx, v, start, end); +} + +static void ir_add_fixed_live_range(ir_ctx *ctx, ir_reg reg, ir_live_pos start, ir_live_pos end) +{ + int v = ctx->vregs_count + 1 + reg; + ir_live_interval *ival = ctx->live_intervals[v]; + ir_live_range *q; + + if (!ival) { + ival = ir_arena_alloc(&ctx->arena, sizeof(ir_live_interval)); + ival->type = IR_VOID; + ival->reg = reg; + ival->flags = IR_LIVE_INTERVAL_FIXED; + ival->vreg = v; + ival->stack_spill_pos = -1; // not allocated + ival->range.start = start; + ival->range.end = ival->end = end; + ival->range.next = NULL; + ival->use_pos = NULL; + ival->next = NULL; + + ctx->live_intervals[v] = ival; + } else if (EXPECTED(end < ival->range.start)) { + if (ctx->unused_ranges) { + /* reuse */ + q = ctx->unused_ranges; + ctx->unused_ranges = q->next; + } else { + q = ir_arena_alloc(&ctx->arena, sizeof(ir_live_range)); + } + + q->start = ival->range.start; + q->end = ival->range.end; + q->next = ival->range.next; + ival->range.start = start; + ival->range.end = end; + ival->range.next = q; + } else if (end == ival->range.start) { + ival->range.start = start; + } else { + ir_add_live_range(ctx, v, start, end); + } +} + +static void ir_add_tmp(ir_ctx *ctx, ir_ref ref, ir_ref tmp_ref, int32_t tmp_op_num, ir_tmp_reg tmp_reg) +{ + ir_live_interval *ival = ir_arena_alloc(&ctx->arena, sizeof(ir_live_interval)); + + ival->type = tmp_reg.type; + ival->reg = IR_REG_NONE; + ival->flags = IR_LIVE_INTERVAL_TEMP; + ival->tmp_ref = tmp_ref; + ival->tmp_op_num = tmp_op_num; + ival->range.start = IR_START_LIVE_POS_FROM_REF(ref) + tmp_reg.start; + ival->range.end = ival->end = IR_START_LIVE_POS_FROM_REF(ref) + tmp_reg.end; + ival->range.next = NULL; + ival->use_pos = NULL; + + if (!ctx->live_intervals[0]) { + ival->next = NULL; + ctx->live_intervals[0] = ival; + } else if (ival->range.start >= ctx->live_intervals[0]->range.start) { + ir_live_interval *prev = ctx->live_intervals[0]; + + while (prev->next && ival->range.start >= prev->next->range.start) { + prev = prev->next; + } + ival->next = prev->next; + prev->next = ival; + } else { + ir_live_interval *next = ctx->live_intervals[0]; + + ival->next = next; + ctx->live_intervals[0] = ival; + } + return; +} + 
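+/*
+ * Note: temporary intervals created by ir_add_tmp() are kept in a single
+ * list at ctx->live_intervals[0], sorted by range.start, so the lookup
+ * below can stop scanning once it passes the position of "ref".
+ */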
+static bool ir_has_tmp(ir_ctx *ctx, ir_ref ref, int32_t op_num) +{ + ir_live_interval *ival = ctx->live_intervals[0]; + + if (ival) { + while (ival && IR_LIVE_POS_TO_REF(ival->range.start) <= ref) { + if (ival->tmp_ref == ref && ival->tmp_op_num == op_num) { + return 1; + } + ival = ival->next; + } + } + return 0; +} + +static ir_live_interval *ir_fix_live_range(ir_ctx *ctx, int v, ir_live_pos old_start, ir_live_pos new_start) +{ + ir_live_interval *ival = ctx->live_intervals[v]; + ir_live_range *p = &ival->range; + +#if 0 + while (p && p->start < old_start) { + p = p->next; + } +#endif + IR_ASSERT(ival && p->start == old_start); + p->start = new_start; + return ival; +} + +static void ir_add_use_pos(ir_ctx *ctx, ir_live_interval *ival, ir_use_pos *use_pos) +{ + ir_use_pos *p = ival->use_pos; + + if (EXPECTED(!p || p->pos > use_pos->pos)) { + use_pos->next = p; + ival->use_pos = use_pos; + } else { + ir_use_pos *prev; + + do { + prev = p; + p = p->next; + } while (p && p->pos < use_pos->pos); + + use_pos->next = prev->next; + prev->next = use_pos; + } +} + + +IR_ALWAYS_INLINE void ir_add_use(ir_ctx *ctx, ir_live_interval *ival, int op_num, ir_live_pos pos, ir_reg hint, uint8_t use_flags, ir_ref hint_ref) +{ + ir_use_pos *use_pos; + + use_pos = ir_arena_alloc(&ctx->arena, sizeof(ir_use_pos)); + use_pos->op_num = op_num; + use_pos->hint = hint; + use_pos->flags = use_flags; + use_pos->hint_ref = hint_ref; + use_pos->pos = pos; + + if (hint != IR_REG_NONE) { + ival->flags |= IR_LIVE_INTERVAL_HAS_HINT_REGS; + } + if (hint_ref > 0) { + ival->flags |= IR_LIVE_INTERVAL_HAS_HINT_REFS; + } + + ir_add_use_pos(ctx, ival, use_pos); +} + +static void ir_add_phi_use(ir_ctx *ctx, ir_live_interval *ival, int op_num, ir_live_pos pos, ir_ref phi_ref) +{ + ir_use_pos *use_pos; + + IR_ASSERT(phi_ref > 0); + use_pos = ir_arena_alloc(&ctx->arena, sizeof(ir_use_pos)); + use_pos->op_num = op_num; + use_pos->hint = IR_REG_NONE; + use_pos->flags = IR_PHI_USE | IR_USE_SHOULD_BE_IN_REG; // TODO: ??? 
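+ /* a negative hint_ref marks this use as a PHI operand (see the
+ "negative references" note on ir_use_pos.hint_ref) */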
+ use_pos->hint_ref = -phi_ref;
+ use_pos->pos = pos;
+
+ ir_add_use_pos(ctx, ival, use_pos);
+}
+
+static void ir_add_hint(ir_ctx *ctx, ir_ref ref, ir_live_pos pos, ir_reg hint)
+{
+ ir_live_interval *ival = ctx->live_intervals[ctx->vregs[ref]];
+
+ if (!(ival->flags & IR_LIVE_INTERVAL_HAS_HINT_REGS)) {
+ ir_use_pos *use_pos = ival->use_pos;
+
+ while (use_pos) {
+ if (use_pos->pos == pos) {
+ if (use_pos->hint == IR_REG_NONE) {
+ use_pos->hint = hint;
+ ival->flags |= IR_LIVE_INTERVAL_HAS_HINT_REGS;
+ }
+ }
+ use_pos = use_pos->next;
+ }
+ }
+}
+
+static void ir_hint_propagation(ir_ctx *ctx)
+{
+ int i;
+ ir_live_interval *ival;
+ ir_use_pos *use_pos;
+ ir_use_pos *hint_use_pos;
+
+ for (i = ctx->vregs_count; i > 0; i--) {
+ ival = ctx->live_intervals[i];
+ if (ival
+ && (ival->flags & (IR_LIVE_INTERVAL_HAS_HINT_REGS|IR_LIVE_INTERVAL_HAS_HINT_REFS)) == (IR_LIVE_INTERVAL_HAS_HINT_REGS|IR_LIVE_INTERVAL_HAS_HINT_REFS)) {
+ use_pos = ival->use_pos;
+ hint_use_pos = NULL;
+ while (use_pos) {
+ if (use_pos->op_num == 0) {
+ if (use_pos->hint_ref > 0) {
+ hint_use_pos = use_pos;
+ }
+ } else if (use_pos->hint != IR_REG_NONE) {
+ if (hint_use_pos) {
+ ir_add_hint(ctx, hint_use_pos->hint_ref, hint_use_pos->pos, use_pos->hint);
+ hint_use_pos = NULL;
+ }
+ }
+ use_pos = use_pos->next;
+ }
+ }
+ }
+}
+
+#ifdef IR_BITSET_LIVENESS
+/* DFS + Loop-Forest liveness for SSA using bitset(s) */
+static void ir_add_osr_entry_loads(ir_ctx *ctx, ir_block *bb, ir_bitset live, uint32_t len, uint32_t b)
+{
+ bool ok = 1;
+ int count = 0;
+ ir_list *list = (ir_list*)ctx->osr_entry_loads;
+ ir_ref i;
+
+ IR_BITSET_FOREACH(live, len, i) {
+ /* Skip live references from ENTRY to PARAM. TODO: duplicate PARAM in each ENTRY ??? */
+ ir_use_pos *use_pos = ctx->live_intervals[i]->use_pos;
+ ir_ref ref = (use_pos->hint_ref < 0) ?
-use_pos->hint_ref : IR_LIVE_POS_TO_REF(use_pos->pos); + + if (use_pos->op_num) { + ir_ref *ops = ctx->ir_base[ref].ops; + ref = ops[use_pos->op_num]; + } + + if (ctx->ir_base[ref].op == IR_PARAM) { + continue; + } + if (ctx->binding) { + ir_ref var = ir_binding_find(ctx, ref); + if (var < 0) { + /* We may load the value at OSR entry-point */ + if (!count) { + bb->flags &= ~IR_BB_EMPTY; + bb->flags |= IR_BB_OSR_ENTRY_LOADS; + if (!ctx->osr_entry_loads) { + list = ctx->osr_entry_loads = ir_mem_malloc(sizeof(ir_list)); + ir_list_init(list, 16); + } + ir_list_push(list, b); + ir_list_push(list, 0); + } + ir_list_push(list, ref); + count++; + continue; + } + } + fprintf(stderr, "ENTRY %d (block %d start %d) - live var %d\n", ctx->ir_base[bb->start].op2, b, bb->start, ref); + ok = 0; + } IR_BITSET_FOREACH_END(); + + if (!ok) { + IR_ASSERT(0); + } + if (count) { + ir_list_set(list, ir_list_len(ctx->osr_entry_loads) - (count + 1), count); + +#if 0 + /* ENTRY "clobbers" all registers */ + ir_ref ref = ctx->ir_base[bb->start].op1; + ir_add_fixed_live_range(ctx, IR_REG_ALL, + IR_DEF_LIVE_POS_FROM_REF(ref), + IR_SAVE_LIVE_POS_FROM_REF(ref)); +#endif + } +} + +static void ir_add_fusion_ranges(ir_ctx *ctx, ir_ref ref, ir_ref input, ir_block *bb, ir_bitset live) +{ + ir_ref stack[4]; + int stack_pos = 0; + ir_target_constraints constraints; + ir_insn *insn; + uint32_t j, n, flags, def_flags; + ir_ref *p, child; + uint8_t use_flags; + ir_reg reg; + ir_live_pos use_pos; + ir_live_interval *ival; + + while (1) { + IR_ASSERT(input > 0 && ctx->rules[input] & IR_FUSED); + + if (!(ctx->rules[input] & IR_SIMPLE)) { + def_flags = ir_get_target_constraints(ctx, input, &constraints); + n = constraints.tmps_count; + while (n > 0) { + n--; + if (constraints.tmp_regs[n].type) { + ir_add_tmp(ctx, ref, input, constraints.tmp_regs[n].num, constraints.tmp_regs[n]); + } else { + /* CPU specific constraints */ + ir_add_fixed_live_range(ctx, constraints.tmp_regs[n].reg, + IR_START_LIVE_POS_FROM_REF(ref) + constraints.tmp_regs[n].start, + IR_START_LIVE_POS_FROM_REF(ref) + constraints.tmp_regs[n].end); + } + } + } else { + def_flags = IR_OP1_MUST_BE_IN_REG | IR_OP2_MUST_BE_IN_REG | IR_OP3_MUST_BE_IN_REG; + constraints.hints_count = 0; + } + + insn = &ctx->ir_base[input]; + flags = ir_op_flags[insn->op]; + n = IR_INPUT_EDGES_COUNT(flags); + j = 1; + p = insn->ops + j; + if (flags & IR_OP_FLAG_CONTROL) { + j++; + p++; + } + for (; j <= n; j++, p++) { + IR_ASSERT(IR_OPND_KIND(flags, j) == IR_OPND_DATA); + child = *p; + if (child > 0) { + uint32_t v = ctx->vregs[child]; + + if (v) { + use_flags = IR_FUSED_USE | IR_USE_FLAGS(def_flags, j); + reg = (j < constraints.hints_count) ? 
constraints.hints[j] : IR_REG_NONE;
+ use_pos = IR_LOAD_LIVE_POS_FROM_REF(ref);
+ if (EXPECTED(reg == IR_REG_NONE)) {
+ use_pos += IR_USE_SUB_REF;
+ }
+
+ if (!ir_bitset_in(live, v)) {
+ /* live.add(opd) */
+ ir_bitset_incl(live, v);
+ /* intervals[opd].addRange(b.from, op.id) */
+ ival = ir_add_live_range(ctx, v,
+ IR_START_LIVE_POS_FROM_REF(bb->start), use_pos);
+ } else {
+ ival = ctx->live_intervals[v];
+ }
+ ir_add_use(ctx, ival, j, use_pos, reg, use_flags, -input);
+ } else if (ctx->rules[child] & IR_FUSED) {
+ IR_ASSERT(stack_pos < (int)(sizeof(stack)/sizeof(stack_pos)));
+ stack[stack_pos++] = child;
+ } else if (ctx->rules[child] == (IR_SKIPPED|IR_RLOAD)) {
+ ir_set_alocated_reg(ctx, input, j, ctx->ir_base[child].op2);
+ }
+ }
+ }
+ if (!stack_pos) {
+ break;
+ }
+ input = stack[--stack_pos];
+ }
+}
+
+int ir_compute_live_ranges(ir_ctx *ctx)
+{
+ uint32_t b, i, j, k, n, succ, *p;
+ ir_ref ref;
+ uint32_t len;
+ ir_insn *insn;
+ ir_block *bb, *succ_bb;
+#ifdef IR_DEBUG
+ ir_bitset visited;
+#endif
+ ir_bitset live, bb_live;
+ ir_bitset loops = NULL;
+ ir_bitqueue queue;
+ ir_live_interval *ival;
+
+ if (!(ctx->flags & IR_LINEAR) || !ctx->vregs) {
+ return 0;
+ }
+
+ if (ctx->rules) {
+ ctx->regs = ir_mem_malloc(sizeof(ir_regs) * ctx->insns_count);
+ memset(ctx->regs, IR_REG_NONE, sizeof(ir_regs) * ctx->insns_count);
+ }
+
+ /* Root of the list of IR_VARs */
+ ctx->vars = IR_UNUSED;
+
+ /* Compute Live Ranges */
+ ctx->flags &= ~IR_LR_HAVE_DESSA_MOVES;
+ len = ir_bitset_len(ctx->vregs_count + 1);
+ bb_live = ir_mem_malloc((ctx->cfg_blocks_count + 1) * len * sizeof(ir_bitset_base_t));
+
+ /* vregs + tmp + fixed + SCRATCH + ALL */
+ ctx->live_intervals = ir_mem_calloc(ctx->vregs_count + 1 + IR_REG_NUM + 2, sizeof(ir_live_interval*));
+
+#ifdef IR_DEBUG
+ visited = ir_bitset_malloc(ctx->cfg_blocks_count + 1);
+#endif
+
+ if (!ctx->arena) {
+ ctx->arena = ir_arena_create(16 * 1024);
+ }
+
+ /* for each basic block in reverse order */
+ for (b = ctx->cfg_blocks_count; b > 0; b--) {
+ bb = &ctx->cfg_blocks[b];
+ IR_ASSERT(!(bb->flags & IR_BB_UNREACHABLE));
+ /* for each successor of b */
+
+#ifdef IR_DEBUG
+ ir_bitset_incl(visited, b);
+#endif
+ live = bb_live + (len * b);
+ n = bb->successors_count;
+ if (n == 0) {
+ ir_bitset_clear(live, len);
+ } else {
+ p = &ctx->cfg_edges[bb->successors];
+ succ = *p;
+
+#ifdef IR_DEBUG
+ /* blocks must be ordered so that all dominators of a block are before this block */
+ IR_ASSERT(ir_bitset_in(visited, succ) || bb->loop_header == succ);
+#endif
+
+ /* live = union of successors.liveIn */
+ if (EXPECTED(succ > b) && EXPECTED(!(ctx->cfg_blocks[succ].flags & IR_BB_ENTRY))) {
+ ir_bitset_copy(live, bb_live + (len * succ), len);
+ } else {
+ IR_ASSERT(succ > b || (ctx->cfg_blocks[succ].flags & IR_BB_LOOP_HEADER));
+ ir_bitset_clear(live, len);
+ }
+ if (n > 1) {
+ for (p++, n--; n > 0; p++, n--) {
+ succ = *p;
+ if (EXPECTED(succ > b) && EXPECTED(!(ctx->cfg_blocks[succ].flags & IR_BB_ENTRY))) {
+ ir_bitset_union(live, bb_live + (len * succ), len);
+ } else {
+ IR_ASSERT(succ > b || (ctx->cfg_blocks[succ].flags & IR_BB_LOOP_HEADER));
+ }
+ }
+ }
+
+ /* for each opd in live */
+ IR_BITSET_FOREACH(live, len, i) {
+ /* intervals[opd].addRange(b.from, b.to) */
+ ir_add_prev_live_range(ctx, i,
+ IR_START_LIVE_POS_FROM_REF(bb->start),
+ IR_END_LIVE_POS_FROM_REF(bb->end));
+ } IR_BITSET_FOREACH_END();
+ }
+
+ if (bb->successors_count == 1) {
+ /* for each phi function phi of successor */
+ succ = ctx->cfg_edges[bb->successors];
+ succ_bb =
&ctx->cfg_blocks[succ]; + if (succ_bb->flags & IR_BB_HAS_PHI) { + ir_use_list *use_list = &ctx->use_lists[succ_bb->start]; + + k = ir_phi_input_number(ctx, succ_bb, b); + IR_ASSERT(k != 0); + for (ref = 0; ref < use_list->count; ref++) { + ir_ref use = ctx->use_edges[use_list->refs + ref]; + insn = &ctx->ir_base[use]; + if (insn->op == IR_PHI) { + ir_ref input = ir_insn_op(insn, k); + if (input > 0) { + uint32_t v = ctx->vregs[input]; + + /* live.add(phi.inputOf(b)) */ + IR_ASSERT(v); + ir_bitset_incl(live, v); + /* intervals[phi.inputOf(b)].addRange(b.from, b.to) */ + ival = ir_add_prev_live_range(ctx, v, + IR_START_LIVE_POS_FROM_REF(bb->start), + IR_END_LIVE_POS_FROM_REF(bb->end)); + ir_add_phi_use(ctx, ival, k, IR_DEF_LIVE_POS_FROM_REF(bb->end), use); + } + } + } + } + } + + /* for each operation op of b in reverse order */ + ref = bb->end; + insn = &ctx->ir_base[ref]; + if (insn->op == IR_END || insn->op == IR_LOOP_END) { + ref = ctx->prev_ref[ref]; + } + for (; ref > bb->start; ref = ctx->prev_ref[ref]) { + uint32_t def_flags; + uint32_t flags; + ir_ref *p; + ir_target_constraints constraints; + uint32_t v; + + if (ctx->rules) { + int n; + + if (ctx->rules[ref] & (IR_FUSED|IR_SKIPPED)) { + if (ctx->rules[ref] == (IR_SKIPPED|IR_VAR) && ctx->use_lists[ref].count > 0) { + insn = &ctx->ir_base[ref]; + insn->op3 = ctx->vars; + ctx->vars = ref; + } + continue; + } + + def_flags = ir_get_target_constraints(ctx, ref, &constraints); + n = constraints.tmps_count; + while (n > 0) { + n--; + if (constraints.tmp_regs[n].type) { + ir_add_tmp(ctx, ref, ref, constraints.tmp_regs[n].num, constraints.tmp_regs[n]); + } else { + /* CPU specific constraints */ + ir_add_fixed_live_range(ctx, constraints.tmp_regs[n].reg, + IR_START_LIVE_POS_FROM_REF(ref) + constraints.tmp_regs[n].start, + IR_START_LIVE_POS_FROM_REF(ref) + constraints.tmp_regs[n].end); + } + } + } else { + def_flags = 0; + constraints.def_reg = IR_REG_NONE; + constraints.hints_count = 0; + } + + insn = &ctx->ir_base[ref]; + v = ctx->vregs[ref]; + if (v) { + IR_ASSERT(ir_bitset_in(live, v)); + + if (insn->op != IR_PHI) { + ir_live_pos def_pos; + ir_ref hint_ref = 0; + ir_reg reg = constraints.def_reg; + + if (reg != IR_REG_NONE) { + def_pos = IR_SAVE_LIVE_POS_FROM_REF(ref); + if (insn->op == IR_PARAM || insn->op == IR_RLOAD) { + /* parameter register must be kept before it's copied */ + ir_add_fixed_live_range(ctx, reg, IR_START_LIVE_POS_FROM_REF(bb->start), def_pos); + } + } else if (def_flags & IR_DEF_REUSES_OP1_REG) { + if (!IR_IS_CONST_REF(insn->op1) && ctx->vregs[insn->op1]) { + hint_ref = insn->op1; + } + def_pos = IR_LOAD_LIVE_POS_FROM_REF(ref); + } else if (def_flags & IR_DEF_CONFLICTS_WITH_INPUT_REGS) { + def_pos = IR_LOAD_LIVE_POS_FROM_REF(ref); + } else { + if (insn->op == IR_PARAM) { + /* We may reuse parameter stack slot for spilling */ + ctx->live_intervals[v]->flags |= IR_LIVE_INTERVAL_MEM_PARAM; + } else if (insn->op == IR_VLOAD) { + /* Load may be fused into the usage instruction */ + ctx->live_intervals[v]->flags |= IR_LIVE_INTERVAL_MEM_LOAD; + } + def_pos = IR_DEF_LIVE_POS_FROM_REF(ref); + } + /* live.remove(opd) */ + ir_bitset_excl(live, v); + /* intervals[opd].setFrom(op.id) */ + ival = ir_fix_live_range(ctx, v, + IR_START_LIVE_POS_FROM_REF(bb->start), def_pos); + ival->type = insn->type; + ir_add_use(ctx, ival, 0, def_pos, reg, def_flags, hint_ref); + } else { + /* live.remove(opd) */ + ir_bitset_excl(live, v); + /* PHIs inputs must not be processed */ + ival = ctx->live_intervals[v]; + if (UNEXPECTED(!ival)) { + /* Dead 
PHI */ + ival = ir_add_live_range(ctx, v, IR_DEF_LIVE_POS_FROM_REF(ref), IR_USE_LIVE_POS_FROM_REF(ref)); + } + ival->type = insn->type; + ir_add_use(ctx, ival, 0, IR_DEF_LIVE_POS_FROM_REF(ref), IR_REG_NONE, IR_USE_SHOULD_BE_IN_REG, 0); + continue; + } + } + + IR_ASSERT(insn->op != IR_PHI && (!ctx->rules || !(ctx->rules[ref] & (IR_FUSED|IR_SKIPPED)))); + flags = ir_op_flags[insn->op]; + j = 1; + p = insn->ops + 1; + if (flags & (IR_OP_FLAG_CONTROL|IR_OP_FLAG_MEM|IR_OP_FLAG_PINNED)) { + j++; + p++; + } + for (; j <= insn->inputs_count; j++, p++) { + ir_ref input = *p; + ir_reg reg = (j < constraints.hints_count) ? constraints.hints[j] : IR_REG_NONE; + ir_live_pos use_pos; + ir_ref hint_ref = 0; + uint32_t v; + + if (input > 0) { + v = ctx->vregs[input]; + if (v) { + use_pos = IR_USE_LIVE_POS_FROM_REF(ref); + if (reg != IR_REG_NONE) { + use_pos = IR_LOAD_LIVE_POS_FROM_REF(ref); + ir_add_fixed_live_range(ctx, reg, use_pos, use_pos + IR_USE_SUB_REF); + } else if (def_flags & IR_DEF_REUSES_OP1_REG) { + if (j == 1) { + use_pos = IR_LOAD_LIVE_POS_FROM_REF(ref); + IR_ASSERT(ctx->vregs[ref]); + hint_ref = ref; + } else if (input == insn->op1) { + /* Input is the same as "op1" */ + use_pos = IR_LOAD_LIVE_POS_FROM_REF(ref); + } + } + if (!ir_bitset_in(live, v)) { + /* live.add(opd) */ + ir_bitset_incl(live, v); + /* intervals[opd].addRange(b.from, op.id) */ + ival = ir_add_live_range(ctx, v, IR_START_LIVE_POS_FROM_REF(bb->start), use_pos); + } else { + ival = ctx->live_intervals[v]; + } + ir_add_use(ctx, ival, j, use_pos, reg, IR_USE_FLAGS(def_flags, j), hint_ref); + } else if (ctx->rules) { + if (ctx->rules[input] & IR_FUSED) { + ir_add_fusion_ranges(ctx, ref, input, bb, live); + } else if (ctx->rules[input] == (IR_SKIPPED|IR_RLOAD)) { + ir_set_alocated_reg(ctx, ref, j, ctx->ir_base[input].op2); + } + } + } else if (reg != IR_REG_NONE) { + use_pos = IR_LOAD_LIVE_POS_FROM_REF(ref); + ir_add_fixed_live_range(ctx, reg, use_pos, use_pos + IR_USE_SUB_REF); + } + } + } + + /* if b is loop header */ + if ((bb->flags & IR_BB_LOOP_HEADER) + && !ir_bitset_empty(live, len)) { + /* variables live at loop header are alive at the whole loop body */ + uint32_t bb_set_len = ir_bitset_len(ctx->cfg_blocks_count + 1); + uint32_t child; + ir_block *child_bb; + ir_bitset child_live_in; + + if (!loops) { + loops = ir_bitset_malloc(ctx->cfg_blocks_count + 1); + ir_bitqueue_init(&queue, ctx->cfg_blocks_count + 1); + } else { + ir_bitset_clear(loops, bb_set_len); + ir_bitqueue_clear(&queue); + } + ir_bitset_incl(loops, b); + child = b; + do { + child_bb = &ctx->cfg_blocks[child]; + child_live_in = bb_live + (len * child); + + IR_BITSET_FOREACH(live, len, i) { + ir_bitset_incl(child_live_in, i); + ir_add_live_range(ctx, i, + IR_START_LIVE_POS_FROM_REF(child_bb->start), + IR_END_LIVE_POS_FROM_REF(child_bb->end)); + } IR_BITSET_FOREACH_END(); + + child = child_bb->dom_child; + while (child) { + child_bb = &ctx->cfg_blocks[child]; + if (child_bb->loop_header && ir_bitset_in(loops, child_bb->loop_header)) { + ir_bitqueue_add(&queue, child); + if (child_bb->flags & IR_BB_LOOP_HEADER) { + ir_bitset_incl(loops, child); + } + } + child = child_bb->dom_next_child; + } + } while ((child = ir_bitqueue_pop(&queue)) != (uint32_t)-1); + } + } + + if (ctx->entries) { + for (i = 0; i < ctx->entries_count; i++) { + b = ctx->entries[i]; + bb = &ctx->cfg_blocks[b]; + live = bb_live + (len * b); + ir_add_osr_entry_loads(ctx, bb, live, len, b); + } + if (ctx->osr_entry_loads) { + ir_list_push((ir_list*)ctx->osr_entry_loads, 0); + } + } + + if 
(loops) {
+ ir_mem_free(loops);
+ ir_bitqueue_free(&queue);
+ }
+
+ ir_mem_free(bb_live);
+#ifdef IR_DEBUG
+ ir_mem_free(visited);
+#endif
+
+ return 1;
+}
+
+#else
+/* Path exploration by definition liveness for SSA using sets represented by linked lists */
+
+#define IS_LIVE_IN_BLOCK(v, b) \
+ (live_in_block[v] == b)
+#define SET_LIVE_IN_BLOCK(v, b) do { \
+ live_in_block[v] = b; \
+ } while (0)
+
+/* Returns the last virtual register alive at the end of the block (it is used as an already-visited marker) */
+IR_ALWAYS_INLINE uint32_t ir_live_out_top(ir_ctx *ctx, uint32_t *live_outs, ir_list *live_lists, uint32_t b)
+{
+#if 0
+ return live_outs[b];
+#else
+ if (!live_outs[b]) {
+ return -1;
+ }
+ return ir_list_at(live_lists, live_outs[b]);
+#endif
+}
+
+/* Remember a virtual register alive at the end of the block */
+IR_ALWAYS_INLINE void ir_live_out_push(ir_ctx *ctx, uint32_t *live_outs, ir_list *live_lists, uint32_t b, uint32_t v)
+{
+#if 0
+ ir_block *bb = &ctx->cfg_blocks[b];
+ live_outs[b] = v;
+ ir_add_prev_live_range(ctx, v,
+ IR_START_LIVE_POS_FROM_REF(bb->start),
+ IR_END_LIVE_POS_FROM_REF(bb->end));
+#else
+ if (live_lists->len >= live_lists->a.size) {
+ ir_array_grow(&live_lists->a, live_lists->a.size + 1024);
+ }
+ /* Form a linked list of virtual registers live at the end of the block */
+ ir_list_push_unchecked(live_lists, live_outs[b]); /* push old root of the list (previous element of the list) */
+ live_outs[b] = ir_list_len(live_lists); /* remember the new root */
+ ir_list_push_unchecked(live_lists, v); /* push a virtual register */
+#endif
+}
+
+/*
+ * Computes live-out sets for each basic-block per variable using def-use chains.
+ *
+ * The implementation is based on algorithms 6 and 7 described in
+ * "Computing Liveness Sets for SSA-Form Programs", Florian Brandner, Benoit Boissinot,
+ * Alain Darte, Benoit Dupont de Dinechin, Fabrice Rastello.
TR Inria RR-7503, 2011 + */ +static void ir_compute_live_sets(ir_ctx *ctx, uint32_t *live_outs, ir_list *live_lists) +{ + ir_list block_queue, fuse_queue; + ir_ref i; + + ir_list_init(&fuse_queue, 16); + ir_list_init(&block_queue, 256); + + /* For each virtual register explore paths from all uses to definition */ + for (i = ctx->insns_count - 1; i > 0; i--) { + uint32_t v = ctx->vregs[i]; + + if (v) { + uint32_t def_block = ctx->cfg_map[i]; + ir_use_list *use_list = &ctx->use_lists[i]; + ir_ref *p, n = use_list->count; + + /* Collect all blocks where 'v' is used into a 'block_queue' */ + for (p = &ctx->use_edges[use_list->refs]; n > 0; p++, n--) { + ir_ref use = *p; + ir_insn *insn = &ctx->ir_base[use]; + + if (UNEXPECTED(insn->op == IR_PHI)) { + ir_ref n = insn->inputs_count - 1; + ir_ref *p = insn->ops + 2; /* PHI data inputs */ + ir_ref *q = ctx->ir_base[insn->op1].ops + 1; /* MERGE inputs */ + + for (;n > 0; p++, q++, n--) { + if (*p == i) { + uint32_t pred_block = ctx->cfg_map[*q]; + + if (ir_live_out_top(ctx, live_outs, live_lists, pred_block) != v) { + ir_live_out_push(ctx, live_outs, live_lists, pred_block, v); + if (pred_block != def_block) { + ir_list_push(&block_queue, pred_block); + } + } + } + } + } else if (ctx->rules && UNEXPECTED(ctx->rules[use] & IR_FUSED)) { + while (1) { + ir_use_list *use_list = &ctx->use_lists[use]; + ir_ref *p, n = use_list->count; + + for (p = &ctx->use_edges[use_list->refs]; n > 0; p++, n--) { + ir_ref use = *p; + + if (ctx->rules[use] & IR_FUSED) { + ir_list_push(&fuse_queue, use); + } else { + uint32_t use_block = ctx->cfg_map[use]; + + if (def_block != use_block && ir_live_out_top(ctx, live_outs, live_lists, use_block) != v) { + ir_list_push(&block_queue, use_block); + } + } + } + if (!ir_list_len(&fuse_queue)) { + break; + } + use = ir_list_pop(&fuse_queue); + } + } else { + uint32_t use_block = ctx->cfg_map[use]; + + /* Check if the virtual register is alive at the start of 'use_block' */ + if (def_block != use_block && ir_live_out_top(ctx, live_outs, live_lists, use_block) != v) { + ir_list_push(&block_queue, use_block); + } + } + } + + /* UP_AND_MARK: Traverse through predecessor blocks until we reach the block where 'v' is defined*/ + while (ir_list_len(&block_queue)) { + uint32_t b = ir_list_pop(&block_queue); + ir_block *bb = &ctx->cfg_blocks[b]; + uint32_t *p, n = bb->predecessors_count; + + if (bb->flags & IR_BB_ENTRY) { + /* live_in_push(ENTRY, v) */ + ir_insn *insn = &ctx->ir_base[bb->start]; + + IR_ASSERT(insn->op == IR_ENTRY); + IR_ASSERT(insn->op3 >= 0 && insn->op3 < (ir_ref)ctx->entries_count); + if (live_lists->len >= live_lists->a.size) { + ir_array_grow(&live_lists->a, live_lists->a.size + 1024); + } + ir_list_push_unchecked(live_lists, live_outs[ctx->cfg_blocks_count + 1 + insn->op3]); + ir_list_push_unchecked(live_lists, v); + live_outs[ctx->cfg_blocks_count + 1 + insn->op3] = ir_list_len(live_lists) - 1; + continue; + } + for (p = &ctx->cfg_edges[bb->predecessors]; n > 0; p++, n--) { + uint32_t pred_block = *p; + + /* Check if 'pred_block' wasn't traversed before */ + if (ir_live_out_top(ctx, live_outs, live_lists, pred_block) != v) { + /* Mark a virtual register 'v' alive at the end of 'pred_block' */ + ir_live_out_push(ctx, live_outs, live_lists, pred_block, v); + if (pred_block != def_block) { + ir_list_push(&block_queue, pred_block); + } + } + } + } + } + } + + ir_list_free(&block_queue); + ir_list_free(&fuse_queue); +} + +static void ir_add_osr_entry_loads(ir_ctx *ctx, ir_block *bb, uint32_t pos, ir_list *live_lists, 
uint32_t b) +{ + bool ok = 1; + int count = 0; + ir_list *list = (ir_list*)ctx->osr_entry_loads; + ir_ref i; + + while (pos) { + i = ir_list_at(live_lists, pos); + pos = ir_list_at(live_lists, pos - 1); + + /* Skip live references from ENTRY to PARAM. TODO: duplicate PARAM in each ENTRY ??? */ + ir_use_pos *use_pos = ctx->live_intervals[i]->use_pos; + ir_ref ref = (use_pos->hint_ref < 0) ? -use_pos->hint_ref : IR_LIVE_POS_TO_REF(use_pos->pos); + + if (use_pos->op_num) { + ir_ref *ops = ctx->ir_base[ref].ops; + ref = ops[use_pos->op_num]; + } + + if (ctx->ir_base[ref].op == IR_PARAM) { + continue; + } + if (ctx->binding) { + ir_ref var = ir_binding_find(ctx, ref); + if (var < 0) { + /* We may load the value at OSR entry-point */ + if (!count) { + bb->flags &= ~IR_BB_EMPTY; + bb->flags |= IR_BB_OSR_ENTRY_LOADS; + if (!ctx->osr_entry_loads) { + list = ctx->osr_entry_loads = ir_mem_malloc(sizeof(ir_list)); + ir_list_init(list, 16); + } + ir_list_push(list, b); + ir_list_push(list, 0); + } + ir_list_push(list, ref); + count++; + continue; + } + } + fprintf(stderr, "ENTRY %d (block %d start %d) - live var %d\n", ctx->ir_base[bb->start].op2, b, bb->start, ref); + ok = 0; + } + + if (!ok) { + IR_ASSERT(0); + } + if (count) { + ir_list_set(list, ir_list_len(ctx->osr_entry_loads) - (count + 1), count); + +#if 0 + /* ENTRY "clobbers" all registers */ + ir_ref ref = ctx->ir_base[bb->start].op1; + ir_add_fixed_live_range(ctx, IR_REG_ALL, + IR_DEF_LIVE_POS_FROM_REF(ref), + IR_SAVE_LIVE_POS_FROM_REF(ref)); +#endif + } +} + +static void ir_add_fusion_ranges(ir_ctx *ctx, ir_ref ref, ir_ref input, ir_block *bb, uint32_t *live_in_block, uint32_t b) +{ + ir_ref stack[4]; + int stack_pos = 0; + ir_target_constraints constraints; + ir_insn *insn; + uint32_t j, n, flags, def_flags; + ir_ref *p, child; + uint8_t use_flags; + ir_reg reg; + ir_live_pos pos = IR_START_LIVE_POS_FROM_REF(ref); + ir_live_pos use_pos; + ir_live_interval *ival; + + while (1) { + IR_ASSERT(input > 0 && ctx->rules[input] & IR_FUSED); + + if (!(ctx->rules[input] & IR_SIMPLE)) { + def_flags = ir_get_target_constraints(ctx, input, &constraints); + n = constraints.tmps_count; + while (n > 0) { + n--; + if (constraints.tmp_regs[n].type) { + ir_add_tmp(ctx, ref, input, constraints.tmp_regs[n].num, constraints.tmp_regs[n]); + } else { + /* CPU specific constraints */ + ir_add_fixed_live_range(ctx, constraints.tmp_regs[n].reg, + pos + constraints.tmp_regs[n].start, + pos + constraints.tmp_regs[n].end); + } + } + } else { + def_flags = IR_OP1_MUST_BE_IN_REG | IR_OP2_MUST_BE_IN_REG | IR_OP3_MUST_BE_IN_REG; + constraints.hints_count = 0; + } + + insn = &ctx->ir_base[input]; + flags = ir_op_flags[insn->op]; + IR_ASSERT(!IR_OP_HAS_VAR_INPUTS(flags)); + n = IR_INPUT_EDGES_COUNT(flags); + j = 1; + p = insn->ops + j; + if (flags & IR_OP_FLAG_CONTROL) { + j++; + p++; + } + for (; j <= n; j++, p++) { + IR_ASSERT(IR_OPND_KIND(flags, j) == IR_OPND_DATA); + child = *p; + if (child > 0) { + uint32_t v = ctx->vregs[child]; + + if (v) { + reg = (j < constraints.hints_count) ? 
constraints.hints[j] : IR_REG_NONE;
+ use_pos = pos;
+ if (EXPECTED(reg == IR_REG_NONE)) {
+ use_pos += IR_USE_SUB_REF;
+ }
+
+ if (!IS_LIVE_IN_BLOCK(v, b)) {
+ /* live.add(opd) */
+ SET_LIVE_IN_BLOCK(v, b);
+ /* intervals[opd].addRange(b.from, op.id) */
+ ival = ir_add_live_range(ctx, v,
+ IR_START_LIVE_POS_FROM_REF(bb->start), use_pos);
+ } else {
+ ival = ctx->live_intervals[v];
+ }
+ use_flags = IR_FUSED_USE | IR_USE_FLAGS(def_flags, j);
+ ir_add_use(ctx, ival, j, use_pos, reg, use_flags, -input);
+ } else if (ctx->rules[child] & IR_FUSED) {
+ IR_ASSERT(stack_pos < (int)(sizeof(stack)/sizeof(stack_pos)));
+ stack[stack_pos++] = child;
+ } else if (ctx->rules[child] == (IR_SKIPPED|IR_RLOAD)) {
+ ir_set_alocated_reg(ctx, input, j, ctx->ir_base[child].op2);
+ }
+ }
+ }
+ if (!stack_pos) {
+ break;
+ }
+ input = stack[--stack_pos];
+ }
+}
+
+int ir_compute_live_ranges(ir_ctx *ctx)
+{
+ uint32_t b, i, j, k, n, succ;
+ ir_ref ref;
+ ir_insn *insn;
+ ir_block *bb, *succ_bb;
+ uint32_t *live_outs;
+ uint32_t *live_in_block;
+ ir_list live_lists;
+ ir_live_interval *ival;
+
+ if (!(ctx->flags & IR_LINEAR) || !ctx->vregs) {
+ return 0;
+ }
+
+ if (ctx->rules) {
+ ctx->regs = ir_mem_malloc(sizeof(ir_regs) * ctx->insns_count);
+ memset(ctx->regs, IR_REG_NONE, sizeof(ir_regs) * ctx->insns_count);
+ }
+
+ /* Root of the list of IR_VARs */
+ ctx->vars = IR_UNUSED;
+
+ /* Compute Live Ranges */
+ ctx->flags &= ~IR_LR_HAVE_DESSA_MOVES;
+
+ /* vregs + tmp + fixed + SCRATCH + ALL */
+ ctx->live_intervals = ir_mem_calloc(ctx->vregs_count + 1 + IR_REG_NUM + 2, sizeof(ir_live_interval*));
+
+ if (!ctx->arena) {
+ ctx->arena = ir_arena_create(16 * 1024);
+ }
+
+ live_outs = ir_mem_calloc(ctx->cfg_blocks_count + 1 + ctx->entries_count, sizeof(uint32_t));
+ ir_list_init(&live_lists, 1024);
+ ir_compute_live_sets(ctx, live_outs, &live_lists);
+ live_in_block = ir_mem_calloc(ctx->vregs_count + 1, sizeof(uint32_t));
+
+ /* for each basic block in reverse order */
+ for (b = ctx->cfg_blocks_count; b > 0; b--) {
+ bb = &ctx->cfg_blocks[b];
+ IR_ASSERT(!(bb->flags & IR_BB_UNREACHABLE));
+
+ /* For all virtual registers alive at the end of the block */
+ n = live_outs[b];
+ while (n != 0) {
+ i = ir_list_at(&live_lists, n);
+ SET_LIVE_IN_BLOCK(i, b);
+ ir_add_prev_live_range(ctx, i,
+ IR_START_LIVE_POS_FROM_REF(bb->start),
+ IR_END_LIVE_POS_FROM_REF(bb->end));
+ n = ir_list_at(&live_lists, n - 1);
+ }
+
+ if (bb->successors_count == 1) {
+ /* for each phi function of the successor */
+ succ = ctx->cfg_edges[bb->successors];
+ succ_bb = &ctx->cfg_blocks[succ];
+ if (succ_bb->flags & IR_BB_HAS_PHI) {
+ ir_use_list *use_list = &ctx->use_lists[succ_bb->start];
+ ir_ref n, *p;
+
+ k = ir_phi_input_number(ctx, succ_bb, b);
+ IR_ASSERT(k != 0);
+ n = use_list->count;
+ for (p = &ctx->use_edges[use_list->refs]; n > 0; p++, n--) {
+ ir_ref use = *p;
+ insn = &ctx->ir_base[use];
+ if (insn->op == IR_PHI) {
+ ir_ref input = ir_insn_op(insn, k);
+ if (input > 0) {
+ uint32_t v = ctx->vregs[input];
+
+ IR_ASSERT(v);
+ ival = ctx->live_intervals[v];
+ ir_add_phi_use(ctx, ival, k, IR_DEF_LIVE_POS_FROM_REF(bb->end), use);
+ }
+ }
+ }
+ }
+ }
+
+ /* for each operation of the block in reverse order */
+ ref = bb->end;
+ insn = &ctx->ir_base[ref];
+ if (insn->op == IR_END || insn->op == IR_LOOP_END) {
+ ref = ctx->prev_ref[ref];
+ }
+ for (; ref > bb->start; ref = ctx->prev_ref[ref]) {
+ uint32_t def_flags;
+ uint32_t flags;
+ ir_ref *p;
+ ir_target_constraints constraints;
+ uint32_t v;
+
+ if (ctx->rules) {
+ int n;
+
+ if
(ctx->rules[ref] & (IR_FUSED|IR_SKIPPED)) { + if (ctx->rules[ref] == (IR_SKIPPED|IR_VAR) && ctx->use_lists[ref].count > 0) { + insn = &ctx->ir_base[ref]; + insn->op3 = ctx->vars; + ctx->vars = ref; + } + continue; + } + + def_flags = ir_get_target_constraints(ctx, ref, &constraints); + n = constraints.tmps_count; + while (n > 0) { + n--; + if (constraints.tmp_regs[n].type) { + ir_add_tmp(ctx, ref, ref, constraints.tmp_regs[n].num, constraints.tmp_regs[n]); + } else { + /* CPU specific constraints */ + ir_add_fixed_live_range(ctx, constraints.tmp_regs[n].reg, + IR_START_LIVE_POS_FROM_REF(ref) + constraints.tmp_regs[n].start, + IR_START_LIVE_POS_FROM_REF(ref) + constraints.tmp_regs[n].end); + } + } + } else { + def_flags = 0; + constraints.def_reg = IR_REG_NONE; + constraints.hints_count = 0; + } + + insn = &ctx->ir_base[ref]; + v = ctx->vregs[ref]; + if (v) { + if (insn->op != IR_PHI) { + ir_live_pos def_pos; + ir_ref hint_ref = 0; + ir_reg reg = constraints.def_reg; + + if (reg != IR_REG_NONE) { + def_pos = IR_SAVE_LIVE_POS_FROM_REF(ref); + if (insn->op == IR_PARAM || insn->op == IR_RLOAD) { + /* parameter register must be kept before it's copied */ + ir_add_fixed_live_range(ctx, reg, IR_START_LIVE_POS_FROM_REF(bb->start), def_pos); + } + } else if (def_flags & IR_DEF_REUSES_OP1_REG) { + if (!IR_IS_CONST_REF(insn->op1) && ctx->vregs[insn->op1]) { + hint_ref = insn->op1; + } + if (def_flags & IR_DEF_CONFLICTS_WITH_INPUT_REGS) { + def_pos = IR_USE_LIVE_POS_FROM_REF(ref); + } else { + def_pos = IR_LOAD_LIVE_POS_FROM_REF(ref); + } + } else if (def_flags & IR_DEF_CONFLICTS_WITH_INPUT_REGS) { + def_pos = IR_LOAD_LIVE_POS_FROM_REF(ref); + } else { + if (insn->op == IR_PARAM) { + /* We may reuse parameter stack slot for spilling */ + ctx->live_intervals[v]->flags |= IR_LIVE_INTERVAL_MEM_PARAM; + } else if (insn->op == IR_VLOAD) { + /* Load may be fused into the usage instruction */ + ctx->live_intervals[v]->flags |= IR_LIVE_INTERVAL_MEM_LOAD; + } + def_pos = IR_DEF_LIVE_POS_FROM_REF(ref); + } + /* intervals[opd].setFrom(op.id) */ + ival = ir_fix_live_range(ctx, v, + IR_START_LIVE_POS_FROM_REF(bb->start), def_pos); + ival->type = insn->type; + ir_add_use(ctx, ival, 0, def_pos, reg, def_flags, hint_ref); + } else { + /* PHIs inputs must not be processed */ + ival = ctx->live_intervals[v]; + if (UNEXPECTED(!ival)) { + /* Dead PHI */ + ival = ir_add_live_range(ctx, v, IR_DEF_LIVE_POS_FROM_REF(ref), IR_USE_LIVE_POS_FROM_REF(ref)); + } + ival->type = insn->type; + ir_add_use(ctx, ival, 0, IR_DEF_LIVE_POS_FROM_REF(ref), IR_REG_NONE, IR_USE_SHOULD_BE_IN_REG, 0); + continue; + } + } + + IR_ASSERT(insn->op != IR_PHI && (!ctx->rules || !(ctx->rules[ref] & (IR_FUSED|IR_SKIPPED)))); + flags = ir_op_flags[insn->op]; + j = 1; + p = insn->ops + 1; + if (flags & (IR_OP_FLAG_CONTROL|IR_OP_FLAG_MEM|IR_OP_FLAG_PINNED)) { + j++; + p++; + } + for (; j <= insn->inputs_count; j++, p++) { + ir_ref input = *p; + ir_reg reg = (j < constraints.hints_count) ? 
constraints.hints[j] : IR_REG_NONE; + ir_live_pos use_pos; + ir_ref hint_ref = 0; + uint32_t v; + + if (input > 0) { + v = ctx->vregs[input]; + if (v) { + use_pos = IR_USE_LIVE_POS_FROM_REF(ref); + if (reg != IR_REG_NONE) { + use_pos = IR_LOAD_LIVE_POS_FROM_REF(ref); + ir_add_fixed_live_range(ctx, reg, use_pos, use_pos + IR_USE_SUB_REF); + } else if (def_flags & IR_DEF_REUSES_OP1_REG) { + if (j == 1) { + if (def_flags & IR_DEF_CONFLICTS_WITH_INPUT_REGS) { + use_pos = IR_USE_LIVE_POS_FROM_REF(ref); + } else { + use_pos = IR_LOAD_LIVE_POS_FROM_REF(ref); + } + IR_ASSERT(ctx->vregs[ref]); + hint_ref = ref; + } else if (input == insn->op1) { + /* Input is the same as "op1" */ + use_pos = IR_LOAD_LIVE_POS_FROM_REF(ref); + } + } + if (!IS_LIVE_IN_BLOCK(v, b)) { + /* live.add(opd) */ + SET_LIVE_IN_BLOCK(v, b); + /* intervals[opd].addRange(b.from, op.id) */ + ival = ir_add_live_range(ctx, v, IR_START_LIVE_POS_FROM_REF(bb->start), use_pos); + } else { + ival = ctx->live_intervals[v]; + } + ir_add_use(ctx, ival, j, use_pos, reg, IR_USE_FLAGS(def_flags, j), hint_ref); + } else if (ctx->rules) { + if (ctx->rules[input] & IR_FUSED) { + ir_add_fusion_ranges(ctx, ref, input, bb, live_in_block, b); + } else { + if (ctx->rules[input] == (IR_SKIPPED|IR_RLOAD)) { + ir_set_alocated_reg(ctx, ref, j, ctx->ir_base[input].op2); + } + if (reg != IR_REG_NONE) { + use_pos = IR_LOAD_LIVE_POS_FROM_REF(ref); + ir_add_fixed_live_range(ctx, reg, use_pos, use_pos + IR_USE_SUB_REF); + } + } + } + } else if (reg != IR_REG_NONE) { + use_pos = IR_LOAD_LIVE_POS_FROM_REF(ref); + ir_add_fixed_live_range(ctx, reg, use_pos, use_pos + IR_USE_SUB_REF); + } + } + } + } + + if (ctx->entries) { + for (i = 0; i < ctx->entries_count; i++) { + b = ctx->entries[i]; + bb = &ctx->cfg_blocks[b]; + IR_ASSERT(bb->predecessors_count == 1); + ir_add_osr_entry_loads(ctx, bb, live_outs[ctx->cfg_blocks_count + 1 + i], &live_lists, b); + } + if (ctx->osr_entry_loads) { + ir_list_push((ir_list*)ctx->osr_entry_loads, 0); + } + } + + ir_list_free(&live_lists); + ir_mem_free(live_outs); + ir_mem_free(live_in_block); + + return 1; +} + +#endif + +/* Live Ranges coalescing */ + +static ir_live_pos ir_ivals_overlap(ir_live_range *lrg1, ir_live_range *lrg2) +{ + while (1) { + if (lrg2->start < lrg1->end) { + if (lrg1->start < lrg2->end) { + return IR_MAX(lrg1->start, lrg2->start); + } else { + lrg2 = lrg2->next; + if (!lrg2) { + return 0; + } + } + } else { + lrg1 = lrg1->next; + if (!lrg1) { + return 0; + } + } + } +} + +static ir_live_pos ir_vregs_overlap(ir_ctx *ctx, uint32_t r1, uint32_t r2) +{ + ir_live_interval *ival1 = ctx->live_intervals[r1]; + ir_live_interval *ival2 = ctx->live_intervals[r2]; + +#if 0 + if (ival2->range.start >= ival1->end + || ival1->range.start >= ival2->end) { + return 0; + } +#endif + return ir_ivals_overlap(&ival1->range, &ival2->range); +} + +static void ir_vregs_join(ir_ctx *ctx, uint32_t r1, uint32_t r2) +{ + ir_live_interval *ival = ctx->live_intervals[r2]; + ir_live_range *live_range = &ival->range; + ir_live_range *next; + ir_use_pos *use_pos, *next_pos, **prev; + +#if 0 + fprintf(stderr, "COALESCE %d -> %d\n", r2, r1); +#endif + + ir_add_live_range(ctx, r1, live_range->start, live_range->end); + live_range = live_range->next; + while (live_range) { + next = live_range->next; + live_range->next = ctx->unused_ranges; + ctx->unused_ranges = live_range; + ir_add_live_range(ctx, r1, live_range->start, live_range->end); + live_range = next; + } + + /* merge sorted use_pos lists */ + prev = &ctx->live_intervals[r1]->use_pos; + 
use_pos = ival->use_pos; + while (use_pos) { + if (use_pos->hint_ref > 0 && ctx->vregs[use_pos->hint_ref] == r1) { + use_pos->hint_ref = 0; + } + while (*prev && ((*prev)->pos < use_pos->pos || + ((*prev)->pos == use_pos->pos && + (use_pos->op_num == 0 || (*prev)->op_num < use_pos->op_num)))) { + if ((*prev)->hint_ref > 0 && ctx->vregs[(*prev)->hint_ref] == r2) { + (*prev)->hint_ref = 0; + } + prev = &(*prev)->next; + } + next_pos = use_pos->next; + use_pos->next = *prev; + *prev = use_pos; + prev = &use_pos->next; + use_pos = next_pos; + } + use_pos = *prev; + while (use_pos) { + if (use_pos->hint_ref > 0 && ctx->vregs[use_pos->hint_ref] == r2) { + use_pos->hint_ref = 0; + } + use_pos = use_pos->next; + } + + ctx->live_intervals[r1]->flags |= + IR_LIVE_INTERVAL_COALESCED | (ival->flags & (IR_LIVE_INTERVAL_HAS_HINT_REGS|IR_LIVE_INTERVAL_HAS_HINT_REFS)); + if (ctx->ir_base[IR_LIVE_POS_TO_REF(ctx->live_intervals[r1]->use_pos->pos)].op != IR_VLOAD) { + ctx->live_intervals[r1]->flags &= ~IR_LIVE_INTERVAL_MEM_LOAD; + } + ctx->live_intervals[r2] = NULL; + + // TODO: remember to reuse ??? + //ir_mem_free(ival); +} + +static bool ir_try_coalesce(ir_ctx *ctx, ir_ref from, ir_ref to) +{ + ir_ref i; + uint32_t v1 = ctx->vregs[from]; + uint32_t v2 = ctx->vregs[to]; + + if (v1 != v2 && !ir_vregs_overlap(ctx, v1, v2)) { + uint16_t f1 = ctx->live_intervals[v1]->flags; + uint16_t f2 = ctx->live_intervals[v2]->flags; + + if ((f1 & IR_LIVE_INTERVAL_COALESCED) && !(f2 & IR_LIVE_INTERVAL_COALESCED)) { + ir_vregs_join(ctx, v1, v2); + ctx->vregs[to] = v1; + } else if ((f2 & IR_LIVE_INTERVAL_COALESCED) && !(f1 & IR_LIVE_INTERVAL_COALESCED)) { + ir_vregs_join(ctx, v2, v1); + ctx->vregs[from] = v2; + } else if (from < to) { + ir_vregs_join(ctx, v1, v2); + if (f2 & IR_LIVE_INTERVAL_COALESCED) { + for (i = 0; i < ctx->insns_count; i++) { + if (ctx->vregs[i] == v2) { + ctx->vregs[i] = v1; + } + } + } else { + ctx->vregs[to] = v1; + } + } else { + ir_vregs_join(ctx, v2, v1); + if (f1 & IR_LIVE_INTERVAL_COALESCED) { + for (i = 0; i < ctx->insns_count; i++) { + if (ctx->vregs[i] == v1) { + ctx->vregs[i] = v2; + } + } + } else { + ctx->vregs[from] = v2; + } + } + return 1; + } + return 0; +} + +static void ir_add_phi_move(ir_ctx *ctx, uint32_t b, ir_ref from, ir_ref to) +{ + if (IR_IS_CONST_REF(from) || ctx->vregs[from] != ctx->vregs[to]) { + ctx->cfg_blocks[b].flags &= ~IR_BB_EMPTY; + ctx->cfg_blocks[b].flags |= IR_BB_DESSA_MOVES; + ctx->flags |= IR_LR_HAVE_DESSA_MOVES; +#if 0 + fprintf(stderr, "BB%d: MOV %d -> %d\n", b, from, to); +#endif + } +} + +#if defined(_WIN32) || defined(__APPLE__) || defined(__FreeBSD__) +static int ir_block_cmp(void *data, const void *b1, const void *b2) +#else +static int ir_block_cmp(const void *b1, const void *b2, void *data) +#endif +{ + ir_ctx *ctx = data; + int d1 = ctx->cfg_blocks[*(ir_ref*)b1].loop_depth; + int d2 = ctx->cfg_blocks[*(ir_ref*)b2].loop_depth; + + if (d1 > d2) { + return -1; + } else if (d1 == d2) { + if (ctx->cfg_blocks[*(ir_ref*)b1].start < ctx->cfg_blocks[*(ir_ref*)b2].start) { + return -1; + } else { + return 1; + } + } else { + return 1; + } +} + +static void ir_swap_operands(ir_ctx *ctx, ir_ref i, ir_insn *insn) +{ + ir_live_pos pos = IR_USE_LIVE_POS_FROM_REF(i); + ir_live_pos load_pos = IR_LOAD_LIVE_POS_FROM_REF(i); + ir_live_interval *ival; + ir_live_range *r; + ir_use_pos *p, *p1 = NULL, *p2 = NULL; + ir_ref tmp; + + tmp = insn->op1; + insn->op1 = insn->op2; + insn->op2 = tmp; + + ival = ctx->live_intervals[ctx->vregs[insn->op1]]; + p = ival->use_pos; + while 
(p) { + if (p->pos == pos) { + p->pos = load_pos; + p->op_num = 1; + p1 = p; + break; + } + p = p->next; + } + + ival = ctx->live_intervals[ctx->vregs[i]]; + p = ival->use_pos; + while (p) { + if (p->pos == load_pos) { + p->hint_ref = insn->op1; + break; + } + p = p->next; + } + + if (insn->op2 > 0 && ctx->vregs[insn->op2]) { + ival = ctx->live_intervals[ctx->vregs[insn->op2]]; + r = &ival->range; + while (r) { + if (r->end == load_pos) { + r->end = pos; + if (!r->next) { + ival->end = pos; + } + break; + } + r = r->next; + } + p = ival->use_pos; + while (p) { + if (p->pos == load_pos) { + p->pos = pos; + p->op_num = 2; + p2 = p; + break; + } + p = p->next; + } + } + if (p1 && p2) { + uint8_t tmp = p1->flags; + p1->flags = p2->flags; + p2->flags = tmp; + } +} + +static int ir_hint_conflict(ir_ctx *ctx, ir_ref ref, int use, int def) +{ + ir_use_pos *p; + ir_reg r1 = IR_REG_NONE; + ir_reg r2 = IR_REG_NONE; + + p = ctx->live_intervals[use]->use_pos; + while (p) { + if (IR_LIVE_POS_TO_REF(p->pos) == ref) { + break; + } + if (p->hint != IR_REG_NONE) { + r1 = p->hint; + } + p = p->next; + } + + p = ctx->live_intervals[def]->use_pos; + while (p) { + if (IR_LIVE_POS_TO_REF(p->pos) > ref) { + if (p->hint != IR_REG_NONE) { + r2 = p->hint; + break; + } + } + p = p->next; + } + return r1 != r2 && r1 != IR_REG_NONE && r2 != IR_REG_NONE; +} + +static int ir_try_swap_operands(ir_ctx *ctx, ir_ref i, ir_insn *insn) +{ + if (ctx->vregs[insn->op1] + && ctx->vregs[insn->op1] != ctx->vregs[i] + && !ir_vregs_overlap(ctx, ctx->vregs[insn->op1], ctx->vregs[i]) + && !ir_hint_conflict(ctx, i, ctx->vregs[insn->op1], ctx->vregs[i])) { + /* pass */ + } else { + if (ctx->vregs[insn->op2] && ctx->vregs[insn->op2] != ctx->vregs[i]) { + ir_live_pos pos = IR_USE_LIVE_POS_FROM_REF(i); + ir_live_pos load_pos = IR_LOAD_LIVE_POS_FROM_REF(i); + ir_live_interval *ival = ctx->live_intervals[ctx->vregs[insn->op2]]; + ir_live_range *r = &ival->range; + + if ((ival->flags & IR_LIVE_INTERVAL_MEM_PARAM) && ctx->use_lists[insn->op2].count == 1) { + return 0; + } + while (r) { + if (r->end == pos) { + r->end = load_pos; + if (!r->next) { + ival->end = load_pos; + } + if (!ir_vregs_overlap(ctx, ctx->vregs[insn->op2], ctx->vregs[i]) + && !ir_hint_conflict(ctx, i, ctx->vregs[insn->op2], ctx->vregs[i])) { + ir_swap_operands(ctx, i, insn); + return 1; + } else { + r->end = pos; + if (!r->next) { + ival->end = pos; + } + } + break; + } + r = r->next; + } + } + } + return 0; +} + +int ir_coalesce(ir_ctx *ctx) +{ + uint32_t b, n, succ; + ir_ref *p, use, input, k, j; + ir_block *bb, *succ_bb; + ir_use_list *use_list; + ir_insn *insn; + ir_worklist blocks; + bool compact = 0; + + /* Collect a list of blocks which are predecessors to blocks with phi functions */ + ir_worklist_init(&blocks, ctx->cfg_blocks_count + 1); + for (b = 1, bb = &ctx->cfg_blocks[1]; b <= ctx->cfg_blocks_count; b++, bb++) { + IR_ASSERT(!(bb->flags & IR_BB_UNREACHABLE)); + if (bb->flags & IR_BB_HAS_PHI) { + k = bb->predecessors_count; + use_list = &ctx->use_lists[bb->start]; + n = use_list->count; + IR_ASSERT(k == ctx->ir_base[bb->start].inputs_count); + k++; + for (p = &ctx->use_edges[use_list->refs]; n > 0; p++, n--) { + use = *p; + insn = &ctx->ir_base[use]; + if (insn->op == IR_PHI) { + for (j = 2; j <= k; j++) { + ir_worklist_push(&blocks, ctx->cfg_edges[bb->predecessors + (j-2)]); + } + } + } + } + } + +#ifdef _WIN32 +# define qsort_fn(base, num, width, func, data) qsort_s(base, num, width, func, data) +#elif defined(__APPLE__) || defined(__FreeBSD__) +# define
qsort_fn(base, num, width, func, data) qsort_r(base, num, width, data, func) +#else +# define qsort_fn(base, num, width, func, data) qsort_r(base, num, width, func, data) +#endif + qsort_fn(blocks.l.a.refs, ir_worklist_len(&blocks), sizeof(ir_ref), ir_block_cmp, ctx); + + while (ir_worklist_len(&blocks)) { + uint32_t i; + + b = ir_worklist_pop(&blocks); + bb = &ctx->cfg_blocks[b]; + IR_ASSERT(bb->successors_count == 1); + succ = ctx->cfg_edges[bb->successors]; + succ_bb = &ctx->cfg_blocks[succ]; + IR_ASSERT(succ_bb->predecessors_count > 1); + k = ir_phi_input_number(ctx, succ_bb, b); + use_list = &ctx->use_lists[succ_bb->start]; + n = use_list->count; + for (i = 0, p = &ctx->use_edges[use_list->refs]; i < n; i++, p++) { + use = *p; + insn = &ctx->ir_base[use]; + if (insn->op == IR_PHI) { + input = ir_insn_op(insn, k); + if (input > 0) { + if (!ir_try_coalesce(ctx, input, use)) { + ir_add_phi_move(ctx, b, input, use); + } else { + compact = 1; + } + } else { + /* Move for constant input */ + ir_add_phi_move(ctx, b, input, use); + } + } + } + } + ir_worklist_free(&blocks); + + ir_hint_propagation(ctx); + + if (ctx->rules) { + /* try to swap operands of commutative instructions for better register allocation */ + for (b = 1, bb = &ctx->cfg_blocks[1]; b <= ctx->cfg_blocks_count; b++, bb++) { + ir_ref i; + + IR_ASSERT(!(bb->flags & IR_BB_UNREACHABLE)); + i = bb->end; + + /* skip last instruction */ + i = ctx->prev_ref[i]; + + while (i != bb->start) { + insn = &ctx->ir_base[i]; + if ((ir_op_flags[insn->op] & IR_OP_FLAG_COMMUTATIVE) + && ctx->vregs[i] + && ctx->live_intervals[ctx->vregs[i]]->use_pos + && (ctx->live_intervals[ctx->vregs[i]]->use_pos->flags & IR_DEF_REUSES_OP1_REG) + && insn->op2 > 0 + && insn->op1 > 0 + && insn->op1 != insn->op2) { + ir_try_swap_operands(ctx, i, insn); + } + i = ctx->prev_ref[i]; + } + } + } + + if (compact) { + ir_ref i, n; + uint32_t *xlat = ir_mem_malloc((ctx->vregs_count + 1) * sizeof(uint32_t)); + + for (i = 1, n = 1; i <= ctx->vregs_count; i++) { + if (ctx->live_intervals[i]) { + xlat[i] = n; + if (i != n) { + ctx->live_intervals[n] = ctx->live_intervals[i]; + ctx->live_intervals[n]->vreg = n; + } + n++; + } + } + n--; + if (n != ctx->vregs_count) { + j = ctx->vregs_count - n; + /* vregs + tmp + fixed + SCRATCH + ALL */ + for (i = n + 1; i <= n + IR_REG_NUM + 2; i++) { + ctx->live_intervals[i] = ctx->live_intervals[i + j]; + if (ctx->live_intervals[i]) { + ctx->live_intervals[i]->vreg = i; + } + } + for (j = 1; j < ctx->insns_count; j++) { + if (ctx->vregs[j]) { + ctx->vregs[j] = xlat[ctx->vregs[j]]; + } + } + ctx->vregs_count = n; + } + ir_mem_free(xlat); + } + + return 1; +} + +/* SSA Deconstruction */ + +int ir_compute_dessa_moves(ir_ctx *ctx) +{ + uint32_t b, i, n; + ir_ref j, k, *p, use; + ir_block *bb; + ir_use_list *use_list; + ir_insn *insn; + + for (b = 1, bb = &ctx->cfg_blocks[1]; b <= ctx->cfg_blocks_count; b++, bb++) { + IR_ASSERT(!(bb->flags & IR_BB_UNREACHABLE)); + k = bb->predecessors_count; + if (k > 1) { + use_list = &ctx->use_lists[bb->start]; + n = use_list->count; + if (n > 1) { + IR_ASSERT(k == ctx->ir_base[bb->start].inputs_count); + k++; + for (i = 0, p = &ctx->use_edges[use_list->refs]; i < n; i++, p++) { + use = *p; + insn = &ctx->ir_base[use]; + if (insn->op == IR_PHI) { + for (j = 2; j <= k; j++) { + if (IR_IS_CONST_REF(ir_insn_op(insn, j)) || ctx->vregs[ir_insn_op(insn, j)] != ctx->vregs[use]) { + int pred = ctx->cfg_edges[bb->predecessors + (j-2)]; + ctx->cfg_blocks[pred].flags &= ~IR_BB_EMPTY; + ctx->cfg_blocks[pred].flags |=
IR_BB_DESSA_MOVES; + ctx->flags |= IR_LR_HAVE_DESSA_MOVES; + } + } + } + } + } + } + } + return 1; +} + +int ir_gen_dessa_moves(ir_ctx *ctx, uint32_t b, emit_copy_t emit_copy) +{ + uint32_t succ, k, n = 0; + ir_block *bb, *succ_bb; + ir_use_list *use_list; + ir_ref *loc, *pred, i, *p, ref, input; + ir_insn *insn; + uint32_t len; + ir_bitset todo, ready; + bool have_constants = 0; + + bb = &ctx->cfg_blocks[b]; + if (!(bb->flags & IR_BB_DESSA_MOVES)) { + return 0; + } + IR_ASSERT(bb->successors_count == 1); + succ = ctx->cfg_edges[bb->successors]; + succ_bb = &ctx->cfg_blocks[succ]; + IR_ASSERT(succ_bb->predecessors_count > 1); + use_list = &ctx->use_lists[succ_bb->start]; + + k = ir_phi_input_number(ctx, succ_bb, b); + + loc = ir_mem_malloc(ctx->insns_count * 2 * sizeof(ir_ref)); + pred = loc + ctx->insns_count; + len = ir_bitset_len(ctx->insns_count); + todo = ir_bitset_malloc(ctx->insns_count); + + for (i = 0, p = &ctx->use_edges[use_list->refs]; i < use_list->count; i++, p++) { + ref = *p; + insn = &ctx->ir_base[ref]; + if (insn->op == IR_PHI) { + input = ir_insn_op(insn, k); + if (IR_IS_CONST_REF(input)) { + have_constants = 1; + } else if (ctx->vregs[input] != ctx->vregs[ref]) { + loc[ref] = pred[input] = 0; + ir_bitset_incl(todo, ref); + n++; + } + } + } + + if (n > 0) { + ready = ir_bitset_malloc(ctx->insns_count); + IR_BITSET_FOREACH(todo, len, ref) { + insn = &ctx->ir_base[ref]; + IR_ASSERT(insn->op == IR_PHI); + input = ir_insn_op(insn, k); + loc[input] = input; + pred[ref] = input; + } IR_BITSET_FOREACH_END(); + + IR_BITSET_FOREACH(todo, len, i) { + if (!loc[i]) { + ir_bitset_incl(ready, i); + } + } IR_BITSET_FOREACH_END(); + + while (1) { + ir_ref a, b, c; + + while ((b = ir_bitset_pop_first(ready, len)) >= 0) { + a = pred[b]; + c = loc[a]; + emit_copy(ctx, ctx->ir_base[b].type, c, b); + ir_bitset_excl(todo, b); + loc[a] = b; + if (a == c && pred[a]) { + ir_bitset_incl(ready, a); + } + } + b = ir_bitset_pop_first(todo, len); + if (b < 0) { + break; + } + IR_ASSERT(b != loc[pred[b]]); + emit_copy(ctx, ctx->ir_base[b].type, b, 0); + loc[b] = 0; + ir_bitset_incl(ready, b); + } + + ir_mem_free(ready); + } + + ir_mem_free(todo); + ir_mem_free(loc); + + if (have_constants) { + for (i = 0, p = &ctx->use_edges[use_list->refs]; i < use_list->count; i++, p++) { + ref = *p; + insn = &ctx->ir_base[ref]; + if (insn->op == IR_PHI) { + input = ir_insn_op(insn, k); + if (IR_IS_CONST_REF(input)) { + emit_copy(ctx, insn->type, input, ref); + } + } + } + } + + return 1; +} + +/* Linear Scan Register Allocation */ + +#ifdef IR_DEBUG +# define IR_LOG_LSRA(action, ival, comment) do { \ + if (ctx->flags & IR_DEBUG_RA) { \ + ir_live_interval *_ival = (ival); \ + ir_live_pos _start = _ival->range.start; \ + ir_live_pos _end = _ival->end; \ + fprintf(stderr, action " R%d [%d.%d...%d.%d)" comment "\n", \ + (_ival->flags & IR_LIVE_INTERVAL_TEMP) ? 0 : _ival->vreg, \ + IR_LIVE_POS_TO_REF(_start), IR_LIVE_POS_TO_SUB_REF(_start), \ + IR_LIVE_POS_TO_REF(_end), IR_LIVE_POS_TO_SUB_REF(_end)); \ + } \ + } while (0) +# define IR_LOG_LSRA_ASSIGN(action, ival, comment) do { \ + if (ctx->flags & IR_DEBUG_RA) { \ + ir_live_interval *_ival = (ival); \ + ir_live_pos _start = _ival->range.start; \ + ir_live_pos _end = _ival->end; \ + fprintf(stderr, action " R%d [%d.%d...%d.%d) to %s" comment "\n", \ + (_ival->flags & IR_LIVE_INTERVAL_TEMP) ? 
0 : _ival->vreg, \ + IR_LIVE_POS_TO_REF(_start), IR_LIVE_POS_TO_SUB_REF(_start), \ + IR_LIVE_POS_TO_REF(_end), IR_LIVE_POS_TO_SUB_REF(_end), \ + ir_reg_name(_ival->reg, _ival->type)); \ + } \ + } while (0) +# define IR_LOG_LSRA_SPLIT(ival, pos) do { \ + if (ctx->flags & IR_DEBUG_RA) { \ + ir_live_interval *_ival = (ival); \ + ir_live_pos _start = _ival->range.start; \ + ir_live_pos _end = _ival->end; \ + ir_live_pos _pos = (pos); \ + fprintf(stderr, " ---- Split R%d [%d.%d...%d.%d) at %d.%d\n", \ + (_ival->flags & IR_LIVE_INTERVAL_TEMP) ? 0 : _ival->vreg, \ + IR_LIVE_POS_TO_REF(_start), IR_LIVE_POS_TO_SUB_REF(_start), \ + IR_LIVE_POS_TO_REF(_end), IR_LIVE_POS_TO_SUB_REF(_end), \ + IR_LIVE_POS_TO_REF(_pos), IR_LIVE_POS_TO_SUB_REF(_pos)); \ + } \ + } while (0) +# define IR_LOG_LSRA_CONFLICT(action, ival, pos) do { \ + if (ctx->flags & IR_DEBUG_RA) { \ + ir_live_interval *_ival = (ival); \ + ir_live_pos _start = _ival->range.start; \ + ir_live_pos _end = _ival->end; \ + ir_live_pos _pos = (pos); \ + fprintf(stderr, action " R%d [%d.%d...%d.%d) assigned to %s at %d.%d\n", \ + (_ival->flags & IR_LIVE_INTERVAL_TEMP) ? 0 : _ival->vreg, \ + IR_LIVE_POS_TO_REF(_start), IR_LIVE_POS_TO_SUB_REF(_start), \ + IR_LIVE_POS_TO_REF(_end), IR_LIVE_POS_TO_SUB_REF(_end), \ + ir_reg_name(_ival->reg, _ival->type), \ + IR_LIVE_POS_TO_REF(_pos), IR_LIVE_POS_TO_SUB_REF(_pos)); \ + } \ + } while (0) +#else +# define IR_LOG_LSRA(action, ival, comment) +# define IR_LOG_LSRA_ASSIGN(action, ival, comment) +# define IR_LOG_LSRA_SPLIT(ival, pos) +# define IR_LOG_LSRA_CONFLICT(action, ival, pos) +#endif + +static bool ir_ival_covers(ir_live_interval *ival, ir_live_pos position) +{ + ir_live_range *live_range = &ival->range; + + do { + if (position < live_range->end) { + return position >= live_range->start; + } + live_range = live_range->next; + } while (live_range); + + return 0; +} + +static bool ir_ival_has_hole_between(ir_live_interval *ival, ir_live_pos from, ir_live_pos to) +{ + ir_live_range *r = &ival->range; + + while (r) { + if (from < r->start) { + return 1; + } else if (to <= r->end) { + return 0; + } + r = r->next; + } + return 0; +} + + +static ir_live_pos ir_last_use_pos_before(ir_live_interval *ival, ir_live_pos pos, uint8_t flags) +{ + ir_live_pos ret = 0; + ir_use_pos *p = ival->use_pos; + + while (p && p->pos <= pos) { + if (p->flags & flags) { + ret = p->pos; + } + p = p->next; + } + return ret; +} + +static ir_live_pos ir_first_use_pos_after(ir_live_interval *ival, ir_live_pos pos, uint8_t flags) +{ + ir_use_pos *p = ival->use_pos; + + while (p && p->pos <= pos) { + p = p->next; + } + while (p && !(p->flags & flags)) { + p = p->next; + } + return p ? p->pos : 0x7fffffff; +} + +static ir_live_pos ir_first_use_pos(ir_live_interval *ival, uint8_t flags) +{ + ir_use_pos *p = ival->use_pos; + + while (p && !(p->flags & flags)) { + p = p->next; + } + return p ?
p->pos : 0x7fffffff; +} + +static ir_block *ir_block_from_live_pos(ir_ctx *ctx, ir_live_pos pos) +{ + ir_ref ref = IR_LIVE_POS_TO_REF(pos); + uint32_t b = ctx->cfg_map[ref]; + + while (!b) { + ref--; + IR_ASSERT(ref > 0); + b = ctx->cfg_map[ref]; + } + IR_ASSERT(b <= ctx->cfg_blocks_count); + return &ctx->cfg_blocks[b]; +} + +static ir_live_pos ir_find_optimal_split_position(ir_ctx *ctx, ir_live_interval *ival, ir_live_pos min_pos, ir_live_pos max_pos, bool prefer_max) +{ + ir_block *min_bb, *max_bb; + + if (min_pos == max_pos) { + return max_pos; + } + + IR_ASSERT(min_pos < max_pos); + IR_ASSERT(min_pos >= ival->range.start); + IR_ASSERT(max_pos < ival->end); + + min_bb = ir_block_from_live_pos(ctx, min_pos); + max_bb = ir_block_from_live_pos(ctx, max_pos); + + if (min_bb == max_bb + || ir_ival_has_hole_between(ival, min_pos, max_pos)) { // TODO: ??? + return (prefer_max) ? max_pos : min_pos; + } + + if (max_bb->loop_depth > 0) { + /* Split at the end of the loop entry */ + do { + ir_block *bb; + + if (max_bb->flags & IR_BB_LOOP_HEADER) { + bb = max_bb; + } else { + IR_ASSERT(max_bb->loop_header); + bb = &ctx->cfg_blocks[max_bb->loop_header]; + } + bb = &ctx->cfg_blocks[bb->idom]; + if (IR_DEF_LIVE_POS_FROM_REF(bb->end) < min_pos) { + break; + } + max_bb = bb; + } while (max_bb->loop_depth > 0); + + if (IR_DEF_LIVE_POS_FROM_REF(max_bb->end) < max_pos) { + return IR_DEF_LIVE_POS_FROM_REF(max_bb->end); + } + } + + if (IR_LOAD_LIVE_POS_FROM_REF(max_bb->start) > min_pos) { + return IR_LOAD_LIVE_POS_FROM_REF(max_bb->start); + } else { + // TODO: "min_bb" is in a deeper loop than "max_bb" ??? + return max_pos; + } +} + +static ir_live_interval *ir_split_interval_at(ir_ctx *ctx, ir_live_interval *ival, ir_live_pos pos) +{ + ir_live_interval *child; + ir_live_range *p, *prev; + ir_use_pos *use_pos, *prev_use_pos; + + IR_LOG_LSRA_SPLIT(ival, pos); + IR_ASSERT(pos > ival->range.start); + ctx->flags |= IR_RA_HAVE_SPLITS; + + p = &ival->range; + prev = NULL; + while (p && pos >= p->end) { + prev = p; + p = prev->next; + } + IR_ASSERT(p); + + if (pos < p->start) { + /* split between ranges */ + pos = p->start; + } + + use_pos = ival->use_pos; + prev_use_pos = NULL; + + ival->flags &= ~(IR_LIVE_INTERVAL_HAS_HINT_REGS|IR_LIVE_INTERVAL_HAS_HINT_REFS); + if (p->start == pos) { + while (use_pos && pos > use_pos->pos) { + if (use_pos->hint != IR_REG_NONE) { + ival->flags |= IR_LIVE_INTERVAL_HAS_HINT_REGS; + } + if (use_pos->hint_ref > 0) { + ival->flags |= IR_LIVE_INTERVAL_HAS_HINT_REFS; + } + prev_use_pos = use_pos; + use_pos = use_pos->next; + } + } else { + while (use_pos && pos >= use_pos->pos) { + if (use_pos->hint != IR_REG_NONE) { + ival->flags |= IR_LIVE_INTERVAL_HAS_HINT_REGS; + } + if (use_pos->hint_ref > 0) { + ival->flags |= IR_LIVE_INTERVAL_HAS_HINT_REFS; + } + prev_use_pos = use_pos; + use_pos = use_pos->next; + } + } + + child = ir_arena_alloc(&ctx->arena, sizeof(ir_live_interval)); + child->type = ival->type; + child->reg = IR_REG_NONE; + child->flags = IR_LIVE_INTERVAL_SPLIT_CHILD; + child->vreg = ival->vreg; + child->stack_spill_pos = -1; // not allocated + child->range.start = pos; + child->range.end = p->end; + child->range.next = p->next; + child->end = ival->end; + child->use_pos = prev_use_pos ? 
prev_use_pos->next : use_pos; + + child->next = ival->next; + ival->next = child; + + if (pos == p->start) { + prev->next = NULL; + ival->end = prev->end; + /* Cache to reuse */ + p->next = ctx->unused_ranges; + ctx->unused_ranges = p; + } else { + p->end = ival->end = pos; + p->next = NULL; + } + if (prev_use_pos) { + prev_use_pos->next = NULL; + } else { + ival->use_pos = NULL; + } + + use_pos = child->use_pos; + while (use_pos) { + if (use_pos->hint != IR_REG_NONE) { + child->flags |= IR_LIVE_INTERVAL_HAS_HINT_REGS; + } + if (use_pos->hint_ref > 0) { + child->flags |= IR_LIVE_INTERVAL_HAS_HINT_REFS; + } + use_pos = use_pos->next; + } + + return child; +} + +int32_t ir_allocate_spill_slot(ir_ctx *ctx, ir_type type, ir_reg_alloc_data *data) +{ + int32_t ret; + uint8_t size = ir_type_size[type]; + + IR_ASSERT(size == 1 || size == 2 || size == 4 || size == 8); + if (data->handled && data->handled[size]) { + ret = data->handled[size]->stack_spill_pos; + data->handled[size] = data->handled[size]->list_next; + } else if (size == 8) { + ret = ctx->stack_frame_size; + ctx->stack_frame_size += 8; + } else if (size == 4) { + if (data->unused_slot_4) { + ret = data->unused_slot_4; + data->unused_slot_4 = 0; + } else if (data->handled && data->handled[8]) { + ret = data->handled[8]->stack_spill_pos; + data->handled[8] = data->handled[8]->list_next; + data->unused_slot_4 = ret + 4; + } else { + ret = ctx->stack_frame_size; + if (sizeof(void*) == 8) { + data->unused_slot_4 = ctx->stack_frame_size + 4; + ctx->stack_frame_size += 8; + } else { + ctx->stack_frame_size += 4; + } + } + } else if (size == 2) { + if (data->unused_slot_2) { + ret = data->unused_slot_2; + data->unused_slot_2 = 0; + } else if (data->unused_slot_4) { + ret = data->unused_slot_4; + data->unused_slot_2 = data->unused_slot_4 + 2; + data->unused_slot_4 = 0; + } else if (data->handled && data->handled[4]) { + ret = data->handled[4]->stack_spill_pos; + data->handled[4] = data->handled[4]->list_next; + data->unused_slot_2 = ret + 2; + } else if (data->handled && data->handled[8]) { + ret = data->handled[8]->stack_spill_pos; + data->handled[8] = data->handled[8]->list_next; + data->unused_slot_2 = ret + 2; + data->unused_slot_4 = ret + 4; + } else { + ret = ctx->stack_frame_size; + data->unused_slot_2 = ctx->stack_frame_size + 2; + if (sizeof(void*) == 8) { + data->unused_slot_4 = ctx->stack_frame_size + 4; + ctx->stack_frame_size += 8; + } else { + ctx->stack_frame_size += 4; + } + } + } else { + IR_ASSERT(size == 1); + if (data->unused_slot_1) { + ret = data->unused_slot_1; + data->unused_slot_1 = 0; + } else if (data->unused_slot_2) { + ret = data->unused_slot_2; + data->unused_slot_1 = data->unused_slot_2 + 1; + data->unused_slot_2 = 0; + } else if (data->unused_slot_4) { + ret = data->unused_slot_4; + data->unused_slot_1 = data->unused_slot_4 + 1; + data->unused_slot_2 = data->unused_slot_4 + 2; + data->unused_slot_4 = 0; + } else if (data->handled && data->handled[2]) { + ret = data->handled[2]->stack_spill_pos; + data->handled[2] = data->handled[2]->list_next; + data->unused_slot_1 = ret + 1; + } else if (data->handled && data->handled[4]) { + ret = data->handled[4]->stack_spill_pos; + data->handled[4] = data->handled[4]->list_next; + data->unused_slot_1 = ret + 1; + data->unused_slot_2 = ret + 2; + } else if (data->handled && data->handled[8]) { + ret = data->handled[8]->stack_spill_pos; + data->handled[8] = data->handled[8]->list_next; + data->unused_slot_1 = ret + 1; + data->unused_slot_2 = ret + 2; + data->unused_slot_4 = ret 
+ 4; + } else { + ret = ctx->stack_frame_size; + data->unused_slot_1 = ctx->stack_frame_size + 1; + data->unused_slot_2 = ctx->stack_frame_size + 2; + if (sizeof(void*) == 8) { + data->unused_slot_4 = ctx->stack_frame_size + 4; + ctx->stack_frame_size += 8; + } else { + ctx->stack_frame_size += 4; + } + } + } + return ret; +} + +static ir_reg ir_get_first_reg_hint(ir_ctx *ctx, ir_live_interval *ival, ir_regset available) +{ + ir_use_pos *use_pos; + ir_reg reg; + + use_pos = ival->use_pos; + while (use_pos) { + reg = use_pos->hint; + if (reg >= 0 && IR_REGSET_IN(available, reg)) { + return reg; + } + use_pos = use_pos->next; + } + + return IR_REG_NONE; +} + +static ir_reg ir_try_allocate_preferred_reg(ir_ctx *ctx, ir_live_interval *ival, ir_regset available, ir_live_pos *freeUntilPos) +{ + ir_use_pos *use_pos; + ir_reg reg; + + if (ival->flags & IR_LIVE_INTERVAL_HAS_HINT_REGS) { + use_pos = ival->use_pos; + while (use_pos) { + reg = use_pos->hint; + if (reg >= 0 && IR_REGSET_IN(available, reg)) { + if (ival->end <= freeUntilPos[reg]) { + /* register available for the whole interval */ + return reg; + } + } + use_pos = use_pos->next; + } + } + + if (ival->flags & IR_LIVE_INTERVAL_HAS_HINT_REFS) { + use_pos = ival->use_pos; + while (use_pos) { + if (use_pos->hint_ref > 0) { + reg = ctx->live_intervals[ctx->vregs[use_pos->hint_ref]]->reg; + if (reg >= 0 && IR_REGSET_IN(available, reg)) { + if (ival->end <= freeUntilPos[reg]) { + /* register available for the whole interval */ + return reg; + } + } + } + use_pos = use_pos->next; + } + } + + return IR_REG_NONE; +} + +static ir_reg ir_get_preferred_reg(ir_ctx *ctx, ir_live_interval *ival, ir_regset available) +{ + ir_use_pos *use_pos; + ir_reg reg; + + use_pos = ival->use_pos; + while (use_pos) { + reg = use_pos->hint; + if (reg >= 0 && IR_REGSET_IN(available, reg)) { + return reg; + } else if (use_pos->hint_ref > 0) { + reg = ctx->live_intervals[ctx->vregs[use_pos->hint_ref]]->reg; + if (reg >= 0 && IR_REGSET_IN(available, reg)) { + return reg; + } + } + use_pos = use_pos->next; + } + + return IR_REG_NONE; +} + +static void ir_add_to_unhandled(ir_live_interval **unhandled, ir_live_interval *ival) +{ + ir_live_pos pos = ival->range.start; + + if (*unhandled == NULL + || pos < (*unhandled)->range.start + || (pos == (*unhandled)->range.start + && (ival->flags & (IR_LIVE_INTERVAL_HAS_HINT_REGS|IR_LIVE_INTERVAL_HAS_HINT_REFS)) + && !((*unhandled)->flags & (IR_LIVE_INTERVAL_HAS_HINT_REGS|IR_LIVE_INTERVAL_HAS_HINT_REFS))) + || (pos == (*unhandled)->range.start + && ival->vreg > (*unhandled)->vreg)) { + ival->list_next = *unhandled; + *unhandled = ival; + } else { + ir_live_interval *prev = *unhandled; + + while (prev->list_next) { + if (pos < prev->list_next->range.start + || (pos == prev->list_next->range.start + && (ival->flags & (IR_LIVE_INTERVAL_HAS_HINT_REGS|IR_LIVE_INTERVAL_HAS_HINT_REFS)) + && !(prev->list_next->flags & (IR_LIVE_INTERVAL_HAS_HINT_REGS|IR_LIVE_INTERVAL_HAS_HINT_REFS))) + || (pos == prev->list_next->range.start + && ival->vreg > prev->list_next->vreg)) { + break; + } + prev = prev->list_next; + } + ival->list_next = prev->list_next; + prev->list_next = ival; + } +} + +/* merge sorted lists */ +static void ir_merge_to_unhandled(ir_live_interval **unhandled, ir_live_interval *ival) +{ + ir_live_interval **prev; + ir_live_pos pos; + + if (*unhandled == NULL) { + *unhandled = ival; + while (ival) { + ival = ival->list_next = ival->next; + } + } else { + prev = unhandled; + while (ival) { + pos = ival->range.start; + while (*prev && 
pos >= (*prev)->range.start) { + prev = &(*prev)->list_next; + } + ival->list_next = *prev; + *prev = ival; + prev = &ival->list_next; + ival = ival->next; + } + } +#if IR_DEBUG + ival = *unhandled; + pos = 0; + + while (ival) { + IR_ASSERT(ival->range.start >= pos); + pos = ival->range.start; + ival = ival->list_next; + } +#endif +} + +static void ir_add_to_unhandled_spill(ir_live_interval **unhandled, ir_live_interval *ival) +{ + ir_live_pos pos = ival->range.start; + + if (*unhandled == NULL + || pos <= (*unhandled)->range.start) { + ival->list_next = *unhandled; + *unhandled = ival; + } else { + ir_live_interval *prev = *unhandled; + + while (prev->list_next) { + if (pos <= prev->list_next->range.start) { + break; + } + prev = prev->list_next; + } + ival->list_next = prev->list_next; + prev->list_next = ival; + } +} + +static ir_reg ir_try_allocate_free_reg(ir_ctx *ctx, ir_live_interval *ival, ir_live_interval **active, ir_live_interval *inactive, ir_live_interval **unhandled) +{ + ir_live_pos freeUntilPos[IR_REG_NUM]; + int i, reg; + ir_live_pos pos, next; + ir_live_interval *other; + ir_regset available, overlapped, scratch; + + if (IR_IS_TYPE_FP(ival->type)) { + available = IR_REGSET_FP; + /* set freeUntilPos of all physical registers to maxInt */ + for (i = IR_REG_FP_FIRST; i <= IR_REG_FP_LAST; i++) { + freeUntilPos[i] = 0x7fffffff; + } + } else { + available = IR_REGSET_GP; + if (ctx->flags & IR_USE_FRAME_POINTER) { + IR_REGSET_EXCL(available, IR_REG_FRAME_POINTER); + } +#if defined(IR_TARGET_X86) + if (ir_type_size[ival->type] == 1) { + /* TODO: if no registers available, we may use one of these registers for an already allocated interval ??? */ + IR_REGSET_EXCL(available, IR_REG_RBP); + IR_REGSET_EXCL(available, IR_REG_RSI); + IR_REGSET_EXCL(available, IR_REG_RDI); + } +#endif + /* set freeUntilPos of all physical registers to maxInt */ + for (i = IR_REG_GP_FIRST; i <= IR_REG_GP_LAST; i++) { + freeUntilPos[i] = 0x7fffffff; + } + } + + available = IR_REGSET_DIFFERENCE(available, (ir_regset)ctx->fixed_regset); + + /* for each interval it in active */ + other = *active; + while (other) { + /* freeUntilPos[it.reg] = 0 */ + reg = other->reg; + IR_ASSERT(reg >= 0); + if (reg >= IR_REG_SCRATCH) { + if (reg == IR_REG_SCRATCH) { + available = IR_REGSET_DIFFERENCE(available, IR_REGSET_SCRATCH); + } else { + IR_ASSERT(reg == IR_REG_ALL); + available = IR_REGSET_EMPTY; + } + } else { + IR_REGSET_EXCL(available, reg); + } + other = other->list_next; + } + + /* for each interval it in inactive intersecting with current + * + * This loop is not necessary for a program in SSA form (see LSRA on SSA fig.
6), + * but it is still necessary after coalescing and splitting + */ + overlapped = IR_REGSET_EMPTY; + other = inactive; + pos = ival->end; + while (other) { + /* freeUntilPos[it.reg] = next intersection of it with current */ + if (other->current_range->start < pos) { + next = ir_ivals_overlap(&ival->range, other->current_range); + if (next) { + reg = other->reg; + IR_ASSERT(reg >= 0); + if (reg >= IR_REG_SCRATCH) { + ir_regset regset; + + if (reg == IR_REG_SCRATCH) { + regset = IR_REGSET_INTERSECTION(available, IR_REGSET_SCRATCH); + } else { + IR_ASSERT(reg == IR_REG_ALL); + regset = available; + } + overlapped = IR_REGSET_UNION(overlapped, regset); + IR_REGSET_FOREACH(regset, reg) { + if (next < freeUntilPos[reg]) { + freeUntilPos[reg] = next; + } + } IR_REGSET_FOREACH_END(); + } else if (IR_REGSET_IN(available, reg)) { + IR_REGSET_INCL(overlapped, reg); + if (next < freeUntilPos[reg]) { + freeUntilPos[reg] = next; + } + } + } + } + other = other->list_next; + } + + available = IR_REGSET_DIFFERENCE(available, overlapped); + if (available != IR_REGSET_EMPTY) { + + if (ival->flags & (IR_LIVE_INTERVAL_HAS_HINT_REGS|IR_LIVE_INTERVAL_HAS_HINT_REFS)) { + /* Try to use hint */ + reg = ir_try_allocate_preferred_reg(ctx, ival, available, freeUntilPos); + if (reg != IR_REG_NONE) { + ival->reg = reg; + IR_LOG_LSRA_ASSIGN(" ---- Assign", ival, " (hint available without spilling)"); + if (*unhandled && ival->end > (*unhandled)->range.start) { + ival->list_next = *active; + *active = ival; + } + return reg; + } + } + + if (ival->flags & IR_LIVE_INTERVAL_SPLIT_CHILD) { + /* Try to reuse the register previously allocated for the split interval */ + reg = ctx->live_intervals[ival->vreg]->reg; + if (reg >= 0 && IR_REGSET_IN(available, reg)) { + ival->reg = reg; + IR_LOG_LSRA_ASSIGN(" ---- Assign", ival, " (available without spilling)"); + if (*unhandled && ival->end > (*unhandled)->range.start) { + ival->list_next = *active; + *active = ival; + } + return reg; + } + } + + /* prefer caller-saved registers to avoid save/restore in prologue/epilogue */ + scratch = IR_REGSET_INTERSECTION(available, IR_REGSET_SCRATCH); + if (scratch != IR_REGSET_EMPTY) { + /* prefer registers that don't conflict with the hints for the following unhandled intervals */ + if (1) { + ir_regset non_conflicting = scratch; + + other = *unhandled; + while (other && other->range.start < ival->range.end) { + if (other->flags & IR_LIVE_INTERVAL_HAS_HINT_REGS) { + reg = ir_get_first_reg_hint(ctx, other, non_conflicting); + + if (reg >= 0) { + IR_REGSET_EXCL(non_conflicting, reg); + if (non_conflicting == IR_REGSET_EMPTY) { + break; + } + } + } + other = other->list_next; + } + if (non_conflicting != IR_REGSET_EMPTY) { + reg = IR_REGSET_FIRST(non_conflicting); + } else { + reg = IR_REGSET_FIRST(scratch); + } + } else { + reg = IR_REGSET_FIRST(scratch); + } + } else { + reg = IR_REGSET_FIRST(available); + } + ival->reg = reg; + IR_LOG_LSRA_ASSIGN(" ---- Assign", ival, " (available without spilling)"); + if (*unhandled && ival->end > (*unhandled)->range.start) { + ival->list_next = *active; + *active = ival; + } + return reg; + } + + /* reg = register with highest freeUntilPos */ + reg = IR_REG_NONE; + pos = 0; + IR_REGSET_FOREACH(overlapped, i) { + if (freeUntilPos[i] > pos) { + pos = freeUntilPos[i]; + reg = i; + } else if (freeUntilPos[i] == pos + && !IR_REGSET_IN(IR_REGSET_SCRATCH, reg) + && IR_REGSET_IN(IR_REGSET_SCRATCH, i)) { + /* prefer caller-saved registers to avoid save/restore in prologue/epilogue */ + pos = freeUntilPos[i]; + reg
= i; + } + } IR_REGSET_FOREACH_END(); + + if (pos > ival->range.start) { + /* register available for the first part of the interval */ + /* split current before freeUntilPos[reg] */ + ir_live_pos split_pos = ir_last_use_pos_before(ival, pos, + IR_USE_MUST_BE_IN_REG | IR_USE_SHOULD_BE_IN_REG); + if (split_pos > ival->range.start) { + split_pos = ir_find_optimal_split_position(ctx, ival, split_pos, pos, 0); + other = ir_split_interval_at(ctx, ival, split_pos); + if (ival->flags & (IR_LIVE_INTERVAL_HAS_HINT_REGS|IR_LIVE_INTERVAL_HAS_HINT_REFS)) { + ir_reg pref_reg = ir_try_allocate_preferred_reg(ctx, ival, IR_REGSET_UNION(available, overlapped), freeUntilPos); + + if (pref_reg != IR_REG_NONE) { + ival->reg = pref_reg; + } else { + ival->reg = reg; + } + } else { + ival->reg = reg; + } + IR_LOG_LSRA_ASSIGN(" ---- Assign", ival, " (available without spilling for the first part)"); + if (*unhandled && ival->end > (*unhandled)->range.start) { + ival->list_next = *active; + *active = ival; + } + ir_add_to_unhandled(unhandled, other); + IR_LOG_LSRA(" ---- Queue", other, ""); + return reg; + } + } + return IR_REG_NONE; +} + +static ir_reg ir_allocate_blocked_reg(ir_ctx *ctx, ir_live_interval *ival, ir_live_interval **active, ir_live_interval **inactive, ir_live_interval **unhandled) +{ + ir_live_pos nextUsePos[IR_REG_NUM]; + ir_live_pos blockPos[IR_REG_NUM]; + int i, reg; + ir_live_pos pos, next_use_pos; + ir_live_interval *other, *prev; + ir_use_pos *use_pos; + ir_regset available, tmp_regset; + + if (!(ival->flags & IR_LIVE_INTERVAL_TEMP)) { + use_pos = ival->use_pos; + while (use_pos && !(use_pos->flags & IR_USE_MUST_BE_IN_REG)) { + use_pos = use_pos->next; + } + if (!use_pos) { + /* spill */ + IR_LOG_LSRA(" ---- Spill", ival, " (no use pos that must be in reg)"); + ctx->flags |= IR_RA_HAVE_SPILLS; + return IR_REG_NONE; + } + next_use_pos = use_pos->pos; + } else { + next_use_pos = ival->range.end; + } + + if (IR_IS_TYPE_FP(ival->type)) { + available = IR_REGSET_FP; + /* set nextUsePos of all physical registers to maxInt */ + for (i = IR_REG_FP_FIRST; i <= IR_REG_FP_LAST; i++) { + nextUsePos[i] = 0x7fffffff; + blockPos[i] = 0x7fffffff; + } + } else { + available = IR_REGSET_GP; + if (ctx->flags & IR_USE_FRAME_POINTER) { + IR_REGSET_EXCL(available, IR_REG_FRAME_POINTER); + } +#if defined(IR_TARGET_X86) + if (ir_type_size[ival->type] == 1) { + /* TODO: if no registers available, we may use one of these registers for an already allocated interval ??? */ + IR_REGSET_EXCL(available, IR_REG_RBP); + IR_REGSET_EXCL(available, IR_REG_RSI); + IR_REGSET_EXCL(available, IR_REG_RDI); + } +#endif + /* set nextUsePos of all physical registers to maxInt */ + for (i = IR_REG_GP_FIRST; i <= IR_REG_GP_LAST; i++) { + nextUsePos[i] = 0x7fffffff; + blockPos[i] = 0x7fffffff; + } + } + + available = IR_REGSET_DIFFERENCE(available, (ir_regset)ctx->fixed_regset); + + if (IR_REGSET_IS_EMPTY(available)) { + fprintf(stderr, "LSRA Internal Error: No registers available.
Allocation is not possible\n"); + IR_ASSERT(0); + exit(-1); + } + + /* for each interval it in active */ + other = *active; + while (other) { + /* nextUsePos[it.reg] = next use of it after start of current */ + reg = other->reg; + IR_ASSERT(reg >= 0); + if (reg >= IR_REG_SCRATCH) { + ir_regset regset; + + if (reg == IR_REG_SCRATCH) { + regset = IR_REGSET_INTERSECTION(available, IR_REGSET_SCRATCH); + } else { + IR_ASSERT(reg == IR_REG_ALL); + regset = available; + } + IR_REGSET_FOREACH(regset, reg) { + blockPos[reg] = nextUsePos[reg] = 0; + } IR_REGSET_FOREACH_END(); + } else if (IR_REGSET_IN(available, reg)) { + if (other->flags & (IR_LIVE_INTERVAL_FIXED|IR_LIVE_INTERVAL_TEMP)) { + blockPos[reg] = nextUsePos[reg] = 0; + } else { + pos = ir_first_use_pos_after(other, ival->range.start, + IR_USE_MUST_BE_IN_REG | IR_USE_SHOULD_BE_IN_REG); + if (pos < nextUsePos[reg]) { + nextUsePos[reg] = pos; + } + } + } + other = other->list_next; + } + + /* for each interval it in inactive intersecting with current */ + other = *inactive; + while (other) { + /* freeUntilPos[it.reg] = next intersection of it with current */ + reg = other->reg; + IR_ASSERT(reg >= 0); + if (reg >= IR_REG_SCRATCH) { + ir_live_pos overlap = ir_ivals_overlap(&ival->range, other->current_range); + + if (overlap) { + ir_regset regset; + + if (reg == IR_REG_SCRATCH) { + regset = IR_REGSET_INTERSECTION(available, IR_REGSET_SCRATCH); + } else { + IR_ASSERT(reg == IR_REG_ALL); + regset = available; + } + IR_REGSET_FOREACH(regset, reg) { + if (overlap < nextUsePos[reg]) { + nextUsePos[reg] = overlap; + } + if (overlap < blockPos[reg]) { + blockPos[reg] = overlap; + } + } IR_REGSET_FOREACH_END(); + } + } else if (IR_REGSET_IN(available, reg)) { + ir_live_pos overlap = ir_ivals_overlap(&ival->range, other->current_range); + + if (overlap) { + if (other->flags & (IR_LIVE_INTERVAL_FIXED|IR_LIVE_INTERVAL_TEMP)) { + if (overlap < nextUsePos[reg]) { + nextUsePos[reg] = overlap; + } + if (overlap < blockPos[reg]) { + blockPos[reg] = overlap; + } + } else { + pos = ir_first_use_pos_after(other, ival->range.start, + IR_USE_MUST_BE_IN_REG | IR_USE_SHOULD_BE_IN_REG); + if (pos < nextUsePos[reg]) { + nextUsePos[reg] = pos; + } + } + } + } + other = other->list_next; + } + + /* register hinting */ + reg = IR_REG_NONE; + if (ival->flags & (IR_LIVE_INTERVAL_HAS_HINT_REGS|IR_LIVE_INTERVAL_HAS_HINT_REFS)) { + reg = ir_get_preferred_reg(ctx, ival, available); + } + if (reg == IR_REG_NONE) { +select_register: + reg = IR_REGSET_FIRST(available); + } + + /* reg = register with highest nextUsePos */ + pos = nextUsePos[reg]; + tmp_regset = available; + IR_REGSET_EXCL(tmp_regset, reg); + IR_REGSET_FOREACH(tmp_regset, i) { + if (nextUsePos[i] > pos) { + pos = nextUsePos[i]; + reg = i; + } + } IR_REGSET_FOREACH_END(); + + /* if first usage of current is after nextUsePos[reg] then */ + if (next_use_pos > pos && !(ival->flags & IR_LIVE_INTERVAL_TEMP)) { + /* all other intervals are used before current, so it is best to spill current itself */ + /* assign spill slot to current */ + /* split current before its first use position that requires a register */ + ir_live_pos split_pos; + +spill_current: + if (next_use_pos == ival->range.start) { + IR_ASSERT(ival->use_pos && ival->use_pos->op_num == 0); + /* split right after definition */ + split_pos = next_use_pos + 1; + } else { + split_pos = ir_find_optimal_split_position(ctx, ival, ival->range.start, next_use_pos - 1, 1); + } + + if (split_pos > ival->range.start) { + IR_LOG_LSRA(" ---- Conflict with others", ival, " 
(all others are used before)"); + other = ir_split_interval_at(ctx, ival, split_pos); + IR_LOG_LSRA(" ---- Spill", ival, ""); + ir_add_to_unhandled(unhandled, other); + IR_LOG_LSRA(" ---- Queue", other, ""); + return IR_REG_NONE; + } + } + + if (ival->end > blockPos[reg]) { + /* spilling makes a register free only for the first part of current */ + IR_LOG_LSRA(" ---- Conflict with others", ival, " (spilling makes a register free only for the first part)"); + /* split current at optimal position before block_pos[reg] */ + ir_live_pos split_pos = ir_last_use_pos_before(ival, blockPos[reg] + 1, + IR_USE_MUST_BE_IN_REG | IR_USE_SHOULD_BE_IN_REG); + if (split_pos == 0) { + split_pos = ir_first_use_pos_after(ival, blockPos[reg], + IR_USE_MUST_BE_IN_REG | IR_USE_SHOULD_BE_IN_REG) - 1; + other = ir_split_interval_at(ctx, ival, split_pos); + ir_add_to_unhandled(unhandled, other); + IR_LOG_LSRA(" ---- Queue", other, ""); + return IR_REG_NONE; + } + if (split_pos >= blockPos[reg]) { +try_next_available_register: + IR_REGSET_EXCL(available, reg); + if (IR_REGSET_IS_EMPTY(available)) { + fprintf(stderr, "LSRA Internal Error: Unsolvable conflict. Allocation is not possible\n"); + IR_ASSERT(0); + exit(-1); + } + IR_LOG_LSRA(" ---- Restart", ival, ""); + goto select_register; + } + split_pos = ir_find_optimal_split_position(ctx, ival, split_pos, blockPos[reg], 1); + other = ir_split_interval_at(ctx, ival, split_pos); + ir_add_to_unhandled(unhandled, other); + IR_LOG_LSRA(" ---- Queue", other, ""); + } + + /* spill intervals that currently block reg */ + prev = NULL; + other = *active; + while (other) { + ir_live_pos split_pos; + + if (reg == other->reg) { + /* split active interval for reg at position */ + ir_live_pos overlap = ir_ivals_overlap(&ival->range, other->current_range); + + if (overlap) { + ir_live_interval *child, *child2; + + IR_ASSERT(other->type != IR_VOID); + IR_LOG_LSRA_CONFLICT(" ---- Conflict with active", other, overlap); + + split_pos = ir_last_use_pos_before(other, ival->range.start, IR_USE_MUST_BE_IN_REG | IR_USE_SHOULD_BE_IN_REG); + if (split_pos == 0) { + split_pos = ival->range.start; + } + split_pos = ir_find_optimal_split_position(ctx, other, split_pos, ival->range.start, 1); + if (split_pos > other->range.start) { + child = ir_split_interval_at(ctx, other, split_pos); + if (prev) { + prev->list_next = other->list_next; + } else { + *active = other->list_next; + } + IR_LOG_LSRA(" ---- Finish", other, ""); + } else { + if (ir_first_use_pos(other, IR_USE_MUST_BE_IN_REG) <= other->end) { + if (!(ival->flags & IR_LIVE_INTERVAL_TEMP)) { + next_use_pos = ir_first_use_pos(ival, IR_USE_MUST_BE_IN_REG); + if (next_use_pos == ival->range.start) { + IR_ASSERT(ival->use_pos && ival->use_pos->op_num == 0); + /* split right after definition */ + split_pos = next_use_pos + 1; + } else { + split_pos = ir_find_optimal_split_position(ctx, ival, ival->range.start, next_use_pos - 1, 1); + } + + if (split_pos > ival->range.start) { + goto spill_current; + } + } + goto try_next_available_register; + } + child = other; + other->reg = IR_REG_NONE; + if (prev) { + prev->list_next = other->list_next; + } else { + *active = other->list_next; + } + IR_LOG_LSRA(" ---- Spill and Finish", other, " (it must not be in reg)"); + } + + split_pos = ir_first_use_pos_after(child, ival->range.start, IR_USE_MUST_BE_IN_REG | IR_USE_SHOULD_BE_IN_REG) - 1; // TODO: ???
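+ /* Re-split the evicted interval just before its next use that wants a register: the part in between stays spilled, and only that tail is re-queued for another allocation round. */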
+ if (split_pos > child->range.start && split_pos < child->end) { + ir_live_pos opt_split_pos = ir_find_optimal_split_position(ctx, child, ival->range.start, split_pos, 1); + if (opt_split_pos > child->range.start) { + split_pos = opt_split_pos; + } + child2 = ir_split_interval_at(ctx, child, split_pos); + IR_LOG_LSRA(" ---- Spill", child, ""); + ir_add_to_unhandled(unhandled, child2); + IR_LOG_LSRA(" ---- Queue", child2, ""); + } else if (child != other) { + // TODO: this may cause endless loop + ir_add_to_unhandled(unhandled, child); + IR_LOG_LSRA(" ---- Queue", child, ""); + } + } + break; + } + prev = other; + other = other->list_next; + } + + /* split any inactive interval for reg at the end of its lifetime hole */ + other = *inactive; + prev = NULL; + while (other) { + /* freeUntilPos[it.reg] = next intersection of it with current */ + if (reg == other->reg) { + ir_live_pos overlap = ir_ivals_overlap(&ival->range, other->current_range); + + if (overlap) { + ir_live_interval *child; + + IR_ASSERT(other->type != IR_VOID); + IR_LOG_LSRA_CONFLICT(" ---- Conflict with inactive", other, overlap); + // TODO: optimal split position (this case is not tested) + child = ir_split_interval_at(ctx, other, overlap); + if (prev) { + prev->list_next = other = other->list_next; + } else { + *inactive = other = other->list_next; + } + ir_add_to_unhandled(unhandled, child); + IR_LOG_LSRA(" ---- Queue", child, ""); + continue; + } + } + prev = other; + other = other->list_next; + } + + /* current.reg = reg */ + ival->reg = reg; + IR_LOG_LSRA_ASSIGN(" ---- Assign", ival, " (after splitting others)"); + + if (*unhandled && ival->end > (*unhandled)->range.start) { + ival->list_next = *active; + *active = ival; + } + return reg; +} + +static int ir_fix_dessa_tmps(ir_ctx *ctx, uint8_t type, ir_ref from, ir_ref to) +{ + ir_block *bb = ctx->data; + ir_tmp_reg tmp_reg; + + if (to == 0) { + if (IR_IS_TYPE_INT(type)) { + tmp_reg.num = 0; + tmp_reg.type = type; + tmp_reg.start = IR_DEF_SUB_REF; + tmp_reg.end = IR_SAVE_SUB_REF; + } else { + IR_ASSERT(IR_IS_TYPE_FP(type)); + tmp_reg.num = 1; + tmp_reg.type = type; + tmp_reg.start = IR_DEF_SUB_REF; + tmp_reg.end = IR_SAVE_SUB_REF; + } + } else if (from != 0) { + if (IR_IS_TYPE_INT(type)) { + tmp_reg.num = 0; + tmp_reg.type = type; + tmp_reg.start = IR_DEF_SUB_REF; + tmp_reg.end = IR_SAVE_SUB_REF; + } else { + IR_ASSERT(IR_IS_TYPE_FP(type)); + tmp_reg.num = 1; + tmp_reg.type = type; + tmp_reg.start = IR_DEF_SUB_REF; + tmp_reg.end = IR_SAVE_SUB_REF; + } + } else { + return 1; + } + if (!ir_has_tmp(ctx, bb->end, tmp_reg.num)) { + ir_add_tmp(ctx, bb->end, bb->end, tmp_reg.num, tmp_reg); + } + return 1; +} + +static bool ir_ival_spill_for_fuse_load(ir_ctx *ctx, ir_live_interval *ival, ir_reg_alloc_data *data) +{ + ir_use_pos *use_pos = ival->use_pos; + ir_insn *insn; + + if (ival->flags & IR_LIVE_INTERVAL_MEM_PARAM) { + IR_ASSERT(!ival->next && use_pos && use_pos->op_num == 0); + insn = &ctx->ir_base[IR_LIVE_POS_TO_REF(use_pos->pos)]; + IR_ASSERT(insn->op == IR_PARAM); + use_pos = use_pos->next; + if (use_pos && (use_pos->next || (use_pos->flags & IR_USE_MUST_BE_IN_REG))) { + return 0; + } + + if (use_pos) { + ir_block *bb = ir_block_from_live_pos(ctx, use_pos->pos); + if (bb->loop_depth) { + return 0; + } + } + + return 1; + } else if (ival->flags & IR_LIVE_INTERVAL_MEM_LOAD) { + insn = &ctx->ir_base[IR_LIVE_POS_TO_REF(use_pos->pos)]; + IR_ASSERT(insn->op == IR_VLOAD); + use_pos = use_pos->next; + if (use_pos && (use_pos->next || (use_pos->flags & IR_USE_MUST_BE_IN_REG))) 
{ + return 0; + } + + if (use_pos) { + ir_block *bb = ir_block_from_live_pos(ctx, use_pos->pos); + if (bb->loop_depth && bb != ir_block_from_live_pos(ctx, ival->use_pos->pos)) { + return 0; + } + } + + IR_ASSERT(ctx->ir_base[insn->op2].op == IR_VAR); + ival->stack_spill_pos = ctx->ir_base[insn->op2].op3; + + return 1; + } + return 0; +} + +static void ir_assign_bound_spill_slots(ir_ctx *ctx) +{ + ir_hashtab_bucket *b = ctx->binding->data; + uint32_t n = ctx->binding->count; + uint32_t v; + ir_live_interval *ival; + + while (n > 0) { + v = ctx->vregs[b->key]; + if (v) { + ival = ctx->live_intervals[v]; + if (ival + && ival->stack_spill_pos == -1 + && (ival->next || ival->reg == IR_REG_NONE)) { + IR_ASSERT(b->val < 0); + /* special spill slot */ + ival->stack_spill_pos = -b->val; + ival->flags |= IR_LIVE_INTERVAL_SPILLED | IR_LIVE_INTERVAL_SPILL_SPECIAL; + } + } + b++; + n--; + } +} + +static int ir_linear_scan(ir_ctx *ctx) +{ + uint32_t b; + ir_block *bb; + ir_live_interval *unhandled = NULL; + ir_live_interval *active = NULL; + ir_live_interval *inactive = NULL; + ir_live_interval *ival, *other, *prev; + int j; + ir_live_pos position; + ir_reg reg; + ir_reg_alloc_data data; + ir_ref vars = ctx->vars; + + if (!ctx->live_intervals) { + return 0; + } + + if (ctx->flags & IR_LR_HAVE_DESSA_MOVES) { + /* Add fixed intervals for temporary registers used for DESSA moves */ + for (b = 1, bb = &ctx->cfg_blocks[1]; b <= ctx->cfg_blocks_count; b++, bb++) { + IR_ASSERT(!(bb->flags & IR_BB_UNREACHABLE)); + if (bb->flags & IR_BB_DESSA_MOVES) { + ctx->data = bb; + ir_gen_dessa_moves(ctx, b, ir_fix_dessa_tmps); + } + } + } + + ctx->data = &data; + ctx->stack_frame_size = 0; + data.unused_slot_4 = 0; + data.unused_slot_2 = 0; + data.unused_slot_1 = 0; + data.handled = NULL; + + while (vars) { + ir_insn *insn = &ctx->ir_base[vars]; + + IR_ASSERT(insn->op == IR_VAR); + vars = insn->op3; /* list next */ + + insn->op3 = ir_allocate_spill_slot(ctx, insn->type, &data); + } + + for (j = ctx->vregs_count; j != 0; j--) { + ival = ctx->live_intervals[j]; + if (ival) { + if (!(ival->flags & (IR_LIVE_INTERVAL_MEM_PARAM|IR_LIVE_INTERVAL_MEM_LOAD)) + || !ir_ival_spill_for_fuse_load(ctx, ival, &data)) { + ir_add_to_unhandled(&unhandled, ival); + } + } + } + + ival = ctx->live_intervals[0]; + if (ival) { + ir_merge_to_unhandled(&unhandled, ival); + } + + /* vregs + tmp + fixed + SCRATCH + ALL */ + for (j = ctx->vregs_count + 1; j <= ctx->vregs_count + IR_REG_NUM + 2; j++) { + ival = ctx->live_intervals[j]; + if (ival) { + ival->current_range = &ival->range; + ival->list_next = inactive; + inactive = ival; + } + } + + ctx->flags &= ~(IR_RA_HAVE_SPLITS|IR_RA_HAVE_SPILLS); + +#ifdef IR_DEBUG + if (ctx->flags & IR_DEBUG_RA) { + fprintf(stderr, "----\n"); + ir_dump_live_ranges(ctx, stderr); + fprintf(stderr, "---- Start LSRA\n"); + } +#endif + + while (unhandled) { + ival = unhandled; + ival->current_range = &ival->range; + unhandled = ival->list_next; + position = ival->range.start; + + IR_LOG_LSRA(" ---- Processing", ival, "..."); + + /* for each interval i in active */ + other = active; + prev = NULL; + while (other) { + ir_live_range *r = other->current_range; + + IR_ASSERT(r); + if (r->end <= position) { + do { + r = r->next; + } while (r && r->end <= position); + if (!r) { + /* move i from active to handled */ + other = other->list_next; + if (prev) { + prev->list_next = other; + } else { + active = other; + } + continue; + } + other->current_range = r; + } + if (position < r->start) { + /* move i from active to inactive
*/ + if (prev) { + prev->list_next = other->list_next; + } else { + active = other->list_next; + } + other->list_next = inactive; + inactive = other; + } else { + prev = other; + } + other = prev ? prev->list_next : active; + } + + /* for each interval i in inactive */ + other = inactive; + prev = NULL; + while (other) { + ir_live_range *r = other->current_range; + + IR_ASSERT(r); + if (r->end <= position) { + do { + r = r->next; + } while (r && r->end <= position); + if (!r) { + /* move i from inactive to handled */ + other = other->list_next; + if (prev) { + prev->list_next = other; + } else { + inactive = other; + } + continue; + } + other->current_range = r; + } + if (position >= r->start) { + /* move i from inactive to active */ + if (prev) { + prev->list_next = other->list_next; + } else { + inactive = other->list_next; + } + other->list_next = active; + active = other; + } else { + prev = other; + } + other = prev ? prev->list_next : inactive; + } + + reg = ir_try_allocate_free_reg(ctx, ival, &active, inactive, &unhandled); + if (reg == IR_REG_NONE) { + reg = ir_allocate_blocked_reg(ctx, ival, &active, &inactive, &unhandled); + } + } + +#if 0 //def IR_DEBUG + /* all intervals must be processed */ + ival = active; + while (ival) { + IR_ASSERT(!ival->next); + ival = ival->list_next; + } + ival = inactive; + while (ival) { + IR_ASSERT(!ival->next); + ival = ival->list_next; + } +#endif + + if (ctx->flags & (IR_RA_HAVE_SPLITS|IR_RA_HAVE_SPILLS)) { + + if (ctx->binding) { + ir_assign_bound_spill_slots(ctx); + } + + /* Use simple linear-scan (without holes) to allocate and reuse spill slots */ + unhandled = NULL; + for (j = ctx->vregs_count; j != 0; j--) { + ival = ctx->live_intervals[j]; + if (ival + && (ival->next || ival->reg == IR_REG_NONE) + && ival->stack_spill_pos == -1) { + ival->flags |= IR_LIVE_INTERVAL_SPILLED; + if (!(ival->flags & IR_LIVE_INTERVAL_MEM_PARAM)) { + ir_live_range *r; + + other = ival; + while (other->next) { + other = other->next; + } + r = &other->range; + while (r->next) { + r = r->next; + } + ival->end = r->end; + ir_add_to_unhandled_spill(&unhandled, ival); + } + } + } + + if (unhandled) { + uint8_t size; + ir_live_interval *handled[9] = {NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL}; + ir_live_interval *old; + + data.handled = handled; + active = NULL; + while (unhandled) { + ival = unhandled; + ival->current_range = &ival->range; + unhandled = ival->list_next; + position = ival->range.start; + + /* for each interval i in active */ + other = active; + prev = NULL; + while (other) { + if (other->end <= position) { + /* move i from active to handled */ + if (prev) { + prev->list_next = other->list_next; + } else { + active = other->list_next; + } + size = ir_type_size[other->type]; + IR_ASSERT(size == 1 || size == 2 || size == 4 || size == 8); + old = handled[size]; + while (old) { + if (old->stack_spill_pos == other->stack_spill_pos) { + break; + } + old = old->list_next; + } + if (!old) { + other->list_next = handled[size]; + handled[size] = other; + } + } else { + prev = other; + } + other = prev ? 
prev->list_next : active; + } + + ival->stack_spill_pos = ir_allocate_spill_slot(ctx, ival->type, &data); + if (unhandled && ival->end > unhandled->range.start) { + ival->list_next = active; + active = ival; + } else { + size = ir_type_size[ival->type]; + IR_ASSERT(size == 1 || size == 2 || size == 4 || size == 8); + old = handled[size]; + while (old) { + if (old->stack_spill_pos == ival->stack_spill_pos) { + break; + } + old = old->list_next; + } + if (!old) { + ival->list_next = handled[size]; + handled[size] = ival; + } + } + } + data.handled = NULL; + } + } + +#ifdef IR_TARGET_X86 + if (ctx->ret_type == IR_FLOAT || ctx->ret_type == IR_DOUBLE) { + ctx->ret_slot = ir_allocate_spill_slot(ctx, ctx->ret_type, &data); + } else { + ctx->ret_slot = -1; + } +#endif + +#ifdef IR_DEBUG + if (ctx->flags & IR_DEBUG_RA) { + fprintf(stderr, "---- Finish LSRA\n"); + ir_dump_live_ranges(ctx, stderr); + fprintf(stderr, "----\n"); + } +#endif + + return 1; +} + +static bool needs_spill_reload(ir_ctx *ctx, ir_live_interval *ival, uint32_t b0, ir_bitset available) +{ + ir_worklist worklist; + ir_block *bb; + uint32_t b, *p, n; + + ir_worklist_init(&worklist, ctx->cfg_blocks_count + 1); + ir_worklist_push(&worklist, b0); + while (ir_worklist_len(&worklist) != 0) { + b = ir_worklist_pop(&worklist); + bb = &ctx->cfg_blocks[b]; + if (bb->flags & (IR_BB_ENTRY|IR_BB_START)) { + ir_worklist_free(&worklist); + return 1; + } + n = bb->predecessors_count; + for (p = &ctx->cfg_edges[bb->predecessors]; n > 0; p++, n--) { + b = *p; + bb = &ctx->cfg_blocks[b]; + + if (!ir_ival_covers(ival, IR_SAVE_LIVE_POS_FROM_REF(bb->end))) { + ir_worklist_free(&worklist); + return 1; + } else if (!ir_bitset_in(available, b)) { + ir_worklist_push(&worklist, b); + } + } + } + ir_worklist_free(&worklist); + return 0; +} + +static bool needs_spill_load(ir_ctx *ctx, ir_live_interval *ival, ir_use_pos *use_pos) +{ + if (use_pos->next + && use_pos->op_num == 1 + && use_pos->next->pos == use_pos->pos + && !(use_pos->next->flags & IR_USE_MUST_BE_IN_REG)) { + /* Support for R2 = ADD(R1, R1) */ + use_pos = use_pos->next; + } + return use_pos->next && use_pos->next->op_num != 0; +} + +static void assign_regs(ir_ctx *ctx) +{ + ir_ref i; + ir_live_interval *ival, *top_ival; + ir_use_pos *use_pos; + int8_t reg, old_reg; + ir_ref ref; + ir_regset used_regs = 0; + + if (!ctx->regs) { + ctx->regs = ir_mem_malloc(sizeof(ir_regs) * ctx->insns_count); + memset(ctx->regs, IR_REG_NONE, sizeof(ir_regs) * ctx->insns_count); + } + + if (!(ctx->flags & (IR_RA_HAVE_SPLITS|IR_RA_HAVE_SPILLS))) { + for (i = 1; i <= ctx->vregs_count; i++) { + ival = ctx->live_intervals[i]; + if (ival) { + do { + if (ival->reg != IR_REG_NONE) { + reg = ival->reg; + IR_REGSET_INCL(used_regs, reg); + use_pos = ival->use_pos; + while (use_pos) { + ref = (use_pos->hint_ref < 0) ? 
-use_pos->hint_ref : IR_LIVE_POS_TO_REF(use_pos->pos); + ir_set_alocated_reg(ctx, ref, use_pos->op_num, reg); + use_pos = use_pos->next; + } + } + ival = ival->next; + } while (ival); + } + } + } else { + ir_bitset available = ir_bitset_malloc(ctx->cfg_blocks_count + 1); + + for (i = 1; i <= ctx->vregs_count; i++) { + top_ival = ival = ctx->live_intervals[i]; + if (ival) { + if (!(ival->flags & IR_LIVE_INTERVAL_SPILLED)) { + do { + if (ival->reg != IR_REG_NONE) { + IR_REGSET_INCL(used_regs, ival->reg); + use_pos = ival->use_pos; + while (use_pos) { + reg = ival->reg; + ref = IR_LIVE_POS_TO_REF(use_pos->pos); + if (use_pos->hint_ref < 0) { + ref = -use_pos->hint_ref; + } + ir_set_alocated_reg(ctx, ref, use_pos->op_num, reg); + + use_pos = use_pos->next; + } + } + ival = ival->next; + } while (ival); + } else { + do { + if (ival->reg != IR_REG_NONE) { + ir_ref prev_use_ref = IR_UNUSED; + + ir_bitset_clear(available, ir_bitset_len(ctx->cfg_blocks_count + 1)); + IR_REGSET_INCL(used_regs, ival->reg); + use_pos = ival->use_pos; + while (use_pos) { + reg = ival->reg; + ref = IR_LIVE_POS_TO_REF(use_pos->pos); + // TODO: Insert spill loads and stores in optimal positions (resolution) + if (use_pos->op_num == 0) { + if (ctx->ir_base[ref].op == IR_PHI) { + /* Spilled PHI var is passed through memory */ + reg = IR_REG_NONE; + } else { + uint32_t use_b = ctx->cfg_map[ref]; + + if (ir_ival_covers(ival, IR_SAVE_LIVE_POS_FROM_REF(ctx->cfg_blocks[use_b].end))) { + ir_bitset_incl(available, use_b); + } + if (top_ival->flags & IR_LIVE_INTERVAL_SPILL_SPECIAL) { + reg |= IR_REG_SPILL_SPECIAL; + } else { + reg |= IR_REG_SPILL_STORE; + } + prev_use_ref = ref; + } + } else if ((!prev_use_ref || ctx->cfg_map[prev_use_ref] != ctx->cfg_map[ref]) + && needs_spill_reload(ctx, ival, ctx->cfg_map[ref], available)) { + if (!(use_pos->flags & IR_USE_MUST_BE_IN_REG) + && use_pos->hint != reg +// && ctx->ir_base[ref].op != IR_CALL +// && ctx->ir_base[ref].op != IR_TAILCALL) { + && ctx->ir_base[ref].op != IR_SNAPSHOT + && !needs_spill_load(ctx, ival, use_pos)) { + /* fuse spill load (valid only when register is not reused) */ + reg = IR_REG_NONE; + if (use_pos->next + && use_pos->op_num == 1 + && use_pos->next->pos == use_pos->pos + && !(use_pos->next->flags & IR_USE_MUST_BE_IN_REG)) { + /* Support for R2 = BINOP(R1, R1) */ + if (use_pos->hint_ref < 0) { + ref = -use_pos->hint_ref; + } + ir_set_alocated_reg(ctx, ref, use_pos->op_num, reg); + use_pos = use_pos->next; + } + } else { + if (top_ival->flags & IR_LIVE_INTERVAL_SPILL_SPECIAL) { + reg |= IR_REG_SPILL_SPECIAL; + } else { + reg |= IR_REG_SPILL_LOAD; + } + if (ctx->ir_base[ref].op != IR_SNAPSHOT) { + uint32_t use_b = ctx->cfg_map[ref]; + + if (ir_ival_covers(ival, IR_SAVE_LIVE_POS_FROM_REF(ctx->cfg_blocks[use_b].end))) { + ir_bitset_incl(available, use_b); + } + prev_use_ref = ref; + } + } + } else if (use_pos->flags & IR_PHI_USE) { + IR_ASSERT(use_pos->hint_ref < 0); + IR_ASSERT(ctx->vregs[-use_pos->hint_ref]); + IR_ASSERT(ctx->live_intervals[ctx->vregs[-use_pos->hint_ref]]); + if (ctx->live_intervals[ctx->vregs[-use_pos->hint_ref]]->flags & IR_LIVE_INTERVAL_SPILLED) { + /* Spilled PHI var is passed through memory */ + reg = IR_REG_NONE; + } + } else if (use_pos->hint_ref < 0 + && ctx->use_lists[-use_pos->hint_ref].count > 1 + && (old_reg = ir_get_alocated_reg(ctx, -use_pos->hint_ref, use_pos->op_num)) != IR_REG_NONE + && (old_reg & (IR_REG_SPILL_SPECIAL|IR_REG_SPILL_LOAD))) { + /* Force spill load */ + // TODO: Find a better solution ??? 
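+ /* An earlier use of this operand was already marked as a spill reload; mark this use the same way so both reads observe the reloaded value consistently. */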
+ if (top_ival->flags & IR_LIVE_INTERVAL_SPILL_SPECIAL) { + reg |= IR_REG_SPILL_SPECIAL; + } else { + reg |= IR_REG_SPILL_LOAD; + } + IR_ASSERT(reg == old_reg); + } else { + /* reuse register without spill load */ + } + if (use_pos->hint_ref < 0) { + ref = -use_pos->hint_ref; + } + ir_set_alocated_reg(ctx, ref, use_pos->op_num, reg); + + use_pos = use_pos->next; + } + } else if (!(top_ival->flags & IR_LIVE_INTERVAL_SPILL_SPECIAL)) { + use_pos = ival->use_pos; + while (use_pos) { + ref = IR_LIVE_POS_TO_REF(use_pos->pos); + if (ctx->ir_base[ref].op == IR_SNAPSHOT) { + IR_ASSERT(use_pos->hint_ref >= 0); + /* A reference to a CPU spill slot */ + reg = IR_REG_SPILL_STORE | IR_REG_STACK_POINTER; + ir_set_alocated_reg(ctx, ref, use_pos->op_num, reg); + } + use_pos = use_pos->next; + } + } + ival = ival->next; + } while (ival); + } + } + } + ir_mem_free(available); + } + + /* Temporary registers */ + ival = ctx->live_intervals[0]; + if (ival) { + do { + IR_ASSERT(ival->reg != IR_REG_NONE); + IR_REGSET_INCL(used_regs, ival->reg); + ir_set_alocated_reg(ctx, ival->tmp_ref, ival->tmp_op_num, ival->reg); + ival = ival->next; + } while (ival); + } + + if (ctx->fixed_stack_frame_size != -1) { + ctx->used_preserved_regs = (ir_regset)ctx->fixed_save_regset; + if (IR_REGSET_DIFFERENCE(IR_REGSET_INTERSECTION(used_regs, IR_REGSET_PRESERVED), + ctx->used_preserved_regs)) { + // TODO: Preserved reg and fixed frame conflict ??? + // IR_ASSERT(0 && "Preserved reg and fixed frame conflict"); + } + } else { + ctx->used_preserved_regs = IR_REGSET_UNION((ir_regset)ctx->fixed_save_regset, + IR_REGSET_DIFFERENCE(IR_REGSET_INTERSECTION(used_regs, IR_REGSET_PRESERVED), + (ctx->flags & IR_FUNCTION) ? (ir_regset)ctx->fixed_regset : IR_REGSET_PRESERVED)); + } + + ir_fix_stack_frame(ctx); +} + +int ir_reg_alloc(ir_ctx *ctx) +{ + if (ir_linear_scan(ctx)) { + assign_regs(ctx); + return 1; + } + return 0; +} diff --git a/ext/opcache/jit/ir/ir_save.c b/ext/opcache/jit/ir/ir_save.c new file mode 100644 index 00000000000..d89ec6eebce --- /dev/null +++ b/ext/opcache/jit/ir/ir_save.c @@ -0,0 +1,128 @@ +/* + * IR - Lightweight JIT Compilation Framework + * (IR saver) + * Copyright (C) 2022 Zend by Perforce. 
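+ *
+ * ir_save() prints the IR in a human-readable text form: constants are
+ * emitted as "type c_N = value;" lines, and instructions as "l_N = OP(...)"
+ * (control), "type d_N = OP(...)" (data) or "type d_N, l_N = OP(...)"
+ * lines, with the whole body wrapped in "{ ... }".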
+ * Authors: Dmitry Stogov + */ + +#include "ir.h" +#include "ir_private.h" + +void ir_save(const ir_ctx *ctx, FILE *f) +{ + ir_ref i, j, n, ref, *p; + ir_insn *insn; + uint32_t flags; + bool first; + + fprintf(f, "{\n"); + for (i = IR_UNUSED + 1, insn = ctx->ir_base - i; i < ctx->consts_count; i++, insn--) { + fprintf(f, "\t%s c_%d = ", ir_type_cname[insn->type], i); + if (insn->op == IR_FUNC) { + if (!insn->const_flags) { + fprintf(f, "func(%s)", ir_get_str(ctx, insn->val.i32)); + } else { + fprintf(f, "func(%s, %d)", ir_get_str(ctx, insn->val.i32), insn->const_flags); + } + } else if (insn->op == IR_SYM) { + fprintf(f, "sym(%s)", ir_get_str(ctx, insn->val.i32)); + } else if (insn->op == IR_FUNC_ADDR) { + fprintf(f, "func_addr("); + ir_print_const(ctx, insn, f, true); + if (insn->const_flags) { + fprintf(f, ", %d", insn->const_flags); + } + fprintf(f, ")"); + } else { + ir_print_const(ctx, insn, f, true); + } + fprintf(f, ";\n"); + } + + for (i = IR_UNUSED + 1, insn = ctx->ir_base + i; i < ctx->insns_count;) { + flags = ir_op_flags[insn->op]; + if (flags & IR_OP_FLAG_CONTROL) { + if (!(flags & IR_OP_FLAG_MEM) || insn->type == IR_VOID) { + fprintf(f, "\tl_%d = ", i); + } else { + fprintf(f, "\t%s d_%d, l_%d = ", ir_type_cname[insn->type], i, i); + } + } else { + fprintf(f, "\t"); + if (flags & IR_OP_FLAG_DATA) { + fprintf(f, "%s d_%d = ", ir_type_cname[insn->type], i); + } + } + fprintf(f, "%s", ir_op_name[insn->op]); + n = ir_operands_count(ctx, insn); + if ((insn->op == IR_MERGE || insn->op == IR_LOOP_BEGIN) && n != 2) { + fprintf(f, "/%d", n); + } else if ((insn->op == IR_CALL || insn->op == IR_TAILCALL) && n != 2) { + fprintf(f, "/%d", n - 2); + } else if (insn->op == IR_PHI && n != 3) { + fprintf(f, "/%d", n - 1); + } else if (insn->op == IR_SNAPSHOT) { + fprintf(f, "/%d", n - 1); + } + first = 1; + for (j = 1, p = insn->ops + 1; j <= n; j++, p++) { + uint32_t opnd_kind = IR_OPND_KIND(flags, j); + + ref = *p; + if (ref) { + switch (opnd_kind) { + case IR_OPND_DATA: + if (IR_IS_CONST_REF(ref)) { + fprintf(f, "%sc_%d", first ? "(" : ", ", -ref); + } else { + fprintf(f, "%sd_%d", first ? "(" : ", ", ref); + } + first = 0; + break; + case IR_OPND_CONTROL: + case IR_OPND_CONTROL_DEP: + case IR_OPND_CONTROL_REF: + fprintf(f, "%sl_%d", first ? "(" : ", ", ref); + first = 0; + break; + case IR_OPND_STR: + fprintf(f, "%s\"%s\"", first ? "(" : ", ", ir_get_str(ctx, ref)); + first = 0; + break; + case IR_OPND_PROB: + if (ref == 0) { + break; + } + IR_FALLTHROUGH; + case IR_OPND_NUM: + fprintf(f, "%s%d", first ? "(" : ", ", ref); + first = 0; + break; + } + } else if (opnd_kind == IR_OPND_NUM) { + fprintf(f, "%s%d", first ? "(" : ", ", ref); + first = 0; + } else if (IR_IS_REF_OPND_KIND(opnd_kind) && j != n) { + fprintf(f, "%snull", first ? 
"(" : ", "); + first = 0; + } + } + if (first) { + fprintf(f, ";"); + } else { + fprintf(f, ");"); + } + if (((flags & IR_OP_FLAG_DATA) || ((flags & IR_OP_FLAG_MEM) && insn->type != IR_VOID)) && ctx->binding) { + ir_ref var = ir_binding_find(ctx, i); + if (var) { + IR_ASSERT(var < 0); + fprintf(f, " # BIND(0x%x);", -var); + } + } + fprintf(f, "\n"); + n = ir_insn_inputs_to_len(n); + i += n; + insn += n; + } + fprintf(f, "}\n"); +} diff --git a/ext/opcache/jit/ir/ir_sccp.c b/ext/opcache/jit/ir/ir_sccp.c new file mode 100644 index 00000000000..6c0297f2b14 --- /dev/null +++ b/ext/opcache/jit/ir/ir_sccp.c @@ -0,0 +1,885 @@ +/* + * IR - Lightweight JIT Compilation Framework + * (SCCP - Sparse Conditional Constant Propagation) + * Copyright (C) 2022 Zend by Perforce. + * Authors: Dmitry Stogov + * + * The SCCP algorithm is based on M. N. Wegman and F. K. Zadeck publication + * See: M. N. Wegman and F. K. Zadeck. "Constant propagation with conditional branches" + * ACM Transactions on Programming Languages and Systems, 13(2):181-210, April 1991 + */ + +#include "ir.h" +#include "ir_private.h" + +#define IR_TOP IR_UNUSED +#define IR_BOTTOM IR_LAST_OP + +#define IR_MAKE_TOP(ref) do {IR_ASSERT(ref > 0); _values[ref].optx = IR_TOP;} while (0) +#define IR_MAKE_BOTTOM(ref) do {IR_ASSERT(ref > 0); _values[ref].optx = IR_BOTTOM;} while (0) + +#define IR_IS_TOP(ref) (ref >= 0 && _values[ref].optx == IR_TOP) +#define IR_IS_BOTTOM(ref) (ref >= 0 && _values[ref].optx == IR_BOTTOM) +#define IR_IS_FEASIBLE(ref) (ref >= 0 && _values[ref].optx != IR_TOP) + +#define IR_COMBO_COPY_PROPAGATION 1 + +#if IR_COMBO_COPY_PROPAGATION +IR_ALWAYS_INLINE ir_ref ir_sccp_identity(ir_insn *_values, ir_ref a) +{ + if (a > 0 && _values[a].op == IR_COPY) { + a = _values[a].op1; + IR_ASSERT(a <= 0 || _values[a].op != IR_COPY); + } + return a; +} +#endif + +static ir_ref ir_sccp_fold(ir_ctx *ctx, ir_insn *_values, ir_ref res, uint32_t opt, ir_ref op1, ir_ref op2, ir_ref op3) +{ + ir_insn *op1_insn, *op2_insn, *op3_insn, *insn; + +#if IR_COMBO_COPY_PROPAGATION + op1 = ir_sccp_identity(_values, op1); + op2 = ir_sccp_identity(_values, op2); + op3 = ir_sccp_identity(_values, op3); +#endif + +restart: + op1_insn = (op1 > 0 && IR_IS_CONST_OP(_values[op1].op)) ? _values + op1 : ctx->ir_base + op1; + op2_insn = (op2 > 0 && IR_IS_CONST_OP(_values[op2].op)) ? _values + op2 : ctx->ir_base + op2; + op3_insn = (op3 > 0 && IR_IS_CONST_OP(_values[op3].op)) ? _values + op3 : ctx->ir_base + op3; + + switch (ir_folding(ctx, opt, op1, op2, op3, op1_insn, op2_insn, op3_insn)) { + case IR_FOLD_DO_RESTART: + opt = ctx->fold_insn.optx; + op1 = ctx->fold_insn.op1; + op2 = ctx->fold_insn.op2; + op3 = ctx->fold_insn.op3; + goto restart; + case IR_FOLD_DO_EMIT: + IR_MAKE_BOTTOM(res); + return 1; + case IR_FOLD_DO_COPY: + op1 = ctx->fold_insn.op1; +#if IR_COMBO_COPY_PROPAGATION + op1 = ir_sccp_identity(_values, op1); +#endif + insn = (op1 > 0 && IR_IS_CONST_OP(_values[op1].op)) ? 
_values + op1 : ctx->ir_base + op1; + if (IR_IS_CONST_OP(insn->op)) { + /* pass */ +#if IR_COMBO_COPY_PROPAGATION + } else if (IR_IS_TOP(res)) { + _values[res].optx = IR_OPT(IR_COPY, insn->type); + _values[res].op1 = op1; + return 1; + } else if (_values[res].op == IR_COPY && _values[res].op1 == op1) { + return 0; /* not changed */ +#endif + } else { + IR_MAKE_BOTTOM(res); + return 1; + } + break; + case IR_FOLD_DO_CONST: + insn = &ctx->fold_insn; + break; + default: + IR_ASSERT(0); + return 0; + } + + if (IR_IS_TOP(res)) { + _values[res].optx = IR_OPT(insn->type, insn->type); + _values[res].val.u64 = insn->val.u64; + return 1; + } else if (_values[res].opt != IR_OPT(insn->type, insn->type) || _values[res].val.u64 != insn->val.u64) { + IR_MAKE_BOTTOM(res); + return 1; + } + return 0; /* not changed */ +} + +static bool ir_sccp_join_values(ir_ctx *ctx, ir_insn *_values, ir_ref a, ir_ref b) +{ + ir_insn *v; + + if (!IR_IS_BOTTOM(a) && !IR_IS_TOP(b)) { + b = ir_sccp_identity(_values, b); + v = IR_IS_CONST_REF(b) ? &ctx->ir_base[b] : &_values[b]; + if (IR_IS_TOP(a)) { +#if IR_COMBO_COPY_PROPAGATION + if (v->op == IR_BOTTOM) { + _values[a].optx = IR_OPT(IR_COPY, ctx->ir_base[b].type); + _values[a].op1 = b; + return 1; + } +#endif + _values[a].optx = v->opt; + _values[a].val.u64 = v->val.u64; + return 1; + } else if (_values[a].opt == v->opt && _values[a].val.u64 == v->val.u64) { + /* pass */ +#if IR_COMBO_COPY_PROPAGATION + } else if (_values[a].op == IR_COPY && _values[a].op1 == b) { + /* pass */ +#endif + } else { + IR_MAKE_BOTTOM(a); + return 1; + } + } + return 0; +} + +static bool ir_sccp_is_true(ir_ctx *ctx, ir_insn *_values, ir_ref a) +{ + ir_insn *v = IR_IS_CONST_REF(a) ? &ctx->ir_base[a] : &_values[a]; + + return ir_const_is_true(v); +} + +static bool ir_sccp_is_equal(ir_ctx *ctx, ir_insn *_values, ir_ref a, ir_ref b) +{ + ir_insn *v1 = IR_IS_CONST_REF(a) ? &ctx->ir_base[a] : &_values[a]; + ir_insn *v2 = IR_IS_CONST_REF(b) ? 
&ctx->ir_base[b] : &_values[b]; + + return v1->val.u64 == v2->val.u64; +} + +static void ir_sccp_remove_from_use_list(ir_ctx *ctx, ir_ref from, ir_ref ref) +{ + ir_ref j, n, *p, *q, use; + ir_use_list *use_list = &ctx->use_lists[from]; + ir_ref skip = 0; + + n = use_list->count; + for (j = 0, p = q = &ctx->use_edges[use_list->refs]; j < n; j++, p++) { + use = *p; + if (use == ref) { + skip++; + } else { + if (p != q) { + *q = use; + } + q++; + } + } + use_list->count -= skip; +#if IR_COMBO_COPY_PROPAGATION + if (skip) { + do { + *q = IR_UNUSED; + q++; + } while (--skip); + } +#endif +} + +#if IR_COMBO_COPY_PROPAGATION +static int ir_sccp_add_to_use_list(ir_ctx *ctx, ir_ref to, ir_ref ref) +{ + ir_use_list *use_list = &ctx->use_lists[to]; + ir_ref n = use_list->refs + use_list->count; + + if (n < ctx->use_edges_count && ctx->use_edges[n] == IR_UNUSED) { + ctx->use_edges[n] = ref; + use_list->count++; + return 0; + } else { + /* Reallocate the whole edges buffer (this is inefficient) */ + ctx->use_edges = ir_mem_realloc(ctx->use_edges, (ctx->use_edges_count + use_list->count + 1) * sizeof(ir_ref)); + memcpy(ctx->use_edges + ctx->use_edges_count, ctx->use_edges + use_list->refs, use_list->count * sizeof(ir_ref)); + use_list->refs = ctx->use_edges_count; + ctx->use_edges[use_list->refs + use_list->count] = ref; + use_list->count++; + ctx->use_edges_count += use_list->count; + return 1; + } +} +#endif + +static void ir_sccp_make_nop(ir_ctx *ctx, ir_ref ref) +{ + ir_ref j, n, *p; + ir_use_list *use_list = &ctx->use_lists[ref]; + ir_insn *insn; + + use_list->refs = 0; + use_list->count = 0; + + insn = &ctx->ir_base[ref]; + n = insn->inputs_count; + insn->opt = IR_NOP; /* keep "inputs_count" */ + for (j = 1, p = insn->ops + j; j <= n; j++, p++) { + *p = IR_UNUSED; + } +} + +static void ir_sccp_remove_insn(ir_ctx *ctx, ir_insn *_values, ir_ref ref, ir_bitqueue *worklist) +{ + ir_ref j, n, *p; + ir_use_list *use_list = &ctx->use_lists[ref]; + ir_insn *insn; + + use_list->refs = 0; + use_list->count = 0; + + insn = &ctx->ir_base[ref]; + n = insn->inputs_count; + insn->opt = IR_NOP; /* keep "inputs_count" */ + for (j = 1, p = insn->ops + j; j <= n; j++, p++) { + ir_ref input = *p; + *p = IR_UNUSED; + if (input > 0 && _values[input].op == IR_BOTTOM) { + ir_sccp_remove_from_use_list(ctx, input, ref); + /* schedule DCE */ + if ((IR_IS_FOLDABLE_OP(ctx->ir_base[input].op) && ctx->use_lists[input].count == 0) + || ((ir_op_flags[ctx->ir_base[input].op] & (IR_OP_FLAG_MEM|IR_OP_FLAG_MEM_MASK)) == (IR_OP_FLAG_MEM|IR_OP_FLAG_MEM_LOAD) + && ctx->use_lists[input].count == 1)) { + ir_bitqueue_add(worklist, input); + } + } + } +} + +static void ir_sccp_replace_insn(ir_ctx *ctx, ir_insn *_values, ir_ref ref, ir_ref new_ref, ir_bitqueue *worklist) +{ + ir_ref j, n, *p, use, k, l; + ir_insn *insn; + ir_use_list *use_list; + + IR_ASSERT(ref != new_ref); + + insn = &ctx->ir_base[ref]; + n = insn->inputs_count; + insn->opt = IR_NOP; /* keep "inputs_count" */ + for (j = 1, p = insn->ops + 1; j <= n; j++, p++) { + ir_ref input = *p; + *p = IR_UNUSED; + if (input > 0) { + ir_sccp_remove_from_use_list(ctx, input, ref); + /* schedule DCE */ + if (worklist + && ((IR_IS_FOLDABLE_OP(ctx->ir_base[input].op) && ctx->use_lists[input].count == 0) + || ((ir_op_flags[ctx->ir_base[input].op] & (IR_OP_FLAG_MEM|IR_OP_FLAG_MEM_MASK)) == (IR_OP_FLAG_MEM|IR_OP_FLAG_MEM_LOAD) + && ctx->use_lists[input].count == 1))) { + ir_bitqueue_add(worklist, input); + } + } + } + + use_list = &ctx->use_lists[ref]; + n = use_list->count; + for (j = 0, p = 
&ctx->use_edges[use_list->refs]; j < n; j++, p++) { + use = *p; + if (IR_IS_FEASIBLE(use)) { + insn = &ctx->ir_base[use]; + l = insn->inputs_count; + for (k = 1; k <= l; k++) { + if (ir_insn_op(insn, k) == ref) { + ir_insn_set_op(insn, k, new_ref); + } + } +#if IR_COMBO_COPY_PROPAGATION + if (new_ref > 0 && IR_IS_BOTTOM(use)) { + if (ir_sccp_add_to_use_list(ctx, new_ref, use)) { + /* restore after reallocation */ + use_list = &ctx->use_lists[ref]; + n = use_list->count; + p = &ctx->use_edges[use_list->refs + j]; + } + } +#endif + /* schedule folding */ + if (worklist && _values[use].op == IR_BOTTOM) { + ir_bitqueue_add(worklist, use); + } + } + } + + use_list->refs = 0; + use_list->count = 0; +} + +static void ir_sccp_fold2(ir_ctx *ctx, ir_insn *_values, ir_ref ref, ir_bitqueue *worklist) +{ + uint32_t opt; + ir_ref op1, op2, op3; + ir_insn *op1_insn, *op2_insn, *op3_insn, *insn; + + insn = &ctx->ir_base[ref]; + opt = insn->opt; + op1 = insn->op1; + op2 = insn->op2; + op3 = insn->op3; + +restart: + op1_insn = ctx->ir_base + op1; + op2_insn = ctx->ir_base + op2; + op3_insn = ctx->ir_base + op3; + + switch (ir_folding(ctx, opt, op1, op2, op3, op1_insn, op2_insn, op3_insn)) { + case IR_FOLD_DO_RESTART: + opt = ctx->fold_insn.optx; + op1 = ctx->fold_insn.op1; + op2 = ctx->fold_insn.op2; + op3 = ctx->fold_insn.op3; + goto restart; + case IR_FOLD_DO_EMIT: + insn = &ctx->ir_base[ref]; + if (insn->opt != ctx->fold_insn.opt + || insn->op1 != ctx->fold_insn.op1 + || insn->op2 != ctx->fold_insn.op2 + || insn->op3 != ctx->fold_insn.op3) { + + ir_use_list *use_list; + ir_ref n, j, *p, use; + + insn->optx = ctx->fold_insn.opt; + IR_ASSERT(!IR_OP_HAS_VAR_INPUTS(ir_op_flags[opt & IR_OPT_OP_MASK])); + insn->inputs_count = IR_INPUT_EDGES_COUNT(ir_op_flags[opt & IR_OPT_OP_MASK]); + if (insn->op1 != ctx->fold_insn.op1) { + if (!IR_IS_CONST_REF(insn->op1) && insn->op1 != ctx->fold_insn.op2 && insn->op1 != ctx->fold_insn.op3) { + ir_sccp_remove_from_use_list(ctx, insn->op1, ref); + } + if (!IR_IS_CONST_REF(ctx->fold_insn.op1) && ctx->fold_insn.op1 != insn->op2 && ctx->fold_insn.op1 != insn->op3) { + ir_sccp_add_to_use_list(ctx, ctx->fold_insn.op1, ref); + } + } + if (insn->op2 != ctx->fold_insn.op2) { + if (!IR_IS_CONST_REF(insn->op2) && insn->op2 != ctx->fold_insn.op1 && insn->op2 != ctx->fold_insn.op3) { + ir_sccp_remove_from_use_list(ctx, insn->op2, ref); + } + if (!IR_IS_CONST_REF(ctx->fold_insn.op2) && ctx->fold_insn.op2 != insn->op1 && ctx->fold_insn.op2 != insn->op3) { + ir_sccp_add_to_use_list(ctx, ctx->fold_insn.op2, ref); + } + } + if (insn->op3 != ctx->fold_insn.op3) { + if (!IR_IS_CONST_REF(insn->op3) && insn->op3 != ctx->fold_insn.op1 && insn->op3 != ctx->fold_insn.op2) { + ir_sccp_remove_from_use_list(ctx, insn->op3, ref); + } + if (!IR_IS_CONST_REF(ctx->fold_insn.op3) && ctx->fold_insn.op3 != insn->op1 && ctx->fold_insn.op3 != insn->op2) { + ir_sccp_add_to_use_list(ctx, ctx->fold_insn.op3, ref); + } + } + insn->op1 = ctx->fold_insn.op1; + insn->op2 = ctx->fold_insn.op2; + insn->op3 = ctx->fold_insn.op3; + + use_list = &ctx->use_lists[ref]; + n = use_list->count; + for (j = 0, p = &ctx->use_edges[use_list->refs]; j < n; j++, p++) { + use = *p; + if (_values[use].op == IR_BOTTOM) { + ir_bitqueue_add(worklist, use); + } + } + } + break; + case IR_FOLD_DO_COPY: + op1 = ctx->fold_insn.op1; + ir_sccp_replace_insn(ctx, _values, ref, op1, worklist); + break; + case IR_FOLD_DO_CONST: + op1 = ir_const(ctx, ctx->fold_insn.val, ctx->fold_insn.type); + ir_sccp_replace_insn(ctx, _values, ref, op1, worklist); + 
break; + default: + IR_ASSERT(0); + break; + } +} + +static void ir_sccp_replace_use(ir_ctx *ctx, ir_ref ref, ir_ref use, ir_ref new_use) +{ + ir_use_list *use_list = &ctx->use_lists[ref]; + ir_ref i, n, *p; + + n = use_list->count; + for (i = 0, p = &ctx->use_edges[use_list->refs]; i < n; i++, p++) { + if (*p == use) { + *p = new_use; + break; + } + } +} + +static void ir_sccp_remove_if(ir_ctx *ctx, ir_insn *_values, ir_ref ref, ir_ref dst) +{ + ir_ref j, n, *p, use, next; + ir_insn *insn, *next_insn; + ir_use_list *use_list = &ctx->use_lists[ref]; + + insn = &ctx->ir_base[ref]; + n = use_list->count; + for (j = 0, p = &ctx->use_edges[use_list->refs]; j < n; j++, p++) { + use = *p; + if (use == dst) { + next = ctx->use_edges[ctx->use_lists[use].refs]; + next_insn = &ctx->ir_base[next]; + /* remove IF and IF_TRUE/FALSE from double linked control list */ + next_insn->op1 = insn->op1; + ir_sccp_replace_use(ctx, insn->op1, ref, next); + /* remove IF and IF_TRUE/FALSE instructions */ + ir_sccp_make_nop(ctx, ref); + ir_sccp_make_nop(ctx, use); + break; + } + } +} + +static void ir_sccp_remove_unfeasible_merge_inputs(ir_ctx *ctx, ir_insn *_values, ir_ref ref, ir_ref unfeasible_inputs) +{ + ir_ref i, j, n, k, *p, use; + ir_insn *insn, *use_insn; + ir_use_list *use_list; + ir_bitset life_inputs; + + insn = &ctx->ir_base[ref]; + IR_ASSERT(insn->op == IR_MERGE || insn->op == IR_LOOP_BEGIN); + n = insn->inputs_count; + if (n - unfeasible_inputs == 1) { + /* remove MERGE completely */ + for (j = 1; j <= n; j++) { + ir_ref input = ir_insn_op(insn, j); + if (input && IR_IS_FEASIBLE(input)) { + ir_insn *input_insn = &ctx->ir_base[input]; + + IR_ASSERT(input_insn->op == IR_END || input_insn->op == IR_IJMP || input_insn->op == IR_UNREACHABLE); + if (input_insn->op == IR_END) { + ir_ref prev, next = IR_UNUSED; + ir_insn *next_insn = NULL; + + prev = input_insn->op1; + use_list = &ctx->use_lists[ref]; + for (k = 0, p = &ctx->use_edges[use_list->refs]; k < use_list->count; k++, p++) { + use = *p; + use_insn = &ctx->ir_base[use]; + IR_ASSERT((use_insn->op != IR_PHI) && "PHI must be already removed"); + if (ir_op_flags[use_insn->op] & IR_OP_FLAG_CONTROL) { + next = use; + next_insn = use_insn; + break; + } + } + IR_ASSERT(prev && next); + /* remove MERGE and input END from double linked control list */ + next_insn->op1 = prev; + ir_sccp_replace_use(ctx, prev, input, next); + /* remove MERGE and input END instructions */ + ir_sccp_make_nop(ctx, ref); + ir_sccp_make_nop(ctx, input); + break; + } else { + for (i = 2; i <= n; i++) { + ir_insn_set_op(insn, i, IR_UNUSED); + } + insn->op = IR_BEGIN; + insn->op1 = input; + } + } + } + } else { + n = insn->inputs_count; + i = 1; + life_inputs = ir_bitset_malloc(n + 1); + for (j = 1; j <= n; j++) { + ir_ref input = ir_insn_op(insn, j); + + if (input) { + if (i != j) { + ir_insn_set_op(insn, i, input); + } + ir_bitset_incl(life_inputs, j); + i++; + } + } + j = i; + while (j < n) { + ir_insn_set_op(insn, j, IR_UNUSED); + j++; + } + i--; + insn->inputs_count = i; + + n++; + use_list = &ctx->use_lists[ref]; + if (use_list->count > 1) { + for (k = 0, p = &ctx->use_edges[use_list->refs]; k < use_list->count; k++, p++) { + use = *p; + use_insn = &ctx->ir_base[use]; + if (use_insn->op == IR_PHI) { + i = 2; + for (j = 2; j <= n; j++) { + ir_ref input = ir_insn_op(use_insn, j); + + if (ir_bitset_in(life_inputs, j - 1)) { + IR_ASSERT(input); + if (i != j) { + ir_insn_set_op(use_insn, i, input); + } + i++; + } else if (!IR_IS_CONST_REF(input)) { + ir_sccp_remove_from_use_list(ctx, 
input, use);
+						}
+					}
+					while (i <= n) {
+						ir_insn_set_op(use_insn, i, IR_UNUSED);
+						i++;
+					}
+					use_insn->inputs_count = insn->inputs_count + 1;
+				}
+			}
+		}
+		ir_mem_free(life_inputs);
+	}
+}
+
+int ir_sccp(ir_ctx *ctx)
+{
+	ir_ref i, j, n, *p, use;
+	ir_use_list *use_list;
+	ir_insn *insn, *use_insn, *value;
+	uint32_t flags;
+	ir_bitqueue worklist;
+	ir_insn *_values = ir_mem_calloc(ctx->insns_count, sizeof(ir_insn));
+
+	ctx->flags |= IR_OPT_IN_SCCP;
+
+	/* A slightly modified SCCP algorithm by M. N. Wegman and F. K. Zadeck */
+	ir_bitqueue_init(&worklist, ctx->insns_count);
+	worklist.pos = 0;
+	ir_bitset_incl(worklist.set, 1);
+	while ((i = ir_bitqueue_pop(&worklist)) >= 0) {
+		insn = &ctx->ir_base[i];
+		flags = ir_op_flags[insn->op];
+		if (flags & IR_OP_FLAG_DATA) {
+			if (insn->op == IR_PHI) {
+				ir_insn *merge_insn = &ctx->ir_base[insn->op1];
+				bool changed = 0;
+
+				if (!IR_IS_FEASIBLE(insn->op1)) {
+					continue;
+				}
+				n = merge_insn->inputs_count + 1;
+				if (n > 3 && _values[i].optx == IR_TOP) {
+					for (j = 0; j < (n>>2); j++) {
+						_values[i+j+1].optx = IR_BOTTOM; /* keep the tail of a long multislot instruction */
+					}
+				}
+				/* for all live merge inputs */
+				for (j = 1; j < n; j++) {
+					ir_ref merge_input = ir_insn_op(merge_insn, j);
+
+					IR_ASSERT(merge_input > 0);
+					if (_values[merge_input].optx != IR_TOP) {
+						ir_ref input = ir_insn_op(insn, j + 1);
+
+						if (input > 0 && IR_IS_TOP(input)) {
+							ir_bitqueue_add(&worklist, input);
+						} else if (ir_sccp_join_values(ctx, _values, i, input)) {
+							changed = 1;
+						}
+					}
+				}
+				if (!changed) {
+					continue;
+				}
+			} else if (ctx->use_lists[i].count == 0) {
+				/* dead code */
+				continue;
+			} else if (EXPECTED(IR_IS_FOLDABLE_OP(insn->op))) {
+				bool may_benefit = 0;
+				bool has_top = 0;
+
+				IR_ASSERT(!IR_OP_HAS_VAR_INPUTS(flags));
+				n = IR_INPUT_EDGES_COUNT(flags);
+				for (p = insn->ops + 1; n > 0; p++, n--) {
+					ir_ref input = *p;
+					if (input > 0) {
+						if (_values[input].optx == IR_TOP) {
+							has_top = 1;
+							ir_bitqueue_add(&worklist, input);
+						} else if (_values[input].optx != IR_BOTTOM) {
+							/* Perform folding only if some of the direct inputs
+							 * are going to be replaced by a constant or a copy.
+							 * This approach may miss some folding optimizations
+							 * dependent on indirect inputs, e.g. reassociation.
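+							 * For instance, once both constants are known,
+							 * ADD(ADD(x, c1), c2) could in principle be folded to
+							 * ADD(x, c1 + c2), but that depends on the indirect
+							 * input c1 and is not detected here.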
+							 */
+							may_benefit = 1;
+						}
+					}
+				}
+				if (has_top) {
+					continue;
+				}
+				if (!may_benefit) {
+					IR_MAKE_BOTTOM(i);
+				} else if (!ir_sccp_fold(ctx, _values, i, insn->opt, insn->op1, insn->op2, insn->op3)) {
+					/* not changed */
+					continue;
+				}
+			} else {
+				IR_MAKE_BOTTOM(i);
+			}
+		} else if (flags & IR_OP_FLAG_BB_START) {
+			if (insn->op == IR_MERGE || insn->op == IR_LOOP_BEGIN) {
+				ir_ref unfeasible_inputs = 0;
+
+				n = insn->inputs_count;
+				if (n > 3 && _values[i].optx == IR_TOP) {
+					for (j = 0; j < (n>>2); j++) {
+						_values[i+j+1].optx = IR_BOTTOM; /* keep the tail of a long multislot instruction */
+					}
+				}
+				for (p = insn->ops + 1; n > 0; p++, n--) {
+					ir_ref input = *p;
+					IR_ASSERT(input > 0);
+					if (_values[input].optx == IR_TOP) {
+						unfeasible_inputs++;
+					}
+				}
+				if (unfeasible_inputs == 0) {
+					IR_MAKE_BOTTOM(i);
+				} else if (_values[i].op1 != unfeasible_inputs) {
+					_values[i].optx = IR_MERGE;
+					_values[i].op1 = unfeasible_inputs;
+				} else {
+					continue;
+				}
+			} else {
+				IR_ASSERT(insn->op == IR_START || IR_IS_FEASIBLE(insn->op1));
+				IR_MAKE_BOTTOM(i);
+			}
+		} else {
+			IR_ASSERT(insn->op1 > 0);
+			if (_values[insn->op1].optx == IR_TOP) {
+				/* control input is not feasible */
+				continue;
+			}
+			if (insn->op == IR_IF) {
+				if (IR_IS_TOP(insn->op2)) {
+					ir_bitqueue_add(&worklist, insn->op2);
+					continue;
+				}
+				if (!IR_IS_BOTTOM(insn->op2)
+#if IR_COMBO_COPY_PROPAGATION
+				 && (IR_IS_CONST_REF(insn->op2) || _values[insn->op2].op != IR_COPY)
+#endif
+				) {
+					bool b = ir_sccp_is_true(ctx, _values, insn->op2);
+					use_list = &ctx->use_lists[i];
+					IR_ASSERT(use_list->count == 2);
+					p = &ctx->use_edges[use_list->refs];
+					use = *p;
+					use_insn = &ctx->ir_base[use];
+					IR_ASSERT(use_insn->op == IR_IF_TRUE || use_insn->op == IR_IF_FALSE);
+					if ((use_insn->op == IR_IF_TRUE) != b) {
+						use = *(p+1);
+						IR_ASSERT(ctx->ir_base[use].op == IR_IF_TRUE || ctx->ir_base[use].op == IR_IF_FALSE);
+					}
+					if (_values[i].optx == IR_TOP) {
+						_values[i].optx = IR_IF;
+						_values[i].op1 = use;
+					} else if (_values[i].optx != IR_IF || _values[i].op1 != use) {
+						IR_MAKE_BOTTOM(i);
+					}
+					if (!IR_IS_BOTTOM(use)) {
+						ir_bitqueue_add(&worklist, use);
+					}
+					continue;
+				}
+				IR_MAKE_BOTTOM(i);
+			} else if (insn->op == IR_SWITCH) {
+				if (IR_IS_TOP(insn->op2)) {
+					ir_bitqueue_add(&worklist, insn->op2);
+					continue;
+				}
+				if (!IR_IS_BOTTOM(insn->op2)) {
+					ir_ref use_case = IR_UNUSED;
+
+					use_list = &ctx->use_lists[i];
+					n = use_list->count;
+					for (j = 0, p = &ctx->use_edges[use_list->refs]; j < n; j++, p++) {
+						use = *p;
+						IR_ASSERT(use > 0);
+						use_insn = &ctx->ir_base[use];
+						if (use_insn->op == IR_CASE_VAL) {
+							if (ir_sccp_is_equal(ctx, _values, insn->op2, use_insn->op2)) {
+								use_case = use;
+								break;
+							}
+						} else if (use_insn->op == IR_CASE_DEFAULT) {
+							use_case = use;
+						}
+					}
+					if (use_case) {
+						use_insn = &ctx->ir_base[use_case];
+						if (_values[i].optx == IR_TOP) {
+							_values[i].optx = IR_IF;
+							_values[i].op1 = use_case;
+						} else if (_values[i].optx != IR_IF || _values[i].op1 != use_case) {
+							IR_MAKE_BOTTOM(i);
+						}
+						if (!IR_IS_BOTTOM(use_case)) {
+							ir_bitqueue_add(&worklist, use_case);
+						}
+					}
+					if (!IR_IS_BOTTOM(i)) {
+						continue;
+					}
+				}
+				IR_MAKE_BOTTOM(i);
+			} else if ((flags & (IR_OP_FLAG_MEM|IR_OP_FLAG_MEM_MASK)) == (IR_OP_FLAG_MEM|IR_OP_FLAG_MEM_LOAD)
+					&& ctx->use_lists[i].count == 1) {
+				/* dead load */
+				_values[i].optx = IR_LOAD;
+			} else {
+				IR_MAKE_BOTTOM(i);
+
+				/* control, call, load and store instructions may have unprocessed inputs */
+				n = IR_INPUT_EDGES_COUNT(flags);
+				if (IR_OP_HAS_VAR_INPUTS(flags) && (n = insn->inputs_count) >
3) { + for (j = 0; j < (n>>2); j++) { + _values[i+j+1].optx = IR_BOTTOM; /* keep the tail of a long multislot instruction */ + } + for (j = 2, p = insn->ops + j; j <= n; j++, p++) { + IR_ASSERT(IR_OPND_KIND(flags, j) == IR_OPND_DATA); + use = *p; + if (use > 0 && UNEXPECTED(_values[use].optx == IR_TOP)) { + ir_bitqueue_add(&worklist, use); + } + } + } else if (n >= 2) { + IR_ASSERT(IR_OPND_KIND(flags, 2) == IR_OPND_DATA); + use = insn->op2; + if (use > 0 && UNEXPECTED(_values[use].optx == IR_TOP)) { + ir_bitqueue_add(&worklist, use); + } + if (n > 2) { + IR_ASSERT(n == 3); + IR_ASSERT(IR_OPND_KIND(flags, 3) == IR_OPND_DATA); + use = insn->op3; + if (use > 0 && UNEXPECTED(_values[use].optx == IR_TOP)) { + ir_bitqueue_add(&worklist, use); + } + } + } + } + } + use_list = &ctx->use_lists[i]; + n = use_list->count; + for (p = &ctx->use_edges[use_list->refs]; n > 0; p++, n--) { + use = *p; + if (_values[use].optx != IR_BOTTOM) { + ir_bitqueue_add(&worklist, use); + } + } + } + +#ifdef IR_DEBUG + if (ctx->flags & IR_DEBUG_SCCP) { + for (i = 1; i < ctx->insns_count; i++) { + if (IR_IS_CONST_OP(_values[i].op)) { + fprintf(stderr, "%d. CONST(", i); + ir_print_const(ctx, &_values[i], stderr, true); + fprintf(stderr, ")\n"); +#if IR_COMBO_COPY_PROPAGATION + } else if (_values[i].op == IR_COPY) { + fprintf(stderr, "%d. COPY(%d)\n", i, _values[i].op1); +#endif + } else if (IR_IS_TOP(i)) { + fprintf(stderr, "%d. TOP\n", i); + } else if (_values[i].op == IR_IF) { + fprintf(stderr, "%d. IF(%d)\n", i, _values[i].op1); + } else if (_values[i].op == IR_MERGE) { + fprintf(stderr, "%d. MERGE(%d)\n", i, _values[i].op1); + } else if (!IR_IS_BOTTOM(i)) { + fprintf(stderr, "%d. %d\n", i, _values[i].op); + } + } + } +#endif + + for (i = 1, value = _values + i; i < ctx->insns_count; value++, i++) { + if (value->op == IR_BOTTOM) { + continue; + } else if (IR_IS_CONST_OP(value->op)) { + /* replace instruction by constant */ + j = ir_const(ctx, value->val, value->type); + ir_sccp_replace_insn(ctx, _values, i, j, &worklist); +#if IR_COMBO_COPY_PROPAGATION + } else if (value->op == IR_COPY) { + ir_sccp_replace_insn(ctx, _values, i, value->op1, &worklist); +#endif + } else if (value->op == IR_TOP) { + /* remove unreachable instruction */ + insn = &ctx->ir_base[i]; + if (ir_op_flags[insn->op] & (IR_OP_FLAG_DATA|IR_OP_FLAG_MEM)) { + if (insn->op != IR_PARAM && insn->op != IR_VAR) { + ir_sccp_remove_insn(ctx, _values, i, &worklist); + } + } else { + if (ir_op_flags[insn->op] & IR_OP_FLAG_TERMINATOR) { + /* remove from terminators list */ + ir_ref prev = ctx->ir_base[1].op1; + if (prev == i) { + ctx->ir_base[1].op1 = insn->op3; + } else { + while (prev) { + if (ctx->ir_base[prev].op3 == i) { + ctx->ir_base[prev].op3 = insn->op3; + break; + } + prev = ctx->ir_base[prev].op3; + } + } + } + ir_sccp_replace_insn(ctx, _values, i, IR_UNUSED, NULL); + } + } else if (value->op == IR_IF) { + /* remove one way IF/SWITCH */ + ir_sccp_remove_if(ctx, _values, i, value->op1); + } else if (value->op == IR_MERGE) { + /* schedule merge to remove unfeasible MERGE inputs */ + ir_bitqueue_add(&worklist, i); + } else if (value->op == IR_LOAD) { + /* schedule dead load elimination */ + ir_bitqueue_add(&worklist, i); + } + } + + while ((i = ir_bitqueue_pop(&worklist)) >= 0) { + if (_values[i].op == IR_MERGE) { + ir_sccp_remove_unfeasible_merge_inputs(ctx, _values, i, _values[i].op1); + } else { + insn = &ctx->ir_base[i]; + if (IR_IS_FOLDABLE_OP(insn->op)) { + if (ctx->use_lists[i].count == 0) { + ir_sccp_remove_insn(ctx, _values, i, &worklist); + } 
else {
+					ir_sccp_fold2(ctx, _values, i, &worklist);
+				}
+			} else if ((ir_op_flags[insn->op] & (IR_OP_FLAG_MEM|IR_OP_FLAG_MEM_MASK)) == (IR_OP_FLAG_MEM|IR_OP_FLAG_MEM_LOAD)
+					&& ctx->use_lists[i].count == 1) {
+				/* dead load */
+				ir_ref next = ctx->use_edges[ctx->use_lists[i].refs];
+
+				/* remove LOAD from double linked control list */
+				ctx->ir_base[next].op1 = insn->op1;
+				ir_sccp_replace_use(ctx, insn->op1, i, next);
+				insn->op1 = IR_UNUSED;
+				ir_sccp_remove_insn(ctx, _values, i, &worklist);
+			}
+		}
+	}
+
+	ir_mem_free(_values);
+	ir_bitqueue_free(&worklist);
+
+	ctx->flags &= ~IR_OPT_IN_SCCP;
+	ctx->flags |= IR_SCCP_DONE;
+
+	return 1;
+}
diff --git a/ext/opcache/jit/ir/ir_strtab.c b/ext/opcache/jit/ir/ir_strtab.c
new file mode 100644
index 00000000000..c5115e5aa0f
--- /dev/null
+++ b/ext/opcache/jit/ir/ir_strtab.c
@@ -0,0 +1,227 @@
+/*
+ * IR - Lightweight JIT Compilation Framework
+ * (String table)
+ * Copyright (C) 2022 Zend by Perforce.
+ * Authors: Dmitry Stogov
+ */
+
+#include "ir.h"
+#include "ir_private.h"
+
+/* The string table is one allocation: an array of uint32_t hash slots
+ * followed by the bucket array. "data" points at the buckets, so the hash
+ * slots are addressed with negative 32-bit indexes and "mask" holds
+ * -hash_size. */
+typedef struct _ir_strtab_bucket {
+	uint32_t    h;
+	uint32_t    len;
+	const char *str;
+	uint32_t    next;
+	ir_ref      val;
+} ir_strtab_bucket;
+
+static uint32_t ir_str_hash(const char *str, size_t len)
+{
+	size_t i;
+	uint32_t h = 5381; /* DJB2-style string hash */
+
+	for (i = 0; i < len; i++) {
+		h = ((h << 5) + h) + str[i];
+	}
+	return h | 0x10000000; /* guarantees a non-zero hash */
+}
+
+static uint32_t ir_strtab_hash_size(uint32_t size)
+{
+	/* Round up to a big enough power of 2 */
+	size -= 1;
+	size |= (size >> 1);
+	size |= (size >> 2);
+	size |= (size >> 4);
+	size |= (size >> 8);
+	size |= (size >> 16);
+	return size + 1;
+}
+
+static void ir_strtab_resize(ir_strtab *strtab)
+{
+	uint32_t old_hash_size = (uint32_t)(-(int32_t)strtab->mask);
+	char *old_data = strtab->data;
+	uint32_t size = strtab->size * 2;
+	uint32_t hash_size = ir_strtab_hash_size(size);
+	char *data = ir_mem_malloc(hash_size * sizeof(uint32_t) + size * sizeof(ir_strtab_bucket));
+	ir_strtab_bucket *p;
+	uint32_t pos, i;
+
+	memset(data, IR_INVALID_IDX, hash_size * sizeof(uint32_t));
+	strtab->data = data + (hash_size * sizeof(uint32_t));
+	strtab->mask = (uint32_t)(-(int32_t)hash_size);
+	strtab->size = size;
+
+	memcpy(strtab->data, old_data, strtab->count * sizeof(ir_strtab_bucket));
+	ir_mem_free(old_data - (old_hash_size * sizeof(uint32_t)));
+
+	i = strtab->count;
+	pos = 0;
+	p = (ir_strtab_bucket*)strtab->data;
+	do {
+		uint32_t h = p->h | strtab->mask;
+		p->next = ((uint32_t*)strtab->data)[(int32_t)h];
+		((uint32_t*)strtab->data)[(int32_t)h] = pos;
+		pos += sizeof(ir_strtab_bucket);
+		p++;
+	} while (--i);
+}
+
+static void ir_strtab_grow_buf(ir_strtab *strtab, uint32_t len)
+{
+	size_t old = (size_t)strtab->buf;
+
+	do {
+		strtab->buf_size *= 2;
+	} while (UNEXPECTED(strtab->buf_size - strtab->buf_top < len + 1));
+
+	strtab->buf = ir_mem_realloc(strtab->buf, strtab->buf_size);
+	if ((size_t)strtab->buf != old) {
+		size_t offset = (size_t)strtab->buf - old;
+		ir_strtab_bucket *p = (ir_strtab_bucket*)strtab->data;
+		uint32_t i;
+		for (i = strtab->count; i > 0; i--) {
+			p->str += offset;
+			p++;
+		}
+	}
+}
+
+void ir_strtab_init(ir_strtab *strtab, uint32_t size, uint32_t buf_size)
+{
+	IR_ASSERT(size > 0);
+	uint32_t hash_size = ir_strtab_hash_size(size);
+	char *data = ir_mem_malloc(hash_size * sizeof(uint32_t) + size * sizeof(ir_strtab_bucket));
+	memset(data, IR_INVALID_IDX, hash_size * sizeof(uint32_t));
+	strtab->data = (data + (hash_size * sizeof(uint32_t)));
+	strtab->mask = (uint32_t)(-(int32_t)hash_size);
+	strtab->size = size;
+	strtab->count = 0;
+
strtab->pos = 0; + if (buf_size) { + strtab->buf = ir_mem_malloc(buf_size); + strtab->buf_size = buf_size; + strtab->buf_top = 0; + } else { + strtab->buf = NULL; + strtab->buf_size = 0; + strtab->buf_top = 0; + } +} + +ir_ref ir_strtab_find(const ir_strtab *strtab, const char *str, uint32_t len) +{ + uint32_t h = ir_str_hash(str, len); + const char *data = (const char*)strtab->data; + uint32_t pos = ((uint32_t*)data)[(int32_t)(h | strtab->mask)]; + ir_strtab_bucket *p; + + while (pos != IR_INVALID_IDX) { + p = (ir_strtab_bucket*)(data + pos); + if (p->h == h + && p->len == len + && memcmp(p->str, str, len) == 0) { + return p->val; + } + pos = p->next; + } + return 0; +} + +ir_ref ir_strtab_lookup(ir_strtab *strtab, const char *str, uint32_t len, ir_ref val) +{ + uint32_t h = ir_str_hash(str, len); + char *data = (char*)strtab->data; + uint32_t pos = ((uint32_t*)data)[(int32_t)(h | strtab->mask)]; + ir_strtab_bucket *p; + + while (pos != IR_INVALID_IDX) { + p = (ir_strtab_bucket*)(data + pos); + if (p->h == h + && p->len == len + && memcmp(p->str, str, len) == 0) { + return p->val; + } + pos = p->next; + } + + IR_ASSERT(val != 0); + + if (UNEXPECTED(strtab->count >= strtab->size)) { + ir_strtab_resize(strtab); + data = strtab->data; + } + + if (strtab->buf) { + if (UNEXPECTED(strtab->buf_size - strtab->buf_top < len + 1)) { + ir_strtab_grow_buf(strtab, len + 1); + } + + memcpy(strtab->buf + strtab->buf_top, str, len); + strtab->buf[strtab->buf_top + len] = 0; + str = (const char*)strtab->buf + strtab->buf_top; + strtab->buf_top += len + 1; + } + + pos = strtab->pos; + strtab->pos += sizeof(ir_strtab_bucket); + strtab->count++; + p = (ir_strtab_bucket*)(data + pos); + p->h = h; + p->len = len; + p->str = str; + h |= strtab->mask; + p->next = ((uint32_t*)data)[(int32_t)h]; + ((uint32_t*)data)[(int32_t)h] = pos; + p->val = val; + return val; +} + +ir_ref ir_strtab_update(ir_strtab *strtab, const char *str, uint32_t len, ir_ref val) +{ + uint32_t h = ir_str_hash(str, len); + char *data = (char*)strtab->data; + uint32_t pos = ((uint32_t*)data)[(int32_t)(h | strtab->mask)]; + ir_strtab_bucket *p; + + while (pos != IR_INVALID_IDX) { + p = (ir_strtab_bucket*)(data + pos); + if (p->h == h + && p->len == len + && memcmp(p->str, str, len) == 0) { + return p->val = val; + } + pos = p->next; + } + return 0; +} + +const char *ir_strtab_str(const ir_strtab *strtab, ir_ref idx) +{ + IR_ASSERT(idx >= 0 && (uint32_t)idx < strtab->count); + return ((const ir_strtab_bucket*)strtab->data)[idx].str; +} + +void ir_strtab_free(ir_strtab *strtab) +{ + uint32_t hash_size = (uint32_t)(-(int32_t)strtab->mask); + char *data = (char*)strtab->data - (hash_size * sizeof(uint32_t)); + ir_mem_free(data); + strtab->data = NULL; + if (strtab->buf) { + ir_mem_free(strtab->buf); + strtab->buf = NULL; + } +} + +void ir_strtab_apply(const ir_strtab *strtab, ir_strtab_apply_t func) +{ + uint32_t i; + + for (i = 0; i < strtab->count; i++) { + const ir_strtab_bucket *b = &((ir_strtab_bucket*)strtab->data)[i]; + func(b->str, b->len, b->val); + } +} diff --git a/ext/opcache/jit/ir/ir_x86.dasc b/ext/opcache/jit/ir/ir_x86.dasc new file mode 100644 index 00000000000..2690e173d67 --- /dev/null +++ b/ext/opcache/jit/ir/ir_x86.dasc @@ -0,0 +1,9056 @@ +/* + * IR - Lightweight JIT Compilation Framework + * (x86/x86_64 native code generator based on DynAsm) + * Copyright (C) 2022 Zend by Perforce. 
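+ *
+ * Note on notation (the usual DynAsm conventions are assumed): lines
+ * beginning with "|" are DynAsm directives or assembler templates, lines
+ * beginning with "||" inside macros are C code passed through verbatim,
+ * and ".macro"/".if" delimit DynAsm macros and conditional sections.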
+ * Authors: Dmitry Stogov + */ + +|.if X64 +|.arch x64 +|.else +|.arch x86 +|.endif + +|.actionlist dasm_actions +|.globals ir_lb +|.section code, cold_code, rodata, jmp_table + +#define IR_IS_SIGNED_32BIT(val) ((((intptr_t)(val)) <= 0x7fffffff) && (((intptr_t)(val)) >= (-2147483647 - 1))) +#define IR_IS_UNSIGNED_32BIT(val) (((uintptr_t)(val)) <= 0xffffffff) +#define IR_IS_32BIT(type, val) (IR_IS_TYPE_SIGNED(type) ? IR_IS_SIGNED_32BIT((val).i64) : IR_IS_UNSIGNED_32BIT((val).u64)) +#define IR_IS_FP_ZERO(insn) ((insn.type == IR_DOUBLE) ? (insn.val.u64 == 0) : (insn.val.u32 == 0)) +#define IR_MAY_USE_32BIT_ADDR(addr) \ + (ctx->code_buffer && \ + IR_IS_SIGNED_32BIT((char*)(addr) - (char*)ctx->code_buffer) && \ + IR_IS_SIGNED_32BIT((char*)(addr) - ((char*)ctx->code_buffer + ctx->code_buffer_size))) + +#define IR_SPILL_POS_TO_OFFSET(offset) \ + ((ctx->flags & IR_USE_FRAME_POINTER) ? \ + ((offset) - (ctx->stack_frame_size - ctx->stack_frame_alignment)) : \ + ((offset) + ctx->call_stack_size)) + +|.macro ASM_REG_OP, op, type, reg +|| switch (ir_type_size[type]) { +|| default: +|| IR_ASSERT(0); +|| case 1: +| op Rb(reg) +|| break; +|| case 2: +| op Rw(reg) +|| break; +|| case 4: +| op Rd(reg) +|| break; +|.if X64 +|| case 8: +| op Rq(reg) +|| break; +|.endif +|| } +|.endmacro + +|.macro ASM_MEM_OP, op, type, mem +|| switch (ir_type_size[type]) { +|| default: +|| IR_ASSERT(0); +|| case 1: +| op byte mem +|| break; +|| case 2: +| op word mem +|| break; +|| case 4: +| op dword mem +|| break; +|.if X64 +|| case 8: +| op qword mem +|| break; +|.endif +|| } +|.endmacro + +|.macro ASM_REG_REG_OP, op, type, dst, src +|| switch (ir_type_size[type]) { +|| default: +|| IR_ASSERT(0); +|| case 1: +| op Rb(dst), Rb(src) +|| break; +|| case 2: +| op Rw(dst), Rw(src) +|| break; +|| case 4: +| op Rd(dst), Rd(src) +|| break; +|.if X64 +|| case 8: +| op Rq(dst), Rq(src) +|| break; +|.endif +|| } +|.endmacro + +|.macro ASM_REG_REG_OP2, op, type, dst, src +|| switch (ir_type_size[type]) { +|| default: +|| IR_ASSERT(0); +|| case 1: +|| case 2: +| op Rw(dst), Rw(src) +|| break; +|| case 4: +| op Rd(dst), Rd(src) +|| break; +|.if X64 +|| case 8: +| op Rq(dst), Rq(src) +|| break; +|.endif +|| } +|.endmacro + +|.macro ASM_REG_TXT_OP, op, type, dst, src +|| switch (ir_type_size[type]) { +|| default: +|| IR_ASSERT(0); +|| case 1: +| op Rb(dst), src +|| break; +|| case 2: +| op Rw(dst), src +|| break; +|| case 4: +| op Rd(dst), src +|| break; +|.if X64 +|| case 8: +| op Rq(dst), src +|| break; +|.endif +|| } +|.endmacro + +|.macro ASM_REG_IMM_OP, op, type, dst, src +|| switch (ir_type_size[type]) { +|| default: +|| IR_ASSERT(0); +|| case 1: +| op Rb(dst), (src & 0xff) +|| break; +|| case 2: +| op Rw(dst), (src & 0xffff) +|| break; +|| case 4: +| op Rd(dst), src +|| break; +|.if X64 +|| case 8: +| op Rq(dst), src +|| break; +|.endif +|| } +|.endmacro + +|.macro ASM_MEM_REG_OP, op, type, dst, src +|| switch (ir_type_size[type]) { +|| default: +|| IR_ASSERT(0); +|| case 1: +| op byte dst, Rb(src) +|| break; +|| case 2: +| op word dst, Rw(src) +|| break; +|| case 4: +| op dword dst, Rd(src) +|| break; +|.if X64 +|| case 8: +| op qword dst, Rq(src) +|| break; +|.endif +|| } +|.endmacro + +|.macro ASM_MEM_TXT_OP, op, type, dst, src +|| switch (ir_type_size[type]) { +|| default: +|| IR_ASSERT(0); +|| case 1: +| op byte dst, src +|| break; +|| case 2: +| op word dst, src +|| break; +|| case 4: +| op dword dst, src +|| break; +|.if X64 +|| case 8: +| op qword dst, src +|| break; +|.endif +|| } +|.endmacro + +|.macro ASM_MEM_IMM_OP, 
op, type, dst, src +|| switch (ir_type_size[type]) { +|| default: +|| IR_ASSERT(0); +|| case 1: +| op byte dst, (src & 0xff) +|| break; +|| case 2: +| op word dst, (src & 0xffff) +|| break; +|| case 4: +| op dword dst, src +|| break; +|.if X64 +|| case 8: +| op qword dst, src +|| break; +|.endif +|| } +|.endmacro + +|.macro ASM_REG_MEM_OP, op, type, dst, src +|| switch (ir_type_size[type]) { +|| default: +|| IR_ASSERT(0); +|| case 1: +| op Rb(dst), byte src +|| break; +|| case 2: +| op Rw(dst), word src +|| break; +|| case 4: +| op Rd(dst), dword src +|| break; +|.if X64 +|| case 8: +| op Rq(dst), qword src +|| break; +|.endif +|| } +|.endmacro + +|.macro ASM_REG_REG_IMUL, type, dst, src +|| switch (ir_type_size[type]) { +|| default: +|| IR_ASSERT(0); +|| case 2: +| imul Rw(dst), Rw(src) +|| break; +|| case 4: +| imul Rd(dst), Rd(src) +|| break; +|.if X64 +|| case 8: +| imul Rq(dst), Rq(src) +|| break; +|.endif +|| } +|.endmacro + +|.macro ASM_REG_IMM_IMUL, type, dst, src +|| switch (ir_type_size[type]) { +|| default: +|| IR_ASSERT(0); +|| case 2: +| imul Rw(dst), src +|| break; +|| case 4: +| imul Rd(dst), src +|| break; +|.if X64 +|| case 8: +| imul Rq(dst), src +|| break; +|.endif +|| } +|.endmacro + +|.macro ASM_REG_MEM_IMUL, type, dst, src +|| switch (ir_type_size[type]) { +|| default: +|| IR_ASSERT(0); +|| case 2: +| imul Rw(dst), word src +|| break; +|| case 4: +| imul Rd(dst), dword src +|| break; +|.if X64 +|| case 8: +| imul Rq(dst), qword src +|| break; +|.endif +|| } +|.endmacro + +|.macro ASM_SSE2_REG_REG_OP, fop, dop, type, dst, src +|| if (type == IR_DOUBLE) { +| dop xmm(dst-IR_REG_FP_FIRST), xmm(src-IR_REG_FP_FIRST) +|| } else { +|| IR_ASSERT(type == IR_FLOAT); +| fop xmm(dst-IR_REG_FP_FIRST), xmm(src-IR_REG_FP_FIRST) +|| } +|.endmacro + +|.macro ASM_SSE2_REG_MEM_OP, fop, dop, type, dst, src +|| if (type == IR_DOUBLE) { +| dop xmm(dst-IR_REG_FP_FIRST), qword src +|| } else { +|| IR_ASSERT(type == IR_FLOAT); +| fop xmm(dst-IR_REG_FP_FIRST), dword src +|| } +|.endmacro + +|.macro ASM_AVX_REG_REG_REG_OP, fop, dop, type, dst, op1, op2 +|| if (type == IR_DOUBLE) { +| dop xmm(dst-IR_REG_FP_FIRST), xmm(op1-IR_REG_FP_FIRST), xmm(op2-IR_REG_FP_FIRST) +|| } else { +|| IR_ASSERT(type == IR_FLOAT); +| fop xmm(dst-IR_REG_FP_FIRST), xmm(op1-IR_REG_FP_FIRST), xmm(op2-IR_REG_FP_FIRST) +|| } +|.endmacro + +|.macro ASM_AVX_REG_REG_MEM_OP, fop, dop, type, dst, op1, op2 +|| if (type == IR_DOUBLE) { +| dop xmm(dst-IR_REG_FP_FIRST), xmm(op1-IR_REG_FP_FIRST), qword op2 +|| } else { +|| IR_ASSERT(type == IR_FLOAT); +| fop xmm(dst-IR_REG_FP_FIRST), xmm(op1-IR_REG_FP_FIRST), dword op2 +|| } +|.endmacro + +|.macro ASM_FP_REG_REG_OP, fop, dop, avx_fop, avx_dop, type, dst, src +|| if (ctx->mflags & IR_X86_AVX) { +| ASM_SSE2_REG_REG_OP avx_fop, avx_dop, type, dst, src +|| } else { +| ASM_SSE2_REG_REG_OP fop, dop, type, dst, src +|| } +|.endmacro + +|.macro ASM_FP_MEM_REG_OP, fop, dop, avx_fop, avx_dop, type, dst, src +|| if (type == IR_DOUBLE) { +|| if (ctx->mflags & IR_X86_AVX) { +| avx_dop qword dst, xmm(src-IR_REG_FP_FIRST) +|| } else { +| dop qword dst, xmm(src-IR_REG_FP_FIRST) +|| } +|| } else { +|| IR_ASSERT(type == IR_FLOAT); +|| if (ctx->mflags & IR_X86_AVX) { +| avx_fop dword dst, xmm(src-IR_REG_FP_FIRST) +|| } else { +| fop dword dst, xmm(src-IR_REG_FP_FIRST) +|| } +|| } +|.endmacro + +|.macro ASM_FP_REG_MEM_OP, fop, dop, avx_fop, avx_dop, type, dst, src +|| if (ctx->mflags & IR_X86_AVX) { +| ASM_SSE2_REG_MEM_OP avx_fop, avx_dop, type, dst, src +|| } else { +| ASM_SSE2_REG_MEM_OP fop, dop, 
type, dst, src +|| } +|.endmacro + +typedef struct _ir_backend_data { + ir_reg_alloc_data ra_data; + uint32_t dessa_from_block; + dasm_State *dasm_state; + int rodata_label, jmp_table_label; + bool double_neg_const; + bool float_neg_const; + bool double_abs_const; + bool float_abs_const; + bool double_zero_const; +} ir_backend_data; + +#define IR_GP_REG_NAME(code, name64, name32, name16, name8, name8h) \ + #name64, +#define IR_GP_REG_NAME32(code, name64, name32, name16, name8, name8h) \ + #name32, +#define IR_GP_REG_NAME16(code, name64, name32, name16, name8, name8h) \ + #name16, +#define IR_GP_REG_NAME8(code, name64, name32, name16, name8, name8h) \ + #name8, +#define IR_FP_REG_NAME(code, name) \ + #name, + +static const char *_ir_reg_name[IR_REG_NUM] = { + IR_GP_REGS(IR_GP_REG_NAME) + IR_FP_REGS(IR_FP_REG_NAME) +}; + +static const char *_ir_reg_name32[IR_REG_NUM] = { + IR_GP_REGS(IR_GP_REG_NAME32) +}; + +static const char *_ir_reg_name16[IR_REG_NUM] = { + IR_GP_REGS(IR_GP_REG_NAME16) +}; + +static const char *_ir_reg_name8[IR_REG_NUM] = { + IR_GP_REGS(IR_GP_REG_NAME8) +}; + +/* Calling Convention */ +#ifdef _WIN64 + +static const int8_t _ir_int_reg_params[IR_REG_INT_ARGS] = { + IR_REG_INT_ARG1, + IR_REG_INT_ARG2, + IR_REG_INT_ARG3, + IR_REG_INT_ARG4, +}; + +static const int8_t _ir_fp_reg_params[IR_REG_FP_ARGS] = { + IR_REG_FP_ARG1, + IR_REG_FP_ARG2, + IR_REG_FP_ARG3, + IR_REG_FP_ARG4, +}; + +#elif defined(IR_TARGET_X64) + +static const int8_t _ir_int_reg_params[IR_REG_INT_ARGS] = { + IR_REG_INT_ARG1, + IR_REG_INT_ARG2, + IR_REG_INT_ARG3, + IR_REG_INT_ARG4, + IR_REG_INT_ARG5, + IR_REG_INT_ARG6, +}; + +static const int8_t _ir_fp_reg_params[IR_REG_FP_ARGS] = { + IR_REG_FP_ARG1, + IR_REG_FP_ARG2, + IR_REG_FP_ARG3, + IR_REG_FP_ARG4, + IR_REG_FP_ARG5, + IR_REG_FP_ARG6, + IR_REG_FP_ARG7, + IR_REG_FP_ARG8, +}; + +#else + +static const int8_t *_ir_int_reg_params = NULL; +static const int8_t *_ir_fp_reg_params = NULL; +static const int8_t _ir_int_fc_reg_params[IR_REG_INT_FCARGS] = { + IR_REG_INT_FCARG1, + IR_REG_INT_FCARG2, +}; +static const int8_t *_ir_fp_fc_reg_params = NULL; + +#endif + +const char *ir_reg_name(int8_t reg, ir_type type) +{ + if (reg >= IR_REG_NUM) { + if (reg == IR_REG_SCRATCH) { + return "SCRATCH"; + } else { + IR_ASSERT(reg == IR_REG_ALL); + return "ALL"; + } + } + IR_ASSERT(reg >= 0 && reg < IR_REG_NUM); + if (type == IR_VOID) { + type = (reg < IR_REG_FP_FIRST) ? 
IR_ADDR : IR_DOUBLE; + } + if (IR_IS_TYPE_FP(type) || ir_type_size[type] == 8) { + return _ir_reg_name[reg]; + } else if (ir_type_size[type] == 4) { + return _ir_reg_name32[reg]; + } else if (ir_type_size[type] == 2) { + return _ir_reg_name16[reg]; + } else { + IR_ASSERT(ir_type_size[type] == 1); + return _ir_reg_name8[reg]; + } +} + +#define IR_RULES(_) \ + _(CMP_INT) \ + _(CMP_FP) \ + _(MUL_INT) \ + _(DIV_INT) \ + _(MOD_INT) \ + _(TEST_INT) \ + _(SETCC_INT) \ + _(TESTCC_INT) \ + _(LEA_OB) \ + _(LEA_SI) \ + _(LEA_SIB) \ + _(LEA_IB) \ + _(LEA_SI_O) \ + _(LEA_SIB_O) \ + _(LEA_IB_O) \ + _(LEA_I_OB) \ + _(LEA_OB_I) \ + _(LEA_OB_SI) \ + _(LEA_SI_OB) \ + _(LEA_B_SI) \ + _(LEA_SI_B) \ + _(INC) \ + _(DEC) \ + _(MUL_PWR2) \ + _(DIV_PWR2) \ + _(MOD_PWR2) \ + _(BOOL_NOT_INT) \ + _(ABS_INT) \ + _(OP_INT) \ + _(OP_FP) \ + _(IMUL3) \ + _(BINOP_INT) \ + _(BINOP_SSE2) \ + _(BINOP_AVX) \ + _(SHIFT) \ + _(SHIFT_CONST) \ + _(COPY_INT) \ + _(COPY_FP) \ + _(CMP_AND_BRANCH_INT) \ + _(CMP_AND_BRANCH_FP) \ + _(TEST_AND_BRANCH_INT) \ + _(JCC_INT) \ + _(GUARD_CMP_INT) \ + _(GUARD_CMP_FP) \ + _(GUARD_TEST_INT) \ + _(GUARD_JCC_INT) \ + _(GUARD_OVERFLOW) \ + _(OVERFLOW_AND_BRANCH) \ + _(MIN_MAX_INT) \ + _(MEM_OP_INT) \ + _(MEM_INC) \ + _(MEM_DEC) \ + _(MEM_MUL_PWR2) \ + _(MEM_DIV_PWR2) \ + _(MEM_MOD_PWR2) \ + _(MEM_BINOP_INT) \ + _(MEM_SHIFT) \ + _(MEM_SHIFT_CONST) \ + _(REG_BINOP_INT) \ + _(VSTORE_INT) \ + _(VSTORE_FP) \ + _(LOAD_INT) \ + _(LOAD_FP) \ + _(STORE_INT) \ + _(STORE_FP) \ + _(IF_INT) \ + _(RETURN_VOID) \ + _(RETURN_INT) \ + _(RETURN_FP) \ + +#define IR_RULE_ENUM(name) IR_ ## name, + +enum _ir_rule { + IR_FIRST_RULE = IR_LAST_OP, + IR_RULES(IR_RULE_ENUM) + IR_LAST_RULE +}; + +#define IR_RULE_NAME(name) #name, +const char *ir_rule_name[IR_LAST_OP] = { + NULL, + IR_RULES(IR_RULE_NAME) + NULL +}; + +/* register allocation */ +int ir_get_target_constraints(const ir_ctx *ctx, ir_ref ref, ir_target_constraints *constraints) +{ + uint32_t rule = ir_rule(ctx, ref); + const ir_insn *insn; + int n = 0; + int flags = IR_USE_MUST_BE_IN_REG | IR_OP1_MUST_BE_IN_REG | IR_OP2_MUST_BE_IN_REG | IR_OP3_MUST_BE_IN_REG; + + constraints->def_reg = IR_REG_NONE; + constraints->hints_count = 0; + switch (rule & IR_RULE_MASK) { + case IR_BINOP_INT: + insn = &ctx->ir_base[ref]; + if (rule & IR_FUSED) { + if (ctx->ir_base[insn->op1].op == IR_RLOAD) { + flags = IR_OP1_MUST_BE_IN_REG | IR_OP2_SHOULD_BE_IN_REG; + } else { + flags = IR_OP2_MUST_BE_IN_REG; + } + } else { + flags = IR_DEF_REUSES_OP1_REG | IR_USE_MUST_BE_IN_REG | IR_OP1_SHOULD_BE_IN_REG | IR_OP2_SHOULD_BE_IN_REG; + } + if (IR_IS_CONST_REF(insn->op2) && insn->op1 != insn->op2) { + insn = &ctx->ir_base[insn->op2]; + if (ir_type_size[insn->type] == 8 && !IR_IS_32BIT(insn->type, insn->val)) { + constraints->tmp_regs[0] = IR_TMP_REG(2, insn->type, IR_LOAD_SUB_REF, IR_DEF_SUB_REF); + n = 1; + } + } + break; + case IR_IMUL3: + flags = IR_USE_MUST_BE_IN_REG | IR_OP1_SHOULD_BE_IN_REG; + break; + case IR_SHIFT: + if (rule & IR_FUSED) { + flags = IR_OP2_MUST_BE_IN_REG; + } else { + flags = IR_DEF_REUSES_OP1_REG | IR_DEF_CONFLICTS_WITH_INPUT_REGS | IR_USE_MUST_BE_IN_REG | IR_OP1_SHOULD_BE_IN_REG | IR_OP2_SHOULD_BE_IN_REG; + } + constraints->hints[1] = IR_REG_NONE; + constraints->hints[2] = IR_REG_RCX; + constraints->hints_count = 3; + constraints->tmp_regs[0] = IR_SCRATCH_REG(IR_REG_RCX, IR_LOAD_SUB_REF, IR_DEF_SUB_REF); + n = 1; + break; + case IR_MUL_INT: + /* %rax - used as input and result */ + constraints->def_reg = IR_REG_RAX; + constraints->hints[1] = IR_REG_RAX; + 
constraints->hints_count = 2; + flags = IR_USE_SHOULD_BE_IN_REG | IR_OP1_SHOULD_BE_IN_REG | IR_OP2_SHOULD_BE_IN_REG; + constraints->tmp_regs[0] = IR_SCRATCH_REG(IR_REG_RDX, IR_USE_SUB_REF, IR_DEF_SUB_REF); + constraints->tmp_regs[1] = IR_SCRATCH_REG(IR_REG_RAX, IR_LOAD_SUB_REF, IR_SAVE_SUB_REF); + n = 2; + break; + case IR_DIV_INT: + /* %rax - used as input and result */ + constraints->def_reg = IR_REG_RAX; + constraints->hints[1] = IR_REG_RAX; + constraints->hints_count = 2; + flags = IR_USE_SHOULD_BE_IN_REG | IR_OP1_SHOULD_BE_IN_REG | IR_OP2_SHOULD_BE_IN_REG; + constraints->tmp_regs[0] = IR_SCRATCH_REG(IR_REG_RDX, IR_LOAD_SUB_REF, IR_DEF_SUB_REF); + constraints->tmp_regs[1] = IR_SCRATCH_REG(IR_REG_RAX, IR_LOAD_SUB_REF, IR_SAVE_SUB_REF); + n = 2; + goto op2_const; + case IR_MOD_INT: + constraints->def_reg = IR_REG_RDX; + constraints->hints[1] = IR_REG_RAX; + constraints->hints_count = 2; + flags = IR_USE_SHOULD_BE_IN_REG | IR_OP1_SHOULD_BE_IN_REG | IR_OP2_SHOULD_BE_IN_REG; + constraints->tmp_regs[0] = IR_SCRATCH_REG(IR_REG_RAX, IR_LOAD_SUB_REF, IR_DEF_SUB_REF); + constraints->tmp_regs[1] = IR_SCRATCH_REG(IR_REG_RDX, IR_LOAD_SUB_REF, IR_SAVE_SUB_REF); + n = 2; + goto op2_const; + case IR_MIN_MAX_INT: + flags = IR_DEF_REUSES_OP1_REG | IR_USE_MUST_BE_IN_REG | IR_OP1_SHOULD_BE_IN_REG | IR_OP2_MUST_BE_IN_REG; +op2_const: + insn = &ctx->ir_base[ref]; + if (IR_IS_CONST_REF(insn->op2) && insn->op1 != insn->op2) { + constraints->tmp_regs[n] = IR_TMP_REG(2, insn->type, IR_LOAD_SUB_REF, IR_DEF_SUB_REF); + n++; + } + break; + case IR_CMP_INT: + case IR_TEST_INT: + insn = &ctx->ir_base[ref]; + flags = IR_USE_MUST_BE_IN_REG | IR_OP1_MUST_BE_IN_REG | IR_OP2_SHOULD_BE_IN_REG; + if (IR_IS_CONST_REF(insn->op1)) { + const ir_insn *val_insn = &ctx->ir_base[insn->op1]; + constraints->tmp_regs[0] = IR_TMP_REG(1, val_insn->type, IR_LOAD_SUB_REF, IR_DEF_SUB_REF); + n = 1; + } else if (ir_rule(ctx, insn->op1) & IR_FUSED) { + flags = IR_USE_MUST_BE_IN_REG | IR_OP2_MUST_BE_IN_REG; + } + if (IR_IS_CONST_REF(insn->op2) && insn->op1 != insn->op2) { + const ir_insn *val_insn = &ctx->ir_base[insn->op2]; + flags = IR_USE_MUST_BE_IN_REG | IR_OP1_SHOULD_BE_IN_REG; + if (ir_type_size[val_insn->type] == 8 && !IR_IS_32BIT(val_insn->type, val_insn->val)) { + constraints->tmp_regs[n] = IR_TMP_REG(2, val_insn->type, IR_LOAD_SUB_REF, IR_DEF_SUB_REF); + n++; + } + } + break; + case IR_CMP_FP: + insn = &ctx->ir_base[ref]; + if (!(rule & IR_FUSED)) { + constraints->tmp_regs[0] = IR_TMP_REG(3, IR_BOOL, IR_DEF_SUB_REF, IR_SAVE_SUB_REF); + n = 1; + } + flags = IR_USE_MUST_BE_IN_REG | IR_OP1_MUST_BE_IN_REG | IR_OP2_SHOULD_BE_IN_REG; + if (IR_IS_CONST_REF(insn->op1)) { + const ir_insn *val_insn = &ctx->ir_base[insn->op1]; + constraints->tmp_regs[n] = IR_TMP_REG(1, val_insn->type, IR_LOAD_SUB_REF, IR_DEF_SUB_REF); + n++; + } + break; + case IR_BINOP_AVX: + flags = IR_USE_MUST_BE_IN_REG | IR_OP1_MUST_BE_IN_REG | IR_OP2_SHOULD_BE_IN_REG; + insn = &ctx->ir_base[ref]; + if (IR_IS_CONST_REF(insn->op1)) { + constraints->tmp_regs[0] = IR_TMP_REG(1, insn->type, IR_LOAD_SUB_REF, IR_DEF_SUB_REF); + n = 1; + } + break; + case IR_VSTORE_INT: + flags = IR_OP3_MUST_BE_IN_REG; + insn = &ctx->ir_base[ref]; + if (IR_IS_CONST_REF(insn->op3)) { + insn = &ctx->ir_base[insn->op3]; + if (ir_type_size[insn->type] == 8 && !IR_IS_32BIT(insn->type, insn->val)) { + constraints->tmp_regs[0] = IR_TMP_REG(3, insn->type, IR_LOAD_SUB_REF, IR_DEF_SUB_REF); + n = 1; + } + } + break; + case IR_STORE_INT: + flags = IR_OP2_MUST_BE_IN_REG | IR_OP3_MUST_BE_IN_REG; + insn = 
&ctx->ir_base[ref]; + if (IR_IS_CONST_REF(insn->op2)) { + const ir_insn *val_insn = &ctx->ir_base[insn->op2]; + IR_ASSERT(val_insn->type == IR_ADDR); + if (ir_type_size[val_insn->type] == 8 && !IR_IS_SIGNED_32BIT(val_insn->val.i64)) { + constraints->tmp_regs[0] = IR_TMP_REG(2, IR_ADDR, IR_LOAD_SUB_REF, IR_DEF_SUB_REF); + n = 1; + } + } + if (IR_IS_CONST_REF(insn->op3)) { + const ir_insn *val_insn = &ctx->ir_base[insn->op3]; + if (ir_type_size[val_insn->type] == 8 && !IR_IS_32BIT(val_insn->type, val_insn->val)) { + constraints->tmp_regs[n] = IR_TMP_REG(3, val_insn->type, IR_LOAD_SUB_REF, IR_DEF_SUB_REF); + n++; + } + } + break; + case IR_VSTORE_FP: + flags = IR_OP3_MUST_BE_IN_REG; + insn = &ctx->ir_base[ref]; + if (IR_IS_CONST_REF(insn->op3)) { + insn = &ctx->ir_base[insn->op3]; + constraints->tmp_regs[0] = IR_TMP_REG(3, insn->type, IR_LOAD_SUB_REF, IR_DEF_SUB_REF); + n = 1; + } + break; + case IR_LOAD_FP: + case IR_MEM_BINOP_INT: + flags = IR_USE_MUST_BE_IN_REG | IR_OP2_MUST_BE_IN_REG; + insn = &ctx->ir_base[ref]; + if (IR_IS_CONST_REF(insn->op2)) { + const ir_insn *val_insn = &ctx->ir_base[insn->op2]; + IR_ASSERT(val_insn->type == IR_ADDR); + if (ir_type_size[val_insn->type] == 8 && !IR_IS_32BIT(val_insn->type, val_insn->val)) { + constraints->tmp_regs[0] = IR_TMP_REG(2, IR_ADDR, IR_LOAD_SUB_REF, IR_DEF_SUB_REF); + n = 1; + } + } + break; + case IR_STORE_FP: + flags = IR_OP2_MUST_BE_IN_REG | IR_OP3_MUST_BE_IN_REG; + insn = &ctx->ir_base[ref]; + if (IR_IS_CONST_REF(insn->op2)) { + IR_ASSERT(ctx->ir_base[insn->op2].type == IR_ADDR); + constraints->tmp_regs[0] = IR_TMP_REG(2, IR_ADDR, IR_LOAD_SUB_REF, IR_DEF_SUB_REF); + n = 1; + } + if (IR_IS_CONST_REF(insn->op3)) { + insn = &ctx->ir_base[insn->op3]; + constraints->tmp_regs[n] = IR_TMP_REG(3, insn->type, IR_LOAD_SUB_REF, IR_DEF_SUB_REF); + n++; + } + break; + case IR_SWITCH: + flags = IR_OP2_MUST_BE_IN_REG; + insn = &ctx->ir_base[ref]; + if (IR_IS_CONST_REF(insn->op2)) { + insn = &ctx->ir_base[insn->op2]; + constraints->tmp_regs[0] = IR_TMP_REG(2, insn->type, IR_LOAD_SUB_REF, IR_DEF_SUB_REF); + n = 1; + } + if (sizeof(void*) == 8) { + constraints->tmp_regs[n] = IR_TMP_REG(3, IR_ADDR, IR_LOAD_SUB_REF, IR_DEF_SUB_REF); + n++; + } + break; + case IR_CALL: + insn = &ctx->ir_base[ref]; + if (IR_IS_TYPE_INT(insn->type)) { + constraints->def_reg = IR_REG_INT_RET1; +#ifdef IR_REG_FP_RET1 + } else { + constraints->def_reg = IR_REG_FP_RET1; +#endif + } + constraints->tmp_regs[0] = IR_SCRATCH_REG(IR_REG_SCRATCH, IR_USE_SUB_REF, IR_DEF_SUB_REF); + n = 1; + IR_FALLTHROUGH; + case IR_TAILCALL: + insn = &ctx->ir_base[ref]; + if (insn->inputs_count > 2) { + constraints->hints[2] = IR_REG_NONE; + constraints->hints_count = ir_get_args_regs(ctx, insn, constraints->hints); + if (!IR_IS_CONST_REF(insn->op2)) { + constraints->tmp_regs[n] = IR_TMP_REG(1, IR_ADDR, IR_LOAD_SUB_REF, IR_USE_SUB_REF); + n++; + } + } + flags = IR_USE_SHOULD_BE_IN_REG | IR_OP2_SHOULD_BE_IN_REG | IR_OP3_SHOULD_BE_IN_REG; + break; + case IR_BINOP_SSE2: + flags = IR_DEF_REUSES_OP1_REG | IR_USE_MUST_BE_IN_REG | IR_OP1_SHOULD_BE_IN_REG | IR_OP2_SHOULD_BE_IN_REG; + break; + case IR_SHIFT_CONST: + case IR_INC: + case IR_DEC: + case IR_MUL_PWR2: + case IR_DIV_PWR2: + case IR_MOD_PWR2: + case IR_OP_INT: + case IR_OP_FP: + flags = IR_DEF_REUSES_OP1_REG | IR_USE_MUST_BE_IN_REG | IR_OP1_SHOULD_BE_IN_REG; + break; + case IR_COPY_INT: + case IR_COPY_FP: + case IR_SEXT: + case IR_ZEXT: + flags = IR_DEF_REUSES_OP1_REG | IR_USE_MUST_BE_IN_REG | IR_OP1_SHOULD_BE_IN_REG; + break; + case IR_ABS_INT: + 
flags = IR_DEF_CONFLICTS_WITH_INPUT_REGS | IR_USE_MUST_BE_IN_REG | IR_OP1_MUST_BE_IN_REG; + break; + case IR_PARAM: + constraints->def_reg = ir_get_param_reg(ctx, ref); + flags = 0; + break; + case IR_PI: + case IR_PHI: + flags = IR_USE_SHOULD_BE_IN_REG; + break; + case IR_RLOAD: + constraints->def_reg = ctx->ir_base[ref].op2; + flags = IR_USE_SHOULD_BE_IN_REG; + break; + case IR_EXITCALL: + flags = IR_USE_MUST_BE_IN_REG; + constraints->def_reg = IR_REG_INT_RET1; + break; + case IR_IF_INT: + case IR_GUARD: + case IR_GUARD_NOT: + flags = IR_OP2_SHOULD_BE_IN_REG; + break; + case IR_IJMP: + flags = IR_OP2_SHOULD_BE_IN_REG; + break; + case IR_RSTORE: + flags = IR_OP3_SHOULD_BE_IN_REG; + break; + case IR_RETURN_INT: + flags = IR_OP2_SHOULD_BE_IN_REG; + constraints->hints[2] = IR_REG_INT_RET1; + constraints->hints_count = 3; + break; + case IR_RETURN_FP: +#ifdef IR_REG_FP_RET1 + flags = IR_OP2_SHOULD_BE_IN_REG; + constraints->hints[2] = IR_REG_FP_RET1; + constraints->hints_count = 3; +#endif + break; + case IR_SNAPSHOT: + flags = 0; + break; + } + constraints->tmps_count = n; + + return flags; +} + +/* instruction selection */ +static uint32_t ir_match_insn(ir_ctx *ctx, ir_ref ref); + +static void ir_match_fuse_addr(ir_ctx *ctx, ir_ref addr_ref) +{ + if (!IR_IS_CONST_REF(addr_ref)) { + uint32_t rule = ctx->rules[addr_ref]; + + if (!rule) { + ctx->rules[addr_ref] = rule = ir_match_insn(ctx, addr_ref); + } + if (rule == IR_LEA_OB) { + ir_use_list *use_list = &ctx->use_lists[addr_ref]; + ir_ref j = use_list->count; + + if (j > 1) { + /* check if address is used only in LOAD and STORE */ + ir_ref *p = &ctx->use_edges[use_list->refs]; + + do { + ir_insn *insn = &ctx->ir_base[*p]; + if (insn->op != IR_LOAD && (insn->op != IR_STORE || insn->op3 == addr_ref)) { + return; + } + p++; + } while (--j); + } + ctx->rules[addr_ref] = IR_FUSED | IR_SIMPLE | IR_LEA_OB; + } + } +} + +/* A naive check if there is a STORE or CALL between this LOAD and the fusion root */ +static bool ir_match_has_mem_deps(ir_ctx *ctx, ir_ref ref, ir_ref root) +{ + if (ref + 1 != root) { + ir_ref pos = ctx->prev_ref[root]; + + do { + ir_insn *insn = &ctx->ir_base[pos]; + + if (insn->op == IR_STORE) { + // TODO: check if LOAD and STORE addresses may alias + return 1; + } else if (insn->op == IR_CALL) { + return 1; + } + pos = ctx->prev_ref[pos]; + } while (ref != pos); + } + return 0; +} + +static bool ir_match_fuse_load(ir_ctx *ctx, ir_ref ref, ir_ref root) +{ + if (ir_in_same_block(ctx, ref) + && ctx->ir_base[ref].op == IR_LOAD + && ctx->use_lists[ref].count == 2 + && !ir_match_has_mem_deps(ctx, ref, root)) { + ir_ref addr_ref = ctx->ir_base[ref].op2; + ir_insn *addr_insn = &ctx->ir_base[addr_ref]; + + if (IR_IS_CONST_REF(addr_ref)) { + if (addr_insn->op == IR_C_ADDR && + (sizeof(void*) == 4 || IR_IS_SIGNED_32BIT(addr_insn->val.i64))) { + ctx->rules[ref] = IR_FUSED | IR_SIMPLE | IR_LOAD; + return 1; + } + } else { + ctx->rules[ref] = IR_FUSED | IR_SIMPLE | IR_LOAD; + ir_match_fuse_addr(ctx, addr_ref); + return 1; + } + } else if (ir_in_same_block(ctx, ref) + && ctx->ir_base[ref].op == IR_VLOAD) { + return 1; + } + return 0; +} + +static void ir_swap_ops(ir_insn *insn) +{ + ir_ref tmp = insn->op1; + insn->op1 = insn->op2; + insn->op2 = tmp; +} + +static void ir_match_fuse_load_commutative_int(ir_ctx *ctx, ir_insn *insn, ir_ref root) +{ + if (IR_IS_CONST_REF(insn->op2)) { + if (ir_type_size[insn->type] > 4 && !IR_IS_32BIT(ctx->ir_base[insn->op2].type, ctx->ir_base[insn->op2].val) + && !IR_IS_CONST_REF(insn->op1) + && 
ir_match_fuse_load(ctx, insn->op1, root)) { + ir_swap_ops(insn); + } + } else if (!ir_match_fuse_load(ctx, insn->op2, root)) { + if (!IR_IS_CONST_REF(insn->op1) + && ir_match_fuse_load(ctx, insn->op1, root)) { + ir_swap_ops(insn); + } + } +} + +static void ir_match_fuse_load_commutative_fp(ir_ctx *ctx, ir_insn *insn, ir_ref root) +{ + if (!IR_IS_CONST_REF(insn->op2) + && !ir_match_fuse_load(ctx, insn->op2, root) + && (IR_IS_CONST_REF(insn->op1) || ir_match_fuse_load(ctx, insn->op1, root))) { + ir_swap_ops(insn); + } +} + +static void ir_match_fuse_load_cmp_int(ir_ctx *ctx, ir_insn *insn, ir_ref root) +{ + if (IR_IS_CONST_REF(insn->op2)) { + if (!IR_IS_CONST_REF(insn->op1) + && ir_match_fuse_load(ctx, insn->op1, root) + && ir_type_size[ctx->ir_base[insn->op2].type] > 4 + && !IR_IS_32BIT(ctx->ir_base[insn->op2].type, ctx->ir_base[insn->op2].val)) { + ir_swap_ops(insn); + if (insn->op != IR_EQ && insn->op != IR_NE) { + insn->op ^= 3; + } + } + } else if (!ir_match_fuse_load(ctx, insn->op2, root)) { + if (!IR_IS_CONST_REF(insn->op1) + && ir_match_fuse_load(ctx, insn->op1, root)) { + ir_swap_ops(insn); + if (insn->op != IR_EQ && insn->op != IR_NE) { + insn->op ^= 3; + } + } + } +} + +static void ir_match_fuse_load_cmp_fp(ir_ctx *ctx, ir_insn *insn, ir_ref root, bool direct) +{ + if (direct) { + if (insn->op == IR_LT || insn->op == IR_LE) { + /* swap operands to avoid P flag check */ + ir_swap_ops(insn); + insn->op ^= 3; + } + } else { + if (insn->op == IR_GT || insn->op == IR_GE) { + /* swap operands to avoid P flag check */ + ir_swap_ops(insn); + insn->op ^= 3; + } + } + if (IR_IS_CONST_REF(insn->op2)) { + } else if (ir_match_fuse_load(ctx, insn->op2, root)) { + } else if ((IR_IS_CONST_REF(insn->op1) && !IR_IS_FP_ZERO(ctx->ir_base[insn->op1])) || ir_match_fuse_load(ctx, insn->op1, root)) { + ir_swap_ops(insn); + if (insn->op != IR_EQ && insn->op != IR_NE) { + insn->op ^= 3; + } + } +} + +static uint32_t ir_match_insn(ir_ctx *ctx, ir_ref ref) +{ + ir_insn *op2_insn; + ir_insn *insn = &ctx->ir_base[ref]; + uint32_t store_rule; + ir_op load_op; + + switch (insn->op) { + case IR_EQ: + case IR_NE: + case IR_LT: + case IR_GE: + case IR_LE: + case IR_GT: + case IR_ULT: + case IR_UGE: + case IR_ULE: + case IR_UGT: + if (IR_IS_TYPE_INT(ctx->ir_base[insn->op1].type)) { + if (IR_IS_CONST_REF(insn->op2) + && ctx->ir_base[insn->op2].val.i64 == 0 + && insn->op1 == ref - 1) { /* previous instruction */ + ir_insn *op1_insn = &ctx->ir_base[insn->op1]; + + if (op1_insn->op == IR_ADD || + op1_insn->op == IR_SUB || +// op1_insn->op == IR_MUL || + op1_insn->op == IR_OR || + op1_insn->op == IR_AND || + op1_insn->op == IR_XOR) { + + if (ir_op_flags[op1_insn->op] & IR_OP_FLAG_COMMUTATIVE) { + ir_match_fuse_load_commutative_int(ctx, op1_insn, ref); + } else { + ir_match_fuse_load(ctx, op1_insn->op2, ref); + } + if (op1_insn->op == IR_AND && ctx->use_lists[insn->op1].count == 1) { + /* v = AND(_, _); CMP(v, 0) => SKIP_TEST; TEST */ + if (IR_IS_CONST_REF(op1_insn->op2)) { + ir_match_fuse_load(ctx, op1_insn->op1, ref); + } + ctx->rules[insn->op1] = IR_FUSED | IR_TEST_INT; + return IR_TESTCC_INT; + } else { + /* v = BINOP(_, _); CMP(v, 0) => BINOP; SETCC */ + ctx->rules[insn->op1] = IR_BINOP_INT; + return IR_SETCC_INT; + } + } + } + ir_match_fuse_load_cmp_int(ctx, insn, ref); + return IR_CMP_INT; + } else { + ir_match_fuse_load_cmp_fp(ctx, insn, ref, 1); + return IR_CMP_FP; + } + break; + case IR_ADD: + case IR_SUB: + if (IR_IS_TYPE_INT(insn->type)) { + if ((ctx->flags & IR_OPT_CODEGEN) && IR_IS_CONST_REF(insn->op2)) { + 
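/* Strength-reduce ADD/SUB with a constant RHS: x+0 becomes COPY, a signed-32-bit immediate becomes an LEA form, and +/-1 becomes INC/DEC (all handled below). */ +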
op2_insn = &ctx->ir_base[insn->op2]; + if (IR_IS_CONST_REF(insn->op1)) { + // const + } else if (op2_insn->val.i64 == 0) { + return IR_COPY_INT; + } else if ((ir_type_size[insn->type] >= 4 && insn->op == IR_ADD && IR_IS_SIGNED_32BIT(op2_insn->val.i64)) || + (ir_type_size[insn->type] >= 4 && insn->op == IR_SUB && IR_IS_SIGNED_32BIT(-op2_insn->val.i64))) { + if (ir_in_same_block(ctx, insn->op1) && ctx->use_lists[insn->op1].count == 1) { + uint32_t rule = ctx->rules[insn->op1]; + + if (!rule) { + ctx->rules[insn->op1] = rule = ir_match_insn(ctx, insn->op1); + } + if (rule == IR_LEA_SI) { + /* z = MUL(Y, 2|4|8) ... ADD(z, imm32) => SKIP ... LEA [Y*2|4|8+im32] */ + ctx->rules[insn->op1] = IR_FUSED | IR_SIMPLE | IR_LEA_SI; + return IR_LEA_SI_O; + } else if (rule == IR_LEA_SIB) { + /* z = ADD(X, MUL(Y, 2|4|8)) ... ADD(z, imm32) => SKIP ... LEA [X+Y*2|4|8+im32] */ + ctx->rules[insn->op1] = IR_FUSED | IR_SIMPLE | IR_LEA_SIB; + return IR_LEA_SIB_O; + } else if (rule == IR_LEA_IB) { + /* z = ADD(X, Y) ... ADD(z, imm32) => SKIP ... LEA [X+Y+im32] */ + ctx->rules[insn->op1] = IR_FUSED | IR_SIMPLE | IR_LEA_IB; + return IR_LEA_IB_O; + } + } + /* ADD(X, imm32) => LEA [X+imm32] */ + return IR_LEA_OB; + } else if (op2_insn->val.i64 == 1 || op2_insn->val.i64 == -1) { + if (insn->op == IR_ADD) { + if (op2_insn->val.i64 == 1) { + /* ADD(_, 1) => INC */ + return IR_INC; + } else { + /* ADD(_, -1) => DEC */ + return IR_DEC; + } + } else { + if (op2_insn->val.i64 == 1) { + /* SUB(_, 1) => DEC */ + return IR_DEC; + } else { + /* SUB(_, -1) => INC */ + return IR_INC; + } + } + } + } else if ((ctx->flags & IR_OPT_CODEGEN) && insn->op == IR_ADD && ir_type_size[insn->type] >= 4) { + if (ir_in_same_block(ctx, insn->op1) && ctx->use_lists[insn->op1].count == 1) { + uint32_t rule =ctx->rules[insn->op1]; + if (!rule) { + ctx->rules[insn->op1] = rule = ir_match_insn(ctx, insn->op1); + } + if (rule == IR_LEA_OB) { + ctx->rules[insn->op1] = IR_FUSED | IR_SIMPLE | IR_LEA_OB; + if (ir_in_same_block(ctx, insn->op2) && ctx->use_lists[insn->op2].count == 1) { + rule = ctx->rules[insn->op2]; + if (!rule) { + ctx->rules[insn->op2] = rule = ir_match_insn(ctx, insn->op2); + } + if (rule == IR_LEA_SI) { + /* x = ADD(X, imm32) ... y = MUL(Y, 2|4|8) ... ADD(x, y) => SKIP ... SKIP ... LEA */ + ctx->rules[insn->op2] = IR_FUSED | IR_SIMPLE | IR_LEA_SI; + return IR_LEA_OB_SI; + } + } + /* x = ADD(X, imm32) ... ADD(x, Y) => SKIP ... LEA */ + return IR_LEA_OB_I; + } + } + if (ir_in_same_block(ctx, insn->op2) && ctx->use_lists[insn->op2].count == 1) { + uint32_t rule = ctx->rules[insn->op2]; + if (!rule) { + ctx->rules[insn->op2] = rule = ir_match_insn(ctx, insn->op2); + } + if (rule == IR_LEA_OB) { + ctx->rules[insn->op2] = IR_FUSED | IR_SIMPLE | IR_LEA_OB; + if (ir_in_same_block(ctx, insn->op1) && ctx->use_lists[insn->op1].count == 1) { + rule =ctx->rules[insn->op1]; + if (!rule) { + ctx->rules[insn->op1] = rule = ir_match_insn(ctx, insn->op1); + } + if (rule == IR_LEA_SI) { + /* x = ADD(X, imm32) ... y = MUL(Y, 2|4|8) ... ADD(y, x) => SKIP ... SKIP ... LEA */ + ctx->rules[insn->op1] = IR_FUSED | IR_SIMPLE | IR_LEA_SI; + return IR_LEA_SI_OB; + } + } + /* x = ADD(X, imm32) ... ADD(Y, x) => SKIP ... 
LEA */ + return IR_LEA_I_OB; + } + } + /* ADD(X, Y) => LEA [X + Y] */ + return IR_LEA_IB; + } +binop_int: + if (ir_op_flags[insn->op] & IR_OP_FLAG_COMMUTATIVE) { + ir_match_fuse_load_commutative_int(ctx, insn, ref); + } else { + ir_match_fuse_load(ctx, insn->op2, ref); + } + return IR_BINOP_INT; + } else { +binop_fp: + if (ir_op_flags[insn->op] & IR_OP_FLAG_COMMUTATIVE) { + ir_match_fuse_load_commutative_fp(ctx, insn, ref); + } else { + ir_match_fuse_load(ctx, insn->op2, ref); + } + if (ctx->mflags & IR_X86_AVX) { + return IR_BINOP_AVX; + } else { + return IR_BINOP_SSE2; + } + } + break; + case IR_MUL: + if (IR_IS_TYPE_INT(insn->type)) { + if ((ctx->flags & IR_OPT_CODEGEN) && IR_IS_CONST_REF(insn->op2)) { + op2_insn = &ctx->ir_base[insn->op2]; + if (IR_IS_CONST_REF(insn->op1)) { + // const + } else if (op2_insn->val.u64 == 0) { + // 0 + } else if (op2_insn->val.u64 == 1) { + return IR_COPY_INT; + } else if (ir_type_size[insn->type] >= 4 && + (op2_insn->val.u64 == 2 || op2_insn->val.u64 == 4 || op2_insn->val.u64 == 8)) { + /* MUL(X, 2|4|8) => LEA [X*2|4|8] */ + return IR_LEA_SI; + } else if (ir_type_size[insn->type] >= 4 && + (op2_insn->val.u64 == 3 || op2_insn->val.u64 == 5 || op2_insn->val.u64 == 9)) { + /* MUL(X, 3|5|9) => LEA [X+X*2|4|8] */ + return IR_LEA_SIB; + } else if (IR_IS_POWER_OF_TWO(op2_insn->val.u64)) { + /* MUL(X, PWR2) => SHL */ + return IR_MUL_PWR2; + } else if (IR_IS_TYPE_SIGNED(insn->type) + && ir_type_size[insn->type] != 1 + && IR_IS_SIGNED_32BIT(op2_insn->val.i64) + && !IR_IS_CONST_REF(insn->op1)) { + /* MUL(_, imm32) => IMUL */ + ir_match_fuse_load(ctx, insn->op1, ref); + return IR_IMUL3; + } + } + /* Prefer IMUL over MUL because it's more flexible and uses less registers ??? */ +// if (IR_IS_TYPE_SIGNED(insn->type) && ir_type_size[insn->type] != 1) { + if (ir_type_size[insn->type] != 1) { + goto binop_int; + } + ir_match_fuse_load(ctx, insn->op2, ref); + return IR_MUL_INT; + } else { + goto binop_fp; + } + break; + case IR_ADD_OV: + case IR_SUB_OV: + IR_ASSERT(IR_IS_TYPE_INT(insn->type)); + goto binop_int; + case IR_MUL_OV: + IR_ASSERT(IR_IS_TYPE_INT(insn->type)); + if (IR_IS_TYPE_SIGNED(insn->type) && ir_type_size[insn->type] != 1) { + if ((ctx->flags & IR_OPT_CODEGEN) && IR_IS_CONST_REF(insn->op2)) { + op2_insn = &ctx->ir_base[insn->op2]; + if (IR_IS_SIGNED_32BIT(op2_insn->val.i64) + && !IR_IS_CONST_REF(insn->op1)) { + /* MUL(_, imm32) => IMUL */ + ir_match_fuse_load(ctx, insn->op1, ref); + return IR_IMUL3; + } + } + goto binop_int; + } + ir_match_fuse_load(ctx, insn->op2, ref); + return IR_MUL_INT; + case IR_DIV: + if (IR_IS_TYPE_INT(insn->type)) { + if ((ctx->flags & IR_OPT_CODEGEN) && IR_IS_CONST_REF(insn->op2)) { + op2_insn = &ctx->ir_base[insn->op2]; + if (IR_IS_CONST_REF(insn->op1)) { + // const + } else if (op2_insn->val.u64 == 1) { + return IR_COPY_INT; + } else if (IR_IS_POWER_OF_TWO(op2_insn->val.u64)) { + /* DIV(X, PWR2) => SHR */ + return IR_DIV_PWR2; + } + } + ir_match_fuse_load(ctx, insn->op2, ref); + return IR_DIV_INT; + } else { + goto binop_fp; + } + break; + case IR_MOD: + if ((ctx->flags & IR_OPT_CODEGEN) && IR_IS_CONST_REF(insn->op2)) { + op2_insn = &ctx->ir_base[insn->op2]; + if (IR_IS_CONST_REF(insn->op1)) { + // const + } else if (IR_IS_TYPE_UNSIGNED(insn->type) + && IR_IS_POWER_OF_TWO(op2_insn->val.u64) + && IR_IS_UNSIGNED_32BIT(op2_insn->val.u64 - 1)) { + /* MOD(X, PWR2) => AND */ + return IR_MOD_PWR2; + } + } + ir_match_fuse_load(ctx, insn->op2, ref); + return IR_MOD_INT; + case IR_BSWAP: + case IR_NOT: + if (insn->type == IR_BOOL) { + 
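/* Boolean NOT is matched to a dedicated rule; FP operands are not handled here yet (see the TODO below). */ +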
IR_ASSERT(IR_IS_TYPE_INT(ctx->ir_base[insn->op1].type)); // TODO: IR_BOOL_NOT_FP + return IR_BOOL_NOT_INT; + } else { + IR_ASSERT(IR_IS_TYPE_INT(insn->type)); + return IR_OP_INT; + } + break; + case IR_NEG: + if (IR_IS_TYPE_INT(insn->type)) { + return IR_OP_INT; + } else { + return IR_OP_FP; + } + case IR_ABS: + if (IR_IS_TYPE_INT(insn->type)) { + return IR_ABS_INT; // movl %edi, %eax; negl %eax; cmovs %edi, %eax + } else { + return IR_OP_FP; + } + case IR_OR: + if ((ctx->flags & IR_OPT_CODEGEN) && IR_IS_CONST_REF(insn->op2)) { + op2_insn = &ctx->ir_base[insn->op2]; + if (IR_IS_CONST_REF(insn->op1)) { + // const + } else if (op2_insn->val.i64 == 0) { + return IR_COPY_INT; + } else if (op2_insn->val.i64 == -1) { + // -1 + } + } + goto binop_int; + case IR_AND: + if ((ctx->flags & IR_OPT_CODEGEN) && IR_IS_CONST_REF(insn->op2)) { + op2_insn = &ctx->ir_base[insn->op2]; + if (IR_IS_CONST_REF(insn->op1)) { + // const + } else if (op2_insn->val.i64 == 0) { + // 0 + } else if (op2_insn->val.i64 == -1) { + return IR_COPY_INT; + } + } + goto binop_int; + case IR_XOR: + if ((ctx->flags & IR_OPT_CODEGEN) && IR_IS_CONST_REF(insn->op2)) { + op2_insn = &ctx->ir_base[insn->op2]; + if (IR_IS_CONST_REF(insn->op1)) { + // const + } + } + goto binop_int; + case IR_SHL: + if (IR_IS_CONST_REF(insn->op2)) { + if (ctx->flags & IR_OPT_CODEGEN) { + op2_insn = &ctx->ir_base[insn->op2]; + if (IR_IS_CONST_REF(insn->op1)) { + // const + } else if (op2_insn->val.u64 == 0) { + return IR_COPY_INT; + } else if (ir_type_size[insn->type] >= 4) { + if (op2_insn->val.u64 == 1) { + // lea [op1*2] + } else if (op2_insn->val.u64 == 2) { + // lea [op1*4] + } else if (op2_insn->val.u64 == 3) { + // lea [op1*8] + } + } + } + return IR_SHIFT_CONST; + } + return IR_SHIFT; + case IR_SHR: + case IR_SAR: + case IR_ROL: + case IR_ROR: + if (IR_IS_CONST_REF(insn->op2)) { + if (ctx->flags & IR_OPT_CODEGEN) { + op2_insn = &ctx->ir_base[insn->op2]; + if (IR_IS_CONST_REF(insn->op1)) { + // const + } else if (op2_insn->val.u64 == 0) { + return IR_COPY_INT; + } + } + return IR_SHIFT_CONST; + } + return IR_SHIFT; + case IR_MIN: + case IR_MAX: + if (IR_IS_TYPE_INT(insn->type)) { + return IR_MIN_MAX_INT; + } else { + goto binop_fp; + } + break; +// case IR_COND: + case IR_COPY: + if (IR_IS_TYPE_INT(insn->type)) { + return IR_COPY_INT; + } else { + return IR_COPY_FP; + } + break; + case IR_CALL: + ctx->flags |= IR_HAS_CALLS; + IR_FALLTHROUGH; + case IR_TAILCALL: + if (ir_in_same_block(ctx, insn->op2)) { + ir_match_fuse_load(ctx, insn->op2, ref); + } + return insn->op; + case IR_VAR: + return IR_SKIPPED | IR_VAR; + case IR_PARAM: + return ctx->use_lists[ref].count > 0 ? 
IR_PARAM : IR_SKIPPED | IR_PARAM; + case IR_ALLOCA: + /* alloca() may be used only in functions */ + if (ctx->flags & IR_FUNCTION) { + ctx->flags |= IR_USE_FRAME_POINTER | IR_HAS_ALLOCA; + } + return IR_ALLOCA; + case IR_VSTORE: + if (IR_IS_TYPE_INT(ctx->ir_base[insn->op3].type)) { + store_rule = IR_VSTORE_INT; + load_op = IR_VLOAD; +store_int: + if ((ctx->flags & IR_OPT_CODEGEN) + && ir_in_same_block(ctx, insn->op3) + && (ctx->use_lists[insn->op3].count == 1 || + (ctx->use_lists[insn->op3].count == 2 + && (ctx->ir_base[insn->op3].op == IR_ADD_OV || + ctx->ir_base[insn->op3].op == IR_SUB_OV)))) { + ir_insn *op_insn = &ctx->ir_base[insn->op3]; + uint32_t rule = ctx->rules[insn->op3]; + + if (!rule) { + ctx->rules[insn->op3] = rule = ir_match_insn(ctx, insn->op3); + } + if ((rule == IR_BINOP_INT && op_insn->op != IR_MUL) || rule == IR_LEA_OB || rule == IR_LEA_IB) { + if (insn->op1 == op_insn->op1 + && ctx->ir_base[op_insn->op1].op == load_op + && ctx->ir_base[op_insn->op1].op2 == insn->op2 + && ctx->use_lists[op_insn->op1].count == 2) { + /* l = LOAD(_, a) ... v = BINOP(l, _) ... STORE(l, a, v) => SKIP ... SKIP_MEM_BINOP ... MEM_BINOP */ + ctx->rules[insn->op3] = IR_FUSED | IR_BINOP_INT; + ctx->rules[op_insn->op1] = IR_SKIPPED | load_op; + if (ctx->ir_base[op_insn->op2].op == IR_LOAD) { + ir_match_fuse_addr(ctx, ctx->ir_base[op_insn->op2].op2); + ctx->rules[op_insn->op2] = IR_LOAD_INT; + } + ir_match_fuse_addr(ctx, insn->op2); + return IR_MEM_BINOP_INT; + } else if ((ir_op_flags[op_insn->op] & IR_OP_FLAG_COMMUTATIVE) + && insn->op1 == op_insn->op2 + && ctx->ir_base[op_insn->op2].op == load_op + && ctx->ir_base[op_insn->op2].op2 == insn->op2 + && ctx->use_lists[op_insn->op2].count == 2) { + /* l = LOAD(_, a) ... v = BINOP(_, l) ... STORE(l, a, v) => SKIP ... SKIP_MEM_BINOP ... MEM_BINOP */ + ir_swap_ops(op_insn); + ctx->rules[insn->op3] = IR_FUSED | IR_BINOP_INT; + ctx->rules[op_insn->op1] = IR_SKIPPED | load_op; + if (ctx->ir_base[op_insn->op2].op == IR_LOAD) { + ir_match_fuse_addr(ctx, ctx->ir_base[op_insn->op2].op2); + ctx->rules[op_insn->op2] = IR_LOAD_INT; + } + ir_match_fuse_addr(ctx, insn->op2); + return IR_MEM_BINOP_INT; + } + } else if (rule == IR_INC) { + if (insn->op1 == op_insn->op1 + && ctx->ir_base[op_insn->op1].op == load_op + && ctx->ir_base[op_insn->op1].op2 == insn->op2 + && ctx->use_lists[op_insn->op1].count == 2) { + /* l = LOAD(_, a) ... v = INC(l) ... STORE(l, a, v) => SKIP ... SKIP ... MEM_INC */ + ctx->rules[insn->op3] = IR_SKIPPED | IR_INC; + ctx->rules[op_insn->op1] = IR_SKIPPED | load_op; + return IR_MEM_INC; + } + } else if (rule == IR_DEC) { + if (insn->op1 == op_insn->op1 + && ctx->ir_base[op_insn->op1].op == load_op + && ctx->ir_base[op_insn->op1].op2 == insn->op2 + && ctx->use_lists[op_insn->op1].count == 2) { + /* l = LOAD(_, a) ... v = DEC(l) ... STORE(l, a, v) => SKIP ... SKIP ... MEM_DEC */ + ctx->rules[insn->op3] = IR_SKIPPED | IR_DEC; + ctx->rules[op_insn->op1] = IR_SKIPPED | load_op; + return IR_MEM_DEC; + } + } else if (rule == IR_MUL_PWR2) { + if (insn->op1 == op_insn->op1 + && ctx->ir_base[op_insn->op1].op == load_op + && ctx->ir_base[op_insn->op1].op2 == insn->op2 + && ctx->use_lists[op_insn->op1].count == 2) { + /* l = LOAD(_, a) ... v = MUL_PWR2(l) ... STORE(l, a, v) => SKIP ... SKIP ... 
MEM_MUL_PWR2 */ + ctx->rules[insn->op3] = IR_SKIPPED | IR_MUL_PWR2; + ctx->rules[op_insn->op1] = IR_SKIPPED | load_op; + return IR_MEM_MUL_PWR2; + } + } else if (rule == IR_DIV_PWR2) { + if (insn->op1 == op_insn->op1 + && ctx->ir_base[op_insn->op1].op == load_op + && ctx->ir_base[op_insn->op1].op2 == insn->op2 + && ctx->use_lists[op_insn->op1].count == 2) { + /* l = LOAD(_, a) ... v = DIV_PWR2(l) ... STORE(l, a, v) => SKIP ... SKIP ... MEM_DIV_PWR2 */ + ctx->rules[insn->op3] = IR_SKIPPED | IR_DIV_PWR2; + ctx->rules[op_insn->op1] = IR_SKIPPED | load_op; + return IR_MEM_DIV_PWR2; + } + } else if (rule == IR_MOD_PWR2) { + if (insn->op1 == op_insn->op1 + && ctx->ir_base[op_insn->op1].op == load_op + && ctx->ir_base[op_insn->op1].op2 == insn->op2 + && ctx->use_lists[op_insn->op1].count == 2) { + /* l = LOAD(_, a) ... v = MOD_PWR2(l) ... STORE(l, a, v) => SKIP ... SKIP ... MEM_MOD_PWR2 */ + ctx->rules[insn->op3] = IR_SKIPPED | IR_MOD_PWR2; + ctx->rules[op_insn->op1] = IR_SKIPPED | load_op; + return IR_MEM_MOD_PWR2; + } + } else if (rule == IR_SHIFT) { + if (insn->op1 == op_insn->op1 + && ctx->ir_base[op_insn->op1].op == load_op + && ctx->ir_base[op_insn->op1].op2 == insn->op2 + && ctx->use_lists[op_insn->op1].count == 2) { + /* l = LOAD(_, a) ... v = SHIFT(l, _) ... STORE(l, a, v) => SKIP ... SKIP_SHIFT ... MEM_SHIFT */ + ctx->rules[insn->op3] = IR_FUSED | IR_SHIFT; + ctx->rules[op_insn->op1] = IR_SKIPPED | load_op; + if (ctx->ir_base[op_insn->op2].op == IR_LOAD) { + ir_match_fuse_addr(ctx, ctx->ir_base[op_insn->op2].op2); + ctx->rules[op_insn->op2] = IR_LOAD_INT; + } + return IR_MEM_SHIFT; + } + } else if (rule == IR_SHIFT_CONST) { + if (insn->op1 == op_insn->op1 + && ctx->ir_base[op_insn->op1].op == load_op + && ctx->ir_base[op_insn->op1].op2 == insn->op2 + && ctx->use_lists[op_insn->op1].count == 2) { + /* l = LOAD(_, a) ... v = SHIFT(l, CONST) ... STORE(l, a, v) => SKIP ... SKIP ... MEM_SHIFT_CONST */ + ctx->rules[insn->op3] = IR_SKIPPED | IR_SHIFT_CONST; + ctx->rules[op_insn->op1] = IR_SKIPPED | load_op; + return IR_MEM_SHIFT_CONST; + } + } else if (rule == IR_OP_INT && op_insn->op != IR_BSWAP) { + if (insn->op1 == op_insn->op1 + && ctx->ir_base[op_insn->op1].op == load_op + && ctx->ir_base[op_insn->op1].op2 == insn->op2 + && ctx->use_lists[op_insn->op1].count == 2) { + /* l = LOAD(_, a) ... v = OP(l) ... STORE(l, a, v) => SKIP ... SKIP ... 
MEM_OP */ + ctx->rules[insn->op3] = IR_SKIPPED | IR_OP_INT; + ctx->rules[op_insn->op1] = IR_SKIPPED | load_op; + return IR_MEM_OP_INT; + } + } + } + return store_rule; + } else { + return IR_VSTORE_FP; + } + break; + case IR_LOAD: + ir_match_fuse_addr(ctx, insn->op2); + if (IR_IS_TYPE_INT(insn->type)) { + return IR_LOAD_INT; + } else { + return IR_LOAD_FP; + } + break; + case IR_STORE: + ir_match_fuse_addr(ctx, insn->op2); + if (IR_IS_TYPE_INT(ctx->ir_base[insn->op3].type)) { + store_rule = IR_STORE_INT; + load_op = IR_LOAD; + goto store_int; + } else { + return IR_STORE_FP; + } + break; + case IR_RLOAD: + if (IR_REGSET_IN(IR_REGSET_UNION((ir_regset)ctx->fixed_regset, IR_REGSET_FIXED), insn->op2)) { + return IR_SKIPPED | IR_RLOAD; + } + return IR_RLOAD; + case IR_RSTORE: + if (IR_IS_TYPE_INT(ctx->ir_base[insn->op2].type)) { + if ((ctx->flags & IR_OPT_CODEGEN) + && ir_in_same_block(ctx, insn->op2) + && ctx->use_lists[insn->op2].count == 1 + && IR_IS_TYPE_INT(ctx->ir_base[insn->op2].type)) { + ir_insn *op_insn = &ctx->ir_base[insn->op2]; + + if (op_insn->op == IR_ADD || + op_insn->op == IR_SUB || +// op_insn->op == IR_MUL || + op_insn->op == IR_OR || + op_insn->op == IR_AND || + op_insn->op == IR_XOR) { + if (insn->op1 == op_insn->op1 + && ctx->ir_base[op_insn->op1].op == IR_RLOAD + && ctx->ir_base[op_insn->op1].op2 == insn->op3 + && ctx->use_lists[op_insn->op1].count == 2) { + /* l = RLOAD(r) ... v = BINOP(l, _) ... RSTORE(l, r, v) => SKIP ... SKIP_REG_BINOP ... REG_BINOP */ + ctx->rules[insn->op2] = IR_FUSED | IR_BINOP_INT; + ctx->rules[op_insn->op1] = IR_SKIPPED | IR_RLOAD; + return IR_REG_BINOP_INT; + } else if ((ir_op_flags[op_insn->op] & IR_OP_FLAG_COMMUTATIVE) + && insn->op1 == op_insn->op2 + && ctx->ir_base[op_insn->op2].op == IR_RLOAD + && ctx->ir_base[op_insn->op2].op2 == insn->op3 + && ctx->use_lists[op_insn->op2].count == 2) { + /* l = RLOAD(r) ... v = BINOP(x, l) ... RSTORE(l, r, v) => SKIP ... SKIP_REG_BINOP ... 
REG_BINOP */ + ir_swap_ops(op_insn); + ctx->rules[insn->op2] = IR_FUSED | IR_BINOP_INT; + ctx->rules[op_insn->op1] = IR_SKIPPED | IR_RLOAD; + return IR_REG_BINOP_INT; + } + } + } + } + if (ir_in_same_block(ctx, insn->op2)) { + ir_match_fuse_load(ctx, insn->op2, ref); + } + return IR_RSTORE; + case IR_START: + case IR_BEGIN: + case IR_IF_TRUE: + case IR_IF_FALSE: + case IR_CASE_VAL: + case IR_CASE_DEFAULT: + case IR_MERGE: + case IR_LOOP_BEGIN: + case IR_UNREACHABLE: + return IR_SKIPPED | insn->op; + case IR_RETURN: + if (!insn->op2) { + return IR_RETURN_VOID; + } else if (IR_IS_TYPE_INT(ctx->ir_base[insn->op2].type)) { + return IR_RETURN_INT; + } else { + return IR_RETURN_FP; + } + case IR_IF: + if (ir_in_same_block(ctx, insn->op2) && ctx->use_lists[insn->op2].count == 1) { + op2_insn = &ctx->ir_base[insn->op2]; + if (op2_insn->op >= IR_EQ && op2_insn->op <= IR_UGT) { + if (IR_IS_TYPE_INT(ctx->ir_base[op2_insn->op1].type)) { + if (IR_IS_CONST_REF(op2_insn->op2) + && ctx->ir_base[op2_insn->op2].val.i64 == 0 + && op2_insn->op1 == insn->op2 - 1) { /* previous instruction */ + ir_insn *op1_insn = &ctx->ir_base[op2_insn->op1]; + + if (op1_insn->op == IR_ADD || + op1_insn->op == IR_SUB || +// op1_insn->op == IR_MUL || + op1_insn->op == IR_OR || + op1_insn->op == IR_AND || + op1_insn->op == IR_XOR) { + + if (ir_op_flags[op1_insn->op] & IR_OP_FLAG_COMMUTATIVE) { + ir_match_fuse_load_commutative_int(ctx, op1_insn, ref); + } else { + ir_match_fuse_load(ctx, op1_insn->op2, ref); + } + if (op1_insn->op == IR_AND && ctx->use_lists[op2_insn->op1].count == 1) { + /* v = AND(_, _); c = CMP(v, 0) ... IF(c) => SKIP_TEST; SKIP ... TEST_AND_BRANCH */ + if (IR_IS_CONST_REF(op1_insn->op2)) { + ir_match_fuse_load(ctx, op1_insn->op1, ref); + } + ctx->rules[op2_insn->op1] = IR_FUSED | IR_TEST_INT; + ctx->rules[insn->op2] = IR_FUSED | IR_SIMPLE | IR_NOP; + return IR_TEST_AND_BRANCH_INT; + } else { + /* v = BINOP(_, _); c = CMP(v, 0) ... IF(c) => BINOP; SKIP_CMP ... JCC */ + ctx->rules[op2_insn->op1] = IR_BINOP_INT; + ctx->rules[insn->op2] = IR_FUSED | IR_CMP_INT; + return IR_JCC_INT; + } + } + } + /* c = CMP(_, _) ... IF(c) => SKIP_CMP ... CMP_AND_BRANCH */ + ir_match_fuse_load_cmp_int(ctx, op2_insn, ref); + ctx->rules[insn->op2] = IR_FUSED | IR_CMP_INT; + return IR_CMP_AND_BRANCH_INT; + } else { + /* c = CMP(_, _) ... IF(c) => SKIP_CMP ... CMP_AND_BRANCH */ + ir_match_fuse_load_cmp_fp(ctx, op2_insn, ref, 1); + ctx->rules[insn->op2] = IR_FUSED | IR_CMP_FP; + return IR_CMP_AND_BRANCH_FP; + } + } else if (op2_insn->op == IR_AND) { + /* c = AND(_, _) ... IF(c) => SKIP_TEST ... TEST_AND_BRANCH */ + ir_match_fuse_load_commutative_int(ctx, op2_insn, ref); + if (IR_IS_CONST_REF(op2_insn->op2)) { + ir_match_fuse_load(ctx, op2_insn->op1, ref); + } + ctx->rules[insn->op2] = IR_FUSED | IR_TEST_INT; + return IR_TEST_AND_BRANCH_INT; + } else if (op2_insn->op == IR_OVERFLOW) { + /* c = OVERFLOW(_) ... IF(c) => SKIP_OVERFLOW ... 
OVERFLOW_AND_BRANCH */ + ctx->rules[insn->op2] = IR_FUSED | IR_SIMPLE | IR_OVERFLOW; + return IR_OVERFLOW_AND_BRANCH; + } + } + if (IR_IS_TYPE_INT(ctx->ir_base[insn->op2].type)) { + if (insn->op2 == ref - 1 /* previous instruction */ + && ir_in_same_block(ctx, insn->op2)) { + op2_insn = &ctx->ir_base[insn->op2]; + if (op2_insn->op == IR_ADD || + op2_insn->op == IR_SUB || +// op2_insn->op == IR_MUL || + op2_insn->op == IR_OR || + op2_insn->op == IR_AND || + op2_insn->op == IR_XOR) { + + /* v = BINOP(_, _); IF(v) => BINOP; JCC */ + if (ir_op_flags[op2_insn->op] & IR_OP_FLAG_COMMUTATIVE) { + ir_match_fuse_load_commutative_int(ctx, op2_insn, ref); + } else { + ir_match_fuse_load(ctx, op2_insn->op2, ref); + } + ctx->rules[insn->op2] = IR_BINOP_INT; + return IR_JCC_INT; + } + } else if ((ctx->flags & IR_OPT_CODEGEN) + && insn->op1 == ref - 1 /* previous instruction */ + && insn->op2 == ref - 2 /* previous instruction */ + && ir_in_same_block(ctx, insn->op2) + && ctx->use_lists[insn->op2].count == 2 + && IR_IS_TYPE_INT(ctx->ir_base[insn->op2].type)) { + ir_insn *store_insn = &ctx->ir_base[insn->op1]; + + if (store_insn->op == IR_STORE && store_insn->op3 == insn->op2) { + ir_insn *op_insn = &ctx->ir_base[insn->op2]; + + if (op_insn->op == IR_ADD || + op_insn->op == IR_SUB || +// op_insn->op == IR_MUL || + op_insn->op == IR_OR || + op_insn->op == IR_AND || + op_insn->op == IR_XOR) { + if (ctx->ir_base[op_insn->op1].op == IR_LOAD + && ctx->ir_base[op_insn->op1].op2 == store_insn->op2) { + if (ir_in_same_block(ctx, op_insn->op1) + && ctx->use_lists[op_insn->op1].count == 2 + && store_insn->op1 == op_insn->op1) { + /* v = MEM_BINOP(_, _); IF(v) => MEM_BINOP; JCC */ + ctx->rules[insn->op2] = IR_FUSED | IR_BINOP_INT; + ctx->rules[op_insn->op1] = IR_SKIPPED | IR_LOAD; + ir_match_fuse_addr(ctx, store_insn->op2); + ctx->rules[insn->op1] = IR_MEM_BINOP_INT; + return IR_JCC_INT; + } + } else if ((ir_op_flags[op_insn->op] & IR_OP_FLAG_COMMUTATIVE) + && ctx->ir_base[op_insn->op2].op == IR_LOAD + && ctx->ir_base[op_insn->op2].op2 == store_insn->op2) { + if (ir_in_same_block(ctx, op_insn->op2) + && ctx->use_lists[op_insn->op2].count == 2 + && store_insn->op1 == op_insn->op2) { + /* v = MEM_BINOP(_, _); IF(v) => MEM_BINOP; JCC */ + ir_swap_ops(op_insn); + ctx->rules[insn->op2] = IR_FUSED | IR_BINOP_INT; + ctx->rules[op_insn->op1] = IR_SKIPPED | IR_LOAD; + ir_match_fuse_addr(ctx, store_insn->op2); + ctx->rules[insn->op1] = IR_MEM_BINOP_INT; + return IR_JCC_INT; + } + } + } + } + } + ir_match_fuse_load(ctx, insn->op2, ref); + return IR_IF_INT; + } else { + IR_ASSERT(0 && "NIY IR_IF_FP"); + break; + } + case IR_GUARD: + case IR_GUARD_NOT: + if (ir_in_same_block(ctx, insn->op2) && ctx->use_lists[insn->op2].count == 1) { + op2_insn = &ctx->ir_base[insn->op2]; + if (op2_insn->op >= IR_EQ && op2_insn->op <= IR_UGT + // TODO: register allocator may clobber operands of CMP before they are used in the GUARD_CMP + && (insn->op2 == ref - 1 || + (insn->op2 == ctx->prev_ref[ref] - 1 + && ctx->ir_base[ctx->prev_ref[ref]].op == IR_SNAPSHOT))) { + if (IR_IS_TYPE_INT(ctx->ir_base[op2_insn->op1].type)) { + if (IR_IS_CONST_REF(op2_insn->op2) + && ctx->ir_base[op2_insn->op2].val.i64 == 0) { + if (op2_insn->op1 == insn->op2 - 1) { /* previous instruction */ + ir_insn *op1_insn = &ctx->ir_base[op2_insn->op1]; + + if (op1_insn->op == IR_ADD || + op1_insn->op == IR_SUB || +// op1_insn->op == IR_MUL || + op1_insn->op == IR_OR || + op1_insn->op == IR_AND || + op1_insn->op == IR_XOR) { + + if (ir_op_flags[op1_insn->op] & 
IR_OP_FLAG_COMMUTATIVE) { + ir_match_fuse_load_commutative_int(ctx, op1_insn, ref); + } else { + ir_match_fuse_load(ctx, op1_insn->op2, ref); + } + /* v = BINOP(_, _); c = CMP(v, 0) ... IF(c) => BINOP; SKIP_CMP ... GUARD_JCC */ + ctx->rules[op2_insn->op1] = IR_BINOP_INT; + ctx->rules[insn->op2] = IR_FUSED | IR_CMP_INT; + return IR_GUARD_JCC_INT; + } + } else if ((ctx->flags & IR_OPT_CODEGEN) + && op2_insn->op1 == insn->op2 - 2 /* before previous instruction */ + && ir_in_same_block(ctx, op2_insn->op1) + && ctx->use_lists[op2_insn->op1].count == 2) { + ir_insn *store_insn = &ctx->ir_base[insn->op2 - 1]; + + if (store_insn->op == IR_STORE && store_insn->op3 == op2_insn->op1) { + ir_insn *op_insn = &ctx->ir_base[op2_insn->op1]; + + if (op_insn->op == IR_ADD || + op_insn->op == IR_SUB || +// op_insn->op == IR_MUL || + op_insn->op == IR_OR || + op_insn->op == IR_AND || + op_insn->op == IR_XOR) { + if (ctx->ir_base[op_insn->op1].op == IR_LOAD + && ctx->ir_base[op_insn->op1].op2 == store_insn->op2) { + if (ir_in_same_block(ctx, op_insn->op1) + && ctx->use_lists[op_insn->op1].count == 2 + && store_insn->op1 == op_insn->op1) { + /* v = MEM_BINOP(_, _); IF(v) => MEM_BINOP; GUARD_JCC */ + ctx->rules[op2_insn->op1] = IR_FUSED | IR_BINOP_INT; + ctx->rules[op_insn->op1] = IR_SKIPPED | IR_LOAD; + ir_match_fuse_addr(ctx, store_insn->op2); + ctx->rules[insn->op2 - 1] = IR_MEM_BINOP_INT; + ctx->rules[insn->op2] = IR_SKIPPED | IR_NOP; + return IR_GUARD_JCC_INT; + } + } else if ((ir_op_flags[op_insn->op] & IR_OP_FLAG_COMMUTATIVE) + && ctx->ir_base[op_insn->op2].op == IR_LOAD + && ctx->ir_base[op_insn->op2].op2 == store_insn->op2) { + if (ir_in_same_block(ctx, op_insn->op2) + && ctx->use_lists[op_insn->op2].count == 2 + && store_insn->op1 == op_insn->op2) { + /* v = MEM_BINOP(_, _); IF(v) => MEM_BINOP; JCC */ + ir_swap_ops(op_insn); + ctx->rules[op2_insn->op1] = IR_FUSED | IR_BINOP_INT; + ctx->rules[op_insn->op1] = IR_SKIPPED | IR_LOAD; + ir_match_fuse_addr(ctx, store_insn->op2); + ctx->rules[insn->op2 - 1] = IR_MEM_BINOP_INT; + ctx->rules[insn->op2] = IR_SKIPPED | IR_NOP; + return IR_GUARD_JCC_INT; + } + } + } + } + } + } + /* c = CMP(_, _) ... GUARD(c) => SKIP_CMP ... GUARD_CMP */ + ir_match_fuse_load_cmp_int(ctx, op2_insn, ref); + ctx->rules[insn->op2] = IR_FUSED | IR_CMP_INT; + return IR_GUARD_CMP_INT; + } else { + /* c = CMP(_, _) ... GUARD(c) => SKIP_CMP ... GUARD_CMP */ + ir_match_fuse_load_cmp_fp(ctx, op2_insn, ref, insn->op == IR_GUARD_NOT); + ctx->rules[insn->op2] = IR_FUSED | IR_CMP_FP; + return IR_GUARD_CMP_FP; + } + } else if (op2_insn->op == IR_AND) { // TODO: OR, XOR. etc + /* c = AND(_, _) ... GUARD(c) => SKIP_TEST ... GUARD_TEST */ + ir_match_fuse_load_commutative_int(ctx, op2_insn, ref); + if (IR_IS_CONST_REF(op2_insn->op2)) { + ir_match_fuse_load(ctx, op2_insn->op1, ref); + } + ctx->rules[insn->op2] = IR_FUSED | IR_TEST_INT; + return IR_GUARD_TEST_INT; + } else if (op2_insn->op == IR_OVERFLOW) { + /* c = OVERFLOW(_) ... GUARD(c) => SKIP_OVERFLOW ... 
GUARD_OVERFLOW */ + ctx->rules[insn->op2] = IR_FUSED | IR_SIMPLE | IR_OVERFLOW; + return IR_GUARD_OVERFLOW; + } + } + ir_match_fuse_load(ctx, insn->op2, ref); + return insn->op; + case IR_IJMP: + if (ir_in_same_block(ctx, insn->op2)) { + ir_match_fuse_load(ctx, insn->op2, ref); + } + return insn->op; + case IR_SEXT: + case IR_ZEXT: + case IR_BITCAST: + case IR_INT2FP: + case IR_FP2INT: + case IR_FP2FP: + ir_match_fuse_load(ctx, insn->op1, ref); + return insn->op; + default: + break; + } + + return insn->op; +} + +static void ir_match_insn2(ir_ctx *ctx, ir_ref ref, uint32_t rule) +{ + if (rule == IR_LEA_IB) { + ir_insn *insn = &ctx->ir_base[ref]; + + if (insn->op1 == insn->op2) { + /* pass */ + } else if (ir_match_fuse_load(ctx, insn->op2, ref) || + (ctx->ir_base[insn->op2].op == IR_PARAM + && ctx->use_lists[insn->op2].count == 1 + && ir_get_param_reg(ctx, insn->op2) == IR_REG_NONE)) { + ctx->rules[ref] = IR_BINOP_INT; + } else if (ir_match_fuse_load(ctx, insn->op1, ref) || + (ctx->ir_base[insn->op1].op == IR_PARAM + && ctx->use_lists[insn->op1].count == 1 + && ir_get_param_reg(ctx, insn->op1) == IR_REG_NONE)) { + /* swap for better load fusion */ + ir_swap_ops(insn); + ctx->rules[ref] = IR_BINOP_INT; + } + } +} + +/* code generation */ +static int32_t ir_ref_spill_slot(ir_ctx *ctx, ir_ref ref, ir_reg *reg) +{ + int32_t offset; + + IR_ASSERT(ref >= 0 && ctx->vregs[ref] && ctx->live_intervals[ctx->vregs[ref]]); + offset = ctx->live_intervals[ctx->vregs[ref]]->stack_spill_pos; + IR_ASSERT(offset != -1); + if (ctx->live_intervals[ctx->vregs[ref]]->flags & IR_LIVE_INTERVAL_SPILL_SPECIAL) { + IR_ASSERT(ctx->spill_base != IR_REG_NONE); + *reg = ctx->spill_base; + return offset; + } + *reg = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER; + return IR_SPILL_POS_TO_OFFSET(offset); +} + +static bool ir_is_same_spill_slot(ir_ctx *ctx, ir_ref ref, ir_reg reg, int32_t offset) +{ + ir_reg fp; + + return ir_ref_spill_slot(ctx, ref, &fp) == offset && reg == fp; +} + +static int32_t ir_var_spill_slot(ir_ctx *ctx, ir_ref ref, ir_reg *reg) +{ + ir_insn *var_insn = &ctx->ir_base[ref]; + + IR_ASSERT(var_insn->op == IR_VAR); + *reg = (ctx->flags & IR_USE_FRAME_POINTER) ? 
IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER; + return IR_SPILL_POS_TO_OFFSET(var_insn->op3); +} + +static bool ir_may_avoid_spill_load(ir_ctx *ctx, ir_ref ref, ir_ref use) +{ + ir_live_interval *ival; + + IR_ASSERT(ctx->vregs[ref] && ctx->live_intervals[ctx->vregs[ref]]); + ival = ctx->live_intervals[ctx->vregs[ref]]; + while (ival) { + ir_use_pos *use_pos = ival->use_pos; + while (use_pos) { + if (IR_LIVE_POS_TO_REF(use_pos->pos) == use) { + return !use_pos->next || use_pos->next->op_num == 0; + } + use_pos = use_pos->next; + } + ival = ival->next; + } + return 0; +} + +static void ir_emit_load_imm_int(ir_ctx *ctx, ir_type type, ir_reg reg, int64_t val) +{ + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + + IR_ASSERT(IR_IS_TYPE_INT(type)); + if (val == 0) { + | ASM_REG_REG_OP xor, type, reg, reg + } else if (ir_type_size[type] == 8) { + IR_ASSERT(sizeof(void*) == 8); +|.if X64 + if (IR_IS_UNSIGNED_32BIT(val)) { + | mov Rd(reg), (uint32_t)val // zero extended load + } else if (IR_IS_SIGNED_32BIT(val)) { + | mov Rq(reg), (int32_t)val // sign extended load + } else { + | mov64 Ra(reg), val + } +|.endif + } else { + | ASM_REG_IMM_OP mov, type, reg, (int32_t)val // sign extended load + } +} + +static void ir_emit_load_mem_int(ir_ctx *ctx, ir_type type, ir_reg reg, ir_reg base_reg, int32_t offset) +{ + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + + if (base_reg != IR_REG_NONE) { + | ASM_REG_MEM_OP mov, type, reg, [Ra(base_reg)+offset] + } else { + | ASM_REG_MEM_OP mov, type, reg, [offset] + } +} + +static void ir_emit_load_imm_fp(ir_ctx *ctx, ir_type type, ir_reg reg, ir_ref src) +{ + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + ir_insn *insn = &ctx->ir_base[src]; + int label; + + if (type == IR_FLOAT && insn->val.u32 == 0) { + if (ctx->mflags & IR_X86_AVX) { + | vxorps xmm(reg-IR_REG_FP_FIRST), xmm(reg-IR_REG_FP_FIRST), xmm(reg-IR_REG_FP_FIRST) + } else { + | xorps xmm(reg-IR_REG_FP_FIRST), xmm(reg-IR_REG_FP_FIRST) + } + } else if (type == IR_DOUBLE && insn->val.u64 == 0) { + if (ctx->mflags & IR_X86_AVX) { + | vxorpd xmm(reg-IR_REG_FP_FIRST), xmm(reg-IR_REG_FP_FIRST), xmm(reg-IR_REG_FP_FIRST) + } else { + | xorpd xmm(reg-IR_REG_FP_FIRST), xmm(reg-IR_REG_FP_FIRST) + } + } else { + label = ctx->cfg_blocks_count - src; + insn->const_flags |= IR_CONST_EMIT; + | ASM_FP_REG_MEM_OP movss, movsd, vmovss, vmovsd, type, reg, [=>label] + } +} + +static void ir_emit_load_mem_fp(ir_ctx *ctx, ir_type type, ir_reg reg, ir_reg base_reg, int32_t offset) +{ + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + + if (base_reg != IR_REG_NONE) { + | ASM_FP_REG_MEM_OP movss, movsd, vmovss, vmovsd, type, reg, [Ra(base_reg)+offset] + } else { + | ASM_FP_REG_MEM_OP movss, movsd, vmovss, vmovsd, type, reg, [offset] + } +} + +static void ir_emit_load(ir_ctx *ctx, ir_type type, ir_reg reg, ir_ref src) +{ + int32_t offset; + ir_reg fp; + + if (IR_IS_CONST_REF(src)) { + if (IR_IS_TYPE_INT(type)) { + ir_insn *insn = &ctx->ir_base[src]; + + IR_ASSERT(insn->op != IR_STR && insn->op != IR_SYM && insn->op != IR_FUNC); + ir_emit_load_imm_int(ctx, type, reg, insn->val.i64); + } else { + ir_emit_load_imm_fp(ctx, type, reg, src); + } + } else { + offset = ir_ref_spill_slot(ctx, src, &fp); + if (IR_IS_TYPE_INT(type)) { + ir_emit_load_mem_int(ctx, type, reg, fp, offset); + } else { + ir_emit_load_mem_fp(ctx, type, reg, fp, offset); + } + } +} + +static void ir_emit_store_mem_int(ir_ctx *ctx, ir_type 
type, ir_reg base_reg, int32_t offset, ir_reg reg) +{ + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + + | ASM_MEM_REG_OP mov, type, [Ra(base_reg)+offset], reg +} + +static void ir_emit_store_mem_fp(ir_ctx *ctx, ir_type type, ir_reg base_reg, int32_t offset, ir_reg reg) +{ + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + + | ASM_FP_MEM_REG_OP movss, movsd, vmovss, vmovsd, type, [Ra(base_reg)+offset], reg +} + +static void ir_emit_store(ir_ctx *ctx, ir_type type, ir_ref dst, ir_reg reg) +{ + int32_t offset; + ir_reg fp; + + IR_ASSERT(dst >= 0); + offset = ir_ref_spill_slot(ctx, dst, &fp); + if (IR_IS_TYPE_INT(type)) { + ir_emit_store_mem_int(ctx, type, fp, offset, reg); + } else { + ir_emit_store_mem_fp(ctx, type, fp, offset, reg); + } +} + +static void ir_emit_store_imm(ir_ctx *ctx, ir_type type, ir_ref dst, int32_t imm) +{ + int32_t offset; + ir_reg fp; + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + + IR_ASSERT(dst >= 0); + IR_ASSERT(IR_IS_TYPE_INT(type)); + offset = ir_ref_spill_slot(ctx, dst, &fp); + + | ASM_MEM_IMM_OP mov, type, [Ra(fp)+offset], imm +} + +static void ir_emit_mov(ir_ctx *ctx, ir_type type, ir_reg dst, ir_reg src) +{ + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + + | ASM_REG_REG_OP mov, type, dst, src +} + +static void ir_emit_fp_mov(ir_ctx *ctx, ir_type type, ir_reg dst, ir_reg src) +{ + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + + | ASM_FP_REG_REG_OP movaps, movapd, vmovaps, vmovapd, type, dst, src +} + +static int32_t ir_fuse_addr(ir_ctx *ctx, ir_ref ref, ir_reg *preg) +{ + ir_insn *addr_insn = &ctx->ir_base[ref]; + ir_reg reg; + + IR_ASSERT(addr_insn->op == IR_ADD); + IR_ASSERT(!IR_IS_CONST_REF(addr_insn->op1) && IR_IS_CONST_REF(addr_insn->op2)); + reg = ctx->regs[ref][1]; + if (IR_REG_SPILLED(reg)) { + reg = IR_REG_NUM(reg); + ir_emit_load(ctx, IR_ADDR, reg, addr_insn->op1); + } + *preg = reg; + return ctx->ir_base[addr_insn->op2].val.i32; +} + +static int32_t ir_fuse_load(ir_ctx *ctx, ir_ref ref, ir_reg *preg) +{ + ir_insn *load_insn = &ctx->ir_base[ref]; + ir_reg reg = ctx->regs[ref][2]; + + IR_ASSERT(load_insn->op == IR_LOAD); + if (IR_IS_CONST_REF(load_insn->op2)) { + *preg = reg; + if (reg == IR_REG_NONE) { + ir_insn *addr_insn = &ctx->ir_base[load_insn->op2]; + + IR_ASSERT(addr_insn->op == IR_C_ADDR); + IR_ASSERT(sizeof(void*) == 4 || IR_IS_SIGNED_32BIT(addr_insn->val.i64)); + return addr_insn->val.i32; + } else { + ir_emit_load(ctx, IR_ADDR, reg, load_insn->op2); + return 0; + } + } else if (reg == IR_REG_NONE) { + return ir_fuse_addr(ctx, load_insn->op2, preg); + } else { + if (IR_REG_SPILLED(reg)) { + reg = IR_REG_NUM(reg); + ir_emit_load(ctx, IR_ADDR, reg, load_insn->op2); + } + *preg = reg; + return 0; + } +} + +static void ir_emit_prologue(ir_ctx *ctx) +{ + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + + if (ctx->flags & IR_USE_FRAME_POINTER) { + | push Ra(IR_REG_RBP) + | mov Ra(IR_REG_RBP), Ra(IR_REG_RSP) + } + if (ctx->stack_frame_size + ctx->call_stack_size) { + if (ctx->fixed_stack_red_zone) { + IR_ASSERT(ctx->stack_frame_size + ctx->call_stack_size <= ctx->fixed_stack_red_zone); + } else { + | sub Ra(IR_REG_RSP), (ctx->stack_frame_size + ctx->call_stack_size) + } + } + if (ctx->used_preserved_regs) { + int offset; + uint32_t i; + ir_regset used_preserved_regs = (ir_regset)ctx->used_preserved_regs; + + if (ctx->flags & IR_USE_FRAME_POINTER) { + offset = 0; + } else 
{ + offset = ctx->stack_frame_size + ctx->call_stack_size; + } + for (i = 0; i < IR_REG_NUM; i++) { + if (IR_REGSET_IN(used_preserved_regs, i)) { + if (i < IR_REG_FP_FIRST) { + ir_reg fp = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER; + + offset -= sizeof(void*); + | mov aword [Ra(fp)+offset], Ra(i) + } else { + ir_reg fp = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER; + + offset -= sizeof(void*); + if (ctx->mflags & IR_X86_AVX) { + | vmovsd qword [Ra(fp)+offset], xmm(i-IR_REG_FP_FIRST) + } else { + | movsd qword [Ra(fp)+offset], xmm(i-IR_REG_FP_FIRST) + } + } + } + } + } +} + +static void ir_emit_epilogue(ir_ctx *ctx) +{ + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + + if (ctx->used_preserved_regs) { + int offset; + uint32_t i; + ir_regset used_preserved_regs = (ir_regset)ctx->used_preserved_regs; + + if (ctx->flags & IR_USE_FRAME_POINTER) { + offset = 0; + } else { + offset = ctx->stack_frame_size + ctx->call_stack_size; + } + for (i = 0; i < IR_REG_NUM; i++) { + if (IR_REGSET_IN(used_preserved_regs, i)) { + if (i < IR_REG_FP_FIRST) { + ir_reg fp = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER; + + offset -= sizeof(void*); + | mov Ra(i), aword [Ra(fp)+offset] + } else { + ir_reg fp = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER; + + offset -= sizeof(void*); + if (ctx->mflags & IR_X86_AVX) { + | vmovsd xmm(i-IR_REG_FP_FIRST), qword [Ra(fp)+offset] + } else { + | movsd xmm(i-IR_REG_FP_FIRST), qword [Ra(fp)+offset] + } + } + } + } + } + + if (ctx->flags & IR_USE_FRAME_POINTER) { + | mov Ra(IR_REG_RSP), Ra(IR_REG_RBP) + | pop Ra(IR_REG_RBP) + } else if (ctx->stack_frame_size + ctx->call_stack_size) { + if (ctx->fixed_stack_red_zone) { + IR_ASSERT(ctx->stack_frame_size + ctx->call_stack_size <= ctx->fixed_stack_red_zone); + } else { + | add Ra(IR_REG_RSP), (ctx->stack_frame_size + ctx->call_stack_size) + } + } +} + +static void ir_emit_binop_int(ir_ctx *ctx, ir_ref def, ir_insn *insn) +{ + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + ir_type type = insn->type; + ir_ref op1 = insn->op1; + ir_ref op2 = insn->op2; + ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); + ir_reg op1_reg = ctx->regs[def][1]; + ir_reg op2_reg = ctx->regs[def][2]; + + IR_ASSERT(def_reg != IR_REG_NONE); + + if (op1_reg != IR_REG_NONE && IR_REG_SPILLED(op1_reg)) { + op1_reg = IR_REG_NUM(op1_reg); + ir_emit_load(ctx, type, op1_reg, op1); + } + if (def_reg != op1_reg) { + if (op1_reg != IR_REG_NONE) { + ir_emit_mov(ctx, type, def_reg, op1_reg); + } else { + ir_emit_load(ctx, type, def_reg, op1); + } + if (op1 == op2) { + op2_reg = def_reg; + } + } + + if (op2_reg != IR_REG_NONE) { + if (IR_REG_SPILLED(op2_reg) || IR_IS_CONST_REF(op2)) { + op2_reg = IR_REG_NUM(op2_reg); + if (op1 != op2) { + ir_emit_load(ctx, type, op2_reg, op2); + } + } + switch (insn->op) { + default: + IR_ASSERT(0 && "NIY binary op"); + case IR_ADD: + case IR_ADD_OV: + | ASM_REG_REG_OP add, type, def_reg, op2_reg + break; + case IR_SUB: + case IR_SUB_OV: + | ASM_REG_REG_OP sub, type, def_reg, op2_reg + break; + case IR_MUL: + case IR_MUL_OV: + | ASM_REG_REG_IMUL type, def_reg, op2_reg + break; + case IR_OR: + | ASM_REG_REG_OP or, type, def_reg, op2_reg + break; + case IR_AND: + | ASM_REG_REG_OP and, type, def_reg, op2_reg + break; + case IR_XOR: + | ASM_REG_REG_OP xor, type, def_reg, op2_reg + break; + } + } else if (IR_IS_CONST_REF(op2)) { + ir_insn 
*val_insn = &ctx->ir_base[op2]; + int32_t val; + + IR_ASSERT(IR_IS_32BIT(val_insn->type, val_insn->val)); + val = val_insn->val.i32; + switch (insn->op) { + default: + IR_ASSERT(0 && "NIY binary op"); + case IR_ADD: + case IR_ADD_OV: + | ASM_REG_IMM_OP add, type, def_reg, val + break; + case IR_SUB: + case IR_SUB_OV: + | ASM_REG_IMM_OP sub, type, def_reg, val + break; + case IR_MUL: + case IR_MUL_OV: + | ASM_REG_IMM_IMUL type, def_reg, val + break; + case IR_OR: + | ASM_REG_IMM_OP or, type, def_reg, val + break; + case IR_AND: + | ASM_REG_IMM_OP and, type, def_reg, val + break; + case IR_XOR: + | ASM_REG_IMM_OP xor, type, def_reg, val + break; + } + } else { + int32_t offset = 0; + + if (ir_rule(ctx, op2) & IR_FUSED) { + offset = ir_fuse_load(ctx, op2, &op2_reg); + } else { + offset = ir_ref_spill_slot(ctx, op2, &op2_reg); + } + if (op2_reg != IR_REG_NONE) { + switch (insn->op) { + default: + IR_ASSERT(0 && "NIY binary op"); + case IR_ADD: + case IR_ADD_OV: + | ASM_REG_MEM_OP add, type, def_reg, [Ra(op2_reg)+offset] + break; + case IR_SUB: + case IR_SUB_OV: + | ASM_REG_MEM_OP sub, type, def_reg, [Ra(op2_reg)+offset] + break; + case IR_MUL: + case IR_MUL_OV: + | ASM_REG_MEM_IMUL type, def_reg, [Ra(op2_reg)+offset] + break; + case IR_OR: + | ASM_REG_MEM_OP or, type, def_reg, [Ra(op2_reg)+offset] + break; + case IR_AND: + | ASM_REG_MEM_OP and, type, def_reg, [Ra(op2_reg)+offset] + break; + case IR_XOR: + | ASM_REG_MEM_OP xor, type, def_reg, [Ra(op2_reg)+offset] + break; + } + } else { + switch (insn->op) { + default: + IR_ASSERT(0 && "NIY binary op"); + case IR_ADD: + case IR_ADD_OV: + | ASM_REG_MEM_OP add, type, def_reg, [offset] + break; + case IR_SUB: + case IR_SUB_OV: + | ASM_REG_MEM_OP sub, type, def_reg, [offset] + break; + case IR_MUL: + case IR_MUL_OV: + | ASM_REG_MEM_IMUL type, def_reg, [offset] + break; + case IR_OR: + | ASM_REG_MEM_OP or, type, def_reg, [offset] + break; + case IR_AND: + | ASM_REG_MEM_OP and, type, def_reg, [offset] + break; + case IR_XOR: + | ASM_REG_MEM_OP xor, type, def_reg, [offset] + break; + } + } + } + if (IR_REG_SPILLED(ctx->regs[def][0])) { + ir_emit_store(ctx, type, def, def_reg); + } +} + +static void ir_emit_imul3(ir_ctx *ctx, ir_ref def, ir_insn *insn) +{ + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + ir_type type = insn->type; + ir_ref op1 = insn->op1; + ir_ref op2 = insn->op2; + ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); + ir_reg op1_reg = ctx->regs[def][1]; + ir_insn *val_insn = &ctx->ir_base[op2]; + int32_t val; + + IR_ASSERT(def_reg != IR_REG_NONE); + IR_ASSERT(!IR_IS_CONST_REF(op1)); + IR_ASSERT(IR_IS_CONST_REF(op2)); + IR_ASSERT(IR_IS_32BIT(val_insn->type, val_insn->val)); + val = val_insn->val.i32; + + if (op1_reg != IR_REG_NONE) { + if (IR_REG_SPILLED(op1_reg)) { + op1_reg = IR_REG_NUM(op1_reg); + ir_emit_load(ctx, type, op1_reg, op1); + } + switch (ir_type_size[type]) { + default: + IR_ASSERT(0); + case 2: + | imul Rw(def_reg), Rw(op1_reg), val + break; + case 4: + | imul Rd(def_reg), Rd(op1_reg), val + break; +|.if X64 +|| case 8: +| imul Rq(def_reg), Rq(op1_reg), val +|| break; +|.endif + } + } else { + int32_t offset = 0; + + if (ir_rule(ctx, op1) & IR_FUSED) { + offset = ir_fuse_load(ctx, op1, &op1_reg); + } else { + offset = ir_ref_spill_slot(ctx, op1, &op1_reg); + } + switch (ir_type_size[type]) { + default: + IR_ASSERT(0); + case 2: + | imul Rw(def_reg), word [Ra(op1_reg)+offset], val + break; + case 4: + | imul Rd(def_reg), dword [Ra(op1_reg)+offset], val + break; +|.if X64 +|| case 8: +| imul 
Rq(def_reg), qword [Ra(op1_reg)+offset], val +|| break; +|.endif + } + } + if (IR_REG_SPILLED(ctx->regs[def][0])) { + ir_emit_store(ctx, type, def, def_reg); + } +} + +static void ir_emit_min_max_int(ir_ctx *ctx, ir_ref def, ir_insn *insn) +{ + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + ir_type type = insn->type; + ir_ref op1 = insn->op1; + ir_ref op2 = insn->op2; + ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); + ir_reg op1_reg = ctx->regs[def][1]; + ir_reg op2_reg = ctx->regs[def][2]; + + IR_ASSERT(def_reg != IR_REG_NONE && op2_reg != IR_REG_NONE); + + if (op1_reg != IR_REG_NONE && IR_REG_SPILLED(op1_reg)) { + op1_reg = IR_REG_NUM(op1_reg); + ir_emit_load(ctx, type, op1_reg, op1); + } + if (def_reg != op1_reg) { + if (op1_reg != IR_REG_NONE) { + ir_emit_mov(ctx, type, def_reg, op1_reg); + } else { + ir_emit_load(ctx, type, def_reg, op1); + } + } + + if (IR_REG_SPILLED(op2_reg) || IR_IS_CONST_REF(op2)) { + op2_reg = IR_REG_NUM(op2_reg); + if (op1 != op2) { + ir_emit_load(ctx, type, op2_reg, op2); + } + } + + if (op1 == op2) { + return; + } + + | ASM_REG_REG_OP cmp, type, def_reg, op2_reg + if (insn->op == IR_MIN) { + if (IR_IS_TYPE_SIGNED(type)) { + | ASM_REG_REG_OP2 cmovg, type, def_reg, op2_reg + } else { + | ASM_REG_REG_OP2 cmova, type, def_reg, op2_reg + } + } else { + IR_ASSERT(insn->op == IR_MAX); + if (IR_IS_TYPE_SIGNED(type)) { + | ASM_REG_REG_OP2 cmova, type, def_reg, op2_reg + } else { + | ASM_REG_REG_OP2 cmovg, type, def_reg, op2_reg + } + } + + if (IR_REG_SPILLED(ctx->regs[def][0])) { + ir_emit_store(ctx, type, def, def_reg); + } +} + +static void ir_emit_overflow(ir_ctx *ctx, ir_ref def, ir_insn *insn) +{ + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); + ir_type type = ctx->ir_base[insn->op1].type; + + IR_ASSERT(def_reg != IR_REG_NONE); + IR_ASSERT(IR_IS_TYPE_INT(type)); + if (IR_IS_TYPE_SIGNED(type)) { + | seto Rb(def_reg) + } else { + | setc Rb(def_reg) + } + if (IR_REG_SPILLED(ctx->regs[def][0])) { + ir_emit_store(ctx, insn->type, def, def_reg); + } +} + +static void ir_emit_overflow_and_branch(ir_ctx *ctx, uint32_t b, ir_ref def, ir_insn *insn) +{ + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + ir_insn *overflow_insn = &ctx->ir_base[insn->op2]; + ir_type type = ctx->ir_base[overflow_insn->op1].type; + uint32_t true_block, false_block, next_block; + bool reverse = 0; + + ir_get_true_false_blocks(ctx, b, &true_block, &false_block, &next_block); + if (true_block == next_block) { + reverse = 1; + true_block = false_block; + false_block = 0; + } else if (false_block == next_block) { + false_block = 0; + } + + if (IR_IS_TYPE_SIGNED(type)) { + if (reverse) { + | jno =>true_block + } else { + | jo =>true_block + } + } else { + if (reverse) { + | jnc =>true_block + } else { + | jc =>true_block + } + } + if (false_block) { + | jmp =>false_block + } +} + +static void ir_emit_mem_binop_int(ir_ctx *ctx, ir_ref def, ir_insn *insn) +{ + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + ir_insn *op_insn = &ctx->ir_base[insn->op3]; + ir_type type = op_insn->type; + ir_ref op2 = op_insn->op2; + ir_reg op2_reg = ctx->regs[insn->op3][2]; + ir_reg reg; + int32_t offset = 0; + + if (insn->op == IR_STORE) { + reg = ctx->regs[def][2]; + if (IR_IS_CONST_REF(insn->op2)) { + if (reg == IR_REG_NONE) { + offset = ctx->ir_base[insn->op2].val.i32; + } else { + ir_emit_load(ctx, IR_ADDR, reg, insn->op2); + } + } else if (reg == 
IR_REG_NONE) { + offset = ir_fuse_addr(ctx, insn->op2, ®); + } else if (IR_REG_SPILLED(reg)) { + reg = IR_REG_NUM(reg); + ir_emit_load(ctx, IR_ADDR, reg, insn->op2); + } + } else { + IR_ASSERT(insn->op == IR_VSTORE); + offset = ir_var_spill_slot(ctx, insn->op2, ®); + } + + if (op2_reg == IR_REG_NONE) { + ir_val *val = &ctx->ir_base[op2].val; + + IR_ASSERT(IR_IS_CONST_REF(op2) && (ir_type_size[type] != 8 || IR_IS_32BIT(type, ctx->ir_base[op2].val))); + switch (op_insn->op) { + default: + IR_ASSERT(0 && "NIY binary op"); + case IR_ADD: + case IR_ADD_OV: + if (reg != IR_REG_NONE) { + | ASM_MEM_IMM_OP add, type, [Ra(reg)+offset], val->i32 + } else { + | ASM_MEM_IMM_OP add, type, [offset], val->i32 + } + break; + case IR_SUB: + case IR_SUB_OV: + if (reg != IR_REG_NONE) { + | ASM_MEM_IMM_OP sub, type, [Ra(reg)+offset], val->i32 + } else { + | ASM_MEM_IMM_OP sub, type, [offset], val->i32 + } + break; + case IR_OR: + if (reg != IR_REG_NONE) { + | ASM_MEM_IMM_OP or, type, [Ra(reg)+offset], val->i32 + } else { + | ASM_MEM_IMM_OP or, type, [offset], val->i32 + } + break; + case IR_AND: + if (reg != IR_REG_NONE) { + | ASM_MEM_IMM_OP and, type, [Ra(reg)+offset], val->i32 + } else { + | ASM_MEM_IMM_OP and, type, [offset], val->i32 + } + break; + case IR_XOR: + if (reg != IR_REG_NONE) { + | ASM_MEM_IMM_OP xor, type, [Ra(reg)+offset], val->i32 + } else { + | ASM_MEM_IMM_OP xor, type, [offset], val->i32 + } + break; + } + } else { + if (IR_REG_SPILLED(op2_reg) || IR_IS_CONST_REF(op2)) { + op2_reg = IR_REG_NUM(op2_reg); + ir_emit_load(ctx, type, op2_reg, op2); + } + switch (op_insn->op) { + default: + IR_ASSERT(0 && "NIY binary op"); + case IR_ADD: + case IR_ADD_OV: + if (reg != IR_REG_NONE) { + | ASM_MEM_REG_OP add, type, [Ra(reg)+offset], op2_reg + } else { + | ASM_MEM_REG_OP add, type, [offset], op2_reg + } + break; + case IR_SUB: + case IR_SUB_OV: + if (reg != IR_REG_NONE) { + | ASM_MEM_REG_OP sub, type, [Ra(reg)+offset], op2_reg + } else { + | ASM_MEM_REG_OP sub, type, [offset], op2_reg + } + break; + case IR_OR: + if (reg != IR_REG_NONE) { + | ASM_MEM_REG_OP or, type, [Ra(reg)+offset], op2_reg + } else { + | ASM_MEM_REG_OP or, type, [offset], op2_reg + } + break; + case IR_AND: + if (reg != IR_REG_NONE) { + | ASM_MEM_REG_OP and, type, [Ra(reg)+offset], op2_reg + } else { + | ASM_MEM_REG_OP and, type, [offset], op2_reg + } + break; + case IR_XOR: + if (reg != IR_REG_NONE) { + | ASM_MEM_REG_OP xor, type, [Ra(reg)+offset], op2_reg + } else { + | ASM_MEM_REG_OP xor, type, [offset], op2_reg + } + break; + } + } +} + +static void ir_emit_reg_binop_int(ir_ctx *ctx, ir_ref def, ir_insn *insn) +{ + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + ir_insn *op_insn = &ctx->ir_base[insn->op2]; + ir_type type = op_insn->type; + ir_ref op2 = op_insn->op2; + ir_reg op2_reg = ctx->regs[insn->op2][2]; + ir_reg reg; + + IR_ASSERT(insn->op == IR_RSTORE); + reg = insn->op3; + + if (op2_reg == IR_REG_NONE) { + ir_val *val = &ctx->ir_base[op2].val; + + IR_ASSERT(IR_IS_CONST_REF(op2) && (ir_type_size[type] != 8 || IR_IS_32BIT(type, ctx->ir_base[op2].val))); + switch (op_insn->op) { + default: + IR_ASSERT(0 && "NIY binary op"); + case IR_ADD: + | ASM_REG_IMM_OP add, type, reg, val->i32 + break; + case IR_SUB: + | ASM_REG_IMM_OP sub, type, reg, val->i32 + break; + case IR_OR: + | ASM_REG_IMM_OP or, type, reg, val->i32 + break; + case IR_AND: + | ASM_REG_IMM_OP and, type, reg, val->i32 + break; + case IR_XOR: + | ASM_REG_IMM_OP xor, type, reg, val->i32 + break; + } + } else { + if 
+static void ir_emit_reg_binop_int(ir_ctx *ctx, ir_ref def, ir_insn *insn) +{ + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + ir_insn *op_insn = &ctx->ir_base[insn->op2]; + ir_type type = op_insn->type; + ir_ref op2 = op_insn->op2; + ir_reg op2_reg = ctx->regs[insn->op2][2]; + ir_reg reg; + + IR_ASSERT(insn->op == IR_RSTORE); + reg = insn->op3; + + if (op2_reg == IR_REG_NONE) { + ir_val *val = &ctx->ir_base[op2].val; + + IR_ASSERT(IR_IS_CONST_REF(op2) && (ir_type_size[type] != 8 || IR_IS_32BIT(type, ctx->ir_base[op2].val))); + switch (op_insn->op) { + default: + IR_ASSERT(0 && "NIY binary op"); + case IR_ADD: + | ASM_REG_IMM_OP add, type, reg, val->i32 + break; + case IR_SUB: + | ASM_REG_IMM_OP sub, type, reg, val->i32 + break; + case IR_OR: + | ASM_REG_IMM_OP or, type, reg, val->i32 + break; + case IR_AND: + | ASM_REG_IMM_OP and, type, reg, val->i32 + break; + case IR_XOR: + | ASM_REG_IMM_OP xor, type, reg, val->i32 + break; + } + } else { + if (IR_REG_SPILLED(op2_reg) || IR_IS_CONST_REF(op2)) { + op2_reg = IR_REG_NUM(op2_reg); + ir_emit_load(ctx, type, op2_reg, op2); + } + switch (op_insn->op) { + default: + IR_ASSERT(0 && "NIY binary op"); + case IR_ADD: + | ASM_REG_REG_OP add, type, reg, op2_reg + break; + case IR_SUB: + | ASM_REG_REG_OP sub, type, reg, op2_reg + break; + case IR_OR: + | ASM_REG_REG_OP or, type, reg, op2_reg + break; + case IR_AND: + | ASM_REG_REG_OP and, type, reg, op2_reg + break; + case IR_XOR: + | ASM_REG_REG_OP xor, type, reg, op2_reg + break; + } + } +} + +static void ir_emit_mul_div_mod_pwr2(ir_ctx *ctx, ir_ref def, ir_insn *insn) +{ + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + ir_type type = insn->type; + ir_ref op1 = insn->op1; + ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); + ir_reg op1_reg = ctx->regs[def][1]; + + IR_ASSERT(def_reg != IR_REG_NONE); + + if (op1_reg != IR_REG_NONE && IR_REG_SPILLED(op1_reg)) { + op1_reg = IR_REG_NUM(op1_reg); + ir_emit_load(ctx, type, op1_reg, op1); + } + if (def_reg != op1_reg) { + if (op1_reg != IR_REG_NONE) { + ir_emit_mov(ctx, type, def_reg, op1_reg); + } else { + ir_emit_load(ctx, type, def_reg, op1); + } + } + if (insn->op == IR_MUL) { + uint32_t shift = IR_LOG2(ctx->ir_base[insn->op2].val.u64); + if (shift == 1) { + | ASM_REG_REG_OP add, insn->type, def_reg, def_reg + } else { + | ASM_REG_IMM_OP shl, insn->type, def_reg, shift + } + } else if (insn->op == IR_DIV) { + uint32_t shift = IR_LOG2(ctx->ir_base[insn->op2].val.u64); + | ASM_REG_IMM_OP shr, insn->type, def_reg, shift + } else { + IR_ASSERT(insn->op == IR_MOD); + uint64_t mask = ctx->ir_base[insn->op2].val.u64 - 1; + IR_ASSERT(IR_IS_UNSIGNED_32BIT(mask)); + | ASM_REG_IMM_OP and, insn->type, def_reg, mask + } + if (IR_REG_SPILLED(ctx->regs[def][0])) { + ir_emit_store(ctx, type, def, def_reg); + } +} + +static void ir_emit_mem_mul_div_mod_pwr2(ir_ctx *ctx, ir_ref def, ir_insn *insn) +{ + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + ir_insn *op_insn = &ctx->ir_base[insn->op3]; + ir_type type = op_insn->type; + ir_reg reg; + int32_t offset = 0; + + if (insn->op == IR_STORE) { + reg = ctx->regs[def][2]; + if (IR_IS_CONST_REF(insn->op2)) { + if (reg == IR_REG_NONE) { + offset = ctx->ir_base[insn->op2].val.i32; + } else { + ir_emit_load(ctx, IR_ADDR, reg, insn->op2); + } + } else if (reg == IR_REG_NONE) { + offset = ir_fuse_addr(ctx, insn->op2, &reg); + } else if (IR_REG_SPILLED(reg)) { + reg = IR_REG_NUM(reg); + ir_emit_load(ctx, IR_ADDR, reg, insn->op2); + } + } else { + IR_ASSERT(insn->op == IR_VSTORE); + offset = ir_var_spill_slot(ctx, insn->op2, &reg); + } + + if (op_insn->op == IR_MUL) { + uint32_t shift = IR_LOG2(ctx->ir_base[op_insn->op2].val.u64); + | ASM_MEM_IMM_OP shl, type, [Ra(reg)+offset], shift + } else if (op_insn->op == IR_DIV) { + uint32_t shift = IR_LOG2(ctx->ir_base[op_insn->op2].val.u64); + | ASM_MEM_IMM_OP shr, type, [Ra(reg)+offset], shift + } else { + IR_ASSERT(op_insn->op == IR_MOD); + uint64_t mask = ctx->ir_base[op_insn->op2].val.u64 - 1; + IR_ASSERT(IR_IS_UNSIGNED_32BIT(mask)); + | ASM_MEM_IMM_OP and, type, [Ra(reg)+offset], mask + } +} +
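+/* Strength-reduction sketch for the two power-of-two helpers above, assuming an + * unsigned value in rax and a constant 8 (1 << 3): + * + * MUL x, 8 -> shl rax, 3 (MUL x, 2 becomes add rax, rax instead) + * DIV x, 8 -> shr rax, 3 + * MOD x, 8 -> and rax, 7 ; mask = 2^n - 1 + * + * These rewrites are only valid for unsigned operands; signed division by a power + * of two would need additional rounding adjustments. + */ +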
+static void ir_emit_shift(ir_ctx *ctx, ir_ref def, ir_insn *insn) +{ + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + ir_type type = insn->type; + ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); + ir_reg op1_reg = ctx->regs[def][1]; + ir_reg op2_reg = ctx->regs[def][2]; + + IR_ASSERT(def_reg != IR_REG_NONE && def_reg != IR_REG_RCX); + if (op1_reg != IR_REG_NONE && IR_REG_SPILLED(op1_reg)) { + op1_reg = IR_REG_NUM(op1_reg); + ir_emit_load(ctx, type, op1_reg, insn->op1); + } + if (op2_reg != IR_REG_NONE && IR_REG_SPILLED(op2_reg)) { + op2_reg = IR_REG_NUM(op2_reg); + ir_emit_load(ctx, type, op2_reg, insn->op2); + } + if (op2_reg != IR_REG_RCX) { + if (op1_reg == IR_REG_RCX) { + ir_emit_mov(ctx, type, def_reg, op1_reg); + op1_reg = def_reg; + } + if (op2_reg != IR_REG_NONE) { + ir_emit_mov(ctx, type, IR_REG_RCX, op2_reg); + } else { + ir_emit_load(ctx, type, IR_REG_RCX, insn->op2); + } + } + if (def_reg != op1_reg) { + if (op1_reg != IR_REG_NONE) { + ir_emit_mov(ctx, type, def_reg, op1_reg); + } else { + ir_emit_load(ctx, type, def_reg, insn->op1); + } + } + switch (insn->op) { + default: + IR_ASSERT(0); + case IR_SHL: + | ASM_REG_TXT_OP shl, insn->type, def_reg, cl + break; + case IR_SHR: + | ASM_REG_TXT_OP shr, insn->type, def_reg, cl + break; + case IR_SAR: + | ASM_REG_TXT_OP sar, insn->type, def_reg, cl + break; + case IR_ROL: + | ASM_REG_TXT_OP rol, insn->type, def_reg, cl + break; + case IR_ROR: + | ASM_REG_TXT_OP ror, insn->type, def_reg, cl + break; + } + if (IR_REG_SPILLED(ctx->regs[def][0])) { + ir_emit_store(ctx, type, def, def_reg); + } +} + +static void ir_emit_mem_shift(ir_ctx *ctx, ir_ref def, ir_insn *insn) +{ + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + ir_insn *op_insn = &ctx->ir_base[insn->op3]; + ir_type type = op_insn->type; + ir_ref op2 = op_insn->op2; + ir_reg op2_reg = ctx->regs[insn->op3][2]; + ir_reg reg; + int32_t offset = 0; + + if (insn->op == IR_STORE) { + reg = ctx->regs[def][2]; + if (IR_IS_CONST_REF(insn->op2)) { + if (reg == IR_REG_NONE) { + offset = ctx->ir_base[insn->op2].val.i32; + } else { + ir_emit_load(ctx, IR_ADDR, reg, insn->op2); + } + } else if (reg == IR_REG_NONE) { + offset = ir_fuse_addr(ctx, insn->op2, &reg); + } else if (IR_REG_SPILLED(reg)) { + reg = IR_REG_NUM(reg); + ir_emit_load(ctx, IR_ADDR, reg, insn->op2); + } + } else { + IR_ASSERT(insn->op == IR_VSTORE); + offset = ir_var_spill_slot(ctx, insn->op2, &reg); + } + + if (op2_reg != IR_REG_NONE && IR_REG_SPILLED(op2_reg)) { + op2_reg = IR_REG_NUM(op2_reg); + ir_emit_load(ctx, type, op2_reg, op2); + } + if (op2_reg != IR_REG_RCX) { + if (op2_reg != IR_REG_NONE) { + ir_emit_mov(ctx, type, IR_REG_RCX, op2_reg); + } else { + ir_emit_load(ctx, type, IR_REG_RCX, op2); + } + } + switch (op_insn->op) { + default: + IR_ASSERT(0); + case IR_SHL: + | ASM_MEM_TXT_OP shl, type, [Ra(reg)+offset], cl + break; + case IR_SHR: + | ASM_MEM_TXT_OP shr, type, [Ra(reg)+offset], cl + break; + case IR_SAR: + | ASM_MEM_TXT_OP sar, type, [Ra(reg)+offset], cl + break; + case IR_ROL: + | ASM_MEM_TXT_OP rol, type, [Ra(reg)+offset], cl + break; + case IR_ROR: + | ASM_MEM_TXT_OP ror, type, [Ra(reg)+offset], cl + break; + } +} +
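+/* Note on the two variable-count shift helpers above: x86 accepts a variable shift + * count only in CL, which is what the register shuffling prepares. A sketch for SHL, + * assuming the value in rax and the count in rdx: + * + * mov rcx, rdx ; count must live in CL + * shl rax, cl + * + * The IR_ASSERT(def_reg != IR_REG_RCX) guarantees the result never clashes with the + * implicitly used count register. + */ +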
+static void ir_emit_shift_const(ir_ctx *ctx, ir_ref def, ir_insn *insn) +{ + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + int32_t shift; + ir_type type = insn->type; + ir_ref op1 = insn->op1; + ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); + ir_reg op1_reg = ctx->regs[def][1]; + + IR_ASSERT(IR_IS_SIGNED_32BIT(ctx->ir_base[insn->op2].val.i64)); + shift = ctx->ir_base[insn->op2].val.i32; + IR_ASSERT(def_reg != IR_REG_NONE); + + if (op1_reg != IR_REG_NONE && IR_REG_SPILLED(op1_reg)) { + op1_reg = IR_REG_NUM(op1_reg); + ir_emit_load(ctx, type, op1_reg, op1); + } + if (def_reg != op1_reg) { + if (op1_reg != IR_REG_NONE) { + ir_emit_mov(ctx, type, def_reg, op1_reg); + } else { + ir_emit_load(ctx, type, def_reg, op1); + } + } + switch (insn->op) { + default: + IR_ASSERT(0); + case IR_SHL: + | ASM_REG_IMM_OP shl, insn->type, def_reg, shift + break; + case IR_SHR: + | ASM_REG_IMM_OP shr, insn->type, def_reg, shift + break; + case IR_SAR: + | ASM_REG_IMM_OP sar, insn->type, def_reg, shift + break; + case IR_ROL: + | ASM_REG_IMM_OP rol, insn->type, def_reg, shift + break; + case IR_ROR: + | ASM_REG_IMM_OP ror, insn->type, def_reg, shift + break; + } + if (IR_REG_SPILLED(ctx->regs[def][0])) { + ir_emit_store(ctx, type, def, def_reg); + } +} + +static void ir_emit_mem_shift_const(ir_ctx *ctx, ir_ref def, ir_insn *insn) +{ + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + ir_insn *op_insn = &ctx->ir_base[insn->op3]; + ir_type type = op_insn->type; + int32_t shift; + ir_reg reg; + int32_t offset = 0; + + IR_ASSERT(IR_IS_SIGNED_32BIT(ctx->ir_base[op_insn->op2].val.i64)); + shift = ctx->ir_base[op_insn->op2].val.i32; + if (insn->op == IR_STORE) { + reg = ctx->regs[def][2]; + if (IR_IS_CONST_REF(insn->op2)) { + if (reg == IR_REG_NONE) { + offset = ctx->ir_base[insn->op2].val.i32; + } else { + ir_emit_load(ctx, IR_ADDR, reg, insn->op2); + } + } else if (reg == IR_REG_NONE) { + offset = ir_fuse_addr(ctx, insn->op2, &reg); + } else if (IR_REG_SPILLED(reg)) { + reg = IR_REG_NUM(reg); + ir_emit_load(ctx, IR_ADDR, reg, insn->op2); + } + } else { + IR_ASSERT(insn->op == IR_VSTORE); + offset = ir_var_spill_slot(ctx, insn->op2, &reg); + } + + switch (op_insn->op) { + default: + IR_ASSERT(0); + case IR_SHL: + | ASM_MEM_IMM_OP shl, type, [Ra(reg)+offset], shift + break; + case IR_SHR: + | ASM_MEM_IMM_OP shr, type, [Ra(reg)+offset], shift + break; + case IR_SAR: + | ASM_MEM_IMM_OP sar, type, [Ra(reg)+offset], shift + break; + case IR_ROL: + | ASM_MEM_IMM_OP rol, type, [Ra(reg)+offset], shift + break; + case IR_ROR: + | ASM_MEM_IMM_OP ror, type, [Ra(reg)+offset], shift + break; + } +} + +static void ir_emit_op_int(ir_ctx *ctx, ir_ref def, ir_insn *insn) +{ + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + ir_type type = insn->type; + ir_ref op1 = insn->op1; + ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); + ir_reg op1_reg = ctx->regs[def][1]; + + IR_ASSERT(def_reg != IR_REG_NONE); + + if (op1_reg != IR_REG_NONE && IR_REG_SPILLED(op1_reg)) { + op1_reg = IR_REG_NUM(op1_reg); + ir_emit_load(ctx, type, op1_reg, op1); + } + if (def_reg != op1_reg) { + if (op1_reg != IR_REG_NONE) { + ir_emit_mov(ctx, type, def_reg, op1_reg); + } else { + ir_emit_load(ctx, type, def_reg, op1); + } + } + if (insn->op == IR_ADD) { + | ASM_REG_OP inc, insn->type, def_reg + } else if (insn->op == IR_SUB) { + | ASM_REG_OP dec, insn->type, def_reg + } else if (insn->op == IR_NOT) { + | ASM_REG_OP not, insn->type, def_reg + } else if (insn->op == IR_NEG) { + | ASM_REG_OP neg, insn->type, def_reg + } else { + IR_ASSERT(insn->op == IR_BSWAP); + switch (ir_type_size[insn->type]) { + default: + IR_ASSERT(0); + case 4: + | bswap Rd(def_reg) + break; + case 8: + IR_ASSERT(sizeof(void*) == 8); +|.if X64 + | bswap Rq(def_reg) +|.endif + break; + } + } + if (IR_REG_SPILLED(ctx->regs[def][0])) { + ir_emit_store(ctx, type, def, def_reg); + } +} +
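+/* The unary helper above is presumably selected when the matcher recognizes + * ADD(x, 1) and SUB(x, 1), so the shorter inc/dec encodings can be used, e.g. for + * x = x + 1 with x in rax: + * + * inc rax ; instead of add rax, 1 + * + * BSWAP is only emitted for 4- and 8-byte values; the 8-byte form exists only on + * x86-64, hence the X64 guard. + */ +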
+static void ir_emit_mem_op_int(ir_ctx *ctx, ir_ref def, ir_insn *insn) +{ + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + ir_insn *op_insn = &ctx->ir_base[insn->op3]; + ir_type type = op_insn->type; + ir_reg reg; + int32_t offset = 0; + + if (insn->op == IR_STORE) { + reg = ctx->regs[def][2]; + if (IR_IS_CONST_REF(insn->op2)) { + if (reg == IR_REG_NONE) { + offset = ctx->ir_base[insn->op2].val.i32; + } else { + ir_emit_load(ctx, IR_ADDR, reg, insn->op2); + } + } else if (reg == IR_REG_NONE) { + offset = ir_fuse_addr(ctx, insn->op2, &reg); + } else if (IR_REG_SPILLED(reg)) { + reg = IR_REG_NUM(reg); + ir_emit_load(ctx, IR_ADDR, reg, insn->op2); + } + } else { + IR_ASSERT(insn->op == IR_VSTORE); + offset = ir_var_spill_slot(ctx, insn->op2, &reg); + } + + if (op_insn->op == IR_ADD) { + | ASM_MEM_OP inc, type, [Ra(reg)+offset] + } else if (op_insn->op == IR_SUB) { + | ASM_MEM_OP dec, type, [Ra(reg)+offset] + } else if (op_insn->op == IR_NOT) { + | ASM_MEM_OP not, type, [Ra(reg)+offset] + } else { + IR_ASSERT(op_insn->op == IR_NEG); + | ASM_MEM_OP neg, type, [Ra(reg)+offset] + } +} + +static void ir_emit_abs_int(ir_ctx *ctx, ir_ref def, ir_insn *insn) +{ + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + ir_type type = insn->type; + ir_ref op1 = insn->op1; + ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); + ir_reg op1_reg = ctx->regs[def][1]; + + IR_ASSERT(def_reg != IR_REG_NONE && op1_reg != IR_REG_NONE); + + if (op1_reg != IR_REG_NONE && IR_REG_SPILLED(op1_reg)) { + op1_reg = IR_REG_NUM(op1_reg); + ir_emit_load(ctx, type, op1_reg, op1); + } + + IR_ASSERT(def_reg != op1_reg); + + ir_emit_mov(ctx, insn->type, def_reg, op1_reg); + | ASM_REG_OP neg, insn->type, def_reg + | ASM_REG_REG_OP2 cmovs, type, def_reg, op1_reg + if (IR_REG_SPILLED(ctx->regs[def][0])) { + ir_emit_store(ctx, type, def, def_reg); + } +} + +static void ir_emit_bool_not_int(ir_ctx *ctx, ir_ref def, ir_insn *insn) +{ + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + ir_type type = ctx->ir_base[insn->op1].type; + ir_ref op1 = insn->op1; + ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); + ir_reg op1_reg = ctx->regs[def][1]; + + IR_ASSERT(def_reg != IR_REG_NONE); + + if (op1_reg != IR_REG_NONE && IR_REG_SPILLED(op1_reg)) { + op1_reg = IR_REG_NUM(op1_reg); + ir_emit_load(ctx, type, op1_reg, op1); + } + + if (op1_reg != IR_REG_NONE) { + | ASM_REG_REG_OP test, type, op1_reg, op1_reg + } else { + ir_reg fp; + int32_t offset = ir_ref_spill_slot(ctx, op1, &fp); + + | ASM_MEM_IMM_OP cmp, type, [Ra(fp)+offset], 0 + } + | sete Rb(def_reg) + + if (IR_REG_SPILLED(ctx->regs[def][0])) { + ir_emit_store(ctx, type, def, def_reg); + } +} + +static void ir_emit_mul_div_mod(ir_ctx *ctx, ir_ref def, ir_insn *insn) +{ + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + ir_type type = insn->type; + ir_ref op1 = insn->op1; + ir_ref op2 = insn->op2; + ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); + ir_reg op1_reg = ctx->regs[def][1]; + ir_reg op2_reg = ctx->regs[def][2]; + int32_t offset = 0; + + if (op1_reg != IR_REG_NONE && IR_REG_SPILLED(op1_reg)) { + op1_reg = IR_REG_NUM(op1_reg); + ir_emit_load(ctx, type, op1_reg, op1); + } + if (op1_reg != IR_REG_RAX) { + if (op1_reg != IR_REG_NONE) { + ir_emit_mov(ctx, type, IR_REG_RAX, op1_reg); + } else { + ir_emit_load(ctx, type, IR_REG_RAX, op1); + } + } + if (op2_reg == IR_REG_NONE && op1 == op2) { + op2_reg = IR_REG_RAX; + } else if (IR_IS_CONST_REF(op2)) { + if (insn->op == IR_MUL || insn->op == IR_MUL_OV) { + op2_reg = IR_REG_RDX; + } else { + IR_ASSERT(op2_reg != IR_REG_NONE); + } + ir_emit_load(ctx, type, op2_reg, op2); + } + if (insn->op == IR_MUL || insn->op == IR_MUL_OV) { + IR_ASSERT(!IR_IS_TYPE_SIGNED(insn->type)); + if (op2_reg != IR_REG_NONE) { + if (IR_REG_SPILLED(op2_reg)) { + op2_reg = IR_REG_NUM(op2_reg); + 
ir_emit_load(ctx, type, op2_reg, op2); + } + | ASM_REG_OP mul, type, op2_reg + } else { + if (ir_rule(ctx, op2) & IR_FUSED) { + offset = ir_fuse_load(ctx, op2, &op2_reg); + } else { + offset = ir_ref_spill_slot(ctx, op2, &op2_reg); + } + | ASM_MEM_OP mul, type, [Ra(op2_reg)+offset] + } + } else { + if (IR_IS_TYPE_SIGNED(type)) { + if (ir_type_size[type] == 8) { + | cqo + } else if (ir_type_size[type] == 4) { + | cdq + } else if (ir_type_size[type] == 2) { + | cwd + } + if (op2_reg != IR_REG_NONE) { + if (IR_REG_SPILLED(op2_reg)) { + op2_reg = IR_REG_NUM(op2_reg); + ir_emit_load(ctx, type, op2_reg, op2); + } + | ASM_REG_OP idiv, type, op2_reg + } else { + if (ir_rule(ctx, op2) & IR_FUSED) { + offset = ir_fuse_load(ctx, op2, &op2_reg); + } else { + offset = ir_ref_spill_slot(ctx, op2, &op2_reg); + } + | ASM_MEM_OP idiv, type, [Ra(op2_reg)+offset] + } + } else { + | ASM_REG_REG_OP xor, type, IR_REG_RDX, IR_REG_RDX + if (op2_reg != IR_REG_NONE) { + if (IR_REG_SPILLED(op2_reg)) { + op2_reg = IR_REG_NUM(op2_reg); + ir_emit_load(ctx, type, op2_reg, op2); + } + | ASM_REG_OP div, type, op2_reg + } else { + if (ir_rule(ctx, op2) & IR_FUSED) { + offset = ir_fuse_load(ctx, op2, &op2_reg); + } else { + offset = ir_ref_spill_slot(ctx, op2, &op2_reg); + } + | ASM_MEM_OP div, type, [Ra(op2_reg)+offset] + } + } + } + + if (insn->op == IR_MUL || insn->op == IR_MUL_OV || insn->op == IR_DIV) { + if (def_reg != IR_REG_NONE) { + if (def_reg != IR_REG_RAX) { + ir_emit_mov(ctx, type, def_reg, IR_REG_RAX); + } + if (IR_REG_SPILLED(ctx->regs[def][0])) { + ir_emit_store(ctx, type, def, def_reg); + } + } else { + ir_emit_store(ctx, type, def, IR_REG_RAX); + } + } else { + IR_ASSERT(insn->op == IR_MOD); + if (ir_type_size[type] == 1) { + if (def_reg != IR_REG_NONE) { + | mov al, ah + if (def_reg != IR_REG_RAX) { + | mov Rb(def_reg), al + } + if (IR_REG_SPILLED(ctx->regs[def][0])) { + ir_emit_store(ctx, type, def, def_reg); + } + } else { + ir_reg fp; + int32_t offset = ir_ref_spill_slot(ctx, def, &fp); + + | mov byte [Ra(fp)+offset], ah + } + } else { + if (def_reg != IR_REG_NONE) { + if (def_reg != IR_REG_RDX) { + ir_emit_mov(ctx, type, def_reg, IR_REG_RDX); + } + if (IR_REG_SPILLED(ctx->regs[def][0])) { + ir_emit_store(ctx, type, def, def_reg); + } + } else { + ir_emit_store(ctx, type, def, IR_REG_RDX); + } + } + } +} + +static void ir_rodata(ir_ctx *ctx) +{ + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + + |.rodata + if (!data->rodata_label) { + int label = data->rodata_label = ctx->cfg_blocks_count + ctx->consts_count + 2; + |=>label: + } +} + +static void ir_emit_op_fp(ir_ctx *ctx, ir_ref def, ir_insn *insn) +{ + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + ir_type type = insn->type; + ir_ref op1 = insn->op1; + ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); + ir_reg op1_reg = ctx->regs[def][1]; + + IR_ASSERT(def_reg != IR_REG_NONE); + + if (op1_reg != IR_REG_NONE && IR_REG_SPILLED(op1_reg)) { + op1_reg = IR_REG_NUM(op1_reg); + ir_emit_load(ctx, type, op1_reg, op1); + } + if (def_reg != op1_reg) { + if (op1_reg != IR_REG_NONE) { + ir_emit_fp_mov(ctx, type, def_reg, op1_reg); + } else { + ir_emit_load(ctx, type, def_reg, op1); + } + } + if (insn->op == IR_NEG) { + if (insn->type == IR_DOUBLE) { + if (!data->double_neg_const) { + data->double_neg_const = 1; + ir_rodata(ctx); + |.align 16 + |->double_neg_const: + |.dword 0, 0x80000000, 0, 0 + |.code + } + if (ctx->mflags & IR_X86_AVX) { + | vxorpd xmm(def_reg-IR_REG_FP_FIRST), 
xmm(def_reg-IR_REG_FP_FIRST), [->double_neg_const] + } else { + | xorpd xmm(def_reg-IR_REG_FP_FIRST), [->double_neg_const] + } + } else { + IR_ASSERT(insn->type == IR_FLOAT); + if (!data->float_neg_const) { + data->float_neg_const = 1; + ir_rodata(ctx); + |.align 16 + |->float_neg_const: + |.dword 0x80000000, 0, 0, 0 + |.code + } + if (ctx->mflags & IR_X86_AVX) { + | vxorps xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST), [->float_neg_const] + } else { + | xorps xmm(def_reg-IR_REG_FP_FIRST), [->float_neg_const] + } + } + } else { + IR_ASSERT(insn->op == IR_ABS); + if (insn->type == IR_DOUBLE) { + if (!data->double_abs_const) { + data->double_abs_const = 1; + ir_rodata(ctx); + |.align 16 + |->double_abs_const: + |.dword 0xffffffff, 0x7fffffff, 0, 0 + |.code + } + if (ctx->mflags & IR_X86_AVX) { + | vandpd xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST), [->double_abs_const] + } else { + | andpd xmm(def_reg-IR_REG_FP_FIRST), [->double_abs_const] + } + } else { + IR_ASSERT(insn->type == IR_FLOAT); + if (!data->float_abs_const) { + data->float_abs_const = 1; + ir_rodata(ctx); + |.align 16 + |->float_abs_const: + |.dword 0x7fffffff, 0, 0, 0 + |.code + } + if (ctx->mflags & IR_X86_AVX) { + | vandps xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST), [->float_abs_const] + } else { + | andps xmm(def_reg-IR_REG_FP_FIRST), [->float_abs_const] + } + } + } + if (IR_REG_SPILLED(ctx->regs[def][0])) { + ir_emit_store(ctx, insn->type, def, def_reg); + } +} + +static void ir_emit_binop_sse2(ir_ctx *ctx, ir_ref def, ir_insn *insn) +{ + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + ir_type type = insn->type; + ir_ref op1 = insn->op1; + ir_ref op2 = insn->op2; + ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); + ir_reg op1_reg = ctx->regs[def][1]; + ir_reg op2_reg = ctx->regs[def][2]; + + IR_ASSERT(def_reg != IR_REG_NONE); + + if (op1_reg != IR_REG_NONE && IR_REG_SPILLED(op1_reg)) { + op1_reg = IR_REG_NUM(op1_reg); + ir_emit_load(ctx, type, op1_reg, op1); + } + if (def_reg != op1_reg) { + if (op1_reg != IR_REG_NONE) { + ir_emit_fp_mov(ctx, type, def_reg, op1_reg); + } else { + ir_emit_load(ctx, type, def_reg, op1); + } + if (op1 == op2) { + op2_reg = def_reg; + } + } + if (op2_reg != IR_REG_NONE) { + if (IR_REG_SPILLED(op2_reg) || IR_IS_CONST_REF(op2)) { + op2_reg = IR_REG_NUM(op2_reg); + if (op1 != op2) { + ir_emit_load(ctx, type, op2_reg, op2); + } + } + switch (insn->op) { + default: + IR_ASSERT(0 && "NIY binary op"); + case IR_ADD: + | ASM_SSE2_REG_REG_OP addss, addsd, type, def_reg, op2_reg + break; + case IR_SUB: + | ASM_SSE2_REG_REG_OP subss, subsd, type, def_reg, op2_reg + break; + case IR_MUL: + | ASM_SSE2_REG_REG_OP mulss, mulsd, type, def_reg, op2_reg + break; + case IR_DIV: + | ASM_SSE2_REG_REG_OP divss, divsd, type, def_reg, op2_reg + break; + case IR_MIN: + | ASM_SSE2_REG_REG_OP minss, minsd, type, def_reg, op2_reg + break; + case IR_MAX: + | ASM_SSE2_REG_REG_OP maxss, maxsd, type, def_reg, op2_reg + break; + } + } else if (IR_IS_CONST_REF(op2)) { + ir_insn *val_insn = &ctx->ir_base[op2]; + int label = ctx->cfg_blocks_count - op2; + + val_insn->const_flags |= IR_CONST_EMIT; + switch (insn->op) { + default: + IR_ASSERT(0 && "NIY binary op"); + case IR_ADD: + | ASM_SSE2_REG_MEM_OP addss, addsd, type, def_reg, [=>label] + break; + case IR_SUB: + | ASM_SSE2_REG_MEM_OP subss, subsd, type, def_reg, [=>label] + break; + case IR_MUL: + | ASM_SSE2_REG_MEM_OP mulss, mulsd, type, def_reg, [=>label] + break; + case IR_DIV: + | ASM_SSE2_REG_MEM_OP 
divss, divsd, type, def_reg, [=>label] + break; + case IR_MIN: + | ASM_SSE2_REG_MEM_OP minss, minsd, type, def_reg, [=>label] + break; + case IR_MAX: + | ASM_SSE2_REG_MEM_OP maxss, maxsd, type, def_reg, [=>label] + break; + } + } else { + int32_t offset = 0; + + if (ir_rule(ctx, op2) & IR_FUSED) { + offset = ir_fuse_load(ctx, op2, &op2_reg); + } else { + offset = ir_ref_spill_slot(ctx, op2, &op2_reg); + } + switch (insn->op) { + default: + IR_ASSERT(0 && "NIY binary op"); + case IR_ADD: + | ASM_SSE2_REG_MEM_OP addss, addsd, type, def_reg, [Ra(op2_reg)+offset] + break; + case IR_SUB: + | ASM_SSE2_REG_MEM_OP subss, subsd, type, def_reg, [Ra(op2_reg)+offset] + break; + case IR_MUL: + | ASM_SSE2_REG_MEM_OP mulss, mulsd, type, def_reg, [Ra(op2_reg)+offset] + break; + case IR_DIV: + | ASM_SSE2_REG_MEM_OP divss, divsd, type, def_reg, [Ra(op2_reg)+offset] + break; + case IR_MIN: + | ASM_SSE2_REG_MEM_OP minss, minsd, type, def_reg, [Ra(op2_reg)+offset] + break; + case IR_MAX: + | ASM_SSE2_REG_MEM_OP maxss, maxsd, type, def_reg, [Ra(op2_reg)+offset] + break; + } + } + if (IR_REG_SPILLED(ctx->regs[def][0])) { + ir_emit_store(ctx, insn->type, def, def_reg); + } +} + +static void ir_emit_binop_avx(ir_ctx *ctx, ir_ref def, ir_insn *insn) +{ + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + ir_type type = insn->type; + ir_ref op1 = insn->op1; + ir_ref op2 = insn->op2; + ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); + ir_reg op1_reg = ctx->regs[def][1]; + ir_reg op2_reg = ctx->regs[def][2]; + + IR_ASSERT(def_reg != IR_REG_NONE && op1_reg != IR_REG_NONE); + + if (IR_REG_SPILLED(op1_reg) || IR_IS_CONST_REF(op1)) { + op1_reg = IR_REG_NUM(op1_reg); + ir_emit_load(ctx, type, op1_reg, op1); + } + if (op2_reg != IR_REG_NONE) { + if (IR_REG_SPILLED(op2_reg) || IR_IS_CONST_REF(op2)) { + op2_reg = IR_REG_NUM(op2_reg); + if (op1 != op2) { + ir_emit_load(ctx, type, op2_reg, op2); + } + } + switch (insn->op) { + default: + IR_ASSERT(0 && "NIY binary op"); + case IR_ADD: + | ASM_AVX_REG_REG_REG_OP vaddss, vaddsd, type, def_reg, op1_reg, op2_reg + break; + case IR_SUB: + | ASM_AVX_REG_REG_REG_OP vsubss, vsubsd, type, def_reg, op1_reg, op2_reg + break; + case IR_MUL: + | ASM_AVX_REG_REG_REG_OP vmulss, vmulsd, type, def_reg, op1_reg, op2_reg + break; + case IR_DIV: + | ASM_AVX_REG_REG_REG_OP vdivss, vdivsd, type, def_reg, op1_reg, op2_reg + break; + case IR_MIN: + | ASM_AVX_REG_REG_REG_OP vminss, vminsd, type, def_reg, op1_reg, op2_reg + break; + case IR_MAX: + | ASM_AVX_REG_REG_REG_OP vmaxss, vmaxsd, type, def_reg, op1_reg, op2_reg + break; + } + } else if (IR_IS_CONST_REF(op2)) { + ir_insn *val_insn = &ctx->ir_base[op2]; + int label = ctx->cfg_blocks_count - op2; + + val_insn->const_flags |= IR_CONST_EMIT; + switch (insn->op) { + default: + IR_ASSERT(0 && "NIY binary op"); + case IR_ADD: + | ASM_AVX_REG_REG_MEM_OP vaddss, vaddsd, type, def_reg, op1_reg, [=>label] + break; + case IR_SUB: + | ASM_AVX_REG_REG_MEM_OP vsubss, vsubsd, type, def_reg, op1_reg, [=>label] + break; + case IR_MUL: + | ASM_AVX_REG_REG_MEM_OP vmulss, vmulsd, type, def_reg, op1_reg, [=>label] + break; + case IR_DIV: + | ASM_AVX_REG_REG_MEM_OP vdivss, vdivsd, type, def_reg, op1_reg, [=>label] + break; + case IR_MIN: + | ASM_AVX_REG_REG_MEM_OP vminss, vminsd, type, def_reg, op1_reg, [=>label] + break; + case IR_MAX: + | ASM_AVX_REG_REG_MEM_OP vmaxss, vmaxsd, type, def_reg, op1_reg, [=>label] + break; + } + } else { + int32_t offset = 0; + + if (ir_rule(ctx, op2) & IR_FUSED) { + offset = ir_fuse_load(ctx, op2, &op2_reg); + 
} else { + offset = ir_ref_spill_slot(ctx, op2, &op2_reg); + } + switch (insn->op) { + default: + IR_ASSERT(0 && "NIY binary op"); + case IR_ADD: + | ASM_AVX_REG_REG_MEM_OP vaddss, vaddsd, type, def_reg, op1_reg, [Ra(op2_reg)+offset] + break; + case IR_SUB: + | ASM_AVX_REG_REG_MEM_OP vsubss, vsubsd, type, def_reg, op1_reg, [Ra(op2_reg)+offset] + break; + case IR_MUL: + | ASM_AVX_REG_REG_MEM_OP vmulss, vmulsd, type, def_reg, op1_reg, [Ra(op2_reg)+offset] + break; + case IR_DIV: + | ASM_AVX_REG_REG_MEM_OP vdivss, vdivsd, type, def_reg, op1_reg, [Ra(op2_reg)+offset] + break; + case IR_MIN: + | ASM_AVX_REG_REG_MEM_OP vminss, vminsd, type, def_reg, op1_reg, [Ra(op2_reg)+offset] + break; + case IR_MAX: + | ASM_AVX_REG_REG_MEM_OP vmaxss, vmaxsd, type, def_reg, op1_reg, [Ra(op2_reg)+offset] + break; + } + } + if (IR_REG_SPILLED(ctx->regs[def][0])) { + ir_emit_store(ctx, insn->type, def, def_reg); + } +} + +static void ir_emit_cmp_int_common(ir_ctx *ctx, ir_type type, ir_insn *insn, ir_reg op1_reg, ir_ref op1, ir_reg op2_reg, ir_ref op2) +{ + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + + if (op1_reg != IR_REG_NONE) { + if (op2_reg != IR_REG_NONE) { + | ASM_REG_REG_OP cmp, type, op1_reg, op2_reg + } else if (IR_IS_CONST_REF(op2) && ctx->ir_base[op2].val.u64 == 0) { + | ASM_REG_REG_OP test, type, op1_reg, op1_reg + } else if (IR_IS_CONST_REF(op2)) { + ir_insn *val_insn = &ctx->ir_base[op2]; + + IR_ASSERT(IR_IS_32BIT(val_insn->type, val_insn->val)); + | ASM_REG_IMM_OP cmp, type, op1_reg, val_insn->val.i32 + } else { + int32_t offset = 0; + + if (ir_rule(ctx, op2) & IR_FUSED) { + offset = ir_fuse_load(ctx, op2, &op2_reg); + } else { + offset = ir_ref_spill_slot(ctx, op2, &op2_reg); + } + if (op2_reg != IR_REG_NONE) { + | ASM_REG_MEM_OP cmp, type, op1_reg, [Ra(op2_reg)+offset] + } else { + | ASM_REG_MEM_OP cmp, type, op1_reg, [offset] + } + } + } else if (IR_IS_CONST_REF(insn->op1)) { + IR_ASSERT(0); + } else { + int32_t offset = 0; + + if (ir_rule(ctx, insn->op1) & IR_FUSED) { + offset = ir_fuse_load(ctx, insn->op1, &op1_reg); + } else { + offset = ir_ref_spill_slot(ctx, insn->op1, &op1_reg); + } + if (op2_reg != IR_REG_NONE) { + if (op1_reg == IR_REG_NONE) { + | ASM_MEM_REG_OP cmp, type, [offset], op2_reg + } else { + | ASM_MEM_REG_OP cmp, type, [Ra(op1_reg)+offset], op2_reg + } + } else { + IR_ASSERT(!IR_IS_CONST_REF(op1)); + IR_ASSERT(IR_IS_CONST_REF(op2)); + IR_ASSERT(IR_IS_32BIT(ctx->ir_base[op2].type, ctx->ir_base[op2].val)); + if (op1_reg == IR_REG_NONE) { + | ASM_MEM_IMM_OP cmp, type, [offset], ctx->ir_base[op2].val.i32 + } else { + | ASM_MEM_IMM_OP cmp, type, [Ra(op1_reg)+offset], ctx->ir_base[op2].val.i32 + } + } + } +} + +static void _ir_emit_setcc_int(ir_ctx *ctx, uint8_t op, ir_reg def_reg) +{ + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + + switch (op) { + default: + IR_ASSERT(0 && "NIY binary op"); + case IR_EQ: + | sete Rb(def_reg) + break; + case IR_NE: + | setne Rb(def_reg) + break; + case IR_LT: + | setl Rb(def_reg) + break; + case IR_GE: + | setge Rb(def_reg) + break; + case IR_LE: + | setle Rb(def_reg) + break; + case IR_GT: + | setg Rb(def_reg) + break; + case IR_ULT: + | setb Rb(def_reg) + break; + case IR_UGE: + | setae Rb(def_reg) + break; + case IR_ULE: + | setbe Rb(def_reg) + break; + case IR_UGT: + | seta Rb(def_reg) + break; + } +} + +static void ir_emit_cmp_int(ir_ctx *ctx, ir_ref def, ir_insn *insn) +{ + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + ir_type type = 
ctx->ir_base[insn->op1].type; + ir_op op = insn->op; + ir_ref op1 = insn->op1; + ir_ref op2 = insn->op2; + ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); + ir_reg op1_reg = ctx->regs[def][1]; + ir_reg op2_reg = ctx->regs[def][2]; + + IR_ASSERT(def_reg != IR_REG_NONE); + if (op1_reg != IR_REG_NONE && (IR_IS_CONST_REF(op1) || IR_REG_SPILLED(op1_reg))) { + op1_reg = IR_REG_NUM(op1_reg); + ir_emit_load(ctx, type, op1_reg, op1); + } + if (op2_reg != IR_REG_NONE && (IR_IS_CONST_REF(op2) || IR_REG_SPILLED(op2_reg))) { + op2_reg = IR_REG_NUM(op2_reg); + if (op1 != op2) { + ir_emit_load(ctx, type, op2_reg, op2); + } + } + if (IR_IS_CONST_REF(op2) && ctx->ir_base[op2].val.u64 == 0) { + if (op == IR_ULT) { + /* always false */ + | xor Ra(def_reg), Ra(def_reg) + if (IR_REG_SPILLED(ctx->regs[def][0])) { + ir_emit_store(ctx, insn->type, def, def_reg); + } + return; + } else if (op == IR_UGE) { + /* always true */ + | ASM_REG_IMM_OP mov, insn->type, def_reg, 1 + if (IR_REG_SPILLED(ctx->regs[def][0])) { + ir_emit_store(ctx, insn->type, def, def_reg); + } + return; + } else if (op == IR_ULE) { + op = IR_EQ; + } else if (op == IR_UGT) { + op = IR_NE; + } + } + ir_emit_cmp_int_common(ctx, type, insn, op1_reg, op1, op2_reg, op2); + _ir_emit_setcc_int(ctx, op, def_reg); + if (IR_REG_SPILLED(ctx->regs[def][0])) { + ir_emit_store(ctx, insn->type, def, def_reg); + } +} + +static void ir_emit_test_int_common(ir_ctx *ctx, ir_ref ref, ir_op op) +{ + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + ir_insn *binop_insn = &ctx->ir_base[ref]; + ir_type type = binop_insn->type; + ir_ref op1 = binop_insn->op1; + ir_ref op2 = binop_insn->op2; + ir_reg op1_reg = ctx->regs[ref][1]; + ir_reg op2_reg = ctx->regs[ref][2]; + + IR_ASSERT(binop_insn->op == IR_AND); + if (op1_reg != IR_REG_NONE) { + if (IR_REG_SPILLED(op1_reg)) { + op1_reg = IR_REG_NUM(op1_reg); + ir_emit_load(ctx, type, op1_reg, op1); + } + if (op2_reg != IR_REG_NONE) { + if (IR_REG_SPILLED(op2_reg) || IR_IS_CONST_REF(op2)) { + op2_reg = IR_REG_NUM(op2_reg); + if (op1 != op2) { + ir_emit_load(ctx, type, op2_reg, op2); + } + } + | ASM_REG_REG_OP test, type, op1_reg, op2_reg + } else if (IR_IS_CONST_REF(op2)) { + ir_insn *val_insn = &ctx->ir_base[op2]; + int32_t val; + + IR_ASSERT(IR_IS_32BIT(val_insn->type, val_insn->val)); + val = val_insn->val.i32; + if ((op == IR_EQ || op == IR_NE) && val == 0xff && (sizeof(void*) == 8 || op1_reg <= IR_REG_R3)) { + | test Rb(op1_reg), Rb(op1_reg) + } else if ((op == IR_EQ || op == IR_NE) && val == 0xff00 && op1_reg <= IR_REG_R3) { + if (op1_reg == IR_REG_RAX) { + | test ah, ah + } else if (op1_reg == IR_REG_RBX) { + | test bh, bh + } else if (op1_reg == IR_REG_RCX) { + | test ch, ch + } else if (op1_reg == IR_REG_RDX) { + | test dh, dh + } else { + IR_ASSERT(0); + } + } else if ((op == IR_EQ || op == IR_NE) && val == 0xffff) { + | test Rw(op1_reg), Rw(op1_reg) + } else if ((op == IR_EQ || op == IR_NE) && val == -1) { + | test Rd(op1_reg), Rd(op1_reg) + } else { + | ASM_REG_IMM_OP test, type, op1_reg, val + } + } else { + int32_t offset = 0; + + if (ir_rule(ctx, op2) & IR_FUSED) { + offset = ir_fuse_load(ctx, op2, &op2_reg); + } else { + offset = ir_ref_spill_slot(ctx, op2, &op2_reg); + } + | ASM_REG_MEM_OP test, type, op1_reg, [Ra(op2_reg)+offset] + } + } else if (IR_IS_CONST_REF(op1)) { + IR_ASSERT(0); + } else { + int32_t offset = 0; + + if (ir_rule(ctx, op1) & IR_FUSED) { + offset = ir_fuse_load(ctx, op1, &op1_reg); + } else { + offset = ir_ref_spill_slot(ctx, op1, &op1_reg); + } + if (op2_reg != 
IR_REG_NONE) { + if (IR_REG_SPILLED(op2_reg) || IR_IS_CONST_REF(op2)) { + op2_reg = IR_REG_NUM(op2_reg); + if (op1 != op2) { + ir_emit_load(ctx, type, op2_reg, op2); + } + } + if (op1_reg == IR_REG_NONE) { + | ASM_MEM_REG_OP test, type, [offset], op2_reg + } else { + | ASM_MEM_REG_OP test, type, [Ra(op1_reg)+offset], op2_reg + } + } else { + IR_ASSERT(!IR_IS_CONST_REF(op1)); + IR_ASSERT(IR_IS_CONST_REF(op2)); + IR_ASSERT(IR_IS_32BIT(ctx->ir_base[op2].type, ctx->ir_base[op2].val)); + if (op1_reg == IR_REG_NONE) { + | ASM_MEM_IMM_OP test, type, [offset], ctx->ir_base[op2].val.i32 + } else { + | ASM_MEM_IMM_OP test, type, [Ra(op1_reg)+offset], ctx->ir_base[op2].val.i32 + } + } + } +} + +static void ir_emit_testcc_int(ir_ctx *ctx, ir_ref def, ir_insn *insn) +{ + ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); + + IR_ASSERT(def_reg != IR_REG_NONE); + ir_emit_test_int_common(ctx, insn->op1, insn->op); + _ir_emit_setcc_int(ctx, insn->op, def_reg); + if (IR_REG_SPILLED(ctx->regs[def][0])) { + ir_emit_store(ctx, insn->type, def, def_reg); + } +} + +static void ir_emit_setcc_int(ir_ctx *ctx, ir_ref def, ir_insn *insn) +{ + ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); + + IR_ASSERT(def_reg != IR_REG_NONE); + _ir_emit_setcc_int(ctx, insn->op, def_reg); + if (IR_REG_SPILLED(ctx->regs[def][0])) { + ir_emit_store(ctx, insn->type, def, def_reg); + } +} + +static ir_op ir_emit_cmp_fp_common(ir_ctx *ctx, ir_ref cmp_ref, ir_insn *cmp_insn) +{ + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + ir_type type = ctx->ir_base[cmp_insn->op1].type; + ir_op op = cmp_insn->op; + ir_ref op1, op2; + ir_reg op1_reg, op2_reg; + + op1 = cmp_insn->op1; + op2 = cmp_insn->op2; + op1_reg = ctx->regs[cmp_ref][1]; + op2_reg = ctx->regs[cmp_ref][2]; + + if (op1_reg == IR_REG_NONE && op2_reg != IR_REG_NONE && (op == IR_EQ || op == IR_NE)) { + ir_ref tmp; + ir_reg tmp_reg; + + tmp = op1; + op1 = op2; + op2 = tmp; + tmp_reg = op1_reg; + op1_reg = op2_reg; + op2_reg = tmp_reg; + } + + + IR_ASSERT(op1_reg != IR_REG_NONE); + if (IR_REG_SPILLED(op1_reg) || IR_IS_CONST_REF(op1)) { + op1_reg = IR_REG_NUM(op1_reg); + ir_emit_load(ctx, type, op1_reg, op1); + } + if (op2_reg != IR_REG_NONE) { + if (IR_REG_SPILLED(op2_reg) || IR_IS_CONST_REF(op2)) { + op2_reg = IR_REG_NUM(op2_reg); + if (op1 != op2) { + ir_emit_load(ctx, type, op2_reg, op2); + } + } + | ASM_FP_REG_REG_OP ucomiss, ucomisd, vucomiss, vucomisd, type, op1_reg, op2_reg + } else if (IR_IS_CONST_REF(op2)) { + ir_insn *val_insn = &ctx->ir_base[op2]; + int label = ctx->cfg_blocks_count - op2; + + val_insn->const_flags |= IR_CONST_EMIT; + | ASM_FP_REG_MEM_OP ucomiss, ucomisd, vucomiss, vucomisd, type, op1_reg, [=>label] + } else { + int32_t offset = 0; + + if (ir_rule(ctx, op2) & IR_FUSED) { + offset = ir_fuse_load(ctx, op2, &op2_reg); + } else { + offset = ir_ref_spill_slot(ctx, op2, &op2_reg); + } + | ASM_FP_REG_MEM_OP ucomiss, ucomisd, vucomiss, vucomisd, type, op1_reg, [Ra(op2_reg)+offset] + } + return op; +} + +static void ir_emit_cmp_fp(ir_ctx *ctx, ir_ref def, ir_insn *insn) +{ + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + ir_op op = ir_emit_cmp_fp_common(ctx, def, insn); + ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); + ir_reg tmp_reg = ctx->regs[def][3]; + + IR_ASSERT(def_reg != IR_REG_NONE); + switch (op) { + default: + IR_ASSERT(0 && "NIY binary op"); + case IR_EQ: + | setnp Rb(def_reg) + | mov Rd(tmp_reg), 0 + | cmovne Rd(def_reg), Rd(tmp_reg) + break; + case IR_NE: + | setp Rb(def_reg) + | mov Rd(tmp_reg), 1 
+ | cmovne Rd(def_reg), Rd(tmp_reg) + break; + case IR_LT: + | setnp Rb(def_reg) + | mov Rd(tmp_reg), 0 + | cmovae Rd(def_reg), Rd(tmp_reg) + break; + case IR_GE: + | setae Rb(def_reg) + break; + case IR_LE: + | setnp Rb(def_reg) + | mov Rd(tmp_reg), 0 + | cmova Rd(def_reg), Rd(tmp_reg) + break; + case IR_GT: + | seta Rb(def_reg) + break; + case IR_ULT: + | setb Rb(def_reg) + break; + case IR_UGE: + | setnp Rb(def_reg) + | mov Rd(tmp_reg), 0 + | cmovb Rd(def_reg), Rd(tmp_reg) + break; + case IR_ULE: + | setbe Rb(def_reg) + break; + case IR_UGT: + | setnp Rb(def_reg) + | mov Rd(tmp_reg), 0 + | cmovbe Rd(def_reg), Rd(tmp_reg) + break; + } + if (IR_REG_SPILLED(ctx->regs[def][0])) { + ir_emit_store(ctx, insn->type, def, def_reg); + } +} + +static void ir_emit_jmp_true(ir_ctx *ctx, uint32_t b, ir_ref def) +{ + uint32_t true_block, false_block, next_block; + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + + ir_get_true_false_blocks(ctx, b, &true_block, &false_block, &next_block); + if (true_block != next_block) { + | jmp =>true_block + } +} + +static void ir_emit_jmp_false(ir_ctx *ctx, uint32_t b, ir_ref def) +{ + uint32_t true_block, false_block, next_block; + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + + ir_get_true_false_blocks(ctx, b, &true_block, &false_block, &next_block); + if (false_block != next_block) { + | jmp =>false_block + } +} + +static void ir_emit_jcc(ir_ctx *ctx, uint8_t op, uint32_t b, ir_ref def, ir_insn *insn, bool int_cmp) +{ + uint32_t true_block, false_block, next_block; + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + bool swap = 0; + + ir_get_true_false_blocks(ctx, b, &true_block, &false_block, &next_block); + if (true_block == next_block) { + /* swap to avoid unconditional JMP */ + op ^= 1; // reverse + true_block = false_block; + false_block = 0; + swap = 1; + } else if (false_block == next_block) { + false_block = 0; + } + + if (int_cmp) { + switch (op) { + default: + IR_ASSERT(0 && "NIY binary op"); + case IR_EQ: + | je =>true_block + break; + case IR_NE: + | jne =>true_block + break; + case IR_LT: + | jl =>true_block + break; + case IR_GE: + | jge =>true_block + break; + case IR_LE: + | jle =>true_block + break; + case IR_GT: + | jg =>true_block + break; + case IR_ULT: + | jb =>true_block + break; + case IR_UGE: + | jae =>true_block + break; + case IR_ULE: + | jbe =>true_block + break; + case IR_UGT: + | ja =>true_block + break; + } + } else { + switch (op) { + default: + IR_ASSERT(0 && "NIY binary op"); + case IR_EQ: + if (!false_block) { + | jp >1 + | je =>true_block + |1: + } else { + | jp =>false_block + | je =>true_block + } + break; + case IR_NE: + | jne =>true_block + | jp =>true_block + break; + case IR_LT: + if (swap) { + | jb =>true_block + } else if (!false_block) { + | jp >1 + | jb =>true_block + |1: + } else { + | jp =>false_block + | jb =>true_block + } + break; + case IR_GE: + if (swap) { + | jp =>true_block + } + | jae =>true_block + break; + case IR_LE: + if (swap) { + | jbe =>true_block + } else if (!false_block) { + | jp >1 + | jbe =>true_block + |1: + } else { + | jp =>false_block + | jbe =>true_block + } + break; + case IR_GT: + if (swap) { + | jp =>true_block + } + | ja =>true_block + break; +// + case IR_ULT: + if (swap) { + | jp =>true_block + } + | jb =>true_block + break; + case IR_UGE: + if (swap) { + | jae =>true_block + } else if (!false_block) { + | jp >1 + | jae =>true_block + |1: + } else { + | jp =>false_block + | jae =>true_block + } + 
break; + case IR_ULE: + if (swap) { + | jp =>true_block + } + | jbe =>true_block + break; + case IR_UGT: + if (swap) { + | ja =>true_block + } else if (!false_block) { + | jp >1 + | ja =>true_block + |1: + } else { + | jp =>false_block + | ja =>true_block + } + break; + } + } + if (false_block) { + | jmp =>false_block + } +} + +static void ir_emit_cmp_and_branch_int(ir_ctx *ctx, uint32_t b, ir_ref def, ir_insn *insn) +{ + ir_insn *cmp_insn = &ctx->ir_base[insn->op2]; + ir_op op = cmp_insn->op; + ir_type type = ctx->ir_base[cmp_insn->op1].type; + ir_ref op1 = cmp_insn->op1; + ir_ref op2 = cmp_insn->op2; + ir_reg op1_reg = ctx->regs[insn->op2][1]; + ir_reg op2_reg = ctx->regs[insn->op2][2]; + + if (op1_reg != IR_REG_NONE && (IR_IS_CONST_REF(op1) || IR_REG_SPILLED(op1_reg))) { + op1_reg = IR_REG_NUM(op1_reg); + ir_emit_load(ctx, type, op1_reg, op1); + } + if (op2_reg != IR_REG_NONE && (IR_IS_CONST_REF(op2) || IR_REG_SPILLED(op2_reg))) { + op2_reg = IR_REG_NUM(op2_reg); + if (op1 != op2) { + ir_emit_load(ctx, type, op2_reg, op2); + } + } + if (IR_IS_CONST_REF(op2) && ctx->ir_base[op2].val.u64 == 0) { + if (op == IR_ULT) { + /* always false */ + ir_emit_jmp_false(ctx, b, def); + return; + } else if (op == IR_UGE) { + /* always true */ + ir_emit_jmp_true(ctx, b, def); + return; + } else if (op == IR_ULE) { + op = IR_EQ; + } else if (op == IR_UGT) { + op = IR_NE; + } + } + + bool same_comparison = 0; + ir_insn *prev_insn = &ctx->ir_base[insn->op1]; + if (prev_insn->op == IR_IF_TRUE || prev_insn->op == IR_IF_FALSE) { + if (ir_rule(ctx, prev_insn->op1) == IR_CMP_AND_BRANCH_INT) { + prev_insn = &ctx->ir_base[prev_insn->op1]; + prev_insn = &ctx->ir_base[prev_insn->op2]; + if (prev_insn->op1 == cmp_insn->op1 && prev_insn->op2 == cmp_insn->op2) { + same_comparison = true; + } + } + } + if (!same_comparison) { + ir_emit_cmp_int_common(ctx, type, cmp_insn, op1_reg, op1, op2_reg, op2); + } + ir_emit_jcc(ctx, op, b, def, insn, 1); +} + +static void ir_emit_test_and_branch_int(ir_ctx *ctx, uint32_t b, ir_ref def, ir_insn *insn) +{ + ir_ref op2 = insn->op2; + ir_op op = ctx->ir_base[op2].op; + + if (op >= IR_EQ && op <= IR_UGT) { + op2 = ctx->ir_base[op2].op1; + } else { + IR_ASSERT(op == IR_AND); + op = IR_NE; + } + + ir_emit_test_int_common(ctx, op2, op); + ir_emit_jcc(ctx, op, b, def, insn, 1); +} + +static void ir_emit_cmp_and_branch_fp(ir_ctx *ctx, uint32_t b, ir_ref def, ir_insn *insn) +{ + ir_op op = ir_emit_cmp_fp_common(ctx, insn->op2, &ctx->ir_base[insn->op2]); + ir_emit_jcc(ctx, op, b, def, insn, 0); +} + +static void ir_emit_if_int(ir_ctx *ctx, uint32_t b, ir_ref def, ir_insn *insn) +{ + ir_type type = ctx->ir_base[insn->op2].type; + ir_reg op2_reg = ctx->regs[def][2]; + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + + if (op2_reg != IR_REG_NONE) { + if (IR_REG_SPILLED(op2_reg)) { + op2_reg = IR_REG_NUM(op2_reg); + ir_emit_load(ctx, type, op2_reg, insn->op2); + } + | ASM_REG_REG_OP test, type, op2_reg, op2_reg + } else if (IR_IS_CONST_REF(insn->op2)) { + uint32_t true_block, false_block, next_block; + + ir_get_true_false_blocks(ctx, b, &true_block, &false_block, &next_block); + if (ir_const_is_true(&ctx->ir_base[insn->op2])) { + if (true_block != next_block) { + | jmp =>true_block + } + } else { + if (false_block != next_block) { + | jmp =>false_block + } + } + return; + } else { + int32_t offset = 0; + + if (ir_rule(ctx, insn->op2) & IR_FUSED) { + offset = ir_fuse_load(ctx, insn->op2, &op2_reg); + } else { + offset = ir_ref_spill_slot(ctx, insn->op2, &op2_reg); + } 
+ if (op2_reg == IR_REG_NONE) { + | ASM_MEM_IMM_OP cmp, type, [offset], 0 + } else { + | ASM_MEM_IMM_OP cmp, type, [Ra(op2_reg)+offset], 0 + } + } + ir_emit_jcc(ctx, IR_NE, b, def, insn, 1); +} + +static void ir_emit_cond(ir_ctx *ctx, ir_ref def, ir_insn *insn) +{ + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + ir_type type = insn->type; + ir_ref op1 = insn->op1; + ir_ref op2 = insn->op2; + ir_ref op3 = insn->op3; + ir_type op1_type = ctx->ir_base[op1].type; + ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); + ir_reg op1_reg = ctx->regs[def][1]; + ir_reg op2_reg = ctx->regs[def][2]; + ir_reg op3_reg = ctx->regs[def][3]; + + IR_ASSERT(def_reg != IR_REG_NONE); + + if (op2_reg != IR_REG_NONE && (IR_REG_SPILLED(op2_reg) || IR_IS_CONST_REF(op2))) { + op2_reg = IR_REG_NUM(op2_reg); + ir_emit_load(ctx, type, op2_reg, op2); + if (op1 == op2) { + op1_reg = op2_reg; + } + if (op3 == op2) { + op3_reg = op2_reg; + } + } + if (op3_reg != IR_REG_NONE && op3 != op2 && (IR_REG_SPILLED(op3_reg) || IR_IS_CONST_REF(op3))) { + op3_reg = IR_REG_NUM(op3_reg); + ir_emit_load(ctx, type, op3_reg, op3); + if (op1 == op2) { + op1_reg = op3_reg; + } + } + if (op1_reg != IR_REG_NONE && op1 != op2 && op1 != op3 && (IR_REG_SPILLED(op1_reg) || IR_IS_CONST_REF(op1))) { + op1_reg = IR_REG_NUM(op1_reg); + ir_emit_load(ctx, op1_type, op1_reg, op1); + } + + if (IR_IS_TYPE_INT(op1_type)) { + if (op1_reg != IR_REG_NONE) { + | ASM_REG_REG_OP test, op1_type, op1_reg, op1_reg + } else { + ir_reg fp; + int32_t offset = ir_ref_spill_slot(ctx, op1, &fp); + + | ASM_MEM_IMM_OP cmp, op1_type, [Ra(fp)+offset], 0 + } + | je >2 + } else { + if (!data->double_zero_const) { + data->double_zero_const = 1; + ir_rodata(ctx); + |.align 16 + |->double_zero_const: + |.dword 0, 0 + |.code + } + | ASM_FP_REG_MEM_OP ucomiss, ucomisd, vucomiss, vucomisd, op1_type, op1_reg, [->double_zero_const] + | jp >1 + | je >2 + |1: + } + + if (op2_reg != IR_REG_NONE) { + if (def_reg != op2_reg) { + if (IR_IS_TYPE_INT(type)) { + ir_emit_mov(ctx, type, def_reg, op2_reg); + } else { + ir_emit_fp_mov(ctx, type, def_reg, op2_reg); + } + } + } else if (IR_IS_CONST_REF(op2) || !(ir_rule(ctx, op2) & IR_FUSED)) { + ir_emit_load(ctx, type, def_reg, op2); + } else { + int32_t offset = ir_fuse_load(ctx, op2, &op2_reg); + + if (IR_IS_TYPE_INT(type)) { + ir_emit_load_mem_int(ctx, type, def_reg, op2_reg, offset); + } else { + ir_emit_load_mem_fp(ctx, type, def_reg, op2_reg, offset); + } + } + | jmp >3 + |2: + if (op3_reg != IR_REG_NONE) { + if (def_reg != op3_reg) { + if (IR_IS_TYPE_INT(type)) { + ir_emit_mov(ctx, type, def_reg, op3_reg); + } else { + ir_emit_fp_mov(ctx, type, def_reg, op3_reg); + } + } + } else if (IR_IS_CONST_REF(op3) || !(ir_rule(ctx, op3) & IR_FUSED)) { + ir_emit_load(ctx, type, def_reg, op3); + } else { + int32_t offset = ir_fuse_load(ctx, op3, &op3_reg); + + if (IR_IS_TYPE_INT(type)) { + ir_emit_load_mem_int(ctx, type, def_reg, op3_reg, offset); + } else { + ir_emit_load_mem_fp(ctx, type, def_reg, op3_reg, offset); + } + } + |3: + + if (IR_REG_SPILLED(ctx->regs[def][0])) { + ir_emit_store(ctx, type, def, def_reg); + } +} + +static void ir_emit_return_void(ir_ctx *ctx) +{ + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + + ir_emit_epilogue(ctx); + +#ifdef IR_TARGET_X86 + if (sizeof(void*) == 4 && (ctx->flags & IR_FASTCALL_FUNC) && ctx->param_stack_size) { + | ret ctx->param_stack_size + return; + } +#endif + + | ret +} + +static void ir_emit_return_int(ir_ctx *ctx, ir_ref ref, ir_insn *insn) +{ + 
ir_reg op2_reg = ctx->regs[ref][2]; + + if (op2_reg != IR_REG_INT_RET1) { + ir_type type = ctx->ir_base[insn->op2].type; + + if (op2_reg != IR_REG_NONE && !IR_REG_SPILLED(op2_reg)) { + ir_emit_mov(ctx, type, IR_REG_INT_RET1, op2_reg); + } else { + ir_emit_load(ctx, type, IR_REG_INT_RET1, insn->op2); + } + } + ir_emit_return_void(ctx); +} + +static void ir_emit_return_fp(ir_ctx *ctx, ir_ref ref, ir_insn *insn) +{ + ir_reg op2_reg = ctx->regs[ref][2]; + ir_type type = ctx->ir_base[insn->op2].type; + +#ifdef IR_REG_FP_RET1 + if (op2_reg != IR_REG_FP_RET1) { + if (op2_reg != IR_REG_NONE && !IR_REG_SPILLED(op2_reg)) { + ir_emit_fp_mov(ctx, type, IR_REG_FP_RET1, op2_reg); + } else { + ir_emit_load(ctx, type, IR_REG_FP_RET1, insn->op2); + } + } +#else + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + + if (op2_reg == IR_REG_NONE || IR_REG_SPILLED(op2_reg)) { + ir_reg fp; + int32_t offset = ir_ref_spill_slot(ctx, insn->op2, &fp); + + if (type == IR_DOUBLE) { + | fld qword [Ra(fp)+offset] + } else { + IR_ASSERT(type == IR_FLOAT); + | fld dword [Ra(fp)+offset] + } + } else { + int32_t offset = ctx->ret_slot; + ir_reg fp; + + IR_ASSERT(offset != -1); + offset = IR_SPILL_POS_TO_OFFSET(offset); + fp = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER; + ir_emit_store_mem_fp(ctx, type, fp, offset, op2_reg); + if (type == IR_DOUBLE) { + | fld qword [Ra(fp)+offset] + } else { + IR_ASSERT(type == IR_FLOAT); + | fld dword [Ra(fp)+offset] + } + } +#endif + ir_emit_return_void(ctx); +} + +static void ir_emit_sext(ir_ctx *ctx, ir_ref def, ir_insn *insn) +{ + ir_type dst_type = insn->type; + ir_type src_type = ctx->ir_base[insn->op1].type; + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); + ir_reg op1_reg = ctx->regs[def][1]; + + IR_ASSERT(IR_IS_TYPE_INT(src_type)); + IR_ASSERT(IR_IS_TYPE_INT(dst_type)); + IR_ASSERT(ir_type_size[dst_type] > ir_type_size[src_type]); + IR_ASSERT(def_reg != IR_REG_NONE); + + if (op1_reg != IR_REG_NONE) { + if (IR_REG_SPILLED(op1_reg)) { + op1_reg = IR_REG_NUM(op1_reg); + ir_emit_load(ctx, src_type, op1_reg, insn->op1); + } + if (ir_type_size[src_type] == 1) { + if (ir_type_size[dst_type] == 2) { + | movsx Rw(def_reg), Rb(op1_reg) + } else if (ir_type_size[dst_type] == 4) { + | movsx Rd(def_reg), Rb(op1_reg) + } else { + IR_ASSERT(ir_type_size[dst_type] == 8); + IR_ASSERT(sizeof(void*) == 8); +|.if X64 + | movsx Rq(def_reg), Rb(op1_reg) +|.endif + } + } else if (ir_type_size[src_type] == 2) { + if (ir_type_size[dst_type] == 4) { + | movsx Rd(def_reg), Rw(op1_reg) + } else { + IR_ASSERT(ir_type_size[dst_type] == 8); + IR_ASSERT(sizeof(void*) == 8); +|.if X64 + | movsx Rq(def_reg), Rw(op1_reg) +|.endif + } + } else { + IR_ASSERT(ir_type_size[src_type] == 4); + IR_ASSERT(ir_type_size[dst_type] == 8); + IR_ASSERT(sizeof(void*) == 8); +|.if X64 + | movsxd Rq(def_reg), Rd(op1_reg) +|.endif + } + } else if (IR_IS_CONST_REF(insn->op1)) { + IR_ASSERT(0); + } else { + int32_t offset = 0; + + if (ir_rule(ctx, insn->op1) & IR_FUSED) { + offset = ir_fuse_load(ctx, insn->op1, &op1_reg); + } else { + offset = ir_ref_spill_slot(ctx, insn->op1, &op1_reg); + } + + if (ir_type_size[src_type] == 1) { + if (ir_type_size[dst_type] == 2) { + if (op1_reg != IR_REG_NONE) { + | movsx Rw(def_reg), byte [Ra(op1_reg)+offset] + } else { + | movsx Rw(def_reg), byte [offset] + } + } else if (ir_type_size[dst_type] == 4) { + if (op1_reg != IR_REG_NONE) { + | movsx Rd(def_reg), 
byte [Ra(op1_reg)+offset] + } else { + | movsx Rd(def_reg), byte [offset] + } + } else { + IR_ASSERT(ir_type_size[dst_type] == 8); + IR_ASSERT(sizeof(void*) == 8); +|.if X64 + if (op1_reg != IR_REG_NONE) { + | movsx Rq(def_reg), byte [Ra(op1_reg)+offset] + } else { + | movsx Rq(def_reg), byte [offset] + } +|.endif + } + } else if (ir_type_size[src_type] == 2) { + if (ir_type_size[dst_type] == 4) { + if (op1_reg != IR_REG_NONE) { + | movsx Rd(def_reg), word [Ra(op1_reg)+offset] + } else { + | movsx Rd(def_reg), word [offset] + } + } else { + IR_ASSERT(ir_type_size[dst_type] == 8); + IR_ASSERT(sizeof(void*) == 8); +|.if X64 + if (op1_reg != IR_REG_NONE) { + | movsx Rq(def_reg), word [Ra(op1_reg)+offset] + } else { + | movsx Rq(def_reg), word [offset] + } +|.endif + } + } else { + IR_ASSERT(ir_type_size[src_type] == 4); + IR_ASSERT(ir_type_size[dst_type] == 8); + IR_ASSERT(sizeof(void*) == 8); +|.if X64 + if (op1_reg != IR_REG_NONE) { + | movsxd Rq(def_reg), dword [Ra(op1_reg)+offset] + } else { + | movsxd Rq(def_reg), dword [offset] + } +|.endif + } + } + if (IR_REG_SPILLED(ctx->regs[def][0])) { + ir_emit_store(ctx, dst_type, def, def_reg); + } +} + +static void ir_emit_zext(ir_ctx *ctx, ir_ref def, ir_insn *insn) +{ + ir_type dst_type = insn->type; + ir_type src_type = ctx->ir_base[insn->op1].type; + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); + ir_reg op1_reg = ctx->regs[def][1]; + + IR_ASSERT(IR_IS_TYPE_INT(src_type)); + IR_ASSERT(IR_IS_TYPE_INT(dst_type)); + IR_ASSERT(ir_type_size[dst_type] > ir_type_size[src_type]); + IR_ASSERT(def_reg != IR_REG_NONE); + + if (op1_reg != IR_REG_NONE) { + if (IR_REG_SPILLED(op1_reg)) { + op1_reg = IR_REG_NUM(op1_reg); + ir_emit_load(ctx, src_type, op1_reg, insn->op1); + } + if (ir_type_size[src_type] == 1) { + if (ir_type_size[dst_type] == 2) { + | movzx Rw(def_reg), Rb(op1_reg) + } else if (ir_type_size[dst_type] == 4) { + | movzx Rd(def_reg), Rb(op1_reg) + } else { + IR_ASSERT(ir_type_size[dst_type] == 8); + IR_ASSERT(sizeof(void*) == 8); +|.if X64 + | movzx Rq(def_reg), Rb(op1_reg) +|.endif + } + } else if (ir_type_size[src_type] == 2) { + if (ir_type_size[dst_type] == 4) { + | movzx Rd(def_reg), Rw(op1_reg) + } else { + IR_ASSERT(ir_type_size[dst_type] == 8); + IR_ASSERT(sizeof(void*) == 8); +|.if X64 + | movzx Rq(def_reg), Rw(op1_reg) +|.endif + } + } else { + IR_ASSERT(ir_type_size[src_type] == 4); + IR_ASSERT(ir_type_size[dst_type] == 8); + IR_ASSERT(sizeof(void*) == 8); +|.if X64 + /* Avoid zero extension to the same register. This may not always be safe. 
*/ + if (op1_reg != def_reg) { + | mov Rd(def_reg), Rd(op1_reg) + } +|.endif + } + } else if (IR_IS_CONST_REF(insn->op1)) { + IR_ASSERT(0); + } else { + int32_t offset = 0; + + if (ir_rule(ctx, insn->op1) & IR_FUSED) { + offset = ir_fuse_load(ctx, insn->op1, &op1_reg); + } else { + offset = ir_ref_spill_slot(ctx, insn->op1, &op1_reg); + } + + if (ir_type_size[src_type] == 1) { + if (ir_type_size[dst_type] == 2) { + if (op1_reg != IR_REG_NONE) { + | movzx Rw(def_reg), byte [Ra(op1_reg)+offset] + } else { + | movzx Rw(def_reg), byte [offset] + } + } else if (ir_type_size[dst_type] == 4) { + if (op1_reg != IR_REG_NONE) { + | movzx Rd(def_reg), byte [Ra(op1_reg)+offset] + } else { + | movzx Rd(def_reg), byte [offset] + } + } else { + IR_ASSERT(ir_type_size[dst_type] == 8); + IR_ASSERT(sizeof(void*) == 8); +|.if X64 + if (op1_reg != IR_REG_NONE) { + | movzx Rq(def_reg), byte [Ra(op1_reg)+offset] + } else { + | movzx Rq(def_reg), byte [offset] + } +|.endif + } + } else if (ir_type_size[src_type] == 2) { + if (ir_type_size[dst_type] == 4) { + if (op1_reg != IR_REG_NONE) { + | movzx Rd(def_reg), word [Ra(op1_reg)+offset] + } else { + | movzx Rd(def_reg), word [offset] + } + } else { + IR_ASSERT(ir_type_size[dst_type] == 8); + IR_ASSERT(sizeof(void*) == 8); +|.if X64 + if (op1_reg != IR_REG_NONE) { + | movzx Rq(def_reg), word [Ra(op1_reg)+offset] + } else { + | movzx Rq(def_reg), word [offset] + } +|.endif + } + } else { + IR_ASSERT(ir_type_size[src_type] == 4); + IR_ASSERT(ir_type_size[dst_type] == 8); +|.if X64 + if (op1_reg != IR_REG_NONE) { + | mov Rd(def_reg), dword [Ra(op1_reg)+offset] + } else { + | mov Rd(def_reg), dword [offset] + } +|.endif + } + } + if (IR_REG_SPILLED(ctx->regs[def][0])) { + ir_emit_store(ctx, dst_type, def, def_reg); + } +} + +static void ir_emit_trunc(ir_ctx *ctx, ir_ref def, ir_insn *insn) +{ + ir_type dst_type = insn->type; + ir_type src_type = ctx->ir_base[insn->op1].type; + ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); + ir_reg op1_reg = ctx->regs[def][1]; + + IR_ASSERT(IR_IS_TYPE_INT(src_type)); + IR_ASSERT(IR_IS_TYPE_INT(dst_type)); + IR_ASSERT(ir_type_size[dst_type] < ir_type_size[src_type]); + IR_ASSERT(def_reg != IR_REG_NONE); + if (op1_reg != IR_REG_NONE && IR_REG_SPILLED(op1_reg)) { + op1_reg = IR_REG_NUM(op1_reg); + ir_emit_load(ctx, src_type, op1_reg, insn->op1); + } + if (op1_reg != IR_REG_NONE) { + if (op1_reg != def_reg) { + ir_emit_mov(ctx, dst_type, def_reg, op1_reg); + } + } else { + ir_emit_load(ctx, dst_type, def_reg, insn->op1); + } + if (IR_REG_SPILLED(ctx->regs[def][0])) { + ir_emit_store(ctx, dst_type, def, def_reg); + } +} + +static void ir_emit_bitcast(ir_ctx *ctx, ir_ref def, ir_insn *insn) +{ + ir_type dst_type = insn->type; + ir_type src_type = ctx->ir_base[insn->op1].type; + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); + ir_reg op1_reg = ctx->regs[def][1]; + int32_t offset; + + IR_ASSERT(ir_type_size[dst_type] == ir_type_size[src_type]); + IR_ASSERT(def_reg != IR_REG_NONE); + if (IR_IS_TYPE_INT(src_type) && IR_IS_TYPE_INT(dst_type)) { + if (ir_rule(ctx, insn->op1) & IR_FUSED) { + offset = ir_fuse_load(ctx, insn->op1, &op1_reg); + ir_emit_load_mem_int(ctx, dst_type, def_reg, op1_reg, offset); + } else if (op1_reg != IR_REG_NONE) { + if (IR_REG_SPILLED(op1_reg)) { + op1_reg = IR_REG_NUM(op1_reg); + ir_emit_load(ctx, src_type, op1_reg, insn->op1); + } + if (op1_reg != def_reg) { + ir_emit_mov(ctx, dst_type, def_reg, op1_reg); + } + } else { + ir_emit_load(ctx, 
dst_type, def_reg, insn->op1); + } + } else if (IR_IS_TYPE_FP(src_type) && IR_IS_TYPE_FP(dst_type)) { + if (ir_rule(ctx, insn->op1) & IR_FUSED) { + offset = ir_fuse_load(ctx, insn->op1, &op1_reg); + ir_emit_load_mem_fp(ctx, dst_type, def_reg, op1_reg, offset); + } else if (op1_reg != IR_REG_NONE) { + if (IR_REG_SPILLED(op1_reg)) { + op1_reg = IR_REG_NUM(op1_reg); + ir_emit_load(ctx, src_type, op1_reg, insn->op1); + } + if (op1_reg != def_reg) { + ir_emit_fp_mov(ctx, dst_type, def_reg, op1_reg); + } + } else { + ir_emit_load(ctx, dst_type, def_reg, insn->op1); + } + } else if (IR_IS_TYPE_FP(src_type)) { + IR_ASSERT(IR_IS_TYPE_INT(dst_type)); + if (op1_reg != IR_REG_NONE) { + if (IR_REG_SPILLED(op1_reg)) { + op1_reg = IR_REG_NUM(op1_reg); + ir_emit_load(ctx, src_type, op1_reg, insn->op1); + } + if (src_type == IR_DOUBLE) { + IR_ASSERT(sizeof(void*) == 8); +|.if X64 + if (ctx->mflags & IR_X86_AVX) { + | vmovd Rq(def_reg), xmm(op1_reg-IR_REG_FP_FIRST) + } else { + | movd Rq(def_reg), xmm(op1_reg-IR_REG_FP_FIRST) + } +|.endif + } else { + IR_ASSERT(src_type == IR_FLOAT); + if (ctx->mflags & IR_X86_AVX) { + | vmovd Rd(def_reg), xmm(op1_reg-IR_REG_FP_FIRST) + } else { + | movd Rd(def_reg), xmm(op1_reg-IR_REG_FP_FIRST) + } + } + } else if (IR_IS_CONST_REF(insn->op1)) { + ir_insn *_insn = &ctx->ir_base[insn->op1]; + if (src_type == IR_DOUBLE) { + IR_ASSERT(sizeof(void*) == 8); +|.if X64 + | mov64 Rq(def_reg), _insn->val.i64 +|.endif + } else { + IR_ASSERT(src_type == IR_FLOAT); + | mov Rd(def_reg), _insn->val.i32 + } + } else { + int32_t offset = 0; + + if (ir_rule(ctx, insn->op1) & IR_FUSED) { + offset = ir_fuse_load(ctx, insn->op1, &op1_reg); + } else { + offset = ir_ref_spill_slot(ctx, insn->op1, &op1_reg); + } + + if (src_type == IR_DOUBLE) { + IR_ASSERT(sizeof(void*) == 8); +|.if X64 + | mov Rq(def_reg), qword [Ra(op1_reg)+offset] +|.endif + } else { + IR_ASSERT(src_type == IR_FLOAT); + | mov Rd(def_reg), dword [Ra(op1_reg)+offset] + } + } + } else if (IR_IS_TYPE_FP(dst_type)) { + IR_ASSERT(IR_IS_TYPE_INT(src_type)); + if (op1_reg != IR_REG_NONE) { + if (IR_REG_SPILLED(op1_reg)) { + op1_reg = IR_REG_NUM(op1_reg); + ir_emit_load(ctx, src_type, op1_reg, insn->op1); + } + if (dst_type == IR_DOUBLE) { + IR_ASSERT(sizeof(void*) == 8); +|.if X64 + if (ctx->mflags & IR_X86_AVX) { + | vmovd xmm(def_reg-IR_REG_FP_FIRST), Rq(op1_reg) + } else { + | movd xmm(def_reg-IR_REG_FP_FIRST), Rq(op1_reg) + } +|.endif + } else { + IR_ASSERT(dst_type == IR_FLOAT); + if (ctx->mflags & IR_X86_AVX) { + | vmovd xmm(def_reg-IR_REG_FP_FIRST), Rd(op1_reg) + } else { + | movd xmm(def_reg-IR_REG_FP_FIRST), Rd(op1_reg) + } + } + } else if (IR_IS_CONST_REF(insn->op1)) { + ir_insn *val_insn = &ctx->ir_base[insn->op1]; + int label = ctx->cfg_blocks_count - insn->op1; + + val_insn->const_flags |= IR_CONST_EMIT; + | ASM_FP_REG_MEM_OP movss, movsd, vmovss, vmovsd, dst_type, def_reg, [=>label] + } else { + int32_t offset = 0; + + if (ir_rule(ctx, insn->op1) & IR_FUSED) { + offset = ir_fuse_load(ctx, insn->op1, &op1_reg); + } else { + offset = ir_ref_spill_slot(ctx, insn->op1, &op1_reg); + } + + | ASM_FP_REG_MEM_OP movss, movsd, vmovss, vmovsd, dst_type, def_reg, [Ra(op1_reg)+offset] + } + } + if (IR_REG_SPILLED(ctx->regs[def][0])) { + ir_emit_store(ctx, dst_type, def, def_reg); + } +} + +static void ir_emit_int2fp(ir_ctx *ctx, ir_ref def, ir_insn *insn) +{ + ir_type dst_type = insn->type; + ir_type src_type = ctx->ir_base[insn->op1].type; + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + ir_reg def_reg 
= IR_REG_NUM(ctx->regs[def][0]); + ir_reg op1_reg = ctx->regs[def][1]; + bool src64 = 0; + + IR_ASSERT(IR_IS_TYPE_INT(src_type)); + IR_ASSERT(IR_IS_TYPE_FP(dst_type)); + IR_ASSERT(def_reg != IR_REG_NONE); + if (IR_IS_TYPE_SIGNED(src_type) ? ir_type_size[src_type] == 8 : ir_type_size[src_type] >= 4) { + // TODO: we might need to perform sign/zero integer extension to 32/64 bit integer + src64 = 1; + } + if (op1_reg != IR_REG_NONE) { + if (IR_REG_SPILLED(op1_reg) || IR_IS_CONST_REF(insn->op1)) { + op1_reg = IR_REG_NUM(op1_reg); + ir_emit_load(ctx, src_type, op1_reg, insn->op1); + } + if (!src64) { + if (dst_type == IR_DOUBLE) { + if (ctx->mflags & IR_X86_AVX) { + | vxorps xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST) + | vcvtsi2sd xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST), Rd(op1_reg) + } else { + | pxor xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST) + | cvtsi2sd xmm(def_reg-IR_REG_FP_FIRST), Rd(op1_reg) + } + } else { + IR_ASSERT(dst_type == IR_FLOAT); + if (ctx->mflags & IR_X86_AVX) { + | vxorps xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST) + | vcvtsi2ss xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST), Rd(op1_reg) + } else { + | pxor xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST) + | cvtsi2ss xmm(def_reg-IR_REG_FP_FIRST), Rd(op1_reg) + } + } + } else { + IR_ASSERT(sizeof(void*) == 8); +|.if X64 + if (dst_type == IR_DOUBLE) { + if (ctx->mflags & IR_X86_AVX) { + | vxorps xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST) + | vcvtsi2sd xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST), Rq(op1_reg) + } else { + | pxor xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST) + | cvtsi2sd xmm(def_reg-IR_REG_FP_FIRST), Rq(op1_reg) + } + } else { + IR_ASSERT(dst_type == IR_FLOAT); + if (ctx->mflags & IR_X86_AVX) { + | vxorps xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST) + | vcvtsi2ss xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST), Rq(op1_reg) + } else { + | pxor xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST) + | cvtsi2ss xmm(def_reg-IR_REG_FP_FIRST), Rq(op1_reg) + } + } +|.endif + } + } else { + int32_t offset = 0; + + if (ir_rule(ctx, insn->op1) & IR_FUSED) { + offset = ir_fuse_load(ctx, insn->op1, &op1_reg); + } else { + offset = ir_ref_spill_slot(ctx, insn->op1, &op1_reg); + } + + if (!src64) { + if (dst_type == IR_DOUBLE) { + if (ctx->mflags & IR_X86_AVX) { + | vxorps xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST) + | vcvtsi2sd xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST), dword [Ra(op1_reg)+offset] + } else { + | pxor xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST) + | cvtsi2sd xmm(def_reg-IR_REG_FP_FIRST), dword [Ra(op1_reg)+offset] + } + } else { + IR_ASSERT(dst_type == IR_FLOAT); + if (ctx->mflags & IR_X86_AVX) { + | vxorps xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST) + | vcvtsi2ss xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST), dword [Ra(op1_reg)+offset] + } else { + | pxor xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST) + | cvtsi2ss xmm(def_reg-IR_REG_FP_FIRST), dword [Ra(op1_reg)+offset] + } + } + } else { + IR_ASSERT(sizeof(void*) == 8); +|.if X64 + if (dst_type == IR_DOUBLE) { + if (ctx->mflags & IR_X86_AVX) { + | vxorps xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST), 
xmm(def_reg-IR_REG_FP_FIRST) + | vcvtsi2sd xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST), qword [Ra(op1_reg)+offset] + } else { + | pxor xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST) + | cvtsi2sd xmm(def_reg-IR_REG_FP_FIRST), qword [Ra(op1_reg)+offset] + } + } else { + IR_ASSERT(dst_type == IR_FLOAT); + if (ctx->mflags & IR_X86_AVX) { + | vxorps xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST) + | vcvtsi2ss xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST), qword [Ra(op1_reg)+offset] + } else { + | pxor xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST) + | cvtsi2ss xmm(def_reg-IR_REG_FP_FIRST), qword [Ra(op1_reg)+offset] + } + } +|.endif + } + } + if (IR_REG_SPILLED(ctx->regs[def][0])) { + ir_emit_store(ctx, dst_type, def, def_reg); + } +} + +static void ir_emit_fp2int(ir_ctx *ctx, ir_ref def, ir_insn *insn) +{ + ir_type dst_type = insn->type; + ir_type src_type = ctx->ir_base[insn->op1].type; + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); + ir_reg op1_reg = ctx->regs[def][1]; + bool dst64 = 0; + + IR_ASSERT(IR_IS_TYPE_FP(src_type)); + IR_ASSERT(IR_IS_TYPE_INT(dst_type)); + IR_ASSERT(def_reg != IR_REG_NONE); + if (IR_IS_TYPE_SIGNED(dst_type) ? ir_type_size[dst_type] == 8 : ir_type_size[dst_type] >= 4) { + // TODO: we might need to perform truncation from 32/64 bit integer + dst64 = 1; + } + if (op1_reg != IR_REG_NONE) { + if (IR_REG_SPILLED(op1_reg)) { + op1_reg = IR_REG_NUM(op1_reg); + ir_emit_load(ctx, src_type, op1_reg, insn->op1); + } + if (!dst64) { + if (src_type == IR_DOUBLE) { + if (ctx->mflags & IR_X86_AVX) { + | vcvtsd2si Rd(def_reg), xmm(op1_reg-IR_REG_FP_FIRST) + } else { + | cvtsd2si Rd(def_reg), xmm(op1_reg-IR_REG_FP_FIRST) + } + } else { + IR_ASSERT(src_type == IR_FLOAT); + if (ctx->mflags & IR_X86_AVX) { + | vcvtss2si Rd(def_reg), xmm(op1_reg-IR_REG_FP_FIRST) + } else { + | cvtss2si Rd(def_reg), xmm(op1_reg-IR_REG_FP_FIRST) + } + } + } else { + IR_ASSERT(sizeof(void*) == 8); +|.if X64 + if (src_type == IR_DOUBLE) { + if (ctx->mflags & IR_X86_AVX) { + | vcvtsd2si Rq(def_reg), xmm(op1_reg-IR_REG_FP_FIRST) + } else { + | cvtsd2si Rq(def_reg), xmm(op1_reg-IR_REG_FP_FIRST) + } + } else { + IR_ASSERT(src_type == IR_FLOAT); + if (ctx->mflags & IR_X86_AVX) { + | vcvtss2si Rq(def_reg), xmm(op1_reg-IR_REG_FP_FIRST) + } else { + | cvtss2si Rq(def_reg), xmm(op1_reg-IR_REG_FP_FIRST) + } + } +|.endif + } + } else if (IR_IS_CONST_REF(insn->op1)) { + ir_insn *_insn = &ctx->ir_base[insn->op1]; + int label = ctx->cfg_blocks_count - insn->op1; + + _insn->const_flags |= IR_CONST_EMIT; + if (!dst64) { + if (src_type == IR_DOUBLE) { + if (ctx->mflags & IR_X86_AVX) { + | vcvtsd2si Rd(def_reg), qword [=>label] + } else { + | cvtsd2si Rd(def_reg), qword [=>label] + } + } else { + IR_ASSERT(src_type == IR_FLOAT); + if (ctx->mflags & IR_X86_AVX) { + | vcvtss2si Rd(def_reg), dword [=>label] + } else { + | cvtss2si Rd(def_reg), dword [=>label] + } + } + } else { + IR_ASSERT(sizeof(void*) == 8); +|.if X64 + if (src_type == IR_DOUBLE) { + if (ctx->mflags & IR_X86_AVX) { + | vcvtsd2si Rq(def_reg), qword [=>label] + } else { + | cvtsd2si Rq(def_reg), qword [=>label] + } + } else { + IR_ASSERT(src_type == IR_FLOAT); + if (ctx->mflags & IR_X86_AVX) { + | vcvtss2si Rq(def_reg), dword [=>label] + } else { + | cvtss2si Rq(def_reg), dword [=>label] + } + } +|.endif + } + } else { + int32_t offset = 0; + + if (ir_rule(ctx, insn->op1) & IR_FUSED) { + 
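/* Sketch of what fusion buys in this branch (registers and offset are
 * illustrative, not taken from the patch): when the matcher marks op1 as
 * IR_FUSED, the FP value is never materialized in a register. ir_fuse_load()
 * recovers a (base, offset) address and the conversion consumes the memory
 * operand directly, so the two-instruction form
 *
 *     movsd xmm0, qword [rbp-16]   ; separate load of op1
 *     cvtsd2si rax, xmm0
 *
 * collapses into
 *
 *     cvtsd2si rax, qword [rbp-16]
 */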
offset = ir_fuse_load(ctx, insn->op1, &op1_reg); + } else { + offset = ir_ref_spill_slot(ctx, insn->op1, &op1_reg); + } + + if (!dst64) { + if (src_type == IR_DOUBLE) { + if (ctx->mflags & IR_X86_AVX) { + | vcvtsd2si Rd(def_reg), qword [Ra(op1_reg)+offset] + } else { + | cvtsd2si Rd(def_reg), qword [Ra(op1_reg)+offset] + } + } else { + IR_ASSERT(src_type == IR_FLOAT); + if (ctx->mflags & IR_X86_AVX) { + | vcvtss2si Rd(def_reg), dword [Ra(op1_reg)+offset] + } else { + | cvtss2si Rd(def_reg), dword [Ra(op1_reg)+offset] + } + } + } else { + IR_ASSERT(sizeof(void*) == 8); +|.if X64 + if (src_type == IR_DOUBLE) { + if (ctx->mflags & IR_X86_AVX) { + | vcvtsd2si Rq(def_reg), qword [Ra(op1_reg)+offset] + } else { + | cvtsd2si Rq(def_reg), qword [Ra(op1_reg)+offset] + } + } else { + IR_ASSERT(src_type == IR_FLOAT); + if (ctx->mflags & IR_X86_AVX) { + | vcvtss2si Rq(def_reg), dword [Ra(op1_reg)+offset] + } else { + | cvtss2si Rq(def_reg), dword [Ra(op1_reg)+offset] + } + } +|.endif + } + } + if (IR_REG_SPILLED(ctx->regs[def][0])) { + ir_emit_store(ctx, dst_type, def, def_reg); + } +} + +static void ir_emit_fp2fp(ir_ctx *ctx, ir_ref def, ir_insn *insn) +{ + ir_type dst_type = insn->type; + ir_type src_type = ctx->ir_base[insn->op1].type; + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); + ir_reg op1_reg = ctx->regs[def][1]; + + IR_ASSERT(IR_IS_TYPE_FP(src_type)); + IR_ASSERT(IR_IS_TYPE_FP(dst_type)); + IR_ASSERT(def_reg != IR_REG_NONE); + if (op1_reg != IR_REG_NONE) { + if (IR_REG_SPILLED(op1_reg)) { + op1_reg = IR_REG_NUM(op1_reg); + ir_emit_load(ctx, src_type, op1_reg, insn->op1); + } + if (src_type == dst_type) { + if (op1_reg != def_reg) { + ir_emit_fp_mov(ctx, dst_type, def_reg, op1_reg); + } + } else if (src_type == IR_DOUBLE) { + if (ctx->mflags & IR_X86_AVX) { + | vcvtsd2ss xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST), xmm(op1_reg-IR_REG_FP_FIRST) + } else { + | cvtsd2ss xmm(def_reg-IR_REG_FP_FIRST), xmm(op1_reg-IR_REG_FP_FIRST) + } + } else { + IR_ASSERT(src_type == IR_FLOAT); + if (ctx->mflags & IR_X86_AVX) { + | vcvtss2sd xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST), xmm(op1_reg-IR_REG_FP_FIRST) + } else { + | cvtss2sd xmm(def_reg-IR_REG_FP_FIRST), xmm(op1_reg-IR_REG_FP_FIRST) + } + } + } else if (IR_IS_CONST_REF(insn->op1)) { + ir_insn *_insn = &ctx->ir_base[insn->op1]; + int label = ctx->cfg_blocks_count - insn->op1; + + _insn->const_flags |= IR_CONST_EMIT; + if (src_type == IR_DOUBLE) { + if (ctx->mflags & IR_X86_AVX) { + | vcvtsd2ss xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST), qword [=>label] + } else { + | cvtsd2ss xmm(def_reg-IR_REG_FP_FIRST), qword [=>label] + } + } else { + IR_ASSERT(src_type == IR_FLOAT); + if (ctx->mflags & IR_X86_AVX) { + | vcvtss2sd xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST), dword [=>label] + } else { + | cvtss2sd xmm(def_reg-IR_REG_FP_FIRST), dword [=>label] + } + } + } else { + int32_t offset = 0; + + if (ir_rule(ctx, insn->op1) & IR_FUSED) { + offset = ir_fuse_load(ctx, insn->op1, &op1_reg); + } else { + offset = ir_ref_spill_slot(ctx, insn->op1, &op1_reg); + } + + if (src_type == IR_DOUBLE) { + if (ctx->mflags & IR_X86_AVX) { + | vcvtsd2ss xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST), qword [Ra(op1_reg)+offset] + } else { + | cvtsd2ss xmm(def_reg-IR_REG_FP_FIRST), qword [Ra(op1_reg)+offset] + } + } else { + IR_ASSERT(src_type == IR_FLOAT); + if (ctx->mflags & IR_X86_AVX) { + | vcvtss2sd 
xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST), dword [Ra(op1_reg)+offset] + } else { + | cvtss2sd xmm(def_reg-IR_REG_FP_FIRST), dword [Ra(op1_reg)+offset] + } + } + } + if (IR_REG_SPILLED(ctx->regs[def][0])) { + ir_emit_store(ctx, dst_type, def, def_reg); + } +} + +static void ir_emit_copy_int(ir_ctx *ctx, ir_ref def, ir_insn *insn) +{ + ir_ref type = insn->type; + ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); + ir_reg op1_reg = ctx->regs[def][1]; + + IR_ASSERT(def_reg != IR_REG_NONE || op1_reg != IR_REG_NONE); + if (op1_reg != IR_REG_NONE && IR_REG_SPILLED(op1_reg)) { + op1_reg = IR_REG_NUM(op1_reg); + ir_emit_load(ctx, type, op1_reg, insn->op1); + } + if (def_reg == op1_reg) { + /* same reg */ + } else if (def_reg != IR_REG_NONE && op1_reg != IR_REG_NONE) { + ir_emit_mov(ctx, type, def_reg, op1_reg); + } else if (def_reg != IR_REG_NONE) { + ir_emit_load(ctx, type, def_reg, insn->op1); + } else if (op1_reg != IR_REG_NONE) { + ir_emit_store(ctx, type, def, op1_reg); + } else { + IR_ASSERT(0); + } + if (def_reg != IR_REG_NONE && IR_REG_SPILLED(ctx->regs[def][0])) { + ir_emit_store(ctx, type, def, def_reg); + } +} + +static void ir_emit_copy_fp(ir_ctx *ctx, ir_ref def, ir_insn *insn) +{ + ir_type type = insn->type; + ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); + ir_reg op1_reg = ctx->regs[def][1]; + + IR_ASSERT(def_reg != IR_REG_NONE || op1_reg != IR_REG_NONE); + if (op1_reg != IR_REG_NONE && IR_REG_SPILLED(op1_reg)) { + op1_reg = IR_REG_NUM(op1_reg); + ir_emit_load(ctx, type, op1_reg, insn->op1); + } + if (def_reg == op1_reg) { + /* same reg */ + } else if (def_reg != IR_REG_NONE && op1_reg != IR_REG_NONE) { + ir_emit_fp_mov(ctx, type, def_reg, op1_reg); + } else if (def_reg != IR_REG_NONE) { + ir_emit_load(ctx, type, def_reg, insn->op1); + } else if (op1_reg != IR_REG_NONE) { + ir_emit_store(ctx, type, def, op1_reg); + } else { + IR_ASSERT(0); + } + if (def_reg != IR_REG_NONE && IR_REG_SPILLED(ctx->regs[def][0])) { + ir_emit_store(ctx, type, def, def_reg); + } +} + +static void ir_emit_vaddr(ir_ctx *ctx, ir_ref def, ir_insn *insn) +{ + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + ir_ref type = insn->type; + ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); + int32_t offset; + ir_reg fp; + + IR_ASSERT(def_reg != IR_REG_NONE); + offset = ir_var_spill_slot(ctx, insn->op1, &fp); + | lea Ra(def_reg), aword [Ra(fp)+offset] + if (IR_REG_SPILLED(ctx->regs[def][0])) { + ir_emit_store(ctx, type, def, def_reg); + } +} + +static void ir_emit_vload(ir_ctx *ctx, ir_ref def, ir_insn *insn) +{ + ir_insn *var_insn = &ctx->ir_base[insn->op2]; + ir_ref type = insn->type; + ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); + ir_reg fp; + int32_t offset; + + IR_ASSERT(var_insn->op == IR_VAR); + fp = (ctx->flags & IR_USE_FRAME_POINTER) ? 
IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER; + offset = IR_SPILL_POS_TO_OFFSET(var_insn->op3); + if (def_reg == IR_REG_NONE && ir_is_same_mem_var(ctx, def, var_insn->op3)) { + return; // fake load + } + IR_ASSERT(def_reg != IR_REG_NONE); + + if (IR_IS_TYPE_INT(type)) { + ir_emit_load_mem_int(ctx, type, def_reg, fp, offset); + } else { + ir_emit_load_mem_fp(ctx, type, def_reg, fp, offset); + } + if (IR_REG_SPILLED(ctx->regs[def][0])) { + ir_emit_store(ctx, type, def, def_reg); + } +} + +static void ir_emit_vstore_int(ir_ctx *ctx, ir_ref ref, ir_insn *insn) +{ + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + ir_insn *var_insn = &ctx->ir_base[insn->op2]; + ir_insn *val_insn = &ctx->ir_base[insn->op3]; + ir_ref type = val_insn->type; + ir_reg op3_reg = ctx->regs[ref][3]; + ir_reg fp; + int32_t offset; + + IR_ASSERT(var_insn->op == IR_VAR); + fp = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER; + offset = IR_SPILL_POS_TO_OFFSET(var_insn->op3); + if ((op3_reg == IR_REG_NONE || IR_REG_SPILLED(op3_reg)) + && !IR_IS_CONST_REF(insn->op3) && ir_is_same_mem_var(ctx, insn->op3, var_insn->op3)) { + return; // fake store + } + if (IR_IS_CONST_REF(insn->op3) && IR_IS_32BIT(type, val_insn->val)) { + | ASM_MEM_IMM_OP mov, type, [Ra(fp)+offset], val_insn->val.i32 + } else { + IR_ASSERT(op3_reg != IR_REG_NONE); + if (IR_REG_SPILLED(op3_reg) || IR_IS_CONST_REF(insn->op3)) { + op3_reg = IR_REG_NUM(op3_reg); + ir_emit_load(ctx, type, op3_reg, insn->op3); + } + ir_emit_store_mem_int(ctx, type, fp, offset, op3_reg); + } +} + +static void ir_emit_vstore_fp(ir_ctx *ctx, ir_ref ref, ir_insn *insn) +{ + ir_insn *var_insn = &ctx->ir_base[insn->op2]; + ir_ref type = ctx->ir_base[insn->op3].type; + ir_reg op3_reg = ctx->regs[ref][3]; + ir_reg fp; + int32_t offset; + + IR_ASSERT(var_insn->op == IR_VAR); + fp = (ctx->flags & IR_USE_FRAME_POINTER) ? 
IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER; + offset = IR_SPILL_POS_TO_OFFSET(var_insn->op3); + if ((op3_reg == IR_REG_NONE || IR_REG_SPILLED(op3_reg)) + && !IR_IS_CONST_REF(insn->op3) && ir_is_same_mem_var(ctx, insn->op3, var_insn->op3)) { + return; // fake store + } + IR_ASSERT(op3_reg != IR_REG_NONE); + if (IR_REG_SPILLED(op3_reg) || IR_IS_CONST_REF(insn->op3)) { + op3_reg = IR_REG_NUM(op3_reg); + ir_emit_load(ctx, type, op3_reg, insn->op3); + } + ir_emit_store_mem_fp(ctx, type, fp, offset, op3_reg); +} + +static void ir_emit_load_int(ir_ctx *ctx, ir_ref def, ir_insn *insn) +{ + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + ir_ref type = insn->type; + ir_reg op2_reg = ctx->regs[def][2]; + ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); + int32_t offset = 0; + + if (ctx->use_lists[def].count == 1) { + /* dead load */ + return; + } + IR_ASSERT(def_reg != IR_REG_NONE); + if (IR_IS_CONST_REF(insn->op2)) { + void *addr = (void*)ctx->ir_base[insn->op2].val.addr; + + if (sizeof(void*) == 4 || IR_IS_SIGNED_32BIT(addr)) { + int32_t addr32 = (int32_t)(intptr_t)addr; + | ASM_REG_MEM_OP mov, type, def_reg, [addr32] + if (IR_REG_SPILLED(ctx->regs[def][0])) { + ir_emit_store(ctx, type, def, def_reg); + } + return; + } + } + if (op2_reg == IR_REG_NONE) { + op2_reg = def_reg; + } + if (!IR_IS_CONST_REF(insn->op2) && (ir_rule(ctx, insn->op2) & IR_FUSED)) { + offset = ir_fuse_addr(ctx, insn->op2, &op2_reg); + if (IR_REG_SPILLED(ctx->regs[def][0]) && ir_is_same_spill_slot(ctx, def, op2_reg, offset)) { + if (!ir_may_avoid_spill_load(ctx, def, def)) { + ir_emit_load_mem_int(ctx, type, def_reg, op2_reg, offset); + } + /* avoid load to the same location (valid only when register is not reused) */ + return; + } + } else if (IR_REG_SPILLED(op2_reg) || IR_IS_CONST_REF(insn->op2)) { + op2_reg = IR_REG_NUM(op2_reg); + IR_ASSERT(ctx->ir_base[insn->op2].type == IR_ADDR); + ir_emit_load(ctx, IR_ADDR, op2_reg, insn->op2); + } + + ir_emit_load_mem_int(ctx, type, def_reg, op2_reg, offset); + if (IR_REG_SPILLED(ctx->regs[def][0])) { + ir_emit_store(ctx, type, def, def_reg); + } +} + +static void ir_emit_load_fp(ir_ctx *ctx, ir_ref def, ir_insn *insn) +{ + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + ir_ref type = insn->type; + ir_reg op2_reg = ctx->regs[def][2]; + ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); + int32_t offset = 0; + + if (ctx->use_lists[def].count == 1) { + /* dead load */ + return; + } + IR_ASSERT(def_reg != IR_REG_NONE); + if (IR_IS_CONST_REF(insn->op2)) { + if (op2_reg == IR_REG_NONE) { + int32_t addr32 = ctx->ir_base[insn->op2].val.i32; + + IR_ASSERT(sizeof(void*) == 4 || IR_IS_SIGNED_32BIT(ctx->ir_base[insn->op2].val.i64)); + | ASM_FP_REG_MEM_OP movss, movsd, vmovss, vmovsd, type, def_reg, [addr32] + if (IR_REG_SPILLED(ctx->regs[def][0])) { + ir_emit_store(ctx, type, def, def_reg); + } + return; + } else { + ir_emit_load(ctx, IR_ADDR, op2_reg, insn->op2); + } + } else if (op2_reg == IR_REG_NONE) { + offset = ir_fuse_addr(ctx, insn->op2, &op2_reg); + if (IR_REG_SPILLED(ctx->regs[def][0]) && ir_is_same_spill_slot(ctx, def, op2_reg, offset)) { + if (!ir_may_avoid_spill_load(ctx, def, def)) { + ir_emit_load_mem_fp(ctx, type, def_reg, op2_reg, offset); + } + /* avoid load to the same location (valid only when register is not reused) */ + return; + } + } else if (IR_REG_SPILLED(op2_reg)) { + op2_reg = IR_REG_NUM(op2_reg); + IR_ASSERT(ctx->ir_base[insn->op2].type == IR_ADDR); + ir_emit_load(ctx, IR_ADDR, op2_reg, insn->op2); + } + + 
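/* At this point op2 has been reduced to a (base register, offset) address by
 * one of the routes above: a constant address, a fused address computation,
 * or a pointer kept in (or reloaded into) a register. The elision in the
 * fused path is worth spelling out: if `def` spills to the very slot being
 * read (same base and offset), the pair
 *
 *     movsd xmm0, qword [rbp-24]   ; load
 *     movsd qword [rbp-24], xmm0   ; spill store
 *
 * would round-trip through the same memory, so the store is skipped, and the
 * load too once ir_may_avoid_spill_load() proves the register is not reused
 * (register and offset here are illustrative). */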
ir_emit_load_mem_fp(ctx, type, def_reg, op2_reg, offset); + if (IR_REG_SPILLED(ctx->regs[def][0])) { + ir_emit_store(ctx, type, def, def_reg); + } +} + +static void ir_emit_store_int(ir_ctx *ctx, ir_ref ref, ir_insn *insn) +{ + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + ir_insn *val_insn = &ctx->ir_base[insn->op3]; + ir_ref type = val_insn->type; + ir_reg op2_reg = ctx->regs[ref][2]; + ir_reg op3_reg = ctx->regs[ref][3]; + int32_t offset = 0; + + if (IR_IS_CONST_REF(insn->op2)) { + + if (op2_reg == IR_REG_NONE) { + int32_t addr32 = ctx->ir_base[insn->op2].val.i32; + + IR_ASSERT(sizeof(void*) == 4 || IR_IS_SIGNED_32BIT(ctx->ir_base[insn->op2].val.i64)); + if (IR_IS_CONST_REF(insn->op3) && IR_IS_32BIT(type, val_insn->val)) { + | ASM_MEM_IMM_OP mov, type, [addr32], val_insn->val.i32 + } else { + IR_ASSERT(op3_reg != IR_REG_NONE); + if (IR_REG_SPILLED(op3_reg) || IR_IS_CONST_REF(insn->op3)) { + op3_reg = IR_REG_NUM(op3_reg); + ir_emit_load(ctx, type, op3_reg, insn->op3); + } + | ASM_MEM_REG_OP mov, type, [addr32], op3_reg + } + return; + } else { + ir_emit_load(ctx, IR_ADDR, op2_reg, insn->op2); + } + } else if (op2_reg == IR_REG_NONE) { + offset = ir_fuse_addr(ctx, insn->op2, &op2_reg); + if (!IR_IS_CONST_REF(insn->op3) && IR_REG_SPILLED(op3_reg) && ir_is_same_spill_slot(ctx, insn->op3, op2_reg, offset)) { + if (!ir_may_avoid_spill_load(ctx, insn->op3, ref)) { + op3_reg = IR_REG_NUM(op3_reg); + ir_emit_load(ctx, type, op3_reg, insn->op3); + } + /* avoid store to the same location */ + return; + } + } else if (IR_REG_SPILLED(op2_reg)) { + op2_reg = IR_REG_NUM(op2_reg); + IR_ASSERT(ctx->ir_base[insn->op2].type == IR_ADDR); + ir_emit_load(ctx, IR_ADDR, op2_reg, insn->op2); + } + + if (IR_IS_CONST_REF(insn->op3) && IR_IS_32BIT(type, val_insn->val)) { + | ASM_MEM_IMM_OP mov, type, [Ra(op2_reg)+offset], val_insn->val.i32 + } else { + IR_ASSERT(op3_reg != IR_REG_NONE); + if (IR_REG_SPILLED(op3_reg) || IR_IS_CONST_REF(insn->op3)) { + op3_reg = IR_REG_NUM(op3_reg); + ir_emit_load(ctx, type, op3_reg, insn->op3); + } + ir_emit_store_mem_int(ctx, type, op2_reg, offset, op3_reg); + } +} + +static void ir_emit_store_fp(ir_ctx *ctx, ir_ref ref, ir_insn *insn) +{ + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + ir_ref type = ctx->ir_base[insn->op3].type; + ir_reg op2_reg = ctx->regs[ref][2]; + ir_reg op3_reg = ctx->regs[ref][3]; + int32_t offset = 0; + + IR_ASSERT(op3_reg != IR_REG_NONE); + if (IR_IS_CONST_REF(insn->op2)) { + if (op2_reg == IR_REG_NONE) { + int32_t addr32 = ctx->ir_base[insn->op2].val.i32; + + IR_ASSERT(sizeof(void*) == 4 || IR_IS_SIGNED_32BIT(ctx->ir_base[insn->op2].val.i64)); + if (IR_REG_SPILLED(op3_reg) || IR_IS_CONST_REF(insn->op3)) { + op3_reg = IR_REG_NUM(op3_reg); + ir_emit_load(ctx, type, op3_reg, insn->op3); + } + | ASM_FP_MEM_REG_OP movss, movsd, vmovss, vmovsd, type, [addr32], op3_reg + return; + } else { + ir_emit_load(ctx, IR_ADDR, op2_reg, insn->op2); + } + } else if (op2_reg == IR_REG_NONE) { + offset = ir_fuse_addr(ctx, insn->op2, &op2_reg); + if (!IR_IS_CONST_REF(insn->op3) && IR_REG_SPILLED(op3_reg) && ir_is_same_spill_slot(ctx, insn->op3, op2_reg, offset)) { + if (!ir_may_avoid_spill_load(ctx, insn->op3, ref)) { + op3_reg = IR_REG_NUM(op3_reg); + ir_emit_load(ctx, type, op3_reg, insn->op3); + } + /* avoid store to the same location */ + return; + } + } else if (IR_REG_SPILLED(op2_reg)) { + op2_reg = IR_REG_NUM(op2_reg); + IR_ASSERT(ctx->ir_base[insn->op2].type == IR_ADDR); + ir_emit_load(ctx, IR_ADDR, 
op2_reg, insn->op2); + } + + if (IR_REG_SPILLED(op3_reg) || IR_IS_CONST_REF(insn->op3)) { + op3_reg = IR_REG_NUM(op3_reg); + ir_emit_load(ctx, type, op3_reg, insn->op3); + } + ir_emit_store_mem_fp(ctx, type, op2_reg, offset, op3_reg); +} + +static void ir_emit_rload(ir_ctx *ctx, ir_ref def, ir_insn *insn) +{ + ir_reg src_reg = insn->op2; + ir_type type = insn->type; + + if (IR_REGSET_IN(IR_REGSET_UNION((ir_regset)ctx->fixed_regset, IR_REGSET_FIXED), src_reg)) { + if (ctx->vregs[def] + && ctx->live_intervals[ctx->vregs[def]] + && ctx->live_intervals[ctx->vregs[def]]->stack_spill_pos != -1) { + ir_emit_store(ctx, type, def, src_reg); + } + } else { + ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); + + if (def_reg == IR_REG_NONE) { + /* op3 is used as a flag that the value is already stored in memory. + * If op3 is set we don't have to store the value once again (in case of spilling) + */ + if (!insn->op3 || !ir_is_same_spill_slot(ctx, def, ctx->spill_base, insn->op3)) { + ir_emit_store(ctx, type, def, src_reg); + } + } else { + if (src_reg != def_reg) { + if (IR_IS_TYPE_INT(type)) { + ir_emit_mov(ctx, type, def_reg, src_reg); + } else { + IR_ASSERT(IR_IS_TYPE_FP(type)); + ir_emit_fp_mov(ctx, type, def_reg, src_reg); + } + } + if (IR_REG_SPILLED(ctx->regs[def][0]) + && (!insn->op3 || !ir_is_same_spill_slot(ctx, def, ctx->spill_base, insn->op3))) { + ir_emit_store(ctx, type, def, def_reg); + } + } + } +} + +static void ir_emit_rstore(ir_ctx *ctx, ir_ref ref, ir_insn *insn) +{ + ir_ref type = ctx->ir_base[insn->op2].type; + ir_reg op2_reg = ctx->regs[ref][2]; + ir_reg dst_reg = insn->op3; + + if (!IR_IS_CONST_REF(insn->op2) && (ir_rule(ctx, insn->op2) & IR_FUSED)) { + int32_t offset = ir_fuse_load(ctx, insn->op2, &op2_reg); + if (IR_IS_TYPE_INT(type)) { + ir_emit_load_mem_int(ctx, type, dst_reg, op2_reg, offset); + } else { + ir_emit_load_mem_fp(ctx, type, dst_reg, op2_reg, offset); + } + } else if (op2_reg != IR_REG_NONE) { + if (IR_REG_SPILLED(op2_reg)) { + op2_reg = IR_REG_NUM(op2_reg); + ir_emit_load(ctx, type, op2_reg, insn->op2); + } + if (op2_reg != dst_reg) { + if (IR_IS_TYPE_INT(type)) { + ir_emit_mov(ctx, type, dst_reg, op2_reg); + } else { + IR_ASSERT(IR_IS_TYPE_FP(type)); + ir_emit_fp_mov(ctx, type, dst_reg, op2_reg); + } + } + } else { + ir_emit_load(ctx, type, dst_reg, insn->op2); + } +} + +static void ir_emit_alloca(ir_ctx *ctx, ir_ref def, ir_insn *insn) +{ + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); + + if (IR_IS_CONST_REF(insn->op2)) { + ir_insn *val = &ctx->ir_base[insn->op2]; + int32_t size = val->val.i32; + + IR_ASSERT(IR_IS_TYPE_INT(val->type)); + IR_ASSERT(IR_IS_TYPE_UNSIGNED(val->type) || val->val.i64 > 0); + IR_ASSERT(IR_IS_SIGNED_32BIT(val->val.i64)); + + if (ctx->flags & IR_HAS_CALLS) { + /* Stack must be 16 byte aligned */ + size = IR_ALIGNED_SIZE(size, 16); + } else { + size = IR_ALIGNED_SIZE(size, 8); + } + | ASM_REG_IMM_OP sub, IR_ADDR, IR_REG_RSP, size + if (!(ctx->flags & IR_USE_FRAME_POINTER)) { + ctx->call_stack_size += size; + } + } else { + int32_t alignment = (ctx->flags & IR_HAS_CALLS) ? 
16 : 8; + ir_reg op2_reg = ctx->regs[def][2]; + ir_type type = ctx->ir_base[insn->op2].type; + + IR_ASSERT(ctx->flags & IR_FUNCTION); + IR_ASSERT(def_reg != IR_REG_NONE); + if (op2_reg != IR_REG_NONE && IR_REG_SPILLED(op2_reg)) { + op2_reg = IR_REG_NUM(op2_reg); + ir_emit_load(ctx, type, op2_reg, insn->op2); + } + if (def_reg != op2_reg) { + if (op2_reg != IR_REG_NONE) { + ir_emit_mov(ctx, type, def_reg, op2_reg); + } else { + ir_emit_load(ctx, type, def_reg, insn->op2); + } + } + + | ASM_REG_IMM_OP add, IR_ADDR, def_reg, (alignment-1) + | ASM_REG_IMM_OP and, IR_ADDR, def_reg, ~(alignment-1) + | ASM_REG_REG_OP sub, IR_ADDR, IR_REG_RSP, def_reg + } + if (def_reg != IR_REG_NONE) { + | mov Ra(def_reg), Ra(IR_REG_RSP) + if (IR_REG_SPILLED(ctx->regs[def][0])) { + ir_emit_store(ctx, insn->type, def, def_reg); + } + } else { + ir_emit_store(ctx, IR_ADDR, def, IR_REG_STACK_POINTER); + } +} + +static void ir_emit_afree(ir_ctx *ctx, ir_ref def, ir_insn *insn) +{ + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + + if (IR_IS_CONST_REF(insn->op2)) { + ir_insn *val = &ctx->ir_base[insn->op2]; + int32_t size = val->val.i32; + + IR_ASSERT(IR_IS_TYPE_INT(val->type)); + IR_ASSERT(IR_IS_TYPE_UNSIGNED(val->type) || val->val.i64 > 0); + IR_ASSERT(IR_IS_SIGNED_32BIT(val->val.i64)); + + if (ctx->flags & IR_HAS_CALLS) { + /* Stack must be 16 byte aligned */ + size = IR_ALIGNED_SIZE(size, 16); + } else { + size = IR_ALIGNED_SIZE(size, 8); + } + | ASM_REG_IMM_OP add, IR_ADDR, IR_REG_RSP, size + if (!(ctx->flags & IR_USE_FRAME_POINTER)) { + ctx->call_stack_size -= size; + } + } else { +// int32_t alignment = (ctx->flags & IR_HAS_CALLS) ? 16 : 8; + ir_reg op2_reg = ctx->regs[def][2]; + ir_type type = ctx->ir_base[insn->op2].type; + + IR_ASSERT(ctx->flags & IR_FUNCTION); + if (op2_reg != IR_REG_NONE && IR_REG_SPILLED(op2_reg)) { + op2_reg = IR_REG_NUM(op2_reg); + ir_emit_load(ctx, type, op2_reg, insn->op2); + } + + // TODO: alignment ??? 
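/* A worked example of the invariant behind this TODO (sizes illustrative):
 * a constant ALLOCA of 20 bytes in a function with calls reserves
 * IR_ALIGNED_SIZE(20, 16) == 32 bytes, and the constant AFREE path above
 * rounds the same way, so rsp moves down and back up by equal amounts.
 * In the dynamic case, ir_emit_alloca() aligns the runtime size with
 * `add def_reg, alignment-1; and def_reg, ~(alignment-1)` before
 * subtracting, while the add below returns op2 to rsp unmodified;
 * presumably op2 must already hold the same aligned value, which is what
 * the commented-out alignment computation and the TODO flag. */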
+ + | ASM_REG_REG_OP add, IR_ADDR, IR_REG_RSP, op2_reg + } +} + +static void ir_emit_switch(ir_ctx *ctx, uint32_t b, ir_ref def, ir_insn *insn) +{ + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + ir_type type; + ir_block *bb; + ir_insn *use_insn, *val; + uint32_t n, *p, use_block; + int i; + int label, default_label = 0; + int count = 0; + ir_val min, max; + int64_t offset; + ir_reg op2_reg = ctx->regs[def][2]; +|.if X64 +|| ir_reg tmp_reg = ctx->regs[def][3]; +|.endif + + type = ctx->ir_base[insn->op2].type; + if (IR_IS_TYPE_SIGNED(type)) { + min.u64 = 0x7fffffffffffffff; + max.u64 = 0x8000000000000000; + } else { + min.u64 = 0xffffffffffffffff; + max.u64 = 0x0; + } + + bb = &ctx->cfg_blocks[b]; + p = &ctx->cfg_edges[bb->successors]; + for (n = bb->successors_count; n != 0; p++, n--) { + use_block = *p; + use_insn = &ctx->ir_base[ctx->cfg_blocks[use_block].start]; + if (use_insn->op == IR_CASE_VAL) { + val = &ctx->ir_base[use_insn->op2]; + if (IR_IS_TYPE_SIGNED(type)) { + IR_ASSERT(IR_IS_TYPE_SIGNED(val->type)); + min.i64 = IR_MIN(min.i64, val->val.i64); + max.i64 = IR_MAX(max.i64, val->val.i64); + } else { + IR_ASSERT(!IR_IS_TYPE_SIGNED(val->type)); + min.u64 = (int64_t)IR_MIN(min.u64, val->val.u64); + max.u64 = (int64_t)IR_MAX(max.u64, val->val.u64); + } + count++; + } else { + IR_ASSERT(use_insn->op == IR_CASE_DEFAULT); + default_label = ir_skip_empty_target_blocks(ctx, use_block); + } + } + + IR_ASSERT(op2_reg != IR_REG_NONE); +|.if X64 +|| IR_ASSERT(tmp_reg != IR_REG_NONE || sizeof(void*) != 8); +|.endif + if (IR_REG_SPILLED(op2_reg)) { + op2_reg = IR_REG_NUM(op2_reg); + ir_emit_load(ctx, type, op2_reg, insn->op2); + } else if (IR_IS_CONST_REF(insn->op2)) { + ir_emit_load(ctx, type, op2_reg, insn->op2); + } + + /* Generate a table jmp or a sequence of comparisons */ + if ((max.i64-min.i64) < count * 8) { + int *labels = ir_mem_malloc(sizeof(int) * (size_t)(max.i64 - min.i64 + 1)); + + for (i = 0; i <= (max.i64 - min.i64); i++) { + labels[i] = default_label; + } + p = &ctx->cfg_edges[bb->successors]; + for (n = bb->successors_count; n != 0; p++, n--) { + use_block = *p; + use_insn = &ctx->ir_base[ctx->cfg_blocks[use_block].start]; + if (use_insn->op == IR_CASE_VAL) { + val = &ctx->ir_base[use_insn->op2]; + label = ir_skip_empty_target_blocks(ctx, use_block); + labels[val->val.i64 - min.i64] = label; + } + } + + if (IR_IS_32BIT(type, max)) { + | ASM_REG_IMM_OP cmp, type, op2_reg, max.i32 + } else { + IR_ASSERT(ir_type_size[type] == 8); + IR_ASSERT(sizeof(void*) == 8); +|.if X64 + | mov64 Rq(tmp_reg), max.i64 + | cmp Rq(op2_reg), Rq(tmp_reg) +|.endif + } + if (IR_IS_TYPE_SIGNED(type)) { + | jg =>default_label + } else { + | ja =>default_label + } + + if (IR_IS_32BIT(type, min)) { + offset = -min.i64 * sizeof(void*); + if (IR_IS_SIGNED_32BIT(offset)) { + | ASM_REG_IMM_OP cmp, type, op2_reg, min.i32 + } else { + | ASM_REG_REG_OP sub, type, op2_reg, (int32_t)offset // TODO: reg clobbering + offset = 0; + } + } else { + IR_ASSERT(sizeof(void*) == 8); +|.if X64 + | mov64 Rq(tmp_reg), min.i64 + | ASM_REG_REG_OP sub, type, op2_reg, tmp_reg // TODO: reg clobbering + offset = 0; +|.endif + } + if (IR_IS_TYPE_SIGNED(type)) { + | jl =>default_label + } else { + | jb =>default_label + } + if (sizeof(void*) == 8) { +|.if X64 + switch (ir_type_size[type]) { + default: + IR_ASSERT(0); + case 1: + if (IR_IS_TYPE_SIGNED(type)) { + | movsx Ra(op2_reg), Rb(op2_reg) + } else { + | movzx Ra(op2_reg), Rb(op2_reg) + } + break; + case 2: + if (IR_IS_TYPE_SIGNED(type)) { + | movsx 
Ra(op2_reg), Rw(op2_reg) + } else { + | movzx Ra(op2_reg), Rw(op2_reg) + } + break; + case 4: + if (IR_IS_TYPE_SIGNED(type)) { + | movsxd Ra(op2_reg), Rd(op2_reg) + } else { + | mov Rd(op2_reg), Rd(op2_reg) + } + break; + case 8: + break; + } + | lea Ra(tmp_reg), aword [>1] + | jmp aword [Ra(tmp_reg)+Ra(op2_reg)*8+(int32_t)offset] +|.endif + } else { +|.if not X64 + switch (ir_type_size[type]) { + default: + IR_ASSERT(0 && "Unsupported type size"); + case 1: + if (IR_IS_TYPE_SIGNED(type)) { + | movsx Ra(op2_reg), Rb(op2_reg) + } else { + | movzx Ra(op2_reg), Rb(op2_reg) + } + break; + case 2: + if (IR_IS_TYPE_SIGNED(type)) { + | movsx Ra(op2_reg), Rw(op2_reg) + } else { + | movzx Ra(op2_reg), Rw(op2_reg) + } + break; + case 4: + break; + } + |// jmp aword [Ra(op2_reg)*4+(int32_t)offset+>1] + | lea Ra(op2_reg), aword [Ra(op2_reg)*4+(int32_t)offset] // TODO: reg clobbering + | jmp aword [Ra(op2_reg)+>1] +|.endif + } + |.jmp_table + if (!data->jmp_table_label) { + data->jmp_table_label = ctx->cfg_blocks_count + ctx->consts_count + 3; + |=>data->jmp_table_label: + } + |.align aword + |1: + for (i = 0; i <= (max.i64 - min.i64); i++) { + int b = labels[i]; + ir_block *bb = &ctx->cfg_blocks[b]; + ir_insn *insn = &ctx->ir_base[bb->end]; + + if (insn->op == IR_IJMP && IR_IS_CONST_REF(insn->op2)) { + ir_ref prev = ctx->prev_ref[bb->end]; + if (prev != bb->start && ctx->ir_base[prev].op == IR_SNAPSHOT) { + prev = ctx->prev_ref[prev]; + } + if (prev == bb->start) { + void *addr = ir_jmp_addr(ctx, insn, &ctx->ir_base[insn->op2]); + + | .aword &addr + if (ctx->ir_base[bb->start].op != IR_CASE_DEFAULT) { + bb->flags |= IR_BB_EMPTY; + } + continue; + } + } + | .aword =>b + } + |.code + ir_mem_free(labels); + } else { + p = &ctx->cfg_edges[bb->successors]; + for (n = bb->successors_count; n != 0; p++, n--) { + use_block = *p; + use_insn = &ctx->ir_base[ctx->cfg_blocks[use_block].start]; + if (use_insn->op == IR_CASE_VAL) { + val = &ctx->ir_base[use_insn->op2]; + label = ir_skip_empty_target_blocks(ctx, use_block); + if (IR_IS_32BIT(type, val->val)) { + | ASM_REG_IMM_OP cmp, type, op2_reg, val->val.i32 + } else { + IR_ASSERT(sizeof(void*) == 8); +|.if X64 + | mov64 Ra(tmp_reg), val->val.i64 + | ASM_REG_REG_OP cmp, type, op2_reg, tmp_reg +|.endif + } + | je =>label + } + } + if (default_label) { + | jmp =>default_label + } + } +} + +static int ir_parallel_copy(ir_ctx *ctx, ir_copy *copies, int count, ir_reg tmp_reg, ir_reg tmp_fp_reg) +{ + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + int i; + int8_t *pred, *loc, *types; + ir_reg to, from_reg, c; + ir_type type; + ir_regset todo, ready; + ir_reg last_reg = IR_REG_NONE, last_fp_reg = IR_REG_NONE; + + loc = ir_mem_malloc(IR_REG_NUM * 3 * sizeof(int8_t)); + pred = loc + IR_REG_NUM; + types = pred + IR_REG_NUM; + memset(loc, IR_REG_NONE, IR_REG_NUM * 2 * sizeof(int8_t)); + todo = IR_REGSET_EMPTY; + ready = IR_REGSET_EMPTY; + + for (i = 0; i < count; i++) { + from_reg = copies[i].from; + to = copies[i].to; + if (from_reg != to) { + loc[from_reg] = from_reg; + pred[to] = from_reg; + types[from_reg] = copies[i].type; + /* temporary register may be the same as some of destinations */ + if (to == tmp_reg) { + IR_ASSERT(last_reg == IR_REG_NONE); + last_reg = to; + } else if (to == tmp_fp_reg) { + IR_ASSERT(last_fp_reg == IR_REG_NONE); + last_fp_reg = to; + } else { + IR_ASSERT(!IR_REGSET_IN(todo, to)); + IR_REGSET_INCL(todo, to); + } + } + } + + IR_REGSET_FOREACH(todo, i) { + if (loc[i] == IR_REG_NONE) { + IR_REGSET_INCL(ready, i); + } + 
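/* The while loop below is the usual parallel-move sequentialization: `ready`
 * holds destination registers that are not themselves sources of pending
 * moves, so their copies can be emitted immediately, and completing a copy
 * may make its source register ready in turn. If `todo` is non-empty while
 * `ready` is empty, the remaining moves form a cycle, which is broken by
 * parking one value in tmp_reg/tmp_fp_reg; e.g. the swap rax->rcx, rcx->rax
 * (names illustrative) comes out as
 *
 *     mov tmp, rcx
 *     mov rcx, rax
 *     mov rax, tmp
 *
 * The 8/16-bit cases are widened with movsx/movzx, presumably to avoid
 * partial-register writes. */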
} IR_REGSET_FOREACH_END(); + + while (1) { + while (ready != IR_REGSET_EMPTY) { + to = ir_regset_pop_first(&ready); + from_reg = pred[to]; + c = loc[from_reg]; + type = types[from_reg]; + if (IR_IS_TYPE_INT(type)) { + if (ir_type_size[type] > 2) { + ir_emit_mov(ctx, type, to, c); + } else if (ir_type_size[type] == 2) { + if (IR_IS_TYPE_SIGNED(type)) { + | movsx Rd(to), Rw(c) + type = IR_I32; + } else { + | movzx Rd(to), Rw(c) + type = IR_U32; + } + } else /* if (ir_type_size[type] == 1) */ { + if (IR_IS_TYPE_SIGNED(type)) { + | movsx Rd(to), Rb(c) + type = IR_I32; + } else { + | movzx Rd(to), Rb(c) + type = IR_U32; + } + } + } else { + ir_emit_fp_mov(ctx, type, to, c); + } + IR_REGSET_EXCL(todo, to); + loc[from_reg] = to; + if (from_reg == c && pred[from_reg] != IR_REG_NONE) { + IR_REGSET_INCL(ready, from_reg); + } + } + + if (todo == IR_REGSET_EMPTY) { + break; + } + to = ir_regset_pop_first(&todo); + from_reg = pred[to]; + IR_ASSERT(to != loc[from_reg]); + type = types[from_reg]; + if (IR_IS_TYPE_INT(type)) { + IR_ASSERT(tmp_reg != IR_REG_NONE); + IR_ASSERT(tmp_reg >= IR_REG_GP_FIRST && tmp_reg <= IR_REG_GP_LAST); + ir_emit_mov(ctx, type, tmp_reg, to); + loc[to] = tmp_reg; + } else { + IR_ASSERT(tmp_fp_reg != IR_REG_NONE); + IR_ASSERT(tmp_fp_reg >= IR_REG_FP_FIRST && tmp_fp_reg <= IR_REG_FP_LAST); + ir_emit_fp_mov(ctx, type, tmp_fp_reg, to); + loc[to] = tmp_fp_reg; + } + IR_REGSET_INCL(ready, to); + } + + if (last_reg != IR_REG_NONE) { + to = last_reg; + from_reg = pred[to]; + c = loc[from_reg]; + if (to != c) { + type = types[from_reg]; + IR_ASSERT(IR_IS_TYPE_INT(type)); + if (ir_type_size[type] > 2) { + ir_emit_mov(ctx, type, to, c); + } else if (ir_type_size[type] == 2) { + if (IR_IS_TYPE_SIGNED(type)) { + | movsx Rd(to), Rw(c) + type = IR_I32; + } else { + | movzx Rd(to), Rw(c) + type = IR_U32; + } + } else /* if (ir_type_size[type] == 1) */ { + if (IR_IS_TYPE_SIGNED(type)) { + | movsx Rd(to), Rb(c) + type = IR_I32; + } else { + | movzx Rd(to), Rb(c) + type = IR_U32; + } + } + } + } + + if (last_fp_reg != IR_REG_NONE) { + to = last_fp_reg; + from_reg = pred[to]; + c = loc[from_reg]; + if (to != c) { + type = types[from_reg]; + IR_ASSERT(!IR_IS_TYPE_INT(type)); + ir_emit_fp_mov(ctx, type, to, c); + } + } + + ir_mem_free(loc); + + return 1; +} + +static int32_t ir_call_used_stack(ir_ctx *ctx, ir_insn *insn) +{ + int j, n; + ir_type type; + int int_param = 0; + int fp_param = 0; + int int_reg_params_count = IR_REG_INT_ARGS; + int fp_reg_params_count = IR_REG_FP_ARGS; + int32_t used_stack = 0; + +#ifdef IR_HAVE_FASTCALL + if (sizeof(void*) == 4 && ir_is_fastcall(ctx, insn)) { + int_reg_params_count = IR_REG_INT_FCARGS; + fp_reg_params_count = IR_REG_FP_FCARGS; + } +#endif + + n = insn->inputs_count; + for (j = 3; j <= n; j++) { + type = ctx->ir_base[ir_insn_op(insn, j)].type; + if (IR_IS_TYPE_INT(type)) { + if (int_param >= int_reg_params_count) { + used_stack += IR_MAX(sizeof(void*), ir_type_size[type]); + } + int_param++; +#ifdef _WIN64 + /* WIN64 calling convention uses a common counter for int and fp registers */ + fp_param++; +#endif + } else { + IR_ASSERT(IR_IS_TYPE_FP(type)); + if (fp_param >= fp_reg_params_count) { + used_stack += IR_MAX(sizeof(void*), ir_type_size[type]); + } + fp_param++; +#ifdef _WIN64 + /* WIN64 calling convention uses a common counter for int and fp registers */ + int_param++; +#endif + } + } + + /* Reserved "home space" or "shadow store" for register arguments (used in Windows64 ABI) */ + used_stack += IR_SHADOW_ARGS; + + return used_stack; +} + +static int32_t 
ir_emit_arguments(ir_ctx *ctx, ir_ref def, ir_insn *insn, ir_reg tmp_reg) +{ + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + int j, n; + ir_ref arg; + ir_insn *arg_insn; + uint8_t type; + ir_reg src_reg, dst_reg; + int int_param = 0; + int fp_param = 0; + int count = 0; + int int_reg_params_count = IR_REG_INT_ARGS; + int fp_reg_params_count = IR_REG_FP_ARGS; + const int8_t *int_reg_params = _ir_int_reg_params; + const int8_t *fp_reg_params = _ir_fp_reg_params; + int32_t used_stack, stack_offset = IR_SHADOW_ARGS; + ir_copy *copies; + bool do_pass3 = 0; + /* For temporaries we may use any scratch registers except for registers used for parameters */ + ir_reg tmp_fp_reg = IR_REG_FP_LAST; /* Temporary register for FP loads and swap */ + + n = insn->inputs_count; + if (n < 3) { + return 0; + } + + if (tmp_reg == IR_REG_NONE) { + tmp_reg = IR_REG_RAX; + } + +#ifdef IR_HAVE_FASTCALL + if (sizeof(void*) == 4 && ir_is_fastcall(ctx, insn)) { + int_reg_params_count = IR_REG_INT_FCARGS; + fp_reg_params_count = IR_REG_FP_FCARGS; + int_reg_params = _ir_int_fc_reg_params; + fp_reg_params = _ir_fp_fc_reg_params; + } +#endif + + if (insn->op == IR_CALL + && (ctx->flags & IR_PREALLOCATED_STACK) +#ifdef IR_HAVE_FASTCALL + && !ir_is_fastcall(ctx, insn) /* fast call functions restore stack pointer */ +#endif + ) { + // TODO: support for preallocated stack + used_stack = 0; + } else { + used_stack = ir_call_used_stack(ctx, insn); + if (IR_SHADOW_ARGS + && insn->op == IR_TAILCALL + && used_stack == IR_SHADOW_ARGS) { + used_stack = 0; + } + if (ctx->fixed_call_stack_size + && used_stack <= ctx->fixed_call_stack_size +#ifdef IR_HAVE_FASTCALL + && !ir_is_fastcall(ctx, insn) /* fast call functions restore stack pointer */ +#endif + ) { + used_stack = 0; + } else { + /* Stack must be 16 byte aligned */ + int32_t aligned_stack = IR_ALIGNED_SIZE(used_stack, 16); + ctx->call_stack_size += aligned_stack; + if (aligned_stack) { + | sub Ra(IR_REG_RSP), aligned_stack + } + } + } + + /* 1. 
move all register arguments that should be passed through the stack + * and collect arguments that should be passed through registers */ + copies = ir_mem_malloc((n - 2) * sizeof(ir_copy)); + for (j = 3; j <= n; j++) { + arg = ir_insn_op(insn, j); + src_reg = ir_get_alocated_reg(ctx, def, j); + arg_insn = &ctx->ir_base[arg]; + type = arg_insn->type; + if (IR_IS_TYPE_INT(type)) { + if (int_param < int_reg_params_count) { + dst_reg = int_reg_params[int_param]; + } else { + dst_reg = IR_REG_NONE; /* pass argument through stack */ + } + int_param++; +#ifdef _WIN64 + /* WIN64 calling convention uses a common counter for int and fp registers */ + fp_param++; +#endif + } else { + IR_ASSERT(IR_IS_TYPE_FP(type)); + if (fp_param < fp_reg_params_count) { + dst_reg = fp_reg_params[fp_param]; + } else { + dst_reg = IR_REG_NONE; /* pass argument through stack */ + } + fp_param++; +#ifdef _WIN64 + /* WIN64 calling convention uses a common counter for int and fp registers */ + int_param++; +#endif + } + if (dst_reg != IR_REG_NONE) { + if (IR_IS_CONST_REF(arg) || src_reg == IR_REG_NONE) { + /* delay CONST->REG and MEM->REG moves to third pass */ + do_pass3 = 1; + } else { + if (IR_REG_SPILLED(src_reg)) { + src_reg = IR_REG_NUM(src_reg); + ir_emit_load(ctx, type, src_reg, arg); + } + if (src_reg != dst_reg) { + /* delay REG->REG moves to second pass */ + copies[count].type = type; + copies[count].from = src_reg; + copies[count].to = dst_reg; + count++; + } + } + } else { + /* Pass register arguments to stack (REG->MEM moves) */ + if (!IR_IS_CONST_REF(arg) && src_reg != IR_REG_NONE && !IR_REG_SPILLED(src_reg)) { + if (IR_IS_TYPE_INT(type)) { + ir_emit_store_mem_int(ctx, type, IR_REG_STACK_POINTER, stack_offset, src_reg); + } else { + ir_emit_store_mem_fp(ctx, type, IR_REG_STACK_POINTER, stack_offset, src_reg); + } + } else { + do_pass3 = 1; + } + stack_offset += IR_MAX(sizeof(void*), ir_type_size[type]); + } + } + + /* 2. move all arguments that should be passed from one register to another (REG->REG movs) */ + if (count) { + ir_parallel_copy(ctx, copies, count, tmp_reg, tmp_fp_reg); + } + ir_mem_free(copies); + + /* 3. 
move the remaining memory and immediate values */ + if (do_pass3) { + stack_offset = IR_SHADOW_ARGS; + int_param = 0; + fp_param = 0; + for (j = 3; j <= n; j++) { + arg = ir_insn_op(insn, j); + src_reg = ir_get_alocated_reg(ctx, def, j); + arg_insn = &ctx->ir_base[arg]; + type = arg_insn->type; + if (IR_IS_TYPE_INT(type)) { + if (int_param < int_reg_params_count) { + dst_reg = int_reg_params[int_param]; + } else { + dst_reg = IR_REG_NONE; /* argument already passed through stack */ + } + int_param++; +#ifdef _WIN64 + /* WIN64 calling convention uses a common counter for int and fp registers */ + fp_param++; +#endif + } else { + IR_ASSERT(IR_IS_TYPE_FP(type)); + if (fp_param < fp_reg_params_count) { + dst_reg = fp_reg_params[fp_param]; + } else { + dst_reg = IR_REG_NONE; /* argument already passed through stack */ + } + fp_param++; +#ifdef _WIN64 + /* WIN64 calling convention uses a common counter for int and fp registers */ + int_param++; +#endif + } + if (dst_reg != IR_REG_NONE) { + if (IR_IS_CONST_REF(arg) || src_reg == IR_REG_NONE) { + if (IR_IS_TYPE_INT(type)) { + if (IR_IS_CONST_REF(arg)) { + if (type == IR_ADDR) { + ir_insn *val_insn = &ctx->ir_base[arg]; + + if (val_insn->op == IR_STR) { + int label = ctx->cfg_blocks_count - arg; + + val_insn->const_flags |= IR_CONST_EMIT; + | lea Ra(dst_reg), aword [=>label] + continue; + } else if (val_insn->op == IR_SYM || val_insn->op == IR_FUNC) { + void *addr = (ctx->loader && ctx->loader->resolve_sym_name) ? + ctx->loader->resolve_sym_name(ctx->loader, ir_get_str(ctx, val_insn->val.i32)) : + ir_resolve_sym_name(ir_get_str(ctx, val_insn->val.i32)); + if (sizeof(void*) == 4 || IR_IS_SIGNED_32BIT(addr)) { + | mov Ra(dst_reg), ((ptrdiff_t)addr) + } else { +|.if X64 + | mov64 Rq(dst_reg), ((ptrdiff_t)addr) +|.endif + } + continue; + } + IR_ASSERT(val_insn->op == IR_ADDR || val_insn->op == IR_FUNC_ADDR); + } + if (type == IR_I8 || type == IR_I16) { + type = IR_I32; + } else if (type == IR_U8 || type == IR_U16) { + type = IR_U32; + } + ir_emit_load(ctx, type, dst_reg, arg); + } else { + ir_reg fp; + int32_t offset = ir_ref_spill_slot(ctx, arg, &fp); + + if (ir_type_size[type] > 2) { + ir_emit_load_mem_int(ctx, type, dst_reg, fp, offset); + } else if (ir_type_size[type] == 2) { + if (type == IR_I16) { + if (fp != IR_REG_NONE) { + | movsx Rd(dst_reg), word [Ra(fp)+offset] + } else { + | movsx Rd(dst_reg), word [offset] + } + } else { + if (fp != IR_REG_NONE) { + | movzx Rd(dst_reg), word [Ra(fp)+offset] + } else { + | movzx Rd(dst_reg), word [offset] + } + } + } else { + IR_ASSERT(ir_type_size[type] == 1); + if (type == IR_I8) { + if (fp != IR_REG_NONE) { + | movsx Rd(dst_reg), byte [Ra(fp)+offset] + } else { + | movsx Rd(dst_reg), byte [offset] + } + } else { + if (fp != IR_REG_NONE) { + | movzx Rd(dst_reg), byte [Ra(fp)+offset] + } else { + | movzx Rd(dst_reg), byte [offset] + } + } + } + } + } else { + ir_emit_load(ctx, type, dst_reg, arg); + } + } + } else { + if (IR_IS_TYPE_INT(type)) { + if (IR_IS_CONST_REF(arg)) { + ir_insn *val_insn = &ctx->ir_base[arg]; + + if (val_insn->op == IR_STR) { + int label = ctx->cfg_blocks_count - arg; + + val_insn->const_flags |= IR_CONST_EMIT; + IR_ASSERT(tmp_reg != IR_REG_NONE); +|.if X64 + | lea Ra(tmp_reg), aword [=>label] + | mov [Ra(IR_REG_RSP)+stack_offset], Ra(tmp_reg) +|.else + | mov [Ra(IR_REG_RSP)+stack_offset], =>label +|.endif + } else if (val_insn->op == IR_FUNC || val_insn->op == IR_SYM) { + void *addr = (ctx->loader && ctx->loader->resolve_sym_name) ? 
+ ctx->loader->resolve_sym_name(ctx->loader, ir_get_str(ctx, val_insn->val.i32)) : + ir_resolve_sym_name(ir_get_str(ctx, val_insn->val.i32)); + if (sizeof(void*) == 4) { + | mov aword [Ra(IR_REG_RSP)+stack_offset], ((ptrdiff_t)addr) +|.if X64 +|| } else if (IR_IS_SIGNED_32BIT(addr)) { + | mov Ra(tmp_reg), ((ptrdiff_t)addr) + | mov [Ra(IR_REG_RSP)+stack_offset], Ra(tmp_reg) +|| } else { + | mov64 Rq(tmp_reg), ((ptrdiff_t)addr) + | mov [Ra(IR_REG_RSP)+stack_offset], Ra(tmp_reg) +|.endif + } + } else if (IR_IS_SIGNED_32BIT(val_insn->val.i64)) { + if (ir_type_size[type] <= 4) { + | mov dword [Ra(IR_REG_RSP)+stack_offset], val_insn->val.i32 + } else { + IR_ASSERT(sizeof(void*) == 8); +|.if X64 + | mov qword [rsp+stack_offset], val_insn->val.i32 +|.endif + } + } else { + IR_ASSERT(sizeof(void*) == 8); +|.if X64 + IR_ASSERT(tmp_reg != IR_REG_NONE); + | mov64 Ra(tmp_reg), val_insn->val.i64 + | mov [rsp+stack_offset], Ra(tmp_reg) +|.endif + } + } else if (src_reg == IR_REG_NONE) { + IR_ASSERT(tmp_reg != IR_REG_NONE); + ir_emit_load(ctx, type, tmp_reg, arg); + ir_emit_store_mem_int(ctx, type, IR_REG_STACK_POINTER, stack_offset, tmp_reg); + } else if (IR_REG_SPILLED(src_reg)) { + src_reg = IR_REG_NUM(src_reg); + ir_emit_load(ctx, type, src_reg, arg); + ir_emit_store_mem_int(ctx, type, IR_REG_STACK_POINTER, stack_offset, src_reg); + } + } else { + if (IR_IS_CONST_REF(arg)) { + ir_val *val = &ctx->ir_base[arg].val; + if (ir_type_size[type] == 4) { + | mov dword [Ra(IR_REG_RSP)+stack_offset], val->i32 + } else if (sizeof(void*) == 8) { +|.if X64 + if (val->i64 == 0) { + | mov qword [rsp+stack_offset], val->i32 + } else { + IR_ASSERT(tmp_reg != IR_REG_NONE); + | mov64 Rq(tmp_reg), val->i64 + | mov qword [rsp+stack_offset], Ra(tmp_reg) + } +|.endif + } else { + ir_emit_load(ctx, type, tmp_fp_reg, arg); + ir_emit_store_mem_fp(ctx, IR_DOUBLE, IR_REG_STACK_POINTER, stack_offset, tmp_fp_reg); + } + } else if (src_reg == IR_REG_NONE) { + IR_ASSERT(tmp_fp_reg != IR_REG_NONE); + ir_emit_load(ctx, type, tmp_fp_reg, arg); + ir_emit_store_mem_fp(ctx, IR_DOUBLE, IR_REG_STACK_POINTER, stack_offset, tmp_fp_reg); + } else if (IR_REG_SPILLED(src_reg)) { + src_reg = IR_REG_NUM(src_reg); + ir_emit_load(ctx, type, src_reg, arg); + ir_emit_store_mem_fp(ctx, type, IR_REG_STACK_POINTER, stack_offset, src_reg); + } + } + stack_offset += IR_MAX(sizeof(void*), ir_type_size[type]); + } + } + } + +#ifdef _WIN64 + /* WIN64 calling convention requires duplication of parameters passed in FP registers into GP ones */ + if (ir_is_vararg(ctx, insn)) { + n = IR_MIN(n, IR_MAX_REG_ARGS + 2); + for (j = 3; j <= n; j++) { + arg = ir_insn_op(insn, j); + arg_insn = &ctx->ir_base[arg]; + type = arg_insn->type; + if (IR_IS_TYPE_FP(type)) { + src_reg = fp_reg_params[j-3]; + dst_reg = int_reg_params[j-3]; +|.if X64 + if (ctx->mflags & IR_X86_AVX) { + | vmovd Rq(dst_reg), xmm(src_reg-IR_REG_FP_FIRST) + } else { + | movd Rq(dst_reg), xmm(src_reg-IR_REG_FP_FIRST) + } +|.endif + } + } + } +#endif +#ifdef IR_REG_VARARG_FP_REGS + /* set hidden argument to specify the number of vector registers used */ + if (ir_is_vararg(ctx, insn)) { + fp_param = IR_MIN(fp_param, fp_reg_params_count); + | mov Rd(IR_REG_VARARG_FP_REGS), fp_param + } +#endif + + return used_stack; +} + +static void ir_emit_call(ir_ctx *ctx, ir_ref def, ir_insn *insn) +{ + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + ir_reg def_reg; + int32_t used_stack = ir_emit_arguments(ctx, def, insn, ctx->regs[def][1]); + + if (IR_IS_CONST_REF(insn->op2)) { + ir_insn 
*addr_insn = &ctx->ir_base[insn->op2]; + void *addr; + + IR_ASSERT(addr_insn->type == IR_ADDR); + if (addr_insn->op == IR_FUNC) { + addr = (ctx->loader && ctx->loader->resolve_sym_name) ? + ctx->loader->resolve_sym_name(ctx->loader, ir_get_str(ctx, addr_insn->val.i32)) : + ir_resolve_sym_name(ir_get_str(ctx, addr_insn->val.i32)); + } else { + IR_ASSERT(addr_insn->op == IR_ADDR || addr_insn->op == IR_FUNC_ADDR); + addr = (void*)addr_insn->val.addr; + } + if (sizeof(void*) == 4 || IR_MAY_USE_32BIT_ADDR(addr)) { + | call aword &addr + } else { +|.if X64 +|| ir_reg tmp_reg = IR_REG_RAX; + +#ifdef IR_REG_VARARG_FP_REGS +|| if (ir_is_vararg(ctx, insn)) { +|| tmp_reg = IR_REG_R11; +|| } +#endif +|| if (IR_IS_SIGNED_32BIT(addr)) { + | mov Rq(tmp_reg), ((ptrdiff_t)addr) // 0x48 0xc7 0xc0 +|| } else { + | mov64 Rq(tmp_reg), ((ptrdiff_t)addr) // 0x48 0xb8 +|| } + | call Rq(tmp_reg) +|.endif + } + } else { + ir_reg op2_reg = ctx->regs[def][2]; + + if (op2_reg != IR_REG_NONE) { + if (IR_REG_SPILLED(op2_reg)) { + op2_reg = IR_REG_NUM(op2_reg); + ir_emit_load(ctx, IR_ADDR, op2_reg, insn->op2); + } + | call Ra(op2_reg) + } else { + int32_t offset; + + if (ir_rule(ctx, insn->op2) & IR_FUSED) { + offset = ir_fuse_load(ctx, insn->op2, &op2_reg); + } else { + offset = ir_ref_spill_slot(ctx, insn->op2, &op2_reg); + } + + if (op2_reg != IR_REG_NONE) { + | call aword [Ra(op2_reg)+offset] + } else { + | call aword [offset] + } + } + } + + if (used_stack) { + int32_t aligned_stack = IR_ALIGNED_SIZE(used_stack, 16); + + ctx->call_stack_size -= aligned_stack; + if (ir_is_fastcall(ctx, insn)) { + aligned_stack -= used_stack; + if (aligned_stack) { + | add Ra(IR_REG_RSP), aligned_stack + } + } else { + | add Ra(IR_REG_RSP), aligned_stack + } + } + + if (insn->type != IR_VOID) { + if (IR_IS_TYPE_INT(insn->type)) { + def_reg = IR_REG_NUM(ctx->regs[def][0]); + if (def_reg != IR_REG_NONE) { + if (def_reg != IR_REG_INT_RET1) { + ir_emit_mov(ctx, insn->type, def_reg, IR_REG_INT_RET1); + } + if (IR_REG_SPILLED(ctx->regs[def][0])) { + ir_emit_store(ctx, insn->type, def, def_reg); + } + } else if (ctx->use_lists[def].count > 1) { + ir_emit_store(ctx, insn->type, def, IR_REG_INT_RET1); + } + } else { + IR_ASSERT(IR_IS_TYPE_FP(insn->type)); + def_reg = IR_REG_NUM(ctx->regs[def][0]); +#ifdef IR_REG_FP_RET1 + if (def_reg != IR_REG_NONE) { + if (def_reg != IR_REG_FP_RET1) { + ir_emit_fp_mov(ctx, insn->type, def_reg, IR_REG_FP_RET1); + } + if (IR_REG_SPILLED(ctx->regs[def][0])) { + ir_emit_store(ctx, insn->type, def, def_reg); + } + } else if (ctx->use_lists[def].count > 1) { + ir_emit_store(ctx, insn->type, def, IR_REG_FP_RET1); + } +#else + IR_ASSERT(0); // TODO: float/double return value +#endif + } + } +} + +static void ir_emit_tailcall(ir_ctx *ctx, ir_ref def, ir_insn *insn) +{ + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + int32_t used_stack = ir_emit_arguments(ctx, def, insn, ctx->regs[def][1]); + (void) used_stack; + + if (used_stack != 0) { + ir_emit_call(ctx, def, insn); + ir_emit_return_void(ctx); + return; + } + + ir_emit_epilogue(ctx); + + if (IR_IS_CONST_REF(insn->op2)) { + ir_insn *addr_insn = &ctx->ir_base[insn->op2]; + void *addr; + + IR_ASSERT(addr_insn->type == IR_ADDR); + if (addr_insn->op == IR_FUNC) { + addr = (ctx->loader && ctx->loader->resolve_sym_name) ? 
+ ctx->loader->resolve_sym_name(ctx->loader, ir_get_str(ctx, addr_insn->val.i32)) : + ir_resolve_sym_name(ir_get_str(ctx, addr_insn->val.i32)); + } else { + IR_ASSERT(addr_insn->op == IR_ADDR || addr_insn->op == IR_FUNC_ADDR); + addr = (void*)addr_insn->val.addr; + } + if (sizeof(void*) == 4 || IR_MAY_USE_32BIT_ADDR(addr)) { + | jmp aword &addr + } else { +|.if X64 +|| ir_reg tmp_reg = IR_REG_RAX; + +#ifdef IR_REG_VARARG_FP_REGS +|| if (ir_is_vararg(ctx, insn)) { +|| tmp_reg = IR_REG_R11; +|| } +#endif +|| if (IR_IS_SIGNED_32BIT(addr)) { + | mov Rq(tmp_reg), ((ptrdiff_t)addr) // 0x48 0xc7 0xc0 +|| } else { + | mov64 Rq(tmp_reg), ((ptrdiff_t)addr) // 0x48 0xb8 +|| } + | jmp Rq(tmp_reg) +|.endif + } + } else { + ir_reg op2_reg = ctx->regs[def][2]; + + if (op2_reg != IR_REG_NONE) { + if (IR_REG_SPILLED(op2_reg)) { + op2_reg = IR_REG_NUM(op2_reg); + ir_emit_load(ctx, IR_ADDR, op2_reg, insn->op2); + } + | jmp Ra(op2_reg) + } else { + int32_t offset; + + if (ir_rule(ctx, insn->op2) & IR_FUSED) { + offset = ir_fuse_load(ctx, insn->op2, &op2_reg); + } else { + offset = ir_ref_spill_slot(ctx, insn->op2, &op2_reg); + } + + if (op2_reg != IR_REG_NONE) { + | jmp aword [Ra(op2_reg)+offset] + } else { + | jmp aword [offset] + } + } + } +} + +static void ir_emit_ijmp(ir_ctx *ctx, ir_ref def, ir_insn *insn) +{ + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + ir_reg op2_reg = ctx->regs[def][2]; + + if (IR_IS_CONST_REF(insn->op2)) { + void *addr = ir_jmp_addr(ctx, insn, &ctx->ir_base[insn->op2]); + + if (sizeof(void*) == 4 || IR_MAY_USE_32BIT_ADDR(addr)) { + | jmp aword &addr + } else { +|.if X64 + if (IR_IS_SIGNED_32BIT(addr)) { + | mov rax, ((ptrdiff_t)addr) // 0x48 0xc7 0xc0 + } else { + | mov64 rax, ((ptrdiff_t)addr) // 0x48 0xb8 + } + | jmp rax +|.endif + } + } else if (ir_rule(ctx, insn->op2) & IR_FUSED) { + int32_t offset; + + offset = ir_fuse_load(ctx, insn->op2, &op2_reg); + if (op2_reg == IR_REG_NONE) { + | jmp aword [offset] + } else { + | jmp aword [Ra(op2_reg)+offset] + } + } else if (op2_reg != IR_REG_NONE) { + if (IR_REG_SPILLED(op2_reg)) { + op2_reg = IR_REG_NUM(op2_reg); + ir_emit_load(ctx, IR_ADDR, op2_reg, insn->op2); + } + | jmp Ra(op2_reg) + } else { + ir_reg fp; + int32_t offset = ir_ref_spill_slot(ctx, insn->op2, &fp); + + | jmp aword [Ra(fp)+offset] + } +} + +static bool ir_emit_guard_jcc(ir_ctx *ctx, uint32_t b, ir_ref def, uint8_t op, void *addr, bool int_cmp) +{ + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + ir_insn *next_insn = &ctx->ir_base[def + 1]; + + if (next_insn->op == IR_END || next_insn->op == IR_LOOP_END) { + ir_block *bb = &ctx->cfg_blocks[b]; + uint32_t target; + + if (!(bb->flags & IR_BB_DESSA_MOVES)) { + target = ctx->cfg_edges[bb->successors]; + if (UNEXPECTED(bb->successors_count == 2)) { + if (ctx->cfg_blocks[target].flags & IR_BB_ENTRY) { + target = ctx->cfg_edges[bb->successors + 1]; + } else { + IR_ASSERT(ctx->cfg_blocks[ctx->cfg_edges[bb->successors + 1]].flags & IR_BB_ENTRY); + } + } else { + IR_ASSERT(bb->successors_count == 1); + } + target = ir_skip_empty_target_blocks(ctx, target); + if (b == ctx->cfg_blocks_count || target != ir_skip_empty_next_blocks(ctx, b + 1)) { + if (int_cmp) { + switch (op) { + default: + IR_ASSERT(0 && "NIY binary op"); + case IR_EQ: + | jne =>target + break; + case IR_NE: + | je =>target + break; + case IR_LT: + | jge =>target + break; + case IR_GE: + | jl =>target + break; + case IR_LE: + | jg =>target + break; + case IR_GT: + | jle =>target + break; + case IR_ULT: + 
| jae =>target + break; + case IR_UGE: + | jb =>target + break; + case IR_ULE: + | ja =>target + break; + case IR_UGT: + | jbe =>target + break; + } + } else { + switch (op) { + default: + IR_ASSERT(0 && "NIY binary op"); + case IR_EQ: + | jne =>target + | jp =>target + break; + case IR_NE: + | jp &addr + | je =>target + break; + case IR_LT: + | jae =>target + break; + case IR_GE: + | jp &addr + | jb =>target + break; + case IR_LE: + | ja =>target + break; + case IR_GT: + | jp &addr + | jbe =>target + break; + } + } + | jmp &addr + return 1; + } + } + } else if (next_insn->op == IR_IJMP && IR_IS_CONST_REF(next_insn->op2)) { + void *target_addr = ir_jmp_addr(ctx, next_insn, &ctx->ir_base[next_insn->op2]); + + if (sizeof(void*) == 4 || IR_MAY_USE_32BIT_ADDR(target_addr)) { + if (int_cmp) { + switch (op) { + default: + IR_ASSERT(0 && "NIY binary op"); + case IR_EQ: + | jne &target_addr + break; + case IR_NE: + | je &target_addr + break; + case IR_LT: + | jge &target_addr + break; + case IR_GE: + | jl &target_addr + break; + case IR_LE: + | jg &target_addr + break; + case IR_GT: + | jle &target_addr + break; + case IR_ULT: + | jae &target_addr + break; + case IR_UGE: + | jb &target_addr + break; + case IR_ULE: + | ja &target_addr + break; + case IR_UGT: + | jbe &target_addr + break; + } + } else { + switch (op) { + default: + IR_ASSERT(0 && "NIY binary op"); + case IR_EQ: + | jne &target_addr + | jp &target_addr + break; + case IR_NE: + | jp &addr + | je &target_addr + break; + case IR_LT: + | jae &target_addr + break; + case IR_GE: + | jp &addr + | jb &target_addr + break; + case IR_LE: + | ja &target_addr + break; + case IR_GT: + | jp &addr + | jbe &target_addr + break; + } + } + | jmp &addr + return 1; + } + } + + if (int_cmp) { + switch (op) { + default: + IR_ASSERT(0 && "NIY binary op"); + case IR_EQ: + | je &addr + break; + case IR_NE: + | jne &addr + break; + case IR_LT: + | jl &addr + break; + case IR_GE: + | jge &addr + break; + case IR_LE: + | jle &addr + break; + case IR_GT: + | jg &addr + break; + case IR_ULT: + | jb &addr + break; + case IR_UGE: + | jae &addr + break; + case IR_ULE: + | jbe &addr + break; + case IR_UGT: + | ja &addr + break; + } + } else { + switch (op) { + default: + IR_ASSERT(0 && "NIY binary op"); + case IR_EQ: + | jp >1 + | je &addr + |1: + break; + case IR_NE: + | jne &addr + | jp &addr + break; + case IR_LT: + | jp >1 + | jb &addr + |1: + break; + case IR_GE: + | jae &addr + break; + case IR_LE: + | jp >1 + | jbe &addr + |1: + break; + case IR_GT: + | ja &addr + break; +// case IR_ULT: fprintf(stderr, "\tjb .LL%d\n", true_block); break; +// case IR_UGE: fprintf(stderr, "\tjae .LL%d\n", true_block); break; +// case IR_ULE: fprintf(stderr, "\tjbe .LL%d\n", true_block); break; +// case IR_UGT: fprintf(stderr, "\tja .LL%d\n", true_block); break; + } + } + return 0; +} + +static bool ir_emit_guard(ir_ctx *ctx, uint32_t b, ir_ref def, ir_insn *insn) +{ + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + ir_reg op2_reg = ctx->regs[def][2]; + ir_type type = ctx->ir_base[insn->op2].type; + void *addr; + + IR_ASSERT(IR_IS_TYPE_INT(type)); + if (IR_IS_CONST_REF(insn->op2)) { + bool is_true = ir_ref_is_true(ctx, insn->op2); + + if ((insn->op == IR_GUARD && !is_true) || (insn->op == IR_GUARD_NOT && is_true)) { + addr = ir_jmp_addr(ctx, insn, &ctx->ir_base[insn->op3]); + if (sizeof(void*) == 4 || IR_MAY_USE_32BIT_ADDR(addr)) { + | jmp aword &addr + } else { +|.if X64 + if (IR_IS_SIGNED_32BIT(addr)) { + | mov rax, ((ptrdiff_t)addr) // 0x48 0xc7 
0xc0 + } else { + | mov64 rax, ((ptrdiff_t)addr) // 0x48 0xb8 + } + | jmp aword [rax] +|.endif + } + } + return 0; + } + + if (op2_reg != IR_REG_NONE) { + if (IR_REG_SPILLED(op2_reg)) { + op2_reg = IR_REG_NUM(op2_reg); + ir_emit_load(ctx, type, op2_reg, insn->op2); + } + | ASM_REG_REG_OP test, type, op2_reg, op2_reg + } else { + int32_t offset = 0; + + if (ir_rule(ctx, insn->op2) & IR_FUSED) { + offset = ir_fuse_load(ctx, insn->op2, &op2_reg); + } else { + offset = ir_ref_spill_slot(ctx, insn->op2, &op2_reg); + } + if (op2_reg == IR_REG_NONE) { + | ASM_MEM_IMM_OP cmp, type, [offset], 0 + } else { + | ASM_MEM_IMM_OP cmp, type, [Ra(op2_reg)+offset], 0 + } + } + + addr = ir_jmp_addr(ctx, insn, &ctx->ir_base[insn->op3]); + if (sizeof(void*) == 4 || IR_MAY_USE_32BIT_ADDR(addr)) { + ir_op op; + + if (insn->op == IR_GUARD) { + op = IR_EQ; + } else { + op = IR_NE; + } + return ir_emit_guard_jcc(ctx, b, def, op, addr, 1); + } else { +|.if X64 + if (insn->op == IR_GUARD) { + | je >1 + } else { + | jne >1 + } + |.cold_code + |1: + if (IR_IS_SIGNED_32BIT(addr)) { + | mov rax, ((ptrdiff_t)addr) // 0x48 0xc7 0xc0 + } else { + | mov64 rax, ((ptrdiff_t)addr) // 0x48 0xb8 + } + | jmp aword [rax] + |.code +|.endif + return 0; + } +} + +static bool ir_emit_guard_cmp_int(ir_ctx *ctx, uint32_t b, ir_ref def, ir_insn *insn) +{ + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + ir_insn *cmp_insn = &ctx->ir_base[insn->op2]; + ir_op op = cmp_insn->op; + ir_type type = ctx->ir_base[cmp_insn->op1].type; + ir_ref op1 = cmp_insn->op1; + ir_ref op2 = cmp_insn->op2; + ir_reg op1_reg = ctx->regs[insn->op2][1]; + ir_reg op2_reg = ctx->regs[insn->op2][2]; + void *addr; + + if (op1_reg != IR_REG_NONE && (IR_IS_CONST_REF(op1) || IR_REG_SPILLED(op1_reg))) { + op1_reg = IR_REG_NUM(op1_reg); + ir_emit_load(ctx, type, op1_reg, op1); + } + if (op2_reg != IR_REG_NONE && (IR_IS_CONST_REF(op2) || IR_REG_SPILLED(op2_reg))) { + op2_reg = IR_REG_NUM(op2_reg); + if (op1 != op2) { + ir_emit_load(ctx, type, op2_reg, op2); + } + } + + addr = ir_jmp_addr(ctx, insn, &ctx->ir_base[insn->op3]); + if (IR_IS_CONST_REF(op2) && ctx->ir_base[op2].val.u64 == 0) { + if (op == IR_ULT) { + /* always false */ + if (sizeof(void*) == 4 || IR_MAY_USE_32BIT_ADDR(addr)) { + | jmp aword &addr + } else { +|.if X64 + if (IR_IS_SIGNED_32BIT(addr)) { + | mov rax, ((ptrdiff_t)addr) // 0x48 0xc7 0xc0 + } else { + | mov64 rax, ((ptrdiff_t)addr) // 0x48 0xb8 + } + | jmp aword [rax] +|.endif + } + return 0; + } else if (op == IR_UGE) { + /* always true */ + return 0; + } else if (op == IR_ULE) { + op = IR_EQ; + } else if (op == IR_UGT) { + op = IR_NE; + } + } + ir_emit_cmp_int_common(ctx, type, cmp_insn, op1_reg, op1, op2_reg, op2); + + if (insn->op == IR_GUARD) { + op ^= 1; // reverse + } + + return ir_emit_guard_jcc(ctx, b, def, op, addr, 1); +} + +static bool ir_emit_guard_cmp_fp(ir_ctx *ctx, uint32_t b, ir_ref def, ir_insn *insn) +{ + ir_op op = ir_emit_cmp_fp_common(ctx, insn->op2, &ctx->ir_base[insn->op2]); + void *addr = ir_jmp_addr(ctx, insn, &ctx->ir_base[insn->op3]); + + if (insn->op == IR_GUARD) { + op ^= 1; // reverse + } + return ir_emit_guard_jcc(ctx, b, def, op, addr, 0); +} + +static bool ir_emit_guard_test_int(ir_ctx *ctx, uint32_t b, ir_ref def, ir_insn *insn) +{ + void *addr = ir_jmp_addr(ctx, insn, &ctx->ir_base[insn->op3]); + ir_op op = (insn->op == IR_GUARD) ? 
IR_EQ : IR_NE; + + ir_emit_test_int_common(ctx, insn->op2, op); + return ir_emit_guard_jcc(ctx, b, def, op, addr, 1); +} + +static bool ir_emit_guard_jcc_int(ir_ctx *ctx, uint32_t b, ir_ref def, ir_insn *insn) +{ + void *addr = ir_jmp_addr(ctx, insn, &ctx->ir_base[insn->op3]); + ir_op op = ctx->ir_base[insn->op2].op; + + if (insn->op == IR_GUARD) { + op ^= 1; // reverse + } + return ir_emit_guard_jcc(ctx, b, def, op, addr, 1); +} + +static bool ir_emit_guard_overflow(ir_ctx *ctx, uint32_t b, ir_ref def, ir_insn *insn) +{ + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + ir_type type; + void *addr = ir_jmp_addr(ctx, insn, &ctx->ir_base[insn->op3]); + + type = ctx->ir_base[ctx->ir_base[insn->op2].op1].type; + + IR_ASSERT(IR_IS_TYPE_INT(type)); + if (IR_IS_TYPE_SIGNED(type)) { + if (insn->op == IR_GUARD) { + | jno &addr + } else { + | jo &addr + } + } else { + if (insn->op == IR_GUARD) { + | jnc &addr + } else { + | jc &addr + } + } + return 0; +} + +static void ir_emit_lea(ir_ctx *ctx, ir_ref def, ir_type type, ir_reg base_reg, ir_reg index_reg, uint8_t scale, int32_t offset) +{ + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); + + IR_ASSERT(def_reg != IR_REG_NONE); + if (index_reg == IR_REG_NONE) { + IR_ASSERT(base_reg != IR_REG_NONE); + if (!offset) { + if (ir_type_size[type] == 4) { + | lea Rd(def_reg), dword [Rd(base_reg)] + } else { + | lea Ra(def_reg), aword [Ra(base_reg)] + } + } else { + if (ir_type_size[type] == 4) { + | lea Rd(def_reg), dword [Rd(base_reg)+offset] + } else { + | lea Ra(def_reg), aword [Ra(base_reg)+offset] + } + } + } else { + if (base_reg == IR_REG_NONE) { + if (!offset) { + switch (scale) { + default: + IR_ASSERT(0); + case 2: + if (ir_type_size[type] == 4) { + | lea Rd(def_reg), dword [Rd(index_reg)*2] + } else { + | lea Ra(def_reg), aword [Ra(index_reg)*2] + } + break; + case 4: + if (ir_type_size[type] == 4) { + | lea Rd(def_reg), dword [Rd(index_reg)*4] + } else { + | lea Ra(def_reg), aword [Ra(index_reg)*4] + } + break; + case 8: + if (ir_type_size[type] == 4) { + | lea Rd(def_reg), dword [Rd(index_reg)*8] + } else { + | lea Ra(def_reg), aword [Ra(index_reg)*8] + } + break; + } + } else { + switch (scale) { + default: + IR_ASSERT(0); + case 2: + if (ir_type_size[type] == 4) { + | lea Rd(def_reg), dword [Rd(index_reg)*2+offset] + } else { + | lea Ra(def_reg), aword [Ra(index_reg)*2+offset] + } + break; + case 4: + if (ir_type_size[type] == 4) { + | lea Rd(def_reg), dword [Rd(index_reg)*4+offset] + } else { + | lea Ra(def_reg), aword [Ra(index_reg)*4+offset] + } + break; + case 8: + if (ir_type_size[type] == 4) { + | lea Rd(def_reg), dword [Rd(index_reg)*8+offset] + } else { + | lea Ra(def_reg), aword [Ra(index_reg)*8+offset] + } + break; + } + } + } else { + if (!offset) { + switch (scale) { + default: + IR_ASSERT(0); + case 1: + if (ir_type_size[type] == sizeof(void*)) { + if (def_reg == base_reg) { + | add Ra(def_reg), Ra(index_reg) + } else if (def_reg == index_reg) { + | add Ra(def_reg), Ra(base_reg) + } else { + | lea Ra(def_reg), aword [Ra(base_reg)+Ra(index_reg)] + } + } else { + IR_ASSERT(sizeof(void*) == 8 && ir_type_size[type] == 4); + if (def_reg == base_reg) { + | add Rd(def_reg), Rd(index_reg) + } else if (def_reg == index_reg) { + | add Rd(def_reg), Rd(base_reg) + } else { + | lea Rd(def_reg), dword [Rd(base_reg)+Rd(index_reg)] + } + } + break; + case 2: + if (ir_type_size[type] == 4) { + | lea Rd(def_reg), dword 
[Rd(base_reg)+Rd(index_reg)*2] + } else { + | lea Ra(def_reg), aword [Ra(base_reg)+Ra(index_reg)*2] + } + break; + case 4: + if (ir_type_size[type] == 4) { + | lea Rd(def_reg), dword [Rd(base_reg)+Rd(index_reg)*4] + } else { + | lea Ra(def_reg), aword [Ra(base_reg)+Ra(index_reg)*4] + } + break; + case 8: + if (ir_type_size[type] == 4) { + | lea Rd(def_reg), dword [Rd(base_reg)+Rd(index_reg)*8] + } else { + | lea Ra(def_reg), aword [Ra(base_reg)+Ra(index_reg)*8] + } + break; + } + } else { + switch (scale) { + default: + IR_ASSERT(0); + case 1: + if (ir_type_size[type] == 4) { + | lea Rd(def_reg), dword [Rd(base_reg)+Rd(index_reg)+offset] + } else { + | lea Ra(def_reg), aword [Ra(base_reg)+Ra(index_reg)+offset] + } + break; + case 2: + if (ir_type_size[type] == 4) { + | lea Rd(def_reg), dword [Rd(base_reg)+Rd(index_reg)*2+offset] + } else { + | lea Ra(def_reg), aword [Ra(base_reg)+Ra(index_reg)*2+offset] + } + break; + case 4: + if (ir_type_size[type] == 4) { + | lea Rd(def_reg), dword [Rd(base_reg)+Rd(index_reg)*4+offset] + } else { + | lea Ra(def_reg), aword [Ra(base_reg)+Ra(index_reg)*4+offset] + } + break; + case 8: + if (ir_type_size[type] == 4) { + | lea Rd(def_reg), dword [Rd(base_reg)+Rd(index_reg)*8+offset] + } else { + | lea Ra(def_reg), aword [Ra(base_reg)+Ra(index_reg)*8+offset] + } + break; + } + } + } + } + if (IR_REG_SPILLED(ctx->regs[def][0])) { + ir_emit_store(ctx, type, def, def_reg); + } +} + +static void ir_emit_tls(ir_ctx *ctx, ir_ref def, ir_insn *insn) +{ + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + ir_reg reg = IR_REG_NUM(ctx->regs[def][0]); + + if (ctx->use_lists[def].count == 1) { + /* dead load */ + return; + } + +|.if X64WIN +| gs +| mov Ra(reg), aword [0x58] +| mov Ra(reg), aword [Ra(reg)+insn->op2] +| mov Ra(reg), aword [Ra(reg)+insn->op3] +|.elif WIN +| fs +| mov Ra(reg), aword [0x2c] +| mov Ra(reg), aword [Ra(reg)+insn->op2] +| mov Ra(reg), aword [Ra(reg)+insn->op3] +|.elif X64APPLE +| gs +|| if (insn->op3 == IR_NULL) { +| mov Ra(reg), aword [insn->op2] +|| } else { +| mov Ra(reg), aword [insn->op2] +| mov Ra(reg), aword [Ra(reg)+insn->op3] +|| } +|.elif X64 +| fs +|| if (insn->op3 == IR_NULL) { +| mov Ra(reg), aword [insn->op2] +|| } else { +| mov Ra(reg), [0x8] +| mov Ra(reg), aword [Ra(reg)+insn->op2] +| mov Ra(reg), aword [Ra(reg)+insn->op3] +|| } +|.else +| gs +|| if (insn->op3 == IR_NULL) { +| mov Ra(reg), aword [insn->op2] +|| } else { +| mov Ra(reg), [0x4] +| mov Ra(reg), aword [Ra(reg)+insn->op2] +| mov Ra(reg), aword [Ra(reg)+insn->op3] +|| } +| .endif + if (IR_REG_SPILLED(ctx->regs[def][0])) { + ir_emit_store(ctx, IR_ADDR, def, reg); + } +} + +static void ir_emit_exitcall(ir_ctx *ctx, ir_ref def, ir_insn *insn) +{ + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); + + IR_ASSERT(def_reg != IR_REG_NONE); + + |.if X64 + | sub rsp, 16*8+16*8+8 /* CPU regs + SSE regs */ + | mov aword [rsp+0*8], rax + | mov aword [rsp+1*8], rcx + | mov aword [rsp+2*8], rdx + | mov aword [rsp+3*8], rbx + | mov aword [rsp+5*8], rbp + | mov aword [rsp+6*8], rsi + | mov aword [rsp+7*8], rdi + | mov aword [rsp+8*8], r8 + | mov aword [rsp+9*8], r9 + | mov aword [rsp+10*8], r10 + | mov aword [rsp+11*8], r11 + | mov aword [rsp+12*8], r12 + | mov aword [rsp+13*8], r13 + | mov aword [rsp+14*8], r14 + | mov aword [rsp+15*8], r15 + | movsd qword [rsp+16*8+0*8], xmm0 + | movsd qword [rsp+16*8+1*8], xmm1 + | movsd qword [rsp+16*8+2*8], xmm2 + | movsd qword [rsp+16*8+3*8], xmm3 
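+ /* note: the GP slot at [rsp+4*8] (the slot corresponding to rsp itself) is skipped by the stores above; it is filled in below with the reconstructed stack pointer of the exited code */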
+ | movsd qword [rsp+16*8+4*8], xmm4 + | movsd qword [rsp+16*8+5*8], xmm5 + | movsd qword [rsp+16*8+6*8], xmm6 + | movsd qword [rsp+16*8+7*8], xmm7 + | movsd qword [rsp+16*8+8*8], xmm8 + | movsd qword [rsp+16*8+9*8], xmm9 + | movsd qword [rsp+16*8+10*8], xmm10 + | movsd qword [rsp+16*8+11*8], xmm11 + | movsd qword [rsp+16*8+12*8], xmm12 + | movsd qword [rsp+16*8+13*8], xmm13 + | movsd qword [rsp+16*8+14*8], xmm14 + | movsd qword [rsp+16*8+15*8], xmm15 + | + | mov Ra(IR_REG_INT_ARG2), rsp + | lea Ra(IR_REG_INT_ARG1), [rsp+16*8+16*8+16] + | mov aword [rsp+4*8], Ra(IR_REG_INT_ARG1) + | mov Ra(IR_REG_INT_ARG1), [rsp+16*8+16*8+8] + |.if X64WIN + | sub rsp, 32 /* shadow space */ + |.endif + |.else + | sub esp, 8*4+8*8+12 /* CPU regs + SSE regs */ + | mov aword [esp+0*4], eax + | mov aword [esp+1*4], ecx + | mov aword [esp+2*4], edx + | mov aword [esp+3*4], ebx + | mov aword [esp+5*4], ebp + | mov aword [esp+6*4], esi + | mov aword [esp+7*4], edi + | movsd qword [esp+8*4+0*8], xmm0 + | movsd qword [esp+8*4+1*8], xmm1 + | movsd qword [esp+8*4+2*8], xmm2 + | movsd qword [esp+8*4+3*8], xmm3 + | movsd qword [esp+8*4+4*8], xmm4 + | movsd qword [esp+8*4+5*8], xmm5 + | movsd qword [esp+8*4+6*8], xmm6 + | movsd qword [esp+8*4+7*8], xmm7 + | + | mov Ra(IR_REG_INT_FCARG2), esp + | lea Ra(IR_REG_INT_FCARG1), [esp+8*4+8*8+16] + | mov aword [esp+4*4], Ra(IR_REG_INT_FCARG1) + | mov Ra(IR_REG_INT_FCARG1), [esp+8*4+8*8+12] + |.endif + + if (IR_IS_CONST_REF(insn->op2)) { + void *addr = ir_jmp_addr(ctx, insn, &ctx->ir_base[insn->op2]); + + if (sizeof(void*) == 4 || IR_MAY_USE_32BIT_ADDR(addr)) { + | call aword &addr + } else { +|.if X64 + if (IR_IS_SIGNED_32BIT(addr)) { + | mov rax, ((ptrdiff_t)addr) // 0x48 0xc7 0xc0 + } else { + | mov64 rax, ((ptrdiff_t)addr) // 0x48 0xb8 + } + | call rax +|.endif + } + } else { + IR_ASSERT(0); + } + + // restore SP + |.if X64WIN + | add rsp, 32+16*8+16*8+16 /* shadow space + CPU regs + SSE regs */ + |.elif X64 + | add rsp, 16*8+16*8+16 /* CPU regs + SSE regs */ + |.else + | add esp, 8*4+8*8+16 /* CPU regs + SSE regs */ + |.endif + + if (def_reg != IR_REG_INT_RET1) { + ir_emit_mov(ctx, insn->type, def_reg, IR_REG_INT_RET1); + } + if (IR_REG_SPILLED(ctx->regs[def][0])) { + ir_emit_store(ctx, insn->type, def, def_reg); + } +} + +static void ir_emit_param_move(ir_ctx *ctx, uint8_t type, ir_reg from_reg, ir_reg to_reg, ir_ref to, int32_t offset) +{ + ir_reg fp = (ctx->flags & IR_USE_FRAME_POINTER) ? 
IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER; + + IR_ASSERT(from_reg != IR_REG_NONE || to_reg != IR_REG_NONE); + + if (IR_IS_TYPE_INT(type)) { + if (from_reg != IR_REG_NONE) { + if (to_reg != IR_REG_NONE) { + ir_emit_mov(ctx, type, to_reg, from_reg); + } else { + ir_emit_store(ctx, type, to, from_reg); + } + } else { + ir_emit_load_mem_int(ctx, type, to_reg, fp, offset); + } + } else { + if (from_reg != IR_REG_NONE) { + if (to_reg != IR_REG_NONE) { + ir_emit_fp_mov(ctx, type, to_reg, from_reg); + } else { + ir_emit_store(ctx, type, to, from_reg); + } + } else { + ir_emit_load_mem_fp(ctx, type, to_reg, fp, offset); + } + } +} + +static void ir_emit_load_params(ir_ctx *ctx) +{ + ir_use_list *use_list = &ctx->use_lists[1]; + ir_insn *insn; + ir_ref i, n, *p, use; + int int_param_num = 0; + int fp_param_num = 0; + ir_reg src_reg; + ir_reg dst_reg; + // TODO: Calling convention specific + int int_reg_params_count = IR_REG_INT_ARGS; + int fp_reg_params_count = IR_REG_FP_ARGS; + const int8_t *int_reg_params = _ir_int_reg_params; + const int8_t *fp_reg_params = _ir_fp_reg_params; + int32_t stack_offset = 0; + +#ifdef IR_TARGET_X86 + if (sizeof(void*) == 4 && (ctx->flags & IR_FASTCALL_FUNC)) { + int_reg_params_count = IR_REG_INT_FCARGS; + fp_reg_params_count = IR_REG_FP_FCARGS; + int_reg_params = _ir_int_fc_reg_params; + fp_reg_params = _ir_fp_fc_reg_params; + } +#endif + + if (ctx->flags & IR_USE_FRAME_POINTER) { + stack_offset = sizeof(void*) * 2; /* skip old frame pointer and return address */ + } else { + stack_offset = sizeof(void*) + ctx->stack_frame_size + ctx->call_stack_size; /* skip return address */ + } + n = use_list->count; + for (i = 0, p = &ctx->use_edges[use_list->refs]; i < n; i++, p++) { + use = *p; + insn = &ctx->ir_base[use]; + if (insn->op == IR_PARAM) { + if (IR_IS_TYPE_INT(insn->type)) { + if (int_param_num < int_reg_params_count) { + src_reg = int_reg_params[int_param_num]; + } else { + src_reg = IR_REG_NONE; + } + int_param_num++; +#ifdef _WIN64 + /* WIN64 calling convention uses a common counter for int and fp registers */ + fp_param_num++; +#endif + } else { + if (fp_param_num < fp_reg_params_count) { + src_reg = fp_reg_params[fp_param_num]; + } else { + src_reg = IR_REG_NONE; + } + fp_param_num++; +#ifdef _WIN64 + /* WIN64 calling convention uses a common counter for int and fp registers */ + int_param_num++; +#endif + } + if (ctx->vregs[use]) { + dst_reg = IR_REG_NUM(ctx->regs[use][0]); + IR_ASSERT(src_reg != IR_REG_NONE || dst_reg != IR_REG_NONE || + stack_offset == ctx->live_intervals[ctx->vregs[use]]->stack_spill_pos + + ((ctx->flags & IR_USE_FRAME_POINTER) ? -ctx->stack_frame_size : ctx->call_stack_size));
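+ /* move the incoming argument from its ABI location (register or caller stack) to the location assigned by the register allocator */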
+ if (src_reg != dst_reg) { + ir_emit_param_move(ctx, insn->type, src_reg, dst_reg, use, stack_offset); + } + if (dst_reg != IR_REG_NONE && IR_REG_SPILLED(ctx->regs[use][0])) { + ir_emit_store(ctx, insn->type, use, dst_reg); + } + } + if (src_reg == IR_REG_NONE) { + if (sizeof(void*) == 8) { + stack_offset += sizeof(void*); + } else { + stack_offset += IR_MAX(sizeof(void*), ir_type_size[insn->type]); + } + } + } + } +} + +static ir_reg ir_get_free_reg(ir_type type, ir_regset available) +{ + if (IR_IS_TYPE_INT(type)) { + available = IR_REGSET_INTERSECTION(available, IR_REGSET_GP); + } else { + IR_ASSERT(IR_IS_TYPE_FP(type)); + available = IR_REGSET_INTERSECTION(available, IR_REGSET_FP); + } + IR_ASSERT(!IR_REGSET_IS_EMPTY(available)); + return IR_REGSET_FIRST(available); +} + +static int ir_fix_dessa_tmps(ir_ctx *ctx, uint8_t type, ir_ref from, ir_ref to) +{ + ir_backend_data *data = ctx->data; + ir_ref ref = ctx->cfg_blocks[data->dessa_from_block].end; + + if (to == 0) { + if (IR_IS_TYPE_INT(type)) { + if (ctx->regs[ref][0] == IR_REG_NONE) { + ctx->regs[ref][0] = IR_REG_RAX; + } + } else { + IR_ASSERT(IR_IS_TYPE_FP(type)); + if (ctx->regs[ref][1] == IR_REG_NONE) { + ctx->regs[ref][1] = IR_REG_XMM0; + } + } + } else if (from != 0) { + if (IR_IS_TYPE_INT(type)) { + if (ctx->regs[ref][0] == IR_REG_NONE) { + ctx->regs[ref][0] = IR_REG_RAX; + } + } else { + IR_ASSERT(IR_IS_TYPE_FP(type)); + if (ctx->regs[ref][1] == IR_REG_NONE) { + ctx->regs[ref][1] = IR_REG_XMM0; + } + } + } + return 1; +} + +static void ir_fix_param_spills(ir_ctx *ctx) +{ + ir_use_list *use_list = &ctx->use_lists[1]; + ir_insn *insn; + ir_ref i, n, *p, use; + int int_param_num = 0; + int fp_param_num = 0; + ir_reg src_reg; + // TODO: Calling convention specific + int int_reg_params_count = IR_REG_INT_ARGS; + int fp_reg_params_count = IR_REG_FP_ARGS; + const int8_t *int_reg_params = _ir_int_reg_params; + const int8_t *fp_reg_params = _ir_fp_reg_params; + int32_t stack_start = 0; + int32_t stack_offset = 0; + +#ifdef IR_TARGET_X86 + if (sizeof(void*) == 4 && (ctx->flags & IR_FASTCALL_FUNC)) { + int_reg_params_count = IR_REG_INT_FCARGS; + fp_reg_params_count = IR_REG_FP_FCARGS; + int_reg_params = _ir_int_fc_reg_params; + fp_reg_params = _ir_fp_fc_reg_params; + } +#endif + + if (ctx->flags & IR_USE_FRAME_POINTER) { + /* skip old frame pointer and return address */ + stack_start = sizeof(void*) * 2 + (ctx->stack_frame_size - ctx->stack_frame_alignment); + } else { + /* skip return address */ + stack_start = sizeof(void*) + ctx->stack_frame_size; + } + n = use_list->count; + for (i = 0, p = &ctx->use_edges[use_list->refs]; i < n; i++, p++) { + use = *p; + insn = &ctx->ir_base[use]; + if (insn->op == IR_PARAM) { + if (IR_IS_TYPE_INT(insn->type)) { + if (int_param_num < int_reg_params_count) { + src_reg = int_reg_params[int_param_num]; + } else { + src_reg = IR_REG_NONE; + } + int_param_num++; +#ifdef _WIN64 + /* WIN64 calling convention uses a common counter for int and fp registers */ + fp_param_num++; +#endif + } else { + if (fp_param_num < fp_reg_params_count) { + src_reg = fp_reg_params[fp_param_num]; + } else { + src_reg = IR_REG_NONE; + } + fp_param_num++; +#ifdef _WIN64 + /* WIN64 calling convention uses a common counter for int and fp registers */ + int_param_num++; +#endif + } + if (src_reg == IR_REG_NONE) { + if (ctx->vregs[use]) { + ir_live_interval *ival = ctx->live_intervals[ctx->vregs[use]];
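+ /* a parameter that was passed on the stack may simply reuse its caller-allocated argument slot as its spill slot; no separate slot in the frame is needed */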
+ if ((ival->flags & IR_LIVE_INTERVAL_MEM_PARAM) + && ival->stack_spill_pos == -1 + && (ival->next || ival->reg == IR_REG_NONE)) { + ival->stack_spill_pos = stack_start + stack_offset; + ctx->regs[use][0] = IR_REG_NONE; + } + } + if (sizeof(void*) == 8) { + stack_offset += sizeof(void*); + } else { + stack_offset += IR_MAX(sizeof(void*), ir_type_size[insn->type]); + } + } + } + } + +#ifdef IR_TARGET_X86 + if (sizeof(void*) == 4 && (ctx->flags & IR_FASTCALL_FUNC)) { + ctx->param_stack_size = stack_offset; + } +#endif +} + +static void ir_allocate_unique_spill_slots(ir_ctx *ctx) +{ + uint32_t b; + ir_block *bb; + ir_insn *insn; + ir_ref i, n, j, *p; + uint32_t *rule, insn_flags; + ir_backend_data *data = ctx->data; + ir_regset available = 0; + ir_target_constraints constraints; + uint32_t def_flags; + ir_reg reg; + +#ifndef IR_REG_FP_RET1 + if (ctx->ret_type == IR_FLOAT || ctx->ret_type == IR_DOUBLE) { + ctx->ret_slot = ir_allocate_spill_slot(ctx, ctx->ret_type, &data->ra_data); + } else { + ctx->ret_slot = -1; + } +#endif + + ctx->regs = ir_mem_malloc(sizeof(ir_regs) * ctx->insns_count); + memset(ctx->regs, IR_REG_NONE, sizeof(ir_regs) * ctx->insns_count); + + /* vregs + tmp + fixed + SCRATCH + ALL */ + ctx->live_intervals = ir_mem_calloc(ctx->vregs_count + 1 + IR_REG_NUM + 2, sizeof(ir_live_interval*)); + + if (!ctx->arena) { + ctx->arena = ir_arena_create(16 * 1024); + } + + for (b = 1, bb = ctx->cfg_blocks + b; b <= ctx->cfg_blocks_count; b++, bb++) { + IR_ASSERT(!(bb->flags & IR_BB_UNREACHABLE)); + for (i = bb->start, insn = ctx->ir_base + i, rule = ctx->rules + i; i <= bb->end;) { + switch (ctx->rules ? *rule : insn->op) { + case IR_START: + case IR_BEGIN: + case IR_END: + case IR_IF_TRUE: + case IR_IF_FALSE: + case IR_CASE_VAL: + case IR_CASE_DEFAULT: + case IR_MERGE: + case IR_LOOP_BEGIN: + case IR_LOOP_END: + break; + default: + def_flags = ir_get_target_constraints(ctx, i, &constraints); + if (ctx->rules + && *rule != IR_CMP_AND_BRANCH_INT + && *rule != IR_CMP_AND_BRANCH_FP + && *rule != IR_TEST_AND_BRANCH_INT + && *rule != IR_GUARD_CMP_INT + && *rule != IR_GUARD_CMP_FP) { + available = IR_REGSET_SCRATCH; + } + if (ctx->vregs[i]) { + reg = constraints.def_reg; + if (reg != IR_REG_NONE && IR_REGSET_IN(available, reg)) { + IR_REGSET_EXCL(available, reg); + ctx->regs[i][0] = reg | IR_REG_SPILL_STORE; + } else if (def_flags & IR_USE_MUST_BE_IN_REG) { + if (insn->op == IR_VLOAD + && ctx->live_intervals[ctx->vregs[i]] + && ctx->live_intervals[ctx->vregs[i]]->stack_spill_pos != -1) { + /* pass */ + } else if (insn->op != IR_PARAM) { + reg = ir_get_free_reg(insn->type, available); + IR_REGSET_EXCL(available, reg); + ctx->regs[i][0] = reg | IR_REG_SPILL_STORE; + } + } + if (!ctx->live_intervals[ctx->vregs[i]]) { + ir_live_interval *ival = ir_arena_alloc(&ctx->arena, sizeof(ir_live_interval)); + memset(ival, 0, sizeof(ir_live_interval)); + ctx->live_intervals[ctx->vregs[i]] = ival; + ival->type = insn->type; + ival->reg = IR_REG_NONE; + ival->vreg = ctx->vregs[i]; + ival->stack_spill_pos = -1; + if (insn->op == IR_PARAM && reg == IR_REG_NONE) { + ival->flags |= IR_LIVE_INTERVAL_MEM_PARAM; + } else { + ival->stack_spill_pos = ir_allocate_spill_slot(ctx, ival->type, &data->ra_data); + } + } else if (insn->op == IR_PARAM) { + IR_ASSERT(0 && "unexpected PARAM"); + return; + } + } else if (insn->op == IR_VAR) { + ir_use_list *use_list = &ctx->use_lists[i]; + ir_ref n = use_list->count; + + if (n > 0) { + int32_t stack_spill_pos = insn->op3 = ir_allocate_spill_slot(ctx, insn->type, &data->ra_data); + ir_ref i, *p, use; + ir_insn *use_insn; + + for (i = 0, p =
&ctx->use_edges[use_list->refs]; i < n; i++, p++) { + use = *p; + use_insn = &ctx->ir_base[use]; + if (use_insn->op == IR_VLOAD) { + if (ctx->vregs[use] + && !ctx->live_intervals[ctx->vregs[use]]) { + ir_live_interval *ival = ir_arena_alloc(&ctx->arena, sizeof(ir_live_interval)); + memset(ival, 0, sizeof(ir_live_interval)); + ctx->live_intervals[ctx->vregs[use]] = ival; + ival->type = insn->type; + ival->reg = IR_REG_NONE; + ival->vreg = ctx->vregs[use]; + ival->stack_spill_pos = stack_spill_pos; + } + } else if (use_insn->op == IR_VSTORE) { + if (!IR_IS_CONST_REF(use_insn->op3) + && ctx->vregs[use_insn->op3] + && !ctx->live_intervals[ctx->vregs[use_insn->op3]]) { + ir_live_interval *ival = ir_arena_alloc(&ctx->arena, sizeof(ir_live_interval)); + memset(ival, 0, sizeof(ir_live_interval)); + ctx->live_intervals[ctx->vregs[use_insn->op3]] = ival; + ival->type = insn->type; + ival->reg = IR_REG_NONE; + ival->vreg = ctx->vregs[use_insn->op3]; + ival->stack_spill_pos = stack_spill_pos; + } + } + } + } + } + + insn_flags = ir_op_flags[insn->op]; + n = constraints.tmps_count; + if (n) { + do { + n--; + if (constraints.tmp_regs[n].type) { + ir_reg reg = ir_get_free_reg(constraints.tmp_regs[n].type, available); + IR_REGSET_EXCL(available, reg); + ctx->regs[i][constraints.tmp_regs[n].num] = reg; + } else if (constraints.tmp_regs[n].reg == IR_REG_SCRATCH) { + available = IR_REGSET_DIFFERENCE(available, IR_REGSET_SCRATCH); + } else { + IR_REGSET_EXCL(available, constraints.tmp_regs[n].reg); + } + } while (n); + } + n = insn->inputs_count; + for (j = 1, p = insn->ops + 1; j <= n; j++, p++) { + ir_ref input = *p; + if (IR_OPND_KIND(insn_flags, j) == IR_OPND_DATA && input > 0 && ctx->vregs[input]) { + if ((def_flags & IR_DEF_REUSES_OP1_REG) && j == 1) { + ir_reg reg = IR_REG_NUM(ctx->regs[i][0]); + ctx->regs[i][1] = reg | IR_REG_SPILL_LOAD; + } else { + uint8_t use_flags = IR_USE_FLAGS(def_flags, j); + ir_reg reg = (j < constraints.hints_count) ? 
constraints.hints[j] : IR_REG_NONE; + + if (reg != IR_REG_NONE && IR_REGSET_IN(available, reg)) { + IR_REGSET_EXCL(available, reg); + ctx->regs[i][j] = reg | IR_REG_SPILL_LOAD; + } else if (j > 1 && input == insn->op1 && ctx->regs[i][1] != IR_REG_NONE) { + ctx->regs[i][j] = ctx->regs[i][1]; + } else if (use_flags & IR_USE_MUST_BE_IN_REG) { + reg = ir_get_free_reg(ctx->ir_base[input].type, available); + IR_REGSET_EXCL(available, reg); + ctx->regs[i][j] = reg | IR_REG_SPILL_LOAD; + } + } + } + } + break; + } + n = ir_insn_len(insn); + i += n; + insn += n; + rule += n; + } + if (bb->flags & IR_BB_DESSA_MOVES) { + data->dessa_from_block = b; + ir_gen_dessa_moves(ctx, b, ir_fix_dessa_tmps); + } + } + + ctx->used_preserved_regs = ctx->fixed_save_regset; + ctx->flags |= IR_NO_STACK_COMBINE; + ir_fix_stack_frame(ctx); +} + +static void ir_preallocate_call_stack(ir_ctx *ctx) +{ + int call_stack_size, peak_call_stack_size = 0; + ir_ref i, n; + ir_insn *insn; + + for (i = 1, insn = ctx->ir_base + 1; i < ctx->insns_count;) { + if (insn->op == IR_CALL) { + call_stack_size = ir_call_used_stack(ctx, insn); + if (call_stack_size > peak_call_stack_size +#ifdef IR_HAVE_FASTCALL + && !ir_is_fastcall(ctx, insn) /* fast call functions restore stack pointer */ +#endif + ) { + peak_call_stack_size = call_stack_size; + } + } + n = ir_insn_len(insn); + i += n; + insn += n; + } + if (peak_call_stack_size) { + ctx->call_stack_size = peak_call_stack_size; + ctx->flags |= IR_PREALLOCATED_STACK; + } +} + +void ir_fix_stack_frame(ir_ctx *ctx) +{ + uint32_t additional_size = 0; + + if (ctx->used_preserved_regs) { + ir_regset used_preserved_regs = (ir_regset)ctx->used_preserved_regs; + ir_reg reg; + (void) reg; + + IR_REGSET_FOREACH(used_preserved_regs, reg) { + additional_size += sizeof(void*); + } IR_REGSET_FOREACH_END(); + } + + ctx->stack_frame_size = IR_ALIGNED_SIZE(ctx->stack_frame_size, sizeof(void*)); + ctx->stack_frame_size += additional_size; + ctx->stack_frame_alignment = 0; + ctx->call_stack_size = 0; + + if (ctx->flags & IR_HAS_CALLS) { + /* Stack must be 16 byte aligned */ + if (!(ctx->flags & IR_FUNCTION)) { + while (IR_ALIGNED_SIZE(ctx->stack_frame_size, 16) != ctx->stack_frame_size) { + ctx->stack_frame_size += sizeof(void*); + ctx->stack_frame_alignment += sizeof(void*); + } + } else if (ctx->flags & IR_USE_FRAME_POINTER) { + while (IR_ALIGNED_SIZE(ctx->stack_frame_size + sizeof(void*) * 2, 16) != ctx->stack_frame_size + sizeof(void*) * 2) { + ctx->stack_frame_size += sizeof(void*); + ctx->stack_frame_alignment += sizeof(void*); + } + } else { + if (!(ctx->flags & IR_NO_STACK_COMBINE)) { + ir_preallocate_call_stack(ctx); + } + while (IR_ALIGNED_SIZE(ctx->stack_frame_size + ctx->call_stack_size + sizeof(void*), 16) != + ctx->stack_frame_size + ctx->call_stack_size + sizeof(void*)) { + ctx->stack_frame_size += sizeof(void*); + ctx->stack_frame_alignment += sizeof(void*); + } + } + } + + ir_fix_param_spills(ctx); +} + +static void* dasm_labels[ir_lb_MAX]; + +void *ir_emit_code(ir_ctx *ctx, size_t *size_ptr) +{ + uint32_t b, n, target; + ir_block *bb; + ir_ref i; + ir_insn *insn; + uint32_t *rule; + ir_backend_data data; + dasm_State **Dst; + int ret; + void *entry; + size_t size; + + data.ra_data.unused_slot_4 = 0; + data.ra_data.unused_slot_2 = 0; + data.ra_data.unused_slot_1 = 0; + data.ra_data.handled = NULL; + data.rodata_label = 0; + data.jmp_table_label = 0; + data.double_neg_const = 0; + data.float_neg_const = 0; + data.double_abs_const = 0; + data.float_abs_const = 0; + data.double_zero_const = 0; + 
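+ /* the emit helpers reach this per-compilation backend state through ctx->data */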
ctx->data = &data; + + if (!ctx->live_intervals) { + ctx->stack_frame_size = 0; + ctx->stack_frame_alignment = 0; + ctx->call_stack_size = 0; + ctx->used_preserved_regs = 0; + ir_allocate_unique_spill_slots(ctx); + } + + if (ctx->fixed_stack_frame_size != -1) { + if (ctx->fixed_stack_red_zone) { + IR_ASSERT(ctx->fixed_stack_red_zone == ctx->fixed_stack_frame_size + ctx->fixed_call_stack_size); + } + if (ctx->stack_frame_size > ctx->fixed_stack_frame_size) { + // TODO: report error to caller +#ifdef IR_DEBUG_MESSAGES + fprintf(stderr, "IR Compilation Aborted: ctx->stack_frame_size > ctx->fixed_stack_frame_size at %s:%d\n", + __FILE__, __LINE__); +#endif + ctx->data = NULL; + ctx->status = IR_ERROR_FIXED_STACK_FRAME_OVERFLOW; + return NULL; + } + ctx->stack_frame_size = ctx->fixed_stack_frame_size; + ctx->call_stack_size = ctx->fixed_call_stack_size; + ctx->stack_frame_alignment = 0; + } + + Dst = &data.dasm_state; + data.dasm_state = NULL; + dasm_init(&data.dasm_state, DASM_MAXSECTION); + dasm_setupglobal(&data.dasm_state, dasm_labels, ir_lb_MAX); + dasm_setup(&data.dasm_state, dasm_actions); + /* labels for each block + for each constant + rodata label + jmp_table label + for each entry */ + dasm_growpc(&data.dasm_state, ctx->cfg_blocks_count + 1 + ctx->consts_count + 1 + 1 + 1 + ctx->entries_count); + + if ((ctx->flags & IR_GEN_ENDBR) && (ctx->flags & IR_START_BR_TARGET)) { + |.if X64 + | endbr64 + |.else + | endbr32 + |.endif + } + + if (!(ctx->flags & IR_SKIP_PROLOGUE)) { + ir_emit_prologue(ctx); + } + if (ctx->flags & IR_FUNCTION) { + ir_emit_load_params(ctx); + } + + for (b = 1, bb = ctx->cfg_blocks + b; b <= ctx->cfg_blocks_count; b++, bb++) { + IR_ASSERT(!(bb->flags & IR_BB_UNREACHABLE)); + if ((bb->flags & (IR_BB_START|IR_BB_ENTRY|IR_BB_EMPTY)) == IR_BB_EMPTY) { + continue; + } + |=>b: + + i = bb->start; + insn = ctx->ir_base + i; + if (bb->flags & IR_BB_ENTRY) { + uint32_t label = ctx->cfg_blocks_count + ctx->consts_count + 4 + insn->op3; + + |=>label: + if ((ctx->flags & IR_GEN_ENDBR) && (ctx->flags & IR_ENTRY_BR_TARGET)) { + |.if X64 + | endbr64 + |.else + | endbr32 + |.endif + } + ir_emit_prologue(ctx); + ctx->entries[insn->op3] = i; + } + + /* skip first instruction */ + n = ir_insn_len(insn); + i += n; + insn += n; + rule = ctx->rules + i; + + while (i <= bb->end) { + if (!((*rule) & (IR_FUSED|IR_SKIPPED))) + switch (*rule) { + case IR_VAR: + case IR_PARAM: + case IR_PI: + case IR_PHI: + case IR_SNAPSHOT: + break; + case IR_LEA_OB: + { + ir_reg op1_reg = ctx->regs[i][1]; + int32_t offset = ctx->ir_base[insn->op2].val.i32; + + if (insn->op == IR_ADD) { + offset = ctx->ir_base[insn->op2].val.i32; + } else { + IR_ASSERT(insn->op == IR_SUB); + int64_t long_offset = ctx->ir_base[insn->op2].val.i64; + long_offset = -long_offset; + IR_ASSERT(IR_IS_SIGNED_32BIT(long_offset)); + offset = (int32_t)long_offset; + } + if (IR_REG_SPILLED(op1_reg)) { + op1_reg = IR_REG_NUM(op1_reg); + ir_emit_load(ctx, insn->type, op1_reg, insn->op1); + } + ir_emit_lea(ctx, i, insn->type, op1_reg, IR_REG_NONE, 1, offset); + } + break; + case IR_LEA_SI: + { + ir_reg op1_reg = ctx->regs[i][1]; + int32_t scale = ctx->ir_base[insn->op2].val.i32; + + IR_ASSERT(op1_reg != IR_REG_NONE); + if (IR_REG_SPILLED(op1_reg)) { + op1_reg = IR_REG_NUM(op1_reg); + ir_emit_load(ctx, insn->type, op1_reg, insn->op1); + } + ir_emit_lea(ctx, i, insn->type, IR_REG_NONE, op1_reg, scale, 0); + } + break; + case IR_LEA_SIB: + { + ir_reg op1_reg = ctx->regs[i][1]; + int32_t scale = ctx->ir_base[insn->op2].val.i32; + + 
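+ /* base == index here: x * scale is encoded as x + x * (scale - 1), so e.g. a multiplication by 5 becomes lea Ra(def), [Ra(op1)+Ra(op1)*4] */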
IR_ASSERT(op1_reg != IR_REG_NONE); + if (IR_REG_SPILLED(op1_reg)) { + op1_reg = IR_REG_NUM(op1_reg); + ir_emit_load(ctx, insn->type, op1_reg, insn->op1); + } + ir_emit_lea(ctx, i, insn->type, op1_reg, op1_reg, scale - 1, 0); + } + break; + case IR_LEA_IB: + { + ir_reg op1_reg = ctx->regs[i][1]; + ir_reg op2_reg = ctx->regs[i][2]; + + IR_ASSERT(op1_reg != IR_REG_NONE && op2_reg != IR_REG_NONE); + if (IR_REG_SPILLED(op1_reg)) { + op1_reg = IR_REG_NUM(op1_reg); + ir_emit_load(ctx, insn->type, op1_reg, insn->op1); + } + if (IR_REG_SPILLED(op2_reg)) { + op2_reg = IR_REG_NUM(op2_reg); + ir_emit_load(ctx, insn->type, op2_reg, insn->op2); + } + ir_emit_lea(ctx, i, insn->type, op1_reg, op2_reg, 1, 0); + } + break; + case IR_LEA_OB_I: + { + ir_insn *op1_insn = &ctx->ir_base[insn->op1]; + ir_reg op1_reg = ctx->regs[insn->op1][1]; + ir_reg op2_reg = ctx->regs[i][2]; + int32_t offset = ctx->ir_base[op1_insn->op2].val.i32; + + IR_ASSERT(op1_reg != IR_REG_NONE && op2_reg != IR_REG_NONE); + if (IR_REG_SPILLED(op1_reg)) { + op1_reg = IR_REG_NUM(op1_reg); + ir_emit_load(ctx, insn->type, op1_reg, op1_insn->op1); + } + if (IR_REG_SPILLED(op2_reg)) { + op2_reg = IR_REG_NUM(op2_reg); + ir_emit_load(ctx, insn->type, op2_reg, insn->op2); + } + if (op1_insn->op == IR_SUB) { + offset = -offset; + } + ir_emit_lea(ctx, i, insn->type, op1_reg, op2_reg, 1, offset); + } + break; + case IR_LEA_I_OB: + { + ir_insn *op2_insn = &ctx->ir_base[insn->op2]; + ir_reg op1_reg = ctx->regs[i][1]; + ir_reg op2_reg = ctx->regs[insn->op2][1]; + int32_t offset = ctx->ir_base[op2_insn->op2].val.i32; + + IR_ASSERT(op1_reg != IR_REG_NONE && op2_reg != IR_REG_NONE); + if (IR_REG_SPILLED(op1_reg)) { + op1_reg = IR_REG_NUM(op1_reg); + ir_emit_load(ctx, insn->type, op1_reg, insn->op1); + } + if (IR_REG_SPILLED(op2_reg)) { + op2_reg = IR_REG_NUM(op2_reg); + ir_emit_load(ctx, insn->type, op2_reg, op2_insn->op1); + } + if (op2_insn->op == IR_SUB) { + offset = -offset; + } + ir_emit_lea(ctx, i, insn->type, op1_reg, op2_reg, 1, offset); + } + break; + case IR_LEA_SI_O: + { + ir_insn *op1_insn = &ctx->ir_base[insn->op1]; + ir_reg op1_reg = ctx->regs[insn->op1][1]; + int32_t scale = ctx->ir_base[op1_insn->op2].val.i32; + int32_t offset = ctx->ir_base[insn->op2].val.i32; + + IR_ASSERT(op1_reg != IR_REG_NONE); + if (IR_REG_SPILLED(op1_reg)) { + op1_reg = IR_REG_NUM(op1_reg); + ir_emit_load(ctx, insn->type, op1_reg, op1_insn->op1); + } + if (insn->op == IR_SUB) { + offset = -offset; + } + ir_emit_lea(ctx, i, insn->type, IR_REG_NONE, op1_reg, scale, offset); + } + break; + case IR_LEA_SIB_O: + { + ir_insn *op1_insn = &ctx->ir_base[insn->op1]; + ir_reg op1_reg = ctx->regs[insn->op1][1]; + int32_t scale = ctx->ir_base[op1_insn->op2].val.i32; + int32_t offset = ctx->ir_base[insn->op2].val.i32; + + IR_ASSERT(op1_reg != IR_REG_NONE); + if (IR_REG_SPILLED(op1_reg)) { + op1_reg = IR_REG_NUM(op1_reg); + ir_emit_load(ctx, insn->type, op1_reg, op1_insn->op1); + } + if (insn->op == IR_SUB) { + offset = -offset; + } + ir_emit_lea(ctx, i, insn->type, op1_reg, op1_reg, scale - 1, offset); + } + break; + case IR_LEA_IB_O: + { + ir_insn *op1_insn = &ctx->ir_base[insn->op1]; + ir_reg op1_reg = ctx->regs[insn->op1][1]; + ir_reg op2_reg = ctx->regs[insn->op1][2]; + int32_t offset = ctx->ir_base[insn->op2].val.i32; + + IR_ASSERT(op1_reg != IR_REG_NONE && op2_reg != IR_REG_NONE); + if (IR_REG_SPILLED(op1_reg)) { + op1_reg = IR_REG_NUM(op1_reg); + ir_emit_load(ctx, insn->type, op1_reg, op1_insn->op1); + } + if (IR_REG_SPILLED(op2_reg)) { + op2_reg = IR_REG_NUM(op2_reg); + 
ir_emit_load(ctx, insn->type, op2_reg, op1_insn->op2); + } + if (insn->op == IR_SUB) { + offset = -offset; + } + ir_emit_lea(ctx, i, insn->type, op1_reg, op2_reg, 1, offset); + } + break; + case IR_LEA_OB_SI: + { + ir_insn *op1_insn = &ctx->ir_base[insn->op1]; + ir_insn *op2_insn = &ctx->ir_base[insn->op2]; + ir_reg op1_reg = ctx->regs[insn->op1][1]; + ir_reg op2_reg = ctx->regs[insn->op2][1]; + int32_t offset = ctx->ir_base[op1_insn->op2].val.i32; + int32_t scale = ctx->ir_base[op2_insn->op2].val.i32; + + IR_ASSERT(op1_reg != IR_REG_NONE && op2_reg != IR_REG_NONE); + if (IR_REG_SPILLED(op1_reg)) { + op1_reg = IR_REG_NUM(op1_reg); + ir_emit_load(ctx, insn->type, op1_reg, op1_insn->op1); + } + if (IR_REG_SPILLED(op2_reg)) { + op2_reg = IR_REG_NUM(op2_reg); + ir_emit_load(ctx, insn->type, op2_reg, op2_insn->op1); + } + if (op1_insn->op == IR_SUB) { + offset = -offset; + } + ir_emit_lea(ctx, i, insn->type, op1_reg, op2_reg, scale, offset); + } + break; + case IR_LEA_SI_OB: + { + ir_insn *op1_insn = &ctx->ir_base[insn->op1]; + ir_insn *op2_insn = &ctx->ir_base[insn->op2]; + ir_reg op1_reg = ctx->regs[insn->op1][1]; + ir_reg op2_reg = ctx->regs[insn->op2][1]; + int32_t scale = ctx->ir_base[op1_insn->op2].val.i32; + int32_t offset = ctx->ir_base[op2_insn->op2].val.i32; + + IR_ASSERT(op1_reg != IR_REG_NONE && op2_reg != IR_REG_NONE); + if (IR_REG_SPILLED(op1_reg)) { + op1_reg = IR_REG_NUM(op1_reg); + ir_emit_load(ctx, insn->type, op1_reg, op1_insn->op1); + } + if (IR_REG_SPILLED(op2_reg)) { + op2_reg = IR_REG_NUM(op2_reg); + ir_emit_load(ctx, insn->type, op2_reg, op2_insn->op1); + } + if (op1_insn->op == IR_SUB) { + offset = -offset; + } + ir_emit_lea(ctx, i, insn->type, op2_reg, op1_reg, scale, offset); + } + break; + case IR_LEA_B_SI: + { + ir_insn *op2_insn = &ctx->ir_base[insn->op2]; + ir_reg op1_reg = ctx->regs[i][1]; + ir_reg op2_reg = ctx->regs[insn->op2][1]; + int32_t scale = ctx->ir_base[op2_insn->op2].val.i32; + + IR_ASSERT(op1_reg != IR_REG_NONE && op2_reg != IR_REG_NONE); + if (IR_REG_SPILLED(op1_reg)) { + op1_reg = IR_REG_NUM(op1_reg); + ir_emit_load(ctx, insn->type, op1_reg, insn->op1); + } + if (IR_REG_SPILLED(op2_reg)) { + op2_reg = IR_REG_NUM(op2_reg); + ir_emit_load(ctx, insn->type, op2_reg, op2_insn->op1); + } + ir_emit_lea(ctx, i, insn->type, op1_reg, op2_reg, scale, 0); + } + break; + case IR_LEA_SI_B: + { + ir_insn *op1_insn = &ctx->ir_base[insn->op1]; + ir_reg op1_reg = ctx->regs[insn->op1][1]; + ir_reg op2_reg = ctx->regs[i][2]; + int32_t scale = ctx->ir_base[op1_insn->op2].val.i32; + + IR_ASSERT(op1_reg != IR_REG_NONE && op2_reg != IR_REG_NONE); + if (IR_REG_SPILLED(op1_reg)) { + op1_reg = IR_REG_NUM(op1_reg); + ir_emit_load(ctx, insn->type, op1_reg, op1_insn->op1); + } + if (IR_REG_SPILLED(op2_reg)) { + op2_reg = IR_REG_NUM(op2_reg); + ir_emit_load(ctx, insn->type, op2_reg, insn->op2); + } + ir_emit_lea(ctx, i, insn->type, op2_reg, op1_reg, scale, 0); + } + break; + case IR_MUL_PWR2: + case IR_DIV_PWR2: + case IR_MOD_PWR2: + ir_emit_mul_div_mod_pwr2(ctx, i, insn); + break; + case IR_SHIFT: + ir_emit_shift(ctx, i, insn); + break; + case IR_SHIFT_CONST: + ir_emit_shift_const(ctx, i, insn); + break; + case IR_INC: + case IR_DEC: + case IR_OP_INT: + ir_emit_op_int(ctx, i, insn); + break; + case IR_ABS_INT: + ir_emit_abs_int(ctx, i, insn); + break; + case IR_BOOL_NOT_INT: + ir_emit_bool_not_int(ctx, i, insn); + break; + case IR_OP_FP: + ir_emit_op_fp(ctx, i, insn); + break; + case IR_IMUL3: + ir_emit_imul3(ctx, i, insn); + break; + case IR_BINOP_INT: + ir_emit_binop_int(ctx, 
i, insn); + break; + case IR_BINOP_SSE2: + ir_emit_binop_sse2(ctx, i, insn); + break; + case IR_BINOP_AVX: + ir_emit_binop_avx(ctx, i, insn); + break; + case IR_MUL_INT: + case IR_DIV_INT: + case IR_MOD_INT: + ir_emit_mul_div_mod(ctx, i, insn); + break; + case IR_CMP_INT: + ir_emit_cmp_int(ctx, i, insn); + break; + case IR_TESTCC_INT: + ir_emit_testcc_int(ctx, i, insn); + break; + case IR_SETCC_INT: + ir_emit_setcc_int(ctx, i, insn); + break; + case IR_CMP_FP: + ir_emit_cmp_fp(ctx, i, insn); + break; + case IR_SEXT: + ir_emit_sext(ctx, i, insn); + break; + case IR_ZEXT: + ir_emit_zext(ctx, i, insn); + break; + case IR_TRUNC: + ir_emit_trunc(ctx, i, insn); + break; + case IR_BITCAST: + ir_emit_bitcast(ctx, i, insn); + break; + case IR_INT2FP: + ir_emit_int2fp(ctx, i, insn); + break; + case IR_FP2INT: + ir_emit_fp2int(ctx, i, insn); + break; + case IR_FP2FP: + ir_emit_fp2fp(ctx, i, insn); + break; + case IR_COPY_INT: + ir_emit_copy_int(ctx, i, insn); + break; + case IR_COPY_FP: + ir_emit_copy_fp(ctx, i, insn); + break; + case IR_CMP_AND_BRANCH_INT: + ir_emit_cmp_and_branch_int(ctx, b, i, insn); + break; + case IR_CMP_AND_BRANCH_FP: + ir_emit_cmp_and_branch_fp(ctx, b, i, insn); + break; + case IR_TEST_AND_BRANCH_INT: + ir_emit_test_and_branch_int(ctx, b, i, insn); + break; + case IR_JCC_INT: + { + ir_op op = ctx->ir_base[insn->op2].op; + + if (op == IR_ADD || + op == IR_SUB || +// op == IR_MUL || + op == IR_OR || + op == IR_AND || + op == IR_XOR) { + op = IR_NE; + } else { + IR_ASSERT(op >= IR_EQ && op <= IR_UGT); + } + ir_emit_jcc(ctx, op, b, i, insn, 1); + } + break; + case IR_GUARD_CMP_INT: + if (ir_emit_guard_cmp_int(ctx, b, i, insn)) { + goto next_block; + } + break; + case IR_GUARD_CMP_FP: + if (ir_emit_guard_cmp_fp(ctx, b, i, insn)) { + goto next_block; + } + break; + case IR_GUARD_TEST_INT: + if (ir_emit_guard_test_int(ctx, b, i, insn)) { + goto next_block; + } + break; + case IR_GUARD_JCC_INT: + if (ir_emit_guard_jcc_int(ctx, b, i, insn)) { + goto next_block; + } + break; + case IR_IF_INT: + ir_emit_if_int(ctx, b, i, insn); + break; + case IR_COND: + ir_emit_cond(ctx, i, insn); + break; + case IR_SWITCH: + ir_emit_switch(ctx, b, i, insn); + break; + case IR_MIN_MAX_INT: + ir_emit_min_max_int(ctx, i, insn); + break; + case IR_OVERFLOW: + ir_emit_overflow(ctx, i, insn); + break; + case IR_OVERFLOW_AND_BRANCH: + ir_emit_overflow_and_branch(ctx, b, i, insn); + break; + case IR_END: + case IR_LOOP_END: + if (bb->flags & IR_BB_OSR_ENTRY_LOADS) { + ir_emit_osr_entry_loads(ctx, b, bb); + } + if (bb->flags & IR_BB_DESSA_MOVES) { + ir_emit_dessa_moves(ctx, b, bb); + } + do { + ir_ref succ = ctx->cfg_edges[bb->successors]; + + if (UNEXPECTED(bb->successors_count == 2)) { + if (ctx->cfg_blocks[succ].flags & IR_BB_ENTRY) { + succ = ctx->cfg_edges[bb->successors + 1]; + } else { + IR_ASSERT(ctx->cfg_blocks[ctx->cfg_edges[bb->successors + 1]].flags & IR_BB_ENTRY); + } + } else { + IR_ASSERT(bb->successors_count == 1); + } + target = ir_skip_empty_target_blocks(ctx, succ); + if (b == ctx->cfg_blocks_count || target != ir_skip_empty_next_blocks(ctx, b + 1)) { + | jmp =>target + } + } while (0); + break; + case IR_RETURN_VOID: + ir_emit_return_void(ctx); + break; + case IR_RETURN_INT: + ir_emit_return_int(ctx, i, insn); + break; + case IR_RETURN_FP: + ir_emit_return_fp(ctx, i, insn); + break; + case IR_CALL: + ir_emit_call(ctx, i, insn); + break; + case IR_TAILCALL: + ir_emit_tailcall(ctx, i, insn); + break; + case IR_IJMP: + ir_emit_ijmp(ctx, i, insn); + break; + case IR_MEM_OP_INT: + case 
IR_MEM_INC: + case IR_MEM_DEC: + ir_emit_mem_op_int(ctx, i, insn); + break; + case IR_MEM_BINOP_INT: + ir_emit_mem_binop_int(ctx, i, insn); + break; + case IR_MEM_MUL_PWR2: + case IR_MEM_DIV_PWR2: + case IR_MEM_MOD_PWR2: + ir_emit_mem_mul_div_mod_pwr2(ctx, i, insn); + break; + case IR_MEM_SHIFT: + ir_emit_mem_shift(ctx, i, insn); + break; + case IR_MEM_SHIFT_CONST: + ir_emit_mem_shift_const(ctx, i, insn); + break; + case IR_REG_BINOP_INT: + ir_emit_reg_binop_int(ctx, i, insn); + break; + case IR_VADDR: + ir_emit_vaddr(ctx, i, insn); + break; + case IR_VLOAD: + ir_emit_vload(ctx, i, insn); + break; + case IR_VSTORE_INT: + ir_emit_vstore_int(ctx, i, insn); + break; + case IR_VSTORE_FP: + ir_emit_vstore_fp(ctx, i, insn); + break; + case IR_RLOAD: + ir_emit_rload(ctx, i, insn); + break; + case IR_RSTORE: + ir_emit_rstore(ctx, i, insn); + break; + case IR_LOAD_INT: + ir_emit_load_int(ctx, i, insn); + break; + case IR_LOAD_FP: + ir_emit_load_fp(ctx, i, insn); + break; + case IR_STORE_INT: + ir_emit_store_int(ctx, i, insn); + break; + case IR_STORE_FP: + ir_emit_store_fp(ctx, i, insn); + break; + case IR_ALLOCA: + ir_emit_alloca(ctx, i, insn); + break; + case IR_AFREE: + ir_emit_afree(ctx, i, insn); + break; + case IR_EXITCALL: + ir_emit_exitcall(ctx, i, insn); + break; + case IR_GUARD: + case IR_GUARD_NOT: + if (ir_emit_guard(ctx, b, i, insn)) { + goto next_block; + } + break; + case IR_GUARD_OVERFLOW: + if (ir_emit_guard_overflow(ctx, b, i, insn)) { + goto next_block; + } + break; + case IR_TLS: + ir_emit_tls(ctx, i, insn); + break; + case IR_TRAP: + | int3 + break; + default: + IR_ASSERT(0 && "NIY rule/instruction"); + dasm_free(&data.dasm_state); + ctx->data = NULL; + ctx->status = IR_ERROR_UNSUPPORTED_CODE_RULE; + return NULL; + } + n = ir_insn_len(insn); + i += n; + insn += n; + rule += n; + } +next_block:; + } + + if (data.rodata_label) { + |.rodata + } + for (i = IR_UNUSED + 1, insn = ctx->ir_base - i; i < ctx->consts_count; i++, insn--) { + if (insn->const_flags & IR_CONST_EMIT) { + if (IR_IS_TYPE_FP(insn->type)) { + int label = ctx->cfg_blocks_count + i; + + if (!data.rodata_label) { + data.rodata_label = ctx->cfg_blocks_count + ctx->consts_count + 2; + + |.rodata + |=>data.rodata_label: + } + if (insn->type == IR_DOUBLE) { + |.align 8 + |=>label: + |.dword insn->val.u32, insn->val.u32_hi + } else { + IR_ASSERT(insn->type == IR_FLOAT); + |.align 4 + |=>label: + |.dword insn->val.u32 + } + } else if (insn->op == IR_STR) { + int label = ctx->cfg_blocks_count + i; + const char *str = ir_get_str(ctx, insn->val.i32); + int i = 0; + + if (!data.rodata_label) { + data.rodata_label = ctx->cfg_blocks_count + ctx->consts_count + 2; + + |.rodata + |=>data.rodata_label: + } + |.align 8 + |=>label: + while (str[i]) { + char c = str[i]; + + if (c == '\\') { + if (str[i+1] == '\\') { + i++; + c = '\\'; + } else if (str[i+1] == '\'') { + i++; + c = '\''; + } else if (str[i+1] == '"') { + i++; + c = '"'; + } else if (str[i+1] == 'a') { + i++; + c = '\a'; + } else if (str[i+1] == 'b') { + i++; + c = '\b'; + } else if (str[i+1] == 'e') { + i++; + c = 27; /* '\e'; */ + } else if (str[i+1] == 'f') { + i++; + c = '\f'; + } else if (str[i+1] == 'n') { + i++; + c = '\n'; + } else if (str[i+1] == 'r') { + i++; + c = '\r'; + } else if (str[i+1] == 't') { + i++; + c = '\t'; + } else if (str[i+1] == 'v') { + i++; + c = '\v'; + } else if (str[i+1] == '?') { + i++; + c = 0x3f; + } + } + |.byte c + i++; + } + |.byte 0 + + } else { + IR_ASSERT(0); + } + } + } + if (data.rodata_label) { + |.code + } + + if 
(ctx->status) { + dasm_free(&data.dasm_state); + ctx->data = NULL; + return NULL; + } + + ret = dasm_link(&data.dasm_state, size_ptr); + if (ret != DASM_S_OK) { + IR_ASSERT(0); + dasm_free(&data.dasm_state); + ctx->data = NULL; + ctx->status = IR_ERROR_LINK; + return NULL; + } + size = *size_ptr; + + if (ctx->code_buffer != NULL) { + if (IR_ALIGNED_SIZE(size, 16) > ctx->code_buffer_size) { + dasm_free(&data.dasm_state); + ctx->data = NULL; + ctx->status = IR_ERROR_CODE_MEM_OVERFLOW; + return NULL; + } + entry = ctx->code_buffer; + IR_ASSERT((uintptr_t)entry % 16 == 0); + } else { + entry = ir_mem_mmap(size); + if (!entry) { + dasm_free(&data.dasm_state); + ctx->data = NULL; + ctx->status = IR_ERROR_CODE_MEM_OVERFLOW; + return NULL; + } + ir_mem_unprotect(entry, size); + } + + ret = dasm_encode(&data.dasm_state, entry); + if (ret != DASM_S_OK) { + IR_ASSERT(0); + dasm_free(&data.dasm_state); + if (ctx->code_buffer == NULL) { + ir_mem_unmap(entry, size); + } + ctx->data = NULL; + ctx->status = IR_ERROR_ENCODE; + return NULL; + } + + if (data.jmp_table_label) { + uint32_t offset = dasm_getpclabel(&data.dasm_state, data.jmp_table_label); + ctx->jmp_table_offset = offset; + } else { + ctx->jmp_table_offset = 0; + } + if (data.rodata_label) { + uint32_t offset = dasm_getpclabel(&data.dasm_state, data.rodata_label); + ctx->rodata_offset = offset; + } else { + ctx->rodata_offset = 0; + } + + if (ctx->entries_count) { + /* For all entries */ + i = ctx->entries_count; + do { + ir_insn *insn = &ctx->ir_base[ctx->entries[--i]]; + uint32_t offset = dasm_getpclabel(&data.dasm_state, ctx->cfg_blocks_count + ctx->consts_count + 4 + insn->op3); + insn->op3 = offset; + } while (i != 0); + } + + dasm_free(&data.dasm_state); + + ir_mem_flush(entry, size); + + if (ctx->code_buffer == NULL) { + ir_mem_protect(entry, size); + } + + ctx->data = NULL; + return entry; +} + +const void *ir_emit_exitgroup(uint32_t first_exit_point, uint32_t exit_points_per_group, const void *exit_addr, void *code_buffer, size_t code_buffer_size, size_t *size_ptr) +{ + void *entry; + size_t size; + uint32_t i; + dasm_State **Dst, *dasm_state; + int ret; + + IR_ASSERT(code_buffer); + IR_ASSERT(IR_IS_SIGNED_32BIT((char*)exit_addr - (char*)code_buffer)); + IR_ASSERT(IR_IS_SIGNED_32BIT((char*)exit_addr - ((char*)code_buffer + code_buffer_size))); + + Dst = &dasm_state; + dasm_state = NULL; + dasm_init(&dasm_state, DASM_MAXSECTION); + dasm_setupglobal(&dasm_state, dasm_labels, ir_lb_MAX); + dasm_setup(&dasm_state, dasm_actions); + + for (i = 0; i < exit_points_per_group - 1; i++) { + | push byte i + | .byte 0xeb, (4*(exit_points_per_group-i)-6) // jmp >1 + } + | push byte i + |// 1: + | add aword [r4], first_exit_point + | jmp aword &exit_addr + + ret = dasm_link(&dasm_state, &size); + if (ret != DASM_S_OK) { + IR_ASSERT(0); + dasm_free(&dasm_state); + return NULL; + } + + if (code_buffer != NULL) { + if (IR_ALIGNED_SIZE(size, 16) > code_buffer_size) { + dasm_free(&dasm_state); + return NULL; + } + entry = code_buffer; + IR_ASSERT((uintptr_t)entry % 16 == 0); + } else { + entry = ir_mem_mmap(size); + ir_mem_unprotect(entry, size); + } + + ret = dasm_encode(&dasm_state, entry); + if (ret != DASM_S_OK) { + IR_ASSERT(0); + dasm_free(&dasm_state); + if (code_buffer == NULL) { + ir_mem_unmap(entry, size); + } + return NULL; + } + + dasm_free(&dasm_state); + + ir_mem_flush(entry, size); + + if (code_buffer == NULL) { + ir_mem_protect(entry, size); + } + + *size_ptr = size; + return entry; +} diff --git a/ext/opcache/jit/ir/ir_x86.h 
b/ext/opcache/jit/ir/ir_x86.h new file mode 100644 index 00000000000..ff4b767b2eb --- /dev/null +++ b/ext/opcache/jit/ir/ir_x86.h @@ -0,0 +1,226 @@ +/* + * IR - Lightweight JIT Compilation Framework + * (x86/x86_64 CPU specific definitions) + * Copyright (C) 2022 Zend by Perforce. + * Authors: Dmitry Stogov + */ + +#ifndef IR_X86_H +#define IR_X86_H + +#if defined(IR_TARGET_X64) +# define IR_GP_REGS(_) \ + _(R0, rax, eax, ax, al, ah) \ + _(R1, rcx, ecx, cx, cl, ch) \ + _(R2, rdx, edx, dx, dl, dh) \ + _(R3, rbx, ebx, bx, bl, bh) \ + _(R4, rsp, esp, __, __, __) \ + _(R5, rbp, ebp, bp, r5b, __) \ + _(R6, rsi, esi, si, r6b, __) \ + _(R7, rdi, edi, di, r7b, __) \ + _(R8, r8, r8d, r8w, r8b, __) \ + _(R9, r9, r9d, r9w, r9b, __) \ + _(R10, r10, r10d, r10w, r10b, __) \ + _(R11, r11, r11d, r11w, r11b, __) \ + _(R12, r12, r12d, r12w, r12b, __) \ + _(R13, r13, r13d, r13w, r13b, __) \ + _(R14, r14, r14d, r14w, r14b, __) \ + _(R15, r15, r15d, r15w, r15b, __) \ + +# define IR_FP_REGS(_) \ + _(XMM0, xmm0) \ + _(XMM1, xmm1) \ + _(XMM2, xmm2) \ + _(XMM3, xmm3) \ + _(XMM4, xmm4) \ + _(XMM5, xmm5) \ + _(XMM6, xmm6) \ + _(XMM7, xmm7) \ + _(XMM8, xmm8) \ + _(XMM9, xmm9) \ + _(XMM10, xmm10) \ + _(XMM11, xmm11) \ + _(XMM12, xmm12) \ + _(XMM13, xmm13) \ + _(XMM14, xmm14) \ + _(XMM15, xmm15) \ + +#elif defined(IR_TARGET_X86) + +# define IR_GP_REGS(_) \ + _(R0, ___, eax, ax, al, ah) \ + _(R1, ___, ecx, cx, cl, ch) \ + _(R2, ___, edx, dx, dl, dh) \ + _(R3, ___, ebx, bx, bl, bh) \ + _(R4, ___, esp, __, __, __) \ + _(R5, ___, ebp, bp, __, __) \ + _(R6, ___, esi, si, __, __) \ + _(R7, ___, edi, di, __, __) \ + +# define IR_FP_REGS(_) \ + _(XMM0, xmm0) \ + _(XMM1, xmm1) \ + _(XMM2, xmm2) \ + _(XMM3, xmm3) \ + _(XMM4, xmm4) \ + _(XMM5, xmm5) \ + _(XMM6, xmm6) \ + _(XMM7, xmm7) \ + +#else +# error "Unsupported target architecture" +#endif + +#define IR_GP_REG_ENUM(code, name64, name32, name16, name8, name8h) \ + IR_REG_ ## code, + +#define IR_FP_REG_ENUM(code, name) \ + IR_REG_ ## code, + +enum _ir_reg { + _IR_REG_NONE = -1, + IR_GP_REGS(IR_GP_REG_ENUM) + IR_FP_REGS(IR_FP_REG_ENUM) + IR_REG_NUM, +}; + +#define IR_REG_GP_FIRST IR_REG_R0 +#define IR_REG_FP_FIRST IR_REG_XMM0 +#define IR_REG_GP_LAST (IR_REG_FP_FIRST - 1) +#define IR_REG_FP_LAST (IR_REG_NUM - 1) +#define IR_REG_SCRATCH (IR_REG_NUM) /* special name for regset */ +#define IR_REG_ALL (IR_REG_NUM + 1) /* special name for regset */ + +#define IR_REGSET_64BIT 0 + +#define IR_REG_STACK_POINTER \ + IR_REG_RSP +#define IR_REG_FRAME_POINTER \ + IR_REG_RBP +#define IR_REGSET_FIXED \ + (IR_REGSET(IR_REG_RSP)) +#define IR_REGSET_GP \ + IR_REGSET_DIFFERENCE(IR_REGSET_INTERVAL(IR_REG_GP_FIRST, IR_REG_GP_LAST), IR_REGSET_FIXED) +#define IR_REGSET_FP \ + IR_REGSET_DIFFERENCE(IR_REGSET_INTERVAL(IR_REG_FP_FIRST, IR_REG_FP_LAST), IR_REGSET_FIXED) + +#define IR_REG_RAX IR_REG_R0 +#define IR_REG_RCX IR_REG_R1 +#define IR_REG_RDX IR_REG_R2 +#define IR_REG_RBX IR_REG_R3 +#define IR_REG_RSP IR_REG_R4 +#define IR_REG_RBP IR_REG_R5 +#define IR_REG_RSI IR_REG_R6 +#define IR_REG_RDI IR_REG_R7 + +/* Calling Convention */ +#ifdef _WIN64 + +# define IR_REG_INT_RET1 IR_REG_RAX +# define IR_REG_FP_RET1 IR_REG_XMM0 +# define IR_REG_INT_ARGS 4 +# define IR_REG_FP_ARGS 4 +# define IR_REG_INT_ARG1 IR_REG_RCX +# define IR_REG_INT_ARG2 IR_REG_RDX +# define IR_REG_INT_ARG3 IR_REG_R8 +# define IR_REG_INT_ARG4 IR_REG_R9 +# define IR_REG_FP_ARG1 IR_REG_XMM0 +# define IR_REG_FP_ARG2 IR_REG_XMM1 +# define IR_REG_FP_ARG3 IR_REG_XMM2 +# define IR_REG_FP_ARG4 IR_REG_XMM3 +# define IR_MAX_REG_ARGS 4 +# define 
IR_SHADOW_ARGS 32 /* Reserved space in bytes - "home space" or "shadow store" for register arguments */ + +# define IR_REGSET_SCRATCH \ + (IR_REGSET_INTERVAL(IR_REG_RAX, IR_REG_RDX) \ + | IR_REGSET_INTERVAL(IR_REG_R8, IR_REG_R11) \ + | IR_REGSET_INTERVAL(IR_REG_XMM0, IR_REG_XMM5)) + +# define IR_REGSET_PRESERVED \ + (IR_REGSET(IR_REG_RBX) \ + | IR_REGSET_INTERVAL(IR_REG_RBP, IR_REG_RDI) \ + | IR_REGSET_INTERVAL(IR_REG_R12, IR_REG_R15) \ + | IR_REGSET_INTERVAL(IR_REG_XMM6, IR_REG_XMM15)) + +#elif defined(IR_TARGET_X64) + +# define IR_REG_INT_RET1 IR_REG_RAX +# define IR_REG_FP_RET1 IR_REG_XMM0 +# define IR_REG_INT_ARGS 6 +# define IR_REG_FP_ARGS 8 +# define IR_REG_INT_ARG1 IR_REG_RDI +# define IR_REG_INT_ARG2 IR_REG_RSI +# define IR_REG_INT_ARG3 IR_REG_RDX +# define IR_REG_INT_ARG4 IR_REG_RCX +# define IR_REG_INT_ARG5 IR_REG_R8 +# define IR_REG_INT_ARG6 IR_REG_R9 +# define IR_REG_FP_ARG1 IR_REG_XMM0 +# define IR_REG_FP_ARG2 IR_REG_XMM1 +# define IR_REG_FP_ARG3 IR_REG_XMM2 +# define IR_REG_FP_ARG4 IR_REG_XMM3 +# define IR_REG_FP_ARG5 IR_REG_XMM4 +# define IR_REG_FP_ARG6 IR_REG_XMM5 +# define IR_REG_FP_ARG7 IR_REG_XMM6 +# define IR_REG_FP_ARG8 IR_REG_XMM7 +# define IR_MAX_REG_ARGS 14 +# define IR_SHADOW_ARGS 0 + +# define IR_REG_VARARG_FP_REGS IR_REG_RAX /* hidden argument to specify the number of vector registers used */ + +# define IR_REGSET_SCRATCH \ + (IR_REGSET_INTERVAL(IR_REG_RAX, IR_REG_RDX) \ + | IR_REGSET_INTERVAL(IR_REG_RSI, IR_REG_RDI) \ + | IR_REGSET_INTERVAL(IR_REG_R8, IR_REG_R11) \ + | IR_REGSET_FP) + +# define IR_REGSET_PRESERVED \ + (IR_REGSET(IR_REG_RBX) \ + | IR_REGSET(IR_REG_RBP) \ + | IR_REGSET_INTERVAL(IR_REG_R12, IR_REG_R15)) + +#elif defined(IR_TARGET_X86) + +# define IR_REG_INT_RET1 IR_REG_RAX +# define IR_REG_INT_RET2 IR_REG_RDX +# define IR_REG_INT_ARGS 0 +# define IR_REG_FP_ARGS 0 + +# define IR_HAVE_FASTCALL 1 +# define IR_REG_INT_FCARGS 2 +# define IR_REG_FP_FCARGS 0 +# define IR_REG_INT_FCARG1 IR_REG_RCX +# define IR_REG_INT_FCARG2 IR_REG_RDX +# define IR_MAX_REG_ARGS 2 +# define IR_SHADOW_ARGS 0 + +# define IR_REGSET_SCRATCH \ + (IR_REGSET_INTERVAL(IR_REG_RAX, IR_REG_RDX) | IR_REGSET_FP) + +# define IR_REGSET_PRESERVED \ + (IR_REGSET(IR_REG_RBX) \ + | IR_REGSET(IR_REG_RBP) \ + | IR_REGSET_INTERVAL(IR_REG_RSI, IR_REG_RDI)) + +#else +# error "Unsupported target architecture" +#endif + +typedef struct _ir_tmp_reg { + union { + uint8_t num; + int8_t reg; + }; + uint8_t type; + uint8_t start; + uint8_t end; +} ir_tmp_reg; + +struct _ir_target_constraints { + int8_t def_reg; + uint8_t tmps_count; + uint8_t hints_count; + ir_tmp_reg tmp_regs[3]; + int8_t hints[IR_MAX_REG_ARGS + 3]; +}; + +#endif /* IR_X86_H */ diff --git a/ext/opcache/jit/ir/y.txt b/ext/opcache/jit/ir/y.txt new file mode 100644 index 00000000000..b125f850050 --- /dev/null +++ b/ext/opcache/jit/ir/y.txt @@ -0,0 +1,6 @@ +llvm.floor.f64 +llvm.fmuladd.f64 +llvm.memcpy.p0.p0.i64 +llvm.memset.p0.i64 +llvm.va_end +llvm.va_start diff --git a/ext/opcache/jit/zend_jit.c b/ext/opcache/jit/zend_jit.c index 3d086cd27c3..02cb0cab797 100644 --- a/ext/opcache/jit/zend_jit.c +++ b/ext/opcache/jit/zend_jit.c @@ -39,11 +39,15 @@ #include "Optimizer/zend_call_graph.h" #include "Optimizer/zend_dump.h" +#ifndef ZEND_JIT_IR #if ZEND_JIT_TARGET_X86 # include "jit/zend_jit_x86.h" #elif ZEND_JIT_TARGET_ARM64 # include "jit/zend_jit_arm64.h" #endif +#else +#include "Optimizer/zend_worklist.h" +#endif #include "jit/zend_jit_internal.h" @@ -74,6 +78,7 @@ zend_jit_globals jit_globals; #define JIT_STUB_PREFIX "JIT$$" #define 
TRACE_PREFIX "TRACE-" +#ifndef ZEND_JIT_IR #define DASM_M_GROW(ctx, t, p, sz, need) \ do { \ size_t _sz = (sz), _need = (need); \ @@ -102,6 +107,7 @@ typedef struct _zend_jit_stub { #define JIT_STUB(name, offset, adjustment) \ {JIT_STUB_PREFIX #name, zend_jit_ ## name ## _stub, offset, adjustment} +#endif /* ZEND_JIT_IR */ zend_ulong zend_jit_profile_counter = 0; int zend_jit_profile_counter_rid = -1; @@ -137,8 +143,11 @@ static int zend_jit_trace_may_exit(const zend_op_array *op_array, const zend_op static uint32_t zend_jit_trace_get_exit_point(const zend_op *to_opline, uint32_t flags); static const void *zend_jit_trace_get_exit_addr(uint32_t n); static void zend_jit_trace_add_code(const void *start, uint32_t size); +static zend_string *zend_jit_func_name(const zend_op_array *op_array); + static bool zend_jit_needs_arg_dtor(const zend_function *func, uint32_t arg_num, zend_call_info *call_info); +#ifndef ZEND_JIT_IR #if ZEND_JIT_TARGET_ARM64 static zend_jit_trace_info *zend_jit_get_current_trace_info(void); static uint32_t zend_jit_trace_find_exit_point(const void* addr); @@ -180,6 +189,7 @@ static int zend_jit_assign_to_variable(dasm_State **Dst, uint32_t val_info, zend_jit_addr res_addr, bool check_exception); +#endif /* ZEND_JIT_IR */ static bool dominates(const zend_basic_block *blocks, int a, int b) { while (blocks[b].level > blocks[a].level) { @@ -206,6 +216,18 @@ static bool zend_ssa_is_last_use(const zend_op_array *op_array, const zend_ssa * || (ssa->cfg.blocks[ssa->cfg.map[use]].flags & ZEND_BB_LOOP_HEADER)) { int b = ssa->cfg.map[use]; int prev_use = ssa->vars[var].use_chain; + int def_block; + + if (ssa->vars[var].definition >= 0) { + def_block =ssa->cfg.map[ssa->vars[var].definition]; + } else { + ZEND_ASSERT(ssa->vars[var].definition_phi); + def_block = ssa->vars[var].definition_phi->block; + } + if (dominates(ssa->cfg.blocks, def_block, + (ssa->cfg.blocks[b].flags & ZEND_BB_LOOP_HEADER) ? b : ssa->cfg.blocks[b].loop_header)) { + return 0; + } while (prev_use >= 0 && prev_use != use) { if (b != ssa->cfg.map[prev_use] @@ -226,6 +248,7 @@ static bool zend_ssa_is_last_use(const zend_op_array *op_array, const zend_ssa * return 0; } +#ifndef ZEND_JIT_IR static bool zend_ival_is_last_use(const zend_lifetime_interval *ival, int use) { if (ival->flags & ZREG_LAST_USE) { @@ -248,6 +271,7 @@ static bool zend_is_commutative(uint8_t opcode) opcode == ZEND_BW_AND || opcode == ZEND_BW_XOR; } +#endif static int zend_jit_is_constant_cmp_long_long(const zend_op *opline, zend_ssa_range *op1_range, @@ -737,6 +761,7 @@ static bool zend_may_be_dynamic_property(zend_class_entry *ce, zend_string *memb #define OP2_RANGE() OP_RANGE(ssa_op, op2) #define OP1_DATA_RANGE() OP_RANGE(ssa_op + 1, op1) +#ifndef ZEND_JIT_IR #if ZEND_JIT_TARGET_X86 # include "dynasm/dasm_x86.h" #elif ZEND_JIT_TARGET_ARM64 @@ -758,6 +783,11 @@ static int zend_jit_add_veneer(dasm_State *Dst, void *buffer, uint32_t ins, int # include #endif +#else /* ZEND_JIT_IR */ +#include "jit/zend_jit_helpers.c" +#include "Zend/zend_cpuinfo.h" +#endif /* ZEND_JIT_IR */ + #ifdef HAVE_GCC_GLOBAL_REGS # define GCC_GLOBAL_REGS 1 #else @@ -773,6 +803,7 @@ static int zend_jit_add_veneer(dasm_State *Dst, void *buffer, uint32_t ins, int #define BP_JIT_IS 6 /* Used for ISSET_ISEMPTY_DIM_OBJ. 
see BP_VAR_*defines in Zend/zend_compile.h */ +#ifndef ZEND_JIT_IR typedef enum _sp_adj_kind { SP_ADJ_NONE, SP_ADJ_RET, @@ -783,6 +814,7 @@ typedef enum _sp_adj_kind { } sp_adj_kind; static int sp_adj[SP_ADJ_LAST]; +#endif /* The generated code may contain tautological comparisons, ignore them. */ #if defined(__clang__) @@ -791,12 +823,16 @@ static int sp_adj[SP_ADJ_LAST]; # pragma clang diagnostic ignored "-Wstring-compare" #endif +#ifndef ZEND_JIT_IR #if ZEND_JIT_TARGET_X86 # include "jit/zend_jit_vtune.c" # include "jit/zend_jit_x86.c" #elif ZEND_JIT_TARGET_ARM64 # include "jit/zend_jit_arm64.c" #endif +#else +#include "jit/zend_jit_ir.c" +#endif #if defined(__clang__) # pragma clang diagnostic pop @@ -835,19 +871,20 @@ static zend_string *zend_jit_func_name(const zend_op_array *op_array) smart_str buf = {0}; if (op_array->function_name) { + smart_str_appends(&buf, JIT_PREFIX); if (op_array->scope) { - smart_str_appends(&buf, JIT_PREFIX); smart_str_appendl(&buf, ZSTR_VAL(op_array->scope->name), ZSTR_LEN(op_array->scope->name)); smart_str_appends(&buf, "::"); - smart_str_appendl(&buf, ZSTR_VAL(op_array->function_name), ZSTR_LEN(op_array->function_name)); - smart_str_0(&buf); - return buf.s; - } else { - smart_str_appends(&buf, JIT_PREFIX); - smart_str_appendl(&buf, ZSTR_VAL(op_array->function_name), ZSTR_LEN(op_array->function_name)); - smart_str_0(&buf); - return buf.s; } + smart_str_appendl(&buf, ZSTR_VAL(op_array->function_name), ZSTR_LEN(op_array->function_name)); + if (op_array->fn_flags & ZEND_ACC_CLOSURE) { + smart_str_appends(&buf, ":"); + smart_str_appendl(&buf, ZSTR_VAL(op_array->filename), ZSTR_LEN(op_array->filename)); + smart_str_appends(&buf, ":"); + smart_str_append_long(&buf, op_array->line_start); + } + smart_str_0(&buf); + return buf.s; } else if (op_array->filename) { smart_str_appends(&buf, JIT_PREFIX); smart_str_appendl(&buf, ZSTR_VAL(op_array->filename), ZSTR_LEN(op_array->filename)); @@ -858,6 +895,7 @@ static zend_string *zend_jit_func_name(const zend_op_array *op_array) } } +#ifndef ZEND_JIT_IR #if ZEND_DEBUG static void handle_dasm_error(int ret) { switch (ret & 0xff000000u) { @@ -1130,6 +1168,7 @@ static void *dasm_link_and_encode(dasm_State **dasm_state, return entry; } +#endif /* ZEND_JIT_IR */ static int zend_may_overflow(const zend_op *opline, const zend_ssa_op *ssa_op, const zend_op_array *op_array, zend_ssa *ssa) { @@ -1377,6 +1416,7 @@ static int zend_jit_op_array_analyze2(const zend_op_array *op_array, zend_script return SUCCESS; } +#ifndef ZEND_JIT_IR static int zend_jit_add_range(zend_lifetime_interval **intervals, int var, uint32_t from, uint32_t to) { zend_lifetime_interval *ival = intervals[var]; @@ -2660,6 +2700,198 @@ failure: return NULL; } +#else /* ZEND_JIT_IR */ + +static void zend_jit_allocate_registers(zend_jit_ctx *ctx, const zend_op_array *op_array, zend_ssa *ssa) +{ + void *checkpoint; + int candidates_count, i; + zend_jit_reg_var *ra; + + checkpoint = zend_arena_checkpoint(CG(arena)); + ra = zend_arena_calloc(&CG(arena), ssa->vars_count, sizeof(zend_jit_reg_var)); + candidates_count = 0; + for (i = 0; i < ssa->vars_count; i++) { + if (zend_jit_may_be_in_reg(op_array, ssa, i)) { + ra[i].ref = IR_NULL; + candidates_count++; + } + } + if (!candidates_count) { + zend_arena_release(&CG(arena), checkpoint); + return; + } + + if (JIT_G(opt_flags) & ZEND_JIT_REG_ALLOC_GLOBAL) { + /* Naive SSA resolution */ + for (i = 0; i < ssa->vars_count; i++) { + if (ssa->vars[i].definition_phi && !ssa->vars[i].no_val) { + zend_ssa_phi *phi = 
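/*
 * The zend_jit_func_name() change earlier in this hunk appends
 * ":filename:line" for closures: every closure is otherwise named
 * "{closure}", so the JIT symbol names used by disassembly and profilers
 * would collide.  The resulting shape, sketched with plain snprintf (a
 * simplification: the real code builds a zend_string via smart_str):
 */
#include <stdio.h>

static void jit_symbol(char *buf, size_t n,
                       const char *scope, const char *func,
                       const char *file, int line, int is_closure)
{
    int len = snprintf(buf, n, "JIT$$%s%s%s",
                       scope ? scope : "", scope ? "::" : "", func);
    if (is_closure && len > 0 && (size_t)len < n) {
        snprintf(buf + len, n - len, ":%s:%d", file, line);
    }
}
/* e.g. JIT$$Foo::{closure}:/srv/app.php:42 */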
ssa->vars[i].definition_phi; + int k, src; + + if (phi->pi >= 0) { + src = phi->sources[0]; + if (ra[i].ref) { + if (!ra[src].ref) { + ra[i].flags |= ZREG_LOAD; + } else { + ra[i].flags |= ZREG_PI; + } + } else if (ra[src].ref) { + ra[src].flags |= ZREG_STORE; + } + } else { + int need_move = 0; + + for (k = 0; k < ssa->cfg.blocks[phi->block].predecessors_count; k++) { + src = phi->sources[k]; + if (src >= 0) { + if (ssa->vars[src].definition_phi + && ssa->vars[src].definition_phi->pi >= 0 + && phi->block == ssa->vars[src].definition_phi->block) { + /* Skip zero-length interval for Pi variable */ + src = ssa->vars[src].definition_phi->sources[0]; + } + if (ra[i].ref) { + if (!ra[src].ref) { + need_move = 1; + } + } else if (ra[src].ref) { + need_move = 1; + } + } + } + if (need_move) { + if (ra[i].ref) { + ra[i].flags |= ZREG_LOAD; + } + for (k = 0; k < ssa->cfg.blocks[phi->block].predecessors_count; k++) { + src = phi->sources[k]; + if (src >= 0) { + if (ssa->vars[src].definition_phi + && ssa->vars[src].definition_phi->pi >= 0 + && phi->block == ssa->vars[src].definition_phi->block) { + /* Skip zero-length interval for Pi variable */ + src = ssa->vars[src].definition_phi->sources[0]; + } + if (ra[src].ref) { + ra[src].flags |= ZREG_STORE; + } + } + } + } else { + ra[i].flags |= ZREG_PHI; + } + } + } + } + + /* Remove useless register allocation */ + for (i = 0; i < ssa->vars_count; i++) { + if (ra[i].ref && + ((ra[i].flags & ZREG_LOAD) || + ((ra[i].flags & ZREG_STORE) && ssa->vars[i].definition >= 0)) && + ssa->vars[i].use_chain < 0) { + bool may_remove = 1; + zend_ssa_phi *phi = ssa->vars[i].phi_use_chain; + + while (phi) { + if (ra[phi->ssa_var].ref && + !(ra[phi->ssa_var].flags & ZREG_LOAD)) { + may_remove = 0; + break; + } + phi = zend_ssa_next_use_phi(ssa, i, phi); + } + if (may_remove) { + ra[i].ref = IR_UNUSED; + } + } + } + + /* Remove intervals used once */ + for (i = 0; i < ssa->vars_count; i++) { + if (ra[i].ref && + (ra[i].flags & ZREG_LOAD) && + (ra[i].flags & ZREG_STORE) && + (ssa->vars[i].use_chain < 0 || + zend_ssa_next_use(ssa->ops, i, ssa->vars[i].use_chain) < 0)) { + bool may_remove = 1; + zend_ssa_phi *phi = ssa->vars[i].phi_use_chain; + + while (phi) { + if (ra[phi->ssa_var].ref && + !(ra[phi->ssa_var].flags & ZREG_LOAD)) { + may_remove = 0; + break; + } + phi = zend_ssa_next_use_phi(ssa, i, phi); + } + if (may_remove) { + ra[i].ref = IR_UNUSED; + } + } + } + } + + if (JIT_G(debug) & ZEND_JIT_DEBUG_REG_ALLOC) { + fprintf(stderr, "Live Ranges \"%s\"\n", op_array->function_name ? ZSTR_VAL(op_array->function_name) : "[main]"); + for (i = 0; i < ssa->vars_count; i++) { + if (ra[i].ref) { + fprintf(stderr, "#%d.", i); + uint32_t var_num = ssa->vars[i].var; + zend_dump_var(op_array, (var_num < op_array->last_var ? 
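/*
 * The "naive SSA resolution" above decides, per phi, whether its result can
 * stay in a register: if the result and all sources agree on being in
 * registers, no move is needed and the phi is wired into the IR (ZREG_PHI);
 * otherwise the result is reloaded (ZREG_LOAD) and any in-register source is
 * flushed to memory (ZREG_STORE).  The rule, condensed into a hypothetical
 * helper with illustrative R_* flags:
 */
enum { R_LOAD = 1 << 0, R_STORE = 1 << 1, R_PHI = 1 << 4 };

static int resolve_phi(int dst_in_reg, const int *src_in_reg, int n,
                       int *src_flags)
{
    int need_move = 0, k;

    for (k = 0; k < n; k++) {
        if (dst_in_reg != src_in_reg[k]) {
            need_move = 1;   /* value crosses a register/memory boundary */
        }
    }
    if (!need_move) {
        return R_PHI;        /* wire the phi directly into the IR */
    }
    for (k = 0; k < n; k++) {
        if (src_in_reg[k]) {
            src_flags[k] |= R_STORE;
        }
    }
    return dst_in_reg ? R_LOAD : 0;
}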
IS_CV : 0), var_num); + if (ra[i].flags & ZREG_LOAD) { + fprintf(stderr, " load"); + } + if (ra[i].flags & ZREG_STORE) { + fprintf(stderr, " store"); + } + fprintf(stderr, "\n"); + } + } + fprintf(stderr, "\n"); + } + + ctx->ra = ra; +} + +static int zend_jit_compute_post_order(zend_cfg *cfg, int start, int *post_order) +{ + int count = 0; + int b, n, *p; + zend_basic_block *bb; + zend_worklist worklist; + ALLOCA_FLAG(use_heap) + + ZEND_WORKLIST_ALLOCA(&worklist, cfg->blocks_count, use_heap); + zend_worklist_push(&worklist, start); + + while (zend_worklist_len(&worklist) != 0) { +next: + b = zend_worklist_peek(&worklist); + bb = &cfg->blocks[b]; + n = bb->successors_count; + if (n > 0) { + p = bb->successors; + do { + if (cfg->blocks[*p].flags & (ZEND_BB_CATCH|ZEND_BB_FINALLY|ZEND_BB_FINALLY_END)) { + /* skip */ + } else if (zend_worklist_push(&worklist, *p)) { + goto next; + } + p++; + n--; + } while (n > 0); + } + zend_worklist_pop(&worklist); + post_order[count++] = b; + } + ZEND_WORKLIST_FREE_ALLOCA(&worklist, use_heap); + return count; +} +#endif /* ZEND_JIT_IR */ + static bool zend_jit_next_is_send_result(const zend_op *opline) { if (opline->result_type == IS_TMP_VAR @@ -2704,12 +2936,18 @@ static int zend_jit(const zend_op_array *op_array, zend_ssa *ssa, const zend_op { int b, i, end; zend_op *opline; - dasm_State* dasm_state = NULL; +#ifndef ZEND_JIT_IR + dasm_State* ctx = NULL; + zend_lifetime_interval **ra = NULL; + bool is_terminated = 1; /* previous basic block is terminated by jump */ +#else + zend_jit_ctx ctx; + zend_jit_ctx *jit = &ctx; + zend_jit_reg_var *ra = NULL; +#endif void *handler; int call_level = 0; void *checkpoint = NULL; - zend_lifetime_interval **ra = NULL; - bool is_terminated = 1; /* previous basic block is terminated by jump */ bool recv_emitted = 0; /* emitted at least one RECV opcode */ uint8_t smart_branch_opcode; uint32_t target_label, target_label2; @@ -2733,6 +2971,7 @@ static int zend_jit(const zend_op_array *op_array, zend_ssa *ssa, const zend_op } } +#ifndef ZEND_JIT_IR if (JIT_G(opt_flags) & (ZEND_JIT_REG_ALLOC_LOCAL|ZEND_JIT_REG_ALLOC_GLOBAL)) { checkpoint = zend_arena_checkpoint(CG(arena)); ra = zend_jit_allocate_registers(op_array, ssa); @@ -2751,13 +2990,13 @@ static int zend_jit(const zend_op_array *op_array, zend_ssa *ssa, const zend_op } } - dasm_init(&dasm_state, DASM_MAXSECTION); - dasm_setupglobal(&dasm_state, dasm_labels, zend_lb_MAX); - dasm_setup(&dasm_state, dasm_actions); + dasm_init(&ctx, DASM_MAXSECTION); + dasm_setupglobal(&ctx, dasm_labels, zend_lb_MAX); + dasm_setup(&ctx, dasm_actions); - dasm_growpc(&dasm_state, ssa->cfg.blocks_count * 2 + 1); + dasm_growpc(&ctx, ssa->cfg.blocks_count * 2 + 1); - zend_jit_align_func(&dasm_state); + zend_jit_align_func(&ctx); for (b = 0; b < ssa->cfg.blocks_count; b++) { if ((ssa->cfg.blocks[b].flags & ZEND_BB_REACHABLE) == 0) { continue; @@ -2775,11 +3014,11 @@ static int zend_jit(const zend_op_array *op_array, zend_ssa *ssa, const zend_op } if (ssa->cfg.blocks[b].flags & ZEND_BB_FOLLOW) { if (!is_terminated) { - zend_jit_jmp(&dasm_state, b); + zend_jit_jmp(&ctx, b); } } - zend_jit_label(&dasm_state, ssa->cfg.blocks_count + b); - zend_jit_prologue(&dasm_state); + zend_jit_label(&ctx, ssa->cfg.blocks_count + b); + zend_jit_prologue(&ctx); } else //#endif if (ssa->cfg.blocks[b].flags & (ZEND_BB_START|ZEND_BB_RECV_ENTRY)) { @@ -2789,13 +3028,13 @@ static int zend_jit(const zend_op_array *op_array, zend_ssa *ssa, const zend_op if (opline == op_array->opcodes || (opline-1)->opcode != ZEND_RECV_INIT) 
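/*
 * zend_jit_compute_post_order() above is a non-recursive DFS over the CFG:
 * a block is appended to the output only once all of its non-exceptional
 * successors (catch/finally blocks are skipped) have been visited, so
 * reading the result array from the end yields a reverse post-order, the
 * order in which the IR builder wants to see blocks.  A recursive
 * equivalent, for clarity, with stand-in CFG types and no exception-block
 * filtering:
 */
#include <stdbool.h>

typedef struct { const int *succ; int nsucc; } cfgbb_t;

static void post_order(const cfgbb_t *cfg, int b, bool *seen,
                       int *out, int *count)
{
    seen[b] = true;
    for (int i = 0; i < cfg[b].nsucc; i++) {
        if (!seen[cfg[b].succ[i]]) {
            post_order(cfg, cfg[b].succ[i], seen, out, count);
        }
    }
    out[(*count)++] = b;   /* emitted strictly after its successors */
}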
{ if (recv_emitted) { - zend_jit_jmp(&dasm_state, b); + zend_jit_jmp(&ctx, b); } - zend_jit_label(&dasm_state, ssa->cfg.blocks_count + b); + zend_jit_label(&ctx, ssa->cfg.blocks_count + b); for (i = 1; (opline+i)->opcode == ZEND_RECV_INIT; i++) { - zend_jit_label(&dasm_state, ssa->cfg.blocks_count + b + i); + zend_jit_label(&ctx, ssa->cfg.blocks_count + b + i); } - zend_jit_prologue(&dasm_state); + zend_jit_prologue(&ctx); } recv_emitted = 1; } else if (opline->opcode == ZEND_RECV) { @@ -2803,9 +3042,9 @@ static int zend_jit(const zend_op_array *op_array, zend_ssa *ssa, const zend_op /* skip */ continue; } else if (recv_emitted) { - zend_jit_jmp(&dasm_state, b); - zend_jit_label(&dasm_state, ssa->cfg.blocks_count + b); - zend_jit_prologue(&dasm_state); + zend_jit_jmp(&ctx, b); + zend_jit_label(&ctx, ssa->cfg.blocks_count + b); + zend_jit_prologue(&ctx); } else { zend_arg_info *arg_info; @@ -2821,47 +3060,156 @@ static int zend_jit(const zend_op_array *op_array, zend_ssa *ssa, const zend_op /* skip */ continue; } - zend_jit_label(&dasm_state, ssa->cfg.blocks_count + b); - zend_jit_prologue(&dasm_state); + zend_jit_label(&ctx, ssa->cfg.blocks_count + b); + zend_jit_prologue(&ctx); recv_emitted = 1; } } else { if (recv_emitted) { - zend_jit_jmp(&dasm_state, b); + zend_jit_jmp(&ctx, b); } else if (JIT_G(opt_level) < ZEND_JIT_LEVEL_INLINE && ssa->cfg.blocks[b].len == 1 && (ssa->cfg.blocks[b].flags & ZEND_BB_EXIT)) { /* don't generate code for BB with single opcode */ - dasm_free(&dasm_state); + dasm_free(&ctx); if (JIT_G(opt_flags) & (ZEND_JIT_REG_ALLOC_LOCAL|ZEND_JIT_REG_ALLOC_GLOBAL)) { zend_arena_release(&CG(arena), checkpoint); } return SUCCESS; } - zend_jit_label(&dasm_state, ssa->cfg.blocks_count + b); - zend_jit_prologue(&dasm_state); + zend_jit_label(&ctx, ssa->cfg.blocks_count + b); + zend_jit_prologue(&ctx); recv_emitted = 1; } } else if (JIT_G(opt_level) < ZEND_JIT_LEVEL_INLINE && ssa->cfg.blocks[b].len == 1 && (ssa->cfg.blocks[b].flags & ZEND_BB_EXIT)) { /* don't generate code for BB with single opcode */ - dasm_free(&dasm_state); + dasm_free(&ctx); if (JIT_G(opt_flags) & (ZEND_JIT_REG_ALLOC_LOCAL|ZEND_JIT_REG_ALLOC_GLOBAL)) { zend_arena_release(&CG(arena), checkpoint); } return SUCCESS; } else { - zend_jit_label(&dasm_state, ssa->cfg.blocks_count + b); - zend_jit_prologue(&dasm_state); + zend_jit_label(&ctx, ssa->cfg.blocks_count + b); + zend_jit_prologue(&ctx); } } is_terminated = 0; - zend_jit_label(&dasm_state, b); + zend_jit_label(&ctx, b); +#else /* ZEND_JIT_IR */ + + if (ssa->cfg.flags & ZEND_FUNC_IRREDUCIBLE) { + /* We can't order blocks properly */ + return FAILURE; + } + + if (rt_opline) { + /* Set BB_ENTRY flag to limit register usage across the OSR ENTRY point */ + ssa->cfg.blocks[ssa->cfg.map[rt_opline - op_array->opcodes]].flags |= ZEND_BB_ENTRY; + } + + zend_jit_start(&ctx, op_array, ssa); + if (JIT_G(opt_flags) & (ZEND_JIT_REG_ALLOC_LOCAL|ZEND_JIT_REG_ALLOC_GLOBAL)) { + checkpoint = zend_arena_checkpoint(CG(arena)); + zend_jit_allocate_registers(&ctx, op_array, ssa); + ra = ctx.ra; + } + + /* Process blocks in Reverse Post Order */ + int *sorted_blocks = alloca(sizeof(int) * ssa->cfg.blocks_count); + int n = zend_jit_compute_post_order(&ssa->cfg, 0, sorted_blocks); + + while (n > 0) { + b = sorted_blocks[--n]; + if ((ssa->cfg.blocks[b].flags & ZEND_BB_REACHABLE) == 0) { + continue; + } + + if (ssa->cfg.blocks[b].flags & (ZEND_BB_START|ZEND_BB_RECV_ENTRY)) { + opline = op_array->opcodes + ssa->cfg.blocks[b].start; + if (ssa->cfg.flags & ZEND_CFG_RECV_ENTRY) { + if 
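/*
 * Note how the IR path above walks sorted_blocks backwards
 * (b = sorted_blocks[--n]): because the array holds a post-order, consuming
 * it from the end visits every block before its successors.  The same
 * pattern in isolation, building on the post_order() sketch earlier:
 *
 *   int order[BLOCKS], n = 0;
 *   post_order(cfg, 0, seen, order, &n);   // fill in post-order
 *   while (n > 0) {
 *       int b = order[--n];                // reverse post-order
 *       emit_block(b);                     // hypothetical per-block codegen
 *   }
 */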
(opline->opcode == ZEND_RECV_INIT) { + if (JIT_G(opt_level) < ZEND_JIT_LEVEL_INLINE) { + if (opline != op_array->opcodes && (opline-1)->opcode != ZEND_RECV_INIT) { + zend_jit_recv_entry(&ctx, b); + } + } else { + if (opline != op_array->opcodes && recv_emitted) { + zend_jit_recv_entry(&ctx, b); + } + } + recv_emitted = 1; + } else if (opline->opcode == ZEND_RECV) { + if (!(op_array->fn_flags & ZEND_ACC_HAS_TYPE_HINTS)) { + /* skip */ + zend_jit_bb_start(&ctx, b); + zend_jit_bb_end(&ctx, b); + continue; + } else if (recv_emitted) { + zend_jit_recv_entry(&ctx, b); + } else { + recv_emitted = 1; + } + } else { + if (recv_emitted) { + zend_jit_recv_entry(&ctx, b); + } else if (JIT_G(opt_level) < ZEND_JIT_LEVEL_INLINE && + ssa->cfg.blocks[b].len == 1 && + (ssa->cfg.blocks[b].flags & ZEND_BB_EXIT)) { + /* don't generate code for BB with single opcode */ + zend_jit_free_ctx(&ctx); + + if (JIT_G(opt_flags) & (ZEND_JIT_REG_ALLOC_LOCAL|ZEND_JIT_REG_ALLOC_GLOBAL)) { + zend_arena_release(&CG(arena), checkpoint); + } + return SUCCESS; + } + } + } else if (JIT_G(opt_level) < ZEND_JIT_LEVEL_INLINE && + ssa->cfg.blocks[b].len == 1 && + (ssa->cfg.blocks[b].flags & ZEND_BB_EXIT)) { + /* don't generate code for BB with single opcode */ + zend_jit_free_ctx(&ctx); + + if (JIT_G(opt_flags) & (ZEND_JIT_REG_ALLOC_LOCAL|ZEND_JIT_REG_ALLOC_GLOBAL)) { + zend_arena_release(&CG(arena), checkpoint); + } + return SUCCESS; + } + } + + zend_jit_bb_start(&ctx, b); + + if ((JIT_G(opt_flags) & ZEND_JIT_REG_ALLOC_GLOBAL) && ctx.ra) { + zend_ssa_phi *phi = ssa->blocks[b].phis; + + /* First try to insert IR Phi */ + while (phi) { + zend_jit_reg_var *ival = &ctx.ra[phi->ssa_var]; + + if (ival->ref) { + if (ival->flags & ZREG_PI) { + zend_jit_gen_pi(jit, phi); + } else if (ival->flags & ZREG_PHI) { + zend_jit_gen_phi(jit, phi); + } + } + phi = phi->next; + } + } + + if (rt_opline + && (ssa->cfg.blocks[b].flags & (ZEND_BB_START|ZEND_BB_RECV_ENTRY)) == 0 + && rt_opline == op_array->opcodes + ssa->cfg.blocks[b].start) { + zend_jit_osr_entry(&ctx, b); /* OSR (On-Stack-Replacement) Entry-Point */ + } +#endif /* ZEND_JIT_IR */ + if (JIT_G(opt_level) < ZEND_JIT_LEVEL_INLINE) { if ((ssa->cfg.blocks[b].flags & ZEND_BB_FOLLOW) && ssa->cfg.blocks[b].start != 0 @@ -2869,47 +3217,87 @@ static int zend_jit(const zend_op_array *op_array, zend_ssa *ssa, const zend_op || op_array->opcodes[ssa->cfg.blocks[b].start - 1].opcode == ZEND_SWITCH_LONG || op_array->opcodes[ssa->cfg.blocks[b].start - 1].opcode == ZEND_SWITCH_STRING || op_array->opcodes[ssa->cfg.blocks[b].start - 1].opcode == ZEND_MATCH)) { +#ifndef ZEND_JIT_IR zend_jit_reset_last_valid_opline(); - if (!zend_jit_set_ip(&dasm_state, op_array->opcodes + ssa->cfg.blocks[b].start)) { + if (!zend_jit_set_ip(&ctx, op_array->opcodes + ssa->cfg.blocks[b].start)) { goto jit_failure; } +#else + zend_jit_reset_last_valid_opline(&ctx); +#endif } else { +#ifndef ZEND_JIT_IR zend_jit_set_last_valid_opline(op_array->opcodes + ssa->cfg.blocks[b].start); +#else + zend_jit_set_last_valid_opline(&ctx, op_array->opcodes + ssa->cfg.blocks[b].start); +#endif } } else if (ssa->cfg.blocks[b].flags & ZEND_BB_TARGET) { +#ifndef ZEND_JIT_IR zend_jit_reset_last_valid_opline(); } else if (ssa->cfg.blocks[b].flags & (ZEND_BB_START|ZEND_BB_RECV_ENTRY|ZEND_BB_ENTRY)) { zend_jit_set_last_valid_opline(op_array->opcodes + ssa->cfg.blocks[b].start); +#else + zend_jit_reset_last_valid_opline(&ctx); + } else if (ssa->cfg.blocks[b].flags & ZEND_BB_RECV_ENTRY) { + zend_jit_reset_last_valid_opline(&ctx); + } else if 
(ssa->cfg.blocks[b].flags & (ZEND_BB_START|ZEND_BB_ENTRY)) { + zend_jit_set_last_valid_opline(&ctx, op_array->opcodes + ssa->cfg.blocks[b].start); +#endif } if (ssa->cfg.blocks[b].flags & ZEND_BB_LOOP_HEADER) { - if (!zend_jit_check_timeout(&dasm_state, op_array->opcodes + ssa->cfg.blocks[b].start, NULL)) { + if (!zend_jit_check_timeout(&ctx, op_array->opcodes + ssa->cfg.blocks[b].start, NULL)) { goto jit_failure; } } if (!ssa->cfg.blocks[b].len) { +#ifdef ZEND_JIT_IR + zend_jit_bb_end(&ctx, b); +#endif continue; } if ((JIT_G(opt_flags) & ZEND_JIT_REG_ALLOC_GLOBAL) && ra) { zend_ssa_phi *phi = ssa->blocks[b].phis; while (phi) { +#ifndef ZEND_JIT_IR zend_lifetime_interval *ival = ra[phi->ssa_var]; if (ival) { if (ival->flags & ZREG_LOAD) { ZEND_ASSERT(ival->reg != ZREG_NONE); - if (!zend_jit_load_var(&dasm_state, ssa->var_info[phi->ssa_var].type, ssa->vars[phi->ssa_var].var, ival->reg)) { + if (!zend_jit_load_var(&ctx, ssa->var_info[phi->ssa_var].type, ssa->vars[phi->ssa_var].var, ival->reg)) { goto jit_failure; } } else if (ival->flags & ZREG_STORE) { ZEND_ASSERT(ival->reg != ZREG_NONE); - if (!zend_jit_store_var(&dasm_state, ssa->var_info[phi->ssa_var].type, ssa->vars[phi->ssa_var].var, ival->reg, 1)) { + if (!zend_jit_store_var(&ctx, ssa->var_info[phi->ssa_var].type, ssa->vars[phi->ssa_var].var, ival->reg, 1)) { goto jit_failure; } } } +#else + zend_jit_reg_var *ival = &ra[phi->ssa_var]; + + if (ival->ref) { + if (ival->flags & ZREG_LOAD) { + ZEND_ASSERT(ival->ref == IR_NULL); + + if (!zend_jit_load_var(&ctx, ssa->var_info[phi->ssa_var].type, ssa->vars[phi->ssa_var].var, phi->ssa_var)) { + goto jit_failure; + } + } else if (ival->flags & ZREG_STORE) { + ZEND_ASSERT(ival->ref != IR_NULL); + + if (!zend_jit_store_var(&ctx, ssa->var_info[phi->ssa_var].type, ssa->vars[phi->ssa_var].var, phi->ssa_var, 1)) { + goto jit_failure; + } + } + } +#endif + phi = phi->next; } } @@ -2965,7 +3353,7 @@ static int zend_jit(const zend_op_array *op_array, zend_ssa *ssa, const zend_op res_addr = 0; } op1_def_info = OP1_DEF_INFO(); - if (!zend_jit_inc_dec(&dasm_state, opline, + if (!zend_jit_inc_dec(&ctx, opline, op1_info, OP1_REG_ADDR(), op1_def_info, OP1_DEF_REG_ADDR(), res_use_info, res_info, @@ -2986,9 +3374,11 @@ static int zend_jit(const zend_op_array *op_array, zend_ssa *ssa, const zend_op } op1_info = OP1_INFO(); op2_info = OP2_INFO(); +#ifndef ZEND_JIT_IR if ((op1_info & MAY_BE_UNDEF) || (op2_info & MAY_BE_UNDEF)) { break; } +#endif if (!(op1_info & MAY_BE_LONG) || !(op2_info & MAY_BE_LONG)) { break; @@ -3000,7 +3390,7 @@ static int zend_jit(const zend_op_array *op_array, zend_ssa *ssa, const zend_op i++; res_use_info = -1; res_addr = ZEND_ADDR_MEM_ZVAL(ZREG_RX, (opline+1)->result.var); - if (!zend_jit_reuse_ip(&dasm_state)) { + if (!zend_jit_reuse_ip(&ctx)) { goto jit_failure; } } else { @@ -3019,7 +3409,7 @@ static int zend_jit(const zend_op_array *op_array, zend_ssa *ssa, const zend_op } } } - if (!zend_jit_long_math(&dasm_state, opline, + if (!zend_jit_long_math(&ctx, opline, op1_info, OP1_RANGE(), OP1_REG_ADDR(), op2_info, OP2_RANGE(), OP2_REG_ADDR(), res_use_info, RES_INFO(), res_addr, @@ -3054,7 +3444,7 @@ static int zend_jit(const zend_op_array *op_array, zend_ssa *ssa, const zend_op i++; res_use_info = -1; res_addr = ZEND_ADDR_MEM_ZVAL(ZREG_RX, (opline+1)->result.var); - if (!zend_jit_reuse_ip(&dasm_state)) { + if (!zend_jit_reuse_ip(&ctx)) { goto jit_failure; } } else { @@ -3077,11 +3467,11 @@ static int zend_jit(const zend_op_array *op_array, zend_ssa *ssa, const zend_op if (opline->opcode 
== ZEND_ADD && (op1_info & (MAY_BE_ANY|MAY_BE_UNDEF)) == MAY_BE_ARRAY && (op2_info & (MAY_BE_ANY|MAY_BE_UNDEF)) == MAY_BE_ARRAY) { - if (!zend_jit_add_arrays(&dasm_state, opline, op1_info, OP1_REG_ADDR(), op2_info, OP2_REG_ADDR(), res_addr)) { + if (!zend_jit_add_arrays(&ctx, opline, op1_info, OP1_REG_ADDR(), op2_info, OP2_REG_ADDR(), res_addr)) { goto jit_failure; } } else { - if (!zend_jit_math(&dasm_state, opline, + if (!zend_jit_math(&ctx, opline, op1_info, OP1_REG_ADDR(), op2_info, OP2_REG_ADDR(), res_use_info, res_info, res_addr, @@ -3110,11 +3500,11 @@ static int zend_jit(const zend_op_array *op_array, zend_ssa *ssa, const zend_op && zend_jit_next_is_send_result(opline)) { i++; res_addr = ZEND_ADDR_MEM_ZVAL(ZREG_RX, (opline+1)->result.var); - if (!zend_jit_reuse_ip(&dasm_state)) { + if (!zend_jit_reuse_ip(&ctx)) { goto jit_failure; } } - if (!zend_jit_concat(&dasm_state, opline, + if (!zend_jit_concat(&ctx, opline, op1_info, op2_info, res_addr, zend_may_throw(opline, ssa_op, op_array, ssa))) { goto jit_failure; @@ -3134,7 +3524,7 @@ static int zend_jit(const zend_op_array *op_array, zend_ssa *ssa, const zend_op break; } op1_def_info = OP1_DEF_INFO(); - if (!zend_jit_assign_op(&dasm_state, opline, + if (!zend_jit_assign_op(&ctx, opline, op1_info, op1_def_info, OP1_RANGE(), op2_info, OP2_RANGE(), (op1_info & MAY_BE_LONG) && (op2_info & MAY_BE_LONG) && (op1_def_info & MAY_BE_DOUBLE) && zend_may_overflow(opline, ssa_op, op_array, ssa), @@ -3153,7 +3543,7 @@ static int zend_jit(const zend_op_array *op_array, zend_ssa *ssa, const zend_op opline->extended_value, MAY_BE_ANY, OP1_DATA_INFO())) { break; } - if (!zend_jit_assign_dim_op(&dasm_state, opline, + if (!zend_jit_assign_dim_op(&ctx, opline, OP1_INFO(), OP1_DEF_INFO(), OP1_REG_ADDR(), OP2_INFO(), OP1_DATA_INFO(), OP1_DATA_RANGE(), IS_UNKNOWN, zend_may_throw(opline, ssa_op, op_array, ssa))) { @@ -3167,7 +3557,7 @@ static int zend_jit(const zend_op_array *op_array, zend_ssa *ssa, const zend_op if (PROFITABILITY_CHECKS && (!ssa->ops || !ssa->var_info)) { break; } - if (!zend_jit_assign_dim(&dasm_state, opline, + if (!zend_jit_assign_dim(&ctx, opline, OP1_INFO(), OP1_REG_ADDR(), OP2_INFO(), OP1_DATA_INFO(), IS_UNKNOWN, zend_may_throw(opline, ssa_op, op_array, ssa))) { goto jit_failure; @@ -3211,7 +3601,7 @@ static int zend_jit(const zend_op_array *op_array, zend_ssa *ssa, const zend_op } } } - if (!zend_jit_incdec_obj(&dasm_state, opline, op_array, ssa, ssa_op, + if (!zend_jit_incdec_obj(&ctx, opline, op_array, ssa, ssa_op, op1_info, op1_addr, 0, ce, ce_is_instanceof, on_this, 0, NULL, IS_UNKNOWN)) { goto jit_failure; @@ -3259,7 +3649,7 @@ static int zend_jit(const zend_op_array *op_array, zend_ssa *ssa, const zend_op } } } - if (!zend_jit_assign_obj_op(&dasm_state, opline, op_array, ssa, ssa_op, + if (!zend_jit_assign_obj_op(&ctx, opline, op_array, ssa, ssa_op, op1_info, op1_addr, OP1_DATA_INFO(), OP1_DATA_RANGE(), 0, ce, ce_is_instanceof, on_this, 0, NULL, IS_UNKNOWN)) { goto jit_failure; @@ -3300,7 +3690,7 @@ static int zend_jit(const zend_op_array *op_array, zend_ssa *ssa, const zend_op } } } - if (!zend_jit_assign_obj(&dasm_state, opline, op_array, ssa, ssa_op, + if (!zend_jit_assign_obj(&ctx, opline, op_array, ssa, ssa_op, op1_info, op1_addr, OP1_DATA_INFO(), 0, ce, ce_is_instanceof, on_this, 0, NULL, IS_UNKNOWN, zend_may_throw(opline, ssa_op, op_array, ssa))) { @@ -3338,16 +3728,19 @@ static int zend_jit(const zend_op_array *op_array, zend_ssa *ssa, const zend_op && (!(op1_info & MAY_HAVE_DTOR) || !(op1_info & MAY_BE_RC1))) { i++; 
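/*
 * Several arithmetic and concat cases above peek at the next opline via
 * zend_jit_next_is_send_result(): when an instruction's temporary result is
 * immediately passed to a call by SEND_VAL, the JIT consumes the SEND_VAL
 * (i++) and writes the result straight into the argument slot of the callee
 * frame under construction (ZREG_RX, set up by zend_jit_reuse_ip()).  The
 * guard, condensed over a stand-in opline layout:
 */
typedef struct {
    int opcode, op1_type, result_type;
    unsigned op1_var, result_var;
} vmop_t;

enum { VM_SEND_VAL = 1, VM_IS_TMP = 2 };

static int next_is_send_result(const vmop_t *op)
{
    const vmop_t *next = op + 1;

    return op->result_type == VM_IS_TMP
        && next->opcode   == VM_SEND_VAL
        && next->op1_type == VM_IS_TMP
        && next->op1_var  == op->result_var;  /* consumes exactly this TMP */
}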
res_addr = ZEND_ADDR_MEM_ZVAL(ZREG_RX, (opline+1)->result.var); - if (!zend_jit_reuse_ip(&dasm_state)) { + if (!zend_jit_reuse_ip(&ctx)) { goto jit_failure; } } } - if (!zend_jit_assign(&dasm_state, opline, + if (!zend_jit_assign(&ctx, opline, op1_info, OP1_REG_ADDR(), OP1_DEF_INFO(), OP1_DEF_REG_ADDR(), OP2_INFO(), op2_addr, op2_def_addr, res_info, res_addr, +#ifdef ZEND_JIT_IR + 0, +#endif zend_may_throw(opline, ssa_op, op_array, ssa))) { goto jit_failure; } @@ -3361,7 +3754,7 @@ static int zend_jit(const zend_op_array *op_array, zend_ssa *ssa, const zend_op } else { op1_def_addr = op1_addr; } - if (!zend_jit_qm_assign(&dasm_state, opline, + if (!zend_jit_qm_assign(&ctx, opline, OP1_INFO(), op1_addr, op1_def_addr, -1, RES_INFO(), RES_REG_ADDR())) { goto jit_failure; @@ -3370,7 +3763,7 @@ static int zend_jit(const zend_op_array *op_array, zend_ssa *ssa, const zend_op case ZEND_INIT_FCALL: case ZEND_INIT_FCALL_BY_NAME: case ZEND_INIT_NS_FCALL_BY_NAME: - if (!zend_jit_init_fcall(&dasm_state, opline, b, op_array, ssa, ssa_op, call_level, NULL, 0)) { + if (!zend_jit_init_fcall(&ctx, opline, b, op_array, ssa, ssa_op, call_level, NULL, 0)) { goto jit_failure; } goto done; @@ -3384,7 +3777,7 @@ static int zend_jit(const zend_op_array *op_array, zend_ssa *ssa, const zend_op && opline->op2.num > MAX_ARG_FLAG_NUM) { break; } - if (!zend_jit_send_val(&dasm_state, opline, + if (!zend_jit_send_val(&ctx, opline, OP1_INFO(), OP1_REG_ADDR())) { goto jit_failure; } @@ -3394,7 +3787,7 @@ static int zend_jit(const zend_op_array *op_array, zend_ssa *ssa, const zend_op /* Named parameters not supported in JIT (yet) */ break; } - if (!zend_jit_send_ref(&dasm_state, opline, op_array, + if (!zend_jit_send_ref(&ctx, opline, op_array, OP1_INFO(), 0)) { goto jit_failure; } @@ -3421,7 +3814,7 @@ static int zend_jit(const zend_op_array *op_array, zend_ssa *ssa, const zend_op } else { op1_def_addr = op1_addr; } - if (!zend_jit_send_var(&dasm_state, opline, op_array, + if (!zend_jit_send_var(&ctx, opline, op_array, OP1_INFO(), op1_addr, op1_def_addr)) { goto jit_failure; } @@ -3434,22 +3827,24 @@ static int zend_jit(const zend_op_array *op_array, zend_ssa *ssa, const zend_op if (opline->op2.num > MAX_ARG_FLAG_NUM) { break; } - if (!zend_jit_check_func_arg(&dasm_state, opline)) { + if (!zend_jit_check_func_arg(&ctx, opline)) { goto jit_failure; } goto done; case ZEND_CHECK_UNDEF_ARGS: - if (!zend_jit_check_undef_args(&dasm_state, opline)) { + if (!zend_jit_check_undef_args(&ctx, opline)) { goto jit_failure; } goto done; case ZEND_DO_UCALL: +#ifndef ZEND_JIT_IR is_terminated = 1; +#endif ZEND_FALLTHROUGH; case ZEND_DO_ICALL: case ZEND_DO_FCALL_BY_NAME: case ZEND_DO_FCALL: - if (!zend_jit_do_fcall(&dasm_state, opline, op_array, ssa, call_level, b + 1, NULL)) { + if (!zend_jit_do_fcall(&ctx, opline, op_array, ssa, call_level, b + 1, NULL)) { goto jit_failure; } goto done; @@ -3480,7 +3875,7 @@ static int zend_jit(const zend_op_array *op_array, zend_ssa *ssa, const zend_op smart_branch_opcode = 0; target_label = target_label2 = (uint32_t)-1; } - if (!zend_jit_cmp(&dasm_state, opline, + if (!zend_jit_cmp(&ctx, opline, OP1_INFO(), OP1_RANGE(), OP1_REG_ADDR(), OP2_INFO(), OP2_RANGE(), OP2_REG_ADDR(), res_addr, @@ -3494,9 +3889,14 @@ static int zend_jit(const zend_op_array *op_array, zend_ssa *ssa, const zend_op case ZEND_IS_IDENTICAL: case ZEND_IS_NOT_IDENTICAL: case ZEND_CASE_STRICT: + res_addr = RES_REG_ADDR(); if ((opline->result_type & IS_TMP_VAR) && (i + 1) <= end && ((opline+1)->opcode == ZEND_JMPZ +#ifdef ZEND_JIT_IR + 
|| (opline+1)->opcode == ZEND_JMPZ_EX + || (opline+1)->opcode == ZEND_JMPNZ_EX +#endif || (opline+1)->opcode == ZEND_JMPNZ) && (opline+1)->op1_type == IS_TMP_VAR && (opline+1)->op1.var == opline->result.var) { @@ -3504,14 +3904,21 @@ static int zend_jit(const zend_op_array *op_array, zend_ssa *ssa, const zend_op smart_branch_opcode = (opline+1)->opcode; target_label = ssa->cfg.blocks[b].successors[0]; target_label2 = ssa->cfg.blocks[b].successors[1]; +#ifdef ZEND_JIT_IR + /* For EX variant write into the result of EX opcode. */ + if ((opline+1)->opcode == ZEND_JMPZ_EX + || (opline+1)->opcode == ZEND_JMPNZ_EX) { + res_addr = OP_REG_ADDR(opline + 1, result_type, result, result_def); + } +#endif } else { smart_branch_opcode = 0; target_label = target_label2 = (uint32_t)-1; } - if (!zend_jit_identical(&dasm_state, opline, + if (!zend_jit_identical(&ctx, opline, OP1_INFO(), OP1_RANGE(), OP1_REG_ADDR(), OP2_INFO(), OP2_RANGE(), OP2_REG_ADDR(), - RES_REG_ADDR(), + res_addr, zend_may_throw(opline, ssa_op, op_array, ssa), smart_branch_opcode, target_label, target_label2, NULL, 0)) { @@ -3533,7 +3940,7 @@ static int zend_jit(const zend_op_array *op_array, zend_ssa *ssa, const zend_op smart_branch_opcode = 0; target_label = target_label2 = (uint32_t)-1; } - if (!zend_jit_defined(&dasm_state, opline, smart_branch_opcode, target_label, target_label2, NULL)) { + if (!zend_jit_defined(&ctx, opline, smart_branch_opcode, target_label, target_label2, NULL)) { goto jit_failure; } goto done; @@ -3556,7 +3963,7 @@ static int zend_jit(const zend_op_array *op_array, zend_ssa *ssa, const zend_op smart_branch_opcode = 0; target_label = target_label2 = (uint32_t)-1; } - if (!zend_jit_type_check(&dasm_state, opline, OP1_INFO(), smart_branch_opcode, target_label, target_label2, NULL)) { + if (!zend_jit_type_check(&ctx, opline, OP1_INFO(), smart_branch_opcode, target_label, target_label2, NULL)) { goto jit_failure; } goto done; @@ -3568,60 +3975,62 @@ static int zend_jit(const zend_op_array *op_array, zend_ssa *ssa, const zend_op || !op_array->function_name // TODO: support for IS_UNDEF ??? 
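/*
 * With the IR backend a comparison can also fuse with JMPZ_EX/JMPNZ_EX, not
 * only JMPZ/JMPNZ.  The EX forms both branch and keep the boolean, which is
 * why res_addr above is redirected into the EX opcode's own result slot:
 * the fused code must still materialize the value it branched on.  The
 * distinction, as a small sketch with stand-in opcodes:
 */
enum { J_JMPZ, J_JMPNZ, J_JMPZ_EX, J_JMPNZ_EX, J_OTHER };

static int branch_keeps_value(int next_opcode)
{
    /* EX variants consume the condition *and* store the boolean result */
    return next_opcode == J_JMPZ_EX || next_opcode == J_JMPNZ_EX;
}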
|| (op1_info & MAY_BE_UNDEF)) { - if (!zend_jit_tail_handler(&dasm_state, opline)) { + if (!zend_jit_tail_handler(&ctx, opline)) { goto jit_failure; } } else { - int j; - bool left_frame = 0; - - if (!zend_jit_return(&dasm_state, opline, op_array, + if (!zend_jit_return(&ctx, opline, op_array, op1_info, OP1_REG_ADDR())) { goto jit_failure; } +#ifndef ZEND_JIT_IR + bool left_frame = 0; if (jit_return_label >= 0) { - if (!zend_jit_jmp(&dasm_state, jit_return_label)) { + if (!zend_jit_jmp(&ctx, jit_return_label)) { goto jit_failure; } goto done; } jit_return_label = ssa->cfg.blocks_count * 2; - if (!zend_jit_label(&dasm_state, jit_return_label)) { + if (!zend_jit_label(&ctx, jit_return_label)) { goto jit_failure; } if (op_array->last_var > 100) { /* To many CVs to unroll */ - if (!zend_jit_free_cvs(&dasm_state)) { + if (!zend_jit_free_cvs(&ctx)) { goto jit_failure; } left_frame = 1; } if (!left_frame) { + int j; + for (j = 0 ; j < op_array->last_var; j++) { uint32_t info = zend_ssa_cv_info(op_array, ssa, j); if (info & (MAY_BE_STRING|MAY_BE_ARRAY|MAY_BE_OBJECT|MAY_BE_RESOURCE|MAY_BE_REF)) { if (!left_frame) { left_frame = 1; - if (!zend_jit_leave_frame(&dasm_state)) { + if (!zend_jit_leave_frame(&ctx)) { goto jit_failure; } } - if (!zend_jit_free_cv(&dasm_state, info, j)) { + if (!zend_jit_free_cv(&ctx, info, j)) { goto jit_failure; } } } } - if (!zend_jit_leave_func(&dasm_state, op_array, opline, op1_info, left_frame, + if (!zend_jit_leave_func(&ctx, op_array, opline, op1_info, left_frame, NULL, NULL, (ssa->cfg.flags & ZEND_FUNC_INDIRECT_VAR_ACCESS) != 0, 1)) { goto jit_failure; } +#endif } goto done; case ZEND_BOOL: case ZEND_BOOL_NOT: - if (!zend_jit_bool_jmpznz(&dasm_state, opline, + if (!zend_jit_bool_jmpznz(&ctx, opline, OP1_INFO(), OP1_REG_ADDR(), RES_REG_ADDR(), -1, -1, zend_may_throw(opline, ssa_op, op_array, ssa), @@ -3634,7 +4043,7 @@ static int zend_jit(const zend_op_array *op_array, zend_ssa *ssa, const zend_op if (opline > op_array->opcodes + ssa->cfg.blocks[b].start && ((opline-1)->result_type & (IS_SMART_BRANCH_JMPZ|IS_SMART_BRANCH_JMPNZ)) != 0) { /* smart branch */ - if (!zend_jit_cond_jmp(&dasm_state, opline + 1, ssa->cfg.blocks[b].successors[0])) { + if (!zend_jit_cond_jmp(&ctx, opline + 1, ssa->cfg.blocks[b].successors[0])) { goto jit_failure; } goto done; @@ -3647,7 +4056,7 @@ static int zend_jit(const zend_op_array *op_array, zend_ssa *ssa, const zend_op } else { res_addr = RES_REG_ADDR(); } - if (!zend_jit_bool_jmpznz(&dasm_state, opline, + if (!zend_jit_bool_jmpznz(&ctx, opline, OP1_INFO(), OP1_REG_ADDR(), res_addr, ssa->cfg.blocks[b].successors[0], ssa->cfg.blocks[b].successors[1], zend_may_throw(opline, ssa_op, op_array, ssa), @@ -3674,7 +4083,7 @@ static int zend_jit(const zend_op_array *op_array, zend_ssa *ssa, const zend_op smart_branch_opcode = 0; target_label = target_label2 = (uint32_t)-1; } - if (!zend_jit_isset_isempty_cv(&dasm_state, opline, + if (!zend_jit_isset_isempty_cv(&ctx, opline, OP1_INFO(), OP1_REG_ADDR(), smart_branch_opcode, target_label, target_label2, NULL)) { @@ -3703,7 +4112,7 @@ static int zend_jit(const zend_op_array *op_array, zend_ssa *ssa, const zend_op smart_branch_opcode = 0; target_label = target_label2 = (uint32_t)-1; } - if (!zend_jit_in_array(&dasm_state, opline, + if (!zend_jit_in_array(&ctx, opline, op1_info, OP1_REG_ADDR(), smart_branch_opcode, target_label, target_label2, NULL)) { @@ -3716,7 +4125,7 @@ static int zend_jit(const zend_op_array *op_array, zend_ssa *ssa, const zend_op if (PROFITABILITY_CHECKS && (!ssa->ops || 
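/*
 * The unrolled RETURN epilogue above frees only those CVs whose inferred
 * type may own heap memory; everything else can simply be abandoned with
 * the frame.  The per-variable test is a bitmask check, roughly as below
 * (MAY_* constants are illustrative stand-ins for the MAY_BE_* type-info
 * bits):
 */
enum {
    MAY_STRING = 1 << 0, MAY_ARRAY = 1 << 1, MAY_OBJECT = 1 << 2,
    MAY_RESOURCE = 1 << 3, MAY_REF = 1 << 4
};

static int cv_may_need_dtor(unsigned type_info)
{
    return (type_info & (MAY_STRING | MAY_ARRAY | MAY_OBJECT
                         | MAY_RESOURCE | MAY_REF)) != 0;
}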
!ssa->var_info)) { break; } - if (!zend_jit_fetch_dim_read(&dasm_state, opline, ssa, ssa_op, + if (!zend_jit_fetch_dim_read(&ctx, opline, ssa, ssa_op, OP1_INFO(), OP1_REG_ADDR(), 0, OP2_INFO(), RES_INFO(), RES_REG_ADDR(), IS_UNKNOWN)) { goto jit_failure; @@ -3732,7 +4141,7 @@ static int zend_jit(const zend_op_array *op_array, zend_ssa *ssa, const zend_op if (opline->op1_type != IS_CV) { break; } - if (!zend_jit_fetch_dim(&dasm_state, opline, + if (!zend_jit_fetch_dim(&ctx, opline, OP1_INFO(), OP1_REG_ADDR(), OP2_INFO(), RES_REG_ADDR(), IS_UNKNOWN)) { goto jit_failure; } @@ -3759,7 +4168,7 @@ static int zend_jit(const zend_op_array *op_array, zend_ssa *ssa, const zend_op smart_branch_opcode = 0; target_label = target_label2 = (uint32_t)-1; } - if (!zend_jit_isset_isempty_dim(&dasm_state, opline, + if (!zend_jit_isset_isempty_dim(&ctx, opline, OP1_INFO(), OP1_REG_ADDR(), 0, OP2_INFO(), IS_UNKNOWN, zend_may_throw(opline, ssa_op, op_array, ssa), @@ -3802,7 +4211,7 @@ static int zend_jit(const zend_op_array *op_array, zend_ssa *ssa, const zend_op } } } - if (!zend_jit_fetch_obj(&dasm_state, opline, op_array, ssa, ssa_op, + if (!zend_jit_fetch_obj(&ctx, opline, op_array, ssa, ssa_op, op1_info, op1_addr, 0, ce, ce_is_instanceof, on_this, 0, 0, NULL, IS_UNKNOWN, zend_may_throw(opline, ssa_op, op_array, ssa))) { @@ -3815,17 +4224,17 @@ static int zend_jit(const zend_op_array *op_array, zend_ssa *ssa, const zend_op } else { op1_info = OP1_INFO(); } - if (!zend_jit_bind_global(&dasm_state, opline, op1_info)) { + if (!zend_jit_bind_global(&ctx, opline, op1_info)) { goto jit_failure; } goto done; case ZEND_RECV: - if (!zend_jit_recv(&dasm_state, opline, op_array)) { + if (!zend_jit_recv(&ctx, opline, op_array)) { goto jit_failure; } goto done; case ZEND_RECV_INIT: - if (!zend_jit_recv_init(&dasm_state, opline, op_array, + if (!zend_jit_recv_init(&ctx, opline, op_array, (opline + 1)->opcode != ZEND_RECV_INIT, zend_may_throw(opline, ssa_op, op_array, ssa))) { goto jit_failure; @@ -3833,7 +4242,7 @@ static int zend_jit(const zend_op_array *op_array, zend_ssa *ssa, const zend_op goto done; case ZEND_FREE: case ZEND_FE_FREE: - if (!zend_jit_free(&dasm_state, opline, OP1_INFO(), + if (!zend_jit_free(&ctx, opline, OP1_INFO(), zend_may_throw(opline, ssa_op, op_array, ssa))) { goto jit_failure; } @@ -3843,7 +4252,7 @@ static int zend_jit(const zend_op_array *op_array, zend_ssa *ssa, const zend_op if ((op1_info & (MAY_BE_UNDEF|MAY_BE_ANY|MAY_BE_REF)) != MAY_BE_STRING) { break; } - if (!zend_jit_echo(&dasm_state, opline, op1_info)) { + if (!zend_jit_echo(&ctx, opline, op1_info)) { goto jit_failure; } goto done; @@ -3852,7 +4261,7 @@ static int zend_jit(const zend_op_array *op_array, zend_ssa *ssa, const zend_op if ((op1_info & (MAY_BE_UNDEF|MAY_BE_ANY|MAY_BE_REF)) != MAY_BE_STRING) { break; } - if (!zend_jit_strlen(&dasm_state, opline, op1_info, OP1_REG_ADDR(), RES_REG_ADDR())) { + if (!zend_jit_strlen(&ctx, opline, op1_info, OP1_REG_ADDR(), RES_REG_ADDR())) { goto jit_failure; } goto done; @@ -3861,19 +4270,19 @@ static int zend_jit(const zend_op_array *op_array, zend_ssa *ssa, const zend_op if ((op1_info & (MAY_BE_UNDEF|MAY_BE_ANY|MAY_BE_REF)) != MAY_BE_ARRAY) { break; } - if (!zend_jit_count(&dasm_state, opline, op1_info, OP1_REG_ADDR(), RES_REG_ADDR(), zend_may_throw(opline, ssa_op, op_array, ssa))) { + if (!zend_jit_count(&ctx, opline, op1_info, OP1_REG_ADDR(), RES_REG_ADDR(), zend_may_throw(opline, ssa_op, op_array, ssa))) { goto jit_failure; } goto done; case ZEND_FETCH_THIS: - if 
(!zend_jit_fetch_this(&dasm_state, opline, op_array, 0)) { + if (!zend_jit_fetch_this(&ctx, opline, op_array, 0)) { goto jit_failure; } goto done; case ZEND_SWITCH_LONG: case ZEND_SWITCH_STRING: case ZEND_MATCH: - if (!zend_jit_switch(&dasm_state, opline, op_array, ssa, NULL, NULL)) { + if (!zend_jit_switch(&ctx, opline, op_array, ssa, NULL, NULL)) { goto jit_failure; } goto done; @@ -3894,7 +4303,7 @@ static int zend_jit(const zend_op_array *op_array, zend_ssa *ssa, const zend_op /* TODO May need reference unwrapping. */ break; } - if (!zend_jit_verify_return_type(&dasm_state, opline, op_array, OP1_INFO())) { + if (!zend_jit_verify_return_type(&ctx, opline, op_array, OP1_INFO())) { goto jit_failure; } goto done; @@ -3903,7 +4312,7 @@ static int zend_jit(const zend_op_array *op_array, zend_ssa *ssa, const zend_op if ((op1_info & (MAY_BE_ANY|MAY_BE_REF|MAY_BE_UNDEF)) != MAY_BE_ARRAY) { break; } - if (!zend_jit_fe_reset(&dasm_state, opline, op1_info)) { + if (!zend_jit_fe_reset(&ctx, opline, op1_info)) { goto jit_failure; } goto done; @@ -3912,13 +4321,13 @@ static int zend_jit(const zend_op_array *op_array, zend_ssa *ssa, const zend_op if ((op1_info & MAY_BE_ANY) != MAY_BE_ARRAY) { break; } - if (!zend_jit_fe_fetch(&dasm_state, opline, op1_info, OP2_INFO(), + if (!zend_jit_fe_fetch(&ctx, opline, op1_info, OP2_INFO(), ssa->cfg.blocks[b].successors[0], opline->opcode, NULL)) { goto jit_failure; } goto done; case ZEND_FETCH_CONSTANT: - if (!zend_jit_fetch_constant(&dasm_state, opline, op_array, ssa, ssa_op, RES_REG_ADDR())) { + if (!zend_jit_fetch_constant(&ctx, opline, op_array, ssa, ssa_op, RES_REG_ADDR())) { goto jit_failure; } goto done; @@ -3953,9 +4362,13 @@ static int zend_jit(const zend_op_array *op_array, zend_ssa *ssa, const zend_op } } } - if (!zend_jit_init_method_call(&dasm_state, opline, b, op_array, ssa, ssa_op, call_level, + if (!zend_jit_init_method_call(&ctx, opline, b, op_array, ssa, ssa_op, call_level, op1_info, op1_addr, ce, ce_is_instanceof, on_this, 0, NULL, - NULL, 0, 0)) { + NULL, 0, +#ifdef ZEND_JIT_IR + -1, -1, +#endif + 0)) { goto jit_failure; } goto done; @@ -3966,7 +4379,7 @@ static int zend_jit(const zend_op_array *op_array, zend_ssa *ssa, const zend_op if ((op2_info & (MAY_BE_UNDEF|MAY_BE_ANY|MAY_BE_REF)) != MAY_BE_STRING) { break; } - if (!zend_jit_rope(&dasm_state, opline, op2_info)) { + if (!zend_jit_rope(&ctx, opline, op2_info)) { goto jit_failure; } goto done; @@ -3981,12 +4394,16 @@ static int zend_jit(const zend_op_array *op_array, zend_ssa *ssa, const zend_op if (opline == op_array->opcodes || opline->opcode != op_array->opcodes[i-1].opcode) { /* repeatable opcodes */ - if (!zend_jit_handler(&dasm_state, opline, + if (!zend_jit_handler(&ctx, opline, zend_may_throw(opline, ssa_op, op_array, ssa))) { goto jit_failure; } } +#ifndef ZEND_JIT_IR zend_jit_set_last_valid_opline(opline+1); +#else + zend_jit_set_last_valid_opline(&ctx, opline+1); +#endif break; case ZEND_NOP: case ZEND_OP_DATA: @@ -3998,14 +4415,16 @@ static int zend_jit(const zend_op_array *op_array, zend_ssa *ssa, const zend_op if (JIT_G(opt_level) < ZEND_JIT_LEVEL_INLINE) { const zend_op *target = OP_JMP_ADDR(opline, opline->op1); - if (!zend_jit_set_ip(&dasm_state, target)) { + if (!zend_jit_set_ip(&ctx, target)) { goto jit_failure; } } - if (!zend_jit_jmp(&dasm_state, ssa->cfg.blocks[b].successors[0])) { +#ifndef ZEND_JIT_IR + if (!zend_jit_jmp(&ctx, ssa->cfg.blocks[b].successors[0])) { goto jit_failure; } is_terminated = 1; +#endif break; case ZEND_CATCH: case ZEND_FAST_CALL: @@ -4019,27 
+4438,39 @@ static int zend_jit(const zend_op_array *op_array, zend_ssa *ssa, const zend_op /* switch through trampoline */ case ZEND_YIELD: case ZEND_YIELD_FROM: - if (!zend_jit_tail_handler(&dasm_state, opline)) { +#ifdef ZEND_JIT_IR + case ZEND_THROW: + case ZEND_VERIFY_NEVER_TYPE: +#endif + if (!zend_jit_tail_handler(&ctx, opline)) { goto jit_failure; } +#ifndef ZEND_JIT_IR is_terminated = 1; +#else + /* THROW and EXIT may be used in the middle of BB */ + /* don't generate code for the rest of BB */ + i = end; +#endif break; /* stackless execution */ case ZEND_INCLUDE_OR_EVAL: case ZEND_DO_FCALL: case ZEND_DO_UCALL: case ZEND_DO_FCALL_BY_NAME: - if (!zend_jit_call(&dasm_state, opline, b + 1)) { + if (!zend_jit_call(&ctx, opline, b + 1)) { goto jit_failure; } +#ifndef ZEND_JIT_IR is_terminated = 1; +#endif break; case ZEND_JMPZ: case ZEND_JMPNZ: if (opline > op_array->opcodes + ssa->cfg.blocks[b].start && ((opline-1)->result_type & (IS_SMART_BRANCH_JMPZ|IS_SMART_BRANCH_JMPNZ)) != 0) { /* smart branch */ - if (!zend_jit_cond_jmp(&dasm_state, opline + 1, ssa->cfg.blocks[b].successors[0])) { + if (!zend_jit_cond_jmp(&ctx, opline + 1, ssa->cfg.blocks[b].successors[0])) { goto jit_failure; } goto done; @@ -4056,14 +4487,14 @@ static int zend_jit(const zend_op_array *op_array, zend_ssa *ssa, const zend_op case ZEND_FE_FETCH_R: case ZEND_FE_FETCH_RW: case ZEND_BIND_INIT_STATIC_OR_JMP: - if (!zend_jit_handler(&dasm_state, opline, + if (!zend_jit_handler(&ctx, opline, zend_may_throw(opline, ssa_op, op_array, ssa)) || - !zend_jit_cond_jmp(&dasm_state, opline + 1, ssa->cfg.blocks[b].successors[0])) { + !zend_jit_cond_jmp(&ctx, opline + 1, ssa->cfg.blocks[b].successors[0])) { goto jit_failure; } break; case ZEND_NEW: - if (!zend_jit_handler(&dasm_state, opline, 1)) { + if (!zend_jit_handler(&ctx, opline, 1)) { return 0; } if (opline->extended_value == 0 && (opline+1)->opcode == ZEND_DO_FCALL) { @@ -4091,13 +4522,18 @@ static int zend_jit(const zend_op_array *op_array, zend_ssa *ssa, const zend_op if (!ce || !(ce->ce_flags & ZEND_ACC_LINKED) || ce->constructor) { const zend_op *next_opline = opline + 1; - zend_jit_cond_jmp(&dasm_state, next_opline, ssa->cfg.blocks[b].successors[0]); +#ifndef ZEND_JIT_IR + zend_jit_cond_jmp(&ctx, next_opline, ssa->cfg.blocks[b].successors[0]); if (JIT_G(opt_level) < ZEND_JIT_LEVEL_INLINE) { - zend_jit_call(&dasm_state, next_opline, b + 1); + zend_jit_call(&ctx, next_opline, b + 1); is_terminated = 1; } else { - zend_jit_do_fcall(&dasm_state, next_opline, op_array, ssa, call_level, b + 1, NULL); + zend_jit_do_fcall(&ctx, next_opline, op_array, ssa, call_level, b + 1, NULL); } +#else + ZEND_ASSERT(b + 1 == ssa->cfg.blocks[b].successors[0]); + zend_jit_constructor(&ctx, next_opline, op_array, ssa, call_level, b + 1); +#endif } /* We skip over the DO_FCALL, so decrement call_level ourselves. 
*/ @@ -4105,20 +4541,26 @@ static int zend_jit(const zend_op_array *op_array, zend_ssa *ssa, const zend_op } break; default: - if (!zend_jit_handler(&dasm_state, opline, + if (!zend_jit_handler(&ctx, opline, zend_may_throw(opline, ssa_op, op_array, ssa))) { goto jit_failure; } if (i == end && (opline->result_type & (IS_SMART_BRANCH_JMPZ|IS_SMART_BRANCH_JMPNZ)) != 0) { /* smart branch split across basic blocks */ - if (!zend_jit_cond_jmp(&dasm_state, opline + 2, ssa->cfg.blocks[b+1].successors[0])) { +#ifndef ZEND_JIT_IR + if (!zend_jit_cond_jmp(&ctx, opline + 2, ssa->cfg.blocks[b+1].successors[0])) { goto jit_failure; } - if (!zend_jit_jmp(&dasm_state, ssa->cfg.blocks[b+1].successors[1])) { + if (!zend_jit_jmp(&ctx, ssa->cfg.blocks[b+1].successors[1])) { goto jit_failure; } is_terminated = 1; +#else + if (!zend_jit_set_cond(&ctx, opline + 2, opline->result.var)) { + goto jit_failure; + } +#endif } } done: @@ -4131,14 +4573,61 @@ done: call_level--; } } +#ifdef ZEND_JIT_IR + zend_jit_bb_end(&ctx, b); +#endif } - handler = dasm_link_and_encode(&dasm_state, op_array, ssa, rt_opline, ra, NULL, 0, +#ifndef ZEND_JIT_IR + handler = dasm_link_and_encode(&ctx, op_array, ssa, rt_opline, ra, NULL, 0, (zend_jit_vm_kind == ZEND_VM_KIND_HYBRID) ? SP_ADJ_VM : SP_ADJ_RET, SP_ADJ_JIT); if (!handler) { goto jit_failure; } - dasm_free(&dasm_state); + dasm_free(&ctx); +#else + if (jit->return_inputs) { + zend_jit_common_return(jit); + + bool left_frame = 0; + if (op_array->last_var > 100) { + /* To many CVs to unroll */ + if (!zend_jit_free_cvs(&ctx)) { + goto jit_failure; + } + left_frame = 1; + } + if (!left_frame) { + int j; + + for (j = 0 ; j < op_array->last_var; j++) { + uint32_t info = zend_ssa_cv_info(op_array, ssa, j); + + if (info & (MAY_BE_STRING|MAY_BE_ARRAY|MAY_BE_OBJECT|MAY_BE_RESOURCE|MAY_BE_REF)) { + if (!left_frame) { + left_frame = 1; + if (!zend_jit_leave_frame(&ctx)) { + goto jit_failure; + } + } + if (!zend_jit_free_cv(&ctx, info, j)) { + goto jit_failure; + } + } + } + } + if (!zend_jit_leave_func(&ctx, op_array, NULL, MAY_BE_ANY, left_frame, + NULL, NULL, (ssa->cfg.flags & ZEND_FUNC_INDIRECT_VAR_ACCESS) != 0, 1)) { + goto jit_failure; + } + } + + handler = zend_jit_finish(&ctx); + if (!handler) { + goto jit_failure; + } + zend_jit_free_ctx(&ctx); +#endif if (JIT_G(opt_flags) & (ZEND_JIT_REG_ALLOC_LOCAL|ZEND_JIT_REG_ALLOC_GLOBAL)) { zend_arena_release(&CG(arena), checkpoint); @@ -4146,9 +4635,13 @@ done: return SUCCESS; jit_failure: - if (dasm_state) { - dasm_free(&dasm_state); +#ifndef ZEND_JIT_IR + if (ctx) { + dasm_free(&ctx); } +#else + zend_jit_free_ctx(&ctx); +#endif if (JIT_G(opt_flags) & (ZEND_JIT_REG_ALLOC_LOCAL|ZEND_JIT_REG_ALLOC_GLOBAL)) { zend_arena_release(&CG(arena), checkpoint); } @@ -4426,8 +4919,8 @@ static int zend_jit_setup_hot_counters(zend_op_array *op_array) zend_cfg cfg; uint32_t i; - ZEND_ASSERT(zend_jit_func_hot_counter_handler != NULL); - ZEND_ASSERT(zend_jit_loop_hot_counter_handler != NULL); + ZEND_ASSERT(!JIT_G(hot_func) || zend_jit_func_hot_counter_handler != NULL); + ZEND_ASSERT(!JIT_G(hot_loop) || zend_jit_loop_hot_counter_handler != NULL); if (zend_jit_build_cfg(op_array, &cfg) != SUCCESS) { return FAILURE; @@ -4528,6 +5021,7 @@ ZEND_EXT_API int zend_jit_op_array(zend_op_array *op_array, zend_script *script) } else { ZEND_UNREACHABLE(); } + return FAILURE; } ZEND_EXT_API int zend_jit_script(zend_script *script) @@ -4564,6 +5058,7 @@ ZEND_EXT_API int zend_jit_script(zend_script *script) if (zend_jit_op_array_analyze1(call_graph.op_arrays[i], script, 
&info->ssa) != SUCCESS) { goto jit_failure; } + info->ssa.cfg.flags |= info->flags; info->flags = info->ssa.cfg.flags; } } @@ -4706,6 +5201,7 @@ ZEND_EXT_API void zend_jit_protect(void) static void zend_jit_init_handlers(void) { if (zend_jit_vm_kind == ZEND_VM_KIND_HYBRID) { +#ifndef ZEND_JIT_IR zend_jit_runtime_jit_handler = dasm_labels[zend_lbhybrid_runtime_jit]; zend_jit_profile_jit_handler = dasm_labels[zend_lbhybrid_profile_jit]; zend_jit_func_hot_counter_handler = dasm_labels[zend_lbhybrid_func_hot_counter]; @@ -4713,6 +5209,15 @@ static void zend_jit_init_handlers(void) zend_jit_func_trace_counter_handler = dasm_labels[zend_lbhybrid_func_trace_counter]; zend_jit_ret_trace_counter_handler = dasm_labels[zend_lbhybrid_ret_trace_counter]; zend_jit_loop_trace_counter_handler = dasm_labels[zend_lbhybrid_loop_trace_counter]; +#else + zend_jit_runtime_jit_handler = zend_jit_stub_handlers[jit_stub_hybrid_runtime_jit]; + zend_jit_profile_jit_handler = zend_jit_stub_handlers[jit_stub_hybrid_profile_jit]; + zend_jit_func_hot_counter_handler = zend_jit_stub_handlers[jit_stub_hybrid_func_hot_counter]; + zend_jit_loop_hot_counter_handler = zend_jit_stub_handlers[jit_stub_hybrid_loop_hot_counter]; + zend_jit_func_trace_counter_handler = zend_jit_stub_handlers[jit_stub_hybrid_func_trace_counter]; + zend_jit_ret_trace_counter_handler = zend_jit_stub_handlers[jit_stub_hybrid_ret_trace_counter]; + zend_jit_loop_trace_counter_handler = zend_jit_stub_handlers[jit_stub_hybrid_loop_trace_counter]; +#endif } else { zend_jit_runtime_jit_handler = (const void*)zend_runtime_jit; zend_jit_profile_jit_handler = (const void*)zend_jit_profile_helper; @@ -4724,6 +5229,7 @@ static void zend_jit_init_handlers(void) } } +#ifndef ZEND_JIT_IR static int zend_jit_make_stubs(void) { dasm_State* dasm_state = NULL; @@ -4749,6 +5255,7 @@ static int zend_jit_make_stubs(void) dasm_free(&dasm_state); return 1; } +#endif static void zend_jit_globals_ctor(zend_jit_globals *jit_globals) { @@ -4925,21 +5432,13 @@ ZEND_EXT_API int zend_jit_check_support(void) ZEND_EXT_API int zend_jit_startup(void *buf, size_t size, bool reattached) { +#ifndef ZEND_JIT_IR int ret; +#endif zend_jit_halt_op = zend_get_halt_op(); - - if (zend_jit_setup() != SUCCESS) { - // TODO: error reporting and cleanup ??? 
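/*
 * Under ZEND_JIT_IR the hybrid-VM entry points in zend_jit_init_handlers()
 * above are looked up in a stub-handler table produced by the IR backend,
 * rather than in DynASM's global label array.  The shape of such a table
 * (names hypothetical; the real slots are filled when the stubs are
 * compiled at startup):
 */
enum {
    stub_hybrid_runtime_jit,
    stub_hybrid_profile_jit,
    stub_hybrid_func_hot_counter,
    /* ... one slot per stub ... */
    stub_count
};

static const void *stub_table[stub_count];  /* indexed by the enum above */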
- return FAILURE; - } - zend_jit_profile_counter_rid = zend_get_op_array_extension_handle(ACCELERATOR_PRODUCT_NAME); -#ifdef HAVE_GDB - zend_jit_gdb_init(); -#endif - #if ZEND_JIT_SUPPORT_CLDEMOTE cpu_support_cldemote = zend_cpu_supports_cldemote(); #endif @@ -4950,6 +5449,7 @@ ZEND_EXT_API int zend_jit_startup(void *buf, size_t size, bool reattached) dasm_buf = buf; dasm_size = size; + dasm_ptr = dasm_end = (void*)(((char*)dasm_buf) + size - sizeof(*dasm_ptr) * 2); #ifdef HAVE_MPROTECT #ifdef HAVE_PTHREAD_JIT_WRITE_PROTECT_NP @@ -4988,17 +5488,48 @@ ZEND_EXT_API int zend_jit_startup(void *buf, size_t size, bool reattached) } #endif - dasm_ptr = dasm_end = (void*)(((char*)dasm_buf) + size - sizeof(*dasm_ptr) * 2); if (!reattached) { zend_jit_unprotect(); *dasm_ptr = dasm_buf; -#if _WIN32 +#if defined(_WIN32) && !defined(ZEND_JIT_IR) /* reserve space for global labels */ *dasm_ptr = (void**)*dasm_ptr + zend_lb_MAX; +#elif defined(_WIN32) && defined(ZEND_JIT_IR) + zend_jit_stub_handlers = dasm_buf; + *dasm_ptr = (void**)*dasm_ptr + sizeof(zend_jit_stubs) / sizeof(zend_jit_stubs[0]); +#elif defined(IR_TARGET_AARCH64) && defined(ZEND_JIT_IR) + zend_jit_stub_handlers = dasm_buf; + *dasm_ptr = (void**)*dasm_ptr + (sizeof(zend_jit_stubs) / sizeof(zend_jit_stubs[0])) * 2; + memset(zend_jit_stub_handlers, 0, (sizeof(zend_jit_stubs) / sizeof(zend_jit_stubs[0])) * 2 * sizeof(void*)); #endif + *dasm_ptr = (void*)ZEND_MM_ALIGNED_SIZE_EX(((size_t)(*dasm_ptr)), 16); zend_jit_protect(); + } else { +#if (defined(_WIN32) || defined(IR_TARGET_AARCH64)) && defined(ZEND_JIT_IR) + zend_jit_stub_handlers = dasm_buf; + zend_jit_init_handlers(); +#endif } +#ifndef ZEND_JIT_IR + if (zend_jit_setup() != SUCCESS) { + // TODO: error reporting and cleanup ??? + return FAILURE; + } + +#ifdef HAVE_GDB + zend_jit_gdb_init(); +#endif + +#ifdef HAVE_OPROFILE + if (JIT_G(debug) & ZEND_JIT_DEBUG_OPROFILE) { + if (!zend_jit_oprofile_startup()) { + // TODO: error reporting and cleanup ??? + return FAILURE; + } + } +#endif + #ifdef HAVE_DISASM if (JIT_G(debug) & (ZEND_JIT_DEBUG_ASM|ZEND_JIT_DEBUG_ASM_STUBS)) { if (!zend_jit_disasm_init()) { @@ -5017,7 +5548,7 @@ ZEND_EXT_API int zend_jit_startup(void *buf, size_t size, bool reattached) if (!reattached) { zend_jit_unprotect(); ret = zend_jit_make_stubs(); -#if _WIN32 +#if defined(_WIN32) && !defined(ZEND_JIT_IR) /* save global labels */ memcpy(dasm_buf, dasm_labels, sizeof(void*) * zend_lb_MAX); #endif @@ -5027,23 +5558,36 @@ ZEND_EXT_API int zend_jit_startup(void *buf, size_t size, bool reattached) return FAILURE; } } else { -#if _WIN32 +#if defined(_WIN32) /* restore global labels */ memcpy(dasm_labels, dasm_buf, sizeof(void*) * zend_lb_MAX); zend_jit_init_handlers(); #endif } +#else /* ZEND_JIT_IR */ + zend_jit_unprotect(); + if (zend_jit_setup() != SUCCESS) { + zend_jit_protect(); + // TODO: error reporting and cleanup ??? 
+ return FAILURE; + } + zend_jit_protect(); + zend_jit_init_handlers(); +#endif /* ZEND_JIT_IR */ + if (zend_jit_trace_startup(reattached) != SUCCESS) { return FAILURE; } zend_jit_unprotect(); +#ifndef ZEND_JIT_IR #if ZEND_JIT_TARGET_ARM64 /* reserve space for global labels veneers */ dasm_labels_veneers = *dasm_ptr; *dasm_ptr = (void**)*dasm_ptr + ZEND_MM_ALIGNED_SIZE_EX(zend_lb_MAX, DASM_ALIGNMENT); memset(dasm_labels_veneers, 0, sizeof(void*) * ZEND_MM_ALIGNED_SIZE_EX(zend_lb_MAX, DASM_ALIGNMENT)); +#endif #endif /* save JIT buffer pos */ dasm_ptr[1] = dasm_ptr[0]; @@ -5058,6 +5602,7 @@ ZEND_EXT_API void zend_jit_shutdown(void) fprintf(stderr, "\nJIT memory usage: %td\n", (ptrdiff_t)((char*)*dasm_ptr - (char*)dasm_buf)); } +#ifndef ZEND_JIT_IR #ifdef HAVE_GDB if (JIT_G(debug) & ZEND_JIT_DEBUG_GDB) { zend_jit_gdb_unregister(); @@ -5073,6 +5618,10 @@ ZEND_EXT_API void zend_jit_shutdown(void) zend_jit_perf_jitdump_close(); } #endif +#else + zend_jit_shutdown_ir(); +#endif + #ifdef ZTS ts_free_id(jit_globals_id); #else @@ -5190,8 +5739,10 @@ ZEND_EXT_API void zend_jit_restart(void) if (dasm_buf) { zend_jit_unprotect(); +#ifndef ZEND_JIT_IR //??? #if ZEND_JIT_TARGET_ARM64 memset(dasm_labels_veneers, 0, sizeof(void*) * ZEND_MM_ALIGNED_SIZE_EX(zend_lb_MAX, DASM_ALIGNMENT)); +#endif #endif /* restore JIT buffer pos */ diff --git a/ext/opcache/jit/zend_jit.h b/ext/opcache/jit/zend_jit.h index 029bdd9a510..8a1aab1d039 100644 --- a/ext/opcache/jit/zend_jit.h +++ b/ext/opcache/jit/zend_jit.h @@ -74,6 +74,16 @@ #define ZEND_JIT_DEBUG_TRACE_TSSA (1<<19) #define ZEND_JIT_DEBUG_TRACE_EXIT_INFO (1<<20) +#define ZEND_JIT_DEBUG_IR_SRC (1<<24) +#define ZEND_JIT_DEBUG_IR_FINAL (1<<25) +#define ZEND_JIT_DEBUG_IR_CFG (1<<26) +#define ZEND_JIT_DEBUG_IR_REGS (1<<27) + +#define ZEND_JIT_DEBUG_IR_AFTER_SCCP (1<<28) +#define ZEND_JIT_DEBUG_IR_AFTER_SCHEDULE (1<<29) +#define ZEND_JIT_DEBUG_IR_AFTER_REGS (1<<30) +#define ZEND_JIT_DEBUG_IR_CODEGEN (1U<<31) + #define ZEND_JIT_DEBUG_PERSISTENT 0x1f0 /* profile and debugger flags can't be changed at run-time */ #define ZEND_JIT_TRACE_MAX_LENGTH 1024 /* max length of single trace */ @@ -155,6 +165,27 @@ ZEND_EXT_API void zend_jit_deactivate(void); ZEND_EXT_API void zend_jit_status(zval *ret); ZEND_EXT_API void zend_jit_restart(void); +#ifdef ZEND_JIT_IR + +#define ZREG_LOAD (1<<0) +#define ZREG_STORE (1<<1) +#define ZREG_LAST_USE (1<<2) + +#define ZREG_PI (1<<3) +#define ZREG_PHI (1<<4) +#define ZREG_FORWARD (1<<5) + +#define ZREG_SPILL_SLOT (1<<3) + +#define ZREG_CONST (1<<4) +#define ZREG_ZVAL_COPY (2<<4) +#define ZREG_TYPE_ONLY (3<<4) +#define ZREG_ZVAL_ADDREF (4<<4) +#define ZREG_THIS (5<<4) + +#define ZREG_NONE -1 + +#else typedef struct _zend_lifetime_interval zend_lifetime_interval; typedef struct _zend_life_range zend_life_range; @@ -187,5 +218,6 @@ struct _zend_lifetime_interval { zend_lifetime_interval *used_as_hint; zend_lifetime_interval *list_next; }; +#endif #endif /* HAVE_JIT_H */ diff --git a/ext/opcache/jit/zend_jit_internal.h b/ext/opcache/jit/zend_jit_internal.h index 49331b76a66..77df230b831 100644 --- a/ext/opcache/jit/zend_jit_internal.h +++ b/ext/opcache/jit/zend_jit_internal.h @@ -21,6 +21,8 @@ #ifndef ZEND_JIT_INTERNAL_H #define ZEND_JIT_INTERNAL_H +#ifndef ZEND_JIT_IR + #include "zend_bitset.h" /* Register Set */ @@ -108,21 +110,20 @@ uint32_t __inline __zend_jit_clz(uint32_t value) { /* Register Names */ extern const char *zend_reg_name[]; +#endif /* ZEND_JIT_IR */ /* Address Encoding */ typedef uintptr_t zend_jit_addr; #define IS_CONST_ZVAL 0 
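/*
 * Note the overlap in the ZREG_* values defined above: bits 0..2
 * (LOAD/STORE/LAST_USE) are genuine flags, but the remaining bits are read
 * two different ways depending on context.  In the register allocator,
 * bits 3..5 are the PI/PHI/FORWARD flags; in trace deoptimization metadata,
 * bit 3 marks a spill slot and the values ZREG_CONST..ZREG_THIS (1<<4 ..
 * 5<<4) form a small enumerated "kind" field rather than independent flags.
 * Reading the deopt variant back, under that interpretation:
 */
#define ZDEOPT_FLAGS(x) ((x) & 0x7)        /* LOAD | STORE | LAST_USE */
#define ZDEOPT_SPILL(x) (((x) >> 3) & 1)   /* ZREG_SPILL_SLOT */
#define ZDEOPT_KIND(x)  ((x) >> 4)         /* 0=none, 1=CONST, 2=ZVAL_COPY, ... */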
#define IS_MEM_ZVAL 1 -#define IS_REG 2 +#define IS_REG 2 /* CPU register or PHP SSA variable number (for IR JIT) */ +#define IS_REF_ZVAL 3 /* IR reference */ #define _ZEND_ADDR_MODE_MASK 0x3 #define _ZEND_ADDR_REG_SHIFT 2 #define _ZEND_ADDR_REG_MASK 0x3f /* no more than 64 registers */ #define _ZEND_ADDR_OFFSET_SHIFT 8 -#define _ZEND_ADDR_REG_STORE_BIT 8 -#define _ZEND_ADDR_REG_LOAD_BIT 9 -#define _ZEND_ADDR_REG_LAST_USE_BIT 10 #define ZEND_ADDR_CONST_ZVAL(zv) \ (((zend_jit_addr)(uintptr_t)(zv)) | IS_CONST_ZVAL) @@ -138,6 +139,13 @@ typedef uintptr_t zend_jit_addr; #define Z_ZV(addr) ((zval*)(addr)) #define Z_OFFSET(addr) ((uint32_t)((addr)>>_ZEND_ADDR_OFFSET_SHIFT)) #define Z_REG(addr) ((zend_reg)(((addr)>>_ZEND_ADDR_REG_SHIFT) & _ZEND_ADDR_REG_MASK)) + +#ifndef ZEND_JIT_IR + +#define _ZEND_ADDR_REG_STORE_BIT 8 +#define _ZEND_ADDR_REG_LOAD_BIT 9 +#define _ZEND_ADDR_REG_LAST_USE_BIT 10 + #define Z_STORE(addr) ((zend_reg)(((addr)>>_ZEND_ADDR_REG_STORE_BIT) & 1)) #define Z_LOAD(addr) ((zend_reg)(((addr)>>_ZEND_ADDR_REG_LOAD_BIT) & 1)) #define Z_LAST_USE(addr) ((zend_reg)(((addr)>>_ZEND_ADDR_REG_LAST_USE_BIT) & 1)) @@ -178,6 +186,47 @@ static zend_always_inline zend_jit_addr _zend_jit_decode_op(uint8_t op_type, zno #define OP_ADDR(opline, type, op) \ _zend_jit_decode_op((opline)->type, (opline)->op, opline, ZREG_NONE) +#define OP_REG_ADDR(opline, type, _op, _ssa_op) \ + _zend_jit_decode_op((opline)->type, (opline)->_op, opline, \ + OP_REG(ssa_op, _ssa_op)) + +#else /* ZEND_JIT_IR */ + +#define ZEND_ADDR_REF_ZVAL(ref) \ + ((((zend_jit_addr)(uintptr_t)(ref)) << _ZEND_ADDR_REG_SHIFT) | \ + IS_REF_ZVAL) + +#define Z_SSA_VAR(addr) ((addr)>>_ZEND_ADDR_REG_SHIFT) +#define Z_IR_REF(addr) ((addr)>>_ZEND_ADDR_REG_SHIFT) + +#define Z_STORE(addr) \ + ((jit->ra && jit->ra[Z_SSA_VAR(addr)].ref) ? \ + (jit->ra[Z_SSA_VAR(addr)].flags & ZREG_STORE) : \ + 0) +#define Z_LOAD(addr) \ + ((jit->ra && jit->ra[Z_SSA_VAR(addr)].ref) ? \ + (jit->ra[Z_SSA_VAR(addr)].flags & ZREG_LOAD) : \ + 0) + +#if ZEND_USE_ABS_CONST_ADDR +# define OP_ADDR(opline, type, op) \ + (((opline)->type == IS_CONST) ? \ + ZEND_ADDR_CONST_ZVAL((opline)->op.zv) : \ + ZEND_ADDR_MEM_ZVAL(ZREG_FP, (opline)->op.var)) +#else +# define OP_ADDR(opline, type, op) \ + (((opline)->type == IS_CONST) ? \ + ZEND_ADDR_CONST_ZVAL(RT_CONSTANT(opline, (opline)->op)) : \ + ZEND_ADDR_MEM_ZVAL(ZREG_FP, (opline)->op.var)) +#endif + +#define OP_REG_ADDR(opline, type, op, _ssa_op) \ + ((ctx.ra && ssa_op->_ssa_op >= 0 && ctx.ra[ssa_op->_ssa_op].ref) ? 
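/*
 * zend_jit_addr above is a tagged uintptr_t: the two low bits select the
 * addressing mode and the payload sits above them.  The IR build adds
 * IS_REF_ZVAL, whose payload is an IR value reference instead of a CPU
 * register or frame offset.  A stand-alone model of the encoding (payload
 * width simplified; the real macros keep the full pointer range for
 * IS_CONST_ZVAL):
 */
#include <assert.h>
#include <stdint.h>

enum { A_CONST = 0, A_MEM = 1, A_REG = 2, A_REF = 3 };

#define A_MODE_MASK 0x3u
#define A_SHIFT     2

#define A_ENC_REF(ref) (((uintptr_t)(uint32_t)(ref) << A_SHIFT) | A_REF)
#define A_MODE(a)      ((unsigned)((a) & A_MODE_MASK))
#define A_DEC_REF(a)   ((int32_t)((a) >> A_SHIFT))

int main(void)
{
    uintptr_t a = A_ENC_REF(42);
    assert(A_MODE(a) == A_REF && A_DEC_REF(a) == 42);
    return 0;
}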
\
+		ZEND_ADDR_REG(ssa_op->_ssa_op) : \
+		OP_ADDR(opline, type, op))
+
+#endif /* ZEND_JIT_IR */
+
 #define OP1_ADDR() \
 	OP_ADDR(opline, op1_type, op1)
 #define OP2_ADDR() \
@@ -187,10 +236,6 @@ static zend_always_inline zend_jit_addr _zend_jit_decode_op(uint8_t op_type, zno
 #define OP1_DATA_ADDR() \
 	OP_ADDR(opline + 1, op1_type, op1)
 
-#define OP_REG_ADDR(opline, type, _op, _ssa_op) \
-	_zend_jit_decode_op((opline)->type, (opline)->_op, opline, \
-		OP_REG(ssa_op, _ssa_op))
-
 #define OP1_REG_ADDR() \
 	OP_REG_ADDR(opline, op1_type, op1, op1_use)
 #define OP2_REG_ADDR() \
@@ -213,8 +258,15 @@ static zend_always_inline bool zend_jit_same_addr(zend_jit_addr addr1, zend_jit_
 {
 	if (addr1 == addr2) {
 		return 1;
+#ifndef ZEND_JIT_IR
 	} else if (Z_MODE(addr1) == IS_REG && Z_MODE(addr2) == IS_REG) {
 		return Z_REG(addr1) == Z_REG(addr2);
+#else
+	} else if (Z_MODE(addr1) == IS_REG && Z_MODE(addr2) == IS_REG) {
+		return Z_SSA_VAR(addr1) == Z_SSA_VAR(addr2);
+	} else if (Z_MODE(addr1) == IS_REF_ZVAL && Z_MODE(addr2) == IS_REF_ZVAL) {
+		return Z_IR_REF(addr1) == Z_IR_REF(addr2);
+#endif
 	}
 	return 0;
 }
@@ -414,6 +466,8 @@ typedef enum _zend_jit_trace_stop {
 #define ZEND_JIT_EXIT_METHOD_CALL   (1<<9) /* exit because of polymorphic INIT_METHOD_CALL call */
 #define ZEND_JIT_EXIT_INVALIDATE    (1<<10) /* invalidate current trace */
 
+#define ZEND_JIT_EXIT_FIXED         (1U<<31) /* the exit_info can't be changed by zend_jit_snapshot_handler() */
+
 typedef union _zend_op_trace_info {
 	zend_op dummy; /* the size of this structure must be the same as zend_op */
 	struct {
@@ -515,8 +569,15 @@ typedef struct _zend_jit_trace_exit_info {
 	uint32_t  flags;       /* set of ZEND_JIT_EXIT_... */
 	uint32_t  stack_size;
 	uint32_t  stack_offset;
+#ifdef ZEND_JIT_IR
+	int32_t   poly_func_ref;
+	int32_t   poly_this_ref;
+	int8_t    poly_func_reg;
+	int8_t    poly_this_reg;
+#endif
 } zend_jit_trace_exit_info;
 
+#ifndef ZEND_JIT_IR
 typedef union _zend_jit_trace_stack {
 	int32_t   ssa_var;
 	uint32_t  info;
@@ -530,6 +591,50 @@ typedef union _zend_jit_trace_stack {
 #define STACK_VAR(_stack, _slot) \
 	(_stack)[_slot].ssa_var
+#define SET_STACK_VAR(_stack, _slot, _ssa_var) do { \
+		(_stack)[_slot].ssa_var = _ssa_var; \
+	} while (0)
+
+#define CLEAR_STACK_REF(_stack, _slot)
+
+#else /* ZEND_JIT_IR */
+
+typedef struct _zend_jit_trace_stack {
+	union {
+		uint32_t info;
+		struct {
+			uint8_t type;     /* variable type (for type inference) */
+			uint8_t mem_type; /* stack slot type (to eliminate dead type stores) */
+			int8_t  reg;
+			uint8_t flags;
+		};
+	};
+	int32_t ref;
+} zend_jit_trace_stack;
+
+#define STACK_VAR(_stack, _slot) \
+	((int32_t*)(_stack))[_slot]
+#define SET_STACK_VAR(_stack, _slot, _ssa_var) do { \
+		((int32_t*)(_stack))[_slot] = _ssa_var; \
+	} while (0)
+
+#define CLEAR_STACK_REF(_stack, _slot) do { \
+		(_stack)[_slot].ref = IR_UNUSED; \
+		(_stack)[_slot].flags = 0; \
+	} while (0)
+#define STACK_REF(_stack, _slot) \
+	(_stack)[_slot].ref
+#define SET_STACK_REF(_stack, _slot, _ref) do { \
+		(_stack)[_slot].ref = (_ref); \
+		(_stack)[_slot].flags = 0; \
+	} while (0)
+#define SET_STACK_REF_EX(_stack, _slot, _ref, _flags) do { \
+		(_stack)[_slot].ref = (_ref); \
+		(_stack)[_slot].flags = _flags; \
+	} while (0)
+
+#endif /* ZEND_JIT_IR */
+
 #define STACK_INFO(_stack, _slot) \
 	(_stack)[_slot].info
 #define STACK_TYPE(_stack, _slot) \
@@ -540,9 +645,6 @@ typedef union _zend_jit_trace_stack {
 	(_stack)[_slot].reg
 #define STACK_FLAGS(_stack, _slot) \
 	(_stack)[_slot].flags
-#define SET_STACK_VAR(_stack, _slot, _ssa_var) do { \
-		(_stack)[_slot].ssa_var = _ssa_var; \
-	} while (0)
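In the ZEND_JIT_IR variant of _zend_jit_trace_stack above, the four descriptor bytes (type, mem_type, reg, flags) overlay one 32-bit word, and STACK_VAR()/SET_STACK_VAR() reinterpret that first word as a signed SSA variable number, while the IR reference lives in the separate ref field. A standalone sketch of the overlay (illustrative names; it relies on C11 anonymous structs, as the patch does):

#include <assert.h>
#include <stdint.h>

/* Four descriptor bytes share storage with one 32-bit word; writes in the
 * style of SET_STACK_VAR() go through the first word reinterpreted as a
 * signed int32. Illustrative only. */
typedef struct trace_slot {
	union {
		uint32_t info;
		struct {
			uint8_t type;      /* variable type */
			uint8_t mem_type;  /* type currently stored in memory */
			int8_t  reg;
			uint8_t flags;
		};
	};
	int32_t ref;               /* IR reference, kept separately */
} trace_slot;

int main(void)
{
	trace_slot s = {0};

	((int32_t*)&s)[0] = -7;           /* SET_STACK_VAR-style write */
	assert(((int32_t*)&s)[0] == -7);  /* STACK_VAR-style read */

	s.info = 0;
	s.type = 4;   /* e.g. IS_LONG */
	s.reg  = -1;  /* e.g. ZREG_NONE */
	assert(s.type == 4 && s.reg == -1);
	return 0;
}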
 #define SET_STACK_INFO(_stack, _slot, _info) do { \
 		(_stack)[_slot].info = _info; \
 	} while (0)
@@ -554,6 +656,7 @@ typedef union _zend_jit_trace_stack {
 		} \
 		(_stack)[_slot].reg = ZREG_NONE; \
 		(_stack)[_slot].flags = 0; \
+		CLEAR_STACK_REF(_stack, _slot); \
 	} while (0)
 #define SET_STACK_REG(_stack, _slot, _reg) do { \
 		(_stack)[_slot].reg = _reg; \
@@ -572,6 +675,13 @@ typedef union _zend_jit_trace_stack {
 #define ZEND_JIT_TRACE_LOOP            (1<<1)
 #define ZEND_JIT_TRACE_USES_INITIAL_IP (1<<2)
 
+#ifdef ZEND_JIT_IR
+typedef union _zend_jit_exit_const {
+	int64_t i;
+	double  d;
+} zend_jit_exit_const;
+#endif
+
 typedef struct _zend_jit_trace_info {
 	uint32_t id;   /* trace id */
 	uint32_t root; /* root trace id or self id for root traces */
@@ -591,6 +701,10 @@ typedef struct _zend_jit_trace_info {
 	zend_jit_trace_exit_info *exit_info; /* info about side exits */
 	zend_jit_trace_stack     *stack_map;
 	//uint32_t loop_offset;
+#ifdef ZEND_JIT_IR
+	uint32_t consts_count; /* number of trace constants */
+	zend_jit_exit_const *constants;
+#endif
 } zend_jit_trace_info;
 
 struct _zend_jit_trace_stack_frame {
@@ -709,7 +823,9 @@ ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL zend_jit_ret_trace_helper(ZEND_OPCODE_HAND
 ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL zend_jit_loop_trace_helper(ZEND_OPCODE_HANDLER_ARGS);
 
 int ZEND_FASTCALL zend_jit_trace_hot_root(zend_execute_data *execute_data, const zend_op *opline);
+#ifndef ZEND_JIT_IR
 int ZEND_FASTCALL zend_jit_trace_exit(uint32_t exit_num, zend_jit_registers_buf *regs);
+#endif
 zend_jit_trace_stop ZEND_FASTCALL zend_jit_trace_execute(zend_execute_data *execute_data, const zend_op *opline, zend_jit_trace_rec *trace_buffer, uint8_t start, uint32_t is_megamorphc);
 
 static zend_always_inline const zend_op* zend_jit_trace_get_exit_opline(zend_jit_trace_rec *trace, const zend_op *opline, bool *exit_if_true)
@@ -794,11 +910,13 @@ static zend_always_inline bool zend_long_is_power_of_two(zend_long x)
 	return (x > 0) && !(x & (x - 1));
 }
 
+#ifndef ZEND_JIT_IR
 static zend_always_inline uint32_t zend_long_floor_log2(zend_long x)
 {
 	ZEND_ASSERT(zend_long_is_power_of_two(x));
 	return zend_ulong_ntz(x);
 }
+#endif
 
 /* from http://aggregate.org/MAGIC/ */
 static zend_always_inline uint32_t ones32(uint32_t x)
diff --git a/ext/opcache/jit/zend_jit_ir.c b/ext/opcache/jit/zend_jit_ir.c
new file mode 100644
index 00000000000..7253e486131
--- /dev/null
+++ b/ext/opcache/jit/zend_jit_ir.c
@@ -0,0 +1,16531 @@
+/*
+ * +----------------------------------------------------------------------+
+ * | Zend JIT                                                             |
+ * +----------------------------------------------------------------------+
+ * | Copyright (c) The PHP Group                                          |
+ * +----------------------------------------------------------------------+
+ * | This source file is subject to version 3.01 of the PHP license,      |
+ * | that is bundled with this package in the file LICENSE, and is        |
+ * | available through the world-wide-web at the following url:           |
+ * | https://www.php.net/license/3_01.txt                                 |
+ * | If you did not receive a copy of the PHP license and are unable to   |
+ * | obtain it through the world-wide-web, please send a note to          |
+ * | license@php.net so we can mail you a copy immediately.
| + * +----------------------------------------------------------------------+ + * | Authors: Dmitry Stogov | + * +----------------------------------------------------------------------+ + */ + +#include "jit/ir/ir.h" +#include "jit/ir/ir_builder.h" + +#if defined(IR_TARGET_X86) +# define IR_REG_SP 4 /* IR_REG_RSP */ +# define IR_REG_FP 5 /* IR_REG_RBP */ +# define ZREG_FP 6 /* IR_REG_RSI */ +# define ZREG_IP 7 /* IR_REG_RDI */ +# define ZREG_FIRST_FPR 8 +# define IR_REGSET_PRESERVED ((1<<3) | (1<<5) | (1<<6) | (1<<7)) /* all preserved registers */ +#elif defined(IR_TARGET_X64) +# define IR_REG_SP 4 /* IR_REG_RSP */ +# define IR_REG_FP 5 /* IR_REG_RBP */ +# define ZREG_FP 14 /* IR_REG_R14 */ +# define ZREG_IP 15 /* IR_REG_R15 */ +# define ZREG_FIRST_FPR 16 +# if defined(_WIN64) +# define IR_REGSET_PRESERVED ((1<<3) | (1<<5) | (1<<6) | (1<<7) | (1<<12) | (1<<13) | (1<<14) | (1<<15)) +/* +# define IR_REGSET_PRESERVED ((1<<3) | (1<<5) | (1<<6) | (1<<7) | (1<<12) | (1<<13) | (1<<14) | (1<<15) | \ + (1<<(16+6)) | (1<<(16+7)) | (1<<(16+8)) | (1<<(16+9)) | (1<<(16+10)) | \ + (1<<(16+11)) | (1<<(16+12)) | (1<<(16+13)) | (1<<(16+14)) | (1<<(16+15))) +*/ +# else +# define IR_REGSET_PRESERVED ((1<<3) | (1<<5) | (1<<12) | (1<<13) | (1<<14) | (1<<15)) /* all preserved registers */ +# endif +#elif defined(IR_TARGET_AARCH64) +# define IR_REG_SP 31 /* IR_REG_RSP */ +# define IR_REG_FP 29 /* IR_REG_X29 */ +# define ZREG_FP 27 /* IR_REG_X27 */ +# define ZREG_IP 28 /* IR_REG_X28 */ +# define ZREG_FIRST_FPR 32 +# define IR_REGSET_PRESERVED ((1<<19) | (1<<20) | (1<<21) | (1<<22) | (1<<23) | \ + (1<<24) | (1<<25) | (1<<26) | (1<<27) | (1<<28)) /* all preserved registers */ +#else +# error "Unknown IR target" +#endif + +#define ZREG_RX ZREG_IP + +#define OPTIMIZE_FOR_SIZE 0 + +/* IR builder defines */ +#undef _ir_CTX +#define _ir_CTX (&jit->ctx) + +#undef ir_CONST_ADDR +#define ir_CONST_ADDR(_addr) jit_CONST_ADDR(jit, (uintptr_t)(_addr)) +#define ir_CONST_FUNC(_addr) jit_CONST_FUNC(jit, (uintptr_t)(_addr), 0) +#define ir_CONST_FC_FUNC(_addr) jit_CONST_FUNC(jit, (uintptr_t)(_addr), IR_CONST_FASTCALL_FUNC) +#define ir_CAST_FC_FUNC(_addr) ir_fold2(_ir_CTX, IR_OPT(IR_BITCAST, IR_ADDR), (_addr), IR_CONST_FASTCALL_FUNC) + +#undef ir_ADD_OFFSET +#define ir_ADD_OFFSET(_addr, _offset) \ + jit_ADD_OFFSET(jit, _addr, _offset) + +#ifdef ZEND_ENABLE_ZVAL_LONG64 +# define IR_LONG IR_I64 +# define ir_CONST_LONG ir_CONST_I64 +# define ir_UNARY_OP_L ir_UNARY_OP_I64 +# define ir_BINARY_OP_L ir_BINARY_OP_I64 +# define ir_ADD_L ir_ADD_I64 +# define ir_SUB_L ir_SUB_I64 +# define ir_MUL_L ir_MUL_I64 +# define ir_DIV_L ir_DIV_I64 +# define ir_MOD_L ir_MOD_I64 +# define ir_NEG_L ir_NEG_I64 +# define ir_ABS_L ir_ABS_I64 +# define ir_SEXT_L ir_SEXT_I64 +# define ir_ZEXT_L ir_ZEXT_I64 +# define ir_TRUNC_L ir_TRUNC_I64 +# define ir_BITCAST_L ir_BITCAST_I64 +# define ir_FP2L ir_FP2I64 +# define ir_ADD_OV_L ir_ADD_OV_I64 +# define ir_SUB_OV_L ir_SUB_OV_I64 +# define ir_MUL_OV_L ir_MUL_OV_I64 +# define ir_NOT_L ir_NOT_I64 +# define ir_OR_L ir_OR_I64 +# define ir_AND_L ir_AND_I64 +# define ir_XOR_L ir_XOR_I64 +# define ir_SHL_L ir_SHL_I64 +# define ir_SHR_L ir_SHR_I64 +# define ir_SAR_L ir_SAR_I64 +# define ir_ROL_L ir_ROL_I64 +# define ir_ROR_L ir_ROR_I64 +# define ir_MIN_L ir_MIN_I64 +# define ir_MAX_L ir_MAX_I64 +# define ir_LOAD_L ir_LOAD_I64 +#else +# define IR_LONG IR_I32 +# define ir_CONST_LONG ir_CONST_I32 +# define ir_UNARY_OP_L ir_UNARY_OP_I32 +# define ir_BINARY_OP_L ir_BINARY_OP_I32 +# define ir_ADD_L ir_ADD_I32 +# define ir_SUB_L 
ir_SUB_I32
+# define ir_MUL_L              ir_MUL_I32
+# define ir_DIV_L              ir_DIV_I32
+# define ir_MOD_L              ir_MOD_I32
+# define ir_NEG_L              ir_NEG_I32
+# define ir_ABS_L              ir_ABS_I32
+# define ir_SEXT_L             ir_SEXT_I32
+# define ir_ZEXT_L             ir_ZEXT_I32
+# define ir_TRUNC_L            ir_TRUNC_I32
+# define ir_BITCAST_L          ir_BITCAST_I32
+# define ir_FP2L               ir_FP2I32
+# define ir_ADD_OV_L           ir_ADD_OV_I32
+# define ir_SUB_OV_L           ir_SUB_OV_I32
+# define ir_MUL_OV_L           ir_MUL_OV_I32
+# define ir_NOT_L              ir_NOT_I32
+# define ir_OR_L               ir_OR_I32
+# define ir_AND_L              ir_AND_I32
+# define ir_XOR_L              ir_XOR_I32
+# define ir_SHL_L              ir_SHL_I32
+# define ir_SHR_L              ir_SHR_I32
+# define ir_SAR_L              ir_SAR_I32
+# define ir_ROL_L              ir_ROL_I32
+# define ir_ROR_L              ir_ROR_I32
+# define ir_MIN_L              ir_MIN_I32
+# define ir_MAX_L              ir_MAX_I32
+# define ir_LOAD_L             ir_LOAD_I32
+#endif
+
+/* A helper structure to collect IR refs for the following use in (MERGE/PHI)_N */
+typedef struct _ir_refs {
+	uint32_t count;
+	uint32_t limit;
+	ir_ref   refs[0];
+} ir_refs;
+
+#define ir_refs_size(_n)     (offsetof(ir_refs, refs) + sizeof(ir_ref) * (_n))
+#define ir_refs_init(_name, _n) _name = alloca(ir_refs_size(_n)); \
+	do {_name->count = 0; _name->limit = (_n);} while (0)
+
+static void ir_refs_add(ir_refs *refs, ir_ref ref)
+{
+	ir_ref *ptr;
+
+	ZEND_ASSERT(refs->count < refs->limit);
+	ptr = refs->refs;
+	ptr[refs->count++] = ref;
+}
+
+static size_t zend_jit_trace_prologue_size = (size_t)-1;
+#if defined(IR_TARGET_X86) || defined(IR_TARGET_X64)
+static uint32_t allowed_opt_flags = 0;
+#endif
+static bool delayed_call_chain = 0; // TODO: remove this var (use jit->delayed_call_level) ???
+
+#ifdef ZTS
+# ifdef _WIN32
+extern uint32_t _tls_index;
+extern char *_tls_start;
+extern char *_tls_end;
+# endif
+
+static size_t tsrm_ls_cache_tcb_offset = 0;
+static size_t tsrm_tls_index = 0;
+static size_t tsrm_tls_offset = 0;
+
+# define EG_TLS_OFFSET(field) \
+	(executor_globals_offset + offsetof(zend_executor_globals, field))
+
+# define CG_TLS_OFFSET(field) \
+	(compiler_globals_offset + offsetof(zend_compiler_globals, field))
+
+# define jit_EG(_field) \
+	ir_ADD_OFFSET(jit_TLS(jit), EG_TLS_OFFSET(_field))
+
+# define jit_CG(_field) \
+	ir_ADD_OFFSET(jit_TLS(jit), CG_TLS_OFFSET(_field))
+
+#else
+
+# define jit_EG(_field) \
+	ir_CONST_ADDR(&EG(_field))
+
+# define jit_CG(_field) \
+	ir_CONST_ADDR(&CG(_field))
+
+#endif
+
+#define jit_CALL(_call, _field) \
+	ir_ADD_OFFSET(_call, offsetof(zend_execute_data, _field))
+
+#define jit_EX(_field) \
+	jit_CALL(jit_FP(jit), _field)
+
+#define jit_RX(_field) \
+	jit_CALL(jit_IP(jit), _field)
+
+#define JIT_STUBS(_) \
+	_(exception_handler,              IR_SKIP_PROLOGUE) \
+	_(exception_handler_undef,        IR_SKIP_PROLOGUE) \
+	_(exception_handler_free_op2,     IR_SKIP_PROLOGUE) \
+	_(exception_handler_free_op1_op2, IR_SKIP_PROLOGUE) \
+	_(interrupt_handler,              IR_SKIP_PROLOGUE) \
+	_(leave_function_handler,         IR_SKIP_PROLOGUE) \
+	_(negative_shift,                 IR_SKIP_PROLOGUE) \
+	_(mod_by_zero,                    IR_SKIP_PROLOGUE) \
+	_(invalid_this,                   IR_SKIP_PROLOGUE) \
+	_(undefined_function,             IR_SKIP_PROLOGUE) \
+	_(throw_cannot_pass_by_ref,       IR_SKIP_PROLOGUE) \
+	_(icall_throw,                    IR_SKIP_PROLOGUE) \
+	_(leave_throw,                    IR_SKIP_PROLOGUE) \
+	_(hybrid_runtime_jit,             IR_SKIP_PROLOGUE | IR_START_BR_TARGET) \
+	_(hybrid_profile_jit,             IR_SKIP_PROLOGUE | IR_START_BR_TARGET) \
+	_(hybrid_func_hot_counter,        IR_SKIP_PROLOGUE | IR_START_BR_TARGET) \
+	_(hybrid_loop_hot_counter,        IR_SKIP_PROLOGUE | IR_START_BR_TARGET) \
+	_(hybrid_func_trace_counter,      IR_SKIP_PROLOGUE | IR_START_BR_TARGET) \
+	_(hybrid_ret_trace_counter,       IR_SKIP_PROLOGUE | IR_START_BR_TARGET) \
+	_(hybrid_loop_trace_counter,      IR_SKIP_PROLOGUE | IR_START_BR_TARGET) \
+	_(trace_halt,                     IR_SKIP_PROLOGUE) \
+	_(trace_escape,                   IR_SKIP_PROLOGUE) \
+	_(trace_exit,                     IR_SKIP_PROLOGUE) \
+	_(undefined_offset,               IR_FUNCTION | IR_FASTCALL_FUNC) \
+	_(undefined_key,                  IR_FUNCTION | IR_FASTCALL_FUNC) \
+	_(cannot_add_element,             IR_FUNCTION | IR_FASTCALL_FUNC) \
+	_(assign_const,                   IR_FUNCTION | IR_FASTCALL_FUNC) \
+	_(assign_tmp,                     IR_FUNCTION | IR_FASTCALL_FUNC) \
+	_(assign_var,                     IR_FUNCTION | IR_FASTCALL_FUNC) \
+	_(assign_cv_noref,                IR_FUNCTION | IR_FASTCALL_FUNC) \
+	_(assign_cv,                      IR_FUNCTION | IR_FASTCALL_FUNC) \
+	_(new_array,                      IR_FUNCTION | IR_FASTCALL_FUNC) \
+
+#define JIT_STUB_ID(name, flags) \
+	jit_stub_ ## name,
+
+#define JIT_STUB_FORWARD(name, flags) \
+	static int zend_jit_ ## name ## _stub(zend_jit_ctx *jit);
+
+#define JIT_STUB(name, flags) \
+	{JIT_STUB_PREFIX #name, zend_jit_ ## name ## _stub, flags},
+
+typedef enum _jit_stub_id {
+	JIT_STUBS(JIT_STUB_ID)
+	jit_last_stub
+} jit_stub_id;
+
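JIT_STUBS(_) is an X-macro: one list expanded several times, with JIT_STUB_ID into the jit_stub_id enum above, and below with JIT_STUB_FORWARD into the forward declarations and with JIT_STUB into the zend_jit_stubs[] table, so the enum, the declarations, and the table cannot drift out of sync. A reduced standalone example of the pattern (the stub names here are arbitrary):

#include <stdio.h>

/* One list, three expansions, same idea as JIT_STUBS()/JIT_STUB_ID()/JIT_STUB(). */
#define STUBS(_) \
	_(exception_handler) \
	_(interrupt_handler) \
	_(trace_halt)

#define STUB_ID(name)   stub_ ## name,
#define STUB_NAME(name) #name,

typedef enum { STUBS(STUB_ID) stub_count } stub_id;

static const char *stub_names[] = { STUBS(STUB_NAME) };

int main(void)
{
	for (stub_id i = 0; i < stub_count; i++) {
		printf("%d: %s\n", (int)i, stub_names[i]);
	}
	return 0;
}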
+typedef struct _zend_jit_reg_var {
+	ir_ref   ref;
+	uint32_t flags;
+} zend_jit_reg_var;
+
+typedef struct _zend_jit_ctx {
+	ir_ctx               ctx;
+	const zend_op       *last_valid_opline;
+	bool                 use_last_valid_opline;
+	bool                 track_last_valid_opline;
+	bool                 reuse_ip;
+	uint32_t             delayed_call_level;
+	int                  b;                /* current basic block number or -1 */
+#ifdef ZTS
+	ir_ref               tls;
+#endif
+	ir_ref               fp;
+	ir_ref               trace_loop_ref;
+	ir_ref               return_inputs;
+	const zend_op_array *op_array;
+	const zend_op_array *current_op_array;
+	zend_ssa            *ssa;
+	zend_string         *name;
+	ir_ref              *bb_start_ref;     /* PHP BB -> IR ref mapping */
+	ir_ref              *bb_predecessors;  /* PHP BB -> index in bb_edges -> IR refs of predecessors */
+	ir_ref              *bb_edges;
+	zend_jit_trace_info *trace;
+	zend_jit_reg_var    *ra;
+	int                  delay_var;
+	ir_refs             *delay_refs;
+	ir_ref               eg_exception_addr;
+	HashTable            addr_hash;
+	ir_ref               stub_addr[jit_last_stub];
+} zend_jit_ctx;
+
+typedef int8_t zend_reg;
+
+typedef struct _zend_jit_registers_buf {
+#if defined(IR_TARGET_X64)
+	uint64_t gpr[16]; /* general purpose integer registers */
+	double   fpr[16]; /* floating point registers */
+#elif defined(IR_TARGET_X86)
+	uint32_t gpr[8];  /* general purpose integer registers */
+	double   fpr[8];  /* floating point registers */
+#elif defined (IR_TARGET_AARCH64)
+	uint64_t gpr[32]; /* general purpose integer registers */
+	double   fpr[32]; /* floating point registers */
+#else
+# error "Unknown IR target"
+#endif
+} zend_jit_registers_buf;
+
+/* Keep 32 exit points in a single code block */
+#define ZEND_JIT_EXIT_POINTS_SPACING   4  // push byte + short jmp = 4 bytes
+#define ZEND_JIT_EXIT_POINTS_PER_GROUP 32 // number of consecutive exit points
+
+static uint32_t zend_jit_exit_point_by_addr(void *addr);
+int ZEND_FASTCALL zend_jit_trace_exit(uint32_t exit_num, zend_jit_registers_buf *regs);
+
+static int zend_jit_assign_to_variable(zend_jit_ctx  *jit,
+                                       const zend_op *opline,
+                                       zend_jit_addr  var_use_addr,
+                                       zend_jit_addr  var_addr,
+                                       uint32_t       var_info,
+                                       uint32_t       var_def_info,
+                                       uint8_t        val_type,
+                                       zend_jit_addr  val_addr,
+                                       uint32_t       val_info,
+                                       zend_jit_addr  res_addr,
+                                       zend_jit_addr  ref_addr,
+                                       bool           check_exception);
+
+typedef struct _zend_jit_stub {
+	const char *name;
+	int       (*stub)(zend_jit_ctx *jit);
+	uint32_t    flags;
+} zend_jit_stub;
+
+JIT_STUBS(JIT_STUB_FORWARD)
+
+static const zend_jit_stub zend_jit_stubs[] = {
+	JIT_STUBS(JIT_STUB)
+};
+
+#if defined(_WIN32) || defined(IR_TARGET_AARCH64)
+/* We keep addresses in SHM to share them between separate processes (on Windows) or to support veneers (on AArch64) */
+static void** zend_jit_stub_handlers = NULL;
+#else
+static void* zend_jit_stub_handlers[sizeof(zend_jit_stubs) / sizeof(zend_jit_stubs[0])];
+#endif
+
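Each side exit is a fixed-size trampoline, ZEND_JIT_EXIT_POINTS_SPACING bytes apart and emitted ZEND_JIT_EXIT_POINTS_PER_GROUP at a time, so zend_jit_exit_point_by_addr() can recover an exit number from a code address arithmetically instead of keeping a table entry per exit. A sketch of that inverse mapping under the same constants (exit_num_by_addr is a hypothetical helper, not the patch's function):

#include <assert.h>
#include <stdint.h>

#define EXIT_POINTS_SPACING    4   /* push imm8 (2 bytes) + short jmp (2 bytes) */
#define EXIT_POINTS_PER_GROUP 32   /* consecutive exit points per code block */

/* Given the start of one group of exit trampolines, the exit number of its
 * first slot, and an address inside the group, recover the exit number. */
static uint32_t exit_num_by_addr(uintptr_t group_base, uint32_t group_first, uintptr_t addr)
{
	uintptr_t off = addr - group_base;

	assert(off % EXIT_POINTS_SPACING == 0);
	assert(off / EXIT_POINTS_SPACING < EXIT_POINTS_PER_GROUP);
	return group_first + (uint32_t)(off / EXIT_POINTS_SPACING);
}

int main(void)
{
	/* exit #35 = second group (first exit 32), slot 3 */
	uintptr_t base = 0x1000;
	assert(exit_num_by_addr(base, 32, base + 3 * EXIT_POINTS_SPACING) == 35);
	return 0;
}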
+#if defined(IR_TARGET_AARCH64)
+static const void *zend_jit_get_veneer(ir_ctx *ctx, const void *addr)
+{
+	int i, count = sizeof(zend_jit_stubs) / sizeof(zend_jit_stubs[0]);
+
+	for (i = 0; i < count; i++) {
+		if (zend_jit_stub_handlers[i] == addr) {
+			return zend_jit_stub_handlers[count + i];
+		}
+	}
+
+	if (((zend_jit_ctx*)ctx)->trace
+	 && (void*)addr >= dasm_buf && (void*)addr < dasm_end) {
+		uint32_t exit_point = zend_jit_exit_point_by_addr((void*)addr);
+
+		if (exit_point != (uint32_t)-1) {
+			zend_jit_trace_info *t = ((zend_jit_ctx*)ctx)->trace;
+
+			ZEND_ASSERT(exit_point < t->exit_count);
+			return (const void*)((char*)ctx->code_buffer + ctx->code_size - (t->exit_count - exit_point) * 4);
+		}
+	}
+
+	return NULL;
+}
+
+static bool zend_jit_set_veneer(ir_ctx *ctx, const void *addr, const void *veneer)
+{
+	int i, count = sizeof(zend_jit_stubs) / sizeof(zend_jit_stubs[0]);
+	int64_t offset;
+
+	for (i = 0; i < count; i++) {
+		if (zend_jit_stub_handlers[i] == addr) {
+			const void **ptr = (const void**)&zend_jit_stub_handlers[count + i];
+			*ptr = veneer;
+			if (JIT_G(debug) & ZEND_JIT_DEBUG_ASM) {
+				const char *name = ir_disasm_find_symbol((uint64_t)(uintptr_t)addr, &offset);
+
+				if (name && !offset) {
+					if (strstr(name, "@veneer") == NULL) {
+						char *new_name;
+
+						zend_spprintf(&new_name, 0, "%s@veneer", name);
+						ir_disasm_add_symbol(new_name, (uint64_t)(uintptr_t)veneer, 4);
+						efree(new_name);
+					} else {
+						ir_disasm_add_symbol(name, (uint64_t)(uintptr_t)veneer, 4);
+					}
+				}
+			}
+			return 1;
+		}
+	}
+
+	return 0;
+}
+
+static void zend_jit_commit_veneers(void)
+{
+	int i, count = sizeof(zend_jit_stubs) / sizeof(zend_jit_stubs[0]);
+
+	for (i = 0; i < count; i++) {
+		if (zend_jit_stub_handlers[count + i]) {
+			zend_jit_stub_handlers[i] = zend_jit_stub_handlers[count + i];
+			zend_jit_stub_handlers[count + i] = NULL;
+		}
+	}
+}
+#endif
+
+static bool zend_jit_prefer_const_addr_load(zend_jit_ctx *jit, uintptr_t addr)
+{
+#if defined(IR_TARGET_X86)
+	return 0; /* always use immediate value */
+#elif defined(IR_TARGET_X64)
+	return addr > 0xffffffff; /* prefer loading long constant from memory */
+#elif defined(IR_TARGET_AARCH64)
+	return addr > 0xffff;
+#else
+# error "Unknown IR target"
+#endif
+}
+
+static const char* zend_reg_name(int8_t reg)
+{
+	return ir_reg_name(reg, ir_reg_is_int(reg) ? IR_LONG : IR_DOUBLE);
+}
+
+/* IR helpers */
+
+#ifdef ZTS
+static ir_ref jit_TLS(zend_jit_ctx *jit)
+{
+	ZEND_ASSERT(jit->ctx.control);
+	if (jit->tls) {
+		/* Emit "TLS" once per basic block */
+		ir_insn *insn;
+		ir_ref ref = jit->ctx.control;
+
+		while (1) {
+			if (ref == jit->tls) {
+				return jit->tls;
+			}
+			insn = &jit->ctx.ir_base[ref];
+			if (insn->op >= IR_START || insn->op == IR_CALL) {
+				break;
+			}
+			ref = insn->op1;
+		}
+	}
+	jit->tls = ir_TLS(
+		tsrm_ls_cache_tcb_offset ? tsrm_ls_cache_tcb_offset : tsrm_tls_index,
+		tsrm_ls_cache_tcb_offset ? IR_NULL : tsrm_tls_offset);
+	return jit->tls;
+}
+#endif
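jit_TLS() above, like jit_FP() further down, emits its load at most once per basic block: it walks the current control chain backwards and reuses the previous TLS read unless a block boundary (IR_START and above) or an IR_CALL has been emitted since the load, because a call may invalidate the cached value. A toy model of that reuse rule (illustrative emitter, not the patch's API):

#include <stdio.h>

static int tls_ref  = 0;  /* cached IR ref of the last TLS load, 0 = none */
static int next_ref = 1;

static int emit_tls_load(void)
{
	if (!tls_ref) {
		tls_ref = next_ref++;  /* emit the real load only once */
		printf("emit LOAD tls -> r%d\n", tls_ref);
	}
	return tls_ref;
}

static void emit_call(void)   { printf("emit CALL\n"); tls_ref = 0; }
static void begin_block(void) { tls_ref = 0; }

int main(void)
{
	begin_block();
	int a = emit_tls_load();
	int b = emit_tls_load();  /* reused: same ref as a */
	printf("a=r%d b=r%d\n", a, b);
	emit_call();
	int c = emit_tls_load();  /* reloaded after the call */
	printf("c=r%d\n", c);
	return 0;
}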
+
+static ir_ref jit_CONST_ADDR(zend_jit_ctx *jit, uintptr_t addr)
+{
+	ir_ref ref;
+	zval *zv;
+
+	if (addr == 0) {
+		return IR_NULL;
+	}
+	zv = zend_hash_index_lookup(&jit->addr_hash, addr);
+	if (Z_TYPE_P(zv) == IS_LONG) {
+		ref = Z_LVAL_P(zv);
+		ZEND_ASSERT(jit->ctx.ir_base[ref].opt == IR_OPT(IR_ADDR, IR_ADDR));
+	} else {
+		ref = ir_unique_const_addr(&jit->ctx, addr);
+		ZVAL_LONG(zv, ref);
+	}
+	return ref;
+}
+
+static ir_ref jit_CONST_FUNC(zend_jit_ctx *jit, uintptr_t addr, uint16_t flags)
+{
+	ir_ref ref;
+	ir_insn *insn;
+	zval *zv;
+
+	ZEND_ASSERT(addr != 0);
+	zv = zend_hash_index_lookup(&jit->addr_hash, addr);
+	if (Z_TYPE_P(zv) == IS_LONG) {
+		ref = Z_LVAL_P(zv);
+		ZEND_ASSERT(jit->ctx.ir_base[ref].opt == IR_OPT(IR_FUNC_ADDR, IR_ADDR) && jit->ctx.ir_base[ref].const_flags == flags);
+	} else {
+		ref = ir_unique_const_addr(&jit->ctx, addr);
+		insn = &jit->ctx.ir_base[ref];
+		insn->optx = IR_OPT(IR_FUNC_ADDR, IR_ADDR);
+		insn->const_flags = flags;
+		ZVAL_LONG(zv, ref);
+	}
+	return ref;
+}
+
+static ir_ref jit_ADD_OFFSET(zend_jit_ctx *jit, ir_ref addr, uintptr_t offset)
+{
+	if (offset) {
+		addr = ir_ADD_A(addr, ir_CONST_ADDR(offset));
+	}
+	return addr;
+}
+
+static ir_ref jit_EG_exception(zend_jit_ctx *jit)
+{
+#ifdef ZTS
+	return jit_EG(exception);
+#else
+	ir_ref ref = jit->eg_exception_addr;
+
+	if (UNEXPECTED(!ref)) {
+		ref = ir_unique_const_addr(&jit->ctx, (uintptr_t)&EG(exception));
+		jit->eg_exception_addr = ref;
+	}
+	return ref;
+#endif
+}
+
+static ir_ref jit_STUB_ADDR(zend_jit_ctx *jit, jit_stub_id id)
+{
+	ir_ref ref = jit->stub_addr[id];
+
+	if (UNEXPECTED(!ref)) {
+		ref = ir_unique_const_addr(&jit->ctx, (uintptr_t)zend_jit_stub_handlers[id]);
+		jit->stub_addr[id] = ref;
+	}
+	return ref;
+}
+
+static ir_ref jit_STUB_FUNC_ADDR(zend_jit_ctx *jit, jit_stub_id id, uint16_t flags)
+{
+	ir_ref ref = jit->stub_addr[id];
+	ir_insn *insn;
+
+	if (UNEXPECTED(!ref)) {
+		ref = ir_unique_const_addr(&jit->ctx, (uintptr_t)zend_jit_stub_handlers[id]);
+		insn = &jit->ctx.ir_base[ref];
+		insn->optx = IR_OPT(IR_FUNC_ADDR, IR_ADDR);
+		insn->const_flags = flags;
+		jit->stub_addr[id] = ref;
+	}
+	return ref;
+}
+
+static void jit_SNAPSHOT(zend_jit_ctx *jit, ir_ref addr)
+{
+	if (JIT_G(trigger) == ZEND_JIT_ON_HOT_TRACE && JIT_G(current_frame)) {
+		const void *ptr = (const void*)jit->ctx.ir_base[addr].val.addr;
+		const zend_op_array *op_array = &JIT_G(current_frame)->func->op_array;
+		uint32_t stack_size = op_array->last_var + op_array->T;
+
+		if (ptr == zend_jit_stub_handlers[jit_stub_exception_handler]
+		 || ptr == zend_jit_stub_handlers[jit_stub_exception_handler_undef]
+		 || ptr == zend_jit_stub_handlers[jit_stub_exception_handler_free_op1_op2]
+		 || ptr == zend_jit_stub_handlers[jit_stub_exception_handler_free_op2]
+		 || ptr == zend_jit_stub_handlers[jit_stub_interrupt_handler]
+		 || ptr == zend_jit_stub_handlers[jit_stub_leave_function_handler]
+		 || ptr == zend_jit_stub_handlers[jit_stub_negative_shift]
+		 || ptr == zend_jit_stub_handlers[jit_stub_mod_by_zero]
+		 || ptr == zend_jit_stub_handlers[jit_stub_invalid_this]
+		 || ptr == zend_jit_stub_handlers[jit_stub_undefined_function]
+		 || ptr == zend_jit_stub_handlers[jit_stub_throw_cannot_pass_by_ref]
+		 || ptr == zend_jit_stub_handlers[jit_stub_icall_throw]
+		 || ptr == zend_jit_stub_handlers[jit_stub_leave_throw]
+		 || ptr == zend_jit_stub_handlers[jit_stub_trace_halt]
+		 || ptr == zend_jit_stub_handlers[jit_stub_trace_escape]) {
+			/* This is a GUARD that triggers an exit through stub
code (without deoptimization) */ + return; + } + + /* Check if we need snapshot entries for polymorphic method call */ + zend_jit_trace_info *t = jit->trace; + uint32_t exit_point = 0, n = 0; + + if (addr < 0) { + if (t->exit_count > 0 + && jit->ctx.ir_base[addr].val.u64 == (uintptr_t)zend_jit_trace_get_exit_addr(t->exit_count - 1)) { + exit_point = t->exit_count - 1; + if (t->exit_info[exit_point].flags & ZEND_JIT_EXIT_METHOD_CALL) { + n = 2; + } + } + } + + if (stack_size || n) { + zend_jit_trace_stack *stack = JIT_G(current_frame)->stack; + uint32_t snapshot_size, i; + + snapshot_size = stack_size; + while (snapshot_size > 0) { + ir_ref ref = STACK_REF(stack, snapshot_size - 1); + + if (!ref || ref == IR_NULL || (STACK_FLAGS(stack, snapshot_size - 1) & (/*ZREG_LOAD|*/ZREG_STORE))) { + snapshot_size--; + } else { + break; + } + } + if (snapshot_size || n) { + ir_ref snapshot; + + snapshot = ir_SNAPSHOT(snapshot_size + n); + for (i = 0; i < snapshot_size; i++) { + ir_ref ref = STACK_REF(stack, i); + + if (!ref || ref == IR_NULL || (STACK_FLAGS(stack, i) & (/*ZREG_LOAD|*/ZREG_STORE))) { + ref = IR_UNUSED; + } + ir_SNAPSHOT_SET_OP(snapshot, i + 1, ref); + } + if (n) { + ir_SNAPSHOT_SET_OP(snapshot, snapshot_size + 1, t->exit_info[exit_point].poly_func_ref); + ir_SNAPSHOT_SET_OP(snapshot, snapshot_size + 2, t->exit_info[exit_point].poly_this_ref); + } + } + } + } +} + +static int32_t _add_trace_const(zend_jit_trace_info *t, int64_t val) +{ + int32_t i; + + for (i = 0; i < t->consts_count; i++) { + if (t->constants[i].i == val) { + return i; + } + } + ZEND_ASSERT(i < 0x7fffffff); + t->consts_count = i + 1; + t->constants = erealloc(t->constants, (i + 1) * sizeof(zend_jit_exit_const)); + t->constants[i].i = val; + return i; +} + +uint32_t zend_jit_duplicate_exit_point(ir_ctx *ctx, zend_jit_trace_info *t, uint32_t exit_point, ir_ref snapshot_ref) +{ + uint32_t stack_size, stack_offset; + uint32_t new_exit_point = t->exit_count; + + if (new_exit_point >= ZEND_JIT_TRACE_MAX_EXITS) { + ctx->status = -ZEND_JIT_TRACE_STOP_TOO_MANY_EXITS; + return exit_point; + } + + t->exit_count++; + memcpy(&t->exit_info[new_exit_point], &t->exit_info[exit_point], sizeof(zend_jit_trace_exit_info)); + stack_size = t->exit_info[new_exit_point].stack_size; + if (stack_size != 0) { + stack_offset = t->stack_map_size; + t->stack_map_size += stack_size; + // TODO: reduce number of reallocations ??? 
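+	// One option: track a separate capacity for stack_map and grow it
+	// geometrically (e.g. doubling), so that repeated exit-point
+	// duplication costs amortized O(1) reallocations instead of one
+	// erealloc() per duplicated exit.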
+ t->stack_map = erealloc(t->stack_map, t->stack_map_size * sizeof(zend_jit_trace_stack)); + memcpy(t->stack_map + stack_offset, t->stack_map + t->exit_info[new_exit_point].stack_offset, stack_size * sizeof(zend_jit_trace_stack)); + t->exit_info[new_exit_point].stack_offset = stack_offset; + } + t->exit_info[new_exit_point].flags &= ~ZEND_JIT_EXIT_FIXED; + + return new_exit_point; +} + +void *zend_jit_snapshot_handler(ir_ctx *ctx, ir_ref snapshot_ref, ir_insn *snapshot, void *addr) +{ + zend_jit_trace_info *t = ((zend_jit_ctx*)ctx)->trace; + uint32_t exit_point, exit_flags; + ir_ref n = snapshot->inputs_count; + ir_ref i; + + exit_point = zend_jit_exit_point_by_addr(addr); + ZEND_ASSERT(exit_point < t->exit_count); + exit_flags = t->exit_info[exit_point].flags; + + if (exit_flags & ZEND_JIT_EXIT_METHOD_CALL) { + int8_t *reg_ops = ctx->regs[snapshot_ref]; + + ZEND_ASSERT(reg_ops[n - 1] != -1 && reg_ops[n] != -1); + if ((exit_flags & ZEND_JIT_EXIT_FIXED) + && (t->exit_info[exit_point].poly_func_reg != reg_ops[n - 1] + || t->exit_info[exit_point].poly_this_reg != reg_ops[n])) { + exit_point = zend_jit_duplicate_exit_point(ctx, t, exit_point, snapshot_ref); + addr = (void*)zend_jit_trace_get_exit_addr(exit_point); + exit_flags &= ~ZEND_JIT_EXIT_FIXED; + } + t->exit_info[exit_point].poly_func_reg = reg_ops[n - 1]; + t->exit_info[exit_point].poly_this_reg = reg_ops[n]; + n -= 2; + } + + for (i = 2; i <= n; i++) { + ir_ref ref = ir_insn_op(snapshot, i); + + if (ref) { + int8_t *reg_ops = ctx->regs[snapshot_ref]; + int8_t reg = reg_ops[i]; + ir_ref var = i - 2; + + ZEND_ASSERT(var < t->exit_info[exit_point].stack_size); + if (t->stack_map[t->exit_info[exit_point].stack_offset + var].flags == ZREG_ZVAL_COPY) { + ZEND_ASSERT(reg != ZREG_NONE); + if ((exit_flags & ZEND_JIT_EXIT_FIXED) + && t->stack_map[t->exit_info[exit_point].stack_offset + var].reg != IR_REG_NUM(reg)) { + exit_point = zend_jit_duplicate_exit_point(ctx, t, exit_point, snapshot_ref); + addr = (void*)zend_jit_trace_get_exit_addr(exit_point); + exit_flags &= ~ZEND_JIT_EXIT_FIXED; + } + t->stack_map[t->exit_info[exit_point].stack_offset + var].reg = IR_REG_NUM(reg); + } else if (t->stack_map[t->exit_info[exit_point].stack_offset + var].flags != ZREG_CONST) { + ZEND_ASSERT(t->stack_map[t->exit_info[exit_point].stack_offset + var].type == IS_LONG || + t->stack_map[t->exit_info[exit_point].stack_offset + var].type == IS_DOUBLE); + + if (ref > 0) { + if (reg != ZREG_NONE) { + if (reg & IR_REG_SPILL_LOAD) { + ZEND_ASSERT(!(reg & IR_REG_SPILL_SPECIAL)); + /* spill slot on a CPU stack */ + if ((exit_flags & ZEND_JIT_EXIT_FIXED) + && (t->stack_map[t->exit_info[exit_point].stack_offset + var].ref != ref + || t->stack_map[t->exit_info[exit_point].stack_offset + var].reg != ZREG_NONE + || !(t->stack_map[t->exit_info[exit_point].stack_offset + var].flags & ZREG_SPILL_SLOT))) { + exit_point = zend_jit_duplicate_exit_point(ctx, t, exit_point, snapshot_ref); + addr = (void*)zend_jit_trace_get_exit_addr(exit_point); + exit_flags &= ~ZEND_JIT_EXIT_FIXED; + } + t->stack_map[t->exit_info[exit_point].stack_offset + var].ref = ref; + t->stack_map[t->exit_info[exit_point].stack_offset + var].reg = ZREG_NONE; + t->stack_map[t->exit_info[exit_point].stack_offset + var].flags |= ZREG_SPILL_SLOT; + } else if (reg & IR_REG_SPILL_SPECIAL) { + /* spill slot on a VM stack */ + if ((exit_flags & ZEND_JIT_EXIT_FIXED) + && (t->stack_map[t->exit_info[exit_point].stack_offset + var].reg != ZREG_NONE + || t->stack_map[t->exit_info[exit_point].stack_offset + var].flags != 
ZREG_TYPE_ONLY)) { + exit_point = zend_jit_duplicate_exit_point(ctx, t, exit_point, snapshot_ref); + addr = (void*)zend_jit_trace_get_exit_addr(exit_point); + exit_flags &= ~ZEND_JIT_EXIT_FIXED; + } + t->stack_map[t->exit_info[exit_point].stack_offset + var].reg = ZREG_NONE; + t->stack_map[t->exit_info[exit_point].stack_offset + var].flags = ZREG_TYPE_ONLY; + } else { + if ((exit_flags & ZEND_JIT_EXIT_FIXED) + && t->stack_map[t->exit_info[exit_point].stack_offset + var].reg != IR_REG_NUM(reg)) { + exit_point = zend_jit_duplicate_exit_point(ctx, t, exit_point, snapshot_ref); + addr = (void*)zend_jit_trace_get_exit_addr(exit_point); + exit_flags &= ~ZEND_JIT_EXIT_FIXED; + } + t->stack_map[t->exit_info[exit_point].stack_offset + var].reg = IR_REG_NUM(reg); + } + } else { + if ((exit_flags & ZEND_JIT_EXIT_FIXED) + && (t->stack_map[t->exit_info[exit_point].stack_offset + var].reg != ZREG_NONE + || t->stack_map[t->exit_info[exit_point].stack_offset + var].flags != ZREG_TYPE_ONLY)) { + exit_point = zend_jit_duplicate_exit_point(ctx, t, exit_point, snapshot_ref); + addr = (void*)zend_jit_trace_get_exit_addr(exit_point); + exit_flags &= ~ZEND_JIT_EXIT_FIXED; + } + t->stack_map[t->exit_info[exit_point].stack_offset + var].flags = ZREG_TYPE_ONLY; + } + } else if (!(exit_flags & ZEND_JIT_EXIT_FIXED)) { + int32_t idx = _add_trace_const(t, ctx->ir_base[ref].val.i64); + t->stack_map[t->exit_info[exit_point].stack_offset + var].flags = ZREG_CONST; + t->stack_map[t->exit_info[exit_point].stack_offset + var].ref = idx; + } + } + } + } + t->exit_info[exit_point].flags |= ZEND_JIT_EXIT_FIXED; + return addr; +} + +static void jit_SIDE_EXIT(zend_jit_ctx *jit, ir_ref addr) +{ + jit_SNAPSHOT(jit, addr); + ir_IJMP(addr); +} + +/* PHP JIT helpers */ + +static ir_ref jit_EMALLOC(zend_jit_ctx *jit, size_t size, const zend_op_array *op_array, const zend_op *opline) +{ +#if ZEND_DEBUG + return ir_CALL_5(IR_ADDR, ir_CONST_FC_FUNC(_emalloc), + ir_CONST_ADDR(size), + op_array->filename ? ir_CONST_ADDR(op_array->filename->val) : IR_NULL, + ir_CONST_U32(opline ? opline->lineno : 0), + IR_NULL, + ir_CONST_U32(0)); +#elif defined(HAVE_BUILTIN_CONSTANT_P) + if (size > 24 && size <= 32) { + return ir_CALL(IR_ADDR, ir_CONST_FC_FUNC(_emalloc_32)); + } else { + return ir_CALL_1(IR_ADDR, ir_CONST_FC_FUNC(_emalloc), ir_CONST_ADDR(size)); + } +#else + return ir_CALL_1(IR_ADDR, ir_CONST_FC_FUNC(_emalloc), ir_CONST_ADDR(size)); +#endif +} + +static ir_ref jit_EFREE(zend_jit_ctx *jit, ir_ref ptr, size_t size, const zend_op_array *op_array, const zend_op *opline) +{ +#if ZEND_DEBUG + return ir_CALL_5(IR_ADDR, ir_CONST_FC_FUNC(_efree), + ptr, + op_array && op_array->filename ? ir_CONST_ADDR(op_array->filename->val) : IR_NULL, + ir_CONST_U32(opline ? 
opline->lineno : 0),
+		IR_NULL,
+		ir_CONST_U32(0));
+#elif defined(HAVE_BUILTIN_CONSTANT_P)
+	if (size > 24 && size <= 32) {
+		return ir_CALL_1(IR_ADDR, ir_CONST_FC_FUNC(_efree_32), ptr);
+	} else {
+		return ir_CALL_1(IR_ADDR, ir_CONST_FC_FUNC(_efree), ptr);
+	}
+#else
+	return ir_CALL_1(IR_ADDR, ir_CONST_FC_FUNC(_efree), ptr);
+#endif
+}
+
+static ir_ref jit_FP(zend_jit_ctx *jit)
+{
+	ZEND_ASSERT(jit->ctx.control);
+	if (jit->fp == IR_UNUSED) {
+		/* Emit "RLOAD FP" once per basic block */
+		jit->fp = ir_RLOAD_A(ZREG_FP);
+	} else {
+		ir_insn *insn;
+		ir_ref ref = jit->ctx.control;
+
+		while (1) {
+			if (ref == jit->fp) {
+				break;
+			}
+			insn = &jit->ctx.ir_base[ref];
+			if (insn->op >= IR_START || insn->op == IR_CALL) {
+				jit->fp = ir_RLOAD_A(ZREG_FP);
+				break;
+			}
+			ref = insn->op1;
+		}
+	}
+	return jit->fp;
+}
+
+static void jit_STORE_FP(zend_jit_ctx *jit, ir_ref ref)
+{
+	ir_RSTORE(ZREG_FP, ref);
+	jit->fp = IR_UNUSED;
+}
+
+static ir_ref jit_IP(zend_jit_ctx *jit)
+{
+	return ir_RLOAD_A(ZREG_IP);
+}
+
+static void jit_STORE_IP(zend_jit_ctx *jit, ir_ref ref)
+{
+	ir_RSTORE(ZREG_IP, ref);
+}
+
+static ir_ref jit_IP32(zend_jit_ctx *jit)
+{
+	return ir_RLOAD_U32(ZREG_IP);
+}
+
+static void jit_LOAD_IP(zend_jit_ctx *jit, ir_ref ref)
+{
+	if (GCC_GLOBAL_REGS) {
+		jit_STORE_IP(jit, ref);
+	} else {
+		ir_STORE(jit_EX(opline), ref);
+	}
+}
+
+static void jit_LOAD_IP_ADDR(zend_jit_ctx *jit, const zend_op *target)
+{
+	jit_LOAD_IP(jit, ir_CONST_ADDR(target));
+}
+
+static void zend_jit_track_last_valid_opline(zend_jit_ctx *jit)
+{
+	jit->use_last_valid_opline = 0;
+	jit->track_last_valid_opline = 1;
+}
+
+static void zend_jit_use_last_valid_opline(zend_jit_ctx *jit)
+{
+	if (jit->track_last_valid_opline) {
+		jit->use_last_valid_opline = 1;
+		jit->track_last_valid_opline = 0;
+	}
+}
+
+static bool zend_jit_trace_uses_initial_ip(zend_jit_ctx *jit)
+{
+	return jit->use_last_valid_opline;
+}
+
+static void zend_jit_set_last_valid_opline(zend_jit_ctx *jit, const zend_op *opline)
+{
+	if (!jit->reuse_ip) {
+		jit->track_last_valid_opline = 1;
+		jit->last_valid_opline = opline;
+	}
+}
+
+static void zend_jit_reset_last_valid_opline(zend_jit_ctx *jit)
+{
+	jit->track_last_valid_opline = 0;
+	jit->last_valid_opline = NULL;
+}
+
+static void zend_jit_start_reuse_ip(zend_jit_ctx *jit)
+{
+	zend_jit_reset_last_valid_opline(jit);
+	jit->reuse_ip = 1;
+}
+
+static int zend_jit_reuse_ip(zend_jit_ctx *jit)
+{
+	if (!jit->reuse_ip) {
+		zend_jit_start_reuse_ip(jit);
+		// RX = EX(call);
+		jit_STORE_IP(jit, ir_LOAD_A(jit_EX(call)));
+	}
+	return 1;
+}
+
+static void zend_jit_stop_reuse_ip(zend_jit_ctx *jit)
+{
+	jit->reuse_ip = 0;
+}
+
+static int zend_jit_save_call_chain(zend_jit_ctx *jit, uint32_t call_level)
+{
+	ir_ref rx, call;
+
+	if (call_level == 1) {
+		// JIT: call = NULL;
+		call = IR_NULL;
+	} else {
+		// JIT: call = EX(call);
+		call = ir_LOAD_A(jit_EX(call));
+	}
+
+	rx = jit_IP(jit);
+
+	// JIT: call->prev_execute_data = EX(call);
+	ir_STORE(jit_CALL(rx, prev_execute_data), call);
+
+	// JIT: EX(call) = call;
+	ir_STORE(jit_EX(call), rx);
+
+	jit->delayed_call_level = 0;
+	delayed_call_chain = 0;
+
+	return 1;
+}
+
+static int zend_jit_set_ip(zend_jit_ctx *jit, const zend_op *target)
+{
+	ir_ref ref;
+	ir_ref addr = IR_UNUSED;
+
+	if (jit->delayed_call_level) {
+		if (!zend_jit_save_call_chain(jit, jit->delayed_call_level)) {
+			return 0;
+		}
+	}
+
+	if (jit->last_valid_opline) {
+		zend_jit_use_last_valid_opline(jit);
+		if (jit->last_valid_opline != target) {
+			if (GCC_GLOBAL_REGS) {
+				ref = jit_IP(jit);
+			} else
{ + addr = jit_EX(opline); + ref = ir_LOAD_A(addr); + } + if (target > jit->last_valid_opline) { + ref = ir_ADD_OFFSET(ref, (uintptr_t)target - (uintptr_t)jit->last_valid_opline); + } else { + ref = ir_SUB_A(ref, ir_CONST_ADDR((uintptr_t)jit->last_valid_opline - (uintptr_t)target)); + } + if (GCC_GLOBAL_REGS) { + jit_STORE_IP(jit, ref); + } else { + ir_STORE(addr, ref); + } + } + } else { + if (GCC_GLOBAL_REGS) { + jit_STORE_IP(jit, ir_CONST_ADDR(target)); + } else { + ir_STORE(jit_EX(opline), ir_CONST_ADDR(target)); + } + } + jit->reuse_ip = 0; + zend_jit_set_last_valid_opline(jit, target); + return 1; +} + +static int zend_jit_set_ip_ex(zend_jit_ctx *jit, const zend_op *target, bool set_ip_reg) +{ + if (!GCC_GLOBAL_REGS && set_ip_reg && !jit->last_valid_opline) { + /* Optimization to avoid duplicate constant load */ + ir_STORE(jit_EX(opline), ir_HARD_COPY_A(ir_CONST_ADDR(target))); + return 1; + } + return zend_jit_set_ip(jit, target); +} + +static void jit_SET_EX_OPLINE(zend_jit_ctx *jit, const zend_op *target) +{ + if (jit->last_valid_opline == target) { + zend_jit_use_last_valid_opline(jit); + if (GCC_GLOBAL_REGS) { + // EX(opline) = opline + ir_STORE(jit_EX(opline), jit_IP(jit)); + } + } else { + ir_STORE(jit_EX(opline), ir_CONST_ADDR(target)); + if (!GCC_GLOBAL_REGS) { + zend_jit_reset_last_valid_opline(jit); + } + } +} + +static ir_ref jit_ZVAL_ADDR(zend_jit_ctx *jit, zend_jit_addr addr) +{ + if (Z_MODE(addr) == IS_MEM_ZVAL) { + ir_ref reg; + + if (Z_REG(addr) == ZREG_FP) { + reg = jit_FP(jit); + } else if (Z_REG(addr) == ZREG_RX) { + reg = jit_IP(jit); + } else { + ZEND_UNREACHABLE(); + } + return ir_ADD_OFFSET(reg, Z_OFFSET(addr)); + } else if (Z_MODE(addr) == IS_REF_ZVAL) { + return Z_IR_REF(addr); + } else { + ZEND_ASSERT(Z_MODE(addr) == IS_CONST_ZVAL); + return ir_CONST_ADDR(Z_ZV(addr)); + } +} + +static ir_ref jit_Z_TYPE_ref(zend_jit_ctx *jit, ir_ref ref) +{ + return ir_LOAD_U8(ir_ADD_OFFSET(ref, offsetof(zval, u1.v.type))); +} + +static ir_ref jit_Z_TYPE(zend_jit_ctx *jit, zend_jit_addr addr) +{ + if (Z_MODE(addr) == IS_CONST_ZVAL) { + return ir_CONST_U8(Z_TYPE_P(Z_ZV(addr))); + } else if (Z_MODE(addr) == IS_MEM_ZVAL) { + ir_ref reg; + + ZEND_ASSERT(Z_MODE(addr) == IS_MEM_ZVAL); + if (Z_REG(addr) == ZREG_FP) { + reg = jit_FP(jit); + } else if (Z_REG(addr) == ZREG_RX) { + reg = jit_IP(jit); + } else { + ZEND_UNREACHABLE(); + } + return ir_LOAD_U8(ir_ADD_OFFSET(reg, Z_OFFSET(addr) + offsetof(zval, u1.v.type))); + } else { + return jit_Z_TYPE_ref(jit, jit_ZVAL_ADDR(jit, addr)); + } +} + +static ir_ref jit_Z_TYPE_FLAGS_ref(zend_jit_ctx *jit, ir_ref ref) +{ + return ir_LOAD_U8(ir_ADD_OFFSET(ref, offsetof(zval, u1.v.type_flags))); +} + +static ir_ref jit_Z_TYPE_FLAGS(zend_jit_ctx *jit, zend_jit_addr addr) +{ + if (Z_MODE(addr) == IS_CONST_ZVAL) { + return ir_CONST_U8(Z_TYPE_FLAGS_P(Z_ZV(addr))); + } else if (Z_MODE(addr) == IS_MEM_ZVAL) { + ir_ref reg; + + ZEND_ASSERT(Z_MODE(addr) == IS_MEM_ZVAL); + if (Z_REG(addr) == ZREG_FP) { + reg = jit_FP(jit); + } else if (Z_REG(addr) == ZREG_RX) { + reg = jit_IP(jit); + } else { + ZEND_UNREACHABLE(); + } + return ir_LOAD_U8(ir_ADD_OFFSET(reg, Z_OFFSET(addr) + offsetof(zval, u1.v.type_flags))); + } else { + return jit_Z_TYPE_FLAGS_ref(jit, jit_ZVAL_ADDR(jit, addr)); + } +} + +static ir_ref jit_Z_TYPE_INFO_ref(zend_jit_ctx *jit, ir_ref ref) +{ + return ir_LOAD_U32(ir_ADD_OFFSET(ref, offsetof(zval, u1.type_info))); +} + +static ir_ref jit_Z_TYPE_INFO(zend_jit_ctx *jit, zend_jit_addr addr) +{ + if (Z_MODE(addr) == IS_CONST_ZVAL) { + return 
ir_CONST_U32(Z_TYPE_INFO_P(Z_ZV(addr))); + } else if (Z_MODE(addr) == IS_MEM_ZVAL) { + ir_ref reg; + + ZEND_ASSERT(Z_MODE(addr) == IS_MEM_ZVAL); + if (Z_REG(addr) == ZREG_FP) { + reg = jit_FP(jit); + } else if (Z_REG(addr) == ZREG_RX) { + reg = jit_IP(jit); + } else { + ZEND_UNREACHABLE(); + } + return ir_LOAD_U32(ir_ADD_OFFSET(reg, Z_OFFSET(addr) + offsetof(zval, u1.type_info))); + } else { + return jit_Z_TYPE_INFO_ref(jit, jit_ZVAL_ADDR(jit, addr)); + } +} + +static void jit_set_Z_TYPE_INFO_ref(zend_jit_ctx *jit, ir_ref ref, ir_ref type_info) +{ + ir_STORE(ir_ADD_OFFSET(ref, offsetof(zval, u1.type_info)), type_info); +} + +static void jit_set_Z_TYPE_INFO_ex(zend_jit_ctx *jit, zend_jit_addr addr, ir_ref type_info) +{ + if (Z_MODE(addr) == IS_MEM_ZVAL) { + ir_ref reg; + + ZEND_ASSERT(Z_MODE(addr) == IS_MEM_ZVAL); + if (Z_REG(addr) == ZREG_FP) { + reg = jit_FP(jit); + } else if (Z_REG(addr) == ZREG_RX) { + reg = jit_IP(jit); + } else { + ZEND_UNREACHABLE(); + } + ir_STORE(ir_ADD_OFFSET(reg, Z_OFFSET(addr) + offsetof(zval, u1.type_info)), type_info); + } else { + jit_set_Z_TYPE_INFO_ref(jit, jit_ZVAL_ADDR(jit, addr), type_info); + } +} + +static void jit_set_Z_TYPE_INFO(zend_jit_ctx *jit, zend_jit_addr addr, uint32_t type_info) +{ + if (type_info < IS_STRING + && Z_MODE(addr) == IS_MEM_ZVAL + && Z_REG(addr) == ZREG_FP + && JIT_G(current_frame) + && STACK_MEM_TYPE(JIT_G(current_frame)->stack, EX_VAR_TO_NUM(Z_OFFSET(addr))) == type_info) { + /* type is already set */ + return; + } + jit_set_Z_TYPE_INFO_ex(jit, addr, ir_CONST_U32(type_info)); +} + +static ir_ref jit_if_Z_TYPE_ref(zend_jit_ctx *jit, ir_ref ref, ir_ref type) +{ + return ir_IF(ir_EQ(jit_Z_TYPE_ref(jit, ref), type)); +} + +static ir_ref jit_if_Z_TYPE(zend_jit_ctx *jit, zend_jit_addr addr, uint8_t type) +{ + ZEND_ASSERT(type != IS_UNDEF); + return ir_IF(ir_EQ(jit_Z_TYPE(jit, addr), ir_CONST_U8(type))); +} + +static ir_ref jit_if_not_Z_TYPE(zend_jit_ctx *jit, zend_jit_addr addr, uint8_t type) +{ + ir_ref ref = jit_Z_TYPE(jit, addr); + + if (type != IS_UNDEF) { + ref = ir_NE(ref, ir_CONST_U8(type)); + } + return ir_IF(ref); +} + +static void jit_guard_Z_TYPE(zend_jit_ctx *jit, zend_jit_addr addr, uint8_t type, const void *exit_addr) +{ + ir_ref ref = jit_Z_TYPE(jit, addr); + + if (type != IS_UNDEF) { + ir_GUARD(ir_EQ(ref, ir_CONST_U8(type)), ir_CONST_ADDR(exit_addr)); + } else { + ir_GUARD_NOT(ref, ir_CONST_ADDR(exit_addr)); + } +} + +static void jit_guard_not_Z_TYPE(zend_jit_ctx *jit, zend_jit_addr addr, uint8_t type, const void *exit_addr) +{ + ir_ref ref = jit_Z_TYPE(jit, addr); + + if (type != IS_UNDEF) { + ref = ir_NE(ref, ir_CONST_U8(type)); + } + ir_GUARD(ref, ir_CONST_ADDR(exit_addr)); +} + +static ir_ref jit_if_REFCOUNTED(zend_jit_ctx *jit, zend_jit_addr addr) +{ + return ir_IF(jit_Z_TYPE_FLAGS(jit, addr)); +} + +static ir_ref jit_if_COLLECTABLE_ref(zend_jit_ctx *jit, ir_ref addr_ref) +{ + return ir_IF(ir_AND_U8(jit_Z_TYPE_FLAGS_ref(jit, addr_ref), ir_CONST_U8(IS_TYPE_COLLECTABLE))); +} + +static ir_ref jit_Z_LVAL_ref(zend_jit_ctx *jit, ir_ref ref) +{ + return ir_LOAD_L(ref); +} + +static ir_ref jit_Z_DVAL_ref(zend_jit_ctx *jit, ir_ref ref) +{ + return ir_LOAD_D(ref); +} + +static bool zend_jit_spilling_may_cause_conflict(zend_jit_ctx *jit, int var, ir_ref val) +{ + if (jit->ctx.ir_base[val].op == IR_RLOAD) { + /* Deoptimization */ + return 0; + } +// if (jit->ctx.ir_base[val].op == IR_LOAD +// && jit->ctx.ir_base[jit->ctx.ir_base[val].op2].op == IR_ADD +// && 
jit->ctx.ir_base[jit->ctx.ir_base[jit->ctx.ir_base[val].op2].op1].op == IR_RLOAD +// && jit->ctx.ir_base[jit->ctx.ir_base[jit->ctx.ir_base[val].op2].op1].op2 == ZREG_FP +// && IR_IS_CONST_REF(jit->ctx.ir_base[jit->ctx.ir_base[val].op2].op2) +// && jit->ctx.ir_base[jit->ctx.ir_base[jit->ctx.ir_base[val].op2].op2].val.addr == (uintptr_t)EX_NUM_TO_VAR(jit->ssa->vars[var].var)) { +// /* LOAD from the same location (the LOAD is pinned) */ +// // TODO: should be anti-dependent with the following stores ??? +// return 0; +// } + if (jit->ssa->vars[var].var < jit->current_op_array->last_var) { + /* IS_CV */ + return 0; + } + return 1; +} + +static void zend_jit_def_reg(zend_jit_ctx *jit, zend_jit_addr addr, ir_ref val) +{ + int var; + + ZEND_ASSERT(Z_MODE(addr) == IS_REG); + var = Z_SSA_VAR(addr); + if (var == jit->delay_var) { + ir_refs_add(jit->delay_refs, val); + return; + } + ZEND_ASSERT(jit->ra && jit->ra[var].ref == IR_NULL); + + /* Negative "var" has special meaning for IR */ + if (val > 0 && !zend_jit_spilling_may_cause_conflict(jit, var, val)) { + val = ir_bind(&jit->ctx, -EX_NUM_TO_VAR(jit->ssa->vars[var].var), val); + } + jit->ra[var].ref = val; + + if (jit->ra[var].flags & ZREG_FORWARD) { + zend_ssa_phi *phi = jit->ssa->vars[var].phi_use_chain; + zend_basic_block *bb; + int n, j, *p; + ir_ref *q; + + jit->ra[var].flags &= ~ZREG_FORWARD; + while (phi != NULL) { + zend_ssa_phi *dst_phi = phi; + int src_var = var; + + if (dst_phi->pi >= 0) { + jit->ra[src_var].ref = val; + src_var = dst_phi->ssa_var; + if (!(jit->ra[src_var].flags & ZREG_FORWARD)) { + phi = zend_ssa_next_use_phi(jit->ssa, var, phi); + continue; + } + dst_phi = jit->ssa->vars[src_var].phi_use_chain; + ZEND_ASSERT(dst_phi != NULL && "reg forwarding"); + ZEND_ASSERT(!zend_ssa_next_use_phi(jit->ssa, src_var, dst_phi) && "reg forwarding"); + jit->ra[src_var].flags &= ~ZREG_FORWARD; + } + + if (jit->ra[dst_phi->ssa_var].ref > 0) { + ir_insn *phi_insn = &jit->ctx.ir_base[jit->ra[dst_phi->ssa_var].ref]; + ZEND_ASSERT(phi_insn->op == IR_PHI); +// ZEND_ASSERT(ir_operands_count(ctx, phi_insn) == n + 1); + bb = &jit->ssa->cfg.blocks[dst_phi->block]; + n = bb->predecessors_count; + for (j = 0, p = &dst_phi->sources[0], q = phi_insn->ops + 2; j < n; j++, p++, q++) { + if (*p == src_var) { + *q = val; + } + } + } + + phi = zend_ssa_next_use_phi(jit->ssa, var, phi); + } + } +} + +static ir_ref zend_jit_use_reg(zend_jit_ctx *jit, zend_jit_addr addr) +{ + int var = Z_SSA_VAR(addr); + + ZEND_ASSERT(Z_MODE(addr) == IS_REG); + ZEND_ASSERT(jit->ra && jit->ra[var].ref); + if (jit->ra[var].ref == IR_NULL) { + zend_jit_addr mem_addr; + ir_ref ref; + + ZEND_ASSERT(jit->ra[var].flags & ZREG_LOAD); + mem_addr = ZEND_ADDR_MEM_ZVAL(ZREG_FP, EX_NUM_TO_VAR(jit->ssa->vars[var].var)); + if ((jit->ssa->var_info[var].type & MAY_BE_ANY) == MAY_BE_LONG) { + ref = jit_Z_LVAL_ref(jit, jit_ZVAL_ADDR(jit, mem_addr)); + } else if ((jit->ssa->var_info[var].type & MAY_BE_ANY) == MAY_BE_DOUBLE) { + ref = jit_Z_DVAL_ref(jit, jit_ZVAL_ADDR(jit, mem_addr)); + } else { + ZEND_UNREACHABLE(); + } + zend_jit_def_reg(jit, addr, ref); + return ref; + } + return jit->ra[Z_SSA_VAR(addr)].ref; +} + +static void zend_jit_gen_pi(zend_jit_ctx *jit, zend_ssa_phi *phi) +{ + int src_var = phi->sources[0]; + int dst_var = phi->ssa_var; + + ZEND_ASSERT(phi->pi >= 0); + ZEND_ASSERT(!(jit->ra[dst_var].flags & ZREG_LOAD)); + ZEND_ASSERT(jit->ra[src_var].ref); + + if (jit->ra[src_var].ref == IR_NULL) { + /* Not defined yet */ + if (jit->ssa->vars[dst_var].use_chain < 0 + && 
jit->ssa->vars[dst_var].phi_use_chain) { + zend_ssa_phi *phi = jit->ssa->vars[dst_var].phi_use_chain; + if (!zend_ssa_next_use_phi(jit->ssa, dst_var, phi)) { + /* This is a Pi forwarded to Phi */ + jit->ra[src_var].flags |= ZREG_FORWARD; + return; + } + } + ZEND_ASSERT(0 && "Not defined Pi source"); + } + /* Reuse register */ + zend_jit_def_reg(jit, ZEND_ADDR_REG(dst_var), + zend_jit_use_reg(jit, ZEND_ADDR_REG(src_var))); +} + +static void zend_jit_gen_phi(zend_jit_ctx *jit, zend_ssa_phi *phi) +{ + int dst_var = phi->ssa_var; + zend_basic_block *bb = &jit->ssa->cfg.blocks[phi->block]; + int n = bb->predecessors_count; + int i; + ir_type type = (jit->ssa->var_info[phi->ssa_var].type & MAY_BE_LONG) ? IR_LONG : IR_DOUBLE; + ir_ref merge = jit->bb_start_ref[phi->block]; + ir_ref ref; + ir_ref old_insns_count = jit->ctx.insns_count; + ir_ref same_src_ref = IR_UNUSED; + bool phi_inputs_are_the_same = 1; + + ZEND_ASSERT(phi->pi < 0); + ZEND_ASSERT(!(jit->ra[dst_var].flags & ZREG_LOAD)); + ZEND_ASSERT(merge); + ZEND_ASSERT(jit->ctx.ir_base[merge].op == IR_MERGE || jit->ctx.ir_base[merge].op == IR_LOOP_BEGIN); + ZEND_ASSERT(n == jit->ctx.ir_base[merge].inputs_count); + + ref = ir_emit_N(&jit->ctx, IR_OPT(IR_PHI, type), n + 1); + ir_set_op(&jit->ctx, ref, 1, merge); + + for (i = 0; i < n; i++) { + int src_var = phi->sources[i]; + + ZEND_ASSERT(jit->ra[src_var].ref); + if (jit->ra[src_var].ref == IR_NULL) { + jit->ra[src_var].flags |= ZREG_FORWARD; + phi_inputs_are_the_same = 0; + } else { + ir_ref src_ref = zend_jit_use_reg(jit, ZEND_ADDR_REG(src_var)); + if (i == 0) { + same_src_ref = src_ref; + } else if (same_src_ref != src_ref) { + phi_inputs_are_the_same = 0; + } + ir_set_op(&jit->ctx, ref, i + 2, src_ref); + } + } + if (phi_inputs_are_the_same) { + ref = same_src_ref; + jit->ctx.insns_count = old_insns_count; + } + + zend_jit_def_reg(jit, ZEND_ADDR_REG(dst_var), ref); +} + +static ir_ref jit_Z_LVAL(zend_jit_ctx *jit, zend_jit_addr addr) +{ + if (Z_MODE(addr) == IS_CONST_ZVAL) { + return ir_CONST_LONG(Z_LVAL_P(Z_ZV(addr))); + } else if (Z_MODE(addr) == IS_REG) { + return zend_jit_use_reg(jit, addr); + } else { + return jit_Z_LVAL_ref(jit, jit_ZVAL_ADDR(jit, addr)); + } +} + +static void jit_set_Z_LVAL(zend_jit_ctx *jit, zend_jit_addr addr, ir_ref lval) +{ + if (Z_MODE(addr) == IS_REG) { + zend_jit_def_reg(jit, addr, lval); + } else { + ir_STORE(jit_ZVAL_ADDR(jit, addr), lval); + } +} + +#if SIZEOF_ZEND_LONG == 4 +static ir_ref jit_Z_W2(zend_jit_ctx *jit, zend_jit_addr addr) +{ + if (Z_MODE(addr) == IS_CONST_ZVAL) { + return ir_CONST_U32((Z_ZV(addr))->value.ww.w2); + } else { + return ir_LOAD_L(ir_ADD_OFFSET(jit_ZVAL_ADDR(jit, addr), offsetof(zval, value.ww.w2))); + } +} + +static void jit_set_Z_W2(zend_jit_ctx *jit, zend_jit_addr addr, ir_ref lval) +{ + ir_STORE(ir_ADD_OFFSET(jit_ZVAL_ADDR(jit, addr), offsetof(zval, value.ww.w2)), lval); +} +#endif + +static ir_ref jit_Z_DVAL(zend_jit_ctx *jit, zend_jit_addr addr) +{ + if (Z_MODE(addr) == IS_CONST_ZVAL) { + return ir_CONST_DOUBLE(Z_DVAL_P(Z_ZV(addr))); + } else if (Z_MODE(addr) == IS_REG) { + return zend_jit_use_reg(jit, addr); + } else { + return jit_Z_DVAL_ref(jit, jit_ZVAL_ADDR(jit, addr)); + } +} + +static void jit_set_Z_DVAL(zend_jit_ctx *jit, zend_jit_addr addr, ir_ref dval) +{ + if (Z_MODE(addr) == IS_REG) { + zend_jit_def_reg(jit, addr, dval); + } else { + ir_STORE(jit_ZVAL_ADDR(jit, addr), dval); + } +} + +static ir_ref jit_Z_PTR_ref(zend_jit_ctx *jit, ir_ref ref) +{ + return ir_LOAD_A(ref); +} + +static ir_ref jit_Z_PTR(zend_jit_ctx 
*jit, zend_jit_addr addr)
+{
+	if (Z_MODE(addr) == IS_CONST_ZVAL) {
+		return ir_CONST_ADDR(Z_PTR_P(Z_ZV(addr)));
+	} else {
+		return jit_Z_PTR_ref(jit, jit_ZVAL_ADDR(jit, addr));
+	}
+}
+
+static void jit_set_Z_PTR(zend_jit_ctx *jit, zend_jit_addr addr, ir_ref ptr)
+{
+	ir_STORE(jit_ZVAL_ADDR(jit, addr), ptr);
+}
+
+static ir_ref jit_GC_REFCOUNT(zend_jit_ctx *jit, ir_ref ref)
+{
+	return ir_LOAD_U32(ref);
+}
+
+static void jit_set_GC_REFCOUNT(zend_jit_ctx *jit, ir_ref ref, uint32_t refcount)
+{
+	ir_STORE(ref, ir_CONST_U32(refcount));
+}
+
+static void jit_GC_ADDREF(zend_jit_ctx *jit, ir_ref ref)
+{
+	ir_STORE(ref, ir_ADD_U32(ir_LOAD_U32(ref), ir_CONST_U32(1)));
+}
+
+static void jit_GC_ADDREF2(zend_jit_ctx *jit, ir_ref ref)
+{
+	ir_ref counter = ir_LOAD_U32(ref);
+	ir_STORE(ref, ir_ADD_U32(counter, ir_CONST_U32(2)));
+}
+
+static ir_ref jit_GC_DELREF(zend_jit_ctx *jit, ir_ref ref)
+{
+	ir_ref counter = ir_LOAD_U32(ref);
+	counter = ir_SUB_U32(counter, ir_CONST_U32(1));
+	ir_STORE(ref, counter);
+	return counter;
+}
+
+static ir_ref jit_if_GC_MAY_NOT_LEAK(zend_jit_ctx *jit, ir_ref ref)
+{
+	return ir_IF(
+		ir_AND_U32(
+			ir_LOAD_U32(ir_ADD_OFFSET(ref, offsetof(zend_refcounted, gc.u.type_info))),
+			ir_CONST_U32(GC_INFO_MASK | (GC_NOT_COLLECTABLE << GC_FLAGS_SHIFT))));
+}
+
+static void jit_ZVAL_COPY_CONST(zend_jit_ctx *jit, zend_jit_addr dst, uint32_t dst_info, uint32_t dst_def_info, zval *zv, bool addref)
+{
+	ir_ref ref = IR_UNUSED;
+
+	if (Z_TYPE_P(zv) > IS_TRUE) {
+		if (Z_TYPE_P(zv) == IS_DOUBLE) {
+			jit_set_Z_DVAL(jit, dst, ir_CONST_DOUBLE(Z_DVAL_P(zv)));
+		} else if (Z_TYPE_P(zv) == IS_LONG && dst_def_info == MAY_BE_DOUBLE) {
+			jit_set_Z_DVAL(jit, dst, ir_CONST_DOUBLE((double)Z_LVAL_P(zv)));
+		} else if (Z_TYPE_P(zv) == IS_LONG) {
+			jit_set_Z_LVAL(jit, dst, ir_CONST_LONG(Z_LVAL_P(zv)));
+		} else {
+			ref = ir_CONST_ADDR(Z_PTR_P(zv));
+			jit_set_Z_PTR(jit, dst, ref);
+			if (addref && Z_REFCOUNTED_P(zv)) {
+				jit_GC_ADDREF(jit, ref);
+			}
+		}
+	}
+	if (Z_MODE(dst) != IS_REG) {
+		if (dst_def_info == MAY_BE_DOUBLE) {
+			if ((dst_info & (MAY_BE_ANY|MAY_BE_UNDEF|MAY_BE_GUARD)) != MAY_BE_DOUBLE) {
+				jit_set_Z_TYPE_INFO(jit, dst, IS_DOUBLE);
+			}
+		} else if (((dst_info & (MAY_BE_ANY|MAY_BE_UNDEF|MAY_BE_GUARD)) != (1<<Z_TYPE_P(zv))) || (dst_info & (MAY_BE_STRING|MAY_BE_ARRAY))) {
+			jit_set_Z_TYPE_INFO(jit, dst, Z_TYPE_INFO_P(zv));
+		}
+	}
+}
+
+static int zend_jit_check_timeout(zend_jit_ctx *jit, const zend_op *opline, const void *exit_addr)
+{
+	ir_ref ref = ir_LOAD_U8(jit_EG(vm_interrupt));
+
+	if (exit_addr) {
+		ir_GUARD_NOT(ref, ir_CONST_ADDR(exit_addr));
+	} else if (!opline || jit->last_valid_opline == opline) {
+		ir_GUARD_NOT(ref, jit_STUB_ADDR(jit, jit_stub_interrupt_handler));
+	} else {
+		ir_ref if_timeout = ir_IF(ref);
+
+		ir_IF_TRUE_cold(if_timeout);
+		jit_LOAD_IP_ADDR(jit, opline);
+		ir_IJMP(jit_STUB_ADDR(jit, jit_stub_interrupt_handler));
+		ir_IF_FALSE(if_timeout);
+	}
+	return 1;
+}
+
+/* stubs */
+
+static int zend_jit_exception_handler_stub(zend_jit_ctx *jit)
+{
+	const void *handler;
+
+	if (zend_jit_vm_kind == ZEND_VM_KIND_HYBRID) {
+		handler = zend_get_opcode_handler_func(EG(exception_op));
+
+		ir_CALL(IR_VOID, ir_CONST_FUNC(handler));
+		ir_TAILCALL(IR_VOID, ir_LOAD_A(jit_IP(jit)));
+	} else {
+		handler = EG(exception_op)->handler;
+
+		if (GCC_GLOBAL_REGS) {
+			ir_TAILCALL(IR_VOID, ir_CONST_FUNC(handler));
+		} else {
+			ir_ref ref, if_negative;
+
+			ref = ir_CALL_1(IR_I32, ir_CONST_FC_FUNC(handler), jit_FP(jit));
+			if_negative = ir_IF(ir_LT(ref, ir_CONST_U32(0)));
+			ir_IF_TRUE(if_negative);
+			ir_MERGE_WITH_EMPTY_FALSE(if_negative);
+			ref = ir_PHI_2(IR_I32, ref, ir_CONST_I32(1));
+			ir_RETURN(ref);
+		}
+	}
+	return 1;
+}
+
+static int zend_jit_exception_handler_undef_stub(zend_jit_ctx *jit)
+{
+	ir_ref ref, result_type, if_result_used;
+
+	ref = jit_EG(opline_before_exception);
+	result_type = ir_LOAD_U8(ir_ADD_OFFSET(ir_LOAD_A(ref), offsetof(zend_op,
result_type))); + + if_result_used = ir_IF(ir_AND_U8(result_type, ir_CONST_U8(IS_TMP_VAR|IS_VAR))); + ir_IF_TRUE(if_result_used); + + ref = ir_LOAD_U32(ir_ADD_OFFSET(ir_LOAD_A(ref), offsetof(zend_op, result.var))); + if (sizeof(void*) == 8) { + ref = ir_ZEXT_A(ref); + } + ir_STORE(ir_ADD_OFFSET(ir_ADD_A(jit_FP(jit), ref), offsetof(zval, u1.type_info)), ir_CONST_U32(IS_UNDEF)); + ir_MERGE_WITH_EMPTY_FALSE(if_result_used); + + ir_IJMP(jit_STUB_ADDR(jit, jit_stub_exception_handler)); + + return 1; +} + +static int zend_jit_exception_handler_free_op1_op2_stub(zend_jit_ctx *jit) +{ + ir_ref ref, if_dtor; + zend_jit_addr var_addr; + + ref = ir_LOAD_A(jit_EG(opline_before_exception)); + if_dtor = ir_IF(ir_AND_U8(ir_LOAD_U8(ir_ADD_OFFSET(ref, offsetof(zend_op, op1_type))), + ir_CONST_U8(IS_TMP_VAR|IS_VAR))); + ir_IF_TRUE(if_dtor); + ref = ir_LOAD_U32(ir_ADD_OFFSET(ref, offsetof(zend_op, op1.var))); + if (sizeof(void*) == 8) { + ref = ir_ZEXT_A(ref); + } + ref = ir_ADD_A(jit_FP(jit), ref); + var_addr = ZEND_ADDR_REF_ZVAL(ref); + jit_ZVAL_PTR_DTOR(jit, var_addr, MAY_BE_ANY|MAY_BE_RC1|MAY_BE_RCN|MAY_BE_REF, 0, NULL); + ir_MERGE_WITH_EMPTY_FALSE(if_dtor); + + ir_IJMP(jit_STUB_ADDR(jit, jit_stub_exception_handler_free_op2)); + + return 1; +} + +static int zend_jit_exception_handler_free_op2_stub(zend_jit_ctx *jit) +{ + ir_ref ref, if_dtor; + zend_jit_addr var_addr; + + ref = ir_LOAD_A(jit_EG(opline_before_exception)); + if_dtor = ir_IF(ir_AND_U8(ir_LOAD_U8(ir_ADD_OFFSET(ref, offsetof(zend_op, op2_type))), + ir_CONST_U8(IS_TMP_VAR|IS_VAR))); + ir_IF_TRUE(if_dtor); + ref = ir_LOAD_U32(ir_ADD_OFFSET(ref, offsetof(zend_op, op2.var))); + if (sizeof(void*) == 8) { + ref = ir_ZEXT_A(ref); + } + ref = ir_ADD_A(jit_FP(jit), ref); + var_addr = ZEND_ADDR_REF_ZVAL(ref); + jit_ZVAL_PTR_DTOR(jit, var_addr, MAY_BE_ANY|MAY_BE_RC1|MAY_BE_RCN|MAY_BE_REF, 0, NULL); + ir_MERGE_WITH_EMPTY_FALSE(if_dtor); + + ir_IJMP(jit_STUB_ADDR(jit, jit_stub_exception_handler_undef)); + + return 1; +} + +static int zend_jit_interrupt_handler_stub(zend_jit_ctx *jit) +{ + ir_ref if_timeout, if_exception; + + if (GCC_GLOBAL_REGS) { + // EX(opline) = opline + ir_STORE(jit_EX(opline), jit_IP(jit)); + } + + ir_STORE(jit_EG(vm_interrupt), ir_CONST_U8(0)); + if_timeout = ir_IF(ir_EQ(ir_LOAD_U8(jit_EG(timed_out)), ir_CONST_U8(0))); + ir_IF_FALSE(if_timeout); + ir_CALL(IR_VOID, ir_CONST_FUNC(zend_timeout)); + ir_MERGE_WITH_EMPTY_TRUE(if_timeout); + + if (zend_interrupt_function) { + ir_CALL_1(IR_VOID, ir_CONST_FUNC(zend_interrupt_function), jit_FP(jit)); + if_exception = ir_IF(ir_LOAD_A(jit_EG(exception))); + ir_IF_TRUE(if_exception); + ir_CALL(IR_VOID, ir_CONST_FUNC(zend_jit_exception_in_interrupt_handler_helper)); + ir_MERGE_WITH_EMPTY_FALSE(if_exception); + + jit_STORE_FP(jit, ir_LOAD_A(jit_EG(current_execute_data))); + jit_STORE_IP(jit, ir_LOAD_A(jit_EX(opline))); + } + + if (GCC_GLOBAL_REGS) { + ir_TAILCALL(IR_VOID, ir_LOAD_A(jit_IP(jit))); + } else { + ir_RETURN(ir_CONST_I32(1)); + } + return 1; +} + +static int zend_jit_leave_function_handler_stub(zend_jit_ctx *jit) +{ + ir_ref call_info = ir_LOAD_U32(jit_EX(This.u1.type_info)); + ir_ref if_top = ir_IF(ir_AND_U32(call_info, ir_CONST_U32(ZEND_CALL_TOP))); + + ir_IF_FALSE(if_top); + + if (zend_jit_vm_kind == ZEND_VM_KIND_HYBRID) { + ir_CALL_1(IR_VOID, ir_CONST_FC_FUNC(zend_jit_leave_nested_func_helper), call_info); + jit_STORE_IP(jit, + ir_LOAD_A(jit_EX(opline))); + ir_TAILCALL(IR_VOID, ir_LOAD_A(jit_IP(jit))); + } else if (GCC_GLOBAL_REGS) { + ir_TAILCALL_1(IR_VOID, 
ir_CONST_FC_FUNC(zend_jit_leave_nested_func_helper), call_info); + } else { + ir_TAILCALL_2(IR_I32, ir_CONST_FC_FUNC(zend_jit_leave_nested_func_helper), call_info, jit_FP(jit)); + } + + ir_IF_TRUE(if_top); + + if (zend_jit_vm_kind == ZEND_VM_KIND_HYBRID) { + ir_CALL_1(IR_VOID, ir_CONST_FC_FUNC(zend_jit_leave_top_func_helper), call_info); + ir_TAILCALL(IR_VOID, ir_LOAD_A(jit_IP(jit))); + } else if (GCC_GLOBAL_REGS) { + ir_TAILCALL_1(IR_VOID, ir_CONST_FC_FUNC(zend_jit_leave_top_func_helper), call_info); + } else { + ir_TAILCALL_2(IR_I32, ir_CONST_FC_FUNC(zend_jit_leave_top_func_helper), call_info, jit_FP(jit)); + } + + return 1; +} + +static int zend_jit_negative_shift_stub(zend_jit_ctx *jit) +{ + ir_CALL_2(IR_VOID, ir_CONST_FUNC(zend_throw_error), + ir_CONST_ADDR(zend_ce_arithmetic_error), + ir_CONST_ADDR("Bit shift by negative number")); + ir_IJMP(jit_STUB_ADDR(jit, jit_stub_exception_handler_free_op1_op2)); + return 1; +} + +static int zend_jit_mod_by_zero_stub(zend_jit_ctx *jit) +{ + ir_CALL_2(IR_VOID, ir_CONST_FUNC(zend_throw_error), + ir_CONST_ADDR(zend_ce_division_by_zero_error), + ir_CONST_ADDR("Modulo by zero")); + ir_IJMP(jit_STUB_ADDR(jit, jit_stub_exception_handler_free_op1_op2)); + return 1; +} + +static int zend_jit_invalid_this_stub(zend_jit_ctx *jit) +{ + ir_CALL_2(IR_VOID, ir_CONST_FUNC(zend_throw_error), + IR_NULL, + ir_CONST_ADDR("Using $this when not in object context")); + ir_IJMP(jit_STUB_ADDR(jit, jit_stub_exception_handler_undef)); + return 1; +} + +static int zend_jit_undefined_function_stub(zend_jit_ctx *jit) +{ + // JIT: load EX(opline) + ir_ref ref = ir_LOAD_A(jit_FP(jit)); + ir_ref arg3 = ir_LOAD_U32(ir_ADD_OFFSET(ref, offsetof(zend_op, op2.constant))); + + if (sizeof(void*) == 8) { + arg3 = ir_LOAD_A(ir_ADD_A(ref, ir_SEXT_A(arg3))); + } else { + arg3 = ir_LOAD_A(arg3); + } + arg3 = ir_ADD_OFFSET(arg3, offsetof(zend_string, val)); + + ir_CALL_3(IR_VOID, ir_CONST_FUNC(zend_throw_error), + IR_NULL, + ir_CONST_ADDR("Call to undefined function %s()"), + arg3); + + ir_IJMP(jit_STUB_ADDR(jit, jit_stub_exception_handler)); + + return 1; +} + +static int zend_jit_throw_cannot_pass_by_ref_stub(zend_jit_ctx *jit) +{ + ir_ref opline, ref, rx, if_eq, if_tmp; + + // JIT: opline = EX(opline) + opline = ir_LOAD_A(jit_FP(jit)); + + // JIT: ZVAL_UNDEF(ZEND_CALL_VAR(RX, opline->result.var)) + ref = ir_LOAD_U32(ir_ADD_OFFSET(opline, offsetof(zend_op, result.var))); + if (sizeof(void*) == 8) { + ref = ir_ZEXT_A(ref); + } + rx = jit_IP(jit); + jit_set_Z_TYPE_INFO_ref(jit, ir_ADD_A(rx, ref), ir_CONST_U32(IS_UNDEF)); + + // last EX(call) frame may be delayed + // JIT: if (EX(call) == RX) + ref = ir_LOAD_A(jit_EX(call)); + if_eq = ir_IF(ir_EQ(rx, ref)); + ir_IF_FALSE(if_eq); + + // JIT: RX->prev_execute_data == EX(call) + ir_STORE(jit_CALL(rx, prev_execute_data), ref); + + // JIT: EX(call) = RX + ir_STORE(jit_EX(call), rx); + ir_MERGE_WITH_EMPTY_TRUE(if_eq); + + // JIT: IP = opline + jit_STORE_IP(jit, opline); + + // JIT: zend_cannot_pass_by_reference(opline->op2.num) + ir_CALL_1(IR_VOID, ir_CONST_FC_FUNC(zend_cannot_pass_by_reference), + ir_LOAD_U32(ir_ADD_OFFSET(opline, offsetof(zend_op, op2.num)))); + + // JIT: if (IP->op1_type == IS_TMP_VAR) + ref = ir_LOAD_U8(ir_ADD_OFFSET(jit_IP(jit), offsetof(zend_op, op1_type))); + if_tmp = ir_IF(ir_EQ(ref, ir_CONST_U8(IS_TMP_VAR))); + ir_IF_TRUE(if_tmp); + + // JIT: zval_ptr_dtor(EX_VAR(IP->op1.var)) + ref = ir_LOAD_U32(ir_ADD_OFFSET(jit_IP(jit), offsetof(zend_op, op1.var))); + if (sizeof(void*) == 8) { + ref = ir_ZEXT_A(ref); + } + ref = 
ir_ADD_A(jit_FP(jit), ref); + jit_ZVAL_PTR_DTOR(jit, + ZEND_ADDR_REF_ZVAL(ref), + MAY_BE_ANY|MAY_BE_RC1|MAY_BE_RCN|MAY_BE_REF, 0, NULL); + ir_MERGE_WITH_EMPTY_FALSE(if_tmp); + + ir_IJMP(jit_STUB_ADDR(jit, jit_stub_exception_handler)); + + return 1; +} + +static int zend_jit_icall_throw_stub(zend_jit_ctx *jit) +{ + ir_ref ip, if_set; + + // JIT: zend_rethrow_exception(zend_execute_data *execute_data) + // JIT: if (EX(opline)->opcode != ZEND_HANDLE_EXCEPTION) { + jit_STORE_IP(jit, ir_LOAD_A(jit_EX(opline))); + ip = jit_IP(jit); + if_set = ir_IF(ir_EQ(ir_LOAD_U8(ir_ADD_OFFSET(ip, offsetof(zend_op, opcode))), + ir_CONST_U8(ZEND_HANDLE_EXCEPTION))); + ir_IF_FALSE(if_set); + + // JIT: EG(opline_before_exception) = opline; + ir_STORE(jit_EG(opline_before_exception), ip); + ir_MERGE_WITH_EMPTY_TRUE(if_set); + + // JIT: opline = EG(exception_op); + jit_STORE_IP(jit, jit_EG(exception_op)); + + if (GCC_GLOBAL_REGS) { + ir_STORE(jit_EX(opline), jit_IP(jit)); + } + + ir_IJMP(jit_STUB_ADDR(jit, jit_stub_exception_handler)); + + return 1; +} + +static int zend_jit_leave_throw_stub(zend_jit_ctx *jit) +{ + ir_ref ip, if_set; + + // JIT: if (opline->opcode != ZEND_HANDLE_EXCEPTION) { + jit_STORE_IP(jit, ir_LOAD_A(jit_EX(opline))); + ip = jit_IP(jit); + if_set = ir_IF(ir_EQ(ir_LOAD_U8(ir_ADD_OFFSET(ip, offsetof(zend_op, opcode))), + ir_CONST_U8(ZEND_HANDLE_EXCEPTION))); + ir_IF_FALSE(if_set); + + // JIT: EG(opline_before_exception) = opline; + ir_STORE(jit_EG(opline_before_exception), ip); + ir_MERGE_WITH_EMPTY_TRUE(if_set); + + // JIT: opline = EG(exception_op); + jit_LOAD_IP(jit, jit_EG(exception_op)); + + if (GCC_GLOBAL_REGS) { + ir_STORE(jit_EX(opline), jit_IP(jit)); + + // JIT: HANDLE_EXCEPTION() + ir_IJMP(jit_STUB_ADDR(jit, jit_stub_exception_handler)); + } else { + ir_RETURN(ir_CONST_I32(2)); // ZEND_VM_LEAVE + } + + return 1; +} + +static int zend_jit_hybrid_runtime_jit_stub(zend_jit_ctx *jit) +{ + if (zend_jit_vm_kind != ZEND_VM_KIND_HYBRID) { + return 0; + } + + ir_CALL(IR_VOID, ir_CONST_FC_FUNC(zend_runtime_jit)); + ir_IJMP(ir_LOAD_A(jit_IP(jit))); + return 1; +} + +static int zend_jit_hybrid_profile_jit_stub(zend_jit_ctx *jit) +{ + ir_ref addr, func, run_time_cache, jit_extension; + + if (zend_jit_vm_kind != ZEND_VM_KIND_HYBRID) { + return 0; + } + + addr = ir_CONST_ADDR(&zend_jit_profile_counter), + ir_STORE(addr, ir_ADD_L(ir_LOAD_L(addr), ir_CONST_LONG(1))); + + func = ir_LOAD_A(jit_EX(func)); + run_time_cache = ir_LOAD_A(jit_EX(run_time_cache)); + jit_extension = ir_LOAD_A(ir_ADD_OFFSET(func, offsetof(zend_op_array, reserved[zend_func_info_rid]))); + + if (zend_jit_profile_counter_rid) { + addr = ir_ADD_OFFSET(run_time_cache, zend_jit_profile_counter_rid * sizeof(void*)); + } else { + addr = run_time_cache; + } + ir_STORE(addr, ir_ADD_L(ir_LOAD_L(addr), ir_CONST_LONG(1))); + + addr = ir_ADD_OFFSET(jit_extension, offsetof(zend_jit_op_array_extension, orig_handler)); + ir_IJMP(ir_LOAD_A(addr)); + + return 1; +} + +static int _zend_jit_hybrid_hot_counter_stub(zend_jit_ctx *jit, uint32_t cost) +{ + ir_ref func, jit_extension, addr, ref, if_overflow; + + func = ir_LOAD_A(jit_EX(func)); + jit_extension = ir_LOAD_A(ir_ADD_OFFSET(func, offsetof(zend_op_array, reserved[zend_func_info_rid]))); + addr = ir_LOAD_A(ir_ADD_OFFSET(jit_extension, offsetof(zend_jit_op_array_hot_extension, counter))); + ref = ir_SUB_I16(ir_LOAD_I16(addr), ir_CONST_I16(cost)); + ir_STORE(addr, ref); + if_overflow = ir_IF(ir_LE(ref, ir_CONST_I16(0))); + + ir_IF_TRUE_cold(if_overflow); + ir_STORE(addr, 
ir_CONST_I16(ZEND_JIT_COUNTER_INIT)); + ir_CALL_2(IR_VOID, ir_CONST_FC_FUNC(zend_jit_hot_func), + jit_FP(jit), + jit_IP(jit)); + ir_IJMP(ir_LOAD_A(jit_IP(jit))); + + ir_IF_FALSE(if_overflow); + ref = ir_SUB_A(jit_IP(jit), + ir_LOAD_A(ir_ADD_OFFSET(func, offsetof(zend_op_array, opcodes)))); + ref = ir_DIV_A(ref, ir_CONST_ADDR(sizeof(zend_op) / sizeof(void*))); + + addr = ir_ADD_A(ir_ADD_OFFSET(jit_extension, offsetof(zend_jit_op_array_hot_extension, orig_handlers)), + ref); + ir_IJMP(ir_LOAD_A(addr)); + + return 1; +} + +static int zend_jit_hybrid_func_hot_counter_stub(zend_jit_ctx *jit) +{ + if (zend_jit_vm_kind != ZEND_VM_KIND_HYBRID || !JIT_G(hot_func)) { + return 0; + } + + return _zend_jit_hybrid_hot_counter_stub(jit, + ((ZEND_JIT_COUNTER_INIT + JIT_G(hot_func) - 1) / JIT_G(hot_func))); +} + +static int zend_jit_hybrid_loop_hot_counter_stub(zend_jit_ctx *jit) +{ + if (zend_jit_vm_kind != ZEND_VM_KIND_HYBRID || !JIT_G(hot_loop)) { + return 0; + } + + return _zend_jit_hybrid_hot_counter_stub(jit, + ((ZEND_JIT_COUNTER_INIT + JIT_G(hot_loop) - 1) / JIT_G(hot_loop))); +} + +static ir_ref _zend_jit_orig_opline_handler(zend_jit_ctx *jit, ir_ref offset) +{ + ir_ref addr; + + if (GCC_GLOBAL_REGS) { + addr = ir_ADD_A(offset, jit_IP(jit)); + } else { + addr = ir_ADD_A(offset, ir_LOAD_A(jit_EX(opline))); + } + + return ir_LOAD_A(addr); +} + +static ir_ref zend_jit_orig_opline_handler(zend_jit_ctx *jit) +{ + ir_ref func, jit_extension, offset; + + func = ir_LOAD_A(jit_EX(func)); + jit_extension = ir_LOAD_A(ir_ADD_OFFSET(func, offsetof(zend_op_array, reserved[zend_func_info_rid]))); + offset = ir_LOAD_A(ir_ADD_OFFSET(jit_extension, offsetof(zend_jit_op_array_trace_extension, offset))); + return _zend_jit_orig_opline_handler(jit, offset); +} + +static int _zend_jit_hybrid_trace_counter_stub(zend_jit_ctx *jit, uint32_t cost) +{ + ir_ref func, jit_extension, offset, addr, ref, if_overflow, ret, if_halt; + + func = ir_LOAD_A(jit_EX(func)); + jit_extension = ir_LOAD_A(ir_ADD_OFFSET(func, offsetof(zend_op_array, reserved[zend_func_info_rid]))); + offset = ir_LOAD_A(ir_ADD_OFFSET(jit_extension, offsetof(zend_jit_op_array_trace_extension, offset))); + addr = ir_LOAD_A(ir_ADD_OFFSET(ir_ADD_A(offset, jit_IP(jit)), offsetof(zend_op_trace_info, counter))); + ref = ir_SUB_I16(ir_LOAD_I16(addr), ir_CONST_I16(cost)); + ir_STORE(addr, ref); + if_overflow = ir_IF(ir_LE(ref, ir_CONST_I16(0))); + + ir_IF_TRUE_cold(if_overflow); + ir_STORE(addr, ir_CONST_I16(ZEND_JIT_COUNTER_INIT)); + ret = ir_CALL_2(IR_I32, ir_CONST_FC_FUNC(zend_jit_trace_hot_root), + jit_FP(jit), + jit_IP(jit)); + if_halt = ir_IF(ir_LT(ret, ir_CONST_I32(0))); + ir_IF_FALSE(if_halt); + + ref = jit_EG(current_execute_data); + jit_STORE_FP(jit, ir_LOAD_A(ref)); + ref = ir_LOAD_A(jit_EX(opline)); + jit_STORE_IP(jit, ref); + ir_IJMP(ir_LOAD_A(jit_IP(jit))); + + ir_IF_FALSE(if_overflow); + ir_IJMP(_zend_jit_orig_opline_handler(jit, offset)); + + ir_IF_TRUE(if_halt); + ir_IJMP(ir_CONST_FC_FUNC(zend_jit_halt_op->handler)); + + return 1; +} + +static int zend_jit_hybrid_func_trace_counter_stub(zend_jit_ctx *jit) +{ + if (zend_jit_vm_kind != ZEND_VM_KIND_HYBRID || !JIT_G(hot_func)) { + return 0; + } + + return _zend_jit_hybrid_trace_counter_stub(jit, + ((ZEND_JIT_COUNTER_INIT + JIT_G(hot_func) - 1) / JIT_G(hot_func))); +} + +static int zend_jit_hybrid_ret_trace_counter_stub(zend_jit_ctx *jit) +{ + if (zend_jit_vm_kind != ZEND_VM_KIND_HYBRID || !JIT_G(hot_return)) { + return 0; + } + + return _zend_jit_hybrid_trace_counter_stub(jit, + ((ZEND_JIT_COUNTER_INIT + 
JIT_G(hot_return) - 1) / JIT_G(hot_return))); +} + +static int zend_jit_hybrid_loop_trace_counter_stub(zend_jit_ctx *jit) +{ + if (zend_jit_vm_kind != ZEND_VM_KIND_HYBRID || !JIT_G(hot_loop)) { + return 0; + } + + return _zend_jit_hybrid_trace_counter_stub(jit, + ((ZEND_JIT_COUNTER_INIT + JIT_G(hot_loop) - 1) / JIT_G(hot_loop))); +} + +static int zend_jit_trace_halt_stub(zend_jit_ctx *jit) +{ + if (zend_jit_vm_kind == ZEND_VM_KIND_HYBRID) { + ir_TAILCALL(IR_VOID, ir_CONST_FC_FUNC(zend_jit_halt_op->handler)); + } else if (GCC_GLOBAL_REGS) { + jit_STORE_IP(jit, IR_NULL); + ir_RETURN(IR_VOID); + } else { + ir_RETURN(ir_CONST_I32(-1)); // ZEND_VM_RETURN + } + return 1; +} + +static int zend_jit_trace_escape_stub(zend_jit_ctx *jit) +{ + if (GCC_GLOBAL_REGS) { + ir_TAILCALL(IR_VOID, ir_LOAD_A(jit_IP(jit))); + } else { + ir_RETURN(ir_CONST_I32(1)); // ZEND_VM_ENTER + } + + return 1; +} + +static int zend_jit_trace_exit_stub(zend_jit_ctx *jit) +{ + ir_ref ref, ret, if_zero, addr; + + if (GCC_GLOBAL_REGS) { + // EX(opline) = opline + ir_STORE(jit_EX(opline), jit_IP(jit)); + } + + ret = ir_EXITCALL(ir_CONST_FC_FUNC(zend_jit_trace_exit)); + + if_zero = ir_IF(ir_EQ(ret, ir_CONST_I32(0))); + + ir_IF_TRUE(if_zero); + + if (GCC_GLOBAL_REGS) { + ref = jit_EG(current_execute_data); + jit_STORE_FP(jit, ir_LOAD_A(ref)); + ref = ir_LOAD_A(jit_EX(opline)); + jit_STORE_IP(jit, ref); + ir_TAILCALL(IR_VOID, ir_LOAD_A(jit_IP(jit))); + } else { + ir_RETURN(ir_CONST_I32(1)); // ZEND_VM_ENTER + } + + ir_IF_FALSE(if_zero); + + ir_GUARD(ir_GE(ret, ir_CONST_I32(0)), jit_STUB_ADDR(jit, jit_stub_trace_halt)); + + ref = jit_EG(current_execute_data); + jit_STORE_FP(jit, ir_LOAD_A(ref)); + + if (GCC_GLOBAL_REGS) { + ref = ir_LOAD_A(jit_EX(opline)); + jit_STORE_IP(jit, ref); + } + + // check for interrupt (try to avoid this ???) 
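+	/* zend_jit_trace_exit() may have run for a noticeable time (e.g. compiling
+	 * a side trace), so EG(vm_interrupt) is re-checked here; with a NULL opline
+	 * the check assumes IP is already synchronized and jumps straight to the
+	 * interrupt-handler stub. */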
+ if (!zend_jit_check_timeout(jit, NULL, NULL)) { + return 0; + } + + addr = zend_jit_orig_opline_handler(jit); + if (GCC_GLOBAL_REGS) { + ir_TAILCALL(IR_VOID, addr); + } else { +#if defined(IR_TARGET_X86) + addr = ir_CAST_FC_FUNC(addr); +#endif + ref = ir_CALL_1(IR_I32, addr, jit_FP(jit)); + ir_GUARD(ir_GE(ref, ir_CONST_I32(0)), jit_STUB_ADDR(jit, jit_stub_trace_halt)); + ir_RETURN(ir_CONST_I32(1)); // ZEND_VM_ENTER + } + + return 1; +} + +static int zend_jit_undefined_offset_stub(zend_jit_ctx *jit) +{ + ir_ref opline = ir_LOAD_A(jit_EX(opline)); + ir_ref ref = ir_LOAD_U32(ir_ADD_OFFSET(opline, offsetof(zend_op, result.var))); + ir_ref if_const, end1, ref1; + + if (sizeof(void*) == 8) { + ref = ir_ZEXT_A(ref); + } + jit_set_Z_TYPE_INFO_ref(jit, ir_ADD_A(jit_FP(jit), ref), ir_CONST_U32(IS_NULL)); + + if_const = ir_IF(ir_EQ(ir_LOAD_U8(ir_ADD_OFFSET(opline, offsetof(zend_op, op2_type))), ir_CONST_U8(IS_CONST))); + + ir_IF_TRUE(if_const); +#if ZEND_USE_ABS_CONST_ADDR + ref1 = ir_LOAD_A(ir_ADD_OFFSET(opline, offsetof(zend_op, op2.zv))); +#else + ref = ir_LOAD_U32(ir_ADD_OFFSET(opline, offsetof(zend_op, op2.constant))); + if (sizeof(void*) == 8) { + ref = ir_SEXT_A(ref); + } + ref1 = ir_ADD_A(ref, opline); +#endif + + end1 = ir_END(); + + ir_IF_FALSE(if_const); + ref = ir_LOAD_U32(ir_ADD_OFFSET(opline, offsetof(zend_op, op2.var))); + if (sizeof(void*) == 8) { + ref = ir_ZEXT_A(ref); + } + ref = ir_ADD_A(jit_FP(jit), ref); + + ir_MERGE_WITH(end1); + ref = ir_PHI_2(IR_ADDR, ref, ref1); + + ref = jit_Z_LVAL_ref(jit, ref); + ir_CALL_3(IR_VOID, ir_CONST_FUNC(zend_error), + ir_CONST_U8(E_WARNING), + ir_CONST_ADDR("Undefined array key " ZEND_LONG_FMT), + ref); + ir_RETURN(IR_VOID); + + return 1; +} + +static int zend_jit_undefined_key_stub(zend_jit_ctx *jit) +{ + ir_ref opline = ir_LOAD_A(jit_EX(opline)); + ir_ref ref = ir_LOAD_U32(ir_ADD_OFFSET(opline, offsetof(zend_op, result.var))); + ir_ref if_const, end1, ref1; + + if (sizeof(void*) == 8) { + ref = ir_ZEXT_A(ref); + } + jit_set_Z_TYPE_INFO_ref(jit, ir_ADD_A(jit_FP(jit), ref), ir_CONST_U32(IS_NULL)); + + if_const = ir_IF(ir_EQ(ir_LOAD_U8(ir_ADD_OFFSET(opline, offsetof(zend_op, op2_type))), ir_CONST_U8(IS_CONST))); + + ir_IF_TRUE(if_const); +#if ZEND_USE_ABS_CONST_ADDR + ref1 = ir_LOAD_A(ir_ADD_OFFSET(opline, offsetof(zend_op, op2.zv))); +#else + ref = ir_LOAD_U32(ir_ADD_OFFSET(opline, offsetof(zend_op, op2.constant))); + if (sizeof(void*) == 8) { + ref = ir_SEXT_A(ref); + } + ref1 = ir_ADD_A(ref, opline); +#endif + + end1 = ir_END(); + + ir_IF_FALSE(if_const); + ref = ir_LOAD_U32(ir_ADD_OFFSET(opline, offsetof(zend_op, op2.var))); + if (sizeof(void*) == 8) { + ref = ir_ZEXT_A(ref); + } + ref = ir_ADD_A(jit_FP(jit), ref); + + ir_MERGE_WITH(end1); + ref = ir_PHI_2(IR_ADDR, ref, ref1); + + ref = ir_ADD_OFFSET(jit_Z_PTR_ref(jit, ref), offsetof(zend_string, val)); + ir_CALL_3(IR_VOID, ir_CONST_FUNC(zend_error), + ir_CONST_U8(E_WARNING), + ir_CONST_ADDR("Undefined array key \"%s\""), + ref); + ir_RETURN(IR_VOID); + + return 1; +} + +static int zend_jit_cannot_add_element_stub(zend_jit_ctx *jit) +{ + ir_ref opline = ir_LOAD_A(jit_EX(opline)); + ir_ref ref, if_result_used; + + if_result_used = ir_IF(ir_AND_U8( + ir_LOAD_U8(ir_ADD_OFFSET(opline, offsetof(zend_op, result_type))), + ir_CONST_U8(IS_TMP_VAR|IS_VAR))); + ir_IF_TRUE(if_result_used); + + ref = ir_LOAD_U32(ir_ADD_OFFSET(opline, offsetof(zend_op, result.var))); + if (sizeof(void*) == 8) { + ref = ir_ZEXT_A(ref); + } + jit_set_Z_TYPE_INFO_ref(jit, ir_ADD_A(jit_FP(jit), ref), ir_CONST_U32(IS_UNDEF)); 
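+	/* The result slot was reset to IS_UNDEF above, so the exception unwinder
+	 * won't try to destroy a value that was never stored. */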
+ ir_MERGE_WITH_EMPTY_FALSE(if_result_used); + + ir_CALL_2(IR_VOID, ir_CONST_FUNC(zend_throw_error), + IR_NULL, + ir_CONST_ADDR("Cannot add element to the array as the next element is already occupied")); + ir_RETURN(IR_VOID); + + return 1; +} + +static int zend_jit_assign_const_stub(zend_jit_ctx *jit) +{ + ir_ref var = ir_PARAM(IR_ADDR, "var", 1); + ir_ref val = ir_PARAM(IR_ADDR, "val", 2); + + zend_jit_addr var_addr = ZEND_ADDR_REF_ZVAL(var); + zend_jit_addr val_addr = ZEND_ADDR_REF_ZVAL(val); + uint32_t val_info = MAY_BE_ANY|MAY_BE_RC1|MAY_BE_RCN; + + if (!zend_jit_assign_to_variable( + jit, NULL, + var_addr, var_addr, -1, -1, + IS_CONST, val_addr, val_info, + 0, 0, 0)) { + return 0; + } + ir_RETURN(IR_VOID); + return 1; +} + +static int zend_jit_assign_tmp_stub(zend_jit_ctx *jit) +{ + ir_ref var = ir_PARAM(IR_ADDR, "var", 1); + ir_ref val = ir_PARAM(IR_ADDR, "val", 2); + + zend_jit_addr var_addr = ZEND_ADDR_REF_ZVAL(var); + zend_jit_addr val_addr = ZEND_ADDR_REF_ZVAL(val); + uint32_t val_info = MAY_BE_ANY|MAY_BE_RC1|MAY_BE_RCN; + + if (!zend_jit_assign_to_variable( + jit, NULL, + var_addr, var_addr, -1, -1, + IS_TMP_VAR, val_addr, val_info, + 0, 0, 0)) { + return 0; + } + ir_RETURN(IR_VOID); + return 1; +} + +static int zend_jit_assign_var_stub(zend_jit_ctx *jit) +{ + ir_ref var = ir_PARAM(IR_ADDR, "var", 1); + ir_ref val = ir_PARAM(IR_ADDR, "val", 2); + + zend_jit_addr var_addr = ZEND_ADDR_REF_ZVAL(var); + zend_jit_addr val_addr = ZEND_ADDR_REF_ZVAL(val); + uint32_t val_info = MAY_BE_ANY|MAY_BE_RC1|MAY_BE_RCN|MAY_BE_REF; + + if (!zend_jit_assign_to_variable( + jit, NULL, + var_addr, var_addr, -1, -1, + IS_VAR, val_addr, val_info, + 0, 0, 0)) { + return 0; + } + ir_RETURN(IR_VOID); + return 1; +} + +static int zend_jit_assign_cv_noref_stub(zend_jit_ctx *jit) +{ + ir_ref var = ir_PARAM(IR_ADDR, "var", 1); + ir_ref val = ir_PARAM(IR_ADDR, "val", 2); + + zend_jit_addr var_addr = ZEND_ADDR_REF_ZVAL(var); + zend_jit_addr val_addr = ZEND_ADDR_REF_ZVAL(val); + uint32_t val_info = MAY_BE_ANY|MAY_BE_RC1|MAY_BE_RCN/*|MAY_BE_UNDEF*/; + + if (!zend_jit_assign_to_variable( + jit, NULL, + var_addr, var_addr, -1, -1, + IS_CV, val_addr, val_info, + 0, 0, 0)) { + return 0; + } + ir_RETURN(IR_VOID); + return 1; +} + +static int zend_jit_new_array_stub(zend_jit_ctx *jit) +{ + ir_ref var = ir_PARAM(IR_ADDR, "var", 1); + zend_jit_addr var_addr = ZEND_ADDR_REF_ZVAL(var); + ir_ref ref = ir_CALL(IR_ADDR, ir_CONST_FC_FUNC(_zend_new_array_0)); + + jit_set_Z_PTR(jit, var_addr, ref); + jit_set_Z_TYPE_INFO(jit, var_addr, IS_ARRAY_EX); + ir_RETURN(ref); + return 1; +} + +static int zend_jit_assign_cv_stub(zend_jit_ctx *jit) +{ + ir_ref var = ir_PARAM(IR_ADDR, "var", 1); + ir_ref val = ir_PARAM(IR_ADDR, "val", 2); + + zend_jit_addr var_addr = ZEND_ADDR_REF_ZVAL(var); + zend_jit_addr val_addr = ZEND_ADDR_REF_ZVAL(val); + uint32_t val_info = MAY_BE_ANY|MAY_BE_RC1|MAY_BE_RCN|MAY_BE_REF/*|MAY_BE_UNDEF*/; + + if (!zend_jit_assign_to_variable( + jit, NULL, + var_addr, var_addr, -1, -1, + IS_CV, val_addr, val_info, + 0, 0, 0)) { + return 0; + } + ir_RETURN(IR_VOID); + return 1; +} + +static void zend_jit_init_ctx(zend_jit_ctx *jit, uint32_t flags) +{ +#if defined (__CET__) && (__CET__ & 1) != 0 + flags |= IR_GEN_ENDBR; +#endif + flags |= IR_OPT_FOLDING | IR_OPT_CFG | IR_OPT_CODEGEN | IR_HAS_CALLS; + + ir_init(&jit->ctx, flags, 256, 1024); + jit->ctx.ret_type = -1; + +#if defined(IR_TARGET_X86) || defined(IR_TARGET_X64) + if (JIT_G(opt_flags) & allowed_opt_flags & ZEND_JIT_CPU_AVX) { + jit->ctx.mflags |= IR_X86_AVX; + } 
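+	/* With IR_X86_AVX set, the IR backend emits VEX-encoded SSE instructions,
+	 * which may also be used for 16-byte zval copies. */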
+#elif defined(IR_TARGET_AARCH64)
+	jit->ctx.get_veneer = zend_jit_get_veneer;
+	jit->ctx.set_veneer = zend_jit_set_veneer;
+#endif
+
+	jit->ctx.fixed_regset = (1<<ZREG_FP) | (1<<ZREG_IP);
+	jit->ctx.flags |= IR_NO_STACK_COMBINE;
+	if (zend_jit_vm_kind == ZEND_VM_KIND_CALL) {
+		jit->ctx.flags |= IR_FUNCTION;
+		/* Stack must be 16 byte aligned */
+		/* TODO: select stack size ??? */
+#if defined(IR_TARGET_AARCH64)
+		jit->ctx.fixed_stack_frame_size = sizeof(void*) * 16; /* 10 saved registers and 6 spill slots (8 bytes) */
+#elif defined(_WIN64)
+		jit->ctx.fixed_stack_frame_size = sizeof(void*) * 11; /* 8 saved registers and 3 spill slots (8 bytes) */
+#elif defined(IR_TARGET_X64)
+		jit->ctx.fixed_stack_frame_size = sizeof(void*) * 9; /* 6 saved registers and 3 spill slots (8 bytes) */
+#else /* IR_TARGET_X86 */
+		jit->ctx.fixed_stack_frame_size = sizeof(void*) * 11; /* 4 saved registers and 7 spill slots (4 bytes) */
+#endif
+		if (GCC_GLOBAL_REGS) {
+			jit->ctx.fixed_save_regset = IR_REGSET_PRESERVED & ~((1<<ZREG_FP) | (1<<ZREG_IP));
+		} else {
+			jit->ctx.fixed_save_regset = IR_REGSET_PRESERVED;
+//#ifdef _WIN64
+//			jit->ctx.fixed_save_regset &= 0xffff; // TODO: don't save FP registers ???
+//#endif
+		}
+		jit->ctx.fixed_call_stack_size = 16;
+	} else {
+#ifdef ZEND_VM_HYBRID_JIT_RED_ZONE_SIZE
+		jit->ctx.fixed_stack_red_zone = ZEND_VM_HYBRID_JIT_RED_ZONE_SIZE;
+		if (jit->ctx.fixed_stack_red_zone > 16) {
+			jit->ctx.fixed_stack_frame_size = jit->ctx.fixed_stack_red_zone - 16;
+			jit->ctx.fixed_call_stack_size = 16;
+		}
+		jit->ctx.flags |= IR_MERGE_EMPTY_ENTRIES;
+#else
+		jit->ctx.fixed_stack_red_zone = 0;
+		jit->ctx.fixed_stack_frame_size = 32; /* 4 spill slots (8 bytes) or 8 spill slots (4 bytes) */
+		jit->ctx.fixed_call_stack_size = 16;
+#endif
+#if defined(IR_TARGET_X86) || defined(IR_TARGET_X64)
+		jit->ctx.fixed_regset |= (1<<IR_REG_RBP);
+#endif
+	}
+
+	jit->ctx.snapshot_create = (ir_snapshot_create_t)jit_SNAPSHOT;
+
+	jit->op_array = NULL;
+	jit->current_op_array = NULL;
+	jit->ssa = NULL;
+	jit->name = NULL;
+	jit->last_valid_opline = NULL;
+	jit->use_last_valid_opline = 0;
+	jit->track_last_valid_opline = 0;
+	jit->reuse_ip = 0;
+	jit->delayed_call_level = 0;
+	delayed_call_chain = 0;
+	jit->b = -1;
+#ifdef ZTS
+	jit->tls = IR_UNUSED;
+#endif
+	jit->fp = IR_UNUSED;
+	jit->trace_loop_ref = IR_UNUSED;
+	jit->return_inputs = IR_UNUSED;
+	jit->bb_start_ref = NULL;
+	jit->bb_predecessors = NULL;
+	jit->bb_edges = NULL;
+	jit->trace = NULL;
+	jit->ra = NULL;
+	jit->delay_var = -1;
+	jit->delay_refs = NULL;
+	jit->eg_exception_addr = 0;
+	zend_hash_init(&jit->addr_hash, 64, NULL, NULL, 0);
+	memset(jit->stub_addr, 0, sizeof(jit->stub_addr));
+
+	ir_START();
+}
+
+static int zend_jit_free_ctx(zend_jit_ctx *jit)
+{
+	if (jit->name) {
+		zend_string_release(jit->name);
+	}
+	zend_hash_destroy(&jit->addr_hash);
+	ir_free(&jit->ctx);
+	return 1;
+}
+
+static void *zend_jit_ir_compile(ir_ctx *ctx, size_t *size, const char *name)
+{
+	void *entry;
+
+	if (JIT_G(debug) & ZEND_JIT_DEBUG_IR_SRC) {
+		if (name) fprintf(stderr, "%s: ; after folding\n", name);
+		ir_save(ctx, stderr);
+	}
+
+#if ZEND_DEBUG
+	ir_check(ctx);
+#endif
+
+	ir_build_def_use_lists(ctx);
+
+#if ZEND_DEBUG
+	ir_check(ctx);
+#endif
+
+#if 1
+	ir_sccp(ctx);
+#endif
+
+	if (JIT_G(debug) & ZEND_JIT_DEBUG_IR_AFTER_SCCP) {
+		if (name) fprintf(stderr, "%s: ; after SCCP\n", name);
+		ir_save(ctx, stderr);
+	}
+
+	ir_build_cfg(ctx);
+	ir_build_dominators_tree(ctx);
+	ir_find_loops(ctx);
+
+	if (JIT_G(debug) & ZEND_JIT_DEBUG_IR_AFTER_SCCP) {
+		if (JIT_G(debug) & ZEND_JIT_DEBUG_IR_CFG) {
+			ir_dump_cfg(ctx, stderr);
+		}
+	}
+
+	ir_gcm(ctx);
+
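+	/* GCM (global code motion) assigns each floating node to a basic block;
+	 * ir_schedule() then fixes the linear order of nodes inside every block. */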
ir_schedule(ctx); + + if (JIT_G(debug) & ZEND_JIT_DEBUG_IR_AFTER_SCHEDULE) { + if (name) fprintf(stderr, "%s: ; after schedule\n", name); + ir_save(ctx, stderr); + if (JIT_G(debug) & ZEND_JIT_DEBUG_IR_CFG) { + ir_dump_cfg(ctx, stderr); + } + } + + ir_match(ctx); + ctx->flags &= ~IR_USE_FRAME_POINTER; /* don't use FRAME_POINTER even with ALLOCA, TODO: cleanup this ??? */ + ir_assign_virtual_registers(ctx); + ir_compute_live_ranges(ctx); + ir_coalesce(ctx); + ir_reg_alloc(ctx); + + if (JIT_G(debug) & ZEND_JIT_DEBUG_IR_AFTER_REGS) { + if (name) fprintf(stderr, "%s: ; after register allocation\n", name); + ir_save(ctx, stderr); + if (JIT_G(debug) & ZEND_JIT_DEBUG_IR_CFG) { + ir_dump_cfg(ctx, stderr); + } + if (JIT_G(debug) & ZEND_JIT_DEBUG_IR_REGS) { + ir_dump_live_ranges(ctx, stderr); + } + } + + ir_schedule_blocks(ctx); + + if (JIT_G(debug) & (ZEND_JIT_DEBUG_IR_FINAL|ZEND_JIT_DEBUG_IR_CODEGEN)) { + if (JIT_G(debug) & ZEND_JIT_DEBUG_IR_CODEGEN) { + if (name) fprintf(stderr, "%s: ; codegen\n", name); + ir_dump_codegen(ctx, stderr); + } else { + if (name) fprintf(stderr, "%s: ; final\n", name); + ir_save(ctx, stderr); + } + if (JIT_G(debug) & ZEND_JIT_DEBUG_IR_CFG) { + ir_dump_cfg(ctx, stderr); + } + if (JIT_G(debug) & ZEND_JIT_DEBUG_IR_REGS) { + ir_dump_live_ranges(ctx, stderr); + } + } + +#if ZEND_DEBUG + ir_check(ctx); +#endif + + ctx->code_buffer = *dasm_ptr; + ctx->code_buffer_size = (char*)dasm_end - (char*)*dasm_ptr; + + entry = ir_emit_code(ctx, size); + if (entry) { + *dasm_ptr = (char*)entry + ZEND_MM_ALIGNED_SIZE_EX(*size, 16); + } + +#if defined(IR_TARGET_AARCH64) + if (ctx->veneers_size) { + zend_jit_commit_veneers(); + *size -= ctx->veneers_size; + } +#endif + + return entry; +} + +static int zend_jit_setup_stubs(void) +{ + zend_jit_ctx jit; + void *entry; + size_t size; + uint32_t i; + + for (i = 0; i < sizeof(zend_jit_stubs)/sizeof(zend_jit_stubs[0]); i++) { + zend_jit_init_ctx(&jit, zend_jit_stubs[i].flags); + + if (!zend_jit_stubs[i].stub(&jit)) { + zend_jit_free_ctx(&jit); + zend_jit_stub_handlers[i] = NULL; + continue; + } + + entry = zend_jit_ir_compile(&jit.ctx, &size, zend_jit_stubs[i].name); + if (!entry) { + zend_jit_free_ctx(&jit); + return 0; + } + + zend_jit_stub_handlers[i] = entry; + + if (JIT_G(debug) & (ZEND_JIT_DEBUG_ASM|ZEND_JIT_DEBUG_ASM_STUBS|ZEND_JIT_DEBUG_GDB|ZEND_JIT_DEBUG_PERF|ZEND_JIT_DEBUG_PERF_DUMP)) { +#ifdef HAVE_CAPSTONE + if (JIT_G(debug) & (ZEND_JIT_DEBUG_ASM|ZEND_JIT_DEBUG_ASM_STUBS)) { + ir_disasm_add_symbol(zend_jit_stubs[i].name, (uintptr_t)entry, size); + } + if (JIT_G(debug) & ZEND_JIT_DEBUG_ASM_STUBS) { + ir_disasm(zend_jit_stubs[i].name, + entry, size, (JIT_G(debug) & ZEND_JIT_DEBUG_ASM_ADDR) != 0, &jit.ctx, stderr); + } +#endif +#ifndef _WIN32 + if (JIT_G(debug) & ZEND_JIT_DEBUG_GDB) { +// ir_mem_unprotect(entry, size); + ir_gdb_register(zend_jit_stubs[i].name, entry, size, 0, 0); +// ir_mem_protect(entry, size); + } + + if (JIT_G(debug) & (ZEND_JIT_DEBUG_PERF|ZEND_JIT_DEBUG_PERF_DUMP)) { + ir_perf_map_register(zend_jit_stubs[i].name, entry, size); + if (JIT_G(debug) & ZEND_JIT_DEBUG_PERF_DUMP) { + ir_perf_jitdump_register(zend_jit_stubs[i].name, entry, size); + } + } +#endif + } + zend_jit_free_ctx(&jit); + } + return 1; +} + +#define REGISTER_HELPER(n) \ + ir_disasm_add_symbol(#n, (uint64_t)(uintptr_t)n, sizeof(void*)); +#define REGISTER_DATA(n) \ + ir_disasm_add_symbol(#n, (uint64_t)(uintptr_t)&n, sizeof(n)); + +static void zend_jit_setup_disasm(void) +{ +#ifdef HAVE_CAPSTONE + ir_disasm_init(); + + if (zend_vm_kind() == 
ZEND_VM_KIND_HYBRID) { + zend_op opline; + + memset(&opline, 0, sizeof(opline)); + + opline.opcode = ZEND_DO_UCALL; + opline.result_type = IS_UNUSED; + zend_vm_set_opcode_handler(&opline); + ir_disasm_add_symbol("ZEND_DO_UCALL_SPEC_RETVAL_UNUSED_LABEL", (uint64_t)(uintptr_t)opline.handler, sizeof(void*)); + + opline.opcode = ZEND_DO_UCALL; + opline.result_type = IS_VAR; + zend_vm_set_opcode_handler(&opline); + ir_disasm_add_symbol("ZEND_DO_UCALL_SPEC_RETVAL_USED_LABEL", (uint64_t)(uintptr_t)opline.handler, sizeof(void*)); + + opline.opcode = ZEND_DO_FCALL_BY_NAME; + opline.result_type = IS_UNUSED; + zend_vm_set_opcode_handler(&opline); + ir_disasm_add_symbol("ZEND_DO_FCALL_BY_NAME_SPEC_RETVAL_UNUSED_LABEL", (uint64_t)(uintptr_t)opline.handler, sizeof(void*)); + + opline.opcode = ZEND_DO_FCALL_BY_NAME; + opline.result_type = IS_VAR; + zend_vm_set_opcode_handler(&opline); + ir_disasm_add_symbol("ZEND_DO_FCALL_BY_NAME_SPEC_RETVAL_USED_LABEL", (uint64_t)(uintptr_t)opline.handler, sizeof(void*)); + + opline.opcode = ZEND_DO_FCALL; + opline.result_type = IS_UNUSED; + zend_vm_set_opcode_handler(&opline); + ir_disasm_add_symbol("ZEND_DO_FCALL_SPEC_RETVAL_UNUSED_LABEL", (uint64_t)(uintptr_t)opline.handler, sizeof(void*)); + + opline.opcode = ZEND_DO_FCALL; + opline.result_type = IS_VAR; + zend_vm_set_opcode_handler(&opline); + ir_disasm_add_symbol("ZEND_DO_FCALL_SPEC_RETVAL_USED_LABEL", (uint64_t)(uintptr_t)opline.handler, sizeof(void*)); + + opline.opcode = ZEND_RETURN; + opline.op1_type = IS_CONST; + zend_vm_set_opcode_handler(&opline); + ir_disasm_add_symbol("ZEND_RETURN_SPEC_CONST_LABEL", (uint64_t)(uintptr_t)opline.handler, sizeof(void*)); + + opline.opcode = ZEND_RETURN; + opline.op1_type = IS_TMP_VAR; + zend_vm_set_opcode_handler(&opline); + ir_disasm_add_symbol("ZEND_RETURN_SPEC_TMP_LABEL", (uint64_t)(uintptr_t)opline.handler, sizeof(void*)); + + opline.opcode = ZEND_RETURN; + opline.op1_type = IS_VAR; + zend_vm_set_opcode_handler(&opline); + ir_disasm_add_symbol("ZEND_RETURN_SPEC_VAR_LABEL", (uint64_t)(uintptr_t)opline.handler, sizeof(void*)); + + opline.opcode = ZEND_RETURN; + opline.op1_type = IS_CV; + zend_vm_set_opcode_handler(&opline); + ir_disasm_add_symbol("ZEND_RETURN_SPEC_CV_LABEL", (uint64_t)(uintptr_t)opline.handler, sizeof(void*)); + + ir_disasm_add_symbol("ZEND_HYBRID_HALT_LABEL", (uint64_t)(uintptr_t)zend_jit_halt_op->handler, sizeof(void*)); + } + + REGISTER_DATA(zend_jit_profile_counter); + + REGISTER_HELPER(zend_runtime_jit); + REGISTER_HELPER(zend_jit_hot_func); + REGISTER_HELPER(zend_jit_trace_hot_root); + REGISTER_HELPER(zend_jit_trace_exit); + + REGISTER_HELPER(zend_jit_array_free); + REGISTER_HELPER(zend_jit_undefined_op_helper); + REGISTER_HELPER(zend_jit_pre_inc_typed_ref); + REGISTER_HELPER(zend_jit_pre_dec_typed_ref); + REGISTER_HELPER(zend_jit_post_inc_typed_ref); + REGISTER_HELPER(zend_jit_post_dec_typed_ref); + REGISTER_HELPER(zend_jit_pre_inc); + REGISTER_HELPER(zend_jit_pre_dec); + REGISTER_HELPER(zend_jit_add_arrays_helper); + REGISTER_HELPER(zend_jit_fast_assign_concat_helper); + REGISTER_HELPER(zend_jit_fast_concat_helper); + REGISTER_HELPER(zend_jit_fast_concat_tmp_helper); + REGISTER_HELPER(zend_jit_assign_op_to_typed_ref_tmp); + REGISTER_HELPER(zend_jit_assign_op_to_typed_ref); + REGISTER_HELPER(zend_jit_assign_const_to_typed_ref); + REGISTER_HELPER(zend_jit_assign_tmp_to_typed_ref); + REGISTER_HELPER(zend_jit_assign_var_to_typed_ref); + REGISTER_HELPER(zend_jit_assign_cv_to_typed_ref); + REGISTER_HELPER(zend_jit_assign_const_to_typed_ref2); + 
REGISTER_HELPER(zend_jit_assign_tmp_to_typed_ref2); + REGISTER_HELPER(zend_jit_assign_var_to_typed_ref2); + REGISTER_HELPER(zend_jit_assign_cv_to_typed_ref2); + REGISTER_HELPER(zend_jit_check_constant); + REGISTER_HELPER(zend_jit_get_constant); + REGISTER_HELPER(zend_jit_int_extend_stack_helper); + REGISTER_HELPER(zend_jit_extend_stack_helper); + REGISTER_HELPER(zend_jit_init_func_run_time_cache_helper); + REGISTER_HELPER(zend_jit_find_func_helper); + REGISTER_HELPER(zend_jit_find_ns_func_helper); + REGISTER_HELPER(zend_jit_unref_helper); + REGISTER_HELPER(zend_jit_invalid_method_call); + REGISTER_HELPER(zend_jit_invalid_method_call_tmp); + REGISTER_HELPER(zend_jit_find_method_helper); + REGISTER_HELPER(zend_jit_find_method_tmp_helper); + REGISTER_HELPER(zend_jit_push_static_metod_call_frame); + REGISTER_HELPER(zend_jit_push_static_metod_call_frame_tmp); + REGISTER_HELPER(zend_jit_free_trampoline_helper); + REGISTER_HELPER(zend_jit_verify_return_slow); + REGISTER_HELPER(zend_jit_deprecated_helper); + REGISTER_HELPER(zend_jit_copy_extra_args_helper); + REGISTER_HELPER(zend_jit_vm_stack_free_args_helper); + REGISTER_HELPER(zend_free_extra_named_params); + REGISTER_HELPER(zend_jit_free_call_frame); + REGISTER_HELPER(zend_jit_exception_in_interrupt_handler_helper); + REGISTER_HELPER(zend_jit_verify_arg_slow); + REGISTER_HELPER(zend_missing_arg_error); + REGISTER_HELPER(zend_jit_only_vars_by_reference); + REGISTER_HELPER(zend_jit_leave_func_helper); + REGISTER_HELPER(zend_jit_leave_nested_func_helper); + REGISTER_HELPER(zend_jit_leave_top_func_helper); + REGISTER_HELPER(zend_jit_fetch_global_helper); + REGISTER_HELPER(zend_jit_hash_index_lookup_rw_no_packed); + REGISTER_HELPER(zend_jit_hash_index_lookup_rw); + REGISTER_HELPER(zend_jit_hash_lookup_rw); + REGISTER_HELPER(zend_jit_symtable_find); + REGISTER_HELPER(zend_jit_symtable_lookup_w); + REGISTER_HELPER(zend_jit_symtable_lookup_rw); + REGISTER_HELPER(zend_jit_fetch_dim_r_helper); + REGISTER_HELPER(zend_jit_fetch_dim_is_helper); + REGISTER_HELPER(zend_jit_fetch_dim_isset_helper); + REGISTER_HELPER(zend_jit_fetch_dim_rw_helper); + REGISTER_HELPER(zend_jit_fetch_dim_w_helper); + REGISTER_HELPER(zend_jit_fetch_dim_str_offset_r_helper); + REGISTER_HELPER(zend_jit_fetch_dim_str_r_helper); + REGISTER_HELPER(zend_jit_fetch_dim_str_is_helper); + REGISTER_HELPER(zend_jit_fetch_dim_obj_r_helper); + REGISTER_HELPER(zend_jit_fetch_dim_obj_is_helper); + REGISTER_HELPER(zend_jit_invalid_array_access); + REGISTER_HELPER(zend_jit_zval_array_dup); + REGISTER_HELPER(zend_jit_prepare_assign_dim_ref); + REGISTER_HELPER(zend_jit_fetch_dim_obj_w_helper); + REGISTER_HELPER(zend_jit_fetch_dim_obj_rw_helper); + REGISTER_HELPER(zend_jit_isset_dim_helper); + REGISTER_HELPER(zend_jit_assign_dim_helper); + REGISTER_HELPER(zend_jit_assign_dim_op_helper); + REGISTER_HELPER(zend_jit_fetch_obj_w_slow); + REGISTER_HELPER(zend_jit_fetch_obj_r_slow); + REGISTER_HELPER(zend_jit_fetch_obj_is_slow); + REGISTER_HELPER(zend_jit_fetch_obj_r_dynamic); + REGISTER_HELPER(zend_jit_fetch_obj_is_dynamic); + REGISTER_HELPER(zend_jit_check_array_promotion); + REGISTER_HELPER(zend_jit_create_typed_ref); + REGISTER_HELPER(zend_jit_invalid_property_write); + REGISTER_HELPER(zend_jit_invalid_property_read); + REGISTER_HELPER(zend_jit_extract_helper); + REGISTER_HELPER(zend_jit_invalid_property_assign); + REGISTER_HELPER(zend_jit_assign_to_typed_prop); + REGISTER_HELPER(zend_jit_assign_obj_helper); + REGISTER_HELPER(zend_jit_invalid_property_assign_op); + 
REGISTER_HELPER(zend_jit_assign_op_to_typed_prop); + REGISTER_HELPER(zend_jit_assign_obj_op_helper); + REGISTER_HELPER(zend_jit_invalid_property_incdec); + REGISTER_HELPER(zend_jit_inc_typed_prop); + REGISTER_HELPER(zend_jit_dec_typed_prop); + REGISTER_HELPER(zend_jit_pre_inc_typed_prop); + REGISTER_HELPER(zend_jit_post_inc_typed_prop); + REGISTER_HELPER(zend_jit_pre_dec_typed_prop); + REGISTER_HELPER(zend_jit_post_dec_typed_prop); + REGISTER_HELPER(zend_jit_pre_inc_obj_helper); + REGISTER_HELPER(zend_jit_post_inc_obj_helper); + REGISTER_HELPER(zend_jit_pre_dec_obj_helper); + REGISTER_HELPER(zend_jit_post_dec_obj_helper); + REGISTER_HELPER(zend_jit_rope_end); + +#ifndef ZTS + REGISTER_DATA(EG(current_execute_data)); + REGISTER_DATA(EG(exception)); + REGISTER_DATA(EG(opline_before_exception)); + REGISTER_DATA(EG(vm_interrupt)); + REGISTER_DATA(EG(timed_out)); + REGISTER_DATA(EG(uninitialized_zval)); + REGISTER_DATA(EG(zend_constants)); + REGISTER_DATA(EG(jit_trace_num)); + REGISTER_DATA(EG(vm_stack_top)); + REGISTER_DATA(EG(vm_stack_end)); + REGISTER_DATA(EG(exception_op)); + REGISTER_DATA(EG(symbol_table)); + + REGISTER_DATA(CG(map_ptr_base)); +#endif +#endif +} + +static int zend_jit_calc_trace_prologue_size(void) +{ + zend_jit_ctx jit_ctx; + zend_jit_ctx *jit = &jit_ctx; + void *entry; + size_t size; + + zend_jit_init_ctx(jit, (zend_jit_vm_kind == ZEND_VM_KIND_CALL) ? 0 : IR_START_BR_TARGET); + + if (!GCC_GLOBAL_REGS) { + ir_ref ref = ir_PARAM(IR_ADDR, "execute_data", 1); + jit_STORE_FP(jit, ref); + jit->ctx.flags |= IR_FASTCALL_FUNC; + } + + ir_UNREACHABLE(); + + entry = zend_jit_ir_compile(&jit->ctx, &size, "JIT$trace_prologue"); + zend_jit_free_ctx(jit); + + if (!entry) { + return 0; + } + + zend_jit_trace_prologue_size = size; + return 1; +} + +#if !ZEND_WIN32 && !defined(IR_TARGET_AARCH64) +static uintptr_t zend_jit_hybrid_vm_sp_adj = 0; + +typedef struct _Unwind_Context _Unwind_Context; +typedef int (*_Unwind_Trace_Fn)(_Unwind_Context *, void *); +extern int _Unwind_Backtrace(_Unwind_Trace_Fn, void *); +extern uintptr_t _Unwind_GetCFA(_Unwind_Context *); + +typedef struct _zend_jit_unwind_arg { + int cnt; + uintptr_t cfa[3]; +} zend_jit_unwind_arg; + +static int zend_jit_unwind_cb(_Unwind_Context *ctx, void *a) +{ + zend_jit_unwind_arg *arg = (zend_jit_unwind_arg*)a; + arg->cfa[arg->cnt] = _Unwind_GetCFA(ctx); + arg->cnt++; + if (arg->cnt == 3) { + return 5; // _URC_END_OF_STACK + } + return 0; // _URC_NO_REASON; +} + +static void ZEND_FASTCALL zend_jit_touch_vm_stack_data(void *vm_stack_data) +{ + zend_jit_unwind_arg arg; + + memset(&arg, 0, sizeof(arg)); + _Unwind_Backtrace(zend_jit_unwind_cb, &arg); + if (arg.cnt == 3) { + zend_jit_hybrid_vm_sp_adj = arg.cfa[2] - arg.cfa[1]; + } +} + +extern void (ZEND_FASTCALL *zend_touch_vm_stack_data)(void *vm_stack_data); + +static zend_never_inline void zend_jit_set_sp_adj_vm(void) +{ + void (ZEND_FASTCALL *orig_zend_touch_vm_stack_data)(void *); + + orig_zend_touch_vm_stack_data = zend_touch_vm_stack_data; + zend_touch_vm_stack_data = zend_jit_touch_vm_stack_data; + execute_ex(NULL); // set sp_adj[SP_ADJ_VM] + zend_touch_vm_stack_data = orig_zend_touch_vm_stack_data; +} +#endif + +static int zend_jit_setup(void) +{ +#if defined(IR_TARGET_X86) + if (!zend_cpu_supports_sse2()) { + zend_error(E_CORE_ERROR, "CPU doesn't support SSE2"); + return FAILURE; + } +#endif +#if defined(IR_TARGET_X86) || defined(IR_TARGET_X64) + allowed_opt_flags = 0; + if (zend_cpu_supports_avx()) { + allowed_opt_flags |= ZEND_JIT_CPU_AVX; + } +#endif +#ifdef ZTS +#if 
defined(IR_TARGET_AARCH64)
+	tsrm_ls_cache_tcb_offset = tsrm_get_ls_cache_tcb_offset();
+	ZEND_ASSERT(tsrm_ls_cache_tcb_offset != 0);
+# elif defined(_WIN64)
+	tsrm_tls_index = _tls_index * sizeof(void*);
+
+	/* To find the offset of "_tsrm_ls_cache" in the TLS segment, we perform a linear scan of the local TLS memory */
+	/* There is probably a better solution */
+	do {
+		void ***tls_mem = ((void****)__readgsqword(0x58))[_tls_index];
+		void *val = _tsrm_ls_cache;
+		size_t offset = 0;
+		size_t size = (char*)&_tls_end - (char*)&_tls_start;
+
+		while (offset < size) {
+			if (*tls_mem == val) {
+				tsrm_tls_offset = offset;
+				break;
+			}
+			tls_mem++;
+			offset += sizeof(void*);
+		}
+		if (offset >= size) {
+			// TODO: error message ???
+			return FAILURE;
+		}
+	} while(0);
+# elif ZEND_WIN32
+	tsrm_tls_index = _tls_index * sizeof(void*);
+
+	/* To find the offset of "_tsrm_ls_cache" in the TLS segment, we perform a linear scan of the local TLS memory */
+	/* There is probably a better solution */
+	do {
+		void ***tls_mem = ((void****)__readfsdword(0x2c))[_tls_index];
+		void *val = _tsrm_ls_cache;
+		size_t offset = 0;
+		size_t size = (char*)&_tls_end - (char*)&_tls_start;
+
+		while (offset < size) {
+			if (*tls_mem == val) {
+				tsrm_tls_offset = offset;
+				break;
+			}
+			tls_mem++;
+			offset += sizeof(void*);
+		}
+		if (offset >= size) {
+			// TODO: error message ???
+			return FAILURE;
+		}
+	} while(0);
+# elif defined(__APPLE__) && defined(__x86_64__)
+	tsrm_ls_cache_tcb_offset = tsrm_get_ls_cache_tcb_offset();
+	if (tsrm_ls_cache_tcb_offset == 0) {
+		size_t *ti;
+		__asm__(
+			"leaq __tsrm_ls_cache(%%rip),%0"
+			: "=r" (ti));
+		tsrm_tls_offset = ti[2];
+		tsrm_tls_index = ti[1] * 8;
+	}
+# elif defined(__GNUC__) && defined(__x86_64__)
+	tsrm_ls_cache_tcb_offset = tsrm_get_ls_cache_tcb_offset();
+	if (tsrm_ls_cache_tcb_offset == 0) {
+#if defined(__has_attribute) && __has_attribute(tls_model) && !defined(__FreeBSD__) && !defined(__OpenBSD__) && !defined(__MUSL__)
+		size_t ret;
+
+		asm ("movq _tsrm_ls_cache@gottpoff(%%rip),%0"
+			: "=r" (ret));
+		tsrm_ls_cache_tcb_offset = ret;
+#else
+		size_t *ti;
+
+		__asm__(
+			"leaq _tsrm_ls_cache@tlsgd(%%rip), %0\n"
+			: "=a" (ti));
+		tsrm_tls_offset = ti[1];
+		tsrm_tls_index = ti[0] * 16;
+#endif
+	}
+# elif defined(__GNUC__) && defined(__i386__)
+	tsrm_ls_cache_tcb_offset = tsrm_get_ls_cache_tcb_offset();
+	if (tsrm_ls_cache_tcb_offset == 0) {
+#if !defined(__FreeBSD__) && !defined(__OpenBSD__) && !defined(__MUSL__)
+		size_t ret;
+
+		asm ("leal _tsrm_ls_cache@ntpoff,%0\n"
+			: "=a" (ret));
+		tsrm_ls_cache_tcb_offset = ret;
+#else
+		size_t *ti, _ebx, _ecx, _edx;
+
+		__asm__(
+			"call 1f\n"
+			".subsection 1\n"
+			"1:\tmovl (%%esp), %%ebx\n\t"
+			"ret\n"
+			".previous\n\t"
+			"addl $_GLOBAL_OFFSET_TABLE_, %%ebx\n\t"
+			"leal _tsrm_ls_cache@tlsldm(%%ebx), %0\n\t"
+			"call ___tls_get_addr@plt\n\t"
+			"leal _tsrm_ls_cache@tlsldm(%%ebx), %0\n"
+			: "=a" (ti), "=&b" (_ebx), "=&c" (_ecx), "=&d" (_edx));
+		tsrm_tls_offset = ti[1];
+		tsrm_tls_index = ti[0] * 8;
+#endif
+	}
+# endif
+#endif
+
+#if !ZEND_WIN32 && !defined(IR_TARGET_AARCH64)
+	if (zend_jit_vm_kind == ZEND_VM_KIND_HYBRID) {
+		zend_jit_set_sp_adj_vm(); // set zend_jit_hybrid_vm_sp_adj
+	}
+#endif
+
+	if (JIT_G(debug) & (ZEND_JIT_DEBUG_ASM|ZEND_JIT_DEBUG_ASM_STUBS)) {
+		zend_jit_setup_disasm();
+	}
+
+#ifndef _WIN32
+	if (JIT_G(debug) & ZEND_JIT_DEBUG_PERF_DUMP) {
+		ir_perf_jitdump_open();
+	}
+
+#endif
+	zend_long debug = JIT_G(debug);
+	if (!(debug & ZEND_JIT_DEBUG_ASM_STUBS)) {
+		JIT_G(debug) &=
~(ZEND_JIT_DEBUG_IR_SRC|ZEND_JIT_DEBUG_IR_FINAL|ZEND_JIT_DEBUG_IR_CFG|ZEND_JIT_DEBUG_IR_REGS|
+			ZEND_JIT_DEBUG_IR_CODEGEN|
+			ZEND_JIT_DEBUG_IR_AFTER_SCCP|ZEND_JIT_DEBUG_IR_AFTER_SCHEDULE|ZEND_JIT_DEBUG_IR_AFTER_REGS);
+	}
+
+	if (!zend_jit_calc_trace_prologue_size()) {
+		JIT_G(debug) = debug;
+		return FAILURE;
+	}
+	if (!zend_jit_setup_stubs()) {
+		JIT_G(debug) = debug;
+		return FAILURE;
+	}
+	JIT_G(debug) = debug;
+
+	return SUCCESS;
+}
+
+static void zend_jit_shutdown_ir(void)
+{
+#ifndef _WIN32
+	if (JIT_G(debug) & ZEND_JIT_DEBUG_PERF_DUMP) {
+		ir_perf_jitdump_close();
+	}
+	if (JIT_G(debug) & ZEND_JIT_DEBUG_GDB) {
+		ir_gdb_unregister_all();
+	}
+#endif
+#ifdef HAVE_CAPSTONE
+	if (JIT_G(debug) & (ZEND_JIT_DEBUG_ASM|ZEND_JIT_DEBUG_ASM_STUBS)) {
+		ir_disasm_free();
+	}
+#endif
+}
+
+/* PHP control flow reconstruction helpers */
+static ir_ref jit_IF_ex(zend_jit_ctx *jit, ir_ref condition, ir_ref true_block)
+{
+	ir_ref ref = ir_IF(condition);
+	/* op3 is used as temporary storage for the PHP BB number, so that PHP
+	 * control flow can be reconstructed.
+	 *
+	 * jit_IF_TRUE_FALSE_ex() uses it to select the IF_TRUE or IF_FALSE
+	 * instruction that starts the target block.
+	 */
+	ir_set_op(&jit->ctx, ref, 3, true_block);
+	return ref;
+}
+
+static void jit_IF_TRUE_FALSE_ex(zend_jit_ctx *jit, ir_ref if_ref, ir_ref true_block)
+{
+	ZEND_ASSERT(JIT_G(trigger) != ZEND_JIT_ON_HOT_TRACE);
+	ZEND_ASSERT(if_ref);
+	ZEND_ASSERT(jit->ctx.ir_base[if_ref].op == IR_IF);
+	ZEND_ASSERT(jit->ctx.ir_base[if_ref].op3);
+	if (jit->ctx.ir_base[if_ref].op3 == true_block) {
+		ir_IF_TRUE(if_ref);
+	} else {
+		ir_IF_FALSE(if_ref);
+	}
+}
+
+static void _zend_jit_add_predecessor_ref(zend_jit_ctx *jit, int b, int pred, ir_ref ref)
+{
+	int i, *p;
+	zend_basic_block *bb;
+	ir_ref *r, header;
+
+	ZEND_ASSERT(JIT_G(trigger) != ZEND_JIT_ON_HOT_TRACE);
+	bb = &jit->ssa->cfg.blocks[b];
+	p = &jit->ssa->cfg.predecessors[bb->predecessor_offset];
+	r = &jit->bb_edges[jit->bb_predecessors[b]];
+	for (i = 0; i < bb->predecessors_count; i++, p++, r++) {
+		if (*p == pred) {
+			ZEND_ASSERT(*r == IR_UNUSED || *r == ref);
+			header = jit->bb_start_ref[b];
+			if (header) {
+				/* this is a back edge */
+				ZEND_ASSERT(jit->ctx.ir_base[header].op == IR_LOOP_BEGIN);
+				if (jit->ctx.ir_base[ref].op == IR_END) {
+					jit->ctx.ir_base[ref].op = IR_LOOP_END;
+				} else if (jit->ctx.ir_base[ref].op == IR_IF) {
+					jit_IF_TRUE_FALSE_ex(jit, ref, b);
+					ref = ir_LOOP_END();
+				} else if (jit->ctx.ir_base[ref].op == IR_UNREACHABLE) {
+					ir_BEGIN(ref);
+					ref = ir_LOOP_END();
+				} else {
+					ZEND_UNREACHABLE();
+				}
+				ir_MERGE_SET_OP(header, i + 1, ref);
+			}
+			*r = ref;
+			return;
+		}
+	}
+	ZEND_UNREACHABLE();
+}
+
+static void _zend_jit_merge_smart_branch_inputs(zend_jit_ctx *jit,
+                                                uint32_t      true_label,
+                                                uint32_t      false_label,
+                                                ir_ref        true_inputs,
+                                                ir_ref        false_inputs)
+{
+	ir_ref true_path = IR_UNUSED, false_path = IR_UNUSED;
+
+	ZEND_ASSERT(JIT_G(trigger) != ZEND_JIT_ON_HOT_TRACE);
+	if (true_inputs) {
+		ZEND_ASSERT(jit->ctx.ir_base[true_inputs].op == IR_END);
+		if (!jit->ctx.ir_base[true_inputs].op2) {
+			true_path = true_inputs;
+		} else {
+			ir_MERGE_list(true_inputs);
+			true_path = ir_END();
+		}
+	}
+	if (false_inputs) {
+		ZEND_ASSERT(jit->ctx.ir_base[false_inputs].op == IR_END);
+		if (!jit->ctx.ir_base[false_inputs].op2) {
+			false_path = false_inputs;
+		} else {
+			ir_MERGE_list(false_inputs);
+			false_path = ir_END();
+		}
+	}
+
+	if (true_label == false_label && true_path && false_path) {
+		ir_MERGE_2(true_path, false_path);
+		_zend_jit_add_predecessor_ref(jit, true_label, jit->b,
ir_END()); + } else { + if (true_path) { + _zend_jit_add_predecessor_ref(jit, true_label, jit->b, true_path); + } + if (false_path) { + _zend_jit_add_predecessor_ref(jit, false_label, jit->b, false_path); + } + } + + jit->b = -1; +} + +static void _zend_jit_fix_merges(zend_jit_ctx *jit) +{ + int i, count; + ir_ref j, k, n, *p, *q, *r; + ir_ref ref; + ir_insn *insn, *phi; + + ZEND_ASSERT(JIT_G(trigger) != ZEND_JIT_ON_HOT_TRACE); + count = jit->ssa->cfg.blocks_count; + for (i = 0, p = jit->bb_start_ref; i < count; i++, p++) { + ref = *p; + if (ref) { + insn = &jit->ctx.ir_base[ref]; + if (insn->op == IR_MERGE || insn->op == IR_LOOP_BEGIN) { + n = insn->inputs_count; + /* Remove IS_UNUSED inputs */ + for (j = k = 0, q = r = insn->ops + 1; j < n; j++, q++) { + if (*q) { + if (q != r) { + *r = *q; + phi = insn + 1 + (n >> 2); + while (phi->op == IR_PI) { + phi++; + } + while (phi->op == IR_PHI) { + ir_insn_set_op(phi, k + 2, ir_insn_op(phi, j + 2)); + phi += 1 + ((n + 1) >> 2); + } + } + k++; + r++; + } + } + if (k != n) { + ir_ref n2, k2; + + if (k <= 1) { + insn->op = IR_BEGIN; + insn->inputs_count = 0; + } else { + insn->inputs_count = k; + } + n2 = 1 + (n >> 2); + k2 = 1 + (k >> 2); + while (k2 != n2) { + (insn+k2)->optx = IR_NOP; + k2++; + } + phi = insn + 1 + (n >> 2); + while (phi->op == IR_PI) { + phi++; + } + while (phi->op == IR_PHI) { + if (k <= 1) { + phi->op = IR_COPY; + phi->op1 = phi->op2; + phi->op2 = 1; + } + n2 = 1 + ((n + 1) >> 2); + k2 = 1 + ((k + 1) >> 2); + while (k2 != n2) { + (insn+k2)->optx = IR_NOP; + k2++; + } + phi += 1 + ((n + 1) >> 2); + } + } + } + } + } +} + +static void zend_jit_case_start(zend_jit_ctx *jit, int switch_b, int case_b, ir_ref switch_ref) +{ + zend_basic_block *bb = &jit->ssa->cfg.blocks[switch_b]; + const zend_op *opline = &jit->op_array->opcodes[bb->start + bb->len - 1]; + + if (opline->opcode == ZEND_SWITCH_LONG + || opline->opcode == ZEND_SWITCH_STRING + || opline->opcode == ZEND_MATCH) { + HashTable *jumptable = Z_ARRVAL_P(RT_CONSTANT(opline, opline->op2)); + const zend_op *default_opline = ZEND_OFFSET_TO_OPLINE(opline, opline->extended_value); + int default_b = jit->ssa->cfg.map[default_opline - jit->op_array->opcodes]; + zval *zv; + ir_ref list = IR_UNUSED, idx; + bool first = 1; + + ZEND_HASH_FOREACH_VAL(jumptable, zv) { + const zend_op *target = ZEND_OFFSET_TO_OPLINE(opline, Z_LVAL_P(zv)); + int b = jit->ssa->cfg.map[target - jit->op_array->opcodes]; + + if (b == case_b) { + if (!first) { + ir_END_list(list); + } + if (HT_IS_PACKED(jumptable)) { + idx = ir_CONST_LONG(zv - jumptable->arPacked); + } else { + idx = ir_CONST_LONG((Bucket*)zv - jumptable->arData); + } + ir_CASE_VAL(switch_ref, idx); + first = 0; + } + } ZEND_HASH_FOREACH_END(); + if (default_b == case_b) { + if (!first) { + ir_END_list(list); + } + if (jit->ctx.ir_base[switch_ref].op3) { + /* op3 may contain a list of additional "default" path inputs for MATCH */ + ir_ref ref = jit->ctx.ir_base[switch_ref].op3; + jit->ctx.ir_base[switch_ref].op3 = IS_UNDEF; + ZEND_ASSERT(jit->ctx.ir_base[ref].op == IR_END); + ir_ref end = ref; + while (jit->ctx.ir_base[end].op2) { + ZEND_ASSERT(jit->ctx.ir_base[end].op == IR_END); + end = jit->ctx.ir_base[end].op2; + } + jit->ctx.ir_base[end].op2 = list; + list = ref; + } + ir_CASE_DEFAULT(switch_ref); + } + if (list) { + ir_END_list(list); + ir_MERGE_list(list); + } + } else { + ZEND_UNREACHABLE(); + } +} + +static int zend_jit_bb_start(zend_jit_ctx *jit, int b) +{ + zend_basic_block *bb; + int i, n, *p, pred; + ir_ref ref, bb_start; + + 
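+	/* Open the IR basic block for PHP BB 'b' by merging all of its incoming
+	 * edges recorded in jit->bb_edges; for a loop header the merge becomes
+	 * an IR_LOOP_BEGIN node. */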
ZEND_ASSERT(JIT_G(trigger) != ZEND_JIT_ON_HOT_TRACE);
+	ZEND_ASSERT(b < jit->ssa->cfg.blocks_count);
+	bb = &jit->ssa->cfg.blocks[b];
+	ZEND_ASSERT((bb->flags & ZEND_BB_REACHABLE) != 0);
+	n = bb->predecessors_count;
+
+	if (n == 0) {
+		/* pass */
+		ZEND_ASSERT(jit->ctx.control);
+#if ZEND_DEBUG
+		ref = jit->ctx.control;
+		ir_insn *insn = &jit->ctx.ir_base[ref];
+		while (insn->op >= IR_CALL && insn->op <= IR_TRAP) {
+			ref = insn->op1;
+			insn = &jit->ctx.ir_base[ref];
+		}
+		ZEND_ASSERT(insn->op == IR_START);
+		ZEND_ASSERT(ref == 1);
+#endif
+		bb_start = 1;
+		if (jit->ssa->cfg.flags & ZEND_FUNC_RECURSIVE_DIRECTLY) {
+			/* prevent END/BEGIN merging */
+			jit->ctx.control = ir_emit1(&jit->ctx, IR_BEGIN, ir_END());
+			bb_start = jit->ctx.control;
+		}
+	} else if (n == 1) {
+		ZEND_ASSERT(!jit->ctx.control);
+		pred = jit->ssa->cfg.predecessors[bb->predecessor_offset];
+		ref = jit->bb_edges[jit->bb_predecessors[b]];
+		if (ref == IR_UNUSED) {
+			if (!jit->ctx.control) {
+				ir_BEGIN(IR_UNUSED); /* unreachable block */
+			}
+		} else {
+			ir_op op = jit->ctx.ir_base[ref].op;
+
+			if (op == IR_IF) {
+				if (!jit->ctx.control) {
+					jit_IF_TRUE_FALSE_ex(jit, ref, b);
+				} else {
+					ir_ref entry_path = ir_END();
+					jit_IF_TRUE_FALSE_ex(jit, ref, b);
+					ir_MERGE_WITH(entry_path);
+				}
+			} else if (op == IR_SWITCH) {
+				zend_jit_case_start(jit, pred, b, ref);
+			} else {
+				if (!jit->ctx.control) {
+					ZEND_ASSERT(op == IR_END || op == IR_UNREACHABLE || op == IR_RETURN);
+					if ((jit->ssa->cfg.blocks[b].flags & ZEND_BB_RECV_ENTRY)
+					 && (jit->ssa->cfg.flags & ZEND_FUNC_RECURSIVE_DIRECTLY)) {
+						/* prevent END/BEGIN merging */
+						jit->ctx.control = ir_emit1(&jit->ctx, IR_BEGIN, ref);
+					} else {
+						ir_BEGIN(ref);
+					}
+				} else {
+					ir_MERGE_WITH(ref);
+				}
+			}
+		}
+		bb_start = jit->ctx.control;
+	} else {
+		int forward_edges_count = 0;
+		int back_edges_count = 0;
+		ir_ref *pred_refs;
+		ir_ref entry_path = IR_UNUSED;
+		ALLOCA_FLAG(use_heap);
+
+		ZEND_ASSERT(!jit->ctx.control);
+		if (jit->ctx.control) {
+			entry_path = ir_END();
+		}
+		pred_refs = (ir_ref *)do_alloca(sizeof(ir_ref) * n, use_heap);
+		for (i = 0, p = jit->ssa->cfg.predecessors + bb->predecessor_offset; i < n; p++, i++) {
+			pred = *p;
+			if (jit->bb_start_ref[pred]) {
+				/* forward edge */
+				forward_edges_count++;
+				ref = jit->bb_edges[jit->bb_predecessors[b] + i];
+				if (ref == IR_UNUSED) {
+					/* dead edge */
+					pred_refs[i] = IR_UNUSED;
+				} else {
+					ir_op op = jit->ctx.ir_base[ref].op;
+
+					if (op == IR_IF) {
+						jit_IF_TRUE_FALSE_ex(jit, ref, b);
+						pred_refs[i] = ir_END();
+					} else if (op == IR_SWITCH) {
+						zend_jit_case_start(jit, pred, b, ref);
+						pred_refs[i] = ir_END();
+					} else {
+						ZEND_ASSERT(op == IR_END || op == IR_UNREACHABLE || op == IR_RETURN);
+						pred_refs[i] = ref;
+					}
+				}
+			} else {
+				/* backward edge */
+				back_edges_count++;
+				pred_refs[i] = IR_UNUSED;
+			}
+		}
+
+		if (bb->flags & ZEND_BB_LOOP_HEADER) {
+			ZEND_ASSERT(back_edges_count != 0);
+			ZEND_ASSERT(forward_edges_count != 0);
+			ir_MERGE_N(n, pred_refs);
+			jit->ctx.ir_base[jit->ctx.control].op = IR_LOOP_BEGIN;
+			bb_start = jit->ctx.control;
+			if (entry_path) {
+				ir_MERGE_WITH(entry_path);
+			}
+		} else {
+//			ZEND_ASSERT(back_edges_count != 0);
+			/* edges from exceptional blocks may be counted as back edges */
+			ir_MERGE_N(n, pred_refs);
+			bb_start = jit->ctx.control;
+			if (entry_path) {
+				ir_MERGE_WITH(entry_path);
+			}
+		}
+		free_alloca(pred_refs, use_heap);
+	}
+	jit->b = b;
+	jit->bb_start_ref[b] = bb_start;
+
+	if ((bb->flags & ZEND_BB_ENTRY) || (bb->idom >= 0 && jit->bb_start_ref[bb->idom] < jit->ctx.fold_cse_limit))
{ + jit->ctx.fold_cse_limit = bb_start; + } + + return 1; +} + +static int zend_jit_bb_end(zend_jit_ctx *jit, int b) +{ + int succ; + zend_basic_block *bb; + + ZEND_ASSERT(JIT_G(trigger) != ZEND_JIT_ON_HOT_TRACE); + if (jit->b != b) { + return 1; + } + + bb = &jit->ssa->cfg.blocks[b]; + ZEND_ASSERT(bb->successors_count != 0); + if (bb->successors_count == 1) { + succ = bb->successors[0]; + } else { + const zend_op *opline = &jit->op_array->opcodes[bb->start + bb->len - 1]; + + /* Use only the following successor of SWITCH and FE_RESET_R */ + ZEND_ASSERT(opline->opcode == ZEND_SWITCH_LONG + || opline->opcode == ZEND_SWITCH_STRING + || opline->opcode == ZEND_MATCH + || opline->opcode == ZEND_FE_RESET_R); + succ = b + 1; + } + _zend_jit_add_predecessor_ref(jit, succ, b, ir_END()); + jit->b = -1; + return 1; +} + +static int jit_CMP_IP(zend_jit_ctx *jit, ir_op op, const zend_op *next_opline) +{ + ir_ref ref; + +#if 1 + if (GCC_GLOBAL_REGS) { + ref = jit_IP32(jit); + } else { + ref = ir_LOAD_U32(jit_EX(opline)); + } + ref = ir_CMP_OP(op, ref, ir_CONST_U32((uint32_t)(uintptr_t)next_opline)); +#else + if (GCC_GLOBAL_REGS) { + ref = jit_IP(jit); + } else { + ref = ir_LOAD_A(jit_EX(opline)); + } + ref = ir_CMP_OP(op, ref, ir_CONST_ADDR(next_opline)); +#endif + return ref; +} + +static int zend_jit_cond_jmp(zend_jit_ctx *jit, const zend_op *next_opline, int target_block) +{ + ir_ref ref; + zend_basic_block *bb; + + ZEND_ASSERT(jit->b >= 0); + bb = &jit->ssa->cfg.blocks[jit->b]; + + ZEND_ASSERT(bb->successors_count == 2); + if (bb->successors[0] == bb->successors[1]) { + _zend_jit_add_predecessor_ref(jit, bb->successors[0], jit->b, ir_END()); + jit->b = -1; + zend_jit_set_last_valid_opline(jit, next_opline); + return 1; + } + + ref = jit_IF_ex(jit, jit_CMP_IP(jit, IR_NE, next_opline), target_block); + + _zend_jit_add_predecessor_ref(jit, bb->successors[0], jit->b, ref); + _zend_jit_add_predecessor_ref(jit, bb->successors[1], jit->b, ref); + + jit->b = -1; + zend_jit_set_last_valid_opline(jit, next_opline); + + return 1; +} + +static int zend_jit_set_cond(zend_jit_ctx *jit, const zend_op *next_opline, uint32_t var) +{ + ir_ref ref; + + ref = ir_ADD_U32(ir_ZEXT_U32(jit_CMP_IP(jit, IR_EQ, next_opline)), ir_CONST_U32(IS_FALSE)); + + // EX_VAR(var) = ... + ir_STORE(ir_ADD_OFFSET(jit_FP(jit), var + offsetof(zval, u1.type_info)), ref); + + zend_jit_reset_last_valid_opline(jit); + return zend_jit_set_ip(jit, next_opline - 1); +} + +/* PHP JIT handlers */ +static void zend_jit_check_exception(zend_jit_ctx *jit) +{ + ir_GUARD_NOT(ir_LOAD_A(jit_EG_exception(jit)), + jit_STUB_ADDR(jit, jit_stub_exception_handler)); +} + +static void zend_jit_check_exception_undef_result(zend_jit_ctx *jit, const zend_op *opline) +{ + ir_GUARD_NOT(ir_LOAD_A(jit_EG_exception(jit)), + jit_STUB_ADDR(jit, + (opline->result_type & (IS_TMP_VAR|IS_VAR)) ? 
jit_stub_exception_handler_undef : jit_stub_exception_handler)); +} + +static void zend_jit_type_check_undef(zend_jit_ctx *jit, + ir_ref type, + uint32_t var, + const zend_op *opline, + bool check_exception, + bool in_cold_path) +{ + ir_ref if_def = ir_IF(type); + + if (!in_cold_path) { + ir_IF_FALSE_cold(if_def); + } else { + ir_IF_FALSE(if_def); + } + if (opline) { + jit_SET_EX_OPLINE(jit, opline); + } + ir_CALL_1(IR_VOID, ir_CONST_FC_FUNC(zend_jit_undefined_op_helper), ir_CONST_U32(var)); + if (check_exception) { + zend_jit_check_exception(jit); + } + ir_MERGE_WITH_EMPTY_TRUE(if_def); +} + +static ir_ref zend_jit_zval_check_undef(zend_jit_ctx *jit, + ir_ref ref, + uint32_t var, + const zend_op *opline, + bool check_exception) +{ + ir_ref if_def, ref2; + + if_def = ir_IF(jit_Z_TYPE_ref(jit, ref)); + ir_IF_FALSE_cold(if_def); + + if (opline) { + jit_SET_EX_OPLINE(jit, opline); + } + + ir_CALL_1(IR_VOID, ir_CONST_FC_FUNC(zend_jit_undefined_op_helper), ir_CONST_U32(var)); + + if (check_exception) { + zend_jit_check_exception(jit); + } + + ref2 = jit_EG(uninitialized_zval); + + ir_MERGE_WITH_EMPTY_TRUE(if_def); + + return ir_PHI_2(IR_ADDR, ref2, ref); +} + +static void zend_jit_recv_entry(zend_jit_ctx *jit, int b) +{ + zend_basic_block *bb = &jit->ssa->cfg.blocks[b]; + int pred; + ir_ref ref; + + ZEND_ASSERT(bb->predecessors_count > 0); + + pred = jit->bb_predecessors[b]; + ref = jit->bb_edges[pred]; + + ZEND_ASSERT(ref); + ZEND_ASSERT(jit->ctx.ir_base[ref].op == IR_END); + + /* Insert a MERGE block with additional ENTRY input between predecessor and this one */ + ir_ENTRY(ref, bb->start); + if (!GCC_GLOBAL_REGS) { + /* 2 is hardcoded reference to IR_PARAM */ + ZEND_ASSERT(jit->ctx.ir_base[2].op == IR_PARAM); + ZEND_ASSERT(jit->ctx.ir_base[2].op3 == 1); + jit_STORE_FP(jit, 2); + } + + ir_MERGE_WITH(ref); + jit->bb_edges[pred] = ir_END(); +} + +static void zend_jit_osr_entry(zend_jit_ctx *jit, int b) +{ + zend_basic_block *bb = &jit->ssa->cfg.blocks[b]; + ir_ref ref = ir_END(); + + /* Insert a MERGE block with additional ENTRY input between predecessor and this one */ + ir_ENTRY(ref, bb->start); + if (!GCC_GLOBAL_REGS) { + /* 2 is hardcoded reference to IR_PARAM */ + ZEND_ASSERT(jit->ctx.ir_base[2].op == IR_PARAM); + ZEND_ASSERT(jit->ctx.ir_base[2].op3 == 1); + jit_STORE_FP(jit, 2); + } + + ir_MERGE_WITH(ref); +} + +static ir_ref zend_jit_continue_entry(zend_jit_ctx *jit, ir_ref src, unsigned int label) +{ + ir_ENTRY(src, label); + if (!GCC_GLOBAL_REGS) { + /* 2 is hardcoded reference to IR_PARAM */ + ZEND_ASSERT(jit->ctx.ir_base[2].op == IR_PARAM); + ZEND_ASSERT(jit->ctx.ir_base[2].op3 == 1); + jit_STORE_FP(jit, 2); + } + return ir_END(); +} + +static int zend_jit_handler(zend_jit_ctx *jit, const zend_op *opline, int may_throw) +{ + ir_ref ref; + const void *handler; + + zend_jit_set_ip(jit, opline); + if (zend_jit_vm_kind == ZEND_VM_KIND_HYBRID) { + handler = zend_get_opcode_handler_func(opline); + } else { + handler = opline->handler; + } + if (GCC_GLOBAL_REGS) { + ir_CALL(IR_VOID, ir_CONST_FUNC(handler)); + } else { + ref = jit_FP(jit); + ref = ir_CALL_1(IR_VOID, ir_CONST_FC_FUNC(handler), ref); + } + if (may_throw) { + zend_jit_check_exception(jit); + } + /* Skip the following OP_DATA */ + switch (opline->opcode) { + case ZEND_ASSIGN_DIM: + case ZEND_ASSIGN_OBJ: + case ZEND_ASSIGN_STATIC_PROP: + case ZEND_ASSIGN_DIM_OP: + case ZEND_ASSIGN_OBJ_OP: + case ZEND_ASSIGN_STATIC_PROP_OP: + case ZEND_ASSIGN_STATIC_PROP_REF: + case ZEND_ASSIGN_OBJ_REF: + zend_jit_set_last_valid_opline(jit, opline 
+ 2); + break; + default: + zend_jit_set_last_valid_opline(jit, opline + 1); + break; + } + return 1; +} + +static int zend_jit_tail_handler(zend_jit_ctx *jit, const zend_op *opline) +{ + const void *handler; + ir_ref ref; + zend_basic_block *bb; + + zend_jit_set_ip(jit, opline); + if (zend_jit_vm_kind == ZEND_VM_KIND_HYBRID) { + if (opline->opcode == ZEND_DO_UCALL || + opline->opcode == ZEND_DO_FCALL_BY_NAME || + opline->opcode == ZEND_DO_FCALL || + opline->opcode == ZEND_RETURN) { + + /* Use inlined HYBRID VM handler */ + handler = opline->handler; + ir_TAILCALL(IR_VOID, ir_CONST_FUNC(handler)); + } else { + handler = zend_get_opcode_handler_func(opline); + ir_CALL(IR_VOID, ir_CONST_FUNC(handler)); + ref = ir_LOAD_A(jit_IP(jit)); + ir_TAILCALL(IR_VOID, ref); + } + } else { + handler = opline->handler; + if (GCC_GLOBAL_REGS) { + ir_TAILCALL(IR_VOID, ir_CONST_FUNC(handler)); + } else { + ref = jit_FP(jit); + ir_TAILCALL_1(IR_I32, ir_CONST_FC_FUNC(handler), ref); + } + } + if (jit->b >= 0) { + bb = &jit->ssa->cfg.blocks[jit->b]; + if (bb->successors_count > 0 + && (opline->opcode == ZEND_DO_FCALL + || opline->opcode == ZEND_DO_UCALL + || opline->opcode == ZEND_DO_FCALL_BY_NAME + || opline->opcode == ZEND_INCLUDE_OR_EVAL + || opline->opcode == ZEND_GENERATOR_CREATE + || opline->opcode == ZEND_YIELD + || opline->opcode == ZEND_YIELD_FROM + || opline->opcode == ZEND_FAST_CALL)) { + /* Add a fake control edge from UNREACHABLE to the following ENTRY */ + int succ; + + if (bb->successors_count == 1) { + succ = bb->successors[0]; + ZEND_ASSERT(jit->ssa->cfg.blocks[succ].flags & ZEND_BB_ENTRY); + } else { + /* Use only the following successor of FAST_CALL */ + ZEND_ASSERT(opline->opcode == ZEND_FAST_CALL); + succ = jit->b + 1; + /* we need an entry */ + jit->ssa->cfg.blocks[succ].flags |= ZEND_BB_ENTRY; + } + ref = jit->ctx.insns_count - 1; + ZEND_ASSERT(jit->ctx.ir_base[ref].op == IR_UNREACHABLE); + ref = zend_jit_continue_entry(jit, ref, jit->ssa->cfg.blocks[succ].start); + _zend_jit_add_predecessor_ref(jit, succ, jit->b, ref); + } + jit->b = -1; + zend_jit_reset_last_valid_opline(jit); + } + return 1; +} + +static int zend_jit_call(zend_jit_ctx *jit, const zend_op *opline, unsigned int next_block) +{ + return zend_jit_tail_handler(jit, opline); +} + +static int zend_jit_spill_store(zend_jit_ctx *jit, zend_jit_addr src, zend_jit_addr dst, uint32_t info, bool set_type) +{ + ZEND_ASSERT(Z_MODE(src) == IS_REG); + ZEND_ASSERT(Z_MODE(dst) == IS_MEM_ZVAL); + + if ((info & MAY_BE_ANY) == MAY_BE_LONG) { + jit_set_Z_LVAL(jit, dst, zend_jit_use_reg(jit, src)); + if (set_type && + (Z_REG(dst) != ZREG_FP || + !JIT_G(current_frame) || + STACK_MEM_TYPE(JIT_G(current_frame)->stack, EX_VAR_TO_NUM(Z_OFFSET(dst))) != IS_LONG)) { + jit_set_Z_TYPE_INFO(jit, dst, IS_LONG); + } + } else if ((info & MAY_BE_ANY) == MAY_BE_DOUBLE) { + jit_set_Z_DVAL(jit, dst, zend_jit_use_reg(jit, src)); + if (set_type && + (Z_REG(dst) != ZREG_FP || + !JIT_G(current_frame) || + STACK_MEM_TYPE(JIT_G(current_frame)->stack, EX_VAR_TO_NUM(Z_OFFSET(dst))) != IS_DOUBLE)) { + jit_set_Z_TYPE_INFO(jit, dst, IS_DOUBLE); + } + } else { + ZEND_UNREACHABLE(); + } + return 1; +} + +static int zend_jit_load_reg(zend_jit_ctx *jit, zend_jit_addr src, zend_jit_addr dst, uint32_t info) +{ + ZEND_ASSERT(Z_MODE(src) == IS_MEM_ZVAL); + ZEND_ASSERT(Z_MODE(dst) == IS_REG); + + if ((info & MAY_BE_ANY) == MAY_BE_LONG) { + zend_jit_def_reg(jit, dst, jit_Z_LVAL(jit, src)); + } else if ((info & MAY_BE_ANY) == MAY_BE_DOUBLE) { + zend_jit_def_reg(jit, dst, 
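+ /* reload the raw double payload from the in-memory zval into the SSA register */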
jit_Z_DVAL(jit, src)); + } else { + ZEND_UNREACHABLE(); + } + return 1; +} + +static int zend_jit_store_var(zend_jit_ctx *jit, uint32_t info, int var, int ssa_var, bool set_type) +{ + zend_jit_addr src = ZEND_ADDR_REG(ssa_var); + zend_jit_addr dst = ZEND_ADDR_MEM_ZVAL(ZREG_FP, EX_NUM_TO_VAR(var)); + + return zend_jit_spill_store(jit, src, dst, info, set_type); +} + +static int zend_jit_store_ref(zend_jit_ctx *jit, uint32_t info, int var, int32_t src, bool set_type) +{ + zend_jit_addr dst = ZEND_ADDR_MEM_ZVAL(ZREG_FP, EX_NUM_TO_VAR(var)); + + if ((info & MAY_BE_ANY) == MAY_BE_LONG) { + jit_set_Z_LVAL(jit, dst, src); + if (set_type && + (Z_REG(dst) != ZREG_FP || + !JIT_G(current_frame) || + STACK_MEM_TYPE(JIT_G(current_frame)->stack, EX_VAR_TO_NUM(Z_OFFSET(dst))) != IS_LONG)) { + jit_set_Z_TYPE_INFO(jit, dst, IS_LONG); + } + } else if ((info & MAY_BE_ANY) == MAY_BE_DOUBLE) { + jit_set_Z_DVAL(jit, dst, src); + if (set_type && + (Z_REG(dst) != ZREG_FP || + !JIT_G(current_frame) || + STACK_MEM_TYPE(JIT_G(current_frame)->stack, EX_VAR_TO_NUM(Z_OFFSET(dst))) != IS_DOUBLE)) { + jit_set_Z_TYPE_INFO(jit, dst, IS_DOUBLE); + } + } else { + ZEND_UNREACHABLE(); + } + return 1; +} + +static ir_ref zend_jit_deopt_rload(zend_jit_ctx *jit, ir_type type, int32_t reg) +{ + ir_ref ref = jit->ctx.control; + ir_insn *insn; + + while (1) { + insn = &jit->ctx.ir_base[ref]; + if (insn->op == IR_RLOAD && insn->op2 == reg) { + ZEND_ASSERT(insn->type == type); + return ref; + } else if (insn->op == IR_START) { + break; + } + ref = insn->op1; + } + return ir_RLOAD(type, reg); +} + +static int zend_jit_store_const_long(zend_jit_ctx *jit, int var, zend_long val) +{ + zend_jit_addr dst = ZEND_ADDR_MEM_ZVAL(ZREG_FP, EX_NUM_TO_VAR(var)); + ir_ref src = ir_CONST_LONG(val); + + if (jit->ra && jit->ra[var].ref == IR_NULL) { + zend_jit_def_reg(jit, ZEND_ADDR_REG(var), src); + } + jit_set_Z_LVAL(jit, dst, src); + jit_set_Z_TYPE_INFO(jit, dst, IS_LONG); + return 1; +} + +static int zend_jit_store_const_double(zend_jit_ctx *jit, int var, double val) +{ + zend_jit_addr dst = ZEND_ADDR_MEM_ZVAL(ZREG_FP, EX_NUM_TO_VAR(var)); + ir_ref src = ir_CONST_DOUBLE(val); + + if (jit->ra && jit->ra[var].ref == IR_NULL) { + zend_jit_def_reg(jit, ZEND_ADDR_REG(var), src); + } + jit_set_Z_DVAL(jit, dst, src); + jit_set_Z_TYPE_INFO(jit, dst, IS_DOUBLE); + return 1; +} + +static int zend_jit_store_type(zend_jit_ctx *jit, int var, uint8_t type) +{ + zend_jit_addr dst = ZEND_ADDR_MEM_ZVAL(ZREG_FP, EX_NUM_TO_VAR(var)); + + ZEND_ASSERT(type <= IS_DOUBLE); + jit_set_Z_TYPE_INFO(jit, dst, type); + return 1; +} + +static int zend_jit_store_reg(zend_jit_ctx *jit, uint32_t info, int var, int8_t reg, bool in_mem, bool set_type) +{ + zend_jit_addr src; + zend_jit_addr dst = ZEND_ADDR_MEM_ZVAL(ZREG_FP, EX_NUM_TO_VAR(var)); + ir_type type; + + if ((info & MAY_BE_ANY) == MAY_BE_LONG) { + type = IR_LONG; + src = zend_jit_deopt_rload(jit, type, reg); + if (jit->ra && jit->ra[var].ref == IR_NULL) { + zend_jit_def_reg(jit, ZEND_ADDR_REG(var), src); + } else if (!in_mem) { + jit_set_Z_LVAL(jit, dst, src); + if (set_type && + (Z_REG(dst) != ZREG_FP || + !JIT_G(current_frame) || + STACK_MEM_TYPE(JIT_G(current_frame)->stack, EX_VAR_TO_NUM(Z_OFFSET(dst))) != IS_LONG)) { + jit_set_Z_TYPE_INFO(jit, dst, IS_LONG); + } + } + } else if ((info & MAY_BE_ANY) == MAY_BE_DOUBLE) { + type = IR_DOUBLE; + src = zend_jit_deopt_rload(jit, type, reg); + if (jit->ra && jit->ra[var].ref == IR_NULL) { + zend_jit_def_reg(jit, ZEND_ADDR_REG(var), src); + } else if (!in_mem) { + 
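+ /* value is not in memory yet: spill the double payload back into the zval slot */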
jit_set_Z_DVAL(jit, dst, src); + if (set_type && + (Z_REG(dst) != ZREG_FP || + !JIT_G(current_frame) || + STACK_MEM_TYPE(JIT_G(current_frame)->stack, EX_VAR_TO_NUM(Z_OFFSET(dst))) != IS_DOUBLE)) { + jit_set_Z_TYPE_INFO(jit, dst, IS_DOUBLE); + } + } + } else { + ZEND_UNREACHABLE(); + } + return 1; +} + +static int zend_jit_store_spill_slot(zend_jit_ctx *jit, uint32_t info, int var, int8_t reg, int32_t offset, bool set_type) +{ + zend_jit_addr src; + zend_jit_addr dst = ZEND_ADDR_MEM_ZVAL(ZREG_FP, EX_NUM_TO_VAR(var)); + + if ((info & MAY_BE_ANY) == MAY_BE_LONG) { + src = ir_LOAD_L(ir_ADD_OFFSET(ir_RLOAD_A(reg), offset)); + if (jit->ra && jit->ra[var].ref == IR_NULL) { + zend_jit_def_reg(jit, ZEND_ADDR_REG(var), src); + } else { + jit_set_Z_LVAL(jit, dst, src); + if (set_type && + (Z_REG(dst) != ZREG_FP || + !JIT_G(current_frame) || + STACK_MEM_TYPE(JIT_G(current_frame)->stack, EX_VAR_TO_NUM(Z_OFFSET(dst))) != IS_LONG)) { + jit_set_Z_TYPE_INFO(jit, dst, IS_LONG); + } + } + } else if ((info & MAY_BE_ANY) == MAY_BE_DOUBLE) { + src = ir_LOAD_D(ir_ADD_OFFSET(ir_RLOAD_A(reg), offset)); + if (jit->ra && jit->ra[var].ref == IR_NULL) { + zend_jit_def_reg(jit, ZEND_ADDR_REG(var), src); + } else { + jit_set_Z_DVAL(jit, dst, src); + if (set_type && + (Z_REG(dst) != ZREG_FP || + !JIT_G(current_frame) || + STACK_MEM_TYPE(JIT_G(current_frame)->stack, EX_VAR_TO_NUM(Z_OFFSET(dst))) != IS_DOUBLE)) { + jit_set_Z_TYPE_INFO(jit, dst, IS_DOUBLE); + } + } + } else { + ZEND_UNREACHABLE(); + } + return 1; +} + +static int zend_jit_store_var_type(zend_jit_ctx *jit, int var, uint32_t type) +{ + zend_jit_addr dst = ZEND_ADDR_MEM_ZVAL(ZREG_FP, EX_NUM_TO_VAR(var)); + + jit_set_Z_TYPE_INFO(jit, dst, type); + return 1; +} + +static int zend_jit_zval_try_addref(zend_jit_ctx *jit, zend_jit_addr var_addr) +{ + ir_ref if_refcounted, end1; + + if_refcounted = jit_if_REFCOUNTED(jit, var_addr); + ir_IF_FALSE(if_refcounted); + end1 = ir_END(); + ir_IF_TRUE(if_refcounted); + jit_GC_ADDREF(jit, jit_Z_PTR(jit, var_addr)); + ir_MERGE_WITH(end1); + return 1; +} + +static int zend_jit_store_var_if_necessary(zend_jit_ctx *jit, int var, zend_jit_addr src, uint32_t info) +{ + if (Z_MODE(src) == IS_REG && Z_STORE(src)) { + zend_jit_addr dst = ZEND_ADDR_MEM_ZVAL(ZREG_FP, var); + return zend_jit_spill_store(jit, src, dst, info, 1); + } + return 1; +} + +static int zend_jit_store_var_if_necessary_ex(zend_jit_ctx *jit, int var, zend_jit_addr src, uint32_t info, zend_jit_addr old, uint32_t old_info) +{ + if (Z_MODE(src) == IS_REG && Z_STORE(src)) { + zend_jit_addr dst = ZEND_ADDR_MEM_ZVAL(ZREG_FP, var); + bool set_type = 1; + + if ((info & (MAY_BE_ANY|MAY_BE_REF|MAY_BE_UNDEF)) == + (old_info & (MAY_BE_ANY|MAY_BE_REF|MAY_BE_UNDEF))) { + if (Z_MODE(old) != IS_REG || Z_LOAD(old) || Z_STORE(old)) { + if (JIT_G(current_frame)) { + uint32_t mem_type = STACK_MEM_TYPE(JIT_G(current_frame)->stack, EX_VAR_TO_NUM(var)); + + if (mem_type != IS_UNKNOWN + && (info & (MAY_BE_ANY|MAY_BE_REF|MAY_BE_UNDEF)) == (1 << mem_type)) { + set_type = 0; + } + } else { + set_type = 0; + } + } + } + return zend_jit_spill_store(jit, src, dst, info, set_type); + } + return 1; +} + +static int zend_jit_load_var(zend_jit_ctx *jit, uint32_t info, int var, int ssa_var) +{ + zend_jit_addr src = ZEND_ADDR_MEM_ZVAL(ZREG_FP, EX_NUM_TO_VAR(var)); + zend_jit_addr dst = ZEND_ADDR_REG(ssa_var); + + return zend_jit_load_reg(jit, src, dst, info); +} + +static int zend_jit_invalidate_var_if_necessary(zend_jit_ctx *jit, uint8_t op_type, zend_jit_addr addr, znode_op op) +{ + if ((op_type & 
(IS_TMP_VAR|IS_VAR)) && Z_MODE(addr) == IS_REG && !Z_LOAD(addr) && !Z_STORE(addr)) { + /* Invalidate operand type to prevent incorrect destruction by exception_handler_free_op1_op2() */ + zend_jit_addr dst = ZEND_ADDR_MEM_ZVAL(ZREG_FP, op.var); + jit_set_Z_TYPE_INFO(jit, dst, IS_UNDEF); + } + return 1; +} + +static int zend_jit_update_regs(zend_jit_ctx *jit, uint32_t var, zend_jit_addr src, zend_jit_addr dst, uint32_t info) +{ + if (!zend_jit_same_addr(src, dst)) { + if (Z_MODE(src) == IS_REG) { + if (Z_MODE(dst) == IS_REG) { + zend_jit_def_reg(jit, dst, zend_jit_use_reg(jit, src)); + if (!Z_LOAD(src) && !Z_STORE(src) && Z_STORE(dst)) { + zend_jit_addr var_addr = ZEND_ADDR_MEM_ZVAL(ZREG_FP, var); + + if (!zend_jit_spill_store(jit, dst, var_addr, info, + JIT_G(trigger) != ZEND_JIT_ON_HOT_TRACE || + JIT_G(current_frame) == NULL || + STACK_MEM_TYPE(JIT_G(current_frame)->stack, EX_VAR_TO_NUM(var)) == IS_UNKNOWN || + (1 << STACK_MEM_TYPE(JIT_G(current_frame)->stack, EX_VAR_TO_NUM(var))) != (info & MAY_BE_ANY) + )) { + return 0; + } + } + } else if (Z_MODE(dst) == IS_MEM_ZVAL) { + if (!Z_LOAD(src) && !Z_STORE(src)) { + if (!zend_jit_spill_store(jit, src, dst, info, + JIT_G(trigger) != ZEND_JIT_ON_HOT_TRACE || + JIT_G(current_frame) == NULL || + STACK_MEM_TYPE(JIT_G(current_frame)->stack, EX_VAR_TO_NUM(var)) == IS_UNKNOWN || + (1 << STACK_MEM_TYPE(JIT_G(current_frame)->stack, EX_VAR_TO_NUM(var))) != (info & MAY_BE_ANY) + )) { + return 0; + } + } + } else { + ZEND_UNREACHABLE(); + } + } else if (Z_MODE(src) == IS_MEM_ZVAL) { + if (Z_MODE(dst) == IS_REG) { + if (!zend_jit_load_reg(jit, src, dst, info)) { + return 0; + } + } else { + ZEND_UNREACHABLE(); + } + } else { + ZEND_UNREACHABLE(); + } + } else if (Z_MODE(dst) == IS_REG && Z_STORE(dst)) { + dst = ZEND_ADDR_MEM_ZVAL(ZREG_FP, var); + if (!zend_jit_spill_store(jit, src, dst, info, + JIT_G(trigger) != ZEND_JIT_ON_HOT_TRACE || + JIT_G(current_frame) == NULL || + STACK_MEM_TYPE(JIT_G(current_frame)->stack, EX_VAR_TO_NUM(var)) == IS_UNKNOWN || + (1 << STACK_MEM_TYPE(JIT_G(current_frame)->stack, EX_VAR_TO_NUM(var))) != (info & MAY_BE_ANY) + )) { + return 0; + } + } + return 1; +} + +static int zend_jit_inc_dec(zend_jit_ctx *jit, const zend_op *opline, uint32_t op1_info, zend_jit_addr op1_addr, uint32_t op1_def_info, zend_jit_addr op1_def_addr, uint32_t res_use_info, uint32_t res_info, zend_jit_addr res_addr, int may_overflow, int may_throw) +{ + ir_ref if_long = IR_UNUSED; + ir_ref op1_lval_ref = IR_UNUSED; + ir_ref ref; + ir_op op; + + if (op1_info & ((MAY_BE_UNDEF|MAY_BE_ANY)-MAY_BE_LONG)) { + if_long = jit_if_Z_TYPE(jit, op1_addr, IS_LONG); + ir_IF_TRUE(if_long); + } + if (opline->opcode == ZEND_POST_INC || opline->opcode == ZEND_POST_DEC) { + op1_lval_ref = jit_Z_LVAL(jit, op1_addr); + jit_set_Z_LVAL(jit, res_addr, op1_lval_ref); + if (Z_MODE(res_addr) != IS_REG) { + jit_set_Z_TYPE_INFO(jit, res_addr, IS_LONG); + } + } + if (Z_MODE(op1_def_addr) == IS_MEM_ZVAL + && Z_MODE(op1_addr) == IS_REG + && !Z_LOAD(op1_addr) + && !Z_STORE(op1_addr)) { + jit_set_Z_TYPE_INFO(jit, op1_def_addr, IS_LONG); + } + if (opline->opcode == ZEND_PRE_INC || opline->opcode == ZEND_POST_INC) { + op = may_overflow ? IR_ADD_OV : IR_ADD; + } else { + op = may_overflow ?
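+ /* the *_OV variants expose an overflow flag that ir_OVERFLOW(ref) tests below */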
IR_SUB_OV : IR_SUB; + } + if (!op1_lval_ref) { + op1_lval_ref = jit_Z_LVAL(jit, op1_addr); + } + ref = ir_BINARY_OP_L(op, op1_lval_ref, ir_CONST_LONG(1)); + if (op1_def_info & MAY_BE_LONG) { + jit_set_Z_LVAL(jit, op1_def_addr, ref); + } + if (may_overflow && + (((op1_def_info & (MAY_BE_ANY|MAY_BE_GUARD)) == (MAY_BE_LONG|MAY_BE_GUARD)) || + ((opline->result_type != IS_UNUSED && (res_info & (MAY_BE_ANY|MAY_BE_GUARD)) == (MAY_BE_LONG|MAY_BE_GUARD))))) { + int32_t exit_point; + const void *exit_addr; + zend_jit_trace_stack *stack; + uint32_t old_op1_info, old_res_info = 0; + + stack = JIT_G(current_frame)->stack; + old_op1_info = STACK_INFO(stack, EX_VAR_TO_NUM(opline->op1.var)); + SET_STACK_TYPE(stack, EX_VAR_TO_NUM(opline->op1.var), IS_DOUBLE, 0); + if (opline->opcode == ZEND_PRE_INC || opline->opcode == ZEND_POST_INC) { + SET_STACK_REF(stack, EX_VAR_TO_NUM(opline->op1.var), ir_CONST_DOUBLE((double)ZEND_LONG_MAX + 1.0)); + } else { + SET_STACK_REF(stack, EX_VAR_TO_NUM(opline->op1.var), ir_CONST_DOUBLE((double)ZEND_LONG_MIN - 1.0)); + } + if (opline->result_type != IS_UNUSED) { + old_res_info = STACK_INFO(stack, EX_VAR_TO_NUM(opline->result.var)); + if (opline->opcode == ZEND_PRE_INC) { + SET_STACK_TYPE(stack, EX_VAR_TO_NUM(opline->result.var), IS_DOUBLE, 0); + SET_STACK_REF(stack, EX_VAR_TO_NUM(opline->result.var), ir_CONST_DOUBLE((double)ZEND_LONG_MAX + 1.0)); + } else if (opline->opcode == ZEND_PRE_DEC) { + SET_STACK_TYPE(stack, EX_VAR_TO_NUM(opline->result.var), IS_DOUBLE, 0); + SET_STACK_REF(stack, EX_VAR_TO_NUM(opline->result.var), ir_CONST_DOUBLE((double)ZEND_LONG_MIN - 1.0)); + } else if (opline->opcode == ZEND_POST_INC) { + SET_STACK_TYPE(stack, EX_VAR_TO_NUM(opline->result.var), IS_LONG, 0); + SET_STACK_REF(stack, EX_VAR_TO_NUM(opline->result.var), ir_CONST_LONG(ZEND_LONG_MAX)); + } else if (opline->opcode == ZEND_POST_DEC) { + SET_STACK_TYPE(stack, EX_VAR_TO_NUM(opline->result.var), IS_LONG, 0); + SET_STACK_REF(stack, EX_VAR_TO_NUM(opline->result.var), ir_CONST_LONG(ZEND_LONG_MIN)); + } + } + + exit_point = zend_jit_trace_get_exit_point(opline + 1, 0); + exit_addr = zend_jit_trace_get_exit_addr(exit_point); + ir_GUARD_NOT(ir_OVERFLOW(ref), ir_CONST_ADDR(exit_addr)); + + if ((opline->opcode == ZEND_PRE_INC || opline->opcode == ZEND_PRE_DEC) && + opline->result_type != IS_UNUSED) { + jit_set_Z_LVAL(jit, res_addr, ref); + if (Z_MODE(res_addr) != IS_REG) { + jit_set_Z_TYPE_INFO(jit, res_addr, IS_LONG); + } + } + + SET_STACK_INFO(stack, EX_VAR_TO_NUM(opline->op1.var), old_op1_info); + if (opline->result_type != IS_UNUSED) { + SET_STACK_INFO(stack, EX_VAR_TO_NUM(opline->result.var), old_res_info); + } + } else if (may_overflow) { + ir_ref if_overflow; + ir_ref merge_inputs = IR_UNUSED; + + if (((op1_def_info & (MAY_BE_ANY|MAY_BE_GUARD)) == (MAY_BE_DOUBLE|MAY_BE_GUARD)) + || (opline->result_type != IS_UNUSED && (res_info & (MAY_BE_ANY|MAY_BE_GUARD)) == (MAY_BE_DOUBLE|MAY_BE_GUARD))) { + int32_t exit_point; + const void *exit_addr; + zend_jit_trace_stack *stack; + uint32_t old_res_info = 0; + + stack = JIT_G(current_frame)->stack; + if (opline->result_type != IS_UNUSED) { + old_res_info = STACK_INFO(stack, EX_VAR_TO_NUM(opline->result.var)); + if (opline->opcode == ZEND_PRE_INC) { + SET_STACK_TYPE(stack, EX_VAR_TO_NUM(opline->result.var), IS_LONG, 0); + } + } + exit_point = zend_jit_trace_get_exit_point(opline + 1, 0); + exit_addr = zend_jit_trace_get_exit_addr(exit_point); + if ((opline->opcode == ZEND_PRE_INC || opline->opcode == ZEND_PRE_DEC) && + opline->result_type != IS_UNUSED) { + 
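+ /* overflow is the expected case here (the result is guarded as double); if the op did NOT overflow, store the long result for the interpreter and leave through the side exit */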
if_overflow = ir_IF(ir_OVERFLOW(ref)); + ir_IF_FALSE_cold(if_overflow); + jit_set_Z_LVAL(jit, res_addr, ref); + if (Z_MODE(res_addr) != IS_REG) { + jit_set_Z_TYPE_INFO(jit, res_addr, IS_LONG); + } + jit_SIDE_EXIT(jit, ir_CONST_ADDR(exit_addr)); + ir_IF_TRUE(if_overflow); + } else { + ir_GUARD(ir_OVERFLOW(ref), ir_CONST_ADDR(exit_addr)); + } + if (opline->result_type != IS_UNUSED) { + SET_STACK_INFO(stack, EX_VAR_TO_NUM(opline->result.var), old_res_info); + } + } else { + if_overflow = ir_IF(ir_OVERFLOW(ref)); + ir_IF_FALSE(if_overflow); + if ((opline->opcode == ZEND_PRE_INC || opline->opcode == ZEND_PRE_DEC) && + opline->result_type != IS_UNUSED) { + jit_set_Z_LVAL(jit, res_addr, ref); + if (Z_MODE(res_addr) != IS_REG) { + jit_set_Z_TYPE_INFO(jit, res_addr, IS_LONG); + } + } + ir_END_list(merge_inputs); + + /* overflow => cold path */ + ir_IF_TRUE_cold(if_overflow); + } + + if (opline->opcode == ZEND_PRE_INC || opline->opcode == ZEND_POST_INC) { + if (Z_MODE(op1_def_addr) == IS_REG) { + jit_set_Z_DVAL(jit, op1_def_addr, ir_CONST_DOUBLE((double)ZEND_LONG_MAX + 1.0)); + } else { +#if SIZEOF_ZEND_LONG == 4 + jit_set_Z_LVAL(jit, op1_def_addr, ir_CONST_LONG(0)); + jit_set_Z_W2(jit, op1_def_addr, ir_CONST_U32(0x41e00000)); +#else + jit_set_Z_LVAL(jit, op1_def_addr, ir_CONST_LONG(0x43e0000000000000)); +#endif + jit_set_Z_TYPE_INFO(jit, op1_def_addr, IS_DOUBLE); + } + } else { + if (Z_MODE(op1_def_addr) == IS_REG) { + jit_set_Z_DVAL(jit, op1_def_addr, ir_CONST_DOUBLE((double)ZEND_LONG_MIN - 1.0)); + } else { +#if SIZEOF_ZEND_LONG == 4 + jit_set_Z_LVAL(jit, op1_def_addr, ir_CONST_LONG(0x00200000)); + jit_set_Z_W2(jit, op1_def_addr, ir_CONST_U32(0xc1e00000)); +#else + jit_set_Z_LVAL(jit, op1_def_addr, ir_CONST_LONG(0xc3e0000000000000)); +#endif + jit_set_Z_TYPE_INFO(jit, op1_def_addr, IS_DOUBLE); + } + } + if ((opline->opcode == ZEND_PRE_INC || opline->opcode == ZEND_PRE_DEC) && + opline->result_type != IS_UNUSED) { + if (opline->opcode == ZEND_PRE_INC || opline->opcode == ZEND_POST_INC) { + if (Z_MODE(res_addr) == IS_REG) { + jit_set_Z_DVAL(jit, res_addr, ir_CONST_DOUBLE((double)ZEND_LONG_MAX + 1.0)); + } else { +#if SIZEOF_ZEND_LONG == 4 + jit_set_Z_LVAL(jit, res_addr, ir_CONST_LONG(0)); + jit_set_Z_W2(jit, res_addr, ir_CONST_U32(0x41e00000)); +#else + jit_set_Z_LVAL(jit, res_addr, ir_CONST_LONG(0x43e0000000000000)); +#endif + jit_set_Z_TYPE_INFO(jit, res_addr, IS_DOUBLE); + } + } else { + if (Z_MODE(res_addr) == IS_REG) { + jit_set_Z_DVAL(jit, res_addr, ir_CONST_DOUBLE((double)ZEND_LONG_MIN - 1.0)); + } else { +#if SIZEOF_ZEND_LONG == 4 + jit_set_Z_LVAL(jit, res_addr, ir_CONST_LONG(0x00200000)); + jit_set_Z_W2(jit, res_addr, ir_CONST_U32(0xc1e00000)); +#else + jit_set_Z_LVAL(jit, res_addr, ir_CONST_LONG(0xc3e0000000000000)); +#endif + jit_set_Z_TYPE_INFO(jit, res_addr, IS_DOUBLE); + } + } + } + + if (merge_inputs) { + ir_END_list(merge_inputs); + ir_MERGE_list(merge_inputs); + } + } else { + if ((opline->opcode == ZEND_PRE_INC || opline->opcode == ZEND_PRE_DEC) && + opline->result_type != IS_UNUSED) { + jit_set_Z_LVAL(jit, res_addr, ref); + if (Z_MODE(res_addr) != IS_REG) { + jit_set_Z_TYPE_INFO(jit, res_addr, IS_LONG); + } + } + } + if (op1_info & ((MAY_BE_ANY|MAY_BE_UNDEF)-MAY_BE_LONG)) { + ir_ref merge_inputs = ir_END(); + + /* !is_long => cold path */ + ir_IF_FALSE_cold(if_long); + if (op1_info & ((MAY_BE_ANY|MAY_BE_UNDEF)-(MAY_BE_LONG|MAY_BE_DOUBLE))) { + jit_SET_EX_OPLINE(jit, opline); + if (op1_info & MAY_BE_UNDEF) { + ir_ref if_def; + + if_def = jit_if_not_Z_TYPE(jit, op1_addr, IS_UNDEF); 
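+ /* undefined CV: PHP raises an "Undefined variable" warning and treats the value as NULL, so e.g. $x++ on an unset $x yields int(1) */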
+ ir_IF_FALSE_cold(if_def); + + // zend_error(E_WARNING, "Undefined variable $%s", ZSTR_VAL(CV_DEF_OF(EX_VAR_TO_NUM(opline->op1.var)))); + ir_CALL_1(IR_VOID, ir_CONST_FC_FUNC(zend_jit_undefined_op_helper), ir_CONST_U32(opline->op1.var)); + + jit_set_Z_TYPE_INFO(jit, op1_def_addr, IS_NULL); + ir_MERGE_WITH_EMPTY_TRUE(if_def); + + op1_info |= MAY_BE_NULL; + } + + ref = jit_ZVAL_ADDR(jit, op1_addr); + + if (op1_info & MAY_BE_REF) { + ir_ref if_ref, if_typed, func, ref2, arg2; + + if_ref = jit_if_Z_TYPE_ref(jit, ref, ir_CONST_U8(IS_REFERENCE)); + ir_IF_TRUE(if_ref); + ref2 = jit_Z_PTR_ref(jit, ref); + + if_typed = jit_if_TYPED_REF(jit, ref2); + ir_IF_TRUE(if_typed); + + if (RETURN_VALUE_USED(opline)) { + ZEND_ASSERT(Z_MODE(res_addr) != IS_REG); + arg2 = jit_ZVAL_ADDR(jit, res_addr); + } else { + arg2 = IR_NULL; + } + if (opline->opcode == ZEND_PRE_INC) { + func = ir_CONST_FC_FUNC(zend_jit_pre_inc_typed_ref); + } else if (opline->opcode == ZEND_PRE_DEC) { + func = ir_CONST_FC_FUNC(zend_jit_pre_dec_typed_ref); + } else if (opline->opcode == ZEND_POST_INC) { + func = ir_CONST_FC_FUNC(zend_jit_post_inc_typed_ref); + } else if (opline->opcode == ZEND_POST_DEC) { + func = ir_CONST_FC_FUNC(zend_jit_post_dec_typed_ref); + } else { + ZEND_UNREACHABLE(); + } + + ir_CALL_2(IR_VOID, func, ref2, arg2); + zend_jit_check_exception(jit); + ir_END_list(merge_inputs); + + ir_IF_FALSE(if_typed); + ref2 = ir_ADD_OFFSET(ref2, offsetof(zend_reference, val)); + ir_MERGE_WITH_EMPTY_FALSE(if_ref); + ref = ir_PHI_2(IR_ADDR, ref2, ref); + } + + if (opline->opcode == ZEND_POST_INC || opline->opcode == ZEND_POST_DEC) { + jit_ZVAL_COPY(jit, + res_addr, + res_use_info, + ZEND_ADDR_REF_ZVAL(ref), op1_info, 1); + } + if (opline->opcode == ZEND_PRE_INC || opline->opcode == ZEND_POST_INC) { + if (opline->opcode == ZEND_PRE_INC && opline->result_type != IS_UNUSED) { + ir_ref arg2 = jit_ZVAL_ADDR(jit, res_addr); + ir_CALL_2(IR_VOID, ir_CONST_FC_FUNC(zend_jit_pre_inc), ref, arg2); + } else { + ir_CALL_1(IR_VOID, ir_CONST_FC_FUNC(increment_function), ref); + } + } else { + if (opline->opcode == ZEND_PRE_DEC && opline->result_type != IS_UNUSED) { + ir_ref arg2 = jit_ZVAL_ADDR(jit, res_addr); + ir_CALL_2(IR_VOID, ir_CONST_FC_FUNC(zend_jit_pre_dec), ref, arg2); + } else { + ir_CALL_1(IR_VOID, ir_CONST_FC_FUNC(decrement_function), ref); + } + } + if (may_throw) { + zend_jit_check_exception(jit); + } + } else { + ref = jit_Z_DVAL(jit, op1_addr); + if (opline->opcode == ZEND_POST_INC || opline->opcode == ZEND_POST_DEC) { + jit_set_Z_DVAL(jit, res_addr, ref); + jit_set_Z_TYPE_INFO(jit, res_addr, IS_DOUBLE); + } + if (opline->opcode == ZEND_PRE_INC || opline->opcode == ZEND_POST_INC) { + op = IR_ADD; + } else { + op = IR_SUB; + } + ref = ir_BINARY_OP_D(op, ref, ir_CONST_DOUBLE(1.0)); + jit_set_Z_DVAL(jit, op1_def_addr, ref); + if ((opline->opcode == ZEND_PRE_INC || opline->opcode == ZEND_PRE_DEC) && + opline->result_type != IS_UNUSED) { + jit_set_Z_DVAL(jit, res_addr, ref); + jit_set_Z_TYPE_INFO(jit, res_addr, IS_DOUBLE); + } + } + ir_END_list(merge_inputs); + ir_MERGE_list(merge_inputs); + } + if (!zend_jit_store_var_if_necessary_ex(jit, opline->op1.var, op1_def_addr, op1_def_info, op1_addr, op1_info)) { + return 0; + } + if (opline->result_type != IS_UNUSED) { + if (!zend_jit_store_var_if_necessary(jit, opline->result.var, res_addr, res_info)) { + return 0; + } + } + return 1; +} + +static int zend_jit_math_long_long(zend_jit_ctx *jit, + const zend_op *opline, + uint8_t opcode, + zend_jit_addr op1_addr, + zend_jit_addr op2_addr, + 
zend_jit_addr res_addr, + uint32_t res_info, + uint32_t res_use_info, + int may_overflow) +{ + bool same_ops = zend_jit_same_addr(op1_addr, op2_addr); + ir_op op; + ir_ref op1, op2, ref, if_overflow = IR_UNUSED; + + if (opcode == ZEND_ADD) { + op = may_overflow ? IR_ADD_OV : IR_ADD; + } else if (opcode == ZEND_SUB) { + op = may_overflow ? IR_SUB_OV : IR_SUB; + } else if (opcode == ZEND_MUL) { + op = may_overflow ? IR_MUL_OV : IR_MUL; + } else { + ZEND_UNREACHABLE(); + } + op1 = jit_Z_LVAL(jit, op1_addr); + op2 = (same_ops) ? op1 : jit_Z_LVAL(jit, op2_addr); + ref = ir_BINARY_OP_L(op, op1, op2); + + if (may_overflow) { + if (res_info & MAY_BE_GUARD) { + if ((res_info & MAY_BE_ANY) == MAY_BE_LONG) { + zend_jit_trace_stack *stack = JIT_G(current_frame)->stack; + uint32_t old_res_info; + int32_t exit_point; + const void *exit_addr; + + if (opline->opcode == ZEND_ADD + && Z_MODE(op2_addr) == IS_CONST_ZVAL && Z_LVAL_P(Z_ZV(op2_addr)) == 1) { + old_res_info = STACK_INFO(stack, EX_VAR_TO_NUM(opline->result.var)); + SET_STACK_TYPE(stack, EX_VAR_TO_NUM(opline->result.var), IS_DOUBLE, 0); + SET_STACK_REF(stack, EX_VAR_TO_NUM(opline->result.var), ir_CONST_DOUBLE((double)ZEND_LONG_MAX + 1.0)); + exit_point = zend_jit_trace_get_exit_point(opline + 1, 0); + SET_STACK_INFO(stack, EX_VAR_TO_NUM(opline->result.var), old_res_info); + } else if (opline->opcode == ZEND_SUB + && Z_MODE(op2_addr) == IS_CONST_ZVAL && Z_LVAL_P(Z_ZV(op2_addr)) == 1) { + old_res_info = STACK_INFO(stack, EX_VAR_TO_NUM(opline->result.var)); + SET_STACK_TYPE(stack, EX_VAR_TO_NUM(opline->result.var), IS_DOUBLE, 0); + SET_STACK_REF(stack, EX_VAR_TO_NUM(opline->result.var), ir_CONST_DOUBLE((double)ZEND_LONG_MIN - 1.0)); + exit_point = zend_jit_trace_get_exit_point(opline + 1, 0); + SET_STACK_INFO(stack, EX_VAR_TO_NUM(opline->result.var), old_res_info); + } else { + exit_point = zend_jit_trace_get_exit_point(opline, 0); + } + + exit_addr = zend_jit_trace_get_exit_addr(exit_point); + if (!exit_addr) { + return 0; + } + ir_GUARD_NOT(ir_OVERFLOW(ref), ir_CONST_ADDR(exit_addr)); + may_overflow = 0; + } else if ((res_info & MAY_BE_ANY) == MAY_BE_DOUBLE) { + int32_t exit_point = zend_jit_trace_get_exit_point(opline, 0); + const void *exit_addr = zend_jit_trace_get_exit_addr(exit_point); + + if (!exit_addr) { + return 0; + } + ir_GUARD(ir_OVERFLOW(ref), ir_CONST_ADDR(exit_addr)); + } else { + ZEND_UNREACHABLE(); + } + } else { + if_overflow = ir_IF(ir_OVERFLOW(ref)); + ir_IF_FALSE(if_overflow); + } + } + + if ((res_info & MAY_BE_ANY) != MAY_BE_DOUBLE) { + jit_set_Z_LVAL(jit, res_addr, ref); + + if (Z_MODE(res_addr) != IS_REG) { + if (!zend_jit_same_addr(op1_addr, res_addr)) { + if ((res_use_info & (MAY_BE_ANY|MAY_BE_UNDEF|MAY_BE_REF|MAY_BE_GUARD)) != MAY_BE_LONG) { + jit_set_Z_TYPE_INFO(jit, res_addr, IS_LONG); + } + } + } + } + + if (may_overflow) { + ir_ref fast_path = IR_UNUSED; + + if ((res_info & MAY_BE_ANY) != MAY_BE_DOUBLE) { + fast_path = ir_END(); + ir_IF_TRUE_cold(if_overflow); + } + if (opcode == ZEND_ADD) { + if (Z_MODE(op2_addr) == IS_CONST_ZVAL && Z_LVAL_P(Z_ZV(op2_addr)) == 1) { + if (Z_MODE(res_addr) == IS_REG) { + jit_set_Z_DVAL(jit, res_addr, ir_CONST_DOUBLE((double)ZEND_LONG_MAX + 1.0)); + } else { +#if SIZEOF_ZEND_LONG == 4 + jit_set_Z_LVAL(jit, res_addr, ir_CONST_LONG(0)); + jit_set_Z_W2(jit, res_addr, ir_CONST_U32(0x41e00000)); +#else + jit_set_Z_LVAL(jit, res_addr, ir_CONST_LONG(0x43e0000000000000)); +#endif + jit_set_Z_TYPE_INFO(jit, res_addr, IS_DOUBLE); + } + if ((res_info & MAY_BE_ANY) != MAY_BE_DOUBLE) { + 
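+ /* rejoin the non-overflowing fast path; the result slot now holds either the long sum or the precomputed double */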
ir_MERGE_WITH(fast_path); + } + return 1; + } + op = IR_ADD; + } else if (opcode == ZEND_SUB) { + if (Z_MODE(op2_addr) == IS_CONST_ZVAL && Z_LVAL_P(Z_ZV(op2_addr)) == 1) { + if (Z_MODE(res_addr) == IS_REG) { + jit_set_Z_DVAL(jit, res_addr, ir_CONST_DOUBLE((double)ZEND_LONG_MIN - 1.0)); + } else { +#if SIZEOF_ZEND_LONG == 4 + jit_set_Z_LVAL(jit, res_addr, ir_CONST_LONG(0x00200000)); + jit_set_Z_W2(jit, res_addr, ir_CONST_U32(0xc1e00000)); +#else + jit_set_Z_LVAL(jit, res_addr, ir_CONST_LONG(0xc3e0000000000000)); +#endif + jit_set_Z_TYPE_INFO(jit, res_addr, IS_DOUBLE); + } + if ((res_info & MAY_BE_ANY) != MAY_BE_DOUBLE) { + ir_MERGE_WITH(fast_path); + } + return 1; + } + op = IR_SUB; + } else if (opcode == ZEND_MUL) { + op = IR_MUL; + } else { + ZEND_UNREACHABLE(); + } +#if 1 + /* reload */ + op1 = jit_Z_LVAL(jit, op1_addr); + op2 = (same_ops) ? op1 : jit_Z_LVAL(jit, op2_addr); +#endif +#if 1 + /* disable CSE */ + ir_ref old_cse_limit = jit->ctx.fold_cse_limit; + jit->ctx.fold_cse_limit = 0x7fffffff; +#endif + op1 = ir_INT2D(op1); + op2 = ir_INT2D(op2); +#if 1 + jit->ctx.fold_cse_limit = old_cse_limit; +#endif + ref = ir_BINARY_OP_D(op, op1, op2); + jit_set_Z_DVAL(jit, res_addr, ref); + if (Z_MODE(res_addr) != IS_REG) { + jit_set_Z_TYPE_INFO(jit, res_addr, IS_DOUBLE); + } + if ((res_info & MAY_BE_ANY) != MAY_BE_DOUBLE) { + ir_MERGE_WITH(fast_path); + } + } + + return 1; +} + +static int zend_jit_math_long_double(zend_jit_ctx *jit, + uint8_t opcode, + zend_jit_addr op1_addr, + zend_jit_addr op2_addr, + zend_jit_addr res_addr, + uint32_t res_use_info) +{ + ir_op op; + ir_ref op1, op2, ref; + + if (opcode == ZEND_ADD) { + op = IR_ADD; + } else if (opcode == ZEND_SUB) { + op = IR_SUB; + } else if (opcode == ZEND_MUL) { + op = IR_MUL; + } else if (opcode == ZEND_DIV) { + op = IR_DIV; + } else { + ZEND_UNREACHABLE(); + } + op1 = jit_Z_LVAL(jit, op1_addr); + op2 = jit_Z_DVAL(jit, op2_addr); + ref = ir_BINARY_OP_D(op, ir_INT2D(op1), op2); + jit_set_Z_DVAL(jit, res_addr, ref); + + if (Z_MODE(res_addr) != IS_REG) { + if ((res_use_info & (MAY_BE_ANY|MAY_BE_UNDEF|MAY_BE_REF|MAY_BE_GUARD)) != MAY_BE_DOUBLE) { + jit_set_Z_TYPE_INFO(jit, res_addr, IS_DOUBLE); + } + } + return 1; +} + +static int zend_jit_math_double_long(zend_jit_ctx *jit, + uint8_t opcode, + zend_jit_addr op1_addr, + zend_jit_addr op2_addr, + zend_jit_addr res_addr, + uint32_t res_use_info) +{ + ir_op op; + ir_ref op1, op2, ref; + + if (opcode == ZEND_ADD) { + op = IR_ADD; + } else if (opcode == ZEND_SUB) { + op = IR_SUB; + } else if (opcode == ZEND_MUL) { + op = IR_MUL; + } else if (opcode == ZEND_DIV) { + op = IR_DIV; + } else { + ZEND_UNREACHABLE(); + } + op1 = jit_Z_DVAL(jit, op1_addr); + op2 = jit_Z_LVAL(jit, op2_addr); + ref = ir_BINARY_OP_D(op, op1, ir_INT2D(op2)); + jit_set_Z_DVAL(jit, res_addr, ref); + + if (Z_MODE(res_addr) != IS_REG) { + if (!zend_jit_same_addr(op1_addr, res_addr)) { + if ((res_use_info & (MAY_BE_ANY|MAY_BE_UNDEF|MAY_BE_REF|MAY_BE_GUARD)) != MAY_BE_DOUBLE) { + jit_set_Z_TYPE_INFO(jit, res_addr, IS_DOUBLE); + } + } + } + return 1; +} + +static int zend_jit_math_double_double(zend_jit_ctx *jit, + uint8_t opcode, + zend_jit_addr op1_addr, + zend_jit_addr op2_addr, + zend_jit_addr res_addr, + uint32_t res_use_info) +{ + bool same_ops = zend_jit_same_addr(op1_addr, op2_addr); + ir_op op; + ir_ref op1, op2, ref; + + if (opcode == ZEND_ADD) { + op = IR_ADD; + } else if (opcode == ZEND_SUB) { + op = IR_SUB; + } else if (opcode == ZEND_MUL) { + op = IR_MUL; + } else if (opcode == ZEND_DIV) { + op = IR_DIV; + } else { + 
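+ /* only ZEND_ADD/ZEND_SUB/ZEND_MUL/ZEND_DIV are routed to the double helpers */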
ZEND_UNREACHABLE(); + } + op1 = jit_Z_DVAL(jit, op1_addr); + op2 = (same_ops) ? op1 : jit_Z_DVAL(jit, op2_addr); + ref = ir_BINARY_OP_D(op, op1, op2); + jit_set_Z_DVAL(jit, res_addr, ref); + + if (Z_MODE(res_addr) != IS_REG) { + if (!zend_jit_same_addr(op1_addr, res_addr)) { + if ((res_use_info & (MAY_BE_ANY|MAY_BE_UNDEF|MAY_BE_REF|MAY_BE_GUARD)) != MAY_BE_DOUBLE) { + jit_set_Z_TYPE_INFO(jit, res_addr, IS_DOUBLE); + } + } + } + return 1; +} + +static int zend_jit_math_helper(zend_jit_ctx *jit, + const zend_op *opline, + uint8_t opcode, + uint8_t op1_type, + znode_op op1, + zend_jit_addr op1_addr, + uint32_t op1_info, + uint8_t op2_type, + znode_op op2, + zend_jit_addr op2_addr, + uint32_t op2_info, + uint32_t res_var, + zend_jit_addr res_addr, + uint32_t res_info, + uint32_t res_use_info, + int may_overflow, + int may_throw) +{ + ir_ref if_op1_long = IR_UNUSED; + ir_ref if_op1_double = IR_UNUSED; + ir_ref if_op2_double = IR_UNUSED; + ir_ref if_op1_long_op2_long = IR_UNUSED; + ir_ref if_op1_long_op2_double = IR_UNUSED; + ir_ref if_op1_double_op2_double = IR_UNUSED; + ir_ref if_op1_double_op2_long = IR_UNUSED; + ir_ref slow_inputs = IR_UNUSED; + bool same_ops = zend_jit_same_addr(op1_addr, op2_addr); + ir_refs *end_inputs; + ir_refs *res_inputs; + + ir_refs_init(end_inputs, 6); + ir_refs_init(res_inputs, 6); + + if (Z_MODE(op1_addr) == IS_REG) { + if (!has_concrete_type(op2_info & MAY_BE_ANY) && jit->ra[Z_SSA_VAR(op1_addr)].ref == IR_NULL) { + /* Force load */ + zend_jit_use_reg(jit, op1_addr); + } + } else if (Z_MODE(op2_addr) == IS_REG) { + if (!has_concrete_type(op1_info & MAY_BE_ANY) && jit->ra[Z_SSA_VAR(op2_addr)].ref == IR_NULL) { + /* Force load */ + zend_jit_use_reg(jit, op2_addr); + } + } + + if (Z_MODE(res_addr) == IS_REG) { + jit->delay_var = Z_SSA_VAR(res_addr); + jit->delay_refs = res_inputs; + } + + if ((res_info & MAY_BE_GUARD) && (res_info & MAY_BE_LONG) && (op1_info & MAY_BE_LONG) && (op2_info & MAY_BE_LONG)) { + if (op1_info & (MAY_BE_ANY-MAY_BE_LONG)) { + if_op1_long = jit_if_Z_TYPE(jit, op1_addr, IS_LONG); + ir_IF_TRUE(if_op1_long); + } + if (!same_ops && (op2_info & (MAY_BE_ANY-MAY_BE_LONG))) { + if_op1_long_op2_long = jit_if_Z_TYPE(jit, op2_addr, IS_LONG); + ir_IF_TRUE(if_op1_long_op2_long); + } + if (!zend_jit_math_long_long(jit, opline, opcode, op1_addr, op2_addr, res_addr, res_info, res_use_info, may_overflow)) { + return 0; + } + ir_refs_add(end_inputs, ir_END()); + if (if_op1_long) { + ir_IF_FALSE_cold(if_op1_long); + ir_END_list(slow_inputs); + } + if (if_op1_long_op2_long) { + ir_IF_FALSE_cold(if_op1_long_op2_long); + ir_END_list(slow_inputs); + } + } else if ((op1_info & MAY_BE_LONG) && (op2_info & MAY_BE_LONG) && (res_info & (MAY_BE_LONG|MAY_BE_DOUBLE))) { + if (op1_info & (MAY_BE_ANY-MAY_BE_LONG)) { + if_op1_long = jit_if_Z_TYPE(jit, op1_addr, IS_LONG); + ir_IF_TRUE(if_op1_long); + } + if (!same_ops && (op2_info & (MAY_BE_ANY-MAY_BE_LONG))) { + if_op1_long_op2_long = jit_if_Z_TYPE(jit, op2_addr, IS_LONG); + ir_IF_FALSE_cold(if_op1_long_op2_long); + if (op2_info & MAY_BE_DOUBLE) { + if (op2_info & (MAY_BE_ANY-(MAY_BE_LONG|MAY_BE_DOUBLE))) { + if_op1_long_op2_double = jit_if_Z_TYPE(jit, op2_addr, IS_DOUBLE); + ir_IF_FALSE_cold(if_op1_long_op2_double); + ir_END_list(slow_inputs); + ir_IF_TRUE(if_op1_long_op2_double); + } + if (!zend_jit_math_long_double(jit, opcode, op1_addr, op2_addr, res_addr, res_use_info)) { + return 0; + } + ir_refs_add(end_inputs, ir_END()); + } else { + ir_END_list(slow_inputs); + } + ir_IF_TRUE(if_op1_long_op2_long); + } + if 
(!zend_jit_math_long_long(jit, opline, opcode, op1_addr, op2_addr, res_addr, res_info, res_use_info, may_overflow)) { + return 0; + } + ir_refs_add(end_inputs, ir_END()); + + if (if_op1_long) { + ir_IF_FALSE_cold(if_op1_long); + } + + if (op1_info & MAY_BE_DOUBLE) { + if (op1_info & (MAY_BE_ANY-(MAY_BE_LONG|MAY_BE_DOUBLE))) { + if_op1_double = jit_if_Z_TYPE(jit, op1_addr, IS_DOUBLE); + ir_IF_FALSE_cold(if_op1_double); + ir_END_list(slow_inputs); + ir_IF_TRUE(if_op1_double); + } + if (op2_info & MAY_BE_DOUBLE) { + if (!same_ops && (op2_info & (MAY_BE_ANY-MAY_BE_DOUBLE))) { + if_op1_double_op2_double = jit_if_Z_TYPE(jit, op2_addr, IS_DOUBLE); + ir_IF_TRUE(if_op1_double_op2_double); + } + if (!zend_jit_math_double_double(jit, opcode, op1_addr, op2_addr, res_addr, res_use_info)) { + return 0; + } + ir_refs_add(end_inputs, ir_END()); + if (if_op1_double_op2_double) { + ir_IF_FALSE_cold(if_op1_double_op2_double); + } + } + if (!same_ops) { + if (op2_info & (MAY_BE_ANY-(MAY_BE_LONG|MAY_BE_DOUBLE))) { + if_op1_double_op2_long = jit_if_Z_TYPE(jit, op2_addr, IS_LONG); + ir_IF_FALSE_cold(if_op1_double_op2_long); + ir_END_list(slow_inputs); + ir_IF_TRUE(if_op1_double_op2_long); + } + if (!zend_jit_math_double_long(jit, opcode, op1_addr, op2_addr, res_addr, res_use_info)) { + return 0; + } + ir_refs_add(end_inputs, ir_END()); + } else if (if_op1_double_op2_double) { + ir_END_list(slow_inputs); + } + } else if (if_op1_long) { + ir_END_list(slow_inputs); + } + } else if ((op1_info & MAY_BE_DOUBLE) && + !(op1_info & MAY_BE_LONG) && + (op2_info & (MAY_BE_LONG|MAY_BE_DOUBLE)) && + (res_info & MAY_BE_DOUBLE)) { + if (op1_info & (MAY_BE_ANY-MAY_BE_DOUBLE)) { + if_op1_double = jit_if_Z_TYPE(jit, op1_addr, IS_DOUBLE); + ir_IF_FALSE_cold(if_op1_double); + ir_END_list(slow_inputs); + ir_IF_TRUE(if_op1_double); + } + if (op2_info & MAY_BE_DOUBLE) { + if (!same_ops && (op2_info & (MAY_BE_ANY-MAY_BE_DOUBLE))) { + if_op1_double_op2_double = jit_if_Z_TYPE(jit, op2_addr, IS_DOUBLE); + ir_IF_TRUE(if_op1_double_op2_double); + } + if (!zend_jit_math_double_double(jit, opcode, op1_addr, op2_addr, res_addr, res_use_info)) { + return 0; + } + ir_refs_add(end_inputs, ir_END()); + if (if_op1_double_op2_double) { + ir_IF_FALSE_cold(if_op1_double_op2_double); + } + } + if (!same_ops && (op2_info & MAY_BE_LONG)) { + if (op2_info & (MAY_BE_ANY-(MAY_BE_DOUBLE|MAY_BE_LONG))) { + if_op1_double_op2_long = jit_if_Z_TYPE(jit, op2_addr, IS_LONG); + ir_IF_FALSE_cold(if_op1_double_op2_long); + ir_END_list(slow_inputs); + ir_IF_TRUE(if_op1_double_op2_long); + } + if (!zend_jit_math_double_long(jit, opcode, op1_addr, op2_addr, res_addr, res_use_info)) { + return 0; + } + ir_refs_add(end_inputs, ir_END()); + } else if (if_op1_double_op2_double) { + ir_END_list(slow_inputs); + } + } else if ((op2_info & MAY_BE_DOUBLE) && + !(op2_info & MAY_BE_LONG) && + (op1_info & (MAY_BE_LONG|MAY_BE_DOUBLE)) && + (res_info & MAY_BE_DOUBLE)) { + if (op2_info & (MAY_BE_ANY-MAY_BE_DOUBLE)) { + if_op2_double = jit_if_Z_TYPE(jit, op2_addr, IS_DOUBLE); + ir_IF_FALSE_cold(if_op2_double); + ir_END_list(slow_inputs); + ir_IF_TRUE(if_op2_double); + } + if (op1_info & MAY_BE_DOUBLE) { + if (!same_ops && (op1_info & (MAY_BE_ANY-MAY_BE_DOUBLE))) { + if_op1_double_op2_double = jit_if_Z_TYPE(jit, op1_addr, IS_DOUBLE); + ir_IF_TRUE(if_op1_double_op2_double); + } + if (!zend_jit_math_double_double(jit, opcode, op1_addr, op2_addr, res_addr, res_use_info)) { + return 0; + } + ir_refs_add(end_inputs, ir_END()); + if (if_op1_double_op2_double) { + 
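+ /* op1 turned out not to be a double; try the long op1 / double op2 variant below */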
ir_IF_FALSE_cold(if_op1_double_op2_double); + } + } + if (!same_ops && (op1_info & MAY_BE_LONG)) { + if (op1_info & (MAY_BE_ANY-(MAY_BE_DOUBLE|MAY_BE_LONG))) { + if_op1_long_op2_double = jit_if_Z_TYPE(jit, op1_addr, IS_LONG); + ir_IF_FALSE_cold(if_op1_long_op2_double); + ir_END_list(slow_inputs); + ir_IF_TRUE(if_op1_long_op2_double); + } + if (!zend_jit_math_long_double(jit, opcode, op1_addr, op2_addr, res_addr, res_use_info)) { + return 0; + } + ir_refs_add(end_inputs, ir_END()); + } else if (if_op1_double_op2_double) { + ir_END_list(slow_inputs); + } + } + + if ((op1_info & ((MAY_BE_ANY|MAY_BE_UNDEF)-(MAY_BE_LONG|MAY_BE_DOUBLE))) || + (op2_info & ((MAY_BE_ANY|MAY_BE_UNDEF)-(MAY_BE_LONG|MAY_BE_DOUBLE)))) { + ir_ref func, arg1, arg2, arg3; + + if (slow_inputs) { + ir_MERGE_list(slow_inputs); + } + + if (Z_MODE(op1_addr) == IS_REG) { + zend_jit_addr real_addr = ZEND_ADDR_MEM_ZVAL(ZREG_FP, op1.var); + if (!zend_jit_spill_store(jit, op1_addr, real_addr, op1_info, 1)) { + return 0; + } + op1_addr = real_addr; + } + if (Z_MODE(op2_addr) == IS_REG) { + zend_jit_addr real_addr = ZEND_ADDR_MEM_ZVAL(ZREG_FP, op2.var); + if (!zend_jit_spill_store(jit, op2_addr, real_addr, op2_info, 1)) { + return 0; + } + op2_addr = real_addr; + } + if (Z_MODE(res_addr) == IS_REG) { + arg1 = jit_ZVAL_ADDR(jit, ZEND_ADDR_MEM_ZVAL(ZREG_FP, res_var)); + } else { + arg1 = jit_ZVAL_ADDR(jit, res_addr); + } + arg2 = jit_ZVAL_ADDR(jit, op1_addr); + arg3 = jit_ZVAL_ADDR(jit, op2_addr); + jit_SET_EX_OPLINE(jit, opline); + if (opcode == ZEND_ADD) { + func = ir_CONST_FC_FUNC(add_function); + } else if (opcode == ZEND_SUB) { + func = ir_CONST_FC_FUNC(sub_function); + } else if (opcode == ZEND_MUL) { + func = ir_CONST_FC_FUNC(mul_function); + } else if (opcode == ZEND_DIV) { + func = ir_CONST_FC_FUNC(div_function); + } else { + ZEND_UNREACHABLE(); + } + ir_CALL_3(IR_VOID, func, arg1, arg2, arg3); + + jit_FREE_OP(jit, op1_type, op1, op1_info, NULL); + jit_FREE_OP(jit, op2_type, op2, op2_info, NULL); + + if (may_throw) { + if (opline->opcode == ZEND_ASSIGN_DIM_OP && (opline->op2_type & (IS_VAR|IS_TMP_VAR))) { + ir_GUARD_NOT(ir_LOAD_A(jit_EG_exception(jit)), + jit_STUB_ADDR(jit, jit_stub_exception_handler_free_op2)); + } else if (Z_MODE(res_addr) == IS_MEM_ZVAL && Z_REG(res_addr) == ZREG_RX) { + zend_jit_check_exception_undef_result(jit, opline); + } else { + zend_jit_check_exception(jit); + } + } + if (Z_MODE(res_addr) == IS_REG) { + zend_jit_addr real_addr = ZEND_ADDR_MEM_ZVAL(ZREG_FP, res_var); + if (!zend_jit_load_reg(jit, real_addr, res_addr, res_info)) { + return 0; + } + } + ir_refs_add(end_inputs, ir_END()); + } + + if (end_inputs->count) { + ir_MERGE_N(end_inputs->count, end_inputs->refs); + } + + if (Z_MODE(res_addr) == IS_REG) { + ZEND_ASSERT(jit->delay_refs == res_inputs); + ZEND_ASSERT(end_inputs->count == res_inputs->count); + jit->delay_var = -1; + jit->delay_refs = NULL; + if (res_inputs->count == 1) { + zend_jit_def_reg(jit, res_addr, res_inputs->refs[0]); + } else { + ir_ref phi = ir_PHI_N((res_info & MAY_BE_LONG) ? 
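+ /* merge the per-path result registers; the PHI type follows the inferred result type */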
IR_LONG : IR_DOUBLE, res_inputs->count, res_inputs->refs); + zend_jit_def_reg(jit, res_addr, phi); + } + } + + return 1; +} + +static int zend_jit_math(zend_jit_ctx *jit, const zend_op *opline, uint32_t op1_info, zend_jit_addr op1_addr, uint32_t op2_info, zend_jit_addr op2_addr, uint32_t res_use_info, uint32_t res_info, zend_jit_addr res_addr, int may_overflow, int may_throw) +{ + ZEND_ASSERT(!(op1_info & MAY_BE_UNDEF) && !(op2_info & MAY_BE_UNDEF)); + + if (!zend_jit_math_helper(jit, opline, opline->opcode, opline->op1_type, opline->op1, op1_addr, op1_info, opline->op2_type, opline->op2, op2_addr, op2_info, opline->result.var, res_addr, res_info, res_use_info, may_overflow, may_throw)) { + return 0; + } + if (!zend_jit_store_var_if_necessary(jit, opline->result.var, res_addr, res_info)) { + return 0; + } + return 1; +} + +static int zend_jit_add_arrays(zend_jit_ctx *jit, const zend_op *opline, uint32_t op1_info, zend_jit_addr op1_addr, uint32_t op2_info, zend_jit_addr op2_addr, zend_jit_addr res_addr) +{ + ir_ref ref; + ir_ref arg1 = jit_Z_PTR(jit, op1_addr); + ir_ref arg2 = jit_Z_PTR(jit, op2_addr); + + ref = ir_CALL_2(IR_ADDR, ir_CONST_FC_FUNC(zend_jit_add_arrays_helper), arg1, arg2); + jit_set_Z_PTR(jit, res_addr, ref); + jit_set_Z_TYPE_INFO(jit, res_addr, IS_ARRAY_EX); + jit_FREE_OP(jit, opline->op1_type, opline->op1, op1_info, opline); + jit_FREE_OP(jit, opline->op2_type, opline->op2, op2_info, opline); + return 1; +} + +static int zend_jit_long_math_helper(zend_jit_ctx *jit, + const zend_op *opline, + uint8_t opcode, + uint8_t op1_type, + znode_op op1, + zend_jit_addr op1_addr, + uint32_t op1_info, + zend_ssa_range *op1_range, + uint8_t op2_type, + znode_op op2, + zend_jit_addr op2_addr, + uint32_t op2_info, + zend_ssa_range *op2_range, + uint32_t res_var, + zend_jit_addr res_addr, + uint32_t res_info, + uint32_t res_use_info, + int may_throw) +{ + ir_ref ref = IR_UNUSED; + ir_ref if_long1 = IR_UNUSED; + ir_ref if_long2 = IR_UNUSED; + bool same_ops = zend_jit_same_addr(op1_addr, op2_addr); + ir_refs *res_inputs; + + ir_refs_init(res_inputs, 2); + + if (op1_info & ((MAY_BE_ANY|MAY_BE_UNDEF)-MAY_BE_LONG)) { + if_long1 = jit_if_Z_TYPE(jit, op1_addr, IS_LONG); + ir_IF_TRUE(if_long1); + } + if (!same_ops && (op2_info & ((MAY_BE_ANY|MAY_BE_UNDEF)-MAY_BE_LONG))) { + if_long2 = jit_if_Z_TYPE(jit, op2_addr, IS_LONG); + ir_IF_TRUE(if_long2); + } + + if (opcode == ZEND_SL) { + if (Z_MODE(op2_addr) == IS_CONST_ZVAL) { + zend_long op2_lval = Z_LVAL_P(Z_ZV(op2_addr)); + + if (UNEXPECTED((zend_ulong)op2_lval >= SIZEOF_ZEND_LONG * 8)) { + if (EXPECTED(op2_lval > 0)) { + ref = ir_CONST_LONG(0); + } else { + zend_jit_invalidate_var_if_necessary(jit, op1_type, op1_addr, op1); + zend_jit_invalidate_var_if_necessary(jit, op2_type, op2_addr, op2); + jit_SET_EX_OPLINE(jit, opline); + ir_GUARD(IR_FALSE, jit_STUB_ADDR(jit, jit_stub_negative_shift)); + if (Z_MODE(res_addr) == IS_REG) { + zend_jit_def_reg(jit, res_addr, ir_CONST_LONG(0)); // dead code + } + } + } else { + ref = ir_SHL_L(jit_Z_LVAL(jit, op1_addr), ir_CONST_LONG(op2_lval)); + } + } else { + ref = jit_Z_LVAL(jit, op2_addr); + if (!op2_range || + op2_range->min < 0 || + op2_range->max >= SIZEOF_ZEND_LONG * 8) { + + ir_ref if_wrong, cold_path, ref2, if_ok; + ir_ref op1_ref = jit_Z_LVAL(jit, op1_addr); + + if_wrong = ir_IF(ir_UGT(ref, ir_CONST_LONG((SIZEOF_ZEND_LONG * 8) - 1))); + ir_IF_TRUE_cold(if_wrong); + if_ok = ir_IF(ir_GE(ref, ir_CONST_LONG(0))); + ir_IF_FALSE(if_ok); + jit_SET_EX_OPLINE(jit, opline); + 
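+ /* negative shift count: PHP throws ArithmeticError via the stub; a non-negative count >= the bit width is handled on the "ok" path below and simply yields 0 */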
zend_jit_invalidate_var_if_necessary(jit, op1_type, op1_addr, op1); + zend_jit_invalidate_var_if_necessary(jit, op2_type, op2_addr, op2); + ir_IJMP(jit_STUB_ADDR(jit, jit_stub_negative_shift)); + ir_IF_TRUE(if_ok); + ref2 = ir_CONST_LONG(0); + cold_path = ir_END(); + ir_IF_FALSE(if_wrong); + ref = ir_SHL_L(op1_ref, ref); + ir_MERGE_WITH(cold_path); + ref = ir_PHI_2(IR_LONG, ref, ref2); + } else { + ref = ir_SHL_L(jit_Z_LVAL(jit, op1_addr), ref); + } + } + } else if (opcode == ZEND_SR) { + if (Z_MODE(op2_addr) == IS_CONST_ZVAL) { + zend_long op2_lval = Z_LVAL_P(Z_ZV(op2_addr)); + + if (UNEXPECTED((zend_ulong)op2_lval >= SIZEOF_ZEND_LONG * 8)) { + if (EXPECTED(op2_lval > 0)) { + ref = ir_SAR_L( + jit_Z_LVAL(jit, op1_addr), + ir_CONST_LONG((SIZEOF_ZEND_LONG * 8) - 1)); + } else { + zend_jit_invalidate_var_if_necessary(jit, op1_type, op1_addr, op1); + zend_jit_invalidate_var_if_necessary(jit, op2_type, op2_addr, op2); + jit_SET_EX_OPLINE(jit, opline); + ir_GUARD(IR_FALSE, jit_STUB_ADDR(jit, jit_stub_negative_shift)); + if (Z_MODE(res_addr) == IS_REG) { + zend_jit_def_reg(jit, res_addr, ir_CONST_LONG(0)); // dead code + } + } + } else { + ref = ir_SAR_L(jit_Z_LVAL(jit, op1_addr), ir_CONST_LONG(op2_lval)); + } + } else { + ref = jit_Z_LVAL(jit, op2_addr); + if (!op2_range || + op2_range->min < 0 || + op2_range->max >= SIZEOF_ZEND_LONG * 8) { + + ir_ref if_wrong, cold_path, ref2, if_ok; + + if_wrong = ir_IF(ir_UGT(ref, ir_CONST_LONG((SIZEOF_ZEND_LONG * 8) - 1))); + ir_IF_TRUE_cold(if_wrong); + if_ok = ir_IF(ir_GE(ref, ir_CONST_LONG(0))); + ir_IF_FALSE(if_ok); + jit_SET_EX_OPLINE(jit, opline); + zend_jit_invalidate_var_if_necessary(jit, op1_type, op1_addr, op1); + zend_jit_invalidate_var_if_necessary(jit, op2_type, op2_addr, op2); + ir_IJMP(jit_STUB_ADDR(jit, jit_stub_negative_shift)); + ir_IF_TRUE(if_ok); + ref2 = ir_CONST_LONG((SIZEOF_ZEND_LONG * 8) - 1); + cold_path = ir_END(); + ir_IF_FALSE(if_wrong); + ir_MERGE_WITH(cold_path); + ref = ir_PHI_2(IR_LONG, ref, ref2); + } + ref = ir_SAR_L(jit_Z_LVAL(jit, op1_addr), ref); + } + } else if (opcode == ZEND_MOD) { + if (Z_MODE(op2_addr) == IS_CONST_ZVAL) { + zend_long op2_lval = Z_LVAL_P(Z_ZV(op2_addr)); + + if (op2_lval == 0) { + zend_jit_invalidate_var_if_necessary(jit, op1_type, op1_addr, op1); + zend_jit_invalidate_var_if_necessary(jit, op2_type, op2_addr, op2); + jit_SET_EX_OPLINE(jit, opline); + ir_GUARD(IR_FALSE, jit_STUB_ADDR(jit, jit_stub_mod_by_zero)); + if (Z_MODE(res_addr) == IS_REG) { + zend_jit_def_reg(jit, res_addr, ir_CONST_LONG(0)); // dead code + } + } else if (zend_long_is_power_of_two(op2_lval) && op1_range && op1_range->min >= 0) { + ref = ir_AND_L(jit_Z_LVAL(jit, op1_addr), ir_CONST_LONG(op2_lval - 1)); + } else { + ref = ir_MOD_L(jit_Z_LVAL(jit, op1_addr), ir_CONST_LONG(op2_lval)); + } + } else { + ir_ref zero_path = 0; + ir_ref op1_ref = jit_Z_LVAL(jit, op1_addr); + + ref = jit_Z_LVAL(jit, op2_addr); + if ((op2_type & (MAY_BE_UNDEF|MAY_BE_NULL|MAY_BE_FALSE)) || !op2_range || (op2_range->min <= 0 && op2_range->max >= 0)) { + ir_ref if_ok = ir_IF(ref); + ir_IF_FALSE(if_ok); + jit_SET_EX_OPLINE(jit, opline); + zend_jit_invalidate_var_if_necessary(jit, op1_type, op1_addr, op1); + zend_jit_invalidate_var_if_necessary(jit, op2_type, op2_addr, op2); + ir_IJMP(jit_STUB_ADDR(jit, jit_stub_mod_by_zero)); + ir_IF_TRUE(if_ok); + } + + /* Prevent overflow error/crash if op1 == LONG_MIN and op2 == -1 */ + if (!op2_range || (op2_range->min <= -1 && op2_range->max >= -1)) { + ir_ref if_minus_one = ir_IF(ir_EQ(ref, ir_CONST_LONG(-1))); + 
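+ /* hardware division traps on LONG_MIN / -1 (e.g. #DE on x86), so the -1 divisor bypasses MOD and the PHI below forces the PHP-defined result 0 */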
ir_IF_TRUE_cold(if_minus_one); + zero_path = ir_END(); + ir_IF_FALSE(if_minus_one); + } + ref = ir_MOD_L(op1_ref, ref); + + if (zero_path) { + ir_MERGE_WITH(zero_path); + ref = ir_PHI_2(IR_LONG, ref, ir_CONST_LONG(0)); + } + } + } else { + ir_op op; + ir_ref op1, op2; + + if (opcode == ZEND_BW_OR) { + op = IR_OR; + } else if (opcode == ZEND_BW_AND) { + op = IR_AND; + } else if (opcode == ZEND_BW_XOR) { + op = IR_XOR; + } else { + ZEND_UNREACHABLE(); + } + op1 = jit_Z_LVAL(jit, op1_addr); + op2 = (same_ops) ? op1 : jit_Z_LVAL(jit, op2_addr); + ref = ir_BINARY_OP_L(op, op1, op2); + } + + if (ref) { + if (Z_MODE(res_addr) == IS_REG + && ((op1_info & ((MAY_BE_ANY|MAY_BE_UNDEF)-MAY_BE_LONG)) + || (op2_info & ((MAY_BE_ANY|MAY_BE_UNDEF)-MAY_BE_LONG)))) { + jit->delay_var = Z_SSA_VAR(res_addr); + jit->delay_refs = res_inputs; + } + jit_set_Z_LVAL(jit, res_addr, ref); + if (Z_MODE(res_addr) != IS_REG) { + if (!zend_jit_same_addr(op1_addr, res_addr)) { + if ((res_use_info & (MAY_BE_ANY|MAY_BE_UNDEF|MAY_BE_REF|MAY_BE_GUARD)) != MAY_BE_LONG) { + jit_set_Z_TYPE_INFO(jit, res_addr, IS_LONG); + } + } + } + } + + if ((op1_info & ((MAY_BE_ANY|MAY_BE_UNDEF)-MAY_BE_LONG)) || + (op2_info & ((MAY_BE_ANY|MAY_BE_UNDEF)-MAY_BE_LONG))) { + ir_ref fast_path = ir_END(); + ir_ref func, arg1, arg2, arg3; + + if (if_long2 && if_long1) { + ir_ref ref; + ir_IF_FALSE_cold(if_long2); + ref = ir_END(); + ir_IF_FALSE_cold(if_long1); + ir_MERGE_2(ref, ir_END()); + } else if (if_long1) { + ir_IF_FALSE_cold(if_long1); + } else if (if_long2) { + ir_IF_FALSE_cold(if_long2); + } + + if (op1_info & MAY_BE_UNDEF) { + ir_ref if_def; + + if_def = jit_if_not_Z_TYPE(jit, op1_addr, IS_UNDEF); + ir_IF_FALSE_cold(if_def); + + // zend_error(E_WARNING, "Undefined variable $%s", ZSTR_VAL(CV_DEF_OF(EX_VAR_TO_NUM(opline->op1.var)))); + ir_CALL_1(IR_VOID, ir_CONST_FC_FUNC(zend_jit_undefined_op_helper), ir_CONST_U32(opline->op1.var)); + + jit_set_Z_TYPE_INFO(jit, op1_addr, IS_NULL); + ir_MERGE_WITH_EMPTY_TRUE(if_def); + } + + if (op2_info & MAY_BE_UNDEF) { + ir_ref if_def; + + if_def = jit_if_not_Z_TYPE(jit, op2_addr, IS_UNDEF); + ir_IF_FALSE_cold(if_def); + + // zend_error(E_WARNING, "Undefined variable $%s", ZSTR_VAL(CV_DEF_OF(EX_VAR_TO_NUM(opline->op2.var)))); + ir_CALL_1(IR_VOID, ir_CONST_FC_FUNC(zend_jit_undefined_op_helper), ir_CONST_U32(opline->op2.var)); + + jit_set_Z_TYPE_INFO(jit, op2_addr, IS_NULL); + ir_MERGE_WITH_EMPTY_TRUE(if_def); + } + + if (Z_MODE(op1_addr) == IS_REG) { + zend_jit_addr real_addr = ZEND_ADDR_MEM_ZVAL(ZREG_FP, op1.var); + if (!zend_jit_spill_store(jit, op1_addr, real_addr, op1_info, 1)) { + return 0; + } + op1_addr = real_addr; + } + if (Z_MODE(op2_addr) == IS_REG) { + zend_jit_addr real_addr = ZEND_ADDR_MEM_ZVAL(ZREG_FP, op2.var); + if (!zend_jit_spill_store(jit, op2_addr, real_addr, op2_info, 1)) { + return 0; + } + op2_addr = real_addr; + } + if (Z_MODE(res_addr) == IS_REG) { + arg1 = jit_ZVAL_ADDR(jit, ZEND_ADDR_MEM_ZVAL(ZREG_FP, res_var)); + } else { + arg1 = jit_ZVAL_ADDR(jit, res_addr); + } + arg2 = jit_ZVAL_ADDR(jit, op1_addr); + arg3 = jit_ZVAL_ADDR(jit, op2_addr); + jit_SET_EX_OPLINE(jit, opline); + if (opcode == ZEND_BW_OR) { + func = ir_CONST_FC_FUNC(bitwise_or_function); + } else if (opcode == ZEND_BW_AND) { + func = ir_CONST_FC_FUNC(bitwise_and_function); + } else if (opcode == ZEND_BW_XOR) { + func = ir_CONST_FC_FUNC(bitwise_xor_function); + } else if (opcode == ZEND_SL) { + func = ir_CONST_FC_FUNC(shift_left_function); + } else if (opcode == ZEND_SR) { + func = 
ir_CONST_FC_FUNC(shift_right_function); + } else if (opcode == ZEND_MOD) { + func = ir_CONST_FC_FUNC(mod_function); + } else { + ZEND_UNREACHABLE(); + } + ir_CALL_3(IR_VOID, func, arg1, arg2, arg3); + + if (op1_addr == res_addr && (op2_info & MAY_BE_RCN)) { + /* compound assignment may decrement "op2" refcount */ + op2_info |= MAY_BE_RC1; + } + + jit_FREE_OP(jit, op1_type, op1, op1_info, NULL); + jit_FREE_OP(jit, op2_type, op2, op2_info, NULL); + + if (may_throw) { + if (opline->opcode == ZEND_ASSIGN_DIM_OP && (opline->op2_type & (IS_VAR|IS_TMP_VAR))) { + ir_GUARD_NOT(ir_LOAD_A(jit_EG_exception(jit)), + jit_STUB_ADDR(jit, jit_stub_exception_handler_free_op2)); + } else if (Z_MODE(res_addr) == IS_MEM_ZVAL && Z_REG(res_addr) == ZREG_RX) { + zend_jit_check_exception_undef_result(jit, opline); + } else { + zend_jit_check_exception(jit); + } + } + + if (Z_MODE(res_addr) == IS_REG) { + zend_jit_addr real_addr = ZEND_ADDR_MEM_ZVAL(ZREG_FP, res_var); + if (!zend_jit_load_reg(jit, real_addr, res_addr, res_info)) { + return 0; + } + } + + ir_MERGE_2(fast_path, ir_END()); + + if (Z_MODE(res_addr) == IS_REG) { + ZEND_ASSERT(jit->delay_refs == res_inputs); + ZEND_ASSERT(res_inputs->count == 2); + jit->delay_var = -1; + jit->delay_refs = NULL; + if (res_inputs->count == 1) { + zend_jit_def_reg(jit, res_addr, res_inputs->refs[0]); + } else { + ir_ref phi = ir_PHI_N(IR_LONG, res_inputs->count, res_inputs->refs); + zend_jit_def_reg(jit, res_addr, phi); + } + } + } + + return 1; +} + +static int zend_jit_long_math(zend_jit_ctx *jit, const zend_op *opline, uint32_t op1_info, zend_ssa_range *op1_range, zend_jit_addr op1_addr, uint32_t op2_info, zend_ssa_range *op2_range, zend_jit_addr op2_addr, uint32_t res_use_info, uint32_t res_info, zend_jit_addr res_addr, int may_throw) +{ + ZEND_ASSERT((op1_info & MAY_BE_LONG) && (op2_info & MAY_BE_LONG)); + + if (!zend_jit_long_math_helper(jit, opline, opline->opcode, + opline->op1_type, opline->op1, op1_addr, op1_info, op1_range, + opline->op2_type, opline->op2, op2_addr, op2_info, op2_range, + opline->result.var, res_addr, res_info, res_use_info, may_throw)) { + return 0; + } + if (!zend_jit_store_var_if_necessary(jit, opline->result.var, res_addr, res_info)) { + return 0; + } + return 1; +} + +static int zend_jit_concat_helper(zend_jit_ctx *jit, + const zend_op *opline, + uint8_t op1_type, + znode_op op1, + zend_jit_addr op1_addr, + uint32_t op1_info, + uint8_t op2_type, + znode_op op2, + zend_jit_addr op2_addr, + uint32_t op2_info, + zend_jit_addr res_addr, + int may_throw) +{ + ir_ref if_op1_string = IR_UNUSED; + ir_ref if_op2_string = IR_UNUSED; + ir_ref fast_path = IR_UNUSED; + + if ((op1_info & MAY_BE_STRING) && (op2_info & MAY_BE_STRING)) { + if (op1_info & ((MAY_BE_UNDEF|MAY_BE_ANY|MAY_BE_REF) - MAY_BE_STRING)) { + if_op1_string = jit_if_Z_TYPE(jit, op1_addr, IS_STRING); + ir_IF_TRUE(if_op1_string); + } + if (op2_info & ((MAY_BE_UNDEF|MAY_BE_ANY|MAY_BE_REF) - MAY_BE_STRING)) { + if_op2_string = jit_if_Z_TYPE(jit, op2_addr, IS_STRING); + ir_IF_TRUE(if_op2_string); + } + if (zend_jit_same_addr(op1_addr, res_addr)) { + ir_ref arg1 = jit_ZVAL_ADDR(jit, res_addr); + ir_ref arg2 = jit_ZVAL_ADDR(jit, op2_addr); + + ir_CALL_2(IR_VOID, ir_CONST_FC_FUNC(zend_jit_fast_assign_concat_helper), arg1, arg2); + /* concatenation with itself may reduce refcount */ + op2_info |= MAY_BE_RC1; + } else { + ir_ref arg1 = jit_ZVAL_ADDR(jit, res_addr); + ir_ref arg2 = jit_ZVAL_ADDR(jit, op1_addr); + ir_ref arg3 = jit_ZVAL_ADDR(jit, op2_addr); + + if (op1_type == IS_CV || op1_type == 
IS_CONST) { + ir_CALL_3(IR_VOID, ir_CONST_FC_FUNC(zend_jit_fast_concat_helper), arg1, arg2, arg3); + } else { + ir_CALL_3(IR_VOID, ir_CONST_FC_FUNC(zend_jit_fast_concat_tmp_helper), arg1, arg2, arg3); + } + } + /* concatenation with empty string may increase refcount */ + op2_info |= MAY_BE_RCN; + jit_FREE_OP(jit, op2_type, op2, op2_info, opline); + if (if_op1_string || if_op2_string) { + fast_path = ir_END(); + } + } + if ((op1_info & ((MAY_BE_UNDEF|MAY_BE_ANY|MAY_BE_REF) - MAY_BE_STRING)) || + (op2_info & ((MAY_BE_UNDEF|MAY_BE_ANY|MAY_BE_REF) - MAY_BE_STRING))) { + if ((op1_info & MAY_BE_STRING) && (op2_info & MAY_BE_STRING)) { + if (if_op1_string && if_op2_string) { + ir_IF_FALSE(if_op1_string); + ir_MERGE_WITH_EMPTY_FALSE(if_op2_string); + } else if (if_op1_string) { + ir_IF_FALSE_cold(if_op1_string); + } else if (if_op2_string) { + ir_IF_FALSE_cold(if_op2_string); + } + } + ir_ref arg1 = jit_ZVAL_ADDR(jit, res_addr); + ir_ref arg2 = jit_ZVAL_ADDR(jit, op1_addr); + ir_ref arg3 = jit_ZVAL_ADDR(jit, op2_addr); + + jit_SET_EX_OPLINE(jit, opline); + ir_CALL_3(IR_VOID, ir_CONST_FC_FUNC(concat_function), arg1, arg2, arg3); + /* concatenation with empty string may increase refcount */ + op1_info |= MAY_BE_RCN; + op2_info |= MAY_BE_RCN; + jit_FREE_OP(jit, op1_type, op1, op1_info, NULL); + jit_FREE_OP(jit, op2_type, op2, op2_info, NULL); + if (may_throw) { + if (opline->opcode == ZEND_ASSIGN_DIM_OP && (opline->op2_type & (IS_VAR|IS_TMP_VAR))) { + ir_GUARD_NOT(ir_LOAD_A(jit_EG_exception(jit)), + jit_STUB_ADDR(jit, jit_stub_exception_handler_free_op2)); + } else if (Z_MODE(res_addr) == IS_MEM_ZVAL && Z_REG(res_addr) == ZREG_RX) { + zend_jit_check_exception_undef_result(jit, opline); + } else { + zend_jit_check_exception(jit); + } + } + if ((op1_info & MAY_BE_STRING) && (op2_info & MAY_BE_STRING)) { + ir_MERGE_WITH(fast_path); + } + } + return 1; +} + +static int zend_jit_concat(zend_jit_ctx *jit, const zend_op *opline, uint32_t op1_info, uint32_t op2_info, zend_jit_addr res_addr, int may_throw) +{ + zend_jit_addr op1_addr, op2_addr; + + ZEND_ASSERT(!(op1_info & MAY_BE_UNDEF) && !(op2_info & MAY_BE_UNDEF)); + ZEND_ASSERT((op1_info & MAY_BE_STRING) && (op2_info & MAY_BE_STRING)); + + op1_addr = OP1_ADDR(); + op2_addr = OP2_ADDR(); + + return zend_jit_concat_helper(jit, opline, opline->op1_type, opline->op1, op1_addr, op1_info, opline->op2_type, opline->op2, op2_addr, op2_info, res_addr, may_throw); +} + +static int zend_jit_assign_op(zend_jit_ctx *jit, const zend_op *opline, uint32_t op1_info, uint32_t op1_def_info, zend_ssa_range *op1_range, uint32_t op2_info, zend_ssa_range *op2_range, int may_overflow, int may_throw) +{ + int result = 1; + zend_jit_addr op1_addr, op2_addr; + ir_ref slow_path = IR_UNUSED; + + + ZEND_ASSERT(opline->op1_type == IS_CV && opline->result_type == IS_UNUSED); + ZEND_ASSERT(!(op1_info & MAY_BE_UNDEF) && !(op2_info & MAY_BE_UNDEF)); + + op1_addr = OP1_ADDR(); + op2_addr = OP2_ADDR(); + + if (op1_info & MAY_BE_REF) { + ir_ref ref, ref2, arg2, op1_noref_path; + ir_ref if_op1_ref = IR_UNUSED; + ir_ref if_op1_typed = IR_UNUSED; + binary_op_type binary_op = get_binary_op(opline->extended_value); + + ref = jit_ZVAL_ADDR(jit, op1_addr); + if_op1_ref = jit_if_Z_TYPE_ref(jit, ref, ir_CONST_U8(IS_REFERENCE)); + ir_IF_FALSE(if_op1_ref); + op1_noref_path = ir_END(); + ir_IF_TRUE(if_op1_ref); + ref2 = jit_Z_PTR_ref(jit, ref); + + if_op1_typed = jit_if_TYPED_REF(jit, ref2); + ir_IF_TRUE_cold(if_op1_typed); + + arg2 = jit_ZVAL_ADDR(jit, op2_addr); + jit_SET_EX_OPLINE(jit, opline); + if 
((opline->op2_type & (IS_TMP_VAR|IS_VAR)) + && (op2_info & (MAY_BE_STRING|MAY_BE_ARRAY|MAY_BE_OBJECT|MAY_BE_RESOURCE))) { + ir_CALL_3(IR_VOID, ir_CONST_FC_FUNC(zend_jit_assign_op_to_typed_ref_tmp), + ref2, arg2, ir_CONST_FC_FUNC(binary_op)); + } else { + ir_CALL_3(IR_VOID, ir_CONST_FC_FUNC(zend_jit_assign_op_to_typed_ref), + ref2, arg2, ir_CONST_FC_FUNC(binary_op)); + } + zend_jit_check_exception(jit); + slow_path = ir_END(); + + ir_IF_FALSE(if_op1_typed); + ref2 = ir_ADD_OFFSET(ref2, offsetof(zend_reference, val)); + + ir_MERGE_WITH(op1_noref_path); + ref = ir_PHI_2(IR_ADDR, ref2, ref); + op1_addr = ZEND_ADDR_REF_ZVAL(ref); + } + + switch (opline->extended_value) { + case ZEND_ADD: + case ZEND_SUB: + case ZEND_MUL: + case ZEND_DIV: + result = zend_jit_math_helper(jit, opline, opline->extended_value, opline->op1_type, opline->op1, op1_addr, op1_info, opline->op2_type, opline->op2, op2_addr, op2_info, opline->op1.var, op1_addr, op1_def_info, op1_info, may_overflow, may_throw); + break; + case ZEND_BW_OR: + case ZEND_BW_AND: + case ZEND_BW_XOR: + case ZEND_SL: + case ZEND_SR: + case ZEND_MOD: + result = zend_jit_long_math_helper(jit, opline, opline->extended_value, + opline->op1_type, opline->op1, op1_addr, op1_info, op1_range, + opline->op2_type, opline->op2, op2_addr, op2_info, op2_range, + opline->op1.var, op1_addr, op1_def_info, op1_info, may_throw); + break; + case ZEND_CONCAT: + result = zend_jit_concat_helper(jit, opline, opline->op1_type, opline->op1, op1_addr, op1_info, opline->op2_type, opline->op2, op2_addr, op2_info, op1_addr, may_throw); + break; + default: + ZEND_UNREACHABLE(); + } + + if (op1_info & MAY_BE_REF) { + ir_MERGE_WITH(slow_path); + } + + return result; +} + +static ir_ref jit_ZVAL_DEREF_ref(zend_jit_ctx *jit, ir_ref ref) +{ + ir_ref if_ref, ref2; + + if_ref = ir_IF(ir_EQ(jit_Z_TYPE_ref(jit, ref), ir_CONST_U8(IS_REFERENCE))); + ir_IF_TRUE(if_ref); + ref2 = ir_ADD_OFFSET(jit_Z_PTR_ref(jit, ref), offsetof(zend_reference, val)); + ir_MERGE_WITH_EMPTY_FALSE(if_ref); + return ir_PHI_2(IR_ADDR, ref2, ref); +} + +static zend_jit_addr jit_ZVAL_DEREF(zend_jit_ctx *jit, zend_jit_addr addr) +{ + ir_ref ref = jit_ZVAL_ADDR(jit, addr); + ref = jit_ZVAL_DEREF_ref(jit, ref); + return ZEND_ADDR_REF_ZVAL(ref); +} + +static ir_ref jit_ZVAL_INDIRECT_DEREF_ref(zend_jit_ctx *jit, ir_ref ref) +{ + ir_ref if_ref, ref2; + + if_ref = ir_IF(ir_EQ(jit_Z_TYPE_ref(jit, ref), ir_CONST_U8(IS_INDIRECT))); + ir_IF_TRUE(if_ref); + ref2 = jit_Z_PTR_ref(jit, ref); + ir_MERGE_WITH_EMPTY_FALSE(if_ref); + return ir_PHI_2(IR_ADDR, ref2, ref); +} + +static zend_jit_addr jit_ZVAL_INDIRECT_DEREF(zend_jit_ctx *jit, zend_jit_addr addr) +{ + ir_ref ref = jit_ZVAL_ADDR(jit, addr); + ref = jit_ZVAL_INDIRECT_DEREF_ref(jit, ref); + return ZEND_ADDR_REF_ZVAL(ref); +} + +static int zend_jit_simple_assign(zend_jit_ctx *jit, + const zend_op *opline, + zend_jit_addr var_addr, + uint32_t var_info, + uint32_t var_def_info, + uint8_t val_type, + zend_jit_addr val_addr, + uint32_t val_info, + zend_jit_addr res_addr, + bool check_exception) +{ + ir_ref end_inputs = IR_UNUSED; + + if (Z_MODE(val_addr) == IS_CONST_ZVAL) { + zval *zv = Z_ZV(val_addr); + + if (!res_addr) { + jit_ZVAL_COPY_CONST(jit, + var_addr, + var_info, var_def_info, + zv, 1); + } else { + jit_ZVAL_COPY_CONST(jit, + var_addr, + var_info, var_def_info, + zv, 1); + jit_ZVAL_COPY_CONST(jit, + res_addr, + -1, var_def_info, + zv, 1); + } + } else { + if (val_info & MAY_BE_UNDEF) { + ir_ref if_def, ret; + + if_def = jit_if_not_Z_TYPE(jit, val_addr, IS_UNDEF); + 
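/* Editorial note: undefined-value cold path. The code below mimics VM
+ * semantics by storing NULL into the destination (and the result, if
+ * any) and raising the "Undefined variable" warning through
+ * zend_jit_undefined_op_helper(). */ +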
ir_IF_FALSE_cold(if_def); + + jit_set_Z_TYPE_INFO(jit, var_addr, IS_NULL); + if (res_addr) { + jit_set_Z_TYPE_INFO(jit, res_addr, IS_NULL); + } + jit_SET_EX_OPLINE(jit, opline); + + ZEND_ASSERT(Z_MODE(val_addr) == IS_MEM_ZVAL); + // zend_error(E_WARNING, "Undefined variable $%s", ZSTR_VAL(CV_DEF_OF(EX_VAR_TO_NUM(opline->op1.var)))); + ret = ir_CALL_1(IR_I32, ir_CONST_FC_FUNC(zend_jit_undefined_op_helper), ir_CONST_U32(Z_OFFSET(val_addr))); + + if (check_exception) { + ir_GUARD(ret, jit_STUB_ADDR(jit, jit_stub_exception_handler_undef)); + } + + ir_END_list(end_inputs); + ir_IF_TRUE(if_def); + } + if (val_info & MAY_BE_REF) { + if (val_type == IS_CV) { + ir_ref ref = jit_ZVAL_ADDR(jit, val_addr); + ref = jit_ZVAL_DEREF_ref(jit, ref); + val_addr = ZEND_ADDR_REF_ZVAL(ref); + } else { + ir_ref ref, type, if_ref, ref2, refcount, if_not_zero; + + ref = jit_ZVAL_ADDR(jit, val_addr); + type = jit_Z_TYPE_ref(jit, ref); + if_ref = ir_IF(ir_EQ(type, ir_CONST_U8(IS_REFERENCE))); + + ir_IF_TRUE_cold(if_ref); + ref = jit_Z_PTR_ref(jit, ref); + ref2 = ir_ADD_OFFSET(ref, offsetof(zend_reference, val)); + if (!res_addr) { + jit_ZVAL_COPY(jit, + var_addr, + var_info, + ZEND_ADDR_REF_ZVAL(ref2), val_info, 1); + } else { + jit_ZVAL_COPY_2(jit, + res_addr, + var_addr, + var_info, + ZEND_ADDR_REF_ZVAL(ref2), val_info, 2); + } + + refcount = jit_GC_DELREF(jit, ref); + if_not_zero = ir_IF(refcount); + ir_IF_FALSE(if_not_zero); + // TODO: instead of dtor() call and ADDREF above, we may call efree() and move addref at "true" path ??? + // This is related to GH-10168 (keep this before GH-10168 is completely closed) + // jit_EFREE(jit, ref, sizeof(zend_reference), NULL, NULL); + jit_ZVAL_DTOR(jit, ref, val_info, opline); + ir_END_list(end_inputs); + ir_IF_TRUE(if_not_zero); + ir_END_list(end_inputs); + + ir_IF_FALSE(if_ref); + } + } + + if (!res_addr) { + jit_ZVAL_COPY(jit, + var_addr, + var_info, + val_addr, val_info, val_type == IS_CV); + } else { + jit_ZVAL_COPY_2(jit, + res_addr, + var_addr, + var_info, + val_addr, val_info, val_type == IS_CV ? 
2 : 1); + } + } + + if (end_inputs) { + ir_END_list(end_inputs); + ir_MERGE_list(end_inputs); + } + + return 1; +} + +static int zend_jit_assign_to_variable_call(zend_jit_ctx *jit, + const zend_op *opline, + zend_jit_addr __var_use_addr, + zend_jit_addr var_addr, + uint32_t __var_info, + uint32_t __var_def_info, + uint8_t val_type, + zend_jit_addr val_addr, + uint32_t val_info, + zend_jit_addr __res_addr, + bool __check_exception) +{ + jit_stub_id func; + ir_ref undef_path = IR_UNUSED; + + if (val_info & MAY_BE_UNDEF) { + if (JIT_G(trigger) == ZEND_JIT_ON_HOT_TRACE) { + int32_t exit_point = zend_jit_trace_get_exit_point(opline, ZEND_JIT_EXIT_TO_VM); + const void *exit_addr = zend_jit_trace_get_exit_addr(exit_point); + + if (!exit_addr) { + return 0; + } + + jit_guard_not_Z_TYPE(jit, val_addr, IS_UNDEF, exit_addr); + } else { + ir_ref if_def; + + ZEND_ASSERT(Z_MODE(val_addr) == IS_MEM_ZVAL && Z_REG(val_addr) == ZREG_FP); + if_def = ir_IF(jit_Z_TYPE(jit, val_addr)); + ir_IF_FALSE_cold(if_def); + jit_SET_EX_OPLINE(jit, opline); + ir_CALL_1(IR_VOID, ir_CONST_FC_FUNC(zend_jit_undefined_op_helper), ir_CONST_U32(Z_OFFSET(val_addr))); + + ir_CALL_2(IR_VOID, jit_STUB_FUNC_ADDR(jit, jit_stub_assign_const, IR_CONST_FASTCALL_FUNC), + jit_ZVAL_ADDR(jit, var_addr), + jit_EG(uninitialized_zval)); + + undef_path = ir_END(); + ir_IF_TRUE(if_def); + } + } + + if (!(val_info & (MAY_BE_STRING|MAY_BE_ARRAY|MAY_BE_OBJECT|MAY_BE_RESOURCE|MAY_BE_REF))) { + func = jit_stub_assign_tmp; + } else if (val_type == IS_CONST) { + func = jit_stub_assign_const; + } else if (val_type == IS_TMP_VAR) { + func = jit_stub_assign_tmp; + } else if (val_type == IS_VAR) { + if (!(val_info & MAY_BE_REF)) { + func = jit_stub_assign_tmp; + } else { + func = jit_stub_assign_var; + } + } else if (val_type == IS_CV) { + if (!(val_info & MAY_BE_REF)) { + func = jit_stub_assign_cv_noref; + } else { + func = jit_stub_assign_cv; + } + } else { + ZEND_UNREACHABLE(); + } + + if (opline) { + jit_SET_EX_OPLINE(jit, opline); + } + + ir_CALL_2(IR_VOID, jit_STUB_FUNC_ADDR(jit, func, IR_CONST_FASTCALL_FUNC), + jit_ZVAL_ADDR(jit, var_addr), + jit_ZVAL_ADDR(jit, val_addr)); + + if (undef_path) { + ir_MERGE_WITH(undef_path); + } + + return 1; +} + +static int zend_jit_assign_to_variable(zend_jit_ctx *jit, + const zend_op *opline, + zend_jit_addr var_use_addr, + zend_jit_addr var_addr, + uint32_t var_info, + uint32_t var_def_info, + uint8_t val_type, + zend_jit_addr val_addr, + uint32_t val_info, + zend_jit_addr res_addr, + zend_jit_addr ref_addr, + bool check_exception) +{ + ir_ref if_refcounted = IR_UNUSED; + ir_ref simple_inputs = IR_UNUSED; + bool done = 0; + zend_jit_addr real_res_addr = 0; + ir_refs *end_inputs; + ir_refs *res_inputs; + + ir_refs_init(end_inputs, 6); + ir_refs_init(res_inputs, 6); + + if (Z_MODE(val_addr) == IS_REG && jit->ra[Z_SSA_VAR(val_addr)].ref == IR_NULL) { + /* Force load */ + zend_jit_use_reg(jit, val_addr); + } + + if (Z_MODE(var_addr) == IS_REG) { + jit->delay_var = Z_SSA_VAR(var_addr); + jit->delay_refs = res_inputs; + if (Z_MODE(res_addr) == IS_REG) { + real_res_addr = res_addr; + res_addr = 0; + } + } else if (Z_MODE(res_addr) == IS_REG) { + jit->delay_var = Z_SSA_VAR(res_addr); + jit->delay_refs = res_inputs; + } + + if ((var_info & MAY_BE_REF) || ref_addr) { + ir_ref ref = 0, if_ref = 0, ref2, arg2, if_typed, non_ref_path; + uintptr_t func; + + if (!ref_addr) { + ref = jit_ZVAL_ADDR(jit, var_use_addr); + if_ref = jit_if_Z_TYPE_ref(jit, ref, ir_CONST_U8(IS_REFERENCE)); + ir_IF_TRUE(if_ref); + ref2 = 
jit_Z_PTR_ref(jit, ref); + } else { + ref2 = jit_ZVAL_ADDR(jit, ref_addr); + } + if_typed = jit_if_TYPED_REF(jit, ref2); + ir_IF_TRUE_cold(if_typed); + jit_SET_EX_OPLINE(jit, opline); + if (Z_MODE(val_addr) == IS_REG) { + ZEND_ASSERT(opline->opcode == ZEND_ASSIGN); + zend_jit_addr real_addr = ZEND_ADDR_MEM_ZVAL(ZREG_FP, opline->op2.var); + if (!zend_jit_spill_store(jit, val_addr, real_addr, val_info, 1)) { + return 0; + } + arg2 = jit_ZVAL_ADDR(jit, real_addr); + } else { + arg2 = jit_ZVAL_ADDR(jit, val_addr); + } + if (!res_addr) { + if (val_type == IS_CONST) { + func = (uintptr_t)zend_jit_assign_const_to_typed_ref; + } else if (val_type == IS_TMP_VAR) { + func = (uintptr_t)zend_jit_assign_tmp_to_typed_ref; + } else if (val_type == IS_VAR) { + func = (uintptr_t)zend_jit_assign_var_to_typed_ref; + } else if (val_type == IS_CV) { + func = (uintptr_t)zend_jit_assign_cv_to_typed_ref; + } else { + ZEND_UNREACHABLE(); + } + ir_CALL_2(IR_ADDR, ir_CONST_FC_FUNC(func), ref2, arg2); + } else { + if (val_type == IS_CONST) { + func = (uintptr_t)zend_jit_assign_const_to_typed_ref2; + } else if (val_type == IS_TMP_VAR) { + func = (uintptr_t)zend_jit_assign_tmp_to_typed_ref2; + } else if (val_type == IS_VAR) { + func = (uintptr_t)zend_jit_assign_var_to_typed_ref2; + } else if (val_type == IS_CV) { + func = (uintptr_t)zend_jit_assign_cv_to_typed_ref2; + } else { + ZEND_UNREACHABLE(); + } + ir_CALL_3(IR_ADDR, ir_CONST_FC_FUNC(func), ref2, arg2, jit_ZVAL_ADDR(jit, res_addr)); + } + if (check_exception) { + zend_jit_check_exception(jit); + } + ir_refs_add(end_inputs, ir_END()); + + if (!ref_addr) { + ir_IF_FALSE(if_ref); + non_ref_path = ir_END(); + ir_IF_FALSE(if_typed); + ref2 = ir_ADD_OFFSET(ref2, offsetof(zend_reference, val)); + ir_MERGE_WITH(non_ref_path); + ref = ir_PHI_2(IR_ADDR, ref2, ref); + var_addr = var_use_addr = ZEND_ADDR_REF_ZVAL(ref); + } else { + ir_IF_FALSE(if_typed); + } + } + + if (var_info & (MAY_BE_STRING|MAY_BE_ARRAY|MAY_BE_OBJECT|MAY_BE_RESOURCE)) { + ir_ref ref, counter, if_not_zero; + + if (var_info & ((MAY_BE_ANY|MAY_BE_UNDEF)-(MAY_BE_OBJECT|MAY_BE_RESOURCE))) { + if_refcounted = jit_if_REFCOUNTED(jit, var_use_addr); + ir_IF_FALSE(if_refcounted); + ir_END_list(simple_inputs); + ir_IF_TRUE_cold(if_refcounted); + } else if (RC_MAY_BE_1(var_info)) { + done = 1; + } + ref = jit_Z_PTR(jit, var_use_addr); + if (RC_MAY_BE_1(var_info)) { + if (!zend_jit_simple_assign(jit, opline, var_addr, var_info, var_def_info, val_type, val_addr, val_info, res_addr, 0)) { + return 0; + } + counter = jit_GC_DELREF(jit, ref); + + if_not_zero = ir_IF(counter); + ir_IF_FALSE(if_not_zero); + jit_ZVAL_DTOR(jit, ref, var_info, opline); + if (check_exception) { + zend_jit_check_exception(jit); + } + ir_refs_add(end_inputs, ir_END()); + ir_IF_TRUE(if_not_zero); + if (RC_MAY_BE_N(var_info) && (var_info & (MAY_BE_ARRAY|MAY_BE_OBJECT)) != 0) { + ir_ref if_may_leak = jit_if_GC_MAY_NOT_LEAK(jit, ref); + ir_IF_FALSE(if_may_leak); + ir_CALL_1(IR_VOID, ir_CONST_FC_FUNC(gc_possible_root), ref); + + if (Z_MODE(var_addr) == IS_REG || Z_MODE(res_addr) == IS_REG) { + ZEND_ASSERT(jit->delay_refs == res_inputs); + ZEND_ASSERT(res_inputs->count > 0); + ir_refs_add(res_inputs, res_inputs->refs[res_inputs->count - 1]); + } + if (check_exception && (val_info & MAY_BE_UNDEF)) { + zend_jit_check_exception(jit); + } + ir_refs_add(end_inputs, ir_END()); + ir_IF_TRUE(if_may_leak); + } + if (Z_MODE(var_addr) == IS_REG || Z_MODE(res_addr) == IS_REG) { + ZEND_ASSERT(jit->delay_refs == res_inputs); + ZEND_ASSERT(res_inputs->count > 0); + 
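/* Editorial note: keep the delayed PHI operands balanced. Every control
+ * edge appended to end_inputs needs a matching entry in res_inputs, so
+ * the value produced last is duplicated for this extra edge (see the
+ * end_inputs->count == res_inputs->count assertion below). */ +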
ir_refs_add(res_inputs, res_inputs->refs[res_inputs->count - 1]);
+ }
+ if (check_exception && (val_info & MAY_BE_UNDEF)) {
+ zend_jit_check_exception(jit);
+ }
+ ir_refs_add(end_inputs, ir_END());
+ } else /* if (RC_MAY_BE_N(var_info)) */ {
+ jit_GC_DELREF(jit, ref);
+ if (var_info & (MAY_BE_ARRAY|MAY_BE_OBJECT)) {
+ ir_ref if_may_leak = jit_if_GC_MAY_NOT_LEAK(jit, ref);
+ ir_IF_FALSE(if_may_leak);
+ ir_CALL_1(IR_VOID, ir_CONST_FC_FUNC(gc_possible_root), ref);
+ ir_END_list(simple_inputs);
+ ir_IF_TRUE(if_may_leak);
+ }
+ ir_END_list(simple_inputs);
+ }
+ }
+
+ if (simple_inputs) {
+ ir_MERGE_list(simple_inputs);
+ }
+
+ if (!done) {
+ if (!zend_jit_simple_assign(jit, opline, var_addr, var_info, var_def_info, val_type, val_addr, val_info, res_addr, check_exception)) {
+ return 0;
+ }
+ if (end_inputs->count) {
+ ir_refs_add(end_inputs, ir_END());
+ }
+ }
+
+ if (end_inputs->count) {
+ ir_MERGE_N(end_inputs->count, end_inputs->refs);
+ }
+
+ if (Z_MODE(var_addr) == IS_REG || Z_MODE(res_addr) == IS_REG) {
+ ir_ref phi;
+
+ ZEND_ASSERT(jit->delay_refs == res_inputs);
+ ZEND_ASSERT(end_inputs->count == res_inputs->count || (end_inputs->count == 0 && res_inputs->count == 1));
+ jit->delay_var = -1;
+ jit->delay_refs = NULL;
+ if (res_inputs->count == 1) {
+ phi = res_inputs->refs[0];
+ } else {
+ phi = ir_PHI_N((var_def_info & MAY_BE_LONG) ? IR_LONG : IR_DOUBLE,
+ res_inputs->count, res_inputs->refs);
+ }
+ if (Z_MODE(var_addr) == IS_REG) {
+ if ((var_info & (MAY_BE_REF|MAY_BE_STRING|MAY_BE_ARRAY|MAY_BE_OBJECT|MAY_BE_RESOURCE)) || ref_addr) {
+ phi = ir_emit2(&jit->ctx, IR_OPT(IR_COPY, jit->ctx.ir_base[phi].type), phi, 1);
+ }
+ zend_jit_def_reg(jit, var_addr, phi);
+ if (real_res_addr) {
+ if (var_def_info & MAY_BE_LONG) {
+ jit_set_Z_LVAL(jit, real_res_addr, jit_Z_LVAL(jit, var_addr));
+ } else {
+ jit_set_Z_DVAL(jit, real_res_addr, jit_Z_DVAL(jit, var_addr));
+ }
+ }
+ } else {
+ zend_jit_def_reg(jit, res_addr, phi);
+ }
+ }
+
+ return 1;
+}
+
+static int zend_jit_qm_assign(zend_jit_ctx *jit, const zend_op *opline, uint32_t op1_info, zend_jit_addr op1_addr, zend_jit_addr op1_def_addr, uint32_t res_use_info, uint32_t res_info, zend_jit_addr res_addr)
+{
+ if (op1_addr != op1_def_addr) {
+ if (!zend_jit_update_regs(jit, opline->op1.var, op1_addr, op1_def_addr, op1_info)) {
+ return 0;
+ }
+ if (Z_MODE(op1_def_addr) == IS_REG && Z_MODE(op1_addr) != IS_REG) {
+ op1_addr = op1_def_addr;
+ }
+ }
+
+ if (!zend_jit_simple_assign(jit, opline, res_addr, res_use_info, res_info, opline->op1_type, op1_addr, op1_info, 0, 1)) {
+ return 0;
+ }
+ if (!zend_jit_store_var_if_necessary(jit, opline->result.var, res_addr, res_info)) {
+ return 0;
+ }
+ return 1;
+}
+
+static int zend_jit_assign(zend_jit_ctx *jit,
+ const zend_op *opline,
+ uint32_t op1_info,
+ zend_jit_addr op1_use_addr,
+ uint32_t op1_def_info,
+ zend_jit_addr op1_addr,
+ uint32_t op2_info,
+ zend_jit_addr op2_addr,
+ zend_jit_addr op2_def_addr,
+ uint32_t res_info,
+ zend_jit_addr res_addr,
+ zend_jit_addr ref_addr,
+ int may_throw)
+{
+ ZEND_ASSERT(opline->op1_type == IS_CV);
+
+ if (op2_addr != op2_def_addr) {
+ if (!zend_jit_update_regs(jit, opline->op2.var, op2_addr, op2_def_addr, op2_info)) {
+ return 0;
+ }
+ if (Z_MODE(op2_def_addr) == IS_REG && Z_MODE(op2_addr) != IS_REG) {
+ op2_addr = op2_def_addr;
+ }
+ }
+
+ if (Z_MODE(op1_addr) != IS_REG
+ && Z_MODE(op1_use_addr) == IS_REG
+ && !Z_LOAD(op1_use_addr)
+ && !Z_STORE(op1_use_addr)) {
+ /* Force type update */
+ op1_info |= MAY_BE_UNDEF;
+ }
+ if
(!zend_jit_assign_to_variable(jit, opline, op1_use_addr, op1_addr, op1_info, op1_def_info, + opline->op2_type, op2_addr, op2_info, res_addr, ref_addr, may_throw)) { + return 0; + } + if (Z_MODE(op1_addr) == IS_REG) { + if (Z_STORE(op1_addr)) { + if (!zend_jit_store_var_if_necessary_ex(jit, opline->op1.var, op1_addr, op1_def_info, op1_use_addr, op1_info)) { + return 0; + } + } else if ((op1_info & (MAY_BE_STRING|MAY_BE_ARRAY|MAY_BE_OBJECT|MAY_BE_RESOURCE)) + && Z_MODE(op1_use_addr) == IS_MEM_ZVAL + && Z_REG(op1_use_addr) == ZREG_FP + && EX_VAR_TO_NUM(Z_OFFSET(op1_use_addr)) < jit->current_op_array->last_var) { + /* We have to update type of CV because it may be captured by exception backtrace or released on RETURN */ + if ((op1_def_info & MAY_BE_ANY) == MAY_BE_LONG) { + jit_set_Z_TYPE_INFO(jit, op1_use_addr, IS_LONG); + } else if ((op1_def_info & MAY_BE_ANY) == MAY_BE_DOUBLE) { + jit_set_Z_TYPE_INFO(jit, op1_use_addr, IS_DOUBLE); + } else { + ZEND_UNREACHABLE(); + } + } + } + if (opline->result_type != IS_UNUSED) { + if (!zend_jit_store_var_if_necessary(jit, opline->result.var, res_addr, res_info)) { + return 0; + } + } + + return 1; +} + +static ir_op zend_jit_cmp_op(const zend_op *opline) +{ + ir_op op; + + switch (opline->opcode) { + case ZEND_IS_EQUAL: + case ZEND_IS_IDENTICAL: + case ZEND_CASE: + case ZEND_CASE_STRICT: + op = IR_EQ; + break; + case ZEND_IS_NOT_EQUAL: + case ZEND_IS_NOT_IDENTICAL: + op = IR_NE; + break; + case ZEND_IS_SMALLER: + op = IR_LT; + break; + case ZEND_IS_SMALLER_OR_EQUAL: + op = IR_LE; + break; + default: + ZEND_UNREACHABLE(); + } + return op; +} + +static ir_ref zend_jit_cmp_long_long(zend_jit_ctx *jit, + const zend_op *opline, + zend_ssa_range *op1_range, + zend_jit_addr op1_addr, + zend_ssa_range *op2_range, + zend_jit_addr op2_addr, + zend_jit_addr res_addr, + uint8_t smart_branch_opcode, + uint32_t target_label, + uint32_t target_label2, + const void *exit_addr, + bool skip_comparison) +{ + ir_ref ref; + bool result; + + if (zend_jit_is_constant_cmp_long_long(opline, op1_range, op1_addr, op2_range, op2_addr, &result)) { + if (!smart_branch_opcode || + smart_branch_opcode == ZEND_JMPZ_EX || + smart_branch_opcode == ZEND_JMPNZ_EX) { + jit_set_Z_TYPE_INFO(jit, res_addr, result ? IS_TRUE : IS_FALSE); + } + if (smart_branch_opcode && !exit_addr) { + if (smart_branch_opcode == ZEND_JMPZ || + smart_branch_opcode == ZEND_JMPZ_EX) { + return jit_IF_ex(jit, IR_FALSE, result ? target_label : target_label2); + } else if (smart_branch_opcode == ZEND_JMPNZ || + smart_branch_opcode == ZEND_JMPNZ_EX) { + return jit_IF_ex(jit, IR_TRUE, result ? 
target_label : target_label2); + } else { + ZEND_UNREACHABLE(); + } + } + if (opline->opcode != ZEND_IS_IDENTICAL + && opline->opcode != ZEND_IS_NOT_IDENTICAL + && opline->opcode != ZEND_CASE_STRICT) { + return ir_END(); + } else { + return IR_NULL; /* success */ + } + } + + ref = ir_CMP_OP(zend_jit_cmp_op(opline), jit_Z_LVAL(jit, op1_addr), jit_Z_LVAL(jit, op2_addr)); + + if (!smart_branch_opcode || smart_branch_opcode == ZEND_JMPNZ_EX || smart_branch_opcode == ZEND_JMPZ_EX) { + jit_set_Z_TYPE_INFO_ref(jit, jit_ZVAL_ADDR(jit, res_addr), + ir_ADD_U32(ir_ZEXT_U32(ref), ir_CONST_U32(IS_FALSE))); + } + if (exit_addr) { + if (smart_branch_opcode == ZEND_JMPZ || smart_branch_opcode == ZEND_JMPZ_EX) { + if (opline->opcode != ZEND_IS_NOT_IDENTICAL) { + ir_GUARD(ref, ir_CONST_ADDR(exit_addr)); + } else { + ir_GUARD_NOT(ref, ir_CONST_ADDR(exit_addr)); + } + } else { + if (opline->opcode != ZEND_IS_NOT_IDENTICAL) { + ir_GUARD_NOT(ref, ir_CONST_ADDR(exit_addr)); + } else { + ir_GUARD(ref, ir_CONST_ADDR(exit_addr)); + } + } + } else if (smart_branch_opcode) { + return jit_IF_ex(jit, ref, + (smart_branch_opcode == ZEND_JMPZ || smart_branch_opcode == ZEND_JMPZ_EX) ? target_label2 : target_label); + } + + if (opline->opcode != ZEND_IS_IDENTICAL + && opline->opcode != ZEND_IS_NOT_IDENTICAL + && opline->opcode != ZEND_CASE_STRICT) { + return ir_END(); + } else { + return IR_NULL; /* success */ + } +} + +static ir_ref zend_jit_cmp_long_double(zend_jit_ctx *jit, const zend_op *opline, zend_jit_addr op1_addr, zend_jit_addr op2_addr, zend_jit_addr res_addr, uint8_t smart_branch_opcode, uint32_t target_label, uint32_t target_label2, const void *exit_addr) +{ + ir_ref ref = ir_CMP_OP(zend_jit_cmp_op(opline), ir_INT2D(jit_Z_LVAL(jit, op1_addr)), jit_Z_DVAL(jit, op2_addr)); + + if (!smart_branch_opcode || smart_branch_opcode == ZEND_JMPNZ_EX || smart_branch_opcode == ZEND_JMPZ_EX) { + jit_set_Z_TYPE_INFO_ref(jit, jit_ZVAL_ADDR(jit, res_addr), + ir_ADD_U32(ir_ZEXT_U32(ref), ir_CONST_U32(IS_FALSE))); + } + if (exit_addr) { + if (smart_branch_opcode == ZEND_JMPZ || smart_branch_opcode == ZEND_JMPZ_EX) { + ir_GUARD(ref, ir_CONST_ADDR(exit_addr)); + } else { + ir_GUARD_NOT(ref, ir_CONST_ADDR(exit_addr)); + } + } else if (smart_branch_opcode) { + return jit_IF_ex(jit, ref, + (smart_branch_opcode == ZEND_JMPZ || smart_branch_opcode == ZEND_JMPZ_EX) ? target_label2 : target_label); + } + return ir_END(); +} + +static ir_ref zend_jit_cmp_double_long(zend_jit_ctx *jit, const zend_op *opline, zend_jit_addr op1_addr, zend_jit_addr op2_addr, zend_jit_addr res_addr, uint8_t smart_branch_opcode, uint32_t target_label, uint32_t target_label2, const void *exit_addr) +{ + ir_ref ref = ir_CMP_OP(zend_jit_cmp_op(opline), jit_Z_DVAL(jit, op1_addr), ir_INT2D(jit_Z_LVAL(jit, op2_addr))); + + if (!smart_branch_opcode || smart_branch_opcode == ZEND_JMPNZ_EX || smart_branch_opcode == ZEND_JMPZ_EX) { + jit_set_Z_TYPE_INFO_ref(jit, jit_ZVAL_ADDR(jit, res_addr), + ir_ADD_U32(ir_ZEXT_U32(ref), ir_CONST_U32(IS_FALSE))); + } + if (exit_addr) { + if (smart_branch_opcode == ZEND_JMPZ || smart_branch_opcode == ZEND_JMPZ_EX) { + ir_GUARD(ref, ir_CONST_ADDR(exit_addr)); + } else { + ir_GUARD_NOT(ref, ir_CONST_ADDR(exit_addr)); + } + } else if (smart_branch_opcode) { + return jit_IF_ex(jit, ref, + (smart_branch_opcode == ZEND_JMPZ || smart_branch_opcode == ZEND_JMPZ_EX) ? 
target_label2 : target_label); + } + return ir_END(); +} + +static ir_ref zend_jit_cmp_double_double(zend_jit_ctx *jit, const zend_op *opline, zend_jit_addr op1_addr, zend_jit_addr op2_addr, zend_jit_addr res_addr, uint8_t smart_branch_opcode, uint32_t target_label, uint32_t target_label2, const void *exit_addr) +{ + ir_ref ref = ir_CMP_OP(zend_jit_cmp_op(opline), jit_Z_DVAL(jit, op1_addr), jit_Z_DVAL(jit, op2_addr)); + + if (!smart_branch_opcode || smart_branch_opcode == ZEND_JMPNZ_EX || smart_branch_opcode == ZEND_JMPZ_EX) { + jit_set_Z_TYPE_INFO_ref(jit, jit_ZVAL_ADDR(jit, res_addr), + ir_ADD_U32(ir_ZEXT_U32(ref), ir_CONST_U32(IS_FALSE))); + } + if (exit_addr) { + if (smart_branch_opcode == ZEND_JMPZ || smart_branch_opcode == ZEND_JMPZ_EX) { + if (opline->opcode != ZEND_IS_NOT_IDENTICAL) { + ir_GUARD(ref, ir_CONST_ADDR(exit_addr)); + } else { + ir_GUARD_NOT(ref, ir_CONST_ADDR(exit_addr)); + } + } else { + if (opline->opcode != ZEND_IS_NOT_IDENTICAL) { + ir_GUARD_NOT(ref, ir_CONST_ADDR(exit_addr)); + } else { + ir_GUARD(ref, ir_CONST_ADDR(exit_addr)); + } + } + } else if (smart_branch_opcode) { + return jit_IF_ex(jit, ref, + (smart_branch_opcode == ZEND_JMPZ || smart_branch_opcode == ZEND_JMPZ_EX) ? target_label2 : target_label); + } + if (opline->opcode != ZEND_IS_IDENTICAL + && opline->opcode != ZEND_IS_NOT_IDENTICAL + && opline->opcode != ZEND_CASE_STRICT) { + return ir_END(); + } else { + return IR_NULL; /* success */ + } +} + +static ir_ref zend_jit_cmp_slow(zend_jit_ctx *jit, ir_ref ref, const zend_op *opline, zend_jit_addr res_addr, uint8_t smart_branch_opcode, uint32_t target_label, uint32_t target_label2, const void *exit_addr) +{ + ref = ir_CMP_OP(zend_jit_cmp_op(opline), ref, ir_CONST_I32(0)); + + if (!smart_branch_opcode || smart_branch_opcode == ZEND_JMPNZ_EX || smart_branch_opcode == ZEND_JMPZ_EX) { + jit_set_Z_TYPE_INFO_ref(jit, jit_ZVAL_ADDR(jit, res_addr), + ir_ADD_U32(ir_ZEXT_U32(ref), ir_CONST_U32(IS_FALSE))); + } + if (exit_addr) { + if (smart_branch_opcode == ZEND_JMPZ || smart_branch_opcode == ZEND_JMPZ_EX) { + ir_GUARD(ref, ir_CONST_ADDR(exit_addr)); + } else { + ir_GUARD_NOT(ref, ir_CONST_ADDR(exit_addr)); + } + } else if (smart_branch_opcode) { + return jit_IF_ex(jit, ref, + (smart_branch_opcode == ZEND_JMPZ || smart_branch_opcode == ZEND_JMPZ_EX) ? 
target_label2 : target_label); + } + + return ir_END(); +} + +static int zend_jit_cmp(zend_jit_ctx *jit, + const zend_op *opline, + uint32_t op1_info, + zend_ssa_range *op1_range, + zend_jit_addr op1_addr, + uint32_t op2_info, + zend_ssa_range *op2_range, + zend_jit_addr op2_addr, + zend_jit_addr res_addr, + int may_throw, + uint8_t smart_branch_opcode, + uint32_t target_label, + uint32_t target_label2, + const void *exit_addr, + bool skip_comparison) +{ + ir_ref ref = IR_UNUSED; + ir_ref if_op1_long = IR_UNUSED; + ir_ref if_op1_double = IR_UNUSED; + ir_ref if_op2_double = IR_UNUSED; + ir_ref if_op1_long_op2_long = IR_UNUSED; + ir_ref if_op1_long_op2_double = IR_UNUSED; + ir_ref if_op1_double_op2_double = IR_UNUSED; + ir_ref if_op1_double_op2_long = IR_UNUSED; + ir_ref slow_inputs = IR_UNUSED; + bool same_ops = zend_jit_same_addr(op1_addr, op2_addr); + bool has_slow = + (op1_info & (MAY_BE_LONG|MAY_BE_DOUBLE)) && + (op2_info & (MAY_BE_LONG|MAY_BE_DOUBLE)) && + ((op1_info & ((MAY_BE_ANY|MAY_BE_UNDEF)-(MAY_BE_LONG|MAY_BE_DOUBLE))) || + (op2_info & ((MAY_BE_ANY|MAY_BE_UNDEF)-(MAY_BE_LONG|MAY_BE_DOUBLE)))); + ir_refs *end_inputs; + + ir_refs_init(end_inputs, 8); + + if (Z_MODE(op1_addr) == IS_REG) { + if (!has_concrete_type(op2_info & MAY_BE_ANY) && jit->ra[Z_SSA_VAR(op1_addr)].ref == IR_NULL) { + /* Force load */ + zend_jit_use_reg(jit, op1_addr); + } + } else if (Z_MODE(op2_addr) == IS_REG) { + if (!has_concrete_type(op1_info & MAY_BE_ANY) && jit->ra[Z_SSA_VAR(op2_addr)].ref == IR_NULL) { + /* Force load */ + zend_jit_use_reg(jit, op2_addr); + } + } + + if ((op1_info & MAY_BE_LONG) && (op2_info & MAY_BE_LONG)) { + if (op1_info & ((MAY_BE_ANY|MAY_BE_UNDEF)-MAY_BE_LONG)) { + if_op1_long = jit_if_Z_TYPE(jit, op1_addr, IS_LONG); + ir_IF_TRUE(if_op1_long); + } + if (!same_ops && (op2_info & ((MAY_BE_ANY|MAY_BE_UNDEF)-MAY_BE_LONG))) { + if_op1_long_op2_long = jit_if_Z_TYPE(jit, op2_addr, IS_LONG); + ir_IF_FALSE_cold(if_op1_long_op2_long); + if (op2_info & MAY_BE_DOUBLE) { + if (op2_info & ((MAY_BE_ANY|MAY_BE_UNDEF)-(MAY_BE_LONG|MAY_BE_DOUBLE))) { + if_op1_long_op2_double = jit_if_Z_TYPE(jit, op2_addr, IS_DOUBLE); + ir_IF_FALSE_cold(if_op1_long_op2_double); + ir_END_list(slow_inputs); + ir_IF_TRUE(if_op1_long_op2_double); + } + ref = zend_jit_cmp_long_double(jit, opline, op1_addr, op2_addr, res_addr, smart_branch_opcode, target_label, target_label2, exit_addr); + if (!ref) { + return 0; + } + ir_refs_add(end_inputs, ref); + } else { + ir_END_list(slow_inputs); + } + ir_IF_TRUE(if_op1_long_op2_long); + } + ref = zend_jit_cmp_long_long(jit, opline, op1_range, op1_addr, op2_range, op2_addr, res_addr, smart_branch_opcode, target_label, target_label2, exit_addr, skip_comparison); + if (!ref) { + return 0; + } + ir_refs_add(end_inputs, ref); + + if (if_op1_long) { + ir_IF_FALSE_cold(if_op1_long); + } + if (op1_info & MAY_BE_DOUBLE) { + if (op1_info & ((MAY_BE_ANY|MAY_BE_UNDEF)-(MAY_BE_LONG|MAY_BE_DOUBLE))) { + if_op1_double = jit_if_Z_TYPE(jit, op1_addr, IS_DOUBLE); + ir_IF_FALSE_cold(if_op1_double); + ir_END_list(slow_inputs); + ir_IF_TRUE(if_op1_double); + } + if (op2_info & MAY_BE_DOUBLE) { + if (!same_ops && (op2_info & ((MAY_BE_ANY|MAY_BE_UNDEF)-MAY_BE_DOUBLE))) { + if_op1_double_op2_double = jit_if_Z_TYPE(jit, op2_addr, IS_DOUBLE); + ir_IF_TRUE(if_op1_double_op2_double); + } + ref = zend_jit_cmp_double_double(jit, opline, op1_addr, op2_addr, res_addr, smart_branch_opcode, target_label, target_label2, exit_addr); + if (!ref) { + return 0; + } + ir_refs_add(end_inputs, ref); + if 
(if_op1_double_op2_double) { + ir_IF_FALSE_cold(if_op1_double_op2_double); + } + } + if (!same_ops) { + if (op2_info & ((MAY_BE_ANY|MAY_BE_UNDEF)-(MAY_BE_LONG|MAY_BE_DOUBLE))) { + if_op1_double_op2_long = jit_if_Z_TYPE(jit, op2_addr, IS_LONG); + ir_IF_FALSE_cold(if_op1_double_op2_long); + ir_END_list(slow_inputs); + ir_IF_TRUE(if_op1_double_op2_long); + } + ref = zend_jit_cmp_double_long(jit, opline, op1_addr, op2_addr, res_addr, smart_branch_opcode, target_label, target_label2, exit_addr); + if (!ref) { + return 0; + } + ir_refs_add(end_inputs, ref); + } else if (if_op1_double_op2_double) { + ir_END_list(slow_inputs); + } + } else if (if_op1_long) { + ir_END_list(slow_inputs); + } + } else if ((op1_info & MAY_BE_DOUBLE) && + !(op1_info & MAY_BE_LONG) && + (op2_info & (MAY_BE_LONG|MAY_BE_DOUBLE))) { + if (op1_info & ((MAY_BE_ANY|MAY_BE_UNDEF)-MAY_BE_DOUBLE)) { + if_op1_double = jit_if_Z_TYPE(jit, op1_addr, IS_DOUBLE); + ir_IF_FALSE_cold(if_op1_double); + ir_END_list(slow_inputs); + ir_IF_TRUE(if_op1_double); + } + if (op2_info & MAY_BE_DOUBLE) { + if (!same_ops && (op2_info & ((MAY_BE_ANY|MAY_BE_UNDEF)-MAY_BE_DOUBLE))) { + if_op1_double_op2_double = jit_if_Z_TYPE(jit, op2_addr, IS_DOUBLE); + ir_IF_TRUE(if_op1_double_op2_double); + } + ref = zend_jit_cmp_double_double(jit, opline, op1_addr, op2_addr, res_addr, smart_branch_opcode, target_label, target_label2, exit_addr); + if (!ref) { + return 0; + } + ir_refs_add(end_inputs, ref); + if (if_op1_double_op2_double) { + ir_IF_FALSE_cold(if_op1_double_op2_double); + } + } + if (!same_ops && (op2_info & MAY_BE_LONG)) { + if (op2_info & ((MAY_BE_ANY|MAY_BE_UNDEF)-(MAY_BE_DOUBLE|MAY_BE_LONG))) { + if_op1_double_op2_long = jit_if_Z_TYPE(jit, op2_addr, IS_LONG); + ir_IF_FALSE_cold(if_op1_double_op2_long); + ir_END_list(slow_inputs); + ir_IF_TRUE(if_op1_double_op2_long); + } + ref = zend_jit_cmp_double_long(jit, opline, op1_addr, op2_addr, res_addr, smart_branch_opcode, target_label, target_label2, exit_addr); + if (!ref) { + return 0; + } + ir_refs_add(end_inputs, ref); + } else if (if_op1_double_op2_double) { + ir_END_list(slow_inputs); + } + } else if ((op2_info & MAY_BE_DOUBLE) && + !(op2_info & MAY_BE_LONG) && + (op1_info & (MAY_BE_LONG|MAY_BE_DOUBLE))) { + if (op2_info & ((MAY_BE_ANY|MAY_BE_UNDEF)-MAY_BE_DOUBLE)) { + if_op2_double = jit_if_Z_TYPE(jit, op2_addr, IS_DOUBLE); + ir_IF_FALSE_cold(if_op2_double); + ir_END_list(slow_inputs); + ir_IF_TRUE(if_op2_double); + } + if (op1_info & MAY_BE_DOUBLE) { + if (!same_ops && (op1_info & ((MAY_BE_ANY|MAY_BE_UNDEF)-MAY_BE_DOUBLE))) { + if_op1_double_op2_double = jit_if_Z_TYPE(jit, op1_addr, IS_DOUBLE); + ir_IF_TRUE(if_op1_double_op2_double); + } + ref = zend_jit_cmp_double_double(jit, opline, op1_addr, op2_addr, res_addr, smart_branch_opcode, target_label, target_label2, exit_addr); + if (!ref) { + return 0; + } + ir_refs_add(end_inputs, ref); + if (if_op1_double_op2_double) { + ir_IF_FALSE_cold(if_op1_double_op2_double); + } + } + if (!same_ops && (op1_info & MAY_BE_LONG)) { + if (op1_info & ((MAY_BE_ANY|MAY_BE_UNDEF)-(MAY_BE_DOUBLE|MAY_BE_LONG))) { + if_op1_long_op2_double = jit_if_Z_TYPE(jit, op1_addr, IS_LONG); + ir_IF_FALSE_cold(if_op1_long_op2_double); + ir_END_list(slow_inputs); + ir_IF_TRUE(if_op1_long_op2_double); + } + ref = zend_jit_cmp_long_double(jit, opline, op1_addr, op2_addr, res_addr, smart_branch_opcode, target_label, target_label2, exit_addr); + if (!ref) { + return 0; + } + ir_refs_add(end_inputs, ref); + } else if (if_op1_double_op2_double) { + ir_END_list(slow_inputs); + } + } + + 
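/* Editorial note: generic slow path. Operand type combinations not
+ * covered by the long/double fast paths above are merged here and
+ * compared through a call to zend_compare(), after register-allocated
+ * operands are spilled back to their VM stack slots. */ +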
if (has_slow || + (op1_info & ((MAY_BE_ANY|MAY_BE_UNDEF)-(MAY_BE_LONG|MAY_BE_DOUBLE))) || + (op2_info & ((MAY_BE_ANY|MAY_BE_UNDEF)-(MAY_BE_LONG|MAY_BE_DOUBLE)))) { + ir_ref op1, op2, ref; + + if (slow_inputs) { + ir_MERGE_list(slow_inputs); + } + jit_SET_EX_OPLINE(jit, opline); + + if (Z_MODE(op1_addr) == IS_REG) { + zend_jit_addr real_addr = ZEND_ADDR_MEM_ZVAL(ZREG_FP, opline->op1.var); + if (!zend_jit_spill_store(jit, op1_addr, real_addr, op1_info, 1)) { + return 0; + } + op1_addr = real_addr; + } + if (Z_MODE(op2_addr) == IS_REG) { + zend_jit_addr real_addr = ZEND_ADDR_MEM_ZVAL(ZREG_FP, opline->op2.var); + if (!zend_jit_spill_store(jit, op2_addr, real_addr, op2_info, 1)) { + return 0; + } + op2_addr = real_addr; + } + + op1 = jit_ZVAL_ADDR(jit, op1_addr); + if (opline->op1_type == IS_CV && (op1_info & MAY_BE_UNDEF)) { + op1 = zend_jit_zval_check_undef(jit, op1, opline->op1.var, NULL, 0); + } + op2 = jit_ZVAL_ADDR(jit, op2_addr); + if (opline->op2_type == IS_CV && (op2_info & MAY_BE_UNDEF)) { + op2 = zend_jit_zval_check_undef(jit, op2, opline->op2.var, NULL, 0); + } + ref = ir_CALL_2(IR_I32, ir_CONST_FC_FUNC(zend_compare), op1, op2); + if (opline->opcode != ZEND_CASE) { + jit_FREE_OP(jit, opline->op1_type, opline->op1, op1_info, NULL); + } + jit_FREE_OP(jit, opline->op2_type, opline->op2, op2_info, NULL); + if (may_throw) { + zend_jit_check_exception_undef_result(jit, opline); + } + + ref = zend_jit_cmp_slow(jit, ref, opline, res_addr, smart_branch_opcode, target_label, target_label2, exit_addr); + if (!ref) { + return 0; + } + ir_refs_add(end_inputs, ref); + } + + if (end_inputs->count) { + uint32_t n = end_inputs->count; + + if (smart_branch_opcode && !exit_addr) { + zend_basic_block *bb; + ir_ref ref; + uint32_t label = (smart_branch_opcode == ZEND_JMPZ || smart_branch_opcode == ZEND_JMPZ_EX) ? + target_label2 : target_label; + uint32_t label2 = (smart_branch_opcode == ZEND_JMPZ || smart_branch_opcode == ZEND_JMPZ_EX) ? 
+ target_label : target_label2; + + ZEND_ASSERT(jit->b >= 0); + bb = &jit->ssa->cfg.blocks[jit->b]; + ZEND_ASSERT(bb->successors_count == 2); + + if (UNEXPECTED(bb->successors[0] == bb->successors[1])) { + ir_ref merge_inputs = IR_UNUSED; + + while (n) { + n--; + ir_IF_TRUE(end_inputs->refs[n]); + ir_END_list(merge_inputs); + ir_IF_FALSE(end_inputs->refs[n]); + ir_END_list(merge_inputs); + } + ir_MERGE_list(merge_inputs); + _zend_jit_add_predecessor_ref(jit, label, jit->b, ir_END()); + } else if (n == 1) { + ref = end_inputs->refs[0]; + _zend_jit_add_predecessor_ref(jit, bb->successors[0], jit->b, ref); + _zend_jit_add_predecessor_ref(jit, bb->successors[1], jit->b, ref); + } else { + ir_ref true_inputs = IR_UNUSED, false_inputs = IR_UNUSED; + + while (n) { + n--; + ir_IF_TRUE(end_inputs->refs[n]); + ir_END_list(true_inputs); + ir_IF_FALSE(end_inputs->refs[n]); + ir_END_list(false_inputs); + } + ir_MERGE_list(true_inputs); + _zend_jit_add_predecessor_ref(jit, label, jit->b, ir_END()); + ir_MERGE_list(false_inputs); + _zend_jit_add_predecessor_ref(jit, label2, jit->b, ir_END()); + } + jit->b = -1; + } else { + ir_MERGE_N(n, end_inputs->refs); + } + } + + return 1; +} + +static int zend_jit_identical(zend_jit_ctx *jit, + const zend_op *opline, + uint32_t op1_info, + zend_ssa_range *op1_range, + zend_jit_addr op1_addr, + uint32_t op2_info, + zend_ssa_range *op2_range, + zend_jit_addr op2_addr, + zend_jit_addr res_addr, + int may_throw, + uint8_t smart_branch_opcode, + uint32_t target_label, + uint32_t target_label2, + const void *exit_addr, + bool skip_comparison) +{ + bool always_false = 0, always_true = 0; + ir_ref ref = IR_UNUSED; + + if (opline->op1_type == IS_CV && (op1_info & MAY_BE_UNDEF)) { + ir_ref op1 = jit_ZVAL_ADDR(jit, op1_addr); + op1 = zend_jit_zval_check_undef(jit, op1, opline->op1.var, NULL, 0); + op1_info |= MAY_BE_NULL; + op1_addr = ZEND_ADDR_REF_ZVAL(op1); + } + if (opline->op2_type == IS_CV && (op2_info & MAY_BE_UNDEF)) { + ir_ref op2 = jit_ZVAL_ADDR(jit, op2_addr); + op2 = zend_jit_zval_check_undef(jit, op2, opline->op2.var, NULL, 0); + op2_info |= MAY_BE_NULL; + op2_addr = ZEND_ADDR_REF_ZVAL(op2); + } + + if ((op1_info & op2_info & MAY_BE_ANY) == 0) { + always_false = 1; + } else if (has_concrete_type(op1_info) + && has_concrete_type(op2_info) + && concrete_type(op1_info) == concrete_type(op2_info) + && concrete_type(op1_info) <= IS_TRUE) { + always_true = 1; + } else if (Z_MODE(op1_addr) == IS_CONST_ZVAL && Z_MODE(op2_addr) == IS_CONST_ZVAL) { + if (zend_is_identical(Z_ZV(op1_addr), Z_ZV(op2_addr))) { + always_true = 1; + } else { + always_false = 1; + } + } + + if (always_true) { + if (opline->opcode != ZEND_CASE_STRICT) { + jit_FREE_OP(jit, opline->op1_type, opline->op1, op1_info, opline); + } + jit_FREE_OP(jit, opline->op2_type, opline->op2, op2_info, opline); + if (!smart_branch_opcode + || smart_branch_opcode == ZEND_JMPZ_EX + || smart_branch_opcode == ZEND_JMPNZ_EX) { + jit_set_Z_TYPE_INFO(jit, res_addr, opline->opcode != ZEND_IS_NOT_IDENTICAL ? IS_TRUE : IS_FALSE); + } + if (may_throw) { + zend_jit_check_exception(jit); + } + if (exit_addr) { + if (smart_branch_opcode == ZEND_JMPNZ || smart_branch_opcode == ZEND_JMPNZ_EX) { + jit_SIDE_EXIT(jit, ir_CONST_ADDR(exit_addr)); + } + } else if (smart_branch_opcode) { + uint32_t label; + + if (opline->opcode == ZEND_IS_NOT_IDENTICAL) { + label = (smart_branch_opcode == ZEND_JMPZ || smart_branch_opcode == ZEND_JMPZ_EX) ? 
+ target_label : target_label2; + } else { + label = (smart_branch_opcode == ZEND_JMPZ || smart_branch_opcode == ZEND_JMPZ_EX) ? + target_label2 : target_label; + } + _zend_jit_add_predecessor_ref(jit, label, jit->b, ir_END()); + jit->b = -1; + } + return 1; + } else if (always_false) { + if (opline->opcode != ZEND_CASE_STRICT) { + jit_FREE_OP(jit, opline->op1_type, opline->op1, op1_info, opline); + } + jit_FREE_OP(jit, opline->op2_type, opline->op2, op2_info, opline); + if (!smart_branch_opcode + || smart_branch_opcode == ZEND_JMPZ_EX + || smart_branch_opcode == ZEND_JMPNZ_EX) { + jit_set_Z_TYPE_INFO(jit, res_addr, opline->opcode != ZEND_IS_NOT_IDENTICAL ? IS_FALSE : IS_TRUE); + } + if (may_throw) { + zend_jit_check_exception(jit); + } + if (exit_addr) { + if (smart_branch_opcode == ZEND_JMPZ || smart_branch_opcode == ZEND_JMPZ_EX) { + jit_SIDE_EXIT(jit, ir_CONST_ADDR(exit_addr)); + } + } else if (smart_branch_opcode) { + uint32_t label; + + if (opline->opcode == ZEND_IS_NOT_IDENTICAL) { + label = (smart_branch_opcode == ZEND_JMPZ || smart_branch_opcode == ZEND_JMPZ_EX) ? + target_label2 : target_label; + } else { + label = (smart_branch_opcode == ZEND_JMPZ || smart_branch_opcode == ZEND_JMPZ_EX) ? + target_label : target_label2; + } + _zend_jit_add_predecessor_ref(jit, label, jit->b, ir_END()); + jit->b = -1; + } + return 1; + } + + if ((opline->op1_type & (IS_CV|IS_VAR)) && (op1_info & MAY_BE_REF)) { + ref = jit_ZVAL_ADDR(jit, op1_addr); + ref = jit_ZVAL_DEREF_ref(jit, ref); + op1_addr = ZEND_ADDR_REF_ZVAL(ref); + } + if ((opline->op2_type & (IS_CV|IS_VAR)) && (op2_info & MAY_BE_REF)) { + ref = jit_ZVAL_ADDR(jit, op2_addr); + ref = jit_ZVAL_DEREF_ref(jit, ref); + op2_addr = ZEND_ADDR_REF_ZVAL(ref); + } + + if ((op1_info & (MAY_BE_REF|MAY_BE_ANY|MAY_BE_UNDEF)) == MAY_BE_LONG && + (op2_info & (MAY_BE_REF|MAY_BE_ANY|MAY_BE_UNDEF)) == MAY_BE_LONG) { + ref = zend_jit_cmp_long_long(jit, opline, op1_range, op1_addr, op2_range, op2_addr, res_addr, smart_branch_opcode, target_label, target_label2, exit_addr, skip_comparison); + if (!ref) { + return 0; + } + } else if ((op1_info & (MAY_BE_REF|MAY_BE_ANY|MAY_BE_UNDEF)) == MAY_BE_DOUBLE && + (op2_info & (MAY_BE_REF|MAY_BE_ANY|MAY_BE_UNDEF)) == MAY_BE_DOUBLE) { + ref = zend_jit_cmp_double_double(jit, opline, op1_addr, op2_addr, res_addr, smart_branch_opcode, target_label, target_label2, exit_addr); + if (!ref) { + return 0; + } + } else { + if (opline->op1_type != IS_CONST) { + if (Z_MODE(op1_addr) == IS_REG) { + zend_jit_addr real_addr = ZEND_ADDR_MEM_ZVAL(ZREG_FP, opline->op1.var); + if (!zend_jit_spill_store(jit, op1_addr, real_addr, op1_info, 1)) { + return 0; + } + op1_addr = real_addr; + } + } + if (opline->op2_type != IS_CONST) { + if (Z_MODE(op2_addr) == IS_REG) { + zend_jit_addr real_addr = ZEND_ADDR_MEM_ZVAL(ZREG_FP, opline->op2.var); + if (!zend_jit_spill_store(jit, op2_addr, real_addr, op2_info, 1)) { + return 0; + } + } + } + + if (Z_MODE(op1_addr) == IS_CONST_ZVAL && Z_TYPE_P(Z_ZV(op1_addr)) <= IS_TRUE) { + zval *val = Z_ZV(op1_addr); + + ref = ir_EQ(jit_Z_TYPE(jit, op2_addr), ir_CONST_U8(Z_TYPE_P(val))); + } else if (Z_MODE(op2_addr) == IS_CONST_ZVAL && Z_TYPE_P(Z_ZV(op2_addr)) <= IS_TRUE) { + zval *val = Z_ZV(op2_addr); + + ref = ir_EQ(jit_Z_TYPE(jit, op1_addr), ir_CONST_U8(Z_TYPE_P(val))); + } else { + if (Z_MODE(op1_addr) == IS_REG) { + zend_jit_addr real_addr = ZEND_ADDR_MEM_ZVAL(ZREG_FP, opline->op1.var); + if (!zend_jit_spill_store(jit, op1_addr, real_addr, op1_info, 1)) { + return 0; + } + op1_addr = real_addr; + } + if 
(Z_MODE(op2_addr) == IS_REG) { + zend_jit_addr real_addr = ZEND_ADDR_MEM_ZVAL(ZREG_FP, opline->op2.var); + if (!zend_jit_spill_store(jit, op2_addr, real_addr, op2_info, 1)) { + return 0; + } + op2_addr = real_addr; + } + + ref = ir_CALL_2(IR_BOOL, ir_CONST_FC_FUNC(zend_is_identical), + jit_ZVAL_ADDR(jit, op1_addr), + jit_ZVAL_ADDR(jit, op2_addr)); + } + + if (!smart_branch_opcode || smart_branch_opcode == ZEND_JMPNZ_EX || smart_branch_opcode == ZEND_JMPZ_EX) { + if (opline->opcode == ZEND_IS_NOT_IDENTICAL) { + jit_set_Z_TYPE_INFO_ref(jit, jit_ZVAL_ADDR(jit, res_addr), + ir_SUB_U32(ir_CONST_U32(IS_TRUE), ir_ZEXT_U32(ref))); + } else { + jit_set_Z_TYPE_INFO_ref(jit, jit_ZVAL_ADDR(jit, res_addr), + ir_ADD_U32(ir_ZEXT_U32(ref), ir_CONST_U32(IS_FALSE))); + } + } + if (opline->opcode != ZEND_CASE_STRICT) { + jit_FREE_OP(jit, opline->op1_type, opline->op1, op1_info, NULL); + } + jit_FREE_OP(jit, opline->op2_type, opline->op2, op2_info, NULL); + if (may_throw) { + zend_jit_check_exception_undef_result(jit, opline); + } + if (exit_addr) { + if (smart_branch_opcode == ZEND_JMPZ || smart_branch_opcode == ZEND_JMPZ_EX) { + ir_GUARD(ref, ir_CONST_ADDR(exit_addr)); + } else { + ir_GUARD_NOT(ref, ir_CONST_ADDR(exit_addr)); + } + } else if (smart_branch_opcode) { + if (opline->opcode == ZEND_IS_NOT_IDENTICAL) { + /* swap labels */ + uint32_t tmp = target_label; + target_label = target_label2; + target_label2 = tmp; + } + ref = jit_IF_ex(jit, ref, + (smart_branch_opcode == ZEND_JMPZ || smart_branch_opcode == ZEND_JMPZ_EX) ? target_label2 : target_label); + } + } + + if (smart_branch_opcode && !exit_addr) { + zend_basic_block *bb; + + ZEND_ASSERT(jit->b >= 0); + bb = &jit->ssa->cfg.blocks[jit->b]; + ZEND_ASSERT(bb->successors_count == 2); + + if (bb->successors_count == 2 && bb->successors[0] == bb->successors[1]) { + ir_IF_TRUE(ref); + ir_MERGE_WITH_EMPTY_FALSE(ref); + _zend_jit_add_predecessor_ref(jit, bb->successors[0], jit->b, ir_END()); + } else { + ZEND_ASSERT(bb->successors_count == 2); + _zend_jit_add_predecessor_ref(jit, bb->successors[0], jit->b, ref); + _zend_jit_add_predecessor_ref(jit, bb->successors[1], jit->b, ref); + } + jit->b = -1; + } + + return 1; +} + +static int zend_jit_bool_jmpznz(zend_jit_ctx *jit, const zend_op *opline, uint32_t op1_info, zend_jit_addr op1_addr, zend_jit_addr res_addr, uint32_t target_label, uint32_t target_label2, int may_throw, uint8_t branch_opcode, const void *exit_addr) +{ + uint32_t true_label = -1; + uint32_t false_label = -1; + bool set_bool = 0; + bool set_bool_not = 0; + bool always_true = 0, always_false = 0; + ir_ref ref, end_inputs = IR_UNUSED, true_inputs = IR_UNUSED, false_inputs = IR_UNUSED; + ir_type type = IR_UNUSED; + + if (branch_opcode == ZEND_BOOL) { + set_bool = 1; + } else if (branch_opcode == ZEND_BOOL_NOT) { + set_bool = 1; + set_bool_not = 1; + } else if (branch_opcode == ZEND_JMPZ) { + true_label = target_label2; + false_label = target_label; + } else if (branch_opcode == ZEND_JMPNZ) { + true_label = target_label; + false_label = target_label2; + } else if (branch_opcode == ZEND_JMPZ_EX) { + set_bool = 1; + true_label = target_label2; + false_label = target_label; + } else if (branch_opcode == ZEND_JMPNZ_EX) { + set_bool = 1; + true_label = target_label; + false_label = target_label2; + } else { + ZEND_UNREACHABLE(); + } + + if (opline->op1_type == IS_CV && (op1_info & MAY_BE_REF)) { + ref = jit_ZVAL_ADDR(jit, op1_addr); + ref = jit_ZVAL_DEREF_ref(jit, ref); + op1_addr = ZEND_ADDR_REF_ZVAL(ref); + } + + if (Z_MODE(op1_addr) == 
IS_CONST_ZVAL) { + if (zend_is_true(Z_ZV(op1_addr))) { + always_true = 1; + } else { + always_false = 1; + } + } else if (op1_info & (MAY_BE_UNDEF|MAY_BE_NULL|MAY_BE_FALSE|MAY_BE_TRUE)) { + if (!(op1_info & ((MAY_BE_UNDEF|MAY_BE_ANY)-MAY_BE_TRUE))) { + always_true = 1; + } else if (!(op1_info & (MAY_BE_ANY-(MAY_BE_NULL|MAY_BE_FALSE)))) { + if (opline->op1_type == IS_CV && (op1_info & MAY_BE_UNDEF)) { + ref = jit_ZVAL_ADDR(jit, op1_addr); + zend_jit_zval_check_undef(jit, ref, opline->op1.var, opline, 0); + } + always_false = 1; + } + } + + if (always_true) { + if (set_bool) { + jit_set_Z_TYPE_INFO(jit, res_addr, set_bool_not ? IS_FALSE : IS_TRUE); + } + jit_FREE_OP(jit, opline->op1_type, opline->op1, op1_info, opline); + if (may_throw) { + zend_jit_check_exception(jit); + } + if (true_label != (uint32_t)-1) { + ZEND_ASSERT(exit_addr == NULL); + _zend_jit_add_predecessor_ref(jit, true_label, jit->b, ir_END()); + jit->b = -1; + } + return 1; + } else if (always_false) { + if (set_bool) { + jit_set_Z_TYPE_INFO(jit, res_addr, set_bool_not ? IS_TRUE : IS_FALSE); + } + jit_FREE_OP(jit, opline->op1_type, opline->op1, op1_info, opline); + if (may_throw) { + zend_jit_check_exception(jit); + } + if (false_label != (uint32_t)-1) { + ZEND_ASSERT(exit_addr == NULL); + _zend_jit_add_predecessor_ref(jit, false_label, jit->b, ir_END()); + jit->b = -1; + } + return 1; + } + + if (op1_info & (MAY_BE_UNDEF|MAY_BE_NULL|MAY_BE_FALSE|MAY_BE_TRUE)) { + type = jit_Z_TYPE(jit, op1_addr); + if (op1_info & (MAY_BE_UNDEF|MAY_BE_NULL|MAY_BE_FALSE)) { + ir_ref if_type = ir_IF(ir_LT(type, ir_CONST_U8(IS_TRUE))); + + ir_IF_TRUE_cold(if_type); + + if (op1_info & MAY_BE_UNDEF) { + zend_jit_type_check_undef(jit, + type, + opline->op1.var, + opline, 1, 0); + } + if (set_bool) { + jit_set_Z_TYPE_INFO(jit, res_addr, set_bool_not ? IS_TRUE : IS_FALSE); + } + if (exit_addr) { + if (branch_opcode == ZEND_JMPNZ || branch_opcode == ZEND_JMPNZ_EX) { + ir_END_list(end_inputs); + } else { + jit_SIDE_EXIT(jit, ir_CONST_ADDR(exit_addr)); + } + } else if (false_label != (uint32_t)-1) { + ir_END_list(false_inputs); + } else { + ir_END_list(end_inputs); + } + ir_IF_FALSE(if_type); + } + + if (op1_info & MAY_BE_TRUE) { + ir_ref if_type = IR_UNUSED; + + if (op1_info & (MAY_BE_ANY-(MAY_BE_NULL|MAY_BE_FALSE|MAY_BE_TRUE))) { + if_type = ir_IF(ir_EQ(type, ir_CONST_U8(IS_TRUE))); + + ir_IF_TRUE(if_type); + } + if (set_bool) { + jit_set_Z_TYPE_INFO(jit, res_addr, set_bool_not ? 
IS_FALSE : IS_TRUE); + } + if (exit_addr) { + if (branch_opcode == ZEND_JMPZ || branch_opcode == ZEND_JMPZ_EX) { + ir_END_list(end_inputs); + } else { + jit_SIDE_EXIT(jit, ir_CONST_ADDR(exit_addr)); + } + } else if (true_label != (uint32_t)-1) { + ir_END_list(true_inputs); + } else { + ir_END_list(end_inputs); + } + if (if_type) { + ir_IF_FALSE(if_type); + } + } + } + + if (op1_info & MAY_BE_LONG) { + ir_ref if_long = IR_UNUSED; + ir_ref ref; + + if (op1_info & (MAY_BE_ANY-(MAY_BE_NULL|MAY_BE_FALSE|MAY_BE_TRUE|MAY_BE_LONG))) { + if (!type) { + type = jit_Z_TYPE(jit, op1_addr); + } + if_long = ir_IF(ir_EQ(type, ir_CONST_U8(IS_LONG))); + ir_IF_TRUE(if_long); + } + ref = jit_Z_LVAL(jit, op1_addr); + if (branch_opcode == ZEND_BOOL || branch_opcode == ZEND_BOOL_NOT) { + ref = ir_NE(ref, ir_CONST_LONG(0)); + if (set_bool_not) { + jit_set_Z_TYPE_INFO_ref(jit, jit_ZVAL_ADDR(jit, res_addr), + ir_SUB_U32(ir_CONST_U32(IS_TRUE), ir_ZEXT_U32(ref))); + } else { + jit_set_Z_TYPE_INFO_ref(jit, jit_ZVAL_ADDR(jit, res_addr), + ir_ADD_U32(ir_ZEXT_U32(ref), ir_CONST_U32(IS_FALSE))); + } + ir_END_list(end_inputs); + } else if (exit_addr) { + if (set_bool) { + jit_set_Z_TYPE_INFO_ref(jit, jit_ZVAL_ADDR(jit, res_addr), + ir_ADD_U32(ir_ZEXT_U32(ir_NE(ref, ir_CONST_LONG(0))), ir_CONST_U32(IS_FALSE))); + } + if (branch_opcode == ZEND_JMPZ || branch_opcode == ZEND_JMPZ_EX) { + ir_GUARD(ref, ir_CONST_ADDR(exit_addr)); + } else { + ir_GUARD_NOT(ref, ir_CONST_ADDR(exit_addr)); + } + ir_END_list(end_inputs); + } else { + ir_ref if_val = ir_IF(ref); + ir_IF_TRUE(if_val); + if (set_bool) { + jit_set_Z_TYPE_INFO(jit, res_addr, set_bool_not ? IS_FALSE : IS_TRUE); + } + ir_END_list(true_inputs); + ir_IF_FALSE(if_val); + if (set_bool) { + jit_set_Z_TYPE_INFO(jit, res_addr, set_bool_not ? IS_TRUE : IS_FALSE); + } + ir_END_list(false_inputs); + } + if (if_long) { + ir_IF_FALSE(if_long); + } + } + + if (op1_info & MAY_BE_DOUBLE) { + ir_ref if_double = IR_UNUSED; + ir_ref ref; + + if (op1_info & (MAY_BE_ANY-(MAY_BE_NULL|MAY_BE_FALSE|MAY_BE_TRUE|MAY_BE_LONG|MAY_BE_DOUBLE))) { + if (!type) { + type = jit_Z_TYPE(jit, op1_addr); + } + if_double = ir_IF(ir_EQ(type, ir_CONST_U8(IS_DOUBLE))); + ir_IF_TRUE(if_double); + } + ref = ir_NE(jit_Z_DVAL(jit, op1_addr), ir_CONST_DOUBLE(0.0)); + if (branch_opcode == ZEND_BOOL || branch_opcode == ZEND_BOOL_NOT) { + if (set_bool_not) { + jit_set_Z_TYPE_INFO_ref(jit, jit_ZVAL_ADDR(jit, res_addr), + ir_SUB_U32(ir_CONST_U32(IS_TRUE), ir_ZEXT_U32(ref))); + } else { + jit_set_Z_TYPE_INFO_ref(jit, jit_ZVAL_ADDR(jit, res_addr), + ir_ADD_U32(ir_ZEXT_U32(ref), ir_CONST_U32(IS_FALSE))); + } + ir_END_list(end_inputs); + } else if (exit_addr) { + if (set_bool) { + jit_set_Z_TYPE_INFO_ref(jit, jit_ZVAL_ADDR(jit, res_addr), + ir_ADD_U32(ir_ZEXT_U32(ref), ir_CONST_U32(IS_FALSE))); + } + if (branch_opcode == ZEND_JMPZ || branch_opcode == ZEND_JMPZ_EX) { + ir_GUARD(ref, ir_CONST_ADDR(exit_addr)); + } else { + ir_GUARD_NOT(ref, ir_CONST_ADDR(exit_addr)); + } + ir_END_list(end_inputs); + } else { + ir_ref if_val = ir_IF(ref); + ir_IF_TRUE(if_val); + if (set_bool) { + jit_set_Z_TYPE_INFO(jit, res_addr, set_bool_not ? IS_FALSE : IS_TRUE); + } + ir_END_list(true_inputs); + ir_IF_FALSE(if_val); + if (set_bool) { + jit_set_Z_TYPE_INFO(jit, res_addr, set_bool_not ? 
IS_TRUE : IS_FALSE); + } + ir_END_list(false_inputs); + } + if (if_double) { + ir_IF_FALSE(if_double); + } + } + + if (op1_info & (MAY_BE_ANY - (MAY_BE_NULL|MAY_BE_FALSE|MAY_BE_TRUE|MAY_BE_LONG|MAY_BE_DOUBLE))) { + jit_SET_EX_OPLINE(jit, opline); + ref = ir_CALL_1(IR_BOOL, ir_CONST_FC_FUNC(zend_is_true), jit_ZVAL_ADDR(jit, op1_addr)); + jit_FREE_OP(jit, opline->op1_type, opline->op1, op1_info, NULL); + if (may_throw) { + zend_jit_check_exception_undef_result(jit, opline); + } + if (branch_opcode == ZEND_BOOL || branch_opcode == ZEND_BOOL_NOT) { + if (set_bool_not) { + jit_set_Z_TYPE_INFO_ref(jit, jit_ZVAL_ADDR(jit, res_addr), + ir_SUB_U32(ir_CONST_U32(IS_TRUE), ir_ZEXT_U32(ref))); + } else { + jit_set_Z_TYPE_INFO_ref(jit, jit_ZVAL_ADDR(jit, res_addr), + ir_ADD_U32(ir_ZEXT_U32(ref), ir_CONST_U32(IS_FALSE))); + } + if (end_inputs) { + ir_END_list(end_inputs); + } + } else if (exit_addr) { + if (set_bool) { + jit_set_Z_TYPE_INFO_ref(jit, jit_ZVAL_ADDR(jit, res_addr), + ir_ADD_U32(ir_ZEXT_U32(ref), ir_CONST_U32(IS_FALSE))); + } + if (branch_opcode == ZEND_JMPZ || branch_opcode == ZEND_JMPZ_EX) { + ir_GUARD(ref, ir_CONST_ADDR(exit_addr)); + } else { + ir_GUARD_NOT(ref, ir_CONST_ADDR(exit_addr)); + } + if (end_inputs) { + ir_END_list(end_inputs); + } + } else { + ir_ref if_val = ir_IF(ref); + ir_IF_TRUE(if_val); + if (set_bool) { + jit_set_Z_TYPE_INFO(jit, res_addr, set_bool_not ? IS_FALSE : IS_TRUE); + } + ir_END_list(true_inputs); + ir_IF_FALSE(if_val); + if (set_bool) { + jit_set_Z_TYPE_INFO(jit, res_addr, set_bool_not ? IS_TRUE : IS_FALSE); + } + ir_END_list(false_inputs); + } + } + + if (branch_opcode == ZEND_BOOL || branch_opcode == ZEND_BOOL_NOT || exit_addr) { + if (end_inputs) { + ir_MERGE_list(end_inputs); + } + } else { + _zend_jit_merge_smart_branch_inputs(jit, true_label, false_label, true_inputs, false_inputs); + } + + return 1; +} + +static int zend_jit_defined(zend_jit_ctx *jit, const zend_op *opline, uint8_t smart_branch_opcode, uint32_t target_label, uint32_t target_label2, const void *exit_addr) +{ + uint32_t defined_label = (uint32_t)-1; + uint32_t undefined_label = (uint32_t)-1; + zval *zv = RT_CONSTANT(opline, opline->op1); + zend_jit_addr res_addr; + ir_ref ref, ref2, if_set, if_zero, if_set2; + ir_ref end_inputs = IR_UNUSED, true_inputs = IR_UNUSED, false_inputs = IR_UNUSED; + + if (smart_branch_opcode && !exit_addr) { + if (smart_branch_opcode == ZEND_JMPZ) { + defined_label = target_label2; + undefined_label = target_label; + } else if (smart_branch_opcode == ZEND_JMPNZ) { + defined_label = target_label; + undefined_label = target_label2; + } else { + ZEND_UNREACHABLE(); + } + } else { + res_addr = RES_ADDR(); + } + + // if (CACHED_PTR(opline->extended_value)) { + ref = ir_LOAD_A(ir_ADD_OFFSET(ir_LOAD_A(jit_EX(run_time_cache)), opline->extended_value)); + + if_set = ir_IF(ref); + + ir_IF_FALSE_cold(if_set); + if_zero = ir_END(); + + ir_IF_TRUE(if_set); + if_set2 = ir_IF(ir_AND_A(ref, ir_CONST_ADDR(CACHE_SPECIAL))); + ir_IF_FALSE(if_set2); + + if (exit_addr) { + if (smart_branch_opcode == ZEND_JMPNZ) { + jit_SIDE_EXIT(jit, ir_CONST_ADDR(exit_addr)); + } else { + ir_END_list(end_inputs); + } + } else if (smart_branch_opcode) { + ir_END_list(true_inputs); + } else { + jit_set_Z_TYPE_INFO(jit, res_addr, IS_TRUE); + ir_END_list(end_inputs); + } + + ir_IF_TRUE_cold(if_set2); + + ref2 = jit_EG(zend_constants); + ref = ir_SHR_A(ref, ir_CONST_ADDR(1)); + if (sizeof(void*) == 8) { + ref = ir_TRUNC_U32(ref); + } + ref2 = ir_EQ(ref, ir_LOAD_U32(ir_ADD_OFFSET(ir_LOAD_A(ref2), 
offsetof(HashTable, nNumOfElements)))); + ref2 = ir_IF(ref2); + ir_IF_TRUE(ref2); + + if (exit_addr) { + if (smart_branch_opcode == ZEND_JMPZ) { + jit_SIDE_EXIT(jit, ir_CONST_ADDR(exit_addr)); + } else { + ir_END_list(end_inputs); + } + } else if (smart_branch_opcode) { + ir_END_list(false_inputs); + } else { + jit_set_Z_TYPE_INFO(jit, res_addr, IS_FALSE); + ir_END_list(end_inputs); + } + + ir_IF_FALSE(ref2); + ir_MERGE_2(if_zero, ir_END()); + + jit_SET_EX_OPLINE(jit, opline); + ref2 = ir_NE(ir_CALL_1(IR_ADDR, ir_CONST_FC_FUNC(zend_jit_check_constant), ir_CONST_ADDR(zv)), IR_NULL); + if (exit_addr) { + if (smart_branch_opcode == ZEND_JMPZ) { + ir_GUARD(ref2, ir_CONST_ADDR(exit_addr)); + } else { + ir_GUARD_NOT(ref2, ir_CONST_ADDR(exit_addr)); + } + ir_END_list(end_inputs); + } else if (smart_branch_opcode) { + ref2 = ir_IF(ref2); + ir_IF_TRUE(ref2); + ir_END_list(true_inputs); + ir_IF_FALSE(ref2); + ir_END_list(false_inputs); + } else { + jit_set_Z_TYPE_INFO_ref(jit, jit_ZVAL_ADDR(jit, res_addr), + ir_ADD_U32(ir_ZEXT_U32(ref2), ir_CONST_U32(IS_FALSE))); + ir_END_list(end_inputs); + } + + if (!smart_branch_opcode || exit_addr) { + if (end_inputs) { + ir_MERGE_list(end_inputs); + } + } else { + _zend_jit_merge_smart_branch_inputs(jit, defined_label, undefined_label, true_inputs, false_inputs); + } + + return 1; +} + +static int zend_jit_escape_if_undef(zend_jit_ctx *jit, int var, uint32_t flags, const zend_op *opline, int8_t reg) +{ + zend_jit_addr reg_addr = ZEND_ADDR_REF_ZVAL(zend_jit_deopt_rload(jit, IR_ADDR, reg)); + ir_ref if_def = ir_IF(jit_Z_TYPE(jit, reg_addr)); + + ir_IF_FALSE_cold(if_def); + + if (flags & ZEND_JIT_EXIT_RESTORE_CALL) { + if (!zend_jit_save_call_chain(jit, -1)) { + return 0; + } + } + + if ((opline-1)->opcode != ZEND_FETCH_CONSTANT + && (opline-1)->opcode != ZEND_FETCH_LIST_R + && ((opline-1)->op1_type & (IS_VAR|IS_TMP_VAR)) + && !(flags & ZEND_JIT_EXIT_FREE_OP1)) { + zend_jit_addr val_addr = ZEND_ADDR_MEM_ZVAL(ZREG_FP, (opline-1)->op1.var); + + zend_jit_zval_try_addref(jit, val_addr); + } + + jit_LOAD_IP_ADDR(jit, opline - 1); + ir_IJMP(jit_STUB_ADDR(jit, jit_stub_trace_escape)); + + ir_IF_TRUE(if_def); + + return 1; +} + +static int zend_jit_restore_zval(zend_jit_ctx *jit, int var, int8_t reg) +{ + zend_jit_addr var_addr = ZEND_ADDR_MEM_ZVAL(ZREG_FP, var); + zend_jit_addr reg_addr = ZEND_ADDR_REF_ZVAL(zend_jit_deopt_rload(jit, IR_ADDR, reg)); + + // JIT: ZVAL_COPY_OR_DUP(EX_VAR(opline->result.var), &c->value); (no dup) + jit_ZVAL_COPY(jit, var_addr, MAY_BE_ANY, reg_addr, MAY_BE_ANY, 1); + return 1; +} + +static zend_jit_addr zend_jit_guard_fetch_result_type(zend_jit_ctx *jit, + const zend_op *opline, + zend_jit_addr val_addr, + uint8_t type, + bool deref, + uint32_t flags, + bool op1_avoid_refcounting) +{ + zend_jit_trace_stack *stack = JIT_G(current_frame)->stack; + int32_t exit_point; + const void *res_exit_addr = NULL; + ir_ref end1 = IR_UNUSED, ref1 = IR_UNUSED; + ir_ref ref = jit_ZVAL_ADDR(jit, val_addr); + uint32_t old_op1_info = 0; + uint32_t old_info; + ir_ref old_ref; + + + if (opline->op1_type & (IS_VAR|IS_TMP_VAR|IS_CV)) { + old_op1_info = STACK_INFO(stack, EX_VAR_TO_NUM(opline->op1.var)); + if (op1_avoid_refcounting + || ((opline->op1_type & (IS_VAR|IS_TMP_VAR)) + && STACK_FLAGS(stack, EX_VAR_TO_NUM(opline->op1.var)) & (ZREG_ZVAL_ADDREF|ZREG_THIS))) { + SET_STACK_REG(stack, EX_VAR_TO_NUM(opline->op1.var), ZREG_NONE); + } + } + old_info = STACK_INFO(stack, EX_VAR_TO_NUM(opline->result.var)); + old_ref = STACK_REF(stack, 
+	CLEAR_STACK_REF(stack, EX_VAR_TO_NUM(opline->result.var));
+	SET_STACK_TYPE(stack, EX_VAR_TO_NUM(opline->result.var), IS_UNKNOWN, 1);
+
+	if (deref) {
+		ir_ref if_type = jit_if_Z_TYPE(jit, val_addr, type);
+
+		ir_IF_TRUE(if_type);
+		end1 = ir_END();
+		ref1 = ref;
+		ir_IF_FALSE_cold(if_type);
+
+		SET_STACK_REF_EX(stack, EX_VAR_TO_NUM(opline->result.var), ref, ZREG_ZVAL_COPY);
+		exit_point = zend_jit_trace_get_exit_point(opline+1, flags);
+		res_exit_addr = zend_jit_trace_get_exit_addr(exit_point);
+		if (!res_exit_addr) {
+			return 0;
+		}
+
+		jit_guard_Z_TYPE(jit, val_addr, IS_REFERENCE, res_exit_addr);
+		ref = ir_ADD_OFFSET(jit_Z_PTR(jit, val_addr), offsetof(zend_reference, val));
+		val_addr = ZEND_ADDR_REF_ZVAL(ref);
+	}
+
+	SET_STACK_REF_EX(stack, EX_VAR_TO_NUM(opline->result.var), ref, ZREG_ZVAL_COPY);
+	exit_point = zend_jit_trace_get_exit_point(opline+1, flags);
+	res_exit_addr = zend_jit_trace_get_exit_addr(exit_point);
+	if (!res_exit_addr) {
+		return 0;
+	}
+
+	jit_guard_Z_TYPE(jit, val_addr, type, res_exit_addr);
+
+	if (deref) {
+		ir_MERGE_WITH(end1);
+		ref = ir_PHI_2(IR_ADDR, ref, ref1);
+	}
+
+	val_addr = ZEND_ADDR_REF_ZVAL(ref);
+
+	SET_STACK_REF(stack, EX_VAR_TO_NUM(opline->result.var), old_ref);
+	SET_STACK_INFO(stack, EX_VAR_TO_NUM(opline->result.var), old_info);
+	if (opline->op1_type & (IS_VAR|IS_TMP_VAR|IS_CV)) {
+		SET_STACK_INFO(stack, EX_VAR_TO_NUM(opline->op1.var), old_op1_info);
+	}
+
+	return val_addr;
+}
+
+static int zend_jit_fetch_constant(zend_jit_ctx *jit,
+                                   const zend_op *opline,
+                                   const zend_op_array *op_array,
+                                   zend_ssa *ssa,
+                                   const zend_ssa_op *ssa_op,
+                                   zend_jit_addr res_addr)
+{
+	zval *zv = RT_CONSTANT(opline, opline->op2) + 1;
+	uint32_t res_info = RES_INFO();
+	ir_ref ref, ref2, if_set, if_special, not_set_path, special_path, fast_path;
+
+	// JIT: c = CACHED_PTR(opline->extended_value);
+	ref = ir_LOAD_A(ir_ADD_OFFSET(ir_LOAD_A(jit_EX(run_time_cache)), opline->extended_value));
+
+	// JIT: if (c != NULL)
+	if_set = ir_IF(ref);
+
+	if (!zend_jit_is_persistent_constant(zv, opline->op1.num)) {
+		// JIT: if (!IS_SPECIAL_CACHE_VAL(c))
+		ir_IF_FALSE_cold(if_set);
+		not_set_path = ir_END();
+		ir_IF_TRUE(if_set);
+		if_special = ir_IF(ir_AND_A(ref, ir_CONST_ADDR(CACHE_SPECIAL)));
+		ir_IF_TRUE_cold(if_special);
+		special_path = ir_END();
+		ir_IF_FALSE(if_special);
+		fast_path = ir_END();
+		ir_MERGE_2(not_set_path, special_path);
+	} else {
+		ir_IF_TRUE(if_set);
+		fast_path = ir_END();
+		ir_IF_FALSE_cold(if_set);
+	}
+
+	// JIT: zend_jit_get_constant(RT_CONSTANT(opline, opline->op2) + 1, opline->op1.num);
+	jit_SET_EX_OPLINE(jit, opline);
+	ref2 = ir_CALL_2(IR_ADDR, ir_CONST_FC_FUNC(zend_jit_get_constant),
+		ir_CONST_ADDR(zv),
+		ir_CONST_U32(opline->op1.num));
+	ir_GUARD(ref2, jit_STUB_ADDR(jit, jit_stub_exception_handler));
+
+	ir_MERGE_WITH(fast_path);
+	ref = ir_PHI_2(IR_ADDR, ref2, ref);
+
+	if ((res_info & MAY_BE_GUARD) && JIT_G(current_frame)) {
+		uint8_t type = concrete_type(res_info);
+		zend_jit_addr const_addr = ZEND_ADDR_REF_ZVAL(ref);
+
+		const_addr = zend_jit_guard_fetch_result_type(jit, opline, const_addr, type, 0, 0, 0);
+		if (!const_addr) {
+			return 0;
+		}
+
+		res_info &= ~MAY_BE_GUARD;
+		ssa->var_info[ssa_op->result_def].type &= ~MAY_BE_GUARD;
+
+		// JIT: ZVAL_COPY_OR_DUP(EX_VAR(opline->result.var), &c->value); (no dup)
+		jit_ZVAL_COPY(jit, res_addr, MAY_BE_ANY, const_addr, res_info, 1);
+		if (!zend_jit_store_var_if_necessary(jit, opline->result.var, res_addr, res_info)) {
+			return 0;
+		}
+	} else {
+		zend_jit_addr
const_addr = ZEND_ADDR_REF_ZVAL(ref); + + // JIT: ZVAL_COPY_OR_DUP(EX_VAR(opline->result.var), &c->value); (no dup) + jit_ZVAL_COPY(jit, res_addr, MAY_BE_ANY, const_addr, MAY_BE_ANY, 1); + } + + + return 1; +} + +static int zend_jit_type_check(zend_jit_ctx *jit, const zend_op *opline, uint32_t op1_info, uint8_t smart_branch_opcode, uint32_t target_label, uint32_t target_label2, const void *exit_addr) +{ + uint32_t mask; + zend_jit_addr op1_addr = OP1_ADDR(); + zend_jit_addr res_addr = 0; + uint32_t true_label = -1, false_label = -1; + ir_ref end_inputs = IR_UNUSED, true_inputs = IR_UNUSED, false_inputs = IR_UNUSED; + + // TODO: support for is_resource() ??? + ZEND_ASSERT(opline->extended_value != MAY_BE_RESOURCE); + + if (smart_branch_opcode && !exit_addr) { + if (smart_branch_opcode == ZEND_JMPZ) { + true_label = target_label2; + false_label = target_label; + } else if (smart_branch_opcode == ZEND_JMPNZ) { + true_label = target_label; + false_label = target_label2; + } else { + ZEND_UNREACHABLE(); + } + } else { + res_addr = RES_ADDR(); + } + + if (op1_info & MAY_BE_UNDEF) { + ir_ref if_def = IR_UNUSED; + + if (op1_info & (MAY_BE_ANY|MAY_BE_REF)) { + if_def = jit_if_not_Z_TYPE(jit, op1_addr, IS_UNDEF); + ir_IF_FALSE_cold(if_def); + } + + jit_SET_EX_OPLINE(jit, opline); + ir_CALL_1(IR_VOID, ir_CONST_FC_FUNC(zend_jit_undefined_op_helper), ir_CONST_U32(opline->op1.var)); + zend_jit_check_exception_undef_result(jit, opline); + if (opline->extended_value & MAY_BE_NULL) { + if (exit_addr) { + if (smart_branch_opcode == ZEND_JMPNZ) { + jit_SIDE_EXIT(jit, ir_CONST_ADDR(exit_addr)); + } else { + ir_END_list(end_inputs); + } + } else if (smart_branch_opcode) { + ir_END_list(true_inputs); + } else { + jit_set_Z_TYPE_INFO(jit, res_addr, IS_TRUE); + ir_END_list(end_inputs); + } + } else { + if (exit_addr) { + if (smart_branch_opcode == ZEND_JMPZ) { + jit_SIDE_EXIT(jit, ir_CONST_ADDR(exit_addr)); + } else { + ir_END_list(end_inputs); + } + } else if (smart_branch_opcode) { + ir_END_list(false_inputs); + } else { + jit_set_Z_TYPE_INFO(jit, res_addr, IS_FALSE); + if (if_def) { + ir_END_list(end_inputs); + } + } + } + + if (if_def) { + ir_IF_TRUE(if_def); + op1_info |= MAY_BE_NULL; + } + } + + if (op1_info & (MAY_BE_ANY|MAY_BE_REF)) { + mask = opline->extended_value; + if (!(op1_info & MAY_BE_GUARD) && !(op1_info & (MAY_BE_ANY - mask))) { + jit_FREE_OP(jit, opline->op1_type, opline->op1, op1_info, opline); + if (exit_addr) { + if (smart_branch_opcode == ZEND_JMPNZ) { + jit_SIDE_EXIT(jit, ir_CONST_ADDR(exit_addr)); + } else if (end_inputs) { + ir_END_list(end_inputs); + } + } else if (smart_branch_opcode) { + ir_END_list(true_inputs); + } else { + jit_set_Z_TYPE_INFO(jit, res_addr, IS_TRUE); + ir_END_list(end_inputs); + } + } else if (!(op1_info & MAY_BE_GUARD) && !(op1_info & mask)) { + jit_FREE_OP(jit, opline->op1_type, opline->op1, op1_info, opline); + if (exit_addr) { + if (smart_branch_opcode == ZEND_JMPZ) { + jit_SIDE_EXIT(jit, ir_CONST_ADDR(exit_addr)); + } else if (end_inputs) { + ir_END_list(end_inputs); + } + } else if (smart_branch_opcode) { + ir_END_list(false_inputs); + } else { + jit_set_Z_TYPE_INFO(jit, res_addr, IS_FALSE); + ir_END_list(end_inputs); + } + } else { + ir_ref ref; + bool invert = 0; + uint8_t type; + + switch (mask) { + case MAY_BE_NULL: type = IS_NULL; break; + case MAY_BE_FALSE: type = IS_FALSE; break; + case MAY_BE_TRUE: type = IS_TRUE; break; + case MAY_BE_LONG: type = IS_LONG; break; + case MAY_BE_DOUBLE: type = IS_DOUBLE; break; + case MAY_BE_STRING: type = IS_STRING; 
break; + case MAY_BE_ARRAY: type = IS_ARRAY; break; + case MAY_BE_OBJECT: type = IS_OBJECT; break; + case MAY_BE_ANY - MAY_BE_NULL: type = IS_NULL; invert = 1; break; + case MAY_BE_ANY - MAY_BE_FALSE: type = IS_FALSE; invert = 1; break; + case MAY_BE_ANY - MAY_BE_TRUE: type = IS_TRUE; invert = 1; break; + case MAY_BE_ANY - MAY_BE_LONG: type = IS_LONG; invert = 1; break; + case MAY_BE_ANY - MAY_BE_DOUBLE: type = IS_DOUBLE; invert = 1; break; + case MAY_BE_ANY - MAY_BE_STRING: type = IS_STRING; invert = 1; break; + case MAY_BE_ANY - MAY_BE_ARRAY: type = IS_ARRAY; invert = 1; break; + case MAY_BE_ANY - MAY_BE_OBJECT: type = IS_OBJECT; invert = 1; break; + case MAY_BE_ANY - MAY_BE_RESOURCE: type = IS_OBJECT; invert = 1; break; + default: + type = 0; + } + + if (op1_info & MAY_BE_REF) { + ir_ref ref = jit_ZVAL_ADDR(jit, op1_addr); + ref = jit_ZVAL_DEREF_ref(jit, ref); + op1_addr = ZEND_ADDR_REF_ZVAL(ref); + } + if (type == 0) { + ref = ir_AND_U32(ir_SHL_U32(ir_CONST_U32(1), jit_Z_TYPE(jit, op1_addr)), ir_CONST_U32(mask)); + if (!smart_branch_opcode) { + ref = ir_NE(ref, ir_CONST_U32(0)); + } + } else if (invert) { + ref = ir_NE(jit_Z_TYPE(jit, op1_addr), ir_CONST_U8(type)); + } else { + ref = ir_EQ(jit_Z_TYPE(jit, op1_addr), ir_CONST_U8(type)); + } + + jit_FREE_OP(jit, opline->op1_type, opline->op1, op1_info, opline); + + if (exit_addr) { + if (smart_branch_opcode == ZEND_JMPZ) { + ir_GUARD(ref, ir_CONST_ADDR(exit_addr)); + } else { + ir_GUARD_NOT(ref, ir_CONST_ADDR(exit_addr)); + } + if (end_inputs) { + ir_END_list(end_inputs); + } + } else if (smart_branch_opcode) { + ir_ref if_val = ir_IF(ref); + ir_IF_TRUE(if_val); + ir_END_list(true_inputs); + ir_IF_FALSE(if_val); + ir_END_list(false_inputs); + } else { + jit_set_Z_TYPE_INFO_ref(jit, jit_ZVAL_ADDR(jit, res_addr), + ir_ADD_U32(ir_ZEXT_U32(ref), ir_CONST_U32(IS_FALSE))); + ir_END_list(end_inputs); + } + } + } + + if (!smart_branch_opcode || exit_addr) { + if (end_inputs) { + ir_MERGE_list(end_inputs); + } + } else { + _zend_jit_merge_smart_branch_inputs(jit, true_label, false_label, true_inputs, false_inputs); + } + + return 1; +} + +static int zend_jit_isset_isempty_cv(zend_jit_ctx *jit, const zend_op *opline, uint32_t op1_info, zend_jit_addr op1_addr, uint8_t smart_branch_opcode, uint32_t target_label, uint32_t target_label2, const void *exit_addr) +{ + zend_jit_addr res_addr = RES_ADDR(); + uint32_t true_label = -1, false_label = -1; + ir_ref end_inputs = IR_UNUSED, true_inputs = IR_UNUSED, false_inputs = IR_UNUSED; + + // TODO: support for empty() ??? 
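+	/* Only the isset() flavor is handled here; empty() is asserted away below (see TODO above). */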
+	ZEND_ASSERT(!(opline->extended_value & ZEND_ISEMPTY));
+
+	if (smart_branch_opcode && !exit_addr) {
+		if (smart_branch_opcode == ZEND_JMPZ) {
+			true_label = target_label2;
+			false_label = target_label;
+		} else if (smart_branch_opcode == ZEND_JMPNZ) {
+			true_label = target_label;
+			false_label = target_label2;
+		} else {
+			ZEND_UNREACHABLE();
+		}
+	} else {
+		res_addr = RES_ADDR();
+	}
+
+	if (op1_info & MAY_BE_REF) {
+		ir_ref ref = jit_ZVAL_ADDR(jit, op1_addr);
+		ref = jit_ZVAL_DEREF_ref(jit, ref);
+		op1_addr = ZEND_ADDR_REF_ZVAL(ref);
+	}
+
+	if (!(op1_info & (MAY_BE_UNDEF|MAY_BE_NULL))) {
+		if (exit_addr) {
+			ZEND_ASSERT(smart_branch_opcode == ZEND_JMPZ);
+		} else if (smart_branch_opcode) {
+			ir_END_list(true_inputs);
+		} else {
+			jit_set_Z_TYPE_INFO(jit, res_addr, IS_TRUE);
+			ir_END_list(end_inputs);
+		}
+	} else if (!(op1_info & (MAY_BE_ANY - MAY_BE_NULL))) {
+		if (exit_addr) {
+			ZEND_ASSERT(smart_branch_opcode == ZEND_JMPNZ);
+		} else if (smart_branch_opcode) {
+			ir_END_list(false_inputs);
+		} else {
+			jit_set_Z_TYPE_INFO(jit, res_addr, IS_FALSE);
+			ir_END_list(end_inputs);
+		}
+	} else {
+		ir_ref ref = ir_GT(jit_Z_TYPE(jit, op1_addr), ir_CONST_U8(IS_NULL));
+		if (exit_addr) {
+			if (smart_branch_opcode == ZEND_JMPNZ) {
+				ir_GUARD_NOT(ref, ir_CONST_ADDR(exit_addr));
+			} else {
+				ir_GUARD(ref, ir_CONST_ADDR(exit_addr));
+			}
+		} else if (smart_branch_opcode) {
+			ir_ref if_val = ir_IF(ref);
+			ir_IF_TRUE(if_val);
+			ir_END_list(true_inputs);
+			ir_IF_FALSE(if_val);
+			ir_END_list(false_inputs);
+		} else {
+			jit_set_Z_TYPE_INFO_ref(jit, jit_ZVAL_ADDR(jit, res_addr),
+				ir_ADD_U32(ir_ZEXT_U32(ref), ir_CONST_U32(IS_FALSE)));
+			ir_END_list(end_inputs);
+		}
+	}
+
+	if (!smart_branch_opcode || exit_addr) {
+		if (end_inputs) {
+			ir_MERGE_list(end_inputs);
+		}
+	} else {
+		_zend_jit_merge_smart_branch_inputs(jit, true_label, false_label, true_inputs, false_inputs);
+	}
+
+	return 1;
+}
+
+/* copy of hidden zend_closure */
+typedef struct _zend_closure {
+	zend_object std;
+	zend_function func;
+	zval this_ptr;
+	zend_class_entry *called_scope;
+	zif_handler orig_internal_handler;
+} zend_closure;
+
+static int zend_jit_stack_check(zend_jit_ctx *jit, const zend_op *opline, uint32_t used_stack)
+{
+	int32_t exit_point = zend_jit_trace_get_exit_point(opline, ZEND_JIT_EXIT_TO_VM);
+	const void *exit_addr = zend_jit_trace_get_exit_addr(exit_point);
+
+	if (!exit_addr) {
+		return 0;
+	}
+
+	// JIT: if (EG(vm_stack_end) - EG(vm_stack_top) < used_stack)
+	ir_GUARD(
+		ir_UGE(
+			ir_SUB_A(ir_LOAD_A(jit_EG(vm_stack_end)), ir_LOAD_A(jit_EG(vm_stack_top))),
+			ir_CONST_ADDR(used_stack)),
+		ir_CONST_ADDR(exit_addr));
+
+	return 1;
+}
+
+static int zend_jit_free_trampoline(zend_jit_ctx *jit, int8_t func_reg)
+{
+	// JIT: if (UNEXPECTED(func->common.fn_flags & ZEND_ACC_CALL_VIA_TRAMPOLINE))
+	ir_ref func = ir_RLOAD_A(func_reg);
+	ir_ref if_trampoline = ir_IF(ir_AND_U32(
+		ir_LOAD_U32(ir_ADD_OFFSET(func, offsetof(zend_function, common.fn_flags))),
+		ir_CONST_U32(ZEND_ACC_CALL_VIA_TRAMPOLINE)));
+
+	ir_IF_TRUE(if_trampoline);
+	ir_CALL_1(IR_VOID, ir_CONST_FC_FUNC(zend_jit_free_trampoline_helper), func);
+	ir_MERGE_WITH_EMPTY_FALSE(if_trampoline);
+
+	return 1;
+}
+
+static int zend_jit_push_call_frame(zend_jit_ctx *jit, const zend_op *opline, const zend_op_array *op_array, zend_function *func, bool is_closure, bool delayed_fetch_this, int checked_stack, ir_ref func_ref, ir_ref this_ref)
+{
+	uint32_t used_stack;
+	ir_ref used_stack_ref = IR_UNUSED;
+	bool stack_check = 1;
+	ir_ref rx, ref, top, if_enough_stack, cold_path = IR_UNUSED;
+
+	ZEND_ASSERT(func_ref != IR_NULL);
+	if (func) {
+		used_stack = zend_vm_calc_used_stack(opline->extended_value, func);
+		if ((int)used_stack <= checked_stack) {
+			stack_check = 0;
+		}
+		used_stack_ref = ir_CONST_ADDR(used_stack);
+	} else {
+		ir_ref num_args_ref;
+		ir_ref if_internal_func = IR_UNUSED;
+
+		used_stack = (ZEND_CALL_FRAME_SLOT + opline->extended_value + ZEND_OBSERVER_ENABLED) * sizeof(zval);
+		used_stack_ref = ir_CONST_ADDR(used_stack);
+
+		if (!is_closure) {
+			used_stack_ref = ir_HARD_COPY_A(used_stack_ref); /* load constant once */
+
+			// JIT: if (EXPECTED(ZEND_USER_CODE(func->type))) {
+			ir_ref tmp = ir_LOAD_U8(ir_ADD_OFFSET(func_ref, offsetof(zend_function, type)));
+			if_internal_func = ir_IF(ir_AND_U8(tmp, ir_CONST_U8(1)));
+			ir_IF_FALSE(if_internal_func);
+		}
+
+		// JIT: used_stack += (func->op_array.last_var + func->op_array.T - MIN(func->op_array.num_args, num_args)) * sizeof(zval);
+		num_args_ref = ir_CONST_U32(opline->extended_value);
+		if (!is_closure) {
+			ref = ir_SUB_U32(
+				ir_SUB_U32(
+					ir_MIN_U32(
+						num_args_ref,
+						ir_LOAD_U32(ir_ADD_OFFSET(func_ref, offsetof(zend_function, op_array.num_args)))),
+					ir_LOAD_U32(ir_ADD_OFFSET(func_ref, offsetof(zend_function, op_array.last_var)))),
+				ir_LOAD_U32(ir_ADD_OFFSET(func_ref, offsetof(zend_function, op_array.T))));
+		} else {
+			ref = ir_SUB_U32(
+				ir_SUB_U32(
+					ir_MIN_U32(
+						num_args_ref,
+						ir_LOAD_U32(ir_ADD_OFFSET(func_ref, offsetof(zend_closure, func.op_array.num_args)))),
+					ir_LOAD_U32(ir_ADD_OFFSET(func_ref, offsetof(zend_closure, func.op_array.last_var)))),
+				ir_LOAD_U32(ir_ADD_OFFSET(func_ref, offsetof(zend_closure, func.op_array.T))));
+		}
+		ref = ir_MUL_U32(ref, ir_CONST_U32(sizeof(zval)));
+		if (sizeof(void*) == 8) {
+			ref = ir_SEXT_A(ref);
+		}
+		ref = ir_SUB_A(used_stack_ref, ref);
+
+		if (is_closure) {
+			used_stack_ref = ref;
+		} else {
+			ir_MERGE_WITH_EMPTY_TRUE(if_internal_func);
+			used_stack_ref = ir_PHI_2(IR_ADDR, ref, used_stack_ref);
+		}
+	}
+
+	zend_jit_start_reuse_ip(jit);
+
+	// JIT: if (UNEXPECTED(used_stack > (size_t)(((char*)EG(vm_stack_end)) - (char*)call))) {
+	jit_STORE_IP(jit, ir_LOAD_A(jit_EG(vm_stack_top)));
+
+	if (stack_check) {
+		// JIT: Check Stack Overflow
+		ref = ir_UGE(
+			ir_SUB_A(
+				ir_LOAD_A(jit_EG(vm_stack_end)),
+				jit_IP(jit)),
+			used_stack_ref);
+
+		if (JIT_G(trigger) == ZEND_JIT_ON_HOT_TRACE) {
+			int32_t exit_point = zend_jit_trace_get_exit_point(opline, ZEND_JIT_EXIT_TO_VM);
+			const void *exit_addr = zend_jit_trace_get_exit_addr(exit_point);
+
+			if (!exit_addr) {
+				return 0;
+			}
+
+			ir_GUARD(ref, ir_CONST_ADDR(exit_addr));
+		} else {
+			if_enough_stack = ir_IF(ref);
+			ir_IF_FALSE_cold(if_enough_stack);
+
+#ifdef _WIN32
+			if (0) {
+#else
+			if (opline->opcode == ZEND_INIT_FCALL && func && func->type == ZEND_INTERNAL_FUNCTION) {
+#endif
+				jit_SET_EX_OPLINE(jit, opline);
+				ref = ir_CALL_1(IR_ADDR, ir_CONST_FC_FUNC(zend_jit_int_extend_stack_helper), used_stack_ref);
+			} else {
+				if (!is_closure) {
+					ref = func_ref;
+				} else {
+					ref = ir_ADD_OFFSET(func_ref, offsetof(zend_closure, func));
+				}
+				jit_SET_EX_OPLINE(jit, opline);
+				ref = ir_CALL_2(IR_ADDR, ir_CONST_FC_FUNC(zend_jit_extend_stack_helper),
+					used_stack_ref, ref);
+			}
+			jit_STORE_IP(jit, ref);
+
+			cold_path = ir_END();
+			ir_IF_TRUE(if_enough_stack);
+		}
+	}
+
+	ref = jit_EG(vm_stack_top);
+	rx = jit_IP(jit);
+#if !OPTIMIZE_FOR_SIZE
+	/* JIT: EG(vm_stack_top) = (zval*)((char*)call + used_stack);
+	 * This version is longer but faster
+	 * mov EG(vm_stack_top), %CALL
+	 * lea size(%call), %tmp
+	 * mov
%tmp, EG(vm_stack_top) + */ + top = rx; +#else + /* JIT: EG(vm_stack_top) += used_stack; + * Use ir_emit() because ir_LOAD() makes load forwarding and doesn't allow load/store fusion + * mov EG(vm_stack_top), %CALL + * add $size, EG(vm_stack_top) + */ + top = jit->ctx.control = ir_emit2(&jit->ctx, IR_OPT(IR_LOAD, IR_ADDR), jit->ctx.control, ref); +#endif + ir_STORE(ref, ir_ADD_A(top, used_stack_ref)); + + // JIT: zend_vm_init_call_frame(call, call_info, func, num_args, called_scope, object); + if (JIT_G(trigger) != ZEND_JIT_ON_HOT_TRACE || opline->opcode != ZEND_INIT_METHOD_CALL) { + // JIT: ZEND_SET_CALL_INFO(call, 0, call_info); + ir_STORE(jit_CALL(rx, This.u1.type_info), ir_CONST_U32(IS_UNDEF | ZEND_CALL_NESTED_FUNCTION)); + } +#ifdef _WIN32 + if (0) { +#else + if (opline->opcode == ZEND_INIT_FCALL && func && func->type == ZEND_INTERNAL_FUNCTION) { +#endif + if (cold_path) { + ir_MERGE_WITH(cold_path); + rx = jit_IP(jit); + } + + // JIT: call->func = func; + ir_STORE(jit_CALL(rx, func), func_ref); + } else { + if (!is_closure) { + // JIT: call->func = func; + ir_STORE(jit_CALL(rx, func), func_ref); + } else { + // JIT: call->func = &closure->func; + ir_STORE(jit_CALL(rx, func), ir_ADD_OFFSET(func_ref, offsetof(zend_closure, func))); + } + if (cold_path) { + ir_MERGE_WITH(cold_path); + rx = jit_IP(jit); + } + } + if (opline->opcode == ZEND_INIT_METHOD_CALL) { + // JIT: Z_PTR(call->This) = obj; + ZEND_ASSERT(this_ref != IR_NULL); + ir_STORE(jit_CALL(rx, This.value.ptr), this_ref); + if (opline->op1_type == IS_UNUSED || delayed_fetch_this) { + // JIT: call->call_info |= ZEND_CALL_HAS_THIS; + ref = jit_CALL(rx, This.u1.type_info); + if (JIT_G(trigger) == ZEND_JIT_ON_HOT_TRACE) { + ir_STORE(ref, ir_CONST_U32( ZEND_CALL_HAS_THIS)); + } else { + ir_STORE(ref, ir_OR_U32(ir_LOAD_U32(ref), ir_CONST_U32(ZEND_CALL_HAS_THIS))); + } + } else { + if (opline->op1_type == IS_CV) { + // JIT: GC_ADDREF(obj); + jit_GC_ADDREF(jit, this_ref); + } + + // JIT: call->call_info |= ZEND_CALL_HAS_THIS | ZEND_CALL_RELEASE_THIS; + ref = jit_CALL(rx, This.u1.type_info); + if (JIT_G(trigger) == ZEND_JIT_ON_HOT_TRACE) { + ir_STORE(ref, ir_CONST_U32( ZEND_CALL_HAS_THIS | ZEND_CALL_RELEASE_THIS)); + } else { + ir_STORE(ref, + ir_OR_U32(ir_LOAD_U32(ref), + ir_CONST_U32(ZEND_CALL_HAS_THIS | ZEND_CALL_RELEASE_THIS))); + } + } + } else if (!is_closure) { + // JIT: Z_CE(call->This) = called_scope; + ir_STORE(jit_CALL(rx, This), IR_NULL); + } else { + ir_ref object_or_called_scope, call_info, call_info2, object, if_cond; + + if (opline->op2_type == IS_CV) { + // JIT: GC_ADDREF(closure); + jit_GC_ADDREF(jit, func_ref); + } + + // JIT: RX(object_or_called_scope) = closure->called_scope; + object_or_called_scope = ir_LOAD_A(ir_ADD_OFFSET(func_ref, offsetof(zend_closure, called_scope))); + + // JIT: call_info = ZEND_CALL_NESTED_FUNCTION | ZEND_CALL_DYNAMIC | ZEND_CALL_CLOSURE | + // (closure->func->common.fn_flags & ZEND_ACC_FAKE_CLOSURE); + call_info = ir_OR_U32( + ir_AND_U32( + ir_LOAD_U32(ir_ADD_OFFSET(func_ref, offsetof(zend_closure, func.common.fn_flags))), + ir_CONST_U32(ZEND_ACC_FAKE_CLOSURE)), + ir_CONST_U32(ZEND_CALL_NESTED_FUNCTION | ZEND_CALL_DYNAMIC | ZEND_CALL_CLOSURE)); + // JIT: if (Z_TYPE(closure->this_ptr) != IS_UNDEF) { + if_cond = ir_IF(ir_LOAD_U8(ir_ADD_OFFSET(func_ref, offsetof(zend_closure, this_ptr.u1.v.type)))); + ir_IF_TRUE(if_cond); + + // JIT: call_info |= ZEND_CALL_HAS_THIS; + call_info2 = ir_OR_U32(call_info, ir_CONST_U32(ZEND_CALL_HAS_THIS)); + + // JIT: object_or_called_scope = 
Z_OBJ(closure->this_ptr); + object = ir_LOAD_A(ir_ADD_OFFSET(func_ref, offsetof(zend_closure, this_ptr.value.ptr))); + + ir_MERGE_WITH_EMPTY_FALSE(if_cond); + call_info = ir_PHI_2(IR_U32, call_info2, call_info); + object_or_called_scope = ir_PHI_2(IR_ADDR, object, object_or_called_scope); + + // JIT: ZEND_SET_CALL_INFO(call, 0, call_info); + ref = jit_CALL(rx, This.u1.type_info); + ir_STORE(ref, ir_OR_U32(ir_LOAD_U32(ref), call_info)); + + // JIT: Z_PTR(call->This) = object_or_called_scope; + ir_STORE(jit_CALL(rx, This.value.ptr), object_or_called_scope); + + // JIT: if (closure->func.op_array.run_time_cache__ptr) + if_cond = ir_IF(ir_LOAD_A(ir_ADD_OFFSET(func_ref, offsetof(zend_closure, func.op_array.run_time_cache__ptr)))); + ir_IF_FALSE(if_cond); + + // JIT: zend_jit_init_func_run_time_cache_helper(closure->func); + ir_CALL_1(IR_VOID, ir_CONST_FC_FUNC(zend_jit_init_func_run_time_cache_helper), + ir_ADD_OFFSET(func_ref, offsetof(zend_closure, func))); + + ir_MERGE_WITH_EMPTY_TRUE(if_cond); + } + + // JIT: ZEND_CALL_NUM_ARGS(call) = num_args; + ir_STORE(jit_CALL(rx, This.u2.num_args), ir_CONST_U32(opline->extended_value)); + + return 1; +} + +static int zend_jit_init_fcall_guard(zend_jit_ctx *jit, uint32_t level, const zend_function *func, const zend_op *to_opline) +{ + int32_t exit_point; + const void *exit_addr; + ir_ref call; + + if (func->type == ZEND_INTERNAL_FUNCTION) { +#ifdef ZEND_WIN32 + // TODO: ASLR may cause different addresses in different workers ??? + return 0; +#endif + } else if (func->type == ZEND_USER_FUNCTION) { + if (!zend_accel_in_shm(func->op_array.opcodes)) { + /* op_array and op_array->opcodes are not persistent. We can't link. */ + return 0; + } + } else { + ZEND_UNREACHABLE(); + return 0; + } + + exit_point = zend_jit_trace_get_exit_point(to_opline, ZEND_JIT_EXIT_POLYMORPHISM); + exit_addr = zend_jit_trace_get_exit_addr(exit_point); + if (!exit_addr) { + return 0; + } + + // call = EX(call); + call = ir_LOAD_A(jit_EX(call)); + while (level > 0) { + // call = call->prev_execute_data + call = ir_LOAD_A(jit_CALL(call, prev_execute_data)); + level--; + } + + if (func->type == ZEND_USER_FUNCTION && + (!(func->common.fn_flags & ZEND_ACC_IMMUTABLE) || + (func->common.fn_flags & ZEND_ACC_CLOSURE) || + !func->common.function_name)) { + const zend_op *opcodes = func->op_array.opcodes; + + // JIT: if (call->func.op_array.opcodes != opcodes) goto exit_addr; + ir_GUARD( + ir_EQ( + ir_LOAD_A(ir_ADD_OFFSET(ir_LOAD_A(jit_CALL(call, func)), offsetof(zend_op_array, opcodes))), + ir_CONST_ADDR(opcodes)), + ir_CONST_ADDR(exit_addr)); + } else { + // JIT: if (call->func != func) goto exit_addr; + ir_GUARD(ir_EQ(ir_LOAD_A(jit_CALL(call, func)), ir_CONST_ADDR(func)), ir_CONST_ADDR(exit_addr)); + } + + return 1; +} + +static int zend_jit_init_fcall(zend_jit_ctx *jit, const zend_op *opline, uint32_t b, const zend_op_array *op_array, zend_ssa *ssa, const zend_ssa_op *ssa_op, int call_level, zend_jit_trace_rec *trace, int checked_stack) +{ + zend_func_info *info = ZEND_FUNC_INFO(op_array); + zend_call_info *call_info = NULL; + zend_function *func = NULL; + ir_ref func_ref = IR_UNUSED; + + if (jit->delayed_call_level) { + if (!zend_jit_save_call_chain(jit, jit->delayed_call_level)) { + return 0; + } + } + + if (info) { + call_info = info->callee_info; + while (call_info && call_info->caller_init_opline != opline) { + call_info = call_info->next_callee; + } + if (call_info && call_info->callee_func && !call_info->is_prototype) { + func = call_info->callee_func; + } + } + + if (!func + && 
trace + && trace->op == ZEND_JIT_TRACE_INIT_CALL) { +#ifdef _WIN32 + /* ASLR */ + if (trace->func->type != ZEND_INTERNAL_FUNCTION) { + func = (zend_function*)trace->func; + } +#else + func = (zend_function*)trace->func; +#endif + } + +#ifdef _WIN32 + if (0) { +#else + if (opline->opcode == ZEND_INIT_FCALL + && func + && func->type == ZEND_INTERNAL_FUNCTION) { +#endif + /* load constant address later */ + func_ref = ir_CONST_ADDR(func); + } else if (func && op_array == &func->op_array) { + /* recursive call */ + if (!(func->op_array.fn_flags & ZEND_ACC_IMMUTABLE) + || zend_jit_prefer_const_addr_load(jit, (uintptr_t)func)) { + func_ref = ir_LOAD_A(jit_EX(func)); + } else { + func_ref = ir_CONST_ADDR(func); + } + } else { + ir_ref if_func, cache_slot_ref, ref; + + // JIT: if (CACHED_PTR(opline->result.num)) + cache_slot_ref = ir_ADD_OFFSET(ir_LOAD_A(jit_EX(run_time_cache)), opline->result.num); + func_ref = ir_LOAD_A(cache_slot_ref); + if (JIT_G(trigger) == ZEND_JIT_ON_HOT_TRACE + && func + && (func->common.fn_flags & ZEND_ACC_IMMUTABLE) + && opline->opcode != ZEND_INIT_FCALL) { + /* Called func may be changed because of recompilation. See ext/opcache/tests/jit/init_fcall_003.phpt */ + if_func = ir_IF(ir_EQ(func_ref, ir_CONST_ADDR(func))); + } else { + if_func = ir_IF(func_ref); + } + ir_IF_FALSE_cold(if_func); + if (opline->opcode == ZEND_INIT_FCALL + && func + && func->type == ZEND_USER_FUNCTION + && (func->op_array.fn_flags & ZEND_ACC_IMMUTABLE)) { + ref = ir_HARD_COPY_A(ir_CONST_ADDR(func)); /* load constant once */ + ir_STORE(cache_slot_ref, ref); + ref = ir_CALL_1(IR_ADDR, ir_CONST_FC_FUNC(zend_jit_init_func_run_time_cache_helper), ref); + } else { + zval *zv = RT_CONSTANT(opline, opline->op2); + + if (opline->opcode == ZEND_INIT_FCALL) { + ref = ir_CALL_2(IR_ADDR, ir_CONST_FC_FUNC(zend_jit_find_func_helper), + ir_CONST_ADDR(Z_STR_P(zv)), + cache_slot_ref); + } else if (opline->opcode == ZEND_INIT_FCALL_BY_NAME) { + ref = ir_CALL_2(IR_ADDR, ir_CONST_FC_FUNC(zend_jit_find_func_helper), + ir_CONST_ADDR(Z_STR_P(zv + 1)), + cache_slot_ref); + } else if (opline->opcode == ZEND_INIT_NS_FCALL_BY_NAME) { + ref = ir_CALL_2(IR_ADDR, ir_CONST_FC_FUNC(zend_jit_find_ns_func_helper), + ir_CONST_ADDR(zv), + cache_slot_ref); + } else { + ZEND_UNREACHABLE(); + } + if (JIT_G(trigger) == ZEND_JIT_ON_HOT_TRACE) { + int32_t exit_point = zend_jit_trace_get_exit_point(opline, + func && (func->common.fn_flags & ZEND_ACC_IMMUTABLE) ? 
ZEND_JIT_EXIT_INVALIDATE : 0); + const void *exit_addr = zend_jit_trace_get_exit_addr(exit_point); + + if (!exit_addr) { + return 0; + } + if (!func || opline->opcode == ZEND_INIT_FCALL) { + ir_GUARD(ref, ir_CONST_ADDR(exit_addr)); + } else if (func->type == ZEND_USER_FUNCTION + && !(func->common.fn_flags & ZEND_ACC_IMMUTABLE)) { + const zend_op *opcodes = func->op_array.opcodes; + + ir_GUARD( + ir_EQ( + ir_LOAD_A(ir_ADD_OFFSET(ref, offsetof(zend_op_array, opcodes))), + ir_CONST_ADDR(opcodes)), + ir_CONST_ADDR(exit_addr)); + } else { + ir_GUARD(ir_EQ(ref, ir_CONST_ADDR(func)), ir_CONST_ADDR(exit_addr)); + } + } else { +jit_SET_EX_OPLINE(jit, opline); + ir_GUARD(ref, jit_STUB_ADDR(jit, jit_stub_undefined_function)); + } + } + ir_MERGE_WITH_EMPTY_TRUE(if_func); + func_ref = ir_PHI_2(IR_ADDR, ref, func_ref); + } + + if (!zend_jit_push_call_frame(jit, opline, op_array, func, 0, 0, checked_stack, func_ref, IR_UNUSED)) { + return 0; + } + + if (zend_jit_needs_call_chain(call_info, b, op_array, ssa, ssa_op, opline, call_level, trace)) { + if (!zend_jit_save_call_chain(jit, call_level)) { + return 0; + } + } else { + ZEND_ASSERT(call_level > 0); + jit->delayed_call_level = call_level; + delayed_call_chain = 1; + } + + return 1; +} + +static int zend_jit_init_method_call(zend_jit_ctx *jit, + const zend_op *opline, + uint32_t b, + const zend_op_array *op_array, + zend_ssa *ssa, + const zend_ssa_op *ssa_op, + int call_level, + uint32_t op1_info, + zend_jit_addr op1_addr, + zend_class_entry *ce, + bool ce_is_instanceof, + bool on_this, + bool delayed_fetch_this, + zend_class_entry *trace_ce, + zend_jit_trace_rec *trace, + int checked_stack, + int8_t func_reg, + int8_t this_reg, + bool polymorphic_side_trace) +{ + zend_func_info *info = ZEND_FUNC_INFO(op_array); + zend_call_info *call_info = NULL; + zend_function *func = NULL; + zval *function_name; + ir_ref if_static = IR_UNUSED, cold_path, this_ref = IR_NULL, func_ref = IR_NULL; + + ZEND_ASSERT(opline->op2_type == IS_CONST); + ZEND_ASSERT(op1_info & MAY_BE_OBJECT); + + function_name = RT_CONSTANT(opline, opline->op2); + + if (info) { + call_info = info->callee_info; + while (call_info && call_info->caller_init_opline != opline) { + call_info = call_info->next_callee; + } + if (call_info && call_info->callee_func && !call_info->is_prototype) { + func = call_info->callee_func; + } + } + + if (polymorphic_side_trace) { + /* function is passed in r0 from parent_trace */ + ZEND_ASSERT(func_reg >= 0 && this_reg >= 0); + func_ref = zend_jit_deopt_rload(jit, IR_ADDR, func_reg); + this_ref = zend_jit_deopt_rload(jit, IR_ADDR, this_reg); + } else { + ir_ref ref, ref2, if_found, fast_path, run_time_cache, this_ref2; + + if (on_this) { + zend_jit_addr this_addr = ZEND_ADDR_MEM_ZVAL(ZREG_FP, offsetof(zend_execute_data, This)); + this_ref = jit_Z_PTR(jit, this_addr); + } else { + if (op1_info & MAY_BE_REF) { + if (opline->op1_type == IS_CV) { + // JIT: ZVAL_DEREF(op1) + ir_ref ref = jit_ZVAL_ADDR(jit, op1_addr); + ref = jit_ZVAL_DEREF_ref(jit, ref); + op1_addr = ZEND_ADDR_REF_ZVAL(ref); + } else { + ir_ref if_ref; + + /* Hack: Convert reference to regular value to simplify JIT code */ + ZEND_ASSERT(Z_REG(op1_addr) == ZREG_FP); + + if_ref = jit_if_Z_TYPE(jit, op1_addr, IS_REFERENCE); + ir_IF_TRUE(if_ref); + ir_CALL_1(IR_VOID, ir_CONST_FC_FUNC(zend_jit_unref_helper), jit_ZVAL_ADDR(jit, op1_addr)); + + ir_MERGE_WITH_EMPTY_FALSE(if_ref); + } + } + if (op1_info & ((MAY_BE_UNDEF|MAY_BE_ANY)- MAY_BE_OBJECT)) { + if (JIT_G(trigger) == ZEND_JIT_ON_HOT_TRACE) { + int32_t 
exit_point = zend_jit_trace_get_exit_point(opline, ZEND_JIT_EXIT_TO_VM); + const void *exit_addr = zend_jit_trace_get_exit_addr(exit_point); + + if (!exit_addr) { + return 0; + } + ir_GUARD(ir_EQ(jit_Z_TYPE(jit, op1_addr), ir_CONST_U8(IS_OBJECT)), + ir_CONST_ADDR(exit_addr)); + } else { + ir_ref if_object = jit_if_Z_TYPE(jit, op1_addr, IS_OBJECT); + + ir_IF_FALSE_cold(if_object); + + jit_SET_EX_OPLINE(jit, opline); + if ((opline->op1_type & (IS_VAR|IS_TMP_VAR)) && !delayed_fetch_this) { + ir_CALL_1(IR_VOID, ir_CONST_FC_FUNC(zend_jit_invalid_method_call_tmp), + jit_ZVAL_ADDR(jit, op1_addr)); + } else { + ir_CALL_1(IR_VOID, ir_CONST_FC_FUNC(zend_jit_invalid_method_call), + jit_ZVAL_ADDR(jit, op1_addr)); + } + ir_IJMP(jit_STUB_ADDR(jit, jit_stub_exception_handler)); + ir_IF_TRUE(if_object); + } + } + + this_ref = jit_Z_PTR(jit, op1_addr); + } + + if (jit->delayed_call_level) { + if (!zend_jit_save_call_chain(jit, jit->delayed_call_level)) { + return 0; + } + } + + if (func) { + // JIT: fbc = CACHED_PTR(opline->result.num + sizeof(void*)); + ref = ir_LOAD_A(ir_ADD_OFFSET(ir_LOAD_A(jit_EX(run_time_cache)), opline->result.num + sizeof(void*))); + + if_found = ir_IF(ref); + ir_IF_TRUE(if_found); + fast_path = ir_END(); + } else { + // JIT: if (CACHED_PTR(opline->result.num) == obj->ce)) { + run_time_cache = ir_LOAD_A(jit_EX(run_time_cache)); + ref = ir_EQ( + ir_LOAD_A(ir_ADD_OFFSET(run_time_cache, opline->result.num)), + ir_LOAD_A(ir_ADD_OFFSET(this_ref, offsetof(zend_object, ce)))); + if_found = ir_IF(ref); + ir_IF_TRUE(if_found); + + // JIT: fbc = CACHED_PTR(opline->result.num + sizeof(void*)); + ref = ir_LOAD_A(ir_ADD_OFFSET(run_time_cache, opline->result.num + sizeof(void*))); + fast_path = ir_END(); + + } + + ir_IF_FALSE_cold(if_found); + jit_SET_EX_OPLINE(jit, opline); + + if (!jit->ctx.fixed_call_stack_size) { + // JIT: alloca(sizeof(void*)); + this_ref2 = ir_ALLOCA(ir_CONST_ADDR(0x10)); + } else { + this_ref2 = ir_HARD_COPY_A(ir_RLOAD_A(IR_REG_SP)); + } + ir_STORE(this_ref2, this_ref); + + if ((opline->op1_type & (IS_VAR|IS_TMP_VAR)) && !delayed_fetch_this) { + ref2 = ir_CALL_3(IR_ADDR, ir_CONST_FC_FUNC(zend_jit_find_method_tmp_helper), + this_ref, + ir_CONST_ADDR(function_name), + this_ref2); + } else { + ref2 = ir_CALL_3(IR_ADDR, ir_CONST_FC_FUNC(zend_jit_find_method_helper), + this_ref, + ir_CONST_ADDR(function_name), + this_ref2); + } + + this_ref2 = ir_LOAD_A(ir_RLOAD_A(IR_REG_SP)); + if (!jit->ctx.fixed_call_stack_size) { + // JIT: revert alloca + ir_AFREE(ir_CONST_ADDR(0x10)); + } + + ir_GUARD(ref2, jit_STUB_ADDR(jit, jit_stub_exception_handler)); + + ir_MERGE_WITH(fast_path); + func_ref = ir_PHI_2(IR_ADDR, ref2, ref); + this_ref = ir_PHI_2(IR_ADDR, this_ref2, this_ref); + } + + if ((!func || zend_jit_may_be_modified(func, op_array)) + && trace + && trace->op == ZEND_JIT_TRACE_INIT_CALL + && trace->func +#ifdef _WIN32 + && trace->func->type != ZEND_INTERNAL_FUNCTION +#endif + ) { + int32_t exit_point; + const void *exit_addr; + + exit_point = zend_jit_trace_get_exit_point(opline, func ? 
ZEND_JIT_EXIT_INVALIDATE : ZEND_JIT_EXIT_METHOD_CALL); + exit_addr = zend_jit_trace_get_exit_addr(exit_point); + if (!exit_addr) { + return 0; + } + + jit->trace->exit_info[exit_point].poly_func_ref = func_ref; + jit->trace->exit_info[exit_point].poly_this_ref = this_ref; + + func = (zend_function*)trace->func; + + if (func->type == ZEND_USER_FUNCTION && + (!(func->common.fn_flags & ZEND_ACC_IMMUTABLE) || + (func->common.fn_flags & ZEND_ACC_CLOSURE) || + !func->common.function_name)) { + const zend_op *opcodes = func->op_array.opcodes; + + ir_GUARD( + ir_EQ( + ir_LOAD_A(ir_ADD_OFFSET(func_ref, offsetof(zend_op_array, opcodes))), + ir_CONST_ADDR(opcodes)), + ir_CONST_ADDR(exit_addr)); + } else { + ir_GUARD(ir_EQ(func_ref, ir_CONST_ADDR(func)), ir_CONST_ADDR(exit_addr)); + } + } + + if (!func) { + // JIT: if (fbc->common.fn_flags & ZEND_ACC_STATIC) { + if_static = ir_IF(ir_AND_U32( + ir_LOAD_U32(ir_ADD_OFFSET(func_ref, offsetof(zend_function, common.fn_flags))), + ir_CONST_U32(ZEND_ACC_STATIC))); + ir_IF_TRUE_cold(if_static); + } + + if (!func || (func->common.fn_flags & ZEND_ACC_STATIC) != 0) { + ir_ref ret; + + if ((opline->op1_type & (IS_VAR|IS_TMP_VAR)) && !delayed_fetch_this) { + ret = ir_CALL_3(IR_ADDR, ir_CONST_FC_FUNC(zend_jit_push_static_metod_call_frame_tmp), + this_ref, + func_ref, + ir_CONST_U32(opline->extended_value)); + } else { + ret = ir_CALL_3(IR_ADDR, ir_CONST_FC_FUNC(zend_jit_push_static_metod_call_frame), + this_ref, + func_ref, + ir_CONST_U32(opline->extended_value)); + } + + if ((opline->op1_type & (IS_VAR|IS_TMP_VAR) && !delayed_fetch_this)) { + ir_GUARD(ret, jit_STUB_ADDR(jit, jit_stub_exception_handler)); + } + jit_STORE_IP(jit, ret); + } + + if (!func) { + cold_path = ir_END(); + ir_IF_FALSE(if_static); + } + + if (!func || (func->common.fn_flags & ZEND_ACC_STATIC) == 0) { + if (!zend_jit_push_call_frame(jit, opline, NULL, func, 0, delayed_fetch_this, checked_stack, func_ref, this_ref)) { + return 0; + } + } + + if (!func) { + ir_MERGE_WITH(cold_path); + } + zend_jit_start_reuse_ip(jit); + + if (zend_jit_needs_call_chain(call_info, b, op_array, ssa, ssa_op, opline, call_level, trace)) { + if (!zend_jit_save_call_chain(jit, call_level)) { + return 0; + } + } else { + ZEND_ASSERT(call_level > 0); + delayed_call_chain = 1; + jit->delayed_call_level = call_level; + } + + return 1; +} + +static int zend_jit_init_closure_call(zend_jit_ctx *jit, + const zend_op *opline, + uint32_t b, + const zend_op_array *op_array, + zend_ssa *ssa, + const zend_ssa_op *ssa_op, + int call_level, + zend_jit_trace_rec *trace, + int checked_stack) +{ + zend_function *func = NULL; + zend_jit_addr op2_addr = ZEND_ADDR_MEM_ZVAL(ZREG_FP, opline->op2.var); + ir_ref ref; + + ref = jit_Z_PTR(jit, op2_addr); + + if (ssa->var_info[ssa_op->op2_use].ce != zend_ce_closure + && !(ssa->var_info[ssa_op->op2_use].type & MAY_BE_CLASS_GUARD)) { + int32_t exit_point = zend_jit_trace_get_exit_point(opline, ZEND_JIT_EXIT_TO_VM); + const void *exit_addr = zend_jit_trace_get_exit_addr(exit_point); + + if (!exit_addr) { + return 0; + } + + ir_GUARD( + ir_EQ( + ir_LOAD_A(ir_ADD_OFFSET(ref, offsetof(zend_object, ce))), + ir_CONST_ADDR(zend_ce_closure)), + ir_CONST_ADDR(exit_addr)); + + if (ssa->var_info && ssa_op->op2_use >= 0) { + ssa->var_info[ssa_op->op2_use].type |= MAY_BE_CLASS_GUARD; + ssa->var_info[ssa_op->op2_use].ce = zend_ce_closure; + ssa->var_info[ssa_op->op2_use].is_instanceof = 0; + } + } + + if (trace + && trace->op == ZEND_JIT_TRACE_INIT_CALL + && trace->func + && trace->func->type == 
ZEND_USER_FUNCTION) { + const zend_op *opcodes; + int32_t exit_point; + const void *exit_addr; + + func = (zend_function*)trace->func; + opcodes = func->op_array.opcodes; + exit_point = zend_jit_trace_get_exit_point(opline, ZEND_JIT_EXIT_CLOSURE_CALL); + exit_addr = zend_jit_trace_get_exit_addr(exit_point); + if (!exit_addr) { + return 0; + } + + ir_GUARD( + ir_EQ( + ir_LOAD_A(ir_ADD_OFFSET(ref, offsetof(zend_closure, func.op_array.opcodes))), + ir_CONST_ADDR(opcodes)), + ir_CONST_ADDR(exit_addr)); + } + + if (jit->delayed_call_level) { + if (!zend_jit_save_call_chain(jit, jit->delayed_call_level)) { + return 0; + } + } + + if (!zend_jit_push_call_frame(jit, opline, NULL, func, 1, 0, checked_stack, ref, IR_UNUSED)) { + return 0; + } + + if (zend_jit_needs_call_chain(NULL, b, op_array, ssa, ssa_op, opline, call_level, trace)) { + if (!zend_jit_save_call_chain(jit, call_level)) { + return 0; + } + } else { + ZEND_ASSERT(call_level > 0); + delayed_call_chain = 1; + jit->delayed_call_level = call_level; + } + + if (trace + && trace->op == ZEND_JIT_TRACE_END + && trace->stop == ZEND_JIT_TRACE_STOP_INTERPRETER) { + if (!zend_jit_set_ip(jit, opline + 1)) { + return 0; + } + } + + return 1; +} + +static int zend_jit_send_val(zend_jit_ctx *jit, const zend_op *opline, uint32_t op1_info, zend_jit_addr op1_addr) +{ + uint32_t arg_num = opline->op2.num; + zend_jit_addr arg_addr; + + ZEND_ASSERT(opline->opcode == ZEND_SEND_VAL || arg_num <= MAX_ARG_FLAG_NUM); + + if (!zend_jit_reuse_ip(jit)) { + return 0; + } + + if (opline->opcode == ZEND_SEND_VAL_EX) { + uint32_t mask = ZEND_SEND_BY_REF << ((arg_num + 3) * 2); + + ZEND_ASSERT(arg_num <= MAX_ARG_FLAG_NUM); + + if (JIT_G(trigger) == ZEND_JIT_ON_HOT_TRACE + && JIT_G(current_frame) + && JIT_G(current_frame)->call + && JIT_G(current_frame)->call->func) { + if (ARG_MUST_BE_SENT_BY_REF(JIT_G(current_frame)->call->func, arg_num)) { + /* Don't generate code that always throws exception */ + return 0; + } + } else { + ir_ref cond = ir_AND_U32( + ir_LOAD_U32(ir_ADD_OFFSET(ir_LOAD_A(jit_RX(func)), offsetof(zend_function, quick_arg_flags))), + ir_CONST_U32(mask)); + + if (JIT_G(trigger) == ZEND_JIT_ON_HOT_TRACE) { + int32_t exit_point = zend_jit_trace_get_exit_point(opline, ZEND_JIT_EXIT_TO_VM); + const void *exit_addr = zend_jit_trace_get_exit_addr(exit_point); + if (!exit_addr) { + return 0; + } + ir_GUARD_NOT(cond, ir_CONST_ADDR(exit_addr)); + } else { + ir_ref if_pass_by_ref; + + if_pass_by_ref = ir_IF(cond); + + ir_IF_TRUE_cold(if_pass_by_ref); + if (Z_MODE(op1_addr) == IS_REG) { + /* set type to avoid zval_ptr_dtor() on uninitialized value */ + zend_jit_addr addr = ZEND_ADDR_MEM_ZVAL(ZREG_FP, opline->op1.var); + jit_set_Z_TYPE_INFO(jit, addr, IS_UNDEF); + } + jit_SET_EX_OPLINE(jit, opline); + ir_IJMP(jit_STUB_ADDR(jit, jit_stub_throw_cannot_pass_by_ref)); + + ir_IF_FALSE(if_pass_by_ref); + } + } + } + + arg_addr = ZEND_ADDR_MEM_ZVAL(ZREG_RX, opline->result.var); + + if (opline->op1_type == IS_CONST) { + zval *zv = RT_CONSTANT(opline, opline->op1); + + jit_ZVAL_COPY_CONST(jit, + arg_addr, + MAY_BE_ANY, MAY_BE_ANY, + zv, 1); + } else { + jit_ZVAL_COPY(jit, + arg_addr, + MAY_BE_ANY, + op1_addr, op1_info, 0); + } + + return 1; +} + +static int zend_jit_send_ref(zend_jit_ctx *jit, const zend_op *opline, const zend_op_array *op_array, uint32_t op1_info, int cold) +{ + zend_jit_addr op1_addr, arg_addr, ref_addr; + ir_ref ref_path = IR_UNUSED; + + op1_addr = OP1_ADDR(); + arg_addr = ZEND_ADDR_MEM_ZVAL(ZREG_RX, opline->result.var); + + if (!zend_jit_reuse_ip(jit)) { 
+ return 0; + } + + if (opline->op1_type == IS_VAR) { + if (op1_info & MAY_BE_INDIRECT) { + op1_addr = jit_ZVAL_INDIRECT_DEREF(jit, op1_addr); + } + } else if (opline->op1_type == IS_CV) { + if (op1_info & MAY_BE_UNDEF) { + if (op1_info & (MAY_BE_ANY|MAY_BE_REF)) { + // JIT: if (Z_TYPE_P(op1) == IS_UNDEF) + ir_ref if_def = jit_if_not_Z_TYPE(jit, op1_addr, IS_UNDEF); + ir_IF_FALSE(if_def); + // JIT: ZVAL_NULL(op1) + jit_set_Z_TYPE_INFO(jit,op1_addr, IS_NULL); + ir_MERGE_WITH_EMPTY_TRUE(if_def); + } + op1_info &= ~MAY_BE_UNDEF; + op1_info |= MAY_BE_NULL; + } + } else { + ZEND_UNREACHABLE(); + } + + if (op1_info & (MAY_BE_UNDEF|MAY_BE_ANY|MAY_BE_REF)) { + ir_ref ref, ref2; + + if (op1_info & MAY_BE_REF) { + ir_ref if_ref; + + // JIT: if (Z_TYPE_P(op1) == IS_UNDEF) + if_ref = jit_if_Z_TYPE(jit, op1_addr, IS_REFERENCE); + ir_IF_TRUE(if_ref); + // JIT: ref = Z_PTR_P(op1) + ref = jit_Z_PTR(jit, op1_addr); + // JIT: GC_ADDREF(ref) + jit_GC_ADDREF(jit, ref); + // JIT: ZVAL_REFERENCE(arg, ref) + jit_set_Z_PTR(jit, arg_addr, ref); + jit_set_Z_TYPE_INFO(jit, arg_addr, IS_REFERENCE_EX); + ref_path = ir_END(); + ir_IF_FALSE(if_ref); + } + + // JIT: ZVAL_NEW_REF(arg, varptr); + // JIT: ref = emalloc(sizeof(zend_reference)); + ref = jit_EMALLOC(jit, sizeof(zend_reference), op_array, opline); + // JIT: GC_REFCOUNT(ref) = 2 + jit_set_GC_REFCOUNT(jit, ref, 2); + // JIT: GC_TYPE(ref) = GC_REFERENCE + ir_STORE(ir_ADD_OFFSET(ref, offsetof(zend_reference, gc.u.type_info)), ir_CONST_U32(GC_REFERENCE)); + ir_STORE(ir_ADD_OFFSET(ref, offsetof(zend_reference, sources.ptr)), IR_NULL); + ref2 = ir_ADD_OFFSET(ref, offsetof(zend_reference, val)); + ref_addr = ZEND_ADDR_REF_ZVAL(ref2); + + // JIT: ZVAL_COPY_VALUE(&ref->val, op1) + jit_ZVAL_COPY(jit, + ref_addr, + MAY_BE_ANY, + op1_addr, op1_info, 0); + + // JIT: ZVAL_REFERENCE(arg, ref) + jit_set_Z_PTR(jit, op1_addr, ref); + jit_set_Z_TYPE_INFO(jit, op1_addr, IS_REFERENCE_EX); + + // JIT: ZVAL_REFERENCE(arg, ref) + jit_set_Z_PTR(jit, arg_addr, ref); + jit_set_Z_TYPE_INFO(jit, arg_addr, IS_REFERENCE_EX); + } + + if (ref_path) { + ir_MERGE_WITH(ref_path); + } + + jit_FREE_OP(jit, opline->op1_type, opline->op1, op1_info, opline); + + return 1; +} + +static int zend_jit_send_var(zend_jit_ctx *jit, const zend_op *opline, const zend_op_array *op_array, uint32_t op1_info, zend_jit_addr op1_addr, zend_jit_addr op1_def_addr) +{ + uint32_t arg_num = opline->op2.num; + zend_jit_addr arg_addr; + ir_ref end_inputs = IR_UNUSED; + + ZEND_ASSERT((opline->opcode != ZEND_SEND_VAR_EX && + opline->opcode != ZEND_SEND_VAR_NO_REF_EX) || + arg_num <= MAX_ARG_FLAG_NUM); + + arg_addr = ZEND_ADDR_MEM_ZVAL(ZREG_RX, opline->result.var); + + if (!zend_jit_reuse_ip(jit)) { + return 0; + } + + if (opline->opcode == ZEND_SEND_VAR_EX) { + if (JIT_G(trigger) == ZEND_JIT_ON_HOT_TRACE + && JIT_G(current_frame) + && JIT_G(current_frame)->call + && JIT_G(current_frame)->call->func) { + if (ARG_SHOULD_BE_SENT_BY_REF(JIT_G(current_frame)->call->func, arg_num)) { + if (!zend_jit_send_ref(jit, opline, op_array, op1_info, 0)) { + return 0; + } + return 1; + } + } else { + uint32_t mask = (ZEND_SEND_BY_REF|ZEND_SEND_PREFER_REF) << ((arg_num + 3) * 2); + + // JIT: if (RX->func->quick_arg_flags & mask) + ir_ref if_send_by_ref = ir_IF(ir_AND_U32( + ir_LOAD_U32(ir_ADD_OFFSET(ir_LOAD_A(jit_RX(func)), offsetof(zend_function, quick_arg_flags))), + ir_CONST_U32(mask))); + ir_IF_TRUE_cold(if_send_by_ref); + + if (!zend_jit_send_ref(jit, opline, op_array, op1_info, 1)) { + return 0; + } + + ir_END_list(end_inputs); + 
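+			/* Not sent by reference: fall through to the generic by-value copy below. */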
			ir_IF_FALSE(if_send_by_ref);
+		}
+	} else if (opline->opcode == ZEND_SEND_VAR_NO_REF_EX) {
+		if (JIT_G(trigger) == ZEND_JIT_ON_HOT_TRACE
+		 && JIT_G(current_frame)
+		 && JIT_G(current_frame)->call
+		 && JIT_G(current_frame)->call->func) {
+			if (ARG_SHOULD_BE_SENT_BY_REF(JIT_G(current_frame)->call->func, arg_num)) {
+
+				// JIT: ZVAL_COPY_VALUE(arg, op1)
+				jit_ZVAL_COPY(jit,
+					arg_addr,
+					MAY_BE_ANY,
+					op1_addr, op1_info, 0);
+
+				if (!ARG_MAY_BE_SENT_BY_REF(JIT_G(current_frame)->call->func, arg_num)) {
+					if (!(op1_info & MAY_BE_REF)) {
+						/* Don't generate code that always throws exception */
+						return 0;
+					} else {
+						int32_t exit_point = zend_jit_trace_get_exit_point(opline, ZEND_JIT_EXIT_TO_VM);
+						const void *exit_addr = zend_jit_trace_get_exit_addr(exit_point);
+						if (!exit_addr) {
+							return 0;
+						}
+
+						// JIT: if (Z_TYPE_P(op1) != IS_REFERENCE)
+						ir_GUARD(ir_EQ(jit_Z_TYPE(jit, op1_addr), ir_CONST_U8(IS_REFERENCE)),
+							ir_CONST_ADDR(exit_addr));
+					}
+				}
+				return 1;
+			}
+		} else {
+			uint32_t mask = (ZEND_SEND_BY_REF|ZEND_SEND_PREFER_REF) << ((arg_num + 3) * 2);
+			ir_ref func, if_send_by_ref, if_prefer_ref;
+
+			// JIT: if (RX->func->quick_arg_flags & mask)
+			func = ir_LOAD_A(jit_RX(func));
+			if_send_by_ref = ir_IF(ir_AND_U32(
+				ir_LOAD_U32(ir_ADD_OFFSET(func, offsetof(zend_function, quick_arg_flags))),
+				ir_CONST_U32(mask)));
+			ir_IF_TRUE_cold(if_send_by_ref);
+
+			mask = ZEND_SEND_PREFER_REF << ((arg_num + 3) * 2);
+
+			// JIT: ZVAL_COPY_VALUE(arg, op1)
+			jit_ZVAL_COPY(jit,
+				arg_addr,
+				MAY_BE_ANY,
+				op1_addr, op1_info, 0);
+
+			if (op1_info & MAY_BE_REF) {
+				ir_ref if_ref = jit_if_Z_TYPE(jit, arg_addr, IS_REFERENCE);
+				ir_IF_TRUE(if_ref);
+				ir_END_list(end_inputs);
+				ir_IF_FALSE(if_ref);
+			}
+
+			// JIT: if (RX->func->quick_arg_flags & mask)
+			if_prefer_ref = ir_IF(ir_AND_U32(
+				ir_LOAD_U32(ir_ADD_OFFSET(func, offsetof(zend_function, quick_arg_flags))),
+				ir_CONST_U32(mask)));
+			ir_IF_TRUE(if_prefer_ref);
+			ir_END_list(end_inputs);
+			ir_IF_FALSE(if_prefer_ref);
+
+			if (JIT_G(trigger) == ZEND_JIT_ON_HOT_TRACE) {
+				int32_t exit_point = zend_jit_trace_get_exit_point(opline, ZEND_JIT_EXIT_TO_VM);
+				const void *exit_addr = zend_jit_trace_get_exit_addr(exit_point);
+				if (!exit_addr) {
+					return 0;
+				}
+				jit_SIDE_EXIT(jit, ir_CONST_ADDR(exit_addr));
+			} else {
+				jit_SET_EX_OPLINE(jit, opline);
+				ir_CALL_1(IR_VOID, ir_CONST_FC_FUNC(zend_jit_only_vars_by_reference),
+					jit_ZVAL_ADDR(jit, arg_addr));
+				zend_jit_check_exception(jit);
+				ir_END_list(end_inputs);
+			}
+
+			ir_IF_FALSE(if_send_by_ref);
+		}
+	} else if (opline->opcode == ZEND_SEND_FUNC_ARG) {
+		if (JIT_G(trigger) == ZEND_JIT_ON_HOT_TRACE
+		 && JIT_G(current_frame)
+		 && JIT_G(current_frame)->call
+		 && JIT_G(current_frame)->call->func) {
+			if (ARG_SHOULD_BE_SENT_BY_REF(JIT_G(current_frame)->call->func, arg_num)) {
+				if (!zend_jit_send_ref(jit, opline, op_array, op1_info, 0)) {
+					return 0;
+				}
+				return 1;
+			}
+		} else {
+			// JIT: if (RX->This.u1.type_info & ZEND_CALL_SEND_ARG_BY_REF)
+			ir_ref if_send_by_ref = ir_IF(ir_AND_U32(
+				ir_LOAD_U32(jit_RX(This.u1.type_info)),
+				ir_CONST_U32(ZEND_CALL_SEND_ARG_BY_REF)));
+			ir_IF_TRUE_cold(if_send_by_ref);
+
+			if (!zend_jit_send_ref(jit, opline, op_array, op1_info, 1)) {
+				return 0;
+			}
+
+			ir_END_list(end_inputs);
+			ir_IF_FALSE(if_send_by_ref);
+		}
+	}
+
+	if (op1_info & MAY_BE_UNDEF) {
+		ir_ref ref, if_def = IR_UNUSED;
+
+		if (op1_info & (MAY_BE_ANY|MAY_BE_REF)) {
+			if_def = jit_if_not_Z_TYPE(jit, op1_addr, IS_UNDEF);
+			ir_IF_FALSE_cold(if_def);
+		}
+
+		// JIT: zend_jit_undefined_op_helper(opline->op1.var)
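+		/* Undefined CV: the helper raises the warning, the argument is set to NULL, and a pending exception aborts. */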
jit_SET_EX_OPLINE(jit, opline); + ref = ir_CALL_1(IR_I32, ir_CONST_FC_FUNC(zend_jit_undefined_op_helper), + ir_CONST_U32(opline->op1.var)); + + // JIT: ZVAL_NULL(arg) + jit_set_Z_TYPE_INFO(jit, arg_addr, IS_NULL); + + // JIT: check_exception + ir_GUARD(ref, jit_STUB_ADDR(jit, jit_stub_exception_handler)); + + if (op1_info & (MAY_BE_ANY|MAY_BE_REF)) { + ir_END_list(end_inputs); + ir_IF_TRUE(if_def); + } else { + if (end_inputs) { + ir_END_list(end_inputs); + ir_MERGE_list(end_inputs); + } + return 1; + } + } + + if (opline->opcode == ZEND_SEND_VAR_NO_REF) { + // JIT: ZVAL_COPY_VALUE(arg, op1) + jit_ZVAL_COPY(jit, + arg_addr, + MAY_BE_ANY, + op1_addr, op1_info, 0); + if (op1_info & MAY_BE_REF) { + // JIT: if (Z_TYPE_P(arg) == IS_REFERENCE) + ir_ref if_ref = jit_if_Z_TYPE(jit, arg_addr, IS_REFERENCE); + ir_IF_TRUE(if_ref); + ir_END_list(end_inputs); + ir_IF_FALSE(if_ref); + } + if (JIT_G(trigger) == ZEND_JIT_ON_HOT_TRACE) { + int32_t exit_point = zend_jit_trace_get_exit_point(opline, ZEND_JIT_EXIT_TO_VM); + const void *exit_addr = zend_jit_trace_get_exit_addr(exit_point); + if (!exit_addr) { + return 0; + } + ir_GUARD(IR_FALSE, ir_CONST_ADDR(exit_addr)); + } else { + jit_SET_EX_OPLINE(jit, opline); + ir_CALL_1(IR_VOID, ir_CONST_FC_FUNC(zend_jit_only_vars_by_reference), + jit_ZVAL_ADDR(jit, arg_addr)); + zend_jit_check_exception(jit); + } + } else { + if (op1_info & MAY_BE_REF) { + if (opline->op1_type == IS_CV) { + ir_ref ref; + + // JIT: ZVAL_DEREF(op1) + ref = jit_ZVAL_ADDR(jit, op1_addr); + ref = jit_ZVAL_DEREF_ref(jit, ref); + op1_addr = ZEND_ADDR_REF_ZVAL(ref); + + // JIT: ZVAL_COPY(arg, op1) + jit_ZVAL_COPY(jit, + arg_addr, + MAY_BE_ANY, + op1_addr, op1_info, 1); + } else { + ir_ref if_ref, ref, ref2, refcount, if_not_zero, if_refcounted; + zend_jit_addr ref_addr; + + // JIT: if (Z_TYPE_P(op1) == IS_REFERENCE) + if_ref = jit_if_Z_TYPE(jit, op1_addr, IS_REFERENCE); + ir_IF_TRUE_cold(if_ref); + + // JIT: ref = Z_COUNTED_P(op1); + ref = jit_Z_PTR(jit, op1_addr); + ref2 = ir_ADD_OFFSET(ref, offsetof(zend_reference, val)); + ref_addr = ZEND_ADDR_REF_ZVAL(ref2); + + // JIT: ZVAL_COPY_VALUE(arg, op1); + jit_ZVAL_COPY(jit, + arg_addr, + MAY_BE_ANY, + ref_addr, op1_info, 0); + + // JIT: if (GC_DELREF(ref) != 0) + refcount = jit_GC_DELREF(jit, ref); + if_not_zero = ir_IF(refcount); + ir_IF_TRUE(if_not_zero); + + // JIT: if (Z_REFCOUNTED_P(arg) + if_refcounted = jit_if_REFCOUNTED(jit, arg_addr); + ir_IF_TRUE(if_refcounted); + // JIT: Z_ADDREF_P(arg) + jit_GC_ADDREF(jit, jit_Z_PTR(jit, arg_addr)); + ir_END_list(end_inputs); + ir_IF_FALSE(if_refcounted); + ir_END_list(end_inputs); + + ir_IF_FALSE(if_not_zero); + + // JIT: efree(ref) + jit_EFREE(jit, ref, sizeof(zend_reference), op_array, opline); + ir_END_list(end_inputs); + + ir_IF_FALSE(if_ref); + + // JIT: ZVAL_COPY_VALUE(arg, op1); + jit_ZVAL_COPY(jit, + arg_addr, + MAY_BE_ANY, + op1_addr, op1_info, 0); + } + } else { + if (op1_addr != op1_def_addr) { + if (!zend_jit_update_regs(jit, opline->op1.var, op1_addr, op1_def_addr, op1_info)) { + return 0; + } + if (Z_MODE(op1_def_addr) == IS_REG && Z_MODE(op1_addr) != IS_REG) { + op1_addr = op1_def_addr; + } + } + + // JIT: ZVAL_COPY_VALUE(arg, op1) + jit_ZVAL_COPY(jit, + arg_addr, + MAY_BE_ANY, + op1_addr, op1_info, opline->op1_type == IS_CV); + } + } + + if (end_inputs) { + ir_END_list(end_inputs); + ir_MERGE_list(end_inputs); + } + + return 1; +} + +static int zend_jit_check_func_arg(zend_jit_ctx *jit, const zend_op *opline) +{ + uint32_t arg_num = opline->op2.num; + ir_ref ref; + + if 
(JIT_G(trigger) == ZEND_JIT_ON_HOT_TRACE + && JIT_G(current_frame) + && JIT_G(current_frame)->call + && JIT_G(current_frame)->call->func) { + if (ARG_SHOULD_BE_SENT_BY_REF(JIT_G(current_frame)->call->func, arg_num)) { + if (!TRACE_FRAME_IS_LAST_SEND_BY_REF(JIT_G(current_frame)->call)) { + TRACE_FRAME_SET_LAST_SEND_BY_REF(JIT_G(current_frame)->call); + // JIT: ZEND_ADD_CALL_FLAG(EX(call), ZEND_CALL_SEND_ARG_BY_REF); + if (jit->reuse_ip) { + ref = jit_IP(jit); + } else { + ref = ir_LOAD_A(jit_EX(call)); + } + ref = jit_CALL(ref, This.u1.type_info); + ir_STORE(ref, ir_OR_U32(ir_LOAD_U32(ref), ir_CONST_U32(ZEND_CALL_SEND_ARG_BY_REF))); + } + } else { + if (!TRACE_FRAME_IS_LAST_SEND_BY_VAL(JIT_G(current_frame)->call)) { + TRACE_FRAME_SET_LAST_SEND_BY_VAL(JIT_G(current_frame)->call); + // JIT: ZEND_DEL_CALL_FLAG(EX(call), ZEND_CALL_SEND_ARG_BY_REF); + if (jit->reuse_ip) { + ref = jit_IP(jit); + } else { + ref = ir_LOAD_A(jit_EX(call)); + } + ref = jit_CALL(ref, This.u1.type_info); + ir_STORE(ref, ir_AND_U32(ir_LOAD_U32(ref), ir_CONST_U32(~ZEND_CALL_SEND_ARG_BY_REF))); + } + } + } else { + // JIT: if (QUICK_ARG_SHOULD_BE_SENT_BY_REF(EX(call)->func, arg_num)) { + uint32_t mask = (ZEND_SEND_BY_REF|ZEND_SEND_PREFER_REF) << ((arg_num + 3) * 2); + ir_ref rx, if_ref, cold_path; + + if (!zend_jit_reuse_ip(jit)) { + return 0; + } + + rx = jit_IP(jit); + + ref = ir_AND_U32( + ir_LOAD_U32(ir_ADD_OFFSET(ir_LOAD_A(jit_CALL(rx, func)), offsetof(zend_function, quick_arg_flags))), + ir_CONST_U32(mask)); + if_ref = ir_IF(ref); + ir_IF_TRUE_cold(if_ref); + + // JIT: ZEND_ADD_CALL_FLAG(EX(call), ZEND_CALL_SEND_ARG_BY_REF); + ref = jit_CALL(rx, This.u1.type_info); + ir_STORE(ref, ir_OR_U32(ir_LOAD_U32(ref), ir_CONST_U32(ZEND_CALL_SEND_ARG_BY_REF))); + + cold_path = ir_END(); + ir_IF_FALSE(if_ref); + + // JIT: ZEND_DEL_CALL_FLAG(EX(call), ZEND_CALL_SEND_ARG_BY_REF); + ref = jit_CALL(rx, This.u1.type_info); + ir_STORE(ref, ir_AND_U32(ir_LOAD_U32(ref), ir_CONST_U32(~ZEND_CALL_SEND_ARG_BY_REF))); + + ir_MERGE_WITH(cold_path); + } + + return 1; +} + +static int zend_jit_check_undef_args(zend_jit_ctx *jit, const zend_op *opline) +{ + ir_ref call, if_may_have_undef, ret; + + if (jit->reuse_ip) { + call = jit_IP(jit); + } else { + call = ir_LOAD_A(jit_EX(call)); + } + + if_may_have_undef = ir_IF(ir_AND_U8( + ir_LOAD_U8(ir_ADD_OFFSET(call, offsetof(zend_execute_data, This.u1.type_info) + 3)), + ir_CONST_U8(ZEND_CALL_MAY_HAVE_UNDEF >> 24))); + + ir_IF_TRUE_cold(if_may_have_undef); + jit_SET_EX_OPLINE(jit, opline); + ret = ir_CALL_1(IR_I32, ir_CONST_FC_FUNC(zend_handle_undef_args), call); + ir_GUARD_NOT(ret, jit_STUB_ADDR(jit, jit_stub_exception_handler)); + ir_MERGE_WITH_EMPTY_FALSE(if_may_have_undef); + + return 1; +} + +static int zend_jit_do_fcall(zend_jit_ctx *jit, const zend_op *opline, const zend_op_array *op_array, zend_ssa *ssa, int call_level, unsigned int next_block, zend_jit_trace_rec *trace) +{ + zend_func_info *info = ZEND_FUNC_INFO(op_array); + zend_call_info *call_info = NULL; + const zend_function *func = NULL; + uint32_t i; + uint32_t call_num_args = 0; + bool unknown_num_args = 0; + const void *exit_addr = NULL; + const zend_op *prev_opline; + ir_ref rx, func_ref = IR_UNUSED, if_user = IR_UNUSED, user_path = IR_UNUSED; + + prev_opline = opline - 1; + while (prev_opline->opcode == ZEND_EXT_FCALL_BEGIN || prev_opline->opcode == ZEND_TICKS) { + prev_opline--; + } + if (prev_opline->opcode == ZEND_SEND_UNPACK || prev_opline->opcode == ZEND_SEND_ARRAY || + prev_opline->opcode == ZEND_CHECK_UNDEF_ARGS) { + 
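+		/* SEND_UNPACK/SEND_ARRAY/CHECK_UNDEF_ARGS: the exact argument count is only known at run time. */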
unknown_num_args = 1; + } + + if (info) { + call_info = info->callee_info; + while (call_info && call_info->caller_call_opline != opline) { + call_info = call_info->next_callee; + } + if (call_info && call_info->callee_func && !call_info->is_prototype) { + func = call_info->callee_func; + } + if ((op_array->fn_flags & ZEND_ACC_TRAIT_CLONE) + && JIT_G(current_frame) + && JIT_G(current_frame)->call + && !JIT_G(current_frame)->call->func) { + call_info = NULL; func = NULL; /* megamorphic call from trait */ + } + } + if (!func) { + /* resolve function at run time */ + } else if (func->type == ZEND_USER_FUNCTION) { + ZEND_ASSERT(opline->opcode != ZEND_DO_ICALL); + call_num_args = call_info->num_args; + } else if (func->type == ZEND_INTERNAL_FUNCTION) { + ZEND_ASSERT(opline->opcode != ZEND_DO_UCALL); + call_num_args = call_info->num_args; + } else { + ZEND_UNREACHABLE(); + } + + if (trace && !func) { + if (trace->op == ZEND_JIT_TRACE_DO_ICALL) { + ZEND_ASSERT(trace->func->type == ZEND_INTERNAL_FUNCTION); +#ifndef ZEND_WIN32 + // TODO: ASLR may cause different addresses in different workers ??? + func = trace->func; + if (JIT_G(current_frame) && + JIT_G(current_frame)->call && + TRACE_FRAME_NUM_ARGS(JIT_G(current_frame)->call) >= 0) { + call_num_args = TRACE_FRAME_NUM_ARGS(JIT_G(current_frame)->call); + } else { + unknown_num_args = 1; + } +#endif + } else if (trace->op == ZEND_JIT_TRACE_ENTER) { + ZEND_ASSERT(trace->func->type == ZEND_USER_FUNCTION); + if (zend_accel_in_shm(trace->func->op_array.opcodes)) { + func = trace->func; + if (JIT_G(current_frame) && + JIT_G(current_frame)->call && + TRACE_FRAME_NUM_ARGS(JIT_G(current_frame)->call) >= 0) { + call_num_args = TRACE_FRAME_NUM_ARGS(JIT_G(current_frame)->call); + } else { + unknown_num_args = 1; + } + } + } + } + + bool may_have_extra_named_params = + opline->extended_value == ZEND_FCALL_MAY_HAVE_EXTRA_NAMED_PARAMS && + (!func || func->common.fn_flags & ZEND_ACC_VARIADIC); + + if (!jit->reuse_ip) { + zend_jit_start_reuse_ip(jit); + // JIT: call = EX(call); + jit_STORE_IP(jit, ir_LOAD_A(jit_EX(call))); + } + rx = jit_IP(jit); + zend_jit_stop_reuse_ip(jit); + + jit_SET_EX_OPLINE(jit, opline); + + if (opline->opcode == ZEND_DO_FCALL) { + if (!func) { + if (trace) { + uint32_t exit_point = zend_jit_trace_get_exit_point(opline, ZEND_JIT_EXIT_TO_VM); + + exit_addr = zend_jit_trace_get_exit_addr(exit_point); + if (!exit_addr) { + return 0; + } + + func_ref = ir_LOAD_A(jit_CALL(rx, func)); + ir_GUARD_NOT( + ir_AND_U32( + ir_LOAD_U32(ir_ADD_OFFSET(func_ref, offsetof(zend_op_array, fn_flags))), + ir_CONST_U32(ZEND_ACC_DEPRECATED)), + ir_CONST_ADDR(exit_addr)); + } + } + } + + if (!jit->delayed_call_level) { + // JIT: EX(call) = call->prev_execute_data; + ir_STORE(jit_EX(call), + (call_level == 1) ? 
IR_NULL : ir_LOAD_A(jit_CALL(rx, prev_execute_data))); + } + delayed_call_chain = 0; + jit->delayed_call_level = 0; + + // JIT: call->prev_execute_data = execute_data; + ir_STORE(jit_CALL(rx, prev_execute_data), jit_FP(jit)); + + if (!func) { + if (!func_ref) { + func_ref = ir_LOAD_A(jit_CALL(rx, func)); + } + } + + if (opline->opcode == ZEND_DO_FCALL) { + if (!func) { + if (!trace) { + ir_ref if_deprecated, ret; + + if_deprecated = ir_IF(ir_AND_U32( + ir_LOAD_U32(ir_ADD_OFFSET(func_ref, offsetof(zend_op_array, fn_flags))), + ir_CONST_U32(ZEND_ACC_DEPRECATED))); + ir_IF_TRUE_cold(if_deprecated); + + if (GCC_GLOBAL_REGS) { + ret = ir_CALL(IR_BOOL, ir_CONST_FC_FUNC(zend_jit_deprecated_helper)); + } else { + ret = ir_CALL_1(IR_BOOL, ir_CONST_FC_FUNC(zend_jit_deprecated_helper), rx); + } + ir_GUARD(ret, jit_STUB_ADDR(jit, jit_stub_exception_handler)); + ir_MERGE_WITH_EMPTY_FALSE(if_deprecated); + } + } else if (func->common.fn_flags & ZEND_ACC_DEPRECATED) { + ir_ref ret; + + if (GCC_GLOBAL_REGS) { + ret = ir_CALL(IR_BOOL, ir_CONST_FC_FUNC(zend_jit_deprecated_helper)); + } else { + ret = ir_CALL_1(IR_BOOL, ir_CONST_FC_FUNC(zend_jit_deprecated_helper), rx); + } + ir_GUARD(ret, jit_STUB_ADDR(jit, jit_stub_exception_handler)); + } + } + + if (!func + && opline->opcode != ZEND_DO_UCALL + && opline->opcode != ZEND_DO_ICALL) { + ir_ref type_ref = ir_LOAD_U8(ir_ADD_OFFSET(func_ref, offsetof(zend_function, type))); + if_user = ir_IF(ir_EQ(type_ref, ir_CONST_U8(ZEND_USER_FUNCTION))); + ir_IF_TRUE(if_user); + } + + if ((!func || func->type == ZEND_USER_FUNCTION) + && opline->opcode != ZEND_DO_ICALL) { + bool recursive_call_through_jmp = 0; + + // JIT: EX(call) = NULL; + ir_STORE(jit_CALL(rx, call), IR_NULL); + + // JIT: EX(return_value) = RETURN_VALUE_USED(opline) ? EX_VAR(opline->result.var) : 0; + ir_STORE(jit_CALL(rx, return_value), + RETURN_VALUE_USED(opline) ? + jit_ZVAL_ADDR(jit, ZEND_ADDR_MEM_ZVAL(ZREG_FP, opline->result.var)) : + IR_NULL); + + // JIT: EX_LOAD_RUN_TIME_CACHE(op_array); + if (!func || func->op_array.cache_size) { + ir_ref run_time_cache; + + if (func && op_array == &func->op_array) { + /* recursive call */ + run_time_cache = ir_LOAD_A(jit_EX(run_time_cache)); + } else if (func + && !(func->op_array.fn_flags & ZEND_ACC_CLOSURE) + && ZEND_MAP_PTR_IS_OFFSET(func->op_array.run_time_cache)) { + run_time_cache = ir_LOAD_A(ir_ADD_OFFSET(ir_LOAD_A(jit_CG(map_ptr_base)), + (uintptr_t)ZEND_MAP_PTR(func->op_array.run_time_cache))); + } else if ((func && (func->op_array.fn_flags & ZEND_ACC_CLOSURE)) || + (JIT_G(current_frame) && + JIT_G(current_frame)->call && + TRACE_FRAME_IS_CLOSURE_CALL(JIT_G(current_frame)->call))) { + /* Closures always use direct pointers */ + ir_ref local_func_ref = func_ref ? func_ref : ir_LOAD_A(jit_CALL(rx, func)); + + run_time_cache = ir_LOAD_A(ir_ADD_OFFSET(local_func_ref, offsetof(zend_op_array, run_time_cache__ptr))); + } else { + ir_ref if_odd, run_time_cache2; + ir_ref local_func_ref = func_ref ? 
func_ref : ir_LOAD_A(jit_CALL(rx, func)); + + run_time_cache = ir_LOAD_A(ir_ADD_OFFSET(local_func_ref, offsetof(zend_op_array, run_time_cache__ptr))); + if_odd = ir_IF(ir_AND_A(run_time_cache, ir_CONST_ADDR(1))); + ir_IF_TRUE(if_odd); + + run_time_cache2 = ir_LOAD_A(ir_ADD_A(run_time_cache, ir_LOAD_A(jit_CG(map_ptr_base)))); + + ir_MERGE_WITH_EMPTY_FALSE(if_odd); + run_time_cache = ir_PHI_2(IR_ADDR, run_time_cache2, run_time_cache); + } + + ir_STORE(jit_CALL(rx, run_time_cache), run_time_cache); + } + + // JIT: EG(current_execute_data) = execute_data = call; + ir_STORE(jit_EG(current_execute_data), rx); + jit_STORE_FP(jit, rx); + + // JIT: opline = op_array->opcodes; + if (func && !unknown_num_args) { + + for (i = call_num_args; i < func->op_array.last_var; i++) { + uint32_t n = EX_NUM_TO_VAR(i); + zend_jit_addr var_addr = ZEND_ADDR_MEM_ZVAL(ZREG_FP, n); + + jit_set_Z_TYPE_INFO_ex(jit, var_addr, ir_CONST_U32(IS_UNDEF)); + } + + if (call_num_args <= func->op_array.num_args) { + if (!trace || (trace->op == ZEND_JIT_TRACE_END + && trace->stop == ZEND_JIT_TRACE_STOP_INTERPRETER)) { + uint32_t num_args; + + if ((func->op_array.fn_flags & ZEND_ACC_HAS_TYPE_HINTS) != 0) { + if (trace) { + num_args = 0; + } else if (call_info) { + num_args = skip_valid_arguments(op_array, ssa, call_info); + } else { + num_args = call_num_args; + } + } else { + num_args = call_num_args; + } + if (zend_accel_in_shm(func->op_array.opcodes)) { + jit_LOAD_IP_ADDR(jit, func->op_array.opcodes + num_args); + } else { + if (!func_ref) { + func_ref = ir_LOAD_A(jit_CALL(rx, func)); + } + ir_ref ip = ir_LOAD_A(ir_ADD_OFFSET(func_ref, offsetof(zend_op_array, opcodes))); + if (num_args) { + ip = ir_ADD_OFFSET(ip, num_args * sizeof(zend_op)); + } + jit_LOAD_IP(jit, ip); + } + + if (!trace && op_array == &func->op_array && call_num_args >= op_array->required_num_args) { + /* recursive call */ + recursive_call_through_jmp = 1; + } + } + } else { + if (!trace || (trace->op == ZEND_JIT_TRACE_END + && trace->stop == ZEND_JIT_TRACE_STOP_INTERPRETER)) { + ir_ref ip; + + if (zend_accel_in_shm(func->op_array.opcodes)) { + ip = ir_CONST_ADDR(func->op_array.opcodes); + } else { + if (!func_ref) { + func_ref = ir_LOAD_A(jit_CALL(rx, func)); + } + ip = ir_LOAD_A(ir_ADD_OFFSET(func_ref, offsetof(zend_op_array, opcodes))); + } + jit_LOAD_IP(jit, ip); + } + if (GCC_GLOBAL_REGS) { + ir_CALL(IR_VOID, ir_CONST_FC_FUNC(zend_jit_copy_extra_args_helper)); + } else { + ir_CALL_1(IR_VOID, ir_CONST_FC_FUNC(zend_jit_copy_extra_args_helper), jit_FP(jit)); + } + } + } else { + ir_ref ip; + ir_ref merge_inputs = IR_UNUSED; + + // JIT: opline = op_array->opcodes + if (func && zend_accel_in_shm(func->op_array.opcodes)) { + ip = ir_CONST_ADDR(func->op_array.opcodes); + } else { + if (!func_ref) { + func_ref = ir_LOAD_A(jit_CALL(rx, func)); + } + ip = ir_LOAD_A(ir_ADD_OFFSET(func_ref, offsetof(zend_op_array, opcodes))); + } + jit_LOAD_IP(jit, ip); + + // JIT: num_args = EX_NUM_ARGS(); + ir_ref num_args, first_extra_arg; + + num_args = ir_LOAD_U32(jit_EX(This.u2.num_args)); + if (func) { + first_extra_arg = ir_CONST_U32(func->op_array.num_args); + } else { + // JIT: first_extra_arg = op_array->num_args; + ZEND_ASSERT(func_ref); + first_extra_arg = ir_LOAD_U32(ir_ADD_OFFSET(func_ref, offsetof(zend_op_array, num_args))); + } + + // JIT: if (UNEXPECTED(num_args > first_extra_arg)) + ir_ref if_extra_args = ir_IF(ir_GT(num_args, first_extra_arg)); + ir_IF_TRUE_cold(if_extra_args); + if (GCC_GLOBAL_REGS) { + ir_CALL(IR_VOID, 
ir_CONST_FC_FUNC(zend_jit_copy_extra_args_helper)); + } else { + ir_CALL_1(IR_VOID, ir_CONST_FC_FUNC(zend_jit_copy_extra_args_helper), jit_FP(jit)); + } + ir_END_list(merge_inputs); + ir_IF_FALSE(if_extra_args); + if (!func || (func->op_array.fn_flags & ZEND_ACC_HAS_TYPE_HINTS) == 0) { + if (!func) { + // JIT: if (EXPECTED((op_array->fn_flags & ZEND_ACC_HAS_TYPE_HINTS) == 0)) + ir_ref if_has_type_hints = ir_IF(ir_AND_U32( + ir_LOAD_U32(ir_ADD_OFFSET(func_ref, offsetof(zend_op_array, fn_flags))), + ir_CONST_U32(ZEND_ACC_HAS_TYPE_HINTS))); + ir_IF_TRUE(if_has_type_hints); + ir_END_list(merge_inputs); + ir_IF_FALSE(if_has_type_hints); + } + // JIT: opline += num_args; + + ir_ref ref = ir_MUL_U32(num_args, ir_CONST_U32(sizeof(zend_op))); + + if (sizeof(void*) == 8) { + ref = ir_ZEXT_A(ref); + } + + if (GCC_GLOBAL_REGS) { + jit_STORE_IP(jit, ir_ADD_A(jit_IP(jit), ref)); + } else { + ir_ref addr = jit_EX(opline); + + ir_STORE(addr, ir_ADD_A(ir_LOAD_A(addr), ref)); + } + } + + ir_END_list(merge_inputs); + ir_MERGE_list(merge_inputs); + + // JIT: if (EXPECTED((int)num_args < op_array->last_var)) { + ir_ref last_var; + + if (func) { + last_var = ir_CONST_U32(func->op_array.last_var); + } else { + ZEND_ASSERT(func_ref); + last_var = ir_LOAD_U32(ir_ADD_OFFSET(func_ref, offsetof(zend_op_array, last_var))); + } + + ir_ref idx = ir_SUB_U32(last_var, num_args); + ir_ref if_need = ir_IF(ir_GT(idx, ir_CONST_U32(0))); + ir_IF_TRUE(if_need); + + // JIT: zval *var = EX_VAR_NUM(num_args); + if (sizeof(void*) == 8) { + num_args = ir_ZEXT_A(num_args); + } + ir_ref var_ref = ir_ADD_OFFSET( + ir_ADD_A(jit_FP(jit), ir_MUL_A(num_args, ir_CONST_ADDR(sizeof(zval)))), + (ZEND_CALL_FRAME_SLOT * sizeof(zval)) + offsetof(zval, u1.type_info)); + + ir_ref loop = ir_LOOP_BEGIN(ir_END()); + var_ref = ir_PHI_2(IR_ADDR, var_ref, IR_UNUSED); + idx = ir_PHI_2(IR_U32, idx, IR_UNUSED); + ir_STORE(var_ref, ir_CONST_I32(IS_UNDEF)); + ir_PHI_SET_OP(var_ref, 2, ir_ADD_OFFSET(var_ref, sizeof(zval))); + ir_ref idx2 = ir_SUB_U32(idx, ir_CONST_U32(1)); + ir_PHI_SET_OP(idx, 2, idx2); + ir_ref if_not_zero = ir_IF(idx2); + ir_IF_TRUE(if_not_zero); + ir_MERGE_SET_OP(loop, 2, ir_LOOP_END()); + ir_IF_FALSE(if_not_zero); + ir_MERGE_WITH_EMPTY_FALSE(if_need); + } + + if (ZEND_OBSERVER_ENABLED) { + if (GCC_GLOBAL_REGS) { + // EX(opline) = opline + ir_STORE(jit_EX(opline), jit_IP(jit)); + } + ir_CALL_1(IR_VOID, ir_CONST_FC_FUNC(zend_observer_fcall_begin), jit_FP(jit)); + } + + if (trace) { + if (!func && (opline->opcode != ZEND_DO_UCALL)) { + user_path = ir_END(); + } + } else { + zend_basic_block *bb; + + do { + if (recursive_call_through_jmp) { + ir_ref begin, end; + ir_insn *insn; + + /* attempt to convert direct recursive call into loop */ + begin = jit->bb_start_ref[call_num_args]; + ZEND_ASSERT(begin != IR_UNUSED); + insn = &jit->ctx.ir_base[begin]; + if (insn->op == IR_BEGIN) { + end = ir_LOOP_END(); + insn = &jit->ctx.ir_base[begin]; + insn->op = IR_LOOP_BEGIN; + insn->inputs_count = 2; + insn->op2 = end; + break; + } else if ((insn->op == IR_MERGE || insn->op == IR_LOOP_BEGIN) + && insn->inputs_count == 2) { + end = ir_LOOP_END(); + insn = &jit->ctx.ir_base[begin]; + insn->op = IR_LOOP_BEGIN; + insn->inputs_count = 3; + insn->op3 = end; + break; + } else if (insn->op == IR_LOOP_BEGIN && insn->inputs_count == 3) { + ZEND_ASSERT(jit->ctx.ir_base[insn->op3].op == IR_LOOP_END); + jit->ctx.ir_base[insn->op3].op = IR_END; + ir_MERGE_2(insn->op3, ir_END()); + end = ir_LOOP_END(); + insn = &jit->ctx.ir_base[begin]; + insn->op3 = end; + break; + } + 
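/* none of the convertible entry-block shapes matched; use the generic exit below */ +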
}
+ /* fall back to indirect JMP or RETURN */
+ if (GCC_GLOBAL_REGS) {
+ ir_TAILCALL(IR_VOID, ir_LOAD_A(jit_IP(jit)));
+ } else {
+ ir_RETURN(ir_CONST_I32(1));
+ }
+ } while (0);
+
+ bb = &jit->ssa->cfg.blocks[jit->b];
+ if (bb->successors_count > 0) {
+ int succ;
+ ir_ref ref;
+
+ ZEND_ASSERT(bb->successors_count == 1);
+ succ = bb->successors[0];
+ /* Add a fake control edge from UNREACHABLE/RETURN to the following ENTRY */
+ ref = jit->ctx.insns_count - 1;
+ ZEND_ASSERT(jit->ctx.ir_base[ref].op == IR_UNREACHABLE
+ || jit->ctx.ir_base[ref].op == IR_RETURN
+ || jit->ctx.ir_base[ref].op == IR_LOOP_END);
+ ZEND_ASSERT(jit->ssa->cfg.blocks[succ].flags & ZEND_BB_ENTRY);
+ ref = zend_jit_continue_entry(jit, ref, jit->ssa->cfg.blocks[succ].start);
+ if (func || (opline->opcode == ZEND_DO_UCALL)) {
+ _zend_jit_add_predecessor_ref(jit, succ, jit->b, ref);
+ jit->b = -1;
+ } else {
+ user_path = ref;
+ }
+ }
+ }
+ }
+
+ if ((!func || func->type == ZEND_INTERNAL_FUNCTION)
+ && (opline->opcode != ZEND_DO_UCALL)) {
+ if (!func && (opline->opcode != ZEND_DO_ICALL)) {
+ ir_IF_FALSE(if_user);
+ }
+ if (opline->opcode == ZEND_DO_FCALL_BY_NAME) {
+ if (!func) {
+ if (trace) {
+ uint32_t exit_point = zend_jit_trace_get_exit_point(opline, ZEND_JIT_EXIT_TO_VM);
+
+ exit_addr = zend_jit_trace_get_exit_addr(exit_point);
+ if (!exit_addr) {
+ return 0;
+ }
+ ZEND_ASSERT(func_ref);
+ ir_GUARD_NOT(
+ ir_AND_U32(
+ ir_LOAD_U32(ir_ADD_OFFSET(func_ref, offsetof(zend_op_array, fn_flags))),
+ ir_CONST_U32(ZEND_ACC_DEPRECATED)),
+ ir_CONST_ADDR(exit_addr));
+ } else {
+ ir_ref if_deprecated, ret;
+
+ if_deprecated = ir_IF(ir_AND_U32(
+ ir_LOAD_U32(ir_ADD_OFFSET(func_ref, offsetof(zend_op_array, fn_flags))),
+ ir_CONST_U32(ZEND_ACC_DEPRECATED)));
+ ir_IF_TRUE_cold(if_deprecated);
+
+ if (GCC_GLOBAL_REGS) {
+ ret = ir_CALL(IR_BOOL, ir_CONST_FC_FUNC(zend_jit_deprecated_helper));
+ } else {
+ ret = ir_CALL_1(IR_BOOL, ir_CONST_FC_FUNC(zend_jit_deprecated_helper), rx);
+ }
+ ir_GUARD(ret, jit_STUB_ADDR(jit, jit_stub_exception_handler));
+ ir_MERGE_WITH_EMPTY_FALSE(if_deprecated);
+ }
+ } else if (func->common.fn_flags & ZEND_ACC_DEPRECATED) {
+ ir_ref ret;
+
+ if (GCC_GLOBAL_REGS) {
+ ret = ir_CALL(IR_BOOL, ir_CONST_FC_FUNC(zend_jit_deprecated_helper));
+ } else {
+ ret = ir_CALL_1(IR_BOOL, ir_CONST_FC_FUNC(zend_jit_deprecated_helper), rx);
+ }
+ ir_GUARD(ret, jit_STUB_ADDR(jit, jit_stub_exception_handler));
+ }
+ }
+
+ // JIT: EG(current_execute_data) = execute_data;
+ ir_STORE(jit_EG(current_execute_data), rx);
+
+ if (ZEND_OBSERVER_ENABLED) {
+ ir_CALL_1(IR_VOID, ir_CONST_FC_FUNC(zend_observer_fcall_begin), rx);
+ }
+
+ // JIT: ZVAL_NULL(EX_VAR(opline->result.var));
+ ir_ref res_addr = IR_UNUSED, func_ptr;
+
+ if (RETURN_VALUE_USED(opline)) {
+ res_addr = ZEND_ADDR_MEM_ZVAL(ZREG_FP, opline->result.var);
+ } else {
+ /* CPU stack-allocated temporary zval */
+ ir_ref ptr;
+
+ if (!jit->ctx.fixed_call_stack_size) {
+ // JIT: alloca(sizeof(zval));
+ ptr = ir_ALLOCA(ir_CONST_ADDR(sizeof(zval)));
+ } else {
+ ptr = ir_HARD_COPY_A(ir_RLOAD_A(IR_REG_SP));
+ }
+ res_addr = ZEND_ADDR_REF_ZVAL(ptr);
+ }
+
+ jit_set_Z_TYPE_INFO(jit, res_addr, IS_NULL);
+
+ zend_jit_reset_last_valid_opline(jit);
+
+ // JIT: (zend_execute_internal ?
zend_execute_internal : fbc->internal_function.handler)(call, ret); + if (zend_execute_internal) { + ir_CALL_2(IR_VOID, ir_CONST_FUNC(zend_execute_internal), rx, jit_ZVAL_ADDR(jit, res_addr)); + } else { + if (func) { + func_ptr = ir_CONST_FC_FUNC(func->internal_function.handler); + } else { + func_ptr = ir_LOAD_A(ir_ADD_OFFSET(func_ref, offsetof(zend_internal_function, handler))); +#if defined(IR_TARGET_X86) + func_ptr = ir_CAST_FC_FUNC(func_ptr); +#endif + } + ir_CALL_2(IR_VOID, func_ptr, rx, jit_ZVAL_ADDR(jit, res_addr)); + } + + if (ZEND_OBSERVER_ENABLED) { + ir_CALL_2(IR_VOID, ir_CONST_FC_FUNC(zend_observer_fcall_end), + rx, jit_ZVAL_ADDR(jit, res_addr)); + } + + // JIT: EG(current_execute_data) = execute_data; + ir_STORE(jit_EG(current_execute_data), jit_FP(jit)); + + // JIT: zend_vm_stack_free_args(call); + if (func && !unknown_num_args) { + for (i = 0; i < call_num_args; i++ ) { + if (zend_jit_needs_arg_dtor(func, i, call_info)) { + uint32_t offset = EX_NUM_TO_VAR(i); + zend_jit_addr var_addr = ZEND_ADDR_MEM_ZVAL(ZREG_RX, offset); + + jit_ZVAL_PTR_DTOR(jit, var_addr, MAY_BE_ANY|MAY_BE_RC1|MAY_BE_RCN, 0, opline); + } + } + } else { + ir_CALL_1(IR_VOID, ir_CONST_FC_FUNC(zend_jit_vm_stack_free_args_helper), rx); + } + + if (may_have_extra_named_params) { + // JIT: if (UNEXPECTED(ZEND_CALL_INFO(call) & ZEND_CALL_HAS_EXTRA_NAMED_PARAMS)) + ir_ref if_has_named = ir_IF(ir_AND_U8( + ir_LOAD_U8(ir_ADD_OFFSET(rx, offsetof(zend_execute_data, This.u1.type_info) + 3)), + ir_CONST_U8(ZEND_CALL_HAS_EXTRA_NAMED_PARAMS >> 24))); + ir_IF_TRUE_cold(if_has_named); + + ir_CALL_1(IR_VOID, ir_CONST_FC_FUNC(zend_free_extra_named_params), + ir_LOAD_A(jit_CALL(rx, extra_named_params))); + + ir_MERGE_WITH_EMPTY_FALSE(if_has_named); + } + + if (opline->opcode == ZEND_DO_FCALL) { + // TODO: optimize ??? + // JIT: if (UNEXPECTED(ZEND_CALL_INFO(call) & ZEND_CALL_RELEASE_THIS)) + ir_ref if_release_this = ir_IF(ir_AND_U8( + ir_LOAD_U8(ir_ADD_OFFSET(rx, offsetof(zend_execute_data, This.u1.type_info) + 2)), + ir_CONST_U8(ZEND_CALL_RELEASE_THIS >> 16))); + ir_IF_TRUE_cold(if_release_this); + + // JIT: OBJ_RELEASE(Z_OBJ(RX->This)); + jit_OBJ_RELEASE(jit, ir_LOAD_A(jit_CALL(rx, This.value.obj))); + + ir_MERGE_WITH_EMPTY_FALSE(if_release_this); + } + + + ir_ref allocated_path = IR_UNUSED; + + if (JIT_G(trigger) != ZEND_JIT_ON_HOT_TRACE || + !JIT_G(current_frame) || + !JIT_G(current_frame)->call || + !TRACE_FRAME_IS_NESTED(JIT_G(current_frame)->call) || + prev_opline->opcode == ZEND_SEND_UNPACK || + prev_opline->opcode == ZEND_SEND_ARRAY || + prev_opline->opcode == ZEND_CHECK_UNDEF_ARGS) { + + // JIT: zend_vm_stack_free_call_frame(call); + // JIT: if (UNEXPECTED(ZEND_CALL_INFO(call) & ZEND_CALL_ALLOCATED)) + ir_ref if_allocated = ir_IF(ir_AND_U8( + ir_LOAD_U8(ir_ADD_OFFSET(rx, offsetof(zend_execute_data, This.u1.type_info) + 2)), + ir_CONST_U8(ZEND_CALL_ALLOCATED >> 16))); + ir_IF_TRUE_cold(if_allocated); + + ir_CALL_1(IR_VOID, ir_CONST_FC_FUNC(zend_jit_free_call_frame), rx); + + allocated_path = ir_END(); + ir_IF_FALSE(if_allocated); + } + + ir_STORE(jit_EG(vm_stack_top), rx); + + if (allocated_path) { + ir_MERGE_WITH(allocated_path); + } + + if (!RETURN_VALUE_USED(opline)) { + zend_class_entry *ce; + bool ce_is_instanceof; + uint32_t func_info = call_info ? + zend_get_func_info(call_info, ssa, &ce, &ce_is_instanceof) : + (MAY_BE_ANY|MAY_BE_REF|MAY_BE_RC1|MAY_BE_RCN); + + /* If an exception is thrown, the return_value may stay at the + * original value of null. 
*/ + func_info |= MAY_BE_NULL; + + if (func_info & (MAY_BE_STRING|MAY_BE_ARRAY|MAY_BE_OBJECT|MAY_BE_RESOURCE|MAY_BE_REF)) { + ir_ref sp = ir_RLOAD_A(IR_REG_SP); + res_addr = ZEND_ADDR_REF_ZVAL(sp); + jit_ZVAL_PTR_DTOR(jit, res_addr, func_info, 1, opline); + } + if (!jit->ctx.fixed_call_stack_size) { + // JIT: revert alloca + ir_AFREE(ir_CONST_ADDR(sizeof(zval))); + } + } + + // JIT: if (UNEXPECTED(EG(exception) != NULL)) { + ir_GUARD_NOT(ir_LOAD_A(jit_EG_exception(jit)), + jit_STUB_ADDR(jit, jit_stub_icall_throw)); + + // TODO: Can we avoid checking for interrupts after each call ??? + if (trace && jit->last_valid_opline != opline) { + int32_t exit_point = zend_jit_trace_get_exit_point(opline + 1, ZEND_JIT_EXIT_TO_VM); + + exit_addr = zend_jit_trace_get_exit_addr(exit_point); + if (!exit_addr) { + return 0; + } + } else { + exit_addr = NULL; + } + + if (!zend_jit_check_timeout(jit, opline + 1, exit_addr)) { + return 0; + } + + if ((!trace || !func) && opline->opcode != ZEND_DO_ICALL) { + jit_LOAD_IP_ADDR(jit, opline + 1); + } else if (trace + && trace->op == ZEND_JIT_TRACE_END + && trace->stop == ZEND_JIT_TRACE_STOP_INTERPRETER) { + jit_LOAD_IP_ADDR(jit, opline + 1); + } + } + + if (user_path) { + ir_MERGE_WITH(user_path); + } + + return 1; +} + +static int zend_jit_constructor(zend_jit_ctx *jit, const zend_op *opline, const zend_op_array *op_array, zend_ssa *ssa, int call_level, int next_block) +{ + ir_ref if_skip_constructor = jit_IF_ex(jit, jit_CMP_IP(jit, IR_NE, opline), next_block); + + ir_IF_FALSE(if_skip_constructor); + + if (JIT_G(opt_level) < ZEND_JIT_LEVEL_INLINE) { + if (!zend_jit_tail_handler(jit, opline)) { + return 0; + } + } else { + if (!zend_jit_do_fcall(jit, opline, op_array, ssa, call_level, next_block, NULL)) { + return 0; + } + } + + /* override predecessors of the next block */ + ZEND_ASSERT(jit->ssa->cfg.blocks[next_block].predecessors_count == 1); + if (!jit->ctx.control) { + ZEND_ASSERT(jit->bb_edges[jit->bb_predecessors[next_block]]); + ir_IF_TRUE(if_skip_constructor); + ir_MERGE_2(jit->bb_edges[jit->bb_predecessors[next_block]], ir_END()); + jit->bb_edges[jit->bb_predecessors[next_block]] = ir_END(); + } else { + ZEND_ASSERT(!jit->bb_edges[jit->bb_predecessors[next_block]]); + /* merge current control path with the true branch of constructor skip condition */ + ir_MERGE_WITH_EMPTY_TRUE(if_skip_constructor); + jit->bb_edges[jit->bb_predecessors[next_block]] = ir_END(); + + jit->b = -1; + } + + return 1; +} + +static int zend_jit_verify_arg_type(zend_jit_ctx *jit, const zend_op *opline, zend_arg_info *arg_info, bool check_exception) +{ + zend_jit_addr res_addr = ZEND_ADDR_MEM_ZVAL(ZREG_FP, opline->result.var); + uint32_t type_mask = ZEND_TYPE_PURE_MASK(arg_info->type) & MAY_BE_ANY; + ir_ref ref, fast_path = IR_UNUSED; + + ref = jit_ZVAL_ADDR(jit, res_addr); + if (JIT_G(trigger) == ZEND_JIT_ON_HOT_TRACE + && JIT_G(current_frame) + && JIT_G(current_frame)->prev) { + zend_jit_trace_stack *stack = JIT_G(current_frame)->stack; + uint8_t type = STACK_TYPE(stack, EX_VAR_TO_NUM(opline->result.var)); + + if (type != IS_UNKNOWN && (type_mask & (1u << type))) { + return 1; + } + } + + if (ZEND_ARG_SEND_MODE(arg_info)) { + if (opline->opcode == ZEND_RECV_INIT) { + ref = jit_ZVAL_DEREF_ref(jit, ref); + } else { + ref = jit_Z_PTR_ref(jit, ref); + ref = ir_ADD_OFFSET(ref, offsetof(zend_reference, val)); + } + } + + if (type_mask != 0) { + if (is_power_of_two(type_mask)) { + uint32_t type_code = concrete_type(type_mask); + ir_ref if_ok = jit_if_Z_TYPE_ref(jit, ref, 
ir_CONST_U8(type_code));
+ ir_IF_TRUE(if_ok);
+ fast_path = ir_END();
+ ir_IF_FALSE_cold(if_ok);
+ } else {
+ ir_ref if_ok = ir_IF(ir_AND_U32(
+ ir_SHL_U32(ir_CONST_U32(1), jit_Z_TYPE_ref(jit, ref)),
+ ir_CONST_U32(type_mask)));
+ ir_IF_TRUE(if_ok);
+ fast_path = ir_END();
+ ir_IF_FALSE_cold(if_ok);
+ }
+ }
+
+ jit_SET_EX_OPLINE(jit, opline);
+ ref = ir_CALL_2(IR_BOOL, ir_CONST_FC_FUNC(zend_jit_verify_arg_slow),
+ ref, ir_CONST_ADDR(arg_info));
+
+ if (check_exception) {
+ ir_GUARD(ref, jit_STUB_ADDR(jit, jit_stub_exception_handler));
+ }
+
+ if (fast_path) {
+ ir_MERGE_WITH(fast_path);
+ }
+
+ return 1;
+}
+
+static int zend_jit_recv(zend_jit_ctx *jit, const zend_op *opline, const zend_op_array *op_array)
+{
+ uint32_t arg_num = opline->op1.num;
+ zend_arg_info *arg_info = NULL;
+
+ if (op_array->fn_flags & ZEND_ACC_HAS_TYPE_HINTS) {
+ if (EXPECTED(arg_num <= op_array->num_args)) {
+ arg_info = &op_array->arg_info[arg_num-1];
+ } else if (UNEXPECTED(op_array->fn_flags & ZEND_ACC_VARIADIC)) {
+ arg_info = &op_array->arg_info[op_array->num_args];
+ }
+ if (arg_info && !ZEND_TYPE_IS_SET(arg_info->type)) {
+ arg_info = NULL;
+ }
+ }
+
+ if (arg_info || (opline+1)->opcode != ZEND_RECV) {
+ if (JIT_G(trigger) == ZEND_JIT_ON_HOT_TRACE) {
+ if (!JIT_G(current_frame) ||
+ TRACE_FRAME_NUM_ARGS(JIT_G(current_frame)) < 0 ||
+ arg_num > TRACE_FRAME_NUM_ARGS(JIT_G(current_frame))) {
+ int32_t exit_point = zend_jit_trace_get_exit_point(opline, ZEND_JIT_EXIT_TO_VM);
+ const void *exit_addr = zend_jit_trace_get_exit_addr(exit_point);
+
+ if (!exit_addr) {
+ return 0;
+ }
+ ir_GUARD(ir_GE(ir_LOAD_U32(jit_EX(This.u2.num_args)), ir_CONST_U32(arg_num)),
+ ir_CONST_ADDR(exit_addr));
+ }
+ } else {
+ ir_ref if_ok = ir_IF(ir_GE(ir_LOAD_U32(jit_EX(This.u2.num_args)), ir_CONST_U32(arg_num)));
+ ir_IF_FALSE_cold(if_ok);
+
+ jit_SET_EX_OPLINE(jit, opline);
+ ir_CALL_1(IR_VOID, ir_CONST_FC_FUNC(zend_missing_arg_error), jit_FP(jit));
+ ir_IJMP(jit_STUB_ADDR(jit, jit_stub_exception_handler));
+ ir_IF_TRUE(if_ok);
+ }
+ }
+
+ if (arg_info) {
+ if (!zend_jit_verify_arg_type(jit, opline, arg_info, 1)) {
+ return 0;
+ }
+ }
+
+ return 1;
+}
+
+static int zend_jit_recv_init(zend_jit_ctx *jit, const zend_op *opline, const zend_op_array *op_array, bool is_last, int may_throw)
+{
+ uint32_t arg_num = opline->op1.num;
+ zval *zv = RT_CONSTANT(opline, opline->op2);
+ zend_jit_addr res_addr = ZEND_ADDR_MEM_ZVAL(ZREG_FP, opline->result.var);
+ ir_ref ref, if_fail, skip_path = IR_UNUSED;
+
+ if (JIT_G(trigger) == ZEND_JIT_ON_HOT_TRACE
+ && JIT_G(current_frame)
+ && TRACE_FRAME_NUM_ARGS(JIT_G(current_frame)) >= 0) {
+ if (arg_num > TRACE_FRAME_NUM_ARGS(JIT_G(current_frame))) {
+ jit_ZVAL_COPY_CONST(jit,
+ res_addr,
+ -1, -1,
+ zv, 1);
+ }
+ } else {
+ if (JIT_G(trigger) != ZEND_JIT_ON_HOT_TRACE ||
+ (op_array->fn_flags & ZEND_ACC_HAS_TYPE_HINTS)) {
+ ir_ref if_skip = ir_IF(ir_GE(ir_LOAD_U32(jit_EX(This.u2.num_args)), ir_CONST_U32(arg_num)));
+ ir_IF_TRUE(if_skip);
+ skip_path = ir_END();
+ ir_IF_FALSE(if_skip);
+ }
+ jit_ZVAL_COPY_CONST(jit,
+ res_addr,
+ -1, -1,
+ zv, 1);
+ }
+
+ if (Z_CONSTANT_P(zv)) {
+ jit_SET_EX_OPLINE(jit, opline);
+ ref = ir_CALL_2(IR_I32, ir_CONST_FC_FUNC(zval_update_constant_ex),
+ jit_ZVAL_ADDR(jit, res_addr),
+ ir_LOAD_A(ir_ADD_OFFSET(ir_LOAD_A(jit_EX(func)), offsetof(zend_op_array, scope))));
+
+ if_fail = ir_IF(ref);
+ ir_IF_TRUE_cold(if_fail);
+ jit_ZVAL_PTR_DTOR(jit, res_addr, MAY_BE_ANY|MAY_BE_RC1|MAY_BE_RCN, 1, opline);
+ ir_IJMP(jit_STUB_ADDR(jit, jit_stub_exception_handler));
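+ /* the false path (successful constant evaluation) continues with the initialized default value */ +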
ir_IF_FALSE(if_fail); + } + + if (skip_path) { + ir_MERGE_WITH(skip_path); + } + + if (op_array->fn_flags & ZEND_ACC_HAS_TYPE_HINTS) { + do { + zend_arg_info *arg_info; + + if (arg_num <= op_array->num_args) { + arg_info = &op_array->arg_info[arg_num-1]; + } else if (op_array->fn_flags & ZEND_ACC_VARIADIC) { + arg_info = &op_array->arg_info[op_array->num_args]; + } else { + break; + } + if (!ZEND_TYPE_IS_SET(arg_info->type)) { + break; + } + if (!zend_jit_verify_arg_type(jit, opline, arg_info, may_throw)) { + return 0; + } + } while (0); + } + + return 1; +} + +static bool zend_jit_verify_return_type(zend_jit_ctx *jit, const zend_op *opline, const zend_op_array *op_array, uint32_t op1_info) +{ + zend_arg_info *arg_info = &op_array->arg_info[-1]; + ZEND_ASSERT(ZEND_TYPE_IS_SET(arg_info->type)); + zend_jit_addr op1_addr = OP1_ADDR(); + bool needs_slow_check = 1; + uint32_t type_mask = ZEND_TYPE_PURE_MASK(arg_info->type) & MAY_BE_ANY; + ir_ref fast_path = IR_UNUSED; + + if (type_mask != 0) { + if (((op1_info & MAY_BE_ANY) & type_mask) == 0) { + /* pass */ + } else if (((op1_info & MAY_BE_ANY) | type_mask) == type_mask) { + needs_slow_check = 0; + } else if (is_power_of_two(type_mask)) { + uint32_t type_code = concrete_type(type_mask); + ir_ref if_ok = jit_if_Z_TYPE(jit, op1_addr, type_code); + + ir_IF_TRUE(if_ok); + fast_path = ir_END(); + ir_IF_FALSE_cold(if_ok); + } else { + ir_ref if_ok = ir_IF(ir_AND_U32( + ir_SHL_U32(ir_CONST_U32(1), jit_Z_TYPE(jit, op1_addr)), + ir_CONST_U32(type_mask))); + + ir_IF_TRUE(if_ok); + fast_path = ir_END(); + ir_IF_FALSE_cold(if_ok); + } + } + if (needs_slow_check) { + ir_ref ref; + + jit_SET_EX_OPLINE(jit, opline); + ref = jit_ZVAL_ADDR(jit, op1_addr); + if (op1_info & MAY_BE_UNDEF) { + ref = zend_jit_zval_check_undef(jit, ref, opline->op1.var, NULL, 1); + } + + ir_CALL_4(IR_VOID, ir_CONST_FC_FUNC(zend_jit_verify_return_slow), + ref, + ir_LOAD_A(jit_EX(func)), + ir_CONST_ADDR(arg_info), + ir_ADD_OFFSET(ir_LOAD_A(jit_EX(run_time_cache)), opline->op2.num)); + + zend_jit_check_exception(jit); + + if (fast_path) { + ir_MERGE_WITH(fast_path); + } + } + + return 1; +} + +static int zend_jit_leave_frame(zend_jit_ctx *jit) +{ + // JIT: EG(current_execute_data) = EX(prev_execute_data); + ir_STORE(jit_EG(current_execute_data), ir_LOAD_A(jit_EX(prev_execute_data))); + return 1; +} + +static int zend_jit_free_cvs(zend_jit_ctx *jit) +{ + // JIT: EG(current_execute_data) = EX(prev_execute_data); + ir_STORE(jit_EG(current_execute_data), ir_LOAD_A(jit_EX(prev_execute_data))); + + // JIT: zend_free_compiled_variables(execute_data); + ir_CALL_1(IR_VOID, ir_CONST_FC_FUNC(zend_free_compiled_variables), jit_FP(jit)); + return 1; +} + +static int zend_jit_free_cv(zend_jit_ctx *jit, uint32_t info, uint32_t var) +{ + if (info & (MAY_BE_STRING|MAY_BE_ARRAY|MAY_BE_OBJECT|MAY_BE_RESOURCE|MAY_BE_REF)) { + zend_jit_addr var_addr = ZEND_ADDR_MEM_ZVAL(ZREG_FP, EX_NUM_TO_VAR(var)); + + jit_ZVAL_PTR_DTOR(jit, var_addr, info, 1, NULL); + } + return 1; +} + +static int zend_jit_free_op(zend_jit_ctx *jit, const zend_op *opline, uint32_t info, uint32_t var_offset) +{ + if (info & (MAY_BE_STRING|MAY_BE_ARRAY|MAY_BE_OBJECT|MAY_BE_RESOURCE|MAY_BE_REF)) { + jit_ZVAL_PTR_DTOR(jit, ZEND_ADDR_MEM_ZVAL(ZREG_FP, var_offset), info, 0, opline); + } + return 1; +} + +static int zend_jit_leave_func(zend_jit_ctx *jit, + const zend_op_array *op_array, + const zend_op *opline, + uint32_t op1_info, + bool left_frame, + zend_jit_trace_rec *trace, + zend_jit_trace_info *trace_info, + int indirect_var_access, + 
int may_throw) +{ + bool may_be_top_frame = + JIT_G(trigger) != ZEND_JIT_ON_HOT_TRACE || + !JIT_G(current_frame) || + !TRACE_FRAME_IS_NESTED(JIT_G(current_frame)); + bool may_need_call_helper = + indirect_var_access || /* may have symbol table */ + !op_array->function_name || /* may have symbol table */ + may_be_top_frame || + (op_array->fn_flags & ZEND_ACC_VARIADIC) || /* may have extra named args */ + JIT_G(trigger) != ZEND_JIT_ON_HOT_TRACE || + !JIT_G(current_frame) || + TRACE_FRAME_NUM_ARGS(JIT_G(current_frame)) == -1 || /* unknown number of args */ + (uint32_t)TRACE_FRAME_NUM_ARGS(JIT_G(current_frame)) > op_array->num_args; /* extra args */ + bool may_need_release_this = + !(op_array->fn_flags & ZEND_ACC_CLOSURE) && + op_array->scope && + !(op_array->fn_flags & ZEND_ACC_STATIC) && + (JIT_G(trigger) != ZEND_JIT_ON_HOT_TRACE || + !JIT_G(current_frame) || + !TRACE_FRAME_NO_NEED_RELEASE_THIS(JIT_G(current_frame))); + ir_ref call_info = IR_UNUSED, ref, cold_path = IR_UNUSED; + + if (may_need_call_helper) { + if (!left_frame) { + left_frame = 1; + if (!zend_jit_leave_frame(jit)) { + return 0; + } + } + /* ZEND_CALL_FAKE_CLOSURE handled on slow path to eliminate check for ZEND_CALL_CLOSURE on fast path */ + call_info = ir_LOAD_U32(jit_EX(This.u1.type_info)); + ref = ir_AND_U32(call_info, + ir_CONST_U32(ZEND_CALL_TOP|ZEND_CALL_HAS_SYMBOL_TABLE|ZEND_CALL_FREE_EXTRA_ARGS|ZEND_CALL_ALLOCATED|ZEND_CALL_HAS_EXTRA_NAMED_PARAMS|ZEND_CALL_FAKE_CLOSURE)); + if (trace && trace->op != ZEND_JIT_TRACE_END) { + ir_ref if_slow = ir_IF(ref); + + ir_IF_TRUE_cold(if_slow); + if (!GCC_GLOBAL_REGS) { + ref = ir_CALL_1(IR_I32, ir_CONST_FC_FUNC(zend_jit_leave_func_helper), jit_FP(jit)); + } else { + ir_CALL(IR_VOID, ir_CONST_FC_FUNC(zend_jit_leave_func_helper)); + } + + if (may_be_top_frame) { + // TODO: try to avoid this check ??? 
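+ // Note: zend_jit_leave_func_helper signals a VM halt via a NULL IP (global-register VM) or a negative return value (CALL VM)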
+ if (zend_jit_vm_kind == ZEND_VM_KIND_HYBRID) {
+#if 0
+ /* this check should be handled by the following OPLINE guard */
+ | cmp IP, zend_jit_halt_op
+ | je ->trace_halt
+#endif
+ } else if (GCC_GLOBAL_REGS) {
+ ir_GUARD(jit_IP(jit), jit_STUB_ADDR(jit, jit_stub_trace_halt));
+ } else {
+ ir_GUARD(ir_GE(ref, ir_CONST_I32(0)), jit_STUB_ADDR(jit, jit_stub_trace_halt));
+ }
+ }
+
+ if (!GCC_GLOBAL_REGS) {
+ // execute_data = EG(current_execute_data)
+ jit_STORE_FP(jit, ir_LOAD_A(jit_EG(current_execute_data)));
+ }
+ cold_path = ir_END();
+ ir_IF_FALSE(if_slow);
+ } else {
+ ir_GUARD_NOT(ref, jit_STUB_ADDR(jit, jit_stub_leave_function_handler));
+ }
+ }
+
+ if ((op_array->fn_flags & (ZEND_ACC_CLOSURE|ZEND_ACC_FAKE_CLOSURE)) == ZEND_ACC_CLOSURE) {
+ if (!left_frame) {
+ left_frame = 1;
+ if (!zend_jit_leave_frame(jit)) {
+ return 0;
+ }
+ }
+ // JIT: OBJ_RELEASE(ZEND_CLOSURE_OBJECT(EX(func)));
+ jit_OBJ_RELEASE(jit, ir_ADD_OFFSET(ir_LOAD_A(jit_EX(func)), -sizeof(zend_object)));
+ } else if (may_need_release_this) {
+ ir_ref if_release, fast_path = IR_UNUSED;
+
+ if (!left_frame) {
+ left_frame = 1;
+ if (!zend_jit_leave_frame(jit)) {
+ return 0;
+ }
+ }
+ if (!JIT_G(current_frame) || !TRACE_FRAME_ALWAYS_RELEASE_THIS(JIT_G(current_frame))) {
+ // JIT: if (call_info & ZEND_CALL_RELEASE_THIS)
+ if (!call_info) {
+ call_info = ir_LOAD_U32(jit_EX(This.u1.type_info));
+ }
+ if_release = ir_IF(ir_AND_U32(call_info, ir_CONST_U32(ZEND_CALL_RELEASE_THIS)));
+ ir_IF_FALSE(if_release);
+ fast_path = ir_END();
+ ir_IF_TRUE(if_release);
+ }
+ // JIT: OBJ_RELEASE(Z_OBJ(execute_data->This))
+ jit_OBJ_RELEASE(jit, ir_LOAD_A(jit_EX(This.value.obj)));
+ if (fast_path) {
+ ir_MERGE_WITH(fast_path);
+ }
+ // TODO: avoid EG(exception) check for $this->foo() calls
+ may_throw = 1;
+ }
+
+ // JIT: EG(vm_stack_top) = (zval*)execute_data
+ ir_STORE(jit_EG(vm_stack_top), jit_FP(jit));
+
+ // JIT: execute_data = EX(prev_execute_data)
+ jit_STORE_FP(jit, ir_LOAD_A(jit_EX(prev_execute_data)));
+
+ if (!left_frame) {
+ // JIT: EG(current_execute_data) = execute_data
+ ir_STORE(jit_EG(current_execute_data), jit_FP(jit));
+ }
+
+ if (trace) {
+ if (trace->op != ZEND_JIT_TRACE_END
+ && (JIT_G(current_frame) && !TRACE_FRAME_IS_UNKNOWN_RETURN(JIT_G(current_frame)))) {
+ zend_jit_reset_last_valid_opline(jit);
+ } else {
+ if (GCC_GLOBAL_REGS) {
+ /* We add an extra RLOAD and RSTORE to allow fusion into
+ * mov (%FP), %IP
+ * add $0x1c, %IP
+ * The naive (commented-out) code below leads to an extra register allocation and move:
+ * mov (%FP), %tmp + * add $0x1c, %tmp + * mov %tmp, %FP + */ +#if 0 + jit_STORE_IP(jit, ir_ADD_OFFSET(ir_LOAD_A(jit_EX(opline)), sizeof(zend_op))); +#else + jit_STORE_IP(jit, ir_LOAD_A(jit_EX(opline))); + jit_STORE_IP(jit, ir_ADD_OFFSET(jit_IP(jit), sizeof(zend_op))); +#endif + } else { + ir_ref ref = jit_EX(opline); + + ir_STORE(ref, ir_ADD_OFFSET(ir_LOAD_A(ref), sizeof(zend_op))); + } + } + + if (cold_path) { + ir_MERGE_WITH(cold_path); + } + + if (trace->op == ZEND_JIT_TRACE_BACK + && (!JIT_G(current_frame) || TRACE_FRAME_IS_UNKNOWN_RETURN(JIT_G(current_frame)))) { + const zend_op *next_opline = trace->opline; + + if ((opline->op1_type & (IS_VAR|IS_TMP_VAR)) + && (op1_info & MAY_BE_RC1) + && (op1_info & (MAY_BE_OBJECT|MAY_BE_RESOURCE|MAY_BE_ARRAY_OF_OBJECT|MAY_BE_ARRAY_OF_RESOURCE|MAY_BE_ARRAY_OF_ARRAY))) { + /* exception might be thrown during destruction of unused return value */ + // JIT: if (EG(exception)) + ir_GUARD_NOT(ir_LOAD_A(jit_EG(exception)), jit_STUB_ADDR(jit, jit_stub_leave_throw)); + } + do { + trace++; + } while (trace->op == ZEND_JIT_TRACE_INIT_CALL); + ZEND_ASSERT(trace->op == ZEND_JIT_TRACE_VM || trace->op == ZEND_JIT_TRACE_END); + next_opline = trace->opline; + ZEND_ASSERT(next_opline != NULL); + + if (trace->op == ZEND_JIT_TRACE_END + && trace->stop == ZEND_JIT_TRACE_STOP_RECURSIVE_RET) { + trace_info->flags |= ZEND_JIT_TRACE_LOOP; + + ir_ref if_eq = ir_IF(jit_CMP_IP(jit, IR_EQ, next_opline)); + + ir_IF_TRUE(if_eq); + ZEND_ASSERT(jit->trace_loop_ref); + ZEND_ASSERT(jit->ctx.ir_base[jit->trace_loop_ref].op2 == IR_UNUSED); + ir_MERGE_SET_OP(jit->trace_loop_ref, 2, ir_END()); + ir_IF_FALSE(if_eq); + +#ifdef ZEND_VM_HYBRID_JIT_RED_ZONE_SIZE + ir_TAILCALL(IR_VOID, ir_LOAD_A(jit_IP(jit))); +#else + ir_IJMP(jit_STUB_ADDR(jit, jit_stub_trace_escape)); +#endif + } else { + ir_GUARD(jit_CMP_IP(jit, IR_EQ, next_opline), jit_STUB_ADDR(jit, jit_stub_trace_escape)); + } + + zend_jit_set_last_valid_opline(jit, trace->opline); + + return 1; + } else if (may_throw || + (((opline->op1_type & (IS_VAR|IS_TMP_VAR)) + && (op1_info & MAY_BE_RC1) + && (op1_info & (MAY_BE_OBJECT|MAY_BE_RESOURCE|MAY_BE_ARRAY_OF_OBJECT|MAY_BE_ARRAY_OF_RESOURCE|MAY_BE_ARRAY_OF_ARRAY))) + && (!JIT_G(current_frame) || TRACE_FRAME_IS_RETURN_VALUE_UNUSED(JIT_G(current_frame))))) { + // JIT: if (EG(exception)) + ir_GUARD_NOT(ir_LOAD_A(jit_EG(exception)), jit_STUB_ADDR(jit, jit_stub_leave_throw)); + } + + return 1; + } else { + // JIT: if (EG(exception)) + ir_GUARD_NOT(ir_LOAD_A(jit_EG(exception)), jit_STUB_ADDR(jit, jit_stub_leave_throw)); + // JIT: opline = EX(opline) + 1 + if (GCC_GLOBAL_REGS) { + jit_STORE_IP(jit, ir_LOAD_A(jit_EX(opline))); + jit_STORE_IP(jit, ir_ADD_OFFSET(jit_IP(jit), sizeof(zend_op))); + } else { + ir_ref ref = jit_EX(opline); + + ir_STORE(ref, ir_ADD_OFFSET(ir_LOAD_A(ref), sizeof(zend_op))); + } + } + + if (GCC_GLOBAL_REGS) { + ir_TAILCALL(IR_VOID, ir_LOAD_A(jit_IP(jit))); + } else { + ir_RETURN(ir_CONST_I32(2)); // ZEND_VM_LEAVE + } + + jit->b = -1; + + return 1; +} + +static void zend_jit_common_return(zend_jit_ctx *jit) +{ + ZEND_ASSERT(jit->return_inputs); + ir_MERGE_list(jit->return_inputs); +} + +static int zend_jit_return(zend_jit_ctx *jit, const zend_op *opline, const zend_op_array *op_array, uint32_t op1_info, zend_jit_addr op1_addr) +{ + zend_jit_addr ret_addr; + int8_t return_value_used = -1; + ir_ref return_value = IR_UNUSED, ref, refcount, if_return_value_used = IR_UNUSED; + + ZEND_ASSERT(op_array->type != ZEND_EVAL_CODE && op_array->function_name); + ZEND_ASSERT(!(op1_info & 
MAY_BE_UNDEF)); + + if (JIT_G(trigger) == ZEND_JIT_ON_HOT_TRACE) { + jit->return_inputs = IR_UNUSED; + if (JIT_G(current_frame)) { + if (TRACE_FRAME_IS_RETURN_VALUE_USED(JIT_G(current_frame))) { + return_value_used = 1; + } else if (TRACE_FRAME_IS_RETURN_VALUE_UNUSED(JIT_G(current_frame))) { + return_value_used = 0; + } else { + return_value_used = -1; + } + } + } + + if (ZEND_OBSERVER_ENABLED) { + if (Z_MODE(op1_addr) == IS_REG) { + zend_jit_addr dst = ZEND_ADDR_MEM_ZVAL(ZREG_FP, opline->op1.var); + + if (!zend_jit_spill_store(jit, op1_addr, dst, op1_info, 1)) { + return 0; + } + op1_addr = dst; + } + ir_CALL_2(IR_VOID, ir_CONST_FC_FUNC(zend_observer_fcall_end), + jit_FP(jit), + jit_ZVAL_ADDR(jit, op1_addr)); + } + + // JIT: if (!EX(return_value)) + return_value = ir_LOAD_A(jit_EX(return_value)); + ret_addr = ZEND_ADDR_REF_ZVAL(return_value); + if ((opline->op1_type & (IS_VAR|IS_TMP_VAR)) && + (op1_info & (MAY_BE_STRING|MAY_BE_ARRAY|MAY_BE_OBJECT|MAY_BE_RESOURCE))) { + if (return_value_used == -1) { + if_return_value_used = ir_IF(return_value); + ir_IF_FALSE_cold(if_return_value_used); + } + if (return_value_used != 1) { + if (op1_info & ((MAY_BE_UNDEF|MAY_BE_ANY|MAY_BE_REF)-(MAY_BE_OBJECT|MAY_BE_RESOURCE))) { + ir_ref if_refcounted = jit_if_REFCOUNTED(jit, op1_addr); + ir_IF_FALSE(if_refcounted); + ir_END_list(jit->return_inputs); + ir_IF_TRUE(if_refcounted); + } + ref = jit_Z_PTR(jit, op1_addr); + refcount = jit_GC_DELREF(jit, ref); + + if (RC_MAY_BE_1(op1_info)) { + if (RC_MAY_BE_N(op1_info)) { + ir_ref if_non_zero = ir_IF(refcount); + ir_IF_TRUE(if_non_zero); + ir_END_list(jit->return_inputs); + ir_IF_FALSE(if_non_zero); + } + jit_ZVAL_DTOR(jit, ref, op1_info, opline); + } + if (return_value_used == -1) { + ir_END_list(jit->return_inputs); + } + } + } else if (return_value_used == -1) { + if_return_value_used = ir_IF(return_value); + ir_IF_FALSE_cold(if_return_value_used); + ir_END_list(jit->return_inputs); + } + + if (if_return_value_used) { + ir_IF_TRUE(if_return_value_used); + } + + if (return_value_used == 0) { + if (jit->return_inputs) { + ZEND_ASSERT(JIT_G(trigger) == ZEND_JIT_ON_HOT_TRACE); + ir_END_list(jit->return_inputs); + ir_MERGE_list(jit->return_inputs); + jit->return_inputs = IR_UNUSED; + } + return 1; + } + + if (opline->op1_type == IS_CONST) { + zval *zv = RT_CONSTANT(opline, opline->op1); + + jit_ZVAL_COPY_CONST(jit, ret_addr, MAY_BE_ANY, MAY_BE_ANY, zv, 1); + } else if (opline->op1_type == IS_TMP_VAR) { + jit_ZVAL_COPY(jit, ret_addr, MAY_BE_ANY, op1_addr, op1_info, 0); + } else if (opline->op1_type == IS_CV) { + if (op1_info & MAY_BE_REF) { + ref = jit_ZVAL_ADDR(jit, op1_addr); + ref = jit_ZVAL_DEREF_ref(jit, ref); + op1_addr = ZEND_ADDR_REF_ZVAL(ref); + } + + if (op1_info & (MAY_BE_STRING|MAY_BE_ARRAY|MAY_BE_OBJECT|MAY_BE_RESOURCE)) { + if (JIT_G(trigger) != ZEND_JIT_ON_HOT_TRACE || + (op1_info & (MAY_BE_REF|MAY_BE_OBJECT)) || + !op_array->function_name) { + jit_ZVAL_COPY(jit, ret_addr, MAY_BE_ANY, op1_addr, op1_info, 1); + } else if (return_value_used != 1) { + jit_ZVAL_COPY(jit, ret_addr, MAY_BE_ANY, op1_addr, op1_info, 0); + // JIT: if (EXPECTED(!(EX_CALL_INFO() & ZEND_CALL_CODE))) ZVAL_NULL(retval_ptr); + jit_set_Z_TYPE_INFO(jit, op1_addr, IS_NULL); + } else { + jit_ZVAL_COPY(jit, ret_addr, MAY_BE_ANY, op1_addr, op1_info, 0); + } + } else { + jit_ZVAL_COPY(jit, ret_addr, MAY_BE_ANY, op1_addr, op1_info, 0); + } + } else { + if (op1_info & MAY_BE_REF) { + ir_ref if_ref, ref2, if_non_zero; + zend_jit_addr ref_addr; + + if_ref = jit_if_Z_TYPE(jit, op1_addr, 
IS_REFERENCE); + ir_IF_TRUE_cold(if_ref); + + // JIT: zend_refcounted *ref = Z_COUNTED_P(retval_ptr) + ref = jit_Z_PTR(jit, op1_addr); + + // JIT: ZVAL_COPY_VALUE(return_value, &ref->value) + ref2 = ir_ADD_OFFSET(ref, offsetof(zend_reference, val)); + ref_addr = ZEND_ADDR_REF_ZVAL(ref2); + jit_ZVAL_COPY(jit, ret_addr, MAY_BE_ANY, ref_addr, op1_info, 0); + ref2 = jit_GC_DELREF(jit, ref); + if_non_zero = ir_IF(ref2); + ir_IF_TRUE(if_non_zero); + + // JIT: if (IS_REFCOUNTED()) + ir_ref if_refcounted = jit_if_REFCOUNTED(jit, ret_addr); + ir_IF_FALSE(if_refcounted); + ir_END_list(jit->return_inputs); + ir_IF_TRUE(if_refcounted); + + // JIT: ADDREF + ref2 = jit_Z_PTR(jit, ret_addr); + jit_GC_ADDREF(jit, ref2); + ir_END_list(jit->return_inputs); + + ir_IF_FALSE(if_non_zero); + + jit_EFREE(jit, ref, sizeof(zend_reference), op_array, opline); + ir_END_list(jit->return_inputs); + + ir_IF_FALSE(if_ref); + } + jit_ZVAL_COPY(jit, ret_addr, MAY_BE_ANY, op1_addr, op1_info, 0); + } + + if (JIT_G(trigger) == ZEND_JIT_ON_HOT_TRACE) { + if (jit->return_inputs) { + ir_END_list(jit->return_inputs); + ir_MERGE_list(jit->return_inputs); + jit->return_inputs = IR_UNUSED; + } + } else { + ir_END_list(jit->return_inputs); + jit->b = -1; + } + + return 1; +} + +static int zend_jit_bind_global(zend_jit_ctx *jit, const zend_op *opline, uint32_t op1_info) +{ + zend_jit_addr op1_addr = OP1_ADDR(); + zend_string *varname = Z_STR_P(RT_CONSTANT(opline, opline->op2)); + ir_ref cache_slot_ref, idx_ref, num_used_ref, bucket_ref, ref, ref2; + ir_ref if_fit, if_reference, if_same_key, fast_path; + ir_ref slow_inputs = IR_UNUSED, end_inputs = IR_UNUSED; + + // JIT: idx = (uintptr_t)CACHED_PTR(opline->extended_value) - 1; + cache_slot_ref = ir_ADD_OFFSET(ir_LOAD_A(jit_EX(run_time_cache)), opline->extended_value); + idx_ref = ir_SUB_A(ir_LOAD_A(cache_slot_ref), ir_CONST_ADDR(1)); + + // JIT: if (EXPECTED(idx < EG(symbol_table).nNumUsed * sizeof(Bucket))) + num_used_ref = ir_MUL_U32(ir_LOAD_U32(jit_EG(symbol_table.nNumUsed)), + ir_CONST_U32(sizeof(Bucket))); + if (sizeof(void*) == 8) { + num_used_ref = ir_ZEXT_A(num_used_ref); + } + if_fit = ir_IF(ir_ULT(idx_ref, num_used_ref)); + ir_IF_FALSE_cold(if_fit); + ir_END_list(slow_inputs); + ir_IF_TRUE(if_fit); + + // JIT: Bucket *p = (Bucket*)((char*)EG(symbol_table).arData + idx); + bucket_ref = ir_ADD_A(ir_LOAD_A(jit_EG(symbol_table.arData)), idx_ref); + if_reference = jit_if_Z_TYPE_ref(jit, bucket_ref, ir_CONST_U8(IS_REFERENCE)); + ir_IF_FALSE_cold(if_reference); + ir_END_list(slow_inputs); + ir_IF_TRUE(if_reference); + + // JIT: (EXPECTED(p->key == varname)) + if_same_key = ir_IF(ir_EQ(ir_LOAD_A(ir_ADD_OFFSET(bucket_ref, offsetof(Bucket, key))), ir_CONST_ADDR(varname))); + ir_IF_FALSE_cold(if_same_key); + ir_END_list(slow_inputs); + ir_IF_TRUE(if_same_key); + + // JIT: GC_ADDREF(Z_PTR(p->val)) + ref = jit_Z_PTR_ref(jit, bucket_ref); + jit_GC_ADDREF(jit, ref); + + fast_path = ir_END(); + ir_MERGE_list(slow_inputs); + + ref2 = ir_CALL_2(IR_ADDR, ir_CONST_FC_FUNC(zend_jit_fetch_global_helper), + ir_CONST_ADDR(varname), + cache_slot_ref); + + ir_MERGE_WITH(fast_path); + ref = ir_PHI_2(IR_ADDR, ref2, ref); + + if (op1_info & (MAY_BE_STRING|MAY_BE_ARRAY|MAY_BE_OBJECT|MAY_BE_RESOURCE|MAY_BE_REF)) { + ir_ref if_refcounted = IR_UNUSED, refcount, if_non_zero, if_may_not_leak; + + if (op1_info & ((MAY_BE_ANY|MAY_BE_UNDEF) - (MAY_BE_STRING|MAY_BE_ARRAY|MAY_BE_OBJECT|MAY_BE_RESOURCE))) { + // JIT: if (UNEXPECTED(Z_REFCOUNTED_P(variable_ptr))) + if_refcounted = jit_if_REFCOUNTED(jit, op1_addr); + 
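/* only a refcounted old value requires the release sequence below */ +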
ir_IF_TRUE_cold(if_refcounted);
+ }
+
+ // JIT: zend_refcounted *garbage = Z_COUNTED_P(variable_ptr);
+ ref2 = jit_Z_PTR(jit, op1_addr);
+
+ // JIT: ZVAL_REF(variable_ptr, ref)
+ jit_set_Z_PTR(jit, op1_addr, ref);
+ jit_set_Z_TYPE_INFO(jit, op1_addr, IS_REFERENCE_EX);
+
+ // JIT: if (GC_DELREF(garbage) == 0)
+ refcount = jit_GC_DELREF(jit, ref2);
+ if_non_zero = ir_IF(refcount);
+ if (!(op1_info & (MAY_BE_REF|MAY_BE_ARRAY|MAY_BE_OBJECT))) {
+ ir_IF_TRUE(if_non_zero);
+ ir_END_list(end_inputs);
+ }
+ ir_IF_FALSE(if_non_zero);
+
+ jit_ZVAL_DTOR(jit, ref2, op1_info, opline);
+ if (op1_info & (MAY_BE_REF|MAY_BE_ARRAY|MAY_BE_OBJECT)) {
+ ir_END_list(end_inputs);
+ ir_IF_TRUE(if_non_zero);
+
+ // JIT: GC_ZVAL_CHECK_POSSIBLE_ROOT(variable_ptr)
+ if_may_not_leak = jit_if_GC_MAY_NOT_LEAK(jit, ref2);
+ ir_IF_TRUE(if_may_not_leak);
+ ir_END_list(end_inputs);
+ ir_IF_FALSE(if_may_not_leak);
+ ir_CALL_1(IR_VOID, ir_CONST_FC_FUNC(gc_possible_root), ref2);
+ }
+ if (op1_info & ((MAY_BE_ANY|MAY_BE_UNDEF) - (MAY_BE_STRING|MAY_BE_ARRAY|MAY_BE_OBJECT|MAY_BE_RESOURCE))) {
+ ir_END_list(end_inputs);
+ ir_IF_FALSE(if_refcounted);
+ }
+ }
+
+ if (op1_info & ((MAY_BE_ANY|MAY_BE_UNDEF) - (MAY_BE_STRING|MAY_BE_ARRAY|MAY_BE_OBJECT|MAY_BE_RESOURCE))) {
+ // JIT: ZVAL_REF(variable_ptr, ref)
+ jit_set_Z_PTR(jit, op1_addr, ref);
+ jit_set_Z_TYPE_INFO(jit, op1_addr, IS_REFERENCE_EX);
+ }
+
+ if (end_inputs) {
+ ir_END_list(end_inputs);
+ ir_MERGE_list(end_inputs);
+ }
+
+ return 1;
+}
+
+static int zend_jit_free(zend_jit_ctx *jit, const zend_op *opline, uint32_t op1_info, int may_throw)
+{
+ zend_jit_addr op1_addr = OP1_ADDR();
+
+ if (op1_info & (MAY_BE_STRING|MAY_BE_ARRAY|MAY_BE_OBJECT|MAY_BE_RESOURCE|MAY_BE_REF)) {
+ if (may_throw) {
+ jit_SET_EX_OPLINE(jit, opline);
+ }
+ if (opline->opcode == ZEND_FE_FREE && (op1_info & (MAY_BE_OBJECT|MAY_BE_REF))) {
+ ir_ref ref, if_array, if_exists, end_inputs = IR_UNUSED;
+
+ if (op1_info & MAY_BE_ARRAY) {
+ if_array = jit_if_Z_TYPE(jit, op1_addr, IS_ARRAY);
+ ir_IF_TRUE(if_array);
+ ir_END_list(end_inputs);
+ ir_IF_FALSE(if_array);
+ }
+ ref = ir_LOAD_U32(ir_ADD_OFFSET(jit_FP(jit), opline->op1.var + offsetof(zval, u2.fe_iter_idx)));
+ if_exists = ir_IF(ir_EQ(ref, ir_CONST_U32(-1)));
+ ir_IF_TRUE(if_exists);
+ ir_END_list(end_inputs);
+ ir_IF_FALSE(if_exists);
+
+ ir_CALL_1(IR_VOID, ir_CONST_FC_FUNC(zend_hash_iterator_del), ref);
+
+ ir_END_list(end_inputs);
+ ir_MERGE_list(end_inputs);
+ }
+
+ jit_ZVAL_PTR_DTOR(jit, op1_addr, op1_info, 0, opline);
+
+ if (may_throw) {
+ zend_jit_check_exception(jit);
+ }
+ }
+
+ return 1;
+}
+
+static int zend_jit_echo(zend_jit_ctx *jit, const zend_op *opline, uint32_t op1_info)
+{
+ if (opline->op1_type == IS_CONST) {
+ zval *zv;
+ size_t len;
+
+ zv = RT_CONSTANT(opline, opline->op1);
+ ZEND_ASSERT(Z_TYPE_P(zv) == IS_STRING);
+ len = Z_STRLEN_P(zv);
+
+ if (len > 0) {
+ const char *str = Z_STRVAL_P(zv);
+
+ jit_SET_EX_OPLINE(jit, opline);
+ ir_CALL_2(IR_VOID, ir_CONST_FUNC(zend_write),
+ ir_CONST_ADDR(str), ir_CONST_ADDR(len));
+
+ zend_jit_check_exception(jit);
+ }
+ } else {
+ zend_jit_addr op1_addr = OP1_ADDR();
+ ir_ref ref;
+
+ ZEND_ASSERT((op1_info & (MAY_BE_UNDEF|MAY_BE_ANY|MAY_BE_REF)) == MAY_BE_STRING);
+
+ jit_SET_EX_OPLINE(jit, opline);
+
+ ref = jit_Z_PTR(jit, op1_addr);
+ ir_CALL_2(IR_VOID, ir_CONST_FUNC(zend_write),
+ ir_ADD_OFFSET(ref, offsetof(zend_string, val)),
+ ir_LOAD_A(ir_ADD_OFFSET(ref, offsetof(zend_string, len))));
+
+ if (opline->op1_type & (IS_VAR|IS_TMP_VAR)) {
+ jit_ZVAL_PTR_DTOR(jit, op1_addr, op1_info, 0, opline);
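+ // the TMP/VAR string operand has been released; CV and CONST operands must not be freed here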
+ }
+
+ zend_jit_check_exception(jit);
+ }
+ return 1;
+}
+
+static int zend_jit_strlen(zend_jit_ctx *jit, const zend_op *opline, uint32_t op1_info, zend_jit_addr op1_addr, zend_jit_addr res_addr)
+{
+ if (opline->op1_type == IS_CONST) {
+ zval *zv;
+ size_t len;
+
+ zv = RT_CONSTANT(opline, opline->op1);
+ ZEND_ASSERT(Z_TYPE_P(zv) == IS_STRING);
+ len = Z_STRLEN_P(zv);
+
+ jit_set_Z_LVAL(jit, res_addr, ir_CONST_LONG(len));
+ if (Z_MODE(res_addr) != IS_REG) {
+ jit_set_Z_TYPE_INFO(jit, res_addr, IS_LONG);
+ } else if (!zend_jit_store_var_if_necessary(jit, opline->result.var, res_addr, MAY_BE_LONG)) {
+ return 0;
+ }
+ } else {
+ ir_ref ref;
+
+ ZEND_ASSERT((op1_info & (MAY_BE_UNDEF|MAY_BE_ANY|MAY_BE_REF)) == MAY_BE_STRING);
+
+ ref = jit_Z_PTR(jit, op1_addr);
+ ref = ir_LOAD_L(ir_ADD_OFFSET(ref, offsetof(zend_string, len)));
+ jit_set_Z_LVAL(jit, res_addr, ref);
+
+ if (Z_MODE(res_addr) == IS_REG) {
+ if (!zend_jit_store_var_if_necessary(jit, opline->result.var, res_addr, MAY_BE_LONG)) {
+ return 0;
+ }
+ } else {
+ jit_set_Z_TYPE_INFO(jit, res_addr, IS_LONG);
+ }
+ jit_FREE_OP(jit, opline->op1_type, opline->op1, op1_info, opline);
+ }
+ return 1;
+}
+
+static int zend_jit_count(zend_jit_ctx *jit, const zend_op *opline, uint32_t op1_info, zend_jit_addr op1_addr, zend_jit_addr res_addr, int may_throw)
+{
+ if (opline->op1_type == IS_CONST) {
+ zval *zv;
+ zend_long count;
+
+ zv = RT_CONSTANT(opline, opline->op1);
+ ZEND_ASSERT(Z_TYPE_P(zv) == IS_ARRAY);
+ count = zend_hash_num_elements(Z_ARRVAL_P(zv));
+
+ jit_set_Z_LVAL(jit, res_addr, ir_CONST_LONG(count));
+ if (Z_MODE(res_addr) != IS_REG) {
+ jit_set_Z_TYPE_INFO(jit, res_addr, IS_LONG);
+ } else if (!zend_jit_store_var_if_necessary(jit, opline->result.var, res_addr, MAY_BE_LONG)) {
+ return 0;
+ }
+ } else {
+ ir_ref ref;
+
+ ZEND_ASSERT((op1_info & (MAY_BE_UNDEF|MAY_BE_ANY|MAY_BE_REF)) == MAY_BE_ARRAY);
+ // Note: See the implementation of ZEND_COUNT in Zend/zend_vm_def.h - arrays do not contain IS_UNDEF as of PHP 8.1.
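+ // count() can therefore be read directly from HashTable.nNumOfElements below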
+ + ref = jit_Z_PTR(jit, op1_addr); + if (sizeof(void*) == 8) { + ref = ir_LOAD_U32(ir_ADD_OFFSET(ref, offsetof(HashTable, nNumOfElements))); + ref = ir_ZEXT_L(ref); + } else { + ref = ir_LOAD_L(ir_ADD_OFFSET(ref, offsetof(HashTable, nNumOfElements))); + } + jit_set_Z_LVAL(jit, res_addr, ref); + + if (Z_MODE(res_addr) == IS_REG) { + if (!zend_jit_store_var_if_necessary(jit, opline->result.var, res_addr, MAY_BE_LONG)) { + return 0; + } + } else { + jit_set_Z_TYPE_INFO(jit, res_addr, IS_LONG); + } + jit_FREE_OP(jit, opline->op1_type, opline->op1, op1_info, opline); + } + + if (may_throw) { + zend_jit_check_exception(jit); + } + return 1; +} + +static int zend_jit_in_array(zend_jit_ctx *jit, const zend_op *opline, uint32_t op1_info, zend_jit_addr op1_addr, uint8_t smart_branch_opcode, uint32_t target_label, uint32_t target_label2, const void *exit_addr) +{ + HashTable *ht = Z_ARRVAL_P(RT_CONSTANT(opline, opline->op2)); + zend_jit_addr res_addr = ZEND_ADDR_MEM_ZVAL(ZREG_FP, opline->result.var); + ir_ref ref; + + ZEND_ASSERT(opline->op1_type != IS_VAR && opline->op1_type != IS_TMP_VAR); + ZEND_ASSERT((op1_info & (MAY_BE_ANY|MAY_BE_UNDEF|MAY_BE_REF)) == MAY_BE_STRING); + + // JIT: result = zend_hash_find_ex(ht, Z_STR_P(op1), OP1_TYPE == IS_CONST); + if (opline->op1_type != IS_CONST) { + ref = ir_CALL_2(IR_ADDR, ir_CONST_FC_FUNC(zend_hash_find), + ir_CONST_ADDR(ht), + jit_Z_PTR(jit, op1_addr)); + } else { + zend_string *str = Z_STR_P(RT_CONSTANT(opline, opline->op1)); + + ref = ir_CALL_2(IR_ADDR, ir_CONST_FC_FUNC(zend_hash_find_known_hash), + ir_CONST_ADDR(ht), ir_CONST_ADDR(str)); + } + + if (exit_addr) { + if (smart_branch_opcode == ZEND_JMPZ) { + ir_GUARD(ref, ir_CONST_ADDR(exit_addr)); + } else { + ir_GUARD_NOT(ref, ir_CONST_ADDR(exit_addr)); + } + } else if (smart_branch_opcode) { + zend_basic_block *bb; + + ZEND_ASSERT(jit->b >= 0); + bb = &jit->ssa->cfg.blocks[jit->b]; + ZEND_ASSERT(bb->successors_count == 2); + ref = jit_IF_ex(jit, ref, + (smart_branch_opcode == ZEND_JMPZ) ? target_label2 : target_label); + _zend_jit_add_predecessor_ref(jit, bb->successors[0], jit->b, ref); + _zend_jit_add_predecessor_ref(jit, bb->successors[1], jit->b, ref); + jit->b = -1; + } else { + jit_set_Z_TYPE_INFO_ex(jit, res_addr, + ir_ADD_U32(ir_ZEXT_U32(ir_NE(ref, IR_NULL)), ir_CONST_U32(IS_FALSE))); + } + + return 1; +} + +static int zend_jit_rope(zend_jit_ctx *jit, const zend_op *opline, uint32_t op2_info) +{ + uint32_t offset; + + offset = (opline->opcode == ZEND_ROPE_INIT) ? 
+ opline->result.var : + opline->op1.var + opline->extended_value * sizeof(zend_string*); + + if (opline->op2_type == IS_CONST) { + zval *zv = RT_CONSTANT(opline, opline->op2); + zend_string *str; + + ZEND_ASSERT(Z_TYPE_P(zv) == IS_STRING); + str = Z_STR_P(zv); + + ir_STORE(ir_ADD_OFFSET(jit_FP(jit), offset), ir_CONST_ADDR(str)); + } else { + zend_jit_addr op2_addr = OP2_ADDR(); + ir_ref ref; + + ZEND_ASSERT((op2_info & (MAY_BE_UNDEF|MAY_BE_ANY|MAY_BE_REF)) == MAY_BE_STRING); + + ref = jit_Z_PTR(jit, op2_addr); + ir_STORE(ir_ADD_OFFSET(jit_FP(jit), offset), ref); + if (opline->op2_type == IS_CV) { + ir_ref if_refcounted, long_path; + + if_refcounted = jit_if_REFCOUNTED(jit, op2_addr); + ir_IF_TRUE(if_refcounted); + jit_GC_ADDREF(jit, ref); + long_path = ir_END(); + + ir_IF_FALSE(if_refcounted); + ir_MERGE_WITH(long_path); + } + } + + if (opline->opcode == ZEND_ROPE_END) { + zend_jit_addr res_addr = RES_ADDR(); + ir_ref ref; + + ref = ir_CALL_2(IR_ADDR, ir_CONST_FC_FUNC(zend_jit_rope_end), + ir_ADD_OFFSET(jit_FP(jit), opline->op1.var), + ir_CONST_U32(opline->extended_value)); + + jit_set_Z_PTR(jit, res_addr, ref); + jit_set_Z_TYPE_INFO(jit, res_addr, IS_STRING_EX); + } + + return 1; +} + +static int zend_jit_zval_copy_deref(zend_jit_ctx *jit, zend_jit_addr res_addr, zend_jit_addr val_addr, ir_ref type) +{ + ir_ref if_refcounted, if_reference, if_refcounted2, ptr, val2, ptr2, type2; + ir_refs *merge_inputs, *types, *ptrs; +#if SIZEOF_ZEND_LONG == 4 + ir_ref val = jit_ZVAL_ADDR(jit, val_addr); + ir_refs *values; /* we need this only for zval.w2 copy */ +#endif + + ir_refs_init(merge_inputs, 4); + ir_refs_init(types, 4); + ir_refs_init(ptrs, 4); +#if SIZEOF_ZEND_LONG == 4 + ir_refs_init(values, 4); +#endif + + // JIT: ptr = Z_PTR_P(val); + ptr = jit_Z_PTR(jit, val_addr); + + // JIT: if (Z_OPT_REFCOUNTED_P(val)) { + if_refcounted = ir_IF(ir_AND_U32(type, ir_CONST_U32(Z_TYPE_FLAGS_MASK))); + ir_IF_FALSE_cold(if_refcounted); + ir_refs_add(merge_inputs, ir_END()); + ir_refs_add(types, type); + ir_refs_add(ptrs, ptr); +#if SIZEOF_ZEND_LONG == 4 + ir_refs_add(values, val); +#endif + + ir_IF_TRUE(if_refcounted); + + // JIT: if (UNEXPECTED(Z_OPT_ISREF_P(val))) { + if_reference = ir_IF(ir_EQ(type, ir_CONST_U32(IS_REFERENCE_EX))); +// if_reference = ir_IF(ir_EQ(ir_TRUNC_U8(type), ir_CONST_U8(IS_REFERENCE))); // TODO: fix IR to avoid need for extra register ??? 
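+ // on the reference path below, the referenced value is loaded and re-checked for refcountedness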
+ ir_IF_TRUE(if_reference); + + // JIT: val = Z_REFVAL_P(val); + val2 = ir_ADD_OFFSET(ptr, offsetof(zend_reference, val)); + type2 = jit_Z_TYPE_INFO_ref(jit, val2); + ptr2 = jit_Z_PTR_ref(jit, val2); + + // JIT: if (Z_OPT_REFCOUNTED_P(val)) { + if_refcounted2 = ir_IF(ir_AND_U32(type2, ir_CONST_U32(Z_TYPE_FLAGS_MASK))); + ir_IF_FALSE_cold(if_refcounted2); + ir_refs_add(merge_inputs, ir_END()); + ir_refs_add(types, type2); + ir_refs_add(ptrs, ptr2); +#if SIZEOF_ZEND_LONG == 4 + ir_refs_add(values, val2); +#endif + + ir_IF_TRUE(if_refcounted2); + ir_MERGE_WITH_EMPTY_FALSE(if_reference); + type = ir_PHI_2(IR_U32, type2, type); + ptr = ir_PHI_2(IR_ADDR, ptr2, ptr); +#if SIZEOF_ZEND_LONG == 4 + val = ir_PHI_2(IR_ADDR, val2, val); +#endif + + // JIT: Z_ADDREF_P(val); + jit_GC_ADDREF(jit, ptr); + ir_refs_add(merge_inputs, ir_END()); + ir_refs_add(types, type); + ir_refs_add(ptrs, ptr); +#if SIZEOF_ZEND_LONG == 4 + ir_refs_add(values, val); +#endif + + ir_MERGE_N(merge_inputs->count, merge_inputs->refs); + type = ir_PHI_N(IR_U32, types->count, types->refs); + ptr = ir_PHI_N(IR_ADDR, ptrs->count, ptrs->refs); +#if SIZEOF_ZEND_LONG == 4 + val = ir_PHI_N(IR_ADDR, values->count, values->refs); + val_addr = ZEND_ADDR_REF_ZVAL(val); +#endif + + // JIT: Z_PTR_P(res) = ptr; + jit_set_Z_PTR(jit, res_addr, ptr); +#if SIZEOF_ZEND_LONG == 4 + jit_set_Z_W2(jit, res_addr, jit_Z_W2(jit, val_addr)); +#endif + jit_set_Z_TYPE_INFO_ex(jit, res_addr, type); + + return 1; +} + +static int zend_jit_fetch_dimension_address_inner(zend_jit_ctx *jit, + const zend_op *opline, + uint32_t type, + uint32_t op1_info, + uint32_t op2_info, + uint8_t dim_type, + const void *found_exit_addr, + const void *not_found_exit_addr, + const void *exit_addr, + bool result_type_guard, + ir_ref ht_ref, + ir_refs *found_inputs, + ir_refs *found_vals, + ir_ref *end_inputs, + ir_ref *not_found_inputs) +{ + zend_jit_addr op2_addr = OP2_ADDR(); + zend_jit_addr res_addr = ZEND_ADDR_MEM_ZVAL(ZREG_FP, opline->result.var); + ir_ref ref = IR_UNUSED, cond, if_found; + ir_ref if_type = IS_UNUSED; + ir_refs *test_zval_inputs, *test_zval_values; + + ir_refs_init(test_zval_inputs, 4); + ir_refs_init(test_zval_values, 4); + + if (JIT_G(trigger) == ZEND_JIT_ON_HOT_TRACE + && type == BP_VAR_R + && !exit_addr) { + int32_t exit_point = zend_jit_trace_get_exit_point(opline, ZEND_JIT_EXIT_TO_VM); + exit_addr = zend_jit_trace_get_exit_addr(exit_point); + if (!exit_addr) { + return 0; + } + } + + if (op2_info & MAY_BE_LONG) { + bool op2_loaded = 0; + bool packed_loaded = 0; + bool bad_packed_key = 0; + ir_ref if_packed = IS_UNDEF; + ir_ref h = IR_UNUSED; + ir_ref idx_not_found_inputs = IR_UNUSED; + + if (op2_info & ((MAY_BE_ANY|MAY_BE_UNDEF) - MAY_BE_LONG)) { + // JIT: if (EXPECTED(Z_TYPE_P(dim) == IS_LONG)) + if_type = jit_if_Z_TYPE(jit, op2_addr, IS_LONG); + ir_IF_TRUE(if_type); + } + if (op1_info & MAY_BE_PACKED_GUARD) { + int32_t exit_point = zend_jit_trace_get_exit_point(opline, ZEND_JIT_EXIT_PACKED_GUARD); + const void *exit_addr = zend_jit_trace_get_exit_addr(exit_point); + + if (!exit_addr) { + return 0; + } + cond = ir_AND_U32( + ir_LOAD_U32(ir_ADD_OFFSET(ht_ref, offsetof(zend_array, u.flags))), + ir_CONST_U32(HASH_FLAG_PACKED)); + if (op1_info & MAY_BE_ARRAY_PACKED) { + ir_GUARD(cond, ir_CONST_ADDR(exit_addr)); + } else { + ir_GUARD_NOT(cond, ir_CONST_ADDR(exit_addr)); + } + } + if (type == BP_VAR_W) { + // JIT: hval = Z_LVAL_P(dim); + h = jit_Z_LVAL(jit, op2_addr); + op2_loaded = 1; + } + if (op1_info & MAY_BE_ARRAY_PACKED) { + zend_long val = -1; + + if 
(Z_MODE(op2_addr) == IS_CONST_ZVAL) { + val = Z_LVAL_P(Z_ZV(op2_addr)); + if (val >= 0 && val < HT_MAX_SIZE) { + packed_loaded = 1; + } else { + bad_packed_key = 1; + if (JIT_G(trigger) == ZEND_JIT_ON_HOT_TRACE && type == BP_VAR_R) { + jit_SIDE_EXIT(jit, ir_CONST_ADDR(exit_addr)); + } + } + h = ir_CONST_LONG(val); + } else { + if (!op2_loaded) { + // JIT: hval = Z_LVAL_P(dim); + h = jit_Z_LVAL(jit, op2_addr); + op2_loaded = 1; + } + packed_loaded = 1; + } + + if (dim_type == IS_UNDEF && type == BP_VAR_W && packed_loaded) { + /* don't generate "fast" code for packed array */ + packed_loaded = 0; + } + + if (packed_loaded) { + // JIT: ZEND_HASH_INDEX_FIND(ht, hval, retval, num_undef); + if (op1_info & MAY_BE_ARRAY_NUMERIC_HASH) { + if_packed = ir_IF( + ir_AND_U32( + ir_LOAD_U32(ir_ADD_OFFSET(ht_ref, offsetof(zend_array, u.flags))), + ir_CONST_U32(HASH_FLAG_PACKED))); + ir_IF_TRUE(if_packed); + } + // JIT: if (EXPECTED((zend_ulong)(_h) < (zend_ulong)(_ht)->nNumUsed)) + ref = ir_LOAD_U32(ir_ADD_OFFSET(ht_ref, offsetof(zend_array, nNumUsed))); +#if SIZEOF_ZEND_LONG == 8 + ref = ir_ZEXT_L(ref); +#endif + cond = ir_ULT(h, ref); + if (type == BP_JIT_IS) { + if (not_found_exit_addr) { + ir_GUARD(cond, ir_CONST_ADDR(not_found_exit_addr)); + } else { + ir_ref if_fit = ir_IF(cond); + ir_IF_FALSE(if_fit); + ir_END_list(*end_inputs); + ir_IF_TRUE(if_fit); + } + } else if (JIT_G(trigger) == ZEND_JIT_ON_HOT_TRACE && type == BP_VAR_R) { + ir_GUARD(cond, ir_CONST_ADDR(exit_addr)); + } else if (type == BP_VAR_IS && not_found_exit_addr) { + ir_GUARD(cond, ir_CONST_ADDR(not_found_exit_addr)); + } else if (type == BP_VAR_RW && not_found_exit_addr) { + ir_GUARD(cond, ir_CONST_ADDR(not_found_exit_addr)); + } else if (type == BP_VAR_IS && result_type_guard) { + ir_ref if_fit = ir_IF(cond); + ir_IF_FALSE(if_fit); + ir_END_list(*not_found_inputs); + ir_IF_TRUE(if_fit); + } else { + ir_ref if_fit = ir_IF(cond); + ir_IF_FALSE(if_fit); + ir_END_list(idx_not_found_inputs); + ir_IF_TRUE(if_fit); + } + // JIT: _ret = &_ht->arPacked[h]; + ref = ir_MUL_L(h, ir_CONST_LONG(sizeof(zval))); + ref = ir_BITCAST_A(ref); + ref = ir_ADD_A(ir_LOAD_A(ir_ADD_OFFSET(ht_ref, offsetof(zend_array, arPacked))), ref); + if (type == BP_JIT_IS) { + ir_refs_add(test_zval_values, ref); + ir_refs_add(test_zval_inputs, ir_END()); + } + } + } + switch (type) { + case BP_JIT_IS: + if (op1_info & MAY_BE_ARRAY_NUMERIC_HASH) { + if (if_packed) { + ir_IF_FALSE(if_packed); + if_packed = IR_UNUSED; + } + if (!op2_loaded) { + // JIT: hval = Z_LVAL_P(dim); + h = jit_Z_LVAL(jit, op2_addr); + } + if (packed_loaded) { + ref = ir_CALL_2(IR_ADDR, ir_CONST_FC_FUNC(_zend_hash_index_find), ht_ref, h); + } else { + ref = ir_CALL_2(IR_ADDR, ir_CONST_FC_FUNC(zend_hash_index_find), ht_ref, h); + } + if (not_found_exit_addr) { + ir_GUARD(ref, ir_CONST_ADDR(not_found_exit_addr)); + } else { + if_found = ir_IF(ref); + ir_IF_FALSE(if_found); + ir_END_list(*end_inputs); + ir_IF_TRUE(if_found); + } + ir_refs_add(test_zval_values, ref); + ir_refs_add(test_zval_inputs, ir_END()); + } else if (!not_found_exit_addr && !packed_loaded) { + ir_END_list(*end_inputs); + } + break; + case BP_VAR_R: + case BP_VAR_IS: + case BP_VAR_UNSET: + if (packed_loaded) { + ir_ref type_ref = jit_Z_TYPE_ref(jit, ref); + + if (op1_info & MAY_BE_ARRAY_NUMERIC_HASH) { + ir_ref if_def = ir_IF(type_ref); + ir_IF_TRUE(if_def); + ir_refs_add(found_inputs, ir_END()); + ir_refs_add(found_vals, ref); + ir_IF_FALSE(if_def); + } else if (JIT_G(trigger) == ZEND_JIT_ON_HOT_TRACE && type == BP_VAR_R) { + /* 
perform IS_UNDEF check only after result type guard (during deoptimization) */ + if (!result_type_guard || (op1_info & MAY_BE_ARRAY_NUMERIC_HASH)) { + ir_GUARD(type_ref, ir_CONST_ADDR(exit_addr)); + } + } else if (type == BP_VAR_IS && not_found_exit_addr) { + ir_GUARD(type_ref, ir_CONST_ADDR(not_found_exit_addr)); + } else if (type == BP_VAR_IS && result_type_guard) { + ir_ref if_def = ir_IF(type_ref); + ir_IF_FALSE(if_def); + ir_END_list(*not_found_inputs); + ir_IF_TRUE(if_def); + } else { + ir_ref if_def = ir_IF(type_ref); + ir_IF_FALSE(if_def); + ir_END_list(idx_not_found_inputs); + ir_IF_TRUE(if_def); + } + } + if (!(op1_info & MAY_BE_ARRAY_KEY_LONG) || (packed_loaded && (op1_info & MAY_BE_ARRAY_NUMERIC_HASH))) { + if (JIT_G(trigger) == ZEND_JIT_ON_HOT_TRACE && type == BP_VAR_R) { + jit_SIDE_EXIT(jit, ir_CONST_ADDR(exit_addr)); + } else if (type == BP_VAR_IS && not_found_exit_addr) { + jit_SIDE_EXIT(jit, ir_CONST_ADDR(not_found_exit_addr)); + } else if (type == BP_VAR_IS && result_type_guard) { + ir_END_list(*not_found_inputs); + } else { + ir_END_list(idx_not_found_inputs); + } + } + if (/*!packed_loaded ||*/ (op1_info & MAY_BE_ARRAY_NUMERIC_HASH)) { + if (if_packed) { + ir_IF_FALSE(if_packed); + if_packed = IR_UNUSED; + } + if (!op2_loaded) { + // JIT: hval = Z_LVAL_P(dim); + h = jit_Z_LVAL(jit, op2_addr); + } + if (packed_loaded) { + ref = ir_CALL_2(IR_ADDR, ir_CONST_FC_FUNC(_zend_hash_index_find), ht_ref, h); + } else { + ref = ir_CALL_2(IR_ADDR, ir_CONST_FC_FUNC(zend_hash_index_find), ht_ref, h); + } + if (JIT_G(trigger) == ZEND_JIT_ON_HOT_TRACE && type == BP_VAR_R) { + ir_GUARD(ref, ir_CONST_ADDR(exit_addr)); + } else if (type == BP_VAR_IS && not_found_exit_addr) { + ir_GUARD(ref, ir_CONST_ADDR(not_found_exit_addr)); + } else if (type == BP_VAR_IS && result_type_guard) { + if_found = ir_IF(ref); + ir_IF_FALSE(if_found); + ir_END_list(*not_found_inputs); + ir_IF_TRUE(if_found); + } else { + if_found = ir_IF(ref); + ir_IF_FALSE(if_found); + ir_END_list(idx_not_found_inputs); + ir_IF_TRUE(if_found); + } + ir_refs_add(found_inputs, ir_END()); + ir_refs_add(found_vals, ref); + } else if (packed_loaded) { + ir_refs_add(found_inputs, ir_END()); + ir_refs_add(found_vals, ref); + } + + if (idx_not_found_inputs) { + ir_MERGE_list(idx_not_found_inputs); + switch (type) { + case BP_VAR_R: + ZEND_ASSERT(JIT_G(trigger) != ZEND_JIT_ON_HOT_TRACE); + // JIT: zend_error(E_WARNING,"Undefined array key " ZEND_LONG_FMT, hval); + // JIT: retval = &EG(uninitialized_zval); + jit_SET_EX_OPLINE(jit, opline); + ir_CALL(IR_VOID, jit_STUB_FUNC_ADDR(jit, jit_stub_undefined_offset, IR_CONST_FASTCALL_FUNC)); + ir_END_list(*end_inputs); + break; + case BP_VAR_IS: + case BP_VAR_UNSET: + if (!not_found_exit_addr) { + // JIT: retval = &EG(uninitialized_zval); + jit_set_Z_TYPE_INFO(jit, res_addr, IS_NULL); + ir_END_list(*end_inputs); + } + break; + default: + ZEND_UNREACHABLE(); + } + } + break; + case BP_VAR_RW: + if (packed_loaded) { + if (not_found_exit_addr) { + ir_refs_add(found_inputs, ir_END()); + ir_refs_add(found_vals, ref); + } else { + ir_ref if_def = ir_IF(jit_Z_TYPE_ref(jit, ref)); + ir_IF_TRUE(if_def); + ir_refs_add(found_inputs, ir_END()); + ir_refs_add(found_vals, ref); + ir_IF_FALSE_cold(if_def); + ir_END_list(idx_not_found_inputs); + } + } + if (!packed_loaded || + !not_found_exit_addr || + (op1_info & MAY_BE_ARRAY_NUMERIC_HASH)) { + if (if_packed) { + ir_IF_FALSE(if_packed); + if_packed = IR_UNUSED; + ir_END_list(idx_not_found_inputs); + } else if (!packed_loaded) { + 
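/* all paths that bypassed (or fell out of) the packed fast path are
+ * funneled into idx_not_found_inputs and fall back to the hash
+ * lookup helper after the merge below */
+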
ir_END_list(idx_not_found_inputs); + } + + ir_MERGE_list(idx_not_found_inputs); + if (!op2_loaded) { + // JIT: hval = Z_LVAL_P(dim); + h = jit_Z_LVAL(jit, op2_addr); + } + if (packed_loaded) { + ref = ir_CALL_2(IR_ADDR, ir_CONST_FC_FUNC(zend_jit_hash_index_lookup_rw_no_packed), + ht_ref, h); + } else { + ref = ir_CALL_2(IR_ADDR, ir_CONST_FC_FUNC(zend_jit_hash_index_lookup_rw), ht_ref, h); + } + if (not_found_exit_addr) { + ir_GUARD(ref, ir_CONST_ADDR(not_found_exit_addr)); + } else { + if_found = ir_IF(ref); + ir_IF_FALSE(if_found); + ir_END_list(*end_inputs); + ir_IF_TRUE(if_found); + } + ir_refs_add(found_inputs, ir_END()); + ir_refs_add(found_vals, ref); + } + break; + case BP_VAR_W: + if (packed_loaded) { + ir_ref if_def = ir_IF(jit_Z_TYPE_ref(jit, ref)); + ir_IF_TRUE_cold(if_def); + ir_refs_add(found_inputs, ir_END()); + ir_refs_add(found_vals, ref); + ir_IF_FALSE(if_def); + ir_END_list(idx_not_found_inputs); + } + if (!(op1_info & MAY_BE_ARRAY_KEY_LONG) || (op1_info & MAY_BE_ARRAY_NUMERIC_HASH) || packed_loaded || bad_packed_key || dim_type == IS_UNDEF) { + if (if_packed) { + ir_IF_FALSE(if_packed); + if_packed = IR_UNUSED; + ir_END_list(idx_not_found_inputs); + } else if (!packed_loaded) { + ir_END_list(idx_not_found_inputs); + } + ir_MERGE_list(idx_not_found_inputs); + if (!op2_loaded) { + // JIT: hval = Z_LVAL_P(dim); + h = jit_Z_LVAL(jit, op2_addr); + } + ref = ir_CALL_2(IR_ADDR, ir_CONST_FC_FUNC(zend_hash_index_lookup), ht_ref, h); + ir_refs_add(found_inputs, ir_END()); + ir_refs_add(found_vals, ref); + } + break; + default: + ZEND_UNREACHABLE(); + } + } + + if (op2_info & MAY_BE_STRING) { + ir_ref key; + + if (if_type) { + ir_IF_FALSE(if_type); + if_type = IS_UNUSED; + } + + if (op2_info & ((MAY_BE_ANY|MAY_BE_UNDEF) - (MAY_BE_LONG|MAY_BE_STRING))) { + // JIT: if (EXPECTED(Z_TYPE_P(dim) == IS_STRING)) + if_type = jit_if_Z_TYPE(jit, op2_addr, IS_STRING); + ir_IF_TRUE(if_type); + } + + // JIT: offset_key = Z_STR_P(dim); + key = jit_Z_PTR(jit, op2_addr); + + // JIT: retval = zend_hash_find(ht, offset_key); + switch (type) { + case BP_JIT_IS: + if (opline->op2_type != IS_CONST) { + ir_ref if_num, end1, ref2; + + if_num = ir_IF( + ir_ULE( + ir_LOAD_C(ir_ADD_OFFSET(key, offsetof(zend_string, val))), + ir_CONST_CHAR('9'))); + ir_IF_TRUE_cold(if_num); + ref = ir_CALL_2(IR_ADDR, ir_CONST_FC_FUNC(zend_jit_symtable_find), ht_ref, key); + end1 = ir_END(); + ir_IF_FALSE(if_num); + ref2 = ir_CALL_2(IR_ADDR, ir_CONST_FC_FUNC(zend_hash_find), ht_ref, key); + ir_MERGE_WITH(end1); + ref = ir_PHI_2(IR_ADDR, ref2, ref); + } else { + ref = ir_CALL_2(IR_ADDR, ir_CONST_FC_FUNC(zend_hash_find_known_hash), ht_ref, key); + } + if (not_found_exit_addr) { + ir_GUARD(ref, ir_CONST_ADDR(not_found_exit_addr)); + } else { + if_found = ir_IF(ref); + ir_IF_FALSE(if_found); + ir_END_list(*end_inputs); + ir_IF_TRUE(if_found); + } + ir_refs_add(test_zval_values, ref); + ir_refs_add(test_zval_inputs, ir_END()); + break; + case BP_VAR_R: + case BP_VAR_IS: + case BP_VAR_UNSET: + if (opline->op2_type != IS_CONST) { + ir_ref if_num, end1, ref2; + + if_num = ir_IF( + ir_ULE( + ir_LOAD_C(ir_ADD_OFFSET(key, offsetof(zend_string, val))), + ir_CONST_CHAR('9'))); + ir_IF_TRUE_cold(if_num); + ref = ir_CALL_2(IR_ADDR, ir_CONST_FC_FUNC(zend_jit_symtable_find), ht_ref, key); + end1 = ir_END(); + ir_IF_FALSE(if_num); + ref2 = ir_CALL_2(IR_ADDR, ir_CONST_FC_FUNC(zend_hash_find), ht_ref, key); + ir_MERGE_WITH(end1); + ref = ir_PHI_2(IR_ADDR, ref2, ref); + } else { + ref = ir_CALL_2(IR_ADDR, 
ir_CONST_FC_FUNC(zend_hash_find_known_hash), ht_ref, key); + } + if (JIT_G(trigger) == ZEND_JIT_ON_HOT_TRACE && type == BP_VAR_R) { + ir_GUARD(ref, ir_CONST_ADDR(exit_addr)); + } else if (type == BP_VAR_IS && not_found_exit_addr) { + ir_GUARD(ref, ir_CONST_ADDR(not_found_exit_addr)); + } else if (type == BP_VAR_IS && result_type_guard) { + if_found = ir_IF(ref); + ir_IF_FALSE(if_found); + ir_END_list(*not_found_inputs); + ir_IF_TRUE(if_found); + } else { + if_found = ir_IF(ref); + switch (type) { + case BP_VAR_R: + ir_IF_FALSE_cold(if_found); + // JIT: zend_error(E_WARNING, "Undefined array key \"%s\"", ZSTR_VAL(offset_key)); + jit_SET_EX_OPLINE(jit, opline); + ir_CALL(IR_VOID, jit_STUB_FUNC_ADDR(jit, jit_stub_undefined_key, IR_CONST_FASTCALL_FUNC)); + ir_END_list(*end_inputs); + break; + case BP_VAR_IS: + case BP_VAR_UNSET: + ir_IF_FALSE(if_found); + // JIT: retval = &EG(uninitialized_zval); + jit_set_Z_TYPE_INFO(jit, res_addr, IS_NULL); + ir_END_list(*end_inputs); + break; + default: + ZEND_UNREACHABLE(); + } + ir_IF_TRUE(if_found); + } + ir_refs_add(found_inputs, ir_END()); + ir_refs_add(found_vals, ref); + break; + case BP_VAR_RW: + if (opline->op2_type != IS_CONST) { + ref = ir_CALL_2(IR_ADDR, ir_CONST_FC_FUNC(zend_jit_symtable_lookup_rw), ht_ref, key); + } else { + ref = ir_CALL_2(IR_ADDR, ir_CONST_FC_FUNC(zend_jit_hash_lookup_rw), ht_ref, key); + } + if (not_found_exit_addr) { + ir_GUARD(ref, ir_CONST_ADDR(not_found_exit_addr)); + } else { + if_found = ir_IF(ref); + ir_IF_FALSE(if_found); + ir_END_list(*end_inputs); + ir_IF_TRUE(if_found); + } + ir_refs_add(found_inputs, ir_END()); + ir_refs_add(found_vals, ref); + break; + case BP_VAR_W: + if (opline->op2_type != IS_CONST) { + ref = ir_CALL_2(IR_ADDR, ir_CONST_FC_FUNC(zend_jit_symtable_lookup_w), ht_ref, key); + } else { + ref = ir_CALL_2(IR_ADDR, ir_CONST_FC_FUNC(zend_hash_lookup), ht_ref, key); + } + ir_refs_add(found_inputs, ir_END()); + ir_refs_add(found_vals, ref); + break; + default: + ZEND_UNREACHABLE(); + } + } + + if (op2_info & ((MAY_BE_ANY|MAY_BE_UNDEF) - (MAY_BE_LONG|MAY_BE_STRING))) { + if (if_type) { + ir_IF_FALSE_cold(if_type); + if_type = IS_UNDEF; + } + if (type != BP_VAR_RW) { + jit_SET_EX_OPLINE(jit, opline); + } + ref = jit_ZVAL_ADDR(jit, op2_addr); + switch (type) { + case BP_VAR_R: + ir_CALL_3(IR_VOID, ir_CONST_FC_FUNC(zend_jit_fetch_dim_r_helper), + ht_ref, + ref, + jit_ZVAL_ADDR(jit, res_addr)); + ir_END_list(*end_inputs); + break; + case BP_JIT_IS: + ref = ir_CALL_2(IR_I32, ir_CONST_FC_FUNC(zend_jit_fetch_dim_isset_helper), ht_ref, ref); + if (not_found_exit_addr) { + ir_GUARD(ref, ir_CONST_ADDR(not_found_exit_addr)); + ir_refs_add(found_inputs, ir_END()); + } else if (found_exit_addr) { + ir_GUARD_NOT(ref, ir_CONST_ADDR(found_exit_addr)); + ir_END_list(*end_inputs); + } else { + if_found = ir_IF(ref); + ir_IF_TRUE(if_found); + ir_refs_add(found_inputs, ir_END()); + ir_IF_FALSE(if_found); + ir_END_list(*end_inputs); + } + break; + case BP_VAR_IS: + case BP_VAR_UNSET: + ir_CALL_3(IR_VOID, ir_CONST_FC_FUNC(zend_jit_fetch_dim_is_helper), + ht_ref, + ref, + jit_ZVAL_ADDR(jit, res_addr)); + ir_END_list(*end_inputs); + break; + case BP_VAR_RW: + ref = ir_CALL_2(IR_ADDR, ir_CONST_FC_FUNC(zend_jit_fetch_dim_rw_helper), ht_ref, ref); + if_found = ir_IF(ref); + ir_IF_TRUE(if_found); + ir_refs_add(found_inputs, ir_END()); + ir_refs_add(found_vals, ref); + ir_IF_FALSE(if_found); + ir_END_list(*end_inputs); + break; + case BP_VAR_W: + ref = ir_CALL_2(IR_ADDR, ir_CONST_FC_FUNC(zend_jit_fetch_dim_w_helper), ht_ref, ref); 
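+ /* a NULL result means the lookup failed (the helper has already
+ * thrown); it joins the common end list, while a successful fetch
+ * continues to the "found" merge */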
+ if_found = ir_IF(ref); + ir_IF_TRUE(if_found); + ir_refs_add(found_inputs, ir_END()); + ir_refs_add(found_vals, ref); + ir_IF_FALSE(if_found); + ir_END_list(*end_inputs); + break; + default: + ZEND_UNREACHABLE(); + } + } + + if (type == BP_JIT_IS + && (op1_info & MAY_BE_ARRAY) + && (op2_info & (MAY_BE_LONG|MAY_BE_STRING)) + && test_zval_inputs->count) { + + ir_MERGE_N(test_zval_inputs->count, test_zval_inputs->refs); + ref = ir_PHI_N(IR_ADDR, test_zval_values->count, test_zval_values->refs); + + if (op1_info & MAY_BE_ARRAY_OF_REF) { + ref = jit_ZVAL_DEREF_ref(jit, ref); + } + cond = ir_GT(jit_Z_TYPE_ref(jit, ref), ir_CONST_U8(IS_NULL)); + if (not_found_exit_addr) { + ir_GUARD(cond, ir_CONST_ADDR(not_found_exit_addr)); + ir_refs_add(found_inputs, ir_END()); + } else if (found_exit_addr) { + ir_GUARD_NOT(cond, ir_CONST_ADDR(found_exit_addr)); + ir_END_list(*end_inputs); + } else { + ir_ref if_set = ir_IF(cond); + ir_IF_FALSE(if_set); + ir_END_list(*end_inputs); + ir_IF_TRUE(if_set); + ir_refs_add(found_inputs, ir_END()); + } + } + + return 1; +} + +static int zend_jit_fetch_dim_read(zend_jit_ctx *jit, + const zend_op *opline, + zend_ssa *ssa, + const zend_ssa_op *ssa_op, + uint32_t op1_info, + zend_jit_addr op1_addr, + bool op1_avoid_refcounting, + uint32_t op2_info, + uint32_t res_info, + zend_jit_addr res_addr, + uint8_t dim_type) +{ + zend_jit_addr orig_op1_addr, op2_addr; + const void *exit_addr = NULL; + const void *not_found_exit_addr = NULL; + bool result_type_guard = 0; + bool result_avoid_refcounting = 0; + uint32_t may_be_string = (opline->opcode != ZEND_FETCH_LIST_R) ? MAY_BE_STRING : 0; + int may_throw = 0; + ir_ref if_type = IR_UNUSED; + ir_ref end_inputs = IR_UNUSED; + ir_ref not_found_inputs = IR_UNUSED; + + orig_op1_addr = OP1_ADDR(); + op2_addr = OP2_ADDR(); + + if (opline->opcode != ZEND_FETCH_DIM_IS + && JIT_G(trigger) == ZEND_JIT_ON_HOT_TRACE + && !has_concrete_type(op1_info)) { + int32_t exit_point = zend_jit_trace_get_exit_point(opline, ZEND_JIT_EXIT_TO_VM); + exit_addr = zend_jit_trace_get_exit_addr(exit_point); + if (!exit_addr) { + return 0; + } + } + + if ((res_info & MAY_BE_GUARD) + && JIT_G(current_frame) + && (op1_info & (MAY_BE_ANY|MAY_BE_UNDEF)) == MAY_BE_ARRAY) { + + if (!(op2_info & ((MAY_BE_ANY|MAY_BE_UNDEF|MAY_BE_REF) - (MAY_BE_STRING|MAY_BE_LONG)))) { + result_type_guard = 1; + res_info &= ~MAY_BE_GUARD; + ssa->var_info[ssa_op->result_def].type &= ~MAY_BE_GUARD; + } + + if ((opline->result_type & (IS_VAR|IS_TMP_VAR)) + && (opline->opcode == ZEND_FETCH_LIST_R + || !(opline->op1_type & (IS_VAR|IS_TMP_VAR)) + || op1_avoid_refcounting) + && (res_info & (MAY_BE_STRING|MAY_BE_ARRAY|MAY_BE_OBJECT|MAY_BE_RESOURCE)) + && (ssa_op+1)->op1_use == ssa_op->result_def + && !(op2_info & ((MAY_BE_ANY|MAY_BE_UNDEF|MAY_BE_REF) - (MAY_BE_STRING|MAY_BE_LONG))) + && zend_jit_may_avoid_refcounting(opline+1, res_info)) { + result_avoid_refcounting = 1; + ssa->var_info[ssa_op->result_def].avoid_refcounting = 1; + } + + if (opline->opcode == ZEND_FETCH_DIM_IS + && !(res_info & MAY_BE_NULL)) { + uint32_t flags = 0; + uint32_t old_op1_info = 0; + uint32_t old_info; + zend_jit_trace_stack *stack = JIT_G(current_frame)->stack; + int32_t exit_point; + + if (opline->opcode != ZEND_FETCH_LIST_R + && (opline->op1_type & (IS_VAR|IS_TMP_VAR)) + && !op1_avoid_refcounting) { + flags |= ZEND_JIT_EXIT_FREE_OP1; + } + if ((opline->op2_type & (IS_VAR|IS_TMP_VAR)) + && (op2_info & (MAY_BE_STRING|MAY_BE_ARRAY|MAY_BE_OBJECT|MAY_BE_RESOURCE))) { + flags |= ZEND_JIT_EXIT_FREE_OP2; + } + + if 
(op1_avoid_refcounting) { + old_op1_info = STACK_INFO(stack, EX_VAR_TO_NUM(opline->op1.var)); + SET_STACK_REG(stack, EX_VAR_TO_NUM(opline->op1.var), ZREG_NONE); + } + + old_info = STACK_INFO(stack, EX_VAR_TO_NUM(opline->result.var)); + SET_STACK_TYPE(stack, EX_VAR_TO_NUM(opline->result.var), IS_NULL, 0); + SET_STACK_REG_EX(stack, EX_VAR_TO_NUM(opline->result.var), ZREG_NONE, ZREG_TYPE_ONLY); + exit_point = zend_jit_trace_get_exit_point(opline+1, flags); + SET_STACK_INFO(stack, EX_VAR_TO_NUM(opline->result.var), old_info); + not_found_exit_addr = zend_jit_trace_get_exit_addr(exit_point); + if (!not_found_exit_addr) { + return 0; + } + + if (op1_avoid_refcounting) { + SET_STACK_INFO(stack, EX_VAR_TO_NUM(opline->op1.var), old_op1_info); + } + } + } + + if (op1_info & MAY_BE_REF) { + ir_ref ref = jit_ZVAL_ADDR(jit, op1_addr); + ref = jit_ZVAL_DEREF_ref(jit, ref); + op1_addr = ZEND_ADDR_REF_ZVAL(ref); + } + + if (op1_info & MAY_BE_ARRAY) { + ir_ref ht_ref, ref; + zend_jit_addr val_addr; + ir_refs *found_inputs, *found_vals; + + ir_refs_init(found_inputs, 10); + ir_refs_init(found_vals, 10); + + if (op1_info & ((MAY_BE_ANY|MAY_BE_UNDEF) - MAY_BE_ARRAY)) { + if (exit_addr && !(op1_info & (MAY_BE_OBJECT|may_be_string))) { + jit_guard_Z_TYPE(jit, op1_addr, IS_ARRAY, exit_addr); + } else { + if_type = jit_if_Z_TYPE(jit, op1_addr, IS_ARRAY); + ir_IF_TRUE(if_type); + } + } + + ht_ref = jit_Z_PTR(jit, op1_addr); + + if ((op2_info & ((MAY_BE_ANY|MAY_BE_UNDEF) - (MAY_BE_LONG|MAY_BE_STRING))) || + (opline->opcode != ZEND_FETCH_DIM_IS && JIT_G(trigger) != ZEND_JIT_ON_HOT_TRACE)) { + may_throw = 1; + } + + if (!zend_jit_fetch_dimension_address_inner(jit, opline, + (opline->opcode != ZEND_FETCH_DIM_IS) ? BP_VAR_R : BP_VAR_IS, + op1_info, op2_info, dim_type, NULL, not_found_exit_addr, exit_addr, + result_type_guard, ht_ref, found_inputs, found_vals, + &end_inputs, ¬_found_inputs)) { + return 0; + } + + if (found_inputs->count) { + ir_MERGE_N(found_inputs->count, found_inputs->refs); + ref = ir_PHI_N(IR_ADDR, found_vals->count, found_vals->refs); + val_addr = ZEND_ADDR_REF_ZVAL(ref); + + if (result_type_guard) { + uint8_t type = concrete_type(res_info); + uint32_t flags = 0; + + if (opline->opcode != ZEND_FETCH_LIST_R + && (opline->op1_type & (IS_VAR|IS_TMP_VAR)) + && !op1_avoid_refcounting) { + flags |= ZEND_JIT_EXIT_FREE_OP1; + } + if ((opline->op2_type & (IS_VAR|IS_TMP_VAR)) + && (op2_info & (MAY_BE_STRING|MAY_BE_ARRAY|MAY_BE_OBJECT|MAY_BE_RESOURCE))) { + flags |= ZEND_JIT_EXIT_FREE_OP2; + } + + val_addr = zend_jit_guard_fetch_result_type(jit, opline, val_addr, type, + (op1_info & MAY_BE_ARRAY_OF_REF) != 0, flags, op1_avoid_refcounting); + if (!val_addr) { + return 0; + } + + if (not_found_inputs) { + ir_END_list(not_found_inputs); + ir_MERGE_list(not_found_inputs); + } + + // ZVAL_COPY + jit_ZVAL_COPY(jit, res_addr, -1, val_addr, res_info, !result_avoid_refcounting); + if (Z_MODE(res_addr) != IS_REG) { + } else if (!zend_jit_store_var_if_necessary(jit, opline->result.var, res_addr, res_info)) { + return 0; + } + } else if (op1_info & MAY_BE_ARRAY_OF_REF) { + // ZVAL_COPY_DEREF + ir_ref type_info = jit_Z_TYPE_INFO(jit, val_addr); + if (!zend_jit_zval_copy_deref(jit, res_addr, val_addr, type_info)) { + return 0; + } + } else { + // ZVAL_COPY + jit_ZVAL_COPY(jit, res_addr, -1, val_addr, res_info, 1); + } + + ir_END_list(end_inputs); + } + } + + if (op1_info & ((MAY_BE_ANY|MAY_BE_UNDEF)-MAY_BE_ARRAY)) { + if (if_type) { + ir_IF_FALSE_cold(if_type); + if_type = IS_UNDEF; + } + + if (opline->opcode != 
ZEND_FETCH_LIST_R && (op1_info & MAY_BE_STRING)) { + ir_ref str_ref; + + may_throw = 1; + if (op1_info & ((MAY_BE_ANY|MAY_BE_UNDEF)-(MAY_BE_ARRAY|MAY_BE_STRING))) { + if (exit_addr && !(op1_info & MAY_BE_OBJECT)) { + jit_guard_Z_TYPE(jit, op1_addr, IS_STRING, exit_addr); + } else { + if_type = jit_if_Z_TYPE(jit, op1_addr, IS_STRING); + ir_IF_TRUE(if_type); + } + } + jit_SET_EX_OPLINE(jit, opline); + str_ref = jit_Z_PTR(jit, op1_addr); + if (opline->opcode != ZEND_FETCH_DIM_IS) { + ir_ref ref; + + if ((op2_info & (MAY_BE_ANY|MAY_BE_UNDEF|MAY_BE_GUARD)) == MAY_BE_LONG) { + ref = ir_CALL_2(IR_ADDR, ir_CONST_FC_FUNC(zend_jit_fetch_dim_str_offset_r_helper), + str_ref, jit_Z_LVAL(jit, op2_addr)); + } else { + ref = ir_CALL_2(IR_ADDR, ir_CONST_FC_FUNC(zend_jit_fetch_dim_str_r_helper), + str_ref, jit_ZVAL_ADDR(jit, op2_addr)); + } + jit_set_Z_PTR(jit, res_addr, ref); + jit_set_Z_TYPE_INFO(jit, res_addr, IS_STRING); + } else { + ir_CALL_3(IR_VOID, ir_CONST_FC_FUNC(zend_jit_fetch_dim_str_is_helper), + str_ref, + jit_ZVAL_ADDR(jit, op2_addr), + jit_ZVAL_ADDR(jit, res_addr)); + } + ir_END_list(end_inputs); + } + + if (op1_info & MAY_BE_OBJECT) { + ir_ref arg2; + + if (if_type) { + ir_IF_FALSE_cold(if_type); + if_type = IS_UNDEF; + } + + may_throw = 1; + if (op1_info & ((MAY_BE_ANY|MAY_BE_UNDEF)-(MAY_BE_ARRAY|MAY_BE_OBJECT|may_be_string))) { + if (exit_addr) { + jit_guard_Z_TYPE(jit, op1_addr, IS_OBJECT, exit_addr); + } else { + if_type = jit_if_Z_TYPE(jit, op1_addr, IS_OBJECT); + ir_IF_TRUE(if_type); + } + } + + jit_SET_EX_OPLINE(jit, opline); + if (opline->op2_type == IS_CONST && Z_EXTRA_P(RT_CONSTANT(opline, opline->op2)) == ZEND_EXTRA_VALUE) { + ZEND_ASSERT(Z_MODE(op2_addr) == IS_CONST_ZVAL); + arg2 = ir_CONST_ADDR(Z_ZV(op2_addr)+1); + } else { + arg2 = jit_ZVAL_ADDR(jit, op2_addr); + } + + if (opline->opcode != ZEND_FETCH_DIM_IS) { + ir_CALL_3(IR_VOID, ir_CONST_FC_FUNC(zend_jit_fetch_dim_obj_r_helper), + jit_ZVAL_ADDR(jit, op1_addr), + arg2, + jit_ZVAL_ADDR(jit, res_addr)); + } else { + ir_CALL_3(IR_VOID, ir_CONST_FC_FUNC(zend_jit_fetch_dim_obj_is_helper), + jit_ZVAL_ADDR(jit, op1_addr), + arg2, + jit_ZVAL_ADDR(jit, res_addr)); + } + + ir_END_list(end_inputs); + } + + if ((op1_info & ((MAY_BE_ANY|MAY_BE_UNDEF)-(MAY_BE_ARRAY|MAY_BE_OBJECT|may_be_string))) + && (!exit_addr || !(op1_info & (MAY_BE_ARRAY|MAY_BE_OBJECT|may_be_string)))) { + + if (if_type) { + ir_IF_FALSE_cold(if_type); + if_type = IS_UNDEF; + } + + if ((opline->opcode != ZEND_FETCH_DIM_IS && (op1_info & MAY_BE_UNDEF)) || (op2_info & MAY_BE_UNDEF)) { + jit_SET_EX_OPLINE(jit, opline); + if (opline->opcode != ZEND_FETCH_DIM_IS && (op1_info & MAY_BE_UNDEF)) { + may_throw = 1; + zend_jit_type_check_undef(jit, jit_Z_TYPE(jit, op1_addr), opline->op1.var, NULL, 0, 1); + } + + if (op2_info & MAY_BE_UNDEF) { + may_throw = 1; + zend_jit_type_check_undef(jit, jit_Z_TYPE(jit, op2_addr), opline->op2.var, NULL, 0, 1); + } + } + + if (opline->opcode != ZEND_FETCH_DIM_IS && opline->opcode != ZEND_FETCH_LIST_R) { + ir_ref ref; + + may_throw = 1; + if ((op1_info & MAY_BE_UNDEF) || (op2_info & MAY_BE_UNDEF)) { + ref = jit_ZVAL_ADDR(jit, orig_op1_addr); + } else { + jit_SET_EX_OPLINE(jit, opline); + ref = jit_ZVAL_ADDR(jit, op1_addr); + } + ir_CALL_1(IR_VOID, ir_CONST_FC_FUNC(zend_jit_invalid_array_access), ref); + } + + jit_set_Z_TYPE_INFO(jit, res_addr, IS_NULL); + ir_END_list(end_inputs); + } + } + + if (end_inputs) { + ir_MERGE_list(end_inputs); + +#ifdef ZEND_JIT_USE_RC_INFERENCE + if ((opline->op2_type & (IS_TMP_VAR|IS_VAR)) && (op1_info & 
MAY_BE_OBJECT)) { + /* Magic offsetGet() may increase refcount of the key */ + op2_info |= MAY_BE_RCN; + } +#endif + + if (opline->op2_type & (IS_TMP_VAR|IS_VAR)) { + if ((op2_info & MAY_HAVE_DTOR) && (op2_info & MAY_BE_RC1)) { + may_throw = 1; + } + jit_FREE_OP(jit, opline->op2_type, opline->op2, op2_info, opline); + } + if (opline->opcode != ZEND_FETCH_LIST_R && !op1_avoid_refcounting) { + if (opline->op1_type & (IS_TMP_VAR|IS_VAR)) { + if ((op1_info & MAY_HAVE_DTOR) && (op1_info & MAY_BE_RC1)) { + may_throw = 1; + } + jit_FREE_OP(jit, opline->op1_type, opline->op1, op1_info, opline); + } + } + + if (may_throw) { + zend_jit_check_exception(jit); + } + } else if (op1_info & (MAY_BE_ANY|MAY_BE_UNDEF)) { + ir_BEGIN(IR_UNUSED); /* unreachable tail */ + } + + return 1; +} + +static zend_jit_addr zend_jit_prepare_array_update(zend_jit_ctx *jit, + const zend_op *opline, + uint32_t op1_info, + zend_jit_addr op1_addr, + ir_ref *if_type, + ir_ref *ht_ref, + int *may_throw) +{ + ir_ref ref = IR_UNUSED; + ir_ref array_reference_end = IR_UNUSED, array_reference_ref = IR_UNUSED; + ir_refs *array_inputs, *array_values; + + ir_refs_init(array_inputs, 4); + ir_refs_init(array_values, 4); + + ref = jit_ZVAL_ADDR(jit, op1_addr); + if (op1_info & MAY_BE_REF) { + ir_ref if_reference, if_array, end1, ref2; + + *may_throw = 1; + if_reference = jit_if_Z_TYPE(jit, op1_addr, IS_REFERENCE); + ir_IF_FALSE(if_reference); + end1 = ir_END(); + ir_IF_TRUE_cold(if_reference); + array_reference_ref = ir_ADD_OFFSET(jit_Z_PTR_ref(jit, ref), offsetof(zend_reference, val)); + if_array = jit_if_Z_TYPE_ref(jit, array_reference_ref, ir_CONST_U8(IS_ARRAY)); + ir_IF_TRUE(if_array); + array_reference_end = ir_END(); + ir_IF_FALSE_cold(if_array); + if (opline->opcode != ZEND_FETCH_DIM_RW && opline->opcode != ZEND_ASSIGN_DIM_OP) { + jit_SET_EX_OPLINE(jit, opline); + } + ref2 = ir_CALL_1(IR_ADDR, ir_CONST_FC_FUNC(zend_jit_prepare_assign_dim_ref), ref); + ir_GUARD(ref2, jit_STUB_ADDR(jit, jit_stub_exception_handler_undef)); + + ir_MERGE_WITH(end1); + ref = ir_PHI_2(IR_ADDR, ref2, ref); + op1_addr = ZEND_ADDR_REF_ZVAL(ref); + } + + if (op1_info & MAY_BE_ARRAY) { + ir_ref op1_ref = ref; + + if (op1_info & ((MAY_BE_ANY|MAY_BE_UNDEF) - MAY_BE_ARRAY)) { + *if_type = jit_if_Z_TYPE(jit, op1_addr, IS_ARRAY); + ir_IF_TRUE(*if_type); + } + if (array_reference_end) { + ir_MERGE_WITH(array_reference_end); + op1_ref = ir_PHI_2(IR_ADDR, ref, array_reference_ref); + } + // JIT: SEPARATE_ARRAY() + ref = jit_Z_PTR_ref(jit, op1_ref); + if (RC_MAY_BE_N(op1_info)) { + if (RC_MAY_BE_1(op1_info)) { + ir_ref if_refcount_1 = ir_IF(ir_EQ(jit_GC_REFCOUNT(jit, ref), ir_CONST_U32(1))); + ir_IF_TRUE(if_refcount_1); + ir_refs_add(array_inputs, ir_END()); + ir_refs_add(array_values, ref); + ir_IF_FALSE(if_refcount_1); + } + ref = ir_CALL_1(IR_ADDR, ir_CONST_FC_FUNC(zend_jit_zval_array_dup), op1_ref); + } + if (array_inputs->count || (op1_info & (MAY_BE_UNDEF|MAY_BE_NULL))) { + ir_refs_add(array_inputs, ir_END()); + ir_refs_add(array_values, ref); + } + } + + if (op1_info & (MAY_BE_UNDEF|MAY_BE_NULL)) { + if (*if_type) { + ir_IF_FALSE_cold(*if_type); + *if_type = IR_UNUSED; + } + if (op1_info & (MAY_BE_ANY-(MAY_BE_NULL|MAY_BE_ARRAY))) { + *if_type = ir_IF(ir_LE(jit_Z_TYPE(jit, op1_addr), ir_CONST_U8(IS_NULL))); + ir_IF_TRUE(*if_type); + } + if ((op1_info & MAY_BE_UNDEF) + && (opline->opcode == ZEND_FETCH_DIM_RW || opline->opcode == ZEND_ASSIGN_DIM_OP)) { + ir_ref end1 = IR_UNUSED; + + *may_throw = 1; + if (op1_info & MAY_BE_NULL) { + ir_ref if_def = 
ir_IF(jit_Z_TYPE(jit, op1_addr)); + ir_IF_TRUE(if_def); + end1 = ir_END(); + ir_IF_FALSE(if_def); + } + ir_CALL_1(IR_VOID, ir_CONST_FC_FUNC(zend_jit_undefined_op_helper), ir_CONST_U32(opline->op1.var)); + if (end1) { + ir_MERGE_WITH(end1); + } + } + // JIT: ZVAL_ARR(container, zend_new_array(8)); + ref = ir_CALL_1(IR_ADDR, + jit_STUB_FUNC_ADDR(jit, jit_stub_new_array, IR_CONST_FASTCALL_FUNC), + jit_ZVAL_ADDR(jit, op1_addr)); + if (array_inputs->count) { + ir_refs_add(array_inputs, ir_END()); + ir_refs_add(array_values, ref); + } + } + + if (array_inputs->count) { + ir_MERGE_N(array_inputs->count, array_inputs->refs); + ref = ir_PHI_N(IR_ADDR, array_values->count, array_values->refs); + } + + *ht_ref = ref; + return op1_addr; +} + +static int zend_jit_fetch_dim(zend_jit_ctx *jit, + const zend_op *opline, + uint32_t op1_info, + zend_jit_addr op1_addr, + uint32_t op2_info, + zend_jit_addr res_addr, + uint8_t dim_type) +{ + zend_jit_addr op2_addr; + int may_throw = 0; + ir_ref end_inputs = IR_UNUSED; + ir_ref ref, if_type = IR_UNUSED, ht_ref; + + op2_addr = (opline->op2_type != IS_UNUSED) ? OP2_ADDR() : 0; + + if (opline->opcode == ZEND_FETCH_DIM_RW) { + jit_SET_EX_OPLINE(jit, opline); + } + + op1_addr = zend_jit_prepare_array_update(jit, opline, op1_info, op1_addr, &if_type, &ht_ref, &may_throw); + + if (op1_info & (MAY_BE_UNDEF|MAY_BE_NULL|MAY_BE_ARRAY)) { + ir_refs *found_inputs, *found_vals; + + ir_refs_init(found_inputs, 8); + ir_refs_init(found_vals, 8); + + if (opline->op2_type == IS_UNUSED) { + ir_ref if_ok; + + may_throw = 1; + // JIT:var_ptr = zend_hash_next_index_insert(Z_ARRVAL_P(container), &EG(uninitialized_zval)); + ref = ir_CALL_2(IR_ADDR, ir_CONST_FC_FUNC(zend_hash_next_index_insert), + ht_ref, jit_EG(uninitialized_zval)); + + // JIT: if (UNEXPECTED(!var_ptr)) { + if_ok = ir_IF(ref); + ir_IF_FALSE_cold(if_ok); + if (opline->opcode != ZEND_FETCH_DIM_RW) { + jit_SET_EX_OPLINE(jit, opline); + } + ir_CALL(IR_VOID, jit_STUB_FUNC_ADDR(jit, jit_stub_cannot_add_element, IR_CONST_FASTCALL_FUNC)); + ir_END_list(end_inputs); + + ir_IF_TRUE(if_ok); + jit_set_Z_PTR(jit, res_addr, ref); + jit_set_Z_TYPE_INFO(jit, res_addr, IS_INDIRECT); + + ir_END_list(end_inputs); + } else { + uint32_t type; + + switch (opline->opcode) { + case ZEND_FETCH_DIM_W: + case ZEND_FETCH_LIST_W: + type = BP_VAR_W; + break; + case ZEND_FETCH_DIM_RW: + may_throw = 1; + type = BP_VAR_RW; + break; + case ZEND_FETCH_DIM_UNSET: + type = BP_VAR_UNSET; + break; + default: + ZEND_UNREACHABLE(); + } + + if (op2_info & ((MAY_BE_ANY|MAY_BE_UNDEF) - (MAY_BE_LONG|MAY_BE_STRING))) { + may_throw = 1; + } + if (!zend_jit_fetch_dimension_address_inner(jit, opline, type, op1_info, op2_info, dim_type, NULL, NULL, NULL, + 0, ht_ref, found_inputs, found_vals, &end_inputs, NULL)) { + return 0; + } + + if (type == BP_VAR_RW || (op2_info & ((MAY_BE_ANY|MAY_BE_UNDEF) - (MAY_BE_LONG|MAY_BE_STRING)))) { + if (end_inputs) { + ir_MERGE_list(end_inputs); + jit_set_Z_TYPE_INFO(jit, res_addr, IS_NULL); + end_inputs = ir_END(); + } + } else { + ZEND_ASSERT(end_inputs == IR_UNUSED); + } + + if (found_inputs->count) { + ir_MERGE_N(found_inputs->count, found_inputs->refs); + ref = ir_PHI_N(IR_ADDR, found_vals->count, found_vals->refs); + jit_set_Z_PTR(jit, res_addr, ref); + jit_set_Z_TYPE_INFO(jit, res_addr, IS_INDIRECT); + ir_END_list(end_inputs); + } + + } + } + + if (op1_info & (MAY_BE_ANY-(MAY_BE_NULL|MAY_BE_ARRAY))) { + ir_ref arg2; + + may_throw = 1; + + if (if_type) { + ir_IF_FALSE(if_type); + if_type = IR_UNUSED; + } + + if (opline->opcode != 
ZEND_FETCH_DIM_RW) { + jit_SET_EX_OPLINE(jit, opline); + } + + if (opline->op2_type == IS_UNUSED) { + arg2 = IR_NULL; + } else if (opline->op2_type == IS_CONST && Z_EXTRA_P(RT_CONSTANT(opline, opline->op2)) == ZEND_EXTRA_VALUE) { + ZEND_ASSERT(Z_MODE(op2_addr) == IS_CONST_ZVAL); + arg2 = ir_CONST_ADDR(Z_ZV(op2_addr) + 1); + } else { + arg2 = jit_ZVAL_ADDR(jit, op2_addr); + } + + switch (opline->opcode) { + case ZEND_FETCH_DIM_W: + case ZEND_FETCH_LIST_W: + ir_CALL_3(IR_VOID, ir_CONST_FC_FUNC(zend_jit_fetch_dim_obj_w_helper), + jit_ZVAL_ADDR(jit, op1_addr), + arg2, + jit_ZVAL_ADDR(jit, res_addr)); + break; + case ZEND_FETCH_DIM_RW: + ir_CALL_3(IR_VOID, ir_CONST_FC_FUNC(zend_jit_fetch_dim_obj_rw_helper), + jit_ZVAL_ADDR(jit, op1_addr), + arg2, + jit_ZVAL_ADDR(jit, res_addr)); + break; +// case ZEND_FETCH_DIM_UNSET: +// | EXT_CALL zend_jit_fetch_dim_obj_unset_helper, r0 +// break; + default: + ZEND_UNREACHABLE(); + } + + if (op1_info & (MAY_BE_UNDEF|MAY_BE_NULL|MAY_BE_ARRAY)) { + ir_END_list(end_inputs); + } + } + +#ifdef ZEND_JIT_USE_RC_INFERENCE + if ((opline->op2_type & (IS_TMP_VAR|IS_VAR)) && (op1_info & (MAY_BE_UNDEF|MAY_BE_NULL|MAY_BE_FALSE|MAY_BE_ARRAY|MAY_BE_OBJECT))) { + /* ASSIGN_DIM may increase refcount of the key */ + op2_info |= MAY_BE_RCN; + } +#endif + + if ((opline->op2_type & (IS_TMP_VAR|IS_VAR)) + && (op2_info & MAY_HAVE_DTOR) + && (op2_info & MAY_BE_RC1)) { + may_throw = 1; + } + + if (end_inputs) { + ir_MERGE_list(end_inputs); + } + + jit_FREE_OP(jit, opline->op2_type, opline->op2, op2_info, opline); + + if (may_throw) { + zend_jit_check_exception(jit); + } + + return 1; +} + +static int zend_jit_isset_isempty_dim(zend_jit_ctx *jit, + const zend_op *opline, + uint32_t op1_info, + zend_jit_addr op1_addr, + bool op1_avoid_refcounting, + uint32_t op2_info, + uint8_t dim_type, + int may_throw, + uint8_t smart_branch_opcode, + uint32_t target_label, + uint32_t target_label2, + const void *exit_addr) +{ + zend_jit_addr op2_addr, res_addr; + ir_ref if_type = IR_UNUSED; + ir_ref false_inputs = IR_UNUSED, end_inputs = IR_UNUSED; + ir_refs *true_inputs; + + ir_refs_init(true_inputs, 8); + + // TODO: support for empty() ??? 
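+ /* only the isset() flavor is compiled here; the assertion below
+ * guarantees that the empty() variant never reaches this code */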
+ ZEND_ASSERT(!(opline->extended_value & ZEND_ISEMPTY)); + + op2_addr = OP2_ADDR(); + res_addr = ZEND_ADDR_MEM_ZVAL(ZREG_FP, opline->result.var); + + if (op1_info & MAY_BE_REF) { + ir_ref ref = jit_ZVAL_ADDR(jit, op1_addr); + ref = jit_ZVAL_DEREF_ref(jit, ref); + op1_addr = ZEND_ADDR_REF_ZVAL(ref); + } + + if (op1_info & MAY_BE_ARRAY) { + const void *found_exit_addr = NULL; + const void *not_found_exit_addr = NULL; + ir_ref ht_ref; + + if (op1_info & ((MAY_BE_ANY|MAY_BE_UNDEF) - MAY_BE_ARRAY)) { + if_type = jit_if_Z_TYPE(jit, op1_addr, IS_ARRAY); + ir_IF_TRUE(if_type); + } + + ht_ref = jit_Z_PTR(jit, op1_addr); + + if (exit_addr + && !(op1_info & ((MAY_BE_ANY|MAY_BE_UNDEF)-MAY_BE_ARRAY)) + && !may_throw + && (!(opline->op1_type & (IS_TMP_VAR|IS_VAR)) || op1_avoid_refcounting) + && (!(opline->op2_type & (IS_TMP_VAR|IS_VAR)) || !(op2_info & ((MAY_BE_ANY|MAY_BE_UNDEF)-MAY_BE_LONG)))) { + if (smart_branch_opcode == ZEND_JMPNZ) { + found_exit_addr = exit_addr; + } else { + not_found_exit_addr = exit_addr; + } + } + if (!zend_jit_fetch_dimension_address_inner(jit, opline, BP_JIT_IS, op1_info, op2_info, dim_type, found_exit_addr, not_found_exit_addr, NULL, + 0, ht_ref, true_inputs, NULL, &false_inputs, NULL)) { + return 0; + } + + if (found_exit_addr) { + ir_MERGE_list(false_inputs); + return 1; + } else if (not_found_exit_addr) { + ir_MERGE_N(true_inputs->count, true_inputs->refs); + return 1; + } + } + + if (op1_info & ((MAY_BE_ANY|MAY_BE_UNDEF)-MAY_BE_ARRAY)) { + if (if_type) { + ir_IF_FALSE(if_type); + if_type = IR_UNUSED; + } + + if (op1_info & (MAY_BE_STRING|MAY_BE_OBJECT)) { + ir_ref ref, arg1, arg2, if_true; + + jit_SET_EX_OPLINE(jit, opline); + arg1 = jit_ZVAL_ADDR(jit, op1_addr); + if (opline->op2_type == IS_CONST && Z_EXTRA_P(RT_CONSTANT(opline, opline->op2)) == ZEND_EXTRA_VALUE) { + ZEND_ASSERT(Z_MODE(op2_addr) == IS_CONST_ZVAL); + arg2 = ir_CONST_ADDR(Z_ZV(op2_addr)+1); + } else { + arg2 = jit_ZVAL_ADDR(jit, op2_addr); + } + ref = ir_CALL_2(IR_I32, ir_CONST_FC_FUNC(zend_jit_isset_dim_helper), arg1, arg2); + if_true = ir_IF(ref); + ir_IF_TRUE(if_true); + ir_refs_add(true_inputs, ir_END()); + ir_IF_FALSE(if_true); + ir_END_list(false_inputs); + } else { + if (op2_info & MAY_BE_UNDEF) { + ir_ref end1 = IR_UNUSED; + + if (op2_info & MAY_BE_ANY) { + ir_ref if_def = ir_IF(jit_Z_TYPE(jit, op2_addr)); + ir_IF_TRUE(if_def); + end1 = ir_END(); + ir_IF_FALSE(if_def); + } + jit_SET_EX_OPLINE(jit, opline); + ir_CALL_1(IR_VOID, ir_CONST_FC_FUNC(zend_jit_undefined_op_helper), ir_CONST_U32(opline->op2.var)); + if (end1) { + ir_MERGE_WITH(end1); + } + } + ir_END_list(false_inputs); + } + } + +#ifdef ZEND_JIT_USE_RC_INFERENCE + if ((opline->op2_type & (IS_TMP_VAR|IS_VAR)) && (op1_info & MAY_BE_OBJECT)) { + /* Magic offsetExists() may increase refcount of the key */ + op2_info |= MAY_BE_RCN; + } +#endif + + if (true_inputs->count) { + ir_MERGE_N(true_inputs->count, true_inputs->refs); + + jit_FREE_OP(jit, opline->op2_type, opline->op2, op2_info, opline); + if (!op1_avoid_refcounting) { + jit_FREE_OP(jit, opline->op1_type, opline->op1, op1_info, opline); + } + if (may_throw) { + zend_jit_check_exception_undef_result(jit, opline); + } + if (!(opline->extended_value & ZEND_ISEMPTY)) { + if (exit_addr) { + if (smart_branch_opcode == ZEND_JMPNZ) { + jit_SIDE_EXIT(jit, ir_CONST_ADDR(exit_addr)); + } else { + ir_END_list(end_inputs); + } + } else if (smart_branch_opcode) { + if (smart_branch_opcode == ZEND_JMPZ) { + _zend_jit_add_predecessor_ref(jit, target_label2, jit->b, ir_END()); + } else if 
(smart_branch_opcode == ZEND_JMPNZ) { + _zend_jit_add_predecessor_ref(jit, target_label, jit->b, ir_END()); + } else { + ZEND_UNREACHABLE(); + } + } else { + jit_set_Z_TYPE_INFO(jit, res_addr, IS_TRUE); + ir_END_list(end_inputs); + } + } else { + ZEND_UNREACHABLE(); // TODO: support for empty() + } + } + + ir_MERGE_list(false_inputs); + jit_FREE_OP(jit, opline->op2_type, opline->op2, op2_info, opline); + if (!op1_avoid_refcounting) { + jit_FREE_OP(jit, opline->op1_type, opline->op1, op1_info, opline); + } + if (may_throw) { + zend_jit_check_exception_undef_result(jit, opline); + } + if (!(opline->extended_value & ZEND_ISEMPTY)) { + if (exit_addr) { + if (smart_branch_opcode == ZEND_JMPZ) { + jit_SIDE_EXIT(jit, ir_CONST_ADDR(exit_addr)); + } else { + ir_END_list(end_inputs); + } + } else if (smart_branch_opcode) { + if (smart_branch_opcode == ZEND_JMPZ) { + _zend_jit_add_predecessor_ref(jit, target_label, jit->b, ir_END()); + } else if (smart_branch_opcode == ZEND_JMPNZ) { + _zend_jit_add_predecessor_ref(jit, target_label2, jit->b, ir_END()); + } else { + ZEND_UNREACHABLE(); + } + } else { + jit_set_Z_TYPE_INFO(jit, res_addr, IS_FALSE); + ir_END_list(end_inputs); + } + } else { + ZEND_UNREACHABLE(); // TODO: support for empty() + } + + if (!exit_addr && smart_branch_opcode) { + jit->b = -1; + } else { + ir_MERGE_list(end_inputs); + } + + return 1; +} + +static int zend_jit_assign_dim(zend_jit_ctx *jit, const zend_op *opline, uint32_t op1_info, zend_jit_addr op1_addr, uint32_t op2_info, uint32_t val_info, uint8_t dim_type, int may_throw) +{ + zend_jit_addr op2_addr, op3_addr, res_addr; + ir_ref if_type = IR_UNUSED; + ir_ref end_inputs = IR_UNUSED, ht_ref; + + op2_addr = (opline->op2_type != IS_UNUSED) ? OP2_ADDR() : 0; + op3_addr = OP1_DATA_ADDR(); + if (opline->result_type == IS_UNUSED) { + res_addr = 0; + } else { + res_addr = ZEND_ADDR_MEM_ZVAL(ZREG_FP, opline->result.var); + } + + if (JIT_G(trigger) == ZEND_JIT_ON_HOT_TRACE && (val_info & MAY_BE_UNDEF)) { + int32_t exit_point = zend_jit_trace_get_exit_point(opline, ZEND_JIT_EXIT_TO_VM); + const void *exit_addr = zend_jit_trace_get_exit_addr(exit_point); + + if (!exit_addr) { + return 0; + } + + jit_guard_not_Z_TYPE(jit, op3_addr, IS_UNDEF, exit_addr); + + val_info &= ~MAY_BE_UNDEF; + } + + op1_addr = zend_jit_prepare_array_update(jit, opline, op1_info, op1_addr, &if_type, &ht_ref, &may_throw); + + if (op1_info & (MAY_BE_UNDEF|MAY_BE_NULL|MAY_BE_ARRAY)) { + if (opline->op2_type == IS_UNUSED) { + uint32_t var_info = MAY_BE_NULL; + ir_ref if_ok, ref; + zend_jit_addr var_addr; + + // JIT: var_ptr = zend_hash_next_index_insert(Z_ARRVAL_P(container), &EG(uninitialized_zval)); + ref = ir_CALL_2(IR_ADDR, ir_CONST_FC_FUNC(zend_hash_next_index_insert), + ht_ref, jit_EG(uninitialized_zval)); + + // JIT: if (UNEXPECTED(!var_ptr)) { + if_ok = ir_IF(ref); + ir_IF_FALSE_cold(if_ok); + + // JIT: zend_throw_error(NULL, "Cannot add element to the array as the next element is already occupied"); + jit_SET_EX_OPLINE(jit, opline); + ir_CALL(IR_VOID, jit_STUB_FUNC_ADDR(jit, jit_stub_cannot_add_element, IR_CONST_FASTCALL_FUNC)); + + ir_END_list(end_inputs); + + ir_IF_TRUE(if_ok); + var_addr = ZEND_ADDR_REF_ZVAL(ref); + if (!zend_jit_simple_assign(jit, opline, var_addr, var_info, -1, (opline+1)->op1_type, op3_addr, val_info, res_addr, 0)) { + return 0; + } + } else { + uint32_t var_info = zend_array_element_type(op1_info, opline->op1_type, 0, 0); + zend_jit_addr var_addr; + ir_ref ref; + ir_refs *found_inputs, *found_values; + + ir_refs_init(found_inputs, 8); + 
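/* each successful lookup path adds one control-flow end to
+ * found_inputs and the matching zval address to found_values;
+ * both lists are merged into a single PHI before the assignment below */
+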
ir_refs_init(found_values, 8); + + if (!zend_jit_fetch_dimension_address_inner(jit, opline, BP_VAR_W, op1_info, op2_info, dim_type, NULL, NULL, NULL, + 0, ht_ref, found_inputs, found_values, &end_inputs, NULL)) { + return 0; + } + + if (op1_info & (MAY_BE_ARRAY_OF_REF|MAY_BE_OBJECT)) { + var_info |= MAY_BE_REF; + } + if (var_info & (MAY_BE_STRING|MAY_BE_ARRAY|MAY_BE_OBJECT|MAY_BE_RESOURCE)) { + var_info |= MAY_BE_RC1; + } + + ir_MERGE_N(found_inputs->count, found_inputs->refs); + ref = ir_PHI_N(IR_ADDR, found_values->count, found_values->refs); + var_addr = ZEND_ADDR_REF_ZVAL(ref); + + // JIT: value = zend_assign_to_variable(variable_ptr, value, OP_DATA_TYPE); + if (opline->op1_type == IS_VAR) { + ZEND_ASSERT(opline->result_type == IS_UNUSED); + if (!zend_jit_assign_to_variable_call(jit, opline, var_addr, var_addr, var_info, -1, (opline+1)->op1_type, op3_addr, val_info, res_addr, 0)) { + return 0; + } + } else { + if (!zend_jit_assign_to_variable(jit, opline, var_addr, var_addr, var_info, -1, (opline+1)->op1_type, op3_addr, val_info, res_addr, 0, 0)) { + return 0; + } + } + } + + ir_END_list(end_inputs); + } + + if (op1_info & (MAY_BE_ANY-(MAY_BE_NULL|MAY_BE_ARRAY))) { + ir_ref arg2, arg4; + + if (if_type) { + ir_IF_FALSE_cold(if_type); + if_type = IR_UNUSED; + } + + jit_SET_EX_OPLINE(jit, opline); + + if (opline->op2_type == IS_UNUSED) { + arg2 = IR_NULL; + } else if (opline->op2_type == IS_CONST && Z_EXTRA_P(RT_CONSTANT(opline, opline->op2)) == ZEND_EXTRA_VALUE) { + ZEND_ASSERT(Z_MODE(op2_addr) == IS_CONST_ZVAL); + arg2 = ir_CONST_ADDR(Z_ZV(op2_addr) + 1); + } else { + arg2 = jit_ZVAL_ADDR(jit, op2_addr); + } + + if (opline->result_type == IS_UNUSED) { + arg4 = IR_NULL; + } else { + arg4 = jit_ZVAL_ADDR(jit, res_addr); + } + ir_CALL_4(IR_VOID, ir_CONST_FC_FUNC(zend_jit_assign_dim_helper), + jit_ZVAL_ADDR(jit, op1_addr), + arg2, + jit_ZVAL_ADDR(jit, op3_addr), + arg4); + +#ifdef ZEND_JIT_USE_RC_INFERENCE + if (((opline+1)->op1_type & (IS_TMP_VAR|IS_VAR)) && (val_info & MAY_BE_RC1)) { + /* ASSIGN_DIM may increase refcount of the value */ + val_info |= MAY_BE_RCN; + } +#endif + + jit_FREE_OP(jit, (opline+1)->op1_type, (opline+1)->op1, val_info, NULL); + + ir_END_list(end_inputs); + } + +#ifdef ZEND_JIT_USE_RC_INFERENCE + if ((opline->op2_type & (IS_TMP_VAR|IS_VAR)) && (op1_info & (MAY_BE_UNDEF|MAY_BE_NULL|MAY_BE_FALSE|MAY_BE_ARRAY|MAY_BE_OBJECT))) { + /* ASSIGN_DIM may increase refcount of the key */ + op2_info |= MAY_BE_RCN; + } +#endif + + ir_MERGE_list(end_inputs); + jit_FREE_OP(jit, opline->op2_type, opline->op2, op2_info, opline); + + if (may_throw) { + zend_jit_check_exception(jit); + } + + return 1; +} + +static int zend_jit_assign_dim_op(zend_jit_ctx *jit, const zend_op *opline, uint32_t op1_info, uint32_t op1_def_info, zend_jit_addr op1_addr, uint32_t op2_info, uint32_t op1_data_info, zend_ssa_range *op1_data_range, uint8_t dim_type, int may_throw) +{ + zend_jit_addr op2_addr, op3_addr, var_addr = IS_UNUSED; + const void *not_found_exit_addr = NULL; + uint32_t var_info = MAY_BE_NULL; + ir_ref if_type = IS_UNUSED; + ir_ref end_inputs = IR_UNUSED, ht_ref; + bool emit_fast_path = 1; + + ZEND_ASSERT(opline->result_type == IS_UNUSED); + + op2_addr = (opline->op2_type != IS_UNUSED) ? 
OP2_ADDR() : 0; + op3_addr = OP1_DATA_ADDR(); + + jit_SET_EX_OPLINE(jit, opline); + + op1_addr = zend_jit_prepare_array_update(jit, opline, op1_info, op1_addr, &if_type, &ht_ref, &may_throw); + + if (op1_info & (MAY_BE_UNDEF|MAY_BE_NULL|MAY_BE_ARRAY)) { + uint32_t var_def_info = zend_array_element_type(op1_def_info, opline->op1_type, 1, 0); + + if (opline->op2_type == IS_UNUSED) { + var_info = MAY_BE_NULL; + ir_ref if_ok, ref; + + // JIT: var_ptr = zend_hash_next_index_insert(Z_ARRVAL_P(container), &EG(uninitialized_zval)); + ref = ir_CALL_2(IR_ADDR, ir_CONST_FC_FUNC(zend_hash_next_index_insert), + ht_ref, jit_EG(uninitialized_zval)); + + // JIT: if (UNEXPECTED(!var_ptr)) { + if_ok = ir_IF(ref); + ir_IF_FALSE_cold(if_ok); + + // JIT: zend_throw_error(NULL, "Cannot add element to the array as the next element is already occupied"); + ir_CALL(IR_VOID, jit_STUB_FUNC_ADDR(jit, jit_stub_cannot_add_element, IR_CONST_FASTCALL_FUNC)); + + ir_END_list(end_inputs); + + ir_IF_TRUE(if_ok); + var_addr = ZEND_ADDR_REF_ZVAL(ref); + } else { + ir_ref ref; + ir_refs *found_inputs, *found_values; + + ir_refs_init(found_inputs, 8); + ir_refs_init(found_values, 8); + + var_info = zend_array_element_type(op1_info, opline->op1_type, 0, 0); + if (op1_info & (MAY_BE_ARRAY_OF_REF|MAY_BE_OBJECT)) { + var_info |= MAY_BE_REF; + } + if (var_info & (MAY_BE_STRING|MAY_BE_ARRAY|MAY_BE_OBJECT|MAY_BE_RESOURCE)) { + var_info |= MAY_BE_RC1; + } + + if (dim_type != IS_UNKNOWN + && dim_type != IS_UNDEF + && (op1_info & (MAY_BE_ANY|MAY_BE_UNDEF)) == MAY_BE_ARRAY + && (op2_info & (MAY_BE_LONG|MAY_BE_STRING)) + && !(op2_info & ((MAY_BE_ANY|MAY_BE_UNDEF) - (MAY_BE_LONG|MAY_BE_STRING)))) { + int32_t exit_point = zend_jit_trace_get_exit_point(opline, 0); + not_found_exit_addr = zend_jit_trace_get_exit_addr(exit_point); + if (!not_found_exit_addr) { + return 0; + } + } + + if (!zend_jit_fetch_dimension_address_inner(jit, opline, BP_VAR_RW, op1_info, op2_info, dim_type, NULL, not_found_exit_addr, NULL, + 0, ht_ref, found_inputs, found_values, &end_inputs, NULL)) { + return 0; + } + + if (found_inputs->count) { + ir_MERGE_N(found_inputs->count, found_inputs->refs); + ref = ir_PHI_N(IR_ADDR, found_values->count, found_values->refs); + var_addr = ZEND_ADDR_REF_ZVAL(ref); + + if (not_found_exit_addr && dim_type != IS_REFERENCE) { + jit_guard_Z_TYPE(jit, var_addr, dim_type, not_found_exit_addr); + var_info = (1 << dim_type) | (var_info & ~(MAY_BE_ANY|MAY_BE_UNDEF|MAY_BE_REF)); + } + if (var_info & MAY_BE_REF) { + binary_op_type binary_op = get_binary_op(opline->extended_value); + ir_ref if_ref, if_typed, noref_path, ref_path, ref, reference, ref2, arg2; + + ref = jit_ZVAL_ADDR(jit, var_addr); + if_ref = jit_if_Z_TYPE(jit, var_addr, IS_REFERENCE); + ir_IF_FALSE(if_ref); + noref_path = ir_END(); + ir_IF_TRUE(if_ref); + + reference = jit_Z_PTR_ref(jit, ref); + ref2 = ir_ADD_OFFSET(reference, offsetof(zend_reference, val)); + if_typed = jit_if_TYPED_REF(jit, reference); + ir_IF_FALSE(if_typed); + ref_path = ir_END(); + ir_IF_TRUE_cold(if_typed); + + arg2 = jit_ZVAL_ADDR(jit, op3_addr); + ir_CALL_3(IR_VOID, ir_CONST_FC_FUNC(zend_jit_assign_op_to_typed_ref), + reference, arg2, ir_CONST_FC_FUNC(binary_op)); + + ir_END_list(end_inputs); + + ir_MERGE_2(noref_path, ref_path); + ref = ir_PHI_2(IR_ADDR, ref, ref2); + var_addr = ZEND_ADDR_REF_ZVAL(ref); + } + } else { + emit_fast_path = 0; + } + } + + if (emit_fast_path) { + uint8_t val_op_type = (opline+1)->op1_type; + + if (val_op_type & (IS_TMP_VAR|IS_VAR)) { + /* prevent FREE_OP in the helpers */ + 
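/* (the math/concat helpers would otherwise free a value that is
+ * also freed by the jit_FREE_OP() at the end of this function) */
+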
val_op_type = IS_CV; + } + + switch (opline->extended_value) { + case ZEND_ADD: + case ZEND_SUB: + case ZEND_MUL: + case ZEND_DIV: + if (!zend_jit_math_helper(jit, opline, opline->extended_value, IS_CV, opline->op1, var_addr, var_info, val_op_type, (opline+1)->op1, op3_addr, op1_data_info, 0, var_addr, var_def_info, var_info, + 1 /* may overflow */, may_throw)) { + return 0; + } + break; + case ZEND_BW_OR: + case ZEND_BW_AND: + case ZEND_BW_XOR: + case ZEND_SL: + case ZEND_SR: + case ZEND_MOD: + if (!zend_jit_long_math_helper(jit, opline, opline->extended_value, + IS_CV, opline->op1, var_addr, var_info, NULL, + val_op_type, (opline+1)->op1, op3_addr, op1_data_info, + op1_data_range, + 0, var_addr, var_def_info, var_info, may_throw)) { + return 0; + } + break; + case ZEND_CONCAT: + if (!zend_jit_concat_helper(jit, opline, IS_CV, opline->op1, var_addr, var_info, val_op_type, (opline+1)->op1, op3_addr, op1_data_info, var_addr, + may_throw)) { + return 0; + } + break; + default: + ZEND_UNREACHABLE(); + } + + ir_END_list(end_inputs); + } + } + + if (op1_info & (MAY_BE_ANY-(MAY_BE_NULL|MAY_BE_ARRAY))) { + binary_op_type binary_op; + ir_ref arg2; + + if (if_type) { + ir_IF_FALSE_cold(if_type); + if_type = IS_UNUSED; + } + + if (opline->op2_type == IS_UNUSED) { + arg2 = IR_NULL; + } else if (opline->op2_type == IS_CONST && Z_EXTRA_P(RT_CONSTANT(opline, opline->op2)) == ZEND_EXTRA_VALUE) { + ZEND_ASSERT(Z_MODE(op2_addr) == IS_CONST_ZVAL); + arg2 = ir_CONST_ADDR(Z_ZV(op2_addr) + 1); + } else { + arg2 = jit_ZVAL_ADDR(jit, op2_addr); + } + binary_op = get_binary_op(opline->extended_value); + ir_CALL_4(IR_VOID, ir_CONST_FC_FUNC(zend_jit_assign_dim_op_helper), + jit_ZVAL_ADDR(jit, op1_addr), + arg2, + jit_ZVAL_ADDR(jit, op3_addr), + ir_CONST_FC_FUNC(binary_op)); + ir_END_list(end_inputs); + } + + if (end_inputs) { + ir_MERGE_list(end_inputs); + } + + jit_FREE_OP(jit, (opline+1)->op1_type, (opline+1)->op1, op1_data_info, NULL); + jit_FREE_OP(jit, opline->op2_type, opline->op2, op2_info, NULL); + if (may_throw) { + zend_jit_check_exception(jit); + } + + return 1; +} + +static int zend_jit_fe_reset(zend_jit_ctx *jit, const zend_op *opline, uint32_t op1_info) +{ + zend_jit_addr res_addr = ZEND_ADDR_MEM_ZVAL(ZREG_FP, opline->result.var); + + // JIT: ZVAL_COPY(res, value); + if (opline->op1_type == IS_CONST) { + zval *zv = RT_CONSTANT(opline, opline->op1); + + jit_ZVAL_COPY_CONST(jit, res_addr, MAY_BE_ANY, MAY_BE_ANY, zv, 1); + } else { + zend_jit_addr op1_addr = ZEND_ADDR_MEM_ZVAL(ZREG_FP, opline->op1.var); + + jit_ZVAL_COPY(jit, res_addr, -1, op1_addr, op1_info, opline->op1_type == IS_CV); + } + + // JIT: Z_FE_POS_P(res) = 0; + ir_STORE(ir_ADD_OFFSET(jit_FP(jit), opline->result.var + offsetof(zval, u2.fe_pos)), ir_CONST_U32(0)); + + return 1; +} + +static int zend_jit_packed_guard(zend_jit_ctx *jit, const zend_op *opline, uint32_t var, uint32_t op_info) +{ + int32_t exit_point = zend_jit_trace_get_exit_point(opline, ZEND_JIT_EXIT_PACKED_GUARD); + const void *exit_addr = zend_jit_trace_get_exit_addr(exit_point); + zend_jit_addr addr = ZEND_ADDR_MEM_ZVAL(ZREG_FP, var); + ir_ref ref; + + if (!exit_addr) { + return 0; + } + + ref = ir_AND_U32( + ir_LOAD_U32(ir_ADD_OFFSET(jit_Z_PTR(jit, addr), offsetof(zend_array, u.flags))), + ir_CONST_U32(HASH_FLAG_PACKED)); + if (op_info & MAY_BE_ARRAY_PACKED) { + ir_GUARD(ref, ir_CONST_ADDR(exit_addr)); + } else { + ir_GUARD_NOT(ref, ir_CONST_ADDR(exit_addr)); + } + + return 1; +} + +static int zend_jit_fe_fetch(zend_jit_ctx *jit, const zend_op *opline, uint32_t op1_info, 
uint32_t op2_info, unsigned int target_label, uint8_t exit_opcode, const void *exit_addr) +{ + zend_jit_addr op1_addr = ZEND_ADDR_MEM_ZVAL(ZREG_FP, opline->op1.var); + ir_ref ref, ht_ref, hash_pos_ref, packed_pos_ref, hash_p_ref = IR_UNUSED, packed_p_ref = IR_UNUSED, if_packed = IR_UNUSED; + ir_ref if_def_hash = IR_UNUSED, if_def_packed = IR_UNUSED; + ir_ref exit_inputs = IR_UNUSED; + + if (!MAY_BE_HASH(op1_info) && !MAY_BE_PACKED(op1_info)) { + /* empty array */ + if (exit_addr) { + if (exit_opcode == ZEND_JMP) { + jit_SIDE_EXIT(jit, ir_CONST_ADDR(exit_addr)); + } + } else { + zend_basic_block *bb; + + ZEND_ASSERT(jit->b >= 0); + bb = &jit->ssa->cfg.blocks[jit->b]; + _zend_jit_add_predecessor_ref(jit, bb->successors[0], jit->b, ir_END()); + jit->b = -1; + } + return 1; + } + + // JIT: array = EX_VAR(opline->op1.var); + // JIT: fe_ht = Z_ARRVAL_P(array); + ht_ref = jit_Z_PTR(jit, op1_addr); + + if (op1_info & MAY_BE_PACKED_GUARD) { + if (!zend_jit_packed_guard(jit, opline, opline->op1.var, op1_info)) { + return 0; + } + } + + // JIT: pos = Z_FE_POS_P(array); + hash_pos_ref = packed_pos_ref = ir_LOAD_U32(ir_ADD_OFFSET(jit_FP(jit), opline->op1.var + offsetof(zval, u2.fe_pos))); + + if (MAY_BE_HASH(op1_info)) { + ir_ref loop_ref, pos2_ref, p2_ref; + + if (MAY_BE_PACKED(op1_info)) { + ref = ir_AND_U32( + ir_LOAD_U32(ir_ADD_OFFSET(ht_ref, offsetof(zend_array, u.flags))), + ir_CONST_U32(HASH_FLAG_PACKED)); + if_packed = ir_IF(ref); + ir_IF_FALSE(if_packed); + } + + // JIT: p = fe_ht->arData + pos; + if (sizeof(void*) == 8) { + ref = ir_ZEXT_A(hash_pos_ref); + } else { + ref = ir_BITCAST_A(hash_pos_ref); + } + hash_p_ref = ir_ADD_A( + ir_MUL_A(ref, ir_CONST_ADDR(sizeof(Bucket))), + ir_LOAD_A(ir_ADD_OFFSET(ht_ref, offsetof(zend_array, arData)))); + + loop_ref = ir_LOOP_BEGIN(ir_END()); + hash_pos_ref = ir_PHI_2(IR_U32, hash_pos_ref, IR_UNUSED); + hash_p_ref = ir_PHI_2(IR_ADDR, hash_p_ref, IR_UNUSED); + + // JIT: if (UNEXPECTED(pos >= fe_ht->nNumUsed)) { + ref = ir_ULT(hash_pos_ref, + ir_LOAD_U32(ir_ADD_OFFSET(ht_ref, offsetof(zend_array, nNumUsed)))); + + // JIT: ZEND_VM_SET_RELATIVE_OPCODE(opline, opline->extended_value); + // JIT: ZEND_VM_CONTINUE(); + + if (exit_addr) { + if (exit_opcode == ZEND_JMP) { + ir_GUARD(ref, ir_CONST_ADDR(exit_addr)); + } else { + ir_ref if_fit = ir_IF(ref); + ir_IF_FALSE(if_fit); + ir_END_list(exit_inputs); + ir_IF_TRUE(if_fit); + } + } else { + ir_ref if_fit = ir_IF(ref); + ir_IF_FALSE(if_fit); + ir_END_list(exit_inputs); + ir_IF_TRUE(if_fit); + } + + // JIT: pos++; + pos2_ref = ir_ADD_U32(hash_pos_ref, ir_CONST_U32(1)); + + // JIT: value_type = Z_TYPE_INFO_P(value); + // JIT: if (EXPECTED(value_type != IS_UNDEF)) { + if (!exit_addr || exit_opcode == ZEND_JMP) { + if_def_hash = ir_IF(jit_Z_TYPE_ref(jit, hash_p_ref)); + ir_IF_FALSE(if_def_hash); + } else { + ir_GUARD_NOT(jit_Z_TYPE_ref(jit, hash_p_ref), ir_CONST_ADDR(exit_addr)); + } + + // JIT: p++; + p2_ref = ir_ADD_OFFSET(hash_p_ref, sizeof(Bucket)); + + ir_MERGE_SET_OP(loop_ref, 2, ir_LOOP_END()); + ir_PHI_SET_OP(hash_pos_ref, 2, pos2_ref); + ir_PHI_SET_OP(hash_p_ref, 2, p2_ref); + + if (MAY_BE_PACKED(op1_info)) { + ir_IF_TRUE(if_packed); + } + } + if (MAY_BE_PACKED(op1_info)) { + ir_ref loop_ref, pos2_ref, p2_ref; + + // JIT: p = fe_ht->arPacked + pos; + if (sizeof(void*) == 8) { + ref = ir_ZEXT_A(packed_pos_ref); + } else { + ref = ir_BITCAST_A(packed_pos_ref); + } + packed_p_ref = ir_ADD_A( + ir_MUL_A(ref, ir_CONST_ADDR(sizeof(zval))), + ir_LOAD_A(ir_ADD_OFFSET(ht_ref, offsetof(zend_array, arPacked)))); + + 
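/* scan forward over IS_UNDEF holes: pos and p become loop PHIs
+ * whose back-edge operands are filled in with ir_PHI_SET_OP()
+ * once the incremented values are known */
+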
loop_ref = ir_LOOP_BEGIN(ir_END()); + packed_pos_ref = ir_PHI_2(IR_U32, packed_pos_ref, IR_UNUSED); + packed_p_ref = ir_PHI_2(IR_ADDR, packed_p_ref, IR_UNUSED); + + // JIT: if (UNEXPECTED(pos >= fe_ht->nNumUsed)) { + ref = ir_ULT(packed_pos_ref, + ir_LOAD_U32(ir_ADD_OFFSET(ht_ref, offsetof(zend_array, nNumUsed)))); + + // JIT: ZEND_VM_SET_RELATIVE_OPCODE(opline, opline->extended_value); + // JIT: ZEND_VM_CONTINUE(); + if (exit_addr) { + if (exit_opcode == ZEND_JMP) { + ir_GUARD(ref, ir_CONST_ADDR(exit_addr)); + } else { + ir_ref if_fit = ir_IF(ref); + ir_IF_FALSE(if_fit); + ir_END_list(exit_inputs); + ir_IF_TRUE(if_fit); + } + } else { + ir_ref if_fit = ir_IF(ref); + ir_IF_FALSE(if_fit); + ir_END_list(exit_inputs); + ir_IF_TRUE(if_fit); + } + + // JIT: pos++; + pos2_ref = ir_ADD_U32(packed_pos_ref, ir_CONST_U32(1)); + + // JIT: value_type = Z_TYPE_INFO_P(value); + // JIT: if (EXPECTED(value_type != IS_UNDEF)) { + if (!exit_addr || exit_opcode == ZEND_JMP) { + if_def_packed = ir_IF(jit_Z_TYPE_ref(jit, packed_p_ref)); + ir_IF_FALSE(if_def_packed); + } else { + ir_GUARD_NOT(jit_Z_TYPE_ref(jit, packed_p_ref), ir_CONST_ADDR(exit_addr)); + } + + // JIT: p++; + p2_ref = ir_ADD_OFFSET(packed_p_ref, sizeof(zval)); + + ir_MERGE_SET_OP(loop_ref, 2, ir_LOOP_END()); + ir_PHI_SET_OP(packed_pos_ref, 2, pos2_ref); + ir_PHI_SET_OP(packed_p_ref, 2, p2_ref); + } + + if (!exit_addr || exit_opcode == ZEND_JMP) { + zend_jit_addr val_addr; + zend_jit_addr var_addr = ZEND_ADDR_MEM_ZVAL(ZREG_FP, opline->op2.var); + uint32_t val_info; + ir_ref p_ref = IR_UNUSED, hash_path = IR_UNUSED; + + if (RETURN_VALUE_USED(opline)) { + zend_jit_addr res_addr = RES_ADDR(); + + if (MAY_BE_HASH(op1_info)) { + ir_ref key_ref = IR_UNUSED, if_key = IR_UNUSED, key_path = IR_UNUSED; + + ZEND_ASSERT(if_def_hash); + ir_IF_TRUE(if_def_hash); + + // JIT: Z_FE_POS_P(array) = pos + 1; + ir_STORE(ir_ADD_OFFSET(jit_FP(jit), opline->op1.var + offsetof(zval, u2.fe_pos)), + ir_ADD_U32(hash_pos_ref, ir_CONST_U32(1))); + + if (op1_info & MAY_BE_ARRAY_KEY_STRING) { + key_ref = ir_LOAD_A(ir_ADD_OFFSET(hash_p_ref, offsetof(Bucket, key))); + } + if ((op1_info & MAY_BE_ARRAY_KEY_LONG) + && (op1_info & MAY_BE_ARRAY_KEY_STRING)) { + // JIT: if (!p->key) { + if_key = ir_IF(key_ref); + ir_IF_TRUE(if_key); + } + if (op1_info & MAY_BE_ARRAY_KEY_STRING) { + ir_ref if_interned, interned_path; + + // JIT: ZVAL_STR_COPY(EX_VAR(opline->result.var), p->key); + jit_set_Z_PTR(jit, res_addr, key_ref); + ref = ir_AND_U32( + ir_LOAD_U32(ir_ADD_OFFSET(key_ref, offsetof(zend_refcounted, gc.u.type_info))), + ir_CONST_U32(IS_STR_INTERNED)); + if_interned = ir_IF(ref); + ir_IF_TRUE(if_interned); + + jit_set_Z_TYPE_INFO(jit, res_addr, IS_STRING); + + interned_path = ir_END(); + ir_IF_FALSE(if_interned); + + jit_GC_ADDREF(jit, key_ref); + jit_set_Z_TYPE_INFO(jit, res_addr, IS_STRING_EX); + + ir_MERGE_WITH(interned_path); + + if (op1_info & MAY_BE_ARRAY_KEY_LONG) { + key_path = ir_END(); + } + } + if (op1_info & MAY_BE_ARRAY_KEY_LONG) { + if (op1_info & MAY_BE_ARRAY_KEY_STRING) { + ir_IF_FALSE(if_key); + } + // JIT: ZVAL_LONG(EX_VAR(opline->result.var), p->h); + ref = ir_LOAD_L(ir_ADD_OFFSET(hash_p_ref, offsetof(Bucket, h))); + jit_set_Z_LVAL(jit, res_addr, ref); + jit_set_Z_TYPE_INFO(jit, res_addr, IS_LONG); + + if (op1_info & MAY_BE_ARRAY_KEY_STRING) { + ir_MERGE_WITH(key_path); + } + } + if (MAY_BE_PACKED(op1_info)) { + hash_path = ir_END(); + } else { + p_ref = hash_p_ref; + } + } + if (MAY_BE_PACKED(op1_info)) { + ZEND_ASSERT(if_def_packed); + ir_IF_TRUE(if_def_packed); + 
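+ /* a packed array has implicit integer keys, so the key stored into
+ * the result is simply the current position widened to zend_long */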
+ // JIT: Z_FE_POS_P(array) = pos + 1; + ir_STORE(ir_ADD_OFFSET(jit_FP(jit), opline->op1.var + offsetof(zval, u2.fe_pos)), + ir_ADD_U32(packed_pos_ref, ir_CONST_U32(1))); + + // JIT: ZVAL_LONG(EX_VAR(opline->result.var), pos); + if (sizeof(zend_long) == 8) { + packed_pos_ref = ir_ZEXT_L(packed_pos_ref); + } else { + packed_pos_ref = ir_BITCAST_L(packed_pos_ref); + } + jit_set_Z_LVAL(jit, res_addr, packed_pos_ref); + jit_set_Z_TYPE_INFO(jit, res_addr, IS_LONG); + + if (MAY_BE_HASH(op1_info)) { + ir_MERGE_WITH(hash_path); + p_ref = ir_PHI_2(IR_ADDR, packed_p_ref, hash_p_ref); + } else { + p_ref = packed_p_ref; + } + } + } else { + ir_ref pos_ref = IR_UNUSED; + + if (if_def_hash && if_def_packed) { + ir_IF_TRUE(if_def_hash); + ir_MERGE_WITH_EMPTY_TRUE(if_def_packed); + pos_ref = ir_PHI_2(IR_U32, hash_pos_ref, packed_pos_ref); + p_ref = ir_PHI_2(IR_ADDR, hash_p_ref, packed_p_ref); + } else if (if_def_hash) { + ir_IF_TRUE(if_def_hash); + pos_ref = hash_pos_ref; + p_ref = hash_p_ref; + } else if (if_def_packed) { + ir_IF_TRUE(if_def_packed); + pos_ref = packed_pos_ref; + p_ref = packed_p_ref; + } else { + ZEND_UNREACHABLE(); + } + + // JIT: Z_FE_POS_P(array) = pos + 1; + ir_STORE(ir_ADD_OFFSET(jit_FP(jit), opline->op1.var + offsetof(zval, u2.fe_pos)), + ir_ADD_U32(pos_ref, ir_CONST_U32(1))); + } + + val_info = ((op1_info & MAY_BE_ARRAY_OF_ANY) >> MAY_BE_ARRAY_SHIFT); + if (val_info & MAY_BE_ARRAY) { + val_info |= MAY_BE_ARRAY_KEY_ANY | MAY_BE_ARRAY_OF_ANY | MAY_BE_ARRAY_OF_REF; + } + if (op1_info & MAY_BE_ARRAY_OF_REF) { + val_info |= MAY_BE_REF | MAY_BE_RC1 | MAY_BE_RCN | MAY_BE_ANY | + MAY_BE_ARRAY_KEY_ANY | MAY_BE_ARRAY_OF_ANY | MAY_BE_ARRAY_OF_REF; + } else if (val_info & (MAY_BE_STRING|MAY_BE_ARRAY|MAY_BE_OBJECT|MAY_BE_RESOURCE)) { + val_info |= MAY_BE_RC1 | MAY_BE_RCN; + } + + val_addr = ZEND_ADDR_REF_ZVAL(p_ref); + if (opline->op2_type == IS_CV) { + // JIT: zend_assign_to_variable(variable_ptr, value, IS_CV, EX_USES_STRICT_TYPES()); + if (!zend_jit_assign_to_variable(jit, opline, var_addr, var_addr, op2_info, -1, IS_CV, val_addr, val_info, 0, 0, 1)) { + return 0; + } + } else { + // JIT: ZVAL_COPY(res, value); + jit_ZVAL_COPY(jit, var_addr, -1, val_addr, val_info, 1); + } + + if (!exit_addr) { + zend_basic_block *bb; + + ZEND_ASSERT(jit->b >= 0); + bb = &jit->ssa->cfg.blocks[jit->b]; + _zend_jit_add_predecessor_ref(jit, bb->successors[1], jit->b, ir_END()); + ZEND_ASSERT(exit_inputs); + if (!jit->ctx.ir_base[exit_inputs].op2) { + ref = exit_inputs; + } else { + ir_MERGE_list(exit_inputs); + ref = ir_END(); + } + _zend_jit_add_predecessor_ref(jit, bb->successors[0], jit->b, ref); + jit->b = -1; + } + } else { + ZEND_ASSERT(exit_inputs); + ir_MERGE_list(exit_inputs); + } + + return 1; +} + +static int zend_jit_load_this(zend_jit_ctx *jit, uint32_t var) +{ + zend_jit_addr this_addr = ZEND_ADDR_MEM_ZVAL(ZREG_FP, offsetof(zend_execute_data, This)); + zend_jit_addr var_addr = ZEND_ADDR_MEM_ZVAL(ZREG_FP, var); + ir_ref ref = jit_Z_PTR(jit, this_addr); + + jit_set_Z_PTR(jit, var_addr, ref); + jit_set_Z_TYPE_INFO(jit, var_addr, IS_OBJECT_EX); + jit_GC_ADDREF(jit, ref); + + return 1; +} + +static int zend_jit_fetch_this(zend_jit_ctx *jit, const zend_op *opline, const zend_op_array *op_array, bool check_only) +{ + if (!op_array->scope || (op_array->fn_flags & ZEND_ACC_STATIC)) { + if (JIT_G(trigger) == ZEND_JIT_ON_HOT_TRACE) { + if (!JIT_G(current_frame) || + !TRACE_FRAME_IS_THIS_CHECKED(JIT_G(current_frame))) { + + zend_jit_addr this_addr = ZEND_ADDR_MEM_ZVAL(ZREG_FP, offsetof(zend_execute_data, 
This)); + int32_t exit_point = zend_jit_trace_get_exit_point(opline, ZEND_JIT_EXIT_TO_VM); + const void *exit_addr = zend_jit_trace_get_exit_addr(exit_point); + + if (!exit_addr) { + return 0; + } + + jit_guard_Z_TYPE(jit, this_addr, IS_OBJECT, exit_addr); + + if (JIT_G(current_frame)) { + TRACE_FRAME_SET_THIS_CHECKED(JIT_G(current_frame)); + } + } + } else { + zend_jit_addr this_addr = ZEND_ADDR_MEM_ZVAL(ZREG_FP, offsetof(zend_execute_data, This)); + ir_ref if_object = jit_if_Z_TYPE(jit, this_addr, IS_OBJECT); + + ir_IF_FALSE_cold(if_object); + jit_SET_EX_OPLINE(jit, opline); + ir_IJMP(jit_STUB_ADDR(jit, jit_stub_invalid_this)); + + ir_IF_TRUE(if_object); + } + } + + if (!check_only) { + if (!zend_jit_load_this(jit, opline->result.var)) { + return 0; + } + } + + return 1; +} + +static int zend_jit_class_guard(zend_jit_ctx *jit, const zend_op *opline, ir_ref obj_ref, zend_class_entry *ce) +{ + int32_t exit_point = zend_jit_trace_get_exit_point(opline, 0); + const void *exit_addr = zend_jit_trace_get_exit_addr(exit_point); + + if (!exit_addr) { + return 0; + } + + ir_GUARD(ir_EQ(ir_LOAD_A(ir_ADD_OFFSET(obj_ref, offsetof(zend_object, ce))), ir_CONST_ADDR(ce)), + ir_CONST_ADDR(exit_addr)); + + return 1; +} + +static int zend_jit_fetch_obj(zend_jit_ctx *jit, + const zend_op *opline, + const zend_op_array *op_array, + zend_ssa *ssa, + const zend_ssa_op *ssa_op, + uint32_t op1_info, + zend_jit_addr op1_addr, + bool op1_indirect, + zend_class_entry *ce, + bool ce_is_instanceof, + bool on_this, + bool delayed_fetch_this, + bool op1_avoid_refcounting, + zend_class_entry *trace_ce, + uint8_t prop_type, + int may_throw) +{ + zval *member; + zend_property_info *prop_info; + bool may_be_dynamic = 1; + zend_jit_addr res_addr = ZEND_ADDR_MEM_ZVAL(ZREG_FP, opline->result.var); + zend_jit_addr prop_addr; + uint32_t res_info = RES_INFO(); + ir_ref prop_type_ref = IR_UNUSED; + ir_ref obj_ref = IR_UNUSED; + ir_ref prop_ref = IR_UNUSED; + ir_ref end_inputs = IR_UNUSED; + ir_ref slow_inputs = IR_UNUSED; + + ZEND_ASSERT(opline->op2_type == IS_CONST); + ZEND_ASSERT(op1_info & MAY_BE_OBJECT); + + member = RT_CONSTANT(opline, opline->op2); + ZEND_ASSERT(Z_TYPE_P(member) == IS_STRING && Z_STRVAL_P(member)[0] != '\0'); + prop_info = zend_get_known_property_info(op_array, ce, Z_STR_P(member), on_this, op_array->filename); + + if (on_this) { + zend_jit_addr this_addr = ZEND_ADDR_MEM_ZVAL(ZREG_FP, offsetof(zend_execute_data, This)); + obj_ref = jit_Z_PTR(jit, this_addr); + } else { + if (opline->op1_type == IS_VAR + && opline->opcode == ZEND_FETCH_OBJ_W + && (op1_info & MAY_BE_INDIRECT) + && Z_REG(op1_addr) == ZREG_FP) { + op1_addr = jit_ZVAL_INDIRECT_DEREF(jit, op1_addr); + } + if (op1_info & MAY_BE_REF) { + op1_addr = jit_ZVAL_DEREF(jit, op1_addr); + } + if (op1_info & ((MAY_BE_UNDEF|MAY_BE_ANY)- MAY_BE_OBJECT)) { + if (JIT_G(trigger) == ZEND_JIT_ON_HOT_TRACE) { + int32_t exit_point = zend_jit_trace_get_exit_point(opline, ZEND_JIT_EXIT_TO_VM); + const void *exit_addr = zend_jit_trace_get_exit_addr(exit_point); + + if (!exit_addr) { + return 0; + } + jit_guard_Z_TYPE(jit, op1_addr, IS_OBJECT, exit_addr); + } else { + ir_ref if_obj = jit_if_Z_TYPE(jit, op1_addr, IS_OBJECT); + + ir_IF_FALSE_cold(if_obj); + if (opline->opcode != ZEND_FETCH_OBJ_IS) { + ir_ref op1_ref = IR_UNUSED; + + jit_SET_EX_OPLINE(jit, opline); + if (opline->opcode != ZEND_FETCH_OBJ_W && (op1_info & MAY_BE_UNDEF)) { + zend_jit_addr orig_op1_addr = OP1_ADDR(); + ir_ref fast_path = IR_UNUSED; + + if (op1_info & MAY_BE_ANY) { + ir_ref if_def = 
ir_IF(jit_Z_TYPE(jit, op1_addr)); + ir_IF_TRUE(if_def); + fast_path = ir_END(); + ir_IF_FALSE_cold(if_def); + } + ir_CALL_1(IR_VOID, ir_CONST_FC_FUNC(zend_jit_undefined_op_helper), + ir_CONST_U32(opline->op1.var)); + if (fast_path) { + ir_MERGE_WITH(fast_path); + } + op1_ref = jit_ZVAL_ADDR(jit, orig_op1_addr); + } else { + op1_ref = jit_ZVAL_ADDR(jit, op1_addr); + } + if (opline->opcode == ZEND_FETCH_OBJ_W) { + ir_CALL_2(IR_VOID, ir_CONST_FC_FUNC(zend_jit_invalid_property_write), + op1_ref, ir_CONST_ADDR(Z_STRVAL_P(member))); + jit_set_Z_TYPE_INFO(jit, res_addr, _IS_ERROR); + } else { + ir_CALL_2(IR_VOID, ir_CONST_FC_FUNC(zend_jit_invalid_property_read), + op1_ref, ir_CONST_ADDR(Z_STRVAL_P(member))); + jit_set_Z_TYPE_INFO(jit, res_addr, IS_NULL); + } + } else { + jit_set_Z_TYPE_INFO(jit, res_addr, IS_NULL); + } + ir_END_list(end_inputs); + + ir_IF_TRUE(if_obj); + } + } + obj_ref = jit_Z_PTR(jit, op1_addr); + } + + ZEND_ASSERT(obj_ref); + if (!prop_info && trace_ce && (trace_ce->ce_flags & ZEND_ACC_IMMUTABLE)) { + prop_info = zend_get_known_property_info(op_array, trace_ce, Z_STR_P(member), on_this, op_array->filename); + if (prop_info) { + ce = trace_ce; + ce_is_instanceof = 0; + if (!(op1_info & MAY_BE_CLASS_GUARD)) { + if (on_this && JIT_G(current_frame) + && TRACE_FRAME_IS_THIS_CLASS_CHECKED(JIT_G(current_frame))) { + ZEND_ASSERT(JIT_G(current_frame)->ce == ce); + } else if (zend_jit_class_guard(jit, opline, obj_ref, ce)) { + if (on_this && JIT_G(current_frame)) { + JIT_G(current_frame)->ce = ce; + TRACE_FRAME_SET_THIS_CLASS_CHECKED(JIT_G(current_frame)); + } + } else { + return 0; + } + if (ssa->var_info && ssa_op->op1_use >= 0) { + ssa->var_info[ssa_op->op1_use].type |= MAY_BE_CLASS_GUARD; + ssa->var_info[ssa_op->op1_use].ce = ce; + ssa->var_info[ssa_op->op1_use].is_instanceof = ce_is_instanceof; + } + } + } + } + + if (!prop_info) { + ir_ref run_time_cache = ir_LOAD_A(jit_EX(run_time_cache)); + ir_ref ref = ir_LOAD_A(ir_ADD_OFFSET(run_time_cache, opline->extended_value & ~ZEND_FETCH_OBJ_FLAGS)); + ir_ref if_same = ir_IF(ir_EQ(ref, + ir_LOAD_A(ir_ADD_OFFSET(obj_ref, offsetof(zend_object, ce))))); + + ir_IF_FALSE_cold(if_same); + ir_END_list(slow_inputs); + + ir_IF_TRUE(if_same); + ir_ref offset_ref = ir_LOAD_A( + ir_ADD_OFFSET(run_time_cache, (opline->extended_value & ~ZEND_FETCH_OBJ_FLAGS) + sizeof(void*))); + + may_be_dynamic = zend_may_be_dynamic_property(ce, Z_STR_P(member), opline->op1_type == IS_UNUSED, op_array->filename); + if (may_be_dynamic) { + ir_ref if_dynamic = ir_IF(ir_LT(offset_ref, IR_NULL)); + if (opline->opcode == ZEND_FETCH_OBJ_W) { + ir_IF_TRUE_cold(if_dynamic); + ir_END_list(slow_inputs); + } else { + ir_IF_TRUE_cold(if_dynamic); + jit_SET_EX_OPLINE(jit, opline); + + if (opline->opcode != ZEND_FETCH_OBJ_IS) { + ir_CALL_2(IR_VOID, ir_CONST_FC_FUNC(zend_jit_fetch_obj_r_dynamic), + obj_ref, offset_ref); + } else { + ir_CALL_2(IR_VOID, ir_CONST_FC_FUNC(zend_jit_fetch_obj_is_dynamic), + obj_ref, offset_ref); + } + ir_END_list(end_inputs); + } + ir_IF_FALSE(if_dynamic); + } + prop_ref = ir_ADD_A(obj_ref, offset_ref); + prop_type_ref = jit_Z_TYPE_ref(jit, prop_ref); + ir_ref if_def = ir_IF(prop_type_ref); + ir_IF_FALSE_cold(if_def); + ir_END_list(slow_inputs); + ir_IF_TRUE(if_def); + prop_addr = ZEND_ADDR_REF_ZVAL(prop_ref); + if (opline->opcode == ZEND_FETCH_OBJ_W + && (!ce || ce_is_instanceof || (ce->ce_flags & (ZEND_ACC_HAS_TYPE_HINTS|ZEND_ACC_TRAIT)))) { + uint32_t flags = opline->extended_value & ZEND_FETCH_OBJ_FLAGS; + + ir_ref prop_info_ref = ir_LOAD_A( + 
ir_ADD_OFFSET(run_time_cache, (opline->extended_value & ~ZEND_FETCH_OBJ_FLAGS) + sizeof(void*) * 2)); + ir_ref if_has_prop_info = ir_IF(prop_info_ref); + + ir_IF_TRUE_cold(if_has_prop_info); + + ir_ref if_readonly = ir_IF( + ir_AND_U32(ir_LOAD_U32(ir_ADD_OFFSET(prop_info_ref, offsetof(zend_property_info, flags))), + ir_CONST_U32(ZEND_ACC_READONLY))); + ir_IF_TRUE(if_readonly); + + ir_ref if_prop_obj = jit_if_Z_TYPE(jit, prop_addr, IS_OBJECT); + ir_IF_TRUE(if_prop_obj); + ref = jit_Z_PTR(jit, prop_addr); + jit_GC_ADDREF(jit, ref); + jit_set_Z_PTR(jit, res_addr, ref); + jit_set_Z_TYPE_INFO(jit, res_addr, IS_OBJECT_EX); + ir_END_list(end_inputs); + + ir_IF_FALSE_cold(if_prop_obj); + + ir_ref extra_addr = ir_ADD_OFFSET(jit_ZVAL_ADDR(jit, prop_addr), offsetof(zval, u2.extra)); + ir_ref extra = ir_LOAD_U32(extra_addr); + ir_ref if_reinitable = ir_IF(ir_AND_U32(extra, ir_CONST_U32(IS_PROP_REINITABLE))); + ir_IF_TRUE(if_reinitable); + ir_STORE(extra_addr, ir_AND_U32(extra, ir_CONST_U32(~IS_PROP_REINITABLE))); + ir_ref reinit_path = ir_END(); + + ir_IF_FALSE(if_reinitable); + + jit_SET_EX_OPLINE(jit, opline); + ir_CALL_1(IR_VOID, ir_CONST_FC_FUNC(zend_readonly_property_modification_error), prop_info_ref); + jit_set_Z_TYPE_INFO(jit, res_addr, _IS_ERROR); + ir_END_list(end_inputs); + + if (flags == ZEND_FETCH_DIM_WRITE) { + ir_IF_FALSE_cold(if_readonly); + ir_MERGE_WITH(reinit_path); + jit_SET_EX_OPLINE(jit, opline); + ir_CALL_2(IR_VOID, ir_CONST_FC_FUNC(zend_jit_check_array_promotion), + prop_ref, prop_info_ref); + ir_END_list(end_inputs); + ir_IF_FALSE(if_has_prop_info); + } else if (flags == ZEND_FETCH_REF) { + ir_IF_FALSE_cold(if_readonly); + ir_MERGE_WITH(reinit_path); + ir_CALL_3(IR_VOID, ir_CONST_FC_FUNC(zend_jit_create_typed_ref), + prop_ref, + prop_info_ref, + jit_ZVAL_ADDR(jit, res_addr)); + ir_END_list(end_inputs); + ir_IF_FALSE(if_has_prop_info); + } else { + ir_ref list = reinit_path; + + ZEND_ASSERT(flags == 0); + ir_IF_FALSE(if_has_prop_info); + ir_END_list(list); + ir_IF_FALSE(if_readonly); + ir_END_list(list); + ir_MERGE_list(list); + } + } + } else { + prop_ref = ir_ADD_OFFSET(obj_ref, prop_info->offset); + prop_addr = ZEND_ADDR_REF_ZVAL(prop_ref); + if (JIT_G(trigger) == ZEND_JIT_ON_HOT_TRACE) { + if (opline->opcode == ZEND_FETCH_OBJ_W || !(res_info & MAY_BE_GUARD) || !JIT_G(current_frame)) { + /* perform IS_UNDEF check only after result type guard (during deoptimization) */ + int32_t exit_point = zend_jit_trace_get_exit_point(opline, ZEND_JIT_EXIT_TO_VM); + const void *exit_addr = zend_jit_trace_get_exit_addr(exit_point); + + if (!exit_addr) { + return 0; + } + prop_type_ref = jit_Z_TYPE_INFO(jit, prop_addr); + ir_GUARD(prop_type_ref, ir_CONST_ADDR(exit_addr)); + } + } else { + prop_type_ref = jit_Z_TYPE_INFO(jit, prop_addr); + ir_ref if_def = ir_IF(prop_type_ref); + ir_IF_FALSE_cold(if_def); + ir_END_list(slow_inputs); + ir_IF_TRUE(if_def); + } + if (opline->opcode == ZEND_FETCH_OBJ_W && (prop_info->flags & ZEND_ACC_READONLY)) { + if (!prop_type_ref) { + prop_type_ref = jit_Z_TYPE_INFO(jit, prop_addr); + } + ir_ref if_prop_obj = jit_if_Z_TYPE(jit, prop_addr, IS_OBJECT); + ir_IF_TRUE(if_prop_obj); + ir_ref ref = jit_Z_PTR(jit, prop_addr); + jit_GC_ADDREF(jit, ref); + jit_set_Z_PTR(jit, res_addr, ref); + jit_set_Z_TYPE_INFO(jit, res_addr, IS_OBJECT_EX); + ir_END_list(end_inputs); + + ir_IF_FALSE_cold(if_prop_obj); + + ir_ref extra_addr = ir_ADD_OFFSET(jit_ZVAL_ADDR(jit, prop_addr), offsetof(zval, u2.extra)); + ir_ref extra = ir_LOAD_U32(extra_addr); + ir_ref if_reinitable = 
ir_IF(ir_AND_U32(extra, ir_CONST_U32(IS_PROP_REINITABLE))); + + ir_IF_FALSE(if_reinitable); + jit_SET_EX_OPLINE(jit, opline); + ir_CALL_1(IR_VOID, ir_CONST_FC_FUNC(zend_readonly_property_modification_error), ir_CONST_ADDR(prop_info)); + jit_set_Z_TYPE_INFO(jit, res_addr, _IS_ERROR); + ir_END_list(end_inputs); + + ir_IF_TRUE(if_reinitable); + ir_STORE(extra_addr, ir_AND_U32(extra, ir_CONST_U32(~IS_PROP_REINITABLE))); + } + + if (opline->opcode == ZEND_FETCH_OBJ_W + && (opline->extended_value & ZEND_FETCH_OBJ_FLAGS) + && ZEND_TYPE_IS_SET(prop_info->type)) { + uint32_t flags = opline->extended_value & ZEND_FETCH_OBJ_FLAGS; + + if (flags == ZEND_FETCH_DIM_WRITE) { + if ((ZEND_TYPE_FULL_MASK(prop_info->type) & MAY_BE_ARRAY) == 0) { + if (!prop_type_ref) { + prop_type_ref = jit_Z_TYPE_INFO(jit, prop_addr); + } + ir_ref if_null_or_false = ir_IF(ir_LE(prop_type_ref, ir_CONST_U32(IS_FALSE))); + ir_IF_TRUE_cold(if_null_or_false); + jit_SET_EX_OPLINE(jit, opline); + ir_CALL_2(IR_VOID, ir_CONST_FC_FUNC(zend_jit_check_array_promotion), + prop_ref, ir_CONST_ADDR(prop_info)); + ir_END_list(end_inputs); + ir_IF_FALSE(if_null_or_false); + } + } else if (flags == ZEND_FETCH_REF) { + ir_ref ref; + + if (!prop_type_ref) { + prop_type_ref = jit_Z_TYPE_INFO(jit, prop_addr); + } + + ir_ref if_reference = ir_IF(ir_EQ(prop_type_ref, ir_CONST_U32(IS_REFERENCE_EX))); + ir_IF_FALSE(if_reference); + if (ce && ce->ce_flags & ZEND_ACC_IMMUTABLE) { + ref = ir_CONST_ADDR(prop_info); + } else { + int prop_info_offset = + (((prop_info->offset - (sizeof(zend_object) - sizeof(zval))) / sizeof(zval)) * sizeof(void*)); + + ref = ir_LOAD_A(ir_ADD_OFFSET(obj_ref, offsetof(zend_object, ce))); + ref = ir_LOAD_A(ir_ADD_OFFSET(ref, offsetof(zend_class_entry, properties_info_table))); + ref = ir_LOAD_A(ir_ADD_OFFSET(ref, prop_info_offset)); + } + ir_CALL_3(IR_VOID, ir_CONST_FC_FUNC(zend_jit_create_typed_ref), + prop_ref, + ref, + jit_ZVAL_ADDR(jit, res_addr)); + ir_END_list(end_inputs); + ir_IF_TRUE(if_reference); + } else { + ZEND_UNREACHABLE(); + } + } + } + + if (opline->opcode == ZEND_FETCH_OBJ_W) { + ZEND_ASSERT(prop_ref); + jit_set_Z_PTR(jit, res_addr, prop_ref); + jit_set_Z_TYPE_INFO(jit, res_addr, IS_INDIRECT); + if (JIT_G(trigger) == ZEND_JIT_ON_HOT_TRACE && prop_info) { + ssa->var_info[ssa_op->result_def].indirect_reference = 1; + } + ir_END_list(end_inputs); + } else { + bool result_avoid_refcounting = 0; + + if ((res_info & MAY_BE_GUARD) && JIT_G(current_frame) && prop_info) { + uint8_t type = concrete_type(res_info); + uint32_t flags = 0; + zend_jit_addr val_addr = prop_addr; + + if ((opline->op1_type & (IS_VAR|IS_TMP_VAR)) + && !delayed_fetch_this + && !op1_avoid_refcounting) { + flags = ZEND_JIT_EXIT_FREE_OP1; + } + + if ((opline->result_type & (IS_VAR|IS_TMP_VAR)) + && !(flags & ZEND_JIT_EXIT_FREE_OP1) + && (res_info & (MAY_BE_STRING|MAY_BE_ARRAY|MAY_BE_OBJECT|MAY_BE_RESOURCE)) + && (ssa_op+1)->op1_use == ssa_op->result_def + && zend_jit_may_avoid_refcounting(opline+1, res_info)) { + result_avoid_refcounting = 1; + ssa->var_info[ssa_op->result_def].avoid_refcounting = 1; + } + + val_addr = zend_jit_guard_fetch_result_type(jit, opline, val_addr, type, + 1, flags, op1_avoid_refcounting); + if (!val_addr) { + return 0; + } + + res_info &= ~MAY_BE_GUARD; + ssa->var_info[ssa_op->result_def].type &= ~MAY_BE_GUARD; + + // ZVAL_COPY + jit_ZVAL_COPY(jit, res_addr, -1, val_addr, res_info, !result_avoid_refcounting); + } else { + prop_type_ref = jit_Z_TYPE_INFO(jit, prop_addr); + + if (!zend_jit_zval_copy_deref(jit, res_addr, 
prop_addr, prop_type_ref)) { + return 0; + } + } + ir_END_list(end_inputs); + } + + if (op1_avoid_refcounting) { + SET_STACK_REG(JIT_G(current_frame)->stack, EX_VAR_TO_NUM(opline->op1.var), ZREG_NONE); + } + + if (JIT_G(trigger) != ZEND_JIT_ON_HOT_TRACE || !prop_info) { + ir_MERGE_list(slow_inputs); + jit_SET_EX_OPLINE(jit, opline); + + if (opline->opcode == ZEND_FETCH_OBJ_W) { + ir_CALL_1(IR_VOID, ir_CONST_FC_FUNC(zend_jit_fetch_obj_w_slow), obj_ref); + } else if (opline->opcode != ZEND_FETCH_OBJ_IS) { + ir_CALL_1(IR_VOID, ir_CONST_FC_FUNC(zend_jit_fetch_obj_r_slow), obj_ref); + } else { + ir_CALL_1(IR_VOID, ir_CONST_FC_FUNC(zend_jit_fetch_obj_is_slow), obj_ref); + } + ir_END_list(end_inputs); + } + + ir_MERGE_list(end_inputs); + + if (opline->op1_type != IS_UNUSED && !delayed_fetch_this && !op1_indirect) { + if (opline->op1_type == IS_VAR + && opline->opcode == ZEND_FETCH_OBJ_W + && (op1_info & MAY_BE_RC1)) { + zend_jit_addr orig_op1_addr = OP1_ADDR(); + ir_ref if_refcounted, ptr, refcount, if_non_zero; + ir_ref merge_inputs = IR_UNUSED; + + if_refcounted = jit_if_REFCOUNTED(jit, orig_op1_addr); + ir_IF_FALSE(if_refcounted); + ir_END_list(merge_inputs); + ir_IF_TRUE(if_refcounted); + ptr = jit_Z_PTR(jit, orig_op1_addr); + refcount = jit_GC_DELREF(jit, ptr); + if_non_zero = ir_IF(refcount); + ir_IF_TRUE(if_non_zero); + ir_END_list(merge_inputs); + ir_IF_FALSE(if_non_zero); + jit_SET_EX_OPLINE(jit, opline); + ir_CALL_1(IR_VOID, ir_CONST_FC_FUNC(zend_jit_extract_helper), ptr); + ir_END_list(merge_inputs); + ir_MERGE_list(merge_inputs); + } else if (!op1_avoid_refcounting) { + if (on_this) { + op1_info &= ~MAY_BE_RC1; + } + jit_FREE_OP(jit, opline->op1_type, opline->op1, op1_info, opline); + } + } + + if (JIT_G(trigger) == ZEND_JIT_ON_HOT_TRACE + && prop_info + && (opline->opcode != ZEND_FETCH_OBJ_W || + !(opline->extended_value & ZEND_FETCH_OBJ_FLAGS) || + !ZEND_TYPE_IS_SET(prop_info->type)) + && (!(opline->op1_type & (IS_VAR|IS_TMP_VAR)) || on_this || op1_indirect)) { + may_throw = 0; + } + + if (may_throw) { + zend_jit_check_exception(jit); + } + + return 1; +} + +static int zend_jit_assign_obj(zend_jit_ctx *jit, + const zend_op *opline, + const zend_op_array *op_array, + zend_ssa *ssa, + const zend_ssa_op *ssa_op, + uint32_t op1_info, + zend_jit_addr op1_addr, + uint32_t val_info, + bool op1_indirect, + zend_class_entry *ce, + bool ce_is_instanceof, + bool on_this, + bool delayed_fetch_this, + zend_class_entry *trace_ce, + uint8_t prop_type, + int may_throw) +{ + zval *member; + zend_string *name; + zend_property_info *prop_info; + zend_jit_addr val_addr = OP1_DATA_ADDR(); + zend_jit_addr res_addr = 0; + zend_jit_addr prop_addr; + ir_ref obj_ref = IR_UNUSED; + ir_ref prop_ref = IR_UNUSED; + ir_ref delayed_end_input = IR_UNUSED; + ir_ref end_inputs = IR_UNUSED; + ir_ref slow_inputs = IR_UNUSED; + + if (RETURN_VALUE_USED(opline)) { + res_addr = ZEND_ADDR_MEM_ZVAL(ZREG_FP, opline->result.var); + } + + ZEND_ASSERT(opline->op2_type == IS_CONST); + ZEND_ASSERT(op1_info & MAY_BE_OBJECT); + + member = RT_CONSTANT(opline, opline->op2); + ZEND_ASSERT(Z_TYPE_P(member) == IS_STRING && Z_STRVAL_P(member)[0] != '\0'); + name = Z_STR_P(member); + prop_info = zend_get_known_property_info(op_array, ce, name, on_this, op_array->filename); + + if (on_this) { + zend_jit_addr this_addr = ZEND_ADDR_MEM_ZVAL(ZREG_FP, offsetof(zend_execute_data, This)); + obj_ref = jit_Z_PTR(jit, this_addr); + } else { + if (opline->op1_type == IS_VAR + && (op1_info & MAY_BE_INDIRECT) + && Z_REG(op1_addr) == ZREG_FP) { + 
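+ /* A VAR op1 may hold an IS_INDIRECT pointer produced by a preceding
+  * FETCH_*_W opcode; load the real zval address before the type checks. */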
op1_addr = jit_ZVAL_INDIRECT_DEREF(jit, op1_addr); + } + if (op1_info & MAY_BE_REF) { + op1_addr = jit_ZVAL_DEREF(jit, op1_addr); + } + if (op1_info & ((MAY_BE_UNDEF|MAY_BE_ANY)- MAY_BE_OBJECT)) { + if (JIT_G(trigger) == ZEND_JIT_ON_HOT_TRACE) { + int32_t exit_point = zend_jit_trace_get_exit_point(opline, ZEND_JIT_EXIT_TO_VM); + const void *exit_addr = zend_jit_trace_get_exit_addr(exit_point); + + if (!exit_addr) { + return 0; + } + jit_guard_Z_TYPE(jit, op1_addr, IS_OBJECT, exit_addr); + } else { + ir_ref if_obj = jit_if_Z_TYPE(jit, op1_addr, IS_OBJECT); + ir_IF_FALSE_cold(if_obj); + + jit_SET_EX_OPLINE(jit, opline); + ir_CALL_2(IR_VOID, ir_CONST_FC_FUNC(zend_jit_invalid_property_assign), + jit_ZVAL_ADDR(jit, op1_addr), + ir_CONST_ADDR(ZSTR_VAL(name))); + + if (RETURN_VALUE_USED(opline)) { + jit_set_Z_TYPE_INFO(jit, res_addr, IS_NULL); + } + + ir_END_list(end_inputs); + + ir_IF_TRUE(if_obj); + } + } + obj_ref = jit_Z_PTR(jit, op1_addr); + } + + ZEND_ASSERT(obj_ref); + if (!prop_info && trace_ce && (trace_ce->ce_flags & ZEND_ACC_IMMUTABLE)) { + prop_info = zend_get_known_property_info(op_array, trace_ce, name, on_this, op_array->filename); + if (prop_info) { + ce = trace_ce; + ce_is_instanceof = 0; + if (!(op1_info & MAY_BE_CLASS_GUARD)) { + if (on_this && JIT_G(current_frame) + && TRACE_FRAME_IS_THIS_CLASS_CHECKED(JIT_G(current_frame))) { + ZEND_ASSERT(JIT_G(current_frame)->ce == ce); + } else if (zend_jit_class_guard(jit, opline, obj_ref, ce)) { + if (on_this && JIT_G(current_frame)) { + JIT_G(current_frame)->ce = ce; + TRACE_FRAME_SET_THIS_CLASS_CHECKED(JIT_G(current_frame)); + } + } else { + return 0; + } + if (ssa->var_info && ssa_op->op1_use >= 0) { + ssa->var_info[ssa_op->op1_use].type |= MAY_BE_CLASS_GUARD; + ssa->var_info[ssa_op->op1_use].ce = ce; + ssa->var_info[ssa_op->op1_use].is_instanceof = ce_is_instanceof; + } + if (ssa->var_info && ssa_op->op1_def >= 0) { + ssa->var_info[ssa_op->op1_def].type |= MAY_BE_CLASS_GUARD; + ssa->var_info[ssa_op->op1_def].ce = ce; + ssa->var_info[ssa_op->op1_def].is_instanceof = ce_is_instanceof; + } + } + } + } + + if (!prop_info) { + ir_ref run_time_cache = ir_LOAD_A(jit_EX(run_time_cache)); + ir_ref ref = ir_LOAD_A(ir_ADD_OFFSET(run_time_cache, opline->extended_value & ~ZEND_FETCH_OBJ_FLAGS)); + ir_ref if_same = ir_IF(ir_EQ(ref, ir_LOAD_A(ir_ADD_OFFSET(obj_ref, offsetof(zend_object, ce))))); + + ir_IF_FALSE_cold(if_same); + ir_END_list(slow_inputs); + + ir_IF_TRUE(if_same); + ir_ref offset_ref = ir_LOAD_A( + ir_ADD_OFFSET(run_time_cache, (opline->extended_value & ~ZEND_FETCH_OBJ_FLAGS) + sizeof(void*))); + + ir_ref if_dynamic = ir_IF(ir_LT(offset_ref, IR_NULL)); + ir_IF_TRUE_cold(if_dynamic); + ir_END_list(slow_inputs); + + ir_IF_FALSE(if_dynamic); + prop_ref = ir_ADD_A(obj_ref, offset_ref); + ir_ref if_def = ir_IF(jit_Z_TYPE_ref(jit, prop_ref)); + ir_IF_FALSE_cold(if_def); + ir_END_list(slow_inputs); + + ir_IF_TRUE(if_def); + prop_addr = ZEND_ADDR_REF_ZVAL(prop_ref); + + if (!ce || ce_is_instanceof || (ce->ce_flags & (ZEND_ACC_HAS_TYPE_HINTS|ZEND_ACC_TRAIT))) { + ir_ref prop_info_ref = ir_LOAD_A( + ir_ADD_OFFSET(run_time_cache, (opline->extended_value & ~ZEND_FETCH_OBJ_FLAGS) + sizeof(void*) * 2)); + ir_ref if_has_prop_info = ir_IF(prop_info_ref); + ir_IF_TRUE_cold(if_has_prop_info); + + // JIT: value = zend_assign_to_typed_prop(prop_info, property_val, value EXECUTE_DATA_CC); + jit_SET_EX_OPLINE(jit, opline); + ir_CALL_4(IR_VOID, ir_CONST_FC_FUNC(zend_jit_assign_to_typed_prop), + prop_ref, + prop_info_ref, + jit_ZVAL_ADDR(jit, val_addr), + 
RETURN_VALUE_USED(opline) ? jit_ZVAL_ADDR(jit, res_addr) : IR_NULL); + + if ((opline+1)->op1_type == IS_CONST) { + // TODO: ??? + // if (Z_TYPE_P(value) == orig_type) { + // CACHE_PTR_EX(cache_slot + 2, NULL); + } + + ir_END_list(end_inputs); + ir_IF_FALSE(if_has_prop_info); + } + } else { + prop_ref = ir_ADD_OFFSET(obj_ref, prop_info->offset); + prop_addr = ZEND_ADDR_REF_ZVAL(prop_ref); + if (!ce || ce_is_instanceof || !(ce->ce_flags & ZEND_ACC_IMMUTABLE) || ce->__get || ce->__set || (prop_info->flags & ZEND_ACC_READONLY)) { + // Undefined property with magic __get()/__set() + if (JIT_G(trigger) == ZEND_JIT_ON_HOT_TRACE) { + int32_t exit_point = zend_jit_trace_get_exit_point(opline, ZEND_JIT_EXIT_TO_VM); + const void *exit_addr = zend_jit_trace_get_exit_addr(exit_point); + + if (!exit_addr) { + return 0; + } + ir_GUARD(jit_Z_TYPE_INFO(jit, prop_addr), ir_CONST_ADDR(exit_addr)); + } else { + ir_ref if_def = ir_IF(jit_Z_TYPE_INFO(jit, prop_addr)); + ir_IF_FALSE_cold(if_def); + ir_END_list(slow_inputs); + ir_IF_TRUE(if_def); + } + } + if (ZEND_TYPE_IS_SET(prop_info->type)) { + ir_ref ref; + + // JIT: value = zend_assign_to_typed_prop(prop_info, property_val, value EXECUTE_DATA_CC); + jit_SET_EX_OPLINE(jit, opline); + if (ce && ce->ce_flags & ZEND_ACC_IMMUTABLE) { + ref = ir_CONST_ADDR(prop_info); + } else { + int prop_info_offset = + (((prop_info->offset - (sizeof(zend_object) - sizeof(zval))) / sizeof(zval)) * sizeof(void*)); + + ref = ir_LOAD_A(ir_ADD_OFFSET(obj_ref, offsetof(zend_object, ce))); + ref = ir_LOAD_A(ir_ADD_OFFSET(ref, offsetof(zend_class_entry, properties_info_table))); + ref = ir_LOAD_A(ir_ADD_OFFSET(ref, prop_info_offset)); + } + ir_CALL_4(IR_VOID, ir_CONST_FC_FUNC(zend_jit_assign_to_typed_prop), + prop_ref, + ref, + jit_ZVAL_ADDR(jit, val_addr), + RETURN_VALUE_USED(opline) ? jit_ZVAL_ADDR(jit, res_addr) : IR_NULL); + + ir_END_list(end_inputs); + } + } + + if (!prop_info || !ZEND_TYPE_IS_SET(prop_info->type)) { + if (opline->result_type == IS_UNUSED) { + if (!zend_jit_assign_to_variable_call(jit, opline, prop_addr, prop_addr, -1, -1, (opline+1)->op1_type, val_addr, val_info, res_addr, 0)) { + return 0; + } + } else { + if (!zend_jit_assign_to_variable(jit, opline, prop_addr, prop_addr, -1, -1, (opline+1)->op1_type, val_addr, val_info, res_addr, 0, 0)) { + return 0; + } + } + if (end_inputs || slow_inputs) { + if (((opline+1)->op1_type & (IS_VAR|IS_TMP_VAR)) + && (val_info & (MAY_BE_REF|MAY_BE_STRING|MAY_BE_ARRAY|MAY_BE_OBJECT|MAY_BE_RESOURCE))) { + /* skip FREE_OP_DATA() */ + delayed_end_input = ir_END(); + } else { + ir_END_list(end_inputs); + } + } + } + + if (slow_inputs) { + ir_MERGE_list(slow_inputs); + jit_SET_EX_OPLINE(jit, opline); + + // JIT: value = zobj->handlers->write_property(zobj, name, value, CACHE_ADDR(opline->extended_value)); + ir_ref run_time_cache = ir_LOAD_A(jit_EX(run_time_cache)); + ir_CALL_5(IR_VOID, ir_CONST_FC_FUNC(zend_jit_assign_obj_helper), + obj_ref, + ir_CONST_ADDR(name), + jit_ZVAL_ADDR(jit, val_addr), + ir_ADD_OFFSET(run_time_cache, opline->extended_value & ~ZEND_FETCH_OBJ_FLAGS), + RETURN_VALUE_USED(opline) ? 
jit_ZVAL_ADDR(jit, res_addr) : IR_NULL); + + ir_END_list(end_inputs); + } + + if (end_inputs) { + ir_MERGE_list(end_inputs); + + if (val_info & (MAY_BE_REF|MAY_BE_STRING|MAY_BE_ARRAY|MAY_BE_OBJECT|MAY_BE_RESOURCE)) { + val_info |= MAY_BE_RC1|MAY_BE_RCN; + } + jit_FREE_OP(jit, (opline+1)->op1_type, (opline+1)->op1, val_info, opline); + + if (delayed_end_input) { + ir_MERGE_WITH(delayed_end_input); + } + } + + if (opline->op1_type != IS_UNUSED && !delayed_fetch_this && !op1_indirect) { + jit_FREE_OP(jit, opline->op1_type, opline->op1, op1_info, opline); + } + + if (may_throw) { + zend_jit_check_exception(jit); + } + + return 1; +} + +static int zend_jit_assign_obj_op(zend_jit_ctx *jit, + const zend_op *opline, + const zend_op_array *op_array, + zend_ssa *ssa, + const zend_ssa_op *ssa_op, + uint32_t op1_info, + zend_jit_addr op1_addr, + uint32_t val_info, + zend_ssa_range *val_range, + bool op1_indirect, + zend_class_entry *ce, + bool ce_is_instanceof, + bool on_this, + bool delayed_fetch_this, + zend_class_entry *trace_ce, + uint8_t prop_type) +{ + zval *member; + zend_string *name; + zend_property_info *prop_info; + zend_jit_addr val_addr = OP1_DATA_ADDR(); + zend_jit_addr prop_addr; + bool use_prop_guard = 0; + bool may_throw = 0; + binary_op_type binary_op = get_binary_op(opline->extended_value); + ir_ref obj_ref = IR_UNUSED; + ir_ref prop_ref = IR_UNUSED; + ir_ref end_inputs = IR_UNUSED; + ir_ref slow_inputs = IR_UNUSED; + + ZEND_ASSERT(opline->op2_type == IS_CONST); + ZEND_ASSERT(op1_info & MAY_BE_OBJECT); + ZEND_ASSERT(opline->result_type == IS_UNUSED); + + member = RT_CONSTANT(opline, opline->op2); + ZEND_ASSERT(Z_TYPE_P(member) == IS_STRING && Z_STRVAL_P(member)[0] != '\0'); + name = Z_STR_P(member); + prop_info = zend_get_known_property_info(op_array, ce, name, on_this, op_array->filename); + + if (on_this) { + zend_jit_addr this_addr = ZEND_ADDR_MEM_ZVAL(ZREG_FP, offsetof(zend_execute_data, This)); + obj_ref = jit_Z_PTR(jit, this_addr); + } else { + if (opline->op1_type == IS_VAR + && (op1_info & MAY_BE_INDIRECT) + && Z_REG(op1_addr) == ZREG_FP) { + op1_addr = jit_ZVAL_INDIRECT_DEREF(jit, op1_addr); + } + if (op1_info & MAY_BE_REF) { + op1_addr = jit_ZVAL_DEREF(jit, op1_addr); + } + if (op1_info & ((MAY_BE_UNDEF|MAY_BE_ANY)- MAY_BE_OBJECT)) { + if (JIT_G(trigger) == ZEND_JIT_ON_HOT_TRACE) { + int32_t exit_point = zend_jit_trace_get_exit_point(opline, ZEND_JIT_EXIT_TO_VM); + const void *exit_addr = zend_jit_trace_get_exit_addr(exit_point); + + if (!exit_addr) { + return 0; + } + jit_guard_Z_TYPE(jit, op1_addr, IS_OBJECT, exit_addr); + } else { + ir_ref if_obj = jit_if_Z_TYPE(jit, op1_addr, IS_OBJECT); + ir_IF_FALSE_cold(if_obj); + + jit_SET_EX_OPLINE(jit, opline); + ir_CALL_2(IR_VOID, + (op1_info & MAY_BE_UNDEF) ? 
+ ir_CONST_FC_FUNC(zend_jit_invalid_property_assign_op) : + ir_CONST_FC_FUNC(zend_jit_invalid_property_assign), + jit_ZVAL_ADDR(jit, op1_addr), + ir_CONST_ADDR(ZSTR_VAL(name))); + + may_throw = 1; + + ir_END_list(end_inputs); + ir_IF_TRUE(if_obj); + } + } + obj_ref = jit_Z_PTR(jit, op1_addr); + } + + ZEND_ASSERT(obj_ref); + if (!prop_info && trace_ce && (trace_ce->ce_flags & ZEND_ACC_IMMUTABLE)) { + prop_info = zend_get_known_property_info(op_array, trace_ce, name, on_this, op_array->filename); + if (prop_info) { + ce = trace_ce; + ce_is_instanceof = 0; + if (!(op1_info & MAY_BE_CLASS_GUARD)) { + if (on_this && JIT_G(current_frame) + && TRACE_FRAME_IS_THIS_CLASS_CHECKED(JIT_G(current_frame))) { + ZEND_ASSERT(JIT_G(current_frame)->ce == ce); + } else if (zend_jit_class_guard(jit, opline, obj_ref, ce)) { + if (on_this && JIT_G(current_frame)) { + JIT_G(current_frame)->ce = ce; + TRACE_FRAME_SET_THIS_CLASS_CHECKED(JIT_G(current_frame)); + } + } else { + return 0; + } + if (ssa->var_info && ssa_op->op1_use >= 0) { + ssa->var_info[ssa_op->op1_use].type |= MAY_BE_CLASS_GUARD; + ssa->var_info[ssa_op->op1_use].ce = ce; + ssa->var_info[ssa_op->op1_use].is_instanceof = ce_is_instanceof; + } + if (ssa->var_info && ssa_op->op1_def >= 0) { + ssa->var_info[ssa_op->op1_def].type |= MAY_BE_CLASS_GUARD; + ssa->var_info[ssa_op->op1_def].ce = ce; + ssa->var_info[ssa_op->op1_def].is_instanceof = ce_is_instanceof; + } + } + } + } + + use_prop_guard = (prop_type != IS_UNKNOWN + && prop_type != IS_UNDEF + && prop_type != IS_REFERENCE + && (op1_info & (MAY_BE_ANY|MAY_BE_UNDEF)) == MAY_BE_OBJECT); + + if (!prop_info) { + ir_ref run_time_cache = ir_LOAD_A(jit_EX(run_time_cache)); + ir_ref ref = ir_LOAD_A(ir_ADD_OFFSET(run_time_cache, (opline+1)->extended_value & ~ZEND_FETCH_OBJ_FLAGS)); + ir_ref if_same = ir_IF(ir_EQ(ref, ir_LOAD_A(ir_ADD_OFFSET(obj_ref, offsetof(zend_object, ce))))); + + ir_IF_FALSE_cold(if_same); + ir_END_list(slow_inputs); + + ir_IF_TRUE(if_same); + if (!ce || ce_is_instanceof || (ce->ce_flags & (ZEND_ACC_HAS_TYPE_HINTS|ZEND_ACC_TRAIT))) { + ir_ref prop_info_ref = ir_LOAD_A( + ir_ADD_OFFSET(run_time_cache, ((opline+1)->extended_value & ~ZEND_FETCH_OBJ_FLAGS) + sizeof(void*) * 2)); + ir_ref if_has_prop_info = ir_IF(prop_info_ref); + ir_IF_TRUE_cold(if_has_prop_info); + ir_END_list(slow_inputs); + + ir_IF_FALSE(if_has_prop_info); + } + ir_ref offset_ref = ir_LOAD_A( + ir_ADD_OFFSET(run_time_cache, ((opline+1)->extended_value & ~ZEND_FETCH_OBJ_FLAGS) + sizeof(void*))); + + ir_ref if_dynamic = ir_IF(ir_LT(offset_ref, IR_NULL)); + ir_IF_TRUE_cold(if_dynamic); + ir_END_list(slow_inputs); + + ir_IF_FALSE(if_dynamic); + + prop_ref = ir_ADD_A(obj_ref, offset_ref); + if (!use_prop_guard) { + ir_ref if_def = ir_IF(jit_Z_TYPE_ref(jit, prop_ref)); + ir_IF_FALSE_cold(if_def); + ir_END_list(slow_inputs); + + ir_IF_TRUE(if_def); + } + prop_addr = ZEND_ADDR_REF_ZVAL(prop_ref); + } else { + prop_ref = ir_ADD_OFFSET(obj_ref, prop_info->offset); + prop_addr = ZEND_ADDR_REF_ZVAL(prop_ref); + + if (ZEND_TYPE_IS_SET(prop_info->type) || !use_prop_guard) { + if (JIT_G(trigger) == ZEND_JIT_ON_HOT_TRACE) { + int32_t exit_point = zend_jit_trace_get_exit_point(opline, ZEND_JIT_EXIT_TO_VM); + const void *exit_addr = zend_jit_trace_get_exit_addr(exit_point); + + if (!exit_addr) { + return 0; + } + ir_GUARD(jit_Z_TYPE_INFO(jit, prop_addr), ir_CONST_ADDR(exit_addr)); + } else { + ir_ref if_def = ir_IF(jit_Z_TYPE_INFO(jit, prop_addr)); + ir_IF_FALSE_cold(if_def); + ir_END_list(slow_inputs); + ir_IF_TRUE(if_def); + } + } + if 
(ZEND_TYPE_IS_SET(prop_info->type)) { + ir_ref if_ref, if_typed, noref_path, ref_path, reference, ref; + + may_throw = 1; + + jit_SET_EX_OPLINE(jit, opline); + + if_ref = jit_if_Z_TYPE(jit, prop_addr, IS_REFERENCE); + ir_IF_FALSE(if_ref); + noref_path = ir_END(); + ir_IF_TRUE(if_ref); + + reference = jit_Z_PTR(jit, prop_addr); + ref = ir_ADD_OFFSET(reference, offsetof(zend_reference, val)); + if_typed = jit_if_TYPED_REF(jit, reference); + ir_IF_FALSE(if_typed); + ref_path = ir_END(); + ir_IF_TRUE_cold(if_typed); + + ir_CALL_3(IR_VOID, ir_CONST_FC_FUNC(zend_jit_assign_op_to_typed_ref), + reference, + jit_ZVAL_ADDR(jit, val_addr), + ir_CONST_FC_FUNC(binary_op)); + + ir_END_list(end_inputs); + + ir_MERGE_2(noref_path, ref_path); + prop_ref = ir_PHI_2(IR_ADDR, prop_ref, ref); + prop_addr = ZEND_ADDR_REF_ZVAL(prop_ref); + + // JIT: value = zend_assign_to_typed_prop(prop_info, property_val, value EXECUTE_DATA_CC); + if (ce && ce->ce_flags & ZEND_ACC_IMMUTABLE) { + ref = ir_CONST_ADDR(prop_info); + } else { + int prop_info_offset = + (((prop_info->offset - (sizeof(zend_object) - sizeof(zval))) / sizeof(zval)) * sizeof(void*)); + + ref = ir_LOAD_A(ir_ADD_OFFSET(obj_ref, offsetof(zend_object, ce))); + ref = ir_LOAD_A(ir_ADD_OFFSET(ref, offsetof(zend_class_entry, properties_info_table))); + ref = ir_LOAD_A(ir_ADD_OFFSET(ref, prop_info_offset)); + } + + ir_CALL_4(IR_VOID, ir_CONST_FC_FUNC(zend_jit_assign_op_to_typed_prop), + prop_ref, + ref, + jit_ZVAL_ADDR(jit, val_addr), + ir_CONST_FC_FUNC(binary_op)); + + ir_END_list(end_inputs); + } + } + + if (!prop_info || !ZEND_TYPE_IS_SET(prop_info->type)) { + zend_jit_addr var_addr = prop_addr; + uint32_t var_info = MAY_BE_ANY|MAY_BE_REF|MAY_BE_RC1|MAY_BE_RCN; + uint32_t var_def_info = MAY_BE_ANY|MAY_BE_REF|MAY_BE_RC1|MAY_BE_RCN; + + if (use_prop_guard) { + int32_t exit_point = zend_jit_trace_get_exit_point(opline, 0); + const void *exit_addr = zend_jit_trace_get_exit_addr(exit_point); + if (!exit_addr) { + return 0; + } + + jit_guard_Z_TYPE(jit, prop_addr, prop_type, exit_addr); + var_info = (1 << prop_type) | (var_info & ~(MAY_BE_ANY|MAY_BE_UNDEF|MAY_BE_REF)); + } + + if (var_info & MAY_BE_REF) { + ir_ref if_ref, if_typed, noref_path, ref_path, reference, ref; + + may_throw = 1; + + if_ref = jit_if_Z_TYPE(jit, prop_addr, IS_REFERENCE); + ir_IF_FALSE(if_ref); + noref_path = ir_END(); + ir_IF_TRUE(if_ref); + + reference = jit_Z_PTR(jit, var_addr); + ref = ir_ADD_OFFSET(reference, offsetof(zend_reference, val)); + if_typed = jit_if_TYPED_REF(jit, reference); + ir_IF_FALSE(if_typed); + ref_path = ir_END(); + ir_IF_TRUE_cold(if_typed); + + jit_SET_EX_OPLINE(jit, opline); + + ir_CALL_3(IR_VOID, ir_CONST_FC_FUNC(zend_jit_assign_op_to_typed_ref), + reference, + jit_ZVAL_ADDR(jit, val_addr), + ir_CONST_FC_FUNC(binary_op)); + + ir_END_list(end_inputs); + + ir_MERGE_2(noref_path, ref_path); + prop_ref = ir_PHI_2(IR_ADDR, prop_ref, ref); + var_addr = ZEND_ADDR_REF_ZVAL(prop_ref); + + var_info &= ~MAY_BE_REF; + } + + uint8_t val_op_type = (opline+1)->op1_type; + if (val_op_type & (IS_TMP_VAR|IS_VAR)) { + /* prevent FREE_OP in the helpers */ + val_op_type = IS_CV; + } + + switch (opline->extended_value) { + case ZEND_ADD: + case ZEND_SUB: + case ZEND_MUL: + if ((var_info & (MAY_BE_STRING|MAY_BE_ARRAY|MAY_BE_OBJECT|MAY_BE_RESOURCE)) || + (val_info & (MAY_BE_STRING|MAY_BE_ARRAY|MAY_BE_OBJECT|MAY_BE_RESOURCE))) { + if (opline->extended_value != ZEND_ADD || + (var_info & MAY_BE_ANY) != MAY_BE_ARRAY || + (val_info & MAY_BE_ANY) != MAY_BE_ARRAY) { + may_throw = 1; + } + } + 
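+ /* ZEND_ADD of two arrays is plain array union and cannot throw; any
+  * other string/array/object/resource operand combination may reach an
+  * error path, which is what the may_throw analysis above reflects. */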
if (!zend_jit_math_helper(jit, opline, opline->extended_value, IS_CV, opline->op1, var_addr, var_info, val_op_type, (opline+1)->op1, val_addr, val_info, 0, var_addr, var_def_info, var_info, + 1 /* may overflow */, 0)) { + return 0; + } + break; + case ZEND_BW_OR: + case ZEND_BW_AND: + case ZEND_BW_XOR: + if ((var_info & (MAY_BE_STRING|MAY_BE_DOUBLE|MAY_BE_ARRAY|MAY_BE_OBJECT|MAY_BE_RESOURCE)) || + (val_info & (MAY_BE_STRING|MAY_BE_DOUBLE|MAY_BE_ARRAY|MAY_BE_OBJECT|MAY_BE_RESOURCE))) { + if ((var_info & MAY_BE_ANY) != MAY_BE_STRING || + (val_info & MAY_BE_ANY) != MAY_BE_STRING) { + may_throw = 1; + } + } + goto long_math; + case ZEND_SL: + case ZEND_SR: + if ((var_info & (MAY_BE_STRING|MAY_BE_DOUBLE|MAY_BE_ARRAY|MAY_BE_OBJECT|MAY_BE_RESOURCE)) || + (val_info & (MAY_BE_STRING|MAY_BE_DOUBLE|MAY_BE_ARRAY|MAY_BE_OBJECT|MAY_BE_RESOURCE))) { + may_throw = 1; + } + if (val_op_type != IS_CONST || + Z_TYPE_P(RT_CONSTANT((opline+1), (opline+1)->op1)) != IS_LONG || + Z_LVAL_P(RT_CONSTANT((opline+1), (opline+1)->op1)) < 0) { + may_throw = 1; + } + goto long_math; + case ZEND_MOD: + if ((var_info & (MAY_BE_STRING|MAY_BE_ARRAY|MAY_BE_OBJECT|MAY_BE_RESOURCE)) || + (val_info & (MAY_BE_STRING|MAY_BE_ARRAY|MAY_BE_OBJECT|MAY_BE_RESOURCE))) { + may_throw = 1; + } + if (val_op_type != IS_CONST || + Z_TYPE_P(RT_CONSTANT((opline+1), (opline+1)->op1)) != IS_LONG || + Z_LVAL_P(RT_CONSTANT((opline+1), (opline+1)->op1)) == 0) { + may_throw = 1; + } +long_math: + if (!zend_jit_long_math_helper(jit, opline, opline->extended_value, + IS_CV, opline->op1, var_addr, var_info, NULL, + val_op_type, (opline+1)->op1, val_addr, val_info, + val_range, + 0, var_addr, var_def_info, var_info, /* may throw */ 1)) { + return 0; + } + break; + case ZEND_CONCAT: + may_throw = 1; + if (!zend_jit_concat_helper(jit, opline, IS_CV, opline->op1, var_addr, var_info, val_op_type, (opline+1)->op1, val_addr, val_info, var_addr, + 0)) { + return 0; + } + break; + default: + ZEND_UNREACHABLE(); + } + if (end_inputs || slow_inputs) { + ir_END_list(end_inputs); + } + } + + if (slow_inputs) { + ir_MERGE_list(slow_inputs); + + may_throw = 1; + + jit_SET_EX_OPLINE(jit, opline); + ir_ref run_time_cache = ir_LOAD_A(jit_EX(run_time_cache)); + ir_CALL_5(IR_VOID, ir_CONST_FC_FUNC(zend_jit_assign_obj_op_helper), + obj_ref, + ir_CONST_ADDR(name), + jit_ZVAL_ADDR(jit, val_addr), + ir_ADD_OFFSET(run_time_cache, (opline+1)->extended_value & ~ZEND_FETCH_OBJ_FLAGS), + ir_CONST_FC_FUNC(binary_op)); + + ir_END_list(end_inputs); + } + + if (end_inputs) { + ir_MERGE_list(end_inputs); + } + + if (val_info & (MAY_BE_REF|MAY_BE_STRING|MAY_BE_ARRAY|MAY_BE_OBJECT|MAY_BE_RESOURCE)) { + val_info |= MAY_BE_RC1|MAY_BE_RCN; + } + + // JIT: FREE_OP_DATA(); + jit_FREE_OP(jit, (opline+1)->op1_type, (opline+1)->op1, val_info, opline); + + if (opline->op1_type != IS_UNUSED && !delayed_fetch_this && !op1_indirect) { + if ((op1_info & MAY_HAVE_DTOR) && (op1_info & MAY_BE_RC1)) { + may_throw = 1; + } + jit_FREE_OP(jit, opline->op1_type, opline->op1, op1_info, opline); + } + + if (may_throw) { + zend_jit_check_exception(jit); + } + + return 1; +} + +static int zend_jit_incdec_obj(zend_jit_ctx *jit, + const zend_op *opline, + const zend_op_array *op_array, + zend_ssa *ssa, + const zend_ssa_op *ssa_op, + uint32_t op1_info, + zend_jit_addr op1_addr, + bool op1_indirect, + zend_class_entry *ce, + bool ce_is_instanceof, + bool on_this, + bool delayed_fetch_this, + zend_class_entry *trace_ce, + uint8_t prop_type) +{ + zval *member; + zend_string *name; + zend_property_info *prop_info; + 
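+ /* res_addr stays 0 while the result is unused; the helper calls below
+  * then receive IR_NULL instead of a result zval address. */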
zend_jit_addr res_addr = 0; + zend_jit_addr prop_addr; + bool use_prop_guard = 0; + bool may_throw = 0; + uint32_t res_info = (opline->result_type != IS_UNUSED) ? RES_INFO() : 0; + ir_ref obj_ref = IR_UNUSED; + ir_ref prop_ref = IR_UNUSED; + ir_ref end_inputs = IR_UNUSED; + ir_ref slow_inputs = IR_UNUSED; + + ZEND_ASSERT(opline->op2_type == IS_CONST); + ZEND_ASSERT(op1_info & MAY_BE_OBJECT); + + if (opline->result_type != IS_UNUSED) { + res_addr = ZEND_ADDR_MEM_ZVAL(ZREG_FP, opline->result.var); + } + + member = RT_CONSTANT(opline, opline->op2); + ZEND_ASSERT(Z_TYPE_P(member) == IS_STRING && Z_STRVAL_P(member)[0] != '\0'); + name = Z_STR_P(member); + prop_info = zend_get_known_property_info(op_array, ce, name, on_this, op_array->filename); + + if (on_this) { + zend_jit_addr this_addr = ZEND_ADDR_MEM_ZVAL(ZREG_FP, offsetof(zend_execute_data, This)); + obj_ref = jit_Z_PTR(jit, this_addr); + } else { + if (opline->op1_type == IS_VAR + && (op1_info & MAY_BE_INDIRECT) + && Z_REG(op1_addr) == ZREG_FP) { + op1_addr = jit_ZVAL_INDIRECT_DEREF(jit, op1_addr); + } + if (op1_info & MAY_BE_REF) { + op1_addr = jit_ZVAL_DEREF(jit, op1_addr); + } + if (op1_info & ((MAY_BE_UNDEF|MAY_BE_ANY)- MAY_BE_OBJECT)) { + if (JIT_G(trigger) == ZEND_JIT_ON_HOT_TRACE) { + int32_t exit_point = zend_jit_trace_get_exit_point(opline, ZEND_JIT_EXIT_TO_VM); + const void *exit_addr = zend_jit_trace_get_exit_addr(exit_point); + + if (!exit_addr) { + return 0; + } + jit_guard_Z_TYPE(jit, op1_addr, IS_OBJECT, exit_addr); + } else { + ir_ref if_obj = jit_if_Z_TYPE(jit, op1_addr, IS_OBJECT); + ir_IF_FALSE_cold(if_obj); + + jit_SET_EX_OPLINE(jit, opline); + ir_CALL_2(IR_VOID, ir_CONST_FC_FUNC(zend_jit_invalid_property_incdec), + jit_ZVAL_ADDR(jit, op1_addr), + ir_CONST_ADDR(ZSTR_VAL(name))); + + may_throw = 1; + + if ((opline->op1_type & (IS_VAR|IS_TMP_VAR)) && !delayed_fetch_this && !op1_indirect) { + ir_END_list(end_inputs); + } else { + ir_IJMP(jit_STUB_ADDR(jit, jit_stub_exception_handler)); + } + ir_IF_TRUE(if_obj); + } + } + obj_ref = jit_Z_PTR(jit, op1_addr); + } + + ZEND_ASSERT(obj_ref); + if (!prop_info && trace_ce && (trace_ce->ce_flags & ZEND_ACC_IMMUTABLE)) { + prop_info = zend_get_known_property_info(op_array, trace_ce, name, on_this, op_array->filename); + if (prop_info) { + ce = trace_ce; + ce_is_instanceof = 0; + if (!(op1_info & MAY_BE_CLASS_GUARD)) { + if (on_this && JIT_G(current_frame) + && TRACE_FRAME_IS_THIS_CLASS_CHECKED(JIT_G(current_frame))) { + ZEND_ASSERT(JIT_G(current_frame)->ce == ce); + } else if (zend_jit_class_guard(jit, opline, obj_ref, ce)) { + if (on_this && JIT_G(current_frame)) { + JIT_G(current_frame)->ce = ce; + TRACE_FRAME_SET_THIS_CLASS_CHECKED(JIT_G(current_frame)); + } + } else { + return 0; + } + if (ssa->var_info && ssa_op->op1_use >= 0) { + ssa->var_info[ssa_op->op1_use].type |= MAY_BE_CLASS_GUARD; + ssa->var_info[ssa_op->op1_use].ce = ce; + ssa->var_info[ssa_op->op1_use].is_instanceof = ce_is_instanceof; + } + if (ssa->var_info && ssa_op->op1_def >= 0) { + ssa->var_info[ssa_op->op1_def].type |= MAY_BE_CLASS_GUARD; + ssa->var_info[ssa_op->op1_def].ce = ce; + ssa->var_info[ssa_op->op1_def].is_instanceof = ce_is_instanceof; + } + } + } + } + + use_prop_guard = (prop_type != IS_UNKNOWN + && prop_type != IS_UNDEF + && prop_type != IS_REFERENCE + && (op1_info & (MAY_BE_ANY|MAY_BE_UNDEF)) == MAY_BE_OBJECT); + + if (!prop_info) { + ir_ref run_time_cache = ir_LOAD_A(jit_EX(run_time_cache)); + ir_ref ref = ir_LOAD_A(ir_ADD_OFFSET(run_time_cache, opline->extended_value & ~ZEND_FETCH_OBJ_FLAGS)); + 
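+ /* Run-time cache layout for *_OBJ opcodes: the first slot caches the
+  * object's zend_class_entry, the second the property slot offset
+  * (negative for dynamic properties), and the third the
+  * zend_property_info of a typed property. */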
ir_ref if_same = ir_IF(ir_EQ(ref, ir_LOAD_A(ir_ADD_OFFSET(obj_ref, offsetof(zend_object, ce))))); + + ir_IF_FALSE_cold(if_same); + ir_END_list(slow_inputs); + + ir_IF_TRUE(if_same); + if (!ce || ce_is_instanceof || (ce->ce_flags & (ZEND_ACC_HAS_TYPE_HINTS|ZEND_ACC_TRAIT))) { + ir_ref prop_info_ref = ir_LOAD_A( + ir_ADD_OFFSET(run_time_cache, (opline->extended_value & ~ZEND_FETCH_OBJ_FLAGS) + sizeof(void*) * 2)); + ir_ref if_has_prop_info = ir_IF(prop_info_ref); + ir_IF_TRUE_cold(if_has_prop_info); + ir_END_list(slow_inputs); + + ir_IF_FALSE(if_has_prop_info); + } + ir_ref offset_ref = ir_LOAD_A( + ir_ADD_OFFSET(run_time_cache, (opline->extended_value & ~ZEND_FETCH_OBJ_FLAGS) + sizeof(void*))); + + ir_ref if_dynamic = ir_IF(ir_LT(offset_ref, IR_NULL)); + ir_IF_TRUE_cold(if_dynamic); + ir_END_list(slow_inputs); + + ir_IF_FALSE(if_dynamic); + + prop_ref = ir_ADD_A(obj_ref, offset_ref); + if (!use_prop_guard) { + ir_ref if_def = ir_IF(jit_Z_TYPE_ref(jit, prop_ref)); + ir_IF_FALSE_cold(if_def); + ir_END_list(slow_inputs); + + ir_IF_TRUE(if_def); + } + prop_addr = ZEND_ADDR_REF_ZVAL(prop_ref); + } else { + prop_ref = ir_ADD_OFFSET(obj_ref, prop_info->offset); + prop_addr = ZEND_ADDR_REF_ZVAL(prop_ref); + + if (ZEND_TYPE_IS_SET(prop_info->type) || !use_prop_guard) { + if (JIT_G(trigger) == ZEND_JIT_ON_HOT_TRACE) { + int32_t exit_point = zend_jit_trace_get_exit_point(opline, ZEND_JIT_EXIT_TO_VM); + const void *exit_addr = zend_jit_trace_get_exit_addr(exit_point); + + if (!exit_addr) { + return 0; + } + ir_GUARD(jit_Z_TYPE_INFO(jit, prop_addr), ir_CONST_ADDR(exit_addr)); + } else { + ir_ref if_def = ir_IF(jit_Z_TYPE_INFO(jit, prop_addr)); + ir_IF_FALSE_cold(if_def); + ir_END_list(slow_inputs); + ir_IF_TRUE(if_def); + } + } + + if (ZEND_TYPE_IS_SET(prop_info->type)) { + const void *func; + ir_ref ref; + + may_throw = 1; + jit_SET_EX_OPLINE(jit, opline); + + if (ce && ce->ce_flags & ZEND_ACC_IMMUTABLE) { + ref = ir_CONST_ADDR(prop_info); + } else { + int prop_info_offset = + (((prop_info->offset - (sizeof(zend_object) - sizeof(zval))) / sizeof(zval)) * sizeof(void*)); + + ref = ir_LOAD_A(ir_ADD_OFFSET(obj_ref, offsetof(zend_object, ce))); + ref = ir_LOAD_A(ir_ADD_OFFSET(ref, offsetof(zend_class_entry, properties_info_table))); + ref = ir_LOAD_A(ir_ADD_OFFSET(ref, prop_info_offset)); + } + + if (opline->result_type == IS_UNUSED) { + switch (opline->opcode) { + case ZEND_PRE_INC_OBJ: + case ZEND_POST_INC_OBJ: + func = zend_jit_inc_typed_prop; + break; + case ZEND_PRE_DEC_OBJ: + case ZEND_POST_DEC_OBJ: + func = zend_jit_dec_typed_prop; + break; + default: + ZEND_UNREACHABLE(); + } + + ir_CALL_2(IR_VOID, ir_CONST_FC_FUNC(func), prop_ref, ref); + } else { + switch (opline->opcode) { + case ZEND_PRE_INC_OBJ: + func = zend_jit_pre_inc_typed_prop; + break; + case ZEND_PRE_DEC_OBJ: + func = zend_jit_pre_dec_typed_prop; + break; + case ZEND_POST_INC_OBJ: + func = zend_jit_post_inc_typed_prop; + break; + case ZEND_POST_DEC_OBJ: + func = zend_jit_post_dec_typed_prop; + break; + default: + ZEND_UNREACHABLE(); + } + ir_CALL_3(IR_VOID, ir_CONST_FC_FUNC(func), + prop_ref, + ref, + jit_ZVAL_ADDR(jit, res_addr)); + } + ir_END_list(end_inputs); + } + } + + if (!prop_info || !ZEND_TYPE_IS_SET(prop_info->type)) { + uint32_t var_info = MAY_BE_ANY|MAY_BE_REF|MAY_BE_RC1|MAY_BE_RCN; + zend_jit_addr var_addr = prop_addr; + ir_ref if_long = IR_UNUSED; + ir_ref if_overflow = IR_UNUSED; + + if (use_prop_guard) { + int32_t exit_point = zend_jit_trace_get_exit_point(opline, 0); + const void *exit_addr = 
zend_jit_trace_get_exit_addr(exit_point); + if (!exit_addr) { + return 0; + } + + jit_guard_Z_TYPE(jit, prop_addr, prop_type, exit_addr); + var_info = (1 << prop_type) | (var_info & ~(MAY_BE_ANY|MAY_BE_UNDEF|MAY_BE_REF)); + } + + if (var_info & MAY_BE_REF) { + const void *func; + ir_ref if_ref, if_typed, noref_path, ref_path, reference, ref; + + if_ref = jit_if_Z_TYPE(jit, prop_addr, IS_REFERENCE); + ir_IF_FALSE(if_ref); + noref_path = ir_END(); + ir_IF_TRUE(if_ref); + + reference = jit_Z_PTR(jit, var_addr); + ref = ir_ADD_OFFSET(reference, offsetof(zend_reference, val)); + if_typed = jit_if_TYPED_REF(jit, reference); + ir_IF_FALSE(if_typed); + ref_path = ir_END(); + ir_IF_TRUE_cold(if_typed); + + switch (opline->opcode) { + case ZEND_PRE_INC_OBJ: + func = zend_jit_pre_inc_typed_ref; + break; + case ZEND_PRE_DEC_OBJ: + func = zend_jit_pre_dec_typed_ref; + break; + case ZEND_POST_INC_OBJ: + func = zend_jit_post_inc_typed_ref; + break; + case ZEND_POST_DEC_OBJ: + func = zend_jit_post_dec_typed_ref; + break; + default: + ZEND_UNREACHABLE(); + } + + may_throw = 1; + jit_SET_EX_OPLINE(jit, opline); + ir_CALL_2(IR_VOID, ir_CONST_FC_FUNC(func), + reference, + (opline->result_type == IS_UNUSED) ? IR_NULL : jit_ZVAL_ADDR(jit, res_addr)); + + ir_END_list(end_inputs); + + ir_MERGE_2(noref_path, ref_path); + prop_ref = ir_PHI_2(IR_ADDR, prop_ref, ref); + var_addr = ZEND_ADDR_REF_ZVAL(prop_ref); + + var_info &= ~MAY_BE_REF; + } + + if (var_info & MAY_BE_LONG) { + ir_ref addr, ref; + + if (var_info & (MAY_BE_ANY - MAY_BE_LONG)) { + if_long = jit_if_Z_TYPE(jit, var_addr, IS_LONG); + ir_IF_TRUE(if_long); + } + + addr = jit_ZVAL_ADDR(jit, var_addr); + ref = ir_LOAD_L(addr); + if (opline->opcode == ZEND_POST_INC_OBJ || opline->opcode == ZEND_POST_DEC_OBJ) { + if (opline->result_type != IS_UNUSED) { + jit_set_Z_LVAL(jit, res_addr, ref); + jit_set_Z_TYPE_INFO(jit, res_addr, IS_LONG); + } + } + if (opline->opcode == ZEND_PRE_INC_OBJ || opline->opcode == ZEND_POST_INC_OBJ) { + ref = ir_ADD_OV_L(ref, ir_CONST_LONG(1)); + } else { + ref = ir_SUB_OV_L(ref, ir_CONST_LONG(1)); + } + + ir_STORE(addr, ref); + if_overflow = ir_IF(ir_OVERFLOW(ref)); + ir_IF_FALSE(if_overflow); + + if (opline->opcode == ZEND_PRE_INC_OBJ || opline->opcode == ZEND_PRE_DEC_OBJ) { + if (opline->result_type != IS_UNUSED) { + jit_set_Z_LVAL(jit, res_addr, ref); + jit_set_Z_TYPE_INFO(jit, res_addr, IS_LONG); + } + } + ir_END_list(end_inputs); + } + + if (var_info & (MAY_BE_ANY - MAY_BE_LONG)) { + if (var_info & (MAY_BE_ARRAY|MAY_BE_OBJECT|MAY_BE_RESOURCE)) { + may_throw = 1; + } + if (if_long) { + ir_IF_FALSE_cold(if_long); + } + if (opline->opcode == ZEND_POST_INC_OBJ || opline->opcode == ZEND_POST_DEC_OBJ) { + jit_ZVAL_COPY(jit, res_addr, -1, var_addr, var_info, 1); + } + if (opline->opcode == ZEND_PRE_INC_OBJ || opline->opcode == ZEND_POST_INC_OBJ) { + if (opline->opcode == ZEND_PRE_INC_OBJ && opline->result_type != IS_UNUSED) { + ir_CALL_2(IR_VOID, ir_CONST_FC_FUNC(zend_jit_pre_inc), + jit_ZVAL_ADDR(jit, var_addr), + jit_ZVAL_ADDR(jit, res_addr)); + } else { + ir_CALL_1(IR_VOID, ir_CONST_FC_FUNC(increment_function), + jit_ZVAL_ADDR(jit, var_addr)); + } + } else { + if (opline->opcode == ZEND_PRE_DEC_OBJ && opline->result_type != IS_UNUSED) { + ir_CALL_2(IR_VOID, ir_CONST_FC_FUNC(zend_jit_pre_dec), + jit_ZVAL_ADDR(jit, var_addr), + jit_ZVAL_ADDR(jit, res_addr)); + } else { + ir_CALL_1(IR_VOID, ir_CONST_FC_FUNC(decrement_function), + jit_ZVAL_ADDR(jit, var_addr)); + } + } + + ir_END_list(end_inputs); + } + if (var_info & MAY_BE_LONG) { + 
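+ /* On overflow the counter is re-materialized as a double. The magic
+  * constants below are IEEE-754 bit patterns: 0x43e0000000000000 is
+  * +2^63, i.e. (double)(ZEND_LONG_MAX + 1), 0xc3e0000000000000 is
+  * -2^63, and the 32-bit word pairs encode 2^31 and -(2^31 + 1). */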
ir_IF_TRUE_cold(if_overflow); + if (opline->opcode == ZEND_PRE_INC_OBJ || opline->opcode == ZEND_POST_INC_OBJ) { +#if SIZEOF_ZEND_LONG == 4 + jit_set_Z_LVAL(jit, var_addr, ir_CONST_LONG(0)); + jit_set_Z_W2(jit, var_addr, ir_CONST_U32(0x41e00000)); +#else + jit_set_Z_LVAL(jit, var_addr, ir_CONST_LONG(0x43e0000000000000)); +#endif + jit_set_Z_TYPE_INFO(jit, var_addr, IS_DOUBLE); + if (opline->opcode == ZEND_PRE_INC_OBJ && opline->result_type != IS_UNUSED) { +#if SIZEOF_ZEND_LONG == 4 + jit_set_Z_LVAL(jit, res_addr, ir_CONST_LONG(0)); + jit_set_Z_W2(jit, res_addr, ir_CONST_U32(0x41e00000)); +#else + jit_set_Z_LVAL(jit, res_addr, ir_CONST_LONG(0x43e0000000000000)); +#endif + jit_set_Z_TYPE_INFO(jit, res_addr, IS_DOUBLE); + } + } else { +#if SIZEOF_ZEND_LONG == 4 + jit_set_Z_LVAL(jit, var_addr, ir_CONST_LONG(0x00200000)); + jit_set_Z_W2(jit, var_addr, ir_CONST_U32(0xc1e00000)); +#else + jit_set_Z_LVAL(jit, var_addr, ir_CONST_LONG(0xc3e0000000000000)); +#endif + jit_set_Z_TYPE_INFO(jit, var_addr, IS_DOUBLE); + if (opline->opcode == ZEND_PRE_DEC_OBJ && opline->result_type != IS_UNUSED) { +#if SIZEOF_ZEND_LONG == 4 + jit_set_Z_LVAL(jit, res_addr, ir_CONST_LONG(0x00200000)); + jit_set_Z_W2(jit, res_addr, ir_CONST_U32(0xc1e00000)); +#else + jit_set_Z_LVAL(jit, res_addr, ir_CONST_LONG(0xc3e0000000000000)); +#endif + jit_set_Z_TYPE_INFO(jit, res_addr, IS_DOUBLE); + } + } + if (opline->result_type != IS_UNUSED + && (opline->opcode == ZEND_PRE_INC_OBJ || opline->opcode == ZEND_PRE_DEC_OBJ) + && prop_info + && !ZEND_TYPE_IS_SET(prop_info->type) + && (res_info & MAY_BE_GUARD) + && (res_info & MAY_BE_LONG)) { + zend_jit_trace_stack *stack = JIT_G(current_frame)->stack; + uint32_t old_res_info = STACK_INFO(stack, EX_VAR_TO_NUM(opline->result.var)); + int32_t exit_point; + const void *exit_addr; + + SET_STACK_TYPE(stack, EX_VAR_TO_NUM(opline->result.var), IS_DOUBLE, 0); + exit_point = zend_jit_trace_get_exit_point(opline + 1, 0); + exit_addr = zend_jit_trace_get_exit_addr(exit_point); + if (!exit_addr) { + return 0; + } + SET_STACK_INFO(stack, EX_VAR_TO_NUM(opline->result.var), old_res_info); + ssa->var_info[ssa_op->result_def].type = res_info & ~MAY_BE_GUARD; + jit_SIDE_EXIT(jit, ir_CONST_ADDR(exit_addr)); + } else { + ir_END_list(end_inputs); + } + } + } + + if (slow_inputs) { + const void *func; + + ir_MERGE_list(slow_inputs); + + // JIT: zend_jit_pre_inc_obj_helper(zobj, name, CACHE_ADDR(opline->extended_value), result); + switch (opline->opcode) { + case ZEND_PRE_INC_OBJ: + func = zend_jit_pre_inc_obj_helper; + break; + case ZEND_PRE_DEC_OBJ: + func = zend_jit_pre_dec_obj_helper; + break; + case ZEND_POST_INC_OBJ: + func = zend_jit_post_inc_obj_helper; + break; + case ZEND_POST_DEC_OBJ: + func = zend_jit_post_dec_obj_helper; + break; + default: + ZEND_UNREACHABLE(); + } + + may_throw = 1; + jit_SET_EX_OPLINE(jit, opline); + ir_ref run_time_cache = ir_LOAD_A(jit_EX(run_time_cache)); + ir_CALL_4(IR_VOID, ir_CONST_FC_FUNC(func), + obj_ref, + ir_CONST_ADDR(name), + ir_ADD_OFFSET(run_time_cache, opline->extended_value & ~ZEND_FETCH_OBJ_FLAGS), + (opline->result_type == IS_UNUSED) ? 
IR_NULL : jit_ZVAL_ADDR(jit, res_addr)); + + ir_END_list(end_inputs); + } + + if (end_inputs) { + ir_MERGE_list(end_inputs); + } + + if ((opline->op1_type & (IS_VAR|IS_TMP_VAR)) && !delayed_fetch_this && !op1_indirect) { + if ((op1_info & MAY_HAVE_DTOR) && (op1_info & MAY_BE_RC1)) { + may_throw = 1; + } + jit_FREE_OP(jit, opline->op1_type, opline->op1, op1_info, opline); + } + + if (may_throw) { + zend_jit_check_exception(jit); + } + + return 1; +} + +static int zend_jit_switch(zend_jit_ctx *jit, const zend_op *opline, const zend_op_array *op_array, zend_ssa *ssa, zend_jit_trace_rec *trace, zend_jit_trace_info *trace_info) +{ + HashTable *jumptable = Z_ARRVAL_P(RT_CONSTANT(opline, opline->op2)); + const zend_op *next_opline = NULL; + ir_refs *slow_inputs; + + ir_refs_init(slow_inputs, 8); + + if (trace) { + ZEND_ASSERT(trace->op == ZEND_JIT_TRACE_VM || trace->op == ZEND_JIT_TRACE_END); + ZEND_ASSERT(trace->opline != NULL); + next_opline = trace->opline; + } + + if (opline->op1_type == IS_CONST) { + zval *zv = RT_CONSTANT(opline, opline->op1); + zval *jump_zv = NULL; + int b; + + if (opline->opcode == ZEND_SWITCH_LONG) { + if (Z_TYPE_P(zv) == IS_LONG) { + jump_zv = zend_hash_index_find(jumptable, Z_LVAL_P(zv)); + } + } else if (opline->opcode == ZEND_SWITCH_STRING) { + if (Z_TYPE_P(zv) == IS_STRING) { + jump_zv = zend_hash_find_known_hash(jumptable, Z_STR_P(zv)); + } + } else if (opline->opcode == ZEND_MATCH) { + if (Z_TYPE_P(zv) == IS_LONG) { + jump_zv = zend_hash_index_find(jumptable, Z_LVAL_P(zv)); + } else if (Z_TYPE_P(zv) == IS_STRING) { + jump_zv = zend_hash_find_known_hash(jumptable, Z_STR_P(zv)); + } + } else { + ZEND_UNREACHABLE(); + } + if (next_opline) { + const zend_op *target; + + if (jump_zv != NULL) { + target = ZEND_OFFSET_TO_OPLINE(opline, Z_LVAL_P(jump_zv)); + } else { + target = ZEND_OFFSET_TO_OPLINE(opline, opline->extended_value); + } + ZEND_ASSERT(target == next_opline); + } else { + if (jump_zv != NULL) { + b = ssa->cfg.map[ZEND_OFFSET_TO_OPLINE(opline, Z_LVAL_P(jump_zv)) - op_array->opcodes]; + } else { + b = ssa->cfg.map[ZEND_OFFSET_TO_OPLINE(opline, opline->extended_value) - op_array->opcodes]; + } + _zend_jit_add_predecessor_ref(jit, b, jit->b, ir_END()); + jit->b = -1; + } + } else { + zend_ssa_op *ssa_op = &ssa->ops[opline - op_array->opcodes]; + uint32_t op1_info = OP1_INFO(); + zend_jit_addr op1_addr = OP1_ADDR(); + const zend_op *default_opline = ZEND_OFFSET_TO_OPLINE(opline, opline->extended_value); + const zend_op *target; + int default_b = next_opline ? 
-1 : ssa->cfg.map[default_opline - op_array->opcodes]; + int b; + int32_t exit_point; + const void *exit_addr; + const void *fallback_label = NULL; + const void *default_label = NULL; + zval *zv; + + if (next_opline) { + if (opline->opcode != ZEND_MATCH && next_opline != opline + 1) { + exit_point = zend_jit_trace_get_exit_point(opline + 1, 0); + fallback_label = zend_jit_trace_get_exit_addr(exit_point); + if (!fallback_label) { + return 0; + } + } + if (next_opline != default_opline) { + exit_point = zend_jit_trace_get_exit_point(default_opline, 0); + default_label = zend_jit_trace_get_exit_addr(exit_point); + if (!default_label) { + return 0; + } + } + } + + if (opline->opcode == ZEND_SWITCH_LONG) { + if (op1_info & MAY_BE_LONG) { + if (op1_info & MAY_BE_REF) { + ir_ref ref, if_long, fast_path, ref2; + + ref = jit_ZVAL_ADDR(jit, op1_addr); + if_long = jit_if_Z_TYPE(jit, op1_addr, IS_LONG); + ir_IF_TRUE(if_long); + fast_path = ir_END(); + ir_IF_FALSE_cold(if_long); + + // JIT: ZVAL_DEREF(op) + if (fallback_label) { + jit_guard_Z_TYPE(jit, op1_addr, IS_REFERENCE, fallback_label); + } else { + ir_ref if_ref = jit_if_Z_TYPE(jit, op1_addr, IS_REFERENCE); + ir_IF_FALSE_cold(if_ref); + ir_refs_add(slow_inputs, ir_END()); + ir_IF_TRUE(if_ref); + } + + ref2 = ir_ADD_OFFSET(jit_Z_PTR(jit, op1_addr), offsetof(zend_reference, val)); + op1_addr = ZEND_ADDR_REF_ZVAL(ref2); + + if (fallback_label) { + jit_guard_Z_TYPE(jit, op1_addr, IS_LONG, fallback_label); + } else { + if_long = jit_if_Z_TYPE(jit, op1_addr, IS_LONG); + ir_IF_FALSE_cold(if_long); + ir_refs_add(slow_inputs, ir_END()); + ir_IF_TRUE(if_long); + } + + ir_MERGE_2(fast_path, ir_END()); + ref = ir_PHI_2(IR_ADDR, ref, ref2); + op1_addr = ZEND_ADDR_REF_ZVAL(ref); + } else if (op1_info & ((MAY_BE_ANY|MAY_BE_UNDEF)-MAY_BE_LONG)) { + if (fallback_label) { + jit_guard_Z_TYPE(jit, op1_addr, IS_LONG, fallback_label); + } else { + ir_ref if_long = jit_if_Z_TYPE(jit, op1_addr, IS_LONG); + ir_IF_FALSE_cold(if_long); + ir_refs_add(slow_inputs, ir_END()); + ir_IF_TRUE(if_long); + } + } + ir_ref ref = jit_Z_LVAL(jit, op1_addr); + + if (!HT_IS_PACKED(jumptable)) { + ref = ir_CALL_2(IR_LONG, ir_CONST_FC_FUNC(zend_hash_index_find), + ir_CONST_ADDR(jumptable), ref); + ref = ir_SUB_L(ref, ir_CONST_LONG((uintptr_t)jumptable->arData)); + ref = ir_DIV_L(ref, ir_CONST_LONG(sizeof(Bucket))); + } + ref = ir_SWITCH(ref); + + if (next_opline) { + ir_ref continue_list = IR_UNUSED; + + ZEND_HASH_FOREACH_VAL(jumptable, zv) { + ir_ref idx; + target = ZEND_OFFSET_TO_OPLINE(opline, Z_LVAL_P(zv)); + + if (HT_IS_PACKED(jumptable)) { + idx = ir_CONST_LONG(zv - jumptable->arPacked); + } else { + idx = ir_CONST_LONG((Bucket*)zv - jumptable->arData); + } + ir_CASE_VAL(ref, idx); + if (target == next_opline) { + ir_END_list(continue_list); + } else { + exit_point = zend_jit_trace_get_exit_point(target, 0); + exit_addr = zend_jit_trace_get_exit_addr(exit_point); + if (!exit_addr) { + return 0; + } + jit_SIDE_EXIT(jit, ir_CONST_ADDR(exit_addr)); + } + } ZEND_HASH_FOREACH_END(); + + ir_CASE_DEFAULT(ref); + if (next_opline == default_opline) { + ir_END_list(continue_list); + } else { + jit_SIDE_EXIT(jit, ir_CONST_ADDR(default_label)); + } + if (continue_list) { + ir_MERGE_list(continue_list); + } else { + ZEND_ASSERT(slow_inputs->count); + ir_MERGE_N(slow_inputs->count, slow_inputs->refs); + } + } else { + ZEND_HASH_FOREACH_VAL(jumptable, zv) { + target = ZEND_OFFSET_TO_OPLINE(opline, Z_LVAL_P(zv)); + b = ssa->cfg.map[target - op_array->opcodes]; + _zend_jit_add_predecessor_ref(jit, 
b, jit->b, ref); + } ZEND_HASH_FOREACH_END(); + + _zend_jit_add_predecessor_ref(jit, default_b, jit->b, ref); + if (slow_inputs->count) { + ir_MERGE_N(slow_inputs->count, slow_inputs->refs); + _zend_jit_add_predecessor_ref(jit, jit->b + 1, jit->b, ir_END()); + } + jit->b = -1; + } + } else { + ZEND_ASSERT(!next_opline); + _zend_jit_add_predecessor_ref(jit, jit->b + 1, jit->b, ir_END()); + jit->b = -1; + } + } else if (opline->opcode == ZEND_SWITCH_STRING) { + if (op1_info & MAY_BE_STRING) { + if (op1_info & MAY_BE_REF) { + ir_ref ref, if_string, fast_path, ref2; + + ref = jit_ZVAL_ADDR(jit, op1_addr); + if_string = jit_if_Z_TYPE(jit, op1_addr, IS_STRING); + ir_IF_TRUE(if_string); + fast_path = ir_END(); + ir_IF_FALSE_cold(if_string); + + // JIT: ZVAL_DEREF(op) + if (fallback_label) { + jit_guard_Z_TYPE(jit, op1_addr, IS_REFERENCE, fallback_label); + } else { + ir_ref if_ref = jit_if_Z_TYPE(jit, op1_addr, IS_REFERENCE); + ir_IF_FALSE_cold(if_ref); + ir_refs_add(slow_inputs, ir_END()); + ir_IF_TRUE(if_ref); + } + + ref2 = ir_ADD_OFFSET(jit_Z_PTR(jit, op1_addr), offsetof(zend_reference, val)); + op1_addr = ZEND_ADDR_REF_ZVAL(ref2); + + if (fallback_label) { + jit_guard_Z_TYPE(jit, op1_addr, IS_STRING, fallback_label); + } else { + if_string = jit_if_Z_TYPE(jit, op1_addr, IS_STRING); + ir_IF_FALSE_cold(if_string); + ir_refs_add(slow_inputs, ir_END()); + ir_IF_TRUE(if_string); + } + + ir_MERGE_2(fast_path, ir_END()); + ref = ir_PHI_2(IR_ADDR, ref, ref2); + op1_addr = ZEND_ADDR_REF_ZVAL(ref); + } else if (op1_info & ((MAY_BE_ANY|MAY_BE_UNDEF)-MAY_BE_STRING)) { + if (fallback_label) { + jit_guard_Z_TYPE(jit, op1_addr, IS_STRING, fallback_label); + } else { + ir_ref if_string = jit_if_Z_TYPE(jit, op1_addr, IS_STRING); + ir_IF_FALSE_cold(if_string); + ir_refs_add(slow_inputs, ir_END()); + ir_IF_TRUE(if_string); + } + } + + ir_ref ref = jit_Z_PTR(jit, op1_addr); + ref = ir_CALL_2(IR_LONG, ir_CONST_FC_FUNC(zend_hash_find), + ir_CONST_ADDR(jumptable), ref); + ref = ir_SUB_L(ref, ir_CONST_LONG((uintptr_t)jumptable->arData)); + ref = ir_DIV_L(ref, ir_CONST_LONG(sizeof(Bucket))); + ref = ir_SWITCH(ref); + + if (next_opline) { + ir_ref continue_list = IR_UNUSED; + + ZEND_HASH_FOREACH_VAL(jumptable, zv) { + ir_ref idx; + target = ZEND_OFFSET_TO_OPLINE(opline, Z_LVAL_P(zv)); + + if (HT_IS_PACKED(jumptable)) { + idx = ir_CONST_LONG(zv - jumptable->arPacked); + } else { + idx = ir_CONST_LONG((Bucket*)zv - jumptable->arData); + } + ir_CASE_VAL(ref, idx); + if (target == next_opline) { + ir_END_list(continue_list); + } else { + exit_point = zend_jit_trace_get_exit_point(target, 0); + exit_addr = zend_jit_trace_get_exit_addr(exit_point); + if (!exit_addr) { + return 0; + } + jit_SIDE_EXIT(jit, ir_CONST_ADDR(exit_addr)); + } + } ZEND_HASH_FOREACH_END(); + + ir_CASE_DEFAULT(ref); + if (next_opline == default_opline) { + ir_END_list(continue_list); + } else { + jit_SIDE_EXIT(jit, ir_CONST_ADDR(default_label)); + } + if (continue_list) { + ir_MERGE_list(continue_list); + } else { + ZEND_ASSERT(slow_inputs->count); + ir_MERGE_N(slow_inputs->count, slow_inputs->refs); + } + } else { + ZEND_HASH_FOREACH_VAL(jumptable, zv) { + target = ZEND_OFFSET_TO_OPLINE(opline, Z_LVAL_P(zv)); + b = ssa->cfg.map[target - op_array->opcodes]; + _zend_jit_add_predecessor_ref(jit, b, jit->b, ref); + } ZEND_HASH_FOREACH_END(); + _zend_jit_add_predecessor_ref(jit, default_b, jit->b, ref); + if (slow_inputs->count) { + ir_MERGE_N(slow_inputs->count, slow_inputs->refs); + _zend_jit_add_predecessor_ref(jit, jit->b + 1, jit->b, ir_END()); + } + 
jit->b = -1; + } + } else { + ZEND_ASSERT(!next_opline); + _zend_jit_add_predecessor_ref(jit, jit->b + 1, jit->b, ir_END()); + jit->b = -1; + } + } else if (opline->opcode == ZEND_MATCH) { + ir_ref if_type = IR_UNUSED, default_input_list = IR_UNUSED, ref = IR_UNUSED; + ir_ref continue_list = IR_UNUSED; + + if (op1_info & (MAY_BE_LONG|MAY_BE_STRING)) { + ir_ref long_path = IR_UNUSED; + + if (op1_info & MAY_BE_REF) { + op1_addr = jit_ZVAL_DEREF(jit, op1_addr); + } + if (op1_info & MAY_BE_LONG) { + if (op1_info & ((MAY_BE_ANY|MAY_BE_UNDEF)-MAY_BE_LONG)) { + if (op1_info & (MAY_BE_STRING|MAY_BE_UNDEF)) { + if_type = jit_if_Z_TYPE(jit, op1_addr, IS_LONG); + ir_IF_TRUE(if_type); + } else if (default_label) { + jit_guard_Z_TYPE(jit, op1_addr, IS_LONG, default_label); + } else if (next_opline) { + ir_ref if_type = jit_if_Z_TYPE(jit, op1_addr, IS_LONG); + ir_IF_FALSE(if_type); + ir_END_list(continue_list); + ir_IF_TRUE(if_type); + } else { + ir_ref if_type = jit_if_Z_TYPE(jit, op1_addr, IS_LONG); + ir_IF_FALSE(if_type); + ir_END_list(default_input_list); + ir_IF_TRUE(if_type); + } + } + ref = jit_Z_LVAL(jit, op1_addr); + ref = ir_CALL_2(IR_LONG, ir_CONST_FC_FUNC(zend_hash_index_find), + ir_CONST_ADDR(jumptable), ref); + if (op1_info & MAY_BE_STRING) { + long_path = ir_END(); + } + } + if (op1_info & MAY_BE_STRING) { + if (if_type) { + ir_IF_FALSE(if_type); + if_type = IS_UNUSED; + } + if (op1_info & ((MAY_BE_ANY|MAY_BE_UNDEF)-(MAY_BE_LONG|MAY_BE_STRING))) { + if (op1_info & MAY_BE_UNDEF) { + if_type = jit_if_Z_TYPE(jit, op1_addr, IS_STRING); + ir_IF_TRUE(if_type); + } else if (default_label) { + jit_guard_Z_TYPE(jit, op1_addr, IS_STRING, default_label); + } else if (next_opline) { + ir_ref if_type = jit_if_Z_TYPE(jit, op1_addr, IS_STRING); + ir_IF_FALSE(if_type); + ir_END_list(continue_list); + ir_IF_TRUE(if_type); + } else { + ir_ref if_type = jit_if_Z_TYPE(jit, op1_addr, IS_STRING); + ir_IF_FALSE(if_type); + ir_END_list(default_input_list); + ir_IF_TRUE(if_type); + } + } + ir_ref ref2 = jit_Z_PTR(jit, op1_addr); + ref2 = ir_CALL_2(IR_LONG, ir_CONST_FC_FUNC(zend_hash_find), + ir_CONST_ADDR(jumptable), ref2); + if (op1_info & MAY_BE_LONG) { + ir_MERGE_WITH(long_path); + ref = ir_PHI_2(IR_LONG, ref2, ref); + } else { + ref = ref2; + } + } + + ref = ir_SUB_L(ref, ir_CONST_LONG((uintptr_t)jumptable->arData)); + ref = ir_DIV_L(ref, ir_CONST_LONG(HT_IS_PACKED(jumptable) ? 
sizeof(zval) : sizeof(Bucket))); + ref = ir_SWITCH(ref); + + if (next_opline) { + ZEND_HASH_FOREACH_VAL(jumptable, zv) { + ir_ref idx; + target = ZEND_OFFSET_TO_OPLINE(opline, Z_LVAL_P(zv)); + + if (HT_IS_PACKED(jumptable)) { + idx = ir_CONST_LONG(zv - jumptable->arPacked); + } else { + idx = ir_CONST_LONG((Bucket*)zv - jumptable->arData); + } + ir_CASE_VAL(ref, idx); + if (target == next_opline) { + ir_END_list(continue_list); + } else { + exit_point = zend_jit_trace_get_exit_point(target, 0); + exit_addr = zend_jit_trace_get_exit_addr(exit_point); + if (!exit_addr) { + return 0; + } + jit_SIDE_EXIT(jit, ir_CONST_ADDR(exit_addr)); + } + } ZEND_HASH_FOREACH_END(); + + ir_CASE_DEFAULT(ref); + if (next_opline == default_opline) { + ir_END_list(continue_list); + } else { + jit_SIDE_EXIT(jit, ir_CONST_ADDR(default_label)); + } + } else { + ZEND_HASH_FOREACH_VAL(jumptable, zv) { + target = ZEND_OFFSET_TO_OPLINE(opline, Z_LVAL_P(zv)); + b = ssa->cfg.map[target - op_array->opcodes]; + _zend_jit_add_predecessor_ref(jit, b, jit->b, ref); + } ZEND_HASH_FOREACH_END(); + _zend_jit_add_predecessor_ref(jit, default_b, jit->b, ref); + } + } + + if (op1_info & MAY_BE_UNDEF) { + if (if_type) { + ir_IF_FALSE(if_type); + if_type = IS_UNUSED; + } + if (op1_info & (MAY_BE_ANY-(MAY_BE_LONG|MAY_BE_STRING))) { + if (default_label) { + jit_guard_Z_TYPE(jit, op1_addr, IS_UNDEF, default_label); + } else if (next_opline) { + ir_ref if_def = ir_IF(jit_Z_TYPE(jit, op1_addr)); + ir_IF_TRUE(if_def); + ir_END_list(continue_list); + ir_IF_FALSE_cold(if_def); + } else { + ir_ref if_def = ir_IF(jit_Z_TYPE(jit, op1_addr)); + ir_IF_TRUE(if_def); + ir_END_list(default_input_list); + ir_IF_FALSE_cold(if_def); + } + } + + jit_SET_EX_OPLINE(jit, opline); + ir_CALL_1(IR_VOID, ir_CONST_FC_FUNC(zend_jit_undefined_op_helper), + ir_CONST_U32(opline->op1.var)); + zend_jit_check_exception_undef_result(jit, opline); + if (default_label) { + jit_SIDE_EXIT(jit, ir_CONST_ADDR(default_label)); + } else if (next_opline) { + ir_END_list(continue_list); + } else { + ir_END_list(default_input_list); + } + } + if (next_opline) { + ZEND_ASSERT(continue_list); + ir_MERGE_list(continue_list); + } else { + if (default_input_list) { + ZEND_ASSERT(jit->ctx.ir_base[ref].op == IR_SWITCH); + ZEND_ASSERT(jit->ctx.ir_base[ref].op3 == IR_UNUSED); + jit->ctx.ir_base[ref].op3 = default_input_list; + } + jit->b = -1; + } + } else { + ZEND_UNREACHABLE(); + } + } + return 1; +} + +static int zend_jit_start(zend_jit_ctx *jit, const zend_op_array *op_array, zend_ssa *ssa) +{ + int i, count; + zend_basic_block *bb; + + zend_jit_init_ctx(jit, (zend_jit_vm_kind == ZEND_VM_KIND_CALL) ? 
0 : (IR_START_BR_TARGET|IR_ENTRY_BR_TARGET)); + + jit->ctx.spill_base = ZREG_FP; + + jit->op_array = jit->current_op_array = op_array; + jit->ssa = ssa; + jit->bb_start_ref = zend_arena_calloc(&CG(arena), ssa->cfg.blocks_count * 2, sizeof(ir_ref)); + jit->bb_predecessors = jit->bb_start_ref + ssa->cfg.blocks_count; + + count = 0; + for (i = 0, bb = ssa->cfg.blocks; i < ssa->cfg.blocks_count; i++, bb++) { + jit->bb_predecessors[i] = count; + count += bb->predecessors_count; + } + jit->bb_edges = zend_arena_calloc(&CG(arena), count, sizeof(ir_ref)); + + if (!GCC_GLOBAL_REGS) { + ir_ref ref = ir_PARAM(IR_ADDR, "execute_data", 1); + jit_STORE_FP(jit, ref); + jit->ctx.flags |= IR_FASTCALL_FUNC; + } + + return 1; +} + +static void *zend_jit_finish(zend_jit_ctx *jit) +{ + void *entry; + size_t size; + zend_string *str = NULL; + + if (JIT_G(debug) & (ZEND_JIT_DEBUG_ASM|ZEND_JIT_DEBUG_GDB|ZEND_JIT_DEBUG_PERF|ZEND_JIT_DEBUG_PERF_DUMP| + ZEND_JIT_DEBUG_IR_SRC|ZEND_JIT_DEBUG_IR_AFTER_SCCP| + ZEND_JIT_DEBUG_IR_AFTER_SCHEDULE|ZEND_JIT_DEBUG_IR_AFTER_REGS|ZEND_JIT_DEBUG_IR_FINAL|ZEND_JIT_DEBUG_IR_CODEGEN)) { + if (jit->name) { + str = zend_string_copy(jit->name); + } else { + str = zend_jit_func_name(jit->op_array); + } + } + + if (jit->op_array) { + /* Only for function JIT */ + _zend_jit_fix_merges(jit); +#if defined(IR_TARGET_AARCH64) + } else if (jit->trace) { + jit->ctx.deoptimization_exits = jit->trace->exit_count; + jit->ctx.get_exit_addr = zend_jit_trace_get_exit_addr; +#endif + } + + entry = zend_jit_ir_compile(&jit->ctx, &size, str ? ZSTR_VAL(str) : NULL); + if (entry) { + if (JIT_G(debug) & (ZEND_JIT_DEBUG_ASM|ZEND_JIT_DEBUG_GDB|ZEND_JIT_DEBUG_PERF|ZEND_JIT_DEBUG_PERF_DUMP)) { +#if HAVE_CAPSTONE + if (JIT_G(debug) & ZEND_JIT_DEBUG_ASM) { + if (str) { + ir_disasm_add_symbol(ZSTR_VAL(str), (uintptr_t)entry, size); + } + ir_disasm(str ? 
ZSTR_VAL(str) : "unknown", + entry, size, + (JIT_G(debug) & ZEND_JIT_DEBUG_ASM_ADDR) != 0, + &jit->ctx, stderr); + } +#endif +#ifndef _WIN32 + if (str) { + if (JIT_G(debug) & ZEND_JIT_DEBUG_GDB) { + uintptr_t sp_offset = 0; + +// ir_mem_unprotect(entry, size); + if (!(jit->ctx.flags & IR_FUNCTION) + && zend_jit_vm_kind == ZEND_VM_KIND_HYBRID) { +#if !ZEND_WIN32 && !defined(IR_TARGET_AARCH64) + sp_offset = zend_jit_hybrid_vm_sp_adj; +#else + sp_offset = sizeof(void*); +#endif + } else { + sp_offset = sizeof(void*); + } + ir_gdb_register(ZSTR_VAL(str), entry, size, sp_offset, 0); +// ir_mem_protect(entry, size); + } + + if (JIT_G(debug) & (ZEND_JIT_DEBUG_PERF|ZEND_JIT_DEBUG_PERF_DUMP)) { + ir_perf_map_register(ZSTR_VAL(str), entry, size); + if (JIT_G(debug) & ZEND_JIT_DEBUG_PERF_DUMP) { + ir_perf_jitdump_register(ZSTR_VAL(str), entry, size); + } + } + } +#endif + } + + if (jit->op_array) { + /* Only for function JIT */ + const zend_op_array *op_array = jit->op_array; + zend_op *opline = (zend_op*)op_array->opcodes; + + if (!(op_array->fn_flags & ZEND_ACC_HAS_TYPE_HINTS)) { + while (opline->opcode == ZEND_RECV) { + opline++; + } + } + opline->handler = entry; + + if (jit->ctx.entries_count) { + /* For all entries */ + int i = jit->ctx.entries_count; + do { + ir_insn *insn = &jit->ctx.ir_base[jit->ctx.entries[--i]]; + op_array->opcodes[insn->op2].handler = (char*)entry + insn->op3; + } while (i != 0); + } + } else { + /* Only for tracing JIT */ + zend_jit_trace_info *t = jit->trace; + zend_jit_trace_stack *stack; + uint32_t i; + + if (t) { + for (i = 0; i < t->stack_map_size; i++) { + stack = t->stack_map + i; + if (stack->flags & ZREG_SPILL_SLOT) { + stack->reg = (jit->ctx.flags & IR_USE_FRAME_POINTER) ? IR_REG_FP : IR_REG_SP; + stack->ref = ir_get_spill_slot_offset(&jit->ctx, stack->ref); + } + } + } + + zend_jit_trace_add_code(entry, size); + +#if ZEND_JIT_SUPPORT_CLDEMOTE + if (cpu_support_cldemote) { + shared_cacheline_demote((uintptr_t)entry, size); + } +#endif + } + } + + if (str) { + zend_string_release(str); + } + + return entry; +} + +static const void *zend_jit_trace_allocate_exit_group(uint32_t n) +{ + const void *entry; + size_t size; + + entry = ir_emit_exitgroup(n, ZEND_JIT_EXIT_POINTS_PER_GROUP, zend_jit_stub_handlers[jit_stub_trace_exit], + *dasm_ptr, (char*)dasm_end - (char*)*dasm_ptr, &size); + + if (entry) { + *dasm_ptr = (char*)entry + ZEND_MM_ALIGNED_SIZE_EX(size, 16); +#ifdef HAVE_CAPSTONE + if (JIT_G(debug) & ZEND_JIT_DEBUG_ASM) { + uint32_t i; + char name[32]; + + for (i = 0; i < ZEND_JIT_EXIT_POINTS_PER_GROUP; i++) { + sprintf(name, "jit$$trace_exit_%d", n + i); + ir_disasm_add_symbol(name, (uintptr_t)entry + (i * ZEND_JIT_EXIT_POINTS_SPACING), ZEND_JIT_EXIT_POINTS_SPACING); + } + } +#endif + } + + return entry; +} + +static int zend_jit_type_guard(zend_jit_ctx *jit, const zend_op *opline, uint32_t var, uint8_t type) +{ + int32_t exit_point = zend_jit_trace_get_exit_point(opline, 0); + const void *exit_addr = zend_jit_trace_get_exit_addr(exit_point); + zend_jit_addr addr = ZEND_ADDR_MEM_ZVAL(ZREG_FP, var); + + if (!exit_addr) { + return 0; + } + ir_GUARD(ir_EQ(jit_Z_TYPE(jit, addr), ir_CONST_U8(type)), ir_CONST_ADDR(exit_addr)); + + return 1; +} + +static int zend_jit_scalar_type_guard(zend_jit_ctx *jit, const zend_op *opline, uint32_t var) +{ + int32_t exit_point = zend_jit_trace_get_exit_point(opline, 0); + const void *exit_addr = zend_jit_trace_get_exit_addr(exit_point); + zend_jit_addr addr = ZEND_ADDR_MEM_ZVAL(ZREG_FP, var); + + if (!exit_addr) { + return 0; + } 
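+ /* Note: zval type tags are ordered so that all non-refcounted scalars
+  * (IS_UNDEF=0, IS_NULL, IS_FALSE, IS_TRUE, IS_LONG, IS_DOUBLE=5) precede
+  * IS_STRING=6, so the single unsigned compare below guards every scalar
+  * type at once instead of emitting one type guard per possible type. */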
+ ir_GUARD(ir_LT(jit_Z_TYPE(jit, addr), ir_CONST_U8(IS_STRING)), ir_CONST_ADDR(exit_addr)); + + return 1; +} + +static bool zend_jit_noref_guard(zend_jit_ctx *jit, const zend_op *opline, zend_jit_addr var_addr) +{ + uint32_t exit_point = zend_jit_trace_get_exit_point(opline, 0); + const void *exit_addr = zend_jit_trace_get_exit_addr(exit_point); + + if (!exit_addr) { + return 0; + } + ir_GUARD(ir_NE(jit_Z_TYPE(jit, var_addr), ir_CONST_U8(IS_REFERENCE)), ir_CONST_ADDR(exit_addr)); + + return 1; +} + +static int zend_jit_trace_opline_guard(zend_jit_ctx *jit, const zend_op *opline) +{ + uint32_t exit_point = zend_jit_trace_get_exit_point(NULL, 0); + const void *exit_addr = zend_jit_trace_get_exit_addr(exit_point); + + if (!exit_addr) { + return 0; + } + + ir_GUARD(jit_CMP_IP(jit, IR_EQ, opline), ir_CONST_ADDR(exit_addr)); + zend_jit_set_last_valid_opline(jit, opline); + + return 1; +} + +static bool zend_jit_guard_reference(zend_jit_ctx *jit, + const zend_op *opline, + zend_jit_addr *var_addr_ptr, + zend_jit_addr *ref_addr_ptr, + bool add_ref_guard) +{ + zend_jit_addr var_addr = *var_addr_ptr; + const void *exit_addr = NULL; + ir_ref ref; + + if (add_ref_guard) { + int32_t exit_point = zend_jit_trace_get_exit_point(opline, 0); + + exit_addr = zend_jit_trace_get_exit_addr(exit_point); + if (!exit_addr) { + return 0; + } + + ref = jit_Z_TYPE(jit, var_addr); + ir_GUARD(ir_EQ(ref, ir_CONST_U8(IS_REFERENCE)), ir_CONST_ADDR(exit_addr)); + } + + ref = jit_Z_PTR(jit, var_addr); + *ref_addr_ptr = ZEND_ADDR_REF_ZVAL(ref); + ref = ir_ADD_OFFSET(ref, offsetof(zend_reference, val)); + var_addr = ZEND_ADDR_REF_ZVAL(ref); + *var_addr_ptr = var_addr; + + return 1; +} + +static bool zend_jit_fetch_reference(zend_jit_ctx *jit, + const zend_op *opline, + uint8_t var_type, + uint32_t *var_info_ptr, + zend_jit_addr *var_addr_ptr, + bool add_ref_guard, + bool add_type_guard) +{ + zend_jit_addr var_addr = *var_addr_ptr; + uint32_t var_info = *var_info_ptr; + const void *exit_addr = NULL; + ir_ref ref; + + if (add_ref_guard || add_type_guard) { + int32_t exit_point = zend_jit_trace_get_exit_point(opline, 0); + + exit_addr = zend_jit_trace_get_exit_addr(exit_point); + if (!exit_addr) { + return 0; + } + } + + if (add_ref_guard) { + ref = jit_Z_TYPE(jit, var_addr); + ir_GUARD(ir_EQ(ref, ir_CONST_U8(IS_REFERENCE)), ir_CONST_ADDR(exit_addr)); + } + if (opline->opcode == ZEND_INIT_METHOD_CALL && opline->op1_type == IS_VAR) { + /* Hack: Convert reference to regular value to simplify JIT code for INIT_METHOD_CALL */ + ir_CALL_1(IR_VOID, ir_CONST_FC_FUNC(zend_jit_unref_helper), + jit_ZVAL_ADDR(jit, var_addr)); + *var_addr_ptr = var_addr; + } else { + ref = jit_Z_PTR(jit, var_addr); + ref = ir_ADD_OFFSET(ref, offsetof(zend_reference, val)); + var_addr = ZEND_ADDR_REF_ZVAL(ref); + *var_addr_ptr = var_addr; + } + + if (var_type != IS_UNKNOWN) { + var_type &= ~(IS_TRACE_REFERENCE|IS_TRACE_INDIRECT|IS_TRACE_PACKED); + } + if (add_type_guard + && var_type != IS_UNKNOWN + && (var_info & (MAY_BE_ANY|MAY_BE_UNDEF)) != (1 << var_type)) { + ref = jit_Z_TYPE(jit, var_addr); + ir_GUARD(ir_EQ(ref, ir_CONST_U8(var_type)), ir_CONST_ADDR(exit_addr)); + + ZEND_ASSERT(var_info & (1 << var_type)); + if (var_type < IS_STRING) { + var_info = (1 << var_type); + } else if (var_type != IS_ARRAY) { + var_info = (1 << var_type) | (var_info & (MAY_BE_RC1|MAY_BE_RCN)); + } else { + var_info = MAY_BE_ARRAY | (var_info & (MAY_BE_ARRAY_OF_ANY|MAY_BE_ARRAY_OF_REF|MAY_BE_ARRAY_KEY_ANY|MAY_BE_RC1|MAY_BE_RCN)); + } + + *var_info_ptr = var_info; + } else { + 
var_info &= ~MAY_BE_REF; + *var_info_ptr = var_info; + } + *var_info_ptr |= MAY_BE_GUARD; /* prevent generation of specialized zval dtor */ + + return 1; +} + +static bool zend_jit_fetch_indirect_var(zend_jit_ctx *jit, const zend_op *opline, uint8_t var_type, uint32_t *var_info_ptr, zend_jit_addr *var_addr_ptr, bool add_indirect_guard) +{ + zend_jit_addr var_addr = *var_addr_ptr; + uint32_t var_info = *var_info_ptr; + int32_t exit_point; + const void *exit_addr; + ir_ref ref = IR_UNUSED; + + if (add_indirect_guard) { + int32_t exit_point = zend_jit_trace_get_exit_point(opline, 0); + const void *exit_addr = zend_jit_trace_get_exit_addr(exit_point); + + if (!exit_addr) { + return 0; + } + jit_guard_Z_TYPE(jit, var_addr, IS_INDIRECT, exit_addr); + ref = jit_Z_PTR(jit, var_addr); + } else { + /* This LOAD of INDIRECT VAR, stored by the previous FETCH_(DIM/OBJ)_W, + * is eliminated by store forwarding (S2L) */ + ref = jit_Z_PTR(jit, var_addr); + } + *var_info_ptr &= ~MAY_BE_INDIRECT; + var_addr = ZEND_ADDR_REF_ZVAL(ref); + *var_addr_ptr = var_addr; + + if (var_type != IS_UNKNOWN) { + var_type &= ~(IS_TRACE_INDIRECT|IS_TRACE_PACKED); + } + if (!(var_type & IS_TRACE_REFERENCE) + && var_type != IS_UNKNOWN + && (var_info & (MAY_BE_ANY|MAY_BE_UNDEF)) != (1 << var_type)) { + exit_point = zend_jit_trace_get_exit_point(opline, 0); + exit_addr = zend_jit_trace_get_exit_addr(exit_point); + + if (!exit_addr) { + return 0; + } + + jit_guard_Z_TYPE(jit, var_addr, var_type, exit_addr); + + //var_info = zend_jit_trace_type_to_info_ex(var_type, var_info); + ZEND_ASSERT(var_info & (1 << var_type)); + if (var_type < IS_STRING) { + var_info = (1 << var_type); + } else if (var_type != IS_ARRAY) { + var_info = (1 << var_type) | (var_info & (MAY_BE_RC1|MAY_BE_RCN)); + } else { + var_info = MAY_BE_ARRAY | (var_info & (MAY_BE_ARRAY_OF_ANY|MAY_BE_ARRAY_OF_REF|MAY_BE_ARRAY_KEY_ANY|MAY_BE_RC1|MAY_BE_RCN)); + } + + *var_info_ptr = var_info; + } + + return 1; +} + +static int zend_jit_trace_handler(zend_jit_ctx *jit, const zend_op_array *op_array, const zend_op *opline, int may_throw, zend_jit_trace_rec *trace) +{ + zend_jit_op_array_trace_extension *jit_extension = + (zend_jit_op_array_trace_extension*)ZEND_FUNC_INFO(op_array); + size_t offset = jit_extension->offset; + const void *handler = + (zend_vm_opcode_handler_t)ZEND_OP_TRACE_INFO(opline, offset)->call_handler; + ir_ref ref; + + zend_jit_set_ip(jit, opline); + if (GCC_GLOBAL_REGS) { + ir_CALL(IR_VOID, ir_CONST_FUNC(handler)); + } else { + ref = jit_FP(jit); + ref = ir_CALL_1(IR_I32, ir_CONST_FC_FUNC(handler), ref); + } + if (may_throw + && opline->opcode != ZEND_RETURN + && opline->opcode != ZEND_RETURN_BY_REF) { + zend_jit_check_exception(jit); + } + + while (trace->op != ZEND_JIT_TRACE_VM && trace->op != ZEND_JIT_TRACE_END) { + trace++; + } + + if (!GCC_GLOBAL_REGS + && (trace->op != ZEND_JIT_TRACE_END || trace->stop != ZEND_JIT_TRACE_STOP_RETURN)) { + if (opline->opcode == ZEND_RETURN || + opline->opcode == ZEND_RETURN_BY_REF || + opline->opcode == ZEND_DO_UCALL || + opline->opcode == ZEND_DO_FCALL_BY_NAME || + opline->opcode == ZEND_DO_FCALL || + opline->opcode == ZEND_GENERATOR_CREATE) { + + ir_ref addr = jit_EG(current_execute_data); + + jit_STORE_FP(jit, ir_LOAD_A(addr)); + } + } + + if (zend_jit_trace_may_exit(op_array, opline)) { + if (opline->opcode == ZEND_RETURN || + opline->opcode == ZEND_RETURN_BY_REF || + opline->opcode == ZEND_GENERATOR_CREATE) { + + if (zend_jit_vm_kind == ZEND_VM_KIND_HYBRID) { + if (trace->op != ZEND_JIT_TRACE_END || + 
(trace->stop != ZEND_JIT_TRACE_STOP_RETURN && + trace->stop != ZEND_JIT_TRACE_STOP_INTERPRETER)) { + /* this check may be handled by the following OPLINE guard or jmp [IP] */ + ir_GUARD(ir_NE(jit_IP(jit), ir_CONST_ADDR(zend_jit_halt_op)), + jit_STUB_ADDR(jit, jit_stub_trace_halt)); + } + } else if (GCC_GLOBAL_REGS) { + ir_GUARD(jit_IP(jit), jit_STUB_ADDR(jit, jit_stub_trace_halt)); + } else { + ir_GUARD(ir_GE(ref, ir_CONST_I32(0)), jit_STUB_ADDR(jit, jit_stub_trace_halt)); + } + } else if (opline->opcode == ZEND_EXIT || + opline->opcode == ZEND_GENERATOR_RETURN || + opline->opcode == ZEND_YIELD || + opline->opcode == ZEND_YIELD_FROM) { + ir_IJMP(jit_STUB_ADDR(jit, jit_stub_trace_halt)); + ir_BEGIN(IR_UNUSED); /* unreachable block */ + } + if (trace->op != ZEND_JIT_TRACE_END || + (trace->stop != ZEND_JIT_TRACE_STOP_RETURN && + trace->stop != ZEND_JIT_TRACE_STOP_INTERPRETER)) { + + const zend_op *next_opline = trace->opline; + const zend_op *exit_opline = NULL; + uint32_t exit_point; + const void *exit_addr; + uint32_t old_info = 0; + uint32_t old_res_info = 0; + zend_jit_trace_stack *stack = JIT_G(current_frame)->stack; + + if (zend_is_smart_branch(opline)) { + bool exit_if_true = 0; + exit_opline = zend_jit_trace_get_exit_opline(trace, opline + 1, &exit_if_true); + } else { + switch (opline->opcode) { + case ZEND_JMPZ: + case ZEND_JMPNZ: + case ZEND_JMPZ_EX: + case ZEND_JMPNZ_EX: + case ZEND_JMP_SET: + case ZEND_COALESCE: + case ZEND_JMP_NULL: + case ZEND_FE_RESET_R: + case ZEND_FE_RESET_RW: + exit_opline = (trace->opline == opline + 1) ? + OP_JMP_ADDR(opline, opline->op2) : + opline + 1; + break; + case ZEND_FE_FETCH_R: + case ZEND_FE_FETCH_RW: + exit_opline = (trace->opline == opline + 1) ? + ZEND_OFFSET_TO_OPLINE(opline, opline->extended_value) : + opline + 1; + break; + + } + } + + switch (opline->opcode) { + case ZEND_FE_FETCH_R: + case ZEND_FE_FETCH_RW: + if (opline->op2_type != IS_UNUSED) { + old_info = STACK_INFO(stack, EX_VAR_TO_NUM(opline->op2.var)); + SET_STACK_TYPE(stack, EX_VAR_TO_NUM(opline->op2.var), IS_UNKNOWN, 1); + } + break; + case ZEND_BIND_INIT_STATIC_OR_JMP: + if (opline->op1_type == IS_CV) { + old_info = STACK_INFO(stack, EX_VAR_TO_NUM(opline->op1.var)); + SET_STACK_TYPE(stack, EX_VAR_TO_NUM(opline->op1.var), IS_UNKNOWN, 1); + } + break; + } + if (opline->result_type == IS_VAR || opline->result_type == IS_TMP_VAR) { + old_res_info = STACK_INFO(stack, EX_VAR_TO_NUM(opline->result.var)); + SET_STACK_TYPE(stack, EX_VAR_TO_NUM(opline->result.var), IS_UNKNOWN, 1); + } + exit_point = zend_jit_trace_get_exit_point(exit_opline, 0); + exit_addr = zend_jit_trace_get_exit_addr(exit_point); + + if (opline->result_type == IS_VAR || opline->result_type == IS_TMP_VAR) { + SET_STACK_INFO(stack, EX_VAR_TO_NUM(opline->result.var), old_res_info); + } + switch (opline->opcode) { + case ZEND_FE_FETCH_R: + case ZEND_FE_FETCH_RW: + if (opline->op2_type != IS_UNUSED) { + SET_STACK_INFO(stack, EX_VAR_TO_NUM(opline->op2.var), old_info); + } + break; + case ZEND_BIND_INIT_STATIC_OR_JMP: + if (opline->op1_type == IS_CV) { + SET_STACK_INFO(stack, EX_VAR_TO_NUM(opline->op1.var), old_info); + } + break; + } + + if (!exit_addr) { + return 0; + } + ir_GUARD(jit_CMP_IP(jit, IR_EQ, next_opline), ir_CONST_ADDR(exit_addr)); + } + } + + zend_jit_set_last_valid_opline(jit, trace->opline); + + return 1; +} + +static int zend_jit_deoptimizer_start(zend_jit_ctx *jit, + zend_string *name, + uint32_t trace_num, + uint32_t exit_num) +{ + zend_jit_init_ctx(jit, (zend_jit_vm_kind == ZEND_VM_KIND_CALL) ? 
0 : IR_START_BR_TARGET); + + jit->ctx.spill_base = ZREG_FP; + + jit->op_array = NULL; + jit->ssa = NULL; + jit->name = zend_string_copy(name); + + jit->ctx.flags |= IR_SKIP_PROLOGUE; + + return 1; +} + +static int zend_jit_trace_start(zend_jit_ctx *jit, + const zend_op_array *op_array, + zend_ssa *ssa, + zend_string *name, + uint32_t trace_num, + zend_jit_trace_info *parent, + uint32_t exit_num) +{ + zend_jit_init_ctx(jit, (zend_jit_vm_kind == ZEND_VM_KIND_CALL) ? 0 : IR_START_BR_TARGET); + + jit->ctx.spill_base = ZREG_FP; + + jit->op_array = NULL; + jit->current_op_array = op_array; + jit->ssa = ssa; + jit->name = zend_string_copy(name); + + if (!GCC_GLOBAL_REGS) { + if (!parent) { + ir_ref ref = ir_PARAM(IR_ADDR, "execute_data", 1); + jit_STORE_FP(jit, ref); + jit->ctx.flags |= IR_FASTCALL_FUNC; + } + } + + if (parent) { + jit->ctx.flags |= IR_SKIP_PROLOGUE; + } + + if (parent) { + int i; + int parent_vars_count = parent->exit_info[exit_num].stack_size; + zend_jit_trace_stack *parent_stack = + parent->stack_map + + parent->exit_info[exit_num].stack_offset; + + /* prevent clobbering of registers used for deoptimization */ + for (i = 0; i < parent_vars_count; i++) { + if (STACK_FLAGS(parent_stack, i) != ZREG_CONST + && STACK_REG(parent_stack, i) != ZREG_NONE) { + int32_t reg = STACK_REG(parent_stack, i); + ir_type type; + + if (STACK_FLAGS(parent_stack, i) == ZREG_ZVAL_COPY) { + type = IR_ADDR; + } else if (STACK_TYPE(parent_stack, i) == IS_LONG) { + type = IR_LONG; + } else if (STACK_TYPE(parent_stack, i) == IS_DOUBLE) { + type = IR_DOUBLE; + } else { + ZEND_UNREACHABLE(); + } + if (ssa && ssa->vars[i].no_val) { + /* pass */ + } else { + ir_ref ref = ir_RLOAD(type, reg); + + if (STACK_FLAGS(parent_stack, i) & (ZREG_LOAD|ZREG_STORE)) { + /* op3 is used as a flag that the value is already stored in memory. + * In case the IR framework decides to spill the result of IR_LOAD, + * it doesn't have to store the value once again. + * + * See: insn->op3 check in ir_emit_rload() + */ + ir_set_op(&jit->ctx, ref, 3, EX_NUM_TO_VAR(i)); + } + } + } + } + } + + if (parent && parent->exit_info[exit_num].flags & ZEND_JIT_EXIT_METHOD_CALL) { + ZEND_ASSERT(parent->exit_info[exit_num].poly_func_reg >= 0 && parent->exit_info[exit_num].poly_this_reg >= 0); + ir_RLOAD_A(parent->exit_info[exit_num].poly_func_reg); + ir_RLOAD_A(parent->exit_info[exit_num].poly_this_reg); + } + + ir_STORE(jit_EG(jit_trace_num), ir_CONST_U32(trace_num)); + + return 1; +} + +static int zend_jit_trace_begin_loop(zend_jit_ctx *jit) +{ + return ir_LOOP_BEGIN(ir_END()); +} + +static void zend_jit_trace_gen_phi(zend_jit_ctx *jit, zend_ssa_phi *phi) +{ + int dst_var = phi->ssa_var; + int src_var = phi->sources[0]; + ir_ref ref; + + ZEND_ASSERT(!(jit->ra[dst_var].flags & ZREG_LOAD)); + ZEND_ASSERT(jit->ra[src_var].ref != IR_UNUSED && jit->ra[src_var].ref != IR_NULL); + + ref = ir_PHI_2( + (jit->ssa->var_info[src_var].type & MAY_BE_LONG) ? 
IR_LONG : IR_DOUBLE, + zend_jit_use_reg(jit, ZEND_ADDR_REG(src_var)), IR_UNUSED); + + src_var = phi->sources[1]; + ZEND_ASSERT(jit->ra[src_var].ref == IR_NULL); + jit->ra[src_var].flags |= ZREG_FORWARD; + + zend_jit_def_reg(jit, ZEND_ADDR_REG(dst_var), ref); +} + +static int zend_jit_trace_end_loop(zend_jit_ctx *jit, int loop_ref, const void *timeout_exit_addr) +{ + if (timeout_exit_addr) { + zend_jit_check_timeout(jit, NULL, timeout_exit_addr); + } + ZEND_ASSERT(jit->ctx.ir_base[loop_ref].op2 == IR_UNUSED); + ir_MERGE_SET_OP(loop_ref, 2, ir_LOOP_END()); + return 1; +} + +static int zend_jit_trace_return(zend_jit_ctx *jit, bool original_handler, const zend_op *opline) +{ + if (GCC_GLOBAL_REGS) { + if (!original_handler) { + ir_TAILCALL(IR_VOID, ir_LOAD_A(jit_IP(jit))); + } else { + ir_TAILCALL(IR_VOID, zend_jit_orig_opline_handler(jit)); + } + } else { + if (original_handler) { + ir_ref ref; + ir_ref addr = zend_jit_orig_opline_handler(jit); + +#if defined(IR_TARGET_X86) + addr = ir_CAST_FC_FUNC(addr); +#endif + ref = ir_CALL_1(IR_I32, addr, jit_FP(jit)); + if (opline && + (opline->opcode == ZEND_RETURN + || opline->opcode == ZEND_RETURN_BY_REF + || opline->opcode == ZEND_GENERATOR_RETURN + || opline->opcode == ZEND_GENERATOR_CREATE + || opline->opcode == ZEND_YIELD + || opline->opcode == ZEND_YIELD_FROM)) { + ir_RETURN(ref); + } + } + ir_RETURN(ir_CONST_I32(2)); // ZEND_VM_LEAVE + } + return 1; +} + +static int zend_jit_link_side_trace(const void *code, size_t size, uint32_t jmp_table_size, uint32_t exit_num, const void *addr) +{ + return ir_patch(code, size, jmp_table_size, zend_jit_trace_get_exit_addr(exit_num), addr); +} + +static int zend_jit_trace_link_to_root(zend_jit_ctx *jit, zend_jit_trace_info *t, const void *timeout_exit_addr) +{ + const void *link_addr; + + /* Skip prologue. 
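A side trace already runs in the frame created by the root trace's prologue, so linking jumps to code_start + zend_jit_trace_prologue_size instead of re-executing it.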
*/ + ZEND_ASSERT(zend_jit_trace_prologue_size != (size_t)-1); + link_addr = (const void*)((const char*)t->code_start + zend_jit_trace_prologue_size); + + if (timeout_exit_addr) { + zend_jit_check_timeout(jit, NULL, timeout_exit_addr); + } + ir_IJMP(ir_CONST_ADDR(link_addr)); + + return 1; +} + +static bool zend_jit_opline_supports_reg(const zend_op_array *op_array, zend_ssa *ssa, const zend_op *opline, const zend_ssa_op *ssa_op, zend_jit_trace_rec *trace) +{ + uint32_t op1_info, op2_info; + + switch (opline->opcode) { + case ZEND_SEND_VAR: + case ZEND_SEND_VAL: + case ZEND_SEND_VAL_EX: + return (opline->op2_type != IS_CONST) && (opline->opcode != ZEND_SEND_VAL_EX || opline->op2.num <= MAX_ARG_FLAG_NUM); + case ZEND_QM_ASSIGN: + case ZEND_IS_SMALLER: + case ZEND_IS_SMALLER_OR_EQUAL: + case ZEND_IS_EQUAL: + case ZEND_IS_NOT_EQUAL: + case ZEND_IS_IDENTICAL: + case ZEND_IS_NOT_IDENTICAL: + case ZEND_CASE: + return 1; + case ZEND_RETURN: + return (op_array->type != ZEND_EVAL_CODE && op_array->function_name); + case ZEND_ASSIGN: + return (opline->op1_type == IS_CV); + case ZEND_ADD: + case ZEND_SUB: + case ZEND_MUL: + op1_info = OP1_INFO(); + op2_info = OP2_INFO(); + return !(op1_info & MAY_BE_UNDEF) + && !(op2_info & MAY_BE_UNDEF) + && (op1_info & (MAY_BE_LONG|MAY_BE_DOUBLE)) + && (op2_info & (MAY_BE_LONG|MAY_BE_DOUBLE)); + case ZEND_BW_OR: + case ZEND_BW_AND: + case ZEND_BW_XOR: + case ZEND_SL: + case ZEND_SR: + case ZEND_MOD: + op1_info = OP1_INFO(); + op2_info = OP2_INFO(); + return (op1_info & MAY_BE_LONG) + && (op2_info & MAY_BE_LONG); + case ZEND_PRE_INC: + case ZEND_PRE_DEC: + case ZEND_POST_INC: + case ZEND_POST_DEC: + op1_info = OP1_INFO(); + return opline->op1_type == IS_CV + && (op1_info & MAY_BE_LONG) + && !(op1_info & MAY_BE_REF); + case ZEND_STRLEN: + op1_info = OP1_INFO(); + return (opline->op1_type & (IS_CV|IS_CONST)) + && (op1_info & (MAY_BE_ANY|MAY_BE_REF|MAY_BE_UNDEF)) == MAY_BE_STRING; + case ZEND_COUNT: + op1_info = OP1_INFO(); + return (opline->op1_type & (IS_CV|IS_CONST)) + && (op1_info & (MAY_BE_ANY|MAY_BE_REF|MAY_BE_UNDEF)) == MAY_BE_ARRAY; + case ZEND_JMPZ: + case ZEND_JMPNZ: + if (JIT_G(trigger) != ZEND_JIT_ON_HOT_TRACE) { + if (!ssa->cfg.map) { + return 0; + } + if (opline > op_array->opcodes + ssa->cfg.blocks[ssa->cfg.map[opline-op_array->opcodes]].start && + ((opline-1)->result_type & (IS_SMART_BRANCH_JMPZ|IS_SMART_BRANCH_JMPNZ)) != 0) { + return 0; + } + } + ZEND_FALLTHROUGH; + case ZEND_BOOL: + case ZEND_BOOL_NOT: + case ZEND_JMPZ_EX: + case ZEND_JMPNZ_EX: + return 1; + case ZEND_FETCH_CONSTANT: + return 1; + case ZEND_FETCH_DIM_R: + op1_info = OP1_INFO(); + op2_info = OP2_INFO(); + if (trace + && trace->op1_type != IS_UNKNOWN + && (trace->op1_type & ~(IS_TRACE_REFERENCE|IS_TRACE_INDIRECT|IS_TRACE_PACKED)) == IS_ARRAY) { + op1_info &= ~((MAY_BE_ANY|MAY_BE_UNDEF) - MAY_BE_ARRAY); + } + return ((op1_info & (MAY_BE_ANY|MAY_BE_UNDEF)) == MAY_BE_ARRAY) && + (!(opline->op1_type & (IS_TMP_VAR|IS_VAR)) || !(op1_info & MAY_BE_RC1)) && + (((op2_info & (MAY_BE_ANY|MAY_BE_UNDEF)) == MAY_BE_LONG) || + (((op2_info & (MAY_BE_ANY|MAY_BE_UNDEF)) == MAY_BE_STRING) && + (!(opline->op2_type & (IS_TMP_VAR|IS_VAR)) || !(op2_info & MAY_BE_RC1)))); + } + return 0; +} + +static bool zend_jit_var_supports_reg(zend_ssa *ssa, int var) +{ + if (ssa->vars[var].no_val) { + /* we don't need the value */ + return 0; + } + + if (!(JIT_G(opt_flags) & ZEND_JIT_REG_ALLOC_GLOBAL)) { + /* Disable global register allocation, + * register allocation for SSA variables connected through Phi functions + */ 
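+ /* Example: in "for ($i = 0; $i < $n; $i++)" the two SSA names of $i are
+  * merged by a Phi at the loop header; keeping $i in a register would
+  * require agreeing on one register across all Phi sources, which only
+  * the global allocator coordinates, so such variables stay in memory. */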
+ if (ssa->vars[var].definition_phi) { + return 0; + } + if (ssa->vars[var].phi_use_chain) { + zend_ssa_phi *phi = ssa->vars[var].phi_use_chain; + do { + if (!ssa->vars[phi->ssa_var].no_val) { + return 0; + } + phi = zend_ssa_next_use_phi(ssa, var, phi); + } while (phi); + } + } + + if (((ssa->var_info[var].type & (MAY_BE_ANY|MAY_BE_UNDEF|MAY_BE_REF)) != MAY_BE_DOUBLE) && + ((ssa->var_info[var].type & (MAY_BE_ANY|MAY_BE_UNDEF|MAY_BE_REF)) != MAY_BE_LONG)) { + /* bad type */ + return 0; + } + + return 1; +} + +static bool zend_jit_may_be_in_reg(const zend_op_array *op_array, zend_ssa *ssa, int var) +{ + if (!zend_jit_var_supports_reg(ssa, var)) { + return 0; + } + + if (ssa->vars[var].definition >= 0) { + uint32_t def = ssa->vars[var].definition; + if (!zend_jit_opline_supports_reg(op_array, ssa, op_array->opcodes + def, ssa->ops + def, NULL)) { + return 0; + } + } + + if (ssa->vars[var].use_chain >= 0) { + int use = ssa->vars[var].use_chain; + + do { + if (!zend_ssa_is_no_val_use(op_array->opcodes + use, ssa->ops + use, var) && + !zend_jit_opline_supports_reg(op_array, ssa, op_array->opcodes + use, ssa->ops + use, NULL)) { + return 0; + } + /* Quick workaround to disable register allocation for unsupported operand */ + // TODO: Find a general solution ??? + if (op_array->opcodes[use].opcode == ZEND_FETCH_DIM_R) { + return 0; + } + use = zend_ssa_next_use(ssa->ops, var, use); + } while (use >= 0); + } + + if (JIT_G(trigger) != ZEND_JIT_ON_HOT_TRACE) { + int def_block, use_block, b, use, j; + zend_basic_block *bb; + zend_ssa_phi *p; + bool ret = 1; + zend_worklist worklist; + ALLOCA_FLAG(use_heap) + + /* Check if live range is split by ENTRY block */ + if (ssa->vars[var].definition >= 0) { + def_block = ssa->cfg.map[ssa->vars[var].definition]; + } else { + ZEND_ASSERT(ssa->vars[var].definition_phi); + def_block = ssa->vars[var].definition_phi->block; + } + + ZEND_WORKLIST_ALLOCA(&worklist, ssa->cfg.blocks_count, use_heap); + + if (ssa->vars[var].use_chain >= 0) { + use = ssa->vars[var].use_chain; + do { + use_block = ssa->cfg.map[use]; + if (use_block != def_block) { + zend_worklist_push(&worklist, use_block); + } + use = zend_ssa_next_use(ssa->ops, var, use); + } while (use >= 0); + } + + p = ssa->vars[var].phi_use_chain; + while (p) { + use_block = p->block; + if (use_block != def_block) { + bb = &ssa->cfg.blocks[use_block]; + for (j = 0; j < bb->predecessors_count; j++) { + if (p->sources[j] == var) { + use_block = ssa->cfg.predecessors[bb->predecessor_offset + j]; + if (use_block != def_block) { + zend_worklist_push(&worklist, use_block); + } + } + } + } + p = zend_ssa_next_use_phi(ssa, var, p); + } + + while (zend_worklist_len(&worklist) != 0) { + b = zend_worklist_pop(&worklist); + bb = &ssa->cfg.blocks[b]; + if (bb->flags & (ZEND_BB_ENTRY|ZEND_BB_RECV_ENTRY)) { + ret = 0; + break; + } + for (j = 0; j < bb->predecessors_count; j++) { + b = ssa->cfg.predecessors[bb->predecessor_offset + j]; + if (b != def_block) { + zend_worklist_push(&worklist, b); + } + } + } + + ZEND_WORKLIST_FREE_ALLOCA(&worklist, use_heap); + + return ret; + } + + return 1; +} + +/* + * Local variables: + * tab-width: 4 + * c-basic-offset: 4 + * indent-tabs-mode: t + * End: + */ diff --git a/ext/opcache/jit/zend_jit_trace.c b/ext/opcache/jit/zend_jit_trace.c index c3ac9fb4a74..85fcb657ffe 100644 --- a/ext/opcache/jit/zend_jit_trace.c +++ b/ext/opcache/jit/zend_jit_trace.c @@ -86,6 +86,7 @@ static int zend_jit_trace_startup(bool reattached) return SUCCESS; } +#ifndef ZEND_JIT_IR static const void 
*zend_jit_trace_allocate_exit_group(uint32_t n) { dasm_State* dasm_state = NULL; @@ -115,6 +116,7 @@ static const void *zend_jit_trace_allocate_exit_group(uint32_t n) return entry; } +#endif static const void *zend_jit_trace_allocate_exit_point(uint32_t n) { @@ -147,7 +149,22 @@ static const void *zend_jit_trace_get_exit_addr(uint32_t n) ((n % ZEND_JIT_EXIT_POINTS_PER_GROUP) * ZEND_JIT_EXIT_POINTS_SPACING)); } -#if ZEND_JIT_TARGET_ARM64 +#ifdef ZEND_JIT_IR +static uint32_t zend_jit_exit_point_by_addr(void *addr) +{ + uint32_t n = (ZEND_JIT_EXIT_NUM + (ZEND_JIT_EXIT_POINTS_PER_GROUP - 1)) / ZEND_JIT_EXIT_POINTS_PER_GROUP; + uint32_t i; + + for (i = 0; i < n; i++) { + if ((char*)addr >= (char*)zend_jit_exit_groups[i] + && (char*)addr <= (char*)zend_jit_exit_groups[i] + ((ZEND_JIT_EXIT_POINTS_PER_GROUP - 1) * ZEND_JIT_EXIT_POINTS_SPACING)) { + return (i * ZEND_JIT_EXIT_POINTS_PER_GROUP) + + (((char*)addr - (char*)zend_jit_exit_groups[i]) / ZEND_JIT_EXIT_POINTS_SPACING); + } + } + return (uint32_t)-1; +} +#elif ZEND_JIT_TARGET_ARM64 static zend_jit_trace_info *zend_jit_get_current_trace_info(void) { return &zend_jit_traces[ZEND_JIT_TRACE_NUM]; @@ -191,7 +208,12 @@ static uint32_t zend_jit_trace_get_exit_point(const zend_op *to_opline, uint32_t do { if (STACK_TYPE(stack, stack_size-1) != IS_UNKNOWN || STACK_MEM_TYPE(stack, stack_size-1) != IS_UNKNOWN - || STACK_REG(stack, stack_size-1) != ZREG_NONE) { +#ifndef ZEND_JIT_IR + || STACK_REG(stack, stack_size-1) != ZREG_NONE +#else + || STACK_REF(stack, stack_size-1) != IR_UNUSED +#endif + ) { break; } stack_size--; @@ -203,7 +225,11 @@ static uint32_t zend_jit_trace_get_exit_point(const zend_op *to_opline, uint32_t } /* Try to reuse exit points */ - if (to_opline != NULL && t->exit_count > 0) { + if (to_opline != NULL +#ifdef ZEND_JIT_IR + && !(flags & ZEND_JIT_EXIT_METHOD_CALL) +#endif + && t->exit_count > 0) { uint32_t i = t->exit_count; do { @@ -211,7 +237,9 @@ static uint32_t zend_jit_trace_get_exit_point(const zend_op *to_opline, uint32_t if (stack_size == 0 || (t->exit_info[i].stack_size >= stack_size && memcmp(t->stack_map + t->exit_info[i].stack_offset, stack, stack_size * sizeof(zend_jit_trace_stack)) == 0)) { +#ifndef ZEND_JIT_IR stack_offset = t->exit_info[i].stack_offset; +#endif if (t->exit_info[i].opline == to_opline && t->exit_info[i].flags == flags && t->exit_info[i].stack_size == stack_size) { @@ -236,6 +264,12 @@ static uint32_t zend_jit_trace_get_exit_point(const zend_op *to_opline, uint32_t t->exit_info[exit_point].flags = flags; t->exit_info[exit_point].stack_size = stack_size; t->exit_info[exit_point].stack_offset = stack_offset; +#ifdef ZEND_JIT_IR + t->exit_info[exit_point].poly_func_ref = 0; + t->exit_info[exit_point].poly_this_ref = 0; + t->exit_info[exit_point].poly_func_reg = ZREG_NONE; + t->exit_info[exit_point].poly_this_reg = ZREG_NONE; +#endif } return exit_point; @@ -432,7 +466,7 @@ static zend_always_inline void zend_jit_trace_add_op_guard(zend_ssa #define CHECK_OP_TRACE_TYPE(_var, _ssa_var, op_info, op_type) do { \ if (op_type != IS_UNKNOWN) { \ if ((op_info & MAY_BE_GUARD) != 0) { \ - if (!zend_jit_type_guard(&dasm_state, opline, _var, op_type)) { \ + if (!zend_jit_type_guard(&ctx, opline, _var, op_type)) { \ goto jit_failure; \ } \ if (ssa->vars[_ssa_var].alias != NO_ALIAS) { \ @@ -821,11 +855,88 @@ static int zend_jit_trace_add_ret_phis(zend_jit_trace_rec *trace_buffer, uint32_ static int zend_jit_trace_copy_ssa_var_info(const zend_op_array *op_array, const zend_ssa *ssa, const zend_op **tssa_opcodes, zend_ssa 
*tssa, int ssa_var) { - int var, use; + int var, use, def, src; zend_ssa_op *op; - zend_ssa_var_info *info; - unsigned int no_val; - zend_ssa_alias_kind alias; + uint32_t n; + + if (tssa->vars[ssa_var].definition_phi) { + uint32_t b = ssa->cfg.map[tssa_opcodes[0] - op_array->opcodes]; + zend_basic_block *bb = ssa->cfg.blocks + b; + + if (bb->flags & ZEND_BB_LOOP_HEADER) { + zend_ssa_phi *phi = ssa->blocks[b].phis; + zend_ssa_phi *pi = NULL; + + var = tssa->vars[ssa_var].var; + while (phi) { + if (ssa->vars[phi->ssa_var].var == var) { + if (phi->pi >= 0) { + pi = phi; + } else { + src = phi->ssa_var; + goto copy_info; + } + } + phi = phi->next; + } + if (pi) { + src = pi->ssa_var; + goto copy_info; + } + + while (bb->idom >= 0) { + b = bb->idom; + bb = ssa->cfg.blocks + b; + + for (n = bb->len, op = ssa->ops + bb->start + n; n > 0; n--) { + op--; + if (op->result_def >= 0 && ssa->vars[op->result_def].var == var) { + src = op->result_def; + goto copy_info; + } else if (op->op2_def >= 0 && ssa->vars[op->op2_def].var == var) { + src = op->op2_def; + goto copy_info; + } else if (op->op1_def >= 0 && ssa->vars[op->op1_def].var == var) { + src = op->op1_def; + goto copy_info; + } + } + + phi = ssa->blocks[b].phis; + zend_ssa_phi *pi = NULL; + while (phi) { + if (ssa->vars[phi->ssa_var].var == var) { + if (phi->pi >= 0) { + pi = phi; + } else { + src = phi->ssa_var; + goto copy_info; + } + } + phi = phi->next; + } + if (pi) { + src = pi->ssa_var; + goto copy_info; + } + } + } + } else if (tssa->vars[ssa_var].definition >= 0) { + def = tssa->vars[ssa_var].definition; + ZEND_ASSERT((tssa_opcodes[def] - op_array->opcodes) < op_array->last); + op = ssa->ops + (tssa_opcodes[def] - op_array->opcodes); + if (tssa->ops[def].op1_def == ssa_var) { + src = op->op1_def; + } else if (tssa->ops[def].op2_def == ssa_var) { + src = op->op2_def; + } else if (tssa->ops[def].result_def == ssa_var) { + src = op->result_def; + } else { + assert(0); + return 0; + } + goto copy_info; + } if (tssa->vars[ssa_var].phi_use_chain) { // TODO: this may be incorrect ??? 
@@ -838,27 +949,24 @@ static int zend_jit_trace_copy_ssa_var_info(const zend_op_array *op_array, const ZEND_ASSERT((tssa_opcodes[use] - op_array->opcodes) < op_array->last); op = ssa->ops + (tssa_opcodes[use] - op_array->opcodes); if (tssa->ops[use].op1_use == var) { - no_val = ssa->vars[op->op1_use].no_val; - alias = ssa->vars[op->op1_use].alias; - info = ssa->var_info + op->op1_use; + src = op->op1_use; } else if (tssa->ops[use].op2_use == var) { - no_val = ssa->vars[op->op2_use].no_val; - alias = ssa->vars[op->op2_use].alias; - info = ssa->var_info + op->op2_use; + src = op->op2_use; } else if (tssa->ops[use].result_use == var) { - no_val = ssa->vars[op->result_use].no_val; - alias = ssa->vars[op->result_use].alias; - info = ssa->var_info + op->result_use; + src = op->result_use; } else { assert(0); return 0; } - tssa->vars[ssa_var].no_val = no_val; - tssa->vars[ssa_var].alias = alias; - memcpy(&tssa->var_info[ssa_var], info, sizeof(zend_ssa_var_info)); - return 1; + goto copy_info; } return 0; + +copy_info: + tssa->vars[ssa_var].no_val = ssa->vars[src].no_val; + tssa->vars[ssa_var].alias = ssa->vars[src].alias; + memcpy(&tssa->var_info[ssa_var], &ssa->var_info[src], sizeof(zend_ssa_var_info)); + return 1; } static void zend_jit_trace_propagate_range(const zend_op_array *op_array, const zend_op **tssa_opcodes, zend_ssa *tssa, int ssa_var) @@ -1636,9 +1744,6 @@ static zend_ssa *zend_jit_trace_build_tssa(zend_jit_trace_rec *trace_buffer, uin TRACE_FRAME_INIT(frame, op_array, 0, 0); TRACE_FRAME_SET_RETURN_SSA_VAR(frame, -1); frame->used_stack = 0; - for (i = 0; i < op_array->last_var + op_array->T; i++) { - SET_STACK_TYPE(frame->stack, i, IS_UNKNOWN, 1); - } memset(&return_value_info, 0, sizeof(return_value_info)); if (trace_buffer->stop == ZEND_JIT_TRACE_STOP_LOOP) { @@ -2325,9 +2430,6 @@ propagate_arg: TRACE_FRAME_INIT(call, op_array, 0, 0); call->used_stack = 0; top = zend_jit_trace_call_frame(top, op_array); - for (i = 0; i < op_array->last_var + op_array->T; i++) { - SET_STACK_TYPE(call->stack, i, IS_UNKNOWN, 1); - } } else { ZEND_ASSERT(&call->func->op_array == op_array); } @@ -2454,9 +2556,6 @@ propagate_arg: TRACE_FRAME_INIT(frame, op_array, 0, 0); TRACE_FRAME_SET_RETURN_SSA_VAR(frame, -1); frame->used_stack = 0; - for (i = 0; i < op_array->last_var + op_array->T; i++) { - SET_STACK_TYPE(frame->stack, i, IS_UNKNOWN, 1); - } } } else if (p->op == ZEND_JIT_TRACE_INIT_CALL) { @@ -2661,25 +2760,53 @@ propagate_arg: return tssa; } +#ifndef ZEND_JIT_IR +# define RA_HAS_IVAL(var) (start[var] >= 0) +# define RA_IVAL_FLAGS(var) flags[var] +# define RA_IVAL_START(var, line) do {start[var] = (line);} while (0) +# define RA_IVAL_END(var, line) do {end[var] = (line);} while (0) +# define RA_IVAL_CLOSE(var, line) zend_jit_close_var(stack, var, start, end, flags, line) +# define RA_IVAL_DEL(var) do {start[var] = end[var] = -1;} while (0) +# define RA_HAS_REG(var) (ra[var] != NULL) +# define RA_REG_FLAGS(var) ra[var]->flags +# define RA_REG_DEL(var) do {ra[var] = NULL;} while (0) + static void zend_jit_close_var(zend_jit_trace_stack *stack, uint32_t n, int *start, int *end, uint8_t *flags, int line) { int32_t var = STACK_VAR(stack, n); - if (var >= 0 && start[var] >= 0 && !(flags[var] & ZREG_LAST_USE)) { + if (var >= 0 && RA_HAS_IVAL(var) && !(RA_IVAL_FLAGS(var) & ZREG_LAST_USE)) { // TODO: shrink interval to last side exit ???? 
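+ /* Note: the RA_IVAL_* macros abstract the two allocators: without
+  * ZEND_JIT_IR they maintain explicit [start, end] line intervals for the
+  * linear-scan allocator, while under ZEND_JIT_IR the END/CLOSE operations
+  * are no-ops because liveness is recomputed by the IR framework itself. */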
- end[var] = line; + RA_IVAL_END(var, line); } } +#else +# define RA_HAS_IVAL(var) (ra[var].ref != 0) +# define RA_IVAL_FLAGS(var) ra[var].flags +# define RA_IVAL_START(var, line) do {ra[var].ref = IR_NULL;} while (0) +# define RA_IVAL_END(var, line) +# define RA_IVAL_CLOSE(var, line) +# define RA_IVAL_DEL(var) do {ra[var].ref = IR_UNUSED;} while (0) +# define RA_HAS_REG(var) (ra[var].ref != 0) +# define RA_REG_FLAGS(var) ra[var].flags +# define RA_REG_START(var, line) do {ra[var].ref = IR_NULL;} while (0) +# define RA_REG_DEL(var) do {ra[var].ref = IR_UNUSED;} while (0) +#endif + +#ifndef ZEND_JIT_IR static void zend_jit_trace_use_var(int line, int var, int def, int use_chain, int *start, int *end, uint8_t *flags, const zend_ssa *ssa, const zend_op **ssa_opcodes, const zend_op_array *op_array, const zend_ssa *op_array_ssa) +#else +static void zend_jit_trace_use_var(int line, int var, int def, int use_chain, zend_jit_reg_var *ra, const zend_ssa *ssa, const zend_op **ssa_opcodes, const zend_op_array *op_array, const zend_ssa *op_array_ssa) +#endif { - ZEND_ASSERT(start[var] >= 0); - ZEND_ASSERT(!(flags[var] & ZREG_LAST_USE)); - end[var] = line; + ZEND_ASSERT(RA_HAS_IVAL(var)); + ZEND_ASSERT(!(RA_IVAL_FLAGS(var) & ZREG_LAST_USE)); + RA_IVAL_END(var, line); if (def >= 0) { - flags[var] |= ZREG_LAST_USE; - } else if (use_chain < 0 && (flags[var] & (ZREG_LOAD|ZREG_STORE))) { - flags[var] |= ZREG_LAST_USE; + RA_IVAL_FLAGS(var) |= ZREG_LAST_USE; + } else if (use_chain < 0 && (RA_IVAL_FLAGS(var) & (ZREG_LOAD|ZREG_STORE))) { + RA_IVAL_FLAGS(var) |= ZREG_LAST_USE; } else if (use_chain >= 0 && !zend_ssa_is_no_val_use(ssa_opcodes[use_chain], ssa->ops + use_chain, var)) { /* pass */ } else if (op_array_ssa->vars) { @@ -2687,21 +2814,25 @@ static void zend_jit_trace_use_var(int line, int var, int def, int use_chain, in if (ssa->ops[line].op1_use == var) { if (zend_ssa_is_last_use(op_array, op_array_ssa, op_array_ssa->ops[use].op1_use, use)) { - flags[var] |= ZREG_LAST_USE; + RA_IVAL_FLAGS(var) |= ZREG_LAST_USE; } } else if (ssa->ops[line].op2_use == var) { if (zend_ssa_is_last_use(op_array, op_array_ssa, op_array_ssa->ops[use].op2_use, use)) { - flags[var] |= ZREG_LAST_USE; + RA_IVAL_FLAGS(var) |= ZREG_LAST_USE; } } else if (ssa->ops[line].result_use == var) { if (zend_ssa_is_last_use(op_array, op_array_ssa, op_array_ssa->ops[use].result_use, use)) { - flags[var] |= ZREG_LAST_USE; + RA_IVAL_FLAGS(var) |= ZREG_LAST_USE; } } } } +#ifndef ZEND_JIT_IR static zend_lifetime_interval** zend_jit_trace_allocate_registers(zend_jit_trace_rec *trace_buffer, zend_ssa *ssa, uint32_t parent_trace, uint32_t exit_num) +#else +static zend_jit_reg_var* zend_jit_trace_allocate_registers(zend_jit_trace_rec *trace_buffer, zend_ssa *ssa, uint32_t parent_trace, uint32_t exit_num) +#endif { const zend_op **ssa_opcodes = ((zend_tssa*)ssa)->tssa_opcodes; zend_jit_trace_rec *p; @@ -2710,11 +2841,15 @@ static zend_lifetime_interval** zend_jit_trace_allocate_registers(zend_jit_trace const zend_ssa *op_array_ssa; const zend_ssa_op *ssa_op; int i, j, idx, count, level; +#ifndef ZEND_JIT_IR int last_idx = -1; int *start, *end; uint8_t *flags; + zend_lifetime_interval **ra, *list, *ival; +#else + zend_jit_reg_var *ra; +#endif const zend_op_array **vars_op_array; - zend_lifetime_interval **intervals, *list, *ival; void *checkpoint; zend_jit_trace_stack_frame *frame; zend_jit_trace_stack *stack; @@ -2723,6 +2858,7 @@ static zend_lifetime_interval** zend_jit_trace_allocate_registers(zend_jit_trace zend_jit_trace_stack *parent_stack = 
parent_trace ? zend_jit_traces[parent_trace].stack_map + zend_jit_traces[parent_trace].exit_info[exit_num].stack_offset : NULL; +#ifndef ZEND_JIT_IR ALLOCA_FLAG(use_heap); ZEND_ASSERT(ssa->var_info != NULL); @@ -2741,6 +2877,12 @@ static zend_lifetime_interval** zend_jit_trace_allocate_registers(zend_jit_trace memset(start, -1, sizeof(int) * ssa->vars_count * 2); memset(flags, 0, sizeof(uint8_t) * ssa->vars_count); memset(ZEND_VOIDP(vars_op_array), 0, sizeof(zend_op_array*) * ssa->vars_count); +#else + checkpoint = zend_arena_checkpoint(CG(arena)); + ra = zend_arena_calloc(&CG(arena), ssa->vars_count, sizeof(zend_jit_reg_var)); + vars_op_array = zend_arena_calloc(&CG(arena), ssa->vars_count, sizeof(zend_op_array*)); + memset(ZEND_VOIDP(vars_op_array), 0, sizeof(zend_op_array*) * ssa->vars_count); +#endif op_array = trace_buffer->op_array; jit_extension = @@ -2766,15 +2908,20 @@ static zend_lifetime_interval** zend_jit_trace_allocate_registers(zend_jit_trace && !zend_ssa_is_no_val_use(ssa_opcodes[ssa->vars[i].use_chain], ssa->ops + ssa->vars[i].use_chain, i) && ssa->vars[i].alias == NO_ALIAS && zend_jit_var_supports_reg(ssa, i)) { - start[i] = 0; + RA_IVAL_START(i, 0); if (i < parent_vars_count && STACK_REG(parent_stack, i) != ZREG_NONE - && STACK_REG(parent_stack, i) < ZREG_NUM) { +#ifndef ZEND_JIT_IR + && STACK_REG(parent_stack, i) < ZREG_NUM +#else + && STACK_FLAGS(parent_stack, i) != ZREG_ZVAL_COPY +#endif + ) { /* We will try to reuse register from parent trace */ - flags[i] = STACK_FLAGS(parent_stack, i); + RA_IVAL_FLAGS(i) = STACK_FLAGS(parent_stack, i); count += 2; } else { - flags[i] = ZREG_LOAD; + RA_IVAL_FLAGS(i) = ZREG_LOAD; count++; } } @@ -2800,7 +2947,7 @@ static zend_lifetime_interval** zend_jit_trace_allocate_registers(zend_jit_trace if (ssa->vars[phi->ssa_var].use_chain >= 0 && ssa->vars[phi->ssa_var].alias == NO_ALIAS && zend_jit_var_supports_reg(ssa, phi->ssa_var)) { - start[phi->ssa_var] = 0; + RA_IVAL_START(phi->ssa_var, 0); count++; } phi = phi->next; @@ -2830,10 +2977,16 @@ static zend_lifetime_interval** zend_jit_trace_allocate_registers(zend_jit_trace } if (ssa_op->op1_use >= 0 - && start[ssa_op->op1_use] >= 0 + && RA_HAS_IVAL(ssa_op->op1_use) && !zend_ssa_is_no_val_use(opline, ssa_op, ssa_op->op1_use)) { if (support_opline) { - zend_jit_trace_use_var(idx, ssa_op->op1_use, ssa_op->op1_def, ssa_op->op1_use_chain, start, end, flags, ssa, ssa_opcodes, op_array, op_array_ssa); + zend_jit_trace_use_var(idx, ssa_op->op1_use, ssa_op->op1_def, ssa_op->op1_use_chain, +#ifndef ZEND_JIT_IR + start, end, flags, +#else + ra, +#endif + ssa, ssa_opcodes, op_array, op_array_ssa); if (opline->op1_type != IS_CV) { if (opline->opcode == ZEND_CASE || opline->opcode == ZEND_CASE_STRICT @@ -2849,56 +3002,71 @@ static zend_lifetime_interval** zend_jit_trace_allocate_registers(zend_jit_trace || opline->opcode == ZEND_BIND_LEXICAL || opline->opcode == ZEND_ROPE_ADD) { /* The value is kept alive and may be used outside of the trace */ - flags[ssa_op->op1_use] |= ZREG_STORE; + RA_IVAL_FLAGS(ssa_op->op1_use) |= ZREG_STORE; } else { - flags[ssa_op->op1_use] |= ZREG_LAST_USE; + RA_IVAL_FLAGS(ssa_op->op1_use) |= ZREG_LAST_USE; } } } else { - start[ssa_op->op1_use] = -1; - end[ssa_op->op1_use] = -1; + RA_IVAL_DEL(ssa_op->op1_use); count--; } } if (ssa_op->op2_use >= 0 && ssa_op->op2_use != ssa_op->op1_use - && start[ssa_op->op2_use] >= 0 + && RA_HAS_IVAL(ssa_op->op2_use) && !zend_ssa_is_no_val_use(opline, ssa_op, ssa_op->op2_use)) { +#ifndef ZEND_JIT_IR if (support_opline) { - 
zend_jit_trace_use_var(idx, ssa_op->op2_use, ssa_op->op2_def, ssa_op->op2_use_chain, start, end, flags, ssa, ssa_opcodes, op_array, op_array_ssa); +#else + /* Quick workaround to disable register allocation for unsupported operand */ + // TODO: Find a general solution ??? + if (support_opline && opline->opcode != ZEND_FETCH_DIM_R) { +#endif + zend_jit_trace_use_var(idx, ssa_op->op2_use, ssa_op->op2_def, ssa_op->op2_use_chain, +#ifndef ZEND_JIT_IR + start, end, flags, +#else + ra, +#endif + ssa, ssa_opcodes, op_array, op_array_ssa); if (opline->op2_type != IS_CV) { - flags[ssa_op->op2_use] |= ZREG_LAST_USE; + RA_IVAL_FLAGS(ssa_op->op2_use) |= ZREG_LAST_USE; } } else { - start[ssa_op->op2_use] = -1; - end[ssa_op->op2_use] = -1; + RA_IVAL_DEL(ssa_op->op2_use); count--; } } if (ssa_op->result_use >= 0 && ssa_op->result_use != ssa_op->op1_use && ssa_op->result_use != ssa_op->op2_use - && start[ssa_op->result_use] >= 0 + && RA_HAS_IVAL(ssa_op->result_use) && !zend_ssa_is_no_val_use(opline, ssa_op, ssa_op->result_use)) { if (support_opline) { - zend_jit_trace_use_var(idx, ssa_op->result_use, ssa_op->result_def, ssa_op->res_use_chain, start, end, flags, ssa, ssa_opcodes, op_array, op_array_ssa); + zend_jit_trace_use_var(idx, ssa_op->result_use, ssa_op->result_def, ssa_op->res_use_chain, +#ifndef ZEND_JIT_IR + start, end, flags, +#else + ra, +#endif + ssa, ssa_opcodes, op_array, op_array_ssa); } else { - start[ssa_op->result_use] = -1; - end[ssa_op->result_use] = -1; + RA_IVAL_DEL(ssa_op->result_use); count--; } } if (ssa_op->op1_def >= 0) { - zend_jit_close_var(stack, EX_VAR_TO_NUM(opline->op1.var), start, end, flags, idx); + RA_IVAL_CLOSE(EX_VAR_TO_NUM(opline->op1.var), idx); SET_STACK_VAR(stack, EX_VAR_TO_NUM(opline->op1.var), ssa_op->op1_def); } if (ssa_op->op2_def >= 0) { - zend_jit_close_var(stack, EX_VAR_TO_NUM(opline->op2.var), start, end, flags, idx); + RA_IVAL_CLOSE(EX_VAR_TO_NUM(opline->op2.var), idx); SET_STACK_VAR(stack, EX_VAR_TO_NUM(opline->op2.var), ssa_op->op2_def); } if (ssa_op->result_def >= 0) { - zend_jit_close_var(stack, EX_VAR_TO_NUM(opline->result.var), start, end, flags, idx); + RA_IVAL_CLOSE(EX_VAR_TO_NUM(opline->result.var), idx); SET_STACK_VAR(stack, EX_VAR_TO_NUM(opline->result.var), ssa_op->result_def); } @@ -2920,8 +3088,8 @@ static zend_lifetime_interval** zend_jit_trace_allocate_registers(zend_jit_trace || opline->opcode == ZEND_FETCH_CONSTANT) { if (!(ssa->var_info[ssa_op->result_def].type & MAY_BE_DOUBLE) || (opline->opcode != ZEND_PRE_INC && opline->opcode != ZEND_PRE_DEC)) { - start[ssa_op->result_def] = idx; vars_op_array[ssa_op->result_def] = op_array; + RA_IVAL_START(ssa_op->result_def, idx); count++; } } @@ -2930,18 +3098,24 @@ static zend_lifetime_interval** zend_jit_trace_allocate_registers(zend_jit_trace && (ssa->vars[ssa_op->op1_def].use_chain >= 0 || ssa->vars[ssa_op->op1_def].phi_use_chain) && ssa->vars[ssa_op->op1_def].alias == NO_ALIAS - && zend_jit_var_supports_reg(ssa, ssa_op->op1_def)) { - start[ssa_op->op1_def] = idx; + && zend_jit_var_supports_reg(ssa, ssa_op->op1_def) + && (!(ssa->var_info[ssa_op->op1_def].type & MAY_BE_GUARD) + || opline->opcode == ZEND_PRE_INC + || opline->opcode == ZEND_PRE_DEC + || opline->opcode == ZEND_POST_INC + || opline->opcode == ZEND_POST_DEC)) { vars_op_array[ssa_op->op1_def] = op_array; + RA_IVAL_START(ssa_op->op1_def, idx); count++; } if (ssa_op->op2_def >= 0 && (ssa->vars[ssa_op->op2_def].use_chain >= 0 || ssa->vars[ssa_op->op2_def].phi_use_chain) && ssa->vars[ssa_op->op2_def].alias == NO_ALIAS - && 
zend_jit_var_supports_reg(ssa, ssa_op->op2_def)) { - start[ssa_op->op2_def] = idx; + && zend_jit_var_supports_reg(ssa, ssa_op->op2_def) + && !(ssa->var_info[ssa_op->op2_def].type & MAY_BE_GUARD)) { vars_op_array[ssa_op->op2_def] = op_array; + RA_IVAL_START(ssa_op->op2_def, idx); count++; } } @@ -2960,29 +3134,34 @@ static zend_lifetime_interval** zend_jit_trace_allocate_registers(zend_jit_trace ssa_op++; opline++; if (ssa_op->op1_use >= 0 - && start[ssa_op->op1_use] >= 0 + && RA_HAS_IVAL(ssa_op->op1_use) && !zend_ssa_is_no_val_use(opline, ssa_op, ssa_op->op1_use)) { if (support_opline) { - zend_jit_trace_use_var(idx, ssa_op->op1_use, ssa_op->op1_def, ssa_op->op1_use_chain, start, end, flags, ssa, ssa_opcodes, op_array, op_array_ssa); + zend_jit_trace_use_var(idx, ssa_op->op1_use, ssa_op->op1_def, ssa_op->op1_use_chain, +#ifndef ZEND_JIT_IR + start, end, flags, +#else + ra, +#endif + ssa, ssa_opcodes, op_array, op_array_ssa); if (opline->op1_type != IS_CV) { - flags[ssa_op->op1_use] |= ZREG_LAST_USE; + RA_IVAL_FLAGS(ssa_op->op1_use) |= ZREG_LAST_USE; } } else { - start[ssa_op->op1_use] = -1; - end[ssa_op->op1_use] = -1; + RA_IVAL_DEL(ssa_op->op1_use); count--; } } if (ssa_op->op1_def >= 0) { - zend_jit_close_var(stack, EX_VAR_TO_NUM(opline->op1.var), start, end, flags, idx); + RA_IVAL_CLOSE(EX_VAR_TO_NUM(opline->op1.var), idx); SET_STACK_VAR(stack, EX_VAR_TO_NUM(opline->op1.var), ssa_op->op1_def); if (support_opline && (ssa->vars[ssa_op->op1_def].use_chain >= 0 || ssa->vars[ssa_op->op1_def].phi_use_chain) && ssa->vars[ssa_op->op1_def].alias == NO_ALIAS && zend_jit_var_supports_reg(ssa, ssa_op->op1_def)) { - start[ssa_op->op1_def] = idx; vars_op_array[ssa_op->op1_def] = op_array; + RA_IVAL_START(ssa_op->op1_def, idx); count++; } } @@ -2996,8 +3175,14 @@ static zend_lifetime_interval** zend_jit_trace_allocate_registers(zend_jit_trace idx++; while (opline->opcode == ZEND_RECV_INIT) { /* RECV_INIT doesn't support registers */ +#ifdef ZEND_JIT_IR + if (ssa_op->result_use >= 0 && RA_HAS_IVAL(ssa_op->result_use)) { + RA_IVAL_DEL(ssa_op->result_use); + count--; + } +#endif if (ssa_op->result_def >= 0) { - zend_jit_close_var(stack, EX_VAR_TO_NUM(opline->result.var), start, end, flags, idx); + RA_IVAL_CLOSE(EX_VAR_TO_NUM(opline->result.var), idx); SET_STACK_VAR(stack, EX_VAR_TO_NUM(opline->result.var), ssa_op->result_def); } ssa_op++; @@ -3012,7 +3197,7 @@ static zend_lifetime_interval** zend_jit_trace_allocate_registers(zend_jit_trace while (opline->opcode == ZEND_BIND_GLOBAL) { /* BIND_GLOBAL doesn't support registers */ if (ssa_op->op1_def >= 0) { - zend_jit_close_var(stack, EX_VAR_TO_NUM(opline->op1.var), start, end, flags, idx); + RA_IVAL_CLOSE(EX_VAR_TO_NUM(opline->op1.var), idx); SET_STACK_VAR(stack, EX_VAR_TO_NUM(opline->op1.var), ssa_op->op1_def); } ssa_op++; @@ -3029,6 +3214,17 @@ static zend_lifetime_interval** zend_jit_trace_allocate_registers(zend_jit_trace /* New call frames */ zend_jit_trace_stack_frame *prev_frame = frame; +#ifdef ZEND_JIT_IR + /* Clear allocated registers */ + for (i = 0; i < op_array->last_var + op_array->T; i++) { + j = STACK_VAR(stack, i); + if (j >= 0 && RA_HAS_IVAL(j) && !(RA_IVAL_FLAGS(j) & ZREG_LAST_USE)) { + RA_IVAL_DEL(j); + count--; + } + } +#endif + frame = zend_jit_trace_call_frame(frame, op_array); frame->prev = prev_frame; frame->func = (const zend_function*)p->op_array; @@ -3044,8 +3240,8 @@ static zend_lifetime_interval** zend_jit_trace_allocate_registers(zend_jit_trace if (ssa->vars[j].use_chain >= 0 && ssa->vars[j].alias == NO_ALIAS && 
zend_jit_var_supports_reg(ssa, j)) { - start[j] = idx; - flags[j] = ZREG_LOAD; + RA_IVAL_START(j, idx); + RA_IVAL_FLAGS(j) = ZREG_LOAD; count++; } j++; @@ -3057,7 +3253,7 @@ static zend_lifetime_interval** zend_jit_trace_allocate_registers(zend_jit_trace } else if (p->op == ZEND_JIT_TRACE_BACK) { /* Close exiting call frames */ for (i = 0; i < op_array->last_var; i++) { - zend_jit_close_var(stack, i, start, end, flags, idx-1); + RA_IVAL_CLOSE(i, idx-1); } op_array = p->op_array; jit_extension = @@ -3075,9 +3271,10 @@ static zend_lifetime_interval** zend_jit_trace_allocate_registers(zend_jit_trace vars_op_array[j] = op_array; if (ssa->vars[j].use_chain >= 0 && ssa->vars[j].alias == NO_ALIAS - && zend_jit_var_supports_reg(ssa, j)) { - start[j] = idx; - flags[j] = ZREG_LOAD; + && zend_jit_var_supports_reg(ssa, j) + && !(ssa->var_info[j].type & MAY_BE_GUARD)) { + RA_IVAL_START(j, idx); + RA_IVAL_FLAGS(j) = ZREG_LOAD; count++; } j++; @@ -3097,46 +3294,49 @@ static zend_lifetime_interval** zend_jit_trace_allocate_registers(zend_jit_trace while (phi) { i = phi->sources[1]; - if (start[i] >= 0 && !ssa->vars[phi->ssa_var].no_val) { - end[i] = idx; - flags[i] &= ~ZREG_LAST_USE; + if (RA_HAS_IVAL(i) && !ssa->vars[phi->ssa_var].no_val) { + RA_IVAL_END(i, idx); + RA_IVAL_FLAGS(i) &= ~ZREG_LAST_USE; } phi = phi->next; } if (trace_buffer->stop == ZEND_JIT_TRACE_STOP_LOOP) { for (i = 0; i < op_array->last_var; i++) { - if (start[i] >= 0 && !ssa->vars[i].phi_use_chain) { - end[i] = idx; - flags[i] &= ~ZREG_LAST_USE; + if (RA_HAS_IVAL(i) && !ssa->vars[i].phi_use_chain) { + RA_IVAL_END(i, idx); + RA_IVAL_FLAGS(i) &= ~ZREG_LAST_USE; } else { - zend_jit_close_var(stack, i, start, end, flags, idx); + RA_IVAL_CLOSE(i, idx); } } } +#ifndef ZEND_JIT_IR } else { last_idx = idx; for (i = 0; i < op_array->last_var; i++) { - zend_jit_close_var(stack, i, start, end, flags, idx); + RA_IVAL_CLOSE(i, idx); } while (frame->prev) { frame = frame->prev; op_array = &frame->func->op_array; stack = frame->stack; for (i = 0; i < op_array->last_var; i++) { - zend_jit_close_var(stack, i, start, end, flags, idx); + RA_IVAL_CLOSE(i, idx); } } +#endif } +#ifndef ZEND_JIT_IR if (!count) { free_alloca(start, use_heap); zend_arena_release(&CG(arena), checkpoint); return NULL; } - intervals = zend_arena_calloc(&CG(arena), ssa->vars_count, sizeof(zend_lifetime_interval)); - memset(intervals, 0, sizeof(zend_lifetime_interval*) * ssa->vars_count); + ra = zend_arena_calloc(&CG(arena), ssa->vars_count, sizeof(zend_lifetime_interval)); + memset(ra, 0, sizeof(zend_lifetime_interval*) * ssa->vars_count); list = zend_arena_alloc(&CG(arena), sizeof(zend_lifetime_interval) * count); j = 0; for (i = 0; i < ssa->vars_count; i++) { @@ -3148,7 +3348,7 @@ static zend_lifetime_interval** zend_jit_trace_allocate_registers(zend_jit_trace /* skip life range with single use */ continue; } - intervals[i] = &list[j]; + ra[i] = &list[j]; list[j].ssa_var = i; list[j].reg = ZREG_NONE; list[j].flags = flags[i]; @@ -3181,7 +3381,7 @@ static zend_lifetime_interval** zend_jit_trace_allocate_registers(zend_jit_trace } while (i > 0) { i--; - if (intervals[i] + if (RA_HAS_REG(i) && STACK_REG(parent_stack, i) != ZREG_NONE && STACK_REG(parent_stack, i) < ZREG_NUM) { list[j].ssa_var = - 1; @@ -3193,7 +3393,7 @@ static zend_lifetime_interval** zend_jit_trace_allocate_registers(zend_jit_trace list[j].hint = NULL; list[j].used_as_hint = NULL; list[j].list_next = NULL; - intervals[i]->hint = &list[j]; + ra[i]->hint = &list[j]; j++; } } @@ -3205,11 +3405,11 @@ static 
zend_lifetime_interval** zend_jit_trace_allocate_registers(zend_jit_trace zend_ssa_phi *phi = ssa->blocks[1].phis; while (phi) { - if (intervals[phi->ssa_var]) { - if (intervals[phi->sources[1]] + if (RA_HAS_REG(phi->ssa_var)) { + if (RA_HAS_REG(phi->sources[1]) && (ssa->var_info[phi->sources[1]].type & MAY_BE_ANY) == (ssa->var_info[phi->ssa_var].type & MAY_BE_ANY)) { - intervals[phi->sources[1]]->hint = intervals[phi->ssa_var]; + ra[phi->sources[1]]->hint = ra[phi->ssa_var]; } } phi = phi->next; @@ -3217,7 +3417,7 @@ static zend_lifetime_interval** zend_jit_trace_allocate_registers(zend_jit_trace } for (i = 0; i < ssa->vars_count; i++) { - if (intervals[i] && !intervals[i]->hint) { + if (RA_HAS_REG(i) && !ra[i]->hint) { if (ssa->vars[i].definition >= 0) { uint32_t line = ssa->vars[i].definition; @@ -3228,12 +3428,12 @@ static zend_lifetime_interval** zend_jit_trace_allocate_registers(zend_jit_trace case ZEND_POST_INC: case ZEND_POST_DEC: if (ssa->ops[line].op1_use >= 0 && - intervals[ssa->ops[line].op1_use] && + RA_HAS_REG(ssa->ops[line].op1_use) && (i == ssa->ops[line].op1_def || (i == ssa->ops[line].result_def && (ssa->ops[line].op1_def < 0 || - !intervals[ssa->ops[line].op1_def])))) { - zend_jit_add_hint(intervals, i, ssa->ops[line].op1_use); + !RA_HAS_REG(ssa->ops[line].op1_def))))) { + zend_jit_add_hint(ra, i, ssa->ops[line].op1_use); } break; case ZEND_SEND_VAR: @@ -3245,23 +3445,23 @@ static zend_lifetime_interval** zend_jit_trace_allocate_registers(zend_jit_trace case ZEND_PRE_DEC: if (i == ssa->ops[line].op1_def && ssa->ops[line].op1_use >= 0 && - intervals[ssa->ops[line].op1_use]) { - zend_jit_add_hint(intervals, i, ssa->ops[line].op1_use); + RA_HAS_REG(ssa->ops[line].op1_use)) { + zend_jit_add_hint(ra, i, ssa->ops[line].op1_use); } break; case ZEND_ASSIGN: if (ssa->ops[line].op2_use >= 0 && - intervals[ssa->ops[line].op2_use] && + RA_HAS_REG(ssa->ops[line].op2_use) && (i == ssa->ops[line].op2_def || (i == ssa->ops[line].op1_def && (ssa->ops[line].op2_def < 0 || - !intervals[ssa->ops[line].op2_def])) || + !RA_HAS_REG(ssa->ops[line].op2_def))) || (i == ssa->ops[line].result_def && (ssa->ops[line].op2_def < 0 || - !intervals[ssa->ops[line].op2_def]) && + !RA_HAS_REG(ssa->ops[line].op2_def)) && (ssa->ops[line].op1_def < 0 || - !intervals[ssa->ops[line].op1_def])))) { - zend_jit_add_hint(intervals, i, ssa->ops[line].op2_use); + !RA_HAS_REG(ssa->ops[line].op1_def))))) { + zend_jit_add_hint(ra, i, ssa->ops[line].op2_use); } break; case ZEND_SUB: @@ -3272,7 +3472,7 @@ static zend_lifetime_interval** zend_jit_trace_allocate_registers(zend_jit_trace case ZEND_BW_XOR: if (i == ssa->ops[line].result_def) { if (ssa->ops[line].op1_use >= 0 && - intervals[ssa->ops[line].op1_use] && + RA_HAS_REG(ssa->ops[line].op1_use) && ssa->ops[line].op1_use_chain < 0 && !ssa->vars[ssa->ops[line].op1_use].phi_use_chain && (ssa->var_info[i].type & MAY_BE_ANY) == @@ -3280,14 +3480,14 @@ static zend_lifetime_interval** zend_jit_trace_allocate_registers(zend_jit_trace zend_ssa_phi *phi = ssa->vars[ssa->ops[line].op1_use].definition_phi; if (phi && - intervals[phi->sources[1]] && - intervals[phi->sources[1]]->hint == intervals[ssa->ops[line].op1_use]) { + RA_HAS_REG(phi->sources[1]) && + ra[phi->sources[1]]->hint == ra[ssa->ops[line].op1_use]) { break; } - zend_jit_add_hint(intervals, i, ssa->ops[line].op1_use); + zend_jit_add_hint(ra, i, ssa->ops[line].op1_use); } else if (opline->opcode != ZEND_SUB && ssa->ops[line].op2_use >= 0 && - intervals[ssa->ops[line].op2_use] && + RA_HAS_REG(ssa->ops[line].op2_use) && 
ssa->ops[line].op2_use_chain < 0 && !ssa->vars[ssa->ops[line].op2_use].phi_use_chain && (ssa->var_info[i].type & MAY_BE_ANY) == @@ -3295,11 +3495,11 @@ static zend_lifetime_interval** zend_jit_trace_allocate_registers(zend_jit_trace zend_ssa_phi *phi = ssa->vars[ssa->ops[line].op2_use].definition_phi; if (phi && - intervals[phi->sources[1]] && - intervals[phi->sources[1]]->hint == intervals[ssa->ops[line].op2_use]) { + RA_HAS_REG(phi->sources[1]) && + ra[phi->sources[1]]->hint == ra[ssa->ops[line].op2_use]) { break; } - zend_jit_add_hint(intervals, i, ssa->ops[line].op2_use); + zend_jit_add_hint(ra, i, ssa->ops[line].op2_use); } } break; @@ -3308,7 +3508,7 @@ static zend_lifetime_interval** zend_jit_trace_allocate_registers(zend_jit_trace } } - list = zend_jit_sort_intervals(intervals, ssa->vars_count); + list = zend_jit_sort_intervals(ra, ssa->vars_count); if (list) { ival = list; @@ -3337,15 +3537,15 @@ static zend_lifetime_interval** zend_jit_trace_allocate_registers(zend_jit_trace if (list) { zend_lifetime_interval *ival, *next; - memset(intervals, 0, ssa->vars_count * sizeof(zend_lifetime_interval*)); + memset(ra, 0, ssa->vars_count * sizeof(zend_lifetime_interval*)); ival = list; count = 0; while (ival != NULL) { ZEND_ASSERT(ival->reg != ZREG_NONE); count++; next = ival->list_next; - ival->list_next = intervals[ival->ssa_var]; - intervals[ival->ssa_var] = ival; + ival->list_next = ra[ival->ssa_var]; + ra[ival->ssa_var] = ival; ival = next; } @@ -3365,11 +3565,28 @@ static zend_lifetime_interval** zend_jit_trace_allocate_registers(zend_jit_trace } while (i > 0) { i--; - if (intervals[i] && intervals[i]->reg != STACK_REG(parent_stack, i)) { - intervals[i]->flags |= ZREG_LOAD; + if (RA_HAS_REG(i) && ra[i]->reg != STACK_REG(parent_stack, i)) { + RA_REG_FLAGS(i) |= ZREG_LOAD; } } } +#else /* ZEND_JIT_IR */ + if (count) { + for (i = 0; i < ssa->vars_count; i++) { + if (RA_HAS_REG(i)) { + if ((RA_REG_FLAGS(i) & ZREG_LOAD) && + (RA_REG_FLAGS(i) & ZREG_LAST_USE) && + (i >= parent_vars_count || STACK_REG(parent_stack, i) == ZREG_NONE) && + zend_ssa_next_use(ssa->ops, i, ssa->vars[i].use_chain) < 0) { + /* skip life range with single use */ + RA_REG_DEL(i); + count--; + } + } + } + } + if (count) { +#endif /* SSA resolution */ if (trace_buffer->stop == ZEND_JIT_TRACE_STOP_LOOP @@ -3381,58 +3598,95 @@ static zend_lifetime_interval** zend_jit_trace_allocate_registers(zend_jit_trace int def = phi->ssa_var; int use = phi->sources[1]; - if (intervals[def]) { - if (!intervals[use]) { - intervals[def]->flags |= ZREG_LOAD; - if ((intervals[def]->flags & ZREG_LAST_USE) + if (RA_HAS_REG(def)) { + if (!RA_HAS_REG(use)) { + RA_REG_FLAGS(def) |= ZREG_LOAD; + if ((RA_REG_FLAGS(def) & ZREG_LAST_USE) && ssa->vars[def].use_chain >= 0 - && ssa->vars[def].use_chain == intervals[def]->range.end) { + && !ssa->vars[def].phi_use_chain +#ifndef ZEND_JIT_IR + && ssa->vars[def].use_chain == ra[def]->range.end +#else + && zend_ssa_next_use(ssa->ops, def, ssa->vars[def].use_chain) < 0 +#endif + ) { /* remove interval used once */ - intervals[def] = NULL; + RA_REG_DEL(def); count--; } - } else if (intervals[def]->reg != intervals[use]->reg) { - intervals[def]->flags |= ZREG_LOAD; +#ifndef ZEND_JIT_IR + } else if (ra[def]->reg != ra[use]->reg) { + RA_REG_FLAGS(def) |= ZREG_LOAD; if (ssa->vars[use].use_chain >= 0) { - intervals[use]->flags |= ZREG_STORE; + RA_REG_FLAGS(use) |= ZREG_STORE; } else { - intervals[use] = NULL; + RA_REG_DEL(use); count--; } } else { use = phi->sources[0]; - ZEND_ASSERT(!intervals[use]); - 
intervals[use] = zend_arena_alloc(&CG(arena), sizeof(zend_lifetime_interval)); - intervals[use]->ssa_var = phi->sources[0]; - intervals[use]->reg = intervals[def]->reg; - intervals[use]->flags = ZREG_LOAD; - intervals[use]->range.start = 0; - intervals[use]->range.end = 0; - intervals[use]->range.next = NULL; - intervals[use]->hint = NULL; - intervals[use]->used_as_hint = NULL; - intervals[use]->list_next = NULL; - } - } else if (intervals[use] - && (!ssa->vars[def].no_val - || ssa->var_info[def].type != ssa->var_info[use].type)) { - if (ssa->vars[use].use_chain >= 0) { - intervals[use]->flags |= ZREG_STORE; + ZEND_ASSERT(!RA_HAS_REG(use)); + ra[use] = zend_arena_alloc(&CG(arena), sizeof(zend_lifetime_interval)); + ra[use]->ssa_var = phi->sources[0]; + ra[use]->reg = ra[def]->reg; + ra[use]->flags = ZREG_LOAD; + ra[use]->range.start = 0; + ra[use]->range.end = 0; + ra[use]->range.next = NULL; + ra[use]->hint = NULL; + ra[use]->used_as_hint = NULL; + ra[use]->list_next = NULL; +#else + } else if ((ssa->var_info[def].type & MAY_BE_ANY) != (ssa->var_info[use].type & MAY_BE_ANY)) { + RA_REG_FLAGS(def) |= ZREG_LOAD; + RA_REG_FLAGS(use) |= ZREG_STORE; } else { - intervals[use] = NULL; + use = phi->sources[0]; + if (zend_jit_var_supports_reg(ssa, use)) { + ZEND_ASSERT(!RA_HAS_REG(use)); + RA_REG_START(use, 0); + RA_REG_FLAGS(use) = ZREG_LOAD; + count++; + } else { + RA_REG_FLAGS(def) |= ZREG_LOAD; + } +#endif + } + } else if (RA_HAS_REG(use) + && (!ssa->vars[def].no_val +#ifndef ZEND_JIT_IR + || ssa->var_info[def].type != ssa->var_info[use].type +#endif + )) { + if (ssa->vars[use].use_chain >= 0) { + RA_REG_FLAGS(use) |= ZREG_STORE; // TODO: ext/opcache/tests/jit/reg_alloc_00[67].phpt ??? + } else { + RA_REG_DEL(use); count--; } } phi = phi->next; } +#ifndef ZEND_JIT_IR } else { for (i = 0; i < ssa->vars_count; i++) { - if (intervals[i] - && intervals[i]->range.end == last_idx - && !(intervals[i]->flags & (ZREG_LOAD|ZREG_STORE))) { - intervals[i]->flags |= ZREG_STORE; + if (RA_HAS_REG(i) + && ra[i]->range.end == last_idx + && !(RA_REG_FLAGS(i) & (ZREG_LOAD|ZREG_STORE))) { + RA_REG_FLAGS(i) |= ZREG_STORE; } } +#else + } else if (p->stop == ZEND_JIT_TRACE_STOP_LINK + || p->stop == ZEND_JIT_TRACE_STOP_INTERPRETER) { + for (i = 0; i < op_array->last_var + op_array->T; i++) { + int var = STACK_VAR(stack, i); + if (var >= 0 && RA_HAS_REG(var) + && !(RA_REG_FLAGS(var) & (ZREG_LOAD|ZREG_STORE|ZREG_LAST_USE))) { + RA_REG_FLAGS(var) |= ZREG_STORE; + } + } +#endif } if (!count) { @@ -3441,24 +3695,47 @@ static zend_lifetime_interval** zend_jit_trace_allocate_registers(zend_jit_trace } if (JIT_G(debug) & ZEND_JIT_DEBUG_REG_ALLOC) { +#ifndef ZEND_JIT_IR fprintf(stderr, "---- TRACE %d Allocated Live Ranges\n", ZEND_JIT_TRACE_NUM); for (i = 0; i < ssa->vars_count; i++) { - ival = intervals[i]; + ival = ra[i]; while (ival) { zend_jit_dump_lifetime_interval(vars_op_array[ival->ssa_var], ssa, ival); ival = ival->list_next; } } +#else + fprintf(stderr, "---- TRACE %d Live Ranges \"%s\"\n", ZEND_JIT_TRACE_NUM, op_array->function_name ? ZSTR_VAL(op_array->function_name) : "[main]"); + for (i = 0; i < ssa->vars_count; i++) { + if (RA_HAS_REG(i)) { + fprintf(stderr, "#%d.", i); + uint32_t var_num = ssa->vars[i].var; + zend_dump_var(vars_op_array[i], (var_num < vars_op_array[i]->last_var ? 
IS_CV : 0), var_num); + if (RA_REG_FLAGS(i) & ZREG_LAST_USE) { + fprintf(stderr, " last_use"); + } + if (RA_REG_FLAGS(i) & ZREG_LOAD) { + fprintf(stderr, " load"); + } + if (RA_REG_FLAGS(i) & ZREG_STORE) { + fprintf(stderr, " store"); + } + fprintf(stderr, "\n"); + } + } + fprintf(stderr, "\n"); +#endif } - return intervals; + return ra; } - zend_arena_release(&CG(arena), checkpoint); //??? + zend_arena_release(&CG(arena), checkpoint); return NULL; } -static void zend_jit_trace_clenup_stack(zend_jit_trace_stack *stack, const zend_op *opline, const zend_ssa_op *ssa_op, const zend_ssa *ssa, zend_lifetime_interval **ra) +#ifndef ZEND_JIT_IR +static void zend_jit_trace_cleanup_stack(zend_jit_trace_stack *stack, const zend_op *opline, const zend_ssa_op *ssa_op, const zend_ssa *ssa, zend_lifetime_interval **ra) { uint32_t line = ssa_op - ssa->ops; @@ -3478,6 +3755,32 @@ static void zend_jit_trace_clenup_stack(zend_jit_trace_stack *stack, const zend_ SET_STACK_REG(stack, EX_VAR_TO_NUM(opline->result.var), ZREG_NONE); } } +#else +static void zend_jit_trace_cleanup_stack(zend_jit_ctx *jit, zend_jit_trace_stack *stack, const zend_op *opline, const zend_ssa_op *ssa_op, const zend_ssa *ssa, const zend_op **ssa_opcodes) +{ + if (ssa_op->op1_use >= 0 + && jit->ra[ssa_op->op1_use].ref + && (jit->ra[ssa_op->op1_use].flags & ZREG_LAST_USE) + && (ssa_op->op1_use_chain == -1 + || zend_ssa_is_no_val_use(ssa_opcodes[ssa_op->op1_use_chain], ssa->ops + ssa_op->op1_use_chain, ssa_op->op1_use))) { + CLEAR_STACK_REF(stack, EX_VAR_TO_NUM(opline->op1.var)); + } + if (ssa_op->op2_use >= 0 + && jit->ra[ssa_op->op2_use].ref + && (jit->ra[ssa_op->op2_use].flags & ZREG_LAST_USE) + && (ssa_op->op2_use_chain == -1 + || zend_ssa_is_no_val_use(ssa_opcodes[ssa_op->op2_use_chain], ssa->ops + ssa_op->op2_use_chain, ssa_op->op2_use))) { + CLEAR_STACK_REF(stack, EX_VAR_TO_NUM(opline->op2.var)); + } + if (ssa_op->result_use >= 0 + && jit->ra[ssa_op->result_use].ref + && (jit->ra[ssa_op->result_use].flags & ZREG_LAST_USE) + && (ssa_op->res_use_chain == -1 + || zend_ssa_is_no_val_use(ssa_opcodes[ssa_op->res_use_chain], ssa->ops + ssa_op->res_use_chain, ssa_op->result_use))) { + CLEAR_STACK_REF(stack, EX_VAR_TO_NUM(opline->result.var)); + } +} +#endif static void zend_jit_trace_setup_ret_counter(const zend_op *opline, size_t offset) { @@ -3568,10 +3871,18 @@ static int zend_jit_trace_stack_needs_deoptimization(zend_jit_trace_stack *stack uint32_t i; for (i = 0; i < stack_size; i++) { +#ifndef ZEND_JIT_IR if (STACK_REG(stack, i) != ZREG_NONE && !(STACK_FLAGS(stack, i) & (ZREG_LOAD|ZREG_STORE))) { return 1; } +#else + if (STACK_FLAGS(stack, i) & ~(ZREG_LOAD|ZREG_STORE|ZREG_LAST_USE)) { + return 1; + } else if (STACK_REG(stack, i) != ZREG_NONE) { + return 1; + } +#endif } return 0; } @@ -3592,24 +3903,39 @@ static int zend_jit_trace_exit_needs_deoptimization(uint32_t trace_num, uint32_t return zend_jit_trace_stack_needs_deoptimization(stack, stack_size); } -static int zend_jit_trace_deoptimization(dasm_State **Dst, +static int zend_jit_trace_deoptimization( +#ifndef ZEND_JIT_IR + dasm_State **jit, +#else + zend_jit_ctx *jit, +#endif uint32_t flags, const zend_op *opline, zend_jit_trace_stack *parent_stack, int parent_vars_count, zend_ssa *ssa, zend_jit_trace_stack *stack, +#ifndef ZEND_JIT_IR zend_lifetime_interval **ra, - bool polymorphic_side_trace) +#else + zend_jit_exit_const *constants, + int8_t func_reg, +#endif + bool polymorphic_side_trace) { int i; +#ifndef ZEND_JIT_IR bool has_constants = 0; bool has_unsaved_vars = 0; +#else + 
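/*
 * [Editor's sketch, not part of the patch] In the ZEND_JIT_IR build the
 * deoptimizer no longer receives lifetime intervals; it receives the
 * per-trace constant table plus the register holding the polymorphic call
 * target. Inferred from the uses below (constants[...].i and
 * constants[...].d), the table entry is presumably a small union:
 *
 *   typedef union _zend_jit_exit_const {
 *       int64_t i;   // payload of an IS_LONG slot
 *       double  d;   // payload of an IS_DOUBLE slot
 *   } zend_jit_exit_const;
 *
 * A parent-trace stack slot flagged ZREG_CONST then stores
 * STACK_REF(parent_stack, i) as an index into this table rather than as an
 * IR reference.
 */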
int check2 = -1; +#endif // TODO: Merge this loop with the following register LOAD loop to implement parallel move ??? for (i = 0; i < parent_vars_count; i++) { int8_t reg = STACK_REG(parent_stack, i); +#ifndef ZEND_JIT_IR if (reg != ZREG_NONE) { if (reg < ZREG_NUM) { if (ssa && ssa->vars[i].no_val) { @@ -3624,7 +3950,7 @@ static int zend_jit_trace_deoptimization(dasm_State **Dst, uint8_t type = STACK_TYPE(parent_stack, i); if (!(STACK_FLAGS(parent_stack, i) & (ZREG_LOAD|ZREG_STORE)) - && !zend_jit_store_var(Dst, 1 << type, i, reg, + && !zend_jit_store_var(jit, 1 << type, i, reg, STACK_MEM_TYPE(parent_stack, i) != type)) { return 0; } @@ -3637,8 +3963,106 @@ static int zend_jit_trace_deoptimization(dasm_State **Dst, has_constants = 1; } } +#else + if (STACK_FLAGS(parent_stack, i) == ZREG_CONST) { + uint8_t type = STACK_TYPE(parent_stack, i); + + if (type == IS_LONG) { + if (!zend_jit_store_const_long(jit, i, + (zend_long)constants[STACK_REF(parent_stack, i)].i)) { + return 0; + } + } else if (type == IS_DOUBLE) { + if (!zend_jit_store_const_double(jit, i, + constants[STACK_REF(parent_stack, i)].d)) { + return 0; + } + } else { + ZEND_UNREACHABLE(); + } + if (stack) { + SET_STACK_TYPE(stack, i, type, 1); + if (jit->ra && jit->ra[i].ref) { + SET_STACK_REF(stack, i, jit->ra[i].ref); + } + } + } else if (STACK_FLAGS(parent_stack, i) == ZREG_TYPE_ONLY) { + uint8_t type = STACK_TYPE(parent_stack, i); + + if (!zend_jit_store_type(jit, i, type)) { + return 0; + } + if (stack) { + SET_STACK_TYPE(stack, i, type, 1); + } + } else if (STACK_FLAGS(parent_stack, i) == ZREG_THIS) { + if (polymorphic_side_trace) { + ssa->var_info[i].delayed_fetch_this = 1; + if (stack) { + SET_STACK_REG_EX(stack, i, ZREG_NONE, ZREG_THIS); + } + } else if (!zend_jit_load_this(jit, EX_NUM_TO_VAR(i))) { + return 0; + } + } else if (STACK_FLAGS(parent_stack, i) == ZREG_ZVAL_ADDREF) { + zend_jit_addr dst = ZEND_ADDR_MEM_ZVAL(ZREG_FP, EX_NUM_TO_VAR(i)); + zend_jit_zval_try_addref(jit, dst); + } else if (STACK_FLAGS(parent_stack, i) == ZREG_ZVAL_COPY) { + ZEND_ASSERT(reg != ZREG_NONE); + ZEND_ASSERT(check2 == -1); + check2 = i; + } else if (STACK_FLAGS(parent_stack, i) & ZREG_SPILL_SLOT) { + if (ssa && ssa->vars[i].no_val) { + /* pass */ + } else { + uint8_t type = STACK_TYPE(parent_stack, i); + + if (!zend_jit_store_spill_slot(jit, 1 << type, i, reg, STACK_REF(parent_stack, i), + STACK_MEM_TYPE(parent_stack, i) != type)) { + return 0; + } + if (stack) { + if (jit->ra && jit->ra[i].ref) { + SET_STACK_TYPE(stack, i, type, 0); + if ((STACK_FLAGS(parent_stack, i) & (ZREG_LOAD|ZREG_STORE)) != 0) { + SET_STACK_REF_EX(stack, i, jit->ra[i].ref, ZREG_LOAD); + } else { + SET_STACK_REF(stack, i, jit->ra[i].ref); + } + } else { + SET_STACK_TYPE(stack, i, type, 1); + } + } + } + } else if (reg != ZREG_NONE) { + if (ssa && ssa->vars[i].no_val) { + /* pass */ + } else { + uint8_t type = STACK_TYPE(parent_stack, i); + + if (!zend_jit_store_reg(jit, 1 << type, i, reg, + (STACK_FLAGS(parent_stack, i) & (ZREG_LOAD|ZREG_STORE)) != 0, + STACK_MEM_TYPE(parent_stack, i) != type)) { + return 0; + } + if (stack) { + if (jit->ra && jit->ra[i].ref) { + SET_STACK_TYPE(stack, i, type, 0); + if ((STACK_FLAGS(parent_stack, i) & (ZREG_LOAD|ZREG_STORE)) != 0) { + SET_STACK_REF_EX(stack, i, jit->ra[i].ref, ZREG_LOAD); + } else { + SET_STACK_REF(stack, i, jit->ra[i].ref); + } + } else { + SET_STACK_TYPE(stack, i, type, 1); + } + } + } + } +#endif } +#ifndef ZEND_JIT_IR if (has_unsaved_vars && (has_constants || (flags & 
(ZEND_JIT_EXIT_RESTORE_CALL|ZEND_JIT_EXIT_FREE_OP1|ZEND_JIT_EXIT_FREE_OP2)))) { @@ -3656,7 +4080,7 @@ static int zend_jit_trace_deoptimization(dasm_State **Dst, SET_STACK_TYPE(stack, i, type, 1); } if (!(STACK_FLAGS(parent_stack, i) & (ZREG_LOAD|ZREG_STORE)) - && !zend_jit_store_var(Dst, 1 << type, i, reg, + && !zend_jit_store_var(jit, 1 << type, i, reg, STACK_MEM_TYPE(parent_stack, i) != type)) { return 0; } @@ -3679,24 +4103,38 @@ static int zend_jit_trace_deoptimization(dasm_State **Dst, if (stack) { SET_STACK_REG(stack, i, ZREG_THIS); } - } else if (!zend_jit_load_this(Dst, EX_NUM_TO_VAR(i))) { + } else if (!zend_jit_load_this(jit, EX_NUM_TO_VAR(i))) { return 0; } } else { if (reg == ZREG_ZVAL_COPY_GPR0 - &&!zend_jit_escape_if_undef_r0(Dst, i, flags, opline)) { + &&!zend_jit_escape_if_undef_r0(jit, i, flags, opline)) { return 0; } - if (!zend_jit_store_const(Dst, i, reg)) { + if (!zend_jit_store_const(jit, i, reg)) { return 0; } } } } } +#else + if (check2 != -1) { + int8_t reg = STACK_REG(parent_stack, check2); + + ZEND_ASSERT(STACK_FLAGS(parent_stack, check2) == ZREG_ZVAL_COPY); + ZEND_ASSERT(reg != ZREG_NONE); + if (!zend_jit_escape_if_undef(jit, check2, flags, opline, reg)) { + return 0; + } + if (!zend_jit_restore_zval(jit, EX_NUM_TO_VAR(check2), reg)) { + return 0; + } + } +#endif if (flags & ZEND_JIT_EXIT_RESTORE_CALL) { - if (!zend_jit_save_call_chain(Dst, -1)) { + if (!zend_jit_save_call_chain(jit, -1)) { return 0; } } @@ -3704,7 +4142,7 @@ static int zend_jit_trace_deoptimization(dasm_State **Dst, if (flags & ZEND_JIT_EXIT_FREE_OP2) { const zend_op *op = opline - 1; - if (!zend_jit_free_op(Dst, op, -1, op->op2.var)) { + if (!zend_jit_free_op(jit, op, -1, op->op2.var)) { return 0; } } @@ -3712,21 +4150,31 @@ static int zend_jit_trace_deoptimization(dasm_State **Dst, if (flags & ZEND_JIT_EXIT_FREE_OP1) { const zend_op *op = opline - 1; - if (!zend_jit_free_op(Dst, op, -1, op->op1.var)) { + if (!zend_jit_free_op(jit, op, -1, op->op1.var)) { return 0; } } if (flags & (ZEND_JIT_EXIT_FREE_OP1|ZEND_JIT_EXIT_FREE_OP2)) { - if (!zend_jit_check_exception(Dst)) { +#ifndef ZEND_JIT_IR + if (!zend_jit_check_exception(jit)) { return 0; } +#else + zend_jit_check_exception(jit); +#endif } if ((flags & ZEND_JIT_EXIT_METHOD_CALL) && !polymorphic_side_trace) { - if (!zend_jit_free_trampoline(Dst)) { +#ifndef ZEND_JIT_IR + if (!zend_jit_free_trampoline(jit)) { return 0; } +#else + if (!zend_jit_free_trampoline(jit, func_reg)) { + return 0; + } +#endif } return 1; @@ -3984,9 +4432,15 @@ static bool zend_jit_trace_next_is_send_result(const zend_op *oplin static const void *zend_jit_trace(zend_jit_trace_rec *trace_buffer, uint32_t parent_trace, uint32_t exit_num) { const void *handler = NULL; - dasm_State* dasm_state = NULL; - zend_script *script = NULL; +#ifndef ZEND_JIT_IR + dasm_State* ctx = NULL; zend_lifetime_interval **ra = NULL; +#else + zend_jit_ctx ctx; + zend_jit_ctx *jit = &ctx; + zend_jit_reg_var *ra = NULL; +#endif + zend_script *script = NULL; zend_string *name = NULL; void *checkpoint; const zend_op_array *op_array; @@ -4033,10 +4487,23 @@ static const void *zend_jit_trace(zend_jit_trace_rec *trace_buffer, uint32_t par ssa_opcodes = ((zend_tssa*)ssa)->tssa_opcodes; +#ifdef ZEND_JIT_IR + op_array = trace_buffer->op_array; + opline = trace_buffer[1].opline; + name = zend_jit_trace_name(op_array, opline->lineno); + zend_jit_trace_start(&ctx, op_array, ssa, name, ZEND_JIT_TRACE_NUM, + parent_trace ? 
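/*
 * [Editor's note, condensed from the code above] After the per-slot state is
 * materialized into the VM frame, the exit-flag actions run in a fixed order:
 *
 *   if (flags & ZEND_JIT_EXIT_RESTORE_CALL)  restore the EX(call) chain;
 *   if (flags & ZEND_JIT_EXIT_FREE_OP2)      free op2 of the previous opline;
 *   if (flags & ZEND_JIT_EXIT_FREE_OP1)      free op1 of the previous opline;
 *   if (flags & (FREE_OP1|FREE_OP2))         check for a pending exception;
 *   if (flags & ZEND_JIT_EXIT_METHOD_CALL)   release the call trampoline
 *                                            (skipped for polymorphic side
 *                                            traces, which keep the target).
 */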
&zend_jit_traces[parent_trace] : NULL, exit_num); + ctx.trace = &zend_jit_traces[ZEND_JIT_TRACE_NUM]; +#endif + /* Register allocation */ if ((JIT_G(opt_flags) & (ZEND_JIT_REG_ALLOC_LOCAL|ZEND_JIT_REG_ALLOC_GLOBAL)) && JIT_G(opt_level) >= ZEND_JIT_LEVEL_INLINE) { +#ifndef ZEND_JIT_IR ra = zend_jit_trace_allocate_registers(trace_buffer, ssa, parent_trace, exit_num); +#else + ctx.ra = ra = zend_jit_trace_allocate_registers(trace_buffer, ssa, parent_trace, exit_num); +#endif } p = trace_buffer; @@ -4051,26 +4518,27 @@ static const void *zend_jit_trace(zend_jit_trace_rec *trace_buffer, uint32_t par SET_STACK_TYPE(stack, i, IS_UNKNOWN, 1); } +#ifndef ZEND_JIT_IR opline = p[1].opline; name = zend_jit_trace_name(op_array, opline->lineno); p += ZEND_JIT_TRACE_START_REC_SIZE; - dasm_init(&dasm_state, DASM_MAXSECTION); - dasm_setupglobal(&dasm_state, dasm_labels, zend_lb_MAX); - dasm_setup(&dasm_state, dasm_actions); + dasm_init(&ctx, DASM_MAXSECTION); + dasm_setupglobal(&ctx, dasm_labels, zend_lb_MAX); + dasm_setup(&ctx, dasm_actions); jit_extension = (zend_jit_op_array_trace_extension*)ZEND_FUNC_INFO(op_array); op_array_ssa = &jit_extension->func_info.ssa; - dasm_growpc(&dasm_state, 2); /* =>0: loop header */ + dasm_growpc(&ctx, 2); /* =>0: loop header */ /* =>1: end of code */ - zend_jit_align_func(&dasm_state); + zend_jit_align_func(&ctx); if (!parent_trace) { - zend_jit_prologue(&dasm_state); + zend_jit_prologue(&ctx); } - zend_jit_trace_begin(&dasm_state, ZEND_JIT_TRACE_NUM, + zend_jit_trace_begin(&ctx, ZEND_JIT_TRACE_NUM, parent_trace ? &zend_jit_traces[parent_trace] : NULL, exit_num); if (!parent_trace) { @@ -4078,11 +4546,30 @@ static const void *zend_jit_trace(zend_jit_trace_rec *trace_buffer, uint32_t par zend_jit_track_last_valid_opline(); } else { if (zend_jit_traces[parent_trace].exit_info[exit_num].opline == NULL) { - zend_jit_trace_opline_guard(&dasm_state, opline); + zend_jit_trace_opline_guard(&ctx, opline); } else { zend_jit_reset_last_valid_opline(); } } +#else + opline = p[1].opline; + p += ZEND_JIT_TRACE_START_REC_SIZE; + + jit_extension = + (zend_jit_op_array_trace_extension*)ZEND_FUNC_INFO(op_array); + op_array_ssa = &jit_extension->func_info.ssa; + + if (!parent_trace) { + zend_jit_set_last_valid_opline(&ctx, opline); + zend_jit_track_last_valid_opline(&ctx); + } else { + if (zend_jit_traces[parent_trace].exit_info[exit_num].opline == NULL) { + zend_jit_trace_opline_guard(&ctx, opline); + } else { + zend_jit_reset_last_valid_opline(&ctx); + } + } +#endif if (JIT_G(opt_level) >= ZEND_JIT_LEVEL_INLINE) { int last_var; @@ -4092,7 +4579,7 @@ static const void *zend_jit_trace(zend_jit_trace_rec *trace_buffer, uint32_t par if (used_stack > 0) { peek_checked_stack = used_stack; - if (!zend_jit_stack_check(&dasm_state, opline, used_stack)) { + if (!zend_jit_stack_check(&ctx, opline, used_stack)) { goto jit_failure; } } @@ -4146,7 +4633,7 @@ static const void *zend_jit_trace(zend_jit_trace_rec *trace_buffer, uint32_t par || (ssa->vars[i].phi_use_chain && !(ssa->var_info[ssa->vars[i].phi_use_chain->ssa_var].type & MAY_BE_GUARD)))) { /* Check loop-invariant variable type */ - if (!zend_jit_type_guard(&dasm_state, opline, EX_NUM_TO_VAR(i), concrete_type(info))) { + if (!zend_jit_type_guard(&ctx, opline, EX_NUM_TO_VAR(i), concrete_type(info))) { goto jit_failure; } info &= ~MAY_BE_GUARD; @@ -4167,7 +4654,7 @@ static const void *zend_jit_trace(zend_jit_trace_rec *trace_buffer, uint32_t par && (ssa->vars[i].use_chain != -1 || (ssa->vars[i].phi_use_chain && 
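/*
 * [Editor's sketch, not part of the patch] The zend_jit_type_guard() calls
 * emitted around here reduce, conceptually, to a check of the cached zval
 * type byte with a side exit on mismatch. Illustrative C only; the real code
 * emits IR/machine code that jumps to a precomputed exit address:
 */
#if 0 /* illustration only */
static zend_always_inline bool trace_type_guard(zend_execute_data *execute_data,
                                                uint32_t var, uint8_t expected_type)
{
	if (UNEXPECTED(Z_TYPE_P(EX_VAR(var)) != expected_type)) {
		return false; /* leave the trace through the side exit */
	}
	return true; /* type is loop-invariant, stay on the fast path */
}
#endif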
!(ssa->var_info[ssa->vars[i].phi_use_chain->ssa_var].type & MAY_BE_PACKED_GUARD)))) { - if (!zend_jit_packed_guard(&dasm_state, opline, EX_NUM_TO_VAR(i), info)) { + if (!zend_jit_packed_guard(&ctx, opline, EX_NUM_TO_VAR(i), info)) { goto jit_failure; } info &= ~MAY_BE_PACKED_GUARD; @@ -4177,10 +4664,16 @@ if (parent_trace) { /* Deoptimization */ - if (!zend_jit_trace_deoptimization(&dasm_state, + if (!zend_jit_trace_deoptimization(&ctx, zend_jit_traces[parent_trace].exit_info[exit_num].flags, zend_jit_traces[parent_trace].exit_info[exit_num].opline, - parent_stack, parent_vars_count, ssa, stack, ra, + parent_stack, parent_vars_count, ssa, stack, +#ifndef ZEND_JIT_IR + ra, +#else + zend_jit_traces[parent_trace].constants, + zend_jit_traces[parent_trace].exit_info[exit_num].poly_func_reg, +#endif polymorphic_side_trace)) { goto jit_failure; } @@ -4190,25 +4683,41 @@ && trace_buffer->stop != ZEND_JIT_TRACE_STOP_RECURSIVE_CALL && trace_buffer->stop != ZEND_JIT_TRACE_STOP_RECURSIVE_RET) { for (i = 0; i < last_var; i++) { - if (ra[i] - && (ra[i]->flags & ZREG_LOAD) != 0 - && ra[i]->reg != stack[i].reg) { + if (RA_HAS_REG(i) + && (RA_REG_FLAGS(i) & ZREG_LOAD) != 0 +#ifndef ZEND_JIT_IR + && ra[i]->reg != stack[i].reg +#else + && ra[i].ref != STACK_REF(stack, i) +#endif + ) { if ((ssa->var_info[i].type & MAY_BE_GUARD) != 0) { uint8_t op_type; ssa->var_info[i].type &= ~MAY_BE_GUARD; op_type = concrete_type(ssa->var_info[i].type); - if (!zend_jit_type_guard(&dasm_state, opline, EX_NUM_TO_VAR(i), op_type)) { + if (!zend_jit_type_guard(&ctx, opline, EX_NUM_TO_VAR(i), op_type)) { goto jit_failure; } SET_STACK_TYPE(stack, i, op_type, 1); } - SET_STACK_REG_EX(stack, i, ra[i]->reg, ZREG_LOAD); - if (!zend_jit_load_var(&dasm_state, ssa->var_info[i].type, i, ra[i]->reg)) { +#ifndef ZEND_JIT_IR + if (!zend_jit_load_var(&ctx, ssa->var_info[i].type, i, ra[i]->reg)) { goto jit_failure; } + SET_STACK_REG_EX(stack, i, ra[i]->reg, ZREG_LOAD); +#else + if (trace_buffer->stop == ZEND_JIT_TRACE_STOP_LOOP) { + if (!zend_jit_load_var(&ctx, ssa->var_info[i].type, i, i)) { + goto jit_failure; + } + SET_STACK_REF_EX(stack, i, ra[i].ref, ZREG_LOAD); + } else { + SET_STACK_REF_EX(stack, i, IR_NULL, ZREG_LOAD); + } +#endif } } } @@ -4218,7 +4727,8 @@ static const void *zend_jit_trace(zend_jit_trace_rec *trace_buffer, uint32_t par || trace_buffer->stop == ZEND_JIT_TRACE_STOP_RECURSIVE_CALL || trace_buffer->stop == ZEND_JIT_TRACE_STOP_RECURSIVE_RET) { - zend_jit_label(&dasm_state, 0); /* start of of trace loop */ +#ifndef ZEND_JIT_IR + zend_jit_label(&ctx, 0); /* start of trace loop */ if (ra) { zend_ssa_phi *phi = ssa->blocks[1].phis; @@ -4232,7 +4742,7 @@ ZEND_ASSERT(ival->reg != ZREG_NONE); if (info & MAY_BE_GUARD) { - if (!zend_jit_type_guard(&dasm_state, opline, EX_NUM_TO_VAR(phi->var), concrete_type(info))) { + if (!zend_jit_type_guard(&ctx, opline, EX_NUM_TO_VAR(phi->var), concrete_type(info))) { goto jit_failure; } info &= ~MAY_BE_GUARD; @@ -4240,14 +4750,14 @@ SET_STACK_TYPE(stack, phi->var, concrete_type(info), 1); } SET_STACK_REG_EX(stack, phi->var, ival->reg, ZREG_LOAD); - if (!zend_jit_load_var(&dasm_state, ssa->var_info[phi->ssa_var].type, ssa->vars[phi->ssa_var].var, ival->reg)) { + if
(!zend_jit_load_var(&ctx, ssa->var_info[phi->ssa_var].type, ssa->vars[phi->ssa_var].var, ival->reg)) { goto jit_failure; } } else if (ival->flags & ZREG_STORE) { ZEND_ASSERT(ival->reg != ZREG_NONE); SET_STACK_REG_EX(stack, phi->var, ival->reg, ZREG_STORE); - if (!zend_jit_store_var(&dasm_state, ssa->var_info[phi->ssa_var].type, ssa->vars[phi->ssa_var].var, ival->reg, + if (!zend_jit_store_var(&ctx, ssa->var_info[phi->ssa_var].type, ssa->vars[phi->ssa_var].var, ival->reg, STACK_MEM_TYPE(stack, phi->var) != ssa->var_info[phi->ssa_var].type)) { goto jit_failure; } @@ -4259,9 +4769,59 @@ phi = phi->next; } } +#else + jit->trace_loop_ref = zend_jit_trace_begin_loop(&ctx); /* start of trace loop */ + + if (ra) { + zend_ssa_phi *phi = ssa->blocks[1].phis; + + /* First try to insert IR Phi */ + while (phi) { + if (RA_HAS_REG(phi->ssa_var) + && !(RA_REG_FLAGS(phi->ssa_var) & ZREG_LOAD)) { + zend_jit_trace_gen_phi(&ctx, phi); + SET_STACK_REF(stack, phi->var, ra[phi->ssa_var].ref); + } + phi = phi->next; + } + + phi = ssa->blocks[1].phis; + while (phi) { + if (RA_HAS_REG(phi->ssa_var)) { + if (RA_REG_FLAGS(phi->ssa_var) & ZREG_LOAD) { + uint32_t info = ssa->var_info[phi->ssa_var].type; + + if (info & MAY_BE_GUARD) { + if (!zend_jit_type_guard(&ctx, opline, EX_NUM_TO_VAR(phi->var), concrete_type(info))) { + goto jit_failure; + } + info &= ~MAY_BE_GUARD; + ssa->var_info[phi->ssa_var].type = info; + SET_STACK_TYPE(stack, phi->var, concrete_type(info), 1); + } + if (!zend_jit_load_var(&ctx, ssa->var_info[phi->ssa_var].type, ssa->vars[phi->ssa_var].var, phi->ssa_var)) { + goto jit_failure; + } + SET_STACK_REF_EX(stack, phi->var, ra[phi->ssa_var].ref, ZREG_LOAD); + } else if (RA_REG_FLAGS(phi->ssa_var) & ZREG_STORE) { + + if (!zend_jit_store_var(&ctx, ssa->var_info[phi->ssa_var].type, ssa->vars[phi->ssa_var].var, phi->ssa_var, + STACK_MEM_TYPE(stack, phi->var) != ssa->var_info[phi->ssa_var].type)) { + goto jit_failure; + } + SET_STACK_REF_EX(stack, phi->var, ra[phi->ssa_var].ref, ZREG_STORE); + } else { + /* Register has to be written back on side exit */ + SET_STACK_REF(stack, phi->var, ra[phi->ssa_var].ref); + } + } + phi = phi->next; + } + } +#endif // if (trace_buffer->stop != ZEND_JIT_TRACE_STOP_RECURSIVE_RET) { -// if (ra && zend_jit_trace_stack_needs_deoptimization(stack, op_array->last_var + op_array->T)) { // uint32_t exit_point = zend_jit_trace_get_exit_point(opline, ZEND_JIT_EXIT_TO_VM); // // timeout_exit_addr = zend_jit_trace_get_exit_addr(exit_point); @@ -4271,6 +4831,7 @@ // } // } +#ifndef ZEND_JIT_IR if (ra && trace_buffer->stop != ZEND_JIT_TRACE_STOP_LOOP) { int last_var = op_array->last_var; @@ -4278,14 +4839,15 @@ last_var += op_array->T; } for (i = 0; i < last_var; i++) { - if (ra && ra[i] && (ra[i]->flags & ZREG_LOAD) != 0) { + if (ra && RA_HAS_REG(i) && (RA_REG_FLAGS(i) & ZREG_LOAD) != 0) { SET_STACK_REG_EX(stack, i, ra[i]->reg, ZREG_LOAD); - if (!zend_jit_load_var(&dasm_state, ssa->var_info[i].type, i, ra[i]->reg)) { + if (!zend_jit_load_var(&ctx, ssa->var_info[i].type, i, ra[i]->reg)) { goto jit_failure; } } } } +#endif } ssa_op = (JIT_G(opt_level) >= ZEND_JIT_LEVEL_INLINE) ?
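/*
 * [Editor's note] The two phi walks added for the IR build above are
 * deliberately ordered: the first pass creates the IR Phi nodes at the loop
 * header and publishes them via SET_STACK_REF(), so the second pass (and all
 * later uses in the loop body) already resolve to the phi result.
 * Schematically, for a counter that stays IS_LONG across the back edge:
 *
 *   loop:  i1 = PHI(i0, i2)   ; created in pass 1
 *          ...
 *          i2 = ADD(i1, 1)    ; loop-body use/def
 *          GUARD(no overflow)
 *          goto loop          ; back edge fixes the second PHI input to i2
 *
 * Variables that keep no register across the back edge are instead reloaded
 * (ZREG_LOAD) or written back (ZREG_STORE) in the second pass.
 */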
ssa->ops : NULL; @@ -4344,7 +4906,6 @@ static const void *zend_jit_trace(zend_jit_trace_rec *trace_buffer, uint32_t par } if (JIT_G(opt_level) >= ZEND_JIT_LEVEL_INLINE) { - gen_handler = 0; switch (opline->opcode) { case ZEND_PRE_INC: case ZEND_PRE_DEC: @@ -4360,8 +4921,10 @@ static const void *zend_jit_trace(zend_jit_trace_rec *trace_buffer, uint32_t par } if (opline->result_type != IS_UNUSED) { res_use_info = zend_jit_trace_type_to_info( - STACK_MEM_TYPE(stack, EX_VAR_TO_NUM(opline->result.var))) - & (MAY_BE_UNDEF|MAY_BE_NULL|MAY_BE_FALSE|MAY_BE_TRUE|MAY_BE_LONG|MAY_BE_DOUBLE); + STACK_MEM_TYPE(stack, EX_VAR_TO_NUM(opline->result.var))); + if (opline->result_type == IS_CV) { + res_use_info &= (MAY_BE_UNDEF|MAY_BE_NULL|MAY_BE_FALSE|MAY_BE_TRUE|MAY_BE_LONG|MAY_BE_DOUBLE); + } res_info = RES_INFO(); res_addr = RES_REG_ADDR(); } else { @@ -4374,7 +4937,7 @@ static const void *zend_jit_trace(zend_jit_trace_rec *trace_buffer, uint32_t par && !has_concrete_type(op1_def_info)) { op1_def_info &= ~MAY_BE_GUARD; } - if (!zend_jit_inc_dec(&dasm_state, opline, + if (!zend_jit_inc_dec(&ctx, opline, op1_info, OP1_REG_ADDR(), op1_def_info, OP1_DEF_REG_ADDR(), res_use_info, res_info, @@ -4389,11 +4952,25 @@ static const void *zend_jit_trace(zend_jit_trace_rec *trace_buffer, uint32_t par if (opline->result_type != IS_UNUSED) { ssa->var_info[ssa_op->result_def].type &= ~MAY_BE_GUARD; } +#ifdef ZEND_JIT_IR + } else if ((op1_def_info & (MAY_BE_ANY|MAY_BE_GUARD)) == (MAY_BE_DOUBLE|MAY_BE_GUARD) + && !(op1_info & MAY_BE_STRING)) { + ssa->var_info[ssa_op->op1_def].type &= ~MAY_BE_GUARD; + if (opline->result_type != IS_UNUSED) { + ssa->var_info[ssa_op->result_def].type &= ~MAY_BE_GUARD; + } +#endif } if (opline->result_type != IS_UNUSED && (res_info & (MAY_BE_ANY|MAY_BE_GUARD)) == (MAY_BE_LONG|MAY_BE_GUARD) && !(op1_info & MAY_BE_STRING)) { ssa->var_info[ssa_op->result_def].type &= ~MAY_BE_GUARD; +#ifdef ZEND_JIT_IR + } else if (opline->result_type != IS_UNUSED + && (res_info & (MAY_BE_ANY|MAY_BE_GUARD)) == (MAY_BE_DOUBLE|MAY_BE_GUARD) + && !(res_info & MAY_BE_STRING)) { + ssa->var_info[ssa_op->result_def].type &= ~MAY_BE_GUARD; +#endif } goto done; case ZEND_BW_OR: @@ -4406,9 +4983,11 @@ static const void *zend_jit_trace(zend_jit_trace_rec *trace_buffer, uint32_t par CHECK_OP1_TRACE_TYPE(); op2_info = OP2_INFO(); CHECK_OP2_TRACE_TYPE(); +#ifndef ZEND_JIT_IR if ((op1_info & MAY_BE_UNDEF) || (op2_info & MAY_BE_UNDEF)) { break; } +#endif if (!(op1_info & MAY_BE_LONG) || !(op2_info & MAY_BE_LONG)) { break; @@ -4419,16 +4998,18 @@ static const void *zend_jit_trace(zend_jit_trace_rec *trace_buffer, uint32_t par send_result = 1; res_use_info = -1; res_addr = ZEND_ADDR_MEM_ZVAL(ZREG_RX, (opline+1)->result.var); - if (!zend_jit_reuse_ip(&dasm_state)) { + if (!zend_jit_reuse_ip(&ctx)) { goto jit_failure; } } else { res_use_info = zend_jit_trace_type_to_info( - STACK_MEM_TYPE(stack, EX_VAR_TO_NUM(opline->result.var))) - & (MAY_BE_UNDEF|MAY_BE_NULL|MAY_BE_FALSE|MAY_BE_TRUE|MAY_BE_LONG|MAY_BE_DOUBLE); + STACK_MEM_TYPE(stack, EX_VAR_TO_NUM(opline->result.var))); + if (opline->result_type == IS_CV) { + res_use_info &= (MAY_BE_UNDEF|MAY_BE_NULL|MAY_BE_FALSE|MAY_BE_TRUE|MAY_BE_LONG|MAY_BE_DOUBLE); + } } res_info = RES_INFO(); - if (!zend_jit_long_math(&dasm_state, opline, + if (!zend_jit_long_math(&ctx, opline, op1_info, OP1_RANGE(), OP1_REG_ADDR(), op2_info, OP2_RANGE(), OP2_REG_ADDR(), res_use_info, res_info, res_addr, @@ -4444,12 +5025,27 @@ static const void *zend_jit_trace(zend_jit_trace_rec *trace_buffer, uint32_t par 
op1_addr = OP1_REG_ADDR(); op2_info = OP2_INFO(); op2_addr = OP2_REG_ADDR(); +#ifdef ZEND_JIT_IR + if ((op1_info & MAY_BE_UNDEF) || (op2_info & MAY_BE_UNDEF)) { + break; + } + if (opline->opcode == ZEND_ADD && + (op1_info & (MAY_BE_ANY|MAY_BE_UNDEF)) == MAY_BE_ARRAY && + (op2_info & (MAY_BE_ANY|MAY_BE_UNDEF)) == MAY_BE_ARRAY) { + /* pass */ + } else if (!(op1_info & (MAY_BE_LONG|MAY_BE_DOUBLE)) || + !(op2_info & (MAY_BE_LONG|MAY_BE_DOUBLE))) { + break; + } +#endif if (orig_op1_type != IS_UNKNOWN && (orig_op1_type & IS_TRACE_REFERENCE) && opline->op1_type == IS_CV +#ifndef ZEND_JIT_IR && (Z_MODE(op2_addr) != IS_REG || Z_REG(op2_addr) != ZREG_FCARG1) +#endif && (orig_op2_type == IS_UNKNOWN || !(orig_op2_type & IS_TRACE_REFERENCE))) { - if (!zend_jit_fetch_reference(&dasm_state, opline, orig_op1_type, &op1_info, &op1_addr, + if (!zend_jit_fetch_reference(&ctx, opline, orig_op1_type, &op1_info, &op1_addr, !ssa->var_info[ssa_op->op1_use].guarded_reference, 1)) { goto jit_failure; } @@ -4462,9 +5058,11 @@ static const void *zend_jit_trace(zend_jit_trace_rec *trace_buffer, uint32_t par if (orig_op2_type != IS_UNKNOWN && (orig_op2_type & IS_TRACE_REFERENCE) && opline->op2_type == IS_CV +#ifndef ZEND_JIT_IR && (Z_MODE(op1_addr) != IS_REG || Z_REG(op1_addr) != ZREG_FCARG1) +#endif && (orig_op1_type == IS_UNKNOWN || !(orig_op1_type & IS_TRACE_REFERENCE))) { - if (!zend_jit_fetch_reference(&dasm_state, opline, orig_op2_type, &op2_info, &op2_addr, + if (!zend_jit_fetch_reference(&ctx, opline, orig_op2_type, &op2_info, &op2_addr, !ssa->var_info[ssa_op->op2_use].guarded_reference, 1)) { goto jit_failure; } @@ -4474,6 +5072,7 @@ static const void *zend_jit_trace(zend_jit_trace_rec *trace_buffer, uint32_t par } else { CHECK_OP2_TRACE_TYPE(); } +#ifndef ZEND_JIT_IR if ((op1_info & MAY_BE_UNDEF) || (op2_info & MAY_BE_UNDEF)) { break; } @@ -4485,33 +5084,50 @@ static const void *zend_jit_trace(zend_jit_trace_rec *trace_buffer, uint32_t par !(op2_info & (MAY_BE_LONG|MAY_BE_DOUBLE))) { break; } +#endif res_addr = RES_REG_ADDR(); if (Z_MODE(res_addr) != IS_REG && zend_jit_trace_next_is_send_result(opline, p, frame)) { send_result = 1; res_use_info = -1; res_addr = ZEND_ADDR_MEM_ZVAL(ZREG_RX, (opline+1)->result.var); - if (!zend_jit_reuse_ip(&dasm_state)) { + if (!zend_jit_reuse_ip(&ctx)) { goto jit_failure; } } else { res_use_info = zend_jit_trace_type_to_info( - STACK_MEM_TYPE(stack, EX_VAR_TO_NUM(opline->result.var))) - & (MAY_BE_UNDEF|MAY_BE_NULL|MAY_BE_FALSE|MAY_BE_TRUE|MAY_BE_LONG|MAY_BE_DOUBLE); + STACK_MEM_TYPE(stack, EX_VAR_TO_NUM(opline->result.var))); + if (opline->result_type == IS_CV) { + res_use_info &= (MAY_BE_UNDEF|MAY_BE_NULL|MAY_BE_FALSE|MAY_BE_TRUE|MAY_BE_LONG|MAY_BE_DOUBLE); + } } res_info = RES_INFO(); if (opline->opcode == ZEND_ADD && (op1_info & (MAY_BE_ANY|MAY_BE_UNDEF)) == MAY_BE_ARRAY && (op2_info & (MAY_BE_ANY|MAY_BE_UNDEF)) == MAY_BE_ARRAY) { - if (!zend_jit_add_arrays(&dasm_state, opline, op1_info, op1_addr, op2_info, op2_addr, res_addr)) { + if (!zend_jit_add_arrays(&ctx, opline, op1_info, op1_addr, op2_info, op2_addr, res_addr)) { goto jit_failure; } } else { - if (!zend_jit_math(&dasm_state, opline, + bool may_overflow = (op1_info & MAY_BE_LONG) && (op2_info & MAY_BE_LONG) && (res_info & (MAY_BE_DOUBLE|MAY_BE_GUARD)) && zend_may_overflow(opline, ssa_op, op_array, ssa); + +#ifdef ZEND_JIT_IR + if (ra + && may_overflow + && ((res_info & MAY_BE_GUARD) + && (res_info & MAY_BE_ANY) == MAY_BE_LONG) + && ((opline->opcode == ZEND_ADD + && Z_MODE(op2_addr) == IS_CONST_ZVAL && 
Z_LVAL_P(Z_ZV(op2_addr)) == 1) + || (opline->opcode == ZEND_SUB + && Z_MODE(op2_addr) == IS_CONST_ZVAL && Z_LVAL_P(Z_ZV(op2_addr)) == 1))) { + zend_jit_trace_cleanup_stack(&ctx, stack, opline, ssa_op, ssa, ssa_opcodes); + } +#endif + if (!zend_jit_math(&ctx, opline, op1_info, op1_addr, op2_info, op2_addr, res_use_info, res_info, res_addr, - (op1_info & MAY_BE_LONG) && (op2_info & MAY_BE_LONG) && (res_info & (MAY_BE_DOUBLE|MAY_BE_GUARD)) && zend_may_overflow(opline, ssa_op, op_array, ssa), + may_overflow, zend_may_throw(opline, ssa_op, op_array, ssa))) { goto jit_failure; } @@ -4540,11 +5156,11 @@ static const void *zend_jit_trace(zend_jit_trace_rec *trace_buffer, uint32_t par if (zend_jit_trace_next_is_send_result(opline, p, frame)) { send_result = 1; res_addr = ZEND_ADDR_MEM_ZVAL(ZREG_RX, (opline+1)->result.var); - if (!zend_jit_reuse_ip(&dasm_state)) { + if (!zend_jit_reuse_ip(&ctx)) { goto jit_failure; } } - if (!zend_jit_concat(&dasm_state, opline, + if (!zend_jit_concat(&ctx, opline, op1_info, op2_info, res_addr, zend_may_throw(opline, ssa_op, op_array, ssa))) { goto jit_failure; @@ -4567,7 +5183,7 @@ static const void *zend_jit_trace(zend_jit_trace_rec *trace_buffer, uint32_t par && !has_concrete_type(op1_def_info)) { op1_def_info &= ~MAY_BE_GUARD; } - if (!zend_jit_assign_op(&dasm_state, opline, + if (!zend_jit_assign_op(&ctx, opline, op1_info, op1_def_info, OP1_RANGE(), op2_info, OP2_RANGE(), (op1_info & MAY_BE_LONG) && (op2_info & MAY_BE_LONG) && (op1_def_info & (MAY_BE_DOUBLE|MAY_BE_GUARD)) && zend_may_overflow(opline, ssa_op, op_array, ssa), @@ -4602,7 +5218,7 @@ static const void *zend_jit_trace(zend_jit_trace_rec *trace_buffer, uint32_t par if (opline->op1_type == IS_VAR) { if (orig_op1_type != IS_UNKNOWN && (orig_op1_type & IS_TRACE_INDIRECT)) { - if (!zend_jit_fetch_indirect_var(&dasm_state, opline, orig_op1_type, + if (!zend_jit_fetch_indirect_var(&ctx, opline, orig_op1_type, &op1_info, &op1_addr, !ssa->var_info[ssa_op->op1_use].indirect_reference)) { goto jit_failure; } @@ -4612,7 +5228,7 @@ static const void *zend_jit_trace(zend_jit_trace_rec *trace_buffer, uint32_t par } if (orig_op1_type != IS_UNKNOWN && (orig_op1_type & IS_TRACE_REFERENCE)) { - if (!zend_jit_fetch_reference(&dasm_state, opline, orig_op1_type, &op1_info, &op1_addr, + if (!zend_jit_fetch_reference(&ctx, opline, orig_op1_type, &op1_info, &op1_addr, !ssa->var_info[ssa_op->op1_use].guarded_reference, 1)) { goto jit_failure; } @@ -4628,7 +5244,7 @@ static const void *zend_jit_trace(zend_jit_trace_rec *trace_buffer, uint32_t par op1_data_info = OP1_DATA_INFO(); CHECK_OP1_DATA_TRACE_TYPE(); op1_def_info = OP1_DEF_INFO(); - if (!zend_jit_assign_dim_op(&dasm_state, opline, + if (!zend_jit_assign_dim_op(&ctx, opline, op1_info, op1_def_info, op1_addr, op2_info, op1_data_info, OP1_DATA_RANGE(), val_type, zend_may_throw_ex(opline, ssa_op, op_array, ssa, op1_info, op2_info))) { @@ -4667,7 +5283,7 @@ static const void *zend_jit_trace(zend_jit_trace_rec *trace_buffer, uint32_t par if (orig_op1_type != IS_UNKNOWN && (orig_op1_type & IS_TRACE_INDIRECT)) { op1_indirect = 1; - if (!zend_jit_fetch_indirect_var(&dasm_state, opline, orig_op1_type, + if (!zend_jit_fetch_indirect_var(&ctx, opline, orig_op1_type, &op1_info, &op1_addr, !ssa->var_info[ssa_op->op1_use].indirect_reference)) { goto jit_failure; } @@ -4675,7 +5291,7 @@ static const void *zend_jit_trace(zend_jit_trace_rec *trace_buffer, uint32_t par } if (orig_op1_type != IS_UNKNOWN && (orig_op1_type & IS_TRACE_REFERENCE)) { - if (!zend_jit_fetch_reference(&dasm_state, 
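/*
 * [Editor's sketch, not part of the patch] The ADD(x,1)/SUB(x,1) special
 * case above calls zend_jit_trace_cleanup_stack() before emitting the math,
 * so last-use operands are released from the abstract stack and the overflow
 * side exit does not keep them alive. The guarded increment itself behaves
 * like this (illustrative C, assuming GCC/Clang __builtin_add_overflow):
 */
#if 0 /* illustration only */
static bool guarded_long_inc(zend_long *val)
{
	zend_long res;
	if (UNEXPECTED(__builtin_add_overflow(*val, (zend_long)1, &res))) {
		return false; /* overflow: side exit; the result becomes IS_DOUBLE */
	}
	*val = res;
	return true; /* still IS_LONG, stay on trace */
}
#endif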
opline, orig_op1_type, &op1_info, &op1_addr, + if (!zend_jit_fetch_reference(&ctx, opline, orig_op1_type, &op1_info, &op1_addr, !ssa->var_info[ssa_op->op1_use].guarded_reference, 1)) { goto jit_failure; } @@ -4709,7 +5325,7 @@ static const void *zend_jit_trace(zend_jit_trace_rec *trace_buffer, uint32_t par on_this = op_array->opcodes[op_array_ssa->vars[op_array_ssa->ops[opline-op_array->opcodes].op1_use].definition].opcode == ZEND_FETCH_THIS; } } - if (!zend_jit_incdec_obj(&dasm_state, opline, op_array, ssa, ssa_op, + if (!zend_jit_incdec_obj(&ctx, opline, op_array, ssa, ssa_op, op1_info, op1_addr, op1_indirect, ce, ce_is_instanceof, on_this, delayed_fetch_this, op1_ce, val_type)) { @@ -4758,7 +5374,7 @@ static const void *zend_jit_trace(zend_jit_trace_rec *trace_buffer, uint32_t par if (orig_op1_type != IS_UNKNOWN && (orig_op1_type & IS_TRACE_INDIRECT)) { op1_indirect = 1; - if (!zend_jit_fetch_indirect_var(&dasm_state, opline, orig_op1_type, + if (!zend_jit_fetch_indirect_var(&ctx, opline, orig_op1_type, &op1_info, &op1_addr, !ssa->var_info[ssa_op->op1_use].indirect_reference)) { goto jit_failure; } @@ -4766,7 +5382,7 @@ static const void *zend_jit_trace(zend_jit_trace_rec *trace_buffer, uint32_t par } if (orig_op1_type != IS_UNKNOWN && (orig_op1_type & IS_TRACE_REFERENCE)) { - if (!zend_jit_fetch_reference(&dasm_state, opline, orig_op1_type, &op1_info, &op1_addr, + if (!zend_jit_fetch_reference(&ctx, opline, orig_op1_type, &op1_info, &op1_addr, !ssa->var_info[ssa_op->op1_use].guarded_reference, 1)) { goto jit_failure; } @@ -4802,7 +5418,7 @@ static const void *zend_jit_trace(zend_jit_trace_rec *trace_buffer, uint32_t par } op1_data_info = OP1_DATA_INFO(); CHECK_OP1_DATA_TRACE_TYPE(); - if (!zend_jit_assign_obj_op(&dasm_state, opline, op_array, ssa, ssa_op, + if (!zend_jit_assign_obj_op(&ctx, opline, op_array, ssa, ssa_op, op1_info, op1_addr, op1_data_info, OP1_DATA_RANGE(), op1_indirect, ce, ce_is_instanceof, on_this, delayed_fetch_this, op1_ce, val_type)) { @@ -4838,7 +5454,7 @@ static const void *zend_jit_trace(zend_jit_trace_rec *trace_buffer, uint32_t par if (orig_op1_type != IS_UNKNOWN && (orig_op1_type & IS_TRACE_INDIRECT)) { op1_indirect = 1; - if (!zend_jit_fetch_indirect_var(&dasm_state, opline, orig_op1_type, + if (!zend_jit_fetch_indirect_var(&ctx, opline, orig_op1_type, &op1_info, &op1_addr, !ssa->var_info[ssa_op->op1_use].indirect_reference)) { goto jit_failure; } @@ -4846,7 +5462,7 @@ static const void *zend_jit_trace(zend_jit_trace_rec *trace_buffer, uint32_t par } if (orig_op1_type != IS_UNKNOWN && (orig_op1_type & IS_TRACE_REFERENCE)) { - if (!zend_jit_fetch_reference(&dasm_state, opline, orig_op1_type, &op1_info, &op1_addr, + if (!zend_jit_fetch_reference(&ctx, opline, orig_op1_type, &op1_info, &op1_addr, !ssa->var_info[ssa_op->op1_use].guarded_reference, 1)) { goto jit_failure; } @@ -4882,7 +5498,7 @@ static const void *zend_jit_trace(zend_jit_trace_rec *trace_buffer, uint32_t par } op1_data_info = OP1_DATA_INFO(); CHECK_OP1_DATA_TRACE_TYPE(); - if (!zend_jit_assign_obj(&dasm_state, opline, op_array, ssa, ssa_op, + if (!zend_jit_assign_obj(&ctx, opline, op_array, ssa, ssa_op, op1_info, op1_addr, op1_data_info, op1_indirect, ce, ce_is_instanceof, on_this, delayed_fetch_this, op1_ce, val_type, @@ -4908,7 +5524,7 @@ static const void *zend_jit_trace(zend_jit_trace_rec *trace_buffer, uint32_t par if (orig_op1_type != IS_UNKNOWN && (orig_op1_type & IS_TRACE_INDIRECT) && opline->result_type == IS_UNUSED) { - if (!zend_jit_fetch_indirect_var(&dasm_state, opline, 
orig_op1_type, + if (!zend_jit_fetch_indirect_var(&ctx, opline, orig_op1_type, &op1_info, &op1_addr, !ssa->var_info[ssa_op->op1_use].indirect_reference)) { goto jit_failure; } @@ -4918,7 +5534,7 @@ static const void *zend_jit_trace(zend_jit_trace_rec *trace_buffer, uint32_t par } if (orig_op1_type != IS_UNKNOWN && (orig_op1_type & IS_TRACE_REFERENCE)) { - if (!zend_jit_fetch_reference(&dasm_state, opline, orig_op1_type, &op1_info, &op1_addr, + if (!zend_jit_fetch_reference(&ctx, opline, orig_op1_type, &op1_info, &op1_addr, !ssa->var_info[ssa_op->op1_use].guarded_reference, 1)) { goto jit_failure; } @@ -4933,7 +5549,7 @@ static const void *zend_jit_trace(zend_jit_trace_rec *trace_buffer, uint32_t par CHECK_OP2_TRACE_TYPE(); op1_data_info = OP1_DATA_INFO(); CHECK_OP1_DATA_TRACE_TYPE(); - if (!zend_jit_assign_dim(&dasm_state, opline, + if (!zend_jit_assign_dim(&ctx, opline, op1_info, op1_addr, op2_info, op1_data_info, val_type, zend_may_throw_ex(opline, ssa_op, op_array, ssa, op1_info, op2_info))) { goto jit_failure; @@ -4950,6 +5566,10 @@ static const void *zend_jit_trace(zend_jit_trace_rec *trace_buffer, uint32_t par } op2_addr = OP2_REG_ADDR(); op2_info = OP2_INFO(); +#ifdef ZEND_JIT_IR + zend_jit_addr ref_addr = 0; +#endif + if (ra && ssa_op->op2_def >= 0 && (!ssa->vars[ssa_op->op2_def].no_val @@ -4965,7 +5585,7 @@ static const void *zend_jit_trace(zend_jit_trace_rec *trace_buffer, uint32_t par if (op1_type != IS_UNKNOWN && (op1_info & MAY_BE_GUARD)) { if (op1_type < IS_STRING && (op1_info & (MAY_BE_ANY|MAY_BE_UNDEF)) != (op1_def_info & (MAY_BE_ANY|MAY_BE_UNDEF))) { - if (!zend_jit_scalar_type_guard(&dasm_state, opline, opline->op1.var)) { + if (!zend_jit_scalar_type_guard(&ctx, opline, opline->op1.var)) { goto jit_failure; } op1_info &= ~(MAY_BE_STRING|MAY_BE_ARRAY|MAY_BE_OBJECT|MAY_BE_RESOURCE|MAY_BE_REF|MAY_BE_GUARD); @@ -4983,10 +5603,18 @@ static const void *zend_jit_trace(zend_jit_trace_rec *trace_buffer, uint32_t par } if (orig_op1_type != IS_UNKNOWN) { if (orig_op1_type & IS_TRACE_REFERENCE) { - if (!zend_jit_fetch_reference(&dasm_state, opline, orig_op1_type, &op1_info, &op1_addr, +#ifndef ZEND_JIT_IR + if (!zend_jit_fetch_reference(&ctx, opline, orig_op1_type, &op1_info, &op1_addr, !ssa->var_info[ssa_op->op1_use].guarded_reference, 0)) { goto jit_failure; } +#else + if (!zend_jit_guard_reference(&ctx, opline, &op1_addr, &ref_addr, + !ssa->var_info[ssa_op->op1_use].guarded_reference)) { + goto jit_failure; + } + op1_info &= ~MAY_BE_REF; +#endif if (opline->op1_type == IS_CV && ssa->vars[ssa_op->op1_def].alias == NO_ALIAS) { ssa->var_info[ssa_op->op1_def].guarded_reference = 1; @@ -4999,18 +5627,20 @@ static const void *zend_jit_trace(zend_jit_trace_rec *trace_buffer, uint32_t par && zend_jit_trace_next_is_send_result(opline, p, frame)) { send_result = 1; res_addr = ZEND_ADDR_MEM_ZVAL(ZREG_RX, (opline+1)->result.var); - if (!zend_jit_reuse_ip(&dasm_state)) { + if (!zend_jit_reuse_ip(&ctx)) { goto jit_failure; } } } - if (!zend_jit_assign_to_typed_ref(&dasm_state, opline, opline->op2_type, op2_addr, res_addr, 1)) { +#ifndef ZEND_JIT_IR + if (!zend_jit_assign_to_typed_ref(&ctx, opline, opline->op2_type, op2_addr, res_addr, 1)) { goto jit_failure; } +#endif op1_def_addr = op1_addr; op1_def_info &= ~MAY_BE_REF; } else if (op1_info & MAY_BE_REF) { - if (!zend_jit_noref_guard(&dasm_state, opline, op1_addr)) { + if (!zend_jit_noref_guard(&ctx, opline, op1_addr)) { goto jit_failure; } op1_info &= ~MAY_BE_REF; @@ -5027,16 +5657,19 @@ static const void *zend_jit_trace(zend_jit_trace_rec 
*trace_buffer, uint32_t par && zend_jit_trace_next_is_send_result(opline, p, frame)) { send_result = 1; res_addr = ZEND_ADDR_MEM_ZVAL(ZREG_RX, (opline+1)->result.var); - if (!zend_jit_reuse_ip(&dasm_state)) { + if (!zend_jit_reuse_ip(&ctx)) { goto jit_failure; } } } - if (!zend_jit_assign(&dasm_state, opline, + if (!zend_jit_assign(&ctx, opline, op1_info, op1_addr, op1_def_info, op1_def_addr, op2_info, op2_addr, op2_def_addr, res_info, res_addr, +#ifdef ZEND_JIT_IR + ref_addr, +#endif zend_may_throw_ex(opline, ssa_op, op_array, ssa, op1_info, op2_info))) { goto jit_failure; } @@ -5064,8 +5697,10 @@ static const void *zend_jit_trace(zend_jit_trace_rec *trace_buffer, uint32_t par CHECK_OP1_TRACE_TYPE(); res_info = RES_INFO(); res_use_info = zend_jit_trace_type_to_info( - STACK_MEM_TYPE(stack, EX_VAR_TO_NUM(opline->result.var))) - & (MAY_BE_UNDEF|MAY_BE_NULL|MAY_BE_FALSE|MAY_BE_TRUE|MAY_BE_LONG|MAY_BE_DOUBLE); + STACK_MEM_TYPE(stack, EX_VAR_TO_NUM(opline->result.var))); + if (opline->result_type == IS_CV) { + res_use_info &= (MAY_BE_UNDEF|MAY_BE_NULL|MAY_BE_FALSE|MAY_BE_TRUE|MAY_BE_LONG|MAY_BE_DOUBLE); + } res_addr = RES_REG_ADDR(); if (Z_MODE(res_addr) != IS_REG && STACK_TYPE(stack, EX_VAR_TO_NUM(opline->result.var)) != @@ -5073,7 +5708,7 @@ static const void *zend_jit_trace(zend_jit_trace_rec *trace_buffer, uint32_t par /* type may be not set */ res_use_info |= MAY_BE_NULL; } - if (!zend_jit_qm_assign(&dasm_state, opline, + if (!zend_jit_qm_assign(&ctx, opline, op1_info, op1_addr, op1_def_addr, res_use_info, res_info, res_addr)) { goto jit_failure; @@ -5088,7 +5723,7 @@ static const void *zend_jit_trace(zend_jit_trace_rec *trace_buffer, uint32_t par case ZEND_INIT_FCALL_BY_NAME: case ZEND_INIT_NS_FCALL_BY_NAME: frame_flags = TRACE_FRAME_MASK_NESTED; - if (!zend_jit_init_fcall(&dasm_state, opline, op_array_ssa->cfg.map ? op_array_ssa->cfg.map[opline - op_array->opcodes] : -1, op_array, ssa, ssa_op, frame->call_level, p + 1, peek_checked_stack - checked_stack)) { + if (!zend_jit_init_fcall(&ctx, opline, op_array_ssa->cfg.map ? 
op_array_ssa->cfg.map[opline - op_array->opcodes] : -1, op_array, ssa, ssa_op, frame->call_level, p + 1, peek_checked_stack - checked_stack)) { goto jit_failure; } goto done; @@ -5104,7 +5739,7 @@ static const void *zend_jit_trace(zend_jit_trace_rec *trace_buffer, uint32_t par } op1_info = OP1_INFO(); CHECK_OP1_TRACE_TYPE(); - if (!zend_jit_send_val(&dasm_state, opline, + if (!zend_jit_send_val(&ctx, opline, op1_info, OP1_REG_ADDR())) { goto jit_failure; } @@ -5125,7 +5760,7 @@ static const void *zend_jit_trace(zend_jit_trace_rec *trace_buffer, uint32_t par break; } op1_info = OP1_INFO(); - if (!zend_jit_send_ref(&dasm_state, opline, op_array, + if (!zend_jit_send_ref(&ctx, opline, op_array, op1_info, 0)) { goto jit_failure; } @@ -5151,14 +5786,16 @@ static const void *zend_jit_trace(zend_jit_trace_rec *trace_buffer, uint32_t par op1_addr = OP1_REG_ADDR(); if (ra && ssa_op->op1_def >= 0 - && !ssa->vars[ssa_op->op1_def].no_val) { + && (!ssa->vars[ssa_op->op1_def].no_val + || STACK_MEM_TYPE(stack, EX_VAR_TO_NUM(opline->op1.var)) == IS_UNKNOWN + || STACK_MEM_TYPE(stack, EX_VAR_TO_NUM(opline->op1.var)) >= IS_STRING)) { op1_def_addr = OP1_DEF_REG_ADDR(); } else { op1_def_addr = op1_addr; } op1_info = OP1_INFO(); CHECK_OP1_TRACE_TYPE(); - if (!zend_jit_send_var(&dasm_state, opline, op_array, + if (!zend_jit_send_var(&ctx, opline, op_array, op1_info, op1_addr, op1_def_addr)) { goto jit_failure; } @@ -5193,7 +5830,7 @@ static const void *zend_jit_trace(zend_jit_trace_rec *trace_buffer, uint32_t par TRACE_FRAME_SET_LAST_SEND_UNKNOWN(JIT_G(current_frame)->call); break; } - if (!zend_jit_check_func_arg(&dasm_state, opline)) { + if (!zend_jit_check_func_arg(&ctx, opline)) { goto jit_failure; } goto done; @@ -5202,7 +5839,7 @@ static const void *zend_jit_trace(zend_jit_trace_rec *trace_buffer, uint32_t par && JIT_G(current_frame)->call) { TRACE_FRAME_SET_UNKNOWN_NUM_ARGS(JIT_G(current_frame)->call); } - if (!zend_jit_check_undef_args(&dasm_state, opline)) { + if (!zend_jit_check_undef_args(&ctx, opline)) { goto jit_failure; } goto done; @@ -5210,7 +5847,7 @@ static const void *zend_jit_trace(zend_jit_trace_rec *trace_buffer, uint32_t par case ZEND_DO_ICALL: case ZEND_DO_FCALL_BY_NAME: case ZEND_DO_FCALL: - if (!zend_jit_do_fcall(&dasm_state, opline, op_array, op_array_ssa, frame->call_level, -1, p + 1)) { + if (!zend_jit_do_fcall(&ctx, opline, op_array, op_array_ssa, frame->call_level, -1, p + 1)) { goto jit_failure; } goto done; @@ -5234,7 +5871,11 @@ static const void *zend_jit_trace(zend_jit_trace_rec *trace_buffer, uint32_t par uint32_t exit_point; if (ra) { - zend_jit_trace_clenup_stack(stack, opline, ssa_op, ssa, ra); +#ifndef ZEND_JIT_IR + zend_jit_trace_cleanup_stack(stack, opline, ssa_op, ssa, ra); +#else + zend_jit_trace_cleanup_stack(&ctx, stack, opline, ssa_op, ssa, ssa_opcodes); +#endif } exit_point = zend_jit_trace_get_exit_point(exit_opline, 0); exit_addr = zend_jit_trace_get_exit_addr(exit_point); @@ -5242,7 +5883,7 @@ static const void *zend_jit_trace(zend_jit_trace_rec *trace_buffer, uint32_t par goto jit_failure; } smart_branch_opcode = exit_if_true ? 
ZEND_JMPNZ : ZEND_JMPZ; - if (!zend_jit_cmp(&dasm_state, opline, + if (!zend_jit_cmp(&ctx, opline, op1_info, OP1_RANGE(), OP1_REG_ADDR(), op2_info, OP2_RANGE(), OP2_REG_ADDR(), RES_REG_ADDR(), @@ -5254,7 +5895,7 @@ static const void *zend_jit_trace(zend_jit_trace_rec *trace_buffer, uint32_t par } else { smart_branch_opcode = 0; exit_addr = NULL; - if (!zend_jit_cmp(&dasm_state, opline, + if (!zend_jit_cmp(&ctx, opline, op1_info, OP1_RANGE(), OP1_REG_ADDR(), op2_info, OP2_RANGE(), OP2_REG_ADDR(), RES_REG_ADDR(), @@ -5282,7 +5923,11 @@ static const void *zend_jit_trace(zend_jit_trace_rec *trace_buffer, uint32_t par uint32_t exit_point; if (ra) { - zend_jit_trace_clenup_stack(stack, opline, ssa_op, ssa, ra); +#ifndef ZEND_JIT_IR + zend_jit_trace_cleanup_stack(stack, opline, ssa_op, ssa, ra); +#else + zend_jit_trace_cleanup_stack(&ctx, stack, opline, ssa_op, ssa, ssa_opcodes); +#endif } exit_point = zend_jit_trace_get_exit_point(exit_opline, 0); exit_addr = zend_jit_trace_get_exit_addr(exit_point); @@ -5293,7 +5938,7 @@ static const void *zend_jit_trace(zend_jit_trace_rec *trace_buffer, uint32_t par exit_if_true = !exit_if_true; } smart_branch_opcode = exit_if_true ? ZEND_JMPNZ : ZEND_JMPZ; - if (!zend_jit_identical(&dasm_state, opline, + if (!zend_jit_identical(&ctx, opline, op1_info, OP1_RANGE(), OP1_REG_ADDR(), op2_info, OP2_RANGE(), OP2_REG_ADDR(), RES_REG_ADDR(), @@ -5305,7 +5950,7 @@ static const void *zend_jit_trace(zend_jit_trace_rec *trace_buffer, uint32_t par } else { smart_branch_opcode = 0; exit_addr = NULL; - if (!zend_jit_identical(&dasm_state, opline, + if (!zend_jit_identical(&ctx, opline, op1_info, OP1_RANGE(), OP1_REG_ADDR(), op2_info, OP2_RANGE(), OP2_REG_ADDR(), RES_REG_ADDR(), @@ -5330,7 +5975,7 @@ static const void *zend_jit_trace(zend_jit_trace_rec *trace_buffer, uint32_t par smart_branch_opcode = 0; exit_addr = NULL; } - if (!zend_jit_defined(&dasm_state, opline, smart_branch_opcode, -1, -1, exit_addr)) { + if (!zend_jit_defined(&ctx, opline, smart_branch_opcode, -1, -1, exit_addr)) { goto jit_failure; } goto done; @@ -5347,7 +5992,11 @@ static const void *zend_jit_trace(zend_jit_trace_rec *trace_buffer, uint32_t par uint32_t exit_point; if (ra) { - zend_jit_trace_clenup_stack(stack, opline, ssa_op, ssa, ra); +#ifndef ZEND_JIT_IR + zend_jit_trace_cleanup_stack(stack, opline, ssa_op, ssa, ra); +#else + zend_jit_trace_cleanup_stack(&ctx, stack, opline, ssa_op, ssa, ssa_opcodes); +#endif } exit_point = zend_jit_trace_get_exit_point(exit_opline, 0); exit_addr = zend_jit_trace_get_exit_addr(exit_point); @@ -5359,7 +6008,7 @@ static const void *zend_jit_trace(zend_jit_trace_rec *trace_buffer, uint32_t par smart_branch_opcode = 0; exit_addr = NULL; } - if (!zend_jit_type_check(&dasm_state, opline, op1_info, smart_branch_opcode, -1, -1, exit_addr)) { + if (!zend_jit_type_check(&ctx, opline, op1_info, smart_branch_opcode, -1, -1, exit_addr)) { goto jit_failure; } goto done; @@ -5376,7 +6025,7 @@ static const void *zend_jit_trace(zend_jit_trace_rec *trace_buffer, uint32_t par || !op_array->function_name // TODO: support for IS_UNDEF ??? 
|| (op1_info & MAY_BE_UNDEF)) { - if (!zend_jit_trace_handler(&dasm_state, op_array, opline, zend_may_throw(opline, ssa_op, op_array, ssa), p + 1)) { + if (!zend_jit_trace_handler(&ctx, op_array, opline, zend_may_throw(opline, ssa_op, op_array, ssa), p + 1)) { goto jit_failure; } } else { @@ -5384,13 +6033,13 @@ static const void *zend_jit_trace(zend_jit_trace_rec *trace_buffer, uint32_t par int may_throw = 0; bool left_frame = 0; - if (!zend_jit_return(&dasm_state, opline, op_array, + if (!zend_jit_return(&ctx, opline, op_array, op1_info, OP1_REG_ADDR())) { goto jit_failure; } if (op_array->last_var > 100) { /* To many CVs to unroll */ - if (!zend_jit_free_cvs(&dasm_state)) { + if (!zend_jit_free_cvs(&ctx)) { goto jit_failure; } left_frame = 1; @@ -5416,11 +6065,11 @@ static const void *zend_jit_trace(zend_jit_trace_rec *trace_buffer, uint32_t par if (info & (MAY_BE_STRING|MAY_BE_ARRAY|MAY_BE_OBJECT|MAY_BE_RESOURCE|MAY_BE_REF)) { if (!left_frame) { left_frame = 1; - if (!zend_jit_leave_frame(&dasm_state)) { + if (!zend_jit_leave_frame(&ctx)) { goto jit_failure; } } - if (!zend_jit_free_cv(&dasm_state, info, j)) { + if (!zend_jit_free_cv(&ctx, info, j)) { goto jit_failure; } if (info & (MAY_BE_OBJECT|MAY_BE_RESOURCE|MAY_BE_ARRAY_OF_OBJECT|MAY_BE_ARRAY_OF_ARRAY|MAY_BE_ARRAY_OF_RESOURCE)) { @@ -5431,7 +6080,7 @@ static const void *zend_jit_trace(zend_jit_trace_rec *trace_buffer, uint32_t par } } } - if (!zend_jit_leave_func(&dasm_state, op_array, opline, op1_info, left_frame, + if (!zend_jit_leave_func(&ctx, op_array, opline, op1_info, left_frame, p + 1, &zend_jit_traces[ZEND_JIT_TRACE_NUM], (op_array_ssa->cfg.flags & ZEND_FUNC_INDIRECT_VAR_ACCESS) != 0, may_throw)) { goto jit_failure; @@ -5442,7 +6091,7 @@ static const void *zend_jit_trace(zend_jit_trace_rec *trace_buffer, uint32_t par case ZEND_BOOL_NOT: op1_info = OP1_INFO(); CHECK_OP1_TRACE_TYPE(); - if (!zend_jit_bool_jmpznz(&dasm_state, opline, + if (!zend_jit_bool_jmpznz(&ctx, opline, op1_info, OP1_REG_ADDR(), RES_REG_ADDR(), -1, -1, zend_may_throw(opline, ssa_op, op_array, ssa), @@ -5480,7 +6129,11 @@ static const void *zend_jit_trace(zend_jit_trace_rec *trace_buffer, uint32_t par ZEND_UNREACHABLE(); } if (ra) { - zend_jit_trace_clenup_stack(stack, opline, ssa_op, ssa, ra); +#ifndef ZEND_JIT_IR + zend_jit_trace_cleanup_stack(stack, opline, ssa_op, ssa, ra); +#else + zend_jit_trace_cleanup_stack(&ctx, stack, opline, ssa_op, ssa, ssa_opcodes); +#endif } if (!(op1_info & MAY_BE_GUARD) && has_concrete_type(op1_info) @@ -5513,7 +6166,7 @@ static const void *zend_jit_trace(zend_jit_trace_rec *trace_buffer, uint32_t par } else { res_addr = RES_REG_ADDR(); } - if (!zend_jit_bool_jmpznz(&dasm_state, opline, + if (!zend_jit_bool_jmpznz(&ctx, opline, op1_info, OP1_REG_ADDR(), res_addr, -1, -1, zend_may_throw(opline, ssa_op, op_array, ssa), @@ -5530,7 +6183,7 @@ static const void *zend_jit_trace(zend_jit_trace_rec *trace_buffer, uint32_t par op1_addr = OP1_REG_ADDR(); if (orig_op1_type != IS_UNKNOWN && (orig_op1_type & IS_TRACE_REFERENCE)) { - if (!zend_jit_fetch_reference(&dasm_state, opline, orig_op1_type, &op1_info, &op1_addr, + if (!zend_jit_fetch_reference(&ctx, opline, orig_op1_type, &op1_info, &op1_addr, !ssa->var_info[ssa_op->op1_use].guarded_reference, 1)) { goto jit_failure; } @@ -5555,7 +6208,7 @@ static const void *zend_jit_trace(zend_jit_trace_rec *trace_buffer, uint32_t par smart_branch_opcode = 0; exit_addr = NULL; } - if (!zend_jit_isset_isempty_cv(&dasm_state, opline, + if (!zend_jit_isset_isempty_cv(&ctx, opline, op1_info, 
op1_addr, smart_branch_opcode, -1, -1, exit_addr)) { goto jit_failure; @@ -5585,7 +6238,7 @@ static const void *zend_jit_trace(zend_jit_trace_rec *trace_buffer, uint32_t par smart_branch_opcode = 0; exit_addr = NULL; } - if (!zend_jit_in_array(&dasm_state, opline, + if (!zend_jit_in_array(&ctx, opline, op1_info, op1_addr, smart_branch_opcode, -1, -1, exit_addr)) { goto jit_failure; @@ -5606,7 +6259,7 @@ static const void *zend_jit_trace(zend_jit_trace_rec *trace_buffer, uint32_t par op1_addr = OP1_REG_ADDR(); if (orig_op1_type != IS_UNKNOWN && (orig_op1_type & IS_TRACE_REFERENCE)) { - if (!zend_jit_fetch_reference(&dasm_state, opline, orig_op1_type, &op1_info, &op1_addr, + if (!zend_jit_fetch_reference(&ctx, opline, orig_op1_type, &op1_info, &op1_addr, !ssa->var_info[ssa_op->op1_use].guarded_reference, 1)) { goto jit_failure; } @@ -5646,7 +6299,7 @@ static const void *zend_jit_trace(zend_jit_trace_rec *trace_buffer, uint32_t par } } } - if (!zend_jit_fetch_dim_read(&dasm_state, opline, ssa, ssa_op, + if (!zend_jit_fetch_dim_read(&ctx, opline, ssa, ssa_op, op1_info, op1_addr, avoid_refcounting, op2_info, res_info, RES_REG_ADDR(), val_type)) { goto jit_failure; @@ -5669,7 +6322,7 @@ static const void *zend_jit_trace(zend_jit_trace_rec *trace_buffer, uint32_t par if (opline->op1_type == IS_VAR) { if (orig_op1_type != IS_UNKNOWN && (orig_op1_type & IS_TRACE_INDIRECT)) { - if (!zend_jit_fetch_indirect_var(&dasm_state, opline, orig_op1_type, + if (!zend_jit_fetch_indirect_var(&ctx, opline, orig_op1_type, &op1_info, &op1_addr, !ssa->var_info[ssa_op->op1_use].indirect_reference)) { goto jit_failure; } @@ -5679,7 +6332,7 @@ static const void *zend_jit_trace(zend_jit_trace_rec *trace_buffer, uint32_t par } if (orig_op1_type != IS_UNKNOWN && (orig_op1_type & IS_TRACE_REFERENCE)) { - if (!zend_jit_fetch_reference(&dasm_state, opline, orig_op1_type, &op1_info, &op1_addr, + if (!zend_jit_fetch_reference(&ctx, opline, orig_op1_type, &op1_info, &op1_addr, !ssa->var_info[ssa_op->op1_use].guarded_reference, 1)) { goto jit_failure; } @@ -5693,7 +6346,7 @@ static const void *zend_jit_trace(zend_jit_trace_rec *trace_buffer, uint32_t par op2_info = OP2_INFO(); CHECK_OP2_TRACE_TYPE(); op1_def_info = OP1_DEF_INFO(); - if (!zend_jit_fetch_dim(&dasm_state, opline, + if (!zend_jit_fetch_dim(&ctx, opline, op1_info, op1_addr, op2_info, RES_REG_ADDR(), val_type)) { goto jit_failure; } @@ -5713,7 +6366,7 @@ static const void *zend_jit_trace(zend_jit_trace_rec *trace_buffer, uint32_t par op1_addr = OP1_REG_ADDR(); if (orig_op1_type != IS_UNKNOWN && (orig_op1_type & IS_TRACE_REFERENCE)) { - if (!zend_jit_fetch_reference(&dasm_state, opline, orig_op1_type, &op1_info, &op1_addr, + if (!zend_jit_fetch_reference(&ctx, opline, orig_op1_type, &op1_info, &op1_addr, !ssa->var_info[ssa_op->op1_use].guarded_reference, 1)) { goto jit_failure; } @@ -5732,7 +6385,11 @@ static const void *zend_jit_trace(zend_jit_trace_rec *trace_buffer, uint32_t par uint32_t exit_point; if (ra) { - zend_jit_trace_clenup_stack(stack, opline, ssa_op, ssa, ra); +#ifndef ZEND_JIT_IR + zend_jit_trace_cleanup_stack(stack, opline, ssa_op, ssa, ra); +#else + zend_jit_trace_cleanup_stack(&ctx, stack, opline, ssa_op, ssa, ssa_opcodes); +#endif } if (ssa_op->op1_use >= 0 && ssa->var_info[ssa_op->op1_use].avoid_refcounting) { @@ -5772,7 +6429,7 @@ static const void *zend_jit_trace(zend_jit_trace_rec *trace_buffer, uint32_t par op1_info &= ~MAY_BE_ARRAY_PACKED; } } - if (!zend_jit_isset_isempty_dim(&dasm_state, opline, + if (!zend_jit_isset_isempty_dim(&ctx, 
opline, op1_info, op1_addr, avoid_refcounting, op2_info, val_type, zend_may_throw_ex(opline, ssa_op, op_array, ssa, op1_info, op2_info), @@ -5819,7 +6476,7 @@ static const void *zend_jit_trace(zend_jit_trace_rec *trace_buffer, uint32_t par if (orig_op1_type != IS_UNKNOWN && (orig_op1_type & IS_TRACE_INDIRECT)) { op1_indirect = 1; - if (!zend_jit_fetch_indirect_var(&dasm_state, opline, orig_op1_type, + if (!zend_jit_fetch_indirect_var(&ctx, opline, orig_op1_type, &op1_info, &op1_addr, !ssa->var_info[ssa_op->op1_use].indirect_reference)) { goto jit_failure; } @@ -5827,7 +6484,7 @@ static const void *zend_jit_trace(zend_jit_trace_rec *trace_buffer, uint32_t par } if (orig_op1_type != IS_UNKNOWN && (orig_op1_type & IS_TRACE_REFERENCE)) { - if (!zend_jit_fetch_reference(&dasm_state, opline, orig_op1_type, &op1_info, &op1_addr, + if (!zend_jit_fetch_reference(&ctx, opline, orig_op1_type, &op1_info, &op1_addr, !ssa->var_info[ssa_op->op1_use].guarded_reference, 1)) { goto jit_failure; } @@ -5865,7 +6522,7 @@ static const void *zend_jit_trace(zend_jit_trace_rec *trace_buffer, uint32_t par on_this = op_array->opcodes[op_array_ssa->vars[op_array_ssa->ops[opline-op_array->opcodes].op1_use].definition].opcode == ZEND_FETCH_THIS; } } - if (!zend_jit_fetch_obj(&dasm_state, opline, op_array, ssa, ssa_op, + if (!zend_jit_fetch_obj(&ctx, opline, op_array, ssa, ssa_op, op1_info, op1_addr, op1_indirect, ce, ce_is_instanceof, on_this, delayed_fetch_this, avoid_refcounting, op1_ce, val_type, zend_may_throw_ex(opline, ssa_op, op_array, ssa, op1_info, MAY_BE_STRING))) { @@ -5884,7 +6541,7 @@ static const void *zend_jit_trace(zend_jit_trace_rec *trace_buffer, uint32_t par if (ssa->vars[ssa_op->op1_def].alias == NO_ALIAS) { ssa->var_info[ssa_op->op1_def].guarded_reference = 1; } - if (!zend_jit_bind_global(&dasm_state, opline, op1_info)) { + if (!zend_jit_bind_global(&ctx, opline, op1_info)) { goto jit_failure; } if ((opline+1)->opcode == ZEND_BIND_GLOBAL) { @@ -5898,7 +6555,7 @@ static const void *zend_jit_trace(zend_jit_trace_rec *trace_buffer, uint32_t par ssa_op = orig_ssa_op; goto done; case ZEND_RECV: - if (!zend_jit_recv(&dasm_state, opline, op_array)) { + if (!zend_jit_recv(&ctx, opline, op_array)) { goto jit_failure; } goto done; @@ -5906,7 +6563,7 @@ static const void *zend_jit_trace(zend_jit_trace_rec *trace_buffer, uint32_t par orig_opline = opline; orig_ssa_op = ssa_op; while (1) { - if (!zend_jit_recv_init(&dasm_state, opline, op_array, + if (!zend_jit_recv_init(&ctx, opline, op_array, (opline + 1)->opcode != ZEND_RECV_INIT, zend_may_throw(opline, ssa_op, op_array, ssa))) { goto jit_failure; @@ -5924,7 +6581,7 @@ static const void *zend_jit_trace(zend_jit_trace_rec *trace_buffer, uint32_t par case ZEND_FREE: case ZEND_FE_FREE: op1_info = OP1_INFO(); - if (!zend_jit_free(&dasm_state, opline, op1_info, + if (!zend_jit_free(&ctx, opline, op1_info, zend_may_throw(opline, ssa_op, op_array, ssa))) { goto jit_failure; } @@ -5935,7 +6592,7 @@ static const void *zend_jit_trace(zend_jit_trace_rec *trace_buffer, uint32_t par if ((op1_info & (MAY_BE_UNDEF|MAY_BE_ANY|MAY_BE_REF)) != MAY_BE_STRING) { break; } - if (!zend_jit_echo(&dasm_state, opline, op1_info)) { + if (!zend_jit_echo(&ctx, opline, op1_info)) { goto jit_failure; } goto done; @@ -5943,7 +6600,7 @@ static const void *zend_jit_trace(zend_jit_trace_rec *trace_buffer, uint32_t par op1_info = OP1_INFO(); op1_addr = OP1_REG_ADDR(); if (orig_op1_type == (IS_TRACE_REFERENCE|IS_STRING)) { - if (!zend_jit_fetch_reference(&dasm_state, opline, orig_op1_type, 
&op1_info, &op1_addr, + if (!zend_jit_fetch_reference(&ctx, opline, orig_op1_type, &op1_info, &op1_addr, !ssa->var_info[ssa_op->op1_use].guarded_reference, 1)) { goto jit_failure; } @@ -5957,7 +6614,7 @@ static const void *zend_jit_trace(zend_jit_trace_rec *trace_buffer, uint32_t par break; } } - if (!zend_jit_strlen(&dasm_state, opline, op1_info, op1_addr, RES_REG_ADDR())) { + if (!zend_jit_strlen(&ctx, opline, op1_info, op1_addr, RES_REG_ADDR())) { goto jit_failure; } goto done; @@ -5965,7 +6622,7 @@ static const void *zend_jit_trace(zend_jit_trace_rec *trace_buffer, uint32_t par op1_info = OP1_INFO(); op1_addr = OP1_REG_ADDR(); if (orig_op1_type == (IS_TRACE_REFERENCE|IS_ARRAY)) { - if (!zend_jit_fetch_reference(&dasm_state, opline, orig_op1_type, &op1_info, &op1_addr, + if (!zend_jit_fetch_reference(&ctx, opline, orig_op1_type, &op1_info, &op1_addr, !ssa->var_info[ssa_op->op1_use].guarded_reference, 1)) { goto jit_failure; } @@ -5979,7 +6636,7 @@ static const void *zend_jit_trace(zend_jit_trace_rec *trace_buffer, uint32_t par break; } } - if (!zend_jit_count(&dasm_state, opline, op1_info, op1_addr, RES_REG_ADDR(), zend_may_throw(opline, ssa_op, op_array, ssa))) { + if (!zend_jit_count(&ctx, opline, op1_info, op1_addr, RES_REG_ADDR(), zend_may_throw(opline, ssa_op, op_array, ssa))) { goto jit_failure; } goto done; @@ -5991,14 +6648,14 @@ static const void *zend_jit_trace(zend_jit_trace_rec *trace_buffer, uint32_t par delayed_fetch_this = 1; } } - if (!zend_jit_fetch_this(&dasm_state, opline, op_array, delayed_fetch_this)) { + if (!zend_jit_fetch_this(&ctx, opline, op_array, delayed_fetch_this)) { goto jit_failure; } goto done; case ZEND_SWITCH_LONG: case ZEND_SWITCH_STRING: case ZEND_MATCH: - if (!zend_jit_switch(&dasm_state, opline, op_array, op_array_ssa, p+1, &zend_jit_traces[ZEND_JIT_TRACE_NUM])) { + if (!zend_jit_switch(&ctx, opline, op_array, op_array_ssa, p+1, &zend_jit_traces[ZEND_JIT_TRACE_NUM])) { goto jit_failure; } goto done; @@ -6021,7 +6678,7 @@ static const void *zend_jit_trace(zend_jit_trace_rec *trace_buffer, uint32_t par /* TODO May need reference unwrapping. 
*/ break; } - if (!zend_jit_verify_return_type(&dasm_state, opline, op_array, op1_info)) { + if (!zend_jit_verify_return_type(&ctx, opline, op_array, op1_info)) { goto jit_failure; } goto done; @@ -6031,7 +6688,7 @@ static const void *zend_jit_trace(zend_jit_trace_rec *trace_buffer, uint32_t par if ((op1_info & (MAY_BE_ANY|MAY_BE_REF|MAY_BE_UNDEF)) != MAY_BE_ARRAY) { break; } - if (!zend_jit_fe_reset(&dasm_state, opline, op1_info)) { + if (!zend_jit_fe_reset(&ctx, opline, op1_info)) { goto jit_failure; } goto done; @@ -6063,13 +6720,13 @@ static const void *zend_jit_trace(zend_jit_trace_rec *trace_buffer, uint32_t par } else { ZEND_UNREACHABLE(); } - if (!zend_jit_fe_fetch(&dasm_state, opline, op1_info, OP2_INFO(), + if (!zend_jit_fe_fetch(&ctx, opline, op1_info, OP2_INFO(), -1, smart_branch_opcode, exit_addr)) { goto jit_failure; } goto done; case ZEND_FETCH_CONSTANT: - if (!zend_jit_fetch_constant(&dasm_state, opline, op_array, ssa, ssa_op, RES_REG_ADDR())) { + if (!zend_jit_fetch_constant(&ctx, opline, op_array, ssa, ssa_op, RES_REG_ADDR())) { goto jit_failure; } goto done; @@ -6092,10 +6749,12 @@ static const void *zend_jit_trace(zend_jit_trace_rec *trace_buffer, uint32_t par op1_addr = OP1_REG_ADDR(); if (polymorphic_side_trace) { op1_info = MAY_BE_OBJECT; +#ifndef ZEND_JIT_IR op1_addr = 0; +#endif } else if (orig_op1_type != IS_UNKNOWN && (orig_op1_type & IS_TRACE_REFERENCE)) { - if (!zend_jit_fetch_reference(&dasm_state, opline, orig_op1_type, &op1_info, &op1_addr, + if (!zend_jit_fetch_reference(&ctx, opline, orig_op1_type, &op1_info, &op1_addr, !ssa->var_info[ssa_op->op1_use].guarded_reference, 1)) { goto jit_failure; } @@ -6130,11 +6789,16 @@ static const void *zend_jit_trace(zend_jit_trace_rec *trace_buffer, uint32_t par } } frame_flags = TRACE_FRAME_MASK_NESTED; - if (!zend_jit_init_method_call(&dasm_state, opline, + if (!zend_jit_init_method_call(&ctx, opline, op_array_ssa->cfg.map ? op_array_ssa->cfg.map[opline - op_array->opcodes] : -1, op_array, ssa, ssa_op, frame->call_level, op1_info, op1_addr, ce, ce_is_instanceof, on_this, delayed_fetch_this, op1_ce, - p + 1, peek_checked_stack - checked_stack, polymorphic_side_trace)) { + p + 1, peek_checked_stack - checked_stack, +#ifdef ZEND_JIT_IR + polymorphic_side_trace ? zend_jit_traces[parent_trace].exit_info[exit_num].poly_func_reg : -1, + polymorphic_side_trace ? zend_jit_traces[parent_trace].exit_info[exit_num].poly_this_reg : -1, +#endif + polymorphic_side_trace)) { goto jit_failure; } goto done; @@ -6145,7 +6809,7 @@ static const void *zend_jit_trace(zend_jit_trace_rec *trace_buffer, uint32_t par op2_info = OP2_INFO(); CHECK_OP2_TRACE_TYPE(); frame_flags = TRACE_FRAME_MASK_NESTED; - if (!zend_jit_init_closure_call(&dasm_state, opline, op_array_ssa->cfg.map ? op_array_ssa->cfg.map[opline - op_array->opcodes] : -1, op_array, ssa, ssa_op, frame->call_level, p + 1, peek_checked_stack - checked_stack)) { + if (!zend_jit_init_closure_call(&ctx, opline, op_array_ssa->cfg.map ? 
op_array_ssa->cfg.map[opline - op_array->opcodes] : -1, op_array, ssa, ssa_op, frame->call_level, p + 1, peek_checked_stack - checked_stack)) { goto jit_failure; } goto done; @@ -6164,7 +6828,7 @@ static const void *zend_jit_trace(zend_jit_trace_rec *trace_buffer, uint32_t par if ((op2_info & (MAY_BE_UNDEF|MAY_BE_ANY|MAY_BE_REF)) != MAY_BE_STRING) { break; } - if (!zend_jit_rope(&dasm_state, opline, op2_info)) { + if (!zend_jit_rope(&ctx, opline, op2_info)) { goto jit_failure; } goto done; @@ -6183,17 +6847,17 @@ static const void *zend_jit_trace(zend_jit_trace_rec *trace_buffer, uint32_t par if (op2_info & MAY_BE_GUARD) { op2_info = MAY_BE_RC1 | MAY_BE_RCN | MAY_BE_REF | MAY_BE_ANY | MAY_BE_ARRAY_KEY_ANY | MAY_BE_ARRAY_OF_ANY | MAY_BE_ARRAY_OF_REF; } - if (!zend_jit_trace_handler(&dasm_state, op_array, opline, + if (!zend_jit_trace_handler(&ctx, op_array, opline, zend_may_throw_ex(opline, ssa_op, op_array, ssa, op1_info, op2_info), p + 1)) { goto jit_failure; } if ((p+1)->op == ZEND_JIT_TRACE_INIT_CALL && (p+1)->func) { - if (opline->opcode == ZEND_NEW && ssa_op->result_def >= 0) { + if (opline->opcode == ZEND_NEW && opline->result_type != IS_UNUSED) { SET_STACK_TYPE(stack, EX_VAR_TO_NUM(opline->result.var), IS_OBJECT, 1); } if (zend_jit_may_be_polymorphic_call(opline) || zend_jit_may_be_modified((p+1)->func, op_array)) { - if (!zend_jit_init_fcall_guard(&dasm_state, 0, (p+1)->func, opline+1)) { + if (!zend_jit_init_fcall_guard(&ctx, 0, (p+1)->func, opline+1)) { goto jit_failure; } } @@ -6212,13 +6876,25 @@ done: } if (ra) { - zend_jit_trace_clenup_stack(stack, opline, ssa_op, ssa, ra); +#ifndef ZEND_JIT_IR + zend_jit_trace_cleanup_stack(stack, opline, ssa_op, ssa, ra); +#else + zend_jit_trace_cleanup_stack(&ctx, stack, opline, ssa_op, ssa, ssa_opcodes); +#endif } +#ifndef ZEND_JIT_IR if ((opline->op1_type & (IS_VAR|IS_TMP_VAR)) && STACK_REG(stack, EX_VAR_TO_NUM(opline->op1.var)) > ZREG_NUM) { SET_STACK_REG(stack, EX_VAR_TO_NUM(opline->op1.var), ZREG_NONE); } +#else + if ((opline->op1_type & (IS_VAR|IS_TMP_VAR)) + && STACK_FLAGS(stack, EX_VAR_TO_NUM(opline->op1.var)) & (ZREG_ZVAL_ADDREF|ZREG_THIS)) { + SET_STACK_REG(stack, EX_VAR_TO_NUM(opline->op1.var), ZREG_NONE); + } + +#endif if (opline->opcode == ZEND_ROPE_INIT) { /* clear stack slots used by rope */ @@ -6275,21 +6951,33 @@ done: } } else { SET_STACK_TYPE(stack, EX_VAR_TO_NUM(opline->result.var), type, - (gen_handler || type == IS_UNKNOWN || !ra || !ra[ssa_op->result_def])); + (gen_handler || type == IS_UNKNOWN || !ra || !RA_HAS_REG(ssa_op->result_def))); if (ssa->var_info[ssa_op->result_def].type & MAY_BE_INDIRECT) { RESET_STACK_MEM_TYPE(stack, EX_VAR_TO_NUM(opline->result.var)); } if (type != IS_UNKNOWN) { ssa->var_info[ssa_op->result_def].type &= ~MAY_BE_GUARD; +#ifndef ZEND_JIT_IR if (opline->opcode == ZEND_FETCH_THIS && delayed_fetch_this) { SET_STACK_REG(stack, EX_VAR_TO_NUM(opline->result.var), ZREG_THIS); } else if (ssa->var_info[ssa_op->result_def].avoid_refcounting) { SET_STACK_REG(stack, EX_VAR_TO_NUM(opline->result.var), ZREG_ZVAL_TRY_ADDREF); - } else if (ra && ra[ssa_op->result_def]) { + } else if (ra && RA_HAS_REG(ssa_op->result_def)) { SET_STACK_REG_EX(stack, EX_VAR_TO_NUM(opline->result.var), ra[ssa_op->result_def]->reg, - ra[ssa_op->result_def]->flags & ZREG_STORE); + RA_REG_FLAGS(ssa_op->result_def) & ZREG_STORE); } +#else + if (opline->opcode == ZEND_FETCH_THIS + && delayed_fetch_this) { + SET_STACK_REG_EX(stack, EX_VAR_TO_NUM(opline->result.var), ZREG_NONE, ZREG_THIS); + } else if 
(ssa->var_info[ssa_op->result_def].avoid_refcounting) { + SET_STACK_REG_EX(stack, EX_VAR_TO_NUM(opline->result.var), ZREG_NONE, ZREG_ZVAL_ADDREF); + } else if (ra && RA_HAS_REG(ssa_op->result_def)) { + SET_STACK_REF_EX(stack, EX_VAR_TO_NUM(opline->result.var), ra[ssa_op->result_def].ref, + RA_REG_FLAGS(ssa_op->result_def) & ZREG_STORE); + } +#endif } } @@ -6331,12 +7019,12 @@ done: } SET_STACK_TYPE(stack, EX_VAR_TO_NUM(opline->op1.var), type, (gen_handler || type == IS_UNKNOWN || !ra || - (!ra[ssa_op->op1_def] && + (!RA_HAS_REG(ssa_op->op1_def) && (opline->opcode == ZEND_ASSIGN || !ssa->vars[ssa_op->op1_def].no_val)))); if (type != IS_UNKNOWN) { ssa->var_info[ssa_op->op1_def].type &= ~MAY_BE_GUARD; - if (ra && ra[ssa_op->op1_def]) { - uint8_t flags = ra[ssa_op->op1_def]->flags & ZREG_STORE; + if (ra && RA_HAS_REG(ssa_op->op1_def)) { + uint8_t flags = RA_REG_FLAGS(ssa_op->op1_def) & ZREG_STORE; if (ssa_op->op1_use >= 0) { if (opline->opcode == ZEND_SEND_VAR @@ -6346,12 +7034,16 @@ done: || opline->opcode == ZEND_COALESCE || opline->opcode == ZEND_JMP_NULL || opline->opcode == ZEND_FE_RESET_R) { - if (!ra[ssa_op->op1_use]) { + if (!RA_HAS_REG(ssa_op->op1_use)) { flags |= ZREG_LOAD; } } } +#ifndef ZEND_JIT_IR SET_STACK_REG_EX(stack, EX_VAR_TO_NUM(opline->op1.var), ra[ssa_op->op1_def]->reg, flags); +#else + SET_STACK_REF_EX(stack, EX_VAR_TO_NUM(opline->op1.var), ra[ssa_op->op1_def].ref, flags); +#endif } } if (type == IS_LONG @@ -6378,21 +7070,28 @@ done: } SET_STACK_TYPE(stack, EX_VAR_TO_NUM(opline->op2.var), type, (gen_handler || type == IS_UNKNOWN || !ra || - (!ra[ssa_op->op2_def] /*&& !ssa->vars[ssa_op->op2_def].no_val*/))); + (!RA_HAS_REG(ssa_op->op2_def) /*&& !ssa->vars[ssa_op->op2_def].no_val*/))); if (type != IS_UNKNOWN) { ssa->var_info[ssa_op->op2_def].type &= ~MAY_BE_GUARD; - if (ra && ra[ssa_op->op2_def]) { - uint8_t flags = ra[ssa_op->op2_def]->flags & ZREG_STORE; + if (ra && RA_HAS_REG(ssa_op->op2_def)) { + uint8_t flags = RA_REG_FLAGS(ssa_op->op2_def) & ZREG_STORE; if (ssa_op->op2_use >= 0) { if (opline->opcode == ZEND_ASSIGN) { - if (!ra[ssa_op->op2_use] - || ra[ssa_op->op2_use]->reg != ra[ssa_op->op2_def]->reg) { + if (!RA_HAS_REG(ssa_op->op2_use) +#ifndef ZEND_JIT_IR + || ra[ssa_op->op2_use]->reg != ra[ssa_op->op2_def]->reg +#endif + ) { flags |= ZREG_LOAD; } } } +#ifndef ZEND_JIT_IR SET_STACK_REG_EX(stack, EX_VAR_TO_NUM(opline->op2.var), ra[ssa_op->op2_def]->reg, flags); +#else + SET_STACK_REF_EX(stack, EX_VAR_TO_NUM(opline->op2.var), ra[ssa_op->op2_def].ref, flags); +#endif } } if (type == IS_LONG @@ -6430,12 +7129,17 @@ done: type = STACK_TYPE(stack, EX_VAR_TO_NUM(opline->op1.var)); } SET_STACK_TYPE(stack, EX_VAR_TO_NUM(opline->op1.var), type, - (gen_handler || type == IS_UNKNOWN || !ra || !ra[ssa_op->op1_def])); + (gen_handler || type == IS_UNKNOWN || !ra || !RA_HAS_REG(ssa_op->op1_def))); if (type != IS_UNKNOWN) { ssa->var_info[ssa_op->op1_def].type &= ~MAY_BE_GUARD; - if (ra && ra[ssa_op->op1_def]) { + if (ra && RA_HAS_REG(ssa_op->op1_def)) { +#ifndef ZEND_JIT_IR SET_STACK_REG_EX(stack, EX_VAR_TO_NUM(opline->op1.var), ra[ssa_op->op1_def]->reg, - ra[ssa_op->op1_def]->flags & ZREG_STORE); + RA_REG_FLAGS(ssa_op->op1_def) & ZREG_STORE); +#else + SET_STACK_REF_EX(stack, EX_VAR_TO_NUM(opline->op1.var), ra[ssa_op->op1_def].ref, + RA_REG_FLAGS(ssa_op->op1_def) & ZREG_STORE); +#endif } } if (type == IS_LONG @@ -6461,10 +7165,15 @@ done: type = concrete_type(ssa->var_info[ssa_op->result_def].type); } SET_STACK_TYPE(stack, EX_VAR_TO_NUM(opline->result.var), type, - (gen_handler || 
!ra || !ra[ssa_op->result_def])); - if (ra && ra[ssa_op->result_def]) { + (gen_handler || !ra || !RA_HAS_REG(ssa_op->result_def))); + if (ra && RA_HAS_REG(ssa_op->result_def)) { +#ifndef ZEND_JIT_IR SET_STACK_REG_EX(stack, EX_VAR_TO_NUM(opline->result.var), ra[ssa_op->result_def]->reg, - ra[ssa_op->result_def]->flags & ZREG_STORE); + RA_REG_FLAGS(ssa_op->result_def) & ZREG_STORE); +#else + SET_STACK_REF_EX(stack, EX_VAR_TO_NUM(opline->result.var), ra[ssa_op->result_def].ref, + RA_REG_FLAGS(ssa_op->result_def) & ZREG_STORE); +#endif } } ssa_op++; @@ -6483,10 +7192,15 @@ done: type = concrete_type(ssa->var_info[ssa_op->op1_def].type); } SET_STACK_TYPE(stack, EX_VAR_TO_NUM(opline->op1.var), type, - (gen_handler || !ra || !ra[ssa_op->op1_def])); - if (ra && ra[ssa_op->op1_def]) { + (gen_handler || !ra || !RA_HAS_REG(ssa_op->op1_def))); + if (ra && RA_HAS_REG(ssa_op->op1_def)) { +#ifndef ZEND_JIT_IR SET_STACK_REG_EX(stack, EX_VAR_TO_NUM(opline->op1.var), ra[ssa_op->op1_def]->reg, - ra[ssa_op->op1_def]->flags & ZREG_STORE); + RA_REG_FLAGS(ssa_op->op1_def) & ZREG_STORE); +#else + SET_STACK_REF_EX(stack, EX_VAR_TO_NUM(opline->op1.var), ra[ssa_op->op1_def].ref, + RA_REG_FLAGS(ssa_op->op1_def) & ZREG_STORE); +#endif } } ssa_op++; @@ -6534,7 +7248,7 @@ done: || (p+1)->op == ZEND_JIT_TRACE_END) && (TRACE_FRAME_NUM_ARGS(call) < 0 || TRACE_FRAME_NUM_ARGS(call) < p->op_array->num_args) - && !zend_jit_trace_opline_guard(&dasm_state, (p+1)->opline)) { + && !zend_jit_trace_opline_guard(&ctx, (p+1)->opline)) { goto jit_failure; } JIT_G(current_frame) = frame; @@ -6553,6 +7267,9 @@ done: TRACE_FRAME_SET_THIS_CHECKED(call); } op_array = (zend_op_array*)p->op_array; +#ifdef ZEND_JIT_IR + ctx.current_op_array = op_array; +#endif jit_extension = (zend_jit_op_array_trace_extension*)ZEND_FUNC_INFO(op_array); op_array_ssa = &jit_extension->func_info.ssa; @@ -6568,27 +7285,37 @@ done: if (ra) { int j = ZEND_JIT_TRACE_GET_FIRST_SSA_VAR(p->info); - for (i = 0; i < op_array->last_var; i++,j++) { - if (ra[j] && (ra[j]->flags & ZREG_LOAD) != 0) { + for (i = 0; i < op_array->last_var; i++, j++) { + if (RA_HAS_REG(j) && (RA_REG_FLAGS(j) & ZREG_LOAD) != 0) { if ((ssa->var_info[j].type & MAY_BE_GUARD) != 0) { uint8_t op_type; ssa->var_info[j].type &= ~MAY_BE_GUARD; op_type = concrete_type(ssa->var_info[j].type); - if (!zend_jit_type_guard(&dasm_state, opline, EX_NUM_TO_VAR(i), op_type)) { + if (!zend_jit_type_guard(&ctx, opline, EX_NUM_TO_VAR(i), op_type)) { goto jit_failure; } SET_STACK_TYPE(stack, i, op_type, 1); } - SET_STACK_REG_EX(stack, i, ra[j]->reg, ZREG_LOAD); - if (!zend_jit_load_var(&dasm_state, ssa->var_info[j].type, i, ra[j]->reg)) { +#ifndef ZEND_JIT_IR + if (!zend_jit_load_var(&ctx, ssa->var_info[j].type, i, ra[j]->reg)) { goto jit_failure; } + SET_STACK_REG_EX(stack, i, ra[j]->reg, ZREG_LOAD); +#else + if (!zend_jit_load_var(&ctx, ssa->var_info[j].type, i, j)) { + goto jit_failure; + } + SET_STACK_REF_EX(stack, i, ra[j].ref, ZREG_LOAD); +#endif } } } } else if (p->op == ZEND_JIT_TRACE_BACK) { op_array = (zend_op_array*)p->op_array; +#ifdef ZEND_JIT_IR + ctx.current_op_array = op_array; +#endif jit_extension = (zend_jit_op_array_trace_extension*)ZEND_FUNC_INFO(op_array); op_array_ssa = &jit_extension->func_info.ssa; @@ -6619,11 +7346,18 @@ done: if (ra) { j = ZEND_JIT_TRACE_GET_FIRST_SSA_VAR(p->info); for (i = 0; i < op_array->last_var + op_array->T; i++, j++) { - if (ra[j] && (ra[j]->flags & ZREG_LOAD) != 0) { - SET_STACK_REG_EX(stack, i, ra[j]->reg, ZREG_LOAD); - if (!zend_jit_load_var(&dasm_state, 
ssa->var_info[j].type, i, ra[j]->reg)) { + if (RA_HAS_REG(j) && (RA_REG_FLAGS(j) & ZREG_LOAD) != 0) { +#ifndef ZEND_JIT_IR + if (!zend_jit_load_var(&ctx, ssa->var_info[j].type, i, ra[j]->reg)) { goto jit_failure; } + SET_STACK_REG_EX(stack, i, ra[j]->reg, ZREG_LOAD); +#else + if (!zend_jit_load_var(&ctx, ssa->var_info[j].type, i, j)) { + goto jit_failure; + } + SET_STACK_REF_EX(stack, i, ra[j].ref, ZREG_LOAD); +#endif } } } @@ -6761,7 +7495,7 @@ done: opline = q->opline; ZEND_ASSERT(opline != NULL); } - if (!zend_jit_init_fcall_guard(&dasm_state, + if (!zend_jit_init_fcall_guard(&ctx, ZEND_JIT_TRACE_FAKE_LEVEL(p->info), p->func, opline)) { goto jit_failure; } @@ -6814,9 +7548,15 @@ done: t = &zend_jit_traces[ZEND_JIT_TRACE_NUM]; +#ifndef ZEND_JIT_IR if (!parent_trace && zend_jit_trace_uses_initial_ip()) { t->flags |= ZEND_JIT_TRACE_USES_INITIAL_IP; } +#else + if (!parent_trace && zend_jit_trace_uses_initial_ip(&ctx)) { + t->flags |= ZEND_JIT_TRACE_USES_INITIAL_IP; + } +#endif if (p->stop == ZEND_JIT_TRACE_STOP_LOOP || p->stop == ZEND_JIT_TRACE_STOP_RECURSIVE_CALL @@ -6825,21 +7565,22 @@ done: zend_ssa_phi *phi = ssa->blocks[1].phis; while (phi) { - if (ra[phi->ssa_var] - && ra[phi->sources[1]] + if (RA_HAS_REG(phi->sources[1]) && STACK_MEM_TYPE(stack, phi->var) != STACK_TYPE(stack, phi->var) - && (ra[phi->ssa_var]->flags & (ZREG_LOAD|ZREG_STORE)) == 0 - && (ra[phi->sources[1]]->flags & (ZREG_LOAD|ZREG_STORE)) == 0) { - /* Store actual type to memory to avoid deoptimization mistakes */ - /* TODO: Alternatively, we may try to update alredy generated deoptimization info */ - zend_jit_store_var_type(&dasm_state, phi->var, STACK_TYPE(stack, phi->var)); + && (RA_REG_FLAGS(phi->sources[1]) & (ZREG_LOAD|ZREG_STORE)) == 0) { + + if (!RA_HAS_REG(phi->ssa_var) + || (RA_REG_FLAGS(phi->ssa_var) & (ZREG_LOAD|ZREG_STORE)) == 0) { + /* Store actual type to memory to avoid deoptimization mistakes */ + zend_jit_store_var_type(&ctx, phi->var, STACK_TYPE(stack, phi->var)); + } } phi = phi->next; } } if (p->stop != ZEND_JIT_TRACE_STOP_RECURSIVE_RET) { if ((t->flags & ZEND_JIT_TRACE_USES_INITIAL_IP) - && !zend_jit_set_ip(&dasm_state, p->opline)) { + && !zend_jit_set_ip(&ctx, p->opline)) { goto jit_failure; } } @@ -6864,18 +7605,44 @@ done: goto jit_failure; } } else { +#ifndef ZEND_JIT_IR timeout_exit_addr = dasm_labels[zend_lbinterrupt_handler]; +#else + timeout_exit_addr = zend_jit_stub_handlers[jit_stub_interrupt_handler]; +#endif } } - zend_jit_trace_end_loop(&dasm_state, 0, timeout_exit_addr); /* jump back to start of the trace loop */ +#ifndef ZEND_JIT_IR + zend_jit_trace_end_loop(&ctx, 0, timeout_exit_addr); /* jump back to start of the trace loop */ +#else + zend_jit_trace_end_loop(&ctx, jit->trace_loop_ref, timeout_exit_addr); /* jump back to start of the trace loop */ +#endif } } else if (p->stop == ZEND_JIT_TRACE_STOP_LINK || p->stop == ZEND_JIT_TRACE_STOP_INTERPRETER) { - if (!zend_jit_trace_deoptimization(&dasm_state, 0, NULL, +#ifndef ZEND_JIT_IR + if (!zend_jit_trace_deoptimization(&ctx, 0, NULL, stack, op_array->last_var + op_array->T, NULL, NULL, NULL, 0)) { goto jit_failure; } +#else + if (ra && (p-1)->op != ZEND_JIT_TRACE_ENTER) { + for (i = 0; i < op_array->last_var + op_array->T; i++) { + int32_t ref = STACK_REF(stack, i); + + if (ref) { + uint8_t type = STACK_TYPE(stack, i); + + if (!(STACK_FLAGS(stack, i) & (ZREG_LOAD|ZREG_STORE)) + && !zend_jit_store_ref(jit, 1 << type, i, ref, STACK_MEM_TYPE(stack, i) != type)) { + goto jit_failure; + } + } + CLEAR_STACK_REF(stack, i); + } + } +#endif 
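[Editor's sketch, not part of the patch] The ZEND_JIT_IR branch just above flushes values that still live in JIT registers (tracked per slot via STACK_REF/STACK_FLAGS) back into the interpreter frame before the trace links to the VM, skipping slots flagged ZREG_LOAD/ZREG_STORE and writing the type tag only when the in-memory type is stale. A minimal self-contained model of that store-back pass follows; stack_slot, vm_slot and flush_frame are hypothetical names standing in for zend_jit_trace_stack, zval and zend_jit_store_ref().

#include <stdint.h>
#include <stdio.h>

enum { TYPE_NONE, TYPE_LONG, TYPE_DOUBLE };
enum { FLAG_LOAD = 1, FLAG_STORE = 2 };            /* analogues of ZREG_LOAD/ZREG_STORE */

typedef struct {
    uint8_t type;                                  /* type proven while tracing */
    uint8_t mem_type;                              /* type currently stored in memory */
    uint8_t flags;
    int32_t ref;                                   /* non-zero: value lives in a JIT register */
    union { int64_t i; double d; } reg;            /* stands in for the register file */
} stack_slot;

typedef struct { uint8_t type; union { int64_t i; double d; } v; } vm_slot;

/* Mirrors the loop above: store the register value, store the type only if
 * memory is stale, then clear the ref (CLEAR_STACK_REF analogue). */
static void flush_frame(stack_slot *stack, vm_slot *frame, int n)
{
    for (int i = 0; i < n; i++) {
        if (stack[i].ref && !(stack[i].flags & (FLAG_LOAD|FLAG_STORE))) {
            if (stack[i].type == TYPE_LONG) {
                frame[i].v.i = stack[i].reg.i;
            } else if (stack[i].type == TYPE_DOUBLE) {
                frame[i].v.d = stack[i].reg.d;
            }
            if (stack[i].mem_type != stack[i].type) {
                frame[i].type = stack[i].type;     /* memory type was stale */
            }
        }
        stack[i].ref = 0;
    }
}

int main(void)
{
    stack_slot stack[2] = {
        { TYPE_LONG,   TYPE_NONE,   0, 1, { .i = 42  } },  /* type in memory is stale */
        { TYPE_DOUBLE, TYPE_DOUBLE, 0, 2, { .d = 3.5 } },  /* memory already typed    */
    };
    vm_slot frame[2] = { { TYPE_NONE, { .i = 0 } }, { TYPE_DOUBLE, { .d = 0.0 } } };

    flush_frame(stack, frame, 2);
    printf("var0: type=%d i=%lld; var1: type=%d d=%g\n",
        frame[0].type, (long long)frame[0].v.i, frame[1].type, frame[1].v.d);
    return 0;
}

The point of the design, as the hunk's condition suggests, is that a side exit only pays for the stores it actually needs: slots already materialized in memory are left untouched.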
if (p->stop == ZEND_JIT_TRACE_STOP_LINK) { const void *timeout_exit_addr = NULL; @@ -6890,12 +7657,18 @@ done: goto jit_failure; } if ((zend_jit_traces[t->link].flags & ZEND_JIT_TRACE_USES_INITIAL_IP) - && !zend_jit_set_ip(&dasm_state, p->opline)) { + && !zend_jit_set_ip(&ctx, p->opline)) { goto jit_failure; } +#ifndef ZEND_JIT_IR if (!parent_trace && zend_jit_trace_uses_initial_ip()) { t->flags |= ZEND_JIT_TRACE_USES_INITIAL_IP; } +#else + if (!parent_trace && zend_jit_trace_uses_initial_ip(&ctx)) { + t->flags |= ZEND_JIT_TRACE_USES_INITIAL_IP; + } +#endif if (parent_trace && (zend_jit_traces[t->link].flags & ZEND_JIT_TRACE_CHECK_INTERRUPT) && zend_jit_traces[parent_trace].root == t->link) { @@ -6911,15 +7684,19 @@ done: goto jit_failure; } } else { +#ifndef ZEND_JIT_IR timeout_exit_addr = dasm_labels[zend_lbinterrupt_handler]; +#else + timeout_exit_addr = zend_jit_stub_handlers[jit_stub_interrupt_handler]; +#endif } } - zend_jit_trace_link_to_root(&dasm_state, &zend_jit_traces[t->link], timeout_exit_addr); + zend_jit_trace_link_to_root(&ctx, &zend_jit_traces[t->link], timeout_exit_addr); } else { - zend_jit_trace_return(&dasm_state, 0, NULL); + zend_jit_trace_return(&ctx, 0, NULL); } } else if (p->stop == ZEND_JIT_TRACE_STOP_RETURN) { - zend_jit_trace_return(&dasm_state, 0, NULL); + zend_jit_trace_return(&ctx, 0, NULL); } else { // TODO: not implemented ??? ZEND_ASSERT(0 && p->stop); @@ -6929,13 +7706,17 @@ done: goto jit_failure; } - if (!zend_jit_trace_end(&dasm_state, t)) { +#ifndef ZEND_JIT_IR + if (!zend_jit_trace_end(&ctx, t)) { goto jit_failure; } - handler = dasm_link_and_encode(&dasm_state, NULL, NULL, NULL, NULL, ZSTR_VAL(name), ZEND_JIT_TRACE_NUM, + handler = dasm_link_and_encode(&ctx, NULL, NULL, NULL, NULL, ZSTR_VAL(name), ZEND_JIT_TRACE_NUM, parent_trace ? SP_ADJ_JIT : ((zend_jit_vm_kind == ZEND_VM_KIND_HYBRID) ? SP_ADJ_VM : SP_ADJ_RET), parent_trace ? 
SP_ADJ_NONE : SP_ADJ_JIT);
+#else
+	handler = zend_jit_finish(&ctx);
+#endif
 
 	if (handler) {
 		if (p->stop == ZEND_JIT_TRACE_STOP_RECURSIVE_CALL) {
@@ -6991,7 +7772,11 @@ done:
 	}
 
 jit_failure:
-	dasm_free(&dasm_state);
+#ifndef ZEND_JIT_IR
+	dasm_free(&ctx);
+#else
+	zend_jit_free_ctx(&ctx);
+#endif
 
 	if (name) {
 		zend_string_release(name);
@@ -7020,57 +7805,117 @@ jit_cleanup:
 	return handler;
 }
 
+#ifdef ZEND_JIT_IR
+static zend_string *zend_jit_trace_escape_name(uint32_t trace_num, uint32_t exit_num)
+{
+	smart_str buf = {0};
+
+	smart_str_appends(&buf," ESCAPE-");
+	smart_str_append_long(&buf, (zend_long)trace_num);
+	smart_str_appendc(&buf, '-');
+	smart_str_append_long(&buf, (zend_long)exit_num);
+	smart_str_0(&buf);
+	return buf.s;
+}
+#endif
+
 static const void *zend_jit_trace_exit_to_vm(uint32_t trace_num, uint32_t exit_num)
 {
 	const void *handler = NULL;
-	dasm_State* dasm_state = NULL;
-	void *checkpoint;
+#ifndef ZEND_JIT_IR
+	dasm_State* ctx = NULL;
 	char name[32];
+#else
+	zend_jit_ctx ctx;
+	zend_string *name;
+#endif
+	void *checkpoint;
 	const zend_op *opline;
 	uint32_t stack_size;
 	zend_jit_trace_stack *stack;
 	bool original_handler = 0;
 
 	if (!zend_jit_trace_exit_needs_deoptimization(trace_num, exit_num)) {
+#ifndef ZEND_JIT_IR
 		return dasm_labels[zend_lbtrace_escape];
+#else
+		return zend_jit_stub_handlers[jit_stub_trace_escape];
+#endif
 	}
 
+#ifndef ZEND_JIT_IR
 	checkpoint = zend_arena_checkpoint(CG(arena));;
 
 	sprintf(name, "ESCAPE-%d-%d", trace_num, exit_num);
 
-	dasm_init(&dasm_state, DASM_MAXSECTION);
-	dasm_setupglobal(&dasm_state, dasm_labels, zend_lb_MAX);
-	dasm_setup(&dasm_state, dasm_actions);
+	dasm_init(&ctx, DASM_MAXSECTION);
+	dasm_setupglobal(&ctx, dasm_labels, zend_lb_MAX);
+	dasm_setup(&ctx, dasm_actions);
 
-	zend_jit_align_func(&dasm_state);
+	zend_jit_align_func(&ctx);
+#else
+	name = zend_jit_trace_escape_name(trace_num, exit_num);
+
+	if (!zend_jit_deoptimizer_start(&ctx, name, trace_num, exit_num)) {
+		zend_string_release(name);
+		return NULL;
+	}
+
+	checkpoint = zend_arena_checkpoint(CG(arena));
+#endif
 
 	/* Deoptimization */
 	stack_size = zend_jit_traces[trace_num].exit_info[exit_num].stack_size;
 	stack = zend_jit_traces[trace_num].stack_map + zend_jit_traces[trace_num].exit_info[exit_num].stack_offset;
-	if (!zend_jit_trace_deoptimization(&dasm_state,
+	if (!zend_jit_trace_deoptimization(&ctx,
 			zend_jit_traces[trace_num].exit_info[exit_num].flags,
 			zend_jit_traces[trace_num].exit_info[exit_num].opline,
-			stack, stack_size, NULL, NULL, NULL, 0)) {
+			stack, stack_size, NULL, NULL,
+#ifndef ZEND_JIT_IR
+			NULL,
+#else
+			zend_jit_traces[trace_num].constants,
+			zend_jit_traces[trace_num].exit_info[exit_num].poly_func_reg,
+#endif
+			0)) {
 		goto jit_failure;
 	}
 
 	opline = zend_jit_traces[trace_num].exit_info[exit_num].opline;
 	if (opline) {
 		if (opline == zend_jit_traces[zend_jit_traces[trace_num].root].opline) {
+#ifndef ZEND_JIT_IR
 			/* prevent endless loop */
 			original_handler = 1;
+#else
+			zend_jit_op_array_trace_extension *jit_extension =
+				(zend_jit_op_array_trace_extension*)ZEND_FUNC_INFO(zend_jit_traces[zend_jit_traces[trace_num].root].op_array);
+
+			if (ZEND_OP_TRACE_INFO(opline, jit_extension->offset)->orig_handler != opline->handler) {
+				/* prevent endless loop */
+				original_handler = 1;
+			}
+#endif
 		}
-		zend_jit_set_ip_ex(&dasm_state, opline, original_handler);
+		zend_jit_set_ip_ex(&ctx, opline, original_handler);
 	}
 
-	zend_jit_trace_return(&dasm_state, original_handler, opline);
+	zend_jit_trace_return(&ctx, original_handler, opline);
 
-	handler = dasm_link_and_encode(&dasm_state, NULL, NULL, NULL, NULL, name, ZEND_JIT_TRACE_NUM, SP_ADJ_JIT, SP_ADJ_NONE);
+#ifndef ZEND_JIT_IR
+	handler = dasm_link_and_encode(&ctx, NULL, NULL, NULL, NULL, name, ZEND_JIT_TRACE_NUM, SP_ADJ_JIT, SP_ADJ_NONE);
+#else
+	handler = zend_jit_finish(&ctx);
+#endif
 
 jit_failure:
-	dasm_free(&dasm_state);
+#ifndef ZEND_JIT_IR
+	dasm_free(&ctx);
+#else
+	zend_jit_free_ctx(&ctx);
+	zend_string_release(name);
+#endif
 	zend_arena_release(&CG(arena), checkpoint);
 	return handler;
 }
@@ -7112,6 +7957,10 @@ static zend_jit_trace_stop zend_jit_compile_root_trace(zend_jit_trace_rec *trace
 	t->opline = trace_buffer[1].opline;
 	t->exit_info = exit_info;
 	t->stack_map = NULL;
+#ifdef ZEND_JIT_IR
+	t->consts_count = 0;
+	t->constants = NULL;
+#endif
 
 	orig_trigger = JIT_G(trigger);
 	JIT_G(trigger) = ZEND_JIT_ON_HOT_TRACE;
@@ -7134,6 +7983,12 @@ static zend_jit_trace_stop zend_jit_compile_root_trace(zend_jit_trace_rec *trace
 			efree(t->stack_map);
 			t->stack_map = NULL;
 		}
+#ifdef ZEND_JIT_IR
+		if (t->constants) {
+			efree(t->constants);
+			t->constants = NULL;
+		}
+#endif
 		ret = ZEND_JIT_TRACE_STOP_NO_SHM;
 		goto exit;
 	}
@@ -7146,6 +8001,13 @@ static zend_jit_trace_stop zend_jit_compile_root_trace(zend_jit_trace_rec *trace
 		zend_jit_trace_stack *shared_stack_map = (zend_jit_trace_stack*)zend_shared_alloc(t->stack_map_size * sizeof(zend_jit_trace_stack));
 		if (!shared_stack_map) {
 			efree(t->stack_map);
+			t->stack_map = NULL;
+#ifdef ZEND_JIT_IR
+			if (t->constants) {
+				efree(t->constants);
+				t->constants = NULL;
+			}
+#endif
 			ret = ZEND_JIT_TRACE_STOP_NO_SHM;
 			goto exit;
 		}
@@ -7154,6 +8016,20 @@ static zend_jit_trace_stop zend_jit_compile_root_trace(zend_jit_trace_rec *trace
 		t->stack_map = shared_stack_map;
 	}
 
+#ifdef ZEND_JIT_IR
+	if (t->consts_count) {
+		zend_jit_exit_const *constants = (zend_jit_exit_const*)zend_shared_alloc(t->consts_count * sizeof(zend_jit_exit_const));
+		if (!constants) {
+			efree(t->constants);
+			ret = ZEND_JIT_TRACE_STOP_NO_SHM;
+			goto exit;
+		}
+		memcpy(constants, t->constants, t->consts_count * sizeof(zend_jit_exit_const));
+		efree(t->constants);
+		t->constants = constants;
+	}
+#endif
+
 	t->exit_counters = ZEND_JIT_EXIT_COUNTERS;
 	ZEND_JIT_EXIT_COUNTERS += t->exit_count;
@@ -7169,12 +8045,24 @@ static zend_jit_trace_stop zend_jit_compile_root_trace(zend_jit_trace_rec *trace
 			efree(t->stack_map);
 			t->stack_map = NULL;
 		}
+#ifdef ZEND_JIT_IR
+		if (t->constants) {
+			efree(t->constants);
+			t->constants = NULL;
+		}
+#endif
 		ret = ZEND_JIT_TRACE_STOP_TOO_MANY_EXITS;
 	} else {
 		if (t->stack_map) {
 			efree(t->stack_map);
 			t->stack_map = NULL;
 		}
+#ifdef ZEND_JIT_IR
+		if (t->constants) {
+			efree(t->constants);
+			t->constants = NULL;
+		}
+#endif
 		ret = ZEND_JIT_TRACE_STOP_COMPILER_ERROR;
 	}
@@ -7570,6 +8458,13 @@ static void zend_jit_dump_exit_info(zend_jit_trace_info *t)
 		}
 		if (t->exit_info[i].flags & (ZEND_JIT_EXIT_POLYMORPHISM|ZEND_JIT_EXIT_METHOD_CALL|ZEND_JIT_EXIT_CLOSURE_CALL)) {
 			fprintf(stderr, "/POLY");
+#ifdef ZEND_JIT_IR
+			if (t->exit_info[i].flags & ZEND_JIT_EXIT_METHOD_CALL) {
+				fprintf(stderr, "(%s, %s)",
+					t->exit_info[i].poly_func_reg != ZREG_NONE ? zend_reg_name(t->exit_info[i].poly_func_reg) : "?",
+					t->exit_info[i].poly_this_reg != ZREG_NONE ?
zend_reg_name(t->exit_info[i].poly_this_reg) : "?"); + } +#endif } if (t->exit_info[i].flags & ZEND_JIT_EXIT_FREE_OP1) { fprintf(stderr, "/FREE_OP1"); @@ -7588,6 +8483,7 @@ static void zend_jit_dump_exit_info(zend_jit_trace_info *t) } else { fprintf(stderr, "%s", zend_get_type_by_const(type)); } +#ifndef ZEND_JIT_IR if (STACK_REG(stack, j) != ZREG_NONE) { if (STACK_REG(stack, j) < ZREG_NUM) { fprintf(stderr, "(%s)", zend_reg_name[STACK_REG(stack, j)]); @@ -7599,6 +8495,42 @@ static void zend_jit_dump_exit_info(zend_jit_trace_info *t) fprintf(stderr, "(const_%d)", STACK_REG(stack, j) - ZREG_NUM); } } +#else + if (STACK_FLAGS(stack, j) == ZREG_CONST) { + if (type == IS_LONG) { + fprintf(stderr, "(" ZEND_LONG_FMT ")", (zend_long)t->constants[STACK_REF(stack, j)].i); + } else if (type == IS_DOUBLE) { + fprintf(stderr, "(%g)", t->constants[STACK_REF(stack, j)].d); + } else { + ZEND_UNREACHABLE(); + } + } else if (STACK_FLAGS(stack, j) == ZREG_TYPE_ONLY) { + fprintf(stderr, "(type_only)"); + } else if (STACK_FLAGS(stack, j) == ZREG_THIS) { + fprintf(stderr, "(this)"); + } else if (STACK_FLAGS(stack, j) == ZREG_ZVAL_ADDREF) { + fprintf(stderr, "(zval_try_addref)"); + } else if (STACK_FLAGS(stack, j) == ZREG_ZVAL_COPY) { + fprintf(stderr, "zval_copy(%s)", zend_reg_name(STACK_REG(stack, j))); + } else if (STACK_FLAGS(stack, j) & ZREG_SPILL_SLOT) { + if (STACK_REG(stack, j) == ZREG_NONE) { + fprintf(stderr, "(spill=0x%x", STACK_REF(stack, j)); + } else { + fprintf(stderr, "(spill=0x%x(%s)", STACK_REF(stack, j), zend_reg_name(STACK_REG(stack, j))); + } + if (STACK_FLAGS(stack, j) != 0) { + fprintf(stderr, ":%x", STACK_FLAGS(stack, j)); + } + fprintf(stderr, ")"); + } else if (STACK_REG(stack, j) != ZREG_NONE) { + fprintf(stderr, "(%s", zend_reg_name(STACK_REG(stack, j))); + if (STACK_FLAGS(stack, j) != 0) { + fprintf(stderr, ":%x", STACK_FLAGS(stack, j)); + } + fprintf(stderr, ")"); + } +#endif +#ifndef ZEND_JIT_IR } else if (STACK_REG(stack, j) == ZREG_ZVAL_TRY_ADDREF) { fprintf(stderr, " "); zend_dump_var(op_array, (j < op_array->last_var) ? IS_CV : 0, j); @@ -7607,6 +8539,14 @@ static void zend_jit_dump_exit_info(zend_jit_trace_info *t) fprintf(stderr, " "); zend_dump_var(op_array, (j < op_array->last_var) ? IS_CV : 0, j); fprintf(stderr, ":unknown(zval_copy(%s))", zend_reg_name[ZREG_COPY]); +#else + } else if (STACK_FLAGS(stack, j) == ZREG_ZVAL_ADDREF) { + fprintf(stderr, ":unknown(zval_try_addref)"); + } else if (STACK_FLAGS(stack, j) == ZREG_ZVAL_COPY) { + fprintf(stderr, " "); + zend_dump_var(op_array, (j < op_array->last_var) ? 
IS_CV : 0, j); + fprintf(stderr, ":unknown(zval_copy(%s))", zend_reg_name(STACK_REG(stack, j))); +#endif } } fprintf(stderr, "\n"); @@ -7856,6 +8796,10 @@ static zend_jit_trace_stop zend_jit_compile_side_trace(zend_jit_trace_rec *trace t->opline = NULL; t->exit_info = exit_info; t->stack_map = NULL; +#ifdef ZEND_JIT_IR + t->consts_count = 0; + t->constants = NULL; +#endif orig_trigger = JIT_G(trigger); JIT_G(trigger) = ZEND_JIT_ON_HOT_TRACE; @@ -7878,6 +8822,12 @@ static zend_jit_trace_stop zend_jit_compile_side_trace(zend_jit_trace_rec *trace efree(t->stack_map); t->stack_map = NULL; } +#ifdef ZEND_JIT_IR + if (t->constants) { + efree(t->constants); + t->constants = NULL; + } +#endif ret = ZEND_JIT_TRACE_STOP_NO_SHM; goto exit; } @@ -7890,6 +8840,13 @@ static zend_jit_trace_stop zend_jit_compile_side_trace(zend_jit_trace_rec *trace zend_jit_trace_stack *shared_stack_map = (zend_jit_trace_stack*)zend_shared_alloc(t->stack_map_size * sizeof(zend_jit_trace_stack)); if (!shared_stack_map) { efree(t->stack_map); + t->stack_map = NULL; +#ifdef ZEND_JIT_IR + if (t->constants) { + efree(t->constants); + t->constants = NULL; + } +#endif ret = ZEND_JIT_TRACE_STOP_NO_SHM; goto exit; } @@ -7898,6 +8855,20 @@ static zend_jit_trace_stop zend_jit_compile_side_trace(zend_jit_trace_rec *trace t->stack_map = shared_stack_map; } +#ifdef ZEND_JIT_IR + if (t->consts_count) { + zend_jit_exit_const *constants = (zend_jit_exit_const*)zend_shared_alloc(t->consts_count * sizeof(zend_jit_exit_const)); + if (!constants) { + efree(t->constants); + ret = ZEND_JIT_TRACE_STOP_NO_SHM; + goto exit; + } + memcpy(constants, t->constants, t->consts_count * sizeof(zend_jit_exit_const)); + efree(t->constants); + t->constants = constants; + } +#endif + zend_jit_link_side_trace( zend_jit_traces[parent_num].code_start, zend_jit_traces[parent_num].code_size, @@ -7919,12 +8890,24 @@ static zend_jit_trace_stop zend_jit_compile_side_trace(zend_jit_trace_rec *trace efree(t->stack_map); t->stack_map = NULL; } +#ifdef ZEND_JIT_IR + if (t->constants) { + efree(t->constants); + t->constants = NULL; + } +#endif ret = ZEND_JIT_TRACE_STOP_TOO_MANY_EXITS; } else { if (t->stack_map) { efree(t->stack_map); t->stack_map = NULL; } +#ifdef ZEND_JIT_IR + if (t->constants) { + efree(t->constants); + t->constants = NULL; + } +#endif ret = ZEND_JIT_TRACE_STOP_COMPILER_ERROR; } @@ -8119,6 +9102,7 @@ int ZEND_FASTCALL zend_jit_trace_exit(uint32_t exit_num, zend_jit_registers_buf } for (i = 0; i < stack_size; i++) { +#ifndef ZEND_JIT_IR if (STACK_REG(stack, i) != ZREG_NONE) { if (STACK_TYPE(stack, i) == IS_LONG) { zend_long val; @@ -8168,6 +9152,61 @@ int ZEND_FASTCALL zend_jit_trace_exit(uint32_t exit_num, zend_jit_registers_buf ZEND_UNREACHABLE(); } } +#else + if (STACK_FLAGS(stack, i) == ZREG_CONST) { + if (STACK_TYPE(stack, i) == IS_LONG) { + ZVAL_LONG(EX_VAR_NUM(i), (zend_long)t->constants[STACK_REF(stack, i)].i); + } else if (STACK_TYPE(stack, i) == IS_DOUBLE) { + ZVAL_DOUBLE(EX_VAR_NUM(i), t->constants[STACK_REF(stack, i)].d); + } else { + ZEND_UNREACHABLE(); + } + } else if (STACK_FLAGS(stack, i) == ZREG_TYPE_ONLY) { + uint32_t type = STACK_TYPE(stack, i); + if (type <= IS_DOUBLE) { + Z_TYPE_INFO_P(EX_VAR_NUM(i)) = type; + } else { + ZEND_UNREACHABLE(); + } + } else if (STACK_FLAGS(stack, i) == ZREG_THIS) { + zend_object *obj = Z_OBJ(EX(This)); + + GC_ADDREF(obj); + ZVAL_OBJ(EX_VAR_NUM(i), obj); + } else if (STACK_FLAGS(stack, i) == ZREG_ZVAL_ADDREF) { + Z_TRY_ADDREF_P(EX_VAR_NUM(i)); + } else if (STACK_FLAGS(stack, i) == ZREG_ZVAL_COPY) { + zval 
*val = (zval*)regs->gpr[STACK_REG(stack, i)]; + + if (UNEXPECTED(Z_TYPE_P(val) == IS_UNDEF)) { + /* Undefined array index or property */ + repeat_last_opline = 1; + } else { + ZVAL_COPY(EX_VAR_NUM(i), val); + } + } else if (STACK_FLAGS(stack, i) & ZREG_SPILL_SLOT) { + ZEND_ASSERT(STACK_REG(stack, i) != ZREG_NONE); + uintptr_t ptr = (uintptr_t)regs->gpr[STACK_REG(stack, i)] + STACK_REF(stack, i); + + if (STACK_TYPE(stack, i) == IS_LONG) { + ZVAL_LONG(EX_VAR_NUM(i), *(zend_long*)ptr); + } else if (STACK_TYPE(stack, i) == IS_DOUBLE) { + ZVAL_DOUBLE(EX_VAR_NUM(i), *(double*)ptr); + } else { + ZEND_UNREACHABLE(); + } + } else if (STACK_REG(stack, i) != ZREG_NONE) { + if (STACK_TYPE(stack, i) == IS_LONG) { + zend_long val = regs->gpr[STACK_REG(stack, i)]; + ZVAL_LONG(EX_VAR_NUM(i), val); + } else if (STACK_TYPE(stack, i) == IS_DOUBLE) { + double val = regs->fpr[STACK_REG(stack, i) - ZREG_FIRST_FPR]; + ZVAL_DOUBLE(EX_VAR_NUM(i), val); + } else { + ZEND_UNREACHABLE(); + } + } +#endif } if (repeat_last_opline) { @@ -8207,7 +9246,12 @@ int ZEND_FASTCALL zend_jit_trace_exit(uint32_t exit_num, zend_jit_registers_buf } } if (t->exit_info[exit_num].flags & ZEND_JIT_EXIT_METHOD_CALL) { +#ifndef ZEND_JIT_IR zend_function *func = (zend_function*)regs->gpr[ZREG_COPY]; +#else + ZEND_ASSERT(t->exit_info[exit_num].poly_func_reg >= 0); + zend_function *func = (zend_function*)regs->gpr[t->exit_info[exit_num].poly_func_reg]; +#endif if (UNEXPECTED(func->common.fn_flags & ZEND_ACC_CALL_VIA_TRAMPOLINE)) { zend_string_release_ex(func->common.function_name, 0); diff --git a/ext/opcache/jit/zend_jit_vm_helpers.c b/ext/opcache/jit/zend_jit_vm_helpers.c index 3bed3c36f96..c346835e597 100644 --- a/ext/opcache/jit/zend_jit_vm_helpers.c +++ b/ext/opcache/jit/zend_jit_vm_helpers.c @@ -28,11 +28,13 @@ #include "Optimizer/zend_func_info.h" #include "Optimizer/zend_call_graph.h" #include "zend_jit.h" +#ifndef ZEND_JIT_IR #if ZEND_JIT_TARGET_X86 # include "zend_jit_x86.h" #elif ZEND_JIT_TARGET_ARM64 # include "zend_jit_arm64.h" #endif +#endif /* ZEND_JIT_IR */ #include "zend_jit_internal.h"
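[Editor's sketch, not part of the patch] In the IR build, the zend_jit_trace_exit() handler shown above reconstructs each VM variable from the per-exit stack map: ZREG_CONST entries index the per-trace constants table, ZREG_SPILL_SLOT entries are read from memory at "base register + byte offset", and plain entries come straight from the saved GP/FP registers. The toy program below models only the spill-slot case; gpr, gpr_index and spill_off are hypothetical stand-ins for regs->gpr, STACK_REG() and STACK_REF().

#include <stdint.h>
#include <stdio.h>

int main(void)
{
    /* Pretend this array is the JIT stack frame; slot 1 holds a spilled long. */
    int64_t jit_frame[4] = { 0, 777, 0, 0 };

    /* The deoptimization info records which saved register holds the frame
     * base and the byte offset of the spill slot within that frame. */
    uintptr_t gpr[8] = {0};
    int       gpr_index = 5;                       /* STACK_REG(stack, i) analogue */
    int32_t   spill_off = 1 * sizeof(int64_t);     /* STACK_REF(stack, i) analogue */

    gpr[gpr_index] = (uintptr_t)jit_frame;         /* captured by the exit stub */

    /* Mirrors: ptr = regs->gpr[STACK_REG(...)] + STACK_REF(...);
     *          ZVAL_LONG(EX_VAR_NUM(i), *(zend_long*)ptr); */
    int64_t restored = *(int64_t *)(gpr[gpr_index] + spill_off);
    printf("restored spilled value: %lld\n", (long long)restored);
    return 0;
}

This mirrors why the ZREG_SPILL_SLOT case above asserts a valid base register: without it, the byte offset alone cannot locate the spilled value.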