1
0
mirror of https://github.com/php/php-src.git synced 2026-03-24 00:02:20 +01:00

A new PHP JIT implementation based on IR JIT framework (#12079)

* IR update

* Use folding to allow constant folding and common subexpression elimination

* Implement IR JIT for INIT_FCALL, INIT_FCALL_BY_NAME and INIT_NS_FCALL_BY_NAME

* Implement IR JIT for SEND_VAL and SEND_VAL_EX

* Implement IR JIT for SEND_REF

* Implement IR JIT for SEND_VAR* instructions (incomplete - a few test failures)

* Implement IR JIT for CHECK_FUNC_ARG

* Implement IR JIT for CHECK_UNDEF_ARGS

* Implement IR JIT for ROPE_INIT, ROPE_ADD and ROPE_END

* Implement IR JIT for FREE, FE_FREE, ECHO, STRLEN and COUNT

* Implement IR JIT for IN_ARRAY

* Implement IR JIT support for separate VM stack overflow check

* Implement IR JIT for INIT_DYNAMIC_CALL

* Implement IR JIT for INIT_METHOD_CALL

* Fix IR JIT for IN_ARRAY and COUNT

* Implement IR JIT for VERIFY_RETURN_TYPE

* Force C compiler to store preserved registers to allow JIT using them

* Implement IR JIT for DO_FCALL, DO_UCALL, DO_ICALL and DO_FCALL_BY_NAME

* Implement IR JIT for FETCH_CONSTANT

* Fix (reverse) guard conditions

* Implement IR JIT for RECV and RECV_INIT

* Implement IR JIT for RETURN

* Implement IR JIT for BIND_GLOBAL

* Fix guard for: int++ => double

* Fix exception handling

* Allow deoptimization of zval type only (if some register is spilled by the IR engine)

* Fix overflow handling

* Implement IR JIT for FE_RESET_R and FE_FETCH_R

* Eliminate extra temporary register

* Better registers usage

* Implement IR JIT for FETCH_DIM_* and ISSET_DIM

* Implement IR JIT for ASSIGN_DIM and ASSIGN_DIM_OP

* cleanup

* Generate IR that produces better x86[_64] code

* Allow trace register allocation for live ranges terminated before entering a called function

* Remove following END->BEGIN nodes during IR construction

* Remove useless (duplicate) guard

* Avoid useless exception check

* Prevent duplicate store

* Eliminate repeatable re-assignment of stack zval types

* Enable combination of some instructions with the following SEND_VAL for IR JIT

* Avoid generation of useless RLOADs

* Eliminate refcounting in a sequence of FETCH_DIM_R

* Fix assertion

* Remove ZREG_ZVAL_ADDREF flag from an element of abstract stack

* Implement IR JIT for FETCH_OBJ_*

* Implement IR JIT for ASSIGN_OBJ

* Implement IR JIT for ASSIGN_OBJ_OP

* cleanup

* Implement IR JIT for (PRE/POST)_(INC/DEC)_OBJ

* ws

* cleanup

* Fix IR JIT for constructor call

* Fix opcache.jit=1201 IR JIT.

With opcache.jit=1201 we still have to generate code for follow and target basic blocks with a single exiting VM instruction. We may just omit the entry point.

* Fix IR construction for the case when both IF targets are the same

* Avoid PHP LEAVE code duplication in function IR JIT.

* Reload operands from memory when overflow (this improves hot code)

* Implement IR JIT for SWITCH_LONG, SWITCH_STRING and MATCH

* Initialize result to IS_UNDEF

* Fix JIT integration with observer (Zend/tests/gh10346.phpt failure)

* Fix incorrect compilation of FE_FETCH with predicted empty array

* Fix register allocation

* Use sign extension instead of zero

* Fix trace register allocator

* cleanup

* Fix address sanitizer warning

* Calculate JIT trace prologue size on startup (to avoid magic constants).

* Add checks for merge arrays overflow (this should be refactored using lists)

* Cache TLS access to perform corresponding read once per basic block

* cleanup unused variable

* Fix IR JIT support for CLANG build (CALL VM without global register variables)

* Fix IR JIT for CALL VM with global register variables

* Allow %rbp usage in JIT for CALL VM (we save and restore it in prologue/epilogue anyway)

* cleanup

* Allocate enough fixed stack to keep preserved registers

* We don't have to care about x29 and x30

* cleanup (JMPZ/NZ_EX work fine)

* Revert "cleanup (JMPZ/NZ_EX work fine)"

This reverts commit cf8dd74a040e225d290d8ac4f5e33df638e6f8b8.

* Don't allocate register for PHP variables that are loaded from memory and used once

* Eliminate redundant deoptimization stores

* cleanup

* cleanup

* cleanup

* Optimization for constant comparison

* Cleanup and elimination of dead deoptimization stores

* Eliminate duplicate constant loading

* Set proper initial SP offset info for GDB backtraces

This doesn't take into account the following SP/FP modifications

* Add spill stores

* Remove low limit on number of deoptimization constants

* Emit dead code only when it's really necessary for IR graph

* cleanup

* cleanup

* Prefer loading long constants from memory (instead of loading immediate value)

* Register disasm labels using macros (add missing helpers)

* Make IR framework to care about GUARD JMP reordering

* Avoid reloading

* Improve register allocation for IR tracing JIT

* Add comment

* Fix deoptimization on result type guard of FETCH_DIM_R and FETCH_OBJ_R

* If HYBRID VM can't provide some stack space for JIT code in "red zone" then JIT has to reserve stack space itself

* Dump IR for stubs only if disassembling of stubs is requested

* Revert "Dump IR for stubs only if disassembling of stubs is requested"

This reverts commit d8b56bec129bc23c2b16f1f3c6367190181b6fdb.

* Dump IR for stubs only if disassembling of stubs is requested (another approach)

* Improve overflow deoptimization for ADD(_,1) and SUB(_,1)

Now we deoptimize to the next instruction, load constant result, and remove op1 from SNAPSHOT

* Switch to IR Builder API

* Switch to new IR builder macros

* Fix jit_set_Z_TYPE_INFO() call. op3 is a simple constant (not a ir_ref).

* Generate better code

* Enable empty ENTRY block merging

* Improve code generated for array separation/creation before an update

(ASSIGN_DIM, ASSING_DIM_OP, etc)

* Fix incorrect deletion of PHI source (op1 is used for control link)

* Load constant once

* cleanup

* Improve control-flow to avoid two IS_ARRAY checks for REFERENCEs

* Update comments

* cleanup

* Cleanup comments

* Fix AArch64 build (disable stack adjustment auto-detection)

* Add filename and line number to closure names

* Reserve stack for parameter passing

* Increase size of CPU stack reserved for JIT-ed code

* Fix address sanitizer warnings

* Cleanup: introduce OPTIMIZE_FOR_SIZE macro (disabled by default)

* Port 08e7591206 to IR JIT

Fix (at least part of the) #GH-10635: ARM64 function JIT causes impossible assertion

* cleanup

* Preload constant and use tests that may be compiled into better code

* Convert helpers to stubs

* Introduce a helper data structure (ir_refs) to collect references for the following use in (MERGE/PHI)_N

* Use ir_refs

* Improve code generated by zend_jit_zval_copy_deref()

* Use "cold" attribute to influence IR block scheduler and achieve better code layout

* Keep info collected by recursion analyzer

* Use HTTPS URL to allow fetching without a SSH key

* Update IR

* Update IR

* Add IR JIT support for Windows (Win64 support is incomplete)

* Update IR

* Update IR

* Fix support for Windows ZTS build

* Fix stack alignment

* Cleanup ir_ctx.control usage

* Fixed support for irreducible (incomplete) and merged loops

* Revert "Fixed support for irreducible (incomplete) and merged loops"

This reverts commit 672b5b89f47e8b81745fb73c86e0bcb0937daf16.

* Generate better code for RECV_ENTRies

* Use simpler and more efficient checks

* Switch to new ENTRY node concept

* Limit register usage across the OSR ENTRY point

* Update MEM type only if we write to memory

* Use LOOP_END without a reference edge

* Use new ir_init() prototype

* Delay LOAD for better LOAD fusion

* Fix RECV/RECV_INIT compilation with opcache.jit=1235

* Properly compile fake closures (they may be called as regular functions)

* Fix rebase

* Fix rebase and add --with-capstone support for IR JIT

* Replace zend_uchar -> uint8_t

* IR JIT support for delayed destructor for zend_assign_to_typed_ref/prop

* Handle zend_execute_internal in IR JIT

* Fix readonly+clone IR JIT issues

* Switch to ir_ctx.mflags

* Cleanup "inputs_count" access

* Disable CSE for nodes bound to PHP local variables

The stack slots for temporary variables may be reused and in case of
spilling this may cause clobbering of the value.

(ext/standard/tests/strings/htmlentities20.phpt on x86 with tracing JIT)

* Fix deoptimization code when link traces

See ext/zlib/tests/bug75273.phpt failure

* Fix missing type store

This fixes ext/openssl/tests/openssl_error_string_basic_openssl3.phpt

* Fix tracing JIT for overflowing INC/DEC

Fixes tests/lang/operators/preinc_basiclong_64bit.phpt

* Remove ir_remove_unreachable_blocks() call. Now it's called by ir_build_cfg(), when necessary.

* IR JIT: Fixed inaccurate range inference usage for UNDEF/NULL/FALSE

* IR JIT: Fixed GH-11127 (JIT fault)

* Avoid allocation of unused exit point

* Don't record already stored PHP variables in SNAPSHOTs

* Delay variable load

* Disable CSE across ENTRY

* Fixed disabling CSE

* Fix deoptimization

* Fixed deoptimization

* Disable incorrect register allocation

* Fix JIT for IDENTICAL+JMPZ_EX

* Add comments

* Fixed missed type stores

* IR JIT: added support for CLDEMOTE

* Fixed incorrect constant usage

* Disable compilation of PHP functions with irreducible CFG

* Fixed liveness check

* Fixed code for constant conditional jump

* Add type store to avoid use-after-free

* Fixed liveness analyses

* Generate SNAPSHOT for virtual method calls

* More accurate search for statically inferred info about a trace SSA variable

* Fix incorrect result use type_info

* Fix JMPZ/NZ_EX support and missing type store

* Fixed trace type inference and missing type store

* Store type of unused CV to prevent possible following use after free

* Fixed deoptimization info

* Fixed stack layout

* Implemented support for veneers on AArch64

* Disable CSE to avoid over-optimization

* Don't bind nodes for TMP PHP variables

* Re-enable CSE for temporary variables as we don't bind them anymore

* Switch to CPU stack spill slots

* Add codegen info dump

* Initialize CV variables through FP (this enables some folding optimizations)

* Use zero-extension that can be eliminated

* Avoid generation of dead PHIs

* Increase preallocated spill stack size

* Enable IR based JIT by default

* Fixed build with --disable-opcache-jit

* Use explicit type conversion &amp; force load values to registers

* Fix IR build

* Checkout submodules in github actions

* Fixed Windows build

* Fixed Windows build

* Fixed reattach to IR JIT SHM

* Update IR

* Checkout submodules in nightly CI

* Fix MACOS ZTS in IR JIT

* Update ir

* Fixed incorrect register allocation

* Fixed incorrect code generation

* Fixed tracing jit for BIND_INIT_STATIC_OR_JMP

* Update README

* Typos

* Revert JIT disabling for run-tests.php workers

* Fixed code review issues

* Update IR

* Update IR

* Update IR

* Allow exit_point duplication, when the deoptimization info differs because of spilling

* Use bound spill slots for CV (once again)

* Improve error handling

* Removed IR submodule

* Remove IR submodule from workflows

* Embed IR

IR commit: 8977307f4e96ee03847d7f2eb809b3080f9ed662

* Add .gitignore

* Fixed according to feedback

* Force C saving preserved registers only for HYBRID VM

* Update IR

IR commit: a2f8452b3d35a756cba38924f5c51a48a7207494

* cleanup

* Replace ZEND_ASSERT(0) by ZEND_UNREACHABLE()

* Update IR and remove unused IR files

IR commit: 399a38771393c202a741336643118991290b4b1b

* Fixed inconsistency between IR code-generation and register-allocation

* Update IR

IR commit: 86685504274b0c71d9985b3c926dccaca2cacf9b

* Update ir_PHI*() according to IR construction API changes

* Fixed 32-bit build

* Update IR

IR commit: d0686408e20cd8c8640e37ed52ab81403a2383cb

* Support for ir_TAILCALL() prototype changes

* Update IR

IR commit: d72ae866e09d17e879378767aceb91d51894818c

* Fixed incorrect extension (ZEXT->SEXT)

* Fix SSA dominance

* Update IR

IR commit: d60d92516dc5f89b93cdf1df7a54141e83226b07

* Fixed support ir_ctx.ret_type
This commit is contained in:
Dmitry Stogov
2023-10-23 10:15:52 +03:00
committed by GitHub
parent 226b92b1dc
commit caf102dfae
60 changed files with 71906 additions and 534 deletions

16
Zend/zend_vm_execute.h generated
View File

@@ -53444,14 +53444,14 @@ ZEND_API void execute_ex(zend_execute_data *ex)
#if defined(ZEND_VM_IP_GLOBAL_REG) || defined(ZEND_VM_FP_GLOBAL_REG)
struct {
#ifdef ZEND_VM_HYBRID_JIT_RED_ZONE_SIZE
char hybrid_jit_red_zone[ZEND_VM_HYBRID_JIT_RED_ZONE_SIZE];
#endif
#ifdef ZEND_VM_IP_GLOBAL_REG
const zend_op *orig_opline;
#endif
#ifdef ZEND_VM_FP_GLOBAL_REG
zend_execute_data *orig_execute_data;
#ifdef ZEND_VM_HYBRID_JIT_RED_ZONE_SIZE
char hybrid_jit_red_zone[ZEND_VM_HYBRID_JIT_RED_ZONE_SIZE];
#endif
#endif
} vm_stack_data;
#endif
@@ -56960,6 +56960,16 @@ ZEND_API void execute_ex(zend_execute_data *ex)
}
#endif
#if (ZEND_VM_KIND == ZEND_VM_KIND_HYBRID)
/* Force C compiler to store preserved registers to allow JIT using them */
# if defined(__GNUC__) && defined(__i386__)
__asm__ __volatile__ (""::: "ebx");
# elif defined(__GNUC__) && defined(__x86_64__)
__asm__ __volatile__ (""::: "rbx","r12","r13");
# elif defined(__GNUC__) && defined(__aarch64__)
__asm__ __volatile__ (""::: "x19","x20","x21","x22","x23","x24","x25","x26");
# endif
#endif
LOAD_OPLINE();
ZEND_VM_LOOP_INTERRUPT_CHECK();

View File

@@ -13,6 +13,16 @@ ZEND_API void {%EXECUTOR_NAME%}_ex(zend_execute_data *ex)
{%INTERNAL_LABELS%}
#if (ZEND_VM_KIND == ZEND_VM_KIND_HYBRID)
/* Force C compiler to store preserved registers to allow JIT using them */
# if defined(__GNUC__) && defined(__i386__)
__asm__ __volatile__ (""::: "ebx");
# elif defined(__GNUC__) && defined(__x86_64__)
__asm__ __volatile__ (""::: "rbx","r12","r13");
# elif defined(__GNUC__) && defined(__aarch64__)
__asm__ __volatile__ (""::: "x19","x20","x21","x22","x23","x24","x25","x26");
# endif
#endif
LOAD_OPLINE();
ZEND_VM_LOOP_INTERRUPT_CHECK();

View File

@@ -2046,14 +2046,14 @@ function gen_executor($f, $skl, $spec, $kind, $executor_name, $initializer_name)
} else {
out($f,"#if defined(ZEND_VM_IP_GLOBAL_REG) || defined(ZEND_VM_FP_GLOBAL_REG)\n");
out($f,$m[1]."struct {\n");
out($f,"#ifdef ZEND_VM_HYBRID_JIT_RED_ZONE_SIZE\n");
out($f,$m[1]."\tchar hybrid_jit_red_zone[ZEND_VM_HYBRID_JIT_RED_ZONE_SIZE];\n");
out($f,"#endif\n");
out($f,"#ifdef ZEND_VM_IP_GLOBAL_REG\n");
out($f,$m[1]."\tconst zend_op *orig_opline;\n");
out($f,"#endif\n");
out($f,"#ifdef ZEND_VM_FP_GLOBAL_REG\n");
out($f,$m[1]."\tzend_execute_data *orig_execute_data;\n");
out($f,"#ifdef ZEND_VM_HYBRID_JIT_RED_ZONE_SIZE\n");
out($f,$m[1]."\tchar hybrid_jit_red_zone[ZEND_VM_HYBRID_JIT_RED_ZONE_SIZE];\n");
out($f,"#endif\n");
out($f,"#endif\n");
out($f,$m[1]."} vm_stack_data;\n");
out($f,"#endif\n");
@@ -2339,7 +2339,7 @@ function gen_vm_opcodes_header(
$str .= "\n";
$str .= "#if (ZEND_VM_KIND == ZEND_VM_KIND_HYBRID) && !defined(__SANITIZE_ADDRESS__)\n";
$str .= "# if ((defined(i386) && !defined(__PIC__)) || defined(__x86_64__) || defined(_M_X64))\n";
$str .= "# define ZEND_VM_HYBRID_JIT_RED_ZONE_SIZE 16\n";
$str .= "# define ZEND_VM_HYBRID_JIT_RED_ZONE_SIZE 48\n";
$str .= "# endif\n";
$str .= "#endif\n";
$str .= "\n";

View File

@@ -36,7 +36,7 @@
#if (ZEND_VM_KIND == ZEND_VM_KIND_HYBRID) && !defined(__SANITIZE_ADDRESS__)
# if ((defined(i386) && !defined(__PIC__)) || defined(__x86_64__) || defined(_M_X64))
# define ZEND_VM_HYBRID_JIT_RED_ZONE_SIZE 16
# define ZEND_VM_HYBRID_JIT_RED_ZONE_SIZE 48
# endif
#endif

View File

@@ -122,6 +122,11 @@ clean:
rm -f ext/opcache/jit/zend_jit_x86.c
rm -f ext/opcache/jit/zend_jit_arm64.c
rm -f ext/opcache/minilua
rm -f ext/opcache/jit/ir/gen_ir_fold_hash
rm -f ext/opcache/jit/ir/minilua
rm -f ext/opcache/jit/ir/ir_fold_hash.h
rm -f ext/opcache/jit/ir/ir_emit_x86.h
rm -f ext/opcache/jit/ir/ir_emit_aarch64.h
distclean: clean
rm -f Makefile config.cache config.log config.status Makefile.objects Makefile.fragments libtool main/php_config.h main/internal_functions_cli.c main/internal_functions.c Zend/zend_dtrace_gen.h Zend/zend_dtrace_gen.h.bak Zend/zend_config.h

View File

@@ -24,6 +24,13 @@ PHP_ARG_WITH([capstone],,
[no],
[no])
PHP_ARG_ENABLE([opcache-jit-ir],
[whether to enable JIT based on IR framework],
[AS_HELP_STRING([--disable-opcache-jit-ir],
[Disable JIT based on IR framework (use old JIT)])],
[yes],
[no])
if test "$PHP_OPCACHE" != "no"; then
dnl Always build as shared extension
@@ -44,7 +51,7 @@ if test "$PHP_OPCACHE" != "no"; then
esac
fi
if test "$PHP_OPCACHE_JIT" = "yes"; then
if test "$PHP_OPCACHE_JIT" = "yes" -a "$PHP_OPCACHE_JIT_IR" = "no" ; then
AC_DEFINE(HAVE_JIT, 1, [Define to enable JIT])
ZEND_JIT_SRC="jit/zend_jit.c jit/zend_jit_gdb.c jit/zend_jit_vm_helpers.c"
@@ -86,6 +93,62 @@ if test "$PHP_OPCACHE" != "no"; then
PHP_SUBST(DASM_FLAGS)
PHP_SUBST(DASM_ARCH)
JIT_CFLAGS=
elif test "$PHP_OPCACHE_JIT" = "yes" -a "$PHP_OPCACHE_JIT_IR" = "yes"; then
AC_DEFINE(HAVE_JIT, 1, [Define to enable JIT])
AC_DEFINE(ZEND_JIT_IR, 1, [Use JIT IR framework])
ZEND_JIT_SRC="jit/zend_jit.c jit/zend_jit_vm_helpers.c jit/ir/ir.c jit/ir/ir_strtab.c \
jit/ir/ir_cfg.c jit/ir/ir_sccp.c jit/ir/ir_gcm.c jit/ir/ir_ra.c jit/ir/ir_save.c \
jit/ir/ir_dump.c jit/ir/ir_gdb.c jit/ir/ir_perf.c jit/ir/ir_check.c \
jit/ir/ir_patch.c jit/ir/ir_emit.c"
dnl Find out which ABI we are using.
case $host_alias in
x86_64-*-darwin*)
IR_TARGET=IR_TARGET_X64
DASM_FLAGS="-D X64APPLE=1 -D X64=1"
DASM_ARCH="x86"
;;
x86_64*)
IR_TARGET=IR_TARGET_X64
DASM_FLAGS="-D X64=1"
DASM_ARCH="x86"
;;
i[[34567]]86*)
IR_TARGET=IR_TARGET_X86
DASM_ARCH="x86"
;;
x86*)
IR_TARGET=IR_TARGET_X86
DASM_ARCH="x86"
;;
aarch64*)
IR_TARGET=IR_TARGET_AARCH64
DASM_ARCH="aarch64"
;;
esac
AS_IF([test x"$with_capstone" = "xyes"],[
PKG_CHECK_MODULES([CAPSTONE],[capstone >= 3.0.0],[
AC_DEFINE([HAVE_CAPSTONE], [1], [Capstone is available])
PHP_EVAL_LIBLINE($CAPSTONE_LIBS, OPCACHE_SHARED_LIBADD)
PHP_EVAL_INCLINE($CAPSTONE_CFLAGS)
ZEND_JIT_SRC+=" jit/ir/ir_disasm.c"
],[
AC_MSG_ERROR([capstone >= 3.0 required but not found])
])
])
PHP_SUBST(IR_TARGET)
PHP_SUBST(DASM_FLAGS)
PHP_SUBST(DASM_ARCH)
JIT_CFLAGS="-I@ext_builddir@/jit/ir -D${IR_TARGET} -DIR_PHP"
if test "$ZEND_DEBUG" = "yes"; then
JIT_CFLAGS="${JIT_CFLAGS} -DIR_DEBUG"
fi
fi
AC_CHECK_FUNCS([mprotect memfd_create shm_create_largepage])
@@ -310,7 +373,7 @@ int main(void) {
shared_alloc_mmap.c \
shared_alloc_posix.c \
$ZEND_JIT_SRC,
shared,,"-Wno-implicit-fallthrough -DZEND_ENABLE_STATIC_TSRMLS_CACHE=1",,yes)
shared,,"-Wno-implicit-fallthrough -DZEND_ENABLE_STATIC_TSRMLS_CACHE=1 ${JIT_CFLAGS}",,yes)
PHP_ADD_EXTENSION_DEP(opcache, pcre)
@@ -320,6 +383,9 @@ int main(void) {
if test "$PHP_OPCACHE_JIT" = "yes"; then
PHP_ADD_BUILD_DIR([$ext_builddir/jit], 1)
if test "$PHP_OPCACHE_JIT_IR" = "yes"; then
PHP_ADD_BUILD_DIR([$ext_builddir/jit/ir], 1)
fi
PHP_ADD_MAKEFILE_FRAGMENT($ext_srcdir/jit/Makefile.frag)
fi
PHP_SUBST(OPCACHE_SHARED_LIBADD)

View File

@@ -5,6 +5,8 @@ if (PHP_OPCACHE != "no") {
ARG_ENABLE("opcache-jit", "whether to enable JIT", "yes");
ARG_ENABLE("opcache-jit-ir", "whether to enable JIT based on IR framework", "yes");
ZEND_EXTENSION('opcache', "\
ZendAccelerator.c \
zend_accelerator_blacklist.c \
@@ -18,7 +20,7 @@ if (PHP_OPCACHE != "no") {
zend_shared_alloc.c \
shared_alloc_win32.c", true, "/DZEND_ENABLE_STATIC_TSRMLS_CACHE=1");
if (PHP_OPCACHE_JIT == "yes") {
if (PHP_OPCACHE_JIT == "yes" && PHP_OPCACHE_JIT_IR == "no") {
if (CHECK_HEADER_ADD_INCLUDE("dynasm/dasm_x86.h", "CFLAGS_OPCACHE", PHP_OPCACHE + ";ext\\opcache\\jit")) {
var dasm_flags = (X64 ? "-D X64=1" : "") + (X64 ? " -D X64WIN=1" : "") + " -D WIN=1";
if (PHP_ZTS == "yes") {
@@ -37,6 +39,45 @@ if (PHP_OPCACHE != "no") {
} else {
WARNING("JIT not enabled, headers not found");
}
} else if (PHP_OPCACHE_JIT == "yes" && PHP_OPCACHE_JIT_IR == "yes") {
if (CHECK_HEADER_ADD_INCLUDE("ir/ir.h", "CFLAGS_OPCACHE", PHP_OPCACHE + ";ext\\opcache\\jit")) {
var dasm_flags = (X64 ? "-D X64=1" : "") + (X64 ? " -D X64WIN=1" : "") + " -D WIN=1";
var ir_target = (X64 ? "IR_TARGET_X64" : "IR_TARGET_X86");
var ir_src = "ir_strtab.c ir_cfg.c ir_sccp.c ir_gcm.c ir_ra.c ir_save.c \
ir_dump.c ir_check.c ir_patch.c";
DEFINE("IR_TARGET", ir_target);
DEFINE("DASM_FLAGS", dasm_flags);
DEFINE("DASM_ARCH", "x86");
AC_DEFINE('HAVE_JIT', 1, 'Define to enable JIT');
AC_DEFINE('ZEND_JIT_IR', 1, 'Use JIT IR framework');
ADD_FLAG("CFLAGS_OPCACHE", "/I \"ext\\opcache\\jit\\ir\" /D "+ir_target+" /D IR_PHP");
if (PHP_DEBUG == "yes") {
ADD_FLAG("CFLAGS_OPCACHE", "/D IR_DEBUG");
}
if (CHECK_HEADER_ADD_INCLUDE("capstone\\capstone.h", "CFLAGS_OPCACHE", PHP_OPCACHE+ ";" + PHP_PHP_BUILD + "\\include") &&
CHECK_LIB("capstone.lib", "opcache", PHP_OPCACHE)) {
AC_DEFINE('HAVE_CAPSTONE', 1, 'capstone support enabled');
ir_src += " ir_disasm.c";
}
ADD_MAKEFILE_FRAGMENT(configure_module_dirname + "\\jit\\Makefile.frag.w32");
ADD_SOURCES(configure_module_dirname + "\\jit",
"zend_jit.c zend_jit_vm_helpers.c",
"opcache", "ext\\opcache\\jit");
ADD_SOURCES(configure_module_dirname + "\\jit\\ir",
"ir.c", "opcache", "ext\\opcache\\jit\\ir");
ADD_SOURCES(configure_module_dirname + "\\jit\\ir",
"ir_emit.c", "opcache", "ext\\opcache\\jit\\ir");
ADD_SOURCES(configure_module_dirname + "\\jit\\ir",
ir_src, "opcache", "ext\\opcache\\jit\\ir");
} else {
WARNING("JIT not enabled, headers not found");
}
}
ADD_FLAG('CFLAGS_OPCACHE', "/I " + configure_module_dirname);

View File

@@ -1,4 +1,29 @@
ifdef IR_TARGET
# New IR based JIT
$(builddir)/jit/ir/minilua: $(srcdir)/jit/ir/dynasm/minilua.c
$(BUILD_CC) $(srcdir)/jit/ir/dynasm/minilua.c -lm -o $@
$(builddir)/jit/ir/ir_emit_$(DASM_ARCH).h: $(srcdir)/jit/ir/ir_$(DASM_ARCH).dasc $(srcdir)/jit/ir/dynasm/*.lua $(builddir)/jit/ir/minilua
$(builddir)/jit/ir/minilua $(srcdir)/jit/ir/dynasm/dynasm.lua $(DASM_FLAGS) -o $@ $(srcdir)/jit/ir/ir_$(DASM_ARCH).dasc
$(builddir)/jit/ir/ir_emit.lo: \
$(srcdir)/jit/ir/ir_emit.c $(builddir)/jit/ir/ir_emit_$(DASM_ARCH).h
$(builddir)/jit/ir/gen_ir_fold_hash: $(srcdir)/jit/ir/gen_ir_fold_hash.c $(srcdir)/jit/ir/ir_strtab.c
$(BUILD_CC) -D${IR_TARGET} -DIR_PHP -DIR_PHP_MM=0 -o $@ $<
$(builddir)/jit/ir/ir_fold_hash.h: $(builddir)/jit/ir/gen_ir_fold_hash $(srcdir)/jit/ir/ir_fold.h $(srcdir)/jit/ir/ir.h
$(builddir)/jit/ir/gen_ir_fold_hash < $(srcdir)/jit/ir/ir_fold.h > $(builddir)/jit/ir/ir_fold_hash.h
$(builddir)/jit/ir/ir.lo: \
$(builddir)/jit/ir/ir_fold_hash.h
$(builddir)/jit/zend_jit.lo: \
$(srcdir)/jit/zend_jit_helpers.c \
$(srcdir)/jit/zend_jit_ir.c
else
# Old DynAsm based JIT
$(builddir)/minilua: $(srcdir)/jit/dynasm/minilua.c
$(BUILD_CC) $(srcdir)/jit/dynasm/minilua.c -lm -o $@
@@ -15,6 +40,8 @@ $(builddir)/jit/zend_jit.lo: \
$(srcdir)/jit/zend_jit_trace.c \
$(srcdir)/jit/zend_elf.c
endif
# For non-GNU make, jit/zend_jit.lo and ./jit/zend_jit.lo are considered distinct targets.
# Use this workaround to allow building from inside ext/opcache.
jit/zend_jit.lo: $(builddir)/jit/zend_jit.lo

View File

@@ -1,3 +1,48 @@
!if "$(IR_TARGET)" != ""
# New IR based JIT
$(BUILD_DIR)\\minilua.exe: ext\opcache\jit\ir\dynasm\minilua.c
@if exist $(BUILD_DIR)\\minilua.exe del $(BUILD_DIR)\\minilua.exe
$(PHP_CL) /Fo$(BUILD_DIR)\ /Fd$(BUILD_DIR)\ /Fp$(BUILD_DIR)\ /FR$(BUILD_DIR) /Fe$(BUILD_DIR)\minilua.exe ext\opcache\jit\ir\dynasm\minilua.c
ext\opcache\jit\ir\ir_emit_x86.h: ext\opcache\jit\ir\ir_x86.dasc $(BUILD_DIR)\\minilua.exe
@if exist ext\opcache\jit\ir\ir_emit_x86.h del ext\opcache\jit\ir\ir_emit_x86.h
$(BUILD_DIR)\\minilua.exe ext/opcache/jit/ir/dynasm/dynasm.lua $(DASM_FLAGS) -o $@ ext/opcache/jit/ir/ir_x86.dasc
$(BUILD_DIR)\\gen_ir_fold_hash.exe: ext\opcache\jit\ir\gen_ir_fold_hash.c ext\opcache\jit\ir\ir_strtab.c
@if exist $(BUILD_DIR)\\gen_ir_fold_hash.exe del $(BUILD_DIR)\\gen_ir_fold_hash.exe
$(PHP_CL) /D $(IR_TARGET) /Fo$(BUILD_DIR)\ /Fd$(BUILD_DIR)\ /Fp$(BUILD_DIR)\ /Fe$(BUILD_DIR)\\gen_ir_fold_hash.exe ext\opcache\jit\ir\gen_ir_fold_hash.c
ext\opcache\jit\ir\ir_fold_hash.h: $(BUILD_DIR)\\gen_ir_fold_hash.exe ext\opcache\jit\ir\ir_fold.h ext\opcache\jit\ir\ir.h
@if exist ext\opcache\jit\ir\ir_fold_hash.h del ext\opcache\jit\ir\ir_fold_hash.h
$(BUILD_DIR)\\gen_ir_fold_hash.exe < ext\opcache\jit\ir\ir_fold.h > ext\opcache\jit\ir\ir_fold_hash.h
$(BUILD_DIR)\ext\opcache\jit\ir\ir_ra.obj: \
ext\opcache\jit\ir\ir.h \
ext\opcache\jit\ir\ir_private.h \
ext\opcache\jit\ir\ir_x86.h
$(BUILD_DIR)\ext\opcache\jit\ir\ir_emit.obj: \
ext\opcache\jit\ir\ir.h \
ext\opcache\jit\ir\ir_private.h \
ext\opcache\jit\ir\ir_x86.h \
ext\opcache\jit\ir\ir_emit_x86.h
$(BUILD_DIR)\ext\opcache\jit\ir\ir.obj: \
ext\opcache\jit\ir\ir.h \
ext\opcache\jit\ir\ir_private.h \
ext\opcache\jit\ir\ir_fold.h \
ext\opcache\jit\ir\ir_fold_hash.h
$(BUILD_DIR)\ext\opcache\jit\zend_jit.obj: \
ext\opcache\jit\zend_jit_ir.c \
ext\opcache\jit\zend_jit_helpers.c \
ext\opcache\jit\ir\ir.h \
ext\opcache\jit\ir\ir_builder.h
!else
# Old DynAsm based JIT
$(BUILD_DIR)\\minilua.exe: ext\opcache\jit\dynasm\minilua.c
@if exist $(BUILD_DIR)\\minilua.exe del $(BUILD_DIR)\\minilua.exe
$(PHP_CL) /Fo$(BUILD_DIR)\ /Fd$(BUILD_DIR)\ /Fp$(BUILD_DIR)\ /FR$(BUILD_DIR) /Fe$(BUILD_DIR)\minilua.exe ext\opcache\jit\dynasm\minilua.c
@@ -14,3 +59,4 @@ $(BUILD_DIR)\ext\opcache\jit\zend_jit.obj: \
ext/opcache/jit/zend_jit_perf_dump.c \
ext/opcache/jit/zend_jit_trace.c \
ext/opcache/jit/zend_jit_vtune.c
!endif

View File

@@ -0,0 +1,32 @@
New JIT implementation
======================
This branch provides a new JIT implementation based on [IR - Lightweight
JIT Compilation Framework](https://github.com/dstogov/ir).
As opposed to the PHP 8.* JIT approach that generates native code directly from
PHP byte-code, this implementation generates intermediate representation (IR)
and delegates all lower-level tasks to the IR Framework. IR for JIT is like an
AST for a compiler.
Key benefits of the new JIT implementation:
- Usage of IR opens possibilities for better optimization and register
allocation (the resulting native code is more efficient)
- PHP doesn't have to care about most low-level details (different CPUs,
calling conventions, TLS details, etc)
- it's much easier to implement support for new targets (e.g. RISCV)
- IR framework is going to be developed separately from PHP and may accept
contributions from other projects (new optimizations, improvements, bug fixes)
Disadvantages:
- JIT compilation becomes slower (this is almost invisible for tracing
JIT, but function JIT compilation of Wordpress becomes 4 times slower)
The necessary part of the IR Framework is embedded into php-src. So, the PR
doesn't introduce new dependencies.
The new JIT implementation successfully passes all CI workflows, but it's still
not mature and may cause failures. To reduce risks, this patch doesn't remove
the old JIT implementation (that is the same as PHP-8.3 JIT). It's possible
to build PHP with the old JIT by configuring with **--disable-opcache-jit-ir**.
In the future the old implementation should be removed.

22
ext/opcache/jit/ir/.gitignore vendored Normal file
View File

@@ -0,0 +1,22 @@
*.o
*.log
*.dot
*.pdf
ir_fold_hash.h
ir_emit_x86.h
ir_emit_aarch64.h
minilua
gen_ir_fold_hash
ir_test
tester
ir
b.c
tests/**/*.diff
tests/**/*.exp
tests/**/*.ir
tests/**/*.out
tests/**/*.log
win32/vcpkg
win32/build_*

View File

@@ -0,0 +1,21 @@
MIT License
Copyright (c) 2022 Zend by Perforce
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

View File

@@ -0,0 +1,2 @@
This directory contains an embedded version of IR Framework.
See the full version at https://github.com/dstogov/ir

View File

@@ -0,0 +1,461 @@
/*
** DynASM ARM encoding engine.
** Copyright (C) 2005-2021 Mike Pall. All rights reserved.
** Released under the MIT license. See dynasm.lua for full copyright notice.
*/
#include <stddef.h>
#include <stdarg.h>
#include <string.h>
#include <stdlib.h>
#define DASM_ARCH "arm"
#ifndef DASM_EXTERN
#define DASM_EXTERN(a,b,c,d) 0
#endif
/* Action definitions. */
enum {
DASM_STOP, DASM_SECTION, DASM_ESC, DASM_REL_EXT,
/* The following actions need a buffer position. */
DASM_ALIGN, DASM_REL_LG, DASM_LABEL_LG,
/* The following actions also have an argument. */
DASM_REL_PC, DASM_LABEL_PC,
DASM_IMM, DASM_IMM12, DASM_IMM16, DASM_IMML8, DASM_IMML12, DASM_IMMV8,
DASM__MAX
};
/* Maximum number of section buffer positions for a single dasm_put() call. */
#define DASM_MAXSECPOS 25
/* DynASM encoder status codes. Action list offset or number are or'ed in. */
#define DASM_S_OK 0x00000000
#define DASM_S_NOMEM 0x01000000
#define DASM_S_PHASE 0x02000000
#define DASM_S_MATCH_SEC 0x03000000
#define DASM_S_RANGE_I 0x11000000
#define DASM_S_RANGE_SEC 0x12000000
#define DASM_S_RANGE_LG 0x13000000
#define DASM_S_RANGE_PC 0x14000000
#define DASM_S_RANGE_REL 0x15000000
#define DASM_S_UNDEF_LG 0x21000000
#define DASM_S_UNDEF_PC 0x22000000
/* Macros to convert positions (8 bit section + 24 bit index). */
#define DASM_POS2IDX(pos) ((pos)&0x00ffffff)
#define DASM_POS2BIAS(pos) ((pos)&0xff000000)
#define DASM_SEC2POS(sec) ((sec)<<24)
#define DASM_POS2SEC(pos) ((pos)>>24)
#define DASM_POS2PTR(D, pos) (D->sections[DASM_POS2SEC(pos)].rbuf + (pos))
/* Action list type. */
typedef const unsigned int *dasm_ActList;
/* Per-section structure. */
typedef struct dasm_Section {
/* Positions stored for this section are "biased": DASM_SEC2POS(sec) (sec<<24)
** is folded into them so a biased position indexes rbuf directly. */
int *rbuf; /* Biased buffer pointer (negative section bias). */
int *buf; /* True buffer pointer. */
size_t bsize; /* Buffer size in bytes. */
int pos; /* Biased buffer position. */
int epos; /* End of biased buffer position - max single put. */
int ofs; /* Byte offset into section. */
} dasm_Section;
/* Core structure holding the DynASM encoding state. */
struct dasm_State {
size_t psize; /* Allocated size of this structure. */
dasm_ActList actionlist; /* Current actionlist pointer. */
int *lglabels; /* Local/global chain/pos ptrs. */
size_t lgsize;
int *pclabels; /* PC label chains/pos ptrs. */
size_t pcsize;
void **globals; /* Array of globals (bias -10). */
dasm_Section *section; /* Pointer to active section. */
size_t codesize; /* Total size of all code sections. */
int maxsection; /* 0 <= sectionidx < maxsection. */
int status; /* Status code. */
/* Flexible tail: DASM_PSZ(maxsection) sizes the allocation so that
** sections[] holds maxsection entries (alloc-extended, see dasm_init). */
dasm_Section sections[1]; /* All sections. Alloc-extended. */
};
/* The size of the core structure depends on the max. number of sections. */
#define DASM_PSZ(ms) (sizeof(dasm_State)+(ms-1)*sizeof(dasm_Section))
/* Initialize DynASM state. */
/* Allocate and zero-initialize the encoder state for maxsection sections.
** Dst_DECL/Dst_REF are adapter macros supplied by the embedding project
** (see dasm_proto.h); DASM_M_GROW performs the actual (re)allocation. */
void dasm_init(Dst_DECL, int maxsection)
{
dasm_State *D;
size_t psz = 0;
int i;
Dst_REF = NULL;
/* Allocate the state struct sized for maxsection sections (flexible tail). */
DASM_M_GROW(Dst, struct dasm_State, Dst_REF, psz, DASM_PSZ(maxsection));
D = Dst_REF;
D->psize = psz;
D->lglabels = NULL;
D->lgsize = 0;
D->pclabels = NULL;
D->pcsize = 0;
D->globals = NULL;
D->maxsection = maxsection;
for (i = 0; i < maxsection; i++) {
D->sections[i].buf = NULL; /* Need this for pass3. */
/* rbuf is pre-biased by the section index so biased positions index it
** directly; recomputed whenever buf is (re)allocated. */
D->sections[i].rbuf = D->sections[i].buf - DASM_SEC2POS(i);
D->sections[i].bsize = 0;
D->sections[i].epos = 0; /* Wrong, but is recalculated after resize. */
}
}
/* Free DynASM state. */
/* Release all memory owned by the encoder state: each section's code
** buffer, the PC/local-global label tables, then the state struct itself. */
void dasm_free(Dst_DECL)
{
dasm_State *D = Dst_REF;
int i;
for (i = 0; i < D->maxsection; i++)
if (D->sections[i].buf)
DASM_M_FREE(Dst, D->sections[i].buf, D->sections[i].bsize);
if (D->pclabels) DASM_M_FREE(Dst, D->pclabels, D->pcsize);
if (D->lglabels) DASM_M_FREE(Dst, D->lglabels, D->lgsize);
DASM_M_FREE(Dst, D, D->psize);
}
/* Setup global label array. Must be called before dasm_setup().
** `gl` is stored with a -10 bias: slots 0..9 of lglabels serve the ten
** numeric local labels, slots 10..10+maxgl-1 the user's globals. */
void dasm_setupglobal(Dst_DECL, void **gl, unsigned int maxgl)
{
  dasm_State *D = Dst_REF;
  D->globals = gl - 10; /* Negative bias to compensate for locals. */
  DASM_M_GROW(Dst, int, D->lglabels, D->lgsize, (10+maxgl)*sizeof(int));
}
/* Grow PC label array. Can be called after dasm_setup(), too.
** Only the newly added tail is zeroed (0 = unused label); in-progress
** chains survive the resize (assumes DASM_M_GROW is realloc-like and
** preserves old contents -- host-defined). */
void dasm_growpc(Dst_DECL, unsigned int maxpc)
{
  dasm_State *D = Dst_REF;
  size_t osz = D->pcsize;
  DASM_M_GROW(Dst, int, D->pclabels, D->pcsize, maxpc*sizeof(int));
  memset((void *)(((unsigned char *)D->pclabels)+osz), 0, D->pcsize-osz);
}
/* Setup encoder for a new encoding run over `actionlist`.
** Resets the status, selects section 0, clears every label chain
** (0 = unused) and rewinds each section to its biased start position. */
void dasm_setup(Dst_DECL, const void *actionlist)
{
  dasm_State *D = Dst_REF;
  int i;
  D->actionlist = (dasm_ActList)actionlist;
  D->status = DASM_S_OK;
  D->section = &D->sections[0];
  memset((void *)D->lglabels, 0, D->lgsize);
  if (D->pclabels) memset((void *)D->pclabels, 0, D->pcsize);
  for (i = 0; i < D->maxsection; i++) {
    D->sections[i].pos = DASM_SEC2POS(i);
    D->sections[i].ofs = 0;
  }
}
#ifdef DASM_CHECKS
#define CK(x, st) \
do { if (!(x)) { \
D->status = DASM_S_##st|(p-D->actionlist-1); return; } } while (0)
#define CKPL(kind, st) \
do { if ((size_t)((char *)pl-(char *)D->kind##labels) >= D->kind##size) { \
D->status = DASM_S_RANGE_##st|(p-D->actionlist-1); return; } } while (0)
#else
#define CK(x, st) ((void)0)
#define CKPL(kind, st) ((void)0)
#endif
/* Try to encode `n` as an ARM "modified immediate": an 8-bit value
** rotated right by an even amount (0..30).  Returns the 12-bit field
** (rot << 8 | imm8) on success, or -1 if `n` is not representable. */
static int dasm_imm12(unsigned int n)
{
  int rot = 0;
  do {
    if (n <= 255u)
      return (rot << 8) | (int)n;
    n = (n << 2) | (n >> 30);  /* Rotate left by 2 == rotate right by 30. */
    rot++;
  } while (rot < 16);
  return -1;
}
/* Pass 1: Store actions and args, link branches/labels, estimate offsets.
** Reads the action stream at actionlist + `start` and records `start`
** plus one int per position-consuming action into the current section,
** taking argument values from the varargs.  Label slot convention:
** 0 = untouched, > 0 = head of a chain of forward references (biased
** positions), < 0 = label defined at biased position -value.  `ofs`
** accumulates a pass-1 size estimate of 4 bytes per instruction word.
** Range checks (CK/CKPL) are active only when DASM_CHECKS is defined. */
void dasm_put(Dst_DECL, int start, ...)
{
  va_list ap;
  dasm_State *D = Dst_REF;
  dasm_ActList p = D->actionlist + start;
  dasm_Section *sec = D->section;
  int pos = sec->pos, ofs = sec->ofs;
  int *b;
  if (pos >= sec->epos) {
    DASM_M_GROW(Dst, int, sec->buf, sec->bsize,
      sec->bsize + 2*DASM_MAXSECPOS*sizeof(int));
    sec->rbuf = sec->buf - DASM_POS2BIAS(pos);
    sec->epos = (int)sec->bsize/sizeof(int) - DASM_MAXSECPOS+DASM_POS2BIAS(pos);
  }
  b = sec->rbuf;
  b[pos++] = start;
  va_start(ap, start);
  while (1) {
    unsigned int ins = *p++;
    unsigned int action = (ins >> 16);
    if (action >= DASM__MAX) {
      ofs += 4;  /* Raw instruction word. */
    } else {
      int *pl, n = action >= DASM_REL_PC ? va_arg(ap, int) : 0;
      switch (action) {
      case DASM_STOP: goto stop;
      case DASM_SECTION:
        n = (ins & 255); CK(n < D->maxsection, RANGE_SEC);
        D->section = &D->sections[n]; goto stop;
      case DASM_ESC: p++; ofs += 4; break;
      case DASM_REL_EXT: break;
      case DASM_ALIGN: ofs += (ins & 255); b[pos++] = ofs; break;
      case DASM_REL_LG:
        n = (ins & 2047) - 10; pl = D->lglabels + n;
        /* Bkwd rel or global. */
        if (n >= 0) { CK(n>=10||*pl<0, RANGE_LG); CKPL(lg, LG); goto putrel; }
        pl += 10; n = *pl;
        if (n < 0) n = 0; /* Start new chain for fwd rel if label exists. */
        goto linkrel;
      case DASM_REL_PC:
        pl = D->pclabels + n; CKPL(pc, PC);
      putrel:
        n = *pl;
        if (n < 0) { /* Label exists. Get label pos and store it. */
          b[pos] = -n;
        } else {
        linkrel:
          b[pos] = n; /* Else link to rel chain, anchored at label. */
          *pl = pos;
        }
        pos++;
        break;
      case DASM_LABEL_LG:
        pl = D->lglabels + (ins & 2047) - 10; CKPL(lg, LG); goto putlabel;
      case DASM_LABEL_PC:
        pl = D->pclabels + n; CKPL(pc, PC);
      putlabel:
        n = *pl; /* n > 0: Collapse rel chain and replace with label pos. */
        while (n > 0) { int *pb = DASM_POS2PTR(D, n); n = *pb; *pb = pos;
        }
        *pl = -pos; /* Label exists now. */
        b[pos++] = ofs; /* Store pass1 offset estimate. */
        break;
      case DASM_IMM:
      case DASM_IMM16:
#ifdef DASM_CHECKS
        CK((n & ((1<<((ins>>10)&31))-1)) == 0, RANGE_I);
        if ((ins & 0x8000))
          CK(((n + (1<<(((ins>>5)&31)-1)))>>((ins>>5)&31)) == 0, RANGE_I);
        else
          CK((n>>((ins>>5)&31)) == 0, RANGE_I);
#endif
        b[pos++] = n;
        break;
      case DASM_IMMV8:
        CK((n & 3) == 0, RANGE_I);
        n >>= 2;
        /* fallthrough */
      case DASM_IMML8:
      case DASM_IMML12:
        CK(n >= 0 ? ((n>>((ins>>5)&31)) == 0) :
                    (((-n)>>((ins>>5)&31)) == 0), RANGE_I);
        b[pos++] = n;
        break;
      case DASM_IMM12:
        CK(dasm_imm12((unsigned int)n) != -1, RANGE_I);
        b[pos++] = n;
        break;
      }
    }
  }
stop:
  va_end(ap);
  sec->pos = pos;
  sec->ofs = ofs;
}
#undef CK
/* Pass 2: Link sections, shrink aligns, fix label offsets.
** Walks every stored action stream again: relocation chains of globals
** never defined in this unit are collapsed to a negative marker (-idx) so
** pass 3 can resolve them through D->globals; DASM_ALIGN entries shrink
** the running offset to the real padding; DASM_LABEL_* pass-1 offset
** estimates are rebased by the accumulated inter-section offset.  On
** success *szp receives the total code size in bytes. */
int dasm_link(Dst_DECL, size_t *szp)
{
  dasm_State *D = Dst_REF;
  int secnum;
  int ofs = 0;
#ifdef DASM_CHECKS
  *szp = 0;
  if (D->status != DASM_S_OK) return D->status;
  {
    int pc;
    for (pc = 0; pc*sizeof(int) < D->pcsize; pc++)
      if (D->pclabels[pc] > 0) return DASM_S_UNDEF_PC|pc;
  }
#endif
  { /* Handle globals not defined in this translation unit. */
    int idx;
    for (idx = 10; idx*sizeof(int) < D->lgsize; idx++) {
      int n = D->lglabels[idx];
      /* Undefined label: Collapse rel chain and replace with marker (< 0). */
      while (n > 0) { int *pb = DASM_POS2PTR(D, n); n = *pb; *pb = -idx; }
    }
  }
  /* Combine all code sections. No support for data sections (yet). */
  for (secnum = 0; secnum < D->maxsection; secnum++) {
    dasm_Section *sec = D->sections + secnum;
    int *b = sec->rbuf;
    int pos = DASM_SEC2POS(secnum);
    int lastpos = sec->pos;
    while (pos != lastpos) {
      dasm_ActList p = D->actionlist + b[pos++];
      while (1) {
        unsigned int ins = *p++;
        unsigned int action = (ins >> 16);
        switch (action) {
        case DASM_STOP: case DASM_SECTION: goto stop;
        case DASM_ESC: p++; break;
        case DASM_REL_EXT: break;
        case DASM_ALIGN: ofs -= (b[pos++] + ofs) & (ins & 255); break;
        case DASM_REL_LG: case DASM_REL_PC: pos++; break;
        case DASM_LABEL_LG: case DASM_LABEL_PC: b[pos++] += ofs; break;
        case DASM_IMM: case DASM_IMM12: case DASM_IMM16:
        case DASM_IMML8: case DASM_IMML12: case DASM_IMMV8: pos++; break;
        }
      }
      stop: (void)0;
    }
    ofs += sec->ofs; /* Next section starts right after current section. */
  }
  D->codesize = ofs; /* Total size of all code sections */
  *szp = ofs;
  return DASM_S_OK;
}
#ifdef DASM_CHECKS
#define CK(x, st) \
do { if (!(x)) return DASM_S_##st|(p-D->actionlist-1); } while (0)
#else
#define CK(x, st) ((void)0)
#endif
/* Pass 3: Encode sections into `buffer` (ARM/A32).
** Replays the action streams, emitting one 32-bit word per opaque action
** and patching relocations/immediates into the previously emitted word
** (cp[-1]).  ARM specifics: DASM_ALIGN pads with 0xe1a00000 (MOV r0,r0 =
** NOP); PC-relative displacements subtract an extra 4 because cp already
** points one word past the branch, yielding target - (insn + 8), i.e.
** relative to the ARM-pipeline PC.  Returns DASM_S_PHASE if the emitted
** size disagrees with the pass-2 estimate. */
int dasm_encode(Dst_DECL, void *buffer)
{
  dasm_State *D = Dst_REF;
  char *base = (char *)buffer;
  unsigned int *cp = (unsigned int *)buffer;
  int secnum;
  /* Encode all code sections. No support for data sections (yet). */
  for (secnum = 0; secnum < D->maxsection; secnum++) {
    dasm_Section *sec = D->sections + secnum;
    int *b = sec->buf;
    int *endb = sec->rbuf + sec->pos;
    while (b != endb) {
      dasm_ActList p = D->actionlist + *b++;
      while (1) {
        unsigned int ins = *p++;
        unsigned int action = (ins >> 16);
        int n = (action >= DASM_ALIGN && action < DASM__MAX) ? *b++ : 0;
        switch (action) {
        case DASM_STOP: case DASM_SECTION: goto stop;
        case DASM_ESC: *cp++ = *p++; break;
        case DASM_REL_EXT:
          n = DASM_EXTERN(Dst, (unsigned char *)cp, (ins&2047), !(ins&2048));
          goto patchrel;
        case DASM_ALIGN:
          ins &= 255; while ((((char *)cp - base) & ins)) *cp++ = 0xe1a00000;
          break;
        case DASM_REL_LG:
          if (n < 0) { /* Global (undefined here): resolve via D->globals. */
            n = (int)((ptrdiff_t)D->globals[-n] - (ptrdiff_t)cp - 4);
            goto patchrel;
          }
          /* fallthrough */
        case DASM_REL_PC:
          CK(n >= 0, UNDEF_PC);
          n = *DASM_POS2PTR(D, n) - (int)((char *)cp - base) - 4;
        patchrel: /* Flag bits of `ins` select the fixup form. */
          if ((ins & 0x800) == 0) { /* B/BL: 24-bit word displacement. */
            CK((n & 3) == 0 && ((n+0x02000000) >> 26) == 0, RANGE_REL);
            cp[-1] |= ((n >> 2) & 0x00ffffff);
          } else if ((ins & 0x1000)) {
            CK((n & 3) == 0 && -256 <= n && n <= 256, RANGE_REL);
            goto patchimml8;
          } else if ((ins & 0x2000) == 0) {
            CK((n & 3) == 0 && -4096 <= n && n <= 4096, RANGE_REL);
            goto patchimml;
          } else {
            CK((n & 3) == 0 && -1020 <= n && n <= 1020, RANGE_REL);
            n >>= 2;
            goto patchimml;
          }
          break;
        case DASM_LABEL_LG:
          ins &= 2047; if (ins >= 20) D->globals[ins-10] = (void *)(base + n);
          break;
        case DASM_LABEL_PC: break;
        case DASM_IMM:
          cp[-1] |= ((n>>((ins>>10)&31)) & ((1<<((ins>>5)&31))-1)) << (ins&31);
          break;
        case DASM_IMM12:
          cp[-1] |= dasm_imm12((unsigned int)n);
          break;
        case DASM_IMM16:
          cp[-1] |= ((n & 0xf000) << 4) | (n & 0x0fff);
          break;
        case DASM_IMML8: patchimml8: /* Split imm8 with U (add/sub) bit. */
          cp[-1] |= n >= 0 ? (0x00800000 | (n & 0x0f) | ((n & 0xf0) << 4)) :
                             ((-n & 0x0f) | ((-n & 0xf0) << 4));
          break;
        case DASM_IMML12: case DASM_IMMV8: patchimml:
          cp[-1] |= n >= 0 ? (0x00800000 | n) : (-n);
          break;
        default: *cp++ = ins; break; /* Raw instruction word. */
        }
      }
      stop: (void)0;
    }
  }
  if (base + D->codesize != (char *)cp) /* Check for phase errors. */
    return DASM_S_PHASE;
  return DASM_S_OK;
}
#undef CK
/* Get PC label offset.
** Returns the byte offset stored for a defined label (the pclabels entry
** holds the negated definition position), -1 if the label was referenced
** but never defined (pending forward chain), or -2 if unused/out of
** range. */
int dasm_getpclabel(Dst_DECL, unsigned int pc)
{
  dasm_State *D = Dst_REF;
  if (pc*sizeof(int) < D->pcsize) {
    int pos = D->pclabels[pc];
    if (pos < 0) return *DASM_POS2PTR(D, -pos);
    if (pos > 0) return -1; /* Undefined. */
  }
  return -2; /* Unused or out of range. */
}
#ifdef DASM_CHECKS
/* Optional sanity checker to call between isolated encoding steps.
** Flags any local label (1..9) left with an unresolved forward reference
** and then resets them; also verifies the active section is `secmatch`
** (pass a negative secmatch to skip that check).  Returns the sticky
** status code. */
int dasm_checkstep(Dst_DECL, int secmatch)
{
  dasm_State *D = Dst_REF;
  if (D->status == DASM_S_OK) {
    int i;
    for (i = 1; i <= 9; i++) {
      if (D->lglabels[i] > 0) { D->status = DASM_S_UNDEF_LG|i; break; }
      D->lglabels[i] = 0;
    }
  }
  if (D->status == DASM_S_OK && secmatch >= 0 &&
      D->section != &D->sections[secmatch])
    D->status = DASM_S_MATCH_SEC|(D->section-D->sections);
  return D->status;
}
#endif

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,570 @@
/*
** DynASM ARM64 encoding engine.
** Copyright (C) 2005-2021 Mike Pall. All rights reserved.
** Released under the MIT license. See dynasm.lua for full copyright notice.
*/
#include <stddef.h>
#include <stdarg.h>
#include <string.h>
#include <stdlib.h>
#define DASM_ARCH "arm64"
#ifndef DASM_EXTERN
#define DASM_EXTERN(a,b,c,d) 0
#endif
/* Action definitions.
** An action list is a stream of 32-bit words; any word whose upper half
** decodes to a value >= DASM__MAX is emitted verbatim as an instruction,
** anything below is one of these opcodes (possibly followed by stored
** arguments and/or varargs consumed by dasm_put()). */
enum {
  DASM_STOP, DASM_SECTION, DASM_ESC, DASM_REL_EXT,
  /* The following actions need a buffer position. */
  DASM_ALIGN, DASM_REL_LG, DASM_LABEL_LG,
  /* The following actions also have an argument. */
  DASM_REL_PC, DASM_LABEL_PC, DASM_REL_A,
  DASM_IMM, DASM_IMM6, DASM_IMM12, DASM_IMM13W, DASM_IMM13X, DASM_IMML,
  DASM_IMMV, DASM_VREG,
  DASM__MAX
};
/* Maximum number of section buffer positions for a single dasm_put() call. */
#define DASM_MAXSECPOS 25
/* DynASM encoder status codes. Action list offset or number are or'ed in. */
#define DASM_S_OK 0x00000000
#define DASM_S_NOMEM 0x01000000
#define DASM_S_PHASE 0x02000000
#define DASM_S_MATCH_SEC 0x03000000
#define DASM_S_RANGE_I 0x11000000
#define DASM_S_RANGE_SEC 0x12000000
#define DASM_S_RANGE_LG 0x13000000
#define DASM_S_RANGE_PC 0x14000000
#define DASM_S_RANGE_REL 0x15000000
#define DASM_S_RANGE_VREG 0x16000000
#define DASM_S_UNDEF_LG 0x21000000
#define DASM_S_UNDEF_PC 0x22000000
/* Macros to convert positions (8 bit section + 24 bit index). */
#define DASM_POS2IDX(pos) ((pos)&0x00ffffff)
#define DASM_POS2BIAS(pos) ((pos)&0xff000000)
#define DASM_SEC2POS(sec) ((sec)<<24)
#define DASM_POS2SEC(pos) ((pos)>>24)
#define DASM_POS2PTR(D, pos) (D->sections[DASM_POS2SEC(pos)].rbuf + (pos))
/* Action list type. */
typedef const unsigned int *dasm_ActList;
/* Per-section structure.  A section accumulates "positions": for every
** dasm_put() call the action-list start offset is stored, followed by one
** int per position-consuming action.  Positions carry the section number
** in their top byte (see DASM_SEC2POS) so a single int can address any
** section's buffer. */
typedef struct dasm_Section {
  int *rbuf;       /* Biased buffer pointer (negative section bias). */
  int *buf;        /* True buffer pointer. */
  size_t bsize;    /* Buffer size in bytes. */
  int pos;         /* Biased buffer position. */
  int epos;        /* End of biased buffer position - max single put. */
  int ofs;         /* Byte offset into section. */
} dasm_Section;
/* Core structure holding the DynASM encoding state.
** Allocated by dasm_init() with the trailing sections[] array extended in
** place to `maxsection` entries (see DASM_PSZ). */
struct dasm_State {
  size_t psize;              /* Allocated size of this structure. */
  dasm_ActList actionlist;   /* Current actionlist pointer. */
  int *lglabels;             /* Local/global chain/pos ptrs. */
  size_t lgsize;             /* Size of lglabels in bytes. */
  int *pclabels;             /* PC label chains/pos ptrs. */
  size_t pcsize;             /* Size of pclabels in bytes. */
  void **globals;            /* Array of globals (bias -10). */
  dasm_Section *section;     /* Pointer to active section. */
  size_t codesize;           /* Total size of all code sections. */
  int maxsection;            /* 0 <= sectionidx < maxsection. */
  int status;                /* Status code. */
  dasm_Section sections[1];  /* All sections. Alloc-extended. */
};
/* The size of the core structure depends on the max. number of sections. */
#define DASM_PSZ(ms) (sizeof(dasm_State)+(ms-1)*sizeof(dasm_Section))
/* Initialize DynASM state.
** Allocates a dasm_State sized for `maxsection` sections (DASM_PSZ) via
** the host-supplied DASM_M_GROW macro and publishes it through Dst_REF.
** Label arrays stay unallocated here; dasm_setupglobal()/dasm_growpc()
** create them later.  NOTE(review): DASM_M_GROW's out-of-memory behavior
** is host-defined -- this code assumes Dst_REF is valid afterwards. */
void dasm_init(Dst_DECL, int maxsection)
{
  dasm_State *D;
  size_t psz = 0;
  int i;
  Dst_REF = NULL;
  DASM_M_GROW(Dst, struct dasm_State, Dst_REF, psz, DASM_PSZ(maxsection));
  D = Dst_REF;
  D->psize = psz;
  D->lglabels = NULL;
  D->lgsize = 0;
  D->pclabels = NULL;
  D->pcsize = 0;
  D->globals = NULL;
  D->maxsection = maxsection;
  for (i = 0; i < maxsection; i++) {
    D->sections[i].buf = NULL; /* Need this for pass3. */
    D->sections[i].rbuf = D->sections[i].buf - DASM_SEC2POS(i);
    D->sections[i].bsize = 0;
    D->sections[i].epos = 0; /* Wrong, but is recalculated after resize. */
  }
}
/* Free DynASM state: every section buffer, both label arrays, and finally
** the state structure itself, all through the host DASM_M_FREE macro. */
void dasm_free(Dst_DECL)
{
  dasm_State *D = Dst_REF;
  int i;
  for (i = 0; i < D->maxsection; i++)
    if (D->sections[i].buf)
      DASM_M_FREE(Dst, D->sections[i].buf, D->sections[i].bsize);
  if (D->pclabels) DASM_M_FREE(Dst, D->pclabels, D->pcsize);
  if (D->lglabels) DASM_M_FREE(Dst, D->lglabels, D->lgsize);
  DASM_M_FREE(Dst, D, D->psize);
}
/* Setup global label array. Must be called before dasm_setup().
** `gl` is stored with a -10 bias: slots 0..9 of lglabels serve the ten
** numeric local labels, slots 10..10+maxgl-1 the user's globals. */
void dasm_setupglobal(Dst_DECL, void **gl, unsigned int maxgl)
{
  dasm_State *D = Dst_REF;
  D->globals = gl - 10; /* Negative bias to compensate for locals. */
  DASM_M_GROW(Dst, int, D->lglabels, D->lgsize, (10+maxgl)*sizeof(int));
}
/* Grow PC label array. Can be called after dasm_setup(), too.
** Only the newly added tail is zeroed (0 = unused label); in-progress
** chains survive the resize (assumes DASM_M_GROW is realloc-like and
** preserves old contents -- host-defined). */
void dasm_growpc(Dst_DECL, unsigned int maxpc)
{
  dasm_State *D = Dst_REF;
  size_t osz = D->pcsize;
  DASM_M_GROW(Dst, int, D->pclabels, D->pcsize, maxpc*sizeof(int));
  memset((void *)(((unsigned char *)D->pclabels)+osz), 0, D->pcsize-osz);
}
/* Setup encoder for a new encoding run over `actionlist`.
** Resets the status, selects section 0, clears every label chain
** (0 = unused) and rewinds each section to its biased start position. */
void dasm_setup(Dst_DECL, const void *actionlist)
{
  dasm_State *D = Dst_REF;
  int i;
  D->actionlist = (dasm_ActList)actionlist;
  D->status = DASM_S_OK;
  D->section = &D->sections[0];
  memset((void *)D->lglabels, 0, D->lgsize);
  if (D->pclabels) memset((void *)D->pclabels, 0, D->pcsize);
  for (i = 0; i < D->maxsection; i++) {
    D->sections[i].pos = DASM_SEC2POS(i);
    D->sections[i].ofs = 0;
  }
}
#ifdef DASM_CHECKS
#define CK(x, st) \
do { if (!(x)) { \
D->status = DASM_S_##st|(p-D->actionlist-1); return; } } while (0)
#define CKPL(kind, st) \
do { if ((size_t)((char *)pl-(char *)D->kind##labels) >= D->kind##size) { \
D->status = DASM_S_RANGE_##st|(p-D->actionlist-1); return; } } while (0)
#else
#define CK(x, st) ((void)0)
#define CKPL(kind, st) ((void)0)
#endif
/* Try to encode `n` as an A64 add/sub immediate: a 12-bit value that is
** optionally shifted left by 12.  Returns the 12-bit payload, with bit 12
** set when the "LSL #12" form is used, or -1 if not representable. */
static int dasm_imm12(unsigned int n)
{
  if (n < 0x1000u)
    return (int)n;                      /* Plain 12-bit immediate. */
  if ((n & 0xff000fffu) == 0)
    return (int)((n >> 12) | 0x1000u);  /* imm12, LSL #12. */
  return -1;
}
/* Despite the name, this returns the index of the MOST significant set
** bit of `x` (i.e. floor(log2(x))), or -1 when x == 0. */
static int dasm_ffs(unsigned long long x)
{
  int idx = -1;
  for (; x != 0; x >>= 1)
    idx++;
  return idx;
}
/* Try to encode the 64-bit value hi:lo as an A64 logical immediate
** (the 13-bit N:immr:imms field).  A logical immediate is a single run
** of ones replicated across the register at an element width w of
** 2/4/8/16/32/64 bits, with an arbitrary rotation.  Returns the 13-bit
** encoding, or -1 if the value is not representable (all-zeros and
** all-ones are never encodable).
**
** Method: complement the value if bit 0 is set so the pattern starts
** with a 0 bit (`inv` remembers this); a = lowest set bit, b = lowest
** bit above the run of ones, c = start of the second replication.
** c determines the element width w and the replication multiplier m;
** the final (b-a)*m == n check proves the value really is that run
** replicated.  Fix vs. upstream copy: the unreachable trailing
** `return -1;` after the exhaustive if/else was removed. */
static int dasm_imm13(int lo, int hi)
{
  int inv = 0, w = 64, s = 0xfff, xa, xb;
  unsigned long long n = (((unsigned long long)hi) << 32) | (unsigned int)lo;
  unsigned long long m = 1ULL, a, b, c;
  if (n & 1) { n = ~n; inv = 1; }
  a = n & -n; b = (n+a)&-(n+a); c = (n+a-b)&-(n+a-b);
  xa = dasm_ffs(a); xb = dasm_ffs(b);
  if (c) {
    w = dasm_ffs(c) - xa;
    if (w == 32) m = 0x0000000100000001UL;
    else if (w == 16) m = 0x0001000100010001UL;
    else if (w == 8) m = 0x0101010101010101UL;
    else if (w == 4) m = 0x1111111111111111UL;
    else if (w == 2) m = 0x5555555555555555UL;
    else return -1;
    s = (-2*w & 0x3f) - 1;
  } else if (!a) {
    return -1;  /* n == 0: all-zeros (or all-ones before inversion). */
  } else if (xb == -1) {
    xb = 64;  /* Run of ones extends through bit 63. */
  }
  if ((b-a) * m != n) return -1;  /* Not a clean replicated run. */
  if (inv) {
    return ((w - xb) << 6) | (s+w+xa-xb);
  } else {
    return ((w - xa) << 6) | (s+xb-xa);
  }
}
/* Pass 1: Store actions and args, link branches/labels, estimate offsets.
** Reads the action stream at actionlist + `start` and records `start`
** plus one int per position-consuming action into the current section,
** taking argument values from the varargs.  Label slot convention:
** 0 = untouched, > 0 = head of a chain of forward references (biased
** positions), < 0 = label defined at biased position -value.
** A64 specifics: DASM_REL_A and DASM_IMM13X each store two ints
** (lo/hi halves); DASM_IMMV reserves an extra emitted word; REL_EXT /
** REL_PC with flag 0x8000 reserve 8 bytes for an absolute 64-bit
** literal.  Range checks (CK/CKPL) are active only with DASM_CHECKS. */
void dasm_put(Dst_DECL, int start, ...)
{
  va_list ap;
  dasm_State *D = Dst_REF;
  dasm_ActList p = D->actionlist + start;
  dasm_Section *sec = D->section;
  int pos = sec->pos, ofs = sec->ofs;
  int *b;
  if (pos >= sec->epos) {
    DASM_M_GROW(Dst, int, sec->buf, sec->bsize,
      sec->bsize + 2*DASM_MAXSECPOS*sizeof(int));
    sec->rbuf = sec->buf - DASM_POS2BIAS(pos);
    sec->epos = (int)sec->bsize/sizeof(int) - DASM_MAXSECPOS+DASM_POS2BIAS(pos);
  }
  b = sec->rbuf;
  b[pos++] = start;
  va_start(ap, start);
  while (1) {
    unsigned int ins = *p++;
    unsigned int action = (ins >> 16);
    if (action >= DASM__MAX) {
      ofs += 4;  /* Raw instruction word. */
    } else {
      int *pl, n = action >= DASM_REL_PC ? va_arg(ap, int) : 0;
      switch (action) {
      case DASM_STOP: goto stop;
      case DASM_SECTION:
        n = (ins & 255); CK(n < D->maxsection, RANGE_SEC);
        D->section = &D->sections[n]; goto stop;
      case DASM_ESC: p++; ofs += 4; break;
      case DASM_REL_EXT: if ((ins & 0x8000)) ofs += 8; break;
      case DASM_ALIGN: ofs += (ins & 255); b[pos++] = ofs; break;
      case DASM_REL_LG:
        n = (ins & 2047) - 10; pl = D->lglabels + n;
        /* Bkwd rel or global. */
        if (n >= 0) { CK(n>=10||*pl<0, RANGE_LG); CKPL(lg, LG); goto putrel; }
        pl += 10; n = *pl;
        if (n < 0) n = 0; /* Start new chain for fwd rel if label exists. */
        goto linkrel;
      case DASM_REL_PC:
        pl = D->pclabels + n; CKPL(pc, PC);
      putrel:
        n = *pl;
        if (n < 0) { /* Label exists. Get label pos and store it. */
          b[pos] = -n;
        } else {
        linkrel:
          b[pos] = n; /* Else link to rel chain, anchored at label. */
          *pl = pos;
        }
        pos++;
        if ((ins & 0x8000)) ofs += 8;
        break;
      case DASM_REL_A:
        b[pos++] = n;
        b[pos++] = va_arg(ap, int);
        break;
      case DASM_LABEL_LG:
        pl = D->lglabels + (ins & 2047) - 10; CKPL(lg, LG); goto putlabel;
      case DASM_LABEL_PC:
        pl = D->pclabels + n; CKPL(pc, PC);
      putlabel:
        n = *pl; /* n > 0: Collapse rel chain and replace with label pos. */
        while (n > 0) { int *pb = DASM_POS2PTR(D, n); n = *pb; *pb = pos;
        }
        *pl = -pos; /* Label exists now. */
        b[pos++] = ofs; /* Store pass1 offset estimate. */
        break;
      case DASM_IMM:
        CK((n & ((1<<((ins>>10)&31))-1)) == 0, RANGE_I);
        n >>= ((ins>>10)&31);
#ifdef DASM_CHECKS
        if ((ins & 0x8000))
          CK(((n + (1<<(((ins>>5)&31)-1)))>>((ins>>5)&31)) == 0, RANGE_I);
        else
          CK((n>>((ins>>5)&31)) == 0, RANGE_I);
#endif
        b[pos++] = n;
        break;
      case DASM_IMM6:
        CK((n >> 6) == 0, RANGE_I);
        b[pos++] = n;
        break;
      case DASM_IMM12:
        CK(dasm_imm12((unsigned int)n) != -1, RANGE_I);
        b[pos++] = n;
        break;
      case DASM_IMM13W:
        CK(dasm_imm13(n, n) != -1, RANGE_I);
        b[pos++] = n;
        break;
      case DASM_IMM13X: {
        int m = va_arg(ap, int);
        CK(dasm_imm13(n, m) != -1, RANGE_I);
        b[pos++] = n;
        b[pos++] = m;
        break;
      }
      case DASM_IMML: {
#ifdef DASM_CHECKS
        int scale = (ins & 3);
        CK((!(n & ((1<<scale)-1)) && (unsigned int)(n>>scale) < 4096) ||
           (unsigned int)(n+256) < 512, RANGE_I);
#endif
        b[pos++] = n;
        break;
      }
      case DASM_IMMV:
        ofs += 4;
        b[pos++] = n;
        break;
      case DASM_VREG:
        CK(n < 32, RANGE_VREG);
        b[pos++] = n;
        break;
      }
    }
  }
stop:
  va_end(ap);
  sec->pos = pos;
  sec->ofs = ofs;
}
#undef CK
/* Pass 2: Link sections, shrink aligns, fix label offsets.
** Walks every stored action stream again: relocation chains of globals
** never defined in this unit are collapsed to a negative marker (-idx) so
** pass 3 can resolve them through D->globals; DASM_ALIGN entries shrink
** the running offset to the real padding; DASM_LABEL_* pass-1 offset
** estimates are rebased by the accumulated inter-section offset.  A64
** two-int actions (DASM_IMM13X, DASM_REL_A) skip two stored slots.  On
** success *szp receives the total code size in bytes. */
int dasm_link(Dst_DECL, size_t *szp)
{
  dasm_State *D = Dst_REF;
  int secnum;
  int ofs = 0;
#ifdef DASM_CHECKS
  *szp = 0;
  if (D->status != DASM_S_OK) return D->status;
  {
    int pc;
    for (pc = 0; pc*sizeof(int) < D->pcsize; pc++)
      if (D->pclabels[pc] > 0) return DASM_S_UNDEF_PC|pc;
  }
#endif
  { /* Handle globals not defined in this translation unit. */
    int idx;
    for (idx = 10; idx*sizeof(int) < D->lgsize; idx++) {
      int n = D->lglabels[idx];
      /* Undefined label: Collapse rel chain and replace with marker (< 0). */
      while (n > 0) { int *pb = DASM_POS2PTR(D, n); n = *pb; *pb = -idx; }
    }
  }
  /* Combine all code sections. No support for data sections (yet). */
  for (secnum = 0; secnum < D->maxsection; secnum++) {
    dasm_Section *sec = D->sections + secnum;
    int *b = sec->rbuf;
    int pos = DASM_SEC2POS(secnum);
    int lastpos = sec->pos;
    while (pos != lastpos) {
      dasm_ActList p = D->actionlist + b[pos++];
      while (1) {
        unsigned int ins = *p++;
        unsigned int action = (ins >> 16);
        switch (action) {
        case DASM_STOP: case DASM_SECTION: goto stop;
        case DASM_ESC: p++; break;
        case DASM_REL_EXT: break;
        case DASM_ALIGN: ofs -= (b[pos++] + ofs) & (ins & 255); break;
        case DASM_REL_LG: case DASM_REL_PC: pos++; break;
        case DASM_LABEL_LG: case DASM_LABEL_PC: b[pos++] += ofs; break;
        case DASM_IMM: case DASM_IMM6: case DASM_IMM12: case DASM_IMM13W:
        case DASM_IMML: case DASM_IMMV: case DASM_VREG: pos++; break;
        case DASM_IMM13X: case DASM_REL_A: pos += 2; break;
        }
      }
      stop: (void)0;
    }
    ofs += sec->ofs; /* Next section starts right after current section. */
  }
  D->codesize = ofs; /* Total size of all code sections */
  *szp = ofs;
  return DASM_S_OK;
}
#ifdef DASM_ADD_VENEER
#define CK_REL(x, o) \
do { if (!(x) && !(n = DASM_ADD_VENEER(D, buffer, ins, b, cp, o))) \
return DASM_S_RANGE_REL|(p-D->actionlist-1); \
} while (0)
#else
#define CK_REL(x, o) CK(x, RANGE_REL)
#endif
#ifdef DASM_CHECKS
#define CK(x, st) \
do { if (!(x)) return DASM_S_##st|(p-D->actionlist-1); } while (0)
#else
#define CK(x, st) ((void)0)
#endif
/* Pass 3: Encode sections into `buffer` (A64).
** Replays the action streams, emitting one 32-bit word per opaque action
** and patching relocations/immediates into the previously emitted word
** (cp[-1]).  A64 specifics: DASM_ALIGN pads with 0xd503201f (NOP);
** PC-relative fixups add 4 to compensate for cp already pointing past
** the emitted word, making the displacement relative to the instruction
** itself; DASM_REL_A patches an absolute 64-bit address (page-relative
** for ADRP).  If DASM_ADD_VENEER is defined, CK_REL may redirect
** out-of-range branches through host-generated veneers.  Returns
** DASM_S_PHASE if the emitted size disagrees with the pass-2 estimate. */
int dasm_encode(Dst_DECL, void *buffer)
{
  dasm_State *D = Dst_REF;
  char *base = (char *)buffer;
  unsigned int *cp = (unsigned int *)buffer;
  int secnum;
  /* Encode all code sections. No support for data sections (yet). */
  for (secnum = 0; secnum < D->maxsection; secnum++) {
    dasm_Section *sec = D->sections + secnum;
    int *b = sec->buf;
    int *endb = sec->rbuf + sec->pos;
    while (b != endb) {
      dasm_ActList p = D->actionlist + *b++;
      while (1) {
        unsigned int ins = *p++;
        unsigned int action = (ins >> 16);
        int n = (action >= DASM_ALIGN && action < DASM__MAX) ? *b++ : 0;
        switch (action) {
        case DASM_STOP: case DASM_SECTION: goto stop;
        case DASM_ESC: *cp++ = *p++; break;
        case DASM_REL_EXT:
          n = DASM_EXTERN(Dst, (unsigned char *)cp, (ins&2047), !(ins&2048));
          goto patchrel;
        case DASM_ALIGN:
          ins &= 255; while ((((char *)cp - base) & ins)) *cp++ = 0xd503201f;
          break;
        case DASM_REL_LG:
          if (n < 0) { /* Global (undefined here): resolve via D->globals. */
            ptrdiff_t na = (ptrdiff_t)D->globals[-n] - (ptrdiff_t)cp + 4;
            n = (int)na;
            CK_REL((ptrdiff_t)n == na, na);
            goto patchrel;
          }
          /* fallthrough */
        case DASM_REL_PC:
          CK(n >= 0, UNDEF_PC);
          n = *DASM_POS2PTR(D, n) - (int)((char *)cp - base) + 4;
        patchrel: /* Flag bits of `ins` select the fixup form. */
          if (!(ins & 0xf800)) { /* B, BL */
            CK_REL((n & 3) == 0 && ((n+0x08000000) >> 28) == 0, n);
            cp[-1] |= ((n >> 2) & 0x03ffffff);
          } else if ((ins & 0x800)) { /* B.cond, CBZ, CBNZ, LDR* literal */
            CK_REL((n & 3) == 0 && ((n+0x00100000) >> 21) == 0, n);
            cp[-1] |= ((n << 3) & 0x00ffffe0);
          } else if ((ins & 0x3000) == 0x2000) { /* ADR */
            CK_REL(((n+0x00100000) >> 21) == 0, n);
            cp[-1] |= ((n << 3) & 0x00ffffe0) | ((n & 3) << 29);
          } else if ((ins & 0x3000) == 0x3000) { /* ADRP */
            cp[-1] |= ((n >> 9) & 0x00ffffe0) | (((n >> 12) & 3) << 29);
          } else if ((ins & 0x1000)) { /* TBZ, TBNZ */
            CK_REL((n & 3) == 0 && ((n+0x00008000) >> 16) == 0, n);
            cp[-1] |= ((n << 3) & 0x0007ffe0);
          } else if ((ins & 0x8000)) { /* absolute */
            cp[0] = (unsigned int)((ptrdiff_t)cp - 4 + n);
            cp[1] = (unsigned int)(((ptrdiff_t)cp - 4 + n) >> 32);
            cp += 2;
          }
          break;
        case DASM_REL_A: { /* Absolute 64-bit target from two stored ints. */
          ptrdiff_t na = (((ptrdiff_t)(*b++) << 32) | (unsigned int)n);
          if ((ins & 0x3000) == 0x3000) { /* ADRP */
            ins &= ~0x1000;
            na = (na >> 12) - (((ptrdiff_t)cp - 4) >> 12);
          } else {
            na = na - (ptrdiff_t)cp + 4;
          }
          n = (int)na;
          CK_REL((ptrdiff_t)n == na, na);
          goto patchrel;
        }
        case DASM_LABEL_LG:
          ins &= 2047; if (ins >= 20) D->globals[ins-10] = (void *)(base + n);
          break;
        case DASM_LABEL_PC: break;
        case DASM_IMM:
          cp[-1] |= (n & ((1<<((ins>>5)&31))-1)) << (ins&31);
          break;
        case DASM_IMM6:
          cp[-1] |= ((n&31) << 19) | ((n&32) << 26);
          break;
        case DASM_IMM12:
          cp[-1] |= (dasm_imm12((unsigned int)n) << 10);
          break;
        case DASM_IMM13W:
          cp[-1] |= (dasm_imm13(n, n) << 10);
          break;
        case DASM_IMM13X:
          cp[-1] |= (dasm_imm13(n, *b++) << 10);
          break;
        case DASM_IMML: { /* Scaled unsigned LDR/STR ofs or unscaled 9-bit. */
          int scale = (ins & 3);
          cp[-1] |= (!(n & ((1<<scale)-1)) && (unsigned int)(n>>scale) < 4096) ?
            ((n << (10-scale)) | 0x01000000) : ((n & 511) << 12);
          break;
        }
        case DASM_IMMV:
          *cp++ = n;
          break;
        case DASM_VREG:
          cp[-1] |= (n & 0x1f) << (ins & 0x1f);
          break;
        default: *cp++ = ins; break; /* Raw instruction word. */
        }
      }
      stop: (void)0;
    }
  }
  if (base + D->codesize != (char *)cp) /* Check for phase errors. */
    return DASM_S_PHASE;
  return DASM_S_OK;
}
#undef CK
/* Get PC label offset.
** Returns the byte offset stored for a defined label (the pclabels entry
** holds the negated definition position), -1 if the label was referenced
** but never defined (pending forward chain), or -2 if unused/out of
** range. */
int dasm_getpclabel(Dst_DECL, unsigned int pc)
{
  dasm_State *D = Dst_REF;
  if (pc*sizeof(int) < D->pcsize) {
    int pos = D->pclabels[pc];
    if (pos < 0) return *DASM_POS2PTR(D, -pos);
    if (pos > 0) return -1; /* Undefined. */
  }
  return -2; /* Unused or out of range. */
}
#ifdef DASM_CHECKS
/* Optional sanity checker to call between isolated encoding steps.
** Flags any local label (1..9) left with an unresolved forward reference
** and then resets them; also verifies the active section is `secmatch`
** (pass a negative secmatch to skip that check).  Returns the sticky
** status code. */
int dasm_checkstep(Dst_DECL, int secmatch)
{
  dasm_State *D = Dst_REF;
  if (D->status == DASM_S_OK) {
    int i;
    for (i = 1; i <= 9; i++) {
      if (D->lglabels[i] > 0) { D->status = DASM_S_UNDEF_LG|i; break; }
      D->lglabels[i] = 0;
    }
  }
  if (D->status == DASM_S_OK && secmatch >= 0 &&
      D->section != &D->sections[secmatch])
    D->status = DASM_S_MATCH_SEC|(D->section-D->sections);
  return D->status;
}
#endif

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,424 @@
/*
** DynASM MIPS encoding engine.
** Copyright (C) 2005-2021 Mike Pall. All rights reserved.
** Released under the MIT license. See dynasm.lua for full copyright notice.
*/
#include <stddef.h>
#include <stdarg.h>
#include <string.h>
#include <stdlib.h>
#define DASM_ARCH "mips"
#ifndef DASM_EXTERN
#define DASM_EXTERN(a,b,c,d) 0
#endif
/* Action definitions.
** MIPS action words are encoded as 0xff00xxxx: dasm_put()/dasm_link()
** subtract 0xff00 from the upper half, and any word that then decodes to
** a value >= DASM__MAX is emitted verbatim as an instruction. */
enum {
  DASM_STOP, DASM_SECTION, DASM_ESC, DASM_REL_EXT,
  /* The following actions need a buffer position. */
  DASM_ALIGN, DASM_REL_LG, DASM_LABEL_LG,
  /* The following actions also have an argument. */
  DASM_REL_PC, DASM_LABEL_PC, DASM_IMM, DASM_IMMS,
  DASM__MAX
};
/* Maximum number of section buffer positions for a single dasm_put() call. */
#define DASM_MAXSECPOS 25
/* DynASM encoder status codes. Action list offset or number are or'ed in. */
#define DASM_S_OK 0x00000000
#define DASM_S_NOMEM 0x01000000
#define DASM_S_PHASE 0x02000000
#define DASM_S_MATCH_SEC 0x03000000
#define DASM_S_RANGE_I 0x11000000
#define DASM_S_RANGE_SEC 0x12000000
#define DASM_S_RANGE_LG 0x13000000
#define DASM_S_RANGE_PC 0x14000000
#define DASM_S_RANGE_REL 0x15000000
#define DASM_S_UNDEF_LG 0x21000000
#define DASM_S_UNDEF_PC 0x22000000
/* Macros to convert positions (8 bit section + 24 bit index). */
#define DASM_POS2IDX(pos) ((pos)&0x00ffffff)
#define DASM_POS2BIAS(pos) ((pos)&0xff000000)
#define DASM_SEC2POS(sec) ((sec)<<24)
#define DASM_POS2SEC(pos) ((pos)>>24)
#define DASM_POS2PTR(D, pos) (D->sections[DASM_POS2SEC(pos)].rbuf + (pos))
/* Action list type. */
typedef const unsigned int *dasm_ActList;
/* Per-section structure.  A section accumulates "positions": for every
** dasm_put() call the action-list start offset is stored, followed by one
** int per position-consuming action.  Positions carry the section number
** in their top byte (see DASM_SEC2POS) so a single int can address any
** section's buffer. */
typedef struct dasm_Section {
  int *rbuf;       /* Biased buffer pointer (negative section bias). */
  int *buf;        /* True buffer pointer. */
  size_t bsize;    /* Buffer size in bytes. */
  int pos;         /* Biased buffer position. */
  int epos;        /* End of biased buffer position - max single put. */
  int ofs;         /* Byte offset into section. */
} dasm_Section;
/* Core structure holding the DynASM encoding state.
** Allocated by dasm_init() with the trailing sections[] array extended in
** place to `maxsection` entries (see DASM_PSZ). */
struct dasm_State {
  size_t psize;              /* Allocated size of this structure. */
  dasm_ActList actionlist;   /* Current actionlist pointer. */
  int *lglabels;             /* Local/global chain/pos ptrs. */
  size_t lgsize;             /* Size of lglabels in bytes. */
  int *pclabels;             /* PC label chains/pos ptrs. */
  size_t pcsize;             /* Size of pclabels in bytes. */
  void **globals;            /* Array of globals (bias -10). */
  dasm_Section *section;     /* Pointer to active section. */
  size_t codesize;           /* Total size of all code sections. */
  int maxsection;            /* 0 <= sectionidx < maxsection. */
  int status;                /* Status code. */
  dasm_Section sections[1];  /* All sections. Alloc-extended. */
};
/* The size of the core structure depends on the max. number of sections. */
#define DASM_PSZ(ms) (sizeof(dasm_State)+(ms-1)*sizeof(dasm_Section))
/* Initialize DynASM state.
** Allocates a dasm_State sized for `maxsection` sections (DASM_PSZ) via
** the host-supplied DASM_M_GROW macro and publishes it through Dst_REF.
** Label arrays stay unallocated here; dasm_setupglobal()/dasm_growpc()
** create them later.  NOTE(review): DASM_M_GROW's out-of-memory behavior
** is host-defined -- this code assumes Dst_REF is valid afterwards. */
void dasm_init(Dst_DECL, int maxsection)
{
  dasm_State *D;
  size_t psz = 0;
  int i;
  Dst_REF = NULL;
  DASM_M_GROW(Dst, struct dasm_State, Dst_REF, psz, DASM_PSZ(maxsection));
  D = Dst_REF;
  D->psize = psz;
  D->lglabels = NULL;
  D->lgsize = 0;
  D->pclabels = NULL;
  D->pcsize = 0;
  D->globals = NULL;
  D->maxsection = maxsection;
  for (i = 0; i < maxsection; i++) {
    D->sections[i].buf = NULL; /* Need this for pass3. */
    D->sections[i].rbuf = D->sections[i].buf - DASM_SEC2POS(i);
    D->sections[i].bsize = 0;
    D->sections[i].epos = 0; /* Wrong, but is recalculated after resize. */
  }
}
/* Free DynASM state: every section buffer, both label arrays, and finally
** the state structure itself, all through the host DASM_M_FREE macro. */
void dasm_free(Dst_DECL)
{
  dasm_State *D = Dst_REF;
  int i;
  for (i = 0; i < D->maxsection; i++)
    if (D->sections[i].buf)
      DASM_M_FREE(Dst, D->sections[i].buf, D->sections[i].bsize);
  if (D->pclabels) DASM_M_FREE(Dst, D->pclabels, D->pcsize);
  if (D->lglabels) DASM_M_FREE(Dst, D->lglabels, D->lgsize);
  DASM_M_FREE(Dst, D, D->psize);
}
/* Setup global label array. Must be called before dasm_setup().
** `gl` is stored with a -10 bias: slots 0..9 of lglabels serve the ten
** numeric local labels, slots 10..10+maxgl-1 the user's globals. */
void dasm_setupglobal(Dst_DECL, void **gl, unsigned int maxgl)
{
  dasm_State *D = Dst_REF;
  D->globals = gl - 10; /* Negative bias to compensate for locals. */
  DASM_M_GROW(Dst, int, D->lglabels, D->lgsize, (10+maxgl)*sizeof(int));
}
/* Grow PC label array. Can be called after dasm_setup(), too.
** Only the newly added tail is zeroed (0 = unused label); in-progress
** chains survive the resize (assumes DASM_M_GROW is realloc-like and
** preserves old contents -- host-defined). */
void dasm_growpc(Dst_DECL, unsigned int maxpc)
{
  dasm_State *D = Dst_REF;
  size_t osz = D->pcsize;
  DASM_M_GROW(Dst, int, D->pclabels, D->pcsize, maxpc*sizeof(int));
  memset((void *)(((unsigned char *)D->pclabels)+osz), 0, D->pcsize-osz);
}
/* Setup encoder for a new encoding run over `actionlist`.
** Resets the status, selects section 0, clears every label chain
** (0 = unused) and rewinds each section to its biased start position. */
void dasm_setup(Dst_DECL, const void *actionlist)
{
  dasm_State *D = Dst_REF;
  int i;
  D->actionlist = (dasm_ActList)actionlist;
  D->status = DASM_S_OK;
  D->section = &D->sections[0];
  memset((void *)D->lglabels, 0, D->lgsize);
  if (D->pclabels) memset((void *)D->pclabels, 0, D->pcsize);
  for (i = 0; i < D->maxsection; i++) {
    D->sections[i].pos = DASM_SEC2POS(i);
    D->sections[i].ofs = 0;
  }
}
#ifdef DASM_CHECKS
#define CK(x, st) \
do { if (!(x)) { \
D->status = DASM_S_##st|(p-D->actionlist-1); return; } } while (0)
#define CKPL(kind, st) \
do { if ((size_t)((char *)pl-(char *)D->kind##labels) >= D->kind##size) { \
D->status = DASM_S_RANGE_##st|(p-D->actionlist-1); return; } } while (0)
#else
#define CK(x, st) ((void)0)
#define CKPL(kind, st) ((void)0)
#endif
/* Pass 1: Store actions and args, link branches/labels, estimate offsets.
** Reads the action stream at actionlist + `start` and records `start`
** plus one int per position-consuming action into the current section,
** taking argument values from the varargs.  Label slot convention:
** 0 = untouched, > 0 = head of a chain of forward references (biased
** positions), < 0 = label defined at biased position -value.  MIPS
** action words are 0xff00xxxx, hence the `- 0xff00` when decoding the
** action.  Range checks (CK/CKPL) are active only with DASM_CHECKS. */
void dasm_put(Dst_DECL, int start, ...)
{
  va_list ap;
  dasm_State *D = Dst_REF;
  dasm_ActList p = D->actionlist + start;
  dasm_Section *sec = D->section;
  int pos = sec->pos, ofs = sec->ofs;
  int *b;
  if (pos >= sec->epos) {
    DASM_M_GROW(Dst, int, sec->buf, sec->bsize,
      sec->bsize + 2*DASM_MAXSECPOS*sizeof(int));
    sec->rbuf = sec->buf - DASM_POS2BIAS(pos);
    sec->epos = (int)sec->bsize/sizeof(int) - DASM_MAXSECPOS+DASM_POS2BIAS(pos);
  }
  b = sec->rbuf;
  b[pos++] = start;
  va_start(ap, start);
  while (1) {
    unsigned int ins = *p++;
    unsigned int action = (ins >> 16) - 0xff00;
    if (action >= DASM__MAX) {
      ofs += 4;  /* Raw instruction word. */
    } else {
      int *pl, n = action >= DASM_REL_PC ? va_arg(ap, int) : 0;
      switch (action) {
      case DASM_STOP: goto stop;
      case DASM_SECTION:
        n = (ins & 255); CK(n < D->maxsection, RANGE_SEC);
        D->section = &D->sections[n]; goto stop;
      case DASM_ESC: p++; ofs += 4; break;
      case DASM_REL_EXT: break;
      case DASM_ALIGN: ofs += (ins & 255); b[pos++] = ofs; break;
      case DASM_REL_LG:
        n = (ins & 2047) - 10; pl = D->lglabels + n;
        /* Bkwd rel or global. */
        if (n >= 0) { CK(n>=10||*pl<0, RANGE_LG); CKPL(lg, LG); goto putrel; }
        pl += 10; n = *pl;
        if (n < 0) n = 0; /* Start new chain for fwd rel if label exists. */
        goto linkrel;
      case DASM_REL_PC:
        pl = D->pclabels + n; CKPL(pc, PC);
      putrel:
        n = *pl;
        if (n < 0) { /* Label exists. Get label pos and store it. */
          b[pos] = -n;
        } else {
        linkrel:
          b[pos] = n; /* Else link to rel chain, anchored at label. */
          *pl = pos;
        }
        pos++;
        break;
      case DASM_LABEL_LG:
        pl = D->lglabels + (ins & 2047) - 10; CKPL(lg, LG); goto putlabel;
      case DASM_LABEL_PC:
        pl = D->pclabels + n; CKPL(pc, PC);
      putlabel:
        n = *pl; /* n > 0: Collapse rel chain and replace with label pos. */
        while (n > 0) { int *pb = DASM_POS2PTR(D, n); n = *pb; *pb = pos;
        }
        *pl = -pos; /* Label exists now. */
        b[pos++] = ofs; /* Store pass1 offset estimate. */
        break;
      case DASM_IMM: case DASM_IMMS:
#ifdef DASM_CHECKS
        CK((n & ((1<<((ins>>10)&31))-1)) == 0, RANGE_I);
#endif
        n >>= ((ins>>10)&31);
#ifdef DASM_CHECKS
        if (ins & 0x8000)
          CK(((n + (1<<(((ins>>5)&31)-1)))>>((ins>>5)&31)) == 0, RANGE_I);
        else
          CK((n>>((ins>>5)&31)) == 0, RANGE_I);
#endif
        b[pos++] = n;
        break;
      }
    }
  }
stop:
  va_end(ap);
  sec->pos = pos;
  sec->ofs = ofs;
}
#undef CK
/* Pass 2: Link sections, shrink aligns, fix label offsets.
** Walks every stored action stream again: relocation chains of globals
** never defined in this unit are collapsed to a negative marker (-idx) so
** pass 3 can resolve them through D->globals; DASM_ALIGN entries shrink
** the running offset to the real padding; DASM_LABEL_* pass-1 offset
** estimates are rebased by the accumulated inter-section offset.  On
** success *szp receives the total code size in bytes. */
int dasm_link(Dst_DECL, size_t *szp)
{
  dasm_State *D = Dst_REF;
  int secnum;
  int ofs = 0;
#ifdef DASM_CHECKS
  *szp = 0;
  if (D->status != DASM_S_OK) return D->status;
  {
    int pc;
    for (pc = 0; pc*sizeof(int) < D->pcsize; pc++)
      if (D->pclabels[pc] > 0) return DASM_S_UNDEF_PC|pc;
  }
#endif
  { /* Handle globals not defined in this translation unit. */
    int idx;
    for (idx = 10; idx*sizeof(int) < D->lgsize; idx++) {
      int n = D->lglabels[idx];
      /* Undefined label: Collapse rel chain and replace with marker (< 0). */
      while (n > 0) { int *pb = DASM_POS2PTR(D, n); n = *pb; *pb = -idx; }
    }
  }
  /* Combine all code sections. No support for data sections (yet). */
  for (secnum = 0; secnum < D->maxsection; secnum++) {
    dasm_Section *sec = D->sections + secnum;
    int *b = sec->rbuf;
    int pos = DASM_SEC2POS(secnum);
    int lastpos = sec->pos;
    while (pos != lastpos) {
      dasm_ActList p = D->actionlist + b[pos++];
      while (1) {
        unsigned int ins = *p++;
        unsigned int action = (ins >> 16) - 0xff00;
        switch (action) {
        case DASM_STOP: case DASM_SECTION: goto stop;
        case DASM_ESC: p++; break;
        case DASM_REL_EXT: break;
        case DASM_ALIGN: ofs -= (b[pos++] + ofs) & (ins & 255); break;
        case DASM_REL_LG: case DASM_REL_PC: pos++; break;
        case DASM_LABEL_LG: case DASM_LABEL_PC: b[pos++] += ofs; break;
        case DASM_IMM: case DASM_IMMS: pos++; break;
        }
      }
      stop: (void)0;
    }
    ofs += sec->ofs; /* Next section starts right after current section. */
  }
  D->codesize = ofs; /* Total size of all code sections */
  *szp = ofs;
  return DASM_S_OK;
}
#ifdef DASM_CHECKS
#define CK(x, st) \
  do { if (!(x)) return DASM_S_##st|(p-D->actionlist-1); } while (0)
#else
#define CK(x, st) ((void)0)
#endif
/* Pass 3: Encode sections. */
/*
** Emits the final machine code into 'buffer', which must be at least the
** size computed by dasm_link().  Relocations and immediates recorded in
** pass 1 are patched into the previously emitted instruction word (cp[-1]).
** Returns DASM_S_PHASE if the emitted size disagrees with the pass-2
** estimate, DASM_S_OK otherwise.
*/
int dasm_encode(Dst_DECL, void *buffer)
{
  dasm_State *D = Dst_REF;
  char *base = (char *)buffer;
  unsigned int *cp = (unsigned int *)buffer;  /* Output cursor, one 32-bit word per instruction. */
  int secnum;
  /* Encode all code sections. No support for data sections (yet). */
  for (secnum = 0; secnum < D->maxsection; secnum++) {
    dasm_Section *sec = D->sections + secnum;
    int *b = sec->buf;
    int *endb = sec->rbuf + sec->pos;
    while (b != endb) {
      dasm_ActList p = D->actionlist + *b++;
      while (1) {
        unsigned int ins = *p++;
        unsigned int action = (ins >> 16) - 0xff00;
        /* Actions >= DASM_ALIGN consumed one buffer position in pass 1. */
        int n = (action >= DASM_ALIGN && action < DASM__MAX) ? *b++ : 0;
        switch (action) {
        case DASM_STOP: case DASM_SECTION: goto stop;
        case DASM_ESC: *cp++ = *p++; break;
        case DASM_REL_EXT:
          n = DASM_EXTERN(Dst, (unsigned char *)cp, (ins & 2047), 1);
          goto patchrel;
        case DASM_ALIGN:
          ins &= 255; while ((((char *)cp - base) & ins)) *cp++ = 0x60000000;
          break;
        case DASM_REL_LG:
          if (n < 0) {  /* Global: relative to its absolute address. */
            n = (int)((ptrdiff_t)D->globals[-n] - (ptrdiff_t)cp);
            goto patchrel;
          }
          /* fallthrough */
        case DASM_REL_PC:
          CK(n >= 0, UNDEF_PC);
          n = *DASM_POS2PTR(D, n);
          if (ins & 2048)  /* Absolute (region-relative) vs. PC-relative. */
            n = (n + (int)(size_t)base) & 0x0fffffff;
          else
            n = n - (int)((char *)cp - base);
        patchrel: {
            /* Field width e is encoded in bits 12..15 of the action word. */
            unsigned int e = 16 + ((ins >> 12) & 15);
            CK((n & 3) == 0 &&
               ((n + ((ins & 2048) ? 0 : (1<<(e+1)))) >> (e+2)) == 0, RANGE_REL);
            cp[-1] |= ((n>>2) & ((1<<e)-1));
          }
          break;
        case DASM_LABEL_LG:
          /* Only global labels (index >= 20, i.e. >= 10 after bias) are
          ** published to the user-visible globals array. */
          ins &= 2047; if (ins >= 20) D->globals[ins-10] = (void *)(base + n);
          break;
        case DASM_LABEL_PC: break;
        case DASM_IMMS:
          cp[-1] |= ((n>>3) & 4); n &= 0x1f;
          /* fallthrough */
        case DASM_IMM:
          /* Mask to the field width (bits 5..9) and shift into place (bits 0..4). */
          cp[-1] |= (n & ((1<<((ins>>5)&31))-1)) << (ins&31);
          break;
        default: *cp++ = ins; break;  /* Literal instruction word. */
        }
      }
      stop: (void)0;
    }
  }
  if (base + D->codesize != (char *)cp) /* Check for phase errors. */
    return DASM_S_PHASE;
  return DASM_S_OK;
}
#undef CK
/* Get PC label offset. */
/* Returns the code offset of PC label 'pc', -1 if it was referenced but
** never defined, or -2 if the slot is unused or out of range. */
int dasm_getpclabel(Dst_DECL, unsigned int pc)
{
  dasm_State *D = Dst_REF;
  if (pc*sizeof(int) >= D->pcsize)
    return -2;  /* Out of range. */
  {
    int chain = D->pclabels[pc];
    if (chain < 0)
      return *DASM_POS2PTR(D, -chain);  /* Defined: fetch stored offset. */
    if (chain > 0)
      return -1;  /* Undefined (dangling reference chain). */
  }
  return -2;  /* Unused slot. */
}
#ifdef DASM_CHECKS
/* Optional sanity checker to call between isolated encoding steps. */
/* Verifies all local labels (1..9) were resolved and, when secmatch >= 0,
** that the expected section is still active.  Sticky: records the first
** failure in D->status and returns it. */
int dasm_checkstep(Dst_DECL, int secmatch)
{
  dasm_State *D = Dst_REF;
  int idx;
  if (D->status != DASM_S_OK) return D->status;
  for (idx = 1; idx <= 9; idx++) {
    if (D->lglabels[idx] > 0) {
      D->status = DASM_S_UNDEF_LG|idx;  /* Dangling local label chain. */
      return D->status;
    }
    D->lglabels[idx] = 0;  /* Reset for the next step. */
  }
  if (secmatch >= 0 && D->section != &D->sections[secmatch])
    D->status = DASM_S_MATCH_SEC|(D->section-D->sections);
  return D->status;
}
#endif

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,12 @@
------------------------------------------------------------------------------
-- DynASM MIPS64 module.
--
-- Copyright (C) 2005-2021 Mike Pall. All rights reserved.
-- See dynasm.lua for full copyright notice.
------------------------------------------------------------------------------
-- This module just sets 64 bit mode for the combined MIPS/MIPS64 module.
-- All the interesting stuff is there.
------------------------------------------------------------------------------
mips64 = true -- Using a global is an ugly, but effective solution.
return require("dasm_mips") -- Delegate to the shared MIPS/MIPS64 module.

View File

@@ -0,0 +1,423 @@
/*
** DynASM PPC/PPC64 encoding engine.
** Copyright (C) 2005-2021 Mike Pall. All rights reserved.
** Released under the MIT license. See dynasm.lua for full copyright notice.
*/
#include <stddef.h>
#include <stdint.h>
#include <stdarg.h>
#include <string.h>
#include <stdlib.h>
#define DASM_ARCH "ppc"
#ifndef DASM_EXTERN
#define DASM_EXTERN(a,b,c,d) 0
#endif
/* Action definitions. */
/* Actions are stored in the high halfword of an action-list word; actions
** from DASM_ALIGN up additionally consume a slot in the section buffer,
** and actions from DASM_REL_PC up take a vararg from dasm_put(). */
enum {
  DASM_STOP, DASM_SECTION, DASM_ESC, DASM_REL_EXT,
  /* The following actions need a buffer position. */
  DASM_ALIGN, DASM_REL_LG, DASM_LABEL_LG,
  /* The following actions also have an argument. */
  DASM_REL_PC, DASM_LABEL_PC, DASM_IMM, DASM_IMMSH,
  DASM__MAX
};
/* Maximum number of section buffer positions for a single dasm_put() call. */
#define DASM_MAXSECPOS 25
/* DynASM encoder status codes. Action list offset or number are or'ed in. */
#define DASM_S_OK 0x00000000
#define DASM_S_NOMEM 0x01000000
#define DASM_S_PHASE 0x02000000
#define DASM_S_MATCH_SEC 0x03000000
#define DASM_S_RANGE_I 0x11000000
#define DASM_S_RANGE_SEC 0x12000000
#define DASM_S_RANGE_LG 0x13000000
#define DASM_S_RANGE_PC 0x14000000
#define DASM_S_RANGE_REL 0x15000000
#define DASM_S_UNDEF_LG 0x21000000
#define DASM_S_UNDEF_PC 0x22000000
/* Macros to convert positions (8 bit section + 24 bit index). */
#define DASM_POS2IDX(pos) ((pos)&0x00ffffff)
#define DASM_POS2BIAS(pos) ((pos)&0xff000000)
#define DASM_SEC2POS(sec) ((sec)<<24)
#define DASM_POS2SEC(pos) ((pos)>>24)
/* A biased position indexes directly into the owning section's rbuf. */
#define DASM_POS2PTR(D, pos) (D->sections[DASM_POS2SEC(pos)].rbuf + (pos))
/* Action list type. */
typedef const unsigned int *dasm_ActList;
/* Per-section structure. */
typedef struct dasm_Section {
  int *rbuf; /* Biased buffer pointer (negative section bias). */
  int *buf; /* True buffer pointer. */
  size_t bsize; /* Buffer size in bytes. */
  int pos; /* Biased buffer position. */
  int epos; /* End of biased buffer position - max single put. */
  int ofs; /* Byte offset into section. */
} dasm_Section;
/* Core structure holding the DynASM encoding state. */
struct dasm_State {
  size_t psize; /* Allocated size of this structure. */
  dasm_ActList actionlist; /* Current actionlist pointer. */
  int *lglabels; /* Local/global chain/pos ptrs. */
  size_t lgsize;
  int *pclabels; /* PC label chains/pos ptrs. */
  size_t pcsize;
  void **globals; /* Array of globals (bias -10). */
  dasm_Section *section; /* Pointer to active section. */
  size_t codesize; /* Total size of all code sections. */
  int maxsection; /* 0 <= sectionidx < maxsection. */
  int status; /* Status code. */
  dasm_Section sections[1]; /* All sections. Alloc-extended. */
};
/* The size of the core structure depends on the max. number of sections. */
#define DASM_PSZ(ms) (sizeof(dasm_State)+(ms-1)*sizeof(dasm_Section))
/* Initialize DynASM state. */
/*
** Allocates and zero-configures the encoder state for 'maxsection'
** sections.  Label arrays are allocated lazily by dasm_setupglobal() and
** dasm_growpc().
*/
void dasm_init(Dst_DECL, int maxsection)
{
  dasm_State *D;
  size_t psz = 0;
  int i;
  Dst_REF = NULL;
  DASM_M_GROW(Dst, struct dasm_State, Dst_REF, psz, DASM_PSZ(maxsection));
  D = Dst_REF;
  D->psize = psz;
  D->lglabels = NULL;
  D->lgsize = 0;
  D->pclabels = NULL;
  D->pcsize = 0;
  D->globals = NULL;
  D->maxsection = maxsection;
  for (i = 0; i < maxsection; i++) {
    D->sections[i].buf = NULL; /* Need this for pass3. */
    /* Compute the biased pointer via uintptr_t: plain pointer arithmetic
    ** on a NULL pointer is undefined behavior (same fix as DASM_PTR_SUB
    ** in the x86 engine).  The bias is recomputed once the buffer is
    ** actually allocated in dasm_put(). */
    D->sections[i].rbuf =
      (int *)((uintptr_t)D->sections[i].buf -
              sizeof(int) * (uintptr_t)DASM_SEC2POS(i));
    D->sections[i].bsize = 0;
    D->sections[i].epos = 0; /* Wrong, but is recalculated after resize. */
  }
}
/* Free DynASM state. */
void dasm_free(Dst_DECL)
{
dasm_State *D = Dst_REF;
int i;
for (i = 0; i < D->maxsection; i++)
if (D->sections[i].buf)
DASM_M_FREE(Dst, D->sections[i].buf, D->sections[i].bsize);
if (D->pclabels) DASM_M_FREE(Dst, D->pclabels, D->pcsize);
if (D->lglabels) DASM_M_FREE(Dst, D->lglabels, D->lgsize);
DASM_M_FREE(Dst, D, D->psize);
}
/* Setup global label array. Must be called before dasm_setup(). */
/* 'gl' receives the resolved global label addresses after dasm_encode()
** (see DASM_LABEL_LG there).  Slots 0..9 of the internal chain array are
** reserved for local labels 1..9, hence the -10 bias here and the
** 10+maxgl sizing below. */
void dasm_setupglobal(Dst_DECL, void **gl, unsigned int maxgl)
{
  dasm_State *D = Dst_REF;
  D->globals = gl - 10; /* Negative bias to compensate for locals. */
  DASM_M_GROW(Dst, int, D->lglabels, D->lgsize, (10+maxgl)*sizeof(int));
}
/* Grow PC label array. Can be called after dasm_setup(), too. */
void dasm_growpc(Dst_DECL, unsigned int maxpc)
{
  dasm_State *D = Dst_REF;
  size_t osz = D->pcsize;
  DASM_M_GROW(Dst, int, D->pclabels, D->pcsize, maxpc*sizeof(int));
  /* Only the newly grown tail needs zeroing; existing chains are kept. */
  memset((void *)(((unsigned char *)D->pclabels)+osz), 0, D->pcsize-osz);
}
/* Setup encoder. */
/* Installs the action list and resets all per-run state so the same
** dasm_State can be reused for another encoding run. */
void dasm_setup(Dst_DECL, const void *actionlist)
{
  dasm_State *D = Dst_REF;
  int secnum;
  D->actionlist = (dasm_ActList)actionlist;
  D->status = DASM_S_OK;
  D->section = &D->sections[0];  /* Section 0 is active by default. */
  /* Clear all label chains; pclabels may not be allocated yet. */
  memset((void *)D->lglabels, 0, D->lgsize);
  if (D->pclabels) memset((void *)D->pclabels, 0, D->pcsize);
  /* Rewind every section to its biased start position. */
  for (secnum = 0; secnum < D->maxsection; secnum++) {
    dasm_Section *sec = &D->sections[secnum];
    sec->pos = DASM_SEC2POS(secnum);
    sec->ofs = 0;
  }
}
#ifdef DASM_CHECKS
#define CK(x, st) \
  do { if (!(x)) { \
    D->status = DASM_S_##st|(p-D->actionlist-1); return; } } while (0)
#define CKPL(kind, st) \
  do { if ((size_t)((char *)pl-(char *)D->kind##labels) >= D->kind##size) { \
    D->status = DASM_S_RANGE_##st|(p-D->actionlist-1); return; } } while (0)
#else
#define CK(x, st) ((void)0)
#define CKPL(kind, st) ((void)0)
#endif
/* Pass 1: Store actions and args, link branches/labels, estimate offsets. */
/* Called by the code generated from the .dasc file.  Records the action
** list offset plus the variable arguments into the active section buffer
** and chains unresolved label references for later passes. */
void dasm_put(Dst_DECL, int start, ...)
{
  va_list ap;
  dasm_State *D = Dst_REF;
  dasm_ActList p = D->actionlist + start;
  dasm_Section *sec = D->section;
  int pos = sec->pos, ofs = sec->ofs;
  int *b;
  /* Grow the section buffer if this put could overflow it. */
  if (pos >= sec->epos) {
    DASM_M_GROW(Dst, int, sec->buf, sec->bsize,
      sec->bsize + 2*DASM_MAXSECPOS*sizeof(int));
    sec->rbuf = sec->buf - DASM_POS2BIAS(pos);
    sec->epos = (int)sec->bsize/sizeof(int) - DASM_MAXSECPOS+DASM_POS2BIAS(pos);
  }
  b = sec->rbuf;
  b[pos++] = start;  /* First slot records the action list offset. */
  va_start(ap, start);
  while (1) {
    unsigned int ins = *p++;
    unsigned int action = (ins >> 16);
    if (action >= DASM__MAX) {
      ofs += 4;  /* Literal instruction word: 4 bytes of code. */
    } else {
      /* Actions >= DASM_REL_PC carry a vararg argument. */
      int *pl, n = action >= DASM_REL_PC ? va_arg(ap, int) : 0;
      switch (action) {
      case DASM_STOP: goto stop;
      case DASM_SECTION:
        n = (ins & 255); CK(n < D->maxsection, RANGE_SEC);
        D->section = &D->sections[n]; goto stop;
      case DASM_ESC: p++; ofs += 4; break;
      case DASM_REL_EXT: break;
      case DASM_ALIGN: ofs += (ins & 255); b[pos++] = ofs; break;
      case DASM_REL_LG:
        n = (ins & 2047) - 10; pl = D->lglabels + n;
        /* Bkwd rel or global. */
        if (n >= 0) { CK(n>=10||*pl<0, RANGE_LG); CKPL(lg, LG); goto putrel; }
        pl += 10; n = *pl;
        if (n < 0) n = 0; /* Start new chain for fwd rel if label exists. */
        goto linkrel;
      case DASM_REL_PC:
        pl = D->pclabels + n; CKPL(pc, PC);
      putrel:
        n = *pl;
        if (n < 0) { /* Label exists. Get label pos and store it. */
          b[pos] = -n;
        } else {
      linkrel:
          b[pos] = n; /* Else link to rel chain, anchored at label. */
          *pl = pos;
        }
        pos++;
        break;
      case DASM_LABEL_LG:
        pl = D->lglabels + (ins & 2047) - 10; CKPL(lg, LG); goto putlabel;
      case DASM_LABEL_PC:
        pl = D->pclabels + n; CKPL(pc, PC);
      putlabel:
        n = *pl; /* n > 0: Collapse rel chain and replace with label pos. */
        while (n > 0) { int *pb = DASM_POS2PTR(D, n); n = *pb; *pb = pos;
        }
        *pl = -pos; /* Label exists now. */
        b[pos++] = ofs; /* Store pass1 offset estimate. */
        break;
      case DASM_IMM:
#ifdef DASM_CHECKS
        /* Low 'scale' bits (bits 10..14 of the action word) must be zero. */
        CK((n & ((1<<((ins>>10)&31))-1)) == 0, RANGE_I);
#endif
        n >>= ((ins>>10)&31);
#ifdef DASM_CHECKS
        /* Signed vs. unsigned range check, width in bits 5..9. */
        if (ins & 0x8000)
          CK(((n + (1<<(((ins>>5)&31)-1)))>>((ins>>5)&31)) == 0, RANGE_I);
        else
          CK((n>>((ins>>5)&31)) == 0, RANGE_I);
#endif
        b[pos++] = n;
        break;
      case DASM_IMMSH:
        CK((n >> 6) == 0, RANGE_I);
        b[pos++] = n;
        break;
      }
    }
  }
stop:
  va_end(ap);
  sec->pos = pos;
  sec->ofs = ofs;
}
#undef CK
/* Pass 2: Link sections, shrink aligns, fix label offsets. */
/*
** Walks every recorded action list, accumulates the final byte offset of
** each section and fixes up stored label offsets.  On success *szp gets
** the total code size for the buffer passed to dasm_encode().
*/
int dasm_link(Dst_DECL, size_t *szp)
{
  dasm_State *D = Dst_REF;
  int secnum;
  int ofs = 0;  /* Running byte offset across all sections. */
#ifdef DASM_CHECKS
  *szp = 0;
  if (D->status != DASM_S_OK) return D->status;
  {
    int pc;
    /* A positive pclabel entry was referenced but never defined. */
    for (pc = 0; pc*sizeof(int) < D->pcsize; pc++)
      if (D->pclabels[pc] > 0) return DASM_S_UNDEF_PC|pc;
  }
#endif
  { /* Handle globals not defined in this translation unit. */
    int idx;
    for (idx = 10; idx*sizeof(int) < D->lgsize; idx++) {
      int n = D->lglabels[idx];
      /* Undefined label: Collapse rel chain and replace with marker (< 0). */
      while (n > 0) { int *pb = DASM_POS2PTR(D, n); n = *pb; *pb = -idx; }
    }
  }
  /* Combine all code sections. No support for data sections (yet). */
  for (secnum = 0; secnum < D->maxsection; secnum++) {
    dasm_Section *sec = D->sections + secnum;
    int *b = sec->rbuf;
    int pos = DASM_SEC2POS(secnum);
    int lastpos = sec->pos;
    while (pos != lastpos) {
      dasm_ActList p = D->actionlist + b[pos++];
      while (1) {
        unsigned int ins = *p++;
        unsigned int action = (ins >> 16);
        switch (action) {
        case DASM_STOP: case DASM_SECTION: goto stop;
        case DASM_ESC: p++; break;
        case DASM_REL_EXT: break;
        /* Shrink align to the padding actually required at this offset. */
        case DASM_ALIGN: ofs -= (b[pos++] + ofs) & (ins & 255); break;
        case DASM_REL_LG: case DASM_REL_PC: pos++; break;
        /* Fix label offset by the cumulative section offset. */
        case DASM_LABEL_LG: case DASM_LABEL_PC: b[pos++] += ofs; break;
        case DASM_IMM: case DASM_IMMSH: pos++; break;
        }
      }
      stop: (void)0;
    }
    ofs += sec->ofs; /* Next section starts right after current section. */
  }
  D->codesize = ofs; /* Total size of all code sections */
  *szp = ofs;
  return DASM_S_OK;
}
#ifdef DASM_CHECKS
#define CK(x, st) \
  do { if (!(x)) return DASM_S_##st|(p-D->actionlist-1); } while (0)
#else
#define CK(x, st) ((void)0)
#endif
/* Pass 3: Encode sections. */
/*
** Emits the final machine code into 'buffer' (sized by dasm_link()).
** Relocations and immediates are patched into the previously emitted
** instruction word (cp[-1]).  Returns DASM_S_PHASE when the emitted size
** disagrees with the pass-2 estimate.
*/
int dasm_encode(Dst_DECL, void *buffer)
{
  dasm_State *D = Dst_REF;
  char *base = (char *)buffer;
  unsigned int *cp = (unsigned int *)buffer;  /* Output cursor, one 32-bit word per instruction. */
  int secnum;
  /* Encode all code sections. No support for data sections (yet). */
  for (secnum = 0; secnum < D->maxsection; secnum++) {
    dasm_Section *sec = D->sections + secnum;
    int *b = sec->buf;
    int *endb = sec->rbuf + sec->pos;
    while (b != endb) {
      dasm_ActList p = D->actionlist + *b++;
      while (1) {
        unsigned int ins = *p++;
        unsigned int action = (ins >> 16);
        /* Actions >= DASM_ALIGN consumed one buffer position in pass 1. */
        int n = (action >= DASM_ALIGN && action < DASM__MAX) ? *b++ : 0;
        switch (action) {
        case DASM_STOP: case DASM_SECTION: goto stop;
        case DASM_ESC: *cp++ = *p++; break;
        case DASM_REL_EXT:
          n = DASM_EXTERN(Dst, (unsigned char *)cp, (ins & 2047), 1) - 4;
          goto patchrel;
        case DASM_ALIGN:
          ins &= 255; while ((((char *)cp - base) & ins)) *cp++ = 0x60000000;
          break;
        case DASM_REL_LG:
          if (n < 0) {  /* Global: relative to its absolute address. */
            n = (int)((ptrdiff_t)D->globals[-n] - (ptrdiff_t)cp);
            goto patchrel;
          }
          /* fallthrough */
        case DASM_REL_PC:
          CK(n >= 0, UNDEF_PC);
          n = *DASM_POS2PTR(D, n) - (int)((char *)cp - base);
        patchrel:
          /* Bit 2048 of the action word selects the 16-bit displacement
          ** field, otherwise the 26-bit one. */
          CK((n & 3) == 0 &&
             (((n+4) + ((ins & 2048) ? 0x00008000 : 0x02000000)) >>
              ((ins & 2048) ? 16 : 26)) == 0, RANGE_REL);
          cp[-1] |= ((n+4) & ((ins & 2048) ? 0x0000fffc: 0x03fffffc));
          break;
        case DASM_LABEL_LG:
          /* Only global labels (index >= 20, i.e. >= 10 after bias) are
          ** published to the user-visible globals array. */
          ins &= 2047; if (ins >= 20) D->globals[ins-10] = (void *)(base + n);
          break;
        case DASM_LABEL_PC: break;
        case DASM_IMM:
          /* Mask to the field width (bits 5..9) and shift into place (bits 0..4). */
          cp[-1] |= (n & ((1<<((ins>>5)&31))-1)) << (ins&31);
          break;
        case DASM_IMMSH:
          cp[-1] |= (ins & 1) ? ((n&31)<<11)|((n&32)>>4) : ((n&31)<<6)|(n&32);
          break;
        default: *cp++ = ins; break;  /* Literal instruction word. */
        }
      }
      stop: (void)0;
    }
  }
  if (base + D->codesize != (char *)cp) /* Check for phase errors. */
    return DASM_S_PHASE;
  return DASM_S_OK;
}
#undef CK
/* Get PC label offset. */
/* Returns the code offset of PC label 'pc', -1 if it was referenced but
** never defined, or -2 if the slot is unused or out of range. */
int dasm_getpclabel(Dst_DECL, unsigned int pc)
{
  dasm_State *D = Dst_REF;
  if (pc*sizeof(int) >= D->pcsize)
    return -2;  /* Out of range. */
  {
    int chain = D->pclabels[pc];
    if (chain < 0)
      return *DASM_POS2PTR(D, -chain);  /* Defined: fetch stored offset. */
    if (chain > 0)
      return -1;  /* Undefined (dangling reference chain). */
  }
  return -2;  /* Unused slot. */
}
#ifdef DASM_CHECKS
/* Optional sanity checker to call between isolated encoding steps. */
/* Verifies all local labels (1..9) were resolved and, when secmatch >= 0,
** that the expected section is still active.  Sticky: the first failure
** is recorded in D->status and returned. */
int dasm_checkstep(Dst_DECL, int secmatch)
{
  dasm_State *D = Dst_REF;
  if (D->status == DASM_S_OK) {
    int i;
    /* Local labels must be defined within the same step. */
    for (i = 1; i <= 9; i++) {
      if (D->lglabels[i] > 0) { D->status = DASM_S_UNDEF_LG|i; break; }
      D->lglabels[i] = 0;  /* Reset for the next step. */
    }
  }
  if (D->status == DASM_S_OK && secmatch >= 0 &&
      D->section != &D->sections[secmatch])
    D->status = DASM_S_MATCH_SEC|(D->section-D->sections);
  return D->status;
}
#endif

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,83 @@
/*
** DynASM encoding engine prototypes.
** Copyright (C) 2005-2021 Mike Pall. All rights reserved.
** Released under the MIT license. See dynasm.lua for full copyright notice.
*/
#ifndef _DASM_PROTO_H
#define _DASM_PROTO_H
#include <stddef.h>
#include <stdarg.h>
#define DASM_IDENT "DynASM 1.5.0"
#define DASM_VERSION 10500 /* 1.5.0 */
/* Dst_DECL/Dst_REF let embedders pass a custom context; the default is a
** plain dasm_State** handle. */
#ifndef Dst_DECL
#define Dst_DECL dasm_State **Dst
#endif
#ifndef Dst_REF
#define Dst_REF (*Dst)
#endif
#ifndef DASM_FDEF
#define DASM_FDEF extern
#endif
/* Default allocator: doubling realloc that exits the process on OOM.
** Override DASM_M_GROW/DASM_M_FREE before inclusion to customize. */
#ifndef DASM_M_GROW
#define DASM_M_GROW(ctx, t, p, sz, need) \
  do { \
    size_t _sz = (sz), _need = (need); \
    if (_sz < _need) { \
      if (_sz < 16) _sz = 16; \
      while (_sz < _need) _sz += _sz; \
      (p) = (t *)realloc((p), _sz); \
      if ((p) == NULL) exit(1); \
      (sz) = _sz; \
    } \
  } while(0)
#endif
#ifndef DASM_M_FREE
#define DASM_M_FREE(ctx, p, sz) free(p)
#endif
/* Internal DynASM encoder state. */
typedef struct dasm_State dasm_State;
/* Initialize and free DynASM state. */
DASM_FDEF void dasm_init(Dst_DECL, int maxsection);
DASM_FDEF void dasm_free(Dst_DECL);
/* Setup global array. Must be called before dasm_setup(). */
DASM_FDEF void dasm_setupglobal(Dst_DECL, void **gl, unsigned int maxgl);
/* Grow PC label array. Can be called after dasm_setup(), too. */
DASM_FDEF void dasm_growpc(Dst_DECL, unsigned int maxpc);
/* Setup encoder. */
DASM_FDEF void dasm_setup(Dst_DECL, const void *actionlist);
/* Feed encoder with actions. Calls are generated by pre-processor. */
DASM_FDEF void dasm_put(Dst_DECL, int start, ...);
/* Link sections and return the resulting size. */
DASM_FDEF int dasm_link(Dst_DECL, size_t *szp);
/* Encode sections into buffer. */
DASM_FDEF int dasm_encode(Dst_DECL, void *buffer);
/* Get PC label offset. */
DASM_FDEF int dasm_getpclabel(Dst_DECL, unsigned int pc);
#ifdef DASM_CHECKS
/* Optional sanity checker to call between isolated encoding steps. */
DASM_FDEF int dasm_checkstep(Dst_DECL, int secmatch);
#else
#define dasm_checkstep(a, b) 0
#endif
#endif /* _DASM_PROTO_H */

View File

@@ -0,0 +1,12 @@
------------------------------------------------------------------------------
-- DynASM x64 module.
--
-- Copyright (C) 2005-2021 Mike Pall. All rights reserved.
-- See dynasm.lua for full copyright notice.
------------------------------------------------------------------------------
-- This module just sets 64 bit mode for the combined x86/x64 module.
-- All the interesting stuff is there.
------------------------------------------------------------------------------
x64 = true -- Using a global is an ugly, but effective solution.
return require("dasm_x86") -- Delegate to the shared x86/x64 module.

View File

@@ -0,0 +1,546 @@
/*
** DynASM x86 encoding engine.
** Copyright (C) 2005-2021 Mike Pall. All rights reserved.
** Released under the MIT license. See dynasm.lua for full copyright notice.
*/
#include <stddef.h>
#include <stdarg.h>
#include <string.h>
#include <stdlib.h>
#define DASM_ARCH "x86"
#ifndef DASM_EXTERN
#define DASM_EXTERN(a,b,c,d) 0
#endif
/* Action definitions. DASM_STOP must be 255. */
/* x86 action lists are byte streams: byte values below DASM_DISP (233)
** are literal machine-code bytes, values 233..255 are the actions below. */
enum {
  DASM_DISP = 233,
  DASM_IMM_S, DASM_IMM_B, DASM_IMM_W, DASM_IMM_D, DASM_IMM_WB, DASM_IMM_DB,
  DASM_VREG, DASM_SPACE, DASM_SETLABEL, DASM_REL_A, DASM_REL_LG, DASM_REL_PC,
  DASM_IMM_LG, DASM_IMM_PC, DASM_LABEL_LG, DASM_LABEL_PC, DASM_ALIGN,
  DASM_EXTERN, DASM_ESC, DASM_MARK, DASM_SECTION, DASM_STOP
};
/* Maximum number of section buffer positions for a single dasm_put() call. */
#define DASM_MAXSECPOS 25
/* DynASM encoder status codes. Action list offset or number are or'ed in. */
#define DASM_S_OK 0x00000000
#define DASM_S_NOMEM 0x01000000
#define DASM_S_PHASE 0x02000000
#define DASM_S_MATCH_SEC 0x03000000
#define DASM_S_RANGE_I 0x11000000
#define DASM_S_RANGE_SEC 0x12000000
#define DASM_S_RANGE_LG 0x13000000
#define DASM_S_RANGE_PC 0x14000000
#define DASM_S_RANGE_VREG 0x15000000
#define DASM_S_UNDEF_L 0x21000000
#define DASM_S_UNDEF_PC 0x22000000
/* Macros to convert positions (8 bit section + 24 bit index). */
#define DASM_POS2IDX(pos) ((pos)&0x00ffffff)
#define DASM_POS2BIAS(pos) ((pos)&0xff000000)
#define DASM_SEC2POS(sec) ((sec)<<24)
#define DASM_POS2SEC(pos) ((pos)>>24)
/* A biased position indexes directly into the owning section's rbuf. */
#define DASM_POS2PTR(D, pos) (D->sections[DASM_POS2SEC(pos)].rbuf + (pos))
/* Action list type. */
typedef const unsigned char *dasm_ActList;
/* Per-section structure. */
typedef struct dasm_Section {
  int *rbuf; /* Biased buffer pointer (negative section bias). */
  int *buf; /* True buffer pointer. */
  size_t bsize; /* Buffer size in bytes. */
  int pos; /* Biased buffer position. */
  int epos; /* End of biased buffer position - max single put. */
  int ofs; /* Byte offset into section. */
} dasm_Section;
/* Core structure holding the DynASM encoding state. */
struct dasm_State {
  size_t psize; /* Allocated size of this structure. */
  dasm_ActList actionlist; /* Current actionlist pointer. */
  int *lglabels; /* Local/global chain/pos ptrs. */
  size_t lgsize;
  int *pclabels; /* PC label chains/pos ptrs. */
  size_t pcsize;
  void **globals; /* Array of globals (bias -10). */
  dasm_Section *section; /* Pointer to active section. */
  size_t codesize; /* Total size of all code sections. */
  int maxsection; /* 0 <= sectionidx < maxsection. */
  int status; /* Status code. */
  dasm_Section sections[1]; /* All sections. Alloc-extended. */
};
/* The size of the core structure depends on the max. number of sections. */
#define DASM_PSZ(ms) (sizeof(dasm_State)+(ms-1)*sizeof(dasm_Section))
/* Perform potentially overflowing pointer operations in a way that avoids UB. */
#define DASM_PTR_SUB(p1, off) ((void *) ((uintptr_t) (p1) - sizeof(*p1) * (uintptr_t) (off)))
#define DASM_PTR_ADD(p1, off) ((void *) ((uintptr_t) (p1) + sizeof(*p1) * (uintptr_t) (off)))
/* Initialize DynASM state. */
/* Allocates the encoder state for 'maxsection' sections; label arrays are
** allocated lazily by dasm_setupglobal()/dasm_growpc(). */
void dasm_init(Dst_DECL, int maxsection)
{
  dasm_State *D;
  size_t psz = 0;
  int secnum;
  Dst_REF = NULL;
  DASM_M_GROW(Dst, struct dasm_State, Dst_REF, psz, DASM_PSZ(maxsection));
  D = Dst_REF;
  D->psize = psz;
  D->lglabels = NULL;
  D->lgsize = 0;
  D->pclabels = NULL;
  D->pcsize = 0;
  D->globals = NULL;
  D->maxsection = maxsection;
  for (secnum = 0; secnum < maxsection; secnum++) {
    dasm_Section *sec = &D->sections[secnum];
    sec->buf = NULL; /* Need this for pass3. */
    /* Biased pointer; DASM_PTR_SUB avoids UB on the NULL buffer. */
    sec->rbuf = DASM_PTR_SUB(sec->buf, DASM_SEC2POS(secnum));
    sec->bsize = 0;
    sec->epos = 0; /* Wrong, but is recalculated after resize. */
  }
}
/* Free DynASM state. */
void dasm_free(Dst_DECL)
{
  dasm_State *D = Dst_REF;
  int secnum;
  /* Free each grown section buffer, then the label arrays, then D itself. */
  for (secnum = 0; secnum < D->maxsection; secnum++) {
    if (D->sections[secnum].buf != NULL)
      DASM_M_FREE(Dst, D->sections[secnum].buf, D->sections[secnum].bsize);
  }
  if (D->pclabels != NULL) DASM_M_FREE(Dst, D->pclabels, D->pcsize);
  if (D->lglabels != NULL) DASM_M_FREE(Dst, D->lglabels, D->lgsize);
  DASM_M_FREE(Dst, D, D->psize);
}
/* Setup global label array. Must be called before dasm_setup(). */
/* 'gl' receives the resolved global label addresses after dasm_encode()
** (see DASM_LABEL_LG there).  Slots 0..9 of the internal chain array are
** reserved for local labels, hence the -10 bias and 10+maxgl sizing. */
void dasm_setupglobal(Dst_DECL, void **gl, unsigned int maxgl)
{
  dasm_State *D = Dst_REF;
  D->globals = gl - 10; /* Negative bias to compensate for locals. */
  DASM_M_GROW(Dst, int, D->lglabels, D->lgsize, (10+maxgl)*sizeof(int));
}
/* Grow PC label array. Can be called after dasm_setup(), too. */
void dasm_growpc(Dst_DECL, unsigned int maxpc)
{
  dasm_State *D = Dst_REF;
  size_t osz = D->pcsize;
  DASM_M_GROW(Dst, int, D->pclabels, D->pcsize, maxpc*sizeof(int));
  /* Only the newly grown tail needs zeroing; existing chains are kept. */
  memset((void *)(((unsigned char *)D->pclabels)+osz), 0, D->pcsize-osz);
}
/* Setup encoder. */
/* Installs the action list and resets all per-run state so the same
** dasm_State can be reused for another encoding run. */
void dasm_setup(Dst_DECL, const void *actionlist)
{
  dasm_State *D = Dst_REF;
  int i;
  D->actionlist = (dasm_ActList)actionlist;
  D->status = DASM_S_OK;
  D->section = &D->sections[0];  /* Section 0 is active by default. */
  /* Clear all label chains; pclabels may not be allocated yet. */
  memset((void *)D->lglabels, 0, D->lgsize);
  if (D->pclabels) memset((void *)D->pclabels, 0, D->pcsize);
  for (i = 0; i < D->maxsection; i++) {
    D->sections[i].pos = DASM_SEC2POS(i);  /* Rewind to biased start. */
    D->sections[i].ofs = 0;
  }
}
#ifdef DASM_CHECKS
#define CK(x, st) \
  do { if (!(x)) { \
    D->status = DASM_S_##st|(int)(p-D->actionlist-1); return; } } while (0)
#define CKPL(kind, st) \
  do { if ((size_t)((char *)pl-(char *)D->kind##labels) >= D->kind##size) { \
    D->status=DASM_S_RANGE_##st|(int)(p-D->actionlist-1); return; } } while (0)
#else
#define CK(x, st) ((void)0)
#define CKPL(kind, st) ((void)0)
#endif
/* Pass 1: Store actions and args, link branches/labels, estimate offsets. */
/* Called by the code generated from the .dasc file.  Records the action
** list offset plus the variable arguments into the active section buffer,
** chains unresolved label references and computes worst-case offsets.
** 'mrm' tracks the last ModRM byte for DASM_DISP/DASM_VREG handling. */
void dasm_put(Dst_DECL, int start, ...)
{
  va_list ap;
  dasm_State *D = Dst_REF;
  dasm_ActList p = D->actionlist + start;
  dasm_Section *sec = D->section;
  int pos = sec->pos, ofs = sec->ofs, mrm = -1;
  int *b;
  /* Grow the section buffer if this put could overflow it. */
  if (pos >= sec->epos) {
    DASM_M_GROW(Dst, int, sec->buf, sec->bsize,
      sec->bsize + 2*DASM_MAXSECPOS*sizeof(int));
    sec->rbuf = sec->buf - DASM_POS2BIAS(pos);
    sec->epos = (int)sec->bsize/sizeof(int) - DASM_MAXSECPOS+DASM_POS2BIAS(pos);
  }
  b = sec->rbuf;
  b[pos++] = start;  /* First slot records the action list offset. */
  va_start(ap, start);
  while (1) {
    int action = *p++;
    /* Bytes below DASM_DISP are literal opcode bytes: one byte of code each. */
    while (action < DASM_DISP) {
      ofs++;
      action = *p++;
    }
    if (action <= DASM_REL_A) {
      /* Actions up to DASM_REL_A take a vararg that is stored verbatim. */
      int n = va_arg(ap, int);
      b[pos++] = n;
      switch (action) {
      case DASM_DISP:
        if (n == 0) { if (mrm < 0) mrm = p[-2]; if ((mrm&7) != 5) break; }
        /* fallthrough */
      case DASM_IMM_DB: if ((((unsigned)n+128)&-256) == 0) goto ob; /* fallthrough */
      case DASM_REL_A: /* Assumes ptrdiff_t is int. !x64 */
      case DASM_IMM_D: ofs += 4; break;
      case DASM_IMM_S: CK(((n+128)&-256) == 0, RANGE_I); goto ob;
      case DASM_IMM_B: CK((n&-256) == 0, RANGE_I); ob: ofs++; break;
      case DASM_IMM_WB: if (((n+128)&-256) == 0) goto ob; /* fallthrough */
      case DASM_IMM_W: CK((n&-65536) == 0, RANGE_I); ofs += 2; break;
      case DASM_SPACE: p++; ofs += n; break;
      case DASM_SETLABEL: b[pos-2] = -0x40000000; break; /* Neg. label ofs. */
      case DASM_VREG: CK((n&-16) == 0 && (n != 4 || (*p>>5) != 2), RANGE_VREG);
        if (*p < 0x40 && p[1] == DASM_DISP) mrm = n;
        if (*p < 0x20 && (n&7) == 4) ofs++;
        switch ((*p++ >> 3) & 3) {
        case 3: n |= b[pos-3]; /* fallthrough */
        case 2: n |= b[pos-2]; /* fallthrough */
        case 1: if (n <= 7) { b[pos-1] |= 0x10; ofs--; }
        }
        continue;
      }
      mrm = -1;
    } else {
      int *pl, n;
      switch (action) {
      case DASM_REL_LG:
      case DASM_IMM_LG:
        n = *p++; pl = D->lglabels + n;
        /* Bkwd rel or global. */
        if (n <= 246) { CK(n>=10||*pl<0, RANGE_LG); CKPL(lg, LG); goto putrel; }
        pl -= 246; n = *pl;
        if (n < 0) n = 0; /* Start new chain for fwd rel if label exists. */
        goto linkrel;
      case DASM_REL_PC:
      case DASM_IMM_PC: pl = D->pclabels + va_arg(ap, int); CKPL(pc, PC);
      putrel:
        n = *pl;
        if (n < 0) { /* Label exists. Get label pos and store it. */
          b[pos] = -n;
        } else {
      linkrel:
          b[pos] = n; /* Else link to rel chain, anchored at label. */
          *pl = pos;
        }
        pos++;
        ofs += 4; /* Maximum offset needed. */
        if (action == DASM_REL_LG || action == DASM_REL_PC) {
          b[pos++] = ofs; /* Store pass1 offset estimate. */
        } else if (sizeof(ptrdiff_t) == 8) {
          ofs += 4; /* 64-bit absolute immediates need 8 bytes. */
        }
        break;
      case DASM_LABEL_LG: pl = D->lglabels + *p++; CKPL(lg, LG); goto putlabel;
      case DASM_LABEL_PC: pl = D->pclabels + va_arg(ap, int); CKPL(pc, PC);
      putlabel:
        n = *pl; /* n > 0: Collapse rel chain and replace with label pos. */
        while (n > 0) { int *pb = DASM_POS2PTR(D, n); n = *pb; *pb = pos; }
        *pl = -pos; /* Label exists now. */
        b[pos++] = ofs; /* Store pass1 offset estimate. */
        break;
      case DASM_ALIGN:
        ofs += *p++; /* Maximum alignment needed (arg is 2**n-1). */
        b[pos++] = ofs; /* Store pass1 offset estimate. */
        break;
      case DASM_EXTERN: p += 2; ofs += 4; break;
      case DASM_ESC: p++; ofs++; break;
      case DASM_MARK: mrm = p[-2]; break;
      case DASM_SECTION:
        n = *p; CK(n < D->maxsection, RANGE_SEC); D->section = &D->sections[n];
        /* fallthrough: a section switch also terminates this put. */
      case DASM_STOP: goto stop;
      }
    }
  }
stop:
  va_end(ap);
  sec->pos = pos;
  sec->ofs = ofs;
}
#undef CK
/* Pass 2: Link sections, shrink branches/aligns, fix label offsets. */
/*
** Walks every recorded action list, shrinks near branches to short form
** where the target is within range, accumulates final section offsets and
** fixes up label offsets.  On success *szp gets the total code size.
*/
int dasm_link(Dst_DECL, size_t *szp)
{
  dasm_State *D = Dst_REF;
  int secnum;
  int ofs = 0;  /* Running byte offset across all sections. */
#ifdef DASM_CHECKS
  *szp = 0;
  if (D->status != DASM_S_OK) return D->status;
  {
    int pc;
    /* A positive pclabel entry was referenced but never defined. */
    for (pc = 0; pc*sizeof(int) < D->pcsize; pc++)
      if (D->pclabels[pc] > 0) return DASM_S_UNDEF_PC|pc;
  }
#endif
  { /* Handle globals not defined in this translation unit. */
    int idx;
    for (idx = 10; idx*sizeof(int) < D->lgsize; idx++) {
      int n = D->lglabels[idx];
      /* Undefined label: Collapse rel chain and replace with marker (< 0). */
      while (n > 0) { int *pb = DASM_POS2PTR(D, n); n = *pb; *pb = -idx; }
    }
  }
  /* Combine all code sections. No support for data sections (yet). */
  for (secnum = 0; secnum < D->maxsection; secnum++) {
    dasm_Section *sec = D->sections + secnum;
    int *b = sec->rbuf;
    int pos = DASM_SEC2POS(secnum);
    int lastpos = sec->pos;
    while (pos != lastpos) {
      dasm_ActList p = D->actionlist + b[pos++];
      int op = 0;  /* Last literal opcode byte seen (for branch shrinking). */
      while (1) {
        int action = *p++;
        while (action < DASM_DISP) {
          op = action;
          action = *p++;
        }
        switch (action) {
        case DASM_REL_LG: p++;
          /* fallthrough */
        case DASM_REL_PC: {
          /* jmp rel32 (0xe9) shrinks by 3 bytes; two-byte jcc (0x0f 0x8x,
          ** op holds the second byte) shrinks by 4. */
          int shrink = op == 0xe9 ? 3 : ((op&0xf0) == 0x80 ? 4 : 0);
          if (shrink) { /* Shrinkable branch opcode? */
            int lofs, lpos = b[pos];
            if (lpos < 0) goto noshrink; /* Ext global? */
            lofs = *DASM_POS2PTR(D, lpos);
            if (lpos > pos) { /* Fwd label: add cumulative section offsets. */
              int i;
              for (i = secnum; i < DASM_POS2SEC(lpos); i++)
                lofs += D->sections[i].ofs;
            } else {
              lofs -= ofs; /* Bkwd label: unfix offset. */
            }
            lofs -= b[pos+1]; /* Short branch ok? */
            if (lofs >= -128-shrink && lofs <= 127) ofs -= shrink; /* Yes. */
            else { noshrink: shrink = 0; } /* No, cannot shrink op. */
          }
          b[pos+1] = shrink;  /* Pass 3 reads the shrink amount here. */
          pos += 2;
          break;
        }
        /* fallthrough */
        case DASM_SPACE: case DASM_IMM_LG: case DASM_VREG: p++;
          /* fallthrough */
        case DASM_DISP: case DASM_IMM_S: case DASM_IMM_B: case DASM_IMM_W:
        case DASM_IMM_D: case DASM_IMM_WB: case DASM_IMM_DB:
        case DASM_SETLABEL: case DASM_REL_A: case DASM_IMM_PC: pos++; break;
        case DASM_LABEL_LG: p++;
          /* fallthrough */
        case DASM_LABEL_PC: b[pos++] += ofs; break; /* Fix label offset. */
        case DASM_ALIGN: ofs -= (b[pos++]+ofs)&*p++; break; /* Adjust ofs. */
        case DASM_EXTERN: p += 2; break;
        case DASM_ESC: op = *p++; break;
        case DASM_MARK: break;
        case DASM_SECTION: case DASM_STOP: goto stop;
        }
      }
      stop: (void)0;
    }
    ofs += sec->ofs; /* Next section starts right after current section. */
  }
  D->codesize = ofs; /* Total size of all code sections */
  *szp = ofs;
  return DASM_S_OK;
}
/* Byte/word/dword/qword emitters for the local 'cp' output cursor. */
#define dasmb(x) *cp++ = (unsigned char)(x)
#ifndef DASM_ALIGNED_WRITES
/* IR_SET_ALIGNED is a project macro (IR framework) marking a type as
** 1-byte aligned, permitting unaligned multi-byte stores. */
typedef IR_SET_ALIGNED(1, unsigned short unaligned_short);
typedef IR_SET_ALIGNED(1, unsigned int unaligned_int);
typedef IR_SET_ALIGNED(1, unsigned long long unaligned_long_long);
#define dasmw(x) \
  do { *((unaligned_short *)cp) = (unsigned short)(x); cp+=2; } while (0)
#define dasmd(x) \
  do { *((unaligned_int *)cp) = (unsigned int)(x); cp+=4; } while (0)
#define dasmq(x) \
  do { *((unaligned_long_long *)cp) = (unsigned long long)(x); cp+=8; } while (0)
#else
/* Byte-at-a-time fallback for targets without unaligned stores. */
#define dasmw(x) do { dasmb(x); dasmb((x)>>8); } while (0)
#define dasmd(x) do { dasmw(x); dasmw((x)>>16); } while (0)
#define dasmq(x) do { dasmd(x); dasmd((x)>>32); } while (0)
#endif
/* Emit a pointer-sized immediate: 8 bytes on 64-bit, 4 bytes otherwise. */
static unsigned char *dasma_(unsigned char *cp, ptrdiff_t x)
{
  if (sizeof(ptrdiff_t) == 8)
    dasmq((unsigned long long)x);
  else
    dasmd((unsigned int)x);
  return cp;
}
#define dasma(x) (cp = dasma_(cp, (x)))
/* Pass 3: Encode sections.
 *
 * Walks every section's position buffer, replays the action list for each
 * recorded instruction, and emits the final machine code into "buffer".
 * Returns DASM_S_OK, or DASM_S_PHASE when the emitted size disagrees with
 * the size computed by dasm_link() (a "phase error"). */
int dasm_encode(Dst_DECL, void *buffer)
{
  dasm_State *D = Dst_REF;
  unsigned char *base = (unsigned char *)buffer;
  unsigned char *cp = base;  /* Output cursor used by the dasmb/w/d/q macros. */
  int secnum;
  /* Encode all code sections. No support for data sections (yet). */
  for (secnum = 0; secnum < D->maxsection; secnum++) {
    dasm_Section *sec = D->sections + secnum;
    int *b = sec->buf;
    int *endb = DASM_PTR_ADD(sec->rbuf, sec->pos);
    while (b != endb) {
      dasm_ActList p = D->actionlist + *b++;
      /* "mark" remembers a spot inside the current instruction so later
       * actions (DISP/IMM_DB/IMM_WB/VREG) can patch bytes already emitted. */
      unsigned char *mark = NULL;
      while (1) {
	int n;
	int action = *p++;
	/* Action codes below DASM_DISP are literal bytes: copy verbatim. */
	while (action < DASM_DISP) {
	  *cp++ = action;
	  action = *p++;
	}
	/* Actions >= DASM_ALIGN take no operand from the position buffer. */
	if (action >= DASM_ALIGN) {
	  switch (action) {
	  case DASM_ALIGN:
	    b++;
	    n = *p++;
	    /* Pad with single-byte NOPs up to the requested alignment mask. */
	    while (((cp-base) & n)) *cp++ = 0x90; /* nop */
	    continue;
	  case DASM_EXTERN: n = DASM_EXTERN(Dst, cp, p[1], *p); p += 2; goto wd;
	  case DASM_MARK: mark = cp; continue;
	  case DASM_ESC: action = *p++; *cp++ = action; continue;
	  case DASM_SECTION: case DASM_STOP: goto stop;
	  }
	}
	/* Remaining actions consume one operand recorded in earlier passes. */
	n = *b++;
	switch (action) {
	case DASM_DISP: if (!mark) mark = cp; {
	  /* Patch the mod bits of the earlier ModRM byte: drop the
	   * displacement when n == 0 (unless rm encodes rBP/disp-only),
	   * use disp8 when n fits a signed byte, else emit disp32. */
	  unsigned char *mm = mark;
	  if (*p != DASM_IMM_DB && *p != DASM_IMM_WB) mark = NULL;
	  if (n == 0) { int mrm = mm[-1]&7; if (mrm == 4) mrm = mm[0]&7;
	    if (mrm != 5) { mm[-1] -= 0x80; break; } }
	  if ((((unsigned)n+128) & -256) != 0) goto wd; else mm[-1] -= 0x40;
	}
	/* fallthrough */
	case DASM_IMM_S: case DASM_IMM_B: wb: dasmb(n); break;
	case DASM_IMM_DB: if ((((unsigned)n+128)&-256) == 0) {
	    /* Fits a signed byte: switch the opcode to its imm8 form. */
	    db: if (!mark) mark = cp; mark[-2] += 2; mark = NULL; goto wb;
	  } else mark = NULL;
	/* fallthrough */
	case DASM_IMM_D: wd: dasmd(n); break;
	case DASM_IMM_WB: if ((((unsigned)n+128)&-256) == 0) goto db; else mark = NULL;
	/* fallthrough */
	case DASM_IMM_W: dasmw(n); break;
	case DASM_VREG: {
	  /* Patch a runtime-selected register number into bytes already
	   * emitted (REX/VEX prefix, opcode, ModRM).  The action byte *p
	   * encodes the patch offset (low 3 bits) and mode (high bits);
	   * exact x86 encoding details — see the DynASM docs. */
	  int t = *p++;
	  unsigned char *ex = cp - (t&7);
	  if ((n & 8) && t < 0xa0) {
	    if (*ex & 0x80) ex[1] ^= 0x20 << (t>>6); else *ex ^= 1 << (t>>6);
	    n &= 7;
	  } else if (n & 0x10) {
	    if (*ex & 0x80) {
	      /* 0xc5: rewrite the 3-byte VEX prefix to the 2-byte form. */
	      *ex = 0xc5; ex[1] = (ex[1] & 0x80) | ex[2]; ex += 2;
	    }
	    /* Drop one prefix byte by shifting the instruction left. */
	    while (++ex < cp) ex[-1] = *ex;
	    if (mark) mark--;
	    cp--;
	    n &= 7;
	  }
	  if (t >= 0xc0) n <<= 4;
	  else if (t >= 0x40) n <<= 3;
	  else if (n == 4 && t < 0x20) { cp[-1] ^= n; *cp++ = 0x20; }
	  cp[-1] ^= n;
	  break;
	}
	case DASM_REL_LG: p++; if (n >= 0) goto rel_pc;
	  /* Negative n: PC-relative branch to a global label's address. */
	  b++; n = (int)(ptrdiff_t)D->globals[-n];
	/* fallthrough */
	case DASM_REL_A: rel_a:
	  n -= (unsigned int)(ptrdiff_t)(cp+4); goto wd; /* !x64 */
	case DASM_REL_PC: rel_pc: {
	  int shrink = *b++;
	  int *pb = DASM_POS2PTR(D, n); if (*pb < 0) { n = pb[1]; goto rel_a; }
	  n = *pb - ((int)(cp-base) + 4-shrink);
	  if (shrink == 0) goto wd;
	  /* Shrunk branch: rewrite to the short (rel8) instruction form. */
	  if (shrink == 4) { cp--; cp[-1] = *cp-0x10; } else cp[-1] = 0xeb;
	  goto wb;
	}
	case DASM_IMM_LG:
	  p++;
	  if (n < 0) { dasma((ptrdiff_t)D->globals[-n]); break; }
	/* fallthrough */
	case DASM_IMM_PC: {
	  /* Absolute address of a PC label (or its raw value if unresolved). */
	  int *pb = DASM_POS2PTR(D, n);
	  dasma(*pb < 0 ? (ptrdiff_t)pb[1] : (*pb + (ptrdiff_t)base));
	  break;
	}
	case DASM_LABEL_LG: {
	  /* Publish a global label address; indices < 10 are local labels. */
	  int idx = *p++;
	  if (idx >= 10)
	    D->globals[idx] = (void *)(base + (*p == DASM_SETLABEL ? *b : n));
	  break;
	}
	case DASM_LABEL_PC: case DASM_SETLABEL: break;
	case DASM_SPACE: { int fill = *p++; while (n--) *cp++ = fill; break; }
	}
      }
      stop: (void)0;
    }
  }
  /* Emitted size must equal the size dasm_link() computed into codesize. */
  if (base + D->codesize != cp) /* Check for phase errors. */
    return DASM_S_PHASE;
  return DASM_S_OK;
}
/* Get PC label offset.
 * Returns the encoded offset of PC label "pc", -1 when the label exists
 * but is still undefined, and -2 when it is unused or out of range. */
int dasm_getpclabel(Dst_DECL, unsigned int pc)
{
  dasm_State *D = Dst_REF;
  if (pc*sizeof(int) >= D->pcsize)
    return -2; /* Unused or out of range. */
  int pos = D->pclabels[pc];
  if (pos < 0)
    return *DASM_POS2PTR(D, -pos);
  if (pos > 0)
    return -1; /* Undefined. */
  return -2; /* Unused. */
}
#ifdef DASM_CHECKS
/* Optional sanity checker to call between isolated encoding steps.
 * Verifies that local labels 1..9 carry no unresolved references
 * (a positive lglabels entry flags DASM_S_UNDEF_L) and, when
 * secmatch >= 0, that the expected section is still the active one.
 * Returns the (possibly just updated) status code. */
int dasm_checkstep(Dst_DECL, int secmatch)
{
  dasm_State *D = Dst_REF;
  if (D->status == DASM_S_OK) {
    int i;
    for (i = 1; i <= 9; i++) {
      if (D->lglabels[i] > 0) { D->status = DASM_S_UNDEF_L|i; break; }
      D->lglabels[i] = 0; /* Reset the local label for the next step. */
    }
  }
  if (D->status == DASM_S_OK && secmatch >= 0 &&
      D->section != &D->sections[secmatch])
    D->status = DASM_S_MATCH_SEC|(int)(D->section-D->sections);
  return D->status;
}
#endif

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,261 @@
/*
* IR - Lightweight JIT Compilation Framework
* (Folding engine generator)
* Copyright (C) 2022 Zend by Perforce.
* Authors: Dmitry Stogov <dmitry@php.net>
*
* Based on Mike Pall's implementation for LuaJIT.
*/
#include "ir.h"
#include <string.h>
#include "ir_strtab.c"
#define MAX_RULES 2048
#define MAX_SLOTS (MAX_RULES * 4)
static ir_strtab strtab;
/* Print the computed hash table to stdout as a C array definition
 * named _ir_fold_hash with "count" entries taken from "mask". */
void print_hash(uint32_t *mask, uint32_t count)
{
	uint32_t idx;

	printf("static const uint32_t _ir_fold_hash[%d] = {\n", count);
	for (idx = 0; idx < count; idx++) {
		printf("\t0x%08x,\n", mask[idx]);
	}
	fputs("};\n\n", stdout);
}
/* Candidate hash: ((mask << r1) - mask) << r2, a cheap multiply-like mix
 * built from two shifts and a subtraction. */
static uint32_t hash_shl2(uint32_t mask, uint32_t r1, uint32_t r2)
{
	uint32_t shifted = mask << r1;
	uint32_t mixed = shifted - mask;
	return mixed << r2;
}
#if 0
/* Disabled alternative hash family based on bit rotation instead of the
 * shift/subtract mix; kept for experiments together with the matching
 * #if 0 block inside find_hash(). */
#define ir_rol(x, n) (((x)<<(n)) | ((x)>>(-(int)(n)&(8*sizeof(x)-1))))
#define ir_ror(x, n) (((x)<<(-(int)(n)&(8*sizeof(x)-1))) | ((x)>>(n)))
static uint32_t hash_rol2(uint32_t mask, uint32_t r1, uint32_t r2)
{
	return ir_rol((ir_rol(mask, r1) - mask), r2);
}
#endif
/* Find a perfect hash function.
 *
 * Searches table sizes n (odd, growing up to MAX_SLOTS) and shift pairs
 * (r1, r2) until hash_shl2() maps all "count" masks to distinct slots,
 * using only the low 21 bits of each mask (op | op1<<7 | op2<<14 — the
 * rule-index bits above are excluded).  On success prints the table and
 * the matching _ir_fold_hashkey() to stdout and returns 1; on failure
 * prints a 1-entry dummy table with a constant key function and returns 0. */
int find_hash(uint32_t *mask, uint32_t count)
{
	uint32_t hash[MAX_SLOTS];
	uint32_t n, r1, r2, i, h;
	for (n = (count | 1); n < MAX_SLOTS; n += 2) {
		for (r1 = 0; r1 < 31; r1++) {
			for (r2 = 0; r2 < 32; r2++) {
				memset(hash, 0, n * sizeof(uint32_t));
				for (i = 0; i < count; i++) {
					/* Masks are non-zero, so 0 can mark an empty slot. */
					h = hash_shl2(mask[i] & 0x1fffff, r1, r2) % n;
					if (hash[h]) break; /* collision */
					hash[h] = mask[i];
				}
				if (i == count) {
					/* All masks placed: emit the table and key function. */
					print_hash(hash, n);
					printf("static uint32_t _ir_fold_hashkey(uint32_t h)\n{\n\treturn (((h << %d) - h) << %d) %% %d;\n}\n", r1, r2, n);
					return 1;
				}
#if 0
				/* Disabled: try the rotation-based family as well. */
				memset(hash, 0, n * sizeof(uint32_t));
				for (i = 0; i < count; i++) {
					h = hash_rol2(mask[i] & 0x1fffff, r1, r2) % n;
					if (hash[h]) break; /* collision */
					hash[h] = mask[i];
				}
				if (i == count) {
					print_hash(hash, n);
					printf("static uint32_t _ir_fold_hashkey(uint32_t h)\n{\nreturn 0; /*rol2(%u,%u,%u)*/\n}\n", r1, r2, n);
					return 1;
				}
#endif
			}
		}
	}
	/* No perfect hash found: fall back to a single-slot dummy. */
	hash[0] = 0;
	print_hash(hash, 1);
	printf("static uint32_t _ir_fold_hashkey(uint32_t h)\n{\n\treturn 0;\n}\n");
	return 0;
}
/* Look up an opcode name in the string table.
 * Returns the zero-based IR_* opcode, or -1 when the name is unknown
 * (the table stores IR_* + 1, and ir_strtab_find() yields 0 on a miss). */
static int find_op(const char *s, size_t len)
{
	int stored = ir_strtab_find(&strtab, s, (uint8_t)len);
	return stored - 1;
}
static int parse_rule(const char *buf)
{
const char *p = buf + sizeof("IR_FOLD(") - 1;
const char *q;
int op, mask;
while (*p == ' ' || *p == '\t') {
p++;
}
if (*p < 'A' || *p > 'Z') {
return 0;
}
q = p + 1;
while ((*q >= 'A' && *q <= 'Z')
|| (*q >= '0' && *q <= '9')
|| *q == '_') {
q++;
}
op = find_op(p, q - p);
if (op < 0) {
return 0;
}
mask = op;
while (*q == ' ' || *q == '\t') {
q++;
}
if (*q == ')') {
return mask; /* unused operands */
} else if (*q != '(') {
return 0;
}
p = q + 1;
while (*p == ' ' || *p == '\t') {
p++;
}
if (*p == '_') {
q = p + 1;
} else if (*p >= 'A' && *p <= 'Z') {
q = p + 1;
while ((*q >= 'A' && *q <= 'Z')
|| (*q >= '0' && *q <= '9')
|| *q == '_') {
q++;
}
op = find_op(p, q - p);
if (op < 0) {
return 0;
}
mask |= op << 7;
} else {
return 0;
}
while (*q == ' ' || *q == '\t') {
q++;
}
if (*q == ')') {
return mask; /* unused op2 */
} else if (*q != ',') {
return 0;
}
p = q + 1;
while (*p == ' ' || *p == '\t') {
p++;
}
if (*p == '_') {
q = p + 1;
} else if (*p >= 'A' && *p <= 'Z') {
q = p + 1;
while ((*q >= 'A' && *q <= 'Z')
|| (*q >= '0' && *q <= '9')
|| *q == '_') {
q++;
}
op = find_op(p, q - p);
if (op < 0) {
return 0;
}
mask |= op << 14;
} else {
return 0;
}
while (*q == ' ' || *q == '\t') {
q++;
}
if (*q != ')') {
return 0;
}
q++;
while (*q == ' ' || *q == '\t') {
q++;
}
if (*q != ')') {
return 0;
}
return mask;
}
/* Generator entry point: reads ir_fold.h from stdin, extracts every line
 * starting with "IR_FOLD(", and prints to stdout the ir_fold_rule_id enum
 * (one IR_RULE_<line> per rule) plus a perfect-hash lookup table for the
 * folding engine.  Returns non-zero on any parse or capacity error. */
int main()
{
	char buf[4096];
	FILE *f = stdin;
	int line = 0;
	int rules = 0;
	int i;
	uint32_t mask[MAX_RULES];
	uint32_t rule[MAX_RULES];
	/* Intern every opcode name, storing IR_* + 1 so that a find() result
	 * of 0 can mean "not found" (find_op() subtracts the 1 again). */
	ir_strtab_init(&strtab, IR_LAST_OP, 0);
#define IR_OP_ADD(name, flags, op1, op2, op3) \
	ir_strtab_lookup(&strtab, #name, sizeof(#name) - 1, IR_ ## name + 1);
	IR_OPS(IR_OP_ADD)
	while (fgets(buf, sizeof(buf) - 1, f)) {
		size_t len = strlen(buf);
		/* NOTE(review): "line" only advances when a terminator is
		 * stripped; a final line with no trailing newline (or a line
		 * longer than the buffer) would skew the numbering — appears
		 * acceptable for the generated ir_fold.h input; confirm. */
		if (len > 0 && (buf[len - 1] == '\r' || buf[len - 1] == '\n')) {
			buf[len - 1] = 0;
			len--;
			line++;
		}
		if (len >= sizeof("IR_FOLD(")-1
		 && memcmp(buf, "IR_FOLD(", sizeof("IR_FOLD(")-1) == 0) {
			if (rules >= MAX_RULES) {
				fprintf(stderr, "ERROR: Too many rules\n");
				return 1;
			}
			i = parse_rule(buf);
			if (!i) {
				fprintf(stderr, "ERROR: Incorrect '%s' rule on line %d\n", buf, line);
				return 1;
			}
			// TODO: few masks may share the same rule ???
			rule[rules] = line;
			/* Pack the rule index above the 21 opcode-mask bits. */
			mask[rules] = i | (rules << 21);
			rules++;
		}
	}
	ir_strtab_free(&strtab);
#if 0
	for (i = 0; i < rules; i++) {
		printf("0x%08x\n", mask[i]);
	}
#endif
	printf("/* This file is generated from \"ir_fold.h\". Do not edit! */\n\n");
	printf("typedef enum _ir_fold_rule_id {\n");
	for (i = 0; i < rules; i++) {
		/* Rule IDs are named after their source line in ir_fold.h. */
		printf("\tIR_RULE_%d,\n", rule[i]);
	}
	printf("\t_IR_RULE_LAST\n");
	printf("} ir_fold_rule_id;\n\n");
	if (!find_hash(mask, rules)) {
		fprintf(stderr, "ERROR: Cannot find a good hash function\n");
		return 1;
	}
	return 0;
}

2322
ext/opcache/jit/ir/ir.c Normal file

File diff suppressed because it is too large Load Diff

924
ext/opcache/jit/ir/ir.h Normal file
View File

@@ -0,0 +1,924 @@
/*
* IR - Lightweight JIT Compilation Framework
* (Public API)
* Copyright (C) 2022 Zend by Perforce.
* Authors: Dmitry Stogov <dmitry@php.net>
*/
#ifndef IR_H
#define IR_H
#ifdef __cplusplus
extern "C" {
#endif
#include <inttypes.h>
#include <stdint.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#define IR_VERSION "0.0.1"
#ifdef _WIN32
/* TODO Handle ARM, too. */
# if defined(_M_X64)
# define __SIZEOF_SIZE_T__ 8
# elif defined(_M_IX86)
# define __SIZEOF_SIZE_T__ 4
# endif
/* Only supported is little endian for any arch on Windows,
so just fake the same for all. */
# define __ORDER_LITTLE_ENDIAN__ 1
# define __BYTE_ORDER__ __ORDER_LITTLE_ENDIAN__
# ifndef __has_builtin
# define __has_builtin(arg) (0)
# endif
#endif
#if defined(IR_TARGET_X86)
# define IR_TARGET "x86"
#elif defined(IR_TARGET_X64)
# ifdef _WIN64
# define IR_TARGET "Windows-x86_64" /* 64-bit Windows use different ABI and calling convention */
# else
# define IR_TARGET "x86_64"
# endif
#elif defined(IR_TARGET_AARCH64)
# define IR_TARGET "aarch64"
#else
# error "Unknown IR target"
#endif
#if defined(__SIZEOF_SIZE_T__)
# if __SIZEOF_SIZE_T__ == 8
# define IR_64 1
# elif __SIZEOF_SIZE_T__ != 4
# error "Unknown addr size"
# endif
#else
# error "Unknown addr size"
#endif
#if defined(__BYTE_ORDER__)
# if defined(__ORDER_LITTLE_ENDIAN__)
# if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
# define IR_STRUCT_LOHI(lo, hi) struct {lo; hi;}
# endif
# endif
# if defined(__ORDER_BIG_ENDIAN__)
# if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
# define IR_STRUCT_LOHI(lo, hi) struct {hi; lo;}
# endif
# endif
#endif
#ifndef IR_STRUCT_LOHI
# error "Unknown byte order"
#endif
#ifdef __has_attribute
# if __has_attribute(always_inline)
# define IR_ALWAYS_INLINE static inline __attribute__((always_inline))
# endif
# if __has_attribute(noinline)
# define IR_NEVER_INLINE __attribute__((noinline))
# endif
#else
# define __has_attribute(x) 0
#endif
#ifndef IR_ALWAYS_INLINE
# define IR_ALWAYS_INLINE static inline
#endif
#ifndef IR_NEVER_INLINE
# define IR_NEVER_INLINE
#endif
#ifdef IR_PHP
# include "ir_php.h"
#endif
/* IR Type flags (low 4 bits are used for type size) */
#define IR_TYPE_SIGNED (1<<4)
#define IR_TYPE_UNSIGNED (1<<5)
#define IR_TYPE_FP (1<<6)
#define IR_TYPE_SPECIAL (1<<7)
#define IR_TYPE_BOOL (IR_TYPE_SPECIAL|IR_TYPE_UNSIGNED)
#define IR_TYPE_ADDR (IR_TYPE_SPECIAL|IR_TYPE_UNSIGNED)
#define IR_TYPE_CHAR (IR_TYPE_SPECIAL|IR_TYPE_SIGNED)
/* List of IR types */
#define IR_TYPES(_) \
_(BOOL, bool, b, IR_TYPE_BOOL) \
_(U8, uint8_t, u8, IR_TYPE_UNSIGNED) \
_(U16, uint16_t, u16, IR_TYPE_UNSIGNED) \
_(U32, uint32_t, u32, IR_TYPE_UNSIGNED) \
_(U64, uint64_t, u64, IR_TYPE_UNSIGNED) \
_(ADDR, uintptr_t, addr, IR_TYPE_ADDR) \
_(CHAR, char, c, IR_TYPE_CHAR) \
_(I8, int8_t, i8, IR_TYPE_SIGNED) \
_(I16, int16_t, i16, IR_TYPE_SIGNED) \
_(I32, int32_t, i32, IR_TYPE_SIGNED) \
_(I64, int64_t, i64, IR_TYPE_SIGNED) \
_(DOUBLE, double, d, IR_TYPE_FP) \
_(FLOAT, float, f, IR_TYPE_FP) \
#define IR_IS_TYPE_UNSIGNED(t) ((t) < IR_CHAR)
#define IR_IS_TYPE_SIGNED(t) ((t) >= IR_CHAR && (t) < IR_DOUBLE)
#define IR_IS_TYPE_INT(t) ((t) < IR_DOUBLE)
#define IR_IS_TYPE_FP(t) ((t) >= IR_DOUBLE)
#define IR_TYPE_ENUM(name, type, field, flags) IR_ ## name,
typedef enum _ir_type {
IR_VOID,
IR_TYPES(IR_TYPE_ENUM)
IR_LAST_TYPE
} ir_type;
/* List of IR opcodes
* ==================
*
* Each instruction is described by a type (opcode, flags, op1_type, op2_type, op3_type)
*
* flags
* -----
* v - void
* d - data IR_OP_FLAG_DATA
* r - ref IR_OP_FLAG_DATA alias
* p - pinned IR_OP_FLAG_DATA + IR_OP_FLAG_PINNED
* c - control IR_OP_FLAG_CONTROL
* S - control IR_OP_FLAG_CONTROL + IR_OP_FLAG_BB_START
* E - control IR_OP_FLAG_CONTROL + IR_OP_FLAG_BB_END
* T - control IR_OP_FLAG_CONTROL + IR_OP_FLAG_BB_END + IR_OP_FLAG_TERMINATOR
* l - load IR_OP_FLAG_MEM + IR_OP_FLAG_MEM_LOAD
* s - store IR_OP_FLAG_MEM + IR_OP_FLAG_STORE
* x - call IR_OP_FLAG_MEM + IR_OP_FLAG_CALL
* a - alloc IR_OP_FLAG_MEM + IR_OP_FLAG_ALLOC
* 0-3 - number of input edges
* N - number of arguments is defined in the insn->inputs_count (MERGE, PHI, CALL)
* X1-X3 - number of extra data ops
* C - commutative operation ("d2C" => IR_OP_FLAG_DATA + IR_OP_FLAG_COMMUTATIVE)
*
* operand types
* -------------
* ___ - unused
* def - reference to a definition op (data-flow use-def dependency edge)
* ref - memory reference (data-flow use-def dependency edge)
* var - variable reference (data-flow use-def dependency edge)
* arg - argument reference CALL/TAILCALL/CARG->CARG
* src - reference to a previous control region (IF, IF_TRUE, IF_FALSE, MERGE, LOOP_BEGIN, LOOP_END, RETURN)
* reg - data-control dependency on region (PHI, VAR, PARAM)
* ret - reference to a previous RETURN instruction (RETURN)
* str - string: variable/argument name (VAR, PARAM, CALL, TAILCALL)
* num - number: argument number (PARAM)
* prb - branch probability 1-99 (0 - unspecified): (IF_TRUE, IF_FALSE, CASE_VAL, CASE_DEFAULT)
* opt - optional number
*
* The order of IR opcodes is carefully selected for efficient folding.
* - foldable instruction go first
* - NOP is never used (code 0 is used as ANY pattern)
* - CONST is the most often used instruction (encode with 1 bit)
* - equality inversion: EQ <-> NE => op =^ 1
* - comparison inversion: [U]LT <-> [U]GT, [U]LE <-> [U]GE => op =^ 3
*/
#define IR_OPS(_) \
/* special op (must be the first !!!) */ \
_(NOP, v, ___, ___, ___) /* empty instruction */ \
\
/* constants reference */ \
_(C_BOOL, r0, ___, ___, ___) /* constant */ \
_(C_U8, r0, ___, ___, ___) /* constant */ \
_(C_U16, r0, ___, ___, ___) /* constant */ \
_(C_U32, r0, ___, ___, ___) /* constant */ \
_(C_U64, r0, ___, ___, ___) /* constant */ \
_(C_ADDR, r0, ___, ___, ___) /* constant */ \
_(C_CHAR, r0, ___, ___, ___) /* constant */ \
_(C_I8, r0, ___, ___, ___) /* constant */ \
_(C_I16, r0, ___, ___, ___) /* constant */ \
_(C_I32, r0, ___, ___, ___) /* constant */ \
_(C_I64, r0, ___, ___, ___) /* constant */ \
_(C_DOUBLE, r0, ___, ___, ___) /* constant */ \
_(C_FLOAT, r0, ___, ___, ___) /* constant */ \
\
/* equality ops */ \
_(EQ, d2C, def, def, ___) /* equal */ \
_(NE, d2C, def, def, ___) /* not equal */ \
\
/* comparison ops (order matters, LT must be a modulo of 4 !!!) */ \
_(LT, d2, def, def, ___) /* less */ \
_(GE, d2, def, def, ___) /* greater or equal */ \
_(LE, d2, def, def, ___) /* less or equal */ \
_(GT, d2, def, def, ___) /* greater */ \
_(ULT, d2, def, def, ___) /* unsigned less */ \
_(UGE, d2, def, def, ___) /* unsigned greater or equal */ \
_(ULE, d2, def, def, ___) /* unsigned less or equal */ \
_(UGT, d2, def, def, ___) /* unsigned greater */ \
\
/* arithmetic ops */ \
_(ADD, d2C, def, def, ___) /* addition */ \
_(SUB, d2, def, def, ___) /* subtraction (must be ADD+1) */ \
_(MUL, d2C, def, def, ___) /* multiplication */ \
_(DIV, d2, def, def, ___) /* division */ \
_(MOD, d2, def, def, ___) /* modulo */ \
_(NEG, d1, def, ___, ___) /* change sign */ \
_(ABS, d1, def, ___, ___) /* absolute value */ \
/* (LDEXP, MIN, MAX, FPMATH) */ \
\
/* type conversion ops */ \
_(SEXT, d1, def, ___, ___) /* sign extension */ \
_(ZEXT, d1, def, ___, ___) /* zero extension */ \
_(TRUNC, d1, def, ___, ___) /* truncates to int type */ \
_(BITCAST, d1, def, ___, ___) /* binary representation */ \
_(INT2FP, d1, def, ___, ___) /* int to float conversion */ \
_(FP2INT, d1, def, ___, ___) /* float to int conversion */ \
_(FP2FP, d1, def, ___, ___) /* float to float conversion */ \
\
/* overflow-check */ \
_(ADD_OV, d2C, def, def, ___) /* addition */ \
_(SUB_OV, d2, def, def, ___) /* subtraction */ \
_(MUL_OV, d2C, def, def, ___) /* multiplication */ \
_(OVERFLOW, d1, def, ___, ___) /* overflow check add/sub/mul */ \
\
/* bitwise and shift ops */ \
_(NOT, d1, def, ___, ___) /* bitwise NOT */ \
_(OR, d2C, def, def, ___) /* bitwise OR */ \
_(AND, d2C, def, def, ___) /* bitwise AND */ \
_(XOR, d2C, def, def, ___) /* bitwise XOR */ \
_(SHL, d2, def, def, ___) /* logic shift left */ \
_(SHR, d2, def, def, ___) /* logic shift right */ \
_(SAR, d2, def, def, ___) /* arithmetic shift right */ \
_(ROL, d2, def, def, ___) /* rotate left */ \
_(ROR, d2, def, def, ___) /* rotate right */ \
_(BSWAP, d1, def, ___, ___) /* byte swap */ \
\
/* branch-less conditional ops */ \
_(MIN, d2C, def, def, ___) /* min(op1, op2) */ \
_(MAX, d2C, def, def, ___) /* max(op1, op2) */ \
_(COND, d3, def, def, def) /* op1 ? op2 : op3 */ \
\
/* data-flow and miscellaneous ops */ \
_(PHI, pN, reg, def, def) /* SSA Phi function */ \
_(COPY, d1X1, def, opt, ___) /* COPY (last foldable op) */ \
_(PI, p2, reg, def, ___) /* e-SSA Pi constraint ??? */ \
/* (USE, RENAME) */ \
\
/* data ops */ \
_(PARAM, p1X2, reg, str, num) /* incoming parameter proj. */ \
_(VAR, p1X1, reg, str, ___) /* local variable */ \
_(FUNC_ADDR, r0, ___, ___, ___) /* constant func ref */ \
_(FUNC, r0, ___, ___, ___) /* constant func ref */ \
_(SYM, r0, ___, ___, ___) /* constant symbol ref */ \
_(STR, r0, ___, ___, ___) /* constant str ref */ \
\
/* call ops */ \
_(CALL, xN, src, def, def) /* CALL(src, func, args...) */ \
_(TAILCALL, xN, src, def, def) /* CALL+RETURN */ \
\
/* memory reference and load/store ops */ \
_(ALLOCA, a2, src, def, ___) /* alloca(def) */ \
_(AFREE, a2, src, def, ___) /* revert alloca(def) */ \
_(VADDR, d1, var, ___, ___) /* load address of local var */ \
_(VLOAD, l2, src, var, ___) /* load value of local var */ \
_(VSTORE, s3, src, var, def) /* store value to local var */ \
_(RLOAD, l1X2, src, num, opt) /* load value from register */ \
_(RSTORE, s2X1, src, def, num) /* store value into register */ \
_(LOAD, l2, src, ref, ___) /* load from memory */ \
_(STORE, s3, src, ref, def) /* store to memory */ \
_(TLS, l1X2, src, num, num) /* thread local variable */ \
_(TRAP, x1, src, ___, ___) /* DebugBreak */ \
/* memory reference ops (A, H, U, S, TMP, STR, NEW, X, V) ??? */ \
\
/* guards */ \
_(GUARD, c3, src, def, def) /* IF without second successor */ \
_(GUARD_NOT , c3, src, def, def) /* IF without second successor */ \
\
/* deoptimization */ \
_(SNAPSHOT, xN, src, def, def) /* SNAPSHOT(src, args...) */ \
\
/* control-flow nodes */ \
_(START, S0X1, ret, ___, ___) /* function start */ \
_(ENTRY, S1X1, src, num, ___) /* entry with a fake src edge */ \
_(BEGIN, S1, src, ___, ___) /* block start */ \
_(IF_TRUE, S1X1, src, prb, ___) /* IF TRUE proj. */ \
_(IF_FALSE, S1X1, src, prb, ___) /* IF FALSE proj. */ \
_(CASE_VAL, S2X1, src, def, prb) /* switch proj. */ \
_(CASE_DEFAULT, S1X1, src, prb, ___) /* switch proj. */ \
_(MERGE, SN, src, src, src) /* control merge */ \
_(LOOP_BEGIN, SN, src, src, src) /* loop start */ \
_(END, E1, src, ___, ___) /* block end */ \
_(LOOP_END, E1, src, ___, ___) /* loop end */ \
_(IF, E2, src, def, ___) /* conditional control split */ \
_(SWITCH, E2, src, def, ___) /* multi-way control split */ \
_(RETURN, T2X1, src, def, ret) /* function return */ \
_(IJMP, T2X1, src, def, ret) /* computed goto */ \
_(UNREACHABLE, T1X2, src, ___, ret) /* unreachable (tailcall, etc) */ \
\
/* deoptimization helper */ \
_(EXITCALL, x2, src, def, ___) /* save CPU regs and call op2 */ \
#define IR_OP_ENUM(name, flags, op1, op2, op3) IR_ ## name,
typedef enum _ir_op {
IR_OPS(IR_OP_ENUM)
#ifdef IR_PHP
IR_PHP_OPS(IR_OP_ENUM)
#endif
IR_LAST_OP
} ir_op;
/* IR Opcode and Type Union */
#define IR_OPT_OP_MASK 0x00ff
#define IR_OPT_TYPE_MASK 0xff00
#define IR_OPT_TYPE_SHIFT 8
#define IR_OPT_INPUTS_SHIFT 16
#define IR_OPT(op, type) ((uint16_t)(op) | ((uint16_t)(type) << IR_OPT_TYPE_SHIFT))
#define IR_OPTX(op, type, n) ((uint32_t)(op) | ((uint32_t)(type) << IR_OPT_TYPE_SHIFT) | ((uint32_t)(n) << IR_OPT_INPUTS_SHIFT))
#define IR_OPT_TYPE(opt) (((opt) & IR_OPT_TYPE_MASK) >> IR_OPT_TYPE_SHIFT)
/* IR References */
typedef int32_t ir_ref;
#define IR_IS_CONST_REF(ref) ((ref) < 0)
/* IR Constant Value */
#define IR_UNUSED 0
#define IR_NULL (-1)
#define IR_FALSE (-2)
#define IR_TRUE (-3)
#define IR_LAST_FOLDABLE_OP IR_COPY
#define IR_CONSTS_LIMIT_MIN (-(IR_TRUE - 1))
#define IR_INSNS_LIMIT_MIN (IR_UNUSED + 1)
#ifndef IR_64
/* On 32-bit targets "addr" shares the low 32-bit slot of the union. */
# define ADDR_MEMBER uintptr_t addr;
#else
# define ADDR_MEMBER
#endif
/* IR constant value: 64 bits of storage readable as any supported IR type.
 * IR_STRUCT_LOHI orders the lo/hi halves by host byte order, so the narrow
 * views (u32/u16/u8, f, b, c, ...) always map onto the least-significant
 * bits of the 64-bit value on both endiannesses. */
typedef union _ir_val {
	double d;
	uint64_t u64;
	int64_t i64;
#ifdef IR_64
	uintptr_t addr;
#endif
	IR_STRUCT_LOHI(
		union {
			uint32_t u32;
			int32_t i32;
			float f;
			ADDR_MEMBER
			IR_STRUCT_LOHI(
				union {
					uint16_t u16;
					int16_t i16;
					IR_STRUCT_LOHI(
						union {
							uint8_t u8;
							int8_t i8;
							bool b;
							char c;
						},
						uint8_t u8_hi
					);
				},
				uint16_t u16_hi
			);
		},
		uint32_t u32_hi
	);
} ir_val;
#undef ADDR_MEMBER
/* IR constant flags */
#define IR_CONST_EMIT (1<<0)
#define IR_CONST_FASTCALL_FUNC (1<<1)
#define IR_CONST_VARARG_FUNC (1<<2)
/* IR Instruction.
 * Fixed-size node: the first 32-bit word packs op (8 bit), type (8 bit)
 * and a 16-bit auxiliary field; it is also addressable as "opt" (op+type),
 * as the whole word "optx", or as ops[0] — so operands can be accessed
 * uniformly through the ops[] overlay.  For constants, "val" overlays the
 * op2/op3 pair and "prev_const" reuses the op1 slot to chain constants. */
typedef struct _ir_insn {
	IR_STRUCT_LOHI(
		union {
			IR_STRUCT_LOHI(
				union {
					IR_STRUCT_LOHI(
						uint8_t op,
						uint8_t type
					);
					uint16_t opt;
				},
				union {
					uint16_t inputs_count; /* number of input control edges for MERGE, PHI, CALL, TAILCALL */
					uint16_t prev_insn_offset; /* 16-bit backward offset from current instruction for CSE */
					uint16_t const_flags; /* flag to emit constant in rodat section */
				}
			);
			uint32_t optx;
			ir_ref ops[1];
		},
		union {
			ir_ref op1;
			ir_ref prev_const;
		}
	);
	union {
		IR_STRUCT_LOHI(
			ir_ref op2,
			ir_ref op3
		);
		ir_val val;
	};
} ir_insn;
/* IR Hash Tables API (private) */
typedef struct _ir_hashtab ir_hashtab;
/* IR String Tables API (implementation in ir_strtab.c).
 * Interns strings and maps them to ir_ref values and back.
 * NOTE(review): field semantics inferred from names — "buf"/"buf_top"
 * appear to hold character data and "data"/"mask"/"size"/"count"/"pos"
 * the hash index; confirm against ir_strtab.c. */
typedef struct _ir_strtab {
	void *data;
	uint32_t mask;
	uint32_t size;
	uint32_t count;
	uint32_t pos;
	char *buf;
	uint32_t buf_size;
	uint32_t buf_top;
} ir_strtab;
/* Number of interned strings. */
#define ir_strtab_count(strtab) (strtab)->count
typedef void (*ir_strtab_apply_t)(const char *str, uint32_t len, ir_ref val);
void ir_strtab_init(ir_strtab *strtab, uint32_t count, uint32_t buf_size);
ir_ref ir_strtab_lookup(ir_strtab *strtab, const char *str, uint32_t len, ir_ref val);
ir_ref ir_strtab_find(const ir_strtab *strtab, const char *str, uint32_t len);
ir_ref ir_strtab_update(ir_strtab *strtab, const char *str, uint32_t len, ir_ref val);
const char *ir_strtab_str(const ir_strtab *strtab, ir_ref idx);
void ir_strtab_apply(const ir_strtab *strtab, ir_strtab_apply_t func);
void ir_strtab_free(ir_strtab *strtab);
/* IR Context Flags */
#define IR_FUNCTION (1<<0) /* Generate a function. */
#define IR_FASTCALL_FUNC (1<<1) /* Generate a function with fastcall calling convention, x86 32-bit only. */
#define IR_VARARG_FUNC (1<<2)
#define IR_STATIC (1<<3)
#define IR_EXTERN (1<<4)
#define IR_CONST (1<<5)
#define IR_SKIP_PROLOGUE (1<<6) /* Don't generate function prologue. */
#define IR_USE_FRAME_POINTER (1<<7)
#define IR_PREALLOCATED_STACK (1<<8)
#define IR_HAS_ALLOCA (1<<9)
#define IR_HAS_CALLS (1<<10)
#define IR_NO_STACK_COMBINE (1<<11)
#define IR_START_BR_TARGET (1<<12)
#define IR_ENTRY_BR_TARGET (1<<13)
#define IR_GEN_ENDBR (1<<14)
#define IR_MERGE_EMPTY_ENTRIES (1<<15)
#define IR_CFG_HAS_LOOPS (1<<16)
#define IR_IRREDUCIBLE_CFG (1<<17)
#define IR_OPT_FOLDING (1<<18)
#define IR_OPT_CFG (1<<19) /* merge BBs, by remove END->BEGIN nodes during CFG construction */
#define IR_OPT_CODEGEN (1<<20)
#define IR_OPT_IN_SCCP (1<<21)
#define IR_LINEAR (1<<22)
#define IR_GEN_NATIVE (1<<23)
#define IR_GEN_CODE (1<<24) /* C or LLVM */
/* Temporary: SCCP -> CFG */
#define IR_SCCP_DONE (1<<25)
/* Temporary: Dominators -> Loops */
#define IR_NO_LOOPS (1<<25)
/* Temporary: Live Ranges */
#define IR_LR_HAVE_DESSA_MOVES (1<<25)
/* Temporary: Register Allocator */
#define IR_RA_HAVE_SPLITS (1<<25)
#define IR_RA_HAVE_SPILLS (1<<26)
/* debug related */
#ifdef IR_DEBUG
# define IR_DEBUG_SCCP (1<<27)
# define IR_DEBUG_GCM (1<<28)
# define IR_DEBUG_SCHEDULE (1<<29)
# define IR_DEBUG_RA (1<<30)
#endif
typedef struct _ir_ctx ir_ctx;
typedef struct _ir_use_list ir_use_list;
typedef struct _ir_block ir_block;
typedef struct _ir_arena ir_arena;
typedef struct _ir_live_interval ir_live_interval;
typedef struct _ir_live_range ir_live_range;
typedef struct _ir_loader ir_loader;
typedef int8_t ir_regs[4];
typedef void (*ir_snapshot_create_t)(ir_ctx *ctx, ir_ref addr);
#if defined(IR_TARGET_AARCH64)
typedef const void *(*ir_get_exit_addr_t)(uint32_t exit_num);
typedef const void *(*ir_get_veneer_t)(ir_ctx *ctx, const void *addr);
typedef bool (*ir_set_veneer_t)(ir_ctx *ctx, const void *addr, const void *veneer);
#endif
/* The main IR context: holds the bidirectional instruction/constant buffer
 * together with all structures derived from it (use lists, CFG, liveness,
 * register-allocation state) and code-generation parameters.  See the
 * per-field comments below. */
struct _ir_ctx {
	ir_insn *ir_base; /* two directional array - instructions grow down, constants grow up */
	ir_ref insns_count; /* number of instructions stored in instructions buffer */
	ir_ref insns_limit; /* size of allocated instructions buffer (it's extended when overflow) */
	ir_ref consts_count; /* number of constants stored in constants buffer */
	ir_ref consts_limit; /* size of allocated constants buffer (it's extended when overflow) */
	uint32_t flags; /* IR context flags (see IR_* defines above) */
	ir_type ret_type; /* Function return type */
	uint32_t mflags; /* CPU specific flags (see IR_X86_... macros below) */
	int32_t status; /* non-zero error code (see IR_ERROR_... macros), app may use negative codes */
	ir_ref fold_cse_limit; /* CSE finds identical insns backward from "insn_count" to "fold_cse_limit" */
	ir_insn fold_insn; /* temporary storage for folding engine */
	ir_hashtab *binding;
	ir_use_list *use_lists; /* def->use lists for each instruction */
	ir_ref *use_edges; /* the actual uses: use = ctx->use_edges[ctx->use_lists[def].refs + n] */
	ir_ref use_edges_count; /* number of elements in use_edges[] array */
	uint32_t cfg_blocks_count; /* number of elements in cfg_blocks[] array */
	uint32_t cfg_edges_count; /* number of elements in cfg_edges[] array */
	ir_block *cfg_blocks; /* list of basic blocks (starts from 1) */
	uint32_t *cfg_edges; /* the actual basic blocks predecessors and successors edges */
	uint32_t *cfg_map; /* map of instructions to basic block number */
	uint32_t *rules; /* array of target specific code-generation rules (for each instruction) */
	uint32_t *vregs; /* virtual register number for each instruction (0 = none) */
	ir_ref vregs_count;
	int32_t spill_base; /* base register for special spill area (e.g. PHP VM frame pointer) */
	uint64_t fixed_regset; /* fixed registers, excluded for regular register allocation */
	int32_t fixed_stack_red_zone; /* reusable stack allocated by caller (default 0) */
	int32_t fixed_stack_frame_size; /* fixed stack allocated by generated code for spills and registers save/restore */
	int32_t fixed_call_stack_size; /* fixed preallocated stack for parameter passing (default 0) */
	uint64_t fixed_save_regset; /* registers that always saved/restored in prologue/epilogue */
	ir_live_interval **live_intervals;
	ir_arena *arena;
	ir_live_range *unused_ranges;
	ir_regs *regs;
	ir_ref *prev_ref;
	/* One pass-specific slot, reused between compilation phases: */
	union {
		void *data;
		ir_ref control; /* used by IR construction API (see ir_builder.h) */
		ir_ref bb_start; /* used by target CPU instruction matcher */
		ir_ref vars; /* list of VARs (used by register allocator) */
	};
	ir_snapshot_create_t snapshot_create;
	int32_t stack_frame_alignment;
	int32_t stack_frame_size; /* spill stack frame size (used by register allocator and code generator) */
	int32_t call_stack_size; /* stack for parameter passing (used by register allocator and code generator) */
	uint64_t used_preserved_regs;
#ifdef IR_TARGET_X86
	int32_t param_stack_size;
	int32_t ret_slot;
#endif
	uint32_t rodata_offset;
	uint32_t jmp_table_offset;
	uint32_t entries_count;
	uint32_t *entries; /* array of ENTRY blocks */
	void *osr_entry_loads;
	void *code_buffer;
	size_t code_buffer_size;
#if defined(IR_TARGET_AARCH64)
	int32_t deoptimization_exits;
	int32_t veneers_size;
	uint32_t code_size;
	ir_get_exit_addr_t get_exit_addr;
	ir_get_veneer_t get_veneer;
	ir_set_veneer_t set_veneer;
#endif
	ir_loader *loader;
	ir_strtab strtab;
	/* Per-opcode / per-type chains used by the folding engine's CSE and
	 * constant lookup (most recent instruction/constant of each kind). */
	ir_ref prev_insn_chain[IR_LAST_FOLDABLE_OP + 1];
	ir_ref prev_const_chain[IR_LAST_TYPE];
};
/* Basic IR Construction API (implementation in ir.c) */
void ir_init(ir_ctx *ctx, uint32_t flags, ir_ref consts_limit, ir_ref insns_limit);
void ir_free(ir_ctx *ctx);
void ir_truncate(ir_ctx *ctx);
ir_ref ir_const(ir_ctx *ctx, ir_val val, uint8_t type);
ir_ref ir_const_i8(ir_ctx *ctx, int8_t c);
ir_ref ir_const_i16(ir_ctx *ctx, int16_t c);
ir_ref ir_const_i32(ir_ctx *ctx, int32_t c);
ir_ref ir_const_i64(ir_ctx *ctx, int64_t c);
ir_ref ir_const_u8(ir_ctx *ctx, uint8_t c);
ir_ref ir_const_u16(ir_ctx *ctx, uint16_t c);
ir_ref ir_const_u32(ir_ctx *ctx, uint32_t c);
ir_ref ir_const_u64(ir_ctx *ctx, uint64_t c);
ir_ref ir_const_bool(ir_ctx *ctx, bool c);
ir_ref ir_const_char(ir_ctx *ctx, char c);
ir_ref ir_const_float(ir_ctx *ctx, float c);
ir_ref ir_const_double(ir_ctx *ctx, double c);
ir_ref ir_const_addr(ir_ctx *ctx, uintptr_t c);
ir_ref ir_const_func_addr(ir_ctx *ctx, uintptr_t c, uint16_t flags);
ir_ref ir_const_func(ir_ctx *ctx, ir_ref str, uint16_t flags);
ir_ref ir_const_sym(ir_ctx *ctx, ir_ref str);
ir_ref ir_const_str(ir_ctx *ctx, ir_ref str);
ir_ref ir_unique_const_addr(ir_ctx *ctx, uintptr_t c);
void ir_print_const(const ir_ctx *ctx, const ir_insn *insn, FILE *f, bool quoted);
ir_ref ir_str(ir_ctx *ctx, const char *s);
ir_ref ir_strl(ir_ctx *ctx, const char *s, size_t len);
const char *ir_get_str(const ir_ctx *ctx, ir_ref idx);
ir_ref ir_emit(ir_ctx *ctx, uint32_t opt, ir_ref op1, ir_ref op2, ir_ref op3);
ir_ref ir_emit0(ir_ctx *ctx, uint32_t opt);
ir_ref ir_emit1(ir_ctx *ctx, uint32_t opt, ir_ref op1);
ir_ref ir_emit2(ir_ctx *ctx, uint32_t opt, ir_ref op1, ir_ref op2);
ir_ref ir_emit3(ir_ctx *ctx, uint32_t opt, ir_ref op1, ir_ref op2, ir_ref op3);
ir_ref ir_emit_N(ir_ctx *ctx, uint32_t opt, int32_t count);
void ir_set_op(ir_ctx *ctx, ir_ref ref, int32_t n, ir_ref val);
/* Store "val" into the first operand slot of instruction "ref". */
IR_ALWAYS_INLINE void ir_set_op1(ir_ctx *ctx, ir_ref ref, ir_ref val)
{
	ctx->ir_base[ref].op1 = val;
}
/* Store "val" into the second operand slot of instruction "ref". */
IR_ALWAYS_INLINE void ir_set_op2(ir_ctx *ctx, ir_ref ref, ir_ref val)
{
	ctx->ir_base[ref].op2 = val;
}
/* Store "val" into the third operand slot of instruction "ref". */
IR_ALWAYS_INLINE void ir_set_op3(ir_ctx *ctx, ir_ref ref, ir_ref val)
{
	ctx->ir_base[ref].op3 = val;
}
/* Read operand "n" of an instruction through the ops[] overlay
 * (ops[0] aliases the optx word; used when the operand index is
 * not known at compile time). */
IR_ALWAYS_INLINE ir_ref ir_insn_op(const ir_insn *insn, int32_t n)
{
	return insn->ops[n];
}
/* Write operand "n" of an instruction through the ops[] overlay
 * (counterpart of ir_insn_op()). */
IR_ALWAYS_INLINE void ir_insn_set_op(ir_insn *insn, int32_t n, ir_ref val)
{
	insn->ops[n] = val;
}
ir_ref ir_fold(ir_ctx *ctx, uint32_t opt, ir_ref op1, ir_ref op2, ir_ref op3);
ir_ref ir_fold0(ir_ctx *ctx, uint32_t opt);
ir_ref ir_fold1(ir_ctx *ctx, uint32_t opt, ir_ref op1);
ir_ref ir_fold2(ir_ctx *ctx, uint32_t opt, ir_ref op1, ir_ref op2);
ir_ref ir_fold3(ir_ctx *ctx, uint32_t opt, ir_ref op1, ir_ref op2, ir_ref op3);
ir_ref ir_param(ir_ctx *ctx, ir_type type, ir_ref region, const char *name, int pos);
ir_ref ir_var(ir_ctx *ctx, ir_type type, ir_ref region, const char *name);
ir_ref ir_bind(ir_ctx *ctx, ir_ref var, ir_ref def);
/* Def -> Use lists */
void ir_build_def_use_lists(ir_ctx *ctx);
/* CFG - Control Flow Graph (implementation in ir_cfg.c) */
int ir_build_cfg(ir_ctx *ctx);
int ir_remove_unreachable_blocks(ir_ctx *ctx);
int ir_build_dominators_tree(ir_ctx *ctx);
int ir_find_loops(ir_ctx *ctx);
int ir_schedule_blocks(ir_ctx *ctx);
void ir_build_prev_refs(ir_ctx *ctx);
/* SCCP - Sparse Conditional Constant Propagation (implementation in ir_sccp.c) */
int ir_sccp(ir_ctx *ctx);
/* GCM - Global Code Motion and scheduling (implementation in ir_gcm.c) */
int ir_gcm(ir_ctx *ctx);
int ir_schedule(ir_ctx *ctx);
/* Liveness & Register Allocation (implementation in ir_ra.c) */
#define IR_REG_NONE -1
#define IR_REG_SPILL_LOAD (1<<6)
#define IR_REG_SPILL_STORE (1<<6)
#define IR_REG_SPILL_SPECIAL (1<<7)
#define IR_REG_SPILLED(r) \
((r) & (IR_REG_SPILL_LOAD|IR_REG_SPILL_STORE|IR_REG_SPILL_SPECIAL))
#define IR_REG_NUM(r) \
((int8_t)((r) == IR_REG_NONE ? IR_REG_NONE : ((r) & ~(IR_REG_SPILL_LOAD|IR_REG_SPILL_STORE|IR_REG_SPILL_SPECIAL))))
int ir_assign_virtual_registers(ir_ctx *ctx);
int ir_compute_live_ranges(ir_ctx *ctx);
int ir_coalesce(ir_ctx *ctx);
int ir_compute_dessa_moves(ir_ctx *ctx);
int ir_reg_alloc(ir_ctx *ctx);
int ir_regs_number(void);
bool ir_reg_is_int(int32_t reg);
const char *ir_reg_name(int8_t reg, ir_type type);
int32_t ir_get_spill_slot_offset(ir_ctx *ctx, ir_ref ref);
/* Target CPU instruction selection and code generation (see ir_x86.c) */
int ir_match(ir_ctx *ctx);
void *ir_emit_code(ir_ctx *ctx, size_t *size);
/* Target address resolution (implementation in ir_emit.c) */
void *ir_resolve_sym_name(const char *name);
/* Target CPU disassembler (implementation in ir_disasm.c) */
int ir_disasm_init(void);
void ir_disasm_free(void);
void ir_disasm_add_symbol(const char *name, uint64_t addr, uint64_t size);
const char* ir_disasm_find_symbol(uint64_t addr, int64_t *offset);
int ir_disasm(const char *name,
const void *start,
size_t size,
bool asm_addr,
ir_ctx *ctx,
FILE *f);
/* Linux perf interface (implementation in ir_perf.c) */
int ir_perf_jitdump_open(void);
int ir_perf_jitdump_close(void);
int ir_perf_jitdump_register(const char *name, const void *start, size_t size);
void ir_perf_map_register(const char *name, const void *start, size_t size);
/* GDB JIT interface (implementation in ir_gdb.c) */
int ir_gdb_register(const char *name,
const void *start,
size_t size,
uint32_t sp_offset,
uint32_t sp_adjustment);
void ir_gdb_unregister_all(void);
bool ir_gdb_present(void);
/* IR load API (implementation in ir_load.c) */
/* Callback interface implemented by consumers of the IR load API
 * (ir_load() and the LLVM front-ends). Each bool callback returning
 * false aborts loading. */
struct _ir_loader {
/* Flags applied to every loaded function by default — presumably the
 * IR_* function flags; verify against ir_load.c. */
uint32_t default_func_flags;
bool (*init_module) (ir_loader *loader, const char *name, const char *filename, const char *target);
/* Declarations of symbols/functions defined outside the loaded module. */
bool (*external_sym_dcl) (ir_loader *loader, const char *name, uint32_t flags);
bool (*external_func_dcl) (ir_loader *loader, const char *name,
uint32_t flags, ir_type ret_type, uint32_t params_count, ir_type *param_types);
bool (*forward_func_dcl) (ir_loader *loader, const char *name,
uint32_t flags, ir_type ret_type, uint32_t params_count, ir_type *param_types);
/* Data symbol definition: sym_dcl opens it, sym_data supplies initializer
 * chunks, sym_data_end closes it. */
bool (*sym_dcl) (ir_loader *loader, const char *name, uint32_t flags, size_t size, bool has_data);
bool (*sym_data) (ir_loader *loader, ir_type type, uint32_t count, const void *data);
bool (*sym_data_end) (ir_loader *loader);
/* func_init is called before a function body is parsed into ctx,
 * func_process after the body is complete. */
bool (*func_init) (ir_loader *loader, ir_ctx *ctx, const char *name);
bool (*func_process) (ir_loader *loader, ir_ctx *ctx, const char *name);
/* Resolve an external symbol name to its address (NULL on failure). */
void*(*resolve_sym_name) (ir_loader *loader, const char *name);
};
void ir_loader_init(void);
void ir_loader_free(void);
int ir_load(ir_loader *loader, FILE *f);
/* IR LLVM load API (implementation in ir_load_llvm.c) */
int ir_load_llvm_bitcode(ir_loader *loader, const char *filename);
int ir_load_llvm_asm(ir_loader *loader, const char *filename);
/* IR save API (implementation in ir_save.c) */
void ir_save(const ir_ctx *ctx, FILE *f);
/* IR debug dump API (implementation in ir_dump.c) */
void ir_dump(const ir_ctx *ctx, FILE *f);
void ir_dump_dot(const ir_ctx *ctx, FILE *f);
void ir_dump_use_lists(const ir_ctx *ctx, FILE *f);
void ir_dump_cfg(ir_ctx *ctx, FILE *f);
void ir_dump_cfg_map(const ir_ctx *ctx, FILE *f);
void ir_dump_live_ranges(const ir_ctx *ctx, FILE *f);
void ir_dump_codegen(const ir_ctx *ctx, FILE *f);
/* IR to C conversion (implementation in ir_emit_c.c) */
int ir_emit_c(ir_ctx *ctx, const char *name, FILE *f);
void ir_emit_c_func_decl(const char *name, uint32_t flags, ir_type ret_type, uint32_t params_count, ir_type *param_types, FILE *f);
/* IR to LLVM conversion (implementation in ir_emit_llvm.c) */
int ir_emit_llvm(ir_ctx *ctx, const char *name, FILE *f);
void ir_emit_llvm_func_decl(const char *name, uint32_t flags, ir_type ret_type, uint32_t params_count, ir_type *param_types, FILE *f);
/* IR verification API (implementation in ir_check.c) */
bool ir_check(const ir_ctx *ctx);
void ir_consistency_check(void);
/* Code patching (implementation in ir_patch.c) */
int ir_patch(const void *code, size_t size, uint32_t jmp_table_size, const void *from_addr, const void *to_addr);
/* CPU information (implementation in ir_cpuinfo.c) */
#if defined(IR_TARGET_X86) || defined(IR_TARGET_X64)
/* Feature bits reported by ir_cpuinfo() on x86/x86_64 targets. */
# define IR_X86_SSE2 (1<<0)
# define IR_X86_SSE3 (1<<1)
# define IR_X86_SSSE3 (1<<2)
# define IR_X86_SSE41 (1<<3)
# define IR_X86_SSE42 (1<<4)
# define IR_X86_AVX (1<<5)
# define IR_X86_AVX2 (1<<6)
#endif
/* Returns a bitmask of the feature flags detected at runtime. */
uint32_t ir_cpuinfo(void);
/* Deoptimization helpers */
const void *ir_emit_exitgroup(uint32_t first_exit_point, uint32_t exit_points_per_group, const void *exit_addr, void *code_buffer, size_t code_buffer_size, size_t *size_ptr);
/* A reference IR JIT compiler.
 *
 * Runs the standard compilation pipeline over ctx at the requested
 * optimization level and returns a pointer to the generated machine code
 * (its size is stored into *size), or NULL on failure or misuse.
 *
 * -O0 requires the IR to have been built WITHOUT folding; -O1/-O2 require
 * that IR_OPT_FOLDING was enabled during construction. The pass order is
 * significant: every pass consumes data structures produced by the
 * previous one — do not reorder. */
IR_ALWAYS_INLINE void *ir_jit_compile(ir_ctx *ctx, int opt_level, size_t *size)
{
if (opt_level == 0) {
if (ctx->flags & IR_OPT_FOLDING) {
// IR_ASSERT(0 && "IR_OPT_FOLDING is incompatible with -O0");
return NULL;
}
/* -O0: no CFG-based optimization, no optimizing code generation. */
ctx->flags &= ~(IR_OPT_CFG | IR_OPT_CODEGEN);
ir_build_def_use_lists(ctx);
if (!ir_build_cfg(ctx)
|| !ir_match(ctx)
|| !ir_assign_virtual_registers(ctx)
|| !ir_compute_dessa_moves(ctx)) {
return NULL;
}
return ir_emit_code(ctx, size);
} else if (opt_level == 1 || opt_level == 2) {
if (!(ctx->flags & IR_OPT_FOLDING)) {
// IR_ASSERT(0 && "IR_OPT_FOLDING must be set in ir_init() for -O1 and -O2");
return NULL;
}
ctx->flags |= IR_OPT_CFG | IR_OPT_CODEGEN;
ir_build_def_use_lists(ctx);
/* SCCP is the only pass that distinguishes -O2 from -O1. */
if (opt_level == 2
&& !ir_sccp(ctx)) {
return NULL;
}
/* CFG -> dominators -> loops -> GCM -> scheduling -> instruction
 * selection -> register allocation -> block layout -> emission. */
if (!ir_build_cfg(ctx)
|| !ir_build_dominators_tree(ctx)
|| !ir_find_loops(ctx)
|| !ir_gcm(ctx)
|| !ir_schedule(ctx)
|| !ir_match(ctx)
|| !ir_assign_virtual_registers(ctx)
|| !ir_compute_live_ranges(ctx)
|| !ir_coalesce(ctx)
|| !ir_reg_alloc(ctx)
|| !ir_schedule_blocks(ctx)) {
return NULL;
}
return ir_emit_code(ctx, size);
} else {
// IR_ASSERT(0 && "wrong optimization level");
return NULL;
}
}
/* IR error codes used to report compilation failures. */
#define IR_ERROR_CODE_MEM_OVERFLOW 1
#define IR_ERROR_FIXED_STACK_FRAME_OVERFLOW 2
#define IR_ERROR_UNSUPPORTED_CODE_RULE 3
#define IR_ERROR_LINK 4
#define IR_ERROR_ENCODE 5
/* IR Memory Allocation: every allocator below may be overridden by
 * defining the corresponding macro before this point (the ir_mem_p*
 * variants are the "persistent" family; by default both map to the
 * standard C allocator). */
#ifndef ir_mem_malloc
# define ir_mem_malloc malloc
#endif
#ifndef ir_mem_calloc
# define ir_mem_calloc calloc
#endif
#ifndef ir_mem_realloc
# define ir_mem_realloc realloc
#endif
#ifndef ir_mem_free
# define ir_mem_free free
#endif
#ifndef ir_mem_pmalloc
# define ir_mem_pmalloc malloc
#endif
#ifndef ir_mem_pcalloc
# define ir_mem_pcalloc calloc
#endif
#ifndef ir_mem_prealloc
# define ir_mem_prealloc realloc
#endif
#ifndef ir_mem_pfree
# define ir_mem_pfree free
#endif
/* Low-level memory mapping / protection / cache-flush helpers. */
void *ir_mem_mmap(size_t size);
int ir_mem_unmap(void *ptr, size_t size);
int ir_mem_protect(void *ptr, size_t size);
int ir_mem_unprotect(void *ptr, size_t size);
int ir_mem_flush(void *ptr, size_t size);
#ifdef __cplusplus
} /* extern "C" */
#endif
#endif /* IR_H */

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,173 @@
/*
* IR - Lightweight JIT Compilation Framework
* (Aarch64 CPU specific definitions)
* Copyright (C) 2022 Zend by Perforce.
* Authors: Dmitry Stogov <dmitry@php.net>
*/
#ifndef IR_AARCH64_H
#define IR_AARCH64_H
/* AArch64 general-purpose register table: _(code, 64-bit name, 32-bit name).
 * Expanded with different _() macros to generate enums, name tables, etc.
 * Fixed: X19's 32-bit alias was mistyped as "w18" (copy-paste from X18);
 * also dropped the stray trailing backslash after the last entry, which
 * would otherwise splice the following #define into this macro. */
#define IR_GP_REGS(_) \
_(X0, x0, w0) \
_(X1, x1, w1) \
_(X2, x2, w2) \
_(X3, x3, w3) \
_(X4, x4, w4) \
_(X5, x5, w5) \
_(X6, x6, w6) \
_(X7, x7, w7) \
_(X8, x8, w8) \
_(X9, x9, w9) \
_(X10, x10, w10) \
_(X11, x11, w11) \
_(X12, x12, w12) \
_(X13, x13, w13) \
_(X14, x14, w14) \
_(X15, x15, w15) \
_(X16, x16, w16) \
_(X17, x17, w17) \
_(X18, x18, w18) \
_(X19, x19, w19) \
_(X20, x20, w20) \
_(X21, x21, w21) \
_(X22, x22, w22) \
_(X23, x23, w23) \
_(X24, x24, w24) \
_(X25, x25, w25) \
_(X26, x26, w26) \
_(X27, x27, w27) \
_(X28, x28, w28) \
_(X29, x29, w29) \
_(X30, x30, w30) \
_(X31, x31, w31)
/* AArch64 SIMD&FP register table: _(code, d-, s-, h-, b- sub-register names).
 * Fixed: V19's byte alias was mistyped as "b18" (copy-paste from V18);
 * dropped the stray trailing backslash after the last entry (it would
 * splice the next #define into this macro) and normalized "# define". */
#define IR_FP_REGS(_) \
_(V0, d0, s0, h0, b0) \
_(V1, d1, s1, h1, b1) \
_(V2, d2, s2, h2, b2) \
_(V3, d3, s3, h3, b3) \
_(V4, d4, s4, h4, b4) \
_(V5, d5, s5, h5, b5) \
_(V6, d6, s6, h6, b6) \
_(V7, d7, s7, h7, b7) \
_(V8, d8, s8, h8, b8) \
_(V9, d9, s9, h9, b9) \
_(V10, d10, s10, h10, b10) \
_(V11, d11, s11, h11, b11) \
_(V12, d12, s12, h12, b12) \
_(V13, d13, s13, h13, b13) \
_(V14, d14, s14, h14, b14) \
_(V15, d15, s15, h15, b15) \
_(V16, d16, s16, h16, b16) \
_(V17, d17, s17, h17, b17) \
_(V18, d18, s18, h18, b18) \
_(V19, d19, s19, h19, b19) \
_(V20, d20, s20, h20, b20) \
_(V21, d21, s21, h21, b21) \
_(V22, d22, s22, h22, b22) \
_(V23, d23, s23, h23, b23) \
_(V24, d24, s24, h24, b24) \
_(V25, d25, s25, h25, b25) \
_(V26, d26, s26, h26, b26) \
_(V27, d27, s27, h27, b27) \
_(V28, d28, s28, h28, b28) \
_(V29, d29, s29, h29, b29) \
_(V30, d30, s30, h30, b30) \
_(V31, d31, s31, h31, b31)
/* Expand a register-table entry into an enum constant IR_REG_<code>. */
#define IR_GP_REG_ENUM(code, name64, name32) \
IR_REG_ ## code,
#define IR_FP_REG_ENUM(code, name64, name32, name16, name8) \
IR_REG_ ## code,
/* Physical register numbering: GP registers first, then FP/SIMD registers;
 * IR_REG_NUM is the total count. */
enum _ir_reg {
_IR_REG_NONE = -1,
IR_GP_REGS(IR_GP_REG_ENUM)
IR_FP_REGS(IR_FP_REG_ENUM)
IR_REG_NUM,
};
/* Boundaries of the GP and FP register ranges within enum _ir_reg. */
#define IR_REG_GP_FIRST IR_REG_X0
#define IR_REG_FP_FIRST IR_REG_V0
#define IR_REG_GP_LAST (IR_REG_FP_FIRST - 1)
#define IR_REG_FP_LAST (IR_REG_NUM - 1)
#define IR_REG_SCRATCH (IR_REG_NUM) /* special name for regset */
#define IR_REG_ALL (IR_REG_NUM + 1) /* special name for regset */
/* Regsets need 64 bits on this target (more than 32 registers). */
#define IR_REGSET_64BIT 1
#define IR_REG_INT_TMP IR_REG_X17 /* reserved temporary register used by code-generator */
#define IR_REG_STACK_POINTER \
IR_REG_X31
#define IR_REG_FRAME_POINTER \
IR_REG_X29
/* Registers never available to the register allocator: the reserved
 * temporary, the platform register x18, and fp/lr-area/sp (x29..x31). */
#define IR_REGSET_FIXED \
( IR_REGSET(IR_REG_INT_TMP) \
| IR_REGSET(IR_REG_X18) /* platform specific register */ \
| IR_REGSET_INTERVAL(IR_REG_X29, IR_REG_X31))
/* Allocatable GP / FP register sets. */
#define IR_REGSET_GP \
IR_REGSET_DIFFERENCE(IR_REGSET_INTERVAL(IR_REG_GP_FIRST, IR_REG_GP_LAST), IR_REGSET_FIXED)
#define IR_REGSET_FP \
IR_REGSET_DIFFERENCE(IR_REGSET_INTERVAL(IR_REG_FP_FIRST, IR_REG_FP_LAST), IR_REGSET_FIXED)
/* Conventional AArch64 role names (AAPCS64): intra-procedure-call scratch
 * registers, platform register, link register, zero register. */
#define IR_REG_IP0 IR_REG_X16
#define IR_REG_IP1 IR_REG_X17
#define IR_REG_PR IR_REG_X18
#define IR_REG_LR IR_REG_X30
#define IR_REG_ZR IR_REG_X31
/* Calling Convention */
/* Return-value registers. */
#define IR_REG_INT_RET1 IR_REG_X0
#define IR_REG_FP_RET1 IR_REG_V0
/* Number of integer / floating-point argument registers. */
#define IR_REG_INT_ARGS 8
#define IR_REG_FP_ARGS 8
#define IR_REG_INT_ARG1 IR_REG_X0
#define IR_REG_INT_ARG2 IR_REG_X1
#define IR_REG_INT_ARG3 IR_REG_X2
#define IR_REG_INT_ARG4 IR_REG_X3
#define IR_REG_INT_ARG5 IR_REG_X4
#define IR_REG_INT_ARG6 IR_REG_X5
#define IR_REG_INT_ARG7 IR_REG_X6
#define IR_REG_INT_ARG8 IR_REG_X7
#define IR_REG_FP_ARG1 IR_REG_V0
#define IR_REG_FP_ARG2 IR_REG_V1
#define IR_REG_FP_ARG3 IR_REG_V2
#define IR_REG_FP_ARG4 IR_REG_V3
#define IR_REG_FP_ARG5 IR_REG_V4
#define IR_REG_FP_ARG6 IR_REG_V5
#define IR_REG_FP_ARG7 IR_REG_V6
#define IR_REG_FP_ARG8 IR_REG_V7
#define IR_MAX_REG_ARGS 16
/* No shadow (home) stack area is reserved for register arguments. */
#define IR_SHADOW_ARGS 0
/* Caller-saved (clobbered by calls) register set. */
# define IR_REGSET_SCRATCH \
(IR_REGSET_INTERVAL(IR_REG_X0, IR_REG_X18) \
| IR_REGSET_INTERVAL(IR_REG_V0, IR_REG_V7) \
| IR_REGSET_INTERVAL(IR_REG_V16, IR_REG_V31))
/* Callee-saved (preserved across calls) register set. */
# define IR_REGSET_PRESERVED \
(IR_REGSET_INTERVAL(IR_REG_X19, IR_REG_X30) \
| IR_REGSET_INTERVAL(IR_REG_V8, IR_REG_V15))
/* A temporary register required by a matched instruction: its class ("type"),
 * either a sequential number or a fixed register, and the live sub-range
 * [start, end] within the instruction (units are code-generator specific —
 * confirm in the register allocator before relying on them). */
typedef struct _ir_tmp_reg {
union {
uint8_t num;
int8_t reg;
};
uint8_t type;
uint8_t start;
uint8_t end;
} ir_tmp_reg;
/* Register constraints collected for one instruction: an optional fixed
 * definition register, required temporaries, and allocation hints. */
struct _ir_target_constraints {
int8_t def_reg;
uint8_t tmps_count;
uint8_t hints_count;
ir_tmp_reg tmp_regs[3];
int8_t hints[IR_MAX_REG_ARGS + 3];
};
#endif /* IR_AARCH64_H */

View File

@@ -0,0 +1,639 @@
/*
* IR - Lightweight JIT Compilation Framework
* (IR Construction API)
* Copyright (C) 2022 Zend by Perforce.
* Authors: Dmitry Stogov <dmitry@php.net>
*/
#ifndef IR_BUILDER_H
#define IR_BUILDER_H
#ifdef __cplusplus
extern "C" {
#endif
/* Convenience IR-construction macros. They expand into ir_const_*() /
 * ir_fold*() / ir_emit*() calls on the context named by _ir_CTX. */
/* _ir_CTX may be redefined by the user */
#define _ir_CTX ctx
#define ir_NOP() ir_emit0(_ir_CTX, IR_NOP)
/* Typed constant constructors. */
#define ir_CONST_BOOL(_val) ir_const_bool(_ir_CTX, (_val))
#define ir_CONST_U8(_val) ir_const_u8(_ir_CTX, (_val))
#define ir_CONST_U16(_val) ir_const_u16(_ir_CTX, (_val))
#define ir_CONST_U32(_val) ir_const_u32(_ir_CTX, (_val))
#define ir_CONST_U64(_val) ir_const_u64(_ir_CTX, (_val))
#define ir_CONST_ADDR(_val) ir_const_addr(_ir_CTX, (uintptr_t)(_val))
#define ir_CONST_CHAR(_val) ir_const_char(_ir_CTX, (_val))
#define ir_CONST_I8(_val) ir_const_i8(_ir_CTX, (_val))
#define ir_CONST_I16(_val) ir_const_i16(_ir_CTX, (_val))
#define ir_CONST_I32(_val) ir_const_i32(_ir_CTX, (_val))
#define ir_CONST_I64(_val) ir_const_i64(_ir_CTX, (_val))
#define ir_CONST_DOUBLE(_val) ir_const_double(_ir_CTX, (_val))
#define ir_CONST_FLOAT(_val) ir_const_float(_ir_CTX, (_val))
/* Generic comparison (boolean result) and typed unary-operation helpers;
 * the result type is encoded into the opcode via IR_OPT(). */
#define ir_CMP_OP(_op, _op1, _op2) ir_fold2(_ir_CTX, IR_OPT((_op), IR_BOOL), (_op1), (_op2))
#define ir_UNARY_OP(_op, _type, _op1) ir_fold1(_ir_CTX, IR_OPT((_op), (_type)), (_op1))
#define ir_UNARY_OP_B(_op, _op1) ir_fold1(_ir_CTX, IR_OPT((_op), IR_BOOL), (_op1))
#define ir_UNARY_OP_U8(_op, _op1) ir_fold1(_ir_CTX, IR_OPT((_op), IR_U8), (_op1))
#define ir_UNARY_OP_U16(_op, _op1) ir_fold1(_ir_CTX, IR_OPT((_op), IR_U16), (_op1))
#define ir_UNARY_OP_U32(_op, _op1) ir_fold1(_ir_CTX, IR_OPT((_op), IR_U32), (_op1))
#define ir_UNARY_OP_U64(_op, _op1) ir_fold1(_ir_CTX, IR_OPT((_op), IR_U64), (_op1))
#define ir_UNARY_OP_A(_op, _op1) ir_fold1(_ir_CTX, IR_OPT((_op), IR_ADDR), (_op1))
#define ir_UNARY_OP_C(_op, _op1) ir_fold1(_ir_CTX, IR_OPT((_op), IR_CHAR), (_op1))
#define ir_UNARY_OP_I8(_op, _op1) ir_fold1(_ir_CTX, IR_OPT((_op), IR_I8), (_op1))
#define ir_UNARY_OP_I16(_op, _op1) ir_fold1(_ir_CTX, IR_OPT((_op), IR_I16), (_op1))
#define ir_UNARY_OP_I32(_op, _op1) ir_fold1(_ir_CTX, IR_OPT((_op), IR_I32), (_op1))
#define ir_UNARY_OP_I64(_op, _op1) ir_fold1(_ir_CTX, IR_OPT((_op), IR_I64), (_op1))
#define ir_UNARY_OP_D(_op, _op1) ir_fold1(_ir_CTX, IR_OPT((_op), IR_DOUBLE), (_op1))
#define ir_UNARY_OP_F(_op, _op1) ir_fold1(_ir_CTX, IR_OPT((_op), IR_FLOAT), (_op1))
/* Typed binary-operation helpers (result type encoded via IR_OPT). */
#define ir_BINARY_OP(_op, _t, _op1, _op2) ir_fold2(_ir_CTX, IR_OPT((_op), (_t)), (_op1), (_op2))
#define ir_BINARY_OP_B(_op, _op1, _op2) ir_fold2(_ir_CTX, IR_OPT((_op), IR_BOOL), (_op1), (_op2))
#define ir_BINARY_OP_U8(_op, _op1, _op2) ir_fold2(_ir_CTX, IR_OPT((_op), IR_U8), (_op1), (_op2))
#define ir_BINARY_OP_U16(_op, _op1, _op2) ir_fold2(_ir_CTX, IR_OPT((_op), IR_U16), (_op1), (_op2))
#define ir_BINARY_OP_U32(_op, _op1, _op2) ir_fold2(_ir_CTX, IR_OPT((_op), IR_U32), (_op1), (_op2))
#define ir_BINARY_OP_U64(_op, _op1, _op2) ir_fold2(_ir_CTX, IR_OPT((_op), IR_U64), (_op1), (_op2))
#define ir_BINARY_OP_A(_op, _op1, _op2) ir_fold2(_ir_CTX, IR_OPT((_op), IR_ADDR), (_op1), (_op2))
#define ir_BINARY_OP_C(_op, _op1, _op2) ir_fold2(_ir_CTX, IR_OPT((_op), IR_CHAR), (_op1), (_op2))
#define ir_BINARY_OP_I8(_op, _op1, _op2) ir_fold2(_ir_CTX, IR_OPT((_op), IR_I8), (_op1), (_op2))
#define ir_BINARY_OP_I16(_op, _op1, _op2) ir_fold2(_ir_CTX, IR_OPT((_op), IR_I16), (_op1), (_op2))
#define ir_BINARY_OP_I32(_op, _op1, _op2) ir_fold2(_ir_CTX, IR_OPT((_op), IR_I32), (_op1), (_op2))
#define ir_BINARY_OP_I64(_op, _op1, _op2) ir_fold2(_ir_CTX, IR_OPT((_op), IR_I64), (_op1), (_op2))
#define ir_BINARY_OP_D(_op, _op1, _op2) ir_fold2(_ir_CTX, IR_OPT((_op), IR_DOUBLE), (_op1), (_op2))
#define ir_BINARY_OP_F(_op, _op1, _op2) ir_fold2(_ir_CTX, IR_OPT((_op), IR_FLOAT), (_op1), (_op2))
/* Comparisons: EQ/NE, signed LT/GE/LE/GT, unsigned ULT/UGE/ULE/UGT. */
#define ir_EQ(_op1, _op2) ir_CMP_OP(IR_EQ, (_op1), (_op2))
#define ir_NE(_op1, _op2) ir_CMP_OP(IR_NE, (_op1), (_op2))
#define ir_LT(_op1, _op2) ir_CMP_OP(IR_LT, (_op1), (_op2))
#define ir_GE(_op1, _op2) ir_CMP_OP(IR_GE, (_op1), (_op2))
#define ir_LE(_op1, _op2) ir_CMP_OP(IR_LE, (_op1), (_op2))
#define ir_GT(_op1, _op2) ir_CMP_OP(IR_GT, (_op1), (_op2))
#define ir_ULT(_op1, _op2) ir_CMP_OP(IR_ULT, (_op1), (_op2))
#define ir_UGE(_op1, _op2) ir_CMP_OP(IR_UGE, (_op1), (_op2))
#define ir_ULE(_op1, _op2) ir_CMP_OP(IR_ULE, (_op1), (_op2))
#define ir_UGT(_op1, _op2) ir_CMP_OP(IR_UGT, (_op1), (_op2))
/* Addition, one shorthand per result type. */
#define ir_ADD(_type, _op1, _op2) ir_BINARY_OP(IR_ADD, (_type), (_op1), (_op2))
#define ir_ADD_U8(_op1, _op2) ir_BINARY_OP_U8(IR_ADD, (_op1), (_op2))
#define ir_ADD_U16(_op1, _op2) ir_BINARY_OP_U16(IR_ADD, (_op1), (_op2))
#define ir_ADD_U32(_op1, _op2) ir_BINARY_OP_U32(IR_ADD, (_op1), (_op2))
#define ir_ADD_U64(_op1, _op2) ir_BINARY_OP_U64(IR_ADD, (_op1), (_op2))
#define ir_ADD_A(_op1, _op2) ir_BINARY_OP_A(IR_ADD, (_op1), (_op2))
#define ir_ADD_C(_op1, _op2) ir_BINARY_OP_C(IR_ADD, (_op1), (_op2))
#define ir_ADD_I8(_op1, _op2) ir_BINARY_OP_I8(IR_ADD, (_op1), (_op2))
#define ir_ADD_I16(_op1, _op2) ir_BINARY_OP_I16(IR_ADD, (_op1), (_op2))
#define ir_ADD_I32(_op1, _op2) ir_BINARY_OP_I32(IR_ADD, (_op1), (_op2))
#define ir_ADD_I64(_op1, _op2) ir_BINARY_OP_I64(IR_ADD, (_op1), (_op2))
#define ir_ADD_D(_op1, _op2) ir_BINARY_OP_D(IR_ADD, (_op1), (_op2))
#define ir_ADD_F(_op1, _op2) ir_BINARY_OP_F(IR_ADD, (_op1), (_op2))
/* Subtraction. */
#define ir_SUB(_type, _op1, _op2) ir_BINARY_OP(IR_SUB, (_type), (_op1), (_op2))
#define ir_SUB_U8(_op1, _op2) ir_BINARY_OP_U8(IR_SUB, (_op1), (_op2))
#define ir_SUB_U16(_op1, _op2) ir_BINARY_OP_U16(IR_SUB, (_op1), (_op2))
#define ir_SUB_U32(_op1, _op2) ir_BINARY_OP_U32(IR_SUB, (_op1), (_op2))
#define ir_SUB_U64(_op1, _op2) ir_BINARY_OP_U64(IR_SUB, (_op1), (_op2))
#define ir_SUB_A(_op1, _op2) ir_BINARY_OP_A(IR_SUB, (_op1), (_op2))
#define ir_SUB_C(_op1, _op2) ir_BINARY_OP_C(IR_SUB, (_op1), (_op2))
#define ir_SUB_I8(_op1, _op2) ir_BINARY_OP_I8(IR_SUB, (_op1), (_op2))
#define ir_SUB_I16(_op1, _op2) ir_BINARY_OP_I16(IR_SUB, (_op1), (_op2))
#define ir_SUB_I32(_op1, _op2) ir_BINARY_OP_I32(IR_SUB, (_op1), (_op2))
#define ir_SUB_I64(_op1, _op2) ir_BINARY_OP_I64(IR_SUB, (_op1), (_op2))
#define ir_SUB_D(_op1, _op2) ir_BINARY_OP_D(IR_SUB, (_op1), (_op2))
#define ir_SUB_F(_op1, _op2) ir_BINARY_OP_F(IR_SUB, (_op1), (_op2))
/* Multiplication, one shorthand per result type.
 * Fixed: the I8 shorthand was misspelled "ir_NUL_I8"; the correctly named
 * ir_MUL_I8 is defined below and the misspelled name is kept as a
 * deprecated alias so any existing callers still build. */
#define ir_MUL(_type, _op1, _op2) ir_BINARY_OP(IR_MUL, (_type), (_op1), (_op2))
#define ir_MUL_U8(_op1, _op2) ir_BINARY_OP_U8(IR_MUL, (_op1), (_op2))
#define ir_MUL_U16(_op1, _op2) ir_BINARY_OP_U16(IR_MUL, (_op1), (_op2))
#define ir_MUL_U32(_op1, _op2) ir_BINARY_OP_U32(IR_MUL, (_op1), (_op2))
#define ir_MUL_U64(_op1, _op2) ir_BINARY_OP_U64(IR_MUL, (_op1), (_op2))
#define ir_MUL_A(_op1, _op2) ir_BINARY_OP_A(IR_MUL, (_op1), (_op2))
#define ir_MUL_C(_op1, _op2) ir_BINARY_OP_C(IR_MUL, (_op1), (_op2))
#define ir_MUL_I8(_op1, _op2) ir_BINARY_OP_I8(IR_MUL, (_op1), (_op2))
#define ir_NUL_I8(_op1, _op2) ir_MUL_I8((_op1), (_op2)) /* deprecated misspelling */
#define ir_MUL_I16(_op1, _op2) ir_BINARY_OP_I16(IR_MUL, (_op1), (_op2))
#define ir_MUL_I32(_op1, _op2) ir_BINARY_OP_I32(IR_MUL, (_op1), (_op2))
#define ir_MUL_I64(_op1, _op2) ir_BINARY_OP_I64(IR_MUL, (_op1), (_op2))
#define ir_MUL_D(_op1, _op2) ir_BINARY_OP_D(IR_MUL, (_op1), (_op2))
#define ir_MUL_F(_op1, _op2) ir_BINARY_OP_F(IR_MUL, (_op1), (_op2))
/* Division. */
#define ir_DIV(_type, _op1, _op2) ir_BINARY_OP(IR_DIV, (_type), (_op1), (_op2))
#define ir_DIV_U8(_op1, _op2) ir_BINARY_OP_U8(IR_DIV, (_op1), (_op2))
#define ir_DIV_U16(_op1, _op2) ir_BINARY_OP_U16(IR_DIV, (_op1), (_op2))
#define ir_DIV_U32(_op1, _op2) ir_BINARY_OP_U32(IR_DIV, (_op1), (_op2))
#define ir_DIV_U64(_op1, _op2) ir_BINARY_OP_U64(IR_DIV, (_op1), (_op2))
#define ir_DIV_A(_op1, _op2) ir_BINARY_OP_A(IR_DIV, (_op1), (_op2))
#define ir_DIV_C(_op1, _op2) ir_BINARY_OP_C(IR_DIV, (_op1), (_op2))
#define ir_DIV_I8(_op1, _op2) ir_BINARY_OP_I8(IR_DIV, (_op1), (_op2))
#define ir_DIV_I16(_op1, _op2) ir_BINARY_OP_I16(IR_DIV, (_op1), (_op2))
#define ir_DIV_I32(_op1, _op2) ir_BINARY_OP_I32(IR_DIV, (_op1), (_op2))
#define ir_DIV_I64(_op1, _op2) ir_BINARY_OP_I64(IR_DIV, (_op1), (_op2))
#define ir_DIV_D(_op1, _op2) ir_BINARY_OP_D(IR_DIV, (_op1), (_op2))
#define ir_DIV_F(_op1, _op2) ir_BINARY_OP_F(IR_DIV, (_op1), (_op2))
/* Modulo (integer types only — no D/F variants are provided). */
#define ir_MOD(_type, _op1, _op2) ir_BINARY_OP(IR_MOD, (_type), (_op1), (_op2))
#define ir_MOD_U8(_op1, _op2) ir_BINARY_OP_U8(IR_MOD, (_op1), (_op2))
#define ir_MOD_U16(_op1, _op2) ir_BINARY_OP_U16(IR_MOD, (_op1), (_op2))
#define ir_MOD_U32(_op1, _op2) ir_BINARY_OP_U32(IR_MOD, (_op1), (_op2))
#define ir_MOD_U64(_op1, _op2) ir_BINARY_OP_U64(IR_MOD, (_op1), (_op2))
#define ir_MOD_A(_op1, _op2) ir_BINARY_OP_A(IR_MOD, (_op1), (_op2))
#define ir_MOD_C(_op1, _op2) ir_BINARY_OP_C(IR_MOD, (_op1), (_op2))
#define ir_MOD_I8(_op1, _op2) ir_BINARY_OP_I8(IR_MOD, (_op1), (_op2))
#define ir_MOD_I16(_op1, _op2) ir_BINARY_OP_I16(IR_MOD, (_op1), (_op2))
#define ir_MOD_I32(_op1, _op2) ir_BINARY_OP_I32(IR_MOD, (_op1), (_op2))
#define ir_MOD_I64(_op1, _op2) ir_BINARY_OP_I64(IR_MOD, (_op1), (_op2))
/* Negation (signed and floating-point types). */
#define ir_NEG(_type, _op1) ir_UNARY_OP(IR_NEG, (_type), (_op1))
#define ir_NEG_C(_op1) ir_UNARY_OP_C(IR_NEG, (_op1))
#define ir_NEG_I8(_op1) ir_UNARY_OP_I8(IR_NEG, (_op1))
#define ir_NEG_I16(_op1) ir_UNARY_OP_I16(IR_NEG, (_op1))
#define ir_NEG_I32(_op1) ir_UNARY_OP_I32(IR_NEG, (_op1))
#define ir_NEG_I64(_op1) ir_UNARY_OP_I64(IR_NEG, (_op1))
#define ir_NEG_D(_op1) ir_UNARY_OP_D(IR_NEG, (_op1))
#define ir_NEG_F(_op1) ir_UNARY_OP_F(IR_NEG, (_op1))
/* Absolute value. */
#define ir_ABS(_type, _op1) ir_UNARY_OP(IR_ABS, (_type), (_op1))
#define ir_ABS_C(_op1) ir_UNARY_OP_C(IR_ABS, (_op1))
#define ir_ABS_I8(_op1) ir_UNARY_OP_I8(IR_ABS, (_op1))
#define ir_ABS_I16(_op1) ir_UNARY_OP_I16(IR_ABS, (_op1))
#define ir_ABS_I32(_op1) ir_UNARY_OP_I32(IR_ABS, (_op1))
#define ir_ABS_I64(_op1) ir_UNARY_OP_I64(IR_ABS, (_op1))
#define ir_ABS_D(_op1) ir_UNARY_OP_D(IR_ABS, (_op1))
#define ir_ABS_F(_op1) ir_UNARY_OP_F(IR_ABS, (_op1))
/* Sign extension to the named (wider) result type. */
#define ir_SEXT(_type, _op1) ir_UNARY_OP(IR_SEXT, (_type), (_op1))
#define ir_SEXT_U8(_op1) ir_UNARY_OP_U8(IR_SEXT, (_op1))
#define ir_SEXT_U16(_op1) ir_UNARY_OP_U16(IR_SEXT, (_op1))
#define ir_SEXT_U32(_op1) ir_UNARY_OP_U32(IR_SEXT, (_op1))
#define ir_SEXT_U64(_op1) ir_UNARY_OP_U64(IR_SEXT, (_op1))
#define ir_SEXT_A(_op1) ir_UNARY_OP_A(IR_SEXT, (_op1))
#define ir_SEXT_C(_op1) ir_UNARY_OP_C(IR_SEXT, (_op1))
#define ir_SEXT_I8(_op1) ir_UNARY_OP_I8(IR_SEXT, (_op1))
#define ir_SEXT_I16(_op1) ir_UNARY_OP_I16(IR_SEXT, (_op1))
#define ir_SEXT_I32(_op1) ir_UNARY_OP_I32(IR_SEXT, (_op1))
#define ir_SEXT_I64(_op1) ir_UNARY_OP_I64(IR_SEXT, (_op1))
/* Zero extension to the named (wider) result type. */
#define ir_ZEXT(_type, _op1) ir_UNARY_OP(IR_ZEXT, (_type), (_op1))
#define ir_ZEXT_U8(_op1) ir_UNARY_OP_U8(IR_ZEXT, (_op1))
#define ir_ZEXT_U16(_op1) ir_UNARY_OP_U16(IR_ZEXT, (_op1))
#define ir_ZEXT_U32(_op1) ir_UNARY_OP_U32(IR_ZEXT, (_op1))
#define ir_ZEXT_U64(_op1) ir_UNARY_OP_U64(IR_ZEXT, (_op1))
#define ir_ZEXT_A(_op1) ir_UNARY_OP_A(IR_ZEXT, (_op1))
#define ir_ZEXT_C(_op1) ir_UNARY_OP_C(IR_ZEXT, (_op1))
#define ir_ZEXT_I8(_op1) ir_UNARY_OP_I8(IR_ZEXT, (_op1))
#define ir_ZEXT_I16(_op1) ir_UNARY_OP_I16(IR_ZEXT, (_op1))
#define ir_ZEXT_I32(_op1) ir_UNARY_OP_I32(IR_ZEXT, (_op1))
#define ir_ZEXT_I64(_op1) ir_UNARY_OP_I64(IR_ZEXT, (_op1))
/* Truncation to the named (narrower) result type. */
#define ir_TRUNC(_type, _op1) ir_UNARY_OP(IR_TRUNC, (_type), (_op1))
#define ir_TRUNC_U8(_op1) ir_UNARY_OP_U8(IR_TRUNC, (_op1))
#define ir_TRUNC_U16(_op1) ir_UNARY_OP_U16(IR_TRUNC, (_op1))
#define ir_TRUNC_U32(_op1) ir_UNARY_OP_U32(IR_TRUNC, (_op1))
#define ir_TRUNC_U64(_op1) ir_UNARY_OP_U64(IR_TRUNC, (_op1))
#define ir_TRUNC_A(_op1) ir_UNARY_OP_A(IR_TRUNC, (_op1))
#define ir_TRUNC_C(_op1) ir_UNARY_OP_C(IR_TRUNC, (_op1))
#define ir_TRUNC_I8(_op1) ir_UNARY_OP_I8(IR_TRUNC, (_op1))
#define ir_TRUNC_I16(_op1) ir_UNARY_OP_I16(IR_TRUNC, (_op1))
#define ir_TRUNC_I32(_op1) ir_UNARY_OP_I32(IR_TRUNC, (_op1))
#define ir_TRUNC_I64(_op1) ir_UNARY_OP_I64(IR_TRUNC, (_op1))
/* Reinterpret the bits as the named result type (no value conversion). */
#define ir_BITCAST(_type, _op1) ir_UNARY_OP(IR_BITCAST, (_type), (_op1))
#define ir_BITCAST_U8(_op1) ir_UNARY_OP_U8(IR_BITCAST, (_op1))
#define ir_BITCAST_U16(_op1) ir_UNARY_OP_U16(IR_BITCAST, (_op1))
#define ir_BITCAST_U32(_op1) ir_UNARY_OP_U32(IR_BITCAST, (_op1))
#define ir_BITCAST_U64(_op1) ir_UNARY_OP_U64(IR_BITCAST, (_op1))
#define ir_BITCAST_A(_op1) ir_UNARY_OP_A(IR_BITCAST, (_op1))
#define ir_BITCAST_C(_op1) ir_UNARY_OP_C(IR_BITCAST, (_op1))
#define ir_BITCAST_I8(_op1) ir_UNARY_OP_I8(IR_BITCAST, (_op1))
#define ir_BITCAST_I16(_op1) ir_UNARY_OP_I16(IR_BITCAST, (_op1))
#define ir_BITCAST_I32(_op1) ir_UNARY_OP_I32(IR_BITCAST, (_op1))
#define ir_BITCAST_I64(_op1) ir_UNARY_OP_I64(IR_BITCAST, (_op1))
#define ir_BITCAST_D(_op1) ir_UNARY_OP_D(IR_BITCAST, (_op1))
#define ir_BITCAST_F(_op1) ir_UNARY_OP_F(IR_BITCAST, (_op1))
/* Numeric conversions: integer <-> floating point, float <-> double. */
#define ir_INT2FP(_type, _op1) ir_UNARY_OP(IR_INT2FP, (_type), (_op1))
#define ir_INT2D(_op1) ir_UNARY_OP_D(IR_INT2FP, (_op1))
#define ir_INT2F(_op1) ir_UNARY_OP_F(IR_INT2FP, (_op1))
#define ir_FP2INT(_type, _op1) ir_UNARY_OP(IR_FP2INT, (_type), (_op1))
#define ir_FP2U8(_op1) ir_UNARY_OP_U8(IR_FP2INT, (_op1))
#define ir_FP2U16(_op1) ir_UNARY_OP_U16(IR_FP2INT, (_op1))
#define ir_FP2U32(_op1) ir_UNARY_OP_U32(IR_FP2INT, (_op1))
#define ir_FP2U64(_op1) ir_UNARY_OP_U64(IR_FP2INT, (_op1))
#define ir_FP2I8(_op1) ir_UNARY_OP_I8(IR_FP2INT, (_op1))
#define ir_FP2I16(_op1) ir_UNARY_OP_I16(IR_FP2INT, (_op1))
#define ir_FP2I32(_op1) ir_UNARY_OP_I32(IR_FP2INT, (_op1))
#define ir_FP2I64(_op1) ir_UNARY_OP_I64(IR_FP2INT, (_op1))
#define ir_FP2FP(_type, _op1) ir_UNARY_OP(IR_FP2FP, (_type), (_op1))
#define ir_F2D(_op1) ir_UNARY_OP_D(IR_FP2FP, (_op1))
#define ir_D2F(_op1) ir_UNARY_OP_F(IR_FP2FP, (_op1))
/* Overflow-detecting arithmetic; pair with ir_OVERFLOW() to test the flag. */
#define ir_ADD_OV(_type, _op1, _op2) ir_BINARY_OP(IR_ADD_OV, (_type), (_op1), (_op2))
#define ir_ADD_OV_U8(_op1, _op2) ir_BINARY_OP_U8(IR_ADD_OV, (_op1), (_op2))
#define ir_ADD_OV_U16(_op1, _op2) ir_BINARY_OP_U16(IR_ADD_OV, (_op1), (_op2))
#define ir_ADD_OV_U32(_op1, _op2) ir_BINARY_OP_U32(IR_ADD_OV, (_op1), (_op2))
#define ir_ADD_OV_U64(_op1, _op2) ir_BINARY_OP_U64(IR_ADD_OV, (_op1), (_op2))
#define ir_ADD_OV_A(_op1, _op2) ir_BINARY_OP_A(IR_ADD_OV, (_op1), (_op2))
#define ir_ADD_OV_C(_op1, _op2) ir_BINARY_OP_C(IR_ADD_OV, (_op1), (_op2))
#define ir_ADD_OV_I8(_op1, _op2) ir_BINARY_OP_I8(IR_ADD_OV, (_op1), (_op2))
#define ir_ADD_OV_I16(_op1, _op2) ir_BINARY_OP_I16(IR_ADD_OV, (_op1), (_op2))
#define ir_ADD_OV_I32(_op1, _op2) ir_BINARY_OP_I32(IR_ADD_OV, (_op1), (_op2))
#define ir_ADD_OV_I64(_op1, _op2) ir_BINARY_OP_I64(IR_ADD_OV, (_op1), (_op2))
#define ir_SUB_OV(_type, _op1, _op2) ir_BINARY_OP(IR_SUB_OV, (_type), (_op1), (_op2))
#define ir_SUB_OV_U8(_op1, _op2) ir_BINARY_OP_U8(IR_SUB_OV, (_op1), (_op2))
#define ir_SUB_OV_U16(_op1, _op2) ir_BINARY_OP_U16(IR_SUB_OV, (_op1), (_op2))
#define ir_SUB_OV_U32(_op1, _op2) ir_BINARY_OP_U32(IR_SUB_OV, (_op1), (_op2))
#define ir_SUB_OV_U64(_op1, _op2) ir_BINARY_OP_U64(IR_SUB_OV, (_op1), (_op2))
#define ir_SUB_OV_A(_op1, _op2) ir_BINARY_OP_A(IR_SUB_OV, (_op1), (_op2))
#define ir_SUB_OV_C(_op1, _op2) ir_BINARY_OP_C(IR_SUB_OV, (_op1), (_op2))
#define ir_SUB_OV_I8(_op1, _op2) ir_BINARY_OP_I8(IR_SUB_OV, (_op1), (_op2))
#define ir_SUB_OV_I16(_op1, _op2) ir_BINARY_OP_I16(IR_SUB_OV, (_op1), (_op2))
#define ir_SUB_OV_I32(_op1, _op2) ir_BINARY_OP_I32(IR_SUB_OV, (_op1), (_op2))
#define ir_SUB_OV_I64(_op1, _op2) ir_BINARY_OP_I64(IR_SUB_OV, (_op1), (_op2))
#define ir_MUL_OV(_type, _op1, _op2) ir_BINARY_OP(IR_MUL_OV, (_type), (_op1), (_op2))
#define ir_MUL_OV_U8(_op1, _op2) ir_BINARY_OP_U8(IR_MUL_OV, (_op1), (_op2))
#define ir_MUL_OV_U16(_op1, _op2) ir_BINARY_OP_U16(IR_MUL_OV, (_op1), (_op2))
#define ir_MUL_OV_U32(_op1, _op2) ir_BINARY_OP_U32(IR_MUL_OV, (_op1), (_op2))
#define ir_MUL_OV_U64(_op1, _op2) ir_BINARY_OP_U64(IR_MUL_OV, (_op1), (_op2))
#define ir_MUL_OV_A(_op1, _op2) ir_BINARY_OP_A(IR_MUL_OV, (_op1), (_op2))
#define ir_MUL_OV_C(_op1, _op2) ir_BINARY_OP_C(IR_MUL_OV, (_op1), (_op2))
#define ir_MUL_OV_I8(_op1, _op2) ir_BINARY_OP_I8(IR_MUL_OV, (_op1), (_op2))
#define ir_MUL_OV_I16(_op1, _op2) ir_BINARY_OP_I16(IR_MUL_OV, (_op1), (_op2))
#define ir_MUL_OV_I32(_op1, _op2) ir_BINARY_OP_I32(IR_MUL_OV, (_op1), (_op2))
#define ir_MUL_OV_I64(_op1, _op2) ir_BINARY_OP_I64(IR_MUL_OV, (_op1), (_op2))
/* Boolean overflow flag of a preceding *_OV operation. */
#define ir_OVERFLOW(_op1) ir_fold1(_ir_CTX, IR_OPT(IR_OVERFLOW, IR_BOOL), (_op1))
/* Bitwise NOT. */
#define ir_NOT(_type, _op1) ir_UNARY_OP(IR_NOT, (_type), (_op1))
#define ir_NOT_B(_op1) ir_UNARY_OP_B(IR_NOT, (_op1))
#define ir_NOT_U8(_op1) ir_UNARY_OP_U8(IR_NOT, (_op1))
#define ir_NOT_U16(_op1) ir_UNARY_OP_U16(IR_NOT, (_op1))
#define ir_NOT_U32(_op1) ir_UNARY_OP_U32(IR_NOT, (_op1))
#define ir_NOT_U64(_op1) ir_UNARY_OP_U64(IR_NOT, (_op1))
#define ir_NOT_A(_op1) ir_UNARY_OP_A(IR_NOT, (_op1))
#define ir_NOT_C(_op1) ir_UNARY_OP_C(IR_NOT, (_op1))
#define ir_NOT_I8(_op1) ir_UNARY_OP_I8(IR_NOT, (_op1))
#define ir_NOT_I16(_op1) ir_UNARY_OP_I16(IR_NOT, (_op1))
#define ir_NOT_I32(_op1) ir_UNARY_OP_I32(IR_NOT, (_op1))
#define ir_NOT_I64(_op1) ir_UNARY_OP_I64(IR_NOT, (_op1))
/* Bitwise OR. */
#define ir_OR(_type, _op1, _op2) ir_BINARY_OP(IR_OR, (_type), (_op1), (_op2))
#define ir_OR_B(_op1, _op2) ir_BINARY_OP_B(IR_OR, (_op1), (_op2))
#define ir_OR_U8(_op1, _op2) ir_BINARY_OP_U8(IR_OR, (_op1), (_op2))
#define ir_OR_U16(_op1, _op2) ir_BINARY_OP_U16(IR_OR, (_op1), (_op2))
#define ir_OR_U32(_op1, _op2) ir_BINARY_OP_U32(IR_OR, (_op1), (_op2))
#define ir_OR_U64(_op1, _op2) ir_BINARY_OP_U64(IR_OR, (_op1), (_op2))
#define ir_OR_A(_op1, _op2) ir_BINARY_OP_A(IR_OR, (_op1), (_op2))
#define ir_OR_C(_op1, _op2) ir_BINARY_OP_C(IR_OR, (_op1), (_op2))
#define ir_OR_I8(_op1, _op2) ir_BINARY_OP_I8(IR_OR, (_op1), (_op2))
#define ir_OR_I16(_op1, _op2) ir_BINARY_OP_I16(IR_OR, (_op1), (_op2))
#define ir_OR_I32(_op1, _op2) ir_BINARY_OP_I32(IR_OR, (_op1), (_op2))
#define ir_OR_I64(_op1, _op2) ir_BINARY_OP_I64(IR_OR, (_op1), (_op2))
/* Bitwise AND. */
#define ir_AND(_type, _op1, _op2) ir_BINARY_OP(IR_AND, (_type), (_op1), (_op2))
#define ir_AND_B(_op1, _op2) ir_BINARY_OP_B(IR_AND, (_op1), (_op2))
#define ir_AND_U8(_op1, _op2) ir_BINARY_OP_U8(IR_AND, (_op1), (_op2))
#define ir_AND_U16(_op1, _op2) ir_BINARY_OP_U16(IR_AND, (_op1), (_op2))
#define ir_AND_U32(_op1, _op2) ir_BINARY_OP_U32(IR_AND, (_op1), (_op2))
#define ir_AND_U64(_op1, _op2) ir_BINARY_OP_U64(IR_AND, (_op1), (_op2))
#define ir_AND_A(_op1, _op2) ir_BINARY_OP_A(IR_AND, (_op1), (_op2))
#define ir_AND_C(_op1, _op2) ir_BINARY_OP_C(IR_AND, (_op1), (_op2))
#define ir_AND_I8(_op1, _op2) ir_BINARY_OP_I8(IR_AND, (_op1), (_op2))
#define ir_AND_I16(_op1, _op2) ir_BINARY_OP_I16(IR_AND, (_op1), (_op2))
#define ir_AND_I32(_op1, _op2) ir_BINARY_OP_I32(IR_AND, (_op1), (_op2))
#define ir_AND_I64(_op1, _op2) ir_BINARY_OP_I64(IR_AND, (_op1), (_op2))
/* Bitwise XOR. */
#define ir_XOR(_type, _op1, _op2) ir_BINARY_OP(IR_XOR, (_type), (_op1), (_op2))
#define ir_XOR_B(_op1, _op2) ir_BINARY_OP_B(IR_XOR, (_op1), (_op2))
#define ir_XOR_U8(_op1, _op2) ir_BINARY_OP_U8(IR_XOR, (_op1), (_op2))
#define ir_XOR_U16(_op1, _op2) ir_BINARY_OP_U16(IR_XOR, (_op1), (_op2))
#define ir_XOR_U32(_op1, _op2) ir_BINARY_OP_U32(IR_XOR, (_op1), (_op2))
#define ir_XOR_U64(_op1, _op2) ir_BINARY_OP_U64(IR_XOR, (_op1), (_op2))
#define ir_XOR_A(_op1, _op2) ir_BINARY_OP_A(IR_XOR, (_op1), (_op2))
#define ir_XOR_C(_op1, _op2) ir_BINARY_OP_C(IR_XOR, (_op1), (_op2))
#define ir_XOR_I8(_op1, _op2) ir_BINARY_OP_I8(IR_XOR, (_op1), (_op2))
#define ir_XOR_I16(_op1, _op2) ir_BINARY_OP_I16(IR_XOR, (_op1), (_op2))
#define ir_XOR_I32(_op1, _op2) ir_BINARY_OP_I32(IR_XOR, (_op1), (_op2))
#define ir_XOR_I64(_op1, _op2) ir_BINARY_OP_I64(IR_XOR, (_op1), (_op2))
/* Shift left. */
#define ir_SHL(_type, _op1, _op2) ir_BINARY_OP(IR_SHL, (_type), (_op1), (_op2))
#define ir_SHL_U8(_op1, _op2) ir_BINARY_OP_U8(IR_SHL, (_op1), (_op2))
#define ir_SHL_U16(_op1, _op2) ir_BINARY_OP_U16(IR_SHL, (_op1), (_op2))
#define ir_SHL_U32(_op1, _op2) ir_BINARY_OP_U32(IR_SHL, (_op1), (_op2))
#define ir_SHL_U64(_op1, _op2) ir_BINARY_OP_U64(IR_SHL, (_op1), (_op2))
#define ir_SHL_A(_op1, _op2) ir_BINARY_OP_A(IR_SHL, (_op1), (_op2))
#define ir_SHL_C(_op1, _op2) ir_BINARY_OP_C(IR_SHL, (_op1), (_op2))
#define ir_SHL_I8(_op1, _op2) ir_BINARY_OP_I8(IR_SHL, (_op1), (_op2))
#define ir_SHL_I16(_op1, _op2) ir_BINARY_OP_I16(IR_SHL, (_op1), (_op2))
#define ir_SHL_I32(_op1, _op2) ir_BINARY_OP_I32(IR_SHL, (_op1), (_op2))
#define ir_SHL_I64(_op1, _op2) ir_BINARY_OP_I64(IR_SHL, (_op1), (_op2))
/* Logical shift right. */
#define ir_SHR(_type, _op1, _op2) ir_BINARY_OP(IR_SHR, (_type), (_op1), (_op2))
#define ir_SHR_U8(_op1, _op2) ir_BINARY_OP_U8(IR_SHR, (_op1), (_op2))
#define ir_SHR_U16(_op1, _op2) ir_BINARY_OP_U16(IR_SHR, (_op1), (_op2))
#define ir_SHR_U32(_op1, _op2) ir_BINARY_OP_U32(IR_SHR, (_op1), (_op2))
#define ir_SHR_U64(_op1, _op2) ir_BINARY_OP_U64(IR_SHR, (_op1), (_op2))
#define ir_SHR_A(_op1, _op2) ir_BINARY_OP_A(IR_SHR, (_op1), (_op2))
#define ir_SHR_C(_op1, _op2) ir_BINARY_OP_C(IR_SHR, (_op1), (_op2))
#define ir_SHR_I8(_op1, _op2) ir_BINARY_OP_I8(IR_SHR, (_op1), (_op2))
#define ir_SHR_I16(_op1, _op2) ir_BINARY_OP_I16(IR_SHR, (_op1), (_op2))
#define ir_SHR_I32(_op1, _op2) ir_BINARY_OP_I32(IR_SHR, (_op1), (_op2))
#define ir_SHR_I64(_op1, _op2) ir_BINARY_OP_I64(IR_SHR, (_op1), (_op2))
/* Arithmetic shift right. */
#define ir_SAR(_type, _op1, _op2) ir_BINARY_OP(IR_SAR, (_type), (_op1), (_op2))
#define ir_SAR_U8(_op1, _op2) ir_BINARY_OP_U8(IR_SAR, (_op1), (_op2))
#define ir_SAR_U16(_op1, _op2) ir_BINARY_OP_U16(IR_SAR, (_op1), (_op2))
#define ir_SAR_U32(_op1, _op2) ir_BINARY_OP_U32(IR_SAR, (_op1), (_op2))
#define ir_SAR_U64(_op1, _op2) ir_BINARY_OP_U64(IR_SAR, (_op1), (_op2))
#define ir_SAR_A(_op1, _op2) ir_BINARY_OP_A(IR_SAR, (_op1), (_op2))
#define ir_SAR_C(_op1, _op2) ir_BINARY_OP_C(IR_SAR, (_op1), (_op2))
#define ir_SAR_I8(_op1, _op2) ir_BINARY_OP_I8(IR_SAR, (_op1), (_op2))
#define ir_SAR_I16(_op1, _op2) ir_BINARY_OP_I16(IR_SAR, (_op1), (_op2))
#define ir_SAR_I32(_op1, _op2) ir_BINARY_OP_I32(IR_SAR, (_op1), (_op2))
#define ir_SAR_I64(_op1, _op2) ir_BINARY_OP_I64(IR_SAR, (_op1), (_op2))
/* Rotate left. */
#define ir_ROL(_type, _op1, _op2) ir_BINARY_OP(IR_ROL, (_type), (_op1), (_op2))
#define ir_ROL_U8(_op1, _op2) ir_BINARY_OP_U8(IR_ROL, (_op1), (_op2))
#define ir_ROL_U16(_op1, _op2) ir_BINARY_OP_U16(IR_ROL, (_op1), (_op2))
#define ir_ROL_U32(_op1, _op2) ir_BINARY_OP_U32(IR_ROL, (_op1), (_op2))
#define ir_ROL_U64(_op1, _op2) ir_BINARY_OP_U64(IR_ROL, (_op1), (_op2))
#define ir_ROL_A(_op1, _op2) ir_BINARY_OP_A(IR_ROL, (_op1), (_op2))
#define ir_ROL_C(_op1, _op2) ir_BINARY_OP_C(IR_ROL, (_op1), (_op2))
#define ir_ROL_I8(_op1, _op2) ir_BINARY_OP_I8(IR_ROL, (_op1), (_op2))
#define ir_ROL_I16(_op1, _op2) ir_BINARY_OP_I16(IR_ROL, (_op1), (_op2))
#define ir_ROL_I32(_op1, _op2) ir_BINARY_OP_I32(IR_ROL, (_op1), (_op2))
#define ir_ROL_I64(_op1, _op2) ir_BINARY_OP_I64(IR_ROL, (_op1), (_op2))
/* Rotate right. */
#define ir_ROR(_type, _op1, _op2) ir_BINARY_OP(IR_ROR, (_type), (_op1), (_op2))
#define ir_ROR_U8(_op1, _op2) ir_BINARY_OP_U8(IR_ROR, (_op1), (_op2))
#define ir_ROR_U16(_op1, _op2) ir_BINARY_OP_U16(IR_ROR, (_op1), (_op2))
#define ir_ROR_U32(_op1, _op2) ir_BINARY_OP_U32(IR_ROR, (_op1), (_op2))
#define ir_ROR_U64(_op1, _op2) ir_BINARY_OP_U64(IR_ROR, (_op1), (_op2))
#define ir_ROR_A(_op1, _op2) ir_BINARY_OP_A(IR_ROR, (_op1), (_op2))
#define ir_ROR_C(_op1, _op2) ir_BINARY_OP_C(IR_ROR, (_op1), (_op2))
#define ir_ROR_I8(_op1, _op2) ir_BINARY_OP_I8(IR_ROR, (_op1), (_op2))
#define ir_ROR_I16(_op1, _op2) ir_BINARY_OP_I16(IR_ROR, (_op1), (_op2))
#define ir_ROR_I32(_op1, _op2) ir_BINARY_OP_I32(IR_ROR, (_op1), (_op2))
#define ir_ROR_I64(_op1, _op2) ir_BINARY_OP_I64(IR_ROR, (_op1), (_op2))
#define ir_BSWAP(_type, _op1) ir_UNARY_OP(IR_BSWAP, (_type), (_op1))
#define ir_BSWAP_U16(_op1) ir_UNARY_OP_U16(IR_BSWAP, (_op1))
#define ir_BSWAP_U32(_op1) ir_UNARY_OP_U32(IR_BSWAP, (_op1))
#define ir_BSWAP_U64(_op1) ir_UNARY_OP_U64(IR_BSWAP, (_op1))
#define ir_BSWAP_A(_op1) ir_UNARY_OP_A(IR_BSWAP, (_op1))
#define ir_BSWAP_I16(_op1) ir_UNARY_OP_I16(IR_BSWAP, (_op1))
#define ir_BSWAP_I32(_op1) ir_UNARY_OP_I32(IR_BSWAP, (_op1))
#define ir_BSWAP_I64(_op1) ir_UNARY_OP_I64(IR_BSWAP, (_op1))
#define ir_MIN(_type, _op1, _op2) ir_BINARY_OP(IR_MIN, (_type), (_op1), (_op2))
#define ir_MIN_U8(_op1, _op2) ir_BINARY_OP_U8(IR_MIN, (_op1), (_op2))
#define ir_MIN_U16(_op1, _op2) ir_BINARY_OP_U16(IR_MIN, (_op1), (_op2))
#define ir_MIN_U32(_op1, _op2) ir_BINARY_OP_U32(IR_MIN, (_op1), (_op2))
#define ir_MIN_U64(_op1, _op2) ir_BINARY_OP_U64(IR_MIN, (_op1), (_op2))
#define ir_MIN_A(_op1, _op2) ir_BINARY_OP_A(IR_MIN, (_op1), (_op2))
#define ir_MIN_C(_op1, _op2) ir_BINARY_OP_C(IR_MIN, (_op1), (_op2))
#define ir_MIN_I8(_op1, _op2) ir_BINARY_OP_I8(IR_MIN, (_op1), (_op2))
#define ir_MIN_I16(_op1, _op2) ir_BINARY_OP_I16(IR_MIN, (_op1), (_op2))
#define ir_MIN_I32(_op1, _op2) ir_BINARY_OP_I32(IR_MIN, (_op1), (_op2))
#define ir_MIN_I64(_op1, _op2) ir_BINARY_OP_I64(IR_MIN, (_op1), (_op2))
#define ir_MIN_D(_op1, _op2) ir_BINARY_OP_D(IR_MIN, (_op1), (_op2))
#define ir_MIN_F(_op1, _op2) ir_BINARY_OP_F(IR_MIN, (_op1), (_op2))
#define ir_MAX(_type, _op1, _op2) ir_BINARY_OP(IR_MAX, (_type), (_op1), (_op2))
#define ir_MAX_U8(_op1, _op2) ir_BINARY_OP_U8(IR_MAX, (_op1), (_op2))
#define ir_MAX_U16(_op1, _op2) ir_BINARY_OP_U16(IR_MAX, (_op1), (_op2))
#define ir_MAX_U32(_op1, _op2) ir_BINARY_OP_U32(IR_MAX, (_op1), (_op2))
#define ir_MAX_U64(_op1, _op2) ir_BINARY_OP_U64(IR_MAX, (_op1), (_op2))
#define ir_MAX_A(_op1, _op2) ir_BINARY_OP_A(IR_MAX, (_op1), (_op2))
#define ir_MAX_C(_op1, _op2) ir_BINARY_OP_C(IR_MAX, (_op1), (_op2))
#define ir_MAX_I8(_op1, _op2) ir_BINARY_OP_I8(IR_MAX, (_op1), (_op2))
#define ir_MAX_I16(_op1, _op2) ir_BINARY_OP_I16(IR_MAX, (_op1), (_op2))
#define ir_MAX_I32(_op1, _op2) ir_BINARY_OP_I32(IR_MAX, (_op1), (_op2))
#define ir_MAX_I64(_op1, _op2) ir_BINARY_OP_I64(IR_MAX, (_op1), (_op2))
#define ir_MAX_D(_op1, _op2) ir_BINARY_OP_D(IR_MAX, (_op1), (_op2))
#define ir_MAX_F(_op1, _op2) ir_BINARY_OP_F(IR_MAX, (_op1), (_op2))
/* Ternary select: COND(op1 ? op2 : op3), folded through ir_fold3.
 * BUG FIX: the I16 variant used a misspelled opcode `IR_COMD` (a typo for
 * IR_COND), which either fails to compile or references the wrong identifier;
 * every variant must emit IR_COND. */
#define ir_COND(_type, _op1, _op2, _op3) ir_fold3(_ir_CTX, IR_OPT(IR_COND, (_type)), (_op1), (_op2), (_op3))
#define ir_COND_U8(_op1, _op2, _op3) ir_fold3(_ir_CTX, IR_OPT(IR_COND, IR_U8), (_op1), (_op2), (_op3))
#define ir_COND_U16(_op1, _op2, _op3) ir_fold3(_ir_CTX, IR_OPT(IR_COND, IR_U16), (_op1), (_op2), (_op3))
#define ir_COND_U32(_op1, _op2, _op3) ir_fold3(_ir_CTX, IR_OPT(IR_COND, IR_U32), (_op1), (_op2), (_op3))
#define ir_COND_U64(_op1, _op2, _op3) ir_fold3(_ir_CTX, IR_OPT(IR_COND, IR_U64), (_op1), (_op2), (_op3))
#define ir_COND_A(_op1, _op2, _op3) ir_fold3(_ir_CTX, IR_OPT(IR_COND, IR_ADDR), (_op1), (_op2), (_op3))
#define ir_COND_C(_op1, _op2, _op3) ir_fold3(_ir_CTX, IR_OPT(IR_COND, IR_CHAR), (_op1), (_op2), (_op3))
#define ir_COND_I8(_op1, _op2, _op3) ir_fold3(_ir_CTX, IR_OPT(IR_COND, IR_I8), (_op1), (_op2), (_op3))
#define ir_COND_I16(_op1, _op2, _op3) ir_fold3(_ir_CTX, IR_OPT(IR_COND, IR_I16), (_op1), (_op2), (_op3))
#define ir_COND_I32(_op1, _op2, _op3) ir_fold3(_ir_CTX, IR_OPT(IR_COND, IR_I32), (_op1), (_op2), (_op3))
#define ir_COND_I64(_op1, _op2, _op3) ir_fold3(_ir_CTX, IR_OPT(IR_COND, IR_I64), (_op1), (_op2), (_op3))
#define ir_COND_D(_op1, _op2, _op3) ir_fold3(_ir_CTX, IR_OPT(IR_COND, IR_DOUBLE), (_op1), (_op2), (_op3))
#define ir_COND_F(_op1, _op2, _op3) ir_fold3(_ir_CTX, IR_OPT(IR_COND, IR_FLOAT), (_op1), (_op2), (_op3))
/* PHI construction: 2-input fast path, N-input form, and late operand patching. */
#define ir_PHI_2(type, _src1, _src2) _ir_PHI_2(_ir_CTX, type, (_src1), (_src2))
#define ir_PHI_N(type, _n, _inputs) _ir_PHI_N(_ir_CTX, type, (_n), (_inputs))
#define ir_PHI_SET_OP(_ref, _pos, _src) _ir_PHI_SET_OP(_ir_CTX, (_ref), (_pos), (_src))
/* COPY: value copy that may be folded away entirely. */
#define ir_COPY(_type, _op1) ir_UNARY_OP(IR_COPY, (_type), (_op1))
#define ir_COPY_B(_op1) ir_UNARY_OP_B(IR_COPY, (_op1))
#define ir_COPY_U8(_op1) ir_UNARY_OP_U8(IR_COPY, (_op1))
#define ir_COPY_U16(_op1) ir_UNARY_OP_U16(IR_COPY, (_op1))
#define ir_COPY_U32(_op1) ir_UNARY_OP_U32(IR_COPY, (_op1))
#define ir_COPY_U64(_op1) ir_UNARY_OP_U64(IR_COPY, (_op1))
#define ir_COPY_A(_op1) ir_UNARY_OP_A(IR_COPY, (_op1))
#define ir_COPY_C(_op1) ir_UNARY_OP_C(IR_COPY, (_op1))
#define ir_COPY_I8(_op1) ir_UNARY_OP_I8(IR_COPY, (_op1))
#define ir_COPY_I16(_op1) ir_UNARY_OP_I16(IR_COPY, (_op1))
#define ir_COPY_I32(_op1) ir_UNARY_OP_I32(IR_COPY, (_op1))
#define ir_COPY_I64(_op1) ir_UNARY_OP_I64(IR_COPY, (_op1))
#define ir_COPY_D(_op1) ir_UNARY_OP_D(IR_COPY, (_op1))
#define ir_COPY_F(_op1) ir_UNARY_OP_F(IR_COPY, (_op1))
/* Helper to add address with a constant offset */
#define ir_ADD_OFFSET(_addr, _offset) _ir_ADD_OFFSET(_ir_CTX, (_addr), (_offset))
/* Unfoldable variant of COPY: second operand 1 distinguishes it from a plain
 * (foldable) COPY, so the optimizer keeps the instruction. */
#define ir_HARD_COPY(_type, _op1) ir_BINARY_OP(IR_COPY, (_type), (_op1), 1)
#define ir_HARD_COPY_B(_op1) ir_BINARY_OP_B(IR_COPY, (_op1), 1)
#define ir_HARD_COPY_U8(_op1) ir_BINARY_OP_U8(IR_COPY, (_op1), 1)
#define ir_HARD_COPY_U16(_op1) ir_BINARY_OP_U16(IR_COPY, (_op1), 1)
#define ir_HARD_COPY_U32(_op1) ir_BINARY_OP_U32(IR_COPY, (_op1), 1)
#define ir_HARD_COPY_U64(_op1) ir_BINARY_OP_U64(IR_COPY, (_op1), 1)
#define ir_HARD_COPY_A(_op1) ir_BINARY_OP_A(IR_COPY, (_op1), 1)
#define ir_HARD_COPY_C(_op1) ir_BINARY_OP_C(IR_COPY, (_op1), 1)
#define ir_HARD_COPY_I8(_op1) ir_BINARY_OP_I8(IR_COPY, (_op1), 1)
#define ir_HARD_COPY_I16(_op1) ir_BINARY_OP_I16(IR_COPY, (_op1), 1)
#define ir_HARD_COPY_I32(_op1) ir_BINARY_OP_I32(IR_COPY, (_op1), 1)
#define ir_HARD_COPY_I64(_op1) ir_BINARY_OP_I64(IR_COPY, (_op1), 1)
#define ir_HARD_COPY_D(_op1) ir_BINARY_OP_D(IR_COPY, (_op1), 1)
#define ir_HARD_COPY_F(_op1) ir_BINARY_OP_F(IR_COPY, (_op1), 1)
/* Function parameters and named variables. */
#define ir_PARAM(_type, _name, _num) _ir_PARAM(_ir_CTX, (_type), (_name), (_num))
#define ir_VAR(_type, _name) _ir_VAR(_ir_CTX, (_type), (_name))
/* Calls with 0..5 explicit arguments, or N arguments via an array. */
#define ir_CALL(type, func) _ir_CALL(_ir_CTX, type, func)
#define ir_CALL_1(type, func, a1) _ir_CALL_1(_ir_CTX, type, func, a1)
#define ir_CALL_2(type, func, a1, a2) _ir_CALL_2(_ir_CTX, type, func, a1, a2)
#define ir_CALL_3(type, func, a1, a2, a3) _ir_CALL_3(_ir_CTX, type, func, a1, a2, a3)
#define ir_CALL_4(type, func, a1, a2, a3, a4) _ir_CALL_4(_ir_CTX, type, func, a1, a2, a3, a4)
#define ir_CALL_5(type, func, a1, a2, a3, a4, a5) _ir_CALL_5(_ir_CTX, type, func, a1, a2, a3, a4, a5)
#define ir_CALL_N(type, func, count, args) _ir_CALL_N(_ir_CTX, type, func, count, args)
/* Tail calls (terminate the current control flow). */
#define ir_TAILCALL(type, func) _ir_TAILCALL(_ir_CTX, type, func)
#define ir_TAILCALL_1(type, func, a1) _ir_TAILCALL_1(_ir_CTX, type, func, a1)
#define ir_TAILCALL_2(type, func, a1, a2) _ir_TAILCALL_2(_ir_CTX, type, func, a1, a2)
#define ir_TAILCALL_3(type, func, a1, a2, a3) _ir_TAILCALL_3(_ir_CTX, type, func, a1, a2, a3)
#define ir_TAILCALL_4(type, func, a1, a2, a3, a4) _ir_TAILCALL_4(_ir_CTX, type, func, a1, a2, a3, a4)
#define ir_TAILCALL_5(type, func, a1, a2, a3, a4, a5) _ir_TAILCALL_5(_ir_CTX, type, func, a1, a2, a3, a4, a5)
#define ir_TAILCALL_N(type, func, count, args) _ir_TAILCALL_N(_ir_CTX, type, func, count, args)
/* Dynamic stack allocation and its release. */
#define ir_ALLOCA(_size) _ir_ALLOCA(_ir_CTX, (_size))
#define ir_AFREE(_size) _ir_AFREE(_ir_CTX, (_size))
#define ir_VADDR(_var) ir_emit1(_ir_CTX, IR_OPT(IR_VADDR, IR_ADDR), (_var))
/* Loads/stores of named variables (VLOAD/VSTORE). */
#define ir_VLOAD(_type, _var) _ir_VLOAD(_ir_CTX, (_type), (_var))
#define ir_VLOAD_B(_var) _ir_VLOAD(_ir_CTX, IR_BOOL, (_var))
#define ir_VLOAD_U8(_var) _ir_VLOAD(_ir_CTX, IR_U8, (_var))
#define ir_VLOAD_U16(_var) _ir_VLOAD(_ir_CTX, IR_U16, (_var))
#define ir_VLOAD_U32(_var) _ir_VLOAD(_ir_CTX, IR_U32, (_var))
#define ir_VLOAD_U64(_var) _ir_VLOAD(_ir_CTX, IR_U64, (_var))
#define ir_VLOAD_A(_var) _ir_VLOAD(_ir_CTX, IR_ADDR, (_var))
#define ir_VLOAD_C(_var) _ir_VLOAD(_ir_CTX, IR_CHAR, (_var))
#define ir_VLOAD_I8(_var) _ir_VLOAD(_ir_CTX, IR_I8, (_var))
#define ir_VLOAD_I16(_var) _ir_VLOAD(_ir_CTX, IR_I16, (_var))
#define ir_VLOAD_I32(_var) _ir_VLOAD(_ir_CTX, IR_I32, (_var))
#define ir_VLOAD_I64(_var) _ir_VLOAD(_ir_CTX, IR_I64, (_var))
#define ir_VLOAD_D(_var) _ir_VLOAD(_ir_CTX, IR_DOUBLE, (_var))
#define ir_VLOAD_F(_var) _ir_VLOAD(_ir_CTX, IR_FLOAT, (_var))
#define ir_VSTORE(_var, _val) _ir_VSTORE(_ir_CTX, (_var), (_val))
/* Reads/writes of physical CPU registers (RLOAD/RSTORE). */
#define ir_RLOAD(_type, _reg) _ir_RLOAD(_ir_CTX, (_type), (_reg))
#define ir_RLOAD_B(_reg) _ir_RLOAD(_ir_CTX, IR_BOOL, (_reg))
#define ir_RLOAD_U8(_reg) _ir_RLOAD(_ir_CTX, IR_U8, (_reg))
#define ir_RLOAD_U16(_reg) _ir_RLOAD(_ir_CTX, IR_U16, (_reg))
#define ir_RLOAD_U32(_reg) _ir_RLOAD(_ir_CTX, IR_U32, (_reg))
#define ir_RLOAD_U64(_reg) _ir_RLOAD(_ir_CTX, IR_U64, (_reg))
#define ir_RLOAD_A(_reg) _ir_RLOAD(_ir_CTX, IR_ADDR, (_reg))
#define ir_RLOAD_C(_reg) _ir_RLOAD(_ir_CTX, IR_CHAR, (_reg))
#define ir_RLOAD_I8(_reg) _ir_RLOAD(_ir_CTX, IR_I8, (_reg))
#define ir_RLOAD_I16(_reg) _ir_RLOAD(_ir_CTX, IR_I16, (_reg))
#define ir_RLOAD_I32(_reg) _ir_RLOAD(_ir_CTX, IR_I32, (_reg))
#define ir_RLOAD_I64(_reg) _ir_RLOAD(_ir_CTX, IR_I64, (_reg))
#define ir_RLOAD_D(_reg) _ir_RLOAD(_ir_CTX, IR_DOUBLE, (_reg))
#define ir_RLOAD_F(_reg) _ir_RLOAD(_ir_CTX, IR_FLOAT, (_reg))
#define ir_RSTORE(_reg, _val) _ir_RSTORE(_ir_CTX, (_reg), (_val))
/* Memory loads/stores through an address operand. */
#define ir_LOAD(_type, _addr) _ir_LOAD(_ir_CTX, (_type), (_addr))
#define ir_LOAD_B(_addr) _ir_LOAD(_ir_CTX, IR_BOOL, (_addr))
#define ir_LOAD_U8(_addr) _ir_LOAD(_ir_CTX, IR_U8, (_addr))
#define ir_LOAD_U16(_addr) _ir_LOAD(_ir_CTX, IR_U16, (_addr))
#define ir_LOAD_U32(_addr) _ir_LOAD(_ir_CTX, IR_U32, (_addr))
#define ir_LOAD_U64(_addr) _ir_LOAD(_ir_CTX, IR_U64, (_addr))
#define ir_LOAD_A(_addr) _ir_LOAD(_ir_CTX, IR_ADDR, (_addr))
#define ir_LOAD_C(_addr) _ir_LOAD(_ir_CTX, IR_CHAR, (_addr))
#define ir_LOAD_I8(_addr) _ir_LOAD(_ir_CTX, IR_I8, (_addr))
#define ir_LOAD_I16(_addr) _ir_LOAD(_ir_CTX, IR_I16, (_addr))
#define ir_LOAD_I32(_addr) _ir_LOAD(_ir_CTX, IR_I32, (_addr))
#define ir_LOAD_I64(_addr) _ir_LOAD(_ir_CTX, IR_I64, (_addr))
#define ir_LOAD_D(_addr) _ir_LOAD(_ir_CTX, IR_DOUBLE, (_addr))
#define ir_LOAD_F(_addr) _ir_LOAD(_ir_CTX, IR_FLOAT, (_addr))
#define ir_STORE(_addr, _val) _ir_STORE(_ir_CTX, (_addr), (_val))
#define ir_TLS(_index, _offset) _ir_TLS(_ir_CTX, (_index), (_offset))
/* TRAP threads the control chain directly (no out-of-line helper). */
#define ir_TRAP() do {_ir_CTX->control = ir_emit1(_ir_CTX, IR_TRAP, _ir_CTX->control);} while (0)
/* Control-flow construction: blocks, branches, merges, loops, switches. */
#define ir_START() _ir_START(_ir_CTX)
#define ir_ENTRY(_src, _num) _ir_ENTRY(_ir_CTX, (_src), (_num))
#define ir_BEGIN(_src) _ir_BEGIN(_ir_CTX, (_src))
#define ir_IF(_condition) _ir_IF(_ir_CTX, (_condition))
#define ir_IF_TRUE(_if) _ir_IF_TRUE(_ir_CTX, (_if))
#define ir_IF_TRUE_cold(_if) _ir_IF_TRUE_cold(_ir_CTX, (_if))
#define ir_IF_FALSE(_if) _ir_IF_FALSE(_ir_CTX, (_if))
#define ir_IF_FALSE_cold(_if) _ir_IF_FALSE_cold(_ir_CTX, (_if))
#define ir_END() _ir_END(_ir_CTX)
#define ir_MERGE_2(_src1, _src2) _ir_MERGE_2(_ir_CTX, (_src1), (_src2))
#define ir_MERGE_N(_n, _inputs) _ir_MERGE_N(_ir_CTX, (_n), (_inputs))
#define ir_MERGE_SET_OP(_ref, _pos, _src) _ir_MERGE_SET_OP(_ir_CTX, (_ref), (_pos), (_src))
#define ir_LOOP_BEGIN(_src1) _ir_LOOP_BEGIN(_ir_CTX, (_src1))
#define ir_LOOP_END() _ir_LOOP_END(_ir_CTX)
#define ir_SWITCH(_val) _ir_SWITCH(_ir_CTX, (_val))
#define ir_CASE_VAL(_switch, _val) _ir_CASE_VAL(_ir_CTX, (_switch), (_val))
#define ir_CASE_DEFAULT(_switch) _ir_CASE_DEFAULT(_ir_CTX, (_switch))
#define ir_RETURN(_val) _ir_RETURN(_ir_CTX, (_val))
#define ir_IJMP(_addr) _ir_IJMP(_ir_CTX, (_addr))
#define ir_UNREACHABLE() _ir_UNREACHABLE(_ir_CTX)
/* Deoptimization guards and side-exit support. */
#define ir_GUARD(_condition, _addr) _ir_GUARD(_ir_CTX, (_condition), (_addr))
#define ir_GUARD_NOT(_condition, _addr) _ir_GUARD_NOT(_ir_CTX, (_condition), (_addr))
#define ir_SNAPSHOT(_n) _ir_SNAPSHOT(_ir_CTX, (_n))
#define ir_SNAPSHOT_SET_OP(_s, _pos, _v) _ir_SNAPSHOT_SET_OP(_ir_CTX, (_s), (_pos), (_v))
#define ir_EXITCALL(_func) _ir_EXITCALL(_ir_CTX,(_func))
/* Convenience helpers: accumulate END refs in a list and merge them later. */
#define ir_END_list(_list) do { _list = _ir_END_LIST(_ir_CTX, _list); } while (0)
#define ir_MERGE_list(_list) _ir_MERGE_LIST(_ir_CTX, (_list))
#define ir_MERGE_WITH(_src2) do {ir_ref end = ir_END(); ir_MERGE_2(end, _src2);} while (0)
#define ir_MERGE_WITH_EMPTY_TRUE(_if) do {ir_ref end = ir_END(); ir_IF_TRUE(_if); ir_MERGE_2(end, ir_END());} while (0)
#define ir_MERGE_WITH_EMPTY_FALSE(_if) do {ir_ref end = ir_END(); ir_IF_FALSE(_if); ir_MERGE_2(end, ir_END());} while (0)
/* Out-of-line builder helpers backing the ir_* convenience macros above.
 * Each takes the ir_ctx explicitly (the macros supply it via _ir_CTX). */
ir_ref _ir_ADD_OFFSET(ir_ctx *ctx, ir_ref addr, uintptr_t offset);
ir_ref _ir_PHI_2(ir_ctx *ctx, ir_type type, ir_ref src1, ir_ref src2);
ir_ref _ir_PHI_N(ir_ctx *ctx, ir_type type, ir_ref n, ir_ref *inputs);
void _ir_PHI_SET_OP(ir_ctx *ctx, ir_ref phi, ir_ref pos, ir_ref src);
ir_ref _ir_PARAM(ir_ctx *ctx, ir_type type, const char* name, ir_ref num);
ir_ref _ir_VAR(ir_ctx *ctx, ir_type type, const char* name);
ir_ref _ir_CALL(ir_ctx *ctx, ir_type type, ir_ref func);
ir_ref _ir_CALL_1(ir_ctx *ctx, ir_type type, ir_ref func, ir_ref arg1);
ir_ref _ir_CALL_2(ir_ctx *ctx, ir_type type, ir_ref func, ir_ref arg1, ir_ref arg2);
ir_ref _ir_CALL_3(ir_ctx *ctx, ir_type type, ir_ref func, ir_ref arg1, ir_ref arg2, ir_ref arg3);
ir_ref _ir_CALL_4(ir_ctx *ctx, ir_type type, ir_ref func, ir_ref arg1, ir_ref arg2, ir_ref arg3, ir_ref arg4);
ir_ref _ir_CALL_5(ir_ctx *ctx, ir_type type, ir_ref func, ir_ref arg1, ir_ref arg2, ir_ref arg3, ir_ref arg4, ir_ref arg5);
ir_ref _ir_CALL_N(ir_ctx *ctx, ir_type type, ir_ref func, uint32_t count, ir_ref *args);
void _ir_TAILCALL(ir_ctx *ctx, ir_type type, ir_ref func);
void _ir_TAILCALL_1(ir_ctx *ctx, ir_type type, ir_ref func, ir_ref arg1);
void _ir_TAILCALL_2(ir_ctx *ctx, ir_type type, ir_ref func, ir_ref arg1, ir_ref arg2);
void _ir_TAILCALL_3(ir_ctx *ctx, ir_type type, ir_ref func, ir_ref arg1, ir_ref arg2, ir_ref arg3);
void _ir_TAILCALL_4(ir_ctx *ctx, ir_type type, ir_ref func, ir_ref arg1, ir_ref arg2, ir_ref arg3, ir_ref arg4);
void _ir_TAILCALL_5(ir_ctx *ctx, ir_type type, ir_ref func, ir_ref arg1, ir_ref arg2, ir_ref arg3, ir_ref arg4, ir_ref arg5);
ir_ref _ir_TAILCALL_N(ir_ctx *ctx, ir_type type, ir_ref func, uint32_t count, ir_ref *args);
ir_ref _ir_ALLOCA(ir_ctx *ctx, ir_ref size);
void _ir_AFREE(ir_ctx *ctx, ir_ref size);
ir_ref _ir_VLOAD(ir_ctx *ctx, ir_type type, ir_ref var);
void _ir_VSTORE(ir_ctx *ctx, ir_ref var, ir_ref val);
ir_ref _ir_RLOAD(ir_ctx *ctx, ir_type type, ir_ref reg);
void _ir_RSTORE(ir_ctx *ctx, ir_ref reg, ir_ref val);
ir_ref _ir_LOAD(ir_ctx *ctx, ir_type type, ir_ref addr);
void _ir_STORE(ir_ctx *ctx, ir_ref addr, ir_ref val);
void _ir_START(ir_ctx *ctx);
void _ir_ENTRY(ir_ctx *ctx, ir_ref src, ir_ref num);
void _ir_BEGIN(ir_ctx *ctx, ir_ref src);
ir_ref _ir_END(ir_ctx *ctx);
ir_ref _ir_END_LIST(ir_ctx *ctx, ir_ref list);
ir_ref _ir_IF(ir_ctx *ctx, ir_ref condition);
void _ir_IF_TRUE(ir_ctx *ctx, ir_ref if_ref);
void _ir_IF_TRUE_cold(ir_ctx *ctx, ir_ref if_ref);
void _ir_IF_FALSE(ir_ctx *ctx, ir_ref if_ref);
void _ir_IF_FALSE_cold(ir_ctx *ctx, ir_ref if_ref);
void _ir_MERGE_2(ir_ctx *ctx, ir_ref src1, ir_ref src2);
void _ir_MERGE_N(ir_ctx *ctx, ir_ref n, ir_ref *inputs);
void _ir_MERGE_SET_OP(ir_ctx *ctx, ir_ref merge, ir_ref pos, ir_ref src);
void _ir_MERGE_LIST(ir_ctx *ctx, ir_ref list);
ir_ref _ir_LOOP_BEGIN(ir_ctx *ctx, ir_ref src1);
ir_ref _ir_LOOP_END(ir_ctx *ctx);
ir_ref _ir_TLS(ir_ctx *ctx, ir_ref index, ir_ref offset);
void _ir_UNREACHABLE(ir_ctx *ctx);
ir_ref _ir_SWITCH(ir_ctx *ctx, ir_ref val);
void _ir_CASE_VAL(ir_ctx *ctx, ir_ref switch_ref, ir_ref val);
void _ir_CASE_DEFAULT(ir_ctx *ctx, ir_ref switch_ref);
void _ir_RETURN(ir_ctx *ctx, ir_ref val);
void _ir_IJMP(ir_ctx *ctx, ir_ref addr);
void _ir_GUARD(ir_ctx *ctx, ir_ref condition, ir_ref addr);
void _ir_GUARD_NOT(ir_ctx *ctx, ir_ref condition, ir_ref addr);
ir_ref _ir_SNAPSHOT(ir_ctx *ctx, ir_ref n);
void _ir_SNAPSHOT_SET_OP(ir_ctx *ctx, ir_ref snapshot, ir_ref pos, ir_ref val);
ir_ref _ir_EXITCALL(ir_ctx *ctx, ir_ref func);
#ifdef __cplusplus
} /* extern "C" */
#endif
#endif /* IR_BUILDER_H */

1219
ext/opcache/jit/ir/ir_cfg.c Normal file

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,381 @@
/*
* IR - Lightweight JIT Compilation Framework
* (IR verification)
* Copyright (C) 2022 Zend by Perforce.
* Authors: Dmitry Stogov <dmitry@php.net>
*/
#include "ir.h"
#include "ir_private.h"
/* Sanity-check compile-time invariants the rest of the framework relies on:
 * the non-constant and constant type enums line up, comparison opcodes can be
 * negated/swapped by XOR-ing low bits, and ADD/SUB are adjacent. */
void ir_consistency_check(void)
{
	IR_ASSERT(IR_UNUSED == 0);
	IR_ASSERT(IR_NOP == 0);

	/* Each value type must share its numeric code with its constant twin. */
	IR_ASSERT((int)IR_BOOL == (int)IR_C_BOOL);
	IR_ASSERT((int)IR_U8 == (int)IR_C_U8);
	IR_ASSERT((int)IR_U16 == (int)IR_C_U16);
	IR_ASSERT((int)IR_U32 == (int)IR_C_U32);
	IR_ASSERT((int)IR_U64 == (int)IR_C_U64);
	IR_ASSERT((int)IR_ADDR == (int)IR_C_ADDR);
	IR_ASSERT((int)IR_CHAR == (int)IR_C_CHAR);
	IR_ASSERT((int)IR_I8 == (int)IR_C_I8);
	IR_ASSERT((int)IR_I16 == (int)IR_C_I16);
	IR_ASSERT((int)IR_I32 == (int)IR_C_I32);
	IR_ASSERT((int)IR_I64 == (int)IR_C_I64);
	IR_ASSERT((int)IR_DOUBLE == (int)IR_C_DOUBLE);
	IR_ASSERT((int)IR_FLOAT == (int)IR_C_FLOAT);

	/* ^1 negates a comparison, ^3 swaps its operand order. */
	IR_ASSERT((IR_EQ ^ 1) == IR_NE);
	IR_ASSERT((IR_LT ^ 3) == IR_GT);
	IR_ASSERT((IR_GT ^ 3) == IR_LT);
	IR_ASSERT((IR_LE ^ 3) == IR_GE);
	IR_ASSERT((IR_GE ^ 3) == IR_LE);
	IR_ASSERT((IR_ULT ^ 3) == IR_UGT);
	IR_ASSERT((IR_UGT ^ 3) == IR_ULT);
	IR_ASSERT((IR_ULE ^ 3) == IR_UGE);
	IR_ASSERT((IR_UGE ^ 3) == IR_ULE);

	IR_ASSERT(IR_ADD + 1 == IR_SUB);
}
/* Return 1 iff instruction `to` appears in the use list of `from`. */
static bool ir_check_use_list(const ir_ctx *ctx, ir_ref from, ir_ref to)
{
	const ir_use_list *list = &ctx->use_lists[from];
	const ir_ref *edge = &ctx->use_edges[list->refs];
	const ir_ref *end = edge + list->count;

	while (edge < end) {
		if (*edge == to) {
			return 1;
		}
		edge++;
	}
	return 0;
}
/* Return 1 iff `from` appears among the input operands of instruction `to`. */
static bool ir_check_input_list(const ir_ctx *ctx, ir_ref from, ir_ref to)
{
	ir_insn *insn = &ctx->ir_base[to];
	ir_ref count = ir_input_edges_count(ctx, insn);
	ir_ref k;

	/* operands are 1-based: ops[1] .. ops[count] */
	for (k = 1; k <= count; k++) {
		if (insn->ops[k] == from) {
			return 1;
		}
	}
	return 0;
}
/* Return 1 iff `def` dominates `use`.  Within the same basic block this is a
 * simple ordering check; otherwise walk `use`'s dominator chain up to `def`'s
 * depth and compare blocks. */
static bool ir_check_domination(const ir_ctx *ctx, ir_ref def, ir_ref use)
{
	uint32_t def_bb = ctx->cfg_map[def];
	uint32_t use_bb = ctx->cfg_map[use];
	const ir_block *blocks = ctx->cfg_blocks;
	uint32_t def_depth = blocks[def_bb].dom_depth;

	if (def_bb == use_bb) {
		return def < use;
	}
	while (blocks[use_bb].dom_depth > def_depth) {
		use_bb = blocks[use_bb].dom_parent;
	}
	return def_bb == use_bb;
}
/* Verify structural consistency of the whole IR graph.
 * Checks, per instruction: operand references are in range; operand kinds
 * (DATA / CONTROL / CONTROL_DEP / CONTROL_REF) match the opcode's flags;
 * data operand types are compatible with the result type; forward references
 * only occur where legal (PHI at loop headers, LOOP_BEGIN back edges);
 * def dominates use when CFG info is available; use lists and input lists
 * agree; and control instructions have the right successor counts.
 * Reports every problem to stderr, then asserts and returns the overall
 * verdict (1 = consistent). */
bool ir_check(const ir_ctx *ctx)
{
	ir_ref i, j, n, *p, use;
	ir_insn *insn, *use_insn;
	ir_type type;
	uint32_t flags;
	bool ok = 1;

	/* walk all instructions; step size depends on each insn's operand count */
	for (i = IR_UNUSED + 1, insn = ctx->ir_base + i; i < ctx->insns_count;) {
		flags = ir_op_flags[insn->op];
		n = ir_input_edges_count(ctx, insn);
		for (j = 1, p = insn->ops + 1; j <= n; j++, p++) {
			use = *p;
			if (use != IR_UNUSED) {
				if (IR_IS_CONST_REF(use)) {
					if (use >= ctx->consts_count) {
						fprintf(stderr, "ir_base[%d].ops[%d] constant reference (%d) is out of range\n", i, j, use);
						ok = 0;
					}
				} else {
					if (use >= ctx->insns_count) {
						fprintf(stderr, "ir_base[%d].ops[%d] insn reference (%d) is out of range\n", i, j, use);
						ok = 0;
					}
					use_insn = &ctx->ir_base[use];
					switch (IR_OPND_KIND(flags, j)) {
						case IR_OPND_DATA:
							/* a DATA operand may also be a typed MEM insn (e.g. a load) */
							if (!(ir_op_flags[use_insn->op] & IR_OP_FLAG_DATA)) {
								if (!(ir_op_flags[use_insn->op] & IR_OP_FLAG_MEM)
								 || use_insn->type == IR_VOID) {
									fprintf(stderr, "ir_base[%d].ops[%d] reference (%d) must be DATA\n", i, j, use);
									ok = 0;
								}
							}
							/* forward data refs are only legal for PHIs (loop back edges) */
							if (use >= i
							 && !(insn->op == IR_PHI
							  && (!(ctx->flags & IR_LINEAR) || ctx->ir_base[insn->op1].op == IR_LOOP_BEGIN))) {
								fprintf(stderr, "ir_base[%d].ops[%d] invalid forward reference (%d)\n", i, j, use);
								ok = 0;
							}
							/* type compatibility between operand and result */
							if (flags & IR_OP_FLAG_DATA) {
								switch (insn->op) {
									case IR_COND:
										if (j == 1) {
											break;
										}
										IR_FALLTHROUGH;
									case IR_ADD:
									case IR_SUB:
									case IR_MUL:
									case IR_DIV:
									case IR_MOD:
									case IR_NEG:
									case IR_ABS:
									case IR_ADD_OV:
									case IR_SUB_OV:
									case IR_MUL_OV:
									case IR_NOT:
									case IR_OR:
									case IR_AND:
									case IR_XOR:
									case IR_SHL:
									case IR_SHR:
									case IR_SAR:
									case IR_ROL:
									case IR_ROR:
									case IR_BSWAP:
									case IR_MIN:
									case IR_MAX:
									case IR_PHI:
									case IR_COPY:
									case IR_PI:
										if (insn->type != use_insn->type) {
											if (j == 2
											 && (insn->op == IR_SHL
											  || insn->op == IR_SHR
											  || insn->op == IR_SAR
											  || insn->op == IR_ROL
											  || insn->op == IR_ROR)
											 && ir_type_size[use_insn->type] < ir_type_size[insn->type]) {
												/* second argument of SHIFT may be incompatible with result */
												break;
											}
											if (insn->op == IR_NOT && insn->type == IR_BOOL) {
												/* boolean not */
												break;
											}
											/* ADDR interchanges with the pointer-sized int types */
											if (sizeof(void*) == 8) {
												if (insn->type == IR_ADDR && (use_insn->type == IR_U64 || use_insn->type == IR_I64)) {
													break;
												}
											} else {
												if (insn->type == IR_ADDR && (use_insn->type == IR_U32 || use_insn->type == IR_I32)) {
													break;
												}
											}
											fprintf(stderr, "ir_base[%d].ops[%d] (%d) type is incompatible with result type (%d != %d)\n",
												i, j, use, use_insn->type, insn->type);
											ok = 0;
										}
										break;
								}
							}
							/* with a CFG, every (non-PHI) def must dominate its use */
							if ((ctx->flags & IR_LINEAR)
							 && ctx->cfg_map
							 && insn->op != IR_PHI
							 && !ir_check_domination(ctx, use, i)) {
								fprintf(stderr, "ir_base[%d].ops[%d] -> %d, %d doesn't dominate %d\n", i, j, use, use, i);
								ok = 0;
							}
							break;
						case IR_OPND_CONTROL:
							if (flags & IR_OP_FLAG_BB_START) {
								/* a block start must be fed by a block end */
								if (!(ir_op_flags[use_insn->op] & IR_OP_FLAG_BB_END)) {
									fprintf(stderr, "ir_base[%d].ops[%d] reference (%d) must be BB_END\n", i, j, use);
									ok = 0;
								}
							} else {
								if (ir_op_flags[use_insn->op] & IR_OP_FLAG_BB_END) {
									fprintf(stderr, "ir_base[%d].ops[%d] reference (%d) must not be BB_END\n", i, j, use);
									ok = 0;
								}
							}
							break;
						case IR_OPND_CONTROL_DEP:
							if (use >= i
							 && !(insn->op == IR_LOOP_BEGIN)) {
								fprintf(stderr, "ir_base[%d].ops[%d] invalid forward reference (%d)\n", i, j, use);
								ok = 0;
							} else if (insn->op == IR_PHI) {
								ir_insn *merge_insn = &ctx->ir_base[insn->op1];
								if (merge_insn->op != IR_MERGE && merge_insn->op != IR_LOOP_BEGIN) {
									fprintf(stderr, "ir_base[%d].ops[%d] reference (%d) must be MERGE or LOOP_BEGIN\n", i, j, use);
									ok = 0;
								}
							}
							break;
						case IR_OPND_CONTROL_REF:
							if (!(ir_op_flags[use_insn->op] & IR_OP_FLAG_CONTROL)) {
								fprintf(stderr, "ir_base[%d].ops[%d] reference (%d) must be CONTROL\n", i, j, use);
								ok = 0;
							}
							break;
						default:
							fprintf(stderr, "ir_base[%d].ops[%d] reference (%d) of unsupported kind\n", i, j, use);
							ok = 0;
					}
				}
			} else if ((insn->op == IR_RETURN || insn->op == IR_UNREACHABLE) && j == 2) {
				/* pass (function returns void) */
			} else if (insn->op == IR_BEGIN && j == 1) {
				/* pass (start of unreachable basic block) */
			} else if (IR_OPND_KIND(flags, j) != IR_OPND_CONTROL_REF
					&& (insn->op != IR_SNAPSHOT || j == 1)) {
				fprintf(stderr, "ir_base[%d].ops[%d] missing reference (%d)\n", i, j, use);
				ok = 0;
			}
			/* every input must also be recorded in the def's use list */
			if (ctx->use_lists
			 && use > 0
			 && !ir_check_use_list(ctx, use, i)) {
				fprintf(stderr, "ir_base[%d].ops[%d] is not in use list (%d)\n", i, j, use);
				ok = 0;
			}
		}
		/* opcode-specific invariants */
		switch (insn->op) {
			case IR_PHI:
				/* PHI has one input per merge predecessor plus the merge itself */
				if (insn->inputs_count != ctx->ir_base[insn->op1].inputs_count + 1) {
					fprintf(stderr, "ir_base[%d] inconsistent PHI inputs_count (%d != %d)\n",
						i, insn->inputs_count, ctx->ir_base[insn->op1].inputs_count + 1);
					ok = 0;
				}
				break;
			case IR_LOAD:
			case IR_STORE:
				/* address operand must be ADDR or a pointer-sized integer */
				type = ctx->ir_base[insn->op2].type;
				if (type != IR_ADDR
				 && (!IR_IS_TYPE_INT(type) || ir_type_size[type] != ir_type_size[IR_ADDR])) {
					fprintf(stderr, "ir_base[%d].op2 must have ADDR type (%s)\n",
						i, ir_type_name[type]);
					ok = 0;
				}
				break;
			case IR_VLOAD:
			case IR_VSTORE:
				if (ctx->ir_base[insn->op2].op != IR_VAR) {
					fprintf(stderr, "ir_base[%d].op2 must be 'VAR' (%s)\n",
						i, ir_op_name[ctx->ir_base[insn->op2].op]);
					ok = 0;
				}
				break;
			case IR_RETURN:
				if (ctx->ret_type != (insn->op2 ? ctx->ir_base[insn->op2].type : IR_VOID)) {
					fprintf(stderr, "ir_base[%d].type incompatible return type\n", i);
					ok = 0;
				}
				break;
			case IR_TAILCALL:
				if (ctx->ret_type != insn->type) {
					fprintf(stderr, "ir_base[%d].type incompatible return type\n", i);
					ok = 0;
				}
				break;
		}
		if (ctx->use_lists) {
			ir_use_list *use_list = &ctx->use_lists[i];
			ir_ref count;

			/* reverse direction: every use-list entry must reference us back */
			for (j = 0, p = &ctx->use_edges[use_list->refs]; j < use_list->count; j++, p++) {
				use = *p;
				if (!ir_check_input_list(ctx, i, use)) {
					fprintf(stderr, "ir_base[%d] is in use list of ir_base[%d]\n", use, i);
					ok = 0;
				}
			}

			/* successor-count rules for control (non-MEM) instructions */
			if ((flags & IR_OP_FLAG_CONTROL) && !(flags & IR_OP_FLAG_MEM)) {
				switch (insn->op) {
					case IR_SWITCH:
						/* may have many successors */
						if (use_list->count < 1) {
							fprintf(stderr, "ir_base[%d].op (SWITCH) must have at least 1 successor (%d)\n", i, use_list->count);
							ok = 0;
						}
						break;
					case IR_IF:
						if (use_list->count != 2) {
							fprintf(stderr, "ir_base[%d].op (IF) must have 2 successors (%d)\n", i, use_list->count);
							ok = 0;
						}
						break;
					case IR_UNREACHABLE:
					case IR_RETURN:
						if (use_list->count == 1) {
							/* UNREACHABLE and RETURN may be linked with the following ENTRY by a fake edge */
							if (ctx->ir_base[ctx->use_edges[use_list->refs]].op == IR_ENTRY) {
								break;
							}
						}
						IR_FALLTHROUGH;
					case IR_IJMP:
						if (use_list->count != 0) {
							fprintf(stderr, "ir_base[%d].op (%s) must not have successors (%d)\n",
								i, ir_op_name[insn->op], use_list->count);
							ok = 0;
						}
						break;
					default:
						/* skip data references */
						count = use_list->count;
						for (j = 0, p = &ctx->use_edges[use_list->refs]; j < use_list->count; j++, p++) {
							use = *p;
							if (!(ir_op_flags[ctx->ir_base[use].op] & IR_OP_FLAG_CONTROL)) {
								count--;
							}
						}
						if (count != 1) {
							if (insn->op == IR_CALL && count == 2) {
								/* result of CALL may be used as data in control instruction */
								break;
							}
							if ((insn->op == IR_LOOP_END || insn->op == IR_END) && count == 2) {
								/* LOOP_END/END may be linked with the following ENTRY by a fake edge */
								if (ctx->ir_base[ctx->use_edges[use_list->refs]].op == IR_ENTRY) {
									count--;
								}
								if (ctx->ir_base[ctx->use_edges[use_list->refs + 1]].op == IR_ENTRY) {
									count--;
								}
								if (count == 1) {
									break;
								}
							}
							fprintf(stderr, "ir_base[%d].op (%s) must have 1 successor (%d)\n",
								i, ir_op_name[insn->op], count);
							ok = 0;
						}
						break;
				}
			}
		}
		/* advance past this insn's (possibly multi-slot) operand storage */
		n = ir_insn_inputs_to_len(n);
		i += n;
		insn += n;
	}

//	if (!ok) {
//		ir_dump_codegen(ctx, stderr);
//	}
	IR_ASSERT(ok);

	return ok;
}

View File

@@ -0,0 +1,832 @@
/*
* IR - Lightweight JIT Compilation Framework
* (Disassembler based on libcapstone)
* Copyright (C) 2022 Zend by Perforce.
* Authors: Dmitry Stogov <dmitry@php.net>
*/
#ifndef _GNU_SOURCE
# define _GNU_SOURCE
#endif
#ifndef _WIN32
# include <dlfcn.h>
# include <unistd.h>
# include <fcntl.h>
#endif
#include "ir.h"
#include "ir_private.h"
#ifndef _WIN32
# include "ir_elf.h"
#endif
#include <capstone/capstone.h>
#define HAVE_CAPSTONE_ITER
/* Symbol-table node: a [addr, end] address range with a name, stored in a
 * red-black tree rooted at _symbols (see the "rbtree" insert/fixup in
 * ir_disasm_add_symbol below). */
typedef struct _ir_sym_node {
	uint64_t             addr;      /* first address covered by the symbol */
	uint64_t             end;       /* last address covered (addr + size - 1) */
	struct _ir_sym_node *parent;
	struct _ir_sym_node *child[2];  /* [0] = left, [1] = right */
	unsigned char        info;      /* node color: 1 = red, 0 = black */
	char                 name[1];   /* flexible-array idiom; full name allocated inline */
} ir_sym_node;

/* Root of the symbol tree (process-global). */
static ir_sym_node *_symbols = NULL;
/* Left-rotate the subtree rooted at p: p's right child becomes the new
 * subtree root, updating the global tree root if p had no parent. */
static void ir_syms_rotateleft(ir_sym_node *p)
{
	ir_sym_node *pivot = p->child[1];

	/* pivot's left subtree moves under p */
	p->child[1] = pivot->child[0];
	if (pivot->child[0]) {
		pivot->child[0]->parent = p;
	}

	/* attach pivot where p used to hang */
	pivot->parent = p->parent;
	if (!p->parent) {
		_symbols = pivot;
	} else if (p == p->parent->child[0]) {
		p->parent->child[0] = pivot;
	} else {
		p->parent->child[1] = pivot;
	}

	pivot->child[0] = p;
	p->parent = pivot;
}
/* Right-rotate the subtree rooted at p: mirror image of ir_syms_rotateleft. */
static void ir_syms_rotateright(ir_sym_node *p)
{
	ir_sym_node *pivot = p->child[0];

	/* pivot's right subtree moves under p */
	p->child[0] = pivot->child[1];
	if (pivot->child[1]) {
		pivot->child[1]->parent = p;
	}

	/* attach pivot where p used to hang */
	pivot->parent = p->parent;
	if (!p->parent) {
		_symbols = pivot;
	} else if (p == p->parent->child[1]) {
		p->parent->child[1] = pivot;
	} else {
		p->parent->child[0] = pivot;
	}

	pivot->child[1] = p;
	p->parent = pivot;
}
/* Register a named address range [addr, addr + size - 1] for symbol lookup.
 * Inserts into the global red-black tree; on an exact duplicate address with
 * the same name it may only shrink the existing range.  Allocation failure is
 * silently ignored (the symbol is simply not recorded). */
void ir_disasm_add_symbol(const char *name,
                          uint64_t    addr,
                          uint64_t    size)
{
	ir_sym_node *sym;
	size_t len = strlen(name);

	sym = ir_mem_pmalloc(sizeof(ir_sym_node) + len + 1);
	if (!sym) {
		return;
	}
	sym->addr = addr;
	sym->end  = (addr + size - 1);
	memcpy((char*)&sym->name, name, len + 1);
	sym->parent = sym->child[0] = sym->child[1] = NULL;
	sym->info = 1; /* new nodes start red */
	if (_symbols) {
		ir_sym_node *node = _symbols;

		/* insert it into rbtree */
		do {
			if (sym->addr > node->addr) {
				/* ranges must not overlap */
				IR_ASSERT(sym->addr > (node->end));
				if (node->child[1]) {
					node = node->child[1];
				} else {
					node->child[1] = sym;
					sym->parent = node;
					break;
				}
			} else if (sym->addr < node->addr) {
				if (node->child[0]) {
					node = node->child[0];
				} else {
					node->child[0] = sym;
					sym->parent = node;
					break;
				}
			} else {
				IR_ASSERT(sym->addr == node->addr);
				if (strcmp(name, node->name) == 0 && sym->end < node->end) {
					/* reduce size of the existing symbol */
					node->end = sym->end;
				}
				ir_mem_pfree(sym);
				return;
			}
		} while (1);

		/* fix rbtree after inserting: standard red-red repair via
		 * recoloring and rotations, walking up from the new node */
		while (sym && sym != _symbols && sym->parent->info == 1) {
			if (sym->parent == sym->parent->parent->child[0]) {
				node = sym->parent->parent->child[1]; /* uncle */
				if (node && node->info == 1) {
					/* red uncle: recolor and continue from grandparent */
					sym->parent->info = 0;
					node->info = 0;
					sym->parent->parent->info = 1;
					sym = sym->parent->parent;
				} else {
					if (sym == sym->parent->child[1]) {
						sym = sym->parent;
						ir_syms_rotateleft(sym);
					}
					sym->parent->info = 0;
					sym->parent->parent->info = 1;
					ir_syms_rotateright(sym->parent->parent);
				}
			} else {
				node = sym->parent->parent->child[0]; /* uncle (mirror case) */
				if (node && node->info == 1) {
					sym->parent->info = 0;
					node->info = 0;
					sym->parent->parent->info = 1;
					sym = sym->parent->parent;
				} else {
					if (sym == sym->parent->child[0]) {
						sym = sym->parent;
						ir_syms_rotateright(sym);
					}
					sym->parent->info = 0;
					sym->parent->parent->info = 1;
					ir_syms_rotateleft(sym->parent->parent);
				}
			}
		}
	} else {
		_symbols = sym;
	}
	/* root is always black */
	_symbols->info = 0;
}
/* Recursively free the symbol tree rooted at n (post-order). */
static void ir_disasm_destroy_symbols(ir_sym_node *n)
{
	if (!n) {
		return;
	}
	ir_disasm_destroy_symbols(n->child[0]);
	ir_disasm_destroy_symbols(n->child[1]);
	ir_mem_pfree(n);
}
/* Look up the symbol whose [addr, end] range contains `addr`.
 * On success stores the offset into the symbol in *offset and returns the
 * symbol name; returns NULL if no registered range covers the address. */
const char* ir_disasm_find_symbol(uint64_t addr, int64_t *offset)
{
	const ir_sym_node *node;

	for (node = _symbols; node != NULL; ) {
		if (addr < node->addr) {
			node = node->child[0];
		} else if (addr > node->end) {
			node = node->child[1];
		} else {
			*offset = addr - node->addr;
			return node->name;
		}
	}
	return NULL;
}
/* Extract the immediate target of a branch instruction, or 0 when the
 * instruction is not a branch with an immediate operand (0 is used as the
 * "no target" sentinel).  Target-architecture specific. */
static uint64_t ir_disasm_branch_target(csh cs, const cs_insn *insn)
{
	unsigned int i;

#if defined(IR_TARGET_X86) || defined(IR_TARGET_X64)
	if (cs_insn_group(cs, insn, X86_GRP_JUMP)) {
		for (i = 0; i < insn->detail->x86.op_count; i++) {
			if (insn->detail->x86.operands[i].type == X86_OP_IMM) {
				return insn->detail->x86.operands[i].imm;
			}
		}
	}
#elif defined(IR_TARGET_AARCH64)
	/* BL and ADR also encode PC-relative immediates worth resolving */
	if (cs_insn_group(cs, insn, ARM64_GRP_JUMP)
	 || insn->id == ARM64_INS_BL
	 || insn->id == ARM64_INS_ADR) {
		for (i = 0; i < insn->detail->arm64.op_count; i++) {
			if (insn->detail->arm64.operands[i].type == ARM64_OP_IMM)
				return insn->detail->arm64.operands[i].imm;
		}
	}
#endif

	return 0;
}
/* Extract the address of a (read-only) data reference made by the
 * instruction, or 0 when none is found (0 is the "no reference" sentinel).
 * x86: absolute 32-bit memory operands and pointer-sized MOV immediates;
 * x64: RIP-relative memory operands; aarch64: immediates of ADR and
 * load/store instructions.
 * BUG FIX (x86 MOV case): the immediate source of a two-operand MOV is
 * capstone operand 1 (operand 0 is the destination, which can never be an
 * immediate), so the old `operands[0]` test could never match and
 * `mov reg, imm32` rodata references were silently missed. */
static uint64_t ir_disasm_rodata_reference(csh cs, const cs_insn *insn)
{
#if defined(IR_TARGET_X86)
	unsigned int i;

	/* absolute [disp32] memory operand (no base/index/segment) */
	for (i = 0; i < insn->detail->x86.op_count; i++) {
		if (insn->detail->x86.operands[i].type == X86_OP_MEM
		 && insn->detail->x86.operands[i].mem.base == X86_REG_INVALID
		 && insn->detail->x86.operands[i].mem.segment == X86_REG_INVALID
		 && insn->detail->x86.operands[i].mem.index == X86_REG_INVALID
		 && insn->detail->x86.operands[i].mem.scale == 1) {
			return (uint32_t)insn->detail->x86.operands[i].mem.disp;
		}
	}
	/* indirect jump through a memory table */
	if (cs_insn_group(cs, insn, X86_GRP_JUMP)) {
		for (i = 0; i < insn->detail->x86.op_count; i++) {
			if (insn->detail->x86.operands[i].type == X86_OP_MEM
			 && insn->detail->x86.operands[i].mem.disp) {
				return (uint32_t)insn->detail->x86.operands[i].mem.disp;
			}
		}
	}
	/* pointer-sized immediate loaded by MOV: the immediate is the SOURCE,
	 * i.e. capstone operand 1 */
	if (insn->id == X86_INS_MOV
	 && insn->detail->x86.op_count == 2
	 && insn->detail->x86.operands[1].type == X86_OP_IMM
	 && insn->detail->x86.operands[1].size == sizeof(void*)) {
		return (uint32_t)insn->detail->x86.operands[1].imm;
	}
#elif defined(IR_TARGET_X64)
	unsigned int i;

	/* RIP-relative addressing: disp is relative to the next instruction */
	for (i = 0; i < insn->detail->x86.op_count; i++) {
		if (insn->detail->x86.operands[i].type == X86_OP_MEM
		 && insn->detail->x86.operands[i].mem.base == X86_REG_RIP
		 && insn->detail->x86.operands[i].mem.segment == X86_REG_INVALID
		 // TODO: support for index and scale
		 && insn->detail->x86.operands[i].mem.index == X86_REG_INVALID
		 && insn->detail->x86.operands[i].mem.scale == 1) {
			return insn->detail->x86.operands[i].mem.disp + insn->address + insn->size;
		}
	}
#elif defined(IR_TARGET_AARCH64)
	unsigned int i;

	if (insn->id == ARM64_INS_ADR
	 || insn->id == ARM64_INS_LDRB
	 || insn->id == ARM64_INS_LDR
	 || insn->id == ARM64_INS_LDRH
	 || insn->id == ARM64_INS_LDRSB
	 || insn->id == ARM64_INS_LDRSH
	 || insn->id == ARM64_INS_LDRSW
	 || insn->id == ARM64_INS_STRB
	 || insn->id == ARM64_INS_STR
	 || insn->id == ARM64_INS_STRH) {
		for (i = 0; i < insn->detail->arm64.op_count; i++) {
			if (insn->detail->arm64.operands[i].type == ARM64_OP_IMM)
				return insn->detail->arm64.operands[i].imm;
		}
	}
	return 0;
#endif
	return 0;
}
/* Resolve "addr" to a human-readable name for the disassembly listing.
 * First consults the internal symbol tree; on non-Windows platforms it
 * falls back to dladdr() for exact matches of dynamic symbols.  Stores
 * the distance from the symbol start into *offset on success. */
static const char* ir_disasm_resolver(uint64_t addr,
                                      int64_t *offset)
{
	const char *name = ir_disasm_find_symbol(addr, offset);

	if (name) {
		return name;
	}
#ifndef _WIN32
	{
		void *a = (void*)(uintptr_t)(addr);
		Dl_info info;

		/* Only accept an exact symbol-start match from the dynamic linker. */
		if (dladdr(a, &info)
		 && info.dli_sname != NULL
		 && info.dli_saddr == a) {
			*offset = 0;
			return info.dli_sname;
		}
	}
#endif
	return NULL;
}
/* Disassemble the machine-code buffer [start, start+size) with capstone and
 * print an annotated assembly listing to "f".
 *
 * name     - optional label printed before the listing
 * start    - start of the generated code
 * size     - total size of the buffer (code + optional .rodata + jump table)
 * asm_addr - when true, prefix every instruction with its address
 * ctx      - optional IR context; supplies OSR entry points and the offsets
 *            of the .rodata and jump-table areas appended after the code
 * f        - output stream
 *
 * Returns 1 on success, 0 when capstone initialization fails.
 *
 * Fixes vs. the previous version:
 *  - in the non-HAVE_CAPSTONE_ITER second pass the label lookup used
 *    "insn->address" (the first instruction's address) instead of
 *    "insn[i].address", so per-instruction labels were never printed;
 *  - the jump-table pointer was first computed with pointer arithmetic on
 *    a uintptr_t* (scaling the byte offset by sizeof(uintptr_t)); the dead,
 *    ill-typed initializer is replaced by the single correct computation. */
int ir_disasm(const char *name,
              const void *start,
              size_t      size,
              bool        asm_addr,
              ir_ctx     *ctx,
              FILE       *f)
{
	size_t orig_size = size;
	const void *orig_end = (void *)((char *)start + size);
	const void *end;
	ir_hashtab labels;
	int32_t l, n;
	uint64_t addr;
	csh cs;
	cs_insn *insn;
# ifdef HAVE_CAPSTONE_ITER
	const uint8_t *cs_code;
	size_t cs_size;
	uint64_t cs_addr;
# else
	size_t count, i;
# endif
	const char *sym;
	int64_t offset = 0;
	char *p, *q, *r;
	uint32_t rodata_offset = 0;
	uint32_t jmp_table_offset = 0;
	ir_hashtab_bucket *b;
	int32_t entry;
	cs_err ret;

	/* Open a capstone handle for the compile-time target architecture. */
# if defined(IR_TARGET_X86) || defined(IR_TARGET_X64)
#  ifdef IR_TARGET_X64
	ret = cs_open(CS_ARCH_X86, CS_MODE_64, &cs);
	if (ret != CS_ERR_OK) {
		fprintf(stderr, "cs_open(CS_ARCH_X86, CS_MODE_64, ...) failed; [%d] %s\n", ret, cs_strerror(ret));
		return 0;
	}
#  else
	ret = cs_open(CS_ARCH_X86, CS_MODE_32, &cs);
	if (ret != CS_ERR_OK) {
		fprintf(stderr, "cs_open(CS_ARCH_X86, CS_MODE_32, ...) failed; [%d] %s\n", ret, cs_strerror(ret));
		return 0;
	}
#  endif
	cs_option(cs, CS_OPT_DETAIL, CS_OPT_ON);
#  if DISASM_INTEL_SYNTAX
	cs_option(cs, CS_OPT_SYNTAX, CS_OPT_SYNTAX_INTEL);
#  else
	cs_option(cs, CS_OPT_SYNTAX, CS_OPT_SYNTAX_ATT);
#  endif
# elif defined(IR_TARGET_AARCH64)
	ret = cs_open(CS_ARCH_ARM64, CS_MODE_ARM, &cs);
	if (ret != CS_ERR_OK) {
		fprintf(stderr, "cs_open(CS_ARCH_ARM64, CS_MODE_ARM, ...) failed; [%d] %s\n", ret, cs_strerror(ret));
		return 0;
	}
	cs_option(cs, CS_OPT_DETAIL, CS_OPT_ON);
	cs_option(cs, CS_OPT_SYNTAX, CS_OPT_SYNTAX_ATT);
# endif

	if (name) {
		fprintf(f, "%s:\n", name);
	}

	ir_hashtab_init(&labels, 32);

	/* Seed the label table with OSR entry points and determine the bounds of
	 * the optional .rodata / jump-table areas appended after the code. */
	if (ctx) {
		if (ctx->entries_count) {
			int i = ctx->entries_count;

			do {
				ir_insn *insn = &ctx->ir_base[ctx->entries[--i]];
				ir_hashtab_add(&labels, insn->op3, insn->op2);
			} while (i != 0);
		}
		rodata_offset = ctx->rodata_offset;
		if (rodata_offset) {
			if (size > rodata_offset) {
				size = rodata_offset;
			}
		}
		jmp_table_offset = ctx->jmp_table_offset;
		if (jmp_table_offset) {
			uint32_t n;
			uintptr_t *p;

			IR_ASSERT(orig_size - jmp_table_offset <= 0xffffffff);
			n = (uint32_t)(orig_size - jmp_table_offset);
			if (size > jmp_table_offset) {
				size = jmp_table_offset;
			}
			/* Skip alignment padding in front of the pointer-sized slots. */
			while (n > 0 && IR_ALIGNED_SIZE(n, sizeof(void*)) != n) {
				jmp_table_offset++;
				n--;
			}
			IR_ASSERT(n > 0 && n % sizeof(void*) == 0 && jmp_table_offset % sizeof(void*) == 0);
			/* Every in-range jump-table slot becomes a local label. */
			p = (uintptr_t*)((char*)start + jmp_table_offset);
			while (n > 0) {
				if (*p) {
					if ((uintptr_t)*p >= (uintptr_t)start && (uintptr_t)*p < (uintptr_t)orig_end) {
						ir_hashtab_add(&labels, (uint32_t)((uintptr_t)*p - (uintptr_t)start), -1);
					}
				}
				p++;
				n -= sizeof(void*);
			}
		}
	}

	end = (void *)((char *)start + size);

	/* First pass: collect branch targets and rodata references as labels.
	 * Value -1 marks a local label; entry points keep non-negative ids. */
# ifdef HAVE_CAPSTONE_ITER
	cs_code = start;
	cs_size = (uint8_t*)end - (uint8_t*)start;
	cs_addr = (uint64_t)(uintptr_t)cs_code;
	insn = cs_malloc(cs);
	while (cs_disasm_iter(cs, &cs_code, &cs_size, &cs_addr, insn)) {
		if ((addr = ir_disasm_branch_target(cs, insn))
# else
	count = cs_disasm(cs, start, (uint8_t*)end - (uint8_t*)start, (uintptr_t)start, 0, &insn);
	for (i = 0; i < count; i++) {
		if ((addr = ir_disasm_branch_target(cs, &(insn[i])))
# endif
		 && (addr >= (uint64_t)(uintptr_t)start && addr < (uint64_t)(uintptr_t)end)) {
			ir_hashtab_add(&labels, (uint32_t)((uintptr_t)addr - (uintptr_t)start), -1);
# ifdef HAVE_CAPSTONE_ITER
		} else if ((addr = ir_disasm_rodata_reference(cs, insn))) {
# else
		} else if ((addr = ir_disasm_rodata_reference(cs, &(insn[i])))) {
# endif
			if (addr >= (uint64_t)(uintptr_t)end && addr < (uint64_t)(uintptr_t)orig_end) {
				ir_hashtab_add(&labels, (uint32_t)((uintptr_t)addr - (uintptr_t)start), -1);
			}
		}
	}

	ir_hashtab_key_sort(&labels);

	/* renumber labels */
	l = 0;
	n = labels.count;
	b = labels.data;
	while (n > 0) {
		if (b->val < 0) {
			b->val = --l;
		}
		b++;
		n--;
	}

	/* Second pass: print every instruction, substituting labels and resolved
	 * symbol names for raw addresses where possible. */
# ifdef HAVE_CAPSTONE_ITER
	cs_code = start;
	cs_size = (uint8_t*)end - (uint8_t*)start;
	cs_addr = (uint64_t)(uintptr_t)cs_code;
	while (cs_disasm_iter(cs, &cs_code, &cs_size, &cs_addr, insn)) {
		entry = ir_hashtab_find(&labels, (uint32_t)((uintptr_t)insn->address - (uintptr_t)start));
# else
	for (i = 0; i < count; i++) {
		/* Fixed: was "insn->address" (first instruction of the batch). */
		entry = ir_hashtab_find(&labels, (uint32_t)((uintptr_t)insn[i].address - (uintptr_t)start));
# endif
		if (entry != (ir_ref)IR_INVALID_VAL) {
			if (entry >= 0) {
				fprintf(f, ".ENTRY_%d:\n", entry);
			} else {
				fprintf(f, ".L%d:\n", -entry);
			}
		}
# ifdef HAVE_CAPSTONE_ITER
		if (asm_addr) {
			fprintf(f, " %" PRIx64 ":", insn->address);
		}
		p = insn->op_str;
#if defined(IR_TARGET_X64) && (CS_API_MAJOR < 5)
		/* Fix capstone MOVD/MOVQ disassemble mismatch */
		if (insn->id == X86_INS_MOVQ && strcmp(insn->mnemonic, "movd") == 0) {
			insn->mnemonic[3] = 'q';
		}
#endif
		if (strlen(p) == 0) {
			fprintf(f, "\t%s\n", insn->mnemonic);
			continue;
		} else {
			fprintf(f, "\t%s ", insn->mnemonic);
		}
# else
		if (asm_addr) {
			fprintf(f, " %" PRIx64 ":", insn[i].address);
		}
		p = insn[i].op_str;
		if (strlen(p) == 0) {
			fprintf(f, "\t%s\n", insn[i].mnemonic);
			continue;
		} else {
			fprintf(f, "\t%s ", insn[i].mnemonic);
		}
# endif
		/* Try to replace the target addresses with a symbols */
#if defined(IR_TARGET_X64)
# ifdef HAVE_CAPSTONE_ITER
		if ((addr = ir_disasm_rodata_reference(cs, insn))) {
# else
		if ((addr = ir_disasm_rodata_reference(cs, &(insn[i])))) {
# endif
			if (addr >= (uint64_t)(uintptr_t)end && addr < (uint64_t)(uintptr_t)orig_end) {
				entry = ir_hashtab_find(&labels, (uint32_t)((uintptr_t)addr - (uintptr_t)start));
				if (entry != (ir_ref)IR_INVALID_VAL) {
					/* Replace the hex displacement before "(%rip)" by the label. */
					r = q = strstr(p, "(%rip)");
					if (r && r > p) {
						r--;
						while (r > p && ((*r >= '0' && *r <= '9') || (*r >= 'a' && *r <= 'f') || (*r >= 'A' && *r <= 'F'))) {
							r--;
						}
						if (r > p && *r == 'x' && *(r - 1) == '0') {
							r -= 2;
						}
						if (r > p) {
							fwrite(p, 1, r - p, f);
						}
						if (entry >= 0) {
							fprintf(f, ".ENTRY_%d%s\n", entry, q);
						} else {
							fprintf(f, ".L%d%s\n", -entry, q);
						}
						continue;
					}
				}
			}
		}
#endif
#if defined(IR_TARGET_AARCH64)
		while ((q = strstr(p, "#0x")) != NULL) {
			r = q + 3;
#else
		while ((q = strstr(p, "0x")) != NULL) {
			r = q + 2;
#endif
			/* Parse the hexadecimal number following the "0x" prefix. */
			addr = 0;
			while (1) {
				if (*r >= '0' && *r <= '9') {
					addr = addr * 16 + (*r - '0');
				} else if (*r >= 'A' && *r <= 'F') {
					addr = addr * 16 + (*r - 'A' + 10);
				} else if (*r >= 'a' && *r <= 'f') {
					addr = addr * 16 + (*r - 'a' + 10);
				} else {
					break;
				}
				r++;
			}
			if (p != q && *(q-1) == '-') {
				q--;
				addr = (uint32_t)(-(int64_t)addr);
			}
			if (addr >= (uint64_t)(uintptr_t)start && addr < (uint64_t)(uintptr_t)orig_end) {
				/* Address inside the buffer: substitute the label, if any. */
				entry = ir_hashtab_find(&labels, (uint32_t)((uintptr_t)addr - (uintptr_t)start));
				if (entry != (ir_ref)IR_INVALID_VAL) {
					fwrite(p, 1, q - p, f);
					if (entry >= 0) {
						fprintf(f, ".ENTRY_%d", entry);
					} else {
						fprintf(f, ".L%d", -entry);
					}
				} else if (r > p) {
					fwrite(p, 1, r - p, f);
				}
			} else if ((sym = ir_disasm_resolver(addr, &offset))) {
#if defined(IR_TARGET_X86) || defined(IR_TARGET_X64)
				/* Don't rewrite an immediate ('$'-prefixed) operand as
				 * symbol+offset — keep the raw constant. */
				if (offset && p != q && *(q-1) == '$') {
					if (r > p) {
						fwrite(p, 1, r - p, f);
					}
					p = r;
					continue;
				}
#endif
				if (q > p) {
					fwrite(p, 1, q - p, f);
				}
				fputs(sym, f);
				if (offset != 0) {
					if (offset > 0) {
						fprintf(f, "+0x%" PRIx64, offset);
					} else {
						fprintf(f, "-0x%" PRIx64, -offset);
					}
				}
			} else if (r > p) {
				fwrite(p, 1, r - p, f);
			}
			p = r;
		}
		fprintf(f, "%s\n", p);
	}
# ifdef HAVE_CAPSTONE_ITER
	cs_free(insn, 1);
# else
	cs_free(insn, count);
# endif

	if (rodata_offset || jmp_table_offset) {
		fprintf(f, ".rodata\n");
	}
	if (rodata_offset) {
		/* Dump .rodata as raw bytes, up to 16 per line, breaking at labels. */
		const unsigned char *p = (unsigned char*)start + rodata_offset;
		uint32_t n = jmp_table_offset ?
			(uint32_t)(jmp_table_offset - rodata_offset) :
			(uint32_t)(orig_size - rodata_offset);
		uint32_t j;

		while (n > 0) {
			entry = ir_hashtab_find(&labels, (uint32_t)((uintptr_t)p - (uintptr_t)start));
			if (entry != (ir_ref)IR_INVALID_VAL) {
				if (entry >= 0) {
					fprintf(f, ".ENTRY_%d:\n", entry);
				} else {
					fprintf(f, ".L%d:\n", -entry);
				}
			}
			fprintf(f, "\t.db 0x%02x", (int)*p);
			p++;
			n--;
			j = 15;
			while (n > 0 && j > 0) {
				entry = ir_hashtab_find(&labels, (uint32_t)((uintptr_t)p - (uintptr_t)start));
				if (entry != (ir_ref)IR_INVALID_VAL) {
					break;
				}
				fprintf(f, ", 0x%02x", (int)*p);
				p++;
				n--;
				j--;
			}
			fprintf(f, "\n");
		}
	}
	if (jmp_table_offset) {
		/* Fixed: previously initialized as
		 *   (uintptr_t*)(unsigned char*)start + jmp_table_offset
		 * which scales the byte offset by sizeof(uintptr_t); the bogus value
		 * was dead (immediately reassigned), so compute it correctly once. */
		uintptr_t *p = (uintptr_t*)((char*)start + jmp_table_offset);
		uint32_t n = (uint32_t)(orig_size - jmp_table_offset);

		fprintf(f, ".align %d\n", (int)sizeof(void*));
		while (n > 0) {
			entry = ir_hashtab_find(&labels, (uint32_t)((uintptr_t)p - (uintptr_t)start));
			if (entry != (ir_ref)IR_INVALID_VAL) {
				if (entry >= 0) {
					fprintf(f, ".ENTRY_%d:\n", entry);
				} else {
					fprintf(f, ".L%d:\n", -entry);
				}
			}
			if (*p) {
				if ((uintptr_t)*p >= (uintptr_t)start && (uintptr_t)*p < (uintptr_t)orig_end) {
					/* Slot points back into the code: print its label. */
					entry = ir_hashtab_find(&labels, (uint32_t)(*p - (uintptr_t)start));
					IR_ASSERT(entry != (ir_ref)IR_INVALID_VAL);
					if (entry >= 0) {
						if (sizeof(void*) == 8) {
							fprintf(f, "\t.qword .ENTRY_%d\n", entry);
						} else {
							fprintf(f, "\t.dword .ENTRY_%d\n", entry);
						}
					} else {
						if (sizeof(void*) == 8) {
							fprintf(f, "\t.qword .L%d\n", -entry);
						} else {
							fprintf(f, "\t.dword .L%d\n", -entry);
						}
					}
				} else {
					/* External address: try to resolve it to a symbol name. */
					int64_t offset;
					const char *name = ir_disasm_find_symbol(*p, &offset);

					if (name && offset == 0) {
						if (sizeof(void*) == 8) {
							fprintf(f, "\t.qword %s\n", name);
						} else {
							fprintf(f, "\t.dword %s\n", name);
						}
					} else {
						if (sizeof(void*) == 8) {
							fprintf(f, "\t.qword 0x%0llx\n", (long long)*p);
						} else {
							fprintf(f, "\t.dword 0x%0x\n", (int)*p);
						}
					}
				}
			} else {
				if (sizeof(void*) == 8) {
					fprintf(f, "\t.qword 0\n");
				} else {
					fprintf(f, "\t.dword 0\n");
				}
			}
			p++;
			n -= sizeof(void*);
		}
	}
	fprintf(f, "\n");

	ir_hashtab_free(&labels);
	cs_close(&cs);
	return 1;
}
#ifndef _WIN32
/* Read the contents of an ELF section into a freshly allocated buffer.
 * Returns the buffer (caller frees with ir_mem_free()) or NULL when the
 * seek or the read fails. */
static void* ir_elf_read_sect(int fd, ir_elf_sectheader *sect)
{
	void *buf = ir_mem_malloc(sect->size);

	/* Short-circuit: the read is attempted only when the seek succeeded. */
	if (lseek(fd, sect->ofs, SEEK_SET) >= 0
	 && read(fd, buf, sect->size) == (ssize_t)sect->size) {
		return buf;
	}
	ir_mem_free(buf);
	return NULL;
}
/* Populate the disassembler symbol tree with the local function symbols of
 * the running executable, read directly from its ELF symbol table.  A
 * best-effort helper: any failure (unknown platform, unreadable image,
 * malformed ELF) silently results in fewer resolved names. */
static void ir_elf_load_symbols(void)
{
	ir_elf_header hdr;
	ir_elf_sectheader sect;
	int i;

	/* Locate and open the executable image of the current process. */
#if defined(__linux__)
	int fd = open("/proc/self/exe", O_RDONLY);
#elif defined(__NetBSD__)
	int fd = open("/proc/curproc/exe", O_RDONLY);
#elif defined(__FreeBSD__) || defined(__DragonFly__)
	char path[PATH_MAX];
	size_t pathlen = sizeof(path);
	int mib[4] = {CTL_KERN, KERN_PROC, KERN_PROC_PATHNAME, -1};

	if (sysctl(mib, 4, path, &pathlen, NULL, 0) == -1) {
		return;
	}
	int fd = open(path, O_RDONLY);
#elif defined(__sun)
	int fd = open("/proc/self/path/a.out", O_RDONLY);
#elif defined(__HAIKU__)
	char path[PATH_MAX];

	if (find_path(B_APP_IMAGE_SYMBOL, B_FIND_PATH_IMAGE_PATH,
			NULL, path, sizeof(path)) != B_OK) {
		return;
	}
	int fd = open(path, O_RDONLY);
#else
	// To complete eventually for other ELF platforms.
	// Otherwise APPLE is Mach-O
	int fd = -1;
#endif
	if (fd >= 0) {
		/* Check the ELF magic, then walk the section-header table. */
		if (read(fd, &hdr, sizeof(hdr)) == sizeof(hdr)
		 && hdr.emagic[0] == '\177'
		 && hdr.emagic[1] == 'E'
		 && hdr.emagic[2] == 'L'
		 && hdr.emagic[3] == 'F'
		 && lseek(fd, hdr.shofs, SEEK_SET) >= 0) {
			for (i = 0; i < hdr.shnum; i++) {
				if (read(fd, &sect, sizeof(sect)) == sizeof(sect)
				 && sect.type == ELFSECT_TYPE_SYMTAB) {
					uint32_t n, count = sect.size / sizeof(ir_elf_symbol);
					ir_elf_symbol *syms = ir_elf_read_sect(fd, &sect);
					char *str_tbl;

					if (syms) {
						/* sect.link is the index of the section's string table. */
						if (lseek(fd, hdr.shofs + sect.link * sizeof(sect), SEEK_SET) >= 0
						 && read(fd, &sect, sizeof(sect)) == sizeof(sect)
						 && (str_tbl = (char*)ir_elf_read_sect(fd, &sect)) != NULL) {
							/* Register local FUNC symbols only (dladdr() already
							 * covers the global/dynamic ones). */
							for (n = 0; n < count; n++) {
								if (syms[n].name
								 && (ELFSYM_TYPE(syms[n].info) == ELFSYM_TYPE_FUNC
									/*|| ELFSYM_TYPE(syms[n].info) == ELFSYM_TYPE_DATA*/)
								 && (ELFSYM_BIND(syms[n].info) == ELFSYM_BIND_LOCAL
									/*|| ELFSYM_BIND(syms[n].info) == ELFSYM_BIND_GLOBAL*/)) {
									ir_disasm_add_symbol(str_tbl + syms[n].name, syms[n].value, syms[n].size);
								}
							}
							ir_mem_free(str_tbl);
						}
						ir_mem_free(syms);
					}
					/* The reads above moved the file position; re-seek to the
					 * next section header. */
					if (lseek(fd, hdr.shofs + (i + 1) * sizeof(sect), SEEK_SET) < 0) {
						break;
					}
				}
			}
		}
		close(fd);
	}
}
#endif
/* Initialize the disassembler: preload the executable's own symbol table
 * (non-Windows only) so addresses can be resolved to names.  Returns 1. */
int ir_disasm_init(void)
{
#ifndef _WIN32
	ir_elf_load_symbols();
#endif
	return 1;
}
/* Release the registered symbol tree.  Idempotent: the destroy helper
 * accepts a NULL root, so no guard is needed. */
void ir_disasm_free(void)
{
	ir_disasm_destroy_symbols(_symbols);
	_symbols = NULL;
}

View File

@@ -0,0 +1,713 @@
/*
* IR - Lightweight JIT Compilation Framework
* (debug dumps)
* Copyright (C) 2022 Zend by Perforce.
* Authors: Dmitry Stogov <dmitry@php.net>
*/
#include "ir.h"
#include "ir_private.h"
/* Print a flat textual dump of all IR instructions (constants first, then
 * the instruction stream) to "f".  Instructions with more than three
 * operands occupy extra slots; the continuation rows print four refs each. */
void ir_dump(const ir_ctx *ctx, FILE *f)
{
	ir_ref i, j, n, ref, *p;
	ir_insn *insn;
	uint32_t flags;

	/* Constants live at negative refs: -consts_count+1 .. IR_UNUSED-1. */
	for (i = 1 - ctx->consts_count, insn = ctx->ir_base + i; i < IR_UNUSED; i++, insn++) {
		fprintf(f, "%05d %s %s(", i, ir_op_name[insn->op], ir_type_name[insn->type]);
		ir_print_const(ctx, insn, f, true);
		fprintf(f, ")\n");
	}

	for (i = IR_UNUSED + 1, insn = ctx->ir_base + i; i < ctx->insns_count; i++, insn++) {
		flags = ir_op_flags[insn->op];
		fprintf(f, "%05d %s", i, ir_op_name[insn->op]);
		/* The type is meaningful for data ops and typed memory ops only. */
		if ((flags & IR_OP_FLAG_DATA) || ((flags & IR_OP_FLAG_MEM) && insn->type != IR_VOID)) {
			fprintf(f, " %s", ir_type_name[insn->type]);
		}
		n = ir_operands_count(ctx, insn);
		/* The first three operand slots are part of the insn itself. */
		p = insn->ops + 1;
		for (j = 0; j < 3; j++, p++) {
			if ((ref = *p) != 0) {
				fprintf(f, " %05d", ref);
			}
		}
		/* Remaining operands spill into the following insn slots. */
		for (n -= 3; n > 0; n -= 4) {
			i++;
			insn++;
			fprintf(f, "\n%05d", i);
			for (j = 0; j < 4; j++, p++) {
				if ((ref = *p) != 0) {
					fprintf(f, " %05d", ref);
				}
			}
		}
		fprintf(f, "\n");
	}
}
/* Emit the IR graph in Graphviz "dot" format to "f".
 * Node shapes/colors encode the kind of node (control = red boxes,
 * data = blue diamonds/ellipses, constants = yellow); edge colors encode
 * the edge kind (control = red, data = blue, refs = dashed). */
void ir_dump_dot(const ir_ctx *ctx, FILE *f)
{
	/* Edge weights bias Graphviz layout: control flow dominates ranking. */
	int DATA_WEIGHT = 0;
	int CONTROL_WEIGHT = 5;
	int REF_WEIGHT = 4;
	ir_ref i, j, n, ref, *p;
	ir_insn *insn;
	uint32_t flags;

	fprintf(f, "digraph ir {\n");
	fprintf(f, "\trankdir=TB;\n");
	/* Constant nodes (negative refs). */
	for (i = 1 - ctx->consts_count, insn = ctx->ir_base + i; i < IR_UNUSED; i++, insn++) {
		fprintf(f, "\tc%d [label=\"C%d: CONST %s(", -i, -i, ir_type_name[insn->type]);
		/* FIXME(tony): We still cannot handle strings with escaped double quote inside */
		ir_print_const(ctx, insn, f, false);
		fprintf(f, ")\",style=filled,fillcolor=yellow];\n");
	}
	for (i = IR_UNUSED + 1, insn = ctx->ir_base + i; i < ctx->insns_count;) {
		flags = ir_op_flags[insn->op];
		if (flags & IR_OP_FLAG_CONTROL) {
			/* Control nodes: START pinned to the top rank, terminators to the
			 * bottom; memory ops in pink, other control ops in light coral. */
			if (insn->op == IR_START) {
				fprintf(f, "\t{rank=min; n%d [label=\"%d: %s\",shape=box,style=\"rounded,filled\",fillcolor=red];}\n", i, i, ir_op_name[insn->op]);
			} else if (insn->op == IR_ENTRY) {
				fprintf(f, "\t{n%d [label=\"%d: %s\",shape=box,style=\"rounded,filled\",fillcolor=red];}\n", i, i, ir_op_name[insn->op]);
			} else if (flags & IR_OP_FLAG_TERMINATOR) {
				fprintf(f, "\t{rank=max; n%d [label=\"%d: %s\",shape=box,style=\"rounded,filled\",fillcolor=red];}\n", i, i, ir_op_name[insn->op]);
			} else if (flags & IR_OP_FLAG_MEM) {
				fprintf(f, "\tn%d [label=\"%d: %s\",shape=box,style=filled,fillcolor=pink];\n", i, i, ir_op_name[insn->op]);
			} else {
				fprintf(f, "\tn%d [label=\"%d: %s\",shape=box,style=filled,fillcolor=lightcoral];\n", i, i, ir_op_name[insn->op]);
			}
		} else if (flags & IR_OP_FLAG_DATA) {
			if (IR_OPND_KIND(flags, 1) == IR_OPND_DATA) {
				/* not a leaf */
				fprintf(f, "\tn%d [label=\"%d: %s\"", i, i, ir_op_name[insn->op]);
				fprintf(f, ",shape=diamond,style=filled,fillcolor=deepskyblue];\n");
			} else {
				if (insn->op == IR_PARAM) {
					fprintf(f, "\tn%d [label=\"%d: %s %s \\\"%s\\\"\",style=filled,fillcolor=lightblue];\n",
						i, i, ir_op_name[insn->op], ir_type_name[insn->type], ir_get_str(ctx, insn->op2));
				} else if (insn->op == IR_VAR) {
					fprintf(f, "\tn%d [label=\"%d: %s %s \\\"%s\\\"\"];\n", i, i, ir_op_name[insn->op], ir_type_name[insn->type], ir_get_str(ctx, insn->op2));
				} else {
					fprintf(f, "\tn%d [label=\"%d: %s %s\",style=filled,fillcolor=deepskyblue];\n", i, i, ir_op_name[insn->op], ir_type_name[insn->type]);
				}
			}
		}
		n = ir_operands_count(ctx, insn);
		for (j = 1, p = insn->ops + 1; j <= n; j++, p++) {
			ref = *p;
			if (ref) {
				switch (IR_OPND_KIND(flags, j)) {
					case IR_OPND_DATA:
						if (IR_IS_CONST_REF(ref)) {
							fprintf(f, "\tc%d -> n%d [color=blue,weight=%d];\n", -ref, i, DATA_WEIGHT);
						} else if (insn->op == IR_PHI
						 && ctx->ir_base[insn->op1].op == IR_LOOP_BEGIN
						 && ctx->ir_base[ir_insn_op(&ctx->ir_base[insn->op1], j - 1)].op == IR_LOOP_END) {
							/* PHI input along a loop back-edge: draw reversed. */
							fprintf(f, "\tn%d -> n%d [color=blue,dir=back];\n", i, ref);
						} else {
							fprintf(f, "\tn%d -> n%d [color=blue,weight=%d];\n", ref, i, DATA_WEIGHT);
						}
						break;
					case IR_OPND_CONTROL:
						if (insn->op == IR_LOOP_BEGIN && ctx->ir_base[ref].op == IR_LOOP_END) {
							/* Loop back-edge: reversed to keep layout acyclic. */
							fprintf(f, "\tn%d -> n%d [style=bold,color=red,dir=back];\n", i, ref);
						} else if (insn->op == IR_ENTRY) {
							fprintf(f, "\tn%d -> n%d [style=bold,color=red,style=dashed,weight=%d];\n", ref, i, CONTROL_WEIGHT);
						} else {
							fprintf(f, "\tn%d -> n%d [style=bold,color=red,weight=%d];\n", ref, i, CONTROL_WEIGHT);
						}
						break;
					case IR_OPND_CONTROL_DEP:
					case IR_OPND_CONTROL_REF:
						fprintf(f, "\tn%d -> n%d [style=dashed,dir=back,weight=%d];\n", ref, i, REF_WEIGHT);
						break;
				}
			}
		}
		/* Advance past the extra operand slots of multi-slot instructions. */
		n = ir_insn_inputs_to_len(n);
		i += n;
		insn += n;
	}
	fprintf(f, "}\n");
}
/* Print the def->use lists (one line per instruction that has uses),
 * or nothing when the use lists have not been built yet. */
void ir_dump_use_lists(const ir_ctx *ctx, FILE *f)
{
	ir_ref i, j, n, *p;
	ir_use_list *list;

	if (!ctx->use_lists) {
		return;
	}
	fprintf(f, "{ # Use Lists\n");
	for (i = 1, list = &ctx->use_lists[1]; i < ctx->insns_count; i++, list++) {
		n = list->count;
		if (n <= 0) {
			continue;
		}
		p = &ctx->use_edges[list->refs];
		fprintf(f, "%05d(%d): [%05d", i, n, *p);
		for (j = 1, p++; j < n; j++, p++) {
			fprintf(f, ", %05d", *p);
		}
		fprintf(f, "]\n");
	}
	fprintf(f, "}\n");
}
/* Callback for ir_gen_dessa_moves(): print one SSA-deconstruction move
 * ("mov <src> -> <dst>") to the FILE* stashed in ctx->data.  Either side
 * may be a constant (c_N), a virtual register (RN, with its allocated
 * physical register when known), or the temporary (TMP).  Returns 1 so
 * the generator keeps going. */
static int ir_dump_dessa_move(ir_ctx *ctx, uint8_t type, ir_ref from, ir_ref to)
{
	FILE *f = ctx->data;
	int8_t reg;

	if (IR_IS_CONST_REF(from)) {
		fprintf(f, "\tmov c_%d -> ", -from);
	} else if (from == 0) {
		fprintf(f, "\tmov TMP -> ");
	} else {
		fprintf(f, "\tmov R%d", ctx->vregs[from]);
		if (ctx->live_intervals && ctx->live_intervals[ctx->vregs[from]]) {
			reg = ctx->live_intervals[ctx->vregs[from]]->reg;
			if (reg >= 0) {
				fprintf(f, " [%%%s]", ir_reg_name(reg, type));
			}
		}
		fprintf(f, " -> ");
	}
	if (!to) {
		fprintf(f, "TMP\n");
	} else {
		fprintf(f, "R%d", ctx->vregs[to]);
		if (ctx->live_intervals && ctx->live_intervals[ctx->vregs[to]]) {
			reg = ctx->live_intervals[ctx->vregs[to]]->reg;
			if (reg >= 0) {
				fprintf(f, " [%%%s]", ir_reg_name(reg, type));
			}
		}
		fprintf(f, "\n");
	}
	return 1;
}
/* Print the control-flow graph: one section per basic block with its
 * start/end instructions, successor/predecessor edges, dominator-tree
 * links, loop information, block flags, OSR entry loads and SSA
 * deconstruction moves.  Does nothing when the CFG has not been built.
 *
 * Fix: the IRREDUCIBLE_LOOP flag was printed to stderr instead of the
 * caller-supplied stream "f" like every other line of the dump. */
void ir_dump_cfg(ir_ctx *ctx, FILE *f)
{
	if (ctx->cfg_blocks) {
		uint32_t b, i, bb_count = ctx->cfg_blocks_count;
		ir_block *bb = ctx->cfg_blocks + 1;

		fprintf(f, "{ # CFG\n");
		for (b = 1; b <= bb_count; b++, bb++) {
			fprintf(f, "BB%d:\n", b);
			fprintf(f, "\tstart=%d\n", bb->start);
			fprintf(f, "\tend=%d\n", bb->end);
			if (bb->successors_count) {
				fprintf(f, "\tsuccessors(%d) [BB%d", bb->successors_count, ctx->cfg_edges[bb->successors]);
				for (i = 1; i < bb->successors_count; i++) {
					fprintf(f, ", BB%d", ctx->cfg_edges[bb->successors + i]);
				}
				fprintf(f, "]\n");
			}
			if (bb->predecessors_count) {
				fprintf(f, "\tpredecessors(%d) [BB%d", bb->predecessors_count, ctx->cfg_edges[bb->predecessors]);
				for (i = 1; i < bb->predecessors_count; i++) {
					fprintf(f, ", BB%d", ctx->cfg_edges[bb->predecessors + i]);
				}
				fprintf(f, "]\n");
			}
			/* Dominator-tree information. */
			if (bb->dom_parent > 0) {
				fprintf(f, "\tdom_parent=BB%d\n", bb->dom_parent);
			}
			fprintf(f, "\tdom_depth=%d\n", bb->dom_depth);
			if (bb->dom_child > 0) {
				int child = bb->dom_child;

				fprintf(f, "\tdom_children [BB%d", child);
				child = ctx->cfg_blocks[child].dom_next_child;
				while (child > 0) {
					fprintf(f, ", BB%d", child);
					child = ctx->cfg_blocks[child].dom_next_child;
				}
				fprintf(f, "]\n");
			}
			/* Block flags. */
			if (bb->flags & IR_BB_ENTRY) {
				fprintf(f, "\tENTRY\n");
			}
			if (bb->flags & IR_BB_UNREACHABLE) {
				fprintf(f, "\tUNREACHABLE\n");
			}
			if (bb->flags & IR_BB_LOOP_HEADER) {
				if (bb->flags & IR_BB_LOOP_WITH_ENTRY) {
					fprintf(f, "\tLOOP_HEADER, LOOP_WITH_ENTRY\n");
				} else {
					fprintf(f, "\tLOOP_HEADER\n");
				}
			}
			if (bb->flags & IR_BB_IRREDUCIBLE_LOOP) {
				/* Fixed: was fprintf(stderr, ...) — everything else goes to f. */
				fprintf(f, "\tIRREDUCIBLE_LOOP\n");
			}
			if (bb->loop_header > 0) {
				fprintf(f, "\tloop_header=BB%d\n", bb->loop_header);
			}
			if (bb->loop_depth != 0) {
				fprintf(f, "\tloop_depth=%d\n", bb->loop_depth);
			}
			if (bb->flags & IR_BB_OSR_ENTRY_LOADS) {
				/* osr_entry_loads is a packed list of (block, count, refs...)
				 * groups terminated by a 0 block id; find our group. */
				ir_list *list = (ir_list*)ctx->osr_entry_loads;
				uint32_t pos = 0, i, count;

				IR_ASSERT(list);
				while (1) {
					i = ir_list_at(list, pos);
					if (b == i) {
						break;
					}
					IR_ASSERT(i != 0); /* end marker */
					pos++;
					count = ir_list_at(list, pos);
					pos += count + 1;
				}
				pos++;
				count = ir_list_at(list, pos);
				pos++;
				for (i = 0; i < count; i++, pos++) {
					ir_ref ref = ir_list_at(list, pos);
					fprintf(f, "\tOSR_ENTRY_LOAD=d_%d\n", ref);
				}
			}
			if (bb->flags & IR_BB_DESSA_MOVES) {
				/* The move-printer callback reads the stream from ctx->data. */
				ctx->data = f;
				ir_gen_dessa_moves(ctx, b, ir_dump_dessa_move);
			}
		}
		fprintf(f, "}\n");
	}
}
/* Print the instruction-to-basic-block mapping, one "insn -> bb" pair per
 * line; does nothing when the map has not been built. */
void ir_dump_cfg_map(const ir_ctx *ctx, FILE *f)
{
	const uint32_t *map = ctx->cfg_map;
	ir_ref i;

	if (!map) {
		return;
	}
	fprintf(f, "{ # CFG map (insn -> bb)\n");
	for (i = IR_UNUSED + 1; i < ctx->insns_count; i++) {
		fprintf(f, "%d -> %d\n", i, map[i]);
	}
	fprintf(f, "}\n");
}
/* Print the live intervals computed by the register allocator: one entry
 * per virtual register (interval 0 is the scratch/TMP interval), each with
 * its live ranges, spill slot, assigned register, and use positions; the
 * fixed physical-register intervals follow at the end.  Positions are
 * printed as "ref.sub_ref".  Does nothing when liveness is not computed. */
void ir_dump_live_ranges(const ir_ctx *ctx, FILE *f)
{
	ir_ref i, j, n;

	if (!ctx->live_intervals) {
		return;
	}
	fprintf(f, "{ # LIVE-RANGES (vregs_count=%d)\n", ctx->vregs_count);
	for (i = 0; i <= ctx->vregs_count; i++) {
		ir_live_interval *ival = ctx->live_intervals[i];

		if (ival) {
			ir_live_range *p;
			ir_use_pos *use_pos;

			if (i == 0) {
				/* Interval 0 describes temporary-register requirements. */
				fprintf(f, "TMP");
			} else {
				/* List every instruction mapped to this virtual register. */
				for (j = 1; j < ctx->insns_count; j++) {
					if (ctx->vregs[j] == (uint32_t)i) {
						break;
					}
				}
				fprintf(f, "R%d (d_%d", i, j);
				for (j++; j < ctx->insns_count; j++) {
					if (ctx->vregs[j] == (uint32_t)i) {
						fprintf(f, ", d_%d", j);
					}
				}
				fprintf(f, ")");
				if (ival->stack_spill_pos != -1) {
					if (ival->flags & IR_LIVE_INTERVAL_SPILL_SPECIAL) {
						IR_ASSERT(ctx->spill_base >= 0);
						fprintf(f, " [SPILL=0x%x(%%%s)]", ival->stack_spill_pos, ir_reg_name(ctx->spill_base, IR_ADDR));
					} else {
						fprintf(f, " [SPILL=0x%x]", ival->stack_spill_pos);
					}
				}
			}
			if (ival->next) {
				fprintf(f, "\n\t");
			} else if (ival->reg != IR_REG_NONE) {
				fprintf(f, " ");
			}
			/* Walk the chain of (possibly split) intervals for this vreg. */
			do {
				if (ival->reg != IR_REG_NONE) {
					fprintf(f, "[%%%s]", ir_reg_name(ival->reg, ival->type));
				}
				p = &ival->range;
				fprintf(f, ": [%d.%d-%d.%d)",
					IR_LIVE_POS_TO_REF(p->start), IR_LIVE_POS_TO_SUB_REF(p->start),
					IR_LIVE_POS_TO_REF(p->end), IR_LIVE_POS_TO_SUB_REF(p->end));
				if (i == 0) {
					/* This is a TMP register */
					if (ival->tmp_ref == IR_LIVE_POS_TO_REF(p->start)) {
						fprintf(f, "/%d", ival->tmp_op_num);
					} else {
						fprintf(f, "/%d.%d", ival->tmp_ref, ival->tmp_op_num);
					}
				} else {
					p = p->next;
					while (p) {
						fprintf(f, ", [%d.%d-%d.%d)",
							IR_LIVE_POS_TO_REF(p->start), IR_LIVE_POS_TO_SUB_REF(p->start),
							IR_LIVE_POS_TO_REF(p->end), IR_LIVE_POS_TO_SUB_REF(p->end));
						p = p->next;
					}
				}
				/* Print the use positions attached to this interval. */
				use_pos = ival->use_pos;
				while (use_pos) {
					if (use_pos->flags & IR_PHI_USE) {
						IR_ASSERT(use_pos->op_num > 0);
						fprintf(f, ", PHI_USE(%d.%d, phi=d_%d/%d)",
							IR_LIVE_POS_TO_REF(use_pos->pos), IR_LIVE_POS_TO_SUB_REF(use_pos->pos),
							-use_pos->hint_ref, use_pos->op_num);
					} else if (use_pos->flags & IR_FUSED_USE) {
						fprintf(f, ", USE(%d.%d/%d.%d",
							IR_LIVE_POS_TO_REF(use_pos->pos), IR_LIVE_POS_TO_SUB_REF(use_pos->pos),
							-use_pos->hint_ref, use_pos->op_num);
						if (use_pos->hint >= 0) {
							fprintf(f, ", hint=%%%s", ir_reg_name(use_pos->hint, ival->type));
						}
						fprintf(f, ")");
						if (use_pos->flags & IR_USE_MUST_BE_IN_REG) {
							fprintf(f, "!");
						}
					} else {
						/* op_num 0 denotes the defining position. */
						if (!use_pos->op_num) {
							fprintf(f, ", DEF(%d.%d",
								IR_LIVE_POS_TO_REF(use_pos->pos), IR_LIVE_POS_TO_SUB_REF(use_pos->pos));
						} else {
							fprintf(f, ", USE(%d.%d/%d",
								IR_LIVE_POS_TO_REF(use_pos->pos), IR_LIVE_POS_TO_SUB_REF(use_pos->pos),
								use_pos->op_num);
						}
						if (use_pos->hint >= 0) {
							fprintf(f, ", hint=%%%s", ir_reg_name(use_pos->hint, ival->type));
						}
						if (use_pos->hint_ref) {
							fprintf(f, ", hint=R%d", ctx->vregs[use_pos->hint_ref]);
						}
						fprintf(f, ")");
						if (use_pos->flags & IR_USE_MUST_BE_IN_REG) {
							fprintf(f, "!");
						}
					}
					use_pos = use_pos->next;
				}
				if (ival->next) {
					fprintf(f, "\n\t");
				}
				ival = ival->next;
			} while (ival);
			fprintf(f, "\n");
		}
	}
#if 1
	/* Fixed intervals for physical registers follow the virtual ones. */
	n = ctx->vregs_count + ir_regs_number() + 2;
	for (i = ctx->vregs_count + 1; i <= n; i++) {
		ir_live_interval *ival = ctx->live_intervals[i];

		if (ival) {
			ir_live_range *p = &ival->range;

			fprintf(f, "[%%%s] : [%d.%d-%d.%d)",
				ir_reg_name(ival->reg, ival->type),
				IR_LIVE_POS_TO_REF(p->start), IR_LIVE_POS_TO_SUB_REF(p->start),
				IR_LIVE_POS_TO_REF(p->end), IR_LIVE_POS_TO_SUB_REF(p->end));
			p = p->next;
			while (p) {
				fprintf(f, ", [%d.%d-%d.%d)",
					IR_LIVE_POS_TO_REF(p->start), IR_LIVE_POS_TO_SUB_REF(p->start),
					IR_LIVE_POS_TO_REF(p->end), IR_LIVE_POS_TO_SUB_REF(p->end));
				p = p->next;
			}
			fprintf(f, "\n");
		}
	}
#endif
	fprintf(f, "}\n");
}
/* Print a code-generation oriented dump: constants first, then the
 * instruction stream grouped by basic block, annotated with virtual
 * registers ({RN}), assigned physical registers ({%reg[:load|:store]}),
 * variable bindings, matched code-generation rules, SSA-deconstruction
 * moves and block-exit control flow.  Assumes the CFG has been built. */
void ir_dump_codegen(const ir_ctx *ctx, FILE *f)
{
	ir_ref i, j, n, ref, *p;
	ir_insn *insn;
	uint32_t flags, b;
	ir_block *bb;
	bool first;

	fprintf(f, "{\n");
	/* Constants (negative refs, stored below ir_base). */
	for (i = IR_UNUSED + 1, insn = ctx->ir_base - i; i < ctx->consts_count; i++, insn--) {
		fprintf(f, "\t%s c_%d = ", ir_type_cname[insn->type], i);
		if (insn->op == IR_FUNC) {
			if (!insn->const_flags) {
				fprintf(f, "func(%s)", ir_get_str(ctx, insn->val.i32));
			} else {
				fprintf(f, "func(%s, %d)", ir_get_str(ctx, insn->val.i32), insn->const_flags);
			}
		} else if (insn->op == IR_SYM) {
			fprintf(f, "sym(%s)", ir_get_str(ctx, insn->val.i32));
		} else if (insn->op == IR_FUNC_ADDR) {
			fprintf(f, "func_addr(");
			ir_print_const(ctx, insn, f, true);
			if (insn->const_flags) {
				fprintf(f, ", %d", insn->const_flags);
			}
			fprintf(f, ")");
		} else {
			ir_print_const(ctx, insn, f, true);
		}
		fprintf(f, ";\n");
	}

	for (b = 1, bb = ctx->cfg_blocks + b; b <= ctx->cfg_blocks_count; b++, bb++) {
		fprintf(f, "#BB%d:\n", b);
		for (i = bb->start, insn = ctx->ir_base + i; i <= bb->end;) {
			flags = ir_op_flags[insn->op];
			/* Result prefix: "l_N" for pure control, "type d_N {RN} {%reg},
			 * l_N" for control ops that also produce a value, "type d_N ="
			 * for data ops. */
			if (flags & IR_OP_FLAG_CONTROL) {
				if (!(flags & IR_OP_FLAG_MEM) || insn->type == IR_VOID) {
					fprintf(f, "\tl_%d = ", i);
				} else {
					fprintf(f, "\t%s d_%d", ir_type_cname[insn->type], i);
					if (ctx->vregs && ctx->vregs[i]) {
						fprintf(f, " {R%d}", ctx->vregs[i]);
					}
					if (ctx->regs) {
						int8_t reg = ctx->regs[i][0];
						if (reg != IR_REG_NONE) {
							fprintf(f, " {%%%s%s}", ir_reg_name(IR_REG_NUM(reg), insn->type),
								(reg & (IR_REG_SPILL_STORE|IR_REG_SPILL_SPECIAL)) ? ":store" : "");
						}
					}
					fprintf(f, ", l_%d = ", i);
				}
			} else {
				fprintf(f, "\t");
				if (flags & IR_OP_FLAG_DATA) {
					fprintf(f, "%s d_%d", ir_type_cname[insn->type], i);
					if (ctx->vregs && ctx->vregs[i]) {
						fprintf(f, " {R%d}", ctx->vregs[i]);
					}
					if (ctx->regs) {
						int8_t reg = ctx->regs[i][0];
						if (reg != IR_REG_NONE) {
							fprintf(f, " {%%%s%s}", ir_reg_name(IR_REG_NUM(reg), insn->type),
								(reg & (IR_REG_SPILL_STORE|IR_REG_SPILL_SPECIAL)) ? ":store" : "");
						}
					}
					fprintf(f, " = ");
				}
			}
			fprintf(f, "%s", ir_op_name[insn->op]);
			n = ir_operands_count(ctx, insn);
			/* Print "/count" for variadic ops with a non-default arity. */
			if ((insn->op == IR_MERGE || insn->op == IR_LOOP_BEGIN) && n != 2) {
				fprintf(f, "/%d", n);
			} else if ((insn->op == IR_CALL || insn->op == IR_TAILCALL) && n != 2) {
				fprintf(f, "/%d", n - 2);
			} else if (insn->op == IR_PHI && n != 3) {
				fprintf(f, "/%d", n - 1);
			} else if (insn->op == IR_SNAPSHOT) {
				fprintf(f, "/%d", n - 1);
			}
			/* Operand list: "first" tracks whether "(" was emitted yet. */
			first = 1;
			for (j = 1, p = insn->ops + 1; j <= n; j++, p++) {
				uint32_t opnd_kind = IR_OPND_KIND(flags, j);

				ref = *p;
				if (ref) {
					switch (opnd_kind) {
						case IR_OPND_DATA:
							if (IR_IS_CONST_REF(ref)) {
								fprintf(f, "%sc_%d", first ? "(" : ", ", -ref);
							} else {
								fprintf(f, "%sd_%d", first ? "(" : ", ", ref);
							}
							if (ctx->vregs && ref > 0 && ctx->vregs[ref]) {
								fprintf(f, " {R%d}", ctx->vregs[ref]);
							}
							if (ctx->regs) {
								int8_t *regs = ctx->regs[i];
								int8_t reg = regs[j];
								if (reg != IR_REG_NONE) {
									fprintf(f, " {%%%s%s}", ir_reg_name(IR_REG_NUM(reg), ctx->ir_base[ref].type),
										(reg & (IR_REG_SPILL_LOAD|IR_REG_SPILL_SPECIAL)) ? ":load" : "");
								}
							}
							first = 0;
							break;
						case IR_OPND_CONTROL:
						case IR_OPND_CONTROL_DEP:
						case IR_OPND_CONTROL_REF:
							fprintf(f, "%sl_%d", first ? "(" : ", ", ref);
							first = 0;
							break;
						case IR_OPND_STR:
							fprintf(f, "%s\"%s\"", first ? "(" : ", ", ir_get_str(ctx, ref));
							first = 0;
							break;
						case IR_OPND_PROB:
							if (ref == 0) {
								break;
							}
							IR_FALLTHROUGH;
						case IR_OPND_NUM:
							fprintf(f, "%s%d", first ? "(" : ", ", ref);
							first = 0;
							break;
					}
				} else if (opnd_kind == IR_OPND_NUM) {
					fprintf(f, "%s%d", first ? "(" : ", ", ref);
					first = 0;
				} else if (IR_IS_REF_OPND_KIND(opnd_kind) && j != n) {
					/* Keep operand positions aligned for trailing refs. */
					fprintf(f, "%snull", first ? "(" : ", ");
					first = 0;
				}
			}
			if (first) {
				fprintf(f, ";");
			} else {
				fprintf(f, ");");
			}
			/* Append binding and matched-rule annotations, when present. */
			if (((flags & IR_OP_FLAG_DATA) || ((flags & IR_OP_FLAG_MEM) && insn->type != IR_VOID)) && ctx->binding) {
				ir_ref var = ir_binding_find(ctx, i);
				if (var) {
					IR_ASSERT(var < 0);
					fprintf(f, " # BIND(0x%x);", -var);
				}
			}
			if (ctx->rules) {
				uint32_t rule = ctx->rules[i];
				uint32_t id = rule & ~(IR_FUSED|IR_SKIPPED|IR_SIMPLE);

				if (id < IR_LAST_OP) {
					fprintf(f, " # RULE(%s", ir_op_name[id]);
				} else {
					IR_ASSERT(id > IR_LAST_OP /*&& id < IR_LAST_RULE*/);
					fprintf(f, " # RULE(%s", ir_rule_name[id - IR_LAST_OP]);
				}
				if (rule & IR_FUSED) {
					fprintf(f, ":FUSED");
				}
				if (rule & IR_SKIPPED) {
					fprintf(f, ":SKIPPED");
				}
				if (rule & IR_SIMPLE) {
					fprintf(f, ":SIMPLE");
				}
				fprintf(f, ")");
			}
			fprintf(f, "\n");
			/* Advance past the extra slots of multi-slot instructions. */
			n = ir_insn_inputs_to_len(n);
			i += n;
			insn += n;
		}
		/* SSA-deconstruction moves feeding the PHIs of the successor. */
		if (bb->flags & IR_BB_DESSA_MOVES) {
			uint32_t succ;
			ir_block *succ_bb;
			ir_use_list *use_list;
			ir_ref k, i, *p, use_ref, input;
			ir_insn *use_insn;

			IR_ASSERT(bb->successors_count == 1);
			succ = ctx->cfg_edges[bb->successors];
			succ_bb = &ctx->cfg_blocks[succ];
			IR_ASSERT(succ_bb->predecessors_count > 1);
			use_list = &ctx->use_lists[succ_bb->start];
			/* k = this block's position among the successor's predecessors. */
			k = ir_phi_input_number(ctx, succ_bb, b);
			for (i = 0, p = &ctx->use_edges[use_list->refs]; i < use_list->count; i++, p++) {
				use_ref = *p;
				use_insn = &ctx->ir_base[use_ref];
				if (use_insn->op == IR_PHI) {
					input = ir_insn_op(use_insn, k);
					if (IR_IS_CONST_REF(input)) {
						fprintf(f, "\t# DESSA MOV c_%d", -input);
					} else if (ctx->vregs[input] != ctx->vregs[use_ref]) {
						fprintf(f, "\t# DESSA MOV d_%d {R%d}", input, ctx->vregs[input]);
					} else {
						continue;
					}
					if (ctx->regs) {
						int8_t *regs = ctx->regs[use_ref];
						int8_t reg = regs[k];
						if (reg != IR_REG_NONE) {
							fprintf(f, " {%%%s%s}", ir_reg_name(IR_REG_NUM(reg), ctx->ir_base[input].type),
								(reg & (IR_REG_SPILL_LOAD|IR_REG_SPILL_SPECIAL)) ? ":load" : "");
						}
					}
					fprintf(f, " -> d_%d {R%d}", use_ref, ctx->vregs[use_ref]);
					if (ctx->regs) {
						int8_t reg = ctx->regs[use_ref][0];
						if (reg != IR_REG_NONE) {
							fprintf(f, " {%%%s%s}", ir_reg_name(IR_REG_NUM(reg), ctx->ir_base[use_ref].type),
								(reg & (IR_REG_SPILL_STORE|IR_REG_SPILL_SPECIAL)) ? ":store" : "");
						}
					}
					fprintf(f, "\n");
				}
			}
		}
		/* Print the block's outgoing control flow as a comment. */
		insn = &ctx->ir_base[bb->end];
		if (insn->op == IR_END || insn->op == IR_LOOP_END) {
			uint32_t succ;

			if (bb->successors_count == 1) {
				succ = ctx->cfg_edges[bb->successors];
			} else {
				/* END may have a fake control edge to ENTRY */
				IR_ASSERT(bb->successors_count == 2);
				succ = ctx->cfg_edges[bb->successors];
				if (ctx->ir_base[ctx->cfg_blocks[succ].start].op == IR_ENTRY) {
					succ = ctx->cfg_edges[bb->successors + 1];
#ifdef IR_DEBUG
				} else {
					uint32_t fake_succ = ctx->cfg_edges[bb->successors + 1];
					IR_ASSERT(ctx->ir_base[ctx->cfg_blocks[fake_succ].start].op == IR_ENTRY);
#endif
				}
			}
			if (succ != b + 1) {
				fprintf(f, "\t# GOTO BB%d\n", succ);
			}
		} else if (insn->op == IR_IF) {
			uint32_t true_block, false_block, *p;

			/* The two successor edges may appear in either order. */
			p = &ctx->cfg_edges[bb->successors];
			true_block = *p;
			if (ctx->ir_base[ctx->cfg_blocks[true_block].start].op == IR_IF_TRUE) {
				false_block = *(p+1);
				IR_ASSERT(ctx->ir_base[ctx->cfg_blocks[false_block].start].op == IR_IF_FALSE);
			} else {
				false_block = true_block;
				IR_ASSERT(ctx->ir_base[ctx->cfg_blocks[false_block].start].op == IR_IF_FALSE);
				true_block = *(p+1);
				IR_ASSERT(ctx->ir_base[ctx->cfg_blocks[true_block].start].op == IR_IF_TRUE);
			}
			fprintf(f, "\t# IF_TRUE BB%d, IF_FALSE BB%d\n", true_block, false_block);
		} else if (insn->op == IR_SWITCH) {
			fprintf(f, "\t# SWITCH ...\n");
		}
	}
	fprintf(f, "}\n");
}

101
ext/opcache/jit/ir/ir_elf.h Normal file
View File

@@ -0,0 +1,101 @@
/*
* IR - Lightweight JIT Compilation Framework
* (ELF header definitions)
* Copyright (C) 2022 Zend by Perforce.
* Authors: Dmitry Stogov <dmitry@php.net>
*/
#ifndef IR_ELF
#define IR_ELF
#if defined(IR_TARGET_X64) || defined(IR_TARGET_AARCH64)
# define ELF64
#else
# undef ELF64
#endif
/* ELF file header; matches Elf32_Ehdr/Elf64_Ehdr depending on whether ELF64
 * is defined (uintptr_t stands in for Elf32_Addr/Elf64_Addr). */
typedef struct _ir_elf_header {
	uint8_t emagic[4];   /* magic: "\177ELF" */
	uint8_t eclass;      /* ELFCLASS32 / ELFCLASS64 */
	uint8_t eendian;     /* ELFDATA2LSB / ELFDATA2MSB */
	uint8_t eversion;
	uint8_t eosabi;
	uint8_t eabiversion;
	uint8_t epad[7];
	uint16_t type;       /* e_type (ET_REL, ET_EXEC, ...) */
	uint16_t machine;    /* e_machine: target architecture */
	uint32_t version;
	uintptr_t entry;     /* entry point address */
	uintptr_t phofs;     /* program header table file offset */
	uintptr_t shofs;     /* section header table file offset */
	uint32_t flags;
	uint16_t ehsize;     /* size of this header */
	uint16_t phentsize;  /* size of one program header entry */
	uint16_t phnum;      /* number of program header entries */
	uint16_t shentsize;  /* size of one section header entry */
	uint16_t shnum;      /* number of section header entries */
	uint16_t shstridx;   /* index of the section-name string table */
} ir_elf_header;
/* ELF section header; matches Elf32_Shdr/Elf64_Shdr. */
typedef struct ir_elf_sectheader {
	uint32_t name;      /* offset of the section name in the string table */
	uint32_t type;      /* sh_type: one of ELFSECT_TYPE_* */
	uintptr_t flags;    /* sh_flags: combination of ELFSECT_FLAGS_* */
	uintptr_t addr;     /* virtual address at execution */
	uintptr_t ofs;      /* file offset of the section data */
	uintptr_t size;     /* section size in bytes */
	uint32_t link;      /* sh_link: index of an associated section */
	uint32_t info;      /* sh_info: extra, type-dependent information */
	uintptr_t align;    /* required alignment */
	uintptr_t entsize;  /* entry size for table-like sections */
} ir_elf_sectheader;
/* Special section header index: symbol has an absolute value (SHN_ABS). */
#define ELFSECT_IDX_ABS 0xfff1

/* Section types (sh_type, SHT_*). */
enum {
	ELFSECT_TYPE_PROGBITS = 1,
	ELFSECT_TYPE_SYMTAB = 2,
	ELFSECT_TYPE_STRTAB = 3,
	ELFSECT_TYPE_NOBITS = 8,
	ELFSECT_TYPE_DYNSYM = 11,
};

/* Section flags (sh_flags, SHF_*). */
#define ELFSECT_FLAGS_WRITE (1 << 0)
#define ELFSECT_FLAGS_ALLOC (1 << 1)
#define ELFSECT_FLAGS_EXEC (1 << 2)
#define ELFSECT_FLAGS_TLS (1 << 10)
/* Symbol table entry (Elf32_Sym / Elf64_Sym); the 32- and 64-bit layouts
 * differ in field order and in the width of "value"/"size". */
typedef struct ir_elf_symbol {
#ifdef ELF64
	uint32_t name;     /* offset of the symbol name in the string table */
	uint8_t info;      /* binding (high nibble) and type (low nibble), see ELFSYM_INFO() */
	uint8_t other;
	uint16_t sectidx;  /* defining section index (or ELFSECT_IDX_ABS) */
	uintptr_t value;
	uint64_t size;
#else
	uint32_t name;     /* offset of the symbol name in the string table */
	uintptr_t value;
	uint32_t size;
	uint8_t info;      /* binding (high nibble) and type (low nibble), see ELFSYM_INFO() */
	uint8_t other;
	uint16_t sectidx;  /* defining section index (or ELFSECT_IDX_ABS) */
#endif
} ir_elf_symbol;
/* Accessors for the packed st_info byte: high nibble = binding, low nibble = type. */
#define ELFSYM_BIND(info) ((info) >> 4)
#define ELFSYM_TYPE(info) ((info) & 0xf)
#define ELFSYM_INFO(bind, type) (((bind) << 4) | (type))

/* Symbol types (STT_*).
 * Fix: STT_OBJECT ("data") is 1 per the ELF specification; the previous
 * value 2 collided with ELFSYM_TYPE_FUNC, mislabeling data symbols as
 * functions in emitted symbol tables. */
enum {
	ELFSYM_TYPE_DATA = 1, /* STT_OBJECT */
	ELFSYM_TYPE_FUNC = 2, /* STT_FUNC */
	ELFSYM_TYPE_FILE = 4, /* STT_FILE */
};

/* Symbol bindings (STB_*). */
enum {
	ELFSYM_BIND_LOCAL = 0,
	ELFSYM_BIND_GLOBAL = 1,
};
#endif

View File

@@ -0,0 +1,608 @@
/*
* IR - Lightweight JIT Compilation Framework
* (Native code generator based on DynAsm)
* Copyright (C) 2022 Zend by Perforce.
* Authors: Dmitry Stogov <dmitry@php.net>
*/
#include "ir.h"
#if defined(IR_TARGET_X86) || defined(IR_TARGET_X64)
# include "ir_x86.h"
#elif defined(IR_TARGET_AARCH64)
# include "ir_aarch64.h"
#else
# error "Unknown IR target"
#endif
#include "ir_private.h"
#ifndef _WIN32
# include <dlfcn.h>
#else
# define WIN32_LEAN_AND_MEAN
# include <windows.h>
# include <psapi.h>
#endif
#define DASM_M_GROW(ctx, t, p, sz, need) \
do { \
size_t _sz = (sz), _need = (need); \
if (_sz < _need) { \
if (_sz < 16) _sz = 16; \
while (_sz < _need) _sz += _sz; \
(p) = (t *)ir_mem_realloc((p), _sz); \
(sz) = _sz; \
} \
} while(0)
#define DASM_M_FREE(ctx, p, sz) ir_mem_free(p)
#if IR_DEBUG
# define DASM_CHECKS
#endif
/* One elementary reg->reg move of a parallel copy sequence. */
typedef struct _ir_copy {
	ir_type type;
	ir_reg from;
	ir_reg to;
} ir_copy;
/* A constant/memory -> register move that is delayed until after the
 * parallel reg->reg copies (it cannot be clobbered by them). */
typedef struct _ir_delayed_copy {
	ir_ref input;   /* source IR reference (constant or in-memory value) */
	ir_ref output;  /* destination IR reference */
	ir_type type;
	ir_reg from;
	ir_reg to;
} ir_delayed_copy;
#if IR_REG_INT_ARGS
static const int8_t _ir_int_reg_params[IR_REG_INT_ARGS];
#else
static const int8_t *_ir_int_reg_params;
#endif
#if IR_REG_FP_ARGS
static const int8_t _ir_fp_reg_params[IR_REG_FP_ARGS];
#else
static const int8_t *_ir_fp_reg_params;
#endif
#ifdef IR_HAVE_FASTCALL
/* Argument register tables for the fastcall calling convention. */
static const int8_t _ir_int_fc_reg_params[IR_REG_INT_FCARGS];
static const int8_t *_ir_fp_fc_reg_params;

/* Check whether the call instruction "insn" targets a fastcall function.
 * Only meaningful on 32-bit targets (sizeof(void*) == 4); on 64-bit targets
 * this always returns 0. */
bool ir_is_fastcall(const ir_ctx *ctx, const ir_insn *insn)
{
	if (sizeof(void*) == 4) {
		if (IR_IS_CONST_REF(insn->op2)) {
			/* direct call: flags are stored on the constant callee reference */
			return (ctx->ir_base[insn->op2].const_flags & IR_CONST_FASTCALL_FUNC) != 0;
		} else if (ctx->ir_base[insn->op2].op == IR_BITCAST) {
			/* indirect call through a BITCAST: op2 of the BITCAST carries the flags */
			return (ctx->ir_base[insn->op2].op2 & IR_CONST_FASTCALL_FUNC) != 0;
		}
		return 0;
	}
	return 0;
}
#else
/* No fastcall support on this target. */
bool ir_is_fastcall(const ir_ctx *ctx, const ir_insn *insn)
{
	return 0;
}
#endif
/* Check whether the call instruction "insn" targets a variadic function. */
bool ir_is_vararg(const ir_ctx *ctx, ir_insn *insn)
{
	const ir_insn *callee = &ctx->ir_base[insn->op2];

	if (IR_IS_CONST_REF(insn->op2)) {
		/* direct call: flags live on the constant callee reference */
		return (callee->const_flags & IR_CONST_VARARG_FUNC) != 0;
	}
	if (callee->op == IR_BITCAST) {
		/* indirect call through a BITCAST: op2 of the BITCAST carries the flags */
		return (callee->op2 & IR_CONST_VARARG_FUNC) != 0;
	}
	return 0;
}
/* Return the matcher rule assigned to instruction "ref" by ir_match().
 * Constants never get rules, hence the assertion. */
IR_ALWAYS_INLINE uint32_t ir_rule(const ir_ctx *ctx, ir_ref ref)
{
	IR_ASSERT(!IR_IS_CONST_REF(ref));
	return ctx->rules[ref];
}
/* Check whether "ref" belongs to the basic block currently being matched.
 * ctx->bb_start is set by ir_match() to the block's first instruction, so
 * anything referenced after it is in the same block (used to restrict
 * instruction fusion to a single block). */
IR_ALWAYS_INLINE bool ir_in_same_block(ir_ctx *ctx, ir_ref ref)
{
	return ref > ctx->bb_start;
}
/* Return the register that receives the PARAM instruction "ref" according to
 * the target calling convention, or IR_REG_NONE when the parameter is passed
 * on the stack. Walks all PARAM uses of the start node (ref 1) in order to
 * determine the position of "ref" among integer and FP parameters. */
static ir_reg ir_get_param_reg(const ir_ctx *ctx, ir_ref ref)
{
	ir_use_list *use_list = &ctx->use_lists[1];
	int i;
	ir_ref use, *p;
	ir_insn *insn;
	int int_param = 0;
	int fp_param = 0;
	int int_reg_params_count = IR_REG_INT_ARGS;
	int fp_reg_params_count = IR_REG_FP_ARGS;
	const int8_t *int_reg_params = _ir_int_reg_params;
	const int8_t *fp_reg_params = _ir_fp_reg_params;

#ifdef IR_HAVE_FASTCALL
	/* 32-bit fastcall functions take their leading args in a different register set */
	if (sizeof(void*) == 4 && (ctx->flags & IR_FASTCALL_FUNC)) {
		int_reg_params_count = IR_REG_INT_FCARGS;
		fp_reg_params_count = IR_REG_FP_FCARGS;
		int_reg_params = _ir_int_fc_reg_params;
		fp_reg_params = _ir_fp_fc_reg_params;
	}
#endif
	for (i = 0, p = &ctx->use_edges[use_list->refs]; i < use_list->count; i++, p++) {
		use = *p;
		insn = &ctx->ir_base[use];
		if (insn->op == IR_PARAM) {
			if (IR_IS_TYPE_INT(insn->type)) {
				if (use == ref) {
					if (int_param < int_reg_params_count) {
						return int_reg_params[int_param];
					} else {
						return IR_REG_NONE; /* passed on the stack */
					}
				}
				int_param++;
#ifdef _WIN64
				/* WIN64 calling convention uses a common counter for int and fp registers */
				fp_param++;
#endif
			} else {
				IR_ASSERT(IR_IS_TYPE_FP(insn->type));
				if (use == ref) {
					if (fp_param < fp_reg_params_count) {
						return fp_reg_params[fp_param];
					} else {
						return IR_REG_NONE; /* passed on the stack */
					}
				}
				fp_param++;
#ifdef _WIN64
				/* WIN64 calling convention uses a common counter for int and fp registers */
				int_param++;
#endif
			}
		}
	}
	return IR_REG_NONE;
}
/* Fill "regs" (indexed by call operand number; arguments start at op3) with
 * the registers that receive the arguments of the call instruction "insn".
 * Stack-passed arguments get IR_REG_NONE. Returns one past the index of the
 * last register-passed operand, or 0 when no argument goes in a register. */
static int ir_get_args_regs(const ir_ctx *ctx, const ir_insn *insn, int8_t *regs)
{
	int j, n;
	ir_type type;
	int int_param = 0;
	int fp_param = 0;
	int count = 0;
	int int_reg_params_count = IR_REG_INT_ARGS;
	int fp_reg_params_count = IR_REG_FP_ARGS;
	const int8_t *int_reg_params = _ir_int_reg_params;
	const int8_t *fp_reg_params = _ir_fp_reg_params;

#ifdef IR_HAVE_FASTCALL
	/* 32-bit fastcall callees take their leading args in a different register set */
	if (sizeof(void*) == 4 && ir_is_fastcall(ctx, insn)) {
		int_reg_params_count = IR_REG_INT_FCARGS;
		fp_reg_params_count = IR_REG_FP_FCARGS;
		int_reg_params = _ir_int_fc_reg_params;
		fp_reg_params = _ir_fp_fc_reg_params;
	}
#endif
	n = insn->inputs_count;
	/* only the first IR_MAX_REG_ARGS arguments may live in registers */
	n = IR_MIN(n, IR_MAX_REG_ARGS + 2);
	for (j = 3; j <= n; j++) {
		type = ctx->ir_base[ir_insn_op(insn, j)].type;
		if (IR_IS_TYPE_INT(type)) {
			if (int_param < int_reg_params_count) {
				regs[j] = int_reg_params[int_param];
				count = j + 1;
			} else {
				regs[j] = IR_REG_NONE; /* passed on the stack */
			}
			int_param++;
#ifdef _WIN64
			/* WIN64 calling convention uses a common counter for int and fp registers */
			fp_param++;
#endif
		} else {
			IR_ASSERT(IR_IS_TYPE_FP(type));
			if (fp_param < fp_reg_params_count) {
				regs[j] = fp_reg_params[fp_param];
				count = j + 1;
			} else {
				regs[j] = IR_REG_NONE; /* passed on the stack */
			}
			fp_param++;
#ifdef _WIN64
			/* WIN64 calling convention uses a common counter for int and fp registers */
			int_param++;
#endif
		}
	}
	return count;
}
/* Check whether "r1" and "r2" are assigned to the same stack spill slot. */
static bool ir_is_same_mem(const ir_ctx *ctx, ir_ref r1, ir_ref r2)
{
	const ir_live_interval *iv1, *iv2;

	/* constants never occupy spill slots */
	if (IR_IS_CONST_REF(r1) || IR_IS_CONST_REF(r2)) {
		return 0;
	}
	IR_ASSERT(ctx->vregs[r1] && ctx->vregs[r2]);
	iv1 = ctx->live_intervals[ctx->vregs[r1]];
	iv2 = ctx->live_intervals[ctx->vregs[r2]];
	IR_ASSERT(iv1 && iv2);
	IR_ASSERT(iv1->stack_spill_pos != -1 && iv2->stack_spill_pos != -1);
	return iv1->stack_spill_pos == iv2->stack_spill_pos;
}
/* Check whether the spill slot of "r1" is located at stack position "offset". */
static bool ir_is_same_mem_var(const ir_ctx *ctx, ir_ref r1, int32_t offset)
{
	const ir_live_interval *ival;

	/* constants never occupy spill slots */
	if (IR_IS_CONST_REF(r1)) {
		return 0;
	}
	IR_ASSERT(ctx->vregs[r1]);
	ival = ctx->live_intervals[ctx->vregs[r1]];
	IR_ASSERT(ival);
	IR_ASSERT(ival->stack_spill_pos != -1);
	return ival->stack_spill_pos == offset;
}
/* Resolve the address of symbol "name".
 * On POSIX systems the lookup uses dlsym() in the global namespace;
 * on Windows every loaded module is scanned with GetProcAddress(). */
void *ir_resolve_sym_name(const char *name)
{
	void *handle = NULL;
	void *addr;

#ifndef _WIN32
# ifdef RTLD_DEFAULT
	handle = RTLD_DEFAULT;
# endif
	addr = dlsym(handle, name);
#else
	HMODULE mods[256];
	DWORD cbNeeded;
	uint32_t i = 0;

	/* Quick workaround to prevent *.irt tests failures */
	// TODO: try to find a general solution ???
	if (strcmp(name, "printf") == 0) {
		return (void*)printf;
	}

	addr = NULL;
	/* NOTE(review): the EnumProcessModules result is unchecked and cbNeeded
	 * may exceed sizeof(mods); modules beyond the first 256 are silently
	 * ignored — confirm this is acceptable. */
	EnumProcessModules(GetCurrentProcess(), mods, sizeof(mods), &cbNeeded);
	while(i < (cbNeeded / sizeof(HMODULE))) {
		addr = GetProcAddress(mods[i], name);
		if (addr) {
			return addr;
		}
		i++;
	}
#endif
	IR_ASSERT(addr != NULL);
	return addr;
}
#ifdef IR_SNAPSHOT_HANDLER_DCL
IR_SNAPSHOT_HANDLER_DCL();
#endif
/* Resolve the target address for the call/jump instruction "insn".
 * "addr_insn" is the constant describing the callee: an IR_FUNC is resolved
 * by name (through the loader callback when available), while IR_ADDR and
 * IR_FUNC_ADDR already carry the address value. */
static void *ir_jmp_addr(ir_ctx *ctx, ir_insn *insn, ir_insn *addr_insn)
{
	void *addr;

	IR_ASSERT(addr_insn->type == IR_ADDR);
	if (addr_insn->op == IR_FUNC) {
		addr = (ctx->loader && ctx->loader->resolve_sym_name) ?
			ctx->loader->resolve_sym_name(ctx->loader, ir_get_str(ctx, addr_insn->val.i32)) :
			ir_resolve_sym_name(ir_get_str(ctx, addr_insn->val.i32));
	} else {
		IR_ASSERT(addr_insn->op == IR_ADDR || addr_insn->op == IR_FUNC_ADDR);
		addr = (void*)addr_insn->val.addr;
	}
#ifdef IR_SNAPSHOT_HANDLER
	/* Let the embedder adjust the address when the call follows a SNAPSHOT */
	if (ctx->ir_base[insn->op1].op == IR_SNAPSHOT) {
		addr = IR_SNAPSHOT_HANDLER(ctx, insn->op1, &ctx->ir_base[insn->op1], addr);
	}
#endif
	return addr;
}
#if defined(__GNUC__)
# pragma GCC diagnostic push
# pragma GCC diagnostic ignored "-Warray-bounds"
# pragma GCC diagnostic ignored "-Wimplicit-fallthrough"
#endif
#if defined(IR_TARGET_X86) || defined(IR_TARGET_X64)
# include "dynasm/dasm_proto.h"
# include "dynasm/dasm_x86.h"
#elif defined(IR_TARGET_AARCH64)
# include "dynasm/dasm_proto.h"
static int ir_add_veneer(dasm_State *Dst, void *buffer, uint32_t ins, int *b, uint32_t *cp, ptrdiff_t offset);
# define DASM_ADD_VENEER ir_add_veneer
# include "dynasm/dasm_arm64.h"
#else
# error "Unknown IR target"
#endif
#if defined(__GNUC__)
# pragma GCC diagnostic pop
#endif
/* Forward Declarations */
static void ir_emit_osr_entry_loads(ir_ctx *ctx, int b, ir_block *bb);
static void ir_emit_dessa_moves(ir_ctx *ctx, int b, ir_block *bb);
#if defined(IR_TARGET_X86) || defined(IR_TARGET_X64)
# include "ir_emit_x86.h"
#elif defined(IR_TARGET_AARCH64)
# include "ir_emit_aarch64.h"
#else
# error "Unknown IR target"
#endif
/* At the OSR (On-Stack Replacement) entry block "b", load every bound value
 * that was allocated a register from its binding stack slot (relative to
 * ctx->spill_base). ctx->osr_entry_loads is a flat list of groups of the
 * form <block, count, ref...>, terminated by a 0 block marker. */
static IR_NEVER_INLINE void ir_emit_osr_entry_loads(ir_ctx *ctx, int b, ir_block *bb)
{
	ir_list *list = (ir_list*)ctx->osr_entry_loads;
	int pos = 0, count, i;
	ir_ref ref;

	IR_ASSERT(ctx->binding);
	IR_ASSERT(list);
	/* locate the group that belongs to block "b" */
	while (1) {
		i = ir_list_at(list, pos);
		if (b == i) {
			break;
		}
		IR_ASSERT(i != 0); /* end marker */
		pos++;
		count = ir_list_at(list, pos);
		pos += count + 1;
	}
	pos++;
	count = ir_list_at(list, pos);
	pos++;

	for (i = 0; i < count; i++, pos++) {
		ref = ir_list_at(list, pos);
		IR_ASSERT(ref >= 0 && ctx->vregs[ref] && ctx->live_intervals[ctx->vregs[ref]]);
		if (!(ctx->live_intervals[ctx->vregs[ref]]->flags & IR_LIVE_INTERVAL_SPILLED)) {
			/* not spilled */
			ir_reg reg = ctx->live_intervals[ctx->vregs[ref]]->reg;
			ir_type type = ctx->ir_base[ref].type;
			int32_t offset = -ir_binding_find(ctx, ref);

			IR_ASSERT(offset > 0);
			if (IR_IS_TYPE_INT(type)) {
				ir_emit_load_mem_int(ctx, type, reg, ctx->spill_base, offset);
			} else {
				ir_emit_load_mem_fp(ctx, type, reg, ctx->spill_base, offset);
			}
		} else {
			/* spilled values need no load; the special-spill flag must be set */
			IR_ASSERT(ctx->live_intervals[ctx->vregs[ref]]->flags & IR_LIVE_INTERVAL_SPILL_SPECIAL);
		}
	}
}
/* Emit the out-of-SSA moves at the end of block "b": copy each PHI input
 * (for the CFG edge b -> successor) into the location of the PHI result.
 * Stores to memory destinations are emitted immediately (they cannot clobber
 * source registers); register-to-register moves are collected and emitted as
 * one parallel copy; constant/memory loads into registers are delayed until
 * after the parallel copy. */
static void ir_emit_dessa_moves(ir_ctx *ctx, int b, ir_block *bb)
{
	uint32_t succ, k, n = 0, n2 = 0;
	ir_block *succ_bb;
	ir_use_list *use_list;
	ir_ref i, *p;
	ir_copy *copies;
	ir_delayed_copy *copies2;
	ir_reg tmp_reg = ctx->regs[bb->end][0];
	ir_reg tmp_fp_reg = ctx->regs[bb->end][1];

	IR_ASSERT(bb->successors_count == 1);
	succ = ctx->cfg_edges[bb->successors];
	succ_bb = &ctx->cfg_blocks[succ];
	IR_ASSERT(succ_bb->predecessors_count > 1);
	use_list = &ctx->use_lists[succ_bb->start];
	/* which PHI operand corresponds to the edge coming from "b" */
	k = ir_phi_input_number(ctx, succ_bb, b);

	/* single allocation holding both the parallel and the delayed copy lists */
	copies = ir_mem_malloc(use_list->count * sizeof(ir_copy) + use_list->count * sizeof(ir_delayed_copy));
	copies2 = (ir_delayed_copy*)(copies + use_list->count);

	for (i = 0, p = &ctx->use_edges[use_list->refs]; i < use_list->count; i++, p++) {
		ir_ref ref = *p;
		ir_insn *insn = &ctx->ir_base[ref];

		if (insn->op == IR_PHI) {
			ir_ref input = ir_insn_op(insn, k);
			ir_reg src = ir_get_alocated_reg(ctx, ref, k);
			ir_reg dst = ctx->regs[ref][0];

			if (dst == IR_REG_NONE) {
				/* STORE to memory cannot clobber any input register (do it right now) */
				if (IR_IS_CONST_REF(input)) {
					IR_ASSERT(src == IR_REG_NONE);
#if defined(IR_TARGET_X86) || defined(IR_TARGET_X64)
					/* x86 can store a (sign-extendable) 32-bit immediate directly to memory */
					if (IR_IS_TYPE_INT(insn->type)
					 && (ir_type_size[insn->type] != 8 || IR_IS_SIGNED_32BIT(ctx->ir_base[input].val.i64))) {
						ir_emit_store_imm(ctx, insn->type, ref, ctx->ir_base[input].val.i32);
						continue;
					}
#endif
					/* otherwise go through the scratch register */
					ir_reg tmp = IR_IS_TYPE_INT(insn->type) ? tmp_reg : tmp_fp_reg;

					IR_ASSERT(tmp != IR_REG_NONE);
					ir_emit_load(ctx, insn->type, tmp, input);
					ir_emit_store(ctx, insn->type, ref, tmp);
				} else if (src == IR_REG_NONE) {
					/* memory-to-memory move through the scratch register */
					if (!ir_is_same_mem(ctx, input, ref)) {
						ir_reg tmp = IR_IS_TYPE_INT(insn->type) ? tmp_reg : tmp_fp_reg;

						IR_ASSERT(tmp != IR_REG_NONE);
						ir_emit_load(ctx, insn->type, tmp, input);
						ir_emit_store(ctx, insn->type, ref, tmp);
					}
				} else {
					if (IR_REG_SPILLED(src)) {
						src = IR_REG_NUM(src);
						ir_emit_load(ctx, insn->type, src, input);
						if (ir_is_same_mem(ctx, input, ref)) {
							/* avoid storing back into the slot we just loaded from */
							continue;
						}
					}
					ir_emit_store(ctx, insn->type, ref, src);
				}
			} else if (src == IR_REG_NONE) {
				/* STORE of constant or memory can't be clobbered by parallel reg->reg copies (delay it) */
				copies2[n2].input = input;
				copies2[n2].output = ref;
				copies2[n2].type = insn->type;
				copies2[n2].from = src;
				copies2[n2].to = dst;
				n2++;
			} else {
				IR_ASSERT(!IR_IS_CONST_REF(input));
				if (IR_REG_SPILLED(src)) {
					ir_emit_load(ctx, insn->type, IR_REG_NUM(src), input);
				}
				if (IR_REG_SPILLED(dst) && (!IR_REG_SPILLED(src) || !ir_is_same_mem(ctx, input, ref))) {
					ir_emit_store(ctx, insn->type, ref, IR_REG_NUM(src));
				}
				if (IR_REG_NUM(src) != IR_REG_NUM(dst)) {
					/* Schedule parallel reg->reg copy */
					copies[n].type = insn->type;
					copies[n].from = IR_REG_NUM(src);
					copies[n].to = IR_REG_NUM(dst);
					n++;
				}
			}
		}
	}

	if (n > 0) {
		ir_parallel_copy(ctx, copies, n, tmp_reg, tmp_fp_reg);
	}

	/* finally emit the delayed constant/memory -> register moves */
	for (n = 0; n < n2; n++) {
		ir_ref input = copies2[n].input;
		ir_ref ref = copies2[n].output;
		ir_type type = copies2[n].type;
		ir_reg dst = copies2[n].to;

		IR_ASSERT(dst != IR_REG_NONE);
		if (IR_IS_CONST_REF(input)) {
			ir_emit_load(ctx, type, IR_REG_NUM(dst), input);
		} else {
			IR_ASSERT(copies2[n].from == IR_REG_NONE);
			if (IR_REG_SPILLED(dst) && ir_is_same_mem(ctx, input, ref)) {
				/* avoid LOAD and STORE to the same memory */
				continue;
			}
			ir_emit_load(ctx, type, IR_REG_NUM(dst), input);
		}
		if (IR_REG_SPILLED(dst)) {
			ir_emit_store(ctx, type, ref, IR_REG_NUM(dst));
		}
	}

	ir_mem_free(copies);
}
/* Instruction selection pass: assign a matcher "rule" to every instruction,
 * walking each basic block backwards (via prev_ref) so fusion decisions are
 * made before the fused operands are visited. Also marks empty blocks and
 * fills the table of OSR entry blocks (ctx->entries). Returns 1. */
int ir_match(ir_ctx *ctx)
{
	uint32_t b;
	ir_ref start, ref, *prev_ref;
	ir_block *bb;
	ir_insn *insn;
	uint32_t entries_count = 0;

	ctx->rules = ir_mem_calloc(ctx->insns_count, sizeof(uint32_t));

	prev_ref = ctx->prev_ref;
	if (!prev_ref) {
		ir_build_prev_refs(ctx);
		prev_ref = ctx->prev_ref;
	}

	if (ctx->entries_count) {
		ctx->entries = ir_mem_malloc(ctx->entries_count * sizeof(ir_ref));
	}

	for (b = ctx->cfg_blocks_count, bb = ctx->cfg_blocks + b; b > 0; b--, bb--) {
		IR_ASSERT(!(bb->flags & IR_BB_UNREACHABLE));
		start = bb->start;
		if (UNEXPECTED(bb->flags & IR_BB_ENTRY)) {
			/* register this OSR entry: store its index into the ENTRY's op3
			 * and remember the block */
			IR_ASSERT(entries_count < ctx->entries_count);
			insn = &ctx->ir_base[start];
			IR_ASSERT(insn->op == IR_ENTRY);
			insn->op3 = entries_count;
			ctx->entries[entries_count] = b;
			entries_count++;
		}
		ctx->rules[start] = IR_SKIPPED | IR_NOP;
		ref = bb->end;
		if (bb->successors_count == 1) {
			insn = &ctx->ir_base[ref];
			if (insn->op == IR_END || insn->op == IR_LOOP_END) {
				ctx->rules[ref] = insn->op;
				ref = prev_ref[ref];
				if (ref == start) {
					/* block contains only START and END: mark it empty
					 * (entry blocks only when empty-entry merging is enabled) */
					if (EXPECTED(!(bb->flags & IR_BB_ENTRY))) {
						bb->flags |= IR_BB_EMPTY;
					} else if (ctx->flags & IR_MERGE_EMPTY_ENTRIES) {
						bb->flags |= IR_BB_EMPTY;
						if (ctx->cfg_edges[bb->successors] == b + 1) {
							(bb + 1)->flags |= IR_BB_PREV_EMPTY_ENTRY;
						}
					}
					continue;
				}
			}
		}
		ctx->bb_start = start; /* bb_start is used by matcher to avoid fusion of insns from different blocks */
		while (ref != start) {
			uint32_t rule = ctx->rules[ref];

			if (!rule) {
				ctx->rules[ref] = rule = ir_match_insn(ctx, ref);
			}
			ir_match_insn2(ctx, ref, rule);
			ref = prev_ref[ref];
		}
	}

	if (ctx->entries_count) {
		/* shrink to the number of ENTRY blocks actually seen */
		ctx->entries_count = entries_count;
		if (!entries_count) {
			ir_mem_free(ctx->entries);
			ctx->entries = NULL;
		}
	}

	return 1;
}
/* Translate the spill position of instruction "ref" into a stack frame offset. */
int32_t ir_get_spill_slot_offset(ir_ctx *ctx, ir_ref ref)
{
	const ir_live_interval *ival;

	IR_ASSERT(ref >= 0 && ctx->vregs[ref] && ctx->live_intervals[ctx->vregs[ref]]);
	ival = ctx->live_intervals[ctx->vregs[ref]];
	IR_ASSERT(ival->stack_spill_pos != -1);
	return IR_SPILL_POS_TO_OFFSET(ival->stack_spill_pos);
}

2129
ext/opcache/jit/ir/ir_fold.h Normal file

File diff suppressed because it is too large Load Diff

897
ext/opcache/jit/ir/ir_gcm.c Normal file
View File

@@ -0,0 +1,897 @@
/*
* IR - Lightweight JIT Compilation Framework
* (GCM - Global Code Motion and Scheduler)
* Copyright (C) 2022 Zend by Perforce.
* Authors: Dmitry Stogov <dmitry@php.net>
*
* The GCM algorithm is based on Cliff Click's publication
* See: C. Click. "Global code motion, global value numbering" Submitted to PLDI'95.
*/
#include "ir.h"
#include "ir_private.h"
/* "Schedule early" step of Click's GCM: recursively place instruction "ref"
 * into the dominator-tree-deepest block among the blocks of its inputs.
 * The block number is stored negated in _blocks[] (marking the node as not
 * yet scheduled late) and returned as a positive value. */
static int32_t ir_gcm_schedule_early(ir_ctx *ctx, int32_t *_blocks, ir_ref ref, ir_list *queue_rest)
{
	ir_ref n, *p, input;
	ir_insn *insn;
	uint32_t dom_depth;
	int32_t b, result;
	bool reschedule_late = 1;

	insn = &ctx->ir_base[ref];

	IR_ASSERT(insn->op != IR_PARAM && insn->op != IR_VAR);
	IR_ASSERT(insn->op != IR_PHI && insn->op != IR_PI);

	result = 1; /* default: the first block */
	dom_depth = 0;

	n = insn->inputs_count;
	for (p = insn->ops + 1; n > 0; p++, n--) {
		input = *p;
		if (input > 0) {
			b = _blocks[input];
			if (b == 0) {
				/* input not scheduled yet: schedule it first */
				b = ir_gcm_schedule_early(ctx, _blocks, input, queue_rest);
			} else if (b < 0) {
				b = -b; /* scheduled early but not yet late */
			}
			if (dom_depth < ctx->cfg_blocks[b].dom_depth) {
				dom_depth = ctx->cfg_blocks[b].dom_depth;
				result = b;
			}
			reschedule_late = 0;
		}
	}
	_blocks[ref] = -result;

	if (UNEXPECTED(reschedule_late)) {
		/* Floating nodes that don't depend on other nodes
		 * (e.g. only on constants), have to be scheduled to the
		 * last common ancestor. Otherwise they always go to the
		 * first block.
		 */
		ir_list_push_unchecked(queue_rest, ref);
	}
	return result;
}
/* Last Common Ancestor of blocks b1 and b2 in the dominator tree. */
static int32_t ir_gcm_find_lca(ir_ctx *ctx, int32_t b1, int32_t b2)
{
	/* first lift the deeper block until both are at the same depth ... */
	while (ctx->cfg_blocks[b1].dom_depth > ctx->cfg_blocks[b2].dom_depth) {
		b1 = ctx->cfg_blocks[b1].dom_parent;
	}
	while (ctx->cfg_blocks[b2].dom_depth > ctx->cfg_blocks[b1].dom_depth) {
		b2 = ctx->cfg_blocks[b2].dom_parent;
	}
	/* ... then walk up in lockstep until the paths meet */
	while (b1 != b2) {
		b1 = ctx->cfg_blocks[b1].dom_parent;
		b2 = ctx->cfg_blocks[b2].dom_parent;
	}
	return b1;
}
/* "Schedule late" step of Click's GCM: compute the lowest common ancestor
 * of all use blocks of "ref", then try to hoist the instruction into a less
 * deeply nested block between its late and early positions — except across
 * an OSR ENTRY when the value has no binding and could not be restored.
 * _blocks[ref] is flipped back to positive to mark the node as placed. */
static void ir_gcm_schedule_late(ir_ctx *ctx, int32_t *_blocks, ir_ref ref)
{
	ir_ref n, *p, use;
	ir_insn *insn;
	ir_use_list *use_list;

	IR_ASSERT(_blocks[ref] < 0);
	_blocks[ref] = -_blocks[ref];
	use_list = &ctx->use_lists[ref];
	n = use_list->count;
	if (n) {
		int32_t lca, b;

		insn = &ctx->ir_base[ref];
		IR_ASSERT(insn->op != IR_PARAM && insn->op != IR_VAR);
		IR_ASSERT(insn->op != IR_PHI && insn->op != IR_PI);

		lca = 0;
		for (p = &ctx->use_edges[use_list->refs]; n > 0; p++, n--) {
			use = *p;
			b = _blocks[use];
			if (!b) {
				continue;
			} else if (b < 0) {
				/* the use itself is not placed yet: place it first */
				ir_gcm_schedule_late(ctx, _blocks, use);
				b = _blocks[use];
				IR_ASSERT(b != 0);
			}
			insn = &ctx->ir_base[use];
			if (insn->op == IR_PHI) {
				/* a PHI use consumes the value at the end of the matching
				 * MERGE predecessor, not in the PHI's own block */
				ir_ref *p = insn->ops + 2; /* PHI data inputs */
				ir_ref *q = ctx->ir_base[insn->op1].ops + 1; /* MERGE inputs */
				ir_ref n = insn->inputs_count - 1;

				for (;n > 0; p++, q++, n--) {
					if (*p == ref) {
						b = _blocks[*q];
						lca = !lca ? b : ir_gcm_find_lca(ctx, lca, b);
					}
				}
			} else {
				lca = !lca ? b : ir_gcm_find_lca(ctx, lca, b);
			}
		}
		IR_ASSERT(lca != 0 && "No Common Ancestor");
		b = lca;

		if (b != _blocks[ref]) {
			ir_block *bb = &ctx->cfg_blocks[b];
			uint32_t loop_depth = bb->loop_depth;

			if (loop_depth) {
				uint32_t flags;

				use_list = &ctx->use_lists[ref];
				if (use_list->count == 1) {
					/* a value consumed only by a branch/guard stays in the
					 * block where it is used */
					use = ctx->use_edges[use_list->refs];
					insn = &ctx->ir_base[use];
					if (insn->op == IR_IF || insn->op == IR_GUARD || insn->op == IR_GUARD_NOT) {
						_blocks[ref] = b;
						return;
					}
				}

				flags = (bb->flags & IR_BB_LOOP_HEADER) ? bb->flags : ctx->cfg_blocks[bb->loop_header].flags;
				if ((flags & IR_BB_LOOP_WITH_ENTRY)
				 && !(ctx->binding && ir_binding_find(ctx, ref))) {
					/* Don't move loop invariant code across an OSR ENTRY if we can't restore it */
				} else {
					/* walk up the dominator tree looking for the least deeply
					 * nested block between the late (lca) and early positions */
					do {
						lca = bb->dom_parent;
						bb = &ctx->cfg_blocks[lca];
						if (bb->loop_depth < loop_depth) {
							if (!bb->loop_depth) {
								b = lca;
								break;
							}
							flags = (bb->flags & IR_BB_LOOP_HEADER) ? bb->flags : ctx->cfg_blocks[bb->loop_header].flags;
							if ((flags & IR_BB_LOOP_WITH_ENTRY)
							 && !(ctx->binding && ir_binding_find(ctx, ref))) {
								break;
							}
							loop_depth = bb->loop_depth;
							b = lca;
						}
					} while (lca != _blocks[ref]);
				}
			}
			_blocks[ref] = b;
			if (ctx->ir_base[ref + 1].op == IR_OVERFLOW) {
				/* OVERFLOW is a projection and must be scheduled together with previous ADD/SUB/MUL_OV */
				_blocks[ref + 1] = b;
			}
		}
	}
}
/* Final placement for floating nodes that depend only on constants (queued
 * by ir_gcm_schedule_early): put them at the lowest common ancestor of
 * their uses, without the loop-hoisting refinement of schedule_late. */
static void ir_gcm_schedule_rest(ir_ctx *ctx, int32_t *_blocks, ir_ref ref)
{
	ir_ref n, *p, use;
	ir_insn *insn;

	IR_ASSERT(_blocks[ref] < 0);
	_blocks[ref] = -_blocks[ref];
	n = ctx->use_lists[ref].count;
	if (n) {
		uint32_t lca;
		int32_t b;

		insn = &ctx->ir_base[ref];
		IR_ASSERT(insn->op != IR_PARAM && insn->op != IR_VAR);
		IR_ASSERT(insn->op != IR_PHI && insn->op != IR_PI);

		lca = 0;
		for (p = &ctx->use_edges[ctx->use_lists[ref].refs]; n > 0; p++, n--) {
			use = *p;
			b = _blocks[use];
			if (!b) {
				continue;
			} else if (b < 0) {
				/* the use itself is not placed yet: place it first */
				ir_gcm_schedule_late(ctx, _blocks, use);
				b = _blocks[use];
				IR_ASSERT(b != 0);
			}
			insn = &ctx->ir_base[use];
			if (insn->op == IR_PHI) {
				/* a PHI use consumes the value at the end of the matching
				 * MERGE predecessor, not in the PHI's own block */
				ir_ref *p = insn->ops + 2; /* PHI data inputs */
				ir_ref *q = ctx->ir_base[insn->op1].ops + 1; /* MERGE inputs */
				ir_ref n = insn->inputs_count - 1;

				for (;n > 0; p++, q++, n--) {
					if (*p == ref) {
						b = _blocks[*q];
						lca = !lca ? b : ir_gcm_find_lca(ctx, lca, b);
					}
				}
			} else {
				lca = !lca ? b : ir_gcm_find_lca(ctx, lca, b);
			}
		}
		IR_ASSERT(lca != 0 && "No Common Ancestor");
		b = lca;
		_blocks[ref] = b;
		if (ctx->ir_base[ref + 1].op == IR_OVERFLOW) {
			/* OVERFLOW is a projection and must be scheduled together with previous ADD/SUB/MUL_OV */
			_blocks[ref + 1] = b;
		}
	}
}
/* Global Code Motion (after C. Click): compute a basic block for every
 * floating instruction and store it in ctx->cfg_map. Control instructions
 * and control-dependent ones (PARAM, VAR, PHI, PI) are pinned to their
 * blocks first; the remaining nodes are scheduled early, then late, and
 * "rest" nodes (those depending only on constants) are placed at the LCA
 * of their uses. Returns 1. */
int ir_gcm(ir_ctx *ctx)
{
	ir_ref k, n, *p, ref;
	ir_block *bb;
	ir_list queue_early;
	ir_list queue_late;
	ir_list queue_rest;
	int32_t *_blocks, b;
	ir_insn *insn, *use_insn;
	ir_use_list *use_list;

	IR_ASSERT(ctx->cfg_map);
	_blocks = (int32_t*)ctx->cfg_map;

	ir_list_init(&queue_early, ctx->insns_count);

	if (ctx->cfg_blocks_count == 1) {
		/* single-block function: everything goes into block 1 */
		ref = ctx->cfg_blocks[1].end;
		do {
			insn = &ctx->ir_base[ref];
			_blocks[ref] = 1; /* pin to block */
			if (insn->inputs_count > 1) {
				/* insn has input data edges */
				ir_list_push_unchecked(&queue_early, ref);
			}
			ref = insn->op1; /* control predecessor */
		} while (ref != 1); /* IR_START */
		_blocks[1] = 1; /* pin to block */

		use_list = &ctx->use_lists[1];
		n = use_list->count;
		for (p = &ctx->use_edges[use_list->refs]; n > 0; n--, p++) {
			ref = *p;
			use_insn = &ctx->ir_base[ref];
			if (use_insn->op == IR_PARAM || use_insn->op == IR_VAR) {
				ctx->cfg_blocks[1].flags |= (use_insn->op == IR_PARAM) ? IR_BB_HAS_PARAM : IR_BB_HAS_VAR;
				_blocks[ref] = 1; /* pin to block */
			}
		}

		/* Place all live nodes to the first block */
		while (ir_list_len(&queue_early)) {
			ref = ir_list_pop(&queue_early);
			insn = &ctx->ir_base[ref];
			n = insn->inputs_count;
			for (p = insn->ops + 1; n > 0; p++, n--) {
				ref = *p;
				if (ref > 0 && _blocks[ref] == 0) {
					_blocks[ref] = 1;
					ir_list_push_unchecked(&queue_early, ref);
				}
			}
		}

		ir_list_free(&queue_early);

		return 1;
	}

	ir_list_init(&queue_late, ctx->insns_count);

	/* pin and collect control and control depended (PARAM, VAR, PHI, PI) instructions */
	b = ctx->cfg_blocks_count;
	for (bb = ctx->cfg_blocks + b; b > 0; bb--, b--) {
		IR_ASSERT(!(bb->flags & IR_BB_UNREACHABLE));
		ref = bb->end;

		/* process the last instruction of the block */
		insn = &ctx->ir_base[ref];
		_blocks[ref] = b; /* pin to block */
		if (insn->inputs_count > 1) {
			/* insn has input data edges */
			ir_list_push_unchecked(&queue_early, ref);
		}
		ref = insn->op1; /* control predecessor */

		while (ref != bb->start) {
			insn = &ctx->ir_base[ref];
			_blocks[ref] = b; /* pin to block */
			if (insn->inputs_count > 1) {
				/* insn has input data edges */
				ir_list_push_unchecked(&queue_early, ref);
			}
			if (insn->type != IR_VOID) {
				/* value-producing control node (memory op): its uses must be
				 * scheduled late relative to it */
				IR_ASSERT(ir_op_flags[insn->op] & IR_OP_FLAG_MEM);
				ir_list_push_unchecked(&queue_late, ref);
			}
			ref = insn->op1; /* control predecessor */
		}

		/* process the first instruction of the block */
		_blocks[ref] = b; /* pin to block */

		use_list = &ctx->use_lists[ref];
		n = use_list->count;
		if (n > 1) {
			for (p = &ctx->use_edges[use_list->refs]; n > 0; n--, p++) {
				ref = *p;
				use_insn = &ctx->ir_base[ref];
				if (use_insn->op == IR_PHI || use_insn->op == IR_PI) {
					bb->flags |= (use_insn->op == IR_PHI) ? IR_BB_HAS_PHI : IR_BB_HAS_PI;
					if (EXPECTED(ctx->use_lists[ref].count != 0)) {
						_blocks[ref] = b; /* pin to block */
						ir_list_push_unchecked(&queue_early, ref);
						ir_list_push_unchecked(&queue_late, ref);
					}
				} else if (use_insn->op == IR_PARAM) {
					bb->flags |= IR_BB_HAS_PARAM;
					_blocks[ref] = b; /* pin to block */
					if (EXPECTED(ctx->use_lists[ref].count != 0)) {
						ir_list_push_unchecked(&queue_late, ref);
					}
				} else if (use_insn->op == IR_VAR) {
					bb->flags |= IR_BB_HAS_VAR;
					_blocks[ref] = b; /* pin to block */
					if (EXPECTED(ctx->use_lists[ref].count != 0)) {
						/* This is necessary only for VADDR */
						ir_list_push_unchecked(&queue_late, ref);
					}
				}
			}
		}
	}

	ir_list_init(&queue_rest, ctx->insns_count);

	/* schedule-early pass over the data inputs of all pinned instructions */
	n = ir_list_len(&queue_early);
	while (n > 0) {
		n--;
		ref = ir_list_at(&queue_early, n);
		insn = &ctx->ir_base[ref];
		k = insn->inputs_count - 1;
		for (p = insn->ops + 2; k > 0; p++, k--) {
			ref = *p;
			if (ref > 0 && _blocks[ref] == 0) {
				ir_gcm_schedule_early(ctx, _blocks, ref, &queue_rest);
			}
		}
	}

#ifdef IR_DEBUG
	if (ctx->flags & IR_DEBUG_GCM) {
		fprintf(stderr, "GCM Schedule Early\n");
		for (n = 1; n < ctx->insns_count; n++) {
			fprintf(stderr, "%d -> %d\n", n, _blocks[n]);
		}
	}
#endif

	/* schedule-late pass over the data uses of all pinned instructions */
	n = ir_list_len(&queue_late);
	while (n > 0) {
		n--;
		ref = ir_list_at(&queue_late, n);
		use_list = &ctx->use_lists[ref];
		k = use_list->count;
		for (p = &ctx->use_edges[use_list->refs]; k > 0; p++, k--) {
			ref = *p;
			if (_blocks[ref] < 0) {
				ir_gcm_schedule_late(ctx, _blocks, ref);
			}
		}
	}

	/* place nodes that depend only on constants */
	n = ir_list_len(&queue_rest);
	while (n > 0) {
		n--;
		ref = ir_list_at(&queue_rest, n);
		ir_gcm_schedule_rest(ctx, _blocks, ref);
	}

	ir_list_free(&queue_early);
	ir_list_free(&queue_late);
	ir_list_free(&queue_rest);

#ifdef IR_DEBUG
	if (ctx->flags & IR_DEBUG_GCM) {
		fprintf(stderr, "GCM Schedule Late\n");
		for (n = 1; n < ctx->insns_count; n++) {
			fprintf(stderr, "%d -> %d\n", n, _blocks[n]);
		}
	}
#endif

	return 1;
}
/* Rebuild the "binding" hash table after instructions were renumbered:
 * translate keys (and positive values) through _xlat, drop entries whose
 * instruction was eliminated, and re-chain the hash buckets in place. */
static void ir_xlat_binding(ir_ctx *ctx, ir_ref *_xlat)
{
	uint32_t n1, n2, pos;
	ir_ref key;
	ir_hashtab_bucket *b1, *b2;
	ir_hashtab *binding = ctx->binding;
	uint32_t hash_size = (uint32_t)(-(int32_t)binding->mask);

	/* reset the hash heads stored just before the bucket array */
	memset((char*)binding->data - (hash_size * sizeof(uint32_t)), -1, hash_size * sizeof(uint32_t));
	n1 = binding->count;
	n2 = 0;
	pos = 0;
	b1 = binding->data;  /* read cursor over the old buckets */
	b2 = binding->data;  /* write cursor for the surviving buckets */
	while (n1 > 0) {
		key = b1->key;
		IR_ASSERT(key < ctx->insns_count);
		if (_xlat[key]) {
			key = _xlat[key];
			b2->key = key;
			if (b1->val > 0) {
				IR_ASSERT(_xlat[b1->val]);
				b2->val = _xlat[b1->val];
			} else {
				b2->val = b1->val; /* non-positive values are kept untranslated */
			}
			key |= binding->mask;
			b2->next = ((uint32_t*)binding->data)[key];
			((uint32_t*)binding->data)[key] = pos;
			pos += sizeof(ir_hashtab_bucket);
			b2++;
			n2++;
		}
		b1++;
		n1--;
	}
	binding->count = n2;
}
/* Mark constant "ref" as used; return 1 the first time it is seen, 0 after. */
IR_ALWAYS_INLINE ir_ref ir_count_constant(ir_ref *_xlat, ir_ref ref)
{
	if (_xlat[ref]) {
		return 0; /* already counted */
	}
	_xlat[ref] = ref; /* this is only a "used constant" marker */
	return 1;
}
int ir_schedule(ir_ctx *ctx)
{
ir_ctx new_ctx;
ir_ref i, j, k, n, *p, *q, ref, new_ref, prev_ref, insns_count, consts_count, use_edges_count;
ir_ref *_xlat;
ir_ref *edges;
uint32_t b, prev_b;
uint32_t *_blocks = ctx->cfg_map;
ir_ref *_next = ir_mem_malloc(ctx->insns_count * sizeof(ir_ref));
ir_ref *_prev = ir_mem_malloc(ctx->insns_count * sizeof(ir_ref));
ir_ref _move_down = 0;
ir_block *bb;
ir_insn *insn, *new_insn;
ir_use_list *lists, *use_list, *new_list;
/* Create a double-linked list of nodes ordered by BB, respecting BB->start and BB->end */
prev_b = _blocks[1];
IR_ASSERT(prev_b);
_prev[1] = 0;
_prev[ctx->cfg_blocks[1].end] = 0;
for (i = 2, j = 1; i < ctx->insns_count; i++) {
b = _blocks[i];
IR_ASSERT((int32_t)b >= 0);
if (b == prev_b) {
/* add to the end of the list */
_next[j] = i;
_prev[i] = j;
j = i;
} else if (b > prev_b) {
bb = &ctx->cfg_blocks[b];
if (i == bb->start) {
IR_ASSERT(bb->end > bb->start);
prev_b = b;
_prev[bb->end] = 0;
/* add to the end of the list */
_next[j] = i;
_prev[i] = j;
j = i;
} else {
IR_ASSERT(i != bb->end);
/* move down late (see the following loop) */
_next[i] = _move_down;
_move_down = i;
}
} else if (b) {
bb = &ctx->cfg_blocks[b];
IR_ASSERT(i != bb->start);
if (_prev[bb->end]) {
/* move up, insert before the end of the already scheduled BB */
k = bb->end;
} else {
/* move up, insert at the end of the block */
k = ctx->cfg_blocks[b + 1].start;
}
/* insert before "k" */
_prev[i] = _prev[k];
_next[i] = k;
_next[_prev[k]] = i;
_prev[k] = i;
}
}
_next[j] = 0;
while (_move_down) {
i = _move_down;
_move_down = _next[i];
b = _blocks[i];
bb = &ctx->cfg_blocks[b];
k = _next[bb->start];
if (bb->flags & (IR_BB_HAS_PHI|IR_BB_HAS_PI|IR_BB_HAS_PARAM|IR_BB_HAS_VAR)) {
/* insert after the start of the block and all PARAM, VAR, PI, PHI */
insn = &ctx->ir_base[k];
while (insn->op == IR_PHI || insn->op == IR_PARAM || insn->op == IR_VAR || insn->op == IR_PI) {
k = _next[k];
insn = &ctx->ir_base[k];
}
}
/* insert before "k" */
_prev[i] = _prev[k];
_next[i] = k;
_next[_prev[k]] = i;
_prev[k] = i;
}
#ifdef IR_DEBUG
if (ctx->flags & IR_DEBUG_SCHEDULE) {
fprintf(stderr, "Before Schedule\n");
for (i = 1; i != 0; i = _next[i]) {
fprintf(stderr, "%d -> %d\n", i, _blocks[i]);
}
}
#endif
_xlat = ir_mem_calloc((ctx->consts_count + ctx->insns_count), sizeof(ir_ref));
_xlat += ctx->consts_count;
_xlat[IR_TRUE] = IR_TRUE;
_xlat[IR_FALSE] = IR_FALSE;
_xlat[IR_NULL] = IR_NULL;
_xlat[IR_UNUSED] = IR_UNUSED;
insns_count = 1;
consts_count = -(IR_TRUE - 1);
/* Topological sort according dependencies inside each basic block */
for (b = 1, bb = ctx->cfg_blocks + 1; b <= ctx->cfg_blocks_count; b++, bb++) {
IR_ASSERT(!(bb->flags & IR_BB_UNREACHABLE));
/* Schedule BB start */
i = bb->start;
_xlat[i] = bb->start = insns_count;
insn = &ctx->ir_base[i];
if (insn->op == IR_CASE_VAL) {
IR_ASSERT(insn->op2 < IR_TRUE);
consts_count += ir_count_constant(_xlat, insn->op2);
}
n = insn->inputs_count;
insns_count += ir_insn_inputs_to_len(n);
i = _next[i];
insn = &ctx->ir_base[i];
if (bb->flags & (IR_BB_HAS_PHI|IR_BB_HAS_PI|IR_BB_HAS_PARAM|IR_BB_HAS_VAR)) {
/* Schedule PARAM, VAR, PI */
while (insn->op == IR_PARAM || insn->op == IR_VAR || insn->op == IR_PI) {
_xlat[i] = insns_count;
insns_count += 1;
i = _next[i];
insn = &ctx->ir_base[i];
}
/* Schedule PHIs */
while (insn->op == IR_PHI) {
ir_ref j, *p, input;
_xlat[i] = insns_count;
/* Reuse "n" from MERGE and skip first input */
insns_count += ir_insn_inputs_to_len(n + 1);
for (j = n, p = insn->ops + 2; j > 0; p++, j--) {
input = *p;
if (input < IR_TRUE) {
consts_count += ir_count_constant(_xlat, input);
}
}
i = _next[i];
insn = &ctx->ir_base[i];
}
}
while (i != bb->end) {
ir_ref n, j, *p, input;
restart:
n = insn->inputs_count;
for (j = n, p = insn->ops + 1; j > 0; p++, j--) {
input = *p;
if (!_xlat[input]) {
/* input is not scheduled yet */
if (input > 0) {
if (_blocks[input] == b) {
/* "input" should be before "i" to satisfy dependency */
#ifdef IR_DEBUG
if (ctx->flags & IR_DEBUG_SCHEDULE) {
fprintf(stderr, "Wrong dependency %d:%d -> %d\n", b, input, i);
}
#endif
/* remove "input" */
_prev[_next[input]] = _prev[input];
_next[_prev[input]] = _next[input];
/* insert before "i" */
_prev[input] = _prev[i];
_next[input] = i;
_next[_prev[i]] = input;
_prev[i] = input;
/* restart from "input" */
i = input;
insn = &ctx->ir_base[i];
goto restart;
}
} else if (input < IR_TRUE) {
consts_count += ir_count_constant(_xlat, input);
}
}
}
_xlat[i] = insns_count;
insns_count += ir_insn_inputs_to_len(n);
i = _next[i];
insn = &ctx->ir_base[i];
}
/* Schedule BB end */
_xlat[i] = bb->end = insns_count;
insns_count++;
if (IR_INPUT_EDGES_COUNT(ir_op_flags[insn->op]) == 2) {
if (insn->op2 < IR_TRUE) {
consts_count += ir_count_constant(_xlat, insn->op2);
}
}
}
#ifdef IR_DEBUG
if (ctx->flags & IR_DEBUG_SCHEDULE) {
fprintf(stderr, "After Schedule\n");
for (i = 1; i != 0; i = _next[i]) {
fprintf(stderr, "%d -> %d\n", i, _blocks[i]);
}
}
#endif
#if 1
/* Check if scheduling didn't make any modifications */
if (consts_count == ctx->consts_count && insns_count == ctx->insns_count) {
bool changed = 0;
for (i = 1; i != 0; i = _next[i]) {
if (_xlat[i] != i) {
changed = 1;
break;
}
}
if (!changed) {
_xlat -= ctx->consts_count;
ir_mem_free(_xlat);
ir_mem_free(_next);
ctx->prev_ref = _prev;
ctx->flags |= IR_LINEAR;
ir_truncate(ctx);
return 1;
}
}
#endif
ir_mem_free(_prev);
ir_init(&new_ctx, ctx->flags, consts_count, insns_count);
new_ctx.insns_count = insns_count;
new_ctx.ret_type = ctx->ret_type;
new_ctx.mflags = ctx->mflags;
new_ctx.spill_base = ctx->spill_base;
new_ctx.fixed_stack_red_zone = ctx->fixed_stack_red_zone;
new_ctx.fixed_stack_frame_size = ctx->fixed_stack_frame_size;
new_ctx.fixed_call_stack_size = ctx->fixed_call_stack_size;
new_ctx.fixed_regset = ctx->fixed_regset;
new_ctx.fixed_save_regset = ctx->fixed_save_regset;
new_ctx.entries_count = ctx->entries_count;
#if defined(IR_TARGET_AARCH64)
new_ctx.deoptimization_exits = ctx->deoptimization_exits;
new_ctx.get_exit_addr = ctx->get_exit_addr;
new_ctx.get_veneer = ctx->get_veneer;
new_ctx.set_veneer = ctx->set_veneer;
#endif
new_ctx.loader = ctx->loader;
/* Copy constants */
if (consts_count == ctx->consts_count) {
new_ctx.consts_count = consts_count;
ref = 1 - consts_count;
insn = &ctx->ir_base[ref];
new_insn = &new_ctx.ir_base[ref];
memcpy(new_insn, insn, sizeof(ir_insn) * (IR_TRUE - ref));
if (ctx->strtab.data) {
while (ref != IR_TRUE) {
if (new_insn->op == IR_FUNC || new_insn->op == IR_SYM || new_insn->op == IR_STR) {
new_insn->val.addr = ir_str(&new_ctx, ir_get_str(ctx, new_insn->val.i32));
}
new_insn++;
ref++;
}
}
} else {
new_ref = -new_ctx.consts_count;
new_insn = &new_ctx.ir_base[new_ref];
for (ref = IR_TRUE - 1, insn = &ctx->ir_base[ref]; ref > -ctx->consts_count; insn--, ref--) {
if (!_xlat[ref]) {
continue;
}
new_insn->optx = insn->optx;
new_insn->prev_const = 0;
if (insn->op == IR_FUNC || insn->op == IR_SYM || insn->op == IR_STR) {
new_insn->val.addr = ir_str(&new_ctx, ir_get_str(ctx, insn->val.i32));
} else {
new_insn->val.u64 = insn->val.u64;
}
_xlat[ref] = new_ref;
new_ref--;
new_insn--;
}
new_ctx.consts_count = -new_ref;
}
new_ctx.cfg_map = ir_mem_calloc(ctx->insns_count, sizeof(uint32_t));
new_ctx.prev_ref = _prev = ir_mem_malloc(insns_count * sizeof(ir_ref));
new_ctx.use_lists = lists = ir_mem_malloc(insns_count * sizeof(ir_use_list));
new_ctx.use_edges = edges = ir_mem_malloc(ctx->use_edges_count * sizeof(ir_ref));
/* Copy instructions, use lists and use edges */
prev_ref = 0;
use_edges_count = 0;
for (i = 1; i != 0; i = _next[i]) {
new_ref = _xlat[i];
new_ctx.cfg_map[new_ref] = _blocks[i];
_prev[new_ref] = prev_ref;
prev_ref = new_ref;
use_list = &ctx->use_lists[i];
n = use_list->count;
k = 0;
if (n == 1) {
ref = ctx->use_edges[use_list->refs];
if (_xlat[ref]) {
*edges = _xlat[ref];
edges++;
k = 1;
}
} else {
p = &ctx->use_edges[use_list->refs];
while (n--) {
ref = *p;
if (_xlat[ref]) {
*edges = _xlat[ref];
edges++;
k++;
}
p++;
}
}
new_list = &lists[new_ref];
new_list->refs = use_edges_count;
use_edges_count += k;
new_list->count = k;
insn = &ctx->ir_base[i];
new_insn = &new_ctx.ir_base[new_ref];
new_insn->optx = insn->optx;
n = new_insn->inputs_count;
switch (n) {
case 0:
new_insn->op1 = insn->op1;
new_insn->op2 = insn->op2;
new_insn->op3 = insn->op3;
break;
case 1:
new_insn->op1 = _xlat[insn->op1];
if (new_insn->op == IR_PARAM || insn->op == IR_VAR) {
new_insn->op2 = ir_str(&new_ctx, ir_get_str(ctx, insn->op2));
} else {
new_insn->op2 = insn->op2;
}
new_insn->op3 = insn->op3;
break;
case 2:
new_insn->op1 = _xlat[insn->op1];
new_insn->op2 = _xlat[insn->op2];
new_insn->op3 = insn->op3;
break;
case 3:
new_insn->op1 = _xlat[insn->op1];
new_insn->op2 = _xlat[insn->op2];
new_insn->op3 = _xlat[insn->op3];
break;
default:
for (j = n, p = insn->ops + 1, q = new_insn->ops + 1; j > 0; p++, q++, j--) {
*q = _xlat[*p];
}
break;
}
}
/* Update list of terminators (IR_OPND_CONTROL_REF) */
insn = &new_ctx.ir_base[1];
ref = insn->op1;
if (ref) {
insn->op1 = ref = _xlat[ref];
while (1) {
insn = &new_ctx.ir_base[ref];
ref = insn->op3;
if (!ref) {
break;
}
insn->op3 = ref = _xlat[ref];
}
}
IR_ASSERT(ctx->use_edges_count >= use_edges_count);
new_ctx.use_edges_count = use_edges_count;
new_ctx.use_edges = ir_mem_realloc(new_ctx.use_edges, use_edges_count * sizeof(ir_ref));
if (ctx->binding) {
ir_xlat_binding(ctx, _xlat);
new_ctx.binding = ctx->binding;
ctx->binding = NULL;
}
_xlat -= ctx->consts_count;
ir_mem_free(_xlat);
new_ctx.cfg_blocks_count = ctx->cfg_blocks_count;
new_ctx.cfg_edges_count = ctx->cfg_edges_count;
new_ctx.cfg_blocks = ctx->cfg_blocks;
new_ctx.cfg_edges = ctx->cfg_edges;
ctx->cfg_blocks = NULL;
ctx->cfg_edges = NULL;
ir_free(ctx);
IR_ASSERT(new_ctx.consts_count == new_ctx.consts_limit);
IR_ASSERT(new_ctx.insns_count == new_ctx.insns_limit);
memcpy(ctx, &new_ctx, sizeof(ir_ctx));
ctx->flags |= IR_LINEAR;
ir_mem_free(_next);
return 1;
}
/* Populate ctx->prev_ref so that prev_ref[i] holds the reference of the
 * instruction placed immediately before "i" in linear order (0 for the
 * first instruction).  Walks every reachable basic block in order. */
void ir_build_prev_refs(ir_ctx *ctx)
{
	uint32_t block_num;
	ir_block *block;
	ir_ref ref, len, last = 0;
	ir_insn *ins;

	ctx->prev_ref = ir_mem_malloc(ctx->insns_count * sizeof(ir_ref));
	block_num = 1;
	block = ctx->cfg_blocks + 1;
	while (block_num <= ctx->cfg_blocks_count) {
		IR_ASSERT(!(block->flags & IR_BB_UNREACHABLE));
		ref = block->start;
		ins = ctx->ir_base + ref;
		while (ref < block->end) {
			ctx->prev_ref[ref] = last;
			len = ir_insn_len(ins); /* an instruction may occupy several slots */
			last = ref;
			ref += len;
			ins += len;
		}
		/* record the predecessor of the BB-end instruction as well */
		ctx->prev_ref[ref] = last;
		block_num++;
		block++;
	}
}

642
ext/opcache/jit/ir/ir_gdb.c Normal file
View File

@@ -0,0 +1,642 @@
/*
* IR - Lightweight JIT Compilation Framework
* (GDB interface)
* Copyright (C) 2022 Zend by Perforce.
* Authors: Dmitry Stogov <dmitry@php.net>
*
* Based on Mike Pall's implementation of GDB interface for LuaJIT.
*/
#include <stddef.h>
#include <stdlib.h>
#include <unistd.h>
#include <fcntl.h>
#ifdef __FreeBSD__
# include <sys/types.h>
# include <sys/sysctl.h>
# include <sys/user.h>
# include <libutil.h>
#endif
#include "ir.h"
#include "ir_private.h"
#include "ir_elf.h"
/* DWARF definitions. */
#define DW_CIE_VERSION 1

/* CFA (Canonical frame address) */
enum {
	DW_CFA_nop = 0x0,
	DW_CFA_offset_extended = 0x5,
	DW_CFA_def_cfa = 0xc,
	DW_CFA_def_cfa_offset = 0xe,
	DW_CFA_offset_extended_sf = 0x11,
	DW_CFA_advance_loc = 0x40,
	DW_CFA_offset = 0x80
};

/* Exception-handling pointer encodings (.eh_frame augmentation data) */
enum {
	DW_EH_PE_udata4 = 0x03,
	DW_EH_PE_textrel = 0x20
};

/* DWARF DIE tags */
enum {
	DW_TAG_compile_unit = 0x11
};

/* DIE "has children" flag values */
enum {
	DW_children_no = 0,
	DW_children_yes = 1
};

/* DWARF attribute codes */
enum {
	DW_AT_name = 0x03,
	DW_AT_stmt_list = 0x10,
	DW_AT_low_pc = 0x11,
	DW_AT_high_pc = 0x12
};

/* DWARF attribute form codes */
enum {
	DW_FORM_addr = 0x01,
	DW_FORM_data4 = 0x06,
	DW_FORM_string = 0x08
};

/* Line-number program standard opcodes */
enum {
	DW_LNS_extended_op = 0,
	DW_LNS_copy = 1,
	DW_LNS_advance_pc = 2,
	DW_LNS_advance_line = 3
};

/* Line-number program extended opcodes */
enum {
	DW_LNE_end_sequence = 1,
	DW_LNE_set_address = 2
};

/* DWARF register numbers for the target ABI */
enum {
#if defined(IR_TARGET_X86)
	DW_REG_AX, DW_REG_CX, DW_REG_DX, DW_REG_BX,
	DW_REG_SP, DW_REG_BP, DW_REG_SI, DW_REG_DI,
	DW_REG_RA,
#elif defined(IR_TARGET_X64)
	/* Yes, the order is strange, but correct. */
	DW_REG_AX, DW_REG_DX, DW_REG_CX, DW_REG_BX,
	DW_REG_SI, DW_REG_DI, DW_REG_BP, DW_REG_SP,
	DW_REG_8, DW_REG_9, DW_REG_10, DW_REG_11,
	DW_REG_12, DW_REG_13, DW_REG_14, DW_REG_15,
	DW_REG_RA,
#elif defined(IR_TARGET_AARCH64)
	DW_REG_SP = 31,
	DW_REG_RA = 30,
	DW_REG_X29 = 29,
#else
#error "Unsupported target architecture"
#endif
};

/* Section indices of the in-memory ELF object */
enum {
	GDBJIT_SECT_NULL,
	GDBJIT_SECT_text,
	GDBJIT_SECT_eh_frame,
	GDBJIT_SECT_shstrtab,
	GDBJIT_SECT_strtab,
	GDBJIT_SECT_symtab,
	GDBJIT_SECT_debug_info,
	GDBJIT_SECT_debug_abbrev,
	GDBJIT_SECT_debug_line,
	GDBJIT_SECT__MAX
};

/* Symbol-table slots of the in-memory ELF object */
enum {
	GDBJIT_SYM_UNDEF,
	GDBJIT_SYM_FILE,
	GDBJIT_SYM_FUNC,
	GDBJIT_SYM__MAX
};

/* Complete in-memory ELF object handed to GDB via the JIT interface;
 * "space" receives the variable-sized section payloads. */
typedef struct _ir_gdbjit_obj {
	ir_elf_header hdr;
	ir_elf_sectheader sect[GDBJIT_SECT__MAX];
	ir_elf_symbol sym[GDBJIT_SYM__MAX];
	uint8_t space[4096];
} ir_gdbjit_obj;
/* Prototype ELF header: copied into every generated object and then left
 * unchanged.  Class/endianness/OS-ABI/machine are fixed at compile time. */
static const ir_elf_header ir_elfhdr_template = {
	.emagic = { 0x7f, 'E', 'L', 'F' },
#ifdef ELF64
	.eclass = 2,
#else
	.eclass = 1,
#endif
#ifdef WORDS_BIGENDIAN
	.eendian = 2,
#else
	.eendian = 1,
#endif
	.eversion = 1,
#if defined(Linux)
	.eosabi = 0, /* TODO: Nope, it's not 3. ??? */
#elif defined(__FreeBSD__)
	.eosabi = 9,
#elif defined(__OpenBSD__)
	.eosabi = 12,
#elif defined(__NetBSD__)
	.eosabi = 2,
#elif defined(__DragonFly__)
	.eosabi = 0,
#elif (defined(__sun__) && defined(__svr4__))
	.eosabi = 6,
#else
	.eosabi = 0,
#endif
	.eabiversion = 0,
	.epad = { 0, 0, 0, 0, 0, 0, 0 },
	.type = 1, /* relocatable object */
#if defined(IR_TARGET_X86)
	.machine = 3,
#elif defined(IR_TARGET_X64)
	.machine = 62,
#elif defined(IR_TARGET_AARCH64)
	.machine = 183,
#else
# error "Unsupported target architecture"
#endif
	.version = 1,
	.entry = 0,
	.phofs = 0,
	.shofs = offsetof(ir_gdbjit_obj, sect), /* section headers live inside the object */
	.flags = 0,
	.ehsize = sizeof(ir_elf_header),
	.phentsize = 0,
	.phnum = 0,
	.shentsize = sizeof(ir_elf_sectheader),
	.shnum = GDBJIT_SECT__MAX,
	.shstridx = GDBJIT_SECT_shstrtab
};

/* Context for generating the ELF object for the GDB JIT API. */
typedef struct _ir_gdbjit_ctx {
	uint8_t *p;         /* Pointer to next address in obj.space. */
	uint8_t *startp;    /* Pointer to start address in obj.space. */
	uintptr_t mcaddr;   /* Machine code address. */
	uint32_t szmcode;   /* Size of machine code. */
	int32_t lineno;     /* Starting line number. */
	const char *name;   /* JIT function name */
	const char *filename; /* Starting file name. */
	size_t objsize;     /* Final size of ELF object. */
	ir_gdbjit_obj obj;  /* In-memory ELF object. */
} ir_gdbjit_ctx;
/* Append "str" (including its terminating NUL) to the object image and
 * return its offset from the start of the current section. */
static uint32_t ir_gdbjit_strz(ir_gdbjit_ctx *ctx, const char *str)
{
	uint8_t *dst = ctx->p;
	uint32_t ofs = (uint32_t)(dst - ctx->startp);
	char c;

	do {
		c = *str++;
		*dst++ = (uint8_t)c;
	} while (c);
	ctx->p = dst;
	return ofs;
}
/* Append "v" encoded as an unsigned LEB128 value. */
static void ir_gdbjit_uleb128(ir_gdbjit_ctx *ctx, uint32_t v)
{
	uint8_t *out = ctx->p;

	while (v >= 0x80) {
		*out++ = (uint8_t)(0x80 | (v & 0x7f));
		v >>= 7;
	}
	*out++ = (uint8_t)v;
	ctx->p = out;
}
/* Append "v" encoded as a signed LEB128 value. */
static void ir_gdbjit_sleb128(ir_gdbjit_ctx *ctx, int32_t v)
{
	uint8_t *out = ctx->p;

	/* keep emitting 7-bit groups until the remaining value fits in one
	 * sign-extended group */
	while ((uint32_t)(v + 0x40) >= 0x80) {
		*out++ = (uint8_t)(0x80 | (v & 0x7f));
		v >>= 7;
	}
	*out++ = (uint8_t)(v & 0x7f);
	ctx->p = out;
}
/* Fill in the ELF section headers.  Section name strings are emitted into
 * .shstrtab on the fly, so this must run while ctx->p points into that
 * section (see ir_gdbjit_buildobj()). */
static void ir_gdbjit_secthdr(ir_gdbjit_ctx *ctx)
{
	ir_elf_sectheader *sect;

	*ctx->p++ = '\0'; /* string-table index 0 is the empty name */
#define SECTDEF(id, tp, al) \
	sect = &ctx->obj.sect[GDBJIT_SECT_##id]; \
	sect->name = ir_gdbjit_strz(ctx, "." #id); \
	sect->type = ELFSECT_TYPE_##tp; \
	sect->align = (al)

	/* .text describes the JIT-ed machine code in place (NOBITS: no copy) */
	SECTDEF(text, NOBITS, 16);
	sect->flags = ELFSECT_FLAGS_ALLOC|ELFSECT_FLAGS_EXEC;
	sect->addr = ctx->mcaddr;
	sect->ofs = 0;
	sect->size = ctx->szmcode;

	SECTDEF(eh_frame, PROGBITS, sizeof(uintptr_t));
	sect->flags = ELFSECT_FLAGS_ALLOC;

	SECTDEF(shstrtab, STRTAB, 1);
	SECTDEF(strtab, STRTAB, 1);

	SECTDEF(symtab, SYMTAB, sizeof(uintptr_t));
	sect->ofs = offsetof(ir_gdbjit_obj, sym);
	sect->size = sizeof(ctx->obj.sym);
	sect->link = GDBJIT_SECT_strtab; /* symbol names live in .strtab */
	sect->entsize = sizeof(ir_elf_symbol);
	sect->info = GDBJIT_SYM_FUNC; /* ELF convention: index one past the last local symbol */

	SECTDEF(debug_info, PROGBITS, 1);
	SECTDEF(debug_abbrev, PROGBITS, 1);
	SECTDEF(debug_line, PROGBITS, 1);
#undef SECTDEF
}
/* Fill in the symbol table: a local FILE symbol plus a global FUNC symbol
 * covering the whole JIT-ed code region.  Symbol names are emitted into
 * .strtab, so ctx->p must point there when this is called. */
static void ir_gdbjit_symtab(ir_gdbjit_ctx *ctx)
{
	ir_elf_symbol *sym;

	*ctx->p++ = '\0'; /* strtab index 0 is the empty name */
	sym = &ctx->obj.sym[GDBJIT_SYM_FILE];
	sym->name = ir_gdbjit_strz(ctx, "JIT code");
	sym->sectidx = ELFSECT_IDX_ABS;
	sym->info = ELFSYM_INFO(ELFSYM_BIND_LOCAL, ELFSYM_TYPE_FILE);

	sym = &ctx->obj.sym[GDBJIT_SYM_FUNC];
	sym->name = ir_gdbjit_strz(ctx, ctx->name);
	sym->sectidx = GDBJIT_SECT_text;
	sym->value = 0; /* offset from the start of .text */
	sym->size = ctx->szmcode;
	sym->info = ELFSYM_INFO(ELFSYM_BIND_GLOBAL, ELFSYM_TYPE_FUNC);
}
/* Unaligned-store helpers: obj.space gives no alignment guarantees. */
typedef IR_SET_ALIGNED(1, uint16_t unaligned_uint16_t);
typedef IR_SET_ALIGNED(1, uint32_t unaligned_uint32_t);
typedef IR_SET_ALIGNED(1, uintptr_t unaligned_uintptr_t);

/* Round "p" up to an "a"-byte boundary (a must be a power of two) */
#define SECTALIGN(p, a) \
	((p) = (uint8_t *)(((uintptr_t)(p) + ((a)-1)) & ~(uintptr_t)((a)-1)))

/* Shortcuts to generate DWARF structures.  All write through the local
 * cursor "p"; DUV/DSV/DSTR round-trip through ctx->p to reuse helpers. */
#define DB(x) (*p++ = (x))                                 /* one byte */
#define DI8(x) (*(int8_t *)p = (x), p++)                   /* signed byte */
#define DU16(x) (*(unaligned_uint16_t *)p = (x), p += 2)   /* uint16 */
#define DU32(x) (*(unaligned_uint32_t *)p = (x), p += 4)   /* uint32 */
#define DADDR(x) (*(unaligned_uintptr_t *)p = (x), p += sizeof(uintptr_t))
#define DUV(x) (ctx->p = p, ir_gdbjit_uleb128(ctx, (x)), p = ctx->p)   /* ULEB128 */
#define DSV(x) (ctx->p = p, ir_gdbjit_sleb128(ctx, (x)), p = ctx->p)   /* SLEB128 */
#define DSTR(str) (ctx->p = p, ir_gdbjit_strz(ctx, (str)), p = ctx->p) /* NUL-terminated string */
#define DALIGNNOP(s) while ((uintptr_t)p & ((s)-1)) *p++ = DW_CFA_nop  /* pad with CFA nops */
/* Length-prefixed block: reserves a 32-bit size word, runs "stmt", then
 * back-patches the size (excluding the size word itself) */
#define DSECT(name, stmt) \
	{ unaligned_uint32_t *szp_##name = (uint32_t *)p; p += 4; stmt \
	*szp_##name = (uint32_t)((p-(uint8_t *)szp_##name)-4); }
/* Emit the .eh_frame section: one CIE describing the target's frame
 * conventions followed by one FDE covering the whole JIT-ed code region.
 * sp_offset is the CFA offset established by the prologue; sp_adjustment
 * is a second offset applied afterwards (when larger than sp_offset). */
static void ir_gdbjit_ehframe(ir_gdbjit_ctx *ctx, uint32_t sp_offset, uint32_t sp_adjustment)
{
	uint8_t *p = ctx->p;
	uint8_t *framep = p;

	/* DWARF EH CIE (Common Information Entry) */
	DSECT(CIE,
		DU32(0);                          /* CIE ID. */
		DB(DW_CIE_VERSION);               /* Version */
		DSTR("zR");                       /* Augmentation String. */
		DUV(1);                           /* Code alignment factor. */
		DSV(-(int32_t)sizeof(uintptr_t)); /* Data alignment factor. */
		DB(DW_REG_RA);                    /* Return address register. */
		DB(1); DB(DW_EH_PE_textrel|DW_EH_PE_udata4); /* Augmentation data. */
#if defined(IR_TARGET_X86) || defined(IR_TARGET_X64)
		/* on entry: CFA = SP + wordsize, return address at CFA-wordsize */
		DB(DW_CFA_def_cfa); DUV(DW_REG_SP); DUV(sizeof(uintptr_t));
		DB(DW_CFA_offset|DW_REG_RA); DUV(1);
#elif defined(IR_TARGET_AARCH64)
		/* on entry: CFA = SP (return address stays in LR) */
		DB(DW_CFA_def_cfa); DUV(DW_REG_SP); DUV(0);
#endif
		DALIGNNOP(sizeof(uintptr_t));
	)

	/* DWARF EH FDE (Frame Description Entry). */
	DSECT(FDE,
		DU32((uint32_t)(p-framep)); /* Offset to CIE Pointer. */
		DU32(0);                    /* Machine code offset relative to .text. */
		DU32(ctx->szmcode);         /* Machine code length. */
		DB(0);                      /* Augmentation data. */
		DB(DW_CFA_def_cfa_offset); DUV(sp_offset);
#if defined(IR_TARGET_AARCH64)
		/* record where x29/LR were saved relative to the CFA */
		if (sp_offset) {
			if (sp_adjustment && sp_adjustment < sp_offset) {
				DB(DW_CFA_offset|DW_REG_X29); DUV(sp_adjustment / sizeof(uintptr_t));
				DB(DW_CFA_offset|DW_REG_RA); DUV((sp_adjustment / sizeof(uintptr_t)) - 1);
			} else {
				DB(DW_CFA_offset|DW_REG_X29); DUV(sp_offset / sizeof(uintptr_t));
				DB(DW_CFA_offset|DW_REG_RA); DUV((sp_offset / sizeof(uintptr_t)) - 1);
			}
		}
#endif
		if (sp_adjustment && sp_adjustment > sp_offset) {
			/* the CFA offset grows later in the function body */
			DB(DW_CFA_advance_loc|1); DB(DW_CFA_def_cfa_offset); DUV(sp_adjustment);
#if defined(IR_TARGET_AARCH64)
			if (!sp_offset) {
				DB(DW_CFA_offset|DW_REG_X29); DUV(sp_adjustment / sizeof(uintptr_t));
				DB(DW_CFA_offset|DW_REG_RA); DUV((sp_adjustment / sizeof(uintptr_t)) - 1);
			}
#endif
		}
		DALIGNNOP(sizeof(uintptr_t));
	)
	ctx->p = p;
}
/* Emit .debug_info: a single compile-unit DIE (using abbreviation #1 from
 * ir_gdbjit_debugabbrev()) naming the source file and giving the machine
 * code address range. */
static void ir_gdbjit_debuginfo(ir_gdbjit_ctx *ctx)
{
	uint8_t *p = ctx->p;

	DSECT(info,
		DU16(2);               /* DWARF version. */
		DU32(0);               /* Abbrev offset. */
		DB(sizeof(uintptr_t)); /* Pointer size. */

		DUV(1);                /* Abbrev #1: DW_TAG_compile_unit. */
		DSTR(ctx->filename);   /* DW_AT_name. */
		DADDR(ctx->mcaddr);    /* DW_AT_low_pc. */
		DADDR(ctx->mcaddr + ctx->szmcode); /* DW_AT_high_pc. */
		DU32(0);               /* DW_AT_stmt_list. */
	);
	ctx->p = p;
}
/* Emit .debug_abbrev: declares abbreviation #1 used by .debug_info — a
 * childless DW_TAG_compile_unit with name/low_pc/high_pc/stmt_list. */
static void ir_gdbjit_debugabbrev(ir_gdbjit_ctx *ctx)
{
	uint8_t *p = ctx->p;

	/* Abbrev #1: DW_TAG_compile_unit. */
	DUV(1);
	DUV(DW_TAG_compile_unit);
	DB(DW_children_no);
	DUV(DW_AT_name);
	DUV(DW_FORM_string);
	DUV(DW_AT_low_pc);
	DUV(DW_FORM_addr);
	DUV(DW_AT_high_pc);
	DUV(DW_FORM_addr);
	DUV(DW_AT_stmt_list);
	DUV(DW_FORM_data4);
	DB(0); /* end of attribute list */
	DB(0); /* end of abbreviation table */
	ctx->p = p;
}
/* Emit a line-number extended opcode with payload size "s" */
#define DLNE(op, s) (DB(DW_LNS_extended_op), DUV(1+(s)), DB((op)))

/* Emit a minimal .debug_line section: a line program mapping the whole
 * machine-code range to ctx->filename, starting at ctx->lineno. */
static void ir_gdbjit_debugline(ir_gdbjit_ctx *ctx)
{
	uint8_t *p = ctx->p;

	DSECT(line,
		DU16(2); /* DWARF version. */
		DSECT(header,
			DB(1);   /* Minimum instruction length. */
			DB(1);   /* is_stmt. */
			DI8(0);  /* Line base for special opcodes. */
			DB(2);   /* Line range for special opcodes. */
			DB(3+1); /* Opcode base at DW_LNS_advance_line+1. */
			DB(0); DB(1); DB(1); /* Standard opcode lengths. */
			/* Directory table. */
			DB(0);
			/* File name table. */
			DSTR(ctx->filename); DUV(0); DUV(0); DUV(0);
			DB(0);
		);
		DLNE(DW_LNE_set_address, sizeof(uintptr_t));
		DADDR(ctx->mcaddr);
		if (ctx->lineno) (DB(DW_LNS_advance_line), DSV(ctx->lineno-1));
		DB(DW_LNS_copy);
		DB(DW_LNS_advance_pc); DUV(ctx->szmcode);
		DLNE(DW_LNE_end_sequence, 0);
	);
	ctx->p = p;
}
#undef DLNE
/* Undef shortcuts. */
#undef DB
#undef DI8
#undef DU16
#undef DU32
#undef DADDR
#undef DUV
#undef DSV
#undef DSTR
#undef DALIGNNOP
#undef DSECT
typedef void (*ir_gdbjit_initf) (ir_gdbjit_ctx *ctx);

/* Begin emitting section "sect" at the current write position: remember
 * the start and record the section's file offset. */
static void ir_gdbjit_initsect(ir_gdbjit_ctx *ctx, int sect)
{
	ctx->startp = ctx->p;
	ctx->obj.sect[sect].ofs = (uintptr_t)((char *)ctx->p - (char *)&ctx->obj);
}

/* Finish section "sect": record how many bytes were emitted. */
static void ir_gdbjit_initsect_done(ir_gdbjit_ctx *ctx, int sect)
{
	ctx->obj.sect[sect].size = (uintptr_t)(ctx->p - ctx->startp);
}
/* Assemble the complete in-memory ELF object for one JIT-ed function:
 * ELF header, section headers, symbols, DWARF debug sections and
 * .eh_frame, all emitted into ctx->obj.space. */
static void ir_gdbjit_buildobj(ir_gdbjit_ctx *ctx, uint32_t sp_offset, uint32_t sp_adjustment)
{
	ir_gdbjit_obj *obj = &ctx->obj;

	/* Fill in ELF header and clear structures. */
	memcpy(&obj->hdr, &ir_elfhdr_template, sizeof(ir_elf_header));
	memset(&obj->sect, 0, sizeof(ir_elf_sectheader) * GDBJIT_SECT__MAX);
	memset(&obj->sym, 0, sizeof(ir_elf_symbol) * GDBJIT_SYM__MAX);

	/* Initialize sections. */
	ctx->p = obj->space;
	ir_gdbjit_initsect(ctx, GDBJIT_SECT_shstrtab); ir_gdbjit_secthdr(ctx); ir_gdbjit_initsect_done(ctx, GDBJIT_SECT_shstrtab);
	ir_gdbjit_initsect(ctx, GDBJIT_SECT_strtab); ir_gdbjit_symtab(ctx); ir_gdbjit_initsect_done(ctx, GDBJIT_SECT_strtab);
	ir_gdbjit_initsect(ctx, GDBJIT_SECT_debug_info); ir_gdbjit_debuginfo(ctx); ir_gdbjit_initsect_done(ctx, GDBJIT_SECT_debug_info);
	ir_gdbjit_initsect(ctx, GDBJIT_SECT_debug_abbrev); ir_gdbjit_debugabbrev(ctx); ir_gdbjit_initsect_done(ctx, GDBJIT_SECT_debug_abbrev);
	ir_gdbjit_initsect(ctx, GDBJIT_SECT_debug_line); ir_gdbjit_debugline(ctx); ir_gdbjit_initsect_done(ctx, GDBJIT_SECT_debug_line);
	/* .eh_frame contains pointer-sized fields and must start aligned */
	SECTALIGN(ctx->p, sizeof(uintptr_t));
	ir_gdbjit_initsect(ctx, GDBJIT_SECT_eh_frame); ir_gdbjit_ehframe(ctx, sp_offset, sp_adjustment); ir_gdbjit_initsect_done(ctx, GDBJIT_SECT_eh_frame);

	ctx->objsize = (size_t)((char *)ctx->p - (char *)obj);
	IR_ASSERT(ctx->objsize < sizeof(ir_gdbjit_obj)); /* obj.space must not overflow */
}
/* action_flag values of the GDB JIT debugging interface */
enum {
	IR_GDBJIT_NOACTION,
	IR_GDBJIT_REGISTER,
	IR_GDBJIT_UNREGISTER
};

/* One registered object image; layout is dictated by the GDB JIT API */
typedef struct _ir_gdbjit_code_entry {
	struct _ir_gdbjit_code_entry *next_entry;
	struct _ir_gdbjit_code_entry *prev_entry;
	const char *symfile_addr;
	uint64_t symfile_size;
} ir_gdbjit_code_entry;

/* Global descriptor that GDB locates by its well-known symbol name */
typedef struct _ir_gdbjit_descriptor {
	uint32_t version;
	uint32_t action_flag;
	struct _ir_gdbjit_code_entry *relevant_entry;
	struct _ir_gdbjit_code_entry *first_entry;
} ir_gdbjit_descriptor;

ir_gdbjit_descriptor __jit_debug_descriptor = {
	1, IR_GDBJIT_NOACTION, NULL, NULL
};
#ifdef IR_EXTERNAL_GDB_ENTRY
void __jit_debug_register_code(void);
#else
/* GDB sets a breakpoint on this well-known function; calling it after
 * updating __jit_debug_descriptor notifies the debugger.  The empty asm
 * prevents the call from being optimized away. */
IR_NEVER_INLINE void __jit_debug_register_code(void)
{
	__asm__ __volatile__("");
}
#endif
/* Register one ELF object image with GDB: copy it into a malloc-ed code
 * entry, link the entry at the head of the descriptor list, and trigger
 * the debugger hook.  Returns 1 on success, 0 on allocation failure. */
static bool ir_gdb_register_code(const void *object, size_t size)
{
	ir_gdbjit_code_entry *entry;

	entry = malloc(sizeof(ir_gdbjit_code_entry) + size);
	if (entry == NULL) {
		return 0;
	}
	/* the object image is stored immediately after the entry header */
	entry->symfile_addr = ((char*)entry) + sizeof(ir_gdbjit_code_entry);
	entry->symfile_size = size;
	memcpy((char *)entry->symfile_addr, object, size);
	entry->prev_entry = NULL;
	entry->next_entry = __jit_debug_descriptor.first_entry;
	if (entry->next_entry) {
		entry->next_entry->prev_entry = entry;
	}
	__jit_debug_descriptor.first_entry = entry;

	/* Notify GDB */
	__jit_debug_descriptor.relevant_entry = entry;
	__jit_debug_descriptor.action_flag = IR_GDBJIT_REGISTER;
	__jit_debug_register_code();
	return 1;
}
/* Unlink and free every registered code entry, notifying GDB about each
 * removal before the entry's memory is released. */
void ir_gdb_unregister_all(void)
{
	ir_gdbjit_code_entry *entry;

	__jit_debug_descriptor.action_flag = IR_GDBJIT_UNREGISTER;
	while ((entry = __jit_debug_descriptor.first_entry)) {
		__jit_debug_descriptor.first_entry = entry->next_entry;
		if (entry->next_entry) {
			entry->next_entry->prev_entry = NULL;
		}
		/* Notify GDB */
		__jit_debug_descriptor.relevant_entry = entry;
		__jit_debug_register_code();
		free(entry);
	}
}
/* Best-effort check whether the current process is being traced by GDB.
 * Linux: parse TracerPid from /proc/self/status and look for "gdb" in the
 * tracer's executable path.  FreeBSD: use kinfo_getproc().  Other
 * platforms: always false. */
bool ir_gdb_present(void)
{
	bool ret = 0;
#if defined(__linux__) /* netbsd while having this procfs part, does not hold the tracer pid */
	int fd = open("/proc/self/status", O_RDONLY);

	if (fd >= 0) { /* 0 is a valid file descriptor */
		char buf[1024];
		ssize_t n = read(fd, buf, sizeof(buf) - 1);
		char *s;
		pid_t pid;

		if (n > 0) {
			buf[n] = 0;
			s = strstr(buf, "TracerPid:");
			if (s) {
				s += sizeof("TracerPid:") - 1;
				while (*s == ' ' || *s == '\t') {
					s++;
				}
				pid = atoi(s);
				if (pid) {
					char out[1024];
					ssize_t len;

					/* buf's contents were fully consumed above; reuse it */
					sprintf(buf, "/proc/%d/exe", (int)pid);
					len = readlink(buf, out, sizeof(out) - 1);
					if (len > 0) {
						out[len] = 0; /* readlink() does not NUL-terminate */
						if (strstr(out, "gdb")) {
							ret = 1;
						}
					}
				}
			}
		}
		close(fd);
	}
#elif defined(__FreeBSD__)
	struct kinfo_proc *proc = kinfo_getproc(getpid());

	if (proc) {
		if ((proc->ki_flag & P_TRACED) != 0) {
			struct kinfo_proc *dbg = kinfo_getproc(proc->ki_tracer);

			ret = (dbg && strstr(dbg->ki_comm, "gdb"));
			free(dbg); /* kinfo_getproc() returns malloc-ed memory */
		}
		free(proc);
	}
#endif
	return ret;
}
/* Public entry point: describe the JIT-ed function "name" located at
 * [start, start+size) to GDB.  sp_offset/sp_adjustment describe the stack
 * frame for unwinding (see ir_gdbjit_ehframe()).
 * Returns 1 on success, 0 on failure. */
int ir_gdb_register(const char *name,
                    const void *start,
                    size_t size,
                    uint32_t sp_offset,
                    uint32_t sp_adjustment)
{
	ir_gdbjit_ctx ctx;

	ctx.mcaddr = (uintptr_t)start;
	ctx.szmcode = (uint32_t)size;
	ctx.name = name;
	ctx.filename = "unknown"; /* no source-file information available */
	ctx.lineno = 0;
	ir_gdbjit_buildobj(&ctx, sp_offset, sp_adjustment);
	return ir_gdb_register_code(&ctx.obj, ctx.objsize);
}
/* One-time initialization hook.  Intended to auto-enable GDB registration
 * when a debugger is attached, but the action is currently compiled out
 * (#if 0) because registering many functions is too slow. */
void ir_gdb_init(void)
{
	/* This might enable registration of all JIT-ed code, but unfortunately,
	 * in case of many functions, this takes enormous time. */
	if (ir_gdb_present()) {
#if 0
		_debug |= IR_DEBUG_GDB;
#endif
	}
}

View File

@@ -0,0 +1,270 @@
/*
* IR - Lightweight JIT Compilation Framework
* (Native code patcher)
* Copyright (C) 2022 Zend by Perforce.
* Authors: Dmitry Stogov <dmitry@php.net>
*
* Based on Mike Pall's implementation for LuaJIT.
*/
#include "ir.h"
#include "ir_private.h"
#if defined(IR_TARGET_X86) || defined(IR_TARGET_X64)
/* Compute the length in bytes of the x86/x86_64 instruction at "p".
 * map_op1/map_op2 encode one action per opcode byte: the high nibble
 * selects how decoding continues (done / prefix / 0F escape / ModR/M /
 * special cases), the low nibble carries a byte count or flags.
 * NOTE(review): the table contents are taken as-is from the original
 * (LuaJIT-derived) decoder and are not re-derived here. */
static uint32_t _asm_x86_inslen(const uint8_t* p)
{
	static const uint8_t map_op1[256] = {
		0x92,0x92,0x92,0x92,0x52,0x45,0x51,0x51,0x92,0x92,0x92,0x92,0x52,0x45,0x51,0x20,
		0x92,0x92,0x92,0x92,0x52,0x45,0x51,0x51,0x92,0x92,0x92,0x92,0x52,0x45,0x51,0x51,
		0x92,0x92,0x92,0x92,0x52,0x45,0x10,0x51,0x92,0x92,0x92,0x92,0x52,0x45,0x10,0x51,
		0x92,0x92,0x92,0x92,0x52,0x45,0x10,0x51,0x92,0x92,0x92,0x92,0x52,0x45,0x10,0x51,
#ifdef IR_TARGET_X64
		0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x14,0x14,0x14,0x14,0x14,0x14,0x14,0x14,
#else
		0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,
#endif
		0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,
		0x51,0x51,0x92,0x92,0x10,0x10,0x12,0x11,0x45,0x86,0x52,0x93,0x51,0x51,0x51,0x51,
		0x52,0x52,0x52,0x52,0x52,0x52,0x52,0x52,0x52,0x52,0x52,0x52,0x52,0x52,0x52,0x52,
		0x93,0x86,0x93,0x93,0x92,0x92,0x92,0x92,0x92,0x92,0x92,0x92,0x92,0x92,0x92,0x92,
		0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x47,0x51,0x51,0x51,0x51,0x51,
#ifdef IR_TARGET_X64
		0x59,0x59,0x59,0x59,0x51,0x51,0x51,0x51,0x52,0x45,0x51,0x51,0x51,0x51,0x51,0x51,
#else
		0x55,0x55,0x55,0x55,0x51,0x51,0x51,0x51,0x52,0x45,0x51,0x51,0x51,0x51,0x51,0x51,
#endif
		0x52,0x52,0x52,0x52,0x52,0x52,0x52,0x52,0x05,0x05,0x05,0x05,0x05,0x05,0x05,0x05,
		0x93,0x93,0x53,0x51,0x70,0x71,0x93,0x86,0x54,0x51,0x53,0x51,0x51,0x52,0x51,0x51,
		0x92,0x92,0x92,0x92,0x52,0x52,0x51,0x51,0x92,0x92,0x92,0x92,0x92,0x92,0x92,0x92,
		0x52,0x52,0x52,0x52,0x52,0x52,0x52,0x52,0x45,0x45,0x47,0x52,0x51,0x51,0x51,0x51,
		0x10,0x51,0x10,0x10,0x51,0x51,0x63,0x66,0x51,0x51,0x51,0x51,0x51,0x51,0x92,0x92
	};
	static const uint8_t map_op2[256] = {
		0x93,0x93,0x93,0x93,0x52,0x52,0x52,0x52,0x52,0x52,0x51,0x52,0x51,0x93,0x52,0x94,
		0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,
		0x53,0x53,0x53,0x53,0x53,0x53,0x53,0x53,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,
		0x52,0x52,0x52,0x52,0x52,0x52,0x52,0x52,0x34,0x51,0x35,0x51,0x51,0x51,0x51,0x51,
		0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,
		0x53,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,
		0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,
		0x94,0x54,0x54,0x54,0x93,0x93,0x93,0x52,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,
		0x46,0x46,0x46,0x46,0x46,0x46,0x46,0x46,0x46,0x46,0x46,0x46,0x46,0x46,0x46,0x46,
		0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,
		0x52,0x52,0x52,0x93,0x94,0x93,0x51,0x51,0x52,0x52,0x52,0x93,0x94,0x93,0x93,0x93,
		0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x94,0x93,0x93,0x93,0x93,0x93,
		0x93,0x93,0x94,0x93,0x94,0x94,0x94,0x93,0x52,0x52,0x52,0x52,0x52,0x52,0x52,0x52,
		0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,
		0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,
		0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x52
	};
	uint32_t result = 0;
	uint32_t prefixes = 0;
	uint32_t x = map_op1[*p];

	for (;;) {
		switch (x >> 4) {
			case 0: /* done: fixed length, possibly adjusted by a prefix flag */
				return result + x + (prefixes & 4);
			case 1: /* prefix byte: remember its flags and continue */
				prefixes |= x;
				x = map_op1[*++p];
				result++;
				break;
			case 2: /* 0F escape: switch to the two-byte opcode map */
				x = map_op2[*++p];
				break;
			case 3: /* skip one byte, then decode ModR/M */
				p++;
				goto mrm;
			case 4: /* operand-size-prefix-sensitive immediate form */
				result -= (prefixes & 2);
				/* fallthrough */
			case 5: /* done: length in the low nibble */
				return result + (x & 15);
			case 6: /* Group 3. */
				if (p[1] & 0x38) {
					x = 2;
				} else if ((prefixes & 2) && (x == 0x66)) {
					x = 4;
				}
				goto mrm;
			case 7: /* VEX c4/c5. */
#ifdef IR_TARGET_X86
				if (p[1] < 0xc0) {
					x = 2;
					goto mrm;
				}
#endif
				if (x == 0x70) {
					x = *++p & 0x1f;
					result++;
					if (x >= 2) {
						p += 2;
						result += 2;
						goto mrm;
					}
				}
				p++;
				result++;
				x = map_op2[*++p];
				break;
			case 8: /* ModR/M form sensitive to the operand-size prefix */
				result -= (prefixes & 2);
				/* fallthrough */
			case 9:
			mrm:
				/* ModR/M and possibly SIB. */
				result += (x & 15);
				x = *++p;
				switch (x >> 6) {
					case 0:
						if ((x & 7) == 5) {
							return result + 4; /* disp32 (or RIP-relative) */
						}
						break;
					case 1:
						result++; /* disp8 */
						break;
					case 2:
						result += 4; /* disp32 */
						break;
					case 3: /* register operand: no displacement */
						return result;
				}
				if ((x & 7) == 4) {
					result++; /* SIB byte */
					if (x < 0x40 && (p[1] & 7) == 5) {
						result += 4; /* SIB with base 101: disp32 */
					}
				}
				return result;
		}
	}
}
/* Unaligned-access helpers for reading/writing immediates inside code. */
typedef IR_SET_ALIGNED(1, uint16_t unaligned_uint16_t);
typedef IR_SET_ALIGNED(1, int32_t unaligned_int32_t);

/* Scan the code region and retarget every relative jump that points to
 * "from_addr" so that it points to "to_addr" instead.  Recognizes
 * Jcc rel32 (0F 8x, 6 bytes) and JMP rel32 (E9, 5 bytes).
 * Returns the number of patched sites; flushes the icache if any. */
static int ir_patch_code(const void *code, size_t size, const void *from_addr, const void *to_addr)
{
	int ret = 0;
	uint8_t *p, *end;

	p = (uint8_t*)code;
	end = p + size - 4; /* a rel32 needs at least 4 trailing bytes */
	while (p < end) {
		/* 0F 8x rel32: target = insn_start + 6 + rel32 */
		if ((*(unaligned_uint16_t*)p & 0xf0ff) == 0x800f && p + *(unaligned_int32_t*)(p+2) == (uint8_t*)from_addr - 6) {
			*(unaligned_int32_t*)(p+2) = ((uint8_t*)to_addr - (p + 6));
			ret++;
		} else if (*p == 0xe9 && p + *(unaligned_int32_t*)(p+1) == (uint8_t*)from_addr - 5) {
			/* E9 rel32: target = insn_start + 5 + rel32 */
			*(unaligned_int32_t*)(p+1) = ((uint8_t*)to_addr - (p + 5));
			ret++;
		}
		p += _asm_x86_inslen(p); /* step to the next instruction */
	}
	if (ret) {
		ir_mem_flush((void*)code, size);
	}
	return ret;
}
#elif defined(IR_TARGET_AARCH64)
/* AArch64 variant: scan the code region backwards and retarget every
 * branch that points to "from_addr" so that it points to "to_addr".
 * Handles B (imm26), B.cond/CBZ/CBNZ (imm19) and TBZ/TBNZ (imm14).
 * A patched B is remembered as "veneer": short-range conditional branches
 * whose new target is out of range are redirected to it instead
 * (NOTE(review): this relies on the backward scan finding a suitable B
 * after the conditional branch in code order — confirm).
 * Returns the number of patched sites; flushes the icache if any. */
static int ir_patch_code(const void *code, size_t size, const void *from_addr, const void *to_addr)
{
	int ret = 0;
	uint8_t *p, *end;
	const void *veneer = NULL;
	ptrdiff_t delta;

	end = (uint8_t*)code;
	p = end + size;
	while (p > end) {
		uint32_t *ins_ptr;
		uint32_t ins;

		p -= 4; /* fixed 4-byte instructions */
		ins_ptr = (uint32_t*)p;
		ins = *ins_ptr;
		if ((ins & 0xfc000000u) == 0x14000000u) {
			// B (imm26:0..25)
			delta = (uint32_t*)from_addr - ins_ptr; /* word-granular offset */
			if (((ins ^ (uint32_t)delta) & 0x01ffffffu) == 0) {
				delta = (uint32_t*)to_addr - ins_ptr;
				if (((delta + 0x02000000) >> 26) != 0) {
					abort(); // branch target out of range
				}
				*ins_ptr = (ins & 0xfc000000u) | ((uint32_t)delta & 0x03ffffffu);
				ret++;
				if (!veneer) {
					veneer = p; /* this B now jumps to to_addr; reusable as a trampoline */
				}
			}
		} else if ((ins & 0xff000000u) == 0x54000000u ||
		           (ins & 0x7e000000u) == 0x34000000u) {
			// B.cond, CBZ, CBNZ (imm19:5..23)
			delta = (uint32_t*)from_addr - ins_ptr;
			if (((ins ^ ((uint32_t)delta << 5)) & 0x00ffffe0u) == 0) {
				delta = (uint32_t*)to_addr - ins_ptr;
				if (((delta + 0x40000) >> 19) != 0) {
					/* direct target out of imm19 range: try the veneer */
					if (veneer) {
						delta = (uint32_t*)veneer - ins_ptr;
						if (((delta + 0x40000) >> 19) != 0) {
							abort(); // branch target out of range
						}
					} else {
						abort(); // branch target out of range
					}
				}
				*ins_ptr = (ins & 0xff00001fu) | (((uint32_t)delta & 0x7ffffu) << 5);
				ret++;
			}
		} else if ((ins & 0x7e000000u) == 0x36000000u) {
			// TBZ, TBNZ (imm14:5..18)
			delta = (uint32_t*)from_addr - ins_ptr;
			if (((ins ^ ((uint32_t)delta << 5)) & 0x0007ffe0u) == 0) {
				delta = (uint32_t*)to_addr - ins_ptr;
				if (((delta + 0x2000) >> 14) != 0) {
					/* direct target out of imm14 range: try the veneer */
					if (veneer) {
						delta = (uint32_t*)veneer - ins_ptr;
						if (((delta + 0x2000) >> 14) != 0) {
							abort(); // branch target out of range
						}
					} else {
						abort(); // branch target out of range
					}
				}
				*ins_ptr = (ins & 0xfff8001fu) | (((uint32_t)delta & 0x3fffu) << 5);
				ret++;
			}
		}
	}
	if (ret) {
		ir_mem_flush((void*)code, size);
	}
	return ret;
}
#endif
/* Redirect every reference to "from_addr" within the generated code region
 * to "to_addr": first the indirect-jump table appended after the code
 * (jmp_table_size pointer slots), then direct branches inside the machine
 * code itself.  Returns the total number of patched sites. */
int ir_patch(const void *code, size_t size, uint32_t jmp_table_size, const void *from_addr, const void *to_addr)
{
	int count = 0;
	uint32_t i;

	if (jmp_table_size) {
		/* the table starts at the next pointer-aligned address past the code */
		const void **slot = (const void **)((char*)code + IR_ALIGNED_SIZE(size, sizeof(void*)));

		for (i = 0; i < jmp_table_size; i++, slot++) {
			if (*slot == from_addr) {
				*slot = to_addr;
				count++;
			}
		}
	}
	count += ir_patch_code(code, size, from_addr, to_addr);
	return count;
}

View File

@@ -0,0 +1,266 @@
/*
* IR - Lightweight JIT Compilation Framework
* (Linux perf interface)
* Copyright (C) 2022 Zend by Perforce.
* Authors: Dmitry Stogov <dmitry@php.net>
*
* 1) Profile using perf-<pid>.map
* perf record ./prog
* perf report
*
* 2) Profile using jit-<pid>.dump
* perf record -k 1 ./prog
* perf inject -j -i perf.data -o perf.data.jitted
* perf report -i perf.data.jitted
*/
#include <stdio.h>
#include <unistd.h>
#include <time.h>
#include <sys/mman.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <string.h>
#if defined(__linux__)
#include <sys/syscall.h>
#elif defined(__darwin__)
# include <pthread.h>
#elif defined(__FreeBSD__)
# include <sys/thr.h>
# include <sys/sysctl.h>
#elif defined(__NetBSD__)
# include <lwp.h>
#elif defined(__DragonFly__)
# include <sys/lwp.h>
# include <sys/sysctl.h>
#elif defined(__sun)
// avoiding thread.h inclusion as it conflicts with vtunes types.
extern unsigned int thr_self(void);
#elif defined(__HAIKU__)
#include <FindDirectory.h>
#endif
#include "ir.h"
#include "ir_elf.h"
/* jitdump file-format constants (perf's jitdump specification) */
#define IR_PERF_JITDUMP_HEADER_MAGIC 0x4A695444 /* "JiTD" */
#define IR_PERF_JITDUMP_HEADER_VERSION 1
#define IR_PERF_JITDUMP_RECORD_LOAD 0
#define IR_PERF_JITDUMP_RECORD_MOVE 1
#define IR_PERF_JITDUMP_RECORD_DEBUG_INFO 2
#define IR_PERF_JITDUMP_RECORD_CLOSE 3
#define IR_PERF_JITDUMP_UNWINDING_UNFO 4 /* NOTE(review): likely a typo for ..._UNWINDING_INFO — confirm before renaming */
#define ALIGN8(size)   (((size) + 7) & ~7)
#define PADDING8(size) (ALIGN8(size) - (size)) /* bytes needed to reach 8-byte alignment */

/* File header written once at the start of jit-<pid>.dump */
typedef struct ir_perf_jitdump_header {
	uint32_t magic;
	uint32_t version;
	uint32_t size;
	uint32_t elf_mach_target;
	uint32_t reserved;
	uint32_t process_id;
	uint64_t time_stamp;
	uint64_t flags;
} ir_perf_jitdump_header;

/* Common prefix of every jitdump record */
typedef struct _ir_perf_jitdump_record {
	uint32_t event;
	uint32_t size;
	uint64_t time_stamp;
} ir_perf_jitdump_record;

/* JIT_CODE_LOAD record announcing one JIT-ed function (followed in the
 * file by the function name and a copy of its machine code) */
typedef struct _ir_perf_jitdump_load_record {
	ir_perf_jitdump_record hdr;
	uint32_t process_id;
	uint32_t thread_id;
	uint64_t vma;
	uint64_t code_address;
	uint64_t code_size;
	uint64_t code_id;
} ir_perf_jitdump_load_record;

static int jitdump_fd = -1;            /* descriptor of jit-<pid>.dump; -1 when closed */
static void *jitdump_mem = MAP_FAILED; /* PROT_EXEC mapping of the dump file
                                        * (presumably lets perf discover the file
                                        * via its mmap events — TODO confirm) */
static uint64_t ir_perf_timestamp(void)
{
struct timespec ts;
if (clock_gettime(CLOCK_MONOTONIC, &ts) != 0) {
return 0;
}
return ((uint64_t)ts.tv_sec * 1000000000) + ts.tv_nsec;
}
/* Create /tmp/jit-<pid>.dump and write the jitdump file header.
 * The dump file is also mmap-ed with PROT_EXEC so that a corresponding
 * mmap event appears in perf's trace (presumably how "perf inject -j"
 * locates the dump — TODO confirm against the jitdump spec).
 * Returns 1 on success, 0 on failure; on failure no global state
 * (jitdump_fd / jitdump_mem) is left behind. */
int ir_perf_jitdump_open(void)
{
	char filename[64];
	int fd, ret;
	ir_elf_header elf_hdr;
	ir_perf_jitdump_header jit_hdr;

	sprintf(filename, "/tmp/jit-%d.dump", getpid());
	if (!ir_perf_timestamp()) {
		return 0;
	}

	/* Read our own ELF header to learn the machine type for the dump. */
#if defined(__linux__)
	fd = open("/proc/self/exe", O_RDONLY);
#elif defined(__NetBSD__)
	fd = open("/proc/curproc/exe", O_RDONLY);
#elif defined(__FreeBSD__) || defined(__DragonFly__)
	char path[PATH_MAX];
	size_t pathlen = sizeof(path);
	int mib[4] = {CTL_KERN, KERN_PROC, KERN_PROC_PATHNAME, -1};
	if (sysctl(mib, 4, path, &pathlen, NULL, 0) == -1) {
		return 0;
	}
	fd = open(path, O_RDONLY);
#elif defined(__sun)
	fd = open("/proc/self/path/a.out", O_RDONLY);
#elif defined(__HAIKU__)
	char path[PATH_MAX];
	if (find_path(B_APP_IMAGE_SYMBOL, B_FIND_PATH_IMAGE_PATH,
	              NULL, path, sizeof(path)) != B_OK) {
		return 0;
	}
	fd = open(path, O_RDONLY);
#else
	fd = -1;
#endif
	if (fd < 0) {
		return 0;
	}
	ret = read(fd, &elf_hdr, sizeof(elf_hdr));
	close(fd);
	if (ret != sizeof(elf_hdr) ||
			elf_hdr.emagic[0] != 0x7f ||
			elf_hdr.emagic[1] != 'E' ||
			elf_hdr.emagic[2] != 'L' ||
			elf_hdr.emagic[3] != 'F') {
		return 0;
	}

	jitdump_fd = open(filename, O_CREAT | O_TRUNC | O_RDWR, 0666);
	if (jitdump_fd < 0) {
		return 0;
	}
	jitdump_mem = mmap(NULL,
		sysconf(_SC_PAGESIZE),
		PROT_READ|PROT_EXEC,
		MAP_PRIVATE, jitdump_fd, 0);
	if (jitdump_mem == MAP_FAILED) {
		close(jitdump_fd);
		jitdump_fd = -1;
		return 0;
	}

	memset(&jit_hdr, 0, sizeof(jit_hdr));
	jit_hdr.magic = IR_PERF_JITDUMP_HEADER_MAGIC;
	jit_hdr.version = IR_PERF_JITDUMP_HEADER_VERSION;
	jit_hdr.size = sizeof(jit_hdr);
	jit_hdr.elf_mach_target = elf_hdr.machine;
	jit_hdr.process_id = getpid();
	jit_hdr.time_stamp = ir_perf_timestamp();
	jit_hdr.flags = 0;
	if (write(jitdump_fd, &jit_hdr, sizeof(jit_hdr)) != sizeof(jit_hdr)) {
		/* Roll back so later register/close calls see a clean state
		 * (previously jitdump_fd stayed open and the page stayed mapped
		 * even though failure was reported). */
		munmap(jitdump_mem, sysconf(_SC_PAGESIZE));
		jitdump_mem = MAP_FAILED;
		close(jitdump_fd);
		jitdump_fd = -1;
		return 0;
	}
	return 1;
}
/* Write the final CLOSE record and shut the jitdump file down.
 * Returns 1 on success (or when jitdump was never opened), 0 if the
 * CLOSE record could not be written. */
int ir_perf_jitdump_close(void)
{
	int ret = 1;

	if (jitdump_fd >= 0) {
		ir_perf_jitdump_record rec;

		rec.event = IR_PERF_JITDUMP_RECORD_CLOSE;
		rec.size = sizeof(rec);
		rec.time_stamp = ir_perf_timestamp();
		if (write(jitdump_fd, &rec, sizeof(rec)) != sizeof(rec)) {
			ret = 0;
		}
		if (jitdump_mem != MAP_FAILED) {
			munmap(jitdump_mem, sysconf(_SC_PAGESIZE));
			jitdump_mem = MAP_FAILED; /* fix: reset so state reads as inactive */
		}
		close(jitdump_fd);
		/* Fix: mark the dump inactive; previously jitdump_fd kept its old
		 * value, so a later ir_perf_jitdump_register() would write to a
		 * closed (or reused) file descriptor. */
		jitdump_fd = -1;
	}
	return ret;
}
/* Append a jitdump "code load" record for a freshly generated code region:
 * the fixed record, the NUL-terminated symbol name, then a verbatim copy of
 * the machine code.  No-op (returning 1) when jitdump is inactive.
 * Returns 0 only on a write error. */
int ir_perf_jitdump_register(const char *name, const void *start, size_t size)
{
	if (jitdump_fd >= 0) {
		static uint64_t id = 1; /* monotonically increasing code id */
		ir_perf_jitdump_load_record rec;
		size_t len = strlen(name);
		uint32_t thread_id = 0;
		/* Fetch the kernel thread id; each OS spells this differently. */
#if defined(__linux__)
		thread_id = syscall(SYS_gettid);
#elif defined(__darwin__)
		uint64_t thread_id_u64;
		pthread_threadid_np(NULL, &thread_id_u64);
		thread_id = (uint32_t) thread_id_u64;
#elif defined(__FreeBSD__)
		long tid;
		thr_self(&tid);
		thread_id = (uint32_t)tid;
#elif defined(__OpenBSD__)
		thread_id = getthrid();
#elif defined(__NetBSD__)
		thread_id = _lwp_self();
#elif defined(__DragonFly__)
		thread_id = lwp_gettid();
#elif defined(__sun)
		thread_id = thr_self();
#endif
		memset(&rec, 0, sizeof(rec));
		rec.hdr.event = IR_PERF_JITDUMP_RECORD_LOAD;
		/* total size covers the name (incl. NUL) and the code payload */
		rec.hdr.size = sizeof(rec) + len + 1 + size;
		rec.hdr.time_stamp = ir_perf_timestamp();
		rec.process_id = getpid();
		rec.thread_id = thread_id;
		rec.vma = (uint64_t)(uintptr_t)start;
		rec.code_address = (uint64_t)(uintptr_t)start;
		rec.code_size = (uint64_t)size;
		rec.code_id = id++;
		if (write(jitdump_fd, &rec, sizeof(rec)) != sizeof(rec)
		 || write(jitdump_fd, name, len + 1) < 0
		 || write(jitdump_fd, start, size) < 0) {
			return 0;
		}
	}
	return 1;
}
/* Append one "<start> <size> <name>" line to /tmp/perf-<pid>.map, the
 * classic perf symbol-map format.  The file is opened lazily on first
 * use and kept open (line-buffered) for the lifetime of the process. */
void ir_perf_map_register(const char *name, const void *start, size_t size)
{
	static FILE *map_file = NULL;

	if (map_file == NULL) {
		char path[64];

		sprintf(path, "/tmp/perf-%d.map", getpid());
		map_file = fopen(path, "w");
		if (map_file == NULL) {
			return; /* silently disabled; next call retries */
		}
		/* line buffering makes each symbol visible to perf immediately */
		setlinebuf(map_file);
	}
	fprintf(map_file, "%zx %zx %s\n", (size_t)(uintptr_t)start, size, name);
}

View File

@@ -0,0 +1,37 @@
/*
* IR - Lightweight JIT Compilation Framework
* (IR/PHP integration)
* Copyright (C) 2022 Zend by Perforce.
* Authors: Dmitry Stogov <dmitry@php.net>
*/
#ifndef IR_PHP_H
#define IR_PHP_H

/* No PHP-specific IR opcodes are added (the opcode list macro is empty). */
#define IR_PHP_OPS(_)

/* Hook letting the Zend JIT process IR SNAPSHOT instructions during
 * code generation (used for trace deoptimization info). */
#define IR_SNAPSHOT_HANDLER_DCL() \
	void *zend_jit_snapshot_handler(ir_ctx *ctx, ir_ref snapshot_ref, ir_insn *snapshot, void *addr)
#define IR_SNAPSHOT_HANDLER(ctx, ref, insn, addr) \
	zend_jit_snapshot_handler(ctx, ref, insn, addr)

/* By default route all IR allocations through the Zend memory manager. */
#ifndef IR_PHP_MM
# define IR_PHP_MM 1
#endif

#if IR_PHP_MM
# include "zend.h"
# define ir_mem_malloc emalloc
# define ir_mem_calloc ecalloc
# define ir_mem_realloc erealloc
# define ir_mem_free efree
#endif

/* On AArch64 the GDB JIT registration entry is provided externally. */
#if defined(IR_TARGET_AARCH64)
# define IR_EXTERNAL_GDB_ENTRY
#endif

#endif /* IR_PHP_H */

File diff suppressed because it is too large Load Diff

3870
ext/opcache/jit/ir/ir_ra.c Normal file

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,128 @@
/*
* IR - Lightweight JIT Compilation Framework
* (IR saver)
* Copyright (C) 2022 Zend by Perforce.
* Authors: Dmitry Stogov <dmitry@php.net>
*/
#include "ir.h"
#include "ir_private.h"
/* Dump the whole IR of "ctx" to "f" in the textual form understood by the
 * IR loader: first the constant table ("c_<n>" definitions), then every
 * instruction as a data value ("d_<n>"), a control label ("l_<n>"), or both. */
void ir_save(const ir_ctx *ctx, FILE *f)
{
	ir_ref i, j, n, ref, *p;
	ir_insn *insn;
	uint32_t flags;
	bool first;

	fprintf(f, "{\n");
	/* Constants live at negative offsets below ir_base; walk them downwards. */
	for (i = IR_UNUSED + 1, insn = ctx->ir_base - i; i < ctx->consts_count; i++, insn--) {
		fprintf(f, "\t%s c_%d = ", ir_type_cname[insn->type], i);
		if (insn->op == IR_FUNC) {
			if (!insn->const_flags) {
				fprintf(f, "func(%s)", ir_get_str(ctx, insn->val.i32));
			} else {
				fprintf(f, "func(%s, %d)", ir_get_str(ctx, insn->val.i32), insn->const_flags);
			}
		} else if (insn->op == IR_SYM) {
			fprintf(f, "sym(%s)", ir_get_str(ctx, insn->val.i32));
		} else if (insn->op == IR_FUNC_ADDR) {
			fprintf(f, "func_addr(");
			ir_print_const(ctx, insn, f, true);
			if (insn->const_flags) {
				fprintf(f, ", %d", insn->const_flags);
			}
			fprintf(f, ")");
		} else {
			ir_print_const(ctx, insn, f, true);
		}
		fprintf(f, ";\n");
	}
	/* Instructions; "i" advances by the slot length of each instruction. */
	for (i = IR_UNUSED + 1, insn = ctx->ir_base + i; i < ctx->insns_count;) {
		flags = ir_op_flags[insn->op];
		/* Control nodes define l_<n>; memory ops producing a value define both. */
		if (flags & IR_OP_FLAG_CONTROL) {
			if (!(flags & IR_OP_FLAG_MEM) || insn->type == IR_VOID) {
				fprintf(f, "\tl_%d = ", i);
			} else {
				fprintf(f, "\t%s d_%d, l_%d = ", ir_type_cname[insn->type], i, i);
			}
		} else {
			fprintf(f, "\t");
			if (flags & IR_OP_FLAG_DATA) {
				fprintf(f, "%s d_%d = ", ir_type_cname[insn->type], i);
			}
		}
		fprintf(f, "%s", ir_op_name[insn->op]);
		n = ir_operands_count(ctx, insn);
		/* For variable-arity ops print "/<count>" when it differs from the
		 * default, so the loader can size the instruction correctly. */
		if ((insn->op == IR_MERGE || insn->op == IR_LOOP_BEGIN) && n != 2) {
			fprintf(f, "/%d", n);
		} else if ((insn->op == IR_CALL || insn->op == IR_TAILCALL) && n != 2) {
			fprintf(f, "/%d", n - 2);
		} else if (insn->op == IR_PHI && n != 3) {
			fprintf(f, "/%d", n - 1);
		} else if (insn->op == IR_SNAPSHOT) {
			fprintf(f, "/%d", n - 1);
		}
		first = 1;
		for (j = 1, p = insn->ops + 1; j <= n; j++, p++) {
			uint32_t opnd_kind = IR_OPND_KIND(flags, j);
			ref = *p;
			if (ref) {
				switch (opnd_kind) {
					case IR_OPND_DATA:
						if (IR_IS_CONST_REF(ref)) {
							fprintf(f, "%sc_%d", first ? "(" : ", ", -ref);
						} else {
							fprintf(f, "%sd_%d", first ? "(" : ", ", ref);
						}
						first = 0;
						break;
					case IR_OPND_CONTROL:
					case IR_OPND_CONTROL_DEP:
					case IR_OPND_CONTROL_REF:
						fprintf(f, "%sl_%d", first ? "(" : ", ", ref);
						first = 0;
						break;
					case IR_OPND_STR:
						fprintf(f, "%s\"%s\"", first ? "(" : ", ", ir_get_str(ctx, ref));
						first = 0;
						break;
					case IR_OPND_PROB:
						if (ref == 0) {
							break; /* probability 0 is omitted entirely */
						}
						IR_FALLTHROUGH;
					case IR_OPND_NUM:
						fprintf(f, "%s%d", first ? "(" : ", ", ref);
						first = 0;
						break;
				}
			} else if (opnd_kind == IR_OPND_NUM) {
				fprintf(f, "%s%d", first ? "(" : ", ", ref);
				first = 0;
			} else if (IR_IS_REF_OPND_KIND(opnd_kind) && j != n) {
				/* keep operand positions: print "null" for missing middles */
				fprintf(f, "%snull", first ? "(" : ", ");
				first = 0;
			}
		}
		if (first) {
			fprintf(f, ";");
		} else {
			fprintf(f, ");");
		}
		/* Append variable binding info as a trailing comment, if any. */
		if (((flags & IR_OP_FLAG_DATA) || ((flags & IR_OP_FLAG_MEM) && insn->type != IR_VOID)) && ctx->binding) {
			ir_ref var = ir_binding_find(ctx, i);
			if (var) {
				IR_ASSERT(var < 0);
				fprintf(f, " # BIND(0x%x);", -var);
			}
		}
		fprintf(f, "\n");
		n = ir_insn_inputs_to_len(n);
		i += n;
		insn += n;
	}
	fprintf(f, "}\n");
}

View File

@@ -0,0 +1,885 @@
/*
* IR - Lightweight JIT Compilation Framework
* (SCCP - Sparse Conditional Constant Propagation)
* Copyright (C) 2022 Zend by Perforce.
* Authors: Dmitry Stogov <dmitry@php.net>
*
* The SCCP algorithm is based on M. N. Wegman and F. K. Zadeck publication
* See: M. N. Wegman and F. K. Zadeck. "Constant propagation with conditional branches"
* ACM Transactions on Programming Languages and Systems, 13(2):181-210, April 1991
*/
#include "ir.h"
#include "ir_private.h"
#define IR_TOP IR_UNUSED
#define IR_BOTTOM IR_LAST_OP
#define IR_MAKE_TOP(ref) do {IR_ASSERT(ref > 0); _values[ref].optx = IR_TOP;} while (0)
#define IR_MAKE_BOTTOM(ref) do {IR_ASSERT(ref > 0); _values[ref].optx = IR_BOTTOM;} while (0)
#define IR_IS_TOP(ref) (ref >= 0 && _values[ref].optx == IR_TOP)
#define IR_IS_BOTTOM(ref) (ref >= 0 && _values[ref].optx == IR_BOTTOM)
#define IR_IS_FEASIBLE(ref) (ref >= 0 && _values[ref].optx != IR_TOP)
#define IR_COMBO_COPY_PROPAGATION 1
#if IR_COMBO_COPY_PROPAGATION
/* Resolve a COPY alias in the lattice: if "a" is marked COPY, return the
 * reference it aliases.  Copies never chain, so a single hop suffices. */
IR_ALWAYS_INLINE ir_ref ir_sccp_identity(ir_insn *_values, ir_ref a)
{
	ir_ref ref = a;

	if (ref > 0 && _values[ref].op == IR_COPY) {
		ref = _values[ref].op1;
		IR_ASSERT(ref <= 0 || _values[ref].op != IR_COPY); /* no COPY chains */
	}
	return ref;
}
#endif
/* Run the folding engine on instruction "res" using lattice values instead
 * of real operands where available, and lower the lattice cell of "res"
 * accordingly (constant, COPY alias, or BOTTOM).
 * Returns 1 if the cell changed, 0 if it kept its previous value. */
static ir_ref ir_sccp_fold(ir_ctx *ctx, ir_insn *_values, ir_ref res, uint32_t opt, ir_ref op1, ir_ref op2, ir_ref op3)
{
	ir_insn *op1_insn, *op2_insn, *op3_insn, *insn;

#if IR_COMBO_COPY_PROPAGATION
	/* look through COPY aliases first */
	op1 = ir_sccp_identity(_values, op1);
	op2 = ir_sccp_identity(_values, op2);
	op3 = ir_sccp_identity(_values, op3);
#endif
restart:
	/* Use the lattice constant when one is known, the real insn otherwise. */
	op1_insn = (op1 > 0 && IR_IS_CONST_OP(_values[op1].op)) ? _values + op1 : ctx->ir_base + op1;
	op2_insn = (op2 > 0 && IR_IS_CONST_OP(_values[op2].op)) ? _values + op2 : ctx->ir_base + op2;
	op3_insn = (op3 > 0 && IR_IS_CONST_OP(_values[op3].op)) ? _values + op3 : ctx->ir_base + op3;
	switch (ir_folding(ctx, opt, op1, op2, op3, op1_insn, op2_insn, op3_insn)) {
		case IR_FOLD_DO_RESTART:
			opt = ctx->fold_insn.optx;
			op1 = ctx->fold_insn.op1;
			op2 = ctx->fold_insn.op2;
			op3 = ctx->fold_insn.op3;
			goto restart;
		case IR_FOLD_DO_EMIT:
			/* result is a real instruction, not expressible in the lattice */
			IR_MAKE_BOTTOM(res);
			return 1;
		case IR_FOLD_DO_COPY:
			op1 = ctx->fold_insn.op1;
#if IR_COMBO_COPY_PROPAGATION
			op1 = ir_sccp_identity(_values, op1);
#endif
			insn = (op1 > 0 && IR_IS_CONST_OP(_values[op1].op)) ? _values + op1 : ctx->ir_base + op1;
			if (IR_IS_CONST_OP(insn->op)) {
				/* pass */
#if IR_COMBO_COPY_PROPAGATION
			} else if (IR_IS_TOP(res)) {
				/* first visit: record the alias */
				_values[res].optx = IR_OPT(IR_COPY, insn->type);
				_values[res].op1 = op1;
				return 1;
			} else if (_values[res].op == IR_COPY && _values[res].op1 == op1) {
				return 0; /* not changed */
#endif
			} else {
				IR_MAKE_BOTTOM(res);
				return 1;
			}
			break;
		case IR_FOLD_DO_CONST:
			insn = &ctx->fold_insn;
			break;
		default:
			IR_ASSERT(0);
			return 0;
	}
	/* Lower TOP to the folded constant; a conflicting re-visit -> BOTTOM. */
	if (IR_IS_TOP(res)) {
		_values[res].optx = IR_OPT(insn->type, insn->type);
		_values[res].val.u64 = insn->val.u64;
		return 1;
	} else if (_values[res].opt != IR_OPT(insn->type, insn->type) || _values[res].val.u64 != insn->val.u64) {
		IR_MAKE_BOTTOM(res);
		return 1;
	}
	return 0; /* not changed */
}
/* Lattice meet used for PHI nodes: join the value of input "b" into slot
 * "a".  TOP takes the other value; identical constants stay; anything else
 * degrades to BOTTOM (with combo, a non-constant "b" becomes COPY-of-b).
 * Returns 1 if slot "a" changed. */
static bool ir_sccp_join_values(ir_ctx *ctx, ir_insn *_values, ir_ref a, ir_ref b)
{
	ir_insn *v;

	if (!IR_IS_BOTTOM(a) && !IR_IS_TOP(b)) {
		b = ir_sccp_identity(_values, b);
		v = IR_IS_CONST_REF(b) ? &ctx->ir_base[b] : &_values[b];
		if (IR_IS_TOP(a)) {
#if IR_COMBO_COPY_PROPAGATION
			if (v->op == IR_BOTTOM) {
				/* remember the source as a copy instead of going BOTTOM */
				_values[a].optx = IR_OPT(IR_COPY, ctx->ir_base[b].type);
				_values[a].op1 = b;
				return 1;
			}
#endif
			_values[a].optx = v->opt;
			_values[a].val.u64 = v->val.u64;
			return 1;
		} else if (_values[a].opt == v->opt && _values[a].val.u64 == v->val.u64) {
			/* pass */
#if IR_COMBO_COPY_PROPAGATION
		} else if (_values[a].op == IR_COPY && _values[a].op1 == b) {
			/* pass */
#endif
		} else {
			IR_MAKE_BOTTOM(a);
			return 1;
		}
	}
	return 0;
}
/* Evaluate the truthiness of "a": a real constant ref reads from ir_base,
 * a positive ref reads the constant recorded in the SCCP lattice. */
static bool ir_sccp_is_true(ir_ctx *ctx, ir_insn *_values, ir_ref a)
{
	ir_insn *val;

	val = IR_IS_CONST_REF(a) ? &ctx->ir_base[a] : &_values[a];
	return ir_const_is_true(val);
}
/* Compare two constants (real or lattice) by their raw 64-bit payloads. */
static bool ir_sccp_is_equal(ir_ctx *ctx, ir_insn *_values, ir_ref a, ir_ref b)
{
	ir_insn *val_a = IR_IS_CONST_REF(a) ? &ctx->ir_base[a] : &_values[a];
	ir_insn *val_b = IR_IS_CONST_REF(b) ? &ctx->ir_base[b] : &_values[b];

	return val_a->val.u64 == val_b->val.u64;
}
/* Remove every occurrence of "ref" from the use list of "from", keeping the
 * relative order of the remaining entries.  With combo copy propagation the
 * freed tail slots are zeroed so ir_sccp_add_to_use_list() can reuse them. */
static void ir_sccp_remove_from_use_list(ir_ctx *ctx, ir_ref from, ir_ref ref)
{
	ir_use_list *use_list = &ctx->use_lists[from];
	ir_ref *edges = &ctx->use_edges[use_list->refs];
	ir_ref n = use_list->count;
	ir_ref i, kept = 0;

	/* compact in place, dropping matches */
	for (i = 0; i < n; i++) {
		ir_ref use = edges[i];

		if (use != ref) {
			edges[kept++] = use;
		}
	}
	use_list->count = kept;
#if IR_COMBO_COPY_PROPAGATION
	/* clear the freed tail for later reuse */
	while (kept < n) {
		edges[kept++] = IR_UNUSED;
	}
#endif
}
#if IR_COMBO_COPY_PROPAGATION
/* Append "ref" to the use list of "to".  A zeroed slot left right after the
 * list (by ir_sccp_remove_from_use_list) is reused when available; otherwise
 * the list is moved to the end of a reallocated edges buffer.
 * Returns 1 if ctx->use_edges was reallocated (callers must re-read any
 * cached pointers into it), 0 otherwise. */
static int ir_sccp_add_to_use_list(ir_ctx *ctx, ir_ref to, ir_ref ref)
{
	ir_use_list *use_list = &ctx->use_lists[to];
	ir_ref n = use_list->refs + use_list->count;

	if (n < ctx->use_edges_count && ctx->use_edges[n] == IR_UNUSED) {
		/* cheap path: grow into the adjacent free slot */
		ctx->use_edges[n] = ref;
		use_list->count++;
		return 0;
	} else {
		/* Reallocate the whole edges buffer (this is inefficient) */
		ctx->use_edges = ir_mem_realloc(ctx->use_edges, (ctx->use_edges_count + use_list->count + 1) * sizeof(ir_ref));
		memcpy(ctx->use_edges + ctx->use_edges_count, ctx->use_edges + use_list->refs, use_list->count * sizeof(ir_ref));
		use_list->refs = ctx->use_edges_count;
		ctx->use_edges[use_list->refs + use_list->count] = ref;
		use_list->count++;
		ctx->use_edges_count += use_list->count;
		return 1;
	}
}
#endif
/* Turn instruction "ref" into a NOP: empty its use list and clear all of
 * its operands while preserving "inputs_count" (the slot length). */
static void ir_sccp_make_nop(ir_ctx *ctx, ir_ref ref)
{
	ir_use_list *use_list = &ctx->use_lists[ref];
	ir_insn *insn = &ctx->ir_base[ref];
	ir_ref k, count;

	use_list->refs = 0;
	use_list->count = 0;
	count = insn->inputs_count;
	insn->opt = IR_NOP; /* keep "inputs_count" */
	for (k = 1; k <= count; k++) {
		ir_insn_set_op(insn, k, IR_UNUSED);
	}
}
/* NOP instruction "ref", detach it from the use lists of its (BOTTOM)
 * inputs, and schedule inputs that thereby became dead — unused foldable
 * values, or loads whose only remaining use is their control edge — for
 * dead-code elimination on "worklist". */
static void ir_sccp_remove_insn(ir_ctx *ctx, ir_insn *_values, ir_ref ref, ir_bitqueue *worklist)
{
	ir_ref j, n, *p;
	ir_use_list *use_list = &ctx->use_lists[ref];
	ir_insn *insn;

	use_list->refs = 0;
	use_list->count = 0;
	insn = &ctx->ir_base[ref];
	n = insn->inputs_count;
	insn->opt = IR_NOP; /* keep "inputs_count" */
	for (j = 1, p = insn->ops + j; j <= n; j++, p++) {
		ir_ref input = *p;
		*p = IR_UNUSED;
		if (input > 0 && _values[input].op == IR_BOTTOM) {
			ir_sccp_remove_from_use_list(ctx, input, ref);
			/* schedule DCE */
			if ((IR_IS_FOLDABLE_OP(ctx->ir_base[input].op) && ctx->use_lists[input].count == 0)
			 || ((ir_op_flags[ctx->ir_base[input].op] & (IR_OP_FLAG_MEM|IR_OP_FLAG_MEM_MASK)) == (IR_OP_FLAG_MEM|IR_OP_FLAG_MEM_LOAD)
			  && ctx->use_lists[input].count == 1)) {
				ir_bitqueue_add(worklist, input);
			}
		}
	}
}
/* Replace every use of "ref" by "new_ref" and NOP "ref" itself.  Inputs of
 * "ref" are detached (and scheduled for DCE when "worklist" is given); each
 * feasible user is rewired and, when it is already at BOTTOM, rescheduled
 * for folding.  "worklist" may be NULL to skip all rescheduling. */
static void ir_sccp_replace_insn(ir_ctx *ctx, ir_insn *_values, ir_ref ref, ir_ref new_ref, ir_bitqueue *worklist)
{
	ir_ref j, n, *p, use, k, l;
	ir_insn *insn;
	ir_use_list *use_list;

	IR_ASSERT(ref != new_ref);
	insn = &ctx->ir_base[ref];
	n = insn->inputs_count;
	insn->opt = IR_NOP; /* keep "inputs_count" */
	for (j = 1, p = insn->ops + 1; j <= n; j++, p++) {
		ir_ref input = *p;
		*p = IR_UNUSED;
		if (input > 0) {
			ir_sccp_remove_from_use_list(ctx, input, ref);
			/* schedule DCE */
			if (worklist
			 && ((IR_IS_FOLDABLE_OP(ctx->ir_base[input].op) && ctx->use_lists[input].count == 0)
			  || ((ir_op_flags[ctx->ir_base[input].op] & (IR_OP_FLAG_MEM|IR_OP_FLAG_MEM_MASK)) == (IR_OP_FLAG_MEM|IR_OP_FLAG_MEM_LOAD)
			   && ctx->use_lists[input].count == 1))) {
				ir_bitqueue_add(worklist, input);
			}
		}
	}
	/* Rewire all feasible users of "ref" to "new_ref". */
	use_list = &ctx->use_lists[ref];
	n = use_list->count;
	for (j = 0, p = &ctx->use_edges[use_list->refs]; j < n; j++, p++) {
		use = *p;
		if (IR_IS_FEASIBLE(use)) {
			insn = &ctx->ir_base[use];
			l = insn->inputs_count;
			for (k = 1; k <= l; k++) {
				if (ir_insn_op(insn, k) == ref) {
					ir_insn_set_op(insn, k, new_ref);
				}
			}
#if IR_COMBO_COPY_PROPAGATION
			if (new_ref > 0 && IR_IS_BOTTOM(use)) {
				if (ir_sccp_add_to_use_list(ctx, new_ref, use)) {
					/* restore after reallocation */
					use_list = &ctx->use_lists[ref];
					n = use_list->count;
					p = &ctx->use_edges[use_list->refs + j];
				}
			}
#endif
			/* schedule folding */
			if (worklist && _values[use].op == IR_BOTTOM) {
				ir_bitqueue_add(worklist, use);
			}
		}
	}
	use_list->refs = 0;
	use_list->count = 0;
}
/* Post-SCCP folding pass for instruction "ref" (works on the real IR, not
 * on lattice values).  Depending on the folding result, the instruction is
 * rewritten in place (EMIT), replaced by another ref (COPY), or replaced by
 * a constant (CONST); affected users are rescheduled on "worklist". */
static void ir_sccp_fold2(ir_ctx *ctx, ir_insn *_values, ir_ref ref, ir_bitqueue *worklist)
{
	uint32_t opt;
	ir_ref op1, op2, op3;
	ir_insn *op1_insn, *op2_insn, *op3_insn, *insn;

	insn = &ctx->ir_base[ref];
	opt = insn->opt;
	op1 = insn->op1;
	op2 = insn->op2;
	op3 = insn->op3;
restart:
	op1_insn = ctx->ir_base + op1;
	op2_insn = ctx->ir_base + op2;
	op3_insn = ctx->ir_base + op3;
	switch (ir_folding(ctx, opt, op1, op2, op3, op1_insn, op2_insn, op3_insn)) {
		case IR_FOLD_DO_RESTART:
			opt = ctx->fold_insn.optx;
			op1 = ctx->fold_insn.op1;
			op2 = ctx->fold_insn.op2;
			op3 = ctx->fold_insn.op3;
			goto restart;
		case IR_FOLD_DO_EMIT:
			insn = &ctx->ir_base[ref];
			if (insn->opt != ctx->fold_insn.opt
			 || insn->op1 != ctx->fold_insn.op1
			 || insn->op2 != ctx->fold_insn.op2
			 || insn->op3 != ctx->fold_insn.op3) {
				ir_use_list *use_list;
				ir_ref n, j, *p, use;

				/* Rewrite the instruction in place, fixing up the use lists
				 * of every operand that actually changed.  The cross-checks
				 * against the other operands avoid double remove/add when an
				 * operand merely switched position. */
				insn->optx = ctx->fold_insn.opt;
				IR_ASSERT(!IR_OP_HAS_VAR_INPUTS(ir_op_flags[opt & IR_OPT_OP_MASK]));
				insn->inputs_count = IR_INPUT_EDGES_COUNT(ir_op_flags[opt & IR_OPT_OP_MASK]);
				if (insn->op1 != ctx->fold_insn.op1) {
					if (!IR_IS_CONST_REF(insn->op1) && insn->op1 != ctx->fold_insn.op2 && insn->op1 != ctx->fold_insn.op3) {
						ir_sccp_remove_from_use_list(ctx, insn->op1, ref);
					}
					if (!IR_IS_CONST_REF(ctx->fold_insn.op1) && ctx->fold_insn.op1 != insn->op2 && ctx->fold_insn.op1 != insn->op3) {
						ir_sccp_add_to_use_list(ctx, ctx->fold_insn.op1, ref);
					}
				}
				if (insn->op2 != ctx->fold_insn.op2) {
					if (!IR_IS_CONST_REF(insn->op2) && insn->op2 != ctx->fold_insn.op1 && insn->op2 != ctx->fold_insn.op3) {
						ir_sccp_remove_from_use_list(ctx, insn->op2, ref);
					}
					if (!IR_IS_CONST_REF(ctx->fold_insn.op2) && ctx->fold_insn.op2 != insn->op1 && ctx->fold_insn.op2 != insn->op3) {
						ir_sccp_add_to_use_list(ctx, ctx->fold_insn.op2, ref);
					}
				}
				if (insn->op3 != ctx->fold_insn.op3) {
					if (!IR_IS_CONST_REF(insn->op3) && insn->op3 != ctx->fold_insn.op1 && insn->op3 != ctx->fold_insn.op2) {
						ir_sccp_remove_from_use_list(ctx, insn->op3, ref);
					}
					if (!IR_IS_CONST_REF(ctx->fold_insn.op3) && ctx->fold_insn.op3 != insn->op1 && ctx->fold_insn.op3 != insn->op2) {
						ir_sccp_add_to_use_list(ctx, ctx->fold_insn.op3, ref);
					}
				}
				insn->op1 = ctx->fold_insn.op1;
				insn->op2 = ctx->fold_insn.op2;
				insn->op3 = ctx->fold_insn.op3;
				/* re-fold the users that may now simplify further */
				use_list = &ctx->use_lists[ref];
				n = use_list->count;
				for (j = 0, p = &ctx->use_edges[use_list->refs]; j < n; j++, p++) {
					use = *p;
					if (_values[use].op == IR_BOTTOM) {
						ir_bitqueue_add(worklist, use);
					}
				}
			}
			break;
		case IR_FOLD_DO_COPY:
			op1 = ctx->fold_insn.op1;
			ir_sccp_replace_insn(ctx, _values, ref, op1, worklist);
			break;
		case IR_FOLD_DO_CONST:
			op1 = ir_const(ctx, ctx->fold_insn.val, ctx->fold_insn.type);
			ir_sccp_replace_insn(ctx, _values, ref, op1, worklist);
			break;
		default:
			IR_ASSERT(0);
			break;
	}
}
/* In the use list of "ref", replace the first occurrence of "use" by
 * "new_use" (used while re-linking the double linked control chain). */
static void ir_sccp_replace_use(ir_ctx *ctx, ir_ref ref, ir_ref use, ir_ref new_use)
{
	ir_use_list *use_list = &ctx->use_lists[ref];
	ir_ref *edge = &ctx->use_edges[use_list->refs];
	ir_ref *end = edge + use_list->count;

	for (; edge < end; edge++) {
		if (*edge == use) {
			*edge = new_use;
			break;
		}
	}
}
/* Collapse a one-way IF/SWITCH at "ref": "dst" is its only feasible
 * successor (IF_TRUE/IF_FALSE/CASE_*).  Both the branch node and the taken
 * successor are spliced out of the double linked control chain and NOP-ed,
 * so control flows directly from the IF's predecessor to dst's successor. */
static void ir_sccp_remove_if(ir_ctx *ctx, ir_insn *_values, ir_ref ref, ir_ref dst)
{
	ir_ref j, n, *p, use, next;
	ir_insn *insn, *next_insn;
	ir_use_list *use_list = &ctx->use_lists[ref];

	insn = &ctx->ir_base[ref];
	n = use_list->count;
	for (j = 0, p = &ctx->use_edges[use_list->refs]; j < n; j++, p++) {
		use = *p;
		if (use == dst) {
			/* the successor of the taken block is its first (only) user */
			next = ctx->use_edges[ctx->use_lists[use].refs];
			next_insn = &ctx->ir_base[next];
			/* remove IF and IF_TRUE/FALSE from double linked control list */
			next_insn->op1 = insn->op1;
			ir_sccp_replace_use(ctx, insn->op1, ref, next);
			/* remove IF and IF_TRUE/FALSE instructions */
			ir_sccp_make_nop(ctx, ref);
			ir_sccp_make_nop(ctx, use);
			break;
		}
	}
}
/* Drop the unreachable inputs of a MERGE/LOOP_BEGIN at "ref".
 * If exactly one input remains, the MERGE disappears entirely (spliced out
 * of the control chain, or demoted to BEGIN when the predecessor is an
 * IJMP/UNREACHABLE).  Otherwise the input list is compacted and every PHI
 * user has its operands compacted to match. */
static void ir_sccp_remove_unfeasible_merge_inputs(ir_ctx *ctx, ir_insn *_values, ir_ref ref, ir_ref unfeasible_inputs)
{
	ir_ref i, j, n, k, *p, use;
	ir_insn *insn, *use_insn;
	ir_use_list *use_list;
	ir_bitset life_inputs;

	insn = &ctx->ir_base[ref];
	IR_ASSERT(insn->op == IR_MERGE || insn->op == IR_LOOP_BEGIN);
	n = insn->inputs_count;
	if (n - unfeasible_inputs == 1) {
		/* remove MERGE completely */
		for (j = 1; j <= n; j++) {
			ir_ref input = ir_insn_op(insn, j);
			if (input && IR_IS_FEASIBLE(input)) {
				ir_insn *input_insn = &ctx->ir_base[input];

				IR_ASSERT(input_insn->op == IR_END || input_insn->op == IR_IJMP || input_insn->op == IR_UNREACHABLE);
				if (input_insn->op == IR_END) {
					/* splice END..MERGE out of the control chain */
					ir_ref prev, next = IR_UNUSED;
					ir_insn *next_insn = NULL;

					prev = input_insn->op1;
					/* find the control user that follows the MERGE */
					use_list = &ctx->use_lists[ref];
					for (k = 0, p = &ctx->use_edges[use_list->refs]; k < use_list->count; k++, p++) {
						use = *p;
						use_insn = &ctx->ir_base[use];
						IR_ASSERT((use_insn->op != IR_PHI) && "PHI must be already removed");
						if (ir_op_flags[use_insn->op] & IR_OP_FLAG_CONTROL) {
							next = use;
							next_insn = use_insn;
							break;
						}
					}
					IR_ASSERT(prev && next);
					/* remove MERGE and input END from double linked control list */
					next_insn->op1 = prev;
					ir_sccp_replace_use(ctx, prev, input, next);
					/* remove MERGE and input END instructions */
					ir_sccp_make_nop(ctx, ref);
					ir_sccp_make_nop(ctx, input);
					break;
				} else {
					/* predecessor is IJMP/UNREACHABLE: keep a BEGIN node */
					for (i = 2; i <= n; i++) {
						ir_insn_set_op(insn, i, IR_UNUSED);
					}
					insn->op = IR_BEGIN;
					insn->op1 = input;
				}
			}
		}
	} else {
		/* compact the feasible inputs, remembering which slots survived */
		n = insn->inputs_count;
		i = 1;
		life_inputs = ir_bitset_malloc(n + 1);
		for (j = 1; j <= n; j++) {
			ir_ref input = ir_insn_op(insn, j);

			if (input) {
				if (i != j) {
					ir_insn_set_op(insn, i, input);
				}
				ir_bitset_incl(life_inputs, j);
				i++;
			}
		}
		j = i;
		while (j < n) {
			ir_insn_set_op(insn, j, IR_UNUSED);
			j++;
		}
		i--;
		insn->inputs_count = i;

		n++;
		/* compact the operands of every PHI to mirror the MERGE inputs */
		use_list = &ctx->use_lists[ref];
		if (use_list->count > 1) {
			for (k = 0, p = &ctx->use_edges[use_list->refs]; k < use_list->count; k++, p++) {
				use = *p;
				use_insn = &ctx->ir_base[use];
				if (use_insn->op == IR_PHI) {
					i = 2;
					for (j = 2; j <= n; j++) {
						ir_ref input = ir_insn_op(use_insn, j);

						if (ir_bitset_in(life_inputs, j - 1)) {
							IR_ASSERT(input);
							if (i != j) {
								ir_insn_set_op(use_insn, i, input);
							}
							i++;
						} else if (!IR_IS_CONST_REF(input)) {
							ir_sccp_remove_from_use_list(ctx, input, use);
						}
					}
					while (i <= n) {
						ir_insn_set_op(use_insn, i, IR_UNUSED);
						i++;
					}
					use_insn->inputs_count = insn->inputs_count + 1;
				}
			}
		}
		ir_mem_free(life_inputs);
	}
}
/* Sparse Conditional Constant Propagation over the IR graph, slightly
 * modified from M. N. Wegman and F. K. Zadeck's algorithm and combined
 * with copy propagation ("combo") and dead code elimination.
 *
 * Phase 1 propagates lattice values (TOP -> const/COPY -> BOTTOM) through
 * the "_values" shadow array until the worklist drains.  Phase 2 commits
 * the results (substitutes constants/copies, removes unreachable code and
 * one-way IF/SWITCH nodes).  Phase 3 iteratively folds and DCEs whatever
 * the substitutions exposed.  Always returns 1. */
int ir_sccp(ir_ctx *ctx)
{
	ir_ref i, j, n, *p, use;
	ir_use_list *use_list;
	ir_insn *insn, *use_insn, *value;
	uint32_t flags;
	ir_bitqueue worklist;
	ir_insn *_values = ir_mem_calloc(ctx->insns_count, sizeof(ir_insn));

	ctx->flags |= IR_OPT_IN_SCCP;

	/* A bit modified SCCP algorithm of M. N. Wegman and F. K. Zadeck */
	ir_bitqueue_init(&worklist, ctx->insns_count);
	worklist.pos = 0;
	/* seed with ref 1 (the START node) */
	ir_bitset_incl(worklist.set, 1);
	/* Phase 1: propagate lattice values to a fixed point. */
	while ((i = ir_bitqueue_pop(&worklist)) >= 0) {
		insn = &ctx->ir_base[i];
		flags = ir_op_flags[insn->op];
		if (flags & IR_OP_FLAG_DATA) {
			if (insn->op == IR_PHI) {
				ir_insn *merge_insn = &ctx->ir_base[insn->op1];
				bool changed = 0;

				/* a PHI is meaningful only when its MERGE is reachable */
				if (!IR_IS_FEASIBLE(insn->op1)) {
					continue;
				}
				n = merge_insn->inputs_count + 1;
				if (n > 3 && _values[i].optx == IR_TOP) {
					for (j = 0; j < (n>>2); j++) {
						_values[i+j+1].optx = IR_BOTTOM; /* keep the tail of a long multislot instruction */
					}
				}
				/* for all live merge inputs */
				for (j = 1; j < n; j++) {
					ir_ref merge_input = ir_insn_op(merge_insn, j);

					IR_ASSERT(merge_input > 0);
					if (_values[merge_input].optx != IR_TOP) {
						ir_ref input = ir_insn_op(insn, j + 1);

						if (input > 0 && IR_IS_TOP(input)) {
							ir_bitqueue_add(&worklist, input);
						} else if (ir_sccp_join_values(ctx, _values, i, input)) {
							changed = 1;
						}
					}
				}
				if (!changed) {
					continue;
				}
			} else if (ctx->use_lists[i].count == 0) {
				/* dead code */
				continue;
			} else if (EXPECTED(IR_IS_FOLDABLE_OP(insn->op))) {
				bool may_benefit = 0;
				bool has_top = 0;

				IR_ASSERT(!IR_OP_HAS_VAR_INPUTS(flags));
				n = IR_INPUT_EDGES_COUNT(flags);
				for (p = insn->ops + 1; n > 0; p++, n--) {
					ir_ref input = *p;
					if (input > 0) {
						if (_values[input].optx == IR_TOP) {
							has_top = 1;
							ir_bitqueue_add(&worklist, input);
						} else if (_values[input].optx != IR_BOTTOM) {
							/* Perform folding only if some of direct inputs
							 * is going to be replaced by a constant or copy.
							 * This approach may miss some folding optimizations
							 * dependent on indirect inputs. e.g. reassociation.
							 */
							may_benefit = 1;
						}
					}
				}
				if (has_top) {
					continue;
				}
				if (!may_benefit) {
					IR_MAKE_BOTTOM(i);
				} else if (!ir_sccp_fold(ctx, _values, i, insn->opt, insn->op1, insn->op2, insn->op3)) {
					/* not changed */
					continue;
				}
			} else {
				IR_MAKE_BOTTOM(i);
			}
		} else if (flags & IR_OP_FLAG_BB_START) {
			if (insn->op == IR_MERGE || insn->op == IR_LOOP_BEGIN) {
				/* count unreachable predecessors; remember the count so the
				 * commit phase can compact the MERGE */
				ir_ref unfeasible_inputs = 0;

				n = insn->inputs_count;
				if (n > 3 && _values[i].optx == IR_TOP) {
					for (j = 0; j < (n>>2); j++) {
						_values[i+j+1].optx = IR_BOTTOM; /* keep the tail of a long multislot instruction */
					}
				}
				for (p = insn->ops + 1; n > 0; p++, n--) {
					ir_ref input = *p;
					IR_ASSERT(input > 0);
					if (_values[input].optx == IR_TOP) {
						unfeasible_inputs++;
					}
				}
				if (unfeasible_inputs == 0) {
					IR_MAKE_BOTTOM(i);
				} else if (_values[i].op1 != unfeasible_inputs) {
					_values[i].optx = IR_MERGE;
					_values[i].op1 = unfeasible_inputs;
				} else {
					continue;
				}
			} else {
				IR_ASSERT(insn->op == IR_START || IR_IS_FEASIBLE(insn->op1));
				IR_MAKE_BOTTOM(i);
			}
		} else {
			IR_ASSERT(insn->op1 > 0);
			if (_values[insn->op1].optx == IR_TOP) {
				/* control input is not feasible */
				continue;
			}
			if (insn->op == IR_IF) {
				if (IR_IS_TOP(insn->op2)) {
					ir_bitqueue_add(&worklist, insn->op2);
					continue;
				}
				/* constant condition: mark only the taken branch feasible */
				if (!IR_IS_BOTTOM(insn->op2)
#if IR_COMBO_COPY_PROPAGATION
				 && (IR_IS_CONST_REF(insn->op2) || _values[insn->op2].op != IR_COPY)
#endif
				) {
					bool b = ir_sccp_is_true(ctx, _values, insn->op2);
					use_list = &ctx->use_lists[i];
					IR_ASSERT(use_list->count == 2);
					p = &ctx->use_edges[use_list->refs];
					use = *p;
					use_insn = &ctx->ir_base[use];
					IR_ASSERT(use_insn->op == IR_IF_TRUE || use_insn->op == IR_IF_FALSE);
					if ((use_insn->op == IR_IF_TRUE) != b) {
						use = *(p+1);
						IR_ASSERT(ctx->ir_base[use].op == IR_IF_TRUE || ctx->ir_base[use].op == IR_IF_FALSE);
					}
					if (_values[i].optx == IR_TOP) {
						_values[i].optx = IR_IF;
						_values[i].op1 = use;
					} else if (_values[i].optx != IR_IF || _values[i].op1 != use) {
						IR_MAKE_BOTTOM(i);
					}
					if (!IR_IS_BOTTOM(use)) {
						ir_bitqueue_add(&worklist, use);
					}
					continue;
				}
				IR_MAKE_BOTTOM(i);
			} else if (insn->op == IR_SWITCH) {
				if (IR_IS_TOP(insn->op2)) {
					ir_bitqueue_add(&worklist, insn->op2);
					continue;
				}
				/* constant selector: find the matching CASE (or the default) */
				if (!IR_IS_BOTTOM(insn->op2)) {
					ir_ref use_case = IR_UNUSED;

					use_list = &ctx->use_lists[i];
					n = use_list->count;
					for (j = 0, p = &ctx->use_edges[use_list->refs]; j < n; j++, p++) {
						use = *p;
						IR_ASSERT(use > 0);
						use_insn = &ctx->ir_base[use];
						if (use_insn->op == IR_CASE_VAL) {
							if (ir_sccp_is_equal(ctx, _values, insn->op2, use_insn->op2)) {
								use_case = use;
								break;
							}
						} else if (use_insn->op == IR_CASE_DEFAULT) {
							use_case = use;
						}
					}
					if (use_case) {
						use_insn = &ctx->ir_base[use_case];
						if (_values[i].optx == IR_TOP) {
							_values[i].optx = IR_IF;
							_values[i].op1 = use_case;
						} else if (_values[i].optx != IR_IF || _values[i].op1 != use_case) {
							IR_MAKE_BOTTOM(i);
						}
						if (!IR_IS_BOTTOM(use_case)) {
							ir_bitqueue_add(&worklist, use_case);
						}
					}
					if (!IR_IS_BOTTOM(i)) {
						continue;
					}
				}
				IR_MAKE_BOTTOM(i);
			} else if ((flags & (IR_OP_FLAG_MEM|IR_OP_FLAG_MEM_MASK)) == (IR_OP_FLAG_MEM|IR_OP_FLAG_MEM_LOAD)
					&& ctx->use_lists[i].count == 1) {
				/* dead load */
				_values[i].optx = IR_LOAD;
			} else {
				IR_MAKE_BOTTOM(i);

				/* control, call, load and store instructions may have unprocessed inputs */
				n = IR_INPUT_EDGES_COUNT(flags);
				if (IR_OP_HAS_VAR_INPUTS(flags) && (n = insn->inputs_count) > 3) {
					for (j = 0; j < (n>>2); j++) {
						_values[i+j+1].optx = IR_BOTTOM; /* keep the tail of a long multislot instruction */
					}
					for (j = 2, p = insn->ops + j; j <= n; j++, p++) {
						IR_ASSERT(IR_OPND_KIND(flags, j) == IR_OPND_DATA);
						use = *p;
						if (use > 0 && UNEXPECTED(_values[use].optx == IR_TOP)) {
							ir_bitqueue_add(&worklist, use);
						}
					}
				} else if (n >= 2) {
					IR_ASSERT(IR_OPND_KIND(flags, 2) == IR_OPND_DATA);
					use = insn->op2;
					if (use > 0 && UNEXPECTED(_values[use].optx == IR_TOP)) {
						ir_bitqueue_add(&worklist, use);
					}
					if (n > 2) {
						IR_ASSERT(n == 3);
						IR_ASSERT(IR_OPND_KIND(flags, 3) == IR_OPND_DATA);
						use = insn->op3;
						if (use > 0 && UNEXPECTED(_values[use].optx == IR_TOP)) {
							ir_bitqueue_add(&worklist, use);
						}
					}
				}
			}
		}
		/* the lattice value of "i" changed: revisit its users */
		use_list = &ctx->use_lists[i];
		n = use_list->count;
		for (p = &ctx->use_edges[use_list->refs]; n > 0; p++, n--) {
			use = *p;
			if (_values[use].optx != IR_BOTTOM) {
				ir_bitqueue_add(&worklist, use);
			}
		}
	}

#ifdef IR_DEBUG
	/* optional dump of the final lattice */
	if (ctx->flags & IR_DEBUG_SCCP) {
		for (i = 1; i < ctx->insns_count; i++) {
			if (IR_IS_CONST_OP(_values[i].op)) {
				fprintf(stderr, "%d. CONST(", i);
				ir_print_const(ctx, &_values[i], stderr, true);
				fprintf(stderr, ")\n");
#if IR_COMBO_COPY_PROPAGATION
			} else if (_values[i].op == IR_COPY) {
				fprintf(stderr, "%d. COPY(%d)\n", i, _values[i].op1);
#endif
			} else if (IR_IS_TOP(i)) {
				fprintf(stderr, "%d. TOP\n", i);
			} else if (_values[i].op == IR_IF) {
				fprintf(stderr, "%d. IF(%d)\n", i, _values[i].op1);
			} else if (_values[i].op == IR_MERGE) {
				fprintf(stderr, "%d. MERGE(%d)\n", i, _values[i].op1);
			} else if (!IR_IS_BOTTOM(i)) {
				fprintf(stderr, "%d. %d\n", i, _values[i].op);
			}
		}
	}
#endif

	/* Phase 2: commit the lattice results into the IR. */
	for (i = 1, value = _values + i; i < ctx->insns_count; value++, i++) {
		if (value->op == IR_BOTTOM) {
			continue;
		} else if (IR_IS_CONST_OP(value->op)) {
			/* replace instruction by constant */
			j = ir_const(ctx, value->val, value->type);
			ir_sccp_replace_insn(ctx, _values, i, j, &worklist);
#if IR_COMBO_COPY_PROPAGATION
		} else if (value->op == IR_COPY) {
			ir_sccp_replace_insn(ctx, _values, i, value->op1, &worklist);
#endif
		} else if (value->op == IR_TOP) {
			/* remove unreachable instruction */
			insn = &ctx->ir_base[i];
			if (ir_op_flags[insn->op] & (IR_OP_FLAG_DATA|IR_OP_FLAG_MEM)) {
				if (insn->op != IR_PARAM && insn->op != IR_VAR) {
					ir_sccp_remove_insn(ctx, _values, i, &worklist);
				}
			} else {
				if (ir_op_flags[insn->op] & IR_OP_FLAG_TERMINATOR) {
					/* remove from terminators list (singly linked via op3,
					 * anchored at ir_base[1].op1) */
					ir_ref prev = ctx->ir_base[1].op1;
					if (prev == i) {
						ctx->ir_base[1].op1 = insn->op3;
					} else {
						while (prev) {
							if (ctx->ir_base[prev].op3 == i) {
								ctx->ir_base[prev].op3 = insn->op3;
								break;
							}
							prev = ctx->ir_base[prev].op3;
						}
					}
				}
				ir_sccp_replace_insn(ctx, _values, i, IR_UNUSED, NULL);
			}
		} else if (value->op == IR_IF) {
			/* remove one way IF/SWITCH */
			ir_sccp_remove_if(ctx, _values, i, value->op1);
		} else if (value->op == IR_MERGE) {
			/* schedule merge to remove unfeasible MERGE inputs */
			ir_bitqueue_add(&worklist, i);
		} else if (value->op == IR_LOAD) {
			/* schedule dead load elimination */
			ir_bitqueue_add(&worklist, i);
		}
	}

	/* Phase 3: iterative folding / DCE of everything Phase 2 touched. */
	while ((i = ir_bitqueue_pop(&worklist)) >= 0) {
		if (_values[i].op == IR_MERGE) {
			ir_sccp_remove_unfeasible_merge_inputs(ctx, _values, i, _values[i].op1);
		} else {
			insn = &ctx->ir_base[i];
			if (IR_IS_FOLDABLE_OP(insn->op)) {
				if (ctx->use_lists[i].count == 0) {
					ir_sccp_remove_insn(ctx, _values, i, &worklist);
				} else {
					ir_sccp_fold2(ctx, _values, i, &worklist);
				}
			} else if ((ir_op_flags[insn->op] & (IR_OP_FLAG_MEM|IR_OP_FLAG_MEM_MASK)) == (IR_OP_FLAG_MEM|IR_OP_FLAG_MEM_LOAD)
					&& ctx->use_lists[i].count == 1) {
				/* dead load */
				ir_ref next = ctx->use_edges[ctx->use_lists[i].refs];

				/* remove LOAD from double linked control list */
				ctx->ir_base[next].op1 = insn->op1;
				ir_sccp_replace_use(ctx, insn->op1, i, next);
				insn->op1 = IR_UNUSED;
				ir_sccp_remove_insn(ctx, _values, i, &worklist);
			}
		}
	}

	ir_mem_free(_values);
	ir_bitqueue_free(&worklist);

	ctx->flags &= ~IR_OPT_IN_SCCP;
	ctx->flags |= IR_SCCP_DONE;

	return 1;
}

View File

@@ -0,0 +1,227 @@
/*
* IR - Lightweight JIT Compilation Framework
* (String table)
* Copyright (C) 2022 Zend by Perforce.
* Authors: Dmitry Stogov <dmitry@php.net>
*/
#include "ir.h"
#include "ir_private.h"
/* One interned string: the hash slots (uint32_t heads) are stored in front
 * of the bucket array and addressed with negative offsets via "mask". */
typedef struct _ir_strtab_bucket {
	uint32_t h;        /* full hash of the string */
	uint32_t len;      /* length in bytes (no NUL counted) */
	const char *str;   /* pointer into strtab->buf (or caller memory) */
	uint32_t next;     /* byte offset of the next bucket in this chain,
	                      IR_INVALID_IDX terminates */
	ir_ref val;        /* value associated with the string */
} ir_strtab_bucket;
/* djb2-style string hash over "len" bytes of "str".
 * The top nibble is forced non-zero (| 0x10000000) so a stored hash can
 * never collide with the "empty" value 0.
 * Fix: the loop previously read "*str" on every iteration without ever
 * advancing the pointer, so all characters hashed were the first one —
 * the table still worked, but with a degenerate hash distribution. */
static uint32_t ir_str_hash(const char *str, size_t len)
{
	size_t i;
	uint32_t h = 5381;

	for (i = 0; i < len; i++) {
		h = ((h << 5) + h) + str[i]; /* h * 33 + c */
	}
	return h | 0x10000000;
}
/* Round "size" up to the next power of two (classic bit-smearing trick). */
static uint32_t ir_strtab_hash_size(uint32_t size)
{
	/* Use big enough power of 2 */
	uint32_t v = size - 1;

	v |= v >> 1;
	v |= v >> 2;
	v |= v >> 4;
	v |= v >> 8;
	v |= v >> 16;
	return v + 1;
}
/* Double the bucket capacity of "strtab" and rebuild the hash chains.
 * Layout: one allocation holding hash_size uint32_t slots followed by the
 * bucket array; strtab->data points at the buckets, and the slots are
 * addressed with negative indexes through the (negative) "mask". */
static void ir_strtab_resize(ir_strtab *strtab)
{
	uint32_t old_hash_size = (uint32_t)(-(int32_t)strtab->mask);
	char *old_data = strtab->data;
	uint32_t size = strtab->size * 2;
	uint32_t hash_size = ir_strtab_hash_size(size);
	char *data = ir_mem_malloc(hash_size * sizeof(uint32_t) + size * sizeof(ir_strtab_bucket));
	ir_strtab_bucket *p;
	uint32_t pos, i;

	/* empty all hash slots (every byte 0xFF == IR_INVALID_IDX) */
	memset(data, IR_INVALID_IDX, hash_size * sizeof(uint32_t));
	strtab->data = data + (hash_size * sizeof(uint32_t));
	strtab->mask = (uint32_t)(-(int32_t)hash_size);
	strtab->size = size;

	/* buckets are copied verbatim; only the chains must be rebuilt */
	memcpy(strtab->data, old_data, strtab->count * sizeof(ir_strtab_bucket));
	ir_mem_free(old_data - (old_hash_size * sizeof(uint32_t)));

	i = strtab->count;
	pos = 0;
	p = (ir_strtab_bucket*)strtab->data;
	do {
		/* h | mask yields a negative index into the slot area */
		uint32_t h = p->h | strtab->mask;
		p->next = ((uint32_t*)strtab->data)[(int32_t)h];
		((uint32_t*)strtab->data)[(int32_t)h] = pos;
		pos += sizeof(ir_strtab_bucket);
		p++;
	} while (--i);
}
/*
 * Grow the character buffer until it can hold "len" more bytes plus a
 * terminating NUL, then fix up every bucket's "str" pointer if realloc
 * moved the buffer.  Assumes all interned strings live inside "buf"
 * (ir_strtab_lookup copies them there whenever a buffer is in use).
 */
static void ir_strtab_grow_buf(ir_strtab *strtab, uint32_t len)
{
	size_t old = (size_t)strtab->buf;

	do {
		strtab->buf_size *= 2;
	} while (UNEXPECTED(strtab->buf_size - strtab->buf_top < len + 1));
	strtab->buf = ir_mem_realloc(strtab->buf, strtab->buf_size);
	if ((size_t)strtab->buf != old) {
		/* Buffer moved: shift each stored pointer by the same delta,
		 * computed with wrap-around unsigned arithmetic. */
		size_t offset = (size_t)strtab->buf - old;
		ir_strtab_bucket *p = (ir_strtab_bucket*)strtab->data;
		uint32_t i;

		for (i = strtab->count; i > 0; i--) {
			p->str += offset;
			p++;
		}
	}
}
/*
 * Initialize a string table for roughly "size" entries.  When "buf_size"
 * is non-zero the table allocates its own character buffer and copies
 * strings into it on insertion; otherwise it stores caller pointers.
 */
void ir_strtab_init(ir_strtab *strtab, uint32_t size, uint32_t buf_size)
{
	IR_ASSERT(size > 0);
	uint32_t slots = ir_strtab_hash_size(size);
	char *mem = ir_mem_malloc(slots * sizeof(uint32_t) + size * sizeof(ir_strtab_bucket));

	/* Empty hash table: fill the slot area with IR_INVALID_IDX bytes.
	 * data points past the slots, which are indexed negatively via mask. */
	memset(mem, IR_INVALID_IDX, slots * sizeof(uint32_t));
	strtab->data = mem + (slots * sizeof(uint32_t));
	strtab->mask = (uint32_t)(-(int32_t)slots);
	strtab->size = size;
	strtab->count = 0;
	strtab->pos = 0;

	strtab->buf = buf_size ? ir_mem_malloc(buf_size) : NULL;
	strtab->buf_size = buf_size;
	strtab->buf_top = 0;
}
/*
 * Read-only lookup: return the value stored for "str"/"len", or 0 when
 * the string has not been interned.
 */
ir_ref ir_strtab_find(const ir_strtab *strtab, const char *str, uint32_t len)
{
	const char *base = (const char*)strtab->data;
	uint32_t h = ir_str_hash(str, len);
	uint32_t pos;

	/* Walk the collision chain headed at the (negatively indexed) slot. */
	for (pos = ((uint32_t*)base)[(int32_t)(h | strtab->mask)];
	     pos != IR_INVALID_IDX;
	     pos = ((const ir_strtab_bucket*)(base + pos))->next) {
		const ir_strtab_bucket *b = (const ir_strtab_bucket*)(base + pos);

		if (b->h == h && b->len == len && memcmp(b->str, str, len) == 0) {
			return b->val;
		}
	}
	return 0;
}
/*
 * Find the value stored for "str"/"len", inserting a new entry with
 * value "val" when the string is not yet present.  Returns the existing
 * value on a hit (the supplied "val" is ignored), or "val" after a
 * successful insertion.  "val" must be non-zero: 0 means "not found".
 */
ir_ref ir_strtab_lookup(ir_strtab *strtab, const char *str, uint32_t len, ir_ref val)
{
	uint32_t h = ir_str_hash(str, len);
	char *data = (char*)strtab->data;
	uint32_t pos = ((uint32_t*)data)[(int32_t)(h | strtab->mask)]; /* chain head via negative slot index */
	ir_strtab_bucket *p;

	/* Fast path: scan the collision chain for an exact match. */
	while (pos != IR_INVALID_IDX) {
		p = (ir_strtab_bucket*)(data + pos);
		if (p->h == h
		 && p->len == len
		 && memcmp(p->str, str, len) == 0) {
			return p->val;
		}
		pos = p->next;
	}
	IR_ASSERT(val != 0);
	/* Ensure room for one more bucket.  Resizing reallocates the arrays,
	 * so the cached "data" pointer must be refreshed afterwards. */
	if (UNEXPECTED(strtab->count >= strtab->size)) {
		ir_strtab_resize(strtab);
		data = strtab->data;
	}
	/* When the table owns a character buffer, copy the string into it
	 * (NUL-terminated) so the caller's memory need not outlive the call. */
	if (strtab->buf) {
		if (UNEXPECTED(strtab->buf_size - strtab->buf_top < len + 1)) {
			ir_strtab_grow_buf(strtab, len + 1);
		}
		memcpy(strtab->buf + strtab->buf_top, str, len);
		strtab->buf[strtab->buf_top + len] = 0;
		str = (const char*)strtab->buf + strtab->buf_top;
		strtab->buf_top += len + 1;
	}
	/* Append a fresh bucket and push it onto the front of its chain. */
	pos = strtab->pos;
	strtab->pos += sizeof(ir_strtab_bucket);
	strtab->count++;
	p = (ir_strtab_bucket*)(data + pos);
	p->h = h;
	p->len = len;
	p->str = str;
	h |= strtab->mask;
	p->next = ((uint32_t*)data)[(int32_t)h];
	((uint32_t*)data)[(int32_t)h] = pos;
	p->val = val;
	return val;
}
/*
 * Replace the value of an already-interned string.  Returns the new
 * value on success, or 0 when the string is not present (no insertion
 * is performed).
 */
ir_ref ir_strtab_update(ir_strtab *strtab, const char *str, uint32_t len, ir_ref val)
{
	char *base = (char*)strtab->data;
	uint32_t h = ir_str_hash(str, len);
	uint32_t pos = ((uint32_t*)base)[(int32_t)(h | strtab->mask)];

	while (pos != IR_INVALID_IDX) {
		ir_strtab_bucket *b = (ir_strtab_bucket*)(base + pos);

		if (b->h == h && b->len == len && memcmp(b->str, str, len) == 0) {
			b->val = val;
			return val;
		}
		pos = b->next;
	}
	return 0;
}
/* Return the characters of entry "idx" (0-based, insertion order). */
const char *ir_strtab_str(const ir_strtab *strtab, ir_ref idx)
{
	const ir_strtab_bucket *buckets = (const ir_strtab_bucket*)strtab->data;

	IR_ASSERT(idx >= 0 && (uint32_t)idx < strtab->count);
	return buckets[idx].str;
}
/* Release all memory owned by the table and clear its pointers. */
void ir_strtab_free(ir_strtab *strtab)
{
	uint32_t slots = (uint32_t)(-(int32_t)strtab->mask);

	/* The allocation begins "slots" hash entries before ->data. */
	ir_mem_free((char*)strtab->data - (slots * sizeof(uint32_t)));
	strtab->data = NULL;
	if (strtab->buf) {
		ir_mem_free(strtab->buf);
		strtab->buf = NULL;
	}
}
/* Invoke "func" on every entry, in insertion order. */
void ir_strtab_apply(const ir_strtab *strtab, ir_strtab_apply_t func)
{
	const ir_strtab_bucket *b = (const ir_strtab_bucket*)strtab->data;
	const ir_strtab_bucket *end = b + strtab->count;

	while (b != end) {
		func(b->str, b->len, b->val);
		b++;
	}
}

File diff suppressed because it is too large Load Diff

226
ext/opcache/jit/ir/ir_x86.h Normal file
View File

@@ -0,0 +1,226 @@
/*
 * IR - Lightweight JIT Compilation Framework
 * (x86/x86_64 CPU specific definitions)
 * Copyright (C) 2022 Zend by Perforce.
 * Authors: Dmitry Stogov <dmitry@php.net>
 */
#ifndef IR_X86_H
#define IR_X86_H

/*
 * Register tables (X-macros).  Each entry lists the architectural names
 * of one register at every access width; "__"/"___" mark widths that are
 * unavailable for that register.
 *
 * FIX: the final entry of each table previously ended with a line
 * continuation backslash immediately followed by the next "#define",
 * splicing that directive into the macro body (C translation phase 2).
 * The dangling trailing backslashes have been removed.
 */
#if defined(IR_TARGET_X64)
# define IR_GP_REGS(_) \
	_(R0, rax, eax, ax, al, ah) \
	_(R1, rcx, ecx, cx, cl, ch) \
	_(R2, rdx, edx, dx, dl, dh) \
	_(R3, rbx, ebx, bx, bl, bh) \
	_(R4, rsp, esp, __, __, __) \
	_(R5, rbp, ebp, bp, r5b, __) \
	_(R6, rsi, esi, si, r6b, __) \
	_(R7, rdi, edi, di, r7b, __) \
	_(R8, r8, r8d, r8w, r8b, __) \
	_(R9, r9, r9d, r9w, r9b, __) \
	_(R10, r10, r10d, r10w, r10b, __) \
	_(R11, r11, r11d, r11w, r11b, __) \
	_(R12, r12, r12d, r12w, r12b, __) \
	_(R13, r13, r13d, r13w, r13b, __) \
	_(R14, r14, r14d, r14w, r14b, __) \
	_(R15, r15, r15d, r15w, r15b, __)
# define IR_FP_REGS(_) \
	_(XMM0, xmm0) \
	_(XMM1, xmm1) \
	_(XMM2, xmm2) \
	_(XMM3, xmm3) \
	_(XMM4, xmm4) \
	_(XMM5, xmm5) \
	_(XMM6, xmm6) \
	_(XMM7, xmm7) \
	_(XMM8, xmm8) \
	_(XMM9, xmm9) \
	_(XMM10, xmm10) \
	_(XMM11, xmm11) \
	_(XMM12, xmm12) \
	_(XMM13, xmm13) \
	_(XMM14, xmm14) \
	_(XMM15, xmm15)
#elif defined(IR_TARGET_X86)
# define IR_GP_REGS(_) \
	_(R0, ___, eax, ax, al, ah) \
	_(R1, ___, ecx, cx, cl, ch) \
	_(R2, ___, edx, dx, dl, dh) \
	_(R3, ___, ebx, bx, bl, bh) \
	_(R4, ___, esp, __, __, __) \
	_(R5, ___, ebp, bp, __, __) \
	_(R6, ___, esi, si, __, __) \
	_(R7, ___, edi, di, __, __)
# define IR_FP_REGS(_) \
	_(XMM0, xmm0) \
	_(XMM1, xmm1) \
	_(XMM2, xmm2) \
	_(XMM3, xmm3) \
	_(XMM4, xmm4) \
	_(XMM5, xmm5) \
	_(XMM6, xmm6) \
	_(XMM7, xmm7)
#else
# error "Unsupported target architecture"
#endif

/* Expand the tables into the register enumeration: GP registers first,
 * then FP registers, then the total count. */
#define IR_GP_REG_ENUM(code, name64, name32, name16, name8, name8h) \
	IR_REG_ ## code,

#define IR_FP_REG_ENUM(code, name) \
	IR_REG_ ## code,

enum _ir_reg {
	_IR_REG_NONE = -1,
	IR_GP_REGS(IR_GP_REG_ENUM)
	IR_FP_REGS(IR_FP_REG_ENUM)
	IR_REG_NUM,
};

#define IR_REG_GP_FIRST IR_REG_R0
#define IR_REG_FP_FIRST IR_REG_XMM0
#define IR_REG_GP_LAST (IR_REG_FP_FIRST - 1)
#define IR_REG_FP_LAST (IR_REG_NUM - 1)
#define IR_REG_SCRATCH (IR_REG_NUM) /* special name for regset */
#define IR_REG_ALL (IR_REG_NUM + 1) /* special name for regset */

#define IR_REGSET_64BIT 0

#define IR_REG_STACK_POINTER \
	IR_REG_RSP
#define IR_REG_FRAME_POINTER \
	IR_REG_RBP
/* RSP is never allocatable. */
#define IR_REGSET_FIXED \
	(IR_REGSET(IR_REG_RSP))
#define IR_REGSET_GP \
	IR_REGSET_DIFFERENCE(IR_REGSET_INTERVAL(IR_REG_GP_FIRST, IR_REG_GP_LAST), IR_REGSET_FIXED)
#define IR_REGSET_FP \
	IR_REGSET_DIFFERENCE(IR_REGSET_INTERVAL(IR_REG_FP_FIRST, IR_REG_FP_LAST), IR_REGSET_FIXED)

/* Architectural aliases for the ordinal names used in the tables above. */
#define IR_REG_RAX IR_REG_R0
#define IR_REG_RCX IR_REG_R1
#define IR_REG_RDX IR_REG_R2
#define IR_REG_RBX IR_REG_R3
#define IR_REG_RSP IR_REG_R4
#define IR_REG_RBP IR_REG_R5
#define IR_REG_RSI IR_REG_R6
#define IR_REG_RDI IR_REG_R7

/* Calling Convention */
#ifdef _WIN64
/* Windows x64 ABI: RCX/RDX/R8/R9 + XMM0-3 for arguments; callee must
 * reserve 32 bytes of shadow space for them. */
# define IR_REG_INT_RET1 IR_REG_RAX
# define IR_REG_FP_RET1 IR_REG_XMM0
# define IR_REG_INT_ARGS 4
# define IR_REG_FP_ARGS 4
# define IR_REG_INT_ARG1 IR_REG_RCX
# define IR_REG_INT_ARG2 IR_REG_RDX
# define IR_REG_INT_ARG3 IR_REG_R8
# define IR_REG_INT_ARG4 IR_REG_R9
# define IR_REG_FP_ARG1 IR_REG_XMM0
# define IR_REG_FP_ARG2 IR_REG_XMM1
# define IR_REG_FP_ARG3 IR_REG_XMM2
# define IR_REG_FP_ARG4 IR_REG_XMM3
# define IR_MAX_REG_ARGS 4
# define IR_SHADOW_ARGS 32 /* Reserved space in bytes - "home space" or "shadow store" for register arguments */

# define IR_REGSET_SCRATCH \
	(IR_REGSET_INTERVAL(IR_REG_RAX, IR_REG_RDX) \
	| IR_REGSET_INTERVAL(IR_REG_R8, IR_REG_R11) \
	| IR_REGSET_INTERVAL(IR_REG_XMM0, IR_REG_XMM5))

# define IR_REGSET_PRESERVED \
	(IR_REGSET(IR_REG_RBX) \
	| IR_REGSET_INTERVAL(IR_REG_RBP, IR_REG_RDI) \
	| IR_REGSET_INTERVAL(IR_REG_R12, IR_REG_R15) \
	| IR_REGSET_INTERVAL(IR_REG_XMM6, IR_REG_XMM15))

#elif defined(IR_TARGET_X64)
/* System V AMD64 ABI: six integer and eight vector argument registers;
 * no shadow space. */
# define IR_REG_INT_RET1 IR_REG_RAX
# define IR_REG_FP_RET1 IR_REG_XMM0
# define IR_REG_INT_ARGS 6
# define IR_REG_FP_ARGS 8
# define IR_REG_INT_ARG1 IR_REG_RDI
# define IR_REG_INT_ARG2 IR_REG_RSI
# define IR_REG_INT_ARG3 IR_REG_RDX
# define IR_REG_INT_ARG4 IR_REG_RCX
# define IR_REG_INT_ARG5 IR_REG_R8
# define IR_REG_INT_ARG6 IR_REG_R9
# define IR_REG_FP_ARG1 IR_REG_XMM0
# define IR_REG_FP_ARG2 IR_REG_XMM1
# define IR_REG_FP_ARG3 IR_REG_XMM2
# define IR_REG_FP_ARG4 IR_REG_XMM3
# define IR_REG_FP_ARG5 IR_REG_XMM4
# define IR_REG_FP_ARG6 IR_REG_XMM5
# define IR_REG_FP_ARG7 IR_REG_XMM6
# define IR_REG_FP_ARG8 IR_REG_XMM7
# define IR_MAX_REG_ARGS 14
# define IR_SHADOW_ARGS 0

# define IR_REG_VARARG_FP_REGS IR_REG_RAX /* hidden argument to specify the number of vector registers used */

# define IR_REGSET_SCRATCH \
	(IR_REGSET_INTERVAL(IR_REG_RAX, IR_REG_RDX) \
	| IR_REGSET_INTERVAL(IR_REG_RSI, IR_REG_RDI) \
	| IR_REGSET_INTERVAL(IR_REG_R8, IR_REG_R11) \
	| IR_REGSET_FP)

# define IR_REGSET_PRESERVED \
	(IR_REGSET(IR_REG_RBX) \
	| IR_REGSET(IR_REG_RBP) \
	| IR_REGSET_INTERVAL(IR_REG_R12, IR_REG_R15))

#elif defined(IR_TARGET_X86)
/* 32-bit x86: all regular arguments on the stack; fastcall passes the
 * first two integer arguments in ECX/EDX. */
# define IR_REG_INT_RET1 IR_REG_RAX
# define IR_REG_INT_RET2 IR_REG_RDX
# define IR_REG_INT_ARGS 0
# define IR_REG_FP_ARGS 0

# define IR_HAVE_FASTCALL 1
# define IR_REG_INT_FCARGS 2
# define IR_REG_FP_FCARGS 0
# define IR_REG_INT_FCARG1 IR_REG_RCX
# define IR_REG_INT_FCARG2 IR_REG_RDX
# define IR_MAX_REG_ARGS 2
# define IR_SHADOW_ARGS 0

# define IR_REGSET_SCRATCH \
	(IR_REGSET_INTERVAL(IR_REG_RAX, IR_REG_RDX) | IR_REGSET_FP)

# define IR_REGSET_PRESERVED \
	(IR_REGSET(IR_REG_RBX) \
	| IR_REGSET(IR_REG_RBP) \
	| IR_REGSET_INTERVAL(IR_REG_RSI, IR_REG_RDI))

#else
# error "Unsupported target architecture"
#endif

/* Scratch-register requirement of a single IR instruction. */
typedef struct _ir_tmp_reg {
	union {
		uint8_t num;  /* ordinal of the temporary before allocation */
		int8_t  reg;  /* concrete register once assigned */
	};
	uint8_t type;     /* IR value type the temporary must hold */
	uint8_t start;    /* live-range bounds — presumably sub-positions within the
	                   * instruction; confirm against the register allocator */
	uint8_t end;
} ir_tmp_reg;

/* Per-instruction register constraints collected for the allocator. */
struct _ir_target_constraints {
	int8_t      def_reg;                  /* fixed destination register, or none */
	uint8_t     tmps_count;               /* number of valid entries in tmp_regs[] */
	uint8_t     hints_count;              /* number of valid entries in hints[] */
	ir_tmp_reg  tmp_regs[3];              /* required temporaries */
	int8_t      hints[IR_MAX_REG_ARGS + 3]; /* preferred registers for operands/args */
};

#endif /* IR_X86_H */

6
ext/opcache/jit/ir/y.txt Normal file
View File

@@ -0,0 +1,6 @@
llvm.floor.f64
llvm.fmuladd.f64
llvm.memcpy.p0.p0.i64
llvm.memset.p0.i64
llvm.va_end
llvm.va_start

File diff suppressed because it is too large Load Diff

View File

@@ -74,6 +74,16 @@
#define ZEND_JIT_DEBUG_TRACE_TSSA (1<<19)
#define ZEND_JIT_DEBUG_TRACE_EXIT_INFO (1<<20)
#define ZEND_JIT_DEBUG_IR_SRC (1<<24)
#define ZEND_JIT_DEBUG_IR_FINAL (1<<25)
#define ZEND_JIT_DEBUG_IR_CFG (1<<26)
#define ZEND_JIT_DEBUG_IR_REGS (1<<27)
#define ZEND_JIT_DEBUG_IR_AFTER_SCCP (1<<28)
#define ZEND_JIT_DEBUG_IR_AFTER_SCHEDULE (1<<29)
#define ZEND_JIT_DEBUG_IR_AFTER_REGS (1<<30)
#define ZEND_JIT_DEBUG_IR_CODEGEN (1U<<31)
#define ZEND_JIT_DEBUG_PERSISTENT 0x1f0 /* profile and debugger flags can't be changed at run-time */
#define ZEND_JIT_TRACE_MAX_LENGTH 1024 /* max length of single trace */
@@ -155,6 +165,27 @@ ZEND_EXT_API void zend_jit_deactivate(void);
ZEND_EXT_API void zend_jit_status(zval *ret);
ZEND_EXT_API void zend_jit_restart(void);
#ifdef ZEND_JIT_IR
#define ZREG_LOAD (1<<0)
#define ZREG_STORE (1<<1)
#define ZREG_LAST_USE (1<<2)
#define ZREG_PI (1<<3)
#define ZREG_PHI (1<<4)
#define ZREG_FORWARD (1<<5)
#define ZREG_SPILL_SLOT (1<<3)
#define ZREG_CONST (1<<4)
#define ZREG_ZVAL_COPY (2<<4)
#define ZREG_TYPE_ONLY (3<<4)
#define ZREG_ZVAL_ADDREF (4<<4)
#define ZREG_THIS (5<<4)
#define ZREG_NONE -1
#else
typedef struct _zend_lifetime_interval zend_lifetime_interval;
typedef struct _zend_life_range zend_life_range;
@@ -187,5 +218,6 @@ struct _zend_lifetime_interval {
zend_lifetime_interval *used_as_hint;
zend_lifetime_interval *list_next;
};
#endif
#endif /* HAVE_JIT_H */

View File

@@ -21,6 +21,8 @@
#ifndef ZEND_JIT_INTERNAL_H
#define ZEND_JIT_INTERNAL_H
#ifndef ZEND_JIT_IR
#include "zend_bitset.h"
/* Register Set */
@@ -108,21 +110,20 @@ uint32_t __inline __zend_jit_clz(uint32_t value) {
/* Register Names */
extern const char *zend_reg_name[];
#endif /* ZEND_JIT_IR */
/* Address Encoding */
typedef uintptr_t zend_jit_addr;
#define IS_CONST_ZVAL 0
#define IS_MEM_ZVAL 1
#define IS_REG 2
#define IS_REG 2 /* CPU register or PHP SSA variable number (for IR JIT) */
#define IS_REF_ZVAL 3 /* IR reference */
#define _ZEND_ADDR_MODE_MASK 0x3
#define _ZEND_ADDR_REG_SHIFT 2
#define _ZEND_ADDR_REG_MASK 0x3f /* no more than 64 registers */
#define _ZEND_ADDR_OFFSET_SHIFT 8
#define _ZEND_ADDR_REG_STORE_BIT 8
#define _ZEND_ADDR_REG_LOAD_BIT 9
#define _ZEND_ADDR_REG_LAST_USE_BIT 10
#define ZEND_ADDR_CONST_ZVAL(zv) \
(((zend_jit_addr)(uintptr_t)(zv)) | IS_CONST_ZVAL)
@@ -138,6 +139,13 @@ typedef uintptr_t zend_jit_addr;
#define Z_ZV(addr) ((zval*)(addr))
#define Z_OFFSET(addr) ((uint32_t)((addr)>>_ZEND_ADDR_OFFSET_SHIFT))
#define Z_REG(addr) ((zend_reg)(((addr)>>_ZEND_ADDR_REG_SHIFT) & _ZEND_ADDR_REG_MASK))
#ifndef ZEND_JIT_IR
#define _ZEND_ADDR_REG_STORE_BIT 8
#define _ZEND_ADDR_REG_LOAD_BIT 9
#define _ZEND_ADDR_REG_LAST_USE_BIT 10
#define Z_STORE(addr) ((zend_reg)(((addr)>>_ZEND_ADDR_REG_STORE_BIT) & 1))
#define Z_LOAD(addr) ((zend_reg)(((addr)>>_ZEND_ADDR_REG_LOAD_BIT) & 1))
#define Z_LAST_USE(addr) ((zend_reg)(((addr)>>_ZEND_ADDR_REG_LAST_USE_BIT) & 1))
@@ -178,6 +186,47 @@ static zend_always_inline zend_jit_addr _zend_jit_decode_op(uint8_t op_type, zno
#define OP_ADDR(opline, type, op) \
_zend_jit_decode_op((opline)->type, (opline)->op, opline, ZREG_NONE)
#define OP_REG_ADDR(opline, type, _op, _ssa_op) \
_zend_jit_decode_op((opline)->type, (opline)->_op, opline, \
OP_REG(ssa_op, _ssa_op))
#else /* ZEND_JIT_IR */
#define ZEND_ADDR_REF_ZVAL(ref) \
((((zend_jit_addr)(uintptr_t)(ref)) << _ZEND_ADDR_REG_SHIFT) | \
IS_REF_ZVAL)
#define Z_SSA_VAR(addr) ((addr)>>_ZEND_ADDR_REG_SHIFT)
#define Z_IR_REF(addr) ((addr)>>_ZEND_ADDR_REG_SHIFT)
#define Z_STORE(addr) \
((jit->ra && jit->ra[Z_SSA_VAR(addr)].ref) ? \
(jit->ra[Z_SSA_VAR(addr)].flags & ZREG_STORE) : \
0)
#define Z_LOAD(addr) \
((jit->ra && jit->ra[Z_SSA_VAR(addr)].ref) ? \
(jit->ra[Z_SSA_VAR(addr)].flags & ZREG_LOAD) : \
0)
#if ZEND_USE_ABS_CONST_ADDR
# define OP_ADDR(opline, type, op) \
(((opline)->type == IS_CONST) ? \
ZEND_ADDR_CONST_ZVAL((opline)->op.zv) : \
ZEND_ADDR_MEM_ZVAL(ZREG_FP, (opline)->op.var))
#else
# define OP_ADDR(opline, type, op) \
(((opline)->type == IS_CONST) ? \
ZEND_ADDR_CONST_ZVAL(RT_CONSTANT(opline, (opline)->op)) : \
ZEND_ADDR_MEM_ZVAL(ZREG_FP, (opline)->op.var))
#endif
#define OP_REG_ADDR(opline, type, op, _ssa_op) \
((ctx.ra && ssa_op->_ssa_op >= 0 && ctx.ra[ssa_op->_ssa_op].ref) ? \
ZEND_ADDR_REG(ssa_op->_ssa_op) : \
OP_ADDR(opline, type, op))
#endif /* ZEND_JIT_IR */
#define OP1_ADDR() \
OP_ADDR(opline, op1_type, op1)
#define OP2_ADDR() \
@@ -187,10 +236,6 @@ static zend_always_inline zend_jit_addr _zend_jit_decode_op(uint8_t op_type, zno
#define OP1_DATA_ADDR() \
OP_ADDR(opline + 1, op1_type, op1)
#define OP_REG_ADDR(opline, type, _op, _ssa_op) \
_zend_jit_decode_op((opline)->type, (opline)->_op, opline, \
OP_REG(ssa_op, _ssa_op))
#define OP1_REG_ADDR() \
OP_REG_ADDR(opline, op1_type, op1, op1_use)
#define OP2_REG_ADDR() \
@@ -213,8 +258,15 @@ static zend_always_inline bool zend_jit_same_addr(zend_jit_addr addr1, zend_jit_
{
if (addr1 == addr2) {
return 1;
#ifndef ZEND_JIT_IR
} else if (Z_MODE(addr1) == IS_REG && Z_MODE(addr2) == IS_REG) {
return Z_REG(addr1) == Z_REG(addr2);
#else
} else if (Z_MODE(addr1) == IS_REG && Z_MODE(addr2) == IS_REG) {
return Z_SSA_VAR(addr1) == Z_SSA_VAR(addr2);
} else if (Z_MODE(addr1) == IS_REF_ZVAL && Z_MODE(addr2) == IS_REF_ZVAL) {
return Z_IR_REF(addr1) == Z_IR_REF(addr2);
#endif
}
return 0;
}
@@ -414,6 +466,8 @@ typedef enum _zend_jit_trace_stop {
#define ZEND_JIT_EXIT_METHOD_CALL (1<<9) /* exit because of polymorphic INIT_METHOD_CALL call */
#define ZEND_JIT_EXIT_INVALIDATE (1<<10) /* invalidate current trace */
#define ZEND_JIT_EXIT_FIXED (1U<<31) /* the exit_info can't be changed by zend_jit_snapshot_handler() */
typedef union _zend_op_trace_info {
zend_op dummy; /* the size of this structure must be the same as zend_op */
struct {
@@ -515,8 +569,15 @@ typedef struct _zend_jit_trace_exit_info {
uint32_t flags; /* set of ZEND_JIT_EXIT_... */
uint32_t stack_size;
uint32_t stack_offset;
#ifdef ZEND_JIT_IR
int32_t poly_func_ref;
int32_t poly_this_ref;
int8_t poly_func_reg;
int8_t poly_this_reg;
#endif
} zend_jit_trace_exit_info;
#ifndef ZEND_JIT_IR
typedef union _zend_jit_trace_stack {
int32_t ssa_var;
uint32_t info;
@@ -530,6 +591,50 @@ typedef union _zend_jit_trace_stack {
#define STACK_VAR(_stack, _slot) \
(_stack)[_slot].ssa_var
#define SET_STACK_VAR(_stack, _slot, _ssa_var) do { \
(_stack)[_slot].ssa_var = _ssa_var; \
} while (0)
#define CLEAR_STACK_REF(_stack, _slot)
#else /* ZEND_JIT_IR */
typedef struct _zend_jit_trace_stack {
union {
uint32_t info;
struct {
uint8_t type; /* variable type (for type inference) */
uint8_t mem_type; /* stack slot type (for eliminate dead type store) */
int8_t reg;
uint8_t flags;
};
};
int32_t ref;
} zend_jit_trace_stack;
#define STACK_VAR(_stack, _slot) \
((int32_t*)(_stack))[_slot]
#define SET_STACK_VAR(_stack, _slot, _ssa_var) do { \
((int32_t*)(_stack))[_slot] = _ssa_var; \
} while (0)
#define CLEAR_STACK_REF(_stack, _slot) do { \
(_stack)[_slot].ref = IR_UNUSED; \
(_stack)[_slot].flags = 0; \
} while (0)
#define STACK_REF(_stack, _slot) \
(_stack)[_slot].ref
#define SET_STACK_REF(_stack, _slot, _ref) do { \
(_stack)[_slot].ref = (_ref); \
(_stack)[_slot].flags = 0; \
} while (0)
#define SET_STACK_REF_EX(_stack, _slot, _ref, _flags) do { \
(_stack)[_slot].ref = (_ref); \
(_stack)[_slot].flags = _flags; \
} while (0)
#endif /* ZEND_JIT_IR */
#define STACK_INFO(_stack, _slot) \
(_stack)[_slot].info
#define STACK_TYPE(_stack, _slot) \
@@ -540,9 +645,6 @@ typedef union _zend_jit_trace_stack {
(_stack)[_slot].reg
#define STACK_FLAGS(_stack, _slot) \
(_stack)[_slot].flags
#define SET_STACK_VAR(_stack, _slot, _ssa_var) do { \
(_stack)[_slot].ssa_var = _ssa_var; \
} while (0)
#define SET_STACK_INFO(_stack, _slot, _info) do { \
(_stack)[_slot].info = _info; \
} while (0)
@@ -554,6 +656,7 @@ typedef union _zend_jit_trace_stack {
} \
(_stack)[_slot].reg = ZREG_NONE; \
(_stack)[_slot].flags = 0; \
CLEAR_STACK_REF(_stack, _slot); \
} while (0)
#define SET_STACK_REG(_stack, _slot, _reg) do { \
(_stack)[_slot].reg = _reg; \
@@ -572,6 +675,13 @@ typedef union _zend_jit_trace_stack {
#define ZEND_JIT_TRACE_LOOP (1<<1)
#define ZEND_JIT_TRACE_USES_INITIAL_IP (1<<2)
#ifdef ZEND_JIT_IR
typedef union _zend_jit_exit_const {
int64_t i;
double d;
} zend_jit_exit_const;
#endif
typedef struct _zend_jit_trace_info {
uint32_t id; /* trace id */
uint32_t root; /* root trace id or self id for root traces */
@@ -591,6 +701,10 @@ typedef struct _zend_jit_trace_info {
zend_jit_trace_exit_info *exit_info; /* info about side exits */
zend_jit_trace_stack *stack_map;
//uint32_t loop_offset;
#ifdef ZEND_JIT_IR
uint32_t consts_count; /* number of side exits */
zend_jit_exit_const *constants;
#endif
} zend_jit_trace_info;
struct _zend_jit_trace_stack_frame {
@@ -709,7 +823,9 @@ ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL zend_jit_ret_trace_helper(ZEND_OPCODE_HAND
ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL zend_jit_loop_trace_helper(ZEND_OPCODE_HANDLER_ARGS);
int ZEND_FASTCALL zend_jit_trace_hot_root(zend_execute_data *execute_data, const zend_op *opline);
#ifndef ZEND_JIT_IR
int ZEND_FASTCALL zend_jit_trace_exit(uint32_t exit_num, zend_jit_registers_buf *regs);
#endif
zend_jit_trace_stop ZEND_FASTCALL zend_jit_trace_execute(zend_execute_data *execute_data, const zend_op *opline, zend_jit_trace_rec *trace_buffer, uint8_t start, uint32_t is_megamorphc);
static zend_always_inline const zend_op* zend_jit_trace_get_exit_opline(zend_jit_trace_rec *trace, const zend_op *opline, bool *exit_if_true)
@@ -794,11 +910,13 @@ static zend_always_inline bool zend_long_is_power_of_two(zend_long x)
return (x > 0) && !(x & (x - 1));
}
#ifndef ZEND_JIT_IR
static zend_always_inline uint32_t zend_long_floor_log2(zend_long x)
{
ZEND_ASSERT(zend_long_is_power_of_two(x));
return zend_ulong_ntz(x);
}
#endif
/* from http://aggregate.org/MAGIC/ */
static zend_always_inline uint32_t ones32(uint32_t x)

16531
ext/opcache/jit/zend_jit_ir.c Normal file

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -28,11 +28,13 @@
#include "Optimizer/zend_func_info.h"
#include "Optimizer/zend_call_graph.h"
#include "zend_jit.h"
#ifndef ZEND_JIT_IR
#if ZEND_JIT_TARGET_X86
# include "zend_jit_x86.h"
#elif ZEND_JIT_TARGET_ARM64
# include "zend_jit_arm64.h"
#endif
#endif /* ZEND_JIT_IR */
#include "zend_jit_internal.h"