From c10e85b905f6ed74898fb5ef778bb0fc737921a2 Mon Sep 17 00:00:00 2001 From: Alexander Stecher <45872305+AlliBalliBaba@users.noreply.github.com> Date: Mon, 25 Aug 2025 16:18:20 +0200 Subject: [PATCH] refactor: cleanup context (#1816) * Removes NewRequestWithContext. * Moves cgi logic to cgi.go * Calls 'update_request_info' from the C side. * Calls 'update_request_info' from the C side. * clang-format * Removes unnecessary export. * Adds TODO. * Adds TODO. * Removes 'is_worker_thread' * Shortens return statement. * Removes the context refactor. * adjusts comment. * Skips parsing cgi path variables on explicitly assigned worker. * suggesions by @dunglas. * Re-introduces 'is_worker_thread'. * More formatting. --- cgi.go | 90 ++++++++++++++++++++++++++++++++++++++++++------ context.go | 50 ++++++++++----------------- frankenphp.c | 46 ++++++++----------------- frankenphp.go | 63 --------------------------------- frankenphp.h | 9 ----- threadregular.go | 8 ----- threadworker.go | 14 +------- 7 files changed, 115 insertions(+), 165 deletions(-) diff --git a/cgi.go b/cgi.go index 238562fb..f9aae869 100644 --- a/cgi.go +++ b/cgi.go @@ -21,6 +21,16 @@ import ( "github.com/dunglas/frankenphp/internal/phpheaders" ) +// Protocol versions, in Apache mod_ssl format: https://httpd.apache.org/docs/current/mod/mod_ssl.html +// Note that these are slightly different from SupportedProtocols in caddytls/config.go +var tlsProtocolStrings = map[uint16]string{ + tls.VersionTLS10: "TLSv1", + tls.VersionTLS11: "TLSv1.1", + tls.VersionTLS12: "TLSv1.2", + tls.VersionTLS13: "TLSv1.3", +} + +// Known $_SERVER keys var knownServerKeys = []string{ "CONTENT_LENGTH", "DOCUMENT_ROOT", @@ -211,18 +221,49 @@ func go_register_variables(threadIndex C.uintptr_t, trackVarsArray *C.zval) { addPreparedEnvToServer(fc, trackVarsArray) } +// splitCgiPath splits the request path into SCRIPT_NAME, SCRIPT_FILENAME, PATH_INFO, DOCUMENT_URI +func splitCgiPath(fc *frankenPHPContext) { + path := fc.request.URL.Path + splitPath := fc.splitPath + + if splitPath == nil { + splitPath = []string{".php"} + } + + if splitPos := splitPos(path, splitPath); splitPos > -1 { + fc.docURI = path[:splitPos] + fc.pathInfo = path[splitPos:] + + // Strip PATH_INFO from SCRIPT_NAME + fc.scriptName = strings.TrimSuffix(path, fc.pathInfo) + + // Ensure the SCRIPT_NAME has a leading slash for compliance with RFC3875 + // Info: https://tools.ietf.org/html/rfc3875#section-4.1.13 + if fc.scriptName != "" && !strings.HasPrefix(fc.scriptName, "/") { + fc.scriptName = "/" + fc.scriptName + } + } + + // TODO: is it possible to delay this and avoid saving everything in the context? + // SCRIPT_FILENAME is the absolute path of SCRIPT_NAME + fc.scriptFilename = sanitizedPathJoin(fc.documentRoot, fc.scriptName) + fc.worker = getWorkerByPath(fc.scriptFilename) +} + // splitPos returns the index where path should // be split based on SplitPath. +// example: if splitPath is [".php"] +// "/path/to/script.php/some/path": ("/path/to/script.php", "/some/path") // // Adapted from https://github.com/caddyserver/caddy/blob/master/modules/caddyhttp/reverseproxy/fastcgi/fastcgi.go // Copyright 2015 Matthew Holt and The Caddy Authors -func splitPos(fc *frankenPHPContext, path string) int { - if len(fc.splitPath) == 0 { +func splitPos(path string, splitPath []string) int { + if len(splitPath) == 0 { return 0 } lowerPath := strings.ToLower(path) - for _, split := range fc.splitPath { + for _, split := range splitPath { if idx := strings.Index(lowerPath, strings.ToLower(split)); idx > -1 { return idx + len(split) } @@ -230,13 +271,42 @@ func splitPos(fc *frankenPHPContext, path string) int { return -1 } -// Map of supported protocols to Apache ssl_mod format -// Note that these are slightly different from SupportedProtocols in caddytls/config.go -var tlsProtocolStrings = map[uint16]string{ - tls.VersionTLS10: "TLSv1", - tls.VersionTLS11: "TLSv1.1", - tls.VersionTLS12: "TLSv1.2", - tls.VersionTLS13: "TLSv1.3", +// go_update_request_info updates the sapi_request_info struct +// See: https://github.com/php/php-src/blob/345e04b619c3bc11ea17ee02cdecad6ae8ce5891/main/SAPI.h#L72 +// +//export go_update_request_info +func go_update_request_info(threadIndex C.uintptr_t, info *C.sapi_request_info) C.bool { + thread := phpThreads[threadIndex] + fc := thread.getRequestContext() + request := fc.request + + authUser, authPassword, ok := request.BasicAuth() + if ok { + if authPassword != "" { + info.auth_password = thread.pinCString(authPassword) + } + if authUser != "" { + info.auth_user = thread.pinCString(authUser) + } + } + + info.request_method = thread.pinCString(request.Method) + info.query_string = thread.pinCString(request.URL.RawQuery) + info.content_length = C.zend_long(request.ContentLength) + + if contentType := request.Header.Get("Content-Type"); contentType != "" { + info.content_type = thread.pinCString(contentType) + } + + if fc.pathInfo != "" { + info.path_translated = thread.pinCString(sanitizedPathJoin(fc.documentRoot, fc.pathInfo)) // See: http://www.oreilly.com/openbook/cgi/ch02_04.html + } + + info.request_uri = thread.pinCString(request.URL.RequestURI()) + + info.proto_num = C.int(request.ProtoMajor*1000 + request.ProtoMinor) + + return C.bool(fc.worker != nil) } // SanitizedPathJoin performs filepath.Join(root, reqPath) that diff --git a/context.go b/context.go index 81f7593a..65aee5b7 100644 --- a/context.go +++ b/context.go @@ -5,6 +5,7 @@ import ( "log/slog" "net/http" "os" + "strconv" "strings" "time" ) @@ -67,36 +68,13 @@ func NewRequestWithContext(r *http.Request, opts ...RequestOption) (*http.Reques } } - if fc.splitPath == nil { - fc.splitPath = []string{".php"} - } - - if fc.env == nil { - fc.env = make(map[string]string) - } - - if splitPos := splitPos(fc, r.URL.Path); splitPos > -1 { - fc.docURI = r.URL.Path[:splitPos] - fc.pathInfo = r.URL.Path[splitPos:] - - // Strip PATH_INFO from SCRIPT_NAME - fc.scriptName = strings.TrimSuffix(r.URL.Path, fc.pathInfo) - - // Ensure the SCRIPT_NAME has a leading slash for compliance with RFC3875 - // Info: https://tools.ietf.org/html/rfc3875#section-4.1.13 - if fc.scriptName != "" && !strings.HasPrefix(fc.scriptName, "/") { - fc.scriptName = "/" + fc.scriptName - } - } - - // if a worker is assigned explicitly, use its filename - // determine if the filename belongs to a worker otherwise + // If a worker is already assigned explicitly, use its filename and skip parsing path variables if fc.worker != nil { fc.scriptFilename = fc.worker.fileName } else { - // SCRIPT_FILENAME is the absolute path of SCRIPT_NAME - fc.scriptFilename = sanitizedPathJoin(fc.documentRoot, fc.scriptName) - fc.worker = getWorkerByPath(fc.scriptFilename) + // If no worker was assigned, split the path into the "traditional" CGI path variables. + // This needs to already happen here in case a worker script still matches the path. + splitCgiPath(fc) } c := context.WithValue(r.Context(), contextKey, fc) @@ -104,6 +82,7 @@ func NewRequestWithContext(r *http.Request, opts ...RequestOption) (*http.Reques return r.WithContext(c), nil } +// newDummyContext creates a fake context from a request path func newDummyContext(requestPath string, opts ...RequestOption) (*frankenPHPContext, error) { r, err := http.NewRequest(http.MethodGet, requestPath, nil) if err != nil { @@ -132,13 +111,22 @@ func (fc *frankenPHPContext) closeContext() { // validate checks if the request should be outright rejected func (fc *frankenPHPContext) validate() bool { - if !strings.Contains(fc.request.URL.Path, "\x00") { - return true + if strings.Contains(fc.request.URL.Path, "\x00") { + fc.rejectBadRequest("Invalid request path") + + return false } - fc.rejectBadRequest("Invalid request path") + contentLengthStr := fc.request.Header.Get("Content-Length") + if contentLengthStr != "" { + if contentLength, err := strconv.Atoi(contentLengthStr); err != nil || contentLength < 0 { + fc.rejectBadRequest("invalid Content-Length header: " + contentLengthStr) - return false + return false + } + } + + return true } func (fc *frankenPHPContext) clientHasClosed() bool { diff --git a/frankenphp.c b/frankenphp.c index 102af2db..b6300dbc 100644 --- a/frankenphp.c +++ b/frankenphp.c @@ -75,6 +75,16 @@ __thread uintptr_t thread_index; __thread bool is_worker_thread = false; __thread zval *os_environment = NULL; +static void frankenphp_update_request_context() { + /* the server context is stored on the go side, still SG(server_context) needs + * to not be NULL */ + SG(server_context) = (void *)1; + /* status It is not reset by zend engine, set it to 200. */ + SG(sapi_headers).http_response_code = 200; + + is_worker_thread = go_update_request_info(thread_index, &SG(request_info)); +} + static void frankenphp_free_request_context() { if (SG(request_info).cookie_data != NULL) { free(SG(request_info).cookie_data); @@ -174,6 +184,8 @@ void frankenphp_add_assoc_str_ex(zval *track_vars_array, char *key, static int frankenphp_worker_request_startup() { int retval = SUCCESS; + frankenphp_update_request_context(); + zend_try { frankenphp_destroy_super_globals(); frankenphp_release_temporary_streams(); @@ -507,36 +519,8 @@ static void frankenphp_request_shutdown() { zval_ptr_dtor_nogc(&PG(http_globals)[TRACK_VARS_ENV]); array_init(&PG(http_globals)[TRACK_VARS_ENV]); } - php_request_shutdown((void *)0); frankenphp_free_request_context(); -} - -int frankenphp_update_server_context(bool is_worker_request, - - const char *request_method, - char *query_string, - zend_long content_length, - char *path_translated, char *request_uri, - const char *content_type, char *auth_user, - char *auth_password, int proto_num) { - - SG(server_context) = (void *)1; - is_worker_thread = is_worker_request; - - // It is not reset by zend engine, set it to 200. - SG(sapi_headers).http_response_code = 200; - - SG(request_info).auth_password = auth_password; - SG(request_info).auth_user = auth_user; - SG(request_info).request_method = request_method; - SG(request_info).query_string = query_string; - SG(request_info).content_type = content_type; - SG(request_info).content_length = content_length; - SG(request_info).path_translated = path_translated; - SG(request_info).request_uri = request_uri; - SG(request_info).proto_num = proto_num; - - return SUCCESS; + php_request_shutdown((void *)0); } static int frankenphp_startup(sapi_module_struct *sapi_module) { @@ -974,7 +958,8 @@ bool frankenphp_new_php_thread(uintptr_t thread_index) { return true; } -int frankenphp_request_startup() { +static int frankenphp_request_startup() { + frankenphp_update_request_context(); if (php_request_startup() == SUCCESS) { return SUCCESS; } @@ -1014,7 +999,6 @@ int frankenphp_execute_script(char *file_name) { zend_destroy_file_handle(&file_handle); - frankenphp_free_request_context(); frankenphp_request_shutdown(); return status; diff --git a/frankenphp.go b/frankenphp.go index e6807180..58cd95b8 100644 --- a/frankenphp.go +++ b/frankenphp.go @@ -12,8 +12,6 @@ package frankenphp // // We also set these flags for hardening: https://github.com/docker-library/php/blob/master/8.2/bookworm/zts/Dockerfile#L57-L59 -// #cgo nocallback frankenphp_update_server_context -// #cgo noescape frankenphp_update_server_context // #include // #include // #include @@ -32,7 +30,6 @@ import ( "os" "os/signal" "runtime" - "strconv" "strings" "sync" "syscall" @@ -313,66 +310,6 @@ func Shutdown() { logger.Debug("FrankenPHP shut down") } -func updateServerContext(thread *phpThread, fc *frankenPHPContext, isWorkerRequest bool) error { - request := fc.request - authUser, authPassword, ok := request.BasicAuth() - var cAuthUser, cAuthPassword *C.char - if ok && authPassword != "" { - cAuthPassword = thread.pinCString(authPassword) - } - if ok && authUser != "" { - cAuthUser = thread.pinCString(authUser) - } - - cMethod := thread.pinCString(request.Method) - cQueryString := thread.pinCString(request.URL.RawQuery) - contentLengthStr := request.Header.Get("Content-Length") - contentLength := 0 - if contentLengthStr != "" { - var err error - contentLength, err = strconv.Atoi(contentLengthStr) - if err != nil || contentLength < 0 { - return fmt.Errorf("invalid Content-Length header: %w", err) - } - } - - contentType := request.Header.Get("Content-Type") - var cContentType *C.char - if contentType != "" { - cContentType = thread.pinCString(contentType) - } - - // compliance with the CGI specification requires that - // PATH_TRANSLATED should only exist if PATH_INFO is defined. - // Info: https://www.ietf.org/rfc/rfc3875 Page 14 - var cPathTranslated *C.char - if fc.pathInfo != "" { - cPathTranslated = thread.pinCString(sanitizedPathJoin(fc.documentRoot, fc.pathInfo)) // Info: http://www.oreilly.com/openbook/cgi/ch02_04.html - } - - cRequestUri := thread.pinCString(request.URL.RequestURI()) - - ret := C.frankenphp_update_server_context( - C.bool(isWorkerRequest || fc.responseWriter == nil), - - cMethod, - cQueryString, - C.zend_long(contentLength), - cPathTranslated, - cRequestUri, - cContentType, - cAuthUser, - cAuthPassword, - C.int(request.ProtoMajor*1000+request.ProtoMinor), - ) - - if ret > 0 { - return ErrRequestContextCreation - } - - return nil -} - // ServeHTTP executes a PHP script according to the given context. func ServeHTTP(responseWriter http.ResponseWriter, request *http.Request) error { if !isRunning { diff --git a/frankenphp.h b/frankenphp.h index 246a221d..c17df606 100644 --- a/frankenphp.h +++ b/frankenphp.h @@ -51,15 +51,6 @@ int frankenphp_new_main_thread(int num_threads); bool frankenphp_new_php_thread(uintptr_t thread_index); bool frankenphp_shutdown_dummy_request(void); -int frankenphp_update_server_context(bool is_worker_request, - - const char *request_method, - char *query_string, - zend_long content_length, - char *path_translated, char *request_uri, - const char *content_type, char *auth_user, - char *auth_password, int proto_num); -int frankenphp_request_startup(); int frankenphp_execute_script(char *file_name); int frankenphp_execute_script_cli(char *script, int argc, char **argv, diff --git a/threadregular.go b/threadregular.go index 4ee7da95..88cef7e7 100644 --- a/threadregular.go +++ b/threadregular.go @@ -75,14 +75,6 @@ func (handler *regularThread) waitForRequest() string { handler.requestContext = fc handler.state.markAsWaiting(false) - if err := updateServerContext(handler.thread, fc, false); err != nil { - fc.rejectBadRequest(err.Error()) - handler.afterRequest() - handler.thread.Unpin() - // go back to beforeScriptExecution - return handler.beforeScriptExecution() - } - // set the scriptFilename that should be executed return fc.scriptFilename } diff --git a/threadworker.go b/threadworker.go index 212863d5..b7dc8203 100644 --- a/threadworker.go +++ b/threadworker.go @@ -91,10 +91,7 @@ func setupWorkerScript(handler *workerThread, worker *worker) { panic(err) } - if err := updateServerContext(handler.thread, fc, false); err != nil { - panic(err) - } - + fc.worker = worker handler.dummyContext = fc handler.isBootingScript = true clearSandboxedEnv(handler.thread) @@ -192,15 +189,6 @@ func (handler *workerThread) waitForWorkerRequest() bool { logger.LogAttrs(ctx, slog.LevelDebug, "request handling started", slog.String("worker", handler.worker.name), slog.Int("thread", handler.thread.threadIndex), slog.String("url", fc.request.RequestURI)) - if err := updateServerContext(handler.thread, fc, true); err != nil { - // Unexpected error or invalid request - logger.LogAttrs(ctx, slog.LevelDebug, "unexpected error", slog.String("worker", handler.worker.name), slog.Int("thread", handler.thread.threadIndex), slog.String("url", fc.request.RequestURI), slog.Any("error", err)) - fc.rejectBadRequest(err.Error()) - handler.workerContext = nil - - return handler.waitForWorkerRequest() - } - return true }