From a36547bc2ffb620ec6cc43f4c6ff4a4e276ef858 Mon Sep 17 00:00:00 2001 From: Alexander Stecher <45872305+AlliBalliBaba@users.noreply.github.com> Date: Thu, 13 Nov 2025 23:38:54 +0100 Subject: [PATCH] suggestion: simplify exponential backoff (#1970) * removes backoff. * Adjusts comment. * Suggestions by @dunglas * Removes 'max_consecutive_failures' * Removes 'max_consecutive_failures' * Adjusts warning. * Disables the logger in tests. * Revert "Adjusts warning." This reverts commit e93a6a930129e938d076fc5176a283f6b4b45852. * Revert "Removes 'max_consecutive_failures'" This reverts commit ba28ea0e4ada8639095bac8273961edf4c3b4cb2. * Revert "Removes 'max_consecutive_failures'" This reverts commit 32e649caf7a0f0b987cac3ffd37f6855497355d7. * Only fails on max failures again. * Restores failure timings. --- frankenphp_test.go | 10 +++--- internal/backoff/backoff.go | 59 -------------------------------- internal/backoff/backoff_test.go | 41 ---------------------- phpmainthread_test.go | 13 ++++--- testdata/failing-worker.php | 17 ++------- threadworker.go | 41 +++++++++++++--------- worker.go | 25 ++++++++++---- 7 files changed, 57 insertions(+), 149 deletions(-) delete mode 100644 internal/backoff/backoff.go delete mode 100644 internal/backoff/backoff_test.go diff --git a/frankenphp_test.go b/frankenphp_test.go index 7b4b44db..f7e0a171 100644 --- a/frankenphp_test.go +++ b/frankenphp_test.go @@ -601,10 +601,12 @@ func testRequestHeaders(t *testing.T, opts *testOptions) { } func TestFailingWorker(t *testing.T) { - runTest(t, func(handler func(http.ResponseWriter, *http.Request), _ *httptest.Server, i int) { - body, _ := testGet("http://example.com/failing-worker.php", handler, t) - assert.Contains(t, body, "ok") - }, &testOptions{workerScript: "failing-worker.php"}) + err := frankenphp.Init( + frankenphp.WithLogger(slog.New(slog.NewTextHandler(io.Discard, nil))), + frankenphp.WithWorkers("failing worker", "testdata/failing-worker.php", 4, frankenphp.WithWorkerMaxFailures(1)), + frankenphp.WithNumThreads(5), + ) + assert.Error(t, err, "should return an immediate error if workers fail on startup") } func TestEnv(t *testing.T) { diff --git a/internal/backoff/backoff.go b/internal/backoff/backoff.go deleted file mode 100644 index 73182f4a..00000000 --- a/internal/backoff/backoff.go +++ /dev/null @@ -1,59 +0,0 @@ -package backoff - -import "C" -import ( - "sync" - "time" -) - -type ExponentialBackoff struct { - backoff time.Duration - failureCount int - mu sync.RWMutex - MaxBackoff time.Duration - MinBackoff time.Duration - MaxConsecutiveFailures int -} - -// recordSuccess resets the backoff and failureCount -func (e *ExponentialBackoff) RecordSuccess() { - e.mu.Lock() - e.failureCount = 0 - e.backoff = e.MinBackoff - e.mu.Unlock() -} - -// recordFailure increments the failure count and increases the backoff, it returns true if MaxConsecutiveFailures has been reached -func (e *ExponentialBackoff) RecordFailure() bool { - e.mu.Lock() - e.failureCount += 1 - if e.backoff < e.MinBackoff { - e.backoff = e.MinBackoff - } - - e.backoff = min(e.backoff*2, e.MaxBackoff) - - e.mu.Unlock() - return e.MaxConsecutiveFailures != -1 && e.failureCount >= e.MaxConsecutiveFailures -} - -// wait sleeps for the backoff duration if failureCount is non-zero. -// NOTE: this is not tested and should be kept 'obviously correct' (i.e., simple) -func (e *ExponentialBackoff) Wait() { - e.mu.RLock() - if e.failureCount == 0 { - e.mu.RUnlock() - - return - } - e.mu.RUnlock() - - time.Sleep(e.backoff) -} - -func (e *ExponentialBackoff) FailureCount() int { - e.mu.RLock() - defer e.mu.RUnlock() - - return e.failureCount -} diff --git a/internal/backoff/backoff_test.go b/internal/backoff/backoff_test.go deleted file mode 100644 index b82efc4b..00000000 --- a/internal/backoff/backoff_test.go +++ /dev/null @@ -1,41 +0,0 @@ -package backoff - -import ( - "github.com/stretchr/testify/assert" - "testing" - "time" -) - -func TestExponentialBackoff_Reset(t *testing.T) { - e := &ExponentialBackoff{ - MaxBackoff: 5 * time.Second, - MinBackoff: 500 * time.Millisecond, - MaxConsecutiveFailures: 3, - } - - assert.False(t, e.RecordFailure()) - assert.False(t, e.RecordFailure()) - e.RecordSuccess() - - e.mu.RLock() - defer e.mu.RUnlock() - assert.Equal(t, 0, e.failureCount, "expected failureCount to be reset to 0") - assert.Equal(t, e.backoff, e.MinBackoff, "expected backoff to be reset to MinBackoff") -} - -func TestExponentialBackoff_Trigger(t *testing.T) { - e := &ExponentialBackoff{ - MaxBackoff: 500 * 3 * time.Millisecond, - MinBackoff: 500 * time.Millisecond, - MaxConsecutiveFailures: 3, - } - - assert.False(t, e.RecordFailure()) - assert.False(t, e.RecordFailure()) - assert.True(t, e.RecordFailure()) - - e.mu.RLock() - defer e.mu.RUnlock() - assert.Equal(t, e.failureCount, e.MaxConsecutiveFailures, "expected failureCount to be MaxConsecutiveFailures") - assert.Equal(t, e.backoff, e.MaxBackoff, "expected backoff to be MaxBackoff") -} diff --git a/phpmainthread_test.go b/phpmainthread_test.go index 81d94c2d..e6cafe06 100644 --- a/phpmainthread_test.go +++ b/phpmainthread_test.go @@ -175,9 +175,9 @@ func TestFinishBootingAWorkerScript(t *testing.T) { func TestReturnAnErrorIf2WorkersHaveTheSameFileName(t *testing.T) { workers = []*worker{} - w, err1 := newWorker(workerOpt{fileName: "filename.php", maxConsecutiveFailures: defaultMaxConsecutiveFailures}) + w, err1 := newWorker(workerOpt{fileName: "filename.php"}) workers = append(workers, w) - _, err2 := newWorker(workerOpt{fileName: "filename.php", maxConsecutiveFailures: defaultMaxConsecutiveFailures}) + _, err2 := newWorker(workerOpt{fileName: "filename.php"}) assert.NoError(t, err1) assert.Error(t, err2, "two workers cannot have the same filename") @@ -185,9 +185,9 @@ func TestReturnAnErrorIf2WorkersHaveTheSameFileName(t *testing.T) { func TestReturnAnErrorIf2ModuleWorkersHaveTheSameName(t *testing.T) { workers = []*worker{} - w, err1 := newWorker(workerOpt{fileName: "filename.php", name: "workername", maxConsecutiveFailures: defaultMaxConsecutiveFailures}) + w, err1 := newWorker(workerOpt{fileName: "filename.php", name: "workername"}) workers = append(workers, w) - _, err2 := newWorker(workerOpt{fileName: "filename2.php", name: "workername", maxConsecutiveFailures: defaultMaxConsecutiveFailures}) + _, err2 := newWorker(workerOpt{fileName: "filename2.php", name: "workername"}) assert.NoError(t, err1) assert.Error(t, err2, "two workers cannot have the same name") @@ -198,9 +198,8 @@ func getDummyWorker(fileName string) *worker { workers = []*worker{} } worker, _ := newWorker(workerOpt{ - fileName: testDataPath + "/" + fileName, - num: 1, - maxConsecutiveFailures: defaultMaxConsecutiveFailures, + fileName: testDataPath + "/" + fileName, + num: 1, }) workers = append(workers, worker) return worker diff --git a/testdata/failing-worker.php b/testdata/failing-worker.php index 108d2ff8..0bb001f1 100644 --- a/testdata/failing-worker.php +++ b/testdata/failing-worker.php @@ -1,18 +1,7 @@ = 0 && startupFailChan != nil && !watcherIsEnabled && handler.failureCount >= worker.maxConsecutiveFailures { + select { + case startupFailChan <- fmt.Errorf("worker failure: script %s has not reached frankenphp_handle_request()", worker.fileName): + handler.thread.state.Set(state.ShuttingDown) + return } - logger.LogAttrs(ctx, slog.LevelWarn, "many consecutive worker failures", slog.String("worker", worker.name), slog.Int("thread", handler.thread.threadIndex), slog.Int("failures", handler.backoff.FailureCount())) } + + if watcherIsEnabled { + // worker script has probably failed due to script changes while watcher is enabled + logger.LogAttrs(ctx, slog.LevelWarn, "(watcher enabled) worker script has not reached frankenphp_handle_request()", slog.String("worker", worker.name), slog.Int("thread", handler.thread.threadIndex)) + } else { + // rare case where worker script has failed on a restart during normal operation + // this can happen if startup success depends on external resources + logger.LogAttrs(ctx, slog.LevelWarn, "worker script has failed on restart", slog.String("worker", worker.name), slog.Int("thread", handler.thread.threadIndex)) + } + + // wait a bit and try again (exponential backoff) + backoffDuration := time.Duration(handler.failureCount*handler.failureCount*100) * time.Millisecond + if backoffDuration > time.Second { + backoffDuration = time.Second + } + handler.failureCount++ + time.Sleep(backoffDuration) } // waitForWorkerRequest is called during frankenphp_handle_request in the php worker script. @@ -171,6 +177,7 @@ func (handler *workerThread) waitForWorkerRequest() (bool, any) { // Clear the first dummy request created to initialize the worker if handler.isBootingScript { handler.isBootingScript = false + handler.failureCount = 0 if !C.frankenphp_shutdown_dummy_request() { panic("Not in CGI context") } diff --git a/worker.go b/worker.go index 96ba45e3..13e49111 100644 --- a/worker.go +++ b/worker.go @@ -31,41 +31,52 @@ type worker struct { var ( workers []*worker watcherIsEnabled bool + startupFailChan chan (error) ) func initWorkers(opt []workerOpt) error { workers = make([]*worker, 0, len(opt)) - workersReady := sync.WaitGroup{} directoriesToWatch := getDirectoriesToWatch(opt) watcherIsEnabled = len(directoriesToWatch) > 0 + totalThreadsToStart := 0 for _, o := range opt { w, err := newWorker(o) if err != nil { return err } + totalThreadsToStart += w.num workers = append(workers, w) } + startupFailChan = make(chan error, totalThreadsToStart) + var workersReady sync.WaitGroup for _, w := range workers { - workersReady.Add(w.num) for i := 0; i < w.num; i++ { thread := getInactivePHPThread() convertToWorkerThread(thread, w) - go func() { - thread.state.WaitFor(state.Ready) - workersReady.Done() - }() + workersReady.Go(func() { + thread.state.WaitFor(state.Ready, state.ShuttingDown, state.Done) + }) } } workersReady.Wait() + select { + case err := <-startupFailChan: + // at least 1 worker has failed, shut down and return an error + Shutdown() + return fmt.Errorf("failed to initialize workers: %w", err) + default: + // all workers started successfully + startupFailChan = nil + } + if !watcherIsEnabled { return nil } - watcherIsEnabled = true if err := watcher.InitWatcher(directoriesToWatch, RestartWorkers, logger); err != nil { return err }