perf: tail latency with goSched (#2033)

Alternate implementation to #2016 that doesn't reduce RPS with lower
numbers of threads.
This commit is contained in:
Alexander Stecher
2025-11-26 18:33:07 +01:00
committed by GitHub
parent abaf03c7f7
commit dadeb5a628
5 changed files with 56 additions and 25 deletions

View File

@@ -1,4 +1,5 @@
//go:build nomercure
package caddy
import (

View File

@@ -39,7 +39,7 @@ type workerConfig struct {
// MaxConsecutiveFailures sets the maximum number of consecutive failures before panicking (defaults to 6, set to -1 to never panic)
MaxConsecutiveFailures int `json:"max_consecutive_failures,omitempty"`
requestOptions []frankenphp.RequestOption
requestOptions []frankenphp.RequestOption
}
func parseWorkerConfig(d *caddyfile.Dispenser) (workerConfig, error) {

View File

@@ -307,7 +307,7 @@ func Init(options ...Option) error {
return err
}
regularRequestChan = make(chan contextHolder, opt.numThreads-workerThreadCount)
regularRequestChan = make(chan contextHolder)
regularThreads = make([]*phpThread, 0, opt.numThreads-workerThreadCount)
for i := 0; i < opt.numThreads-workerThreadCount; i++ {
convertToRegularThread(getInactivePHPThread())

View File

@@ -2,7 +2,9 @@ package frankenphp
import (
"context"
"runtime"
"sync"
"sync/atomic"
)
// representation of a non-worker PHP thread
@@ -16,9 +18,10 @@ type regularThread struct {
}
var (
regularThreads []*phpThread
regularThreadMu = &sync.RWMutex{}
regularRequestChan chan contextHolder
regularThreads []*phpThread
regularThreadMu = &sync.RWMutex{}
regularRequestChan chan contextHolder
queuedRegularThreads = atomic.Int32{}
)
func convertToRegularThread(thread *phpThread) {
@@ -81,6 +84,7 @@ func (handler *regularThread) waitForRequest() string {
// go back to beforeScriptExecution
return handler.beforeScriptExecution()
case ch = <-regularRequestChan:
case ch = <-handler.thread.requestChan:
}
handler.ctx = ch.ctx
@@ -100,23 +104,35 @@ func (handler *regularThread) afterRequest() {
func handleRequestWithRegularPHPThreads(ch contextHolder) error {
metrics.StartRequest()
select {
case regularRequestChan <- ch:
// a thread was available to handle the request immediately
<-ch.frankenPHPContext.done
metrics.StopRequest()
runtime.Gosched()
return nil
default:
// no thread was available
if queuedRegularThreads.Load() == 0 {
regularThreadMu.RLock()
for _, thread := range regularThreads {
select {
case thread.requestChan <- ch:
regularThreadMu.RUnlock()
<-ch.frankenPHPContext.done
metrics.StopRequest()
return nil
default:
// thread was not available
}
}
regularThreadMu.RUnlock()
}
// if no thread was available, mark the request as queued and fan it out to all threads
queuedRegularThreads.Add(1)
metrics.QueuedRequest()
for {
select {
case regularRequestChan <- ch:
queuedRegularThreads.Add(-1)
metrics.DequeuedRequest()
<-ch.frankenPHPContext.done
metrics.StopRequest()
@@ -125,7 +141,9 @@ func handleRequestWithRegularPHPThreads(ch contextHolder) error {
// the request has triggered scaling, continue to wait for a thread
case <-timeoutChan(maxWaitTime):
// the request has timed out while stalled in the queue
queuedRegularThreads.Add(-1)
metrics.DequeuedRequest()
metrics.StopRequest()
ch.frankenPHPContext.reject(ErrMaxWaitTimeExceeded)

View File

@@ -6,8 +6,10 @@ import (
"fmt"
"os"
"path/filepath"
"runtime"
"strings"
"sync"
"sync/atomic"
"time"
"github.com/dunglas/frankenphp/internal/fastabs"
@@ -28,6 +30,7 @@ type worker struct {
maxConsecutiveFailures int
onThreadReady func(int)
onThreadShutdown func(int)
queuedRequests atomic.Int32
}
var (
@@ -253,24 +256,30 @@ func (worker *worker) isAtThreadLimit() bool {
func (worker *worker) handleRequest(ch contextHolder) error {
metrics.StartWorkerRequest(worker.name)
// dispatch requests to all worker threads in order
worker.threadMutex.RLock()
for _, thread := range worker.threads {
select {
case thread.requestChan <- ch:
worker.threadMutex.RUnlock()
<-ch.frankenPHPContext.done
metrics.StopWorkerRequest(worker.name, time.Since(ch.frankenPHPContext.startedAt))
runtime.Gosched()
return nil
default:
// thread is busy, continue
if worker.queuedRequests.Load() == 0 {
// dispatch requests to all worker threads in order
worker.threadMutex.RLock()
for _, thread := range worker.threads {
select {
case thread.requestChan <- ch:
worker.threadMutex.RUnlock()
<-ch.frankenPHPContext.done
metrics.StopWorkerRequest(worker.name, time.Since(ch.frankenPHPContext.startedAt))
return nil
default:
// thread is busy, continue
}
}
worker.threadMutex.RUnlock()
}
worker.threadMutex.RUnlock()
// if no thread was available, mark the request as queued and apply the scaling strategy
worker.queuedRequests.Add(1)
metrics.QueuedWorkerRequest(worker.name)
for {
workerScaleChan := scaleChan
if worker.isAtThreadLimit() {
@@ -279,6 +288,7 @@ func (worker *worker) handleRequest(ch contextHolder) error {
select {
case worker.requestChan <- ch:
worker.queuedRequests.Add(-1)
metrics.DequeuedWorkerRequest(worker.name)
<-ch.frankenPHPContext.done
metrics.StopWorkerRequest(worker.name, time.Since(ch.frankenPHPContext.startedAt))
@@ -288,7 +298,9 @@ func (worker *worker) handleRequest(ch contextHolder) error {
// the request has triggered scaling, continue to wait for a thread
case <-timeoutChan(maxWaitTime):
// the request has timed out while stalled in the queue
worker.queuedRequests.Add(-1)
metrics.DequeuedWorkerRequest(worker.name)
metrics.StopWorkerRequest(worker.name, time.Since(ch.frankenPHPContext.startedAt))
ch.frankenPHPContext.reject(ErrMaxWaitTimeExceeded)