From 5ebb853e1a15f174409bdec0481a39c470a1ec5d Mon Sep 17 00:00:00 2001
From: Rasmus Lerdorf
Date: Tue, 30 Sep 2025 20:20:37 -0400
Subject: [PATCH] Fix segfault during Redis Cluster failover
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When a Redis Cluster failover occurs, the client detects that the
redirected node was a replica of the old master and calls
cluster_map_keyspace() to remap the cluster topology. If
cluster_map_keyspace() fails (e.g., due to network issues during the
remap), it frees all node objects via zend_hash_clean(c->nodes) and
zeros the master array via memset(c->master, 0, ...).

The bug was that the return value of cluster_map_keyspace() was being
ignored in the failover detection path. This caused the code to
continue with NULL socket pointers, leading to segfaults when
dereferencing c->cmd_sock later.

This fix:

1. Checks the return value of cluster_map_keyspace() in failover
   detection; if it fails, throws an exception and returns FAILURE
2. Adds defense-in-depth NULL checks after MOVED and ASK redirections
   to prevent segfaults if slots become NULL for any reason

Fixes production crashes observed during Redis Cluster failovers.

🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- cluster_library.c | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/cluster_library.c b/cluster_library.c index c616af7..96def05 100644 --- a/cluster_library.c +++ b/cluster_library.c @@ -1431,7 +1431,10 @@ static int cluster_update_slot(redisCluster *c) { if (!CLUSTER_REDIR_CMP(c, slave->sock)) { // Detected a failover, the redirected node was a replica // Remap the cluster's keyspace - cluster_map_keyspace(c); + if (cluster_map_keyspace(c) == FAILURE) { + CLUSTER_THROW_EXCEPTION("Failed to remap cluster keyspace after failover", 0); + return FAILURE; + } return SUCCESS; } } ZEND_HASH_FOREACH_END(); @@ -1610,9 +1613,19 @@ PHP_REDIS_API short cluster_send_command(redisCluster *c, short slot, const char return -1; } c->cmd_sock = SLOT_SOCK(c, slot); + /* Verify slot is valid after update */ + if (!c->cmd_sock) { + CLUSTER_THROW_EXCEPTION("Socket for slot is NULL after MOVED redirection", 0); + return -1; + } } else if (c->redir_type == REDIR_ASK) { /* For ASK redirection we want to redirect but not update slot mapping */ c->cmd_sock = cluster_get_asking_sock(c); + /* Verify socket from ASK redirection */ + if (!c->cmd_sock) { + CLUSTER_THROW_EXCEPTION("Socket is NULL after ASK redirection", 0); + return -1; + } } }