@@ -3068,7 +3068,9 @@ def __init__(self, args, options=None, position=None):
30683068class NodeCommands :
30693069 """ """
30703070
3071- def __init__ (self , parse_response , connection_pool , connection ):
3071+ def __init__ (
3072+ self , parse_response , connection_pool : ConnectionPool , connection : Connection
3073+ ):
30723074 """ """
30733075 self .parse_response = parse_response
30743076 self .connection_pool = connection_pool
@@ -3423,15 +3425,18 @@ def _send_cluster_commands(
34233425 attempt = sorted (stack , key = lambda x : x .position )
34243426 is_default_node = False
34253427 # build a list of node objects based on node names we need to route commands to
3426- nodes = {}
3427-
3428- # as we move through each command that still needs to be processed,
3429- # we figure out the slot number that command maps to, then from
3430- # the slot determine the node.
3431- for c in attempt :
3432- command_policies = self ._pipe ._policy_resolver .resolve (c .args [0 ].lower ())
3428+ nodes : dict [str , NodeCommands ] = {}
3429+ nodes_written = 0
3430+ nodes_read = 0
34333431
3434- while True :
3432+ try :
3433+ # as we move through each command that still needs to be processed,
3434+ # we figure out the slot number that command maps to, then from
3435+ # the slot determine the node.
3436+ for c in attempt :
3437+ command_policies = self ._pipe ._policy_resolver .resolve (
3438+ c .args [0 ].lower ()
3439+ )
34353440 # refer to our internal node -> slot table that
34363441 # tells us where a given command should route to.
34373442 # (it might be possible we have a cached node that no longer
@@ -3506,37 +3511,38 @@ def _send_cluster_commands(
35063511 try :
35073512 connection = get_connection (redis_node )
35083513 except (ConnectionError , TimeoutError ):
3514+ # Release any connections we've already acquired before clearing nodes
35093515 for n in nodes .values ():
35103516 n .connection_pool .release (n .connection )
35113517 # Connection retries are being handled in the node's
35123518 # Retry object. Reinitialize the node -> slot table.
35133519 self ._nodes_manager .initialize ()
35143520 if is_default_node :
35153521 self ._pipe .replace_default_node ()
3522+ nodes = {}
35163523 raise
35173524 nodes [node_name ] = NodeCommands (
35183525 redis_node .parse_response ,
35193526 redis_node .connection_pool ,
35203527 connection ,
35213528 )
35223529 nodes [node_name ].append (c )
3523- break
35243530
3525- # send the commands in sequence.
3526- # we write to all the open sockets for each node first,
3527- # before reading anything
3528- # this allows us to flush all the requests out across the
3529- # network
3530- # so that we can read them from different sockets as they come back.
3531- # we dont' multiplex on the sockets as they come available,
3532- # but that shouldn't make too much difference.
3531+ # send the commands in sequence.
3532+ # we write to all the open sockets for each node first,
3533+ # before reading anything
3534+ # this allows us to flush all the requests out across the
3535+ # network
3536+ # so that we can read them from different sockets as they come back.
3537+ # we don't multiplex on the sockets as they come available,
3538+ # but that shouldn't make too much difference.
35333539
3534- # Start timing for observability
3535- start_time = time .monotonic ()
3540+ # Start timing for observability
3541+ start_time = time .monotonic ()
35363542
3537- try :
35383543 node_commands = nodes .values ()
35393544 for n in node_commands :
3545+ nodes_written += 1
35403546 n .write ()
35413547
35423548 for n in node_commands :
@@ -3550,26 +3556,24 @@ def _send_cluster_commands(
35503556 db_namespace = str (n .connection .db ),
35513557 batch_size = len (n .commands ),
35523558 )
3559+ nodes_read += 1
35533560 finally :
3554- # release all of the redis connections we allocated earlier
3561+ # release all the redis connections we allocated earlier
35553562 # back into the connection pool.
3556- # we used to do this step as part of a try/finally block,
3557- # but it is really dangerous to
3558- # release connections back into the pool if for some
3559- # reason the socket has data still left in it
3560- # from a previous operation. The write and
3561- # read operations already have try/catch around them for
3562- # all known types of errors including connection
3563- # and socket level errors.
3564- # So if we hit an exception, something really bad
3565- # happened and putting any oF
3566- # these connections back into the pool is a very bad idea.
3567- # the socket might have unread buffer still sitting in it,
3568- # and then the next time we read from it we pass the
3569- # buffered result back from a previous command and
3570- # every single request after to that connection will always get
3571- # a mismatched result.
3572- for n in nodes .values ():
3563+ # if the connection is dirty (that is: we've written
3564+ # commands to it, but haven't read the responses), we need
3565+ # to close the connection before returning it to the pool.
3566+ # otherwise, the next caller to use this connection will
3567+ # read the response from _this_ request, not its own request.
3568+ # disconnecting discards the dirty state & forces the next
3569+ # caller to reconnect.
3570+ # NOTE: dicts preserve insertion order (Python 3.7+); we're iterating
3571+ # through nodes.values() in the same order as we are when
3572+ # reading / writing to the connections above, which is critical
3573+ # for how we're using the nodes_written/nodes_read offsets.
3574+ for i , n in enumerate (nodes .values ()):
3575+ if i < nodes_written and i >= nodes_read :
3576+ n .connection .disconnect ()
35733577 n .connection_pool .release (n .connection )
35743578
35753579 # if the response isn't an exception it is a
0 commit comments