Skip to content

Commit 79ad21b

Browse files
Wauplinclaude
andcommitted
Fix reference cycle in hf_raise_for_status causing delayed object destruction (#4092)
* Fix reference cycle in hf_raise_for_status by extending _format with **attrs Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com> * simpler tests * syntax --------- Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent 25b5d2e commit 79ad21b

2 files changed

Lines changed: 51 additions & 21 deletions

File tree

src/huggingface_hub/utils/_http.py

Lines changed: 14 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -774,26 +774,17 @@ def hf_raise_for_status(response: httpx.Response, endpoint_name: str | None = No
774774

775775
if error_code == "RevisionNotFound":
776776
message = f"{response.status_code} Client Error." + "\n\n" + f"Revision Not Found for url: {response.url}."
777-
revision_err = _format(RevisionNotFoundError, message, response)
778-
revision_err.repo_type = repo_type
779-
revision_err.repo_id = repo_id
780-
raise revision_err from e
777+
raise _format(RevisionNotFoundError, message, response, repo_type=repo_type, repo_id=repo_id) from e
781778

782779
elif error_code == "EntryNotFound":
783780
message = f"{response.status_code} Client Error." + "\n\n" + f"Entry Not Found for url: {response.url}."
784-
entry_err = _format(RemoteEntryNotFoundError, message, response)
785-
entry_err.repo_type = repo_type
786-
entry_err.repo_id = repo_id
787-
raise entry_err from e
781+
raise _format(RemoteEntryNotFoundError, message, response, repo_type=repo_type, repo_id=repo_id) from e
788782

789783
elif error_code == "GatedRepo":
790784
message = (
791785
f"{response.status_code} Client Error." + "\n\n" + f"Cannot access gated repo for url {response.url}."
792786
)
793-
gated_err = _format(GatedRepoError, message, response)
794-
gated_err.repo_type = repo_type
795-
gated_err.repo_id = repo_id
796-
raise gated_err from e
787+
raise _format(GatedRepoError, message, response, repo_type=repo_type, repo_id=repo_id) from e
797788

798789
elif error_message == "Access to this resource is disabled.":
799790
message = (
@@ -817,9 +808,9 @@ def hf_raise_for_status(response: httpx.Response, endpoint_name: str | None = No
817808
+ "\nPlease make sure you specified the correct bucket id (namespace/name)."
818809
+ "\nIf the bucket is private, make sure you are authenticated and your token has the required permissions."
819810
)
820-
bucket_err = _format(BucketNotFoundError, message, response)
821-
bucket_err.bucket_id = _parse_bucket_id_from_url(request_url)
822-
raise bucket_err from e
811+
raise _format(
812+
BucketNotFoundError, message, response, bucket_id=_parse_bucket_id_from_url(request_url)
813+
) from e
823814

824815
elif error_code == "RepoNotFound" or (
825816
response.status_code == 401
@@ -841,10 +832,7 @@ def hf_raise_for_status(response: httpx.Response, endpoint_name: str | None = No
841832
" make sure you are authenticated and your token has the required permissions."
842833
+ "\nFor more details, see https://huggingface.co/docs/huggingface_hub/authentication"
843834
)
844-
repo_err = _format(RepositoryNotFoundError, message, response)
845-
repo_err.repo_type = repo_type
846-
repo_err.repo_id = repo_id
847-
raise repo_err from e
835+
raise _format(RepositoryNotFoundError, message, response, repo_type=repo_type, repo_id=repo_id) from e
848836

849837
elif response.status_code == 400:
850838
message = (
@@ -919,7 +907,9 @@ def _warn_on_warning_headers(response: httpx.Response) -> None:
919907
_HfHubHTTPErrorT = TypeVar("_HfHubHTTPErrorT", bound=HfHubHTTPError)
920908

921909

922-
def _format(error_type: type[_HfHubHTTPErrorT], custom_message: str, response: httpx.Response) -> _HfHubHTTPErrorT:
910+
def _format(
911+
error_type: type[_HfHubHTTPErrorT], custom_message: str, response: httpx.Response, **attrs: Any
912+
) -> _HfHubHTTPErrorT:
923913
server_errors = []
924914

925915
# Retrieve server error from header
@@ -1009,7 +999,10 @@ def _format(error_type: type[_HfHubHTTPErrorT], custom_message: str, response: h
1009999
final_error_message += request_id_message
10101000

10111001
# Return
1012-
return error_type(final_error_message.strip(), response=response, server_message=server_message or None)
1002+
err = error_type(final_error_message.strip(), response=response, server_message=server_message or None)
1003+
for k, v in attrs.items():
1004+
setattr(err, k, v)
1005+
return err
10131006

10141007

10151008
def _curlify(request: httpx.Request) -> str:

tests/test_utils_http.py

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
import threading
22
import time
33
import unittest
4+
import weakref
45
from http.server import BaseHTTPRequestHandler, HTTPServer
56
from typing import Generator, Optional
67
from unittest.mock import Mock, call, patch
@@ -702,3 +703,39 @@ def test_non_bucket_url(self):
702703

703704
def test_http_url(self):
704705
assert _parse_bucket_id_from_url("http://localhost:8080/api/buckets/ns/name") == "ns/name"
706+
707+
708+
class TestNoReferenceCycleInRaise:
709+
"""Regression test: hf_raise_for_status must not create reference cycles.
710+
711+
See https://github.com/huggingface/huggingface_hub/pull/4084 for details.
712+
When exceptions were stored in local variables before `raise ... from e`,
713+
CPython reference cycles prevented deterministic cleanup, causing real
714+
issues downstream (e.g. vLLM GPU memory not released).
715+
"""
716+
717+
def test_no_refcycle(self):
718+
url = "https://huggingface.co/api/models/user/repo"
719+
request = Mock(spec=httpx.Request)
720+
request.url = httpx.URL(url)
721+
response = Mock(spec=httpx.Response)
722+
response.status_code = 404
723+
response.url = url
724+
response.headers = httpx.Headers({"X-Error-Code": "RepoNotFound"})
725+
response.json.return_value = {}
726+
response.request = request
727+
response.raise_for_status.side_effect = httpx.HTTPStatusError("404", request=request, response=response)
728+
729+
ref = None
730+
try:
731+
hf_raise_for_status(response)
732+
except RepositoryNotFoundError as exc:
733+
# Clear the traceback to isolate our fix from the inherent
734+
# except-block cycle (exc.__traceback__ -> this frame -> exc).
735+
# We only care that hf_raise_for_status itself does not create a
736+
# cycle via intermediate local variables.
737+
exc.__traceback__ = None
738+
ref = weakref.ref(exc)
739+
# After exiting the except block, the exception should be freed by
740+
# refcount alone (no gc.collect() needed) if there is no cycle.
741+
assert ref() is None

0 commit comments

Comments
 (0)