Skip to content

Commit 5a76b32

Browse files
committed
use separate client without overall timeout for bucket file downloads
1 parent e4f90eb commit 5a76b32

2 files changed

Lines changed: 39 additions & 1 deletion

File tree

pkg/sources/huggingface/client.go

Lines changed: 22 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -133,16 +133,37 @@ type HFClient struct {
133133
BaseURL string
134134
APIKey string
135135
HTTPClient *http.Client
136+
// downloadClient streams file bodies. Unlike HTTPClient it has no overall
137+
// Timeout, because http.Client.Timeout also interrupts Response.Body reads —
138+
// which would kill large/slow downloads handed to handlers.HandleFile.
139+
downloadClient *http.Client
136140
}
137141

138142
// NewHFClient creates a new HF client
139143
func NewHFClient(baseURL, apiKey string, timeout time.Duration) *HFClient {
144+
// Bound the connect + response-header phase (including redirect hops to the
145+
// CDN) without capping the body read; overall cancellation comes from the
146+
// request context. Built explicitly (rather than cloning DefaultTransport)
147+
// so it stays a *http.Transport even when tests swap the default transport.
148+
downloadTransport := &http.Transport{
149+
Proxy: http.ProxyFromEnvironment,
150+
ForceAttemptHTTP2: true,
151+
MaxIdleConns: 100,
152+
IdleConnTimeout: 90 * time.Second,
153+
TLSHandshakeTimeout: 10 * time.Second,
154+
ExpectContinueTimeout: 1 * time.Second,
155+
ResponseHeaderTimeout: timeout,
156+
}
157+
140158
return &HFClient{
141159
BaseURL: baseURL,
142160
APIKey: apiKey,
143161
HTTPClient: &http.Client{
144162
Timeout: timeout,
145163
},
164+
downloadClient: &http.Client{
165+
Transport: downloadTransport,
166+
},
146167
}
147168
}
148169

@@ -278,7 +299,7 @@ func (c *HFClient) DownloadBucketFile(ctx context.Context, bucketID string, path
278299
}
279300
req.Header.Set("Authorization", "Bearer "+c.APIKey)
280301

281-
resp, err := c.HTTPClient.Do(req)
302+
resp, err := c.downloadClient.Do(req)
282303
if err != nil {
283304
return nil, fmt.Errorf("failed to download bucket file %s: %w", path, err)
284305
}

pkg/sources/huggingface/huggingface_client_test.go

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ package huggingface
22

33
import (
44
"fmt"
5+
"net/http"
56
"strconv"
67
"testing"
78
"time"
@@ -650,6 +651,22 @@ func TestGetGitPath_SpaceResource(t *testing.T) {
650651
assert.Equal(t, expectedPath, path)
651652
}
652653

654+
func TestNewHFClient_DownloadClientHasNoOverallTimeout(t *testing.T) {
655+
client := NewHFClient("https://huggingface.co", TEST_TOKEN, 10*time.Second)
656+
657+
// The API client keeps an overall timeout (its body is consumed in-call).
658+
assert.Equal(t, 10*time.Second, client.HTTPClient.Timeout)
659+
660+
// The download client must NOT have an overall timeout, since that would
661+
// also interrupt streaming Response.Body reads in handlers.HandleFile.
662+
assert.Equal(t, time.Duration(0), client.downloadClient.Timeout)
663+
664+
// The header phase is still bounded at the transport level.
665+
transport, ok := client.downloadClient.Transport.(*http.Transport)
666+
assert.True(t, ok)
667+
assert.Equal(t, 10*time.Second, transport.ResponseHeaderTimeout)
668+
}
669+
653670
func TestEscapePathSegments(t *testing.T) {
654671
tests := []struct {
655672
name string

0 commit comments

Comments
 (0)