Skip to content

Commit 6a4c7f6

Browse files
committed
review fixes
Signed-off-by: jorgee <jorge.ejarque@seqera.io>
1 parent 91554f4 commit 6a4c7f6

8 files changed

Lines changed: 225 additions & 12 deletions

File tree

plugins/nf-tower/src/main/io/seqera/tower/plugin/datalink/PagedDataLinkContent.groovy

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -89,7 +89,7 @@ class PagedDataLinkContent implements Iterable<DataLinkItem> {
8989
current = items.iterator()
9090
nextToken = page?.nextPageToken as String
9191
} catch (IOException e) {
92-
throw new RuntimeException(e)
92+
throw new UncheckedIOException(e)
9393
}
9494
}
9595
return true

plugins/nf-tower/src/main/io/seqera/tower/plugin/datalink/SeqeraDataLinkClient.groovy

Lines changed: 15 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -33,8 +33,6 @@ import io.seqera.tower.model.DataLinkProvider
3333
import io.seqera.tower.plugin.TowerClient
3434
import nextflow.exception.AbortOperationException
3535

36-
import java.nio.file.Path
37-
3836
/**
3937
* Typed client for Seqera Platform data-link API endpoints.
4038
*
@@ -65,7 +63,8 @@ class SeqeraDataLinkClient {
6563
}
6664

6765
/**
68-
* Resolve a data-link providers in the given workspace.
66+
* Distinct provider identifiers present in the workspace, sorted.
67+
* The returned set is unmodifiable; memoized per workspace.
6968
*/
7069
@Memoized
7170
Set<String> getDataLinkProviders(long workspaceId) {
@@ -75,12 +74,17 @@ class SeqeraDataLinkClient {
7574
final p = it.next().provider?.toString()
7675
if (p) providers.add(p)
7776
}
78-
return providers
77+
return Collections.unmodifiableSet(providers)
7978
}
8079

8180
/**
8281
* Resolve a data-link by {@code (provider, name)} in the given workspace.
83-
* Iterates the API's list endpoint lazily and short-circuits on first match.
82+
* Iterates the API's list endpoint lazily (server-side filtered by {@code name})
83+
* and short-circuits on first match.
84+
*
85+
* Memoized per {@code (workspaceId, provider, name)} tuple. Note: Groovy's
86+
* {@code @Memoized} caches successful returns only — a path that repeatedly
87+
* references a non-existent data-link re-runs the search each time.
8488
*/
8589
@Memoized
8690
DataLinkDto getDataLink(long workspaceId, String provider, String name) {
@@ -177,7 +181,7 @@ class SeqeraDataLinkClient {
177181

178182
private Iterator<DataLinkDto> current = Collections.<DataLinkDto>emptyIterator()
179183
private int offset = 0
180-
private long total = -1L // unknown until first fetch
184+
private long total = -1L // -1 = unknown; set only when the server reports totalSize
181185
private boolean exhausted = false
182186

183187
DataLinkListIterator(TowerClient towerClient, String endpoint, long workspaceId, int pageSize, String search = null) {
@@ -212,8 +216,11 @@ class SeqeraDataLinkClient {
212216
final items = (json.dataLinks as List<Map>)?.collect { Map m -> mapDataLink(m) } ?: Collections.<DataLinkDto>emptyList()
213217
current = items.iterator()
214218
offset += items.size()
215-
if (total < 0) total = (json.totalSize as Long) ?: 0L
216-
if (items.isEmpty() || offset >= total) exhausted = true
219+
// Record the server-reported total only if present (null/missing → leave as -1 and
220+
// rely on an empty-page response to signal exhaustion)
221+
if (total < 0 && json.totalSize != null) total = json.totalSize as Long
222+
// Exhausted when: this page is empty, OR we've reached the known total
223+
if (items.isEmpty() || (total >= 0 && offset >= total)) exhausted = true
217224
}
218225
}
219226

plugins/nf-tower/src/main/io/seqera/tower/plugin/fs/SeqeraFileSystemProvider.groovy

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -197,8 +197,15 @@ class SeqeraFileSystemProvider extends FileSystemProvider {
197197

198198
final source = entries
199199
return new DirectoryStream<Path>() {
200+
private boolean iteratorCalled = false
200201
@Override
201202
Iterator<Path> iterator() {
203+
// NIO contract: DirectoryStream.iterator() may be called at most once.
204+
// For data-link listings a second iteration would also re-fetch pages 2+
205+
// (needlessly doubling API calls), so enforcing the contract is a win.
206+
if (iteratorCalled)
207+
throw new IllegalStateException("DirectoryStream.iterator() may be called at most once")
208+
iteratorCalled = true
202209
final inner = source.iterator()
203210
if (!filter) return inner
204211
return new FilteredIterator<Path>(inner, filter)

plugins/nf-tower/src/main/io/seqera/tower/plugin/fs/handler/DataLinksResourceHandler.groovy

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,10 @@ class DataLinksResourceHandler implements ResourceTypeHandler {
6060
private final HttpClient httpClient
6161

6262
DataLinksResourceHandler(SeqeraFileSystem fs, SeqeraDataLinkClient client) {
63-
this(fs, client, HttpClient.newBuilder().connectTimeout(Duration.ofSeconds(10)).build())
63+
this(fs, client, HttpClient.newBuilder()
64+
.connectTimeout(Duration.ofSeconds(10))
65+
.followRedirects(HttpClient.Redirect.NORMAL)
66+
.build())
6467
}
6568

6669
/** Test-only constructor to inject a mock {@link HttpClient}. */
@@ -111,8 +114,15 @@ class DataLinksResourceHandler implements ResourceTypeHandler {
111114
if (p.cachedAttributes) return p.cachedAttributes
112115
final workspaceId = fs.resolveWorkspaceId(p.org, p.workspace)
113116
final trail = p.trail
114-
if (trail.size() < 2) {
115-
// data-links/ or data-links/<provider> — always directory
117+
if (trail.isEmpty()) {
118+
// data-links/ — always a directory
119+
return new SeqeraFileAttributes(true)
120+
}
121+
if (trail.size() == 1) {
122+
// data-links/<provider> — validate the provider has at least one data-link
123+
final providers = client.getDataLinkProviders(workspaceId)
124+
if (!providers.contains(trail[0]))
125+
throw new NoSuchFileException(p.toString(), null, "No data-links for provider '${trail[0]}' in workspace '${p.workspace}'")
116126
return new SeqeraFileAttributes(true)
117127
}
118128
final dl = client.getDataLink(workspaceId, trail[0], trail[1])

plugins/nf-tower/src/test/io/seqera/tower/plugin/datalink/SeqeraDataLinkClientTest.groovy

Lines changed: 129 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -113,6 +113,135 @@ class SeqeraDataLinkClientTest extends Specification {
113113
!client.listDataLinks(10L).hasNext()
114114
}
115115

116+
// ---- getDataLink ----
117+
118+
def "getDataLink uses server-side search filter and returns first matching provider"() {
119+
given:
120+
def body = JsonOutput.toJson([dataLinks: [
121+
[id: 'dl-1', name: 'inputs', provider: 'google'],
122+
[id: 'dl-2', name: 'inputs', provider: 'aws']
123+
], totalSize: 2])
124+
def tc = tower()
125+
tc.sendApiRequest("${EP}/data-links?workspaceId=10&max=100&offset=0&search=inputs") >> ok(body)
126+
def client = new SeqeraDataLinkClient(tc)
127+
128+
when:
129+
def dl = client.getDataLink(10L, 'aws', 'inputs')
130+
131+
then:
132+
dl.id == 'dl-2'
133+
dl.provider.toString() == 'aws'
134+
}
135+
136+
def "getDataLink throws NoSuchFileException when no matching (provider, name) is found"() {
137+
given:
138+
def body = JsonOutput.toJson([dataLinks: [
139+
[id: 'dl-1', name: 'inputs', provider: 'google']
140+
], totalSize: 1])
141+
def tc = tower()
142+
tc.sendApiRequest("${EP}/data-links?workspaceId=10&max=100&offset=0&search=inputs") >> ok(body)
143+
def client = new SeqeraDataLinkClient(tc)
144+
145+
when:
146+
client.getDataLink(10L, 'aws', 'inputs')
147+
148+
then:
149+
thrown(NoSuchFileException)
150+
}
151+
152+
def "getDataLink memoizes successful lookups"() {
153+
given:
154+
def body = JsonOutput.toJson([dataLinks: [
155+
[id: 'dl-1', name: 'inputs', provider: 'aws']
156+
], totalSize: 1])
157+
def tc = tower()
158+
def client = new SeqeraDataLinkClient(tc)
159+
160+
when:
161+
def a = client.getDataLink(10L, 'aws', 'inputs')
162+
def b = client.getDataLink(10L, 'aws', 'inputs')
163+
164+
then:
165+
1 * tc.sendApiRequest("${EP}/data-links?workspaceId=10&max=100&offset=0&search=inputs") >> ok(body)
166+
a.is(b)
167+
}
168+
169+
// ---- getDataLinkProviders ----
170+
171+
def "getDataLinkProviders returns distinct sorted providers across all pages"() {
172+
given:
173+
def p1 = JsonOutput.toJson([dataLinks: [
174+
[id: 'dl-1', name: 'a', provider: 'aws'],
175+
[id: 'dl-2', name: 'b', provider: 'google']
176+
], totalSize: 3])
177+
def p2 = JsonOutput.toJson([dataLinks: [
178+
[id: 'dl-3', name: 'c', provider: 'aws']
179+
], totalSize: 3])
180+
def tc = tower()
181+
tc.sendApiRequest("${EP}/data-links?workspaceId=10&max=100&offset=0") >> ok(p1)
182+
tc.sendApiRequest("${EP}/data-links?workspaceId=10&max=100&offset=2") >> ok(p2)
183+
def client = new SeqeraDataLinkClient(tc)
184+
185+
when:
186+
def providers = client.getDataLinkProviders(10L)
187+
188+
then:
189+
providers as List == ['aws', 'google']
190+
}
191+
192+
def "getDataLinkProviders memoizes the result"() {
193+
given:
194+
def body = JsonOutput.toJson([dataLinks: [
195+
[id: 'dl-1', name: 'a', provider: 'aws']
196+
], totalSize: 1])
197+
def tc = tower()
198+
def client = new SeqeraDataLinkClient(tc)
199+
200+
when:
201+
def a = client.getDataLinkProviders(10L)
202+
def b = client.getDataLinkProviders(10L)
203+
204+
then:
205+
1 * tc.sendApiRequest("${EP}/data-links?workspaceId=10&max=100&offset=0") >> ok(body)
206+
a.is(b)
207+
}
208+
209+
def "getDataLinkProviders returns an unmodifiable Set"() {
210+
given:
211+
def body = JsonOutput.toJson([dataLinks: [
212+
[id: 'dl-1', name: 'a', provider: 'aws']
213+
], totalSize: 1])
214+
def tc = tower()
215+
tc.sendApiRequest(_) >> ok(body)
216+
def client = new SeqeraDataLinkClient(tc)
217+
218+
when:
219+
client.getDataLinkProviders(10L).add('gcs')
220+
221+
then:
222+
thrown(UnsupportedOperationException)
223+
}
224+
225+
// ---- listDataLinks pagination robustness ----
226+
227+
def "listDataLinks keeps paginating when totalSize is absent until an empty page"() {
228+
given:
229+
def p1 = JsonOutput.toJson([dataLinks: [[id: 'dl-1', name: 'a', provider: 'aws']]]) // no totalSize
230+
def p2 = JsonOutput.toJson([dataLinks: [[id: 'dl-2', name: 'b', provider: 'aws']]]) // no totalSize
231+
def p3 = JsonOutput.toJson([dataLinks: []]) // empty page → exhausted
232+
def tc = tower()
233+
tc.sendApiRequest("${EP}/data-links?workspaceId=10&max=100&offset=0") >> ok(p1)
234+
tc.sendApiRequest("${EP}/data-links?workspaceId=10&max=100&offset=1") >> ok(p2)
235+
tc.sendApiRequest("${EP}/data-links?workspaceId=10&max=100&offset=2") >> ok(p3)
236+
def client = new SeqeraDataLinkClient(tc)
237+
238+
when:
239+
def list = drain(client.listDataLinks(10L))
240+
241+
then:
242+
list*.id == ['dl-1', 'dl-2']
243+
}
244+
116245
// ---- getContent ----
117246
118247
def "getContent on a sub-path uses /browse/{path}"() {

plugins/nf-tower/src/test/io/seqera/tower/plugin/fs/SeqeraFileSystemProviderTest.groovy

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -452,4 +452,36 @@ class SeqeraFileSystemProviderTest extends Specification {
452452
def ex = thrown(NoSuchFileException)
453453
ex.reason?.contains('Unsupported resource type')
454454
}
455+
456+
def "readAttributes short-circuits when the SeqeraPath carries cachedAttributes (no API call)"() {
457+
given: 'a provider with a fresh filesystem and a path carrying pre-resolved attrs'
458+
def tc = spyTower()
459+
def fs = buildFs(tc)
460+
def attrs = new SeqeraFileAttributes(999L, java.time.Instant.EPOCH, java.time.Instant.EPOCH, 'cached-key')
461+
def path = new SeqeraPath(fs, 'seqera://acme/research/datasets/samples').resolveWithAttributes('nested', attrs)
462+
463+
when:
464+
def got = fs.provider().readAttributes(path, java.nio.file.attribute.BasicFileAttributes)
465+
466+
then: 'no workspace-cache load and no dataset/browse API calls were issued'
467+
0 * tc.sendApiRequest(_)
468+
got === attrs
469+
}
470+
471+
def "newDirectoryStream.iterator() throws IllegalStateException on a second call"() {
472+
given:
473+
def tc = spyTower()
474+
tc.sendApiRequest("${ENDPOINT}/user-info") >> ok(userInfoJson())
475+
tc.sendApiRequest("${ENDPOINT}/user/42/workspaces") >> ok(workspacesJson())
476+
def fs = buildFs(tc)
477+
def wsPath = new SeqeraPath(fs, 'seqera://acme/research')
478+
def stream = fs.provider().newDirectoryStream(wsPath, null)
479+
480+
when:
481+
stream.iterator()
482+
stream.iterator()
483+
484+
then:
485+
thrown(IllegalStateException)
486+
}
455487
}

plugins/nf-tower/src/test/io/seqera/tower/plugin/fs/handler/DataLinksResourceHandlerTest.groovy

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -271,6 +271,33 @@ class DataLinksResourceHandlerTest extends Specification {
271271
!attr.regularFile
272272
}
273273

274+
def "readAttributes at data-links/<provider>/ reports directory when provider exists"() {
275+
given:
276+
def path = new SeqeraPath(fs, 'seqera://acme/research/data-links/aws')
277+
278+
when:
279+
def attr = handler.readAttributes(path)
280+
281+
then:
282+
1 * fs.resolveWorkspaceId(_, _) >> 10L
283+
1 * client.getDataLinkProviders(10L) >> (['aws', 'google'] as Set)
284+
attr.directory
285+
}
286+
287+
def "readAttributes at data-links/<provider>/ throws when the provider has no data-links"() {
288+
given:
289+
def path = new SeqeraPath(fs, 'seqera://acme/research/data-links/azure')
290+
291+
when:
292+
handler.readAttributes(path)
293+
294+
then:
295+
1 * fs.resolveWorkspaceId(_, _) >> 10L
296+
1 * client.getDataLinkProviders(10L) >> (['aws'] as Set)
297+
def ex = thrown(NoSuchFileException)
298+
ex.reason?.contains("No data-links for provider 'azure'")
299+
}
300+
274301
def "readAttributes at data-link root reports directory"() {
275302
given:
276303
def path = new SeqeraPath(fs, 'seqera://acme/research/data-links/aws/inputs')

specs/260422-seqera-datalinks-fs/spec.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
- Q: Which DTOs are introduced by this feature? → A: None. All types are reused from the `io.seqera:tower-api:1.121.0` dependency (`DataLinkDto`, `DataLinkItem`, `DataLinkProvider`, `DataLinkCredentials`, `DataLinkContentResponse`, `DataLinkDownloadUrlResponse`, etc.). A plugin-local `PagedDataLinkContent` holder class wraps the eager-first-page + lazy-pagination behavior but holds only tower-api types.
2121
- Q: Is browse-per-file supported by the Platform API? → A: Yes. `GET /data-links/{id}/browse/{path}` works for both directories and files, so `readAttributes` on any path is a single targeted call — no parent-browse-and-filter, no N+1 problem.
2222
- Q: How are paginated Platform responses returned to callers? → A: Streaming. The workspace data-link list (`GET /data-links`) returns an `Iterator<DataLinkDto>` that fetches offsets on demand. The browse endpoint returns a `PagedDataLinkContent` that loads the first page eagerly (so `readAttributes` can inspect it without iterating) and fetches subsequent pages lazily as the iterator advances. The handler layer exposes `Iterable<Path>` to the NIO `DirectoryStream`; no full materialization of listings in memory.
23+
- Q: What convenience methods does the client expose on top of the raw list endpoint? → A: Two memoized helpers — `getDataLink(ws, provider, name)` uses the server-side `&search=<name>` filter and returns the first match (throws `NoSuchFileException` on miss); `getDataLinkProviders(ws)` returns the sorted set of distinct providers present in the workspace. Both are memoized per-arguments within a single `SeqeraDataLinkClient` instance.
2324
- Q: How are attributes discovered after a listing? → A: When `newDirectoryStream` yields a child path, the handler attaches the per-item attributes (size for files, directory marker for folders) to the `SeqeraPath` via an optional cache field. A subsequent `readAttributes` on that path returns the cached value without any additional Platform API call. Paths parsed from URIs (no prior listing) fall back to the live browse endpoint.
2425
- Q: How are cloud credentials for the underlying bucket/prefix selected? → A: The Platform's `DataLinkDto.credentials` list associates one or more credential records with a data-link. The plugin forwards the first credential's ID as the `credentialsId` query parameter on browse and download-URL requests, when present. If the data-link has no associated credentials, the parameter is omitted and the Platform uses its default resolution.
2526
- Q: Which provider-segment value appears in user-visible paths? → A: The lowercase value of the `DataLinkProvider` enum, as exposed by its `toString()` (e.g. `aws`, `google`, `azure`). This matches the Platform UI.

0 commit comments

Comments
 (0)