Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
0cd636f
feat(sdk): add sha1_of_source helper for checksum comparison
minitriga Apr 22, 2026
f199281
test(sdk): cover sha1_of_source BinaryIO position rewind from non-zer…
minitriga Apr 22, 2026
1790610
feat(sdk): add UploadResult dataclass for idempotent uploads
minitriga Apr 22, 2026
1c7978b
docs(sdk): clarify UploadResult.checksum None cases per review
minitriga Apr 22, 2026
296fd5f
feat(sdk): add async matches_local_checksum to InfrahubNode
minitriga Apr 22, 2026
702917f
refactor(sdk): apply Task 3 review feedback to matches_local_checksum
minitriga Apr 22, 2026
12011d0
feat(sdk): add sync matches_local_checksum to InfrahubNodeSync
minitriga Apr 22, 2026
dfd3123
feat(sdk): add async upload_if_changed with SHA-1 idempotency
minitriga Apr 22, 2026
78fd071
refactor(sdk): apply Task 5 review feedback to upload_if_changed
minitriga Apr 22, 2026
8f8457e
feat(sdk): add sync upload_if_changed to InfrahubNodeSync
minitriga Apr 22, 2026
a3a5a6a
feat(sdk): add skip_if_unchanged to async download_file
minitriga Apr 22, 2026
721ff33
fix(sdk): enforce unsaved-node check before skip_if_unchanged short-c…
minitriga Apr 22, 2026
442fe0a
feat(sdk): add skip_if_unchanged to sync download_file
minitriga Apr 22, 2026
1abc22f
docs(sdk): add changelog fragment for idempotent file operations
minitriga Apr 22, 2026
0fdfc01
docs(sdk): clarify UploadResult.checksum docstring and add fixed frag…
minitriga Apr 22, 2026
65f7c71
style(sdk): apply ruff format to new idempotent file operations
minitriga Apr 22, 2026
4e2d5a9
refactor(sdk): rename UploadResult.uploaded to was_uploaded per review
minitriga Apr 23, 2026
93fe705
test(sdk): drop tautological UploadResult frozen test per review
minitriga Apr 23, 2026
18e1572
docs(sdk): rewrite unsaved-node fix fragment in user-impact terms
minitriga Apr 23, 2026
9b8ebc1
test(sdk): add positive-path HTTP assertions to upload/download idemp…
minitriga Apr 23, 2026
5269fb6
docs(sdk): clarify that idempotency checks use cached node checksum
minitriga Apr 23, 2026
fda914c
docs(sdk): drop pre-release unsaved-node fragment per review
minitriga Apr 24, 2026
2f44bff
test(sdk): add HTTP request assertion to upload_if_changed unsaved-no…
minitriga Apr 24, 2026
47e7d39
docs(sdk): regenerate Python SDK API docs for idempotent file ops doc…
minitriga Apr 24, 2026
033246d
test(sdk): tighten upload_if_changed HTTP assertions to verify multip…
minitriga Apr 24, 2026
7a21d36
test(sdk): drop cross-test reference from upload_if_changed comments
minitriga Apr 27, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions changelog/+idempotent-file-ops.added.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
Added SHA-1 idempotency primitives for `CoreFileObject` nodes:

- `InfrahubNode.matches_local_checksum(source)` / sync variant — compare a local `bytes | Path | BinaryIO` source against the node's server-stored checksum without invoking a transfer.
- `InfrahubNode.upload_if_changed(source, name=None)` / sync variant — stage + save only when the local source differs from the server, returning an `UploadResult(was_uploaded, checksum)` dataclass.
- `download_file(..., skip_if_unchanged=True)` — short-circuit the download when `dest` already exists on disk with a matching SHA-1. Returns `0` bytes written when skipped.

A shared `sha1_of_source` helper (streaming, 64 KiB chunks) centralises the hashing convention in `infrahub_sdk.file_handler`.
208 changes: 204 additions & 4 deletions docs/docs/python-sdk/sdk_ref/infrahub_sdk/node/node.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,22 @@ artifact_fetch(self, name: str) -> str | dict[str, Any]
#### `download_file`

```python
download_file(self, dest: Path | None = None) -> bytes | int
download_file(self, dest: None = None, skip_if_unchanged: bool = ...) -> bytes
```

<details>
<summary>Show 2 other overloads</summary>

#### `download_file`

```python
download_file(self, dest: Path, skip_if_unchanged: bool = ...) -> int
```

#### `download_file`

```python
download_file(self, dest: Path | None = None, skip_if_unchanged: bool = False) -> bytes | int
```

Download the file content from this FileObject node.
Expand All @@ -54,16 +69,26 @@ The node must have been saved (have an id) before calling this method.
directly to this path (memory-efficient for large files) and the
number of bytes written will be returned. If not provided, the
file content will be returned as bytes.
- `skip_if_unchanged`: When ``True``, compute the SHA-1 of the file at
``dest`` (which must be provided) and compare against the
node's ``checksum`` attribute. If they match, return ``0``
without hitting the network. The ``checksum`` is the value
loaded when this node was fetched — a later server-side
change to the file will not be detected unless the caller
re-fetches the node first.

**Returns:**

- If ``dest`` is None: The file content as bytes.
- If ``dest`` is provided: The number of bytes written to the file.
- If ``skip_if_unchanged=True`` and the local file matches the server checksum: ``0``.

**Raises:**

- `FeatureNotSupportedError`: If this node doesn't inherit from CoreFileObject.
- `ValueError`: If the node hasn't been saved yet or file not found.
- `ValueError`: If the node hasn't been saved yet, file not found, or
``skip_if_unchanged=True`` was passed without a ``dest``.
- `AuthenticationError`: If authentication fails.

**Examples:**
Expand All @@ -73,8 +98,82 @@ The node must have been saved (have an id) before calling this method.
>>> content = await contract.download_file()
>>> # Stream to file (memory-efficient for large files)
>>> bytes_written = await contract.download_file(dest=Path("/tmp/contract.pdf"))
>>> # Skip download if local file already matches server checksum
>>> bytes_written = await contract.download_file(
... dest=Path("/tmp/contract.pdf"), skip_if_unchanged=True
... )
```

</details>
#### `matches_local_checksum`

```python
matches_local_checksum(self, source: bytes | Path | BinaryIO) -> bool
```

Return True if ``source``'s SHA-1 matches this node's server checksum.

Only available for nodes inheriting from ``CoreFileObject``. Callers
that want to branch on the comparison without invoking a transfer
should use this primitive instead of reading ``node.checksum.value``
and hashing ``source`` themselves, so the hashing convention stays
centralised in the SDK.

The comparison is against the ``checksum`` attribute as loaded
when this node was retrieved from the server. If the server's
file has been replaced since the node was fetched, this method
will not see that change — re-fetch the node to refresh the
checksum before comparing.

**Args:**

- `source`: Local content to hash and compare. Accepts the same
shapes as \:func\:`infrahub_sdk.file_handler.sha1_of_source`.

**Returns:**

- True if the local digest equals the server's stored checksum.

**Raises:**

- `FeatureNotSupportedError`: Node is not a ``CoreFileObject``.
- `ValueError`: Node has no server-side checksum yet (unsaved or
file never attached).

#### `upload_if_changed`

```python
upload_if_changed(self, source: bytes | Path | BinaryIO, name: str | None = None) -> UploadResult
```

Upload ``source`` only if its SHA-1 differs from the server checksum.

Composes :meth:`matches_local_checksum` with :meth:`upload_from_path`
(or :meth:`upload_from_bytes`) and :meth:`save`. For unsaved nodes or
nodes that have no prior server-side file, the upload is always
performed — there is nothing to compare against.

**Args:**

- `source`: Content to upload. ``bytes`` and ``BinaryIO`` sources
must supply ``name``; for a ``Path`` the filename is derived
from ``source.name`` when ``name`` is omitted.
- `name`: Filename to use on the server. Required for ``bytes`` /
``BinaryIO`` sources.

**Returns:**

- An `UploadResult` with ``was_uploaded=False`` (skipped) or ``was_uploaded=True`` (transfer occurred), and the resulting server checksum (``None`` only when no server checksum was available after the operation).

**Raises:**

- `FeatureNotSupportedError`: Node is not a ``CoreFileObject``.
- `ValueError`: ``source`` is ``bytes`` or ``BinaryIO`` and no
``name`` was supplied.

#### `delete`

```python
Expand Down Expand Up @@ -221,7 +320,22 @@ artifact_fetch(self, name: str) -> str | dict[str, Any]
#### `download_file`

```python
download_file(self, dest: Path | None = None) -> bytes | int
download_file(self, dest: None = None, skip_if_unchanged: bool = ...) -> bytes
```

<details>
<summary>Show 2 other overloads</summary>

#### `download_file`

```python
download_file(self, dest: Path, skip_if_unchanged: bool = ...) -> int
```

#### `download_file`

```python
download_file(self, dest: Path | None = None, skip_if_unchanged: bool = False) -> bytes | int
```

Download the file content from this FileObject node.
Expand All @@ -235,16 +349,26 @@ The node must have been saved (have an id) before calling this method.
directly to this path (memory-efficient for large files) and the
number of bytes written will be returned. If not provided, the
file content will be returned as bytes.
- `skip_if_unchanged`: When ``True``, compute the SHA-1 of the file at
``dest`` (which must be provided) and compare against the
node's ``checksum`` attribute. If they match, return ``0``
without hitting the network. The ``checksum`` is the value
loaded when this node was fetched — a later server-side
change to the file will not be detected unless the caller
re-fetches the node first.

**Returns:**

- If ``dest`` is None: The file content as bytes.
- If ``dest`` is provided: The number of bytes written to the file.
- If ``skip_if_unchanged=True`` and the local file matches the server checksum: ``0``.

**Raises:**

- `FeatureNotSupportedError`: If this node doesn't inherit from CoreFileObject.
- `ValueError`: If the node hasn't been saved yet or file not found.
- `ValueError`: If the node hasn't been saved yet, file not found, or
``skip_if_unchanged=True`` was passed without a ``dest``.
- `AuthenticationError`: If authentication fails.

**Examples:**
Expand All @@ -254,8 +378,63 @@ The node must have been saved (have an id) before calling this method.
>>> content = contract.download_file()
>>> # Stream to file (memory-efficient for large files)
>>> bytes_written = contract.download_file(dest=Path("/tmp/contract.pdf"))
>>> # Skip download if local file already matches server checksum
>>> bytes_written = contract.download_file(
... dest=Path("/tmp/contract.pdf"), skip_if_unchanged=True
... )
```

</details>
#### `matches_local_checksum`

```python
matches_local_checksum(self, source: bytes | Path | BinaryIO) -> bool
```

Return True if ``source``'s SHA-1 matches this node's server checksum.

Sync equivalent of :meth:`InfrahubNode.matches_local_checksum`. See
that method for full documentation.

**Args:**

- `source`: Local content to hash and compare. Accepts the same
shapes as \:func\:`infrahub_sdk.file_handler.sha1_of_source`.

**Returns:**

- True if the local digest equals the server's stored checksum.

**Raises:**

- `FeatureNotSupportedError`: Node is not a ``CoreFileObject``.
- `ValueError`: Node has no server-side checksum yet.

#### `upload_if_changed`

```python
upload_if_changed(self, source: bytes | Path | BinaryIO, name: str | None = None) -> UploadResult
```

Upload ``source`` only if its SHA-1 differs from the server checksum.

Sync equivalent of :meth:`InfrahubNode.upload_if_changed`. See that
method for full documentation.

**Args:**

- `source`: Content to upload.
- `name`: Filename to use on the server.

**Returns:**

- An `UploadResult`; see `InfrahubNode.upload_if_changed` for the meaning of its fields.

**Raises:**

- `FeatureNotSupportedError`: Node is not a ``CoreFileObject``.
- `ValueError`: Bytes/BinaryIO source without ``name``.

#### `delete`

```python
Expand Down Expand Up @@ -369,6 +548,27 @@ extract(self, params: dict[str, str]) -> dict[str, Any]

Extract some data points defined in a flat notation.

### `UploadResult`

Outcome of an idempotent upload attempt.

Returned by :meth:`InfrahubNode.upload_if_changed` and its sync twin.
``was_uploaded`` tells the caller whether a network transfer actually
happened; ``checksum`` carries the SHA-1 of the content held on the
server after the operation — on skip paths that is the server's
pre-existing value, on upload paths it is the locally-computed SHA-1
used as a proxy (which matches what a standard CoreFileObject server
stores, since the server computes SHA-1 of received bytes). ``None``
only when no server checksum was available (either the node was
unsaved and nothing was transferred, or the save returned no checksum
value).

The comparison used by ``upload_if_changed`` reads the node's
``checksum`` attribute, which was populated when the node was
fetched via ``client.get(...)``. A server-side change to the file
between the fetch and the call will not be detected unless the
caller re-fetches the node first.

### `InfrahubNodeBase`

Base class for InfrahubNode and InfrahubNodeSync
Expand Down
47 changes: 47 additions & 0 deletions infrahub_sdk/file_handler.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from __future__ import annotations

import hashlib
from dataclasses import dataclass
from io import BytesIO
from pathlib import Path
Expand All @@ -13,6 +14,52 @@
if TYPE_CHECKING:
from .client import InfrahubClient, InfrahubClientSync

_SHA1_CHUNK_BYTES = 64 * 1024  # Read size used when streaming a file or stream into the hasher.


def sha1_of_source(source: bytes | bytearray | memoryview | Path | BinaryIO) -> str:
    """Compute the SHA-1 hex digest of an upload/download source.

    Accepts the same shapes as :meth:`FileHandlerBase.prepare_upload` so
    callers can compare local content against a server-stored checksum
    without materialising the full file in memory.

    Args:
        source: The content to hash. ``bytes`` (and other in-memory
            bytes-like values: ``bytearray``, ``memoryview``) are hashed
            in one shot. A ``Path`` is read in 64 KiB chunks. A
            ``BinaryIO`` is read from its current position, then rewound
            to that position so downstream callers can re-read it.

    Returns:
        Lowercase SHA-1 hex digest, matching the algorithm Infrahub
        stores in ``CoreFileObject.checksum``.

    Raises:
        TypeError: If ``source`` is not one of the supported types. A
            stream must expose ``read``, ``seek`` and ``tell``.
    """
    # SHA-1 is used purely as a content fingerprint here, not for security.
    hasher = hashlib.sha1(usedforsecurity=False)

    if isinstance(source, (bytes, bytearray, memoryview)):
        hasher.update(source)
        return hasher.hexdigest()

    if isinstance(source, Path):
        with source.open("rb") as fh:
            while chunk := fh.read(_SHA1_CHUNK_BYTES):
                hasher.update(chunk)
        return hasher.hexdigest()

    # Duck-type the stream: ``tell`` is required too, because the original
    # position is captured before reading and restored afterwards.
    if all(hasattr(source, attr) for attr in ("read", "seek", "tell")):
        start = source.tell()
        try:
            while chunk := source.read(_SHA1_CHUNK_BYTES):
                hasher.update(chunk)
        finally:
            # Rewind even if reading fails so the caller's stream is not
            # left at an arbitrary offset.
            source.seek(start)
        return hasher.hexdigest()

    raise TypeError(f"sha1_of_source expects bytes, Path, or BinaryIO; got {type(source).__name__}")


@dataclass
class PreparedFile:
Expand Down
7 changes: 6 additions & 1 deletion infrahub_sdk/node/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,13 @@
ARTIFACT_GENERATE_FEATURE_NOT_SUPPORTED_MESSAGE,
HFID_STR_SEPARATOR,
IP_TYPES,
MATCHES_LOCAL_CHECKSUM_FEATURE_NOT_SUPPORTED_MESSAGE,
PROPERTIES_FLAG,
PROPERTIES_OBJECT,
SAFE_VALUE,
UPLOAD_IF_CHANGED_FEATURE_NOT_SUPPORTED_MESSAGE,
)
from .node import InfrahubNode, InfrahubNodeBase, InfrahubNodeSync
from .node import InfrahubNode, InfrahubNodeBase, InfrahubNodeSync, UploadResult
from .parsers import parse_human_friendly_id
from .property import NodeProperty
from .related_node import RelatedNode, RelatedNodeBase, RelatedNodeSync
Expand All @@ -23,9 +25,11 @@
"ARTIFACT_GENERATE_FEATURE_NOT_SUPPORTED_MESSAGE",
"HFID_STR_SEPARATOR",
"IP_TYPES",
"MATCHES_LOCAL_CHECKSUM_FEATURE_NOT_SUPPORTED_MESSAGE",
"PROPERTIES_FLAG",
"PROPERTIES_OBJECT",
"SAFE_VALUE",
"UPLOAD_IF_CHANGED_FEATURE_NOT_SUPPORTED_MESSAGE",
"Attribute",
"InfrahubNode",
"InfrahubNodeBase",
Expand All @@ -37,5 +41,6 @@
"RelationshipManager",
"RelationshipManagerBase",
"RelationshipManagerSync",
"UploadResult",
"parse_human_friendly_id",
]
6 changes: 6 additions & 0 deletions infrahub_sdk/node/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,12 @@
FILE_DOWNLOAD_FEATURE_NOT_SUPPORTED_MESSAGE = (
"calling download_file is only supported for nodes that inherit from CoreFileObject"
)
# Message for calling matches_local_checksum on a node that does not inherit
# from CoreFileObject.
MATCHES_LOCAL_CHECKSUM_FEATURE_NOT_SUPPORTED_MESSAGE = (
"calling matches_local_checksum is only supported for nodes that inherit from CoreFileObject"
)
# Message for calling upload_if_changed on a node that does not inherit from
# CoreFileObject.
UPLOAD_IF_CHANGED_FEATURE_NOT_SUPPORTED_MESSAGE = (
"calling upload_if_changed is only supported for nodes that inherit from CoreFileObject"
)

HIERARCHY_FETCH_FEATURE_NOT_SUPPORTED_MESSAGE = "Hierarchical fields are not supported for this node."

Expand Down
Loading