Skip to content

Commit 035acb3

Browse files
drbh and Wauplin authored
[Kernels] Minimal kernel repo type support (create/delete, download files, list refs/files/tree) (#4068)
* feat: support downloading kernel repos * Support kernel repo type * code quality * useless assignment * KernelInfo in docs * small mention in docs * update tests * create + delete * use kernels-community/relu for tests --------- Co-authored-by: Lucain Pouget <lucainp@gmail.com>
1 parent d82a7f7 commit 035acb3

8 files changed

Lines changed: 298 additions & 23 deletions

File tree

docs/source/en/guides/repository.md

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -236,3 +236,18 @@ Or via CLI:
236236
```bash
237237
>>> hf repos move Wauplin/cool-model huggingface/cool-model
238238
```
239+
240+
## Kernel repositories
241+
242+
The Hub supports a `"kernel"` repository type for hosting compute kernels. This is **not** a fully-compatible repo type. Only a limited set of methods have been tested and are officially supported:
243+
244+
- [`kernel_info`]
245+
- [`hf_hub_download`]
246+
- [`snapshot_download`]
247+
- [`list_repo_refs`]
248+
- [`list_repo_files`]
249+
- [`list_repo_tree`]
250+
251+
Note that [`create_repo`] and [`delete_repo`] are also compatible but restricted to a small subset of allowed users and orgs on the Hub.
252+
253+
For building, publishing, and using kernel repos, please use the dedicated [`kernels`](https://github.com/huggingface/kernels) package instead. Refer to the [Kernels documentation](https://huggingface.co/docs/kernels/index) for more details.

docs/source/en/package_reference/hf_api.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -97,6 +97,10 @@ models = hf_api.list_models()
9797

9898
[[autodoc]] huggingface_hub.hf_api.InferenceProviderMapping
9999

100+
### KernelInfo
101+
102+
[[autodoc]] huggingface_hub.hf_api.KernelInfo
103+
100104
### LFSFileInfo
101105

102106
[[autodoc]] huggingface_hub.hf_api.LFSFileInfo

src/huggingface_hub/__init__.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -182,6 +182,7 @@
182182
"GitRefInfo",
183183
"GitRefs",
184184
"HfApi",
185+
"KernelInfo",
185186
"ModelInfo",
186187
"Organization",
187188
"RepoFile",
@@ -265,6 +266,7 @@
265266
"grant_access",
266267
"inspect_job",
267268
"inspect_scheduled_job",
269+
"kernel_info",
268270
"list_accepted_access_requests",
269271
"list_bucket_tree",
270272
"list_buckets",
@@ -757,6 +759,7 @@
757759
"JobOwner",
758760
"JobStage",
759761
"JobStatus",
762+
"KernelInfo",
760763
"MCPClient",
761764
"ModelCard",
762765
"ModelCardData",
@@ -983,6 +986,7 @@
983986
"inspect_scheduled_job",
984987
"interpreter_login",
985988
"is_offline_mode",
989+
"kernel_info",
986990
"list_accepted_access_requests",
987991
"list_bucket_tree",
988992
"list_buckets",
@@ -1308,6 +1312,7 @@ def __dir__():
13081312
GitRefInfo, # noqa: F401
13091313
GitRefs, # noqa: F401
13101314
HfApi, # noqa: F401
1315+
KernelInfo, # noqa: F401
13111316
ModelInfo, # noqa: F401
13121317
Organization, # noqa: F401
13131318
RepoFile, # noqa: F401
@@ -1391,6 +1396,7 @@ def __dir__():
13911396
grant_access, # noqa: F401
13921397
inspect_job, # noqa: F401
13931398
inspect_scheduled_job, # noqa: F401
1399+
kernel_info, # noqa: F401
13941400
list_accepted_access_requests, # noqa: F401
13951401
list_bucket_tree, # noqa: F401
13961402
list_buckets, # noqa: F401

src/huggingface_hub/_snapshot_download.py

Lines changed: 11 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717
RevisionNotFoundError,
1818
)
1919
from .file_download import REGEX_COMMIT_HASH, DryRunFileInfo, hf_hub_download, repo_folder_name
20-
from .hf_api import DatasetInfo, HfApi, ModelInfo, RepoFile, SpaceInfo
20+
from .hf_api import DatasetInfo, HfApi, KernelInfo, ModelInfo, RepoFile, SpaceInfo
2121
from .utils import OfflineModeIsEnabled, filter_repo_objects, logging, validate_hf_hub_args
2222
from .utils.tqdm import _create_progress_bar
2323
from .utils.tqdm import tqdm as hf_tqdm
@@ -145,7 +145,7 @@ def snapshot_download(
145145
repo_id (`str`):
146146
A user or an organization name and a repo name separated by a `/`.
147147
repo_type (`str`, *optional*):
148-
Set to `"dataset"` or `"space"` if downloading from a dataset or space,
148+
Set to `"dataset"`, `"space"` or `"kernel"` if downloading from a dataset, space or kernel repo,
149149
`None` or `"model"` if downloading from a model. Default is `None`.
150150
revision (`str`, *optional*):
151151
An optional Git revision id which can be a branch name, a tag, or a
@@ -219,8 +219,10 @@ def snapshot_download(
219219

220220
if repo_type is None:
221221
repo_type = "model"
222-
if repo_type not in constants.REPO_TYPES:
223-
raise ValueError(f"Invalid repo type: {repo_type}. Accepted repo types are: {str(constants.REPO_TYPES)}")
222+
if repo_type not in constants.REPO_TYPES_WITH_KERNEL:
223+
raise ValueError(
224+
f"Invalid repo type: {repo_type}. Accepted repo types are: {str(constants.REPO_TYPES_WITH_KERNEL)}"
225+
)
224226

225227
storage_folder = os.path.join(cache_dir, repo_folder_name(repo_id=repo_id, repo_type=repo_type))
226228

@@ -233,7 +235,7 @@ def snapshot_download(
233235
token=token,
234236
)
235237

236-
repo_info: ModelInfo | DatasetInfo | SpaceInfo | None = None
238+
repo_info: ModelInfo | DatasetInfo | SpaceInfo | KernelInfo | None = None
237239
api_call_error: Exception | None = None
238240
if not local_files_only:
239241
# try/except logic to handle different errors => taken from `hf_hub_download`
@@ -336,10 +338,10 @@ def snapshot_download(
336338

337339
# Corner case: on very large repos, the siblings list in `repo_info` might not contain all files.
338340
# In that case, we need to use the `list_repo_tree` method to prevent caching issues.
339-
repo_files: Iterable[str] = [f.rfilename for f in repo_info.siblings] if repo_info.siblings is not None else []
340-
unreliable_nb_files = (
341-
repo_info.siblings is None or len(repo_info.siblings) == 0 or len(repo_info.siblings) > LARGE_REPO_THRESHOLD
342-
)
341+
# Note: kernel repos don't expose siblings in their info response, so we always fall back to `list_repo_tree`.
342+
siblings = getattr(repo_info, "siblings", None)
343+
repo_files: Iterable[str] = [f.rfilename for f in siblings] if siblings is not None else []
344+
unreliable_nb_files = siblings is None or len(siblings) == 0 or len(siblings) > LARGE_REPO_THRESHOLD
343345
if unreliable_nb_files:
344346
logger.info(
345347
"Number of files in the repo is unreliable. Using `list_repo_tree` to ensure all files are listed."

src/huggingface_hub/constants.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -106,17 +106,21 @@ def _as_int(value: str | None) -> int | None:
106106
REPO_TYPE_DATASET = "dataset"
107107
REPO_TYPE_SPACE = "space"
108108
REPO_TYPE_MODEL = "model"
109+
REPO_TYPE_KERNEL = "kernel"
109110
REPO_TYPES = [None, REPO_TYPE_MODEL, REPO_TYPE_DATASET, REPO_TYPE_SPACE]
111+
REPO_TYPES_WITH_KERNEL = REPO_TYPES + [REPO_TYPE_KERNEL]
110112
SPACES_SDK_TYPES = ["gradio", "streamlit", "docker", "static"]
111113

112114
REPO_TYPES_URL_PREFIXES = {
113115
REPO_TYPE_DATASET: "datasets/",
114116
REPO_TYPE_SPACE: "spaces/",
117+
REPO_TYPE_KERNEL: "kernels/",
115118
}
116119
REPO_TYPES_MAPPING = {
117120
"datasets": REPO_TYPE_DATASET,
118121
"spaces": REPO_TYPE_SPACE,
119122
"models": REPO_TYPE_MODEL,
123+
"kernels": REPO_TYPE_KERNEL,
120124
}
121125

122126

src/huggingface_hub/file_download.py

Lines changed: 11 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -220,7 +220,7 @@ def hf_hub_url(
220220
subfolder (`str`, *optional*):
221221
An optional value corresponding to a folder inside the repo.
222222
repo_type (`str`, *optional*):
223-
Set to `"dataset"` or `"space"` if downloading from a dataset or space,
223+
Set to `"dataset"`, `"space"` or `"kernel"` if downloading from a dataset, space or kernel repo,
224224
`None` or `"model"` if downloading from a model. Default is `None`.
225225
revision (`str`, *optional*):
226226
An optional Git revision id which can be a branch name, a tag, or a
@@ -264,7 +264,7 @@ def hf_hub_url(
264264
if subfolder is not None:
265265
filename = f"{subfolder}/{filename}"
266266

267-
if repo_type not in constants.REPO_TYPES:
267+
if repo_type not in constants.REPO_TYPES_WITH_KERNEL:
268268
raise ValueError("Invalid repo type")
269269

270270
if repo_type in constants.REPO_TYPES_URL_PREFIXES:
@@ -875,7 +875,7 @@ def hf_hub_download(
875875
subfolder (`str`, *optional*):
876876
An optional value corresponding to a folder inside the model repo.
877877
repo_type (`str`, *optional*):
878-
Set to `"dataset"` or `"space"` if downloading from a dataset or space,
878+
Set to `"dataset"`, `"space"` or `"kernel"` if downloading from a dataset, space or kernel repo,
879879
`None` or `"model"` if downloading from a model. Default is `None`.
880880
revision (`str`, *optional*):
881881
An optional Git revision id which can be a branch name, a tag, or a
@@ -959,8 +959,10 @@ def hf_hub_download(
959959

960960
if repo_type is None:
961961
repo_type = "model"
962-
if repo_type not in constants.REPO_TYPES:
963-
raise ValueError(f"Invalid repo type: {repo_type}. Accepted repo types are: {str(constants.REPO_TYPES)}")
962+
if repo_type not in constants.REPO_TYPES_WITH_KERNEL:
963+
raise ValueError(
964+
f"Invalid repo type: {repo_type}. Accepted repo types are: {str(constants.REPO_TYPES_WITH_KERNEL)}"
965+
)
964966

965967
hf_headers = build_hf_headers(
966968
token=token,
@@ -1493,8 +1495,10 @@ def try_to_load_from_cache(
14931495
revision = "main"
14941496
if repo_type is None:
14951497
repo_type = "model"
1496-
if repo_type not in constants.REPO_TYPES:
1497-
raise ValueError(f"Invalid repo type: {repo_type}. Accepted repo types are: {str(constants.REPO_TYPES)}")
1498+
if repo_type not in constants.REPO_TYPES_WITH_KERNEL:
1499+
raise ValueError(
1500+
f"Invalid repo type: {repo_type}. Accepted repo types are: {str(constants.REPO_TYPES_WITH_KERNEL)}"
1501+
)
14981502
if cache_dir is None:
14991503
cache_dir = constants.HF_HUB_CACHE
15001504

src/huggingface_hub/hf_api.py

Lines changed: 100 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -400,7 +400,7 @@ def repo_type_and_id_from_hf_id(hf_id: str, hub_url: str | None = None) -> tuple
400400
repo_type = constants.REPO_TYPES_MAPPING[repo_type]
401401
if repo_type == "":
402402
repo_type = None
403-
if repo_type not in constants.REPO_TYPES and repo_type != "bucket":
403+
if repo_type not in constants.REPO_TYPES_WITH_KERNEL and repo_type != "bucket":
404404
raise ValueError(f"Unknown `repo_type`: '{repo_type}' ('{input_hf_id}')")
405405

406406
return repo_type, namespace, repo_id
@@ -1368,6 +1368,54 @@ def __init__(self, **kwargs):
13681368
self.__dict__.update(**kwargs)
13691369

13701370

1371+
@dataclass
1372+
class KernelInfo:
1373+
"""
1374+
Contains information about a kernel repo on the Hub. This object is returned by [`kernel_info`].
1375+
1376+
Attributes:
1377+
id (`str`):
1378+
ID of the kernel repo.
1379+
author (`str`, *optional*):
1380+
Author of the kernel repo.
1381+
downloads (`int`, *optional*):
1382+
Number of downloads of the kernel repo over the last 30 days.
1383+
gated (`Literal["auto", "manual", False]`, *optional*):
1384+
Is the repo gated. If so, whether there is manual or automatic approval.
1385+
last_modified (`datetime`, *optional*):
1386+
Date of last commit to the repo.
1387+
likes (`int`, *optional*):
1388+
Number of likes of the kernel repo.
1389+
private (`bool`, *optional*):
1390+
Is the repo private.
1391+
sha (`str`, *optional*):
1392+
Repo SHA at this particular revision.
1393+
"""
1394+
1395+
id: str
1396+
author: str | None
1397+
downloads: int | None
1398+
gated: Literal["auto", "manual", False] | None
1399+
last_modified: datetime | None
1400+
likes: int | None
1401+
private: bool | None
1402+
sha: str | None
1403+
1404+
def __init__(self, **kwargs):
1405+
self.id = kwargs.pop("id")
1406+
self.author = kwargs.pop("author", None)
1407+
self.downloads = kwargs.pop("downloads", None)
1408+
self.gated = kwargs.pop("gated", None)
1409+
last_modified = kwargs.pop("lastModified", None) or kwargs.pop("last_modified", None)
1410+
self.last_modified = parse_datetime(last_modified) if last_modified else None
1411+
self.likes = kwargs.pop("likes", None)
1412+
self.private = kwargs.pop("private", None)
1413+
self.sha = kwargs.pop("sha", None)
1414+
1415+
# future compatibility
1416+
self.__dict__.update(**kwargs)
1417+
1418+
13711419
@dataclass
13721420
class CollectionItem:
13731421
"""
@@ -3293,6 +3341,46 @@ def space_info(
32933341
data = r.json()
32943342
return SpaceInfo(**data)
32953343

3344+
@validate_hf_hub_args
3345+
def kernel_info(
3346+
self,
3347+
repo_id: str,
3348+
*,
3349+
revision: str | None = None,
3350+
timeout: float | None = None,
3351+
token: bool | str | None = None,
3352+
) -> KernelInfo:
3353+
"""
3354+
Get info on one specific kernel on huggingface.co.
3355+
3356+
Args:
3357+
repo_id (`str`):
3358+
A namespace (user or an organization) and a repo name separated by a `/`.
3359+
revision (`str`, *optional*):
3360+
The revision of the kernel repository from which to get the
3361+
information.
3362+
timeout (`float`, *optional*):
3363+
Whether to set a timeout for the request to the Hub.
3364+
token (`bool` or `str`, *optional*):
3365+
A valid user access token (string). Defaults to the locally saved
3366+
token, which is the recommended method for authentication (see
3367+
https://huggingface.co/docs/huggingface_hub/quick-start#authentication).
3368+
To disable authentication, pass `False`.
3369+
3370+
Returns:
3371+
[`~hf_api.KernelInfo`]: The kernel repository information.
3372+
"""
3373+
headers = self._build_hf_headers(token=token)
3374+
path = (
3375+
f"{self.endpoint}/api/kernels/{repo_id}"
3376+
if revision is None
3377+
else (f"{self.endpoint}/api/kernels/{repo_id}/revision/{quote(revision, safe='')}")
3378+
)
3379+
r = get_session().get(path, headers=headers, timeout=timeout)
3380+
hf_raise_for_status(r)
3381+
data = r.json()
3382+
return KernelInfo(**data)
3383+
32963384
@validate_hf_hub_args
32973385
def repo_info(
32983386
self,
@@ -3304,7 +3392,7 @@ def repo_info(
33043392
files_metadata: bool = False,
33053393
expand: ExpandModelProperty_T | ExpandDatasetProperty_T | ExpandSpaceProperty_T | None = None,
33063394
token: bool | str | None = None,
3307-
) -> ModelInfo | DatasetInfo | SpaceInfo:
3395+
) -> ModelInfo | DatasetInfo | SpaceInfo | KernelInfo:
33083396
"""
33093397
Get the info object for a given repo of a given type.
33103398

@@ -3354,6 +3442,9 @@ def repo_info(
33543442
method = self.dataset_info # type: ignore
33553443
case "space":
33563444
method = self.space_info # type: ignore
3445+
case "kernel":
3446+
# No expand/files_metadata for kernels
3447+
return self.kernel_info(repo_id, revision=revision, token=token, timeout=timeout)
33573448
case _:
33583449
raise ValueError("Unsupported repo type.")
33593450
return method(
@@ -3582,7 +3673,7 @@ def list_repo_tree(
35823673
revision (`str`, *optional*):
35833674
The revision of the repository from which to get the tree. Defaults to `"main"` branch.
35843675
repo_type (`str`, *optional*):
3585-
The type of the repository from which to get the tree (`"model"`, `"dataset"` or `"space"`.
3676+
The type of the repository from which to get the tree (`"model"`, `"dataset"`, `"space"` or `"kernel"`).
35863677
Defaults to `"model"`.
35873678
token (`bool` or `str`, *optional*):
35883679
A valid user access token (string). Defaults to the locally saved
@@ -3774,7 +3865,7 @@ def list_repo_refs(
37743865
A namespace (user or an organization) and a repo name separated
37753866
by a `/`.
37763867
repo_type (`str`, *optional*):
3777-
Set to `"dataset"` or `"space"` if listing refs from a dataset or a Space,
3868+
Set to `"dataset"`, `"space"` or `"kernel"` if listing refs from a dataset, a Space or a Kernel,
37783869
`None` or `"model"` if listing from a model. Default is `None`.
37793870
include_pull_requests (`bool`, *optional*):
37803871
Whether to include refs from pull requests in the list. Defaults to `False`.
@@ -4277,7 +4368,7 @@ def create_repo(
42774368

42784369
path = f"{self.endpoint}/api/repos/create"
42794370

4280-
if repo_type not in constants.REPO_TYPES:
4371+
if repo_type not in constants.REPO_TYPES_WITH_KERNEL:
42814372
raise ValueError("Invalid repo type")
42824373

42834374
resolved_visibility = _resolve_repo_visibility(private=private, visibility=visibility, repo_type=repo_type)
@@ -4310,7 +4401,7 @@ def create_repo(
43104401
("space_volumes", "volumes", [v.to_dict() for v in space_volumes] if space_volumes else None),
43114402
]
43124403

4313-
if repo_type == "space":
4404+
if repo_type == constants.REPO_TYPE_SPACE:
43144405
for _, key, value in space_args:
43154406
if value is not None:
43164407
payload[key] = value
@@ -4398,7 +4489,7 @@ def delete_repo(
43984489

43994490
path = f"{self.endpoint}/api/repos/delete"
44004491

4401-
if repo_type not in constants.REPO_TYPES:
4492+
if repo_type not in constants.REPO_TYPES_WITH_KERNEL:
44024493
raise ValueError("Invalid repo type")
44034494

44044495
json = {"name": name, "organization": organization}
@@ -13474,6 +13565,8 @@ def get_local_safetensors_metadata(path: str | Path) -> SafetensorsRepoMetadata:
1347413565
list_spaces = api.list_spaces
1347513566
space_info = api.space_info
1347613567

13568+
kernel_info = api.kernel_info
13569+
1347713570
list_papers = api.list_papers
1347813571
paper_info = api.paper_info
1347913572
read_paper = api.read_paper

0 commit comments

Comments
 (0)