Skip to content

Commit 7710156

Browse files
committed
Add the vulnerability report
closes: #6773
1 parent 4ee8c48 commit 7710156

12 files changed

Lines changed: 425 additions & 1 deletion

File tree

CHANGES/6773.feature

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Added the vulnerability report data model.

docs/admin/reference/tech-preview.md

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,4 +5,5 @@ The following features are currently being released as part of tech preview:
55
- [Support for Open Telemetry](site:pulpcore/docs/admin/learn/architecture/#telemetry-support)
66
- Upstream replicas
77
- Domains - Multi-Tenancy
8-
- [Checkpoint](site:pulpcore/docs/user/guides/checkpoint/)
8+
- [Checkpoint](site:pulpcore/docs/user/guides/checkpoint/)
9+
- [Vulnerability Report](site:pulpcore/docs/dev/learn/other/vulnerability-report/)
Lines changed: 106 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,106 @@
1+
# Adding vulnerability report for plugins
2+
3+
4+
!!! warning
5+
This feature is provided as a tech preview and could change in backwards incompatible
6+
ways in the future.
7+
8+
9+
Pulp provides a way to store known vulnerabilities from OSV for `content units` within a specified `RepositoryVersion`.
10+
Each plugin will need to implement a function to construct the [package payload](https://google.github.io/osv.dev/post-v1-query/#parameters)
11+
that will be used to query the osv.dev database.
12+
13+
!!! note
14+
As of now, querying by osv.dev `commit` is not supported (use `package` instead).
15+
16+
The first step in writing a vulnerability report for a Pulp `content unit` is to identify the
17+
package [`ecosystem`](https://google.github.io/osv.dev/post-v1-query/#parameters) by checking
18+
[https://ossf.github.io/osv-schema/#defined-ecosystems](https://ossf.github.io/osv-schema/#defined-ecosystems).
19+
20+
The next step is to create a function at the top level of the module, so it can be loaded in pulpcore,
21+
that will be run as a Pulp task. This function should add a dictionary (the `osv_data` created through the
22+
`build_osv_data` function in the following sample) to the `pulpcore.app.tasks.vulnerability_report.content_queue` queue.
23+
24+
Here is an example of a function with the above steps:
25+
26+
```python
27+
from pulpcore.app.tasks.vulnerability_report import content_queue
28+
29+
def get_content_from_repo_version(repo_version_pk: str):
    """
    Populate content_queue Queue with the content_units found in RepositoryVersion

    Every content unit is turned into an osv.dev query payload and put on the queue.
    A final ``None`` tells the consumer that there is nothing left to scan. If anything
    fails, the exception itself is put on the queue so the consumer can fail right away
    instead of waiting for the queue timeout.

    Args:
        repo_version_pk (str): Primary key of the RepositoryVersion to inspect.
    """
    try:
        repo_version = RepositoryVersion.objects.get(pk=repo_version_pk)
        ecosystem = "PyPI"
        for content_unit in repo_version.content:
            content = content_unit.cast()
            osv_data = build_osv_data(content.name, ecosystem, content.version)
            content_queue.put(osv_data)
    except Exception as exc:
        # The consumer treats Exception items as a failed producer and aborts the task.
        content_queue.put(exc)
        raise
    content_queue.put(None)  # signal that there are no more content_units
40+
41+
def build_osv_data(name, ecosystem, version=None):
    """
    Build the osv.dev query payload for a single package.

    Args:
        name: Package name.
        ecosystem: osv.dev ecosystem the package belongs to.
        version: Optional package version; left out of the payload when falsy.

    Returns:
        dict: ``{"package": {...}}`` plus a ``"version"`` key when a version is given.
    """
    package = {"name": name, "ecosystem": ecosystem}
    if not version:
        return {"package": package}
    return {"package": package, "version": version}
46+
```
47+
48+
49+
Now that we have the function to populate the `content_queue` queue, we need to create a `ViewSet`
50+
to dispatch a task with it:
51+
52+
```python
53+
from pulpcore.app.serializers.vulnerability_report import VulnerabilityReportSerializer
54+
from pulpcore.app.tasks.vulnerability_report import check_content
55+
from pulpcore.app.viewsets.vulnerability_report import VulnerabilityReportViewSet
56+
from pulpcore.plugin.models import VulnerabilityReport
57+
from pulpcore.plugin.serializers import AsyncOperationResponseSerializer
58+
from pulpcore.plugin.tasking import dispatch
59+
from pulpcore.plugin.viewsets import OperationPostponedResponse
60+
61+
from my_plugin.app.serializers import MyPluginVulnerabilityReportSerializer
62+
from my_plugin.app.viewsets import get_content_from_repo_version
63+
64+
class MyPluginVulnerabilityReport(VulnerabilityReportViewSet):
    """
    ViewSet that dispatches the vulnerability report task for a RepositoryVersion.
    """

    endpoint_name = "vuln_report"
    queryset = VulnerabilityReport.objects.all()
    serializer_class = VulnerabilityReportSerializer

    @extend_schema(
        request=MyPluginVulnerabilityReportSerializer,
        description="Trigger a task to generate the package vulnerability report",
        summary="Generate vulnerability report",
        responses={202: AsyncOperationResponseSerializer},
    )
    def create(self, request):
        """
        Validate the request and dispatch the ``check_content`` task.
        """
        serializer = MyPluginVulnerabilityReportSerializer(
            data=request.data, context={"request": request}
        )
        serializer.is_valid(raise_exception=True)
        repo_version = serializer.validated_data["repo_version"]

        # we need to pass the function as string because dispatch() args only accepts
        # JSON serializable content
        content_queue_func = (
            f"{get_content_from_repo_version.__module__}.{get_content_from_repo_version.__name__}"
        )

        # check_content(func, *args): the dotted path comes first, followed by the
        # positional arguments forwarded to get_content_from_repo_version.
        task = dispatch(
            check_content,
            shared_resources=[repo_version.repository],
            args=(content_queue_func, str(repo_version.pk)),
        )
        return OperationPostponedResponse(task, request)
90+
```
91+
92+
Here is a sample for the `MyPluginVulnerabilityReportSerializer` where we serialize the
93+
`RepositoryVersion` string into a `RepositoryVersionRelatedField` object:
94+
95+
```python
96+
from rest_framework import serializers
97+
from pulpcore.plugin.serializers import ValidateFieldsMixin, RepositoryVersionRelatedField
98+
99+
class MyPluginVulnerabilityReportSerializer(serializers.Serializer, ValidateFieldsMixin):
    """
    Request serializer for triggering a vulnerability report.
    """

    # Mandatory, non-null reference to the RepositoryVersion to scan.
    repo_version = RepositoryVersionRelatedField(
        help_text=_("RepositoryVersion HREF with the packages to be checked."),
        allow_null=False,
        required=True,
    )
106+
```
Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
# Generated by Django 4.2.19 on 2025-07-14 19:37
2+
3+
from django.db import migrations, models
4+
import django.db.models.deletion
5+
import django_lifecycle.mixins
6+
import pulpcore.app.models.base
7+
import pulpcore.app.util
8+
9+
10+
class Migration(migrations.Migration):
    """
    Create the ``VulnerabilityReport`` model.
    """

    dependencies = [
        ("core", "0133_repositoryversion_content_ids"),
    ]

    operations = [
        migrations.CreateModel(
            name="VulnerabilityReport",
            fields=[
                (
                    "pulp_id",
                    models.UUIDField(
                        default=pulpcore.app.models.base.pulp_uuid,
                        editable=False,
                        primary_key=True,
                        serialize=False,
                    ),
                ),
                ("pulp_created", models.DateTimeField(auto_now_add=True)),
                ("pulp_last_updated", models.DateTimeField(auto_now=True, null=True)),
                ("vulns", models.JSONField()),
                (
                    "pulp_domain",
                    models.ForeignKey(
                        default=pulpcore.app.util.get_domain_pk,
                        on_delete=django.db.models.deletion.CASCADE,
                        to="core.domain",
                    ),
                ),
            ],
            options={
                "default_related_name": "%(app_label)s_%(model_name)s",
            },
            bases=(django_lifecycle.mixins.LifecycleModelMixin, models.Model),
        ),
    ]

pulpcore/app/models/__init__.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,8 @@
8787
UploadChunk,
8888
)
8989

90+
from .vulnerability_report import VulnerabilityReport
91+
9092
# Moved here to avoid a circular import with Task
9193
from .progress import GroupProgressReport, ProgressReport
9294

@@ -170,4 +172,5 @@
170172
"OpenPGPSignature",
171173
"OpenPGPUserAttribute",
172174
"OpenPGPUserID",
175+
"VulnerabilityReport",
173176
]
Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
from django.db import models
2+
3+
from pulpcore.app.models import BaseModel
4+
from pulpcore.plugin.util import get_domain_pk
5+
6+
7+
class VulnerabilityReport(BaseModel):
    """
    Model used in vulnerability report.

    Fields:
        vulns (models.JSONField): JSON document with the vulnerability data.

    Relations:
        pulp_domain (models.ForeignKey): Domain the report belongs to; the report is
            deleted together with its domain.
    """

    vulns = models.JSONField()
    pulp_domain = models.ForeignKey(
        "core.Domain",
        default=get_domain_pk,
        on_delete=models.CASCADE,
    )

    class Meta:
        default_related_name = "%(app_label)s_%(model_name)s"
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
from rest_framework import serializers
2+
3+
from pulpcore.plugin.models import VulnerabilityReport
4+
from pulpcore.plugin.serializers import IdentityField, ModelSerializer
5+
6+
7+
class VulnerabilityReportSerializer(ModelSerializer):
    """
    Serializer exposing VulnerabilityReport instances through the REST API.
    """

    vulns = serializers.JSONField()
    # The view name must match the endpoint name used by the vulnerability report viewset.
    pulp_href = IdentityField(view_name="vuln_report-detail")

    class Meta:
        model = VulnerabilityReport
        # Base serializer fields plus the report payload.
        fields = ModelSerializer.Meta.fields + ("vulns",)
Lines changed: 154 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,154 @@
1+
import aiohttp
2+
import importlib
3+
import json
4+
5+
from asgiref.sync import sync_to_async
6+
from queue import Empty, Queue
7+
from threading import Thread
8+
9+
from pulpcore.plugin.util import get_domain
10+
from pulpcore.plugin.models import CreatedResource, VulnerabilityReport
11+
from pulpcore.constants import OSV_QUERY_URL, VULNERABILITY_TASK_THREAD_TIMEOUT
12+
13+
14+
# Thread-safe queue used to hand osv.dev query payloads from the thread that collects
# content units over to the code that scans them.
content_queue = Queue()
16+
17+
18+
async def check_content(func, *args):
    """
    Start the background_thread and make the API requests (scan) to osv.dev with the packages
    from Queue.

    Args:
        func (callable | str): Dotted path ("package.module.function") of the function that
            populates the content_queue Queue with the osv.dev expected request data format.
        args (tuple): The positional arguments to pass on to the func.
    """
    # The function arrives as a dotted path, so resolve it back to the callable.
    module_name, function_name = func.rsplit(".", 1)
    module = importlib.import_module(module_name)
    func = getattr(module, function_name)

    background_thread = Thread(target=func, args=args)
    # The producer must be running before the queue is consumed: an unstarted thread
    # never fills the queue and cannot be joined.
    background_thread.start()
    await _scan_packages(background_thread)
    background_thread.join()
35+
36+
37+
async def _scan_packages(background_thread):
    """
    Scan the queued packages against the osv.dev API and persist the outcome.

    Args:
        background_thread (Thread): The thread populating the queue; handed on to the
            scanning step, which inspects it when the queue runs dry.
    """
    results = await _scan_packages_from_queue(background_thread=background_thread)
    await _save_vulnerability_report(results)
49+
50+
51+
async def _scan_packages_from_queue(background_thread, http_client=None):
    """
    Scan the queued packages, reusing the given HTTP client or opening a temporary one.

    Args:
        background_thread (Thread): Thread populating the queue
        http_client (Optional[aiohttp.ClientSession]): HTTP client for making requests

    Returns:
        Dict[str, Any]: Dictionary mapping package names to vulnerability data
    """
    if not http_client:
        # No client supplied: open a session that is closed once the scan finishes.
        async with aiohttp.ClientSession() as own_session:
            return await _process_queue_with_client(background_thread, own_session)

    return await _process_queue_with_client(background_thread, http_client)
69+
70+
71+
async def _process_queue_with_client(background_thread, session):
    """
    Process queue items using the provided HTTP client session.

    Items are read from content_queue until the ``None`` sentinel is found. Paginated
    osv.dev responses are followed immediately, and the vulnerabilities from all pages
    of a package are collected together.

    Args:
        background_thread (Thread): Thread populating the queue
        session (aiohttp.ClientSession): HTTP client session

    Returns:
        Dict[str, Any]: Dictionary mapping package names to vulnerability data

    Raises:
        RuntimeError: If the producer put an exception on the queue, or if no item
            arrived within VULNERABILITY_TASK_THREAD_TIMEOUT.
    """
    scanned_packages = {}
    try:
        for osv_data in iter(
            lambda: content_queue.get(timeout=VULNERABILITY_TASK_THREAD_TIMEOUT), None
        ):
            if isinstance(osv_data, Exception):
                raise RuntimeError(
                    f"Background vuln report task failed to execute: {osv_data}"
                ) from osv_data

            package_name = _get_package_name(osv_data)

            # Follow pagination here rather than re-queueing the request: an item put
            # back on the queue would land behind the ``None`` sentinel and never be
            # processed, and later pages must not overwrite earlier ones.
            query = dict(osv_data)
            package_vulns = []
            while True:
                vulnerability_data = await _query_osv_api(session, query)
                package_vulns.extend(vulnerability_data.get("vulns") or [])

                next_page_token = vulnerability_data.get("next_page_token")
                if not next_page_token:
                    break
                query["page_token"] = next_page_token

            if package_vulns:
                scanned_packages[package_name] = package_vulns

    except Empty:
        if not background_thread.is_alive():
            raise RuntimeError("Vuln report task thread died unexpectedly.")
        else:
            raise RuntimeError("Background vuln report thread took too long.")

    return scanned_packages
108+
109+
110+
async def _query_osv_api(session, osv_data):
    """
    Send one query to the OSV API and decode the answer.

    Args:
        session (aiohttp.ClientSession): HTTP client session
        osv_data (Dict[str, Any]): OSV query data

    Returns:
        Dict[str, Any]: JSON response from OSV API
    """
    payload = json.dumps(osv_data)
    async with session.post(url=OSV_QUERY_URL, data=payload) as response:
        # Read the body as text and decode it ourselves.
        return json.loads(await response.text())
125+
126+
127+
def _get_package_name(osv_data):
128+
"""
129+
Extract package name from OSV data.
130+
131+
Args:
132+
osv_data (Dict[str, Any]): OSV query data
133+
134+
Returns:
135+
str: Formatted package name
136+
"""
137+
osv_package_name = osv_data["package"]["name"]
138+
if osv_package_version := osv_data.get("version", ""):
139+
return f"{osv_package_name}-{osv_package_version}"
140+
return f"{osv_package_name}"
141+
142+
143+
async def _save_vulnerability_report(scanned_packages):
    """
    Persist the scan result as a VulnerabilityReport in the current domain.

    A CreatedResource is recorded only when the report did not already exist.

    Args:
        scanned_packages (Dict[str, Any]): Dictionary mapping package names to vulnerability data
    """
    # The ORM lookup is synchronous, so wrap it for use from async code.
    get_or_create_report = sync_to_async(VulnerabilityReport.objects.get_or_create)
    report, is_new = await get_or_create_report(
        vulns=scanned_packages, pulp_domain=get_domain()
    )
    if not is_new:
        return
    await CreatedResource.objects.acreate(content_object=report)
Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
from rest_framework.mixins import DestroyModelMixin, ListModelMixin, RetrieveModelMixin
2+
3+
from pulpcore.plugin.models import VulnerabilityReport
4+
from pulpcore.plugin.serializers import VulnerabilityReportSerializer
5+
from pulpcore.plugin.viewsets import NamedModelViewSet
6+
7+
8+
class VulnerabilityReportViewSet(
    NamedModelViewSet,
    ListModelMixin,
    RetrieveModelMixin,
    DestroyModelMixin,
):
    """
    ViewSet for listing, retrieving and deleting VulnerabilityReport objects.
    """

    endpoint_name = "vuln_report"
    queryset = VulnerabilityReport.objects.all()
    serializer_class = VulnerabilityReportSerializer

pulpcore/constants.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -127,3 +127,10 @@
127127
# The upper boundary represents an unsigned 32-bit integer and prevents overflow
128128
ORPHAN_PROTECTION_TIME_LOWER_BOUND = 0
129129
ORPHAN_PROTECTION_TIME_UPPER_BOUND = 4294967295 # (2^32)-1
130+
131+
# VULNERABILITY REPORT CONSTANTS
# URL of the osv.dev API query endpoint.
OSV_QUERY_URL = "https://api.osv.dev/v1/query"

# Timeout (in seconds) when waiting for the next item on the scan thread's queue,
# to avoid blocking indefinitely.
VULNERABILITY_TASK_THREAD_TIMEOUT = 60

0 commit comments

Comments
 (0)