Skip to content

Commit 74949e6

Browse files
committed
Add the vulnerability report
closes: #6773
1 parent 4ee8c48 commit 74949e6

12 files changed

Lines changed: 420 additions & 1 deletion

File tree

CHANGES/6773.feature

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Added the vulnerability report data model.

docs/admin/reference/tech-preview.md

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,4 +5,5 @@ The following features are currently being released as part of tech preview:
55
- [Support for Open Telemetry](site:pulpcore/docs/admin/learn/architecture/#telemetry-support)
66
- Upstream replicas
77
- Domains - Multi-Tenancy
8-
- [Checkpoint](site:pulpcore/docs/user/guides/checkpoint/)
8+
- [Checkpoint](site:pulpcore/docs/user/guides/checkpoint/)
9+
- [Vulnerability Report](site:pulpcore/docs/dev/learn/other/vulnerability-report/)
Lines changed: 96 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,96 @@
1+
# Adding vulnerability report for plugins
2+
3+
4+
!!! warning
5+
This feature is provided as a tech preview and could change in backwards incompatible
6+
ways in the future.
7+
8+
9+
Pulp provides a way to store known vulnerabilities from OSV for `content units` within a specified `RepositoryVersion`.
10+
Each plugin will need to implement a function to construct the [package payload](https://google.github.io/osv.dev/post-v1-query/#parameters)
11+
that will be used to query the osv.dev database.
12+
13+
!!! note
14+
As of now, querying by osv.dev `commit` is not supported (use `package` instead).
15+
16+
The first step in writing a vulnerability report for a Pulp `content unit` is to identify the
17+
package [`ecosystem`](https://google.github.io/osv.dev/post-v1-query/#parameters) by checking
18+
[https://ossf.github.io/osv-schema/#defined-ecosystems](https://ossf.github.io/osv-schema/#defined-ecosystems).
19+
20+
The next step is to create a function that will be run as a Pulp task. This function should add a
21+
dictionary (the `osv_data` created through the `build_osv_data` function in the following sample) to the
22+
`pulpcore.app.tasks.vulnerability_report.content_queue` queue.
23+
24+
Here is an example of a function with the above steps:
25+
26+
```python
27+
from pulpcore.app.tasks.vulnerability_report import content_queue
from pulpcore.plugin.models import RepositoryVersion


def get_content_from_repo_version(repo_version_pk: str):
    """
    Populate content_queue Queue with the content_units found in RepositoryVersion

    Args:
        repo_version_pk (str): Primary key of the RepositoryVersion to scan.
    """
    try:
        repo_version = RepositoryVersion.objects.get(pk=repo_version_pk)
        ecosystem = "PyPI"
        for content_unit in repo_version.content:
            content = content_unit.cast()
            osv_data = build_osv_data(content.name, ecosystem, content.version)
            content_queue.put(osv_data)
    except Exception as exc:
        # This function runs in a background thread. The scanning side treats an Exception
        # found in the queue as a failure of this thread, so hand the error over instead of
        # letting the thread die silently and the task wait for its timeout.
        content_queue.put(exc)
        return
    content_queue.put(None)  # signal that there are no more content_units
40+
41+
def build_osv_data(name, ecosystem, version=None):
    """
    Build the query payload for a single package.

    Args:
        name: Package name.
        ecosystem: Name of the ecosystem the package belongs to.
        version: Optional package version; left out of the payload when falsy.

    Returns:
        dict: ``{"package": {...}}`` plus a ``"version"`` key when a version was given.
    """
    package = {"name": name, "ecosystem": ecosystem}
    if not version:
        return {"package": package}
    return {"package": package, "version": version}
46+
```
47+
48+
49+
Now that we have the function to populate the `content_queue` queue, we need to create a `ViewSet`
50+
to dispatch a task with it:
51+
52+
```python
53+
from drf_spectacular.utils import extend_schema
from rest_framework.exceptions import ValidationError

from pulpcore.plugin.models import VulnerabilityReport
from pulpcore.plugin.serializers import AsyncOperationResponseSerializer
from pulpcore.plugin.tasking import dispatch
from pulpcore.plugin.viewsets import OperationPostponedResponse
from pulpcore.app.serializers.vulnerability_report import VulnerabilityReportSerializer
from pulpcore.app.tasks.vulnerability_report import check_content
from pulpcore.app.viewsets.vulnerability_report import VulnerabilityReport as VulnerabilityReportView


class MyPluginVulnerabilityReport(VulnerabilityReportView):

    endpoint_name = "vuln_report"
    queryset = VulnerabilityReport.objects.all()
    serializer_class = VulnerabilityReportSerializer

    @extend_schema(
        request=MyPluginContentVulnerabilityReportSerializer,
        description="Trigger a task to generate the package vulnerability report",
        summary="Generate vulnerability report",
        responses={202: AsyncOperationResponseSerializer},
    )
    def create(self, request):
        serializer = MyPluginContentVulnerabilityReportSerializer(
            data=request.data, context={"request": request}
        )
        serializer.is_valid(raise_exception=True)

        # The validated field holds the RepositoryVersion to scan.
        repo_version = serializer.validated_data.get("repo_version")
        if repo_version is None:
            raise ValidationError({"repo_version": "This field is required."})
        shared_resources = [repo_version.repository]

        # we need to pass the function as string because dispatch() args only accepts JSON serializable content
        content_queue_func = f"{get_content_from_repo_version.__module__}.{get_content_from_repo_version.__name__}"
        # check_content(func, args) forwards `args` positionally to the function named by `func`
        kwargs = {"func": content_queue_func, "args": [str(repo_version.pk)]}
        task = dispatch(check_content, shared_resources=shared_resources, kwargs=kwargs)
        return OperationPostponedResponse(task, request)
80+
```
81+
82+
Here is a sample for the `MyPluginContentVulnerabilityReportSerializer`, where the
83+
`RepositoryVersionRelatedField` turns the `RepositoryVersion` HREF string into a `RepositoryVersion` object:
84+
85+
```python
86+
from gettext import gettext as _

from rest_framework import serializers
from pulpcore.plugin.serializers import ValidateFieldsMixin, RepositoryVersionRelatedField


# The mixin is listed before the DRF base class so that its overrides take part in the MRO.
class MyPluginContentVulnerabilityReportSerializer(ValidateFieldsMixin, serializers.Serializer):

    # The view needs a repository version to scan, so the field is mandatory.
    repo_version = RepositoryVersionRelatedField(
        required=True,
        allow_null=False,
        help_text=_("RepositoryVersion HREF with the packages to be checked."),
    )
96+
```
Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
# Generated by Django 4.2.19 on 2025-07-14 19:37
2+
3+
from django.db import migrations, models
4+
import django.db.models.deletion
5+
import django_lifecycle.mixins
6+
import pulpcore.app.models.base
7+
import pulpcore.app.util
8+
9+
10+
class Migration(migrations.Migration):
    # Creates the table backing the VulnerabilityReport model.

    dependencies = [
        ("core", "0133_repositoryversion_content_ids"),
    ]

    operations = [
        migrations.CreateModel(
            name="VulnerabilityReport",
            fields=[
                (
                    "pulp_id",
                    models.UUIDField(
                        default=pulpcore.app.models.base.pulp_uuid,
                        editable=False,
                        primary_key=True,
                        serialize=False,
                    ),
                ),
                ("pulp_created", models.DateTimeField(auto_now_add=True)),
                ("pulp_last_updated", models.DateTimeField(auto_now=True, null=True)),
                ("vulns", models.JSONField()),
                (
                    "pulp_domain",
                    models.ForeignKey(
                        default=pulpcore.app.util.get_domain_pk,
                        on_delete=django.db.models.deletion.CASCADE,
                        to="core.domain",
                    ),
                ),
            ],
            options={
                "default_related_name": "%(app_label)s_%(model_name)s",
            },
            bases=(django_lifecycle.mixins.LifecycleModelMixin, models.Model),
        ),
    ]

pulpcore/app/models/__init__.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,8 @@
8787
UploadChunk,
8888
)
8989

90+
from .vulnerability_report import VulnerabilityReport
91+
9092
# Moved here to avoid a circular import with Task
9193
from .progress import GroupProgressReport, ProgressReport
9294

@@ -170,4 +172,5 @@
170172
"OpenPGPSignature",
171173
"OpenPGPUserAttribute",
172174
"OpenPGPUserID",
175+
"VulnerabilityReport",
173176
]
Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
from django.db import models
2+
3+
from pulpcore.app.models import BaseModel
4+
from pulpcore.plugin.util import get_domain_pk
5+
6+
7+
class VulnerabilityReport(BaseModel):
    """
    Persisted result of a vulnerability scan.

    Fields:
        vulns (models.JSONField): The vulnerability data collected by the scan.

    Relations:
        pulp_domain (models.ForeignKey): The domain the report belongs to; the report is
            deleted together with its domain.
    """

    vulns = models.JSONField()
    pulp_domain = models.ForeignKey(
        "core.Domain",
        default=get_domain_pk,
        on_delete=models.CASCADE,
    )

    class Meta:
        default_related_name = "%(app_label)s_%(model_name)s"
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
from rest_framework import serializers
2+
3+
from pulpcore.plugin.models import VulnerabilityReport
4+
from pulpcore.plugin.serializers import IdentityField, ModelSerializer
5+
6+
7+
class VulnerabilityReportSerializer(ModelSerializer):
    """
    A serializer for the VulnerabilityReport Model.
    """

    # Raw vulnerability data stored on the report.
    vulns = serializers.JSONField()
    # Link to the report itself, resolved through the "vuln_report" endpoint's detail view.
    pulp_href = IdentityField(view_name="vuln_report-detail")

    class Meta:
        model = VulnerabilityReport
        # Everything the base serializer exposes, plus the report payload.
        fields = (*ModelSerializer.Meta.fields, "vulns")
Lines changed: 161 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,161 @@
1+
import aiohttp
2+
import importlib
3+
import json
4+
import re
5+
6+
from asgiref.sync import sync_to_async
7+
from queue import Empty, Queue
8+
from threading import Thread
9+
10+
from pulpcore.plugin.util import get_domain
11+
from pulpcore.plugin.models import CreatedResource, VulnerabilityReport
12+
from pulpcore.constants import OSV_QUERY_URL, VULNERABILITY_TASK_THREAD_TIMEOUT
13+
14+
15+
# Create a thread-safe queue to share Content units between threads
16+
content_queue = Queue()
17+
18+
# CPE prefix from Red Hat packages
19+
cpe_prefix = re.compile(r"^cpe:\/[oa]:redhat")
20+
21+
22+
async def check_content(func, args=None):
    """
    Start the background thread and make the API requests (scan) to osv.dev with the packages
    from Queue.

    Args:
        func (callable | str): The function to populate content_queue Queue with the osv.dev
            expected request data format. A string is interpreted as the dotted path
            ("package.module.function") of that function and is imported here.
        args (tuple): The positional arguments to pass on to the func.

    Raises:
        RuntimeError: Propagated from the scan when the background thread reports a failure,
            dies, or does not deliver anything before the queue timeout.
    """
    if not args:
        args = ()

    # Only a dotted path needs resolving; a callable is used as given.
    if isinstance(func, str):
        module_name, function_name = func.rsplit(".", 1)
        module = importlib.import_module(module_name)
        func = getattr(module, function_name)

    background_thread = Thread(target=func, args=args)
    # The producer has to be running before the scan starts: it is the only thing feeding
    # content_queue, and join() raises RuntimeError on a thread that was never started.
    background_thread.start()
    await _scan_packages(background_thread)
    background_thread.join()
42+
43+
44+
async def _scan_packages(background_thread):
    """
    Scan everything the producer thread queues and persist the outcome as a report.

    Args:
        background_thread (Thread): The thread that populates the queue. It is handed down so
            the scan can tell a dead producer from a slow one.
    """
    results = await _scan_packages_from_queue(background_thread=background_thread)
    await _save_vulnerability_report(results)
56+
57+
58+
async def _scan_packages_from_queue(background_thread, http_client=None):
    """
    Scan the queued packages, reusing the caller's HTTP client when one is supplied.

    Args:
        background_thread (Thread): Thread populating the queue
        http_client (Optional[aiohttp.ClientSession]): HTTP client for making requests. When
            omitted, a session is opened for the duration of the scan and closed afterwards.

    Returns:
        Dict[str, Any]: Dictionary mapping package names to vulnerability data
    """
    if not http_client:
        # No client supplied: own a session for exactly as long as the scan runs.
        async with aiohttp.ClientSession() as owned_session:
            return await _process_queue_with_client(background_thread, owned_session)

    return await _process_queue_with_client(background_thread, http_client)
76+
77+
78+
async def _process_queue_with_client(background_thread, session):
    """
    Process queue items using the provided HTTP client session.

    Items are read from content_queue until the ``None`` sentinel shows up. Each item is
    queried against the OSV API and all result pages for it are fetched before the next item
    is read.

    Args:
        background_thread (Thread): Thread populating the queue
        session (aiohttp.ClientSession): HTTP client session

    Returns:
        Dict[str, Any]: Dictionary mapping package names to vulnerability data. Packages
            without vulnerabilities are left out.

    Raises:
        RuntimeError: If the producer put an Exception in the queue, or if no item arrived
            within VULNERABILITY_TASK_THREAD_TIMEOUT.
    """
    scanned_packages = {}
    try:
        for osv_data in iter(
            lambda: content_queue.get(timeout=VULNERABILITY_TASK_THREAD_TIMEOUT), None
        ):
            if isinstance(osv_data, Exception):
                raise RuntimeError(
                    f"Background vuln report task failed to execute: {osv_data}"
                ) from osv_data

            package_name = _get_package_name(osv_data)

            # Follow pagination here rather than re-queueing the item: the producer may already
            # have queued the None sentinel, and anything put behind it would never be read.
            query = dict(osv_data)
            package_vulns = []
            while True:
                vulnerability_data = await _query_osv_api(session, query)
                package_vulns.extend(vulnerability_data.get("vulns") or [])

                next_page_token = vulnerability_data.get("next_page_token")
                if not next_page_token:
                    break
                query["page_token"] = next_page_token

            if package_vulns:
                scanned_packages[package_name] = package_vulns

    except Empty:
        if not background_thread.is_alive():
            raise RuntimeError("Vuln report task thread died unexpectedly.")
        else:
            raise RuntimeError("Background vuln report thread took too long.")

    return scanned_packages
115+
116+
117+
async def _query_osv_api(session, osv_data):
    """
    Make a single request to the OSV API.

    Args:
        session (aiohttp.ClientSession): HTTP client session
        osv_data (Dict[str, Any]): OSV query data

    Returns:
        Dict[str, Any]: JSON response from OSV API

    Raises:
        aiohttp.ClientResponseError: If the API answers with an HTTP error status.
    """
    data = json.dumps(osv_data)
    async with session.post(url=OSV_QUERY_URL, data=data) as response:
        # Without this check an error response would be parsed like a result, and its missing
        # "vulns" key would be read as "no vulnerabilities found".
        response.raise_for_status()
        response_body = await response.text()
        return json.loads(response_body)
132+
133+
134+
def _get_package_name(osv_data):
    """
    Build the label under which a package's results are stored.

    Args:
        osv_data (Dict[str, Any]): OSV query data

    Returns:
        str: "<name>-<version>" when the query carries a non-empty version, else "<name>"
    """
    name = osv_data["package"]["name"]
    version = osv_data.get("version", "")
    if not version:
        return f"{name}"
    return f"{name}-{version}"
148+
149+
150+
async def _save_vulnerability_report(scanned_packages):
    """
    Store the scan result, reusing an existing identical report for the current domain.

    A CreatedResource is recorded only when a new report row had to be created.

    Args:
        scanned_packages (Dict[str, Any]): Dictionary mapping package names to vulnerability data
    """
    # get_or_create is a synchronous ORM call, so it is wrapped for use from async code.
    get_or_create_report = sync_to_async(VulnerabilityReport.objects.get_or_create)
    report, is_new = await get_or_create_report(
        vulns=scanned_packages, pulp_domain=get_domain()
    )
    if not is_new:
        return
    await CreatedResource.objects.acreate(content_object=report)
Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
from rest_framework.mixins import DestroyModelMixin, ListModelMixin, RetrieveModelMixin
2+
3+
from pulpcore.plugin.models import VulnerabilityReport as VulnReport
4+
from pulpcore.app.serializers.vulnerability_report import VulnerabilityReportSerializer
5+
from pulpcore.plugin.viewsets import NamedModelViewSet
6+
7+
8+
# Viewset for stored vulnerability reports: the mixins add list, retrieve and destroy;
# creating a report is left to subclasses.
class VulnerabilityReport(
    NamedModelViewSet,
    ListModelMixin,
    RetrieveModelMixin,
    DestroyModelMixin,
):

    endpoint_name = "vuln_report"
    queryset = VulnReport.objects.all()
    serializer_class = VulnerabilityReportSerializer

pulpcore/constants.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -127,3 +127,10 @@
127127
# The upper boundary represents an unsigned 32-bit integer and prevents overflow
128128
ORPHAN_PROTECTION_TIME_LOWER_BOUND = 0
129129
ORPHAN_PROTECTION_TIME_UPPER_BOUND = 4294967295 # (2^32)-1
130+
131+
# VULNERABILITY REPORT CONSTANTS
132+
# OSV API URL
133+
OSV_QUERY_URL = "https://api.osv.dev/v1/query"
134+
135+
# Timeout, in seconds, when waiting on the scan task's thread queue, to avoid blocking indefinitely.
136+
VULNERABILITY_TASK_THREAD_TIMEOUT = 60

0 commit comments

Comments
 (0)