Skip to content

Commit 195dfce

Browse files
committed
Add the vulnerability report
closes: #6773
1 parent 4ee8c48 commit 195dfce

12 files changed

Lines changed: 445 additions & 1 deletion

File tree

CHANGES/6773.feature

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Added the vulnerability report data model.

docs/admin/reference/tech-preview.md

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,4 +5,5 @@ The following features are currently being released as part of tech preview:
55
- [Support for Open Telemetry](site:pulpcore/docs/admin/learn/architecture/#telemetry-support)
66
- Upstream replicas
77
- Domains - Multi-Tenancy
8-
- [Checkpoint](site:pulpcore/docs/user/guides/checkpoint/)
8+
- [Checkpoint](site:pulpcore/docs/user/guides/checkpoint/)
9+
- [Vulnerability Report](site:pulpcore/docs/dev/learn/other/vulnerability-report/)
Lines changed: 93 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,93 @@
1+
# Adding vulnerability report for plugins
2+
3+
4+
!!! warning
5+
This feature is provided as a tech preview and could change in backwards incompatible
6+
ways in the future.
7+
8+
9+
Pulp provides a way to store known vulnerabilities from OSV for `content units` within a specified `RepositoryVersion`.
10+
Each plugin will need to implement a function to construct the [package payload](https://google.github.io/osv.dev/post-v1-query/#parameters)
11+
that will be used to query the osv.dev database.
12+
13+
!!! note
14+
As of now, querying by osv.dev `commit` is not supported (use `package` instead).
15+
16+
The first step in writing a vulnerability report for a Pulp `content unit` is to identify the
17+
package [`ecosystem`](https://google.github.io/osv.dev/post-v1-query/#parameters) by checking
18+
[https://ossf.github.io/osv-schema/#defined-ecosystems](https://ossf.github.io/osv-schema/#defined-ecosystems).
19+
20+
The next step is to create a function that will be run as a Pulp task. This function should add a
21+
dictionary (the `osv_data` created through `build_osv_data` function in the following sample) to the
22+
`pulpcore.app.tasks.vulnerability_report.content_queue` queue.
23+
24+
Here is an example of a function with the above steps:
25+
26+
```python
27+
from pulpcore.app.tasks.vulnerability_report import content_queue
28+
29+
def get_content_from_repo_version(repo_version_pk: str):
    """
    Populate content_queue Queue with the content_units found in RepositoryVersion

    Args:
        repo_version_pk (str): Primary key of the RepositoryVersion whose content is scanned.

    Any exception raised while collecting the content is put on the queue instead of being
    lost, so the consumer can fail immediately rather than waiting for its queue timeout.
    """
    try:
        repo_version = RepositoryVersion.objects.get(pk=repo_version_pk)
        ecosystem = "PyPI"
        for content_unit in repo_version.content:
            content = content_unit.cast()
            osv_data = build_osv_data(content.name, ecosystem, content.version)
            content_queue.put(osv_data)
    except Exception as exc:
        # Hand the failure to the queue consumer; it treats Exception items as a fatal error.
        content_queue.put(exc)
    else:
        content_queue.put(None)  # signal that there are no more content_units
40+
41+
def build_osv_data(name, ecosystem, version=None):
    """
    Build the osv.dev query payload for one package.

    Args:
        name (str): Package name.
        ecosystem (str): osv.dev ecosystem the package belongs to.
        version (str): Optional package version; left out of the payload when falsy.

    Returns:
        dict: ``{"package": {"name": ..., "ecosystem": ...}}`` plus ``"version"`` when given.
    """
    payload = {"package": {"name": name, "ecosystem": ecosystem}}
    if not version:
        return payload
    payload["version"] = version
    return payload
46+
```
47+
48+
49+
Now that we have the function to populate the `content_queue` queue, we need to create a `ViewSet`
50+
to dispatch a task with it:
51+
52+
```python
53+
from pulpcore.plugin.models import VulnerabilityReport
54+
from pulpcore.app.serializers.vulnerability_report import VulnerabilityReportSerializer
55+
from pulpcore.app.tasks.vulnerability_report import check_content
56+
from pulpcore.app.viewsets.vulnerability_report import VulnerabilityReport as VulnerabilityReportView
57+
58+
class MyPluginVulnerabilityReport(VulnerabilityReportView):
    """
    Plugin viewset that adds a `create` action dispatching the vulnerability scan task.
    """

    endpoint_name = "vuln_report"
    queryset = VulnerabilityReport.objects.all()
    serializer_class = VulnerabilityReportSerializer

    @extend_schema(
        request=MyPluginContentVulnerabilityReportSerializer,
        description="Trigger a task to generate the package vulnerability report",
        summary="Generate vulnerability report",
        responses={202: AsyncOperationResponseSerializer},
    )
    def create(self, request):
        """
        Validate the request and dispatch `check_content` for the given RepositoryVersion.
        """
        serializer = MyPluginContentVulnerabilityReportSerializer(
            data=request.data, context={"request": request}
        )
        serializer.is_valid(raise_exception=True)
        # The related field resolves the submitted HREF to a RepositoryVersion instance.
        repo_version = serializer.validated_data["repo_version"]
        task = dispatch(
            check_content,
            shared_resources=[repo_version.repository],
            # check_content(func, args): `func` fills the queue and receives `args`.
            args=[get_content_from_repo_version],
            kwargs={"args": [repo_version.pk]},
        )
        return OperationPostponedResponse(task, request)
77+
```
78+
79+
Here is a sample for the `MyPluginContentVulnerabilityReportSerializer` where we serialize the
80+
`RepositoryVersion` string into a `RepositoryVersionRelatedField` object:
81+
82+
```python
83+
from rest_framework import serializers
84+
from pulpcore.plugin.serializers import ValidateFieldsMixin, RepositoryVersionRelatedField
85+
86+
class MyPluginContentVulnerabilityReportSerializer(serializers.Serializer, ValidateFieldsMixin):
    """
    Request serializer for the vulnerability report endpoint of the plugin.
    """

    # HREF of the RepositoryVersion to scan; the field is optional and accepts null.
    repo_version = RepositoryVersionRelatedField(
        help_text=_("RepositoryVersion HREF with the packages to be checked."),
        required=False,
        allow_null=True,
    )
93+
```
Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
# Generated by Django 4.2.19 on 2025-07-14 19:37
2+
3+
from django.db import migrations, models
4+
import django.db.models.deletion
5+
import django_lifecycle.mixins
6+
import pulpcore.app.models.base
7+
import pulpcore.app.util
8+
9+
10+
class Migration(migrations.Migration):
    """Create the table backing the ``VulnerabilityReport`` model."""

    dependencies = [("core", "0133_repositoryversion_content_ids")]

    operations = [
        migrations.CreateModel(
            name="VulnerabilityReport",
            fields=[
                (
                    "pulp_id",
                    models.UUIDField(
                        default=pulpcore.app.models.base.pulp_uuid,
                        editable=False,
                        primary_key=True,
                        serialize=False,
                    ),
                ),
                ("pulp_created", models.DateTimeField(auto_now_add=True)),
                ("pulp_last_updated", models.DateTimeField(auto_now=True, null=True)),
                ("vulns", models.JSONField()),
                (
                    "pulp_domain",
                    models.ForeignKey(
                        default=pulpcore.app.util.get_domain_pk,
                        on_delete=django.db.models.deletion.CASCADE,
                        to="core.domain",
                    ),
                ),
            ],
            options={"default_related_name": "%(app_label)s_%(model_name)s"},
            bases=(django_lifecycle.mixins.LifecycleModelMixin, models.Model),
        ),
    ]

pulpcore/app/models/__init__.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,8 @@
8787
UploadChunk,
8888
)
8989

90+
from .vulnerability_report import VulnerabilityReport
91+
9092
# Moved here to avoid a circular import with Task
9193
from .progress import GroupProgressReport, ProgressReport
9294

@@ -170,4 +172,5 @@
170172
"OpenPGPSignature",
171173
"OpenPGPUserAttribute",
172174
"OpenPGPUserID",
175+
"VulnerabilityReport",
173176
]
Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
from django.db import models
2+
3+
from pulpcore.app.models import BaseModel
4+
from pulpcore.plugin.util import get_domain_pk
5+
6+
7+
class VulnerabilityReport(BaseModel):
    """
    Stored result of a vulnerability scan.

    Fields:
        vulns (models.JSONField): The vulnerability data collected by the scan.

    Relations:
        pulp_domain (models.ForeignKey): The Domain the report belongs to; the report is
            deleted together with its Domain.
    """

    vulns = models.JSONField()
    pulp_domain = models.ForeignKey(
        "core.Domain",
        default=get_domain_pk,
        on_delete=models.CASCADE,
    )

    class Meta:
        default_related_name = "%(app_label)s_%(model_name)s"
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
from rest_framework import serializers
2+
3+
from pulpcore.plugin.models import VulnerabilityReport
4+
from pulpcore.plugin.serializers import IdentityField, ModelSerializer
5+
6+
7+
class VulnerabilityReportSerializer(ModelSerializer):
    """
    Serializer exposing a VulnerabilityReport: the base model fields plus `vulns`.
    """

    # Declared in the same order as before so the generated schema stays unchanged.
    vulns = serializers.JSONField()
    pulp_href = IdentityField(view_name="vuln_report-detail")

    class Meta:
        model = VulnerabilityReport
        fields = ModelSerializer.Meta.fields + ("vulns",)
Lines changed: 155 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,155 @@
1+
import aiohttp
2+
import json
3+
import re
4+
5+
from asgiref.sync import sync_to_async
6+
from queue import Empty, Queue
7+
from threading import Thread
8+
9+
from pulpcore.plugin.util import get_domain
10+
from pulpcore.plugin.models import CreatedResource, VulnerabilityReport
11+
from pulpcore.constants import OSV_QUERY_URL, VULNERABILITY_TASK_THREAD_TIMEOUT
12+
13+
14+
# Thread-safe queue through which the producer thread hands osv.dev query payloads
# (one per Content unit) to the scanning coroutine.
content_queue = Queue()

# Matches CPE strings of Red Hat packages (operating-system "o" or application "a" part).
cpe_prefix = re.compile(r"^cpe:\/[oa]:redhat")
19+
20+
21+
async def check_content(func, args=None):
    """
    Start the background_thread and make the API requests (scan) to osv.dev with the packages
    from Queue.

    Args:
        func (callable): The function to populate content_queue Queue with the osv.dev
            expected request data format. It is run in a separate thread.
        args (tuple): The positional arguments to pass on to the func.
    """
    if not args:
        args = ()
    background_thread = Thread(target=func, args=args)
    # The producer has to be running before the scan starts: otherwise nothing is ever put on
    # content_queue, and join() raises RuntimeError for a thread that was never started.
    background_thread.start()
    await _scan_packages(background_thread)
    background_thread.join()
36+
37+
38+
async def _scan_packages(background_thread):
    """
    Scan every queued package against osv.dev and persist the outcome as a VulnerabilityReport.

    Args:
        background_thread (Thread): The thread populating the queue. It is handed down so the
            scan can check whether the producer is still alive when the queue runs dry.
    """
    results = await _scan_packages_from_queue(background_thread=background_thread)
    await _save_vulnerability_report(results)
50+
51+
52+
async def _scan_packages_from_queue(background_thread, http_client=None):
    """
    Drain the content queue, querying the OSV API for each queued package.

    Args:
        background_thread (Thread): Thread populating the queue
        http_client (Optional[aiohttp.ClientSession]): HTTP client to use for the requests.
            When omitted, a session is opened for this call and closed afterwards.

    Returns:
        Dict[str, Any]: Dictionary mapping package names to vulnerability data
    """
    if not http_client:
        # No client supplied by the caller: own the session for the duration of the scan.
        async with aiohttp.ClientSession() as own_session:
            return await _process_queue_with_client(background_thread, own_session)

    return await _process_queue_with_client(background_thread, http_client)
70+
71+
72+
async def _process_queue_with_client(background_thread, session):
    """
    Process queue items using the provided HTTP client session.

    Items are read from content_queue until the ``None`` end marker is received. All result
    pages of a package are fetched before the next queue item is read, and their
    vulnerabilities are accumulated under the package's name.

    Args:
        background_thread (Thread): Thread populating the queue
        session (aiohttp.ClientSession): HTTP client session

    Returns:
        Dict[str, Any]: Dictionary mapping package names to vulnerability data

    Raises:
        RuntimeError: If an Exception instance is received from the queue, or if no item
            arrives within VULNERABILITY_TASK_THREAD_TIMEOUT.
    """
    scanned_packages = {}
    try:
        # iter(callable, sentinel): keep reading until the producer's None marker shows up.
        for osv_data in iter(
            lambda: content_queue.get(timeout=VULNERABILITY_TASK_THREAD_TIMEOUT), None
        ):
            if isinstance(osv_data, Exception):
                raise RuntimeError(f"Background vuln report task failed to execute: {osv_data}")

            package_name = _get_package_name(osv_data)

            # Follow pagination here instead of re-queueing the item: a re-queued item could
            # land behind the None marker and never be read, and each page used to overwrite
            # the vulnerabilities collected from the previous one.
            query = dict(osv_data)
            package_vulns = []
            while True:
                vulnerability_data = await _query_osv_api(session, query)
                package_vulns.extend(vulnerability_data.get("vulns") or [])
                next_page_token = vulnerability_data.get("next_page_token")
                if not next_page_token:
                    break
                query["page_token"] = next_page_token

            if package_vulns:
                scanned_packages[package_name] = package_vulns

    except Empty:
        # Nothing arrived within the timeout: tell a dead producer apart from a slow one.
        if not background_thread.is_alive():
            raise RuntimeError("Vuln report task thread died unexpectedly.")
        else:
            raise RuntimeError("Background vuln report thread took too long.")

    return scanned_packages
109+
110+
111+
async def _query_osv_api(session, osv_data):
    """
    POST one query to the OSV API and decode the JSON answer.

    Args:
        session (aiohttp.ClientSession): HTTP client session
        osv_data (Dict[str, Any]): OSV query data

    Returns:
        Dict[str, Any]: JSON response from OSV API
    """
    payload = json.dumps(osv_data)
    async with session.post(url=OSV_QUERY_URL, data=payload) as response:
        # NOTE(review): the HTTP status is not checked here; whatever JSON body comes back,
        # including an error payload, is returned to the caller as-is.
        return json.loads(await response.text())
126+
127+
128+
def _get_package_name(osv_data):
    """
    Build the report key for a package from its OSV query data.

    Args:
        osv_data (Dict[str, Any]): OSV query data

    Returns:
        str: ``<name>-<version>`` when the query carries a version, otherwise just ``<name>``
    """
    name = osv_data["package"]["name"]
    version = osv_data.get("version", "")
    return f"{name}-{version}" if version else f"{name}"
142+
143+
144+
async def _save_vulnerability_report(scanned_packages):
    """
    Persist the scan result, reusing an existing identical report of the current domain.

    A CreatedResource entry is recorded only when a new report row had to be created.

    Args:
        scanned_packages (Dict[str, Any]): Dictionary mapping package names to vulnerability data
    """
    get_or_create_report = sync_to_async(VulnerabilityReport.objects.get_or_create)
    report, is_new = await get_or_create_report(vulns=scanned_packages, pulp_domain=get_domain())
    if not is_new:
        return
    await CreatedResource.objects.acreate(content_object=report)
Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
from rest_framework.mixins import DestroyModelMixin, ListModelMixin, RetrieveModelMixin
2+
3+
from pulpcore.plugin.models import VulnerabilityReport as VulnReport
4+
from pulpcore.app.serializers.vulnerability_report import VulnerabilityReportSerializer
5+
from pulpcore.plugin.viewsets import NamedModelViewSet
6+
7+
8+
class VulnerabilityReport(
    NamedModelViewSet,
    ListModelMixin,
    RetrieveModelMixin,
    DestroyModelMixin,
):
    """
    ViewSet offering list, retrieve and destroy operations for vulnerability reports.
    """

    endpoint_name = "vuln_report"
    queryset = VulnReport.objects.all()
    serializer_class = VulnerabilityReportSerializer

pulpcore/constants.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -127,3 +127,16 @@
127127
# The upper boundary represents an unsigned 32-bit integer and prevents overflow
128128
ORPHAN_PROTECTION_TIME_LOWER_BOUND = 0
129129
ORPHAN_PROTECTION_TIME_UPPER_BOUND = 4294967295 # (2^32)-1
130+
131+
# VULNERABILITY REPORT CONSTANTS
# Endpoint of the OSV query API.
OSV_QUERY_URL = "https://api.osv.dev/v1/query"

# Seconds to wait for the next item of the scan thread's queue before giving up, so the
# task cannot block indefinitely.
VULNERABILITY_TASK_THREAD_TIMEOUT = 60

# Ecosystem names, keyed by a short package-type identifier.
PKG_ECOSYSTEM = SimpleNamespace(
    rpm="Red Hat",
    npm="npm",
    maven="Maven",
    python="PyPI",
    gem="RubyGems",
)
OSV_RH_ECOSYSTEM_LABEL = "osv_dev ecosystem"
OSV_RH_ECOSYSTEM_CPES_LABEL = "osv_dev cpes"

0 commit comments

Comments
 (0)