Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@

matrixRTC:
replicas: 2
annotations:
has-no-service-monitor: "true"
ingress:
host: mrtc.{{ $.Values.serverName }}
tlsSecret: "{{ $.Release.Name }}-matrix-rtc-tls"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@ initSecrets:
matrixAuthenticationService:
enabled: false
matrixRTC:
annotations:
has-no-service-monitor: "true"
extraEnv:
- name: LIVEKIT_INSECURE_SKIP_VERIFY_TLS
value: YES_I_KNOW_WHAT_I_AM_DOING
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,8 @@ initSecrets:
matrixAuthenticationService:
enabled: false
matrixRTC:
annotations:
has-no-service-monitor: "true"
extraEnv:
- name: LIVEKIT_INSECURE_SKIP_VERIFY_TLS
value: YES_I_KNOW_WHAT_I_AM_DOING
Expand Down

This file was deleted.

5 changes: 4 additions & 1 deletion charts/matrix-stack/templates/matrix-rtc/sfu_service.yaml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
{{- /*
Copyright 2024 New Vector Ltd
Copyright 2024-2025 New Vector Ltd

SPDX-License-Identifier: AGPL-3.0-only
*/ -}}
Expand All @@ -21,6 +21,9 @@ spec:
- name: http
port: 7880
targetPort: http
- name: metrics
port: 6789
targetPort: metrics
selector:
app.kubernetes.io/instance: "{{ $.Release.Name }}-matrix-rtc-sfu"
{{- end -}}
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
{{- /*
Copyright 2024 New Vector Ltd
Copyright 2024-2025 New Vector Ltd

SPDX-License-Identifier: AGPL-3.0-only
*/ -}}
Expand All @@ -20,7 +20,7 @@ metadata:
spec:
endpoints:
- interval: 30s
port: http
port: metrics
selector:
matchLabels:
app.kubernetes.io/part-of: matrix-stack
Expand Down
1 change: 1 addition & 0 deletions newsfragments/569.fixed.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
matrix-rtc: Fix the ServiceMonitors that are deployed.
1,358 changes: 686 additions & 672 deletions poetry.lock

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ types-pytz = "^2025.2.0.20250516"
mypy = "^1.16.1"
types-pyyaml = "^6.0.12.20250516"
semver = "^3.0.4"
prometheus-client = "^0.22.1"

[build-system]
requires = ["poetry-core"]
Expand Down
70 changes: 69 additions & 1 deletion tests/integration/lib/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,16 @@
import time

import pyhelm3
from lightkube import AsyncClient
from lightkube.models.core_v1 import (
Capabilities,
Container,
PodSpec,
SeccompProfile,
SecurityContext,
)
from lightkube.models.meta_v1 import ObjectMeta
from lightkube.resources.core_v1 import ConfigMap, Endpoints, Namespace, Secret
from lightkube.resources.core_v1 import ConfigMap, Endpoints, Namespace, Pod, Secret

from ..artifacts import CertKey, generate_cert
from .utils import merge
Expand Down Expand Up @@ -98,3 +106,63 @@ async def get_deployment_marker(kube_client, generated_data, marker: str):
name=f"{generated_data.release_name}-markers",
)
return configmap.data.get(marker)


async def run_pod_with_args(kube_client: AsyncClient, namespace, image_name, pod_name, args):
    """Run a one-shot pod, wait for its container to complete and return its logs.

    The pod is created with a millisecond timestamp appended to ``pod_name`` and a
    single container named ``cmd`` that runs ``image_name`` with ``args`` under a
    restricted security context. The pod is always deleted before returning.

    Args:
        kube_client: lightkube async client used to create, poll, read and delete the pod.
        namespace: Namespace the pod is created in.
        image_name: Container image to run.
        pod_name: Prefix of the pod name.
        args: Arguments passed to the container.

    Returns:
        The concatenated log output of the ``cmd`` container.

    Raises:
        RuntimeError: If the container has not terminated with reason
            ``Completed`` within 30 seconds.
    """
    pod = Pod(
        metadata=ObjectMeta(name=pod_name + "-" + str(int(time.time() * 1000)), namespace=namespace),
        spec=PodSpec(
            restartPolicy="Never",
            containers=[
                Container(
                    name="cmd",
                    image=image_name,
                    args=args,
                    securityContext=SecurityContext(
                        seccompProfile=SeccompProfile(type="RuntimeDefault"),
                        capabilities=Capabilities(drop=["ALL"]),
                        readOnlyRootFilesystem=True,
                        allowPrivilegeEscalation=False,
                        runAsNonRoot=True,
                        runAsUser=3000,
                        runAsGroup=3000,
                    ),
                )
            ],
        ),
    )
    assert pod.metadata
    assert pod.metadata.name
    assert pod.metadata.namespace

    timeout_seconds = 30
    try:
        await kube_client.create(pod)
        start_time = time.time()
        found_pod = None
        completed = False
        while not completed and time.time() < start_time + timeout_seconds:
            found_pod = await kube_client.get(Pod, name=pod.metadata.name, namespace=pod.metadata.namespace)
            container_statuses = found_pod.status.containerStatuses if found_pod.status else None
            if container_statuses:
                # With restartPolicy=Never the container is never restarted, so its
                # termination is reported under `state`; `lastState` is still accepted
                # because it is what this helper originally looked at.
                for container_state in (container_statuses[0].state, container_statuses[0].lastState):
                    if (
                        container_state
                        and container_state.terminated
                        and container_state.terminated.reason == "Completed"
                    ):
                        completed = True
            if not completed:
                await asyncio.sleep(1)

        if not completed:
            raise RuntimeError(
                f"Pod {pod.metadata.name} did not complete in time "
                f"(failed after {time.time() - start_time} seconds), "
                f"pod status: {found_pod.status if found_pod else None}"
            )

        return "".join(
            [
                log_line
                async for log_line in kube_client.log(
                    pod.metadata.name, namespace=pod.metadata.namespace, container="cmd"
                )
            ]
        )
    finally:
        await kube_client.delete(Pod, name=pod.metadata.name, namespace=namespace)
68 changes: 64 additions & 4 deletions tests/integration/test_networking.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,10 @@
from lightkube import AsyncClient
from lightkube import operators as op
from lightkube.resources.core_v1 import Pod, Service
from prometheus_client.parser import text_string_to_metric_families

from .fixtures.data import ESSData
from .lib.helpers import wait_for_endpoint_ready
from .lib.helpers import run_pod_with_args, wait_for_endpoint_ready
from .lib.utils import read_service_monitor_kind


Expand Down Expand Up @@ -139,9 +140,6 @@ async def test_pods_monitored(
service_port_names = [port.name for port in service.spec.ports if port.name]
if endpoint["port"] in service_port_names:
break
# This Service does not have the named port. Potentially there's another Service that covers it
else:
continue

async for covered_pod in kube_client.list(
Pod, namespace=generated_data.ess_namespace, labels=service.spec.selector
Expand All @@ -162,3 +160,65 @@ async def test_pods_monitored(
assert all_monitorable_pods == monitored_pods, (
f"Some pods are not monitored : {', '.join(list(set(all_monitorable_pods) ^ set(monitored_pods)))}"
)


@pytest.mark.skipif(
    os.environ.get("SKIP_SERVICE_MONITORS_CRDS", "false") == "true", reason="ServiceMonitors not deployed"
)
@pytest.mark.skipif(os.environ.get("MATRIX_TEST_FROM_REF", "") == "25.6.2", reason="This fails against 25.6.2.")
@pytest.mark.asyncio_cooperative
@pytest.mark.usefixtures("matrix_stack")
async def test_service_monitors_point_to_metrics(
    kube_client: AsyncClient,
    generated_data: ESSData,
):
    """Check that every matrix-stack ServiceMonitor selects a Service that serves metrics.

    For each ServiceMonitor labelled ``app.kubernetes.io/part-of=matrix-stack`` the
    Services matching its ``matchLabels`` are listed. When a Service exposes a port
    named by one of the monitor's endpoints, the endpoints are probed through
    ``has_actual_metrics_on_endpoint``. The test fails if a ServiceMonitor has no
    such Service or if the probe finds no metrics.
    """
    async for service_monitor in kube_client.list(
        await read_service_monitor_kind(kube_client),
        namespace=generated_data.ess_namespace,
        labels={"app.kubernetes.io/part-of": op.in_(["matrix-stack"])},
    ):
        endpoints = service_monitor["spec"]["endpoints"]
        found_metrics = False
        async for service in kube_client.list(
            Service, namespace=generated_data.ess_namespace, labels=service_monitor["spec"]["selector"]["matchLabels"]
        ):
            assert service.metadata, f"Encountered a service without metadata : {service}"
            assert service.spec, f"Encountered a service without spec : {service}"
            assert service.spec.ports, f"Ecountered a service without port : {service}"
            assert service.spec.selector, f"Ecountered a service without selectors : {service}"

            service_port_names = {port.name for port in service.spec.ports if port.name}
            # The helper already walks every endpoint of the monitor, so one call per
            # Service is enough; calling it once per matching endpoint would launch
            # the same probe pods repeatedly.
            if any(endpoint["port"] in service_port_names for endpoint in endpoints):
                assert await has_actual_metrics_on_endpoint(kube_client, generated_data, service, endpoints)
                found_metrics = True
        assert found_metrics, (
            f"ServiceMonitor {service_monitor['metadata']['name']} does not point to any /metrics endpoint"
        )


async def has_actual_metrics_on_endpoint(
    kube_client: AsyncClient, generated_data: ESSData, service: Service, endpoints
):
    """Return whether any of ``endpoints`` serves parseable metrics through ``service``.

    For each endpoint whose ``port`` names a port of ``service``, a curl pod is run
    against the Service's cluster-local DNS name on that port. The response is parsed
    with ``text_string_to_metric_families``.

    Args:
        kube_client: lightkube async client used to run the probe pod.
        generated_data: Test data providing the namespace of the deployment.
        service: Service selected by the ServiceMonitor.
        endpoints: The ServiceMonitor's ``spec.endpoints`` entries. Each must have a
            ``port``; an optional ``path`` overrides the default ``/metrics``.

    Returns:
        True if at least one metric family was parsed from any probed endpoint,
        False otherwise.
    """
    assert service.metadata
    assert service.spec
    assert service.spec.ports
    service_host = f"{service.metadata.name}.{generated_data.ess_namespace}.svc.cluster.local"
    found_metrics = False
    for endpoint in endpoints:
        # Scrape the path the ServiceMonitor asks for, defaulting to /metrics when unset.
        metrics_path = endpoint.get("path") or "/metrics"
        for port_spec in service.spec.ports:
            if port_spec.name != endpoint["port"]:
                continue
            metrics_data = await run_pod_with_args(
                kube_client,
                generated_data.ess_namespace,
                "curlimages/curl:latest",
                "curl",
                [
                    "-s",
                    f"http://{service_host}:{port_spec.port}{metrics_path}",
                ],
            )
            for metric_family in text_string_to_metric_families(metrics_data):
                assert metric_family.name, "Metric family has no name"
                found_metrics = True
    return found_metrics
1 change: 1 addition & 0 deletions tests/manifests/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -468,6 +468,7 @@ def make_synapse_worker_sub_component(worker_name: str, worker_type: str) -> Sub
name="matrix-rtc",
values_file_path=ValuesFilePath.read_write("matrixRTC"),
has_topology_spread_constraints=False,
has_service_monitor=False,
sub_components=(
SubComponentDetails(
name="matrix-rtc-sfu",
Expand Down
Loading