Skip to content

Commit ca353c3

Browse files
committed
feat(resmonitor): Allow custom-endpoint metrics via appEndpoints config (Issue #1220)
- Extend ResourceMonitor CRD (deploy/kubeplus-chart/crds/kubeplus-crds.yaml) to accept an `appEndpoints` section for custom nodePort endpoints and metrics
- Update Go API (platform-operator/pkg/apis/workflowcontroller/v1alpha1/types.go) to include new struct fields for `appEndpoints`
- Enhance plugin (plugins/crmetrics.py) with _get_custom_metrics() to detect and fetch metrics from the configured endpoint
- Add example managed-service "custom-hello-world" under examples/managed-service/appmetrics to demonstrate the custom metrics use case
- Expand tests (tests/tests.py) to cover both default metrics (test_metrics()) and custom-endpoint metrics (test_metrics_custom())
Signed-off-by: Tony Nguyen <tonynguyenhuy@gmail.com>
1 parent f763814 commit ca353c3

File tree

12 files changed

+412
-8
lines changed

12 files changed

+412
-8
lines changed

deploy/kubeplus-chart/crds/kubeplus-crds.yaml

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,17 @@ spec:
5454
type: string
5555
monitorRelationships:
5656
type: string
57+
appEndpoints:
58+
type: object
59+
properties:
60+
label:
61+
type: string
62+
endpoint:
63+
type: string
64+
metrics:
65+
type: array
66+
items:
67+
type: string
5768
names:
5869
kind: ResourceMonitor
5970
plural: resourcemonitors
@@ -184,6 +195,17 @@ spec:
184195
type: string
185196
monitorRelationships:
186197
type: string
198+
appEndpoints:
199+
type: object
200+
properties:
201+
label:
202+
type: string
203+
endpoint:
204+
type: string
205+
metrics:
206+
type: array
207+
items:
208+
type: string
187209
names:
188210
kind: ResourceComposition
189211
plural: resourcecompositions
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
# Image for the custom-hello-world example app (Flask + prometheus_client).
FROM ubuntu:22.04

# The app runs under python3 (see CMD), so install the Python 3 setuptools
# package rather than the Python 2 "python-setuptools". The apt package index
# is removed in the same layer so it does not end up in the image.
RUN apt-get update -y \
    && apt-get install -y python3-setuptools python3-pip \
    && rm -rf /var/lib/apt/lists/*

# COPY is sufficient for a plain local copy; ADD's archive-extraction and
# remote-URL features are not needed here.
COPY . /src

# --no-cache-dir keeps pip's download cache out of the image layer.
RUN pip install --no-cache-dir -r /src/requirements.txt

CMD ["python3", "/src/app.py"]
Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
"""Custom Hello World app.

A small Flask service that answers on "/" and "/bye" and publishes a
Prometheus counter for each of those routes on "/metrics".
"""

from flask import Flask
from prometheus_client import CONTENT_TYPE_LATEST, Counter, generate_latest

app = Flask(__name__)

# One counter per user-facing route; both are exposed through /metrics.
HELLO_REQUEST_COUNT = Counter(
    "hello_requests_total",
    "Total requests to hello endpoint",
)
BYE_REQUEST_COUNT = Counter(
    "bye_requests_total",
    "Total requests to bye endpoint",
)


@app.route("/")
def hello():
    """Count the request and return the greeting."""
    HELLO_REQUEST_COUNT.inc()
    return "Hello World, from Kubernetes!<br>"


@app.route("/bye")
def bye():
    """Count the request and return the farewell."""
    BYE_REQUEST_COUNT.inc()
    return "Bye, from Kubernetes!<br>"


@app.route("/metrics")
def metrics():
    """Return the current metrics in the Prometheus text format."""
    payload = generate_latest()
    headers = {"Content-Type": CONTENT_TYPE_LATEST}
    return payload, 200, headers


if __name__ == "__main__":
    app.run(host="0.0.0.0", port=5000)
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
#!/bin/bash
# Build the custom-hello-world example image against the Docker daemon that
# `minikube docker-env` points to.
# NOTE(review): assumes this script is executed, not sourced -- `set -e` and
# the ${VAR:?} check below would otherwise affect the caller's shell.
set -euo pipefail

IMAGE_NAME="custom-hello-world-app:latest"

# Without this check an unset KUBEPLUS_HOME silently turns the build context
# into /examples/managed-service/... on the local filesystem.
: "${KUBEPLUS_HOME:?KUBEPLUS_HOME must point to the root of the KubePlus checkout}"

# Capture first so that a failing `minikube docker-env` aborts the script
# (the exit status of a command substitution is lost inside a bare eval).
docker_env="$(minikube docker-env)"
eval "$docker_env"

docker build -t "$IMAGE_NAME" "$KUBEPLUS_HOME/examples/managed-service/appmetrics/custom-hello-world/"
Binary file not shown.
Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
apiVersion: workflows.kubeplus/v1alpha1
2+
kind: ResourceComposition
3+
metadata:
4+
name: custom-hello-world-app-composition
5+
spec:
6+
# newResource defines the new CRD to be installed to define a workflow.
7+
newResource:
8+
resource:
9+
kind: CustomHelloWorldApp
10+
group: platformapi.kubeplus
11+
version: v1alpha1
12+
plural: customhelloworldapps
13+
# URL of the Helm chart that contains Kubernetes resources that represent a workflow.
14+
chartURL: file:///custom-hello-chart-0.0.1.tgz
15+
chartName: custom-hello-chart
16+
# respolicy defines the resource policy to be applied to instances of the specified custom resource.
17+
respolicy:
18+
apiVersion: workflows.kubeplus/v1alpha1
19+
kind: ResourcePolicy
20+
metadata:
21+
name: custom-hello-world-app-policy
22+
spec:
23+
resource:
24+
kind: CustomHelloWorldApp
25+
group: platformapi.kubeplus
26+
version: v1alpha1
27+
# resmonitor identifies the resource instances that should be monitored for CPU/Memory/Storage.
28+
# All the Pods that are related to the resource instance through either ownerReference relationship, or all the relationships
29+
# (ownerReference, label, annotation, spec properties) are considered in calculating the statistics.
30+
# The generated output is in Prometheus format.
31+
resmonitor:
32+
apiVersion: workflows.kubeplus/v1alpha1
33+
kind: ResourceMonitor
34+
metadata:
35+
name: custom-hello-world-app-monitor
36+
spec:
37+
resource:
38+
kind: CustomHelloWorldApp
39+
group: platformapi.kubeplus
40+
version: v1alpha1
41+
# This attribute indicates that Pods that are reachable through all the relationships should be used
42+
# as part of calculating the monitoring statistics.
43+
monitorRelationships: all
44+
45+
# Define the endpoint from which to pull application-specific metrics
46+
appEndpoints:
47+
label: "app=customhelloworld"
48+
endpoint: "metrics"
49+
metrics: ["hello_requests_total", "bye_requests_total"]
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
# defines an instance of the CustomHelloWorldApp kind
2+
apiVersion: platformapi.kubeplus/v1alpha1
3+
kind: CustomHelloWorldApp
4+
metadata:
5+
name: custom-hs1
6+
spec:
7+
# no specific specs
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
flask
2+
prometheus_client

platform-operator/pkg/apis/workflowcontroller/v1alpha1/types.go

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -170,7 +170,14 @@ type ResourceMonitorStatus struct {
170170
type ResourceMonitorSpec struct {
171171
Resource Res `json:"resource"`
172172
//MonitoringPolicy Mon `json:"monitoringpolicy"`
173-
MonitorRelationships string `json:"monitorRelationships"`
173+
MonitorRelationships string `json:"monitorRelationships"`
174+
AppEndpoints ResourceMonitorAppEndpoints `json:"appEndpoints"`
175+
}
176+
177+
type ResourceMonitorAppEndpoints struct {
178+
Label string `json:"label"`
179+
Endpoint string `json:"endpoint"`
180+
Metrics []string `json:"metrics"`
174181
}
175182

176183
// +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object

plugins/crmetrics.py

Lines changed: 116 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
import time
99
import yaml
1010
import utils
11+
import requests
1112

1213
class CRBase(object):
1314

@@ -993,12 +994,111 @@ def _get_metrics_creator_account_with_connections(self, account):
993994
print(" Number of Pods: " + str(len(pod_list_for_metrics)))
994995
print(" Number of Containers: " + str(num_of_containers))
995996
print(" Number of Nodes: " + str(num_of_hosts))
996-
print("Underlying Physical Resoures consumed:")
997+
print("Underlying Physical Resources consumed:")
997998
print(" Total CPU(cores): " + str(cpu) + "m")
998999
print(" Total MEMORY(bytes): " + str(mem) + "Mi")
9991000
print(" Total Storage(bytes): " + str(storage) + "Gi")
10001001
print("---------------------------------------------------------- ")
10011002

1003+
1004+
def _get_custom_metrics(self, custom_resource, custom_res_instance):
    """Fetch application-defined metrics for one custom resource instance.

    Scans the cluster's ResourceCompositions for those whose
    ``spec.newResource.resource.kind`` equals ``custom_resource`` and that
    declare ``spec.resmonitor.spec.appEndpoints`` with non-empty ``label``,
    ``endpoint`` and ``metrics``. For each of them, the Pods and Services
    matching the label are looked up in the namespace named
    ``custom_res_instance``, and ``http://<hostIP>:<nodePort>/<endpoint>``
    is scraped once per Pod, using the nodePort of the first port of the
    first matching Service. Only the metric names listed in ``metrics`` are
    kept.

    Args:
        custom_resource: Kind of the custom resource (e.g. "CustomHelloWorldApp").
        custom_res_instance: Instance name; also used as the namespace in
            which Pods and Services are looked up.

    Returns:
        A ``(metrics_data, metrics_descriptions)`` tuple of dicts keyed by
        metric name. ``metrics_data`` maps to the sample value as the string
        read from the endpoint (when a name appears more than once, the last
        sample read wins). ``metrics_descriptions`` maps to the ``# HELP``
        text and only contains names that are also in ``metrics_data``.
        Both are empty when nothing could be collected.
    """
    import shlex  # function-scope import keeps this block self-contained

    metrics_data = {}
    metrics_descriptions = {}

    def parse_json(raw):
        """Return kubectl JSON output as a dict, or None if empty/invalid."""
        if not raw:
            return None
        try:
            parsed = json.loads(raw)
        except ValueError:
            return None
        return parsed if isinstance(parsed, dict) else None

    def record_samples(text, wanted):
        """Store the wanted samples and HELP texts found in exposition text."""
        for raw_line in text.splitlines():
            line = raw_line.strip()
            if not line:
                continue

            if line.startswith("#"):
                # "# HELP <name> <description>"; every other comment line
                # (TYPE, free-form) carries nothing we need.
                fields = line.split(None, 3)
                if len(fields) == 4 and fields[1] == "HELP" and fields[2] in wanted:
                    metrics_descriptions[fields[2]] = fields[3]
                continue

            # Sample line: name[{labels}] value [timestamp]. Label values may
            # contain spaces, so cut at the label braces rather than on
            # whitespace, and take the first field after them so an optional
            # trailing timestamp is not mistaken for the value.
            if "{" in line:
                name, _, remainder = line.partition("{")
                _, closing, tail = remainder.rpartition("}")
                if not closing:
                    continue  # malformed line: unterminated label set
                fields = tail.split()
            else:
                name, *fields = line.split()

            if fields and name in wanted:
                metrics_data[name] = fields[0]

    rc_list_raw, err = self.run_command("kubectl get resourcecompositions -o json")
    rc_list = parse_json(rc_list_raw)
    if not rc_list:
        return metrics_data, metrics_descriptions

    # The namespace and label come from user-authored resources and end up in
    # a command string. shlex.quote leaves plain values such as "app=foo"
    # untouched. NOTE(review): assumes run_command hands the string to a
    # shell -- confirm.
    namespace = shlex.quote(str(custom_res_instance))

    # `kubectl get ... -o json` already returns every object in full, so the
    # list items are used directly instead of re-fetching each one by name.
    for resource_composition in rc_list.get("items") or []:
        try:
            spec = resource_composition["spec"]
            if spec["newResource"]["resource"]["kind"] != custom_resource:
                continue
            app_endpoint = spec["resmonitor"]["spec"]["appEndpoints"]
            label_selector = app_endpoint["label"]
            endpoint_path = app_endpoint["endpoint"]
            metrics_names = app_endpoint["metrics"]  # metric names to keep
        except (KeyError, TypeError):
            continue  # not our kind's composition, or no endpoints defined

        # label, endpoint and metrics must all be non-empty
        if not label_selector or not endpoint_path or not metrics_names:
            continue

        selector = shlex.quote(str(label_selector))

        pods_raw, err = self.run_command(
            f"kubectl get pods -n {namespace} -l {selector} -o json")
        if not pods_raw:
            print(err)
            continue
        pod_items = (parse_json(pods_raw) or {}).get("items") or []
        if not pod_items:
            continue

        # The Service lookup does not depend on the Pod, so do it once here.
        svc_raw, err = self.run_command(
            f"kubectl get svc -n {namespace} -l {selector} -o json")
        try:
            node_port = parse_json(svc_raw)["items"][0]["spec"]["ports"][0]["nodePort"]
        except (KeyError, IndexError, TypeError):
            continue  # no matching Service, or it exposes no nodePort

        wanted = frozenset(metrics_names)
        path = str(endpoint_path).lstrip("/")

        for pod in pod_items:
            pod_name = (pod.get("metadata") or {}).get("name")
            # hostIP is absent until the Pod has been scheduled onto a node.
            host_ip = (pod.get("status") or {}).get("hostIP")
            if not host_ip:
                continue

            url = f"http://{host_ip}:{node_port}/{path}"
            try:
                # Bounded wait: an unresponsive endpoint must not hang the plugin.
                resp = requests.get(url, timeout=5)
                resp.raise_for_status()
            except requests.RequestException as e:
                print(f"Failed to query metrics for pod {pod_name} at {url}: {e}")
                continue

            record_samples(resp.text, wanted)

    # Callers index metrics_data by the keys of the descriptions dict, so a
    # description without a value must not be returned.
    described = {
        name: text
        for name, text in metrics_descriptions.items()
        if name in metrics_data
    }
    return metrics_data, described
1100+
1101+
10021102
def get_metrics_cr(self, custom_resource, custom_res_instance, opformat, kubeconfig):
10031103
namespace = self.get_kubeplus_namespace(kubeconfig)
10041104
accountidentity = self._get_identity(custom_resource, custom_res_instance, namespace)
@@ -1013,6 +1113,7 @@ def get_metrics_cr(self, custom_resource, custom_res_instance, opformat, kubecon
10131113
num_of_hosts_conn = self._parse_number_of_hosts(pod_list, kubecfg=kubeconfig)
10141114
cpu_conn, memory_conn, individual_pod_metrics = self._get_cpu_memory_usage_kubelet(pod_list, kubecfg=kubeconfig)
10151115
networkReceiveBytesTotal, networkTransmitBytesTotal, oom_events = self._get_cadvisor_metrics(pod_list, kubecfg=kubeconfig)
1116+
custom_metrics_data, custom_metrics_descriptions = self._get_custom_metrics(custom_resource, custom_res_instance)
10161117

10171118
num_of_not_running_pods = self._num_of_not_running_pods(pod_list, kubecfg=kubeconfig)
10181119

@@ -1037,6 +1138,11 @@ def get_metrics_cr(self, custom_resource, custom_res_instance, opformat, kubecon
10371138
op['networkTransmitBytes'] = str(networkTransmitBytesTotal) + " bytes"
10381139
op['notRunningPods'] = str(num_of_not_running_pods)
10391140
op['oom_events'] = str(oom_events)
1141+
1142+
# Append custom metrics
1143+
for metric, value in custom_metrics_data.items():
1144+
op[f'{metric}'] = str(value)
1145+
10401146
json_op = json.dumps(op)
10411147
print(json_op)
10421148
elif opformat == 'prometheus':
@@ -1066,6 +1172,11 @@ def get_metrics_cr(self, custom_resource, custom_res_instance, opformat, kubecon
10661172
podMetrics = podMetrics + pod_cpu_mem
10671173

10681174
metricsToReturn = cpuMetrics + "\n" + memoryMetrics + "\n" + storageMetrics + "\n" + numOfPods + "\n" + numOfContainers + "\n" + networkReceiveBytes + "\n" + networkTransmitBytes + "\n" + numOfNotRunningPods + "\n" + oomEvents + "\n" + podMetrics
1175+
1176+
# Append custom metrics
1177+
for metric, value in custom_metrics_data.items():
1178+
metricsToReturn += metric + '{custom_resource="' + fq_instance + '"} ' + str(value) + ' ' + timeInMillis + "\n"
1179+
10691180
print(metricsToReturn)
10701181
elif opformat == 'pretty':
10711182
print("---------------------------------------------------------- ")
@@ -1081,6 +1192,10 @@ def get_metrics_cr(self, custom_resource, custom_res_instance, opformat, kubecon
10811192
print(" Total Storage(bytes): " + str(total_storage) + "Gi")
10821193
print(" Total Network bytes received: " + str(networkReceiveBytesTotal))
10831194
print(" Total Network bytes transferred: " + str(networkTransmitBytesTotal))
1195+
print("Custom application metrics:")
1196+
for metric, description in custom_metrics_descriptions.items():
1197+
print(" " + description + ": " + str(custom_metrics_data[metric]))
1198+
10841199
print("---------------------------------------------------------- ")
10851200
else:
10861201
print("Unknown output format specified. Accepted values: pretty, json, prometheus")

0 commit comments

Comments
 (0)