Skip to content

Commit 3d5cef6

Browse files
committed
kubectl-ailogs
1 parent dffd5ab commit 3d5cef6

File tree

6 files changed

+290
-0
lines changed

6 files changed

+290
-0
lines changed

deploy/kubeplus-chart/templates/kubeplus-components-6.yaml

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -311,6 +311,18 @@ spec:
311311
requests:
312312
storage: 1Gi # Make sure this matches the PV size
313313
---
314+
# PersistentVolumeClaim backing the "ollama-models" volume that the ollama-ai
# container mounts at /models (the directory named by its OLLAMA_MODELS env var).
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: ollama-models-pvc
  namespace: {{ .Release.Namespace }}
spec:
  accessModes:
    - ReadWriteOnce
  resources:
    requests:
      # NOTE(review): must be large enough for every model that gets pulled — confirm against MODEL_NAME.
      storage: 5Gi
325+
---
314326
apiVersion: apps/v1
315327
kind: Deployment
316328
metadata:
@@ -349,6 +361,33 @@ spec:
349361
- "--secret"
350362
- "webhook-tls-certificates"
351363
containers:
364+
- name: ollama-ai
365+
image: ollama:latest
366+
imagePullPolicy: IfNotPresent
367+
env:
368+
- name: MODEL_NAME
369+
value: "gemma:2b"
370+
- name: OLLAMA_MODELS
371+
value: "/models"
372+
- name: OLLAMA_MAX_LOADED_MODELS
373+
value: "1"
374+
- name: PORT
375+
value: "8080"
376+
ports:
377+
- name: http
378+
containerPort: 8080
379+
volumeMounts:
380+
- name: ollama-models
381+
mountPath: /models
382+
resources:
383+
requests:
384+
cpu: "1"
385+
memory: "3Gi"
386+
ephemeral-storage: "1Gi"
387+
limits:
388+
cpu: "2"
389+
memory: "5Gi"
390+
ephemeral-storage: "2Gi"
352391
- name: kubeconfiggenerator
353392
image: {{ .Values.CRD_REGISTRATION_HELPER }} #gcr.io/cloudark-kubeplus/kubeconfiggenerator:3.0.27
354393
imagePullPolicy: IfNotPresent
@@ -433,6 +472,9 @@ spec:
433472
- name: webhook-certs
434473
secret:
435474
secretName: webhook-tls-certificates
475+
- name: ollama-models
476+
persistentVolumeClaim:
477+
claimName: ollama-models-pvc
436478
---
437479
apiVersion: batch/v1
438480
kind: Job

plugins/ai-analysis/Dockerfile

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
# Image for the AI log-analysis service: the stock Ollama image plus a small
# Flask API (app.py) that is launched through entrypoint.sh.
FROM ollama/ollama:latest

# Python tooling for the Flask API; curl is used by entrypoint.sh to poll the
# local Ollama HTTP endpoint.
RUN apt-get update && \
    apt-get install -y --no-install-recommends \
      python3 python3-venv python3-pip python3-full ca-certificates curl && \
    rm -rf /var/lib/apt/lists/*

# Dedicated virtualenv placed first on PATH so the `pip` and `python3` used
# below (and by entrypoint.sh) resolve to it.
RUN python3 -m venv /opt/venv
ENV PATH="/opt/venv/bin:${PATH}"

RUN pip install --no-cache-dir flask ollama requests

WORKDIR /app
COPY app.py /app/
COPY entrypoint.sh /usr/local/bin/entrypoint.sh
RUN chmod +x /usr/local/bin/entrypoint.sh

# Default model. ENV always defines the variable, so this value (not the
# fallbacks inside app.py / entrypoint.sh) is what an un-configured container
# uses; keep it identical to those fallbacks (gemma:2b).
ENV MODEL_NAME=gemma:2b
ENV PORT=8080

EXPOSE 8080

ENTRYPOINT ["/usr/local/bin/entrypoint.sh"]

plugins/ai-analysis/app.py

Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
from flask import Flask, request, jsonify
2+
from ollama import generate
3+
import os
4+
5+
# Flask application object on which this module's route handlers are registered.
app = Flask(__name__)

# Name of the Ollama model used for generation; taken from MODEL_NAME when set.
MODEL = os.environ.get("MODEL_NAME", "gemma:2b")
7+
8+
@app.get("/healthz")
def healthz():
    """Health-check endpoint: always answers with body ``ok`` and status 200."""
    body, status = "ok", 200
    return body, status
11+
12+
# Instructions sent to the model ahead of the raw logs. The summary field is
# called "overall_status" in TASKS, RULES *and* the JSON skeleton: the model is
# told not to add fields, so the skeleton must contain every field it is asked
# to produce.
_CRAILOGS_PROMPT = """
You are a Kubernetes SRE copilot. Analyze the raw logs below and produce a strictly formatted JSON report.

TASKS:
1. Provide a 1-2 line "overall_status" summarizing the observed issues.
2. Detect incidents (SEV1-SEV3), each including:
- pods
- patterns
- sample_log
- likely_root_cause
- impact
- recommended_actions
3. Output ONLY the following JSON structure:

{
  "overall_status": "<string>",
  "incidents": [
    {
      "pods": ["<pod>", "..."],
      "patterns": ["<pattern>", "..."],
      "sample_log": "<string>",
      "likely_root_cause": "<string>",
      "impact": "<string>",
      "recommended_actions": ["<action>", "..."]
    }
  ]
}

RULES:
- Consider log lines as incidents if they contain keywords: ERROR, FAIL, panic, CrashLoopBackOff, OOMKilled, exception.
- For each detected incident, fill the "incidents" array with all required fields.
- Be concise; overall_status should be 1-2 lines.
- Stick strictly to the JSON structure; DO NOT add any fields or text outside the JSON.
- Do not be too verbose. Be concise and stick exactly to the JSON format.

LOGS:
""".strip()


@app.route("/crailogs", methods=["POST"])
def cr_ai_logs():
    """Ask the configured model to analyse the logs posted by the caller.

    Request body: a JSON object whose optional ``"logs"`` entry holds the raw
    log text.

    Responses (all JSON):
        200 ``{"output": <model text>}`` on success.
        400 ``{"error": ...}`` when the body is not a JSON object.
        500 ``{"error": ...}`` when the model call raises.
    """
    # silent=True turns unparsable bodies into None so they get the same JSON
    # 400 answer as well-formed-but-wrong bodies (e.g. a list or a string).
    data = request.get_json(force=True, silent=True)
    if not isinstance(data, dict):
        return jsonify({"error": "request body must be a JSON object"}), 400

    logs = data.get("logs") or ""
    if not isinstance(logs, str):
        logs = str(logs)

    prompt = f"{_CRAILOGS_PROMPT}\n{logs}"
    try:
        response = generate(model=MODEL, prompt=prompt)
        return jsonify({"output": response.get("response", "")}), 200
    except Exception as e:
        # Request boundary: report the failure to the client, but also keep a
        # traceback in the service log instead of dropping it.
        app.logger.exception("model generation failed")
        return jsonify({"error": str(e)}), 500
59+
60+
if __name__ == "__main__":
    # Bind to all interfaces; the port comes from PORT and falls back to 8080.
    listen_port = int(os.getenv("PORT", "8080"))
    app.run(host="0.0.0.0", port=listen_port)

plugins/ai-analysis/entrypoint.sh

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
#!/usr/bin/env bash
# Container entrypoint: start the Ollama server in the background, wait until
# its HTTP API answers, pull the configured model, then replace this shell
# with the Flask app.
set -euo pipefail

# Apply defaults, then export them. An assignment made here is only a shell
# variable unless exported, and the child processes (`ollama serve`, app.py)
# read these settings from their environment.
: "${MODEL_NAME:=gemma:2b}"
: "${OLLAMA_MODELS:=/models}"
: "${PORT:=8080}"
export MODEL_NAME OLLAMA_MODELS PORT

echo "[entrypoint] starting ollama serve"
ollama serve &

echo "[entrypoint] waiting for ollama..."
# Poll once per second and give up after about a minute.
i=0; until curl -fsS http://127.0.0.1:11434/api/tags >/dev/null 2>&1; do
  i=$((i+1)); [ $i -gt 60 ] && { echo "ollama not ready" >&2; exit 1; }
  sleep 1
done

echo "[entrypoint] pulling model: ${MODEL_NAME}"
# Best effort: a failed pull does not stop the container, but it is reported
# instead of being silently ignored.
ollama pull "${MODEL_NAME}" || echo "[entrypoint] warning: could not pull ${MODEL_NAME}; continuing" >&2

sleep 2

echo "[entrypoint] starting flask on :${PORT}"
exec python3 /app/app.py

plugins/crailogs.py

Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,77 @@
1+
import subprocess
2+
import sys
3+
import json
4+
import platform
5+
import requests
6+
import os
7+
from crmetrics import CRBase
8+
9+
class CRLogs(CRBase):
    """Collect container logs for Pods by invoking ``kubectl``."""

    def _kubectl_stdout(self, args, kubeconfig):
        """Run ``kubectl`` and return its standard output as text.

        Args:
            args: list of kubectl arguments (without the leading ``kubectl``).
            kubeconfig: kubeconfig flag string, e.g. ``--kubeconfig=/path``;
                it is tokenised shell-style and appended to the command.

        Returns:
            Decoded stdout (undecodable bytes are replaced). stderr and the
            exit status are ignored, as before.
        """
        import shlex  # function-scope import keeps this class self-contained

        # Argument list + no shell: pod/namespace/container names are passed
        # verbatim and can never be interpreted as shell syntax.
        cmd = ['kubectl', *args, *shlex.split(kubeconfig or '')]
        proc = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        # Decode instead of str(bytes): str() on bytes yields the "b'...'" repr
        # with escaped newlines, which is not usable log text.
        return proc.stdout.decode('utf-8', errors='replace')

    def _get_container_logs(self, pod, namespace, containers, kubeconfig):
        """Return the logs of the given containers of one Pod as a single string.

        Args:
            pod: Pod name.
            namespace: namespace of the Pod.
            containers: list of container dicts; only the ``name`` key is read.
            kubeconfig: kubeconfig flag string passed through to kubectl.

        Each container contributes a header line, its log text (when there is
        any, or the exception text if kubectl could not be run) and a closing
        separator line.
        """
        container_logs = []
        for c in containers:
            container = c['name']
            container_logs.append("======== Pod::" + pod + "/container::" + container + " ===========")
            try:
                out = self._kubectl_stdout(['logs', pod, '-n', namespace, '-c', container], kubeconfig)
                if out:
                    container_logs.append(out)
            except Exception as e:
                # Best effort: record the failure in place of the logs and
                # carry on with the remaining containers.
                container_logs.append(str(e))
            # Always close the section so consecutive containers stay delimited.
            container_logs.append("================================================\n\n")

        return "\n".join(container_logs)

    def get_logs(self, pod, namespace, kubeconfig):
        """Return the logs of all containers and init containers of a Pod.

        The Pod is fetched as JSON to discover its container names. If that
        lookup yields no output the result is an empty string; if it raises,
        the exception text is returned in place of the logs.
        """
        joined_logs = []
        try:
            out = self._kubectl_stdout(['get', 'pods', pod, '-n', namespace, '-o', 'json'], kubeconfig)

            if out:
                spec = json.loads(out)['spec']
                joined_logs.append(self._get_container_logs(pod, namespace, spec['containers'], kubeconfig))

                if 'initContainers' in spec:
                    joined_logs.append(self._get_container_logs(pod, namespace, spec['initContainers'], kubeconfig))

        except Exception as e:
            joined_logs.append(str(e))

        return "\n".join(joined_logs)
48+
49+
if __name__ == '__main__':
    # Usage: crailogs.py <Kind> <Instance> <kubeconfig flag>
    if len(sys.argv) < 4:
        sys.exit("Usage: crailogs.py <Kind> <Instance> <--kubeconfig=path>")

    crLogs = CRLogs()
    kind, instance, kubeconfig = sys.argv[1:4]

    # Gather the logs of every Pod reported for the instance, one section per Pod.
    joined_logs = []
    for pod in crLogs.get_pods_in_ns(kind, instance, kubeconfig):
        joined_logs.append(crLogs.get_logs(pod['Name'], pod['Namespace'], kubeconfig))
        joined_logs.append("---------------------------------------")

    all_logs = "\n".join(joined_logs)
    # NOTE(review): assumes the analysis service is reachable on localhost:8080
    # from wherever this script runs — confirm how the port is exposed.
    url = "http://localhost:8080/crailogs"
    payload = {"logs": all_logs}

    try:
        # Bound only the connect phase: generation time is unpredictable, but
        # an unreachable service should fail fast instead of hanging.
        response = requests.post(url, json=payload, timeout=(10, None))
        response.raise_for_status()
    except requests.exceptions.RequestException as e:
        print(f"Error communicating with model service: {e}")
    else:
        # Decode in a separate step: requests' JSONDecodeError is both a
        # RequestException and a ValueError, so a shared try block would report
        # a malformed body as a communication error.
        try:
            result = response.json()
        except ValueError:
            print(f"Response was not valid JSON: {response.text}")
        else:
            output = result.get('output') if isinstance(result, dict) else None
            if output is not None:
                # The model is asked for JSON: pretty-print it when it parses,
                # otherwise show the text exactly as returned.
                try:
                    print(json.dumps(json.loads(output), indent=2))
                except (TypeError, ValueError):
                    print(output)

plugins/kubectl-ailogs

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,64 @@
1+
#!/bin/bash
2+
3+
source utils.sh
4+
5+
print_help () {
    # Write the usage text to stdout, then end the script with status 0.
    printf '%s\n' \
        "NAME" \
        " kubectl ailogs" \
        "" \
        "SYNOPSIS" \
        " kubectl ailogs <Kind> <Instance> -k <Absolute path to kubeconfig>" \
        "" \
        "DESCRIPTION" \
        " kubectl ailogs summarizes the state of container logs for all the containers of all the Pods that are related to the app instance."
    exit 0
}
16+
17+
# Expect at least: <Kind> <Instance> -k <kubeconfig>; otherwise show the help.
if (( $# < 4 )); then
    print_help
fi

kind=$1
instance=$2

kubeconfig1="$HOME/.kube/config" # Default value

# Drop <Kind> and <Instance> so getopts sees only the option arguments.
shift;
shift;

while getopts ":k:" opt; do
    case ${opt} in
        k )
            kubeconfig1=$OPTARG
            if [ ! -f "$kubeconfig1" ]; then
                echo "Kubeconfig $kubeconfig1 does not exist."
                exit 0
            fi;;
        ? )
            # The optstring starts with ':', so getopts puts the offending
            # option letter in OPTARG; $1 is merely the first remaining argument.
            echo "Invalid option: -${OPTARG} " 1>&2
            print_help
            exit 0
            ;;
    esac
done

kubeconfig="--kubeconfig=$kubeconfig1"
if [ $# = 4 ] && [[ $4 == *"kubeconfig="* ]]; then
    kubeconfig=$4
fi

canonicalKind=$(get_canonical_kind "$kind")

if [[ $canonicalKind == *"Unknown"* ]]; then
    echo "$canonicalKind"
    exit 0
fi

# Namespace of the KubePlus deployment Pod. awk stops at the first matching
# line so the value stays a single word even if several Pods match.
kubeplusNamespace=$(kubectl get pods -A "$kubeconfig" | awk '/kubeplus-deployment/ {print $1; exit}')

# Verify the instance exists before collecting logs; kubectl's message is
# echoed unchanged (quoted, so its line breaks are preserved).
resStatus=$(kubectl "$kubeconfig" get "$kind" "$instance" -n "$kubeplusNamespace" -o json 2>&1)
if [[ $resStatus =~ 'Error' ]]; then
    echo "$resStatus"
    exit 0
fi

python "/$KUBEPLUS_HOME/plugins/crailogs.py" "$canonicalKind" "$instance" "$kubeconfig"

0 commit comments

Comments
 (0)