kubectl-ailogs

Pranav-V · Pranav-V · commit 3d5cef67252a · 2025-12-03T22:53:33.000-06:00
diff --git a/deploy/kubeplus-chart/templates/kubeplus-components-6.yaml b/deploy/kubeplus-chart/templates/kubeplus-components-6.yaml
@@ -311,6 +311,18 @@ spec:
     requests:
       storage: 1Gi  # Make sure this matches the PV size
 ---
+apiVersion: v1
+kind: PersistentVolumeClaim
+metadata:
+  name: ollama-models-pvc  # Referenced by the "ollama-models" volume of the Deployment below
+  namespace: {{ .Release.Namespace }}
+spec:
+  accessModes:
+    - ReadWriteOnce
+  resources:
+    requests:
+      storage: 5Gi  # Backs /models (OLLAMA_MODELS) in the ollama-ai container
+---
 apiVersion: apps/v1
 kind: Deployment
 metadata:
@@ -349,6 +361,33 @@ spec:
           - "--secret"
           - "webhook-tls-certificates"
       containers:
+      - name: ollama-ai
+        image: ollama:latest
+        imagePullPolicy: IfNotPresent
+        env:
+          - name: MODEL_NAME
+            value: "gemma:2b"
+          - name: OLLAMA_MODELS
+            value: "/models"
+          - name: OLLAMA_MAX_LOADED_MODELS
+            value: "1"
+          - name: PORT
+            value: "8080"
+        ports:
+          - name: http
+            containerPort: 8080
+        volumeMounts:
+          - name: ollama-models
+            mountPath: /models
+        resources:
+          requests:
+            cpu: "1"
+            memory: "3Gi"
+            ephemeral-storage: "1Gi"
+          limits:
+            cpu: "2"
+            memory: "5Gi"
+            ephemeral-storage: "2Gi"
       - name: kubeconfiggenerator
         image: {{ .Values.CRD_REGISTRATION_HELPER }}  #gcr.io/cloudark-kubeplus/kubeconfiggenerator:3.0.27
         imagePullPolicy: IfNotPresent
@@ -433,6 +472,9 @@ spec:
         - name: webhook-certs
           secret:
             secretName: webhook-tls-certificates
+        - name: ollama-models
+          persistentVolumeClaim:
+            claimName: ollama-models-pvc
 ---
 apiVersion: batch/v1
 kind: Job
diff --git a/plugins/ai-analysis/Dockerfile b/plugins/ai-analysis/Dockerfile
@@ -0,0 +1,28 @@
+# Image for the KubePlus AI log-analysis service: an Ollama server plus the
+# Flask wrapper in app.py, started through entrypoint.sh.
+FROM ollama/ollama:latest
+
+# Python for the Flask wrapper; curl is used by entrypoint.sh to poll Ollama.
+RUN apt-get update && \
+    apt-get install -y --no-install-recommends \
+    python3 python3-venv python3-pip python3-full ca-certificates curl && \
+    rm -rf /var/lib/apt/lists/*
+
+# Python packages go into a venv that is placed first on PATH.
+RUN python3 -m venv /opt/venv
+ENV PATH="/opt/venv/bin:${PATH}"
+
+RUN pip install --no-cache-dir flask ollama requests
+
+WORKDIR /app
+COPY app.py /app/
+COPY entrypoint.sh /usr/local/bin/entrypoint.sh
+RUN chmod +x /usr/local/bin/entrypoint.sh
+
+# Default model; kept identical to the fallback in app.py and entrypoint.sh.
+ENV MODEL_NAME=gemma:2b
+ENV PORT=8080
+
+EXPOSE 8080
+
+ENTRYPOINT ["/usr/local/bin/entrypoint.sh"]
diff --git a/plugins/ai-analysis/app.py b/plugins/ai-analysis/app.py
@@ -0,0 +1,61 @@
+from flask import Flask, request, jsonify
+from ollama import generate
+import os
+
+app = Flask(__name__)
+MODEL = os.getenv("MODEL_NAME", "gemma:2b")
+
+@app.get("/healthz")
+def healthz():
+    return "ok", 200
+
+@app.route("/crailogs", methods=["POST"])
+def cr_ai_logs():
+	data = request.get_json(force=True)
+	logs = data.get("logs", "")
+	prompt = """
+				You are a Kubernetes SRE copilot. Analyze the raw logs below and produce a strictly formatted JSON report.
+
+				TASKS:
+					1. Provide a 1-2 line "overall_status" summarizing the observed issues.
+					2. Detect incidents (SEV1-SEV3), each including:
+						- pods
+						- patterns
+						- sample_log
+						- likely_root_cause
+						- impact
+						- recommended_actions
+					3. Output ONLY the following JSON structure:
+
+				{
+					"title": "<string>",
+					"incidents": [
+						{
+							"pods": ["<pod>", "..."],
+							"patterns": ["<pattern>", "..."],
+							"sample_log": "<string>",
+							"likely_root_cause": "<string>",
+							"impact": "<string>",
+							"recommended_actions": ["<action>", "..."]
+						}
+					]
+				}
+
+				RULES:
+					- Consider log lines as incidents if they contain keywords: ERROR, FAIL, panic, CrashLoopBackOff, OOMKilled, exception.
+					- For each detected incident, fill the "incidents" array with all required fields.
+					- Be concise; overall_status should be 1-2 lines.
+					- Stick strictly to the JSON structure; DO NOT add any fields or text outside the JSON.
+					- Do not be too verbose. Be concise and stick exactly to the JSON format.
+
+				LOGS:
+			""".strip()
+	prompt = f"{prompt}\n{logs}"
+	try:
+		response = generate(model=f"{MODEL}", prompt=prompt)
+		return jsonify({"output": response.get("response", "")}), 200
+	except Exception as e:
+		return jsonify({"error": str(e)}), 500
+
+if __name__ == "__main__":
+    app.run(host="0.0.0.0", port=int(os.getenv("PORT", "8080")))
diff --git a/plugins/ai-analysis/entrypoint.sh b/plugins/ai-analysis/entrypoint.sh
@@ -0,0 +1,23 @@
+#!/usr/bin/env bash
+set -euo pipefail
+
+: "${MODEL_NAME:=gemma:2b}"
+: "${OLLAMA_MODELS:=/models}"
+: "${PORT:=8080}"
+
+echo "[entrypoint] starting ollama serve"
+ollama serve &
+
+echo "[entrypoint] waiting for ollama..."
+i=0; until curl -fsS http://127.0.0.1:11434/api/tags >/dev/null 2>&1; do
+  i=$((i+1)); [ $i -gt 60 ] && { echo "ollama not ready"; exit 1; }
+  sleep 1
+done
+
+echo "[entrypoint] pulling model: ${MODEL_NAME}"
+ollama pull "${MODEL_NAME}" || true
+
+sleep 2
+
+echo "[entrypoint] starting flask on :${PORT}"
+exec python3 /app/app.py
diff --git a/plugins/crailogs.py b/plugins/crailogs.py
@@ -0,0 +1,77 @@
+import subprocess
+import sys
+import json
+import platform
+import requests
+import os
+from crmetrics import CRBase
+
+class CRLogs(CRBase):
+
+	def _get_container_logs(self, pod, namespace, containers, kubeconfig):
+		container_logs = []
+		for c in containers:
+			container = c['name']
+			cmd = 'kubectl logs ' + pod + ' -n ' + namespace + ' -c ' + container + ' ' + kubeconfig
+			container_logs.append("======== Pod::" + pod + "/container::" + container + " ===========")
+			try:
+				out = subprocess.Popen(cmd, stdout=subprocess.PIPE,
+										stderr=subprocess.PIPE, shell=True).communicate()[0]
+				if out:
+					container_logs.append(str(out))
+					container_logs.append("================================================\n\n")
+			except Exception as e:
+				container_logs.append(str(e))
+				
+		return "\n".join(container_logs)
+
+	def get_logs(self, pod, namespace, kubeconfig):
+		cmd = 'kubectl get pods ' + pod + ' -n ' + namespace + ' -o json ' + kubeconfig
+		joined_logs = []
+		try:
+			out = subprocess.Popen(cmd, stdout=subprocess.PIPE,
+									stderr=subprocess.PIPE, shell=True).communicate()[0]
+
+			if out:
+				json_output = json.loads(out)
+				containers = json_output['spec']['containers']
+				joined_logs.append(self._get_container_logs(pod, namespace, containers, kubeconfig))
+			
+				if 'initContainers' in json_output['spec']:
+					init_containers = json_output['spec']['initContainers']
+					joined_logs.append(self._get_container_logs(pod, namespace, init_containers, kubeconfig))
+
+		except Exception as e:
+			joined_logs.append(str(e))
+			
+		return "\n".join(joined_logs)
+
+if __name__ == '__main__':
+	crLogs = CRLogs()
+	kind = sys.argv[1]
+	instance = sys.argv[2]
+	kubeconfig = sys.argv[3]
+	resources = {}
+	
+	joined_logs = []
+	pods = crLogs.get_pods_in_ns(kind, instance, kubeconfig)
+	for pod in pods:
+		pod_name = pod['Name']
+		pod_namespace = pod['Namespace']
+		joined_logs.append(crLogs.get_logs(pod_name, pod_namespace, kubeconfig))
+		joined_logs.append("---------------------------------------")
+		
+	all_logs = "\n".join(joined_logs)
+	url = "http://localhost:8080/crailogs"
+	payload = {"logs": all_logs}
+
+	try:
+		response = requests.post(url, json=payload)
+		response.raise_for_status()
+		result = response.json()
+		if 'output' in result:
+			print(json.dumps(result['output'], indent=2))
+	except requests.exceptions.RequestException as e:
+		print(f"Error communicating with model service: {e}")
+	except ValueError:
+		print(f"Response was not valid JSON: {response.text}")
diff --git a/plugins/kubectl-ailogs b/plugins/kubectl-ailogs
@@ -0,0 +1,74 @@
+#!/bin/bash
+# kubectl plugin: validates the arguments, checks that the resource instance
+# exists, and delegates log collection and summarization to crailogs.py.
+
+source utils.sh
+
+print_help () {
+    echo "NAME"
+    echo "        kubectl ailogs"
+    echo ""
+    echo "SYNOPSIS"
+    echo "        kubectl ailogs <Kind> <Instance> -k <Absolute path to kubeconfig>"
+    echo ""
+    echo "DESCRIPTION"
+    echo "        kubectl ailogs summarizes the state of container logs for all the containers of all the Pods that are related to the app instance."
+    exit 0
+}
+
+if (( $# < 4 )); then
+  print_help
+fi
+
+kind=$1
+instance=$2
+
+kubeconfig1="$HOME/.kube/config" # Default value
+
+shift;
+shift;
+
+while getopts ":k:" opt; do
+ case ${opt} in
+   k )
+     kubeconfig1=$OPTARG
+     if [ ! -f "$kubeconfig1" ]; then
+       echo "Kubeconfig $kubeconfig1 does not exist."
+       exit 0
+     fi;;
+    ? )
+      # With a leading ':' in the optstring, OPTARG holds the offending option.
+      echo "Invalid option: -${OPTARG} " 1>&2
+      print_help
+      exit 0
+      ;;
+ esac
+done
+
+kubeconfig="--kubeconfig=$kubeconfig1"
+# NOTE(review): this runs after the two shifts, so $4 is the sixth original
+# argument -- confirm whether a bare --kubeconfig= argument is still supported.
+if [ $# = 4 ] && [[ $4 == *"kubeconfig="* ]]; then
+    kubeconfig=$4
+fi
+
+canonicalKind=$(get_canonical_kind "$kind")
+
+if [[ $canonicalKind == *"Unknown"* ]]; then
+	echo "$canonicalKind"
+	exit 0
+fi
+
+kubeplusNamespace=$(kubectl get pods -A "$kubeconfig" | grep kubeplus-deployment | awk '{print $1}')
+# Without a namespace the lookup below would be given an empty -n value.
+if [ -z "$kubeplusNamespace" ]; then
+  echo "Error: could not find the kubeplus-deployment Pod."
+  exit 0
+fi
+resStatus=$(kubectl "$kubeconfig" get "$kind" "$instance" -n "$kubeplusNamespace" -o json 2>&1)
+if [[ $resStatus =~ 'Error' ]]; then
+  echo "$resStatus"
+  exit 0
+fi
+
+python /$KUBEPLUS_HOME/plugins/crailogs.py "$canonicalKind" "$instance" "$kubeconfig"