Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
42 changes: 42 additions & 0 deletions deploy/kubeplus-chart/templates/kubeplus-components-6.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -311,6 +311,18 @@ spec:
requests:
storage: 1Gi # Make sure this matches the PV size
---
# PersistentVolumeClaim named ollama-models-pvc, created in the Helm release
# namespace. It is the claim referenced by the `ollama-models` pod volume
# (claimName: ollama-models-pvc).
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
name: ollama-models-pvc
namespace: {{ .Release.Namespace }}
spec:
# No storageClassName is given in this manifest.
# NOTE(review): presumably the cluster's default StorageClass is expected to
# provision the volume -- confirm for the target clusters.
accessModes:
- ReadWriteOnce
resources:
requests:
# NOTE(review): confirm 5Gi is enough for every model that will be pulled
# into the volume.
storage: 5Gi
---
apiVersion: apps/v1
kind: Deployment
metadata:
Expand Down Expand Up @@ -349,6 +361,33 @@ spec:
- "--secret"
- "webhook-tls-certificates"
containers:
# Container `ollama-ai`: exposes port 8080 (named `http`) and mounts the
# `ollama-models` volume at /models.
- name: ollama-ai
# NOTE(review): `ollama:latest` is an unqualified, floating image reference.
# Presumably this should point at the image built from
# plugins/ai-analysis/Dockerfile (registry/repo:tag) -- confirm. With
# imagePullPolicy IfNotPresent a node that already caches `latest` will not
# re-pull a newer build.
image: ollama:latest
imagePullPolicy: IfNotPresent
env:
# Model name handed to the container through the environment.
- name: MODEL_NAME
value: "gemma:2b"
# Same path as the volumeMount below.
- name: OLLAMA_MODELS
value: "/models"
- name: OLLAMA_MAX_LOADED_MODELS
value: "1"
# Same number as containerPort below.
- name: PORT
value: "8080"
ports:
- name: http
containerPort: 8080
volumeMounts:
- name: ollama-models
mountPath: /models
# Requests: 1 CPU / 3Gi memory / 1Gi ephemeral storage.
# Limits:   2 CPU / 5Gi memory / 2Gi ephemeral storage.
resources:
requests:
cpu: "1"
memory: "3Gi"
ephemeral-storage: "1Gi"
limits:
cpu: "2"
memory: "5Gi"
ephemeral-storage: "2Gi"
- name: kubeconfiggenerator
image: {{ .Values.CRD_REGISTRATION_HELPER }} #gcr.io/cloudark-kubeplus/kubeconfiggenerator:3.0.27
imagePullPolicy: IfNotPresent
Expand Down Expand Up @@ -433,6 +472,9 @@ spec:
- name: webhook-certs
secret:
secretName: webhook-tls-certificates
# Pod volume backed by the ollama-models-pvc claim; the ollama-ai container
# mounts it at /models.
- name: ollama-models
persistentVolumeClaim:
claimName: ollama-models-pvc
---
apiVersion: batch/v1
kind: Job
Expand Down
23 changes: 23 additions & 0 deletions plugins/ai-analysis/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# Image for the AI log-analysis sidecar: the ollama base image plus a Python
# virtualenv that runs the Flask app in /app/app.py.
FROM ollama/ollama:latest

# Python toolchain for the Flask app; curl is used by entrypoint.sh to poll the
# ollama API. The apt lists are removed in the same layer to keep it small.
RUN apt-get update && \
    apt-get install -y --no-install-recommends \
    python3 python3-venv python3-pip python3-full ca-certificates curl && \
    rm -rf /var/lib/apt/lists/*

# Dedicated virtualenv, put first on PATH so `python3` and `pip` resolve to it.
RUN python3 -m venv /opt/venv
ENV PATH="/opt/venv/bin:${PATH}"

RUN pip install --no-cache-dir flask ollama requests

WORKDIR /app
COPY app.py /app/
COPY entrypoint.sh /usr/local/bin/entrypoint.sh
RUN chmod +x /usr/local/bin/entrypoint.sh

# Default model. Kept identical to the fallback used by app.py and
# entrypoint.sh and to the value set in the KubePlus Deployment, so the image
# behaves the same whether or not MODEL_NAME is overridden at run time.
ENV MODEL_NAME=gemma:2b
ENV PORT=8080

EXPOSE 8080

ENTRYPOINT ["/usr/local/bin/entrypoint.sh"]
61 changes: 61 additions & 0 deletions plugins/ai-analysis/app.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
from flask import Flask, request, jsonify
from ollama import generate
import os

# Flask application object; the route handlers in this module register on it.
app = Flask(__name__)
# Name of the model passed to ollama; taken from MODEL_NAME when it is set.
MODEL = os.environ.get("MODEL_NAME", "gemma:2b")

@app.route("/healthz", methods=["GET"])
def healthz():
    """Health-check endpoint: always answers HTTP 200 with the body "ok"."""
    return ("ok", 200)

# Instruction header sent to the model ahead of the raw logs.
# The JSON skeleton includes "overall_status" because TASKS and RULES both
# require that field; without it the "do not add any fields" rule contradicted
# the task list.
_CRAILOGS_PROMPT = """
You are a Kubernetes SRE copilot. Analyze the raw logs below and produce a strictly formatted JSON report.

TASKS:
1. Provide a 1-2 line "overall_status" summarizing the observed issues.
2. Detect incidents (SEV1-SEV3), each including:
- pods
- patterns
- sample_log
- likely_root_cause
- impact
- recommended_actions
3. Output ONLY the following JSON structure:

{
"title": "<string>",
"overall_status": "<string>",
"incidents": [
{
"pods": ["<pod>", "..."],
"patterns": ["<pattern>", "..."],
"sample_log": "<string>",
"likely_root_cause": "<string>",
"impact": "<string>",
"recommended_actions": ["<action>", "..."]
}
]
}

RULES:
- Consider log lines as incidents if they contain keywords: ERROR, FAIL, panic, CrashLoopBackOff, OOMKilled, exception.
- For each detected incident, fill the "incidents" array with all required fields.
- Be concise; overall_status should be 1-2 lines.
- Stick strictly to the JSON structure; DO NOT add any fields or text outside the JSON.
- Do not be too verbose. Be concise and stick exactly to the JSON format.

LOGS:
""".strip()


@app.route("/crailogs", methods=["POST"])
def cr_ai_logs():
    """Summarize raw container logs with the configured model.

    Request body: a JSON object with an optional "logs" entry holding the raw
    log text. The body is parsed as JSON regardless of the Content-Type header.

    Returns:
        200 with {"output": <model response text>} on success.
        400 with {"error": ...} when the body is not a JSON object.
        500 with {"error": ...} when the model call raises.
    """
    # silent=True turns malformed JSON into None so the caller gets a JSON
    # error body instead of Flask's default HTML 400 page.
    data = request.get_json(force=True, silent=True)
    if not isinstance(data, dict):
        # Valid JSON that is not an object (list, string, null, ...) has no
        # .get(); reject it explicitly instead of crashing outside the try.
        return jsonify({"error": "request body must be a JSON object"}), 400

    logs = data.get("logs") or ""
    prompt = f"{_CRAILOGS_PROMPT}\n{logs}"
    try:
        response = generate(model=MODEL, prompt=prompt)
        return jsonify({"output": response.get("response", "")}), 200
    except Exception as e:
        # Deliberately broad: any model/client failure is reported to the
        # caller as a JSON 500 rather than an unhandled traceback.
        return jsonify({"error": str(e)}), 500

if __name__ == "__main__":
    # Listen on every interface; the port comes from PORT (8080 when unset).
    listen_port = int(os.environ.get("PORT", "8080"))
    app.run(host="0.0.0.0", port=listen_port)
23 changes: 23 additions & 0 deletions plugins/ai-analysis/entrypoint.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
#!/usr/bin/env bash
#
# Container entrypoint: start `ollama serve` in the background, wait for its
# HTTP API, pull the configured model, then replace this shell with the Flask
# app.
#
# Environment (defaults applied when unset or empty):
#   MODEL_NAME     model to pull                       (gemma:2b)
#   OLLAMA_MODELS  directory ollama stores models in   (/models)
#   PORT           port announced for the Flask app    (8080)
set -euo pipefail

: "${MODEL_NAME:=gemma:2b}"
: "${OLLAMA_MODELS:=/models}"
: "${PORT:=8080}"
# A ":=" default only creates a shell variable. Export all three so the child
# processes (`ollama serve`, `ollama pull`, the Flask app) actually see the
# defaults when the variables were not already in the environment.
export MODEL_NAME OLLAMA_MODELS PORT

echo "[entrypoint] starting ollama serve"
ollama serve &
ollama_pid=$!

echo "[entrypoint] waiting for ollama..."
attempts=0
until curl -fsS http://127.0.0.1:11434/api/tags >/dev/null 2>&1; do
  # Fail fast if the server process already died instead of polling for the
  # full timeout.
  if ! kill -0 "${ollama_pid}" 2>/dev/null; then
    echo "ollama serve exited before becoming ready" >&2
    exit 1
  fi
  attempts=$((attempts + 1))
  if ((attempts > 60)); then
    echo "ollama not ready" >&2
    exit 1
  fi
  sleep 1
done

echo "[entrypoint] pulling model: ${MODEL_NAME}"
# Best effort: the model may already be present in OLLAMA_MODELS, so a failed
# pull is not fatal -- but it is reported instead of being silently ignored.
if ! ollama pull "${MODEL_NAME}"; then
  echo "[entrypoint] WARNING: could not pull model ${MODEL_NAME}; continuing" >&2
fi

sleep 2

echo "[entrypoint] starting flask on :${PORT}"
exec python3 /app/app.py
77 changes: 77 additions & 0 deletions plugins/crailogs.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
import subprocess
import sys
import json
import platform
import requests
import os
from crmetrics import CRBase

class CRLogs(CRBase):
    """Collects the logs of every container of a Pod by shelling out to kubectl."""

    def _run_kubectl(self, args, kubeconfig):
        """Run `kubectl <args> <kubeconfig flags>` and return its stdout as text.

        args: list of kubectl arguments (already split, never shell-parsed).
        kubeconfig: extra command-line text such as "--kubeconfig=<path>"; it
            is tokenized with shlex so it behaves as it did when the command
            line was assembled as a single shell string.

        stderr is captured and discarded. Output is decoded as UTF-8 with
        undecodable bytes replaced, so binary noise in a log cannot raise.
        """
        import shlex  # local import keeps this class self-contained

        cmd = ['kubectl'] + list(args) + shlex.split(kubeconfig or '')
        # Argument list instead of shell=True: pod/namespace/container names
        # are passed verbatim and can never be interpreted by a shell.
        out = subprocess.Popen(cmd, stdout=subprocess.PIPE,
                               stderr=subprocess.PIPE).communicate()[0]
        return out.decode('utf-8', errors='replace') if out else ''

    def _get_container_logs(self, pod, namespace, containers, kubeconfig):
        """Return the logs of the given containers of `pod` as one string.

        containers: list of dicts that each carry a 'name' key.
        Every container contributes a header line, followed (when kubectl
        printed anything) by the log text and a separator. A failure while
        running kubectl is recorded as the exception text.
        """
        container_logs = []
        for c in containers:
            container = c['name']
            container_logs.append("======== Pod::" + pod + "/container::" + container + " ===========")
            try:
                out = self._run_kubectl(
                    ['logs', pod, '-n', namespace, '-c', container], kubeconfig)
                if out:
                    # Decoded text, not str(bytes): the latter would embed the
                    # "b'...'" repr with escaped newlines into the logs.
                    container_logs.append(out)
                    container_logs.append("================================================\n\n")
            except Exception as e:
                container_logs.append(str(e))

        return "\n".join(container_logs)

    def get_logs(self, pod, namespace, kubeconfig):
        """Return the logs of all containers, then all init containers, of `pod`.

        The Pod spec is read with `kubectl get pods <pod> -o json`. Any error
        (kubectl failure, unparsable JSON, missing keys) is appended to the
        result as text instead of being raised.
        """
        joined_logs = []
        try:
            out = self._run_kubectl(
                ['get', 'pods', pod, '-n', namespace, '-o', 'json'], kubeconfig)

            if out:
                json_output = json.loads(out)
                containers = json_output['spec']['containers']
                joined_logs.append(self._get_container_logs(pod, namespace, containers, kubeconfig))

                if 'initContainers' in json_output['spec']:
                    init_containers = json_output['spec']['initContainers']
                    joined_logs.append(self._get_container_logs(pod, namespace, init_containers, kubeconfig))

        except Exception as e:
            joined_logs.append(str(e))

        return "\n".join(joined_logs)

if __name__ == '__main__':
    # Usage: crailogs.py <Kind> <Instance> <kubeconfig flag>
    # Gathers the logs of every Pod returned by get_pods_in_ns() for the
    # instance, posts them to the local model service and prints its answer.
    if len(sys.argv) < 4:
        print("Usage: crailogs.py <Kind> <Instance> <kubeconfig flag>")
        sys.exit(1)

    crLogs = CRLogs()
    kind = sys.argv[1]
    instance = sys.argv[2]
    kubeconfig = sys.argv[3]

    joined_logs = []
    pods = crLogs.get_pods_in_ns(kind, instance, kubeconfig)
    for pod in pods:
        pod_name = pod['Name']
        pod_namespace = pod['Namespace']
        joined_logs.append(crLogs.get_logs(pod_name, pod_namespace, kubeconfig))
        joined_logs.append("---------------------------------------")

    all_logs = "\n".join(joined_logs)
    url = "http://localhost:8080/crailogs"
    payload = {"logs": all_logs}

    response = None
    try:
        # Bound only the connect phase: an unreachable service fails quickly,
        # while a slow model inference is still allowed to finish.
        response = requests.post(url, json=payload, timeout=(5, None))
        response.raise_for_status()
    except requests.exceptions.RequestException as e:
        print(f"Error communicating with model service: {e}")
        response = None

    if response is not None:
        try:
            result = response.json()
        except ValueError:
            # Handled separately from the request itself: newer `requests`
            # versions raise a JSON error that is also a RequestException.
            print(f"Response was not valid JSON: {response.text}")
        else:
            if isinstance(result, dict) and 'output' in result:
                output = result['output']
                # The service returns the model's answer as a string. When it
                # is JSON, pretty-print the document itself; otherwise show
                # the raw text rather than a quoted, escaped string literal.
                try:
                    print(json.dumps(json.loads(output), indent=2))
                except (TypeError, ValueError):
                    print(output)
            else:
                print("Model service response did not contain an 'output' field.")
64 changes: 64 additions & 0 deletions plugins/kubectl-ailogs
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
#!/bin/bash

source utils.sh

# Print the usage text of `kubectl ailogs` on stdout, then terminate the
# calling shell with status 0.
print_help () {
  printf '%s\n' \
    "NAME" \
    " kubectl ailogs" \
    "" \
    "SYNOPSIS" \
    " kubectl ailogs <Kind> <Instance> -k <Absolute path to kubeconfig>" \
    "" \
    "DESCRIPTION" \
    " kubectl ailogs summarizes the state of container logs for all the containers of all the Pods that are related to the app instance."
  exit 0
}

# Main flow of `kubectl ailogs <Kind> <Instance> -k <kubeconfig>`.
# Exit statuses are intentionally left at 0 on the error paths below, as
# before, so existing callers that only inspect the output keep working.

# <Kind> <Instance> -k <kubeconfig> => at least four arguments.
if (( $# < 4 )); then
  print_help
fi

kind=$1
instance=$2

kubeconfig1="$HOME/.kube/config" # Default value

# Drop <Kind> and <Instance> so getopts only sees the option arguments.
shift 2

while getopts ":k:" opt; do
  case "${opt}" in
    k)
      kubeconfig1=$OPTARG
      if [[ ! -f "$kubeconfig1" ]]; then
        echo "Kubeconfig $kubeconfig1 does not exist."
        exit 0
      fi
      ;;
    :)
      # Leading ':' in the optstring => a missing argument is reported here,
      # with the option letter in OPTARG.
      echo "Option -${OPTARG} requires an argument." 1>&2
      print_help
      ;;
    \?)
      # OPTARG holds the offending option letter ($1 is just the first
      # remaining argument, not necessarily the invalid one).
      echo "Invalid option: -${OPTARG} " 1>&2
      print_help
      exit 0
      ;;
  esac
done

kubeconfig="--kubeconfig=${kubeconfig1}"
# Legacy form: a ready-made "--kubeconfig=<path>" passed as fourth argument.
if [[ $# -eq 4 && "$4" == *"kubeconfig="* ]]; then
  kubeconfig=$4
fi

canonicalKind=$(get_canonical_kind "$kind")

if [[ $canonicalKind == *"Unknown"* ]]; then
  echo "$canonicalKind"
  exit 0
fi

# Namespace of the first Pod whose listing line mentions kubeplus-deployment.
# Taking only the first match keeps the value a single word even when several
# such Pods exist.
kubeplusNamespace=$(kubectl get pods -A "$kubeconfig" \
  | awk '/kubeplus-deployment/ {print $1; exit}')
if [[ -z "$kubeplusNamespace" ]]; then
  echo "Could not find the kubeplus-deployment Pod in any namespace."
  exit 0
fi

resStatus=$(kubectl "$kubeconfig" get "$kind" "$instance" -n "$kubeplusNamespace" -o json 2>&1)
if [[ $resStatus =~ 'Error' ]]; then
  echo "$resStatus"
  exit 0
fi

python "/$KUBEPLUS_HOME/plugins/crailogs.py" "$canonicalKind" "$instance" "$kubeconfig"
Loading