|
| 1 | +use prometheus::{ |
| 2 | + Counter, Encoder, Gauge, GaugeVec, Histogram, HistogramOpts, Opts, Registry, TextEncoder, |
| 3 | +}; |
| 4 | +use std::sync::Arc; |
| 5 | +use tokio::sync::RwLock; |
| 6 | +use tracing::info; |
| 7 | + |
| 8 | +pub struct IgniteMetrics { |
| 9 | + registry: Registry, |
| 10 | + pub vms_running: Gauge, |
| 11 | + pub vms_total: Counter, |
| 12 | + pub vm_boot_duration: Histogram, |
| 13 | + pub vm_memory_usage: GaugeVec, |
| 14 | + pub vm_cpu_usage: GaugeVec, |
| 15 | + pub snapshot_count: GaugeVec, |
| 16 | +} |
| 17 | + |
| 18 | +impl IgniteMetrics { |
| 19 | + pub fn new() -> Result<Self, prometheus::Error> { |
| 20 | + let registry = Registry::new(); |
| 21 | + |
| 22 | + let vms_running = Gauge::with_opts(Opts::new( |
| 23 | + "ignite_vms_running", |
| 24 | + "Number of currently running VMs", |
| 25 | + ))?; |
| 26 | + |
| 27 | + let vms_total = |
| 28 | + Counter::with_opts(Opts::new("ignite_vms_total", "Total number of VMs created"))?; |
| 29 | + |
| 30 | + let vm_boot_duration = Histogram::with_opts(HistogramOpts::new( |
| 31 | + "ignite_vm_boot_duration_seconds", |
| 32 | + "VM boot duration in seconds", |
| 33 | + ))?; |
| 34 | + |
| 35 | + let vm_memory_usage = GaugeVec::new( |
| 36 | + Opts::new( |
| 37 | + "ignite_vm_memory_usage_bytes", |
| 38 | + "Memory usage per VM in bytes", |
| 39 | + ), |
| 40 | + &["vm_id"], |
| 41 | + )?; |
| 42 | + |
| 43 | + let vm_cpu_usage = GaugeVec::new( |
| 44 | + Opts::new("ignite_vm_cpu_usage_percent", "CPU usage percentage per VM"), |
| 45 | + &["vm_id"], |
| 46 | + )?; |
| 47 | + |
| 48 | + let snapshot_count = GaugeVec::new( |
| 49 | + Opts::new("ignite_snapshot_count", "Number of snapshots per VM"), |
| 50 | + &["vm_id"], |
| 51 | + )?; |
| 52 | + |
| 53 | + registry.register(Box::new(vms_running.clone()))?; |
| 54 | + registry.register(Box::new(vms_total.clone()))?; |
| 55 | + registry.register(Box::new(vm_boot_duration.clone()))?; |
| 56 | + registry.register(Box::new(vm_memory_usage.clone()))?; |
| 57 | + registry.register(Box::new(vm_cpu_usage.clone()))?; |
| 58 | + registry.register(Box::new(snapshot_count.clone()))?; |
| 59 | + |
| 60 | + Ok(Self { |
| 61 | + registry, |
| 62 | + vms_running, |
| 63 | + vms_total, |
| 64 | + vm_boot_duration, |
| 65 | + vm_memory_usage, |
| 66 | + vm_cpu_usage, |
| 67 | + snapshot_count, |
| 68 | + }) |
| 69 | + } |
| 70 | + |
| 71 | + pub fn register_vm(&self, vm_id: &str) { |
| 72 | + self.vms_running.inc(); |
| 73 | + self.vms_total.inc(); |
| 74 | + self.vm_memory_usage.with_label_values(&[vm_id]).set(0.0); |
| 75 | + self.vm_cpu_usage.with_label_values(&[vm_id]).set(0.0); |
| 76 | + self.snapshot_count.with_label_values(&[vm_id]).set(0.0); |
| 77 | + info!("Registered VM {} in metrics", vm_id); |
| 78 | + } |
| 79 | + |
| 80 | + pub fn unregister_vm(&self, vm_id: &str) { |
| 81 | + self.vms_running.dec(); |
| 82 | + let _ = self.vm_memory_usage.remove_label_values(&[vm_id]); |
| 83 | + let _ = self.vm_cpu_usage.remove_label_values(&[vm_id]); |
| 84 | + let _ = self.snapshot_count.remove_label_values(&[vm_id]); |
| 85 | + info!("Unregistered VM {} from metrics", vm_id); |
| 86 | + } |
| 87 | + |
| 88 | + pub fn set_memory_usage(&self, vm_id: &str, bytes: u64) { |
| 89 | + self.vm_memory_usage |
| 90 | + .with_label_values(&[vm_id]) |
| 91 | + .set(bytes as f64); |
| 92 | + } |
| 93 | + |
| 94 | + pub fn set_cpu_usage(&self, vm_id: &str, percent: f64) { |
| 95 | + self.vm_cpu_usage.with_label_values(&[vm_id]).set(percent); |
| 96 | + } |
| 97 | + |
| 98 | + pub fn record_boot_duration(&self, seconds: f64) { |
| 99 | + self.vm_boot_duration.observe(seconds); |
| 100 | + } |
| 101 | + |
| 102 | + pub fn increment_snapshot_count(&self, vm_id: &str) { |
| 103 | + self.snapshot_count.with_label_values(&[vm_id]).inc(); |
| 104 | + } |
| 105 | + |
| 106 | + pub fn gather(&self) -> Vec<u8> { |
| 107 | + let encoder = TextEncoder::new(); |
| 108 | + let metric_families = self.registry.gather(); |
| 109 | + let mut buffer = Vec::new(); |
| 110 | + encoder.encode(&metric_families, &mut buffer).unwrap(); |
| 111 | + buffer |
| 112 | + } |
| 113 | + |
| 114 | + pub fn registry(&self) -> &Registry { |
| 115 | + &self.registry |
| 116 | + } |
| 117 | +} |
| 118 | + |
| 119 | +impl Default for IgniteMetrics { |
| 120 | + fn default() -> Self { |
| 121 | + Self::new().expect("Failed to create IgniteMetrics") |
| 122 | + } |
| 123 | +} |
| 124 | + |
| 125 | +pub type SharedMetrics = Arc<RwLock<IgniteMetrics>>; |
| 126 | + |
| 127 | +pub fn create_metrics() -> Result<SharedMetrics, prometheus::Error> { |
| 128 | + let metrics = IgniteMetrics::new()?; |
| 129 | + Ok(Arc::new(RwLock::new(metrics))) |
| 130 | +} |
| 131 | + |
#[cfg(test)]
mod tests {
    use super::*;

    /// Freshly created metrics start at zero.
    #[test]
    fn test_metrics_creation() {
        let metrics = IgniteMetrics::new().unwrap();
        assert_eq!(metrics.vms_running.get(), 0.0);
        assert_eq!(metrics.vms_total.get(), 0.0);
    }

    /// Registering a VM bumps both the running gauge and the total counter.
    #[test]
    fn test_register_vm() {
        let metrics = IgniteMetrics::new().unwrap();
        metrics.register_vm("test-vm-1");
        assert_eq!(metrics.vms_running.get(), 1.0);
        assert_eq!(metrics.vms_total.get(), 1.0);
    }

    /// Unregistering drops the running gauge and removes the per-VM series,
    /// while the lifetime counter is left untouched.
    #[test]
    fn test_unregister_vm() {
        let metrics = IgniteMetrics::new().unwrap();
        metrics.register_vm("test-vm-1");
        metrics.unregister_vm("test-vm-1");
        assert_eq!(metrics.vms_running.get(), 0.0);
        assert_eq!(metrics.vms_total.get(), 1.0);

        let output = metrics.gather();
        let text = String::from_utf8_lossy(&output);
        assert!(!text.contains("test-vm-1"));
    }

    #[test]
    fn test_set_memory_usage() {
        let metrics = IgniteMetrics::new().unwrap();
        metrics.register_vm("test-vm-1");
        metrics.set_memory_usage("test-vm-1", 2048);
        let memory = metrics
            .vm_memory_usage
            .with_label_values(&["test-vm-1"])
            .get();
        assert_eq!(memory, 2048.0);
    }

    #[test]
    fn test_set_cpu_usage() {
        let metrics = IgniteMetrics::new().unwrap();
        metrics.register_vm("test-vm-1");
        metrics.set_cpu_usage("test-vm-1", 50.0);
        let cpu = metrics.vm_cpu_usage.with_label_values(&["test-vm-1"]).get();
        assert_eq!(cpu, 50.0);
    }

    /// Both observations must land in the histogram; previously this test
    /// made no assertions at all.
    #[test]
    fn test_record_boot_duration() {
        let metrics = IgniteMetrics::new().unwrap();
        metrics.record_boot_duration(1.5);
        metrics.record_boot_duration(2.0);
        assert_eq!(metrics.vm_boot_duration.get_sample_count(), 2);
        // 1.5 + 2.0 is exactly representable, so an exact comparison is safe.
        assert_eq!(metrics.vm_boot_duration.get_sample_sum(), 3.5);
    }

    #[test]
    fn test_increment_snapshot_count() {
        let metrics = IgniteMetrics::new().unwrap();
        metrics.register_vm("test-vm-1");
        metrics.increment_snapshot_count("test-vm-1");
        metrics.increment_snapshot_count("test-vm-1");
        let count = metrics
            .snapshot_count
            .with_label_values(&["test-vm-1"])
            .get();
        assert_eq!(count, 2.0);
    }

    /// The encoded output names the global metrics and the registered VM.
    #[test]
    fn test_gather_metrics() {
        let metrics = IgniteMetrics::new().unwrap();
        metrics.register_vm("test-vm-1");
        let output = metrics.gather();
        let text = String::from_utf8_lossy(&output);
        assert!(text.contains("ignite_vms_running"));
        assert!(text.contains("ignite_vms_total"));
        assert!(text.contains("test-vm-1"));
    }

    #[test]
    fn test_create_shared_metrics() {
        let shared = create_metrics().unwrap();
        // `blocking_read` is used because this test is not async.
        let metrics = shared.blocking_read();
        assert_eq!(metrics.vms_running.get(), 0.0);
    }
}
0 commit comments