zernel_ebpf/consumers/
gpu_mem.rs1use serde::Serialize;
4use tracing::warn;
5
6#[derive(Debug, Clone, Serialize)]
9#[repr(C)]
10pub struct GpuMemEvent {
11 pub pid: u32,
12 pub gpu_id: u32,
13 pub alloc_bytes: u64,
14 pub free_bytes: u64,
15 pub total_usage: u64,
16 pub timestamp_ns: u64,
17}
18
/// Configuration for checking GPU memory events against an OOM threshold.
///
/// Holds only two plain numeric settings, so it is cheap to copy and can be
/// printed with `{:?}` or compared in tests.
#[derive(Debug, Clone, Copy, PartialEq)]
pub struct GpuMemConsumer {
    /// Usage fraction (`total_usage / gpu_total_bytes`) above which
    /// `process_event` logs a warning. The comparison is strict (`>`).
    pub oom_threshold: f64,
    /// Denominator of the usage fraction. When this is zero, `process_event`
    /// skips the threshold check entirely.
    pub gpu_total_bytes: u64,
}
26
27impl GpuMemConsumer {
28 pub fn new(oom_threshold: f64, gpu_total_bytes: u64) -> Self {
29 Self {
30 oom_threshold,
31 gpu_total_bytes,
32 }
33 }
34
35 pub fn process_event(&self, raw: &[u8]) -> Option<GpuMemEvent> {
38 let event = deserialize_event::<GpuMemEvent>(raw)?;
39
40 if self.gpu_total_bytes > 0 {
42 let usage_frac = event.total_usage as f64 / self.gpu_total_bytes as f64;
43 if usage_frac > self.oom_threshold {
44 warn!(
45 pid = event.pid,
46 gpu_id = event.gpu_id,
47 usage_pct = usage_frac * 100.0,
48 "GPU memory usage exceeds OOM threshold"
49 );
50 }
51 }
52
53 Some(event)
54 }
55}
56
/// Reinterprets the leading bytes of `raw` as a `T` and returns an owned value.
///
/// Returns `None` when `raw` holds fewer than `size_of::<T>()` bytes. Any
/// bytes beyond that length are ignored.
///
/// The bytes are fetched with an unaligned load, so `raw` may start at any
/// address. A `&[u8]` carries no alignment guarantee, and dereferencing a
/// misaligned `*const T` is undefined behaviour.
///
/// Nothing here validates the bytes. Only instantiate this with
/// plain-old-data types (for example integer-only `#[repr(C)]` structs) for
/// which every bit pattern is a valid value.
fn deserialize_event<T: Clone>(raw: &[u8]) -> Option<T> {
    if raw.len() < std::mem::size_of::<T>() {
        return None;
    }

    // SAFETY: the length check above guarantees that `raw` covers at least
    // `size_of::<T>()` readable bytes, and `read_unaligned` places no
    // alignment requirement on the source pointer. Validity of the resulting
    // bit pattern is the caller's responsibility (see the function docs).
    let bitwise = unsafe { std::ptr::read_unaligned(raw.as_ptr().cast::<T>()) };

    // The bitwise copy merely mirrors the buffer and must never run `T`'s
    // destructor, so it is wrapped in `ManuallyDrop`; the value handed to the
    // caller is produced through `Clone`.
    let bitwise = std::mem::ManuallyDrop::new(bitwise);
    Some(T::clone(&bitwise))
}
69
#[cfg(test)]
mod tests {
    use super::*;

    /// 80 GiB, used both as the consumer's total and as the reported usage.
    const GPU_TOTAL_BYTES: u64 = 80 * 1024 * 1024 * 1024;

    /// Borrows an event as the raw bytes that `process_event` expects.
    fn as_raw_bytes(event: &GpuMemEvent) -> &[u8] {
        // SAFETY: `event` is a live reference, so the pointer is valid for
        // `size_of::<GpuMemEvent>()` bytes for the returned lifetime. The
        // struct is `#[repr(C)]` with two `u32`s followed by four `u64`s,
        // which leaves no padding, so every byte is initialized.
        unsafe {
            std::slice::from_raw_parts(
                (event as *const GpuMemEvent).cast::<u8>(),
                std::mem::size_of::<GpuMemEvent>(),
            )
        }
    }

    #[test]
    fn deserialize_gpu_mem_event() {
        let original = GpuMemEvent {
            pid: 1234,
            gpu_id: 0,
            alloc_bytes: 1024 * 1024,
            free_bytes: 0,
            total_usage: GPU_TOTAL_BYTES,
            timestamp_ns: 999999,
        };
        let consumer = GpuMemConsumer::new(0.95, GPU_TOTAL_BYTES);

        let decoded = consumer.process_event(as_raw_bytes(&original)).unwrap();

        assert_eq!(decoded.pid, 1234);
        assert_eq!(decoded.alloc_bytes, 1024 * 1024);
    }

    #[test]
    fn rejects_undersized_buffer() {
        let too_short = [0u8; 4];
        let consumer = GpuMemConsumer::new(0.95, GPU_TOTAL_BYTES);

        assert!(consumer.process_event(&too_short).is_none());
    }
}