package platform

import (
	"bufio"
	"os"
	"strconv"
	"strings"
	"time"
)

// LiveMetricSample is one point-in-time reading of the server metrics that
// are gathered for the web UI metrics page.
type LiveMetricSample struct {
	Timestamp  time.Time      `json:"ts"`
	Fans       []FanReading   `json:"fans"`
	Temps      []TempReading  `json:"temps"`
	PowerW     float64        `json:"power_w"`
	CPULoadPct float64        `json:"cpu_load_pct"`
	MemLoadPct float64        `json:"mem_load_pct"`
	GPUs       []GPUMetricRow `json:"gpus"`
}

// TempReading pairs a temperature sensor name with its value in degrees
// Celsius.
type TempReading struct {
	Name    string  `json:"name"`
	Celsius float64 `json:"celsius"`
}

// SampleLiveMetrics gathers one metrics snapshot, stamped with the current
// UTC time, from every source that is available: GPU (via nvidia-smi), fans
// and temperatures (via ipmitool/sensors), system power (via ipmitool dcmi),
// CPU load (/proc/stat) and memory load (/proc/meminfo).
//
// Collection is best-effort: errors from the GPU and fan samplers are
// intentionally discarded, and a source that yields nothing simply leaves
// its field at the zero value.
func SampleLiveMetrics() LiveMetricSample {
	var sample LiveMetricSample
	sample.Timestamp = time.Now().UTC()

	// GPU and fan errors are dropped on purpose; an unavailable tool just
	// means the corresponding slice stays as returned (typically empty).
	sample.GPUs, _ = SampleGPUMetrics(nil)
	sample.Fans, _ = sampleFanSpeeds()

	// A non-positive CPU temperature means "no reading", so no entry is added.
	if maxTemp := sampleCPUMaxTemp(); maxTemp > 0 {
		sample.Temps = []TempReading{{Name: "CPU", Celsius: maxTemp}}
	}

	sample.PowerW = sampleSystemPower()     // 0 when unavailable
	sample.CPULoadPct = sampleCPULoadPct() // from /proc/stat
	sample.MemLoadPct = sampleMemLoadPct() // from /proc/meminfo

	return sample
}

// CPU load is derived from consecutive /proc/stat readings: sampleCPULoadPct
// compares the counters remembered from its previous call (kept in
// cpuStatPrev) with the current ones and returns the overall CPU utilisation
// percentage for that interval.
var cpuStatPrev [2]uint64 // [total, idle] counters seen by the previous sampleCPULoadPct call

// sampleCPULoadPct returns the overall CPU utilisation percentage (0-100)
// for the interval between the previous call and this one.
//
// Each call takes a single /proc/stat reading and compares it with the
// reading remembered in cpuStatPrev; it does not sleep. The first call, and
// any call for which /proc/stat cannot be read, returns 0.
//
// NOTE(review): cpuStatPrev is read and written without locking — confirm
// that callers never sample concurrently.
func sampleCPULoadPct() float64 {
	total, idle := readCPUStat()
	if total == 0 {
		return 0
	}
	prevTotal, prevIdle := cpuStatPrev[0], cpuStatPrev[1]
	// Always remember the newest reading so a bad interval (e.g. counters
	// that moved backwards) only affects a single sample.
	cpuStatPrev = [2]uint64{total, idle}
	return cpuLoadPct(prevTotal, prevIdle, total, idle)
}

// cpuLoadPct converts two (total, idle) counter readings into a busy
// percentage clamped to [0, 100]. It returns 0 when there is no baseline
// (prevTotal == 0) or when total did not advance.
func cpuLoadPct(prevTotal, prevIdle, total, idle uint64) float64 {
	// The counters are unsigned: compare before subtracting, otherwise a
	// counter that moved backwards wraps around to a huge positive delta.
	if prevTotal == 0 || total <= prevTotal {
		return 0
	}
	dt := float64(total - prevTotal)

	// A shrinking idle counter is treated as "no idle time in this interval"
	// rather than being allowed to wrap.
	var di float64
	if idle > prevIdle {
		di = float64(idle - prevIdle)
	}

	pct := (1 - di/dt) * 100
	if pct < 0 {
		return 0
	}
	if pct > 100 {
		return 100
	}
	return pct
}

// readCPUStat returns the aggregate CPU time counters from the "cpu " line
// of /proc/stat: total is the sum of all accounted time and idle is
// idle + iowait. It returns (0, 0) when the file cannot be opened or holds
// no aggregate line.
func readCPUStat() (total, idle uint64) {
	f, err := os.Open("/proc/stat")
	if err != nil {
		return 0, 0
	}
	defer f.Close()

	sc := bufio.NewScanner(f)
	for sc.Scan() {
		if t, i, ok := parseCPUStatLine(sc.Text()); ok {
			return t, i
		}
	}
	return 0, 0
}

// parseCPUStatLine parses the aggregate "cpu ..." line of /proc/stat.
// ok is false for any other line (including the per-core "cpuN" lines).
func parseCPUStatLine(line string) (total, idle uint64, ok bool) {
	if !strings.HasPrefix(line, "cpu ") {
		return 0, 0, false
	}
	fields := strings.Fields(line)[1:] // skip "cpu"

	// Only the first eight columns are summed:
	//   user nice system idle iowait irq softirq steal
	// The trailing guest and guest_nice columns are already accounted for in
	// user and nice, so adding them again would inflate the total.
	var vals [8]uint64
	for i := 0; i < len(fields) && i < len(vals); i++ {
		// Best effort: a column that does not parse counts as 0.
		if v, err := strconv.ParseUint(fields[i], 10, 64); err == nil {
			vals[i] = v
		}
	}

	for _, v := range vals {
		total += v
	}
	idle = vals[3] + vals[4] // idle + iowait
	return total, idle, true
}

// sampleMemLoadPct returns the percentage of memory in use (0-100) as
// reported by /proc/meminfo, or 0 when the file cannot be read or does not
// report MemTotal.
func sampleMemLoadPct() float64 {
	f, err := os.Open("/proc/meminfo")
	if err != nil {
		return 0
	}
	defer f.Close()

	vals := map[string]uint64{}
	sc := bufio.NewScanner(f)
	for sc.Scan() {
		fields := strings.Fields(sc.Text())
		if len(fields) < 2 {
			continue
		}
		v, err := strconv.ParseUint(fields[1], 10, 64)
		if err != nil {
			continue // best effort: ignore lines whose value is not a number
		}
		vals[strings.TrimSuffix(fields[0], ":")] = v
	}
	return memLoadPct(vals)
}

// memLoadPct computes the used-memory percentage from parsed /proc/meminfo
// values keyed by field name (without the trailing colon).
func memLoadPct(vals map[string]uint64) float64 {
	total := vals["MemTotal"]
	if total == 0 {
		return 0
	}

	avail, ok := vals["MemAvailable"]
	if !ok {
		// MemAvailable is not reported everywhere; approximate it instead of
		// treating the missing field as "no memory available" (100% load).
		avail = vals["MemFree"] + vals["Buffers"] + vals["Cached"]
	}
	if avail >= total {
		return 0 // also prevents unsigned underflow in total - avail
	}
	return float64(total-avail) / float64(total) * 100
}