package platform

import (
	"bufio"
	"encoding/json"
	"os"
	"os/exec"
	"sort"
	"strconv"
	"strings"
	"time"
)

// LiveMetricSample holds one timestamped set of server readings (fans,
// temperatures, power, CPU/memory load and GPUs) in the JSON shape consumed
// by the web UI metrics page.
type LiveMetricSample struct {
	Timestamp  time.Time      `json:"ts"`
	Fans       []FanReading   `json:"fans"`
	Temps      []TempReading  `json:"temps"`
	PowerW     float64        `json:"power_w"`
	CPULoadPct float64        `json:"cpu_load_pct"`
	MemLoadPct float64        `json:"mem_load_pct"`
	GPUs       []GPUMetricRow `json:"gpus"`
}

// TempReading is one temperature sensor value in degrees Celsius, labelled
// with the sensor name and an optional group.
type TempReading struct {
	Name    string  `json:"name"`
	Group   string  `json:"group,omitempty"`
	Celsius float64 `json:"celsius"`
}

// SampleLiveMetrics gathers one snapshot of GPU metrics, fan speeds,
// temperatures, system power, CPU load and memory load, stamped with the
// current UTC time. A source that fails or returns nothing simply leaves its
// field at the zero value; no error is reported to the caller.
func SampleLiveMetrics() LiveMetricSample {
	sample := LiveMetricSample{Timestamp: time.Now().UTC()}

	// GPUs: NVIDIA is tried first; AMD is only queried when NVIDIA produced
	// an error or no rows.
	nvidiaGPUs, nvidiaErr := SampleGPUMetrics(nil)
	if nvidiaErr == nil && len(nvidiaGPUs) > 0 {
		sample.GPUs = nvidiaGPUs
	} else {
		amdGPUs, amdErr := sampleAMDGPUMetrics()
		if amdErr == nil && len(amdGPUs) > 0 {
			sample.GPUs = amdGPUs
		}
	}

	// Fans: the error is deliberately dropped so an unavailable source just
	// leaves whatever slice the helper returned.
	sample.Fans, _ = sampleFanSpeeds()

	// Temperatures: when no reading landed in the "cpu" group, add a single
	// synthetic "CPU Max" entry if a positive value is available.
	sample.Temps = append(sample.Temps, sampleLiveTemperatureReadings()...)
	if !hasTempGroup(sample.Temps, "cpu") {
		cpuTemp := sampleCPUMaxTemp()
		if cpuTemp > 0 {
			sample.Temps = append(sample.Temps, TempReading{
				Name:    "CPU Max",
				Group:   "cpu",
				Celsius: cpuTemp,
			})
		}
	}

	// Remaining scalar readings, taken in the same order as before.
	sample.PowerW = sampleSystemPower()
	sample.CPULoadPct = sampleCPULoadPct() // from /proc/stat
	sample.MemLoadPct = sampleMemLoadPct() // from /proc/meminfo

	return sample
}

// sampleCPULoadPct derives the overall CPU utilisation percentage from the
// change in /proc/stat counters between the previous call and the current
// one; cpuStatPrev holds the counters seen by the previous call.
var cpuStatPrev [2]uint64 // [total, idle] func sampleCPULoadPct() float64 { total, idle := readCPUStat() if total == 0 { return 0 } prevTotal, prevIdle := cpuStatPrev[0], cpuStatPrev[1] cpuStatPrev = [2]uint64{total, idle} if prevTotal == 0 { return 0 } dt := float64(total - prevTotal) di := float64(idle - prevIdle) if dt <= 0 { return 0 } pct := (1 - di/dt) * 100 if pct < 0 { return 0 } if pct > 100 { return 100 } return pct } func readCPUStat() (total, idle uint64) { f, err := os.Open("/proc/stat") if err != nil { return 0, 0 } defer f.Close() sc := bufio.NewScanner(f) for sc.Scan() { line := sc.Text() if !strings.HasPrefix(line, "cpu ") { continue } fields := strings.Fields(line)[1:] // skip "cpu" var vals [10]uint64 for i := 0; i < len(fields) && i < 10; i++ { vals[i], _ = strconv.ParseUint(fields[i], 10, 64) } // idle = idle + iowait idle = vals[3] + vals[4] for _, v := range vals { total += v } return total, idle } return 0, 0 } func sampleMemLoadPct() float64 { f, err := os.Open("/proc/meminfo") if err != nil { return 0 } defer f.Close() vals := map[string]uint64{} sc := bufio.NewScanner(f) for sc.Scan() { fields := strings.Fields(sc.Text()) if len(fields) >= 2 { v, _ := strconv.ParseUint(fields[1], 10, 64) vals[strings.TrimSuffix(fields[0], ":")] = v } } total := vals["MemTotal"] avail := vals["MemAvailable"] if total == 0 { return 0 } used := total - avail return float64(used) / float64(total) * 100 } func hasTempGroup(temps []TempReading, group string) bool { for _, t := range temps { if t.Group == group { return true } } return false } func sampleLiveTemperatureReadings() []TempReading { if temps := sampleLiveTempsViaSensorsJSON(); len(temps) > 0 { return temps } return sampleLiveTempsViaIPMI() } func sampleLiveTempsViaSensorsJSON() []TempReading { out, err := exec.Command("sensors", "-j").Output() if err != nil || len(out) == 0 { return nil } var doc map[string]map[string]any if err := json.Unmarshal(out, &doc); err != nil { return nil } chips := 
make([]string, 0, len(doc)) for chip := range doc { chips = append(chips, chip) } sort.Strings(chips) temps := make([]TempReading, 0, len(chips)) seen := map[string]struct{}{} for _, chip := range chips { features := doc[chip] featureNames := make([]string, 0, len(features)) for name := range features { featureNames = append(featureNames, name) } sort.Strings(featureNames) for _, name := range featureNames { if strings.EqualFold(name, "Adapter") { continue } feature, ok := features[name].(map[string]any) if !ok { continue } value, ok := firstTempInputValue(feature) if !ok || value <= 0 || value > 150 { continue } group := classifyLiveTempGroup(chip, name) if group == "gpu" { continue } label := strings.TrimSpace(name) if label == "" { continue } if group == "ambient" { label = compactAmbientTempName(chip, label) } key := group + "\x00" + label if _, ok := seen[key]; ok { continue } seen[key] = struct{}{} temps = append(temps, TempReading{Name: label, Group: group, Celsius: value}) } } return temps } func sampleLiveTempsViaIPMI() []TempReading { out, err := exec.Command("ipmitool", "sdr", "type", "Temperature").Output() if err != nil || len(out) == 0 { return nil } var temps []TempReading seen := map[string]struct{}{} for _, line := range strings.Split(strings.TrimSpace(string(out)), "\n") { parts := strings.Split(line, "|") if len(parts) < 3 { continue } name := strings.TrimSpace(parts[0]) if name == "" { continue } unit := strings.ToLower(strings.TrimSpace(parts[2])) if !strings.Contains(unit, "degrees") { continue } raw := strings.TrimSpace(parts[1]) if raw == "" || strings.EqualFold(raw, "na") { continue } value, err := strconv.ParseFloat(raw, 64) if err != nil || value <= 0 || value > 150 { continue } group := classifyLiveTempGroup("", name) if group == "gpu" { continue } label := name if group == "ambient" { label = compactAmbientTempName("", label) } key := group + "\x00" + label if _, ok := seen[key]; ok { continue } seen[key] = struct{}{} temps = 
append(temps, TempReading{Name: label, Group: group, Celsius: value}) } return temps } func firstTempInputValue(feature map[string]any) (float64, bool) { keys := make([]string, 0, len(feature)) for key := range feature { keys = append(keys, key) } sort.Strings(keys) for _, key := range keys { lower := strings.ToLower(key) if !strings.Contains(lower, "temp") || !strings.HasSuffix(lower, "_input") { continue } switch value := feature[key].(type) { case float64: return value, true case string: f, err := strconv.ParseFloat(value, 64) if err == nil { return f, true } } } return 0, false } func classifyLiveTempGroup(chip, name string) string { text := strings.ToLower(strings.TrimSpace(chip + " " + name)) switch { case strings.Contains(text, "gpu"), strings.Contains(text, "amdgpu"), strings.Contains(text, "nvidia"), strings.Contains(text, "adeon"): return "gpu" case strings.Contains(text, "coretemp"), strings.Contains(text, "k10temp"), strings.Contains(text, "zenpower"), strings.Contains(text, "package id"), strings.Contains(text, "x86_pkg_temp"), strings.Contains(text, "tctl"), strings.Contains(text, "tdie"), strings.Contains(text, "tccd"), strings.Contains(text, "cpu"), strings.Contains(text, "peci"): return "cpu" default: return "ambient" } } func compactAmbientTempName(chip, name string) string { chip = strings.TrimSpace(chip) name = strings.TrimSpace(name) if chip == "" || strings.EqualFold(chip, name) { return name } if strings.Contains(strings.ToLower(name), strings.ToLower(chip)) { return name } return chip + " / " + name }