package collector import ( "encoding/csv" "log/slog" "os/exec" "path/filepath" "sort" "strconv" "strings" "bee/audit/internal/schema" ) var ( amdSMIExecCommand = exec.Command amdSMILookPath = exec.LookPath amdSMIGlob = filepath.Glob ) var amdSMIExecutableGlobs = []string{ "/opt/rocm/bin/rocm-smi", "/opt/rocm-*/bin/rocm-smi", "/usr/local/bin/rocm-smi", } type amdGPUInfo struct { BDF string Serial string Product string Firmware string PowerW *float64 TempC *float64 } func enrichPCIeWithAMD(devs []schema.HardwarePCIeDevice) []schema.HardwarePCIeDevice { if !hasAMDGPUDevices(devs) { return devs } infoByBDF, err := queryAMDGPUs() if err != nil { slog.Info("amdgpu: enrichment skipped", "err", err) return devs } enriched := 0 for i := range devs { if !isAMDGPUDevice(devs[i]) || devs[i].BDF == nil { continue } info, ok := infoByBDF[normalizePCIeBDF(*devs[i].BDF)] if !ok { continue } if strings.TrimSpace(info.Serial) != "" { devs[i].SerialNumber = &info.Serial } if strings.TrimSpace(info.Firmware) != "" { devs[i].Firmware = &info.Firmware } if strings.TrimSpace(info.Product) != "" && devs[i].Model == nil { devs[i].Model = &info.Product } if info.PowerW != nil { devs[i].PowerW = info.PowerW } if info.TempC != nil { devs[i].TemperatureC = info.TempC } enriched++ } if enriched > 0 { slog.Info("amdgpu: enriched", "count", enriched) } return devs } func hasAMDGPUDevices(devs []schema.HardwarePCIeDevice) bool { for _, dev := range devs { if isAMDGPUDevice(dev) { return true } } return false } func isAMDGPUDevice(dev schema.HardwarePCIeDevice) bool { if dev.Manufacturer == nil || dev.DeviceClass == nil { return false } manufacturer := strings.ToLower(strings.TrimSpace(*dev.Manufacturer)) return strings.Contains(manufacturer, "advanced micro devices") && isGPUClass(strings.TrimSpace(*dev.DeviceClass)) } func queryAMDGPUs() (map[string]amdGPUInfo, error) { busByCard, err := queryAMDField("--showbus") if err != nil { return nil, err } infoByCard := map[string]amdGPUInfo{} for card, bus := range busByCard { bdf := normalizePCIeBDF(bus) if bdf == "" { continue } infoByCard[card] = amdGPUInfo{BDF: bdf} } if len(infoByCard) == 0 { return map[string]amdGPUInfo{}, nil } mergeAMDField(infoByCard, "--showserial", func(info *amdGPUInfo, value string) { info.Serial = value }) mergeAMDField(infoByCard, "--showproductname", func(info *amdGPUInfo, value string) { info.Product = value }) mergeAMDField(infoByCard, "--showvbios", func(info *amdGPUInfo, value string) { info.Firmware = value }) mergeAMDNumericField(infoByCard, "--showpower", func(info *amdGPUInfo, value float64) { info.PowerW = &value }) mergeAMDNumericField(infoByCard, "--showtemp", func(info *amdGPUInfo, value float64) { info.TempC = &value }) result := make(map[string]amdGPUInfo, len(infoByCard)) for _, info := range infoByCard { if info.BDF == "" { continue } result[info.BDF] = info } return result, nil } func mergeAMDField(infoByCard map[string]amdGPUInfo, flag string, apply func(*amdGPUInfo, string)) { values, err := queryAMDField(flag) if err != nil { return } for card, value := range values { info, ok := infoByCard[card] if !ok { continue } value = strings.TrimSpace(value) if value == "" { continue } apply(&info, value) infoByCard[card] = info } } func mergeAMDNumericField(infoByCard map[string]amdGPUInfo, flag string, apply func(*amdGPUInfo, float64)) { values, err := queryAMDNumericField(flag) if err != nil { return } for card, value := range values { info, ok := infoByCard[card] if !ok { continue } apply(&info, value) infoByCard[card] = info } } func queryAMDField(flag string) (map[string]string, error) { cmd, err := resolveAMDSMICmd(flag, "--csv") if err != nil { return nil, err } out, err := amdSMIExecCommand(cmd[0], cmd[1:]...).CombinedOutput() if err != nil { return nil, err } return parseROCmSingleValueCSV(string(out)), nil } func queryAMDNumericField(flag string) (map[string]float64, error) { values, err := queryAMDField(flag) if err != nil { return nil, err } out := map[string]float64{} for card, raw := range values { if value, ok := firstFloat(raw); ok { out[card] = value } } return out, nil } func resolveAMDSMICmd(args ...string) ([]string, error) { if path, err := amdSMILookPath("rocm-smi"); err == nil { return append([]string{path}, args...), nil } for _, pattern := range amdSMIExecutableGlobs { matches, err := amdSMIGlob(pattern) if err != nil { continue } sort.Strings(matches) for _, match := range matches { return append([]string{match}, args...), nil } } return nil, exec.ErrNotFound } func parseROCmSingleValueCSV(raw string) map[string]string { rows := map[string]string{} reader := csv.NewReader(strings.NewReader(raw)) reader.FieldsPerRecord = -1 records, err := reader.ReadAll() if err != nil { return rows } for _, rec := range records { if len(rec) < 2 { continue } card := normalizeROCmCardKey(rec[0]) if card == "" { continue } value := strings.TrimSpace(strings.Join(rec[1:], ",")) if value == "" || looksLikeCSVHeaderValue(value) { continue } rows[card] = value } return rows } func normalizeROCmCardKey(raw string) string { raw = strings.ToLower(strings.TrimSpace(raw)) raw = strings.Trim(raw, "\"") if raw == "" { return "" } if raw == "device" || raw == "gpu" || raw == "card" { return "" } if strings.HasPrefix(raw, "card") { return raw } if _, err := strconv.Atoi(raw); err == nil { return "card" + raw } return "" } func looksLikeCSVHeaderValue(value string) bool { value = strings.ToLower(strings.TrimSpace(value)) return strings.Contains(value, "product") || strings.Contains(value, "serial") || strings.Contains(value, "vbios") || strings.Contains(value, "bus") }