Files
bee/audit/internal/collector/amdgpu.go
Michael Chus 7d2e904d14 Bring codebase into compliance with bible contracts (A–E)
A (hardware-ingest-json v2.8-2.9): remove sensor location fields from schema
and collector; tag HardwareMemory.Location as json:"-"; add PlatformConfig to
HardwareSnapshot.

B (no-hardcoded-vendors): consolidate PCI vendor IDs into collector/pci_vendors.go;
replace all vendor-name string checks in isGPUDevice, isNVIDIADevice, isMellanoxDevice,
isAMDGPUDevice, matchesGPUVendor (sat_overlay), and validateIsVendorGPU (page_validate)
with numeric vendor_id comparisons.

C (module-structure): split app/app.go (1413 lines) into app.go + app_format.go,
app_network.go, app_services.go, app_packs.go, app_install.go — no logic changes.

D (go-code-style): wrap bare return err in interfaceAdminState and
interfaceIPv4Addrs (platform/network.go) with fmt.Errorf context including
the interface name.

E (go-project-bible): add bible-local/architecture/data-model.md and
bible-local/architecture/api-surface.md.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-06-13 14:32:08 +03:00

252 lines
5.7 KiB
Go

package collector
import (
"encoding/csv"
"log/slog"
"os/exec"
"path/filepath"
"sort"
"strconv"
"strings"
"bee/audit/internal/schema"
)
var (
amdSMIExecCommand = exec.Command
amdSMILookPath = exec.LookPath
amdSMIGlob = filepath.Glob
)
var amdSMIExecutableGlobs = []string{
"/opt/rocm/bin/rocm-smi",
"/opt/rocm-*/bin/rocm-smi",
"/usr/local/bin/rocm-smi",
}
type amdGPUInfo struct {
BDF string
Serial string
Product string
Firmware string
PowerW *float64
TempC *float64
}
func enrichPCIeWithAMD(devs []schema.HardwarePCIeDevice) []schema.HardwarePCIeDevice {
if !hasAMDGPUDevices(devs) {
return devs
}
infoByBDF, err := queryAMDGPUs()
if err != nil {
slog.Info("amdgpu: enrichment skipped", "err", err)
return devs
}
enriched := 0
for i := range devs {
if !isAMDGPUDevice(devs[i]) || devs[i].BDF == nil {
continue
}
info, ok := infoByBDF[normalizePCIeBDF(*devs[i].BDF)]
if !ok {
continue
}
if strings.TrimSpace(info.Serial) != "" {
devs[i].SerialNumber = &info.Serial
}
if strings.TrimSpace(info.Firmware) != "" {
devs[i].Firmware = &info.Firmware
}
if strings.TrimSpace(info.Product) != "" && devs[i].Model == nil {
devs[i].Model = &info.Product
}
if info.PowerW != nil {
devs[i].PowerW = info.PowerW
}
if info.TempC != nil {
devs[i].TemperatureC = info.TempC
}
enriched++
}
if enriched > 0 {
slog.Info("amdgpu: enriched", "count", enriched)
}
return devs
}
func hasAMDGPUDevices(devs []schema.HardwarePCIeDevice) bool {
for _, dev := range devs {
if isAMDGPUDevice(dev) {
return true
}
}
return false
}
func isAMDGPUDevice(dev schema.HardwarePCIeDevice) bool {
if dev.DeviceClass == nil {
return false
}
return dev.VendorID != nil && *dev.VendorID == AMDVendorID && isGPUClass(strings.TrimSpace(*dev.DeviceClass))
}
func queryAMDGPUs() (map[string]amdGPUInfo, error) {
busByCard, err := queryAMDField("--showbus")
if err != nil {
return nil, err
}
infoByCard := map[string]amdGPUInfo{}
for card, bus := range busByCard {
bdf := normalizePCIeBDF(bus)
if bdf == "" {
continue
}
infoByCard[card] = amdGPUInfo{BDF: bdf}
}
if len(infoByCard) == 0 {
return map[string]amdGPUInfo{}, nil
}
mergeAMDField(infoByCard, "--showserial", func(info *amdGPUInfo, value string) { info.Serial = value })
mergeAMDField(infoByCard, "--showproductname", func(info *amdGPUInfo, value string) { info.Product = value })
mergeAMDField(infoByCard, "--showvbios", func(info *amdGPUInfo, value string) { info.Firmware = value })
mergeAMDNumericField(infoByCard, "--showpower", func(info *amdGPUInfo, value float64) { info.PowerW = &value })
mergeAMDNumericField(infoByCard, "--showtemp", func(info *amdGPUInfo, value float64) { info.TempC = &value })
result := make(map[string]amdGPUInfo, len(infoByCard))
for _, info := range infoByCard {
if info.BDF == "" {
continue
}
result[info.BDF] = info
}
return result, nil
}
func mergeAMDField(infoByCard map[string]amdGPUInfo, flag string, apply func(*amdGPUInfo, string)) {
values, err := queryAMDField(flag)
if err != nil {
return
}
for card, value := range values {
info, ok := infoByCard[card]
if !ok {
continue
}
value = strings.TrimSpace(value)
if value == "" {
continue
}
apply(&info, value)
infoByCard[card] = info
}
}
func mergeAMDNumericField(infoByCard map[string]amdGPUInfo, flag string, apply func(*amdGPUInfo, float64)) {
values, err := queryAMDNumericField(flag)
if err != nil {
return
}
for card, value := range values {
info, ok := infoByCard[card]
if !ok {
continue
}
apply(&info, value)
infoByCard[card] = info
}
}
func queryAMDField(flag string) (map[string]string, error) {
cmd, err := resolveAMDSMICmd(flag, "--csv")
if err != nil {
return nil, err
}
out, err := amdSMIExecCommand(cmd[0], cmd[1:]...).CombinedOutput()
if err != nil {
return nil, err
}
return parseROCmSingleValueCSV(string(out)), nil
}
func queryAMDNumericField(flag string) (map[string]float64, error) {
values, err := queryAMDField(flag)
if err != nil {
return nil, err
}
out := map[string]float64{}
for card, raw := range values {
if value, ok := firstFloat(raw); ok {
out[card] = value
}
}
return out, nil
}
func resolveAMDSMICmd(args ...string) ([]string, error) {
if path, err := amdSMILookPath("rocm-smi"); err == nil {
return append([]string{path}, args...), nil
}
for _, pattern := range amdSMIExecutableGlobs {
matches, err := amdSMIGlob(pattern)
if err != nil {
continue
}
sort.Strings(matches)
for _, match := range matches {
return append([]string{match}, args...), nil
}
}
return nil, exec.ErrNotFound
}
func parseROCmSingleValueCSV(raw string) map[string]string {
rows := map[string]string{}
reader := csv.NewReader(strings.NewReader(raw))
reader.FieldsPerRecord = -1
records, err := reader.ReadAll()
if err != nil {
return rows
}
for _, rec := range records {
if len(rec) < 2 {
continue
}
card := normalizeROCmCardKey(rec[0])
if card == "" {
continue
}
value := strings.TrimSpace(strings.Join(rec[1:], ","))
if value == "" || looksLikeCSVHeaderValue(value) {
continue
}
rows[card] = value
}
return rows
}
func normalizeROCmCardKey(raw string) string {
raw = strings.ToLower(strings.TrimSpace(raw))
raw = strings.Trim(raw, "\"")
if raw == "" {
return ""
}
if raw == "device" || raw == "gpu" || raw == "card" {
return ""
}
if strings.HasPrefix(raw, "card") {
return raw
}
if _, err := strconv.Atoi(raw); err == nil {
return "card" + raw
}
return ""
}
func looksLikeCSVHeaderValue(value string) bool {
value = strings.ToLower(strings.TrimSpace(value))
return strings.Contains(value, "product") ||
strings.Contains(value, "serial") ||
strings.Contains(value, "vbios") ||
strings.Contains(value, "bus")
}