diff --git a/audit/internal/app/app.go b/audit/internal/app/app.go index 28a85f9..56e1ff3 100644 --- a/audit/internal/app/app.go +++ b/audit/internal/app/app.go @@ -190,6 +190,7 @@ func (a *App) RunAudit(runtimeMode runtimeenv.Mode, output string) (string, erro } result := collector.Run(runtimeMode) applyLatestSATStatuses(&result.Hardware, DefaultSATBaseDir, a.StatusDB) + writePSUStatusesToDB(a.StatusDB, result.Hardware.PowerSupplies) if health, err := ReadRuntimeHealth(DefaultRuntimeJSONPath); err == nil { result.Runtime = &health } @@ -926,6 +927,41 @@ func bodyOr(body, fallback string) string { return body } +// writePSUStatusesToDB records PSU statuses collected during audit into the +// component-status DB so they are visible in the Hardware Summary card. +// PSU status is sourced from IPMI (ipmitool fru + sdr) during audit. +func writePSUStatusesToDB(db *ComponentStatusDB, psus []schema.HardwarePowerSupply) { + if db == nil || len(psus) == 0 { + return + } + const source = "audit:ipmi" + worstStatus := "OK" + for _, psu := range psus { + if psu.Status == nil { + continue + } + slot := "?" + if psu.Slot != nil { + slot = *psu.Slot + } + st := *psu.Status + detail := "" + if psu.ErrorDescription != nil { + detail = *psu.ErrorDescription + } + db.Record("psu:"+slot, source, st, detail) + switch st { + case "Critical": + worstStatus = "Critical" + case "Warning": + if worstStatus != "Critical" { + worstStatus = "Warning" + } + } + } + db.Record("psu:all", source, worstStatus, "") +} + func ReadRuntimeHealth(path string) (schema.RuntimeHealth, error) { raw, err := os.ReadFile(path) if err != nil { diff --git a/audit/internal/collector/nic_mellanox.go b/audit/internal/collector/nic_mellanox.go index 80e4f9c..3db3427 100644 --- a/audit/internal/collector/nic_mellanox.go +++ b/audit/internal/collector/nic_mellanox.go @@ -179,11 +179,3 @@ func commandOutputWithTimeout(timeout time.Duration, name string, args ...string defer cancel() return exec.CommandContext(ctx, name, args...).Output() } - -func interfaceHasCarrier(iface string) bool { - raw, err := readNetCarrierFile(iface) - if err != nil { - return false - } - return strings.TrimSpace(raw) == "1" -} diff --git a/audit/internal/collector/nic_telemetry.go b/audit/internal/collector/nic_telemetry.go index ee82199..33fa481 100644 --- a/audit/internal/collector/nic_telemetry.go +++ b/audit/internal/collector/nic_telemetry.go @@ -58,12 +58,10 @@ func enrichPCIeWithNICTelemetry(devs []schema.HardwarePCIeDevice) []schema.Hardw } } - if interfaceHasCarrier(iface) { - if out, err := ethtoolModuleQuery(iface); err == nil { - if injectSFPDOMTelemetry(&devs[i], out) { - enriched++ - continue - } + if out, err := ethtoolModuleQuery(iface); err == nil { + if injectSFPDOMTelemetry(&devs[i], out) { + enriched++ + continue } } if len(devs[i].MacAddresses) > 0 || devs[i].Firmware != nil { @@ -115,8 +113,38 @@ func injectSFPDOMTelemetry(dev *schema.HardwarePCIeDevice, raw string) bool { } key := strings.ToLower(strings.TrimSpace(trimmed[:idx])) val := strings.TrimSpace(trimmed[idx+1:]) + if val == "" || strings.EqualFold(val, "not supported") || strings.EqualFold(val, "unknown") { + continue + } switch { + case key == "identifier": + s := parseSFPIdentifier(val) + dev.SFPIdentifier = &s + t := true + dev.SFPPresent = &t + changed = true + case key == "connector": + s := parseSFPConnector(val) + dev.SFPConnector = &s + changed = true + case key == "vendor name": + s := strings.TrimSpace(val) + dev.SFPVendor = &s + changed = true + case key == "vendor pn": + s := strings.TrimSpace(val) + dev.SFPPartNumber = &s + changed = true + case key == "vendor sn": + s := strings.TrimSpace(val) + dev.SFPSerialNumber = &s + changed = true + case strings.Contains(key, "laser wavelength"): + if f, ok := firstFloat(val); ok { + dev.SFPWavelengthNM = &f + changed = true + } case strings.Contains(key, "module temperature"): if f, ok := firstFloat(val); ok { dev.SFPTemperatureC = &f @@ -147,12 +175,61 @@ func injectSFPDOMTelemetry(dev *schema.HardwarePCIeDevice, raw string) bool { return changed } +// parseSFPIdentifier extracts the human-readable transceiver type from the +// raw ethtool identifier line, e.g. "0x03 (SFP)" → "SFP". +func parseSFPIdentifier(val string) string { + if s := extractParens(val); s != "" { + return s + } + return val +} + +// parseSFPConnector extracts the connector type from the raw ethtool line, +// e.g. "0x07 (LC)" → "LC". +func parseSFPConnector(val string) string { + if s := extractParens(val); s != "" { + return s + } + return val +} + +var parenRe = regexp.MustCompile(`\(([^)]+)\)`) + +func extractParens(s string) string { + m := parenRe.FindStringSubmatch(s) + if len(m) < 2 { + return "" + } + return strings.TrimSpace(m[1]) +} + func parseSFPDOM(raw string) map[string]any { dev := schema.HardwarePCIeDevice{} if !injectSFPDOMTelemetry(&dev, raw) { return map[string]any{} } out := map[string]any{} + if dev.SFPPresent != nil { + out["sfp_present"] = *dev.SFPPresent + } + if dev.SFPIdentifier != nil { + out["sfp_identifier"] = *dev.SFPIdentifier + } + if dev.SFPConnector != nil { + out["sfp_connector"] = *dev.SFPConnector + } + if dev.SFPVendor != nil { + out["sfp_vendor"] = *dev.SFPVendor + } + if dev.SFPPartNumber != nil { + out["sfp_part_number"] = *dev.SFPPartNumber + } + if dev.SFPSerialNumber != nil { + out["sfp_serial_number"] = *dev.SFPSerialNumber + } + if dev.SFPWavelengthNM != nil { + out["sfp_wavelength_nm"] = *dev.SFPWavelengthNM + } if dev.SFPTemperatureC != nil { out["sfp_temperature_c"] = *dev.SFPTemperatureC } diff --git a/audit/internal/collector/nic_telemetry_test.go b/audit/internal/collector/nic_telemetry_test.go index 22f9a2e..0fb8148 100644 --- a/audit/internal/collector/nic_telemetry_test.go +++ b/audit/internal/collector/nic_telemetry_test.go @@ -122,10 +122,7 @@ func TestEnrichPCIeWithNICTelemetrySkipsModuleQueryWithoutCarrier(t *testing.T) readNetAddressFile = func(string) (string, error) { return "aa:bb:cc:dd:ee:ff", nil } readNetCarrierFile = func(string) (string, error) { return "0", nil } ethtoolInfoQuery = func(string) (string, error) { return "", fmt.Errorf("skip firmware") } - ethtoolModuleQuery = func(string) (string, error) { - t.Fatal("ethtool -m should not be called without carrier") - return "", nil - } + ethtoolModuleQuery = func(string) (string, error) { return "", fmt.Errorf("no module") } class := "EthernetController" bdf := "0000:18:00.0" diff --git a/audit/internal/schema/hardware.go b/audit/internal/schema/hardware.go index f644f3f..2e75732 100644 --- a/audit/internal/schema/hardware.go +++ b/audit/internal/schema/hardware.go @@ -183,6 +183,13 @@ type HardwarePCIeDevice struct { BatteryTemperatureC *float64 `json:"battery_temperature_c,omitempty"` BatteryVoltageV *float64 `json:"battery_voltage_v,omitempty"` BatteryReplaceRequired *bool `json:"battery_replace_required,omitempty"` + SFPPresent *bool `json:"sfp_present,omitempty"` + SFPIdentifier *string `json:"sfp_identifier,omitempty"` + SFPConnector *string `json:"sfp_connector,omitempty"` + SFPVendor *string `json:"sfp_vendor,omitempty"` + SFPPartNumber *string `json:"sfp_part_number,omitempty"` + SFPSerialNumber *string `json:"sfp_serial_number,omitempty"` + SFPWavelengthNM *float64 `json:"sfp_wavelength_nm,omitempty"` SFPTemperatureC *float64 `json:"sfp_temperature_c,omitempty"` SFPTXPowerDBM *float64 `json:"sfp_tx_power_dbm,omitempty"` SFPRXPowerDBM *float64 `json:"sfp_rx_power_dbm,omitempty"` diff --git a/audit/internal/webui/pages.go b/audit/internal/webui/pages.go index f0858b6..0fc09e4 100644 --- a/audit/internal/webui/pages.go +++ b/audit/internal/webui/pages.go @@ -317,106 +317,271 @@ func renderHardwareSummaryCard(opts HandlerOptions) string { if err != nil { return `
Hardware Summary
` } - // Parse just enough fields for the summary banner - var snap struct { - Summary struct { - CPU struct{ Model string } - Memory struct{ TotalGB float64 } - Storage []struct{ Device, Model, Size string } - GPUs []struct{ Model string } - PSUs []struct{ Model string } - } - Network struct { - Interfaces []struct { - Name string - IPv4 []string - State string - } - } - } - // Try to extract top-level fields loosely - var raw map[string]json.RawMessage - if err := json.Unmarshal(data, &raw); err != nil { + var ingest schema.HardwareIngestRequest + if err := json.Unmarshal(data, &ingest); err != nil { return `
Hardware Summary
Parse error
` } - _ = snap + hw := ingest.Hardware - // Also load runtime-health for badges - type componentHealth struct { - FailCount int `json:"fail_count"` - WarnCount int `json:"warn_count"` + var records []app.ComponentStatusRecord + if db, err := app.OpenComponentStatusDB(filepath.Join(opts.ExportDir, "component-status.json")); err == nil { + records = db.All() } - type healthSummary struct { - CPU componentHealth `json:"cpu"` - Memory componentHealth `json:"memory"` - Storage componentHealth `json:"storage"` - GPU componentHealth `json:"gpu"` - PSU componentHealth `json:"psu"` - Network componentHealth `json:"network"` - } - var health struct { - HardwareHealth healthSummary `json:"hardware_health"` - } - if hdata, herr := loadSnapshot(filepath.Join(opts.ExportDir, "runtime-health.json")); herr == nil { - _ = json.Unmarshal(hdata, &health) - } - - badge := func(h componentHealth) string { - if h.FailCount > 0 { - return `FAIL` - } - if h.WarnCount > 0 { - return `WARN` - } - return `OK` - } - - // Extract readable strings from raw JSON - getString := func(key string) string { - v, ok := raw[key] - if !ok { - return "" - } - var s string - if err := json.Unmarshal(v, &s); err == nil { - return s - } - return "" - } - - cpuModel := getString("cpu_model") - memStr := getString("memory_summary") - gpuSummary := getString("gpu_summary") var b strings.Builder b.WriteString(`
Hardware Summary
`) b.WriteString(``) writeRow := func(label, value, badgeHTML string) { - b.WriteString(fmt.Sprintf(``, + b.WriteString(fmt.Sprintf(``, html.EscapeString(label), html.EscapeString(value), badgeHTML)) } - if cpuModel != "" { - writeRow("CPU", cpuModel, badge(health.HardwareHealth.CPU)) - } else { - writeRow("CPU", "—", badge(health.HardwareHealth.CPU)) + + cpuRow := aggregateComponentStatus("CPU", records, []string{"cpu:all"}, nil) + writeRow("CPU", hwDescribeCPU(hw), runtimeStatusBadge(cpuRow.Status)) + + memRow := aggregateComponentStatus("Memory", records, []string{"memory:all"}, []string{"memory:"}) + writeRow("Memory", hwDescribeMemory(hw), runtimeStatusBadge(memRow.Status)) + + storageRow := aggregateComponentStatus("Storage", records, []string{"storage:all"}, []string{"storage:"}) + writeRow("Storage", hwDescribeStorage(hw), runtimeStatusBadge(storageRow.Status)) + + gpuRow := aggregateComponentStatus("GPU", records, nil, []string{"pcie:gpu:"}) + writeRow("GPU", hwDescribeGPU(hw), runtimeStatusBadge(gpuRow.Status)) + + psuRow := aggregateComponentStatus("PSU", records, nil, []string{"psu:"}) + writeRow("PSU", hwDescribePSU(hw), runtimeStatusBadge(psuRow.Status)) + + if nicDesc := hwDescribeNIC(hw); nicDesc != "" { + writeRow("Network", nicDesc, "") } - if memStr != "" { - writeRow("Memory", memStr, badge(health.HardwareHealth.Memory)) - } else { - writeRow("Memory", "—", badge(health.HardwareHealth.Memory)) - } - if gpuSummary != "" { - writeRow("GPU", gpuSummary, badge(health.HardwareHealth.GPU)) - } else { - writeRow("GPU", "—", badge(health.HardwareHealth.GPU)) - } - writeRow("Storage", "—", badge(health.HardwareHealth.Storage)) - writeRow("PSU", "—", badge(health.HardwareHealth.PSU)) + b.WriteString(`
%s%s%s
%s%s%s
`) b.WriteString(`
`) return b.String() } +// hwDescribeCPU returns a human-readable CPU summary, e.g. "2× Intel Xeon Gold 6338". +func hwDescribeCPU(hw schema.HardwareSnapshot) string { + counts := map[string]int{} + order := []string{} + for _, cpu := range hw.CPUs { + model := "Unknown CPU" + if cpu.Model != nil && *cpu.Model != "" { + model = *cpu.Model + } + if counts[model] == 0 { + order = append(order, model) + } + counts[model]++ + } + if len(order) == 0 { + return "—" + } + parts := make([]string, 0, len(order)) + for _, m := range order { + if counts[m] > 1 { + parts = append(parts, fmt.Sprintf("%d× %s", counts[m], m)) + } else { + parts = append(parts, m) + } + } + return strings.Join(parts, ", ") +} + +// hwDescribeMemory returns a summary like "16× 32 GB DDR4". +func hwDescribeMemory(hw schema.HardwareSnapshot) string { + type key struct { + sizeMB int + typ string + } + counts := map[key]int{} + order := []key{} + for _, dimm := range hw.Memory { + if dimm.SizeMB == nil || *dimm.SizeMB == 0 { + continue + } + t := "" + if dimm.Type != nil { + t = *dimm.Type + } + k := key{*dimm.SizeMB, t} + if counts[k] == 0 { + order = append(order, k) + } + counts[k]++ + } + if len(order) == 0 { + return "—" + } + parts := make([]string, 0, len(order)) + for _, k := range order { + gb := k.sizeMB / 1024 + desc := fmt.Sprintf("%d× %d GB", counts[k], gb) + if k.typ != "" { + desc += " " + k.typ + } + parts = append(parts, desc) + } + return strings.Join(parts, ", ") +} + +// hwDescribeStorage returns a summary like "4× 3.84 TB NVMe, 2× 1.92 TB SATA". +func hwDescribeStorage(hw schema.HardwareSnapshot) string { + type key struct { + sizeGB int + iface string + } + counts := map[key]int{} + order := []key{} + for _, disk := range hw.Storage { + sz := 0 + if disk.SizeGB != nil { + sz = *disk.SizeGB + } + iface := "" + if disk.Interface != nil { + iface = *disk.Interface + } else if disk.Type != nil { + iface = *disk.Type + } + k := key{sz, iface} + if counts[k] == 0 { + order = append(order, k) + } + counts[k]++ + } + if len(order) == 0 { + return "—" + } + parts := make([]string, 0, len(order)) + for _, k := range order { + var sizeStr string + if k.sizeGB >= 1000 { + sizeStr = fmt.Sprintf("%.2g TB", float64(k.sizeGB)/1000) + } else if k.sizeGB > 0 { + sizeStr = fmt.Sprintf("%d GB", k.sizeGB) + } else { + sizeStr = "?" + } + desc := fmt.Sprintf("%d× %s", counts[k], sizeStr) + if k.iface != "" { + desc += " " + k.iface + } + parts = append(parts, desc) + } + return strings.Join(parts, ", ") +} + +// hwDescribeGPU returns a summary like "8× NVIDIA H100 80GB". +func hwDescribeGPU(hw schema.HardwareSnapshot) string { + counts := map[string]int{} + order := []string{} + for _, dev := range hw.PCIeDevices { + if dev.DeviceClass == nil { + continue + } + if !isGPUDeviceClass(*dev.DeviceClass) { + continue + } + model := "Unknown GPU" + if dev.Model != nil && *dev.Model != "" { + model = *dev.Model + } + if counts[model] == 0 { + order = append(order, model) + } + counts[model]++ + } + if len(order) == 0 { + return "—" + } + parts := make([]string, 0, len(order)) + for _, m := range order { + if counts[m] > 1 { + parts = append(parts, fmt.Sprintf("%d× %s", counts[m], m)) + } else { + parts = append(parts, m) + } + } + return strings.Join(parts, ", ") +} + +// hwDescribePSU returns a summary like "2× 1600 W" or "2× PSU". +func hwDescribePSU(hw schema.HardwareSnapshot) string { + n := len(hw.PowerSupplies) + if n == 0 { + return "—" + } + // Try to get a consistent wattage + watt := 0 + consistent := true + for _, psu := range hw.PowerSupplies { + if psu.WattageW == nil { + consistent = false + break + } + if watt == 0 { + watt = *psu.WattageW + } else if *psu.WattageW != watt { + consistent = false + break + } + } + if consistent && watt > 0 { + return fmt.Sprintf("%d× %d W", n, watt) + } + return fmt.Sprintf("%d× PSU", n) +} + +// hwDescribeNIC returns a summary like "2× Mellanox ConnectX-6". +func hwDescribeNIC(hw schema.HardwareSnapshot) string { + counts := map[string]int{} + order := []string{} + for _, dev := range hw.PCIeDevices { + isNIC := false + if dev.DeviceClass != nil { + c := strings.ToLower(strings.TrimSpace(*dev.DeviceClass)) + isNIC = c == "ethernetcontroller" || c == "networkcontroller" || strings.Contains(c, "fibrechannel") + } + if !isNIC && len(dev.MacAddresses) == 0 { + continue + } + model := "" + if dev.Model != nil && *dev.Model != "" { + model = *dev.Model + } else if dev.Manufacturer != nil && *dev.Manufacturer != "" { + model = *dev.Manufacturer + " NIC" + } else { + model = "NIC" + } + if counts[model] == 0 { + order = append(order, model) + } + counts[model]++ + } + if len(order) == 0 { + return "" + } + parts := make([]string, 0, len(order)) + for _, m := range order { + if counts[m] > 1 { + parts = append(parts, fmt.Sprintf("%d× %s", counts[m], m)) + } else { + parts = append(parts, m) + } + } + return strings.Join(parts, ", ") +} + +func isGPUDeviceClass(class string) bool { + switch strings.TrimSpace(class) { + case "VideoController", "DisplayController", "ProcessingAccelerator": + return true + default: + return false + } +} + func renderAuditModal() string { return `