nvidia: improve component mapping, firmware, statuses and check times
This commit is contained in:
@@ -11,6 +11,8 @@ import (
|
|||||||
"git.mchus.pro/mchus/logpile/internal/models"
|
"git.mchus.pro/mchus/logpile/internal/models"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
// cpuMicrocodeFirmwareRegex matches, case-insensitively and over the whole
// string, names of the form "cpu<digits> microcode" (for example "CPU0 Microcode").
var cpuMicrocodeFirmwareRegex = regexp.MustCompile(`(?i)^cpu\d+\s+microcode$`)
|
||||||
|
|
||||||
// ConvertToReanimator converts AnalysisResult to Reanimator export format
|
// ConvertToReanimator converts AnalysisResult to Reanimator export format
|
||||||
func ConvertToReanimator(result *models.AnalysisResult) (*ReanimatorExport, error) {
|
func ConvertToReanimator(result *models.AnalysisResult) (*ReanimatorExport, error) {
|
||||||
if result == nil {
|
if result == nil {
|
||||||
@@ -77,14 +79,39 @@ func convertFirmware(firmware []models.FirmwareInfo) []ReanimatorFirmware {
|
|||||||
|
|
||||||
result := make([]ReanimatorFirmware, 0, len(firmware))
|
result := make([]ReanimatorFirmware, 0, len(firmware))
|
||||||
for _, fw := range firmware {
|
for _, fw := range firmware {
|
||||||
|
if isDeviceBoundFirmwareName(fw.DeviceName) {
|
||||||
|
continue
|
||||||
|
}
|
||||||
result = append(result, ReanimatorFirmware{
|
result = append(result, ReanimatorFirmware{
|
||||||
DeviceName: fw.DeviceName,
|
DeviceName: fw.DeviceName,
|
||||||
Version: fw.Version,
|
Version: fw.Version,
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
if len(result) == 0 {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
return result
|
return result
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func isDeviceBoundFirmwareName(name string) bool {
|
||||||
|
n := strings.TrimSpace(strings.ToLower(name))
|
||||||
|
if n == "" {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
|
if strings.HasPrefix(n, "gpu ") ||
|
||||||
|
strings.HasPrefix(n, "nvswitch ") ||
|
||||||
|
strings.HasPrefix(n, "nic ") ||
|
||||||
|
strings.HasPrefix(n, "hdd ") ||
|
||||||
|
strings.HasPrefix(n, "ssd ") ||
|
||||||
|
strings.HasPrefix(n, "nvme ") ||
|
||||||
|
strings.HasPrefix(n, "psu") {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
|
||||||
|
return cpuMicrocodeFirmwareRegex.MatchString(strings.TrimSpace(name))
|
||||||
|
}
|
||||||
|
|
||||||
// convertCPUs converts CPU information to Reanimator format
|
// convertCPUs converts CPU information to Reanimator format
|
||||||
func convertCPUs(cpus []models.CPU, collectedAt string) []ReanimatorCPU {
|
func convertCPUs(cpus []models.CPU, collectedAt string) []ReanimatorCPU {
|
||||||
if len(cpus) == 0 {
|
if len(cpus) == 0 {
|
||||||
@@ -229,6 +256,7 @@ func convertStorage(storage []models.Storage, collectedAt string) []ReanimatorSt
|
|||||||
func convertPCIeDevices(hw *models.HardwareConfig, collectedAt string) []ReanimatorPCIe {
|
func convertPCIeDevices(hw *models.HardwareConfig, collectedAt string) []ReanimatorPCIe {
|
||||||
result := make([]ReanimatorPCIe, 0)
|
result := make([]ReanimatorPCIe, 0)
|
||||||
gpuSlots := make(map[string]struct{}, len(hw.GPUs))
|
gpuSlots := make(map[string]struct{}, len(hw.GPUs))
|
||||||
|
nvswitchFirmwareBySlot := buildNVSwitchFirmwareBySlot(hw.Firmware)
|
||||||
for _, gpu := range hw.GPUs {
|
for _, gpu := range hw.GPUs {
|
||||||
slot := strings.ToLower(strings.TrimSpace(gpu.Slot))
|
slot := strings.ToLower(strings.TrimSpace(gpu.Slot))
|
||||||
if slot != "" {
|
if slot != "" {
|
||||||
@@ -254,6 +282,10 @@ func convertPCIeDevices(hw *models.HardwareConfig, collectedAt string) []Reanima
|
|||||||
}
|
}
|
||||||
|
|
||||||
status := normalizeStatus(pcie.Status, false)
|
status := normalizeStatus(pcie.Status, false)
|
||||||
|
firmware := ""
|
||||||
|
if isNVSwitchPCIeDevice(pcie) {
|
||||||
|
firmware = nvswitchFirmwareBySlot[normalizeNVSwitchSlotForLookup(pcie.Slot)]
|
||||||
|
}
|
||||||
meta := buildStatusMeta(
|
meta := buildStatusMeta(
|
||||||
status,
|
status,
|
||||||
pcie.StatusCheckedAt,
|
pcie.StatusCheckedAt,
|
||||||
@@ -277,7 +309,7 @@ func convertPCIeDevices(hw *models.HardwareConfig, collectedAt string) []Reanima
|
|||||||
MaxLinkWidth: pcie.MaxLinkWidth,
|
MaxLinkWidth: pcie.MaxLinkWidth,
|
||||||
MaxLinkSpeed: pcie.MaxLinkSpeed,
|
MaxLinkSpeed: pcie.MaxLinkSpeed,
|
||||||
SerialNumber: serialNumber,
|
SerialNumber: serialNumber,
|
||||||
Firmware: "", // PCIeDevice doesn't have firmware in models
|
Firmware: firmware,
|
||||||
Status: status,
|
Status: status,
|
||||||
StatusCheckedAt: meta.StatusCheckedAt,
|
StatusCheckedAt: meta.StatusCheckedAt,
|
||||||
StatusChangedAt: meta.StatusChangedAt,
|
StatusChangedAt: meta.StatusChangedAt,
|
||||||
@@ -373,6 +405,57 @@ func convertPCIeDevices(hw *models.HardwareConfig, collectedAt string) []Reanima
|
|||||||
return result
|
return result
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func isNVSwitchPCIeDevice(pcie models.PCIeDevice) bool {
|
||||||
|
deviceClass := strings.TrimSpace(pcie.DeviceClass)
|
||||||
|
if strings.EqualFold(deviceClass, "NVSwitch") {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
slot := normalizeNVSwitchSlotForLookup(pcie.Slot)
|
||||||
|
return strings.HasPrefix(slot, "NVSWITCH")
|
||||||
|
}
|
||||||
|
|
||||||
|
func buildNVSwitchFirmwareBySlot(firmware []models.FirmwareInfo) map[string]string {
|
||||||
|
result := make(map[string]string)
|
||||||
|
for _, fw := range firmware {
|
||||||
|
name := strings.TrimSpace(fw.DeviceName)
|
||||||
|
if !strings.HasPrefix(strings.ToUpper(name), "NVSWITCH ") {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
rest := strings.TrimSpace(name[len("NVSwitch "):])
|
||||||
|
if rest == "" {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
slot := rest
|
||||||
|
if idx := strings.Index(rest, " ("); idx > 0 {
|
||||||
|
slot = strings.TrimSpace(rest[:idx])
|
||||||
|
}
|
||||||
|
slot = normalizeNVSwitchSlotForLookup(slot)
|
||||||
|
if slot == "" {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
if _, exists := result[slot]; exists {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
version := strings.TrimSpace(fw.Version)
|
||||||
|
if version == "" {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
result[slot] = version
|
||||||
|
}
|
||||||
|
return result
|
||||||
|
}
|
||||||
|
|
||||||
|
// normalizeNVSwitchSlotForLookup returns slot trimmed and upper-cased so it can
// be used as a map key. A doubled leading "NVSWITCHNVSWITCH" is collapsed to a
// single "NVSWITCH"; only one repetition is removed.
func normalizeNVSwitchSlotForLookup(slot string) string {
	key := strings.ToUpper(strings.TrimSpace(slot))
	if strings.HasPrefix(key, "NVSWITCHNVSWITCH") {
		// Removing one "NVSWITCH" leaves exactly one in front of the remainder.
		return strings.TrimPrefix(key, "NVSWITCH")
	}
	return key
}
|
||||||
|
|
||||||
func isDisplayClass(deviceClass string) bool {
|
func isDisplayClass(deviceClass string) bool {
|
||||||
class := strings.ToLower(strings.TrimSpace(deviceClass))
|
class := strings.ToLower(strings.TrimSpace(deviceClass))
|
||||||
return strings.Contains(class, "display") ||
|
return strings.Contains(class, "display") ||
|
||||||
|
|||||||
@@ -359,6 +359,12 @@ func TestConvertPCIeDevices(t *testing.T) {
|
|||||||
|
|
||||||
func TestConvertPCIeDevices_NVSwitchWithoutSerialRemainsEmpty(t *testing.T) {
|
func TestConvertPCIeDevices_NVSwitchWithoutSerialRemainsEmpty(t *testing.T) {
|
||||||
hw := &models.HardwareConfig{
|
hw := &models.HardwareConfig{
|
||||||
|
Firmware: []models.FirmwareInfo{
|
||||||
|
{
|
||||||
|
DeviceName: "NVSwitch NVSWITCH1 (965-25612-0002-000)",
|
||||||
|
Version: "96.10.6D.00.01",
|
||||||
|
},
|
||||||
|
},
|
||||||
PCIeDevices: []models.PCIeDevice{
|
PCIeDevices: []models.PCIeDevice{
|
||||||
{
|
{
|
||||||
Slot: "NVSWITCH1",
|
Slot: "NVSWITCH1",
|
||||||
@@ -378,6 +384,9 @@ func TestConvertPCIeDevices_NVSwitchWithoutSerialRemainsEmpty(t *testing.T) {
|
|||||||
if result[0].SerialNumber != "" {
|
if result[0].SerialNumber != "" {
|
||||||
t.Fatalf("expected empty NVSwitch serial, got %q", result[0].SerialNumber)
|
t.Fatalf("expected empty NVSwitch serial, got %q", result[0].SerialNumber)
|
||||||
}
|
}
|
||||||
|
if result[0].Firmware != "96.10.6D.00.01" {
|
||||||
|
t.Fatalf("expected NVSwitch firmware 96.10.6D.00.01, got %q", result[0].Firmware)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestConvertPCIeDevices_SkipsDisplayControllerDuplicates(t *testing.T) {
|
func TestConvertPCIeDevices_SkipsDisplayControllerDuplicates(t *testing.T) {
|
||||||
@@ -646,3 +655,47 @@ func TestConvertToReanimator_DeduplicatesAllSections(t *testing.T) {
|
|||||||
t.Fatalf("expected single #GPU0 record, got %d", gpuCount)
|
t.Fatalf("expected single #GPU0 record, got %d", gpuCount)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestConvertToReanimator_FirmwareExcludesDeviceBoundEntries(t *testing.T) {
|
||||||
|
input := &models.AnalysisResult{
|
||||||
|
Filename: "fw-filter-test.json",
|
||||||
|
Hardware: &models.HardwareConfig{
|
||||||
|
BoardInfo: models.BoardInfo{SerialNumber: "BOARD-001"},
|
||||||
|
Firmware: []models.FirmwareInfo{
|
||||||
|
{DeviceName: "BIOS", Version: "1.0.0"},
|
||||||
|
{DeviceName: "BMC", Version: "2.0.0"},
|
||||||
|
{DeviceName: "GPU GPUSXM1 (692-2G520-0280-501)", Version: "96.00.D0.00.03"},
|
||||||
|
{DeviceName: "NVSwitch NVSWITCH0 (965-25612-0002-000)", Version: "96.10.6D.00.01"},
|
||||||
|
{DeviceName: "NIC #CPU1_PCIE9 (MCX512A-ACAT)", Version: "28.38.1900"},
|
||||||
|
{DeviceName: "CPU0 Microcode", Version: "0x2b000643"},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
out, err := ConvertToReanimator(input)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("ConvertToReanimator() failed: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
if len(out.Hardware.Firmware) != 2 {
|
||||||
|
t.Fatalf("expected only machine-level firmware entries, got %d", len(out.Hardware.Firmware))
|
||||||
|
}
|
||||||
|
|
||||||
|
got := map[string]string{}
|
||||||
|
for _, fw := range out.Hardware.Firmware {
|
||||||
|
got[fw.DeviceName] = fw.Version
|
||||||
|
}
|
||||||
|
|
||||||
|
if got["BIOS"] != "1.0.0" {
|
||||||
|
t.Fatalf("expected BIOS firmware to be kept")
|
||||||
|
}
|
||||||
|
if got["BMC"] != "2.0.0" {
|
||||||
|
t.Fatalf("expected BMC firmware to be kept")
|
||||||
|
}
|
||||||
|
if _, exists := got["GPU GPUSXM1 (692-2G520-0280-501)"]; exists {
|
||||||
|
t.Fatalf("expected GPU firmware to be excluded from hardware.firmware")
|
||||||
|
}
|
||||||
|
if _, exists := got["NVSwitch NVSWITCH0 (965-25612-0002-000)"]; exists {
|
||||||
|
t.Fatalf("expected NVSwitch firmware to be excluded from hardware.firmware")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|||||||
@@ -97,6 +97,7 @@ type HardwareConfig struct {
|
|||||||
// FirmwareInfo represents firmware version information.
//
// Description and BuildTime are omitted from JSON when empty; DeviceName and
// Version are always emitted.
type FirmwareInfo struct {
	DeviceName  string `json:"device_name"`
	Description string `json:"description,omitempty"`
	Version     string `json:"version"`
	BuildTime   string `json:"build_time,omitempty"`
}
|
||||||
@@ -105,6 +106,7 @@ type FirmwareInfo struct {
|
|||||||
type BoardInfo struct {
|
type BoardInfo struct {
|
||||||
Manufacturer string `json:"manufacturer,omitempty"`
|
Manufacturer string `json:"manufacturer,omitempty"`
|
||||||
ProductName string `json:"product_name,omitempty"`
|
ProductName string `json:"product_name,omitempty"`
|
||||||
|
Description string `json:"description,omitempty"`
|
||||||
SerialNumber string `json:"serial_number,omitempty"`
|
SerialNumber string `json:"serial_number,omitempty"`
|
||||||
PartNumber string `json:"part_number,omitempty"`
|
PartNumber string `json:"part_number,omitempty"`
|
||||||
Version string `json:"version,omitempty"`
|
Version string `json:"version,omitempty"`
|
||||||
@@ -115,6 +117,7 @@ type BoardInfo struct {
|
|||||||
type CPU struct {
|
type CPU struct {
|
||||||
Socket int `json:"socket"`
|
Socket int `json:"socket"`
|
||||||
Model string `json:"model"`
|
Model string `json:"model"`
|
||||||
|
Description string `json:"description,omitempty"`
|
||||||
Cores int `json:"cores"`
|
Cores int `json:"cores"`
|
||||||
Threads int `json:"threads"`
|
Threads int `json:"threads"`
|
||||||
FrequencyMHz int `json:"frequency_mhz"`
|
FrequencyMHz int `json:"frequency_mhz"`
|
||||||
@@ -138,6 +141,7 @@ type CPU struct {
|
|||||||
type MemoryDIMM struct {
|
type MemoryDIMM struct {
|
||||||
Slot string `json:"slot"`
|
Slot string `json:"slot"`
|
||||||
Location string `json:"location"`
|
Location string `json:"location"`
|
||||||
|
Description string `json:"description,omitempty"`
|
||||||
Present bool `json:"present"`
|
Present bool `json:"present"`
|
||||||
SizeMB int `json:"size_mb"`
|
SizeMB int `json:"size_mb"`
|
||||||
Type string `json:"type"`
|
Type string `json:"type"`
|
||||||
@@ -162,6 +166,7 @@ type Storage struct {
|
|||||||
Slot string `json:"slot"`
|
Slot string `json:"slot"`
|
||||||
Type string `json:"type"`
|
Type string `json:"type"`
|
||||||
Model string `json:"model"`
|
Model string `json:"model"`
|
||||||
|
Description string `json:"description,omitempty"`
|
||||||
SizeGB int `json:"size_gb"`
|
SizeGB int `json:"size_gb"`
|
||||||
SerialNumber string `json:"serial_number,omitempty"`
|
SerialNumber string `json:"serial_number,omitempty"`
|
||||||
Manufacturer string `json:"manufacturer,omitempty"`
|
Manufacturer string `json:"manufacturer,omitempty"`
|
||||||
@@ -182,6 +187,7 @@ type Storage struct {
|
|||||||
// PCIeDevice represents a PCIe device
|
// PCIeDevice represents a PCIe device
|
||||||
type PCIeDevice struct {
|
type PCIeDevice struct {
|
||||||
Slot string `json:"slot"`
|
Slot string `json:"slot"`
|
||||||
|
Description string `json:"description,omitempty"`
|
||||||
VendorID int `json:"vendor_id"`
|
VendorID int `json:"vendor_id"`
|
||||||
DeviceID int `json:"device_id"`
|
DeviceID int `json:"device_id"`
|
||||||
BDF string `json:"bdf"`
|
BDF string `json:"bdf"`
|
||||||
@@ -207,6 +213,7 @@ type PCIeDevice struct {
|
|||||||
type NIC struct {
|
type NIC struct {
|
||||||
Name string `json:"name"`
|
Name string `json:"name"`
|
||||||
Model string `json:"model"`
|
Model string `json:"model"`
|
||||||
|
Description string `json:"description,omitempty"`
|
||||||
MACAddress string `json:"mac_address"`
|
MACAddress string `json:"mac_address"`
|
||||||
SpeedMbps int `json:"speed_mbps,omitempty"`
|
SpeedMbps int `json:"speed_mbps,omitempty"`
|
||||||
SerialNumber string `json:"serial_number,omitempty"`
|
SerialNumber string `json:"serial_number,omitempty"`
|
||||||
@@ -217,6 +224,7 @@ type PSU struct {
|
|||||||
Slot string `json:"slot"`
|
Slot string `json:"slot"`
|
||||||
Present bool `json:"present"`
|
Present bool `json:"present"`
|
||||||
Model string `json:"model"`
|
Model string `json:"model"`
|
||||||
|
Description string `json:"description,omitempty"`
|
||||||
Vendor string `json:"vendor,omitempty"`
|
Vendor string `json:"vendor,omitempty"`
|
||||||
WattageW int `json:"wattage_w,omitempty"`
|
WattageW int `json:"wattage_w,omitempty"`
|
||||||
SerialNumber string `json:"serial_number,omitempty"`
|
SerialNumber string `json:"serial_number,omitempty"`
|
||||||
@@ -242,6 +250,7 @@ type GPU struct {
|
|||||||
Slot string `json:"slot"`
|
Slot string `json:"slot"`
|
||||||
Location string `json:"location,omitempty"`
|
Location string `json:"location,omitempty"`
|
||||||
Model string `json:"model"`
|
Model string `json:"model"`
|
||||||
|
Description string `json:"description,omitempty"`
|
||||||
Manufacturer string `json:"manufacturer,omitempty"`
|
Manufacturer string `json:"manufacturer,omitempty"`
|
||||||
VendorID int `json:"vendor_id,omitempty"`
|
VendorID int `json:"vendor_id,omitempty"`
|
||||||
DeviceID int `json:"device_id,omitempty"`
|
DeviceID int `json:"device_id,omitempty"`
|
||||||
@@ -280,6 +289,7 @@ type NetworkAdapter struct {
|
|||||||
Location string `json:"location"`
|
Location string `json:"location"`
|
||||||
Present bool `json:"present"`
|
Present bool `json:"present"`
|
||||||
Model string `json:"model"`
|
Model string `json:"model"`
|
||||||
|
Description string `json:"description,omitempty"`
|
||||||
Vendor string `json:"vendor,omitempty"`
|
Vendor string `json:"vendor,omitempty"`
|
||||||
VendorID int `json:"vendor_id,omitempty"`
|
VendorID int `json:"vendor_id,omitempty"`
|
||||||
DeviceID int `json:"device_id,omitempty"`
|
DeviceID int `json:"device_id,omitempty"`
|
||||||
|
|||||||
274
internal/parser/vendors/nvidia/component_status_time.go
vendored
Normal file
274
internal/parser/vendors/nvidia/component_status_time.go
vendored
Normal file
@@ -0,0 +1,274 @@
|
|||||||
|
package nvidia
|
||||||
|
|
||||||
|
import (
|
||||||
|
"regexp"
|
||||||
|
"strconv"
|
||||||
|
"strings"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"git.mchus.pro/mchus/logpile/internal/models"
|
||||||
|
"git.mchus.pro/mchus/logpile/internal/parser"
|
||||||
|
)
|
||||||
|
|
||||||
|
// Patterns used to pull timestamps and component identifiers out of NVIDIA
// diagnostic logs.
var (
	// verboseRunTestingLineRegex captures the "YYYY-MM-DD hh:mm:ss" timestamp and
	// the test name from a "<timestamp>,<millis> - Testing <name>" line.
	verboseRunTestingLineRegex = regexp.MustCompile(`^(\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}),\d+\s+-\s+Testing\s+([a-zA-Z0-9_]+)\s*$`)

	// runLogStartTimeRegex captures the timestamp of a
	// "Start time Thu, 22 Jan 2026 07:42:26" style line.
	runLogStartTimeRegex = regexp.MustCompile(`^Start time\s+([A-Za-z]{3}, \d{2} [A-Za-z]{3} \d{4} \d{2}:\d{2}:\d{2})\s*$`)

	// runLogTestDurationRegex captures test name, minutes and seconds from a
	// "Testing <name> <result> [ M:SSs ]" line.
	runLogTestDurationRegex = regexp.MustCompile(`^Testing\s+([a-zA-Z0-9_]+)\s+\S+\s+\[\s*([0-9]+):([0-9]{2})s\s*\]\s*$`)

	// modsStartLineRegex captures the timestamp of a "MODS start: ..." line
	// anywhere in a multi-line text.
	modsStartLineRegex = regexp.MustCompile(`(?m)^MODS start:\s+([A-Za-z]{3}\s+[A-Za-z]{3}\s+\d{1,2}\s+\d{2}:\d{2}:\d{2}\s+\d{4})\s*$`)

	// gpuFieldiagOutputPathRegex captures the SXM index and the serial from a
	// path ending in gpu_fieldiag/sxm<idx>_sn_<serial>/output.log, matching
	// case-insensitively and accepting "/" or "\" as separators.
	gpuFieldiagOutputPathRegex = regexp.MustCompile(`(?i)gpu_fieldiag[\\/]+sxm(\d+)_sn_([^\\/]+)[\\/]+output\.log$`)

	// nvswitchDevnameRegex captures the NVSWITCH<idx> name from a
	// "devname=<address>,NVSWITCH<idx>" argument.
	nvswitchDevnameRegex = regexp.MustCompile(`devname=[^,\s]+,(NVSWITCH\d+)`)
)
|
||||||
|
|
||||||
|
// componentCheckTimes holds the check timestamps gathered from NVIDIA logs.
// The *Default fields are fallbacks used when no per-component entry exists.
type componentCheckTimes struct {
	// GPUDefault is the fallback check time for GPUs.
	GPUDefault time.Time
	// NVSwitchDefault is the fallback check time for NVSwitch devices.
	NVSwitchDefault time.Time
	// GPUBySerial maps a GPU serial to its check time.
	GPUBySerial map[string]time.Time
	// GPUBySlot maps a GPU slot key of the form GPUSXM<idx> to its check time.
	GPUBySlot map[string]time.Time
	// NVSwitchBySlot maps a slot key of the form NVSWITCH<idx> to its check time.
	NVSwitchBySlot map[string]time.Time
}
|
||||||
|
|
||||||
|
// CollectGPUAndNVSwitchCheckTimes extracts GPU/NVSwitch check timestamps from NVIDIA logs.
|
||||||
|
// Priority:
|
||||||
|
// 1) verbose_run.log "Testing <test>" timestamps
|
||||||
|
// 2) run.log start time + cumulative durations
|
||||||
|
func CollectGPUAndNVSwitchCheckTimes(files []parser.ExtractedFile) componentCheckTimes {
|
||||||
|
gpuBySerial := make(map[string]time.Time)
|
||||||
|
gpuBySlot := make(map[string]time.Time)
|
||||||
|
nvsBySlot := make(map[string]time.Time)
|
||||||
|
|
||||||
|
for _, f := range files {
|
||||||
|
path := strings.TrimSpace(f.Path)
|
||||||
|
pathLower := strings.ToLower(path)
|
||||||
|
|
||||||
|
// Per-GPU timestamp from gpu_fieldiag/<SXMx_SN_serial>/output.log
|
||||||
|
if strings.HasSuffix(pathLower, "output.log") && strings.Contains(pathLower, "gpu_fieldiag/") {
|
||||||
|
ts := parseModsStartTime(f.Content)
|
||||||
|
if ts.IsZero() {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
matches := gpuFieldiagOutputPathRegex.FindStringSubmatch(path)
|
||||||
|
if len(matches) == 3 {
|
||||||
|
slot := "GPUSXM" + strings.TrimSpace(matches[1])
|
||||||
|
serial := strings.TrimSpace(matches[2])
|
||||||
|
if slot != "" {
|
||||||
|
gpuBySlot[slot] = ts
|
||||||
|
}
|
||||||
|
if serial != "" {
|
||||||
|
gpuBySerial[serial] = ts
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Per-NVSwitch timestamp and slot list from nvswitch/output.log
|
||||||
|
if strings.HasSuffix(pathLower, "nvswitch/output.log") || strings.HasSuffix(pathLower, "nvswitch\\output.log") {
|
||||||
|
ts := parseModsStartTime(f.Content)
|
||||||
|
if ts.IsZero() {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
for _, slot := range parseNVSwitchSlotsFromOutput(f.Content) {
|
||||||
|
nvsBySlot[slot] = ts
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
testStarts := make(map[string]time.Time)
|
||||||
|
|
||||||
|
if f := parser.FindFileByName(files, "verbose_run.log"); f != nil {
|
||||||
|
for testName, ts := range parseVerboseRunTestStartTimes(f.Content) {
|
||||||
|
testStarts[strings.ToLower(strings.TrimSpace(testName))] = ts
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if len(testStarts) == 0 {
|
||||||
|
if f := parser.FindFileByName(files, "run.log"); f != nil {
|
||||||
|
for testName, ts := range parseRunLogTestStartTimes(f.Content) {
|
||||||
|
testStarts[strings.ToLower(strings.TrimSpace(testName))] = ts
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return componentCheckTimes{
|
||||||
|
GPUDefault: pickFirstTestTime(testStarts, "gpu_fieldiag", "gpumem", "gpustress", "pcie", "inventory"),
|
||||||
|
NVSwitchDefault: pickFirstTestTime(testStarts, "nvswitch", "inventory"),
|
||||||
|
GPUBySerial: gpuBySerial,
|
||||||
|
GPUBySlot: gpuBySlot,
|
||||||
|
NVSwitchBySlot: nvsBySlot,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// pickFirstTestTime returns the first non-zero timestamp found in testStarts
// for the given names, tried in order. Each name is trimmed and lower-cased
// before the lookup. It returns the zero time when nothing matches.
func pickFirstTestTime(testStarts map[string]time.Time, names ...string) time.Time {
	for _, candidate := range names {
		key := strings.ToLower(strings.TrimSpace(candidate))
		ts, ok := testStarts[key]
		if !ok || ts.IsZero() {
			continue
		}
		return ts
	}
	return time.Time{}
}
|
||||||
|
|
||||||
|
func parseVerboseRunTestStartTimes(content []byte) map[string]time.Time {
|
||||||
|
result := make(map[string]time.Time)
|
||||||
|
lines := strings.Split(string(content), "\n")
|
||||||
|
for _, line := range lines {
|
||||||
|
matches := verboseRunTestingLineRegex.FindStringSubmatch(strings.TrimSpace(line))
|
||||||
|
if len(matches) != 3 {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
ts, err := time.ParseInLocation("2006-01-02 15:04:05", strings.TrimSpace(matches[1]), time.UTC)
|
||||||
|
if err != nil {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
testName := strings.ToLower(strings.TrimSpace(matches[2]))
|
||||||
|
if testName == "" {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
if _, exists := result[testName]; !exists {
|
||||||
|
result[testName] = ts
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return result
|
||||||
|
}
|
||||||
|
|
||||||
|
func parseRunLogTestStartTimes(content []byte) map[string]time.Time {
|
||||||
|
lines := strings.Split(string(content), "\n")
|
||||||
|
start := time.Time{}
|
||||||
|
for _, line := range lines {
|
||||||
|
matches := runLogStartTimeRegex.FindStringSubmatch(strings.TrimSpace(line))
|
||||||
|
if len(matches) != 2 {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
parsed, err := time.ParseInLocation("Mon, 02 Jan 2006 15:04:05", strings.TrimSpace(matches[1]), time.UTC)
|
||||||
|
if err != nil {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
start = parsed
|
||||||
|
break
|
||||||
|
}
|
||||||
|
if start.IsZero() {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
result := make(map[string]time.Time)
|
||||||
|
cursor := start
|
||||||
|
for _, line := range lines {
|
||||||
|
matches := runLogTestDurationRegex.FindStringSubmatch(strings.TrimSpace(line))
|
||||||
|
if len(matches) != 4 {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
testName := strings.ToLower(strings.TrimSpace(matches[1]))
|
||||||
|
minutes, errMin := strconv.Atoi(strings.TrimSpace(matches[2]))
|
||||||
|
seconds, errSec := strconv.Atoi(strings.TrimSpace(matches[3]))
|
||||||
|
if errMin != nil || errSec != nil {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
if _, exists := result[testName]; !exists {
|
||||||
|
result[testName] = cursor
|
||||||
|
}
|
||||||
|
cursor = cursor.Add(time.Duration(minutes)*time.Minute + time.Duration(seconds)*time.Second)
|
||||||
|
}
|
||||||
|
|
||||||
|
return result
|
||||||
|
}
|
||||||
|
|
||||||
|
func parseModsStartTime(content []byte) time.Time {
|
||||||
|
matches := modsStartLineRegex.FindSubmatch(content)
|
||||||
|
if len(matches) != 2 {
|
||||||
|
return time.Time{}
|
||||||
|
}
|
||||||
|
tsRaw := strings.TrimSpace(string(matches[1]))
|
||||||
|
if tsRaw == "" {
|
||||||
|
return time.Time{}
|
||||||
|
}
|
||||||
|
ts, err := time.ParseInLocation("Mon Jan 2 15:04:05 2006", tsRaw, time.UTC)
|
||||||
|
if err != nil {
|
||||||
|
return time.Time{}
|
||||||
|
}
|
||||||
|
return ts
|
||||||
|
}
|
||||||
|
|
||||||
|
func parseNVSwitchSlotsFromOutput(content []byte) []string {
|
||||||
|
matches := nvswitchDevnameRegex.FindAllSubmatch(content, -1)
|
||||||
|
if len(matches) == 0 {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
seen := make(map[string]struct{})
|
||||||
|
out := make([]string, 0, len(matches))
|
||||||
|
for _, m := range matches {
|
||||||
|
if len(m) != 2 {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
slot := strings.ToUpper(strings.TrimSpace(string(m[1])))
|
||||||
|
if slot == "" {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
if _, exists := seen[slot]; exists {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
seen[slot] = struct{}{}
|
||||||
|
out = append(out, slot)
|
||||||
|
}
|
||||||
|
return out
|
||||||
|
}
|
||||||
|
|
||||||
|
// ApplyGPUAndNVSwitchCheckTimes writes parsed check timestamps to component status metadata.
|
||||||
|
func ApplyGPUAndNVSwitchCheckTimes(result *models.AnalysisResult, times componentCheckTimes) {
|
||||||
|
if result == nil || result.Hardware == nil {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
for i := range result.Hardware.GPUs {
|
||||||
|
gpu := &result.Hardware.GPUs[i]
|
||||||
|
ts := time.Time{}
|
||||||
|
if serial := strings.TrimSpace(gpu.SerialNumber); serial != "" {
|
||||||
|
ts = times.GPUBySerial[serial]
|
||||||
|
}
|
||||||
|
if ts.IsZero() {
|
||||||
|
ts = times.GPUBySlot[strings.ToUpper(strings.TrimSpace(gpu.Slot))]
|
||||||
|
}
|
||||||
|
if ts.IsZero() {
|
||||||
|
ts = times.GPUDefault
|
||||||
|
}
|
||||||
|
if ts.IsZero() {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
gpu.StatusCheckedAt = ts
|
||||||
|
status := strings.TrimSpace(gpu.Status)
|
||||||
|
if status == "" {
|
||||||
|
status = "Unknown"
|
||||||
|
}
|
||||||
|
gpu.StatusAtCollect = &models.StatusAtCollection{
|
||||||
|
Status: status,
|
||||||
|
At: ts,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
for i := range result.Hardware.PCIeDevices {
|
||||||
|
dev := &result.Hardware.PCIeDevices[i]
|
||||||
|
slot := normalizeNVSwitchSlot(strings.TrimSpace(dev.Slot))
|
||||||
|
if slot == "" {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
slot = strings.ToUpper(slot)
|
||||||
|
if !strings.EqualFold(strings.TrimSpace(dev.DeviceClass), "NVSwitch") &&
|
||||||
|
!strings.HasPrefix(slot, "NVSWITCH") {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
ts := times.NVSwitchBySlot[slot]
|
||||||
|
if ts.IsZero() {
|
||||||
|
ts = times.NVSwitchDefault
|
||||||
|
}
|
||||||
|
if ts.IsZero() {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
dev.StatusCheckedAt = ts
|
||||||
|
status := strings.TrimSpace(dev.Status)
|
||||||
|
if status == "" {
|
||||||
|
status = "Unknown"
|
||||||
|
}
|
||||||
|
dev.StatusAtCollect = &models.StatusAtCollection{
|
||||||
|
Status: status,
|
||||||
|
At: ts,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
143
internal/parser/vendors/nvidia/component_status_time_test.go
vendored
Normal file
143
internal/parser/vendors/nvidia/component_status_time_test.go
vendored
Normal file
@@ -0,0 +1,143 @@
|
|||||||
|
package nvidia
|
||||||
|
|
||||||
|
import (
|
||||||
|
"testing"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"git.mchus.pro/mchus/logpile/internal/models"
|
||||||
|
"git.mchus.pro/mchus/logpile/internal/parser"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestParseVerboseRunTestStartTimes(t *testing.T) {
|
||||||
|
content := []byte(`
|
||||||
|
2026-01-22 09:11:32,458 - Testing nvswitch
|
||||||
|
2026-01-22 09:45:36,016 - Testing gpu_fieldiag
|
||||||
|
`)
|
||||||
|
got := parseVerboseRunTestStartTimes(content)
|
||||||
|
|
||||||
|
nvs := got["nvswitch"]
|
||||||
|
if nvs.IsZero() {
|
||||||
|
t.Fatalf("expected nvswitch timestamp")
|
||||||
|
}
|
||||||
|
gpu := got["gpu_fieldiag"]
|
||||||
|
if gpu.IsZero() {
|
||||||
|
t.Fatalf("expected gpu_fieldiag timestamp")
|
||||||
|
}
|
||||||
|
if nvs.Format(time.RFC3339) != "2026-01-22T09:11:32Z" {
|
||||||
|
t.Fatalf("unexpected nvswitch timestamp: %s", nvs.Format(time.RFC3339))
|
||||||
|
}
|
||||||
|
if gpu.Format(time.RFC3339) != "2026-01-22T09:45:36Z" {
|
||||||
|
t.Fatalf("unexpected gpu_fieldiag timestamp: %s", gpu.Format(time.RFC3339))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestParseRunLogTestStartTimes(t *testing.T) {
|
||||||
|
content := []byte(`
|
||||||
|
Start time Thu, 22 Jan 2026 07:42:26
|
||||||
|
Testing gpumem FAILED [ 26:12s ]
|
||||||
|
Testing gpustress OK [ 7:10s ]
|
||||||
|
Testing nvswitch OK [ 9:25s ]
|
||||||
|
`)
|
||||||
|
|
||||||
|
got := parseRunLogTestStartTimes(content)
|
||||||
|
if got["gpumem"].Format(time.RFC3339) != "2026-01-22T07:42:26Z" {
|
||||||
|
t.Fatalf("unexpected gpumem start: %s", got["gpumem"].Format(time.RFC3339))
|
||||||
|
}
|
||||||
|
if got["gpustress"].Format(time.RFC3339) != "2026-01-22T08:08:38Z" {
|
||||||
|
t.Fatalf("unexpected gpustress start: %s", got["gpustress"].Format(time.RFC3339))
|
||||||
|
}
|
||||||
|
if got["nvswitch"].Format(time.RFC3339) != "2026-01-22T08:15:48Z" {
|
||||||
|
t.Fatalf("unexpected nvswitch start: %s", got["nvswitch"].Format(time.RFC3339))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestApplyGPUAndNVSwitchCheckTimes(t *testing.T) {
|
||||||
|
gpuTs := time.Date(2026, 1, 22, 9, 45, 36, 0, time.UTC)
|
||||||
|
nvsTs := time.Date(2026, 1, 22, 9, 11, 32, 0, time.UTC)
|
||||||
|
|
||||||
|
result := &models.AnalysisResult{
|
||||||
|
Hardware: &models.HardwareConfig{
|
||||||
|
GPUs: []models.GPU{
|
||||||
|
{Slot: "GPUSXM5", Status: "FAIL"},
|
||||||
|
},
|
||||||
|
PCIeDevices: []models.PCIeDevice{
|
||||||
|
{Slot: "NVSWITCH0", DeviceClass: "NVSwitch", Status: "PASS"},
|
||||||
|
{Slot: "NIC0", DeviceClass: "NetworkController", Status: "PASS"},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
ApplyGPUAndNVSwitchCheckTimes(result, componentCheckTimes{
|
||||||
|
GPUBySlot: map[string]time.Time{"GPUSXM5": gpuTs},
|
||||||
|
NVSwitchBySlot: map[string]time.Time{"NVSWITCH0": nvsTs},
|
||||||
|
})
|
||||||
|
|
||||||
|
if got := result.Hardware.GPUs[0].StatusCheckedAt; !got.Equal(gpuTs) {
|
||||||
|
t.Fatalf("expected gpu status_checked_at %s, got %s", gpuTs.Format(time.RFC3339), got.Format(time.RFC3339))
|
||||||
|
}
|
||||||
|
if result.Hardware.GPUs[0].StatusAtCollect == nil || !result.Hardware.GPUs[0].StatusAtCollect.At.Equal(gpuTs) {
|
||||||
|
t.Fatalf("expected gpu status_at_collection.at %s", gpuTs.Format(time.RFC3339))
|
||||||
|
}
|
||||||
|
if got := result.Hardware.PCIeDevices[0].StatusCheckedAt; !got.Equal(nvsTs) {
|
||||||
|
t.Fatalf("expected nvswitch status_checked_at %s, got %s", nvsTs.Format(time.RFC3339), got.Format(time.RFC3339))
|
||||||
|
}
|
||||||
|
if result.Hardware.PCIeDevices[0].StatusAtCollect == nil || !result.Hardware.PCIeDevices[0].StatusAtCollect.At.Equal(nvsTs) {
|
||||||
|
t.Fatalf("expected nvswitch status_at_collection.at %s", nvsTs.Format(time.RFC3339))
|
||||||
|
}
|
||||||
|
if !result.Hardware.PCIeDevices[1].StatusCheckedAt.IsZero() {
|
||||||
|
t.Fatalf("expected non-nvswitch device status_checked_at to stay zero")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestCollectGPUAndNVSwitchCheckTimes_FromVerboseRun(t *testing.T) {
|
||||||
|
files := []parser.ExtractedFile{
|
||||||
|
{
|
||||||
|
Path: "verbose_run.log",
|
||||||
|
Content: []byte(`
|
||||||
|
2026-01-22 09:11:32,458 - Testing nvswitch
|
||||||
|
2026-01-22 09:45:36,016 - Testing gpu_fieldiag
|
||||||
|
`),
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
got := CollectGPUAndNVSwitchCheckTimes(files)
|
||||||
|
if got.GPUDefault.Format(time.RFC3339) != "2026-01-22T09:45:36Z" {
|
||||||
|
t.Fatalf("unexpected GPU check time: %s", got.GPUDefault.Format(time.RFC3339))
|
||||||
|
}
|
||||||
|
if got.NVSwitchDefault.Format(time.RFC3339) != "2026-01-22T09:11:32Z" {
|
||||||
|
t.Fatalf("unexpected NVSwitch check time: %s", got.NVSwitchDefault.Format(time.RFC3339))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestCollectGPUAndNVSwitchCheckTimes_FromComponentOutputLogs(t *testing.T) {
|
||||||
|
files := []parser.ExtractedFile{
|
||||||
|
{
|
||||||
|
Path: "gpu_fieldiag/SXM5_SN_1653925025497/output.log",
|
||||||
|
Content: []byte(`
|
||||||
|
$ some command
|
||||||
|
MODS start: Thu Jan 22 09:45:36 2026
|
||||||
|
`),
|
||||||
|
},
|
||||||
|
{
|
||||||
|
Path: "nvswitch/output.log",
|
||||||
|
Content: []byte(`
|
||||||
|
$ cmd devname=0000:08:00.0,NVSWITCH3 devname=0000:07:00.0,NVSWITCH2 devname=0000:06:00.0,NVSWITCH1 devname=0000:05:00.0,NVSWITCH0
|
||||||
|
MODS start: Thu Jan 22 09:11:32 2026
|
||||||
|
`),
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
got := CollectGPUAndNVSwitchCheckTimes(files)
|
||||||
|
if got.GPUBySerial["1653925025497"].Format(time.RFC3339) != "2026-01-22T09:45:36Z" {
|
||||||
|
t.Fatalf("unexpected GPU serial check time: %s", got.GPUBySerial["1653925025497"].Format(time.RFC3339))
|
||||||
|
}
|
||||||
|
if got.GPUBySlot["GPUSXM5"].Format(time.RFC3339) != "2026-01-22T09:45:36Z" {
|
||||||
|
t.Fatalf("unexpected GPU slot check time: %s", got.GPUBySlot["GPUSXM5"].Format(time.RFC3339))
|
||||||
|
}
|
||||||
|
if got.NVSwitchBySlot["NVSWITCH0"].Format(time.RFC3339) != "2026-01-22T09:11:32Z" {
|
||||||
|
t.Fatalf("unexpected NVSwitch0 check time: %s", got.NVSwitchBySlot["NVSWITCH0"].Format(time.RFC3339))
|
||||||
|
}
|
||||||
|
if got.NVSwitchBySlot["NVSWITCH3"].Format(time.RFC3339) != "2026-01-22T09:11:32Z" {
|
||||||
|
t.Fatalf("unexpected NVSwitch3 check time: %s", got.NVSwitchBySlot["NVSWITCH3"].Format(time.RFC3339))
|
||||||
|
}
|
||||||
|
}
|
||||||
288
internal/parser/vendors/nvidia/gpu_model.go
vendored
288
internal/parser/vendors/nvidia/gpu_model.go
vendored
@@ -2,8 +2,8 @@ package nvidia
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"encoding/json"
|
"encoding/json"
|
||||||
"fmt"
|
|
||||||
"regexp"
|
"regexp"
|
||||||
|
"strconv"
|
||||||
"strings"
|
"strings"
|
||||||
|
|
||||||
"git.mchus.pro/mchus/logpile/internal/models"
|
"git.mchus.pro/mchus/logpile/internal/models"
|
||||||
@@ -13,8 +13,11 @@ import (
|
|||||||
// Patterns used to map GPUs to SKUs and part numbers.
var (
	// gpuNameWithSerialRegex matches names such as "SXM5_SN_1653925025497";
	// group 1 is the SXM index, group 2 the serial.
	gpuNameWithSerialRegex = regexp.MustCompile(`^SXM(\d+)_SN_(.+)$`)
	// gpuNameSlotOnlyRegex matches names such as "SXM5"; group 1 is the SXM index.
	gpuNameSlotOnlyRegex = regexp.MustCompile(`^SXM(\d+)$`)
	// skuCodeRegex matches a SKU code at the start of a string, e.g. "G520-0280"
	// or "G520.0280"; groups 1 and 2 are the two halves of the code.
	skuCodeRegex = regexp.MustCompile(`^(G\d{3})[.-](\d{4})`)
	// skuCodeInsideRegex finds a SKU code embedded in a longer upper-cased
	// string such as "692-2G520-0280-501". The code must be delimited by
	// non-alphanumerics (or string boundaries); a single leading digit before
	// the "G" is tolerated.
	skuCodeInsideRegex = regexp.MustCompile(`(?:^|[^A-Z0-9])(?:\d)?(G\d{3})[.-](\d{4})(?:[^A-Z0-9]|$)`)
	// inforomPathRegex matches ".../inforom/SXM<n>[_SN_<serial>]/fieldiag.jso"
	// (or "checkinforom"), case-insensitively, with either slash style.
	// Group 2 is "SXM<n>", group 3 the index, group 4 the optional serial.
	inforomPathRegex = regexp.MustCompile(`(?i)(?:^|[\\/])(checkinforom|inforom)[\\/](SXM(\d+))(?:_SN_([^\\/]+))?[\\/]fieldiag\.jso$`)
	// inforomProductPNRegex captures the value of a "product_part_num" JSON field.
	inforomProductPNRegex = regexp.MustCompile(`"product_part_num"\s*:\s*"([^"]+)"`)
	// inforomSerialRegex captures the value of a "serial_number" JSON field.
	inforomSerialRegex = regexp.MustCompile(`"serial_number"\s*:\s*"([^"]+)"`)
)
|
||||||
|
|
||||||
type testSpecData struct {
|
type testSpecData struct {
|
||||||
@@ -22,6 +25,7 @@ type testSpecData struct {
|
|||||||
VirtualID string `json:"virtual_id"`
|
VirtualID string `json:"virtual_id"`
|
||||||
Args struct {
|
Args struct {
|
||||||
SKUToFile map[string]string `json:"sku_to_sku_json_file_map"`
|
SKUToFile map[string]string `json:"sku_to_sku_json_file_map"`
|
||||||
|
ModsMapping map[string]json.RawMessage `json:"mods_mapping"`
|
||||||
} `json:"args"`
|
} `json:"args"`
|
||||||
} `json:"actions"`
|
} `json:"actions"`
|
||||||
}
|
}
|
||||||
@@ -35,49 +39,111 @@ type inventoryFieldDiagSummary struct {
|
|||||||
} `json:"ModsRuns"`
|
} `json:"ModsRuns"`
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// hardcodedSKUToFileMap maps known board SKU codes to the name of the SKU
// description file. parseSKUToFileMap seeds its result from this table, and
// these entries take priority over mappings found in testspec.json.
var hardcodedSKUToFileMap = map[string]string{
	"G520-0200": "sku_hgx-h100-8-gpu_80g_aircooled_field.json",
	"G520-0201": "sku_hgx-h100-8-gpu_80g_aircooled_field.json",
	"G520-0202": "sku_hgx-h100-8-gpu_80g_tpol_field.json",
	"G520-0203": "sku_hgx-h100-8-gpu_80g_tpol_field.json",
	"G520-0205": "sku_hgx-h800-8-gpu_80g_aircooled_field.json",
	"G520-0207": "sku_hgx-h800-8-gpu_80g_tpol_field.json",
	"G520-0221": "sku_hgx-h100-8-gpu_96g_aircooled_field.json",
	"G520-0236": "sku_hgx-h20-8-gpu_96g_aircooled_field.json",
	"G520-0238": "sku_hgx-h20-8-gpu_96g_tpol_field.json",
	"G520-0266": "sku_hgx-h20-8-gpu_141g_aircooled_field.json",
	"G520-0280": "sku_hgx-h200-8-gpu_141g_aircooled_field.json",
	"G520-0282": "sku_hgx-h200-8-gpu_141g_tpol_field.json",
	"G520-0292": "sku_hgx-h100-8-gpu_sku_292_field.json",
}
|
||||||
|
|
||||||
// ApplyGPUModelsFromSKU updates GPU model names using SKU mapping from testspec.json.
|
// ApplyGPUModelsFromSKU updates GPU model names using SKU mapping from testspec.json.
|
||||||
// Mapping source:
|
// Mapping source:
|
||||||
// - inventory/fieldiag_summary.json: GPUName -> BoardInfo(SKU)
|
// - inventory/fieldiag_summary.json: GPUName -> BoardInfo(SKU)
|
||||||
// - testspec.json: SKU -> sku_hgx-... filename
|
// - hardcoded SKU mapping
|
||||||
|
// - testspec.json: SKU -> sku_hgx-... filename (fallback for unknown hardcoded SKU)
|
||||||
|
// - inforom/*/fieldiag.jso: product_part_num (full P/N with embedded SKU)
|
||||||
|
// - testspec.json gpu_fieldiag.mods_mapping: DeviceID -> GPU generation (last fallback for description)
|
||||||
func ApplyGPUModelsFromSKU(files []parser.ExtractedFile, result *models.AnalysisResult) {
|
func ApplyGPUModelsFromSKU(files []parser.ExtractedFile, result *models.AnalysisResult) {
|
||||||
if result == nil || result.Hardware == nil || len(result.Hardware.GPUs) == 0 {
|
if result == nil || result.Hardware == nil || len(result.Hardware.GPUs) == 0 {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
skuToFile := parseSKUToFileMap(files)
|
skuToFile := parseSKUToFileMap(files)
|
||||||
if len(skuToFile) == 0 {
|
generationByDeviceID := parseGenerationByDeviceID(files)
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
serialToSKU, slotToSKU := parseGPUSKUMapping(files)
|
serialToSKU, slotToSKU, serialToPartNumber, slotToPartNumber := parseGPUSKUMapping(files)
|
||||||
if len(serialToSKU) == 0 && len(slotToSKU) == 0 {
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
for i := range result.Hardware.GPUs {
|
for i := range result.Hardware.GPUs {
|
||||||
gpu := &result.Hardware.GPUs[i]
|
gpu := &result.Hardware.GPUs[i]
|
||||||
sku := ""
|
slot := strings.TrimSpace(gpu.Slot)
|
||||||
|
serial := strings.TrimSpace(gpu.SerialNumber)
|
||||||
|
|
||||||
if serial := strings.TrimSpace(gpu.SerialNumber); serial != "" {
|
if gpu.PartNumber == "" && serial != "" {
|
||||||
|
if pn := strings.TrimSpace(serialToPartNumber[serial]); pn != "" {
|
||||||
|
gpu.PartNumber = pn
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if gpu.PartNumber == "" {
|
||||||
|
if pn := strings.TrimSpace(slotToPartNumber[slot]); pn != "" {
|
||||||
|
gpu.PartNumber = pn
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if partNumber := strings.TrimSpace(gpu.PartNumber); partNumber != "" {
|
||||||
|
gpu.Model = partNumber
|
||||||
|
}
|
||||||
|
|
||||||
|
sku := extractSKUFromPartNumber(gpu.PartNumber)
|
||||||
|
if sku == "" && serial != "" {
|
||||||
sku = serialToSKU[serial]
|
sku = serialToSKU[serial]
|
||||||
}
|
}
|
||||||
if sku == "" {
|
if sku == "" {
|
||||||
sku = slotToSKU[strings.TrimSpace(gpu.Slot)]
|
sku = slotToSKU[slot]
|
||||||
}
|
}
|
||||||
if sku == "" {
|
if sku != "" {
|
||||||
|
if desc := resolveDescriptionFromSKU(sku, skuToFile); desc != "" {
|
||||||
|
gpu.Description = desc
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
model := resolveModelFromSKU(sku, skuToFile)
|
|
||||||
if model == "" {
|
|
||||||
continue
|
|
||||||
}
|
}
|
||||||
|
|
||||||
gpu.Model = model
|
if gen := resolveGenerationDescription(gpu.DeviceID, generationByDeviceID); gen != "" {
|
||||||
|
gpu.Description = gen
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func parseSKUToFileMap(files []parser.ExtractedFile) map[string]string {
|
func parseSKUToFileMap(files []parser.ExtractedFile) map[string]string {
|
||||||
|
result := make(map[string]string, len(hardcodedSKUToFileMap))
|
||||||
|
for sku, file := range hardcodedSKUToFileMap {
|
||||||
|
result[normalizeSKUCode(sku)] = strings.TrimSpace(file)
|
||||||
|
}
|
||||||
|
|
||||||
|
specFile := parser.FindFileByName(files, "testspec.json")
|
||||||
|
if specFile == nil {
|
||||||
|
return result
|
||||||
|
}
|
||||||
|
|
||||||
|
var spec testSpecData
|
||||||
|
if err := json.Unmarshal(specFile.Content, &spec); err != nil {
|
||||||
|
return result
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, action := range spec.Actions {
|
||||||
|
for sku, file := range action.Args.SKUToFile {
|
||||||
|
normSKU := normalizeSKUCode(sku)
|
||||||
|
if normSKU == "" {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
// Priority: hardcoded mapping wins, testspec extends unknown SKU list.
|
||||||
|
if _, exists := result[normSKU]; !exists {
|
||||||
|
result[normSKU] = strings.TrimSpace(file)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return result
|
||||||
|
}
|
||||||
|
|
||||||
|
func parseGenerationByDeviceID(files []parser.ExtractedFile) map[string]string {
|
||||||
specFile := parser.FindFileByName(files, "testspec.json")
|
specFile := parser.FindFileByName(files, "testspec.json")
|
||||||
if specFile == nil {
|
if specFile == nil {
|
||||||
return nil
|
return nil
|
||||||
@@ -88,20 +154,61 @@ func parseSKUToFileMap(files []parser.ExtractedFile) map[string]string {
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
result := make(map[string]string)
|
familyToGeneration := make(map[string]string)
|
||||||
|
deviceToGeneration := make(map[string]string)
|
||||||
|
|
||||||
for _, action := range spec.Actions {
|
for _, action := range spec.Actions {
|
||||||
for sku, file := range action.Args.SKUToFile {
|
if strings.TrimSpace(strings.ToLower(action.VirtualID)) != "gpu_fieldiag" {
|
||||||
normSKU := normalizeSKUCode(sku)
|
|
||||||
if normSKU == "" {
|
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
result[normSKU] = strings.TrimSpace(file)
|
for key, raw := range action.Args.ModsMapping {
|
||||||
|
if strings.HasPrefix(key, "#mods.") {
|
||||||
|
family := strings.TrimSpace(strings.TrimPrefix(key, "#mods."))
|
||||||
|
if family == "" {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
var generation string
|
||||||
|
if err := json.Unmarshal(raw, &generation); err == nil {
|
||||||
|
generation = strings.TrimSpace(generation)
|
||||||
|
if generation != "" {
|
||||||
|
familyToGeneration[family] = generation
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return result
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func parseGPUSKUMapping(files []parser.ExtractedFile) (map[string]string, map[string]string) {
|
for key, raw := range action.Args.ModsMapping {
|
||||||
|
family := strings.TrimSpace(key)
|
||||||
|
if family == "" || strings.HasPrefix(family, "#") {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
generation := strings.TrimSpace(familyToGeneration[family])
|
||||||
|
if generation == "" {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
var deviceIDs []string
|
||||||
|
if err := json.Unmarshal(raw, &deviceIDs); err != nil {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
for _, id := range deviceIDs {
|
||||||
|
norm := normalizeDeviceIDHex(id)
|
||||||
|
if norm != "" {
|
||||||
|
deviceToGeneration[norm] = generation
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return deviceToGeneration
|
||||||
|
}
|
||||||
|
|
||||||
|
func parseGPUSKUMapping(files []parser.ExtractedFile) (map[string]string, map[string]string, map[string]string, map[string]string) {
|
||||||
|
serialToSKU := make(map[string]string)
|
||||||
|
slotToSKU := make(map[string]string)
|
||||||
|
serialToPartNumber := make(map[string]string)
|
||||||
|
slotToPartNumber := make(map[string]string)
|
||||||
|
|
||||||
|
// 1) inventory/fieldiag_summary.json mapping (GPUName/BoardInfo).
|
||||||
var summaryFile *parser.ExtractedFile
|
var summaryFile *parser.ExtractedFile
|
||||||
for _, f := range files {
|
for _, f := range files {
|
||||||
path := strings.ToLower(f.Path)
|
path := strings.ToLower(f.Path)
|
||||||
@@ -112,17 +219,67 @@ func parseGPUSKUMapping(files []parser.ExtractedFile) (map[string]string, map[st
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
if summaryFile == nil {
|
if summaryFile == nil {
|
||||||
return nil, nil
|
// Continue: inforom may still contain usable part numbers.
|
||||||
|
} else {
|
||||||
|
var summaries []inventoryFieldDiagSummary
|
||||||
|
if err := json.Unmarshal(summaryFile.Content, &summaries); err == nil {
|
||||||
|
for _, summary := range summaries {
|
||||||
|
addSummaryMapping(summary, serialToSKU, slotToSKU)
|
||||||
}
|
}
|
||||||
|
} else {
|
||||||
var summary inventoryFieldDiagSummary
|
var summary inventoryFieldDiagSummary
|
||||||
if err := json.Unmarshal(summaryFile.Content, &summary); err != nil {
|
if err := json.Unmarshal(summaryFile.Content, &summary); err == nil {
|
||||||
return nil, nil
|
addSummaryMapping(summary, serialToSKU, slotToSKU)
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
serialToSKU := make(map[string]string)
|
// 2) inforom/checkinforom fieldiag.jso mapping (full product_part_num).
|
||||||
slotToSKU := make(map[string]string)
|
for _, f := range files {
|
||||||
|
path := strings.TrimSpace(f.Path)
|
||||||
|
m := inforomPathRegex.FindStringSubmatch(path)
|
||||||
|
if len(m) == 0 {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
slot := "GPU" + strings.ToUpper(strings.TrimSpace(m[2])) // SXM7 -> GPUSXM7
|
||||||
|
serialFromPath := strings.TrimSpace(m[4])
|
||||||
|
|
||||||
|
productPNMatch := inforomProductPNRegex.FindSubmatch(f.Content)
|
||||||
|
if len(productPNMatch) == 2 {
|
||||||
|
partNumber := strings.TrimSpace(string(productPNMatch[1]))
|
||||||
|
if partNumber != "" {
|
||||||
|
slotToPartNumber[slot] = partNumber
|
||||||
|
if serialFromPath != "" {
|
||||||
|
serialToPartNumber[serialFromPath] = partNumber
|
||||||
|
}
|
||||||
|
if sku := extractSKUFromPartNumber(partNumber); sku != "" {
|
||||||
|
slotToSKU[slot] = sku
|
||||||
|
if serialFromPath != "" {
|
||||||
|
serialToSKU[serialFromPath] = sku
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
serialMatch := inforomSerialRegex.FindSubmatch(f.Content)
|
||||||
|
if len(serialMatch) == 2 {
|
||||||
|
serial := strings.TrimSpace(string(serialMatch[1]))
|
||||||
|
if serial != "" {
|
||||||
|
if sku := slotToSKU[slot]; sku != "" {
|
||||||
|
serialToSKU[serial] = sku
|
||||||
|
}
|
||||||
|
if pn := slotToPartNumber[slot]; pn != "" {
|
||||||
|
serialToPartNumber[serial] = pn
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return serialToSKU, slotToSKU, serialToPartNumber, slotToPartNumber
|
||||||
|
}
|
||||||
|
|
||||||
|
func addSummaryMapping(summary inventoryFieldDiagSummary, serialToSKU map[string]string, slotToSKU map[string]string) {
|
||||||
for _, run := range summary.ModsRuns {
|
for _, run := range summary.ModsRuns {
|
||||||
for _, h := range run.ModsHeader {
|
for _, h := range run.ModsHeader {
|
||||||
sku := normalizeSKUCode(h.BoardInfo)
|
sku := normalizeSKUCode(h.BoardInfo)
|
||||||
@@ -141,27 +298,15 @@ func parseGPUSKUMapping(files []parser.ExtractedFile) (map[string]string, map[st
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return serialToSKU, slotToSKU
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func resolveModelFromSKU(sku string, skuToFile map[string]string) string {
|
func resolveDescriptionFromSKU(sku string, skuToFile map[string]string) string {
|
||||||
file := strings.ToLower(strings.TrimSpace(skuToFile[normalizeSKUCode(sku)]))
|
file := strings.ToLower(strings.TrimSpace(skuToFile[normalizeSKUCode(sku)]))
|
||||||
if file == "" {
|
if file == "" {
|
||||||
return ""
|
return ""
|
||||||
}
|
}
|
||||||
|
|
||||||
m := skuModelRegex.FindStringSubmatch(file)
|
return skuFilenameToDescription(file)
|
||||||
if len(m) != 2 {
|
|
||||||
return ""
|
|
||||||
}
|
|
||||||
|
|
||||||
gpuFamily := strings.ToUpper(strings.TrimSpace(m[1]))
|
|
||||||
if gpuFamily == "" {
|
|
||||||
return ""
|
|
||||||
}
|
|
||||||
|
|
||||||
return fmt.Sprintf("NVIDIA %s SXM", gpuFamily)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func normalizeSKUCode(v string) string {
|
func normalizeSKUCode(v string) string {
|
||||||
@@ -176,3 +321,54 @@ func normalizeSKUCode(v string) string {
|
|||||||
|
|
||||||
return s
|
return s
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func extractSKUFromPartNumber(partNumber string) string {
|
||||||
|
s := strings.TrimSpace(strings.ToUpper(partNumber))
|
||||||
|
if s == "" {
|
||||||
|
return ""
|
||||||
|
}
|
||||||
|
|
||||||
|
if m := skuCodeInsideRegex.FindStringSubmatch(s); len(m) == 3 {
|
||||||
|
return m[1] + "-" + m[2]
|
||||||
|
}
|
||||||
|
|
||||||
|
return ""
|
||||||
|
}
|
||||||
|
|
||||||
|
// skuFilenameToDescription turns a SKU description file name into a
// lower-case, space-separated description, e.g.
// "sku_hgx-h200-8-gpu_141g_aircooled_field.json" -> "hgx h200 8 gpu 141g aircooled".
//
// The ".json" suffix, the "_field" suffix and the "sku_" prefix are removed,
// "-" and "_" become spaces, and runs of whitespace are collapsed. Blank input
// yields "".
func skuFilenameToDescription(file string) string {
	s := strings.ToLower(strings.TrimSpace(file))
	s = strings.TrimSuffix(s, ".json")
	s = strings.TrimSuffix(s, "_field")
	s = strings.TrimPrefix(s, "sku_")
	s = strings.NewReplacer("-", " ", "_", " ").Replace(s)
	// Fields+Join both collapses whitespace runs and trims the ends.
	return strings.Join(strings.Fields(s), " ")
}
|
||||||
|
|
||||||
|
func resolveGenerationDescription(deviceID int, deviceToGeneration map[string]string) string {
|
||||||
|
if deviceID <= 0 || len(deviceToGeneration) == 0 {
|
||||||
|
return ""
|
||||||
|
}
|
||||||
|
return strings.TrimSpace(deviceToGeneration[normalizeDeviceIDHex(strconv.FormatInt(int64(deviceID), 16))])
|
||||||
|
}
|
||||||
|
|
||||||
|
// normalizeDeviceIDHex converts a hexadecimal device ID, with or without a
// "0x"/"0X" prefix and in any letter case, into the canonical form
// "0x<lower-case hex without leading zeros>" (e.g. " 0X00AB " -> "0xab").
// It returns "" if the input is blank, is not valid hex, or does not fit in
// 32 bits.
func normalizeDeviceIDHex(v string) string {
	s := strings.TrimPrefix(strings.ToLower(strings.TrimSpace(v)), "0x")
	if s == "" {
		return ""
	}

	n, err := strconv.ParseUint(s, 16, 32)
	if err != nil {
		return ""
	}

	// FormatUint already emits lower-case digits.
	return "0x" + strconv.FormatUint(n, 16)
}
|
||||||
|
|||||||
155
internal/parser/vendors/nvidia/gpu_model_test.go
vendored
155
internal/parser/vendors/nvidia/gpu_model_test.go
vendored
@@ -50,7 +50,158 @@ func TestApplyGPUModelsFromSKU(t *testing.T) {
|
|||||||
|
|
||||||
ApplyGPUModelsFromSKU(files, result)
|
ApplyGPUModelsFromSKU(files, result)
|
||||||
|
|
||||||
if got := result.Hardware.GPUs[0].Model; got != "NVIDIA H200 SXM" {
|
if got := result.Hardware.GPUs[0].Model; got != "NVIDIA Device 2335" {
|
||||||
t.Fatalf("expected model NVIDIA H200 SXM, got %q", got)
|
t.Fatalf("expected model NVIDIA Device 2335, got %q", got)
|
||||||
|
}
|
||||||
|
if got := result.Hardware.GPUs[0].Description; got != "hgx h200 8 gpu 141g aircooled" {
|
||||||
|
t.Fatalf("expected description hgx h200 8 gpu 141g aircooled, got %q", got)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestApplyGPUModelsFromSKU_FromPartNumber(t *testing.T) {
|
||||||
|
files := []parser.ExtractedFile{
|
||||||
|
{
|
||||||
|
Path: "inforom/SXM5/fieldiag.jso",
|
||||||
|
Content: []byte(`[
|
||||||
|
[
|
||||||
|
{
|
||||||
|
"__tag__":"inforom",
|
||||||
|
"serial_number":"1653925025497",
|
||||||
|
"product_part_num":"692-2G520-0280-501"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
]`),
|
||||||
|
},
|
||||||
|
{
|
||||||
|
Path: "testspec.json",
|
||||||
|
Content: []byte(`{
|
||||||
|
"actions":[
|
||||||
|
{
|
||||||
|
"virtual_id":"inventory",
|
||||||
|
"args":{
|
||||||
|
"sku_to_sku_json_file_map":{
|
||||||
|
"G520-0280":"sku_hgx-h200-8-gpu_141g_aircooled_field.json"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}`),
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
result := &models.AnalysisResult{
|
||||||
|
Hardware: &models.HardwareConfig{
|
||||||
|
GPUs: []models.GPU{
|
||||||
|
{
|
||||||
|
Slot: "GPUSXM5",
|
||||||
|
SerialNumber: "1653925025497",
|
||||||
|
Model: "NVIDIA Device 2335",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
ApplyGPUModelsFromSKU(files, result)
|
||||||
|
|
||||||
|
if got := result.Hardware.GPUs[0].Model; got != "692-2G520-0280-501" {
|
||||||
|
t.Fatalf("expected model 692-2G520-0280-501, got %q", got)
|
||||||
|
}
|
||||||
|
if got := result.Hardware.GPUs[0].PartNumber; got != "692-2G520-0280-501" {
|
||||||
|
t.Fatalf("expected part number 692-2G520-0280-501, got %q", got)
|
||||||
|
}
|
||||||
|
if got := result.Hardware.GPUs[0].Description; got != "hgx h200 8 gpu 141g aircooled" {
|
||||||
|
t.Fatalf("expected description hgx h200 8 gpu 141g aircooled, got %q", got)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestApplyGPUModelsFromSKU_FieldDiagSummaryArrayFormat(t *testing.T) {
|
||||||
|
files := []parser.ExtractedFile{
|
||||||
|
{
|
||||||
|
Path: "inventory/fieldiag_summary.json",
|
||||||
|
Content: []byte(`[
|
||||||
|
{
|
||||||
|
"ModsRuns":[
|
||||||
|
{"ModsHeader":[
|
||||||
|
{"GpuName":"SXM5_SN_1653925025497","BoardInfo":"G520-0280"}
|
||||||
|
]}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
]`),
|
||||||
|
},
|
||||||
|
{
|
||||||
|
Path: "testspec.json",
|
||||||
|
Content: []byte(`{
|
||||||
|
"actions":[
|
||||||
|
{
|
||||||
|
"virtual_id":"inventory",
|
||||||
|
"args":{
|
||||||
|
"sku_to_sku_json_file_map":{
|
||||||
|
"G520-0280":"sku_hgx-h200-8-gpu_141g_aircooled_field.json"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}`),
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
result := &models.AnalysisResult{
|
||||||
|
Hardware: &models.HardwareConfig{
|
||||||
|
GPUs: []models.GPU{
|
||||||
|
{
|
||||||
|
Slot: "GPUSXM5",
|
||||||
|
SerialNumber: "1653925025497",
|
||||||
|
Model: "NVIDIA Device 2335",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
ApplyGPUModelsFromSKU(files, result)
|
||||||
|
|
||||||
|
if got := result.Hardware.GPUs[0].Model; got != "NVIDIA Device 2335" {
|
||||||
|
t.Fatalf("expected model NVIDIA Device 2335, got %q", got)
|
||||||
|
}
|
||||||
|
if got := result.Hardware.GPUs[0].Description; got != "hgx h200 8 gpu 141g aircooled" {
|
||||||
|
t.Fatalf("expected description hgx h200 8 gpu 141g aircooled, got %q", got)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestApplyGPUModelsFromSKU_FallbackToGenerationFromModsMapping(t *testing.T) {
|
||||||
|
files := []parser.ExtractedFile{
|
||||||
|
{
|
||||||
|
Path: "testspec.json",
|
||||||
|
Content: []byte(`{
|
||||||
|
"actions":[
|
||||||
|
{
|
||||||
|
"virtual_id":"gpu_fieldiag",
|
||||||
|
"args":{
|
||||||
|
"mods_mapping":{
|
||||||
|
"#mods.525":"Hopper",
|
||||||
|
"525":["0x2335"]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}`),
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
result := &models.AnalysisResult{
|
||||||
|
Hardware: &models.HardwareConfig{
|
||||||
|
GPUs: []models.GPU{
|
||||||
|
{
|
||||||
|
Slot: "GPUSXM5",
|
||||||
|
Model: "NVIDIA Device 2335",
|
||||||
|
DeviceID: 0x2335,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
ApplyGPUModelsFromSKU(files, result)
|
||||||
|
|
||||||
|
if got := result.Hardware.GPUs[0].Description; got != "Hopper" {
|
||||||
|
t.Fatalf("expected description Hopper, got %q", got)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
63
internal/parser/vendors/nvidia/inventory_log.go
vendored
63
internal/parser/vendors/nvidia/inventory_log.go
vendored
@@ -13,6 +13,11 @@ var (
|
|||||||
// Regex to extract devname mappings from fieldiag command line
|
// Regex to extract devname mappings from fieldiag command line
|
||||||
// Example: "devname=0000:ba:00.0,SXM5_SN_1653925027099"
|
// Example: "devname=0000:ba:00.0,SXM5_SN_1653925027099"
|
||||||
devnameRegex = regexp.MustCompile(`devname=([\da-fA-F:\.]+),(\w+)`)
|
devnameRegex = regexp.MustCompile(`devname=([\da-fA-F:\.]+),(\w+)`)
|
||||||
|
// Regex to capture BDF from commands like:
|
||||||
|
// "$ lspci -vvvs 0000:05:00.0" or "$ lspci -vvs 0000:05:00.0"
|
||||||
|
lspciBDFRegex = regexp.MustCompile(`^\$\s+lspci\s+-[^\s]*\s+([0-9a-fA-F]{4}:[0-9a-fA-F]{2}:[0-9a-fA-F]{2}\.[0-7])\s*$`)
|
||||||
|
// Example: "Capabilities: [2f0 v1] Device Serial Number 99-d3-61-c8-ac-2d-b0-48"
|
||||||
|
deviceSerialRegex = regexp.MustCompile(`Device Serial Number\s+([0-9a-fA-F\-:]+)`)
|
||||||
)
|
)
|
||||||
|
|
||||||
// ParseInventoryLog parses inventory/output.log to extract GPU serial numbers
|
// ParseInventoryLog parses inventory/output.log to extract GPU serial numbers
|
||||||
@@ -75,6 +80,64 @@ func ParseInventoryLog(content []byte, result *models.AnalysisResult) error {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Third pass: parse lspci "Device Serial Number" by BDF (useful for NVSwitch serials).
|
||||||
|
bdfToDeviceSerial := make(map[string]string)
|
||||||
|
currentBDF := ""
|
||||||
|
scanner = bufio.NewScanner(strings.NewReader(string(content)))
|
||||||
|
for scanner.Scan() {
|
||||||
|
line := strings.TrimSpace(scanner.Text())
|
||||||
|
if line == "" {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
if m := lspciBDFRegex.FindStringSubmatch(line); len(m) == 2 {
|
||||||
|
currentBDF = strings.ToLower(strings.TrimSpace(m[1]))
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
if currentBDF == "" {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
if m := deviceSerialRegex.FindStringSubmatch(line); len(m) == 2 {
|
||||||
|
serial := strings.TrimSpace(m[1])
|
||||||
|
if serial != "" {
|
||||||
|
bdfToDeviceSerial[currentBDF] = serial
|
||||||
|
}
|
||||||
|
currentBDF = ""
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Apply to PCIe devices first (includes NVSwitch).
|
||||||
|
for i := range result.Hardware.PCIeDevices {
|
||||||
|
dev := &result.Hardware.PCIeDevices[i]
|
||||||
|
if strings.TrimSpace(dev.SerialNumber) != "" {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
bdf := strings.ToLower(strings.TrimSpace(dev.BDF))
|
||||||
|
if bdf == "" {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
if serial := bdfToDeviceSerial[bdf]; serial != "" {
|
||||||
|
dev.SerialNumber = serial
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Apply to GPUs only if GPU serial is still empty (do not overwrite prod serial from devname).
|
||||||
|
for i := range result.Hardware.GPUs {
|
||||||
|
gpu := &result.Hardware.GPUs[i]
|
||||||
|
if strings.TrimSpace(gpu.SerialNumber) != "" {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
bdf := strings.ToLower(strings.TrimSpace(gpu.BDF))
|
||||||
|
if bdf == "" {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
if serial := bdfToDeviceSerial[bdf]; serial != "" {
|
||||||
|
gpu.SerialNumber = serial
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
return scanner.Err()
|
return scanner.Err()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -6,6 +6,7 @@ import (
|
|||||||
"strings"
|
"strings"
|
||||||
"testing"
|
"testing"
|
||||||
|
|
||||||
|
"git.mchus.pro/mchus/logpile/internal/models"
|
||||||
"git.mchus.pro/mchus/logpile/internal/parser"
|
"git.mchus.pro/mchus/logpile/internal/parser"
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -81,3 +82,45 @@ func min(a, b int) int {
|
|||||||
}
|
}
|
||||||
return b
|
return b
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestParseInventoryLog_AssignsNVSwitchSerialByBDF(t *testing.T) {
|
||||||
|
content := []byte(`
|
||||||
|
$ lspci -vvvs 0000:05:00.0
|
||||||
|
05:00.0 Bridge: NVIDIA Corporation Device 22a3 (rev a1)
|
||||||
|
Capabilities: [2f0 v1] Device Serial Number 99-d3-61-c8-ac-2d-b0-48
|
||||||
|
|
||||||
|
/tmp/fieldiag devname=0000:ba:00.0,SXM5_SN_1653925025497 fieldiag
|
||||||
|
`)
|
||||||
|
|
||||||
|
result := &models.AnalysisResult{
|
||||||
|
Hardware: &models.HardwareConfig{
|
||||||
|
GPUs: []models.GPU{
|
||||||
|
{
|
||||||
|
Slot: "GPUSXM5",
|
||||||
|
BDF: "0000:ba:00.0",
|
||||||
|
SerialNumber: "",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
PCIeDevices: []models.PCIeDevice{
|
||||||
|
{
|
||||||
|
Slot: "NVSWITCH0",
|
||||||
|
BDF: "0000:05:00.0",
|
||||||
|
SerialNumber: "",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
if err := ParseInventoryLog(content, result); err != nil {
|
||||||
|
t.Fatalf("ParseInventoryLog failed: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
if got := result.Hardware.PCIeDevices[0].SerialNumber; got != "99-d3-61-c8-ac-2d-b0-48" {
|
||||||
|
t.Fatalf("expected NVSwitch serial 99-d3-61-c8-ac-2d-b0-48, got %q", got)
|
||||||
|
}
|
||||||
|
|
||||||
|
// GPU serial should come from fieldiag devname mapping.
|
||||||
|
if got := result.Hardware.GPUs[0].SerialNumber; got != "1653925025497" {
|
||||||
|
t.Fatalf("expected GPU serial 1653925025497, got %q", got)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|||||||
370
internal/parser/vendors/nvidia/nvflash_verbose.go
vendored
Normal file
370
internal/parser/vendors/nvidia/nvflash_verbose.go
vendored
Normal file
@@ -0,0 +1,370 @@
|
|||||||
|
package nvidia
|
||||||
|
|
||||||
|
import (
|
||||||
|
"bufio"
|
||||||
|
"fmt"
|
||||||
|
"regexp"
|
||||||
|
"strconv"
|
||||||
|
"strings"
|
||||||
|
|
||||||
|
"git.mchus.pro/mchus/logpile/internal/models"
|
||||||
|
"git.mchus.pro/mchus/logpile/internal/parser"
|
||||||
|
)
|
||||||
|
|
||||||
|
// Patterns for nvflash verbose output and inventory PCI ID lines.
var (
	// nvflashAdapterRegex matches an "Adapter:" line. Groups 1-4 are the four
	// comma-separated hex values inside the parentheses; groups 5-8 are the
	// hex values following "S:", "B:", "D:" and "F:".
	nvflashAdapterRegex = regexp.MustCompile(`^Adapter:\s+.+\(([\da-fA-F]+),([\da-fA-F]+),([\da-fA-F]+),([\da-fA-F]+)\)\s+S:([0-9A-Fa-f]{2}),B:([0-9A-Fa-f]{2}),D:([0-9A-Fa-f]{2}),F:([0-9A-Fa-f])`)
	// gpuPCIIDRegex matches "GPU_SXM<n>_PCIID: <value>"; group 1 is the SXM
	// index, group 2 the value.
	gpuPCIIDRegex = regexp.MustCompile(`^GPU_SXM(\d+)_PCIID:\s*(\S+)$`)
	// nvsPCIIDRegex matches "NVSWITCH_NVSWITCH<n>_PCIID: <value>"; group 1 is
	// the NVSwitch index, group 2 the value.
	nvsPCIIDRegex = regexp.MustCompile(`^NVSWITCH_NVSWITCH(\d+)_PCIID:\s*(\S+)$`)
)
|
||||||
|
|
||||||
|
// nvswitchProjectToPartNumber maps an nvflash "project" identifier to the
// part number assigned to NVSwitch devices reporting that project.
var nvswitchProjectToPartNumber = map[string]string{
	"5612-0002": "965-25612-0002-000",
}
|
||||||
|
|
||||||
|
// nvflashDeviceRecord holds the data collected for one device from the
// nvflash verbose log. Records are keyed by BDF and are only kept when both
// BDF and Version are non-empty.
type nvflashDeviceRecord struct {
	BDF         string // PCI address used as the record key
	VendorID    int    // compared against the device's vendor ID when both are non-zero
	DeviceID    int    // compared against the device's device ID when both are non-zero
	SSVendorID  int    // presumably the PCI subsystem vendor ID — TODO confirm against the parser
	SSDeviceID  int    // presumably the PCI subsystem device ID — TODO confirm against the parser
	Version     string // firmware version applied to the matching device
	BoardID     string
	HierarchyID string
	ChipSKU     string
	Project     string // used to look up the NVSwitch part number
}
|
||||||
|
|
||||||
|
// ParseNVFlashVerboseLog parses inventory/nvflash_verbose.log and applies firmware versions
|
||||||
|
// to already discovered devices using PCI BDF with optional ID checks.
|
||||||
|
func ParseNVFlashVerboseLog(content []byte, result *models.AnalysisResult) error {
|
||||||
|
if result == nil || result.Hardware == nil {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
records := parseNVFlashRecords(content)
|
||||||
|
if len(records) == 0 {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
for i := range result.Hardware.GPUs {
|
||||||
|
gpu := &result.Hardware.GPUs[i]
|
||||||
|
bdf := normalizePCIBDF(gpu.BDF)
|
||||||
|
if bdf == "" {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
rec, ok := records[bdf]
|
||||||
|
if !ok {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
if gpu.DeviceID != 0 && rec.DeviceID != 0 && gpu.DeviceID != rec.DeviceID {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
if gpu.VendorID != 0 && rec.VendorID != 0 && gpu.VendorID != rec.VendorID {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
if strings.TrimSpace(rec.Version) != "" {
|
||||||
|
gpu.Firmware = strings.TrimSpace(rec.Version)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
for i := range result.Hardware.PCIeDevices {
|
||||||
|
dev := &result.Hardware.PCIeDevices[i]
|
||||||
|
bdf := normalizePCIBDF(dev.BDF)
|
||||||
|
if bdf == "" {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
rec, ok := records[bdf]
|
||||||
|
if !ok {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
if dev.DeviceID != 0 && rec.DeviceID != 0 && dev.DeviceID != rec.DeviceID {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
if dev.VendorID != 0 && rec.VendorID != 0 && dev.VendorID != rec.VendorID {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
if strings.EqualFold(strings.TrimSpace(dev.DeviceClass), "NVSwitch") || strings.HasPrefix(strings.ToUpper(strings.TrimSpace(dev.Slot)), "NVSWITCH") {
|
||||||
|
if mappedPN := mapNVSwitchPartNumberByProject(rec.Project); mappedPN != "" {
|
||||||
|
dev.PartNumber = mappedPN
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if strings.TrimSpace(rec.Version) != "" && strings.TrimSpace(dev.PartNumber) == "" {
|
||||||
|
// Fallback for non-NVSwitch devices where part number is unknown.
|
||||||
|
dev.PartNumber = strings.TrimSpace(rec.Version)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
appendNVFlashFirmwareEntries(result, records)
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// ApplyInventoryPCIIDs enriches devices with PCI BDFs from inventory/inventory.log.
|
||||||
|
func ApplyInventoryPCIIDs(content []byte, result *models.AnalysisResult) error {
|
||||||
|
if result == nil || result.Hardware == nil {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
slotToBDF := parseInventoryPCIIDs(content)
|
||||||
|
if len(slotToBDF) == 0 {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
for i := range result.Hardware.GPUs {
|
||||||
|
gpu := &result.Hardware.GPUs[i]
|
||||||
|
if strings.TrimSpace(gpu.BDF) != "" {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
if bdf := slotToBDF[strings.TrimSpace(gpu.Slot)]; bdf != "" {
|
||||||
|
gpu.BDF = bdf
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
for i := range result.Hardware.PCIeDevices {
|
||||||
|
dev := &result.Hardware.PCIeDevices[i]
|
||||||
|
if strings.TrimSpace(dev.BDF) != "" {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
if bdf := slotToBDF[normalizeNVSwitchSlot(strings.TrimSpace(dev.Slot))]; bdf != "" {
|
||||||
|
dev.BDF = bdf
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func parseNVFlashRecords(content []byte) map[string]nvflashDeviceRecord {
|
||||||
|
scanner := bufio.NewScanner(strings.NewReader(string(content)))
|
||||||
|
records := make(map[string]nvflashDeviceRecord)
|
||||||
|
var current *nvflashDeviceRecord
|
||||||
|
|
||||||
|
commit := func() {
|
||||||
|
if current == nil {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
if current.BDF == "" || strings.TrimSpace(current.Version) == "" {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
records[current.BDF] = *current
|
||||||
|
}
|
||||||
|
|
||||||
|
for scanner.Scan() {
|
||||||
|
line := strings.TrimSpace(scanner.Text())
|
||||||
|
if line == "" {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
if m := nvflashAdapterRegex.FindStringSubmatch(line); len(m) == 9 {
|
||||||
|
commit()
|
||||||
|
vendorID, _ := parseHexInt(m[1])
|
||||||
|
deviceID, _ := parseHexInt(m[2])
|
||||||
|
ssVendorID, _ := parseHexInt(m[3])
|
||||||
|
ssDeviceID, _ := parseHexInt(m[4])
|
||||||
|
|
||||||
|
current = &nvflashDeviceRecord{
|
||||||
|
BDF: fmt.Sprintf("0000:%s:%s.%s", strings.ToLower(m[6]), strings.ToLower(m[7]), strings.ToLower(m[8])),
|
||||||
|
VendorID: vendorID,
|
||||||
|
DeviceID: deviceID,
|
||||||
|
SSVendorID: ssVendorID,
|
||||||
|
SSDeviceID: ssDeviceID,
|
||||||
|
}
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
if current == nil {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
if !strings.Contains(line, ":") {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
parts := strings.SplitN(line, ":", 2)
|
||||||
|
key := strings.TrimSpace(parts[0])
|
||||||
|
val := strings.TrimSpace(parts[1])
|
||||||
|
if key == "" || val == "" {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
switch key {
|
||||||
|
case "Version":
|
||||||
|
current.Version = val
|
||||||
|
case "Board ID":
|
||||||
|
current.BoardID = strings.ToLower(strings.TrimPrefix(val, "0x"))
|
||||||
|
case "Vendor ID":
|
||||||
|
if v, err := parseHexInt(val); err == nil {
|
||||||
|
current.VendorID = v
|
||||||
|
}
|
||||||
|
case "Device ID":
|
||||||
|
if v, err := parseHexInt(val); err == nil {
|
||||||
|
current.DeviceID = v
|
||||||
|
}
|
||||||
|
case "Hierarchy ID":
|
||||||
|
current.HierarchyID = val
|
||||||
|
case "Chip SKU":
|
||||||
|
current.ChipSKU = val
|
||||||
|
case "Project":
|
||||||
|
current.Project = val
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
commit()
|
||||||
|
return records
|
||||||
|
}
|
||||||
|
|
||||||
|
func parseInventoryPCIIDs(content []byte) map[string]string {
|
||||||
|
scanner := bufio.NewScanner(strings.NewReader(string(content)))
|
||||||
|
slotToBDF := make(map[string]string)
|
||||||
|
|
||||||
|
for scanner.Scan() {
|
||||||
|
line := strings.TrimSpace(scanner.Text())
|
||||||
|
if line == "" {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
if m := gpuPCIIDRegex.FindStringSubmatch(line); len(m) == 3 {
|
||||||
|
slotToBDF["GPUSXM"+m[1]] = normalizePCIBDF(m[2])
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
if m := nvsPCIIDRegex.FindStringSubmatch(line); len(m) == 3 {
|
||||||
|
slotToBDF["NVSWITCH"+m[1]] = normalizePCIBDF(m[2])
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return slotToBDF
|
||||||
|
}
|
||||||
|
|
||||||
|
// shortPCIBDFRegex matches a lowercase PCI address without a domain part,
// e.g. "ba:00.0". It is compiled once at package scope because
// normalizePCIBDF is called per device inside loops.
var shortPCIBDFRegex = regexp.MustCompile(`^[0-9a-f]{2}:[0-9a-f]{2}\.[0-7]$`)

// normalizePCIBDF returns v lowercased and trimmed, with the default domain
// "0000:" prepended when v is a short "bus:device.func" address.
//
// Anything else (including an already domain-qualified address, an empty
// string, or text that is not a PCI address at all) is returned lowercased
// and trimmed but otherwise unchanged.
func normalizePCIBDF(v string) string {
	s := strings.TrimSpace(strings.ToLower(v))

	// bus:device.func -> 0000:bus:device.func
	if shortPCIBDFRegex.MatchString(s) {
		return "0000:" + s
	}

	return s
}
|
||||||
|
|
||||||
|
// parseHexInt parses a hexadecimal number, with or without a "0x" prefix
// (case-insensitive, surrounding whitespace ignored), into an int.
//
// It returns an error when nothing remains after stripping the prefix, when
// the text is not valid base-16, or when the value does not fit in a signed
// 32-bit integer. On error the returned int is always 0.
func parseHexInt(v string) (int, error) {
	digits := strings.TrimPrefix(strings.TrimSpace(strings.ToLower(v)), "0x")
	if len(digits) == 0 {
		return 0, fmt.Errorf("empty hex value")
	}

	parsed, err := strconv.ParseInt(digits, 16, 32)
	if err != nil {
		return 0, err
	}
	return int(parsed), nil
}
|
||||||
|
|
||||||
|
func findNVFlashVerboseLog(files []parser.ExtractedFile) *parser.ExtractedFile {
|
||||||
|
for _, f := range files {
|
||||||
|
path := strings.ToLower(f.Path)
|
||||||
|
if strings.Contains(path, "inventory/nvflash_verbose.log") ||
|
||||||
|
strings.Contains(path, "inventory\\nvflash_verbose.log") {
|
||||||
|
return &f
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func findInventoryInfoLog(files []parser.ExtractedFile) *parser.ExtractedFile {
|
||||||
|
for _, f := range files {
|
||||||
|
path := strings.ToLower(f.Path)
|
||||||
|
if strings.Contains(path, "inventory/inventory.log") ||
|
||||||
|
strings.Contains(path, "inventory\\inventory.log") {
|
||||||
|
return &f
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func appendNVFlashFirmwareEntries(result *models.AnalysisResult, records map[string]nvflashDeviceRecord) {
|
||||||
|
if result == nil || result.Hardware == nil {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
if result.Hardware.Firmware == nil {
|
||||||
|
result.Hardware.Firmware = make([]models.FirmwareInfo, 0)
|
||||||
|
}
|
||||||
|
|
||||||
|
seen := make(map[string]struct{})
|
||||||
|
for _, fw := range result.Hardware.Firmware {
|
||||||
|
key := strings.ToLower(strings.TrimSpace(fw.DeviceName)) + "|" + strings.TrimSpace(fw.Version)
|
||||||
|
seen[key] = struct{}{}
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, gpu := range result.Hardware.GPUs {
|
||||||
|
version := strings.TrimSpace(gpu.Firmware)
|
||||||
|
if version == "" {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
model := strings.TrimSpace(gpu.PartNumber)
|
||||||
|
if model == "" {
|
||||||
|
model = strings.TrimSpace(gpu.Model)
|
||||||
|
}
|
||||||
|
if model == "" {
|
||||||
|
model = strings.TrimSpace(gpu.Slot)
|
||||||
|
}
|
||||||
|
deviceName := fmt.Sprintf("GPU %s (%s)", strings.TrimSpace(gpu.Slot), model)
|
||||||
|
key := strings.ToLower(deviceName) + "|" + version
|
||||||
|
if _, ok := seen[key]; ok {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
seen[key] = struct{}{}
|
||||||
|
result.Hardware.Firmware = append(result.Hardware.Firmware, models.FirmwareInfo{
|
||||||
|
DeviceName: deviceName,
|
||||||
|
Version: version,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, dev := range result.Hardware.PCIeDevices {
|
||||||
|
bdf := normalizePCIBDF(dev.BDF)
|
||||||
|
rec, ok := records[bdf]
|
||||||
|
if !ok {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
version := strings.TrimSpace(rec.Version)
|
||||||
|
if version == "" {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
slot := strings.TrimSpace(dev.Slot)
|
||||||
|
deviceClass := strings.TrimSpace(dev.DeviceClass)
|
||||||
|
if strings.EqualFold(deviceClass, "NVSwitch") || strings.HasPrefix(strings.ToUpper(slot), "NVSWITCH") {
|
||||||
|
model := slot
|
||||||
|
if pn := strings.TrimSpace(dev.PartNumber); pn != "" {
|
||||||
|
model = pn
|
||||||
|
}
|
||||||
|
deviceName := fmt.Sprintf("NVSwitch %s (%s)", slot, model)
|
||||||
|
key := strings.ToLower(deviceName) + "|" + version
|
||||||
|
if _, ok := seen[key]; ok {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
seen[key] = struct{}{}
|
||||||
|
result.Hardware.Firmware = append(result.Hardware.Firmware, models.FirmwareInfo{
|
||||||
|
DeviceName: deviceName,
|
||||||
|
Version: version,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func mapNVSwitchPartNumberByProject(project string) string {
|
||||||
|
key := strings.TrimSpace(strings.ToLower(project))
|
||||||
|
if key == "" {
|
||||||
|
return ""
|
||||||
|
}
|
||||||
|
return strings.TrimSpace(nvswitchProjectToPartNumber[key])
|
||||||
|
}
|
||||||
93
internal/parser/vendors/nvidia/nvflash_verbose_test.go
vendored
Normal file
93
internal/parser/vendors/nvidia/nvflash_verbose_test.go
vendored
Normal file
@@ -0,0 +1,93 @@
|
|||||||
|
package nvidia
|
||||||
|
|
||||||
|
import (
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
"git.mchus.pro/mchus/logpile/internal/models"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestApplyInventoryPCIIDsAndNVFlashFirmware(t *testing.T) {
|
||||||
|
result := &models.AnalysisResult{
|
||||||
|
Hardware: &models.HardwareConfig{
|
||||||
|
GPUs: []models.GPU{
|
||||||
|
{
|
||||||
|
Slot: "GPUSXM5",
|
||||||
|
DeviceID: 0x2335,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
PCIeDevices: []models.PCIeDevice{
|
||||||
|
{
|
||||||
|
Slot: "NVSWITCHNVSWITCH2",
|
||||||
|
DeviceID: 0x22a3,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
inventoryLog := []byte(`
|
||||||
|
GPU_SXM5_PCIID: 0000:ba:00.0
|
||||||
|
NVSWITCH_NVSWITCH2_PCIID: 0000:07:00.0
|
||||||
|
`)
|
||||||
|
|
||||||
|
nvflashLog := []byte(`
|
||||||
|
Adapter: Graphics Device (10DE,2335,10DE,18BE) S:00,B:BA,D:00,F:00
|
||||||
|
Version : 96.00.D0.00.03
|
||||||
|
Board ID : 0x053C
|
||||||
|
Vendor ID : 0x10DE
|
||||||
|
Device ID : 0x2335
|
||||||
|
Hierarchy ID : Normal Board
|
||||||
|
Chip SKU : 895-0
|
||||||
|
Project : G520-0280
|
||||||
|
|
||||||
|
Adapter: Graphics Device (10DE,22A3,10DE,1796) S:00,B:07,D:00,F:00
|
||||||
|
Version : 96.10.6D.00.01
|
||||||
|
Board ID : 0x03B7
|
||||||
|
Vendor ID : 0x10DE
|
||||||
|
Device ID : 0x22A3
|
||||||
|
Hierarchy ID : Normal Board
|
||||||
|
Chip SKU : 890-0
|
||||||
|
Project : 5612-0002
|
||||||
|
`)
|
||||||
|
|
||||||
|
if err := ApplyInventoryPCIIDs(inventoryLog, result); err != nil {
|
||||||
|
t.Fatalf("ApplyInventoryPCIIDs failed: %v", err)
|
||||||
|
}
|
||||||
|
if err := ParseNVFlashVerboseLog(nvflashLog, result); err != nil {
|
||||||
|
t.Fatalf("ParseNVFlashVerboseLog failed: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
if got := result.Hardware.GPUs[0].BDF; got != "0000:ba:00.0" {
|
||||||
|
t.Fatalf("expected GPU BDF 0000:ba:00.0, got %q", got)
|
||||||
|
}
|
||||||
|
if got := result.Hardware.GPUs[0].Firmware; got != "96.00.D0.00.03" {
|
||||||
|
t.Fatalf("expected GPU firmware 96.00.D0.00.03, got %q", got)
|
||||||
|
}
|
||||||
|
|
||||||
|
if got := result.Hardware.PCIeDevices[0].BDF; got != "0000:07:00.0" {
|
||||||
|
t.Fatalf("expected NVSwitch BDF 0000:07:00.0, got %q", got)
|
||||||
|
}
|
||||||
|
if got := result.Hardware.PCIeDevices[0].PartNumber; got != "965-25612-0002-000" {
|
||||||
|
t.Fatalf("expected NVSwitch part number 965-25612-0002-000, got %q", got)
|
||||||
|
}
|
||||||
|
|
||||||
|
if len(result.Hardware.Firmware) == 0 {
|
||||||
|
t.Fatalf("expected firmware entries to be populated from nvflash log")
|
||||||
|
}
|
||||||
|
|
||||||
|
hasGPUFW := false
|
||||||
|
hasNVSwitchFW := false
|
||||||
|
for _, fw := range result.Hardware.Firmware {
|
||||||
|
if fw.Version == "96.00.D0.00.03" {
|
||||||
|
hasGPUFW = true
|
||||||
|
}
|
||||||
|
if fw.Version == "96.10.6D.00.01" {
|
||||||
|
hasNVSwitchFW = true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if !hasGPUFW {
|
||||||
|
t.Fatalf("expected GPU firmware version 96.00.D0.00.03 in hardware firmware list")
|
||||||
|
}
|
||||||
|
if !hasNVSwitchFW {
|
||||||
|
t.Fatalf("expected NVSwitch firmware version 96.10.6D.00.01 in hardware firmware list")
|
||||||
|
}
|
||||||
|
}
|
||||||
40
internal/parser/vendors/nvidia/parser.go
vendored
40
internal/parser/vendors/nvidia/parser.go
vendored
@@ -14,7 +14,7 @@ import (
|
|||||||
|
|
||||||
// parserVersion - version of this parser module
|
// parserVersion - version of this parser module
|
||||||
// IMPORTANT: Increment this version when making changes to parser logic!
|
// IMPORTANT: Increment this version when making changes to parser logic!
|
||||||
const parserVersion = "1.2.4"
|
const parserVersion = "1.3.0"
|
||||||
|
|
||||||
func init() {
|
func init() {
|
||||||
parser.Register(&Parser{})
|
parser.Register(&Parser{})
|
||||||
@@ -106,6 +106,8 @@ func (p *Parser) Parse(files []parser.ExtractedFile) (*models.AnalysisResult, er
|
|||||||
GPUs: make([]models.GPU, 0),
|
GPUs: make([]models.GPU, 0),
|
||||||
}
|
}
|
||||||
gpuStatuses := make(map[string]string)
|
gpuStatuses := make(map[string]string)
|
||||||
|
gpuFailureDetails := make(map[string]string)
|
||||||
|
nvswitchStatuses := make(map[string]string)
|
||||||
|
|
||||||
// Parse output.log first (contains dmidecode system info)
|
// Parse output.log first (contains dmidecode system info)
|
||||||
// Find the output.log file that contains dmidecode output
|
// Find the output.log file that contains dmidecode output
|
||||||
@@ -134,9 +136,26 @@ func (p *Parser) Parse(files []parser.ExtractedFile) (*models.AnalysisResult, er
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Parse inventory/inventory.log to enrich PCI BDF mapping for components.
|
||||||
|
inventoryInfoLog := findInventoryInfoLog(files)
|
||||||
|
if inventoryInfoLog != nil {
|
||||||
|
if err := ApplyInventoryPCIIDs(inventoryInfoLog.Content, result); err != nil {
|
||||||
|
_ = err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Enhance GPU model names using SKU mapping from testspec + inventory summary.
|
// Enhance GPU model names using SKU mapping from testspec + inventory summary.
|
||||||
ApplyGPUModelsFromSKU(files, result)
|
ApplyGPUModelsFromSKU(files, result)
|
||||||
|
|
||||||
|
// Parse inventory/nvflash_verbose.log and apply firmware versions by BDF + IDs.
|
||||||
|
// This runs after GPU model/part-number enrichment so firmware tab uses final model labels.
|
||||||
|
nvflashVerbose := findNVFlashVerboseLog(files)
|
||||||
|
if nvflashVerbose != nil {
|
||||||
|
if err := ParseNVFlashVerboseLog(nvflashVerbose.Content, result); err != nil {
|
||||||
|
_ = err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Parse summary.json (test results summary)
|
// Parse summary.json (test results summary)
|
||||||
if f := parser.FindFileByName(files, "summary.json"); f != nil {
|
if f := parser.FindFileByName(files, "summary.json"); f != nil {
|
||||||
events := ParseSummaryJSON(f.Content)
|
events := ParseSummaryJSON(f.Content)
|
||||||
@@ -144,6 +163,14 @@ func (p *Parser) Parse(files []parser.ExtractedFile) (*models.AnalysisResult, er
|
|||||||
for componentID, status := range CollectGPUStatusesFromSummaryJSON(f.Content) {
|
for componentID, status := range CollectGPUStatusesFromSummaryJSON(f.Content) {
|
||||||
gpuStatuses[componentID] = mergeGPUStatus(gpuStatuses[componentID], status)
|
gpuStatuses[componentID] = mergeGPUStatus(gpuStatuses[componentID], status)
|
||||||
}
|
}
|
||||||
|
for slot, status := range CollectNVSwitchStatusesFromSummaryJSON(f.Content) {
|
||||||
|
nvswitchStatuses[slot] = mergeGPUStatus(nvswitchStatuses[slot], status)
|
||||||
|
}
|
||||||
|
for componentID, detail := range CollectGPUFailureDetailsFromSummaryJSON(f.Content) {
|
||||||
|
if _, exists := gpuFailureDetails[componentID]; !exists && strings.TrimSpace(detail) != "" {
|
||||||
|
gpuFailureDetails[componentID] = strings.TrimSpace(detail)
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Parse summary.csv (alternative format)
|
// Parse summary.csv (alternative format)
|
||||||
@@ -153,10 +180,21 @@ func (p *Parser) Parse(files []parser.ExtractedFile) (*models.AnalysisResult, er
|
|||||||
for componentID, status := range CollectGPUStatusesFromSummaryCSV(f.Content) {
|
for componentID, status := range CollectGPUStatusesFromSummaryCSV(f.Content) {
|
||||||
gpuStatuses[componentID] = mergeGPUStatus(gpuStatuses[componentID], status)
|
gpuStatuses[componentID] = mergeGPUStatus(gpuStatuses[componentID], status)
|
||||||
}
|
}
|
||||||
|
for slot, status := range CollectNVSwitchStatusesFromSummaryCSV(f.Content) {
|
||||||
|
nvswitchStatuses[slot] = mergeGPUStatus(nvswitchStatuses[slot], status)
|
||||||
|
}
|
||||||
|
for componentID, detail := range CollectGPUFailureDetailsFromSummaryCSV(f.Content) {
|
||||||
|
if _, exists := gpuFailureDetails[componentID]; !exists && strings.TrimSpace(detail) != "" {
|
||||||
|
gpuFailureDetails[componentID] = strings.TrimSpace(detail)
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Apply per-GPU PASS/FAIL status derived from summary files.
|
// Apply per-GPU PASS/FAIL status derived from summary files.
|
||||||
ApplyGPUStatuses(result, gpuStatuses)
|
ApplyGPUStatuses(result, gpuStatuses)
|
||||||
|
ApplyGPUFailureDetails(result, gpuFailureDetails)
|
||||||
|
ApplyNVSwitchStatuses(result, nvswitchStatuses)
|
||||||
|
ApplyGPUAndNVSwitchCheckTimes(result, CollectGPUAndNVSwitchCheckTimes(files))
|
||||||
|
|
||||||
// Parse GPU field diagnostics logs
|
// Parse GPU field diagnostics logs
|
||||||
gpuFieldiagFiles := parser.FindFileByPattern(files, "gpu_fieldiag/", ".log")
|
gpuFieldiagFiles := parser.FindFileByPattern(files, "gpu_fieldiag/", ".log")
|
||||||
|
|||||||
99
internal/parser/vendors/nvidia/parser_test.go
vendored
99
internal/parser/vendors/nvidia/parser_test.go
vendored
@@ -4,6 +4,7 @@ import (
|
|||||||
"os"
|
"os"
|
||||||
"path/filepath"
|
"path/filepath"
|
||||||
"testing"
|
"testing"
|
||||||
|
"time"
|
||||||
|
|
||||||
"git.mchus.pro/mchus/logpile/internal/parser"
|
"git.mchus.pro/mchus/logpile/internal/parser"
|
||||||
)
|
)
|
||||||
@@ -146,6 +147,39 @@ func TestNVIDIAParser_GPUStatusFromSummary_RealArchive07900(t *testing.T) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestNVIDIAParser_GPUErrorDetailsFromSummary_RealArchive07900(t *testing.T) {
|
||||||
|
archivePath := filepath.Join("../../../../example", "A514359X5A07900_logs-20260122-074208.tar")
|
||||||
|
if _, err := os.Stat(archivePath); os.IsNotExist(err) {
|
||||||
|
t.Skip("Test archive not found, skipping test")
|
||||||
|
}
|
||||||
|
|
||||||
|
files, err := parser.ExtractArchive(archivePath)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("Failed to extract archive: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
p := &Parser{}
|
||||||
|
result, err := p.Parse(files)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("Failed to parse archive: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
if result.Hardware == nil || len(result.Hardware.GPUs) == 0 {
|
||||||
|
t.Fatalf("expected GPUs in parsed result")
|
||||||
|
}
|
||||||
|
|
||||||
|
errBySerial := make(map[string]string, len(result.Hardware.GPUs))
|
||||||
|
for _, gpu := range result.Hardware.GPUs {
|
||||||
|
if gpu.SerialNumber != "" {
|
||||||
|
errBySerial[gpu.SerialNumber] = gpu.ErrorDescription
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if got := errBySerial["1653925025497"]; got != "Row remapping failed" {
|
||||||
|
t.Fatalf("expected GPU serial 1653925025497 error Row remapping failed, got %q", got)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
func TestNVIDIAParser_GPUModelFromSKU_RealArchive07900(t *testing.T) {
|
func TestNVIDIAParser_GPUModelFromSKU_RealArchive07900(t *testing.T) {
|
||||||
archivePath := filepath.Join("../../../../example", "A514359X5A07900_logs-20260122-074208.tar")
|
archivePath := filepath.Join("../../../../example", "A514359X5A07900_logs-20260122-074208.tar")
|
||||||
if _, err := os.Stat(archivePath); os.IsNotExist(err) {
|
if _, err := os.Stat(archivePath); os.IsNotExist(err) {
|
||||||
@@ -169,14 +203,75 @@ func TestNVIDIAParser_GPUModelFromSKU_RealArchive07900(t *testing.T) {
|
|||||||
|
|
||||||
found := false
|
found := false
|
||||||
for _, gpu := range result.Hardware.GPUs {
|
for _, gpu := range result.Hardware.GPUs {
|
||||||
if gpu.Model == "NVIDIA H200 SXM" {
|
if gpu.Model == "692-2G520-0280-501" && gpu.Description == "hgx h200 8 gpu 141g aircooled" {
|
||||||
found = true
|
found = true
|
||||||
break
|
break
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if !found {
|
if !found {
|
||||||
t.Fatalf("expected at least one GPU model NVIDIA H200 SXM")
|
t.Fatalf("expected at least one GPU with model 692-2G520-0280-501 and description hgx h200 8 gpu 141g aircooled")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestNVIDIAParser_ComponentCheckTimes_RealArchive07900(t *testing.T) {
|
||||||
|
archivePath := filepath.Join("../../../../example", "A514359X5A07900_logs-20260122-074208.tar")
|
||||||
|
if _, err := os.Stat(archivePath); os.IsNotExist(err) {
|
||||||
|
t.Skip("Test archive not found, skipping test")
|
||||||
|
}
|
||||||
|
|
||||||
|
files, err := parser.ExtractArchive(archivePath)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("Failed to extract archive: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
p := &Parser{}
|
||||||
|
result, err := p.Parse(files)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("Failed to parse archive: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
if result.Hardware == nil {
|
||||||
|
t.Fatalf("expected hardware in parsed result")
|
||||||
|
}
|
||||||
|
|
||||||
|
expectedGPU := time.Date(2026, 1, 22, 9, 45, 36, 0, time.UTC)
|
||||||
|
expectedNVSwitch := time.Date(2026, 1, 22, 9, 11, 32, 0, time.UTC)
|
||||||
|
|
||||||
|
if len(result.Hardware.GPUs) == 0 {
|
||||||
|
t.Fatalf("expected GPUs in parsed result")
|
||||||
|
}
|
||||||
|
for _, gpu := range result.Hardware.GPUs {
|
||||||
|
if !gpu.StatusCheckedAt.Equal(expectedGPU) {
|
||||||
|
t.Fatalf("expected GPU %s status_checked_at %s, got %s", gpu.Slot, expectedGPU.Format(time.RFC3339), gpu.StatusCheckedAt.Format(time.RFC3339))
|
||||||
|
}
|
||||||
|
if gpu.StatusAtCollect == nil || !gpu.StatusAtCollect.At.Equal(expectedGPU) {
|
||||||
|
t.Fatalf("expected GPU %s status_at_collection.at %s", gpu.Slot, expectedGPU.Format(time.RFC3339))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
nvsCount := 0
|
||||||
|
for _, dev := range result.Hardware.PCIeDevices {
|
||||||
|
slot := normalizeNVSwitchSlot(dev.Slot)
|
||||||
|
if slot == "" {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
if dev.DeviceClass != "NVSwitch" && len(slot) < len("NVSWITCH") {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
if dev.DeviceClass != "NVSwitch" && slot[:len("NVSWITCH")] != "NVSWITCH" {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
nvsCount++
|
||||||
|
if !dev.StatusCheckedAt.Equal(expectedNVSwitch) {
|
||||||
|
t.Fatalf("expected NVSwitch %s status_checked_at %s, got %s", dev.Slot, expectedNVSwitch.Format(time.RFC3339), dev.StatusCheckedAt.Format(time.RFC3339))
|
||||||
|
}
|
||||||
|
if dev.StatusAtCollect == nil || !dev.StatusAtCollect.At.Equal(expectedNVSwitch) {
|
||||||
|
t.Fatalf("expected NVSwitch %s status_at_collection.at %s", dev.Slot, expectedNVSwitch.Format(time.RFC3339))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if nvsCount == 0 {
|
||||||
|
t.Fatalf("expected NVSwitch devices in parsed result")
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
217
internal/parser/vendors/nvidia/summary.go
vendored
217
internal/parser/vendors/nvidia/summary.go
vendored
@@ -22,6 +22,7 @@ type SummaryEntry struct {
|
|||||||
}
|
}
|
||||||
|
|
||||||
var gpuComponentIDRegex = regexp.MustCompile(`^SXM(\d+)_SN_(.+)$`)
|
var gpuComponentIDRegex = regexp.MustCompile(`^SXM(\d+)_SN_(.+)$`)
|
||||||
|
var nvswitchInventoryComponentRegex = regexp.MustCompile(`^NVSWITCH_(NVSWITCH\d+)_`)
|
||||||
|
|
||||||
// ParseSummaryJSON parses summary.json file and returns events
|
// ParseSummaryJSON parses summary.json file and returns events
|
||||||
func ParseSummaryJSON(content []byte) []models.Event {
|
func ParseSummaryJSON(content []byte) []models.Event {
|
||||||
@@ -121,6 +122,41 @@ func CollectGPUStatusesFromSummaryJSON(content []byte) map[string]string {
|
|||||||
return statuses
|
return statuses
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// CollectGPUFailureDetailsFromSummaryJSON extracts per-GPU failure details from summary.json.
|
||||||
|
// Key format in returned map is component ID from summary (e.g. "SXM5_SN_1653925025497").
|
||||||
|
func CollectGPUFailureDetailsFromSummaryJSON(content []byte) map[string]string {
|
||||||
|
var entries []SummaryEntry
|
||||||
|
if err := json.Unmarshal(content, &entries); err != nil {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
details := make(map[string]string)
|
||||||
|
for _, entry := range entries {
|
||||||
|
component := strings.TrimSpace(entry.ComponentID)
|
||||||
|
if component == "" || !gpuComponentIDRegex.MatchString(component) {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
if isSummaryJSONRecordPassing(entry.ErrorCode, entry.Notes) {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
note := strings.TrimSpace(entry.Notes)
|
||||||
|
if note == "" || strings.EqualFold(note, "OK") {
|
||||||
|
note = strings.TrimSpace(entry.ErrorCode)
|
||||||
|
}
|
||||||
|
if note == "" {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
// Keep first non-empty detail to avoid noisy overrides.
|
||||||
|
if _, exists := details[component]; !exists {
|
||||||
|
details[component] = note
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return details
|
||||||
|
}
|
||||||
|
|
||||||
// CollectGPUStatusesFromSummaryCSV extracts per-GPU PASS/FAIL status from summary.csv.
|
// CollectGPUStatusesFromSummaryCSV extracts per-GPU PASS/FAIL status from summary.csv.
|
||||||
// Key format in returned map is component ID from summary (e.g. "SXM5_SN_1653925025497").
|
// Key format in returned map is component ID from summary (e.g. "SXM5_SN_1653925025497").
|
||||||
func CollectGPUStatusesFromSummaryCSV(content []byte) map[string]string {
|
func CollectGPUStatusesFromSummaryCSV(content []byte) map[string]string {
|
||||||
@@ -155,6 +191,120 @@ func CollectGPUStatusesFromSummaryCSV(content []byte) map[string]string {
|
|||||||
return statuses
|
return statuses
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// CollectNVSwitchStatusesFromSummaryJSON extracts per-NVSwitch PASS/FAIL status from summary.json.
|
||||||
|
// Key format in returned map is normalized switch slot (e.g. "NVSWITCH0").
|
||||||
|
func CollectNVSwitchStatusesFromSummaryJSON(content []byte) map[string]string {
|
||||||
|
var entries []SummaryEntry
|
||||||
|
if err := json.Unmarshal(content, &entries); err != nil {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
statuses := make(map[string]string)
|
||||||
|
for _, entry := range entries {
|
||||||
|
component := strings.TrimSpace(entry.ComponentID)
|
||||||
|
matches := nvswitchInventoryComponentRegex.FindStringSubmatch(component)
|
||||||
|
if len(matches) != 2 {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
slot := strings.TrimSpace(matches[1])
|
||||||
|
if slot == "" {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
current := statuses[slot]
|
||||||
|
next := "PASS"
|
||||||
|
if !isSummaryJSONRecordPassing(entry.ErrorCode, entry.Notes) {
|
||||||
|
next = "FAIL"
|
||||||
|
}
|
||||||
|
statuses[slot] = mergeGPUStatus(current, next)
|
||||||
|
}
|
||||||
|
|
||||||
|
return statuses
|
||||||
|
}
|
||||||
|
|
||||||
|
// CollectNVSwitchStatusesFromSummaryCSV extracts per-NVSwitch PASS/FAIL status from summary.csv.
|
||||||
|
// Key format in returned map is normalized switch slot (e.g. "NVSWITCH0").
|
||||||
|
func CollectNVSwitchStatusesFromSummaryCSV(content []byte) map[string]string {
|
||||||
|
reader := csv.NewReader(strings.NewReader(string(content)))
|
||||||
|
records, err := reader.ReadAll()
|
||||||
|
if err != nil {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
statuses := make(map[string]string)
|
||||||
|
for i, record := range records {
|
||||||
|
if i == 0 || len(record) < 7 {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
component := strings.TrimSpace(record[5])
|
||||||
|
matches := nvswitchInventoryComponentRegex.FindStringSubmatch(component)
|
||||||
|
if len(matches) != 2 {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
slot := strings.TrimSpace(matches[1])
|
||||||
|
if slot == "" {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
errorCode := strings.TrimSpace(record[0])
|
||||||
|
notes := strings.TrimSpace(record[6])
|
||||||
|
|
||||||
|
current := statuses[slot]
|
||||||
|
next := "PASS"
|
||||||
|
if !isSummaryCSVRecordPassing(errorCode, notes) {
|
||||||
|
next = "FAIL"
|
||||||
|
}
|
||||||
|
statuses[slot] = mergeGPUStatus(current, next)
|
||||||
|
}
|
||||||
|
|
||||||
|
return statuses
|
||||||
|
}
|
||||||
|
|
||||||
|
// CollectGPUFailureDetailsFromSummaryCSV extracts per-GPU failure details from summary.csv.
|
||||||
|
// Key format in returned map is component ID from summary (e.g. "SXM5_SN_1653925025497").
|
||||||
|
func CollectGPUFailureDetailsFromSummaryCSV(content []byte) map[string]string {
|
||||||
|
reader := csv.NewReader(strings.NewReader(string(content)))
|
||||||
|
records, err := reader.ReadAll()
|
||||||
|
if err != nil {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
details := make(map[string]string)
|
||||||
|
for i, record := range records {
|
||||||
|
if i == 0 || len(record) < 7 {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
component := strings.TrimSpace(record[5])
|
||||||
|
if component == "" || !gpuComponentIDRegex.MatchString(component) {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
errorCode := strings.TrimSpace(record[0])
|
||||||
|
notes := strings.TrimSpace(record[6])
|
||||||
|
if isSummaryCSVRecordPassing(errorCode, notes) {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
note := notes
|
||||||
|
if note == "" || strings.EqualFold(note, "OK") {
|
||||||
|
note = errorCode
|
||||||
|
}
|
||||||
|
if note == "" {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
if _, exists := details[component]; !exists {
|
||||||
|
details[component] = note
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return details
|
||||||
|
}
|
||||||
|
|
||||||
func isSummaryJSONRecordPassing(errorCode, notes string) bool {
|
func isSummaryJSONRecordPassing(errorCode, notes string) bool {
|
||||||
_ = errorCode
|
_ = errorCode
|
||||||
return strings.TrimSpace(notes) == "OK"
|
return strings.TrimSpace(notes) == "OK"
|
||||||
@@ -213,6 +363,73 @@ func ApplyGPUStatuses(result *models.AnalysisResult, componentStatuses map[strin
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ApplyNVSwitchStatuses applies aggregated PASS/FAIL statuses from summary components to parsed NVSwitch devices.
|
||||||
|
func ApplyNVSwitchStatuses(result *models.AnalysisResult, switchStatuses map[string]string) {
|
||||||
|
if result == nil || result.Hardware == nil || len(result.Hardware.PCIeDevices) == 0 || len(switchStatuses) == 0 {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
for i := range result.Hardware.PCIeDevices {
|
||||||
|
dev := &result.Hardware.PCIeDevices[i]
|
||||||
|
slot := normalizeNVSwitchSlot(strings.TrimSpace(dev.Slot))
|
||||||
|
if slot == "" {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
if !strings.HasPrefix(strings.ToUpper(slot), "NVSWITCH") {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
if st := switchStatuses[slot]; st != "" {
|
||||||
|
dev.Status = st
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// ApplyGPUFailureDetails maps parsed failure details from summary components to GPUs.
|
||||||
|
func ApplyGPUFailureDetails(result *models.AnalysisResult, componentDetails map[string]string) {
|
||||||
|
if result == nil || result.Hardware == nil || len(result.Hardware.GPUs) == 0 || len(componentDetails) == 0 {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
slotDetails := make(map[string]string) // key: GPUSXM<idx>
|
||||||
|
serialDetails := make(map[string]string) // key: GPU serial
|
||||||
|
|
||||||
|
for componentID, detail := range componentDetails {
|
||||||
|
matches := gpuComponentIDRegex.FindStringSubmatch(strings.TrimSpace(componentID))
|
||||||
|
if len(matches) != 3 {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
detail = strings.TrimSpace(detail)
|
||||||
|
if detail == "" {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
slotKey := "GPUSXM" + matches[1]
|
||||||
|
serialKey := strings.TrimSpace(matches[2])
|
||||||
|
if _, exists := slotDetails[slotKey]; !exists {
|
||||||
|
slotDetails[slotKey] = detail
|
||||||
|
}
|
||||||
|
if serialKey != "" {
|
||||||
|
if _, exists := serialDetails[serialKey]; !exists {
|
||||||
|
serialDetails[serialKey] = detail
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
for i := range result.Hardware.GPUs {
|
||||||
|
gpu := &result.Hardware.GPUs[i]
|
||||||
|
detail := ""
|
||||||
|
if serial := strings.TrimSpace(gpu.SerialNumber); serial != "" {
|
||||||
|
detail = serialDetails[serial]
|
||||||
|
}
|
||||||
|
if detail == "" {
|
||||||
|
detail = slotDetails[strings.TrimSpace(gpu.Slot)]
|
||||||
|
}
|
||||||
|
if detail != "" {
|
||||||
|
gpu.ErrorDescription = detail
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// formatSummaryDescription creates a human-readable description from summary entry
|
// formatSummaryDescription creates a human-readable description from summary entry
|
||||||
func formatSummaryDescription(entry SummaryEntry) string {
|
func formatSummaryDescription(entry SummaryEntry) string {
|
||||||
component := entry.ComponentID
|
component := entry.ComponentID
|
||||||
|
|||||||
@@ -44,3 +44,79 @@ func TestApplyGPUStatuses_FromSummaryCSV_FailAndPass(t *testing.T) {
|
|||||||
t.Fatalf("expected serial 222 status PASS, got %q", bySerial["222"])
|
t.Fatalf("expected serial 222 status PASS, got %q", bySerial["222"])
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestApplyGPUFailureDetails_FromSummaryJSON_BySerial(t *testing.T) {
|
||||||
|
jsonData := []byte(`[
|
||||||
|
{
|
||||||
|
"Error Code": "005-000-1-000000000363",
|
||||||
|
"Test": "gpumem",
|
||||||
|
"Component ID": "SXM5_SN_1653925025497",
|
||||||
|
"Notes": "Row remapping failed",
|
||||||
|
"Virtual ID": "gpumem",
|
||||||
|
"Ignore Error": "False"
|
||||||
|
}
|
||||||
|
]`)
|
||||||
|
|
||||||
|
result := &models.AnalysisResult{
|
||||||
|
Hardware: &models.HardwareConfig{
|
||||||
|
GPUs: []models.GPU{
|
||||||
|
{Slot: "GPUSXM5", SerialNumber: "1653925025497"},
|
||||||
|
{Slot: "GPUSXM2", SerialNumber: "1653925024190"},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
details := CollectGPUFailureDetailsFromSummaryJSON(jsonData)
|
||||||
|
ApplyGPUFailureDetails(result, details)
|
||||||
|
|
||||||
|
if got := result.Hardware.GPUs[0].ErrorDescription; got != "Row remapping failed" {
|
||||||
|
t.Fatalf("expected serial 1653925025497 error Row remapping failed, got %q", got)
|
||||||
|
}
|
||||||
|
if got := result.Hardware.GPUs[1].ErrorDescription; got != "" {
|
||||||
|
t.Fatalf("expected no error description for healthy GPU, got %q", got)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestApplyNVSwitchStatuses_FromSummaryJSON(t *testing.T) {
|
||||||
|
jsonData := []byte(`[
|
||||||
|
{
|
||||||
|
"Error Code": "0",
|
||||||
|
"Test": "inventory",
|
||||||
|
"Component ID": "NVSWITCH_NVSWITCH0_VendorID",
|
||||||
|
"Notes": "OK",
|
||||||
|
"Virtual ID": "inventory",
|
||||||
|
"Ignore Error": "False"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"Error Code": "1",
|
||||||
|
"Test": "inventory",
|
||||||
|
"Component ID": "NVSWITCH_NVSWITCH1_LinkState",
|
||||||
|
"Notes": "Link down",
|
||||||
|
"Virtual ID": "inventory",
|
||||||
|
"Ignore Error": "False"
|
||||||
|
}
|
||||||
|
]`)
|
||||||
|
|
||||||
|
result := &models.AnalysisResult{
|
||||||
|
Hardware: &models.HardwareConfig{
|
||||||
|
PCIeDevices: []models.PCIeDevice{
|
||||||
|
{Slot: "NVSWITCH0", Status: "Unknown"},
|
||||||
|
{Slot: "NVSWITCH1", Status: "Unknown"},
|
||||||
|
{Slot: "NVSWITCH2", Status: "Unknown"},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
statuses := CollectNVSwitchStatusesFromSummaryJSON(jsonData)
|
||||||
|
ApplyNVSwitchStatuses(result, statuses)
|
||||||
|
|
||||||
|
if got := result.Hardware.PCIeDevices[0].Status; got != "PASS" {
|
||||||
|
t.Fatalf("expected NVSWITCH0 status PASS, got %q", got)
|
||||||
|
}
|
||||||
|
if got := result.Hardware.PCIeDevices[1].Status; got != "FAIL" {
|
||||||
|
t.Fatalf("expected NVSWITCH1 status FAIL, got %q", got)
|
||||||
|
}
|
||||||
|
if got := result.Hardware.PCIeDevices[2].Status; got != "Unknown" {
|
||||||
|
t.Fatalf("expected NVSWITCH2 status unchanged Unknown, got %q", got)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|||||||
@@ -186,6 +186,9 @@ func parseGPUComponent(comp Component) *models.GPU {
|
|||||||
switch prop.ID {
|
switch prop.ID {
|
||||||
case "DeviceID":
|
case "DeviceID":
|
||||||
deviceID = prop.GetValueAsString()
|
deviceID = prop.GetValueAsString()
|
||||||
|
if deviceID != "" {
|
||||||
|
fmt.Sscanf(deviceID, "%x", &gpu.DeviceID)
|
||||||
|
}
|
||||||
case "Vendor":
|
case "Vendor":
|
||||||
gpu.Manufacturer = prop.GetValueAsString()
|
gpu.Manufacturer = prop.GetValueAsString()
|
||||||
case "DeviceName":
|
case "DeviceName":
|
||||||
|
|||||||
@@ -410,8 +410,12 @@ func (s *Server) handleGetSerials(w http.ResponseWriter, r *http.Request) {
|
|||||||
if !hasUsableSerial(pcie.SerialNumber) {
|
if !hasUsableSerial(pcie.SerialNumber) {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
component := pcie.DeviceClass
|
||||||
|
if strings.EqualFold(strings.TrimSpace(pcie.DeviceClass), "NVSwitch") && strings.TrimSpace(pcie.PartNumber) != "" {
|
||||||
|
component = strings.TrimSpace(pcie.PartNumber)
|
||||||
|
}
|
||||||
serials = append(serials, SerialEntry{
|
serials = append(serials, SerialEntry{
|
||||||
Component: pcie.DeviceClass,
|
Component: component,
|
||||||
Location: pcie.Slot,
|
Location: pcie.Slot,
|
||||||
SerialNumber: strings.TrimSpace(pcie.SerialNumber),
|
SerialNumber: strings.TrimSpace(pcie.SerialNumber),
|
||||||
Manufacturer: pcie.Manufacturer,
|
Manufacturer: pcie.Manufacturer,
|
||||||
@@ -526,6 +530,36 @@ func extractFirmwareComponentAndModel(deviceName string) (component, model strin
|
|||||||
return "NIC", "-"
|
return "NIC", "-"
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// For "GPU GPUSXM5 (692-2G520-0280-501)" -> component: "GPU", model: "GPUSXM5 (692-2G520-0280-501)"
|
||||||
|
if strings.HasPrefix(deviceName, "GPU ") {
|
||||||
|
if idx := strings.Index(deviceName, "("); idx != -1 {
|
||||||
|
model = strings.TrimSpace(strings.Trim(deviceName[idx:], "()"))
|
||||||
|
if model != "" {
|
||||||
|
return "GPU", model
|
||||||
|
}
|
||||||
|
}
|
||||||
|
model = strings.TrimSpace(strings.TrimPrefix(deviceName, "GPU "))
|
||||||
|
if model == "" {
|
||||||
|
return "GPU", "-"
|
||||||
|
}
|
||||||
|
return "GPU", model
|
||||||
|
}
|
||||||
|
|
||||||
|
// For "NVSwitch NVSWITCH2 (NVSWITCH2)" -> component: "NVSwitch", model: "NVSWITCH2 (NVSWITCH2)"
|
||||||
|
if strings.HasPrefix(deviceName, "NVSwitch ") {
|
||||||
|
if idx := strings.Index(deviceName, "("); idx != -1 {
|
||||||
|
model = strings.TrimSpace(strings.Trim(deviceName[idx:], "()"))
|
||||||
|
if model != "" {
|
||||||
|
return "NVSwitch", model
|
||||||
|
}
|
||||||
|
}
|
||||||
|
model = strings.TrimSpace(strings.TrimPrefix(deviceName, "NVSwitch "))
|
||||||
|
if model == "" {
|
||||||
|
return "NVSwitch", "-"
|
||||||
|
}
|
||||||
|
return "NVSwitch", model
|
||||||
|
}
|
||||||
|
|
||||||
// For "HDD Samsung MZ7L33T8HBNA-00A07" -> component: "HDD", model: "Samsung MZ7L33T8HBNA-00A07"
|
// For "HDD Samsung MZ7L33T8HBNA-00A07" -> component: "HDD", model: "Samsung MZ7L33T8HBNA-00A07"
|
||||||
if strings.HasPrefix(deviceName, "HDD ") {
|
if strings.HasPrefix(deviceName, "HDD ") {
|
||||||
return "HDD", strings.TrimPrefix(deviceName, "HDD ")
|
return "HDD", strings.TrimPrefix(deviceName, "HDD ")
|
||||||
|
|||||||
23
internal/server/handlers_firmware_test.go
Normal file
23
internal/server/handlers_firmware_test.go
Normal file
@@ -0,0 +1,23 @@
|
|||||||
|
package server
|
||||||
|
|
||||||
|
import "testing"
|
||||||
|
|
||||||
|
func TestExtractFirmwareComponentAndModel_GPUUsesPartNumberFromParentheses(t *testing.T) {
|
||||||
|
component, model := extractFirmwareComponentAndModel("GPU GPUSXM3 (692-2G520-0280-501)")
|
||||||
|
if component != "GPU" {
|
||||||
|
t.Fatalf("expected component GPU, got %q", component)
|
||||||
|
}
|
||||||
|
if model != "692-2G520-0280-501" {
|
||||||
|
t.Fatalf("expected GPU model 692-2G520-0280-501, got %q", model)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestExtractFirmwareComponentAndModel_GPUFallbackWithoutParentheses(t *testing.T) {
|
||||||
|
component, model := extractFirmwareComponentAndModel("GPU 692-2G520-0280-501")
|
||||||
|
if component != "GPU" {
|
||||||
|
t.Fatalf("expected component GPU, got %q", component)
|
||||||
|
}
|
||||||
|
if model != "692-2G520-0280-501" {
|
||||||
|
t.Fatalf("expected GPU model 692-2G520-0280-501, got %q", model)
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -903,9 +903,11 @@ function renderConfig(data) {
|
|||||||
|
|
||||||
// PCIe Device Inventory tab
|
// PCIe Device Inventory tab
|
||||||
html += '<div class="config-tab-content" id="config-pcie">';
|
html += '<div class="config-tab-content" id="config-pcie">';
|
||||||
if (config.pcie_devices && config.pcie_devices.length > 0) {
|
const hasPCIe = config.pcie_devices && config.pcie_devices.length > 0;
|
||||||
html += '<h3>PCIe устройства</h3><table class="config-table"><thead><tr><th>Слот</th><th>BDF</th><th>Тип</th><th>Производитель</th><th>Vendor:Device ID</th><th>PCIe Link</th></tr></thead><tbody>';
|
const hasGPUs = config.gpus && config.gpus.length > 0;
|
||||||
config.pcie_devices.forEach(p => {
|
if (hasPCIe || hasGPUs) {
|
||||||
|
html += '<h3>PCIe устройства</h3><table class="config-table"><thead><tr><th>Слот</th><th>BDF</th><th>Тип</th><th>Модель</th><th>Производитель</th><th>Vendor:Device ID</th><th>PCIe Link</th></tr></thead><tbody>';
|
||||||
|
(config.pcie_devices || []).forEach(p => {
|
||||||
const pcieLink = formatPCIeLink(
|
const pcieLink = formatPCIeLink(
|
||||||
p.link_width,
|
p.link_width,
|
||||||
p.link_speed,
|
p.link_speed,
|
||||||
@@ -916,11 +918,30 @@ function renderConfig(data) {
|
|||||||
<td>${escapeHtml(p.slot || '-')}</td>
|
<td>${escapeHtml(p.slot || '-')}</td>
|
||||||
<td><code>${escapeHtml(p.bdf || '-')}</code></td>
|
<td><code>${escapeHtml(p.bdf || '-')}</code></td>
|
||||||
<td>${escapeHtml(p.device_class || '-')}</td>
|
<td>${escapeHtml(p.device_class || '-')}</td>
|
||||||
|
<td>${escapeHtml(p.part_number || '-')}</td>
|
||||||
<td>${escapeHtml(p.manufacturer || '-')}</td>
|
<td>${escapeHtml(p.manufacturer || '-')}</td>
|
||||||
<td><code>${p.vendor_id ? p.vendor_id.toString(16) : '-'}:${p.device_id ? p.device_id.toString(16) : '-'}</code></td>
|
<td><code>${p.vendor_id ? p.vendor_id.toString(16) : '-'}:${p.device_id ? p.device_id.toString(16) : '-'}</code></td>
|
||||||
<td>${pcieLink}</td>
|
<td>${pcieLink}</td>
|
||||||
</tr>`;
|
</tr>`;
|
||||||
});
|
});
|
||||||
|
|
||||||
|
(config.gpus || []).forEach(gpu => {
|
||||||
|
const pcieLink = formatPCIeLink(
|
||||||
|
gpu.current_link_width || gpu.link_width,
|
||||||
|
gpu.current_link_speed || gpu.link_speed,
|
||||||
|
gpu.max_link_width,
|
||||||
|
gpu.max_link_speed
|
||||||
|
);
|
||||||
|
html += `<tr>
|
||||||
|
<td>${escapeHtml(gpu.slot || '-')}</td>
|
||||||
|
<td><code>${escapeHtml(gpu.bdf || '-')}</code></td>
|
||||||
|
<td>GPU</td>
|
||||||
|
<td>${escapeHtml(gpu.model || gpu.part_number || '-')}</td>
|
||||||
|
<td>${escapeHtml(gpu.manufacturer || '-')}</td>
|
||||||
|
<td><code>${gpu.vendor_id ? gpu.vendor_id.toString(16) : '-'}:${gpu.device_id ? gpu.device_id.toString(16) : '-'}</code></td>
|
||||||
|
<td>${pcieLink}</td>
|
||||||
|
</tr>`;
|
||||||
|
});
|
||||||
html += '</tbody></table>';
|
html += '</tbody></table>';
|
||||||
} else {
|
} else {
|
||||||
html += '<p class="no-data">Нет данных о PCIe устройствах</p>';
|
html += '<p class="no-data">Нет данных о PCIe устройствах</p>';
|
||||||
|
|||||||
Reference in New Issue
Block a user