Fix NVIDIA GPU/NVSwitch parsing and Reanimator export statuses
This commit is contained in:
178
internal/parser/vendors/nvidia/gpu_model.go
vendored
Normal file
178
internal/parser/vendors/nvidia/gpu_model.go
vendored
Normal file
@@ -0,0 +1,178 @@
|
||||
package nvidia
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"regexp"
|
||||
"strings"
|
||||
|
||||
"git.mchus.pro/mchus/logpile/internal/models"
|
||||
"git.mchus.pro/mchus/logpile/internal/parser"
|
||||
)
|
||||
|
||||
// Patterns for GPU names as they appear in the field-diag summary.
var (
	// gpuNameSlotOnlyRegex matches a bare "SXM<n>" name and captures <n>.
	gpuNameSlotOnlyRegex = regexp.MustCompile(`^SXM(\d+)$`)

	// gpuNameWithSerialRegex matches "SXM<n>_SN_<serial>" and captures
	// <n> and <serial>, in that order.
	gpuNameWithSerialRegex = regexp.MustCompile(`^SXM(\d+)_SN_(.+)$`)
)

// Patterns for SKU codes and the SKU description file names they map to.
var (
	// skuCodeRegex matches a leading "G###" group followed by "." or "-"
	// and a four-digit group; anything after the four digits is ignored.
	skuCodeRegex = regexp.MustCompile(`^(G\d{3})[.-](\d{4})`)

	// skuModelRegex finds "sku_hgx-<family>-<count>-gpu" anywhere in a
	// lower-cased file name and captures <family>.
	skuModelRegex = regexp.MustCompile(`sku_hgx-([a-z0-9]+)-\d+-gpu`)
)
|
||||
|
||||
// testSpecData is the subset of testspec.json that this file decodes:
// the list of actions and, for each one, its SKU-to-file table.
type testSpecData struct {
	// Actions mirrors the top-level "actions" array.
	Actions []struct {
		// VirtualID is the action's "virtual_id" value.
		VirtualID string `json:"virtual_id"`

		// Args holds the part of the action's "args" object used here.
		Args struct {
			// SKUToFile maps a SKU string to a SKU JSON file name.
			SKUToFile map[string]string `json:"sku_to_sku_json_file_map"`
		} `json:"args"`
	} `json:"actions"`
}
|
||||
|
||||
// inventoryFieldDiagSummary is the subset of inventory/fieldiag_summary.json
// that this file decodes: for every run, the per-GPU header entries.
type inventoryFieldDiagSummary struct {
	// ModsRuns mirrors the top-level "ModsRuns" array.
	ModsRuns []struct {
		// ModsHeader mirrors the run's "ModsHeader" array.
		ModsHeader []struct {
			// GPUName is the "GpuName" value; the parser in this file
			// expects "SXM<n>" or "SXM<n>_SN_<serial>".
			GPUName string `json:"GpuName"`

			// BoardInfo is the "BoardInfo" value, treated as a SKU code.
			BoardInfo string `json:"BoardInfo"`
		} `json:"ModsHeader"`
	} `json:"ModsRuns"`
}
|
||||
|
||||
// ApplyGPUModelsFromSKU updates GPU model names using SKU mapping from testspec.json.
|
||||
// Mapping source:
|
||||
// - inventory/fieldiag_summary.json: GPUName -> BoardInfo(SKU)
|
||||
// - testspec.json: SKU -> sku_hgx-... filename
|
||||
func ApplyGPUModelsFromSKU(files []parser.ExtractedFile, result *models.AnalysisResult) {
|
||||
if result == nil || result.Hardware == nil || len(result.Hardware.GPUs) == 0 {
|
||||
return
|
||||
}
|
||||
|
||||
skuToFile := parseSKUToFileMap(files)
|
||||
if len(skuToFile) == 0 {
|
||||
return
|
||||
}
|
||||
|
||||
serialToSKU, slotToSKU := parseGPUSKUMapping(files)
|
||||
if len(serialToSKU) == 0 && len(slotToSKU) == 0 {
|
||||
return
|
||||
}
|
||||
|
||||
for i := range result.Hardware.GPUs {
|
||||
gpu := &result.Hardware.GPUs[i]
|
||||
sku := ""
|
||||
|
||||
if serial := strings.TrimSpace(gpu.SerialNumber); serial != "" {
|
||||
sku = serialToSKU[serial]
|
||||
}
|
||||
if sku == "" {
|
||||
sku = slotToSKU[strings.TrimSpace(gpu.Slot)]
|
||||
}
|
||||
if sku == "" {
|
||||
continue
|
||||
}
|
||||
|
||||
model := resolveModelFromSKU(sku, skuToFile)
|
||||
if model == "" {
|
||||
continue
|
||||
}
|
||||
|
||||
gpu.Model = model
|
||||
}
|
||||
}
|
||||
|
||||
func parseSKUToFileMap(files []parser.ExtractedFile) map[string]string {
|
||||
specFile := parser.FindFileByName(files, "testspec.json")
|
||||
if specFile == nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
var spec testSpecData
|
||||
if err := json.Unmarshal(specFile.Content, &spec); err != nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
result := make(map[string]string)
|
||||
for _, action := range spec.Actions {
|
||||
for sku, file := range action.Args.SKUToFile {
|
||||
normSKU := normalizeSKUCode(sku)
|
||||
if normSKU == "" {
|
||||
continue
|
||||
}
|
||||
result[normSKU] = strings.TrimSpace(file)
|
||||
}
|
||||
}
|
||||
return result
|
||||
}
|
||||
|
||||
func parseGPUSKUMapping(files []parser.ExtractedFile) (map[string]string, map[string]string) {
|
||||
var summaryFile *parser.ExtractedFile
|
||||
for _, f := range files {
|
||||
path := strings.ToLower(f.Path)
|
||||
if strings.Contains(path, "inventory/fieldiag_summary.json") ||
|
||||
strings.Contains(path, "inventory\\fieldiag_summary.json") {
|
||||
summaryFile = &f
|
||||
break
|
||||
}
|
||||
}
|
||||
if summaryFile == nil {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
var summary inventoryFieldDiagSummary
|
||||
if err := json.Unmarshal(summaryFile.Content, &summary); err != nil {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
serialToSKU := make(map[string]string)
|
||||
slotToSKU := make(map[string]string)
|
||||
|
||||
for _, run := range summary.ModsRuns {
|
||||
for _, h := range run.ModsHeader {
|
||||
sku := normalizeSKUCode(h.BoardInfo)
|
||||
if sku == "" {
|
||||
continue
|
||||
}
|
||||
|
||||
gpuName := strings.TrimSpace(h.GPUName)
|
||||
if matches := gpuNameWithSerialRegex.FindStringSubmatch(gpuName); len(matches) == 3 {
|
||||
slotToSKU["GPUSXM"+matches[1]] = sku
|
||||
serialToSKU[strings.TrimSpace(matches[2])] = sku
|
||||
continue
|
||||
}
|
||||
if matches := gpuNameSlotOnlyRegex.FindStringSubmatch(gpuName); len(matches) == 2 {
|
||||
slotToSKU["GPUSXM"+matches[1]] = sku
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return serialToSKU, slotToSKU
|
||||
}
|
||||
|
||||
func resolveModelFromSKU(sku string, skuToFile map[string]string) string {
|
||||
file := strings.ToLower(strings.TrimSpace(skuToFile[normalizeSKUCode(sku)]))
|
||||
if file == "" {
|
||||
return ""
|
||||
}
|
||||
|
||||
m := skuModelRegex.FindStringSubmatch(file)
|
||||
if len(m) != 2 {
|
||||
return ""
|
||||
}
|
||||
|
||||
gpuFamily := strings.ToUpper(strings.TrimSpace(m[1]))
|
||||
if gpuFamily == "" {
|
||||
return ""
|
||||
}
|
||||
|
||||
return fmt.Sprintf("NVIDIA %s SXM", gpuFamily)
|
||||
}
|
||||
|
||||
func normalizeSKUCode(v string) string {
|
||||
s := strings.TrimSpace(strings.ToUpper(v))
|
||||
if s == "" {
|
||||
return ""
|
||||
}
|
||||
|
||||
if m := skuCodeRegex.FindStringSubmatch(s); len(m) == 3 {
|
||||
return m[1] + "-" + m[2]
|
||||
}
|
||||
|
||||
return s
|
||||
}
|
||||
Reference in New Issue
Block a user