Files
logpile/internal/parser/vendors/nvidia/gpu_model.go

179 lines
4.1 KiB
Go

package nvidia
import (
"encoding/json"
"fmt"
"regexp"
"strings"
"git.mchus.pro/mchus/logpile/internal/models"
"git.mchus.pro/mchus/logpile/internal/parser"
)
// Patterns used to correlate GPU names, SKU codes and SKU file names.
var (
	// skuCodeRegex matches a string that starts with "G" and three digits,
	// then "." or "-", then four digits. Group 1 is the "Gddd" part and
	// group 2 the four digits; anything after them is ignored.
	skuCodeRegex = regexp.MustCompile(`^(G\d{3})[.-](\d{4})`)

	// skuModelRegex finds "sku_hgx-<family>-<count>-gpu" anywhere in a
	// string. Group 1 is <family>, limited to lower-case letters and digits.
	skuModelRegex = regexp.MustCompile(`sku_hgx-([a-z0-9]+)-\d+-gpu`)

	// gpuNameWithSerialRegex matches a whole GPU name of the form
	// "SXM<n>_SN_<serial>". Group 1 is <n>, group 2 is <serial>.
	gpuNameWithSerialRegex = regexp.MustCompile(`^SXM(\d+)_SN_(.+)$`)

	// gpuNameSlotOnlyRegex matches a whole GPU name of the form "SXM<n>".
	// Group 1 is <n>.
	gpuNameSlotOnlyRegex = regexp.MustCompile(`^SXM(\d+)$`)
)
// testSpecData is the subset of testspec.json decoded by this package.
//
// Each entry of the top-level "actions" array contributes its "virtual_id"
// and, under "args", the "sku_to_sku_json_file_map" object, which maps a SKU
// string to a file name string. All other JSON members are ignored.
type testSpecData struct {
	Actions []struct {
		VirtualID string `json:"virtual_id"`
		Args      struct {
			SKUToFile map[string]string `json:"sku_to_sku_json_file_map"`
		} `json:"args"`
	} `json:"actions"`
}
// inventoryFieldDiagSummary is the subset of the field-diagnostics summary
// JSON decoded by this package.
//
// The document holds a "ModsRuns" array; every run holds a "ModsHeader"
// array whose entries carry a "GpuName" and a "BoardInfo" string. All other
// JSON members are ignored.
type inventoryFieldDiagSummary struct {
	ModsRuns []struct {
		ModsHeader []struct {
			GPUName   string `json:"GpuName"`
			BoardInfo string `json:"BoardInfo"`
		} `json:"ModsHeader"`
	} `json:"ModsRuns"`
}
// ApplyGPUModelsFromSKU rewrites the Model field of the GPUs in result using
// SKU information found in the extracted files.
//
// Two lookups are chained:
//   - inventory/fieldiag_summary.json: GPU name -> BoardInfo (SKU)
//   - testspec.json: SKU -> sku_hgx-... file name, from which the model is derived
//
// For each GPU the SKU is looked up by trimmed serial number first and, when
// that yields nothing, by trimmed slot. GPUs with no SKU, or whose SKU does
// not resolve to a model, are left unchanged. The function is a no-op when
// result has no hardware or GPUs, or when either lookup table is empty.
func ApplyGPUModelsFromSKU(files []parser.ExtractedFile, result *models.AnalysisResult) {
	if result == nil || result.Hardware == nil {
		return
	}
	hw := result.Hardware
	if len(hw.GPUs) == 0 {
		return
	}

	fileBySKU := parseSKUToFileMap(files)
	if len(fileBySKU) == 0 {
		return
	}

	skuBySerial, skuBySlot := parseGPUSKUMapping(files)
	if len(skuBySerial)+len(skuBySlot) == 0 {
		return
	}

	for idx := range hw.GPUs {
		gpu := &hw.GPUs[idx]

		var sku string
		if sn := strings.TrimSpace(gpu.SerialNumber); sn != "" {
			sku = skuBySerial[sn]
		}
		// The serial number is the preferred key; the slot is the fallback.
		if sku == "" {
			sku = skuBySlot[strings.TrimSpace(gpu.Slot)]
		}
		if sku == "" {
			continue
		}

		if name := resolveModelFromSKU(sku, fileBySKU); name != "" {
			gpu.Model = name
		}
	}
}
// parseSKUToFileMap collects the SKU -> SKU-file mapping declared in
// testspec.json.
//
// The file is located with parser.FindFileByName. The
// "sku_to_sku_json_file_map" entries of every action are merged into a single
// map keyed by the normalized SKU code (see normalizeSKUCode); file names are
// stored with surrounding whitespace removed. When the same normalized SKU
// occurs in several actions, the entry from the later action wins.
//
// Entries whose SKU or file name is blank are skipped, so a blank value can
// neither shadow a usable mapping nor make the returned map look non-empty.
//
// It returns nil when testspec.json is absent or is not valid JSON; this is a
// best-effort enrichment, so the decode error is deliberately not propagated.
func parseSKUToFileMap(files []parser.ExtractedFile) map[string]string {
	specFile := parser.FindFileByName(files, "testspec.json")
	if specFile == nil {
		return nil
	}

	var spec testSpecData
	if err := json.Unmarshal(specFile.Content, &spec); err != nil {
		return nil
	}

	result := make(map[string]string)
	for _, action := range spec.Actions {
		for sku, file := range action.Args.SKUToFile {
			normSKU := normalizeSKUCode(sku)
			file = strings.TrimSpace(file)
			if normSKU == "" || file == "" {
				continue
			}
			result[normSKU] = file
		}
	}
	return result
}
// parseGPUSKUMapping builds GPU -> SKU lookup tables from the field
// diagnostics summary.
//
// The summary is the first file whose lower-cased path contains
// "inventory/fieldiag_summary.json", with either "/" or "\" as the separator.
// Every ModsHeader entry of every run is inspected:
//   - a GPU name "SXM<n>_SN_<serial>" registers the SKU under both the slot
//     key "GPUSXM<n>" and the serial number;
//   - a GPU name "SXM<n>" registers the SKU under the slot key only;
//   - entries with a blank BoardInfo or any other GPU name are ignored.
//
// Later entries overwrite earlier ones for the same key.
//
// It returns (serial -> SKU, slot -> SKU). Both maps are nil when the summary
// file is absent or is not valid JSON.
func parseGPUSKUMapping(files []parser.ExtractedFile) (map[string]string, map[string]string) {
	const summarySuffix = "inventory/fieldiag_summary.json"

	var summaryFile *parser.ExtractedFile
	// Iterate by index so the pointer refers to the slice element itself
	// rather than to a per-iteration copy of it.
	for i := range files {
		// Fold Windows-style separators so a single comparison covers both.
		path := strings.ReplaceAll(strings.ToLower(files[i].Path), `\`, "/")
		if strings.Contains(path, summarySuffix) {
			summaryFile = &files[i]
			break
		}
	}
	if summaryFile == nil {
		return nil, nil
	}

	var summary inventoryFieldDiagSummary
	if err := json.Unmarshal(summaryFile.Content, &summary); err != nil {
		return nil, nil
	}

	serialToSKU := make(map[string]string)
	slotToSKU := make(map[string]string)
	for _, run := range summary.ModsRuns {
		for _, h := range run.ModsHeader {
			sku := normalizeSKUCode(h.BoardInfo)
			if sku == "" {
				continue
			}

			gpuName := strings.TrimSpace(h.GPUName)
			if matches := gpuNameWithSerialRegex.FindStringSubmatch(gpuName); len(matches) == 3 {
				slotToSKU["GPUSXM"+matches[1]] = sku
				serialToSKU[strings.TrimSpace(matches[2])] = sku
				continue
			}
			if matches := gpuNameSlotOnlyRegex.FindStringSubmatch(gpuName); len(matches) == 2 {
				slotToSKU["GPUSXM"+matches[1]] = sku
			}
		}
	}
	return serialToSKU, slotToSKU
}
// resolveModelFromSKU turns a SKU code into a display model name.
//
// The SKU is normalized and looked up in skuToFile; the resulting file name
// is trimmed, lower-cased and matched against skuModelRegex. The captured
// family is upper-cased and returned as "NVIDIA <FAMILY> SXM". An empty
// string is returned when the SKU is unknown or the file name does not match
// the expected pattern.
func resolveModelFromSKU(sku string, skuToFile map[string]string) string {
	fileName := skuToFile[normalizeSKUCode(sku)]

	// A missing or blank file name simply fails to match below.
	groups := skuModelRegex.FindStringSubmatch(strings.ToLower(strings.TrimSpace(fileName)))
	if len(groups) != 2 {
		return ""
	}

	if family := strings.ToUpper(strings.TrimSpace(groups[1])); family != "" {
		return fmt.Sprintf("NVIDIA %s SXM", family)
	}
	return ""
}
// normalizeSKUCode returns the canonical form of a SKU code.
//
// The input is upper-cased and trimmed. If it then starts with the pattern
// described by skuCodeRegex, only the two captured parts are kept, joined by
// "-", and any trailing text is dropped. Otherwise the upper-cased, trimmed
// input is returned as is, which is the empty string for blank input.
func normalizeSKUCode(v string) string {
	code := strings.TrimSpace(strings.ToUpper(v))

	parts := skuCodeRegex.FindStringSubmatch(code)
	if len(parts) != 3 {
		return code
	}
	return parts[1] + "-" + parts[2]
}