nvidia: improve component mapping, firmware, statuses and check times
This commit is contained in:
292
internal/parser/vendors/nvidia/gpu_model.go
vendored
292
internal/parser/vendors/nvidia/gpu_model.go
vendored
@@ -2,8 +2,8 @@ package nvidia
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"regexp"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"git.mchus.pro/mchus/logpile/internal/models"
|
||||
@@ -13,15 +13,19 @@ import (
|
||||
var (
|
||||
gpuNameWithSerialRegex = regexp.MustCompile(`^SXM(\d+)_SN_(.+)$`)
|
||||
gpuNameSlotOnlyRegex = regexp.MustCompile(`^SXM(\d+)$`)
|
||||
skuModelRegex = regexp.MustCompile(`sku_hgx-([a-z0-9]+)-\d+-gpu`)
|
||||
skuCodeRegex = regexp.MustCompile(`^(G\d{3})[.-](\d{4})`)
|
||||
skuCodeInsideRegex = regexp.MustCompile(`(?:^|[^A-Z0-9])(?:\d)?(G\d{3})[.-](\d{4})(?:[^A-Z0-9]|$)`)
|
||||
inforomPathRegex = regexp.MustCompile(`(?i)(?:^|[\\/])(checkinforom|inforom)[\\/](SXM(\d+))(?:_SN_([^\\/]+))?[\\/]fieldiag\.jso$`)
|
||||
inforomProductPNRegex = regexp.MustCompile(`"product_part_num"\s*:\s*"([^"]+)"`)
|
||||
inforomSerialRegex = regexp.MustCompile(`"serial_number"\s*:\s*"([^"]+)"`)
|
||||
)
|
||||
|
||||
type testSpecData struct {
|
||||
Actions []struct {
|
||||
VirtualID string `json:"virtual_id"`
|
||||
Args struct {
|
||||
SKUToFile map[string]string `json:"sku_to_sku_json_file_map"`
|
||||
SKUToFile map[string]string `json:"sku_to_sku_json_file_map"`
|
||||
ModsMapping map[string]json.RawMessage `json:"mods_mapping"`
|
||||
} `json:"args"`
|
||||
} `json:"actions"`
|
||||
}
|
||||
@@ -35,49 +39,111 @@ type inventoryFieldDiagSummary struct {
|
||||
} `json:"ModsRuns"`
|
||||
}
|
||||
|
||||
// hardcodedSKUToFileMap is the built-in table from board SKU code to the
// sku_hgx-*.json spec filename. parseSKUToFileMap seeds its result from this
// table, and entries here take priority over those read from testspec.json.
var hardcodedSKUToFileMap = map[string]string{
|
||||
"G520-0200": "sku_hgx-h100-8-gpu_80g_aircooled_field.json",
|
||||
"G520-0201": "sku_hgx-h100-8-gpu_80g_aircooled_field.json",
|
||||
"G520-0202": "sku_hgx-h100-8-gpu_80g_tpol_field.json",
|
||||
"G520-0203": "sku_hgx-h100-8-gpu_80g_tpol_field.json",
|
||||
"G520-0205": "sku_hgx-h800-8-gpu_80g_aircooled_field.json",
|
||||
"G520-0207": "sku_hgx-h800-8-gpu_80g_tpol_field.json",
|
||||
"G520-0221": "sku_hgx-h100-8-gpu_96g_aircooled_field.json",
|
||||
"G520-0236": "sku_hgx-h20-8-gpu_96g_aircooled_field.json",
|
||||
"G520-0238": "sku_hgx-h20-8-gpu_96g_tpol_field.json",
|
||||
"G520-0266": "sku_hgx-h20-8-gpu_141g_aircooled_field.json",
|
||||
"G520-0280": "sku_hgx-h200-8-gpu_141g_aircooled_field.json",
|
||||
"G520-0282": "sku_hgx-h200-8-gpu_141g_tpol_field.json",
|
||||
"G520-0292": "sku_hgx-h100-8-gpu_sku_292_field.json",
|
||||
}
|
||||
|
||||
// ApplyGPUModelsFromSKU updates GPU model names using SKU mapping from testspec.json.
|
||||
// Mapping source:
|
||||
// - inventory/fieldiag_summary.json: GPUName -> BoardInfo(SKU)
|
||||
// - testspec.json: SKU -> sku_hgx-... filename
|
||||
// - hardcoded SKU mapping
|
||||
// - testspec.json: SKU -> sku_hgx-... filename (fallback for unknown hardcoded SKU)
|
||||
// - inforom/*/fieldiag.jso: product_part_num (full P/N with embedded SKU)
|
||||
// - testspec.json gpu_fieldiag.mods_mapping: DeviceID -> GPU generation (last fallback for description)
|
||||
func ApplyGPUModelsFromSKU(files []parser.ExtractedFile, result *models.AnalysisResult) {
|
||||
if result == nil || result.Hardware == nil || len(result.Hardware.GPUs) == 0 {
|
||||
return
|
||||
}
|
||||
|
||||
skuToFile := parseSKUToFileMap(files)
|
||||
if len(skuToFile) == 0 {
|
||||
return
|
||||
}
|
||||
generationByDeviceID := parseGenerationByDeviceID(files)
|
||||
|
||||
serialToSKU, slotToSKU := parseGPUSKUMapping(files)
|
||||
if len(serialToSKU) == 0 && len(slotToSKU) == 0 {
|
||||
return
|
||||
}
|
||||
serialToSKU, slotToSKU, serialToPartNumber, slotToPartNumber := parseGPUSKUMapping(files)
|
||||
|
||||
for i := range result.Hardware.GPUs {
|
||||
gpu := &result.Hardware.GPUs[i]
|
||||
sku := ""
|
||||
slot := strings.TrimSpace(gpu.Slot)
|
||||
serial := strings.TrimSpace(gpu.SerialNumber)
|
||||
|
||||
if serial := strings.TrimSpace(gpu.SerialNumber); serial != "" {
|
||||
if gpu.PartNumber == "" && serial != "" {
|
||||
if pn := strings.TrimSpace(serialToPartNumber[serial]); pn != "" {
|
||||
gpu.PartNumber = pn
|
||||
}
|
||||
}
|
||||
if gpu.PartNumber == "" {
|
||||
if pn := strings.TrimSpace(slotToPartNumber[slot]); pn != "" {
|
||||
gpu.PartNumber = pn
|
||||
}
|
||||
}
|
||||
|
||||
if partNumber := strings.TrimSpace(gpu.PartNumber); partNumber != "" {
|
||||
gpu.Model = partNumber
|
||||
}
|
||||
|
||||
sku := extractSKUFromPartNumber(gpu.PartNumber)
|
||||
if sku == "" && serial != "" {
|
||||
sku = serialToSKU[serial]
|
||||
}
|
||||
if sku == "" {
|
||||
sku = slotToSKU[strings.TrimSpace(gpu.Slot)]
|
||||
sku = slotToSKU[slot]
|
||||
}
|
||||
if sku == "" {
|
||||
continue
|
||||
if sku != "" {
|
||||
if desc := resolveDescriptionFromSKU(sku, skuToFile); desc != "" {
|
||||
gpu.Description = desc
|
||||
continue
|
||||
}
|
||||
}
|
||||
|
||||
model := resolveModelFromSKU(sku, skuToFile)
|
||||
if model == "" {
|
||||
continue
|
||||
if gen := resolveGenerationDescription(gpu.DeviceID, generationByDeviceID); gen != "" {
|
||||
gpu.Description = gen
|
||||
}
|
||||
|
||||
gpu.Model = model
|
||||
}
|
||||
}
|
||||
|
||||
func parseSKUToFileMap(files []parser.ExtractedFile) map[string]string {
|
||||
result := make(map[string]string, len(hardcodedSKUToFileMap))
|
||||
for sku, file := range hardcodedSKUToFileMap {
|
||||
result[normalizeSKUCode(sku)] = strings.TrimSpace(file)
|
||||
}
|
||||
|
||||
specFile := parser.FindFileByName(files, "testspec.json")
|
||||
if specFile == nil {
|
||||
return result
|
||||
}
|
||||
|
||||
var spec testSpecData
|
||||
if err := json.Unmarshal(specFile.Content, &spec); err != nil {
|
||||
return result
|
||||
}
|
||||
|
||||
for _, action := range spec.Actions {
|
||||
for sku, file := range action.Args.SKUToFile {
|
||||
normSKU := normalizeSKUCode(sku)
|
||||
if normSKU == "" {
|
||||
continue
|
||||
}
|
||||
// Priority: hardcoded mapping wins, testspec extends unknown SKU list.
|
||||
if _, exists := result[normSKU]; !exists {
|
||||
result[normSKU] = strings.TrimSpace(file)
|
||||
}
|
||||
}
|
||||
}
|
||||
return result
|
||||
}
|
||||
|
||||
func parseGenerationByDeviceID(files []parser.ExtractedFile) map[string]string {
|
||||
specFile := parser.FindFileByName(files, "testspec.json")
|
||||
if specFile == nil {
|
||||
return nil
|
||||
@@ -88,20 +154,61 @@ func parseSKUToFileMap(files []parser.ExtractedFile) map[string]string {
|
||||
return nil
|
||||
}
|
||||
|
||||
result := make(map[string]string)
|
||||
familyToGeneration := make(map[string]string)
|
||||
deviceToGeneration := make(map[string]string)
|
||||
|
||||
for _, action := range spec.Actions {
|
||||
for sku, file := range action.Args.SKUToFile {
|
||||
normSKU := normalizeSKUCode(sku)
|
||||
if normSKU == "" {
|
||||
if strings.TrimSpace(strings.ToLower(action.VirtualID)) != "gpu_fieldiag" {
|
||||
continue
|
||||
}
|
||||
for key, raw := range action.Args.ModsMapping {
|
||||
if strings.HasPrefix(key, "#mods.") {
|
||||
family := strings.TrimSpace(strings.TrimPrefix(key, "#mods."))
|
||||
if family == "" {
|
||||
continue
|
||||
}
|
||||
var generation string
|
||||
if err := json.Unmarshal(raw, &generation); err == nil {
|
||||
generation = strings.TrimSpace(generation)
|
||||
if generation != "" {
|
||||
familyToGeneration[family] = generation
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for key, raw := range action.Args.ModsMapping {
|
||||
family := strings.TrimSpace(key)
|
||||
if family == "" || strings.HasPrefix(family, "#") {
|
||||
continue
|
||||
}
|
||||
result[normSKU] = strings.TrimSpace(file)
|
||||
generation := strings.TrimSpace(familyToGeneration[family])
|
||||
if generation == "" {
|
||||
continue
|
||||
}
|
||||
var deviceIDs []string
|
||||
if err := json.Unmarshal(raw, &deviceIDs); err != nil {
|
||||
continue
|
||||
}
|
||||
for _, id := range deviceIDs {
|
||||
norm := normalizeDeviceIDHex(id)
|
||||
if norm != "" {
|
||||
deviceToGeneration[norm] = generation
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return result
|
||||
|
||||
return deviceToGeneration
|
||||
}
|
||||
|
||||
func parseGPUSKUMapping(files []parser.ExtractedFile) (map[string]string, map[string]string) {
|
||||
func parseGPUSKUMapping(files []parser.ExtractedFile) (map[string]string, map[string]string, map[string]string, map[string]string) {
|
||||
serialToSKU := make(map[string]string)
|
||||
slotToSKU := make(map[string]string)
|
||||
serialToPartNumber := make(map[string]string)
|
||||
slotToPartNumber := make(map[string]string)
|
||||
|
||||
// 1) inventory/fieldiag_summary.json mapping (GPUName/BoardInfo).
|
||||
var summaryFile *parser.ExtractedFile
|
||||
for _, f := range files {
|
||||
path := strings.ToLower(f.Path)
|
||||
@@ -112,17 +219,67 @@ func parseGPUSKUMapping(files []parser.ExtractedFile) (map[string]string, map[st
|
||||
}
|
||||
}
|
||||
if summaryFile == nil {
|
||||
return nil, nil
|
||||
// Continue: inforom may still contain usable part numbers.
|
||||
} else {
|
||||
var summaries []inventoryFieldDiagSummary
|
||||
if err := json.Unmarshal(summaryFile.Content, &summaries); err == nil {
|
||||
for _, summary := range summaries {
|
||||
addSummaryMapping(summary, serialToSKU, slotToSKU)
|
||||
}
|
||||
} else {
|
||||
var summary inventoryFieldDiagSummary
|
||||
if err := json.Unmarshal(summaryFile.Content, &summary); err == nil {
|
||||
addSummaryMapping(summary, serialToSKU, slotToSKU)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
var summary inventoryFieldDiagSummary
|
||||
if err := json.Unmarshal(summaryFile.Content, &summary); err != nil {
|
||||
return nil, nil
|
||||
// 2) inforom/checkinforom fieldiag.jso mapping (full product_part_num).
|
||||
for _, f := range files {
|
||||
path := strings.TrimSpace(f.Path)
|
||||
m := inforomPathRegex.FindStringSubmatch(path)
|
||||
if len(m) == 0 {
|
||||
continue
|
||||
}
|
||||
|
||||
slot := "GPU" + strings.ToUpper(strings.TrimSpace(m[2])) // SXM7 -> GPUSXM7
|
||||
serialFromPath := strings.TrimSpace(m[4])
|
||||
|
||||
productPNMatch := inforomProductPNRegex.FindSubmatch(f.Content)
|
||||
if len(productPNMatch) == 2 {
|
||||
partNumber := strings.TrimSpace(string(productPNMatch[1]))
|
||||
if partNumber != "" {
|
||||
slotToPartNumber[slot] = partNumber
|
||||
if serialFromPath != "" {
|
||||
serialToPartNumber[serialFromPath] = partNumber
|
||||
}
|
||||
if sku := extractSKUFromPartNumber(partNumber); sku != "" {
|
||||
slotToSKU[slot] = sku
|
||||
if serialFromPath != "" {
|
||||
serialToSKU[serialFromPath] = sku
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
serialMatch := inforomSerialRegex.FindSubmatch(f.Content)
|
||||
if len(serialMatch) == 2 {
|
||||
serial := strings.TrimSpace(string(serialMatch[1]))
|
||||
if serial != "" {
|
||||
if sku := slotToSKU[slot]; sku != "" {
|
||||
serialToSKU[serial] = sku
|
||||
}
|
||||
if pn := slotToPartNumber[slot]; pn != "" {
|
||||
serialToPartNumber[serial] = pn
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
serialToSKU := make(map[string]string)
|
||||
slotToSKU := make(map[string]string)
|
||||
return serialToSKU, slotToSKU, serialToPartNumber, slotToPartNumber
|
||||
}
|
||||
|
||||
func addSummaryMapping(summary inventoryFieldDiagSummary, serialToSKU map[string]string, slotToSKU map[string]string) {
|
||||
for _, run := range summary.ModsRuns {
|
||||
for _, h := range run.ModsHeader {
|
||||
sku := normalizeSKUCode(h.BoardInfo)
|
||||
@@ -141,27 +298,15 @@ func parseGPUSKUMapping(files []parser.ExtractedFile) (map[string]string, map[st
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return serialToSKU, slotToSKU
|
||||
}
|
||||
|
||||
func resolveModelFromSKU(sku string, skuToFile map[string]string) string {
|
||||
func resolveDescriptionFromSKU(sku string, skuToFile map[string]string) string {
|
||||
file := strings.ToLower(strings.TrimSpace(skuToFile[normalizeSKUCode(sku)]))
|
||||
if file == "" {
|
||||
return ""
|
||||
}
|
||||
|
||||
m := skuModelRegex.FindStringSubmatch(file)
|
||||
if len(m) != 2 {
|
||||
return ""
|
||||
}
|
||||
|
||||
gpuFamily := strings.ToUpper(strings.TrimSpace(m[1]))
|
||||
if gpuFamily == "" {
|
||||
return ""
|
||||
}
|
||||
|
||||
return fmt.Sprintf("NVIDIA %s SXM", gpuFamily)
|
||||
return skuFilenameToDescription(file)
|
||||
}
|
||||
|
||||
func normalizeSKUCode(v string) string {
|
||||
@@ -176,3 +321,54 @@ func normalizeSKUCode(v string) string {
|
||||
|
||||
return s
|
||||
}
|
||||
|
||||
func extractSKUFromPartNumber(partNumber string) string {
|
||||
s := strings.TrimSpace(strings.ToUpper(partNumber))
|
||||
if s == "" {
|
||||
return ""
|
||||
}
|
||||
|
||||
if m := skuCodeInsideRegex.FindStringSubmatch(s); len(m) == 3 {
|
||||
return m[1] + "-" + m[2]
|
||||
}
|
||||
|
||||
return ""
|
||||
}
|
||||
|
||||
// skuFilenameToDescription turns a SKU spec filename into a space-separated,
// lower-case description. The input is lower-cased and trimmed, a trailing
// ".json", then a trailing "_field", then a leading "sku_" are removed, every
// '-' and '_' becomes a space, and runs of whitespace collapse to one space.
// A blank input (or one that reduces to nothing) yields "".
func skuFilenameToDescription(file string) string {
	name := strings.TrimSpace(strings.ToLower(file))
	name = strings.TrimPrefix(
		strings.TrimSuffix(strings.TrimSuffix(name, ".json"), "_field"),
		"sku_",
	)

	spaced := strings.NewReplacer("-", " ", "_", " ").Replace(name)

	// Fields drops leading/trailing whitespace and empty tokens, so joining
	// the words back already gives a fully trimmed, single-spaced string.
	return strings.Join(strings.Fields(spaced), " ")
}
|
||||
|
||||
func resolveGenerationDescription(deviceID int, deviceToGeneration map[string]string) string {
|
||||
if deviceID <= 0 || len(deviceToGeneration) == 0 {
|
||||
return ""
|
||||
}
|
||||
return strings.TrimSpace(deviceToGeneration[normalizeDeviceIDHex(strconv.FormatInt(int64(deviceID), 16))])
|
||||
}
|
||||
|
||||
// normalizeDeviceIDHex converts a hexadecimal device ID, with or without a
// "0x"/"0X" prefix and in any letter case, into the canonical form "0x" plus
// lower-case hex digits without leading zeros. It returns "" when the input is
// blank, is not valid hexadecimal, or does not fit in 32 bits.
func normalizeDeviceIDHex(v string) string {
	digits := strings.TrimPrefix(strings.TrimSpace(strings.ToLower(v)), "0x")
	if len(digits) == 0 {
		return ""
	}

	value, err := strconv.ParseUint(digits, 16, 32)
	if err != nil {
		return ""
	}

	// FormatUint already emits lower-case digits for base 16.
	return "0x" + strconv.FormatUint(value, 16)
}
|
||||
|
||||
Reference in New Issue
Block a user