// Source: logpile/internal/parser/vendors/nvidia/gpu_model.go (375 lines, 11 KiB, Go)

package nvidia
import (
"encoding/json"
"regexp"
"strconv"
"strings"
"git.mchus.pro/mchus/logpile/internal/models"
"git.mchus.pro/mchus/logpile/internal/parser"
)
// Precompiled patterns for GPU names, SKU codes and inforom files.
var (
// gpuNameWithSerialRegex matches a whole name of the form "SXM<digits>_SN_<rest>".
// Group 1 is the digits after "SXM", group 2 is everything after "_SN_".
gpuNameWithSerialRegex = regexp.MustCompile(`^SXM(\d+)_SN_(.+)$`)
// gpuNameSlotOnlyRegex matches a whole name of the form "SXM<digits>".
// Group 1 is the digits.
gpuNameSlotOnlyRegex = regexp.MustCompile(`^SXM(\d+)$`)
// skuCodeRegex matches a SKU code at the start of a string: "G" plus three
// digits (group 1), a "." or "-" separator, then four digits (group 2).
// Anything after the four digits is ignored.
skuCodeRegex = regexp.MustCompile(`^(G\d{3})[.-](\d{4})`)
// skuCodeInsideRegex finds the same SKU shape anywhere in a string. The code may
// be preceded by a single digit and must be delimited on both sides by the
// string boundary or a character outside A-Z/0-9. Groups 1 and 2 are the same
// as in skuCodeRegex. Callers upper-case the input before matching.
skuCodeInsideRegex = regexp.MustCompile(`(?:^|[^A-Z0-9])(?:\d)?(G\d{3})[.-](\d{4})(?:[^A-Z0-9]|$)`)
// inforomPathRegex matches (case-insensitively) a path ending in
// "<checkinforom|inforom>/SXM<digits>[_SN_<serial>]/fieldiag.jso", with either
// "/" or "\" as separator. Group 1 is the directory name, group 2 is
// "SXM<digits>", group 3 is the digits, group 4 is the optional serial.
inforomPathRegex = regexp.MustCompile(`(?i)(?:^|[\\/])(checkinforom|inforom)[\\/](SXM(\d+))(?:_SN_([^\\/]+))?[\\/]fieldiag\.jso$`)
// inforomProductPNRegex captures the string value of a "product_part_num" key.
inforomProductPNRegex = regexp.MustCompile(`"product_part_num"\s*:\s*"([^"]+)"`)
// inforomSerialRegex captures the string value of a "serial_number" key.
inforomSerialRegex = regexp.MustCompile(`"serial_number"\s*:\s*"([^"]+)"`)
)
// testSpecData holds the parts of testspec.json that this file decodes.
type testSpecData struct {
// Actions is decoded from the top-level "actions" array.
Actions []struct {
// VirtualID is compared (trimmed and lower-cased) against "gpu_fieldiag"
// when looking for the mods mapping.
VirtualID string `json:"virtual_id"`
Args struct {
// SKUToFile maps a SKU code to a SKU JSON filename.
SKUToFile map[string]string `json:"sku_to_sku_json_file_map"`
// ModsMapping values are kept raw because they are decoded later either as
// a string (keys starting with "#mods.") or as a list of strings (other keys).
ModsMapping map[string]json.RawMessage `json:"mods_mapping"`
} `json:"args"`
} `json:"actions"`
}
// inventoryFieldDiagSummary holds the parts of inventory/fieldiag_summary.json
// that this file decodes: the per-run "ModsHeader" entries.
type inventoryFieldDiagSummary struct {
ModsRuns []struct {
ModsHeader []struct {
// GPUName is matched against "SXM<n>" or "SXM<n>_SN_<serial>" to obtain
// the slot and, when present, the serial.
GPUName string `json:"GpuName"`
// BoardInfo is normalised with normalizeSKUCode and used as the SKU.
BoardInfo string `json:"BoardInfo"`
} `json:"ModsHeader"`
} `json:"ModsRuns"`
}
// hardcodedSKUToFileMap maps known SKU codes to SKU JSON filenames.
// parseSKUToFileMap seeds its result from this table and never lets an entry
// from testspec.json override it. The filename is later turned into a
// human-readable description by skuFilenameToDescription.
var hardcodedSKUToFileMap = map[string]string{
"G520-0200": "sku_hgx-h100-8-gpu_80g_aircooled_field.json",
"G520-0201": "sku_hgx-h100-8-gpu_80g_aircooled_field.json",
"G520-0202": "sku_hgx-h100-8-gpu_80g_tpol_field.json",
"G520-0203": "sku_hgx-h100-8-gpu_80g_tpol_field.json",
"G520-0205": "sku_hgx-h800-8-gpu_80g_aircooled_field.json",
"G520-0207": "sku_hgx-h800-8-gpu_80g_tpol_field.json",
"G520-0221": "sku_hgx-h100-8-gpu_96g_aircooled_field.json",
"G520-0236": "sku_hgx-h20-8-gpu_96g_aircooled_field.json",
"G520-0238": "sku_hgx-h20-8-gpu_96g_tpol_field.json",
"G520-0266": "sku_hgx-h20-8-gpu_141g_aircooled_field.json",
"G520-0280": "sku_hgx-h200-8-gpu_141g_aircooled_field.json",
"G520-0282": "sku_hgx-h200-8-gpu_141g_tpol_field.json",
"G520-0292": "sku_hgx-h100-8-gpu_sku_292_field.json",
}
// ApplyGPUModelsFromSKU fills in the part number, model and description of
// every GPU in result.Hardware.GPUs from data found in the extracted files.
// It does nothing when result, result.Hardware or the GPU list is empty.
//
// Data sources:
//   - inforom/*/fieldiag.jso: product_part_num (full P/N with embedded SKU)
//   - inventory/fieldiag_summary.json: GPUName -> BoardInfo (SKU)
//   - hardcoded SKU mapping: SKU -> sku_hgx-... filename
//   - testspec.json: SKU -> sku_hgx-... filename (only for SKUs missing from
//     the hardcoded mapping)
//   - testspec.json gpu_fieldiag.mods_mapping: DeviceID -> GPU generation
//     (last fallback for the description)
//
// Per GPU:
//  1. A missing part number is looked up by serial number, then by slot.
//  2. A non-empty (trimmed) part number becomes the model.
//  3. The SKU is taken from the part number, then from the serial mapping,
//     then from the slot mapping.
//  4. The description is derived from the SKU's filename; if that yields
//     nothing, the generation mapped to the GPU's device ID is used.
func ApplyGPUModelsFromSKU(files []parser.ExtractedFile, result *models.AnalysisResult) {
	if result == nil || result.Hardware == nil || len(result.Hardware.GPUs) == 0 {
		return
	}

	skuToFile := parseSKUToFileMap(files)
	generationByDeviceID := parseGenerationByDeviceID(files)
	serialToSKU, slotToSKU, serialToPartNumber, slotToPartNumber := parseGPUSKUMapping(files)

	for i := range result.Hardware.GPUs {
		gpu := &result.Hardware.GPUs[i]
		slot := strings.TrimSpace(gpu.Slot)
		serial := strings.TrimSpace(gpu.SerialNumber)

		// A whitespace-only part number counts as missing: the code below already
		// ignores it for Model and SKU extraction, so it must not block the
		// serial/slot fallback either.
		if strings.TrimSpace(gpu.PartNumber) == "" {
			var pn string
			if serial != "" {
				pn = strings.TrimSpace(serialToPartNumber[serial])
			}
			if pn == "" {
				pn = strings.TrimSpace(slotToPartNumber[slot])
			}
			if pn != "" {
				gpu.PartNumber = pn
			}
		}

		if partNumber := strings.TrimSpace(gpu.PartNumber); partNumber != "" {
			gpu.Model = partNumber
		}

		// SKU precedence: embedded in the part number, then by serial, then by slot.
		sku := extractSKUFromPartNumber(gpu.PartNumber)
		if sku == "" && serial != "" {
			sku = serialToSKU[serial]
		}
		if sku == "" {
			sku = slotToSKU[slot]
		}

		if sku != "" {
			if desc := resolveDescriptionFromSKU(sku, skuToFile); desc != "" {
				gpu.Description = desc
				continue
			}
		}

		// Last resort: describe the GPU by the generation mapped to its device ID.
		if gen := resolveGenerationDescription(gpu.DeviceID, generationByDeviceID); gen != "" {
			gpu.Description = gen
		}
	}
}
// parseSKUToFileMap returns a SKU -> SKU JSON filename lookup keyed by the
// normalised SKU code. The table starts from hardcodedSKUToFileMap; entries
// from testspec.json ("sku_to_sku_json_file_map" of every action) are added
// only for SKUs that are not already present. A missing or undecodable
// testspec.json leaves just the hardcoded entries.
func parseSKUToFileMap(files []parser.ExtractedFile) map[string]string {
	skuFiles := make(map[string]string, len(hardcodedSKUToFileMap))
	for code, name := range hardcodedSKUToFileMap {
		skuFiles[normalizeSKUCode(code)] = strings.TrimSpace(name)
	}

	var spec testSpecData
	if f := parser.FindFileByName(files, "testspec.json"); f == nil || json.Unmarshal(f.Content, &spec) != nil {
		return skuFiles
	}

	for i := range spec.Actions {
		for code, name := range spec.Actions[i].Args.SKUToFile {
			key := normalizeSKUCode(code)
			if key == "" {
				continue
			}
			// Existing entries (hardcoded ones in particular) take priority;
			// testspec.json only contributes SKUs not seen yet.
			if _, known := skuFiles[key]; known {
				continue
			}
			skuFiles[key] = strings.TrimSpace(name)
		}
	}
	return skuFiles
}
// parseGenerationByDeviceID builds a normalised device ID ("0x..." hex) ->
// generation lookup from the mods_mapping of every "gpu_fieldiag" action in
// testspec.json. It returns nil when testspec.json is missing or cannot be
// decoded.
//
// mods_mapping is read in two passes per action:
//   - keys of the form "#mods.<family>" hold the generation string of a family;
//   - every other key not starting with "#" is a family name whose value is a
//     list of device IDs, each mapped to that family's generation.
//
// Family generations accumulate across actions.
func parseGenerationByDeviceID(files []parser.ExtractedFile) map[string]string {
	specFile := parser.FindFileByName(files, "testspec.json")
	if specFile == nil {
		return nil
	}
	var spec testSpecData
	if json.Unmarshal(specFile.Content, &spec) != nil {
		return nil
	}

	generationByFamily := map[string]string{}
	generationByDevice := map[string]string{}

	for i := range spec.Actions {
		action := &spec.Actions[i]
		if strings.TrimSpace(strings.ToLower(action.VirtualID)) != "gpu_fieldiag" {
			continue
		}
		mapping := action.Args.ModsMapping

		// Pass 1: collect family -> generation from the "#mods.<family>" keys.
		for key, raw := range mapping {
			if !strings.HasPrefix(key, "#mods.") {
				continue
			}
			family := strings.TrimSpace(strings.TrimPrefix(key, "#mods."))
			if family == "" {
				continue
			}
			var generation string
			if json.Unmarshal(raw, &generation) != nil {
				continue
			}
			if generation = strings.TrimSpace(generation); generation != "" {
				generationByFamily[family] = generation
			}
		}

		// Pass 2: expand each family's device ID list into device -> generation.
		for key, raw := range mapping {
			family := strings.TrimSpace(key)
			if family == "" || strings.HasPrefix(family, "#") {
				continue
			}
			generation := strings.TrimSpace(generationByFamily[family])
			if generation == "" {
				continue
			}
			var ids []string
			if json.Unmarshal(raw, &ids) != nil {
				continue
			}
			for _, id := range ids {
				if normalized := normalizeDeviceIDHex(id); normalized != "" {
					generationByDevice[normalized] = generation
				}
			}
		}
	}
	return generationByDevice
}
// parseGPUSKUMapping collects per-GPU SKU and part number lookups from the
// extracted files. It returns, in order: serial -> SKU, slot -> SKU,
// serial -> part number and slot -> part number. Slots are keyed as
// "GPUSXM<n>".
//
// Two sources are read, the second overwriting entries of the first:
//  1. the first file whose path contains inventory/fieldiag_summary.json
//     (GPUName/BoardInfo pairs, decoded as a list or as a single object);
//  2. every inforom/checkinforom .../SXM<n>[_SN_<serial>]/fieldiag.jso file
//     (product_part_num and serial_number values).
func parseGPUSKUMapping(files []parser.ExtractedFile) (map[string]string, map[string]string, map[string]string, map[string]string) {
	serialToSKU := map[string]string{}
	slotToSKU := map[string]string{}
	serialToPartNumber := map[string]string{}
	slotToPartNumber := map[string]string{}

	// 1) inventory/fieldiag_summary.json mapping (GPUName/BoardInfo).
	var summaryFile *parser.ExtractedFile
	for i := range files {
		lowered := strings.ToLower(files[i].Path)
		if strings.Contains(lowered, "inventory/fieldiag_summary.json") ||
			strings.Contains(lowered, "inventory\\fieldiag_summary.json") {
			summaryFile = &files[i]
			break
		}
	}
	// A missing summary is not fatal: inforom may still provide part numbers.
	if summaryFile != nil {
		var many []inventoryFieldDiagSummary
		var one inventoryFieldDiagSummary
		switch {
		case json.Unmarshal(summaryFile.Content, &many) == nil:
			for _, summary := range many {
				addSummaryMapping(summary, serialToSKU, slotToSKU)
			}
		case json.Unmarshal(summaryFile.Content, &one) == nil:
			addSummaryMapping(one, serialToSKU, slotToSKU)
		}
	}

	// 2) inforom/checkinforom fieldiag.jso mapping (full product_part_num).
	for i := range files {
		file := &files[i]
		m := inforomPathRegex.FindStringSubmatch(strings.TrimSpace(file.Path))
		if len(m) == 0 {
			continue
		}
		slot := "GPU" + strings.ToUpper(strings.TrimSpace(m[2])) // SXM7 -> GPUSXM7
		pathSerial := strings.TrimSpace(m[4])

		if pnMatch := inforomProductPNRegex.FindSubmatch(file.Content); len(pnMatch) == 2 {
			if partNumber := strings.TrimSpace(string(pnMatch[1])); partNumber != "" {
				sku := extractSKUFromPartNumber(partNumber)
				slotToPartNumber[slot] = partNumber
				if sku != "" {
					slotToSKU[slot] = sku
				}
				if pathSerial != "" {
					serialToPartNumber[pathSerial] = partNumber
					if sku != "" {
						serialToSKU[pathSerial] = sku
					}
				}
			}
		}

		// The serial stored inside the file inherits whatever is currently
		// known for this slot.
		serialMatch := inforomSerialRegex.FindSubmatch(file.Content)
		if len(serialMatch) != 2 {
			continue
		}
		contentSerial := strings.TrimSpace(string(serialMatch[1]))
		if contentSerial == "" {
			continue
		}
		if sku := slotToSKU[slot]; sku != "" {
			serialToSKU[contentSerial] = sku
		}
		if partNumber := slotToPartNumber[slot]; partNumber != "" {
			serialToPartNumber[contentSerial] = partNumber
		}
	}

	return serialToSKU, slotToSKU, serialToPartNumber, slotToPartNumber
}
// addSummaryMapping records the SKU of every ModsHeader entry of summary into
// the given maps. Entries whose BoardInfo normalises to an empty SKU are
// skipped. A GPU name of the form "SXM<n>_SN_<serial>" updates both maps; a
// plain "SXM<n>" updates only slotToSKU. Slots are keyed as "GPUSXM<n>".
func addSummaryMapping(summary inventoryFieldDiagSummary, serialToSKU map[string]string, slotToSKU map[string]string) {
	for _, run := range summary.ModsRuns {
		for _, header := range run.ModsHeader {
			sku := normalizeSKUCode(header.BoardInfo)
			if sku == "" {
				continue
			}
			name := strings.TrimSpace(header.GPUName)
			if m := gpuNameWithSerialRegex.FindStringSubmatch(name); len(m) == 3 {
				slotToSKU["GPUSXM"+m[1]] = sku
				serialToSKU[strings.TrimSpace(m[2])] = sku
			} else if m := gpuNameSlotOnlyRegex.FindStringSubmatch(name); len(m) == 2 {
				slotToSKU["GPUSXM"+m[1]] = sku
			}
		}
	}
}
// resolveDescriptionFromSKU looks up the SKU JSON filename registered for sku
// (after normalising the code) and converts it to a description. It returns ""
// when the SKU is unknown or maps to a blank filename.
func resolveDescriptionFromSKU(sku string, skuToFile map[string]string) string {
	key := normalizeSKUCode(sku)
	if name := strings.ToLower(strings.TrimSpace(skuToFile[key])); name != "" {
		return skuFilenameToDescription(name)
	}
	return ""
}
// normalizeSKUCode upper-cases and trims v. When the result starts with a SKU
// code ("G" + three digits, "." or "-", four digits) it returns just that code
// in the canonical "Gnnn-nnnn" form; otherwise it returns the upper-cased,
// trimmed input unchanged (which is "" for blank input).
func normalizeSKUCode(v string) string {
	code := strings.TrimSpace(strings.ToUpper(v))
	if m := skuCodeRegex.FindStringSubmatch(code); len(m) == 3 {
		return strings.Join(m[1:], "-")
	}
	return code
}
// extractSKUFromPartNumber searches the upper-cased, trimmed part number for an
// embedded SKU code and returns it as "Gnnn-nnnn". It returns "" when no SKU
// code is found.
func extractSKUFromPartNumber(partNumber string) string {
	candidate := strings.TrimSpace(strings.ToUpper(partNumber))
	m := skuCodeInsideRegex.FindStringSubmatch(candidate)
	if len(m) != 3 {
		return ""
	}
	return m[1] + "-" + m[2]
}
// skuFilenameToDescription turns a SKU JSON filename into a lower-case,
// space-separated description: the ".json" and "_field" suffixes and the
// "sku_" prefix are dropped, "-" and "_" become spaces, and runs of whitespace
// collapse to a single space. Blank input yields "".
func skuFilenameToDescription(file string) string {
	name := strings.TrimSpace(strings.ToLower(file))
	if len(name) == 0 {
		return ""
	}
	// Order matters: ".json" has to go before "_field" can be seen as a suffix.
	for _, suffix := range []string{".json", "_field"} {
		name = strings.TrimSuffix(name, suffix)
	}
	name = strings.TrimPrefix(name, "sku_")

	spaced := strings.NewReplacer("-", " ", "_", " ").Replace(name)
	// Fields + Join both collapses inner whitespace and trims the ends.
	return strings.Join(strings.Fields(spaced), " ")
}
// resolveGenerationDescription returns the trimmed generation registered for
// deviceID, looked up by its normalised hex form. It returns "" for a
// non-positive device ID, an empty mapping, or an unknown device.
func resolveGenerationDescription(deviceID int, deviceToGeneration map[string]string) string {
	if deviceID > 0 && len(deviceToGeneration) > 0 {
		key := normalizeDeviceIDHex(strconv.FormatInt(int64(deviceID), 16))
		return strings.TrimSpace(deviceToGeneration[key])
	}
	return ""
}
// normalizeDeviceIDHex canonicalises a hexadecimal device ID to lower-case
// "0x<digits>" without leading zeros. Surrounding whitespace, letter case and
// an optional "0x" prefix are ignored. It returns "" when the input is blank
// or is not a hexadecimal number that fits in 32 bits.
func normalizeDeviceIDHex(v string) string {
	digits := strings.TrimPrefix(strings.ToLower(strings.TrimSpace(v)), "0x")
	if len(digits) == 0 {
		return ""
	}
	value, err := strconv.ParseUint(digits, 16, 32)
	if err != nil {
		return ""
	}
	// FormatUint already emits lower-case digits.
	return "0x" + strconv.FormatUint(value, 16)
}