Files
logpile/internal/parser/vendors/inspur/hgx_hwinfo.go

361 lines
8.1 KiB
Go

package inspur
import (
"fmt"
"regexp"
"strconv"
"strings"
"git.mchus.pro/mchus/logpile/internal/models"
)
// hgxGPUAssemblyInfo carries the identity fields captured from one
// "GPU Board Assembly" record of an HGX GPU SXM module.
type hgxGPUAssemblyInfo struct {
	Model  string // value of the record's "Model" key
	Part   string // value of the record's "PartNumber" key
	Serial string // value of the record's "SerialNumber" key
}
// hgxGPUFirmwareInfo carries the per-GPU version strings collected from the
// HGX firmware records of one SXM module.
type hgxGPUFirmwareInfo struct {
	Firmware string // "Version" of the HGX_FW_GPU_SXM_<n> record
	InfoROM  string // "Version" of the HGX_InfoROM_GPU_SXM_<n> record
}
// hgxFirmwareInventoryEntry is one HGX_* firmware record assembled by
// parseHGXFirmwareInventory.
type hgxFirmwareInventoryEntry struct {
	ID              string // record "Id"; always starts with "HGX_"
	Version         string // last plain "Version" seen inside the record
	ActiveVersion   string // first "Version" following an "ActiveFirmwareSlot" marker
	InactiveVersion string // first "Version" following an "InactiveFirmwareSlot" marker
}
// hgxLogicalToSXM maps a logical GPU index, in the order used by the HGX B200
// UI, to the SXM module number that appears in the HGX Redfish resource names.
//
// Mapping taken from real logs/UI:
//
//	GPU0->SXM5, GPU1->SXM7, GPU2->SXM6, GPU3->SXM8,
//	GPU4->SXM2, GPU5->SXM4, GPU6->SXM3, GPU7->SXM1.
var hgxLogicalToSXM = map[int]int{
	0: 5, 1: 7, 2: 6, 3: 8,
	4: 2, 5: 4, 6: 3, 7: 1,
}
// Patterns used to scrape the HGX hardware-info dump. All are compiled once at
// package initialisation.
var (
	// reHGXGPUBlock matches one GPU board assembly record. Groups: 1 = SXM
	// number, 2 = Model, 3 = PartNumber, 4 = SerialNumber.
	// NOTE(review): (?s) plus the lazy ".*?" gaps lets a match run across
	// lines, so a record missing one of the keys can pick it up from a later
	// record.
	reHGXGPUBlock = regexp.MustCompile(`(?s)/redfish/v1/Chassis/HGX_GPU_SXM_(\d+)/Assembly.*?"Name":\s*"GPU Board Assembly".*?"Model":\s*"([^"]+)".*?"PartNumber":\s*"([^"]+)".*?"SerialNumber":\s*"([^"]+)"`)

	// reHGXFWBlock matches a GPU firmware record. Groups: 1 = SXM number,
	// 2 = the next "Version" value (may be empty).
	reHGXFWBlock = regexp.MustCompile(`(?s)"Id":\s*"HGX_FW_GPU_SXM_(\d+)".*?"Version":\s*"([^"]*)"`)

	// reHGXInfoROM matches a GPU InfoROM record. Groups: 1 = SXM number,
	// 2 = the next "Version" value (may be empty).
	reHGXInfoROM = regexp.MustCompile(`(?s)"Id":\s*"HGX_InfoROM_GPU_SXM_(\d+)".*?"Version":\s*"([^"]*)"`)

	// reIDLine captures the value of an "Id" key on a single line.
	reIDLine = regexp.MustCompile(`"Id":\s*"([^"]+)"`)

	// reVersion captures the (possibly empty) value of a "Version" key on a
	// single line.
	reVersion = regexp.MustCompile(`"Version":\s*"([^"]*)"`)

	// reSlotGPU captures the number following a case-insensitive "gpu" in a
	// slot label, allowing optional whitespace and an optional '#' between.
	reSlotGPU = regexp.MustCompile(`(?i)gpu\s*#?\s*(\d+)`)
)
// enrichGPUsFromHGXHWInfo completes the GPU list in hw with data scraped from
// the HGX hardware-info dump in content.
//
// It does nothing when hw is nil, hw.GPUs is empty, content is empty, or no
// GPU board assembly record is found. Otherwise the GPU list is first
// normalized (see normalizeHGXGPUInventory) and then, for every GPU whose slot
// yields a logical index with a known assembly record:
//   - blank SerialNumber, PartNumber and Manufacturer are filled in
//     (Manufacturer is always "NVIDIA");
//   - Model is replaced when shouldReplaceGPUModel reports it as a placeholder;
//   - blank Firmware and VideoBIOS are filled from non-blank firmware and
//     InfoROM versions.
//
// Values that are already populated are never overwritten.
func enrichGPUsFromHGXHWInfo(content []byte, hw *models.HardwareConfig) {
	if hw == nil || len(hw.GPUs) == 0 || len(content) == 0 {
		return
	}

	assemblies := parseHGXGPUAssembly(content)
	if len(assemblies) == 0 {
		return
	}
	firmwares := parseHGXGPUFirmware(content)

	normalizeHGXGPUInventory(hw, assemblies)

	isBlank := func(s string) bool { return strings.TrimSpace(s) == "" }
	fillIfBlank := func(dst *string, value string) {
		if isBlank(*dst) {
			*dst = value
		}
	}

	for i := range hw.GPUs {
		gpu := &hw.GPUs[i]

		logicalIdx, parsed := extractLogicalGPUIndex(gpu.Slot)
		if !parsed {
			// Without a slot index there is nothing to correlate; keep as is.
			continue
		}
		sxm := resolveSXMIndex(logicalIdx, assemblies)
		asm, known := assemblies[sxm]
		if !known {
			continue
		}

		fillIfBlank(&gpu.SerialNumber, asm.Serial)
		if shouldReplaceGPUModel(gpu.Model) {
			gpu.Model = asm.Model
		}
		fillIfBlank(&gpu.PartNumber, asm.Part)
		fillIfBlank(&gpu.Manufacturer, "NVIDIA")

		// A missing map entry yields the zero value, whose blank fields make
		// both checks below no-ops.
		fw := firmwares[sxm]
		if !isBlank(fw.Firmware) {
			fillIfBlank(&gpu.Firmware, fw.Firmware)
		}
		if !isBlank(fw.InfoROM) {
			fillIfBlank(&gpu.VideoBIOS, fw.InfoROM)
		}
	}
}
// appendHGXFirmwareFromHWInfo appends the HGX_* firmware records found in
// content to hw.Firmware.
//
// Each record contributes its top-level version under the record ID. When both
// slot versions are present and differ, two extra entries named
// "<ID> Active Slot" and "<ID> Inactive Slot" are added as well. Entries with
// a blank name or version are skipped, and an entry is not added if the same
// name/version pair (compared case-insensitively after trimming) already
// exists. It is a no-op when hw is nil, content is empty, or no record parses.
func appendHGXFirmwareFromHWInfo(content []byte, hw *models.HardwareConfig) {
	if hw == nil || len(content) == 0 {
		return
	}
	entries := parseHGXFirmwareInventory(content)
	if len(entries) == 0 {
		return
	}

	// dedupKey builds the case-insensitive identity of a firmware entry.
	dedupKey := func(name, version string) string {
		return strings.ToLower(strings.TrimSpace(name) + "|" + strings.TrimSpace(version))
	}

	seen := make(map[string]struct{}, len(hw.Firmware))
	for _, fw := range hw.Firmware {
		seen[dedupKey(fw.DeviceName, fw.Version)] = struct{}{}
	}

	add := func(name, version string) {
		name, version = strings.TrimSpace(name), strings.TrimSpace(version)
		if name == "" || version == "" {
			return
		}
		key := dedupKey(name, version)
		if _, dup := seen[key]; dup {
			return
		}
		seen[key] = struct{}{}
		hw.Firmware = append(hw.Firmware, models.FirmwareInfo{
			DeviceName: name,
			Version:    version,
		})
	}

	for _, e := range entries {
		add(e.ID, e.Version)

		slotsDiffer := e.ActiveVersion != "" &&
			e.InactiveVersion != "" &&
			e.ActiveVersion != e.InactiveVersion
		if !slotsDiffer {
			continue
		}
		add(e.ID+" Active Slot", e.ActiveVersion)
		add(e.ID+" Inactive Slot", e.InactiveVersion)
	}
}
// parseHGXGPUAssembly extracts every GPU board assembly record matched by
// reHGXGPUBlock and returns them keyed by SXM number.
//
// Model, part number and serial are whitespace-trimmed. Records whose SXM
// number does not parse or is not positive are ignored. When an SXM number
// occurs more than once, the last record wins. The returned map is never nil.
func parseHGXGPUAssembly(content []byte) map[int]hgxGPUAssemblyInfo {
	bySXM := make(map[int]hgxGPUAssemblyInfo)

	for _, groups := range reHGXGPUBlock.FindAllSubmatch(content, -1) {
		// Whole match plus four capture groups.
		if len(groups) != 5 {
			continue
		}
		sxm, err := strconv.Atoi(string(groups[1]))
		if err != nil || sxm <= 0 {
			continue
		}

		var info hgxGPUAssemblyInfo
		info.Model = strings.TrimSpace(string(groups[2]))
		info.Part = strings.TrimSpace(string(groups[3]))
		info.Serial = strings.TrimSpace(string(groups[4]))
		bySXM[sxm] = info
	}

	return bySXM
}
// parseHGXGPUFirmware collects the per-GPU firmware and InfoROM versions from
// content, keyed by SXM number.
//
// Firmware comes from reHGXFWBlock matches and InfoROM from reHGXInfoROM
// matches. For each field the first non-blank version per SXM number is kept.
// Matches with an unparsable or non-positive SXM number, or a blank version,
// are ignored and do not create a map entry. The returned map is never nil.
func parseHGXGPUFirmware(content []byte) map[int]hgxGPUFirmwareInfo {
	bySXM := make(map[int]hgxGPUFirmwareInfo)

	// record stores the version of every usable match into the field chosen by
	// field, leaving an already populated field untouched.
	record := func(matches [][][]byte, field func(*hgxGPUFirmwareInfo) *string) {
		for _, groups := range matches {
			// Whole match plus two capture groups.
			if len(groups) != 3 {
				continue
			}
			sxm, err := strconv.Atoi(string(groups[1]))
			if err != nil || sxm <= 0 {
				continue
			}
			version := strings.TrimSpace(string(groups[2]))
			if version == "" {
				continue
			}

			info := bySXM[sxm]
			if dst := field(&info); *dst == "" {
				*dst = version
			}
			bySXM[sxm] = info
		}
	}

	record(reHGXFWBlock.FindAllSubmatch(content, -1), func(info *hgxGPUFirmwareInfo) *string {
		return &info.Firmware
	})
	record(reHGXInfoROM.FindAllSubmatch(content, -1), func(info *hgxGPUFirmwareInfo) *string {
		return &info.InfoROM
	})

	return bySXM
}
// parseHGXFirmwareInventory scans content line by line and assembles one entry
// per firmware record whose "Id" starts with "HGX_".
//
// A line carrying an "Id" key closes the record in progress and, if the ID has
// the HGX_ prefix, opens a new one; lines outside an HGX record are ignored.
// Within a record:
//   - a line containing "ActiveFirmwareSlot" or "InactiveFirmwareSlot" marks
//     the next "Version" value as belonging to that slot (only the first value
//     per slot is kept);
//   - any other non-blank "Version" value becomes the top-level version, the
//     last one winning;
//   - every "Version" line, blank or not, clears the pending slot marker.
//
// Records that end up with no version at all are dropped. The result is a
// non-nil slice in input order.
func parseHGXFirmwareInventory(content []byte) []hgxFirmwareInventoryEntry {
	type slotKind int
	const (
		slotNone slotKind = iota
		slotActive
		slotInactive
	)

	var (
		entries = make([]hgxFirmwareInventoryEntry, 0)
		entry   *hgxFirmwareInventoryEntry
		pending = slotNone
	)

	// commit closes the record in progress, keeping it only if it has at least
	// one version. pending is only ever set while a record is open, so
	// clearing it unconditionally is harmless.
	commit := func() {
		if entry != nil && (entry.Version != "" || entry.ActiveVersion != "" || entry.InactiveVersion != "") {
			entries = append(entries, *entry)
		}
		entry, pending = nil, slotNone
	}

	for _, line := range strings.Split(string(content), "\n") {
		if idMatch := reIDLine.FindStringSubmatch(line); len(idMatch) > 1 {
			commit()
			if id := strings.TrimSpace(idMatch[1]); strings.HasPrefix(id, "HGX_") {
				entry = &hgxFirmwareInventoryEntry{ID: id}
			}
			continue
		}
		if entry == nil {
			continue
		}

		// Checked in this order so that the inactive marker wins if a line
		// carries both.
		if strings.Contains(line, `"ActiveFirmwareSlot"`) {
			pending = slotActive
		}
		if strings.Contains(line, `"InactiveFirmwareSlot"`) {
			pending = slotInactive
		}

		verMatch := reVersion.FindStringSubmatch(line)
		if len(verMatch) < 2 {
			continue
		}
		if version := strings.TrimSpace(verMatch[1]); version != "" {
			switch {
			case pending == slotActive:
				if entry.ActiveVersion == "" {
					entry.ActiveVersion = version
				}
			case pending == slotInactive:
				if entry.InactiveVersion == "" {
					entry.InactiveVersion = version
				}
			default:
				// Top-level version: the last plain "Version" in the record wins.
				entry.Version = version
			}
		}
		pending = slotNone
	}
	commit()

	return entries
}
// extractLogicalGPUIndex returns the number that follows "gpu" in a slot label
// (matched by reSlotGPU, e.g. "#GPU3" yields 3).
//
// The boolean is false, and the index 0, when the label has no such number or
// the number cannot be converted to a non-negative int.
func extractLogicalGPUIndex(slot string) (int, bool) {
	groups := reSlotGPU.FindStringSubmatch(slot)
	if len(groups) < 2 {
		return 0, false
	}
	if n, err := strconv.Atoi(groups[1]); err == nil && n >= 0 {
		return n, true
	}
	return 0, false
}
// resolveSXMIndex picks the SXM number to use for a logical GPU index.
//
// The hgxLogicalToSXM table is preferred, but only when bySXM actually has a
// record for the mapped SXM number. In every other case the identity fallback
// logicalIdx+1 is returned, whether or not bySXM contains it; callers must
// check bySXM themselves.
func resolveSXMIndex(logicalIdx int, bySXM map[int]hgxGPUAssemblyInfo) int {
	if sxm, mapped := hgxLogicalToSXM[logicalIdx]; mapped {
		if _, present := bySXM[sxm]; present {
			return sxm
		}
	}
	// The fallback is returned unconditionally, so probing bySXM for it first
	// would not change the result.
	return logicalIdx + 1
}
// shouldReplaceGPUModel reports whether model is blank or one of the generic
// placeholders "vga", "3d controller", "display controller" or "unknown".
// The comparison ignores case and surrounding whitespace.
func shouldReplaceGPUModel(model string) bool {
	switch strings.ToLower(strings.TrimSpace(model)) {
	case "", "vga", "3d controller", "display controller", "unknown":
		return true
	}
	return false
}
// normalizeHGXGPUInventory rewrites hw.GPUs so that it describes only the
// logical HGX GPUs #GPU0..#GPU7.
//
// Existing entries are kept, in their original order, only when their slot
// yields a logical index in 0..7; everything else (for example BMC VGA
// entries) is dropped. For each logical index not present afterwards, a
// placeholder built from the matching record in bySXM is appended, provided
// such a record exists. Placeholders get the slot "#GPU<n>" and the
// manufacturer "NVIDIA".
func normalizeHGXGPUInventory(hw *models.HardwareConfig, bySXM map[int]hgxGPUAssemblyInfo) {
	const maxLogicalIdx = 7

	kept := make([]models.GPU, 0, len(hw.GPUs))
	var seen [maxLogicalIdx + 1]bool

	for i := range hw.GPUs {
		idx, ok := extractLogicalGPUIndex(hw.GPUs[i].Slot)
		if ok && idx >= 0 && idx <= maxLogicalIdx {
			seen[idx] = true
			kept = append(kept, hw.GPUs[i])
		}
	}

	// Logical GPUs missing from the existing inventory are synthesised from
	// the HGX Redfish assembly data.
	for logicalIdx, have := range seen {
		if have {
			continue
		}
		info, ok := bySXM[resolveSXMIndex(logicalIdx, bySXM)]
		if !ok {
			continue
		}
		kept = append(kept, models.GPU{
			Slot:         fmt.Sprintf("#GPU%d", logicalIdx),
			Model:        info.Model,
			Manufacturer: "NVIDIA",
			SerialNumber: info.Serial,
			PartNumber:   info.Part,
		})
	}

	hw.GPUs = kept
}