361 lines
8.1 KiB
Go
361 lines
8.1 KiB
Go
package inspur
|
|
|
|
import (
|
|
"fmt"
|
|
"regexp"
|
|
"strconv"
|
|
"strings"
|
|
|
|
"git.mchus.pro/mchus/logpile/internal/models"
|
|
)
|
|
|
|
// hgxGPUAssemblyInfo carries the identity fields captured from the
// "GPU Board Assembly" record of one HGX GPU chassis.
type hgxGPUAssemblyInfo struct {
	Model  string // captured "Model" value
	Part   string // captured "PartNumber" value
	Serial string // captured "SerialNumber" value
}
|
|
|
|
// hgxGPUFirmwareInfo carries the per-GPU versions collected from the
// firmware inventory entries of a single SXM module.
type hgxGPUFirmwareInfo struct {
	Firmware string // version of the HGX_FW_GPU_SXM_<n> entry
	InfoROM  string // version of the HGX_InfoROM_GPU_SXM_<n> entry
}
|
|
|
|
// hgxFirmwareInventoryEntry is one "HGX_"-prefixed firmware inventory record
// together with the versions found beneath its "Id" line.
type hgxFirmwareInventoryEntry struct {
	ID              string // value of the "Id" property
	Version         string // plain (top-level) "Version" value
	ActiveVersion   string // "Version" seen after an "ActiveFirmwareSlot" marker
	InactiveVersion string // "Version" seen after an "InactiveFirmwareSlot" marker
}
|
|
|
|
// hgxLogicalToSXM maps a logical GPU index (as ordered by the HGX B200 UI)
// to the physical SXM module number.
//
// Example taken from real logs/UI:
//
//	GPU0->SXM5, GPU1->SXM7, GPU2->SXM6, GPU3->SXM8,
//	GPU4->SXM2, GPU5->SXM4, GPU6->SXM3, GPU7->SXM1.
var hgxLogicalToSXM = map[int]int{
	0: 5,
	1: 7,
	2: 6,
	3: 8,
	4: 2,
	5: 4,
	6: 3,
	7: 1,
}
|
|
|
|
// Patterns used to scrape HGX data out of the raw dump. The (?s) flag lets
// ".*?" span line breaks, so one match may cover several lines.
var (
	// reHGXGPUBlock captures the SXM number, Model, PartNumber and SerialNumber
	// of a "GPU Board Assembly" that follows an HGX_GPU_SXM_<n>/Assembly path.
	reHGXGPUBlock = regexp.MustCompile(`(?s)/redfish/v1/Chassis/HGX_GPU_SXM_(\d+)/Assembly.*?"Name":\s*"GPU Board Assembly".*?"Model":\s*"([^"]+)".*?"PartNumber":\s*"([^"]+)".*?"SerialNumber":\s*"([^"]+)"`)
	// reHGXFWBlock captures the SXM number and the next "Version" after an
	// HGX_FW_GPU_SXM_<n> id.
	reHGXFWBlock = regexp.MustCompile(`(?s)"Id":\s*"HGX_FW_GPU_SXM_(\d+)".*?"Version":\s*"([^"]*)"`)
	// reHGXInfoROM captures the SXM number and the next "Version" after an
	// HGX_InfoROM_GPU_SXM_<n> id.
	reHGXInfoROM = regexp.MustCompile(`(?s)"Id":\s*"HGX_InfoROM_GPU_SXM_(\d+)".*?"Version":\s*"([^"]*)"`)
	// reIDLine captures the value of an "Id" property.
	reIDLine = regexp.MustCompile(`"Id":\s*"([^"]+)"`)
	// reVersion captures the (possibly empty) value of a "Version" property.
	reVersion = regexp.MustCompile(`"Version":\s*"([^"]*)"`)
	// reSlotGPU captures the number in slot labels such as "#GPU3" or "gpu 3",
	// case-insensitively.
	reSlotGPU = regexp.MustCompile(`(?i)gpu\s*#?\s*(\d+)`)
)
|
|
|
|
func enrichGPUsFromHGXHWInfo(content []byte, hw *models.HardwareConfig) {
|
|
if hw == nil || len(hw.GPUs) == 0 || len(content) == 0 {
|
|
return
|
|
}
|
|
|
|
bySXM := parseHGXGPUAssembly(content)
|
|
if len(bySXM) == 0 {
|
|
return
|
|
}
|
|
fwBySXM := parseHGXGPUFirmware(content)
|
|
|
|
normalizeHGXGPUInventory(hw, bySXM)
|
|
|
|
for i := range hw.GPUs {
|
|
gpu := &hw.GPUs[i]
|
|
logicalIdx, ok := extractLogicalGPUIndex(gpu.Slot)
|
|
if !ok {
|
|
// Keep existing info if slot index cannot be determined.
|
|
continue
|
|
}
|
|
|
|
sxm := resolveSXMIndex(logicalIdx, bySXM)
|
|
info, found := bySXM[sxm]
|
|
if !found {
|
|
continue
|
|
}
|
|
|
|
if strings.TrimSpace(gpu.SerialNumber) == "" {
|
|
gpu.SerialNumber = info.Serial
|
|
}
|
|
if shouldReplaceGPUModel(gpu.Model) {
|
|
gpu.Model = info.Model
|
|
}
|
|
if strings.TrimSpace(gpu.PartNumber) == "" {
|
|
gpu.PartNumber = info.Part
|
|
}
|
|
if strings.TrimSpace(gpu.Manufacturer) == "" {
|
|
gpu.Manufacturer = "NVIDIA"
|
|
}
|
|
if fw, ok := fwBySXM[sxm]; ok {
|
|
if strings.TrimSpace(gpu.Firmware) == "" && strings.TrimSpace(fw.Firmware) != "" {
|
|
gpu.Firmware = fw.Firmware
|
|
}
|
|
if strings.TrimSpace(gpu.VideoBIOS) == "" && strings.TrimSpace(fw.InfoROM) != "" {
|
|
gpu.VideoBIOS = fw.InfoROM
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
func appendHGXFirmwareFromHWInfo(content []byte, hw *models.HardwareConfig) {
|
|
if hw == nil || len(content) == 0 {
|
|
return
|
|
}
|
|
|
|
entries := parseHGXFirmwareInventory(content)
|
|
if len(entries) == 0 {
|
|
return
|
|
}
|
|
|
|
existing := make(map[string]bool, len(hw.Firmware))
|
|
for _, fw := range hw.Firmware {
|
|
key := strings.ToLower(strings.TrimSpace(fw.DeviceName) + "|" + strings.TrimSpace(fw.Version))
|
|
existing[key] = true
|
|
}
|
|
|
|
appendFW := func(name, version string) {
|
|
name = strings.TrimSpace(name)
|
|
version = strings.TrimSpace(version)
|
|
if name == "" || version == "" {
|
|
return
|
|
}
|
|
key := strings.ToLower(name + "|" + version)
|
|
if existing[key] {
|
|
return
|
|
}
|
|
existing[key] = true
|
|
hw.Firmware = append(hw.Firmware, models.FirmwareInfo{
|
|
DeviceName: name,
|
|
Version: version,
|
|
})
|
|
}
|
|
|
|
for _, e := range entries {
|
|
appendFW(e.ID, e.Version)
|
|
|
|
if e.ActiveVersion != "" && e.InactiveVersion != "" && e.ActiveVersion != e.InactiveVersion {
|
|
appendFW(e.ID+" Active Slot", e.ActiveVersion)
|
|
appendFW(e.ID+" Inactive Slot", e.InactiveVersion)
|
|
}
|
|
}
|
|
}
|
|
|
|
func parseHGXGPUAssembly(content []byte) map[int]hgxGPUAssemblyInfo {
|
|
result := make(map[int]hgxGPUAssemblyInfo)
|
|
matches := reHGXGPUBlock.FindAllSubmatch(content, -1)
|
|
for _, m := range matches {
|
|
if len(m) != 5 {
|
|
continue
|
|
}
|
|
|
|
sxmIdx, err := strconv.Atoi(string(m[1]))
|
|
if err != nil || sxmIdx <= 0 {
|
|
continue
|
|
}
|
|
|
|
result[sxmIdx] = hgxGPUAssemblyInfo{
|
|
Model: strings.TrimSpace(string(m[2])),
|
|
Part: strings.TrimSpace(string(m[3])),
|
|
Serial: strings.TrimSpace(string(m[4])),
|
|
}
|
|
}
|
|
return result
|
|
}
|
|
|
|
func parseHGXGPUFirmware(content []byte) map[int]hgxGPUFirmwareInfo {
|
|
result := make(map[int]hgxGPUFirmwareInfo)
|
|
|
|
matchesFW := reHGXFWBlock.FindAllSubmatch(content, -1)
|
|
for _, m := range matchesFW {
|
|
if len(m) != 3 {
|
|
continue
|
|
}
|
|
sxmIdx, err := strconv.Atoi(string(m[1]))
|
|
if err != nil || sxmIdx <= 0 {
|
|
continue
|
|
}
|
|
version := strings.TrimSpace(string(m[2]))
|
|
if version == "" {
|
|
continue
|
|
}
|
|
|
|
current := result[sxmIdx]
|
|
if current.Firmware == "" {
|
|
current.Firmware = version
|
|
}
|
|
result[sxmIdx] = current
|
|
}
|
|
|
|
matchesInfoROM := reHGXInfoROM.FindAllSubmatch(content, -1)
|
|
for _, m := range matchesInfoROM {
|
|
if len(m) != 3 {
|
|
continue
|
|
}
|
|
sxmIdx, err := strconv.Atoi(string(m[1]))
|
|
if err != nil || sxmIdx <= 0 {
|
|
continue
|
|
}
|
|
version := strings.TrimSpace(string(m[2]))
|
|
if version == "" {
|
|
continue
|
|
}
|
|
|
|
current := result[sxmIdx]
|
|
if current.InfoROM == "" {
|
|
current.InfoROM = version
|
|
}
|
|
result[sxmIdx] = current
|
|
}
|
|
|
|
return result
|
|
}
|
|
|
|
func parseHGXFirmwareInventory(content []byte) []hgxFirmwareInventoryEntry {
|
|
lines := strings.Split(string(content), "\n")
|
|
result := make([]hgxFirmwareInventoryEntry, 0)
|
|
|
|
var current *hgxFirmwareInventoryEntry
|
|
section := ""
|
|
|
|
flush := func() {
|
|
if current == nil {
|
|
return
|
|
}
|
|
if current.Version == "" && current.ActiveVersion == "" && current.InactiveVersion == "" {
|
|
current = nil
|
|
section = ""
|
|
return
|
|
}
|
|
result = append(result, *current)
|
|
current = nil
|
|
section = ""
|
|
}
|
|
|
|
for _, line := range lines {
|
|
if m := reIDLine.FindStringSubmatch(line); len(m) > 1 {
|
|
flush()
|
|
id := strings.TrimSpace(m[1])
|
|
if strings.HasPrefix(id, "HGX_") {
|
|
current = &hgxFirmwareInventoryEntry{ID: id}
|
|
}
|
|
continue
|
|
}
|
|
|
|
if current == nil {
|
|
continue
|
|
}
|
|
|
|
if strings.Contains(line, `"ActiveFirmwareSlot"`) {
|
|
section = "active"
|
|
}
|
|
if strings.Contains(line, `"InactiveFirmwareSlot"`) {
|
|
section = "inactive"
|
|
}
|
|
|
|
if m := reVersion.FindStringSubmatch(line); len(m) > 1 {
|
|
version := strings.TrimSpace(m[1])
|
|
if version == "" {
|
|
section = ""
|
|
continue
|
|
}
|
|
switch section {
|
|
case "active":
|
|
if current.ActiveVersion == "" {
|
|
current.ActiveVersion = version
|
|
}
|
|
case "inactive":
|
|
if current.InactiveVersion == "" {
|
|
current.InactiveVersion = version
|
|
}
|
|
default:
|
|
// Keep top-level version from the last seen plain "Version" in current entry.
|
|
current.Version = version
|
|
}
|
|
section = ""
|
|
}
|
|
}
|
|
flush()
|
|
|
|
return result
|
|
}
|
|
|
|
func extractLogicalGPUIndex(slot string) (int, bool) {
|
|
m := reSlotGPU.FindStringSubmatch(slot)
|
|
if len(m) < 2 {
|
|
return 0, false
|
|
}
|
|
|
|
idx, err := strconv.Atoi(m[1])
|
|
if err != nil || idx < 0 {
|
|
return 0, false
|
|
}
|
|
return idx, true
|
|
}
|
|
|
|
func resolveSXMIndex(logicalIdx int, bySXM map[int]hgxGPUAssemblyInfo) int {
|
|
if sxm, ok := hgxLogicalToSXM[logicalIdx]; ok {
|
|
if _, exists := bySXM[sxm]; exists {
|
|
return sxm
|
|
}
|
|
}
|
|
|
|
identity := logicalIdx + 1
|
|
if _, exists := bySXM[identity]; exists {
|
|
return identity
|
|
}
|
|
|
|
return identity
|
|
}
|
|
|
|
// shouldReplaceGPUModel reports whether model is blank or one of the generic
// placeholder names ("vga", "3d controller", "display controller",
// "unknown"), compared case-insensitively after trimming whitespace.
func shouldReplaceGPUModel(model string) bool {
	normalized := strings.ToLower(strings.TrimSpace(model))

	// The empty string covers blank and whitespace-only models.
	for _, generic := range [...]string{"", "vga", "3d controller", "display controller", "unknown"} {
		if normalized == generic {
			return true
		}
	}
	return false
}
|
|
|
|
func normalizeHGXGPUInventory(hw *models.HardwareConfig, bySXM map[int]hgxGPUAssemblyInfo) {
|
|
// Keep only logical HGX GPUs (#GPU0..#GPU7) and remove BMC VGA entries.
|
|
filtered := make([]models.GPU, 0, len(hw.GPUs))
|
|
present := make(map[int]bool)
|
|
for _, gpu := range hw.GPUs {
|
|
idx, ok := extractLogicalGPUIndex(gpu.Slot)
|
|
if !ok || idx < 0 || idx > 7 {
|
|
continue
|
|
}
|
|
present[idx] = true
|
|
filtered = append(filtered, gpu)
|
|
}
|
|
|
|
// If some logical GPUs are missing in asset.json, add placeholders from HGX Redfish assembly.
|
|
for logicalIdx := 0; logicalIdx <= 7; logicalIdx++ {
|
|
if present[logicalIdx] {
|
|
continue
|
|
}
|
|
sxm := resolveSXMIndex(logicalIdx, bySXM)
|
|
info, ok := bySXM[sxm]
|
|
if !ok {
|
|
continue
|
|
}
|
|
|
|
filtered = append(filtered, models.GPU{
|
|
Slot: fmt.Sprintf("#GPU%d", logicalIdx),
|
|
Model: info.Model,
|
|
Manufacturer: "NVIDIA",
|
|
SerialNumber: info.Serial,
|
|
PartNumber: info.Part,
|
|
})
|
|
}
|
|
|
|
hw.GPUs = filtered
|
|
}
|