176 lines
3.8 KiB
Go
176 lines
3.8 KiB
Go
package inspur
|
|
|
|
import (
|
|
"fmt"
|
|
"regexp"
|
|
"strconv"
|
|
"strings"
|
|
|
|
"git.mchus.pro/mchus/logpile/internal/models"
|
|
)
|
|
|
|
// hgxGPUAssemblyInfo carries the identity strings recorded for one HGX GPU
// board assembly. Values are stored keyed by SXM module number in the map
// returned by parseHGXGPUAssembly.
type hgxGPUAssemblyInfo struct {
	// Model is the assembly's "Model" value.
	Model string
	// Part is the assembly's "PartNumber" value.
	Part string
	// Serial is the assembly's "SerialNumber" value.
	Serial string
}
|
|
|
|
// hgxLogicalToSXM translates a logical GPU index (the N in "GPU<N>", following
// the ordering shown by the HGX B200 UI) into the number of the SXM module
// that backs it.
//
// The table was taken from real logs/UI:
//
//	GPU0->SXM5, GPU1->SXM7, GPU2->SXM6, GPU3->SXM8,
//	GPU4->SXM2, GPU5->SXM4, GPU6->SXM3, GPU7->SXM1.
var hgxLogicalToSXM = map[int]int{
	0: 5, 1: 7, 2: 6, 3: 8,
	4: 2, 5: 4, 6: 3, 7: 1,
}
|
|
|
|
// reHGXGPUBlock locates one HGX GPU assembly record. It anchors on the
// ".../Chassis/HGX_GPU_SXM_<n>/Assembly" path and then, with "." allowed to
// cross newlines ((?s)), lazily skips ahead to the "GPU Board Assembly" name
// and the "Model", "PartNumber" and "SerialNumber" values, in that order.
// Capture groups: 1 = SXM number, 2 = model, 3 = part number, 4 = serial.
//
// NOTE(review): the gaps are unbounded lazy wildcards, so when a key is absent
// from one record the match can run on into a following record.
var reHGXGPUBlock = regexp.MustCompile(`(?s)/redfish/v1/Chassis/HGX_GPU_SXM_(\d+)/Assembly.*?"Name":\s*"GPU Board Assembly".*?"Model":\s*"([^"]+)".*?"PartNumber":\s*"([^"]+)".*?"SerialNumber":\s*"([^"]+)"`)

// reSlotGPU pulls the decimal index out of a slot label containing "gpu"
// (case-insensitive), optionally followed by whitespace and/or '#', e.g.
// "#GPU3" or "gpu #3". Capture group 1 is the index.
var reSlotGPU = regexp.MustCompile(`(?i)gpu\s*#?\s*(\d+)`)
|
|
|
|
func enrichGPUsFromHGXHWInfo(content []byte, hw *models.HardwareConfig) {
|
|
if hw == nil || len(hw.GPUs) == 0 || len(content) == 0 {
|
|
return
|
|
}
|
|
|
|
bySXM := parseHGXGPUAssembly(content)
|
|
if len(bySXM) == 0 {
|
|
return
|
|
}
|
|
|
|
normalizeHGXGPUInventory(hw, bySXM)
|
|
|
|
for i := range hw.GPUs {
|
|
gpu := &hw.GPUs[i]
|
|
logicalIdx, ok := extractLogicalGPUIndex(gpu.Slot)
|
|
if !ok {
|
|
// Keep existing info if slot index cannot be determined.
|
|
continue
|
|
}
|
|
|
|
sxm := resolveSXMIndex(logicalIdx, bySXM)
|
|
info, found := bySXM[sxm]
|
|
if !found {
|
|
continue
|
|
}
|
|
|
|
if strings.TrimSpace(gpu.SerialNumber) == "" {
|
|
gpu.SerialNumber = info.Serial
|
|
}
|
|
if shouldReplaceGPUModel(gpu.Model) {
|
|
gpu.Model = info.Model
|
|
}
|
|
if strings.TrimSpace(gpu.PartNumber) == "" {
|
|
gpu.PartNumber = info.Part
|
|
}
|
|
if strings.TrimSpace(gpu.Manufacturer) == "" {
|
|
gpu.Manufacturer = "NVIDIA"
|
|
}
|
|
}
|
|
}
|
|
|
|
func parseHGXGPUAssembly(content []byte) map[int]hgxGPUAssemblyInfo {
|
|
result := make(map[int]hgxGPUAssemblyInfo)
|
|
matches := reHGXGPUBlock.FindAllSubmatch(content, -1)
|
|
for _, m := range matches {
|
|
if len(m) != 5 {
|
|
continue
|
|
}
|
|
|
|
sxmIdx, err := strconv.Atoi(string(m[1]))
|
|
if err != nil || sxmIdx <= 0 {
|
|
continue
|
|
}
|
|
|
|
result[sxmIdx] = hgxGPUAssemblyInfo{
|
|
Model: strings.TrimSpace(string(m[2])),
|
|
Part: strings.TrimSpace(string(m[3])),
|
|
Serial: strings.TrimSpace(string(m[4])),
|
|
}
|
|
}
|
|
return result
|
|
}
|
|
|
|
func extractLogicalGPUIndex(slot string) (int, bool) {
|
|
m := reSlotGPU.FindStringSubmatch(slot)
|
|
if len(m) < 2 {
|
|
return 0, false
|
|
}
|
|
|
|
idx, err := strconv.Atoi(m[1])
|
|
if err != nil || idx < 0 {
|
|
return 0, false
|
|
}
|
|
return idx, true
|
|
}
|
|
|
|
func resolveSXMIndex(logicalIdx int, bySXM map[int]hgxGPUAssemblyInfo) int {
|
|
if sxm, ok := hgxLogicalToSXM[logicalIdx]; ok {
|
|
if _, exists := bySXM[sxm]; exists {
|
|
return sxm
|
|
}
|
|
}
|
|
|
|
identity := logicalIdx + 1
|
|
if _, exists := bySXM[identity]; exists {
|
|
return identity
|
|
}
|
|
|
|
return identity
|
|
}
|
|
|
|
// shouldReplaceGPUModel reports whether model is only a placeholder and may be
// overwritten. After trimming surrounding whitespace and ignoring case, the
// placeholders are: the empty string, "vga", "3d controller",
// "display controller" and "unknown". Any other value returns false.
func shouldReplaceGPUModel(model string) bool {
	switch strings.ToLower(strings.TrimSpace(model)) {
	case "", "vga", "3d controller", "display controller", "unknown":
		return true
	}
	return false
}
|
|
|
|
func normalizeHGXGPUInventory(hw *models.HardwareConfig, bySXM map[int]hgxGPUAssemblyInfo) {
|
|
// Keep only logical HGX GPUs (#GPU0..#GPU7) and remove BMC VGA entries.
|
|
filtered := make([]models.GPU, 0, len(hw.GPUs))
|
|
present := make(map[int]bool)
|
|
for _, gpu := range hw.GPUs {
|
|
idx, ok := extractLogicalGPUIndex(gpu.Slot)
|
|
if !ok || idx < 0 || idx > 7 {
|
|
continue
|
|
}
|
|
present[idx] = true
|
|
filtered = append(filtered, gpu)
|
|
}
|
|
|
|
// If some logical GPUs are missing in asset.json, add placeholders from HGX Redfish assembly.
|
|
for logicalIdx := 0; logicalIdx <= 7; logicalIdx++ {
|
|
if present[logicalIdx] {
|
|
continue
|
|
}
|
|
sxm := resolveSXMIndex(logicalIdx, bySXM)
|
|
info, ok := bySXM[sxm]
|
|
if !ok {
|
|
continue
|
|
}
|
|
|
|
filtered = append(filtered, models.GPU{
|
|
Slot: fmt.Sprintf("#GPU%d", logicalIdx),
|
|
Model: info.Model,
|
|
Manufacturer: "NVIDIA",
|
|
SerialNumber: info.Serial,
|
|
PartNumber: info.Part,
|
|
})
|
|
}
|
|
|
|
hw.GPUs = filtered
|
|
}
|