Files
logpile/internal/collector/redfish_replay_gpu.go
Mikhail Chusavitin d650a6ba1c refactor: unified ingest pipeline + modular Redfish profile framework
Implement the full architectural plan: unified ingest.Service entry point
for archive and Redfish payloads, modular redfishprofile package with
composable profiles (generic, ami-family, msi, supermicro, dell,
hgx-topology), score-based profile matching with fallback expansion mode,
and profile-driven acquisition/analysis plans.

Vendor-specific logic moved out of common executors and into profile hooks.
GPU chassis lookup strategies and known storage recovery collections
(IntelVROC/HA-RAID/MRVL) now live in ResolvedAnalysisPlan, populated by
profiles at analysis time. Replay helpers read from the plan; no hardcoded
path lists remain in generic code.

Also splits redfish_replay.go into domain modules (gpu, storage, inventory,
fru, profiles) and adds full fixture/matcher/directive test coverage
including Dell, AMI, unknown-vendor fallback, and deterministic ordering.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-03-18 08:48:58 +03:00

152 lines
4.7 KiB
Go

package collector
import (
"fmt"
"strings"
"git.mchus.pro/mchus/logpile/internal/collector/redfishprofile"
"git.mchus.pro/mchus/logpile/internal/models"
)
// collectGPUs walks the device collections of every system and chassis path
// and returns the members that looksLikeGPU accepts, parsed into models.GPU.
//
// For each system path the PCIeDevices, Accelerators and GraphicsControllers
// collections are read; for each chassis path the PCIeDevices and Accelerators
// collections are read. Collections that fail to load or have no members are
// ignored.
//
// A member is dropped when gpuDocDedupKey yields an empty key or a key that
// was already emitted. When plan.Directives.EnableGenericGraphicsControllerDedup
// is set, members rejected by shouldSkipGenericGPUDuplicate are dropped as
// well and the final list is passed through dropModelOnlyGPUPlaceholders.
func (r redfishSnapshotReader) collectGPUs(systemPaths, chassisPaths []string, plan redfishprofile.ResolvedAnalysisPlan) []models.GPU {
	systemSuffixes := [...]string{"/PCIeDevices", "/Accelerators", "/GraphicsControllers"}
	chassisSuffixes := [...]string{"/PCIeDevices", "/Accelerators"}

	// System collections come first, then chassis collections; the order
	// decides which copy of a device wins de-duplication below.
	candidates := make([]string, 0, len(systemPaths)*len(systemSuffixes)+len(chassisPaths)*len(chassisSuffixes))
	for _, base := range systemPaths {
		for _, suffix := range systemSuffixes {
			candidates = append(candidates, joinPath(base, suffix))
		}
	}
	for _, base := range chassisPaths {
		for _, suffix := range chassisSuffixes {
			candidates = append(candidates, joinPath(base, suffix))
		}
	}

	dedupGeneric := plan.Directives.EnableGenericGraphicsControllerDedup

	var gpus []models.GPU
	emitted := make(map[string]struct{})
	// NOTE(review): ordinal advances for every GPU-looking member, including
	// ones dropped as duplicates below, so the values handed to the parser
	// can have gaps — confirm this is intended.
	ordinal := 1
	for _, collection := range candidates {
		members, err := r.getCollectionMembers(collection)
		if err != nil || len(members) == 0 {
			continue
		}
		for _, member := range members {
			functions := r.getLinkedPCIeFunctions(member)
			if !looksLikeGPU(member, functions) {
				continue
			}

			// Supplemental docs are gathered from the device itself and
			// from each of its linked PCIe functions.
			extras := r.getLinkedSupplementalDocs(member, "EnvironmentMetrics", "Metrics")
			for _, function := range functions {
				extras = append(extras, r.getLinkedSupplementalDocs(function, "EnvironmentMetrics", "Metrics")...)
			}

			parsed := parseGPUWithSupplementalDocs(member, functions, extras, ordinal)
			ordinal++

			if dedupGeneric && shouldSkipGenericGPUDuplicate(gpus, parsed) {
				continue
			}
			dedupKey := gpuDocDedupKey(member, parsed)
			if dedupKey == "" {
				continue
			}
			if _, dup := emitted[dedupKey]; dup {
				continue
			}
			emitted[dedupKey] = struct{}{}
			gpus = append(gpus, parsed)
		}
	}

	if !dedupGeneric {
		return gpus
	}
	return dropModelOnlyGPUPlaceholders(gpus)
}
// collectGPUsFromProcessors finds GPUs that some BMCs (e.g. MSI) expose as
// Processor entries with ProcessorType=GPU rather than as PCIe devices.
//
// It returns a copy of existing (the GPUs already found via the PCIe path)
// followed by one entry per additional GPU processor found under
// <systemPath>/Processors. When plan.Directives.EnableProcessorGPUFallback is
// false, only the copy of existing is returned.
//
// A processor is skipped when its UUID or its serial number matches a GPU
// that is already known. Matching is case-insensitive and ignores surrounding
// whitespace. Whenever a processor is matched or added, both of its
// identifiers are remembered, so a later entry that carries only one of them
// is still recognised as the same GPU.
//
// When the processor document has no serial number, the serial is resolved
// via resolveProcessorGPUChassisSerial from the chassis documents, which are
// indexed by their upper-cased Id.
func (r redfishSnapshotReader) collectGPUsFromProcessors(systemPaths, chassisPaths []string, existing []models.GPU, plan redfishprofile.ResolvedAnalysisPlan) []models.GPU {
	// Always hand back a fresh slice so callers never alias existing.
	out := append([]models.GPU{}, existing...)
	if !plan.Directives.EnableProcessorGPUFallback {
		return out
	}

	// identityKey normalises an identifier for case- and
	// whitespace-insensitive comparison.
	identityKey := func(s string) string {
		return strings.ToUpper(strings.TrimSpace(s))
	}

	chassisByID := make(map[string]map[string]interface{}, len(chassisPaths))
	for _, cp := range chassisPaths {
		doc, err := r.getJSON(cp)
		if err != nil || len(doc) == 0 {
			continue
		}
		if id := identityKey(asString(doc["Id"])); id != "" {
			chassisByID[id] = doc
		}
	}

	seenUUID := make(map[string]struct{}, len(existing))
	seenSerial := make(map[string]struct{}, len(existing))
	// remember records the non-empty identifiers of a GPU. Empty keys are
	// never stored, so looking up an empty key always misses.
	remember := func(uuidKey, serialKey string) {
		if uuidKey != "" {
			seenUUID[uuidKey] = struct{}{}
		}
		if serialKey != "" {
			seenSerial[serialKey] = struct{}{}
		}
	}
	for _, g := range existing {
		remember(identityKey(g.UUID), identityKey(g.SerialNumber))
	}

	idx := len(existing) + 1
	for _, systemPath := range systemPaths {
		procDocs, err := r.getCollectionMembers(joinPath(systemPath, "/Processors"))
		if err != nil {
			continue
		}
		for _, doc := range procDocs {
			if !strings.EqualFold(strings.TrimSpace(asString(doc["ProcessorType"])), "GPU") {
				continue
			}

			gpuID := strings.TrimSpace(asString(doc["Id"]))
			serial := findFirstNormalizedStringByKeys(doc, "SerialNumber")
			if serial == "" {
				serial = resolveProcessorGPUChassisSerial(chassisByID, gpuID, plan)
			}
			uuid := strings.TrimSpace(asString(doc["UUID"]))

			uuidKey, serialKey := identityKey(uuid), identityKey(serial)
			_, dupUUID := seenUUID[uuidKey]
			_, dupSerial := seenSerial[serialKey]
			// Record both identifiers before deciding: a duplicate matched
			// through one key teaches us the other key of the same GPU.
			remember(uuidKey, serialKey)
			if dupUUID || dupSerial {
				continue
			}

			slotLabel := firstNonEmpty(
				redfishLocationLabel(doc["Location"]),
				redfishLocationLabel(doc["PhysicalLocation"]),
			)
			if slotLabel == "" && gpuID != "" {
				slotLabel = gpuID
			}
			if slotLabel == "" {
				// Last resort: synthesise a label from the running index,
				// which continues after the GPUs already in existing.
				slotLabel = fmt.Sprintf("GPU%d", idx)
			}

			out = append(out, models.GPU{
				Slot:         slotLabel,
				Model:        firstNonEmpty(asString(doc["Model"]), asString(doc["Name"])),
				Manufacturer: asString(doc["Manufacturer"]),
				PartNumber:   asString(doc["PartNumber"]),
				SerialNumber: serial,
				UUID:         uuid,
				Status:       mapStatus(doc["Status"]),
			})
			idx++
		}
	}
	return out
}