v1.2.0: Enhanced Inspur/Kaytus parser with GPU, PCIe, and storage support
Major improvements:
- Add CSV SEL event parser for Kaytus firmware format
- Add PCIe device parser with link speed/width detection
- Add GPU temperature and PCIe link monitoring
- Add disk backplane parser for storage bay information
- Fix memory module detection (only show installed DIMMs)

Parser enhancements:
- Parse RESTful PCIe Device info (max/current link width/speed)
- Parse GPU sensor data (core and memory temperatures)
- Parse diskbackplane info (slot count, installed drives)
- Parse SEL events from CSV format (selelist.csv)
- Fix memory Present status logic (check mem_mod_status)

Web interface improvements:
- Add PCIe link degradation highlighting (red when current < max)
- Add storage table with Present status and location
- Update memory specification to show only installed modules with frequency
- Sort events from newest to oldest
- Filter out N/A serial numbers from display

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
This commit is contained in:
214
internal/parser/vendors/inspur/pcie.go
vendored
Normal file
214
internal/parser/vendors/inspur/pcie.go
vendored
Normal file
@@ -0,0 +1,214 @@
|
||||
package inspur
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"strings"
|
||||
|
||||
"git.mchus.pro/mchus/logpile/internal/models"
|
||||
)
|
||||
|
||||
// PCIeRESTInfo is the decoded form of the "RESTful PCIE Device info" JSON
// array: a slice with one record per PCIe entry. Field names map to the JSON
// keys given in the struct tags; note that "DeviceLocator" is the only key
// that is not snake_case.
type PCIeRESTInfo []struct {
	// Entry identity and state flags. ParsePCIeDevices keeps an entry only
	// when Present is 1.
	ID      int `json:"id"`
	Present int `json:"present"`
	Enable  int `json:"enable"`
	Status  int `json:"status"`

	// Vendor and device identification.
	VendorID   int    `json:"vendor_id"`
	VendorName string `json:"vendor_name"`
	DeviceID   int    `json:"device_id"`
	DeviceName string `json:"device_name"`

	// Bus / device / function numbers used to build the BDF string.
	BusNum  int `json:"bus_num"`
	DevNum  int `json:"dev_num"`
	FuncNum int `json:"func_num"`

	// Link capabilities and the currently negotiated link. The speed values
	// are rendered as "GEN<n>" by ParsePCIeDevices.
	MaxLinkWidth     int `json:"max_link_width"`
	MaxLinkSpeed     int `json:"max_link_speed"`
	CurrentLinkWidth int `json:"current_link_width"`
	CurrentLinkSpeed int `json:"current_link_speed"`

	// Physical placement of the device.
	Slot          int    `json:"slot"`
	Location      string `json:"location"`
	DeviceLocator string `json:"DeviceLocator"`

	// Numeric type codes consumed by determineDeviceClass.
	DevType    int `json:"dev_type"`
	DevSubtype int `json:"dev_subtype"`

	// Inventory strings.
	PartNum   string `json:"part_num"`
	SerialNum string `json:"serial_num"`
	FwVer     string `json:"fw_ver"`
}
|
||||
|
||||
// ParsePCIeDevices parses RESTful PCIE Device info from devicefrusdr.log
|
||||
func ParsePCIeDevices(content []byte) []models.PCIeDevice {
|
||||
text := string(content)
|
||||
|
||||
// Find RESTful PCIE Device info section
|
||||
startMarker := "RESTful PCIE Device info:"
|
||||
endMarker := "BMC sdr Info:"
|
||||
|
||||
startIdx := strings.Index(text, startMarker)
|
||||
if startIdx == -1 {
|
||||
return nil
|
||||
}
|
||||
|
||||
endIdx := strings.Index(text[startIdx:], endMarker)
|
||||
if endIdx == -1 {
|
||||
endIdx = len(text) - startIdx
|
||||
}
|
||||
|
||||
jsonText := text[startIdx+len(startMarker) : startIdx+endIdx]
|
||||
jsonText = strings.TrimSpace(jsonText)
|
||||
|
||||
var pcieInfo PCIeRESTInfo
|
||||
if err := json.Unmarshal([]byte(jsonText), &pcieInfo); err != nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
var devices []models.PCIeDevice
|
||||
for _, pcie := range pcieInfo {
|
||||
if pcie.Present != 1 {
|
||||
continue
|
||||
}
|
||||
|
||||
// Convert PCIe speed to GEN notation
|
||||
maxSpeed := fmt.Sprintf("GEN%d", pcie.MaxLinkSpeed)
|
||||
currentSpeed := fmt.Sprintf("GEN%d", pcie.CurrentLinkSpeed)
|
||||
|
||||
// Determine device class based on dev_type
|
||||
deviceClass := determineDeviceClass(pcie.DevType, pcie.DevSubtype, pcie.DeviceName)
|
||||
|
||||
// Build BDF string
|
||||
bdf := fmt.Sprintf("%04x/%02x/%02x/%02x", 0, pcie.BusNum, pcie.DevNum, pcie.FuncNum)
|
||||
|
||||
device := models.PCIeDevice{
|
||||
Slot: pcie.Location,
|
||||
VendorID: pcie.VendorID,
|
||||
DeviceID: pcie.DeviceID,
|
||||
BDF: bdf,
|
||||
DeviceClass: deviceClass,
|
||||
Manufacturer: pcie.VendorName,
|
||||
LinkWidth: pcie.CurrentLinkWidth,
|
||||
LinkSpeed: currentSpeed,
|
||||
MaxLinkWidth: pcie.MaxLinkWidth,
|
||||
MaxLinkSpeed: maxSpeed,
|
||||
PartNumber: strings.TrimSpace(pcie.PartNum),
|
||||
SerialNumber: strings.TrimSpace(pcie.SerialNum),
|
||||
}
|
||||
|
||||
devices = append(devices, device)
|
||||
}
|
||||
|
||||
return devices
|
||||
}
|
||||
|
||||
// determineDeviceClass turns the numeric dev_type / dev_subtype pair into a
// display label.
//
// dev_type values handled here:
//
//	1 = mass storage controller (subtype 4 is reported as a RAID controller)
//	2 = network controller
//	3 = display controller; deviceName is checked case-insensitively for
//	    "H100", "A100" and "NVIDIA", in that order, to label GPUs
//	4 = multimedia controller
//
// Any other dev_type yields "Unknown". devSubtype is only consulted for
// dev_type 1 and deviceName only for dev_type 3.
func determineDeviceClass(devType, devSubtype int, deviceName string) string {
	switch devType {
	case 1:
		if devSubtype == 4 {
			return "RAID Controller"
		}
		return "Storage Controller"

	case 2:
		return "Network Controller"

	case 3:
		// First matching marker wins, so the order of this table matters.
		gpuLabels := []struct{ marker, label string }{
			{"H100", "GPU (H100)"},
			{"A100", "GPU (A100)"},
			{"NVIDIA", "GPU"},
		}
		upperName := strings.ToUpper(deviceName)
		for _, entry := range gpuLabels {
			if strings.Contains(upperName, entry.marker) {
				return entry.label
			}
		}
		return "Display Controller"

	case 4:
		return "Multimedia Controller"
	}

	return "Unknown"
}
|
||||
|
||||
// ParseGPUs extracts GPU data from PCIe devices and sensors
|
||||
func ParseGPUs(pcieDevices []models.PCIeDevice, sensors []models.SensorReading) []models.GPU {
|
||||
var gpus []models.GPU
|
||||
|
||||
// Find GPU devices
|
||||
for _, pcie := range pcieDevices {
|
||||
if !strings.Contains(strings.ToLower(pcie.DeviceClass), "gpu") &&
|
||||
!strings.Contains(strings.ToLower(pcie.DeviceClass), "display") {
|
||||
continue
|
||||
}
|
||||
|
||||
// Skip integrated graphics (ASPEED, etc.)
|
||||
if strings.Contains(pcie.Manufacturer, "ASPEED") {
|
||||
continue
|
||||
}
|
||||
|
||||
gpu := models.GPU{
|
||||
Slot: pcie.Slot,
|
||||
Location: pcie.Slot,
|
||||
Model: pcie.DeviceClass,
|
||||
Manufacturer: pcie.Manufacturer,
|
||||
SerialNumber: pcie.SerialNumber,
|
||||
MaxLinkWidth: pcie.MaxLinkWidth,
|
||||
MaxLinkSpeed: pcie.MaxLinkSpeed,
|
||||
CurrentLinkWidth: pcie.LinkWidth,
|
||||
CurrentLinkSpeed: pcie.LinkSpeed,
|
||||
Status: "OK",
|
||||
}
|
||||
|
||||
// Extract GPU number from slot name (e.g., "PCIE7" -> 7)
|
||||
slotNum := extractSlotNumber(pcie.Slot)
|
||||
|
||||
// Find temperature sensors for this GPU
|
||||
for _, sensor := range sensors {
|
||||
sensorName := strings.ToUpper(sensor.Name)
|
||||
|
||||
// Match GPU temperature sensor (e.g., "GPU7_Temp")
|
||||
if strings.Contains(sensorName, fmt.Sprintf("GPU%d_TEMP", slotNum)) {
|
||||
if sensor.RawValue != "" {
|
||||
fmt.Sscanf(sensor.RawValue, "%d", &gpu.Temperature)
|
||||
}
|
||||
}
|
||||
|
||||
// Match GPU memory temperature (e.g., "GPU7_Mem_Temp")
|
||||
if strings.Contains(sensorName, fmt.Sprintf("GPU%d_MEM_TEMP", slotNum)) {
|
||||
if sensor.RawValue != "" {
|
||||
fmt.Sscanf(sensor.RawValue, "%d", &gpu.MemTemperature)
|
||||
}
|
||||
}
|
||||
|
||||
// Match PCIe slot temperature (e.g., "PCIE7_GPU_TLM_T")
|
||||
if strings.Contains(sensorName, fmt.Sprintf("PCIE%d_GPU_TLM_T", slotNum)) {
|
||||
if sensor.RawValue != "" && gpu.Temperature == 0 {
|
||||
fmt.Sscanf(sensor.RawValue, "%d", &gpu.Temperature)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
gpus = append(gpus, gpu)
|
||||
}
|
||||
|
||||
return gpus
|
||||
}
|
||||
|
||||
// extractSlotNumber returns the PCIe slot number embedded in a location
// string, e.g. "CPU0_PE3_AC_PCIE7" -> 7.
//
// The location is split on "_" and the first token of the form "PCIE<n>" with
// n > 0 wins. It returns 0 when no such token exists; as a consequence a
// literal "PCIE0" token cannot be told apart from "not found".
func extractSlotNumber(location string) int {
	for _, part := range strings.Split(location, "_") {
		// Only "PCIE<n>" tokens can match the scan format below. The former
		// "#CPU" alternative could never produce a number and was dropped.
		if !strings.HasPrefix(part, "PCIE") {
			continue
		}

		var num int
		if _, err := fmt.Sscanf(part, "PCIE%d", &num); err != nil {
			continue
		}
		if num > 0 {
			return num
		}
	}
	return 0
}
|
||||
Reference in New Issue
Block a user