Compare commits
26 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| fc9b446d2e | |||
|
|
ea68318744 | ||
|
|
518082c2e2 | ||
|
|
056dce0b98 | ||
|
|
24f2e65b6e | ||
|
|
7f27b9aa38 | ||
|
|
cf29131116 | ||
|
|
13e6324853 | ||
|
|
892ef6fb7d | ||
|
|
ce46a97975 | ||
|
|
258ecb3453 | ||
|
|
cbb0d1e522 | ||
|
|
bab941ccf1 | ||
|
|
b49c71a980 | ||
|
|
85d1acdaa3 | ||
|
|
a2d7513153 | ||
|
|
5b5d8609d3 | ||
|
|
e7442972d1 | ||
|
|
4c6daa1c5e | ||
|
|
e420888d71 | ||
|
|
8149360410 | ||
|
|
4262c5b798 | ||
|
|
b2e177af31 | ||
|
|
271dadda03 | ||
|
|
20766ccc76 | ||
|
|
966944d6d8 |
@@ -365,7 +365,6 @@ func (w *blackboxWorker) currentFlushPeriod() time.Duration {
|
|||||||
|
|
||||||
func (w *blackboxWorker) finishCycle(duration time.Duration, err error) {
|
func (w *blackboxWorker) finishCycle(duration time.Duration, err error) {
|
||||||
w.mu.Lock()
|
w.mu.Lock()
|
||||||
defer w.mu.Unlock()
|
|
||||||
w.lastDuration = duration
|
w.lastDuration = duration
|
||||||
if err != nil {
|
if err != nil {
|
||||||
w.status = "degraded"
|
w.status = "degraded"
|
||||||
@@ -383,6 +382,10 @@ func (w *blackboxWorker) finishCycle(duration time.Duration, err error) {
|
|||||||
}
|
}
|
||||||
w.flushPeriod = adjustFlushPeriod(w.flushPeriod, duration, true, w.fastCycles)
|
w.flushPeriod = adjustFlushPeriod(w.flushPeriod, duration, true, w.fastCycles)
|
||||||
}
|
}
|
||||||
|
w.mu.Unlock()
|
||||||
|
// persistState must be called without w.mu held: it acquires rt.mu then
|
||||||
|
// each worker.mu inside persistStateLocked, so holding w.mu here would
|
||||||
|
// cause a deadlock (w.mu → rt.mu → w.mu).
|
||||||
w.runtime.persistState()
|
w.runtime.persistState()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -174,15 +174,19 @@ func cleanDMIValue(v string) string {
|
|||||||
upper := strings.ToUpper(v)
|
upper := strings.ToUpper(v)
|
||||||
placeholders := []string{
|
placeholders := []string{
|
||||||
"TO BE FILLED BY O.E.M.",
|
"TO BE FILLED BY O.E.M.",
|
||||||
|
"TO BE FILLED BY O.E.M",
|
||||||
"NOT SPECIFIED",
|
"NOT SPECIFIED",
|
||||||
"NOT SETTABLE",
|
"NOT SETTABLE",
|
||||||
"NOT PRESENT",
|
"NOT PRESENT",
|
||||||
|
"NOT AVAILABLE",
|
||||||
"UNKNOWN",
|
"UNKNOWN",
|
||||||
"N/A",
|
"N/A",
|
||||||
"NONE",
|
"NONE",
|
||||||
"NULL",
|
"NULL",
|
||||||
"DEFAULT STRING",
|
"DEFAULT STRING",
|
||||||
"0",
|
"0",
|
||||||
|
"0123456789",
|
||||||
|
"1234567890",
|
||||||
}
|
}
|
||||||
for _, p := range placeholders {
|
for _, p := range placeholders {
|
||||||
if upper == p {
|
if upper == p {
|
||||||
|
|||||||
@@ -84,6 +84,10 @@ func TestCleanDMIValue(t *testing.T) {
|
|||||||
{" Inspur ", "Inspur"},
|
{" Inspur ", "Inspur"},
|
||||||
{"", ""},
|
{"", ""},
|
||||||
{"0", ""},
|
{"0", ""},
|
||||||
|
{"0123456789", ""},
|
||||||
|
{"1234567890", ""},
|
||||||
|
{"Not Available", ""},
|
||||||
|
{"To Be Filled By O.E.M", ""},
|
||||||
}
|
}
|
||||||
for _, tt := range tests {
|
for _, tt := range tests {
|
||||||
got := cleanDMIValue(tt.input)
|
got := cleanDMIValue(tt.input)
|
||||||
@@ -109,6 +113,80 @@ func TestParseDMIFields(t *testing.T) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestParseBoard_Dell(t *testing.T) {
|
||||||
|
type1 := mustReadFile(t, "testdata/dmidecode_type1_dell.txt")
|
||||||
|
type2 := mustReadFile(t, "testdata/dmidecode_type2_dell.txt")
|
||||||
|
|
||||||
|
board := parseBoard(type1, type2)
|
||||||
|
|
||||||
|
if board.SerialNumber != "7SG9F63" {
|
||||||
|
t.Errorf("serial_number: got %q, want %q", board.SerialNumber, "7SG9F63")
|
||||||
|
}
|
||||||
|
if board.Manufacturer == nil || *board.Manufacturer != "Dell Inc." {
|
||||||
|
t.Errorf("manufacturer: got %v, want Dell Inc.", board.Manufacturer)
|
||||||
|
}
|
||||||
|
if board.ProductName == nil || *board.ProductName != "PowerEdge R740xd" {
|
||||||
|
t.Errorf("product_name: got %v, want PowerEdge R740xd", board.ProductName)
|
||||||
|
}
|
||||||
|
// part number comes from type2 Product Name
|
||||||
|
if board.PartNumber == nil || *board.PartNumber != "0F9N89" {
|
||||||
|
t.Errorf("part_number: got %v, want 0F9N89", board.PartNumber)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestParseBoard_HPE(t *testing.T) {
|
||||||
|
type1 := mustReadFile(t, "testdata/dmidecode_type1_hpe.txt")
|
||||||
|
type2 := mustReadFile(t, "testdata/dmidecode_type2_hpe.txt")
|
||||||
|
|
||||||
|
board := parseBoard(type1, type2)
|
||||||
|
|
||||||
|
if board.SerialNumber != "CZJ9320CXN" {
|
||||||
|
t.Errorf("serial_number: got %q, want %q", board.SerialNumber, "CZJ9320CXN")
|
||||||
|
}
|
||||||
|
if board.Manufacturer == nil || *board.Manufacturer != "HPE" {
|
||||||
|
t.Errorf("manufacturer: got %v, want HPE", board.Manufacturer)
|
||||||
|
}
|
||||||
|
if board.ProductName == nil || *board.ProductName != "ProLiant DL380 Gen10" {
|
||||||
|
t.Errorf("product_name: got %v, want ProLiant DL380 Gen10", board.ProductName)
|
||||||
|
}
|
||||||
|
if board.PartNumber == nil || *board.PartNumber != "ProLiant DL380 Gen10" {
|
||||||
|
t.Errorf("part_number: got %v, want ProLiant DL380 Gen10", board.PartNumber)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestParseBoard_Supermicro_Placeholders(t *testing.T) {
|
||||||
|
type1 := mustReadFile(t, "testdata/dmidecode_type1_supermicro.txt")
|
||||||
|
type2 := mustReadFile(t, "testdata/dmidecode_type2_supermicro.txt")
|
||||||
|
|
||||||
|
board := parseBoard(type1, type2)
|
||||||
|
|
||||||
|
if board.SerialNumber != "S214726X2A36789" {
|
||||||
|
t.Errorf("serial_number: got %q, want %q", board.SerialNumber, "S214726X2A36789")
|
||||||
|
}
|
||||||
|
if board.Manufacturer == nil || *board.Manufacturer != "Supermicro" {
|
||||||
|
t.Errorf("manufacturer: got %v, want Supermicro", board.Manufacturer)
|
||||||
|
}
|
||||||
|
if board.ProductName == nil || *board.ProductName != "SYS-6028R-WTR" {
|
||||||
|
t.Errorf("product_name: got %v, want SYS-6028R-WTR", board.ProductName)
|
||||||
|
}
|
||||||
|
// "X10DRW-i" is the real part number from type 2
|
||||||
|
if board.PartNumber == nil || *board.PartNumber != "X10DRW-i" {
|
||||||
|
t.Errorf("part_number: got %v, want X10DRW-i", board.PartNumber)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestParseBIOSFirmware_Dell(t *testing.T) {
|
||||||
|
type0 := mustReadFile(t, "testdata/dmidecode_type0_dell.txt")
|
||||||
|
fw := parseBIOSFirmware(type0)
|
||||||
|
|
||||||
|
if len(fw) != 1 {
|
||||||
|
t.Fatalf("expected 1 firmware record, got %d", len(fw))
|
||||||
|
}
|
||||||
|
if fw[0].Version != "2.5.4" {
|
||||||
|
t.Errorf("version: got %q, want 2.5.4", fw[0].Version)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
func mustReadFile(t *testing.T, path string) string {
|
func mustReadFile(t *testing.T, path string) string {
|
||||||
t.Helper()
|
t.Helper()
|
||||||
b, err := os.ReadFile(path)
|
b, err := os.ReadFile(path)
|
||||||
|
|||||||
@@ -49,7 +49,8 @@ func Run(_ runtimeenv.Mode) schema.HardwareIngestRequest {
|
|||||||
snap.VROCLicense = collectVROCLicense(snap.PCIeDevices)
|
snap.VROCLicense = collectVROCLicense(snap.PCIeDevices)
|
||||||
snap.PowerSupplies = collectPSUs(derefString(snap.Board.Manufacturer))
|
snap.PowerSupplies = collectPSUs(derefString(snap.Board.Manufacturer))
|
||||||
snap.PowerSupplies = enrichPSUsWithTelemetry(snap.PowerSupplies, sensorDoc)
|
snap.PowerSupplies = enrichPSUsWithTelemetry(snap.PowerSupplies, sensorDoc)
|
||||||
snap.Sensors = buildSensorsFromDoc(sensorDoc)
|
snap.Sensors = mergeIPMISensors(buildSensorsFromDoc(sensorDoc), collectIPMISensors())
|
||||||
|
snap.EventLogs = append(collectIPMISEL(), collectDmesgErrors()...)
|
||||||
finalizeSnapshot(&snap, collectedAt)
|
finalizeSnapshot(&snap, collectedAt)
|
||||||
|
|
||||||
// remaining collectors added in steps 1.8 – 1.10
|
// remaining collectors added in steps 1.8 – 1.10
|
||||||
|
|||||||
129
audit/internal/collector/dmesg_events.go
Normal file
129
audit/internal/collector/dmesg_events.go
Normal file
@@ -0,0 +1,129 @@
|
|||||||
|
package collector
|
||||||
|
|
||||||
|
import (
|
||||||
|
"bee/audit/internal/schema"
|
||||||
|
"log/slog"
|
||||||
|
"os/exec"
|
||||||
|
"regexp"
|
||||||
|
"strings"
|
||||||
|
"time"
|
||||||
|
)
|
||||||
|
|
||||||
|
var (
	// dmesgTimestampRE splits a dmesg line into its bracketed prefix and the
	// remaining message. Both timestamp flavours match:
	//
	//	dmesg -T:  [Thu Jun 18 14:23:45 2026] message
	//	dmesg:     [  123.456789] message
	dmesgTimestampRE = regexp.MustCompile(`^\[([^\]]+)\]\s*(.*)$`)

	// dmesgErrorPatterns holds the case-insensitive keyword expressions that
	// mark a kernel log line as an error or hardware problem worth capturing.
	dmesgErrorPatterns = func() []*regexp.Regexp {
		exprs := []string{
			`(?i)\berr(or)?\b`,
			`(?i)\bfail(ed|ure)?\b`,
			`(?i)\bfault\b`,
			`(?i)\bwarn(ing)?\b`,
			`(?i)\bAER\b`,
			`(?i)\bXid\b`,
			`(?i)\bNVRM\b`,
			`(?i)\bpanic\b`,
			`(?i)\bcorrected\b`,
			`(?i)\buncorrect`,
			`(?i)\bECC\b`,
			`(?i)\btimeout\b`,
			`(?i)\breset\b`,
			`(?i)\bdead\b`,
			`(?i)\bhang\b`,
			`(?i)\bstall\b`,
			`(?i)\bdisabled\b`,
		}
		compiled := make([]*regexp.Regexp, len(exprs))
		for i, expr := range exprs {
			compiled[i] = regexp.MustCompile(expr)
		}
		return compiled
	}()
)
|
||||||
|
|
||||||
|
// collectDmesgErrors runs `dmesg -T` (or `dmesg` without -T on failure) and
|
||||||
|
// returns only lines that match known error/warning patterns.
|
||||||
|
func collectDmesgErrors() []schema.HardwareEventLog {
|
||||||
|
out, err := exec.Command("dmesg", "-T").Output()
|
||||||
|
if err != nil || len(out) == 0 {
|
||||||
|
// Fallback: dmesg without human-readable timestamps
|
||||||
|
out, err = exec.Command("dmesg").Output()
|
||||||
|
if err != nil || len(out) == 0 {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
}
|
||||||
|
entries := parseDmesgErrors(string(out))
|
||||||
|
if len(entries) == 0 {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
slog.Info("dmesg: collected error entries", "count", len(entries))
|
||||||
|
return entries
|
||||||
|
}
|
||||||
|
|
||||||
|
func parseDmesgErrors(output string) []schema.HardwareEventLog {
|
||||||
|
var entries []schema.HardwareEventLog
|
||||||
|
collectedAt := time.Now().UTC().Format(time.RFC3339)
|
||||||
|
|
||||||
|
for _, line := range strings.Split(output, "\n") {
|
||||||
|
line = strings.TrimSpace(line)
|
||||||
|
if line == "" {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
var timestamp, message string
|
||||||
|
if m := dmesgTimestampRE.FindStringSubmatch(line); m != nil {
|
||||||
|
timestamp = strings.TrimSpace(m[1])
|
||||||
|
message = strings.TrimSpace(m[2])
|
||||||
|
} else {
|
||||||
|
message = line
|
||||||
|
}
|
||||||
|
|
||||||
|
if message == "" {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
if !matchesAny(message, dmesgErrorPatterns) {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
severity := dmesgSeverity(message)
|
||||||
|
source := "dmesg"
|
||||||
|
|
||||||
|
var eventTime *string
|
||||||
|
if timestamp != "" {
|
||||||
|
t := timestamp
|
||||||
|
eventTime = &t
|
||||||
|
} else {
|
||||||
|
eventTime = &collectedAt
|
||||||
|
}
|
||||||
|
|
||||||
|
entries = append(entries, schema.HardwareEventLog{
|
||||||
|
Source: source,
|
||||||
|
EventTime: eventTime,
|
||||||
|
Severity: &severity,
|
||||||
|
Message: message,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
return entries
|
||||||
|
}
|
||||||
|
|
||||||
|
// matchesAny reports whether s matches at least one of patterns. It returns
// false for an empty or nil pattern list.
func matchesAny(s string, patterns []*regexp.Regexp) bool {
	for i := range patterns {
		if patterns[i].MatchString(s) {
			return true
		}
	}
	return false
}
|
||||||
|
|
||||||
|
func dmesgSeverity(msg string) string {
|
||||||
|
lower := strings.ToLower(msg)
|
||||||
|
switch {
|
||||||
|
case strings.Contains(lower, "panic") ||
|
||||||
|
strings.Contains(lower, "aer") ||
|
||||||
|
strings.Contains(lower, "uncorrect") ||
|
||||||
|
strings.Contains(lower, "xid") ||
|
||||||
|
strings.Contains(lower, "nvrm"):
|
||||||
|
return statusCritical
|
||||||
|
case strings.Contains(lower, "error") ||
|
||||||
|
strings.Contains(lower, "fault") ||
|
||||||
|
strings.Contains(lower, "fail") ||
|
||||||
|
strings.Contains(lower, "dead") ||
|
||||||
|
strings.Contains(lower, "hang"):
|
||||||
|
return statusCritical
|
||||||
|
default:
|
||||||
|
return statusWarning
|
||||||
|
}
|
||||||
|
}
|
||||||
90
audit/internal/collector/ipmi_sel.go
Normal file
90
audit/internal/collector/ipmi_sel.go
Normal file
@@ -0,0 +1,90 @@
|
|||||||
|
package collector
|
||||||
|
|
||||||
|
import (
|
||||||
|
"bee/audit/internal/schema"
|
||||||
|
"fmt"
|
||||||
|
"log/slog"
|
||||||
|
"os/exec"
|
||||||
|
"strings"
|
||||||
|
)
|
||||||
|
|
||||||
|
// collectIPMISEL runs `ipmitool sel list` and returns parsed event log entries.
|
||||||
|
// Returns nil if ipmitool is unavailable or the SEL is empty.
|
||||||
|
func collectIPMISEL() []schema.HardwareEventLog {
|
||||||
|
out, err := exec.Command("ipmitool", "sel", "list").Output()
|
||||||
|
if err != nil || len(out) == 0 {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
entries := parseIPMISELOutput(string(out))
|
||||||
|
if len(entries) == 0 {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
slog.Info("ipmi sel: collected", "entries", len(entries))
|
||||||
|
return entries
|
||||||
|
}
|
||||||
|
|
||||||
|
// parseIPMISELOutput parses `ipmitool sel list` output.
|
||||||
|
// Line format: ID | date | time | sensor | event description | direction
|
||||||
|
// Example: 1 | 06/18/2026 | 14:23:45 | Temperature #0x30 | Upper Critical going high | Asserted
|
||||||
|
func parseIPMISELOutput(output string) []schema.HardwareEventLog {
|
||||||
|
var entries []schema.HardwareEventLog
|
||||||
|
for _, line := range strings.Split(output, "\n") {
|
||||||
|
line = strings.TrimSpace(line)
|
||||||
|
if line == "" {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
parts := strings.SplitN(line, "|", 6)
|
||||||
|
if len(parts) < 5 {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
id := strings.TrimSpace(parts[0])
|
||||||
|
date := strings.TrimSpace(parts[1])
|
||||||
|
timeStr := strings.TrimSpace(parts[2])
|
||||||
|
sensor := strings.TrimSpace(parts[3])
|
||||||
|
event := strings.TrimSpace(parts[4])
|
||||||
|
direction := ""
|
||||||
|
if len(parts) == 6 {
|
||||||
|
direction = strings.TrimSpace(parts[5])
|
||||||
|
}
|
||||||
|
|
||||||
|
var eventTime *string
|
||||||
|
if date != "" && timeStr != "" {
|
||||||
|
t := fmt.Sprintf("%s %s", date, timeStr)
|
||||||
|
eventTime = &t
|
||||||
|
}
|
||||||
|
|
||||||
|
message := event
|
||||||
|
if direction != "" && strings.EqualFold(direction, "Deasserted") {
|
||||||
|
message = event + " (Deasserted)"
|
||||||
|
}
|
||||||
|
|
||||||
|
severity := ipmiSELSeverity(event)
|
||||||
|
isActive := !strings.EqualFold(direction, "Deasserted")
|
||||||
|
|
||||||
|
entry := schema.HardwareEventLog{
|
||||||
|
Source: "ipmi-sel",
|
||||||
|
EventTime: eventTime,
|
||||||
|
Severity: &severity,
|
||||||
|
MessageID: &id,
|
||||||
|
Message: message,
|
||||||
|
IsActive: &isActive,
|
||||||
|
}
|
||||||
|
if sensor != "" {
|
||||||
|
entry.ComponentRef = &sensor
|
||||||
|
}
|
||||||
|
entries = append(entries, entry)
|
||||||
|
}
|
||||||
|
return entries
|
||||||
|
}
|
||||||
|
|
||||||
|
func ipmiSELSeverity(event string) string {
|
||||||
|
lower := strings.ToLower(event)
|
||||||
|
switch {
|
||||||
|
case strings.Contains(lower, "critical") || strings.Contains(lower, "non-recoverable"):
|
||||||
|
return statusCritical
|
||||||
|
case strings.Contains(lower, "non-critical") || strings.Contains(lower, "warning") || strings.Contains(lower, "degraded"):
|
||||||
|
return statusWarning
|
||||||
|
default:
|
||||||
|
return "info"
|
||||||
|
}
|
||||||
|
}
|
||||||
216
audit/internal/collector/ipmi_sensors.go
Normal file
216
audit/internal/collector/ipmi_sensors.go
Normal file
@@ -0,0 +1,216 @@
|
|||||||
|
package collector
|
||||||
|
|
||||||
|
import (
|
||||||
|
"bee/audit/internal/schema"
|
||||||
|
"log/slog"
|
||||||
|
"os/exec"
|
||||||
|
"strconv"
|
||||||
|
"strings"
|
||||||
|
)
|
||||||
|
|
||||||
|
// collectIPMISensors runs `ipmitool sensor` and returns parsed sensor readings.
|
||||||
|
// Returns nil if ipmitool is unavailable or produces no output.
|
||||||
|
func collectIPMISensors() *schema.HardwareSensors {
|
||||||
|
out, err := exec.Command("ipmitool", "sensor").Output()
|
||||||
|
if err != nil || len(out) == 0 {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
result := parseIPMISensorOutput(string(out))
|
||||||
|
if result == nil {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
slog.Info("ipmi sensors: collected",
|
||||||
|
"fans", len(result.Fans),
|
||||||
|
"temperatures", len(result.Temperatures),
|
||||||
|
"power", len(result.Power),
|
||||||
|
"other", len(result.Other),
|
||||||
|
)
|
||||||
|
return result
|
||||||
|
}
|
||||||
|
|
||||||
|
// parseIPMISensorOutput parses `ipmitool sensor` text output.
|
||||||
|
// Each line: name | value | unit | status | lnr | lcr | lnc | unc | ucr | unr
|
||||||
|
func parseIPMISensorOutput(output string) *schema.HardwareSensors {
|
||||||
|
result := &schema.HardwareSensors{}
|
||||||
|
seen := map[string]struct{}{}
|
||||||
|
|
||||||
|
for _, line := range strings.Split(output, "\n") {
|
||||||
|
line = strings.TrimSpace(line)
|
||||||
|
if line == "" {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
parts := strings.Split(line, "|")
|
||||||
|
if len(parts) < 4 {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
name := strings.TrimSpace(parts[0])
|
||||||
|
rawVal := strings.TrimSpace(parts[1])
|
||||||
|
unit := strings.TrimSpace(parts[2])
|
||||||
|
status := strings.TrimSpace(parts[3])
|
||||||
|
|
||||||
|
if name == "" || rawVal == "na" || rawVal == "N/A" || rawVal == "" {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
value, err := strconv.ParseFloat(rawVal, 64)
|
||||||
|
if err != nil {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
statusStr := normalizeIPMISensorStatus(status)
|
||||||
|
|
||||||
|
switch {
|
||||||
|
case strings.EqualFold(unit, "RPM"):
|
||||||
|
if duplicateSensor(seen, "fan", name) {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
rpm := int(value)
|
||||||
|
item := schema.HardwareFanSensor{Name: name, RPM: &rpm}
|
||||||
|
if statusStr != "" {
|
||||||
|
item.Status = &statusStr
|
||||||
|
}
|
||||||
|
result.Fans = append(result.Fans, item)
|
||||||
|
|
||||||
|
case strings.EqualFold(unit, "degrees C") || strings.EqualFold(unit, "C"):
|
||||||
|
if duplicateSensor(seen, "temp", name) {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
item := schema.HardwareTemperatureSensor{Name: name, Celsius: &value}
|
||||||
|
if len(parts) >= 9 {
|
||||||
|
if unc := parseIPMIThreshold(parts[7]); unc != nil {
|
||||||
|
item.ThresholdWarningCelsius = unc
|
||||||
|
}
|
||||||
|
if ucr := parseIPMIThreshold(parts[8]); ucr != nil {
|
||||||
|
item.ThresholdCriticalCelsius = ucr
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if statusStr != "" {
|
||||||
|
item.Status = &statusStr
|
||||||
|
} else {
|
||||||
|
item.Status = deriveTemperatureStatus(item.Celsius, item.ThresholdWarningCelsius, item.ThresholdCriticalCelsius)
|
||||||
|
}
|
||||||
|
result.Temperatures = append(result.Temperatures, item)
|
||||||
|
|
||||||
|
case strings.EqualFold(unit, "Volts") || strings.EqualFold(unit, "V"):
|
||||||
|
if duplicateSensor(seen, "power", name) {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
item := schema.HardwarePowerSensor{Name: name, VoltageV: &value}
|
||||||
|
if statusStr != "" {
|
||||||
|
item.Status = &statusStr
|
||||||
|
}
|
||||||
|
result.Power = append(result.Power, item)
|
||||||
|
|
||||||
|
case strings.EqualFold(unit, "Watts") || strings.EqualFold(unit, "W"):
|
||||||
|
if duplicateSensor(seen, "power", name) {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
item := schema.HardwarePowerSensor{Name: name, PowerW: &value}
|
||||||
|
if statusStr != "" {
|
||||||
|
item.Status = &statusStr
|
||||||
|
}
|
||||||
|
result.Power = append(result.Power, item)
|
||||||
|
|
||||||
|
case strings.EqualFold(unit, "Amps") || strings.EqualFold(unit, "A"):
|
||||||
|
if duplicateSensor(seen, "power", name) {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
item := schema.HardwarePowerSensor{Name: name, CurrentA: &value}
|
||||||
|
if statusStr != "" {
|
||||||
|
item.Status = &statusStr
|
||||||
|
}
|
||||||
|
result.Power = append(result.Power, item)
|
||||||
|
|
||||||
|
default:
|
||||||
|
if duplicateSensor(seen, "other", name) {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
item := schema.HardwareOtherSensor{Name: name, Value: &value}
|
||||||
|
if unit != "" {
|
||||||
|
item.Unit = &unit
|
||||||
|
}
|
||||||
|
if statusStr != "" {
|
||||||
|
item.Status = &statusStr
|
||||||
|
}
|
||||||
|
result.Other = append(result.Other, item)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if len(result.Fans) == 0 && len(result.Temperatures) == 0 && len(result.Power) == 0 && len(result.Other) == 0 {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
return result
|
||||||
|
}
|
||||||
|
|
||||||
|
// parseIPMIThreshold parses one threshold column of `ipmitool sensor` output.
// It returns nil when the trimmed field is empty, "na", "N/A", or not a valid
// float; otherwise it returns a pointer to the parsed value.
func parseIPMIThreshold(raw string) *float64 {
	switch field := strings.TrimSpace(raw); field {
	case "", "na", "N/A":
		return nil
	default:
		if parsed, err := strconv.ParseFloat(field, 64); err == nil {
			return &parsed
		}
		return nil
	}
}
|
||||||
|
|
||||||
|
func normalizeIPMISensorStatus(s string) string {
|
||||||
|
switch strings.ToLower(s) {
|
||||||
|
case "ok":
|
||||||
|
return statusOK
|
||||||
|
case "cr", "ucr", "lcr":
|
||||||
|
return statusCritical
|
||||||
|
case "nc", "unc", "lnc", "nr", "unr", "lnr":
|
||||||
|
return statusWarning
|
||||||
|
case "ns", "na":
|
||||||
|
return ""
|
||||||
|
default:
|
||||||
|
return ""
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// mergeIPMISensors appends IPMI sensor entries into existing, skipping names already present.
|
||||||
|
func mergeIPMISensors(existing, ipmi *schema.HardwareSensors) *schema.HardwareSensors {
|
||||||
|
if ipmi == nil {
|
||||||
|
return existing
|
||||||
|
}
|
||||||
|
if existing == nil {
|
||||||
|
return ipmi
|
||||||
|
}
|
||||||
|
|
||||||
|
existingNames := map[string]struct{}{}
|
||||||
|
for _, s := range existing.Fans {
|
||||||
|
existingNames["fan\x00"+s.Name] = struct{}{}
|
||||||
|
}
|
||||||
|
for _, s := range existing.Temperatures {
|
||||||
|
existingNames["temp\x00"+s.Name] = struct{}{}
|
||||||
|
}
|
||||||
|
for _, s := range existing.Power {
|
||||||
|
existingNames["power\x00"+s.Name] = struct{}{}
|
||||||
|
}
|
||||||
|
for _, s := range existing.Other {
|
||||||
|
existingNames["other\x00"+s.Name] = struct{}{}
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, s := range ipmi.Fans {
|
||||||
|
if _, ok := existingNames["fan\x00"+s.Name]; !ok {
|
||||||
|
existing.Fans = append(existing.Fans, s)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
for _, s := range ipmi.Temperatures {
|
||||||
|
if _, ok := existingNames["temp\x00"+s.Name]; !ok {
|
||||||
|
existing.Temperatures = append(existing.Temperatures, s)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
for _, s := range ipmi.Power {
|
||||||
|
if _, ok := existingNames["power\x00"+s.Name]; !ok {
|
||||||
|
existing.Power = append(existing.Power, s)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
for _, s := range ipmi.Other {
|
||||||
|
if _, ok := existingNames["other\x00"+s.Name]; !ok {
|
||||||
|
existing.Other = append(existing.Other, s)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return existing
|
||||||
|
}
|
||||||
87
audit/internal/collector/memory_test.go
Normal file
87
audit/internal/collector/memory_test.go
Normal file
@@ -0,0 +1,87 @@
|
|||||||
|
package collector
|
||||||
|
|
||||||
|
import (
|
||||||
|
"testing"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestParseMemory_Mixed(t *testing.T) {
|
||||||
|
out := mustReadFile(t, "testdata/dmidecode_type17_mixed.txt")
|
||||||
|
dimms := parseMemory(out)
|
||||||
|
|
||||||
|
if len(dimms) != 3 {
|
||||||
|
t.Fatalf("expected 3 DIMMs, got %d", len(dimms))
|
||||||
|
}
|
||||||
|
|
||||||
|
// slot 0: populated, 16 GB Supermicro-style
|
||||||
|
d0 := dimms[0]
|
||||||
|
if d0.Present == nil || !*d0.Present {
|
||||||
|
t.Errorf("dimm0: expected present=true")
|
||||||
|
}
|
||||||
|
if d0.SizeMB == nil || *d0.SizeMB != 16384 {
|
||||||
|
t.Errorf("dimm0: size_mb=%v, want 16384", d0.SizeMB)
|
||||||
|
}
|
||||||
|
if d0.Slot == nil || *d0.Slot != "P1-DIMMA1" {
|
||||||
|
t.Errorf("dimm0: slot=%v, want P1-DIMMA1", d0.Slot)
|
||||||
|
}
|
||||||
|
if d0.Location == nil || *d0.Location != "P0_Node0_Channel0_Dimm0" {
|
||||||
|
t.Errorf("dimm0: location=%v, want P0_Node0_Channel0_Dimm0", d0.Location)
|
||||||
|
}
|
||||||
|
if d0.Manufacturer == nil || *d0.Manufacturer != "Micron" {
|
||||||
|
t.Errorf("dimm0: manufacturer=%v, want Micron", d0.Manufacturer)
|
||||||
|
}
|
||||||
|
if d0.PartNumber == nil || *d0.PartNumber != "36ASF2G72PZ-2G1A2" {
|
||||||
|
t.Errorf("dimm0: part_number=%v, want 36ASF2G72PZ-2G1A2", d0.PartNumber)
|
||||||
|
}
|
||||||
|
if d0.MaxSpeedMHz == nil || *d0.MaxSpeedMHz != 2133 {
|
||||||
|
t.Errorf("dimm0: max_speed_mhz=%v, want 2133", d0.MaxSpeedMHz)
|
||||||
|
}
|
||||||
|
|
||||||
|
// slot 1: empty
|
||||||
|
d1 := dimms[1]
|
||||||
|
if d1.Present == nil || *d1.Present {
|
||||||
|
t.Errorf("dimm1: expected present=false")
|
||||||
|
}
|
||||||
|
if d1.Status == nil || *d1.Status != statusEmpty {
|
||||||
|
t.Errorf("dimm1: status=%v, want %s", d1.Status, statusEmpty)
|
||||||
|
}
|
||||||
|
if d1.SizeMB != nil {
|
||||||
|
t.Errorf("dimm1: size_mb should be nil for empty slot, got %v", d1.SizeMB)
|
||||||
|
}
|
||||||
|
|
||||||
|
// slot 2: populated, 32768 MB Dell-style size
|
||||||
|
d2 := dimms[2]
|
||||||
|
if d2.Present == nil || !*d2.Present {
|
||||||
|
t.Errorf("dimm2: expected present=true")
|
||||||
|
}
|
||||||
|
if d2.SizeMB == nil || *d2.SizeMB != 32768 {
|
||||||
|
t.Errorf("dimm2: size_mb=%v, want 32768", d2.SizeMB)
|
||||||
|
}
|
||||||
|
if d2.Manufacturer == nil || *d2.Manufacturer != "Samsung" {
|
||||||
|
t.Errorf("dimm2: manufacturer=%v, want Samsung", d2.Manufacturer)
|
||||||
|
}
|
||||||
|
if d2.CurrentSpeedMHz == nil || *d2.CurrentSpeedMHz != 2400 {
|
||||||
|
t.Errorf("dimm2: current_speed_mhz=%v, want 2400", d2.CurrentSpeedMHz)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestParseMemorySizeMB(t *testing.T) {
|
||||||
|
tests := []struct {
|
||||||
|
input string
|
||||||
|
want int
|
||||||
|
}{
|
||||||
|
{"16 GB", 16384},
|
||||||
|
{"32 GB", 32768},
|
||||||
|
{"8 GB", 8192},
|
||||||
|
{"16384 MB", 16384},
|
||||||
|
{"32768 MB", 32768},
|
||||||
|
{"No Module Installed", 0},
|
||||||
|
{"0", 0},
|
||||||
|
{"", 0},
|
||||||
|
}
|
||||||
|
for _, tt := range tests {
|
||||||
|
got := parseMemorySizeMB(tt.input)
|
||||||
|
if got != tt.want {
|
||||||
|
t.Errorf("parseMemorySizeMB(%q) = %d, want %d", tt.input, got, tt.want)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -38,11 +38,22 @@ func bestEffortRescanHotplugStorage() {
|
|||||||
for _, path := range hostPaths {
|
for _, path := range hostPaths {
|
||||||
// SAS HBAs (e.g. smartpqi) block indefinitely in sas_user_scan when
|
// SAS HBAs (e.g. smartpqi) block indefinitely in sas_user_scan when
|
||||||
// written to — SAS topology is discovered by the driver itself.
|
// written to — SAS topology is discovered by the driver itself.
|
||||||
|
// Detect via two methods: (1) sas_host class registration, and
|
||||||
|
// (2) driver proc_name — smartpqi uses scsi_transport_sas but does
|
||||||
|
// not register a sas_host object, so (1) alone misses it.
|
||||||
host := filepath.Base(filepath.Dir(path))
|
host := filepath.Base(filepath.Dir(path))
|
||||||
if _, err := os.Stat("/sys/class/sas_host/" + host); err == nil {
|
if _, err := os.Stat("/sys/class/sas_host/" + host); err == nil {
|
||||||
slog.Info("storage: scsi host scan skipped (SAS host)", "path", path)
|
slog.Info("storage: scsi host scan skipped (SAS host)", "path", path)
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
if procName, err := os.ReadFile("/sys/class/scsi_host/" + host + "/proc_name"); err == nil {
|
||||||
|
switch strings.TrimSpace(string(procName)) {
|
||||||
|
case "smartpqi", "hpsa":
|
||||||
|
slog.Info("storage: scsi host scan skipped (SAS transport driver)",
|
||||||
|
"path", path, "driver", strings.TrimSpace(string(procName)))
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
}
|
||||||
if err := hotplugWriteFile(path, []byte("- - -\n"), 0644); err != nil {
|
if err := hotplugWriteFile(path, []byte("- - -\n"), 0644); err != nil {
|
||||||
slog.Info("storage: scsi host scan write failed", "path", path, "err", err)
|
slog.Info("storage: scsi host scan write failed", "path", path, "err", err)
|
||||||
continue
|
continue
|
||||||
|
|||||||
27
audit/internal/collector/testdata/dmidecode_type0_dell.txt
vendored
Normal file
27
audit/internal/collector/testdata/dmidecode_type0_dell.txt
vendored
Normal file
@@ -0,0 +1,27 @@
|
|||||||
|
# dmidecode 3.2
|
||||||
|
Getting SMBIOS data from sysfs.
|
||||||
|
SMBIOS 3.1.0 present.
|
||||||
|
|
||||||
|
Handle 0x0000, DMI type 0, 26 bytes
|
||||||
|
BIOS Information
|
||||||
|
Vendor: Dell Inc.
|
||||||
|
Version: 2.5.4
|
||||||
|
Release Date: 01/13/2020
|
||||||
|
Address: 0xF0000
|
||||||
|
Runtime Size: 64 kB
|
||||||
|
ROM Size: 32 MB
|
||||||
|
Characteristics:
|
||||||
|
ISA is supported
|
||||||
|
PCI is supported
|
||||||
|
PNP is supported
|
||||||
|
BIOS is upgradeable
|
||||||
|
BIOS shadowing is allowed
|
||||||
|
Boot from CD is supported
|
||||||
|
Selectable boot is supported
|
||||||
|
EDD is supported
|
||||||
|
ACPI is supported
|
||||||
|
USB legacy is supported
|
||||||
|
BIOS boot specification is supported
|
||||||
|
Targeted content distribution is supported
|
||||||
|
UEFI is supported
|
||||||
|
BIOS Revision: 2.5
|
||||||
59
audit/internal/collector/testdata/dmidecode_type17_mixed.txt
vendored
Normal file
59
audit/internal/collector/testdata/dmidecode_type17_mixed.txt
vendored
Normal file
@@ -0,0 +1,59 @@
|
|||||||
|
# dmidecode 3.1
|
||||||
|
Getting SMBIOS data from sysfs.
|
||||||
|
SMBIOS 2.8 present.
|
||||||
|
|
||||||
|
Handle 0x0026, DMI type 17, 40 bytes
|
||||||
|
Memory Device
|
||||||
|
Array Handle: 0x0025
|
||||||
|
Error Information Handle: Not Provided
|
||||||
|
Total Width: 72 bits
|
||||||
|
Data Width: 64 bits
|
||||||
|
Size: 16 GB
|
||||||
|
Form Factor: DIMM
|
||||||
|
Set: None
|
||||||
|
Locator: P1-DIMMA1
|
||||||
|
Bank Locator: P0_Node0_Channel0_Dimm0
|
||||||
|
Type: DDR4
|
||||||
|
Type Detail: Synchronous
|
||||||
|
Speed: 2133 MT/s
|
||||||
|
Manufacturer: Micron
|
||||||
|
Serial Number: 1A2B3C4D
|
||||||
|
Asset Tag: Not Specified
|
||||||
|
Part Number: 36ASF2G72PZ-2G1A2
|
||||||
|
Rank: 2
|
||||||
|
Configured Memory Speed: 2133 MT/s
|
||||||
|
|
||||||
|
Handle 0x0027, DMI type 17, 40 bytes
|
||||||
|
Memory Device
|
||||||
|
Array Handle: 0x0025
|
||||||
|
Error Information Handle: Not Provided
|
||||||
|
Total Width: Unknown
|
||||||
|
Data Width: Unknown
|
||||||
|
Size: No Module Installed
|
||||||
|
Form Factor: DIMM
|
||||||
|
Set: None
|
||||||
|
Locator: P1-DIMMA2
|
||||||
|
Bank Locator: P0_Node0_Channel0_Dimm1
|
||||||
|
Type: DDR4
|
||||||
|
Type Detail: Synchronous
|
||||||
|
|
||||||
|
Handle 0x0028, DMI type 17, 84 bytes
|
||||||
|
Memory Device
|
||||||
|
Array Handle: 0x0025
|
||||||
|
Error Information Handle: Not Provided
|
||||||
|
Total Width: 72 bits
|
||||||
|
Data Width: 64 bits
|
||||||
|
Size: 32768 MB
|
||||||
|
Form Factor: DIMM
|
||||||
|
Set: 1
|
||||||
|
Locator: A1
|
||||||
|
Bank Locator: Not Specified
|
||||||
|
Type: DDR4
|
||||||
|
Type Detail: Synchronous Registered (Buffered)
|
||||||
|
Speed: 2933 MT/s
|
||||||
|
Manufacturer: Samsung
|
||||||
|
Serial Number: 5E6F7A8B
|
||||||
|
Asset Tag: Not Specified
|
||||||
|
Part Number: M393A4K40CB2-CVF
|
||||||
|
Rank: 2
|
||||||
|
Configured Memory Speed: 2400 MT/s
|
||||||
14
audit/internal/collector/testdata/dmidecode_type1_dell.txt
vendored
Normal file
14
audit/internal/collector/testdata/dmidecode_type1_dell.txt
vendored
Normal file
@@ -0,0 +1,14 @@
|
|||||||
|
# dmidecode 3.2
|
||||||
|
Getting SMBIOS data from sysfs.
|
||||||
|
SMBIOS 3.1.0 present.
|
||||||
|
|
||||||
|
Handle 0x0100, DMI type 1, 27 bytes
|
||||||
|
System Information
|
||||||
|
Manufacturer: Dell Inc.
|
||||||
|
Product Name: PowerEdge R740xd
|
||||||
|
Version: Not Specified
|
||||||
|
Serial Number: 7SG9F63
|
||||||
|
UUID: b1c2d3e4-f5a6-7890-bcde-f12345678901
|
||||||
|
Wake-up Type: Power Switch
|
||||||
|
SKU Number: SKU=NotProvided;ModelName=PowerEdge R740xd
|
||||||
|
Family: PowerEdge
|
||||||
14
audit/internal/collector/testdata/dmidecode_type1_hpe.txt
vendored
Normal file
14
audit/internal/collector/testdata/dmidecode_type1_hpe.txt
vendored
Normal file
@@ -0,0 +1,14 @@
|
|||||||
|
# dmidecode 3.3
|
||||||
|
Getting SMBIOS data from sysfs.
|
||||||
|
SMBIOS 3.1.0 present.
|
||||||
|
|
||||||
|
Handle 0x008E, DMI type 1, 27 bytes
|
||||||
|
System Information
|
||||||
|
Manufacturer: HPE
|
||||||
|
Product Name: ProLiant DL380 Gen10
|
||||||
|
Version: Not Specified
|
||||||
|
Serial Number: CZJ9320CXN
|
||||||
|
UUID: c2d3e4f5-a6b7-8901-cdef-012345678902
|
||||||
|
Wake-up Type: Power Switch
|
||||||
|
SKU Number: 868703-B21
|
||||||
|
Family: ProLiant
|
||||||
14
audit/internal/collector/testdata/dmidecode_type1_supermicro.txt
vendored
Normal file
14
audit/internal/collector/testdata/dmidecode_type1_supermicro.txt
vendored
Normal file
@@ -0,0 +1,14 @@
|
|||||||
|
# dmidecode 3.1
|
||||||
|
Getting SMBIOS data from sysfs.
|
||||||
|
SMBIOS 2.8 present.
|
||||||
|
|
||||||
|
Handle 0x0001, DMI type 1, 27 bytes
|
||||||
|
System Information
|
||||||
|
Manufacturer: Supermicro
|
||||||
|
Product Name: SYS-6028R-WTR
|
||||||
|
Version: 0123456789
|
||||||
|
Serial Number: S214726X2A36789
|
||||||
|
UUID: d3e4f5a6-b7c8-9012-def0-123456789003
|
||||||
|
Wake-up Type: Power Switch
|
||||||
|
SKU Number: Default string
|
||||||
|
Family: Default string
|
||||||
10
audit/internal/collector/testdata/dmidecode_type2_dell.txt
vendored
Normal file
10
audit/internal/collector/testdata/dmidecode_type2_dell.txt
vendored
Normal file
@@ -0,0 +1,10 @@
|
|||||||
|
# dmidecode 3.2
|
||||||
|
Getting SMBIOS data from sysfs.
|
||||||
|
SMBIOS 3.1.0 present.
|
||||||
|
|
||||||
|
Handle 0x0200, DMI type 2, 8 bytes
|
||||||
|
Base Board Information
|
||||||
|
Manufacturer: Dell Inc.
|
||||||
|
Product Name: 0F9N89
|
||||||
|
Version: A00
|
||||||
|
Serial Number: 7SG9F63
|
||||||
19
audit/internal/collector/testdata/dmidecode_type2_hpe.txt
vendored
Normal file
19
audit/internal/collector/testdata/dmidecode_type2_hpe.txt
vendored
Normal file
@@ -0,0 +1,19 @@
|
|||||||
|
# dmidecode 3.3
|
||||||
|
Getting SMBIOS data from sysfs.
|
||||||
|
SMBIOS 3.1.0 present.
|
||||||
|
|
||||||
|
Handle 0x00A4, DMI type 2, 15 bytes
|
||||||
|
Base Board Information
|
||||||
|
Manufacturer: HPE
|
||||||
|
Product Name: ProLiant DL380 Gen10
|
||||||
|
Version: Not Specified
|
||||||
|
Serial Number: CZJ9320CXN
|
||||||
|
Asset Tag: CZJ9320CXN
|
||||||
|
Features:
|
||||||
|
Board is a hosting board
|
||||||
|
Board is removable
|
||||||
|
Board is replaceable
|
||||||
|
Location In Chassis: Not Specified
|
||||||
|
Chassis Handle: 0x0000
|
||||||
|
Type: Motherboard
|
||||||
|
Contained Object Handles: 0
|
||||||
18
audit/internal/collector/testdata/dmidecode_type2_supermicro.txt
vendored
Normal file
18
audit/internal/collector/testdata/dmidecode_type2_supermicro.txt
vendored
Normal file
@@ -0,0 +1,18 @@
|
|||||||
|
# dmidecode 3.1
|
||||||
|
Getting SMBIOS data from sysfs.
|
||||||
|
SMBIOS 2.8 present.
|
||||||
|
|
||||||
|
Handle 0x0002, DMI type 2, 15 bytes
|
||||||
|
Base Board Information
|
||||||
|
Manufacturer: Supermicro
|
||||||
|
Product Name: X10DRW-i
|
||||||
|
Version: 1.02
|
||||||
|
Serial Number: S214726X2A36789
|
||||||
|
Asset Tag: Default string
|
||||||
|
Features:
|
||||||
|
Board is a hosting board
|
||||||
|
Board is replaceable
|
||||||
|
Location In Chassis: Default string
|
||||||
|
Chassis Handle: 0x0003
|
||||||
|
Type: Motherboard
|
||||||
|
Contained Object Handles: 0
|
||||||
@@ -182,11 +182,18 @@ func (s *System) DetectGPUVendor() string {
|
|||||||
return "amd"
|
return "amd"
|
||||||
}
|
}
|
||||||
if raw, err := exec.Command("lspci", "-nn").Output(); err == nil {
|
if raw, err := exec.Command("lspci", "-nn").Output(); err == nil {
|
||||||
text := strings.ToLower(string(raw))
|
// Only match AMD GPU device classes [0300]=VGA, [0302]=3D controller, [0380]=Display.
|
||||||
if strings.Contains(text, "advanced micro devices") || strings.Contains(text, "amd/ati") {
|
// AMD CPUs also appear in lspci as "Advanced Micro Devices" (Root Complex, IOMMU, etc.)
|
||||||
|
// so matching vendor alone causes false positives on AMD CPU servers without GPUs.
|
||||||
|
for _, line := range strings.Split(strings.ToLower(string(raw)), "\n") {
|
||||||
|
if !strings.Contains(line, "advanced micro devices") && !strings.Contains(line, "amd/ati") {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
if strings.Contains(line, "[0300]") || strings.Contains(line, "[0302]") || strings.Contains(line, "[0380]") {
|
||||||
return "amd"
|
return "amd"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
return ""
|
return ""
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -25,6 +25,9 @@ var techDumpFixedCommands = []struct {
|
|||||||
{Name: "sensors", Args: []string{"-j"}, File: "sensors.json"},
|
{Name: "sensors", Args: []string{"-j"}, File: "sensors.json"},
|
||||||
{Name: "ipmitool", Args: []string{"fru", "print"}, File: "ipmitool-fru.txt"},
|
{Name: "ipmitool", Args: []string{"fru", "print"}, File: "ipmitool-fru.txt"},
|
||||||
{Name: "ipmitool", Args: []string{"sdr"}, File: "ipmitool-sdr.txt"},
|
{Name: "ipmitool", Args: []string{"sdr"}, File: "ipmitool-sdr.txt"},
|
||||||
|
{Name: "ipmitool", Args: []string{"sensor"}, File: "ipmitool-sensor.txt"},
|
||||||
|
{Name: "ipmitool", Args: []string{"sel", "list"}, File: "ipmitool-sel.txt"},
|
||||||
|
{Name: "ipmitool", Args: []string{"sel", "time", "get"}, File: "ipmitool-sel-time.txt"},
|
||||||
{Name: "nvme", Args: []string{"list", "-o", "json"}, File: "nvme-list.json"},
|
{Name: "nvme", Args: []string{"list", "-o", "json"}, File: "nvme-list.json"},
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -1292,12 +1292,28 @@ func (h *handler) handleAPIInstallToRAM(w http.ResponseWriter, r *http.Request)
|
|||||||
_ = json.NewEncoder(w).Encode(map[string]string{"task_id": t.ID})
|
_ = json.NewEncoder(w).Encode(map[string]string{"task_id": t.ID})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (h *handler) handleAPISystemReboot(w http.ResponseWriter, r *http.Request) {
|
||||||
|
if err := exec.Command("systemctl", "reboot").Start(); err != nil {
|
||||||
|
writeError(w, http.StatusInternalServerError, "reboot failed: "+err.Error())
|
||||||
|
return
|
||||||
|
}
|
||||||
|
writeJSON(w, map[string]string{"status": "rebooting"})
|
||||||
|
}
|
||||||
|
|
||||||
|
func (h *handler) handleAPISystemShutdown(w http.ResponseWriter, r *http.Request) {
|
||||||
|
if err := exec.Command("systemctl", "poweroff").Start(); err != nil {
|
||||||
|
writeError(w, http.StatusInternalServerError, "shutdown failed: "+err.Error())
|
||||||
|
return
|
||||||
|
}
|
||||||
|
writeJSON(w, map[string]string{"status": "shutting down"})
|
||||||
|
}
|
||||||
|
|
||||||
// ── Tools ─────────────────────────────────────────────────────────────────────
|
// ── Tools ─────────────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
var standardTools = []string{
|
var standardTools = []string{
|
||||||
"dmidecode", "smartctl", "nvme", "lspci", "ipmitool",
|
"dmidecode", "smartctl", "nvme", "lspci", "ipmitool",
|
||||||
"nvidia-smi", "dcgmi", "nv-hostengine", "memtester", "stress-ng", "nvtop",
|
"nvidia-smi", "dcgmi", "nv-hostengine", "memtester", "stress-ng", "nvtop",
|
||||||
"mstflint",
|
"mstflint", "saa",
|
||||||
}
|
}
|
||||||
|
|
||||||
func (h *handler) handleAPIToolsCheck(w http.ResponseWriter, r *http.Request) {
|
func (h *handler) handleAPIToolsCheck(w http.ResponseWriter, r *http.Request) {
|
||||||
|
|||||||
280
audit/internal/webui/huawei_elabel.go
Normal file
280
audit/internal/webui/huawei_elabel.go
Normal file
@@ -0,0 +1,280 @@
|
|||||||
|
package webui
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"encoding/json"
|
||||||
|
"fmt"
|
||||||
|
"net/http"
|
||||||
|
"os/exec"
|
||||||
|
"strconv"
|
||||||
|
"strings"
|
||||||
|
"time"
|
||||||
|
)
|
||||||
|
|
||||||
|
// huaweiField is one elabel entry in the JSON list produced by the elabel
// read endpoint.
type huaweiField struct {
	// Name is the human-readable label.
	Name string `json:"name"`
	// Key is the stable identifier of the field.
	Key string `json:"key"`
	// Value is the decoded field content; empty when it could not be read.
	Value string `json:"value"`
	// ReadOnly is omitted from the JSON when false.
	ReadOnly bool `json:"read_only,omitempty"`
}
|
||||||
|
|
||||||
|
// huaweiChange is a single requested elabel modification: the key of the
// field to change and the new value to store.
type huaweiChange struct {
	// Key selects the field to modify.
	Key string `json:"key"`
	// Value is the replacement content.
	Value string `json:"value"`
}
|
||||||
|
|
||||||
|
// huaweiFieldDef describes one elabel field: its display name, its key, and
// the FRU/type/field byte triple used to address it in OEM IPMI raw commands.
// Field order matters because the definition table uses positional literals.
type huaweiFieldDef struct {
	Name, Key              string
	FruID, TypeID, FieldID byte
	// Special marks fields that need non-default handling:
	// "chassis-type" or "guid"; empty for plain string fields.
	Special string
}
|
||||||
|
|
||||||
|
var huaweiElabelDefs = []huaweiFieldDef{
|
||||||
|
{"Device Name", "DeviceName", 0x00, 0x06, 0x01, ""},
|
||||||
|
{"Device Serial Number", "DeviceSerialNumber", 0x00, 0x06, 0x03, ""},
|
||||||
|
{"Product Name", "ProductName", 0x00, 0x03, 0x01, ""},
|
||||||
|
{"Product Serial Number", "ProductSerialNumber", 0x00, 0x03, 0x04, ""},
|
||||||
|
{"Product Asset Tag", "ProductAssetTag", 0x00, 0x03, 0x05, ""},
|
||||||
|
{"Product Manufacturer", "ProductManufacturer", 0x00, 0x03, 0x00, ""},
|
||||||
|
{"Mainboard Manufacturer", "MainboardManufacturer", 0x00, 0x02, 0x01, ""},
|
||||||
|
{"Board Product Name", "BoardProductName", 0x00, 0x02, 0x02, ""},
|
||||||
|
{"Chassis Part Number", "ChassisPartnumber", 0x00, 0x01, 0x01, ""},
|
||||||
|
{"Chassis Type", "ChassisType", 0x00, 0x01, 0x00, "chassis-type"},
|
||||||
|
{"IO Chassis Serial", "IOChassisSerialNumber", 0x01, 0x03, 0x04, ""},
|
||||||
|
{"IO Chassis Asset Tag", "IOChassisAssetTag", 0x01, 0x03, 0x05, ""},
|
||||||
|
{"GUID", "GUID", 0x00, 0x00, 0x00, "guid"},
|
||||||
|
}
|
||||||
|
|
||||||
|
// huaweiGetRaw reads a string elabel field via OEM IPMI raw command.
|
||||||
|
// Protocol: ipmitool raw 0x30 0x90 0x05 <fru_id> <type_id> <field_id> 0x00 0x30
|
||||||
|
// Response: <length_byte> <ascii_byte1> ... (null-terminated)
|
||||||
|
func huaweiGetRaw(ctx context.Context, def huaweiFieldDef) (string, error) {
|
||||||
|
if def.Special == "guid" {
|
||||||
|
return huaweiGetGUID(ctx)
|
||||||
|
}
|
||||||
|
args := []string{
|
||||||
|
"0x30", "0x90", "0x05",
|
||||||
|
fmt.Sprintf("0x%02x", def.FruID),
|
||||||
|
fmt.Sprintf("0x%02x", def.TypeID),
|
||||||
|
fmt.Sprintf("0x%02x", def.FieldID),
|
||||||
|
"0x00", "0x30",
|
||||||
|
}
|
||||||
|
out, err := exec.CommandContext(ctx, "ipmitool", append([]string{"raw"}, args...)...).CombinedOutput()
|
||||||
|
if err != nil {
|
||||||
|
return "", err
|
||||||
|
}
|
||||||
|
return huaweiParseStringResponse(strings.TrimSpace(string(out)), def.Special), nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// huaweiParseStringResponse decodes the whitespace-separated hex bytes printed
// by ipmitool for an OEM elabel read.
//
// Format: <length_byte> <byte1> <byte2> ...
//
// The first token is skipped; it is not used to bound the payload. For
// special == "chassis-type" the first payload byte is returned formatted as
// "0xNN". Otherwise payload bytes are appended as raw characters until a zero
// byte or an unparsable token is met. Fewer than two tokens, or an unparsable
// chassis-type byte, yield "".
func huaweiParseStringResponse(hexOut, special string) string {
	parts := strings.Fields(hexOut)
	if len(parts) < 2 {
		return ""
	}
	payload := parts[1:]

	if special == "chassis-type" {
		n, err := strconv.ParseUint(payload[0], 16, 8)
		if err != nil {
			return ""
		}
		return fmt.Sprintf("0x%02x", n)
	}

	var sb strings.Builder
	sb.Grow(len(payload))
	for _, p := range payload {
		b, err := strconv.ParseUint(p, 16, 8)
		if err != nil || b == 0 {
			// Stop at the terminator or at the first token that is not a hex byte.
			break
		}
		sb.WriteByte(byte(b))
	}
	return sb.String()
}
|
||||||
|
|
||||||
|
// huaweiGetGUID runs the standard IPMI Get System GUID command (0x06 0x08)
// through ipmitool and formats the 16 returned bytes as a UUID string in
// 4-2-2-2-6 byte groups. The bytes are emitted in reverse order of the
// response (iBMC returns them reversed). Output that does not consist of
// exactly 16 tokens yields an empty string and no error.
func huaweiGetGUID(ctx context.Context) (string, error) {
	raw, err := exec.CommandContext(ctx, "ipmitool", "raw", "0x06", "0x08").CombinedOutput()
	if err != nil {
		return "", err
	}
	tokens := strings.Fields(strings.TrimSpace(string(raw)))
	if len(tokens) != 16 {
		return "", nil
	}

	var uuid strings.Builder
	for pos := 0; pos < len(tokens); pos++ {
		// A dash precedes output bytes 4, 6, 8 and 10.
		switch pos {
		case 4, 6, 8, 10:
			uuid.WriteByte('-')
		}
		// Walk the response back to front to undo the reversed byte order.
		uuid.WriteString(tokens[len(tokens)-1-pos])
	}
	return uuid.String(), nil
}
|
||||||
|
|
||||||
|
// huaweiChunks turns value into the argument lists for the OEM IPMI SET
// protocol, one list per chunk of at most 19 bytes.
//
// Each list is: key byte, chunk length, then the chunk bytes, all formatted as
// "0xNN". In the key byte, bit 7 set means more chunks follow and bits 0-6
// carry the chunk's offset into the string. Values longer than 63 bytes are
// truncated to 63. An empty value produces the single list
// {"0x00", "0x01", "0x00"}.
func huaweiChunks(value string) [][]string {
	const (
		maxLen    = 63
		chunkSize = 19
		moreFlag  = 0x80
	)
	if value == "" {
		return [][]string{{"0x00", "0x01", "0x00"}}
	}
	if len(value) > maxLen {
		value = value[:maxLen]
	}

	var chunks [][]string
	for start := 0; start < len(value); start += chunkSize {
		stop := start + chunkSize
		header := byte(start) | moreFlag
		if stop >= len(value) {
			// Final chunk: clamp to the end and leave the "more" bit clear.
			stop = len(value)
			header = byte(start)
		}
		piece := value[start:stop]

		args := make([]string, 0, 2+len(piece))
		args = append(args, fmt.Sprintf("0x%02x", header), fmt.Sprintf("0x%02x", len(piece)))
		for i := 0; i < len(piece); i++ {
			args = append(args, fmt.Sprintf("0x%02x", piece[i]))
		}
		chunks = append(chunks, args)
	}
	return chunks
}
|
||||||
|
|
||||||
|
func (h *handler) handleAPIHuaweiElabelRead(w http.ResponseWriter, r *http.Request) {
|
||||||
|
ctx, cancel := context.WithTimeout(r.Context(), 60*time.Second)
|
||||||
|
defer cancel()
|
||||||
|
|
||||||
|
var fields []huaweiField
|
||||||
|
for _, def := range huaweiElabelDefs {
|
||||||
|
val, err := huaweiGetRaw(ctx, def)
|
||||||
|
if err != nil {
|
||||||
|
// First field failure likely means no Huawei BMC — abort with error.
|
||||||
|
if len(fields) == 0 {
|
||||||
|
msg := strings.TrimSpace(err.Error())
|
||||||
|
writeError(w, http.StatusInternalServerError, "huawei elabel not available: "+msg)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
val = ""
|
||||||
|
}
|
||||||
|
fields = append(fields, huaweiField{
|
||||||
|
Name: def.Name,
|
||||||
|
Key: def.Key,
|
||||||
|
Value: val,
|
||||||
|
ReadOnly: def.Special == "guid" || def.Special == "chassis-type",
|
||||||
|
})
|
||||||
|
}
|
||||||
|
writeJSON(w, fields)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (h *handler) handleAPIHuaweiElabelWrite(w http.ResponseWriter, r *http.Request) {
|
||||||
|
var req struct {
|
||||||
|
Changes []huaweiChange `json:"changes"`
|
||||||
|
}
|
||||||
|
if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
|
||||||
|
writeError(w, http.StatusBadRequest, "invalid JSON")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
if len(req.Changes) == 0 {
|
||||||
|
writeError(w, http.StatusUnprocessableEntity, "no changes provided")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
defByKey := make(map[string]huaweiFieldDef, len(huaweiElabelDefs))
|
||||||
|
for _, d := range huaweiElabelDefs {
|
||||||
|
defByKey[d.Key] = d
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, c := range req.Changes {
|
||||||
|
def, ok := defByKey[c.Key]
|
||||||
|
if !ok {
|
||||||
|
writeError(w, http.StatusUnprocessableEntity, "unknown field key: "+c.Key)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
if def.Special == "guid" || def.Special == "chassis-type" {
|
||||||
|
writeError(w, http.StatusUnprocessableEntity, "field is read-only: "+c.Key)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
if len(c.Value) > 63 {
|
||||||
|
writeError(w, http.StatusUnprocessableEntity, "value too long (max 63 chars): "+c.Key)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
for _, ch := range c.Value {
|
||||||
|
if ch < 0x20 || ch > 0x7E {
|
||||||
|
writeError(w, http.StatusUnprocessableEntity, "non-printable character in value for: "+c.Key)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
t := &Task{
|
||||||
|
ID: newJobID("huawei-elabel-write"),
|
||||||
|
Name: fmt.Sprintf("Huawei Elabel Write (%d field(s))", len(req.Changes)),
|
||||||
|
Target: "huawei-elabel-write",
|
||||||
|
Priority: defaultTaskPriority("huawei-elabel-write", taskParams{}),
|
||||||
|
Status: TaskPending,
|
||||||
|
CreatedAt: time.Now(),
|
||||||
|
params: taskParams{HuaweiElabelChanges: req.Changes},
|
||||||
|
}
|
||||||
|
globalQueue.enqueue(t)
|
||||||
|
writeJSON(w, map[string]string{"task_id": t.ID})
|
||||||
|
}
|
||||||
|
|
||||||
|
func runHuaweiElabelWriteTask(ctx context.Context, j *jobState, p taskParams) error {
|
||||||
|
defByKey := make(map[string]huaweiFieldDef, len(huaweiElabelDefs))
|
||||||
|
for _, d := range huaweiElabelDefs {
|
||||||
|
defByKey[d.Key] = d
|
||||||
|
}
|
||||||
|
|
||||||
|
// Enable device name effective flag before writing.
|
||||||
|
enableCmd := exec.CommandContext(ctx, "ipmitool", "raw", "0x30", "0x90", "0x21", "0x04", "0x01")
|
||||||
|
if out, err := enableCmd.CombinedOutput(); err != nil {
|
||||||
|
j.append("Warning: enable flag: " + strings.TrimSpace(string(out)))
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, c := range p.HuaweiElabelChanges {
|
||||||
|
def := defByKey[c.Key]
|
||||||
|
setPrefix := []string{
|
||||||
|
"0x30", "0x90", "0x04",
|
||||||
|
fmt.Sprintf("0x%02x", def.FruID),
|
||||||
|
fmt.Sprintf("0x%02x", def.TypeID),
|
||||||
|
fmt.Sprintf("0x%02x", def.FieldID),
|
||||||
|
}
|
||||||
|
|
||||||
|
chunks := huaweiChunks(c.Value)
|
||||||
|
j.append(fmt.Sprintf("Setting %s = %q (%d chunk(s))", c.Key, c.Value, len(chunks)))
|
||||||
|
|
||||||
|
for _, chunk := range chunks {
|
||||||
|
args := append([]string{"raw"}, setPrefix...)
|
||||||
|
args = append(args, chunk...)
|
||||||
|
cmd := exec.CommandContext(ctx, "ipmitool", args...)
|
||||||
|
if err := streamCmdJob(j, cmd); err != nil {
|
||||||
|
return fmt.Errorf("set %s: %w", c.Key, err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Commit after each field.
|
||||||
|
commitCmd := exec.CommandContext(ctx, "ipmitool", "raw", "0x30", "0x90", "0x06", "0x00", "0xAA")
|
||||||
|
if out, err := commitCmd.CombinedOutput(); err != nil {
|
||||||
|
return fmt.Errorf("commit after %s: %w (output: %s)", c.Key, err, strings.TrimSpace(string(out)))
|
||||||
|
}
|
||||||
|
j.append("Committed " + c.Key)
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
204
audit/internal/webui/ipmi_fru.go
Normal file
204
audit/internal/webui/ipmi_fru.go
Normal file
@@ -0,0 +1,204 @@
|
|||||||
|
package webui
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"encoding/json"
|
||||||
|
"fmt"
|
||||||
|
"net/http"
|
||||||
|
"os"
|
||||||
|
"os/exec"
|
||||||
|
"path/filepath"
|
||||||
|
"strings"
|
||||||
|
"time"
|
||||||
|
"unicode"
|
||||||
|
)
|
||||||
|
|
||||||
|
// fruField is one "name : value" line parsed from ipmitool FRU output, plus
// the area and index used to address it in an edit.
type fruField struct {
	// Name is the field label as printed by ipmitool.
	Name string `json:"name"`
	// Value is the text after the separator.
	Value string `json:"value"`
	// Editable tells the UI whether to offer the field for editing.
	Editable bool `json:"editable"`
	// Area is the FRU area letter; omitted from JSON when empty.
	Area string `json:"area,omitempty"`
	// Index is the field index within Area; omitted from JSON when zero.
	Index int `json:"index,omitempty"`
}
|
||||||
|
|
||||||
|
// fruChange is one requested FRU edit: the target area and index, the display
// name of the field, and the new value.
type fruChange struct {
	// Area is the FRU area letter.
	Area string `json:"area"`
	// Index is the field index within Area.
	Index int `json:"index"`
	// Name is the display name of the field.
	Name string `json:"name"`
	// Value is the new content to write.
	Value string `json:"value"`
}
|
||||||
|
|
||||||
|
// fruEditableFields maps a FRU field display name to the area letter and field
// index passed to "ipmitool fru edit". Both the vendor-documentation names and
// the abbreviated names printed by ipmitool are listed, so several keys may
// point at the same area/index pair.
var fruEditableFields = map[string]struct {
	Area  string
	Index int
}{
	// Chassis area.
	"Chassis Part Number":   {Area: "c", Index: 0},
	"Chassis Serial Number": {Area: "c", Index: 1},
	"Chassis Serial":        {Area: "c", Index: 1},
	"Chassis Extra":         {Area: "c", Index: 2},
	// Board area.
	"Board Manufacturer":  {Area: "b", Index: 0},
	"Board Mfg":           {Area: "b", Index: 0},
	"Board Product Name":  {Area: "b", Index: 1},
	"Board Product":       {Area: "b", Index: 1},
	"Board Serial Number": {Area: "b", Index: 2},
	"Board Serial":        {Area: "b", Index: 2},
	"Board Part Number":   {Area: "b", Index: 3},
	// Product area.
	"Product Manufacturer":  {Area: "p", Index: 0},
	"Product Name":          {Area: "p", Index: 1},
	"Product Part Number":   {Area: "p", Index: 2},
	"Product Version":       {Area: "p", Index: 3},
	"Product Serial Number": {Area: "p", Index: 4},
	"Product Serial":        {Area: "p", Index: 4},
}
|
||||||
|
|
||||||
|
func parseFRUOutput(output string) []fruField {
|
||||||
|
var fields []fruField
|
||||||
|
for _, line := range strings.Split(output, "\n") {
|
||||||
|
// Lines look like: " Field Name : value"
|
||||||
|
trimmed := strings.TrimLeft(line, " \t")
|
||||||
|
if trimmed == "" {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
colon := strings.Index(trimmed, " : ")
|
||||||
|
if colon < 0 {
|
||||||
|
// try ": " with no leading space before colon
|
||||||
|
colon = strings.Index(trimmed, ": ")
|
||||||
|
if colon < 0 {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
name := strings.TrimSpace(trimmed[:colon])
|
||||||
|
value := strings.TrimSpace(trimmed[colon+2:])
|
||||||
|
if name == "" {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
editable, area, idx := fruFieldMeta(name)
|
||||||
|
fields = append(fields, fruField{Name: name, Value: value, Editable: editable, Area: area, Index: idx})
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
name := strings.TrimSpace(trimmed[:colon])
|
||||||
|
value := strings.TrimSpace(trimmed[colon+3:])
|
||||||
|
if name == "" {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
editable, area, idx := fruFieldMeta(name)
|
||||||
|
fields = append(fields, fruField{Name: name, Value: value, Editable: editable, Area: area, Index: idx})
|
||||||
|
}
|
||||||
|
return fields
|
||||||
|
}
|
||||||
|
|
||||||
|
func fruFieldMeta(name string) (editable bool, area string, index int) {
|
||||||
|
if e, ok := fruEditableFields[name]; ok {
|
||||||
|
return true, e.Area, e.Index
|
||||||
|
}
|
||||||
|
// All fields are shown as editable; server will reject unknown fields.
|
||||||
|
return true, "", 0
|
||||||
|
}
|
||||||
|
|
||||||
|
func (h *handler) handleAPIIPMIFRURead(w http.ResponseWriter, r *http.Request) {
|
||||||
|
ctx, cancel := context.WithTimeout(r.Context(), 30*time.Second)
|
||||||
|
defer cancel()
|
||||||
|
|
||||||
|
out, err := exec.CommandContext(ctx, "ipmitool", "fru", "print", "0").CombinedOutput()
|
||||||
|
if err != nil {
|
||||||
|
msg := strings.TrimSpace(string(out))
|
||||||
|
if msg == "" {
|
||||||
|
msg = err.Error()
|
||||||
|
}
|
||||||
|
writeError(w, http.StatusInternalServerError, "ipmitool fru print: "+msg)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
fields := parseFRUOutput(string(out))
|
||||||
|
writeJSON(w, fields)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (h *handler) handleAPIIPMIFRUWrite(w http.ResponseWriter, r *http.Request) {
|
||||||
|
var req struct {
|
||||||
|
Changes []fruChange `json:"changes"`
|
||||||
|
}
|
||||||
|
if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
|
||||||
|
writeError(w, http.StatusBadRequest, "invalid JSON")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
if len(req.Changes) == 0 {
|
||||||
|
writeError(w, http.StatusUnprocessableEntity, "no changes provided")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
validAreas := map[string]bool{"c": true, "b": true, "p": true}
|
||||||
|
for i, c := range req.Changes {
|
||||||
|
if c.Area == "" {
|
||||||
|
e, ok := fruEditableFields[c.Name]
|
||||||
|
if !ok {
|
||||||
|
writeError(w, http.StatusUnprocessableEntity, "field not writable via ipmitool: "+c.Name)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
req.Changes[i].Area = e.Area
|
||||||
|
req.Changes[i].Index = e.Index
|
||||||
|
c = req.Changes[i]
|
||||||
|
}
|
||||||
|
if !validAreas[c.Area] {
|
||||||
|
writeError(w, http.StatusUnprocessableEntity, "invalid area: "+c.Area)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
if c.Index < 0 || c.Index > 9 {
|
||||||
|
writeError(w, http.StatusUnprocessableEntity, fmt.Sprintf("invalid index %d", c.Index))
|
||||||
|
return
|
||||||
|
}
|
||||||
|
if len(c.Value) > 64 {
|
||||||
|
writeError(w, http.StatusUnprocessableEntity, "value too long (max 64 chars)")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
for _, ch := range c.Value {
|
||||||
|
if ch > unicode.MaxASCII || (ch < 0x20 && ch != 0) {
|
||||||
|
writeError(w, http.StatusUnprocessableEntity, "value contains non-printable characters")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
t := &Task{
|
||||||
|
ID: newJobID("ipmi-fru-write"),
|
||||||
|
Name: fmt.Sprintf("IPMI FRU Write (%d field(s))", len(req.Changes)),
|
||||||
|
Target: "ipmi-fru-write",
|
||||||
|
Priority: defaultTaskPriority("ipmi-fru-write", taskParams{}),
|
||||||
|
Status: TaskPending,
|
||||||
|
CreatedAt: time.Now(),
|
||||||
|
params: taskParams{FRUChanges: req.Changes},
|
||||||
|
}
|
||||||
|
globalQueue.enqueue(t)
|
||||||
|
writeJSON(w, map[string]string{"task_id": t.ID})
|
||||||
|
}
|
||||||
|
|
||||||
|
func runIPMIFRUWriteTask(ctx context.Context, j *jobState, exportDir string, p taskParams) error {
|
||||||
|
// Backup current FRU state
|
||||||
|
backupDir := filepath.Join(exportDir, "fru-backups")
|
||||||
|
if err := os.MkdirAll(backupDir, 0755); err != nil {
|
||||||
|
return fmt.Errorf("mkdir fru-backups: %w", err)
|
||||||
|
}
|
||||||
|
stamp := time.Now().Format("20060102150405")
|
||||||
|
backupPath := filepath.Join(backupDir, "fru-"+stamp+".txt")
|
||||||
|
|
||||||
|
backupOut, err := exec.CommandContext(ctx, "ipmitool", "fru", "print", "0").CombinedOutput()
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("backup fru print: %w", err)
|
||||||
|
}
|
||||||
|
if err := os.WriteFile(backupPath, backupOut, 0644); err != nil {
|
||||||
|
return fmt.Errorf("write backup: %w", err)
|
||||||
|
}
|
||||||
|
j.append("Backup saved to " + backupPath)
|
||||||
|
|
||||||
|
// Apply changes
|
||||||
|
for _, c := range p.FRUChanges {
|
||||||
|
j.append(fmt.Sprintf("Setting %s (%s %d) = %q", c.Name, c.Area, c.Index, c.Value))
|
||||||
|
cmd := exec.CommandContext(ctx, "ipmitool", "fru", "edit", "0", "field", c.Area, fmt.Sprintf("%d", c.Index), c.Value)
|
||||||
|
if err := streamCmdJob(j, cmd); err != nil {
|
||||||
|
return fmt.Errorf("fru edit %s %d: %w", c.Area, c.Index, err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
@@ -68,6 +68,10 @@ tbody tr:hover td{background:rgba(0,0,0,.03)}
|
|||||||
.chip-warn{background:var(--warn-bg);color:var(--warn-fg);border:1px solid #c9ba9b}
|
.chip-warn{background:var(--warn-bg);color:var(--warn-fg);border:1px solid #c9ba9b}
|
||||||
.chip-fail{background:var(--crit-bg);color:var(--crit-fg);border:1px solid var(--crit-border)}
|
.chip-fail{background:var(--crit-bg);color:var(--crit-fg);border:1px solid var(--crit-border)}
|
||||||
.chip-unknown{background:var(--surface-2);color:var(--muted);border:1px solid var(--border)}
|
.chip-unknown{background:var(--surface-2);color:var(--muted);border:1px solid var(--border)}
|
||||||
|
/* Nav separator and tasks count badge */
|
||||||
|
.nav-sep{height:1px;background:rgba(255,255,255,.12);margin:6px 0}
|
||||||
|
.tasks-nav-count{background:var(--accent);color:#fff;border-radius:10px;padding:1px 7px;font-size:11px;font-weight:700;display:none;margin-left:auto}
|
||||||
|
.tasks-nav-count.active{display:inline}
|
||||||
/* Output terminal */
|
/* Output terminal */
|
||||||
.terminal{background:#1b1c1d;border:1px solid rgba(0,0,0,.2);border-radius:4px;padding:14px;font-family:monospace;font-size:12px;color:#b5cea8;max-height:400px;overflow-y:auto;white-space:pre-wrap;word-break:break-all;user-select:text;-webkit-user-select:text}
|
.terminal{background:#1b1c1d;border:1px solid rgba(0,0,0,.2);border-radius:4px;padding:14px;font-family:monospace;font-size:12px;color:#b5cea8;max-height:400px;overflow-y:auto;white-space:pre-wrap;word-break:break-all;user-select:text;-webkit-user-select:text}
|
||||||
.terminal-wrap{position:relative}.terminal-copy{position:absolute;top:6px;right:6px;background:#2d2f30;border:1px solid #444;color:#aaa;font-size:11px;padding:2px 8px;border-radius:3px;cursor:pointer;opacity:.7}.terminal-copy:hover{opacity:1}
|
.terminal-wrap{position:relative}.terminal-copy{position:absolute;top:6px;right:6px;background:#2d2f30;border:1px solid #444;color:#aaa;font-size:11px;padding:2px 8px;border-radius:3px;cursor:pointer;opacity:.7}.terminal-copy:hover{opacity:1}
|
||||||
@@ -93,14 +97,21 @@ tbody tr:hover td{background:rgba(0,0,0,.03)}
|
|||||||
}
|
}
|
||||||
|
|
||||||
func layoutNav(active string, buildLabel string) string {
|
func layoutNav(active string, buildLabel string) string {
|
||||||
items := []struct{ id, label, href, onclick string }{
|
type navItem struct {
|
||||||
{"dashboard", "Dashboard", "/", ""},
|
id, label, href string
|
||||||
{"audit", "Audit", "/audit", ""},
|
sep bool
|
||||||
{"validate", "Validate", "/validate", ""},
|
}
|
||||||
{"burn", "Burn", "/burn", ""},
|
items := []navItem{
|
||||||
{"benchmark", "Benchmark", "/benchmark", ""},
|
{id: "dashboard", label: "Dashboard", href: "/"},
|
||||||
{"tasks", "Tasks", "/tasks", ""},
|
{id: "audit", label: "1. Audit", href: "/audit"},
|
||||||
{"tools", "Tools", "/tools", ""},
|
{id: "check", label: "2. Check", href: "/check"},
|
||||||
|
{id: "load", label: "3. Load", href: "/load"},
|
||||||
|
{id: "burn", label: "4. Burn", href: "/burn"},
|
||||||
|
{id: "benchmark", label: "5. Benchmark", href: "/benchmark"},
|
||||||
|
{sep: true},
|
||||||
|
{id: "tasks", label: "Tasks", href: "/tasks"},
|
||||||
|
{id: "tools", label: "Tools", href: "/tools"},
|
||||||
|
{id: "settings", label: "Settings", href: "/settings"},
|
||||||
}
|
}
|
||||||
var b strings.Builder
|
var b strings.Builder
|
||||||
b.WriteString(`<aside class="sidebar">`)
|
b.WriteString(`<aside class="sidebar">`)
|
||||||
@@ -120,19 +131,24 @@ func layoutNav(active string, buildLabel string) string {
|
|||||||
}
|
}
|
||||||
b.WriteString(`<nav class="nav">`)
|
b.WriteString(`<nav class="nav">`)
|
||||||
for _, item := range items {
|
for _, item := range items {
|
||||||
|
if item.sep {
|
||||||
|
b.WriteString(`<div class="nav-sep"></div>`)
|
||||||
|
continue
|
||||||
|
}
|
||||||
cls := "nav-item"
|
cls := "nav-item"
|
||||||
if item.id == active {
|
if item.id == active {
|
||||||
cls += " active"
|
cls += " active"
|
||||||
}
|
}
|
||||||
if item.onclick != "" {
|
if item.id == "tasks" {
|
||||||
b.WriteString(fmt.Sprintf(`<a class="%s" href="%s" onclick="%s">%s</a>`,
|
b.WriteString(fmt.Sprintf(`<a class="%s" href="%s" id="tasks-nav-item">%s<span class="tasks-nav-count" id="tasks-nav-count"></span></a>`, cls, item.href, item.label))
|
||||||
cls, item.href, item.onclick, item.label))
|
|
||||||
} else {
|
} else {
|
||||||
b.WriteString(fmt.Sprintf(`<a class="%s" href="%s">%s</a>`,
|
b.WriteString(fmt.Sprintf(`<a class="%s" href="%s">%s</a>`, cls, item.href, item.label))
|
||||||
cls, item.href, item.label))
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
b.WriteString(`</nav>`)
|
b.WriteString(`</nav>`)
|
||||||
|
b.WriteString(`<script>`)
|
||||||
|
b.WriteString(`(function(){function u(){fetch('/api/tasks',{cache:'no-store'}).then(function(r){return r.json();}).then(function(d){var n=Array.isArray(d)?d.filter(function(t){return t.status==='pending'||t.status==='running';}).length:0;var c=document.getElementById('tasks-nav-count');var el=document.getElementById('tasks-nav-item');if(c){c.textContent=n>0?String(n):'';c.className='tasks-nav-count'+(n>0?' active':'');}if(el){el.style.color=n>0?'#f6c90e':'';}}).catch(function(){});}u();setInterval(u,5000);})();`)
|
||||||
|
b.WriteString(`</script>`)
|
||||||
b.WriteString(`</aside>`)
|
b.WriteString(`</aside>`)
|
||||||
return b.String()
|
return b.String()
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -611,3 +611,7 @@ func renderPowerBenchmarkResultsCard(exportDir string) string {
|
|||||||
b.WriteString(`</div></div>`)
|
b.WriteString(`</div></div>`)
|
||||||
return b.String()
|
return b.String()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// renderSpeed and renderEndurance are legacy wrappers; the canonical page is "5. Benchmark" at /benchmark.
|
||||||
|
func renderSpeed(opts HandlerOptions) string { return renderBenchmark(opts) }
|
||||||
|
func renderEndurance(opts HandlerOptions) string { return renderBenchmark(opts) }
|
||||||
|
|||||||
@@ -2,7 +2,7 @@ package webui
|
|||||||
|
|
||||||
func renderBurn() string {
|
func renderBurn() string {
|
||||||
return `<div class="alert alert-warn" style="margin-bottom:16px"><strong>⚠ Warning:</strong> Stress tests on this page run hardware at high load. Repeated or prolonged use may reduce hardware lifespan. Use only when necessary.</div>
|
return `<div class="alert alert-warn" style="margin-bottom:16px"><strong>⚠ Warning:</strong> Stress tests on this page run hardware at high load. Repeated or prolonged use may reduce hardware lifespan. Use only when necessary.</div>
|
||||||
<div class="alert alert-info" style="margin-bottom:16px"><strong>Scope:</strong> Burn exposes sustained GPU compute load recipes. DCGM diagnostics (` + "targeted_stress, targeted_power, pulse_test" + `) and LINPACK remain in <a href="/validate">Validate → Stress mode</a>; NCCL and NVBandwidth are available directly from <a href="/validate">Validate</a>.</div>
|
<div class="alert alert-info" style="margin-bottom:16px"><strong>Scope:</strong> Burn runs sustained GPU compute and CPU/memory stress recipes. DCGM targeted diagnostics (<code>targeted_stress</code>, <code>targeted_power</code>, <code>pulse_test</code>) and NCCL/NVBandwidth are on the <a href="/load">3. Load</a> page. For performance benchmarks, see <a href="/benchmark">5. Benchmark</a>.</div>
|
||||||
<p style="color:var(--muted);font-size:13px;margin-bottom:16px">Tasks continue in the background — view progress in <a href="/tasks">Tasks</a>.</p>
|
<p style="color:var(--muted);font-size:13px;margin-bottom:16px">Tasks continue in the background — view progress in <a href="/tasks">Tasks</a>.</p>
|
||||||
|
|
||||||
<div class="card" style="margin-bottom:16px">
|
<div class="card" style="margin-bottom:16px">
|
||||||
|
|||||||
@@ -402,93 +402,226 @@ loadNvidiaSelfHeal();
|
|||||||
}
|
}
|
||||||
|
|
||||||
func renderTools() string {
|
func renderTools() string {
|
||||||
return `<div class="card" style="margin-bottom:16px">
|
return renderNVMeFormatCard() + `
|
||||||
<div class="card-head">System Install</div>
|
|
||||||
<div class="card-body">
|
|
||||||
<div style="margin-bottom:20px">
|
|
||||||
<div style="font-weight:600;margin-bottom:8px">Install to RAM</div>
|
|
||||||
<p id="boot-source-text" style="color:var(--muted);font-size:13px;margin-bottom:8px">Detecting boot source...</p>
|
|
||||||
<p id="ram-status-text" style="color:var(--muted);font-size:13px;margin-bottom:8px">Checking...</p>
|
|
||||||
<button id="ram-install-btn" class="btn btn-primary" onclick="installToRAM()" style="display:none">▶ Copy to RAM</button>
|
|
||||||
</div>
|
|
||||||
<div style="border-top:1px solid var(--line);padding-top:20px">
|
|
||||||
<div style="font-weight:600;margin-bottom:8px">Install to Disk</div>` +
|
|
||||||
renderInstallInline() + `
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
<script>
|
|
||||||
fetch('/api/system/ram-status').then(r=>r.json()).then(d=>{
|
|
||||||
const boot = document.getElementById('boot-source-text');
|
|
||||||
const txt = document.getElementById('ram-status-text');
|
|
||||||
const btn = document.getElementById('ram-install-btn');
|
|
||||||
let source = d.device || d.source || 'unknown source';
|
|
||||||
let kind = d.kind || 'unknown';
|
|
||||||
let label = source;
|
|
||||||
if (kind === 'ram') label = 'RAM';
|
|
||||||
else if (kind === 'usb') label = 'USB (' + source + ')';
|
|
||||||
else if (kind === 'cdrom') label = 'CD-ROM (' + source + ')';
|
|
||||||
else if (kind === 'disk') label = 'disk (' + source + ')';
|
|
||||||
else label = source;
|
|
||||||
boot.textContent = 'Current boot source: ' + label + '.';
|
|
||||||
txt.textContent = d.blocked_reason || d.message || 'Checking...';
|
|
||||||
if (d.status === 'ok' || d.in_ram) {
|
|
||||||
txt.style.color = 'var(--ok, green)';
|
|
||||||
} else if (d.status === 'failed') {
|
|
||||||
txt.style.color = 'var(--err, #b91c1c)';
|
|
||||||
} else {
|
|
||||||
txt.style.color = 'var(--muted)';
|
|
||||||
}
|
|
||||||
if (d.can_start_task) {
|
|
||||||
btn.style.display = '';
|
|
||||||
btn.disabled = false;
|
|
||||||
} else {
|
|
||||||
btn.style.display = 'none';
|
|
||||||
}
|
|
||||||
});
|
|
||||||
function installToRAM() {
|
|
||||||
document.getElementById('ram-install-btn').disabled = true;
|
|
||||||
fetch('/api/system/install-to-ram', {method:'POST'}).then(r=>r.json()).then(d=>{
|
|
||||||
window.location.href = '/tasks#' + d.task_id;
|
|
||||||
});
|
|
||||||
}
|
|
||||||
</script>
|
|
||||||
|
|
||||||
<div class="card"><div class="card-head">Support Bundle</div><div class="card-body">
|
` + renderFRUEditorCard() + `
|
||||||
<p style="font-size:13px;color:var(--muted);margin-bottom:12px">Downloads a tar.gz archive of all audit files, SAT results, and logs.</p>
|
|
||||||
` + renderSupportBundleInline() + `
|
` + renderRAIDMgmtCard()
|
||||||
<div style="border-top:1px solid var(--border);margin-top:16px;padding-top:16px">
|
}
|
||||||
<div style="font-weight:600;margin-bottom:8px">USB Black-Box</div>
|
|
||||||
` + renderUSBExportInline() + `
|
func renderFRUEditorCard() string {
|
||||||
</div>
|
return `<div class="card"><div class="card-head card-head-actions">FRU / Elabel<div class="card-head-buttons"><button class="btn btn-sm btn-secondary" onclick="fruAllRead()">Read All</button></div></div><div class="card-body">
|
||||||
|
<p style="font-size:13px;color:var(--muted);margin-bottom:12px">Reads and edits hardware identity fields from all available sources. Each field shows its source method.</p>
|
||||||
|
<div id="fru-all-status" style="font-size:13px;color:var(--muted);margin-bottom:8px"></div>
|
||||||
|
<div id="fru-src-status" style="display:none;margin-bottom:10px"></div>
|
||||||
|
<div id="fru-all-table"></div>
|
||||||
</div></div>
|
</div></div>
|
||||||
|
<style>
|
||||||
<div class="card"><div class="card-head">Tool Check <button class="btn btn-sm btn-secondary" onclick="checkTools()" style="margin-left:auto">↻ Check</button></div>
|
.fru-chip{display:inline-block;font-size:10px;font-weight:600;letter-spacing:.02em;padding:1px 6px;border-radius:3px;vertical-align:middle;white-space:nowrap;margin-right:8px;flex-shrink:0}
|
||||||
<div class="card-body"><div id="tools-table"><p style="color:var(--muted);font-size:13px">Checking...</p></div></div></div>
|
.fru-chip-ipmi{background:#e8e8e8;color:#555}
|
||||||
|
.fru-chip-huawei{background:#fff0e6;color:#b83}
|
||||||
<div class="card"><div class="card-head">NVIDIA Self Heal</div><div class="card-body">` +
|
.fru-chip-saa{background:#e6f0ff;color:#557}
|
||||||
renderNvidiaSelfHealInline() + `</div></div>
|
.fru-inp-wrap{display:flex;align-items:center;gap:0}
|
||||||
|
</style>
|
||||||
<div class="card"><div class="card-head">Network</div><div class="card-body">` +
|
|
||||||
renderNetworkInline() + `</div></div>
|
|
||||||
|
|
||||||
<div class="card"><div class="card-head">Services</div><div class="card-body">` +
|
|
||||||
renderServicesInline() + `</div></div>
|
|
||||||
|
|
||||||
` + renderNVMeFormatCard() + `
|
|
||||||
|
|
||||||
<script>
|
<script>
|
||||||
function checkTools() {
|
(function(){
|
||||||
document.getElementById('tools-table').innerHTML = '<p style="color:var(--muted);font-size:13px">Checking...</p>';
|
var _actBtn='width:22px;height:22px;padding:0;font-size:13px;line-height:1;border:1px solid var(--line);border-radius:3px;background:var(--surface);cursor:pointer;vertical-align:middle;';
|
||||||
fetch('/api/tools/check').then(r=>r.json()).then(tools => {
|
var _inp='width:100%;padding:3px 6px;border:1.5px solid #888;border-radius:3px;font-size:13px;font-family:monospace;background:var(--surface);color:var(--ink);';
|
||||||
const rows = tools.map(t =>
|
|
||||||
'<tr><td>'+t.Name+'</td><td><span class="badge '+(t.OK ? 'badge-ok' : 'badge-err')+'">'+(t.OK ? '✓ '+t.Path : '✗ missing')+'</span></td></tr>'
|
var SOURCES = [
|
||||||
).join('');
|
{
|
||||||
document.getElementById('tools-table').innerHTML =
|
id: 'ipmi-fru',
|
||||||
'<table><tr><th>Tool</th><th>Status</th></tr>'+rows+'</table>';
|
label: 'IPMI FRU',
|
||||||
|
chipClass: 'fru-chip-ipmi',
|
||||||
|
url: '/api/tools/ipmi-fru',
|
||||||
|
writeUrl: '/api/tools/ipmi-fru/write',
|
||||||
|
rowAttrs: function(f) {
|
||||||
|
return 'data-source="ipmi-fru" data-area="'+esc(f.area||'')+'" data-index="'+(f.index||0)+'" data-name="'+esc(f.name)+'"';
|
||||||
|
},
|
||||||
|
writeBody: function(inp) {
|
||||||
|
return JSON.stringify({changes:[{area:inp.dataset.area,index:parseInt(inp.dataset.index,10),name:inp.dataset.name,value:inp.value}]});
|
||||||
|
},
|
||||||
|
fieldName: function(f) { return f.name; },
|
||||||
|
fieldValue: function(f) { return f.value||''; },
|
||||||
|
readOnly: function(f) { return false; },
|
||||||
|
},
|
||||||
|
{
|
||||||
|
id: 'huawei',
|
||||||
|
label: 'Huawei iBMC',
|
||||||
|
chipClass: 'fru-chip-huawei',
|
||||||
|
url: '/api/tools/huawei-elabel',
|
||||||
|
writeUrl: '/api/tools/huawei-elabel/write',
|
||||||
|
rowAttrs: function(f) {
|
||||||
|
return 'data-source="huawei" data-key="'+esc(f.key)+'"';
|
||||||
|
},
|
||||||
|
writeBody: function(inp) {
|
||||||
|
return JSON.stringify({changes:[{key:inp.dataset.key,value:inp.value}]});
|
||||||
|
},
|
||||||
|
fieldName: function(f) { return f.name; },
|
||||||
|
fieldValue: function(f) { return f.value||''; },
|
||||||
|
readOnly: function(f) { return !!f.read_only; },
|
||||||
|
},
|
||||||
|
{
|
||||||
|
id: 'saa-dmi',
|
||||||
|
label: 'SAA DMI',
|
||||||
|
chipClass: 'fru-chip-saa',
|
||||||
|
url: '/api/tools/saa-dmi',
|
||||||
|
writeUrl: '/api/tools/saa-dmi/write',
|
||||||
|
rowAttrs: function(f) {
|
||||||
|
return 'data-source="saa-dmi" data-shn="'+esc(f.shn)+'"';
|
||||||
|
},
|
||||||
|
writeBody: function(inp) {
|
||||||
|
return JSON.stringify({changes:[{shn:inp.dataset.shn,value:inp.value}]});
|
||||||
|
},
|
||||||
|
fieldName: function(f) { return f.name; },
|
||||||
|
fieldValue: function(f) { return f.value||''; },
|
||||||
|
readOnly: function(f) { return false; },
|
||||||
|
},
|
||||||
|
];
|
||||||
|
|
||||||
|
function esc(s){return String(s==null?'':s).replace(/&/g,'&amp;').replace(/</g,'&lt;').replace(/>/g,'&gt;').replace(/"/g,'&quot;');}
|
||||||
|
|
||||||
|
function renderSrcStatus(perSource) {
|
||||||
|
var bar = document.getElementById('fru-src-status');
|
||||||
|
if (!perSource.length) { bar.style.display = 'none'; bar.innerHTML = ''; return; }
|
||||||
|
var html = '';
|
||||||
|
perSource.forEach(function(p) {
|
||||||
|
var state, color;
|
||||||
|
if (p.ok) {
|
||||||
|
state = p.count + ' field(s) available';
|
||||||
|
color = 'var(--ok-fg,green)';
|
||||||
|
} else if (/not activated|product key|SFT-DCMS|SFT-OOB/i.test(p.reason)) {
|
||||||
|
state = 'requires Supermicro license (SFT-OOB-LIC / SFT-DCMS-SINGLE) — activate on BMC';
|
||||||
|
color = 'var(--crit-fg,#9f3a38)';
|
||||||
|
} else {
|
||||||
|
state = p.reason || 'unavailable';
|
||||||
|
color = 'var(--muted)';
|
||||||
|
}
|
||||||
|
html += '<div style="display:flex;align-items:center;gap:8px;font-size:12px;margin:3px 0">'
|
||||||
|
+ '<span class="fru-chip '+p.src.chipClass+'">'+p.src.label+'</span>'
|
||||||
|
+ '<span style="color:'+color+'">'+esc(state)+'</span>'
|
||||||
|
+ '</div>';
|
||||||
});
|
});
|
||||||
|
bar.innerHTML = html;
|
||||||
|
bar.style.display = '';
|
||||||
}
|
}
|
||||||
checkTools();
|
|
||||||
|
window.fruAllRead = function() {
|
||||||
|
var status = document.getElementById('fru-all-status');
|
||||||
|
var table = document.getElementById('fru-all-table');
|
||||||
|
status.textContent = 'Reading…'; status.style.color = 'var(--muted)';
|
||||||
|
table.innerHTML = '';
|
||||||
|
|
||||||
|
var fetches = SOURCES.map(function(src) {
|
||||||
|
return fetch(src.url, {cache:'no-store'})
|
||||||
|
.then(function(r){ return r.json().then(function(d){ if(!r.ok) throw new Error(d.error||r.statusText); return d; }); });
|
||||||
|
});
|
||||||
|
|
||||||
|
Promise.allSettled(fetches).then(function(results) {
|
||||||
|
var rows = '';
|
||||||
|
var totalFields = 0;
|
||||||
|
var perSource = [];
|
||||||
|
|
||||||
|
results.forEach(function(res, i) {
|
||||||
|
var src = SOURCES[i];
|
||||||
|
if (res.status === 'rejected' || !Array.isArray(res.value) || res.value.length === 0) {
|
||||||
|
var reason = '';
|
||||||
|
if (res.status === 'rejected' && res.reason) reason = res.reason.message;
|
||||||
|
else reason = 'no editable fields returned';
|
||||||
|
perSource.push({src:src, ok:false, count:0, reason:reason});
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
perSource.push({src:src, ok:true, count:res.value.length, reason:''});
|
||||||
|
res.value.forEach(function(f) {
|
||||||
|
var val = esc(src.fieldValue(f));
|
||||||
|
var ro = src.readOnly(f);
|
||||||
|
var attrs = ro ? '' : (' '+src.rowAttrs(f));
|
||||||
|
rows += '<tr>'
|
||||||
|
+ '<td style="white-space:nowrap;padding-right:4px;vertical-align:middle">'
|
||||||
|
+ '<span class="fru-chip '+src.chipClass+'">'+src.label+'</span>'
|
||||||
|
+ '</td>'
|
||||||
|
+ '<td style="color:var(--muted);white-space:nowrap;padding-right:16px;vertical-align:middle;font-size:13px">'+esc(src.fieldName(f))+'</td>'
|
||||||
|
+ '<td style="vertical-align:middle">'
|
||||||
|
+ (ro
|
||||||
|
? '<span style="font-family:monospace;font-size:13px;color:var(--muted)">'+val+'</span>'
|
||||||
|
: '<input class="fru-uni-inp" style="'+_inp+'" value="'+val+'" data-original="'+val+'"'+attrs+' oninput="fruUniChanged(this)">')
|
||||||
|
+ '</td>'
|
||||||
|
+ '<td class="fru-uni-act" style="display:none;white-space:nowrap;padding-left:6px;vertical-align:middle">'
|
||||||
|
+ '<button style="'+_actBtn+'color:var(--ok-fg,green);margin-right:3px" title="Save" onclick="fruUniSave(this)">✓</button>'
|
||||||
|
+ '<button style="'+_actBtn+'color:var(--crit-fg,#9f3a38)" title="Cancel" onclick="fruUniCancel(this)">✗</button>'
|
||||||
|
+ '<span class="fru-uni-msg" style="font-size:11px;margin-left:5px;color:var(--muted)"></span>'
|
||||||
|
+ '</td>'
|
||||||
|
+ '</tr>';
|
||||||
|
totalFields++;
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
|
renderSrcStatus(perSource);
|
||||||
|
|
||||||
|
if (totalFields === 0) {
|
||||||
|
status.textContent = 'No editable fields available — see per-source status below.';
|
||||||
|
status.style.color = 'var(--crit-fg,#9f3a38)';
|
||||||
|
table.innerHTML = '';
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
table.innerHTML = '<table style="width:100%;border-collapse:collapse">'+rows+'</table>';
|
||||||
|
status.textContent = totalFields + ' field(s) loaded';
|
||||||
|
status.style.color = 'var(--muted)';
|
||||||
|
});
|
||||||
|
};
|
||||||
|
|
||||||
|
window.fruUniChanged = function(inp) {
|
||||||
|
var row = inp.closest('tr');
|
||||||
|
row.querySelector('.fru-uni-act').style.display = inp.value !== inp.dataset.original ? '' : 'none';
|
||||||
|
row.querySelector('.fru-uni-msg').textContent = '';
|
||||||
|
};
|
||||||
|
|
||||||
|
window.fruUniCancel = function(btn) {
|
||||||
|
var row = btn.closest('tr');
|
||||||
|
var inp = row.querySelector('.fru-uni-inp');
|
||||||
|
inp.value = inp.dataset.original;
|
||||||
|
row.querySelector('.fru-uni-act').style.display = 'none';
|
||||||
|
row.querySelector('.fru-uni-msg').textContent = '';
|
||||||
|
};
|
||||||
|
|
||||||
|
window.fruUniSave = function(btn) {
|
||||||
|
var row = btn.closest('tr');
|
||||||
|
var inp = row.querySelector('.fru-uni-inp');
|
||||||
|
var msg = row.querySelector('.fru-uni-msg');
|
||||||
|
var cancelBtn = row.querySelectorAll('.fru-uni-act button')[1];
|
||||||
|
var src = SOURCES.find(function(s){ return s.id === inp.dataset.source; });
|
||||||
|
if (!src) { msg.textContent = 'Unknown source'; msg.style.color='var(--crit-fg)'; return; }
|
||||||
|
|
||||||
|
btn.disabled = true; cancelBtn.disabled = true;
|
||||||
|
msg.textContent = '…'; msg.style.color = 'var(--muted)';
|
||||||
|
|
||||||
|
fetch(src.writeUrl, {method:'POST', headers:{'Content-Type':'application/json'}, body:src.writeBody(inp)})
|
||||||
|
.then(function(r){ return r.json().then(function(d){ if(!r.ok) throw new Error(d.error||r.statusText); return d; }); })
|
||||||
|
.then(function(d) {
|
||||||
|
var poll = setInterval(function() {
|
||||||
|
fetch('/api/tasks',{cache:'no-store'}).then(function(r){return r.json();}).then(function(tasks){
|
||||||
|
var t = Array.isArray(tasks) ? tasks.find(function(x){return x.id===d.task_id;}) : null;
|
||||||
|
if (!t) return;
|
||||||
|
if (t.status==='done') {
|
||||||
|
clearInterval(poll);
|
||||||
|
inp.dataset.original = inp.value;
|
||||||
|
row.querySelector('.fru-uni-act').style.display = 'none';
|
||||||
|
msg.textContent = ''; msg.style.color = '';
|
||||||
|
} else if (t.status==='failed'||t.status==='cancelled') {
|
||||||
|
clearInterval(poll);
|
||||||
|
msg.textContent = t.error||t.status; msg.style.color = 'var(--crit-fg)';
|
||||||
|
btn.disabled = false; cancelBtn.disabled = false;
|
||||||
|
}
|
||||||
|
});
|
||||||
|
}, 1500);
|
||||||
|
})
|
||||||
|
.catch(function(e) {
|
||||||
|
msg.textContent = 'Error: '+e.message; msg.style.color = 'var(--crit-fg)';
|
||||||
|
btn.disabled = false; cancelBtn.disabled = false;
|
||||||
|
});
|
||||||
|
};
|
||||||
|
})();
|
||||||
</script>`
|
</script>`
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
115
audit/internal/webui/page_settings.go
Normal file
115
audit/internal/webui/page_settings.go
Normal file
@@ -0,0 +1,115 @@
|
|||||||
|
package webui
|
||||||
|
|
||||||
|
import "html"
|
||||||
|
|
||||||
|
func renderSettings(opts HandlerOptions) string {
|
||||||
|
version := opts.BuildLabel
|
||||||
|
if version == "" {
|
||||||
|
version = "dev"
|
||||||
|
}
|
||||||
|
return `<div class="card" style="margin-bottom:16px">
|
||||||
|
<div class="card-head">System Install</div>
|
||||||
|
<div class="card-body">
|
||||||
|
<div style="margin-bottom:20px">
|
||||||
|
<div style="font-weight:600;margin-bottom:8px">Install to RAM</div>
|
||||||
|
<p id="boot-source-text" style="color:var(--muted);font-size:13px;margin-bottom:8px">Detecting boot source...</p>
|
||||||
|
<p id="ram-status-text" style="color:var(--muted);font-size:13px;margin-bottom:8px">Checking...</p>
|
||||||
|
<button id="ram-install-btn" class="btn btn-primary" onclick="installToRAM()" style="display:none">▶ Copy to RAM</button>
|
||||||
|
</div>
|
||||||
|
<div style="border-top:1px solid var(--line);padding-top:20px">
|
||||||
|
<div style="font-weight:600;margin-bottom:8px">Install to Disk</div>` +
|
||||||
|
renderInstallInline() + `
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
<script>
|
||||||
|
fetch('/api/system/ram-status').then(r=>r.json()).then(d=>{
|
||||||
|
const boot = document.getElementById('boot-source-text');
|
||||||
|
const txt = document.getElementById('ram-status-text');
|
||||||
|
const btn = document.getElementById('ram-install-btn');
|
||||||
|
let kind = d.kind || 'unknown';
|
||||||
|
let source = d.device || d.source || 'unknown source';
|
||||||
|
let label = kind==='ram'?'RAM':kind==='usb'?'USB ('+source+')':kind==='cdrom'?'CD-ROM ('+source+')':kind==='disk'?'disk ('+source+')':source;
|
||||||
|
boot.textContent = 'Current boot source: ' + label + '.';
|
||||||
|
txt.textContent = d.blocked_reason || d.message || 'Checking...';
|
||||||
|
txt.style.color = (d.status==='ok'||d.in_ram)?'var(--ok,green)':d.status==='failed'?'var(--err,#b91c1c)':'var(--muted)';
|
||||||
|
if (d.can_start_task) { btn.style.display=''; btn.disabled=false; } else { btn.style.display='none'; }
|
||||||
|
});
|
||||||
|
function installToRAM() {
|
||||||
|
document.getElementById('ram-install-btn').disabled = true;
|
||||||
|
fetch('/api/system/install-to-ram', {method:'POST'}).then(r=>r.json()).then(d=>{
|
||||||
|
window.location.href = '/tasks#' + d.task_id;
|
||||||
|
});
|
||||||
|
}
|
||||||
|
</script>
|
||||||
|
|
||||||
|
<div class="card"><div class="card-head">Support Bundle</div><div class="card-body">
|
||||||
|
<p style="font-size:13px;color:var(--muted);margin-bottom:12px">Downloads a tar.gz archive of all audit files, SAT results, and logs.</p>
|
||||||
|
` + renderSupportBundleInline() + `
|
||||||
|
<div style="border-top:1px solid var(--border);margin-top:16px;padding-top:16px">
|
||||||
|
<div style="font-weight:600;margin-bottom:8px">USB Black-Box</div>
|
||||||
|
` + renderUSBExportInline() + `
|
||||||
|
</div>
|
||||||
|
</div></div>
|
||||||
|
|
||||||
|
<div class="card"><div class="card-head">Tool Check <button class="btn btn-sm btn-secondary" onclick="checkTools()" style="margin-left:auto">↻ Check</button></div>
|
||||||
|
<div class="card-body"><div id="tools-table"><p style="color:var(--muted);font-size:13px">Checking...</p></div></div></div>
|
||||||
|
<script>
|
||||||
|
function checkTools() {
|
||||||
|
document.getElementById('tools-table').innerHTML = '<p style="color:var(--muted);font-size:13px">Checking...</p>';
|
||||||
|
fetch('/api/tools/check').then(r=>r.json()).then(tools => {
|
||||||
|
const rows = tools.map(t =>
|
||||||
|
'<tr><td>'+t.Name+'</td><td><span class="badge '+(t.OK?'badge-ok':'badge-err')+'">'+(t.OK?'✓ '+t.Path:'✗ missing')+'</span></td></tr>'
|
||||||
|
).join('');
|
||||||
|
document.getElementById('tools-table').innerHTML = '<table><tr><th>Tool</th><th>Status</th></tr>'+rows+'</table>';
|
||||||
|
});
|
||||||
|
}
|
||||||
|
checkTools();
|
||||||
|
</script>
|
||||||
|
|
||||||
|
<div class="card"><div class="card-head">NVIDIA Self Heal</div><div class="card-body">` +
|
||||||
|
renderNvidiaSelfHealInline() + `</div></div>
|
||||||
|
|
||||||
|
<div class="card"><div class="card-head">Network</div><div class="card-body">` +
|
||||||
|
renderNetworkInline() + `</div></div>
|
||||||
|
|
||||||
|
<div class="card"><div class="card-head">Services</div><div class="card-body">` +
|
||||||
|
renderServicesInline() + `</div></div>
|
||||||
|
|
||||||
|
<div class="card">
|
||||||
|
<div class="card-head">Build Info</div>
|
||||||
|
<div class="card-body">
|
||||||
|
<table style="width:auto">
|
||||||
|
<tbody>
|
||||||
|
<tr><td style="color:var(--muted);padding-right:24px">Version</td><td>` + html.EscapeString(version) + `</td></tr>
|
||||||
|
<tr><td style="color:var(--muted);padding-right:24px">Title</td><td>` + html.EscapeString(opts.Title) + `</td></tr>
|
||||||
|
</tbody>
|
||||||
|
</table>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<div class="card">
|
||||||
|
<div class="card-head">Power</div>
|
||||||
|
<div class="card-body">
|
||||||
|
<div style="display:flex;gap:8px;align-items:center">
|
||||||
|
<button class="btn btn-secondary btn-sm" onclick="systemPower('reboot')">Reboot</button>
|
||||||
|
<button class="btn btn-secondary btn-sm" onclick="systemPower('shutdown')">Shutdown</button>
|
||||||
|
<span id="power-status" style="font-size:12px;color:var(--muted)"></span>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<script>
|
||||||
|
function systemPower(action) {
|
||||||
|
var label = action === 'reboot' ? 'reboot' : 'shut down';
|
||||||
|
if (!confirm('Are you sure you want to ' + label + ' the server?')) return;
|
||||||
|
var el = document.getElementById('power-status');
|
||||||
|
if (el) el.textContent = action === 'reboot' ? 'Rebooting...' : 'Shutting down...';
|
||||||
|
fetch('/api/system/' + action, {method: 'POST'})
|
||||||
|
.then(function(r) { return r.json(); })
|
||||||
|
.catch(function(e) { if (el) el.textContent = 'Error: ' + e.message; });
|
||||||
|
}
|
||||||
|
</script>
|
||||||
|
|
||||||
|
`
|
||||||
|
}
|
||||||
@@ -68,6 +68,14 @@ func validateTotalStressSec(n int) int {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func renderValidate(opts HandlerOptions) string {
|
func renderValidate(opts HandlerOptions) string {
|
||||||
|
return renderValidateMode(opts, false)
|
||||||
|
}
|
||||||
|
|
||||||
|
func renderValidateStress(opts HandlerOptions) string {
|
||||||
|
return renderValidateMode(opts, true)
|
||||||
|
}
|
||||||
|
|
||||||
|
func renderValidateMode(opts HandlerOptions, stressDefault bool) string {
|
||||||
inv := loadValidateInventory(opts)
|
inv := loadValidateInventory(opts)
|
||||||
n := inv.NvidiaGPUCount
|
n := inv.NvidiaGPUCount
|
||||||
validateTotalStr := validateFmtDur(validateTotalValidateSec(n))
|
validateTotalStr := validateFmtDur(validateTotalValidateSec(n))
|
||||||
@@ -76,26 +84,49 @@ func renderValidate(opts HandlerOptions) string {
|
|||||||
if n > 0 {
|
if n > 0 {
|
||||||
gpuNote = fmt.Sprintf(" (%d GPU)", n)
|
gpuNote = fmt.Sprintf(" (%d GPU)", n)
|
||||||
}
|
}
|
||||||
return `<div class="alert alert-info" style="margin-bottom:16px"><strong>Non-destructive:</strong> Validate tests collect diagnostics only. They do not write to disks, do not run sustained load, and do not increment hardware wear counters.</div>
|
estStr := validateTotalStr
|
||||||
<p style="color:var(--muted);font-size:13px;margin-bottom:16px">Tasks continue in the background — view progress in <a href="/tasks">Tasks</a>.</p>
|
if stressDefault {
|
||||||
|
estStr = stressTotalStr
|
||||||
|
}
|
||||||
|
alert := `<div class="alert alert-info" style="margin-bottom:16px"><strong>Non-destructive:</strong> Validate tests collect diagnostics only. They do not write to disks, do not run sustained load, and do not increment hardware wear counters.</div>`
|
||||||
|
if stressDefault {
|
||||||
|
alert = `<div class="alert alert-warn" style="margin-bottom:16px"><strong>⚠ Stress mode:</strong> Runs extended load tests — CPU stress-ng, memory passes, DCGM targeted diagnostics. Higher wear than Validate.</div>`
|
||||||
|
}
|
||||||
|
|
||||||
<div class="card" style="margin-bottom:16px">
|
stressOnlyCards := ""
|
||||||
<div class="card-head">Validate Profile</div>
|
if stressDefault {
|
||||||
<div class="card-body validate-profile-body">
|
stressOnlyCards = renderSATCard("nvidia-targeted-stress", "NVIDIA GPU Targeted Stress", "runNvidiaValidateSet('nvidia-targeted-stress')", "", renderValidateCardBody(
|
||||||
<div class="validate-profile-col">
|
inv.NVIDIA,
|
||||||
<div class="form-row" style="margin:12px 0 0"><label>Mode</label></div>
|
`Runs a controlled NVIDIA DCGM load to check stability under moderate stress.`,
|
||||||
<label class="cb-row"><input type="radio" name="sat-mode" id="sat-mode-validate" value="validate" checked onchange="satModeChanged()"><span>Validate — quick non-destructive check</span></label>
|
`<code>dcgmi diag targeted_stress</code>`,
|
||||||
<label class="cb-row"><input type="radio" name="sat-mode" id="sat-mode-stress" value="stress" onchange="satModeChanged()"><span>Stress — thorough load test (` + stressTotalStr + gpuNote + `)</span></label>
|
validateFmtDur(platform.SATEstimatedNvidiaTargetedStressSec)+` (all GPUs simultaneously).`,
|
||||||
</div>
|
)) +
|
||||||
<div class="validate-profile-col validate-profile-action">
|
renderSATCard("nvidia-targeted-power", "NVIDIA Targeted Power", "runNvidiaValidateSet('nvidia-targeted-power')", "", renderValidateCardBody(
|
||||||
<p style="color:var(--muted);font-size:12px;margin:0 0 10px">Runs validate modules sequentially. Validate: ` + validateTotalStr + gpuNote + `; Stress: ` + stressTotalStr + gpuNote + `. Estimates are based on real log data and scale with GPU count.</p>
|
inv.NVIDIA,
|
||||||
<button type="button" class="btn btn-primary" onclick="runAllSAT()">Validate one by one</button>
|
`Checks that the GPU can sustain its declared power delivery envelope. Pass/fail determined by DCGM.`,
|
||||||
<div style="margin-top:12px">
|
`<code>dcgmi diag targeted_power</code>`,
|
||||||
|
validateFmtDur(platform.SATEstimatedNvidiaTargetedPowerSec)+` (all GPUs simultaneously).`,
|
||||||
|
)) +
|
||||||
|
renderSATCard("nvidia-pulse", "NVIDIA PSU Pulse Test", "runNvidiaFabricValidate('nvidia-pulse')", "", renderValidateCardBody(
|
||||||
|
inv.NVIDIA,
|
||||||
|
`Tests power supply transient response by pulsing all GPUs simultaneously between idle and full load. Synchronous pulses across all GPUs create worst-case PSU load spikes — running per-GPU would miss PSU-level failures.`,
|
||||||
|
`<code>dcgmi diag pulse_test</code>`,
|
||||||
|
validateFmtDur(platform.SATEstimatedNvidiaPulseTestSec)+` (all GPUs simultaneously; measured on 8-GPU system).`,
|
||||||
|
))
|
||||||
|
}
|
||||||
|
|
||||||
|
satStressModeJS := "function satStressMode() { return false; }"
|
||||||
|
if stressDefault {
|
||||||
|
satStressModeJS = "function satStressMode() { return true; }"
|
||||||
|
}
|
||||||
|
|
||||||
|
return alert + `
|
||||||
|
<p style="color:var(--muted);font-size:13px;margin-bottom:16px">Tasks continue in the background — view progress in <a href="/tasks">Tasks</a>.</p>
|
||||||
|
<div style="display:flex;align-items:center;gap:12px;margin-bottom:16px">
|
||||||
|
<button type="button" class="btn btn-primary" onclick="runAllSAT()">Run All</button>
|
||||||
<span id="sat-all-status" style="font-size:12px;color:var(--muted)"></span>
|
<span id="sat-all-status" style="font-size:12px;color:var(--muted)"></span>
|
||||||
</div>
|
<span style="font-size:12px;color:var(--muted)">est. ` + estStr + gpuNote + `</span>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
|
|
||||||
<div class="grid3">
|
<div class="grid3">
|
||||||
` + renderSATCard("cpu", "CPU", "runSAT('cpu')", "", renderValidateCardBody(
|
` + renderSATCard("cpu", "CPU", "runSAT('cpu')", "", renderValidateCardBody(
|
||||||
@@ -122,7 +153,7 @@ func renderValidate(opts HandlerOptions) string {
|
|||||||
<div class="card-head">NVIDIA GPU Selection</div>
|
<div class="card-head">NVIDIA GPU Selection</div>
|
||||||
<div class="card-body">
|
<div class="card-body">
|
||||||
<p style="font-size:12px;color:var(--muted);margin:0 0 8px">` + inv.NVIDIA + `</p>
|
<p style="font-size:12px;color:var(--muted);margin:0 0 8px">` + inv.NVIDIA + `</p>
|
||||||
<p style="font-size:12px;color:var(--muted);margin:0 0 10px">All NVIDIA validate tasks use only the GPUs selected here. The same selection is used by Validate one by one.</p>
|
<p style="font-size:12px;color:var(--muted);margin:0 0 10px">All NVIDIA validate tasks use only the GPUs selected here. The same selection is used by Run All.</p>
|
||||||
<div style="display:flex;gap:8px;flex-wrap:wrap;margin-bottom:8px">
|
<div style="display:flex;gap:8px;flex-wrap:wrap;margin-bottom:8px">
|
||||||
<button class="btn btn-sm btn-secondary" type="button" onclick="satSelectAllGPUs()">Select All</button>
|
<button class="btn btn-sm btn-secondary" type="button" onclick="satSelectAllGPUs()">Select All</button>
|
||||||
<button class="btn btn-sm btn-secondary" type="button" onclick="satSelectNoGPUs()">Clear</button>
|
<button class="btn btn-sm btn-secondary" type="button" onclick="satSelectNoGPUs()">Clear</button>
|
||||||
@@ -143,46 +174,19 @@ func renderValidate(opts HandlerOptions) string {
|
|||||||
validateFmtDur(platform.SATEstimatedNvidiaGPUValidateSec),
|
validateFmtDur(platform.SATEstimatedNvidiaGPUValidateSec),
|
||||||
validateFmtDur(platform.SATEstimatedNvidiaGPUStressSec)),
|
validateFmtDur(platform.SATEstimatedNvidiaGPUStressSec)),
|
||||||
)) +
|
)) +
|
||||||
`<div id="sat-card-nvidia-targeted-stress">` +
|
stressOnlyCards +
|
||||||
renderSATCard("nvidia-targeted-stress", "NVIDIA GPU Targeted Stress", "runNvidiaValidateSet('nvidia-targeted-stress')", "", renderValidateCardBody(
|
|
||||||
inv.NVIDIA,
|
|
||||||
`Runs a controlled NVIDIA DCGM load to check stability under moderate stress.`,
|
|
||||||
`<code>dcgmi diag targeted_stress</code>`,
|
|
||||||
"Skipped in Validate. Stress: " + validateFmtDur(platform.SATEstimatedNvidiaTargetedStressSec) + ` (all GPUs simultaneously).<p id="sat-ts-mode-hint" style="color:var(--warn-fg);font-size:12px;margin:8px 0 0">Only runs in Stress mode. Switch mode above to enable in Run All.</p>`,
|
|
||||||
)) +
|
|
||||||
`</div>` +
|
|
||||||
`<div id="sat-card-nvidia-targeted-power">` +
|
|
||||||
renderSATCard("nvidia-targeted-power", "NVIDIA Targeted Power", "runNvidiaValidateSet('nvidia-targeted-power')", "", renderValidateCardBody(
|
|
||||||
inv.NVIDIA,
|
|
||||||
`Checks that the GPU can sustain its declared power delivery envelope. Pass/fail determined by DCGM.`,
|
|
||||||
`<code>dcgmi diag targeted_power</code>`,
|
|
||||||
"Skipped in Validate. Stress: " + validateFmtDur(platform.SATEstimatedNvidiaTargetedPowerSec) + ` (all GPUs simultaneously).<p id="sat-tp-mode-hint" style="color:var(--warn-fg);font-size:12px;margin:8px 0 0">Only runs in Stress mode. Switch mode above to enable in Run All.</p>`,
|
|
||||||
)) +
|
|
||||||
`</div>` +
|
|
||||||
`<div id="sat-card-nvidia-pulse">` +
|
|
||||||
renderSATCard("nvidia-pulse", "NVIDIA PSU Pulse Test", "runNvidiaFabricValidate('nvidia-pulse')", "", renderValidateCardBody(
|
|
||||||
inv.NVIDIA,
|
|
||||||
`Tests power supply transient response by pulsing all GPUs simultaneously between idle and full load. Synchronous pulses across all GPUs create worst-case PSU load spikes — running per-GPU would miss PSU-level failures.`,
|
|
||||||
`<code>dcgmi diag pulse_test</code>`,
|
|
||||||
`Skipped in Validate. Stress: `+validateFmtDur(platform.SATEstimatedNvidiaPulseTestSec)+` (all GPUs simultaneously; measured on 8-GPU system).`+`<p id="sat-pt-mode-hint" style="color:var(--warn-fg);font-size:12px;margin:8px 0 0">Only runs in Stress mode. Switch mode above to enable in Run All.</p>`,
|
|
||||||
)) +
|
|
||||||
`</div>` +
|
|
||||||
`<div id="sat-card-nvidia-interconnect">` +
|
|
||||||
renderSATCard("nvidia-interconnect", "NVIDIA Interconnect (NCCL)", "runNvidiaFabricValidate('nvidia-interconnect')", "", renderValidateCardBody(
|
renderSATCard("nvidia-interconnect", "NVIDIA Interconnect (NCCL)", "runNvidiaFabricValidate('nvidia-interconnect')", "", renderValidateCardBody(
|
||||||
inv.NVIDIA,
|
inv.NVIDIA,
|
||||||
`Verifies NVLink/NVSwitch fabric bandwidth using NCCL all_reduce_perf across all selected GPUs. Pass/fail based on achieved bandwidth vs. theoretical.`,
|
`Verifies NVLink/NVSwitch fabric bandwidth using NCCL all_reduce_perf across all selected GPUs. Pass/fail based on achieved bandwidth vs. theoretical.`,
|
||||||
`<code>all_reduce_perf</code> (NCCL tests)`,
|
`<code>all_reduce_perf</code> (NCCL tests)`,
|
||||||
`Validate and Stress: `+validateFmtDur(platform.SATEstimatedNvidiaInterconnectSec)+` (all GPUs simultaneously, requires ≥2).`,
|
validateFmtDur(platform.SATEstimatedNvidiaInterconnectSec)+` (all GPUs simultaneously, requires ≥2).`,
|
||||||
)) +
|
)) +
|
||||||
`</div>` +
|
|
||||||
`<div id="sat-card-nvidia-bandwidth">` +
|
|
||||||
renderSATCard("nvidia-bandwidth", "NVIDIA Bandwidth (NVBandwidth)", "runNvidiaFabricValidate('nvidia-bandwidth')", "", renderValidateCardBody(
|
renderSATCard("nvidia-bandwidth", "NVIDIA Bandwidth (NVBandwidth)", "runNvidiaFabricValidate('nvidia-bandwidth')", "", renderValidateCardBody(
|
||||||
inv.NVIDIA,
|
inv.NVIDIA,
|
||||||
`Validates GPU memory copy and peer-to-peer bandwidth paths using NVBandwidth.`,
|
`Validates GPU memory copy and peer-to-peer bandwidth paths using NVBandwidth.`,
|
||||||
`<code>nvbandwidth</code>`,
|
`<code>nvbandwidth</code>`,
|
||||||
`Validate and Stress: `+validateFmtDur(platform.SATEstimatedNvidiaBandwidthSec)+` (all GPUs simultaneously; nvbandwidth runs all built-in tests without a time limit — duration set by the tool).`,
|
validateFmtDur(platform.SATEstimatedNvidiaBandwidthSec)+` (all GPUs simultaneously; nvbandwidth runs all built-in tests without a time limit — duration set by the tool).`,
|
||||||
)) +
|
)) +
|
||||||
`</div>` +
|
|
||||||
`</div>
|
`</div>
|
||||||
<div class="grid3" style="margin-top:16px">
|
<div class="grid3" style="margin-top:16px">
|
||||||
` + renderSATCard("amd", "AMD GPU", "runAMDValidateSet()", "", renderValidateCardBody(
|
` + renderSATCard("amd", "AMD GPU", "runAMDValidateSet()", "", renderValidateCardBody(
|
||||||
@@ -197,36 +201,15 @@ func renderValidate(opts HandlerOptions) string {
|
|||||||
<div class="card-body"><div id="sat-terminal" class="terminal"></div></div>
|
<div class="card-body"><div id="sat-terminal" class="terminal"></div></div>
|
||||||
</div>
|
</div>
|
||||||
<style>
|
<style>
|
||||||
.validate-profile-body { display:grid; grid-template-columns:1fr 1fr 1fr; gap:24px; align-items:stretch; }
|
|
||||||
.validate-profile-col { min-width:0; display:flex; flex-direction:column; }
|
|
||||||
.validate-profile-action { display:flex; flex-direction:column; align-items:center; justify-content:center; }
|
|
||||||
.validate-card-body { padding:0; }
|
.validate-card-body { padding:0; }
|
||||||
.validate-card-section { padding:12px 16px 0; }
|
.validate-card-section { padding:12px 16px 0; }
|
||||||
.validate-card-section:last-child { padding-bottom:16px; }
|
.validate-card-section:last-child { padding-bottom:16px; }
|
||||||
.sat-gpu-row { display:flex; align-items:flex-start; gap:8px; padding:6px 0; cursor:pointer; font-size:13px; }
|
.sat-gpu-row { display:flex; align-items:flex-start; gap:8px; padding:6px 0; cursor:pointer; font-size:13px; }
|
||||||
.sat-gpu-row input[type=checkbox] { width:16px; height:16px; margin-top:2px; flex-shrink:0; }
|
.sat-gpu-row input[type=checkbox] { width:16px; height:16px; margin-top:2px; flex-shrink:0; }
|
||||||
@media(max-width:900px){ .validate-profile-body { grid-template-columns:1fr; } }
|
|
||||||
</style>
|
</style>
|
||||||
<script>
|
<script>
|
||||||
let satES = null;
|
let satES = null;
|
||||||
function satStressMode() {
|
` + satStressModeJS + `
|
||||||
return document.querySelector('input[name="sat-mode"]:checked')?.value === 'stress';
|
|
||||||
}
|
|
||||||
function satModeChanged() {
|
|
||||||
const stress = satStressMode();
|
|
||||||
[
|
|
||||||
{card: 'sat-card-nvidia-targeted-stress', hint: 'sat-ts-mode-hint'},
|
|
||||||
{card: 'sat-card-nvidia-targeted-power', hint: 'sat-tp-mode-hint'},
|
|
||||||
{card: 'sat-card-nvidia-pulse', hint: 'sat-pt-mode-hint'},
|
|
||||||
].forEach(function(item) {
|
|
||||||
const card = document.getElementById(item.card);
|
|
||||||
if (card) {
|
|
||||||
card.style.opacity = stress ? '1' : '0.5';
|
|
||||||
const hint = document.getElementById(item.hint);
|
|
||||||
if (hint) hint.style.display = stress ? 'none' : '';
|
|
||||||
}
|
|
||||||
});
|
|
||||||
}
|
|
||||||
function satLabels() {
|
function satLabels() {
|
||||||
return {nvidia:'Validate GPU', 'nvidia-targeted-stress':'NVIDIA Targeted Stress (dcgmi diag targeted_stress)', 'nvidia-targeted-power':'NVIDIA Targeted Power (dcgmi diag targeted_power)', 'nvidia-pulse':'NVIDIA PSU Pulse Test (dcgmi diag pulse_test)', 'nvidia-interconnect':'NVIDIA Interconnect (NCCL all_reduce_perf)', 'nvidia-bandwidth':'NVIDIA Bandwidth (NVBandwidth)', memory:'Validate Memory', storage:'Validate Storage', cpu:'Validate CPU', amd:'Validate AMD GPU', 'amd-mem':'AMD GPU MEM Integrity', 'amd-bandwidth':'AMD GPU MEM Bandwidth'};
|
return {nvidia:'Validate GPU', 'nvidia-targeted-stress':'NVIDIA Targeted Stress (dcgmi diag targeted_stress)', 'nvidia-targeted-power':'NVIDIA Targeted Power (dcgmi diag targeted_power)', 'nvidia-pulse':'NVIDIA PSU Pulse Test (dcgmi diag pulse_test)', 'nvidia-interconnect':'NVIDIA Interconnect (NCCL all_reduce_perf)', 'nvidia-bandwidth':'NVIDIA Bandwidth (NVBandwidth)', memory:'Validate Memory', storage:'Validate Storage', cpu:'Validate CPU', amd:'Validate AMD GPU', 'amd-mem':'AMD GPU MEM Integrity', 'amd-bandwidth':'AMD GPU MEM Bandwidth'};
|
||||||
}
|
}
|
||||||
@@ -656,6 +639,292 @@ func validateIsVendorGPU(dev schema.HardwarePCIeDevice, vendor string) bool {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// renderCheck renders the non-destructive Check page (step 2).
|
||||||
|
// Shows validate-mode tests only: CPU, Memory, Storage, NVIDIA L2, NCCL, NVBandwidth, AMD.
|
||||||
|
// Stress-mode tests (targeted-stress, targeted-power, pulse) are on the Load page.
|
||||||
|
func renderCheck(opts HandlerOptions) string {
|
||||||
|
inv := loadValidateInventory(opts)
|
||||||
|
n := inv.NvidiaGPUCount
|
||||||
|
validateTotalStr := validateFmtDur(validateTotalValidateSec(n))
|
||||||
|
gpuNote := ""
|
||||||
|
if n > 0 {
|
||||||
|
gpuNote = fmt.Sprintf(" (%d GPU)", n)
|
||||||
|
}
|
||||||
|
return `<div class="alert alert-info" style="margin-bottom:16px"><strong>Non-destructive:</strong> Check tests collect diagnostics only — no writes to disks, no sustained load, no hardware wear counters incremented. For stress testing, go to <a href="/burn">4. Burn</a>.</div>
|
||||||
|
<div style="display:flex;align-items:center;gap:12px;margin-bottom:16px">
|
||||||
|
<button type="button" class="btn btn-primary" onclick="runAllCheckSAT()">Run All Checks</button>
|
||||||
|
<span id="sat-all-status" style="font-size:12px;color:var(--muted)"></span>
|
||||||
|
<span style="font-size:12px;color:var(--muted)">est. ` + validateTotalStr + gpuNote + `</span>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<div class="grid3">
|
||||||
|
` + renderSATCard("cpu", "CPU", "runSAT('cpu')", "", renderValidateCardBody(
|
||||||
|
inv.CPU,
|
||||||
|
`Collects CPU inventory and temperatures, then runs a bounded CPU stress pass.`,
|
||||||
|
`<code>lscpu</code>, <code>sensors</code>, <code>stress-ng</code>`,
|
||||||
|
validateFmtDur(platform.SATEstimatedCPUValidateSec)+` (stress-ng 60 s).`,
|
||||||
|
)) +
|
||||||
|
renderSATCard("memory", "Memory", "runSAT('memory')", "", renderValidateCardBody(
|
||||||
|
inv.Memory,
|
||||||
|
`Runs a RAM validation pass and records memory state around the test.`,
|
||||||
|
`<code>free</code>, <code>memtester</code>`,
|
||||||
|
validateFmtDur(platform.SATEstimatedMemoryValidateSec)+` (256 MB × 1 pass).`,
|
||||||
|
)) +
|
||||||
|
renderSATCard("storage", "Storage", "runSAT('storage')", "", renderValidateCardBody(
|
||||||
|
inv.Storage,
|
||||||
|
`Scans all storage devices and runs the matching health or self-test path for each.`,
|
||||||
|
`<code>lsblk</code>; NVMe: <code>nvme</code>; SATA/SAS: <code>smartctl</code>`,
|
||||||
|
`Seconds (NVMe: instant device query; SATA/SAS: short self-test).`,
|
||||||
|
)) +
|
||||||
|
`</div>
|
||||||
|
<div style="height:1px;background:var(--border);margin:16px 0"></div>
|
||||||
|
<div class="card" style="margin-bottom:16px">
|
||||||
|
<div class="card-head">NVIDIA GPU Selection</div>
|
||||||
|
<div class="card-body">
|
||||||
|
<p style="font-size:12px;color:var(--muted);margin:0 0 8px">` + inv.NVIDIA + `</p>
|
||||||
|
<div style="display:flex;gap:8px;flex-wrap:wrap;margin-bottom:8px">
|
||||||
|
<button class="btn btn-sm btn-secondary" type="button" onclick="satSelectAllGPUs()">Select All</button>
|
||||||
|
<button class="btn btn-sm btn-secondary" type="button" onclick="satSelectNoGPUs()">Clear</button>
|
||||||
|
</div>
|
||||||
|
<div id="sat-gpu-list" style="border:1px solid var(--border);border-radius:4px;padding:12px;min-height:88px">
|
||||||
|
<p style="color:var(--muted);font-size:13px">Loading NVIDIA GPUs...</p>
|
||||||
|
</div>
|
||||||
|
<p id="sat-gpu-selection-note" style="font-size:12px;color:var(--muted);margin:10px 0 0">Select at least one NVIDIA GPU to enable NVIDIA check tasks.</p>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<div class="grid3">
|
||||||
|
` + renderSATCard("nvidia", "NVIDIA GPU", "runNvidiaValidateSet('nvidia')", "", renderValidateCardBody(
|
||||||
|
inv.NVIDIA,
|
||||||
|
`Runs NVIDIA diagnostics and board inventory checks (DCGM Level 2).`,
|
||||||
|
`<code>nvidia-smi</code>, <code>dmidecode</code>, <code>dcgmi diag</code>`,
|
||||||
|
validateFmtDur(platform.SATEstimatedNvidiaGPUValidateSec)+` (Level 2, all GPUs simultaneously).`,
|
||||||
|
)) +
|
||||||
|
renderSATCard("nvidia-interconnect", "NVIDIA Interconnect (NCCL)", "runNvidiaFabricValidate('nvidia-interconnect')", "", renderValidateCardBody(
|
||||||
|
inv.NVIDIA,
|
||||||
|
`Verifies NVLink/NVSwitch fabric bandwidth using NCCL all_reduce_perf across all selected GPUs.`,
|
||||||
|
`<code>all_reduce_perf</code> (NCCL tests)`,
|
||||||
|
validateFmtDur(platform.SATEstimatedNvidiaInterconnectSec)+` (all GPUs simultaneously, requires ≥2).`,
|
||||||
|
)) +
|
||||||
|
renderSATCard("nvidia-bandwidth", "NVIDIA Bandwidth (NVBandwidth)", "runNvidiaFabricValidate('nvidia-bandwidth')", "", renderValidateCardBody(
|
||||||
|
inv.NVIDIA,
|
||||||
|
`Validates GPU memory copy and peer-to-peer bandwidth paths using NVBandwidth.`,
|
||||||
|
`<code>nvbandwidth</code>`,
|
||||||
|
validateFmtDur(platform.SATEstimatedNvidiaBandwidthSec)+` (all GPUs simultaneously).`,
|
||||||
|
)) +
|
||||||
|
`</div>
|
||||||
|
<div class="grid3" style="margin-top:16px">
|
||||||
|
` + renderSATCard("amd", "AMD GPU", "runAMDValidateSet()", "", renderValidateCardBody(
|
||||||
|
inv.AMD,
|
||||||
|
`Runs AMD GPU inventory, MEM integrity, and MEM bandwidth checks.`,
|
||||||
|
`GPU Validate: <code>rocm-smi</code>, <code>dmidecode</code>; MEM Integrity: <code>rvs mem</code>; MEM Bandwidth: <code>rocm-bandwidth-test</code>, <code>rvs babel</code>`,
|
||||||
|
`<div style="display:flex;flex-direction:column;gap:4px"><label class="cb-row"><input type="checkbox" id="sat-amd-target" checked><span>GPU Validate</span></label><label class="cb-row"><input type="checkbox" id="sat-amd-mem-target" checked><span>MEM Integrity</span></label><label class="cb-row"><input type="checkbox" id="sat-amd-bandwidth-target" checked><span>MEM Bandwidth</span></label></div>`,
|
||||||
|
)) +
|
||||||
|
`</div>
|
||||||
|
<div id="sat-output" style="display:none;margin-top:16px" class="card">
|
||||||
|
<div class="card-head">Test Output <span id="sat-title"></span></div>
|
||||||
|
<div class="card-body"><div id="sat-terminal" class="terminal"></div></div>
|
||||||
|
</div>
|
||||||
|
<style>
|
||||||
|
.validate-card-body { padding:0; }
|
||||||
|
.validate-card-section { padding:12px 16px 0; }
|
||||||
|
.validate-card-section:last-child { padding-bottom:16px; }
|
||||||
|
.sat-gpu-row { display:flex; align-items:flex-start; gap:8px; padding:6px 0; cursor:pointer; font-size:13px; }
|
||||||
|
.sat-gpu-row input[type=checkbox] { width:16px; height:16px; margin-top:2px; flex-shrink:0; }
|
||||||
|
.cb-row { display:flex; align-items:flex-start; gap:8px; padding:4px 0; cursor:pointer; font-size:13px; }
|
||||||
|
.cb-row input[type=checkbox] { width:16px; height:16px; margin-top:2px; flex-shrink:0; }
|
||||||
|
</style>
|
||||||
|
<script>
|
||||||
|
let satES = null;
|
||||||
|
function satLabels() {
|
||||||
|
return {nvidia:'Check GPU (DCGM L2)', 'nvidia-interconnect':'NVIDIA Interconnect (NCCL)', 'nvidia-bandwidth':'NVIDIA Bandwidth (NVBandwidth)', memory:'Check Memory', storage:'Check Storage', cpu:'Check CPU', amd:'Check AMD GPU', 'amd-mem':'AMD GPU MEM Integrity', 'amd-bandwidth':'AMD GPU MEM Bandwidth'};
|
||||||
|
}
|
||||||
|
let satNvidiaGPUsPromise = null;
|
||||||
|
function loadSatNvidiaGPUs() {
|
||||||
|
if (!satNvidiaGPUsPromise) {
|
||||||
|
satNvidiaGPUsPromise = fetch('/api/gpu/nvidia').then(r => {
|
||||||
|
if (!r.ok) throw new Error('Failed to load NVIDIA GPUs.');
|
||||||
|
return r.json();
|
||||||
|
}).then(list => Array.isArray(list) ? list : []);
|
||||||
|
}
|
||||||
|
return satNvidiaGPUsPromise;
|
||||||
|
}
|
||||||
|
function satSelectedGPUIndices() {
|
||||||
|
return Array.from(document.querySelectorAll('.sat-nvidia-checkbox'))
|
||||||
|
.filter(el => el.checked && !el.disabled)
|
||||||
|
.map(el => parseInt(el.value, 10))
|
||||||
|
.filter(v => !Number.isNaN(v))
|
||||||
|
.sort((a, b) => a - b);
|
||||||
|
}
|
||||||
|
function satUpdateGPUSelectionNote() {
|
||||||
|
const note = document.getElementById('sat-gpu-selection-note');
|
||||||
|
if (!note) return;
|
||||||
|
const sel = satSelectedGPUIndices();
|
||||||
|
note.textContent = sel.length
|
||||||
|
? 'Selected GPUs: ' + sel.join(', ') + '. Multi-GPU tests will use all selected GPUs.'
|
||||||
|
: 'Select at least one NVIDIA GPU to enable NVIDIA check tasks.';
|
||||||
|
}
|
||||||
|
function satRenderGPUList(gpus) {
|
||||||
|
const root = document.getElementById('sat-gpu-list');
|
||||||
|
if (!root) return;
|
||||||
|
if (!gpus || !gpus.length) {
|
||||||
|
root.innerHTML = '<p style="color:var(--muted);font-size:13px">No NVIDIA GPUs detected.</p>';
|
||||||
|
satUpdateGPUSelectionNote(); return;
|
||||||
|
}
|
||||||
|
root.innerHTML = gpus.map(gpu => {
|
||||||
|
const mem = gpu.memory_mb > 0 ? ' · ' + gpu.memory_mb + ' MiB' : '';
|
||||||
|
return '<label class="sat-gpu-row"><input class="sat-nvidia-checkbox" type="checkbox" value="' + gpu.index + '" checked onchange="satUpdateGPUSelectionNote()"><span><strong>GPU ' + gpu.index + '</strong> — ' + gpu.name + mem + '</span></label>';
|
||||||
|
}).join('');
|
||||||
|
satUpdateGPUSelectionNote();
|
||||||
|
}
|
||||||
|
function satSelectAllGPUs() { document.querySelectorAll('.sat-nvidia-checkbox').forEach(el => { el.checked = true; }); satUpdateGPUSelectionNote(); }
|
||||||
|
function satSelectNoGPUs() { document.querySelectorAll('.sat-nvidia-checkbox').forEach(el => { el.checked = false; }); satUpdateGPUSelectionNote(); }
|
||||||
|
function satGPULoadInit() {
|
||||||
|
loadSatNvidiaGPUs().then(satRenderGPUList).catch(err => {
|
||||||
|
const root = document.getElementById('sat-gpu-list');
|
||||||
|
if (root) root.innerHTML = '<p style="color:var(--crit-fg);font-size:13px">Error: ' + err.message + '</p>';
|
||||||
|
satUpdateGPUSelectionNote();
|
||||||
|
});
|
||||||
|
}
|
||||||
|
function satRequestBody(target, overrides) {
|
||||||
|
const body = {};
|
||||||
|
const labels = satLabels();
|
||||||
|
body.display_name = labels[target] || ('Check ' + target);
|
||||||
|
body.stress_mode = false;
|
||||||
|
if (target === 'cpu') body.duration = 60;
|
||||||
|
if (overrides) Object.keys(overrides).forEach(k => { body[k] = overrides[k]; });
|
||||||
|
return body;
|
||||||
|
}
|
||||||
|
function enqueueSATTarget(target, overrides) {
|
||||||
|
return fetch('/api/sat/' + target + '/run', {method:'POST', headers:{'Content-Type':'application/json'}, body:JSON.stringify(satRequestBody(target, overrides))}).then(r => r.json());
|
||||||
|
}
|
||||||
|
function streamSATTask(taskId, title, resetTerminal) {
|
||||||
|
if (satES) { satES.close(); satES = null; }
|
||||||
|
document.getElementById('sat-output').style.display = 'block';
|
||||||
|
document.getElementById('sat-title').textContent = '— ' + title;
|
||||||
|
const term = document.getElementById('sat-terminal');
|
||||||
|
if (resetTerminal) term.textContent = '';
|
||||||
|
term.textContent += 'Task ' + taskId + ' queued. Streaming log...\n';
|
||||||
|
return new Promise(resolve => {
|
||||||
|
satES = new EventSource('/api/tasks/' + taskId + '/stream');
|
||||||
|
satES.onmessage = e => { term.textContent += e.data + '\n'; term.scrollTop = term.scrollHeight; };
|
||||||
|
satES.addEventListener('done', e => {
|
||||||
|
satES.close(); satES = null;
|
||||||
|
term.textContent += (e.data ? '\nERROR: ' + e.data : '\nCompleted.') + '\n';
|
||||||
|
term.scrollTop = term.scrollHeight;
|
||||||
|
resolve({ok: !e.data, error: e.data || ''});
|
||||||
|
});
|
||||||
|
satES.onerror = () => {
|
||||||
|
if (satES) { satES.close(); satES = null; }
|
||||||
|
term.textContent += '\nERROR: stream disconnected.\n';
|
||||||
|
term.scrollTop = term.scrollHeight;
|
||||||
|
resolve({ok: false, error: 'stream disconnected'});
|
||||||
|
};
|
||||||
|
});
|
||||||
|
}
|
||||||
|
function selectedAMDValidateTargets() {
|
||||||
|
const targets = [];
|
||||||
|
const gpu = document.getElementById('sat-amd-target');
|
||||||
|
const mem = document.getElementById('sat-amd-mem-target');
|
||||||
|
const bw = document.getElementById('sat-amd-bandwidth-target');
|
||||||
|
if (gpu && gpu.checked && !gpu.disabled) targets.push('amd');
|
||||||
|
if (mem && mem.checked && !mem.disabled) targets.push('amd-mem');
|
||||||
|
if (bw && bw.checked && !bw.disabled) targets.push('amd-bandwidth');
|
||||||
|
return targets;
|
||||||
|
}
|
||||||
|
function runSAT(target) { return runSATWithOverrides(target, null); }
|
||||||
|
function runSATWithOverrides(target, overrides) {
|
||||||
|
const title = (overrides && overrides.display_name) || target;
|
||||||
|
document.getElementById('sat-output').style.display = 'block';
|
||||||
|
document.getElementById('sat-title').textContent = '— ' + title;
|
||||||
|
const term = document.getElementById('sat-terminal');
|
||||||
|
term.textContent = 'Enqueuing ' + title + ' test...\n';
|
||||||
|
return enqueueSATTarget(target, overrides).then(d => streamSATTask(d.task_id, title, false));
|
||||||
|
}
|
||||||
|
function runNvidiaFabricValidate(target) {
|
||||||
|
const indices = satSelectedGPUIndices();
|
||||||
|
if (!indices.length) { alert('No NVIDIA GPUs available.'); return; }
|
||||||
|
runSATWithOverrides(target, {gpu_indices: indices, display_name: satLabels()[target] || target});
|
||||||
|
}
|
||||||
|
function runNvidiaValidateSet(target) {
|
||||||
|
const sel = satSelectedGPUIndices();
|
||||||
|
if (!sel.length) { alert('Select at least one NVIDIA GPU.'); return; }
|
||||||
|
return runSATWithOverrides(target, {gpu_indices: sel, display_name: satLabels()[target] || target});
|
||||||
|
}
|
||||||
|
function runAMDValidateSet() {
|
||||||
|
const targets = selectedAMDValidateTargets();
|
||||||
|
if (!targets.length) return;
|
||||||
|
if (targets.length === 1) return runSAT(targets[0]);
|
||||||
|
const term = document.getElementById('sat-terminal');
|
||||||
|
document.getElementById('sat-output').style.display = 'block';
|
||||||
|
document.getElementById('sat-title').textContent = '— amd';
|
||||||
|
term.textContent = 'Running AMD check set...\n';
|
||||||
|
const labels = satLabels();
|
||||||
|
const runNext = idx => {
|
||||||
|
if (idx >= targets.length) return Promise.resolve();
|
||||||
|
const t = targets[idx];
|
||||||
|
term.textContent += '\n[' + (idx + 1) + '/' + targets.length + '] ' + labels[t] + '\n';
|
||||||
|
return enqueueSATTarget(t).then(d => streamSATTask(d.task_id, labels[t], false)).then(() => runNext(idx + 1));
|
||||||
|
};
|
||||||
|
return runNext(0);
|
||||||
|
}
|
||||||
|
function runAllCheckSAT() {
|
||||||
|
const status = document.getElementById('sat-all-status');
|
||||||
|
status.textContent = 'Enqueuing...';
|
||||||
|
const nvidiaIndices = satSelectedGPUIndices();
|
||||||
|
const nvidiaAllTargets = ['nvidia', 'nvidia-interconnect', 'nvidia-bandwidth'];
|
||||||
|
const baseTargets = ['cpu', 'memory', 'storage'];
|
||||||
|
const amdTargets = selectedAMDValidateTargets();
|
||||||
|
const expanded = [];
|
||||||
|
baseTargets.forEach(t => expanded.push({target: t}));
|
||||||
|
if (nvidiaIndices.length) {
|
||||||
|
nvidiaAllTargets.forEach(t => {
|
||||||
|
const btn = document.getElementById('sat-btn-' + t);
|
||||||
|
if (!(btn && btn.disabled)) expanded.push({target: t, overrides: {gpu_indices: nvidiaIndices, display_name: satLabels()[t] || t}});
|
||||||
|
});
|
||||||
|
}
|
||||||
|
amdTargets.forEach(t => expanded.push({target: t}));
|
||||||
|
if (!expanded.length) { status.textContent = 'No tasks selected.'; return; }
|
||||||
|
const total = expanded.length;
|
||||||
|
const runNext = idx => {
|
||||||
|
if (idx >= expanded.length) { status.textContent = 'Completed ' + total + ' task(s).'; return Promise.resolve(); }
|
||||||
|
const item = expanded[idx];
|
||||||
|
status.textContent = 'Running ' + (idx + 1) + '/' + total + '...';
|
||||||
|
return enqueueSATTarget(item.target, item.overrides).then(() => runNext(idx + 1));
|
||||||
|
};
|
||||||
|
runNext(0).catch(err => { status.textContent = 'Error: ' + err.message; });
|
||||||
|
}
|
||||||
|
function disableSATCard(id, reason) {
|
||||||
|
const btn = document.getElementById('sat-btn-' + id);
|
||||||
|
if (!btn) return;
|
||||||
|
btn.disabled = true; btn.title = reason; btn.style.opacity = '0.4';
|
||||||
|
const card = btn.closest('.card');
|
||||||
|
if (card) {
|
||||||
|
let note = card.querySelector('.sat-unavail');
|
||||||
|
if (!note) {
|
||||||
|
note = document.createElement('p');
|
||||||
|
note.className = 'sat-unavail';
|
||||||
|
note.style.cssText = 'color:var(--muted);font-size:12px;margin:0 0 8px';
|
||||||
|
const body = card.querySelector('.card-body');
|
||||||
|
if (body) body.insertBefore(note, body.firstChild);
|
||||||
|
}
|
||||||
|
note.textContent = reason;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
fetch('/api/gpu/presence').then(r => r.json()).then(gp => {
|
||||||
|
if (!gp.nvidia) ['nvidia','nvidia-interconnect','nvidia-bandwidth'].forEach(t => disableSATCard(t, 'No NVIDIA GPU detected'));
|
||||||
|
if (!gp.amd) {
|
||||||
|
disableSATCard('amd', 'No AMD GPU detected');
|
||||||
|
['sat-amd-target','sat-amd-mem-target','sat-amd-bandwidth-target'].forEach(id => {
|
||||||
|
const cb = document.getElementById(id);
|
||||||
|
if (cb) { cb.disabled = true; cb.checked = false; }
|
||||||
|
});
|
||||||
|
}
|
||||||
|
});
|
||||||
|
satGPULoadInit();
|
||||||
|
</script>`
|
||||||
|
}
|
||||||
|
|
||||||
func renderSATCard(id, label, runAction, headerActions, body string) string {
|
func renderSATCard(id, label, runAction, headerActions, body string) string {
|
||||||
actions := `<button id="sat-btn-` + id + `" class="btn btn-primary btn-sm" onclick="` + runAction + `">Run</button>`
|
actions := `<button id="sat-btn-` + id + `" class="btn btn-primary btn-sm" onclick="` + runAction + `">Run</button>`
|
||||||
if strings.TrimSpace(headerActions) != "" {
|
if strings.TrimSpace(headerActions) != "" {
|
||||||
|
|||||||
@@ -24,41 +24,54 @@ func renderPage(page string, opts HandlerOptions) string {
|
|||||||
body = renderDashboard(opts)
|
body = renderDashboard(opts)
|
||||||
case "audit":
|
case "audit":
|
||||||
pageID = "audit"
|
pageID = "audit"
|
||||||
title = "Audit"
|
title = "1. Audit"
|
||||||
body = renderAudit()
|
body = renderAudit()
|
||||||
case "validate":
|
case "check":
|
||||||
pageID = "validate"
|
pageID = "check"
|
||||||
title = "Validate"
|
title = "2. Check"
|
||||||
body = renderValidate(opts)
|
body = renderCheck(opts)
|
||||||
|
case "load":
|
||||||
|
pageID = "load"
|
||||||
|
title = "3. Load"
|
||||||
|
body = renderValidateStress(opts)
|
||||||
case "burn":
|
case "burn":
|
||||||
pageID = "burn"
|
pageID = "burn"
|
||||||
title = "Burn"
|
title = "4. Burn"
|
||||||
body = renderBurn()
|
body = renderBurn()
|
||||||
case "benchmark":
|
case "benchmark":
|
||||||
pageID = "benchmark"
|
pageID = "benchmark"
|
||||||
title = "Benchmark"
|
title = "5. Benchmark"
|
||||||
|
body = renderBenchmark(opts)
|
||||||
|
case "tools":
|
||||||
|
pageID = "tools"
|
||||||
|
title = "Tools"
|
||||||
|
body = renderTools()
|
||||||
|
case "settings":
|
||||||
|
pageID = "settings"
|
||||||
|
title = "Settings"
|
||||||
|
body = renderSettings(opts)
|
||||||
|
// Legacy routes (redirected at HTTP level in handlePage; these are fallbacks)
|
||||||
|
case "validate", "tests":
|
||||||
|
pageID = "load"
|
||||||
|
title = "3. Load"
|
||||||
|
body = renderValidate(opts)
|
||||||
|
case "burn-in":
|
||||||
|
pageID = "burn"
|
||||||
|
title = "4. Burn"
|
||||||
|
body = renderBurn()
|
||||||
|
case "speed", "endurance":
|
||||||
|
pageID = "benchmark"
|
||||||
|
title = "5. Benchmark"
|
||||||
body = renderBenchmark(opts)
|
body = renderBenchmark(opts)
|
||||||
case "tasks":
|
case "tasks":
|
||||||
pageID = "tasks"
|
pageID = "tasks"
|
||||||
title = "Tasks"
|
title = "Tasks"
|
||||||
body = renderTasks()
|
body = renderTasks()
|
||||||
case "tools":
|
// Hidden pages (not in nav, accessible by direct URL)
|
||||||
pageID = "tools"
|
|
||||||
title = "Tools"
|
|
||||||
body = renderTools()
|
|
||||||
// Legacy routes kept accessible but not in nav
|
|
||||||
case "metrics":
|
case "metrics":
|
||||||
pageID = "metrics"
|
pageID = "metrics"
|
||||||
title = "Live Metrics"
|
title = "Live Metrics"
|
||||||
body = renderMetrics()
|
body = renderMetrics()
|
||||||
case "tests":
|
|
||||||
pageID = "validate"
|
|
||||||
title = "Acceptance Tests"
|
|
||||||
body = renderValidate(opts)
|
|
||||||
case "burn-in":
|
|
||||||
pageID = "burn"
|
|
||||||
title = "Burn-in Tests"
|
|
||||||
body = renderBurn()
|
|
||||||
case "network":
|
case "network":
|
||||||
pageID = "network"
|
pageID = "network"
|
||||||
title = "Network"
|
title = "Network"
|
||||||
|
|||||||
689
audit/internal/webui/raid_mgmt.go
Normal file
689
audit/internal/webui/raid_mgmt.go
Normal file
@@ -0,0 +1,689 @@
|
|||||||
|
package webui
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"encoding/json"
|
||||||
|
"fmt"
|
||||||
|
"net/http"
|
||||||
|
"os"
|
||||||
|
"os/exec"
|
||||||
|
"regexp"
|
||||||
|
"strconv"
|
||||||
|
"strings"
|
||||||
|
"time"
|
||||||
|
)
|
||||||
|
|
||||||
|
// --- Response types ---
|
||||||
|
|
||||||
|
// raidDriveInfo describes a single physical drive in a RAID status response.
// Every field is optional in the JSON encoding (omitempty).
type raidDriveInfo struct {
	// Slot is the controller-side location of the drive; for LSI controllers
	// it is the storcli "EID:Slt" value.
	Slot string `json:"slot,omitempty"`
	// Device is not populated by the LSI detection path.
	// NOTE(review): presumably an OS device path set by other backends — confirm.
	Device string `json:"device,omitempty"`
	// Model is the drive model string as reported by the controller tool.
	Model string `json:"model,omitempty"`
	// SizeGB is the drive capacity in gigabytes; for LSI it is derived from
	// the human-readable size column via raidParseHumanSizeGB.
	SizeGB float64 `json:"size_gb,omitempty"`
	// Serial is the drive serial number, when the tool reports one.
	Serial string `json:"serial,omitempty"`
	// State is the raw controller state string (for LSI e.g. "Frgn",
	// "UGood", "JBOD").
	State string `json:"state,omitempty"`
}
|
||||||
|
|
||||||
|
// raidArrayInfo describes one array listed under a controller's "arrays"
// field in a RAID status response.
type raidArrayInfo struct {
	// Name identifies the array.
	Name string `json:"name"`
	// Level is the RAID level label; omitted from JSON when empty.
	Level string `json:"level,omitempty"`
	// Members lists the array's member identifiers.
	// NOTE(review): presumably device names or slots — confirm against the
	// code that fills this in.
	Members []string `json:"members"`
	// Degraded reports whether the array is flagged as degraded.
	Degraded bool `json:"degraded"`
}
|
||||||
|
|
||||||
|
// raidControllerInfo describes one RAID controller together with the drives
// and arrays that the status endpoint reports for it.
type raidControllerInfo struct {
	// ID is a unique controller identifier; LSI controllers use "lsi-<index>".
	ID string `json:"id"`
	// Type names the controller family/backend (e.g. "lsi").
	Type string `json:"type"`
	// Index is the controller number as reported by the vendor tool.
	Index int `json:"index"`
	// Model is the controller model; LSI detection substitutes
	// "LSI Controller <index>" when the tool reports none.
	Model string `json:"model"`
	// ForeignDrives holds drives carrying a foreign configuration
	// (LSI state "Frgn"). There is no omitempty, so the key is always emitted.
	ForeignDrives []raidDriveInfo `json:"foreign_drives"`
	// FreeDrives holds drives not assigned to an array (LSI states "UGood"
	// and "JBOD"). There is no omitempty, so the key is always emitted.
	FreeDrives []raidDriveInfo `json:"free_drives"`
	// Arrays lists arrays on this controller; omitted from JSON when empty.
	Arrays []raidArrayInfo `json:"arrays,omitempty"`
}
|
||||||
|
|
||||||
|
// raidStatusResp is the top-level JSON payload of the RAID status response:
// the list of detected controllers.
type raidStatusResp struct {
	// Controllers lists every detected RAID controller.
	Controllers []raidControllerInfo `json:"controllers"`
}
|
||||||
|
|
||||||
|
// --- LSI/storcli detection ---
|
||||||
|
|
||||||
|
func detectLSIControllers() []raidControllerInfo {
|
||||||
|
ctrlOut, err := exec.Command("storcli64", "/call", "show", "J").Output()
|
||||||
|
if err != nil {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
var ctrlDoc struct {
|
||||||
|
Controllers []struct {
|
||||||
|
ResponseData struct {
|
||||||
|
Basics struct {
|
||||||
|
Controller int `json:"Controller"`
|
||||||
|
Model string `json:"Model"`
|
||||||
|
} `json:"Basics"`
|
||||||
|
} `json:"Response Data"`
|
||||||
|
} `json:"Controllers"`
|
||||||
|
}
|
||||||
|
if err := json.Unmarshal(ctrlOut, &ctrlDoc); err != nil || len(ctrlDoc.Controllers) == 0 {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
driveOut, _ := exec.Command("storcli64", "/call/eall/sall", "show", "all", "J").Output()
|
||||||
|
|
||||||
|
var driveDoc struct {
|
||||||
|
Controllers []struct {
|
||||||
|
ResponseData struct {
|
||||||
|
DriveInformation []struct {
|
||||||
|
EIDSlt string `json:"EID:Slt"`
|
||||||
|
State string `json:"State"`
|
||||||
|
Size string `json:"Size"`
|
||||||
|
Intf string `json:"Intf"`
|
||||||
|
Med string `json:"Med"`
|
||||||
|
Model string `json:"Model"`
|
||||||
|
SN string `json:"SN"`
|
||||||
|
} `json:"Drive Information"`
|
||||||
|
} `json:"Response Data"`
|
||||||
|
} `json:"Controllers"`
|
||||||
|
}
|
||||||
|
if len(driveOut) > 0 {
|
||||||
|
json.Unmarshal(driveOut, &driveDoc) //nolint:errcheck
|
||||||
|
}
|
||||||
|
|
||||||
|
var controllers []raidControllerInfo
|
||||||
|
for i, c := range ctrlDoc.Controllers {
|
||||||
|
ctrl := raidControllerInfo{
|
||||||
|
ID: fmt.Sprintf("lsi-%d", c.ResponseData.Basics.Controller),
|
||||||
|
Type: "lsi",
|
||||||
|
Index: c.ResponseData.Basics.Controller,
|
||||||
|
Model: c.ResponseData.Basics.Model,
|
||||||
|
ForeignDrives: []raidDriveInfo{},
|
||||||
|
FreeDrives: []raidDriveInfo{},
|
||||||
|
}
|
||||||
|
if ctrl.Model == "" {
|
||||||
|
ctrl.Model = fmt.Sprintf("LSI Controller %d", ctrl.Index)
|
||||||
|
}
|
||||||
|
|
||||||
|
if i < len(driveDoc.Controllers) {
|
||||||
|
for _, d := range driveDoc.Controllers[i].ResponseData.DriveInformation {
|
||||||
|
info := raidDriveInfo{
|
||||||
|
Slot: strings.TrimSpace(d.EIDSlt),
|
||||||
|
Model: strings.TrimSpace(d.Model),
|
||||||
|
State: strings.TrimSpace(d.State),
|
||||||
|
SizeGB: raidParseHumanSizeGB(d.Size),
|
||||||
|
Serial: strings.TrimSpace(d.SN),
|
||||||
|
}
|
||||||
|
switch strings.TrimSpace(d.State) {
|
||||||
|
case "Frgn":
|
||||||
|
ctrl.ForeignDrives = append(ctrl.ForeignDrives, info)
|
||||||
|
case "UGood", "JBOD":
|
||||||
|
ctrl.FreeDrives = append(ctrl.FreeDrives, info)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
controllers = append(controllers, ctrl)
|
||||||
|
}
|
||||||
|
return controllers
|
||||||
|
}
|
||||||
|
|
||||||
|
// --- VROC/mdadm detection ---
|
||||||
|
|
||||||
|
// raidMDStatDegradedRx matches the per-member status map on an mdstat status
// line, e.g. "[UU]" or "[U_]". An underscore marks a missing member.
var raidMDStatDegradedRx = regexp.MustCompile(`\[[U_]+\]`)

// mdStatEntry is one array parsed from /proc/mdstat.
type mdStatEntry struct {
	Name     string   // array name, e.g. "md0"
	Level    string   // personality token such as "raid1" or "linear"; empty when absent
	Members  []string // member device names without the "[n]" role index or flags
	Degraded bool     // true when a following status line shows a missing member
}

// mdStatMemberName extracts the device name from an mdstat member token.
// Tokens look like "sda1[0]" and may carry a flag suffix such as "(F)"
// (faulty) or "(S)" (spare), e.g. "sdb1[1](F)". ok is false for tokens that
// are not member references.
func mdStatMemberName(tok string) (name string, ok bool) {
	open := strings.Index(tok, "[")
	if open <= 0 {
		return "", false
	}
	if strings.HasSuffix(tok, "]") {
		return tok[:open], true
	}
	// Accept "name[n](FLAG)...": a closing bracket followed by a flag group.
	if end := strings.Index(tok[open:], "]"); end >= 0 {
		if strings.HasPrefix(tok[open+end+1:], "(") {
			return tok[:open], true
		}
	}
	return "", false
}

// parseRAIDMDStat parses the text of /proc/mdstat into one entry per array.
//
// A line of the form "<name> : <tokens...>" starts an array; tokens beginning
// with "raid" or "linear" set the level and tokens of the form "dev[n]"
// (optionally followed by flags such as "(F)" or "(S)") are recorded as
// members. Flagged members are included so that spare and faulty devices are
// still known to belong to an array. Subsequent lines are scanned for a
// "[UU_]"-style status map; an underscore marks the most recent array as
// degraded. The "Personalities" and "unused devices" lines are ignored.
func parseRAIDMDStat(raw string) []mdStatEntry {
	var entries []mdStatEntry
	for _, line := range strings.Split(raw, "\n") {
		if strings.HasPrefix(line, "Personalities") || strings.HasPrefix(line, "unused devices") {
			continue
		}
		if idx := strings.Index(line, " : "); idx > 0 {
			entry := mdStatEntry{Name: strings.TrimSpace(line[:idx])}
			for _, tok := range strings.Fields(line[idx+3:]) {
				if strings.HasPrefix(tok, "raid") || strings.HasPrefix(tok, "linear") {
					entry.Level = tok
				}
				if dev, ok := mdStatMemberName(tok); ok {
					entry.Members = append(entry.Members, dev)
				}
			}
			entries = append(entries, entry)
			continue
		}
		// Status lines belong to the array declared most recently.
		if n := len(entries); n > 0 {
			if m := raidMDStatDegradedRx.FindString(line); strings.Contains(m, "_") {
				entries[n-1].Degraded = true
			}
		}
	}
	return entries
}
|
||||||
|
|
||||||
|
func detectVROCController() *raidControllerInfo {
|
||||||
|
out, err := exec.Command("mdadm", "--detail-platform").CombinedOutput()
|
||||||
|
if err != nil && len(out) == 0 {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
hasVROC := false
|
||||||
|
for _, line := range strings.Split(string(out), "\n") {
|
||||||
|
lower := strings.ToLower(line)
|
||||||
|
if strings.Contains(lower, "license") || strings.Contains(lower, "intel") || strings.Contains(lower, "platform") {
|
||||||
|
hasVROC = true
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if !hasVROC {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
ctrl := &raidControllerInfo{
|
||||||
|
ID: "vroc-0",
|
||||||
|
Type: "vroc",
|
||||||
|
Model: "Intel VROC",
|
||||||
|
ForeignDrives: []raidDriveInfo{},
|
||||||
|
FreeDrives: []raidDriveInfo{},
|
||||||
|
}
|
||||||
|
|
||||||
|
inArray := map[string]bool{}
|
||||||
|
raw, err := os.ReadFile("/proc/mdstat")
|
||||||
|
if err == nil {
|
||||||
|
for _, arr := range parseRAIDMDStat(string(raw)) {
|
||||||
|
ctrl.Arrays = append(ctrl.Arrays, raidArrayInfo{
|
||||||
|
Name: arr.Name,
|
||||||
|
Level: arr.Level,
|
||||||
|
Members: arr.Members,
|
||||||
|
Degraded: arr.Degraded,
|
||||||
|
})
|
||||||
|
for _, m := range arr.Members {
|
||||||
|
inArray[m] = true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
lsblkOut, err := exec.Command("lsblk", "-J", "-d", "-o", "NAME,SIZE,TYPE,MODEL,SERIAL").Output()
|
||||||
|
if err == nil {
|
||||||
|
var lsblkDoc struct {
|
||||||
|
BlockDevices []struct {
|
||||||
|
Name string `json:"name"`
|
||||||
|
Size string `json:"size"`
|
||||||
|
Type string `json:"type"`
|
||||||
|
Model string `json:"model"`
|
||||||
|
Serial string `json:"serial"`
|
||||||
|
} `json:"blockdevices"`
|
||||||
|
}
|
||||||
|
if json.Unmarshal(lsblkOut, &lsblkDoc) == nil {
|
||||||
|
for _, d := range lsblkDoc.BlockDevices {
|
||||||
|
if d.Type != "disk" || inArray[d.Name] {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
ctrl.FreeDrives = append(ctrl.FreeDrives, raidDriveInfo{
|
||||||
|
Device: "/dev/" + d.Name,
|
||||||
|
Model: strings.TrimSpace(d.Model),
|
||||||
|
Serial: strings.TrimSpace(d.Serial),
|
||||||
|
State: "available",
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return ctrl
|
||||||
|
}
|
||||||
|
|
||||||
|
// --- API handlers ---
|
||||||
|
|
||||||
|
func (h *handler) handleAPIRAIDStatus(w http.ResponseWriter, r *http.Request) {
|
||||||
|
resp := raidStatusResp{Controllers: []raidControllerInfo{}}
|
||||||
|
|
||||||
|
if lsi := detectLSIControllers(); len(lsi) > 0 {
|
||||||
|
resp.Controllers = append(resp.Controllers, lsi...)
|
||||||
|
}
|
||||||
|
if vroc := detectVROCController(); vroc != nil {
|
||||||
|
resp.Controllers = append(resp.Controllers, *vroc)
|
||||||
|
}
|
||||||
|
|
||||||
|
writeJSON(w, resp)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (h *handler) handleAPIRAIDForeignAction(w http.ResponseWriter, r *http.Request) {
|
||||||
|
var req struct {
|
||||||
|
ControllerID string `json:"controller_id"`
|
||||||
|
Action string `json:"action"`
|
||||||
|
}
|
||||||
|
if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
|
||||||
|
writeError(w, http.StatusBadRequest, "invalid JSON")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
if req.Action != "import" && req.Action != "clear" {
|
||||||
|
writeError(w, http.StatusBadRequest, "action must be 'import' or 'clear'")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
ctrlIdx, ok := parseLSIControllerIndex(req.ControllerID)
|
||||||
|
if !ok {
|
||||||
|
writeError(w, http.StatusBadRequest, "invalid controller_id")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
target := "raid-foreign-clear"
|
||||||
|
name := fmt.Sprintf("RAID Foreign Clear (ctrl %d)", ctrlIdx)
|
||||||
|
if req.Action == "import" {
|
||||||
|
target = "raid-foreign-import"
|
||||||
|
name = fmt.Sprintf("RAID Foreign Import (ctrl %d)", ctrlIdx)
|
||||||
|
}
|
||||||
|
|
||||||
|
t := &Task{
|
||||||
|
ID: newJobID(target),
|
||||||
|
Name: name,
|
||||||
|
Target: target,
|
||||||
|
Priority: defaultTaskPriority(target, taskParams{}),
|
||||||
|
Status: TaskPending,
|
||||||
|
CreatedAt: time.Now(),
|
||||||
|
params: taskParams{RAIDController: ctrlIdx},
|
||||||
|
}
|
||||||
|
globalQueue.enqueue(t)
|
||||||
|
writeJSON(w, map[string]string{"task_id": t.ID})
|
||||||
|
}
|
||||||
|
|
||||||
|
func (h *handler) handleAPIRAIDCreateMirror(w http.ResponseWriter, r *http.Request) {
|
||||||
|
var req struct {
|
||||||
|
ControllerID string `json:"controller_id"`
|
||||||
|
Devices []string `json:"devices"`
|
||||||
|
ArrayName string `json:"array_name"`
|
||||||
|
}
|
||||||
|
if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
|
||||||
|
writeError(w, http.StatusBadRequest, "invalid JSON")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
if len(req.Devices) < 2 {
|
||||||
|
writeError(w, http.StatusBadRequest, "at least 2 devices required")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
var target, name string
|
||||||
|
var params taskParams
|
||||||
|
|
||||||
|
switch {
|
||||||
|
case strings.HasPrefix(req.ControllerID, "lsi-"):
|
||||||
|
ctrlIdx, ok := parseLSIControllerIndex(req.ControllerID)
|
||||||
|
if !ok {
|
||||||
|
writeError(w, http.StatusBadRequest, "invalid controller_id")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
target = "raid-lsi-create-mirror"
|
||||||
|
name = fmt.Sprintf("Create RAID 1 Mirror (LSI ctrl %d)", ctrlIdx)
|
||||||
|
params = taskParams{RAIDController: ctrlIdx, RAIDDevices: req.Devices}
|
||||||
|
|
||||||
|
case req.ControllerID == "vroc-0":
|
||||||
|
arrayName := strings.TrimSpace(req.ArrayName)
|
||||||
|
if arrayName == "" {
|
||||||
|
arrayName = "bee-mirror0"
|
||||||
|
}
|
||||||
|
target = "raid-vroc-create-mirror"
|
||||||
|
name = fmt.Sprintf("Create VROC RAID 1 (%s)", arrayName)
|
||||||
|
params = taskParams{RAIDDevices: req.Devices, RAIDArrayName: arrayName}
|
||||||
|
|
||||||
|
default:
|
||||||
|
writeError(w, http.StatusBadRequest, "unknown controller_id")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
t := &Task{
|
||||||
|
ID: newJobID(target),
|
||||||
|
Name: name,
|
||||||
|
Target: target,
|
||||||
|
Priority: defaultTaskPriority(target, taskParams{}),
|
||||||
|
Status: TaskPending,
|
||||||
|
CreatedAt: time.Now(),
|
||||||
|
params: params,
|
||||||
|
}
|
||||||
|
globalQueue.enqueue(t)
|
||||||
|
writeJSON(w, map[string]string{"task_id": t.ID})
|
||||||
|
}
|
||||||
|
|
||||||
|
// parseLSIControllerIndex extracts the controller number from an identifier
// of the form "lsi-<n>". The second result is false when the prefix is
// missing, the remainder is not an integer, or the integer is negative.
func parseLSIControllerIndex(id string) (int, bool) {
	const prefix = "lsi-"
	if !strings.HasPrefix(id, prefix) {
		return 0, false
	}
	idx, err := strconv.Atoi(id[len(prefix):])
	if err != nil {
		return 0, false
	}
	if idx < 0 {
		return 0, false
	}
	return idx, true
}
|
||||||
|
|
||||||
|
// --- Task runner functions ---
|
||||||
|
|
||||||
|
func runRAIDForeignClearTask(ctx context.Context, j *jobState, ctrl int) error {
|
||||||
|
j.append(fmt.Sprintf("Clearing foreign configuration on controller %d...", ctrl))
|
||||||
|
cmd := exec.CommandContext(ctx, "storcli64", fmt.Sprintf("/c%d/fall", ctrl), "del", "noprompt")
|
||||||
|
return streamCmdJob(j, cmd)
|
||||||
|
}
|
||||||
|
|
||||||
|
func runRAIDForeignImportTask(ctx context.Context, j *jobState, ctrl int) error {
|
||||||
|
j.append(fmt.Sprintf("Importing foreign configuration on controller %d...", ctrl))
|
||||||
|
cmd := exec.CommandContext(ctx, "storcli64", fmt.Sprintf("/c%d/fall", ctrl), "import", "noprompt")
|
||||||
|
return streamCmdJob(j, cmd)
|
||||||
|
}
|
||||||
|
|
||||||
|
func runRAIDLSICreateMirrorTask(ctx context.Context, j *jobState, ctrl int, drives []string) error {
|
||||||
|
driveList := strings.Join(drives, ",")
|
||||||
|
j.append(fmt.Sprintf("Creating RAID 1 on controller %d with drives: %s", ctrl, driveList))
|
||||||
|
cmd := exec.CommandContext(ctx, "storcli64",
|
||||||
|
fmt.Sprintf("/c%d", ctrl),
|
||||||
|
"add", "vd", "type=raid1",
|
||||||
|
fmt.Sprintf("drives=%s", driveList),
|
||||||
|
"pdperarray=2",
|
||||||
|
)
|
||||||
|
return streamCmdJob(j, cmd)
|
||||||
|
}
|
||||||
|
|
||||||
|
func runRAIDVROCCreateMirrorTask(ctx context.Context, j *jobState, devices []string, arrayName string) error {
|
||||||
|
if arrayName == "" {
|
||||||
|
arrayName = "bee-mirror0"
|
||||||
|
}
|
||||||
|
devPath := "/dev/md/" + arrayName
|
||||||
|
args := []string{
|
||||||
|
"--create", devPath,
|
||||||
|
"--level=1",
|
||||||
|
fmt.Sprintf("--raid-devices=%d", len(devices)),
|
||||||
|
"--run",
|
||||||
|
}
|
||||||
|
args = append(args, devices...)
|
||||||
|
j.append(fmt.Sprintf("Creating VROC RAID 1 array %s with: %s", devPath, strings.Join(devices, " ")))
|
||||||
|
cmd := exec.CommandContext(ctx, "mdadm", args...)
|
||||||
|
return streamCmdJob(j, cmd)
|
||||||
|
}
|
||||||
|
|
||||||
|
// raidParseHumanSizeGB converts a storcli size string such as "1.818 TB" or
// "745.211 GB" into gigabytes, using a factor of 1024 between units.
//
// Matching is case-insensitive and needs a space before the unit. TB, GB and
// MB are recognised, checked in that order. Empty input, an unknown unit or
// an unparsable number yields 0.
func raidParseHumanSizeGB(s string) float64 {
	text := strings.ToUpper(strings.TrimSpace(s))
	if text == "" {
		return 0
	}

	units := [...]struct {
		marker string  // substring that identifies the unit
		cut    string  // the number is everything before the first occurrence of this
		factor float64 // multiplier that converts the number to GB
	}{
		{" TB", " T", 1024},
		{" GB", " G", 1},
		{" MB", " M", 1.0 / 1024},
	}
	for _, u := range units {
		if !strings.Contains(text, u.marker) {
			continue
		}
		number := strings.TrimSpace(text[:strings.Index(text, u.cut)])
		value, err := strconv.ParseFloat(number, 64)
		if err != nil {
			return 0
		}
		return value * u.factor
	}
	return 0
}
|
||||||
|
|
||||||
|
// --- UI card ---
|
||||||
|
|
||||||
|
// renderRAIDMgmtCard returns the static HTML and JavaScript for the
// "RAID Controller Management" card.
//
// The script loads /api/tools/raid/status, renders each controller (foreign
// configuration actions for LSI, active arrays for VROC, and a "Create RAID 1
// Mirror" section for both), posts to /api/tools/raid/foreign and
// /api/tools/raid/create-mirror, and streams task output from
// /api/tasks/<id>/stream into the card's terminal element.
//
// escHtml must emit real HTML entities: its result is concatenated into
// innerHTML together with drive model and serial strings reported by the
// hardware. The script lives in an IIFE, so every function called from an
// inline onclick attribute has to be exported on window explicitly.
func renderRAIDMgmtCard() string {
	return `<div class="card"><div class="card-head card-head-actions">RAID Controller Management<div class="card-head-buttons"><button class="btn btn-sm btn-secondary" onclick="raidLoad()">↻ Refresh</button></div></div><div class="card-body">
<div id="raid-status" style="font-size:13px;color:var(--muted);margin-bottom:8px">Loading...</div>
<div id="raid-content"></div>
<div id="raid-out-wrap" style="display:none;margin-top:14px">
<div style="display:flex;align-items:center;justify-content:space-between;margin-bottom:4px">
<span id="raid-out-label" style="font-size:12px;font-weight:600;color:var(--muted)">Output</span>
<span id="raid-out-status" style="font-size:12px"></span>
</div>
<div id="raid-terminal" class="terminal" style="max-height:260px;width:100%;box-sizing:border-box"></div>
</div>
</div></div>
<script>
(function(){
function escHtml(s) {
return String(s||'').replace(/&/g,'&amp;').replace(/</g,'&lt;').replace(/>/g,'&gt;').replace(/"/g,'&quot;').replace(/'/g,'&#39;');
}

var _raidControllers = [];

function raidLoad() {
var status = document.getElementById('raid-status');
var content = document.getElementById('raid-content');
status.textContent = 'Detecting RAID controllers...';
status.style.color = 'var(--muted)';
content.innerHTML = '';
fetch('/api/tools/raid/status', {cache:'no-store'})
.then(function(r) {
if (!r.ok) return r.json().then(function(e) { throw new Error(e.error || r.statusText); });
return r.json();
})
.then(function(data) {
_raidControllers = data.controllers || [];
if (_raidControllers.length === 0) {
status.textContent = 'No RAID controllers detected.';
return;
}
status.textContent = _raidControllers.length + ' controller(s) detected.';
content.innerHTML = _raidControllers.map(function(c, i) {
return raidRenderController(c, i);
}).join('<hr style="margin:16px 0;border:none;border-top:1px solid var(--border)">');
})
.catch(function(e) {
status.textContent = 'Error: ' + e.message;
status.style.color = 'var(--crit-fg)';
});
}

function raidRenderController(c, idx) {
var html = '';
var typeLabel = c.type === 'lsi' ? 'LSI / Broadcom' : 'Intel VROC';
html += '<div style="font-weight:600;font-size:13px;margin-bottom:10px">' + typeLabel + ' — ' + escHtml(c.model) + '</div>';

if (c.type === 'lsi') {
var foreign = c.foreign_drives || [];
if (foreign.length > 0) {
html += '<div style="background:var(--warn-bg,rgba(240,192,0,0.1));border:1px solid var(--warn-border,#c8a800);border-radius:4px;padding:10px 12px;margin-bottom:12px">';
html += '<div style="font-weight:600;font-size:13px;margin-bottom:6px">⚠︎ Foreign Configuration Detected (' + foreign.length + ' drive(s))</div>';
html += '<table style="margin-bottom:10px"><tr><th>Slot</th><th>Model</th><th>Size</th><th>State</th></tr>';
foreign.forEach(function(d) {
html += '<tr>'
+ '<td style="font-family:monospace">' + escHtml(d.slot) + '</td>'
+ '<td>' + escHtml(d.model||'—') + '</td>'
+ '<td>' + (d.size_gb > 0 ? Math.round(d.size_gb) + ' GB' : '—') + '</td>'
+ '<td><span class="badge badge-warn">' + escHtml(d.state) + '</span></td>'
+ '</tr>';
});
html += '</table>';
html += '<div style="display:flex;gap:8px;flex-wrap:wrap">';
html += '<button class="btn btn-sm btn-primary" onclick="raidForeignAction(\'' + escHtml(c.id) + '\',\'import\',this)">Import Foreign Config</button>';
html += '<button class="btn btn-sm btn-secondary" style="color:var(--crit-fg)" onclick="raidForeignAction(\'' + escHtml(c.id) + '\',\'clear\',this)">Clear Foreign Config</button>';
html += '</div></div>';
}

html += raidRenderMirrorSection(c, idx, 'lsi');
}

if (c.type === 'vroc') {
var arrays = c.arrays || [];
if (arrays.length > 0) {
html += '<div style="font-size:12px;font-weight:600;color:var(--muted);margin-bottom:6px;text-transform:uppercase;letter-spacing:.04em">Active Arrays</div>';
html += '<table style="margin-bottom:14px"><tr><th>Name</th><th>Level</th><th>Members</th><th>Status</th></tr>';
arrays.forEach(function(a) {
var badge = a.degraded
? '<span class="badge badge-err">Degraded</span>'
: '<span class="badge badge-ok">OK</span>';
html += '<tr>'
+ '<td style="font-family:monospace">' + escHtml(a.name) + '</td>'
+ '<td>' + escHtml(a.level||'—') + '</td>'
+ '<td style="font-family:monospace;font-size:12px">' + (a.members||[]).map(escHtml).join(', ') + '</td>'
+ '<td>' + badge + '</td>'
+ '</tr>';
});
html += '</table>';
}

html += raidRenderMirrorSection(c, idx, 'vroc');
}

return html;
}

function raidRenderMirrorSection(c, idx, kind) {
var free = c.free_drives || [];
var html = '<div style="font-size:12px;font-weight:600;color:var(--muted);margin-bottom:6px;text-transform:uppercase;letter-spacing:.04em">Create RAID 1 Mirror</div>';

if (free.length < 2) {
html += '<p style="font-size:13px;color:var(--muted)">No unconfigured drives available (need at least 2).</p>';
return html;
}

html += '<p style="font-size:13px;color:var(--muted);margin-bottom:8px">Select exactly 2 drives:</p>';
html += '<div>';
free.forEach(function(d) {
var val = kind === 'lsi' ? d.slot : d.device;
var label = kind === 'lsi'
? escHtml(d.slot) + (d.model ? ' — ' + escHtml(d.model) : '') + (d.size_gb > 0 ? ' (' + Math.round(d.size_gb) + ' GB)' : '')
: escHtml(d.device) + (d.model ? ' — ' + escHtml(d.model) : '') + (d.serial ? ' [' + escHtml(d.serial) + ']' : '');
html += '<label style="display:block;margin-bottom:4px;font-size:13px;cursor:pointer">'
+ '<input type="checkbox" class="raid-mirror-check-' + idx + '" value="' + escHtml(val) + '"> '
+ label + '</label>';
});
html += '</div>';

if (kind === 'vroc') {
html += '<div style="margin-top:10px;display:flex;align-items:center;gap:8px;flex-wrap:wrap">'
+ '<label style="font-size:13px">Array name: <input type="text" id="vroc-arrayname-' + idx + '" value="bee-mirror0" style="font-family:monospace;padding:2px 6px;width:140px"></label>';
} else {
html += '<div style="margin-top:10px;display:flex;gap:8px">';
}

html += '<button class="btn btn-sm btn-primary raid-mirror-btn-' + idx + '" onclick="raidCreateMirror(\'' + escHtml(c.id) + '\',' + idx + ',\'' + kind + '\',this)">Create Mirror</button>';
html += '</div>';

return html;
}

function raidForeignAction(ctrlID, action, btn) {
if (action === 'clear' && !confirm('Clear foreign configuration on ' + ctrlID + '?\n\nThis will DELETE the foreign RAID metadata. Data on those drives may become inaccessible.')) {
return;
}
var original = btn ? btn.textContent : '';
if (btn) { btn.disabled = true; btn.textContent = action === 'import' ? 'Importing...' : 'Clearing...'; }
raidShowOutput('RAID foreign ' + action, '', '');
fetch('/api/tools/raid/foreign', {
method: 'POST',
headers: {'Content-Type': 'application/json'},
body: JSON.stringify({controller_id: ctrlID, action: action})
})
.then(function(r) { return r.json(); })
.then(function(d) {
if (d.error) throw new Error(d.error);
var actionLabel = action === 'import' ? 'Import foreign config' : 'Clear foreign config';
raidStreamTask(d.task_id, actionLabel, function() {
if (btn) { btn.disabled = false; btn.textContent = original; }
raidLoad();
});
})
.catch(function(e) {
raidShowOutput('Error', 'failed', e.message);
if (btn) { btn.disabled = false; btn.textContent = original; }
});
}

function raidCreateMirror(ctrlID, idx, kind, btn) {
var checks = document.querySelectorAll('.raid-mirror-check-' + idx + ':checked');
if (checks.length !== 2) {
alert('Select exactly 2 drives.');
return;
}
var devices = Array.from(checks).map(function(c) { return c.value; });
var arrayName = '';
if (kind === 'vroc') {
var nameEl = document.getElementById('vroc-arrayname-' + idx);
arrayName = nameEl ? nameEl.value.trim() : 'bee-mirror0';
if (!arrayName) arrayName = 'bee-mirror0';
}
var original = btn ? btn.textContent : '';
if (btn) { btn.disabled = true; btn.textContent = 'Creating...'; }
raidShowOutput('Create RAID 1', '', '');
fetch('/api/tools/raid/create-mirror', {
method: 'POST',
headers: {'Content-Type': 'application/json'},
body: JSON.stringify({controller_id: ctrlID, devices: devices, array_name: arrayName})
})
.then(function(r) { return r.json(); })
.then(function(d) {
if (d.error) throw new Error(d.error);
raidStreamTask(d.task_id, 'Create RAID 1 mirror', function() {
if (btn) { btn.disabled = false; btn.textContent = original; }
raidLoad();
});
})
.catch(function(e) {
raidShowOutput('Error', 'failed', e.message);
if (btn) { btn.disabled = false; btn.textContent = original; }
});
}

function raidShowOutput(label, status, text) {
var wrap = document.getElementById('raid-out-wrap');
var labelEl = document.getElementById('raid-out-label');
var statusEl = document.getElementById('raid-out-status');
var term = document.getElementById('raid-terminal');
wrap.style.display = 'block';
labelEl.textContent = label;
if (status === 'ok') {
statusEl.textContent = '✓ done';
statusEl.style.color = 'var(--ok-fg)';
} else if (status === 'failed') {
statusEl.textContent = '✗ failed';
statusEl.style.color = 'var(--crit-fg)';
} else {
statusEl.textContent = status;
statusEl.style.color = 'var(--muted)';
}
if (text !== undefined) {
term.textContent = text;
term.scrollTop = term.scrollHeight;
}
}

function raidStreamTask(taskID, taskName, onDone) {
var term = document.getElementById('raid-terminal');
term.textContent = '';
raidShowOutput(taskName || 'Running…', 'running…', undefined);
var es = new EventSource('/api/tasks/' + taskID + '/stream');
es.onmessage = function(e) {
term.textContent += e.data + '\n';
term.scrollTop = term.scrollHeight;
};
es.addEventListener('done', function(e) {
es.close();
if (!e.data) {
raidShowOutput(taskName, 'ok', undefined);
} else {
raidShowOutput(taskName, 'failed', undefined);
term.textContent += '\nFailed: ' + e.data;
term.scrollTop = term.scrollHeight;
}
if (onDone) onDone();
});
es.onerror = function() {
es.close();
raidShowOutput(taskName, 'failed', undefined);
if (onDone) onDone();
};
}

window.raidLoad = raidLoad;
window.raidForeignAction = raidForeignAction;
window.raidCreateMirror = raidCreateMirror;
raidLoad();
})();
</script>`
}
|
||||||
214
audit/internal/webui/saa_dmi.go
Normal file
214
audit/internal/webui/saa_dmi.go
Normal file
@@ -0,0 +1,214 @@
|
|||||||
|
package webui
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"encoding/json"
|
||||||
|
"fmt"
|
||||||
|
"net/http"
|
||||||
|
"os"
|
||||||
|
"os/exec"
|
||||||
|
"path/filepath"
|
||||||
|
"regexp"
|
||||||
|
"strings"
|
||||||
|
"time"
|
||||||
|
)
|
||||||
|
|
||||||
|
// dmiField is one DMI item as returned to the web UI.
type dmiField struct {
	Name  string `json:"name"`  // "<section> / <item name>", or the bare item name outside a section
	Shn   string `json:"shn"`   // short name taken from the {...} token
	Value string `json:"value"` // value with trailing comment and surrounding quotes removed
}

// saaChange is one requested DMI edit: the short name of the field and the
// new value to store.
type saaChange struct {
	Shn   string `json:"shn"`
	Value string `json:"value"`
}

var (
	// shnRE validates short names supplied by clients. It accepts the same
	// alphabet and length as dmiItemRE, so every field that can be read can
	// also be written (e.g. PS(4)LC), but rejects a leading '-' so the value
	// can never be mistaken for a command-line option.
	shnRE        = regexp.MustCompile(`^[A-Za-z0-9_()][A-Za-z0-9_()\-]{0,23}$`)
	dmiSectionRE = regexp.MustCompile(`^\[(.+?)\]$`)
	// Item Name {SHN} = value // comment
	// SHN may contain parentheses, e.g. {PS(4)LC} for power supply fields
	dmiItemRE    = regexp.MustCompile(`^(.+?)\s+\{([A-Za-z0-9_()\-]{1,24})\}\s*=\s*(.*)$`)
	dmiVersionRE = regexp.MustCompile(`(?i)^version\s*=`)
)

// dmiStripComment removes a trailing "// ..." comment from a raw item value.
//
// For a value that starts with a double quote, only text after the closing
// quote is treated as a comment, so "//" inside the quoted part is kept and a
// comment is found regardless of the whitespace in front of it. All other
// values are cut at the last " //".
func dmiStripComment(v string) string {
	if strings.HasPrefix(v, `"`) {
		if end := strings.Index(v[1:], `"`); end >= 0 {
			quoted, tail := v[:end+2], strings.TrimSpace(v[end+2:])
			if tail == "" || strings.HasPrefix(tail, "//") {
				return quoted
			}
		}
	}
	if idx := strings.LastIndex(v, " //"); idx >= 0 {
		return strings.TrimSpace(v[:idx])
	}
	return v
}

// parseDMIFile parses the DMI.txt produced by "saa GetDmiInfo".
// Real format (from SAA User Guide 4.8.1):
//
//	[System]
//	Version {SYVS} = "A Version" // string value
//	Serial Number {SYSN} = $DEFAULT$ // string value
//	UUID {SYUU} = 00112233-... // hex value
//
// Blank lines, lines starting with "//" or "#", and the top-level
// "Version = ..." line are skipped. "[Section]" lines set the prefix used in
// the display name of the items that follow. Lines that do not match the item
// pattern are ignored. Trailing comments and one pair of surrounding double
// quotes are removed from each value.
func parseDMIFile(content string) []dmiField {
	var fields []dmiField
	currentSection := ""
	for _, line := range strings.Split(content, "\n") {
		line = strings.TrimSpace(line)
		if line == "" || strings.HasPrefix(line, "//") || strings.HasPrefix(line, "#") {
			continue
		}
		if dmiVersionRE.MatchString(line) {
			continue
		}
		if m := dmiSectionRE.FindStringSubmatch(line); m != nil {
			currentSection = strings.TrimSpace(m[1])
			continue
		}
		m := dmiItemRE.FindStringSubmatch(line)
		if m == nil {
			continue
		}
		itemName := strings.TrimSpace(m[1])
		shn := m[2]
		rawValue := dmiStripComment(strings.TrimSpace(m[3]))
		// strip surrounding double quotes from string values
		if len(rawValue) >= 2 && rawValue[0] == '"' && rawValue[len(rawValue)-1] == '"' {
			rawValue = rawValue[1 : len(rawValue)-1]
		}
		displayName := itemName
		if currentSection != "" {
			displayName = currentSection + " / " + itemName
		}
		fields = append(fields, dmiField{Name: displayName, Shn: shn, Value: rawValue})
	}
	return fields
}
|
||||||
|
|
||||||
|
func (h *handler) handleAPISAADMIRead(w http.ResponseWriter, r *http.Request) {
|
||||||
|
ctx, cancel := context.WithTimeout(r.Context(), 30*time.Second)
|
||||||
|
defer cancel()
|
||||||
|
|
||||||
|
tmpDir, err := os.MkdirTemp("", "bee-saa-*")
|
||||||
|
if err != nil {
|
||||||
|
writeError(w, http.StatusInternalServerError, "create temp dir: "+err.Error())
|
||||||
|
return
|
||||||
|
}
|
||||||
|
defer os.RemoveAll(tmpDir)
|
||||||
|
|
||||||
|
dmiFile := filepath.Join(tmpDir, "DMI.txt")
|
||||||
|
cmd := exec.CommandContext(ctx, "saa", "-c", "GetDmiInfo", "--file", dmiFile, "--overwrite")
|
||||||
|
cmd.Dir = "/usr/local/bin"
|
||||||
|
out, err := cmd.CombinedOutput()
|
||||||
|
if err != nil {
|
||||||
|
msg := strings.TrimSpace(string(out))
|
||||||
|
if msg == "" {
|
||||||
|
msg = err.Error()
|
||||||
|
}
|
||||||
|
writeError(w, http.StatusInternalServerError, "saa GetDmiInfo: "+msg)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
raw, err := os.ReadFile(dmiFile)
|
||||||
|
if err != nil {
|
||||||
|
writeError(w, http.StatusInternalServerError, "read DMI file: "+err.Error())
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
fields := parseDMIFile(string(raw))
|
||||||
|
if len(fields) == 0 {
|
||||||
|
writeError(w, http.StatusInternalServerError, "no DMI fields found (file may be empty — reboot the server and try again)")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
writeJSON(w, fields)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (h *handler) handleAPISAADMIWrite(w http.ResponseWriter, r *http.Request) {
|
||||||
|
var req struct {
|
||||||
|
Changes []saaChange `json:"changes"`
|
||||||
|
}
|
||||||
|
if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
|
||||||
|
writeError(w, http.StatusBadRequest, "invalid request body")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
if len(req.Changes) == 0 {
|
||||||
|
writeError(w, http.StatusUnprocessableEntity, "no changes provided")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
for _, c := range req.Changes {
|
||||||
|
if !shnRE.MatchString(c.Shn) {
|
||||||
|
writeError(w, http.StatusUnprocessableEntity, "invalid shn: "+c.Shn)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
if len(c.Value) == 0 || len(c.Value) > 64 {
|
||||||
|
writeError(w, http.StatusUnprocessableEntity, "value length out of range for shn: "+c.Shn)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
for _, ch := range c.Value {
|
||||||
|
if ch < 0x20 || ch > 0x7E {
|
||||||
|
writeError(w, http.StatusUnprocessableEntity, "value contains non-printable character for shn: "+c.Shn)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
t := &Task{
|
||||||
|
ID: newJobID("saa-dmi-write"),
|
||||||
|
Name: fmt.Sprintf("SAA DMI Write (%d field(s))", len(req.Changes)),
|
||||||
|
Target: "saa-dmi-write",
|
||||||
|
Priority: defaultTaskPriority("saa-dmi-write", taskParams{}),
|
||||||
|
Status: TaskPending,
|
||||||
|
CreatedAt: time.Now(),
|
||||||
|
params: taskParams{
|
||||||
|
SAADmiChanges: req.Changes,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
globalQueue.enqueue(t)
|
||||||
|
writeJSON(w, map[string]string{"task_id": t.ID})
|
||||||
|
}
|
||||||
|
|
||||||
|
func runSAADMIWriteTask(ctx context.Context, j *jobState, exportDir string, p taskParams) error {
|
||||||
|
tmpDir, err := os.MkdirTemp("", "bee-saa-*")
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("create temp dir: %w", err)
|
||||||
|
}
|
||||||
|
defer os.RemoveAll(tmpDir)
|
||||||
|
dmiFile := filepath.Join(tmpDir, "DMI.txt")
|
||||||
|
|
||||||
|
j.append("Reading current DMI configuration...")
|
||||||
|
getCmd := exec.CommandContext(ctx, "saa", "-c", "GetDmiInfo", "--file", dmiFile, "--overwrite")
|
||||||
|
getCmd.Dir = "/usr/local/bin"
|
||||||
|
if err := streamCmdJob(j, getCmd); err != nil {
|
||||||
|
return fmt.Errorf("GetDmiInfo: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
backupDir := filepath.Join(exportDir, "dmi-backups")
|
||||||
|
if err := os.MkdirAll(backupDir, 0o755); err != nil {
|
||||||
|
return fmt.Errorf("create backup dir: %w", err)
|
||||||
|
}
|
||||||
|
backupName := "dmi-" + time.Now().UTC().Format("20060102-150405") + ".txt"
|
||||||
|
backupPath := filepath.Join(backupDir, backupName)
|
||||||
|
raw, err := os.ReadFile(dmiFile)
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("read DMI file: %w", err)
|
||||||
|
}
|
||||||
|
if err := os.WriteFile(backupPath, raw, 0o644); err != nil {
|
||||||
|
return fmt.Errorf("write backup: %w", err)
|
||||||
|
}
|
||||||
|
j.append("Backup saved: dmi-backups/" + backupName)
|
||||||
|
|
||||||
|
for _, c := range p.SAADmiChanges {
|
||||||
|
j.append("Setting " + c.Shn + " = " + c.Value)
|
||||||
|
cmd := exec.CommandContext(ctx, "saa", "-c", "EditDmiInfo", "--file", dmiFile, "--shn", c.Shn, "--value", c.Value)
|
||||||
|
cmd.Dir = "/usr/local/bin"
|
||||||
|
if err := streamCmdJob(j, cmd); err != nil {
|
||||||
|
return fmt.Errorf("EditDmiInfo %s: %w", c.Shn, err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
j.append("Applying changes to hardware...")
|
||||||
|
changeCmd := exec.CommandContext(ctx, "saa", "-c", "ChangeDmiInfo", "--file", dmiFile)
|
||||||
|
changeCmd.Dir = "/usr/local/bin"
|
||||||
|
if err := streamCmdJob(j, changeCmd); err != nil {
|
||||||
|
return fmt.Errorf("ChangeDmiInfo: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
j.append("Done. Reboot the server for changes to take effect.")
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
@@ -314,6 +314,15 @@ func NewHandler(opts HandlerOptions) http.Handler {
|
|||||||
mux.HandleFunc("GET /api/tools/check", h.handleAPIToolsCheck)
|
mux.HandleFunc("GET /api/tools/check", h.handleAPIToolsCheck)
|
||||||
mux.HandleFunc("GET /api/tools/nvme-formats", h.handleAPINVMeFormats)
|
mux.HandleFunc("GET /api/tools/nvme-formats", h.handleAPINVMeFormats)
|
||||||
mux.HandleFunc("POST /api/tools/nvme-format/run", h.handleAPINVMeFormatRun)
|
mux.HandleFunc("POST /api/tools/nvme-format/run", h.handleAPINVMeFormatRun)
|
||||||
|
mux.HandleFunc("GET /api/tools/saa-dmi", h.handleAPISAADMIRead)
|
||||||
|
mux.HandleFunc("POST /api/tools/saa-dmi/write", h.handleAPISAADMIWrite)
|
||||||
|
mux.HandleFunc("GET /api/tools/ipmi-fru", h.handleAPIIPMIFRURead)
|
||||||
|
mux.HandleFunc("POST /api/tools/ipmi-fru/write", h.handleAPIIPMIFRUWrite)
|
||||||
|
mux.HandleFunc("GET /api/tools/huawei-elabel", h.handleAPIHuaweiElabelRead)
|
||||||
|
mux.HandleFunc("POST /api/tools/huawei-elabel/write", h.handleAPIHuaweiElabelWrite)
|
||||||
|
mux.HandleFunc("GET /api/tools/raid/status", h.handleAPIRAIDStatus)
|
||||||
|
mux.HandleFunc("POST /api/tools/raid/foreign", h.handleAPIRAIDForeignAction)
|
||||||
|
mux.HandleFunc("POST /api/tools/raid/create-mirror", h.handleAPIRAIDCreateMirror)
|
||||||
|
|
||||||
// GPU presence / tools
|
// GPU presence / tools
|
||||||
mux.HandleFunc("GET /api/gpu/presence", h.handleAPIGPUPresence)
|
mux.HandleFunc("GET /api/gpu/presence", h.handleAPIGPUPresence)
|
||||||
@@ -325,6 +334,8 @@ func NewHandler(opts HandlerOptions) http.Handler {
|
|||||||
// System
|
// System
|
||||||
mux.HandleFunc("GET /api/system/ram-status", h.handleAPIRAMStatus)
|
mux.HandleFunc("GET /api/system/ram-status", h.handleAPIRAMStatus)
|
||||||
mux.HandleFunc("POST /api/system/install-to-ram", h.handleAPIInstallToRAM)
|
mux.HandleFunc("POST /api/system/install-to-ram", h.handleAPIInstallToRAM)
|
||||||
|
mux.HandleFunc("POST /api/system/reboot", h.handleAPISystemReboot)
|
||||||
|
mux.HandleFunc("POST /api/system/shutdown", h.handleAPISystemShutdown)
|
||||||
|
|
||||||
// Preflight
|
// Preflight
|
||||||
mux.HandleFunc("GET /api/preflight", h.handleAPIPreflight)
|
mux.HandleFunc("GET /api/preflight", h.handleAPIPreflight)
|
||||||
@@ -1419,14 +1430,17 @@ func (h *handler) handlePage(w http.ResponseWriter, r *http.Request) {
|
|||||||
if page == "" {
|
if page == "" {
|
||||||
page = "dashboard"
|
page = "dashboard"
|
||||||
}
|
}
|
||||||
// Redirect old routes to new names
|
// Redirect legacy routes to new named pages
|
||||||
switch page {
|
switch page {
|
||||||
case "tests":
|
case "validate", "tests":
|
||||||
http.Redirect(w, r, "/validate", http.StatusMovedPermanently)
|
http.Redirect(w, r, "/load", http.StatusMovedPermanently)
|
||||||
return
|
return
|
||||||
case "burn-in":
|
case "burn-in":
|
||||||
http.Redirect(w, r, "/burn", http.StatusMovedPermanently)
|
http.Redirect(w, r, "/burn", http.StatusMovedPermanently)
|
||||||
return
|
return
|
||||||
|
case "speed", "endurance":
|
||||||
|
http.Redirect(w, r, "/benchmark", http.StatusMovedPermanently)
|
||||||
|
return
|
||||||
}
|
}
|
||||||
body := renderPage(page, h.opts)
|
body := renderPage(page, h.opts)
|
||||||
w.Header().Set("Cache-Control", "no-store")
|
w.Header().Set("Cache-Control", "no-store")
|
||||||
|
|||||||
@@ -666,41 +666,51 @@ func TestTasksPageRendersOpenLinksAndPaginationControls(t *testing.T) {
|
|||||||
|
|
||||||
func TestToolsPageRendersNvidiaSelfHealSection(t *testing.T) {
|
func TestToolsPageRendersNvidiaSelfHealSection(t *testing.T) {
|
||||||
handler := NewHandler(HandlerOptions{})
|
handler := NewHandler(HandlerOptions{})
|
||||||
rec := httptest.NewRecorder()
|
|
||||||
handler.ServeHTTP(rec, httptest.NewRequest(http.MethodGet, "/tools", nil))
|
// /tools: only NVMe Block Format and Supermicro DMI remain
|
||||||
if rec.Code != http.StatusOK {
|
recTools := httptest.NewRecorder()
|
||||||
t.Fatalf("status=%d", rec.Code)
|
handler.ServeHTTP(recTools, httptest.NewRequest(http.MethodGet, "/tools", nil))
|
||||||
|
if recTools.Code != http.StatusOK {
|
||||||
|
t.Fatalf("tools status=%d", recTools.Code)
|
||||||
}
|
}
|
||||||
body := rec.Body.String()
|
toolsBody := recTools.Body.String()
|
||||||
if !strings.Contains(body, `NVIDIA Self Heal`) {
|
if !strings.Contains(toolsBody, `NVMe Block Format`) {
|
||||||
t.Fatalf("tools page missing nvidia self heal section: %s", body)
|
t.Fatalf("tools page missing nvme block format section: %s", toolsBody)
|
||||||
}
|
}
|
||||||
if !strings.Contains(body, `Restart GPU Drivers`) {
|
if !strings.Contains(toolsBody, `/api/tools/nvme-formats`) || !strings.Contains(toolsBody, `/api/tools/nvme-format/run`) {
|
||||||
t.Fatalf("tools page missing restart gpu drivers button: %s", body)
|
t.Fatalf("tools page missing nvme format api usage: %s", toolsBody)
|
||||||
}
|
}
|
||||||
if !strings.Contains(body, `nvidiaRestartDrivers()`) {
|
|
||||||
t.Fatalf("tools page missing nvidiaRestartDrivers action: %s", body)
|
// /settings: system install, support bundle, tool check, nvidia self heal, network, services
|
||||||
|
recSettings := httptest.NewRecorder()
|
||||||
|
handler.ServeHTTP(recSettings, httptest.NewRequest(http.MethodGet, "/settings", nil))
|
||||||
|
if recSettings.Code != http.StatusOK {
|
||||||
|
t.Fatalf("settings status=%d", recSettings.Code)
|
||||||
}
|
}
|
||||||
if !strings.Contains(body, `/api/gpu/nvidia-status`) {
|
settingsBody := recSettings.Body.String()
|
||||||
t.Fatalf("tools page missing nvidia status api usage: %s", body)
|
if !strings.Contains(settingsBody, `NVIDIA Self Heal`) {
|
||||||
|
t.Fatalf("settings page missing nvidia self heal section: %s", settingsBody)
|
||||||
}
|
}
|
||||||
if !strings.Contains(body, `nvidiaResetGPU(`) {
|
if !strings.Contains(settingsBody, `Restart GPU Drivers`) {
|
||||||
t.Fatalf("tools page missing nvidiaResetGPU action: %s", body)
|
t.Fatalf("settings page missing restart gpu drivers button: %s", settingsBody)
|
||||||
}
|
}
|
||||||
if !strings.Contains(body, `id="boot-source-text"`) {
|
if !strings.Contains(settingsBody, `nvidiaRestartDrivers()`) {
|
||||||
t.Fatalf("tools page missing boot source field: %s", body)
|
t.Fatalf("settings page missing nvidiaRestartDrivers action: %s", settingsBody)
|
||||||
}
|
}
|
||||||
if !strings.Contains(body, `USB Black-Box`) {
|
if !strings.Contains(settingsBody, `/api/gpu/nvidia-status`) {
|
||||||
t.Fatalf("tools page missing usb black-box section: %s", body)
|
t.Fatalf("settings page missing nvidia status api usage: %s", settingsBody)
|
||||||
}
|
}
|
||||||
if !strings.Contains(body, `/api/blackbox/status`) {
|
if !strings.Contains(settingsBody, `nvidiaResetGPU(`) {
|
||||||
t.Fatalf("tools page missing black-box status api usage: %s", body)
|
t.Fatalf("settings page missing nvidiaResetGPU action: %s", settingsBody)
|
||||||
}
|
}
|
||||||
if !strings.Contains(body, `NVMe Block Format`) {
|
if !strings.Contains(settingsBody, `id="boot-source-text"`) {
|
||||||
t.Fatalf("tools page missing nvme block format section: %s", body)
|
t.Fatalf("settings page missing boot source field: %s", settingsBody)
|
||||||
}
|
}
|
||||||
if !strings.Contains(body, `/api/tools/nvme-formats`) || !strings.Contains(body, `/api/tools/nvme-format/run`) {
|
if !strings.Contains(settingsBody, `USB Black-Box`) {
|
||||||
t.Fatalf("tools page missing nvme format api usage: %s", body)
|
t.Fatalf("settings page missing usb black-box section: %s", settingsBody)
|
||||||
|
}
|
||||||
|
if !strings.Contains(settingsBody, `/api/blackbox/status`) {
|
||||||
|
t.Fatalf("settings page missing black-box status api usage: %s", settingsBody)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -791,46 +801,45 @@ func TestBenchmarkPageRendersSavedResultsTable(t *testing.T) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestValidatePageRendersNvidiaTargetedStressCard(t *testing.T) {
|
func TestCheckPageRendersGPUSelectionAndNvidiaCards(t *testing.T) {
|
||||||
handler := NewHandler(HandlerOptions{})
|
handler := NewHandler(HandlerOptions{})
|
||||||
rec := httptest.NewRecorder()
|
rec := httptest.NewRecorder()
|
||||||
handler.ServeHTTP(rec, httptest.NewRequest(http.MethodGet, "/validate", nil))
|
handler.ServeHTTP(rec, httptest.NewRequest(http.MethodGet, "/check", nil))
|
||||||
if rec.Code != http.StatusOK {
|
if rec.Code != http.StatusOK {
|
||||||
t.Fatalf("status=%d", rec.Code)
|
t.Fatalf("status=%d", rec.Code)
|
||||||
}
|
}
|
||||||
body := rec.Body.String()
|
body := rec.Body.String()
|
||||||
for _, needle := range []string{
|
for _, needle := range []string{
|
||||||
`NVIDIA GPU Targeted Stress`,
|
|
||||||
`nvidia-targeted-stress`,
|
|
||||||
`controlled NVIDIA DCGM load`,
|
|
||||||
`<code>dcgmi diag targeted_stress</code>`,
|
|
||||||
`NVIDIA GPU Selection`,
|
`NVIDIA GPU Selection`,
|
||||||
`All NVIDIA validate tasks use only the GPUs selected here.`,
|
|
||||||
`Select All`,
|
|
||||||
`id="sat-gpu-list"`,
|
`id="sat-gpu-list"`,
|
||||||
|
`Select All`,
|
||||||
|
`id="sat-btn-nvidia"`,
|
||||||
|
`NVIDIA Interconnect (NCCL)`,
|
||||||
|
`NVIDIA Bandwidth (NVBandwidth)`,
|
||||||
|
`Non-destructive`,
|
||||||
} {
|
} {
|
||||||
if !strings.Contains(body, needle) {
|
if !strings.Contains(body, needle) {
|
||||||
t.Fatalf("validate page missing %q: %s", needle, body)
|
t.Fatalf("check page missing %q: %s", needle, body)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestValidatePageRendersNvidiaFabricCardsInValidateMode(t *testing.T) {
|
func TestCheckPageRendersNvidiaFabricCards(t *testing.T) {
|
||||||
handler := NewHandler(HandlerOptions{})
|
handler := NewHandler(HandlerOptions{})
|
||||||
rec := httptest.NewRecorder()
|
rec := httptest.NewRecorder()
|
||||||
handler.ServeHTTP(rec, httptest.NewRequest(http.MethodGet, "/validate", nil))
|
handler.ServeHTTP(rec, httptest.NewRequest(http.MethodGet, "/check", nil))
|
||||||
if rec.Code != http.StatusOK {
|
if rec.Code != http.StatusOK {
|
||||||
t.Fatalf("status=%d", rec.Code)
|
t.Fatalf("status=%d", rec.Code)
|
||||||
}
|
}
|
||||||
body := rec.Body.String()
|
body := rec.Body.String()
|
||||||
for _, needle := range []string{
|
for _, needle := range []string{
|
||||||
`NVIDIA Interconnect (NCCL)`,
|
`NVIDIA Interconnect (NCCL)`,
|
||||||
`Validate and Stress:`,
|
|
||||||
`NVIDIA Bandwidth (NVBandwidth)`,
|
`NVIDIA Bandwidth (NVBandwidth)`,
|
||||||
`nvbandwidth runs all built-in tests without a time limit`,
|
`nvbandwidth`,
|
||||||
|
`all_reduce_perf`,
|
||||||
} {
|
} {
|
||||||
if !strings.Contains(body, needle) {
|
if !strings.Contains(body, needle) {
|
||||||
t.Fatalf("validate page missing %q: %s", needle, body)
|
t.Fatalf("check page missing %q: %s", needle, body)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -847,7 +856,6 @@ func TestBurnPageRendersGoalBasedNVIDIACards(t *testing.T) {
|
|||||||
`NVIDIA Max Compute Load`,
|
`NVIDIA Max Compute Load`,
|
||||||
`dcgmproftester`,
|
`dcgmproftester`,
|
||||||
`NCCL`,
|
`NCCL`,
|
||||||
`Validate → Stress mode`,
|
|
||||||
`id="burn-gpu-list"`,
|
`id="burn-gpu-list"`,
|
||||||
} {
|
} {
|
||||||
if !strings.Contains(body, needle) {
|
if !strings.Contains(body, needle) {
|
||||||
@@ -1219,7 +1227,8 @@ func TestDashboardRendersRuntimeHealthTable(t *testing.T) {
|
|||||||
],
|
],
|
||||||
"services":[
|
"services":[
|
||||||
{"name":"bee-web","status":"active"},
|
{"name":"bee-web","status":"active"},
|
||||||
{"name":"bee-nvidia","status":"inactive"}
|
{"name":"bee-audit","status":"inactive"},
|
||||||
|
{"name":"bee-nvidia","status":"failed"}
|
||||||
]
|
]
|
||||||
}`
|
}`
|
||||||
if err := os.WriteFile(filepath.Join(exportDir, "runtime-health.json"), []byte(health), 0644); err != nil {
|
if err := os.WriteFile(filepath.Join(exportDir, "runtime-health.json"), []byte(health), 0644); err != nil {
|
||||||
@@ -1273,7 +1282,7 @@ func TestDashboardRendersRuntimeHealthTable(t *testing.T) {
|
|||||||
`Bee Services`,
|
`Bee Services`,
|
||||||
`CUDA runtime is not ready for GPU SAT.`,
|
`CUDA runtime is not ready for GPU SAT.`,
|
||||||
`Missing: nvidia-smi`,
|
`Missing: nvidia-smi`,
|
||||||
`bee-nvidia=inactive`,
|
`bee-nvidia=failed`,
|
||||||
// Hardware Summary card — component health badges
|
// Hardware Summary card — component health badges
|
||||||
`Hardware Summary`,
|
`Hardware Summary`,
|
||||||
`>CPU<`,
|
`>CPU<`,
|
||||||
|
|||||||
@@ -382,6 +382,40 @@ func executeTaskWithOptions(opts *HandlerOptions, t *Task, j *jobState, ctx cont
|
|||||||
break
|
break
|
||||||
}
|
}
|
||||||
err = runNVMeFormatTask(ctx, j, t.params.Device, t.params.LBAF)
|
err = runNVMeFormatTask(ctx, j, t.params.Device, t.params.LBAF)
|
||||||
|
case "saa-dmi-write":
|
||||||
|
if len(t.params.SAADmiChanges) == 0 {
|
||||||
|
err = fmt.Errorf("no changes provided")
|
||||||
|
break
|
||||||
|
}
|
||||||
|
err = runSAADMIWriteTask(ctx, j, opts.ExportDir, t.params)
|
||||||
|
case "ipmi-fru-write":
|
||||||
|
if len(t.params.FRUChanges) == 0 {
|
||||||
|
err = fmt.Errorf("no changes provided")
|
||||||
|
break
|
||||||
|
}
|
||||||
|
err = runIPMIFRUWriteTask(ctx, j, opts.ExportDir, t.params)
|
||||||
|
case "huawei-elabel-write":
|
||||||
|
if len(t.params.HuaweiElabelChanges) == 0 {
|
||||||
|
err = fmt.Errorf("no changes provided")
|
||||||
|
break
|
||||||
|
}
|
||||||
|
err = runHuaweiElabelWriteTask(ctx, j, t.params)
|
||||||
|
case "raid-foreign-clear":
|
||||||
|
err = runRAIDForeignClearTask(ctx, j, t.params.RAIDController)
|
||||||
|
case "raid-foreign-import":
|
||||||
|
err = runRAIDForeignImportTask(ctx, j, t.params.RAIDController)
|
||||||
|
case "raid-lsi-create-mirror":
|
||||||
|
if len(t.params.RAIDDevices) < 2 {
|
||||||
|
err = fmt.Errorf("at least 2 drives required")
|
||||||
|
break
|
||||||
|
}
|
||||||
|
err = runRAIDLSICreateMirrorTask(ctx, j, t.params.RAIDController, t.params.RAIDDevices)
|
||||||
|
case "raid-vroc-create-mirror":
|
||||||
|
if len(t.params.RAIDDevices) < 2 {
|
||||||
|
err = fmt.Errorf("at least 2 devices required")
|
||||||
|
break
|
||||||
|
}
|
||||||
|
err = runRAIDVROCCreateMirrorTask(ctx, j, t.params.RAIDDevices, t.params.RAIDArrayName)
|
||||||
default:
|
default:
|
||||||
j.append("ERROR: unknown target: " + t.Target)
|
j.append("ERROR: unknown target: " + t.Target)
|
||||||
j.finish("unknown target")
|
j.finish("unknown target")
|
||||||
|
|||||||
@@ -140,6 +140,12 @@ type taskParams struct {
|
|||||||
Device string `json:"device,omitempty"` // for install
|
Device string `json:"device,omitempty"` // for install
|
||||||
LBAF int `json:"lbaf,omitempty"`
|
LBAF int `json:"lbaf,omitempty"`
|
||||||
PlatformComponents []string `json:"platform_components,omitempty"`
|
PlatformComponents []string `json:"platform_components,omitempty"`
|
||||||
|
SAADmiChanges []saaChange `json:"saa_dmi_changes,omitempty"`
|
||||||
|
FRUChanges []fruChange `json:"fru_changes,omitempty"`
|
||||||
|
HuaweiElabelChanges []huaweiChange `json:"huawei_elabel_changes,omitempty"`
|
||||||
|
RAIDController int `json:"raid_controller,omitempty"`
|
||||||
|
RAIDDevices []string `json:"raid_devices,omitempty"`
|
||||||
|
RAIDArrayName string `json:"raid_array_name,omitempty"`
|
||||||
}
|
}
|
||||||
|
|
||||||
type persistedTask struct {
|
type persistedTask struct {
|
||||||
|
|||||||
@@ -13,6 +13,7 @@ Generic engineering rules live in `bible/rules/patterns/`.
|
|||||||
| `docs/hardware-ingest-contract.md` | Current Reanimator hardware ingest JSON contract |
|
| `docs/hardware-ingest-contract.md` | Current Reanimator hardware ingest JSON contract |
|
||||||
| `docs/validate-vs-burn.md` | Validate and Validate -> Stress hardware test policy |
|
| `docs/validate-vs-burn.md` | Validate and Validate -> Stress hardware test policy |
|
||||||
| `decisions/` | Architectural decision log, including read-only submodule policy |
|
| `decisions/` | Architectural decision log, including read-only submodule policy |
|
||||||
|
| `proposals/` | RFCs and contract change proposals for Reanimator Core |
|
||||||
|
|
||||||
## Validate Test Matrix
|
## Validate Test Matrix
|
||||||
|
|
||||||
|
|||||||
@@ -1,5 +1,103 @@
|
|||||||
# Backlog
|
# Backlog
|
||||||
|
|
||||||
|
## Сбор SFP-модулей
|
||||||
|
|
||||||
|
**Статус:** не реализовано.
|
||||||
|
|
||||||
|
### Источник данных
|
||||||
|
|
||||||
|
`ethtool -m <iface>` / `ethtool --module-info <iface>` — читает EEPROM SFP/SFP+/QSFP28/QSFP-DD по стандарту MSA (SFF-8472 / SFF-8636).
|
||||||
|
|
||||||
|
Доступные поля из EEPROM:
|
||||||
|
- Идентификатор модуля: `Identifier` (SFP, SFP+, QSFP28, …)
|
||||||
|
- Тип коннектора: `Connector`
|
||||||
|
- Вендор: `Vendor name`, `Vendor OUI`, `Vendor PN`, `Vendor SN`, `Vendor rev`
|
||||||
|
- Оптика: `Wavelength`, `Transceiver type` (10GBase-SR, LR, DAC, …)
|
||||||
|
- Телеметрия DOM (если модуль поддерживает): `Laser tx bias current`, `Transmit avg optical power`, `Receive avg optical power`, `Module temperature`, `Module voltage`
|
||||||
|
- Статус: `Rx power high alarm`, `Tx power low warning`, …
|
||||||
|
|
||||||
|
Для QSFP28 данные повторяются на 4 канала (lane 0–3).
|
||||||
|
|
||||||
|
Инструмент требует root. На bee ISO — доступен (`ethtool` входит в образ).
|
||||||
|
|
||||||
|
### Scope для bee
|
||||||
|
|
||||||
|
1. Собирать список сетевых интерфейсов через `ip -j link show` (только `ether`, без `lo`/VLAN/bond).
|
||||||
|
2. Для каждого интерфейса пробовать `ethtool -m <iface>`. Если модуль отсутствует или не поддерживает EEPROM read — тихо пропускать.
|
||||||
|
3. Связывать интерфейс с PCIe-устройством через `ethtool -i <iface>` → поле `bus-info` (BDF) → сопоставление с `pcie_devices[].slot`.
|
||||||
|
|
||||||
|
### Gap в контракте
|
||||||
|
|
||||||
|
Текущий контракт v2.10 имеет в `pcie_devices[]` скалярные поля:
|
||||||
|
- `sfp_temperature_c`, `sfp_tx_power_dbm`, `sfp_rx_power_dbm`, `sfp_voltage_v`, `sfp_bias_ma`
|
||||||
|
|
||||||
|
Этого **недостаточно**:
|
||||||
|
- Одна NIC-карта может иметь несколько портов — нужен массив, а не скаляр.
|
||||||
|
- Нет полей идентификации модуля (vendor, part_number, serial_number, wavelength, connector).
|
||||||
|
- Нет разбивки по каналам для QSFP28.
|
||||||
|
|
||||||
|
### Предлагаемое расширение контракта
|
||||||
|
|
||||||
|
Добавить в `pcie_devices[]` массив `sfp_modules[]`:
|
||||||
|
|
||||||
|
```json
|
||||||
|
"pcie_devices": [
|
||||||
|
{
|
||||||
|
"slot": "0000:3b:00.0",
|
||||||
|
"device_class": "EthernetController",
|
||||||
|
"model": "ConnectX-6 Dx",
|
||||||
|
"sfp_modules": [
|
||||||
|
{
|
||||||
|
"port": 0,
|
||||||
|
"identifier": "QSFP28",
|
||||||
|
"connector": "LC",
|
||||||
|
"vendor": "Mellanox",
|
||||||
|
"part_number": "MFA1A00-C003",
|
||||||
|
"serial_number": "MT2124VS09999",
|
||||||
|
"revision": "A",
|
||||||
|
"wavelength_nm": 850,
|
||||||
|
"transceiver_type": "100GBase-SR4",
|
||||||
|
"temperature_c": 36.4,
|
||||||
|
"voltage_v": 3.29,
|
||||||
|
"tx_power_dbm": -1.8,
|
||||||
|
"rx_power_dbm": -2.1,
|
||||||
|
"bias_ma": 7.2
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
]
|
||||||
|
```
|
||||||
|
|
||||||
|
Поля `sfp_modules[]`:
|
||||||
|
|
||||||
|
| Поле | Тип | Описание |
|
||||||
|
|---|---|---|
|
||||||
|
| `port` | int | Номер порта на NIC (0-based) |
|
||||||
|
| `identifier` | string | `SFP`, `SFP+`, `QSFP28`, `QSFP-DD`, … |
|
||||||
|
| `connector` | string | `LC`, `MPO`, `DAC`, … |
|
||||||
|
| `vendor` | string | Производитель модуля |
|
||||||
|
| `part_number` | string | Партномер |
|
||||||
|
| `serial_number` | string | Серийный номер |
|
||||||
|
| `revision` | string | Ревизия |
|
||||||
|
| `wavelength_nm` | int | Длина волны, нм |
|
||||||
|
| `transceiver_type` | string | `10GBase-SR`, `100GBase-SR4`, `DAC`, … |
|
||||||
|
| `temperature_c` | float | Температура модуля, °C |
|
||||||
|
| `voltage_v` | float | Напряжение, В |
|
||||||
|
| `tx_power_dbm` | float | TX оптическая мощность, dBm |
|
||||||
|
| `rx_power_dbm` | float | RX оптическая мощность, dBm |
|
||||||
|
| `bias_ma` | float | Bias current, мА |
|
||||||
|
|
||||||
|
Старые скалярные поля `sfp_temperature_c` / `sfp_tx_power_dbm` / `sfp_rx_power_dbm` / `sfp_voltage_v` / `sfp_bias_ma` на уровне `pcie_devices[]` — **вывести из контракта** (deprecated), заменить на `sfp_modules[]`.
|
||||||
|
|
||||||
|
### Порядок реализации
|
||||||
|
|
||||||
|
1. Согласовать расширение контракта с Reanimator Core (bump до v2.11).
|
||||||
|
2. Добавить `ethtool` parser в `audit/internal/collector/` — новый файл `sfp.go`.
|
||||||
|
3. Дополнить schema в `audit/internal/schema/` типом `SFPModule`.
|
||||||
|
4. Добавить `sfp_modules` в `PCIeDevice` в schema.
|
||||||
|
5. Заполнять в NIC-коллекторе: связь интерфейс → BDF → `pcie_devices[].sfp_modules`.
|
||||||
|
6. Показывать в TUI и web UI в разделе PCIe/NIC.
|
||||||
|
|
||||||
## BMC версия через IPMI
|
## BMC версия через IPMI
|
||||||
|
|
||||||
**Статус:** реализовано.
|
**Статус:** реализовано.
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
---
|
---
|
||||||
title: Hardware Ingest JSON Contract
|
title: Hardware Ingest JSON Contract
|
||||||
version: "2.10"
|
version: "2.11"
|
||||||
updated: "2026-04-29"
|
updated: "2026-06-19"
|
||||||
maintainer: Reanimator Core
|
maintainer: Reanimator Core
|
||||||
audience: external-integrators, ai-agents
|
audience: external-integrators, ai-agents
|
||||||
language: ru
|
language: ru
|
||||||
@@ -9,7 +9,7 @@ language: ru
|
|||||||
|
|
||||||
# Интеграция с Reanimator: контракт JSON-импорта аппаратного обеспечения
|
# Интеграция с Reanimator: контракт JSON-импорта аппаратного обеспечения
|
||||||
|
|
||||||
Версия: **2.10** · Дата: **2026-04-29**
|
Версия: **2.11** · Дата: **2026-06-19**
|
||||||
|
|
||||||
Документ описывает формат JSON для передачи данных об аппаратном обеспечении серверов в систему **Reanimator** (управление жизненным циклом аппаратного обеспечения).
|
Документ описывает формат JSON для передачи данных об аппаратном обеспечении серверов в систему **Reanimator** (управление жизненным циклом аппаратного обеспечения).
|
||||||
Предназначен для разработчиков смежных систем (Redfish-коллекторов, агентов мониторинга, CMDB-экспортёров) и может быть включён в документацию интегрируемых проектов.
|
Предназначен для разработчиков смежных систем (Redfish-коллекторов, агентов мониторинга, CMDB-экспортёров) и может быть включён в документацию интегрируемых проектов.
|
||||||
@@ -22,6 +22,7 @@ language: ru
|
|||||||
|
|
||||||
| Версия | Дата | Изменения |
|
| Версия | Дата | Изменения |
|
||||||
|--------|------|-----------|
|
|--------|------|-----------|
|
||||||
|
| 2.11 | 2026-06-19 | В `pcie_devices[]` добавлен необязательный массив `sfp_modules[]` с идентификацией и DOM telemetry SFP/QSFP-модулей. Скалярные поля `sfp_temperature_c` / `sfp_tx_power_dbm` / `sfp_rx_power_dbm` / `sfp_voltage_v` / `sfp_bias_ma` помечены как deprecated (принимаются, но `sfp_modules[]` имеет приоритет) |
|
||||||
| 2.10 | 2026-04-29 | Для `hardware.storage[]` добавлены необязательные числовые поля `logical_block_size_bytes`, `physical_block_size_bytes`, `metadata_bytes_per_block` для нормализованного описания формата блока накопителя |
|
| 2.10 | 2026-04-29 | Для `hardware.storage[]` добавлены необязательные числовые поля `logical_block_size_bytes`, `physical_block_size_bytes`, `metadata_bytes_per_block` для нормализованного описания формата блока накопителя |
|
||||||
| 2.9 | 2026-03-19 | Добавлена необязательная секция `hardware.platform_config` — произвольный объект с настройками платформы (BIOS/Redfish); хранится как latest-snapshot per machine |
|
| 2.9 | 2026-03-19 | Добавлена необязательная секция `hardware.platform_config` — произвольный объект с настройками платформы (BIOS/Redfish); хранится как latest-snapshot per machine |
|
||||||
| 2.8 | 2026-03-15 | Поле `location` удалено из всех `sensors.*`; сенсоры передаются только по `name` и измеренным значениям |
|
| 2.8 | 2026-03-15 | Поле `location` удалено из всех `sensors.*`; сенсоры передаются только по `name` и измеренным значениям |
|
||||||
@@ -422,11 +423,12 @@ GET /ingest/hardware/jobs/{job_id}
|
|||||||
| `battery_temperature_c` | float | нет | Температура батареи / supercap, °C |
|
| `battery_temperature_c` | float | нет | Температура батареи / supercap, °C |
|
||||||
| `battery_voltage_v` | float | нет | Напряжение батареи / supercap, В |
|
| `battery_voltage_v` | float | нет | Напряжение батареи / supercap, В |
|
||||||
| `battery_replace_required` | bool | нет | Требуется замена батареи / supercap |
|
| `battery_replace_required` | bool | нет | Требуется замена батареи / supercap |
|
||||||
| `sfp_temperature_c` | float | нет | Температура SFP/optic, °C |
|
| `sfp_temperature_c` | float | нет | Температура SFP/optic, °C *(deprecated since 2.11)* |
|
||||||
| `sfp_tx_power_dbm` | float | нет | TX optical power, dBm |
|
| `sfp_tx_power_dbm` | float | нет | TX optical power, dBm *(deprecated since 2.11)* |
|
||||||
| `sfp_rx_power_dbm` | float | нет | RX optical power, dBm |
|
| `sfp_rx_power_dbm` | float | нет | RX optical power, dBm *(deprecated since 2.11)* |
|
||||||
| `sfp_voltage_v` | float | нет | Напряжение SFP, В |
|
| `sfp_voltage_v` | float | нет | Напряжение SFP, В *(deprecated since 2.11)* |
|
||||||
| `sfp_bias_ma` | float | нет | Bias current SFP, мА |
|
| `sfp_bias_ma` | float | нет | Bias current SFP, мА *(deprecated since 2.11)* |
|
||||||
|
| `sfp_modules` | array | нет | Установленные SFP/QSFP-модули по портам (см. sfp_modules[]) |
|
||||||
| `bdf` | string | нет | Deprecated alias для `slot`; при наличии ingest нормализует его в `slot` |
|
| `bdf` | string | нет | Deprecated alias для `slot`; при наличии ingest нормализует его в `slot` |
|
||||||
| `device_class` | string | нет | Класс устройства (см. список ниже) |
|
| `device_class` | string | нет | Класс устройства (см. список ниже) |
|
||||||
| `manufacturer` | string | нет | Производитель |
|
| `manufacturer` | string | нет | Производитель |
|
||||||
@@ -444,10 +446,43 @@ GET /ingest/hardware/jobs/{job_id}
|
|||||||
`numa_node` передавайте для NIC / InfiniBand / RAID / GPU, когда источник знает CPU/NUMA affinity. Поле сохраняется в snapshot-атрибутах PCIe-компонента и дублируется в telemetry для topology use cases.
|
`numa_node` передавайте для NIC / InfiniBand / RAID / GPU, когда источник знает CPU/NUMA affinity. Поле сохраняется в snapshot-атрибутах PCIe-компонента и дублируется в telemetry для topology use cases.
|
||||||
Поля `temperature_c` и `power_w` используйте для device-level telemetry GPU / accelerator / smart PCIe devices. Они не влияют на идентификацию компонента.
|
Поля `temperature_c` и `power_w` используйте для device-level telemetry GPU / accelerator / smart PCIe devices. Они не влияют на идентификацию компонента.
|
||||||
|
|
||||||
|
**Deprecated поля sfp_\*:** Скалярные поля `sfp_temperature_c`, `sfp_tx_power_dbm`, `sfp_rx_power_dbm`, `sfp_voltage_v`, `sfp_bias_ma` продолжают приниматься, но помечены как deprecated since 2.11. Если в payload одновременно присутствуют `sfp_modules[]` и deprecated sfp_-скаляры — приоритет у `sfp_modules[]`, скаляры игнорируются. Deprecated поля будут удалены в версии 3.0.
|
||||||
|
|
||||||
**Генерация serial_number при отсутствии или `"N/A"`:** `{board_serial}-PCIE-{slot}`, где `slot` для PCIe равен BDF.
|
**Генерация serial_number при отсутствии или `"N/A"`:** `{board_serial}-PCIE-{slot}`, где `slot` для PCIe равен BDF.
|
||||||
|
|
||||||
`slot` — единственный канонический адрес компонента. Для PCIe в `slot` передавайте BDF. Поле `bdf` сохраняется только как переходный alias на входе и не должно использоваться как отдельная координата рядом со `slot`.
|
`slot` — единственный канонический адрес компонента. Для PCIe в `slot` передавайте BDF. Поле `bdf` сохраняется только как переходный alias на входе и не должно использоваться как отдельная координата рядом со `slot`.
|
||||||
|
|
||||||
|
#### pcie_devices[].sfp_modules[]
|
||||||
|
|
||||||
|
Необязательный массив установленных SFP/QSFP-модулей для данного PCIe-устройства. Один элемент — один порт. Используйте для многопортовых NIC (ConnectX-6 Dx, Intel X710, Mellanox HDR и др.).
|
||||||
|
|
||||||
|
| Поле | Тип | Обязательно | Описание |
|
||||||
|
|------|-----|-------------|----------|
|
||||||
|
| `port` | int | **да** | Номер порта на NIC (0-based). Ключ дедупликации внутри устройства |
|
||||||
|
| `identifier` | string | нет | Тип модуля: `SFP`, `SFP+`, `SFP28`, `QSFP+`, `QSFP28`, `QSFP-DD`, `DAC` |
|
||||||
|
| `connector` | string | нет | Тип разъёма: `LC`, `MPO`, `RJ45`, `DAC`, `AOC`, `No separable connector` |
|
||||||
|
| `vendor` | string | нет | Производитель модуля из EEPROM |
|
||||||
|
| `part_number` | string | нет | Партномер из EEPROM |
|
||||||
|
| `serial_number` | string | нет | Серийный номер из EEPROM |
|
||||||
|
| `revision` | string | нет | Ревизия из EEPROM |
|
||||||
|
| `wavelength_nm` | int | нет | Длина волны, нм (0 для DAC/медных кабелей) |
|
||||||
|
| `transceiver_type` | string | нет | `10GBase-SR`, `10GBase-LR`, `25GBase-SR`, `100GBase-SR4`, `DAC`, … |
|
||||||
|
| `temperature_c` | float | нет | Температура модуля, °C (DOM telemetry) |
|
||||||
|
| `voltage_v` | float | нет | Напряжение питания, В (DOM telemetry) |
|
||||||
|
| `tx_power_dbm` | float | нет | TX оптическая мощность, dBm (DOM telemetry) |
|
||||||
|
| `rx_power_dbm` | float | нет | RX оптическая мощность, dBm (DOM telemetry) |
|
||||||
|
| `bias_ma` | float | нет | Bias current, мА (DOM telemetry) |
|
||||||
|
|
||||||
|
**Ключ дедупликации:** `(pcie_devices[].slot, sfp_modules[].port)`.
|
||||||
|
|
||||||
|
**Правила ingest:**
|
||||||
|
- При каждом импорте — полная замена `sfp_modules[]` для данного `pcie_devices[].slot` (upsert всего массива целиком).
|
||||||
|
- Если `sfp_modules` отсутствует или `null` — существующие данные SFP не трогать.
|
||||||
|
- Если `sfp_modules: []` (пустой массив) — трактовать как «модули не обнаружены», очистить сохранённые данные.
|
||||||
|
- Дубли по `port` внутри одного `pcie_devices[]` — невалидны, endpoint возвращает `400` с описанием поля.
|
||||||
|
- Модули без `serial_number` допустимы (многие DAC-кабели не имеют SN); сохраняются по ключу `(slot, port)`.
|
||||||
|
- Изменение `serial_number` или `part_number` модуля на порту создаёт событие `COMPONENT_CHANGED` для PCIe-устройства с описанием «SFP module replaced on port N».
|
||||||
|
|
||||||
**Значения `device_class`:**
|
**Значения `device_class`:**
|
||||||
|
|
||||||
| Значение | Назначение |
|
| Значение | Назначение |
|
||||||
@@ -472,16 +507,47 @@ GET /ingest/hardware/jobs/{job_id}
|
|||||||
"numa_node": 0,
|
"numa_node": 0,
|
||||||
"temperature_c": 48.5,
|
"temperature_c": 48.5,
|
||||||
"power_w": 18.2,
|
"power_w": 18.2,
|
||||||
"sfp_temperature_c": 36.2,
|
|
||||||
"sfp_tx_power_dbm": -1.8,
|
|
||||||
"sfp_rx_power_dbm": -2.1,
|
|
||||||
"device_class": "EthernetController",
|
"device_class": "EthernetController",
|
||||||
"manufacturer": "Intel",
|
"manufacturer": "Mellanox",
|
||||||
"model": "X710 10GbE",
|
"model": "ConnectX-6 Dx",
|
||||||
"serial_number": "K65472-003",
|
"serial_number": "MT2012X12345",
|
||||||
"firmware": "9.20 0x8000d4ae",
|
"firmware": "22.35.2010",
|
||||||
"mac_addresses": ["3c:fd:fe:aa:bb:cc", "3c:fd:fe:aa:bb:cd"],
|
"mac_addresses": ["3c:fd:fe:aa:bb:cc", "3c:fd:fe:aa:bb:cd"],
|
||||||
"status": "OK"
|
"status": "OK",
|
||||||
|
"sfp_modules": [
|
||||||
|
{
|
||||||
|
"port": 0,
|
||||||
|
"identifier": "QSFP28",
|
||||||
|
"connector": "LC",
|
||||||
|
"vendor": "Mellanox",
|
||||||
|
"part_number": "MFA1A00-C003",
|
||||||
|
"serial_number": "MT2124VS09999",
|
||||||
|
"revision": "A",
|
||||||
|
"wavelength_nm": 850,
|
||||||
|
"transceiver_type": "100GBase-SR4",
|
||||||
|
"temperature_c": 36.4,
|
||||||
|
"voltage_v": 3.29,
|
||||||
|
"tx_power_dbm": -1.8,
|
||||||
|
"rx_power_dbm": -2.1,
|
||||||
|
"bias_ma": 7.2
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"port": 1,
|
||||||
|
"identifier": "QSFP28",
|
||||||
|
"connector": "LC",
|
||||||
|
"vendor": "Mellanox",
|
||||||
|
"part_number": "MFA1A00-C003",
|
||||||
|
"serial_number": "MT2124VS09998",
|
||||||
|
"revision": "A",
|
||||||
|
"wavelength_nm": 850,
|
||||||
|
"transceiver_type": "100GBase-SR4",
|
||||||
|
"temperature_c": 35.9,
|
||||||
|
"voltage_v": 3.28,
|
||||||
|
"tx_power_dbm": -1.9,
|
||||||
|
"rx_power_dbm": -2.3,
|
||||||
|
"bias_ma": 7.1
|
||||||
|
}
|
||||||
|
]
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
```
|
```
|
||||||
@@ -793,7 +859,24 @@ PSU без `serial_number` игнорируется.
|
|||||||
"model": "X710 10GbE",
|
"model": "X710 10GbE",
|
||||||
"serial_number": "K65472-003",
|
"serial_number": "K65472-003",
|
||||||
"mac_addresses": ["3c:fd:fe:aa:bb:cc", "3c:fd:fe:aa:bb:cd"],
|
"mac_addresses": ["3c:fd:fe:aa:bb:cc", "3c:fd:fe:aa:bb:cd"],
|
||||||
"status": "OK"
|
"status": "OK",
|
||||||
|
"sfp_modules": [
|
||||||
|
{
|
||||||
|
"port": 0,
|
||||||
|
"identifier": "SFP+",
|
||||||
|
"connector": "LC",
|
||||||
|
"vendor": "Intel",
|
||||||
|
"part_number": "FTLX8574D3BCV-IT",
|
||||||
|
"serial_number": "FNS123456789",
|
||||||
|
"wavelength_nm": 850,
|
||||||
|
"transceiver_type": "10GBase-SR",
|
||||||
|
"temperature_c": 34.1,
|
||||||
|
"voltage_v": 3.30,
|
||||||
|
"tx_power_dbm": -2.5,
|
||||||
|
"rx_power_dbm": -3.0,
|
||||||
|
"bias_ma": 6.8
|
||||||
|
}
|
||||||
|
]
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"power_supplies": [
|
"power_supplies": [
|
||||||
|
|||||||
201
bible-local/proposals/2026-06-19-sfp-modules-contract-rfc.md
Normal file
201
bible-local/proposals/2026-06-19-sfp-modules-contract-rfc.md
Normal file
@@ -0,0 +1,201 @@
|
|||||||
|
# RFC: SFP Module Identity and Telemetry in Hardware Ingest Contract
|
||||||
|
|
||||||
|
**Date:** 2026-06-19
|
||||||
|
**Status:** proposal
|
||||||
|
**Target:** Reanimator Core — ingest schema, validation, and storage
|
||||||
|
**Requested by:** bee (hardware audit LiveCD)
|
||||||
|
**Contract version affected:** 2.10 → 2.11
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Задача для агента
|
||||||
|
|
||||||
|
Внести изменения в контракт ingest и реализовать их в Reanimator Core:
|
||||||
|
|
||||||
|
1. Добавить массив `sfp_modules[]` внутри `pcie_devices[]`.
|
||||||
|
2. Обновить JSON-схему и валидатор ingest endpoint.
|
||||||
|
3. Реализовать хранение/upsert данных SFP-модулей.
|
||||||
|
4. Пометить скалярные поля `sfp_*` на уровне `pcie_devices[]` как deprecated.
|
||||||
|
5. Обновить документ контракта (`hardware-ingest-contract.md`) до версии 2.11.
|
||||||
|
|
||||||
|
Ниже — всё необходимое для принятия решений без дополнительных вопросов.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Контекст: откуда берутся данные
|
||||||
|
|
||||||
|
**Источник в bee:** `ethtool -m <iface>` (читает EEPROM SFP/SFP+/QSFP28/QSFP-DD по спецификациям SFF-8472 (SFP/SFP+), SFF-8636 (QSFP+/QSFP28) и CMIS (QSFP-DD)).
|
||||||
|
|
||||||
|
**Связь с PCIe:** `ethtool -i <iface>` возвращает `bus-info` = BDF (`0000:3b:00.0`), который совпадает с `pcie_devices[].slot`. Так bee связывает SFP-данные конкретного интерфейса с PCIe-устройством.
|
||||||
|
|
||||||
|
**Один NIC — несколько модулей:** карта ConnectX-6 Dx (2 порта), Intel X710 (4 порта), Mellanox HDR (2 порта). Каждый порт — отдельный `ethtool -m`, отдельный SFP-модуль. Одного скаляра на устройство недостаточно.
|
||||||
|
|
||||||
|
**QSFP28/QSFP-DD:** 4-канальные модули возвращают telemetry отдельно по каждому каналу (lane). В предложенной схеме lane-уровень не включён в первую версию — только агрегированные значения модуля в целом. Расширение до lane-уровня — отдельный RFC, если понадобится.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Проблема с текущим контрактом v2.10
|
||||||
|
|
||||||
|
В `pcie_devices[]` есть пять скалярных полей:
|
||||||
|
|
||||||
|
```
|
||||||
|
sfp_temperature_c float
|
||||||
|
sfp_tx_power_dbm float
|
||||||
|
sfp_rx_power_dbm float
|
||||||
|
sfp_voltage_v float
|
||||||
|
sfp_bias_ma float
|
||||||
|
```
|
||||||
|
|
||||||
|
Ограничения:
|
||||||
|
- **Нет идентификации модуля** — vendor, part_number, serial_number, wavelength отсутствуют; модуль нельзя инвентаризировать как самостоятельный компонент.
|
||||||
|
- **Только один набор значений на устройство** — невозможно описать 4-портовый NIC.
|
||||||
|
- **Нет типа модуля** — SFP, QSFP28, DAC-кабель не различаются.
|
||||||
|
- **Нет connector/transceiver_type** — невозможно понять, оптика это или медь.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Предлагаемое изменение схемы
|
||||||
|
|
||||||
|
### Новая структура `sfp_modules[]`
|
||||||
|
|
||||||
|
Добавляется как необязательное поле внутри каждого объекта `pcie_devices[]`.
|
||||||
|
|
||||||
|
```json
|
||||||
|
"pcie_devices": [
|
||||||
|
{
|
||||||
|
"slot": "0000:3b:00.0",
|
||||||
|
"device_class": "EthernetController",
|
||||||
|
"model": "ConnectX-6 Dx",
|
||||||
|
"manufacturer": "Mellanox",
|
||||||
|
"serial_number": "MT2012X12345",
|
||||||
|
"status": "OK",
|
||||||
|
"sfp_modules": [
|
||||||
|
{
|
||||||
|
"port": 0,
|
||||||
|
"identifier": "QSFP28",
|
||||||
|
"connector": "LC",
|
||||||
|
"vendor": "Mellanox",
|
||||||
|
"part_number": "MFA1A00-C003",
|
||||||
|
"serial_number": "MT2124VS09999",
|
||||||
|
"revision": "A",
|
||||||
|
"wavelength_nm": 850,
|
||||||
|
"transceiver_type": "100GBase-SR4",
|
||||||
|
"temperature_c": 36.4,
|
||||||
|
"voltage_v": 3.29,
|
||||||
|
"tx_power_dbm": -1.8,
|
||||||
|
"rx_power_dbm": -2.1,
|
||||||
|
"bias_ma": 7.2
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"port": 1,
|
||||||
|
"identifier": "QSFP28",
|
||||||
|
"connector": "LC",
|
||||||
|
"vendor": "Mellanox",
|
||||||
|
"part_number": "MFA1A00-C003",
|
||||||
|
"serial_number": "MT2124VS09998",
|
||||||
|
"revision": "A",
|
||||||
|
"wavelength_nm": 850,
|
||||||
|
"transceiver_type": "100GBase-SR4",
|
||||||
|
"temperature_c": 35.9,
|
||||||
|
"voltage_v": 3.28,
|
||||||
|
"tx_power_dbm": -1.9,
|
||||||
|
"rx_power_dbm": -2.3,
|
||||||
|
"bias_ma": 7.1
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
]
|
||||||
|
```
|
||||||
|
|
||||||
|
### Поля `sfp_modules[]`
|
||||||
|
|
||||||
|
| Поле | Тип | Обязательно | Описание |
|
||||||
|
|---|---|---|---|
|
||||||
|
| `port` | int | **да** | Номер порта на NIC (0-based). Ключ дедупликации внутри устройства. |
|
||||||
|
| `identifier` | string | нет | Тип модуля: `SFP`, `SFP+`, `SFP28`, `QSFP+`, `QSFP28`, `QSFP-DD`, `DAC` |
|
||||||
|
| `connector` | string | нет | Тип разъёма: `LC`, `MPO`, `RJ45`, `DAC`, `AOC`, `No separable connector` |
|
||||||
|
| `vendor` | string | нет | Производитель модуля из EEPROM |
|
||||||
|
| `part_number` | string | нет | Партномер из EEPROM |
|
||||||
|
| `serial_number` | string | нет | Серийный номер из EEPROM |
|
||||||
|
| `revision` | string | нет | Ревизия из EEPROM |
|
||||||
|
| `wavelength_nm` | int | нет | Длина волны, нм (0 для DAC/медных кабелей) |
|
||||||
|
| `transceiver_type` | string | нет | `10GBase-SR`, `10GBase-LR`, `25GBase-SR`, `100GBase-SR4`, `DAC`, … |
|
||||||
|
| `temperature_c` | float | нет | Температура модуля, °C (DOM telemetry) |
|
||||||
|
| `voltage_v` | float | нет | Напряжение питания, В (DOM telemetry) |
|
||||||
|
| `tx_power_dbm` | float | нет | TX оптическая мощность, dBm (DOM telemetry) |
|
||||||
|
| `rx_power_dbm` | float | нет | RX оптическая мощность, dBm (DOM telemetry) |
|
||||||
|
| `bias_ma` | float | нет | Bias current, мА (DOM telemetry) |
|
||||||
|
|
||||||
|
**Ключ дедупликации:** `(pcie_devices[].slot, sfp_modules[].port)`.
|
||||||
|
|
||||||
|
**Модули без серийного номера** — допустимы; многие DAC-кабели не имеют SN. Не игнорировать, сохранять по ключу `(slot, port)`.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Deprecated поля
|
||||||
|
|
||||||
|
Следующие поля на уровне `pcie_devices[]` помечаются как **deprecated** начиная с v2.11:
|
||||||
|
|
||||||
|
```
|
||||||
|
sfp_temperature_c
|
||||||
|
sfp_tx_power_dbm
|
||||||
|
sfp_rx_power_dbm
|
||||||
|
sfp_voltage_v
|
||||||
|
sfp_bias_ma
|
||||||
|
```
|
||||||
|
|
||||||
|
**Поведение при получении deprecated полей:**
|
||||||
|
- Продолжать принимать и сохранять (не ломать существующих интеграторов).
|
||||||
|
- Если одновременно присутствуют `sfp_modules[]` и deprecated скаляры — приоритет у `sfp_modules[]`; скаляры игнорируются.
|
||||||
|
- В документации пометить как `deprecated since 2.11, will be removed in 3.0`.
|
||||||
|
|
||||||
|
**Не удалять** deprecated поля из валидации в этом PR — только пометить в документации и changelog.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Правила ingest для `sfp_modules[]`
|
||||||
|
|
||||||
|
- `sfp_modules[]` хранится как snapshot-атрибут PCIe-компонента (аналогично `mac_addresses`).
|
||||||
|
- При каждом импорте — полная замена `sfp_modules[]` для данного `pcie_devices[].slot` (upsert всего массива целиком, не merge по портам).
|
||||||
|
- Если `sfp_modules` отсутствует или `null` — существующие данные SFP не трогать (не затирать).
|
||||||
|
- Если `sfp_modules: []` (пустой массив) — трактовать как «модули не обнаружены», очистить сохранённые данные.
|
||||||
|
- Изменение `serial_number` или `part_number` модуля на порту — создавать событие `COMPONENT_CHANGED` для PCIe-устройства с описанием «SFP module replaced on port N».
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Изменения в документе контракта
|
||||||
|
|
||||||
|
Файл: `bible-local/docs/hardware-ingest-contract.md`
|
||||||
|
|
||||||
|
1. Заголовок версии: `2.10` → `2.11`, дата → `2026-06-19`.
|
||||||
|
2. Добавить в changelog:
|
||||||
|
```
|
||||||
|
| 2.11 | 2026-06-19 | В `pcie_devices[]` добавлен необязательный массив `sfp_modules[]`
|
||||||
|
с идентификацией и DOM telemetry SFP/QSFP-модулей. Скалярные поля
|
||||||
|
sfp_temperature_c / sfp_tx_power_dbm / sfp_rx_power_dbm / sfp_voltage_v /
|
||||||
|
sfp_bias_ma помечены как deprecated (принимаются, но sfp_modules[] имеет приоритет). |
|
||||||
|
```
|
||||||
|
3. В секции `pcie_devices` добавить строку в таблицу полей:
|
||||||
|
```
|
||||||
|
| `sfp_modules` | array | нет | Установленные SFP/QSFP-модули по портам (см. sfp_modules[]) |
|
||||||
|
```
|
||||||
|
4. Добавить подсекцию `#### pcie_devices[].sfp_modules[]` с таблицей полей и примером JSON (из раздела выше).
|
||||||
|
5. Пометить deprecated поля в таблице: добавить суффикс `*(deprecated since 2.11, will be removed in 3.0)*`.
|
||||||
|
6. Обновить полный пример JSON — добавить `sfp_modules` к NIC-записи в `pcie_devices`.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Что не нужно делать в этом PR
|
||||||
|
|
||||||
|
- Не добавлять lane-level данные QSFP (tx_power_dbm_lane_0 и т.п.) — отдельный RFC.
|
||||||
|
- Не удалять deprecated поля — только пометить.
|
||||||
|
- Не создавать отдельную top-level секцию `network_ports` — данные остаются вложенными в `pcie_devices`.
|
||||||
|
- Не менять логику идентификации PCIe-компонента — `serial_number` SFP-модуля не является ключом для самостоятельного компонента.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Валидация
|
||||||
|
|
||||||
|
Единственное обязательное поле в `sfp_modules[]` — `port` (int, >= 0).
|
||||||
|
Все остальные поля опциональны.
|
||||||
|
Дубли по `port` внутри одного `pcie_devices[]` — невалидны, возвращать `400` с описанием поля.
|
||||||
@@ -9,7 +9,7 @@ NCCL_TESTS_VERSION=2.13.10
|
|||||||
NVCC_VERSION=12.8
|
NVCC_VERSION=12.8
|
||||||
CUBLAS_VERSION=13.1.1.3-1
|
CUBLAS_VERSION=13.1.1.3-1
|
||||||
CUDA_USERSPACE_VERSION=13.0.96-1
|
CUDA_USERSPACE_VERSION=13.0.96-1
|
||||||
DCGM_VERSION=4.5.3-1
|
DCGM_VERSION=4.6.0-1
|
||||||
JOHN_JUMBO_COMMIT=67fcf9fe5a
|
JOHN_JUMBO_COMMIT=67fcf9fe5a
|
||||||
ROCM_VERSION=6.3.4
|
ROCM_VERSION=6.3.4
|
||||||
ROCM_SMI_VERSION=7.4.0.60304-76~22.04
|
ROCM_SMI_VERSION=7.4.0.60304-76~22.04
|
||||||
|
|||||||
@@ -1419,6 +1419,13 @@ rm -rf \
|
|||||||
if [ "$BEE_GPU_VENDOR" != "nvidia" ]; then
|
if [ "$BEE_GPU_VENDOR" != "nvidia" ]; then
|
||||||
rm -f "${OVERLAY_STAGE_DIR}/usr/local/bin/bee-nvidia-load"
|
rm -f "${OVERLAY_STAGE_DIR}/usr/local/bin/bee-nvidia-load"
|
||||||
rm -f "${OVERLAY_STAGE_DIR}/etc/systemd/system/bee-nvidia.service"
|
rm -f "${OVERLAY_STAGE_DIR}/etc/systemd/system/bee-nvidia.service"
|
||||||
|
rm -f "${OVERLAY_STAGE_DIR}/usr/local/bin/bee-gpu-burn"
|
||||||
|
rm -f "${OVERLAY_STAGE_DIR}/usr/local/bin/bee-john-gpu-stress"
|
||||||
|
rm -f "${OVERLAY_STAGE_DIR}/usr/local/bin/bee-nccl-gpu-stress"
|
||||||
|
rm -f "${OVERLAY_STAGE_DIR}/usr/local/bin/bee-nvidia-recover"
|
||||||
|
rm -f "${OVERLAY_STAGE_DIR}/usr/local/bin/bee-dcgmproftester-staggered"
|
||||||
|
rm -f "${OVERLAY_STAGE_DIR}/usr/local/bin/bee-check-nvswitch"
|
||||||
|
rm -rf "${OVERLAY_STAGE_DIR}/etc/systemd/system/nvidia-fabricmanager.service.d"
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# --- inject authorized_keys for SSH access ---
|
# --- inject authorized_keys for SSH access ---
|
||||||
@@ -1473,7 +1480,7 @@ cp "${BUILDER_DIR}/smoketest.sh" "${OVERLAY_STAGE_DIR}/usr/local/bin/bee-smokete
|
|||||||
chmod +x "${OVERLAY_STAGE_DIR}/usr/local/bin/bee-smoketest"
|
chmod +x "${OVERLAY_STAGE_DIR}/usr/local/bin/bee-smoketest"
|
||||||
|
|
||||||
# --- vendor utilities (optional pre-fetched binaries) ---
|
# --- vendor utilities (optional pre-fetched binaries) ---
|
||||||
for tool in storcli64 sas2ircu sas3ircu arcconf ssacli; do
|
for tool in storcli64 sas2ircu sas3ircu arcconf ssacli saa; do
|
||||||
if [ -f "${VENDOR_DIR}/${tool}" ]; then
|
if [ -f "${VENDOR_DIR}/${tool}" ]; then
|
||||||
cp "${VENDOR_DIR}/${tool}" "${OVERLAY_STAGE_DIR}/usr/local/bin/${tool}"
|
cp "${VENDOR_DIR}/${tool}" "${OVERLAY_STAGE_DIR}/usr/local/bin/${tool}"
|
||||||
chmod +x "${OVERLAY_STAGE_DIR}/usr/local/bin/${tool}" || true
|
chmod +x "${OVERLAY_STAGE_DIR}/usr/local/bin/${tool}" || true
|
||||||
@@ -1483,6 +1490,17 @@ for tool in storcli64 sas2ircu sas3ircu arcconf ssacli; do
|
|||||||
fi
|
fi
|
||||||
done
|
done
|
||||||
|
|
||||||
|
# saa companion directories — saa searches for these relative to CWD (/usr/local/bin)
|
||||||
|
for saa_subdir in acpica_bin ExternalData tool stunnel GO_SNMP; do
|
||||||
|
if [ -d "${VENDOR_DIR}/${saa_subdir}" ]; then
|
||||||
|
cp -r "${VENDOR_DIR}/${saa_subdir}" "${OVERLAY_STAGE_DIR}/usr/local/bin/"
|
||||||
|
find "${OVERLAY_STAGE_DIR}/usr/local/bin/${saa_subdir}" -type f -exec chmod +x {} \; 2>/dev/null || true
|
||||||
|
echo "vendor saa: ${saa_subdir}/ (included)"
|
||||||
|
else
|
||||||
|
echo "vendor saa: ${saa_subdir}/ (not found, skipped)"
|
||||||
|
fi
|
||||||
|
done
|
||||||
|
|
||||||
# --- NVIDIA kernel modules and userspace libs ---
|
# --- NVIDIA kernel modules and userspace libs ---
|
||||||
if [ "$BEE_GPU_VENDOR" = "nvidia" ]; then
|
if [ "$BEE_GPU_VENDOR" = "nvidia" ]; then
|
||||||
run_step "build NVIDIA ${NVIDIA_DRIVER_VERSION} modules" "40-nvidia-module" \
|
run_step "build NVIDIA ${NVIDIA_DRIVER_VERSION} modules" "40-nvidia-module" \
|
||||||
|
|||||||
@@ -1,3 +1,8 @@
|
|||||||
|
[Unit]
|
||||||
|
# bee-nvidia.service loads the NVIDIA kernel modules; fabricmanager must wait
|
||||||
|
# for them to be fully initialized before attempting to open /dev/nvidiactl.
|
||||||
|
After=bee-nvidia.service
|
||||||
|
|
||||||
[Service]
|
[Service]
|
||||||
# Skip fabricmanager on systems without NVSwitch hardware.
|
# Skip fabricmanager on systems without NVSwitch hardware.
|
||||||
# ExecCondition exits 1-254 → unit is silently skipped (inactive, not failed).
|
# ExecCondition exits 1-254 → unit is silently skipped (inactive, not failed).
|
||||||
|
|||||||
@@ -67,7 +67,8 @@ if ! mkdir "${LOCK_DIR}" 2>/dev/null; then
|
|||||||
fi
|
fi
|
||||||
trap 'rmdir "${LOCK_DIR}" >/dev/null 2>&1 || true' EXIT
|
trap 'rmdir "${LOCK_DIR}" >/dev/null 2>&1 || true' EXIT
|
||||||
|
|
||||||
if have_nvidia_gpu && [ ! -e /dev/nvidia0 ]; then
|
GPU_VENDOR=$(cat /etc/bee-gpu-vendor 2>/dev/null || echo "")
|
||||||
|
if [ "$GPU_VENDOR" = "nvidia" ] && have_nvidia_gpu && [ ! -e /dev/nvidia0 ]; then
|
||||||
log_event "NVIDIA GPU detected but /dev/nvidia0 is missing"
|
log_event "NVIDIA GPU detected but /dev/nvidia0 is missing"
|
||||||
restart_service bee-nvidia.service || true
|
restart_service bee-nvidia.service || true
|
||||||
fi
|
fi
|
||||||
|
|||||||
1131
iso/vendor/ExternalData/SMCIPID.txt
vendored
Normal file
1131
iso/vendor/ExternalData/SMCIPID.txt
vendored
Normal file
File diff suppressed because it is too large
Load Diff
2333
iso/vendor/ExternalData/VENID.txt
vendored
Normal file
2333
iso/vendor/ExternalData/VENID.txt
vendored
Normal file
File diff suppressed because it is too large
Load Diff
37
iso/vendor/ExternalData/supportAutoDST
vendored
Normal file
37
iso/vendor/ExternalData/supportAutoDST
vendored
Normal file
@@ -0,0 +1,37 @@
|
|||||||
|
(UTC-10:00) Aleutian Islands
|
||||||
|
(UTC-09:00) Alaska
|
||||||
|
(UTC-08:00) Baja California
|
||||||
|
(UTC-08:00) Pacific Time (US & Canada)
|
||||||
|
(UTC-07:00) Mountain Time (US & Canada)
|
||||||
|
(UTC-06:00) Central Time (US & Canada)
|
||||||
|
(UTC-06:00) Easter Island
|
||||||
|
(UTC-05:00) Eastern Time (US & Canada)
|
||||||
|
(UTC-05:00) Haiti
|
||||||
|
(UTC-05:00) Havana
|
||||||
|
(UTC-05:00) Indiana (East)
|
||||||
|
(UTC-05:00) Turks and Caicos
|
||||||
|
(UTC-04:00) Asuncion
|
||||||
|
(UTC-04:00) Atlantic Time (Canada)
|
||||||
|
(UTC-04:00) Santiago
|
||||||
|
(UTC-03:30) Newfoundland
|
||||||
|
(UTC-03:00) Saint Pierre and Miquelon
|
||||||
|
(UTC-01:00) Azores
|
||||||
|
(UTC+00:00) Dublin, Edinburgh, Lisbon, London
|
||||||
|
(UTC+01:00) Casablanca
|
||||||
|
(UTC+01:00) Amsterdam, Berlin, Bern, Rome, Stockholm, Vienna
|
||||||
|
(UTC+01:00) Belgrade, Bratislava, Budapest, Ljubljana, Prague
|
||||||
|
(UTC+01:00) Brussels, Copenhagen, Madrid, Paris
|
||||||
|
(UTC+01:00) Sarajevo, Skopje, Warsaw, Zagreb
|
||||||
|
(UTC+02:00) Athens, Bucharest
|
||||||
|
(UTC+02:00) Beirut
|
||||||
|
(UTC+02:00) Chisinau
|
||||||
|
(UTC+02:00) Gaza, Hebron
|
||||||
|
(UTC+02:00) Helsinki, Kyiv, Riga, Sofia, Tallinn, Vilnius
|
||||||
|
(UTC+02:00) Jerusalem
|
||||||
|
(UTC+09:30) Adelaide
|
||||||
|
(UTC+10:00) Canberra, Melbourne, Sydney
|
||||||
|
(UTC+10:00) Hobart
|
||||||
|
(UTC+10:30) Lord Howe Island
|
||||||
|
(UTC+11:00) Norfolk Island
|
||||||
|
(UTC+12:00) Auckland, Wellington
|
||||||
|
(UTC+12:45) Chatham Islands
|
||||||
139
iso/vendor/ExternalData/timezone.txt
vendored
Normal file
139
iso/vendor/ExternalData/timezone.txt
vendored
Normal file
@@ -0,0 +1,139 @@
|
|||||||
|
(UTC-12:00) International Date Line West
|
||||||
|
(UTC-11:00) Coordinated Universal Time-11
|
||||||
|
(UTC-10:00) Aleutian Islands
|
||||||
|
(UTC-10:00) Hawaii
|
||||||
|
(UTC-09:30) Marquesas Islands
|
||||||
|
(UTC-09:00) Alaska
|
||||||
|
(UTC-09:00) Coordinated Universal Time-09
|
||||||
|
(UTC-08:00) Baja California
|
||||||
|
(UTC-08:00) Coordinated Universal Time-08
|
||||||
|
(UTC-08:00) Pacific Time (US & Canada)
|
||||||
|
(UTC-07:00) Arizona
|
||||||
|
(UTC-07:00) Chihuahua, La Paz, Mazatlan
|
||||||
|
(UTC-07:00) Mountain Time (US & Canada)
|
||||||
|
(UTC-07:00) Yukon
|
||||||
|
(UTC-06:00) Central America
|
||||||
|
(UTC-06:00) Central Time (US & Canada)
|
||||||
|
(UTC-06:00) Easter Island
|
||||||
|
(UTC-06:00) Guadalajara, Mexico City, Monterrey
|
||||||
|
(UTC-06:00) Saskatchewan
|
||||||
|
(UTC-05:00) Bogota, Lima, Quito, Rio Branco
|
||||||
|
(UTC-05:00) Chetumal
|
||||||
|
(UTC-05:00) Eastern Time (US & Canada)
|
||||||
|
(UTC-05:00) Haiti
|
||||||
|
(UTC-05:00) Havana
|
||||||
|
(UTC-05:00) Indiana (East)
|
||||||
|
(UTC-05:00) Turks and Caicos
|
||||||
|
(UTC-04:00) Atlantic Time (Canada)
|
||||||
|
(UTC-04:00) Caracas
|
||||||
|
(UTC-04:00) Cuiaba
|
||||||
|
(UTC-04:00) Georgetown, La Paz, Manaus, San Juan
|
||||||
|
(UTC-04:00) Santiago
|
||||||
|
(UTC-03:30) Newfoundland
|
||||||
|
(UTC-03:00) Asuncion
|
||||||
|
(UTC-03:00) Araguaina
|
||||||
|
(UTC-03:00) Brasilia
|
||||||
|
(UTC-03:00) Cayenne, Fortaleza
|
||||||
|
(UTC-03:00) City of Buenos Aires
|
||||||
|
(UTC-03:00) Greenland
|
||||||
|
(UTC-03:00) Montevideo
|
||||||
|
(UTC-03:00) Punta Arenas
|
||||||
|
(UTC-03:00) Saint Pierre and Miquelon
|
||||||
|
(UTC-03:00) Salvador
|
||||||
|
(UTC-02:00) Coordinated Universal Time-02
|
||||||
|
(UTC-01:00) Azores
|
||||||
|
(UTC-01:00) Cabo Verde Is.
|
||||||
|
(UTC+00:00) Coordinated Universal Time
|
||||||
|
(UTC+00:00) Dublin, Edinburgh, Lisbon, London
|
||||||
|
(UTC+00:00) Monrovia, Reykjavik
|
||||||
|
(UTC+00:00) Sao Tome
|
||||||
|
(UTC+01:00) Casablanca
|
||||||
|
(UTC+01:00) Amsterdam, Berlin, Bern, Rome, Stockholm, Vienna
|
||||||
|
(UTC+01:00) Belgrade, Bratislava, Budapest, Ljubljana, Prague
|
||||||
|
(UTC+01:00) Brussels, Copenhagen, Madrid, Paris
|
||||||
|
(UTC+01:00) Sarajevo, Skopje, Warsaw, Zagreb
|
||||||
|
(UTC+01:00) West Central Africa
|
||||||
|
(UTC+02:00) Amman
|
||||||
|
(UTC+02:00) Athens, Bucharest
|
||||||
|
(UTC+02:00) Beirut
|
||||||
|
(UTC+02:00) Cairo
|
||||||
|
(UTC+02:00) Chisinau
|
||||||
|
(UTC+02:00) Damascus
|
||||||
|
(UTC+02:00) Gaza, Hebron
|
||||||
|
(UTC+02:00) Harare, Pretoria
|
||||||
|
(UTC+02:00) Helsinki, Kyiv, Riga, Sofia, Tallinn, Vilnius
|
||||||
|
(UTC+02:00) Jerusalem
|
||||||
|
(UTC+02:00) Juba
|
||||||
|
(UTC+02:00) Kaliningrad
|
||||||
|
(UTC+02:00) Khartoum
|
||||||
|
(UTC+02:00) Tripoli
|
||||||
|
(UTC+02:00) Windhoek
|
||||||
|
(UTC+03:00) Baghdad
|
||||||
|
(UTC+03:00) Istanbul
|
||||||
|
(UTC+03:00) Kuwait, Riyadh
|
||||||
|
(UTC+03:00) Minsk
|
||||||
|
(UTC+03:00) Moscow, St. Petersburg
|
||||||
|
(UTC+03:00) Nairobi
|
||||||
|
(UTC+03:00) Volgograd
|
||||||
|
(UTC+03:30) Tehran
|
||||||
|
(UTC+04:00) Abu Dhabi, Muscat
|
||||||
|
(UTC+04:00) Astrakhan, Ulyanovsk
|
||||||
|
(UTC+04:00) Baku
|
||||||
|
(UTC+04:00) Izhevsk, Samara
|
||||||
|
(UTC+04:00) Port Louis
|
||||||
|
(UTC+04:00) Saratov
|
||||||
|
(UTC+04:00) Tbilisi
|
||||||
|
(UTC+04:00) Yerevan
|
||||||
|
(UTC+04:30) Kabul
|
||||||
|
(UTC+05:00) Ashgabat, Tashkent
|
||||||
|
(UTC+05:00) Astana
|
||||||
|
(UTC+05:00) Ekaterinburg
|
||||||
|
(UTC+05:00) Islamabad, Karachi
|
||||||
|
(UTC+05:00) Qyzylorda
|
||||||
|
(UTC+05:30) Chennai, Kolkata, Mumbai, New Delhi
|
||||||
|
(UTC+05:30) Sri Jayawardenepura
|
||||||
|
(UTC+05:45) Kathmandu
|
||||||
|
(UTC+06:00) Dhaka
|
||||||
|
(UTC+06:00) Omsk
|
||||||
|
(UTC+06:30) Yangon (Rangoon)
|
||||||
|
(UTC+07:00) Bangkok, Hanoi, Jakarta
|
||||||
|
(UTC+07:00) Barnaul, Gorno-Altaysk
|
||||||
|
(UTC+07:00) Hovd
|
||||||
|
(UTC+07:00) Krasnoyarsk
|
||||||
|
(UTC+07:00) Novosibirsk
|
||||||
|
(UTC+07:00) Tomsk
|
||||||
|
(UTC+08:00) Beijing, Chongqing, Hong Kong, Urumqi
|
||||||
|
(UTC+08:00) Irkutsk
|
||||||
|
(UTC+08:00) Kuala Lumpur, Singapore
|
||||||
|
(UTC+08:00) Perth
|
||||||
|
(UTC+08:00) Taipei
|
||||||
|
(UTC+08:00) Ulaanbaatar
|
||||||
|
(UTC+08:45) Eucla
|
||||||
|
(UTC+09:00) Chita
|
||||||
|
(UTC+09:00) Osaka, Sapporo, Tokyo
|
||||||
|
(UTC+09:00) Pyongyang
|
||||||
|
(UTC+09:00) Seoul
|
||||||
|
(UTC+09:00) Yakutsk
|
||||||
|
(UTC+09:30) Adelaide
|
||||||
|
(UTC+09:30) Darwin
|
||||||
|
(UTC+10:00) Brisbane
|
||||||
|
(UTC+10:00) Canberra, Melbourne, Sydney
|
||||||
|
(UTC+10:00) Guam, Port Moresby
|
||||||
|
(UTC+10:00) Hobart
|
||||||
|
(UTC+10:00) Vladivostok
|
||||||
|
(UTC+10:30) Lord Howe Island
|
||||||
|
(UTC+11:00) Bougainville Island
|
||||||
|
(UTC+11:00) Chokurdakh
|
||||||
|
(UTC+11:00) Magadan
|
||||||
|
(UTC+11:00) Norfolk Island
|
||||||
|
(UTC+11:00) Sakhalin
|
||||||
|
(UTC+11:00) Solomon Is., New Caledonia
|
||||||
|
(UTC+12:00) Anadyr, Petropavlovsk-Kamchatsky
|
||||||
|
(UTC+12:00) Auckland, Wellington
|
||||||
|
(UTC+12:00) Coordinated Universal Time+12
|
||||||
|
(UTC+12:00) Fiji
|
||||||
|
(UTC+12:45) Chatham Islands
|
||||||
|
(UTC+13:00) Coordinated Universal Time+13
|
||||||
|
(UTC+13:00) Nuku'alofa
|
||||||
|
(UTC+13:00) Samoa
|
||||||
|
(UTC+14:00) Kiritimati Island
|
||||||
BIN
iso/vendor/ExternalData/tui.fnt
vendored
Normal file
BIN
iso/vendor/ExternalData/tui.fnt
vendored
Normal file
Binary file not shown.
BIN
iso/vendor/GO_SNMP/AlertServer
vendored
Executable file
BIN
iso/vendor/GO_SNMP/AlertServer
vendored
Executable file
Binary file not shown.
BIN
iso/vendor/acpica_bin/acpidump
vendored
Executable file
BIN
iso/vendor/acpica_bin/acpidump
vendored
Executable file
Binary file not shown.
BIN
iso/vendor/acpica_bin/acpiexec
vendored
Executable file
BIN
iso/vendor/acpica_bin/acpiexec
vendored
Executable file
Binary file not shown.
BIN
iso/vendor/saa
vendored
Executable file
BIN
iso/vendor/saa
vendored
Executable file
Binary file not shown.
BIN
iso/vendor/stunnel/stunnel64
vendored
Executable file
BIN
iso/vendor/stunnel/stunnel64
vendored
Executable file
Binary file not shown.
BIN
iso/vendor/tool/USBController/ASMedia/114xfwdl
vendored
Executable file
BIN
iso/vendor/tool/USBController/ASMedia/114xfwdl
vendored
Executable file
Binary file not shown.
BIN
iso/vendor/tool/gpu/nVidia/x64/nvuflash
vendored
Executable file
BIN
iso/vendor/tool/gpu/nVidia/x64/nvuflash
vendored
Executable file
Binary file not shown.
BIN
iso/vendor/tool/gpu/nVidia/x64/setrom
vendored
Executable file
BIN
iso/vendor/tool/gpu/nVidia/x64/setrom
vendored
Executable file
Binary file not shown.
Reference in New Issue
Block a user