Compare commits

...

5 Commits

Author SHA1 Message Date
Mikhail Chusavitin
11ea640626 power ramp: fix missing step-1 GPU telemetry, add GPU/server power breakdown
Ramp Sequence table's Run 1 row showed "—" for GPU power because the
step-1 fast path (reusing single-card calibration) never populated
PerGPUTelemetry like steps 2+ do. Also add GPU total W / Server itself W
columns and an idle baseline row so server-vs-GPU consumption is visible
per ramp step.
2026-07-01 17:39:58 +03:00
Mikhail Chusavitin
796acdfec1 ipmi fru: add Asset Tag and vendor Extra field write support (in-band)
Product Asset Tag (p 5) and the repeated custom "Extra" fields (Product
Extra p 7, Board Extra b 5/6/7, Chassis Extra c 2/3) from the Inspur FRU
field doc weren't writable — ipmitool prints identically-named lines for
each custom field with no index of its own, so a plain name lookup
couldn't tell them apart. parseFRUOutput now counts occurrences per area
to recover the real index, and the existing area/index round-trip in the
FRU editor write path picks it up automatically. Out-of-band (-H/-U/-P)
writing remains out of scope.

Co-Authored-By: Claude Sonnet 5 <noreply@anthropic.com>
2026-07-01 17:21:26 +03:00
Mikhail Chusavitin
2a7d366e50 bump bible and chart submodules
bible: add chart-based web visual baseline contract
chart: support hardware contract 2.10

Co-Authored-By: Claude Sonnet 5 <noreply@anthropic.com>
2026-07-01 14:56:07 +03:00
Mikhail Chusavitin
5bfaecd417 storage SAT: wait for smartctl self-test completion, add human-readable resource summary
smartctl -t short only launches the self-test and returns immediately
("Testing has begun"); unlike nvme device-self-test --wait, it has no
blocking mode. Validate/Load runs closed the task and produced reports
before the drive actually finished the test. Now poll smartctl -a until
the test completes (or times out) and report the real result.

Also add a per-disk "Resource" section with pseudographic progress bars
for uptime (vs 5y design life), bytes written (vs 1 DWPD x 5y budget),
and bytes read (percent from SMART attribute 242), all rendered in
human-scaled units (days/years, TB/PB) instead of raw hour/byte counts.

Co-Authored-By: Claude Sonnet 5 <noreply@anthropic.com>
2026-07-01 14:54:48 +03:00
Mikhail Chusavitin
8575cf06f8 webui: show all RAID drives per controller and add drive-prepare action
RAID Controller Management previously hid any LSI drive that wasn't
already Frgn/UGood/JBOD, and scoped VROC "free drives" from all system
disks instead of the ones actually wired to the VROC controller's
ports - drives attached directly to the CPU or another HBA could leak
in. Now every drive is listed per its own controller, and LSI drives
not already ready for array creation get a "Prepare" button that
forces them to Unconfigured Good via storcli.

Co-Authored-By: Claude Sonnet 5 <noreply@anthropic.com>
2026-07-01 13:32:03 +03:00
11 changed files with 530 additions and 39 deletions

View File

@@ -4008,14 +4008,23 @@ func renderPowerBenchReport(result NvidiaPowerBenchResult) string {
idleW = result.ServerPower.IdleW
}
// Build header: Run | GPU 0 | GPU 1 | ... | Server wall W | Per GPU wall W | Platform eff.
// Build header: Run | GPU 0 | GPU 1 | ... | GPU total W | Server itself W | Server wall W | Per GPU wall W | Platform eff.
headers := []string{"Run"}
for _, idx := range allGPUIndices {
headers = append(headers, fmt.Sprintf("GPU %d W", idx))
}
headers = append(headers, "Server wall W", "Per GPU wall W", "Platform eff.")
headers = append(headers, "GPU total W", "Server itself W", "Server wall W", "Per GPU wall W", "Platform eff.")
var rampRows [][]string
if idleW > 0 {
idleRow := []string{"0 (idle)"}
for range allGPUIndices {
idleRow = append(idleRow, "—")
}
// No load: GPU total is negligible, all draw is the server's own baseline.
idleRow = append(idleRow, "—", fmt.Sprintf("%.0f", idleW), fmt.Sprintf("%.0f", idleW), "—", "—")
rampRows = append(rampRows, idleRow)
}
for _, step := range result.RampSteps {
row := []string{fmt.Sprintf("%d", step.StepIndex)}
for _, idx := range allGPUIndices {
@@ -4036,6 +4045,16 @@ func renderPowerBenchReport(result NvidiaPowerBenchResult) string {
}
row = append(row, gpuPwr)
}
// GPU total W = sum of observed GPU power (nvidia-smi)
gpuTotal := "—"
if step.TotalObservedPowerW > 0 {
gpuTotal = fmt.Sprintf("%.0f", step.TotalObservedPowerW)
}
// Server itself W = server wall power minus GPU total (non-GPU baseline draw)
serverItself := "—"
if step.ServerLoadedW > 0 && step.TotalObservedPowerW > 0 {
serverItself = fmt.Sprintf("%.0f", step.ServerLoadedW-step.TotalObservedPowerW)
}
// Server wall W
serverWall := "—"
if step.ServerLoadedW > 0 {
@@ -4055,7 +4074,7 @@ func renderPowerBenchReport(result NvidiaPowerBenchResult) string {
}
platEff = fmt.Sprintf("%.2f", eff)
}
row = append(row, serverWall, perGPUWall, platEff)
row = append(row, gpuTotal, serverItself, serverWall, perGPUWall, platEff)
rampRows = append(rampRows, row)
}
b.WriteString(fmtMDTable(headers, rampRows))
@@ -4617,6 +4636,8 @@ func (s *System) RunNvidiaPowerBench(ctx context.Context, baseDir string, opts N
ramp.AvgFanRPM = singleRun.AvgFanRPM
ramp.AvgFanDutyCyclePct = singleRun.AvgFanDutyCyclePct
}
firstSummary := firstCalib.Summary
ramp.PerGPUTelemetry = map[int]*BenchmarkTelemetrySummary{firstIdx: &firstSummary}
if !firstCalib.Completed {
ramp.Status = "FAILED"
ramp.Notes = append(ramp.Notes, fmt.Sprintf("GPU %d did not complete single-card %s", firstIdx, benchmarkPowerEngineLabel(benchmarkPowerEngine())))

View File

@@ -746,6 +746,25 @@ func (s *System) RunStorageAcceptancePack(ctx context.Context, baseDir string, e
key := filepath.Base(devPath) + "_" + strings.ReplaceAll(job.name, "-", "_")
fmt.Fprintf(&summary, "%s_rc=%d\n", key, rc)
fmt.Fprintf(&summary, "%s_status=%s\n", key, status)
// smartctl -t short only launches the self-test on the drive firmware and
// returns immediately ("Testing has begun"); unlike `nvme device-self-test
// --wait`, smartctl has no blocking mode, so we must poll the drive
// ourselves until the self-test actually finishes.
if job.name == "smartctl-self-test-short" && err == nil {
statusName := "smartctl-self-test-status"
statusOut := waitForSmartctlSelfTest(ctx, verboseLog, devPath, logFunc)
deviceOutputs[statusName] = statusOut
statusFile := fmt.Sprintf("%s-%02d-%s.log", prefix, cmdIndex+2, statusName)
if writeErr := os.WriteFile(filepath.Join(runDir, statusFile), statusOut, 0644); writeErr != nil {
return "", writeErr
}
sStatus, sRC := classifySATResult(statusName, statusOut, nil)
stats.Add(sStatus)
sKey := filepath.Base(devPath) + "_" + strings.ReplaceAll(statusName, "-", "_")
fmt.Fprintf(&summary, "%s_rc=%d\n", sKey, sRC)
fmt.Fprintf(&summary, "%s_status=%s\n", sKey, sStatus)
}
}
reportText := GenerateDiskReportText(index+1, devPath, deviceOutputs, time.Now().UTC())
_ = os.WriteFile(filepath.Join(runDir, "disk-"+prefix+"-report.txt"), []byte(reportText), 0644)
@@ -1181,6 +1200,42 @@ func runSATCommandCtx(ctx context.Context, verboseLog, name string, cmd []string
return out, err
}
// smartctlSelfTestPollInterval is the pause between two consecutive status
// polls of the drive after `smartctl -t short` has been launched, and
// smartctlSelfTestTimeout is the total time we keep polling before giving up.
// SMART/ATA specs put a short self-test at ~2 minutes, so the timeout leaves
// roughly twice that as headroom.
const (
smartctlSelfTestPollInterval = 5 * time.Second
smartctlSelfTestTimeout = 4 * time.Minute
)
// waitForSmartctlSelfTest repeatedly runs `smartctl -a` against devPath until
// the output no longer reports a running self-test, and returns the output of
// the last poll. That output carries the real test result, unlike the
// "Testing has begun" acknowledgement printed when the test is launched.
//
// Polling stops early, still returning the most recent output, when ctx is
// cancelled or when smartctlSelfTestTimeout has elapsed since the call began.
// logFunc is accepted for signature compatibility but is not used here.
func waitForSmartctlSelfTest(ctx context.Context, verboseLog, devPath string, logFunc func(string)) []byte {
	giveUpAt := time.Now().Add(smartctlSelfTestTimeout)
	for {
		// The command error is intentionally dropped: only the captured
		// output decides whether the test is still running.
		snapshot, _ := runSATCommandCtx(ctx, verboseLog, "smartctl-self-test-status", []string{"smartctl", "-a", devPath}, nil, nil)
		if ctx.Err() != nil {
			return snapshot
		}

		folded := bytes.ToLower(snapshot)
		running := bytes.Contains(folded, []byte("self-test routine in progress")) ||
			bytes.Contains(folded, []byte("% of test remaining"))
		if !running || time.Now().After(giveUpAt) {
			return snapshot
		}

		// Sleep one poll interval, but wake up immediately on cancellation.
		select {
		case <-ctx.Done():
			return snapshot
		case <-time.After(smartctlSelfTestPollInterval):
		}
	}
}
func listStorageDevices() ([]string, error) {
out, err := satExecCommand("lsblk", "-dn", "-o", "NAME,TYPE,TRAN").Output()
if err != nil {

View File

@@ -158,6 +158,17 @@ func writeNVMeReport(b *strings.Builder, outputs map[string][]byte) {
writeField(b, "Media Errors", formatUint(me))
writeField(b, "Error Log Entries", formatUint(nel))
capacityBytes := ctrl.TotalCap
if capacityBytes == 0 {
capacityBytes = ctrl.NVMCap
}
writeResourceSection(b, resourceInfo{
powerOnHours: poh,
writtenBytes: uint64(nvmeU64(sl.DataUnitsWritten)) * 512000,
readBytes: uint64(nvmeU64(sl.DataUnitsRead)) * 512000,
capacityBytes: capacityBytes,
})
if selfTest := outputs["nvme-device-self-test"]; len(selfTest) > 0 {
writeSectionHeader(b, "Self-Test")
result := parseSelfTestResult(string(selfTest))
@@ -168,7 +179,7 @@ func writeNVMeReport(b *strings.Builder, outputs map[string][]byte) {
// ── SATA / SAS (smartctl) ────────────────────────────────────────────────────
var (
smartHealthRE = regexp.MustCompile(`(?i)SMART overall-health self-assessment test result:\s*(\S+)`)
smartHealthRE = regexp.MustCompile(`(?i)SMART overall-health self-assessment test result:\s*(\S+)`)
smartAttrLineRE = regexp.MustCompile(
`^\s*(\d{1,3})\s+(\S+)\s+0x[0-9a-fA-F]+\s+(\d{1,3})\s+(\d{1,3})\s+(\d{1,3})\s+\S+\s+\S+\s+\S+\s+(.+?)\s*$`,
)
@@ -205,8 +216,10 @@ func writeSATAReport(b *strings.Builder, outputs map[string][]byte) {
if m := smartFirmwareRE.FindStringSubmatch(text); m != nil {
writeField(b, "Firmware", strings.TrimSpace(m[1]))
}
var capacityBytes uint64
if m := smartCapacityRE.FindStringSubmatch(text); m != nil {
cap := strings.TrimSpace(m[1])
capacityBytes = parseLeadingUint(cap)
// trim everything after "[" if present (e.g. "500,107,862,016 bytes [500 GB]")
if idx := strings.Index(cap, "["); idx > 0 {
cap = strings.TrimSpace(cap[idx+1:])
@@ -233,7 +246,36 @@ func writeSATAReport(b *strings.Builder, outputs map[string][]byte) {
}
}
if selfTest := outputs["smartctl-self-test-short"]; len(selfTest) > 0 {
var poh, writtenLBAs, readLBAs uint64
var readValue int
hasReadValue := false
for _, a := range attrs {
switch a.ID {
case 9: // Power_On_Hours
poh = parseLeadingUint(a.Raw)
case 241: // Total_LBAs_Written
writtenLBAs = parseLeadingUint(a.Raw)
case 242: // Total_LBAs_Read
readLBAs = parseLeadingUint(a.Raw)
readValue = a.Value
hasReadValue = true
}
}
const sataSectorBytes = 512
writeResourceSection(b, resourceInfo{
powerOnHours: poh,
writtenBytes: writtenLBAs * sataSectorBytes,
readBytes: readLBAs * sataSectorBytes,
capacityBytes: capacityBytes,
readPercent: 100 - readValue,
hasReadPercent: hasReadValue,
})
selfTest := outputs["smartctl-self-test-status"]
if len(selfTest) == 0 {
selfTest = outputs["smartctl-self-test-short"]
}
if len(selfTest) > 0 {
writeSectionHeader(b, "Self-Test")
result := parseSelfTestResult(string(selfTest))
writeField(b, "Result", result)
@@ -274,29 +316,45 @@ func parseSMARTAttrs(text string) []smartAttr {
return attrs
}
// parseSelfTestResult extracts a one-line summary from nvme device-self-test
// or smartctl -t short output.
// parseSelfTestResult extracts a one-line summary from nvme device-self-test,
// smartctl -a (post-completion status), or smartctl -t short (launch ack) output.
func parseSelfTestResult(text string) string {
text = strings.TrimSpace(text)
if text == "" {
return "no output"
}
lines := strings.Split(text, "\n")
// smartctl -a: "Self-test execution status: ( 0)\n\tThe previous
// self-test routine completed\n\twithout error ..." — the description
// wraps onto following indented, colon-free continuation lines.
for i, line := range lines {
if strings.Contains(strings.ToLower(line), "self-test execution status") {
parts := []string{strings.TrimSpace(line)}
for j := i + 1; j < len(lines) && j < i+4; j++ {
cont := strings.TrimSpace(lines[j])
if cont == "" || strings.Contains(cont, ":") {
break
}
parts = append(parts, cont)
}
return strings.Join(parts, " ")
}
}
// nvme device-self-test: look for "Short Device Self-Test Status : 0x0" or similar
for _, line := range strings.Split(text, "\n") {
for _, line := range lines {
l := strings.ToLower(line)
if strings.Contains(l, "self-test status") || strings.Contains(l, "self test status") {
return strings.TrimSpace(line)
}
}
// smartctl -t short: "Testing has begun" or "Short BGST started"
for _, line := range strings.Split(text, "\n") {
for _, line := range lines {
l := strings.ToLower(line)
if strings.Contains(l, "testing has begun") || strings.Contains(l, "started") || strings.Contains(l, "complete") {
return strings.TrimSpace(line)
}
}
// fallback: last non-empty line
lines := strings.Split(strings.TrimSpace(text), "\n")
for i := len(lines) - 1; i >= 0; i-- {
if s := strings.TrimSpace(lines[i]); s != "" {
return s
@@ -305,6 +363,115 @@ func parseSelfTestResult(text string) string {
return "done"
}
// ── Resource (pseudographic usage bars) ────────────────────────────────────────
// designLifeYears/dwpd model the drive's rated endurance: 1 drive-write-per-day
// for 5 years, the baseline enterprise endurance spec used when the vendor's
// own TBW/DWPD rating isn't available from SMART/NVMe data.
//
// writeResourceSection uses designLifeYears as the 100% mark of the uptime bar
// and capacity x dwpd x designLifeYears x 365 as the 100% mark of the
// written-bytes bar.
const (
designLifeYears = 5
dwpd = 1.0
)
// resourceInfo carries the per-drive usage counters rendered by
// writeResourceSection.
type resourceInfo struct {
powerOnHours uint64 // hours the drive has been powered on
writtenBytes uint64 // total bytes written over the drive's life
readBytes uint64 // total bytes read over the drive's life
capacityBytes uint64 // drive capacity in bytes; 0 means unknown, and the written-bytes bar is then omitted
readPercent int // only meaningful when hasReadPercent
hasReadPercent bool // true when the source SMART attribute exposes a normalized read-wear value
}
// writeResourceSection appends a "Resource" section to b with one line each
// for uptime, bytes written and bytes read.
//
// Uptime is always drawn as a bar against the design life. Written gets a bar
// against the capacity x DWPD x design-life budget only when the capacity is
// known; otherwise just the amount is printed. Read gets a bar only when the
// caller supplied a read percentage (r.hasReadPercent).
func writeResourceSection(b *strings.Builder, r resourceInfo) {
	const (
		barWidth     = 24
		maxLifeHours = designLifeYears * 365 * 24
	)

	writeSectionHeader(b, "Resource")

	// Uptime versus the design life.
	uptimeFrac := float64(r.powerOnHours) / float64(maxLifeHours)
	fmt.Fprintf(b, " %-9s %s %s / %s (%s)\n",
		"Uptime",
		progressBar(uptimeFrac, barWidth),
		formatHoursHuman(r.powerOnHours),
		formatHoursHuman(maxLifeHours),
		formatPercent(uptimeFrac*100))

	// Bytes written versus the endurance budget, when the capacity is known.
	writtenText := formatBytesHuman(float64(r.writtenBytes))
	if r.capacityBytes == 0 {
		fmt.Fprintf(b, " %-9s %s\n", "Written", writtenText)
	} else {
		budget := float64(r.capacityBytes) * dwpd * designLifeYears * 365
		writtenFrac := float64(r.writtenBytes) / budget
		fmt.Fprintf(b, " %-9s %s %s / %s (%s, %g DWPD×%dy)\n",
			"Written",
			progressBar(writtenFrac, barWidth),
			writtenText,
			formatBytesHuman(budget),
			formatPercent(writtenFrac*100),
			dwpd,
			designLifeYears)
	}

	// Bytes read; the bar is only available with a normalized percentage.
	readText := formatBytesHuman(float64(r.readBytes))
	if !r.hasReadPercent {
		fmt.Fprintf(b, " %-9s %s\n", "Read", readText)
		return
	}
	fmt.Fprintf(b, " %-9s %s %s (%d%%)\n",
		"Read",
		progressBar(float64(r.readPercent)/100, barWidth),
		readText,
		r.readPercent)
}
// progressBar draws frac as a bracketed bar of exactly width cells, using '#'
// for the filled part and '-' for the rest, e.g. "[######------]".
//
// frac is clamped to [0, 1]; NaN is treated as 0. The number of filled cells
// is frac*width rounded to the nearest integer.
func progressBar(frac float64, width int) string {
	switch {
	case math.IsNaN(frac), frac < 0:
		frac = 0
	case frac > 1:
		frac = 1
	}

	done := int(math.Round(frac * float64(width)))
	todo := width - done

	var bar strings.Builder
	bar.WriteString("[")
	bar.WriteString(strings.Repeat("#", done))
	bar.WriteString(strings.Repeat("-", todo))
	bar.WriteString("]")
	return bar.String()
}
// formatBytesHuman renders a decimal (SI) human-readable byte size, e.g. "1.23 TB".
//
// Values below 1000 are printed as whole bytes ("999 B"); larger values are
// scaled by powers of 1000 and printed with two decimals. The largest unit is
// PB, so anything beyond that range is still expressed in PB.
func formatBytesHuman(n float64) string {
	units := []string{"B", "KB", "MB", "GB", "TB", "PB"}
	i := 0
	for n >= 1000 && i < len(units)-1 {
		n /= 1000
		i++
	}
	if i == 0 {
		return fmt.Sprintf("%.0f %s", n, units[i])
	}
	// A value just under the unit boundary (e.g. 999.999 KB) would be rounded
	// by "%.2f" to "1000.00 KB". Step up one more unit so it reads "1.00 MB".
	if i < len(units)-1 && math.Round(n*100) >= 100000 {
		n /= 1000
		i++
	}
	return fmt.Sprintf("%.2f %s", n, units[i])
}
// formatHoursHuman turns an hour count into a short, human-scaled duration so
// that long uptimes are not shown as raw four/five-digit hour counts:
//
//   - below 48 hours: whole hours ("47 h")
//   - below 365 days: whole days ("120 d")
//   - otherwise: years, without decimals when the value is a whole number of
//     365-day years ("5 y") and with one decimal otherwise ("1.5 y")
func formatHoursHuman(hours uint64) string {
	const (
		hoursPerDay = 24
		daysPerYear = 365
	)

	if hours < 2*hoursPerDay {
		return fmt.Sprintf("%d h", hours)
	}

	dayCount := float64(hours) / hoursPerDay
	if dayCount < daysPerYear {
		return fmt.Sprintf("%.0f d", dayCount)
	}

	yearCount := dayCount / daysPerYear
	switch {
	case yearCount == math.Trunc(yearCount):
		return fmt.Sprintf("%.0f y", yearCount)
	default:
		return fmt.Sprintf("%.1f y", yearCount)
	}
}
// formatPercent renders a percentage with extra precision below 1% (e.g.
// "0.03%"), where a rounded "0%" would hide any usage at all.
//
// Non-zero values too small to survive two-decimal rounding are shown as
// "<0.01%" rather than "0.00%", so that "some usage" is never displayed as
// none. Exactly zero, and everything from 1% upwards, is printed as a whole
// number.
func formatPercent(pct float64) string {
	switch {
	case pct > 0 && pct < 0.005:
		// "%.2f" would print these as "0.00%".
		return "<0.01%"
	case pct > 0 && pct < 1:
		return fmt.Sprintf("%.2f%%", pct)
	default:
		return fmt.Sprintf("%.0f%%", pct)
	}
}
// parseLeadingUint reads the number at the start of s — a run of ASCII digits
// that may contain thousands-separator commas, as in a SMART raw value or
// "500,107,862,016 bytes" — and returns it as a uint64. Leading and trailing
// whitespace is trimmed first and everything after the run is ignored. When s
// does not start with a digit the result is 0.
func parseLeadingUint(s string) uint64 {
	trimmed := strings.TrimSpace(s)

	// Cut at the first character that is neither a digit nor a comma.
	stop := strings.IndexFunc(trimmed, func(r rune) bool {
		return (r < '0' || r > '9') && r != ','
	})
	if stop >= 0 {
		trimmed = trimmed[:stop]
	}

	// The parse error is ignored on purpose: the value ParseUint returns
	// alongside it is what callers get for an unparsable prefix.
	value, _ := strconv.ParseUint(strings.ReplaceAll(trimmed, ",", ""), 10, 64)
	return value
}
// ── Formatting helpers ────────────────────────────────────────────────────────
func writeSectionHeader(b *strings.Builder, title string) {

View File

@@ -37,26 +37,41 @@ var fruEditableFields = map[string]struct {
"Chassis Part Number": {"c", 0},
"Chassis Serial Number": {"c", 1},
"Chassis Serial": {"c", 1},
"Chassis Extra": {"c", 2},
// Board — vendor doc names and ipmitool abbreviated names
"Board Manufacturer": {"b", 0},
"Board Mfg": {"b", 0},
"Board Product Name": {"b", 1},
"Board Product": {"b", 1},
"Board Manufacturer": {"b", 0},
"Board Mfg": {"b", 0},
"Board Product Name": {"b", 1},
"Board Product": {"b", 1},
"Board Serial Number": {"b", 2},
"Board Serial": {"b", 2},
"Board Part Number": {"b", 3},
"Board Serial": {"b", 2},
"Board Part Number": {"b", 3},
// Product — vendor doc names and ipmitool abbreviated names
"Product Manufacturer": {"p", 0},
"Product Name": {"p", 1},
"Product Part Number": {"p", 2},
"Product Version": {"p", 3},
"Product Manufacturer": {"p", 0},
"Product Name": {"p", 1},
"Product Part Number": {"p", 2},
"Product Version": {"p", 3},
"Product Serial Number": {"p", 4},
"Product Serial": {"p", 4},
"Product Serial": {"p", 4},
"Product Asset Tag": {"p", 5},
}
// fruExtraBaseIndex gives the starting ipmitool field index for each area's
// repeated "<Area> Extra" custom fields, per the vendor FRU field doc (Chassis
// extra fields start at 2, Board at 5, Product at 7). ipmitool fru print
// emits one identically-named line per custom field, so parseFRUOutput
// counts occurrences to recover the real index for each one.
//
// The map key is the field name exactly as printed. Area is the one-letter
// area code ("c", "b" or "p") and Base is the index given to the first
// occurrence of that name; every further occurrence gets the next index.
var fruExtraBaseIndex = map[string]struct {
Area string
Base int
}{
"Chassis Extra": {"c", 2},
"Board Extra": {"b", 5},
"Product Extra": {"p", 7},
}
func parseFRUOutput(output string) []fruField {
var fields []fruField
extraSeen := map[string]int{}
for _, line := range strings.Split(output, "\n") {
// Lines look like: " Field Name : value"
trimmed := strings.TrimLeft(line, " \t")
@@ -64,33 +79,32 @@ func parseFRUOutput(output string) []fruField {
continue
}
colon := strings.Index(trimmed, " : ")
valueOffset := 3
if colon < 0 {
// try ": " with no leading space before colon
colon = strings.Index(trimmed, ": ")
valueOffset = 2
if colon < 0 {
continue
}
name := strings.TrimSpace(trimmed[:colon])
value := strings.TrimSpace(trimmed[colon+2:])
if name == "" {
continue
}
editable, area, idx := fruFieldMeta(name)
fields = append(fields, fruField{Name: name, Value: value, Editable: editable, Area: area, Index: idx})
continue
}
name := strings.TrimSpace(trimmed[:colon])
value := strings.TrimSpace(trimmed[colon+3:])
value := strings.TrimSpace(trimmed[colon+valueOffset:])
if name == "" {
continue
}
editable, area, idx := fruFieldMeta(name)
editable, area, idx := fruFieldMeta(name, extraSeen)
fields = append(fields, fruField{Name: name, Value: value, Editable: editable, Area: area, Index: idx})
}
return fields
}
func fruFieldMeta(name string) (editable bool, area string, index int) {
func fruFieldMeta(name string, extraSeen map[string]int) (editable bool, area string, index int) {
if e, ok := fruExtraBaseIndex[name]; ok {
idx := e.Base + extraSeen[name]
extraSeen[name]++
return true, e.Area, idx
}
if e, ok := fruEditableFields[name]; ok {
return true, e.Area, e.Index
}
@@ -201,4 +215,3 @@ func runIPMIFRUWriteTask(ctx context.Context, j *jobState, exportDir string, p t
}
return nil
}

View File

@@ -0,0 +1,59 @@
package webui
import "testing"
// TestParseFRUOutputExtraFields checks that parseFRUOutput marks the Product
// Asset Tag and every repeated "<Area> Extra" line as editable, and gives each
// repeated line its own area/index pair.
func TestParseFRUOutputExtraFields(t *testing.T) {
// Realistic ipmitool fru print output: repeated "<Area> Extra" lines
// (one per custom field) must resolve to sequential indices per the
// vendor FRU doc (Chassis Extra starts at 2, Board Extra at 5, Product
// Extra at 7), not all collapse onto the same index.
out := `
Product Manufacturer : Inspur
Product Name : NF5280M6
Product Part Number : PN123
Product Version : 1.0
Product Serial : SN123
Product Asset Tag : ASSET01
Product Extra : custom-p1
Board Mfg : Inspur
Board Product : BoardX
Board Serial : BSN1
Board Part Number : BPN1
Board Extra : custom-b1
Board Extra : custom-b2
Board Extra : custom-b3
Chassis Part Number : CPN1
Chassis Serial : CSN1
Chassis Extra : front-half
Chassis Extra : back-half
`
fields := parseFRUOutput(out)
// Group the parsed fields by name, keeping the order in which they appeared,
// so that repeated names can be addressed by occurrence number.
byName := map[string][]fruField{}
for _, f := range fields {
byName[f.Name] = append(byName[f.Name], f)
}
// assertMeta verifies the area, index and editable flag of the
// occurrence-th (0-based) field called name.
assertMeta := func(name string, occurrence int, wantArea string, wantIndex int) {
t.Helper()
list := byName[name]
if occurrence >= len(list) {
t.Fatalf("expected occurrence %d of %q, got %d entries", occurrence, name, len(list))
}
f := list[occurrence]
if f.Area != wantArea || f.Index != wantIndex {
t.Errorf("%s[%d] = area:%q index:%d, want area:%q index:%d", name, occurrence, f.Area, f.Index, wantArea, wantIndex)
}
if !f.Editable {
t.Errorf("%s[%d] expected editable", name, occurrence)
}
}
assertMeta("Product Asset Tag", 0, "p", 5)
assertMeta("Product Extra", 0, "p", 7)
assertMeta("Board Extra", 0, "b", 5)
assertMeta("Board Extra", 1, "b", 6)
assertMeta("Board Extra", 2, "b", 7)
assertMeta("Chassis Extra", 0, "c", 2)
assertMeta("Chassis Extra", 1, "c", 3)
}

View File

@@ -38,6 +38,7 @@ type raidControllerInfo struct {
Model string `json:"model"`
ForeignDrives []raidDriveInfo `json:"foreign_drives"`
FreeDrives []raidDriveInfo `json:"free_drives"`
AllDrives []raidDriveInfo `json:"all_drives"`
Arrays []raidArrayInfo `json:"arrays,omitempty"`
}
@@ -97,6 +98,7 @@ func detectLSIControllers() []raidControllerInfo {
Model: c.ResponseData.Basics.Model,
ForeignDrives: []raidDriveInfo{},
FreeDrives: []raidDriveInfo{},
AllDrives: []raidDriveInfo{},
}
if ctrl.Model == "" {
ctrl.Model = fmt.Sprintf("LSI Controller %d", ctrl.Index)
@@ -111,6 +113,7 @@ func detectLSIControllers() []raidControllerInfo {
SizeGB: raidParseHumanSizeGB(d.Size),
Serial: strings.TrimSpace(d.SN),
}
ctrl.AllDrives = append(ctrl.AllDrives, info)
switch strings.TrimSpace(d.State) {
case "Frgn":
ctrl.ForeignDrives = append(ctrl.ForeignDrives, info)
@@ -168,6 +171,30 @@ func parseRAIDMDStat(raw string) []mdStatEntry {
return entries
}
// raidVROCPortRx captures the first token after "PortN :" on a line of
// `mdadm --detail-platform` output, e.g. "/dev/sda" from
// " Port2 : /dev/sda (SERIAL123)" or "-" from
// " Port3 : - no device attached -".
var raidVROCPortRx = regexp.MustCompile(`^\s*Port\d+\s*:\s*(\S+)`)

// parseVROCPorts scans `mdadm --detail-platform` output and returns the set of
// block device basenames (e.g. "sda") listed on the VROC I/O controller's
// ports. Port lines whose value is not a /dev/ path — such as empty ports —
// are skipped, so drives attached directly to the CPU (or to a separate HBA)
// rather than through this controller's ports never appear in the result.
func parseVROCPorts(raw string) map[string]bool {
	const devPrefix = "/dev/"

	wired := make(map[string]bool)
	for _, ln := range strings.Split(raw, "\n") {
		match := raidVROCPortRx.FindStringSubmatch(ln)
		if match == nil || !strings.HasPrefix(match[1], devPrefix) {
			continue
		}
		wired[match[1][len(devPrefix):]] = true
	}
	return wired
}
func detectVROCController() *raidControllerInfo {
out, err := exec.Command("mdadm", "--detail-platform").CombinedOutput()
if err != nil && len(out) == 0 {
@@ -191,8 +218,16 @@ func detectVROCController() *raidControllerInfo {
Model: "Intel VROC",
ForeignDrives: []raidDriveInfo{},
FreeDrives: []raidDriveInfo{},
AllDrives: []raidDriveInfo{},
}
ports := parseVROCPorts(string(out))
// Some mdadm builds omit the "Port" lines from --detail-platform. When
// we can't determine which drives are actually wired to this
// controller, fall back to showing every disk not already in an array
// rather than hiding everything.
portsKnown := len(ports) > 0
inArray := map[string]bool{}
raw, err := os.ReadFile("/proc/mdstat")
if err == nil {
@@ -222,15 +257,25 @@ func detectVROCController() *raidControllerInfo {
}
if json.Unmarshal(lsblkOut, &lsblkDoc) == nil {
for _, d := range lsblkDoc.BlockDevices {
if d.Type != "disk" || inArray[d.Name] {
// Only consider disks wired to this controller's ports -
// drives attached directly to the CPU (or another
// controller) never show up as VROC ports and are skipped.
if d.Type != "disk" || (portsKnown && !ports[d.Name]) {
continue
}
ctrl.FreeDrives = append(ctrl.FreeDrives, raidDriveInfo{
info := raidDriveInfo{
Device: "/dev/" + d.Name,
Model: strings.TrimSpace(d.Model),
Serial: strings.TrimSpace(d.Serial),
State: "available",
})
}
if inArray[d.Name] {
info.State = "member"
}
ctrl.AllDrives = append(ctrl.AllDrives, info)
if info.State == "available" {
ctrl.FreeDrives = append(ctrl.FreeDrives, info)
}
}
}
}
@@ -348,6 +393,38 @@ func (h *handler) handleAPIRAIDCreateMirror(w http.ResponseWriter, r *http.Reque
writeJSON(w, map[string]string{"task_id": t.ID})
}
// handleAPIRAIDPrepareDrive handles the request that queues a "prepare drive"
// task for an LSI controller.
//
// The JSON body carries "controller_id" and "slot". Both are validated before
// anything is queued; a malformed body or an invalid value is answered with
// 400 Bad Request. On success the task is enqueued on the global queue and its
// ID is returned as {"task_id": "..."}.
func (h *handler) handleAPIRAIDPrepareDrive(w http.ResponseWriter, r *http.Request) {
	const target = "raid-lsi-prepare-drive"

	var body struct {
		ControllerID string `json:"controller_id"`
		Slot         string `json:"slot"`
	}
	if json.NewDecoder(r.Body).Decode(&body) != nil {
		writeError(w, http.StatusBadRequest, "invalid JSON")
		return
	}

	ctrlIdx, ctrlOK := parseLSIControllerIndex(body.ControllerID)
	if !ctrlOK {
		writeError(w, http.StatusBadRequest, "invalid controller_id")
		return
	}

	// Only the validity of the slot matters here; the task parses it again
	// when it runs, so the slot string is stored as received.
	_, _, slotOK := parseRAIDSlot(body.Slot)
	if !slotOK {
		writeError(w, http.StatusBadRequest, "invalid slot")
		return
	}

	task := &Task{
		ID:        newJobID(target),
		Name:      fmt.Sprintf("Prepare drive %s (LSI ctrl %d)", body.Slot, ctrlIdx),
		Target:    target,
		Priority:  defaultTaskPriority(target, taskParams{}),
		Status:    TaskPending,
		CreatedAt: time.Now(),
		params:    taskParams{RAIDController: ctrlIdx, RAIDSlot: body.Slot},
	}
	globalQueue.enqueue(task)
	writeJSON(w, map[string]string{"task_id": task.ID})
}
func parseLSIControllerIndex(id string) (int, bool) {
if !strings.HasPrefix(id, "lsi-") {
return 0, false
@@ -385,6 +462,34 @@ func runRAIDLSICreateMirrorTask(ctx context.Context, j *jobState, ctrl int, driv
return streamCmdJob(j, cmd)
}
// parseRAIDSlot splits a storcli "EID:Slt" identifier (e.g. "252:0") into
// enclosure and slot numbers.
//
// Whitespace around the identifier and around each half is ignored. ok is
// false, and both numbers are zero, when the input is not of the form
// "<int>:<int>" or when either number is negative. The numbers are later
// interpolated into a storcli object path ("/cN/eN/sN"), where a negative
// component can never name a real enclosure or slot.
func parseRAIDSlot(slot string) (eid int, slt int, ok bool) {
	parts := strings.SplitN(strings.TrimSpace(slot), ":", 2)
	if len(parts) != 2 {
		return 0, 0, false
	}
	eid, errEID := strconv.Atoi(strings.TrimSpace(parts[0]))
	slt, errSlt := strconv.Atoi(strings.TrimSpace(parts[1]))
	if errEID != nil || errSlt != nil {
		return 0, 0, false
	}
	// Atoi accepts a leading minus sign; reject it here so that request
	// validation does not let "/c0/e-1/s0" style paths through.
	if eid < 0 || slt < 0 {
		return 0, 0, false
	}
	return eid, slt, true
}
// runRAIDPrepareDriveTask runs `storcli64 /c<ctrl>/e<eid>/s<slt> set good
// force` for the drive identified by slot (an "EID:Slt" string) and streams
// the command through the job j. It returns an error without running anything
// when slot cannot be parsed.
func runRAIDPrepareDriveTask(ctx context.Context, j *jobState, ctrl int, slot string) error {
	eid, slt, valid := parseRAIDSlot(slot)
	if !valid {
		return fmt.Errorf("invalid slot %q", slot)
	}

	drivePath := fmt.Sprintf("/c%d/e%d/s%d", ctrl, eid, slt)
	j.append(fmt.Sprintf("Preparing drive %s on controller %d (set good, force)...", slot, ctrl))

	storcli := exec.CommandContext(ctx, "storcli64", drivePath, "set", "good", "force")
	return streamCmdJob(j, storcli)
}
func runRAIDVROCCreateMirrorTask(ctx context.Context, j *jobState, devices []string, arrayName string) error {
if arrayName == "" {
arrayName = "bee-mirror0"
@@ -507,6 +612,7 @@ function raidRenderController(c, idx) {
html += '</div></div>';
}
html += raidRenderAllDrives(c, idx);
html += raidRenderMirrorSection(c, idx, 'lsi');
}
@@ -529,12 +635,71 @@ function raidRenderController(c, idx) {
html += '</table>';
}
html += raidRenderAllDrives(c, idx);
html += raidRenderMirrorSection(c, idx, 'vroc');
}
return html;
}
// Drive states that are shown with the 'badge-ok' badge; any other state gets
// 'badge-warn'.
var RAID_READY_STATES = {'UGood': true, 'JBOD': true, 'available': true};
// Drive states for which no "Prepare" button is offered on an LSI controller.
var RAID_NO_PREPARE_STATES = {'UGood': true, 'JBOD': true, 'Frgn': true, 'Onln': true, 'Msng': true};
// raidRenderAllDrives returns the HTML for the "All Drives on This Controller"
// table of controller c, built from c.all_drives. LSI controllers list drives
// by slot and get an extra action column; other controllers list drives by
// device. When there are no drives a short placeholder paragraph is returned.
function raidRenderAllDrives(c, idx) {
var drives = c.all_drives || [];
var isLSI = c.type === 'lsi';
if (drives.length === 0) {
return '<p style="font-size:13px;color:var(--muted);margin-bottom:12px">No drives detected on this controller.</p>';
}
var html = '<div style="font-size:12px;font-weight:600;color:var(--muted);margin-bottom:6px;text-transform:uppercase;letter-spacing:.04em">All Drives on This Controller</div>';
html += '<table style="margin-bottom:14px"><tr><th>' + (isLSI ? 'Slot' : 'Device') + '</th><th>Model</th><th>Size</th><th>State</th>' + (isLSI ? '<th></th>' : '') + '</tr>';
drives.forEach(function(d) {
var ready = !!RAID_READY_STATES[d.state];
var badgeClass = ready ? 'badge-ok' : 'badge-warn';
// The action cell exists only for LSI rows: a Prepare button when the state
// is not in RAID_NO_PREPARE_STATES, otherwise an empty cell that keeps the
// columns aligned with the header.
var actionCell = '';
if (isLSI && !RAID_NO_PREPARE_STATES[d.state]) {
actionCell = '<td><button class="btn btn-sm btn-secondary" onclick="raidPrepareDrive(\'' + escHtml(c.id) + '\',\'' + escHtml(d.slot) + '\',this)">Prepare</button></td>';
} else if (isLSI) {
actionCell = '<td></td>';
}
html += '<tr>'
+ '<td style="font-family:monospace">' + escHtml(isLSI ? d.slot : d.device) + '</td>'
+ '<td>' + escHtml(d.model||'—') + (d.serial ? ' [' + escHtml(d.serial) + ']' : '') + '</td>'
+ '<td>' + (d.size_gb > 0 ? Math.round(d.size_gb) + ' GB' : '—') + '</td>'
+ '<td><span class="badge ' + badgeClass + '">' + escHtml(d.state||'—') + '</span></td>'
+ actionCell
+ '</tr>';
});
html += '</table>';
return html;
}
// raidPrepareDrive asks for confirmation and then POSTs ctrlID and slot to
// /api/tools/raid/prepare-drive. The returned task is followed with
// raidStreamTask, and the controller list is reloaded from its completion
// callback. btn is optional: when given, it is disabled and relabelled while
// the request is in flight and restored afterwards, including on error.
function raidPrepareDrive(ctrlID, slot, btn) {
if (!confirm('Prepare drive ' + slot + ' on ' + ctrlID + ' for array creation?\n\nThis forces the drive into Unconfigured Good state. If it currently belongs to a virtual drive or holds data, that data will become inaccessible.')) {
return;
}
// Remember the button label so it can be put back once the task is over.
var original = btn ? btn.textContent : '';
if (btn) { btn.disabled = true; btn.textContent = 'Preparing...'; }
raidShowOutput('Prepare drive ' + slot, '', '');
fetch('/api/tools/raid/prepare-drive', {
method: 'POST',
headers: {'Content-Type': 'application/json'},
body: JSON.stringify({controller_id: ctrlID, slot: slot})
})
.then(function(r) { return r.json(); })
.then(function(d) {
// An "error" field in the response is routed to the catch handler below.
if (d.error) throw new Error(d.error);
raidStreamTask(d.task_id, 'Prepare drive ' + slot, function() {
if (btn) { btn.disabled = false; btn.textContent = original; }
raidLoad();
});
})
.catch(function(e) {
raidShowOutput('Error', 'failed', e.message);
if (btn) { btn.disabled = false; btn.textContent = original; }
});
}
function raidRenderMirrorSection(c, idx, kind) {
var free = c.free_drives || [];
var html = '<div style="font-size:12px;font-weight:600;color:var(--muted);margin-bottom:6px;text-transform:uppercase;letter-spacing:.04em">Create RAID 1 Mirror</div>';
@@ -683,6 +848,9 @@ function raidStreamTask(taskID, taskName, onDone) {
}
window.raidLoad = raidLoad;
window.raidForeignAction = raidForeignAction;
window.raidCreateMirror = raidCreateMirror;
window.raidPrepareDrive = raidPrepareDrive;
raidLoad();
})();
</script>`

View File

@@ -323,6 +323,7 @@ func NewHandler(opts HandlerOptions) http.Handler {
mux.HandleFunc("GET /api/tools/raid/status", h.handleAPIRAIDStatus)
mux.HandleFunc("POST /api/tools/raid/foreign", h.handleAPIRAIDForeignAction)
mux.HandleFunc("POST /api/tools/raid/create-mirror", h.handleAPIRAIDCreateMirror)
mux.HandleFunc("POST /api/tools/raid/prepare-drive", h.handleAPIRAIDPrepareDrive)
// GPU presence / tools
mux.HandleFunc("GET /api/gpu/presence", h.handleAPIGPUPresence)

View File

@@ -410,6 +410,12 @@ func executeTaskWithOptions(opts *HandlerOptions, t *Task, j *jobState, ctx cont
break
}
err = runRAIDLSICreateMirrorTask(ctx, j, t.params.RAIDController, t.params.RAIDDevices)
case "raid-lsi-prepare-drive":
if strings.TrimSpace(t.params.RAIDSlot) == "" {
err = fmt.Errorf("no drive slot provided")
break
}
err = runRAIDPrepareDriveTask(ctx, j, t.params.RAIDController, t.params.RAIDSlot)
case "raid-vroc-create-mirror":
if len(t.params.RAIDDevices) < 2 {
err = fmt.Errorf("at least 2 devices required")

View File

@@ -146,6 +146,7 @@ type taskParams struct {
RAIDController int `json:"raid_controller,omitempty"`
RAIDDevices []string `json:"raid_devices,omitempty"`
RAIDArrayName string `json:"raid_array_name,omitempty"`
RAIDSlot string `json:"raid_slot,omitempty"`
}
type persistedTask struct {

2
bible

Submodule bible updated: 1977730d93...d2600f1279