Compare commits
7 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 5b98005d5d | |||
| 33bc275da2 | |||
| 11ea640626 | |||
| 796acdfec1 | |||
| 2a7d366e50 | |||
| 5bfaecd417 | |||
| 8575cf06f8 |
@@ -766,7 +766,7 @@ func parseMDAdmPlatformLicense(raw string) *string {
|
||||
|
||||
func queryDeviceSerial(devPath string) string {
|
||||
if out, err := exec.Command("nvme", "id-ctrl", devPath, "-o", "json").Output(); err == nil {
|
||||
var ctrl nvmeIDCtrl
|
||||
var ctrl NVMeIDCtrl
|
||||
if json.Unmarshal(out, &ctrl) == nil {
|
||||
if v := cleanDMIValue(strings.TrimSpace(ctrl.SerialNumber)); v != "" {
|
||||
return v
|
||||
|
||||
@@ -84,16 +84,19 @@ func collectStorage() []schema.HardwareStorage {
|
||||
return result
|
||||
}
|
||||
|
||||
// jsonInt64 accepts both a bare JSON number and a JSON-quoted number string.
|
||||
// lsblk -J emits LOG-SEC / PHY-SEC as integers on util-linux ≥ 2.37 (Debian 12)
|
||||
// but older versions emit them as strings. This type handles both.
|
||||
type jsonInt64 int64
|
||||
// JSONInt64 accepts a bare JSON number (512), a JSON-quoted number string
|
||||
// ("512" — lsblk -J on util-linux < 2.37, and nvme-cli for large 64-bit
|
||||
// counters that would lose precision as JS numbers), or a {"lo":n,"hi":n}
|
||||
// object (128-bit NVMe counters on some nvme-cli versions; hi is ignored as
|
||||
// no real counter exceeds 64 bits). Shared by lsblk and nvme-cli JSON output
|
||||
// across the collector and the human-readable disk report.
|
||||
type JSONInt64 int64
|
||||
|
||||
func (j *jsonInt64) UnmarshalJSON(data []byte) error {
|
||||
func (j *JSONInt64) UnmarshalJSON(data []byte) error {
|
||||
// bare number: 512
|
||||
var n int64
|
||||
if err := json.Unmarshal(data, &n); err == nil {
|
||||
*j = jsonInt64(n)
|
||||
*j = JSONInt64(n)
|
||||
return nil
|
||||
}
|
||||
// quoted string: "512"
|
||||
@@ -101,24 +104,32 @@ func (j *jsonInt64) UnmarshalJSON(data []byte) error {
|
||||
if err := json.Unmarshal(data, &s); err == nil {
|
||||
n, err := strconv.ParseInt(strings.TrimSpace(s), 10, 64)
|
||||
if err == nil {
|
||||
*j = jsonInt64(n)
|
||||
*j = JSONInt64(n)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
// {"lo":n,"hi":n} 128-bit counter object
|
||||
var obj struct {
|
||||
Lo int64 `json:"lo"`
|
||||
}
|
||||
if err := json.Unmarshal(data, &obj); err == nil {
|
||||
*j = JSONInt64(obj.Lo)
|
||||
return nil
|
||||
}
|
||||
return nil // null or unexpected type — leave zero
|
||||
}
|
||||
|
||||
// lsblkDevice is a minimal lsblk JSON record.
|
||||
type lsblkDevice struct {
|
||||
Name string `json:"name"`
|
||||
Type string `json:"type"`
|
||||
Size string `json:"size"`
|
||||
Serial string `json:"serial"`
|
||||
Model string `json:"model"`
|
||||
Tran string `json:"tran"`
|
||||
Hctl string `json:"hctl"`
|
||||
LogSec jsonInt64 `json:"log-sec"`
|
||||
PhySec jsonInt64 `json:"phy-sec"`
|
||||
Name string `json:"name"`
|
||||
Type string `json:"type"`
|
||||
Size string `json:"size"`
|
||||
Serial string `json:"serial"`
|
||||
Model string `json:"model"`
|
||||
Tran string `json:"tran"`
|
||||
Hctl string `json:"hctl"`
|
||||
LogSec JSONInt64 `json:"log-sec"`
|
||||
PhySec JSONInt64 `json:"phy-sec"`
|
||||
}
|
||||
|
||||
type lsblkRoot struct {
|
||||
@@ -423,32 +434,36 @@ func enrichWithSmartctl(dev lsblkDevice) schema.HardwareStorage {
|
||||
return s
|
||||
}
|
||||
|
||||
// nvmeSmartLog is the subset of `nvme smart-log -o json` output we care about.
|
||||
// nvme-cli emits most counters as JSON strings (e.g. "power_on_hours":"49"),
|
||||
// so all numeric fields use jsonInt64 which accepts both bare numbers and
|
||||
// quoted strings. Field names match nvme-cli JSON output, not NVMe spec prose.
|
||||
type nvmeSmartLog struct {
|
||||
CriticalWarning jsonInt64 `json:"critical_warning"`
|
||||
PercentageUsed jsonInt64 `json:"percent_used"`
|
||||
AvailableSpare jsonInt64 `json:"avail_spare"`
|
||||
SpareThreshold jsonInt64 `json:"spare_thresh"`
|
||||
Temperature jsonInt64 `json:"temperature"`
|
||||
PowerOnHours jsonInt64 `json:"power_on_hours"`
|
||||
PowerCycles jsonInt64 `json:"power_cycles"`
|
||||
UnsafeShutdowns jsonInt64 `json:"unsafe_shutdowns"`
|
||||
DataUnitsRead jsonInt64 `json:"data_units_read"`
|
||||
DataUnitsWritten jsonInt64 `json:"data_units_written"`
|
||||
ControllerBusy jsonInt64 `json:"controller_busy_time"`
|
||||
MediaErrors jsonInt64 `json:"media_errors"`
|
||||
NumErrLogEntries jsonInt64 `json:"num_err_log_entries"`
|
||||
// NVMeSmartLog is the subset of `nvme smart-log -o json` output shared by the
|
||||
// structured collector and the human-readable disk report. nvme-cli emits
|
||||
// most counters as JSON strings (e.g. "power_on_hours":"49") or, on some
|
||||
// versions, as {"lo":n,"hi":n} objects — all numeric fields use JSONInt64,
|
||||
// which accepts bare numbers, quoted strings, and lo/hi objects. Field names
|
||||
// match nvme-cli JSON output, not NVMe spec prose.
|
||||
type NVMeSmartLog struct {
|
||||
CriticalWarning JSONInt64 `json:"critical_warning"`
|
||||
PercentageUsed JSONInt64 `json:"percent_used"`
|
||||
AvailableSpare JSONInt64 `json:"avail_spare"`
|
||||
SpareThreshold JSONInt64 `json:"spare_thresh"`
|
||||
Temperature JSONInt64 `json:"temperature"`
|
||||
PowerOnHours JSONInt64 `json:"power_on_hours"`
|
||||
PowerCycles JSONInt64 `json:"power_cycles"`
|
||||
UnsafeShutdowns JSONInt64 `json:"unsafe_shutdowns"`
|
||||
DataUnitsRead JSONInt64 `json:"data_units_read"`
|
||||
DataUnitsWritten JSONInt64 `json:"data_units_written"`
|
||||
ControllerBusy JSONInt64 `json:"controller_busy_time"`
|
||||
MediaErrors JSONInt64 `json:"media_errors"`
|
||||
NumErrLogEntries JSONInt64 `json:"num_err_log_entries"`
|
||||
}
|
||||
|
||||
// nvmeIDCtrl is the subset of `nvme id-ctrl -o json` output.
|
||||
type nvmeIDCtrl struct {
|
||||
ModelNumber string `json:"mn"`
|
||||
SerialNumber string `json:"sn"`
|
||||
FirmwareRev string `json:"fr"`
|
||||
TotalCapacity int64 `json:"tnvmcap"`
|
||||
// NVMeIDCtrl is the subset of `nvme id-ctrl -o json` output shared by the
|
||||
// structured collector and the human-readable disk report.
|
||||
type NVMeIDCtrl struct {
|
||||
ModelNumber string `json:"mn"`
|
||||
SerialNumber string `json:"sn"`
|
||||
FirmwareRev string `json:"fr"`
|
||||
TotalCapacity JSONInt64 `json:"tnvmcap"`
|
||||
NVMCapacity JSONInt64 `json:"nvmcap"`
|
||||
}
|
||||
|
||||
func enrichWithNVMe(dev lsblkDevice) schema.HardwareStorage {
|
||||
@@ -481,7 +496,7 @@ func enrichWithNVMe(dev lsblkDevice) schema.HardwareStorage {
|
||||
|
||||
// id-ctrl: model, serial, firmware, capacity
|
||||
if out, err := exec.Command("nvme", "id-ctrl", devPath, "-o", "json").Output(); err == nil {
|
||||
var ctrl nvmeIDCtrl
|
||||
var ctrl NVMeIDCtrl
|
||||
if json.Unmarshal(out, &ctrl) == nil {
|
||||
if v := cleanDMIValue(strings.TrimSpace(ctrl.ModelNumber)); v != "" {
|
||||
s.Model = &v
|
||||
@@ -502,7 +517,7 @@ func enrichWithNVMe(dev lsblkDevice) schema.HardwareStorage {
|
||||
|
||||
// smart-log: wear telemetry
|
||||
if out, err := exec.Command("nvme", "smart-log", devPath, "-o", "json").Output(); err == nil {
|
||||
var log nvmeSmartLog
|
||||
var log NVMeSmartLog
|
||||
if json.Unmarshal(out, &log) == nil {
|
||||
if log.PowerOnHours > 0 {
|
||||
v := int64(log.PowerOnHours)
|
||||
|
||||
@@ -56,7 +56,7 @@ func TestJsonInt64UnmarshalBothFormats(t *testing.T) {
|
||||
{`null`, 0},
|
||||
}
|
||||
for _, tc := range cases {
|
||||
var v jsonInt64
|
||||
var v JSONInt64
|
||||
if err := v.UnmarshalJSON([]byte(tc.json)); err != nil {
|
||||
t.Fatalf("UnmarshalJSON(%s): unexpected error %v", tc.json, err)
|
||||
}
|
||||
|
||||
@@ -9,7 +9,7 @@ import (
|
||||
|
||||
// TestNVMeSmartLogUnmarshal verifies that nvme-cli JSON output (where most
|
||||
// counters are quoted strings and field names differ from NVMe spec prose)
|
||||
// is correctly parsed into nvmeSmartLog.
|
||||
// is correctly parsed into NVMeSmartLog.
|
||||
func TestNVMeSmartLogUnmarshal(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
@@ -30,7 +30,7 @@ func TestNVMeSmartLogUnmarshal(t *testing.T) {
|
||||
"media_errors": "0",
|
||||
"num_err_log_entries": "0"
|
||||
}`
|
||||
var log nvmeSmartLog
|
||||
var log NVMeSmartLog
|
||||
if err := json.Unmarshal([]byte(raw), &log); err != nil {
|
||||
t.Fatalf("json.Unmarshal failed: %v", err)
|
||||
}
|
||||
|
||||
@@ -4008,14 +4008,23 @@ func renderPowerBenchReport(result NvidiaPowerBenchResult) string {
|
||||
idleW = result.ServerPower.IdleW
|
||||
}
|
||||
|
||||
// Build header: Run | GPU 0 | GPU 1 | ... | Server wall W | Per GPU wall W | Platform eff.
|
||||
// Build header: Run | GPU 0 | GPU 1 | ... | GPU total W | Server itself W | Server wall W | Per GPU wall W | Platform eff.
|
||||
headers := []string{"Run"}
|
||||
for _, idx := range allGPUIndices {
|
||||
headers = append(headers, fmt.Sprintf("GPU %d W", idx))
|
||||
}
|
||||
headers = append(headers, "Server wall W", "Per GPU wall W", "Platform eff.")
|
||||
headers = append(headers, "GPU total W", "Server itself W", "Server wall W", "Per GPU wall W", "Platform eff.")
|
||||
|
||||
var rampRows [][]string
|
||||
if idleW > 0 {
|
||||
idleRow := []string{"0 (idle)"}
|
||||
for range allGPUIndices {
|
||||
idleRow = append(idleRow, "—")
|
||||
}
|
||||
// No load: GPU total is negligible, all draw is the server's own baseline.
|
||||
idleRow = append(idleRow, "—", fmt.Sprintf("%.0f", idleW), fmt.Sprintf("%.0f", idleW), "—", "—")
|
||||
rampRows = append(rampRows, idleRow)
|
||||
}
|
||||
for _, step := range result.RampSteps {
|
||||
row := []string{fmt.Sprintf("%d", step.StepIndex)}
|
||||
for _, idx := range allGPUIndices {
|
||||
@@ -4036,6 +4045,16 @@ func renderPowerBenchReport(result NvidiaPowerBenchResult) string {
|
||||
}
|
||||
row = append(row, gpuPwr)
|
||||
}
|
||||
// GPU total W = sum of observed GPU power (nvidia-smi)
|
||||
gpuTotal := "—"
|
||||
if step.TotalObservedPowerW > 0 {
|
||||
gpuTotal = fmt.Sprintf("%.0f", step.TotalObservedPowerW)
|
||||
}
|
||||
// Server itself W = server wall power minus GPU total (non-GPU baseline draw)
|
||||
serverItself := "—"
|
||||
if step.ServerLoadedW > 0 && step.TotalObservedPowerW > 0 {
|
||||
serverItself = fmt.Sprintf("%.0f", step.ServerLoadedW-step.TotalObservedPowerW)
|
||||
}
|
||||
// Server wall W
|
||||
serverWall := "—"
|
||||
if step.ServerLoadedW > 0 {
|
||||
@@ -4055,7 +4074,7 @@ func renderPowerBenchReport(result NvidiaPowerBenchResult) string {
|
||||
}
|
||||
platEff = fmt.Sprintf("%.2f", eff)
|
||||
}
|
||||
row = append(row, serverWall, perGPUWall, platEff)
|
||||
row = append(row, gpuTotal, serverItself, serverWall, perGPUWall, platEff)
|
||||
rampRows = append(rampRows, row)
|
||||
}
|
||||
b.WriteString(fmtMDTable(headers, rampRows))
|
||||
@@ -4617,6 +4636,8 @@ func (s *System) RunNvidiaPowerBench(ctx context.Context, baseDir string, opts N
|
||||
ramp.AvgFanRPM = singleRun.AvgFanRPM
|
||||
ramp.AvgFanDutyCyclePct = singleRun.AvgFanDutyCyclePct
|
||||
}
|
||||
firstSummary := firstCalib.Summary
|
||||
ramp.PerGPUTelemetry = map[int]*BenchmarkTelemetrySummary{firstIdx: &firstSummary}
|
||||
if !firstCalib.Completed {
|
||||
ramp.Status = "FAILED"
|
||||
ramp.Notes = append(ramp.Notes, fmt.Sprintf("GPU %d did not complete single-card %s", firstIdx, benchmarkPowerEngineLabel(benchmarkPowerEngine())))
|
||||
|
||||
@@ -746,6 +746,25 @@ func (s *System) RunStorageAcceptancePack(ctx context.Context, baseDir string, e
|
||||
key := filepath.Base(devPath) + "_" + strings.ReplaceAll(job.name, "-", "_")
|
||||
fmt.Fprintf(&summary, "%s_rc=%d\n", key, rc)
|
||||
fmt.Fprintf(&summary, "%s_status=%s\n", key, status)
|
||||
|
||||
// smartctl -t short only launches the self-test on the drive firmware and
|
||||
// returns immediately ("Testing has begun"); unlike `nvme device-self-test
|
||||
// --wait`, smartctl has no blocking mode, so we must poll the drive
|
||||
// ourselves until the self-test actually finishes.
|
||||
if job.name == "smartctl-self-test-short" && err == nil {
|
||||
statusName := "smartctl-self-test-status"
|
||||
statusOut := waitForSmartctlSelfTest(ctx, verboseLog, devPath, logFunc)
|
||||
deviceOutputs[statusName] = statusOut
|
||||
statusFile := fmt.Sprintf("%s-%02d-%s.log", prefix, cmdIndex+2, statusName)
|
||||
if writeErr := os.WriteFile(filepath.Join(runDir, statusFile), statusOut, 0644); writeErr != nil {
|
||||
return "", writeErr
|
||||
}
|
||||
sStatus, sRC := classifySATResult(statusName, statusOut, nil)
|
||||
stats.Add(sStatus)
|
||||
sKey := filepath.Base(devPath) + "_" + strings.ReplaceAll(statusName, "-", "_")
|
||||
fmt.Fprintf(&summary, "%s_rc=%d\n", sKey, sRC)
|
||||
fmt.Fprintf(&summary, "%s_status=%s\n", sKey, sStatus)
|
||||
}
|
||||
}
|
||||
reportText := GenerateDiskReportText(index+1, devPath, deviceOutputs, time.Now().UTC())
|
||||
_ = os.WriteFile(filepath.Join(runDir, "disk-"+prefix+"-report.txt"), []byte(reportText), 0644)
|
||||
@@ -1181,6 +1200,42 @@ func runSATCommandCtx(ctx context.Context, verboseLog, name string, cmd []string
|
||||
return out, err
|
||||
}
|
||||
|
||||
// smartctlSelfTestPollInterval/Timeout bound how long we poll the drive after
|
||||
// launching `smartctl -t short`, which SMART/ATA specs put at ~2 minutes.
|
||||
const (
|
||||
smartctlSelfTestPollInterval = 5 * time.Second
|
||||
smartctlSelfTestTimeout = 4 * time.Minute
|
||||
)
|
||||
|
||||
// waitForSmartctlSelfTest polls `smartctl -a` until the short self-test
|
||||
// started on devPath finishes (or the timeout/context elapses) and returns
|
||||
// the final output, which reflects the actual test result rather than the
|
||||
// "Testing has begun" launch acknowledgement.
|
||||
func waitForSmartctlSelfTest(ctx context.Context, verboseLog, devPath string, logFunc func(string)) []byte {
|
||||
deadline := time.Now().Add(smartctlSelfTestTimeout)
|
||||
var last []byte
|
||||
for {
|
||||
out, _ := runSATCommandCtx(ctx, verboseLog, "smartctl-self-test-status", []string{"smartctl", "-a", devPath}, nil, nil)
|
||||
last = out
|
||||
if ctx.Err() != nil {
|
||||
return last
|
||||
}
|
||||
lower := bytes.ToLower(out)
|
||||
if !bytes.Contains(lower, []byte("self-test routine in progress")) &&
|
||||
!bytes.Contains(lower, []byte("% of test remaining")) {
|
||||
return last
|
||||
}
|
||||
if time.Now().After(deadline) {
|
||||
return last
|
||||
}
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
return last
|
||||
case <-time.After(smartctlSelfTestPollInterval):
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func listStorageDevices() ([]string, error) {
|
||||
out, err := satExecCommand("lsblk", "-dn", "-o", "NAME,TYPE,TRAN").Output()
|
||||
if err != nil {
|
||||
@@ -1204,7 +1259,7 @@ func storageSATCommands(devPath string, extended bool) []satJob {
|
||||
return jobs
|
||||
}
|
||||
jobs := []satJob{
|
||||
{name: "smartctl-health", cmd: []string{"smartctl", "-H", "-A", devPath}},
|
||||
{name: "smartctl-health", cmd: []string{"smartctl", "-H", "-A", "-i", devPath}},
|
||||
}
|
||||
if extended {
|
||||
jobs = append(jobs, satJob{name: "smartctl-self-test-short", cmd: []string{"smartctl", "-t", "short", devPath}})
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
package platform
|
||||
|
||||
import (
|
||||
"bee/audit/internal/collector"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"math"
|
||||
@@ -39,65 +40,22 @@ func GenerateDiskReportText(index int, devPath string, outputs map[string][]byte
|
||||
|
||||
// ── NVMe ─────────────────────────────────────────────────────────────────────
|
||||
|
||||
type nvmeIdCtrl struct {
|
||||
ModelNumber string `json:"mn"`
|
||||
SerialNumber string `json:"sn"`
|
||||
Firmware string `json:"fr"`
|
||||
TotalCap uint64 `json:"tnvmcap"`
|
||||
NVMCap uint64 `json:"nvmcap"`
|
||||
}
|
||||
|
||||
// nvmeU64 handles both plain JSON numbers and {"lo":n,"hi":n} objects that
|
||||
// some nvme-cli versions emit for 128-bit counters.
|
||||
func nvmeU64(raw json.RawMessage) uint64 {
|
||||
if len(raw) == 0 {
|
||||
return 0
|
||||
}
|
||||
var n uint64
|
||||
if json.Unmarshal(raw, &n) == nil {
|
||||
return n
|
||||
}
|
||||
var obj struct {
|
||||
Lo uint64 `json:"lo"`
|
||||
Hi uint64 `json:"hi"`
|
||||
}
|
||||
if json.Unmarshal(raw, &obj) == nil {
|
||||
return obj.Lo
|
||||
}
|
||||
return 0
|
||||
}
|
||||
|
||||
type nvmeSmartLogRaw struct {
|
||||
CriticalWarning uint64 `json:"critical_warning"`
|
||||
Temperature json.RawMessage `json:"temperature"`
|
||||
AvailSpare uint64 `json:"avail_spare"`
|
||||
SpareThresh uint64 `json:"spare_thresh"`
|
||||
PercentUsed uint64 `json:"percent_used"`
|
||||
DataUnitsRead json.RawMessage `json:"data_units_read"`
|
||||
DataUnitsWritten json.RawMessage `json:"data_units_written"`
|
||||
PowerCycles json.RawMessage `json:"power_cycles"`
|
||||
PowerOnHours json.RawMessage `json:"power_on_hours"`
|
||||
UnsafeShutdowns json.RawMessage `json:"unsafe_shutdowns"`
|
||||
MediaErrors json.RawMessage `json:"media_errors"`
|
||||
NumErrLogEntries json.RawMessage `json:"num_err_log_entries"`
|
||||
}
|
||||
|
||||
func writeNVMeReport(b *strings.Builder, outputs map[string][]byte) {
|
||||
// id-ctrl
|
||||
var ctrl nvmeIdCtrl
|
||||
var ctrl collector.NVMeIDCtrl
|
||||
if data := outputs["nvme-id-ctrl"]; len(data) > 0 {
|
||||
_ = json.Unmarshal(data, &ctrl)
|
||||
}
|
||||
|
||||
model := strings.TrimSpace(ctrl.ModelNumber)
|
||||
serial := strings.TrimSpace(ctrl.SerialNumber)
|
||||
firmware := strings.TrimSpace(ctrl.Firmware)
|
||||
firmware := strings.TrimSpace(ctrl.FirmwareRev)
|
||||
|
||||
capacityGB := ""
|
||||
if ctrl.TotalCap > 0 {
|
||||
capacityGB = formatCapacityGB(ctrl.TotalCap)
|
||||
} else if ctrl.NVMCap > 0 {
|
||||
capacityGB = formatCapacityGB(ctrl.NVMCap)
|
||||
if ctrl.TotalCapacity > 0 {
|
||||
capacityGB = formatCapacityGB(uint64(ctrl.TotalCapacity))
|
||||
} else if ctrl.NVMCapacity > 0 {
|
||||
capacityGB = formatCapacityGB(uint64(ctrl.NVMCapacity))
|
||||
}
|
||||
|
||||
writeField(b, "Model", model)
|
||||
@@ -113,62 +71,75 @@ func writeNVMeReport(b *strings.Builder, outputs map[string][]byte) {
|
||||
b.WriteString("\n(no SMART data)\n")
|
||||
return
|
||||
}
|
||||
var sl nvmeSmartLogRaw
|
||||
var sl collector.NVMeSmartLog
|
||||
if err := json.Unmarshal(data, &sl); err != nil {
|
||||
fmt.Fprintf(b, "\n(SMART parse error: %v)\n", err)
|
||||
return
|
||||
}
|
||||
|
||||
tempK := nvmeU64(sl.Temperature)
|
||||
tempC := int(tempK) - 273
|
||||
tempC := int(sl.Temperature) - 273
|
||||
if tempC < 0 {
|
||||
tempC = 0
|
||||
}
|
||||
|
||||
critWarn := sl.CriticalWarning
|
||||
critWarnStr := "OK"
|
||||
if critWarn != 0 {
|
||||
critWarnStr = fmt.Sprintf("0x%02X", critWarn)
|
||||
if sl.CriticalWarning != 0 {
|
||||
critWarnStr = fmt.Sprintf("0x%02X", sl.CriticalWarning)
|
||||
}
|
||||
|
||||
poh := nvmeU64(sl.PowerOnHours)
|
||||
pc := nvmeU64(sl.PowerCycles)
|
||||
us := nvmeU64(sl.UnsafeShutdowns)
|
||||
me := nvmeU64(sl.MediaErrors)
|
||||
nel := nvmeU64(sl.NumErrLogEntries)
|
||||
poh := uint64(sl.PowerOnHours)
|
||||
pc := uint64(sl.PowerCycles)
|
||||
us := uint64(sl.UnsafeShutdowns)
|
||||
me := uint64(sl.MediaErrors)
|
||||
nel := uint64(sl.NumErrLogEntries)
|
||||
|
||||
// data_units are in 1000 × 512-byte sectors = 512,000 bytes each
|
||||
dataRead := float64(nvmeU64(sl.DataUnitsRead)) * 512000 / 1e9
|
||||
dataWritten := float64(nvmeU64(sl.DataUnitsWritten)) * 512000 / 1e9
|
||||
readBytes := uint64(sl.DataUnitsRead) * 512000
|
||||
writtenBytes := uint64(sl.DataUnitsWritten) * 512000
|
||||
|
||||
writeSectionHeader(b, "Health")
|
||||
writeField(b, "Temperature", fmt.Sprintf("%d °C", tempC))
|
||||
writeField(b, "Critical Warning", critWarnStr)
|
||||
writeField(b, "Percentage Used", fmt.Sprintf("%d %%", sl.PercentUsed))
|
||||
writeField(b, "Available Spare", fmt.Sprintf("%d %% (threshold: %d %%)", sl.AvailSpare, sl.SpareThresh))
|
||||
writeField(b, "Percentage Used", fmt.Sprintf("%d %%", sl.PercentageUsed))
|
||||
writeField(b, "Available Spare", fmt.Sprintf("%d %% (threshold: %d %%)", sl.AvailableSpare, sl.SpareThreshold))
|
||||
|
||||
writeSectionHeader(b, "Usage")
|
||||
writeField(b, "Power On Hours", fmt.Sprintf("%s h", formatUint(poh)))
|
||||
writeField(b, "Power Cycles", formatUint(pc))
|
||||
writeField(b, "Unsafe Shutdowns", formatUint(us))
|
||||
writeField(b, "Data Written", fmt.Sprintf("%.1f GB", dataWritten))
|
||||
writeField(b, "Data Read", fmt.Sprintf("%.1f GB", dataRead))
|
||||
writeField(b, "Data Written", formatBytesHuman(float64(writtenBytes)))
|
||||
writeField(b, "Data Read", formatBytesHuman(float64(readBytes)))
|
||||
|
||||
writeSectionHeader(b, "Errors")
|
||||
writeField(b, "Media Errors", formatUint(me))
|
||||
writeField(b, "Error Log Entries", formatUint(nel))
|
||||
|
||||
capacityBytes := uint64(ctrl.TotalCapacity)
|
||||
if capacityBytes == 0 {
|
||||
capacityBytes = uint64(ctrl.NVMCapacity)
|
||||
}
|
||||
ri := resourceInfo{
|
||||
powerOnHours: poh,
|
||||
powerCycles: pc,
|
||||
writtenBytes: writtenBytes,
|
||||
readBytes: readBytes,
|
||||
capacityBytes: capacityBytes,
|
||||
}
|
||||
writeResourceSection(b, ri)
|
||||
|
||||
if selfTest := outputs["nvme-device-self-test"]; len(selfTest) > 0 {
|
||||
writeSectionHeader(b, "Self-Test")
|
||||
result := parseSelfTestResult(string(selfTest))
|
||||
writeField(b, "Result", result)
|
||||
}
|
||||
|
||||
writeConclusionSection(b, ri)
|
||||
}
|
||||
|
||||
// ── SATA / SAS (smartctl) ────────────────────────────────────────────────────
|
||||
|
||||
var (
|
||||
smartHealthRE = regexp.MustCompile(`(?i)SMART overall-health self-assessment test result:\s*(\S+)`)
|
||||
smartHealthRE = regexp.MustCompile(`(?i)SMART overall-health self-assessment test result:\s*(\S+)`)
|
||||
smartAttrLineRE = regexp.MustCompile(
|
||||
`^\s*(\d{1,3})\s+(\S+)\s+0x[0-9a-fA-F]+\s+(\d{1,3})\s+(\d{1,3})\s+(\d{1,3})\s+\S+\s+\S+\s+\S+\s+(.+?)\s*$`,
|
||||
)
|
||||
@@ -205,8 +176,10 @@ func writeSATAReport(b *strings.Builder, outputs map[string][]byte) {
|
||||
if m := smartFirmwareRE.FindStringSubmatch(text); m != nil {
|
||||
writeField(b, "Firmware", strings.TrimSpace(m[1]))
|
||||
}
|
||||
var capacityBytes uint64
|
||||
if m := smartCapacityRE.FindStringSubmatch(text); m != nil {
|
||||
cap := strings.TrimSpace(m[1])
|
||||
capacityBytes = parseLeadingUint(cap)
|
||||
// trim everything after "[" if present (e.g. "500,107,862,016 bytes [500 GB]")
|
||||
if idx := strings.Index(cap, "["); idx > 0 {
|
||||
cap = strings.TrimSpace(cap[idx+1:])
|
||||
@@ -233,11 +206,46 @@ func writeSATAReport(b *strings.Builder, outputs map[string][]byte) {
|
||||
}
|
||||
}
|
||||
|
||||
if selfTest := outputs["smartctl-self-test-short"]; len(selfTest) > 0 {
|
||||
var poh, pc, writtenLBAs, readLBAs uint64
|
||||
var readValue int
|
||||
hasReadValue := false
|
||||
for _, a := range attrs {
|
||||
switch a.ID {
|
||||
case 9: // Power_On_Hours
|
||||
poh = parseLeadingUint(a.Raw)
|
||||
case 12: // Power_Cycle_Count
|
||||
pc = parseLeadingUint(a.Raw)
|
||||
case 241: // Total_LBAs_Written
|
||||
writtenLBAs = parseLeadingUint(a.Raw)
|
||||
case 242: // Total_LBAs_Read
|
||||
readLBAs = parseLeadingUint(a.Raw)
|
||||
readValue = a.Value
|
||||
hasReadValue = true
|
||||
}
|
||||
}
|
||||
const sataSectorBytes = 512
|
||||
ri := resourceInfo{
|
||||
powerOnHours: poh,
|
||||
powerCycles: pc,
|
||||
writtenBytes: writtenLBAs * sataSectorBytes,
|
||||
readBytes: readLBAs * sataSectorBytes,
|
||||
capacityBytes: capacityBytes,
|
||||
readPercent: 100 - readValue,
|
||||
hasReadPercent: hasReadValue,
|
||||
}
|
||||
writeResourceSection(b, ri)
|
||||
|
||||
selfTest := outputs["smartctl-self-test-status"]
|
||||
if len(selfTest) == 0 {
|
||||
selfTest = outputs["smartctl-self-test-short"]
|
||||
}
|
||||
if len(selfTest) > 0 {
|
||||
writeSectionHeader(b, "Self-Test")
|
||||
result := parseSelfTestResult(string(selfTest))
|
||||
writeField(b, "Result", result)
|
||||
}
|
||||
|
||||
writeConclusionSection(b, ri)
|
||||
}
|
||||
|
||||
func parseSMARTAttrs(text string) []smartAttr {
|
||||
@@ -274,29 +282,45 @@ func parseSMARTAttrs(text string) []smartAttr {
|
||||
return attrs
|
||||
}
|
||||
|
||||
// parseSelfTestResult extracts a one-line summary from nvme device-self-test
|
||||
// or smartctl -t short output.
|
||||
// parseSelfTestResult extracts a one-line summary from nvme device-self-test,
|
||||
// smartctl -a (post-completion status), or smartctl -t short (launch ack) output.
|
||||
func parseSelfTestResult(text string) string {
|
||||
text = strings.TrimSpace(text)
|
||||
if text == "" {
|
||||
return "no output"
|
||||
}
|
||||
lines := strings.Split(text, "\n")
|
||||
// smartctl -a: "Self-test execution status: ( 0)\n\tThe previous
|
||||
// self-test routine completed\n\twithout error ..." — the description
|
||||
// wraps onto following indented, colon-free continuation lines.
|
||||
for i, line := range lines {
|
||||
if strings.Contains(strings.ToLower(line), "self-test execution status") {
|
||||
parts := []string{strings.TrimSpace(line)}
|
||||
for j := i + 1; j < len(lines) && j < i+4; j++ {
|
||||
cont := strings.TrimSpace(lines[j])
|
||||
if cont == "" || strings.Contains(cont, ":") {
|
||||
break
|
||||
}
|
||||
parts = append(parts, cont)
|
||||
}
|
||||
return strings.Join(parts, " ")
|
||||
}
|
||||
}
|
||||
// nvme device-self-test: look for "Short Device Self-Test Status : 0x0" or similar
|
||||
for _, line := range strings.Split(text, "\n") {
|
||||
for _, line := range lines {
|
||||
l := strings.ToLower(line)
|
||||
if strings.Contains(l, "self-test status") || strings.Contains(l, "self test status") {
|
||||
return strings.TrimSpace(line)
|
||||
}
|
||||
}
|
||||
// smartctl -t short: "Testing has begun" or "Short BGST started"
|
||||
for _, line := range strings.Split(text, "\n") {
|
||||
for _, line := range lines {
|
||||
l := strings.ToLower(line)
|
||||
if strings.Contains(l, "testing has begun") || strings.Contains(l, "started") || strings.Contains(l, "complete") {
|
||||
return strings.TrimSpace(line)
|
||||
}
|
||||
}
|
||||
// fallback: last non-empty line
|
||||
lines := strings.Split(strings.TrimSpace(text), "\n")
|
||||
for i := len(lines) - 1; i >= 0; i-- {
|
||||
if s := strings.TrimSpace(lines[i]); s != "" {
|
||||
return s
|
||||
@@ -305,6 +329,180 @@ func parseSelfTestResult(text string) string {
|
||||
return "done"
|
||||
}
|
||||
|
||||
// ── Resource (pseudographic usage bars) ────────────────────────────────────────
|
||||
|
||||
// designLifeYears/dwpd model the drive's rated endurance: 1 drive-write-per-day
|
||||
// for 5 years, the baseline enterprise endurance spec used when the vendor's
|
||||
// own TBW/DWPD rating isn't available from SMART/NVMe data.
|
||||
const (
|
||||
designLifeYears = 5
|
||||
dwpd = 1.0
|
||||
)
|
||||
|
||||
type resourceInfo struct {
|
||||
powerOnHours uint64
|
||||
powerCycles uint64
|
||||
writtenBytes uint64
|
||||
readBytes uint64
|
||||
capacityBytes uint64
|
||||
readPercent int // only meaningful when hasReadPercent
|
||||
hasReadPercent bool // true when the source SMART attribute exposes a normalized read-wear value
|
||||
}
|
||||
|
||||
func writeResourceSection(b *strings.Builder, r resourceInfo) {
|
||||
writeSectionHeader(b, "Resource")
|
||||
|
||||
const maxLifeHours = designLifeYears * 365 * 24
|
||||
upFrac := float64(r.powerOnHours) / float64(maxLifeHours)
|
||||
fmt.Fprintf(b, " %-9s %s %s / %s (%s)\n",
|
||||
"Uptime", progressBar(upFrac, 24), formatHoursHuman(r.powerOnHours), formatHoursHuman(maxLifeHours), formatPercent(upFrac*100))
|
||||
|
||||
if r.capacityBytes > 0 {
|
||||
maxWritten := float64(r.capacityBytes) * dwpd * designLifeYears * 365
|
||||
wFrac := float64(r.writtenBytes) / maxWritten
|
||||
fmt.Fprintf(b, " %-9s %s %s / %s (%s, %g DWPD×%dy)\n",
|
||||
"Written", progressBar(wFrac, 24), formatBytesHuman(float64(r.writtenBytes)), formatBytesHuman(maxWritten), formatPercent(wFrac*100), dwpd, designLifeYears)
|
||||
} else {
|
||||
fmt.Fprintf(b, " %-9s %s\n", "Written", formatBytesHuman(float64(r.writtenBytes)))
|
||||
}
|
||||
|
||||
if r.hasReadPercent {
|
||||
fmt.Fprintf(b, " %-9s %s %s (%d%%)\n",
|
||||
"Read", progressBar(float64(r.readPercent)/100, 24), formatBytesHuman(float64(r.readBytes)), r.readPercent)
|
||||
} else {
|
||||
fmt.Fprintf(b, " %-9s %s\n", "Read", formatBytesHuman(float64(r.readBytes)))
|
||||
}
|
||||
}
|
||||
|
||||
// ── Conclusion (new-vs-used verdict) ────────────────────────────────────────
|
||||
|
||||
// Thresholds for treating a drive as "new": less than one full drive-write
|
||||
// (110% of capacity, headroom for provisioning/overprovisioning rounding),
|
||||
// less than a bit over two full drive-reads (210% of capacity), under a
|
||||
// week of power-on time, and under 30 power cycles. Any one violation is
|
||||
// enough to call the drive used — these are deliberately loose bounds, not
|
||||
// a wear/endurance judgment (see -- Resource -- for that).
|
||||
const (
|
||||
newDiskMaxWrittenFrac = 1.10
|
||||
newDiskMaxReadFrac = 2.10
|
||||
newDiskMaxUptimeHours = 7 * 24
|
||||
newDiskMaxPowerCycles = 30
|
||||
)
|
||||
|
||||
func writeConclusionSection(b *strings.Builder, r resourceInfo) {
|
||||
writeSectionHeader(b, "Conclusion")
|
||||
|
||||
var reasons, notes []string
|
||||
isNew := true
|
||||
|
||||
if r.capacityBytes > 0 {
|
||||
writtenFrac := float64(r.writtenBytes) / float64(r.capacityBytes)
|
||||
readFrac := float64(r.readBytes) / float64(r.capacityBytes)
|
||||
if writtenFrac >= newDiskMaxWrittenFrac {
|
||||
isNew = false
|
||||
reasons = append(reasons, fmt.Sprintf(
|
||||
"data written %s (%s of capacity)",
|
||||
formatBytesHuman(float64(r.writtenBytes)), formatPercent(writtenFrac*100)))
|
||||
}
|
||||
if readFrac >= newDiskMaxReadFrac {
|
||||
isNew = false
|
||||
reasons = append(reasons, fmt.Sprintf(
|
||||
"data read %s (%s of capacity)",
|
||||
formatBytesHuman(float64(r.readBytes)), formatPercent(readFrac*100)))
|
||||
}
|
||||
} else {
|
||||
notes = append(notes, "capacity unknown — write/read criteria not evaluated")
|
||||
}
|
||||
|
||||
if r.powerOnHours >= newDiskMaxUptimeHours {
|
||||
isNew = false
|
||||
reasons = append(reasons, fmt.Sprintf("uptime %s", formatHoursHuman(r.powerOnHours)))
|
||||
}
|
||||
|
||||
if r.powerCycles >= newDiskMaxPowerCycles {
|
||||
isNew = false
|
||||
reasons = append(reasons, fmt.Sprintf("power cycles %s", formatUint(r.powerCycles)))
|
||||
}
|
||||
|
||||
if isNew {
|
||||
writeField(b, "Disk Condition", "NEW")
|
||||
} else {
|
||||
writeField(b, "Disk Condition", "USED")
|
||||
b.WriteString(" Reason:\n")
|
||||
for _, reason := range reasons {
|
||||
fmt.Fprintf(b, " - %s\n", reason)
|
||||
}
|
||||
}
|
||||
for _, note := range notes {
|
||||
fmt.Fprintf(b, " Note: %s\n", note)
|
||||
}
|
||||
}
|
||||
|
||||
// progressBar renders a fixed-width pseudographic bar, e.g. "[######------]".
|
||||
func progressBar(frac float64, width int) string {
|
||||
if math.IsNaN(frac) || frac < 0 {
|
||||
frac = 0
|
||||
}
|
||||
if frac > 1 {
|
||||
frac = 1
|
||||
}
|
||||
filled := int(math.Round(frac * float64(width)))
|
||||
return "[" + strings.Repeat("#", filled) + strings.Repeat("-", width-filled) + "]"
|
||||
}
|
||||
|
||||
// formatBytesHuman renders a decimal (SI) human-readable byte size, e.g. "1.23 TB".
|
||||
func formatBytesHuman(n float64) string {
|
||||
units := []string{"B", "KB", "MB", "GB", "TB", "PB"}
|
||||
i := 0
|
||||
for n >= 1000 && i < len(units)-1 {
|
||||
n /= 1000
|
||||
i++
|
||||
}
|
||||
if i == 0 {
|
||||
return fmt.Sprintf("%.0f %s", n, units[i])
|
||||
}
|
||||
return fmt.Sprintf("%.2f %s", n, units[i])
|
||||
}
|
||||
|
||||
// formatHoursHuman renders an hour count as a human-scaled duration (hours,
|
||||
// days, or years) so uptimes don't show as raw four/five-digit hour counts.
|
||||
func formatHoursHuman(hours uint64) string {
|
||||
if hours < 48 {
|
||||
return fmt.Sprintf("%d h", hours)
|
||||
}
|
||||
days := float64(hours) / 24
|
||||
if days < 365 {
|
||||
return fmt.Sprintf("%.0f d", days)
|
||||
}
|
||||
years := days / 365
|
||||
if years == math.Trunc(years) {
|
||||
return fmt.Sprintf("%.0f y", years)
|
||||
}
|
||||
return fmt.Sprintf("%.1f y", years)
|
||||
}
|
||||
|
||||
// formatPercent renders a percentage with extra precision below 1% (e.g.
|
||||
// "0.03%"), where a rounded "0%" would hide any usage at all.
|
||||
func formatPercent(pct float64) string {
|
||||
if pct > 0 && pct < 1 {
|
||||
return fmt.Sprintf("%.2f%%", pct)
|
||||
}
|
||||
return fmt.Sprintf("%.0f%%", pct)
|
||||
}
|
||||
|
||||
// parseLeadingUint parses the leading run of digits/commas in s (e.g. from a
|
||||
// SMART raw value or "500,107,862,016 bytes") into a uint64, ignoring the rest.
|
||||
func parseLeadingUint(s string) uint64 {
|
||||
s = strings.TrimSpace(s)
|
||||
end := 0
|
||||
for end < len(s) && (s[end] >= '0' && s[end] <= '9' || s[end] == ',') {
|
||||
end++
|
||||
}
|
||||
digits := strings.ReplaceAll(s[:end], ",", "")
|
||||
n, _ := strconv.ParseUint(digits, 10, 64)
|
||||
return n
|
||||
}
|
||||
|
||||
// ── Formatting helpers ────────────────────────────────────────────────────────
|
||||
|
||||
func writeSectionHeader(b *strings.Builder, title string) {
|
||||
|
||||
@@ -83,7 +83,36 @@ func TestGenerateDiskReportNVMe(t *testing.T) {
|
||||
assertContains(t, report, "1,234 h") // power_on_hours with separator
|
||||
assertContains(t, report, "32") // power_cycles
|
||||
assertContains(t, report, "3") // unsafe_shutdowns
|
||||
assertContains(t, report, "378.0 GB") // data_units_written * 512000 / 1e9
|
||||
assertContains(t, report, "378.00 GB") // data_units_written * 512000, human-scaled
|
||||
}
|
||||
|
||||
// TestGenerateDiskReportNVMeDataUnitsScaleToTB verifies that heavy write/read
|
||||
// counters render in the "-- Usage --" section as TB/PB, not raw GB, matching
|
||||
// the "-- Resource --" section which already used formatBytesHuman.
|
||||
func TestGenerateDiskReportNVMeDataUnitsScaleToTB(t *testing.T) {
|
||||
t.Parallel()
|
||||
heavy := []byte(`{
|
||||
"critical_warning": 0,
|
||||
"temperature": 307,
|
||||
"avail_spare": 100,
|
||||
"spare_thresh": 10,
|
||||
"percent_used": 0,
|
||||
"data_units_read": "252420478",
|
||||
"data_units_written": "103834055",
|
||||
"power_cycles": "45",
|
||||
"power_on_hours": "45",
|
||||
"unsafe_shutdowns": "35",
|
||||
"media_errors": "0",
|
||||
"num_err_log_entries": "0"
|
||||
}`)
|
||||
outputs := map[string][]byte{
|
||||
"nvme-id-ctrl": testNVMeIdCtrl,
|
||||
"nvme-smart-log": heavy,
|
||||
}
|
||||
report := GenerateDiskReportText(1, "/dev/nvme0n1", outputs, time.Unix(0, 0).UTC())
|
||||
|
||||
assertContains(t, report, "Data Written : 53.16 TB")
|
||||
assertContains(t, report, "Data Read : 129.24 TB")
|
||||
}
|
||||
|
||||
func TestGenerateDiskReportNVMeLoHi(t *testing.T) {
|
||||
|
||||
@@ -37,26 +37,41 @@ var fruEditableFields = map[string]struct {
|
||||
"Chassis Part Number": {"c", 0},
|
||||
"Chassis Serial Number": {"c", 1},
|
||||
"Chassis Serial": {"c", 1},
|
||||
"Chassis Extra": {"c", 2},
|
||||
// Board — vendor doc names and ipmitool abbreviated names
|
||||
"Board Manufacturer": {"b", 0},
|
||||
"Board Mfg": {"b", 0},
|
||||
"Board Product Name": {"b", 1},
|
||||
"Board Product": {"b", 1},
|
||||
"Board Manufacturer": {"b", 0},
|
||||
"Board Mfg": {"b", 0},
|
||||
"Board Product Name": {"b", 1},
|
||||
"Board Product": {"b", 1},
|
||||
"Board Serial Number": {"b", 2},
|
||||
"Board Serial": {"b", 2},
|
||||
"Board Part Number": {"b", 3},
|
||||
"Board Serial": {"b", 2},
|
||||
"Board Part Number": {"b", 3},
|
||||
// Product — vendor doc names and ipmitool abbreviated names
|
||||
"Product Manufacturer": {"p", 0},
|
||||
"Product Name": {"p", 1},
|
||||
"Product Part Number": {"p", 2},
|
||||
"Product Version": {"p", 3},
|
||||
"Product Manufacturer": {"p", 0},
|
||||
"Product Name": {"p", 1},
|
||||
"Product Part Number": {"p", 2},
|
||||
"Product Version": {"p", 3},
|
||||
"Product Serial Number": {"p", 4},
|
||||
"Product Serial": {"p", 4},
|
||||
"Product Serial": {"p", 4},
|
||||
"Product Asset Tag": {"p", 5},
|
||||
}
|
||||
|
||||
// fruExtraBaseIndex gives the starting ipmitool field index for each area's
|
||||
// repeated "<Area> Extra" custom fields, per the vendor FRU field doc (Chassis
|
||||
// extra fields start at 2, Board at 5, Product at 7). ipmitool fru print
|
||||
// emits one identically-named line per custom field, so parseFRUOutput
|
||||
// counts occurrences to recover the real index for each one.
|
||||
var fruExtraBaseIndex = map[string]struct {
|
||||
Area string
|
||||
Base int
|
||||
}{
|
||||
"Chassis Extra": {"c", 2},
|
||||
"Board Extra": {"b", 5},
|
||||
"Product Extra": {"p", 7},
|
||||
}
|
||||
|
||||
func parseFRUOutput(output string) []fruField {
|
||||
var fields []fruField
|
||||
extraSeen := map[string]int{}
|
||||
for _, line := range strings.Split(output, "\n") {
|
||||
// Lines look like: " Field Name : value"
|
||||
trimmed := strings.TrimLeft(line, " \t")
|
||||
@@ -64,33 +79,32 @@ func parseFRUOutput(output string) []fruField {
|
||||
continue
|
||||
}
|
||||
colon := strings.Index(trimmed, " : ")
|
||||
valueOffset := 3
|
||||
if colon < 0 {
|
||||
// try ": " with no leading space before colon
|
||||
colon = strings.Index(trimmed, ": ")
|
||||
valueOffset = 2
|
||||
if colon < 0 {
|
||||
continue
|
||||
}
|
||||
name := strings.TrimSpace(trimmed[:colon])
|
||||
value := strings.TrimSpace(trimmed[colon+2:])
|
||||
if name == "" {
|
||||
continue
|
||||
}
|
||||
editable, area, idx := fruFieldMeta(name)
|
||||
fields = append(fields, fruField{Name: name, Value: value, Editable: editable, Area: area, Index: idx})
|
||||
continue
|
||||
}
|
||||
name := strings.TrimSpace(trimmed[:colon])
|
||||
value := strings.TrimSpace(trimmed[colon+3:])
|
||||
value := strings.TrimSpace(trimmed[colon+valueOffset:])
|
||||
if name == "" {
|
||||
continue
|
||||
}
|
||||
editable, area, idx := fruFieldMeta(name)
|
||||
editable, area, idx := fruFieldMeta(name, extraSeen)
|
||||
fields = append(fields, fruField{Name: name, Value: value, Editable: editable, Area: area, Index: idx})
|
||||
}
|
||||
return fields
|
||||
}
|
||||
|
||||
func fruFieldMeta(name string) (editable bool, area string, index int) {
|
||||
func fruFieldMeta(name string, extraSeen map[string]int) (editable bool, area string, index int) {
|
||||
if e, ok := fruExtraBaseIndex[name]; ok {
|
||||
idx := e.Base + extraSeen[name]
|
||||
extraSeen[name]++
|
||||
return true, e.Area, idx
|
||||
}
|
||||
if e, ok := fruEditableFields[name]; ok {
|
||||
return true, e.Area, e.Index
|
||||
}
|
||||
@@ -201,4 +215,3 @@ func runIPMIFRUWriteTask(ctx context.Context, j *jobState, exportDir string, p t
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
|
||||
@@ -0,0 +1,59 @@
|
||||
package webui
|
||||
|
||||
import "testing"
|
||||
|
||||
func TestParseFRUOutputExtraFields(t *testing.T) {
|
||||
// Realistic ipmitool fru print output: repeated "<Area> Extra" lines
|
||||
// (one per custom field) must resolve to sequential indices per the
|
||||
// vendor FRU doc (Chassis Extra starts at 2, Board Extra at 5, Product
|
||||
// Extra at 7), not all collapse onto the same index.
|
||||
out := `
|
||||
Product Manufacturer : Inspur
|
||||
Product Name : NF5280M6
|
||||
Product Part Number : PN123
|
||||
Product Version : 1.0
|
||||
Product Serial : SN123
|
||||
Product Asset Tag : ASSET01
|
||||
Product Extra : custom-p1
|
||||
Board Mfg : Inspur
|
||||
Board Product : BoardX
|
||||
Board Serial : BSN1
|
||||
Board Part Number : BPN1
|
||||
Board Extra : custom-b1
|
||||
Board Extra : custom-b2
|
||||
Board Extra : custom-b3
|
||||
Chassis Part Number : CPN1
|
||||
Chassis Serial : CSN1
|
||||
Chassis Extra : front-half
|
||||
Chassis Extra : back-half
|
||||
`
|
||||
fields := parseFRUOutput(out)
|
||||
|
||||
byName := map[string][]fruField{}
|
||||
for _, f := range fields {
|
||||
byName[f.Name] = append(byName[f.Name], f)
|
||||
}
|
||||
|
||||
assertMeta := func(name string, occurrence int, wantArea string, wantIndex int) {
|
||||
t.Helper()
|
||||
list := byName[name]
|
||||
if occurrence >= len(list) {
|
||||
t.Fatalf("expected occurrence %d of %q, got %d entries", occurrence, name, len(list))
|
||||
}
|
||||
f := list[occurrence]
|
||||
if f.Area != wantArea || f.Index != wantIndex {
|
||||
t.Errorf("%s[%d] = area:%q index:%d, want area:%q index:%d", name, occurrence, f.Area, f.Index, wantArea, wantIndex)
|
||||
}
|
||||
if !f.Editable {
|
||||
t.Errorf("%s[%d] expected editable", name, occurrence)
|
||||
}
|
||||
}
|
||||
|
||||
assertMeta("Product Asset Tag", 0, "p", 5)
|
||||
assertMeta("Product Extra", 0, "p", 7)
|
||||
assertMeta("Board Extra", 0, "b", 5)
|
||||
assertMeta("Board Extra", 1, "b", 6)
|
||||
assertMeta("Board Extra", 2, "b", 7)
|
||||
assertMeta("Chassis Extra", 0, "c", 2)
|
||||
assertMeta("Chassis Extra", 1, "c", 3)
|
||||
}
|
||||
@@ -38,6 +38,7 @@ type raidControllerInfo struct {
|
||||
Model string `json:"model"`
|
||||
ForeignDrives []raidDriveInfo `json:"foreign_drives"`
|
||||
FreeDrives []raidDriveInfo `json:"free_drives"`
|
||||
AllDrives []raidDriveInfo `json:"all_drives"`
|
||||
Arrays []raidArrayInfo `json:"arrays,omitempty"`
|
||||
}
|
||||
|
||||
@@ -97,6 +98,7 @@ func detectLSIControllers() []raidControllerInfo {
|
||||
Model: c.ResponseData.Basics.Model,
|
||||
ForeignDrives: []raidDriveInfo{},
|
||||
FreeDrives: []raidDriveInfo{},
|
||||
AllDrives: []raidDriveInfo{},
|
||||
}
|
||||
if ctrl.Model == "" {
|
||||
ctrl.Model = fmt.Sprintf("LSI Controller %d", ctrl.Index)
|
||||
@@ -111,6 +113,7 @@ func detectLSIControllers() []raidControllerInfo {
|
||||
SizeGB: raidParseHumanSizeGB(d.Size),
|
||||
Serial: strings.TrimSpace(d.SN),
|
||||
}
|
||||
ctrl.AllDrives = append(ctrl.AllDrives, info)
|
||||
switch strings.TrimSpace(d.State) {
|
||||
case "Frgn":
|
||||
ctrl.ForeignDrives = append(ctrl.ForeignDrives, info)
|
||||
@@ -168,6 +171,30 @@ func parseRAIDMDStat(raw string) []mdStatEntry {
|
||||
return entries
|
||||
}
|
||||
|
||||
// raidVROCPortRx matches lines like " Port2 : /dev/sda (SERIAL123)"
|
||||
// or " Port3 : - no device attached -" from `mdadm --detail-platform`.
|
||||
var raidVROCPortRx = regexp.MustCompile(`^\s*Port\d+\s*:\s*(\S+)`)
|
||||
|
||||
// parseVROCPorts returns the block device basenames (e.g. "sda") that are
|
||||
// physically wired to the VROC I/O controller's ports, per `mdadm
|
||||
// --detail-platform` output. Drives attached directly to the CPU (or to a
|
||||
// separate HBA) rather than through this controller's ports are excluded.
|
||||
func parseVROCPorts(raw string) map[string]bool {
|
||||
ports := map[string]bool{}
|
||||
for _, line := range strings.Split(raw, "\n") {
|
||||
m := raidVROCPortRx.FindStringSubmatch(line)
|
||||
if m == nil {
|
||||
continue
|
||||
}
|
||||
dev := m[1]
|
||||
if !strings.HasPrefix(dev, "/dev/") {
|
||||
continue
|
||||
}
|
||||
ports[strings.TrimPrefix(dev, "/dev/")] = true
|
||||
}
|
||||
return ports
|
||||
}
|
||||
|
||||
func detectVROCController() *raidControllerInfo {
|
||||
out, err := exec.Command("mdadm", "--detail-platform").CombinedOutput()
|
||||
if err != nil && len(out) == 0 {
|
||||
@@ -191,8 +218,16 @@ func detectVROCController() *raidControllerInfo {
|
||||
Model: "Intel VROC",
|
||||
ForeignDrives: []raidDriveInfo{},
|
||||
FreeDrives: []raidDriveInfo{},
|
||||
AllDrives: []raidDriveInfo{},
|
||||
}
|
||||
|
||||
ports := parseVROCPorts(string(out))
|
||||
// Some mdadm builds omit the "Port" lines from --detail-platform. When
|
||||
// we can't determine which drives are actually wired to this
|
||||
// controller, fall back to showing every disk not already in an array
|
||||
// rather than hiding everything.
|
||||
portsKnown := len(ports) > 0
|
||||
|
||||
inArray := map[string]bool{}
|
||||
raw, err := os.ReadFile("/proc/mdstat")
|
||||
if err == nil {
|
||||
@@ -222,15 +257,25 @@ func detectVROCController() *raidControllerInfo {
|
||||
}
|
||||
if json.Unmarshal(lsblkOut, &lsblkDoc) == nil {
|
||||
for _, d := range lsblkDoc.BlockDevices {
|
||||
if d.Type != "disk" || inArray[d.Name] {
|
||||
// Only consider disks wired to this controller's ports -
|
||||
// drives attached directly to the CPU (or another
|
||||
// controller) never show up as VROC ports and are skipped.
|
||||
if d.Type != "disk" || (portsKnown && !ports[d.Name]) {
|
||||
continue
|
||||
}
|
||||
ctrl.FreeDrives = append(ctrl.FreeDrives, raidDriveInfo{
|
||||
info := raidDriveInfo{
|
||||
Device: "/dev/" + d.Name,
|
||||
Model: strings.TrimSpace(d.Model),
|
||||
Serial: strings.TrimSpace(d.Serial),
|
||||
State: "available",
|
||||
})
|
||||
}
|
||||
if inArray[d.Name] {
|
||||
info.State = "member"
|
||||
}
|
||||
ctrl.AllDrives = append(ctrl.AllDrives, info)
|
||||
if info.State == "available" {
|
||||
ctrl.FreeDrives = append(ctrl.FreeDrives, info)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -348,6 +393,38 @@ func (h *handler) handleAPIRAIDCreateMirror(w http.ResponseWriter, r *http.Reque
|
||||
writeJSON(w, map[string]string{"task_id": t.ID})
|
||||
}
|
||||
|
||||
func (h *handler) handleAPIRAIDPrepareDrive(w http.ResponseWriter, r *http.Request) {
|
||||
var req struct {
|
||||
ControllerID string `json:"controller_id"`
|
||||
Slot string `json:"slot"`
|
||||
}
|
||||
if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
|
||||
writeError(w, http.StatusBadRequest, "invalid JSON")
|
||||
return
|
||||
}
|
||||
ctrlIdx, ok := parseLSIControllerIndex(req.ControllerID)
|
||||
if !ok {
|
||||
writeError(w, http.StatusBadRequest, "invalid controller_id")
|
||||
return
|
||||
}
|
||||
if _, _, ok := parseRAIDSlot(req.Slot); !ok {
|
||||
writeError(w, http.StatusBadRequest, "invalid slot")
|
||||
return
|
||||
}
|
||||
|
||||
t := &Task{
|
||||
ID: newJobID("raid-lsi-prepare-drive"),
|
||||
Name: fmt.Sprintf("Prepare drive %s (LSI ctrl %d)", req.Slot, ctrlIdx),
|
||||
Target: "raid-lsi-prepare-drive",
|
||||
Priority: defaultTaskPriority("raid-lsi-prepare-drive", taskParams{}),
|
||||
Status: TaskPending,
|
||||
CreatedAt: time.Now(),
|
||||
params: taskParams{RAIDController: ctrlIdx, RAIDSlot: req.Slot},
|
||||
}
|
||||
globalQueue.enqueue(t)
|
||||
writeJSON(w, map[string]string{"task_id": t.ID})
|
||||
}
|
||||
|
||||
func parseLSIControllerIndex(id string) (int, bool) {
|
||||
if !strings.HasPrefix(id, "lsi-") {
|
||||
return 0, false
|
||||
@@ -385,6 +462,34 @@ func runRAIDLSICreateMirrorTask(ctx context.Context, j *jobState, ctrl int, driv
|
||||
return streamCmdJob(j, cmd)
|
||||
}
|
||||
|
||||
// parseRAIDSlot splits a storcli "EID:Slt" identifier (e.g. "252:0") into
|
||||
// enclosure and slot numbers.
|
||||
func parseRAIDSlot(slot string) (eid int, slt int, ok bool) {
|
||||
parts := strings.SplitN(strings.TrimSpace(slot), ":", 2)
|
||||
if len(parts) != 2 {
|
||||
return 0, 0, false
|
||||
}
|
||||
eid, err1 := strconv.Atoi(strings.TrimSpace(parts[0]))
|
||||
slt, err2 := strconv.Atoi(strings.TrimSpace(parts[1]))
|
||||
if err1 != nil || err2 != nil {
|
||||
return 0, 0, false
|
||||
}
|
||||
return eid, slt, true
|
||||
}
|
||||
|
||||
func runRAIDPrepareDriveTask(ctx context.Context, j *jobState, ctrl int, slot string) error {
|
||||
eid, slt, ok := parseRAIDSlot(slot)
|
||||
if !ok {
|
||||
return fmt.Errorf("invalid slot %q", slot)
|
||||
}
|
||||
j.append(fmt.Sprintf("Preparing drive %s on controller %d (set good, force)...", slot, ctrl))
|
||||
cmd := exec.CommandContext(ctx, "storcli64",
|
||||
fmt.Sprintf("/c%d/e%d/s%d", ctrl, eid, slt),
|
||||
"set", "good", "force",
|
||||
)
|
||||
return streamCmdJob(j, cmd)
|
||||
}
|
||||
|
||||
func runRAIDVROCCreateMirrorTask(ctx context.Context, j *jobState, devices []string, arrayName string) error {
|
||||
if arrayName == "" {
|
||||
arrayName = "bee-mirror0"
|
||||
@@ -507,6 +612,7 @@ function raidRenderController(c, idx) {
|
||||
html += '</div></div>';
|
||||
}
|
||||
|
||||
html += raidRenderAllDrives(c, idx);
|
||||
html += raidRenderMirrorSection(c, idx, 'lsi');
|
||||
}
|
||||
|
||||
@@ -529,12 +635,71 @@ function raidRenderController(c, idx) {
|
||||
html += '</table>';
|
||||
}
|
||||
|
||||
html += raidRenderAllDrives(c, idx);
|
||||
html += raidRenderMirrorSection(c, idx, 'vroc');
|
||||
}
|
||||
|
||||
return html;
|
||||
}
|
||||
|
||||
var RAID_READY_STATES = {'UGood': true, 'JBOD': true, 'available': true};
|
||||
var RAID_NO_PREPARE_STATES = {'UGood': true, 'JBOD': true, 'Frgn': true, 'Onln': true, 'Msng': true};
|
||||
|
||||
function raidRenderAllDrives(c, idx) {
|
||||
var drives = c.all_drives || [];
|
||||
var isLSI = c.type === 'lsi';
|
||||
if (drives.length === 0) {
|
||||
return '<p style="font-size:13px;color:var(--muted);margin-bottom:12px">No drives detected on this controller.</p>';
|
||||
}
|
||||
var html = '<div style="font-size:12px;font-weight:600;color:var(--muted);margin-bottom:6px;text-transform:uppercase;letter-spacing:.04em">All Drives on This Controller</div>';
|
||||
html += '<table style="margin-bottom:14px"><tr><th>' + (isLSI ? 'Slot' : 'Device') + '</th><th>Model</th><th>Size</th><th>State</th>' + (isLSI ? '<th></th>' : '') + '</tr>';
|
||||
drives.forEach(function(d) {
|
||||
var ready = !!RAID_READY_STATES[d.state];
|
||||
var badgeClass = ready ? 'badge-ok' : 'badge-warn';
|
||||
var actionCell = '';
|
||||
if (isLSI && !RAID_NO_PREPARE_STATES[d.state]) {
|
||||
actionCell = '<td><button class="btn btn-sm btn-secondary" onclick="raidPrepareDrive(\'' + escHtml(c.id) + '\',\'' + escHtml(d.slot) + '\',this)">Prepare</button></td>';
|
||||
} else if (isLSI) {
|
||||
actionCell = '<td></td>';
|
||||
}
|
||||
html += '<tr>'
|
||||
+ '<td style="font-family:monospace">' + escHtml(isLSI ? d.slot : d.device) + '</td>'
|
||||
+ '<td>' + escHtml(d.model||'—') + (d.serial ? ' [' + escHtml(d.serial) + ']' : '') + '</td>'
|
||||
+ '<td>' + (d.size_gb > 0 ? Math.round(d.size_gb) + ' GB' : '—') + '</td>'
|
||||
+ '<td><span class="badge ' + badgeClass + '">' + escHtml(d.state||'—') + '</span></td>'
|
||||
+ actionCell
|
||||
+ '</tr>';
|
||||
});
|
||||
html += '</table>';
|
||||
return html;
|
||||
}
|
||||
|
||||
function raidPrepareDrive(ctrlID, slot, btn) {
|
||||
if (!confirm('Prepare drive ' + slot + ' on ' + ctrlID + ' for array creation?\n\nThis forces the drive into Unconfigured Good state. If it currently belongs to a virtual drive or holds data, that data will become inaccessible.')) {
|
||||
return;
|
||||
}
|
||||
var original = btn ? btn.textContent : '';
|
||||
if (btn) { btn.disabled = true; btn.textContent = 'Preparing...'; }
|
||||
raidShowOutput('Prepare drive ' + slot, '', '');
|
||||
fetch('/api/tools/raid/prepare-drive', {
|
||||
method: 'POST',
|
||||
headers: {'Content-Type': 'application/json'},
|
||||
body: JSON.stringify({controller_id: ctrlID, slot: slot})
|
||||
})
|
||||
.then(function(r) { return r.json(); })
|
||||
.then(function(d) {
|
||||
if (d.error) throw new Error(d.error);
|
||||
raidStreamTask(d.task_id, 'Prepare drive ' + slot, function() {
|
||||
if (btn) { btn.disabled = false; btn.textContent = original; }
|
||||
raidLoad();
|
||||
});
|
||||
})
|
||||
.catch(function(e) {
|
||||
raidShowOutput('Error', 'failed', e.message);
|
||||
if (btn) { btn.disabled = false; btn.textContent = original; }
|
||||
});
|
||||
}
|
||||
|
||||
function raidRenderMirrorSection(c, idx, kind) {
|
||||
var free = c.free_drives || [];
|
||||
var html = '<div style="font-size:12px;font-weight:600;color:var(--muted);margin-bottom:6px;text-transform:uppercase;letter-spacing:.04em">Create RAID 1 Mirror</div>';
|
||||
@@ -683,6 +848,9 @@ function raidStreamTask(taskID, taskName, onDone) {
|
||||
}
|
||||
|
||||
window.raidLoad = raidLoad;
|
||||
window.raidForeignAction = raidForeignAction;
|
||||
window.raidCreateMirror = raidCreateMirror;
|
||||
window.raidPrepareDrive = raidPrepareDrive;
|
||||
raidLoad();
|
||||
})();
|
||||
</script>`
|
||||
|
||||
@@ -323,6 +323,7 @@ func NewHandler(opts HandlerOptions) http.Handler {
|
||||
mux.HandleFunc("GET /api/tools/raid/status", h.handleAPIRAIDStatus)
|
||||
mux.HandleFunc("POST /api/tools/raid/foreign", h.handleAPIRAIDForeignAction)
|
||||
mux.HandleFunc("POST /api/tools/raid/create-mirror", h.handleAPIRAIDCreateMirror)
|
||||
mux.HandleFunc("POST /api/tools/raid/prepare-drive", h.handleAPIRAIDPrepareDrive)
|
||||
|
||||
// GPU presence / tools
|
||||
mux.HandleFunc("GET /api/gpu/presence", h.handleAPIGPUPresence)
|
||||
|
||||
@@ -410,6 +410,12 @@ func executeTaskWithOptions(opts *HandlerOptions, t *Task, j *jobState, ctx cont
|
||||
break
|
||||
}
|
||||
err = runRAIDLSICreateMirrorTask(ctx, j, t.params.RAIDController, t.params.RAIDDevices)
|
||||
case "raid-lsi-prepare-drive":
|
||||
if strings.TrimSpace(t.params.RAIDSlot) == "" {
|
||||
err = fmt.Errorf("no drive slot provided")
|
||||
break
|
||||
}
|
||||
err = runRAIDPrepareDriveTask(ctx, j, t.params.RAIDController, t.params.RAIDSlot)
|
||||
case "raid-vroc-create-mirror":
|
||||
if len(t.params.RAIDDevices) < 2 {
|
||||
err = fmt.Errorf("at least 2 devices required")
|
||||
|
||||
@@ -146,6 +146,7 @@ type taskParams struct {
|
||||
RAIDController int `json:"raid_controller,omitempty"`
|
||||
RAIDDevices []string `json:"raid_devices,omitempty"`
|
||||
RAIDArrayName string `json:"raid_array_name,omitempty"`
|
||||
RAIDSlot string `json:"raid_slot,omitempty"`
|
||||
}
|
||||
|
||||
type persistedTask struct {
|
||||
|
||||
+1
-1
Submodule bible updated: 1977730d93...d2600f1279
+1
-1
Submodule internal/chart updated: 8105c7ec08...2a15bc87f1
Reference in New Issue
Block a user