Files
bee/audit/internal/collector/storage_discovery_test.go
Michael Chus 963bc960ca Fix SATA discovery, add NVLink bridge detection, add infiniband-diags
- storage: add jsonInt64 dual-format unmarshaler to handle lsblk output
  change in util-linux 2.38 (LOG-SEC/PHY-SEC now emitted as JSON
  integers, not quoted strings); fixes SATA disks invisible on Debian 12
- pcie: detect NVLink bridge mezzanine CX-7 cards (Mellanox x2, no host
  net ifaces, DeviceName contains "NVLINK" in lspci -v) and mark them
  with device_class="NVLinkBridge"; escalate PCIe link speed downgrade to
  Critical for these cards (Gen3 on a fixed internal connector = hardware
  fault, not a transient warning)
- pcie: cross-reference nvidia-smi topo to capture NVLink bond counts and
  active status for all NVLink bridge cards
- packages: add infiniband-diags to ISO; provides ibstat required by
  nvidia-fabricmanager-start.sh to enumerate IB devices before FM launch
  (absence causes CUDA_ERROR_SYSTEM_NOT_READY)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-05-28 20:57:04 +03:00

168 lines
4.8 KiB
Go

package collector
import (
"encoding/json"
"os"
"os/exec"
"path/filepath"
"strings"
"testing"
)
func TestMergeStorageDevicePrefersNonEmptyFields(t *testing.T) {
t.Parallel()
got := mergeStorageDevice(
lsblkDevice{Name: "nvme0n1", Type: "disk", Tran: "nvme"},
lsblkDevice{Name: "nvme0n1", Type: "disk", Size: "1024", Serial: "SN123", Model: "Kioxia"},
)
if got.Serial != "SN123" {
t.Fatalf("serial=%q want SN123", got.Serial)
}
if got.Model != "Kioxia" {
t.Fatalf("model=%q want Kioxia", got.Model)
}
if got.Size != "1024" {
t.Fatalf("size=%q want 1024", got.Size)
}
}
// TestParseStorageBytes verifies that parseStorageBytes accepts a plain
// integer surrounded by whitespace and yields 0 for a human-readable size
// string such as "1.92 TB".
func TestParseStorageBytes(t *testing.T) {
	t.Parallel()

	padded := parseStorageBytes(" 2048 ")
	if padded != 2048 {
		t.Fatalf("parseStorageBytes=%d want 2048", padded)
	}

	humanReadable := parseStorageBytes("1.92 TB")
	if humanReadable != 0 {
		t.Fatalf("parseStorageBytes invalid=%d want 0", humanReadable)
	}
}
// TestJsonInt64UnmarshalBothFormats checks that jsonInt64 decodes bare JSON
// numbers, quoted numeric strings and null, and that a full lsblk document
// carrying integer log-sec / phy-sec values decodes into lsblkRoot.
func TestJsonInt64UnmarshalBothFormats(t *testing.T) {
	t.Parallel()

	// Depending on the util-linux release, lsblk emits LOG-SEC / PHY-SEC
	// either as bare JSON numbers or as quoted strings. Both must parse
	// without error so that the entire lsblkDevices() call does not return
	// nil on Debian 12.
	cases := []struct {
		name string
		json string
		want int64
	}{
		{name: "bare 512", json: `512`, want: 512},
		{name: "bare 4096", json: `4096`, want: 4096},
		{name: "quoted 512", json: `"512"`, want: 512},
		{name: "quoted 4096", json: `"4096"`, want: 4096},
		{name: "null", json: `null`, want: 0},
	}
	for _, tc := range cases {
		// Subtests run synchronously here, so every case is reported even
		// when an earlier one fails.
		t.Run(tc.name, func(t *testing.T) {
			var v jsonInt64
			if err := v.UnmarshalJSON([]byte(tc.json)); err != nil {
				t.Fatalf("UnmarshalJSON(%s): unexpected error %v", tc.json, err)
			}
			if int64(v) != tc.want {
				t.Fatalf("UnmarshalJSON(%s)=%d want %d", tc.json, int64(v), tc.want)
			}
		})
	}

	// Simulate the exact JSON shape that triggered the bug on Debian 12.
	input := []byte(`{
"blockdevices": [
{"name":"sda","type":"disk","size":"3.6T","serial":"S1234","model":"SEAGATE","tran":"sata","hctl":"0:0:0:0","log-sec":512,"phy-sec":4096},
{"name":"sdb","type":"disk","size":"3.6T","serial":"S5678","model":"SEAGATE","tran":"sata","hctl":"0:0:1:0","log-sec":512,"phy-sec":4096}
]
}`)
	var root lsblkRoot
	if err := json.Unmarshal(input, &root); err != nil {
		t.Fatalf("lsblkRoot unmarshal with integer log-sec/phy-sec: %v", err)
	}
	if len(root.Blockdevices) != 2 {
		t.Fatalf("got %d blockdevices want 2", len(root.Blockdevices))
	}

	// Both entries carry the same sector sizes; verify each one rather than
	// only the first.
	for i := range root.Blockdevices {
		if got := int64(root.Blockdevices[i].LogSec); got != 512 {
			t.Errorf("blockdevices[%d]: LogSec=%d want 512", i, got)
		}
		if got := int64(root.Blockdevices[i].PhySec); got != 4096 {
			t.Errorf("blockdevices[%d]: PhySec=%d want 4096", i, got)
		}
	}
}
// TestBestEffortRescanHotplugStorage points the hotplug rescan hooks at a
// temporary directory tree and a stub udevadm script, runs
// bestEffortRescanHotplugStorage, and asserts that:
//   - the PCI rescan file contains "1\n",
//   - every matched SCSI host scan file contains "- - -\n",
//   - udevadm was invoked with "settle --timeout=10".
//
// The test is intentionally NOT parallel: it rewrites the process-wide PATH
// and several package-level variables, which would race with any test
// running concurrently in this package.
func TestBestEffortRescanHotplugStorage(t *testing.T) {
	tmp := t.TempDir()
	rescanPath := filepath.Join(tmp, "pci-rescan")
	scanDir := filepath.Join(tmp, "scsi_host")
	host0Path := filepath.Join(scanDir, "host0", "scan")
	host1Path := filepath.Join(scanDir, "host1", "scan")
	argsPath := filepath.Join(tmp, "udevadm-args")
	toolPath := filepath.Join(tmp, "udevadm")
	scanFiles := []string{host0Path, host1Path}

	// Create empty scan files for the glob to match.
	for _, scanFile := range scanFiles {
		if err := os.MkdirAll(filepath.Dir(scanFile), 0755); err != nil {
			t.Fatalf("mkdir for %s: %v", scanFile, err)
		}
		if err := os.WriteFile(scanFile, nil, 0644); err != nil {
			t.Fatalf("touch scsi scan file %s: %v", scanFile, err)
		}
	}

	// Stub udevadm: records its arguments so they can be asserted below.
	script := "#!/bin/sh\nprintf '%s' \"$*\" > \"" + argsPath + "\"\n"
	if err := os.WriteFile(toolPath, []byte(script), 0755); err != nil {
		t.Fatalf("write udevadm stub: %v", err)
	}

	// Put the stub directory first on PATH; t.Setenv restores the previous
	// value when the test finishes.
	t.Setenv("PATH", tmp+string(os.PathListSeparator)+os.Getenv("PATH"))

	// Redirect the package-level hooks at the temporary tree and restore
	// them afterwards.
	oldRescanPath := pciRescanPath
	oldSCSIGlob := scsiHostScanGlob
	oldWriteFile := hotplugWriteFile
	oldExecCommand := hotplugExecCommand
	oldGlob := hotplugGlob
	t.Cleanup(func() {
		pciRescanPath = oldRescanPath
		scsiHostScanGlob = oldSCSIGlob
		hotplugWriteFile = oldWriteFile
		hotplugExecCommand = oldExecCommand
		hotplugGlob = oldGlob
	})
	pciRescanPath = rescanPath
	scsiHostScanGlob = filepath.Join(scanDir, "host*", "scan")
	hotplugWriteFile = os.WriteFile
	hotplugExecCommand = exec.Command
	hotplugGlob = filepath.Glob

	bestEffortRescanHotplugStorage()

	raw, err := os.ReadFile(rescanPath)
	if err != nil {
		t.Fatalf("read rescan file: %v", err)
	}
	if string(raw) != "1\n" {
		t.Fatalf("rescan payload=%q want %q", string(raw), "1\n")
	}

	for _, scanFile := range scanFiles {
		payload, err := os.ReadFile(scanFile)
		if err != nil {
			t.Fatalf("read scsi scan file %s: %v", scanFile, err)
		}
		if string(payload) != "- - -\n" {
			t.Fatalf("scsi scan payload at %s =%q want %q", scanFile, string(payload), "- - -\n")
		}
	}

	args, err := os.ReadFile(argsPath)
	if err != nil {
		t.Fatalf("read udevadm args: %v", err)
	}
	if got := strings.TrimSpace(string(args)); got != "settle --timeout=10" {
		t.Fatalf("udevadm args=%q want %q", got, "settle --timeout=10")
	}
}