From d92f8f41d014cd68200b8e4f9cd8042b5606d95c Mon Sep 17 00:00:00 2001 From: Mikhail Chusavitin Date: Wed, 1 Apr 2026 12:22:17 +0300 Subject: [PATCH] Fix memtest ISO validation false negatives --- .../2026-04-01-memtest-build-strategy.md | 29 ++++ bible-local/docs/iso-build-rules.md | 7 + iso/builder/Dockerfile | 1 + iso/builder/build.sh | 130 +++++++++++++----- 4 files changed, 133 insertions(+), 34 deletions(-) diff --git a/bible-local/decisions/2026-04-01-memtest-build-strategy.md b/bible-local/decisions/2026-04-01-memtest-build-strategy.md index 2e300ea..a2fde9d 100644 --- a/bible-local/decisions/2026-04-01-memtest-build-strategy.md +++ b/bible-local/decisions/2026-04-01-memtest-build-strategy.md @@ -37,6 +37,27 @@ Additional evidence from the archived `easy-bee-nvidia-v3.17-dirty-amd64` logs d So the assumption "the current normal binary hook path is late enough to patch final memtest artifacts" is also false. +Correction after inspecting the real `easy-bee-nvidia-v3.20-5-g76a9100-amd64.iso` +artifact dated 2026-04-01: + +- the final ISO does contain `boot/memtest86+x64.bin` +- the final ISO does contain `boot/memtest86+x64.efi` +- the final ISO does contain memtest menu entries in both `boot/grub/grub.cfg` + and `isolinux/live.cfg` +- so `v3.20-5-g76a9100` was **not** another real memtest regression in the + shipped ISO +- the regression was in the build-time validator/debug path in `build.sh` + +Root cause of the false alarm: + +- `build.sh` treated "ISO reader command exists" as equivalent to "ISO reader + successfully listed/extracted members" +- `iso_list_files` / `iso_extract_file` failures were collapsed into the same + observable output as "memtest content missing" +- this made a reader failure look identical to a missing memtest payload +- as a result, we re-entered the same memtest investigation loop even though + the real ISO was already correct + ## Known Failed Attempts These approaches were already tried and should not be repeated blindly: @@ -79,6 +100,8 @@ Any future memtest fix must explicitly identify: - where the memtest binaries are reliably available at build time - which exact build stage writes the final bootloader configs that land in the ISO - and a post-build proof from a real ISO, not only from intermediate workdir files +- whether the ISO inspection step itself succeeded, rather than merely whether + the validator printed a memtest warning ## Decision @@ -107,6 +130,10 @@ Current implementation direction: - injects memtest binaries there - ensures final bootloader entries there - reruns late binary stages (`binary_checksums`, `binary_iso`, `binary_zsync`) after the patch +- also treat ISO validation tooling as part of the critical path: + - install a stable ISO reader in the builder image + - fail with an explicit reader error if ISO listing/extraction fails + - do not treat reader failure as evidence that memtest is missing ## Consequences @@ -114,4 +141,6 @@ Current implementation direction: - Future memtest changes must also begin by reading the failed-attempt list above. - We should stop re-introducing "prefer built-in live-build memtest" as a default assumption without new evidence. - Memtest validation in `build.sh` is not optional; it is the acceptance gate that prevents another silent regression. +- But validation output is only trustworthy if ISO reading itself succeeded. A + "missing memtest" warning without a successful ISO read is not evidence. - If we change memtest strategy again, we must update this ADR with the exact build evidence that justified the change. diff --git a/bible-local/docs/iso-build-rules.md b/bible-local/docs/iso-build-rules.md index a439e6f..30614de 100644 --- a/bible-local/docs/iso-build-rules.md +++ b/bible-local/docs/iso-build-rules.md @@ -32,6 +32,9 @@ contains all of the following: Rules: - Keep explicit post-build memtest validation in `build.sh`. +- Treat ISO reader success as a separate prerequisite from memtest content. + If the reader cannot list or extract from the ISO, that is a validator + failure, not proof that memtest is missing. - If built-in integration does not produce the artifacts above, use a deterministic project-owned copy/extract step instead of hoping live-build will "start working". @@ -53,3 +56,7 @@ Known bad loops for this repository: timing is late enough to patch final `binary/boot/grub/grub.cfg` or `binary/isolinux/live.cfg`; logs from 2026-04-01 showed those files were not present yet when the hook executed. +- Do not treat a validator warning as ground truth until you have confirmed the + ISO reader actually succeeded. On 2026-04-01 we misdiagnosed another memtest + regression because the final ISO was correct but the validator produced a + false negative. diff --git a/iso/builder/Dockerfile b/iso/builder/Dockerfile index 6efa92c..d9a8c9f 100644 --- a/iso/builder/Dockerfile +++ b/iso/builder/Dockerfile @@ -17,6 +17,7 @@ RUN apt-get update -qq && apt-get install -y \ wget \ curl \ tar \ + libarchive-tools \ xz-utils \ rsync \ build-essential \ diff --git a/iso/builder/build.sh b/iso/builder/build.sh index 8f957d5..62bd519 100755 --- a/iso/builder/build.sh +++ b/iso/builder/build.sh @@ -145,6 +145,25 @@ iso_extract_file() { return 127 } +iso_read_file_list() { + iso_path="$1" + out_path="$2" + + iso_list_files "$iso_path" > "$out_path" || return 1 + [ -s "$out_path" ] || return 1 + return 0 +} + +iso_read_member() { + iso_path="$1" + iso_member="$2" + out_path="$3" + + iso_extract_file "$iso_path" "$iso_member" > "$out_path" || return 1 + [ -s "$out_path" ] || return 1 + return 0 +} + require_iso_reader() { command -v bsdtar >/dev/null 2>&1 && return 0 command -v xorriso >/dev/null 2>&1 && return 0 @@ -237,14 +256,32 @@ dump_memtest_debug() { fi if [ -n "$iso_path" ] && [ -f "$iso_path" ]; then + iso_files="$(mktemp)" + iso_grub_cfg="$(mktemp)" + iso_isolinux_cfg="$(mktemp)" + echo "-- ISO memtest files --" - iso_list_files "$iso_path" | grep 'memtest' | sed 's/^/ /' || echo " (no memtest files in ISO)" + if iso_read_file_list "$iso_path" "$iso_files"; then + grep 'memtest' "$iso_files" | sed 's/^/ /' || echo " (no memtest files in ISO)" + else + echo " (failed to list ISO contents)" + fi echo "-- ISO GRUB memtest lines --" - iso_extract_file "$iso_path" boot/grub/grub.cfg 2>/dev/null | grep -n 'Memory Test\|memtest' || echo " (no memtest lines in boot/grub/grub.cfg)" + if iso_read_member "$iso_path" boot/grub/grub.cfg "$iso_grub_cfg"; then + grep -n 'Memory Test\|memtest' "$iso_grub_cfg" || echo " (no memtest lines in boot/grub/grub.cfg)" + else + echo " (failed to read boot/grub/grub.cfg from ISO)" + fi echo "-- ISO isolinux memtest lines --" - iso_extract_file "$iso_path" isolinux/live.cfg 2>/dev/null | grep -n 'Memory Test\|memtest' || echo " (no memtest lines in isolinux/live.cfg)" + if iso_read_member "$iso_path" isolinux/live.cfg "$iso_isolinux_cfg"; then + grep -n 'Memory Test\|memtest' "$iso_isolinux_cfg" || echo " (no memtest lines in isolinux/live.cfg)" + else + echo " (failed to read isolinux/live.cfg from ISO)" + fi + + rm -f "$iso_files" "$iso_grub_cfg" "$iso_isolinux_cfg" fi echo "=== end memtest debug: ${phase} ===" @@ -274,6 +311,7 @@ memtest_fail() { iso_memtest_present() { iso_path="$1" + iso_files="$(mktemp)" [ -f "$iso_path" ] || return 1 @@ -282,46 +320,57 @@ iso_memtest_present() { elif command -v xorriso >/dev/null 2>&1; then : else - return 1 + return 2 fi - iso_list_files "$iso_path" | grep -q '^boot/memtest86+x64\.bin$' || return 1 - iso_list_files "$iso_path" | grep -q '^boot/memtest86+x64\.efi$' || return 1 + iso_read_file_list "$iso_path" "$iso_files" || { + rm -f "$iso_files" + return 2 + } + + grep -q '^boot/memtest86+x64\.bin$' "$iso_files" || { + rm -f "$iso_files" + return 1 + } + grep -q '^boot/memtest86+x64\.efi$' "$iso_files" || { + rm -f "$iso_files" + return 1 + } grub_cfg="$(mktemp)" isolinux_cfg="$(mktemp)" - iso_extract_file "$iso_path" boot/grub/grub.cfg > "$grub_cfg" 2>/dev/null || { - rm -f "$grub_cfg" "$isolinux_cfg" - return 1 + iso_read_member "$iso_path" boot/grub/grub.cfg "$grub_cfg" || { + rm -f "$iso_files" "$grub_cfg" "$isolinux_cfg" + return 2 } - iso_extract_file "$iso_path" isolinux/live.cfg > "$isolinux_cfg" 2>/dev/null || { - rm -f "$grub_cfg" "$isolinux_cfg" - return 1 + iso_read_member "$iso_path" isolinux/live.cfg "$isolinux_cfg" || { + rm -f "$iso_files" "$grub_cfg" "$isolinux_cfg" + return 2 } grep -q 'Memory Test (memtest86+)' "$grub_cfg" || { - rm -f "$grub_cfg" "$isolinux_cfg" + rm -f "$iso_files" "$grub_cfg" "$isolinux_cfg" return 1 } grep -q '/boot/memtest86+x64\.efi' "$grub_cfg" || { - rm -f "$grub_cfg" "$isolinux_cfg" + rm -f "$iso_files" "$grub_cfg" "$isolinux_cfg" return 1 } grep -q '/boot/memtest86+x64\.bin' "$grub_cfg" || { - rm -f "$grub_cfg" "$isolinux_cfg" + rm -f "$iso_files" "$grub_cfg" "$isolinux_cfg" return 1 } grep -q 'Memory Test (memtest86+)' "$isolinux_cfg" || { - rm -f "$grub_cfg" "$isolinux_cfg" + rm -f "$iso_files" "$grub_cfg" "$isolinux_cfg" return 1 } grep -q '/boot/memtest86+x64\.bin' "$isolinux_cfg" || { - rm -f "$grub_cfg" "$isolinux_cfg" + rm -f "$iso_files" "$grub_cfg" "$isolinux_cfg" return 1 } - rm -f "$grub_cfg" "$isolinux_cfg" + rm -f "$iso_files" "$grub_cfg" "$isolinux_cfg" return 0 } @@ -335,56 +384,65 @@ validate_iso_memtest() { } require_iso_reader "$iso_path" || return 0 - iso_list_files "$iso_path" | grep -q '^boot/memtest86+x64\.bin$' || { - memtest_fail "memtest BIOS binary missing in ISO: boot/memtest86+x64.bin" "$iso_path" + iso_files="$(mktemp)" + iso_read_file_list "$iso_path" "$iso_files" || { + memtest_fail "failed to list ISO contents while validating memtest" "$iso_path" + rm -f "$iso_files" return 0 } - iso_list_files "$iso_path" | grep -q '^boot/memtest86+x64\.efi$' || { + + grep -q '^boot/memtest86+x64\.bin$' "$iso_files" || { + memtest_fail "memtest BIOS binary missing in ISO: boot/memtest86+x64.bin" "$iso_path" + rm -f "$iso_files" + return 0 + } + grep -q '^boot/memtest86+x64\.efi$' "$iso_files" || { memtest_fail "memtest EFI binary missing in ISO: boot/memtest86+x64.efi" "$iso_path" + rm -f "$iso_files" return 0 } grub_cfg="$(mktemp)" isolinux_cfg="$(mktemp)" - iso_extract_file "$iso_path" boot/grub/grub.cfg > "$grub_cfg" || { - memtest_fail "failed to extract boot/grub/grub.cfg from ISO" "$iso_path" - rm -f "$grub_cfg" "$isolinux_cfg" + iso_read_member "$iso_path" boot/grub/grub.cfg "$grub_cfg" || { + memtest_fail "failed to read boot/grub/grub.cfg from ISO" "$iso_path" + rm -f "$iso_files" "$grub_cfg" "$isolinux_cfg" return 0 } - iso_extract_file "$iso_path" isolinux/live.cfg > "$isolinux_cfg" || { - memtest_fail "failed to extract isolinux/live.cfg from ISO" "$iso_path" - rm -f "$grub_cfg" "$isolinux_cfg" + iso_read_member "$iso_path" isolinux/live.cfg "$isolinux_cfg" || { + memtest_fail "failed to read isolinux/live.cfg from ISO" "$iso_path" + rm -f "$iso_files" "$grub_cfg" "$isolinux_cfg" return 0 } grep -q 'Memory Test (memtest86+)' "$grub_cfg" || { memtest_fail "GRUB menu entry for memtest is missing" "$iso_path" - rm -f "$grub_cfg" "$isolinux_cfg" + rm -f "$iso_files" "$grub_cfg" "$isolinux_cfg" return 0 } grep -q '/boot/memtest86+x64\.efi' "$grub_cfg" || { memtest_fail "GRUB memtest EFI path is missing" "$iso_path" - rm -f "$grub_cfg" "$isolinux_cfg" + rm -f "$iso_files" "$grub_cfg" "$isolinux_cfg" return 0 } grep -q '/boot/memtest86+x64\.bin' "$grub_cfg" || { memtest_fail "GRUB memtest BIOS path is missing" "$iso_path" - rm -f "$grub_cfg" "$isolinux_cfg" + rm -f "$iso_files" "$grub_cfg" "$isolinux_cfg" return 0 } grep -q 'Memory Test (memtest86+)' "$isolinux_cfg" || { memtest_fail "isolinux menu entry for memtest is missing" "$iso_path" - rm -f "$grub_cfg" "$isolinux_cfg" + rm -f "$iso_files" "$grub_cfg" "$isolinux_cfg" return 0 } grep -q '/boot/memtest86+x64\.bin' "$isolinux_cfg" || { memtest_fail "isolinux memtest path is missing" "$iso_path" - rm -f "$grub_cfg" "$isolinux_cfg" + rm -f "$iso_files" "$grub_cfg" "$isolinux_cfg" return 0 } - rm -f "$grub_cfg" "$isolinux_cfg" + rm -f "$iso_files" "$grub_cfg" "$isolinux_cfg" echo "=== memtest validation OK ===" } @@ -1078,9 +1136,13 @@ fi ISO_RAW="${LB_DIR}/live-image-amd64.hybrid.iso" if [ -f "$ISO_RAW" ]; then dump_memtest_debug "post-build" "${LB_DIR}" "$ISO_RAW" - if ! iso_memtest_present "$ISO_RAW"; then + iso_memtest_present "$ISO_RAW" + memtest_status=$? + if [ "$memtest_status" -eq 1 ]; then recover_iso_memtest "${LB_DIR}" "$ISO_RAW" dump_memtest_debug "post-recovery" "${LB_DIR}" "$ISO_RAW" + elif [ "$memtest_status" -eq 2 ]; then + memtest_fail "failed to inspect ISO for memtest before recovery" "$ISO_RAW" fi validate_iso_memtest "$ISO_RAW" cp "$ISO_RAW" "$ISO_OUT"