From 7bbeab7eb2fdfbfd0e0b8abf890b97706562cab6 Mon Sep 17 00:00:00 2001 From: Spencer Bryngelson Date: Wed, 13 May 2026 17:37:50 -0500 Subject: [PATCH 1/3] improve: sharper Verrou dd_line attribution via source filtering and function scoping Three changes to reduce spurious dd_line reports pointing at do-loop boundaries, #:endfor lines, or inlined-function call sites: 1. _CONTROL_FLOW_RE + _is_arithmetic_loc: post-filter dd_line results by reading the actual source line; skip pure control-flow delimiters (end do, end if, #:endfor, $:END_GPU_PARALLEL_LOOP, etc.) that share DWARF info with the preceding arithmetic due to template expansion or inlining. 2. restrict_syms in _write_dd_run_sh / _run_dd_line: when dd_sym has already identified responsible functions, dd_line_run.sh embeds --source-function= flags so Verrou only perturbs FP ops inside those functions, narrowing the bisection space. 3. FFLAGS=-fno-inline in fp-stability.yml: prevents gfortran from inlining small helpers into their callers, so DWARF correctly attributes arithmetic to the callee source line rather than the caller loop header. --- .github/workflows/fp-stability.yml | 6 ++ toolchain/mfc/fp_stability.py | 124 +++++++++++++++++++++++++---- 2 files changed, 116 insertions(+), 14 deletions(-) diff --git a/.github/workflows/fp-stability.yml b/.github/workflows/fp-stability.yml index 7e86d7dc02..8a977cfcb3 100644 --- a/.github/workflows/fp-stability.yml +++ b/.github/workflows/fp-stability.yml @@ -101,6 +101,12 @@ jobs: run: ~/.local/verrou/bin/valgrind --version - name: Build MFC (debug, serial) + # FFLAGS=-fno-inline prevents gfortran from inlining small functions into + # their callers. Without it, DWARF debug info attributes inlined ops to + # the caller's line (often a do-loop header), making Verrou dd_line point + # to loop boundaries instead of the actual arithmetic. + env: + FFLAGS: "-fno-inline" run: ./mfc.sh build -t pre_process simulation --no-mpi --debug -j"$(nproc)" - name: Run FP Stability Suite diff --git a/toolchain/mfc/fp_stability.py b/toolchain/mfc/fp_stability.py index 4095fb32d8..c41aa2b6a6 100644 --- a/toolchain/mfc/fp_stability.py +++ b/toolchain/mfc/fp_stability.py @@ -79,6 +79,55 @@ # Matches the first "at" frame in a Valgrind stack trace: "(file.fpp:LINE)". _VGFRAME_RE = re.compile(r"\(([^):]+\.(?:fpp|f90|F90|c|cpp))\s*:(\d+)\)") +# Lines that are clearly control-flow delimiters rather than arithmetic. +# dd_line sometimes reports these when the responsible arithmetic is on the +# preceding line but shares DWARF debug info with the delimiter (e.g. loop +# boundaries in #:for-expanded code, or inlined functions at call sites). +_CONTROL_FLOW_RE = re.compile( + r"^\s*(" + r"end\s+(do|if|select|where|forall|subroutine|function|module|program|block)\b" + r"|do\s+\w+\s*=\s*[\w,\s]+" # naked do-loop header (no arithmetic) + r"|\$:END_GPU\w+" # fypp GPU macro closers + r"|#:end\w*" # fypp directive closers (#:endfor, #:enddef, etc.) + r"|\s*!\s*$" # comment-only lines + r"|\s*$" # blank lines + r")", + re.IGNORECASE, +) + + +def _read_source_line(fname: str, lineno: int) -> str: + """Return the raw source line at lineno (1-based), or '' if unavailable.""" + if os.path.isabs(fname) and os.path.isfile(fname): + candidates = [fname] + else: + candidates = glob.glob(os.path.join(MFC_ROOT_DIR, "src", "**", os.path.basename(fname)), recursive=True) + if not candidates: + return "" + try: + with open(candidates[0]) as fh: + lines = fh.readlines() + return lines[lineno - 1] if 0 < lineno <= len(lines) else "" + except OSError: + return "" + + +def _is_arithmetic_loc(fname: str, start: int, end: int) -> bool: + """Return True if any line in [start, end] contains non-trivial arithmetic. + + Filters out loop delimiters and fypp directive lines that dd_line sometimes + reports when the responsible arithmetic shares DWARF info with its enclosing + control-flow boundary (inlining, #:for template expansion, etc.). + Returns True (keep) when uncertain so we never silently drop real hotspots. + """ + for lineno in range(start, end + 1): + line = _read_source_line(fname, lineno) + if not line: + return True # can't read — keep to be safe + if not _CONTROL_FLOW_RE.match(line): + return True + return False + def _get_source_context(fname: str, lineno: int, context: int = 2) -> str: """Return a annotated source snippet around lineno, or '' if file not found. @@ -649,7 +698,7 @@ def _run_vprec_sweep(case: dict, verrou_bin: str, sim_bin: str, work_dir: str, r return results -def _write_dd_run_sh(path: str, verrou_bin: str, sim_bin: str, ic_dir: str): +def _write_dd_run_sh(path: str, verrou_bin: str, sim_bin: str, ic_dir: str, restrict_syms: list = None): """Generate dd_run.sh for verrou_dd_sym / verrou_dd_line. verrou_dd_* calls: dd_run.sh RUNDIR and injects function/line exclusion via @@ -659,7 +708,15 @@ def _write_dd_run_sh(path: str, verrou_bin: str, sim_bin: str, ic_dir: str): environment — we honour that so the reference is a stable nearest-rounding baseline to compare against. CLI --rounding-mode would override the env var and break the reference, so we pass the mode via ${VERROU_ROUNDING_MODE:-float} instead. + + restrict_syms: when provided (a list of function names from dd_sym), the script + adds --source-function flags so Verrou only perturbs FP ops inside those functions. + This narrows dd_line's search and avoids spurious template-expansion attributions. """ + sym_flags = "" + if restrict_syms: + sym_flags = " ".join(f"--source-function={s!r}" for s in restrict_syms) + content = textwrap.dedent(f"""\ #!/usr/bin/env bash # Generated by mfc.sh fp-stability — do not edit by hand. @@ -684,7 +741,7 @@ def _write_dd_run_sh(path: str, verrou_bin: str, sim_bin: str, ic_dir: str): ROUND="${{VERROU_ROUNDING_MODE:-float}}" cd "$TMPDIR_RUN" - "$VERROU_BIN" --tool=verrou --error-limit=no --rounding-mode="$ROUND" "$SIM_BIN" + "$VERROU_BIN" --tool=verrou --error-limit=no --rounding-mode="$ROUND" {sym_flags} "$SIM_BIN" rc=$? [ -d "$TMPDIR_RUN/D" ] && cp -a "$TMPDIR_RUN/D/." "$RUNDIR/" @@ -741,10 +798,17 @@ def _dd_env(verrou_bin: str) -> dict: def _parse_rddmin_locs(summary_path: str) -> list: - """Extract [(rel_path, start_line, end_line)] from a dd_line rddmin_summary.""" + """Extract [(rel_path, start_line, end_line)] from a dd_line rddmin_summary. + + Filters out locations whose source lines are pure control-flow delimiters + (loop boundaries, fypp directive closers, blank/comment lines). These can + appear when the responsible arithmetic shares DWARF debug info with an + enclosing boundary due to inlining or #:for template expansion. + """ if not os.path.isfile(summary_path): return [] locs = [] + skipped = [] with open(summary_path) as fh: for line in fh: m = _LOC_RE.search(line) @@ -759,7 +823,14 @@ def _parse_rddmin_locs(summary_path: str) -> list: rel = path except ValueError: rel = path - locs.append((rel.replace("\\", "/"), start, end)) + rel = rel.replace("\\", "/") + if _is_arithmetic_loc(path, start, end): + locs.append((rel, start, end)) + else: + skipped.append((rel, start, end)) + for rel, start, end in skipped: + loc = f"{rel}:{start}" if start == end else f"{rel}:{start}-{end}" + cons.print(f" [dim]dd_line: skipped control-flow boundary {loc}[/dim]") return locs @@ -821,8 +892,22 @@ def _run_dd_sym(case: dict, verrou_bin: str, sim_bin: str, work_dir: str, log_di return _parse_rddmin_syms(os.path.join(dd_dir, "dd.sym", "rddmin_summary")) -def _run_dd_line(case: dict, verrou_bin: str, sim_bin: str, work_dir: str, log_dir: str, threshold: float = None) -> list: - """Run verrou_dd_line; return list of (rel_path, start_line, end_line) tuples.""" +def _run_dd_line( + case: dict, + verrou_bin: str, + sim_bin: str, + work_dir: str, + log_dir: str, + threshold: float = None, + restrict_syms: list = None, +) -> list: + """Run verrou_dd_line; return list of (rel_path, start_line, end_line) tuples. + + restrict_syms: function names from dd_sym. When provided, dd_run.sh passes + --source-function= flags to Verrou so perturbation is restricted to those + functions. This narrows the bisection space and avoids spurious attributions + to template-shared or inlined code outside the responsible functions. + """ dd_bin = _find_dd_line(verrou_bin) if not dd_bin: cons.print(" [dim]verrou_dd_line not found; skipping line-level debug[/dim]") @@ -830,15 +915,14 @@ def _run_dd_line(case: dict, verrou_bin: str, sim_bin: str, work_dir: str, log_d dd_dir = os.path.join(log_dir, case["name"]) os.makedirs(dd_dir, exist_ok=True) - dd_run_sh = os.path.join(dd_dir, "dd_run.sh") + # Always write a fresh dd_run.sh for dd_line so --source-function flags are included. + dd_run_sh = os.path.join(dd_dir, "dd_line_run.sh") dd_cmp_py = os.path.join(dd_dir, "dd_cmp.py") effective_threshold = threshold if threshold is not None else case["threshold"] - if not os.path.isfile(dd_run_sh): - _write_dd_run_sh(dd_run_sh, verrou_bin, sim_bin, work_dir) - _write_dd_cmp_py(dd_cmp_py, case["compare"], effective_threshold) - else: - # dd_sym already wrote dd_cmp.py with its threshold; rewrite with ours if different - _write_dd_cmp_py(dd_cmp_py, case["compare"], effective_threshold) + _write_dd_run_sh(dd_run_sh, verrou_bin, sim_bin, work_dir, restrict_syms=restrict_syms) + _write_dd_cmp_py(dd_cmp_py, case["compare"], effective_threshold) + if restrict_syms: + cons.print(f" [dim]dd_line restricted to {len(restrict_syms)} dd_sym function(s)[/dim]") _run_dd_tool(dd_bin, dd_dir, dd_run_sh, dd_cmp_py, _dd_env(verrou_bin), "dd_line.log", "dd.line", "verrou_dd_line") return _parse_rddmin_locs(os.path.join(dd_dir, "dd.line", "rddmin_summary")) @@ -954,7 +1038,19 @@ def _run_case( cons.print(f" [bold yellow]dd_sym error[/bold yellow]: {exc}") if dd_threshold > 0 and run_dd_line: try: - result["dd_line_locs"] = _run_dd_line(case, verrou_bin, sim_bin, work_dir, log_dir, threshold=dd_threshold) + # Seed dd_line with dd_sym results so perturbation is restricted + # to the responsible functions, avoiding spurious attributions from + # template-expansion or inlining outside those functions. + restrict = result["dd_sym_syms"] if result["dd_sym_syms"] else None + result["dd_line_locs"] = _run_dd_line( + case, + verrou_bin, + sim_bin, + work_dir, + log_dir, + threshold=dd_threshold, + restrict_syms=restrict, + ) except Exception as exc: cons.print(f" [bold yellow]dd_line error[/bold yellow]: {exc}") From ef7b21c481e5e88f4c4446e94d5e4947df124416 Mon Sep 17 00:00:00 2001 From: Spencer Bryngelson Date: Wed, 13 May 2026 18:16:28 -0500 Subject: [PATCH 2/3] fp_stability: fix dd_run.sh VERROU_EXCLUDE/VERROU_SOURCE forwarding dd_run.sh never forwarded the VERROU_EXCLUDE and VERROU_SOURCE env vars injected by verrou_dd_sym and verrou_dd_line respectively. Without these, dd_sym perturbs all functions uniformly (over-inclusive results) and dd_line cannot narrow to individual source lines. Also fix _parse_rddmin_syms: it was returning every non-empty line including the 'ddmin0: Fail Ratio...' metadata lines. These got embedded literally as --source-function flags (not a valid Verrou flag), crashing the dd_line reference run and producing an empty rddmin_summary. Remove the broken restrict_syms / --source-function approach entirely. Verrou's own bisection protocol handles scope restriction via env vars. Also exclude fp-stability-logs/ from typos and gitignore since Verrou generates files containing 'unamed_filename_verrou' (its internal token). --- .gitignore | 2 ++ .typos.toml | 2 +- toolchain/mfc/fp_stability.py | 62 ++++++++++++++++++----------------- 3 files changed, 35 insertions(+), 31 deletions(-) diff --git a/.gitignore b/.gitignore index aba54411e1..23c66fe4c1 100644 --- a/.gitignore +++ b/.gitignore @@ -114,3 +114,5 @@ cce_*/ cce_*.log run_cce_*.sh .ffmt_cache/ +# FP-stability log artifacts (generated by ./mfc.sh fp-stability) +fp-stability-logs/ diff --git a/.typos.toml b/.typos.toml index 595090c271..6772f4d204 100644 --- a/.typos.toml +++ b/.typos.toml @@ -31,4 +31,4 @@ tru = "tru" # typo for "true" in "when_tru" - tests dependency keys PNGs = "PNGs" [files] -extend-exclude = ["docs/documentation/references*", "docs/references.bib", "tests/", "toolchain/cce_simulation_workgroup_256.sh", "build-docs/", "build/", "build_test/"] +extend-exclude = ["docs/documentation/references*", "docs/references.bib", "tests/", "toolchain/cce_simulation_workgroup_256.sh", "build-docs/", "build/", "build_test/", "fp-stability-logs/"] diff --git a/toolchain/mfc/fp_stability.py b/toolchain/mfc/fp_stability.py index c41aa2b6a6..7657d42372 100644 --- a/toolchain/mfc/fp_stability.py +++ b/toolchain/mfc/fp_stability.py @@ -698,7 +698,7 @@ def _run_vprec_sweep(case: dict, verrou_bin: str, sim_bin: str, work_dir: str, r return results -def _write_dd_run_sh(path: str, verrou_bin: str, sim_bin: str, ic_dir: str, restrict_syms: list = None): +def _write_dd_run_sh(path: str, verrou_bin: str, sim_bin: str, ic_dir: str): """Generate dd_run.sh for verrou_dd_sym / verrou_dd_line. verrou_dd_* calls: dd_run.sh RUNDIR and injects function/line exclusion via @@ -708,15 +708,7 @@ def _write_dd_run_sh(path: str, verrou_bin: str, sim_bin: str, ic_dir: str, rest environment — we honour that so the reference is a stable nearest-rounding baseline to compare against. CLI --rounding-mode would override the env var and break the reference, so we pass the mode via ${VERROU_ROUNDING_MODE:-float} instead. - - restrict_syms: when provided (a list of function names from dd_sym), the script - adds --source-function flags so Verrou only perturbs FP ops inside those functions. - This narrows dd_line's search and avoids spurious template-expansion attributions. """ - sym_flags = "" - if restrict_syms: - sym_flags = " ".join(f"--source-function={s!r}" for s in restrict_syms) - content = textwrap.dedent(f"""\ #!/usr/bin/env bash # Generated by mfc.sh fp-stability — do not edit by hand. @@ -740,8 +732,15 @@ def _write_dd_run_sh(path: str, verrou_bin: str, sim_bin: str, ic_dir: str, rest # test steps while letting the reference use nearest-rounding. ROUND="${{VERROU_ROUNDING_MODE:-float}}" + # verrou_dd_sym injects VERROU_EXCLUDE (symbols to exclude from perturbation). + # verrou_dd_line injects VERROU_SOURCE (source lines to restrict perturbation to). + # Forward them as valgrind flags when set. + EXTRA="" + [ -n "${{VERROU_EXCLUDE:-}}" ] && EXTRA="$EXTRA --exclude=$VERROU_EXCLUDE" + [ -n "${{VERROU_SOURCE:-}}" ] && EXTRA="$EXTRA --source=$VERROU_SOURCE" + cd "$TMPDIR_RUN" - "$VERROU_BIN" --tool=verrou --error-limit=no --rounding-mode="$ROUND" {sym_flags} "$SIM_BIN" + "$VERROU_BIN" --tool=verrou --error-limit=no --rounding-mode="$ROUND" $EXTRA "$SIM_BIN" rc=$? [ -d "$TMPDIR_RUN/D" ] && cp -a "$TMPDIR_RUN/D/." "$RUNDIR/" @@ -835,11 +834,29 @@ def _parse_rddmin_locs(summary_path: str) -> list: def _parse_rddmin_syms(summary_path: str) -> list: - """Extract symbol/function names from a dd_sym rddmin_summary.""" + """Extract symbol/function names from a dd_sym rddmin_summary. + + rddmin_summary format: + ddmin0:\\tFail Ratio: ...\\tFail indexes: ... + \\t\\t + ddmin1:\\t... + \\t\\t + + Lines starting with 'ddmin' are metadata; function names are on the + indented (tab-prefixed) lines as the first tab-delimited field. + """ if not os.path.isfile(summary_path): return [] + syms = [] with open(summary_path) as fh: - return [ln.strip() for ln in fh if ln.strip()] + for ln in fh: + stripped = ln.strip() + if not stripped or stripped.startswith("ddmin"): + continue + sym = stripped.split("\t")[0].strip() + if sym: + syms.append(sym) + return syms def _run_dd_tool( @@ -899,15 +916,8 @@ def _run_dd_line( work_dir: str, log_dir: str, threshold: float = None, - restrict_syms: list = None, ) -> list: - """Run verrou_dd_line; return list of (rel_path, start_line, end_line) tuples. - - restrict_syms: function names from dd_sym. When provided, dd_run.sh passes - --source-function= flags to Verrou so perturbation is restricted to those - functions. This narrows the bisection space and avoids spurious attributions - to template-shared or inlined code outside the responsible functions. - """ + """Run verrou_dd_line; return list of (rel_path, start_line, end_line) tuples.""" dd_bin = _find_dd_line(verrou_bin) if not dd_bin: cons.print(" [dim]verrou_dd_line not found; skipping line-level debug[/dim]") @@ -915,14 +925,11 @@ def _run_dd_line( dd_dir = os.path.join(log_dir, case["name"]) os.makedirs(dd_dir, exist_ok=True) - # Always write a fresh dd_run.sh for dd_line so --source-function flags are included. - dd_run_sh = os.path.join(dd_dir, "dd_line_run.sh") + dd_run_sh = os.path.join(dd_dir, "dd_run.sh") dd_cmp_py = os.path.join(dd_dir, "dd_cmp.py") effective_threshold = threshold if threshold is not None else case["threshold"] - _write_dd_run_sh(dd_run_sh, verrou_bin, sim_bin, work_dir, restrict_syms=restrict_syms) + _write_dd_run_sh(dd_run_sh, verrou_bin, sim_bin, work_dir) _write_dd_cmp_py(dd_cmp_py, case["compare"], effective_threshold) - if restrict_syms: - cons.print(f" [dim]dd_line restricted to {len(restrict_syms)} dd_sym function(s)[/dim]") _run_dd_tool(dd_bin, dd_dir, dd_run_sh, dd_cmp_py, _dd_env(verrou_bin), "dd_line.log", "dd.line", "verrou_dd_line") return _parse_rddmin_locs(os.path.join(dd_dir, "dd.line", "rddmin_summary")) @@ -1038,10 +1045,6 @@ def _run_case( cons.print(f" [bold yellow]dd_sym error[/bold yellow]: {exc}") if dd_threshold > 0 and run_dd_line: try: - # Seed dd_line with dd_sym results so perturbation is restricted - # to the responsible functions, avoiding spurious attributions from - # template-expansion or inlining outside those functions. - restrict = result["dd_sym_syms"] if result["dd_sym_syms"] else None result["dd_line_locs"] = _run_dd_line( case, verrou_bin, @@ -1049,7 +1052,6 @@ def _run_case( work_dir, log_dir, threshold=dd_threshold, - restrict_syms=restrict, ) except Exception as exc: cons.print(f" [bold yellow]dd_line error[/bold yellow]: {exc}") From bb15d3ab8f6acc0942220a8149e6d2939710abae Mon Sep 17 00:00:00 2001 From: Spencer Bryngelson Date: Wed, 13 May 2026 19:00:28 -0500 Subject: [PATCH 3/3] fp_stability: extend control-flow filter to cancellation locs and more patterns The _CONTROL_FLOW_RE filter was only applied to dd_line results; the cancellation check was passing its locs through unfiltered, producing subroutine declarations, else-if branches, and loop headers in the step summary alongside real arithmetic sites. Changes: - Apply _is_arithmetic_loc filter to cancellation_locs in _run_cancellation_check; log how many control-flow boundaries skipped. - Extend _CONTROL_FLOW_RE to also catch: - else / else if (...) then branches - subroutine declarations (recursive/pure/elemental variants) --- toolchain/mfc/fp_stability.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/toolchain/mfc/fp_stability.py b/toolchain/mfc/fp_stability.py index 7657d42372..dd848f046c 100644 --- a/toolchain/mfc/fp_stability.py +++ b/toolchain/mfc/fp_stability.py @@ -87,6 +87,8 @@ r"^\s*(" r"end\s+(do|if|select|where|forall|subroutine|function|module|program|block)\b" r"|do\s+\w+\s*=\s*[\w,\s]+" # naked do-loop header (no arithmetic) + r"|else(\s+if\s*\(.*\)\s*then)?\s*$" # else / else if (...) then + r"|(recursive\s+|pure\s+|elemental\s+)*subroutine\s+\w+" # subroutine declaration r"|\$:END_GPU\w+" # fypp GPU macro closers r"|#:end\w*" # fypp directive closers (#:endfor, #:enddef, etc.) r"|\s*!\s*$" # comment-only lines @@ -621,7 +623,12 @@ def _run_cancellation_check(case: dict, verrou_bin: str, sim_bin: str, work_dir: _run_simulation_verrou(verrou_bin, sim_bin, work_dir, run_dir, rounding_mode="nearest", extra_flags=flags) except MFCException: pass - return _parse_cancel_gen(gen_path) + raw = _parse_cancel_gen(gen_path) + filtered = [(f, ln) for f, ln in raw if _is_arithmetic_loc(f, ln, ln)] + skipped = len(raw) - len(filtered) + if skipped: + cons.print(f" [dim]cancellation: filtered {skipped} control-flow boundary site(s)[/dim]") + return filtered def _run_mca_samples(