diff --git a/.Rbuildignore b/.Rbuildignore
index d01bf375..8f6f943d 100644
--- a/.Rbuildignore
+++ b/.Rbuildignore
@@ -33,8 +33,10 @@
 ^paper$
 ^LICENSE$
 
-# ---- Legacy test fixtures (GitHub CI only, not shipped to CRAN) ----
-^tests/testthat/fixtures$
+# ---- Legacy test fixtures: only the `legacy` subdirectory is excluded
+#      so it does not bloat the tarball; live fixtures used by tests in
+#      R CMD check must ship so the matching tests can run.
+^tests/testthat/fixtures/legacy$
 
 # ---- C/C++ build artifacts (REQUIRED) ----
 ^src/.*\.o$
diff --git a/NAMESPACE b/NAMESPACE
index 2404bd92..2d5f31e1 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -49,6 +49,7 @@ export(beta_bernoulli_prior)
 export(beta_prime_prior)
 export(bgm)
 export(bgmCompare)
+export(bgms_posterior_mean)
 export(cauchy_prior)
 export(exponential_prior)
 export(extract_arguments)
diff --git a/R/RcppExports.R b/R/RcppExports.R
index 4823d2c1..708a166a 100644
--- a/R/RcppExports.R
+++ b/R/RcppExports.R
@@ -29,6 +29,78 @@ sample_ggm_prior_cpp <- function(p, n_samples, n_warmup = 1000L, pairwise_scale
     .Call(`_bgms_sample_ggm_prior`, p, n_samples, n_warmup, pairwise_scale, interaction_prior_type, scale_prior_type, gamma_shape, gamma_rate, step_size, max_depth, seed, verbose, edge_indicators_nullable, delta)
 }
 
+log_Z_NLO_gamma_cpp <- function(G, alpha, beta, sigma, include_F = FALSE, delta = 0.0) {
+    .Call(`_bgms_log_Z_NLO_gamma_cpp`, G, alpha, beta, sigma, include_F, delta)
+}
+
+log_Z_manuscript_NLO_alpha1_cpp <- function(G, beta, sigma, delta) {
+    .Call(`_bgms_log_Z_manuscript_NLO_alpha1_cpp`, G, beta, sigma, delta)
+}
+
+log_Z_manuscript_NLO_alpha1_degord_cpp <- function(G, i, j, beta, sigma, delta) {
+    .Call(`_bgms_log_Z_manuscript_NLO_alpha1_degord_cpp`, G, i, j, beta, sigma, delta)
+}
+
+log_Z_NLO_gamma_degord_cpp <- function(G, i, j, alpha, beta, sigma, include_F = FALSE, delta = 0.0) {
+    .Call(`_bgms_log_Z_NLO_gamma_degord_cpp`, G, i, j, alpha, beta, sigma, include_F, delta)
+}
+
+log_Z_NLO_gamma_delta_incr_alpha1_cpp <- function(G_before, i, j, beta, sigma, delta, include_F = FALSE) {
+    .Call(`_bgms_log_Z_NLO_gamma_delta_incr_alpha1_cpp`, G_before, i, j, beta, sigma, delta, include_F)
+}
+
+log_Z_NLO_gamma_delta_incr_alphaN_cpp <- function(G_before, i, j, alpha, beta, sigma, delta, include_F = FALSE) {
+    .Call(`_bgms_log_Z_NLO_gamma_delta_incr_alphaN_cpp`, G_before, i, j, alpha, beta, sigma, delta, include_F)
+}
+
+degord_chain_aux_cpp <- function(q, alpha, beta, sigma, delta) {
+    .Call(`_bgms_degord_chain_aux_cpp`, q, alpha, beta, sigma, delta)
+}
+
+degord_pi_aux_cpp <- function(G_pi, alpha, beta, sigma, delta) {
+    .Call(`_bgms_degord_pi_aux_cpp`, G_pi, alpha, beta, sigma, delta)
+}
+
+degord_permute_graph_cpp <- function(G, i, j) {
+    .Call(`_bgms_degord_permute_graph_cpp`, G, i, j)
+}
+
+degord_log_Zhat_pi_from_pool_cpp <- function(noise_pool_t, G_pi, alpha, beta, sigma, delta, slab_tilt_mode = 0L) {
+    .Call(`_bgms_degord_log_Zhat_pi_from_pool_cpp`, noise_pool_t, G_pi, alpha, beta, sigma, delta, slab_tilt_mode)
+}
+
+degord_delta_log_Zhat_pi_toggle_cpp <- function(noise_pool, noise_pool_t, G_curr, i, j, alpha, beta, sigma, delta, slab_tilt_mode = 0L) {
+    .Call(`_bgms_degord_delta_log_Zhat_pi_toggle_cpp`, noise_pool, noise_pool_t, G_curr, i, j, alpha, beta, sigma, delta, slab_tilt_mode)
+}
+
+degord_draw_bartlett_pool_cpp <- function(q, M_inner, seed) {
+    .Call(`_bgms_degord_draw_bartlett_pool_cpp`, q, M_inner, seed)
+}
+
+degord_V_at_Gamma_pi_cpp <- function(K_depth, pools_t, G_pi, alpha, beta, sigma, delta, c_val, rho, slab_tilt_mode = 0L) {
+    .Call(`_bgms_degord_V_at_Gamma_pi_cpp`, K_depth, pools_t, G_pi, alpha, beta, sigma, delta, c_val, rho, slab_tilt_mode)
+}
+
+degord_V_log_at_Gamma_pi_cpp <- function(K_depth, pools_t, G_pi, alpha, beta, sigma, delta, log_c, rho, slab_tilt_mode = 0L) {
+    .Call(`_bgms_degord_V_log_at_Gamma_pi_cpp`, K_depth, pools_t, G_pi, alpha, beta, sigma, delta, log_c, rho, slab_tilt_mode)
+}
+
+degord_V_log_pair_at_Gamma_curr_star_cpp <- function(K_depth, pools_t, G_pi_curr, G_pi_star, alpha, beta, sigma, delta, log_c_curr, log_c_star, rho, slab_tilt_mode = 0L) {
+    .Call(`_bgms_degord_V_log_pair_at_Gamma_curr_star_cpp`, K_depth, pools_t, G_pi_curr, G_pi_star, alpha, beta, sigma, delta, log_c_curr, log_c_star, rho, slab_tilt_mode)
+}
+
+degord_log_Zhat_star_from_cache_cpp <- function(noise_pool_t, G_pi_curr, G_pi_star, alpha, beta, sigma, delta, slab_tilt_mode = 0L) {
+    .Call(`_bgms_degord_log_Zhat_star_from_cache_cpp`, noise_pool_t, G_pi_curr, G_pi_star, alpha, beta, sigma, delta, slab_tilt_mode)
+}
+
+degord_draw_U_rr_cpp <- function(M_inner, q, rho, seed) {
+    .Call(`_bgms_degord_draw_U_rr_cpp`, M_inner, q, rho, seed)
+}
+
+ggm_hierarchical_smoke_cpp <- function(observations, inclusion_prob, interaction_scale, diagonal_shape, diagonal_rate, delta, M_inner, kappa, rho, n_sweeps, seed, use_manuscript_nlo = FALSE) {
+    .Call(`_bgms_ggm_hierarchical_smoke_cpp`, observations, inclusion_prob, interaction_scale, diagonal_shape, diagonal_rate, delta, M_inner, kappa, rho, n_sweeps, seed, use_manuscript_nlo)
+}
+
 .compute_ess_cpp <- function(array3d) {
     .Call(`_bgms_compute_ess_cpp`, array3d)
 }
@@ -113,8 +185,8 @@ ggm_test_logp_and_gradient_prior <- function(theta, suf_stat, n, edge_indicators
     .Call(`_bgms_ggm_test_logp_and_gradient_prior`, theta, suf_stat, n, edge_indicators, interaction_prior_type, interaction_scale, interaction_alpha, interaction_beta, diagonal_prior_type, diagonal_shape, diagonal_rate)
 }
 
-sample_ggm <- function(inputFromR, prior_inclusion_prob, initial_edge_indicators, no_iter, no_warmup, no_chains, edge_selection, sampler_type, seed, no_threads, progress_type, progress_callback = NULL, edge_prior = "Bernoulli", beta_bernoulli_alpha = 1.0, beta_bernoulli_beta = 1.0, beta_bernoulli_alpha_between = 1.0, beta_bernoulli_beta_between = 1.0, dirichlet_alpha = 1.0, lambda = 1.0, target_acceptance = 0.8, max_tree_depth = 10L, na_impute = FALSE, missing_index_nullable = NULL, delta = 0.0) {
-    .Call(`_bgms_sample_ggm`, inputFromR, prior_inclusion_prob, initial_edge_indicators, no_iter, no_warmup, no_chains, edge_selection, sampler_type, seed, no_threads, progress_type, progress_callback, edge_prior, beta_bernoulli_alpha, beta_bernoulli_beta, beta_bernoulli_alpha_between, beta_bernoulli_beta_between, dirichlet_alpha, lambda, target_acceptance, max_tree_depth, na_impute, missing_index_nullable, delta)
+sample_ggm <- function(inputFromR, prior_inclusion_prob, initial_edge_indicators, no_iter, no_warmup, no_chains, edge_selection, sampler_type, seed, no_threads, progress_type, progress_callback = NULL, edge_prior = "Bernoulli", beta_bernoulli_alpha = 1.0, beta_bernoulli_beta = 1.0, beta_bernoulli_alpha_between = 1.0, beta_bernoulli_beta_between = 1.0, dirichlet_alpha = 1.0, lambda = 1.0, target_acceptance = 0.8, max_tree_depth = 10L, na_impute = FALSE, missing_index_nullable = NULL, delta = 0.0, graph_prior_spec = "joint", z_ratio_M_inner = 100L, z_ratio_kappa = 1.0, z_ratio_rho = 0.5, use_manuscript_nlo = FALSE, mh_U = FALSE) {
+    .Call(`_bgms_sample_ggm`, inputFromR, prior_inclusion_prob, initial_edge_indicators, no_iter, no_warmup, no_chains, edge_selection, sampler_type, seed, no_threads, progress_type, progress_callback, edge_prior, beta_bernoulli_alpha, beta_bernoulli_beta, beta_bernoulli_alpha_between, beta_bernoulli_beta_between, dirichlet_alpha, lambda, target_acceptance, max_tree_depth, na_impute, missing_index_nullable, delta, graph_prior_spec, z_ratio_M_inner, z_ratio_kappa, z_ratio_rho, use_manuscript_nlo, mh_U)
 }
 
 sample_mixed_mrf <- function(inputFromR, prior_inclusion_prob, initial_edge_indicators, no_iter, no_warmup, no_chains, edge_selection, seed, no_threads, progress_type, progress_callback = NULL, edge_prior = "Bernoulli", beta_bernoulli_alpha = 1.0, beta_bernoulli_beta = 1.0, beta_bernoulli_alpha_between = 1.0, beta_bernoulli_beta_between = 1.0, dirichlet_alpha = 1.0, lambda = 1.0, sampler_type = "mh", target_acceptance = 0.80, max_tree_depth = 10L, na_impute = FALSE, missing_index_discrete_nullable = NULL, missing_index_continuous_nullable = NULL, delta = 0.0) {
diff --git a/R/bgm.R b/R/bgm.R
index 14ecd6bb..a4003b15 100644
--- a/R/bgm.R
+++ b/R/bgm.R
@@ -111,6 +111,30 @@
 #'   apply the tilt. Not allowed for pure ordinal models (no precision
 #'   matrix to tilt).
 #'
+#' @param graph_prior_spec Character; one of \code{"joint"} (default)
+#'   or \code{"hierarchical"}. Controls the marginal prior on the graph
+#'   indicators \eqn{\Gamma}. Under \code{"joint"} the implicit
+#'   \eqn{\Gamma}-marginal is \eqn{\pi(\Gamma) \cdot Z(\Gamma)}, where
+#'   \eqn{Z(\Gamma)} is the normalising constant of the precision-matrix
+#'   prior conditional on the graph. Under \code{"hierarchical"} the
+#'   chain compensates with an unbiased estimator of
+#'   \eqn{Z(\Gamma_\text{curr}) / Z(\Gamma_\text{star})}, recovering
+#'   \eqn{\pi(\Gamma)} as the \eqn{\Gamma}-marginal. Only supported when
+#'   the interaction prior is \code{normal_prior(...)} and the precision-
+#'   scale prior is \code{gamma_prior(...)} (the prior families for which
+#'   the closed-form Laplace-NLO normaliser approximation is implemented).
+#'   Default: \code{"joint"}.
+#'
+#' @param z_ratio_tuning Named list with components \code{M_inner}
+#'   (positive integer, default 100), \code{kappa} (positive numeric,
+#'   default 1.0), and \code{rho} (numeric in (0, 1), default 0.5).
+#'   Tuning knobs for the V/Russian-Roulette estimator used when
+#'   \code{graph_prior_spec = "hierarchical"}; ignored otherwise.
+#'   \code{M_inner} is the number of inner Bartlett-Cholesky importance
+#'   samples per Russian-Roulette pool, \code{kappa} sets the analytic
+#'   centring \eqn{c = \kappa \exp(\log Z_\text{NLO}(\Gamma))}, and
+#'   \code{rho} is the geometric-truncation continuation probability.
+#'
 #' @param pairwise_scale `r lifecycle::badge("deprecated")` Double.
 #'   Scale of the Cauchy prior for pairwise
 #'   interaction parameters. Use \code{interaction_prior} instead.
@@ -343,6 +367,8 @@ bgm = function(
   means_prior = normal_prior(scale = 1),
   precision_scale_prior = gamma_prior(shape = 1, rate = 1),
   delta = NULL,
+  graph_prior_spec = c("joint", "hierarchical"),
+  z_ratio_tuning = list(M_inner = 100L, kappa = 1.0, rho = 0.5),
   edge_selection = TRUE,
   edge_prior = bernoulli_prior(0.5),
   na_action = c("listwise", "impute"),
@@ -511,6 +537,8 @@ bgm = function(
     scale_shape = sp$scale_shape,
     scale_rate = sp$scale_rate,
     delta = delta,
+    graph_prior_spec = graph_prior_spec,
+    z_ratio_tuning = z_ratio_tuning,
     standardize = standardize,
     edge_selection = edge_selection,
     edge_prior = edge_prior,
diff --git a/R/bgm_spec.R b/R/bgm_spec.R
index dee204d4..a43f44cc 100644
--- a/R/bgm_spec.R
+++ b/R/bgm_spec.R
@@ -266,6 +266,10 @@ bgm_spec = function(x,
                     scale_shape = 1,
                     scale_rate = 1,
                     delta = NULL,
+                    graph_prior_spec = c("joint", "hierarchical"),
+                    z_ratio_tuning = list(M_inner = 100L,
+                                          kappa   = 1.0,
+                                          rho     = 0.5),
                     standardize = FALSE,
                     edge_selection = TRUE,
                     edge_prior = bernoulli_prior(0.5),
@@ -364,6 +368,65 @@ bgm_spec = function(x,
      !is.finite(delta) || delta < 0) {
     stop("'delta' must be a single finite non-negative numeric, or NULL.")
   }
+  # Validate hierarchical-spec args (only meaningful for ggm/mixed_mrf).
+  graph_prior_spec = if(is.character(graph_prior_spec) &&
+                        length(graph_prior_spec) > 1L) {
+    match.arg(graph_prior_spec)
+  } else {
+    if(!(length(graph_prior_spec) == 1L &&
+         is.character(graph_prior_spec) &&
+         graph_prior_spec %in% c("joint", "hierarchical"))) {
+      stop("'graph_prior_spec' must be \"joint\" or \"hierarchical\".")
+    }
+    graph_prior_spec
+  }
+  if(graph_prior_spec == "hierarchical" &&
+     !(model_type %in% c("ggm", "mixed_mrf"))) {
+    stop(
+      "'graph_prior_spec = \"hierarchical\"' requires continuous data; ",
+      "the current model_type is '", model_type, "', which has no ",
+      "continuous precision block. Use \"joint\" or supply continuous data."
+    )
+  }
+  if(graph_prior_spec == "hierarchical" &&
+     interaction_prior_type != "normal") {
+    stop(
+      "'graph_prior_spec = \"hierarchical\"' requires a Normal slab ",
+      "prior (interaction_prior_type = \"normal\"). Re-fit with ",
+      "interaction_prior = normal_prior(scale = ...)."
+    )
+  }
+  if(graph_prior_spec == "hierarchical" &&
+     scale_prior_type != "gamma") {
+    stop(
+      "'graph_prior_spec = \"hierarchical\"' requires a Gamma diagonal ",
+      "prior (scale_prior_type = \"gamma\")."
+    )
+  }
+  # Validate z_ratio_tuning shape (only enforced if hierarchical; for joint
+  # the defaults pass through unused).
+  if(!is.list(z_ratio_tuning))
+    stop("'z_ratio_tuning' must be a list with components M_inner, kappa, rho.")
+  zrt_M_inner = z_ratio_tuning$M_inner %||% 100L
+  zrt_kappa   = z_ratio_tuning$kappa   %||% 1.0
+  zrt_rho     = z_ratio_tuning$rho     %||% 0.5
+  zrt_use_manuscript_nlo = isTRUE(z_ratio_tuning$use_manuscript_nlo)
+  zrt_mh_U = isTRUE(z_ratio_tuning$mh_U)
+  if(!is.numeric(zrt_M_inner) || length(zrt_M_inner) != 1L ||
+     !is.finite(zrt_M_inner) || zrt_M_inner < 1L)
+    stop("'z_ratio_tuning$M_inner' must be a positive integer.")
+  if(!is.numeric(zrt_kappa) || length(zrt_kappa) != 1L ||
+     !is.finite(zrt_kappa) || zrt_kappa <= 0)
+    stop("'z_ratio_tuning$kappa' must be a positive number.")
+  if(!is.numeric(zrt_rho) || length(zrt_rho) != 1L ||
+     !is.finite(zrt_rho) || zrt_rho <= 0 || zrt_rho >= 1)
+    stop("'z_ratio_tuning$rho' must be in (0, 1).")
+  z_ratio_tuning = list(M_inner = as.integer(zrt_M_inner),
+                        kappa   = as.numeric(zrt_kappa),
+                        rho     = as.numeric(zrt_rho),
+                        use_manuscript_nlo = zrt_use_manuscript_nlo,
+                        mh_U = zrt_mh_U)
+
   if(delta > 0 && model_type %in% c("omrf", "compare")) {
     stop(
       "'delta' (determinant tilt) requires continuous variables; the ",
@@ -444,6 +507,8 @@ bgm_spec = function(x,
       scale_shape = scale_shape,
       scale_rate = scale_rate,
       delta = delta,
+      graph_prior_spec = graph_prior_spec,
+      z_ratio_tuning = z_ratio_tuning,
       edge_prior_flat = ep_flat
     )
   } else if(model_type == "mixed_mrf") {
@@ -536,6 +601,10 @@ build_spec_ggm = function(x, data_columnnames, num_variables,
                           interaction_alpha, interaction_beta,
                           scale_prior_type, scale_shape, scale_rate,
                           delta = 0,
+                          graph_prior_spec = "joint",
+                          z_ratio_tuning = list(M_inner = 100L,
+                                                kappa   = 1.0,
+                                                rho     = 0.5),
                           edge_prior_flat) {
   # Missing data
   md = validate_missing_data(
@@ -577,6 +646,8 @@ build_spec_ggm = function(x, data_columnnames, num_variables,
       scale_shape = scale_shape,
       scale_rate = scale_rate,
       delta = delta,
+      graph_prior_spec = graph_prior_spec,
+      z_ratio_tuning = z_ratio_tuning,
       edge_selection = ep$edge_selection,
       edge_prior = ep$edge_prior,
       inclusion_probability = ep$inclusion_probability,
diff --git a/R/bgms_posterior_mean.R b/R/bgms_posterior_mean.R
new file mode 100644
index 00000000..650eaab4
--- /dev/null
+++ b/R/bgms_posterior_mean.R
@@ -0,0 +1,125 @@
+#' Sign-corrected posterior means for a bgms fit
+#'
+#' Computes posterior means using Lyne (2015)'s sign-corrected ergodic
+#' estimator. For a functional \eqn{f(K)} of the precision matrix,
+#'
+#' \deqn{E[f(K) \mid Y] \;\approx\; \frac{\sum_i \mathrm{sign}_i \cdot f(K_i)}{\sum_i \mathrm{sign}_i}}
+#'
+#' where \eqn{\mathrm{sign}_i} is the sign of the Russian-Roulette
+#' estimator of \eqn{1/Z(\Gamma)} at iteration \eqn{i}. The correction
+#' is required for the ergodic averages to converge to the true
+#' posterior expectation when running the hierarchical-spec GGM
+#' (\code{graph_prior_spec = "hierarchical"}) under settings that
+#' permit signed V values.
+#'
+#' Under operational tunings (low \eqn{\delta}, reasonably large
+#' \eqn{\kappa}) sign is identically \eqn{+1} and the correction
+#' collapses to the plain posterior mean. Sign correction matters
+#' primarily at high \eqn{\delta} or aggressive \eqn{\kappa}. The
+#' diagnostic vector lives at \code{fit$v_ratio_diagnostics$sign}.
+#'
+#' For joint-spec fits, or for any fit that has no
+#' \code{v_ratio_diagnostics} field, this function returns the
+#' \code{posterior_mean_*} fields unchanged.
+#'
+#' @param fit  A bgm() fit object.
+#' @return A list with components:
+#'   \describe{
+#'     \item{\code{main}}{Sign-corrected posterior mean of main effects.
+#'       \code{NULL} for GGM (which has no main effects).}
+#'     \item{\code{pairwise}}{Sign-corrected posterior mean of pairwise
+#'       associations, as a symmetric \eqn{p \times p} matrix. For GGM
+#'       this is on the association scale \eqn{-K_{ij}/2}.}
+#'     \item{\code{indicator}}{Sign-corrected posterior mean of edge
+#'       indicators \eqn{\gamma_{ij}}, as a symmetric \eqn{p \times p}
+#'       matrix. Present only when the fit used edge selection.}
+#'     \item{\code{residual_variance}}{Sign-corrected posterior mean
+#'       of the residual variance \eqn{1/K_{ii}}. Present only for GGM
+#'       fits.}
+#'   }
+#' @export
+bgms_posterior_mean = function(fit) {
+  diag = fit$v_ratio_diagnostics
+
+  # No sign data → identity fall-through to existing posterior means.
+  # (Joint-spec fits, or hierarchical fits before F2 wiring landed.)
+  if(is.null(diag)) {
+    return(list(
+      main              = fit$posterior_mean_main,
+      pairwise          = fit$posterior_mean_pairwise,
+      indicator         = fit$posterior_mean_indicator,
+      residual_variance = fit$posterior_mean_residual_variance
+    ))
+  }
+
+  raw = fit$raw_samples
+  signs = unlist(diag$sign)
+  sum_signs = sum(signs)
+  if(sum_signs == 0) {
+    stop(
+      "All sign(V) entries sum to zero; the sign-corrected posterior ",
+      "mean is undefined. This usually indicates that the V/RR estimator ",
+      "is operating outside its convergence regime - try refitting with ",
+      "a larger kappa (passed via z_ratio_tuning).",
+      call. = FALSE
+    )
+  }
+
+  # Pool per-chain samples and compute sign-weighted column means in one shot.
+  # Chain order in `signs` must match chain order in `raw$*` (both unlisted
+  # / rbind'd over the same lapply order).
+  weighted_col_means = function(samples_per_chain) {
+    if(is.null(samples_per_chain)) return(NULL)
+    pooled = do.call(rbind, samples_per_chain)
+    colSums(pooled * signs) / sum_signs
+  }
+
+  # Whether this is a GGM fit (the only model class that produces sign data
+  # currently). Heuristic: GGM has residual_variance, OMRF/mixed do not.
+  is_continuous = !is.null(fit$posterior_mean_residual_variance)
+  num_variables = nrow(fit$posterior_mean_pairwise)
+
+  out = list()
+
+  # Pairwise → symmetric matrix in association scale (GGM: precision * -0.5).
+  pair_means = weighted_col_means(raw$pairwise)
+  pairwise_mat = matrix(0, num_variables, num_variables,
+                        dimnames = dimnames(fit$posterior_mean_pairwise))
+  pairwise_mat[lower.tri(pairwise_mat)] = pair_means
+  pairwise_mat = pairwise_mat + t(pairwise_mat)
+  if(is_continuous) pairwise_mat = -0.5 * pairwise_mat
+  out$pairwise = pairwise_mat
+
+  # Main: GGM has no main effects; OMRF/mixed don't (yet) support
+  # hierarchical V-correction. Carry the existing field through for shape
+  # parity with the field of the same name on the fit object. Use
+  # `out["main"] = list(NULL)` so the slot name survives even when the
+  # value is NULL (plain `out$main = NULL` would drop the slot).
+  out["main"] = list(fit$posterior_mean_main)
+
+  # Indicator (when edge selection was active).
+  if(!is.null(raw$indicator)) {
+    ind_means = weighted_col_means(raw$indicator)
+    indicator_mat = matrix(0, num_variables, num_variables,
+                           dimnames = dimnames(fit$posterior_mean_indicator))
+    indicator_mat[lower.tri(indicator_mat)] = ind_means
+    indicator_mat = indicator_mat + t(indicator_mat)
+    out$indicator = indicator_mat
+  } else {
+    out["indicator"] = list(NULL)
+  }
+
+  # Residual variance for GGM: sign-correct on the per-sample functional
+  # 1/K_ii (matches the per-sample-inversion form used by the plain field
+  # to avoid Jensen bias).
+  if(is_continuous && !is.null(raw$main)) {
+    inv_main_per_chain = lapply(raw$main, function(m) 1 / m)
+    rv = weighted_col_means(inv_main_per_chain)
+    names(rv) = names(fit$posterior_mean_residual_variance)
+    out$residual_variance = rv
+  } else {
+    out["residual_variance"] = list(NULL)
+  }
+
+  out
+}
diff --git a/R/bgms_s7.R b/R/bgms_s7.R
index 08d85afb..3e124dc8 100644
--- a/R/bgms_s7.R
+++ b/R/bgms_s7.R
@@ -80,6 +80,7 @@ bgms_class = new_class("bgms",
     # --- Optional ---
     nuts_diag = new_property(class_any, default = NULL),
     am_diag = new_property(class_any, default = NULL),
+    v_ratio_diagnostics = new_property(class_any, default = NULL),
 
     # --- easybgm compatibility (deprecated) ---
     indicator = new_property(class_any, default = NULL),
@@ -119,6 +120,7 @@ s3_list_to_bgms = function(results) {
     posterior_summary_pairwise_allocations = .subset2(results, "posterior_summary_pairwise_allocations"),
     nuts_diag = .subset2(results, "nuts_diag"),
     am_diag = .subset2(results, "am_diag"),
+    v_ratio_diagnostics = .subset2(results, "v_ratio_diagnostics"),
     indicator = .subset2(results, "indicator"),
     interactions = .subset2(results, "interactions"),
     thresholds = .subset2(results, "thresholds"),
@@ -193,6 +195,7 @@ bgmCompare_class = new_class("bgmCompare",
     # --- Optional ---
     nuts_diag = new_property(class_any, default = NULL),
     am_diag = new_property(class_any, default = NULL),
+    v_ratio_diagnostics = new_property(class_any, default = NULL),
 
     # --- Internal ---
     .bgm_spec = new_property(class_any, default = NULL),
diff --git a/R/build_output.R b/R/build_output.R
index 3b4b9fe5..3e240cee 100644
--- a/R/build_output.R
+++ b/R/build_output.R
@@ -309,6 +309,8 @@ build_output_bgm = function(spec, raw) {
       if(!is.null(chain$energy)) res[["energy__"]] = chain$energy
       if(!is.null(chain$accept_prob)) res[["accept_prob__"]] = chain$accept_prob
       if(!is.null(chain$am_accept_prob)) res[["am_accept_prob__"]] = chain$am_accept_prob
+      if(!is.null(chain$v_sign)) res[["v_sign__"]] = chain$v_sign
+      if(!is.null(chain$v_log_abs)) res[["v_log_abs__"]] = chain$v_log_abs
       res
     })
   } else {
@@ -528,6 +530,20 @@ build_output_bgm = function(spec, raw) {
     results$am_diag = summarize_am_diagnostics(raw, names_main = names_main, names_pairwise = edge_names, target_accept = s$target_accept)
   }
 
+  # --- V-ratio diagnostics (hierarchical-spec GGM only) -----------------------
+  # Per-iteration sign(V_curr) and log|V_curr| for Lyne (2015) sign-corrected
+  # ergodic averaging. In the operational regime sign === +1 and the correction
+  # collapses to the plain posterior mean; the diagnostic is exposed for
+  # transparency and as the data source for bgms_posterior_mean() (F3).
+  # `raw` at this point has been transformed by build_raw_samples_list's
+  # lapply (line ~292): per-chain keys use the trailing-`__` convention.
+  if(!is.null(raw[[1L]][["v_sign__"]])) {
+    results$v_ratio_diagnostics = list(
+      sign    = lapply(raw, function(ch) ch[["v_sign__"]]),
+      log_abs = lapply(raw, function(ch) ch[["v_log_abs__"]])
+    )
+  }
+
   results$.bgm_spec = spec
   if(needs_easybgm_s3_compat()) {
     results
diff --git a/R/run_sampler.R b/R/run_sampler.R
index 53af96b0..b1bbeb17 100644
--- a/R/run_sampler.R
+++ b/R/run_sampler.R
@@ -105,7 +105,13 @@ run_sampler_ggm = function(spec) {
     max_tree_depth = s$nuts_max_depth,
     na_impute = m$na_impute,
     missing_index_nullable = m$missing_index,
-    delta = p$delta
+    delta = p$delta,
+    graph_prior_spec = p$graph_prior_spec %||% "joint",
+    z_ratio_M_inner = p$z_ratio_tuning$M_inner %||% 100L,
+    z_ratio_kappa   = p$z_ratio_tuning$kappa   %||% 1.0,
+    z_ratio_rho     = p$z_ratio_tuning$rho     %||% 0.5,
+    use_manuscript_nlo = isTRUE(p$z_ratio_tuning$use_manuscript_nlo),
+    mh_U = isTRUE(p$z_ratio_tuning$mh_U)
   )
 
   out_raw
diff --git a/man/bgm.Rd b/man/bgm.Rd
index 0871c6d7..4246ffd7 100644
--- a/man/bgm.Rd
+++ b/man/bgm.Rd
@@ -15,6 +15,8 @@ bgm(
   means_prior = normal_prior(scale = 1),
   precision_scale_prior = gamma_prior(shape = 1, rate = 1),
   delta = NULL,
+  graph_prior_spec = c("joint", "hierarchical"),
+  z_ratio_tuning = list(M_inner = 100L, kappa = 1, rho = 0.5),
   edge_selection = TRUE,
   edge_prior = bernoulli_prior(0.5),
   na_action = c("listwise", "impute"),
@@ -133,6 +135,30 @@ numeric to override. Both NUTS and adaptive-Metropolis update paths
 apply the tilt. Not allowed for pure ordinal models (no precision
 matrix to tilt).}
 
+\item{graph_prior_spec}{Character; one of \code{"joint"} (default)
+or \code{"hierarchical"}. Controls the marginal prior on the graph
+indicators \eqn{\Gamma}. Under \code{"joint"} the implicit
+\eqn{\Gamma}-marginal is \eqn{\pi(\Gamma) \cdot Z(\Gamma)}, where
+\eqn{Z(\Gamma)} is the normalising constant of the precision-matrix
+prior conditional on the graph. Under \code{"hierarchical"} the
+chain compensates with an unbiased estimator of
+\eqn{Z(\Gamma_\text{curr}) / Z(\Gamma_\text{star})}, recovering
+\eqn{\pi(\Gamma)} as the \eqn{\Gamma}-marginal. Only supported when
+the interaction prior is \code{normal_prior(...)} and the precision-
+scale prior is \code{gamma_prior(...)} (the prior families for which
+the closed-form Laplace-NLO normaliser approximation is implemented).
+Default: \code{"joint"}.}
+
+\item{z_ratio_tuning}{Named list with components \code{M_inner}
+(positive integer, default 100), \code{kappa} (positive numeric,
+default 1.0), and \code{rho} (numeric in (0, 1), default 0.5).
+Tuning knobs for the V/Russian-Roulette estimator used when
+\code{graph_prior_spec = "hierarchical"}; ignored otherwise.
+\code{M_inner} is the number of inner Bartlett-Cholesky importance
+samples per Russian-Roulette pool, \code{kappa} sets the analytic
+centring \eqn{c = \kappa \exp(\log Z_\text{NLO}(\Gamma))}, and
+\code{rho} is the geometric-truncation continuation probability.}
+
 \item{edge_selection}{Logical. Whether to perform Bayesian edge selection.
 If \code{FALSE}, the model estimates all edges. Default: \code{TRUE}.}
 
diff --git a/man/bgms_posterior_mean.Rd b/man/bgms_posterior_mean.Rd
new file mode 100644
index 00000000..d3474db3
--- /dev/null
+++ b/man/bgms_posterior_mean.Rd
@@ -0,0 +1,51 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/bgms_posterior_mean.R
+\name{bgms_posterior_mean}
+\alias{bgms_posterior_mean}
+\title{Sign-corrected posterior means for a bgms fit}
+\usage{
+bgms_posterior_mean(fit)
+}
+\arguments{
+\item{fit}{A bgm() fit object.}
+}
+\value{
+A list with components:
+\describe{
+\item{\code{main}}{Sign-corrected posterior mean of main effects.
+\code{NULL} for GGM (which has no main effects).}
+\item{\code{pairwise}}{Sign-corrected posterior mean of pairwise
+associations, as a symmetric \eqn{p \times p} matrix. For GGM
+this is on the association scale \eqn{-K_{ij}/2}.}
+\item{\code{indicator}}{Sign-corrected posterior mean of edge
+indicators \eqn{\gamma_{ij}}, as a symmetric \eqn{p \times p}
+matrix. Present only when the fit used edge selection.}
+\item{\code{residual_variance}}{Sign-corrected posterior mean
+of the residual variance \eqn{1/K_{ii}}. Present only for GGM
+fits.}
+}
+}
+\description{
+Computes posterior means using Lyne (2015)'s sign-corrected ergodic
+estimator. For a functional \eqn{f(K)} of the precision matrix,
+}
+\details{
+\deqn{E[f(K) \mid Y] \;\approx\; \frac{\sum_i \mathrm{sign}_i \cdot f(K_i)}{\sum_i \mathrm{sign}_i}}
+
+where \eqn{\mathrm{sign}_i} is the sign of the Russian-Roulette
+estimator of \eqn{1/Z(\Gamma)} at iteration \eqn{i}. The correction
+is required for the ergodic averages to converge to the true
+posterior expectation when running the hierarchical-spec GGM
+(\code{graph_prior_spec = "hierarchical"}) under settings that
+permit signed V values.
+
+Under operational tunings (low \eqn{\delta}, reasonably large
+\eqn{\kappa}) sign is identically \eqn{+1} and the correction
+collapses to the plain posterior mean. Sign correction matters
+primarily at high \eqn{\delta} or aggressive \eqn{\kappa}. The
+diagnostic vector lives at \code{fit$v_ratio_diagnostics$sign}.
+
+For joint-spec fits, or for any fit that has no
+\code{v_ratio_diagnostics} field, this function returns the
+\code{posterior_mean_*} fields unchanged.
+}
diff --git a/src/RcppExports.cpp b/src/RcppExports.cpp
index 2ed5aa3e..eb191c38 100644
--- a/src/RcppExports.cpp
+++ b/src/RcppExports.cpp
@@ -149,6 +149,314 @@ BEGIN_RCPP
     return rcpp_result_gen;
 END_RCPP
 }
+// log_Z_NLO_gamma_cpp
+double log_Z_NLO_gamma_cpp(const arma::imat& G, double alpha, double beta, double sigma, bool include_F, double delta);
+RcppExport SEXP _bgms_log_Z_NLO_gamma_cpp(SEXP GSEXP, SEXP alphaSEXP, SEXP betaSEXP, SEXP sigmaSEXP, SEXP include_FSEXP, SEXP deltaSEXP) {
+BEGIN_RCPP
+    Rcpp::RObject rcpp_result_gen;
+    Rcpp::RNGScope rcpp_rngScope_gen;
+    Rcpp::traits::input_parameter< const arma::imat& >::type G(GSEXP);
+    Rcpp::traits::input_parameter< double >::type alpha(alphaSEXP);
+    Rcpp::traits::input_parameter< double >::type beta(betaSEXP);
+    Rcpp::traits::input_parameter< double >::type sigma(sigmaSEXP);
+    Rcpp::traits::input_parameter< bool >::type include_F(include_FSEXP);
+    Rcpp::traits::input_parameter< double >::type delta(deltaSEXP);
+    rcpp_result_gen = Rcpp::wrap(log_Z_NLO_gamma_cpp(G, alpha, beta, sigma, include_F, delta));
+    return rcpp_result_gen;
+END_RCPP
+}
+// log_Z_manuscript_NLO_alpha1_cpp
+double log_Z_manuscript_NLO_alpha1_cpp(const arma::imat& G, double beta, double sigma, double delta);
+RcppExport SEXP _bgms_log_Z_manuscript_NLO_alpha1_cpp(SEXP GSEXP, SEXP betaSEXP, SEXP sigmaSEXP, SEXP deltaSEXP) {
+BEGIN_RCPP
+    Rcpp::RObject rcpp_result_gen;
+    Rcpp::RNGScope rcpp_rngScope_gen;
+    Rcpp::traits::input_parameter< const arma::imat& >::type G(GSEXP);
+    Rcpp::traits::input_parameter< double >::type beta(betaSEXP);
+    Rcpp::traits::input_parameter< double >::type sigma(sigmaSEXP);
+    Rcpp::traits::input_parameter< double >::type delta(deltaSEXP);
+    rcpp_result_gen = Rcpp::wrap(log_Z_manuscript_NLO_alpha1_cpp(G, beta, sigma, delta));
+    return rcpp_result_gen;
+END_RCPP
+}
+// log_Z_manuscript_NLO_alpha1_degord_cpp
+double log_Z_manuscript_NLO_alpha1_degord_cpp(const arma::imat& G, int i, int j, double beta, double sigma, double delta);
+RcppExport SEXP _bgms_log_Z_manuscript_NLO_alpha1_degord_cpp(SEXP GSEXP, SEXP iSEXP, SEXP jSEXP, SEXP betaSEXP, SEXP sigmaSEXP, SEXP deltaSEXP) {
+BEGIN_RCPP
+    Rcpp::RObject rcpp_result_gen;
+    Rcpp::RNGScope rcpp_rngScope_gen;
+    Rcpp::traits::input_parameter< const arma::imat& >::type G(GSEXP);
+    Rcpp::traits::input_parameter< int >::type i(iSEXP);
+    Rcpp::traits::input_parameter< int >::type j(jSEXP);
+    Rcpp::traits::input_parameter< double >::type beta(betaSEXP);
+    Rcpp::traits::input_parameter< double >::type sigma(sigmaSEXP);
+    Rcpp::traits::input_parameter< double >::type delta(deltaSEXP);
+    rcpp_result_gen = Rcpp::wrap(log_Z_manuscript_NLO_alpha1_degord_cpp(G, i, j, beta, sigma, delta));
+    return rcpp_result_gen;
+END_RCPP
+}
+// log_Z_NLO_gamma_degord_cpp
+double log_Z_NLO_gamma_degord_cpp(const arma::imat& G, int i, int j, double alpha, double beta, double sigma, bool include_F, double delta);
+RcppExport SEXP _bgms_log_Z_NLO_gamma_degord_cpp(SEXP GSEXP, SEXP iSEXP, SEXP jSEXP, SEXP alphaSEXP, SEXP betaSEXP, SEXP sigmaSEXP, SEXP include_FSEXP, SEXP deltaSEXP) {
+BEGIN_RCPP
+    Rcpp::RObject rcpp_result_gen;
+    Rcpp::RNGScope rcpp_rngScope_gen;
+    Rcpp::traits::input_parameter< const arma::imat& >::type G(GSEXP);
+    Rcpp::traits::input_parameter< int >::type i(iSEXP);
+    Rcpp::traits::input_parameter< int >::type j(jSEXP);
+    Rcpp::traits::input_parameter< double >::type alpha(alphaSEXP);
+    Rcpp::traits::input_parameter< double >::type beta(betaSEXP);
+    Rcpp::traits::input_parameter< double >::type sigma(sigmaSEXP);
+    Rcpp::traits::input_parameter< bool >::type include_F(include_FSEXP);
+    Rcpp::traits::input_parameter< double >::type delta(deltaSEXP);
+    rcpp_result_gen = Rcpp::wrap(log_Z_NLO_gamma_degord_cpp(G, i, j, alpha, beta, sigma, include_F, delta));
+    return rcpp_result_gen;
+END_RCPP
+}
+// log_Z_NLO_gamma_delta_incr_alpha1_cpp
+double log_Z_NLO_gamma_delta_incr_alpha1_cpp(const arma::imat& G_before, int i, int j, double beta, double sigma, double delta, bool include_F);
+RcppExport SEXP _bgms_log_Z_NLO_gamma_delta_incr_alpha1_cpp(SEXP G_beforeSEXP, SEXP iSEXP, SEXP jSEXP, SEXP betaSEXP, SEXP sigmaSEXP, SEXP deltaSEXP, SEXP include_FSEXP) {
+BEGIN_RCPP
+    Rcpp::RObject rcpp_result_gen;
+    Rcpp::RNGScope rcpp_rngScope_gen;
+    Rcpp::traits::input_parameter< const arma::imat& >::type G_before(G_beforeSEXP);
+    Rcpp::traits::input_parameter< int >::type i(iSEXP);
+    Rcpp::traits::input_parameter< int >::type j(jSEXP);
+    Rcpp::traits::input_parameter< double >::type beta(betaSEXP);
+    Rcpp::traits::input_parameter< double >::type sigma(sigmaSEXP);
+    Rcpp::traits::input_parameter< double >::type delta(deltaSEXP);
+    Rcpp::traits::input_parameter< bool >::type include_F(include_FSEXP);
+    rcpp_result_gen = Rcpp::wrap(log_Z_NLO_gamma_delta_incr_alpha1_cpp(G_before, i, j, beta, sigma, delta, include_F));
+    return rcpp_result_gen;
+END_RCPP
+}
+// log_Z_NLO_gamma_delta_incr_alphaN_cpp
+double log_Z_NLO_gamma_delta_incr_alphaN_cpp(const arma::imat& G_before, int i, int j, double alpha, double beta, double sigma, double delta, bool include_F);
+RcppExport SEXP _bgms_log_Z_NLO_gamma_delta_incr_alphaN_cpp(SEXP G_beforeSEXP, SEXP iSEXP, SEXP jSEXP, SEXP alphaSEXP, SEXP betaSEXP, SEXP sigmaSEXP, SEXP deltaSEXP, SEXP include_FSEXP) {
+BEGIN_RCPP
+    Rcpp::RObject rcpp_result_gen;
+    Rcpp::RNGScope rcpp_rngScope_gen;
+    Rcpp::traits::input_parameter< const arma::imat& >::type G_before(G_beforeSEXP);
+    Rcpp::traits::input_parameter< int >::type i(iSEXP);
+    Rcpp::traits::input_parameter< int >::type j(jSEXP);
+    Rcpp::traits::input_parameter< double >::type alpha(alphaSEXP);
+    Rcpp::traits::input_parameter< double >::type beta(betaSEXP);
+    Rcpp::traits::input_parameter< double >::type sigma(sigmaSEXP);
+    Rcpp::traits::input_parameter< double >::type delta(deltaSEXP);
+    Rcpp::traits::input_parameter< bool >::type include_F(include_FSEXP);
+    rcpp_result_gen = Rcpp::wrap(log_Z_NLO_gamma_delta_incr_alphaN_cpp(G_before, i, j, alpha, beta, sigma, delta, include_F));
+    return rcpp_result_gen;
+END_RCPP
+}
+// degord_chain_aux_cpp
+Rcpp::List degord_chain_aux_cpp(int q, double alpha, double beta, double sigma, double delta);
+RcppExport SEXP _bgms_degord_chain_aux_cpp(SEXP qSEXP, SEXP alphaSEXP, SEXP betaSEXP, SEXP sigmaSEXP, SEXP deltaSEXP) {
+BEGIN_RCPP
+    Rcpp::RObject rcpp_result_gen;
+    Rcpp::RNGScope rcpp_rngScope_gen;
+    Rcpp::traits::input_parameter< int >::type q(qSEXP);
+    Rcpp::traits::input_parameter< double >::type alpha(alphaSEXP);
+    Rcpp::traits::input_parameter< double >::type beta(betaSEXP);
+    Rcpp::traits::input_parameter< double >::type sigma(sigmaSEXP);
+    Rcpp::traits::input_parameter< double >::type delta(deltaSEXP);
+    rcpp_result_gen = Rcpp::wrap(degord_chain_aux_cpp(q, alpha, beta, sigma, delta));
+    return rcpp_result_gen;
+END_RCPP
+}
+// degord_pi_aux_cpp
+Rcpp::List degord_pi_aux_cpp(const arma::imat& G_pi, double alpha, double beta, double sigma, double delta);
+RcppExport SEXP _bgms_degord_pi_aux_cpp(SEXP G_piSEXP, SEXP alphaSEXP, SEXP betaSEXP, SEXP sigmaSEXP, SEXP deltaSEXP) {
+BEGIN_RCPP
+    Rcpp::RObject rcpp_result_gen;
+    Rcpp::RNGScope rcpp_rngScope_gen;
+    Rcpp::traits::input_parameter< const arma::imat& >::type G_pi(G_piSEXP);
+    Rcpp::traits::input_parameter< double >::type alpha(alphaSEXP);
+    Rcpp::traits::input_parameter< double >::type beta(betaSEXP);
+    Rcpp::traits::input_parameter< double >::type sigma(sigmaSEXP);
+    Rcpp::traits::input_parameter< double >::type delta(deltaSEXP);
+    rcpp_result_gen = Rcpp::wrap(degord_pi_aux_cpp(G_pi, alpha, beta, sigma, delta));
+    return rcpp_result_gen;
+END_RCPP
+}
+// degord_permute_graph_cpp
+arma::imat degord_permute_graph_cpp(const arma::imat& G, int i, int j);
+RcppExport SEXP _bgms_degord_permute_graph_cpp(SEXP GSEXP, SEXP iSEXP, SEXP jSEXP) {
+BEGIN_RCPP
+    Rcpp::RObject rcpp_result_gen;
+    Rcpp::RNGScope rcpp_rngScope_gen;
+    Rcpp::traits::input_parameter< const arma::imat& >::type G(GSEXP);
+    Rcpp::traits::input_parameter< int >::type i(iSEXP);
+    Rcpp::traits::input_parameter< int >::type j(jSEXP);
+    rcpp_result_gen = Rcpp::wrap(degord_permute_graph_cpp(G, i, j));
+    return rcpp_result_gen;
+END_RCPP
+}
+// degord_log_Zhat_pi_from_pool_cpp
+double degord_log_Zhat_pi_from_pool_cpp(const arma::mat& noise_pool_t, const arma::imat& G_pi, double alpha, double beta, double sigma, double delta, int slab_tilt_mode);
+RcppExport SEXP _bgms_degord_log_Zhat_pi_from_pool_cpp(SEXP noise_pool_tSEXP, SEXP G_piSEXP, SEXP alphaSEXP, SEXP betaSEXP, SEXP sigmaSEXP, SEXP deltaSEXP, SEXP slab_tilt_modeSEXP) {
+BEGIN_RCPP
+    Rcpp::RObject rcpp_result_gen;
+    Rcpp::RNGScope rcpp_rngScope_gen;
+    Rcpp::traits::input_parameter< const arma::mat& >::type noise_pool_t(noise_pool_tSEXP);
+    Rcpp::traits::input_parameter< const arma::imat& >::type G_pi(G_piSEXP);
+    Rcpp::traits::input_parameter< double >::type alpha(alphaSEXP);
+    Rcpp::traits::input_parameter< double >::type beta(betaSEXP);
+    Rcpp::traits::input_parameter< double >::type sigma(sigmaSEXP);
+    Rcpp::traits::input_parameter< double >::type delta(deltaSEXP);
+    Rcpp::traits::input_parameter< int >::type slab_tilt_mode(slab_tilt_modeSEXP);
+    rcpp_result_gen = Rcpp::wrap(degord_log_Zhat_pi_from_pool_cpp(noise_pool_t, G_pi, alpha, beta, sigma, delta, slab_tilt_mode));
+    return rcpp_result_gen;
+END_RCPP
+}
+// degord_delta_log_Zhat_pi_toggle_cpp
+double degord_delta_log_Zhat_pi_toggle_cpp(const arma::mat& noise_pool, const arma::mat& noise_pool_t, const arma::imat& G_curr, int i, int j, double alpha, double beta, double sigma, double delta, int slab_tilt_mode);
+RcppExport SEXP _bgms_degord_delta_log_Zhat_pi_toggle_cpp(SEXP noise_poolSEXP, SEXP noise_pool_tSEXP, SEXP G_currSEXP, SEXP iSEXP, SEXP jSEXP, SEXP alphaSEXP, SEXP betaSEXP, SEXP sigmaSEXP, SEXP deltaSEXP, SEXP slab_tilt_modeSEXP) {
+BEGIN_RCPP
+    Rcpp::RObject rcpp_result_gen;
+    Rcpp::RNGScope rcpp_rngScope_gen;
+    Rcpp::traits::input_parameter< const arma::mat& >::type noise_pool(noise_poolSEXP);
+    Rcpp::traits::input_parameter< const arma::mat& >::type noise_pool_t(noise_pool_tSEXP);
+    Rcpp::traits::input_parameter< const arma::imat& >::type G_curr(G_currSEXP);
+    Rcpp::traits::input_parameter< int >::type i(iSEXP);
+    Rcpp::traits::input_parameter< int >::type j(jSEXP);
+    Rcpp::traits::input_parameter< double >::type alpha(alphaSEXP);
+    Rcpp::traits::input_parameter< double >::type beta(betaSEXP);
+    Rcpp::traits::input_parameter< double >::type sigma(sigmaSEXP);
+    Rcpp::traits::input_parameter< double >::type delta(deltaSEXP);
+    Rcpp::traits::input_parameter< int >::type slab_tilt_mode(slab_tilt_modeSEXP);
+    rcpp_result_gen = Rcpp::wrap(degord_delta_log_Zhat_pi_toggle_cpp(noise_pool, noise_pool_t, G_curr, i, j, alpha, beta, sigma, delta, slab_tilt_mode));
+    return rcpp_result_gen;
+END_RCPP
+}
+// degord_draw_bartlett_pool_cpp
+arma::mat degord_draw_bartlett_pool_cpp(int q, int M_inner, int seed);
+RcppExport SEXP _bgms_degord_draw_bartlett_pool_cpp(SEXP qSEXP, SEXP M_innerSEXP, SEXP seedSEXP) {
+BEGIN_RCPP
+    Rcpp::RObject rcpp_result_gen;
+    Rcpp::RNGScope rcpp_rngScope_gen;
+    Rcpp::traits::input_parameter< int >::type q(qSEXP);
+    Rcpp::traits::input_parameter< int >::type M_inner(M_innerSEXP);
+    Rcpp::traits::input_parameter< int >::type seed(seedSEXP);
+    rcpp_result_gen = Rcpp::wrap(degord_draw_bartlett_pool_cpp(q, M_inner, seed));
+    return rcpp_result_gen;
+END_RCPP
+}
+// degord_V_at_Gamma_pi_cpp
+double degord_V_at_Gamma_pi_cpp(int K_depth, const Rcpp::List& pools_t, const arma::imat& G_pi, double alpha, double beta, double sigma, double delta, double c_val, double rho, int slab_tilt_mode);
+RcppExport SEXP _bgms_degord_V_at_Gamma_pi_cpp(SEXP K_depthSEXP, SEXP pools_tSEXP, SEXP G_piSEXP, SEXP alphaSEXP, SEXP betaSEXP, SEXP sigmaSEXP, SEXP deltaSEXP, SEXP c_valSEXP, SEXP rhoSEXP, SEXP slab_tilt_modeSEXP) {
+BEGIN_RCPP
+    Rcpp::RObject rcpp_result_gen;
+    Rcpp::RNGScope rcpp_rngScope_gen;
+    Rcpp::traits::input_parameter< int >::type K_depth(K_depthSEXP);
+    Rcpp::traits::input_parameter< const Rcpp::List& >::type pools_t(pools_tSEXP);
+    Rcpp::traits::input_parameter< const arma::imat& >::type G_pi(G_piSEXP);
+    Rcpp::traits::input_parameter< double >::type alpha(alphaSEXP);
+    Rcpp::traits::input_parameter< double >::type beta(betaSEXP);
+    Rcpp::traits::input_parameter< double >::type sigma(sigmaSEXP);
+    Rcpp::traits::input_parameter< double >::type delta(deltaSEXP);
+    Rcpp::traits::input_parameter< double >::type c_val(c_valSEXP);
+    Rcpp::traits::input_parameter< double >::type rho(rhoSEXP);
+    Rcpp::traits::input_parameter< int >::type slab_tilt_mode(slab_tilt_modeSEXP);
+    rcpp_result_gen = Rcpp::wrap(degord_V_at_Gamma_pi_cpp(K_depth, pools_t, G_pi, alpha, beta, sigma, delta, c_val, rho, slab_tilt_mode));
+    return rcpp_result_gen;
+END_RCPP
+}
+// degord_V_log_at_Gamma_pi_cpp
+Rcpp::List degord_V_log_at_Gamma_pi_cpp(int K_depth, const Rcpp::List& pools_t, const arma::imat& G_pi, double alpha, double beta, double sigma, double delta, double log_c, double rho, int slab_tilt_mode);
+RcppExport SEXP _bgms_degord_V_log_at_Gamma_pi_cpp(SEXP K_depthSEXP, SEXP pools_tSEXP, SEXP G_piSEXP, SEXP alphaSEXP, SEXP betaSEXP, SEXP sigmaSEXP, SEXP deltaSEXP, SEXP log_cSEXP, SEXP rhoSEXP, SEXP slab_tilt_modeSEXP) {
+BEGIN_RCPP
+    Rcpp::RObject rcpp_result_gen;
+    Rcpp::RNGScope rcpp_rngScope_gen;
+    Rcpp::traits::input_parameter< int >::type K_depth(K_depthSEXP);
+    Rcpp::traits::input_parameter< const Rcpp::List& >::type pools_t(pools_tSEXP);
+    Rcpp::traits::input_parameter< const arma::imat& >::type G_pi(G_piSEXP);
+    Rcpp::traits::input_parameter< double >::type alpha(alphaSEXP);
+    Rcpp::traits::input_parameter< double >::type beta(betaSEXP);
+    Rcpp::traits::input_parameter< double >::type sigma(sigmaSEXP);
+    Rcpp::traits::input_parameter< double >::type delta(deltaSEXP);
+    Rcpp::traits::input_parameter< double >::type log_c(log_cSEXP);
+    Rcpp::traits::input_parameter< double >::type rho(rhoSEXP);
+    Rcpp::traits::input_parameter< int >::type slab_tilt_mode(slab_tilt_modeSEXP);
+    rcpp_result_gen = Rcpp::wrap(degord_V_log_at_Gamma_pi_cpp(K_depth, pools_t, G_pi, alpha, beta, sigma, delta, log_c, rho, slab_tilt_mode));
+    return rcpp_result_gen;
+END_RCPP
+}
+// degord_V_log_pair_at_Gamma_curr_star_cpp
+Rcpp::List degord_V_log_pair_at_Gamma_curr_star_cpp(int K_depth, const Rcpp::List& pools_t, const arma::imat& G_pi_curr, const arma::imat& G_pi_star, double alpha, double beta, double sigma, double delta, double log_c_curr, double log_c_star, double rho, int slab_tilt_mode);
+RcppExport SEXP _bgms_degord_V_log_pair_at_Gamma_curr_star_cpp(SEXP K_depthSEXP, SEXP pools_tSEXP, SEXP G_pi_currSEXP, SEXP G_pi_starSEXP, SEXP alphaSEXP, SEXP betaSEXP, SEXP sigmaSEXP, SEXP deltaSEXP, SEXP log_c_currSEXP, SEXP log_c_starSEXP, SEXP rhoSEXP, SEXP slab_tilt_modeSEXP) {
+BEGIN_RCPP
+    Rcpp::RObject rcpp_result_gen;
+    Rcpp::RNGScope rcpp_rngScope_gen;
+    Rcpp::traits::input_parameter< int >::type K_depth(K_depthSEXP);
+    Rcpp::traits::input_parameter< const Rcpp::List& >::type pools_t(pools_tSEXP);
+    Rcpp::traits::input_parameter< const arma::imat& >::type G_pi_curr(G_pi_currSEXP);
+    Rcpp::traits::input_parameter< const arma::imat& >::type G_pi_star(G_pi_starSEXP);
+    Rcpp::traits::input_parameter< double >::type alpha(alphaSEXP);
+    Rcpp::traits::input_parameter< double >::type beta(betaSEXP);
+    Rcpp::traits::input_parameter< double >::type sigma(sigmaSEXP);
+    Rcpp::traits::input_parameter< double >::type delta(deltaSEXP);
+    Rcpp::traits::input_parameter< double >::type log_c_curr(log_c_currSEXP);
+    Rcpp::traits::input_parameter< double >::type log_c_star(log_c_starSEXP);
+    Rcpp::traits::input_parameter< double >::type rho(rhoSEXP);
+    Rcpp::traits::input_parameter< int >::type slab_tilt_mode(slab_tilt_modeSEXP);
+    rcpp_result_gen = Rcpp::wrap(degord_V_log_pair_at_Gamma_curr_star_cpp(K_depth, pools_t, G_pi_curr, G_pi_star, alpha, beta, sigma, delta, log_c_curr, log_c_star, rho, slab_tilt_mode));
+    return rcpp_result_gen;
+END_RCPP
+}
+// degord_log_Zhat_star_from_cache_cpp
+double degord_log_Zhat_star_from_cache_cpp(const arma::mat& noise_pool_t, const arma::imat& G_pi_curr, const arma::imat& G_pi_star, double alpha, double beta, double sigma, double delta, int slab_tilt_mode);
+RcppExport SEXP _bgms_degord_log_Zhat_star_from_cache_cpp(SEXP noise_pool_tSEXP, SEXP G_pi_currSEXP, SEXP G_pi_starSEXP, SEXP alphaSEXP, SEXP betaSEXP, SEXP sigmaSEXP, SEXP deltaSEXP, SEXP slab_tilt_modeSEXP) {
+BEGIN_RCPP
+    Rcpp::RObject rcpp_result_gen;
+    Rcpp::RNGScope rcpp_rngScope_gen;
+    Rcpp::traits::input_parameter< const arma::mat& >::type noise_pool_t(noise_pool_tSEXP);
+    Rcpp::traits::input_parameter< const arma::imat& >::type G_pi_curr(G_pi_currSEXP);
+    Rcpp::traits::input_parameter< const arma::imat& >::type G_pi_star(G_pi_starSEXP);
+    Rcpp::traits::input_parameter< double >::type alpha(alphaSEXP);
+    Rcpp::traits::input_parameter< double >::type beta(betaSEXP);
+    Rcpp::traits::input_parameter< double >::type sigma(sigmaSEXP);
+    Rcpp::traits::input_parameter< double >::type delta(deltaSEXP);
+    Rcpp::traits::input_parameter< int >::type slab_tilt_mode(slab_tilt_modeSEXP);
+    rcpp_result_gen = Rcpp::wrap(degord_log_Zhat_star_from_cache_cpp(noise_pool_t, G_pi_curr, G_pi_star, alpha, beta, sigma, delta, slab_tilt_mode));
+    return rcpp_result_gen;
+END_RCPP
+}
+// degord_draw_U_rr_cpp
+Rcpp::List degord_draw_U_rr_cpp(int M_inner, int q, double rho, int seed);
+RcppExport SEXP _bgms_degord_draw_U_rr_cpp(SEXP M_innerSEXP, SEXP qSEXP, SEXP rhoSEXP, SEXP seedSEXP) {
+BEGIN_RCPP
+    Rcpp::RObject rcpp_result_gen;
+    Rcpp::RNGScope rcpp_rngScope_gen;
+    Rcpp::traits::input_parameter< int >::type M_inner(M_innerSEXP);
+    Rcpp::traits::input_parameter< int >::type q(qSEXP);
+    Rcpp::traits::input_parameter< double >::type rho(rhoSEXP);
+    Rcpp::traits::input_parameter< int >::type seed(seedSEXP);
+    rcpp_result_gen = Rcpp::wrap(degord_draw_U_rr_cpp(M_inner, q, rho, seed));
+    return rcpp_result_gen;
+END_RCPP
+}
+// ggm_hierarchical_smoke_cpp
+Rcpp::List ggm_hierarchical_smoke_cpp(const arma::mat& observations, double inclusion_prob, double interaction_scale, double diagonal_shape, double diagonal_rate, double delta, int M_inner, double kappa, double rho, int n_sweeps, int seed, bool use_manuscript_nlo);
+RcppExport SEXP _bgms_ggm_hierarchical_smoke_cpp(SEXP observationsSEXP, SEXP inclusion_probSEXP, SEXP interaction_scaleSEXP, SEXP diagonal_shapeSEXP, SEXP diagonal_rateSEXP, SEXP deltaSEXP, SEXP M_innerSEXP, SEXP kappaSEXP, SEXP rhoSEXP, SEXP n_sweepsSEXP, SEXP seedSEXP, SEXP use_manuscript_nloSEXP) {
+BEGIN_RCPP
+    Rcpp::RObject rcpp_result_gen;
+    Rcpp::RNGScope rcpp_rngScope_gen;
+    Rcpp::traits::input_parameter< const arma::mat& >::type observations(observationsSEXP);
+    Rcpp::traits::input_parameter< double >::type inclusion_prob(inclusion_probSEXP);
+    Rcpp::traits::input_parameter< double >::type interaction_scale(interaction_scaleSEXP);
+    Rcpp::traits::input_parameter< double >::type diagonal_shape(diagonal_shapeSEXP);
+    Rcpp::traits::input_parameter< double >::type diagonal_rate(diagonal_rateSEXP);
+    Rcpp::traits::input_parameter< double >::type delta(deltaSEXP);
+    Rcpp::traits::input_parameter< int >::type M_inner(M_innerSEXP);
+    Rcpp::traits::input_parameter< double >::type kappa(kappaSEXP);
+    Rcpp::traits::input_parameter< double >::type rho(rhoSEXP);
+    Rcpp::traits::input_parameter< int >::type n_sweeps(n_sweepsSEXP);
+    Rcpp::traits::input_parameter< int >::type seed(seedSEXP);
+    Rcpp::traits::input_parameter< bool >::type use_manuscript_nlo(use_manuscript_nloSEXP);
+    rcpp_result_gen = Rcpp::wrap(ggm_hierarchical_smoke_cpp(observations, inclusion_prob, interaction_scale, diagonal_shape, diagonal_rate, delta, M_inner, kappa, rho, n_sweeps, seed, use_manuscript_nlo));
+    return rcpp_result_gen;
+END_RCPP
+}
 // compute_ess_cpp
 Rcpp::NumericVector compute_ess_cpp(Rcpp::NumericVector array3d);
 RcppExport SEXP _bgms_compute_ess_cpp(SEXP array3dSEXP) {
@@ -557,8 +865,8 @@ BEGIN_RCPP
 END_RCPP
 }
 // sample_ggm
-Rcpp::List sample_ggm(const Rcpp::List& inputFromR, const arma::mat& prior_inclusion_prob, const arma::imat& initial_edge_indicators, const int no_iter, const int no_warmup, const int no_chains, const bool edge_selection, const std::string& sampler_type, const int seed, const int no_threads, const int progress_type, SEXP progress_callback, const std::string& edge_prior, const double beta_bernoulli_alpha, const double beta_bernoulli_beta, const double beta_bernoulli_alpha_between, const double beta_bernoulli_beta_between, const double dirichlet_alpha, const double lambda, const double target_acceptance, const int max_tree_depth, const bool na_impute, const Rcpp::Nullable<Rcpp::IntegerMatrix> missing_index_nullable, const double delta);
-RcppExport SEXP _bgms_sample_ggm(SEXP inputFromRSEXP, SEXP prior_inclusion_probSEXP, SEXP initial_edge_indicatorsSEXP, SEXP no_iterSEXP, SEXP no_warmupSEXP, SEXP no_chainsSEXP, SEXP edge_selectionSEXP, SEXP sampler_typeSEXP, SEXP seedSEXP, SEXP no_threadsSEXP, SEXP progress_typeSEXP, SEXP progress_callbackSEXP, SEXP edge_priorSEXP, SEXP beta_bernoulli_alphaSEXP, SEXP beta_bernoulli_betaSEXP, SEXP beta_bernoulli_alpha_betweenSEXP, SEXP beta_bernoulli_beta_betweenSEXP, SEXP dirichlet_alphaSEXP, SEXP lambdaSEXP, SEXP target_acceptanceSEXP, SEXP max_tree_depthSEXP, SEXP na_imputeSEXP, SEXP missing_index_nullableSEXP, SEXP deltaSEXP) {
+Rcpp::List sample_ggm(const Rcpp::List& inputFromR, const arma::mat& prior_inclusion_prob, const arma::imat& initial_edge_indicators, const int no_iter, const int no_warmup, const int no_chains, const bool edge_selection, const std::string& sampler_type, const int seed, const int no_threads, const int progress_type, SEXP progress_callback, const std::string& edge_prior, const double beta_bernoulli_alpha, const double beta_bernoulli_beta, const double beta_bernoulli_alpha_between, const double beta_bernoulli_beta_between, const double dirichlet_alpha, const double lambda, const double target_acceptance, const int max_tree_depth, const bool na_impute, const Rcpp::Nullable<Rcpp::IntegerMatrix> missing_index_nullable, const double delta, const std::string& graph_prior_spec, const int z_ratio_M_inner, const double z_ratio_kappa, const double z_ratio_rho, const bool use_manuscript_nlo, const bool mh_U);
+RcppExport SEXP _bgms_sample_ggm(SEXP inputFromRSEXP, SEXP prior_inclusion_probSEXP, SEXP initial_edge_indicatorsSEXP, SEXP no_iterSEXP, SEXP no_warmupSEXP, SEXP no_chainsSEXP, SEXP edge_selectionSEXP, SEXP sampler_typeSEXP, SEXP seedSEXP, SEXP no_threadsSEXP, SEXP progress_typeSEXP, SEXP progress_callbackSEXP, SEXP edge_priorSEXP, SEXP beta_bernoulli_alphaSEXP, SEXP beta_bernoulli_betaSEXP, SEXP beta_bernoulli_alpha_betweenSEXP, SEXP beta_bernoulli_beta_betweenSEXP, SEXP dirichlet_alphaSEXP, SEXP lambdaSEXP, SEXP target_acceptanceSEXP, SEXP max_tree_depthSEXP, SEXP na_imputeSEXP, SEXP missing_index_nullableSEXP, SEXP deltaSEXP, SEXP graph_prior_specSEXP, SEXP z_ratio_M_innerSEXP, SEXP z_ratio_kappaSEXP, SEXP z_ratio_rhoSEXP, SEXP use_manuscript_nloSEXP, SEXP mh_USEXP) {
 BEGIN_RCPP
     Rcpp::RObject rcpp_result_gen;
     Rcpp::RNGScope rcpp_rngScope_gen;
@@ -586,7 +894,13 @@ BEGIN_RCPP
     Rcpp::traits::input_parameter< const bool >::type na_impute(na_imputeSEXP);
     Rcpp::traits::input_parameter< const Rcpp::Nullable<Rcpp::IntegerMatrix> >::type missing_index_nullable(missing_index_nullableSEXP);
     Rcpp::traits::input_parameter< const double >::type delta(deltaSEXP);
-    rcpp_result_gen = Rcpp::wrap(sample_ggm(inputFromR, prior_inclusion_prob, initial_edge_indicators, no_iter, no_warmup, no_chains, edge_selection, sampler_type, seed, no_threads, progress_type, progress_callback, edge_prior, beta_bernoulli_alpha, beta_bernoulli_beta, beta_bernoulli_alpha_between, beta_bernoulli_beta_between, dirichlet_alpha, lambda, target_acceptance, max_tree_depth, na_impute, missing_index_nullable, delta));
+    Rcpp::traits::input_parameter< const std::string& >::type graph_prior_spec(graph_prior_specSEXP);
+    Rcpp::traits::input_parameter< const int >::type z_ratio_M_inner(z_ratio_M_innerSEXP);
+    Rcpp::traits::input_parameter< const double >::type z_ratio_kappa(z_ratio_kappaSEXP);
+    Rcpp::traits::input_parameter< const double >::type z_ratio_rho(z_ratio_rhoSEXP);
+    Rcpp::traits::input_parameter< const bool >::type use_manuscript_nlo(use_manuscript_nloSEXP);
+    Rcpp::traits::input_parameter< const bool >::type mh_U(mh_USEXP);
+    rcpp_result_gen = Rcpp::wrap(sample_ggm(inputFromR, prior_inclusion_prob, initial_edge_indicators, no_iter, no_warmup, no_chains, edge_selection, sampler_type, seed, no_threads, progress_type, progress_callback, edge_prior, beta_bernoulli_alpha, beta_bernoulli_beta, beta_bernoulli_alpha_between, beta_bernoulli_beta_between, dirichlet_alpha, lambda, target_acceptance, max_tree_depth, na_impute, missing_index_nullable, delta, graph_prior_spec, z_ratio_M_inner, z_ratio_kappa, z_ratio_rho, use_manuscript_nlo, mh_U));
     return rcpp_result_gen;
 END_RCPP
 }
@@ -682,6 +996,24 @@ static const R_CallMethodDef CallEntries[] = {
     {"_bgms_ggm_test_logp_and_gradient", (DL_FUNC) &_bgms_ggm_test_logp_and_gradient, 5},
     {"_bgms_ggm_test_forward_map", (DL_FUNC) &_bgms_ggm_test_forward_map, 2},
     {"_bgms_sample_ggm_prior", (DL_FUNC) &_bgms_sample_ggm_prior, 14},
+    {"_bgms_log_Z_NLO_gamma_cpp", (DL_FUNC) &_bgms_log_Z_NLO_gamma_cpp, 6},
+    {"_bgms_log_Z_manuscript_NLO_alpha1_cpp", (DL_FUNC) &_bgms_log_Z_manuscript_NLO_alpha1_cpp, 4},
+    {"_bgms_log_Z_manuscript_NLO_alpha1_degord_cpp", (DL_FUNC) &_bgms_log_Z_manuscript_NLO_alpha1_degord_cpp, 6},
+    {"_bgms_log_Z_NLO_gamma_degord_cpp", (DL_FUNC) &_bgms_log_Z_NLO_gamma_degord_cpp, 8},
+    {"_bgms_log_Z_NLO_gamma_delta_incr_alpha1_cpp", (DL_FUNC) &_bgms_log_Z_NLO_gamma_delta_incr_alpha1_cpp, 7},
+    {"_bgms_log_Z_NLO_gamma_delta_incr_alphaN_cpp", (DL_FUNC) &_bgms_log_Z_NLO_gamma_delta_incr_alphaN_cpp, 8},
+    {"_bgms_degord_chain_aux_cpp", (DL_FUNC) &_bgms_degord_chain_aux_cpp, 5},
+    {"_bgms_degord_pi_aux_cpp", (DL_FUNC) &_bgms_degord_pi_aux_cpp, 5},
+    {"_bgms_degord_permute_graph_cpp", (DL_FUNC) &_bgms_degord_permute_graph_cpp, 3},
+    {"_bgms_degord_log_Zhat_pi_from_pool_cpp", (DL_FUNC) &_bgms_degord_log_Zhat_pi_from_pool_cpp, 7},
+    {"_bgms_degord_delta_log_Zhat_pi_toggle_cpp", (DL_FUNC) &_bgms_degord_delta_log_Zhat_pi_toggle_cpp, 10},
+    {"_bgms_degord_draw_bartlett_pool_cpp", (DL_FUNC) &_bgms_degord_draw_bartlett_pool_cpp, 3},
+    {"_bgms_degord_V_at_Gamma_pi_cpp", (DL_FUNC) &_bgms_degord_V_at_Gamma_pi_cpp, 10},
+    {"_bgms_degord_V_log_at_Gamma_pi_cpp", (DL_FUNC) &_bgms_degord_V_log_at_Gamma_pi_cpp, 10},
+    {"_bgms_degord_V_log_pair_at_Gamma_curr_star_cpp", (DL_FUNC) &_bgms_degord_V_log_pair_at_Gamma_curr_star_cpp, 12},
+    {"_bgms_degord_log_Zhat_star_from_cache_cpp", (DL_FUNC) &_bgms_degord_log_Zhat_star_from_cache_cpp, 8},
+    {"_bgms_degord_draw_U_rr_cpp", (DL_FUNC) &_bgms_degord_draw_U_rr_cpp, 4},
+    {"_bgms_ggm_hierarchical_smoke_cpp", (DL_FUNC) &_bgms_ggm_hierarchical_smoke_cpp, 12},
     {"_bgms_compute_ess_cpp", (DL_FUNC) &_bgms_compute_ess_cpp, 1},
     {"_bgms_compute_rhat_cpp", (DL_FUNC) &_bgms_compute_rhat_cpp, 1},
     {"_bgms_compute_indicator_ess_cpp", (DL_FUNC) &_bgms_compute_indicator_ess_cpp, 1},
@@ -703,7 +1035,7 @@ static const R_CallMethodDef CallEntries[] = {
     {"_bgms_test_parameter_prior", (DL_FUNC) &_bgms_test_parameter_prior, 6},
     {"_bgms_test_scale_prior", (DL_FUNC) &_bgms_test_scale_prior, 4},
     {"_bgms_ggm_test_logp_and_gradient_prior", (DL_FUNC) &_bgms_ggm_test_logp_and_gradient_prior, 11},
-    {"_bgms_sample_ggm", (DL_FUNC) &_bgms_sample_ggm, 24},
+    {"_bgms_sample_ggm", (DL_FUNC) &_bgms_sample_ggm, 30},
     {"_bgms_sample_mixed_mrf", (DL_FUNC) &_bgms_sample_mixed_mrf, 25},
     {"_bgms_sample_omrf", (DL_FUNC) &_bgms_sample_omrf, 24},
     {"_bgms_compute_Vn_mfm_sbm", (DL_FUNC) &_bgms_compute_Vn_mfm_sbm, 4},
diff --git a/src/log_z_test_interface.cpp b/src/log_z_test_interface.cpp
new file mode 100644
index 00000000..2207d679
--- /dev/null
+++ b/src/log_z_test_interface.cpp
@@ -0,0 +1,357 @@
+// Test interface for the closed-form log Z(G) approximations used by the
+// hierarchical-spec MH (Stage 3 of dev/plans/backlog/hierarchical-ggm-degord-rr.md).
+// Exposes the C++ entry points to R for parity and incremental checks.
+
+#include <RcppArmadillo.h>
+#include "models/ggm/log_z_nlo.h"
+#include "models/ggm/manuscript_nlo.h"
+#include "models/ggm/degord_sampler.h"
+#include "models/ggm/z_ratio_estimator.h"
+#include "models/ggm/ggm_model.h"
+#include "rng/rng_utils.h"
+
+
+// [[Rcpp::export]]
+double log_Z_NLO_gamma_cpp(
+    const arma::imat& G,
+    double alpha, double beta, double sigma,
+    bool include_F = false,
+    double delta = 0.0
+) {
+    return log_Z_NLO_gamma(G, alpha, beta, sigma, include_F, delta);
+}
+
+
+// Manuscript App C NLO at alpha = 1 (companion-AI delivery 2026-05-21).
+// Tracks ~/SV/Z/R/src/manuscript_NLO.h::log_Z_manuscript_NLO_alpha1.
+//
+// [[Rcpp::export]]
+double log_Z_manuscript_NLO_alpha1_cpp(
+    const arma::imat& G,
+    double beta, double sigma, double delta
+) {
+    return ggm_nlo::log_Z_manuscript_NLO_alpha1(G, beta, sigma, delta);
+}
+
+
+// Manuscript NLO under DEGORD reordering (relabel toggle (i, j) -> (0, 1)).
+//
+// [[Rcpp::export]]
+double log_Z_manuscript_NLO_alpha1_degord_cpp(
+    const arma::imat& G, int i, int j,
+    double beta, double sigma, double delta
+) {
+    return ggm_nlo::log_Z_manuscript_NLO_alpha1_degord(
+        G, i, j, beta, sigma, delta);
+}
+
+
+// [[Rcpp::export]]
+double log_Z_NLO_gamma_degord_cpp(
+    const arma::imat& G, int i, int j,
+    double alpha, double beta, double sigma,
+    bool include_F = false,
+    double delta = 0.0
+) {
+    return log_Z_NLO_gamma_degord(G, i, j, alpha, beta, sigma, include_F, delta);
+}
+
+
+// [[Rcpp::export]]
+double log_Z_NLO_gamma_delta_incr_alpha1_cpp(
+    const arma::imat& G_before, int i, int j,
+    double beta, double sigma, double delta,
+    bool include_F = false
+) {
+    return log_Z_NLO_gamma_delta_incr_alpha1(
+        G_before, i, j, beta, sigma, delta, include_F);
+}
+
+
+// [[Rcpp::export]]
+double log_Z_NLO_gamma_delta_incr_alphaN_cpp(
+    const arma::imat& G_before, int i, int j,
+    double alpha, double beta, double sigma, double delta,
+    bool include_F = false
+) {
+    return log_Z_NLO_gamma_delta_incr_alphaN(
+        G_before, i, j, alpha, beta, sigma, delta, include_F);
+}
+
+
+// ---- DEGORD sampler test interface ------------------------------------
+
+// [[Rcpp::export]]
+Rcpp::List degord_chain_aux_cpp(
+    int q, double alpha, double beta, double sigma, double delta
+) {
+    auto c = degord::make_chain_aux(q, alpha, beta, sigma, delta);
+    return Rcpp::List::create(
+        Rcpp::Named("q")                        = c.q,
+        Rcpp::Named("alpha")                    = c.alpha,
+        Rcpp::Named("beta")                     = c.beta,
+        Rcpp::Named("sigma")                    = c.sigma,
+        Rcpp::Named("delta")                    = c.delta,
+        Rcpp::Named("sigma_diag")               = c.sigma_diag,
+        Rcpp::Named("nu_chi_df")                = Rcpp::wrap(c.nu_chi_df),
+        Rcpp::Named("nu_mu_l")                  = Rcpp::wrap(c.nu_mu_l),
+        Rcpp::Named("nu_H_e_saddle")            = Rcpp::wrap(c.nu_H_e_saddle),
+        Rcpp::Named("nu_lgamma_half_k")         = Rcpp::wrap(c.nu_lgamma_half_k),
+        Rcpp::Named("nu_diag_const")            = Rcpp::wrap(c.nu_diag_const),
+        Rcpp::Named("nu_slab_const_saddle")     = Rcpp::wrap(c.nu_slab_const_saddle),
+        Rcpp::Named("nu_per_vertex")            = Rcpp::wrap(c.nu_per_vertex)
+    );
+}
+
+
+// [[Rcpp::export]]
+Rcpp::List degord_pi_aux_cpp(
+    const arma::imat& G_pi,
+    double alpha, double beta, double sigma, double delta
+) {
+    int q = G_pi.n_rows;
+    auto c = degord::make_chain_aux(q, alpha, beta, sigma, delta);
+    auto a = degord::make_pi_aux(G_pi, c);
+    return Rcpp::List::create(
+        Rcpp::Named("q")       = a.q,
+        Rcpp::Named("nu_pi")   = Rcpp::wrap(a.nu_pi),
+        Rcpp::Named("E_count") = a.E_count,
+        Rcpp::Named("log_C0")  = a.log_C0
+    );
+}
+
+
+// [[Rcpp::export]]
+arma::imat degord_permute_graph_cpp(const arma::imat& G, int i, int j) {
+    int q = G.n_rows;
+    auto pi = degord::degord_permutation(q, i, j);
+    return degord::permute_graph(G, pi);
+}
+
+
+// [[Rcpp::export]]
+double degord_log_Zhat_pi_from_pool_cpp(
+    const arma::mat& noise_pool_t,
+    const arma::imat& G_pi,
+    double alpha, double beta, double sigma, double delta,
+    int slab_tilt_mode = 0
+) {
+    int q = G_pi.n_rows;
+    auto c = degord::make_chain_aux(q, alpha, beta, sigma, delta);
+    c.slab_tilt_mode = slab_tilt_mode;
+    auto a = degord::make_pi_aux(G_pi, c);
+    return degord::log_Zhat_pi_from_pool(noise_pool_t, a, c);
+}
+
+
+// [[Rcpp::export]]
+double degord_delta_log_Zhat_pi_toggle_cpp(
+    const arma::mat& noise_pool,
+    const arma::mat& noise_pool_t,
+    const arma::imat& G_curr,
+    int i, int j,
+    double alpha, double beta, double sigma, double delta,
+    int slab_tilt_mode = 0
+) {
+    int q = G_curr.n_rows;
+    auto c = degord::make_chain_aux(q, alpha, beta, sigma, delta);
+    c.slab_tilt_mode = slab_tilt_mode;
+    return degord::delta_log_Zhat_pi_toggle(
+        noise_pool, noise_pool_t, G_curr, i, j, c);
+}
+
+
+// [[Rcpp::export]]
+arma::mat degord_draw_bartlett_pool_cpp(int q, int M_inner, int seed) {
+    SafeRNG rng(seed);
+    return degord::draw_bartlett_pool(rng, q, M_inner);
+}
+
+
+// ---- Phase 3: V(Γ, U) Russian-Roulette estimator test interface --------
+
+// [[Rcpp::export]]
+double degord_V_at_Gamma_pi_cpp(
+    int K_depth,
+    const Rcpp::List& pools_t,
+    const arma::imat& G_pi,
+    double alpha, double beta, double sigma, double delta,
+    double c_val, double rho,
+    int slab_tilt_mode = 0
+) {
+    int q = G_pi.n_rows;
+    auto chain_aux = degord::make_chain_aux(q, alpha, beta, sigma, delta);
+    chain_aux.slab_tilt_mode = slab_tilt_mode;
+    std::vector<arma::mat> pools_t_cpp;
+    pools_t_cpp.reserve(static_cast<size_t>(K_depth));
+    for (int n = 0; n < K_depth; ++n)
+        pools_t_cpp.push_back(Rcpp::as<arma::mat>(pools_t[n]));
+    return degord::V_at_Gamma_pi_degord(
+        K_depth, pools_t_cpp, G_pi, chain_aux, c_val, rho);
+}
+
+
+// Log-space V test interface. Returns (log_abs, sign) for log|V| and sign(V).
+// sign = 0 with log_abs = NaN signals an auto-reject sentinel (S = 0 or any
+// non-finite intermediate).
+//
+// [[Rcpp::export]]
+Rcpp::List degord_V_log_at_Gamma_pi_cpp(
+    int K_depth,
+    const Rcpp::List& pools_t,
+    const arma::imat& G_pi,
+    double alpha, double beta, double sigma, double delta,
+    double log_c, double rho,
+    int slab_tilt_mode = 0
+) {
+    int q = G_pi.n_rows;
+    auto chain_aux = degord::make_chain_aux(q, alpha, beta, sigma, delta);
+    chain_aux.slab_tilt_mode = slab_tilt_mode;
+    std::vector<arma::mat> pools_t_cpp;
+    pools_t_cpp.reserve(static_cast<size_t>(K_depth));
+    for (int n = 0; n < K_depth; ++n)
+        pools_t_cpp.push_back(Rcpp::as<arma::mat>(pools_t[n]));
+    auto res = degord::V_log_at_Gamma_pi_degord(
+        K_depth, pools_t_cpp, G_pi, chain_aux, log_c, rho);
+    return Rcpp::List::create(
+        Rcpp::Named("log_abs") = res.first,
+        Rcpp::Named("sign")    = res.second
+    );
+}
+
+
+// Paired log-space V at (Γ_curr, Γ_star) with within-pool cache reuse.
+// Returns a list with `curr` and `star` sub-lists, each carrying
+// (log_abs, sign).
+//
+// [[Rcpp::export]]
+Rcpp::List degord_V_log_pair_at_Gamma_curr_star_cpp(
+    int K_depth,
+    const Rcpp::List& pools_t,
+    const arma::imat& G_pi_curr,
+    const arma::imat& G_pi_star,
+    double alpha, double beta, double sigma, double delta,
+    double log_c_curr, double log_c_star, double rho,
+    int slab_tilt_mode = 0
+) {
+    int q = G_pi_curr.n_rows;
+    auto chain_aux = degord::make_chain_aux(q, alpha, beta, sigma, delta);
+    chain_aux.slab_tilt_mode = slab_tilt_mode;
+    std::vector<arma::mat> pools_t_cpp;
+    pools_t_cpp.reserve(static_cast<size_t>(K_depth));
+    for (int n = 0; n < K_depth; ++n)
+        pools_t_cpp.push_back(Rcpp::as<arma::mat>(pools_t[n]));
+    auto res = degord::V_log_pair_at_Gamma_curr_star_degord(
+        K_depth, pools_t_cpp, G_pi_curr, G_pi_star, chain_aux,
+        log_c_curr, log_c_star, rho);
+    return Rcpp::List::create(
+        Rcpp::Named("curr") = Rcpp::List::create(
+            Rcpp::Named("log_abs") = res.curr.first,
+            Rcpp::Named("sign")    = res.curr.second),
+        Rcpp::Named("star") = Rcpp::List::create(
+            Rcpp::Named("log_abs") = res.star.first,
+            Rcpp::Named("sign")    = res.star.second)
+    );
+}
+
+
+// log_Zhat under G_pi_star using cached state built under G_pi_curr.
+// Used by tests to validate that the cache adapter matches a fresh
+// log_Zhat_pi_from_pool at G_pi_star to FP-reordering tolerance.
+//
+// [[Rcpp::export]]
+double degord_log_Zhat_star_from_cache_cpp(
+    const arma::mat& noise_pool_t,
+    const arma::imat& G_pi_curr,
+    const arma::imat& G_pi_star,
+    double alpha, double beta, double sigma, double delta,
+    int slab_tilt_mode = 0
+) {
+    int q = G_pi_curr.n_rows;
+    auto chain_aux = degord::make_chain_aux(q, alpha, beta, sigma, delta);
+    chain_aux.slab_tilt_mode = slab_tilt_mode;
+    auto a_curr = degord::make_pi_aux(G_pi_curr, chain_aux);
+    auto a_star = degord::make_pi_aux(G_pi_star, chain_aux);
+    degord::PoolCache cache_curr;
+    (void) degord::log_Zhat_pi_from_pool_cache(
+        noise_pool_t, a_curr, chain_aux, cache_curr);
+    return degord::log_Zhat_star_from_cache(
+        noise_pool_t, a_star, chain_aux, cache_curr);
+}
+
+
+// [[Rcpp::export]]
+Rcpp::List degord_draw_U_rr_cpp(int M_inner, int q, double rho, int seed) {
+    SafeRNG rng(seed);
+    int K_depth = 0;
+    std::vector<arma::mat> pools_t;
+    degord::draw_U_degord_rr(rng, K_depth, pools_t, M_inner, q, rho);
+    Rcpp::List pools_R(K_depth);
+    for (int n = 0; n < K_depth; ++n) pools_R[n] = pools_t[n];
+    return Rcpp::List::create(
+        Rcpp::Named("K_depth") = K_depth,
+        Rcpp::Named("pools_t") = pools_R
+    );
+}
+
+
+// ---- Phase 4a: hierarchical-spec smoke test ----------------------------
+
+// Constructs a small GGMModel with Normal slab + Gamma diagonal, switches to
+// hierarchical-spec, runs n_sweeps of MH, and returns the final edge
+// indicators and a few summary statistics. Crashes here are a regression
+// in the Phase 4a wiring.
+
+// [[Rcpp::export]]
+Rcpp::List ggm_hierarchical_smoke_cpp(
+    const arma::mat& observations,
+    double inclusion_prob,
+    double interaction_scale,    // sigma for Normal slab
+    double diagonal_shape,       // alpha for Gamma diag
+    double diagonal_rate,        // beta for Gamma diag
+    double delta,                // determinant tilt
+    int    M_inner,
+    double kappa,
+    double rho,
+    int    n_sweeps,
+    int    seed,
+    bool   use_manuscript_nlo = false
+) {
+    int p = observations.n_cols;
+    arma::mat inclusion_probability(p, p, arma::fill::value(inclusion_prob));
+    arma::imat initial_edges(p, p, arma::fill::zeros);
+    // Start at empty graph: edge_indicators are 0 off-diagonal, 1 on the
+    // diagonal (by GGMModel convention).
+    for (int i = 0; i < p; ++i) initial_edges(i, i) = 1;
+
+    auto slab = std::make_unique<NormalPrior>(interaction_scale);
+    auto diag = std::make_unique<GammaScalePrior>(diagonal_shape, diagonal_rate);
+
+    GGMModel model(observations,
+                   inclusion_probability,
+                   initial_edges,
+                   /*edge_selection=*/true,
+                   std::move(slab),
+                   std::move(diag),
+                   /*na_impute=*/false);
+    model.set_seed(seed);
+    model.set_determinant_tilt(delta);
+    model.set_z_ratio_tuning(M_inner, kappa, rho);
+    model.set_use_manuscript_nlo(use_manuscript_nlo);
+    model.set_graph_prior_spec(GraphPriorSpec::Hierarchical);
+
+    arma::ivec n_edges(n_sweeps, arma::fill::zeros);
+    for (int s = 0; s < n_sweeps; ++s) {
+        model.prepare_iteration();
+        model.update_edge_indicators();
+        const arma::imat& E = model.get_edge_indicators();
+        n_edges[s] = arma::accu(E) / 2;  // off-diagonal edges (E is symmetric)
+        // Discount the diagonal-1 convention if it applies. Standard ggm
+        // counts edges as accu(upper-tri) which is accu(E)/2 here.
+    }
+
+    return Rcpp::List::create(
+        Rcpp::Named("final_edges")  = model.get_edge_indicators(),
+        Rcpp::Named("n_edges_path") = n_edges
+    );
+}
diff --git a/src/mcmc/execution/chain_result.h b/src/mcmc/execution/chain_result.h
index e14696f6..37524e68 100644
--- a/src/mcmc/execution/chain_result.h
+++ b/src/mcmc/execution/chain_result.h
@@ -56,6 +56,25 @@ class ChainResult {
     /// Whether AM diagnostics are stored.
     bool        has_am_diagnostics = false;
 
+    /// Hierarchical-spec V-ratio diagnostics. sign(V_curr) ∈ {-1, +1} and
+    /// log|V_curr| recorded per iteration, snapshotted at end-of-iteration
+    /// from the model state. Used by Lyne (2015) sign-corrected ergodic
+    /// averaging (bgms_posterior_mean() helper, F3). In operational cells
+    /// (low δ, well-tuned κ) sign ≡ +1 and the correction collapses to
+    /// the plain mean; the diagnostic exists primarily for transparency
+    /// and outlier detection.
+    arma::ivec  v_sign_samples;
+    arma::vec   v_log_abs_samples;
+    /// Whether V-ratio diagnostics are stored (true only for hierarchical-
+    /// spec GGM chains).
+    bool        has_v_ratio_diagnostics = false;
+
+    /// End-of-chain model diagnostics snapshot (model.get_diagnostics_summary()).
+    /// Default-constructed empty list; populated once at the end of
+    /// run_mcmc_chain. Used by GGMModel to surface hierarchical auto-reject
+    /// counters.
+    Rcpp::List  diagnostics_summary = Rcpp::List::create();
+
     /**
      * Reserve storage for samples
      * @param param_dim  Number of parameters per sample
@@ -107,6 +126,16 @@ class ChainResult {
         has_am_diagnostics = true;
     }
 
+    /**
+     * Reserve storage for hierarchical-spec V-ratio diagnostics.
+     * @param n_iter  Number of sampling iterations
+     */
+    void reserve_v_ratio_diagnostics(const size_t n_iter) {
+        v_sign_samples.set_size(n_iter);
+        v_log_abs_samples.set_size(n_iter);
+        has_v_ratio_diagnostics = true;
+    }
+
     /**
      * Store a parameter sample
      * @param iter    Iteration index (0-based)
@@ -159,4 +188,15 @@ class ChainResult {
     void store_am_diagnostics(const size_t iter, double accept_prob) {
         am_accept_prob_samples(iter) = accept_prob;
     }
+
+    /**
+     * Store V-ratio diagnostics for one iteration (hierarchical-spec only).
+     * @param iter      Iteration index (0-based)
+     * @param sign      sign(V_curr) at end of iteration, ∈ {-1, +1}
+     * @param log_abs   log|V_curr| at end of iteration
+     */
+    void store_v_ratio_diagnostics(const size_t iter, int sign, double log_abs) {
+        v_sign_samples(iter) = sign;
+        v_log_abs_samples(iter) = log_abs;
+    }
 };
diff --git a/src/mcmc/execution/chain_runner.cpp b/src/mcmc/execution/chain_runner.cpp
index 6fe737b8..e439f987 100644
--- a/src/mcmc/execution/chain_runner.cpp
+++ b/src/mcmc/execution/chain_runner.cpp
@@ -89,6 +89,13 @@ void run_mcmc_chain(
                 chain_result.store_am_diagnostics(sample_index, result.accept_prob);
             }
 
+            if (chain_result.has_v_ratio_diagnostics) {
+                chain_result.store_v_ratio_diagnostics(
+                    sample_index,
+                    model.current_sign_V(),
+                    model.current_log_abs_V());
+            }
+
             chain_result.store_sample(sample_index, model.get_storage_vectorized_parameters());
 
             if (chain_result.has_indicators) {
@@ -107,6 +114,10 @@ void run_mcmc_chain(
         }
     }
 
+    // Capture end-of-chain diagnostic snapshot from the model. For GGMModel
+    // this surfaces the hierarchical auto-reject counters; for other models
+    // the override returns an empty list and we just store that.
+    chain_result.diagnostics_summary = model.get_diagnostics_summary();
 }
 
 
@@ -164,6 +175,10 @@ std::vector<ChainResult> run_mcmc_sampler(
         if (has_am_diag) {
             results[c].reserve_am_diagnostics(config.no_iter);
         }
+
+        if (model.has_v_ratio_diagnostics()) {
+            results[c].reserve_v_ratio_diagnostics(config.no_iter);
+        }
     }
 
     if (no_threads > 1) {
@@ -231,6 +246,15 @@ Rcpp::List convert_results_to_list(const std::vector<ChainResult>& results) {
             if (chain.has_am_diagnostics) {
                 chain_list["am_accept_prob"] = chain.am_accept_prob_samples;
             }
+
+            if (chain.has_v_ratio_diagnostics) {
+                chain_list["v_sign"]    = chain.v_sign_samples;
+                chain_list["v_log_abs"] = chain.v_log_abs_samples;
+            }
+
+            if (chain.diagnostics_summary.size() > 0) {
+                chain_list["diagnostics_summary"] = chain.diagnostics_summary;
+            }
         }
 
         output[i] = chain_list;
diff --git a/src/models/base_model.h b/src/models/base_model.h
index c9eea1cd..d10e5b14 100644
--- a/src/models/base_model.h
+++ b/src/models/base_model.h
@@ -106,6 +106,39 @@ class BaseModel {
         return std::numeric_limits<double>::quiet_NaN();
     }
 
+    /**
+     * @return True if this model maintains V-ratio diagnostics
+     * (sign(V_curr), log|V_curr|) suitable for Lyne-style sign-corrected
+     * ergodic averaging. Default: false. Overridden by GGMModel under
+     * hierarchical graph_prior_spec.
+     */
+    virtual bool has_v_ratio_diagnostics() const { return false; }
+
+    /**
+     * @return Current sign(V_curr) ∈ {-1, +1} for the running V state.
+     * Only meaningful when has_v_ratio_diagnostics() returns true.
+     */
+    virtual int current_sign_V() const { return 1; }
+
+    /**
+     * @return Current log|V_curr| for the running V state, or NaN if
+     * V has not been computed yet. Only meaningful when
+     * has_v_ratio_diagnostics() returns true.
+     */
+    virtual double current_log_abs_V() const {
+        return std::numeric_limits<double>::quiet_NaN();
+    }
+
+    /**
+     * Per-chain diagnostic summary captured once at the end of the chain.
+     * Default is an empty list; models override to return counters / state
+     * that aren't naturally per-iteration. Used by run_mcmc_chain to plumb
+     * GGMModel's hierarchical auto-reject counters into ChainResult.
+     */
+    virtual Rcpp::List get_diagnostics_summary() const {
+        return Rcpp::List::create();
+    }
+
     /**
      * Set the target Metropolis acceptance rate for Robbins-Monro proposal
      * adaptation. Called by the sampler entry points (sample_omrf,
diff --git a/src/models/ggm/degord_sampler.cpp b/src/models/ggm/degord_sampler.cpp
new file mode 100644
index 00000000..69a75369
--- /dev/null
+++ b/src/models/ggm/degord_sampler.cpp
@@ -0,0 +1,482 @@
+// DEGORD-permuted Bartlett-Cholesky importance sampler for log Zhat(G).
+// Direct port of ~/SV/Z/R/src/degord_sampler.h (v4, 2026-05-18) with the
+// header-only static-inline bodies moved into one translation unit, and
+// a SafeRNG-based Bartlett pool draw added at the end.
+//
+// The kernel is bit-identical to the z reference up to floating-point
+// reordering when given the same noise pool and ChainAux/PiAux state.
+
+#include "models/ggm/degord_sampler.h"
+#include <RcppArmadillo.h>
+#include <cmath>
+#include <limits>
+#include <algorithm>
+
+namespace degord {
+
+ChainAux make_chain_aux(int q, double alpha, double beta,
+                        double sigma, double delta) {
+  ChainAux c;
+  c.q = q;
+  c.alpha = alpha; c.beta = beta; c.sigma = sigma; c.delta = delta;
+  c.sigma2 = sigma * sigma;
+  c.inv_sigma2 = 1.0 / c.sigma2;
+  c.two_beta = 2.0 * beta;
+  c.inv_two_beta = 1.0 / c.two_beta;
+  c.log_sigma = std::log(sigma);
+  c.log_2pi = std::log(2.0 * M_PI);
+  c.slab_kernel_const = -0.5 * std::log(2.0 * M_PI * c.sigma2);
+  c.sigma_diag = 1.0 / std::sqrt(4.0 * beta);
+  c.inv_sigma_diag2 = 4.0 * beta;
+  c.half_log_2pi_sigma_diag2 = 0.5 * std::log(2.0 * M_PI / (4.0 * beta));
+  c.slab_tilt_mode = 0;
+
+  int nmax = q + 1;
+  c.nu_chi_df.assign(nmax, 0.0);
+  c.nu_half_k.assign(nmax, 0.0);
+  c.nu_km1.assign(nmax, 0.0);
+  c.nu_lgamma_half_k.assign(nmax, 0.0);
+  c.nu_mu_l.assign(nmax, 0.0);
+  c.nu_diag_const.assign(nmax, 0.0);
+  c.nu_H_e_saddle.assign(nmax, 0.0);
+  c.nu_inv_sqrt_H_e_saddle.assign(nmax, 0.0);
+  c.nu_mu_coef_saddle.assign(nmax, 0.0);
+  c.nu_sigma_star_sq_saddle.assign(nmax, 0.0);
+  c.nu_inv_sigma_star_sq_saddle.assign(nmax, 0.0);
+  c.nu_slab_const_saddle.assign(nmax, 0.0);
+  c.nu_per_vertex.assign(nmax, 0.0);
+
+  double log_beta = std::log(beta);
+  for (int nu = 0; nu < nmax; ++nu) {
+    double k    = static_cast<double>(nu) + 2.0 + 2.0 * delta;
+    double hk   = 0.5 * k;
+    double km1  = k - 1.0;
+    double lgk  = std::lgamma(hk);
+    double mu_l = std::sqrt(km1 * c.inv_two_beta);
+    double mu_l2 = km1 * c.inv_two_beta;
+    double H_e  = c.two_beta + mu_l2 * c.inv_sigma2;
+    double inv_sqrt_H_e = 1.0 / std::sqrt(H_e);
+    double mu_coef = -mu_l * c.inv_sigma2 / H_e;
+    double sig_st2 = mu_l2 * c.inv_two_beta + c.sigma2;
+    double inv_sig_st2 = 1.0 / sig_st2;
+    double slab_const_saddle = c.log_sigma - 0.5 * std::log(sig_st2);
+    double diag_const = std::log(2.0) + hk * log_beta - lgk
+                        + c.half_log_2pi_sigma_diag2;
+    double per_vertex = lgk - hk * log_beta;
+    c.nu_chi_df[nu]                   = k;
+    c.nu_half_k[nu]                   = hk;
+    c.nu_km1[nu]                      = km1;
+    c.nu_lgamma_half_k[nu]            = lgk;
+    c.nu_mu_l[nu]                     = mu_l;
+    c.nu_diag_const[nu]               = diag_const;
+    c.nu_H_e_saddle[nu]               = H_e;
+    c.nu_inv_sqrt_H_e_saddle[nu]      = inv_sqrt_H_e;
+    c.nu_mu_coef_saddle[nu]           = mu_coef;
+    c.nu_sigma_star_sq_saddle[nu]     = sig_st2;
+    c.nu_inv_sigma_star_sq_saddle[nu] = inv_sig_st2;
+    c.nu_slab_const_saddle[nu]        = slab_const_saddle;
+    c.nu_per_vertex[nu]               = per_vertex;
+  }
+  return c;
+}
+
+
+PiAux make_pi_aux(const arma::imat& G_pi, const ChainAux& c) {
+  int q = G_pi.n_rows;
+  PiAux a;
+  a.q = q;
+  a.G_pi = G_pi;
+  a.nu_pi.assign(q, 0);
+  int E = 0;
+  for (int r = 0; r < q - 1; ++r) {
+    for (int s = r + 1; s < q; ++s) {
+      if (G_pi(r, s) == 1) { a.nu_pi[r] += 1; ++E; }
+    }
+  }
+  a.E_count = E;
+  double log_beta = std::log(c.beta);
+  double log_pi_over_beta = std::log(M_PI) - log_beta;
+  double per_vertex = 0.0;
+  for (int l = 0; l < q; ++l) per_vertex += c.nu_per_vertex[a.nu_pi[l]];
+  double log_C0 = static_cast<double>(q) * log_beta
+                + (-0.5 * static_cast<double>(E)) * std::log(2.0 * M_PI * c.sigma2)
+                + per_vertex
+                + 0.5 * static_cast<double>(E) * log_pi_over_beta;
+  a.log_C0 = log_C0;
+  return a;
+}
+
+
+arma::imat permute_graph(const arma::imat& G, const arma::ivec& pi) {
+  int q = G.n_rows;
+  arma::imat G_pi(q, q, arma::fill::zeros);
+  for (int u = 0; u < q; ++u) {
+    int pu = pi[u];
+    for (int v = u + 1; v < q; ++v) {
+      int pv = pi[v];
+      if (G(u, v) == 1) {
+        int rmin = std::min(pu, pv), rmax = std::max(pu, pv);
+        G_pi(rmin, rmax) = 1;
+        G_pi(rmax, rmin) = 1;
+      }
+    }
+  }
+  return G_pi;
+}
+
+
+arma::ivec degord_permutation(int q, int i, int j) {
+  int lo = std::min(i, j), hi = std::max(i, j);
+  arma::ivec pi(q, arma::fill::zeros);
+  int next = 0;
+  for (int v = 0; v < q; ++v) {
+    if (v == lo || v == hi) continue;
+    pi[v] = next++;
+  }
+  pi[lo] = q - 2;
+  pi[hi] = q - 1;
+  return pi;
+}
+
+
+double phi_pi_sample_from_noise(
+    arma::mat& Phi_pi,
+    arma::vec& row_logw,
+    const double* noise,
+    const PiAux& a,
+    const ChainAux& c
+) {
+  int q = a.q;
+  auto edge_offset = [q](int r, int s) -> int {
+    return r * (q - 1) - r * (r - 1) / 2 + (s - r - 1);
+  };
+  double total_logw = 0.0;
+  double half_inv_s2  = 0.5 * c.inv_sigma2;
+
+  for (int r = 0; r < q - 1; ++r) {
+    int nu_r = a.nu_pi[r];
+    double mu_l_r        = c.nu_mu_l[nu_r];
+    double km1_r         = c.nu_km1[nu_r];
+    double diag_const_r  = c.nu_diag_const[nu_r];
+    double slab_const_saddle_r        = c.nu_slab_const_saddle[nu_r];
+    double inv_sqrt_H_e_r             = c.nu_inv_sqrt_H_e_saddle[nu_r];
+    double mu_coef_saddle_r           = c.nu_mu_coef_saddle[nu_r];
+    double inv_sigma_star_sq_saddle_r = c.nu_inv_sigma_star_sq_saddle[nu_r];
+
+    double phi_rr = mu_l_r + c.sigma_diag * noise[r];
+    if (phi_rr < 1e-12) phi_rr = 1e-12;
+    Phi_pi(r, r) = phi_rr;
+
+    double phi_rr2       = phi_rr * phi_rr;
+    double sigma_star2   = phi_rr2 * c.inv_two_beta + c.sigma2;
+    double inv_sigma_st2 = 1.0 / sigma_star2;
+    double tau           = c.two_beta + phi_rr2 * c.inv_sigma2;
+    double inv_sqrt_tau  = 1.0 / std::sqrt(tau);
+    double mu_coef       = -phi_rr * c.inv_sigma2 / tau;
+    double inv_phi_rr    = 1.0 / phi_rr;
+    double slab_per_edge = c.log_sigma - 0.5 * std::log(sigma_star2);
+
+    double dphi = phi_rr - mu_l_r;
+    double diag_logw = diag_const_r
+                     + km1_r * std::log(phi_rr)
+                     - c.beta * phi_rr2
+                     + 0.5 * dphi * dphi * c.inv_sigma_diag2;
+
+    double phi_minus_mu = phi_rr - mu_l_r;
+    double phi_plus_mu  = phi_rr + mu_l_r;
+
+    double slab_const_for_row = (c.slab_tilt_mode == 1)
+        ? slab_const_saddle_r
+        : slab_per_edge;
+    double rw = static_cast<double>(nu_r) * slab_const_for_row + diag_logw;
+
+    const double* col_r = Phi_pi.colptr(r);
+    for (int s = r + 1; s < q; ++s) {
+      const double* col_s = Phi_pi.colptr(s);
+      double S_rs = 0.0;
+      for (int k = 0; k < r; ++k) S_rs += col_r[k] * col_s[k];
+      if (a.G_pi(r, s) == 1) {
+        int idx = q + edge_offset(r, s);
+        double phi_rs;
+        if (c.slab_tilt_mode == 1) {
+          phi_rs = mu_coef_saddle_r * S_rs
+                 + noise[idx] * inv_sqrt_H_e_r;
+          Phi_pi(r, s) = phi_rs;
+          rw -= 0.5 * S_rs * S_rs * inv_sigma_star_sq_saddle_r;
+          rw -= phi_minus_mu * phi_rs
+              * (phi_plus_mu * phi_rs + 2.0 * S_rs) * half_inv_s2;
+        } else {
+          phi_rs = mu_coef * S_rs + noise[idx] * inv_sqrt_tau;
+          Phi_pi(r, s) = phi_rs;
+          rw -= 0.5 * S_rs * S_rs * inv_sigma_st2;
+        }
+      } else {
+        double phi_rs = -S_rs * inv_phi_rr;
+        Phi_pi(r, s) = phi_rs;
+        rw -= c.beta * phi_rs * phi_rs;
+      }
+    }
+    row_logw[r] = rw;
+    total_logw += rw;
+  }
+
+  // Trailing row q-1.
+  {
+    int r = q - 1;
+    int nu_r = a.nu_pi[r];
+    double mu_l_r       = c.nu_mu_l[nu_r];
+    double km1_r        = c.nu_km1[nu_r];
+    double diag_const_r = c.nu_diag_const[nu_r];
+    double phi_rr = mu_l_r + c.sigma_diag * noise[r];
+    if (phi_rr < 1e-12) phi_rr = 1e-12;
+    Phi_pi(r, r) = phi_rr;
+    double dphi = phi_rr - mu_l_r;
+    double diag_logw = diag_const_r
+                     + km1_r * std::log(phi_rr)
+                     - c.beta * phi_rr * phi_rr
+                     + 0.5 * dphi * dphi * c.inv_sigma_diag2;
+    row_logw[r] = diag_logw;
+    total_logw += diag_logw;
+  }
+  return total_logw;
+}
+
+
+double log_Zhat_pi_from_pool(
+    const arma::mat& noise_pool_t,
+    const PiAux& a,
+    const ChainAux& c
+) {
+  int q = a.q;
+  int M = static_cast<int>(noise_pool_t.n_cols);
+  double neg_inf = -std::numeric_limits<double>::infinity();
+  arma::vec log_w(M);
+  log_w.fill(neg_inf);
+  arma::mat Phi(q, q, arma::fill::zeros);
+  arma::vec row_logw(q, arma::fill::zeros);
+  double m = neg_inf;
+  int n_finite = 0;
+  for (int s = 0; s < M; ++s) {
+    double lw = phi_pi_sample_from_noise(
+        Phi, row_logw, noise_pool_t.colptr(s), a, c);
+    if (std::isfinite(lw)) {
+      log_w[s] = lw;
+      ++n_finite;
+      if (lw > m) m = lw;
+    }
+  }
+  if (n_finite == 0) return neg_inf;
+  double acc = 0.0;
+  for (int s = 0; s < M; ++s)
+    if (std::isfinite(log_w[s])) acc += std::exp(log_w[s] - m);
+  return a.log_C0 + m + std::log(acc) - std::log(static_cast<double>(M));
+}
+
+
+double log_Zhat_pi_from_pool_cache(
+    const arma::mat& noise_pool_t,
+    const PiAux& a,
+    const ChainAux& c,
+    PoolCache& cache
+) {
+  int q = a.q;
+  int M = static_cast<int>(noise_pool_t.n_cols);
+  double neg_inf = -std::numeric_limits<double>::infinity();
+  cache.log_w.set_size(M);    cache.log_w.fill(neg_inf);
+  cache.rw_head.set_size(M);
+  cache.S_trail.set_size(M);
+  arma::mat Phi(q, q, arma::fill::zeros);
+  arma::vec row_logw(q, arma::fill::zeros);
+  double m = neg_inf;
+  int n_finite = 0;
+  for (int s = 0; s < M; ++s) {
+    double lw = phi_pi_sample_from_noise(
+        Phi, row_logw, noise_pool_t.colptr(s), a, c);
+    if (std::isfinite(lw)) {
+      cache.log_w[s] = lw;
+      ++n_finite;
+      if (lw > m) m = lw;
+    }
+    double rh = 0.0;
+    for (int r = 0; r < q - 2; ++r) rh += row_logw[r];
+    rh += row_logw[q - 1];   // r_qm1 is invariant across curr/star (nu_pi[q-1] = 0 always);
+                             // include it in the head so delta_log_Zhat_pi_toggle's star
+                             // aggregation matches direct log_Zhat(star) - log_Zhat(curr).
+    cache.rw_head[s] = rh;
+    double s_trail = 0.0;
+    for (int k = 0; k < q - 2; ++k) s_trail += Phi(k, q - 2) * Phi(k, q - 1);
+    cache.S_trail[s] = s_trail;
+  }
+  if (n_finite == 0) return neg_inf;
+  double acc = 0.0;
+  for (int s = 0; s < M; ++s)
+    if (std::isfinite(cache.log_w[s])) acc += std::exp(cache.log_w[s] - m);
+  return a.log_C0 + m + std::log(acc) - std::log(static_cast<double>(M));
+}
+
+
+double row_qm2_logw_from_S(
+    double z_qm2,
+    double z_trail,
+    double S_trail,
+    const PiAux& a,
+    const ChainAux& c
+) {
+  int q = a.q;
+  int r = q - 2;
+  int nu_r = a.nu_pi[r];
+  double mu_l_r       = c.nu_mu_l[nu_r];
+  double km1_r        = c.nu_km1[nu_r];
+  double diag_const_r = c.nu_diag_const[nu_r];
+  double phi_rr = mu_l_r + c.sigma_diag * z_qm2;
+  if (phi_rr < 1e-12) phi_rr = 1e-12;
+  double phi_rr2 = phi_rr * phi_rr;
+  double dphi = phi_rr - mu_l_r;
+  double diag_logw = diag_const_r
+                   + km1_r * std::log(phi_rr)
+                   - c.beta * phi_rr2
+                   + 0.5 * dphi * dphi * c.inv_sigma_diag2;
+  if (a.G_pi(r, q - 1) == 1) {
+    if (c.slab_tilt_mode == 1) {
+      double mu_coef_saddle_r           = c.nu_mu_coef_saddle[nu_r];
+      double inv_sqrt_H_e_r             = c.nu_inv_sqrt_H_e_saddle[nu_r];
+      double slab_const_saddle_r        = c.nu_slab_const_saddle[nu_r];
+      double inv_sigma_star_sq_saddle_r = c.nu_inv_sigma_star_sq_saddle[nu_r];
+      double mu_rs   = mu_coef_saddle_r * S_trail;
+      double phi_rs  = mu_rs + z_trail * inv_sqrt_H_e_r;
+      double slab_logw = slab_const_saddle_r
+                       - 0.5 * S_trail * S_trail * inv_sigma_star_sq_saddle_r;
+      double phi_minus_mu = phi_rr - mu_l_r;
+      double phi_plus_mu  = phi_rr + mu_l_r;
+      double mismatch     = phi_minus_mu * phi_rs
+                          * (phi_plus_mu * phi_rs + 2.0 * S_trail)
+                          * 0.5 * c.inv_sigma2;
+      return diag_logw + slab_logw - mismatch;
+    } else {
+      (void) z_trail;
+      double sigma_star2 = phi_rr2 * c.inv_two_beta + c.sigma2;
+      return diag_logw
+           + c.log_sigma - 0.5 * std::log(sigma_star2)
+           - 0.5 * S_trail * S_trail / sigma_star2;
+    }
+  } else {
+    (void) z_trail;
+    double phi_rs = -S_trail / phi_rr;
+    return diag_logw - c.beta * phi_rs * phi_rs;
+  }
+}
+
+
+double log_Zhat_star_from_cache(
+    const arma::mat& noise_pool_t,
+    const PiAux& a_star,
+    const ChainAux& c,
+    const PoolCache& cache_curr
+) {
+  int q = a_star.q;
+  int M = static_cast<int>(noise_pool_t.n_cols);
+  double neg_inf = -std::numeric_limits<double>::infinity();
+  // Slab slot at (q-2, q-1) inside the per-sample noise vector. Mirrors
+  // the slab_idx calculation in delta_log_Zhat_pi_toggle.
+  int slab_idx = q + (q - 2) * (q + 1) / 2;
+  arma::vec log_w_star(M);
+  log_w_star.fill(neg_inf);
+  double m = neg_inf;
+  int n_finite = 0;
+  for (int s = 0; s < M; ++s) {
+    // Strided arma reads. For the V-pair hot path at moderate q the
+    // 2M strided reads are amortised against the M Phi rebuilds we are
+    // skipping (q-1 rows of dense work per sample).
+    double z_qm2   = noise_pool_t(q - 2, s);
+    double z_trail = noise_pool_t(slab_idx, s);
+    double rw_qm2_star = row_qm2_logw_from_S(
+        z_qm2, z_trail, cache_curr.S_trail[s], a_star, c);
+    double total = cache_curr.rw_head[s] + rw_qm2_star;
+    if (std::isfinite(total)) {
+      log_w_star[s] = total;
+      ++n_finite;
+      if (total > m) m = total;
+    }
+  }
+  if (n_finite == 0) return neg_inf;
+  double acc = 0.0;
+  for (int s = 0; s < M; ++s)
+    if (std::isfinite(log_w_star[s])) acc += std::exp(log_w_star[s] - m);
+  return a_star.log_C0 + m + std::log(acc) - std::log(static_cast<double>(M));
+}
+
+
+double delta_log_Zhat_pi_toggle(
+    const arma::mat& noise_pool,
+    const arma::mat& noise_pool_t,
+    const arma::imat& G_curr,
+    int i, int j,
+    const ChainAux& c
+) {
+  int q = c.q;
+  double neg_inf = -std::numeric_limits<double>::infinity();
+  arma::ivec pi = degord_permutation(q, i, j);
+  arma::imat G_pi_curr = permute_graph(G_curr, pi);
+  arma::imat G_pi_star = G_pi_curr;
+  int toggled = 1 - G_pi_curr(q - 2, q - 1);
+  G_pi_star(q - 2, q - 1) = toggled;
+  G_pi_star(q - 1, q - 2) = toggled;
+
+  PiAux a_curr = make_pi_aux(G_pi_curr, c);
+  PiAux a_star = make_pi_aux(G_pi_star, c);
+
+  PoolCache cache_curr;
+  double log_Zhat_curr = log_Zhat_pi_from_pool_cache(
+      noise_pool_t, a_curr, c, cache_curr);
+
+  int M = static_cast<int>(noise_pool.n_rows);
+  // z_qm2 access is contiguous along s for fixed d = q-2 in the M x dim layout.
+  const double* col_qm2 = noise_pool.colptr(q - 2);
+  // z_trail: slab noise slot for the trailing off-diagonal (q-2, q-1). The
+  // off-diagonal section begins at offset q in the noise vector and uses
+  // edge_offset(r, s) = r*(q-1) - r*(r-1)/2 + (s - r - 1). At
+  // (r, s) = (q-2, q-1) this collapses to (q-2)*(q+1)/2; total slab index
+  // is q + (q-2)*(q+1)/2. Used only when slab_tilt_mode == 1 (saddle-shifted
+  // IS path inside row_qm2_logw_from_S); slab_tilt_mode = 0 path ignores
+  // z_trail via (void) z_trail. Hardcoding z_trail = 0.0 here was the v4
+  // mode-1 sibling of the rw_head-misses-row-q-1 bug.
+  int slab_idx = q + (q - 2) * (q + 1) / 2;
+  const double* col_slab = noise_pool.colptr(slab_idx);
+  arma::vec log_w_star(M);
+  log_w_star.fill(neg_inf);
+  double m = neg_inf;
+  int n_finite = 0;
+  for (int s = 0; s < M; ++s) {
+    double z_qm2 = col_qm2[s];
+    double z_trail = col_slab[s];
+    double rw_qm2_star = row_qm2_logw_from_S(
+        z_qm2, z_trail, cache_curr.S_trail[s], a_star, c);
+    double total = cache_curr.rw_head[s] + rw_qm2_star;
+    if (std::isfinite(total)) {
+      log_w_star[s] = total;
+      ++n_finite;
+      if (total > m) m = total;
+    }
+  }
+  double log_Zhat_star;
+  if (n_finite == 0) {
+    log_Zhat_star = neg_inf;
+  } else {
+    double acc = 0.0;
+    for (int s = 0; s < M; ++s)
+      if (std::isfinite(log_w_star[s])) acc += std::exp(log_w_star[s] - m);
+    log_Zhat_star = a_star.log_C0 + m + std::log(acc)
+                  - std::log(static_cast<double>(M));
+  }
+  return log_Zhat_star - log_Zhat_curr;
+}
+
+
+arma::mat draw_bartlett_pool(SafeRNG& rng, int q, int M_inner) {
+  int dim = bartlett_pool_dim(q);
+  return arma_rnorm_mat(rng, static_cast<arma::uword>(dim),
+                        static_cast<arma::uword>(M_inner));
+}
+
+
+}  // namespace degord
diff --git a/src/models/ggm/degord_sampler.h b/src/models/ggm/degord_sampler.h
new file mode 100644
index 00000000..073f1a79
--- /dev/null
+++ b/src/models/ggm/degord_sampler.h
@@ -0,0 +1,243 @@
+#pragma once
+
+#include <RcppArmadillo.h>
+#include <vector>
+
+#include "rng/rng_utils.h"
+
+// DEGORD-permuted Bartlett-Cholesky importance sampler for log Zhat(G).
+//
+// Port of ~/SV/Z/R/src/degord_sampler.h (v4 layout, 2026-05-18). See the
+// z header for the derivation; this header preserves the v4 architecture
+// (per-nu transcendental tables, pre-transposed noise pool, trimmed
+// PiAux) and naming.
+//
+// Convention: the DEGORD permutation in *this* file sends the toggle
+// endpoints (i, j) to positions (q - 2, q - 1), so the toggled edge sits
+// at the trailing diagonal-plus-off-diagonal pair. This is different
+// from log_Z_NLO_gamma_degord (log_z_nlo.h), which places the toggle at
+// (0, 1) for the closed-form formula's vertex-ordered structure. Both
+// conventions are correct in their respective uses: the analytical c
+// in the V estimator and the inner Zhat-from-pool importance sampler
+// are computed under their own permutations of the same G.
+
+namespace degord {
+
+// ----------------------------------------------------------------------
+// Per-chain auxiliary constants.
+//
+// Built once at the start of a chain (or when (alpha, beta, sigma,
+// delta) changes). Holds all (q, alpha, beta, sigma, delta)-dependent
+// constants plus per-nu transcendental tables indexed by forward degree
+// nu in 0..q. Tables are read inside the inner kernel via
+// c.nu_X[a.nu_pi[r]], avoiding per-row arma::vec allocations.
+// ----------------------------------------------------------------------
+struct ChainAux {
+  int    q;
+  double alpha;
+  double beta;
+  double sigma;
+  double delta;
+  double sigma2;
+  double inv_sigma2;
+  double two_beta;
+  double inv_two_beta;
+  double log_sigma;
+  double log_2pi;
+  double slab_kernel_const;
+  double sigma_diag;
+  double inv_sigma_diag2;
+  double half_log_2pi_sigma_diag2;
+  // Off-diagonal importance distribution selector:
+  //   0 (default) - sample phi_rs around the unshifted saddle (tau-based).
+  //   1           - sample phi_rs around the slab-tilt-shifted saddle.
+  int    slab_tilt_mode;
+  // Per-nu transcendental tables. Indexed by forward-degree nu in 0..q.
+  std::vector<double> nu_chi_df;
+  std::vector<double> nu_half_k;
+  std::vector<double> nu_km1;
+  std::vector<double> nu_lgamma_half_k;
+  std::vector<double> nu_mu_l;
+  std::vector<double> nu_diag_const;
+  std::vector<double> nu_H_e_saddle;
+  std::vector<double> nu_inv_sqrt_H_e_saddle;
+  std::vector<double> nu_mu_coef_saddle;
+  std::vector<double> nu_sigma_star_sq_saddle;
+  std::vector<double> nu_inv_sigma_star_sq_saddle;
+  std::vector<double> nu_slab_const_saddle;
+  std::vector<double> nu_per_vertex;
+};
+
+
+// Build a ChainAux from (q, alpha, beta, sigma, delta). slab_tilt_mode is
+// initialised to 0 (caller may overwrite after construction).
+ChainAux make_chain_aux(int q, double alpha, double beta,
+                        double sigma, double delta);
+
+
+// ----------------------------------------------------------------------
+// Per-permutation auxiliary state.
+//
+// Trimmed v4 layout: stores only the permuted graph, per-row forward
+// degree, edge count, and log_C0. Per-row constants are read from
+// ChainAux's nu_X[] tables.
+// ----------------------------------------------------------------------
+struct PiAux {
+  int                 q;
+  arma::imat          G_pi;
+  std::vector<int>    nu_pi;
+  int                 E_count;
+  double              log_C0;
+};
+
+
+PiAux make_pi_aux(const arma::imat& G_pi, const ChainAux& c);
+
+
+// ----------------------------------------------------------------------
+// Graph permutation helpers.
+// ----------------------------------------------------------------------
+
+// Apply a vertex permutation pi to G. pi[u] gives the new index of u.
+arma::imat permute_graph(const arma::imat& G, const arma::ivec& pi);
+
+// DEGORD permutation that sends (i, j) -> (q-2, q-1), with all other
+// vertices keeping their original order in positions 0..q-3.
+arma::ivec degord_permutation(int q, int i, int j);
+
+
+// ----------------------------------------------------------------------
+// Inner kernel.
+//
+// Given a length-(q + q(q-1)/2) noise vector (Gaussian deviates), fill
+// the upper-triangular Bartlett-Cholesky factor Phi_pi and return the
+// total log-importance-weight for this sample. row_logw is filled with
+// the per-row contributions for caching.
+//
+// Layout of noise[]:
+//   noise[0 .. q-1]                                : diagonal innovations
+//                                                    (Phi(r, r) for r = 0..q-1).
+//   noise[q + edge_offset(r, s)] for r < s         : off-diagonal innovation
+//                                                    for the (r, s) slot.
+//     edge_offset(r, s) = r*(q-1) - r*(r-1)/2 + (s - r - 1)
+//
+// Off-diagonal slots are allocated for all (r, s) with r < s. Slots
+// corresponding to non-edges are still consumed (filled with the slaving
+// Phi(r, s) = -S_rs / Phi(r, r)) so the noise indexing stays stable.
+// ----------------------------------------------------------------------
+double phi_pi_sample_from_noise(
+    arma::mat& Phi_pi,
+    arma::vec& row_logw,
+    const double* noise,
+    const PiAux& a,
+    const ChainAux& c);
+
+
+// ----------------------------------------------------------------------
+// log Zhat(G_pi) from a pre-transposed noise pool of shape (dim x M),
+// column-major (so each sample's noise vector is a contiguous column).
+//
+//   dim = q + q*(q - 1) / 2
+//   M   = number of inner samples
+//
+// Returns log Zhat (= log_C0 + log mean(exp(log_w))). Returns -Inf if
+// every sample's log_w is non-finite.
+// ----------------------------------------------------------------------
+double log_Zhat_pi_from_pool(
+    const arma::mat& noise_pool_t,
+    const PiAux& a,
+    const ChainAux& c);
+
+
+// ----------------------------------------------------------------------
+// PoolCache: per-sample state from a log_Zhat_pi_from_pool_cache call
+// that delta_log_Zhat_pi_toggle reuses to avoid recomputing the head
+// rows (0..q-3) when only the trailing edge (q-2, q-1) toggles.
+// ----------------------------------------------------------------------
+struct PoolCache {
+  arma::vec log_w;      // per-sample total log-importance weight
+  arma::vec rw_head;    // per-sample sum of row_logw[0..q-3]
+  arma::vec S_trail;    // per-sample dot product of columns q-2 and q-1 over rows 0..q-3
+};
+
+
+double log_Zhat_pi_from_pool_cache(
+    const arma::mat& noise_pool_t,
+    const PiAux& a,
+    const ChainAux& c,
+    PoolCache& cache);
+
+
+// Re-evaluate row (q-2)'s log_w (diag + slab/slaved at the trailing edge)
+// for a *new* edge indicator G_pi(q-2, q-1) given the cached S_trail.
+//
+// z_qm2 is the diagonal innovation at row q-2 (noise[q-2]); z_trail is
+// the off-diagonal innovation at the (q-2, q-1) slot (only used when
+// slab_tilt_mode == 1).
+double row_qm2_logw_from_S(
+    double z_qm2,
+    double z_trail,
+    double S_trail,
+    const PiAux& a,
+    const ChainAux& c);
+
+
+// ----------------------------------------------------------------------
+// log Zhat(G_pi_star) under a single-edge toggle at the trailing slot
+// (q-2, q-1), reusing a PoolCache built from log_Zhat_pi_from_pool_cache
+// at G_pi_curr (i.e., a_curr). Mirrors delta_log_Zhat_pi_toggle's
+// per-sample loop but returns log_Zhat_star rather than the delta;
+// composes with V_log_pair_at_Gamma_curr_star_degord for within-toggle
+// cache reuse on the V estimator.
+//
+// noise_pool_t : dim x M (the layout already held by ZRatioState's pools_t).
+//                z_qm2 and z_trail are read via strided arma access.
+// a_star       : pi_aux for G_pi_star (must share q with a_curr's; toggle
+//                must sit at (q-2, q-1) in both).
+// cache_curr   : the PoolCache produced by log_Zhat_pi_from_pool_cache
+//                under a_curr on this same noise_pool_t.
+// ----------------------------------------------------------------------
+double log_Zhat_star_from_cache(
+    const arma::mat& noise_pool_t,
+    const PiAux& a_star,
+    const ChainAux& c,
+    const PoolCache& cache_curr);
+
+
+// ----------------------------------------------------------------------
+// Efficient delta: log Zhat(Gamma_star) - log Zhat(Gamma_curr) under a
+// single-edge toggle (i, j), with G_pi_star differing from G_pi_curr
+// only at the trailing slot (q-2, q-1).
+//
+// Takes BOTH pool views:
+//   noise_pool   : M x dim, column-major. z_qm2 = noise_pool(s, q-2) is
+//                  contiguous along s.
+//   noise_pool_t : dim x M, column-major. The kernel's per-sample
+//                  contiguous noise extraction. Chain wrappers maintain
+//                  both views together.
+// ----------------------------------------------------------------------
+double delta_log_Zhat_pi_toggle(
+    const arma::mat& noise_pool,
+    const arma::mat& noise_pool_t,
+    const arma::imat& G_curr,
+    int i, int j,
+    const ChainAux& c);
+
+
+// ----------------------------------------------------------------------
+// Per-sample noise dimension for a chain of size q.
+// ----------------------------------------------------------------------
+inline int bartlett_pool_dim(int q) {
+  return q + q * (q - 1) / 2;
+}
+
+
+// ----------------------------------------------------------------------
+// Draw an independent standard-normal Bartlett pool of shape
+// (dim x M_inner), column-major. Each column is one inner sample's
+// noise vector. Uses SafeRNG so chain seeds are deterministic.
+// ----------------------------------------------------------------------
+arma::mat draw_bartlett_pool(SafeRNG& rng, int q, int M_inner);
+
+
+}  // namespace degord
diff --git a/src/models/ggm/ggm_model.cpp b/src/models/ggm/ggm_model.cpp
index fd00efbd..096cc383 100644
--- a/src/models/ggm/ggm_model.cpp
+++ b/src/models/ggm/ggm_model.cpp
@@ -4,6 +4,28 @@
 #include "math/cholupdate.h"
 #include "mcmc/execution/step_result.h"
 #include "mcmc/execution/warmup_schedule.h"
+#include "models/ggm/log_z_nlo.h"
+#include "models/ggm/manuscript_nlo.h"
+#include "models/ggm/z_ratio_estimator.h"
+#include <stdexcept>
+
+// ----------------------------------------------------------------------
+// log_Z_NLO closed-form selector. Returns the centring log Z_NLO at G:
+//   - manuscript App C NLO (eq:NLO-decomp) when use_manuscript_nlo_ && α=1
+//   - bgms's pre-2026-05-21 log_Z_NLO_gamma otherwise (also the fallback
+//     at α ≠ 1, where the manuscript form's Na/Nb/Nc terms are not ported).
+// File-local helper so it doesn't pollute the GGMModel public API.
+// ----------------------------------------------------------------------
+static inline double compute_log_Z_NLO_centre(
+    const arma::imat& G,
+    double alpha, double beta, double sigma, double delta,
+    bool use_manuscript
+) {
+    if (use_manuscript && alpha == 1.0) {
+        return ggm_nlo::log_Z_manuscript_NLO_alpha1(G, beta, sigma, delta);
+    }
+    return log_Z_NLO_gamma(G, alpha, beta, sigma, /*include_F=*/false, delta);
+}
 
 // =====================================================================
 // NUTS gradient support
@@ -334,24 +356,44 @@ void GGMModel::cholesky_update_after_edge(double omega_ij_old, double omega_jj_o
     vf2_[i] = v2_[0];
     vf2_[j] = v2_[1];
 
-    // we now have
-    // aOmega_prop - (aOmega + vf1 %*% t(vf2) + vf2 %*% t(vf1))
-
+    // K_new = K_old + vf1 vf2^T + vf2 vf1^T = K_old + u1 u1^T - u2 u2^T,
+    // where u1 = (vf1 + vf2) / sqrt(2), u2 = (vf1 - vf2) / sqrt(2). The
+    // change-of-basis diagonalises the symmetric rank-2 update so the chol
+    // factor advances via one rank-1 update + one rank-1 downdate.
     u1_ = (vf1_ + vf2_) / sqrt(2);
     u2_ = (vf1_ - vf2_) / sqrt(2);
 
-    // update phi (2x O(p^2))
+    // L update (2 x O(p^2)). Still required so cholesky_of_precision_ tracks
+    // K (used by get_log_det in the next iteration).
     cholesky_update(cholesky_of_precision_, u1_);
     cholesky_downdate(cholesky_of_precision_, u2_);
 
-    // update inverse — fall back to full recomputation if rank-1
-    // updates have caused numerical drift
-    bool ok = arma::solve(inv_cholesky_of_precision_, arma::trimatu(cholesky_of_precision_),
-                          arma::eye(p_, p_), arma::solve_opts::fast);
-    if (!ok) {
+    // Sherman-Morrison-Woodbury rank-2 update of covariance_matrix_ = inv(K).
+    // Replaces the prior arma::solve(trimatu(L), I) (O(p^3)) + inv_L * inv_L.t()
+    // (O(p^3)) refresh with 4 x O(p^2) work:
+    //   K_new = K_old + M D M^T, M = [u1, u2], D = diag(+1, -1)
+    //   inv(K_new) = inv(K_old) - A C^{-1} A^T,
+    //     A = inv(K_old) M = [a1, a2],
+    //     C = D^{-1} + M^T inv(K_old) M = diag(+1, -1) + symmetric 2x2.
+    // Capacitance singularity (|det C| ~ 0) falls back to refresh_cholesky.
+    // inv_cholesky_of_precision_ is no longer maintained per accept — only
+    // refresh_cholesky() updates it (it's a scratch artefact of the prior
+    // path, not read between accepts).
+    arma::vec a1 = covariance_matrix_ * u1_;
+    arma::vec a2 = covariance_matrix_ * u2_;
+    double c11 =  1.0 + arma::dot(u1_, a1);
+    double c12 =        arma::dot(u1_, a2);
+    double c22 = -1.0 + arma::dot(u2_, a2);
+    double det = c11 * c22 - c12 * c12;
+    if (!std::isfinite(det) || std::abs(det) < 1e-14) {
         refresh_cholesky();
     } else {
-        covariance_matrix_ = inv_cholesky_of_precision_ * inv_cholesky_of_precision_.t();
+        double inv_c00 =  c22 / det;
+        double inv_c11 =  c11 / det;
+        double inv_c01 = -c12 / det;
+        covariance_matrix_ -= inv_c00 * (a1 * a1.t());
+        covariance_matrix_ -= inv_c11 * (a2 * a2.t());
+        covariance_matrix_ -= inv_c01 * (a1 * a2.t() + a2 * a1.t());
     }
 
     // reset for next iteration
@@ -404,19 +446,29 @@ void GGMModel::cholesky_update_after_diag(double omega_ii_old, size_t i)
     bool s = delta > 0;
     vf1_(i) = std::sqrt(std::abs(delta));
 
+    // L rank-1 update so cholesky_of_precision_ tracks K (used by get_log_det).
     if (s)
         cholesky_downdate(cholesky_of_precision_, vf1_);
     else
         cholesky_update(cholesky_of_precision_, vf1_);
 
-    // update inverse — fall back to full recomputation if rank-1
-    // updates have caused numerical drift
-    bool ok = arma::solve(inv_cholesky_of_precision_, arma::trimatu(cholesky_of_precision_),
-                          arma::eye(p_, p_), arma::solve_opts::fast);
-    if (!ok) {
+    // SMW rank-1 update of covariance_matrix_ = inv(K). Replaces the prior
+    // O(p^3) solve+matmul refresh with one O(p^2) outer-product update.
+    //   K_new = K_old + alpha * e_i e_i^T, alpha = K_new(i,i) - K_old(i,i)
+    //   inv(K_new) = inv(K_old) - alpha * c_i c_i^T / (1 + alpha * c_i[i]),
+    //     c_i = inv(K_old).col(i).
+    // alpha = precision_proposal_(i,i) - omega_ii_old (note sign: delta is
+    // defined the other way around above so the chol update/downdate branch
+    // matches K_new > or < K_old). Refresh-fall-back guards near-singular
+    // denom.
+    double alpha = precision_proposal_(i, i) - omega_ii_old;
+    arma::vec ci = covariance_matrix_.col(i);
+    double denom = 1.0 + alpha * ci(i);
+    if (!std::isfinite(denom) || std::abs(denom) < 1e-14) {
         refresh_cholesky();
     } else {
-        covariance_matrix_ = inv_cholesky_of_precision_ * inv_cholesky_of_precision_.t();
+        double coeff = alpha / denom;
+        covariance_matrix_ -= coeff * (ci * ci.t());
     }
 
     // reset for next iteration
@@ -453,6 +505,11 @@ void GGMModel::update_edge_indicator_parameter_pair(size_t i, size_t j) {
 
         // Determinant-tilt prior: |K|^delta contributes delta * log_det_ratio
         // to the MH ratio. The rank-2 update at (i,j),(j,j) makes this O(p).
+        // NOTE: under the Roverato slaving (K_jj <- c_3 + phi^2 with phi chosen
+        // so K_ij <- 0), |K| is invariant to machine precision (proven via the
+        // 2x2 cofactor identity and verified numerically at q<=10). So this
+        // term is identically zero in practice; it's kept here defensively for
+        // any future non-Roverato proposal variant.
         if (determinant_tilt_ != 0.0) {
             ln_alpha += determinant_tilt_ * log_det_ratio_edge(i, j);
         }
@@ -469,6 +526,99 @@ void GGMModel::update_edge_indicator_parameter_pair(size_t i, size_t j) {
         ln_alpha += diagonal_prior_->logp(0.5 * precision_proposal_(j, j));
         ln_alpha -= diagonal_prior_->logp(0.5 * precision_matrix_(j, j));
 
+        // Hierarchical-spec correction. The joint-spec MH ratio implicitly
+        // targets π(Γ)·Z(Γ) marginally; to convert to the hierarchical
+        // target with marginal π(Γ), multiply by Z(Γ_curr)/Z(Γ_star). With
+        // V(Γ) ≈ 1/Z(Γ), this is V(Γ_star) / V(Γ_curr). In log form:
+        //   ln_alpha += log|V(Γ_star)| - log|V(Γ_curr)|.
+        // Lyne (2015) RR debias with the DEGORD-permuted Bartlett-Cholesky
+        // inner sampler.
+        double log_Z_NLO_star = log_Z_NLO_curr_;  // tentative; set below if hierarchical
+        // F2: V(Γ_star) carried to the accept block so we can advance the
+        // running V-diagnostic only on accept (set inside the hier_active
+        // branch below).
+        int    V_star_sign_for_diag   = 1;
+        double V_star_log_abs_for_diag = std::numeric_limits<double>::quiet_NaN();
+        bool hier_active = (graph_prior_spec_ == GraphPriorSpec::Hierarchical);
+        if (hier_active) {
+            ++n_hier_del_attempts_;
+            ensure_hierarchical_state_();
+            // Γ_star: this branch DELETES edge (i, j).
+            arma::imat G_star = edge_indicators_;
+            G_star(i, j) = 0;
+            G_star(j, i) = 0;
+            // log_Z_NLO_star via the cheap O(deg²) incremental at α=1 under
+            // the default formula; full-recompute when use_manuscript_nlo_ is
+            // set (no manuscript-incremental ported yet). At α ≠ 1 the
+            // selector falls back to bgms's formula either way.
+            if (prior_alpha_ == 1.0 && !use_manuscript_nlo_) {
+                double d = log_Z_NLO_gamma_delta_incr_alpha1(
+                    edge_indicators_, static_cast<int>(i), static_cast<int>(j),
+                    prior_beta_, prior_sigma_, determinant_tilt_, false);
+                log_Z_NLO_star = log_Z_NLO_curr_ + d;
+            } else {
+                log_Z_NLO_star = compute_log_Z_NLO_centre(
+                    G_star, prior_alpha_, prior_beta_, prior_sigma_,
+                    determinant_tilt_, use_manuscript_nlo_);
+            }
+            // V evaluated under the DEGORD permutation π that sends (i, j)
+            // to (q-2, q-1).
+            arma::ivec pi = degord::degord_permutation(
+                static_cast<int>(p_), static_cast<int>(i), static_cast<int>(j));
+            arma::imat G_pi_curr = degord::permute_graph(edge_indicators_, pi);
+            arma::imat G_pi_star = degord::permute_graph(G_star, pi);
+            // Log-space V: avoids underflow in c = kappa * exp(log_Z_NLO) at
+            // large p (log_Z_NLO is ~ -3500 at p=100, δ=1 → c flushes to 0).
+            // log_kappa cancels in the MH ratio, but log_c per Γ is needed
+            // to evaluate log|expm1(log_Zhat_m - log_c)| pointwise.
+            //
+            // Paired call shares the inner Phi-build across Γ_curr / Γ_star
+            // by caching (rw_head, S_trail) under a_curr and re-evaluating
+            // only row q-2 under a_star — halves per-pool work vs two
+            // independent V_log_at_Gamma_pi_degord calls.
+            double log_kappa = std::log(v_kappa_);
+            double log_c_curr = log_kappa + log_Z_NLO_curr_;
+            double log_c_star = log_kappa + log_Z_NLO_star;
+            auto V_pair = degord::V_log_pair_at_Gamma_curr_star_degord(
+                v_K_depth_, v_pools_t_,
+                G_pi_curr, G_pi_star, chain_aux_degord_,
+                log_c_curr, log_c_star, v_rho_);
+            // F2: initialise running V-diagnostic from V_pair.curr the first
+            // time we see a finite value (so the side-car has a meaningful
+            // entry from iteration 1 even before any accept). On accept the
+            // value is overwritten below from V_pair.star.
+            if (!v_diag_initialized_ &&
+                std::isfinite(V_pair.curr.first) && V_pair.curr.second != 0) {
+                current_sign_V_     = V_pair.curr.second;
+                current_log_abs_V_  = V_pair.curr.first;
+                v_diag_initialized_ = true;
+                last_v_pi_i_ = static_cast<int>(i);
+                last_v_pi_j_ = static_cast<int>(j);
+            }
+            V_star_sign_for_diag    = V_pair.star.second;
+            V_star_log_abs_for_diag = V_pair.star.first;
+            // Auto-reject on non-finite log|V| (sentinel for V = 0 or
+            // non-finite Zhat) or on sign flip across Γ_curr / Γ_star. The
+            // sign-flip reject remains until a proper Lyne sign accumulator
+            // composes downstream (F3).
+            if (!std::isfinite(V_pair.curr.first) || V_pair.curr.second == 0 ||
+                !std::isfinite(V_pair.star.first) || V_pair.star.second == 0 ||
+                V_pair.curr.second != V_pair.star.second) {
+                // Classify the auto-reject for diagnostics.
+                if (!std::isfinite(V_pair.curr.first) ||
+                    !std::isfinite(V_pair.star.first)) {
+                    ++n_hier_del_nonfinite_;
+                } else if (V_pair.curr.second == 0 || V_pair.star.second == 0) {
+                    ++n_hier_del_signzero_;
+                } else {
+                    ++n_hier_del_signflip_;
+                }
+                ln_alpha = -std::numeric_limits<double>::infinity();
+            } else {
+                ln_alpha += V_pair.star.first - V_pair.curr.first;
+            }
+        }
+
         if (MY_LOG(runif(rng_)) < ln_alpha) {
 
             // Store old values for Cholesky update
@@ -488,6 +638,15 @@ void GGMModel::update_edge_indicator_parameter_pair(size_t i, size_t j) {
 
             constraint_dirty_ = true;
             theta_valid_ = false;
+            if (hier_active) {
+                log_Z_NLO_curr_ = log_Z_NLO_star;
+                // Γ_star is now Γ_curr; advance the running V state.
+                current_sign_V_    = V_star_sign_for_diag;
+                current_log_abs_V_ = V_star_log_abs_for_diag;
+                v_diag_initialized_ = true;
+                last_v_pi_i_ = static_cast<int>(i);
+                last_v_pi_j_ = static_cast<int>(j);
+            }
         }
 
     } else {
@@ -517,7 +676,8 @@ void GGMModel::update_edge_indicator_parameter_pair(size_t i, size_t j) {
         // }
 
         // Determinant-tilt prior: |K|^delta contributes delta * log_det_ratio
-        // to the MH ratio.
+        // to the MH ratio. See the DELETE branch for the Roverato-invariance
+        // note - kept here defensively.
         if (determinant_tilt_ != 0.0) {
             ln_alpha += determinant_tilt_ * log_det_ratio_edge(i, j);
         }
@@ -536,12 +696,87 @@ void GGMModel::update_edge_indicator_parameter_pair(size_t i, size_t j) {
         // Proposal term: proposed edge value given it was generated from truncated normal
         ln_alpha -= R::dnorm(omega_prop_ij / constants_[3], 0.0, proposal_sd, true) - MY_LOG(constants_[3]);
 
+        // Hierarchical-spec correction (ADD branch): see DELETE branch for the
+        // rationale. Γ_star ADDS edge (i, j) here, so log_Z_NLO_star differs
+        // from log_Z_NLO_curr by the +add direction of the incremental.
+        double log_Z_NLO_star_add = log_Z_NLO_curr_;
+        // F2: V(Γ_star) carried to the accept block (mirrors DELETE branch).
+        int    V_star_sign_for_diag_add   = 1;
+        double V_star_log_abs_for_diag_add = std::numeric_limits<double>::quiet_NaN();
+        bool hier_active_add = (graph_prior_spec_ == GraphPriorSpec::Hierarchical);
+        if (hier_active_add) {
+            ++n_hier_add_attempts_;
+            ensure_hierarchical_state_();
+            arma::imat G_star = edge_indicators_;
+            G_star(i, j) = 1;
+            G_star(j, i) = 1;
+            if (prior_alpha_ == 1.0 && !use_manuscript_nlo_) {
+                double d = log_Z_NLO_gamma_delta_incr_alpha1(
+                    edge_indicators_, static_cast<int>(i), static_cast<int>(j),
+                    prior_beta_, prior_sigma_, determinant_tilt_, false);
+                log_Z_NLO_star_add = log_Z_NLO_curr_ + d;
+            } else {
+                log_Z_NLO_star_add = compute_log_Z_NLO_centre(
+                    G_star, prior_alpha_, prior_beta_, prior_sigma_,
+                    determinant_tilt_, use_manuscript_nlo_);
+            }
+            arma::ivec pi = degord::degord_permutation(
+                static_cast<int>(p_), static_cast<int>(i), static_cast<int>(j));
+            arma::imat G_pi_curr = degord::permute_graph(edge_indicators_, pi);
+            arma::imat G_pi_star = degord::permute_graph(G_star, pi);
+            // Log-space V with within-toggle cache reuse (see DELETE branch
+            // for the underflow rationale and the sign-flip auto-reject
+            // contract).
+            double log_kappa = std::log(v_kappa_);
+            double log_c_curr = log_kappa + log_Z_NLO_curr_;
+            double log_c_star = log_kappa + log_Z_NLO_star_add;
+            auto V_pair = degord::V_log_pair_at_Gamma_curr_star_degord(
+                v_K_depth_, v_pools_t_,
+                G_pi_curr, G_pi_star, chain_aux_degord_,
+                log_c_curr, log_c_star, v_rho_);
+            // F2: same diagnostic seeding as in the DELETE branch.
+            if (!v_diag_initialized_ &&
+                std::isfinite(V_pair.curr.first) && V_pair.curr.second != 0) {
+                current_sign_V_     = V_pair.curr.second;
+                current_log_abs_V_  = V_pair.curr.first;
+                v_diag_initialized_ = true;
+                last_v_pi_i_ = static_cast<int>(i);
+                last_v_pi_j_ = static_cast<int>(j);
+            }
+            V_star_sign_for_diag_add    = V_pair.star.second;
+            V_star_log_abs_for_diag_add = V_pair.star.first;
+            if (!std::isfinite(V_pair.curr.first) || V_pair.curr.second == 0 ||
+                !std::isfinite(V_pair.star.first) || V_pair.star.second == 0 ||
+                V_pair.curr.second != V_pair.star.second) {
+                if (!std::isfinite(V_pair.curr.first) ||
+                    !std::isfinite(V_pair.star.first)) {
+                    ++n_hier_add_nonfinite_;
+                } else if (V_pair.curr.second == 0 || V_pair.star.second == 0) {
+                    ++n_hier_add_signzero_;
+                } else {
+                    ++n_hier_add_signflip_;
+                }
+                ln_alpha = -std::numeric_limits<double>::infinity();
+            } else {
+                ln_alpha += V_pair.star.first - V_pair.curr.first;
+            }
+        }
+
         if (MY_LOG(runif(rng_)) < ln_alpha) {
             // Accept: turn ON the edge
             // Store old values for Cholesky update
             double omega_ij_old = precision_matrix_(i, j);
             double omega_jj_old = precision_matrix_(j, j);
 
+            if (hier_active_add) {
+                log_Z_NLO_curr_ = log_Z_NLO_star_add;
+                current_sign_V_    = V_star_sign_for_diag_add;
+                current_log_abs_V_ = V_star_log_abs_for_diag_add;
+                v_diag_initialized_ = true;
+                last_v_pi_i_ = static_cast<int>(i);
+                last_v_pi_j_ = static_cast<int>(j);
+            }
+
             // Update omega
             precision_matrix_(i, j) = omega_prop_ij;
             precision_matrix_(j, i) = omega_prop_ij;
@@ -590,6 +825,10 @@ void GGMModel::do_one_metropolis_step(int iteration) {
     if (metropolis_adapter_) {
         metropolis_adapter_->update(index_mask, accept_prob, iteration);
     }
+
+    // Catch SMW-accumulated drift in covariance_matrix_ over a long chain.
+    // O(p^2); the refresh path is only taken when drift exceeds tolerance.
+    check_and_refresh_if_drift_();
 }
 
 void GGMModel::init_metropolis_adaptation(const WarmupSchedule& schedule) {
@@ -601,8 +840,192 @@ void GGMModel::prepare_iteration() {
     // Shuffle edge visit order for random-scan edge selection.
     // Called unconditionally to keep RNG state consistent.
     shuffled_edge_order_ = arma_randperm(rng_, num_pairwise_);
+    // Refresh the V/RR U-pool at iteration start.
+    //   Legacy (mh_U_ = false): unconditional draw from μ(U). This breaks
+    //     PMMH invariance on (Γ, K, U, N) — the conditional under the
+    //     augmented target is V·μ, not μ alone — and yields a small Γ-
+    //     marginal bias (~−0.001 nats at p=20, p_inc=0.05).
+    //   Fixed (mh_U_ = true): V-ratio MH step on U, accepting on
+    //     log|V_new| − log|V_old|. Companion-AI delivery 2026-05-21.
+    //   On the very first prepare_iteration after lazy init, the state has
+    //     just been seeded with a fresh U via ensure_hierarchical_state_();
+    //     no comparison is possible, so we skip the MH step and treat the
+    //     init draw as the iteration-0 U.
+    if (graph_prior_spec_ == GraphPriorSpec::Hierarchical) {
+        bool was_built = hierarchical_state_built_;
+        ensure_hierarchical_state_();
+        if (!was_built) {
+            // First iteration: state seeded by ensure_hierarchical_state_;
+            // nothing more to do.
+            return;
+        }
+        if (mh_U_) {
+            mh_on_U_step_();
+        } else {
+            refresh_z_ratio_pool_();
+        }
+    }
 }
 
+
+// ----------------------------------------------------------------------
+// Hierarchical-spec (Phase 4) implementation
+// ----------------------------------------------------------------------
+
+void GGMModel::set_graph_prior_spec(GraphPriorSpec spec) {
+    if (spec == graph_prior_spec_) return;
+    graph_prior_spec_ = spec;
+    hierarchical_state_built_ = false;  // force rebuild on next use
+}
+
+
+void GGMModel::set_z_ratio_tuning(int M_inner, double kappa, double rho) {
+    if (M_inner < 1) throw std::runtime_error("M_inner must be >= 1");
+    if (!(rho > 0.0 && rho < 1.0))
+        throw std::runtime_error("rho must be in (0, 1)");
+    if (!(kappa > 0.0))
+        throw std::runtime_error("kappa must be > 0");
+    v_M_inner_ = M_inner;
+    v_kappa_   = kappa;
+    v_rho_     = rho;
+    hierarchical_state_built_ = false;
+}
+
+
+void GGMModel::ensure_hierarchical_state_() {
+    if (hierarchical_state_built_) return;
+    // Validate prior family. The closed-form log_Z_NLO_gamma machinery only
+    // covers slab = Normal(0, σ) and diag = Gamma(α, β) on K_ii/2.
+    const auto* slab = dynamic_cast<const NormalPrior*>(interaction_prior_.get());
+    if (slab == nullptr)
+        throw std::runtime_error(
+            "Hierarchical graph_prior_spec requires a Normal slab "
+            "(NormalPrior). Re-fit with interaction_prior_type = 'normal'.");
+    const auto* diag = dynamic_cast<const GammaScalePrior*>(diagonal_prior_.get());
+    if (diag == nullptr)
+        throw std::runtime_error(
+            "Hierarchical graph_prior_spec requires a Gamma diagonal prior "
+            "(GammaScalePrior).");
+
+    prior_sigma_ = slab->scale();
+    prior_alpha_ = diag->shape();
+    prior_beta_  = diag->rate();
+    double delta = determinant_tilt_;
+
+    chain_aux_degord_ = degord::make_chain_aux(
+        static_cast<int>(p_), prior_alpha_, prior_beta_, prior_sigma_, delta);
+
+    // Analytic centring at the current Γ (full-recompute; the incremental
+    // form is only used on accept). Use F = false to match the production
+    // convention (NLO without the F-piece — the F overcorrects at α > 1).
+    // The selector picks the manuscript App C NLO at α = 1 when the
+    // use_manuscript_nlo_ flag is set; otherwise it uses bgms's pre-
+    // 2026-05-21 log_Z_NLO_gamma.
+    log_Z_NLO_curr_ = compute_log_Z_NLO_centre(
+        edge_indicators_, prior_alpha_, prior_beta_, prior_sigma_,
+        delta, use_manuscript_nlo_);
+
+    refresh_z_ratio_pool_();
+    hierarchical_state_built_ = true;
+}
+
+
+void GGMModel::refresh_z_ratio_pool_() {
+    degord::draw_U_degord_rr(
+        rng_, v_K_depth_, v_pools_t_, v_M_inner_, static_cast<int>(p_), v_rho_);
+}
+
+
+// MH step on (U, K_depth) at the augmented target. The proposal is a fresh
+// draw from μ(U)·P(N), so the proposal density on the forward and reverse
+// move cancels and the MH ratio reduces to V_new / V_old. Auto-rejects on
+// any non-finite log|V| or sign(V_new) ≠ sign(V_old) — same convention as
+// the between-Γ MH (deferred Lyne sign accumulator). Choice of permutation
+// π: arbitrary; any π yields an unbiased V estimator. We pick π = (0, 1)
+// for simplicity (gives the canonical degord reordering that maps the first
+// two vertices to themselves).
+void GGMModel::mh_on_U_step_() {
+    if (graph_prior_spec_ != GraphPriorSpec::Hierarchical) return;
+
+    ++n_mh_U_attempts_;
+
+    // V_old at (Γ_curr, U_old). Reuse the cached running V state when it's
+    // available: the between-Γ MH stores V evaluated at the last-toggled
+    // edge's DEGORD permutation π_{last_v_pi_i_, last_v_pi_j_}, and the V
+    // estimator is unbiased under any permutation. Reusing the cache skips
+    // a full V_log_at_Gamma_pi_degord call — which dominates per-iter cost
+    // when K_depth is large.
+    //
+    // If the cache hasn't been seeded yet (first iteration with no finite
+    // V_pair), fall back to recomputing V_old at the canonical π = (0, 1).
+    double log_c = std::log(v_kappa_) + log_Z_NLO_curr_;
+    int    pi_i, pi_j;
+    std::pair<double, int> V_old;
+    if (v_diag_initialized_) {
+        V_old = { current_log_abs_V_, current_sign_V_ };
+        pi_i = last_v_pi_i_;
+        pi_j = last_v_pi_j_;
+    } else {
+        pi_i = 0;
+        pi_j = 1;
+        arma::ivec pi_canon = degord::degord_permutation(
+            static_cast<int>(p_), pi_i, pi_j);
+        arma::imat G_pi_canon = degord::permute_graph(edge_indicators_, pi_canon);
+        V_old = degord::V_log_at_Gamma_pi_degord(
+            v_K_depth_, v_pools_t_, G_pi_canon, chain_aux_degord_,
+            log_c, v_rho_);
+    }
+
+    // Proposal: fresh U_new ~ μ, K_depth_new ~ P(N).
+    int K_depth_new;
+    std::vector<arma::mat> pools_new;
+    degord::draw_U_degord_rr(
+        rng_, K_depth_new, pools_new, v_M_inner_, static_cast<int>(p_), v_rho_);
+
+    // V_new at (Γ_curr, U_new) evaluated under the SAME π as the cached V_old.
+    arma::ivec pi_vec = degord::degord_permutation(
+        static_cast<int>(p_), pi_i, pi_j);
+    arma::imat G_pi_curr = degord::permute_graph(edge_indicators_, pi_vec);
+    auto V_new = degord::V_log_at_Gamma_pi_degord(
+        K_depth_new, pools_new, G_pi_curr, chain_aux_degord_,
+        log_c, v_rho_);
+
+    // Auto-reject paths.
+    if (!std::isfinite(V_old.first) || !std::isfinite(V_new.first)) {
+        ++n_mh_U_nonfinite_;
+        return;
+    }
+    if (V_old.second == 0 || V_new.second == 0) {
+        ++n_mh_U_signzero_;
+        return;
+    }
+    if (V_old.second != V_new.second) {
+        ++n_mh_U_signflip_;
+        return;
+    }
+
+    double log_alpha = V_new.first - V_old.first;
+    if (MY_LOG(runif(rng_)) < log_alpha) {
+        // Accept: install proposed pool and update running V state to V_new.
+        // last_v_pi_i_ / last_v_pi_j_ already point at this π, so they don't
+        // need updating.
+        v_pools_t_ = std::move(pools_new);
+        v_K_depth_ = K_depth_new;
+        current_log_abs_V_ = V_new.first;
+        current_sign_V_    = V_new.second;
+        v_diag_initialized_ = true;
+        ++n_mh_U_accepts_;
+    }
+    // Reject: current_log_abs_V_ / current_sign_V_ already correspond to
+    // (Γ_curr, U_old) under last_v_pi_i_,last_v_pi_j_; nothing to update.
+}
+
+
+// NOTE: the on-accept update of log_Z_NLO_curr_ lives inline in
+// update_edge_indicator_parameter_pair (both branches set log_Z_NLO_curr_ to
+// the pre-computed log_Z_NLO_star{,_add} inside their MH accept blocks).
+// The incremental form is the alpha=1 fast path; alpha != 1 full-recomputes.
+
 void GGMModel::update_edge_indicators() {
     for (size_t idx = 0; idx < num_pairwise_; ++idx) {
         size_t flat = shuffled_edge_order_(idx);
@@ -621,6 +1044,8 @@ void GGMModel::update_edge_indicators() {
         }
         update_edge_indicator_parameter_pair(i, j);
     }
+    // SMW drift check; same rationale as the end-of-MH-step path.
+    check_and_refresh_if_drift_();
 }
 
 void GGMModel::tune_proposal_sd(int iteration, const WarmupSchedule& schedule) {
@@ -715,6 +1140,18 @@ void GGMModel::tune_proposal_sd(int iteration, const WarmupSchedule& schedule) {
     theta_valid_ = false;
 }
 
+void GGMModel::check_and_refresh_if_drift_() {
+    // diag(cov * K) should be ones. Compute the max abs deviation in O(p^2)
+    // via the elementwise product (K is symmetric so cov(i,:) * K(:,i)
+    // equals arma::dot(cov.row(i), K.row(i))).
+    arma::vec d = arma::sum(covariance_matrix_ % precision_matrix_, 1) - 1.0;
+    double drift = arma::abs(d).max();
+    if (!std::isfinite(drift) || drift > kCovDriftTol_) {
+        refresh_cholesky();
+    }
+}
+
+
 void GGMModel::refresh_cholesky() {
     cholesky_of_precision_ = arma::chol(precision_matrix_, "upper");
     arma::solve(inv_cholesky_of_precision_, arma::trimatu(cholesky_of_precision_),
diff --git a/src/models/ggm/ggm_model.h b/src/models/ggm/ggm_model.h
index a0493d1e..c83618d4 100644
--- a/src/models/ggm/ggm_model.h
+++ b/src/models/ggm/ggm_model.h
@@ -2,15 +2,35 @@
 
 #include <array>
 #include <memory>
+#include <vector>
 #include "models/base_model.h"
 #include "math/cholesky_helpers.h"
 #include "rng/rng_utils.h"
 #include "models/ggm/graph_constraint_structure.h"
 #include "models/ggm/ggm_gradient.h"
+#include "models/ggm/degord_sampler.h"
 #include "priors/parameter_prior.h"
 #include "mcmc/samplers/metropolis_adaptation.h"
 
 
+/**
+ * Graph-prior specification for the GGM with edge selection.
+ *
+ *   Joint         (default): π_joint(K, Γ) ∝ slab·diag·|K|^δ·1{K∈M+(Γ)}·π(Γ).
+ *                 Γ marginal is π(Γ)·Z(Γ).
+ *   Hierarchical: π_hier(K, Γ)  ∝ slab·diag·|K|^δ·1{K∈M+(Γ)}/Z(Γ)·π(Γ).
+ *                 Γ marginal is π(Γ) directly. Requires the Z(Γ) ratio to be
+ *                 estimated unbiasedly per between-edge proposal; implemented
+ *                 via the DEGORD-permuted V/RR estimator (Phase 2 + 3).
+ *
+ * Hierarchical mode requires the slab to be NormalPrior and the diagonal to
+ * be GammaScalePrior (the closed-form log_Z_NLO_gamma machinery only
+ * supports this prior family). Construction will throw if hierarchical is
+ * requested under any other family.
+ */
+enum class GraphPriorSpec { Joint, Hierarchical };
+
+
 /**
  * GGMModel - Gaussian Graphical Model
  *
@@ -119,11 +139,25 @@ class GGMModel : public BaseModel {
         initialize_precision_from_mle();
     }
 
-    /** Copy constructor for cloning (required for parallel chains). */
+    /** Copy constructor for cloning (required for parallel chains).
+     *
+     * Note on hierarchical-spec state: only the *configuration* (graph_prior_spec_,
+     * v_M_inner_, v_kappa_, v_rho_) is copied. The lazy state (pools, chain aux,
+     * log_Z_NLO_curr_) is reset so each cloned chain rebuilds it on first
+     * ensure_hierarchical_state_() call - this is intentional because pools are
+     * RNG-derived and would otherwise share random draws across chains.
+     */
     GGMModel(const GGMModel& other)
         : BaseModel(other),
           target_accept_(other.target_accept_),
           determinant_tilt_(other.determinant_tilt_),
+          graph_prior_spec_(other.graph_prior_spec_),
+          hierarchical_state_built_(false),
+          use_manuscript_nlo_(other.use_manuscript_nlo_),
+          mh_U_(other.mh_U_),
+          v_M_inner_(other.v_M_inner_),
+          v_kappa_(other.v_kappa_),
+          v_rho_(other.v_rho_),
           n_(other.n_),
           p_(other.p_),
           dim_(other.dim_),
@@ -164,6 +198,14 @@ class GGMModel : public BaseModel {
     /** @return true when missing-data imputation is active. */
     bool has_missing_data()    const override { return has_missing_; }
 
+    /** @return true under hierarchical graph_prior_spec — the only path
+     *  where V(Γ, U) is computed and sign / log|V| are meaningful. */
+    bool has_v_ratio_diagnostics() const override {
+        return graph_prior_spec_ == GraphPriorSpec::Hierarchical;
+    }
+    int    current_sign_V()    const override { return current_sign_V_; }
+    double current_log_abs_V() const override { return current_log_abs_V_; }
+
     /** Impute missing entries from full-conditional normal distributions. */
     void impute_missing() override;
 
@@ -219,6 +261,58 @@ class GGMModel : public BaseModel {
         constraint_dirty_ = true;
     }
 
+    /**
+     * Switch the chain to hierarchical-spec inference (default is Joint).
+     * Validates the slab/diag prior family is (NormalPrior, GammaScalePrior)
+     * — throws std::runtime_error if not. Lazy: state is built on first
+     * use (next prepare_iteration or between-edge proposal).
+     */
+    void set_graph_prior_spec(GraphPriorSpec spec);
+
+    /**
+     * Configure the V/RR estimator tuning. Defaults: M_inner=100, kappa=1.0,
+     * rho=0.5. Only consumed when graph_prior_spec_ == Hierarchical.
+     */
+    void set_z_ratio_tuning(int M_inner, double kappa, double rho);
+
+    /**
+     * Select the analytic NLO formula used as the RR centring in the
+     * between-Γ MH ratio. `false` (default) keeps the pre-2026-05-21 bgms
+     * formula (log_Z_NLO_gamma). `true` switches to the manuscript App C
+     * NLO (eq:NLO-decomp; ~/SV/Z/notes/2026-05-21_message-to-bgms-companion-NLO-fix.md).
+     * The manuscript form drops the `(ν_i + α) / M_v[i]` factor in B_e
+     * compared to bgms's existing formula; at α=1, δ=0 the two coincide
+     * bit-exactly, with divergence growing with δ. Only consumed under
+     * Hierarchical graph_prior_spec and α=1; at α ≠ 1 the flag is a no-op
+     * (manuscript Na/Nb/Nc terms are not ported yet).
+     */
+    void set_use_manuscript_nlo(bool on) { use_manuscript_nlo_ = on; }
+    bool use_manuscript_nlo() const { return use_manuscript_nlo_; }
+
+    /**
+     * Enable a Metropolis–Hastings step on the auxiliary U-pool at the start
+     * of each iteration in place of the legacy "draw U fresh from μ".
+     *
+     * Background: the chain is a block PMMH on (Γ, K, U, N) targeting
+     * π(Γ, K) · V(U, N) · μ(U) · P(N). Under that target, p(U | Γ, K) is
+     * proportional to V(U; Γ, K) · μ(U) — NOT μ(U) alone. Refreshing U
+     * by a fresh draw from μ skips the V-tilt and breaks invariance;
+     * empirically this yields a small but systematic Γ-marginal bias
+     * (~−0.001 nats at p=20, p_inc=0.05; 5/5 seeds same sign in our
+     * tests).
+     *
+     * With this flag, the U-refresh is replaced by a proper MH step:
+     * propose U_new ~ μ, N_new ~ P(N) and accept with log α =
+     * log|V(Γ, U_new)| − log|V(Γ, U_old)| (μ, P(N) cancel by proposal
+     * symmetry). Cross-implementation experiments confirm this collapses
+     * the bias to MC noise (companion-AI delivery 2026-05-21).
+     *
+     * Default `false` preserves the pre-2026-05-21 chain and SBC-clean
+     * baselines.
+     */
+    void set_mh_U(bool on) { mh_U_ = on; }
+    bool mh_U() const { return mh_U_; }
+
     /** Shuffle edge visit order (random scan). */
     void prepare_iteration() override;
 
@@ -402,6 +496,117 @@ class GGMModel : public BaseModel {
     // GGMGradientEngine on every rebuild.
     double determinant_tilt_ = 0.0;
 
+    // ---- Hierarchical-spec inference (Phase 4) ----
+    // Default is Joint (the existing chain semantics). Hierarchical mode
+    // multiplies the between-edge MH ratio by V(Γ_curr)/V(Γ_star), an
+    // unbiased estimator of Z(Γ_star)/Z(Γ_curr), to convert the joint-
+    // marginal Γ target π(Γ)·Z(Γ) into the user-specified π(Γ).
+    GraphPriorSpec graph_prior_spec_ = GraphPriorSpec::Joint;
+    bool   hierarchical_state_built_ = false;
+    // Toggle for the manuscript App C NLO closed form (see
+    // set_use_manuscript_nlo). Default `false` preserves the pre-2026-05-21
+    // bgms formula and the SBC-clean baselines built on it.
+    bool   use_manuscript_nlo_ = false;
+    // Toggle for the MH-on-U fix at sweep boundary (see set_mh_U). Default
+    // `false` preserves the legacy fresh-from-μ refresh.
+    bool   mh_U_ = false;
+    int    v_M_inner_ = 100;
+    double v_kappa_   = 1.0;
+    double v_rho_     = 0.5;
+    // Per-Γ_curr state (only relevant when graph_prior_spec_ == Hierarchical):
+    //   chain_aux_degord_       : (alpha, beta, sigma, delta) constants for DEGORD.
+    //   log_Z_NLO_curr_         : analytic centring at Γ_curr (incremented on accept).
+    //   v_pools_t_, v_K_depth_  : current U = (K_depth, K pools), refreshed per iteration.
+    degord::ChainAux       chain_aux_degord_;
+    double                 log_Z_NLO_curr_ = 0.0;
+    int                    v_K_depth_     = 0;
+    std::vector<arma::mat> v_pools_t_;
+    // Extracted prior-family params (cached at ensure_hierarchical_state_).
+    double prior_sigma_ = 1.0;  // NormalPrior scale
+    double prior_alpha_ = 1.0;  // GammaScalePrior shape
+    double prior_beta_  = 1.0;  // GammaScalePrior rate
+
+    // Running V-ratio state for Lyne-style sign-corrected ergodic averaging
+    // (F2). Updated inside update_edge_indicator_parameter_pair on accept;
+    // chain runner snapshots into ChainResult at end of each sampling
+    // iteration. v_diag_initialized_ stays false until the first V_log_pair
+    // call produces a finite value, so chain-runner reads NaN for any
+    // iteration that hits no toggle proposals (degenerate).
+    int    current_sign_V_     = 1;
+    double current_log_abs_V_  = std::numeric_limits<double>::quiet_NaN();
+    bool   v_diag_initialized_ = false;
+    // Endpoints of the toggle whose DEGORD permutation π_{i,j} was last used
+    // to evaluate the cached V state (current_log_abs_V_, current_sign_V_).
+    // mh_on_U_step_ reuses this permutation so V_old can be read from the
+    // cache rather than recomputed. Initialised to (0, 1) — the canonical
+    // permutation — so mh_U is well-defined even if no toggle has fired yet.
+    int    last_v_pi_i_ = 0;
+    int    last_v_pi_j_ = 1;
+
+    // ---- Hierarchical-spec auto-reject counters ----
+    // Counters for the between-Γ MH step's auto-reject sentinel. Incremented
+    // inside update_edge_indicator_parameter_pair whenever the V/RR machinery
+    // forces ln_alpha = -inf. Each proposal is classified into one of three
+    // failure modes:
+    //   *_nonfinite : non-finite log|V| at Γ_curr or Γ_star (V = 0 or RR
+    //                 underflow / overflow).
+    //   *_signzero  : sign(V) == 0 sentinel at Γ_curr or Γ_star.
+    //   *_signflip  : both signs are finite and non-zero but differ — the
+    //                 deferred Lyne sign-corrected weighting would handle
+    //                 this; the current chain auto-rejects.
+    // Counters survive across iterations (cumulative); read once at end of
+    // chain via get_diagnostics_summary().
+    mutable long long n_hier_add_attempts_   = 0;
+    mutable long long n_hier_add_nonfinite_  = 0;
+    mutable long long n_hier_add_signzero_   = 0;
+    mutable long long n_hier_add_signflip_   = 0;
+    mutable long long n_hier_del_attempts_   = 0;
+    mutable long long n_hier_del_nonfinite_  = 0;
+    mutable long long n_hier_del_signzero_   = 0;
+    mutable long long n_hier_del_signflip_   = 0;
+
+    // MH-on-U counters (set_mh_U fix). Each iteration's start adds one to
+    // n_mh_U_attempts_ (after lazy init), plus exactly one of accepts or
+    // the failure-mode counters.
+    mutable long long n_mh_U_attempts_   = 0;
+    mutable long long n_mh_U_accepts_    = 0;
+    mutable long long n_mh_U_nonfinite_  = 0;
+    mutable long long n_mh_U_signzero_   = 0;
+    mutable long long n_mh_U_signflip_   = 0;
+
+  public:
+    /// @inheritdoc
+    Rcpp::List get_diagnostics_summary() const override {
+        return Rcpp::List::create(
+            Rcpp::Named("hier_add_attempts")  = static_cast<double>(n_hier_add_attempts_),
+            Rcpp::Named("hier_add_nonfinite") = static_cast<double>(n_hier_add_nonfinite_),
+            Rcpp::Named("hier_add_signzero")  = static_cast<double>(n_hier_add_signzero_),
+            Rcpp::Named("hier_add_signflip")  = static_cast<double>(n_hier_add_signflip_),
+            Rcpp::Named("hier_del_attempts")  = static_cast<double>(n_hier_del_attempts_),
+            Rcpp::Named("hier_del_nonfinite") = static_cast<double>(n_hier_del_nonfinite_),
+            Rcpp::Named("hier_del_signzero")  = static_cast<double>(n_hier_del_signzero_),
+            Rcpp::Named("hier_del_signflip")  = static_cast<double>(n_hier_del_signflip_),
+            Rcpp::Named("mh_U_attempts")      = static_cast<double>(n_mh_U_attempts_),
+            Rcpp::Named("mh_U_accepts")       = static_cast<double>(n_mh_U_accepts_),
+            Rcpp::Named("mh_U_nonfinite")     = static_cast<double>(n_mh_U_nonfinite_),
+            Rcpp::Named("mh_U_signzero")      = static_cast<double>(n_mh_U_signzero_),
+            Rcpp::Named("mh_U_signflip")      = static_cast<double>(n_mh_U_signflip_)
+        );
+    }
+
+  private:
+
+    /// Lazy initialiser for the V/RR machinery. Validates prior family,
+    /// builds chain_aux_degord_, computes log_Z_NLO_curr_ via full-recompute,
+    /// draws the first U-pool. Idempotent (no-op when state is fresh).
+    void ensure_hierarchical_state_();
+    /// Draw a fresh (K_depth, pools_t) U for the V estimator.
+    void refresh_z_ratio_pool_();
+    /// MH step on (U, K_depth) using a fresh draw from μ as proposal. Accepts
+    /// on log|V(Γ_curr; U_new)| − log|V(Γ_curr; U_old)|; μ and P(N) cancel by
+    /// proposal symmetry. Companion-AI delivery 2026-05-21 (see set_mh_U).
+    void mh_on_U_step_();
+
     /** Extract upper triangle of the precision matrix into a vector. */
     arma::vec extract_upper_triangle() const {
         arma::vec result(dim_);
@@ -639,6 +844,26 @@ class GGMModel : public BaseModel {
      */
     void refresh_cholesky();
 
+    /**
+     * End-of-sweep drift check on covariance_matrix_.
+     *
+     * The SMW rank-1/rank-2 updates that replace the per-accept
+     * O(p^3) refresh in cholesky_update_after_{edge,diag} are
+     * backward stable but still accumulate FP error in
+     * covariance_matrix_ over a long chain. This helper measures
+     * max_i |sum_k cov(i,k) * K(k,i) - 1| -- the worst-case
+     * diagonal entry of cov*K minus the identity -- and triggers
+     * refresh_cholesky() when it exceeds kCovDriftTol_.
+     * Computed in O(p^2) so the check fits comfortably inside the
+     * MH sweep budget.
+     */
+    void check_and_refresh_if_drift_();
+
+    /// Absolute tolerance on max|diag(cov*K) - 1| before refresh.
+    /// Set conservatively; on a clean refresh this quantity is
+    /// O(p * eps * cond(K)).
+    static constexpr double kCovDriftTol_ = 1e-8;
+
     /**
      * Initialize precision matrix at the regularized MLE.
      *
diff --git a/src/models/ggm/log_z_nlo.cpp b/src/models/ggm/log_z_nlo.cpp
new file mode 100644
index 00000000..3363d4a0
--- /dev/null
+++ b/src/models/ggm/log_z_nlo.cpp
@@ -0,0 +1,624 @@
+// Closed-form log Z(G) approximations for the spikeslab GGM prior with
+// Gamma diagonal + determinant tilt. See log_z_nlo.h for derivation
+// references; algorithm is a direct port of branchB_chain.cpp:470-620 and
+// incremental_log_Z_NLO_gamma.h in ~/SV/Z/R/src/.
+
+#include "models/ggm/log_z_nlo.h"
+#include <vector>
+#include <cmath>
+#include <limits>
+
+// ----------------------------------------------------------------------
+// Local helpers (alpha = 1 specialisations for the incremental form).
+// ----------------------------------------------------------------------
+static inline int nu_at(int l, const arma::imat& G) {
+  int q = G.n_rows;
+  int n = 0;
+  for (int m = l + 1; m < q; ++m) if (G(l, m) == 1) n += 1;
+  return n;
+}
+
+static inline double Mv_at_alpha1(int l, const arma::imat& G, double delta) {
+  return static_cast<double>(nu_at(l, G)) + 2.0 * (1.0 + delta) - 1.0;
+}
+
+static inline double He_alpha1(int k, const arma::imat& G,
+                                double beta, double sigma, double delta) {
+  double Mv_k = Mv_at_alpha1(k, G, delta);
+  return 2.0 * beta + Mv_k / (2.0 * sigma * sigma * beta);
+}
+
+// ----------------------------------------------------------------------
+// Full-recompute log Z_LO + log Z_NLO at general alpha, with optional
+// NNLO_H F-piece (off by default; on alpha > 1 the Exp-style F over-
+// corrects). At delta = 0 collapses to the pre-tilt formula bit-exact.
+// ----------------------------------------------------------------------
+double log_Z_NLO_gamma(
+    const arma::imat& G,
+    double alpha, double beta, double sigma,
+    bool include_F,
+    double delta
+) {
+  int q = G.n_rows;
+  std::vector<int> nu(q, 0);
+  int E_count = 0;
+  for (int l = 0; l < q; ++l)
+    for (int m = l + 1; m < q; ++m)
+      if (G(l, m) == 1) { ++nu[l]; ++E_count; }
+
+  double sigma2 = sigma * sigma;
+
+  // Diagonal Gamma integral per vertex + tilt-induced constants.
+  // Tilt shifts the lgamma argument by 2 delta and adds -q delta log beta
+  // to the diagonal piece summed over the q vertices.
+  double log_C0 = 0.5 * static_cast<double>(E_count) * std::log(M_PI)
+                  - 0.5 * static_cast<double>(E_count) * std::log(2.0 * M_PI * sigma2)
+                  - static_cast<double>(E_count) * std::log(beta)
+                  - static_cast<double>(q) * std::lgamma(alpha)
+                  - static_cast<double>(q) * delta * std::log(beta);
+  for (int l = 0; l < q; ++l)
+    log_C0 += std::lgamma((static_cast<double>(nu[l]) + 2.0 * (alpha + delta)) / 2.0);
+
+  if (E_count == 0) return log_C0;
+
+  // Tilt-shifted saddle exponent M_v = nu_v + 2 (alpha + delta) - 1.
+  std::vector<double> M_v(q);
+  for (int l = 0; l < q; ++l)
+    M_v[l] = static_cast<double>(nu[l]) + 2.0 * (alpha + delta) - 1.0;
+
+  // H_e per edge (k, v), k < v:
+  //   H_e = 2 beta + M_k / (2 sigma^2 beta) - 4 beta (alpha - 1) / M_v.
+  arma::mat H_e_lookup(q, q, arma::fill::zeros);
+  for (int k = 0; k < q; ++k)
+    for (int v = k + 1; v < q; ++v)
+      if (G(k, v) == 1) {
+        double h = 2.0 * beta
+                   + M_v[k] / (2.0 * sigma2 * beta)
+                   - 4.0 * beta * (alpha - 1.0) / M_v[v];
+        H_e_lookup(k, v) = h;
+        H_e_lookup(v, k) = h;
+      }
+
+  // LO slab: 0.5 sum_e log(2 beta / H_e). Guard against H_e <= 0.
+  double log_LO = 0.0;
+  for (int k = 0; k < q; ++k)
+    for (int v = k + 1; v < q; ++v)
+      if (G(k, v) == 1) {
+        double h = H_e_lookup(k, v);
+        if (h <= 0.0) return -std::numeric_limits<double>::infinity();
+        log_LO += 0.5 * std::log(2.0 * beta / h);
+      }
+
+  // T (outgoing edges, v as smaller endpoint) and S (incoming).
+  std::vector<double> T1(q, 0.0), T2(q, 0.0), S1(q, 0.0), S2(q, 0.0);
+  for (int v = 0; v < q; ++v) {
+    for (int w = v + 1; w < q; ++w)
+      if (G(v, w) == 1) {
+        double inv = 1.0 / H_e_lookup(v, w);
+        T1[v] += inv; T2[v] += inv * inv;
+      }
+    for (int k = 0; k < v; ++k)
+      if (G(k, v) == 1) {
+        double inv = 1.0 / H_e_lookup(k, v);
+        S1[v] += inv; S2[v] += inv * inv;
+      }
+  }
+
+  double delta_NLO = 0.0;
+
+  // B_e per edge: -(nu_i + alpha) / (4 beta sigma^2 M_i H_e), e = (i, j).
+  for (int i = 0; i < q; ++i)
+    for (int j = i + 1; j < q; ++j)
+      if (G(i, j) == 1)
+        delta_NLO -= (static_cast<double>(nu[i]) + alpha)
+                     / (4.0 * beta * sigma2 * M_v[i] * H_e_lookup(i, j));
+
+  // C_{e, k} per (edge e=(i,j), common predecessor k < i with G(k,i)=G(k,j)=1).
+  for (int i = 0; i < q; ++i) {
+    for (int j = i + 1; j < q; ++j) {
+      if (G(i, j) != 1) continue;
+      double H_e = H_e_lookup(i, j);
+      for (int k = 0; k < i; ++k)
+        if (G(k, i) == 1 && G(k, j) == 1) {
+          double H_ki = H_e_lookup(k, i), H_kj = H_e_lookup(k, j);
+          delta_NLO += -1.0 / (2.0 * sigma2 * H_ki * H_kj)
+                       + M_v[i] / (4.0 * beta * sigma2 * sigma2 * H_e * H_ki * H_kj);
+        }
+    }
+  }
+
+  // D_v per vertex with at least one outgoing edge.
+  for (int v = 0; v < q; ++v)
+    if (T1[v] > 0.0)
+      delta_NLO += M_v[v] / (16.0 * beta * beta * sigma2 * sigma2)
+                   * (2.0 * T2[v] + T1[v] * T1[v]);
+
+  // E_{lm} (+ optional F_{lm}) per non-edge with common predecessors.
+  for (int l = 1; l < q - 1; ++l) {
+    for (int m = l + 1; m < q; ++m) {
+      if (G(l, m) != 0) continue;
+      double s1 = 0.0, s2 = 0.0;
+      bool has_common = false;
+      for (int k = 0; k < l; ++k)
+        if (G(k, l) == 1 && G(k, m) == 1) {
+          double H_kl = H_e_lookup(k, l), H_km = H_e_lookup(k, m);
+          double inv_prod = 1.0 / (H_kl * H_km);
+          s1 += inv_prod; s2 += inv_prod * inv_prod;
+          has_common = true;
+        }
+      if (!has_common) continue;
+      double Ml = M_v[l], b2 = beta * beta, b4 = b2 * b2;
+      delta_NLO -= 2.0 * b2 / Ml * s1;
+      if (include_F) {
+        delta_NLO -= 2.0 * b2 / (Ml * Ml) * s1;
+        delta_NLO += 12.0 * b4 / (Ml * Ml) * s2;
+        delta_NLO += 4.0 * b4 / (Ml * Ml) * s1 * s1;
+      }
+    }
+  }
+
+  // N_v per vertex (alpha > 1 Gamma piece; zero at alpha = 1).
+  if (alpha != 1.0) {
+    double am1 = alpha - 1.0;
+    for (int v = 0; v < q; ++v) {
+      double Mv = M_v[v];
+      double bracket_4 = 2.0 * S2[v] + S1[v] * S1[v];
+      double Mv2 = Mv * Mv, Mv3 = Mv2 * Mv;
+      double Na = -am1 * ( 3.0 / (8.0 * Mv2)
+                          - 3.0 * beta * S1[v] / Mv2
+                          + 2.0 * beta * beta * bracket_4 / Mv2 );
+      double Nb = am1 * am1 * ( 5.0 / (12.0 * Mv3)
+                                - 2.0 * beta * S1[v] / Mv3
+                                + 4.0 * beta * beta * bracket_4 / Mv3 );
+      double Nc = am1 * (static_cast<double>(nu[v]) + 1.0)
+                  * ( 5.0 / (12.0 * Mv3) - beta * S1[v] / Mv3 );
+      delta_NLO += Na + Nb + Nc;
+    }
+    // M_e per edge.
+    for (int i = 0; i < q; ++i)
+      for (int j = i + 1; j < q; ++j)
+        if (G(i, j) == 1)
+          delta_NLO += am1 / (sigma2 * M_v[i] * H_e_lookup(i, j))
+                       * (-1.0 / (4.0 * beta) + S1[i]);
+  }
+
+  return log_C0 + log_LO + delta_NLO;
+}
+
+// ----------------------------------------------------------------------
+// DEGORD reordering: permute toggle endpoints to positions (0, 1) and
+// keep all other vertices in their original order. The closed-form Z
+// is not permutation-invariant, so MH ratios computing log Z(G+)-log Z(G-)
+// must use the same reordering in both terms.
+// ----------------------------------------------------------------------
+double log_Z_NLO_gamma_degord(
+    const arma::imat& G, int i, int j,
+    double alpha, double beta, double sigma,
+    bool include_F, double delta
+) {
+  int q = G.n_rows;
+  std::vector<int> perm;
+  perm.reserve(q);
+  perm.push_back(i);
+  perm.push_back(j);
+  for (int v = 0; v < q; ++v) if (v != i && v != j) perm.push_back(v);
+  arma::imat G_perm(q, q, arma::fill::zeros);
+  for (int a = 0; a < q; ++a)
+    for (int b = 0; b < q; ++b)
+      G_perm(a, b) = G(perm[a], perm[b]);
+  return log_Z_NLO_gamma(G_perm, alpha, beta, sigma, include_F, delta);
+}
+
+// ----------------------------------------------------------------------
+// alpha = 1 incremental form. Sum of all log Z_NLO contributions whose
+// value depends on (nu_i, M_v[i], H_e for edges adjacent to i), evaluated
+// at vertex i in graph G. At alpha = 1 a single-edge toggle (i, j) with
+// i < j changes only these terms (the §4.6 locality result).
+// ----------------------------------------------------------------------
+static double vertex_i_contribs_alpha1(
+    const arma::imat& G, int i,
+    double beta, double sigma, double delta,
+    bool include_F
+) {
+  int q = G.n_rows;
+  double sigma2 = sigma * sigma;
+  double sigma4 = sigma2 * sigma2;
+  double beta2  = beta * beta;
+  double beta4  = beta2 * beta2;
+
+  std::vector<int> fwd_i;
+  for (int v = i + 1; v < q; ++v) if (G(i, v) == 1) fwd_i.push_back(v);
+  int nu_i = static_cast<int>(fwd_i.size());
+  double Mv_i = static_cast<double>(nu_i) + 2.0 * (1.0 + delta) - 1.0;
+  double H_i  = 2.0 * beta + Mv_i / (2.0 * sigma2 * beta);
+
+  double total = 0.0;
+
+  // LO + C0 contributions.
+  double C0_per_edge = 0.5 * std::log(M_PI)
+                       - 0.5 * std::log(2.0 * M_PI * sigma2)
+                       - std::log(beta);
+  total += static_cast<double>(nu_i) * C0_per_edge;
+  total += std::lgamma((static_cast<double>(nu_i) + 2.0 * (1.0 + delta)) / 2.0);
+  if (nu_i > 0)
+    total += 0.5 * static_cast<double>(nu_i) * std::log(2.0 * beta / H_i);
+
+  // Block 1 slab term per forward edge of i:
+  // -(nu_i + 1) / (4 beta sigma^2 M_v[i] H_e(i, v)). At alpha = 1, H_e(i, v) = H_i.
+  double block1_per_edge = -(static_cast<double>(nu_i) + 1.0)
+                            / (4.0 * beta * sigma2 * Mv_i * H_i);
+  total += static_cast<double>(nu_i) * block1_per_edge;
+
+  // Block 2 cross-predecessor per (forward edge (i, v), common pred k < i).
+  for (int v_idx = 0; v_idx < nu_i; ++v_idx) {
+    int v = fwd_i[v_idx];
+    for (int k = 0; k < i; ++k) {
+      if (G(k, i) == 1 && G(k, v) == 1) {
+        double H_ki = He_alpha1(k, G, beta, sigma, delta);
+        double H_kv = He_alpha1(k, G, beta, sigma, delta);
+        total += -1.0 / (2.0 * sigma2 * H_ki * H_kv)
+                 + Mv_i / (4.0 * beta * sigma4 * H_i * H_ki * H_kv);
+      }
+    }
+  }
+
+  // Block 3 D_v at v = i. At alpha = 1, H_e(i, w) = H_i for all forward
+  // edges of i, so T1[i] = nu_i / H_i, T2[i] = nu_i / H_i^2.
+  {
+    double T1_i = static_cast<double>(nu_i) / H_i;
+    double T2_i = static_cast<double>(nu_i) / (H_i * H_i);
+    total += Mv_i / (16.0 * beta2 * sigma4) * (2.0 * T2_i + T1_i * T1_i);
+  }
+
+  // Block 4 non-edges (i, m) where i is the smaller endpoint.
+  if (i > 0) {
+    for (int m = i + 1; m < q; ++m) {
+      if (G(i, m) != 0) continue;
+      double s1 = 0.0, s2 = 0.0;
+      bool any = false;
+      for (int k = 0; k < i; ++k) {
+        if (G(k, i) == 1 && G(k, m) == 1) {
+          double H_ki = He_alpha1(k, G, beta, sigma, delta);
+          double H_km = He_alpha1(k, G, beta, sigma, delta);
+          double inv = 1.0 / (H_ki * H_km);
+          s1 += inv;
+          s2 += inv * inv;
+          any = true;
+        }
+      }
+      if (!any) continue;
+      total += -2.0 * beta2 / Mv_i * s1;
+      if (include_F) {
+        total += -2.0 * beta2 / (Mv_i * Mv_i) * s1;
+        total += 12.0 * beta4 / (Mv_i * Mv_i) * s2;
+        total += 4.0 * beta4 / (Mv_i * Mv_i) * s1 * s1;
+      }
+    }
+  }
+
+  // Block 2 (k = i case) AND Block 4 (k = i case): pairs of forward
+  // neighbours (a, b) of i with i < a < b.
+  if (nu_i >= 2) {
+    for (int idx_a = 0; idx_a < nu_i - 1; ++idx_a) {
+      int a = fwd_i[idx_a];
+      double Mv_a = Mv_at_alpha1(a, G, delta);
+      double H_ia = He_alpha1(i, G, beta, sigma, delta);
+      for (int idx_b = idx_a + 1; idx_b < nu_i; ++idx_b) {
+        int b = fwd_i[idx_b];
+        double H_ib = He_alpha1(i, G, beta, sigma, delta);
+        if (G(a, b) == 1) {
+          double H_ab = He_alpha1(a, G, beta, sigma, delta);
+          total += -1.0 / (2.0 * sigma2 * H_ia * H_ib)
+                   + Mv_a / (4.0 * beta * sigma4 * H_ab * H_ia * H_ib);
+        } else {
+          double v_ab = 1.0 / (H_ia * H_ib);
+          double sum_other = 0.0;
+          for (int k = 0; k < a; ++k) {
+            if (k == i) continue;
+            if (G(k, a) == 1 && G(k, b) == 1) {
+              double H_ka = He_alpha1(k, G, beta, sigma, delta);
+              double H_kb = He_alpha1(k, G, beta, sigma, delta);
+              sum_other += 1.0 / (H_ka * H_kb);
+            }
+          }
+          total += -2.0 * beta2 / Mv_a * v_ab;
+          if (include_F) {
+            total += -2.0 * beta2 / (Mv_a * Mv_a) * v_ab;
+            total += 12.0 * beta4 / (Mv_a * Mv_a) * (v_ab * v_ab);
+            total += 4.0 * beta4 / (Mv_a * Mv_a) * (v_ab * v_ab);
+            total += 4.0 * beta4 / (Mv_a * Mv_a) * 2.0 * v_ab * sum_other;
+          }
+        }
+      }
+    }
+  }
+
+  return total;
+}
+
+// Public alpha = 1 incremental log Z_NLO ratio under single-edge toggle (i, j).
+// Computes the before-side first so that any accidental aliasing between the
+// two graphs (Armadillo copy-on-write) does not corrupt the before evaluation.
+double log_Z_NLO_gamma_delta_incr_alpha1(
+    const arma::imat& G_before, int i, int j,
+    double beta, double sigma, double delta,
+    bool include_F
+) {
+  int i_min = std::min(i, j);
+  double before_i = vertex_i_contribs_alpha1(
+      G_before, i_min, beta, sigma, delta, include_F);
+  arma::imat G_after(G_before);
+  G_after(i, j) = 1 - G_before(i, j);
+  G_after(j, i) = G_after(i, j);
+  double after_i = vertex_i_contribs_alpha1(
+      G_after, i_min, beta, sigma, delta, include_F);
+  return after_i - before_i;
+}
+
+// ----------------------------------------------------------------------
+// Partial log Z_NLO on an affected-vertex set V_a.
+//
+// Returns the sum of all log_Z_NLO_gamma term instances whose index set
+// intersects V_a. Instances disjoint from V_a are invariant under any
+// toggle that only changes edges incident to V_a, so they cancel in the
+// difference partial(G_after) - partial(G_before).
+//
+// Loop bounds are restricted to V_a's neighbourhood, using a canonical-
+// representative rule (the lowest-index V_a member of an instance owns
+// its enumeration) to count each qualifying instance exactly once. M_v,
+// H_e, and per-vertex T/S sums are precomputed on the full graph in
+// O(|E|) once the adjacency lists are built; the per-term loops are then
+// O(deg^2 + deg * q) instead of O(q^2 + q^3).
+//
+// Inputs: in_V (size q membership bitmap) and V_a_idx (V_a indices in
+// ascending order) — caller supplies both.
+// ----------------------------------------------------------------------
+static double partial_log_Z_NLO_gamma_on_V(
+    const arma::imat& G,
+    const std::vector<bool>& in_V,
+    const std::vector<int>& V_a_idx,
+    double alpha, double beta, double sigma,
+    bool include_F, double delta
+) {
+  int q = G.n_rows;
+  double sigma2 = sigma * sigma;
+  double sigma4 = sigma2 * sigma2;
+  double beta2  = beta * beta;
+  double beta4  = beta2 * beta2;
+
+  // Adjacency lists for fast neighbour iteration. O(|E|) to build.
+  std::vector<std::vector<int>> fwd(q), bwd(q);
+  for (int l = 0; l < q; ++l)
+    for (int m = l + 1; m < q; ++m)
+      if (G(l, m) == 1) {
+        fwd[l].push_back(m);
+        bwd[m].push_back(l);
+      }
+
+  std::vector<int> nu(q);
+  for (int l = 0; l < q; ++l) nu[l] = static_cast<int>(fwd[l].size());
+
+  // Count edges incident to V_a via the canonical-rep rule (each edge owned
+  // by its lowest-index V_a endpoint).
+  int E_count_in_V = 0;
+  for (int p : V_a_idx) {
+    E_count_in_V += static_cast<int>(fwd[p].size());        // (p, m), p smaller
+    for (int l : bwd[p]) if (!in_V[l]) ++E_count_in_V;       // (l, p), l smaller, l not in V_a
+  }
+  int q_in_V = static_cast<int>(V_a_idx.size());
+
+  // log_C0 partial.
+  double log_C0 = 0.5 * static_cast<double>(E_count_in_V) * std::log(M_PI)
+                  - 0.5 * static_cast<double>(E_count_in_V) * std::log(2.0 * M_PI * sigma2)
+                  - static_cast<double>(E_count_in_V) * std::log(beta)
+                  - static_cast<double>(q_in_V) * std::lgamma(alpha)
+                  - static_cast<double>(q_in_V) * delta * std::log(beta);
+  for (int p : V_a_idx)
+    log_C0 += std::lgamma((static_cast<double>(nu[p]) + 2.0 * (alpha + delta)) / 2.0);
+
+  // Total edge count: needed to short-circuit when graph is empty.
+  int E_count_total = 0;
+  for (int l = 0; l < q; ++l) E_count_total += nu[l];
+  if (E_count_total == 0) return log_C0;
+
+  // M_v[l] for all l (any v adjacent to V_a can appear as an index of an
+  // included term).
+  std::vector<double> M_v(q);
+  for (int l = 0; l < q; ++l)
+    M_v[l] = static_cast<double>(nu[l]) + 2.0 * (alpha + delta) - 1.0;
+
+  // Full H_e lookup (some non-V_a-incident edges are still cross-referenced
+  // in C_{e=(a, b), k} triples where neither k nor a is in V_a but b is).
+  arma::mat H_e_lookup(q, q, arma::fill::zeros);
+  for (int k = 0; k < q; ++k)
+    for (int v : fwd[k]) {
+      double h = 2.0 * beta
+                 + M_v[k] / (2.0 * sigma2 * beta)
+                 - 4.0 * beta * (alpha - 1.0) / M_v[v];
+      H_e_lookup(k, v) = h;
+      H_e_lookup(v, k) = h;
+    }
+
+  // T (forward, v as smaller) and S (backward, v as larger). For M_e in
+  // the alpha > 1 branch we need S1[a] at edges (a, b) where a is a
+  // backward neighbour of some V_a vertex (so a may be outside V_a). Keep
+  // T/S for all q vertices; cost O(|E|).
+  std::vector<double> T1(q, 0.0), T2(q, 0.0), S1(q, 0.0), S2(q, 0.0);
+  for (int v = 0; v < q; ++v) {
+    for (int w : fwd[v]) {
+      double inv = 1.0 / H_e_lookup(v, w);
+      T1[v] += inv; T2[v] += inv * inv;
+    }
+    for (int k : bwd[v]) {
+      double inv = 1.0 / H_e_lookup(k, v);
+      S1[v] += inv; S2[v] += inv * inv;
+    }
+  }
+
+  double log_LO = 0.0;
+  double delta_NLO = 0.0;
+
+  // ---- Edge-indexed terms (log_LO, B_e, M_e): enumerate V_a-incident
+  // edges via canonical rep (smaller V_a endpoint owns).
+  bool alpha_nontrivial = (alpha != 1.0);
+  double am1 = alpha - 1.0;
+
+  auto process_edge = [&](int a, int b) {
+    double h = H_e_lookup(a, b);
+    if (h <= 0.0) {
+      log_LO = -std::numeric_limits<double>::infinity();
+      return;
+    }
+    log_LO    += 0.5 * std::log(2.0 * beta / h);
+    delta_NLO -= (static_cast<double>(nu[a]) + alpha)
+                 / (4.0 * beta * sigma2 * M_v[a] * h);
+    if (alpha_nontrivial)
+      delta_NLO += am1 / (sigma2 * M_v[a] * h)
+                   * (-1.0 / (4.0 * beta) + S1[a]);
+  };
+
+  for (int p : V_a_idx) {
+    // (p, m) with p smaller: p owns.
+    for (int m : fwd[p]) process_edge(p, m);
+    if (!std::isfinite(log_LO)) return log_LO;
+    // (l, p) with l < p smaller, l not in V_a: p owns.
+    for (int l : bwd[p])
+      if (!in_V[l]) process_edge(l, p);
+    if (!std::isfinite(log_LO)) return log_LO;
+  }
+
+  // ---- C_{(a, b), k}: triples (k, a, b), k < a < b, all three edges
+  // present. Canonical rep = lowest-index V_a member of {k, a, b}.
+  auto process_C_triple = [&](int k, int a, int b) {
+    double H_ka = H_e_lookup(k, a);
+    double H_kb = H_e_lookup(k, b);
+    double H_ab = H_e_lookup(a, b);
+    delta_NLO += -1.0 / (2.0 * sigma2 * H_ka * H_kb)
+                 + M_v[a] / (4.0 * beta * sigma4 * H_ab * H_ka * H_kb);
+  };
+
+  for (int p : V_a_idx) {
+    // Case p = k: triples (p, a, b), p < a < b, all edges present. p is the
+    // smallest of the triple so p is canon (lowest V_a member).
+    int nf = static_cast<int>(fwd[p].size());
+    for (int ai = 0; ai < nf; ++ai) {
+      int a = fwd[p][ai];
+      for (int bi = ai + 1; bi < nf; ++bi) {
+        int b = fwd[p][bi];
+        if (G(a, b) == 1) process_C_triple(p, a, b);
+      }
+    }
+    // Case p = a: triples (k, p, b), k < p < b. Canon = p iff k not in V_a.
+    for (int k : bwd[p]) {
+      if (in_V[k]) continue;
+      for (int b : fwd[p])
+        if (G(k, b) == 1) process_C_triple(k, p, b);
+    }
+    // Case p = b: triples (k, a, p), k < a < p. Canon = p iff neither k nor
+    // a is in V_a.
+    int nb = static_cast<int>(bwd[p].size());
+    for (int ai = 0; ai < nb; ++ai) {
+      int a = bwd[p][ai];
+      if (in_V[a]) continue;
+      for (int ki = 0; ki < ai; ++ki) {
+        int k = bwd[p][ki];
+        if (in_V[k]) continue;
+        if (G(k, a) == 1) process_C_triple(k, a, p);
+      }
+    }
+  }
+
+  // ---- D_v: vertex term. v in V_a, T1[v] > 0.
+  for (int p : V_a_idx)
+    if (T1[p] > 0.0)
+      delta_NLO += M_v[p] / (16.0 * beta2 * sigma4)
+                   * (2.0 * T2[p] + T1[p] * T1[p]);
+
+  // ---- E_{lm} (+ F): non-edges (l, m), l < m, with at least one common
+  // predecessor. Canonical rep = lowest-index V_a member of {l, m}.
+  auto process_E_pair = [&](int l, int m) {
+    double s1 = 0.0, s2 = 0.0;
+    bool has_common = false;
+    // Common predecessors: k < l with G(k, l) = G(k, m) = 1. Use bwd[l]
+    // (predecessors of l) and check G(k, m).
+    for (int k : bwd[l]) {
+      if (G(k, m) == 1) {
+        double H_kl = H_e_lookup(k, l), H_km = H_e_lookup(k, m);
+        double inv_prod = 1.0 / (H_kl * H_km);
+        s1 += inv_prod; s2 += inv_prod * inv_prod;
+        has_common = true;
+      }
+    }
+    if (!has_common) return;
+    double Ml = M_v[l];
+    delta_NLO -= 2.0 * beta2 / Ml * s1;
+    if (include_F) {
+      delta_NLO -= 2.0 * beta2 / (Ml * Ml) * s1;
+      delta_NLO += 12.0 * beta4 / (Ml * Ml) * s2;
+      delta_NLO += 4.0 * beta4 / (Ml * Ml) * s1 * s1;
+    }
+  };
+
+  for (int p : V_a_idx) {
+    // Non-edges (p, m) with p smaller, m > p.
+    for (int m = p + 1; m < q; ++m)
+      if (G(p, m) == 0) process_E_pair(p, m);
+    // Non-edges (l, p) with l < p smaller, l not in V_a.
+    for (int l = 0; l < p; ++l)
+      if (G(l, p) == 0 && !in_V[l]) process_E_pair(l, p);
+  }
+
+  // ---- N_v (alpha > 1 only): vertex term at v in V_a. The N_v formula
+  // also depends on S1[v]/S2[v], which we already have for all v.
+  if (alpha_nontrivial) {
+    for (int p : V_a_idx) {
+      double Mv = M_v[p];
+      double bracket_4 = 2.0 * S2[p] + S1[p] * S1[p];
+      double Mv2 = Mv * Mv, Mv3 = Mv2 * Mv;
+      double Na = -am1 * ( 3.0 / (8.0 * Mv2)
+                          - 3.0 * beta * S1[p] / Mv2
+                          + 2.0 * beta2 * bracket_4 / Mv2 );
+      double Nb = am1 * am1 * ( 5.0 / (12.0 * Mv3)
+                                - 2.0 * beta * S1[p] / Mv3
+                                + 4.0 * beta2 * bracket_4 / Mv3 );
+      double Nc = am1 * (static_cast<double>(nu[p]) + 1.0)
+                  * ( 5.0 / (12.0 * Mv3) - beta * S1[p] / Mv3 );
+      delta_NLO += Na + Nb + Nc;
+    }
+  }
+
+  return log_C0 + log_LO + delta_NLO;
+}
+
+// Public general-alpha incremental log Z_NLO ratio under single-edge toggle.
+double log_Z_NLO_gamma_delta_incr_alphaN(
+    const arma::imat& G_before, int i, int j,
+    double alpha, double beta, double sigma, double delta,
+    bool include_F
+) {
+  int q = G_before.n_rows;
+
+  // Affected vertex set V_a = {i, j} U N_G_before(i). N_after(i) differs
+  // from N_before(i) by at most {j}, and j is in V_a regardless.
+  std::vector<bool> in_V(q, false);
+  in_V[i] = true;
+  in_V[j] = true;
+  for (int v = 0; v < q; ++v)
+    if (v != i && G_before(i, v) == 1)
+      in_V[v] = true;
+  std::vector<int> V_a_idx;
+  V_a_idx.reserve(q);
+  for (int v = 0; v < q; ++v) if (in_V[v]) V_a_idx.push_back(v);
+
+  // Evaluate partial sum on G_before first so any accidental aliasing
+  // between G_before and G_after (Armadillo copy-on-write) cannot corrupt
+  // the before evaluation.
+  double before = partial_log_Z_NLO_gamma_on_V(
+      G_before, in_V, V_a_idx, alpha, beta, sigma, include_F, delta);
+  arma::imat G_after(G_before);
+  G_after(i, j) = 1 - G_before(i, j);
+  G_after(j, i) = G_after(i, j);
+  double after = partial_log_Z_NLO_gamma_on_V(
+      G_after, in_V, V_a_idx, alpha, beta, sigma, include_F, delta);
+  return after - before;
+}
diff --git a/src/models/ggm/log_z_nlo.h b/src/models/ggm/log_z_nlo.h
new file mode 100644
index 00000000..4f455db4
--- /dev/null
+++ b/src/models/ggm/log_z_nlo.h
@@ -0,0 +1,73 @@
+#pragma once
+
+#include <RcppArmadillo.h>
+
+// Laplace + NLO closed-form approximations to log Z(G) for the
+// spikeslab GGM prior with Gamma diagonal and determinant tilt:
+//
+//   K_ij ~ N(0, sigma^2) on off-diagonals (slab),
+//   K_ii ~ Gamma(alpha, beta) on diagonals (rate parameterisation),
+//   tilt |K|^delta with delta >= 0,
+//   restricted to the PD cone supported by edge indicator matrix G.
+//
+// The full-recompute call costs O(q + |E| + non_edges_with_common_pred);
+// the alpha = 1 incremental form costs O(deg(i)^2 + deg(i) * q) per
+// single-edge toggle. Both reduce to the pre-tilt formula bit-exact at
+// delta = 0.
+//
+// Port of:
+//   ~/SV/Z/R/src/branchB_chain.cpp:470-620     (full-recompute)
+//   ~/SV/Z/R/src/incremental_log_Z_NLO_gamma.h (alpha=1 incremental)
+// validated SBC-clean in the z-project program update of 2026-05-17.
+
+double log_Z_NLO_gamma(
+    const arma::imat& G,
+    double alpha, double beta, double sigma,
+    bool include_F = false,
+    double delta = 0.0);
+
+// Toggle-endpoint reordering ("DEGORD"): relabel (i, j) to (0, 1) and
+// permute all other vertices in their original order, then evaluate
+// log_Z_NLO_gamma on the permuted graph. The closed-form is not
+// permutation-invariant, so the chain's MH ratio must always evaluate
+// both endpoint graphs in the same reordering.
+double log_Z_NLO_gamma_degord(
+    const arma::imat& G, int i, int j,
+    double alpha, double beta, double sigma,
+    bool include_F = false,
+    double delta = 0.0);
+
+// log Z_NLO(G_after) - log Z_NLO(G_before) under a single-edge toggle
+// (i, j), at alpha = 1. Equivalent to the difference of two full-recompute
+// calls to log_Z_NLO_gamma at alpha = 1 to machine precision, but evaluated
+// via the §4.6 locality decomposition: only vertex min(i,j) contributes a
+// change.
+double log_Z_NLO_gamma_delta_incr_alpha1(
+    const arma::imat& G_before, int i, int j,
+    double beta, double sigma, double delta,
+    bool include_F = false);
+
+// log Z_NLO(G_after) - log Z_NLO(G_before) under a single-edge toggle
+// (i, j), at general alpha (including alpha > 1).
+//
+// At alpha > 1 the alpha = 1 locality breaks: H_e gains a -4 beta (alpha - 1)
+// / M_v[v] piece that depends on the larger endpoint, so toggling (i, j)
+// cascades changes to H_e on every edge incident to i (forward AND backward).
+//
+// Implementation evaluates the partial sum
+//
+//   partial_log_Z_NLO_gamma_on_V(G, V_a)
+//     = sum over log_Z_NLO_gamma term instances with at least one index
+//       in V_a, with V_a = {i, j} ∪ N_G_before(i) (i, the toggled endpoint
+//       j, and i's graph neighbours - the vertices whose H_e to/from i
+//       cascade-changes under the toggle).
+//
+// then returns partial(G_after, V_a) - partial(G_before, V_a). Instances
+// outside V_a are invariant under the toggle and cancel out.
+//
+// Reduces to log_Z_NLO_gamma_delta_incr_alpha1 at alpha = 1 to machine
+// precision.
+double log_Z_NLO_gamma_delta_incr_alphaN(
+    const arma::imat& G_before, int i, int j,
+    double alpha, double beta, double sigma, double delta,
+    bool include_F = false);
diff --git a/src/models/ggm/manuscript_nlo.h b/src/models/ggm/manuscript_nlo.h
new file mode 100644
index 00000000..38517468
--- /dev/null
+++ b/src/models/ggm/manuscript_nlo.h
@@ -0,0 +1,144 @@
+// Closed-form NLO under the manuscript App C decomposition.
+//
+// eq:NLO-decomp, eq:Be / eq:Cel / eq:Di / eq:Elm. A_l is omitted because the
+// exact log_C0 (Bartlett base, lgamma terms) already handles the diagonal
+// class-(i) integrals; including A_l would double-count and shift log Z(empty)
+// away from 0.
+//
+// Caveat: accurate in the sparse regime (n_edges <= ~q*q/4). Over-corrects at
+// dense / high-max-degree centres (asymptotic-series breakdown). At p=50
+// operational density (~5%) the sparse regime applies.
+//
+// Companion-AI delivery: ~/SV/Z/R/src/manuscript_NLO.h (2026-05-21).
+
+#pragma once
+
+#include <RcppArmadillo.h>
+#include <cmath>
+#include <vector>
+
+namespace ggm_nlo {
+
+// Manuscript NLO at alpha = 1 (no Na/Nb/Nc terms; those vanish at alpha=1).
+// Returns log_C0 + log_LO + delta_NLO_manuscript.
+inline double log_Z_manuscript_NLO_alpha1(
+    const arma::imat& G,
+    double beta, double sigma, double delta
+) {
+    int q = G.n_rows;
+    std::vector<int> nu(q, 0);
+    int E_count = 0;
+    for (int l = 0; l < q; ++l)
+        for (int m = l + 1; m < q; ++m)
+            if (G(l, m) == 1) { ++nu[l]; ++E_count; }
+
+    const double sigma2 = sigma * sigma;
+    const double sigma4 = sigma2 * sigma2;
+    const double lambda = beta;
+    const double lambda2 = lambda * lambda;
+
+    // log C0 (Bartlett base, exact; alpha = 1)
+    double log_C0 = 0.5 * static_cast<double>(E_count) * std::log(M_PI)
+                    - 0.5 * static_cast<double>(E_count) * std::log(2.0 * M_PI * sigma2)
+                    - static_cast<double>(E_count) * std::log(beta)
+                    - static_cast<double>(q) * std::lgamma(1.0)      // alpha = 1 -> 0
+                    - static_cast<double>(q) * delta * std::log(beta);
+    for (int l = 0; l < q; ++l)
+        log_C0 += std::lgamma((static_cast<double>(nu[l]) + 2.0 * (1.0 + delta)) / 2.0);
+    if (E_count == 0) return log_C0;
+
+    // M_tilde_l = nu_l + 1 + 2 delta;  H_l^off = 2 lambda + M_tilde_l/(2 sigma^2 lambda)
+    std::vector<double> M_tilde(q), H_off(q);
+    for (int l = 0; l < q; ++l) {
+        M_tilde[l] = static_cast<double>(nu[l]) + 1.0 + 2.0 * delta;
+        H_off[l]   = 2.0 * lambda + M_tilde[l] / (2.0 * sigma2 * lambda);
+    }
+
+    // log_LO = sum_e 0.5 log(2 lambda / H_e), with H_e = H_{i_e}^off
+    double log_LO = 0.0;
+    for (int i = 0; i < q; ++i)
+        for (int j = i + 1; j < q; ++j)
+            if (G(i, j) == 1) {
+                double H_e = H_off[i];
+                if (H_e <= 0.0) return R_NegInf;
+                log_LO += 0.5 * std::log(2.0 * lambda / H_e);
+            }
+
+    // delta_NLO = sum_e B_e + sum_(e, l in C_e^off) C_{e,l}
+    //           + sum_{i: nu_i >= 1} D_i + sum_{(l,m) not in E, l<m} E_{l,m}
+    double dNLO = 0.0;
+
+    // B_e
+    for (int i = 0; i < q; ++i)
+        for (int j = i + 1; j < q; ++j)
+            if (G(i, j) == 1) {
+                double H_e = H_off[i];
+                dNLO += -1.0 / (4.0 * lambda * sigma2 * H_e);
+            }
+
+    // C_{e,l}: edge e = (i, j), common predecessor l < i with G(l, i) = G(l, j) = 1
+    for (int i = 1; i < q; ++i)
+        for (int j = i + 1; j < q; ++j) {
+            if (G(i, j) != 1) continue;
+            double H_e = H_off[i];
+            for (int l = 0; l < i; ++l)
+                if (G(l, i) == 1 && G(l, j) == 1) {
+                    double Hl = H_off[l];
+                    double Hl2 = Hl * Hl;
+                    dNLO += -1.0 / (2.0 * sigma2 * Hl2)
+                            + M_tilde[i] / (4.0 * lambda * sigma4 * H_e * Hl2);
+                }
+        }
+
+    // D_i (only vertices with nu_i >= 1)
+    for (int i = 0; i < q; ++i)
+        if (nu[i] >= 1) {
+            double Hi = H_off[i];
+            dNLO += static_cast<double>(nu[i]) * (static_cast<double>(nu[i]) + 2.0)
+                    * M_tilde[i]
+                    / (16.0 * lambda2 * sigma4 * Hi * Hi);
+        }
+
+    // E_{l,m}: non-edges (l, m), l < m, common predecessors k < l with G(k,l)=G(k,m)=1
+    for (int l = 0; l < q - 1; ++l)
+        for (int m = l + 1; m < q; ++m) {
+            if (G(l, m) == 1) continue;
+            if (l < 1) continue;  // need k < l
+            double sum_inv_H2 = 0.0;
+            for (int k = 0; k < l; ++k)
+                if (G(k, l) == 1 && G(k, m) == 1) {
+                    double Hk = H_off[k];
+                    sum_inv_H2 += 1.0 / (Hk * Hk);
+                }
+            if (sum_inv_H2 > 0.0) {
+                dNLO += -2.0 * lambda2 / M_tilde[l] * sum_inv_H2;
+            }
+        }
+
+    return log_C0 + log_LO + dNLO;
+}
+
+
+// Toggle-endpoint reordering ("DEGORD"): same convention as
+// log_Z_NLO_gamma_degord — relabel (i, j) to (0, 1) and permute remaining
+// vertices in their original order before applying the closed form. Required
+// for the chain MH ratio because the closed form is not permutation-invariant.
+inline double log_Z_manuscript_NLO_alpha1_degord(
+    const arma::imat& G, int i, int j,
+    double beta, double sigma, double delta
+) {
+    int q = G.n_rows;
+    std::vector<int> perm;
+    perm.reserve(q);
+    perm.push_back(i);
+    perm.push_back(j);
+    for (int v = 0; v < q; ++v) if (v != i && v != j) perm.push_back(v);
+    arma::imat G_perm(q, q, arma::fill::zeros);
+    for (int a = 0; a < q; ++a)
+        for (int b = 0; b < q; ++b)
+            G_perm(a, b) = G(perm[a], perm[b]);
+    return log_Z_manuscript_NLO_alpha1(G_perm, beta, sigma, delta);
+}
+
+
+}  // namespace ggm_nlo
diff --git a/src/models/ggm/z_ratio_estimator.cpp b/src/models/ggm/z_ratio_estimator.cpp
new file mode 100644
index 00000000..a47eed34
--- /dev/null
+++ b/src/models/ggm/z_ratio_estimator.cpp
@@ -0,0 +1,290 @@
+// Russian-Roulette V(Γ, U) estimator of 1/Z(Γ). Built on the Phase 2 DEGORD
+// Bartlett-Cholesky sampler. See z_ratio_estimator.h for the construction.
+//
+// Port of ~/SV/Z/R/src/branchB_chain_route3a_degord.cpp:192-228 (V function)
+// and :215-228 (pool draw), with R::rnorm / R::rgeom replaced by SafeRNG
+// for chain-seed portability.
+
+#include "models/ggm/z_ratio_estimator.h"
+#include <cmath>
+#include <limits>
+
+#include "rng/rng_utils.h"
+
+namespace degord {
+
+
+double V_at_Gamma_pi_degord(
+    int K_depth,
+    const std::vector<arma::mat>& pools_t,
+    const PiAux& pi_aux,
+    const ChainAux& chain_aux,
+    double c_val,
+    double rho
+) {
+    if (K_depth == 0) return 1.0 / c_val;
+    double acc = 1.0 / c_val;
+    double running_prod = 1.0;
+    for (int n = 1; n <= K_depth; ++n) {
+        double log_Zhat_n = log_Zhat_pi_from_pool(
+            pools_t[n - 1], pi_aux, chain_aux);
+        if (!std::isfinite(log_Zhat_n))
+            return std::numeric_limits<double>::quiet_NaN();
+        double Zhat_n = std::exp(log_Zhat_n);
+        running_prod *= (Zhat_n - c_val) / c_val;
+        double sgn = (n % 2 == 0) ? 1.0 : -1.0;
+        acc += sgn * running_prod / (c_val * std::pow(rho, static_cast<double>(n)));
+    }
+    return acc;
+}
+
+
+double V_at_Gamma_pi_degord(
+    int K_depth,
+    const std::vector<arma::mat>& pools_t,
+    const arma::imat& G_pi,
+    const ChainAux& chain_aux,
+    double c_val,
+    double rho
+) {
+    PiAux pi_aux = make_pi_aux(G_pi, chain_aux);
+    return V_at_Gamma_pi_degord(K_depth, pools_t, pi_aux, chain_aux, c_val, rho);
+}
+
+
+// log|expm1(x)| with explicit sign(expm1(x)).
+//   x > 0: expm1(x) > 0, log|.| = x + log1p(-exp(-x))
+//   x < 0: expm1(x) < 0, log|.| = log1p(-exp(x))
+//   x == 0: expm1(0) = 0; caller treats as sign=0, contribution=0.
+// Both numerically stable near x = 0 (log1p(-exp(-|x|)) ~ log|x|).
+static inline std::pair<double, int> log_abs_expm1_signed(double x) {
+    if (!std::isfinite(x)) {
+        return {std::numeric_limits<double>::quiet_NaN(), 0};
+    }
+    if (x == 0.0) {
+        return {-std::numeric_limits<double>::infinity(), 0};
+    }
+    if (x > 0.0) {
+        return {x + std::log1p(-std::exp(-x)), +1};
+    }
+    return {std::log1p(-std::exp(x)), -1};
+}
+
+
+// Build (log|V|, sign(V)) from a sequence of log_Zhat_n values (n = 1..K)
+// at fixed (log_c, rho). Computes log|S|, sign(S) over the K+1 truncated
+// series terms via signed log-sum-exp, then returns (-log_c + log|S|,
+// sign(S)). K = 0 short-circuits to {-log_c, +1}.
+//
+// Returns {NaN, 0} on any non-finite intermediate or S = 0 collapse.
+static std::pair<double, int> V_log_from_log_Zhats(
+    const std::vector<double>& log_Zhats,
+    double log_c,
+    double rho
+) {
+    const double NaN = std::numeric_limits<double>::quiet_NaN();
+    const double neg_inf = -std::numeric_limits<double>::infinity();
+
+    if (!std::isfinite(log_c) || !(rho > 0.0 && rho < 1.0)) {
+        return {NaN, 0};
+    }
+
+    const int K_depth = static_cast<int>(log_Zhats.size());
+
+    // Accumulate (log|term_n|, sign(term_n)) for n = 0..K_depth, then resolve
+    // via signed log-sum-exp at the end.
+    std::vector<double> log_abs;
+    std::vector<int>    sgn;
+    log_abs.reserve(static_cast<size_t>(K_depth) + 1u);
+    sgn.reserve(static_cast<size_t>(K_depth) + 1u);
+
+    // Term n = 0: +1.
+    log_abs.push_back(0.0);
+    sgn.push_back(+1);
+
+    if (K_depth > 0) {
+        const double log_rho = std::log(rho);
+        double log_abs_prod = 0.0;
+        int    sign_prod = +1;
+        for (int n = 1; n <= K_depth; ++n) {
+            double log_Zhat_n = log_Zhats[static_cast<size_t>(n - 1)];
+            if (!std::isfinite(log_Zhat_n)) return {NaN, 0};
+
+            double x = log_Zhat_n - log_c;
+            auto em1 = log_abs_expm1_signed(x);
+            double log_abs_em1 = em1.first;
+            int    sgn_em1     = em1.second;
+            if (!std::isfinite(log_abs_em1) && sgn_em1 == 0 && x == 0.0) {
+                // expm1(x) = 0 exactly → all further (and this) term_n vanish.
+                // Subsequent terms inherit the same zero factor, so we are
+                // done extending the truncated series.
+                break;
+            }
+            if (sgn_em1 == 0) {
+                // Non-finite x; bail out.
+                return {NaN, 0};
+            }
+            log_abs_prod += log_abs_em1;
+            sign_prod    *= sgn_em1;
+
+            double log_abs_term = -static_cast<double>(n) * log_rho + log_abs_prod;
+            int    sign_term    = ((n & 1) ? -1 : +1) * sign_prod;
+
+            if (!std::isfinite(log_abs_term)) {
+                // Overflow in magnitude (e.g. tail of the series ran away).
+                // log_Zhat overflow already screened above; this guards the
+                // log1p(-exp) edge cases.
+                if (log_abs_term == neg_inf) continue;
+                return {NaN, 0};
+            }
+
+            log_abs.push_back(log_abs_term);
+            sgn.push_back(sign_term);
+        }
+    }
+
+    // Signed log-sum-exp.
+    double M = neg_inf;
+    for (double la : log_abs) {
+        if (la > M) M = la;
+    }
+    if (M == neg_inf) {
+        // All terms exactly zero; S = 0, sign undefined.
+        return {NaN, 0};
+    }
+    double s = 0.0;
+    for (size_t k = 0; k < log_abs.size(); ++k) {
+        if (log_abs[k] == neg_inf) continue;
+        s += static_cast<double>(sgn[k]) * std::exp(log_abs[k] - M);
+    }
+    if (s == 0.0 || !std::isfinite(s)) {
+        return {NaN, 0};
+    }
+    double log_abs_S = M + std::log(std::abs(s));
+    int    sign_S    = (s > 0.0) ? +1 : -1;
+
+    double log_abs_V = -log_c + log_abs_S;
+    if (!std::isfinite(log_abs_V)) {
+        return {NaN, 0};
+    }
+    return {log_abs_V, sign_S};
+}
+
+
+std::pair<double, int> V_log_at_Gamma_pi_degord(
+    int K_depth,
+    const std::vector<arma::mat>& pools_t,
+    const PiAux& pi_aux,
+    const ChainAux& chain_aux,
+    double log_c,
+    double rho
+) {
+    std::vector<double> log_Zhats;
+    log_Zhats.reserve(static_cast<size_t>(K_depth));
+    for (int n = 0; n < K_depth; ++n) {
+        double log_Zhat_n = log_Zhat_pi_from_pool(
+            pools_t[n], pi_aux, chain_aux);
+        if (!std::isfinite(log_Zhat_n)) {
+            return {std::numeric_limits<double>::quiet_NaN(), 0};
+        }
+        log_Zhats.push_back(log_Zhat_n);
+    }
+    return V_log_from_log_Zhats(log_Zhats, log_c, rho);
+}
+
+
+std::pair<double, int> V_log_at_Gamma_pi_degord(
+    int K_depth,
+    const std::vector<arma::mat>& pools_t,
+    const arma::imat& G_pi,
+    const ChainAux& chain_aux,
+    double log_c,
+    double rho
+) {
+    PiAux pi_aux = make_pi_aux(G_pi, chain_aux);
+    return V_log_at_Gamma_pi_degord(
+        K_depth, pools_t, pi_aux, chain_aux, log_c, rho);
+}
+
+
+LogSignedVPair V_log_pair_at_Gamma_curr_star_degord(
+    int K_depth,
+    const std::vector<arma::mat>& pools_t,
+    const PiAux& a_curr,
+    const PiAux& a_star,
+    const ChainAux& chain_aux,
+    double log_c_curr,
+    double log_c_star,
+    double rho
+) {
+    const double NaN = std::numeric_limits<double>::quiet_NaN();
+    LogSignedVPair out;
+    out.curr = {NaN, 0};
+    out.star = {NaN, 0};
+
+    // Loop once over the K_depth pools, building cache_curr from a_curr and
+    // reusing it to evaluate log_Zhat_n at a_star without rebuilding Phi.
+    std::vector<double> log_Zhats_curr, log_Zhats_star;
+    log_Zhats_curr.reserve(static_cast<size_t>(K_depth));
+    log_Zhats_star.reserve(static_cast<size_t>(K_depth));
+    for (int n = 0; n < K_depth; ++n) {
+        PoolCache cache_curr;
+        double log_Zhat_n_curr = log_Zhat_pi_from_pool_cache(
+            pools_t[n], a_curr, chain_aux, cache_curr);
+        if (!std::isfinite(log_Zhat_n_curr)) return out;
+        double log_Zhat_n_star = log_Zhat_star_from_cache(
+            pools_t[n], a_star, chain_aux, cache_curr);
+        if (!std::isfinite(log_Zhat_n_star)) return out;
+        log_Zhats_curr.push_back(log_Zhat_n_curr);
+        log_Zhats_star.push_back(log_Zhat_n_star);
+    }
+
+    out.curr = V_log_from_log_Zhats(log_Zhats_curr, log_c_curr, rho);
+    out.star = V_log_from_log_Zhats(log_Zhats_star, log_c_star, rho);
+    return out;
+}
+
+
+LogSignedVPair V_log_pair_at_Gamma_curr_star_degord(
+    int K_depth,
+    const std::vector<arma::mat>& pools_t,
+    const arma::imat& G_pi_curr,
+    const arma::imat& G_pi_star,
+    const ChainAux& chain_aux,
+    double log_c_curr,
+    double log_c_star,
+    double rho
+) {
+    PiAux a_curr = make_pi_aux(G_pi_curr, chain_aux);
+    PiAux a_star = make_pi_aux(G_pi_star, chain_aux);
+    return V_log_pair_at_Gamma_curr_star_degord(
+        K_depth, pools_t, a_curr, a_star, chain_aux,
+        log_c_curr, log_c_star, rho);
+}
+
+
+void draw_U_degord_rr(
+    SafeRNG& rng,
+    int& K_depth,
+    std::vector<arma::mat>& pools_t,
+    int M_inner,
+    int q,
+    double rho
+) {
+    // K_depth ~ Geom(1 - rho). boost::random doesn't ship a geometric directly
+    // here; we draw via inverse-CDF on a uniform: K = floor(log(U) / log(rho)).
+    // This matches R::rgeom(1 - rho) in distribution (number of failures
+    // before the first success when success prob = 1 - rho).
+    double u = runif(rng);
+    if (u <= 0.0) u = std::numeric_limits<double>::min();  // guard log(0)
+    K_depth = static_cast<int>(std::floor(std::log(u) / std::log(rho)));
+
+    pools_t.clear();
+    pools_t.reserve(static_cast<size_t>(K_depth));
+    for (int n = 0; n < K_depth; ++n) {
+        pools_t.push_back(draw_bartlett_pool(rng, q, M_inner));
+    }
+}
+
+
+}  // namespace degord
diff --git a/src/models/ggm/z_ratio_estimator.h b/src/models/ggm/z_ratio_estimator.h
new file mode 100644
index 00000000..f517cda8
--- /dev/null
+++ b/src/models/ggm/z_ratio_estimator.h
@@ -0,0 +1,172 @@
+#pragma once
+
+#include <RcppArmadillo.h>
+#include <utility>
+#include <vector>
+
+#include "models/ggm/degord_sampler.h"
+#include "rng/rng_utils.h"
+
+// Russian-Roulette V(Γ, U) estimator of 1/Z(Γ) for the hierarchical-spec
+// GGM. Built on top of the Phase 2 DEGORD Bartlett-Cholesky sampler.
+//
+// Construction (Lyne 2015 / z-project Stage 3.2A):
+//   V(Γ, U) = (1/c) · [1 + sum_{n=1..K_depth} (-1)^n · prod_{i=1..n} (Zhat_i - c)/c
+//                                              / rho^n]
+// where
+//   Zhat_i  ~ log_Zhat_pi_from_pool(pools_t[i-1], pi_aux, chain_aux), expd
+//   c       = kappa * exp(log_Z_NLO_degord(Γ))   -- analytic centring
+//   K_depth ~ Geom(1 - rho)                       -- random truncation depth
+//   rho     in (0, 1)                             -- truncation continuation prob
+//
+// E[V(Γ, U)] = 1/Z(Γ) exactly under (kappa, rho) chosen so the geometric
+// series converges (radius + moment conditions; see route3a_V_helpers
+// notes/exactness-proposition.md §2). The chain composes log|V_star/V_curr|
+// into the between-edge MH ratio and tracks sign(V) separately for
+// Lyne-style sign-corrected ergodic averaging.
+//
+// Port of:
+//   ~/SV/Z/R/src/branchB_chain_route3a_degord.cpp:192-228
+// validated on Z at q=10, delta=2 post the disable_log_r code-motion fix.
+
+namespace degord {
+
+
+// Per-Gamma V-estimator state, owned by the chain across between-step
+// proposals. Updated lazily when the chain accepts a Γ-toggle.
+struct ZRatioState {
+    std::vector<arma::mat> pools_t;     // K_depth pools, each (dim x M_inner)
+    int                    K_depth;     // Geom(1 - rho) draw
+    double                 kappa;       // c = kappa * exp(log_Z_NLO)
+    double                 rho;         // geometric truncation prob
+    double                 log_Z_NLO_curr;   // analytic centring at Γ_curr
+    int                    sign_curr;        // ±1, tracked from V(Γ_curr, U) sign
+};
+
+
+// V(Γ, U) at fixed (G_pi, pi_aux, c_val, rho). Returns the signed V.
+//
+// K_depth = 0 short-circuits to V = 1/c (the n=0 term only).
+// Returns NaN if any Zhat_n is non-finite (caller treats as auto-reject).
+double V_at_Gamma_pi_degord(
+    int K_depth,
+    const std::vector<arma::mat>& pools_t,
+    const PiAux& pi_aux,
+    const ChainAux& chain_aux,
+    double c_val,
+    double rho);
+
+
+// Convenience overload: take G_pi instead of pre-built pi_aux (builds it
+// internally). Used by R-callable test entry points; chain hot-path
+// should pass the pre-built pi_aux to amortise.
+double V_at_Gamma_pi_degord(
+    int K_depth,
+    const std::vector<arma::mat>& pools_t,
+    const arma::imat& G_pi,
+    const ChainAux& chain_aux,
+    double c_val,
+    double rho);
+
+
+// Log-space variant of V(Γ, U). Factors V = (1/c) · S with
+//
+//   S = 1 + sum_{n=1..K_depth} (-1/rho)^n · prod_{m=1..n} (Zhat_m - c) / c
+//     = 1 + sum_n term_n,
+//
+//   log|term_n| = -n · log(rho) + sum_{m=1..n} log|expm1(log_Zhat_m - log_c)|
+//   sign(term_n) = (-1)^n · prod_m sign(expm1(log_Zhat_m - log_c))
+//
+// Computes log|S| via signed log-sum-exp over the K_depth + 1 terms, then
+// returns (log|V|, sign(V)) = (-log_c + log|S|, sign(S)).
+//
+// The linear form V_at_Gamma_pi_degord exponentiates log_Z_NLO into c, which
+// underflows to 0 at large p (e.g. log_Z_NLO ~ -3500 at p = 100 makes c = 0),
+// silently breaking the MH ratio. The log-space form never materialises c.
+//
+// Returns {NaN, 0} on auto-reject (non-finite Zhat, S evaluates to 0, or
+// any other non-finite intermediate). Caller treats as ln_alpha = -Inf.
+//
+// In the MH ratio, the log_kappa term cancels:
+//   log|V_curr| - log|V_star|
+//     = (log_Z_NLO_star - log_Z_NLO_curr) + (log|S_curr| - log|S_star|).
+// Callers pass log_c = log(kappa) + log_Z_NLO at the relevant Gamma.
+std::pair<double, int> V_log_at_Gamma_pi_degord(
+    int K_depth,
+    const std::vector<arma::mat>& pools_t,
+    const PiAux& pi_aux,
+    const ChainAux& chain_aux,
+    double log_c,
+    double rho);
+
+
+// Convenience overload mirroring the linear form: take G_pi instead of a
+// pre-built pi_aux. Used by R-callable test entry points.
+std::pair<double, int> V_log_at_Gamma_pi_degord(
+    int K_depth,
+    const std::vector<arma::mat>& pools_t,
+    const arma::imat& G_pi,
+    const ChainAux& chain_aux,
+    double log_c,
+    double rho);
+
+
+// Paired (log|V|, sign) for Gamma_curr / Gamma_star, computed with
+// within-pool cache reuse. For each of K_depth pools, the inner loop
+// builds Phi-state under a_curr ONCE (per log_Zhat_pi_from_pool_cache),
+// then re-evaluates only row q-2 under a_star via row_qm2_logw_from_S
+// using the cached (rw_head, S_trail). This halves the per-pool Phi
+// rebuild cost vs two separate V_log_at_Gamma_pi_degord calls.
+//
+// G_pi_curr and G_pi_star must share q and may differ only at the
+// trailing slot (q - 2, q - 1) — the DEGORD convention enforced by
+// callers via degord_permutation(q, i, j). Returns {NaN, 0} on
+// either side if any Zhat_n is non-finite, the signed sum collapses to
+// zero, or log_c is non-finite.
+struct LogSignedVPair {
+    std::pair<double, int> curr;
+    std::pair<double, int> star;
+};
+
+
+LogSignedVPair V_log_pair_at_Gamma_curr_star_degord(
+    int K_depth,
+    const std::vector<arma::mat>& pools_t,
+    const PiAux& a_curr,
+    const PiAux& a_star,
+    const ChainAux& chain_aux,
+    double log_c_curr,
+    double log_c_star,
+    double rho);
+
+
+// Convenience overload mirroring the single-graph variant: take G_pi_curr
+// and G_pi_star instead of pre-built PiAux'es. Used by R-callable test
+// entry points; chain hot-path should pass pre-built pi_aux to amortise.
+LogSignedVPair V_log_pair_at_Gamma_curr_star_degord(
+    int K_depth,
+    const std::vector<arma::mat>& pools_t,
+    const arma::imat& G_pi_curr,
+    const arma::imat& G_pi_star,
+    const ChainAux& chain_aux,
+    double log_c_curr,
+    double log_c_star,
+    double rho);
+
+
+// Fresh U-pool draw: K_depth ~ Geom(1 - rho); pools_t[n] is (dim x M_inner)
+// with iid N(0, 1) entries. Uses SafeRNG so chain seeds remain
+// deterministic across platforms.
+//
+// Each pool is in pre-transposed (dim x M_inner) layout so the inner
+// DEGORD kernel can access each sample's noise as a contiguous column.
+void draw_U_degord_rr(
+    SafeRNG& rng,
+    int& K_depth,
+    std::vector<arma::mat>& pools_t,
+    int M_inner,
+    int q,
+    double rho);
+
+
+}  // namespace degord
diff --git a/src/priors/parameter_prior.h b/src/priors/parameter_prior.h
index bb687a54..7e2f4ca2 100644
--- a/src/priors/parameter_prior.h
+++ b/src/priors/parameter_prior.h
@@ -118,6 +118,8 @@ class NormalPrior final : public BaseParameterPrior {
         return std::make_unique<NormalPrior>(*this);
     }
 
+    double scale() const { return scale_; }
+
 private:
     double scale_;
 };
@@ -178,6 +180,9 @@ class GammaScalePrior final : public BaseParameterPrior {
         return std::make_unique<GammaScalePrior>(*this);
     }
 
+    double shape() const { return shape_; }
+    double rate()  const { return rate_; }
+
 private:
     double shape_;
     double rate_;
diff --git a/src/sample_ggm.cpp b/src/sample_ggm.cpp
index 8a979d4d..bb95e7a9 100644
--- a/src/sample_ggm.cpp
+++ b/src/sample_ggm.cpp
@@ -38,7 +38,13 @@ Rcpp::List sample_ggm(
     const int max_tree_depth = 10,
     const bool na_impute = false,
     const Rcpp::Nullable<Rcpp::IntegerMatrix> missing_index_nullable = R_NilValue,
-    const double delta = 0.0
+    const double delta = 0.0,
+    const std::string& graph_prior_spec = "joint",
+    const int    z_ratio_M_inner = 100,
+    const double z_ratio_kappa   = 1.0,
+    const double z_ratio_rho     = 0.5,
+    const bool   use_manuscript_nlo = false,
+    const bool   mh_U = false
 ) {
 
     // Create parameter priors from R input
@@ -85,6 +91,19 @@ Rcpp::List sample_ggm(
     // both gradient paths and all four MH ratios in GGMModel.
     model.set_determinant_tilt(delta);
 
+    // Graph-prior spec (joint vs hierarchical). Hierarchical mode adds the
+    // V(Γ_curr)/V(Γ_star) factor to the between-edge MH ratio, converting
+    // the implicit joint-marginal target π(Γ)·Z(Γ) into the user-specified
+    // π(Γ). Requires Normal slab + Gamma diagonal (validated at lazy init).
+    if (graph_prior_spec == "hierarchical") {
+        model.set_z_ratio_tuning(z_ratio_M_inner, z_ratio_kappa, z_ratio_rho);
+        model.set_use_manuscript_nlo(use_manuscript_nlo);
+        model.set_mh_U(mh_U);
+        model.set_graph_prior_spec(GraphPriorSpec::Hierarchical);
+    } else if (graph_prior_spec != "joint") {
+        Rcpp::stop("graph_prior_spec must be 'joint' or 'hierarchical'.");
+    }
+
     // Set up missing data imputation (same pattern as OMRF)
     if (na_impute && missing_index_nullable.isNotNull()) {
         arma::imat missing_index = Rcpp::as<arma::imat>(
diff --git a/tests/testthat/fixtures/degord_sampler_reference.rds b/tests/testthat/fixtures/degord_sampler_reference.rds
new file mode 100644
index 00000000..c7b2c3ca
Binary files /dev/null and b/tests/testthat/fixtures/degord_sampler_reference.rds differ
diff --git a/tests/testthat/fixtures/log_z_nlo_reference.rds b/tests/testthat/fixtures/log_z_nlo_reference.rds
new file mode 100644
index 00000000..5122eb3b
Binary files /dev/null and b/tests/testthat/fixtures/log_z_nlo_reference.rds differ
diff --git a/tests/testthat/test-degord-sampler.R b/tests/testthat/test-degord-sampler.R
new file mode 100644
index 00000000..ad98bf55
--- /dev/null
+++ b/tests/testthat/test-degord-sampler.R
@@ -0,0 +1,332 @@
+# --------------------------------------------------------------------------- #
+# DEGORD sampler (Phase 2): port of the Bartlett-Cholesky importance
+# sampler for log Zhat(G) from ~/SV/Z/R/src/degord_sampler.h (v4).
+#
+# Exercises:
+#   degord_chain_aux_cpp                     - ChainAux constants
+#   degord_pi_aux_cpp                        - PiAux: nu_pi, E_count, log_C0
+#   degord_permute_graph_cpp                 - DEGORD permutation -> (q-2, q-1)
+#   degord_log_Zhat_pi_from_pool_cpp         - main Zhat-from-pool entry
+#   degord_delta_log_Zhat_pi_toggle_cpp      - cache-based delta under trailing toggle
+#   degord_draw_bartlett_pool_cpp            - SafeRNG-based standard-normal pool
+#
+# Ground truth is a fixture
+# (tests/testthat/fixtures/degord_sampler_reference.rds) pre-generated from
+# the z reference (see dev/numerical_analyses/generate_degord_fixture.R).
+# --------------------------------------------------------------------------- #
+
+
+# ---- log_Zhat_pi_from_pool: bit-parity against z on shared noise pool -------
+
+test_that("log_Zhat_pi_from_pool matches the z reference bit-exact", {
+  fixture_path <- testthat::test_path("fixtures", "degord_sampler_reference.rds")
+  cases <- readRDS(fixture_path)
+  for (case in cases) {
+    for (alpha in c(1.0, 2.0)) {
+      for (delta in c(0.0, 0.5, 1.0)) {
+        for (tilt_mode in c(0L, 1L)) {
+          key <- sprintf("a%g_d%g_t%d", alpha, delta, tilt_mode)
+          ours <- degord_log_Zhat_pi_from_pool_cpp(
+            case$pool_t, case$G_pi, alpha, 1.0, 1.0, delta, tilt_mode
+          )
+          ref <- case$values[[key]]
+          expect_equal(
+            ours, ref,
+            tolerance = 1e-12,
+            info = sprintf("q=%d rep=%d %s", case$q, case$rep, key)
+          )
+        }
+      }
+    }
+  }
+})
+
+
+# ---- DEGORD permutation correctness ----------------------------------------
+
+test_that("DEGORD permutation sends (i, j) to (q-2, q-1)", {
+  set.seed(31)
+  for (q in c(3L, 5L, 7L)) {
+    G <- matrix(0L, q, q)
+    for (i in 1:(q - 1)) for (j in (i + 1):q)
+      if (runif(1) < 0.5) {
+        G[i, j] <- 1L
+        G[j, i] <- 1L
+      }
+    for (i0 in 0:(q - 2)) {
+      for (j0 in (i0 + 1):(q - 1)) {
+        G_pi <- degord_permute_graph_cpp(G, i0, j0)
+        # The DEGORD permutation sends (i, j) -> (q-2, q-1). So
+        # G_pi[q-1, q] (1-based) must equal G[i+1, j+1] (1-based).
+        expect_equal(
+          G_pi[q - 1, q], G[i0 + 1, j0 + 1],
+          info = sprintf("q=%d (i,j)=(%d,%d)", q, i0, j0)
+        )
+        # G_pi must be symmetric, integer-valued.
+        expect_true(isSymmetric(G_pi))
+        expect_true(all(G_pi %in% c(0L, 1L)))
+      }
+    }
+  }
+})
+
+
+# ---- ChainAux constants: bgms vs z (interactive, requires z wrapper) -------
+
+# This is more thorough but requires the z source to be available, so it is
+# gated behind the wrapper. The fixture above is sufficient for CI parity;
+# this block runs locally when the developer has the z project on disk.
+
+test_that("ChainAux nu-tables match the z reference (local-only)", {
+  wrapper <- testthat::test_path("..", "..", "dev", "numerical_analyses",
+                                  "z_degord_wrapper.cpp")
+  skip_if_not(file.exists(wrapper),
+              "z DEGORD wrapper not present (regenerate fixture instead).")
+  z_src <- "~/Dropbox/Projecten/sv/z/R/src/degord_sampler.h"
+  skip_if_not(file.exists(z_src), "z reference header not present.")
+  suppressMessages(Rcpp::sourceCpp(wrapper))
+
+  fields <- c("sigma_diag", "nu_chi_df", "nu_mu_l", "nu_H_e_saddle",
+              "nu_lgamma_half_k", "nu_diag_const", "nu_slab_const_saddle",
+              "nu_per_vertex")
+  for (q in c(3L, 5L, 10L)) {
+    for (alpha in c(1.0, 2.0)) {
+      for (delta in c(0.0, 0.5, 1.0)) {
+        z <- z_degord_chain_aux(q, alpha, 1.0, 1.0, delta)
+        b <- degord_chain_aux_cpp(q, alpha, 1.0, 1.0, delta)
+        for (f in fields) {
+          expect_equal(
+            b[[f]], z[[f]],
+            tolerance = 1e-12,
+            info = sprintf("q=%d alpha=%g delta=%g %s", q, alpha, delta, f)
+          )
+        }
+      }
+    }
+  }
+})
+
+
+# ---- delta_log_Zhat_pi_toggle: cache-delta vs direct full-recompute ---------
+
+# This used to be a known-discrepancy note (~0.17 nat gap between cached
+# delta and the direct log_Zhat(star) - log_Zhat(curr)). Two bugs in the
+# v4 cache trick caused it:
+#
+#   1. rw_head spans rows 0..q-3 but row q-1's diagonal log-weight is
+#      invariant across (curr, star) AND sample-dependent; omitting it
+#      from the star aggregation left a sample-shifted bias.
+#      Fix: rw_head extended to include row q-1 (Option 2; companion
+#      Z-side proposal 2026-05-19, applied at degord_sampler.cpp:302).
+#
+#   2. delta_log_Zhat_pi_toggle passed z_trail = 0.0 to row_qm2_logw_from_S
+#      under slab_tilt_mode == 1, dropping the saddle-shifted slab
+#      innovation noise[q + edge_offset(q-2, q-1)] = noise[q + (q-2)(q+1)/2].
+#      Fix: pass the actual slab slot via noise_pool.colptr(slab_idx)
+#      (applied at degord_sampler.cpp:398-404).
+#
+# With both fixes the cache-delta matches the direct full-recompute
+# difference at machine precision under both slab_tilt_modes. This is
+# the SBC-relevant invariant — without it the chain's delta_log_Zhat
+# acceptance contribution silently drifts from log Zhat(star) /
+# log Zhat(curr) by a tilt-amplified bias.
+
+test_that("delta_log_Zhat_pi_toggle equals direct full-recompute at machine precision", {
+  # CI-portable regression net for the cache-fix + z_trail-fix pair: the
+  # cache-trick delta MUST equal the direct log_Zhat(star) - log_Zhat(curr)
+  # at machine precision under BOTH slab_tilt_modes. Without this assertion
+  # silent regressions in the cache aggregation (rw_head, S_trail) or in
+  # the z_trail slot index would re-introduce a tilt-amplified bias that
+  # only shows up under SBC stress at high delta.
+  draw_G <- function(q, seed) {
+    set.seed(seed)
+    G <- matrix(0L, q, q)
+    for (i in 1:(q - 1)) for (j in (i + 1):q)
+      if (runif(1) < 0.5) {
+        G[i, j] <- 1L
+        G[j, i] <- 1L
+      }
+    G
+  }
+
+  # Explicit regression row: q=10, delta=0, slab_tilt_mode=1, toggle (3, 9).
+  # This is the cell that surfaced the z_trail bug during the bgms port.
+  {
+    set.seed(2026)
+    q <- 10L
+    G <- draw_G(q, q + 13L)
+    dim_pool <- q + q * (q - 1) / 2
+    M <- 100L
+    pool   <- matrix(rnorm(M * dim_pool), M, dim_pool)
+    pool_t <- t(pool)
+    d_cache <- degord_delta_log_Zhat_pi_toggle_cpp(
+      pool, pool_t, G, 3L, 9L, 1.0, 1.0, 1.0, 0.0, 1L
+    )
+    G_pi_curr <- degord_permute_graph_cpp(G, 3L, 9L)
+    G_pi_star <- G_pi_curr
+    G_pi_star[q - 1, q] <- 1L - G_pi_curr[q - 1, q]
+    G_pi_star[q, q - 1] <- G_pi_star[q - 1, q]
+    lZ_curr <- degord_log_Zhat_pi_from_pool_cpp(
+      pool_t, G_pi_curr, 1.0, 1.0, 1.0, 0.0, 1L
+    )
+    lZ_star <- degord_log_Zhat_pi_from_pool_cpp(
+      pool_t, G_pi_star, 1.0, 1.0, 1.0, 0.0, 1L
+    )
+    expect_equal(
+      d_cache, lZ_star - lZ_curr, tolerance = 1e-10,
+      info = "z_trail-fix regression row: q=10, delta=0, tilt=1, toggle (3, 9)"
+    )
+  }
+
+  # Full sweep across (q, alpha, delta, tilt_mode, toggles).
+  for (q in c(5L, 10L)) {
+    set.seed(2026 + q)
+    G <- draw_G(q, q + 13L)
+    dim_pool <- q + q * (q - 1) / 2
+    M <- 50L
+    pool   <- matrix(rnorm(M * dim_pool), M, dim_pool)
+    pool_t <- t(pool)
+    for (alpha in c(1.0, 2.0)) {
+      for (delta in c(0.0, 0.5, 1.0, 2.0)) {
+        for (tilt_mode in c(0L, 1L)) {
+          for (i0 in 0:(q - 2)) {
+            for (j0 in (i0 + 1):(q - 1)) {
+              d_cache <- degord_delta_log_Zhat_pi_toggle_cpp(
+                pool, pool_t, G, i0, j0, alpha, 1.0, 1.0, delta, tilt_mode
+              )
+              G_pi_curr <- degord_permute_graph_cpp(G, i0, j0)
+              G_pi_star <- G_pi_curr
+              G_pi_star[q - 1, q] <- 1L - G_pi_curr[q - 1, q]
+              G_pi_star[q, q - 1] <- G_pi_star[q - 1, q]
+              lZ_curr <- degord_log_Zhat_pi_from_pool_cpp(
+                pool_t, G_pi_curr, alpha, 1.0, 1.0, delta, tilt_mode
+              )
+              lZ_star <- degord_log_Zhat_pi_from_pool_cpp(
+                pool_t, G_pi_star, alpha, 1.0, 1.0, delta, tilt_mode
+              )
+              expect_equal(
+                d_cache, lZ_star - lZ_curr, tolerance = 1e-10,
+                info = sprintf("q=%d alpha=%g delta=%g tilt=%d (%d,%d)",
+                               q, alpha, delta, tilt_mode, i0, j0)
+              )
+            }
+          }
+        }
+      }
+    }
+  }
+})
+
+
+test_that("delta_log_Zhat_pi_toggle matches the z reference bit-exact", {
+  wrapper <- testthat::test_path("..", "..", "dev", "numerical_analyses",
+                                  "z_degord_wrapper.cpp")
+  skip_if_not(file.exists(wrapper),
+              "z DEGORD wrapper not present (delta-toggle z parity is local-only).")
+  z_src <- "~/Dropbox/Projecten/sv/z/R/src/degord_sampler.h"
+  skip_if_not(file.exists(z_src), "z reference header not present.")
+  suppressMessages(Rcpp::sourceCpp(wrapper))
+
+  set.seed(99)
+  for (q in c(3L, 5L, 7L)) {
+    G <- matrix(0L, q, q)
+    for (i in 1:(q - 1)) for (j in (i + 1):q)
+      if (runif(1) < 0.5) {
+        G[i, j] <- 1L
+        G[j, i] <- 1L
+      }
+    dim_pool <- q + q * (q - 1) / 2
+    M <- 50L
+    pool   <- matrix(rnorm(M * dim_pool), M, dim_pool)
+    pool_t <- t(pool)
+
+    for (alpha in c(1.0, 2.0)) {
+      for (delta in c(0.0, 0.5)) {
+        for (tilt_mode in c(0L, 1L)) {
+          for (i0 in 0:(q - 2)) {
+            for (j0 in (i0 + 1):(q - 1)) {
+              d_bgms <- degord_delta_log_Zhat_pi_toggle_cpp(
+                pool, pool_t, G, i0, j0, alpha, 1.0, 1.0, delta, tilt_mode
+              )
+              d_z <- z_degord_delta_log_Zhat_pi_toggle(
+                pool, pool_t, G, i0, j0, alpha, 1.0, 1.0, delta, tilt_mode
+              )
+              expect_equal(
+                d_bgms, d_z, tolerance = 1e-12,
+                info = sprintf("q=%d (i,j)=(%d,%d) alpha=%g delta=%g tilt=%d",
+                               q, i0, j0, alpha, delta, tilt_mode)
+              )
+            }
+          }
+        }
+      }
+    }
+  }
+})
+
+
+# ---- Variance of log Zhat scales as 1/M_inner ------------------------------
+
+test_that("variance of log Zhat scales as 1/M_inner (Phase 2 acceptance)", {
+  q <- 5L
+  set.seed(42)
+  G_pi <- matrix(0L, q, q)
+  for (i in 1:(q - 1)) for (j in (i + 1):q)
+    if (runif(1) < 0.5) {
+      G_pi[i, j] <- 1L
+      G_pi[j, i] <- 1L
+    }
+  alpha <- 2.0
+  beta  <- 1.0
+  sigma <- 1.0
+  delta <- 0.5
+
+  M_grid <- c(30L, 1000L)
+  n_reps <- 200L
+  vars <- numeric(length(M_grid))
+  for (k in seq_along(M_grid)) {
+    M <- M_grid[k]
+    vals <- vapply(seq_len(n_reps), function(r) {
+      pool_t <- degord_draw_bartlett_pool_cpp(q, M, seed = r + 1000L * k)
+      degord_log_Zhat_pi_from_pool_cpp(
+        pool_t, G_pi, alpha, beta, sigma, delta
+      )
+    }, double(1))
+    vars[k] <- var(vals)
+  }
+  # Under 1/M scaling, vars[1] / vars[2] approx M_grid[2] / M_grid[1] = 33.3.
+  # MC noise on the variance estimate at n_reps=200 is large; allow a
+  # 2x window around the theoretical ratio.
+  ratio <- vars[1] / vars[2]
+  expected_ratio <- M_grid[2] / M_grid[1]
+  expect_gt(ratio, 0.5 * expected_ratio)
+  expect_lt(ratio, 2.0 * expected_ratio)
+})
+
+
+# ---- SafeRNG-based Bartlett pool is the right shape ------------------------
+
+test_that("draw_bartlett_pool returns a (dim x M) standard-normal matrix", {
+  for (q in c(3L, 5L, 10L)) {
+    M <- 50L
+    pool_t <- degord_draw_bartlett_pool_cpp(q, M, seed = 1L)
+    dim_expected <- q + q * (q - 1) / 2
+    expect_equal(nrow(pool_t), dim_expected, info = sprintf("q=%d", q))
+    expect_equal(ncol(pool_t), M)
+    # Mean ~ 0, sd ~ 1 over many samples. Use a wide window for MC noise.
+    expect_lt(abs(mean(pool_t)), 0.1)
+    expect_gt(sd(as.numeric(pool_t)), 0.85)
+    expect_lt(sd(as.numeric(pool_t)), 1.15)
+  }
+})
+
+
+# ---- Same SafeRNG seed produces the same pool ------------------------------
+
+test_that("draw_bartlett_pool is deterministic in seed", {
+  pool_a <- degord_draw_bartlett_pool_cpp(5L, 30L, seed = 42L)
+  pool_b <- degord_draw_bartlett_pool_cpp(5L, 30L, seed = 42L)
+  expect_identical(pool_a, pool_b)
+  pool_c <- degord_draw_bartlett_pool_cpp(5L, 30L, seed = 43L)
+  expect_false(identical(pool_a, pool_c))
+})
diff --git a/tests/testthat/test-ggm-hierarchical.R b/tests/testthat/test-ggm-hierarchical.R
new file mode 100644
index 00000000..be78db4b
--- /dev/null
+++ b/tests/testthat/test-ggm-hierarchical.R
@@ -0,0 +1,412 @@
+# --------------------------------------------------------------------------- #
+# Phase 4a: hierarchical-spec MH integration smoke tests.
+#
+# Exercises the GGMModel between-edge MH path with graph_prior_spec_ set
+# to GraphPriorSpec::Hierarchical: the joint MH ratio is multiplied by
+# V(Γ_curr)/V(Γ_star) using the Phase 2 DEGORD sampler + Phase 3 V/RR
+# estimator, and log_Z_NLO_curr is incremented on accept via Phase 1.
+#
+# These tests cover the wiring (chain runs, output shape is sensible,
+# prior-family validation fires). Full SBC validation of the
+# hierarchical-spec target is Phase 5.
+# --------------------------------------------------------------------------- #
+
+
+test_that("hierarchical-spec chain runs without crash and stays in valid state", {
+  set.seed(7)
+  p <- 5L
+  n <- 100L
+  Y <- scale(matrix(rnorm(n * p), n, p), scale = FALSE)
+
+  out <- ggm_hierarchical_smoke_cpp(
+    observations      = Y,
+    inclusion_prob    = 0.5,
+    interaction_scale = 1.0,
+    diagonal_shape    = 1.0,
+    diagonal_rate     = 1.0,
+    delta             = 0.5,
+    M_inner           = 100L,
+    kappa             = 1.0,
+    rho               = 0.5,
+    n_sweeps          = 200L,
+    seed              = 42L
+  )
+
+  expect_true(is.list(out))
+  expect_true(is.matrix(out$final_edges))
+  expect_equal(dim(out$final_edges), c(p, p))
+  expect_true(isSymmetric(out$final_edges))
+  # Edge counts in [0, p(p-1)/2] across the trajectory.
+  max_edges <- p * (p - 1L) / 2L
+  expect_true(all(out$n_edges_path >= 0L))
+  expect_true(all(out$n_edges_path <= max_edges))
+  # Some sweeps with non-trivial edge mass (chain isn't stuck at 0 or full).
+  steady <- out$n_edges_path[101:200]
+  expect_gt(length(unique(steady)), 1L)
+})
+
+
+test_that("hierarchical-spec chain is reproducible under fixed seed", {
+  set.seed(13)
+  p <- 4L
+  n <- 50L
+  Y <- scale(matrix(rnorm(n * p), n, p), scale = FALSE)
+  args <- list(
+    observations = Y, inclusion_prob = 0.5,
+    interaction_scale = 1.0,
+    diagonal_shape = 1.0, diagonal_rate = 1.0,
+    delta = 0.5, M_inner = 50L, kappa = 1.0, rho = 0.5,
+    n_sweeps = 50L, seed = 99L
+  )
+  out_a <- do.call(ggm_hierarchical_smoke_cpp, args)
+  out_b <- do.call(ggm_hierarchical_smoke_cpp, args)
+  expect_identical(out_a$final_edges,  out_b$final_edges)
+  expect_identical(out_a$n_edges_path, out_b$n_edges_path)
+  # Different seed -> different trajectory (with overwhelming probability).
+  args$seed <- 100L
+  out_c <- do.call(ggm_hierarchical_smoke_cpp, args)
+  expect_false(identical(out_a$n_edges_path, out_c$n_edges_path))
+})
+
+
+test_that("bgm() R API accepts graph_prior_spec = 'hierarchical' end-to-end", {
+  set.seed(99)
+  p <- 5L
+  n <- 100L
+  Y <- scale(matrix(rnorm(n * p), n, p), scale = FALSE)
+  colnames(Y) <- paste0("V", seq_len(p))
+
+  fit <- bgm(
+    Y, variable_type = "continuous",
+    interaction_prior     = normal_prior(scale = 1),
+    precision_scale_prior = gamma_prior(shape = 1, rate = 1),
+    delta = 0.5,
+    graph_prior_spec = "hierarchical",
+    z_ratio_tuning   = list(M_inner = 50L, kappa = 1.0, rho = 0.5),
+    iter = 200L, warmup = 50L,
+    update_method = "adaptive-metropolis",
+    chains = 1L, cores = 1L, seed = 1L,
+    display_progress = "none", verbose = FALSE
+  )
+  ind <- S7::prop(fit, "posterior_mean_indicator")
+  expect_true(is.matrix(ind))
+  expect_equal(dim(ind), c(p, p))
+  expect_true(all(ind >= 0 & ind <= 1))
+  expect_true(all(is.finite(ind)))
+})
+
+
+test_that("bgm() accepts update_method = 'nuts' + 'hierarchical' (2x2 API)", {
+  # F1. The hierarchical-spec hook lives in the between-edge MH update;
+  # NUTS only governs the within-model continuous block. The 2x2
+  # cross-product (NUTS/AMH x joint/hierarchical) is supposed to be a clean
+  # cross-product, so smoke-test the NUTS leg here. Mirrors the AMH smoke
+  # right above; differs only in update_method.
+  set.seed(99)
+  p <- 5L
+  n <- 100L
+  Y <- scale(matrix(rnorm(n * p), n, p), scale = FALSE)
+  colnames(Y) <- paste0("V", seq_len(p))
+
+  fit <- bgm(
+    Y, variable_type = "continuous",
+    interaction_prior     = normal_prior(scale = 1),
+    precision_scale_prior = gamma_prior(shape = 1, rate = 1),
+    delta = 0.5,
+    graph_prior_spec = "hierarchical",
+    z_ratio_tuning   = list(M_inner = 50L, kappa = 1.0, rho = 0.5),
+    iter = 200L, warmup = 50L,
+    update_method = "nuts",
+    chains = 1L, cores = 1L, seed = 1L,
+    display_progress = "none", verbose = FALSE
+  )
+  ind <- S7::prop(fit, "posterior_mean_indicator")
+  expect_true(is.matrix(ind))
+  expect_equal(dim(ind), c(p, p))
+  expect_true(all(ind >= 0 & ind <= 1))
+  expect_true(all(is.finite(ind)))
+  # Edge-count trajectory should be non-degenerate (chain isn't locked at
+  # the empty or full graph for the whole post-warmup window).
+  raw <- S7::prop(fit, "raw_samples")
+  ind_chn <- raw$indicator[[1L]]
+  n_edges_path <- rowSums(ind_chn)
+  expect_gt(length(unique(n_edges_path)), 1L)
+  max_edges <- p * (p - 1L) / 2L
+  expect_true(all(n_edges_path >= 0L & n_edges_path <= max_edges))
+})
+
+
+test_that("bgm() exposes v_ratio_diagnostics under hierarchical (F2)", {
+  # F2 smoke. Hierarchical fit must surface per-iteration sign(V_curr) and
+  # log|V_curr| as a top-level v_ratio_diagnostics field. Joint-spec fits
+  # must NOT have that field. Operational cell (q=5, δ=0.5, κ=1, ρ=0.5)
+  # is well inside the sign-positive regime, so we additionally assert
+  # that the vast majority of recorded signs are +1.
+  set.seed(101)
+  p <- 5L
+  n <- 100L
+  Y <- scale(matrix(rnorm(n * p), n, p), scale = FALSE)
+  fit_hier <- bgm(
+    Y, variable_type = "continuous",
+    interaction_prior     = normal_prior(scale = 1),
+    precision_scale_prior = gamma_prior(shape = 1, rate = 1),
+    delta = 0.5,
+    graph_prior_spec = "hierarchical",
+    z_ratio_tuning   = list(M_inner = 50L, kappa = 1.0, rho = 0.5),
+    iter = 200L, warmup = 50L,
+    update_method = "adaptive-metropolis",
+    chains = 1L, cores = 1L, seed = 1L,
+    display_progress = "none", verbose = FALSE
+  )
+  expect_true(!is.null(fit_hier$v_ratio_diagnostics))
+  expect_named(fit_hier$v_ratio_diagnostics, c("sign", "log_abs"))
+  expect_length(fit_hier$v_ratio_diagnostics$sign, 1L)
+  expect_length(fit_hier$v_ratio_diagnostics$log_abs, 1L)
+  s <- fit_hier$v_ratio_diagnostics$sign[[1L]]
+  la <- fit_hier$v_ratio_diagnostics$log_abs[[1L]]
+  expect_length(s, 200L)
+  expect_length(la, 200L)
+  expect_true(all(s %in% c(-1L, 1L)))
+  expect_true(all(is.finite(la)))
+  # Operational cell: sign should be +1 nearly always.
+  expect_gt(mean(s == 1L), 0.95)
+
+  # Joint-spec fit should not surface the diagnostic.
+  fit_joint <- bgm(
+    Y, variable_type = "continuous",
+    interaction_prior     = normal_prior(scale = 1),
+    precision_scale_prior = gamma_prior(shape = 1, rate = 1),
+    delta = 0.5,
+    graph_prior_spec = "joint",
+    iter = 100L, warmup = 50L,
+    update_method = "adaptive-metropolis",
+    chains = 1L, cores = 1L, seed = 1L,
+    display_progress = "none", verbose = FALSE
+  )
+  expect_null(fit_joint$v_ratio_diagnostics)
+})
+
+
+test_that("bgms_posterior_mean() applies sign-correction (F3/F4)", {
+  # F3 sign-corrected posterior mean. In the operational cell, sign === +1
+  # so the sign-corrected mean must coincide bit-by-bit with the plain
+  # posterior_mean_* fields. For a joint-spec fit the function falls
+  # through to the plain fields unchanged.
+  set.seed(102)
+  p <- 5L
+  n <- 100L
+  Y <- scale(matrix(rnorm(n * p), n, p), scale = FALSE)
+
+  # Hierarchical fit in the operational cell - all signs should be +1.
+  fit_hier <- bgm(
+    Y, variable_type = "continuous",
+    interaction_prior     = normal_prior(scale = 1),
+    precision_scale_prior = gamma_prior(shape = 1, rate = 1),
+    delta = 0.5,
+    graph_prior_spec = "hierarchical",
+    z_ratio_tuning   = list(M_inner = 50L, kappa = 1.0, rho = 0.5),
+    iter = 200L, warmup = 50L,
+    update_method = "adaptive-metropolis",
+    chains = 1L, cores = 1L, seed = 1L,
+    display_progress = "none", verbose = FALSE
+  )
+  stopifnot(all(fit_hier$v_ratio_diagnostics$sign[[1L]] == 1L))
+  pm_hier <- bgms_posterior_mean(fit_hier)
+  expect_named(pm_hier, c("pairwise", "main", "indicator", "residual_variance"))
+  # Bit-equal to the plain mean when sign === +1.
+  expect_equal(pm_hier$pairwise,          fit_hier$posterior_mean_pairwise,
+               tolerance = 1e-12)
+  expect_equal(pm_hier$indicator,         fit_hier$posterior_mean_indicator,
+               tolerance = 1e-12)
+  expect_equal(pm_hier$residual_variance, fit_hier$posterior_mean_residual_variance,
+               tolerance = 1e-12)
+  expect_null(pm_hier$main)
+
+  # Joint-spec fit: function falls through to the plain fields unchanged.
+  fit_joint <- bgm(
+    Y, variable_type = "continuous",
+    interaction_prior     = normal_prior(scale = 1),
+    precision_scale_prior = gamma_prior(shape = 1, rate = 1),
+    delta = 0.5,
+    graph_prior_spec = "joint",
+    iter = 200L, warmup = 50L,
+    update_method = "adaptive-metropolis",
+    chains = 1L, cores = 1L, seed = 1L,
+    display_progress = "none", verbose = FALSE
+  )
+  pm_joint <- bgms_posterior_mean(fit_joint)
+  expect_equal(pm_joint$pairwise,          fit_joint$posterior_mean_pairwise)
+  expect_equal(pm_joint$indicator,         fit_joint$posterior_mean_indicator)
+  expect_equal(pm_joint$residual_variance, fit_joint$posterior_mean_residual_variance)
+})
+
+
+test_that("bgms_posterior_mean() applies sign correction when signs flip", {
+  # Synthetic check: take a real hierarchical fit, flip half of the
+  # recorded signs by hand, and confirm bgms_posterior_mean() returns a
+  # different (sign-weighted) mean. This validates the weighting math
+  # without needing to engineer a tuning that actually produces -1 signs.
+  set.seed(103)
+  p <- 4L
+  n <- 80L
+  Y <- scale(matrix(rnorm(n * p), n, p), scale = FALSE)
+  fit <- bgm(
+    Y, variable_type = "continuous",
+    interaction_prior     = normal_prior(scale = 1),
+    precision_scale_prior = gamma_prior(shape = 1, rate = 1),
+    delta = 0.5,
+    graph_prior_spec = "hierarchical",
+    z_ratio_tuning   = list(M_inner = 50L, kappa = 1.0, rho = 0.5),
+    iter = 200L, warmup = 50L,
+    update_method = "adaptive-metropolis",
+    chains = 1L, cores = 1L, seed = 7L,
+    display_progress = "none", verbose = FALSE
+  )
+  # Force every other iteration to sign = -1. bgms_posterior_mean reads
+  # `fit` via `$`, which routes through S7::prop for the real fit but
+  # works on plain lists too - construct a list view here so we can
+  # mutate the sign vector without fighting S7 immutability.
+  n_iter <- length(fit$v_ratio_diagnostics$sign[[1L]])
+  # 2/3 +1, 1/3 -1 - non-trivial weighting that avoids sum-zero degeneracy.
+  fake_signs <- rep_len(c(1L, 1L, -1L), n_iter)
+  fit_mut <- list(
+    v_ratio_diagnostics              = list(sign    = list(fake_signs),
+                                            log_abs = fit$v_ratio_diagnostics$log_abs),
+    raw_samples                      = fit$raw_samples,
+    posterior_mean_main              = fit$posterior_mean_main,
+    posterior_mean_pairwise          = fit$posterior_mean_pairwise,
+    posterior_mean_indicator         = fit$posterior_mean_indicator,
+    posterior_mean_residual_variance = fit$posterior_mean_residual_variance
+  )
+
+  pm_plain <- bgms_posterior_mean(fit)
+  pm_signed <- bgms_posterior_mean(fit_mut)
+
+  # Hand-compute expected sign-weighted pairwise (precision scale, then
+  # mapped to association scale).
+  pairwise_mat <- do.call(rbind, fit_mut$raw_samples$pairwise)
+  expected_pair_precision <- colSums(pairwise_mat * fake_signs) / sum(fake_signs)
+  expected_pair_assoc <- matrix(0, p, p,
+                                dimnames = dimnames(fit$posterior_mean_pairwise))
+  expected_pair_assoc[lower.tri(expected_pair_assoc)] <- expected_pair_precision
+  expected_pair_assoc <- expected_pair_assoc + t(expected_pair_assoc)
+  expected_pair_assoc <- -0.5 * expected_pair_assoc
+
+  expect_equal(pm_signed$pairwise, expected_pair_assoc, tolerance = 1e-12)
+  # And the signed result must differ from the plain mean (signs were
+  # half-flipped, so it almost surely differs by more than numerical noise).
+  expect_gt(max(abs(pm_signed$pairwise - pm_plain$pairwise)), 1e-6)
+})
+
+
+test_that("bgm() with hierarchical errors helpfully for Cauchy slab", {
+  set.seed(11)
+  Y <- scale(matrix(rnorm(50 * 4L), 50, 4L), scale = FALSE)
+  expect_error(
+    bgm(Y, variable_type = "continuous",
+        interaction_prior     = cauchy_prior(scale = 1),
+        graph_prior_spec      = "hierarchical",
+        iter = 50L, warmup = 25L,
+        update_method = "adaptive-metropolis",
+        chains = 1L, cores = 1L, seed = 1L,
+        display_progress = "none", verbose = FALSE),
+    regexp = "Normal slab"
+  )
+})
+
+
+test_that("bgm() rejects hierarchical for non-continuous models", {
+  set.seed(13)
+  # Pure ordinal data should land in 'omrf' which has no precision matrix.
+  X <- matrix(sample(0:3, 200 * 4L, replace = TRUE), 200, 4L)
+  expect_error(
+    bgm(X, variable_type = "ordinal",
+        graph_prior_spec = "hierarchical",
+        iter = 50L, warmup = 25L,
+        update_method = "adaptive-metropolis",
+        chains = 1L, cores = 1L, seed = 1L,
+        display_progress = "none", verbose = FALSE),
+    regexp = "continuous"
+  )
+})
+
+
+test_that("z_ratio_tuning validation rejects out-of-range values", {
+  set.seed(11)
+  Y <- scale(matrix(rnorm(50 * 4L), 50, 4L), scale = FALSE)
+  for (bad in list(
+    list(M_inner = 0L, kappa = 1, rho = 0.5),
+    list(M_inner = 100, kappa = -1, rho = 0.5),
+    list(M_inner = 100, kappa = 1, rho = 0),
+    list(M_inner = 100, kappa = 1, rho = 1)
+  )) {
+    expect_error(
+      bgm(Y, variable_type = "continuous",
+          interaction_prior     = normal_prior(scale = 1),
+          precision_scale_prior = gamma_prior(shape = 1, rate = 1),
+          graph_prior_spec = "hierarchical",
+          z_ratio_tuning   = bad,
+          iter = 50L, warmup = 25L,
+          update_method = "adaptive-metropolis",
+          chains = 1L, cores = 1L, seed = 1L,
+          display_progress = "none", verbose = FALSE),
+      regexp = "z_ratio_tuning"
+    )
+  }
+})
+
+
+test_that("hierarchical-spec stays finite at p = 20 where linear c underflows", {
+  # F5 regression. At p = 20 with δ = 1, log_Z_NLO is on the order of -500
+  # nats, so c = κ · exp(log_Z_NLO) flushes to 0 in double precision. The
+  # pre-F5 linear V_at_Gamma_pi_degord then evaluates to NaN / Inf, the MH
+  # ratio is auto-rejected, and the chain locks at its initial state. The
+  # log-space V must keep ln_alpha finite and let the chain move.
+  skip_if_not_installed("MASS")
+  set.seed(2026)
+  p <- 20L
+  n <- 200L
+  Y <- scale(matrix(rnorm(n * p), n, p), scale = FALSE)
+  fit <- bgm(
+    Y, variable_type = "continuous",
+    interaction_prior     = normal_prior(scale = 1),
+    precision_scale_prior = gamma_prior(shape = 1, rate = 1),
+    delta = 1.0,
+    graph_prior_spec = "hierarchical",
+    z_ratio_tuning   = list(M_inner = 50L, kappa = 1.0, rho = 0.5),
+    iter = 100L, warmup = 50L,
+    update_method = "adaptive-metropolis",
+    chains = 1L, cores = 1L, seed = 2026L,
+    display_progress = "none", verbose = FALSE
+  )
+  ind <- S7::prop(fit, "posterior_mean_indicator")
+  expect_true(is.matrix(ind))
+  expect_equal(dim(ind), c(p, p))
+  expect_true(all(is.finite(ind)))
+  expect_true(all(ind >= 0 & ind <= 1))
+  # The chain should NOT be stuck at its initial state (all-zero edges).
+  # If F5 regressed and every proposal auto-rejected, every off-diagonal
+  # ind entry would be exactly 0.
+  off <- ind[upper.tri(ind)]
+  expect_gt(sum(off > 0), 0L)
+})
+
+
+test_that("hierarchical-spec scales with delta sensibly", {
+  # As δ increases, the |K|^δ tilt should push K further into the
+  # interior of M+(Γ), making large connected graphs feasible. We don't
+  # validate the SBC target here — only that the chain doesn't degenerate
+  # to all-zero or all-one as δ varies. (Full SBC validation is Phase 5.)
+  set.seed(91)
+  p <- 4L
+  n <- 80L
+  Y <- scale(matrix(rnorm(n * p), n, p), scale = FALSE)
+  for (delta in c(0.0, 0.5, 1.0)) {
+    out <- ggm_hierarchical_smoke_cpp(
+      Y, 0.5, 1.0, 1.0, 1.0, delta, 50L, 1.0, 0.5, 100L, 17L
+    )
+    steady <- out$n_edges_path[51:100]
+    expect_gt(length(unique(steady)), 1L,
+              label = sprintf("steady is constant at delta=%g", delta))
+  }
+})
diff --git a/tests/testthat/test-log-z-nlo.R b/tests/testthat/test-log-z-nlo.R
new file mode 100644
index 00000000..0918f93a
--- /dev/null
+++ b/tests/testthat/test-log-z-nlo.R
@@ -0,0 +1,236 @@
+# --------------------------------------------------------------------------- #
+# Closed-form log Z(G) approximations: parity vs the z-project reference,
+# and incremental-vs-full agreement at alpha = 1.
+#
+# These tests exercise the Stage 3 Phase 1a numerics primitives:
+#   log_Z_NLO_gamma_cpp                       (general-alpha full-recompute)
+#   log_Z_NLO_gamma_degord_cpp                (degord wrapper)
+#   log_Z_NLO_gamma_delta_incr_alpha1_cpp     (alpha = 1 incremental form)
+#
+# Ground truth is a fixture (tests/testthat/fixtures/log_z_nlo_reference.rds)
+# pre-generated from log_Z_laplace_NLO_gamma_cpp at branchB_chain.cpp:470-620
+# in the z project (see dev/numerical_analyses/generate_log_z_fixture.R).
+# --------------------------------------------------------------------------- #
+
+
+# ---- Helpers -----------------------------------------------------------------
+
+draw_random_graph <- function(q, seed, p_edge = 0.5) {
+  set.seed(seed)
+  G <- matrix(0L, q, q)
+  if (q < 2) return(G)
+  for (i in 1:(q - 1)) for (j in (i + 1):q) {
+    if (runif(1) < p_edge) {
+      G[i, j] <- 1L
+      G[j, i] <- 1L
+    }
+  }
+  G
+}
+
+
+# ---- Parity against the z reference -----------------------------------------
+
+test_that("log_Z_NLO_gamma matches the z reference bit-exact on the fixture", {
+  fixture_path <- testthat::test_path("fixtures", "log_z_nlo_reference.rds")
+  cases <- readRDS(fixture_path)
+
+  for (case in cases) {
+    for (alpha in c(1.0, 2.0)) {
+      for (delta in c(0.0, 0.5, 1.0)) {
+        for (include_F in c(FALSE, TRUE)) {
+          key <- sprintf("a%g_d%g_F%s", alpha, delta, include_F)
+          ours <- log_Z_NLO_gamma_cpp(
+            case$G, alpha, 1.0, 1.0, include_F, delta
+          )
+          ref <- case$values[[key]]
+          expect_equal(
+            ours, ref,
+            tolerance = 1e-12,
+            info = sprintf("q=%d rep=%d %s", case$q, case$rep, key)
+          )
+        }
+      }
+    }
+  }
+})
+
+
+# ---- Empty-graph closed form ------------------------------------------------
+
+test_that("log_Z_NLO_gamma at empty graph equals the analytic constant", {
+  # Empty G: log Z = sum_v lgamma(alpha + delta) - q lgamma(alpha) - q delta log(beta).
+  # The (E_count = 0) branch in the implementation returns this directly.
+  for (q in c(2L, 5L, 10L)) {
+    for (alpha in c(0.5, 1.0, 2.5)) {
+      for (delta in c(0.0, 0.5, 1.0)) {
+        for (beta in c(0.5, 1.0, 2.0)) {
+          G <- matrix(0L, q, q)
+          expected <- q * lgamma(alpha + delta) - q * lgamma(alpha) -
+                      q * delta * log(beta)
+          got <- log_Z_NLO_gamma_cpp(G, alpha, beta, 1.0, FALSE, delta)
+          expect_equal(got, expected, tolerance = 1e-12,
+                       info = sprintf("q=%d alpha=%g beta=%g delta=%g",
+                                      q, alpha, beta, delta))
+        }
+      }
+    }
+  }
+})
+
+
+# ---- DEGORD wrapper: permutation-consistent --------------------------------
+
+test_that("log_Z_NLO_gamma_degord equals full-recompute on hand-permuted graph", {
+  for (q in c(5L, 7L)) {
+    G <- draw_random_graph(q, seed = 41 + q)
+    for (i_zero in c(0L, 2L)) {
+      for (j_zero in c(1L, 3L, q - 1L)) {
+        if (i_zero >= j_zero) next
+        perm <- c(i_zero, j_zero, setdiff(0:(q - 1), c(i_zero, j_zero)))
+        G_perm <- G[perm + 1L, perm + 1L]
+        for (alpha in c(1.0, 2.0)) {
+          for (delta in c(0.0, 1.0)) {
+            via_degord <- log_Z_NLO_gamma_degord_cpp(
+              G, i_zero, j_zero, alpha, 1.0, 1.0, FALSE, delta
+            )
+            via_full <- log_Z_NLO_gamma_cpp(
+              G_perm, alpha, 1.0, 1.0, FALSE, delta
+            )
+            expect_equal(
+              via_degord, via_full,
+              tolerance = 1e-12,
+              info = sprintf("q=%d (i,j)=(%d,%d) alpha=%g delta=%g",
+                             q, i_zero, j_zero, alpha, delta)
+            )
+          }
+        }
+      }
+    }
+  }
+})
+
+
+# ---- alpha = 1 incremental agrees with full-recompute difference -----------
+
+test_that("alpha = 1 incremental matches full-recompute log-Z difference", {
+  for (q in c(3L, 5L, 7L)) {
+    G <- draw_random_graph(q, seed = 71 + q)
+    for (delta in c(0.0, 0.5, 1.0)) {
+      for (include_F in c(FALSE, TRUE)) {
+        for (i_zero in 0:(q - 2)) {
+          for (j_zero in (i_zero + 1):(q - 1)) {
+            G_after <- G
+            G_after[i_zero + 1, j_zero + 1] <- 1L - G[i_zero + 1, j_zero + 1]
+            G_after[j_zero + 1, i_zero + 1] <- G_after[i_zero + 1, j_zero + 1]
+            full_diff <- log_Z_NLO_gamma_cpp(
+              G_after, 1.0, 1.0, 1.0, include_F, delta
+            ) - log_Z_NLO_gamma_cpp(
+              G, 1.0, 1.0, 1.0, include_F, delta
+            )
+            inc <- log_Z_NLO_gamma_delta_incr_alpha1_cpp(
+              G, i_zero, j_zero, 1.0, 1.0, delta, include_F
+            )
+            expect_equal(
+              inc, full_diff,
+              tolerance = 1e-10,
+              info = sprintf("q=%d (i,j)=(%d,%d) delta=%g F=%s",
+                             q, i_zero, j_zero, delta, include_F)
+            )
+          }
+        }
+      }
+    }
+  }
+})
+
+
+# ---- alpha > 1 incremental agrees with full-recompute difference -----------
+
+test_that("general-alpha incremental matches full-recompute log-Z difference", {
+  # Phase 1b: the alpha > 1 cascade adds H_e dependence on the larger endpoint
+  # via the -4 beta (alpha - 1) / M_v[v] piece, so a toggle (i, j) cascades to
+  # every edge incident to i (forward AND backward). V_a = {i, j} ∪ N(i)
+  # captures the full affected set.
+  for (q in c(3L, 5L, 7L)) {
+    G <- draw_random_graph(q, seed = 200 + q)
+    for (alpha in c(1.5, 2.0, 3.0)) {
+      for (delta in c(0.0, 0.5, 1.0)) {
+        for (include_F in c(FALSE, TRUE)) {
+          for (i_zero in 0:(q - 2)) {
+            for (j_zero in (i_zero + 1):(q - 1)) {
+              G_after <- G
+              G_after[i_zero + 1, j_zero + 1] <- 1L - G[i_zero + 1, j_zero + 1]
+              G_after[j_zero + 1, i_zero + 1] <- G_after[i_zero + 1, j_zero + 1]
+              full_diff <- log_Z_NLO_gamma_cpp(
+                G_after, alpha, 1.0, 1.0, include_F, delta
+              ) - log_Z_NLO_gamma_cpp(
+                G, alpha, 1.0, 1.0, include_F, delta
+              )
+              inc <- log_Z_NLO_gamma_delta_incr_alphaN_cpp(
+                G, i_zero, j_zero, alpha, 1.0, 1.0, delta, include_F
+              )
+              expect_equal(
+                inc, full_diff,
+                tolerance = 1e-10,
+                info = sprintf("q=%d (i,j)=(%d,%d) alpha=%g delta=%g F=%s",
+                               q, i_zero, j_zero, alpha, delta, include_F)
+              )
+            }
+          }
+        }
+      }
+    }
+  }
+})
+
+
+# ---- alpha = 1 reduction: general-alpha == alpha-1 specialisation ----------
+
+test_that("general-alpha incremental reduces to alpha=1 form at alpha = 1", {
+  for (q in c(3L, 5L, 7L)) {
+    G <- draw_random_graph(q, seed = 311 + q)
+    for (delta in c(0.0, 0.5, 1.0)) {
+      for (include_F in c(FALSE, TRUE)) {
+        for (i_zero in 0:(q - 2)) {
+          for (j_zero in (i_zero + 1):(q - 1)) {
+            via_alpha1 <- log_Z_NLO_gamma_delta_incr_alpha1_cpp(
+              G, i_zero, j_zero, 1.0, 1.0, delta, include_F
+            )
+            via_alphaN <- log_Z_NLO_gamma_delta_incr_alphaN_cpp(
+              G, i_zero, j_zero, 1.0, 1.0, 1.0, delta, include_F
+            )
+            expect_equal(
+              via_alphaN, via_alpha1,
+              tolerance = 1e-10,
+              info = sprintf("q=%d (i,j)=(%d,%d) delta=%g F=%s",
+                             q, i_zero, j_zero, delta, include_F)
+            )
+          }
+        }
+      }
+    }
+  }
+})
+
+
+# ---- delta finite-difference vs analytic delta-derivative (alpha = 1) -------
+
+test_that("d(log Z) / d(delta) matches finite differences at alpha = 1", {
+  # Sanity: at small steps in delta, the log Z value should be smooth in delta.
+  # We check second-order central FD vs successive evaluations agree to O(h^2).
+  for (q in c(3L, 4L)) {
+    G <- draw_random_graph(q, seed = 91 + q)
+    delta0 <- 0.7
+    h <- 1e-4
+    for (include_F in c(FALSE, TRUE)) {
+      z_minus <- log_Z_NLO_gamma_cpp(G, 1.0, 1.0, 1.0, include_F, delta0 - h)
+      z_plus  <- log_Z_NLO_gamma_cpp(G, 1.0, 1.0, 1.0, include_F, delta0 + h)
+      # Symmetric FD: (z_plus - z_minus) / (2 h). We just check the value is
+      # finite and the function evaluates without NaN/Inf across a delta sweep.
+      d_fd <- (z_plus - z_minus) / (2 * h)
+      expect_true(is.finite(d_fd),
+                  info = sprintf("q=%d F=%s d_fd not finite", q, include_F))
+    }
+  }
+})
diff --git a/tests/testthat/test-sbc-ggm-hierarchical-q10.R b/tests/testthat/test-sbc-ggm-hierarchical-q10.R
new file mode 100644
index 00000000..45846242
--- /dev/null
+++ b/tests/testthat/test-sbc-ggm-hierarchical-q10.R
@@ -0,0 +1,196 @@
+# --------------------------------------------------------------------------- #
+# SBC for the hierarchical-spec GGM with edge selection at q = 10.
+#
+# Sister of test-sbc-ggm-hierarchical.R (which fixes q = 5). Truth is
+# the same NUTS-at-fixed-Gamma generator from sample_ggm_prior() with a
+# longer warmup; we trust it at q = 10 under the operational alpha = 1
+# Gamma diagonal prior. AKM-J is NOT used here (its perfect-sampler
+# cap-saturation at q = 10, delta = 2 would corrupt the truth pool).
+#
+# Truth generation:
+#   1. Draw Gamma_true ~ Bern(p_inc) on the upper triangle.
+#   2. Draw one well-mixed K_true via sample_ggm_prior() at fixed
+#      Gamma_true (constrained NUTS, n_warmup_truth iterations).
+#   3. Generate Y | K_true ~ N(0, K_true^{-1}), n_obs cases.
+#
+# Inference:
+#   4. Fit bgm() with graph_prior_spec = "hierarchical" on Y.
+#
+# Tested cells:
+#   delta ∈ {0, 1, 2}, alpha = 1, R = 500 reps each.
+#
+# Uniformity tested by KS at alpha = 0.01 per K_ii per delta. Edge-
+# indicator marginal calibration (P(gamma_ij = 1 | Y) tracks observed
+# Gamma_true frequency) is a sanity check.
+#
+# Gated behind BGMS_RUN_SLOW_TESTS. Expected runtime is multi-hour on
+# M5 Pro; this is the validation sweep meant to be run overnight.
+# --------------------------------------------------------------------------- #
+
+
+# ---- Skip gate -------------------------------------------------------------
+
+skip_if_not(
+  identical(tolower(Sys.getenv("BGMS_RUN_SLOW_TESTS")), "true"),
+  "Set BGMS_RUN_SLOW_TESTS=true to run hierarchical-spec SBC tests."
+)
+
+
+# ---- Test parameters -------------------------------------------------------
+
+p          <- 10L
+n_obs      <- 200L
+R          <- 500L
+delta_grid <- c(0.0, 1.0, 2.0)
+p_inc      <- 0.5
+# Z-matched chain config (see ~/Dropbox/Projecten/SV/Z/R/scripts/
+# sbc_degord_hier_paired_sweep.R). Z's q=10 SBC passes at delta ∈ {1, 2}
+# only with these heavier chains; bgms's earlier 100 + 400 was 20× too
+# short. delta = 0 is OUT of the operational regime for the V/RR
+# estimator and is expected to fail (kept in the sweep as documentation).
+iter_post  <- 2000L
+warmup_post <- 2000L
+n_warmup_truth <- 2000L
+
+# Across-rep parallelism. Each rep is single-threaded (bgm(cores = 1L)); the
+# fan-out happens here via parallel::mclapply. Capped at 12 to match the user-
+# specified core budget; on machines with fewer cores the cap drops to
+# detectCores().
+N_CORES <- min(parallel::detectCores(), 12L)
+
+
+# ---- One-rep helper --------------------------------------------------------
+
+one_rep <- function(r, delta) {
+  # Truth: Gamma_true ~ Bern(p_inc) on upper triangle, K_true | Gamma_true.
+  set.seed(10000L + r * 100L + as.integer(delta * 100))
+  G_true <- matrix(0L, p, p)
+  for (i in 1:(p - 1)) for (j in (i + 1):p) {
+    if (runif(1) < p_inc) {
+      G_true[i, j] <- 1L
+      G_true[j, i] <- 1L
+    }
+  }
+  G_full <- G_true
+  diag(G_full) <- 1L
+
+  truth <- sample_ggm_prior(
+    p             = p,
+    n_samples     = 1L,
+    n_warmup      = n_warmup_truth,
+    interaction_prior     = normal_prior(scale = 1),
+    precision_scale_prior = gamma_prior(shape = 1, rate = 1),
+    delta            = delta,
+    edge_indicators  = G_full,
+    seed             = 20000L + r,
+    verbose          = FALSE
+  )
+  K_diag_true    <- as.numeric(truth$K_diag[1L, ])
+  K_offdiag_true <- as.numeric(truth$K_offdiag[1L, ])
+
+  # Reconstruct K_true (p x p). offdiag_names is row-major; parse the
+  # names rather than trust the natural ordering.
+  K_true <- diag(K_diag_true)
+  for (k in seq_along(truth$offdiag_names)) {
+    ij <- strsplit(truth$offdiag_names[k], "_")[[1L]]
+    i <- as.integer(ij[2L])
+    j <- as.integer(ij[3L])
+    K_true[i, j] <- truth$K_offdiag[1L, k]
+    K_true[j, i] <- K_true[i, j]
+  }
+
+  # Data: Y ~ N(0, K^{-1}).
+  Sigma_true <- solve(K_true)
+  Y <- MASS::mvrnorm(n_obs, mu = rep(0, p), Sigma = Sigma_true)
+
+  # Inference under the hierarchical-spec chain.
+  fit <- bgm(
+    Y,
+    variable_type         = "continuous",
+    interaction_prior     = normal_prior(scale = 1),
+    precision_scale_prior = gamma_prior(shape = 1, rate = 1),
+    delta            = delta,
+    graph_prior_spec = "hierarchical",
+    # Z-matched: M_inner = 50 (half), kappa = 2 (doubled for bigger V-series
+    # convergence margin at q = 10).
+    z_ratio_tuning   = list(M_inner = 50L, kappa = 2.0, rho = 0.5),
+    iter             = iter_post,
+    warmup           = warmup_post,
+    update_method    = "adaptive-metropolis",
+    chains           = 1L, cores = 1L,
+    seed             = 30000L + r,
+    display_progress = "none",
+    verbose          = FALSE
+  )
+  raw      <- S7::prop(fit, "raw_samples")
+  main_chn <- raw$main[[1L]]            # iter x p, bgms convention: K_yy_ii = K_ii / 2
+  ind_chn  <- raw$indicator[[1L]]       # iter x p(p-1)/2, 0/1
+  # Rank K_ii on the K_yy partial-association scale (both truth and bgm).
+  K_ii_rank <- vapply(seq_len(p), function(i) {
+    sum(main_chn[, i] < K_diag_true[i])
+  }, integer(1L))
+
+  # Gamma_true upper triangle, edge-order matched to bgm indicator slots.
+  gamma_true_vec <- G_true[upper.tri(G_true)]
+  gamma_post_mean <- colMeans(ind_chn)
+
+  list(
+    K_ii_rank       = K_ii_rank,
+    gamma_true_vec  = gamma_true_vec,
+    gamma_post_mean = gamma_post_mean,
+    n_iter_post     = nrow(main_chn)
+  )
+}
+
+
+# ---- Run + uniformity tests across the delta sweep -------------------------
+
+for (delta in delta_grid) {
+  results <- parallel::mclapply(
+    seq_len(R),
+    function(r) one_rep(r, delta),
+    mc.cores = N_CORES,
+    mc.preschedule = TRUE
+  )
+  # Surface any worker errors before the assertion stage so the failure
+  # message references the bad rep rather than a downstream stack-rbind.
+  err_idx <- which(vapply(results, inherits, logical(1L), what = "try-error"))
+  if (length(err_idx) > 0L) {
+    stop(sprintf(
+      "SBC q=10: %d/%d reps errored at delta=%g; first failure at r=%d:\n%s",
+      length(err_idx), R, delta, err_idx[1L], conditionMessage(attr(results[[err_idx[1L]]], "condition"))
+    ))
+  }
+
+  K_ii_ranks <- do.call(rbind, lapply(results, `[[`, "K_ii_rank"))
+  gamma_true_mat <- do.call(rbind, lapply(results, `[[`, "gamma_true_vec"))
+  gamma_post_mat <- do.call(rbind, lapply(results, `[[`, "gamma_post_mean"))
+  n_iter <- results[[1L]]$n_iter_post
+
+  # Per-K_ii KS test of normalised ranks ~ Uniform(0, 1).
+  ks_p <- vapply(seq_len(p), function(i) {
+    u <- (K_ii_ranks[, i] + 0.5) / (n_iter + 1L)
+    suppressWarnings(stats::ks.test(u, "punif")$p.value)
+  }, double(1L))
+
+  for (i in seq_len(p)) {
+    test_that(sprintf("SBC q=10: K_ii[%d] ranks uniform under hierarchical (delta=%g)",
+                      i, delta), {
+      expect_gt(ks_p[i], 0.01,
+                label = sprintf("ks_p=%.3g (delta=%g, K_ii[%d])",
+                                ks_p[i], delta, i))
+    })
+  }
+
+  # Gamma marginal calibration: posterior P(gamma_ij = 1 | Y) averaged across
+  # reps should track the prior inclusion probability p_inc.
+  test_that(sprintf("SBC q=10: hierarchical-spec gamma marginal tracks p_inc (delta=%g)",
+                    delta), {
+    mean_post_gamma <- mean(gamma_post_mat)
+    mean_true_gamma <- mean(gamma_true_mat)
+    se_gap <- sqrt(p_inc * (1 - p_inc) / (R * ncol(gamma_post_mat)))
+    expect_lt(abs(mean_post_gamma - mean_true_gamma), 4 * se_gap,
+              label = sprintf("|post-mean(gamma) - true-mean(gamma)|=%.3g, SE=%.3g",
+                              abs(mean_post_gamma - mean_true_gamma), se_gap))
+  })
+}
diff --git a/tests/testthat/test-sbc-ggm-hierarchical.R b/tests/testthat/test-sbc-ggm-hierarchical.R
new file mode 100644
index 00000000..b594296a
--- /dev/null
+++ b/tests/testthat/test-sbc-ggm-hierarchical.R
@@ -0,0 +1,182 @@
+# --------------------------------------------------------------------------- #
+# SBC for the hierarchical-spec GGM with edge selection (Phase 5).
+#
+# Simulation-based calibration of the bgms hierarchical-spec chain at
+# p = 5 across the dimension-adaptive delta sweep. Restricted to q = 5
+# rather than q = 10 because at q = 10 the AKM-J perfect-sampler truth
+# pool hits its max_proposals cap on ~23% of delta = 2 draws (selection
+# bias on hard high-tilt cases), which would corrupt the truth itself.
+# At q = 5 we trust the NUTS-at-fixed-Gamma truth from sample_ggm_prior()
+# completely.
+#
+# Truth generation:
+#   1. Draw Gamma_true ~ Bern(p_inc) on the upper triangle.
+#   2. Draw one well-mixed K_true via sample_ggm_prior() at fixed
+#      Gamma_true (constrained NUTS, n_warmup = 500).
+#   3. Generate Y | K_true ~ N(0, K_true^{-1}), n_obs cases.
+#
+# Inference:
+#   4. Fit bgm() with graph_prior_spec = "hierarchical" on Y.
+#
+# Per-replicate ranks:
+#   - K_ii ranks of K_diag_true[i] in raw_samples$main[[1]][, i].
+#     Both sample_ggm_prior$K_diag and bgm raw_samples$main are reported
+#     on the bgms partial-association scale (the same K_yy convention);
+#     no scale conversion needed. Verified empirically by matching
+#     marginal means at low delta on a near-empty graph.
+#
+# Uniformity tested by KS at alpha = 0.01 per parameter per delta.
+# Global chi-squared as a fallback. Edge-indicator marginal calibration
+# (P(gamma_ij = 1 | Y) tracks observed Gamma_true frequency) is a sanity
+# check rather than strict SBC.
+#
+# Gated behind BGMS_RUN_SLOW_TESTS — expected runtime ~ 1.5–2 h on M5 Pro
+# for R = 300 reps across 4 delta values.
+# --------------------------------------------------------------------------- #
+
+
+# ---- Skip gate -------------------------------------------------------------
+
+skip_if_not(
+  identical(tolower(Sys.getenv("BGMS_RUN_SLOW_TESTS")), "true"),
+  "Set BGMS_RUN_SLOW_TESTS=true to run hierarchical-spec SBC tests."
+)
+
+
+# ---- Test parameters -------------------------------------------------------
+
+p          <- 5L
+n_obs      <- 100L
+R          <- 300L
+delta_grid <- c(0.0, 0.5, 1.0, 2.0)
+p_inc      <- 0.5
+iter_post  <- 400L
+warmup_post <- 100L
+n_warmup_truth <- 500L
+
+
+# ---- One-rep helper --------------------------------------------------------
+
+one_rep <- function(r, delta) {
+  # Truth: Gamma_true ~ Bern(p_inc) on upper triangle, K_true | Gamma_true.
+  set.seed(10000L + r * 100L + as.integer(delta * 100))
+  G_true <- matrix(0L, p, p)
+  for (i in 1:(p - 1)) for (j in (i + 1):p) {
+    if (runif(1) < p_inc) {
+      G_true[i, j] <- 1L
+      G_true[j, i] <- 1L
+    }
+  }
+  G_full <- G_true
+  diag(G_full) <- 1L
+
+  truth <- sample_ggm_prior(
+    p             = p,
+    n_samples     = 1L,
+    n_warmup      = n_warmup_truth,
+    interaction_prior     = normal_prior(scale = 1),
+    precision_scale_prior = gamma_prior(shape = 1, rate = 1),
+    delta            = delta,
+    edge_indicators  = G_full,
+    seed             = 20000L + r,
+    verbose          = FALSE
+  )
+  K_diag_true    <- as.numeric(truth$K_diag[1L, ])
+  K_offdiag_true <- as.numeric(truth$K_offdiag[1L, ])
+
+  # Reconstruct K_true (p x p). offdiag_names is row-major ("K_1_2", "K_1_3",
+  # "K_1_4", ..., "K_2_3", ...) while upper.tri() returns column-major
+  # indices, so we parse the names rather than trust the natural ordering.
+  K_true <- diag(K_diag_true)
+  for (k in seq_along(truth$offdiag_names)) {
+    ij <- strsplit(truth$offdiag_names[k], "_")[[1L]]
+    i <- as.integer(ij[2L])
+    j <- as.integer(ij[3L])
+    K_true[i, j] <- truth$K_offdiag[1L, k]
+    K_true[j, i] <- K_true[i, j]
+  }
+
+  # Data: Y ~ N(0, K^{-1}).
+  Sigma_true <- solve(K_true)
+  Y <- MASS::mvrnorm(n_obs, mu = rep(0, p), Sigma = Sigma_true)
+
+  # Inference under the hierarchical-spec chain.
+  fit <- bgm(
+    Y,
+    variable_type         = "continuous",
+    interaction_prior     = normal_prior(scale = 1),
+    precision_scale_prior = gamma_prior(shape = 1, rate = 1),
+    delta            = delta,
+    graph_prior_spec = "hierarchical",
+    z_ratio_tuning   = list(M_inner = 100L, kappa = 1.0, rho = 0.5),
+    iter             = iter_post,
+    warmup           = warmup_post,
+    update_method    = "adaptive-metropolis",
+    chains           = 1L, cores = 1L,
+    seed             = 30000L + r,
+    display_progress = "none",
+    verbose          = FALSE
+  )
+  raw      <- S7::prop(fit, "raw_samples")
+  main_chn <- raw$main[[1L]]            # iter x p, bgms convention: K_yy_ii = K_ii / 2
+  ind_chn  <- raw$indicator[[1L]]       # iter x p(p-1)/2, 0/1
+  # Rank K_ii on the K_yy partial-association scale (both truth and bgm).
+  K_ii_rank <- vapply(seq_len(p), function(i) {
+    sum(main_chn[, i] < K_diag_true[i])
+  }, integer(1L))
+
+  # Gamma_true upper triangle, edge-order matched to bgm indicator slots.
+  gamma_true_vec <- G_true[upper.tri(G_true)]
+  gamma_post_mean <- colMeans(ind_chn)
+
+  list(
+    K_ii_rank       = K_ii_rank,
+    gamma_true_vec  = gamma_true_vec,
+    gamma_post_mean = gamma_post_mean,
+    n_iter_post     = nrow(main_chn)
+  )
+}
+
+
+# ---- Run + uniformity tests across the delta sweep -------------------------
+
+for (delta in delta_grid) {
+  results <- vector("list", R)
+  for (r in seq_len(R)) results[[r]] <- one_rep(r, delta)
+
+  # Stack ranks: R x p.
+  K_ii_ranks <- do.call(rbind, lapply(results, `[[`, "K_ii_rank"))
+  gamma_true_mat <- do.call(rbind, lapply(results, `[[`, "gamma_true_vec"))
+  gamma_post_mat <- do.call(rbind, lapply(results, `[[`, "gamma_post_mean"))
+  n_iter <- results[[1L]]$n_iter_post
+
+  # Per-K_ii KS test of normalised ranks ~ Uniform(0, 1).
+  ks_p <- vapply(seq_len(p), function(i) {
+    u <- (K_ii_rank_to_u <- (K_ii_ranks[, i] + 0.5) / (n_iter + 1L))
+    suppressWarnings(stats::ks.test(u, "punif")$p.value)
+  }, double(1L))
+
+  for (i in seq_len(p)) {
+    test_that(sprintf("SBC: K_ii[%d] ranks uniform under hierarchical (delta=%g)",
+                      i, delta), {
+      expect_gt(ks_p[i], 0.01,
+                label = sprintf("ks_p=%.3g (delta=%g, K_ii[%d])",
+                                ks_p[i], delta, i))
+    })
+  }
+
+  # Gamma marginal calibration: posterior P(gamma_ij = 1 | Y) averaged across
+  # reps should track the prior inclusion probability p_inc (which equals the
+  # marginal in the hierarchical-spec target since pi(Gamma) is Bernoulli).
+  test_that(sprintf("hierarchical-spec gamma marginal tracks p_inc (delta=%g)",
+                    delta), {
+    mean_post_gamma <- mean(gamma_post_mat)
+    mean_true_gamma <- mean(gamma_true_mat)
+    # 3-SE tolerance on the Monte Carlo gap (both means estimate the same
+    # population value under correct calibration).
+    se_gap <- sqrt(p_inc * (1 - p_inc) / (R * ncol(gamma_post_mat)))
+    expect_lt(abs(mean_post_gamma - mean_true_gamma), 4 * se_gap,
+              label = sprintf("|post-mean(gamma) - true-mean(gamma)|=%.3g, SE=%.3g",
+                              abs(mean_post_gamma - mean_true_gamma), se_gap))
+  })
+}
diff --git a/tests/testthat/test-z-ratio-estimator.R b/tests/testthat/test-z-ratio-estimator.R
new file mode 100644
index 00000000..51266809
--- /dev/null
+++ b/tests/testthat/test-z-ratio-estimator.R
@@ -0,0 +1,397 @@
+# --------------------------------------------------------------------------- #
+# Phase 3: Russian-Roulette V(Γ, U) estimator of 1/Z(Γ).
+#
+# Exercises:
+#   degord_V_at_Gamma_pi_cpp   - signed V at fixed (G_pi, c, rho)
+#   degord_draw_U_rr_cpp       - fresh (K_depth, pools_t) draw
+#
+# Acceptance per the Phase 3 plan section:
+#   - Mean V across many independent (K_depth, pools) draws sits within MC
+#     CI of 1/Z(Γ).
+#   - K_depth has the Geom(1 - rho) distribution.
+#   - Outputs are deterministic in the SafeRNG seed.
+# --------------------------------------------------------------------------- #
+
+
+# ---- Helpers -----------------------------------------------------------------
+
+draw_random_graph <- function(q, seed, p_edge = 0.5) {
+  set.seed(seed)
+  G <- matrix(0L, q, q)
+  if (q < 2) return(G)
+  for (i in 1:(q - 1)) for (j in (i + 1):q)
+    if (runif(1) < p_edge) {
+      G[i, j] <- 1L
+      G[j, i] <- 1L
+    }
+  G
+}
+
+
+# ---- V is unbiased for 1/Z within MC noise ---------------------------------
+
+test_that("mean V across independent pools converges to 1/Z within MC noise", {
+  # Test V's pointwise unbiasedness: E[V(Γ, U)] = 1/Z(Γ).
+  # "Truth" is built from a high-precision log_Zhat at very large M_inner,
+  # averaged over many independent pools to dampen MC noise to << test
+  # tolerance.
+  q <- 5L
+  G_pi <- draw_random_graph(q, seed = 1L)
+  alpha <- 1.0
+  beta  <- 1.0
+  sigma <- 1.0
+  delta <- 0.5
+
+  # Truth proxy: M = 5000, n_truth = 20 → ~22000 effective samples; the SE on
+  # log_Zhat is dominated by Z_truth's MC noise which is ~ sd/sqrt(n_truth).
+  M_truth <- 5000L
+  n_truth <- 20L
+  log_Zhat_truth <- vapply(seq_len(n_truth), function(k) {
+    pool_t <- degord_draw_bartlett_pool_cpp(q, M_truth, seed = 9000L + k)
+    degord_log_Zhat_pi_from_pool_cpp(
+      pool_t, G_pi, alpha, beta, sigma, delta, 0L
+    )
+  }, double(1))
+  Z_truth    <- exp(mean(log_Zhat_truth))
+  invZ_truth <- 1 / Z_truth
+
+  # V estimator
+  kappa   <- 1.5
+  rho     <- 0.5
+  M_inner <- 100L
+  c_val   <- kappa * Z_truth
+  n_outer <- 1000L
+  V_samples <- vapply(seq_len(n_outer), function(m) {
+    U <- degord_draw_U_rr_cpp(M_inner, q, rho, seed = 100000L + m)
+    degord_V_at_Gamma_pi_cpp(
+      U$K_depth, U$pools_t, G_pi,
+      alpha, beta, sigma, delta, c_val, rho, 0L
+    )
+  }, double(1))
+
+  mean_V  <- mean(V_samples)
+  se_mean <- sd(V_samples) / sqrt(n_outer)
+  z_score <- (mean_V - invZ_truth) / se_mean
+
+  # Allow up to 4 SD - tail probability < 1e-4, generous enough to absorb
+  # Z_truth's MC residual without flaking on CI.
+  expect_lt(abs(z_score), 4.0,
+            label = sprintf("|z|=%.2f (mean_V=%.4f vs 1/Z=%.4f, SE=%.4g)",
+                            abs(z_score), mean_V, invZ_truth, se_mean))
+})
+
+
+# ---- K_depth follows Geom(1 - rho) -----------------------------------------
+
+test_that("K_depth ~ Geom(1 - rho) under SafeRNG", {
+  rho <- 0.5
+  n_draws <- 10000L
+  K_samples <- vapply(seq_len(n_draws), function(m) {
+    U <- degord_draw_U_rr_cpp(M_inner = 1L, q = 3L, rho = rho,
+                              seed = 50000L + m)
+    U$K_depth
+  }, integer(1))
+  # Geom(1 - rho) has mean rho / (1 - rho) and P(K=0) = (1 - rho).
+  # Tolerances are absolute (testthat's `tolerance` is relative for numeric);
+  # we compute the absolute gap directly to avoid relative-tolerance traps on
+  # the small tail probability.
+  expect_lt(abs(mean(K_samples) - rho / (1 - rho)), 0.05)
+  expect_lt(abs(mean(K_samples == 0L) - (1 - rho)), 0.02)
+  # Tail: P(K >= 5) = rho^5 = 0.03125. SE at n=10000 ~ 0.0017, allow 3*SE.
+  expect_lt(abs(mean(K_samples >= 5L) - rho^5), 0.005)
+})
+
+
+# ---- K_depth = 0 short-circuits to V = 1/c ---------------------------------
+
+test_that("V at K_depth = 0 equals 1/c exactly", {
+  q <- 5L
+  G_pi <- draw_random_graph(q, seed = 17L)
+  empty_pools <- list()
+  for (c_val in c(0.5, 1.0, 3.7, 12.0)) {
+    v <- degord_V_at_Gamma_pi_cpp(
+      0L, empty_pools, G_pi,
+      1.0, 1.0, 1.0, 0.5, c_val, 0.5, 0L
+    )
+    expect_equal(v, 1 / c_val, tolerance = 1e-12,
+                 info = sprintf("c_val=%g", c_val))
+  }
+})
+
+
+# ---- draw_U_degord_rr is deterministic in seed -----------------------------
+
+test_that("draw_U_degord_rr produces identical output under identical seed", {
+  U_a <- degord_draw_U_rr_cpp(M_inner = 30L, q = 5L, rho = 0.5, seed = 42L)
+  U_b <- degord_draw_U_rr_cpp(M_inner = 30L, q = 5L, rho = 0.5, seed = 42L)
+  expect_equal(U_a$K_depth, U_b$K_depth)
+  for (n in seq_along(U_a$pools_t))
+    expect_identical(U_a$pools_t[[n]], U_b$pools_t[[n]])
+
+  # Different seed → different output (with high prob).
+  U_c <- degord_draw_U_rr_cpp(M_inner = 30L, q = 5L, rho = 0.5, seed = 43L)
+  has_pool <- length(U_c$pools_t) > 0L && length(U_a$pools_t) > 0L &&
+              U_a$K_depth == U_c$K_depth
+  if (has_pool) {
+    diffs <- abs(U_a$pools_t[[1]] - U_c$pools_t[[1]])
+    expect_gt(max(diffs), 0)
+  }
+})
+
+
+# ---- Log-space V matches linear V at small p, survives underflow at large p --
+
+test_that("V_log_at_Gamma_pi matches V_at_Gamma_pi in the safe regime", {
+  # F5 bit-equality smoke. log-space and linear forms operate on the same
+  # pools_t / G_pi / chain_aux and should produce identical |V| (modulo
+  # FP reordering in the signed log-sum-exp).
+  alpha <- 1.0
+  beta  <- 1.0
+  sigma <- 1.0
+  delta <- 0.5
+  kappa <- 1.0
+  rho   <- 0.5
+  for (q in c(5L, 10L, 20L)) {
+    G_pi <- draw_random_graph(q, seed = 31L + q)
+    # Centre c near 1/Z by approximating log_Z with a single Bartlett pool
+    # at large M. Tolerance is set on differences, so the centring need not
+    # be perfect; we just need the linear form to stay finite.
+    pool_truth <- degord_draw_bartlett_pool_cpp(q, 2000L, seed = 41L + q)
+    log_Z <- degord_log_Zhat_pi_from_pool_cpp(
+      pool_truth, G_pi, alpha, beta, sigma, delta, 0L
+    )
+    c_val <- kappa * exp(log_Z)
+    log_c <- log(kappa) + log_Z
+    M_inner <- 100L
+    n_samples <- 50L
+    diffs <- numeric(n_samples)
+    n_finite <- 0L
+    for (m in seq_len(n_samples)) {
+      U <- degord_draw_U_rr_cpp(M_inner, q, rho, seed = 300000L + 1000L * q + m)
+      v_lin <- degord_V_at_Gamma_pi_cpp(
+        U$K_depth, U$pools_t, G_pi,
+        alpha, beta, sigma, delta, c_val, rho, 0L
+      )
+      v_log <- degord_V_log_at_Gamma_pi_cpp(
+        U$K_depth, U$pools_t, G_pi,
+        alpha, beta, sigma, delta, log_c, rho, 0L
+      )
+      if (!is.finite(v_lin) || v_lin == 0 ||
+          !is.finite(v_log$log_abs) || v_log$sign == 0) next
+      lhs <- log(abs(v_lin))
+      rhs <- v_log$log_abs
+      n_finite <- n_finite + 1L
+      diffs[n_finite] <- abs(lhs - rhs)
+      # Sign agreement is exact (no FP reordering can flip sign).
+      expect_equal(sign(v_lin), v_log$sign,
+                   info = sprintf("q=%d, m=%d, K=%d", q, m, U$K_depth))
+    }
+    diffs <- diffs[seq_len(n_finite)]
+    # At q = 5 the series barely truncates; tolerance ~ 1e-12. At q = 20 the
+    # alternating series can accumulate more cancellation; loosen to 1e-9.
+    tol <- if (q <= 5L) 1e-12 else if (q <= 10L) 1e-10 else 1e-9
+    expect_true(max(diffs) < tol,
+                info = sprintf("q=%d, max|log_abs gap|=%.3g (tol=%.3g, n_finite=%d)",
+                               q, max(diffs), tol, n_finite))
+  }
+})
+
+
+test_that("V_log_at_Gamma_pi stays finite where V_at_Gamma_pi underflows", {
+  # F5 motivating regime: when log_c is far below 0 in magnitude (c = exp(log_c)
+  # flushes to 0 in double precision), the linear form returns Inf / NaN. The
+  # log-space form must remain finite.
+  q <- 5L
+  G_pi <- draw_random_graph(q, seed = 7L)
+  alpha <- 1.0
+  beta  <- 1.0
+  sigma <- 1.0
+  delta <- 0.5
+  rho   <- 0.5
+  # log_c = -3500 mirrors what log_Z_NLO + log_kappa can reach at p = 100;
+  # exp(-3500) is 0 in double precision.
+  log_c <- -3500
+  c_val <- exp(log_c)
+  expect_equal(c_val, 0)
+  M_inner <- 50L
+  n_samples <- 20L
+  n_log_finite <- 0L
+  n_lin_finite <- 0L
+  for (m in seq_len(n_samples)) {
+    U <- degord_draw_U_rr_cpp(M_inner, q, rho, seed = 400000L + m)
+    v_lin <- degord_V_at_Gamma_pi_cpp(
+      U$K_depth, U$pools_t, G_pi,
+      alpha, beta, sigma, delta, c_val, rho, 0L
+    )
+    v_log <- degord_V_log_at_Gamma_pi_cpp(
+      U$K_depth, U$pools_t, G_pi,
+      alpha, beta, sigma, delta, log_c, rho, 0L
+    )
+    if (is.finite(v_lin) && v_lin != 0) n_lin_finite <- n_lin_finite + 1L
+    if (is.finite(v_log$log_abs) && v_log$sign != 0) n_log_finite <- n_log_finite + 1L
+  }
+  # The linear form should mostly explode here.
+  expect_lt(n_lin_finite, n_samples / 2L)
+  # The log-space form should mostly stay finite.
+  expect_gt(n_log_finite, n_samples / 2L)
+})
+
+
+# ---- F6: paired V_log with within-pool cache reuse ------------------------
+
+test_that("paired V_log matches two independent V_log calls bit-equal", {
+  # F6 bit-equality. The paired call shares the inner Phi-build across
+  # G_pi_curr / G_pi_star by caching (rw_head, S_trail) under a_curr.
+  # The single-graph call rebuilds Phi from scratch for each graph. Both
+  # paths should produce identical (log|V|, sign(V)) for both curr and
+  # star to FP-reordering tolerance.
+  alpha <- 1.0
+  beta  <- 1.0
+  sigma <- 1.0
+  delta <- 0.5
+  rho   <- 0.5
+  cases <- list(
+    list(q = 5L,  K = 0L, seed = 11L),
+    list(q = 5L,  K = 2L, seed = 12L),
+    list(q = 5L,  K = 5L, seed = 13L),
+    list(q = 10L, K = 0L, seed = 21L),
+    list(q = 10L, K = 2L, seed = 22L),
+    list(q = 10L, K = 5L, seed = 23L),
+    list(q = 20L, K = 0L, seed = 31L),
+    list(q = 20L, K = 2L, seed = 32L),
+    list(q = 20L, K = 5L, seed = 33L)
+  )
+  for (case in cases) {
+    q <- case$q
+    K <- case$K
+    seed <- case$seed
+    # G_pi_curr is a random symmetric 0/1 matrix; G_pi_star toggles only
+    # the trailing slot (q-2, q-1). This mirrors what the chain hot path
+    # passes after degord_permutation places the toggled edge at (q-2, q-1).
+    G_pi_curr <- draw_random_graph(q, seed = seed, p_edge = 0.5)
+    G_pi_star <- G_pi_curr
+    G_pi_star[q - 1L, q] <- 1L - G_pi_curr[q - 1L, q]
+    G_pi_star[q, q - 1L] <- G_pi_star[q - 1L, q]
+    # Build a K-deep pool list deterministically.
+    M_inner <- 50L
+    pools_t <- vector("list", K)
+    for (n in seq_len(K)) {
+      pools_t[[n]] <- degord_draw_bartlett_pool_cpp(
+        q, M_inner, seed = 700000L + 1000L * seed + n)
+    }
+    # log_c values: choose so c ~ exp(log_Z) is near 1/Z (any reasonable
+    # value lets V be finite).
+    if (K > 0L) {
+      log_Z_proxy <- degord_log_Zhat_pi_from_pool_cpp(
+        pools_t[[1L]], G_pi_curr, alpha, beta, sigma, delta, 0L
+      )
+    } else {
+      log_Z_proxy <- 0
+    }
+    log_c_curr <- log_Z_proxy
+    log_c_star <- log_Z_proxy + 0.1   # small offset; both must stay finite
+    # Independent path.
+    v_curr_ind <- degord_V_log_at_Gamma_pi_cpp(
+      K, pools_t, G_pi_curr,
+      alpha, beta, sigma, delta, log_c_curr, rho, 0L
+    )
+    v_star_ind <- degord_V_log_at_Gamma_pi_cpp(
+      K, pools_t, G_pi_star,
+      alpha, beta, sigma, delta, log_c_star, rho, 0L
+    )
+    # Paired path.
+    v_pair <- degord_V_log_pair_at_Gamma_curr_star_cpp(
+      K, pools_t, G_pi_curr, G_pi_star,
+      alpha, beta, sigma, delta, log_c_curr, log_c_star, rho, 0L
+    )
+    # Sign agreement is exact.
+    expect_equal(v_pair$curr$sign, v_curr_ind$sign,
+                 info = sprintf("q=%d, K=%d (curr)", q, K))
+    expect_equal(v_pair$star$sign, v_star_ind$sign,
+                 info = sprintf("q=%d, K=%d (star)", q, K))
+    # log_abs agreement modulo FP reordering inside the inner kernel.
+    # Both paths execute the same per-sample row_logw arithmetic; only the
+    # row q-2 evaluation differs (cache reuses S_trail). Tolerance must
+    # absorb the order-of-additions difference in the log-sum-exp wrap-up
+    # at large q.
+    tol <- if (q <= 10L) 1e-12 else 1e-9
+    if (is.finite(v_pair$curr$log_abs) && is.finite(v_curr_ind$log_abs)) {
+      expect_lt(abs(v_pair$curr$log_abs - v_curr_ind$log_abs), tol,
+                label = sprintf("q=%d, K=%d, curr log_abs gap=%.3g",
+                                q, K,
+                                abs(v_pair$curr$log_abs - v_curr_ind$log_abs)))
+    }
+    if (is.finite(v_pair$star$log_abs) && is.finite(v_star_ind$log_abs)) {
+      expect_lt(abs(v_pair$star$log_abs - v_star_ind$log_abs), tol,
+                label = sprintf("q=%d, K=%d, star log_abs gap=%.3g",
+                                q, K,
+                                abs(v_pair$star$log_abs - v_star_ind$log_abs)))
+    }
+  }
+})
+
+
+test_that("log_Zhat_star_from_cache matches fresh log_Zhat at G_pi_star", {
+  # Direct check on the cache adapter. log_Zhat_star_from_cache must
+  # produce the same value as a fresh log_Zhat_pi_from_pool on the star
+  # graph, to FP-reordering tolerance (the cache path reuses row_logw[r]
+  # for r != q-2 instead of recomputing).
+  alpha <- 1.0
+  beta  <- 1.0
+  sigma <- 1.0
+  delta <- 0.5
+  for (q in c(5L, 10L, 20L)) {
+    G_pi_curr <- draw_random_graph(q, seed = 41L + q, p_edge = 0.5)
+    G_pi_star <- G_pi_curr
+    G_pi_star[q - 1L, q] <- 1L - G_pi_curr[q - 1L, q]
+    G_pi_star[q, q - 1L] <- G_pi_star[q - 1L, q]
+    pool <- degord_draw_bartlett_pool_cpp(
+      q, M_inner = 200L, seed = 800000L + q)
+    log_Zhat_star_cache <- degord_log_Zhat_star_from_cache_cpp(
+      pool, G_pi_curr, G_pi_star,
+      alpha, beta, sigma, delta, 0L
+    )
+    log_Zhat_star_direct <- degord_log_Zhat_pi_from_pool_cpp(
+      pool, G_pi_star, alpha, beta, sigma, delta, 0L
+    )
+    tol <- if (q <= 10L) 1e-12 else 1e-9
+    expect_lt(abs(log_Zhat_star_cache - log_Zhat_star_direct), tol,
+              label = sprintf("q=%d, cache=%.6f, direct=%.6f, gap=%.3g",
+                              q, log_Zhat_star_cache, log_Zhat_star_direct,
+                              abs(log_Zhat_star_cache - log_Zhat_star_direct)))
+  }
+})
+
+
+# ---- Signed V can flip sign for K_depth >= 1 -------------------------------
+
+test_that("V tracks the alternating-series sign", {
+  # When Zhat_n - c is consistently small relative to c, V stays positive;
+  # when Zhat_n - c can be larger than c (or negative), V can change sign.
+  # Force an extreme case: choose c far away from 1/Z so the (Zhat - c)/c
+  # products grow and the alternating series can produce negative V.
+  q <- 5L
+  G_pi <- draw_random_graph(q, seed = 23L)
+  alpha <- 1.0
+  beta  <- 1.0
+  sigma <- 1.0
+  delta <- 0.5
+  rho   <- 0.6
+  M_inner <- 50L
+  # Tiny c (far below 1/Z) inflates each (Zhat - c)/c factor; K_depth >= 1
+  # samples will fluctuate widely in sign.
+  c_val <- 1e-4
+  n_outer <- 200L
+  V_samples <- vapply(seq_len(n_outer), function(m) {
+    U <- degord_draw_U_rr_cpp(M_inner, q, rho, seed = 200000L + m)
+    if (U$K_depth == 0L) return(NA_real_)
+    degord_V_at_Gamma_pi_cpp(
+      U$K_depth, U$pools_t, G_pi,
+      alpha, beta, sigma, delta, c_val, rho, 0L
+    )
+  }, double(1))
+  V_samples <- V_samples[!is.na(V_samples) & is.finite(V_samples)]
+  # We should see SOME negative values when c is small (alternating series).
+  expect_true(any(V_samples < 0),
+              label = "no negative V samples under small c_val (alternating sign expected)")
+})