Source code for experiments.harness.ab_regime

"""Experiment: a^2 != b^2 distributional regime."""

import time
from typing import Optional

import numpy as np
from numpy.random import default_rng

from experiments.harness.phi import make_sparse_plus_noise
from experiments.harness.results import ExperimentResult
from experiments.harness.worker import TrialSpec, run_trials_parallel


def run_ab_regime_experiment(
    n_range: range = range(4, 7),
    gaps: Optional[list[float]] = None,
    num_trials: int = 20,
    epsilon: float = 0.3,
    qfs_shots: int = 2000,
    classical_samples_prover: int = 1000,
    classical_samples_verifier: int = 3000,
    base_seed: int = 42,
    max_workers: int = 1,
    shard_index: int | None = None,
    num_shards: int | None = None,
) -> ExperimentResult:
    r"""Distributional regime sweep: verification with :math:`a^2 \neq b^2`.

    For each :math:`n` in *n_range* and each *gap* value, the experiment
    constructs a sparse-plus-noise :math:`\varphi` whose true Parseval
    weight is :math:`pw \approx 0.52`, then sets

    .. math::

        a^2 = pw - \tfrac{\text{gap}}{2}, \qquad
        b^2 = pw + \tfrac{\text{gap}}{2}

    with :math:`a^2` clamped to a minimum of 0.01 and :math:`b^2` clamped
    to a maximum of 1.0.  When gap = 0 this recovers the standard
    :math:`a = b` regime used by all other experiments; positive gaps
    exercise Definition 14 of [ITCS2024]_ where the verifier is told that
    :math:`a < b`.

    .. note::

        **Theorem 12 completeness precondition is satisfied only at
        gap = 0.**  Theorem 12 requires
        :math:`\varepsilon \ge 2\sqrt{b^2 - a^2}`; with
        :math:`\varepsilon = 0.3` this means
        :math:`\mathrm{gap} \le (\varepsilon/2)^2 = 0.0225`, i.e. only the
        ``gap = 0`` cell of the default sweep formally satisfies the
        precondition.  For ``gap`` :math:`> 0.0225` the experiment is
        intentionally running outside Theorem 12's completeness guarantee
        on a benign ``sparse_plus_noise`` :math:`\varphi` whose true
        Parseval mass :math:`\|\tilde\varphi\|_2^2 = 0.52` sits at the
        centre of :math:`[a^2, b^2]`, so honest interactions still produce
        :math:`\sum \widehat\xi^2 \ge a^2 - \varepsilon^2/8`.  This is
        **not** evidence that Theorem 13 (a worst-case sample-complexity
        lower bound for distinguishing random parities from
        :math:`U_{n+1}`) is loose; Theorem 13 does not upper-bound the
        acceptance probability of any specific honest run on benign
        inputs.  The previous figure-script interpretation was corrected
        per ``audit/ab_regime.md`` (M1).

    The "ab regime" sweep is structurally a **1-D** :math:`a^2`-sweep:
    :math:`b^2 \le 0.72` and the list-size cap
    :math:`64 b^2/\vartheta^2 \le 512` never binds the maximum honest list
    of 4 (M2 in the audit).  To probe both completeness and soundness of
    Definition 14, the centre :math:`pw` would need to vary independently
    --- see ``audit/FOLLOW_UPS.md``.

    Besides returning the result, the function prints a banner, a per-gap
    summary table (mean acceptance rate, mean hypothesis-correct rate and
    mean :math:`a^2` / :math:`b^2` over all trials of that gap, pooled
    over :math:`n`) and the wall-clock time to standard output.

    Parameters
    ----------
    n_range : range
        Range of :math:`n` values to sweep.
    gaps : list[float] or None
        Values of :math:`b^2 - a^2` to sweep.
        Default: ``[0.0, 0.05, 0.1, 0.2, 0.3, 0.4]``.
    num_trials : int
        Trials per :math:`(n, \text{gap})` cell.
    epsilon : float
        Accuracy parameter :math:`\varepsilon`.
    qfs_shots : int
        QFS copy budget per trial.
    classical_samples_prover : int
        Classical samples for the prover.
    classical_samples_verifier : int
        Classical samples for the verifier.
    base_seed : int
        Base random seed.
    max_workers : int
        Number of parallel worker processes.
    shard_index : int or None
        0-based shard index for distributed execution.
    num_shards : int or None
        Total number of shards for distributed execution.

    Returns
    -------
    ExperimentResult
    """
    if gaps is None:
        gaps = [0.0, 0.05, 0.1, 0.2, 0.3, 0.4]

    print(
        f"=== Ab Regime: n in {list(n_range)}, gaps in {gaps}, "
        f"{max_workers} workers ==="
    )

    # One master generator hands out a per-trial seed; each trial then
    # builds its own generator from that seed, which is also stored on
    # the spec.
    rng = default_rng(base_seed)
    specs: list[TrialSpec] = []

    for n in n_range:
        for gap in gaps:
            for _ in range(num_trials):
                seed = int(rng.integers(0, 2**31))
                trial_rng = default_rng(seed)
                phi, target_s, parseval_weight = make_sparse_plus_noise(
                    n, trial_rng
                )

                # Centre the promise interval on the true Parseval weight,
                # clamped to [0.01, 1.0].
                a_sq = max(parseval_weight - gap / 2.0, 0.01)
                b_sq = min(parseval_weight + gap / 2.0, 1.0)

                # In the verifier path theta only enters via the list
                # bound 64 b^2/theta^2 (which doesn't bind at |L| <= 4),
                # so any theta <= epsilon works here.  Audit fix m4
                # (audit/ab_regime.md): the previous comment about
                # "keep theta below the dominant coefficient" was a
                # prover-side heuristic, irrelevant in the verifier
                # path tested by this experiment.
                theta = min(epsilon, 0.6)

                specs.append(
                    TrialSpec(
                        n=n,
                        phi=phi,
                        noise_rate=0.0,
                        target_s=target_s,
                        epsilon=epsilon,
                        delta=0.1,
                        theta=theta,
                        a_sq=a_sq,
                        b_sq=b_sq,
                        qfs_shots=qfs_shots,
                        classical_samples_prover=classical_samples_prover,
                        classical_samples_verifier=classical_samples_verifier,
                        seed=seed,
                        phi_description=f"ab_regime_gap={gap}_n={n}",
                    )
                )

    t0 = time.time()
    trials = run_trials_parallel(
        specs,
        max_workers=max_workers,
        label="ab_regime",
        shard_index=shard_index,
        num_shards=num_shards,
    )
    wall = time.time() - t0

    result = ExperimentResult(
        experiment_name="ab_regime",
        timestamp=time.strftime("%Y-%m-%dT%H:%M:%S"),
        wall_clock_s=wall,
        max_workers=max_workers,
        trials=trials,
        parameters={
            "n_range": list(n_range),
            "gaps": gaps,
            "num_trials": num_trials,
            "epsilon": epsilon,
        },
    )

    # Print per-gap summary
    print(f"\n {'gap':>5s} {'a_sq':>6s} {'b_sq':>6s} {'accept%':>8s} {'correct%':>9s}")
    for gap in gaps:
        # Match the delimited tag written into phi_description above.
        # A bare "gap={gap}" substring test is ambiguous: "gap=0.0" is a
        # prefix of "gap=0.05", so the gap = 0 row would silently absorb
        # the gap = 0.05 trials.
        gap_tag = f"gap={gap}_n="
        gt = [t for t in trials if gap_tag in t.phi_description]
        if not gt:
            # Possible when this shard holds no trials for the gap.
            continue
        ar = np.mean([t.accepted for t in gt])
        cr = np.mean([t.hypothesis_correct for t in gt])
        a_sq_avg = np.mean([t.a_sq for t in gt])
        b_sq_avg = np.mean([t.b_sq for t in gt])
        print(f" {gap:5.2f} {a_sq_avg:6.3f} {b_sq_avg:6.3f} {ar:8.0%} {cr:9.0%}")

    print(f" Wall-clock time: {wall:.1f}s")
    return result