Source code for experiments.harness.truncation

"""Experiment 3: Verifier truncation tradeoffs."""

import time
from typing import Optional

import numpy as np
from numpy.random import default_rng

from experiments.harness.phi import make_single_parity
from experiments.harness.results import ExperimentResult
from experiments.harness.worker import TrialSpec, run_trials_parallel



[docs]
def run_truncation_experiment(
    n: int = 6,
    epsilon_range: Optional[list[float]] = None,
    verifier_sample_range: Optional[list[int]] = None,
    num_trials: int = 20,
    qfs_shots: int = 2000,
    classical_samples_prover: int = 1000,
    base_seed: int = 42,
    max_workers: int = 1,
    shard_index: int | None = None,
    num_shards: int | None = None,
) -> ExperimentResult:
    r"""Verifier truncation tradeoff experiment.

    Fixes :math:`n` and the prover's resources, then sweeps the
    verifier's classical sample budget :math:`m_V` and accuracy
    parameter :math:`\varepsilon` to map the completeness frontier.

    Uses noisy parity with :math:`\eta = 0.15` (effective coefficient
    :math:`(1 - 2\eta) = 0.7`, so :math:`a^2 = b^2 = 0.49`), which
    gives a tight Fourier weight margin:

    .. math::

        \text{weight} \approx 0.49, \quad
        \tau = a^2 - \varepsilon^2 / 8

    At small :math:`m_V`, the Hoeffding estimation noise in the
    verifier's coefficient estimates pushes the accumulated weight
    below :math:`\tau`, causing rejection even for honest provers.
    At large :math:`\varepsilon`, :math:`\tau` drops and the check
    becomes lenient.

    .. warning::

       **Sub-Hoeffding regime — figures are NOT Theorem 12 boundary
       measurements.** The default grid
       :math:`m_V \in \{50, \dots, 3000\}` lies 3-4 orders of magnitude
       *below* the Theorem 12 verifier sample-complexity prescription
       :math:`m \ge (2/\mathrm{tol}^2)\,\log(4|L|/\delta)` with
       :math:`\mathrm{tol} = \varepsilon^2/(16|L|)` (see
       ``ql/verifier.py:493``).  For :math:`|L|=1` and :math:`\delta=0.1`
       the analytic minimum is roughly :math:`1.9 \times 10^7` at
       :math:`\varepsilon=0.1` and :math:`3.0 \times 10^4` at
       :math:`\varepsilon=0.5`, so the grid never reaches the asymptotic
       regime that the threshold :math:`\tau = a^2 - \varepsilon^2/8`
       was designed for.

       Consequently the visible "knees" for :math:`\varepsilon \le 0.3`
       are **squaring-bias artefacts** of the unbiased estimator
       :math:`\widehat\xi(s)^2 = \widehat{\varphi}(s)^2 +
       \mathrm{Var}(\widehat\xi)`, whose bias :math:`\approx 0.51/m_V`
       is comparable to or exceeds the acceptance margin
       :math:`\varepsilon^2/8`.  Some rows in
       ``truncation_summary.csv`` therefore show
       ``accept@50 > accept@3000`` (acceptance *decreasing* with
       budget), which would be impossible in the true Hoeffding regime.

       Additionally, the prover budgets ``qfs_shots=2000`` and
       ``classical_samples_prover=1000`` sit far below the
       Corollary 5 prescription, so the figures partly reflect
       prover-side QFS failure modes (the prover may fail to place
       the target string into :math:`L` at small :math:`\varepsilon`
       and large :math:`n`) rather than pure verifier-side truncation.

       The 2D grid :math:`(\varepsilon \times m_V)` is therefore best
       interpreted as a **non-asymptotic / sub-Hoeffding feasibility
       sweep** at fixed prover and verifier budgets, *not* as a
       measurement of the tradeoff surface predicted by Theorem 12.
       See ``audit/truncation.md`` (M1, M2) and ``audit/FOLLOW_UPS.md``
       for the rerun specifications that would actually cross the
       Theorem 12 boundary.

    Parameters
    ----------
    n : int
        Number of input bits (fixed across the sweep).
    epsilon_range : list[float] or None
        Values of :math:`\varepsilon` to test.
        Default: ``[0.1, 0.2, 0.3, 0.4, 0.5]``.
    verifier_sample_range : list[int] or None
        Verifier classical sample budgets :math:`m_V` to test.
        Default: ``[50, 100, 200, 500, 1000, 3000]``.
    num_trials : int
        Number of independent trials per :math:`(\varepsilon, m_V)` cell.
    qfs_shots : int
        QFS copy budget per trial (prover).
    classical_samples_prover : int
        Classical samples for the prover.
    base_seed : int
        Base random seed.
    max_workers : int
        Number of parallel worker processes.

    Returns
    -------
    ExperimentResult
    """
    if epsilon_range is None:
        epsilon_range = [0.1, 0.2, 0.3, 0.4, 0.5]
    if verifier_sample_range is None:
        verifier_sample_range = [50, 100, 200, 500, 1000, 3000]

    noise_rate = 0.15
    a_sq = (1.0 - 2.0 * noise_rate) ** 2

    print(
        f"=== Truncation Experiment: n={n}, eta={noise_rate}, {max_workers} workers ==="
    )
    rng = default_rng(base_seed)

    target_s = 1
    phi = make_single_parity(n, target_s)

    specs: list[TrialSpec] = []
    for eps in epsilon_range:
        for v_samples in verifier_sample_range:
            for _ in range(num_trials):
                seed = int(rng.integers(0, 2**31))
                specs.append(
                    TrialSpec(
                        n=n,
                        phi=phi,
                        noise_rate=noise_rate,
                        target_s=target_s,
                        epsilon=eps,
                        delta=0.1,
                        theta=min(eps, 0.5),
                        a_sq=a_sq,
                        b_sq=a_sq,
                        qfs_shots=qfs_shots,
                        classical_samples_prover=classical_samples_prover,
                        classical_samples_verifier=v_samples,
                        seed=seed,
                        phi_description=f"trunc_eps={eps}_vsamp={v_samples}",
                    )
                )

    t0 = time.time()
    trials = run_trials_parallel(
        specs, max_workers=max_workers, label="trunc",
        shard_index=shard_index, num_shards=num_shards,
    )
    wall = time.time() - t0

    result = ExperimentResult(
        experiment_name="verifier_truncation",
        timestamp=time.strftime("%Y-%m-%dT%H:%M:%S"),
        wall_clock_s=wall,
        max_workers=max_workers,
        trials=trials,
        parameters={
            "n": n,
            "noise_rate": noise_rate,
            "a_sq": a_sq,
            "epsilon_range": epsilon_range,
            "verifier_sample_range": verifier_sample_range,
            "num_trials": num_trials,
            "qfs_shots": qfs_shots,
        },
    )

    # Print summary grid
    print("\n  Accept rates (eps x verifier_samples):")
    header = f"  {'eps':>5s}" + "".join(f"  {v:>5d}" for v in verifier_sample_range)
    print(header)
    for eps in epsilon_range:
        row_trials = [t for t in trials if abs(t.epsilon - eps) < 1e-9]
        cells = []
        for v in verifier_sample_range:
            vt = [t for t in row_trials if t.verifier_samples == v]
            rate = np.mean([t.accepted for t in vt]) if vt else 0.0
            cells.append(f"  {rate:5.0%}")
        print(f"  {eps:5.2f}" + "".join(cells))

    print(f"  Wall-clock time: {wall:.1f}s")
    return result