Source code for experiments.harness.average_case

"""Experiment 7: Average-case performance across function families."""

import time
from typing import Optional

import numpy as np
from numpy.random import default_rng

from experiments.harness.phi import (
    make_k_sparse,
    make_sparse_plus_noise,
)
from experiments.harness.results import ExperimentResult
from experiments.harness.worker import TrialSpec, run_trials_parallel

# ``random_boolean`` was removed after audit/average_case.md M2: a uniform
# random truth table at :math:`n \ge 6` violates Definition 11 by
# construction (every Fourier coefficient has magnitude
# :math:`\sim 2^{-n/2} < \vartheta`), so the verifier *correctly* rejects
# it.  Including it as an "average case" representative was misleading.
_DEFAULT_FAMILIES = ["k_sparse_2", "k_sparse_4", "sparse_plus_noise"]


def _generate_trial(
    n: int,
    family: str,
    epsilon: float,
    delta: float,
    qfs_shots: int,
    classical_samples_prover: int,
    classical_samples_verifier: int,
    rng: np.random.Generator,
) -> TrialSpec:
    r"""Build a :class:`TrialSpec` for one (n, family) trial.

    Dispatches to the appropriate phi generator and sets
    protocol parameters (:math:`\vartheta`, :math:`a^2`, :math:`b^2`)
    to match the Fourier structure of the chosen function family.
    """
    seed = int(rng.integers(0, 2**31))
    trial_rng = default_rng(seed)

    # ``k`` is now plumbed into ``TrialSpec`` so the worker dispatches
    # k-sparse families to ``verify_fourier_sparse`` (Theorems 9/10/15)
    # rather than ``verify_parity`` (Theorem 12).  See
    # ``audit/average_case.md`` (M1).
    if family.startswith("k_sparse_"):
        k = int(family.split("_")[-1])
        phi, target_s, pw = make_k_sparse(n, k, trial_rng)
        max_coeff = 1.0 / k  # expected order; actual varies per draw
        # Use the Dirichlet draw's actual max via pw is imprecise;
        # the safe bound is that the heaviest coeff >= 1/k in
        # expectation. Adapt theta to stay below the expected
        # heaviest coefficient so GL can detect it.
        theta = min(epsilon, max(0.01, max_coeff * 0.9))
        a_sq = b_sq = pw
        spec_k = k
    elif family == "sparse_plus_noise":
        phi, target_s, pw = make_sparse_plus_noise(n, trial_rng)
        theta = epsilon  # dominant coeff 0.7 >> epsilon
        a_sq = b_sq = pw
        # 1 dominant + 3 secondary = 4 heavy coefficients (see
        # ``phi.make_sparse_plus_noise``).  Routes to
        # ``verify_fourier_sparse`` with k=4.
        spec_k = 4
    else:
        raise ValueError(f"Unknown function family: {family}")

    return TrialSpec(
        n=n,
        phi=phi,
        noise_rate=0.0,
        target_s=target_s,
        epsilon=epsilon,
        delta=delta,
        theta=theta,
        a_sq=a_sq,
        b_sq=b_sq,
        qfs_shots=qfs_shots,
        classical_samples_prover=classical_samples_prover,
        classical_samples_verifier=classical_samples_verifier,
        seed=seed,
        phi_description=f"{family}_n={n}",
        k=spec_k,
    )


[docs] def run_average_case_experiment( n_range: range = range(4, 11), families: Optional[list[str]] = None, num_trials: int = 20, epsilon: float = 0.3, delta: float = 0.1, qfs_shots: int = 2000, classical_samples_prover: int = 1000, classical_samples_verifier: int = 3000, base_seed: int = 42, max_workers: int = 1, shard_index: int | None = None, num_shards: int | None = None, ) -> ExperimentResult: r"""Average-case experiment: protocol performance on diverse function families. For each :math:`n` in *n_range* and each function family, samples *num_trials* random functions from the family, runs the full honest prover :math:`\to` verifier protocol, and records acceptance rate and hypothesis quality. The experiment goes beyond the single-parity regime of Exp 1 by testing three Fourier-sparse function families: ``k_sparse_2``, ``k_sparse_4`` :math:`k`-Fourier-sparse functions with random Dirichlet coefficients on the :math:`k`-simplex (Corollary 7). :math:`\vartheta` is adapted per *k* so the Goldreich--Levin threshold remains below the expected heaviest coefficient. Each trial sets ``TrialSpec.k = k`` so the worker dispatches to :meth:`~ql.verifier.MoSVerifier.verify_fourier_sparse` (Theorems 9/10/15) with the k-sparse acceptance threshold :math:`a^2 - \varepsilon^2/(128 k^2)`. ``sparse_plus_noise`` One dominant parity (:math:`c_{\mathrm{dom}} = 0.7`) plus three secondary coefficients (:math:`c_{\mathrm{sec}} = 0.1` each), giving 4 heavy coefficients in total. Sets ``TrialSpec.k = 4`` so the verifier runs the Fourier-sparse path matching the actual sparsity of the target. .. note:: **History.** Prior to the audit fix in ``audit/average_case.md`` (M1), all three families dispatched to ``verify_parity`` because ``TrialSpec.k`` was never set. The audit identified this as the highest-value single fix; as a result the previous on-disk ``results/average_case_4_16_100.pb`` was generated against the wrong verifier path and is invalid until re-generated. See ``audit/FOLLOW_UPS.md``. The fourth family ``random_boolean`` (uniform truth tables) was dropped: at :math:`n \ge 6` it violates Definition 11 by construction, so the verifier *correctly* rejects it (M2 in the audit). Including it as an "average case" representative conflated promise-class violation with average-case behaviour. :math:`\vartheta = \min(\varepsilon,\, 0.9/k)` is a heuristic (m4 in the audit), not derived from a theorem; for Dirichlet draws a non-trivial fraction of trials will still have :math:`c_{\min} < \vartheta`. Parameters ---------- n_range : range Range of :math:`n` values to sweep. families : list[str] or None Function family names. Default: ``["k_sparse_2", "k_sparse_4", "random_boolean", "sparse_plus_noise"]``. num_trials : int Trials per :math:`(n, \text{family})` cell. epsilon : float Accuracy parameter :math:`\varepsilon`. delta : float Confidence parameter :math:`\delta`. qfs_shots : int QFS copy budget per trial. classical_samples_prover : int Classical samples for the prover. classical_samples_verifier : int Classical samples for the verifier. base_seed : int Base random seed. max_workers : int Number of parallel worker processes. Returns ------- ExperimentResult """ if families is None: families = list(_DEFAULT_FAMILIES) print( f"=== Average-Case Experiment: n in {list(n_range)}, " f"families={families}, {num_trials} trials each, " f"{max_workers} workers ===" ) rng = default_rng(base_seed) specs: list[TrialSpec] = [] for n in n_range: for family in families: for _ in range(num_trials): specs.append( _generate_trial( n, family, epsilon, delta, qfs_shots, classical_samples_prover, classical_samples_verifier, rng, ) ) t0 = time.time() trials = run_trials_parallel( specs, max_workers=max_workers, label="avg_case", shard_index=shard_index, num_shards=num_shards, ) wall = time.time() - t0 result = ExperimentResult( experiment_name="average_case", timestamp=time.strftime("%Y-%m-%dT%H:%M:%S"), wall_clock_s=wall, max_workers=max_workers, trials=trials, parameters={ "n_range": list(n_range), "families": families, "num_trials": num_trials, "epsilon": epsilon, "delta": delta, "qfs_shots": qfs_shots, "classical_samples_prover": classical_samples_prover, "classical_samples_verifier": classical_samples_verifier, }, ) # Per-family summary print(f"\n {'family':>20s} {'accept%':>8s} {'correct%':>9s}") for fam in families: ft = [t for t in trials if t.phi_description.startswith(fam)] ar = np.mean([t.accepted for t in ft]) if ft else 0.0 cr = np.mean([t.hypothesis_correct for t in ft]) if ft else 0.0 print(f" {fam:>20s} {ar:8.0%} {cr:9.0%}") print(f"\n{result.summary_table()}") print(f" Wall-clock time: {wall:.1f}s") return result