{
  "schemaVersion": 2,
  "artifactType": "release-conformance-dashboard",
  "generatedAt": "2026-06-12T11:23:00.611Z",
  "commit": "dab57ac7",
  "branch": "main",
  "dirty": true,
  "activeGameKey": "aurora-galactica",
  "games": [
    {
      "gameKey": "aurora-galactica",
      "gameName": "Aurora Galactica",
      "gameStatus": "Active conformance investment",
      "currentInvestment": "Current focus: raise Aurora gameplay/audio/level-arc/visual conformance toward the next major release gate.",
      "releaseRead": "Aurora is the shipped first Platinum game and the current subject of long-cycle conformance scoring.",
      "releaseGate": [
        {
          "Gate": "Overall quality",
          "Current": "8.8/10",
          "Target": ">=9.3",
          "Notes": "Full score refresh after all major cycles"
        },
        {
          "Gate": "Audio identity",
          "Current": "7.3/10",
          "Target": ">=7.5",
          "Notes": "Primary user-experience gap"
        },
        {
          "Gate": "Challenge-stage set-piece conformance",
          "Current": "4.3/10",
          "Target": ">=5.0 before next beta claim; >=6.0 next major gate; >=9.0 mature",
          "Notes": "Strict movement/graphics/alien-novelty gate; safety does not inflate this score"
        },
        {
          "Gate": "Direct target sprite conformance",
          "Current": "6/10",
          "Target": ">=5.5 before next beta claim; >=7.5 mature preview",
          "Notes": "Strict runtime-vs-promoted-target-crop row; static proxy scores do not satisfy this gate"
        },
        {
          "Gate": "Level arc",
          "Current": "8.8/10",
          "Target": ">=8.8",
          "Notes": "Long-play gameplay-quality gate"
        },
        {
          "Gate": "Alien entry and challenge-stage novelty",
          "Current": "8.2/10",
          "Target": ">=7.5 first gate; >=9.0 mature",
          "Notes": "New high-priority long-cycle gameplay-authenticity gate"
        },
        {
          "Gate": "Boss entry and formation grammar",
          "Current": "9.4/10",
          "Target": ">=8.0 first gate; >=9.0 mature",
          "Notes": "New measured gate for stage choreography"
        },
        {
          "Gate": "Alien entry / formations",
          "Current": "10/10 measured",
          "Target": ">=9.2 with path/rack scorer",
          "Notes": "Now backed by dedicated alien-entry/challenge variation scorer"
        },
        {
          "Gate": "Challenge variation",
          "Current": "4.3/10 measured",
          "Target": ">=9.2 with dedicated scorer",
          "Notes": "Dedicated stage-by-stage challenge conformance gate"
        },
        {
          "Gate": "Visual look and feel",
          "Current": "8.6/10",
          "Target": ">=8.4",
          "Notes": "New explicit gate; first-pass scorer measured"
        },
        {
          "Gate": "Arcade frame and popup surfaces",
          "Current": "9.2/10",
          "Target": ">=9.4",
          "Notes": "Split from generic UI shell before final gate"
        },
        {
          "Gate": "No-regression guardrails",
          "Current": "movement/combat/capture >=10; challenge timing >=9.8",
          "Target": "Maintain",
          "Notes": "Hard blockers"
        }
      ],
      "priorityRows": [
        {
          "rank": 1,
          "metric": "Challenge-stage set-piece conformance: movement, graphics, alien novelty",
          "explanation": {
            "calculation": "Strict score averages movement conformance, graphical conformance, alien/stage novelty, and stage progression; no-shot/no-kill safety is a separate guardrail and does not make the stage interesting. Current supporting artifacts: 8 set-piece contracts, 25 first-five grammar group contracts, 10 reusable motion primitives, and 17218 indexed sweep candidates.",
            "grounding": "Current grounding is Galaga challenge contact-sheet labels, Aurora browser runtime challenge probes, trajectory-vector comparison, and static/runtime sprite evidence. Best-case grounding adds full temporal trajectory traces and active sprite-motion windows for every challenge. Runtime promotion is blocked unless the expected-reference, target-video, human-perfect, safety, and human-visible gates all hold at once.",
            "meaning": "Players should feel a spectacular, safe, learnable bonus exhibition with different aliens and movement from one challenge to the next. Designers use this as the main anti-repetition gate for Aurora challenge stages. The current dashboard should be read as evidence that the next value is richer motion representation, not more shallow tuning."
          },
          "scoreContext": {
            "confidence": "medium-high for gap direction; medium-low for exact lift estimate",
            "resolution": "strict stage-by-stage challenge scorer using 1/10 baseline for interest, movement, and graphics; no-shot/no-kill is treated as a guardrail rather than score inflation",
            "scoreMeaning": "Major conformance gap or immature metric requiring stronger evidence before release confidence."
          },
          "score10": 4.3,
          "current": "4.3/10",
          "target": ">=5.0 before next beta claim; 6.0 after three authored challenges; 9.0+ mature",
          "status": "Strict dedicated stage-by-stage scorer; current high-priority gameplay-authenticity gap",
          "why": "The challenging stages should be spectacular safe Galaga-like bonus exhibitions. Current safety is good, but movement variation, alien novelty, and graphical conformance are not yet close.",
          "effort": "High; long-cycle CPU/browser extraction plus gameplay authoring and sprite-motion/reference labeling",
          "next": "Recent candidate sweeps improved measurement and search evidence, but no stage currently has a runtime keeper. Next: build richer movement primitives before runtime promotion; first target reference-spline-fit from 10 primitives, with first-five grammar at 25 group contracts and 25 reference-backed paths.",
          "evidence": "reference-artifacts/analyses/challenge-stage-conformance/2026-06-09-abb5c464/report.json; reference-artifacts/analyses/challenge-stage-candidate-sweep-index/latest.json; reference-artifacts/analyses/challenge-movement-grammar/latest.json; reference-artifacts/analyses/challenge-motion-primitives/latest.json; reference-artifacts/analyses/challenge-setpiece-contracts/latest.json",
          "decisionEvidence": [
            {
              "stage": 3,
              "decision": "no-runtime-keeper-yet",
              "bestCandidateId": "stage3-target-reference-paths-shape-lower-p0",
              "expectedLift10": "-0.4/10",
              "targetVideoObjectFitLift10": "1.5/10",
              "humanPerfectPotentialLift10": "0.3/10",
              "humanVisibleLift10": "-0.2/10",
              "read": "No runtime keeper: candidate regresses human-visible challenge readability by 0.2/10 or fails visibility/arrival/spacing/bunching gates."
            },
            {
              "stage": 7,
              "decision": "no-runtime-keeper-yet",
              "bestCandidateId": "stage7-centerline-spacing-spread-left-right-lead078-flat-widefield-centered-id-sd018",
              "expectedLift10": "0/10",
              "targetVideoObjectFitLift10": "0.3/10",
              "humanPerfectPotentialLift10": "0/10",
              "humanVisibleLift10": "2.3/10",
              "read": "No runtime keeper: candidate fails visibility/arrival/spacing/bunching gates despite a +2.3/10 human-visible challenge readability lift."
            },
            {
              "stage": 11,
              "decision": "no-runtime-keeper-yet",
              "bestCandidateId": "stage11-target-controls-shape-blend-p0",
              "expectedLift10": "0.2/10",
              "targetVideoObjectFitLift10": "0/10",
              "humanPerfectPotentialLift10": "0/10",
              "humanVisibleLift10": "0.1/10",
              "read": "No runtime keeper: candidate fails visibility/arrival/spacing/bunching gates despite a +0.1/10 human-visible challenge readability lift."
            },
            {
              "stage": 15,
              "decision": "no-runtime-keeper-yet",
              "bestCandidateId": "stage15-a168-d108-x68-w106-s008-lb0-y3-p0",
              "expectedLift10": "0.7/10",
              "targetVideoObjectFitLift10": "-0.2/10",
              "humanPerfectPotentialLift10": "0.4/10",
              "humanVisibleLift10": "-0.4/10",
              "read": "No runtime keeper: candidate regresses human-visible challenge readability by 0.4/10 or fails visibility/arrival/spacing/bunching gates."
            },
            {
              "stage": 19,
              "decision": "no-runtime-keeper-yet",
              "bestCandidateId": "stage19-target-reference-paths-direct-p0",
              "expectedLift10": "-0.5/10",
              "targetVideoObjectFitLift10": "0.8/10",
              "humanPerfectPotentialLift10": "1.7/10",
              "humanVisibleLift10": "-0.3/10",
              "read": "No runtime keeper: candidate regresses human-visible challenge readability by 0.3/10 or fails visibility/arrival/spacing/bunching gates."
            },
            {
              "stage": 23,
              "decision": "no-runtime-keeper-yet",
              "bestCandidateId": "stage23-target-compressed-late-hold-speed-p0",
              "expectedLift10": "0.5/10",
              "targetVideoObjectFitLift10": "0.6/10",
              "humanPerfectPotentialLift10": null,
              "humanVisibleLift10": null,
              "read": "No runtime keeper yet; use this row as search evidence and improve reference identity/path shape before gameplay promotion."
            },
            {
              "stage": 27,
              "decision": "no-runtime-keeper-yet",
              "bestCandidateId": "stage27-target-controls-shape-only-p1",
              "expectedLift10": "0.7/10",
              "targetVideoObjectFitLift10": "-0.8/10",
              "humanPerfectPotentialLift10": null,
              "humanVisibleLift10": null,
              "read": "No runtime keeper: late-stage identity blocked because best match challenge-1-arrival-group-1 does not represent challenge 7."
            },
            {
              "stage": 31,
              "decision": "no-runtime-keeper-yet",
              "bestCandidateId": "stage31-a162-d108-x86-w096-s007-lbs0-y3-p0",
              "expectedLift10": "1.8/10",
              "targetVideoObjectFitLift10": "-0.5/10",
              "humanPerfectPotentialLift10": null,
              "humanVisibleLift10": null,
              "read": "No runtime keeper yet; use this row as search evidence and improve reference identity/path shape before gameplay promotion."
            }
          ],
          "costContext": {
            "costClass": "high",
            "expectedResources": "cpu, browser, gpu",
            "trackedAxes": [
              "level-arc",
              "conformance-loop"
            ],
            "trackedSpend": "162 runs; 24 min wall; 39.8 min CPU",
            "trackedRuns": 162,
            "trackedWallSeconds": 1441.896,
            "trackedCpuSeconds": 2389.19,
            "expectedLift10": 1.8,
            "expectedOverallLift10": 0.138,
            "investmentScore": 8.4,
            "gapToTarget": "+0.7",
            "summary": "high; cpu, browser, gpu",
            "valueCostRead": "Expected lift 1.8/10 on metric, 0.138/10 overall; investment score 8.4."
          }
        },
        {
          "rank": 2,
          "metric": "Audio identity, event feedback, and cue alignment",
          "explanation": {
            "calculation": "Release audio score blends cue identity, reference spectral similarity, reference-window precision, overlap, event alignment, semantic event mapping, and cue-contract readiness from audio comparison and promotion artifacts.",
            "grounding": "Best-case grounding comes from labeled Galaga-family reference audio clips, cue contracts, segmented Aurora runtime captures, cue/event logs, waveform and spectral measurements, promotion prechecks, and live recapture.",
            "meaning": "Players hear whether shots, hits, explosions, boss damage, capture, rescue, pressure, loss, and challenge results communicate the right event at the right moment. Designers use it to protect feedback clarity, arcade identity, and safe theme variation."
          },
          "scoreContext": {
            "confidence": "medium-high",
            "resolution": "21 cue/event comparisons with waveform, spectral, overlap, alignment, and semantic event-mapping features",
            "scoreMeaning": "Material conformance gap with meaningful user-experience or reference-identity impact."
          },
          "score10": 7.3,
          "current": "7.3/10",
          "target": "7.5-8.0",
          "status": "Measured release category; weakest axis",
          "why": "Largest current score gap and high user-experience impact: shots, explosions, boss damage, challenge results, capture/rescue feedback.",
          "effort": "High; 3-6 hrs local/model-assisted analysis",
          "next": "Challenge Perfect runtime trial rejected perfect-clean-onset-soft-tail; do not directly promote the focused keeper. Do not promote Challenge Perfect from isolated onset/body candidates. Replace the next audio strategy with full-phrase/segment-boundary work: stabilize the scorer on canonical reference-vs-reference capture, then generate candidates that optimize onset, body, tail, and live capture segmentation together.",
          "evidence": "reference-artifacts/analyses/quality-conformance/2026-05-24-ff249bba/report.json; reference-artifacts/analyses/aurora-audio-cue-contracts/2026-05-11-b83393cd-dirty-201628/report.json",
          "costContext": {
            "costClass": "high",
            "expectedResources": "cpu, model-api, openai-api",
            "trackedAxes": [
              "audio"
            ],
            "trackedSpend": "317 runs; 254.3 min wall; 460.3 min CPU",
            "trackedRuns": 317,
            "trackedWallSeconds": 15258.692,
            "trackedCpuSeconds": 27618.62,
            "expectedLift10": 0.7,
            "expectedOverallLift10": 0.054,
            "investmentScore": 2.67,
            "gapToTarget": "+0.2",
            "summary": "high; cpu, model-api, openai-api",
            "valueCostRead": "Expected lift 0.7/10 on metric, 0.054/10 overall; investment score 2.67."
          }
        },
        {
          "rank": 3,
          "metric": "Alien entry and broad challenge-stage novelty",
          "explanation": {
            "calculation": "Composite proxy: 45% stage-opening timing fidelity, 35% stage-opening geometry fidelity, and 20% movement-grammar expansion until alien entry is promoted to its own scorer.",
            "grounding": "Best-case grounding will use reference and Aurora stage-entry contact sheets, rack timing traces, path-family labels, formation geometry, and early/mid/late level comparisons.",
            "meaning": "Players read the whole level from the first entry pattern. Designers use it to make stages feel authored, recognizable, and increasingly sophisticated before combat fully starts."
          },
          "scoreContext": {
            "confidence": "medium",
            "resolution": "composite proxy from opening timing, geometry, and movement grammar",
            "scoreMeaning": "Good conformance, but the gap is likely visible to attentive players or designers in some scenarios."
          },
          "score10": 8.2,
          "current": "8.2/10",
          "target": "7.5 first gate; 9.0+ mature",
          "status": "Dedicated long-cycle broad scorer; useful diagnostic but less strict than set-piece score",
          "why": "Regular-stage alien entry, challenge-stage trajectories, and new-alien introduction still need stronger reference grounding; this broad metric should not mask the stricter challenge-stage gap.",
          "effort": "High; long-cycle CPU/browser extraction plus reference contact-sheet and path-labeling pass",
          "next": "Attack regular-entry geometry separation: minimum regular geometry distance 0.083; mean regular geometry distance 0.127; closest pair mid-run-entry-variant / late-run-cleanup-or-failure.",
          "evidence": "reference-artifacts/analyses/alien-entry-challenge-variation/2026-05-16-82fd62cb/report.json",
          "costContext": {
            "costClass": "estimated",
            "expectedResources": "cpu, browser",
            "trackedAxes": [
              "level-arc",
              "quality-score"
            ],
            "trackedSpend": "113 runs; 26.4 min wall; 34.9 min CPU",
            "trackedRuns": 113,
            "trackedWallSeconds": 1586.753,
            "trackedCpuSeconds": 2092.86,
            "expectedLift10": null,
            "expectedOverallLift10": null,
            "investmentScore": null,
            "gapToTarget": "at target",
            "summary": "estimated; cpu, browser",
            "valueCostRead": "Estimated cost/value; dedicated investment candidate not yet generated."
          }
        },
        {
          "rank": 4,
          "metric": "Level arc and encounter shape",
          "explanation": {
            "calculation": "Level arc is read from the level-arc conformance report, combining stage distinctiveness, challenge-stage identity, later-stage complexity, pressure curve, reward/rescue layering, and learning/mastery windows.",
            "grounding": "Best-case grounding is multi-stage reference evidence plus Aurora harness windows for stage families, challenge layers, stage signatures, pressure/loss windows, and persona progression.",
            "meaning": "Players feel whether the game grows, teaches, surprises, and rewards mastery over time. Designers use it to detect repetition, flat difficulty, or escalation without reward."
          },
          "scoreContext": {
            "confidence": "medium-high",
            "resolution": "multi-submetric level-arc report with stage families, challenge layers, pressure, reward, and persona evidence",
            "scoreMeaning": "Good conformance, but the gap is likely visible to attentive players or designers in some scenarios."
          },
          "score10": 8.8,
          "current": "8.8/10",
          "target": "8.8-9.0",
          "status": "Measured release category",
          "why": "Controls whether long play feels like Galaga-like escalation rather than repeated pressure.",
          "effort": "Medium-high; 2-5 hrs",
          "next": "Use the top-ranked opportunity window to add or widen deterministic evidence before changing gameplay tuning.",
          "evidence": "reference-artifacts/analyses/level-arc-conformance/2026-05-24-ff249bba/report.json",
          "costContext": {
            "costClass": "low",
            "expectedResources": "cpu, browser",
            "trackedAxes": [
              "level-arc",
              "conformance-loop"
            ],
            "trackedSpend": "162 runs; 24 min wall; 39.8 min CPU",
            "trackedRuns": 162,
            "trackedWallSeconds": 1441.896,
            "trackedCpuSeconds": 2389.19,
            "expectedLift10": 0.24,
            "expectedOverallLift10": 0.018,
            "investmentScore": 1.55,
            "gapToTarget": "at target",
            "summary": "low; cpu, browser",
            "valueCostRead": "Expected lift 0.24/10 on metric, 0.018/10 overall; investment score 1.55."
          }
        },
        {
          "rank": 4.5,
          "metric": "Direct target sprite and impact feedback conformance",
          "explanation": {
            "calculation": "Score is read from the latest generated conformance artifact for this metric or from the dashboard composite proxy when the metric has not yet been promoted to a dedicated scorer.",
            "grounding": "Best-case grounding is a canonical reference window or scorer-backed harness report with provenance, repeatable scenarios, and current Aurora comparison artifacts.",
            "meaning": "For a player or designer, this metric says whether this part of the experience feels intentional, readable, fair, and close to the Galaga-like target rather than merely functional."
          },
          "scoreContext": {
            "confidence": "medium",
            "resolution": "scorer-backed artifact with selected harness windows",
            "scoreMeaning": "Major conformance gap or immature metric requiring stronger evidence before release confidence."
          },
          "score10": 5.97,
          "current": "6/10",
          "target": ">=5.5 before next beta claim; >=7.5 mature preview",
          "status": "Application artifact scorecard measured; strict direct target-crop row is intentionally sobering",
          "why": "The player-visible ship, enemy, hit, and explosion shapes are a first-glance arcade quality signal. Static sprite proxy scores must not hide the stricter target-crop gap.",
          "effort": "Medium-high; 2-5 hrs renderer/crop/harness work plus visual review",
          "next": "Raise direct sprite target score and impact/explosion feedback together; current impact/explosion static read is 5.8/10.",
          "evidence": "reference-artifacts/analyses/application-artifact-conformance/latest.json",
          "costContext": {
            "costClass": "estimated",
            "expectedResources": "cpu",
            "trackedAxes": [
              "quality-score"
            ],
            "trackedSpend": "16 runs; 17.6 min wall; 20.4 min CPU",
            "trackedRuns": 16,
            "trackedWallSeconds": 1058.101,
            "trackedCpuSeconds": 1223.73,
            "expectedLift10": null,
            "expectedOverallLift10": null,
            "investmentScore": null,
            "gapToTarget": "at target",
            "summary": "estimated; cpu",
            "valueCostRead": "Estimated cost/value; dedicated investment candidate not yet generated."
          }
        },
        {
          "rank": 5,
          "metric": "Boss entry and formation grammar",
          "explanation": {
            "calculation": "Boss/formation grammar is read from the dedicated formation-boss-grammar report, blending boss entry timing, boss/escort composition, formation settle evidence, challenge pattern identity, stage variation, and path-shape precision.",
            "grounding": "Current grounding comes from Aurora level-expansion event logs, trace summaries, stage-signature distance, runtime path/slot extraction, media-backed Galaga path-family labels when available, and the formation/boss grammar reference profile. Best-case grounding adds tracked Galaga boss/escort/challenge trajectories and rack slot coordinates.",
            "meaning": "Players feel whether each stage has recognizable arcade choreography: bosses enter with readable intent, escorts matter, formations settle convincingly, and challenge stages teach memorable set pieces."
          },
          "scoreContext": {
            "confidence": "medium",
            "resolution": "first-class boss/formation scorer using stage-window event grammar, boss timing, escort composition, challenge identity, and explicit path/slot measurement debt",
            "scoreMeaning": "Strong measured conformance with known remaining risk mostly in narrower edge cases, coverage, or polish."
          },
          "score10": 9.4,
          "current": "9.4/10",
          "target": "8.0-8.5 first gate; 9.0+ with path/slot extraction",
          "status": "Measured release category; new first-class axis",
          "why": "Boss entries, escorts, formation settling, and challenge set pieces are core Galaga choreography and directly affect whether stages feel authored.",
          "effort": "Medium-high; 2-5 hrs, then recurring low-cost guardrail",
          "next": "Label boss, escort, rack-settle, and challenge path families from Galaga reference contact sheets or video traces, then replace heuristic coverage with direct shape-distance scoring.",
          "evidence": "reference-artifacts/analyses/quality-conformance/2026-05-24-ff249bba/report.json",
          "costContext": {
            "costClass": "high",
            "expectedResources": "cpu, browser",
            "trackedAxes": [
              "level-arc",
              "conformance-loop"
            ],
            "trackedSpend": "162 runs; 24 min wall; 39.8 min CPU",
            "trackedRuns": 162,
            "trackedWallSeconds": 1441.896,
            "trackedCpuSeconds": 2389.19,
            "expectedLift10": 0.28,
            "expectedOverallLift10": 0.022,
            "investmentScore": 0.7,
            "gapToTarget": "at target",
            "summary": "high; cpu, browser",
            "valueCostRead": "Expected lift 0.28/10 on metric, 0.022/10 overall; investment score 0.7."
          }
        },
        {
          "rank": 6,
          "metric": "Overall visual look and feel: gameplay, start page, typography complexity",
          "explanation": {
            "calculation": "Current score is read from the first-pass visual scorer and should be treated as a medium-low-confidence planning value until reference-backed contact-sheet and sprite/style sub-scorers land; it is informed by UI shell checks, screenshots, contact sheets, and known visual debt.",
            "grounding": "Best-case grounding will compare reference and Aurora contact sheets across start, attract, gameplay, score, popup, and game-over surfaces with palette, typography, density, sprite readability, and layout checks.",
            "meaning": "Players decide at a glance whether the game feels like a polished arcade object. Designers use it to align readability, theme, typography, and visual hierarchy before subjective tuning."
          },
          "scoreContext": {
            "confidence": "medium-low",
            "resolution": "first-pass visual scorer when available; still needs reference-backed contact sheets and sprite/style sub-scorers",
            "scoreMeaning": "Good conformance, but the gap is likely visible to attentive players or designers in some scenarios."
          },
          "score10": 8.62,
          "current": "8.6/10",
          "target": "8.4-8.8",
          "status": "Measured visual scorer; medium-low confidence",
          "why": "A high score can still feel off if start text, density, contrast, alien readability, and arcade typography do not cohere.",
          "effort": "Medium; next pass should add reference-backed contact sheets and GPU/model-assisted review",
          "next": "Defer unless new ingestion evidence reveals a larger graphics-conformance gap.",
          "evidence": "reference-artifacts/analyses/aurora-visual-look-conformance/2026-05-08-fee8820-dirty/report.json",
          "costContext": {
            "costClass": "medium",
            "expectedResources": "cpu, browser, gpu",
            "trackedAxes": [
              "visual-look"
            ],
            "trackedSpend": "1 run; 0.1 min wall; 0.1 min CPU",
            "trackedRuns": 1,
            "trackedWallSeconds": 3.49,
            "trackedCpuSeconds": 3.43,
            "expectedLift10": 0.12,
            "expectedOverallLift10": 0.009,
            "investmentScore": 0.38,
            "gapToTarget": "at target",
            "summary": "medium; cpu, browser, gpu",
            "valueCostRead": "Expected lift 0.12/10 on metric, 0.009/10 overall; investment score 0.38."
          }
        },
        {
          "rank": 7,
          "metric": "Stage 4 pressure exact replay / pressure curve precision",
          "explanation": {
            "calculation": "Score is the current weakest level-arc pressure submetric, focused on whether known pressure/loss windows reproduce as exact or same-window replay events under controlled harness runs.",
            "grounding": "Best-case grounding is source pressure/loss windows, frozen seeds, replayable input paths, collision diagnostics, and repeated current-vs-source pressure curve comparisons.",
            "meaning": "Players should feel pressure that is learnable and fair, not random. Designers use it to tune threat density, dodge lanes, and failure recovery without creating arbitrary deaths."
          },
          "scoreContext": {
            "confidence": "medium",
            "resolution": "narrow pressure/loss replay windows; exact replay coverage still limited",
            "scoreMeaning": "Major conformance gap or immature metric requiring stronger evidence before release confidence."
          },
          "score10": 6,
          "current": "6/10",
          "target": "8.2-8.6",
          "status": "Measured level-arc weak submetric",
          "why": "Pressure should be learnable and reproducible, not merely present in one run.",
          "effort": "Medium-high; prior runs ~12.8 min wall / 18.5 min CPU",
          "next": "Run focused source-window replay matching after the Stage 12 loop validates candidate mechanics.",
          "evidence": "reference-artifacts/analyses/level-arc-conformance/2026-05-24-ff249bba/report.json",
          "costContext": {
            "costClass": "medium",
            "expectedResources": "cpu, browser",
            "trackedAxes": [
              "stage4-pressure"
            ],
            "trackedSpend": "28 runs; 12.8 min wall; 18.5 min CPU",
            "trackedRuns": 28,
            "trackedWallSeconds": 769.422,
            "trackedCpuSeconds": 1111.38,
            "expectedLift10": 0.35,
            "expectedOverallLift10": 0.027,
            "investmentScore": 1.39,
            "gapToTarget": "+2.2",
            "summary": "medium; cpu, browser",
            "valueCostRead": "Expected lift 0.35/10 on metric, 0.027/10 overall; investment score 1.39."
          }
        },
        {
          "rank": 8,
          "metric": "Alien entry to levels: formation, timing, and methods",
          "explanation": {
            "calculation": "Composite proxy: 45% stage-opening timing fidelity, 35% stage-opening geometry fidelity, and 20% movement-grammar expansion until alien entry is promoted to its own scorer.",
            "grounding": "Best-case grounding will use reference and Aurora stage-entry contact sheets, rack timing traces, path-family labels, formation geometry, and early/mid/late level comparisons.",
            "meaning": "Players read the whole level from the first entry pattern. Designers use it to make stages feel authored, recognizable, and increasingly sophisticated before combat fully starts."
          },
          "scoreContext": {
            "confidence": "medium",
            "resolution": "composite proxy from opening timing, geometry, and movement grammar",
            "scoreMeaning": "No known measured gap under the current scorer and evidence coverage. This is a guardrail pass, not proof of perfection."
          },
          "score10": 10,
          "current": "10/10",
          "target": "9.0-9.4 with path and rack-slot scorer",
          "status": "Dedicated alien-entry submetric",
          "why": "Entry formations and rack timing are a first-order arcade authenticity signal before combat even starts.",
          "effort": "Medium; 1-3 hrs plus visual review",
          "next": "Raise regular-stage minimum signature distance and add stage-specific alien entry scripts before retuning broad level arc.",
          "evidence": "reference-artifacts/analyses/alien-entry-challenge-variation/2026-05-16-82fd62cb/report.json",
          "costContext": {
            "costClass": "estimated",
            "expectedResources": "cpu, browser",
            "trackedAxes": [
              "level-arc",
              "quality-score"
            ],
            "trackedSpend": "113 runs; 26.4 min wall; 34.9 min CPU",
            "trackedRuns": 113,
            "trackedWallSeconds": 1586.753,
            "trackedCpuSeconds": 2092.86,
            "expectedLift10": null,
            "expectedOverallLift10": null,
            "investmentScore": null,
            "gapToTarget": "at target",
            "summary": "estimated; cpu, browser",
            "valueCostRead": "Estimated cost/value; dedicated investment candidate not yet generated."
          }
        },
        {
          "rank": 9,
          "metric": "Challenge-stage variation and new alien/formations introduction",
          "explanation": {
            "calculation": "Strict dedicated scorer reads each sampled challenging stage as its own set piece: no-shot/no-kill safety, Galaga reference vector fit, arrival geometry, alien-role semantics, active visual evidence, and durable stage-specific contracts. If that report is unavailable, the dashboard falls back to a challenge timing/identity/non-repetition proxy.",
            "grounding": "Current grounding is browser-backed Aurora challenge probes plus media-backed Galaga challenge labels/contact sheets. Best-case grounding adds more late-stage reference labels, tracked trajectories, active sprite-motion windows, bonus opportunity windows, and result feedback timing.",
            "meaning": "Players should experience challenge stages as learnable bonus set pieces that introduce new motion and scoring opportunities. Designers use it to prevent bonus rounds from becoming repetitive pauses."
          },
          "scoreContext": {
            "confidence": "medium",
            "resolution": "strict dedicated stage-by-stage challenge conformance report when available; fallback proxy uses challenge timing, challenge identity, and non-repetition",
            "scoreMeaning": "Major conformance gap or immature metric requiring stronger evidence before release confidence."
          },
          "score10": 4.3,
          "current": "4.3/10",
          "target": "9.0-9.4 with dedicated scorer",
          "status": "Dedicated stage-by-stage challenge conformance report",
          "why": "Challenge stages should teach new motion/reward patterns, not only pause normal combat.",
          "effort": "Medium-high; 2-4 hrs",
          "next": "Close dedicated challenge-stage gaps. Current challenge stages are functionally safe but not yet fully credible Galaga-like bonus exhibitions: strict movement is 4.4/10, strict graphics is 4.5/10, alien/stage novelty is 3.9/10, player shot opportunity is 5.4/10, target-video object-track fit is 3.5/10, and sprite-motion correspondence is 6.18/10 with target timing status frame-labeled-segmented-reference-windows. Diagnostic legacy coverage was 6.8/10, which is why the old read was too generous.",
          "evidence": "reference-artifacts/analyses/challenge-stage-conformance/2026-06-09-abb5c464/report.json",
          "costContext": {
            "costClass": "estimated",
            "expectedResources": "cpu, browser",
            "trackedAxes": [
              "level-arc",
              "quality-score"
            ],
            "trackedSpend": "113 runs; 26.4 min wall; 34.9 min CPU",
            "trackedRuns": 113,
            "trackedWallSeconds": 1586.7530000000002,
            "trackedCpuSeconds": 2092.86,
            "expectedLift10": null,
            "expectedOverallLift10": null,
            "investmentScore": null,
            "gapToTarget": "+4.7",
            "summary": "estimated; cpu, browser",
            "valueCostRead": "Estimated cost/value; dedicated investment candidate not yet generated."
          }
        },
        {
          "rank": 10,
          "metric": "Progression and persona depth",
          "explanation": {
            "calculation": "Score is read from the progression/persona quality category, including persona safety checks, stage ordering, and whether different skill profiles see an appropriate ramp.",
            "grounding": "Best-case grounding comes from controlled persona runs, stage snapshots, loss/recovery traces, and reference-informed expectations for learning, mastery, and escalation.",
            "meaning": "Players should feel the game becoming harder for understandable reasons. Designers use it to keep novice, advanced, and expert experiences coherent across a long session."
          },
          "scoreContext": {
            "confidence": "medium",
            "resolution": "scorer-backed artifact with selected harness windows",
            "scoreMeaning": "Good conformance, but the gap is likely visible to attentive players or designers in some scenarios."
          },
          "score10": 8.4,
          "current": "8.4/10",
          "target": "9.1+",
          "status": "Measured release category",
          "why": "Keeps the game learnable across skill levels and supports later-stage quality.",
          "effort": "Low-medium; 1-2 hrs",
          "next": "Resolve remaining ordering edge case after higher-value audio/level-arc work.",
          "evidence": "reference-artifacts/analyses/quality-conformance/2026-05-24-ff249bba/report.json",
          "costContext": {
            "costClass": "estimated",
            "expectedResources": "cpu",
            "trackedAxes": [
              "quality-score"
            ],
            "trackedSpend": "16 runs; 17.6 min wall; 20.4 min CPU",
            "trackedRuns": 16,
            "trackedWallSeconds": 1058.101,
            "trackedCpuSeconds": 1223.73,
            "expectedLift10": null,
            "expectedOverallLift10": null,
            "investmentScore": null,
            "gapToTarget": "+0.7",
            "summary": "estimated; cpu",
            "valueCostRead": "Estimated cost/value; dedicated investment candidate not yet generated."
          }
        },
        {
          "rank": 11,
          "metric": "Stage 1 opening timing fidelity",
          "explanation": {
            "calculation": "Score is read from the stage-1 opening timing category, comparing measured Aurora event timing against reference opening-window timing metrics and tolerances.",
            "grounding": "Best-case grounding is a canonical stage-1 reference window with event timestamps, Aurora controlled-clock captures, and delta reports for first entry, arrival, and first dive timing.",
            "meaning": "Players form their first feel judgment in the opening seconds. Designers use it to lock the initial rhythm before tuning deeper complexity."
          },
          "scoreContext": {
            "confidence": "medium",
            "resolution": "scorer-backed artifact with selected harness windows",
            "scoreMeaning": "Good conformance, but the gap is likely visible to attentive players or designers in some scenarios."
          },
          "score10": 8.5,
          "current": "8.5/10",
          "target": "8.8-9.2",
          "status": "Measured release category",
          "why": "First impression and direct reference feel.",
          "effort": "Low-medium; 1-2 hrs",
          "next": "Defer until higher-gap audio and level-arc candidates have been exercised.",
          "evidence": "reference-artifacts/analyses/quality-conformance/2026-05-24-ff249bba/report.json",
          "costContext": {
            "costClass": "medium",
            "expectedResources": "cpu, browser",
            "trackedAxes": [
              "quality-score"
            ],
            "trackedSpend": "16 runs; 17.6 min wall; 20.4 min CPU",
            "trackedRuns": 16,
            "trackedWallSeconds": 1058.101,
            "trackedCpuSeconds": 1223.73,
            "expectedLift10": 0.18,
            "expectedOverallLift10": 0.014,
            "investmentScore": 0.73,
            "gapToTarget": "+0.3",
            "summary": "medium; cpu, browser",
            "valueCostRead": "Expected lift 0.18/10 on metric, 0.014/10 overall; investment score 0.73."
          }
        },
        {
          "rank": 12,
          "metric": "Arcade console frame UI style",
          "explanation": {
            "calculation": "Current score uses the UI shell quality category as a proxy until the frame gets its own arcade-style rubric.",
            "grounding": "Best-case grounding will score cabinet rails, bezel proportions, button density, labels, build/date treatment, chroming, responsive fit, and visual consistency across local/dev/beta/prod surfaces.",
            "meaning": "Players experience the frame as the cabinet around every game. Designers use it to make the platform feel trustworthy, arcade-native, and not like a generic web wrapper."
          },
          "scoreContext": {
            "confidence": "medium",
            "resolution": "UI shell proxy; dedicated visual/modal rubric still needed",
            "scoreMeaning": "Strong measured conformance with known remaining risk mostly in narrower edge cases, coverage, or polish."
          },
          "score10": 9.2,
          "current": "9.2/10",
          "target": "9.4-9.6",
          "status": "Measured as UI shell; needs separate arcade-frame style rubric",
          "why": "The cabinet frame is the constant product surface around every game.",
          "effort": "Medium; 1-3 hrs visual QA",
          "next": "Defer unless new ingestion evidence reveals a larger graphics-conformance gap.",
          "evidence": "reference-artifacts/analyses/quality-conformance/2026-05-24-ff249bba/report.json",
          "costContext": {
            "costClass": "medium",
            "expectedResources": "cpu, browser, gpu",
            "trackedAxes": [
              "quality-score"
            ],
            "trackedSpend": "16 runs; 17.6 min wall; 20.4 min CPU",
            "trackedRuns": 16,
            "trackedWallSeconds": 1058.101,
            "trackedCpuSeconds": 1223.73,
            "expectedLift10": 0.12,
            "expectedOverallLift10": 0.009,
            "investmentScore": 0.38,
            "gapToTarget": "+0.2",
            "summary": "medium; cpu, browser, gpu",
            "valueCostRead": "Expected lift 0.12/10 on metric, 0.009/10 overall; investment score 0.38."
          }
        },
        {
          "rank": 13,
          "metric": "Popup/help/scoring/leaderboard surface formatting",
          "explanation": {
            "calculation": "Current score uses the UI shell suite as a proxy until help, score, account, feedback, leaderboard, and game-over modals get a modal-specific scorer.",
            "grounding": "Best-case grounding will compare each modal surface for layout, typography, arcade tone, score clarity, keyboard/controller ergonomics, and no-overlap responsive behavior.",
            "meaning": "Players rely on these screens to understand scoring, recover from a run, file feedback, and trust records. Designers use it to keep utility surfaces polished without breaking arcade immersion."
          },
          "scoreContext": {
            "confidence": "medium",
            "resolution": "UI shell proxy; dedicated visual/modal rubric still needed",
            "scoreMeaning": "Strong measured conformance with known remaining risk mostly in narrower edge cases, coverage, or polish."
          },
          "score10": 9.2,
          "current": "9.2/10",
          "target": "9.4-9.6",
          "status": "Measured through UI shell suite; needs modal-specific scoring",
          "why": "Popup surfaces carry learning, scoring trust, feedback, and player records.",
          "effort": "Low-medium; 1-2 hrs",
          "next": "Defer unless new ingestion evidence reveals a larger graphics-conformance gap.",
          "evidence": "reference-artifacts/analyses/quality-conformance/2026-05-24-ff249bba/report.json",
          "costContext": {
            "costClass": "medium",
            "expectedResources": "cpu, browser",
            "trackedAxes": [
              "quality-score"
            ],
            "trackedSpend": "16 runs; 17.6 min wall; 20.4 min CPU",
            "trackedRuns": 16,
            "trackedWallSeconds": 1058.101,
            "trackedCpuSeconds": 1223.73,
            "expectedLift10": 0.12,
            "expectedOverallLift10": 0.009,
            "investmentScore": 0.38,
            "gapToTarget": "+0.2",
            "summary": "medium; cpu, browser",
            "valueCostRead": "Expected lift 0.12/10 on metric, 0.009/10 overall; investment score 0.38."
          }
        },
        {
          "rank": 14,
          "metric": "Dive fairness and safety",
          "explanation": {
            "calculation": "Score is read from the dive-safety quality category and associated harness checks for unfair collision, lane, and persona safety regressions.",
            "grounding": "Best-case grounding is repeated persona/seed sweeps, collision windows, near-miss traces, and pressure diagnostics after every risky movement or threat change.",
            "meaning": "Players accept hard deaths when they feel earned. Designers use it as a guardrail so added pressure does not become unfairness."
          },
          "scoreContext": {
            "confidence": "medium-high",
            "resolution": "seed/persona safety guardrails and pressure-sensitive collision checks",
            "scoreMeaning": "Strong measured conformance with known remaining risk mostly in narrower edge cases, coverage, or polish."
          },
          "score10": 9.1,
          "current": "9.1/10",
          "target": "9.3+",
          "status": "Measured release category",
          "why": "Protects user trust while pressure is increased.",
          "effort": "Guardrail; 30-90 min per risky gameplay cycle",
          "next": "Keep as required guardrail for pressure/reward changes.",
          "evidence": "reference-artifacts/analyses/quality-conformance/2026-05-24-ff249bba/report.json",
          "costContext": {
            "costClass": "guardrail",
            "expectedResources": "cpu",
            "trackedAxes": [
              "stage4-pressure",
              "quality-score"
            ],
            "trackedSpend": "44 runs; 30.5 min wall; 38.9 min CPU",
            "trackedRuns": 44,
            "trackedWallSeconds": 1827.5230000000001,
            "trackedCpuSeconds": 2335.11,
            "expectedLift10": null,
            "expectedOverallLift10": null,
            "investmentScore": null,
            "gapToTarget": "+0.2",
            "summary": "guardrail; cpu",
            "valueCostRead": "Guardrail spend: value is preventing regression rather than raising the score."
          }
        },
        {
          "rank": 15,
          "metric": "Player movement conformance",
          "explanation": {
            "calculation": "Score is read from the player-movement correspondence category, comparing movement traces and control response against the reference-derived movement target.",
            "grounding": "Best-case grounding is reference movement traces, Aurora controlled input traces, speed/position deltas, and regression checks across viewport and persona modes.",
            "meaning": "Players feel this as the basic trust in the ship. Designers use it as a do-not-regress foundation for every other gameplay improvement."
          },
          "scoreContext": {
            "confidence": "high-current-pass",
            "resolution": "reference trace plus controlled movement harness checks; expert micro-feel can still exceed scorer resolution",
            "scoreMeaning": "No known measured gap under the current scorer and evidence coverage. This is a guardrail pass, not proof of perfection."
          },
          "score10": 10,
          "current": "10/10",
          "target": "Maintain 10",
          "status": "Measured release category",
          "why": "Core control feel is already excellent.",
          "effort": "Guardrail only",
          "next": "Do not tune unless a new reference metric proves a gap.",
          "evidence": "reference-artifacts/analyses/quality-conformance/2026-05-24-ff249bba/report.json",
          "costContext": {
            "costClass": "guardrail",
            "expectedResources": "cpu, browser",
            "trackedAxes": [
              "quality-score"
            ],
            "trackedSpend": "16 runs; 17.6 min wall; 20.4 min CPU",
            "trackedRuns": 16,
            "trackedWallSeconds": 1058.101,
            "trackedCpuSeconds": 1223.73,
            "expectedLift10": null,
            "expectedOverallLift10": null,
            "investmentScore": null,
            "gapToTarget": "at target",
            "summary": "guardrail; cpu, browser",
            "valueCostRead": "Guardrail spend: value is preventing regression rather than raising the score."
          }
        },
        {
          "rank": 16,
          "metric": "Shot and hit responsiveness",
          "explanation": {
            "calculation": "Score is read from the combat-responsiveness category, covering shot timing, hit registration, close-shot behavior, and event feedback guardrails.",
            "grounding": "Best-case grounding is controlled shot/hit scenarios, close-contact tests, event logs, and paired visual/audio feedback timing.",
            "meaning": "Players need shots and impacts to feel immediate and legible. Designers use it to protect core combat feel while improving explosions and sound semantics."
          },
          "scoreContext": {
            "confidence": "high-current-pass",
            "resolution": "functional combat-response guardrails; audiovisual semantics are scored separately",
            "scoreMeaning": "No known measured gap under the current scorer and evidence coverage. This is a guardrail pass, not proof of perfection."
          },
          "score10": 10,
          "current": "10/10",
          "target": "Maintain 10",
          "status": "Measured release category",
          "why": "Core combat response is already excellent.",
          "effort": "Guardrail only",
          "next": "Protect during explosion/audio/event feedback work.",
          "evidence": "reference-artifacts/analyses/quality-conformance/2026-05-24-ff249bba/report.json",
          "costContext": {
            "costClass": "guardrail",
            "expectedResources": "cpu",
            "trackedAxes": [
              "quality-score"
            ],
            "trackedSpend": "16 runs; 17.6 min wall; 20.4 min CPU",
            "trackedRuns": 16,
            "trackedWallSeconds": 1058.101,
            "trackedCpuSeconds": 1223.73,
            "expectedLift10": null,
            "expectedOverallLift10": null,
            "investmentScore": null,
            "gapToTarget": "at target",
            "summary": "guardrail; cpu",
            "valueCostRead": "Guardrail spend: value is preventing regression rather than raising the score."
          }
        },
        {
          "rank": 17,
          "metric": "Stage 1 opening geometry fidelity",
          "explanation": {
            "calculation": "Score is read from the stage-1 opening geometry category, comparing formation layout and opening positions against the reference geometry target.",
            "grounding": "Best-case grounding is reference contact sheets, Aurora opening captures, formation coordinate traces, and geometry tolerance checks.",
            "meaning": "Players read formation authenticity before they consciously notice details. Designers use it as the locked baseline for alien-entry and formation work."
          },
          "scoreContext": {
            "confidence": "high-current-pass",
            "resolution": "opening formation geometry checks; later-stage entry variation is separate",
            "scoreMeaning": "No known measured gap under the current scorer and evidence coverage. This is a guardrail pass, not proof of perfection."
          },
          "score10": 10,
          "current": "10/10",
          "target": "Maintain 10",
          "status": "Measured release category",
          "why": "Formation geometry is already locked.",
          "effort": "Guardrail only",
          "next": "Protect during alien-entry visual work.",
          "evidence": "reference-artifacts/analyses/quality-conformance/2026-05-24-ff249bba/report.json",
          "costContext": {
            "costClass": "guardrail",
            "expectedResources": "cpu, browser",
            "trackedAxes": [
              "quality-score"
            ],
            "trackedSpend": "16 runs; 17.6 min wall; 20.4 min CPU",
            "trackedRuns": 16,
            "trackedWallSeconds": 1058.101,
            "trackedCpuSeconds": 1223.73,
            "expectedLift10": null,
            "expectedOverallLift10": null,
            "investmentScore": null,
            "gapToTarget": "at target",
            "summary": "guardrail; cpu, browser",
            "valueCostRead": "Guardrail spend: value is preventing regression rather than raising the score."
          }
        },
        {
          "rank": 18,
          "metric": "Capture and rescue rule fidelity",
          "explanation": {
            "calculation": "Score is read from the capture/rescue quality category, including capture, no-leak, rescue, and rule-boundary harness checks.",
            "grounding": "Best-case grounding is reference capture/rescue behavior, controlled Aurora scenarios, event logs, state assertions, and score/reward feedback traces.",
            "meaning": "Players see capture and rescue as a signature Galaga risk/reward mechanic. Designers use it as a hard identity guardrail while improving feedback and scoring opportunities."
          },
          "scoreContext": {
            "confidence": "high-current-pass",
            "resolution": "rule/state harness checks; feedback clarity and reward feel are separate",
            "scoreMeaning": "No known measured gap under the current scorer and evidence coverage. This is a guardrail pass, not proof of perfection."
          },
          "score10": 10,
          "current": "10/10",
          "target": "Maintain 10",
          "status": "Measured release category",
          "why": "Strong Galaga identity mechanic; should not regress while feedback improves.",
          "effort": "Guardrail only",
          "next": "Use as release blocker for capture/rescue-adjacent audio or explosion changes.",
          "evidence": "reference-artifacts/analyses/quality-conformance/2026-05-24-ff249bba/report.json",
          "costContext": {
            "costClass": "guardrail",
            "expectedResources": "cpu",
            "trackedAxes": [
              "quality-score"
            ],
            "trackedSpend": "16 runs; 17.6 min wall; 20.4 min CPU",
            "trackedRuns": 16,
            "trackedWallSeconds": 1058.101,
            "trackedCpuSeconds": 1223.73,
            "expectedLift10": null,
            "expectedOverallLift10": null,
            "investmentScore": null,
            "gapToTarget": "at target",
            "summary": "guardrail; cpu",
            "valueCostRead": "Guardrail spend: value is preventing regression rather than raising the score."
          }
        },
        {
          "rank": 19,
          "metric": "Challenge-stage timing fidelity",
          "explanation": {
            "calculation": "Score is read from the challenge-stage timing category, comparing Aurora challenge entry, transition, result, and timing metrics against reference tolerances.",
            "grounding": "Best-case grounding is reference challenge-stage timing windows, Aurora controlled-clock scenarios, result feedback traces, and pass/fail delta reports.",
            "meaning": "Players need bonus stages to feel rhythmic and fair. Designers use this as the timing guardrail while adding more challenge-stage variety."
          },
          "scoreContext": {
            "confidence": "high-current-pass",
            "resolution": "challenge timing deltas within tolerance; variation and teaching value are separate",
            "scoreMeaning": "No known measured gap under the current scorer and evidence coverage. This is a guardrail pass, not proof of perfection."
          },
          "score10": 10,
          "current": "10/10",
          "target": "Maintain 9.8+",
          "status": "Measured release category",
          "why": "Timing is strong; variation is the gap, not baseline timing.",
          "effort": "Guardrail only",
          "next": "Preserve while adding challenge variation scoring.",
          "evidence": "reference-artifacts/analyses/quality-conformance/2026-05-24-ff249bba/report.json",
          "costContext": {
            "costClass": "guardrail",
            "expectedResources": "cpu, browser",
            "trackedAxes": [
              "quality-score"
            ],
            "trackedSpend": "16 runs; 17.6 min wall; 20.4 min CPU",
            "trackedRuns": 16,
            "trackedWallSeconds": 1058.101,
            "trackedCpuSeconds": 1223.73,
            "expectedLift10": null,
            "expectedOverallLift10": null,
            "investmentScore": null,
            "gapToTarget": "at target",
            "summary": "guardrail; cpu, browser",
            "valueCostRead": "Guardrail spend: value is preventing regression rather than raising the score."
          }
        }
      ],
      "economicsSummary": {
        "latestOverallScore10": 8.8,
        "latestLevelArcScore10": 8.8,
        "metricPointCount": 1276,
        "deltaCount": 138,
        "measuredRuns": 984,
        "wallSeconds": 58850.295,
        "cpuSeconds": 59494.99,
        "artifactBytes": 1512759390,
        "computeApplication": {
          "gpuUseByPurpose": [
            {
              "purpose": "Audio conformance and cue feedback",
              "runs": 9,
              "wallSeconds": 14143.688,
              "wallMinutes": 235.73,
              "cpuSeconds": 71.4,
              "share": 0.6444,
              "sharePercent": 64.4,
              "examples": [
                "Challenge Perfect candidate audio sweep for measured centroid/risk reduction",
                "Challenge Perfect second-pass candidate sweep with gated keeper selection",
                "Challenge Perfect isolated candidate sweep after capture hardening"
              ],
              "interpretation": "Moves the moment-to-moment arcade feel: impact clarity, ambience identity, reward/loss feedback, and player understanding."
            },
            {
              "purpose": "Gameplay behavior and level complexity",
              "runs": 1,
              "wallSeconds": 4500,
              "wallMinutes": 75,
              "cpuSeconds": 0,
              "share": 0.205,
              "sharePercent": 20.5,
              "examples": [
                "Model-assisted 10-step sprite, impact-feedback, challenge-contract, documentation, and validation cycle; manual GPU-equivalent estimate for Codex planning/code review/interpretation."
              ],
              "interpretation": "Moves player-facing pressure, stage shape, alien entry novelty, challenge-stage learning value, and long-play texture."
            },
            {
              "purpose": "Dashboard, docs, and release planning",
              "runs": 2,
              "wallSeconds": 3300,
              "wallMinutes": 55,
              "cpuSeconds": 0,
              "share": 0.1504,
              "sharePercent": 15,
              "examples": [
                "manual estimate: model-assisted CPU/GPU economics dashboard design and implementation",
                "model-assisted self-critical conformance investment review, docs/dashboard wiring, and recommendations"
              ],
              "interpretation": "Moves decision quality: what to invest in next, how to explain releases, and how to keep dev/beta/prod evidence aligned."
            },
            {
              "purpose": "Visual and video reference analysis",
              "runs": 1,
              "wallSeconds": 3.49,
              "wallMinutes": 0.06,
              "cpuSeconds": 3.43,
              "share": 0.0002,
              "sharePercent": 0,
              "examples": [
                "First Aurora visual look conformance scorer baseline"
              ],
              "interpretation": "Moves graphical identity, reference inspection, contact-sheet review, sprite/surface comparison, and readability."
            }
          ],
          "cpuUseByPurpose": [
            {
              "purpose": "Audio conformance and cue feedback",
              "runs": 519,
              "wallSeconds": 30285.053,
              "wallMinutes": 504.75,
              "cpuSeconds": 48699.53,
              "share": 0.8196,
              "sharePercent": 82,
              "examples": [
                "Stage4 lane2 after bounded column5 cue",
                "Stage4 lane2 source-exact scenario after cue",
                "Stage4 loss windows after lane2 cue"
              ],
              "interpretation": "Moves the moment-to-moment arcade feel: impact clarity, ambience identity, reward/loss feedback, and player understanding."
            },
            {
              "purpose": "Gameplay behavior and level complexity",
              "runs": 425,
              "wallSeconds": 6025.98,
              "wallMinutes": 100.43,
              "cpuSeconds": 10053.99,
              "share": 0.1631,
              "sharePercent": 16.3,
              "examples": [
                "Stage4 lane2 action precision baseline",
                "Stage4 lane2 action precision baseline",
                "Stage4 lane2 action precision after harness fix"
              ],
              "interpretation": "Moves player-facing pressure, stage shape, alien entry novelty, challenge-stage learning value, and long-play texture."
            },
            {
              "purpose": "Harness, ingestion, and assessment logic",
              "runs": 9,
              "wallSeconds": 610.144,
              "wallMinutes": 10.17,
              "cpuSeconds": 704.93,
              "share": 0.0165,
              "sharePercent": 1.7,
              "examples": [
                "quality score after cadence generator no-keeper cycle",
                "quality score after pincer/crown geometry pass",
                "loss reward rollback quality score"
              ],
              "interpretation": "Moves reusable automation: scorers, artifact extraction, candidate loops, measurement confidence, and future game ingestion."
            },
            {
              "purpose": "Visual and video reference analysis",
              "runs": 18,
              "wallSeconds": 27.559,
              "wallMinutes": 0.46,
              "cpuSeconds": 35.07,
              "share": 0.0007,
              "sharePercent": 0.1,
              "examples": [
                "First Aurora visual look conformance scorer baseline",
                "npm run harness:analyze:aurora-runtime-sprite-conformance",
                "npm run harness:analyze:aurora-runtime-vs-galaga-target-crops"
              ],
              "interpretation": "Moves graphical identity, reference inspection, contact-sheet review, sprite/surface comparison, and readability."
            },
            {
              "purpose": "Dashboard, docs, and release planning",
              "runs": 6,
              "wallSeconds": 1.559,
              "wallMinutes": 0.03,
              "cpuSeconds": 1.47,
              "share": 0,
              "sharePercent": 0,
              "examples": [
                "quick-ledger-smoke",
                "dashboard refresh after conformance economics update",
                "local dev dashboard refresh after economics update"
              ],
              "interpretation": "Moves decision quality: what to invest in next, how to explain releases, and how to keep dev/beta/prod evidence aligned."
            }
          ],
          "gameplayImprovementByPart": [
            {
              "part": "Gameplay complexity and stage arc",
              "positiveScore10": 47.4,
              "share": 0.7418,
              "sharePercent": 74.2,
              "axes": [
                "quality:challenge-timing",
                "quality:stage1-timing",
                "stage4-pressure-collision-diagnostic-coverage",
                "stage4-pressure-exact-replay-coverage",
                "stage-signature-distance",
                "quality:formation-boss-grammar",
                "level-arc:stage-distinctiveness",
                "level-arc:movement-grammar-expansion"
              ],
              "interpretation": "Player-perceived variety, pressure, alien choreography, challenge-stage novelty, and long-play learning curve."
            },
            {
              "part": "Core mechanics and control feel",
              "positiveScore10": 7.9,
              "share": 0.1236,
              "sharePercent": 12.4,
              "axes": [
                "quality:movement"
              ],
              "interpretation": "Player-perceived fairness, responsiveness, collision quality, and trust in combat outcomes."
            },
            {
              "part": "Audio feedback and event clarity",
              "positiveScore10": 5.8,
              "share": 0.0908,
              "sharePercent": 9.1,
              "axes": [
                "quality:audio"
              ],
              "interpretation": "Player-perceived clarity from sounds that explain danger, reward, loss, and arcade identity."
            },
            {
              "part": "Overall release-quality rollup",
              "positiveScore10": 2.8,
              "share": 0.0438,
              "sharePercent": 4.4,
              "axes": [
                "overall-quality"
              ],
              "interpretation": "Composite release score movement that reflects several subsystems at once."
            }
          ],
          "totals": {
            "gpuEquivalentWallSeconds": 21947.178,
            "cpuLocalWallSeconds": 36950.295,
            "positiveScore10Attributed": 63.9
          },
          "limitations": [
            "GPU-equivalent accounting includes declared Codex/OpenAI/model/API/GPU usage and manual ledger entries. The repo cannot automatically read every Codex chat token or quota draw.",
            "Impact attribution is best-effort. It groups positive score movement by conformance axis and is not a controlled causal experiment.",
            "A harness or documentation gain may improve future decision quality without immediately moving a gameplay score."
          ]
        },
        "charts": [
          "reference-artifacts/analyses/conformance-economics/2026-06-12-dab57ac7/score-trends.svg",
          "reference-artifacts/analyses/conformance-economics/2026-06-12-dab57ac7/largest-score-deltas.svg",
          "reference-artifacts/analyses/conformance-economics/2026-06-12-dab57ac7/compute-minutes-by-resource.svg",
          "reference-artifacts/analyses/conformance-economics/2026-06-12-dab57ac7/cost-per-positive-score-point.svg",
          "reference-artifacts/analyses/conformance-economics/2026-06-12-dab57ac7/gpu-equivalent-use-by-purpose.svg",
          "reference-artifacts/analyses/conformance-economics/2026-06-12-dab57ac7/cpu-use-by-purpose.svg",
          "reference-artifacts/analyses/conformance-economics/2026-06-12-dab57ac7/gameplay-improvement-by-project-part.svg"
        ],
        "retrospective": {
          "generatedAt": "2026-05-18T12:43:17.737Z",
          "executiveRead": "The past focused block substantially improved our honesty and repeatability, but only modestly improved player-facing conformance. Challenge stages are now scored with a strict 1/10 baseline and have risen to 3.8/10; that is a real improvement from the strict 2.5/10 baseline, but still far from human-level Galaga conformance. The biggest remaining failures are movement grammar, alien novelty, stage-to-stage challenge progression, and stable audio runtime promotion.",
          "workWindow": {
            "label": "Estimated focused conformance work block",
            "estimatedHours": 10,
            "basis": "Committed artifacts, challenge-stage history, quality score history, dashboard cost context, candidate sweep reports, and current audio/application artifacts. Some Codex/model/human orchestration time is estimated because it was not automatically logged."
          },
          "movedMost": [
            "Strict challenge-stage truth improved: the old broad read around 6.1/10 was replaced by a stricter baseline at 2.5/10, then recovered to 3.8/10. This is progress, but it is mostly better measurement plus partial graphical evidence, not a solved gameplay problem.",
            "Challenge graphical conformance moved from 2.1/10 to 4.3/10 after object-track/static visual evidence landed.",
            "Overall quality under the strict challenge metric moved only 8.7/10 -> 8.8/10, which is an honest signal that the user-visible game has not leapt forward as much as the harness did."
          ],
          "movedLeast": [
            "Alien novelty remains 3.4/10; it did not materially move during the focused block.",
            "Stage-to-stage challenge progression remains 3/10; late challenges still do not read as distinct Galaga-like lessons.",
            "Challenge movement conformance is only 3.4/10 and has plateaued relative to the amount of analysis effort.",
            "Audio runtime promotion is still zero accepted cues even though cue contracts and candidate loops improved the process."
          ],
          "failurePatterns": [
            "Challenge-stage layout sweeps are too shallow for the real problem. The gap is trajectory grammar, entry/exit choreography, alien-family staging, and temporal sprite motion, not just spawn timing and lane offsets.",
            "Audio candidate loops are optimizing isolated clips faster than they are improving full-theme live capture. Reference-vs-reference calibration and repeated full-theme stability gates must come before more runtime promotion.",
            "Some graphics artifacts are useful to the harness but not useful enough to a human reviewer. Dense contact sheets and tiny \"view larger\" images need to become stage-by-stage temporal crop strips and object-track overlays.",
            "The economics ledger still undercounts Codex/model/human orchestration time. The charts correctly show measured local CPU/browser spend, but cloud/model work is only visible when manually logged.",
            "High broad scores can mask low strict scores. The broad alien/challenge novelty score is useful context, but the strict challenge-stage set-piece score is the one that matches the human complaint."
          ],
          "recommendations": [
            "Make challenge-stage path grammar the next primary gameplay investment: define per-challenge contracts for group order, first-visible frame, path length, turn count, exit side, alien family, animation phases, and bonus-shot opportunity.",
            "Build direct target object tracks from the supplied Galaga challenge videos and compare Aurora tracks against those trajectories before authoring another large sweep.",
            "Replace dense challenge contact sheets in the human docs with larger expandable crop sequences: reference target strip, Aurora current strip, object-track overlay, and per-axis score.",
            "Freeze audio runtime promotion until reference-vs-reference and current-vs-current variance is known for challengePerfect, challengeTransition, gameOver, captureBeam, and stagePulse.",
            "Log every multi-hour cycle with `npm run harness:measure`, and add a manual GPU-equivalent Codex entry whenever model work materially designs, interprets, or changes the harness.",
            "Treat the next beta justification as requiring visible player-facing lift in challenge movement/novelty or audio clarity, not just more documentation or scorer sophistication."
          ],
          "metricMovements": [
            {
              "id": "challenge-set-piece",
              "label": "Challenge-stage strict conformance",
              "startScore10": 2.5,
              "currentScore10": 3.8,
              "delta10": 1.3,
              "progressClass": "advanced",
              "playerMeaning": "Highest-priority gameplay authenticity gap.",
              "startReport": "reference-artifacts/analyses/challenge-stage-conformance/2026-05-17-edf46536/report.json",
              "currentReport": "reference-artifacts/analyses/challenge-stage-conformance/2026-05-18-6d76050d/report.json"
            },
            {
              "id": "challenge-interest",
              "label": "Challenge-stage interesting factor",
              "startScore10": 2.6,
              "currentScore10": 3.8,
              "delta10": 1.2,
              "progressClass": "advanced",
              "playerMeaning": "Bonus stages should feel authored and exciting, not merely safe.",
              "startReport": "reference-artifacts/analyses/challenge-stage-conformance/2026-05-17-edf46536/report.json",
              "currentReport": "reference-artifacts/analyses/challenge-stage-conformance/2026-05-18-6d76050d/report.json"
            },
            {
              "id": "challenge-movement",
              "label": "Challenge movement / trajectory conformance",
              "startScore10": 2.3,
              "currentScore10": 3.4,
              "delta10": 1.1,
              "progressClass": "advanced",
              "playerMeaning": "True alien path grammar and motion shape.",
              "startReport": "reference-artifacts/analyses/challenge-stage-conformance/2026-05-17-edf46536/report.json",
              "currentReport": "reference-artifacts/analyses/challenge-stage-conformance/2026-05-18-6d76050d/report.json"
            },
            {
              "id": "challenge-graphics",
              "label": "Challenge graphical conformance",
              "startScore10": 2.1,
              "currentScore10": 4.3,
              "delta10": 2.2,
              "progressClass": "advanced",
              "playerMeaning": "Visible alien/sprite/readability fit against target challenge artifacts.",
              "startReport": "reference-artifacts/analyses/challenge-stage-conformance/2026-05-17-edf46536/report.json",
              "currentReport": "reference-artifacts/analyses/challenge-stage-conformance/2026-05-18-6d76050d/report.json"
            },
            {
              "id": "challenge-alien-novelty",
              "label": "Challenge alien novelty",
              "startScore10": 3.4,
              "currentScore10": 3.4,
              "delta10": 0,
              "progressClass": "stalled",
              "playerMeaning": "Whether later challenges introduce memorable alien families and roles.",
              "startReport": "reference-artifacts/analyses/challenge-stage-conformance/2026-05-17-edf46536/report.json",
              "currentReport": "reference-artifacts/analyses/challenge-stage-conformance/2026-05-18-6d76050d/report.json"
            },
            {
              "id": "challenge-progression",
              "label": "Challenge stage-to-stage progression",
              "startScore10": 2.8,
              "currentScore10": 3,
              "delta10": 0.2,
              "progressClass": "nudged",
              "playerMeaning": "Whether the eight challenge stages escalate as distinct lessons.",
              "startReport": "reference-artifacts/analyses/challenge-stage-conformance/2026-05-17-edf46536/report.json",
              "currentReport": "reference-artifacts/analyses/challenge-stage-conformance/2026-05-18-6d76050d/report.json"
            },
            {
              "id": "challenge-shot-opportunity",
              "label": "Challenge scoring/shot opportunity",
              "startScore10": null,
              "currentScore10": 5.1,
              "delta10": null,
              "progressClass": "stalled",
              "playerMeaning": "Whether players get clear, learnable bonus-shot routes.",
              "startReport": "reference-artifacts/analyses/challenge-stage-conformance/2026-05-17-edf46536/report.json",
              "currentReport": "reference-artifacts/analyses/challenge-stage-conformance/2026-05-18-6d76050d/report.json"
            },
            {
              "id": "challenge-safety",
              "label": "Challenge no-combat safety guardrail",
              "startScore10": 10,
              "currentScore10": 10,
              "delta10": 0,
              "progressClass": "guardrail",
              "playerMeaning": "No enemy shots, no attack starts, no ship deaths in challenge windows.",
              "startReport": "reference-artifacts/analyses/challenge-stage-conformance/2026-05-17-edf46536/report.json",
              "currentReport": "reference-artifacts/analyses/challenge-stage-conformance/2026-05-18-6d76050d/report.json"
            },
            {
              "id": "overall-quality",
              "label": "Overall quality rollup after strict challenge metric",
              "startScore10": 8.7,
              "currentScore10": 8.8,
              "delta10": 0.1,
              "progressClass": "nudged",
              "playerMeaning": "Release score moved only slightly because the stricter challenge metric exposed a large gap.",
              "startReport": "reference-artifacts/analyses/quality-conformance/2026-05-17-edf46536/report.json",
              "currentReport": "reference-artifacts/analyses/quality-conformance/2026-05-18-6d76050d/report.json"
            },
            {
              "id": "audio-release-category",
              "label": "Audio release-category read",
              "startScore10": 7,
              "currentScore10": 7.3,
              "delta10": 0.3,
              "progressClass": "mixed",
              "playerMeaning": "The release score nudged upward, but accepted runtime cue promotion remains blocked by full-theme instability.",
              "startReport": "reference-artifacts/analyses/quality-conformance/2026-05-17-edf46536/report.json",
              "currentReport": "reference-artifacts/analyses/quality-conformance/2026-05-18-6d76050d/report.json"
            },
            {
              "id": "audio-runtime-promotion",
              "label": "Audio runtime promotion success",
              "startScore10": 0,
              "currentScore10": 0,
              "delta10": 0,
              "progressClass": "stalled",
              "playerMeaning": "No candidate is accepted into runtime audio yet; the process improved, but the shipped sound did not meaningfully move from these candidates.",
              "currentReport": "reference-artifacts/analyses/aurora-audio-conformance-lab-v2/latest.json"
            },
            {
              "id": "runtime-sprite-static",
              "label": "Live runtime static sprite conformance",
              "startScore10": null,
              "currentScore10": 6.19,
              "delta10": null,
              "progressClass": "known-gap",
              "playerMeaning": "Static runtime sprite identity is measured around 6/10 and active motion is still a planning row, so graphics remain visually incomplete.",
              "currentReport": "reference-artifacts/analyses/application-artifact-conformance/latest.json"
            }
          ],
          "resourceRead": {
            "dashboardChallengeSpend": {
              "runs": 161,
              "wallMinutes": 24,
              "cpuMinutes": 39.7,
              "label": "161 runs; 24 min wall; 39.7 min CPU"
            },
            "dashboardAudioSpend": {
              "runs": 309,
              "wallMinutes": 253.7,
              "cpuMinutes": 459.4,
              "label": "309 runs; 253.7 min wall; 459.4 min CPU"
            },
            "challengeCurrentScore10": 3.8,
            "audioCurrentScore10": 7.3,
            "estimatedUnloggedHours": 10,
            "accountingDebt": "Recent repo work includes merge/review/documentation and model-assisted reasoning that is not fully represented in the measured run ledger. Treat cost charts as a lower bound until manual Codex/model entries are logged per work cycle.",
            "dashboardSource": "reference-artifacts/analyses/release-conformance-dashboard/latest.json"
          },
          "deepLinks": [
            {
              "label": "Local Cost / Value dashboard",
              "href": "http://127.0.0.1:4312/local-dev/conformance-dashboard.html?game=aurora-galactica#cost"
            },
            {
              "label": "Hosted dev Cost / Value dashboard",
              "href": "https://sgwoods.github.io/Aurora-Galactica/dev/conformance-dashboard.html?game=aurora-galactica#cost"
            },
            {
              "label": "Hosted dev conformance dashboard",
              "href": "https://sgwoods.github.io/Aurora-Galactica/dev/conformance-dashboard.html?game=aurora-galactica#conformance"
            },
            {
              "label": "Project guide retrospective section",
              "href": "project-guide.html#conformance-investment-retrospective-doc"
            }
          ],
          "charts": [
            "reference-artifacts/analyses/conformance-investment-retrospective/2026-05-18-e583b558/score-movement-critical-axes.svg",
            "reference-artifacts/analyses/conformance-investment-retrospective/2026-05-18-e583b558/largest-human-conformance-gaps.svg",
            "reference-artifacts/analyses/conformance-investment-retrospective/2026-05-18-e583b558/spend-versus-confidence-gaps.svg"
          ],
          "sourceReport": "reference-artifacts/analyses/conformance-investment-retrospective/2026-05-18-e583b558/report.json"
        }
      },
      "ingestionSummary": {
        "sourceFamilyCount": 16,
        "highConfidenceCount": 10,
        "mixedOrLowConfidenceCount": 2,
        "scoredOrPromotedCount": 11,
        "nextBestUpgrade": "Add Galaga-family visual contact-sheet comparison, sprite readability labels, and model-assisted visual critique.",
        "framing": "Ingestion turns reference media and Aurora runtime captures into repeatable evidence: clips, contact sheets, traces, event logs, labels, scores, confidence, and next missing annotations."
      },
      "ingestionRows": [
        {
          "rank": 1,
          "source": "Galaga-family reference audio clips",
          "axis": "audio identity / event feedback",
          "artifactType": "reference m4a cue clips",
          "coverage": "0 clips",
          "annotationStatus": "clipped, mapped, partially scored",
          "confidence": "medium-high",
          "linkedMetric": "Audio identity, event feedback, and cue alignment",
          "anchor": "src/assets/reference-audio",
          "next": "Add finer event labels for explosion, impact, boss damage, immunity/entry, capture, and rescue semantics."
        },
        {
          "rank": 2,
          "source": "Aurora audio cue comparison and event-gap reports",
          "axis": "audio cue scoring",
          "artifactType": "waveform/spectral/alignment/semantic reports",
          "coverage": "21 compared cues; semantic 9.78/10; 0 attention rows",
          "annotationStatus": "semantic-scored",
          "confidence": "medium-high",
          "linkedMetric": "Audio identity, event feedback, and cue alignment",
          "anchor": "reference-artifacts/analyses/aurora-audio-event-gap/2026-05-16-08c327dd-dirty-155303/report.json",
          "next": "Tune the highest segment-level gap next: challengePerfect onset. Rerun audio comparison and event-gap analysis after the change."
        },
        {
          "rank": 3,
          "source": "Aurora Audio Conformance Lab v2",
          "axis": "audio candidate loop / family promotion decisions",
          "artifactType": "cue-family risk, candidate history, keeper decision, promotion gate",
          "coverage": "8/8 target cues swept; 2 keeper candidates tracked; runtime promotions 0; rejected runtime trials 3",
          "annotationStatus": "family-scored",
          "confidence": "medium-high",
          "linkedMetric": "Audio identity, event feedback, and cue alignment",
          "anchor": "reference-artifacts/analyses/aurora-audio-conformance-lab-v2/2026-05-17-f9e7374c-dirty/report.json",
          "next": "challengePerfect: Do not promote Challenge Perfect from isolated onset/body candidates. Replace the next audio strategy with full-phrase/segment-boundary work: stabilize the scorer on canonical reference-vs-reference capture, then generate candidates that optimize onset, body, tail, and live capture segmentation together."
        },
        {
          "rank": 4,
          "source": "Aurora audio cue contracts",
          "axis": "audio semantic contract / theme latitude / promotion safety",
          "artifactType": "cue contract readiness, theme lanes, runtime-trial blockers",
          "coverage": "8 contracts; readiness 9.1/10; blocked 7; rejected trials n/a",
          "annotationStatus": "contract-scored",
          "confidence": "medium-high",
          "linkedMetric": "Audio identity, event feedback, and cue alignment",
          "anchor": "reference-artifacts/analyses/aurora-audio-cue-contracts/2026-05-11-b83393cd-dirty-201628/report.json",
          "next": "Keep the calibrated layered playerHit runtime cue; next either refine the residual playerHit tail/body gap with the same calibrated scorer or move effort to stagePulse pressure-bed strategy if user impact per compute looks higher."
        },
        {
          "rank": 5,
          "source": "Aurora audio runtime trial decisions",
          "axis": "audio promotion evidence / release guardrails",
          "artifactType": "accepted, rejected, and inconclusive live runtime-trial outcomes",
          "coverage": "challengePerfect runtime-trial-rejected; candidate perfect-clean-onset-soft-tail",
          "annotationStatus": "trial-recorded",
          "confidence": "medium-high",
          "linkedMetric": "Audio identity, event feedback, and cue alignment",
          "anchor": "reference-artifacts/analyses/aurora-audio-runtime-trials/2026-05-17-f9e7374c-dirty-123945-challenge-perfect-rejected/report.json",
          "next": "Do not promote Challenge Perfect from isolated onset/body candidates. Replace the next audio strategy with full-phrase/segment-boundary work: stabilize the scorer on canonical reference-vs-reference capture, then generate candidates that optimize onset, body, tail, and live capture segmentation together."
        },
        {
          "rank": 6,
          "source": "Aurora audio risk stability",
          "axis": "audio measurement stability / promotion confidence",
          "artifactType": "repeated event-gap volatility report",
          "coverage": "8 reports; 19 volatile cues; most volatile captureBeam 3.89/10 range",
          "annotationStatus": "stability-scored",
          "confidence": "medium-high",
          "linkedMetric": "Audio identity, event feedback, and cue alignment",
          "anchor": "reference-artifacts/analyses/aurora-audio-risk-stability/2026-05-17-f9e7374c-dirty-124419/report.json",
          "next": "Use median/repeated confirmation before promoting audio changes. Start by stabilizing captureBeam scoring, then retest challengePerfect."
        },
        {
          "rank": 7,
          "source": "Aurora audio promotion stability gate",
          "axis": "audio promotion safety / variance-aware gating",
          "artifactType": "candidate, precheck, event-gap, and stability join",
          "coverage": "3 cues; 0 runtime trials allowed; 3 stability rejections",
          "annotationStatus": "variance-gated",
          "confidence": "medium-high",
          "linkedMetric": "Audio identity, event feedback, and cue alignment",
          "anchor": "reference-artifacts/analyses/aurora-audio-promotion-stability-gate/2026-05-17-f9e7374c-dirty-125733/report.json",
          "next": "Do not promote challengePerfect. Preserve the candidate/precheck evidence and either stabilize measurement or generate a candidate whose full-theme win exceeds the current stability threshold."
        },
        {
          "rank": 8,
          "source": "Aurora audio strategy review",
          "axis": "audio conformance strategy / failure analysis",
          "artifactType": "diagnosis, revised strategy, and next calibration experiment",
          "coverage": "5 diagnoses; 6 strategy changes; next challengePerfect",
          "annotationStatus": "strategy-reviewed",
          "confidence": "medium-high",
          "linkedMetric": "Audio identity, event feedback, and cue alignment",
          "anchor": "reference-artifacts/analyses/aurora-audio-strategy-review/2026-05-17-f9e7374c-dirty-125741/report.json",
          "next": "Before any more runtime audio promotion, build a calibration pass that captures Galaga reference cues through the same browser path twice and measures reference-vs-reference, current-vs-current, and current-vs-reference variance for challengePerfect, challengeTransition, gameOver, captureBeam, and stagePulse."
        },
        {
          "rank": 9,
          "source": "Aurora stagePulse cadence pressure analysis",
          "axis": "formation pressure / cadence audio",
          "artifactType": "tracked cadence pressure axes from full audio comparison",
          "coverage": "pressure 2.7/10; weakest brightness-control",
          "annotationStatus": "scored",
          "confidence": "medium-high",
          "linkedMetric": "Audio identity, event feedback, and cue alignment",
          "anchor": "reference-artifacts/analyses/aurora-stage-pulse-cadence/2026-05-15-93dbdad8-dirty/report.json",
          "next": "Add a cadence-specific candidate generator that jointly optimizes low-band body, brightness control, zero-crossing calm, and gain. Promote only after both repeated focus gates and full audio-theme comparison improve."
        },
        {
          "rank": 10,
          "source": "Boss entry and formation grammar scorer",
          "axis": "formation grammar / boss entry / challenge identity",
          "artifactType": "event grammar, timing, stage-signature, and measurement-debt report",
          "coverage": "11 boss/formation windows",
          "annotationStatus": "scored",
          "confidence": "medium",
          "linkedMetric": "Boss entry and formation grammar",
          "anchor": "reference-artifacts/analyses/formation-boss-grammar-conformance/2026-05-24-ff249bba/report.json",
          "next": "Promote frame-level boss/escort path traces and formation rack slot coordinates so visual choreography can be scored directly."
        },
        {
          "rank": 11,
          "source": "Level arc and encounter-shape evidence",
          "axis": "level arc / challenge / reward",
          "artifactType": "stage signatures, pressure windows, persona reports",
          "coverage": "6/6 stage families; 11/6 evidence windows",
          "annotationStatus": "scored",
          "confidence": "medium-high",
          "linkedMetric": "Level arc and encounter shape",
          "anchor": "reference-artifacts/analyses/level-arc-conformance/2026-05-24-ff249bba/report.json",
          "next": "Add more long-play reference windows and expert-route scoring for challenge/reward opportunities."
        },
        {
          "rank": 12,
          "source": "Stage 4 pressure and loss-window diagnostics",
          "axis": "pressure / fairness",
          "artifactType": "loss windows, replay geometry, collision traces",
          "coverage": "3 promoted windows",
          "annotationStatus": "mined, replay-diagnostic",
          "confidence": "medium",
          "linkedMetric": "Stage 4 pressure exact replay / pressure curve precision",
          "anchor": "reference-artifacts/analyses/aurora-stage4-loss-windows/2026-05-07-fb2f674/report.json",
          "next": "Improve exact replay matching and preserve per-frame attacker/player/shot geometry for candidate tuning."
        },
        {
          "rank": 13,
          "source": "Aurora visual look screenshots",
          "axis": "visual look / UI readability",
          "artifactType": "browser screenshots plus DOM/canvas metrics",
          "coverage": "4 surfaces",
          "annotationStatus": "first-pass scored",
          "confidence": "medium-low",
          "linkedMetric": "Overall visual look and feel",
          "anchor": "reference-artifacts/analyses/aurora-visual-look-conformance/2026-05-08-fee8820-dirty/report.json",
          "next": "Add Galaga-family visual contact-sheet comparison, sprite readability labels, and model-assisted visual critique."
        },
        {
          "rank": 14,
          "source": "Aurora evidence-cycle windows",
          "axis": "general ingestion framework",
          "artifactType": "manifests, contact sheets, traces, event logs, audio timelines",
          "coverage": "4 planned windows",
          "annotationStatus": "seed-plan-only",
          "confidence": "medium",
          "linkedMetric": "Level arc / challenge variation / visual look",
          "anchor": "reference-artifacts/analyses/evidence-cycle-dashboard/evidence-cycle-dashboard.json",
          "next": "Refresh evidence-cycle dashboard and promote window status into a canonical reference-corpus manifest."
        },
        {
          "rank": 15,
          "source": "Reference manifests and event logs inventory",
          "axis": "source provenance / annotation coverage",
          "artifactType": "source-manifest.json and reference-events.json",
          "coverage": "31 manifests; 11 event logs",
          "annotationStatus": "mixed",
          "confidence": "mixed",
          "linkedMetric": "All conformance metrics",
          "anchor": "reference-artifacts/analyses",
          "next": "Normalize provenance, duration, source confidence, and linked metric fields into a generated corpus manifest."
        },
        {
          "rank": 16,
          "source": "Reference contact sheets and frame evidence",
          "axis": "visual / motion / entry formation",
          "artifactType": "contact sheets and still frames",
          "coverage": "72 contact/frame evidence files",
          "annotationStatus": "extracted, partially labeled",
          "confidence": "medium",
          "linkedMetric": "Visual look, alien entry, challenge variation",
          "anchor": "reference-artifacts/analyses",
          "next": "Attach contact-sheet families to metric rows and add image-level comparison scores."
        }
      ],
      "scoreSemantics": {
        "headline": "An x/10 score is a measured rollup at the current scorer resolution, not a claim of arcade-perfect behavior.",
        "tenOutOfTen": "10/10 means no known measured gap under the current scorer and evidence coverage. It remains a guardrail pass until broader reference, expert-play, and edge-case evidence says otherwise.",
        "confidence": "Confidence estimates how much trust to place in the score as a release signal.",
        "resolution": "Resolution describes how fine-grained the scorer currently is and which blind spots may remain."
      },
      "sourceReports": {
        "quality": "reference-artifacts/analyses/quality-conformance/2026-05-24-ff249bba/report.json",
        "investmentPriority": "reference-artifacts/analyses/conformance-investment-priorities/2026-05-19-fba7f625/report.json",
        "levelArc": "reference-artifacts/analyses/level-arc-conformance/2026-05-24-ff249bba/report.json",
        "economics": "reference-artifacts/analyses/conformance-economics/2026-06-12-dab57ac7/report.json",
        "conformanceInvestmentRetrospective": "reference-artifacts/analyses/conformance-investment-retrospective/2026-05-18-e583b558/report.json",
        "alienEntryChallenge": "reference-artifacts/analyses/alien-entry-challenge-variation/2026-05-16-82fd62cb/report.json",
        "challengeStageConformance": "reference-artifacts/analyses/challenge-stage-conformance/2026-06-09-abb5c464/report.json",
        "challengeStageCandidateSweepIndex": "reference-artifacts/analyses/challenge-stage-candidate-sweep-index/latest.json",
        "challengeMovementGrammar": "reference-artifacts/analyses/challenge-movement-grammar/latest.json",
        "challengeMotionPrimitives": "reference-artifacts/analyses/challenge-motion-primitives/latest.json",
        "challengeSetpieceContracts": "reference-artifacts/analyses/challenge-setpiece-contracts/latest.json",
        "audioLabV2": "reference-artifacts/analyses/aurora-audio-conformance-lab-v2/2026-05-17-f9e7374c-dirty/report.json",
        "audioCueContracts": "reference-artifacts/analyses/aurora-audio-cue-contracts/2026-05-11-b83393cd-dirty-201628/report.json",
        "visualLook": "reference-artifacts/analyses/aurora-visual-look-conformance/2026-05-08-fee8820-dirty/report.json",
        "evidenceCycle": "reference-artifacts/analyses/evidence-cycle-dashboard/evidence-cycle-dashboard.json"
      },
      "newFirstClassAxes": [
        "Alien entry to levels: formation layout, timing, path method, and whether different stages enter differently.",
        "Boss entry and formation grammar: boss timing, escort composition, formation settle evidence, challenge pattern identity, stage variation, and path/slot precision.",
        "Challenge-stage variation: new alien types, new entry formations/styles, path families, reward/result feedback, and teaching value.",
        "Overall visual look and feel: gameplay readability, start/attract typography density, copy complexity, color discipline, and reference contact sheets.",
        "Arcade console frame UI: cabinet frame, bezel/rails, build/date trust signals, button density, and arcade-style containment.",
        "Popup/help/scoring surfaces: help, scoring, leaderboard, account, feedback, and game-over result formatting as their own modal-quality family."
      ]
    },
    {
      "gameKey": "galaxy-guardians-preview",
      "gameName": "Galaxy Guardians",
      "gameStatus": "Preview / ingestion",
      "currentInvestment": "Paused while Aurora remains the active conformance investment; ready for measured preview review when we switch back.",
      "releaseRead": "Useful playable-preview evidence now extends beyond the one-level public slice. The score table, wave progression, visible attract/score-table surfaces, frame-motion proxy, object-track proxy, runtime-vs-reference movement comparison, longer-surface stage-band review, persona review runs, stage-rank pressure contract, sprite-reference extraction, component sprite targets, waveform/spectrogram audio comparison, stricter measured cue targets, labeled cue-window previews, reusable audio conformance lab, playtest-weighted conformance review, opening-slice motion targets, combat-feedback frame authority, and platform-frame parity contract now exist as non-production preview contracts. The best-return next work is now deeper stage 3-5 and 6-9 fairness review, a browser-side audio/listening pass, and public-readiness work that extends beyond the current one-level mission_complete slice.",
      "scoreSemantics": {
        "headline": "Galaxy Guardians scores are preview/ingestion scores, not public-release conformance claims.",
        "tenOutOfTen": "A high preview score would mean the current preview contracts are satisfied, not that a full Galaxian-style game is complete.",
        "confidence": "Confidence is tied to source manifests, runtime gates, audio lab artifacts, and live play review.",
        "resolution": "Current resolution is preview-slice level; full-game progression, scoring, and release readiness remain separate gaps."
      },
      "releaseGate": [
        {
          "Gate": "Preview reference conformance",
          "Current": "7.6/10",
          "Target": ">=7.5 for compelling preview",
          "Notes": "Useful playable-preview evidence now extends beyond the one-level public slice. The score table, wave progression, visible attract/score-table surfaces, frame-motion proxy, object-track proxy, runtime-vs-reference movement comparison, longer-surface stage-band review, persona review runs, stage-rank pressure contract, sprite-reference extraction, component sprite targets, waveform/spectrogram audio comparison, stricter measured cue targets, labeled cue-window previews, reusable audio conformance lab, playtest-weighted conformance review, opening-slice motion targets, combat-feedback frame authority, and platform-frame parity contract now exist as non-production preview contracts. The best-return next work is now deeper stage 3-5 and 6-9 fairness review, a browser-side audio/listening pass, and public-readiness work that extends beyond the current one-level mission_complete slice."
        },
        {
          "Gate": "Playtest-weighted conformance",
          "Current": "6.9/10",
          "Target": ">=7.0 for preview confidence",
          "Notes": "Closer, and now reviewable across a longer surface rather than only a one-level peek. The runtime has measured later-band pressure, stage-theme progression, rack-preserving life loss, deterministic review personas, component-derived sprites, labeled cue-window influenced audio, a reusable Platinum audio conformance lab, explicit opening-motion targets, combat-feedback frame authority, and a shared-platform parity contract. The preview still needs a tighter WAIT/score-table board read, stronger palette progression, and better stage-five-and-beyond survivability before it feels compelling."
        },
        {
          "Gate": "Public release readiness",
          "Current": "4.2/10",
          "Target": "defer",
          "Notes": "Preview game remains intentionally non-production."
        },
        {
          "Gate": "Audio conformance lab",
          "Current": "8.5/10",
          "Target": ">=7.0 preview target",
          "Notes": "This lab is game-configured and can be registered for Aurora or future Platinum game packs."
        }
      ],
      "priorityRows": [
        {
          "rank": 1,
          "metric": "Preview reference conformance",
          "explanation": {
            "calculation": "Score is read from the latest generated conformance artifact for this metric or from the dashboard composite proxy when the metric has not yet been promoted to a dedicated scorer.",
            "grounding": "Best-case grounding is a canonical reference window or scorer-backed harness report with provenance, repeatable scenarios, and current Galaxy Guardians comparison artifacts.",
            "meaning": "For a player or designer, this metric says whether this part of the experience feels intentional, readable, fair, and close to the Galaxian-style target rather than merely functional."
          },
          "scoreContext": {
            "confidence": "medium",
            "resolution": "scorer-backed artifact with selected harness windows",
            "scoreMeaning": "Material conformance gap with meaningful user-experience or reference-identity impact."
          },
          "score10": 7.6,
          "current": "7.6/10",
          "target": ">=7.5 compelling preview",
          "status": "Measured preview artifact; not public release art",
          "why": "Keeps the second game grounded in Galaxian-style evidence instead of Aurora inheritance.",
          "effort": "Medium; 2-4 hrs browser/playback review plus focused scorer promotion",
          "next": "Human-review cue windows and browser play feel, then rerun reference conformance before widening public claims.",
          "evidence": "reference-artifacts/analyses/galaxy-guardians-identity/reference-conformance-0.1.json"
        },
        {
          "rank": 2,
          "metric": "Playable-preview conformance and feel",
          "explanation": {
            "calculation": "Score is read from the latest generated conformance artifact for this metric or from the dashboard composite proxy when the metric has not yet been promoted to a dedicated scorer.",
            "grounding": "Best-case grounding is a canonical reference window or scorer-backed harness report with provenance, repeatable scenarios, and current Galaxy Guardians comparison artifacts.",
            "meaning": "For a player or designer, this metric says whether this part of the experience feels intentional, readable, fair, and close to the Galaxian-style target rather than merely functional."
          },
          "scoreContext": {
            "confidence": "medium",
            "resolution": "scorer-backed artifact with selected harness windows",
            "scoreMeaning": "Major conformance gap or immature metric requiring stronger evidence before release confidence."
          },
          "score10": 6.9,
          "current": "6.9/10",
          "target": ">=7.0 preview confidence",
          "status": "Playtest-weighted preview score",
          "why": "A preview can be technically present yet still not compelling enough to represent the game family.",
          "effort": "Medium-high; 3-5 hrs with local browser/video review",
          "next": "Run a motion-feel pass against formation entry, dive pacing, player shots, and loss feedback.",
          "evidence": "reference-artifacts/analyses/galaxy-guardians-identity/playtest-conformance-review-0.1.json"
        },
        {
          "rank": 3,
          "metric": "Audio lab cue identity",
          "explanation": {
            "calculation": "Score is read from the latest generated conformance artifact for this metric or from the dashboard composite proxy when the metric has not yet been promoted to a dedicated scorer.",
            "grounding": "Best-case grounding is a canonical reference window or scorer-backed harness report with provenance, repeatable scenarios, and current Galaxy Guardians comparison artifacts.",
            "meaning": "For a player or designer, this metric says whether this part of the experience feels intentional, readable, fair, and close to the Galaxian-style target rather than merely functional."
          },
          "scoreContext": {
            "confidence": "medium",
            "resolution": "scorer-backed artifact with selected harness windows",
            "scoreMeaning": "Good conformance, but the gap is likely visible to attentive players or designers in some scenarios."
          },
          "score10": 8.5,
          "current": "8.5/10",
          "target": ">=7.0 preview target",
          "status": "Reusable Platinum audio conformance lab",
          "why": "This is the first evidence that ingestion-derived cue targets can score a non-Aurora game.",
          "effort": "Low-medium; 1-2 hrs listening review plus cue-window refinements",
          "next": "Review weakest cue and promote accepted cue windows as reusable per-game audio targets.",
          "evidence": "reference-artifacts/analyses/audio-conformance-lab/galaxy-guardians-preview/audio-conformance-lab-0.1.json"
        },
        {
          "rank": 4,
          "metric": "Public release readiness",
          "explanation": {
            "calculation": "Score is read from the latest generated conformance artifact for this metric or from the dashboard composite proxy when the metric has not yet been promoted to a dedicated scorer.",
            "grounding": "Best-case grounding is a canonical reference window or scorer-backed harness report with provenance, repeatable scenarios, and current Galaxy Guardians comparison artifacts.",
            "meaning": "For a player or designer, this metric says whether this part of the experience feels intentional, readable, fair, and close to the Galaxian-style target rather than merely functional."
          },
          "scoreContext": {
            "confidence": "medium",
            "resolution": "scorer-backed artifact with selected harness windows",
            "scoreMeaning": "Major conformance gap or immature metric requiring stronger evidence before release confidence."
          },
          "score10": 4.2,
          "current": "4.2/10",
          "target": "deferred until full game path exists",
          "status": "Intentionally low preview/public boundary score",
          "why": "Protects Platinum from presenting a preview as a shipped second game before rules, stages, scoring, and polish are ready.",
          "effort": "High; multi-cycle game construction after preview acceptance",
          "next": "Keep public playable claims disabled until ingestion, runtime, scoring, and harness coverage support a real release path.",
          "evidence": "reference-artifacts/analyses/galaxy-guardians-identity/reference-conformance-0.1.json"
        }
      ],
      "ingestionSummary": {
        "sourceFamilyCount": 4,
        "highConfidenceCount": 1,
        "mixedOrLowConfidenceCount": 0,
        "scoredOrPromotedCount": 2,
        "nextBestUpgrade": "Promote the strongest contracts into a reusable new-game ingestion manifest.",
        "framing": "Ingestion turns reference media and Galaxy Guardians runtime captures into repeatable evidence: clips, contact sheets, traces, event logs, labels, scores, confidence, and next missing annotations."
      },
      "ingestionRows": [
        {
          "rank": 1,
          "source": "Galaxy Guardians identity artifacts",
          "axis": "reference conformance / runtime identity",
          "artifactType": "game-owned preview contracts",
          "coverage": "38 JSON artifacts",
          "annotationStatus": "preview-scored",
          "confidence": "medium",
          "linkedMetric": "Preview reference conformance",
          "anchor": "reference-artifacts/analyses/galaxy-guardians-identity",
          "next": "Promote the strongest contracts into a reusable new-game ingestion manifest."
        },
        {
          "rank": 2,
          "source": "Galaxy Guardians audio conformance lab",
          "axis": "audio cue identity",
          "artifactType": "waveform/spectrogram cue comparisons",
          "coverage": "8 cue targets",
          "annotationStatus": "scored",
          "confidence": "medium-high",
          "linkedMetric": "Audio lab cue identity",
          "anchor": "reference-artifacts/analyses/audio-conformance-lab/galaxy-guardians-preview/audio-conformance-lab-0.1.json",
          "next": "Use the lab as the template for game-selectable audio scoring across future packs."
        },
        {
          "rank": 3,
          "source": "Candidate 0.1 runtime gate",
          "axis": "runtime events / surfaces",
          "artifactType": "required events, surfaces, cues, forbidden Aurora capabilities",
          "coverage": "14 required runtime events",
          "annotationStatus": "gate-defined",
          "confidence": "medium",
          "linkedMetric": "Playable-preview conformance and feel",
          "anchor": "reference-artifacts/analyses/galaxy-guardians-identity/candidate-0.1.json",
          "next": "Generate dashboard maturity submetrics directly from required events and implemented evidence."
        },
        {
          "rank": 4,
          "source": "Visual readability contract",
          "axis": "sprite / formation readability",
          "artifactType": "runtime readability and sprite distinction rules",
          "coverage": "4 visual IDs",
          "annotationStatus": "dev-preview-visual-readability-contract-not-public-release-art",
          "confidence": "medium",
          "linkedMetric": "Preview reference conformance",
          "anchor": "reference-artifacts/analyses/galaxy-guardians-identity/visual-readability-0.1.json",
          "next": "Tie sprite-component extraction and visual readability into a scored dashboard submetric."
        }
      ],
      "economicsSummary": {
        "measuredRuns": 0,
        "wallSeconds": 0,
        "cpuSeconds": 0,
        "metricPointCount": 0,
        "deltaCount": 0,
        "charts": []
      },
      "sourceReports": {
        "referenceConformance": "reference-artifacts/analyses/galaxy-guardians-identity/reference-conformance-0.1.json",
        "playtestReview": "reference-artifacts/analyses/galaxy-guardians-identity/playtest-conformance-review-0.1.json",
        "audioLab": "reference-artifacts/analyses/audio-conformance-lab/galaxy-guardians-preview/audio-conformance-lab-0.1.json",
        "candidateGate": "reference-artifacts/analyses/galaxy-guardians-identity/candidate-0.1.json",
        "visualReadability": "reference-artifacts/analyses/galaxy-guardians-identity/visual-readability-0.1.json"
      },
      "newFirstClassAxes": [
        "Preview reference conformance: reference maturity, implementation gate coverage, and public-release boundary.",
        "Audio cue identity: reusable game-configured cue scoring from isolated reference windows.",
        "Runtime event and surface gate: required events, score table, attract text, and forbidden Aurora capabilities.",
        "Visual readability: sprite distinction, formation visibility, and hit feedback at preview scale."
      ]
    }
  ],
  "sourceReports": {
    "quality": "reference-artifacts/analyses/quality-conformance/2026-05-24-ff249bba/report.json",
    "investmentPriority": "reference-artifacts/analyses/conformance-investment-priorities/2026-05-19-fba7f625/report.json",
    "levelArc": "reference-artifacts/analyses/level-arc-conformance/2026-05-24-ff249bba/report.json",
    "economics": "reference-artifacts/analyses/conformance-economics/2026-06-12-dab57ac7/report.json",
    "conformanceInvestmentRetrospective": "reference-artifacts/analyses/conformance-investment-retrospective/2026-05-18-e583b558/report.json",
    "alienEntryChallenge": "reference-artifacts/analyses/alien-entry-challenge-variation/2026-05-16-82fd62cb/report.json",
    "challengeStageConformance": "reference-artifacts/analyses/challenge-stage-conformance/2026-06-09-abb5c464/report.json",
    "challengeStageCandidateSweepIndex": "reference-artifacts/analyses/challenge-stage-candidate-sweep-index/latest.json",
    "challengeMovementGrammar": "reference-artifacts/analyses/challenge-movement-grammar/latest.json",
    "challengeMotionPrimitives": "reference-artifacts/analyses/challenge-motion-primitives/latest.json",
    "challengeSetpieceContracts": "reference-artifacts/analyses/challenge-setpiece-contracts/latest.json",
    "audioLabV2": "reference-artifacts/analyses/aurora-audio-conformance-lab-v2/2026-05-17-f9e7374c-dirty/report.json",
    "audioCueContracts": "reference-artifacts/analyses/aurora-audio-cue-contracts/2026-05-11-b83393cd-dirty-201628/report.json",
    "visualLook": "reference-artifacts/analyses/aurora-visual-look-conformance/2026-05-08-fee8820-dirty/report.json",
    "evidenceCycle": "reference-artifacts/analyses/evidence-cycle-dashboard/evidence-cycle-dashboard.json"
  },
  "scoreSemantics": {
    "headline": "An x/10 score is a measured rollup at the current scorer resolution, not a claim of arcade-perfect behavior.",
    "tenOutOfTen": "10/10 means no known measured gap under the current scorer and evidence coverage. It remains a guardrail pass until broader reference, expert-play, and edge-case evidence says otherwise.",
    "confidence": "Confidence estimates how much trust to place in the score as a release signal.",
    "resolution": "Resolution describes how fine-grained the scorer currently is and which blind spots may remain."
  },
  "equalQualityCategoryWeight": 0.077,
  "releaseGate": [
    {
      "Gate": "Overall quality",
      "Current": "8.8/10",
      "Target": ">=9.3",
      "Notes": "Full score refresh after all major cycles"
    },
    {
      "Gate": "Audio identity",
      "Current": "7.3/10",
      "Target": ">=7.5",
      "Notes": "Primary user-experience gap"
    },
    {
      "Gate": "Challenge-stage set-piece conformance",
      "Current": "4.3/10",
      "Target": ">=5.0 before next beta claim; >=6.0 next major gate; >=9.0 mature",
      "Notes": "Strict movement/graphics/alien-novelty gate; safety does not inflate this score"
    },
    {
      "Gate": "Direct target sprite conformance",
      "Current": "6/10",
      "Target": ">=5.5 before next beta claim; >=7.5 mature preview",
      "Notes": "Strict runtime-vs-promoted-target-crop row; static proxy scores do not satisfy this gate"
    },
    {
      "Gate": "Level arc",
      "Current": "8.8/10",
      "Target": ">=8.8",
      "Notes": "Long-play gameplay-quality gate"
    },
    {
      "Gate": "Alien entry and challenge-stage novelty",
      "Current": "8.2/10",
      "Target": ">=7.5 first gate; >=9.0 mature",
      "Notes": "New high-priority long-cycle gameplay-authenticity gate"
    },
    {
      "Gate": "Boss entry and formation grammar",
      "Current": "9.4/10",
      "Target": ">=8.0 first gate; >=9.0 mature",
      "Notes": "New measured gate for stage choreography"
    },
    {
      "Gate": "Alien entry / formations",
      "Current": "10/10 measured",
      "Target": ">=9.2 with path/rack scorer",
      "Notes": "Now backed by dedicated alien-entry/challenge variation scorer"
    },
    {
      "Gate": "Challenge variation",
      "Current": "4.3/10 measured",
      "Target": ">=9.2 with dedicated scorer",
      "Notes": "Dedicated stage-by-stage challenge conformance gate"
    },
    {
      "Gate": "Visual look and feel",
      "Current": "8.6/10",
      "Target": ">=8.4",
      "Notes": "New explicit gate; first-pass scorer measured"
    },
    {
      "Gate": "Arcade frame and popup surfaces",
      "Current": "9.2/10",
      "Target": ">=9.4",
      "Notes": "Split from generic UI shell before final gate"
    },
    {
      "Gate": "No-regression guardrails",
      "Current": "movement/combat/capture >=10; challenge timing >=9.8",
      "Target": "Maintain",
      "Notes": "Hard blockers"
    }
  ],
  "priorityRows": [
    {
      "rank": 1,
      "metric": "Challenge-stage set-piece conformance: movement, graphics, alien novelty",
      "explanation": {
        "calculation": "Strict score averages movement conformance, graphical conformance, alien/stage novelty, and stage progression; no-shot/no-kill safety is a separate guardrail and does not make the stage interesting. Current supporting artifacts: 8 set-piece contracts, 25 first-five grammar group contracts, 10 reusable motion primitives, and 17218 indexed sweep candidates.",
        "grounding": "Current grounding is Galaga challenge contact-sheet labels, Aurora browser runtime challenge probes, trajectory-vector comparison, and static/runtime sprite evidence. Best-case grounding adds full temporal trajectory traces and active sprite-motion windows for every challenge. Runtime promotion is blocked unless the expected-reference, target-video, human-perfect, safety, and human-visible gates all hold at once.",
        "meaning": "Players should feel a spectacular, safe, learnable bonus exhibition with different aliens and movement from one challenge to the next. Designers use this as the main anti-repetition gate for Aurora challenge stages. The current dashboard should be read as evidence that the next value is richer motion representation, not more shallow tuning."
      },
      "scoreContext": {
        "confidence": "medium-high for gap direction; medium-low for exact lift estimate",
        "resolution": "strict stage-by-stage challenge scorer using 1/10 baseline for interest, movement, and graphics; no-shot/no-kill is treated as a guardrail rather than score inflation",
        "scoreMeaning": "Major conformance gap or immature metric requiring stronger evidence before release confidence."
      },
      "score10": 4.3,
      "current": "4.3/10",
      "target": ">=5.0 before next beta claim; 6.0 after three authored challenges; 9.0+ mature",
      "status": "Strict dedicated stage-by-stage scorer; current high-priority gameplay-authenticity gap",
      "why": "The challenging stages should be spectacular safe Galaga-like bonus exhibitions. Current safety is good, but movement variation, alien novelty, and graphical conformance are not yet close.",
      "effort": "High; long-cycle CPU/browser extraction plus gameplay authoring and sprite-motion/reference labeling",
      "next": "Recent candidate sweeps improved measurement and search evidence, but no stage currently has a runtime keeper. Next: build richer movement primitives before runtime promotion; first target reference-spline-fit from 10 primitives, with first-five grammar at 25 group contracts and 25 reference-backed paths.",
      "evidence": "reference-artifacts/analyses/challenge-stage-conformance/2026-06-09-abb5c464/report.json; reference-artifacts/analyses/challenge-stage-candidate-sweep-index/latest.json; reference-artifacts/analyses/challenge-movement-grammar/latest.json; reference-artifacts/analyses/challenge-motion-primitives/latest.json; reference-artifacts/analyses/challenge-setpiece-contracts/latest.json",
      "decisionEvidence": [
        {
          "stage": 3,
          "decision": "no-runtime-keeper-yet",
          "bestCandidateId": "stage3-target-reference-paths-shape-lower-p0",
          "expectedLift10": "-0.4/10",
          "targetVideoObjectFitLift10": "1.5/10",
          "humanPerfectPotentialLift10": "0.3/10",
          "humanVisibleLift10": "-0.2/10",
          "read": "No runtime keeper: candidate regresses human-visible challenge readability by 0.2/10 or fails visibility/arrival/spacing/bunching gates."
        },
        {
          "stage": 7,
          "decision": "no-runtime-keeper-yet",
          "bestCandidateId": "stage7-centerline-spacing-spread-left-right-lead078-flat-widefield-centered-id-sd018",
          "expectedLift10": "0/10",
          "targetVideoObjectFitLift10": "0.3/10",
          "humanPerfectPotentialLift10": "0/10",
          "humanVisibleLift10": "2.3/10",
          "read": "No runtime keeper: candidate lifts human-visible challenge readability by 2.3/10 but fails visibility/arrival/spacing/bunching gates."
        },
        {
          "stage": 11,
          "decision": "no-runtime-keeper-yet",
          "bestCandidateId": "stage11-target-controls-shape-blend-p0",
          "expectedLift10": "0.2/10",
          "targetVideoObjectFitLift10": "0/10",
          "humanPerfectPotentialLift10": "0/10",
          "humanVisibleLift10": "0.1/10",
          "read": "No runtime keeper: candidate lifts human-visible challenge readability by 0.1/10 but fails visibility/arrival/spacing/bunching gates."
        },
        {
          "stage": 15,
          "decision": "no-runtime-keeper-yet",
          "bestCandidateId": "stage15-a168-d108-x68-w106-s008-lb0-y3-p0",
          "expectedLift10": "0.7/10",
          "targetVideoObjectFitLift10": "-0.2/10",
          "humanPerfectPotentialLift10": "0.4/10",
          "humanVisibleLift10": "-0.4/10",
          "read": "No runtime keeper: candidate regresses human-visible challenge readability by 0.4/10 or fails visibility/arrival/spacing/bunching gates."
        },
        {
          "stage": 19,
          "decision": "no-runtime-keeper-yet",
          "bestCandidateId": "stage19-target-reference-paths-direct-p0",
          "expectedLift10": "-0.5/10",
          "targetVideoObjectFitLift10": "0.8/10",
          "humanPerfectPotentialLift10": "1.7/10",
          "humanVisibleLift10": "-0.3/10",
          "read": "No runtime keeper: candidate regresses human-visible challenge readability by 0.3/10 or fails visibility/arrival/spacing/bunching gates."
        },
        {
          "stage": 23,
          "decision": "no-runtime-keeper-yet",
          "bestCandidateId": "stage23-target-compressed-late-hold-speed-p0",
          "expectedLift10": "0.5/10",
          "targetVideoObjectFitLift10": "0.6/10",
          "humanPerfectPotentialLift10": "n/a",
          "humanVisibleLift10": "n/a",
          "read": "No runtime keeper yet; use this row as search evidence and improve reference identity/path shape before gameplay promotion."
        },
        {
          "stage": 27,
          "decision": "no-runtime-keeper-yet",
          "bestCandidateId": "stage27-target-controls-shape-only-p1",
          "expectedLift10": "0.7/10",
          "targetVideoObjectFitLift10": "-0.8/10",
          "humanPerfectPotentialLift10": "n/a",
          "humanVisibleLift10": "n/a",
          "read": "No runtime keeper: late-stage identity blocked because best match challenge-1-arrival-group-1 does not represent challenge 7."
        },
        {
          "stage": 31,
          "decision": "no-runtime-keeper-yet",
          "bestCandidateId": "stage31-a162-d108-x86-w096-s007-lbs0-y3-p0",
          "expectedLift10": "1.8/10",
          "targetVideoObjectFitLift10": "-0.5/10",
          "humanPerfectPotentialLift10": "n/a",
          "humanVisibleLift10": "n/a",
          "read": "No runtime keeper yet; use this row as search evidence and improve reference identity/path shape before gameplay promotion."
        }
      ],
      "costContext": {
        "costClass": "high",
        "expectedResources": "cpu, browser, gpu",
        "trackedAxes": [
          "level-arc",
          "conformance-loop"
        ],
        "trackedSpend": "162 runs; 24 min wall; 39.8 min CPU",
        "trackedRuns": 162,
        "trackedWallSeconds": 1441.8960000000002,
        "trackedCpuSeconds": 2389.19,
        "expectedLift10": 1.8,
        "expectedOverallLift10": 0.138,
        "investmentScore": 8.4,
        "gapToTarget": "+0.7",
        "summary": "high; cpu, browser, gpu",
        "valueCostRead": "Expected lift 1.8/10 on metric, 0.138/10 overall; investment score 8.4."
      }
    },
    {
      "rank": 2,
      "metric": "Audio identity, event feedback, and cue alignment",
      "explanation": {
        "calculation": "Release audio score blends cue identity, reference spectral similarity, reference-window precision, overlap, event alignment, semantic event mapping, and cue-contract readiness from audio comparison and promotion artifacts.",
        "grounding": "Best-case grounding comes from labeled Galaga-family reference audio clips, cue contracts, segmented Aurora runtime captures, cue/event logs, waveform and spectral measurements, promotion prechecks, and live recapture.",
        "meaning": "Players hear whether shots, hits, explosions, boss damage, capture, rescue, pressure, loss, and challenge results communicate the right event at the right moment. Designers use it to protect feedback clarity, arcade identity, and safe theme variation."
      },
      "scoreContext": {
        "confidence": "medium-high",
        "resolution": "21 cue/event comparisons with waveform, spectral, overlap, alignment, and semantic event-mapping features",
        "scoreMeaning": "Material conformance gap with meaningful user-experience or reference-identity impact."
      },
      "score10": 7.3,
      "current": "7.3/10",
      "target": "7.5-8.0",
      "status": "Measured release category; weakest axis",
      "why": "Largest current score gap and high user-experience impact: shots, explosions, boss damage, challenge results, capture/rescue feedback.",
      "effort": "High; 3-6 hrs local/model-assisted analysis",
      "next": "Challenge Perfect runtime trial rejected perfect-clean-onset-soft-tail; do not directly promote the focused keeper. Do not promote Challenge Perfect from isolated onset/body candidates. Replace the next audio strategy with full-phrase/segment-boundary work: stabilize the scorer on canonical reference-vs-reference capture, then generate candidates that optimize onset, body, tail, and live capture segmentation together.",
      "evidence": "reference-artifacts/analyses/quality-conformance/2026-05-24-ff249bba/report.json; reference-artifacts/analyses/aurora-audio-cue-contracts/2026-05-11-b83393cd-dirty-201628/report.json",
      "costContext": {
        "costClass": "high",
        "expectedResources": "cpu, model-api, openai-api",
        "trackedAxes": [
          "audio"
        ],
        "trackedSpend": "317 runs; 254.3 min wall; 460.3 min CPU",
        "trackedRuns": 317,
        "trackedWallSeconds": 15258.692,
        "trackedCpuSeconds": 27618.62,
        "expectedLift10": 0.7,
        "expectedOverallLift10": 0.054,
        "investmentScore": 2.67,
        "gapToTarget": "+0.2",
        "summary": "high; cpu, model-api, openai-api",
        "valueCostRead": "Expected lift 0.7/10 on metric, 0.054/10 overall; investment score 2.67."
      }
    },
    {
      "rank": 3,
      "metric": "Alien entry and broad challenge-stage novelty",
      "explanation": {
        "calculation": "Composite proxy: 45% stage-opening timing fidelity, 35% stage-opening geometry fidelity, and 20% movement-grammar expansion until alien entry is promoted to its own scorer.",
        "grounding": "Best-case grounding will use reference and Aurora stage-entry contact sheets, rack timing traces, path-family labels, formation geometry, and early/mid/late level comparisons.",
        "meaning": "Players read the whole level from the first entry pattern. Designers use it to make stages feel authored, recognizable, and increasingly sophisticated before combat fully starts."
      },
      "scoreContext": {
        "confidence": "medium",
        "resolution": "composite proxy from opening timing, geometry, and movement grammar",
        "scoreMeaning": "Good conformance, but the gap is likely visible to attentive players or designers in some scenarios."
      },
      "score10": 8.2,
      "current": "8.2/10",
      "target": "7.5 first gate; 9.0+ mature",
      "status": "Dedicated long-cycle broad scorer; useful diagnostic but less strict than set-piece score",
      "why": "Regular-stage alien entry, challenge-stage trajectories, and new-alien introduction still need stronger reference grounding; this broad metric should not mask the stricter challenge-stage gap.",
      "effort": "High; long-cycle CPU/browser extraction plus reference contact-sheet and path-labeling pass",
      "next": "Attack regular-entry geometry separation: minimum regular geometry distance 0.083; mean regular geometry distance 0.127; closest pair mid-run-entry-variant / late-run-cleanup-or-failure.",
      "evidence": "reference-artifacts/analyses/alien-entry-challenge-variation/2026-05-16-82fd62cb/report.json",
      "costContext": {
        "costClass": "estimated",
        "expectedResources": "cpu, browser",
        "trackedAxes": [
          "level-arc",
          "quality-score"
        ],
        "trackedSpend": "113 runs; 26.4 min wall; 34.9 min CPU",
        "trackedRuns": 113,
        "trackedWallSeconds": 1586.7530000000002,
        "trackedCpuSeconds": 2092.86,
        "expectedLift10": null,
        "expectedOverallLift10": null,
        "investmentScore": null,
        "gapToTarget": "at target",
        "summary": "estimated; cpu, browser",
        "valueCostRead": "Estimated cost/value; dedicated investment candidate not yet generated."
      }
    },
    {
      "rank": 4,
      "metric": "Level arc and encounter shape",
      "explanation": {
        "calculation": "Level arc is read from the level-arc conformance report, combining stage distinctiveness, challenge-stage identity, later-stage complexity, pressure curve, reward/rescue layering, and learning/mastery windows.",
        "grounding": "Best-case grounding is multi-stage reference evidence plus Aurora harness windows for stage families, challenge layers, stage signatures, pressure/loss windows, and persona progression.",
        "meaning": "Players feel whether the game grows, teaches, surprises, and rewards mastery over time. Designers use it to detect repetition, flat difficulty, or escalation without reward."
      },
      "scoreContext": {
        "confidence": "medium-high",
        "resolution": "multi-submetric level-arc report with stage families, challenge layers, pressure, reward, and persona evidence",
        "scoreMeaning": "Good conformance, but the gap is likely visible to attentive players or designers in some scenarios."
      },
      "score10": 8.8,
      "current": "8.8/10",
      "target": "8.8-9.0",
      "status": "Measured release category",
      "why": "Controls whether long play feels like Galaga-like escalation rather than repeated pressure.",
      "effort": "Medium-high; 2-5 hrs",
      "next": "Use the top-ranked opportunity window to add or widen deterministic evidence before changing gameplay tuning.",
      "evidence": "reference-artifacts/analyses/level-arc-conformance/2026-05-24-ff249bba/report.json",
      "costContext": {
        "costClass": "low",
        "expectedResources": "cpu, browser",
        "trackedAxes": [
          "level-arc",
          "conformance-loop"
        ],
        "trackedSpend": "162 runs; 24 min wall; 39.8 min CPU",
        "trackedRuns": 162,
        "trackedWallSeconds": 1441.8960000000002,
        "trackedCpuSeconds": 2389.19,
        "expectedLift10": 0.24,
        "expectedOverallLift10": 0.018,
        "investmentScore": 1.55,
        "gapToTarget": "at target",
        "summary": "low; cpu, browser",
        "valueCostRead": "Expected lift 0.24/10 on metric, 0.018/10 overall; investment score 1.55."
      }
    },
    {
      "rank": 4.5,
      "metric": "Direct target sprite and impact feedback conformance",
      "explanation": {
        "calculation": "Score is read from the latest generated conformance artifact for this metric or from the dashboard composite proxy when the metric has not yet been promoted to a dedicated scorer.",
        "grounding": "Best-case grounding is a canonical reference window or scorer-backed harness report with provenance, repeatable scenarios, and current Aurora comparison artifacts.",
        "meaning": "For a player or designer, this metric says whether this part of the experience feels intentional, readable, fair, and close to the Galaga-like target rather than merely functional."
      },
      "scoreContext": {
        "confidence": "medium",
        "resolution": "scorer-backed artifact with selected harness windows",
        "scoreMeaning": "Major conformance gap or immature metric requiring stronger evidence before release confidence."
      },
      "score10": 5.97,
      "current": "6/10",
      "target": ">=5.5 before next beta claim; >=7.5 mature preview",
      "status": "Application artifact scorecard measured; strict direct target-crop row is intentionally sobering",
      "why": "The player-visible ship, enemy, hit, and explosion shapes are a first-glance arcade quality signal. Static sprite proxy scores must not hide the stricter target-crop gap.",
      "effort": "Medium-high; 2-5 hrs renderer/crop/harness work plus visual review",
      "next": "Raise direct sprite target score and impact/explosion feedback together; current impact/explosion static read is 5.8/10.",
      "evidence": "reference-artifacts/analyses/application-artifact-conformance/latest.json",
      "costContext": {
        "costClass": "estimated",
        "expectedResources": "cpu",
        "trackedAxes": [
          "quality-score"
        ],
        "trackedSpend": "16 runs; 17.6 min wall; 20.4 min CPU",
        "trackedRuns": 16,
        "trackedWallSeconds": 1058.101,
        "trackedCpuSeconds": 1223.73,
        "expectedLift10": null,
        "expectedOverallLift10": null,
        "investmentScore": null,
        "gapToTarget": "at target",
        "summary": "estimated; cpu",
        "valueCostRead": "Estimated cost/value; dedicated investment candidate not yet generated."
      }
    },
    {
      "rank": 5,
      "metric": "Boss entry and formation grammar",
      "explanation": {
        "calculation": "Boss/formation grammar is read from the dedicated formation-boss-grammar report, blending boss entry timing, boss/escort composition, formation settle evidence, challenge pattern identity, stage variation, and path-shape precision.",
        "grounding": "Current grounding comes from Aurora level-expansion event logs, trace summaries, stage-signature distance, runtime path/slot extraction, media-backed Galaga path-family labels when available, and the formation/boss grammar reference profile. Best-case grounding adds tracked Galaga boss/escort/challenge trajectories and rack slot coordinates.",
        "meaning": "Players feel whether each stage has recognizable arcade choreography: bosses enter with readable intent, escorts matter, formations settle convincingly, and challenge stages teach memorable set pieces."
      },
      "scoreContext": {
        "confidence": "medium",
        "resolution": "first-class boss/formation scorer using stage-window event grammar, boss timing, escort composition, challenge identity, and explicit path/slot measurement debt",
        "scoreMeaning": "Strong measured conformance with known remaining risk mostly in narrower edge cases, coverage, or polish."
      },
      "score10": 9.4,
      "current": "9.4/10",
      "target": "8.0-8.5 first gate; 9.0+ with path/slot extraction",
      "status": "Measured release category; new first-class axis",
      "why": "Boss entries, escorts, formation settling, and challenge set pieces are core Galaga choreography and directly affect whether stages feel authored.",
      "effort": "Medium-high; 2-5 hrs, then recurring low-cost guardrail",
      "next": "Label boss, escort, rack-settle, and challenge path families from Galaga reference contact sheets or video traces, then replace heuristic coverage with direct shape-distance scoring.",
      "evidence": "reference-artifacts/analyses/quality-conformance/2026-05-24-ff249bba/report.json",
      "costContext": {
        "costClass": "high",
        "expectedResources": "cpu, browser",
        "trackedAxes": [
          "level-arc",
          "conformance-loop"
        ],
        "trackedSpend": "162 runs; 24 min wall; 39.8 min CPU",
        "trackedRuns": 162,
        "trackedWallSeconds": 1441.8960000000002,
        "trackedCpuSeconds": 2389.19,
        "expectedLift10": 0.28,
        "expectedOverallLift10": 0.022,
        "investmentScore": 0.7,
        "gapToTarget": "at target",
        "summary": "high; cpu, browser",
        "valueCostRead": "Expected lift 0.28/10 on metric, 0.022/10 overall; investment score 0.7."
      }
    },
    {
      "rank": 6,
      "metric": "Overall visual look and feel: gameplay, start page, typography complexity",
      "explanation": {
        "calculation": "Score is read from the first-pass visual-look conformance scorer when its report is available; otherwise it is an estimated planning value informed by UI shell checks, screenshots, contact sheets, and known visual debt.",
        "grounding": "Best-case grounding will compare reference and Aurora contact sheets across start, attract, gameplay, score, popup, and game-over surfaces with palette, typography, density, sprite readability, and layout checks.",
        "meaning": "Players decide at a glance whether the game feels like a polished arcade object. Designers use it to align readability, theme, typography, and visual hierarchy before subjective tuning."
      },
      "scoreContext": {
        "confidence": "medium-low",
        "resolution": "first-pass visual scorer when available; still needs reference-backed contact sheets and sprite/style sub-scorers",
        "scoreMeaning": "Good conformance, but the gap is likely visible to attentive players or designers in some scenarios."
      },
      "score10": 8.62,
      "current": "8.6/10",
      "target": "8.4-8.8",
      "status": "Measured visual scorer; medium-low confidence",
      "why": "A high score can still feel off if start text, density, contrast, alien readability, and arcade typography do not cohere.",
      "effort": "Medium; next pass should add reference-backed contact sheets and GPU/model-assisted review",
      "next": "Defer unless new ingestion evidence reveals a larger graphics-conformance gap.",
      "evidence": "reference-artifacts/analyses/aurora-visual-look-conformance/2026-05-08-fee8820-dirty/report.json",
      "costContext": {
        "costClass": "medium",
        "expectedResources": "cpu, browser, gpu",
        "trackedAxes": [
          "visual-look"
        ],
        "trackedSpend": "1 run; 0.1 min wall; 0.1 min CPU",
        "trackedRuns": 1,
        "trackedWallSeconds": 3.49,
        "trackedCpuSeconds": 3.43,
        "expectedLift10": 0.12,
        "expectedOverallLift10": 0.009,
        "investmentScore": 0.38,
        "gapToTarget": "at target",
        "summary": "medium; cpu, browser, gpu",
        "valueCostRead": "Expected lift 0.12/10 on metric, 0.009/10 overall; investment score 0.38."
      }
    },
    {
      "rank": 7,
      "metric": "Stage 4 pressure exact replay / pressure curve precision",
      "explanation": {
        "calculation": "Score is the current weakest level-arc pressure submetric, focused on whether known pressure/loss windows reproduce as exact or same-window replay events under controlled harness runs.",
        "grounding": "Best-case grounding is source pressure/loss windows, frozen seeds, replayable input paths, collision diagnostics, and repeated current-vs-source pressure curve comparisons.",
        "meaning": "Players should feel pressure that is learnable and fair, not random. Designers use it to tune threat density, dodge lanes, and failure recovery without creating arbitrary deaths."
      },
      "scoreContext": {
        "confidence": "medium",
        "resolution": "narrow pressure/loss replay windows; exact replay coverage still limited",
        "scoreMeaning": "Major conformance gap or immature metric requiring stronger evidence before release confidence."
      },
      "score10": 6,
      "current": "6/10",
      "target": "8.2-8.6",
      "status": "Measured level-arc weak submetric",
      "why": "Pressure should be learnable and reproducible, not merely present in one run.",
      "effort": "Medium-high; prior runs ~12.8 min wall / 18.5 min CPU",
      "next": "Run focused source-window replay matching after the Stage 12 loop validates candidate mechanics.",
      "evidence": "reference-artifacts/analyses/level-arc-conformance/2026-05-24-ff249bba/report.json",
      "costContext": {
        "costClass": "medium",
        "expectedResources": "cpu, browser",
        "trackedAxes": [
          "stage4-pressure"
        ],
        "trackedSpend": "28 runs; 12.8 min wall; 18.5 min CPU",
        "trackedRuns": 28,
        "trackedWallSeconds": 769.422,
        "trackedCpuSeconds": 1111.38,
        "expectedLift10": 0.35,
        "expectedOverallLift10": 0.027,
        "investmentScore": 1.39,
        "gapToTarget": "+2.2",
        "summary": "medium; cpu, browser",
        "valueCostRead": "Expected lift 0.35/10 on metric, 0.027/10 overall; investment score 1.39."
      }
    },
    {
      "rank": 8,
      "metric": "Alien entry to levels: formation, timing, and methods",
      "explanation": {
        "calculation": "Composite proxy: 45% stage-opening timing fidelity, 35% stage-opening geometry fidelity, and 20% movement-grammar expansion until alien entry is promoted to its own scorer.",
        "grounding": "Best-case grounding will use reference and Aurora stage-entry contact sheets, rack timing traces, path-family labels, formation geometry, and early/mid/late level comparisons.",
        "meaning": "Players read the whole level from the first entry pattern. Designers use it to make stages feel authored, recognizable, and increasingly sophisticated before combat fully starts."
      },
      "scoreContext": {
        "confidence": "medium",
        "resolution": "composite proxy from opening timing, geometry, and movement grammar",
        "scoreMeaning": "No known measured gap under the current scorer and evidence coverage. This is a guardrail pass, not proof of perfection."
      },
      "score10": 10,
      "current": "10/10",
      "target": "9.0-9.4 with path and rack-slot scorer",
      "status": "Dedicated alien-entry submetric",
      "why": "Entry formations and rack timing are a first-order arcade authenticity signal before combat even starts.",
      "effort": "Medium; 1-3 hrs plus visual review",
      "next": "Raise regular-stage minimum signature distance and add stage-specific alien entry scripts before retuning broad level arc.",
      "evidence": "reference-artifacts/analyses/alien-entry-challenge-variation/2026-05-16-82fd62cb/report.json",
      "costContext": {
        "costClass": "estimated",
        "expectedResources": "cpu, browser",
        "trackedAxes": [
          "level-arc",
          "quality-score"
        ],
        "trackedSpend": "113 runs; 26.4 min wall; 34.9 min CPU",
        "trackedRuns": 113,
        "trackedWallSeconds": 1586.7530000000002,
        "trackedCpuSeconds": 2092.86,
        "expectedLift10": null,
        "expectedOverallLift10": null,
        "investmentScore": null,
        "gapToTarget": "at target",
        "summary": "estimated; cpu, browser",
        "valueCostRead": "Estimated cost/value; dedicated investment candidate not yet generated."
      }
    },
    {
      "rank": 9,
      "metric": "Challenge-stage variation and new alien/formations introduction",
      "explanation": {
        "calculation": "Strict dedicated scorer reads each sampled challenge stage as its own set piece: no-shot/no-kill safety, Galaga reference vector fit, arrival geometry, alien-role semantics, active visual evidence, and durable stage-specific contracts. If that report is unavailable, the dashboard falls back to a challenge timing/identity/non-repetition proxy.",
        "grounding": "Current grounding is browser-backed Aurora challenge probes plus media-backed Galaga challenge labels/contact sheets. Best-case grounding adds more late-stage reference labels, tracked trajectories, active sprite-motion windows, bonus opportunity windows, and result feedback timing.",
        "meaning": "Players should experience challenge stages as learnable bonus set pieces that introduce new motion and scoring opportunities. Designers use it to prevent bonus rounds from becoming repetitive pauses."
      },
      "scoreContext": {
        "confidence": "medium",
        "resolution": "strict dedicated stage-by-stage challenge conformance report when available; fallback proxy uses challenge timing, challenge identity, and non-repetition",
        "scoreMeaning": "Major conformance gap or immature metric requiring stronger evidence before release confidence."
      },
      "score10": 4.3,
      "current": "4.3/10",
      "target": "9.0-9.4 with dedicated scorer",
      "status": "Dedicated stage-by-stage challenge conformance report",
      "why": "Challenge stages should teach new motion/reward patterns, not only pause normal combat.",
      "effort": "Medium-high; 2-4 hrs",
      "next": "Close dedicated challenge-stage gaps. Current challenge stages are functionally safe but not yet fully credible Galaga-like bonus exhibitions: strict movement is 4.4/10, strict graphics is 4.5/10, alien/stage novelty is 3.9/10, player shot opportunity is 5.4/10, target-video object-track fit is 3.5/10, and sprite-motion correspondence is 6.18/10 with target timing status frame-labeled-segmented-reference-windows. Diagnostic legacy coverage was 6.8/10, which is why the old read was too generous.",
      "evidence": "reference-artifacts/analyses/challenge-stage-conformance/2026-06-09-abb5c464/report.json",
      "costContext": {
        "costClass": "estimated",
        "expectedResources": "cpu, browser",
        "trackedAxes": [
          "level-arc",
          "quality-score"
        ],
        "trackedSpend": "113 runs; 26.4 min wall; 34.9 min CPU",
        "trackedRuns": 113,
        "trackedWallSeconds": 1586.7530000000002,
        "trackedCpuSeconds": 2092.86,
        "expectedLift10": null,
        "expectedOverallLift10": null,
        "investmentScore": null,
        "gapToTarget": "+4.7",
        "summary": "estimated; cpu, browser",
        "valueCostRead": "Estimated cost/value; dedicated investment candidate not yet generated."
      }
    },
    {
      "rank": 10,
      "metric": "Progression and persona depth",
      "explanation": {
        "calculation": "Score is read from the progression/persona quality category, including persona safety checks, stage ordering, and whether different skill profiles see an appropriate ramp.",
        "grounding": "Best-case grounding comes from controlled persona runs, stage snapshots, loss/recovery traces, and reference-informed expectations for learning, mastery, and escalation.",
        "meaning": "Players should feel the game becoming harder for understandable reasons. Designers use it to keep novice, advanced, and expert experiences coherent across a long session."
      },
      "scoreContext": {
        "confidence": "medium",
        "resolution": "scorer-backed artifact with selected harness windows",
        "scoreMeaning": "Good conformance, but the gap is likely visible to attentive players or designers in some scenarios."
      },
      "score10": 8.4,
      "current": "8.4/10",
      "target": "9.1+",
      "status": "Measured release category",
      "why": "Keeps the game learnable across skill levels and supports later-stage quality.",
      "effort": "Low-medium; 1-2 hrs",
      "next": "Resolve remaining ordering edge case after higher-value audio/level-arc work.",
      "evidence": "reference-artifacts/analyses/quality-conformance/2026-05-24-ff249bba/report.json",
      "costContext": {
        "costClass": "estimated",
        "expectedResources": "cpu",
        "trackedAxes": [
          "quality-score"
        ],
        "trackedSpend": "16 runs; 17.6 min wall; 20.4 min CPU",
        "trackedRuns": 16,
        "trackedWallSeconds": 1058.101,
        "trackedCpuSeconds": 1223.73,
        "expectedLift10": null,
        "expectedOverallLift10": null,
        "investmentScore": null,
        "gapToTarget": "+0.7",
        "summary": "estimated; cpu",
        "valueCostRead": "Estimated cost/value; dedicated investment candidate not yet generated."
      }
    },
    {
      "rank": 11,
      "metric": "Stage 1 opening timing fidelity",
      "explanation": {
        "calculation": "Score is read from the stage-1 opening timing category, comparing measured Aurora event timing against reference opening-window timing metrics and tolerances.",
        "grounding": "Best-case grounding is a canonical stage-1 reference window with event timestamps, Aurora controlled-clock captures, and delta reports for first entry, arrival, and first dive timing.",
        "meaning": "Players form their first feel judgment in the opening seconds. Designers use it to lock the initial rhythm before tuning deeper complexity."
      },
      "scoreContext": {
        "confidence": "medium",
        "resolution": "scorer-backed artifact with selected harness windows",
        "scoreMeaning": "Good conformance, but the gap is likely visible to attentive players or designers in some scenarios."
      },
      "score10": 8.5,
      "current": "8.5/10",
      "target": "8.8-9.2",
      "status": "Measured release category",
      "why": "First impression and direct reference feel.",
      "effort": "Low-medium; 1-2 hrs",
      "next": "Defer until higher-gap audio and level-arc candidates have been exercised.",
      "evidence": "reference-artifacts/analyses/quality-conformance/2026-05-24-ff249bba/report.json",
      "costContext": {
        "costClass": "medium",
        "expectedResources": "cpu, browser",
        "trackedAxes": [
          "quality-score"
        ],
        "trackedSpend": "16 runs; 17.6 min wall; 20.4 min CPU",
        "trackedRuns": 16,
        "trackedWallSeconds": 1058.101,
        "trackedCpuSeconds": 1223.73,
        "expectedLift10": 0.18,
        "expectedOverallLift10": 0.014,
        "investmentScore": 0.73,
        "gapToTarget": "+0.3",
        "summary": "medium; cpu, browser",
        "valueCostRead": "Expected lift 0.18/10 on metric, 0.014/10 overall; investment score 0.73."
      }
    },
    {
      "rank": 12,
      "metric": "Arcade console frame UI style",
      "explanation": {
        "calculation": "Current score uses the UI shell quality category as a proxy until the frame gets its own arcade-style rubric.",
        "grounding": "Best-case grounding will score cabinet rails, bezel proportions, button density, labels, build/date treatment, chroming, responsive fit, and visual consistency across local/dev/beta/prod surfaces.",
        "meaning": "Players experience the frame as the cabinet around every game. Designers use it to make the platform feel trustworthy, arcade-native, and not like a generic web wrapper."
      },
      "scoreContext": {
        "confidence": "medium",
        "resolution": "UI shell proxy; dedicated visual/modal rubric still needed",
        "scoreMeaning": "Strong measured conformance with known remaining risk mostly in narrower edge cases, coverage, or polish."
      },
      "score10": 9.2,
      "current": "9.2/10",
      "target": "9.4-9.6",
      "status": "Measured as UI shell; needs separate arcade-frame style rubric",
      "why": "The cabinet frame is the constant product surface around every game.",
      "effort": "Medium; 1-3 hrs visual QA",
      "next": "Defer unless new ingestion evidence reveals a larger graphics-conformance gap.",
      "evidence": "reference-artifacts/analyses/quality-conformance/2026-05-24-ff249bba/report.json",
      "costContext": {
        "costClass": "medium",
        "expectedResources": "cpu, browser, gpu",
        "trackedAxes": [
          "quality-score"
        ],
        "trackedSpend": "16 runs; 17.6 min wall; 20.4 min CPU",
        "trackedRuns": 16,
        "trackedWallSeconds": 1058.101,
        "trackedCpuSeconds": 1223.73,
        "expectedLift10": 0.12,
        "expectedOverallLift10": 0.009,
        "investmentScore": 0.38,
        "gapToTarget": "+0.2",
        "summary": "medium; cpu, browser, gpu",
        "valueCostRead": "Expected lift 0.12/10 on metric, 0.009/10 overall; investment score 0.38."
      }
    },
    {
      "rank": 13,
      "metric": "Popup/help/scoring/leaderboard surface formatting",
      "explanation": {
        "calculation": "Current score uses the UI shell suite as a proxy until help, score, account, feedback, leaderboard, and game-over modals get a modal-specific scorer.",
        "grounding": "Best-case grounding will compare each modal surface for layout, typography, arcade tone, score clarity, keyboard/controller ergonomics, and no-overlap responsive behavior.",
        "meaning": "Players rely on these screens to understand scoring, recover from a run, file feedback, and trust records. Designers use it to keep utility surfaces polished without breaking arcade immersion."
      },
      "scoreContext": {
        "confidence": "medium",
        "resolution": "UI shell proxy; dedicated visual/modal rubric still needed",
        "scoreMeaning": "Strong measured conformance with known remaining risk mostly in narrower edge cases, coverage, or polish."
      },
      "score10": 9.2,
      "current": "9.2/10",
      "target": "9.4-9.6",
      "status": "Measured through UI shell suite; needs modal-specific scoring",
      "why": "Popup surfaces carry learning, scoring trust, feedback, and player records.",
      "effort": "Low-medium; 1-2 hrs",
      "next": "Defer unless new ingestion evidence reveals a larger graphics-conformance gap.",
      "evidence": "reference-artifacts/analyses/quality-conformance/2026-05-24-ff249bba/report.json",
      "costContext": {
        "costClass": "medium",
        "expectedResources": "cpu, browser",
        "trackedAxes": [
          "quality-score"
        ],
        "trackedSpend": "16 runs; 17.6 min wall; 20.4 min CPU",
        "trackedRuns": 16,
        "trackedWallSeconds": 1058.101,
        "trackedCpuSeconds": 1223.73,
        "expectedLift10": 0.12,
        "expectedOverallLift10": 0.009,
        "investmentScore": 0.38,
        "gapToTarget": "+0.2",
        "summary": "medium; cpu, browser",
        "valueCostRead": "Expected lift 0.12/10 on metric, 0.009/10 overall; investment score 0.38."
      }
    },
    {
      "rank": 14,
      "metric": "Dive fairness and safety",
      "explanation": {
        "calculation": "Score is read from the dive-safety quality category and associated harness checks for unfair collision, lane, and persona safety regressions.",
        "grounding": "Best-case grounding is repeated persona/seed sweeps, collision windows, near-miss traces, and pressure diagnostics after every risky movement or threat change.",
        "meaning": "Players accept hard deaths when they feel earned. Designers use it as a guardrail so added pressure does not become unfairness."
      },
      "scoreContext": {
        "confidence": "medium-high",
        "resolution": "seed/persona safety guardrails and pressure-sensitive collision checks",
        "scoreMeaning": "Strong measured conformance with known remaining risk mostly in narrower edge cases, coverage, or polish."
      },
      "score10": 9.1,
      "current": "9.1/10",
      "target": "9.3+",
      "status": "Measured release category",
      "why": "Protects user trust while pressure is increased.",
      "effort": "Guardrail; 30-90 min per risky gameplay cycle",
      "next": "Keep as required guardrail for pressure/reward changes.",
      "evidence": "reference-artifacts/analyses/quality-conformance/2026-05-24-ff249bba/report.json",
      "costContext": {
        "costClass": "guardrail",
        "expectedResources": "cpu",
        "trackedAxes": [
          "stage4-pressure",
          "quality-score"
        ],
        "trackedSpend": "44 runs; 30.5 min wall; 38.9 min CPU",
        "trackedRuns": 44,
        "trackedWallSeconds": 1827.5230000000001,
        "trackedCpuSeconds": 2335.11,
        "expectedLift10": null,
        "expectedOverallLift10": null,
        "investmentScore": null,
        "gapToTarget": "+0.2",
        "summary": "guardrail; cpu",
        "valueCostRead": "Guardrail spend: value is preventing regression rather than raising the score."
      }
    },
    {
      "rank": 15,
      "metric": "Player movement conformance",
      "explanation": {
        "calculation": "Score is read from the player-movement correspondence category, comparing movement traces and control response against the reference-derived movement target.",
        "grounding": "Best-case grounding is reference movement traces, Aurora controlled input traces, speed/position deltas, and regression checks across viewport and persona modes.",
        "meaning": "Players feel this as the basic trust in the ship. Designers use it as a do-not-regress foundation for every other gameplay improvement."
      },
      "scoreContext": {
        "confidence": "high-current-pass",
        "resolution": "reference trace plus controlled movement harness checks; expert micro-feel can still exceed scorer resolution",
        "scoreMeaning": "No known measured gap under the current scorer and evidence coverage. This is a guardrail pass, not proof of perfection."
      },
      "score10": 10,
      "current": "10/10",
      "target": "Maintain 10",
      "status": "Measured release category",
      "why": "Core control feel is already excellent.",
      "effort": "Guardrail only",
      "next": "Do not tune unless a new reference metric proves a gap.",
      "evidence": "reference-artifacts/analyses/quality-conformance/2026-05-24-ff249bba/report.json",
      "costContext": {
        "costClass": "guardrail",
        "expectedResources": "cpu, browser",
        "trackedAxes": [
          "quality-score"
        ],
        "trackedSpend": "16 runs; 17.6 min wall; 20.4 min CPU",
        "trackedRuns": 16,
        "trackedWallSeconds": 1058.101,
        "trackedCpuSeconds": 1223.73,
        "expectedLift10": null,
        "expectedOverallLift10": null,
        "investmentScore": null,
        "gapToTarget": "at target",
        "summary": "guardrail; cpu, browser",
        "valueCostRead": "Guardrail spend: value is preventing regression rather than raising the score."
      }
    },
    {
      "rank": 16,
      "metric": "Shot and hit responsiveness",
      "explanation": {
        "calculation": "Score is read from the combat-responsiveness category, covering shot timing, hit registration, close-shot behavior, and event feedback guardrails.",
        "grounding": "Best-case grounding is controlled shot/hit scenarios, close-contact tests, event logs, and paired visual/audio feedback timing.",
        "meaning": "Players need shots and impacts to feel immediate and legible. Designers use it to protect core combat feel while improving explosions and sound semantics."
      },
      "scoreContext": {
        "confidence": "high-current-pass",
        "resolution": "functional combat-response guardrails; audiovisual semantics are scored separately",
        "scoreMeaning": "No known measured gap under the current scorer and evidence coverage. This is a guardrail pass, not proof of perfection."
      },
      "score10": 10,
      "current": "10/10",
      "target": "Maintain 10",
      "status": "Measured release category",
      "why": "Core combat response is already excellent.",
      "effort": "Guardrail only",
      "next": "Protect during explosion/audio/event feedback work.",
      "evidence": "reference-artifacts/analyses/quality-conformance/2026-05-24-ff249bba/report.json",
      "costContext": {
        "costClass": "guardrail",
        "expectedResources": "cpu",
        "trackedAxes": [
          "quality-score"
        ],
        "trackedSpend": "16 runs; 17.6 min wall; 20.4 min CPU",
        "trackedRuns": 16,
        "trackedWallSeconds": 1058.101,
        "trackedCpuSeconds": 1223.73,
        "expectedLift10": null,
        "expectedOverallLift10": null,
        "investmentScore": null,
        "gapToTarget": "at target",
        "summary": "guardrail; cpu",
        "valueCostRead": "Guardrail spend: value is preventing regression rather than raising the score."
      }
    },
    {
      "rank": 17,
      "metric": "Stage 1 opening geometry fidelity",
      "explanation": {
        "calculation": "Score is read from the stage-1 opening geometry category, comparing formation layout and opening positions against the reference geometry target.",
        "grounding": "Best-case grounding is reference contact sheets, Aurora opening captures, formation coordinate traces, and geometry tolerance checks.",
        "meaning": "Players read formation authenticity before they consciously notice details. Designers use it as the locked baseline for alien-entry and formation work."
      },
      "scoreContext": {
        "confidence": "high-current-pass",
        "resolution": "opening formation geometry checks; later-stage entry variation is separate",
        "scoreMeaning": "No known measured gap under the current scorer and evidence coverage. This is a guardrail pass, not proof of perfection."
      },
      "score10": 10,
      "current": "10/10",
      "target": "Maintain 10",
      "status": "Measured release category",
      "why": "Formation geometry is already locked.",
      "effort": "Guardrail only",
      "next": "Protect during alien-entry visual work.",
      "evidence": "reference-artifacts/analyses/quality-conformance/2026-05-24-ff249bba/report.json",
      "costContext": {
        "costClass": "guardrail",
        "expectedResources": "cpu, browser",
        "trackedAxes": [
          "quality-score"
        ],
        "trackedSpend": "16 runs; 17.6 min wall; 20.4 min CPU",
        "trackedRuns": 16,
        "trackedWallSeconds": 1058.101,
        "trackedCpuSeconds": 1223.73,
        "expectedLift10": null,
        "expectedOverallLift10": null,
        "investmentScore": null,
        "gapToTarget": "at target",
        "summary": "guardrail; cpu, browser",
        "valueCostRead": "Guardrail spend: value is preventing regression rather than raising the score."
      }
    },
    {
      "rank": 18,
      "metric": "Capture and rescue rule fidelity",
      "explanation": {
        "calculation": "Score is read from the capture/rescue quality category, including capture, no-leak, rescue, and rule-boundary harness checks.",
        "grounding": "Best-case grounding is reference capture/rescue behavior, controlled Aurora scenarios, event logs, state assertions, and score/reward feedback traces.",
        "meaning": "Players see capture and rescue as a signature Galaga risk/reward mechanic. Designers use it as a hard identity guardrail while improving feedback and scoring opportunities."
      },
      "scoreContext": {
        "confidence": "high-current-pass",
        "resolution": "rule/state harness checks; feedback clarity and reward feel are separate",
        "scoreMeaning": "No known measured gap under the current scorer and evidence coverage. This is a guardrail pass, not proof of perfection."
      },
      "score10": 10,
      "current": "10/10",
      "target": "Maintain 10",
      "status": "Measured release category",
      "why": "Strong Galaga identity mechanic; should not regress while feedback improves.",
      "effort": "Guardrail only",
      "next": "Use as release blocker for capture/rescue-adjacent audio or explosion changes.",
      "evidence": "reference-artifacts/analyses/quality-conformance/2026-05-24-ff249bba/report.json",
      "costContext": {
        "costClass": "guardrail",
        "expectedResources": "cpu",
        "trackedAxes": [
          "quality-score"
        ],
        "trackedSpend": "16 runs; 17.6 min wall; 20.4 min CPU",
        "trackedRuns": 16,
        "trackedWallSeconds": 1058.101,
        "trackedCpuSeconds": 1223.73,
        "expectedLift10": null,
        "expectedOverallLift10": null,
        "investmentScore": null,
        "gapToTarget": "at target",
        "summary": "guardrail; cpu",
        "valueCostRead": "Guardrail spend: value is preventing regression rather than raising the score."
      }
    },
    {
      "rank": 19,
      "metric": "Challenge-stage timing fidelity",
      "explanation": {
        "calculation": "Score is read from the challenge-stage timing category, comparing Aurora challenge entry, transition, result, and timing metrics against reference tolerances.",
        "grounding": "Best-case grounding is reference challenge-stage timing windows, Aurora controlled-clock scenarios, result feedback traces, and pass/fail delta reports.",
        "meaning": "Players need bonus stages to feel rhythmic and fair. Designers use this as the timing guardrail while adding more challenge-stage variety."
      },
      "scoreContext": {
        "confidence": "high-current-pass",
        "resolution": "challenge timing deltas within tolerance; variation and teaching value are separate",
        "scoreMeaning": "No known measured gap under the current scorer and evidence coverage. This is a guardrail pass, not proof of perfection."
      },
      "score10": 10,
      "current": "10/10",
      "target": "Maintain 9.8+",
      "status": "Measured release category",
      "why": "Timing is strong; variation is the gap, not baseline timing.",
      "effort": "Guardrail only",
      "next": "Preserve while adding challenge variation scoring.",
      "evidence": "reference-artifacts/analyses/quality-conformance/2026-05-24-ff249bba/report.json",
      "costContext": {
        "costClass": "guardrail",
        "expectedResources": "cpu, browser",
        "trackedAxes": [
          "quality-score"
        ],
        "trackedSpend": "16 runs; 17.6 min wall; 20.4 min CPU",
        "trackedRuns": 16,
        "trackedWallSeconds": 1058.101,
        "trackedCpuSeconds": 1223.73,
        "expectedLift10": null,
        "expectedOverallLift10": null,
        "investmentScore": null,
        "gapToTarget": "at target",
        "summary": "guardrail; cpu, browser",
        "valueCostRead": "Guardrail spend: value is preventing regression rather than raising the score."
      }
    }
  ],
  "priorityTable": [
    {
      "Priority": 1,
      "Metric": "Challenge-stage set-piece conformance: movement, graphics, alien novelty",
      "Current": "4.3/10",
      "Confidence": "medium-high for gap direction; medium-low for exact lift estimate",
      "Resolution": "strict stage-by-stage challenge scorer using 1/10 baseline for interest, movement, and graphics; no-shot/no-kill is treated as a guardrail rather than score inflation",
      "Cost / resources": "high; cpu, browser, gpu",
      "Tracked spend": "162 runs; 24 min wall; 39.8 min CPU",
      "Major-gate target": ">=5.0 before next beta claim; 6.0 after three authored challenges; 9.0+ mature",
      "Measurement status": "Strict dedicated stage-by-stage scorer; current high-priority gameplay-authenticity gap",
      "Why this matters": "The challenging stages should be spectacular safe Galaga-like bonus exhibitions. Current safety is good, but movement variation, alien novelty, and graphical conformance are not yet close.",
      "Effort / time estimate": "High; long-cycle CPU/browser extraction plus gameplay authoring and sprite-motion/reference labeling",
      "Recommended next step": "Recent candidate sweeps improved measurement and search evidence, but no stage currently has a runtime keeper. Next: build richer movement primitives before runtime promotion; first target reference-spline-fit from 10 primitives, with first-five grammar at 25 group contracts and 25 reference-backed paths.",
      "Evidence": "reference-artifacts/analyses/challenge-stage-conformance/2026-06-09-abb5c464/report.json; reference-artifacts/analyses/challenge-stage-candidate-sweep-index/latest.json; reference-artifacts/analyses/challenge-movement-grammar/latest.json; reference-artifacts/analyses/challenge-motion-primitives/latest.json; reference-artifacts/analyses/challenge-setpiece-contracts/latest.json"
    },
    {
      "Priority": 2,
      "Metric": "Audio identity, event feedback, and cue alignment",
      "Current": "7.3/10",
      "Confidence": "medium-high",
      "Resolution": "21 cue/event comparisons with waveform, spectral, overlap, alignment, and semantic event-mapping features",
      "Cost / resources": "high; cpu, model-api, openai-api",
      "Tracked spend": "317 runs; 254.3 min wall; 460.3 min CPU",
      "Major-gate target": "7.5-8.0",
      "Measurement status": "Measured release category; weakest axis",
      "Why this matters": "Largest current score gap and high user-experience impact: shots, explosions, boss damage, challenge results, capture/rescue feedback.",
      "Effort / time estimate": "High; 3-6 hrs local/model-assisted analysis",
      "Recommended next step": "Challenge Perfect runtime trial rejected perfect-clean-onset-soft-tail; do not directly promote the focused keeper. Do not promote Challenge Perfect from isolated onset/body candidates. Replace the next audio strategy with full-phrase/segment-boundary work: stabilize the scorer on canonical reference-vs-reference capture, then generate candidates that optimize onset, body, tail, and live capture segmentation together.",
      "Evidence": "reference-artifacts/analyses/quality-conformance/2026-05-24-ff249bba/report.json; reference-artifacts/analyses/aurora-audio-cue-contracts/2026-05-11-b83393cd-dirty-201628/report.json"
    },
    {
      "Priority": 3,
      "Metric": "Alien entry and broad challenge-stage novelty",
      "Current": "8.2/10",
      "Confidence": "medium",
      "Resolution": "composite proxy from opening timing, geometry, and movement grammar",
      "Cost / resources": "estimated; cpu, browser",
      "Tracked spend": "113 runs; 26.4 min wall; 34.9 min CPU",
      "Major-gate target": "7.5 first gate; 9.0+ mature",
      "Measurement status": "Dedicated long-cycle broad scorer; useful diagnostic but less strict than set-piece score",
      "Why this matters": "Regular-stage alien entry, challenge-stage trajectories, and new-alien introduction still need stronger reference grounding; this broad metric should not mask the stricter challenge-stage gap.",
      "Effort / time estimate": "High; long-cycle CPU/browser extraction plus reference contact-sheet and path-labeling pass",
      "Recommended next step": "Attack Regular-entry geometry separation: Minimum regular geometry distance 0.083; mean regular geometry distance 0.127; closest pair mid-run-entry-variant / late-run-cleanup-or-failure.",
      "Evidence": "reference-artifacts/analyses/alien-entry-challenge-variation/2026-05-16-82fd62cb/report.json"
    },
    {
      "Priority": 4,
      "Metric": "Level arc and encounter shape",
      "Current": "8.8/10",
      "Confidence": "medium-high",
      "Resolution": "multi-submetric level-arc report with stage families, challenge layers, pressure, reward, and persona evidence",
      "Cost / resources": "low; cpu, browser",
      "Tracked spend": "162 runs; 24 min wall; 39.8 min CPU",
      "Major-gate target": "8.8-9.0",
      "Measurement status": "Measured release category",
      "Why this matters": "Controls whether long play feels like Galaga-like escalation rather than repeated pressure.",
      "Effort / time estimate": "Medium-high; 2-5 hrs",
      "Recommended next step": "Use the top-ranked opportunity window to add or widen deterministic evidence before changing gameplay tuning.",
      "Evidence": "reference-artifacts/analyses/level-arc-conformance/2026-05-24-ff249bba/report.json"
    },
    {
      "Priority": 4.5,
      "Metric": "Direct target sprite and impact feedback conformance",
      "Current": "6/10",
      "Confidence": "medium",
      "Resolution": "scorer-backed artifact with selected harness windows",
      "Cost / resources": "estimated; cpu",
      "Tracked spend": "16 runs; 17.6 min wall; 20.4 min CPU",
      "Major-gate target": ">=5.5 before next beta claim; >=7.5 mature preview",
      "Measurement status": "Application artifact scorecard measured; strict direct target-crop row is intentionally sobering",
      "Why this matters": "The player-visible ship, enemy, hit, and explosion shapes are a first-glance arcade quality signal. Static sprite proxy scores must not hide the stricter target-crop gap.",
      "Effort / time estimate": "Medium-high; 2-5 hrs renderer/crop/harness work plus visual review",
      "Recommended next step": "Raise direct sprite target score and impact/explosion feedback together; current impact/explosion static read is 5.8/10.",
      "Evidence": "reference-artifacts/analyses/application-artifact-conformance/latest.json"
    },
    {
      "Priority": 5,
      "Metric": "Boss entry and formation grammar",
      "Current": "9.4/10",
      "Confidence": "medium",
      "Resolution": "first-class boss/formation scorer using stage-window event grammar, boss timing, escort composition, challenge identity, and explicit path/slot measurement debt",
      "Cost / resources": "high; cpu, browser",
      "Tracked spend": "162 runs; 24 min wall; 39.8 min CPU",
      "Major-gate target": "8.0-8.5 first gate; 9.0+ with path/slot extraction",
      "Measurement status": "Measured release category; new first-class axis",
      "Why this matters": "Boss entries, escorts, formation settling, and challenge set pieces are core Galaga choreography and directly affect whether stages feel authored.",
      "Effort / time estimate": "Medium-high; 2-5 hrs, then recurring low-cost guardrail",
      "Recommended next step": "Label boss, escort, rack-settle, and challenge path families from Galaga reference contact sheets or video traces, then replace heuristic coverage with direct shape-distance scoring.",
      "Evidence": "reference-artifacts/analyses/quality-conformance/2026-05-24-ff249bba/report.json"
    },
    {
      "Priority": 6,
      "Metric": "Overall visual look and feel: gameplay, start page, typography complexity",
      "Current": "8.6/10",
      "Confidence": "medium-low",
      "Resolution": "first-pass visual scorer when available; still needs reference-backed contact sheets and sprite/style sub-scorers",
      "Cost / resources": "medium; cpu, browser, gpu",
      "Tracked spend": "1 runs; 0.1 min wall; 0.1 min CPU",
      "Major-gate target": "8.4-8.8",
      "Measurement status": "Measured visual scorer; medium-low confidence",
      "Why this matters": "A high score can still feel off if start text, density, contrast, alien readability, and arcade typography do not cohere.",
      "Effort / time estimate": "Medium; next pass should add reference-backed contact sheets and GPU/model-assisted review",
      "Recommended next step": "Defer unless new ingestion evidence reveals a larger graphics-conformance gap.",
      "Evidence": "reference-artifacts/analyses/aurora-visual-look-conformance/2026-05-08-fee8820-dirty/report.json"
    },
    {
      "Priority": 7,
      "Metric": "Stage 4 pressure exact replay / pressure curve precision",
      "Current": "6/10",
      "Confidence": "medium",
      "Resolution": "narrow pressure/loss replay windows; exact replay coverage still limited",
      "Cost / resources": "medium; cpu, browser",
      "Tracked spend": "28 runs; 12.8 min wall; 18.5 min CPU",
      "Major-gate target": "8.2-8.6",
      "Measurement status": "Measured level-arc weak submetric",
      "Why this matters": "Pressure should be learnable and reproducible, not merely present in one run.",
      "Effort / time estimate": "Medium-high; prior runs ~12.8 min wall / 18.5 min CPU",
      "Recommended next step": "Run focused source-window replay matching after the Stage 12 loop validates candidate mechanics.",
      "Evidence": "reference-artifacts/analyses/level-arc-conformance/2026-05-24-ff249bba/report.json"
    },
    {
      "Priority": 8,
      "Metric": "Alien entry to levels: formation, timing, and methods",
      "Current": "10/10",
      "Confidence": "medium",
      "Resolution": "composite proxy from opening timing, geometry, and movement grammar",
      "Cost / resources": "estimated; cpu, browser",
      "Tracked spend": "113 runs; 26.4 min wall; 34.9 min CPU",
      "Major-gate target": "9.0-9.4 with path and rack-slot scorer",
      "Measurement status": "Dedicated alien-entry submetric",
      "Why this matters": "Entry formations and rack timing are a first-order arcade authenticity signal before combat even starts.",
      "Effort / time estimate": "Medium; 1-3 hrs plus visual review",
      "Recommended next step": "Raise regular-stage minimum signature distance and add stage-specific alien entry scripts before retuning broad level arc.",
      "Evidence": "reference-artifacts/analyses/alien-entry-challenge-variation/2026-05-16-82fd62cb/report.json"
    },
    {
      "Priority": 9,
      "Metric": "Challenge-stage variation and new alien/formations introduction",
      "Current": "4.3/10",
      "Confidence": "medium",
      "Resolution": "strict dedicated stage-by-stage challenge conformance report when available; fallback proxy uses challenge timing, challenge identity, and non-repetition",
      "Cost / resources": "estimated; cpu, browser",
      "Tracked spend": "113 runs; 26.4 min wall; 34.9 min CPU",
      "Major-gate target": "9.0-9.4 with dedicated scorer",
      "Measurement status": "Dedicated stage-by-stage challenge conformance report",
      "Why this matters": "Challenge stages should teach new motion/reward patterns, not only pause normal combat.",
      "Effort / time estimate": "Medium-high; 2-4 hrs",
      "Recommended next step": "Close dedicated challenge-stage gaps: current challenge stages are functionally safe but not yet fully credible Galaga-like bonus exhibitions: strict movement is 4.4/10, strict graphics is 4.5/10, alien/stage novelty is 3.9/10, player shot opportunity is 5.4/10, target-video object-track fit is 3.5/10, and sprite-motion correspondence is 6.18/10 with target timing status frame-labeled-segmented-reference-windows. Diagnostic legacy coverage was 6.8/10, which is why the old read was too generous.",
      "Evidence": "reference-artifacts/analyses/challenge-stage-conformance/2026-06-09-abb5c464/report.json"
    },
    {
      "Priority": 10,
      "Metric": "Progression and persona depth",
      "Current": "8.4/10",
      "Confidence": "medium",
      "Resolution": "scorer-backed artifact with selected harness windows",
      "Cost / resources": "estimated; cpu",
      "Tracked spend": "16 runs; 17.6 min wall; 20.4 min CPU",
      "Major-gate target": "9.1+",
      "Measurement status": "Measured release category",
      "Why this matters": "Keeps the game learnable across skill levels and supports later-stage quality.",
      "Effort / time estimate": "Low-medium; 1-2 hrs",
      "Recommended next step": "Resolve remaining ordering edge case after higher-value audio/level-arc work.",
      "Evidence": "reference-artifacts/analyses/quality-conformance/2026-05-24-ff249bba/report.json"
    },
    {
      "Priority": 11,
      "Metric": "Stage 1 opening timing fidelity",
      "Current": "8.5/10",
      "Confidence": "medium",
      "Resolution": "scorer-backed artifact with selected harness windows",
      "Cost / resources": "medium; cpu, browser",
      "Tracked spend": "16 runs; 17.6 min wall; 20.4 min CPU",
      "Major-gate target": "8.8-9.2",
      "Measurement status": "Measured release category",
      "Why this matters": "First impression and direct reference feel.",
      "Effort / time estimate": "Low-medium; 1-2 hrs",
      "Recommended next step": "Defer until higher-gap audio and level-arc candidates have been exercised.",
      "Evidence": "reference-artifacts/analyses/quality-conformance/2026-05-24-ff249bba/report.json"
    },
    {
      "Priority": 12,
      "Metric": "Arcade console frame UI style",
      "Current": "9.2/10",
      "Confidence": "medium",
      "Resolution": "UI shell proxy; dedicated visual/modal rubric still needed",
      "Cost / resources": "medium; cpu, browser, gpu",
      "Tracked spend": "16 runs; 17.6 min wall; 20.4 min CPU",
      "Major-gate target": "9.4-9.6",
      "Measurement status": "Measured as UI shell; needs separate arcade-frame style rubric",
      "Why this matters": "The cabinet frame is the constant product surface around every game.",
      "Effort / time estimate": "Medium; 1-3 hrs visual QA",
      "Recommended next step": "Defer unless new ingestion evidence reveals a larger graphics-conformance gap.",
      "Evidence": "reference-artifacts/analyses/quality-conformance/2026-05-24-ff249bba/report.json"
    },
    {
      "Priority": 13,
      "Metric": "Popup/help/scoring/leaderboard surface formatting",
      "Current": "9.2/10",
      "Confidence": "medium",
      "Resolution": "UI shell proxy; dedicated visual/modal rubric still needed",
      "Cost / resources": "medium; cpu, browser",
      "Tracked spend": "16 runs; 17.6 min wall; 20.4 min CPU",
      "Major-gate target": "9.4-9.6",
      "Measurement status": "Measured through UI shell suite; needs modal-specific scoring",
      "Why this matters": "Popup surfaces carry learning, scoring trust, feedback, and player records.",
      "Effort / time estimate": "Low-medium; 1-2 hrs",
      "Recommended next step": "Defer unless new ingestion evidence reveals a larger graphics-conformance gap.",
      "Evidence": "reference-artifacts/analyses/quality-conformance/2026-05-24-ff249bba/report.json"
    },
    {
      "Priority": 14,
      "Metric": "Dive fairness and safety",
      "Current": "9.1/10",
      "Confidence": "medium-high",
      "Resolution": "seed/persona safety guardrails and pressure-sensitive collision checks",
      "Cost / resources": "guardrail; cpu",
      "Tracked spend": "44 runs; 30.5 min wall; 38.9 min CPU",
      "Major-gate target": "9.3+",
      "Measurement status": "Measured release category",
      "Why this matters": "Protects user trust while pressure is increased.",
      "Effort / time estimate": "Guardrail; 30-90 min per risky gameplay cycle",
      "Recommended next step": "Keep as required guardrail for pressure/reward changes.",
      "Evidence": "reference-artifacts/analyses/quality-conformance/2026-05-24-ff249bba/report.json"
    },
    {
      "Priority": 15,
      "Metric": "Player movement conformance",
      "Current": "10/10",
      "Confidence": "high-current-pass",
      "Resolution": "reference trace plus controlled movement harness checks; expert micro-feel can still exceed scorer resolution",
      "Cost / resources": "guardrail; cpu, browser",
      "Tracked spend": "16 runs; 17.6 min wall; 20.4 min CPU",
      "Major-gate target": "Maintain 10",
      "Measurement status": "Measured release category",
      "Why this matters": "Core control feel is already excellent.",
      "Effort / time estimate": "Guardrail only",
      "Recommended next step": "Do not tune unless a new reference metric proves a gap.",
      "Evidence": "reference-artifacts/analyses/quality-conformance/2026-05-24-ff249bba/report.json"
    },
    {
      "Priority": 16,
      "Metric": "Shot and hit responsiveness",
      "Current": "10/10",
      "Confidence": "high-current-pass",
      "Resolution": "functional combat-response guardrails; audiovisual semantics are scored separately",
      "Cost / resources": "guardrail; cpu",
      "Tracked spend": "16 runs; 17.6 min wall; 20.4 min CPU",
      "Major-gate target": "Maintain 10",
      "Measurement status": "Measured release category",
      "Why this matters": "Core combat response is already excellent.",
      "Effort / time estimate": "Guardrail only",
      "Recommended next step": "Protect during explosion/audio/event feedback work.",
      "Evidence": "reference-artifacts/analyses/quality-conformance/2026-05-24-ff249bba/report.json"
    },
    {
      "Priority": 17,
      "Metric": "Stage 1 opening geometry fidelity",
      "Current": "10/10",
      "Confidence": "high-current-pass",
      "Resolution": "opening formation geometry checks; later-stage entry variation is separate",
      "Cost / resources": "guardrail; cpu, browser",
      "Tracked spend": "16 runs; 17.6 min wall; 20.4 min CPU",
      "Major-gate target": "Maintain 10",
      "Measurement status": "Measured release category",
      "Why this matters": "Formation geometry is already locked.",
      "Effort / time estimate": "Guardrail only",
      "Recommended next step": "Protect during alien-entry visual work.",
      "Evidence": "reference-artifacts/analyses/quality-conformance/2026-05-24-ff249bba/report.json"
    },
    {
      "Priority": 18,
      "Metric": "Capture and rescue rule fidelity",
      "Current": "10/10",
      "Confidence": "high-current-pass",
      "Resolution": "rule/state harness checks; feedback clarity and reward feel are separate",
      "Cost / resources": "guardrail; cpu",
      "Tracked spend": "16 runs; 17.6 min wall; 20.4 min CPU",
      "Major-gate target": "Maintain 10",
      "Measurement status": "Measured release category",
      "Why this matters": "Strong Galaga identity mechanic; should not regress while feedback improves.",
      "Effort / time estimate": "Guardrail only",
      "Recommended next step": "Use as release blocker for capture/rescue-adjacent audio or explosion changes.",
      "Evidence": "reference-artifacts/analyses/quality-conformance/2026-05-24-ff249bba/report.json"
    },
    {
      "Priority": 19,
      "Metric": "Challenge-stage timing fidelity",
      "Current": "10/10",
      "Confidence": "high-current-pass",
      "Resolution": "challenge timing deltas within tolerance; variation and teaching value are separate",
      "Cost / resources": "guardrail; cpu, browser",
      "Tracked spend": "16 runs; 17.6 min wall; 20.4 min CPU",
      "Major-gate target": "Maintain 9.8+",
      "Measurement status": "Measured release category",
      "Why this matters": "Timing is strong; variation is the gap, not baseline timing.",
      "Effort / time estimate": "Guardrail only",
      "Recommended next step": "Preserve while adding challenge variation scoring.",
      "Evidence": "reference-artifacts/analyses/quality-conformance/2026-05-24-ff249bba/report.json"
    }
  ],
  "economicsSummary": {
    "latestOverallScore10": 8.8,
    "latestLevelArcScore10": 8.8,
    "metricPointCount": 1276,
    "deltaCount": 138,
    "measuredRuns": 984,
    "wallSeconds": 58850.295,
    "cpuSeconds": 59494.99,
    "artifactBytes": 1512759390,
    "computeApplication": {
      "gpuUseByPurpose": [
        {
          "purpose": "Audio conformance and cue feedback",
          "runs": 9,
          "wallSeconds": 14143.688,
          "wallMinutes": 235.73,
          "cpuSeconds": 71.4,
          "share": 0.6444,
          "sharePercent": 64.4,
          "examples": [
            "Challenge Perfect candidate audio sweep for measured centroid/risk reduction",
            "Challenge Perfect second-pass candidate sweep with gated keeper selection",
            "Challenge Perfect isolated candidate sweep after capture hardening"
          ],
          "interpretation": "Moves the moment-to-moment arcade feel: impact clarity, ambience identity, reward/loss feedback, and player understanding."
        },
        {
          "purpose": "Gameplay behavior and level complexity",
          "runs": 1,
          "wallSeconds": 4500,
          "wallMinutes": 75,
          "cpuSeconds": 0,
          "share": 0.205,
          "sharePercent": 20.5,
          "examples": [
            "Model-assisted 10-step sprite, impact-feedback, challenge-contract, documentation, and validation cycle; manual GPU-equivalent estimate for Codex planning/code review/interpretation."
          ],
          "interpretation": "Moves player-facing pressure, stage shape, alien entry novelty, challenge-stage learning value, and long-play texture."
        },
        {
          "purpose": "Dashboard, docs, and release planning",
          "runs": 2,
          "wallSeconds": 3300,
          "wallMinutes": 55,
          "cpuSeconds": 0,
          "share": 0.1504,
          "sharePercent": 15,
          "examples": [
            "manual estimate: model-assisted CPU/GPU economics dashboard design and implementation",
            "model-assisted self-critical conformance investment review, docs/dashboard wiring, and recommendations"
          ],
          "interpretation": "Moves decision quality: what to invest in next, how to explain releases, and how to keep dev/beta/prod evidence aligned."
        },
        {
          "purpose": "Visual and video reference analysis",
          "runs": 1,
          "wallSeconds": 3.49,
          "wallMinutes": 0.06,
          "cpuSeconds": 3.43,
          "share": 0.0002,
          "sharePercent": 0,
          "examples": [
            "First Aurora visual look conformance scorer baseline"
          ],
          "interpretation": "Moves graphical identity, reference inspection, contact-sheet review, sprite/surface comparison, and readability."
        }
      ],
      "cpuUseByPurpose": [
        {
          "purpose": "Audio conformance and cue feedback",
          "runs": 519,
          "wallSeconds": 30285.053,
          "wallMinutes": 504.75,
          "cpuSeconds": 48699.53,
          "share": 0.8196,
          "sharePercent": 82,
          "examples": [
            "Stage4 lane2 after bounded column5 cue",
            "Stage4 lane2 source-exact scenario after cue",
            "Stage4 loss windows after lane2 cue"
          ],
          "interpretation": "Moves the moment-to-moment arcade feel: impact clarity, ambience identity, reward/loss feedback, and player understanding."
        },
        {
          "purpose": "Gameplay behavior and level complexity",
          "runs": 425,
          "wallSeconds": 6025.98,
          "wallMinutes": 100.43,
          "cpuSeconds": 10053.99,
          "share": 0.1631,
          "sharePercent": 16.3,
          "examples": [
            "Stage4 lane2 action precision baseline",
            "Stage4 lane2 action precision baseline",
            "Stage4 lane2 action precision after harness fix"
          ],
          "interpretation": "Moves player-facing pressure, stage shape, alien entry novelty, challenge-stage learning value, and long-play texture."
        },
        {
          "purpose": "Harness, ingestion, and assessment logic",
          "runs": 9,
          "wallSeconds": 610.144,
          "wallMinutes": 10.17,
          "cpuSeconds": 704.93,
          "share": 0.0165,
          "sharePercent": 1.7,
          "examples": [
            "quality score after cadence generator no-keeper cycle",
            "quality score after pincer/crown geometry pass",
            "loss reward rollback quality score"
          ],
          "interpretation": "Moves reusable automation: scorers, artifact extraction, candidate loops, measurement confidence, and future game ingestion."
        },
        {
          "purpose": "Visual and video reference analysis",
          "runs": 18,
          "wallSeconds": 27.559,
          "wallMinutes": 0.46,
          "cpuSeconds": 35.07,
          "share": 0.0007,
          "sharePercent": 0.1,
          "examples": [
            "First Aurora visual look conformance scorer baseline",
            "npm run harness:analyze:aurora-runtime-sprite-conformance",
            "npm run harness:analyze:aurora-runtime-vs-galaga-target-crops"
          ],
          "interpretation": "Moves graphical identity, reference inspection, contact-sheet review, sprite/surface comparison, and readability."
        },
        {
          "purpose": "Dashboard, docs, and release planning",
          "runs": 6,
          "wallSeconds": 1.559,
          "wallMinutes": 0.03,
          "cpuSeconds": 1.47,
          "share": 0,
          "sharePercent": 0,
          "examples": [
            "quick-ledger-smoke",
            "dashboard refresh after conformance economics update",
            "local dev dashboard refresh after economics update"
          ],
          "interpretation": "Moves decision quality: what to invest in next, how to explain releases, and how to keep dev/beta/prod evidence aligned."
        }
      ],
      "gameplayImprovementByPart": [
        {
          "part": "Gameplay complexity and stage arc",
          "positiveScore10": 47.4,
          "share": 0.7418,
          "sharePercent": 74.2,
          "axes": [
            "quality:challenge-timing",
            "quality:stage1-timing",
            "stage4-pressure-collision-diagnostic-coverage",
            "stage4-pressure-exact-replay-coverage",
            "stage-signature-distance",
            "quality:formation-boss-grammar",
            "level-arc:stage-distinctiveness",
            "level-arc:movement-grammar-expansion"
          ],
          "interpretation": "Player-perceived variety, pressure, alien choreography, challenge-stage novelty, and long-play learning curve."
        },
        {
          "part": "Core mechanics and control feel",
          "positiveScore10": 7.9,
          "share": 0.1236,
          "sharePercent": 12.4,
          "axes": [
            "quality:movement"
          ],
          "interpretation": "Player-perceived fairness, responsiveness, collision quality, and trust in combat outcomes."
        },
        {
          "part": "Audio feedback and event clarity",
          "positiveScore10": 5.8,
          "share": 0.0908,
          "sharePercent": 9.1,
          "axes": [
            "quality:audio"
          ],
          "interpretation": "Player-perceived clarity from sounds that explain danger, reward, loss, and arcade identity."
        },
        {
          "part": "Overall release-quality rollup",
          "positiveScore10": 2.8,
          "share": 0.0438,
          "sharePercent": 4.4,
          "axes": [
            "overall-quality"
          ],
          "interpretation": "Composite release score movement that reflects several subsystems at once."
        }
      ],
      "totals": {
        "gpuEquivalentWallSeconds": 21947.178,
        "cpuLocalWallSeconds": 36950.295,
        "positiveScore10Attributed": 63.9
      },
      "limitations": [
        "GPU-equivalent accounting includes declared Codex/OpenAI/model/API/GPU usage and manual ledger entries. The repo cannot automatically read every Codex chat token or quota draw.",
        "Impact attribution is best-effort. It groups positive score movement by conformance axis, not a controlled causal experiment.",
        "A harness or documentation gain may improve future decision quality without immediately moving a gameplay score."
      ]
    },
    "charts": [
      "reference-artifacts/analyses/conformance-economics/2026-06-12-dab57ac7/score-trends.svg",
      "reference-artifacts/analyses/conformance-economics/2026-06-12-dab57ac7/largest-score-deltas.svg",
      "reference-artifacts/analyses/conformance-economics/2026-06-12-dab57ac7/compute-minutes-by-resource.svg",
      "reference-artifacts/analyses/conformance-economics/2026-06-12-dab57ac7/cost-per-positive-score-point.svg",
      "reference-artifacts/analyses/conformance-economics/2026-06-12-dab57ac7/gpu-equivalent-use-by-purpose.svg",
      "reference-artifacts/analyses/conformance-economics/2026-06-12-dab57ac7/cpu-use-by-purpose.svg",
      "reference-artifacts/analyses/conformance-economics/2026-06-12-dab57ac7/gameplay-improvement-by-project-part.svg"
    ],
    "retrospective": {
      "generatedAt": "2026-05-18T12:43:17.737Z",
      "executiveRead": "The past focused block substantially improved our honesty and repeatability, but only modestly improved player-facing conformance. Challenge stages are now scored with a strict 1/10 baseline and have risen to 3.8/10; that is a real improvement from the strict 2.5/10 baseline, but still far from human-level Galaga conformance. The biggest remaining failures are movement grammar, alien novelty, stage-to-stage challenge progression, and stable audio runtime promotion.",
      "workWindow": {
        "label": "Estimated focused conformance work block",
        "estimatedHours": 10,
        "basis": "Committed artifacts, challenge-stage history, quality score history, dashboard cost context, candidate sweep reports, and current audio/application artifacts. Some Codex/model/human orchestration time is estimated because it was not automatically logged."
      },
      "movedMost": [
        "Strict challenge-stage truth improved: the old broad read around 6.1/10 was replaced by a stricter baseline at 2.5/10, then recovered to 3.8/10. This is progress, but it is mostly better measurement plus partial graphical evidence, not a solved gameplay problem.",
        "Challenge graphical conformance moved from 2.1/10 to 4.3/10 after object-track/static visual evidence landed.",
        "Overall quality under the strict challenge metric moved only 8.7/10 -> 8.8/10, which is an honest signal that the user-visible game has not leapt forward as much as the harness did."
      ],
      "movedLeast": [
        "Alien novelty remains 3.4/10; it did not materially move during the focused block.",
        "Stage-to-stage challenge progression remains 3/10; late challenges still do not yet read as distinct Galaga-like lessons.",
        "Challenge movement conformance is only 3.4/10 and has plateaued relative to the amount of analysis effort.",
        "Audio runtime promotion is still zero accepted cues even though cue contracts and candidate loops improved the process."
      ],
      "failurePatterns": [
        "Challenge-stage layout sweeps are too shallow for the real problem. The gap is trajectory grammar, entry/exit choreography, alien-family staging, and temporal sprite motion, not just spawn timing and lane offsets.",
        "Audio candidate loops are optimizing isolated clips faster than they are improving full-theme live capture. Reference-vs-reference calibration and repeated full-theme stability gates must come before more runtime promotion.",
        "Some graphics artifacts are useful to the harness but not useful enough to a human reviewer. Dense contact sheets and tiny \"view larger\" images need to become stage-by-stage temporal crop strips and object-track overlays.",
        "The economics ledger still undercounts Codex/model/human orchestration time. The charts correctly show measured local CPU/browser spend, but cloud/model work is only visible when manually logged.",
        "High broad scores can mask low strict scores. The broad alien/challenge novelty score is useful context, but the strict challenge-stage set-piece score is the one that matches the human complaint."
      ],
      "recommendations": [
        "Make challenge-stage path grammar the next primary gameplay investment: define per-challenge contracts for group order, first-visible frame, path length, turn count, exit side, alien family, animation phases, and bonus-shot opportunity.",
        "Build direct target object tracks from the supplied Galaga challenge videos and compare Aurora tracks against those trajectories before authoring another large sweep.",
        "Replace dense challenge contact sheets in the human docs with larger expandable crop sequences: reference target strip, Aurora current strip, object-track overlay, and per-axis score.",
        "Freeze audio runtime promotion until reference-vs-reference and current-vs-current variance is known for challengePerfect, challengeTransition, gameOver, captureBeam, and stagePulse.",
        "Log every multi-hour cycle with `npm run harness:measure`, and add a manual GPU-equivalent Codex entry whenever model work materially designs, interprets, or changes the harness.",
        "Treat the next beta justification as requiring visible player-facing lift in challenge movement/novelty or audio clarity, not just more documentation or scorer sophistication."
      ],
      "metricMovements": [
        {
          "id": "challenge-set-piece",
          "label": "Challenge-stage strict conformance",
          "startScore10": 2.5,
          "currentScore10": 3.8,
          "delta10": 1.3,
          "progressClass": "advanced",
          "playerMeaning": "Highest-priority gameplay authenticity gap.",
          "startReport": "reference-artifacts/analyses/challenge-stage-conformance/2026-05-17-edf46536/report.json",
          "currentReport": "reference-artifacts/analyses/challenge-stage-conformance/2026-05-18-6d76050d/report.json"
        },
        {
          "id": "challenge-interest",
          "label": "Challenge-stage interesting factor",
          "startScore10": 2.6,
          "currentScore10": 3.8,
          "delta10": 1.2,
          "progressClass": "advanced",
          "playerMeaning": "Bonus stages should feel authored and exciting, not merely safe.",
          "startReport": "reference-artifacts/analyses/challenge-stage-conformance/2026-05-17-edf46536/report.json",
          "currentReport": "reference-artifacts/analyses/challenge-stage-conformance/2026-05-18-6d76050d/report.json"
        },
        {
          "id": "challenge-movement",
          "label": "Challenge movement / trajectory conformance",
          "startScore10": 2.3,
          "currentScore10": 3.4,
          "delta10": 1.1,
          "progressClass": "advanced",
          "playerMeaning": "True alien path grammar and motion shape.",
          "startReport": "reference-artifacts/analyses/challenge-stage-conformance/2026-05-17-edf46536/report.json",
          "currentReport": "reference-artifacts/analyses/challenge-stage-conformance/2026-05-18-6d76050d/report.json"
        },
        {
          "id": "challenge-graphics",
          "label": "Challenge graphical conformance",
          "startScore10": 2.1,
          "currentScore10": 4.3,
          "delta10": 2.2,
          "progressClass": "advanced",
          "playerMeaning": "Visible alien/sprite/readability fit against target challenge artifacts.",
          "startReport": "reference-artifacts/analyses/challenge-stage-conformance/2026-05-17-edf46536/report.json",
          "currentReport": "reference-artifacts/analyses/challenge-stage-conformance/2026-05-18-6d76050d/report.json"
        },
        {
          "id": "challenge-alien-novelty",
          "label": "Challenge alien novelty",
          "startScore10": 3.4,
          "currentScore10": 3.4,
          "delta10": 0,
          "progressClass": "stalled",
          "playerMeaning": "Whether later challenges introduce memorable alien families and roles.",
          "startReport": "reference-artifacts/analyses/challenge-stage-conformance/2026-05-17-edf46536/report.json",
          "currentReport": "reference-artifacts/analyses/challenge-stage-conformance/2026-05-18-6d76050d/report.json"
        },
        {
          "id": "challenge-progression",
          "label": "Challenge stage-to-stage progression",
          "startScore10": 2.8,
          "currentScore10": 3,
          "delta10": 0.2,
          "progressClass": "nudged",
          "playerMeaning": "Whether the eight challenge stages escalate as distinct lessons.",
          "startReport": "reference-artifacts/analyses/challenge-stage-conformance/2026-05-17-edf46536/report.json",
          "currentReport": "reference-artifacts/analyses/challenge-stage-conformance/2026-05-18-6d76050d/report.json"
        },
        {
          "id": "challenge-shot-opportunity",
          "label": "Challenge scoring/shot opportunity",
          "startScore10": null,
          "currentScore10": 5.1,
          "delta10": null,
          "progressClass": "stalled",
          "playerMeaning": "Whether players get clear, learnable bonus-shot routes.",
          "startReport": "reference-artifacts/analyses/challenge-stage-conformance/2026-05-17-edf46536/report.json",
          "currentReport": "reference-artifacts/analyses/challenge-stage-conformance/2026-05-18-6d76050d/report.json"
        },
        {
          "id": "challenge-safety",
          "label": "Challenge no-combat safety guardrail",
          "startScore10": 10,
          "currentScore10": 10,
          "delta10": 0,
          "progressClass": "guardrail",
          "playerMeaning": "No enemy shots, no attack starts, no ship deaths in challenge windows.",
          "startReport": "reference-artifacts/analyses/challenge-stage-conformance/2026-05-17-edf46536/report.json",
          "currentReport": "reference-artifacts/analyses/challenge-stage-conformance/2026-05-18-6d76050d/report.json"
        },
        {
          "id": "overall-quality",
          "label": "Overall quality rollup after strict challenge metric",
          "startScore10": 8.7,
          "currentScore10": 8.8,
          "delta10": 0.1,
          "progressClass": "nudged",
          "playerMeaning": "Release score moved only slightly because the stricter challenge metric exposed a large gap.",
          "startReport": "reference-artifacts/analyses/quality-conformance/2026-05-17-edf46536/report.json",
          "currentReport": "reference-artifacts/analyses/quality-conformance/2026-05-18-6d76050d/report.json"
        },
        {
          "id": "audio-release-category",
          "label": "Audio release-category read",
          "startScore10": 7,
          "currentScore10": 7.3,
          "delta10": 0.3,
          "progressClass": "mixed",
          "playerMeaning": "The release score nudged upward, but accepted runtime cue promotion remains blocked by full-theme instability.",
          "startReport": "reference-artifacts/analyses/quality-conformance/2026-05-17-edf46536/report.json",
          "currentReport": "reference-artifacts/analyses/quality-conformance/2026-05-18-6d76050d/report.json"
        },
        {
          "id": "audio-runtime-promotion",
          "label": "Audio runtime promotion success",
          "startScore10": 0,
          "currentScore10": 0,
          "delta10": 0,
          "progressClass": "stalled",
          "playerMeaning": "No candidate is accepted into runtime audio yet; the process improved, the shipped sound did not meaningfully move from these candidates.",
          "currentReport": "reference-artifacts/analyses/aurora-audio-conformance-lab-v2/latest.json"
        },
        {
          "id": "runtime-sprite-static",
          "label": "Live runtime static sprite conformance",
          "startScore10": null,
          "currentScore10": 6.19,
          "delta10": null,
          "progressClass": "known-gap",
          "playerMeaning": "Static runtime sprite identity is measured around 6/10 and active motion is still a planning row, so graphics remain visually incomplete.",
          "currentReport": "reference-artifacts/analyses/application-artifact-conformance/latest.json"
        }
      ],
      "resourceRead": {
        "dashboardChallengeSpend": {
          "runs": 161,
          "wallMinutes": 24,
          "cpuMinutes": 39.7,
          "label": "161 runs; 24 min wall; 39.7 min CPU"
        },
        "dashboardAudioSpend": {
          "runs": 309,
          "wallMinutes": 253.7,
          "cpuMinutes": 459.4,
          "label": "309 runs; 253.7 min wall; 459.4 min CPU"
        },
        "challengeCurrentScore10": 3.8,
        "audioCurrentScore10": 7.3,
        "estimatedUnloggedHours": 10,
        "accountingDebt": "Recent repo work includes merge/review/documentation and model-assisted reasoning that is not fully represented in the measured run ledger. Treat cost charts as a lower bound until manual Codex/model entries are logged per work cycle.",
        "dashboardSource": "reference-artifacts/analyses/release-conformance-dashboard/latest.json"
      },
      "deepLinks": [
        {
          "label": "Local Cost / Value dashboard",
          "href": "http://127.0.0.1:4312/local-dev/conformance-dashboard.html?game=aurora-galactica#cost"
        },
        {
          "label": "Hosted dev Cost / Value dashboard",
          "href": "https://sgwoods.github.io/Aurora-Galactica/dev/conformance-dashboard.html?game=aurora-galactica#cost"
        },
        {
          "label": "Hosted dev conformance dashboard",
          "href": "https://sgwoods.github.io/Aurora-Galactica/dev/conformance-dashboard.html?game=aurora-galactica#conformance"
        },
        {
          "label": "Project guide retrospective section",
          "href": "project-guide.html#conformance-investment-retrospective-doc"
        }
      ],
      "charts": [
        "reference-artifacts/analyses/conformance-investment-retrospective/2026-05-18-e583b558/score-movement-critical-axes.svg",
        "reference-artifacts/analyses/conformance-investment-retrospective/2026-05-18-e583b558/largest-human-conformance-gaps.svg",
        "reference-artifacts/analyses/conformance-investment-retrospective/2026-05-18-e583b558/spend-versus-confidence-gaps.svg"
      ],
      "sourceReport": "reference-artifacts/analyses/conformance-investment-retrospective/2026-05-18-e583b558/report.json"
    }
  },
  "resourceSpendTable": [
    {
      "Resource": "gpu-equivalent",
      "Measured runs": 18,
      "Wall time": "630.8 min",
      "CPU time": "1.2 min"
    },
    {
      "Resource": "cpu",
      "Measured runs": 938,
      "Wall time": "585.6 min",
      "CPU time": "938.2 min"
    },
    {
      "Resource": "browser",
      "Measured runs": 365,
      "Wall time": "434.2 min",
      "CPU time": "668.8 min"
    },
    {
      "Resource": "codex",
      "Measured runs": 13,
      "Wall time": "365.7 min",
      "CPU time": "1.2 min"
    },
    {
      "Resource": "openai-gpu-equivalent",
      "Measured runs": 1,
      "Wall time": "75 min",
      "CPU time": "0 min"
    },
    {
      "Resource": "model-api",
      "Measured runs": 2,
      "Wall time": "25 min",
      "CPU time": "0 min"
    },
    {
      "Resource": "local-browser",
      "Measured runs": 4,
      "Wall time": "17 min",
      "CPU time": "30.8 min"
    },
    {
      "Resource": "gpu",
      "Measured runs": 1,
      "Wall time": "0.1 min",
      "CPU time": "0.1 min"
    }
  ],
  "axisSpendTable": [
    {
      "Axis": "audio",
      "Measured runs": 317,
      "Wall time": "254.3 min",
      "CPU time": "460.3 min"
    },
    {
      "Axis": "conformance-analysis",
      "Measured runs": 12,
      "Wall time": "236.5 min",
      "CPU time": "2.7 min"
    },
    {
      "Axis": "challenge-perfect",
      "Measured runs": 71,
      "Wall time": "180.7 min",
      "CPU time": "180.1 min"
    },
    {
      "Axis": "audio-runtime-trial",
      "Measured runs": 27,
      "Wall time": "162.1 min",
      "CPU time": "31.9 min"
    },
    {
      "Axis": "audio-activity-profile",
      "Measured runs": 10,
      "Wall time": "127.2 min",
      "CPU time": "14 min"
    },
    {
      "Axis": "challenge-stage",
      "Measured runs": 205,
      "Wall time": "112.6 min",
      "CPU time": "59.4 min"
    },
    {
      "Axis": "audio-risk-stability",
      "Measured runs": 8,
      "Wall time": "91.4 min",
      "CPU time": "2.7 min"
    },
    {
      "Axis": "release-hardening",
      "Measured runs": 1,
      "Wall time": "90 min",
      "CPU time": "0 min"
    }
  ],
  "nextGoalEstimateTable": [
    {
      "Priority": 1,
      "Metric": "Challenge-stage set-piece conformance: movement, graphics, alien novelty",
      "Current": "4.3/10",
      "Target": ">=5.0 before next beta claim; 6.0 after three authored challenges; 9.0+ mature",
      "Gap to target": "+0.7",
      "Estimated effort": "High; long-cycle CPU/browser extraction plus gameplay authoring and sprite-motion/reference labeling",
      "Expected resources": "cpu, browser, gpu",
      "Tracked spend": "162 runs; 24 min wall; 39.8 min CPU",
      "Value / cost read": "Expected lift 1.8/10 on metric, 0.138/10 overall; investment score 8.4.",
      "Next action": "Recent candidate sweeps improved measurement and search evidence, but no stage currently has a runtime keeper. Next: build richer movement primitives before runtime promotion; first target reference-spline-fit from 10 primitives, with first-five grammar at 25 group contracts and 25 reference-backed paths."
    },
    {
      "Priority": 2,
      "Metric": "Audio identity, event feedback, and cue alignment",
      "Current": "7.3/10",
      "Target": "7.5-8.0",
      "Gap to target": "+0.2",
      "Estimated effort": "High; 3-6 hrs local/model-assisted analysis",
      "Expected resources": "cpu, model-api, openai-api",
      "Tracked spend": "317 runs; 254.3 min wall; 460.3 min CPU",
      "Value / cost read": "Expected lift 0.7/10 on metric, 0.054/10 overall; investment score 2.67.",
      "Next action": "Challenge Perfect runtime trial rejected perfect-clean-onset-soft-tail; do not directly promote the focused keeper. Do not promote Challenge Perfect from isolated onset/body candidates. Replace the next audio strategy with full-phrase/segment-boundary work: stabilize the scorer on canonical reference-vs-reference capture, then generate candidates that optimize onset, body, tail, and live capture segmentation together."
    },
    {
      "Priority": 3,
      "Metric": "Alien entry and broad challenge-stage novelty",
      "Current": "8.2/10",
      "Target": "7.5 first gate; 9.0+ mature",
      "Gap to target": "at target",
      "Estimated effort": "High; long-cycle CPU/browser extraction plus reference contact-sheet and path-labeling pass",
      "Expected resources": "cpu, browser",
      "Tracked spend": "113 runs; 26.4 min wall; 34.9 min CPU",
      "Value / cost read": "Estimated cost/value; dedicated investment candidate not yet generated.",
      "Next action": "Attack Regular-entry geometry separation: Minimum regular geometry distance 0.083; mean regular geometry distance 0.127; closest pair mid-run-entry-variant / late-run-cleanup-or-failure."
    },
    {
      "Priority": 4,
      "Metric": "Level arc and encounter shape",
      "Current": "8.8/10",
      "Target": "8.8-9.0",
      "Gap to target": "at target",
      "Estimated effort": "Medium-high; 2-5 hrs",
      "Expected resources": "cpu, browser",
      "Tracked spend": "162 runs; 24 min wall; 39.8 min CPU",
      "Value / cost read": "Expected lift 0.24/10 on metric, 0.018/10 overall; investment score 1.55.",
      "Next action": "Use the top-ranked opportunity window to add or widen deterministic evidence before changing gameplay tuning."
    },
    {
      "Priority": 4.5,
      "Metric": "Direct target sprite and impact feedback conformance",
      "Current": "6/10",
      "Target": ">=5.5 before next beta claim; >=7.5 mature preview",
      "Gap to target": "at target",
      "Estimated effort": "Medium-high; 2-5 hrs renderer/crop/harness work plus visual review",
      "Expected resources": "cpu",
      "Tracked spend": "16 runs; 17.6 min wall; 20.4 min CPU",
      "Value / cost read": "Estimated cost/value; dedicated investment candidate not yet generated.",
      "Next action": "Raise direct sprite target score and impact/explosion feedback together; current impact/explosion static read is 5.8/10."
    },
    {
      "Priority": 5,
      "Metric": "Boss entry and formation grammar",
      "Current": "9.4/10",
      "Target": "8.0-8.5 first gate; 9.0+ with path/slot extraction",
      "Gap to target": "at target",
      "Estimated effort": "Medium-high; 2-5 hrs, then recurring low-cost guardrail",
      "Expected resources": "cpu, browser",
      "Tracked spend": "162 runs; 24 min wall; 39.8 min CPU",
      "Value / cost read": "Expected lift 0.28/10 on metric, 0.022/10 overall; investment score 0.7.",
      "Next action": "Label boss, escort, rack-settle, and challenge path families from Galaga reference contact sheets or video traces, then replace heuristic coverage with direct shape-distance scoring."
    },
    {
      "Priority": 6,
      "Metric": "Overall visual look and feel: gameplay, start page, typography complexity",
      "Current": "8.6/10",
      "Target": "8.4-8.8",
      "Gap to target": "at target",
      "Estimated effort": "Medium; next pass should add reference-backed contact sheets and GPU/model-assisted review",
      "Expected resources": "cpu, browser, gpu",
      "Tracked spend": "1 runs; 0.1 min wall; 0.1 min CPU",
      "Value / cost read": "Expected lift 0.12/10 on metric, 0.009/10 overall; investment score 0.38.",
      "Next action": "Defer unless new ingestion evidence reveals a larger graphics-conformance gap."
    },
    {
      "Priority": 7,
      "Metric": "Stage 4 pressure exact replay / pressure curve precision",
      "Current": "6/10",
      "Target": "8.2-8.6",
      "Gap to target": "+2.2",
      "Estimated effort": "Medium-high; prior runs ~12.8 min wall / 18.5 min CPU",
      "Expected resources": "cpu, browser",
      "Tracked spend": "28 runs; 12.8 min wall; 18.5 min CPU",
      "Value / cost read": "Expected lift 0.35/10 on metric, 0.027/10 overall; investment score 1.39.",
      "Next action": "Run focused source-window replay matching after the Stage 12 loop validates candidate mechanics."
    }
  ],
  "ingestionSummary": {
    "sourceFamilyCount": 16,
    "highConfidenceCount": 10,
    "mixedOrLowConfidenceCount": 2,
    "scoredOrPromotedCount": 11,
    "nextBestUpgrade": "Add Galaga-family visual contact-sheet comparison, sprite readability labels, and model-assisted visual critique.",
    "framing": "Ingestion turns reference media and Aurora runtime captures into repeatable evidence: clips, contact sheets, traces, event logs, labels, scores, confidence, and next missing annotations."
  },
  "ingestionRows": [
    {
      "rank": 1,
      "source": "Galaga-family reference audio clips",
      "axis": "audio identity / event feedback",
      "artifactType": "reference m4a cue clips",
      "coverage": "0 clips",
      "annotationStatus": "clipped, mapped, partially scored",
      "confidence": "medium-high",
      "linkedMetric": "Audio identity, event feedback, and cue alignment",
      "anchor": "src/assets/reference-audio",
      "next": "Add finer event labels for explosion, impact, boss damage, immunity/entry, capture, and rescue semantics."
    },
    {
      "rank": 2,
      "source": "Aurora audio cue comparison and event-gap reports",
      "axis": "audio cue scoring",
      "artifactType": "waveform/spectral/alignment/semantic reports",
      "coverage": "21 compared cues; semantic 9.78/10; 0 attention rows",
      "annotationStatus": "semantic-scored",
      "confidence": "medium-high",
      "linkedMetric": "Audio identity, event feedback, and cue alignment",
      "anchor": "reference-artifacts/analyses/aurora-audio-event-gap/2026-05-16-08c327dd-dirty-155303/report.json",
      "next": "Tune the highest segment-level gap next: challengePerfect onset. Rerun audio comparison and event-gap analysis after the change."
    },
    {
      "rank": 3,
      "source": "Aurora Audio Conformance Lab v2",
      "axis": "audio candidate loop / family promotion decisions",
      "artifactType": "cue-family risk, candidate history, keeper decision, promotion gate",
      "coverage": "8/8 target cues swept; 2 keeper candidates tracked; runtime promotions 0; rejected runtime trials 3",
      "annotationStatus": "family-scored",
      "confidence": "medium-high",
      "linkedMetric": "Audio identity, event feedback, and cue alignment",
      "anchor": "reference-artifacts/analyses/aurora-audio-conformance-lab-v2/2026-05-17-f9e7374c-dirty/report.json",
      "next": "challengePerfect: Do not promote Challenge Perfect from isolated onset/body candidates. Replace the next audio strategy with full-phrase/segment-boundary work: stabilize the scorer on canonical reference-vs-reference capture, then generate candidates that optimize onset, body, tail, and live capture segmentation together."
    },
    {
      "rank": 4,
      "source": "Aurora audio cue contracts",
      "axis": "audio semantic contract / theme latitude / promotion safety",
      "artifactType": "cue contract readiness, theme lanes, runtime-trial blockers",
      "coverage": "8 contracts; readiness 9.1/10; blocked 7; rejected trials n/a",
      "annotationStatus": "contract-scored",
      "confidence": "medium-high",
      "linkedMetric": "Audio identity, event feedback, and cue alignment",
      "anchor": "reference-artifacts/analyses/aurora-audio-cue-contracts/2026-05-11-b83393cd-dirty-201628/report.json",
      "next": "Keep the calibrated layered playerHit runtime cue; next either refine the residual playerHit tail/body gap with the same calibrated scorer or move effort to stagePulse pressure-bed strategy if user impact per compute looks higher."
    },
    {
      "rank": 5,
      "source": "Aurora audio runtime trial decisions",
      "axis": "audio promotion evidence / release guardrails",
      "artifactType": "accepted, rejected, and inconclusive live runtime-trial outcomes",
      "coverage": "challengePerfect runtime-trial-rejected; candidate perfect-clean-onset-soft-tail",
      "annotationStatus": "trial-recorded",
      "confidence": "medium-high",
      "linkedMetric": "Audio identity, event feedback, and cue alignment",
      "anchor": "reference-artifacts/analyses/aurora-audio-runtime-trials/2026-05-17-f9e7374c-dirty-123945-challenge-perfect-rejected/report.json",
      "next": "Do not promote Challenge Perfect from isolated onset/body candidates. Replace the next audio strategy with full-phrase/segment-boundary work: stabilize the scorer on canonical reference-vs-reference capture, then generate candidates that optimize onset, body, tail, and live capture segmentation together."
    },
    {
      "rank": 6,
      "source": "Aurora audio risk stability",
      "axis": "audio measurement stability / promotion confidence",
      "artifactType": "repeated event-gap volatility report",
      "coverage": "8 reports; 19 volatile cues; most volatile captureBeam 3.89/10 range",
      "annotationStatus": "stability-scored",
      "confidence": "medium-high",
      "linkedMetric": "Audio identity, event feedback, and cue alignment",
      "anchor": "reference-artifacts/analyses/aurora-audio-risk-stability/2026-05-17-f9e7374c-dirty-124419/report.json",
      "next": "Use median/repeated confirmation before promoting audio changes. Start by stabilizing captureBeam scoring, then retest challengePerfect."
    },
    {
      "rank": 7,
      "source": "Aurora audio promotion stability gate",
      "axis": "audio promotion safety / variance-aware gating",
      "artifactType": "candidate, precheck, event-gap, and stability join",
      "coverage": "3 cues; 0 runtime trials allowed; 3 stability rejections",
      "annotationStatus": "variance-gated",
      "confidence": "medium-high",
      "linkedMetric": "Audio identity, event feedback, and cue alignment",
      "anchor": "reference-artifacts/analyses/aurora-audio-promotion-stability-gate/2026-05-17-f9e7374c-dirty-125733/report.json",
      "next": "Do not promote challengePerfect. Preserve the candidate/precheck evidence and either stabilize measurement or generate a candidate whose full-theme win exceeds the current stability threshold."
    },
    {
      "rank": 8,
      "source": "Aurora audio strategy review",
      "axis": "audio conformance strategy / failure analysis",
      "artifactType": "diagnosis, revised strategy, and next calibration experiment",
      "coverage": "5 diagnoses; 6 strategy changes; next challengePerfect",
      "annotationStatus": "strategy-reviewed",
      "confidence": "medium-high",
      "linkedMetric": "Audio identity, event feedback, and cue alignment",
      "anchor": "reference-artifacts/analyses/aurora-audio-strategy-review/2026-05-17-f9e7374c-dirty-125741/report.json",
      "next": "Before any more runtime audio promotion, build a calibration pass that captures Galaga reference cues through the same browser path twice and measures reference-vs-reference, current-vs-current, and current-vs-reference variance for challengePerfect, challengeTransition, gameOver, captureBeam, and stagePulse."
    },
    {
      "rank": 9,
      "source": "Aurora stagePulse cadence pressure analysis",
      "axis": "formation pressure / cadence audio",
      "artifactType": "tracked cadence pressure axes from full audio comparison",
      "coverage": "pressure 2.7/10; weakest brightness-control",
      "annotationStatus": "scored",
      "confidence": "medium-high",
      "linkedMetric": "Audio identity, event feedback, and cue alignment",
      "anchor": "reference-artifacts/analyses/aurora-stage-pulse-cadence/2026-05-15-93dbdad8-dirty/report.json",
      "next": "Add a cadence-specific candidate generator that jointly optimizes low-band body, brightness control, zero-crossing calm, and gain. Promote only after both repeated focus gates and full audio-theme comparison improve."
    },
    {
      "rank": 10,
      "source": "Boss entry and formation grammar scorer",
      "axis": "formation grammar / boss entry / challenge identity",
      "artifactType": "event grammar, timing, stage-signature, and measurement-debt report",
      "coverage": "11 boss/formation windows",
      "annotationStatus": "scored",
      "confidence": "medium",
      "linkedMetric": "Boss entry and formation grammar",
      "anchor": "reference-artifacts/analyses/formation-boss-grammar-conformance/2026-05-24-ff249bba/report.json",
      "next": "Promote frame-level boss/escort path traces and formation rack slot coordinates so visual choreography can be scored directly."
    },
    {
      "rank": 11,
      "source": "Level arc and encounter-shape evidence",
      "axis": "level arc / challenge / reward",
      "artifactType": "stage signatures, pressure windows, persona reports",
      "coverage": "6/6 stage families; 11/6 evidence windows",
      "annotationStatus": "scored",
      "confidence": "medium-high",
      "linkedMetric": "Level arc and encounter shape",
      "anchor": "reference-artifacts/analyses/level-arc-conformance/2026-05-24-ff249bba/report.json",
      "next": "Add more long-play reference windows and expert-route scoring for challenge/reward opportunities."
    },
    {
      "rank": 12,
      "source": "Stage 4 pressure and loss-window diagnostics",
      "axis": "pressure / fairness",
      "artifactType": "loss windows, replay geometry, collision traces",
      "coverage": "3 promoted windows",
      "annotationStatus": "mined, replay-diagnostic",
      "confidence": "medium",
      "linkedMetric": "Stage 4 pressure exact replay / pressure curve precision",
      "anchor": "reference-artifacts/analyses/aurora-stage4-loss-windows/2026-05-07-fb2f674/report.json",
      "next": "Improve exact replay matching and preserve per-frame attacker/player/shot geometry for candidate tuning."
    },
    {
      "rank": 13,
      "source": "Aurora visual look screenshots",
      "axis": "visual look / UI readability",
      "artifactType": "browser screenshots plus DOM/canvas metrics",
      "coverage": "4 surfaces",
      "annotationStatus": "first-pass scored",
      "confidence": "medium-low",
      "linkedMetric": "Overall visual look and feel",
      "anchor": "reference-artifacts/analyses/aurora-visual-look-conformance/2026-05-08-fee8820-dirty/report.json",
      "next": "Add Galaga-family visual contact-sheet comparison, sprite readability labels, and model-assisted visual critique."
    },
    {
      "rank": 14,
      "source": "Aurora evidence-cycle windows",
      "axis": "general ingestion framework",
      "artifactType": "manifests, contact sheets, traces, event logs, audio timelines",
      "coverage": "4 planned windows",
      "annotationStatus": "seed-plan-only",
      "confidence": "medium",
      "linkedMetric": "Level arc / challenge variation / visual look",
      "anchor": "reference-artifacts/analyses/evidence-cycle-dashboard/evidence-cycle-dashboard.json",
      "next": "Refresh evidence-cycle dashboard and promote window status into a canonical reference-corpus manifest."
    },
    {
      "rank": 15,
      "source": "Reference manifests and event logs inventory",
      "axis": "source provenance / annotation coverage",
      "artifactType": "source-manifest.json and reference-events.json",
      "coverage": "31 manifests; 11 event logs",
      "annotationStatus": "mixed",
      "confidence": "mixed",
      "linkedMetric": "All conformance metrics",
      "anchor": "reference-artifacts/analyses",
      "next": "Normalize provenance, duration, source confidence, and linked metric fields into a generated corpus manifest."
    },
    {
      "rank": 16,
      "source": "Reference contact sheets and frame evidence",
      "axis": "visual / motion / entry formation",
      "artifactType": "contact sheets and still frames",
      "coverage": "72 contact/frame evidence files",
      "annotationStatus": "extracted, partially labeled",
      "confidence": "medium",
      "linkedMetric": "Visual look, alien entry, challenge variation",
      "anchor": "reference-artifacts/analyses",
      "next": "Attach contact-sheet families to metric rows and add image-level comparison scores."
    }
  ],
  "ingestionTable": [
    {
      "Priority": 1,
      "Source / evidence family": "Galaga-family reference audio clips",
      "Axis": "audio identity / event feedback",
      "Artifact type": "reference m4a cue clips",
      "Coverage": "0 clips",
      "Annotation status": "clipped, mapped, partially scored",
      "Confidence": "medium-high",
      "Linked metric": "Audio identity, event feedback, and cue alignment",
      "Anchor": "src/assets/reference-audio",
      "Missing next": "Add finer event labels for explosion, impact, boss damage, immunity/entry, capture, and rescue semantics."
    },
    {
      "Priority": 2,
      "Source / evidence family": "Aurora audio cue comparison and event-gap reports",
      "Axis": "audio cue scoring",
      "Artifact type": "waveform/spectral/alignment/semantic reports",
      "Coverage": "21 compared cues; semantic 9.78/10; 0 attention rows",
      "Annotation status": "semantic-scored",
      "Confidence": "medium-high",
      "Linked metric": "Audio identity, event feedback, and cue alignment",
      "Anchor": "reference-artifacts/analyses/aurora-audio-event-gap/2026-05-16-08c327dd-dirty-155303/report.json",
      "Missing next": "Tune the highest segment-level gap next: challengePerfect onset. Rerun audio comparison and event-gap analysis after the change."
    },
    {
      "Priority": 3,
      "Source / evidence family": "Aurora Audio Conformance Lab v2",
      "Axis": "audio candidate loop / family promotion decisions",
      "Artifact type": "cue-family risk, candidate history, keeper decision, promotion gate",
      "Coverage": "8/8 target cues swept; 2 keeper candidates tracked; runtime promotions 0; rejected runtime trials 3",
      "Annotation status": "family-scored",
      "Confidence": "medium-high",
      "Linked metric": "Audio identity, event feedback, and cue alignment",
      "Anchor": "reference-artifacts/analyses/aurora-audio-conformance-lab-v2/2026-05-17-f9e7374c-dirty/report.json",
      "Missing next": "challengePerfect: Do not promote Challenge Perfect from isolated onset/body candidates. Replace the next audio strategy with full-phrase/segment-boundary work: stabilize the scorer on canonical reference-vs-reference capture, then generate candidates that optimize onset, body, tail, and live capture segmentation together."
    },
    {
      "Priority": 4,
      "Source / evidence family": "Aurora audio cue contracts",
      "Axis": "audio semantic contract / theme latitude / promotion safety",
      "Artifact type": "cue contract readiness, theme lanes, runtime-trial blockers",
      "Coverage": "8 contracts; readiness 9.1/10; blocked 7; rejected trials n/a",
      "Annotation status": "contract-scored",
      "Confidence": "medium-high",
      "Linked metric": "Audio identity, event feedback, and cue alignment",
      "Anchor": "reference-artifacts/analyses/aurora-audio-cue-contracts/2026-05-11-b83393cd-dirty-201628/report.json",
      "Missing next": "Keep the calibrated layered playerHit runtime cue; next either refine the residual playerHit tail/body gap with the same calibrated scorer or move effort to stagePulse pressure-bed strategy if user impact per compute looks higher."
    },
    {
      "Priority": 5,
      "Source / evidence family": "Aurora audio runtime trial decisions",
      "Axis": "audio promotion evidence / release guardrails",
      "Artifact type": "accepted, rejected, and inconclusive live runtime-trial outcomes",
      "Coverage": "challengePerfect runtime-trial-rejected; candidate perfect-clean-onset-soft-tail",
      "Annotation status": "trial-recorded",
      "Confidence": "medium-high",
      "Linked metric": "Audio identity, event feedback, and cue alignment",
      "Anchor": "reference-artifacts/analyses/aurora-audio-runtime-trials/2026-05-17-f9e7374c-dirty-123945-challenge-perfect-rejected/report.json",
      "Missing next": "Do not promote Challenge Perfect from isolated onset/body candidates. Replace the next audio strategy with full-phrase/segment-boundary work: stabilize the scorer on canonical reference-vs-reference capture, then generate candidates that optimize onset, body, tail, and live capture segmentation together."
    },
    {
      "Priority": 6,
      "Source / evidence family": "Aurora audio risk stability",
      "Axis": "audio measurement stability / promotion confidence",
      "Artifact type": "repeated event-gap volatility report",
      "Coverage": "8 reports; 19 volatile cues; most volatile captureBeam 3.89/10 range",
      "Annotation status": "stability-scored",
      "Confidence": "medium-high",
      "Linked metric": "Audio identity, event feedback, and cue alignment",
      "Anchor": "reference-artifacts/analyses/aurora-audio-risk-stability/2026-05-17-f9e7374c-dirty-124419/report.json",
      "Missing next": "Use median/repeated confirmation before promoting audio changes. Start by stabilizing captureBeam scoring, then retest challengePerfect."
    },
    {
      "Priority": 7,
      "Source / evidence family": "Aurora audio promotion stability gate",
      "Axis": "audio promotion safety / variance-aware gating",
      "Artifact type": "candidate, precheck, event-gap, and stability join",
      "Coverage": "3 cues; 0 runtime trials allowed; 3 stability rejections",
      "Annotation status": "variance-gated",
      "Confidence": "medium-high",
      "Linked metric": "Audio identity, event feedback, and cue alignment",
      "Anchor": "reference-artifacts/analyses/aurora-audio-promotion-stability-gate/2026-05-17-f9e7374c-dirty-125733/report.json",
      "Missing next": "Do not promote challengePerfect. Preserve the candidate/precheck evidence and either stabilize measurement or generate a candidate whose full-theme win exceeds the current stability threshold."
    },
    {
      "Priority": 8,
      "Source / evidence family": "Aurora audio strategy review",
      "Axis": "audio conformance strategy / failure analysis",
      "Artifact type": "diagnosis, revised strategy, and next calibration experiment",
      "Coverage": "5 diagnoses; 6 strategy changes; next challengePerfect",
      "Annotation status": "strategy-reviewed",
      "Confidence": "medium-high",
      "Linked metric": "Audio identity, event feedback, and cue alignment",
      "Anchor": "reference-artifacts/analyses/aurora-audio-strategy-review/2026-05-17-f9e7374c-dirty-125741/report.json",
      "Missing next": "Before any more runtime audio promotion, build a calibration pass that captures Galaga reference cues through the same browser path twice and measures reference-vs-reference, current-vs-current, and current-vs-reference variance for challengePerfect, challengeTransition, gameOver, captureBeam, and stagePulse."
    },
    {
      "Priority": 9,
      "Source / evidence family": "Aurora stagePulse cadence pressure analysis",
      "Axis": "formation pressure / cadence audio",
      "Artifact type": "tracked cadence pressure axes from full audio comparison",
      "Coverage": "pressure 2.7/10; weakest brightness-control",
      "Annotation status": "scored",
      "Confidence": "medium-high",
      "Linked metric": "Audio identity, event feedback, and cue alignment",
      "Anchor": "reference-artifacts/analyses/aurora-stage-pulse-cadence/2026-05-15-93dbdad8-dirty/report.json",
      "Missing next": "Add a cadence-specific candidate generator that jointly optimizes low-band body, brightness control, zero-crossing calm, and gain. Promote only after both repeated focus gates and full audio-theme comparison improve."
    },
    {
      "Priority": 10,
      "Source / evidence family": "Boss entry and formation grammar scorer",
      "Axis": "formation grammar / boss entry / challenge identity",
      "Artifact type": "event grammar, timing, stage-signature, and measurement-debt report",
      "Coverage": "11 boss/formation windows",
      "Annotation status": "scored",
      "Confidence": "medium",
      "Linked metric": "Boss entry and formation grammar",
      "Anchor": "reference-artifacts/analyses/formation-boss-grammar-conformance/2026-05-24-ff249bba/report.json",
      "Missing next": "Promote frame-level boss/escort path traces and formation rack slot coordinates so visual choreography can be scored directly."
    },
    {
      "Priority": 11,
      "Source / evidence family": "Level arc and encounter-shape evidence",
      "Axis": "level arc / challenge / reward",
      "Artifact type": "stage signatures, pressure windows, persona reports",
      "Coverage": "6/6 stage families; 11/6 evidence windows",
      "Annotation status": "scored",
      "Confidence": "medium-high",
      "Linked metric": "Level arc and encounter shape",
      "Anchor": "reference-artifacts/analyses/level-arc-conformance/2026-05-24-ff249bba/report.json",
      "Missing next": "Add more long-play reference windows and expert-route scoring for challenge/reward opportunities."
    },
    {
      "Priority": 12,
      "Source / evidence family": "Stage 4 pressure and loss-window diagnostics",
      "Axis": "pressure / fairness",
      "Artifact type": "loss windows, replay geometry, collision traces",
      "Coverage": "3 promoted windows",
      "Annotation status": "mined, replay-diagnostic",
      "Confidence": "medium",
      "Linked metric": "Stage 4 pressure exact replay / pressure curve precision",
      "Anchor": "reference-artifacts/analyses/aurora-stage4-loss-windows/2026-05-07-fb2f674/report.json",
      "Missing next": "Improve exact replay matching and preserve per-frame attacker/player/shot geometry for candidate tuning."
    },
    {
      "Priority": 13,
      "Source / evidence family": "Aurora visual look screenshots",
      "Axis": "visual look / UI readability",
      "Artifact type": "browser screenshots plus DOM/canvas metrics",
      "Coverage": "4 surfaces",
      "Annotation status": "first-pass scored",
      "Confidence": "medium-low",
      "Linked metric": "Overall visual look and feel",
      "Anchor": "reference-artifacts/analyses/aurora-visual-look-conformance/2026-05-08-fee8820-dirty/report.json",
      "Missing next": "Add Galaga-family visual contact-sheet comparison, sprite readability labels, and model-assisted visual critique."
    },
    {
      "Priority": 14,
      "Source / evidence family": "Aurora evidence-cycle windows",
      "Axis": "general ingestion framework",
      "Artifact type": "manifests, contact sheets, traces, event logs, audio timelines",
      "Coverage": "4 planned windows",
      "Annotation status": "seed-plan-only",
      "Confidence": "medium",
      "Linked metric": "Level arc / challenge variation / visual look",
      "Anchor": "reference-artifacts/analyses/evidence-cycle-dashboard/evidence-cycle-dashboard.json",
      "Missing next": "Refresh evidence-cycle dashboard and promote window status into a canonical reference-corpus manifest."
    },
    {
      "Priority": 15,
      "Source / evidence family": "Reference manifests and event logs inventory",
      "Axis": "source provenance / annotation coverage",
      "Artifact type": "source-manifest.json and reference-events.json",
      "Coverage": "31 manifests; 11 event logs",
      "Annotation status": "mixed",
      "Confidence": "mixed",
      "Linked metric": "All conformance metrics",
      "Anchor": "reference-artifacts/analyses",
      "Missing next": "Normalize provenance, duration, source confidence, and linked metric fields into a generated corpus manifest."
    },
    {
      "Priority": 16,
      "Source / evidence family": "Reference contact sheets and frame evidence",
      "Axis": "visual / motion / entry formation",
      "Artifact type": "contact sheets and still frames",
      "Coverage": "72 contact/frame evidence files",
      "Annotation status": "extracted, partially labeled",
      "Confidence": "medium",
      "Linked metric": "Visual look, alien entry, challenge variation",
      "Anchor": "reference-artifacts/analyses",
      "Missing next": "Attach contact-sheet families to metric rows and add image-level comparison scores."
    }
  ],
  "newFirstClassAxes": [
    "Alien entry to levels: formation layout, timing, path method, and whether different stages enter differently.",
    "Boss entry and formation grammar: boss timing, escort composition, formation settle evidence, challenge pattern identity, stage variation, and path/slot precision.",
    "Challenge-stage variation: new alien types, new entry formations/styles, path families, reward/result feedback, and teaching value.",
    "Overall visual look and feel: gameplay readability, start/attract typography density, copy complexity, color discipline, and reference contact sheets.",
    "Arcade console frame UI: cabinet frame, bezel/rails, build/date trust signals, button density, and arcade-style containment.",
    "Popup/help/scoring surfaces: help, scoring, leaderboard, account, feedback, and game-over result formatting as their own modal-quality family."
  ],
  "maintenanceRules": [
    "Refresh this artifact after each full quality score, investment-priority run, or major conformance loop.",
    "Before a serious /dev, /beta, or /production release candidate, rerun npm run harness:analyze:conformance-economics and npm run harness:build:release-conformance-dashboard so release docs include conformance, resource/time, chart, past-goal, and next-goal reads.",
    "Any long-cycle local compute or model/API/GPU-assisted assessment should be wrapped with npm run harness:measure and declared with its axis and resource classes.",
    "Ship the read-only conformance dashboard with each dev/beta/production lane; keep raw ingestion workspaces and unreviewed evidence engineering-owned unless a Root-gated evidence browser is explicitly approved.",
    "Treat rows marked estimated/composite as measurement debt: useful for planning, but not release-proof until backed by a harness.",
    "Keep user-facing release gates separate from harness-learning wins. A rejected candidate still belongs in artifacts when it teaches the loop what not to keep.",
    "Prefer work with a large score gap, high user-experience impact, reusable ingestion/harness value, and clear guardrails."
  ]
}
