// /benchmark/lib/hub-app.jsx — the hub.
// Reads window.BATCHES (manifest) and window.BENCH (latest-batch data).
//
// v1 ships one published batch + one archived. The trend chart + archive
// list come back when a third batch publishes. See
// docs/runbooks/publish-benchmark.md.

function HubHero() {
  const latest = window.BATCHES.list[0];
  const m = window.BENCH.meta;
  return (
    <header className="hero">
      <div className="hero-eyebrow">
        <span className="dot"></span>
        <span>Public benchmark, {latest.date}</span>
      </div>
      <h1>
        Same agent. Same model.<br />
        <span className="stat">~32%</span> smaller LLM bill, cheaper on <span className="stat">every model</span>.
      </h1>
      <div className="hero-meta">
        <span>WebArena</span>
        <span className="dot-sep">·</span>
        <span>{m.totalAttempts} attempts</span>
        <span className="dot-sep">·</span>
        <span>{m.nModels} models</span>
        <span className="dot-sep">·</span>
        <span>{m.nTasksTotal} tasks</span>
        <span className="dot-sep">·</span>
        <span>matched pair</span>
        <span className="dot-sep">·</span>
        <a href="#methodology" style={{color:'var(--accent)'}}>methodology</a>
      </div>
      <div className="hero-ctas">
        <a className="btn primary" href="/#bottom-cta">
          Try it on your agent <span className="arrow">→</span>
        </a>
      </div>
    </header>
  );
}

function LatestPointer({ batch }) {
  if (!batch) return null;
  return (
    <a className="latest-pointer" href={batch.detailPath || '#'}>
      <div className="latest-pointer-left">
        <div className="latest-pointer-eyebrow">
          <span className="dot"></span>
          <span>Latest, {batch.date}, v{batch.codecVersion}</span>
        </div>
        <div className="latest-pointer-headline">{batch.headline}</div>
      </div>
      <div className="latest-pointer-cta">
        open full batch <span className="arrow">→</span>
      </div>
    </a>
  );
}

function PrivacyByDesign() {
  const p = window.BENCH.privacy;
  if (!p) return null;
  return (
    <div className="privacy-card">
      <div className="privacy-grid">
        <div className="privacy-side jdc">
          <div className="privacy-label">Codec on</div>
          <div className="privacy-answer"><code>{p.examples[0].agentAnswer}</code></div>
          <div className="privacy-counter">
            <span className="privacy-counter-v">{p.summary.codecOnKeptOffWire}</span>
            <span className="privacy-counter-k">attempts kept the email off the wire</span>
          </div>
          <div className="privacy-note">Email replaced with a placeholder on the customer’s machine before any snapshot reached the model.</div>
        </div>
        <div className="privacy-vs">vs.</div>
        <div className="privacy-side bypass">
          <div className="privacy-label">Codec off</div>
          <div className="privacy-answer"><code>{p.examples[1].agentAnswer}</code></div>
          <div className="privacy-counter">
            <span className="privacy-counter-v">{p.summary.codecOffSentAsIs}</span>
            <span className="privacy-counter-k">attempts sent the email to the model</span>
          </div>
          <div className="privacy-note">No redaction layer in path. The model received the email as it appeared on the page.</div>
        </div>
      </div>
      <div className="privacy-caption">
        Same agent, same model, same task. The codec did what it was meant to do.
      </div>
      <div className="privacy-footnote">
        <p style={{margin:'0 0 10px'}}>
          <strong style={{color:'var(--fg-2)', fontWeight:500}}>Names are not redacted.</strong>{' '}
          Names overlap too heavily with everyday words and product copy for
          regex redaction to be safe. False-positive redaction would corrupt
          the page state the agent navigates against. Contact identifiers
          (email, phone, address, SSN, credit-card-like numbers) are
          structurally identifiable and what regulated workloads care about.
          That’s where the redaction line is drawn.
        </p>
        <p style={{margin:0}}>
          Private by design. A deliberate per-task override is on the
          roadmap for benchmark and test runs where exposing personal data
          to the model is explicit.
        </p>
      </div>
    </div>
  );
}

function HubApp() {
  const latest = window.BATCHES.list[0];

  return (
    <div className="page">
      <TopNav>
        <span className="crumb crumb-here">benchmarks</span>
      </TopNav>

      <HubHero />

      <section className="section">
        <div className="section-head">
          <h2>The smartest model. <em>The cheapest bill</em>.</h2>
          <p className="blurb">
            Same agent, same tasks. Only the codec changes. Even on the
            strongest model in this batch, switching the codec on solves
            more tasks at a lower price per success.
          </p>
        </div>
        <LatestPointer batch={latest} />
        <Inversion />
      </section>

      <section className="section">
        <div className="section-head">
          <h2>Four models. <em>Same story</em> on every one.</h2>
          <p className="blurb">
            One matched pair per model: same agent, same tasks, codec on
            versus codec off. Cheaper per successful task on every model,
            without ever losing on tasks finished. The full task-by-task
            breakdown lives on the
            <a href={latest.detailPath} style={{color:'var(--accent)'}}> batch page</a>.
          </p>
        </div>
        <PairCards />
      </section>

      <section className="section">
        <div className="section-head">
          <h2>Private <em>by design</em>.</h2>
          <p className="blurb">
            One of the ten tasks in this batch asks the agent to return a
            customer’s email address. The codec redacts personal data on
            your machine before any snapshot leaves your network, so the
            email never reaches the model. We report this task separately
            because the codec is meant to “fail” the evaluator here. That’s
            the feature.
          </p>
        </div>
        <PrivacyByDesign />
      </section>

      <section className="section">
        <div className="section-head">
          <h2>Benchmark <em>methodology</em>.</h2>
          <p className="blurb">
            The rules we apply on every batch. Per-batch fingerprints (codec
            version, upstream commits, pricing snapshot) live on each batch’s
            own page.
          </p>
        </div>
        <HubMethodology />
      </section>

      <section className="section">
        <div className="section-head">
          <h2>Frequently asked <em>questions</em>.</h2>
          <p className="blurb">
            What we get asked most, plus the questions we keep volunteering
            because anyone running benchmarks already wonders.
          </p>
        </div>
        <FAQ />
      </section>

    </div>
  );
}

window.HubApp = HubApp;
window.PrivacyByDesign = PrivacyByDesign;

const hubRoot = ReactDOM.createRoot(document.getElementById('root'));
hubRoot.render(<HubApp />);
