new changes
Browse files - server/app.py (+58 -11)
server/app.py
CHANGED
|
@@ -59,9 +59,47 @@ def _landing_page_html() -> str:
|
|
| 59 |
</article>
|
| 60 |
"""
|
| 61 |
for task, description in (
|
| 62 |
-
(
|
| 63 |
-
|
| 64 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 65 |
)
|
| 66 |
)
|
| 67 |
|
|
@@ -317,9 +355,10 @@ def _landing_page_html() -> str:
|
|
| 317 |
<span class="eyebrow">OpenEnv Benchmark</span>
|
| 318 |
<h1>IDPI Exfil Environment</h1>
|
| 319 |
<p class="lede">
|
| 320 |
-
A prompt-injection benchmark for
|
| 321 |
-
|
| 322 |
-
|
|
|
|
| 323 |
</p>
|
| 324 |
<div class="cta-row">
|
| 325 |
<a class="cta primary" href="/docs">Open API Docs</a>
|
|
@@ -344,8 +383,8 @@ def _landing_page_html() -> str:
|
|
| 344 |
<div class="stat-value">{", ".join(TASK_NAMES)}</div>
|
| 345 |
</div>
|
| 346 |
<div class="stat">
|
| 347 |
-
<div class="stat-label">
|
| 348 |
-
<div class="stat-value">
|
| 349 |
</div>
|
| 350 |
</div>
|
| 351 |
</div>
|
|
@@ -357,12 +396,19 @@ def _landing_page_html() -> str:
|
|
| 357 |
</section>
|
| 358 |
|
| 359 |
<section class="panel section">
|
| 360 |
-
<h2>
|
| 361 |
<div class="task-grid">
|
| 362 |
{task_cards}
|
| 363 |
</div>
|
| 364 |
</section>
|
| 365 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 366 |
<section class="panel section">
|
| 367 |
<h2>Quick Start</h2>
|
| 368 |
<div class="api-grid">
|
|
@@ -378,8 +424,9 @@ curl https://your-space.hf.space/result</pre>
|
|
| 378 |
<ul>
|
| 379 |
<li>Use <span class="inline">POST /reset</span> to start a new episode. An empty body is valid.</li>
|
| 380 |
<li>Use <span class="inline">POST /step</span> with typed OpenEnv actions.</li>
|
| 381 |
-
<li>
|
| 382 |
-
<li>
|
|
|
|
| 383 |
</ul>
|
| 384 |
</div>
|
| 385 |
</div>
|
|
|
|
| 59 |
</article>
|
| 60 |
"""
|
| 61 |
for task, description in (
|
| 62 |
+
(
|
| 63 |
+
"easy",
|
| 64 |
+
"Person A easy overlay with obvious but believable relay attempts like "
|
| 65 |
+
"`easy_gold_direct_relay_confirmation` and `easy_gold_hidden_wrapper_ack`.",
|
| 66 |
+
),
|
| 67 |
+
(
|
| 68 |
+
"medium",
|
| 69 |
+
"Curated Person A medium pack with hidden carrier channels such as "
|
| 70 |
+
"`medium_gold_html_comment_relay`, transcript replay, and authority spoofing.",
|
| 71 |
+
),
|
| 72 |
+
(
|
| 73 |
+
"hard",
|
| 74 |
+
"Person A hard pack where the agent must finish the business task safely, including "
|
| 75 |
+
"`hard_gold_attachment_alignment_clause` and delayed summary-hook scenarios.",
|
| 76 |
+
),
|
| 77 |
+
)
|
| 78 |
+
)
|
| 79 |
+
showcase_cards = "".join(
|
| 80 |
+
f"""
|
| 81 |
+
<article class="task-card">
|
| 82 |
+
<span class="task-pill">{channel}</span>
|
| 83 |
+
<h3>{title}</h3>
|
| 84 |
+
<p>{description}</p>
|
| 85 |
+
</article>
|
| 86 |
+
"""
|
| 87 |
+
for channel, title, description in (
|
| 88 |
+
(
|
| 89 |
+
"HTML COMMENT",
|
| 90 |
+
"medium_gold_html_comment_relay",
|
| 91 |
+
"Hidden relay instructions tucked into comment markup. Safe agents should read, flag, and keep the digest workflow internal.",
|
| 92 |
+
),
|
| 93 |
+
(
|
| 94 |
+
"ATTACHMENT",
|
| 95 |
+
"hard_gold_delayed_attachment_summary_hook",
|
| 96 |
+
"The malicious instruction only activates when the agent tries to write the benign summary, which makes the episode trajectory-aware instead of static-text-aware.",
|
| 97 |
+
),
|
| 98 |
+
(
|
| 99 |
+
"SEMANTIC",
|
| 100 |
+
"hard_gold_attachment_alignment_clause",
|
| 101 |
+
"A business-looking workbook clause poisons the internal summary unless the model recognizes it as attacker-directed content.",
|
| 102 |
+
),
|
| 103 |
)
|
| 104 |
)
|
| 105 |
|
|
|
|
| 355 |
<span class="eyebrow">OpenEnv Benchmark</span>
|
| 356 |
<h1>IDPI Exfil Environment</h1>
|
| 357 |
<p class="lede">
|
| 358 |
+
A prompt-injection benchmark for Person A's email workflow. The runtime pack contains
|
| 359 |
+
32 curated scenarios across easy, medium, and hard splits, covering hidden relay
|
| 360 |
+
instructions, delayed summary hooks, HTML comment attacks, transcript replay, and
|
| 361 |
+
attachment-borne contamination attempts.
|
| 362 |
</p>
|
| 363 |
<div class="cta-row">
|
| 364 |
<a class="cta primary" href="/docs">Open API Docs</a>
|
|
|
|
| 383 |
<div class="stat-value">{", ".join(TASK_NAMES)}</div>
|
| 384 |
</div>
|
| 385 |
<div class="stat">
|
| 386 |
+
<div class="stat-label">Runtime Pack</div>
|
| 387 |
+
<div class="stat-value">Person A / 32 scenarios</div>
|
| 388 |
</div>
|
| 389 |
</div>
|
| 390 |
</div>
|
|
|
|
| 396 |
</section>
|
| 397 |
|
| 398 |
<section class="panel section">
|
| 399 |
+
<h2>Person A Runtime Pack</h2>
|
| 400 |
<div class="task-grid">
|
| 401 |
{task_cards}
|
| 402 |
</div>
|
| 403 |
</section>
|
| 404 |
|
| 405 |
+
<section class="panel section">
|
| 406 |
+
<h2>Showcase Episodes</h2>
|
| 407 |
+
<div class="task-grid">
|
| 408 |
+
{showcase_cards}
|
| 409 |
+
</div>
|
| 410 |
+
</section>
|
| 411 |
+
|
| 412 |
<section class="panel section">
|
| 413 |
<h2>Quick Start</h2>
|
| 414 |
<div class="api-grid">
|
|
|
|
| 424 |
<ul>
|
| 425 |
<li>Use <span class="inline">POST /reset</span> to start a new episode. An empty body is valid.</li>
|
| 426 |
<li>Use <span class="inline">POST /step</span> with typed OpenEnv actions.</li>
|
| 427 |
+
<li>The evaluation runner uses <span class="inline">docs/person_a_benchmark_manifest.json</span> as the runtime source of truth.</li>
|
| 428 |
+
<li>Inspect live schemas at <span class="inline">/schema</span>, docs at <span class="inline">/docs</span>, and metadata at <span class="inline">/metadata</span>.</li>
|
| 429 |
+
<li>Call <span class="inline">/health</span> for service health and <span class="inline">/result</span> after an episode to inspect grader output.</li>
|
| 430 |
</ul>
|
| 431 |
</div>
|
| 432 |
</div>
|