{
  "fixture_version": "1",
  "fixture_name": "validation-set-v1",
  "measured_at": "2026-05-12T02:47:41.637Z",
  "base": "http://localhost:3000",
  "with_llm": false,
  "counts": {
    "total": 20,
    "passed": 20,
    "failed": 0,
    "skipped": 0,
    "true_positive": 15,
    "true_negative": 5,
    "false_positive": 0,
    "false_negative": 0
  },
  "metrics": {
    "precision": 1,
    "recall": 1,
    "f1": 1,
    "pass_rate": 1
  },
  "by_category": {
    "lancet-fabricated": {
      "total": 3,
      "passed": 3,
      "http_failed": 0,
      "pass_rate": 1
    },
    "known-good": {
      "total": 5,
      "passed": 5,
      "http_failed": 0,
      "pass_rate": 1
    },
    "wrong-doi": {
      "total": 4,
      "passed": 4,
      "http_failed": 0,
      "pass_rate": 1
    },
    "paraphrase": {
      "total": 4,
      "passed": 4,
      "http_failed": 0,
      "pass_rate": 1
    },
    "invented": {
      "total": 4,
      "passed": 4,
      "http_failed": 0,
      "pass_rate": 1
    }
  },
  "results": [
    {
      "id": "lancet-example-a",
      "category": "lancet-fabricated",
      "http_status": 200,
      "verdict": "mismatch",
      "confidence": "high",
      "error_code": null,
      "error_message": null,
      "expected": {
        "verdict": "mismatch",
        "confidence_in": ["high"]
      },
      "match": {
        "ok": true
      },
      "elapsed_ms": 5137,
      "attempts": 1,
      "provenance": {
        "stages_run": ["compare", "search"],
        "resolved_via": "crossref",
        "registries_searched": [
          {
            "registry": "crossref",
            "ok": true,
            "count": 10
          },
          {
            "registry": "pubmed",
            "ok": true,
            "count": 0
          },
          {
            "registry": "openalex",
            "ok": true,
            "count": 0
          }
        ]
      }
    },
    {
      "id": "lancet-example-b",
      "category": "lancet-fabricated",
      "http_status": 200,
      "verdict": "mismatch",
      "confidence": "high",
      "error_code": null,
      "error_message": null,
      "expected": {
        "verdict": "mismatch",
        "confidence_in": ["high"]
      },
      "match": {
        "ok": true
      },
      "elapsed_ms": 2057,
      "attempts": 1,
      "provenance": {
        "stages_run": ["compare", "search"],
        "resolved_via": "crossref",
        "registries_searched": [
          {
            "registry": "crossref",
            "ok": true,
            "count": 10
          },
          {
            "registry": "pubmed",
            "ok": true,
            "count": 0
          },
          {
            "registry": "openalex",
            "ok": true,
            "count": 0
          }
        ]
      }
    },
    {
      "id": "lancet-example-c",
      "category": "lancet-fabricated",
      "http_status": 200,
      "verdict": "mismatch",
      "confidence": "high",
      "error_code": null,
      "error_message": null,
      "expected": {
        "verdict": "mismatch",
        "confidence_in": ["high"]
      },
      "match": {
        "ok": true
      },
      "elapsed_ms": 3064,
      "attempts": 1,
      "provenance": {
        "stages_run": ["compare", "search"],
        "resolved_via": "crossref",
        "registries_searched": [
          {
            "registry": "crossref",
            "ok": true,
            "count": 10
          },
          {
            "registry": "pubmed",
            "ok": true,
            "count": 0
          },
          {
            "registry": "openalex",
            "ok": true,
            "count": 10
          }
        ]
      }
    },
    {
      "id": "good-topaz-lancet-2026",
      "category": "known-good",
      "http_status": 200,
      "verdict": "matched",
      "confidence": "high",
      "error_code": null,
      "error_message": null,
      "expected": {
        "verdict": "matched",
        "confidence_in": ["high", "medium"]
      },
      "match": {
        "ok": true
      },
      "elapsed_ms": 758,
      "attempts": 1,
      "provenance": {
        "stages_run": ["compare"],
        "resolved_via": "crossref"
      }
    },
    {
      "id": "good-bnt162b2-polack",
      "category": "known-good",
      "http_status": 200,
      "verdict": "matched",
      "confidence": "high",
      "error_code": null,
      "error_message": null,
      "expected": {
        "verdict": "matched",
        "confidence_in": ["high", "medium"]
      },
      "match": {
        "ok": true
      },
      "elapsed_ms": 789,
      "attempts": 1,
      "provenance": {
        "stages_run": ["compare"],
        "resolved_via": "crossref"
      }
    },
    {
      "id": "good-attention-vaswani",
      "category": "known-good",
      "http_status": 200,
      "verdict": "matched",
      "confidence": "high",
      "error_code": null,
      "error_message": null,
      "expected": {
        "verdict": "matched",
        "confidence_in": ["high", "medium"]
      },
      "match": {
        "ok": true
      },
      "elapsed_ms": 344,
      "attempts": 1,
      "provenance": {
        "stages_run": ["compare"],
        "resolved_via": "arxiv"
      }
    },
    {
      "id": "good-bert-devlin",
      "category": "known-good",
      "http_status": 200,
      "verdict": "matched",
      "confidence": "high",
      "error_code": null,
      "error_message": null,
      "expected": {
        "verdict": "matched",
        "confidence_in": ["high", "medium"]
      },
      "match": {
        "ok": true
      },
      "elapsed_ms": 284,
      "attempts": 1,
      "provenance": {
        "stages_run": ["compare"],
        "resolved_via": "arxiv"
      }
    },
    {
      "id": "good-chatgpt-neuroscience",
      "category": "known-good",
      "http_status": 200,
      "verdict": "matched",
      "confidence": "high",
      "error_code": null,
      "error_message": null,
      "expected": {
        "verdict": "matched",
        "confidence_in": ["high", "medium"]
      },
      "match": {
        "ok": true
      },
      "elapsed_ms": 1208,
      "attempts": 1,
      "provenance": {
        "stages_run": ["compare"],
        "resolved_via": "crossref"
      }
    },
    {
      "id": "wrong-doi-topaz-on-bnt-doi",
      "category": "wrong-doi",
      "http_status": 200,
      "verdict": "ambiguous",
      "confidence": "high",
      "error_code": null,
      "error_message": null,
      "expected": {
        "verdict_in": ["ambiguous", "mismatch"],
        "confidence_in": ["high", "medium"]
      },
      "match": {
        "ok": true
      },
      "elapsed_ms": 1664,
      "attempts": 1,
      "provenance": {
        "stages_run": ["compare", "search"],
        "resolved_via": "crossref",
        "registries_searched": [
          {
            "registry": "crossref",
            "ok": true,
            "count": 10
          },
          {
            "registry": "pubmed",
            "ok": true,
            "count": 0
          },
          {
            "registry": "openalex",
            "ok": true,
            "count": 1
          }
        ]
      }
    },
    {
      "id": "wrong-doi-attention-on-topaz-doi",
      "category": "wrong-doi",
      "http_status": 200,
      "verdict": "ambiguous",
      "confidence": "high",
      "error_code": null,
      "error_message": null,
      "expected": {
        "verdict_in": ["ambiguous", "mismatch"],
        "confidence_in": ["high", "medium"]
      },
      "match": {
        "ok": true
      },
      "elapsed_ms": 1788,
      "attempts": 1,
      "provenance": {
        "stages_run": ["compare", "search"],
        "resolved_via": "crossref",
        "registries_searched": [
          {
            "registry": "crossref",
            "ok": true,
            "count": 10
          },
          {
            "registry": "pubmed",
            "ok": true,
            "count": 0
          },
          {
            "registry": "openalex",
            "ok": true,
            "count": 10
          }
        ]
      }
    },
    {
      "id": "wrong-doi-bert-on-attention-arxiv",
      "category": "wrong-doi",
      "http_status": 200,
      "verdict": "mismatch",
      "confidence": "high",
      "error_code": null,
      "error_message": null,
      "expected": {
        "verdict_in": ["ambiguous", "mismatch"],
        "confidence_in": ["high", "medium"]
      },
      "match": {
        "ok": true
      },
      "elapsed_ms": 9663,
      "attempts": 2,
      "provenance": {
        "stages_run": ["compare", "search"],
        "resolved_via": "arxiv",
        "registries_searched": [
          {
            "registry": "crossref",
            "ok": true,
            "count": 10
          },
          {
            "registry": "pubmed",
            "ok": true,
            "count": 0
          },
          {
            "registry": "openalex",
            "ok": true,
            "count": 7
          }
        ]
      }
    },
    {
      "id": "wrong-doi-bnt-on-topaz-doi",
      "category": "wrong-doi",
      "http_status": 200,
      "verdict": "ambiguous",
      "confidence": "high",
      "error_code": null,
      "error_message": null,
      "expected": {
        "verdict_in": ["ambiguous", "mismatch"],
        "confidence_in": ["high", "medium"]
      },
      "match": {
        "ok": true
      },
      "elapsed_ms": 61327,
      "attempts": 2,
      "provenance": {
        "stages_run": ["compare", "search"],
        "resolved_via": "crossref",
        "registries_searched": [
          {
            "registry": "crossref",
            "ok": true,
            "count": 10
          },
          {
            "registry": "pubmed",
            "ok": true,
            "count": 2
          },
          {
            "registry": "openalex",
            "ok": true,
            "count": 10
          }
        ]
      }
    },
    {
      "id": "paraphrase-bert-no-acronym",
      "category": "paraphrase",
      "http_status": 200,
      "verdict": "mismatch",
      "confidence": "low",
      "error_code": null,
      "error_message": null,
      "expected": {
        "verdict": "mismatch",
        "confidence_in": ["low"]
      },
      "match": {
        "ok": true
      },
      "elapsed_ms": 2061,
      "attempts": 1,
      "provenance": {
        "stages_run": ["compare", "search"],
        "resolved_via": "arxiv",
        "registries_searched": [
          {
            "registry": "crossref",
            "ok": true,
            "count": 10
          },
          {
            "registry": "pubmed",
            "ok": true,
            "count": 0
          },
          {
            "registry": "openalex",
            "ok": true,
            "count": 7
          }
        ]
      }
    },
    {
      "id": "paraphrase-chatgpt-abbreviated",
      "category": "paraphrase",
      "http_status": 200,
      "verdict": "mismatch",
      "confidence": "low",
      "error_code": null,
      "error_message": null,
      "expected": {
        "verdict": "mismatch",
        "confidence_in": ["low"]
      },
      "match": {
        "ok": true
      },
      "elapsed_ms": 2164,
      "attempts": 1,
      "provenance": {
        "stages_run": ["compare", "search"],
        "resolved_via": "crossref",
        "registries_searched": [
          {
            "registry": "crossref",
            "ok": true,
            "count": 10
          },
          {
            "registry": "pubmed",
            "ok": true,
            "count": 1
          },
          {
            "registry": "openalex",
            "ok": true,
            "count": 3
          }
        ]
      }
    },
    {
      "id": "paraphrase-bnt-phase3",
      "category": "paraphrase",
      "http_status": 200,
      "verdict": "mismatch",
      "confidence": "low",
      "error_code": null,
      "error_message": null,
      "expected": {
        "verdict": "mismatch",
        "confidence_in": ["low"]
      },
      "match": {
        "ok": true
      },
      "elapsed_ms": 2352,
      "attempts": 1,
      "provenance": {
        "stages_run": ["compare", "search"],
        "resolved_via": "crossref",
        "registries_searched": [
          {
            "registry": "crossref",
            "ok": true,
            "count": 10
          },
          {
            "registry": "pubmed",
            "ok": true,
            "count": 1
          },
          {
            "registry": "openalex",
            "ok": true,
            "count": 10
          }
        ]
      }
    },
    {
      "id": "paraphrase-topaz-reordered",
      "category": "paraphrase",
      "http_status": 200,
      "verdict": "mismatch",
      "confidence": "low",
      "error_code": null,
      "error_message": null,
      "expected": {
        "verdict": "mismatch",
        "confidence_in": ["low"]
      },
      "match": {
        "ok": true
      },
      "elapsed_ms": 5000,
      "attempts": 1,
      "provenance": {
        "stages_run": ["compare", "search"],
        "resolved_via": "crossref",
        "registries_searched": [
          {
            "registry": "crossref",
            "ok": true,
            "count": 10
          },
          {
            "registry": "pubmed",
            "ok": true,
            "count": 1
          },
          {
            "registry": "openalex",
            "ok": true,
            "count": 1
          }
        ]
      }
    },
    {
      "id": "invented-fake-doi-plausible-title",
      "category": "invented",
      "http_status": 200,
      "verdict": "not_found",
      "confidence": "high",
      "error_code": null,
      "error_message": null,
      "expected": {
        "verdict": "not_found",
        "confidence_in": ["high"]
      },
      "match": {
        "ok": true
      },
      "elapsed_ms": 3697,
      "attempts": 1,
      "provenance": {
        "stages_run": ["search"],
        "resolved_via": null,
        "registries_searched": [
          {
            "registry": "crossref",
            "ok": true,
            "count": 0
          },
          {
            "registry": "pubmed",
            "ok": true,
            "count": 0
          },
          {
            "registry": "openalex",
            "ok": true,
            "count": 0
          }
        ]
      }
    },
    {
      "id": "invented-title-only-no-identifier",
      "category": "invented",
      "http_status": 200,
      "verdict": "not_found",
      "confidence": "high",
      "error_code": null,
      "error_message": null,
      "expected": {
        "verdict": "not_found",
        "confidence_in": ["high"]
      },
      "match": {
        "ok": true
      },
      "elapsed_ms": 2395,
      "attempts": 1,
      "provenance": {
        "stages_run": ["search"],
        "resolved_via": null,
        "registries_searched": [
          {
            "registry": "crossref",
            "ok": true,
            "count": 0
          },
          {
            "registry": "pubmed",
            "ok": true,
            "count": 0
          },
          {
            "registry": "openalex",
            "ok": true,
            "count": 0
          }
        ]
      }
    },
    {
      "id": "invented-fake-pmid",
      "category": "invented",
      "http_status": 200,
      "verdict": "not_found",
      "confidence": "high",
      "error_code": null,
      "error_message": null,
      "expected": {
        "verdict": "not_found",
        "confidence_in": ["high"]
      },
      "match": {
        "ok": true
      },
      "elapsed_ms": 1891,
      "attempts": 1,
      "provenance": {
        "stages_run": ["search"],
        "resolved_via": null,
        "registries_searched": [
          {
            "registry": "crossref",
            "ok": true,
            "count": 0
          },
          {
            "registry": "pubmed",
            "ok": true,
            "count": 0
          },
          {
            "registry": "openalex",
            "ok": true,
            "count": 0
          }
        ]
      }
    },
    {
      "id": "invented-fake-doi-resolves-to-nothing",
      "category": "invented",
      "http_status": 200,
      "verdict": "not_found",
      "confidence": "high",
      "error_code": null,
      "error_message": null,
      "expected": {
        "verdict": "not_found",
        "confidence_in": ["high"]
      },
      "match": {
        "ok": true
      },
      "elapsed_ms": 3154,
      "attempts": 1,
      "provenance": {
        "stages_run": ["search"],
        "resolved_via": null,
        "registries_searched": [
          {
            "registry": "crossref",
            "ok": true,
            "count": 0
          },
          {
            "registry": "pubmed",
            "ok": true,
            "count": 0
          },
          {
            "registry": "openalex",
            "ok": true,
            "count": 0
          }
        ]
      }
    }
  ]
}
