[BugFix][Frontend] apply task instruction as system prompt in cohere v2/embed (#38362)

Signed-off-by: walterbm <walter.beller.morales@gmail.com>
2026-03-28 14:30:54 -04:00
parent aa4eb0db78
commit fafca38adc
3 changed files with 245 additions and 42 deletions
--- a/tests/entrypoints/pooling/embed/test_cohere_openai_parity.py
+++ b/tests/entrypoints/pooling/embed/test_cohere_openai_parity.py
@@ -57,16 +57,25 @@ def _openai_embed(
    return [item["embedding"] for item in resp.json()["data"]]


+def _cosine_sim(a: list[float], b: list[float]) -> float:
+    va, vb = np.array(a), np.array(b)
+    return float(np.dot(va, vb) / (np.linalg.norm(va) * np.linalg.norm(vb)))
+
+
 def test_single_text_parity(server: RemoteOpenAIServer):
-    """A single text should produce identical embeddings via both APIs."""
+    """A single text should produce equivalent embeddings via both APIs."""
    texts = ["the quick brown fox jumps over the lazy dog"]
    v2 = _cohere_embed(server, texts)
    v1 = _openai_embed(server, texts)
-    np.testing.assert_allclose(v2[0], v1[0], rtol=1e-5)
+    # Full-suite BF16 runs can introduce tiny numerical drift even when both
+    # endpoints are functionally equivalent, so compare semantic equivalence
+    # instead of exact elementwise equality.
+    cos = _cosine_sim(v2[0], v1[0])
+    assert cos > 0.9999, f"single-text parity failed, cosine={cos}"


 def test_batch_parity(server: RemoteOpenAIServer):
-    """A batch of texts should produce identical embeddings via both APIs,
+    """A batch of texts should produce equivalent embeddings via both APIs,
    in the same order."""
    texts = [
        "machine learning",
@@ -76,8 +85,18 @@ def test_batch_parity(server: RemoteOpenAIServer):
    v2 = _cohere_embed(server, texts)
    v1 = _openai_embed(server, texts)
    assert len(v2) == len(v1) == 3
+
+    similarities = np.array(
+        [[_cosine_sim(v2_emb, v1_emb) for v1_emb in v1] for v2_emb in v2]
+    )
    for i in range(3):
-        np.testing.assert_allclose(v2[i], v1[i], rtol=1e-5, err_msg=f"index {i}")
+        assert int(np.argmax(similarities[i])) == i, (
+            f"batch parity order mismatch at index {i}: "
+            f"similarities={similarities[i].tolist()}"
+        )
+        assert similarities[i, i] > 0.9999, (
+            f"batch parity failed at index {i}, cosine={similarities[i, i]}"
+        )


 def test_token_count_parity(server: RemoteOpenAIServer):