# ai-robot-core/ai-service/tests/test_semantic_matcher.py

# 211 lines
# 7.4 KiB
# Python

"""
Unit tests for SemanticMatcher.
[AC-AISVC-113, AC-AISVC-114] Tests for semantic matching.
"""
import pytest
from unittest.mock import AsyncMock, MagicMock, patch
import uuid
from app.services.intent.semantic_matcher import SemanticMatcher
from app.services.intent.models import (
FusionConfig,
SemanticCandidate,
SemanticMatchResult,
)
@pytest.fixture
def mock_embedding_provider():
    """Provide an async embedding-provider double with canned vectors.

    ``embed`` resolves to a single 768-element vector filled with 0.1.
    ``embed_batch`` resolves to two 768-element vectors, one filled with 0.1
    and one filled with 0.2. Individual tests may override either return
    value or attach a ``side_effect``.
    """
    single_vector = [0.1] * 768
    batch_vectors = [[0.1] * 768, [0.2] * 768]

    embedder = AsyncMock()
    embedder.embed_batch = AsyncMock(return_value=batch_vectors)
    embedder.embed = AsyncMock(return_value=single_vector)
    return embedder
@pytest.fixture
def mock_rule():
    """Provide an enabled intent-rule double that carries an intent vector.

    The rule has a random UUID, a 768-element ``intent_vector`` filled with
    0.1, and no ``semantic_examples``.
    """
    intent_rule = MagicMock()
    # ``configure_mock`` assigns via setattr, so ``name`` and ``id`` become
    # ordinary attributes instead of being consumed by the Mock constructor.
    intent_rule.configure_mock(
        id=uuid.uuid4(),
        name="Test Intent",
        intent_vector=[0.1] * 768,
        semantic_examples=None,
        is_enabled=True,
    )
    return intent_rule
@pytest.fixture
def mock_rule_with_examples():
    """Provide an enabled intent-rule double that carries example phrases.

    The rule has a random UUID, two ``semantic_examples`` and no
    ``intent_vector``.
    """
    example_rule = MagicMock()
    # ``configure_mock`` assigns via setattr, so ``name`` and ``id`` become
    # ordinary attributes instead of being consumed by the Mock constructor.
    example_rule.configure_mock(
        id=uuid.uuid4(),
        name="Test Intent with Examples",
        intent_vector=None,
        semantic_examples=["我想退货", "如何退款"],
        is_enabled=True,
    )
    return example_rule
@pytest.fixture
def config():
    """Provide a ``FusionConfig`` constructed without any arguments."""
    default_config = FusionConfig()
    return default_config
class TestSemanticMatcher:
    """Tests for the SemanticMatcher class.

    Every test wraps a mocked embedding provider, so the embedding calls are
    answered with canned vectors (or canned failures) set up by the fixtures
    and by the individual tests.
    """

    @pytest.mark.asyncio
    async def test_init(self, mock_embedding_provider, config):
        """The constructor keeps the provider and config it was given."""
        matcher = SemanticMatcher(mock_embedding_provider, config)

        # Identity, not equality: the matcher must hold the very same provider.
        assert matcher._embedding_provider is mock_embedding_provider
        assert matcher._config == config

    @pytest.mark.asyncio
    async def test_match_disabled(self, mock_embedding_provider):
        """With the matcher disabled, match() is skipped with no candidates."""
        disabled_config = FusionConfig(semantic_matcher_enabled=False)
        matcher = SemanticMatcher(mock_embedding_provider, disabled_config)

        result = await matcher.match("test message", [], "tenant-1")

        assert result.skipped is True
        assert result.skip_reason == "disabled"
        assert result.candidates == []

    @pytest.mark.asyncio
    async def test_match_no_semantic_config(
        self, mock_embedding_provider, config, mock_rule
    ):
        """A rule set without vectors or examples makes match() skip."""
        # Strip both sources of semantic information from the only rule.
        mock_rule.intent_vector = None
        mock_rule.semantic_examples = None
        matcher = SemanticMatcher(mock_embedding_provider, config)

        result = await matcher.match("test message", [mock_rule], "tenant-1")

        assert result.skipped is True
        assert result.skip_reason == "no_semantic_config"

    @pytest.mark.asyncio
    async def test_match_mode_a_with_intent_vector(
        self, mock_embedding_provider, config, mock_rule
    ):
        """Mode A: a rule with a pre-computed intent vector yields a match."""
        # Same uniform 0.1 vector as the one this test expects on the rule's
        # ``intent_vector``, hence the high expected score.
        mock_embedding_provider.embed.return_value = [0.1] * 768
        matcher = SemanticMatcher(mock_embedding_provider, config)

        result = await matcher.match("我想退货", [mock_rule], "tenant-1")

        assert result.skipped is False
        assert result.skip_reason is None
        assert len(result.candidates) == 1
        assert result.top_score > 0.9
        assert result.duration_ms >= 0

    @pytest.mark.asyncio
    async def test_match_mode_b_with_examples(
        self, mock_embedding_provider, config, mock_rule_with_examples
    ):
        """Mode B: a rule with semantic examples yields a match."""
        # The message embedding and both batch embeddings are the same vector.
        mock_embedding_provider.embed.return_value = [0.1] * 768
        mock_embedding_provider.embed_batch.return_value = [
            [0.1] * 768,
            [0.1] * 768,
        ]
        matcher = SemanticMatcher(mock_embedding_provider, config)

        result = await matcher.match(
            "我想退货", [mock_rule_with_examples], "tenant-1"
        )

        assert result.skipped is False
        assert len(result.candidates) == 1
        assert result.top_score > 0.9

    @pytest.mark.asyncio
    async def test_match_embedding_timeout(
        self, mock_embedding_provider, config, mock_rule
    ):
        """A timeout raised by the provider makes match() skip."""
        import asyncio

        mock_embedding_provider.embed.side_effect = asyncio.TimeoutError()
        # Named distinctly so the injected ``config`` fixture is not shadowed.
        timeout_config = FusionConfig(semantic_matcher_timeout_ms=100)
        matcher = SemanticMatcher(mock_embedding_provider, timeout_config)

        result = await matcher.match("test message", [mock_rule], "tenant-1")

        assert result.skipped is True
        assert "embedding_timeout" in result.skip_reason

    @pytest.mark.asyncio
    async def test_match_embedding_error(
        self, mock_embedding_provider, config, mock_rule
    ):
        """A generic exception raised by the provider makes match() skip."""
        mock_embedding_provider.embed.side_effect = Exception("Embedding failed")
        matcher = SemanticMatcher(mock_embedding_provider, config)

        result = await matcher.match("test message", [mock_rule], "tenant-1")

        assert result.skipped is True
        assert "embedding_error" in result.skip_reason

    @pytest.mark.asyncio
    async def test_match_top_k_limit(self, mock_embedding_provider, config):
        """match() returns no more candidates than ``semantic_top_k``."""
        # Five enabled rules, each with its own uniform intent vector.
        rules = []
        for i in range(5):
            rule = MagicMock()
            rule.id = uuid.uuid4()
            rule.name = f"Intent {i}"
            rule.intent_vector = [0.1 + i * 0.01] * 768
            rule.semantic_examples = None
            rule.is_enabled = True
            rules.append(rule)

        mock_embedding_provider.embed.return_value = [0.1] * 768
        # Named distinctly so the injected ``config`` fixture is not shadowed.
        top_k_config = FusionConfig(semantic_top_k=3)
        matcher = SemanticMatcher(mock_embedding_provider, top_k_config)

        result = await matcher.match("test message", rules, "tenant-1")

        assert len(result.candidates) <= 3

    def test_cosine_similarity(self, mock_embedding_provider, config):
        """_cosine_similarity handles identical, orthogonal and oblique vectors."""
        matcher = SemanticMatcher(mock_embedding_provider, config)

        # Compare with a tolerance: the result is a float computation, so exact
        # equality would make the test depend on rounding details.
        identical = matcher._cosine_similarity([1.0, 0.0, 0.0], [1.0, 0.0, 0.0])
        assert identical == pytest.approx(1.0)

        orthogonal = matcher._cosine_similarity([1.0, 0.0, 0.0], [0.0, 1.0, 0.0])
        assert orthogonal == pytest.approx(0.0, abs=1e-9)

        # Vectors 45 degrees apart: strictly between the two extremes.
        oblique = matcher._cosine_similarity([1.0, 1.0, 0.0], [1.0, 0.0, 0.0])
        assert 0.0 < oblique < 1.0

    def test_cosine_similarity_empty_vectors(self, mock_embedding_provider, config):
        """_cosine_similarity returns 0.0 when either vector is empty."""
        matcher = SemanticMatcher(mock_embedding_provider, config)

        assert matcher._cosine_similarity([], [1.0]) == 0.0
        assert matcher._cosine_similarity([1.0], []) == 0.0
        assert matcher._cosine_similarity([], []) == 0.0

    def test_has_semantic_config(self, mock_embedding_provider, config, mock_rule):
        """_has_semantic_config is true for a vector or examples, else false."""
        matcher = SemanticMatcher(mock_embedding_provider, config)

        # Intent vector only.
        mock_rule.intent_vector = [0.1] * 768
        mock_rule.semantic_examples = None
        assert matcher._has_semantic_config(mock_rule) is True

        # Semantic examples only.
        mock_rule.intent_vector = None
        mock_rule.semantic_examples = ["example"]
        assert matcher._has_semantic_config(mock_rule) is True

        # Neither source present.
        mock_rule.intent_vector = None
        mock_rule.semantic_examples = None
        assert matcher._has_semantic_config(mock_rule) is False