"""
Unit tests for SemanticMatcher.

[AC-AISVC-113, AC-AISVC-114] Tests for semantic matching.
"""
|
|
|
|
import pytest
|
|
from unittest.mock import AsyncMock, MagicMock, patch
|
|
import uuid
|
|
|
|
from app.services.intent.semantic_matcher import SemanticMatcher
|
|
from app.services.intent.models import (
|
|
FusionConfig,
|
|
SemanticCandidate,
|
|
SemanticMatchResult,
|
|
)
|
|
|
|
|
|
@pytest.fixture
def mock_embedding_provider():
    """Build an async stand-in for the embedding provider.

    ``embed`` resolves to one 768-element vector of 0.1 and
    ``embed_batch`` resolves to two 768-element vectors (0.1 and 0.2).
    """
    vector_size = 768
    stub = AsyncMock()
    stub.embed = AsyncMock(return_value=[0.1] * vector_size)
    stub.embed_batch = AsyncMock(
        return_value=[[value] * vector_size for value in (0.1, 0.2)]
    )
    return stub
|
|
|
|
|
|
@pytest.fixture
def mock_rule():
    """Build a mock intent rule carrying an intent vector and no examples."""
    intent_rule = MagicMock()
    # configure_mock() is used because passing ``name=`` to the MagicMock
    # constructor would name the mock itself instead of setting ``.name``.
    intent_rule.configure_mock(
        id=uuid.uuid4(),
        name="Test Intent",
        intent_vector=[0.1] * 768,
        semantic_examples=None,
        is_enabled=True,
    )
    return intent_rule
|
|
|
|
|
|
@pytest.fixture
def mock_rule_with_examples():
    """Build a mock intent rule carrying example phrases and no intent vector."""
    attributes = {
        "id": uuid.uuid4(),
        "name": "Test Intent with Examples",
        "intent_vector": None,
        "semantic_examples": ["我想退货", "如何退款"],
        "is_enabled": True,
    }
    intent_rule = MagicMock()
    # Plain attribute assignment, so ``name`` ends up as a real attribute
    # rather than the mock's own display name.
    for attribute, value in attributes.items():
        setattr(intent_rule, attribute, value)
    return intent_rule
|
|
|
|
|
|
@pytest.fixture
def config():
    """Provide a FusionConfig constructed with no arguments."""
    fusion_config = FusionConfig()
    return fusion_config
|
|
|
|
|
|
class TestSemanticMatcher:
    """Tests for the SemanticMatcher class."""

    @pytest.mark.asyncio
    async def test_init(self, mock_embedding_provider, config):
        """The constructor keeps the provider and config it was given."""
        matcher = SemanticMatcher(mock_embedding_provider, config)

        assert matcher._embedding_provider == mock_embedding_provider
        assert matcher._config == config

    @pytest.mark.asyncio
    async def test_match_disabled(self, mock_embedding_provider):
        """match() reports skipped/"disabled" when the matcher is turned off."""
        disabled_config = FusionConfig(semantic_matcher_enabled=False)
        matcher = SemanticMatcher(mock_embedding_provider, disabled_config)

        result = await matcher.match("test message", [], "tenant-1")

        assert result.skipped is True
        assert result.skip_reason == "disabled"
        assert result.candidates == []

    @pytest.mark.asyncio
    async def test_match_no_semantic_config(
        self, mock_embedding_provider, config, mock_rule
    ):
        """match() is skipped when the only rule has neither vector nor examples."""
        mock_rule.intent_vector = None
        mock_rule.semantic_examples = None

        matcher = SemanticMatcher(mock_embedding_provider, config)
        result = await matcher.match("test message", [mock_rule], "tenant-1")

        assert result.skipped is True
        assert result.skip_reason == "no_semantic_config"

    @pytest.mark.asyncio
    async def test_match_mode_a_with_intent_vector(
        self, mock_embedding_provider, config, mock_rule
    ):
        """Mode A: a rule with a pre-computed intent vector yields a candidate."""
        # The embedded message is set to the same values as the rule's
        # intent vector, so a high score is expected.
        mock_embedding_provider.embed.return_value = [0.1] * 768

        matcher = SemanticMatcher(mock_embedding_provider, config)
        result = await matcher.match("我想退货", [mock_rule], "tenant-1")

        assert result.skipped is False
        assert result.skip_reason is None
        assert len(result.candidates) == 1
        assert result.top_score > 0.9
        assert result.duration_ms >= 0

    @pytest.mark.asyncio
    async def test_match_mode_b_with_examples(
        self, mock_embedding_provider, config, mock_rule_with_examples
    ):
        """Mode B: a rule with semantic examples yields a candidate."""
        # Message embedding and both example embeddings are identical.
        mock_embedding_provider.embed.return_value = [0.1] * 768
        mock_embedding_provider.embed_batch.return_value = [
            [0.1] * 768,
            [0.1] * 768,
        ]

        matcher = SemanticMatcher(mock_embedding_provider, config)
        result = await matcher.match(
            "我想退货", [mock_rule_with_examples], "tenant-1"
        )

        assert result.skipped is False
        assert len(result.candidates) == 1
        assert result.top_score > 0.9

    @pytest.mark.asyncio
    async def test_match_embedding_timeout(self, mock_embedding_provider, mock_rule):
        """match() is skipped with an "embedding_timeout" reason on timeout."""
        import asyncio

        mock_embedding_provider.embed.side_effect = asyncio.TimeoutError()

        # A dedicated config is built here, so the shared ``config`` fixture
        # is not requested (it would only be shadowed).
        timeout_config = FusionConfig(semantic_matcher_timeout_ms=100)
        matcher = SemanticMatcher(mock_embedding_provider, timeout_config)
        result = await matcher.match("test message", [mock_rule], "tenant-1")

        assert result.skipped is True
        assert "embedding_timeout" in result.skip_reason

    @pytest.mark.asyncio
    async def test_match_embedding_error(self, mock_embedding_provider, config, mock_rule):
        """match() is skipped with an "embedding_error" reason when embed raises."""
        mock_embedding_provider.embed.side_effect = Exception("Embedding failed")

        matcher = SemanticMatcher(mock_embedding_provider, config)
        result = await matcher.match("test message", [mock_rule], "tenant-1")

        assert result.skipped is True
        assert "embedding_error" in result.skip_reason

    @pytest.mark.asyncio
    async def test_match_top_k_limit(self, mock_embedding_provider):
        """match() returns no more candidates than ``semantic_top_k``."""
        rules = []
        for i in range(5):
            rule = MagicMock()
            rule.id = uuid.uuid4()
            rule.name = f"Intent {i}"
            rule.intent_vector = [0.1 + i * 0.01] * 768
            rule.semantic_examples = None
            rule.is_enabled = True
            rules.append(rule)

        mock_embedding_provider.embed.return_value = [0.1] * 768

        # A dedicated config is built here, so the shared ``config`` fixture
        # is not requested (it would only be shadowed).
        top_k_config = FusionConfig(semantic_top_k=3)
        matcher = SemanticMatcher(mock_embedding_provider, top_k_config)
        result = await matcher.match("test message", rules, "tenant-1")

        # Only the upper bound is checked; five rules are offered.
        assert len(result.candidates) <= 3

    def test_cosine_similarity(self, mock_embedding_provider, config):
        """_cosine_similarity handles identical, orthogonal and oblique vectors."""
        matcher = SemanticMatcher(mock_embedding_provider, config)

        # Floats are compared with a tolerance so that rounding inside the
        # implementation cannot cause a spurious failure.
        identical = matcher._cosine_similarity([1.0, 0.0, 0.0], [1.0, 0.0, 0.0])
        assert identical == pytest.approx(1.0)

        orthogonal = matcher._cosine_similarity([1.0, 0.0, 0.0], [0.0, 1.0, 0.0])
        assert orthogonal == pytest.approx(0.0)

        oblique = matcher._cosine_similarity([1.0, 1.0, 0.0], [1.0, 0.0, 0.0])
        assert 0.0 < oblique < 1.0

    def test_cosine_similarity_empty_vectors(self, mock_embedding_provider, config):
        """_cosine_similarity returns 0.0 when either vector is empty."""
        matcher = SemanticMatcher(mock_embedding_provider, config)

        assert matcher._cosine_similarity([], [1.0]) == 0.0
        assert matcher._cosine_similarity([1.0], []) == 0.0
        assert matcher._cosine_similarity([], []) == 0.0

    def test_has_semantic_config(self, mock_embedding_provider, config, mock_rule):
        """_has_semantic_config is true with a vector or examples, else false."""
        matcher = SemanticMatcher(mock_embedding_provider, config)

        # Intent vector only.
        mock_rule.intent_vector = [0.1] * 768
        mock_rule.semantic_examples = None
        assert matcher._has_semantic_config(mock_rule) is True

        # Semantic examples only.
        mock_rule.intent_vector = None
        mock_rule.semantic_examples = ["example"]
        assert matcher._has_semantic_config(mock_rule) is True

        # Neither present.
        mock_rule.intent_vector = None
        mock_rule.semantic_examples = None
        assert matcher._has_semantic_config(mock_rule) is False
|