# ai-robot-core/ai-service/tests/test_clarification.py

# 544 lines
# 16 KiB
# Python

"""
Tests for the intent clarification mechanism.
[AC-CLARIFY] Clarification mechanism tests.
"""
import pytest
from unittest.mock import MagicMock, patch
from app.services.intent.clarification import (
ClarificationEngine,
ClarifyMetrics,
ClarifyReason,
ClarifySessionManager,
ClarifyState,
HybridIntentResult,
IntentCandidate,
T_HIGH,
T_LOW,
MAX_CLARIFY_RETRY,
get_clarify_metrics,
)
class TestClarifyMetrics:
    """Checks for the counters and derived rates exposed by ``ClarifyMetrics``."""

    def test_singleton_pattern(self):
        """Constructing the class twice must yield the very same object."""
        first = ClarifyMetrics()
        second = ClarifyMetrics()
        assert first is second

    def test_record_clarify_trigger(self):
        """Three recorded triggers show up as a count of 3."""
        tracker = ClarifyMetrics()
        tracker.reset()
        for _ in range(3):
            tracker.record_clarify_trigger()
        snapshot = tracker.get_metrics()
        assert snapshot["clarify_trigger_rate"] == 3

    def test_record_clarify_converge(self):
        """Two recorded convergences show up as a count of 2."""
        tracker = ClarifyMetrics()
        tracker.reset()
        for _ in range(2):
            tracker.record_clarify_converge()
        snapshot = tracker.get_metrics()
        assert snapshot["clarify_converge_rate"] == 2

    def test_record_misroute(self):
        """A single recorded misroute shows up as a count of 1."""
        tracker = ClarifyMetrics()
        tracker.reset()
        tracker.record_misroute()
        snapshot = tracker.get_metrics()
        assert snapshot["misroute_rate"] == 1

    def test_get_rates(self):
        """One event of each kind over 100 requests gives the expected rates.

        NOTE(review): the converge rate is expected to be 1.0, so it is
        presumably measured against the trigger count rather than the total
        request count -- confirm against the implementation.
        """
        tracker = ClarifyMetrics()
        tracker.reset()
        tracker.record_clarify_trigger()
        tracker.record_clarify_converge()
        tracker.record_misroute()
        computed = tracker.get_rates(100)
        assert computed["clarify_trigger_rate"] == 0.01
        assert computed["clarify_converge_rate"] == 1.0
        assert computed["misroute_rate"] == 0.01

    def test_get_rates_zero_requests(self):
        """With nothing recorded and zero requests every rate is 0.0."""
        tracker = ClarifyMetrics()
        tracker.reset()
        computed = tracker.get_rates(0)
        for rate_key in (
            "clarify_trigger_rate",
            "clarify_converge_rate",
            "misroute_rate",
        ):
            assert computed[rate_key] == 0.0

    def test_reset(self):
        """``reset`` brings every counter back to zero."""
        tracker = ClarifyMetrics()
        tracker.record_clarify_trigger()
        tracker.record_clarify_converge()
        tracker.record_misroute()
        tracker.reset()
        snapshot = tracker.get_metrics()
        for counter_key in (
            "clarify_trigger_rate",
            "clarify_converge_rate",
            "misroute_rate",
        ):
            assert snapshot[counter_key] == 0
class TestIntentCandidate:
    """Serialization checks for ``IntentCandidate``."""

    def test_to_dict(self):
        """``to_dict`` echoes back the values given to the constructor."""
        serialized = IntentCandidate(
            intent_id="intent-1",
            intent_name="退货意图",
            confidence=0.85,
            response_type="flow",
            target_kb_ids=["kb-1"],
            flow_id="flow-1",
            fixed_reply=None,
            transfer_message=None,
        ).to_dict()

        # Only these six keys are verified; the two ``None`` fields are
        # passed to the constructor but not asserted on.
        expected_fields = (
            ("intent_id", "intent-1"),
            ("intent_name", "退货意图"),
            ("confidence", 0.85),
            ("response_type", "flow"),
            ("target_kb_ids", ["kb-1"]),
            ("flow_id", "flow-1"),
        )
        for key, wanted in expected_fields:
            assert serialized[key] == wanted
class TestHybridIntentResult:
    """Checks for ``HybridIntentResult`` serialization and construction."""

    def test_to_dict(self):
        """``to_dict`` exposes the intent, confidence, candidates and flag."""
        picked = IntentCandidate(
            intent_id="intent-1",
            intent_name="退货意图",
            confidence=0.85,
        )
        hybrid = HybridIntentResult(
            intent=picked,
            confidence=0.85,
            candidates=[picked],
            need_clarify=False,
            clarify_reason=None,
            missing_slots=[],
        )
        payload = hybrid.to_dict()
        assert payload["intent"]["intent_id"] == "intent-1"
        assert payload["confidence"] == 0.85
        assert len(payload["candidates"]) == 1
        assert payload["need_clarify"] is False

    def test_from_fusion_result(self):
        """A fusion result with a final intent maps onto a populated result."""
        intent_stub = MagicMock()
        intent_attrs = {
            "id": "intent-1",
            "name": "退货意图",
            "response_type": "flow",
            "target_kb_ids": ["kb-1"],
            "flow_id": None,
            "fixed_reply": None,
            "transfer_message": None,
        }
        # ``name`` must be set after construction: passing it to the
        # ``MagicMock`` constructor would name the mock instead.
        for attr, value in intent_attrs.items():
            setattr(intent_stub, attr, value)

        fusion_stub = MagicMock()
        fusion_stub.final_intent = intent_stub
        fusion_stub.final_confidence = 0.85
        fusion_stub.need_clarify = False
        fusion_stub.decision_reason = "rule_high_confidence"
        fusion_stub.clarify_candidates = []

        converted = HybridIntentResult.from_fusion_result(fusion_stub)
        assert converted.intent is not None
        assert converted.intent.intent_id == "intent-1"
        assert converted.confidence == 0.85
        assert converted.need_clarify is False

    def test_from_fusion_result_with_clarify(self):
        """A ``multi_intent`` fusion result yields a clarify request with two candidates."""
        fusion_stub = MagicMock()
        fusion_stub.final_intent = None
        fusion_stub.final_confidence = 0.5
        fusion_stub.need_clarify = True
        fusion_stub.decision_reason = "multi_intent"

        # The two candidate stubs differ only in id and name.
        candidate_stubs = []
        for stub_id, stub_name in (
            ("intent-1", "退货意图"),
            ("intent-2", "换货意图"),
        ):
            stub = MagicMock()
            stub.id = stub_id
            stub.name = stub_name
            stub.response_type = "flow"
            for optional_attr in (
                "target_kb_ids",
                "flow_id",
                "fixed_reply",
                "transfer_message",
            ):
                setattr(stub, optional_attr, None)
            candidate_stubs.append(stub)
        fusion_stub.clarify_candidates = candidate_stubs

        converted = HybridIntentResult.from_fusion_result(fusion_stub)
        assert converted.need_clarify is True
        assert converted.clarify_reason == ClarifyReason.MULTI_INTENT
        assert len(converted.candidates) == 2
class TestClarifyState:
    """Checks for ``ClarifyState`` serialization and retry bookkeeping."""

    def test_to_dict(self):
        """``to_dict`` reports the reason value, retry count and candidates."""
        only_candidate = IntentCandidate(
            intent_id="intent-1",
            intent_name="退货意图",
            confidence=0.5,
        )
        serialized = ClarifyState(
            reason=ClarifyReason.INTENT_AMBIGUITY,
            asked_slot=None,
            retry_count=1,
            candidates=[only_candidate],
            asked_intent_ids=["intent-1"],
        ).to_dict()
        assert serialized["reason"] == "intent_ambiguity"
        assert serialized["retry_count"] == 1
        assert len(serialized["candidates"]) == 1

    def test_increment_retry(self):
        """Each ``increment_retry`` call raises ``retry_count`` by one."""
        tracked = ClarifyState(reason=ClarifyReason.LOW_CONFIDENCE)
        for expected_count in (1, 2):
            tracked.increment_retry()
            assert tracked.retry_count == expected_count

    def test_is_max_retry(self):
        """``is_max_retry`` flips once ``retry_count`` reaches the limit."""
        tracked = ClarifyState(reason=ClarifyReason.LOW_CONFIDENCE)
        # A freshly created state has not hit the limit yet.
        assert not tracked.is_max_retry()
        tracked.retry_count = MAX_CLARIFY_RETRY
        assert tracked.is_max_retry()
class TestClarificationEngine:
    """Behavioral tests for ``ClarificationEngine``.

    Covers confidence fusion, the hard-block check, the decision to start a
    clarification round, prompt generation, handling of the user's reply,
    and metric access.
    """

    def test_compute_confidence_rule_only(self):
        """With all weight on the rule score the result is the rule score."""
        engine = ClarificationEngine()
        confidence = engine.compute_confidence(
            rule_score=1.0,
            semantic_score=0.0,
            llm_score=0.0,
            w_rule=1.0,
            w_semantic=0.0,
            w_llm=0.0,
        )
        # The value comes out of float arithmetic, so compare with a
        # tolerance instead of exact equality.
        assert confidence == pytest.approx(1.0)

    def test_compute_confidence_semantic_only(self):
        """Only the semantic score is non-zero; weights are 0.3/0.5/0.2."""
        engine = ClarificationEngine()
        confidence = engine.compute_confidence(
            rule_score=0.0,
            semantic_score=0.8,
            llm_score=0.0,
            w_rule=0.3,
            w_semantic=0.5,
            w_llm=0.2,
        )
        # Expected weighted average:
        # (0.0*0.3 + 0.8*0.5 + 0.0*0.2) / (0.3 + 0.5 + 0.2) = 0.4 / 1.0 = 0.4
        # Compared with a tolerance so the test does not depend on the
        # exact order of floating-point operations in the implementation.
        assert confidence == pytest.approx(0.4)

    def test_compute_confidence_weighted(self):
        """All three scores contribute according to their weights."""
        engine = ClarificationEngine()
        confidence = engine.compute_confidence(
            rule_score=1.0,
            semantic_score=0.8,
            llm_score=0.9,
            w_rule=0.5,
            w_semantic=0.3,
            w_llm=0.2,
        )
        # The weights sum to 1.0, hence the division by 1.0.
        expected = (1.0 * 0.5 + 0.8 * 0.3 + 0.9 * 0.2) / 1.0
        assert confidence == pytest.approx(expected, abs=0.001)

    def test_check_hard_block_low_confidence(self):
        """No intent and confidence 0.5 is blocked as ``LOW_CONFIDENCE``.

        NOTE(review): 0.5 lies between the thresholds pinned elsewhere in
        this file (0.45 and 0.75), so the hard block is presumably keyed on
        the upper threshold -- confirm against the implementation.
        """
        engine = ClarificationEngine()
        result = HybridIntentResult(
            intent=None,
            confidence=0.5,
            candidates=[],
        )
        is_blocked, reason = engine.check_hard_block(result)
        assert is_blocked is True
        assert reason == ClarifyReason.LOW_CONFIDENCE

    def test_check_hard_block_high_confidence(self):
        """An intent at confidence 0.85 is not blocked and has no reason."""
        engine = ClarificationEngine()
        result = HybridIntentResult(
            intent=IntentCandidate(
                intent_id="intent-1",
                intent_name="退货意图",
                confidence=0.85,
            ),
            confidence=0.85,
            candidates=[],
        )
        is_blocked, reason = engine.check_hard_block(result)
        assert is_blocked is False
        assert reason is None

    def test_check_hard_block_missing_slots(self):
        """A required slot that is not filled blocks with ``MISSING_SLOT``."""
        engine = ClarificationEngine()
        result = HybridIntentResult(
            intent=IntentCandidate(
                intent_id="intent-1",
                intent_name="退货意图",
                confidence=0.85,
            ),
            confidence=0.85,
            candidates=[],
        )
        # "product_id" is required but absent from the filled slots.
        is_blocked, reason = engine.check_hard_block(
            result,
            required_slots=["order_id", "product_id"],
            filled_slots={"order_id": "123"},
        )
        assert is_blocked is True
        assert reason == ClarifyReason.MISSING_SLOT

    def test_should_trigger_clarify_below_t_low(self):
        """Confidence 0.3 with no intent triggers a ``LOW_CONFIDENCE`` round."""
        engine = ClarificationEngine()
        get_clarify_metrics().reset()
        result = HybridIntentResult(
            intent=None,
            confidence=0.3,
            candidates=[],
        )
        should_clarify, state = engine.should_trigger_clarify(result)
        assert should_clarify is True
        assert state is not None
        assert state.reason == ClarifyReason.LOW_CONFIDENCE

    def test_should_trigger_clarify_gray_zone(self):
        """A result flagged ``need_clarify`` keeps its own clarify reason."""
        engine = ClarificationEngine()
        get_clarify_metrics().reset()
        candidate = IntentCandidate(
            intent_id="intent-1",
            intent_name="退货意图",
            confidence=0.5,
        )
        result = HybridIntentResult(
            intent=candidate,
            confidence=0.5,
            candidates=[candidate],
            need_clarify=True,
            clarify_reason=ClarifyReason.INTENT_AMBIGUITY,
        )
        should_clarify, state = engine.should_trigger_clarify(result)
        assert should_clarify is True
        assert state is not None
        assert state.reason == ClarifyReason.INTENT_AMBIGUITY

    def test_should_trigger_clarify_above_t_high(self):
        """Confidence 0.85 does not trigger clarification; no state is returned."""
        engine = ClarificationEngine()
        get_clarify_metrics().reset()
        candidate = IntentCandidate(
            intent_id="intent-1",
            intent_name="退货意图",
            confidence=0.85,
        )
        result = HybridIntentResult(
            intent=candidate,
            confidence=0.85,
            candidates=[candidate],
        )
        should_clarify, state = engine.should_trigger_clarify(result)
        assert should_clarify is False
        assert state is None

    def test_generate_clarify_prompt_missing_slot(self):
        """The missing-slot prompt mentions the slot name or a generic phrase."""
        engine = ClarificationEngine()
        state = ClarifyState(
            reason=ClarifyReason.MISSING_SLOT,
            asked_slot="order_id",
        )
        prompt = engine.generate_clarify_prompt(state)
        assert "order_id" in prompt or "相关信息" in prompt

    def test_generate_clarify_prompt_low_confidence(self):
        """The low-confidence prompt contains one of two expected phrases."""
        engine = ClarificationEngine()
        state = ClarifyState(reason=ClarifyReason.LOW_CONFIDENCE)
        prompt = engine.generate_clarify_prompt(state)
        assert "理解" in prompt or "详细" in prompt

    def test_generate_clarify_prompt_multi_intent(self):
        """The multi-intent prompt lists the name of every candidate."""
        engine = ClarificationEngine()
        candidates = [
            IntentCandidate(intent_id="1", intent_name="退货", confidence=0.5),
            IntentCandidate(intent_id="2", intent_name="换货", confidence=0.4),
        ]
        state = ClarifyState(
            reason=ClarifyReason.MULTI_INTENT,
            candidates=candidates,
        )
        prompt = engine.generate_clarify_prompt(state)
        assert "退货" in prompt
        assert "换货" in prompt

    def test_process_clarify_response_max_retry(self):
        """At the retry limit the reply yields an empty, non-clarifying result."""
        engine = ClarificationEngine()
        get_clarify_metrics().reset()
        state = ClarifyState(
            reason=ClarifyReason.LOW_CONFIDENCE,
            retry_count=MAX_CLARIFY_RETRY,
        )
        result = engine.process_clarify_response("用户回复", state)
        assert result.intent is None
        assert result.confidence == 0.0
        assert result.need_clarify is False

    def test_process_clarify_response_missing_slot(self):
        """Answering a missing-slot question resolves to an intent."""
        engine = ClarificationEngine()
        get_clarify_metrics().reset()
        candidate = IntentCandidate(
            intent_id="intent-1",
            intent_name="退货意图",
            confidence=0.8,
        )
        state = ClarifyState(
            reason=ClarifyReason.MISSING_SLOT,
            asked_slot="order_id",
            candidates=[candidate],
        )
        result = engine.process_clarify_response("订单号是123", state)
        assert result.intent is not None
        assert result.need_clarify is False

    def test_get_metrics(self):
        """``get_metrics`` reflects events recorded on the engine's metrics."""
        engine = ClarificationEngine()
        get_clarify_metrics().reset()
        # Record directly on the engine's private metrics object.
        engine._metrics.record_clarify_trigger()
        engine._metrics.record_clarify_converge()
        metrics = engine.get_metrics()
        assert metrics["clarify_trigger_rate"] == 1
        assert metrics["clarify_converge_rate"] == 1
class TestClarifySessionManager:
    """Tests for the session-keyed state store ``ClarifySessionManager``."""

    # Session key shared by every test in this class.
    SESSION_ID = "test-session"

    @pytest.fixture(autouse=True)
    def _isolated_session(self):
        """Clear the test session before and after every test.

        The manager is driven through class-level calls, so a session
        stored by one test would otherwise stay behind for later tests.
        """
        ClarifySessionManager.clear_session(self.SESSION_ID)
        yield
        ClarifySessionManager.clear_session(self.SESSION_ID)

    def test_set_and_get_session(self):
        """A stored state can be fetched back under the same session id."""
        state = ClarifyState(reason=ClarifyReason.LOW_CONFIDENCE)
        ClarifySessionManager.set_session(self.SESSION_ID, state)
        retrieved = ClarifySessionManager.get_session(self.SESSION_ID)
        assert retrieved is not None
        assert retrieved.reason == ClarifyReason.LOW_CONFIDENCE

    def test_clear_session(self):
        """After ``clear_session`` the session id resolves to ``None``."""
        ClarifySessionManager.set_session(
            self.SESSION_ID,
            ClarifyState(reason=ClarifyReason.LOW_CONFIDENCE),
        )
        ClarifySessionManager.clear_session(self.SESSION_ID)
        retrieved = ClarifySessionManager.get_session(self.SESSION_ID)
        assert retrieved is None

    def test_has_active_clarify(self):
        """A session is active only while stored and below the retry limit."""
        # Nothing stored yet, so the session is not active.
        assert not ClarifySessionManager.has_active_clarify(self.SESSION_ID)
        state = ClarifyState(reason=ClarifyReason.LOW_CONFIDENCE)
        ClarifySessionManager.set_session(self.SESSION_ID, state)
        assert ClarifySessionManager.has_active_clarify(self.SESSION_ID)
        # The state is mutated after being stored; the assertion below
        # relies on the manager seeing that change.
        state.retry_count = MAX_CLARIFY_RETRY
        assert not ClarifySessionManager.has_active_clarify(self.SESSION_ID)
class TestThresholds:
    """Pin the imported threshold and retry constants to their expected values."""

    def test_t_high_value(self):
        """``T_HIGH`` is expected to be 0.75."""
        expected_high = 0.75
        assert T_HIGH == expected_high

    def test_t_low_value(self):
        """``T_LOW`` is expected to be 0.45."""
        expected_low = 0.45
        assert T_LOW == expected_low

    def test_t_high_greater_than_t_low(self):
        """The two thresholds must be strictly ordered."""
        assert T_LOW < T_HIGH

    def test_max_retry_value(self):
        """``MAX_CLARIFY_RETRY`` is expected to be 3."""
        expected_limit = 3
        assert MAX_CLARIFY_RETRY == expected_limit