544 lines
16 KiB
Python
544 lines
16 KiB
Python
|
|
"""
|
||
|
|
Tests for clarification mechanism.
|
||
|
|
[AC-CLARIFY] 澄清机制测试
|
||
|
|
"""
|
||
|
|
|
||
|
|
import pytest
|
||
|
|
from unittest.mock import MagicMock, patch
|
||
|
|
|
||
|
|
from app.services.intent.clarification import (
|
||
|
|
ClarificationEngine,
|
||
|
|
ClarifyMetrics,
|
||
|
|
ClarifyReason,
|
||
|
|
ClarifySessionManager,
|
||
|
|
ClarifyState,
|
||
|
|
HybridIntentResult,
|
||
|
|
IntentCandidate,
|
||
|
|
T_HIGH,
|
||
|
|
T_LOW,
|
||
|
|
MAX_CLARIFY_RETRY,
|
||
|
|
get_clarify_metrics,
|
||
|
|
)
|
||
|
|
|
||
|
|
|
||
|
|
class TestClarifyMetrics:
    """Tests for the ``ClarifyMetrics`` counters and the rates derived from them.

    Every test that reads counters calls ``reset()`` first, because
    ``ClarifyMetrics()`` hands back one shared object (see
    ``test_singleton_pattern``) and earlier tests may have recorded events.

    Note that ``get_metrics()`` exposes raw counts under keys that end in
    ``_rate``; the tests below pin that naming as-is.
    """

    def test_singleton_pattern(self):
        """Constructing ``ClarifyMetrics`` twice yields the very same object."""
        m1 = ClarifyMetrics()
        m2 = ClarifyMetrics()
        assert m1 is m2

    def test_record_clarify_trigger(self):
        """Three recorded triggers are reported as a count of 3."""
        metrics = ClarifyMetrics()
        metrics.reset()

        for _ in range(3):
            metrics.record_clarify_trigger()

        counts = metrics.get_metrics()
        assert counts["clarify_trigger_rate"] == 3

    def test_record_clarify_converge(self):
        """Two recorded convergences are reported as a count of 2."""
        metrics = ClarifyMetrics()
        metrics.reset()

        for _ in range(2):
            metrics.record_clarify_converge()

        counts = metrics.get_metrics()
        assert counts["clarify_converge_rate"] == 2

    def test_record_misroute(self):
        """A single recorded misroute is reported as a count of 1."""
        metrics = ClarifyMetrics()
        metrics.reset()

        metrics.record_misroute()

        counts = metrics.get_metrics()
        assert counts["misroute_rate"] == 1

    def test_get_rates(self):
        """One event of each kind over 100 requests gives 0.01 / 1.0 / 0.01.

        NOTE(review): the expected converge rate of 1.0 suggests it is
        computed relative to triggers rather than total requests -- confirm
        against the implementation.
        """
        metrics = ClarifyMetrics()
        metrics.reset()

        metrics.record_clarify_trigger()
        metrics.record_clarify_converge()
        metrics.record_misroute()

        rates = metrics.get_rates(100)
        # Rates are computed floats; compare with a tolerance rather than
        # relying on exact binary representation.
        assert rates["clarify_trigger_rate"] == pytest.approx(0.01)
        assert rates["clarify_converge_rate"] == pytest.approx(1.0)
        assert rates["misroute_rate"] == pytest.approx(0.01)

    def test_get_rates_zero_requests(self):
        """With zero requests and no events every rate is 0.0 (no division error)."""
        metrics = ClarifyMetrics()
        metrics.reset()

        rates = metrics.get_rates(0)
        # approx(0.0) falls back to the absolute tolerance (1e-12 by default).
        assert rates["clarify_trigger_rate"] == pytest.approx(0.0)
        assert rates["clarify_converge_rate"] == pytest.approx(0.0)
        assert rates["misroute_rate"] == pytest.approx(0.0)

    def test_reset(self):
        """``reset()`` brings every counter back to zero."""
        metrics = ClarifyMetrics()
        metrics.record_clarify_trigger()
        metrics.record_clarify_converge()
        metrics.record_misroute()

        metrics.reset()

        counts = metrics.get_metrics()
        assert counts["clarify_trigger_rate"] == 0
        assert counts["clarify_converge_rate"] == 0
        assert counts["misroute_rate"] == 0
|
||
|
|
|
||
|
|
|
||
|
|
class TestIntentCandidate:
    """Tests for ``IntentCandidate`` serialisation."""

    def test_to_dict(self):
        """``to_dict()`` echoes back the fields the candidate was built with."""
        serialized = IntentCandidate(
            intent_id="intent-1",
            intent_name="退货意图",
            confidence=0.85,
            response_type="flow",
            target_kb_ids=["kb-1"],
            flow_id="flow-1",
            fixed_reply=None,
            transfer_message=None,
        ).to_dict()

        # Checked key by key, in the same order as before; keys not listed
        # here (fixed_reply, transfer_message) are deliberately not asserted.
        expected_pairs = (
            ("intent_id", "intent-1"),
            ("intent_name", "退货意图"),
            ("confidence", 0.85),
            ("response_type", "flow"),
            ("target_kb_ids", ["kb-1"]),
            ("flow_id", "flow-1"),
        )
        for key, expected_value in expected_pairs:
            assert serialized[key] == expected_value
|
||
|
|
|
||
|
|
|
||
|
|
class TestHybridIntentResult:
    """Tests for ``HybridIntentResult`` serialisation and fusion conversion."""

    @staticmethod
    def _mock_with(**attributes):
        """Return a ``MagicMock`` with each keyword set as a plain attribute.

        Attributes are assigned with ``setattr`` after construction on
        purpose: passing ``name=...`` to the ``MagicMock`` constructor would
        name the mock itself instead of creating a ``name`` attribute.
        """
        fake = MagicMock()
        for attr_name, attr_value in attributes.items():
            setattr(fake, attr_name, attr_value)
        return fake

    def test_to_dict(self):
        """``to_dict()`` nests the intent and keeps the scalar fields."""
        top_intent = IntentCandidate(
            intent_id="intent-1",
            intent_name="退货意图",
            confidence=0.85,
        )
        hybrid = HybridIntentResult(
            intent=top_intent,
            confidence=0.85,
            candidates=[top_intent],
            need_clarify=False,
            clarify_reason=None,
            missing_slots=[],
        )

        payload = hybrid.to_dict()

        assert payload["intent"]["intent_id"] == "intent-1"
        assert payload["confidence"] == 0.85
        assert len(payload["candidates"]) == 1
        assert payload["need_clarify"] is False

    def test_from_fusion_result(self):
        """A confident fusion result converts to a result with an intent."""
        fusion = MagicMock()
        fusion.final_intent = self._mock_with(
            id="intent-1",
            name="退货意图",
            response_type="flow",
            target_kb_ids=["kb-1"],
            flow_id=None,
            fixed_reply=None,
            transfer_message=None,
        )
        fusion.final_confidence = 0.85
        fusion.need_clarify = False
        fusion.decision_reason = "rule_high_confidence"
        fusion.clarify_candidates = []

        converted = HybridIntentResult.from_fusion_result(fusion)

        assert converted.intent is not None
        assert converted.intent.intent_id == "intent-1"
        assert converted.confidence == 0.85
        assert converted.need_clarify is False

    def test_from_fusion_result_with_clarify(self):
        """A ``multi_intent`` fusion result keeps both clarify candidates."""
        fusion = MagicMock()
        fusion.final_intent = None
        fusion.final_confidence = 0.5
        fusion.need_clarify = True
        fusion.decision_reason = "multi_intent"

        # Both candidates share every field except id and name.
        shared_fields = {
            "response_type": "flow",
            "target_kb_ids": None,
            "flow_id": None,
            "fixed_reply": None,
            "transfer_message": None,
        }
        return_intent = self._mock_with(
            id="intent-1", name="退货意图", **shared_fields
        )
        exchange_intent = self._mock_with(
            id="intent-2", name="换货意图", **shared_fields
        )
        fusion.clarify_candidates = [return_intent, exchange_intent]

        converted = HybridIntentResult.from_fusion_result(fusion)

        assert converted.need_clarify is True
        assert converted.clarify_reason == ClarifyReason.MULTI_INTENT
        assert len(converted.candidates) == 2
|
||
|
|
|
||
|
|
|
||
|
|
class TestClarifyState:
    """Tests for ``ClarifyState`` serialisation and retry bookkeeping."""

    def test_to_dict(self):
        """``to_dict()`` emits the reason as a string plus retry/candidate data."""
        only_candidate = IntentCandidate(
            intent_id="intent-1",
            intent_name="退货意图",
            confidence=0.5,
        )
        clarify_state = ClarifyState(
            reason=ClarifyReason.INTENT_AMBIGUITY,
            asked_slot=None,
            retry_count=1,
            candidates=[only_candidate],
            asked_intent_ids=["intent-1"],
        )

        payload = clarify_state.to_dict()

        assert payload["reason"] == "intent_ambiguity"
        assert payload["retry_count"] == 1
        assert len(payload["candidates"]) == 1

    def test_increment_retry(self):
        """Each ``increment_retry()`` call bumps ``retry_count`` by one."""
        clarify_state = ClarifyState(reason=ClarifyReason.LOW_CONFIDENCE)

        for expected_count in (1, 2):
            clarify_state.increment_retry()
            assert clarify_state.retry_count == expected_count

    def test_is_max_retry(self):
        """``is_max_retry()`` flips once ``retry_count`` hits the limit."""
        clarify_state = ClarifyState(reason=ClarifyReason.LOW_CONFIDENCE)
        assert not clarify_state.is_max_retry()

        clarify_state.retry_count = MAX_CLARIFY_RETRY
        assert clarify_state.is_max_retry()
|
||
|
|
|
||
|
|
|
||
|
|
class TestClarificationEngine:
    """Tests for ``ClarificationEngine``.

    Covers confidence computation, hard-block checks, the decision whether
    to trigger clarification, prompt generation, handling of the user's
    clarification reply, and metrics access.
    """

    @staticmethod
    def _return_candidate(confidence):
        """Build the "return goods" candidate used throughout these tests."""
        return IntentCandidate(
            intent_id="intent-1",
            intent_name="退货意图",
            confidence=confidence,
        )

    # ------------------------------------------------------------------
    # compute_confidence
    # ------------------------------------------------------------------

    def test_compute_confidence_rule_only(self):
        """With all weight on the rule score the confidence is that score."""
        engine = ClarificationEngine()

        confidence = engine.compute_confidence(
            rule_score=1.0,
            semantic_score=0.0,
            llm_score=0.0,
            w_rule=1.0,
            w_semantic=0.0,
            w_llm=0.0,
        )

        # Computed float: compare with a tolerance, like the weighted test.
        assert confidence == pytest.approx(1.0)

    def test_compute_confidence_semantic_only(self):
        """Only the semantic score is non-zero; weights still sum to 1."""
        engine = ClarificationEngine()

        confidence = engine.compute_confidence(
            rule_score=0.0,
            semantic_score=0.8,
            llm_score=0.0,
            w_rule=0.3,
            w_semantic=0.5,
            w_llm=0.2,
        )

        # With weights w_rule=0.3, w_semantic=0.5, w_llm=0.2 and scores
        # rule=0.0, semantic=0.8, llm=0.0:
        # confidence = (0.0*0.3 + 0.8*0.5 + 0.0*0.2) / (0.3+0.5+0.2) = 0.4
        # The result goes through float multiplication and division, so an
        # exact == comparison would depend on rounding details.
        assert confidence == pytest.approx(0.4)

    def test_compute_confidence_weighted(self):
        """All three scores contribute according to their weights."""
        engine = ClarificationEngine()

        confidence = engine.compute_confidence(
            rule_score=1.0,
            semantic_score=0.8,
            llm_score=0.9,
            w_rule=0.5,
            w_semantic=0.3,
            w_llm=0.2,
        )

        expected = (1.0 * 0.5 + 0.8 * 0.3 + 0.9 * 0.2) / 1.0
        assert abs(confidence - expected) < 0.001

    # ------------------------------------------------------------------
    # check_hard_block
    # ------------------------------------------------------------------

    def test_check_hard_block_low_confidence(self):
        """No intent at confidence 0.5 is blocked as LOW_CONFIDENCE."""
        engine = ClarificationEngine()
        result = HybridIntentResult(
            intent=None,
            confidence=0.5,
            candidates=[],
        )

        is_blocked, reason = engine.check_hard_block(result)

        assert is_blocked is True
        assert reason == ClarifyReason.LOW_CONFIDENCE

    def test_check_hard_block_high_confidence(self):
        """An intent at confidence 0.85 is not blocked."""
        engine = ClarificationEngine()
        result = HybridIntentResult(
            intent=self._return_candidate(0.85),
            confidence=0.85,
            candidates=[],
        )

        is_blocked, reason = engine.check_hard_block(result)

        assert is_blocked is False
        assert reason is None

    def test_check_hard_block_missing_slots(self):
        """A required slot that is not filled blocks with MISSING_SLOT."""
        engine = ClarificationEngine()
        result = HybridIntentResult(
            intent=self._return_candidate(0.85),
            confidence=0.85,
            candidates=[],
        )

        # "product_id" is required but absent from filled_slots.
        is_blocked, reason = engine.check_hard_block(
            result,
            required_slots=["order_id", "product_id"],
            filled_slots={"order_id": "123"},
        )

        assert is_blocked is True
        assert reason == ClarifyReason.MISSING_SLOT

    # ------------------------------------------------------------------
    # should_trigger_clarify
    # ------------------------------------------------------------------

    def test_should_trigger_clarify_below_t_low(self):
        """Confidence 0.3 with no intent triggers a LOW_CONFIDENCE clarify."""
        engine = ClarificationEngine()
        get_clarify_metrics().reset()
        result = HybridIntentResult(
            intent=None,
            confidence=0.3,
            candidates=[],
        )

        should_clarify, state = engine.should_trigger_clarify(result)

        assert should_clarify is True
        assert state is not None
        assert state.reason == ClarifyReason.LOW_CONFIDENCE

    def test_should_trigger_clarify_gray_zone(self):
        """Confidence 0.5 flagged as ambiguous triggers INTENT_AMBIGUITY."""
        engine = ClarificationEngine()
        get_clarify_metrics().reset()
        candidate = self._return_candidate(0.5)
        result = HybridIntentResult(
            intent=candidate,
            confidence=0.5,
            candidates=[candidate],
            need_clarify=True,
            clarify_reason=ClarifyReason.INTENT_AMBIGUITY,
        )

        should_clarify, state = engine.should_trigger_clarify(result)

        assert should_clarify is True
        assert state is not None
        assert state.reason == ClarifyReason.INTENT_AMBIGUITY

    def test_should_trigger_clarify_above_t_high(self):
        """Confidence 0.85 does not trigger clarification."""
        engine = ClarificationEngine()
        get_clarify_metrics().reset()
        candidate = self._return_candidate(0.85)
        result = HybridIntentResult(
            intent=candidate,
            confidence=0.85,
            candidates=[candidate],
        )

        should_clarify, state = engine.should_trigger_clarify(result)

        assert should_clarify is False
        assert state is None

    # ------------------------------------------------------------------
    # generate_clarify_prompt
    # ------------------------------------------------------------------

    def test_generate_clarify_prompt_missing_slot(self):
        """The missing-slot prompt names the slot or uses the generic wording."""
        engine = ClarificationEngine()
        state = ClarifyState(
            reason=ClarifyReason.MISSING_SLOT,
            asked_slot="order_id",
        )

        prompt = engine.generate_clarify_prompt(state)

        assert "order_id" in prompt or "相关信息" in prompt

    def test_generate_clarify_prompt_low_confidence(self):
        """The low-confidence prompt contains one of the expected phrases."""
        engine = ClarificationEngine()
        state = ClarifyState(reason=ClarifyReason.LOW_CONFIDENCE)

        prompt = engine.generate_clarify_prompt(state)

        assert "理解" in prompt or "详细" in prompt

    def test_generate_clarify_prompt_multi_intent(self):
        """The multi-intent prompt lists every candidate's name."""
        engine = ClarificationEngine()
        candidates = [
            IntentCandidate(intent_id="1", intent_name="退货", confidence=0.5),
            IntentCandidate(intent_id="2", intent_name="换货", confidence=0.4),
        ]
        state = ClarifyState(
            reason=ClarifyReason.MULTI_INTENT,
            candidates=candidates,
        )

        prompt = engine.generate_clarify_prompt(state)

        assert "退货" in prompt
        assert "换货" in prompt

    # ------------------------------------------------------------------
    # process_clarify_response
    # ------------------------------------------------------------------

    def test_process_clarify_response_max_retry(self):
        """At the retry limit the engine gives up: no intent, no more clarify."""
        engine = ClarificationEngine()
        get_clarify_metrics().reset()
        state = ClarifyState(
            reason=ClarifyReason.LOW_CONFIDENCE,
            retry_count=MAX_CLARIFY_RETRY,
        )

        result = engine.process_clarify_response("用户回复", state)

        assert result.intent is None
        # The test expects the literal sentinel 0.0 here, not a computed value.
        assert result.confidence == 0.0
        assert result.need_clarify is False

    def test_process_clarify_response_missing_slot(self):
        """Answering a slot question resolves to an intent without re-asking."""
        engine = ClarificationEngine()
        get_clarify_metrics().reset()
        state = ClarifyState(
            reason=ClarifyReason.MISSING_SLOT,
            asked_slot="order_id",
            candidates=[self._return_candidate(0.8)],
        )

        result = engine.process_clarify_response("订单号是123", state)

        assert result.intent is not None
        assert result.need_clarify is False

    # ------------------------------------------------------------------
    # get_metrics
    # ------------------------------------------------------------------

    def test_get_metrics(self):
        """``get_metrics()`` reflects events recorded on the engine's metrics."""
        engine = ClarificationEngine()
        get_clarify_metrics().reset()

        engine._metrics.record_clarify_trigger()
        engine._metrics.record_clarify_converge()

        metrics = engine.get_metrics()

        assert metrics["clarify_trigger_rate"] == 1
        assert metrics["clarify_converge_rate"] == 1
|
||
|
|
|
||
|
|
|
||
|
|
class TestClarifySessionManager:
    """Tests for ``ClarifySessionManager`` session storage.

    All tests share one session id. The manager is called through the class
    itself, so anything a test stores would outlive it; the autouse fixture
    below clears the session before and after every test to keep them
    independent of each other and of execution order.
    """

    SESSION_ID = "test-session"

    @pytest.fixture(autouse=True)
    def _isolated_session(self):
        """Start each test with no stored session and leave none behind."""
        ClarifySessionManager.clear_session(self.SESSION_ID)
        yield
        ClarifySessionManager.clear_session(self.SESSION_ID)

    def test_set_and_get_session(self):
        """A stored state can be read back for the same session id."""
        state = ClarifyState(reason=ClarifyReason.LOW_CONFIDENCE)

        ClarifySessionManager.set_session(self.SESSION_ID, state)
        retrieved = ClarifySessionManager.get_session(self.SESSION_ID)

        assert retrieved is not None
        assert retrieved.reason == ClarifyReason.LOW_CONFIDENCE

    def test_clear_session(self):
        """After ``clear_session`` the session lookup returns ``None``."""
        ClarifySessionManager.set_session(
            self.SESSION_ID,
            ClarifyState(reason=ClarifyReason.LOW_CONFIDENCE),
        )

        ClarifySessionManager.clear_session(self.SESSION_ID)
        retrieved = ClarifySessionManager.get_session(self.SESSION_ID)

        assert retrieved is None

    def test_has_active_clarify(self):
        """A session is active only while stored and below the retry limit."""
        # No session stored yet.
        assert not ClarifySessionManager.has_active_clarify(self.SESSION_ID)

        state = ClarifyState(reason=ClarifyReason.LOW_CONFIDENCE)
        ClarifySessionManager.set_session(self.SESSION_ID, state)
        assert ClarifySessionManager.has_active_clarify(self.SESSION_ID)

        # Mutating the state after storing it is expected to be visible
        # through the manager: at the retry limit the clarify is not active.
        state.retry_count = MAX_CLARIFY_RETRY
        assert not ClarifySessionManager.has_active_clarify(self.SESSION_ID)
|
||
|
|
|
||
|
|
|
||
|
|
class TestThresholds:
    """Pins the module-level clarification thresholds and retry limit."""

    def test_t_high_value(self):
        """The upper confidence threshold is 0.75."""
        expected_high = 0.75
        assert T_HIGH == expected_high

    def test_t_low_value(self):
        """The lower confidence threshold is 0.45."""
        expected_low = 0.45
        assert T_LOW == expected_low

    def test_t_high_greater_than_t_low(self):
        """The thresholds are ordered: ``T_HIGH`` is strictly above ``T_LOW``."""
        thresholds_ordered = T_HIGH > T_LOW
        assert thresholds_ordered

    def test_max_retry_value(self):
        """At most three clarification retries are allowed."""
        expected_limit = 3
        assert MAX_CLARIFY_RETRY == expected_limit
|