"""
Confidence calculation for AI Service.
[AC-AISVC-17, AC-AISVC-18, AC-AISVC-19] Confidence scoring and transfer suggestion logic.

Design reference: design.md Section 4.3 - Retrieval-miss fallback and confidence strategy
- Retrieval insufficiency detection
- Confidence calculation based on retrieval scores
- shouldTransfer logic with threshold T_low
"""

import logging
import math
from dataclasses import dataclass, field
from typing import Any

from app.core.config import get_settings
from app.services.retrieval.base import RetrievalResult

logger = logging.getLogger(__name__)

# Weights of the two terms that make up the pre-penalty confidence.
_SCORE_WEIGHT = 0.7
_HIT_COUNT_WEIGHT = 0.3
# Hit count at which the hit-count factor saturates at 1.0.
_HIT_COUNT_SATURATION = 5.0
# Each entry of ``additional_factors`` is scaled by this before being added.
_ADDITIONAL_FACTOR_WEIGHT = 0.1
# Fixed confidence reported when no retrieval was performed at all.
_NO_RETRIEVAL_CONFIDENCE = 0.3


@dataclass
class ConfidenceConfig:
    """
    Configuration for confidence calculation.
    [AC-AISVC-17, AC-AISVC-18] Configurable thresholds.
    """

    # Retrieval is insufficient when the best score is below this value.
    score_threshold: float = 0.7
    # Retrieval is insufficient when fewer hits than this were returned.
    min_hits: int = 1
    # T_low: a confidence below this value sets should_transfer.
    confidence_low_threshold: float = 0.5
    # Below this value (and with insufficient retrieval) an advisory reason
    # is attached even though no transfer is suggested.
    confidence_high_threshold: float = 0.8
    # Amount subtracted from the confidence when retrieval is insufficient.
    insufficient_penalty: float = 0.3
    # Retrieval is insufficient when the evidence token count exceeds this.
    max_evidence_tokens: int = 2000


@dataclass
class ConfidenceResult:
    """
    Result of confidence calculation.
    [AC-AISVC-17, AC-AISVC-18, AC-AISVC-19] Contains confidence and transfer suggestion.
    """

    confidence: float
    should_transfer: bool
    transfer_reason: str | None = None
    is_retrieval_insufficient: bool = False
    diagnostics: dict[str, Any] = field(default_factory=dict)


class ConfidenceCalculator:
    """
    [AC-AISVC-17, AC-AISVC-18, AC-AISVC-19]
    Calculator for response confidence.

    Design reference: design.md Section 4.3
    - MVP: confidence based on RAG retrieval scores
    - Insufficient retrieval triggers confidence downgrade
    - shouldTransfer when confidence < T_low
    """

    def __init__(self, config: ConfidenceConfig | None = None):
        """
        Create a calculator.

        Args:
            config: Explicit configuration. When omitted, a configuration is
                built from ``get_settings()``; any setting attribute that is
                missing falls back to the ``ConfidenceConfig`` default value.
        """
        if config is None:
            # Settings are only loaded when they are actually needed, so an
            # explicitly configured calculator does not touch get_settings().
            settings = get_settings()
            config = ConfidenceConfig(
                score_threshold=getattr(settings, "rag_score_threshold", 0.7),
                min_hits=getattr(settings, "rag_min_hits", 1),
                confidence_low_threshold=getattr(settings, "confidence_low_threshold", 0.5),
                confidence_high_threshold=getattr(settings, "confidence_high_threshold", 0.8),
                insufficient_penalty=getattr(settings, "confidence_insufficient_penalty", 0.3),
                max_evidence_tokens=getattr(settings, "rag_max_evidence_tokens", 2000),
            )
        self._config = config

    def is_retrieval_insufficient(
        self,
        retrieval_result: RetrievalResult,
        evidence_tokens: int | None = None,
    ) -> tuple[bool, str]:
        """
        [AC-AISVC-17] Determine if retrieval results are insufficient.

        Conditions for insufficiency (every matching condition is reported):
        1. hit_count < min_hits
        2. max_score < score_threshold (a NaN max_score also counts)
        3. evidence tokens exceed limit (only checked when a count is given)

        Args:
            retrieval_result: Result from retrieval operation; its
                ``hit_count`` and ``max_score`` attributes are read.
            evidence_tokens: Optional token count for evidence

        Returns:
            Tuple of (is_insufficient, reason). ``reason`` joins all matching
            conditions with "; ", or is "sufficient" when none matched.
        """
        cfg = self._config
        hit_count = retrieval_result.hit_count
        max_score = retrieval_result.max_score
        reasons: list[str] = []

        if hit_count < cfg.min_hits:
            reasons.append(f"hit_count({hit_count}) < min_hits({cfg.min_hits})")

        # NaN compares False against everything, so it has to be tested
        # explicitly; otherwise an undefined score would pass as sufficient.
        if math.isnan(max_score) or max_score < cfg.score_threshold:
            reasons.append(f"max_score({max_score:.3f}) < threshold({cfg.score_threshold})")

        if evidence_tokens is not None and evidence_tokens > cfg.max_evidence_tokens:
            reasons.append(f"evidence_tokens({evidence_tokens}) > max({cfg.max_evidence_tokens})")

        if not reasons:
            return False, "sufficient"
        return True, "; ".join(reasons)

    def calculate_confidence(
        self,
        retrieval_result: RetrievalResult,
        evidence_tokens: int | None = None,
        additional_factors: dict[str, float] | None = None,
    ) -> ConfidenceResult:
        """
        [AC-AISVC-17, AC-AISVC-18, AC-AISVC-19]
        Calculate confidence and transfer suggestion.

        MVP Strategy:
        1. Base confidence from max retrieval score
        2. Adjust for hit count (more hits = higher confidence, saturating)
        3. Penalize if retrieval is insufficient
        4. Add each additional factor, scaled down
        5. Clamp to [0.0, 1.0]; a NaN result is treated as 0.0
        6. Determine shouldTransfer based on T_low threshold

        Args:
            retrieval_result: Result from retrieval operation
            evidence_tokens: Optional token count for evidence
            additional_factors: Optional additional confidence factors; only
                the values are used, the keys are ignored.

        Returns:
            ConfidenceResult with confidence (rounded to 3 decimals),
            transfer suggestion and a diagnostics dict.
        """
        cfg = self._config
        is_insufficient, insufficiency_reason = self.is_retrieval_insufficient(
            retrieval_result, evidence_tokens
        )

        base_confidence = retrieval_result.max_score
        hit_count_factor = min(1.0, retrieval_result.hit_count / _HIT_COUNT_SATURATION)
        confidence = base_confidence * _SCORE_WEIGHT + hit_count_factor * _HIT_COUNT_WEIGHT

        if is_insufficient:
            confidence -= cfg.insufficient_penalty
            logger.info(
                "[AC-AISVC-17] Retrieval insufficient: %s, applying penalty -%s",
                insufficiency_reason,
                cfg.insufficient_penalty,
            )

        if additional_factors:
            for factor_value in additional_factors.values():
                confidence += factor_value * _ADDITIONAL_FACTOR_WEIGHT

        if math.isnan(confidence):
            # max(0.0, min(1.0, nan)) evaluates to 1.0, which would report
            # maximum confidence for an undefined score. Fail safe instead.
            confidence = 0.0
        else:
            confidence = max(0.0, min(1.0, confidence))

        # The decision is taken on the clamped, not yet rounded, value.
        should_transfer = confidence < cfg.confidence_low_threshold

        transfer_reason = None
        if should_transfer:
            if is_insufficient:
                transfer_reason = "检索结果不足,无法提供高置信度回答"
            else:
                transfer_reason = "置信度低于阈值,建议转人工"
        elif confidence < cfg.confidence_high_threshold and is_insufficient:
            # No transfer, but attach an advisory reason.
            transfer_reason = "检索结果有限,回答可能不够准确"

        diagnostics = {
            "base_confidence": base_confidence,
            "hit_count": retrieval_result.hit_count,
            "max_score": retrieval_result.max_score,
            "is_insufficient": is_insufficient,
            "insufficiency_reason": insufficiency_reason if is_insufficient else None,
            "penalty_applied": cfg.insufficient_penalty if is_insufficient else 0.0,
            "threshold_low": cfg.confidence_low_threshold,
            "threshold_high": cfg.confidence_high_threshold,
        }

        logger.info(
            "[AC-AISVC-17, AC-AISVC-18] Confidence calculated: %.3f, "
            "should_transfer=%s, insufficient=%s",
            confidence,
            should_transfer,
            is_insufficient,
        )

        return ConfidenceResult(
            confidence=round(confidence, 3),
            should_transfer=should_transfer,
            transfer_reason=transfer_reason,
            is_retrieval_insufficient=is_insufficient,
            diagnostics=diagnostics,
        )

    def calculate_confidence_no_retrieval(self) -> ConfidenceResult:
        """
        [AC-AISVC-17] Calculate confidence when no retrieval was performed.

        Returns a fixed low-confidence result that always suggests transfer,
        independent of the configured thresholds (which are only echoed in
        the diagnostics).
        """
        return ConfidenceResult(
            confidence=_NO_RETRIEVAL_CONFIDENCE,
            should_transfer=True,
            transfer_reason="未进行知识库检索,建议转人工",
            is_retrieval_insufficient=True,
            diagnostics={
                "base_confidence": 0.0,
                "hit_count": 0,
                "max_score": 0.0,
                "is_insufficient": True,
                "insufficiency_reason": "no_retrieval",
                "penalty_applied": 0.0,
                "threshold_low": self._config.confidence_low_threshold,
                "threshold_high": self._config.confidence_high_threshold,
            },
        )


# Lazily created module-level instance returned by get_confidence_calculator().
_confidence_calculator: ConfidenceCalculator | None = None


def get_confidence_calculator() -> ConfidenceCalculator:
    """Get or create confidence calculator instance."""
    global _confidence_calculator
    if _confidence_calculator is None:
        _confidence_calculator = ConfidenceCalculator()
    return _confidence_calculator