ai-robot-core/ai-service/app/services/confidence.py

225 lines
8.0 KiB
Python

"""
Confidence calculation for AI Service.
[AC-AISVC-17, AC-AISVC-18, AC-AISVC-19] Confidence scoring and transfer suggestion logic.
Design reference: design.md Section 4.3 - 检索不中兜底与置信度策略 (retrieval-miss fallback and confidence strategy)
- Retrieval insufficiency detection
- Confidence calculation based on retrieval scores
- shouldTransfer logic with threshold T_low
"""
import logging
from dataclasses import dataclass, field
from typing import Any
from app.core.config import get_settings
from app.services.retrieval.base import RetrievalResult
logger = logging.getLogger(__name__)
@dataclass
class ConfidenceConfig:
    """
    Tunable thresholds consumed by the confidence calculation.

    [AC-AISVC-17, AC-AISVC-18] Every field has a default, so callers only
    override the values they need.
    """

    # Retrieval is flagged insufficient when the best score is below this.
    score_threshold: float = 0.7
    # Retrieval is flagged insufficient when fewer hits than this came back.
    min_hits: int = 1
    # A confidence strictly below this value sets should_transfer.
    confidence_low_threshold: float = 0.5
    # Below this value an insufficient retrieval still gets a caveat reason.
    confidence_high_threshold: float = 0.8
    # Amount subtracted from the confidence when retrieval is insufficient.
    insufficient_penalty: float = 0.3
    # Evidence token counts above this limit flag retrieval as insufficient.
    max_evidence_tokens: int = 2000
@dataclass
class ConfidenceResult:
"""
Result of confidence calculation.
[AC-AISVC-17, AC-AISVC-18, AC-AISVC-19] Contains confidence and transfer suggestion.
"""
confidence: float
should_transfer: bool
transfer_reason: str | None = None
is_retrieval_insufficient: bool = False
diagnostics: dict[str, Any] = field(default_factory=dict)
class ConfidenceCalculator:
    """
    [AC-AISVC-17, AC-AISVC-18, AC-AISVC-19] Calculator for response confidence.

    Design reference: design.md Section 4.3
    - MVP: confidence based on RAG retrieval scores
    - Insufficient retrieval triggers confidence downgrade
    - shouldTransfer when confidence < T_low
    """

    # Weights of the blend that produces the pre-penalty confidence.
    _SCORE_WEIGHT = 0.7
    _HIT_COUNT_WEIGHT = 0.3
    # Hit count at which the hit-count factor saturates at 1.0.
    _HIT_COUNT_SATURATION = 5.0
    # Weight applied to every value passed in ``additional_factors``.
    _ADDITIONAL_FACTOR_WEIGHT = 0.1

    def __init__(self, config: ConfidenceConfig | None = None):
        """
        Create a calculator.

        Args:
            config: Explicit thresholds to use. When omitted, a
                ConfidenceConfig is built from the application settings,
                falling back to the built-in default for each missing
                attribute.
        """
        if config is None:
            # Settings are only read when they are actually needed, so a
            # calculator with an explicit config has no settings dependency.
            settings = get_settings()
            config = ConfidenceConfig(
                score_threshold=getattr(settings, "rag_score_threshold", 0.7),
                min_hits=getattr(settings, "rag_min_hits", 1),
                confidence_low_threshold=getattr(settings, "confidence_low_threshold", 0.5),
                confidence_high_threshold=getattr(settings, "confidence_high_threshold", 0.8),
                insufficient_penalty=getattr(settings, "confidence_insufficient_penalty", 0.3),
                max_evidence_tokens=getattr(settings, "rag_max_evidence_tokens", 2000),
            )
        self._config = config

    def is_retrieval_insufficient(
        self,
        retrieval_result: RetrievalResult,
        evidence_tokens: int | None = None,
    ) -> tuple[bool, str]:
        """
        [AC-AISVC-17] Determine if retrieval results are insufficient.

        Conditions for insufficiency (any one is enough):
        1. hit_count < min_hits
        2. max_score < score_threshold
        3. evidence_tokens > max_evidence_tokens (only when a count is given)

        Args:
            retrieval_result: Result from retrieval operation; its
                ``hit_count`` and ``max_score`` attributes are read.
            evidence_tokens: Optional token count for evidence.

        Returns:
            Tuple of (is_insufficient, reason). ``reason`` joins every
            failed condition with "; ", or is "sufficient" when none failed.
        """
        reasons = []
        if retrieval_result.hit_count < self._config.min_hits:
            reasons.append(
                f"hit_count({retrieval_result.hit_count}) < min_hits({self._config.min_hits})"
            )
        if retrieval_result.max_score < self._config.score_threshold:
            reasons.append(
                f"max_score({retrieval_result.max_score:.3f}) < threshold({self._config.score_threshold})"
            )
        if evidence_tokens is not None and evidence_tokens > self._config.max_evidence_tokens:
            reasons.append(
                f"evidence_tokens({evidence_tokens}) > max({self._config.max_evidence_tokens})"
            )
        is_insufficient = bool(reasons)
        reason = "; ".join(reasons) if reasons else "sufficient"
        return is_insufficient, reason

    def calculate_confidence(
        self,
        retrieval_result: RetrievalResult,
        evidence_tokens: int | None = None,
        additional_factors: dict[str, float] | None = None,
    ) -> ConfidenceResult:
        """
        [AC-AISVC-17, AC-AISVC-18, AC-AISVC-19] Calculate confidence and transfer suggestion.

        MVP Strategy:
        1. Base confidence from max retrieval score
        2. Adjust for hit count (more hits = higher confidence, saturating
           at 5 hits)
        3. Penalize if retrieval is insufficient
        4. Add each additional factor, scaled by 0.1
        5. Clamp to [0.0, 1.0]
        6. Determine shouldTransfer based on T_low threshold

        Args:
            retrieval_result: Result from retrieval operation; its
                ``hit_count`` and ``max_score`` attributes are read.
            evidence_tokens: Optional token count for evidence.
            additional_factors: Optional additional confidence factors; only
                the values are used, the keys are labels.

        Returns:
            ConfidenceResult with the confidence rounded to 3 decimals, the
            transfer suggestion, an optional reason and a diagnostics dict.
        """
        is_insufficient, insufficiency_reason = self.is_retrieval_insufficient(
            retrieval_result, evidence_tokens
        )
        base_confidence = retrieval_result.max_score
        hit_count_factor = min(1.0, retrieval_result.hit_count / self._HIT_COUNT_SATURATION)
        confidence = (
            base_confidence * self._SCORE_WEIGHT
            + hit_count_factor * self._HIT_COUNT_WEIGHT
        )
        if is_insufficient:
            confidence -= self._config.insufficient_penalty
            logger.info(
                f"[AC-AISVC-17] Retrieval insufficient: {insufficiency_reason}, "
                f"applying penalty -{self._config.insufficient_penalty}"
            )
        if additional_factors:
            # Only the values matter; added one at a time so the float
            # accumulation order stays the same as iterating the dict.
            for factor_value in additional_factors.values():
                confidence += factor_value * self._ADDITIONAL_FACTOR_WEIGHT
        confidence = max(0.0, min(1.0, confidence))
        # NOTE(review): the thresholds below are compared against the
        # unrounded value while the result reports the value rounded to 3
        # decimals, so the two can disagree within 0.0005 of a threshold.
        should_transfer = confidence < self._config.confidence_low_threshold
        transfer_reason = None
        if should_transfer:
            if is_insufficient:
                transfer_reason = "检索结果不足,无法提供高置信度回答"
            else:
                transfer_reason = "置信度低于阈值,建议转人工"
        elif confidence < self._config.confidence_high_threshold and is_insufficient:
            # No transfer suggested, but the answer still carries a caveat.
            transfer_reason = "检索结果有限,回答可能不够准确"
        diagnostics = {
            "base_confidence": base_confidence,
            "hit_count": retrieval_result.hit_count,
            "max_score": retrieval_result.max_score,
            "is_insufficient": is_insufficient,
            "insufficiency_reason": insufficiency_reason if is_insufficient else None,
            "penalty_applied": self._config.insufficient_penalty if is_insufficient else 0.0,
            "threshold_low": self._config.confidence_low_threshold,
            "threshold_high": self._config.confidence_high_threshold,
        }
        logger.info(
            f"[AC-AISVC-17, AC-AISVC-18] Confidence calculated: "
            f"{confidence:.3f}, should_transfer={should_transfer}, "
            f"insufficient={is_insufficient}"
        )
        return ConfidenceResult(
            confidence=round(confidence, 3),
            should_transfer=should_transfer,
            transfer_reason=transfer_reason,
            is_retrieval_insufficient=is_insufficient,
            diagnostics=diagnostics,
        )

    def calculate_confidence_no_retrieval(self) -> ConfidenceResult:
        """
        [AC-AISVC-17] Calculate confidence when no retrieval was performed.

        Returns:
            A fixed ConfidenceResult with confidence 0.3 that always suggests
            transfer and is marked as retrieval-insufficient. Only the two
            threshold entries in the diagnostics come from the config.
        """
        return ConfidenceResult(
            confidence=0.3,
            should_transfer=True,
            transfer_reason="未进行知识库检索,建议转人工",
            is_retrieval_insufficient=True,
            diagnostics={
                "base_confidence": 0.0,
                "hit_count": 0,
                "max_score": 0.0,
                "is_insufficient": True,
                "insufficiency_reason": "no_retrieval",
                "penalty_applied": 0.0,
                "threshold_low": self._config.confidence_low_threshold,
                "threshold_high": self._config.confidence_high_threshold,
            },
        )
# Module-level cache for the shared calculator; filled on first request.
_confidence_calculator: ConfidenceCalculator | None = None


def get_confidence_calculator() -> ConfidenceCalculator:
    """Return the shared ConfidenceCalculator, building it on first use."""
    global _confidence_calculator
    if _confidence_calculator is not None:
        return _confidence_calculator
    _confidence_calculator = ConfidenceCalculator()
    return _confidence_calculator