225 lines
8.0 KiB
Python
225 lines
8.0 KiB
Python
|
|
"""
|
||
|
|
Confidence calculation for AI Service.
|
||
|
|
[AC-AISVC-17, AC-AISVC-18, AC-AISVC-19] Confidence scoring and transfer suggestion logic.
|
||
|
|
|
||
|
|
Design reference: design.md Section 4.3 - 检索不中兜底与置信度策略
|
||
|
|
- Retrieval insufficiency detection
|
||
|
|
- Confidence calculation based on retrieval scores
|
||
|
|
- shouldTransfer logic with threshold T_low
|
||
|
|
"""
|
||
|
|
|
||
|
|
import logging
import math
from dataclasses import dataclass, field
from typing import Any

from app.core.config import get_settings
from app.services.retrieval.base import RetrievalResult
|
||
|
|
|
||
|
|
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
|
||
|
|
|
||
|
|
|
||
|
|
@dataclass
class ConfidenceConfig:
    """
    Tunable parameters for confidence scoring.

    [AC-AISVC-17, AC-AISVC-18] All thresholds are configurable.
    """

    # Best retrieval score must reach this value, otherwise retrieval is
    # reported as insufficient.
    score_threshold: float = 0.7

    # Fewer hits than this marks retrieval as insufficient.
    min_hits: int = 1

    # T_low: confidence below this value suggests a transfer.
    confidence_low_threshold: float = 0.5

    # Upper confidence bound below which an insufficient retrieval still
    # gets a cautionary reason attached.
    confidence_high_threshold: float = 0.8

    # Amount subtracted from the confidence when retrieval is insufficient.
    insufficient_penalty: float = 0.3

    # Evidence token counts above this limit mark retrieval as insufficient.
    max_evidence_tokens: int = 2000
|
||
|
|
|
||
|
|
|
||
|
|
@dataclass
|
||
|
|
class ConfidenceResult:
|
||
|
|
"""
|
||
|
|
Result of confidence calculation.
|
||
|
|
[AC-AISVC-17, AC-AISVC-18, AC-AISVC-19] Contains confidence and transfer suggestion.
|
||
|
|
"""
|
||
|
|
confidence: float
|
||
|
|
should_transfer: bool
|
||
|
|
transfer_reason: str | None = None
|
||
|
|
is_retrieval_insufficient: bool = False
|
||
|
|
diagnostics: dict[str, Any] = field(default_factory=dict)
|
||
|
|
|
||
|
|
|
||
|
|
class ConfidenceCalculator:
    """
    [AC-AISVC-17, AC-AISVC-18, AC-AISVC-19] Calculator for response confidence.

    Design reference: design.md Section 4.3
    - MVP: confidence based on RAG retrieval scores
    - Insufficient retrieval triggers confidence downgrade
    - shouldTransfer when confidence < T_low
    """

    # Blend weights: confidence = max_score * 0.7 + hit_count_factor * 0.3.
    _SCORE_WEIGHT = 0.7
    _HIT_COUNT_WEIGHT = 0.3
    # Hit count at which the hit-count factor saturates at 1.0.
    _HIT_COUNT_SATURATION = 5.0
    # Each additional factor contributes value * 0.1 to the confidence.
    _ADDITIONAL_FACTOR_WEIGHT = 0.1
    # Fixed confidence reported when no retrieval was performed.
    _NO_RETRIEVAL_CONFIDENCE = 0.3

    def __init__(self, config: ConfidenceConfig | None = None):
        """
        Create a calculator.

        Args:
            config: Explicit configuration. When omitted, a configuration is
                built from application settings, falling back to the
                built-in defaults for any setting that is missing.
        """
        if config is None:
            # Settings are only loaded when they are actually needed, so a
            # caller that supplies its own config does not depend on them.
            settings = get_settings()
            config = ConfidenceConfig(
                score_threshold=getattr(settings, "rag_score_threshold", 0.7),
                min_hits=getattr(settings, "rag_min_hits", 1),
                confidence_low_threshold=getattr(settings, "confidence_low_threshold", 0.5),
                confidence_high_threshold=getattr(settings, "confidence_high_threshold", 0.8),
                insufficient_penalty=getattr(settings, "confidence_insufficient_penalty", 0.3),
                max_evidence_tokens=getattr(settings, "rag_max_evidence_tokens", 2000),
            )
        self._config = config

    def is_retrieval_insufficient(
        self,
        retrieval_result: RetrievalResult,
        evidence_tokens: int | None = None,
    ) -> tuple[bool, str]:
        """
        [AC-AISVC-17] Determine if retrieval results are insufficient.

        Conditions for insufficiency:
        1. hits.size < min_hits
        2. max(score) < score_threshold (a NaN score also counts)
        3. evidence tokens exceed limit (optional)

        Args:
            retrieval_result: Result from retrieval operation
            evidence_tokens: Optional token count for evidence

        Returns:
            Tuple of (is_insufficient, reason). The reason joins every
            failed condition with "; ", or is "sufficient" when none failed.
        """
        config = self._config
        hit_count = retrieval_result.hit_count
        max_score = retrieval_result.max_score
        reasons: list[str] = []

        if hit_count < config.min_hits:
            reasons.append(f"hit_count({hit_count}) < min_hits({config.min_hits})")

        # Written as "not >=" instead of "<": every comparison with NaN is
        # False, so a NaN score would otherwise pass as sufficient.
        if not max_score >= config.score_threshold:
            reasons.append(
                f"max_score({max_score:.3f}) < threshold({config.score_threshold})"
            )

        if evidence_tokens is not None and evidence_tokens > config.max_evidence_tokens:
            reasons.append(
                f"evidence_tokens({evidence_tokens}) > max({config.max_evidence_tokens})"
            )

        if not reasons:
            return False, "sufficient"
        return True, "; ".join(reasons)

    def calculate_confidence(
        self,
        retrieval_result: RetrievalResult,
        evidence_tokens: int | None = None,
        additional_factors: dict[str, float] | None = None,
    ) -> ConfidenceResult:
        """
        [AC-AISVC-17, AC-AISVC-18, AC-AISVC-19] Calculate confidence and transfer suggestion.

        MVP Strategy:
        1. Base confidence from max retrieval score
        2. Adjust for hit count (more hits = higher confidence)
        3. Penalize if retrieval is insufficient
        4. Determine shouldTransfer based on T_low threshold

        Args:
            retrieval_result: Result from retrieval operation
            evidence_tokens: Optional token count for evidence
            additional_factors: Optional additional confidence factors; each
                value is scaled by 0.1 and added to the confidence

        Returns:
            ConfidenceResult with confidence (clamped to [0, 1] and rounded
            to 3 decimals) and transfer suggestion
        """
        config = self._config
        is_insufficient, insufficiency_reason = self.is_retrieval_insufficient(
            retrieval_result, evidence_tokens
        )

        hit_count = retrieval_result.hit_count
        max_score = retrieval_result.max_score

        base_confidence = max_score
        hit_count_factor = min(1.0, hit_count / self._HIT_COUNT_SATURATION)
        confidence = (
            base_confidence * self._SCORE_WEIGHT
            + hit_count_factor * self._HIT_COUNT_WEIGHT
        )

        if is_insufficient:
            confidence -= config.insufficient_penalty
            logger.info(
                "[AC-AISVC-17] Retrieval insufficient: %s, applying penalty -%s",
                insufficiency_reason,
                config.insufficient_penalty,
            )

        if additional_factors:
            for factor_value in additional_factors.values():
                confidence += factor_value * self._ADDITIONAL_FACTOR_WEIGHT

        # min(1.0, nan) evaluates to 1.0, so without this guard a NaN score or
        # factor would be clamped to full confidence.
        if math.isnan(confidence):
            confidence = 0.0

        # Round before thresholding so that should_transfer always agrees
        # with the confidence value handed back to the caller.
        confidence = round(max(0.0, min(1.0, confidence)), 3)

        should_transfer = confidence < config.confidence_low_threshold
        transfer_reason = None

        if should_transfer:
            if is_insufficient:
                transfer_reason = "检索结果不足,无法提供高置信度回答"
            else:
                transfer_reason = "置信度低于阈值,建议转人工"
        elif confidence < config.confidence_high_threshold and is_insufficient:
            # No transfer suggested, but the caller is still warned.
            transfer_reason = "检索结果有限,回答可能不够准确"

        diagnostics = {
            "base_confidence": base_confidence,
            "hit_count": hit_count,
            "max_score": max_score,
            "is_insufficient": is_insufficient,
            "insufficiency_reason": insufficiency_reason if is_insufficient else None,
            "penalty_applied": config.insufficient_penalty if is_insufficient else 0.0,
            "threshold_low": config.confidence_low_threshold,
            "threshold_high": config.confidence_high_threshold,
        }

        logger.info(
            "[AC-AISVC-17, AC-AISVC-18] Confidence calculated: "
            "%.3f, should_transfer=%s, insufficient=%s",
            confidence,
            should_transfer,
            is_insufficient,
        )

        return ConfidenceResult(
            confidence=confidence,
            should_transfer=should_transfer,
            transfer_reason=transfer_reason,
            is_retrieval_insufficient=is_insufficient,
            diagnostics=diagnostics,
        )

    def calculate_confidence_no_retrieval(self) -> ConfidenceResult:
        """
        [AC-AISVC-17] Calculate confidence when no retrieval was performed.

        Returns:
            A fixed low-confidence result (0.3) that always suggests a
            transfer and is flagged as retrieval-insufficient.
        """
        return ConfidenceResult(
            confidence=self._NO_RETRIEVAL_CONFIDENCE,
            should_transfer=True,
            transfer_reason="未进行知识库检索,建议转人工",
            is_retrieval_insufficient=True,
            diagnostics={
                "base_confidence": 0.0,
                "hit_count": 0,
                "max_score": 0.0,
                "is_insufficient": True,
                "insufficiency_reason": "no_retrieval",
                "penalty_applied": 0.0,
                "threshold_low": self._config.confidence_low_threshold,
                "threshold_high": self._config.confidence_high_threshold,
            },
        )
|
||
|
|
|
||
|
|
|
||
|
|
# Shared calculator instance; stays None until first requested.
_confidence_calculator: ConfidenceCalculator | None = None


def get_confidence_calculator() -> ConfidenceCalculator:
    """Return the shared ConfidenceCalculator, building it on first use."""
    global _confidence_calculator
    instance = _confidence_calculator
    if instance is None:
        instance = ConfidenceCalculator()
        _confidence_calculator = instance
    return instance
|