ai-robot-core/ai-service/app/services/retrieval/routing_config.py

188 lines
6.2 KiB
Python

"""
Retrieval and Embedding Strategy Configuration.
[AC-AISVC-RES-01~15] Configuration for strategy routing and mode routing.
"""
from dataclasses import dataclass, field
from enum import Enum
from typing import Any
class StrategyType(str, Enum):
    """Retrieval pipeline strategies that routing can choose between.

    Members also subclass ``str``, so each one compares equal to its raw
    string value (for example ``StrategyType.DEFAULT == "default"``).
    """

    DEFAULT = "default"
    ENHANCED = "enhanced"
class RagRuntimeMode(str, Enum):
    """Execution paths available to the RAG runtime.

    Members also subclass ``str``, so each one compares equal to its raw
    string value (for example ``RagRuntimeMode.AUTO == "auto"``).
    """

    DIRECT = "direct"
    REACT = "react"
    AUTO = "auto"
@dataclass
class RoutingConfig:
    """
    [AC-AISVC-RES-01~15] Routing configuration for strategy and mode selection.

    Configuration hierarchy:
    1. Strategy selection (default vs enhanced)
    2. Mode selection (direct/react/auto)
    3. Auto routing rules (complexity/confidence thresholds)
    4. Fallback behavior
    """

    # NOTE(review): not read by any method of this class; presumably a master
    # switch consulted by callers -- confirm.
    enabled: bool = True
    strategy: StrategyType = StrategyType.DEFAULT
    # Fraction (0.0 ~ 1.0) of tenants promoted to the enhanced strategy.
    grayscale_percentage: float = 0.0
    # Tenant ids that always get the enhanced strategy.
    grayscale_allowlist: list[str] = field(default_factory=list)
    rag_runtime_mode: RagRuntimeMode = RagRuntimeMode.AUTO
    # Auto mode: confidence below this value triggers react.
    react_trigger_confidence_threshold: float = 0.6
    # Auto mode: complexity above this value triggers react.
    react_trigger_complexity_score: float = 0.5
    # validate() requires 3 <= react_max_steps <= 10.
    react_max_steps: int = 5
    direct_fallback_on_low_confidence: bool = True
    # Direct mode: confidence below this value triggers the fallback.
    direct_fallback_confidence_threshold: float = 0.4
    # validate() requires at least 1000.
    performance_budget_ms: int = 5000
    # Only range-checked here (0.0 ~ 1.0); consumed elsewhere.
    performance_degradation_threshold: float = 0.2

    def should_use_enhanced_strategy(self, tenant_id: str | None = None) -> bool:
        """
        [AC-AISVC-RES-02, AC-AISVC-RES-03] Determine if enhanced strategy should be used.

        Priority:
        1. If strategy is explicitly set to ENHANCED, use enhanced.
        2. Otherwise, a tenant listed in ``grayscale_allowlist`` uses enhanced.
        3. Otherwise, if ``grayscale_percentage`` > 0, the tenant uses enhanced
           when its stable hash bucket (0-99) falls below the percentage.
        4. Otherwise, use default.

        Args:
            tenant_id: Tenant identifier used for grayscale decisions. Without
                one, only an explicit ENHANCED strategy returns True.

        Returns:
            True if the enhanced strategy should be used.
        """
        if self.strategy == StrategyType.ENHANCED:
            return True

        # Grayscale rollout is decided per tenant; nothing to decide without one.
        if not tenant_id:
            return False

        # The allowlist is checked first so an explicitly listed tenant is
        # never excluded by the percentage bucket.
        if tenant_id in self.grayscale_allowlist:
            return True

        if self.grayscale_percentage > 0:
            import hashlib

            # MD5 is used only as a stable, non-cryptographic bucketing hash;
            # usedforsecurity=False keeps it usable on FIPS-restricted builds.
            digest = hashlib.md5(
                tenant_id.encode(), usedforsecurity=False
            ).hexdigest()
            bucket = int(digest[:8], 16) % 100
            return bucket < self.grayscale_percentage * 100

        return False

    def get_rag_runtime_mode(self) -> RagRuntimeMode:
        """Get the configured RAG runtime mode."""
        return self.rag_runtime_mode

    def should_fallback_direct_to_react(self, confidence: float) -> bool:
        """
        [AC-AISVC-RES-14] Determine if direct mode should fallback to react.

        Args:
            confidence: Retrieval confidence score (0.0 ~ 1.0)

        Returns:
            True if fallback should be triggered, i.e. the fallback is enabled
            and ``confidence`` is below ``direct_fallback_confidence_threshold``.
        """
        if not self.direct_fallback_on_low_confidence:
            return False
        return confidence < self.direct_fallback_confidence_threshold

    def should_trigger_react_in_auto_mode(
        self,
        confidence: float,
        complexity_score: float,
    ) -> bool:
        """
        [AC-AISVC-RES-11, AC-AISVC-RES-12, AC-AISVC-RES-13]
        Determine if react mode should be triggered in auto mode.

        Direct conditions (preferred):
        - Short query, clear intent
        - High metadata confidence
        - No cross-domain/multi-condition

        React conditions:
        - Multi-condition/multi-constraint
        - Low metadata confidence
        - Need for secondary confirmation or multi-step reasoning

        Args:
            confidence: Metadata inference confidence (0.0 ~ 1.0)
            complexity_score: Query complexity score (0.0 ~ 1.0)

        Returns:
            True if react mode should be used, i.e. ``confidence`` is below
            ``react_trigger_confidence_threshold`` or ``complexity_score`` is
            above ``react_trigger_complexity_score``.
        """
        return (
            confidence < self.react_trigger_confidence_threshold
            or complexity_score > self.react_trigger_complexity_score
        )

    def validate(self) -> tuple[bool, list[str]]:
        """
        [AC-AISVC-RES-06] Validate configuration consistency.

        Returns:
            (is_valid, list of error messages)
        """
        errors: list[str] = []

        def require_unit_interval(name: str) -> None:
            # Written as "not (lo <= x <= hi)" so that NaN is rejected too;
            # "x < lo or x > hi" would let NaN through.
            if not 0.0 <= getattr(self, name) <= 1.0:
                errors.append(f"{name} must be between 0.0 and 1.0")

        # Checks run in a fixed order so the error list order stays stable.
        require_unit_interval("grayscale_percentage")
        require_unit_interval("react_trigger_confidence_threshold")
        require_unit_interval("react_trigger_complexity_score")
        if not 3 <= self.react_max_steps <= 10:
            errors.append("react_max_steps must be between 3 and 10")
        require_unit_interval("direct_fallback_confidence_threshold")
        if self.performance_budget_ms < 1000:
            errors.append("performance_budget_ms must be at least 1000")
        require_unit_interval("performance_degradation_threshold")

        return (not errors, errors)
@dataclass
class StrategyContext:
"""Context for strategy routing decision."""
tenant_id: str
query: str
metadata_filter: dict[str, Any] | None = None
metadata_confidence: float = 1.0
complexity_score: float = 0.0
kb_ids: list[str] | None = None
top_k: int = 5
additional_context: dict[str, Any] = field(default_factory=dict)
@dataclass
class StrategyResult:
    """Outcome of a strategy routing decision.

    ``strategy`` and ``mode`` are required; the fallback fields and
    diagnostics have defaults.
    """

    strategy: StrategyType
    mode: RagRuntimeMode
    should_fallback: bool = False
    # None when no fallback reason is given.
    fallback_reason: str | None = None
    # A fresh dict per instance, so results never share this mapping.
    diagnostics: dict[str, Any] = field(default_factory=dict)