188 lines
6.2 KiB
Python
188 lines
6.2 KiB
Python
|
|
"""
|
||
|
|
Retrieval and Embedding Strategy Configuration.
|
||
|
|
[AC-AISVC-RES-01~15] Configuration for strategy routing and mode routing.
|
||
|
|
"""
|
||
|
|
|
||
|
|
from dataclasses import dataclass, field
|
||
|
|
from enum import Enum
|
||
|
|
from typing import Any
|
||
|
|
|
||
|
|
|
||
|
|
class StrategyType(str, Enum):
    """Strategy type for retrieval pipeline selection.

    Members mix in ``str``, so each one compares equal to its plain string
    value (for example ``StrategyType.DEFAULT == "default"``) and can be
    looked up from that string with ``StrategyType("default")``.
    """

    # Baseline retrieval pipeline.
    DEFAULT = "default"

    # Enhanced retrieval pipeline.
    ENHANCED = "enhanced"
|
||
|
|
|
||
|
|
|
||
|
|
class RagRuntimeMode(str, Enum):
    """RAG runtime mode for execution path selection.

    Members mix in ``str``, so each one compares equal to its plain string
    value and can be looked up from that string, e.g. ``RagRuntimeMode("auto")``.
    """

    # Direct execution path.
    DIRECT = "direct"

    # ReAct execution path.
    REACT = "react"

    # Choose between the direct and react paths at runtime.
    AUTO = "auto"
|
||
|
|
|
||
|
|
|
||
|
|
@dataclass
class RoutingConfig:
    """
    [AC-AISVC-RES-01~15] Routing configuration for strategy and mode selection.

    Configuration hierarchy:
    1. Strategy selection (default vs enhanced)
    2. Mode selection (direct/react/auto)
    3. Auto routing rules (complexity/confidence thresholds)
    4. Fallback behavior
    """

    # NOTE(review): no method of this class reads `enabled`; presumably it is
    # consumed by the caller that owns the routing decision - confirm.
    enabled: bool = True
    strategy: StrategyType = StrategyType.DEFAULT

    # Grayscale rollout of the enhanced strategy while `strategy` is DEFAULT.
    # `grayscale_percentage` is a fraction in [0.0, 1.0] (see `validate`).
    grayscale_percentage: float = 0.0
    grayscale_allowlist: list[str] = field(default_factory=list)

    rag_runtime_mode: RagRuntimeMode = RagRuntimeMode.AUTO

    # Auto-mode triggers, read by `should_trigger_react_in_auto_mode`.
    react_trigger_confidence_threshold: float = 0.6
    react_trigger_complexity_score: float = 0.5
    react_max_steps: int = 5

    # Direct -> react fallback, read by `should_fallback_direct_to_react`.
    direct_fallback_on_low_confidence: bool = True
    direct_fallback_confidence_threshold: float = 0.4

    # Only range-checked by `validate` in this class.
    performance_budget_ms: int = 5000
    performance_degradation_threshold: float = 0.2

    def should_use_enhanced_strategy(self, tenant_id: str | None = None) -> bool:
        """
        [AC-AISVC-RES-02, AC-AISVC-RES-03] Determine if enhanced strategy should be used.

        Priority:
        1. If strategy is explicitly set to ENHANCED, use enhanced.
        2. Otherwise (strategy is DEFAULT) apply the grayscale rollout:
           a tenant listed in ``grayscale_allowlist`` uses enhanced, and so
           does a tenant whose hash bucket falls below ``grayscale_percentage``.
        3. In every other case, including a missing tenant_id, use default.

        Args:
            tenant_id: Tenant evaluated for the grayscale rollout. ``None``
                or an empty string never matches the grayscale rules.

        Returns:
            True if the enhanced strategy should be used.
        """
        if self.strategy == StrategyType.ENHANCED:
            return True

        # StrategyType only has DEFAULT and ENHANCED, so returning False for
        # DEFAULT right here would make the grayscale settings unreachable.
        # DEFAULT therefore falls through to the grayscale rollout checks.
        if not tenant_id:
            return False

        # Explicit opt-in wins regardless of the percentage bucket.
        if tenant_id in self.grayscale_allowlist:
            return True

        if self.grayscale_percentage > 0:
            import hashlib

            # MD5 is used only to spread tenants over 100 stable buckets,
            # not for anything security related.
            digest = hashlib.md5(tenant_id.encode(), usedforsecurity=False).hexdigest()
            bucket = int(digest[:8], 16) % 100
            # Rounding removes float noise such as 0.07 * 100 == 7.000000000000001,
            # which would otherwise admit one bucket too many.
            return bucket < round(self.grayscale_percentage * 100, 9)

        return False

    def get_rag_runtime_mode(self) -> RagRuntimeMode:
        """Get the configured RAG runtime mode."""
        return self.rag_runtime_mode

    def should_fallback_direct_to_react(self, confidence: float) -> bool:
        """
        [AC-AISVC-RES-14] Determine if direct mode should fallback to react.

        Args:
            confidence: Retrieval confidence score (0.0 ~ 1.0)

        Returns:
            True if fallback should be triggered, i.e. the fallback is
            switched on and ``confidence`` is strictly below
            ``direct_fallback_confidence_threshold``.
        """
        if not self.direct_fallback_on_low_confidence:
            return False

        return confidence < self.direct_fallback_confidence_threshold

    def should_trigger_react_in_auto_mode(
        self,
        confidence: float,
        complexity_score: float,
    ) -> bool:
        """
        [AC-AISVC-RES-11, AC-AISVC-RES-12, AC-AISVC-RES-13]
        Determine if react mode should be triggered in auto mode.

        Direct conditions (preferred):
        - Short query, clear intent
        - High metadata confidence
        - No cross-domain/multi-condition

        React conditions:
        - Multi-condition/multi-constraint
        - Low metadata confidence
        - Need for secondary confirmation or multi-step reasoning

        Args:
            confidence: Metadata inference confidence (0.0 ~ 1.0)
            complexity_score: Query complexity score (0.0 ~ 1.0)

        Returns:
            True if react mode should be used: confidence is strictly below
            ``react_trigger_confidence_threshold`` or complexity is strictly
            above ``react_trigger_complexity_score``.
        """
        low_confidence = confidence < self.react_trigger_confidence_threshold
        high_complexity = complexity_score > self.react_trigger_complexity_score
        return low_confidence or high_complexity

    def validate(self) -> tuple[bool, list[str]]:
        """
        [AC-AISVC-RES-06] Validate configuration consistency.

        Returns:
            (is_valid, list of error messages)
        """
        errors: list[str] = []

        def require_unit_interval(name: str) -> None:
            # `not (0 <= v <= 1)` also rejects NaN, which the pair of
            # comparisons `v < 0 or v > 1` would silently accept.
            if not 0.0 <= getattr(self, name) <= 1.0:
                errors.append(f"{name} must be between 0.0 and 1.0")

        require_unit_interval("grayscale_percentage")
        require_unit_interval("react_trigger_confidence_threshold")
        require_unit_interval("react_trigger_complexity_score")

        if not 3 <= self.react_max_steps <= 10:
            errors.append("react_max_steps must be between 3 and 10")

        require_unit_interval("direct_fallback_confidence_threshold")

        if self.performance_budget_ms < 1000:
            errors.append("performance_budget_ms must be at least 1000")

        require_unit_interval("performance_degradation_threshold")

        return (not errors, errors)
|
||
|
|
|
||
|
|
|
||
|
|
@dataclass
|
||
|
|
class StrategyContext:
|
||
|
|
"""Context for strategy routing decision."""
|
||
|
|
tenant_id: str
|
||
|
|
query: str
|
||
|
|
metadata_filter: dict[str, Any] | None = None
|
||
|
|
metadata_confidence: float = 1.0
|
||
|
|
complexity_score: float = 0.0
|
||
|
|
kb_ids: list[str] | None = None
|
||
|
|
top_k: int = 5
|
||
|
|
additional_context: dict[str, Any] = field(default_factory=dict)
|
||
|
|
|
||
|
|
|
||
|
|
@dataclass
class StrategyResult:
    """Result from strategy routing.

    ``strategy`` and ``mode`` are required; the fallback fields and the
    diagnostics mapping are optional.
    """

    strategy: StrategyType
    mode: RagRuntimeMode

    # Fallback flag and optional reason; default to False and None.
    should_fallback: bool = False
    fallback_reason: str | None = None

    # Free-form diagnostic data; each instance gets its own fresh dict.
    diagnostics: dict[str, Any] = field(default_factory=dict)
|