"""
LLM judge for intent arbitration.

[AC-AISVC-118, AC-AISVC-119] LLM-based intent arbitration.
"""
|
||
|
||
import asyncio
|
||
import json
|
||
import logging
|
||
import time
|
||
from typing import TYPE_CHECKING, Any
|
||
|
||
from app.services.intent.models import (
|
||
FusionConfig,
|
||
LlmJudgeInput,
|
||
LlmJudgeResult,
|
||
RuleMatchResult,
|
||
SemanticMatchResult,
|
||
)
|
||
|
||
if TYPE_CHECKING:
|
||
from app.services.llm.base import LLMClient
|
||
|
||
logger = logging.getLogger(__name__)
|
||
|
||
|
||
class LlmJudge:
    """
    [AC-AISVC-118] LLM-based intent arbitrator.

    Triggered when:
    - Rule vs Semantic conflict
    - Gray zone (low confidence)
    - Multiple intent candidates with similar scores
    """

    # Arbitration prompt (kept in Chinese — it is runtime behavior sent to the
    # model, not a comment). It asks for bare JSON without ``` fences, but
    # _parse_response still strips fences defensively since models do not
    # always comply.
    JUDGE_PROMPT = """你是一个意图识别仲裁器。根据用户消息和候选意图,判断最匹配的意图。

用户消息:{message}

候选意图:
{candidates}

请返回 JSON 格式(不要包含```json标记):
{{
    "intent_id": "最匹配的意图ID",
    "intent_name": "意图名称",
    "confidence": 0.0-1.0之间的置信度,
    "reasoning": "判断理由"
}}"""

    def __init__(
        self,
        llm_client: "LLMClient",
        config: FusionConfig,
    ):
        """
        Initialize LLM judge.

        Args:
            llm_client: LLM client for generating responses
            config: Fusion configuration (thresholds, enable flag, timeout)
        """
        self._llm_client = llm_client
        self._config = config

    def should_trigger(
        self,
        rule_result: RuleMatchResult,
        semantic_result: SemanticMatchResult,
        config: FusionConfig | None = None,
    ) -> tuple[bool, str]:
        """
        [AC-AISVC-118] Check if LLM judge should be triggered.

        Trigger conditions (checked in priority order):
        1. Conflict: rule and semantic match different intents with close scores
        2. Gray zone: max confidence falls inside the configured gray-zone range
        3. Multi-intent: top two semantic candidates have similar scores

        Args:
            rule_result: Rule matching result
            semantic_result: Semantic matching result
            config: Optional config override (falls back to instance config)

        Returns:
            Tuple of (should_trigger, trigger_reason). The reason is
            "disabled" when the judge is turned off, "" when no condition
            fires.
        """
        effective_config = config or self._config

        if not effective_config.llm_judge_enabled:
            return False, "disabled"

        rule_score = rule_result.score
        semantic_score = semantic_result.top_score

        # Condition 1: both matchers fired, disagree on the intent, and their
        # scores are too close to pick a winner mechanically.
        if rule_score > 0 and semantic_score > 0 and semantic_result.candidates:
            top_semantic_rule_id = semantic_result.candidates[0].rule.id
            if (
                rule_result.rule_id != top_semantic_rule_id
                and abs(rule_score - semantic_score) < effective_config.conflict_threshold
            ):
                # Lazy %-args: no string formatting unless INFO is enabled.
                logger.info(
                    "[AC-AISVC-118] LLM judge triggered: rule_semantic_conflict, "
                    "rule_id=%s, semantic_id=%s, rule_score=%s, semantic_score=%s",
                    rule_result.rule_id,
                    top_semantic_rule_id,
                    rule_score,
                    semantic_score,
                )
                return True, "rule_semantic_conflict"

        # Condition 2: the best available score lands in the gray zone.
        max_score = max(rule_score, semantic_score)
        if effective_config.min_trigger_threshold < max_score < effective_config.gray_zone_threshold:
            logger.info(
                "[AC-AISVC-118] LLM judge triggered: gray_zone, max_score=%s",
                max_score,
            )
            return True, "gray_zone"

        # Condition 3: the two best semantic candidates are nearly tied.
        if len(semantic_result.candidates) >= 2:
            top1_score = semantic_result.candidates[0].score
            top2_score = semantic_result.candidates[1].score
            if abs(top1_score - top2_score) < effective_config.multi_intent_threshold:
                logger.info(
                    "[AC-AISVC-118] LLM judge triggered: multi_intent, "
                    "top1_score=%s, top2_score=%s",
                    top1_score,
                    top2_score,
                )
                return True, "multi_intent"

        return False, ""

    async def judge(
        self,
        input_data: LlmJudgeInput,
        tenant_id: str,
    ) -> LlmJudgeResult:
        """
        [AC-AISVC-119] Perform LLM arbitration.

        Never raises: timeouts and LLM errors are converted into a zero-score
        result with the failure reason in ``reasoning``.

        Args:
            input_data: Judge input with message and candidates (each
                candidate is a mapping with at least 'id' and 'name';
                'description' is optional)
            tenant_id: Tenant ID for isolation (used for logging only here)

        Returns:
            LlmJudgeResult with arbitration decision
        """
        # perf_counter is monotonic — immune to wall-clock adjustments,
        # unlike the time.time() the original used for durations.
        start_time = time.perf_counter()

        candidates_text = "\n".join(
            f"- ID: {c['id']}, 名称: {c['name']}, 描述: {c.get('description', 'N/A')}"
            for c in input_data.candidates
        )

        prompt = self.JUDGE_PROMPT.format(
            message=input_data.message,
            candidates=candidates_text,
        )

        try:
            # Imported lazily to avoid an import cycle with the LLM layer
            # (mirrors the TYPE_CHECKING-only import at module top).
            from app.services.llm.base import LLMConfig

            response = await asyncio.wait_for(
                self._llm_client.generate(
                    messages=[{"role": "user", "content": prompt}],
                    config=LLMConfig(
                        max_tokens=200,
                        temperature=0,  # deterministic arbitration
                    ),
                ),
                timeout=self._config.llm_judge_timeout_ms / 1000,
            )

            result = self._parse_response(response.content or "")
            duration_ms = self._elapsed_ms(start_time)

            tokens_used = 0
            if response.usage:
                tokens_used = response.usage.get("total_tokens", 0)

            logger.info(
                "[AC-AISVC-119] LLM judge completed for tenant=%s, "
                "intent_id=%s, confidence=%.3f, duration=%dms, tokens=%d",
                tenant_id,
                result.get("intent_id"),
                result.get("confidence", 0),
                duration_ms,
                tokens_used,
            )

            return LlmJudgeResult(
                intent_id=result.get("intent_id"),
                intent_name=result.get("intent_name"),
                # Default 0.5 when the model omitted confidence; float() may
                # raise on garbage, which the except below converts to a
                # zero-score failure result.
                score=float(result.get("confidence", 0.5)),
                reasoning=result.get("reasoning"),
                duration_ms=duration_ms,
                tokens_used=tokens_used,
                triggered=True,
            )

        except asyncio.TimeoutError:
            logger.warning(
                "[AC-AISVC-119] LLM judge timeout for tenant=%s, timeout=%sms",
                tenant_id,
                self._config.llm_judge_timeout_ms,
            )
            return self._failure_result("LLM timeout", start_time)
        except Exception as e:
            # logger.exception preserves the traceback; the original
            # logger.error dropped it.
            logger.exception(
                "[AC-AISVC-119] LLM judge error for tenant=%s: %s", tenant_id, e
            )
            return self._failure_result(f"LLM error: {str(e)}", start_time)

    def _failure_result(self, reason: str, start_time: float) -> LlmJudgeResult:
        """Build the zero-score result shared by the timeout/error paths."""
        return LlmJudgeResult(
            intent_id=None,
            intent_name=None,
            score=0.0,
            reasoning=reason,
            duration_ms=self._elapsed_ms(start_time),
            tokens_used=0,
            triggered=True,
        )

    @staticmethod
    def _elapsed_ms(start_time: float) -> int:
        """Milliseconds elapsed since *start_time* (a perf_counter() value)."""
        return int((time.perf_counter() - start_time) * 1000)

    def _parse_response(self, content: str) -> dict[str, Any]:
        """
        Parse LLM response to extract the JSON arbitration verdict.

        Strips optional markdown code fences (```json ... ```), which some
        models emit despite the prompt forbidding them.

        Args:
            content: LLM response content

        Returns:
            Parsed dictionary with intent_id, intent_name, confidence,
            reasoning — or {} when the content is not valid JSON.
        """
        cleaned = content.strip()
        cleaned = cleaned.removeprefix("```json").removeprefix("```")
        cleaned = cleaned.removesuffix("```").strip()

        try:
            result: dict[str, Any] = json.loads(cleaned)
            return result
        except json.JSONDecodeError as e:
            logger.warning("[AC-AISVC-119] Failed to parse LLM response: %s", e)
            return {}
|