feat(AC-AISVC-93): 完整流程测试12步执行时间线与步骤详情

改进内容:
- 每个步骤添加详细的input_data和output_data
- InputScanner: 显示用户输入文本
- FlowEngine: 显示会话ID和流程名称
- IntentRouter: 显示查询和匹配结果
- QueryRewriter: 显示查询和重写状态
- MultiKBRetrieval: 显示查询、top_k、命中数、最高分、top_hits详情
- PromptBuilder: 显示模板ID、行为规则、prompt预览
- LLMGenerate: 显示模型名称(deepseek-chat)、回复长度、回复预览
- OutputFilter: 显示文本长度、是否过滤、触发词
- Confidence: 显示回复长度、命中数、置信度、是否转人工
- Memory: 显示会话ID、保存状态
- Response: 显示置信度、是否转人工、回复预览

修复问题:
- OrchestratorService没有返回execution_steps
- 前端字段名与后端不一致(camelCase vs snake_case)
- RetrievalResult.evidence -> RetrievalResult.hits
- LLM模型名称显示unknown -> 显示实际模型名称
This commit is contained in:
MerCry 2026-02-28 14:01:15 +08:00
parent 6b21ba8351
commit aa02ab79d2
3 changed files with 105 additions and 13 deletions

View File

@ -23,18 +23,18 @@ export interface FlowExecutionStep {
} }
export interface FlowExecutionResponse { export interface FlowExecutionResponse {
testId: string test_id: string
sessionId: string session_id: string
status: string status: string
steps: FlowExecutionStep[] steps: FlowExecutionStep[]
finalResponse: { final_response: {
reply: string reply: string
confidence: number | null confidence: number | null
should_transfer: boolean should_transfer: boolean
sources?: any[] sources?: any[]
} | null } | null
totalDurationMs: number total_duration_ms: number
createdAt: string created_at: string
} }
export interface FlowTestRecord { export interface FlowTestRecord {

View File

@ -152,7 +152,7 @@
<el-tag :type="getStatusType(flowTestResult.status)" size="small"> <el-tag :type="getStatusType(flowTestResult.status)" size="small">
{{ flowTestResult.status }} {{ flowTestResult.status }}
</el-tag> </el-tag>
<span class="duration">{{ flowTestResult.totalDurationMs }}ms</span> <span class="duration">{{ flowTestResult.total_duration_ms }}ms</span>
</div> </div>
</div> </div>
</template> </template>
@ -197,14 +197,14 @@
</el-timeline-item> </el-timeline-item>
</el-timeline> </el-timeline>
<el-divider content-position="left" v-if="flowTestResult.finalResponse">最终响应</el-divider> <el-divider content-position="left" v-if="flowTestResult.final_response">最终响应</el-divider>
<div v-if="flowTestResult.finalResponse" class="final-response"> <div v-if="flowTestResult.final_response" class="final-response">
<div class="response-content">{{ flowTestResult.finalResponse.reply }}</div> <div class="response-content">{{ flowTestResult.final_response.reply }}</div>
<div class="response-meta"> <div class="response-meta">
<span v-if="flowTestResult.finalResponse.confidence"> <span v-if="flowTestResult.final_response.confidence">
置信度: {{ (flowTestResult.finalResponse.confidence * 100).toFixed(1) }}% 置信度: {{ (flowTestResult.final_response.confidence * 100).toFixed(1) }}%
</span> </span>
<el-tag v-if="flowTestResult.finalResponse.should_transfer" type="warning" size="small"> <el-tag v-if="flowTestResult.final_response.should_transfer" type="warning" size="small">
需转人工 需转人工
</el-tag> </el-tag>
</div> </div>

View File

@ -116,6 +116,7 @@ class GenerationContext:
behavior_rules: list[str] = field(default_factory=list) behavior_rules: list[str] = field(default_factory=list)
diagnostics: dict[str, Any] = field(default_factory=dict) diagnostics: dict[str, Any] = field(default_factory=dict)
execution_steps: list[dict[str, Any]] = field(default_factory=list)
class OrchestratorService: class OrchestratorService:
@ -204,6 +205,28 @@ class OrchestratorService:
self._behavior_rule_service = behavior_rule_service self._behavior_rule_service = behavior_rule_service
self._output_filter = output_filter self._output_filter = output_filter
def _record_step(
    self,
    ctx: GenerationContext,
    step_no: int,
    name: str,
    status: str = "success",
    duration_ms: int = 0,
    input_data: Any = None,
    output_data: Any = None,
    error: str | None = None,
) -> None:
    """Append one pipeline step to the context's execution trace.

    Each entry captures the step's position in the 12-step flow, its
    outcome, wall-clock duration, and the data flowing in/out, so the
    flow-test UI can render a per-step timeline with details.
    """
    entry: dict[str, Any] = {
        "step": step_no,
        "name": name,
        "status": status,
        "duration_ms": duration_ms,
        "input": input_data,
        "output": output_data,
        "error": error,
    }
    ctx.execution_steps.append(entry)
async def generate( async def generate(
self, self,
tenant_id: str, tenant_id: str,
@ -242,42 +265,110 @@ class OrchestratorService:
) )
try: try:
import time
# Step 1: InputScanner - Scan user input for forbidden words # Step 1: InputScanner - Scan user input for forbidden words
step_start = time.time()
await self._scan_input(ctx) await self._scan_input(ctx)
self._record_step(ctx, 1, "InputScanner", "success", int((time.time() - step_start) * 1000),
input_data={"text": ctx.current_message[:200]},
output_data=ctx.diagnostics.get("input_scan"))
# Load local history and merge context (original pipeline) # Load local history and merge context (original pipeline)
await self._load_local_history(ctx) await self._load_local_history(ctx)
await self._merge_context(ctx, request.history) await self._merge_context(ctx, request.history)
# Step 2: FlowEngine - Check if session has active script flow # Step 2: FlowEngine - Check if session has active script flow
step_start = time.time()
await self._check_active_flow(ctx) await self._check_active_flow(ctx)
self._record_step(ctx, 2, "FlowEngine", "success", int((time.time() - step_start) * 1000),
input_data={"session_id": ctx.session_id},
output_data={"active_flow": bool(ctx.active_flow), "flow_name": getattr(ctx.active_flow, 'flow_name', None) if ctx.active_flow else None})
# Step 3: IntentRouter - Match intent rules and route # Step 3: IntentRouter - Match intent rules and route
step_start = time.time()
await self._match_intent(ctx) await self._match_intent(ctx)
intent_output = {"matched": bool(ctx.intent_match)}
if ctx.intent_match:
intent_output["rule_name"] = getattr(ctx.intent_match, 'rule_name', None)
intent_output["confidence"] = getattr(ctx.intent_match, 'confidence', None)
self._record_step(ctx, 3, "IntentRouter", "success", int((time.time() - step_start) * 1000),
input_data={"query": ctx.current_message[:100]},
output_data=intent_output)
# Step 4: QueryRewriter - (Optional, skipped in MVP) # Step 4: QueryRewriter - (Optional, skipped in MVP)
# ctx.query_rewritten = ctx.current_message self._record_step(ctx, 4, "QueryRewriter", "skipped", 0,
input_data={"query": ctx.current_message[:100]},
output_data={"note": "Skipped in MVP", "rewritten": None})
# Step 5-6: Multi-KB Retrieval + ResultRanker # Step 5-6: Multi-KB Retrieval + ResultRanker
step_start = time.time()
if self._config.enable_rag and self._retriever: if self._config.enable_rag and self._retriever:
await self._retrieve_evidence(ctx) await self._retrieve_evidence(ctx)
retrieval_output = {
"hit_count": len(ctx.retrieval_result.hits) if ctx.retrieval_result else 0,
"max_score": ctx.retrieval_result.max_score if ctx.retrieval_result else 0,
}
if ctx.retrieval_result and ctx.retrieval_result.hits:
retrieval_output["top_hits"] = [
{
"content": hit.text[:200] + "..." if len(hit.text) > 200 else hit.text,
"score": round(hit.score, 4),
"source": hit.source,
}
for hit in ctx.retrieval_result.hits[:5]
]
self._record_step(ctx, 5, "MultiKBRetrieval", "success", int((time.time() - step_start) * 1000),
input_data={"query": ctx.current_message[:100], "top_k": 3},
output_data=retrieval_output)
else:
self._record_step(ctx, 5, "MultiKBRetrieval", "skipped", 0,
input_data={"query": ctx.current_message[:100]},
output_data={"note": "RAG disabled or no retriever"})
# Step 7: PromptBuilder - Load template + inject behavior rules # Step 7: PromptBuilder - Load template + inject behavior rules
step_start = time.time()
await self._build_system_prompt(ctx) await self._build_system_prompt(ctx)
self._record_step(ctx, 7, "PromptBuilder", "success", int((time.time() - step_start) * 1000),
input_data={"template_id": getattr(ctx, 'template_id', None), "behavior_rules": ctx.behavior_rules[:3] if ctx.behavior_rules else []},
output_data={"prompt_length": len(ctx.system_prompt) if ctx.system_prompt else 0, "prompt_preview": ctx.system_prompt[:300] + "..." if ctx.system_prompt and len(ctx.system_prompt) > 300 else ctx.system_prompt})
# Step 8: LLM.generate - Generate response # Step 8: LLM.generate - Generate response
step_start = time.time()
await self._generate_response(ctx) await self._generate_response(ctx)
llm_model = ctx.llm_response.model if ctx.llm_response else "unknown"
self._record_step(ctx, 8, "LLMGenerate", "success", int((time.time() - step_start) * 1000),
input_data={"model": llm_model, "messages_count": len(self._build_llm_messages(ctx)) if hasattr(self, '_build_llm_messages') else 1},
output_data={"reply_length": len(ctx.llm_response.content) if ctx.llm_response else 0, "reply_preview": ctx.llm_response.content[:200] + "..." if ctx.llm_response and len(ctx.llm_response.content) > 200 else (ctx.llm_response.content if ctx.llm_response else None)})
# Step 9: OutputFilter - Filter forbidden words in output # Step 9: OutputFilter - Filter forbidden words in output
step_start = time.time()
await self._filter_output(ctx) await self._filter_output(ctx)
filter_output = {"filtered": ctx.filtered_reply != ctx.llm_response.content if ctx.llm_response else False}
if ctx.diagnostics.get("output_filter"):
filter_output["triggered_words"] = ctx.diagnostics.get("output_filter", {}).get("triggered_words", [])
self._record_step(ctx, 9, "OutputFilter", "success", int((time.time() - step_start) * 1000),
input_data={"text_length": len(ctx.llm_response.content) if ctx.llm_response else 0},
output_data=filter_output)
# Step 10: Confidence - Calculate confidence score # Step 10: Confidence - Calculate confidence score
step_start = time.time()
self._calculate_confidence(ctx) self._calculate_confidence(ctx)
self._record_step(ctx, 10, "Confidence", "success", int((time.time() - step_start) * 1000),
input_data={"reply_length": len(ctx.filtered_reply) if ctx.filtered_reply else 0, "hit_count": len(ctx.retrieval_result.hits) if ctx.retrieval_result else 0},
output_data={"confidence": ctx.confidence_result.confidence if ctx.confidence_result else 0, "should_transfer": ctx.confidence_result.should_transfer if ctx.confidence_result else True})
# Step 11: Memory - Save messages # Step 11: Memory - Save messages
step_start = time.time()
await self._save_messages(ctx) await self._save_messages(ctx)
self._record_step(ctx, 11, "Memory", "success", int((time.time() - step_start) * 1000),
input_data={"session_id": ctx.session_id},
output_data={"saved": True})
# Step 12: Return - Build and return ChatResponse # Step 12: Return - Build and return ChatResponse
self._record_step(ctx, 12, "Response", "success", 0,
input_data={"confidence": ctx.confidence_result.confidence if ctx.confidence_result else 0, "should_transfer": ctx.confidence_result.should_transfer if ctx.confidence_result else True},
output_data={"reply_length": len(ctx.filtered_reply) if ctx.filtered_reply else 0, "reply_preview": ctx.filtered_reply[:200] + "..." if ctx.filtered_reply and len(ctx.filtered_reply) > 200 else ctx.filtered_reply})
return self._build_response(ctx) return self._build_response(ctx)
except Exception as e: except Exception as e:
@ -944,6 +1035,7 @@ class OrchestratorService:
"session_id": ctx.session_id, "session_id": ctx.session_id,
"channel_type": ctx.channel_type, "channel_type": ctx.channel_type,
"diagnostics": ctx.diagnostics, "diagnostics": ctx.diagnostics,
"execution_steps": ctx.execution_steps,
} }
return ChatResponse( return ChatResponse(