From fcc8869feac4aaf4f5da614a644317d5afa75b01 Mon Sep 17 00:00:00 2001 From: MerCry Date: Tue, 3 Mar 2026 00:33:06 +0800 Subject: [PATCH] feat: add intent-driven script generation components [AC-IDS-04] - Add FlowCache for Redis-based flow instance caching - Add ScriptGenerator for flexible mode script generation - Add TemplateEngine for template variable filling - Add VariableExtractor for context variable extraction --- ai-service/app/services/cache/__init__.py | 7 + .../app/services/flow/script_generator.py | 149 +++++++++++++ .../app/services/flow/template_engine.py | 170 +++++++++++++++ .../app/services/flow/variable_extractor.py | 201 ++++++++++++++++++ 4 files changed, 527 insertions(+) create mode 100644 ai-service/app/services/cache/__init__.py create mode 100644 ai-service/app/services/flow/script_generator.py create mode 100644 ai-service/app/services/flow/template_engine.py create mode 100644 ai-service/app/services/flow/variable_extractor.py diff --git a/ai-service/app/services/cache/__init__.py b/ai-service/app/services/cache/__init__.py new file mode 100644 index 0000000..be9e2a8 --- /dev/null +++ b/ai-service/app/services/cache/__init__.py @@ -0,0 +1,7 @@ +""" +Cache services for AI Service. +""" + +from app.services.cache.flow_cache import FlowCache, get_flow_cache + +__all__ = ["FlowCache", "get_flow_cache"] diff --git a/ai-service/app/services/flow/script_generator.py b/ai-service/app/services/flow/script_generator.py new file mode 100644 index 0000000..2565901 --- /dev/null +++ b/ai-service/app/services/flow/script_generator.py @@ -0,0 +1,149 @@ +""" +Script Generator for Intent-Driven Script Flow. +[AC-IDS-04] Flexible mode script generation with LLM. +""" + +import asyncio +import logging +from typing import Any + +logger = logging.getLogger(__name__) + + +class ScriptGenerator: + """ + [AC-IDS-04] Flexible mode script generator. + Generates dynamic scripts based on intent, constraints, and conversation history. + """ + + DEFAULT_TIMEOUT = 5.0 + MAX_SCRIPT_LENGTH = 200 + + def __init__(self, llm_client: Any = None): + """ + Initialize ScriptGenerator. + + Args: + llm_client: LLM client for text generation (optional, for testing) + """ + self._llm_client = llm_client + + async def generate( + self, + intent: str, + intent_description: str | None, + constraints: list[str] | None, + context: dict[str, Any] | None, + history: list[dict[str, str]] | None, + fallback: str, + ) -> str: + """ + [AC-IDS-04] Generate flexible script based on intent and context. + + Args: + intent: Step intent (e.g., "获取用户姓名") + intent_description: Detailed intent description + constraints: Script constraints (e.g., ["必须礼貌", "语气自然"]) + context: Session context with collected inputs + history: Conversation history (last N turns) + fallback: Fallback script when generation fails + + Returns: + Generated script text or fallback + """ + try: + prompt = self._build_prompt( + intent=intent, + intent_description=intent_description, + constraints=constraints, + context=context, + history=history, + ) + + if self._llm_client: + messages = [{"role": "user", "content": prompt}] + response = await asyncio.wait_for( + self._llm_client.generate(messages), + timeout=self.DEFAULT_TIMEOUT, + ) + generated = response.content.strip() if hasattr(response, 'content') else str(response).strip() + + if len(generated) > self.MAX_SCRIPT_LENGTH * 2: + generated = generated[:self.MAX_SCRIPT_LENGTH * 2] + + logger.info( + f"[AC-IDS-04] Generated flexible script: " + f"intent={intent}, length={len(generated)}" + ) + return generated + else: + logger.warning( + f"[AC-IDS-05] No LLM client configured, using fallback" + ) + return fallback + + except asyncio.TimeoutError: + logger.warning( + f"[AC-IDS-05] Script generation timeout, use fallback: " + f"intent={intent}" + ) + return fallback + + except Exception as e: + logger.error( + f"[AC-IDS-05] Script generation failed: {e}, use fallback" + ) + return fallback + + def _build_prompt( + self, + intent: str, + intent_description: str | None, + constraints: list[str] | None, + context: dict[str, Any] | None, + history: list[dict[str, str]] | None, + ) -> str: + """ + [AC-IDS-04] Build LLM prompt for script generation. + """ + prompt_parts = [ + "你是一个客服对话系统,当前需要执行以下步骤:", + "", + f"【步骤目标】{intent}", + ] + + if intent_description: + prompt_parts.append(f"【详细说明】{intent_description}") + + if constraints: + prompt_parts.append("") + prompt_parts.append("【约束条件】") + for c in constraints: + prompt_parts.append(f"- {c}") + + if history: + prompt_parts.append("") + prompt_parts.append("【对话历史】") + for msg in history[-3:]: + role = "用户" if msg.get("role") == "user" else "客服" + content = msg.get("content", "") + prompt_parts.append(f"{role}: {content}") + + if context and context.get("inputs"): + prompt_parts.append("") + prompt_parts.append("【已收集信息】") + for inp in context["inputs"]: + if isinstance(inp, dict): + step = inp.get("step", "?") + input_text = inp.get("input", "") + prompt_parts.append(f"- 步骤{step}: {input_text}") + else: + prompt_parts.append(f"- {inp}") + + prompt_parts.extend([ + "", + f"请生成一句符合目标和约束的话术(不超过{self.MAX_SCRIPT_LENGTH}字)。", + "只返回话术内容,不要解释。", + ]) + + return "\n".join(prompt_parts) diff --git a/ai-service/app/services/flow/template_engine.py b/ai-service/app/services/flow/template_engine.py new file mode 100644 index 0000000..5e48551 --- /dev/null +++ b/ai-service/app/services/flow/template_engine.py @@ -0,0 +1,170 @@ +""" +Template Engine for Intent-Driven Script Flow. +[AC-IDS-06] Template mode script generation with variable filling. +""" + +import asyncio +import logging +import re +from typing import Any + +logger = logging.getLogger(__name__) + + +class TemplateEngine: + """ + [AC-IDS-06] Template script engine. + Fills template variables using context or LLM generation. + """ + + VARIABLE_PATTERN = re.compile(r'\{(\w+)\}') + DEFAULT_TIMEOUT = 5.0 + + def __init__(self, llm_client: Any = None): + """ + Initialize TemplateEngine. + + Args: + llm_client: LLM client for variable generation (optional) + """ + self._llm_client = llm_client + + async def fill_template( + self, + template: str, + context: dict[str, Any] | None, + history: list[dict[str, str]] | None, + ) -> str: + """ + [AC-IDS-06] Fill template variables with context or LLM-generated values. + + Args: + template: Script template with {variable} placeholders + context: Session context with collected inputs + history: Conversation history for context + + Returns: + Filled template string + """ + try: + variables = self.VARIABLE_PATTERN.findall(template) + + if not variables: + return template + + variable_values = {} + for var in variables: + value = await self._generate_variable_value( + variable_name=var, + context=context, + history=history, + ) + variable_values[var] = value + + result = template + for var, value in variable_values.items(): + result = result.replace(f"{{{var}}}", value) + + logger.info( + f"[AC-IDS-06] Filled template: " + f"variables={list(variable_values.keys())}" + ) + return result + + except Exception as e: + logger.error(f"[AC-IDS-06] Template fill failed: {e}, return original") + return template + + async def _generate_variable_value( + self, + variable_name: str, + context: dict[str, Any] | None, + history: list[dict[str, str]] | None, + ) -> str: + """ + Generate value for a single template variable. + + Args: + variable_name: Variable name to generate value for + context: Session context + history: Conversation history + + Returns: + Generated variable value + """ + if context and variable_name in context: + return str(context[variable_name]) + + if context and context.get("inputs"): + for inp in context["inputs"]: + if isinstance(inp, dict): + if inp.get("variable") == variable_name: + return str(inp.get("input", f"[{variable_name}]")) + + if self._llm_client: + prompt = self._build_variable_prompt( + variable_name=variable_name, + history=history, + ) + + try: + messages = [{"role": "user", "content": prompt}] + response = await asyncio.wait_for( + self._llm_client.generate(messages), + timeout=self.DEFAULT_TIMEOUT, + ) + value = response.content.strip() if hasattr(response, 'content') else str(response).strip() + return value + except asyncio.TimeoutError: + logger.warning( + f"[AC-IDS-06] Variable generation timeout for {variable_name}" + ) + except Exception as e: + logger.warning( + f"[AC-IDS-06] Variable generation failed for {variable_name}: {e}" + ) + + logger.warning( + f"[AC-IDS-06] Failed to generate value for {variable_name}, " + f"use placeholder" + ) + return f"[{variable_name}]" + + def _build_variable_prompt( + self, + variable_name: str, + history: list[dict[str, str]] | None, + ) -> str: + """ + Build prompt for variable value generation. + """ + prompt_parts = [ + f'根据对话历史,为变量 "{variable_name}" 生成合适的值。', + "", + ] + + if history: + prompt_parts.append("对话历史:") + for msg in history[-3:]: + role = "用户" if msg.get("role") == "user" else "客服" + content = msg.get("content", "") + prompt_parts.append(f"{role}: {content}") + prompt_parts.append("") + + prompt_parts.extend([ + "只返回变量值,不要解释。", + ]) + + return "\n".join(prompt_parts) + + def extract_variables(self, template: str) -> list[str]: + """ + Extract variable names from template. + + Args: + template: Template string with {variable} placeholders + + Returns: + List of variable names + """ + return self.VARIABLE_PATTERN.findall(template) diff --git a/ai-service/app/services/flow/variable_extractor.py b/ai-service/app/services/flow/variable_extractor.py new file mode 100644 index 0000000..25c33e2 --- /dev/null +++ b/ai-service/app/services/flow/variable_extractor.py @@ -0,0 +1,201 @@ +""" +Variable Extractor for Intent-Driven Script Flow. +从用户输入中提取期望变量,如年级、学科等。 +""" + +import asyncio +import logging +import re +from typing import Any + +logger = logging.getLogger(__name__) + + +VARIABLE_PATTERNS = { + "grade": [ + (r"初[一二三]", lambda m: m.group(0)), + (r"高[一二三]", lambda m: m.group(0)), + (r"七年级", lambda m: "初一"), + (r"八年级", lambda m: "初二"), + (r"九年级", lambda m: "初三"), + (r"高一", lambda m: "高一"), + (r"高二", lambda m: "高二"), + (r"高三", lambda m: "高三"), + ], + "subject": [ + (r"语文", lambda m: "语文"), + (r"数学", lambda m: "数学"), + (r"英语|英文", lambda m: "英语"), + (r"物理", lambda m: "物理"), + (r"化学", lambda m: "化学"), + (r"生物", lambda m: "生物"), + (r"历史", lambda m: "历史"), + (r"地理", lambda m: "地理"), + (r"政治", lambda m: "政治"), + ], +} + + +class VariableExtractor: + """ + 变量提取器 + 从用户输入中提取期望变量(如年级、学科等) + + 支持两种模式: + 1. 规则匹配: 使用预定义的正则表达式匹配 + 2. LLM 提取: 使用大语言模型智能提取 + """ + + DEFAULT_TIMEOUT = 5.0 + + def __init__(self, llm_client: Any = None): + """ + Initialize VariableExtractor. + + Args: + llm_client: LLM client for intelligent extraction (optional) + """ + self._llm_client = llm_client + + async def extract( + self, + user_input: str, + expected_variables: list[str], + history: list[dict[str, str]] | None = None, + ) -> dict[str, str]: + """ + 从用户输入中提取期望变量 + + Args: + user_input: 用户输入文本 + expected_variables: 期望提取的变量列表,如 ["grade", "subject"] + history: 对话历史(用于 LLM 提取时的上下文) + + Returns: + 提取的变量字典,如 {"grade": "初一", "subject": "语文"} + """ + if not expected_variables: + return {} + + result = {} + + for var_name in expected_variables: + value = await self._extract_variable( + variable_name=var_name, + user_input=user_input, + history=history, + ) + if value: + result[var_name] = value + + if result: + logger.info(f"[VariableExtractor] Extracted variables: {result}") + + return result + + async def _extract_variable( + self, + variable_name: str, + user_input: str, + history: list[dict[str, str]] | None, + ) -> str | None: + """ + 提取单个变量 + + 先尝试规则匹配,失败则使用 LLM 提取 + """ + value = self._extract_by_pattern(variable_name, user_input) + if value: + return value + + if self._llm_client: + value = await self._extract_by_llm(variable_name, user_input, history) + if value: + return value + + return None + + def _extract_by_pattern(self, variable_name: str, user_input: str) -> str | None: + """ + 使用正则表达式提取变量 + """ + patterns = VARIABLE_PATTERNS.get(variable_name, []) + for pattern, extractor in patterns: + match = re.search(pattern, user_input) + if match: + return extractor(match) + return None + + async def _extract_by_llm( + self, + variable_name: str, + user_input: str, + history: list[dict[str, str]] | None, + ) -> str | None: + """ + 使用 LLM 提取变量 + """ + prompt = self._build_extraction_prompt(variable_name, user_input, history) + + try: + messages = [{"role": "user", "content": prompt}] + response = await asyncio.wait_for( + self._llm_client.generate(messages), + timeout=self.DEFAULT_TIMEOUT, + ) + value = response.content.strip() if hasattr(response, 'content') else str(response).strip() + + if value and value not in ["未知", "无法确定", "无", "None", "null"]: + return value + + except asyncio.TimeoutError: + logger.warning( + f"[VariableExtractor] LLM extraction timeout for {variable_name}" + ) + except Exception as e: + logger.warning( + f"[VariableExtractor] LLM extraction failed for {variable_name}: {e}" + ) + + return None + + def _build_extraction_prompt( + self, + variable_name: str, + user_input: str, + history: list[dict[str, str]] | None, + ) -> str: + """ + 构建变量提取的提示词 + """ + variable_descriptions = { + "grade": "年级(如:初一、初二、初三、高一、高二、高三)", + "subject": "学科(如:语文、数学、英语、物理、化学、生物)", + "type": "内容类型(如:痛点、学科特点、能力要求、课程价值、观点)", + } + + description = variable_descriptions.get(variable_name, variable_name) + + prompt_parts = [ + f'请从以下用户输入中提取"{description}"信息。', + "", + f"用户输入:{user_input}", + "", + ] + + if history: + prompt_parts.append("对话历史:") + for msg in history[-3:]: + role = "用户" if msg.get("role") == "user" else "客服" + content = msg.get("content", "") + prompt_parts.append(f"{role}: {content}") + prompt_parts.append("") + + prompt_parts.extend([ + "要求:", + "1. 如果能确定,直接返回提取的值(如:初一、语文)", + "2. 如果无法确定,返回\"未知\"", + "3. 只返回提取的值,不要解释", + ]) + + return "\n".join(prompt_parts)