2026-02-24 08:10:27 +00:00
|
|
|
|
"""
|
|
|
|
|
|
Knowledge Base management endpoints.

[AC-ASA-01, AC-ASA-02, AC-ASA-08] Document upload, list, and index job status.
[AC-AISVC-59~AC-AISVC-64] Multi-knowledge-base management.
|
2026-02-24 08:10:27 +00:00
|
|
|
|
"""
|
|
|
|
|
|
|
|
|
|
|
|
import logging
import uuid
from dataclasses import dataclass
from typing import Annotated, Optional

import tiktoken
from fastapi import APIRouter, BackgroundTasks, Depends, File, Form, Query, Response, UploadFile
from fastapi.responses import JSONResponse
from sqlalchemy import select
from sqlalchemy.ext.asyncio import AsyncSession

from app.core.database import get_session
from app.core.exceptions import MissingTenantIdException
from app.core.tenant import get_tenant_id
from app.models import ErrorResponse
from app.models.entities import (
    IndexJob,
    IndexJobStatus,
    KBType,
    KnowledgeBaseCreate,
    KnowledgeBaseUpdate,
)
from app.services.kb import KBService
from app.services.knowledge_base_service import KnowledgeBaseService
|
2026-02-24 08:10:27 +00:00
|
|
|
|
|
|
|
|
|
|
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Router collecting the knowledge-base admin endpoints; every route declared
# on it is served under the /admin/kb prefix.
router = APIRouter(prefix="/admin/kb", tags=["KB Management"])
|
|
|
|
|
|
|
|
|
|
|
|
|
2026-02-24 17:16:59 +00:00
|
|
|
|
@dataclass
|
|
|
|
|
|
class TextChunk:
|
|
|
|
|
|
"""Text chunk with metadata."""
|
|
|
|
|
|
text: str
|
|
|
|
|
|
start_token: int
|
|
|
|
|
|
end_token: int
|
|
|
|
|
|
page: int | None = None
|
|
|
|
|
|
source: str | None = None
|
|
|
|
|
|
|
|
|
|
|
|
|
2026-02-25 15:09:24 +00:00
|
|
|
|
def chunk_text_by_lines(
    text: str,
    min_line_length: int = 10,
    source: str | None = None,
) -> list[TextChunk]:
    """Split text into chunks by line; each line is one retrieval unit.

    Args:
        text: The text to split.
        min_line_length: Minimum line length (after stripping whitespace);
            shorter lines are skipped.
        source: Source file path (optional).

    Returns:
        A list of chunks, one per retained line. ``start_token`` and
        ``end_token`` hold the zero-based line index and that index plus one
        (they are line positions here, not token offsets); ``page`` is
        always ``None``.
    """
    chunks: list[TextChunk] = []

    for line_index, raw_line in enumerate(text.split('\n')):
        stripped = raw_line.strip()
        # Skip blank lines and lines too short to be a useful retrieval unit.
        if len(stripped) < min_line_length:
            continue

        chunks.append(TextChunk(
            text=stripped,
            start_token=line_index,
            end_token=line_index + 1,
            page=None,
            source=source,
        ))

    return chunks
|
|
|
|
|
|
|
|
|
|
|
|
|
2026-02-24 17:16:59 +00:00
|
|
|
|
def chunk_text_with_tiktoken(
    text: str,
    chunk_size: int = 512,
    overlap: int = 100,
    page: int | None = None,
    source: str | None = None,
) -> list[TextChunk]:
    """Split text into overlapping chunks by token count using tiktoken.

    Args:
        text: The text to split.
        chunk_size: Maximum number of tokens per chunk; must be positive.
        overlap: Number of tokens shared between consecutive chunks; must
            satisfy ``0 <= overlap < chunk_size``.
        page: Page number (optional), copied onto every chunk.
        source: Source file path (optional), copied onto every chunk.

    Returns:
        A list of chunks, each carrying its text and its start/end token
        offsets (end exclusive). Empty text yields an empty list.

    Raises:
        ValueError: If ``chunk_size`` is not positive or ``overlap`` is
            outside ``[0, chunk_size)``.
    """
    # Validate the window first: with overlap >= chunk_size the start offset
    # would never advance (endless loop), and a negative overlap would
    # silently skip tokens between chunks.
    if chunk_size <= 0:
        raise ValueError(f"chunk_size must be positive, got {chunk_size}")
    if not 0 <= overlap < chunk_size:
        raise ValueError(
            f"overlap must satisfy 0 <= overlap < chunk_size, "
            f"got overlap={overlap}, chunk_size={chunk_size}"
        )

    encoding = tiktoken.get_encoding("cl100k_base")
    tokens = encoding.encode(text)
    total = len(tokens)
    step = chunk_size - overlap  # guaranteed >= 1 by the checks above

    chunks: list[TextChunk] = []
    for start in range(0, total, step):
        end = min(start + chunk_size, total)
        chunks.append(TextChunk(
            text=encoding.decode(tokens[start:end]),
            start_token=start,
            end_token=end,
            page=page,
            source=source,
        ))
        # Once a chunk reaches the last token, a further window would only
        # repeat the tail that is already covered.
        if end == total:
            break

    return chunks
|
|
|
|
|
|
|
|
|
|
|
|
|
feat(ai-service): v0.2.0 前后端联调真实对接
实现内容:
- 新增知识库实体模型 (KnowledgeBase, Document, IndexJob)
- 新增 KBService 服务层,支持文档上传、存储、索引任务管理
- 实现知识库管理 API 真实对接 (POST/GET /admin/kb/documents)
- 实现索引任务状态查询 API (GET /admin/kb/index/jobs/{jobId})
- 实现 RAG 实验室真实向量检索 (POST /admin/rag/experiments/run)
- 实现会话监控真实数据库查询 (GET /admin/sessions)
规范更新:
- requirements.md: v0.1.0 -> v0.2.0, 新增 AC-AISVC-21~28
- tasks.md: v0.1.0 -> v0.2.0, 新增 Phase 6 (9个任务)
- openapi.admin.yaml: L0 -> L1, 更新 x-requirements 映射
验收标准: AC-AISVC-21, AC-AISVC-22, AC-AISVC-23, AC-AISVC-24,
AC-AISVC-25, AC-AISVC-26, AC-AISVC-27, AC-AISVC-28
2026-02-24 10:16:29 +00:00
|
|
|
|
def get_current_tenant_id() -> str:
    """Dependency to get current tenant ID or raise exception.

    Returns:
        The tenant ID reported by ``get_tenant_id()``.

    Raises:
        MissingTenantIdException: If ``get_tenant_id()`` returns a falsy
            value (for example ``None`` or an empty string).
    """
    tenant_id = get_tenant_id()
    if not tenant_id:
        raise MissingTenantIdException()
    return tenant_id
|
|
|
|
|
|
|
|
|
|
|
|
|
2026-02-24 11:52:52 +00:00
|
|
|
|
@router.get(
    "/knowledge-bases",
    operation_id="listKnowledgeBases",
    summary="Query knowledge base list",
    description="[AC-AISVC-60] Get list of knowledge bases for the current tenant with type and status filters.",
    responses={
        200: {"description": "Knowledge base list"},
        401: {"description": "Unauthorized", "model": ErrorResponse},
        403: {"description": "Forbidden", "model": ErrorResponse},
    },
)
async def list_knowledge_bases(
    tenant_id: Annotated[str, Depends(get_current_tenant_id)],
    session: Annotated[AsyncSession, Depends(get_session)],
    kb_type: Annotated[Optional[str], Query()] = None,
    is_enabled: Annotated[Optional[bool], Query()] = None,
) -> JSONResponse:
    """
    [AC-AISVC-60] List all knowledge bases for the current tenant.

    Supports filtering by kb_type and is_enabled status.

    Args:
        tenant_id: Current tenant ID, resolved by ``get_current_tenant_id``.
        session: Database session injected by ``get_session``.
        kb_type: Optional knowledge-base type filter, passed to the service.
        is_enabled: Optional enabled-status filter, passed to the service.

    Returns:
        A JSON response of the form ``{"data": [...]}`` with one entry per
        knowledge base.

    Raises:
        Exception: Any error from the service or serialization is logged
            with its traceback and re-raised unchanged.
    """
    try:
        logger.info(f"[AC-AISVC-60] Listing knowledge bases: tenant={tenant_id}, kb_type={kb_type}, is_enabled={is_enabled}")

        kb_service = KnowledgeBaseService(session)
        logger.info("[AC-AISVC-60] KnowledgeBaseService created, calling list_knowledge_bases...")
        knowledge_bases = await kb_service.list_knowledge_bases(
            tenant_id=tenant_id,
            kb_type=kb_type,
            is_enabled=is_enabled,
        )
        logger.info(f"[AC-AISVC-60] Found {len(knowledge_bases)} knowledge bases")

        # NOTE(review): appending "Z" assumes created_at/updated_at are naive
        # UTC datetimes — confirm against the entity model.
        data = [
            {
                "id": str(kb.id),
                "name": kb.name,
                "kbType": kb.kb_type,
                "description": kb.description,
                "priority": kb.priority,
                "isEnabled": kb.is_enabled,
                "docCount": kb.doc_count,
                "createdAt": kb.created_at.isoformat() + "Z",
                "updatedAt": kb.updated_at.isoformat() + "Z",
            }
            for kb in knowledge_bases
        ]

        logger.info(f"[AC-AISVC-60] Returning {len(data)} knowledge bases")
        return JSONResponse(content={"data": data})
    except Exception as e:
        # logger.exception logs at ERROR level and appends the active
        # traceback, so no manual traceback formatting is needed.
        logger.exception(f"[AC-AISVC-60] Error listing knowledge bases: {type(e).__name__}: {e}")
        raise
|
2026-02-24 11:52:52 +00:00
|
|
|
|
|
2026-02-28 04:52:50 +00:00
|
|
|
|
|
|
|
|
|
|
@router.post(
    "/knowledge-bases",
    operation_id="createKnowledgeBase",
    summary="Create knowledge base",
    description="[AC-AISVC-59] Create a new knowledge base with specified type and priority.",
    responses={
        201: {"description": "Knowledge base created"},
        400: {"description": "Bad Request - invalid kb_type"},
        401: {"description": "Unauthorized", "model": ErrorResponse},
        403: {"description": "Forbidden", "model": ErrorResponse},
    },
    status_code=201,
)
async def create_knowledge_base(
    tenant_id: Annotated[str, Depends(get_current_tenant_id)],
    session: Annotated[AsyncSession, Depends(get_session)],
    kb_create: KnowledgeBaseCreate,
) -> JSONResponse:
    """
    [AC-AISVC-59] Create a new knowledge base.

    Creation is delegated to ``KnowledgeBaseService.create_knowledge_base``
    and the session is committed afterwards. The original docstring states
    that the corresponding Qdrant Collection is initialized; that happens
    (if at all) inside the service and is not visible in this handler.

    Args:
        tenant_id: Current tenant ID, resolved by ``get_current_tenant_id``.
        session: Database session injected by ``get_session``.
        kb_create: Request body describing the knowledge base to create.

    Returns:
        201 with the created knowledge base, or 400 with code
        ``INVALID_KB_TYPE`` when ``kb_create.kb_type`` is not a ``KBType``
        value.
    """
    valid_types = [t.value for t in KBType]
    if kb_create.kb_type not in valid_types:
        return JSONResponse(
            status_code=400,
            content={
                "code": "INVALID_KB_TYPE",
                "message": f"Invalid kb_type: {kb_create.kb_type}",
                "details": {"valid_types": valid_types},
            },
        )

    logger.info(
        f"[AC-AISVC-59] Creating knowledge base: tenant={tenant_id}, "
        f"name={kb_create.name}, type={kb_create.kb_type}"
    )

    kb_service = KnowledgeBaseService(session)
    kb = await kb_service.create_knowledge_base(tenant_id, kb_create)
    await session.commit()

    # NOTE(review): appending "Z" assumes created_at/updated_at are naive UTC
    # datetimes — confirm against the entity model.
    return JSONResponse(
        status_code=201,
        content={
            "id": str(kb.id),
            "name": kb.name,
            "kbType": kb.kb_type,
            "description": kb.description,
            "priority": kb.priority,
            "isEnabled": kb.is_enabled,
            "docCount": kb.doc_count,
            "createdAt": kb.created_at.isoformat() + "Z",
            "updatedAt": kb.updated_at.isoformat() + "Z",
        },
    )
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@router.get(
    "/knowledge-bases/{kb_id}",
    operation_id="getKnowledgeBase",
    summary="Get knowledge base details",
    description="Get detailed information about a specific knowledge base.",
    responses={
        200: {"description": "Knowledge base details"},
        404: {"description": "Knowledge base not found"},
        401: {"description": "Unauthorized", "model": ErrorResponse},
        403: {"description": "Forbidden", "model": ErrorResponse},
    },
)
async def get_knowledge_base(
    tenant_id: Annotated[str, Depends(get_current_tenant_id)],
    session: Annotated[AsyncSession, Depends(get_session)],
    kb_id: str,
) -> JSONResponse:
    """
    Get a specific knowledge base by ID.

    Args:
        tenant_id: Current tenant ID, resolved by ``get_current_tenant_id``.
        session: Database session injected by ``get_session``.
        kb_id: ID of the knowledge base, taken from the URL path.

    Returns:
        200 with the knowledge base details, or 404 with code
        ``KB_NOT_FOUND`` when the service returns nothing for this
        tenant/ID pair.
    """
    logger.info(f"Getting knowledge base: tenant={tenant_id}, kb_id={kb_id}")

    kb_service = KnowledgeBaseService(session)
    kb = await kb_service.get_knowledge_base(tenant_id, kb_id)

    if not kb:
        return JSONResponse(
            status_code=404,
            content={
                "code": "KB_NOT_FOUND",
                "message": f"Knowledge base {kb_id} not found",
            },
        )

    # NOTE(review): appending "Z" assumes created_at/updated_at are naive UTC
    # datetimes — confirm against the entity model.
    return JSONResponse(
        content={
            "id": str(kb.id),
            "name": kb.name,
            "kbType": kb.kb_type,
            "description": kb.description,
            "priority": kb.priority,
            "isEnabled": kb.is_enabled,
            "docCount": kb.doc_count,
            "createdAt": kb.created_at.isoformat() + "Z",
            "updatedAt": kb.updated_at.isoformat() + "Z",
        }
    )
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@router.put(
    "/knowledge-bases/{kb_id}",
    operation_id="updateKnowledgeBase",
    summary="Update knowledge base",
    description="[AC-AISVC-61] Update knowledge base name, type, description, priority, or enabled status.",
    responses={
        200: {"description": "Knowledge base updated"},
        400: {"description": "Bad Request - invalid kb_type"},
        404: {"description": "Knowledge base not found"},
        401: {"description": "Unauthorized", "model": ErrorResponse},
        403: {"description": "Forbidden", "model": ErrorResponse},
    },
)
async def update_knowledge_base(
    tenant_id: Annotated[str, Depends(get_current_tenant_id)],
    session: Annotated[AsyncSession, Depends(get_session)],
    kb_id: str,
    kb_update: KnowledgeBaseUpdate,
) -> JSONResponse:
    """
    [AC-AISVC-61] Update a knowledge base.

    Args:
        tenant_id: Current tenant ID, resolved by ``get_current_tenant_id``.
        session: Database session injected by ``get_session``.
        kb_id: ID of the knowledge base, taken from the URL path.
        kb_update: Request body with the fields to change.

    Returns:
        200 with the updated knowledge base; 400 with code
        ``INVALID_KB_TYPE`` when a ``kb_type`` is supplied that is not a
        ``KBType`` value; 404 with code ``KB_NOT_FOUND`` when the service
        returns nothing for this tenant/ID pair.
    """
    # kb_type is only validated when the caller supplied one.
    if kb_update.kb_type is not None:
        valid_types = [t.value for t in KBType]
        if kb_update.kb_type not in valid_types:
            return JSONResponse(
                status_code=400,
                content={
                    "code": "INVALID_KB_TYPE",
                    "message": f"Invalid kb_type: {kb_update.kb_type}",
                    "details": {"valid_types": valid_types},
                },
            )

    logger.info(
        f"[AC-AISVC-61] Updating knowledge base: tenant={tenant_id}, kb_id={kb_id}"
    )

    kb_service = KnowledgeBaseService(session)
    kb = await kb_service.update_knowledge_base(tenant_id, kb_id, kb_update)

    if not kb:
        return JSONResponse(
            status_code=404,
            content={
                "code": "KB_NOT_FOUND",
                "message": f"Knowledge base {kb_id} not found",
            },
        )

    # Commit only after the service reported a successful update.
    await session.commit()

    # NOTE(review): appending "Z" assumes created_at/updated_at are naive UTC
    # datetimes — confirm against the entity model.
    return JSONResponse(
        content={
            "id": str(kb.id),
            "name": kb.name,
            "kbType": kb.kb_type,
            "description": kb.description,
            "priority": kb.priority,
            "isEnabled": kb.is_enabled,
            "docCount": kb.doc_count,
            "createdAt": kb.created_at.isoformat() + "Z",
            "updatedAt": kb.updated_at.isoformat() + "Z",
        }
    )
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@router.delete(
    "/knowledge-bases/{kb_id}",
    operation_id="deleteKnowledgeBase",
    summary="Delete knowledge base",
    description="[AC-AISVC-62] Delete a knowledge base and its associated documents and Qdrant Collection.",
    responses={
        204: {"description": "Knowledge base deleted"},
        404: {"description": "Knowledge base not found"},
        401: {"description": "Unauthorized", "model": ErrorResponse},
        403: {"description": "Forbidden", "model": ErrorResponse},
    },
)
async def delete_knowledge_base(
    tenant_id: Annotated[str, Depends(get_current_tenant_id)],
    session: Annotated[AsyncSession, Depends(get_session)],
    kb_id: str,
) -> Response:
    """
    [AC-AISVC-62] Delete a knowledge base.

    Deletion is delegated to ``KnowledgeBaseService.delete_knowledge_base``
    and the session is committed afterwards. The original docstring states
    that associated documents and the Qdrant Collection are deleted too;
    that happens (if at all) inside the service and is not visible in this
    handler.

    Args:
        tenant_id: Current tenant ID, resolved by ``get_current_tenant_id``.
        session: Database session injected by ``get_session``.
        kb_id: ID of the knowledge base, taken from the URL path.

    Returns:
        An empty 204 response on success, or a 404 JSON response with code
        ``KB_NOT_FOUND`` when the service reports nothing was deleted.
    """
    logger.info(
        f"[AC-AISVC-62] Deleting knowledge base: tenant={tenant_id}, kb_id={kb_id}"
    )

    kb_service = KnowledgeBaseService(session)
    deleted = await kb_service.delete_knowledge_base(tenant_id, kb_id)

    if not deleted:
        return JSONResponse(
            status_code=404,
            content={
                "code": "KB_NOT_FOUND",
                "message": f"Knowledge base {kb_id} not found",
            },
        )

    await session.commit()

    # A 204 response must not carry a body. JSONResponse(content=None) would
    # serialize to the literal text "null", so return a bare Response.
    return Response(status_code=204)
|
2026-02-24 11:52:52 +00:00
|
|
|
|
|
|
|
|
|
|
|
2026-02-24 08:10:27 +00:00
|
|
|
|
@router.get(
    "/documents",
    operation_id="listDocuments",
    summary="Query document list",
    description="[AC-ASA-08] Get list of documents with pagination and filtering.",
    responses={
        200: {"description": "Document list with pagination"},
        401: {"description": "Unauthorized", "model": ErrorResponse},
        403: {"description": "Forbidden", "model": ErrorResponse},
    },
)
async def list_documents(
    tenant_id: Annotated[str, Depends(get_current_tenant_id)],
    session: Annotated[AsyncSession, Depends(get_session)],
    kb_id: Annotated[Optional[str], Query()] = None,
    status: Annotated[Optional[str], Query()] = None,
    page: int = Query(1, ge=1),
    page_size: int = Query(20, ge=1, le=100),
) -> JSONResponse:
    """
    [AC-ASA-08] List documents with filtering and pagination.

    Args:
        tenant_id: Current tenant ID, resolved by ``get_current_tenant_id``.
        session: Database session injected by ``get_session``.
        kb_id: Optional knowledge-base ID filter, passed to the service.
        status: Optional document status filter, passed to the service.
        page: 1-based page number (>= 1).
        page_size: Number of documents per page (1-100).

    Returns:
        A JSON response with ``data`` (one entry per document, including the
        ID of its most recent index job or ``None``) and ``pagination``
        (``page``, ``pageSize``, ``total``, ``totalPages``).
    """
    logger.info(
        f"[AC-ASA-08] Listing documents: tenant={tenant_id}, kb_id={kb_id}, "
        f"status={status}, page={page}, page_size={page_size}"
    )

    kb_service = KBService(session)
    documents, total = await kb_service.list_documents(
        tenant_id=tenant_id,
        kb_id=kb_id,
        status=status,
        page=page,
        page_size=page_size,
    )

    # Ceiling division; an empty result set has zero pages.
    total_pages = (total + page_size - 1) // page_size if total > 0 else 0

    data = []
    for doc in documents:
        # Fetch only the newest index job for this document. Without the
        # LIMIT, scalar_one_or_none() raises MultipleResultsFound as soon as
        # a document has more than one job.
        # NOTE(review): this issues one query per document; acceptable for
        # page_size <= 100 but a candidate for batching.
        job_stmt = (
            select(IndexJob)
            .where(
                IndexJob.tenant_id == tenant_id,
                IndexJob.doc_id == doc.id,
            )
            .order_by(IndexJob.created_at.desc())
            .limit(1)
        )
        job_result = await session.execute(job_stmt)
        latest_job = job_result.scalar_one_or_none()

        # NOTE(review): appending "Z" assumes created_at/updated_at are naive
        # UTC datetimes — confirm against the entity model.
        data.append({
            "docId": str(doc.id),
            "kbId": doc.kb_id,
            "fileName": doc.file_name,
            "status": doc.status,
            "jobId": str(latest_job.id) if latest_job else None,
            "createdAt": doc.created_at.isoformat() + "Z",
            "updatedAt": doc.updated_at.isoformat() + "Z",
        })

    return JSONResponse(
        content={
            "data": data,
            "pagination": {
                "page": page,
                "pageSize": page_size,
                "total": total,
                "totalPages": total_pages,
            },
        }
    )
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@router.post(
|
|
|
|
|
|
"/documents",
|
|
|
|
|
|
operation_id="uploadDocument",
|
|
|
|
|
|
summary="Upload/import document",
|
2026-02-28 04:52:50 +00:00
|
|
|
|
description="[AC-ASA-01, AC-AISVC-63] Upload document to specified knowledge base and trigger indexing job.",
|
2026-02-24 08:10:27 +00:00
|
|
|
|
responses={
|
|
|
|
|
|
202: {"description": "Accepted - async indexing job started"},
|
2026-02-28 04:52:50 +00:00
|
|
|
|
400: {"description": "Bad Request - unsupported format or invalid kb_id"},
|
2026-02-24 08:10:27 +00:00
|
|
|
|
401: {"description": "Unauthorized", "model": ErrorResponse},
|
|
|
|
|
|
403: {"description": "Forbidden", "model": ErrorResponse},
|
|
|
|
|
|
},
|
|
|
|
|
|
)
|
|
|
|
|
|
async def upload_document(
|
feat(ai-service): v0.2.0 前后端联调真实对接
实现内容:
- 新增知识库实体模型 (KnowledgeBase, Document, IndexJob)
- 新增 KBService 服务层,支持文档上传、存储、索引任务管理
- 实现知识库管理 API 真实对接 (POST/GET /admin/kb/documents)
- 实现索引任务状态查询 API (GET /admin/kb/index/jobs/{jobId})
- 实现 RAG 实验室真实向量检索 (POST /admin/rag/experiments/run)
- 实现会话监控真实数据库查询 (GET /admin/sessions)
规范更新:
- requirements.md: v0.1.0 -> v0.2.0, 新增 AC-AISVC-21~28
- tasks.md: v0.1.0 -> v0.2.0, 新增 Phase 6 (9个任务)
- openapi.admin.yaml: L0 -> L1, 更新 x-requirements 映射
验收标准: AC-AISVC-21, AC-AISVC-22, AC-AISVC-23, AC-AISVC-24,
AC-AISVC-25, AC-AISVC-26, AC-AISVC-27, AC-AISVC-28
2026-02-24 10:16:29 +00:00
|
|
|
|
tenant_id: Annotated[str, Depends(get_current_tenant_id)],
|
|
|
|
|
|
session: Annotated[AsyncSession, Depends(get_session)],
|
2026-02-24 11:52:52 +00:00
|
|
|
|
background_tasks: BackgroundTasks,
|
2026-02-24 08:10:27 +00:00
|
|
|
|
file: UploadFile = File(...),
|
|
|
|
|
|
kb_id: str = Form(...),
|
|
|
|
|
|
) -> JSONResponse:
|
|
|
|
|
|
"""
|
2026-02-28 04:52:50 +00:00
|
|
|
|
[AC-ASA-01, AC-AISVC-63] Upload document to specified knowledge base.
|
|
|
|
|
|
Creates KB if not exists, indexes to corresponding Qdrant Collection.
|
2026-02-24 08:10:27 +00:00
|
|
|
|
"""
|
feat(AISVC-T7): 嵌入模型可插拔设计与文档解析支持 [AC-AISVC-29, AC-AISVC-30, AC-AISVC-31, AC-AISVC-32, AC-AISVC-33, AC-AISVC-34, AC-AISVC-35, AC-AISVC-36, AC-AISVC-37, AC-AISVC-38, AC-AISVC-39, AC-AISVC-40, AC-AISVC-41]
- 新增 EmbeddingProvider 抽象基类和工厂模式 [AC-AISVC-29, AC-AISVC-30]
- 实现 OllamaEmbeddingProvider 和 OpenAIEmbeddingProvider [AC-AISVC-29, AC-AISVC-30]
- 新增 EmbeddingConfigManager 支持配置热更新 [AC-AISVC-31, AC-AISVC-32]
- 新增 DocumentParser 抽象接口和工厂类 [AC-AISVC-33]
- 实现 PDF/Word/Excel/Text 文档解析器 [AC-AISVC-33, AC-AISVC-34, AC-AISVC-35]
- 新增嵌入管理 API 端点 [AC-AISVC-38, AC-AISVC-39, AC-AISVC-40, AC-AISVC-41]
- 更新文档上传流程支持多格式文档解析 [AC-AISVC-36, AC-AISVC-37]
- 更新 OpenAPI 契约添加嵌入管理接口
- 添加数据库初始化脚本
- 更新规范文档标记 Phase 7 完成
2026-02-24 15:08:08 +00:00
|
|
|
|
from pathlib import Path
|
2026-02-28 04:52:50 +00:00
|
|
|
|
|
|
|
|
|
|
from app.services.document import get_supported_document_formats
|
|
|
|
|
|
|
2026-02-24 08:10:27 +00:00
|
|
|
|
logger.info(
|
2026-02-28 04:52:50 +00:00
|
|
|
|
f"[AC-AISVC-63] Uploading document: tenant={tenant_id}, "
|
2026-02-24 08:10:27 +00:00
|
|
|
|
f"kb_id={kb_id}, filename={file.filename}"
|
|
|
|
|
|
)
|
|
|
|
|
|
|
feat(AISVC-T7): 嵌入模型可插拔设计与文档解析支持 [AC-AISVC-29, AC-AISVC-30, AC-AISVC-31, AC-AISVC-32, AC-AISVC-33, AC-AISVC-34, AC-AISVC-35, AC-AISVC-36, AC-AISVC-37, AC-AISVC-38, AC-AISVC-39, AC-AISVC-40, AC-AISVC-41]
- 新增 EmbeddingProvider 抽象基类和工厂模式 [AC-AISVC-29, AC-AISVC-30]
- 实现 OllamaEmbeddingProvider 和 OpenAIEmbeddingProvider [AC-AISVC-29, AC-AISVC-30]
- 新增 EmbeddingConfigManager 支持配置热更新 [AC-AISVC-31, AC-AISVC-32]
- 新增 DocumentParser 抽象接口和工厂类 [AC-AISVC-33]
- 实现 PDF/Word/Excel/Text 文档解析器 [AC-AISVC-33, AC-AISVC-34, AC-AISVC-35]
- 新增嵌入管理 API 端点 [AC-AISVC-38, AC-AISVC-39, AC-AISVC-40, AC-AISVC-41]
- 更新文档上传流程支持多格式文档解析 [AC-AISVC-36, AC-AISVC-37]
- 更新 OpenAPI 契约添加嵌入管理接口
- 添加数据库初始化脚本
- 更新规范文档标记 Phase 7 完成
2026-02-24 15:08:08 +00:00
|
|
|
|
file_ext = Path(file.filename or "").suffix.lower()
|
|
|
|
|
|
supported_formats = get_supported_document_formats()
|
2026-02-28 04:52:50 +00:00
|
|
|
|
|
feat(AISVC-T7): 嵌入模型可插拔设计与文档解析支持 [AC-AISVC-29, AC-AISVC-30, AC-AISVC-31, AC-AISVC-32, AC-AISVC-33, AC-AISVC-34, AC-AISVC-35, AC-AISVC-36, AC-AISVC-37, AC-AISVC-38, AC-AISVC-39, AC-AISVC-40, AC-AISVC-41]
- 新增 EmbeddingProvider 抽象基类和工厂模式 [AC-AISVC-29, AC-AISVC-30]
- 实现 OllamaEmbeddingProvider 和 OpenAIEmbeddingProvider [AC-AISVC-29, AC-AISVC-30]
- 新增 EmbeddingConfigManager 支持配置热更新 [AC-AISVC-31, AC-AISVC-32]
- 新增 DocumentParser 抽象接口和工厂类 [AC-AISVC-33]
- 实现 PDF/Word/Excel/Text 文档解析器 [AC-AISVC-33, AC-AISVC-34, AC-AISVC-35]
- 新增嵌入管理 API 端点 [AC-AISVC-38, AC-AISVC-39, AC-AISVC-40, AC-AISVC-41]
- 更新文档上传流程支持多格式文档解析 [AC-AISVC-36, AC-AISVC-37]
- 更新 OpenAPI 契约添加嵌入管理接口
- 添加数据库初始化脚本
- 更新规范文档标记 Phase 7 完成
2026-02-24 15:08:08 +00:00
|
|
|
|
if file_ext and file_ext not in supported_formats:
|
|
|
|
|
|
return JSONResponse(
|
|
|
|
|
|
status_code=400,
|
|
|
|
|
|
content={
|
|
|
|
|
|
"code": "UNSUPPORTED_FORMAT",
|
|
|
|
|
|
"message": f"Unsupported file format: {file_ext}",
|
|
|
|
|
|
"details": {
|
|
|
|
|
|
"supported_formats": supported_formats,
|
|
|
|
|
|
},
|
|
|
|
|
|
},
|
|
|
|
|
|
)
|
|
|
|
|
|
|
2026-02-28 04:52:50 +00:00
|
|
|
|
kb_service = KnowledgeBaseService(session)
|
|
|
|
|
|
|
|
|
|
|
|
try:
|
|
|
|
|
|
kb = await kb_service.get_knowledge_base(tenant_id, kb_id)
|
|
|
|
|
|
if not kb:
|
|
|
|
|
|
kb = await kb_service.get_or_create_default_kb(tenant_id)
|
|
|
|
|
|
kb_id = str(kb.id)
|
|
|
|
|
|
logger.info(f"[AC-AISVC-63] KB not found, using default: {kb_id}")
|
|
|
|
|
|
else:
|
|
|
|
|
|
kb_id = str(kb.id)
|
|
|
|
|
|
except Exception:
|
|
|
|
|
|
kb = await kb_service.get_or_create_default_kb(tenant_id)
|
|
|
|
|
|
kb_id = str(kb.id)
|
|
|
|
|
|
|
|
|
|
|
|
doc_kb_service = KBService(session)
|
feat(ai-service): v0.2.0 前后端联调真实对接
实现内容:
- 新增知识库实体模型 (KnowledgeBase, Document, IndexJob)
- 新增 KBService 服务层,支持文档上传、存储、索引任务管理
- 实现知识库管理 API 真实对接 (POST/GET /admin/kb/documents)
- 实现索引任务状态查询 API (GET /admin/kb/index/jobs/{jobId})
- 实现 RAG 实验室真实向量检索 (POST /admin/rag/experiments/run)
- 实现会话监控真实数据库查询 (GET /admin/sessions)
规范更新:
- requirements.md: v0.1.0 -> v0.2.0, 新增 AC-AISVC-21~28
- tasks.md: v0.1.0 -> v0.2.0, 新增 Phase 6 (9个任务)
- openapi.admin.yaml: L0 -> L1, 更新 x-requirements 映射
验收标准: AC-AISVC-21, AC-AISVC-22, AC-AISVC-23, AC-AISVC-24,
AC-AISVC-25, AC-AISVC-26, AC-AISVC-27, AC-AISVC-28
2026-02-24 10:16:29 +00:00
|
|
|
|
file_content = await file.read()
|
2026-02-28 04:52:50 +00:00
|
|
|
|
document, job = await doc_kb_service.upload_document(
|
feat(ai-service): v0.2.0 前后端联调真实对接
实现内容:
- 新增知识库实体模型 (KnowledgeBase, Document, IndexJob)
- 新增 KBService 服务层,支持文档上传、存储、索引任务管理
- 实现知识库管理 API 真实对接 (POST/GET /admin/kb/documents)
- 实现索引任务状态查询 API (GET /admin/kb/index/jobs/{jobId})
- 实现 RAG 实验室真实向量检索 (POST /admin/rag/experiments/run)
- 实现会话监控真实数据库查询 (GET /admin/sessions)
规范更新:
- requirements.md: v0.1.0 -> v0.2.0, 新增 AC-AISVC-21~28
- tasks.md: v0.1.0 -> v0.2.0, 新增 Phase 6 (9个任务)
- openapi.admin.yaml: L0 -> L1, 更新 x-requirements 映射
验收标准: AC-AISVC-21, AC-AISVC-22, AC-AISVC-23, AC-AISVC-24,
AC-AISVC-25, AC-AISVC-26, AC-AISVC-27, AC-AISVC-28
2026-02-24 10:16:29 +00:00
|
|
|
|
tenant_id=tenant_id,
|
2026-02-28 04:52:50 +00:00
|
|
|
|
kb_id=kb_id,
|
feat(ai-service): v0.2.0 前后端联调真实对接
实现内容:
- 新增知识库实体模型 (KnowledgeBase, Document, IndexJob)
- 新增 KBService 服务层,支持文档上传、存储、索引任务管理
- 实现知识库管理 API 真实对接 (POST/GET /admin/kb/documents)
- 实现索引任务状态查询 API (GET /admin/kb/index/jobs/{jobId})
- 实现 RAG 实验室真实向量检索 (POST /admin/rag/experiments/run)
- 实现会话监控真实数据库查询 (GET /admin/sessions)
规范更新:
- requirements.md: v0.1.0 -> v0.2.0, 新增 AC-AISVC-21~28
- tasks.md: v0.1.0 -> v0.2.0, 新增 Phase 6 (9个任务)
- openapi.admin.yaml: L0 -> L1, 更新 x-requirements 映射
验收标准: AC-AISVC-21, AC-AISVC-22, AC-AISVC-23, AC-AISVC-24,
AC-AISVC-25, AC-AISVC-26, AC-AISVC-27, AC-AISVC-28
2026-02-24 10:16:29 +00:00
|
|
|
|
file_name=file.filename or "unknown",
|
|
|
|
|
|
file_content=file_content,
|
|
|
|
|
|
file_type=file.content_type,
|
|
|
|
|
|
)
|
2026-02-28 04:52:50 +00:00
|
|
|
|
|
|
|
|
|
|
await kb_service.update_doc_count(tenant_id, kb_id, delta=1)
|
2026-02-24 11:52:52 +00:00
|
|
|
|
await session.commit()
|
2026-02-24 08:10:27 +00:00
|
|
|
|
|
2026-02-24 11:52:52 +00:00
|
|
|
|
background_tasks.add_task(
|
2026-02-28 04:52:50 +00:00
|
|
|
|
_index_document, tenant_id, kb_id, str(job.id), str(document.id), file_content, file.filename
|
2026-02-24 11:52:52 +00:00
|
|
|
|
)
|
2026-02-24 08:10:27 +00:00
|
|
|
|
|
|
|
|
|
|
return JSONResponse(
|
|
|
|
|
|
status_code=202,
|
|
|
|
|
|
content={
|
feat(ai-service): v0.2.0 前后端联调真实对接
实现内容:
- 新增知识库实体模型 (KnowledgeBase, Document, IndexJob)
- 新增 KBService 服务层,支持文档上传、存储、索引任务管理
- 实现知识库管理 API 真实对接 (POST/GET /admin/kb/documents)
- 实现索引任务状态查询 API (GET /admin/kb/index/jobs/{jobId})
- 实现 RAG 实验室真实向量检索 (POST /admin/rag/experiments/run)
- 实现会话监控真实数据库查询 (GET /admin/sessions)
规范更新:
- requirements.md: v0.1.0 -> v0.2.0, 新增 AC-AISVC-21~28
- tasks.md: v0.1.0 -> v0.2.0, 新增 Phase 6 (9个任务)
- openapi.admin.yaml: L0 -> L1, 更新 x-requirements 映射
验收标准: AC-AISVC-21, AC-AISVC-22, AC-AISVC-23, AC-AISVC-24,
AC-AISVC-25, AC-AISVC-26, AC-AISVC-27, AC-AISVC-28
2026-02-24 10:16:29 +00:00
|
|
|
|
"jobId": str(job.id),
|
|
|
|
|
|
"docId": str(document.id),
|
2026-02-28 04:52:50 +00:00
|
|
|
|
"kbId": kb_id,
|
feat(ai-service): v0.2.0 前后端联调真实对接
实现内容:
- 新增知识库实体模型 (KnowledgeBase, Document, IndexJob)
- 新增 KBService 服务层,支持文档上传、存储、索引任务管理
- 实现知识库管理 API 真实对接 (POST/GET /admin/kb/documents)
- 实现索引任务状态查询 API (GET /admin/kb/index/jobs/{jobId})
- 实现 RAG 实验室真实向量检索 (POST /admin/rag/experiments/run)
- 实现会话监控真实数据库查询 (GET /admin/sessions)
规范更新:
- requirements.md: v0.1.0 -> v0.2.0, 新增 AC-AISVC-21~28
- tasks.md: v0.1.0 -> v0.2.0, 新增 Phase 6 (9个任务)
- openapi.admin.yaml: L0 -> L1, 更新 x-requirements 映射
验收标准: AC-AISVC-21, AC-AISVC-22, AC-AISVC-23, AC-AISVC-24,
AC-AISVC-25, AC-AISVC-26, AC-AISVC-27, AC-AISVC-28
2026-02-24 10:16:29 +00:00
|
|
|
|
"status": job.status,
|
2026-02-24 08:10:27 +00:00
|
|
|
|
},
|
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
|
|
|
2026-02-28 04:52:50 +00:00
|
|
|
|
def _decode_index_text(content: bytes) -> str:
    """Decode raw text-file bytes by trying a fixed list of encodings in order.

    Strict decoders are tried first; the first one that succeeds wins.
    ``latin-1`` is the last resort: it maps every byte value to a code point,
    so it cannot fail and this function always returns a string.
    """
    for encoding in ("utf-8", "gbk", "gb2312", "gb18030", "big5", "utf-16"):
        try:
            decoded = content.decode(encoding)
        except (UnicodeDecodeError, LookupError):
            continue
        logger.info(f"[INDEX] Successfully decoded with encoding: {encoding}")
        return decoded

    decoded = content.decode("latin-1")
    logger.info("[INDEX] Successfully decoded with encoding: latin-1")
    return decoded


async def _index_document(
    tenant_id: str,
    kb_id: str,
    job_id: str,
    doc_id: str,
    content: bytes,
    filename: str | None = None,
) -> None:
    """
    Background indexing task.
    [AC-AISVC-33, AC-AISVC-34, AC-AISVC-35, AC-AISVC-63] Uses document parsing and pluggable embedding.
    Indexes to the specified knowledge base's Qdrant Collection.

    Steps: mark the job PROCESSING, extract text (direct decoding for
    text-like extensions, ``parse_document`` via a temp file otherwise),
    split it into line-based chunks, embed every chunk, upsert the points
    into Qdrant and mark the job COMPLETED. Any exception marks the job
    FAILED (progress 0, error message stored) using a fresh DB session.

    Args:
        tenant_id: Tenant owning the document.
        kb_id: Knowledge base the vectors are written to; also stored in each payload.
        job_id: Index job whose status/progress is updated along the way.
        doc_id: Document id, stored as ``source`` in each point payload.
        content: Raw bytes of the uploaded file.
        filename: Original file name; its suffix selects the extraction path.
    """
    import asyncio
    import tempfile
    from pathlib import Path

    from qdrant_client.models import PointStruct

    from app.core.database import async_session_maker
    from app.core.qdrant_client import get_qdrant_client
    from app.services.document import DocumentParseException, UnsupportedFormatError, parse_document
    from app.services.embedding import get_embedding_provider
    from app.services.kb import KBService

    logger.info(f"[INDEX] Starting indexing: tenant={tenant_id}, kb_id={kb_id}, job_id={job_id}, doc_id={doc_id}, filename={filename}")
    # NOTE(review): fixed 1s delay before touching the DB; presumably gives the
    # uploading request time to finish - confirm it is still needed.
    await asyncio.sleep(1)

    async with async_session_maker() as session:
        kb_service = KBService(session)
        try:
            await kb_service.update_job_status(
                tenant_id, job_id, IndexJobStatus.PROCESSING.value, progress=10
            )
            await session.commit()

            parse_result = None
            file_ext = Path(filename or "").suffix.lower()
            logger.info(f"[INDEX] File extension: {file_ext}, content size: {len(content)} bytes")

            text_extensions = {".txt", ".md", ".markdown", ".rst", ".log", ".json", ".xml", ".yaml", ".yml"}

            if file_ext in text_extensions or not file_ext:
                logger.info("[INDEX] Treating as text file, trying multiple encodings")
                text = _decode_index_text(content)
            else:
                logger.info("[INDEX] Binary file detected, will parse with document parser")
                await kb_service.update_job_status(
                    tenant_id, job_id, IndexJobStatus.PROCESSING.value, progress=15
                )
                await session.commit()

                tmp_path = None
                try:
                    # delete=False because the parser reopens the file by path;
                    # the outer finally guarantees removal even if the write fails.
                    with tempfile.NamedTemporaryFile(delete=False, suffix=file_ext) as tmp_file:
                        tmp_path = tmp_file.name
                        tmp_file.write(content)

                    logger.info(f"[INDEX] Temp file created: {tmp_path}")

                    try:
                        logger.info(f"[INDEX] Starting document parsing for {file_ext}...")
                        # parse_document is synchronous; run it in a worker thread
                        # so a large document does not stall the event loop.
                        parse_result = await asyncio.to_thread(parse_document, tmp_path)
                        text = parse_result.text
                        logger.info(
                            f"[INDEX] Parsed document SUCCESS: {filename}, "
                            f"chars={len(text)}, format={parse_result.metadata.get('format')}, "
                            f"pages={len(parse_result.pages) if parse_result.pages else 'N/A'}, "
                            f"metadata={parse_result.metadata}"
                        )
                        if len(text) < 100:
                            logger.warning(f"[INDEX] Parsed text is very short, preview: {text[:200]}")
                    # Best-effort fallback: on any parsing failure index whatever
                    # survives a lenient utf-8 decode of the raw bytes.
                    except UnsupportedFormatError as e:
                        logger.error(f"[INDEX] UnsupportedFormatError: {e}")
                        text = content.decode("utf-8", errors="ignore")
                    except DocumentParseException as e:
                        logger.error(f"[INDEX] DocumentParseException: {e}, details={getattr(e, 'details', {})}")
                        text = content.decode("utf-8", errors="ignore")
                    except Exception as e:
                        logger.error(f"[INDEX] Unexpected parsing error: {type(e).__name__}: {e}")
                        text = content.decode("utf-8", errors="ignore")
                finally:
                    if tmp_path is not None:
                        Path(tmp_path).unlink(missing_ok=True)
                        logger.info("[INDEX] Temp file cleaned up")

            logger.info(f"[INDEX] Final text length: {len(text)} chars")
            if len(text) < 50:
                logger.warning(f"[INDEX] Text too short, preview: {repr(text[:200])}")

            await kb_service.update_job_status(
                tenant_id, job_id, IndexJobStatus.PROCESSING.value, progress=20
            )
            await session.commit()

            logger.info("[INDEX] Getting embedding provider...")
            embedding_provider = await get_embedding_provider()
            logger.info(f"[INDEX] Embedding provider: {type(embedding_provider).__name__}")

            all_chunks: list[TextChunk] = []

            if parse_result and parse_result.pages:
                # Chunk page by page so every chunk can carry its page number.
                logger.info(f"[INDEX] PDF with {len(parse_result.pages)} pages, using line-based chunking with page metadata")
                for page in parse_result.pages:
                    page_chunks = chunk_text_by_lines(
                        page.text,
                        min_line_length=10,
                        source=filename,
                    )
                    for pc in page_chunks:
                        pc.page = page.page
                    all_chunks.extend(page_chunks)
                logger.info(f"[INDEX] Total chunks from PDF: {len(all_chunks)}")
            else:
                logger.info("[INDEX] Using line-based chunking")
                all_chunks = chunk_text_by_lines(
                    text,
                    min_line_length=10,
                    source=filename,
                )
                logger.info(f"[INDEX] Total chunks: {len(all_chunks)}")

            qdrant = await get_qdrant_client()
            # NOTE(review): the collection is always ensured with
            # use_multi_vector=True, even when the provider check below selects
            # the single-vector upsert path - confirm this is intended.
            await qdrant.ensure_kb_collection_exists(tenant_id, kb_id, use_multi_vector=True)

            from app.services.embedding.nomic_provider import NomicEmbeddingProvider
            use_multi_vector = isinstance(embedding_provider, NomicEmbeddingProvider)
            logger.info(f"[INDEX] Using multi-vector format: {use_multi_vector}")

            points = []
            total_chunks = len(all_chunks)
            for i, chunk in enumerate(all_chunks):
                payload = {
                    "text": chunk.text,
                    "source": doc_id,
                    "kb_id": kb_id,
                    "chunk_index": i,
                    "start_token": chunk.start_token,
                    "end_token": chunk.end_token,
                }
                if chunk.page is not None:
                    payload["page"] = chunk.page
                if chunk.source:
                    payload["filename"] = chunk.source

                if use_multi_vector:
                    embedding_result = await embedding_provider.embed_document(chunk.text)
                    points.append({
                        "id": str(uuid.uuid4()),
                        "vector": {
                            "full": embedding_result.embedding_full,
                            "dim_256": embedding_result.embedding_256,
                            "dim_512": embedding_result.embedding_512,
                        },
                        "payload": payload,
                    })
                else:
                    embedding = await embedding_provider.embed(chunk.text)
                    points.append(
                        PointStruct(
                            id=str(uuid.uuid4()),
                            vector=embedding,
                            payload=payload,
                        )
                    )

                # Embedding covers the 20%..90% progress range; persist every
                # 10th chunk (and the last one) to limit DB round-trips.
                progress = 20 + int((i + 1) / total_chunks * 70)
                if i % 10 == 0 or i == total_chunks - 1:
                    await kb_service.update_job_status(
                        tenant_id, job_id, IndexJobStatus.PROCESSING.value, progress=progress
                    )
                    await session.commit()

            if points:
                logger.info(f"[INDEX] Upserting {len(points)} vectors to Qdrant for kb_id={kb_id}...")
                if use_multi_vector:
                    await qdrant.upsert_multi_vector(tenant_id, points, kb_id=kb_id)
                else:
                    await qdrant.upsert_vectors(tenant_id, points, kb_id=kb_id)

            await kb_service.update_job_status(
                tenant_id, job_id, IndexJobStatus.COMPLETED.value, progress=100
            )
            await session.commit()

            logger.info(
                f"[INDEX] COMPLETED: tenant={tenant_id}, kb_id={kb_id}, "
                f"job_id={job_id}, chunks={len(all_chunks)}, text_len={len(text)}"
            )

        except Exception as e:
            # logger.exception appends the active traceback to the message.
            logger.exception(f"[INDEX] FAILED: {e}")

            try:
                await session.rollback()
            except Exception:
                # A failed rollback must not prevent recording the failure below.
                logger.exception(f"[INDEX] Rollback failed for job_id={job_id}")

            # Record the failure through a fresh session: the original one may be
            # unusable. Guarded so a second error cannot escape the background task.
            try:
                async with async_session_maker() as error_session:
                    await KBService(error_session).update_job_status(
                        tenant_id, job_id, IndexJobStatus.FAILED.value,
                        progress=0, error_msg=str(e)
                    )
                    await error_session.commit()
            except Exception:
                logger.exception(f"[INDEX] Could not mark job as FAILED: job_id={job_id}")
|
feat(ai-service): v0.2.0 前后端联调真实对接
实现内容:
- 新增知识库实体模型 (KnowledgeBase, Document, IndexJob)
- 新增 KBService 服务层,支持文档上传、存储、索引任务管理
- 实现知识库管理 API 真实对接 (POST/GET /admin/kb/documents)
- 实现索引任务状态查询 API (GET /admin/kb/index/jobs/{jobId})
- 实现 RAG 实验室真实向量检索 (POST /admin/rag/experiments/run)
- 实现会话监控真实数据库查询 (GET /admin/sessions)
规范更新:
- requirements.md: v0.1.0 -> v0.2.0, 新增 AC-AISVC-21~28
- tasks.md: v0.1.0 -> v0.2.0, 新增 Phase 6 (9个任务)
- openapi.admin.yaml: L0 -> L1, 更新 x-requirements 映射
验收标准: AC-AISVC-21, AC-AISVC-22, AC-AISVC-23, AC-AISVC-24,
AC-AISVC-25, AC-AISVC-26, AC-AISVC-27, AC-AISVC-28
2026-02-24 10:16:29 +00:00
|
|
|
|
|
|
|
|
|
|
|
2026-02-24 08:10:27 +00:00
|
|
|
|
@router.get(
    "/index/jobs/{job_id}",
    operation_id="getIndexJob",
    summary="Query index job status",
    description="[AC-ASA-02] Get indexing job status and progress.",
    responses={
        200: {"description": "Job status details"},
        # Declared so the OpenAPI schema matches the 404 this handler returns.
        404: {"description": "Job not found"},
        401: {"description": "Unauthorized", "model": ErrorResponse},
        403: {"description": "Forbidden", "model": ErrorResponse},
    },
)
async def get_index_job(
    tenant_id: Annotated[str, Depends(get_current_tenant_id)],
    session: Annotated[AsyncSession, Depends(get_session)],
    job_id: str,
) -> JSONResponse:
    """
    [AC-ASA-02] Get indexing job status with progress.

    Looks the job up through ``KBService.get_index_job`` using the tenant
    resolved by the ``get_current_tenant_id`` dependency and the ``job_id``
    path parameter.

    Args:
        tenant_id: Tenant identifier injected by ``get_current_tenant_id``.
        session: Database session injected by ``get_session``.
        job_id: Identifier of the index job, taken from the URL path.

    Returns:
        A 404 ``JSONResponse`` with ``code`` ``JOB_NOT_FOUND`` when the
        service returns nothing for this tenant/job pair; otherwise a
        ``JSONResponse`` carrying ``jobId``, ``docId``, ``status``,
        ``progress`` and ``errorMsg`` read from the job record.
    """
    logger.info(
        f"[AC-ASA-02] Getting job status: tenant={tenant_id}, job_id={job_id}"
    )

    kb_service = KBService(session)
    job = await kb_service.get_index_job(tenant_id, job_id)

    if not job:
        return JSONResponse(
            status_code=404,
            content={
                "code": "JOB_NOT_FOUND",
                "message": f"Job {job_id} not found",
            },
        )

    return JSONResponse(
        content={
            # id / doc_id are stringified so the payload is JSON-serialisable.
            "jobId": str(job.id),
            "docId": str(job.doc_id),
            "status": job.status,
            "progress": job.progress,
            "errorMsg": job.error_msg,
        }
    )
|
2026-02-24 08:10:27 +00:00
|
|
|
|
|
feat(ai-service): v0.2.0 前后端联调真实对接
实现内容:
- 新增知识库实体模型 (KnowledgeBase, Document, IndexJob)
- 新增 KBService 服务层,支持文档上传、存储、索引任务管理
- 实现知识库管理 API 真实对接 (POST/GET /admin/kb/documents)
- 实现索引任务状态查询 API (GET /admin/kb/index/jobs/{jobId})
- 实现 RAG 实验室真实向量检索 (POST /admin/rag/experiments/run)
- 实现会话监控真实数据库查询 (GET /admin/sessions)
规范更新:
- requirements.md: v0.1.0 -> v0.2.0, 新增 AC-AISVC-21~28
- tasks.md: v0.1.0 -> v0.2.0, 新增 Phase 6 (9个任务)
- openapi.admin.yaml: L0 -> L1, 更新 x-requirements 映射
验收标准: AC-AISVC-21, AC-AISVC-22, AC-AISVC-23, AC-AISVC-24,
AC-AISVC-25, AC-AISVC-26, AC-AISVC-27, AC-AISVC-28
2026-02-24 10:16:29 +00:00
|
|
|
|
|
|
|
|
|
|
@router.delete(
    "/documents/{doc_id}",
    operation_id="deleteDocument",
    summary="Delete document",
    description="[AC-ASA-08] Delete a document and its associated files.",
    responses={
        200: {"description": "Document deleted"},
        404: {"description": "Document not found"},
        401: {"description": "Unauthorized", "model": ErrorResponse},
        403: {"description": "Forbidden", "model": ErrorResponse},
    },
)
async def delete_document(
    tenant_id: Annotated[str, Depends(get_current_tenant_id)],
    session: Annotated[AsyncSession, Depends(get_session)],
    doc_id: str,
) -> JSONResponse:
    """
    [AC-ASA-08] Delete a document.

    Delegates the removal to ``KBService.delete_document`` for the current
    tenant and translates its truthy/falsy result into an HTTP response.

    Args:
        tenant_id: Tenant identifier injected by ``get_current_tenant_id``.
        session: Database session injected by ``get_session``.
        doc_id: Identifier of the document, taken from the URL path.

    Returns:
        A ``JSONResponse`` with ``success`` set to ``True`` when the service
        reports a deletion; otherwise a 404 ``JSONResponse`` with ``code``
        ``DOCUMENT_NOT_FOUND``.
    """
    logger.info(
        f"[AC-ASA-08] Deleting document: tenant={tenant_id}, doc_id={doc_id}"
    )

    was_removed = await KBService(session).delete_document(tenant_id, doc_id)

    # Success path first; anything falsy from the service is reported as 404.
    if was_removed:
        success_body = {
            "success": True,
            "message": "Document deleted",
        }
        return JSONResponse(content=success_body)

    not_found_body = {
        "code": "DOCUMENT_NOT_FOUND",
        "message": f"Document {doc_id} not found",
    }
    return JSONResponse(status_code=404, content=not_found_body)
|