"""Check the ingestion status of the course knowledge base.

Compares the document rows stored in the relational database with the
contents of the matching Qdrant collection and prints a short diagnosis.
"""
import asyncio
import sys
from pathlib import Path

# Make the project root importable when this file is run as a script.
sys.path.insert(0, str(Path(__file__).parent.parent))

from sqlalchemy import select
from sqlalchemy.ext.asyncio import AsyncSession, create_async_engine
from sqlalchemy.orm import sessionmaker

from app.core.config import get_settings
from app.core.qdrant_client import QdrantClient
from app.models.entities import Document

DEFAULT_TENANT_ID = "szmp@ash@2026"
DEFAULT_KB_ID = "75c465fe-277d-455d-a30b-4b168adcc03b"

# Maximum number of documents / points listed as a sample.
SAMPLE_LIMIT = 5
# Maximum number of characters of a point's text shown in the preview.
PREVIEW_CHARS = 50
BANNER = "=" * 80


def _preview(text, limit: int = PREVIEW_CHARS):
    """Return *text* cut to *limit* characters, with '...' appended if cut."""
    if len(text) > limit:
        return text[:limit] + "..."
    return text


def _point_id_and_payload(point):
    """Return ``(id, payload)`` for a point given as an object or a dict.

    A missing or ``None`` payload is normalised to an empty dict so that
    callers can safely test for keys.
    """
    if hasattr(point, "payload"):
        return point.id, point.payload or {}
    return point.get("id", "unknown"), point.get("payload") or {}


async def _report_documents(session_factory, tenant_id: str, kb_id: str) -> int:
    """Print the documents recorded for the knowledge base; return their count."""
    async with session_factory() as session:
        stmt = select(Document).where(
            Document.tenant_id == tenant_id,
            Document.kb_id == kb_id,
        )
        result = await session.execute(stmt)
        documents = result.scalars().all()

        print(f"\n数据库中的文档记录: {len(documents)} 个")
        for doc in documents[:SAMPLE_LIMIT]:
            print(f" - {doc.file_name} (status: {doc.status})")
        if len(documents) > SAMPLE_LIMIT:
            print(f" ... 还有 {len(documents) - SAMPLE_LIMIT} 个文档")

        return len(documents)


async def _report_qdrant_collection(tenant_id: str, kb_id: str) -> bool:
    """Print a sample of the Qdrant collection; return whether it exists."""
    client = QdrantClient()
    qdrant = await client.get_client()
    collection_name = client.get_kb_collection_name(tenant_id, kb_id)
    print(f"\nQdrant Collection 名称: {collection_name}")

    exists = await qdrant.collection_exists(collection_name)
    if not exists:
        print("Qdrant Collection 不存在!")
        return exists

    points_result = await qdrant.scroll(
        collection_name=collection_name,
        limit=SAMPLE_LIMIT,
        with_vectors=False,
    )
    # The scroll result may be a (points, next_offset) tuple or a bare list.
    points = points_result[0] if isinstance(points_result, tuple) else points_result

    # NOTE(review): len(points) is capped by the scroll limit, so the number
    # printed here is the size of the sample, not the collection's total.
    print(f"Qdrant Collection 存在,有 {len(points)} 条数据")
    for index, point in enumerate(points, 1):
        point_id, payload = _point_id_and_payload(point)
        print(f" [{index}] id: {point_id}")
        if "text" in payload:
            print(f" text: {_preview(payload['text'])}")

    return exists


def _print_conclusion(doc_count: int, collection_exists: bool) -> None:
    """Print the diagnosis for the database / Qdrant state combination."""
    print(f"\n{BANNER}")
    print("结论:")
    has_documents = doc_count > 0

    if has_documents and not collection_exists:
        print(" 数据库有文档记录,但 Qdrant Collection 不存在")
        print(" 需要等待文档向量化任务完成")
    elif not has_documents and collection_exists:
        print(" 数据库没有文档记录,但 Qdrant Collection 存在")
        print(" 可能是旧数据")
    elif has_documents and collection_exists:
        print(f" 数据库有 {doc_count} 个文档记录")
        print(" Qdrant Collection 存在")
        print(" ✅ 知识库已录入完成")
    else:
        print(" 数据库没有文档记录")
        print(" Qdrant Collection 不存在")
        print(" ❌ 知识库未录入")


async def check_course_kb_status(
    tenant_id: str = DEFAULT_TENANT_ID,
    kb_id: str = DEFAULT_KB_ID,
) -> None:
    """Report how far the course knowledge base has been ingested.

    Queries the database for ``Document`` rows of the given tenant and
    knowledge base, inspects the corresponding Qdrant collection, and
    prints a conclusion based on which of the two contain data.

    Args:
        tenant_id: Tenant whose knowledge base is inspected.
        kb_id: Identifier of the knowledge base to inspect.
    """
    settings = get_settings()
    engine = create_async_engine(settings.database_url)
    session_factory = sessionmaker(engine, class_=AsyncSession, expire_on_commit=False)

    print(f"\n{BANNER}")
    print("检查课程知识库的录入情况")
    print(BANNER)
    print(f"租户 ID: {tenant_id}")
    print(f"知识库 ID: {kb_id}")

    try:
        doc_count = await _report_documents(session_factory, tenant_id, kb_id)
    finally:
        # Release pooled connections even if the query fails.
        await engine.dispose()

    collection_exists = await _report_qdrant_collection(tenant_id, kb_id)
    _print_conclusion(doc_count, collection_exists)


if __name__ == "__main__":
    asyncio.run(check_course_kb_status())