ai-robot-core/ai-service/scripts/delete_course_kb_documents.py

76 lines
2.3 KiB
Python
Raw Normal View History

"""
删除课程知识库的文档记录
"""
import asyncio
import sys
from pathlib import Path
# Put the directory two levels above this file at the front of sys.path so the
# `app` package imported below can be resolved (presumably needed when the
# script is executed directly rather than as an installed module -- confirm).
sys.path.insert(0, str(Path(__file__).parent.parent))
from sqlalchemy import delete, select
from sqlalchemy.ext.asyncio import AsyncSession, create_async_engine
from sqlalchemy.orm import sessionmaker
from app.core.config import get_settings
from app.models.entities import Document, IndexJob
async def delete_course_kb_documents(
    tenant_id: str = "szmp@ash@2026",
    kb_id: str = "75c465fe-277d-455d-a30b-4b168adcc03b",
) -> None:
    """Delete all document records of one knowledge base and their index jobs.

    The function first selects the matching ``Document`` rows, prints a short
    preview (at most five entries), deletes the ``IndexJob`` rows that point
    at those documents, deletes the documents themselves and commits. Nothing
    is modified when no document matches.

    Args:
        tenant_id: Tenant whose records are targeted. Defaults to the tenant
            this script was originally written for.
        kb_id: Knowledge base whose documents are removed. Defaults to the
            knowledge base this script was originally written for.

    Returns:
        None. Progress is reported on standard output.
    """
    settings = get_settings()
    engine = create_async_engine(settings.database_url)
    async_session = sessionmaker(engine, class_=AsyncSession, expire_on_commit=False)

    print(f"\n{'='*80}")
    print("删除课程知识库的文档记录")
    print(f"{'='*80}")
    print(f"租户 ID: {tenant_id}")
    print(f"知识库 ID: {kb_id}")

    try:
        async with async_session() as session:
            stmt = select(Document).where(
                Document.tenant_id == tenant_id,
                Document.kb_id == kb_id,
            )
            result = await session.execute(stmt)
            documents = result.scalars().all()
            print(f"\n找到 {len(documents)} 个文档记录")

            if not documents:
                print("没有需要删除的文档记录")
                return

            # Show only a short preview so large knowledge bases do not
            # flood the terminal.
            for doc in documents[:5]:
                print(f" - {doc.file_name} (id: {doc.id})")
            if len(documents) > 5:
                print(f" ... 还有 {len(documents) - 5} 个文档")

            # Index jobs are removed before the documents they reference.
            doc_ids = [doc.id for doc in documents]
            index_job_stmt = delete(IndexJob).where(
                IndexJob.tenant_id == tenant_id,
                IndexJob.doc_id.in_(doc_ids),
            )
            index_job_result = await session.execute(index_job_stmt)
            print(f"\n删除了 {index_job_result.rowcount} 个索引任务记录")

            doc_stmt = delete(Document).where(
                Document.tenant_id == tenant_id,
                Document.kb_id == kb_id,
            )
            doc_result = await session.execute(doc_stmt)
            print(f"删除了 {doc_result.rowcount} 个文档记录")

            # Both deletes are committed together; if anything above raises,
            # the session is closed without committing.
            await session.commit()
            print("\n✅ 删除完成!")
    finally:
        # Release the connection pool on every exit path (early return,
        # success or error) while the event loop is still running.
        await engine.dispose()
if __name__ == "__main__":
    # Script entry point: build the coroutine, then drive it to completion
    # on a fresh event loop.
    cleanup_coro = delete_course_kb_documents()
    asyncio.run(cleanup_coro)