# ai-robot-core/ai-service/scripts/check_course_kb.py
#
# 72 lines
# 2.1 KiB
# Python
# Raw Permalink Normal View History

"""
检查 Qdrant 中课程知识库的数据结构
"""
import asyncio
import sys
from pathlib import Path
sys.path.insert(0, str(Path(__file__).parent.parent))
from qdrant_client import AsyncQdrantClient
from app.core.config import get_settings
from app.core.qdrant_client import QdrantClient
async def check_course_kb():
    """Print a diagnostic report about the course knowledge-base collection.

    The tenant ID and course knowledge-base ID are hard-coded below. The
    expected collection name is built as
    ``<qdrant_collection_prefix><tenant id with '@' -> '_'>_<course kb id>``.

    The report lists every collection whose name contains the sanitized
    tenant ID, and, when the expected collection exists, prints the id,
    payload keys and (if present) the ``metadata`` payload entry of up to
    three points from it. Otherwise all collection names are printed.

    Returns:
        None. All output goes to stdout.
    """
    settings = get_settings()
    client = QdrantClient()
    # NOTE(review): presumably an AsyncQdrantClient, since its methods are
    # awaited below -- confirm against app.core.qdrant_client.
    qdrant = await client.get_client()

    tenant_id = "szmp@ash@2026"
    course_kb_id = "75c465fe-277d-455d-a30b-4b168adcc03b"

    # '@' in the tenant ID is replaced by '_' when forming collection names.
    safe_tenant_id = tenant_id.replace('@', '_')
    prefix = settings.qdrant_collection_prefix
    expected_collection = f"{prefix}{safe_tenant_id}_{course_kb_id}"

    print(f"\n{'='*80}")
    print(f"检查课程知识库 Collection")
    print(f"{'='*80}")
    print(f"租户 ID: {tenant_id}")
    print(f"课程知识库 ID: {course_kb_id}")
    print(f"预期 Collection 名称: {expected_collection}")

    collections = await qdrant.get_collections()
    collection_names = [c.name for c in collections.collections]

    print(f"\n租户的所有 Collections:")
    for name in collection_names:
        if safe_tenant_id in name:
            print(f" - {name}")

    if expected_collection in collection_names:
        print(f"\n✅ 课程知识库 Collection 存在: {expected_collection}")

        # scroll() is a coroutine on the async client and must be awaited;
        # unpacking the un-awaited coroutine raises TypeError.
        points, _ = await qdrant.scroll(
            collection_name=expected_collection,
            limit=3,
            with_vectors=False,
        )

        print(f"\n课程知识库数据 (共 {len(points)} 条):")
        for i, point in enumerate(points, 1):
            # Scrolled points are Record objects (attribute access, not
            # dicts); payload may be None when a point carries no payload.
            payload = point.payload or {}
            print(f"\n [{i}] id: {point.id}")
            print(f" payload keys: {list(payload.keys())}")
            if 'metadata' in payload:
                print(f" metadata: {payload['metadata']}")
    else:
        print(f"\n❌ 课程知识库 Collection 不存在!")
        print(f" 可用的 Collections: {collection_names}")
if __name__ == "__main__":
    # Script entry point: drive the async check to completion on a fresh
    # event loop.
    _report = check_course_kb()
    asyncio.run(_report)