# ai-robot-core/ai-service/scripts/check_kb_content.py
#
# 89 lines
# 2.4 KiB
# Python

"""
Inspect the contents of a specific knowledge base stored in Qdrant.
"""
import asyncio
import sys
from pathlib import Path
sys.path.insert(0, str(Path(__file__).parent.parent))
from qdrant_client import AsyncQdrantClient
from app.core.config import get_settings
# Knowledge base inspected when the caller does not name one.
_DEFAULT_KB_ID = "8559ebc9-bfaf-4211-8fe3-ee2b22a5e29c"
# Qdrant collection inspected by default. The original script paired it with
# tenant "szmp@ash@2026"; the name presumably encodes that tenant plus the
# first segment of the kb id -- TODO confirm against the service's naming code.
_DEFAULT_COLLECTION_NAME = "kb_szmp_ash_2026_8559ebc9"


def _preview_text(value, limit=100):
    """Return a printable preview of a point payload's ``text`` field.

    ``None`` (missing or null field) becomes ``"N/A"``, non-string values are
    stringified, and an ellipsis is appended only when the text was actually
    cut at *limit* characters.
    """
    if value is None:
        return "N/A"
    text = value if isinstance(value, str) else str(value)
    if len(text) > limit:
        return f"{text[:limit]}..."
    return text


async def check_kb_content(
    kb_id: str = _DEFAULT_KB_ID,
    collection_name: str = _DEFAULT_COLLECTION_NAME,
) -> None:
    """Print every point stored in a knowledge base's Qdrant collection.

    Connects to the Qdrant instance at ``settings.qdrant_url``, reports whether
    the collection exists and how many points it holds, then scrolls through
    the collection ten points at a time, printing each point's id, filename,
    a text preview (at most 100 characters) and metadata.

    Args:
        kb_id: Knowledge base identifier; only used in the printed banner.
        collection_name: Name of the Qdrant collection to inspect.

    Returns:
        None. All results are written to stdout. Any exception raised while
        querying is printed together with its traceback instead of propagating,
        and the client is always closed.
    """
    settings = get_settings()
    client = AsyncQdrantClient(url=settings.qdrant_url)

    print("=" * 80)
    print(f"查看知识库: {kb_id}")
    print(f"Collection: {collection_name}")
    print("=" * 80)

    try:
        # Bail out early when the collection does not exist.
        exists = await client.collection_exists(collection_name)
        print(f"\nCollection 存在: {exists}")
        if not exists:
            print("Collection 不存在!")
            return

        # Summary information about the collection.
        info = await client.get_collection(collection_name)
        print("\nCollection 信息:")
        print(f" 向量数: {info.points_count}")

        # Scroll through all points, one page at a time.
        print("\n文档内容:")
        print("-" * 80)

        offset = None
        total = 0
        while True:
            # scroll() returns (points, next_offset).
            points, offset = await client.scroll(
                collection_name=collection_name,
                limit=10,
                offset=offset,
                with_payload=True,
            )
            if not points:
                break

            for point in points:
                total += 1
                payload = point.payload or {}
                # A null or non-string "text" must not abort the whole listing.
                text = _preview_text(payload.get('text'))
                metadata = payload.get('metadata', {})
                filename = payload.get('filename', 'N/A')
                print(f"\n [{total}] ID: {point.id}")
                print(f" Filename: {filename}")
                print(f" Text: {text}")
                print(f" Metadata: {metadata}")

            # A next offset of None means this was the last page.
            if offset is None:
                break

        print(f"\n总计 {total} 条记录")
    except Exception as e:
        # Diagnostic script: report the failure in full rather than crash.
        print(f"\n错误: {e}")
        import traceback
        traceback.print_exc()
    finally:
        await client.close()
if __name__ == "__main__":
    # Script entry point: build the inspection coroutine and run it to
    # completion on a fresh event loop.
    inspection = check_kb_content()
    asyncio.run(inspection)