ai-robot-core/ai-service/scripts/check_qdrant.py

114 lines
4.1 KiB
Python
Raw Permalink Normal View History

"""
检查 Qdrant 向量数据库状态和知识库内容
"""
import asyncio
import sys
from pathlib import Path
sys.path.insert(0, str(Path(__file__).parent.parent))
from app.core.config import get_settings
from app.core.qdrant_client import get_qdrant_client
async def check_qdrant(
    tenant_id: str = "szmp@ash@2026",
    query_text: str = "课程",
) -> None:
    """Print a diagnostic report of the Qdrant collections for one tenant.

    The report lists every collection, narrows to the tenant's collections,
    prints size/status and up to three sample points for each, then embeds
    ``query_text`` and runs a top-3 vector search against each tenant
    collection using the named vector ``"full"``.

    Args:
        tenant_id: Tenant identifier. Collections are matched on this value
            with ``"@"`` replaced by ``"_"``.
        query_text: Text that is embedded and used as the search query.

    Returns:
        None. All results are written to stdout. Errors are caught and
        printed (with a traceback for failures outside the per-collection
        steps); they are not re-raised.
    """
    settings = get_settings()

    # The original script paired the tenant id "szmp@ash@2026" with the
    # collection-name fragment "szmp_ash_2026"; derive one from the other so
    # they cannot drift apart.
    # NOTE(review): confirm this matches the project's collection-naming rule.
    tenant_key = tenant_id.replace("@", "_")
    rule = "=" * 60

    # Connection URLs may embed credentials; never echo the password.
    print(f"Database URL: {_redact_url(settings.database_url)}")
    print(f"Qdrant URL: {_redact_url(settings.qdrant_url)}")
    print(f"Tenant ID: {tenant_id}")
    print()

    try:
        qdrant_manager = await get_qdrant_client()
        client = await qdrant_manager.get_client()

        # List every collection known to the server.
        collections = (await client.get_collections()).collections
        collection_names = [c.name for c in collections]
        print(f"Available collections: {collection_names}")
        print()

        # Keep only the collections that belong to this tenant.
        tenant_collections = [
            name for name in collection_names if tenant_key in name
        ]
        print(f"Tenant collections: {tenant_collections}")
        print()

        # Inspect each collection; a failure in one must not hide the others.
        for collection_name in tenant_collections:
            print(f"\n{rule}")
            print(f"Collection: {collection_name}")
            print(rule)
            try:
                await _report_collection(client, collection_name)
            except Exception as e:
                print(f"  ❌ Collection error: {e}")

        # Try a vector search with the embedded query text.
        print(f"\n\n{rule}")
        print(f"尝试向量搜索 (query='{query_text}'):")
        print(rule)

        # Imported lazily, as in the original script.
        from app.services.embedding.factory import get_embedding_provider

        embedding_provider = await get_embedding_provider()
        query_vector = await embedding_provider.embed(query_text)
        print(f"Query vector dimension: {len(query_vector)}")

        for collection_name in tenant_collections:
            print(f"\n搜索 collection: {collection_name}")
            try:
                search_results = await client.query_points(
                    collection_name=collection_name,
                    query=query_vector,
                    using="full",  # search against the named vector "full"
                    limit=3,
                    with_payload=True,
                )
                print(f"  Search results: {len(search_results.points)}")
                for i, result in enumerate(search_results.points, 1):
                    text = _preview(result.payload or {}, 80)
                    print(f"    {i}. [score={result.score:.4f}] {text}")
            except Exception as e:
                print(f"  ❌ Search error: {e}")
    except Exception as e:
        # Top-level boundary of a diagnostic script: report and stop.
        import traceback

        print(f"❌ Error: {e}")
        traceback.print_exc()


async def _report_collection(client, collection_name: str) -> None:
    """Print counts, status and up to three sample points of one collection."""
    collection_info = await client.get_collection(collection_name)
    print(f"  Points count: {collection_info.points_count}")
    # Read defensively so a client model without `vectors_count` does not
    # abort the report. NOTE(review): confirm against the installed
    # qdrant-client version.
    vectors_count = getattr(collection_info, "vectors_count", "N/A")
    print(f"  Vectors count: {vectors_count}")
    print(f"  Status: {collection_info.status}")

    if collection_info.points_count == 0:
        print("  ⚠️ Collection is empty!")
        return

    # Scroll a few points (payload only, no vectors) as a sample.
    print("\n  前 3 条数据:")
    points, _ = await client.scroll(
        collection_name=collection_name,
        limit=3,
        with_payload=True,
        with_vectors=False,
    )
    for i, point in enumerate(points, 1):
        payload = point.payload or {}
        print(f"\n  Point {i}:")
        print(f"    ID: {point.id}")
        print(f"    KB ID: {payload.get('kb_id', 'N/A')}")
        print(f"    Text: {_preview(payload, 100)}")
        print(f"    Metadata: {payload.get('metadata', {})}")


def _preview(payload: dict, limit: int) -> str:
    """Return the payload's ``text`` cut to *limit* chars, or ``"N/A"``."""
    text = payload.get("text")
    if not text:
        return "N/A"
    text = str(text)
    # Only mark the preview as truncated when something was actually cut.
    return text[:limit] + "..." if len(text) > limit else text


def _redact_url(url: object) -> str:
    """Return *url* as a string with any embedded password shown as ``***``."""
    from urllib.parse import urlsplit, urlunsplit

    text = str(url)
    try:
        parts = urlsplit(text)
    except ValueError:
        return text
    userinfo, at, hostport = parts.netloc.rpartition("@")
    user, colon, _password = userinfo.partition(":")
    if not at or not colon:
        # No "user:password@" section, nothing to hide.
        return text
    return urlunsplit(parts._replace(netloc=f"{user}:***@{hostport}"))
# Run the diagnostic only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    asyncio.run(check_qdrant())