114 lines
4.1 KiB
Python
114 lines
4.1 KiB
Python
"""
|
|
检查 Qdrant 向量数据库状态和知识库内容
|
|
"""
|
|
|
|
import asyncio
|
|
import sys
|
|
from pathlib import Path
|
|
|
|
sys.path.insert(0, str(Path(__file__).parent.parent))
|
|
|
|
from app.core.config import get_settings
|
|
from app.core.qdrant_client import get_qdrant_client
|
|
|
|
|
|
def _preview_text(payload: dict, limit: int) -> str:
    """Return a short printable preview of ``payload["text"]``.

    Returns "N/A" when the payload has no (or an empty) ``text`` value.
    Otherwise returns at most *limit* characters, appending "..." only when
    the text was actually cut, so a short text is not shown as truncated.
    """
    text = payload.get("text")
    if not text:
        return "N/A"
    text = str(text)  # tolerate non-string payload values instead of raising
    if len(text) <= limit:
        return text
    return text[:limit] + "..."


async def _print_collection_sample(client, collection_name: str) -> None:
    """Print summary counters and the first three points of one collection."""
    print(f"\n{'='*60}")
    print(f"Collection: {collection_name}")
    print(f"{'='*60}")

    collection_info = await client.get_collection(collection_name)
    # NOTE(review): `vectors_count` appears to be deprecated in recent Qdrant
    # releases and may be missing from newer client models -- confirm against
    # the pinned qdrant-client version. Fall back instead of aborting the run.
    vectors_count = getattr(collection_info, "vectors_count", "N/A")
    print(f" Points count: {collection_info.points_count}")
    print(f" Vectors count: {vectors_count}")
    print(f" Status: {collection_info.status}")

    if collection_info.points_count == 0:
        print(" ⚠️ Collection is empty!")
        return

    # Scroll a few points (payload only, no vectors) as a content sample.
    print("\n 前 3 条数据:")
    points, _next_offset = await client.scroll(
        collection_name=collection_name,
        limit=3,
        with_payload=True,
        with_vectors=False,
    )

    for i, point in enumerate(points, 1):
        payload = point.payload or {}
        kb_id = payload.get("kb_id", "N/A")
        metadata = payload.get("metadata", {})
        print(f"\n Point {i}:")
        print(f" ID: {point.id}")
        print(f" KB ID: {kb_id}")
        print(f" Text: {_preview_text(payload, 100)}")
        print(f" Metadata: {metadata}")


async def _print_search_results(client, collection_name: str, query_vector) -> None:
    """Run a top-3 vector query against one collection and print the hits."""
    search_results = await client.query_points(
        collection_name=collection_name,
        query=query_vector,
        using="full",  # query the named vector "full"
        limit=3,
        with_payload=True,
    )

    print(f" Search results: {len(search_results.points)}")
    for i, result in enumerate(search_results.points, 1):
        payload = result.payload or {}
        print(f" {i}. [score={result.score:.4f}] {_preview_text(payload, 80)}")


async def check_qdrant(
    tenant_id: str = "szmp@ash@2026",
    collection_key: str = "szmp_ash_2026",
    query_text: str = "课程",
):
    """Check Qdrant status and print a sample of one tenant's collections.

    Prints the configured database and Qdrant URLs, lists all collections,
    selects those whose name contains *collection_key*, prints counters and
    the first three points of each, then embeds *query_text* and runs a
    top-3 vector query against each selected collection.

    Args:
        tenant_id: Tenant identifier; only echoed in the output.
        collection_key: Substring used to select the tenant's collections.
        query_text: Text that is embedded and used for the test query.

    Returns:
        None. All results are written to stdout.

    Raises:
        Whatever ``get_settings()`` raises. Every later failure is caught,
        printed together with its traceback, and not re-raised.
    """
    settings = get_settings()

    # NOTE(review): database_url may embed credentials -- consider redacting
    # before sharing this output.
    print(f"Database URL: {settings.database_url}")
    print(f"Qdrant URL: {settings.qdrant_url}")
    print(f"Tenant ID: {tenant_id}")
    print()

    try:
        qdrant_manager = await get_qdrant_client()
        client = await qdrant_manager.get_client()

        # List every collection known to the server.
        collections = (await client.get_collections()).collections
        collection_names = [c.name for c in collections]
        print(f"Available collections: {collection_names}")
        print()

        # Keep only the collections that belong to this tenant.
        tenant_collections = [
            name for name in collection_names if collection_key in name
        ]
        print(f"Tenant collections: {tenant_collections}")
        print()

        # Inspect each collection; one broken collection must not hide the
        # others or skip the search phase below.
        for collection_name in tenant_collections:
            try:
                await _print_collection_sample(client, collection_name)
            except Exception as e:
                print(f" ❌ Inspect error: {e}")

        # Try a vector search.
        print(f"\n\n{'='*60}")
        print(f"尝试向量搜索 (query='{query_text}'):")
        print(f"{'='*60}")

        # Imported here so an import failure is reported by the handler below.
        from app.services.embedding.factory import get_embedding_provider

        embedding_provider = await get_embedding_provider()
        query_vector = await embedding_provider.embed(query_text)
        print(f"Query vector dimension: {len(query_vector)}")

        for collection_name in tenant_collections:
            print(f"\n搜索 collection: {collection_name}")
            try:
                await _print_search_results(client, collection_name, query_vector)
            except Exception as e:
                print(f" ❌ Search error: {e}")

    except Exception as e:
        # Top-level boundary of a diagnostic script: report and finish.
        print(f"❌ Error: {e}")
        import traceback

        traceback.print_exc()
|
|
|
|
|
|
# Run the diagnostic only when this file is executed as a script.
if __name__ == "__main__":
    diagnostics = check_qdrant()
    asyncio.run(diagnostics)
|