82 lines
2.9 KiB
Python
82 lines
2.9 KiB
Python
"""Inspect the collections and data stored in Qdrant."""
|
|
import asyncio
|
|
from app.core.qdrant_client import get_qdrant_client
|
|
|
|
|
|
async def check_qdrant(tenant_id: str = "szmp@ash@2026") -> None:
    """Print a diagnostic report about the Qdrant collection of one tenant.

    The report lists every collection known to the server, then resolves the
    tenant's collection name through the project client wrapper and prints
    whether it exists. When it exists, its point count, vector configuration
    and up to three sample points are printed. When it does not, collections
    whose names look related to the tenant are searched and summarised.

    Args:
        tenant_id: Tenant identifier passed to ``client.get_collection_name``.
            Defaults to the tenant this script was originally written for.

    Returns:
        None. All results are written to standard output.
    """
    client = await get_qdrant_client()
    qdrant = await client.get_client()

    banner = "=" * 60
    print(banner)
    print("Qdrant 状态检查")
    print(banner)

    # 1. List every collection on the server.
    collections = await qdrant.get_collections()
    print(f"\n现有集合 ({len(collections.collections)}):")
    for col in collections.collections:
        print(f" - {col.name}")

    # 2. Inspect the collection that belongs to the requested tenant.
    collection_name = client.get_collection_name(tenant_id)
    print(f"\n目标集合: {collection_name}")

    exists = await qdrant.collection_exists(collection_name)
    print(f"集合存在: {exists}")

    if exists:
        await _report_collection(qdrant, collection_name)
        return

    print("\n集合不存在,可能原因:")
    print(" 1. 还没有上传过文档")
    print(" 2. 集合名称格式不匹配")
    print("\n尝试查找其他相关集合...")
    await _report_related_collections(
        qdrant, collections.collections, tenant_id
    )


async def _report_collection(qdrant, collection_name: str) -> None:
    """Print point count, vector configuration and up to three sample points."""
    info = await qdrant.get_collection(collection_name)
    print("\n集合信息:")
    print(f" 向量数: {info.points_count}")

    vectors_config = info.config.params.vectors
    if isinstance(vectors_config, dict):
        # Named vectors: report the configured vector names.
        print(f" 向量配置: {list(vectors_config.keys())}")
    else:
        # A single unnamed vector exposes ``size``; the config may also be
        # absent altogether, in which case ``None`` is printed instead of
        # raising AttributeError.
        print(f" 向量大小: {getattr(vectors_config, 'size', None)}")

    # ``points_count`` may be None, so rely on truthiness instead of ``> 0``.
    if not info.points_count:
        return

    print("\n样本数据 (前3条):")
    # ``scroll`` returns a (points, next_offset) pair; only the points are used.
    points, _ = await qdrant.scroll(
        collection_name=collection_name,
        limit=3,
        with_payload=True,
    )
    for i, point in enumerate(points, 1):
        print(f"\n [{i}] ID: {point.id}")
        payload = point.payload or {}
        metadata = payload.get("metadata", {})
        print(f" 元数据: {metadata}")
        # Guard against a payload whose "text" key is present but None.
        text = payload.get("text") or ""
        print(f" 内容: {text[:100]}...")


async def _report_related_collections(qdrant, collections, tenant_id: str) -> None:
    """Print a summary of collections whose names appear related to the tenant."""
    # A collection is considered related when its name contains the tenant id
    # with "@" replaced by "_", or the tenant id's first "@"-separated segment
    # ("szmp" for the default tenant). Empty needles are dropped because an
    # empty string is a substring of every name.
    needles = [
        needle
        for needle in (tenant_id.replace("@", "_"), tenant_id.partition("@")[0])
        if needle
    ]

    for col in collections:
        if not any(needle in col.name for needle in needles):
            continue

        print(f"\n找到相关集合: {col.name}")
        info = await qdrant.get_collection(col.name)
        print(f" 向量数: {info.points_count}")

        # ``points_count`` may be None; skip sampling when there is nothing to show.
        if not info.points_count:
            continue

        points, _ = await qdrant.scroll(
            collection_name=col.name,
            limit=2,
            with_payload=True,
        )
        for point in points:
            payload = point.payload or {}
            metadata = payload.get("metadata", {})
            print(f" 样本元数据: {metadata}")
|
|
|
|
|
|
if __name__ == "__main__":
    # Script entry point: build the report coroutine and run it to completion
    # on a fresh event loop.
    report = check_qdrant()
    asyncio.run(report)
|