69 lines
2.0 KiB
Python
69 lines
2.0 KiB
Python
|
|
"""
|
||
|
|
检查 Qdrant 中数据的 metadata 存储结构
|
||
|
|
"""
|
||
|
|
|
||
|
|
import asyncio
|
||
|
|
import sys
|
||
|
|
from pathlib import Path
|
||
|
|
|
||
|
|
# Prepend the parent of this script's directory to sys.path; presumably this is
# the project root, so the `app.*` imports below resolve when the script is run
# directly — TODO confirm against the repository layout.
sys.path.insert(0, str(Path(__file__).parent.parent))
|
||
|
|
|
||
|
|
from app.core.config import get_settings
|
||
|
|
from app.core.qdrant_client import QdrantClient
|
||
|
|
|
||
|
|
|
||
|
|
async def check_metadata_structure(
    tenant_id: str = "szmp@ash@2026",
    kb_id: str = "75c465fe-277d-455d-a30b-4b168adcc03b",
    limit: int = 3,
) -> None:
    """Print the payload layout of a few points from one Qdrant collection.

    Scrolls up to ``limit`` points (vectors excluded) from the collection whose
    name is returned by ``client.get_kb_collection_name(tenant_id, kb_id)`` and
    prints, for each point, its id, its payload keys, every payload value and,
    when present, the entries of its ``metadata`` field.

    Args:
        tenant_id: First argument to ``get_kb_collection_name``. Defaults to
            the value this script was originally hard-coded with.
        kb_id: Second argument to ``get_kb_collection_name`` (presumably a
            knowledge-base id). Defaults to the originally hard-coded value.
        limit: Maximum number of points requested from ``scroll``.

    Returns:
        None. All results are written to stdout.
    """
    # The settings object is not used here. The call is kept in case loading
    # settings has side effects the client relies on — TODO confirm and drop.
    get_settings()

    client = QdrantClient()
    qdrant = await client.get_client()

    collection_name = client.get_kb_collection_name(tenant_id, kb_id)

    print(f"\n{'='*80}")
    print("检查 Qdrant 数据结构")
    print(f"{'='*80}")
    print(f"Collection: {collection_name}")

    scroll_result = await qdrant.scroll(
        collection_name=collection_name,
        limit=limit,
        with_vectors=False,
    )
    # The first element of the scroll result is the batch of points.
    records = scroll_result[0]

    print(f"\n找到 {len(records)} 条数据:")

    for i, point in enumerate(records, 1):
        print(f"\n--- Point {i} ---")
        # Accept both attribute-style records and plain dicts. A missing or
        # None payload is treated as empty instead of crashing on `.keys()`.
        if hasattr(point, 'payload'):
            payload = point.payload or {}
            point_id = point.id
        else:
            payload = point.get('payload') or {}
            point_id = point.get('id', 'unknown')

        print(f"ID: {point_id}")
        print(f"Payload keys: {list(payload.keys())}")

        # Dump the full payload, shortening long text and hiding vectors.
        for key, value in payload.items():
            if key == 'text':
                # Truncate the string form so the length check and the slice
                # agree even when the stored value is not a str.
                text = str(value)
                shown = f"{text[:50]}..." if len(text) > 50 else text
                print(f" {key}: {shown}")
            elif key == 'vector':
                print(f" {key}: [向量数据]")
            else:
                print(f" {key}: {value}")

        # Show the nested metadata field, if the payload has one.
        if 'metadata' in payload:
            metadata = payload['metadata']
            if isinstance(metadata, dict):
                print("\n metadata 字段内容:")
                for mk, mv in metadata.items():
                    print(f" {mk}: {mv}")
            else:
                # Non-mapping metadata (e.g. None or a string) is itself a
                # finding for this inspection, so report it rather than fail.
                print(f"\n metadata 字段内容: {metadata!r} ({type(metadata).__name__})")
|
||
|
|
|
||
|
|
|
||
|
|
# Run the inspection only when this file is executed directly.
if __name__ == "__main__":
    inspection = check_metadata_structure()
    asyncio.run(inspection)
|