"""Check whether a Qdrant collection holds points whose grade is 五年级.

Diagnostic script. It prints the distribution of the ``metadata.grade``
payload field over one knowledge-base collection, then runs a filtered
query for ``grade=五年级`` and prints a few matching points.
"""

import asyncio
import sys
from collections import Counter
from pathlib import Path

# Put the parent of this script's directory on sys.path so the ``app``
# imports below resolve when the file is run directly (presumably that
# directory is the project root).
sys.path.insert(0, str(Path(__file__).parent.parent))

from qdrant_client.models import FieldCondition, Filter, MatchValue

from app.core.config import get_settings
from app.core.qdrant_client import QdrantClient

# Defaults preserve the collection this script was originally written for.
_DEFAULT_TENANT_ID = "szmp@ash@2026"
_DEFAULT_KB_ID = "75c465fe-277d-455d-a30b-4b168adcc03b"

# Number of points requested per scroll page.
_SCROLL_PAGE_SIZE = 100
# Maximum number of matching points printed as samples.
_SAMPLE_LIMIT = 10


async def _scroll_all(
    qdrant,
    collection_name: str,
    scroll_filter=None,
    page_size: int = _SCROLL_PAGE_SIZE,
) -> list:
    """Return every point of a collection by following scroll pagination.

    Args:
        qdrant: Client object exposing an awaitable ``scroll`` method that
            returns a ``(points, next_offset)`` pair.
        collection_name: Name of the collection to read.
        scroll_filter: Optional filter restricting the returned points.
        page_size: Number of points requested per page.

    Returns:
        All points yielded by the scroll, vectors excluded.
    """
    points = []
    offset = None
    while True:
        batch, offset = await qdrant.scroll(
            collection_name=collection_name,
            scroll_filter=scroll_filter,
            limit=page_size,
            offset=offset,
            with_vectors=False,
        )
        points.extend(batch)
        # A missing next offset marks the last page; the empty-batch check
        # guards against looping forever on an unexpected response.
        if offset is None or not batch:
            break
    return points


def _grade_distribution(points) -> Counter:
    """Count points per ``metadata.grade`` value, using '无' when absent."""
    tally = Counter()
    for point in points:
        # Payload and metadata may be missing/None; treat both as empty.
        payload = point.payload or {}
        metadata = payload.get("metadata") or {}
        tally[metadata.get("grade", "无")] += 1
    return tally


async def check_grade_data(
    tenant_id: str = _DEFAULT_TENANT_ID,
    kb_id: str = _DEFAULT_KB_ID,
) -> None:
    """Print the grade distribution and the points matching grade=五年级.

    Args:
        tenant_id: Tenant whose knowledge-base collection is inspected.
        kb_id: Knowledge-base identifier used to derive the collection name.
    """
    # The returned settings are not used here; the call is kept in case it
    # loads or validates configuration — TODO confirm whether it is needed.
    get_settings()

    client = QdrantClient()
    qdrant = await client.get_client()
    collection_name = client.get_kb_collection_name(tenant_id, kb_id)

    print(f"\n{'='*80}")
    print("检查 Qdrant 中 grade 字段的分布")
    print(f"{'='*80}")
    print(f"Collection: {collection_name}")

    # Read the whole collection, not just the first page, so the total and
    # the distribution below are accurate.
    all_points = await _scroll_all(qdrant, collection_name)
    print(f"\n总数据量: {len(all_points)} 条")

    grade_count = _grade_distribution(all_points)
    print("\ngrade 字段分布:")
    # Sort on the string form so mixed-type grade values cannot raise.
    for grade, count in sorted(grade_count.items(), key=lambda kv: str(kv[0])):
        print(f" {grade}: {count} 条")

    print("\n--- 检查 grade=五年级 的数据 ---")
    qdrant_filter = Filter(
        must=[
            FieldCondition(
                key="metadata.grade",
                match=MatchValue(value="五年级"),
            )
        ]
    )
    matches = await _scroll_all(
        qdrant, collection_name, scroll_filter=qdrant_filter
    )
    # Report the true number of matches, but print only a few samples.
    print(f"grade=五年级 的数据: {len(matches)} 条")
    for point in matches[:_SAMPLE_LIMIT]:
        payload = point.payload or {}
        text = payload.get("text") or ""
        print(f" text: {text[:80]}...")
        print(f" metadata: {payload.get('metadata', {})}")


if __name__ == "__main__":
    asyncio.run(check_grade_data())