# ai-robot-core/ai-service/scripts/check_grade_data.py
#
# 79 lines
# 2.1 KiB
# Python

"""
检查 Qdrant 中是否有 grade=五年级 的数据
"""
import asyncio
import sys
from pathlib import Path
sys.path.insert(0, str(Path(__file__).parent.parent))
from qdrant_client.models import FieldCondition, Filter, MatchValue
from app.core.config import get_settings
from app.core.qdrant_client import QdrantClient
async def check_grade_data():
    """Print how ``metadata.grade`` is distributed in one Qdrant collection.

    The collection is the one resolved from the hard-coded tenant and
    knowledge-base IDs below. Two reports are written to stdout:

    1. The number of points in the collection and, for every distinct
       ``metadata.grade`` value, how many points carry it. Points without a
       ``grade`` key are counted under the empty string.
    2. Up to 10 sample points whose ``metadata.grade`` equals ``"五年级"``,
       with the first 80 characters of their text and their metadata.

    Returns:
        None. All results are printed.
    """
    client = QdrantClient()
    qdrant = await client.get_client()
    tenant_id = "szmp@ash@2026"
    kb_id = "75c465fe-277d-455d-a30b-4b168adcc03b"
    collection_name = client.get_kb_collection_name(tenant_id, kb_id)

    print(f"\n{'='*80}")
    print("检查 Qdrant 中 grade 字段的分布")
    print(f"{'='*80}")
    print(f"Collection: {collection_name}")

    # Walk the whole collection page by page. A single scroll call returns at
    # most `limit` points, so stopping after the first page would report a
    # truncated total and distribution for collections larger than one page.
    total_points = 0
    grade_count = {}
    offset = None
    while True:
        points, offset = await qdrant.scroll(
            collection_name=collection_name,
            limit=100,
            offset=offset,
            with_vectors=False,
        )
        for point in points:
            # payload / metadata may be missing or None on some points.
            metadata = (point.payload or {}).get('metadata') or {}
            grade = metadata.get('grade', '')
            grade_count[grade] = grade_count.get(grade, 0) + 1
        total_points += len(points)
        # scroll returns None as the next offset once the last page is
        # reached; the empty-page check is a guard against looping forever.
        if offset is None or not points:
            break

    print(f"\n总数据量: {total_points}")

    print("\ngrade 字段分布:")
    # Sort on the string form of the key so that a mix of value types
    # (e.g. None or numbers next to strings) cannot raise TypeError.
    for grade, count in sorted(grade_count.items(), key=lambda kv: str(kv[0])):
        print(f" {grade}: {count}")

    # Server-side filter for the specific grade of interest.
    print("\n--- 检查 grade=五年级 的数据 ---")
    qdrant_filter = Filter(
        must=[
            FieldCondition(
                key="metadata.grade",
                match=MatchValue(value="五年级"),
            )
        ]
    )
    # Only a sample is fetched here: the printed count is capped at `limit`.
    results = await qdrant.scroll(
        collection_name=collection_name,
        limit=10,
        with_vectors=False,
        scroll_filter=qdrant_filter,
    )
    print(f"grade=五年级 的数据: {len(results[0])}")
    for p in results[0]:
        payload = p.payload or {}
        # Coerce to str so a missing / None / non-string text cannot break
        # the slice.
        text = str(payload.get('text') or '')
        print(f" text: {text[:80]}...")
        print(f" metadata: {payload.get('metadata', {})}")
# Run the check only when this file is executed as a script, not on import.
if __name__ == "__main__":
    asyncio.run(check_grade_data())