79 lines
2.1 KiB
Python
79 lines
2.1 KiB
Python
|
|
"""
|
||
|
|
检查 Qdrant 中是否有 grade=五年级 的数据
|
||
|
|
"""
|
||
|
|
|
||
|
|
import asyncio
|
||
|
|
import sys
|
||
|
|
from pathlib import Path
|
||
|
|
|
||
|
|
sys.path.insert(0, str(Path(__file__).parent.parent))
|
||
|
|
|
||
|
|
from qdrant_client.models import FieldCondition, Filter, MatchValue
|
||
|
|
|
||
|
|
from app.core.config import get_settings
|
||
|
|
from app.core.qdrant_client import QdrantClient
|
||
|
|
|
||
|
|
|
||
|
|
async def _scroll_all_points(qdrant, collection_name, batch_size=100):
    """Return every point in ``collection_name`` by following scroll offsets.

    A single ``scroll`` call returns at most ``batch_size`` points together
    with the offset of the next page; the offset is ``None`` once the
    collection is exhausted.
    """
    points = []
    offset = None
    while True:
        batch, offset = await qdrant.scroll(
            collection_name=collection_name,
            limit=batch_size,
            offset=offset,
            with_vectors=False,
        )
        points.extend(batch)
        if offset is None:
            return points


def _count_grades(points):
    """Tally points by ``payload['metadata']['grade']`` ('无' when absent)."""
    from collections import Counter

    tally = Counter()
    for point in points:
        # Payload (and its metadata entry) may be missing or None.
        metadata = (point.payload or {}).get('metadata') or {}
        tally[metadata.get('grade', '无')] += 1
    return tally


async def check_grade_data(
    tenant_id="szmp@ash@2026",
    kb_id="75c465fe-277d-455d-a30b-4b168adcc03b",
):
    """Print the grade distribution of a knowledge-base collection in Qdrant.

    Scrolls through the whole collection, prints how many points carry each
    ``metadata.grade`` value, then runs a filtered scroll for
    ``metadata.grade == "五年级"`` and prints up to 10 matching points.

    Args:
        tenant_id: Tenant identifier used to derive the collection name.
        kb_id: Knowledge-base identifier used to derive the collection name.
    """
    # NOTE(review): the returned settings object is not used here; the call is
    # kept in case loading settings has side effects — confirm and drop it.
    get_settings()
    client = QdrantClient()
    qdrant = await client.get_client()

    collection_name = client.get_kb_collection_name(tenant_id, kb_id)

    print(f"\n{'='*80}")
    print("检查 Qdrant 中 grade 字段的分布")
    print(f"{'='*80}")
    print(f"Collection: {collection_name}")

    # Fetch all points; a single scroll page would cap the statistics at 100.
    all_points = await _scroll_all_points(qdrant, collection_name)
    print(f"\n总数据量: {len(all_points)} 条")

    # Grade distribution; sort on str() so mixed-type grade values cannot
    # raise TypeError during comparison.
    grade_count = _count_grades(all_points)
    print("\ngrade 字段分布:")
    for grade, count in sorted(grade_count.items(), key=lambda kv: str(kv[0])):
        print(f" {grade}: {count} 条")

    # Verify that server-side filtering on metadata.grade finds 五年级 points.
    print("\n--- 检查 grade=五年级 的数据 ---")
    qdrant_filter = Filter(
        must=[
            FieldCondition(
                key="metadata.grade",
                match=MatchValue(value="五年级"),
            )
        ]
    )

    # Only a sample is fetched, so the count printed below is capped at 10.
    matches, _next_offset = await qdrant.scroll(
        collection_name=collection_name,
        limit=10,
        with_vectors=False,
        scroll_filter=qdrant_filter,
    )

    print(f"grade=五年级 的数据: {len(matches)} 条")
    for p in matches:
        payload = p.payload or {}
        print(f" text: {(payload.get('text') or '')[:80]}...")
        print(f" metadata: {payload.get('metadata', {})}")
|
||
|
|
|
||
|
|
|
||
|
|
if __name__ == "__main__":
    # Script entry point: build the coroutine, then run it to completion
    # on a fresh event loop.
    grade_check = check_grade_data()
    asyncio.run(grade_check)
|