fix: 修复RAG检索无结果问题-向量存储格式与检索格式不匹配 [AC-AISVC-50]

This commit is contained in:
MerCry 2026-02-26 12:39:42 +08:00
parent 3f1f4cd98d
commit 6150fc0dd2
1 changed file with 28 additions and 10 deletions

View File

@@ -442,13 +442,15 @@ async def _index_document(tenant_id: str, job_id: str, doc_id: str, content: byt
logger.info(f"[INDEX] Total chunks: {len(all_chunks)}")
qdrant = await get_qdrant_client()
await qdrant.ensure_collection_exists(tenant_id)
await qdrant.ensure_collection_exists(tenant_id, use_multi_vector=True)
from app.services.embedding.nomic_provider import NomicEmbeddingProvider
use_multi_vector = isinstance(embedding_provider, NomicEmbeddingProvider)
logger.info(f"[INDEX] Using multi-vector format: {use_multi_vector}")
points = []
total_chunks = len(all_chunks)
for i, chunk in enumerate(all_chunks):
embedding = await embedding_provider.embed(chunk.text)
payload = {
"text": chunk.text,
"source": doc_id,
@@ -461,13 +463,26 @@ async def _index_document(tenant_id: str, job_id: str, doc_id: str, content: byt
if chunk.source:
payload["filename"] = chunk.source
points.append(
PointStruct(
id=str(uuid.uuid4()),
vector=embedding,
payload=payload,
if use_multi_vector:
embedding_result = await embedding_provider.embed_document(chunk.text)
points.append({
"id": str(uuid.uuid4()),
"vector": {
"full": embedding_result.embedding_full,
"dim_256": embedding_result.embedding_256,
"dim_512": embedding_result.embedding_512,
},
"payload": payload,
})
else:
embedding = await embedding_provider.embed(chunk.text)
points.append(
PointStruct(
id=str(uuid.uuid4()),
vector=embedding,
payload=payload,
)
)
)
progress = 20 + int((i + 1) / total_chunks * 70)
if i % 10 == 0 or i == total_chunks - 1:
@@ -478,7 +493,10 @@ async def _index_document(tenant_id: str, job_id: str, doc_id: str, content: byt
if points:
logger.info(f"[INDEX] Upserting {len(points)} vectors to Qdrant...")
await qdrant.upsert_vectors(tenant_id, points)
if use_multi_vector:
await qdrant.upsert_multi_vector(tenant_id, points)
else:
await qdrant.upsert_vectors(tenant_id, points)
await kb_service.update_job_status(
tenant_id, job_id, IndexJobStatus.COMPLETED.value, progress=100