fix: 修复RAG检索无结果问题-向量存储格式与检索格式不匹配 [AC-AISVC-50]
This commit is contained in:
parent
3f1f4cd98d
commit
6150fc0dd2
|
|
@ -442,13 +442,15 @@ async def _index_document(tenant_id: str, job_id: str, doc_id: str, content: byt
|
|||
logger.info(f"[INDEX] Total chunks: {len(all_chunks)}")
|
||||
|
||||
qdrant = await get_qdrant_client()
|
||||
await qdrant.ensure_collection_exists(tenant_id)
|
||||
await qdrant.ensure_collection_exists(tenant_id, use_multi_vector=True)
|
||||
|
||||
from app.services.embedding.nomic_provider import NomicEmbeddingProvider
|
||||
use_multi_vector = isinstance(embedding_provider, NomicEmbeddingProvider)
|
||||
logger.info(f"[INDEX] Using multi-vector format: {use_multi_vector}")
|
||||
|
||||
points = []
|
||||
total_chunks = len(all_chunks)
|
||||
for i, chunk in enumerate(all_chunks):
|
||||
embedding = await embedding_provider.embed(chunk.text)
|
||||
|
||||
payload = {
|
||||
"text": chunk.text,
|
||||
"source": doc_id,
|
||||
|
|
@ -461,13 +463,26 @@ async def _index_document(tenant_id: str, job_id: str, doc_id: str, content: byt
|
|||
if chunk.source:
|
||||
payload["filename"] = chunk.source
|
||||
|
||||
points.append(
|
||||
PointStruct(
|
||||
id=str(uuid.uuid4()),
|
||||
vector=embedding,
|
||||
payload=payload,
|
||||
if use_multi_vector:
|
||||
embedding_result = await embedding_provider.embed_document(chunk.text)
|
||||
points.append({
|
||||
"id": str(uuid.uuid4()),
|
||||
"vector": {
|
||||
"full": embedding_result.embedding_full,
|
||||
"dim_256": embedding_result.embedding_256,
|
||||
"dim_512": embedding_result.embedding_512,
|
||||
},
|
||||
"payload": payload,
|
||||
})
|
||||
else:
|
||||
embedding = await embedding_provider.embed(chunk.text)
|
||||
points.append(
|
||||
PointStruct(
|
||||
id=str(uuid.uuid4()),
|
||||
vector=embedding,
|
||||
payload=payload,
|
||||
)
|
||||
)
|
||||
)
|
||||
|
||||
progress = 20 + int((i + 1) / total_chunks * 70)
|
||||
if i % 10 == 0 or i == total_chunks - 1:
|
||||
|
|
@ -478,7 +493,10 @@ async def _index_document(tenant_id: str, job_id: str, doc_id: str, content: byt
|
|||
|
||||
if points:
|
||||
logger.info(f"[INDEX] Upserting {len(points)} vectors to Qdrant...")
|
||||
await qdrant.upsert_vectors(tenant_id, points)
|
||||
if use_multi_vector:
|
||||
await qdrant.upsert_multi_vector(tenant_id, points)
|
||||
else:
|
||||
await qdrant.upsert_vectors(tenant_id, points)
|
||||
|
||||
await kb_service.update_job_status(
|
||||
tenant_id, job_id, IndexJobStatus.COMPLETED.value, progress=100
|
||||
|
|
|
|||
Loading…
Reference in New Issue