fix: 修复RAG检索无结果问题-向量存储格式与检索格式不匹配 [AC-AISVC-50]
This commit is contained in:
parent
3f1f4cd98d
commit
6150fc0dd2
|
|
@@ -442,13 +442,15 @@ async def _index_document(tenant_id: str, job_id: str, doc_id: str, content: byt
|
||||||
logger.info(f"[INDEX] Total chunks: {len(all_chunks)}")
|
logger.info(f"[INDEX] Total chunks: {len(all_chunks)}")
|
||||||
|
|
||||||
qdrant = await get_qdrant_client()
|
qdrant = await get_qdrant_client()
|
||||||
await qdrant.ensure_collection_exists(tenant_id)
|
await qdrant.ensure_collection_exists(tenant_id, use_multi_vector=True)
|
||||||
|
|
||||||
|
from app.services.embedding.nomic_provider import NomicEmbeddingProvider
|
||||||
|
use_multi_vector = isinstance(embedding_provider, NomicEmbeddingProvider)
|
||||||
|
logger.info(f"[INDEX] Using multi-vector format: {use_multi_vector}")
|
||||||
|
|
||||||
points = []
|
points = []
|
||||||
total_chunks = len(all_chunks)
|
total_chunks = len(all_chunks)
|
||||||
for i, chunk in enumerate(all_chunks):
|
for i, chunk in enumerate(all_chunks):
|
||||||
embedding = await embedding_provider.embed(chunk.text)
|
|
||||||
|
|
||||||
payload = {
|
payload = {
|
||||||
"text": chunk.text,
|
"text": chunk.text,
|
||||||
"source": doc_id,
|
"source": doc_id,
|
||||||
|
|
@@ -461,6 +463,19 @@ async def _index_document(tenant_id: str, job_id: str, doc_id: str, content: byt
|
||||||
if chunk.source:
|
if chunk.source:
|
||||||
payload["filename"] = chunk.source
|
payload["filename"] = chunk.source
|
||||||
|
|
||||||
|
if use_multi_vector:
|
||||||
|
embedding_result = await embedding_provider.embed_document(chunk.text)
|
||||||
|
points.append({
|
||||||
|
"id": str(uuid.uuid4()),
|
||||||
|
"vector": {
|
||||||
|
"full": embedding_result.embedding_full,
|
||||||
|
"dim_256": embedding_result.embedding_256,
|
||||||
|
"dim_512": embedding_result.embedding_512,
|
||||||
|
},
|
||||||
|
"payload": payload,
|
||||||
|
})
|
||||||
|
else:
|
||||||
|
embedding = await embedding_provider.embed(chunk.text)
|
||||||
points.append(
|
points.append(
|
||||||
PointStruct(
|
PointStruct(
|
||||||
id=str(uuid.uuid4()),
|
id=str(uuid.uuid4()),
|
||||||
|
|
@@ -478,6 +493,9 @@ async def _index_document(tenant_id: str, job_id: str, doc_id: str, content: byt
|
||||||
|
|
||||||
if points:
|
if points:
|
||||||
logger.info(f"[INDEX] Upserting {len(points)} vectors to Qdrant...")
|
logger.info(f"[INDEX] Upserting {len(points)} vectors to Qdrant...")
|
||||||
|
if use_multi_vector:
|
||||||
|
await qdrant.upsert_multi_vector(tenant_id, points)
|
||||||
|
else:
|
||||||
await qdrant.upsert_vectors(tenant_id, points)
|
await qdrant.upsert_vectors(tenant_id, points)
|
||||||
|
|
||||||
await kb_service.update_job_status(
|
await kb_service.update_job_status(
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue