2026-02-25 15:10:12 +00:00
|
|
|
|
"""
|
|
|
|
|
|
Nomic embedding provider with task prefixes and Matryoshka support.
|
|
|
|
|
|
Implements RAG optimization spec:
|
|
|
|
|
|
- Task prefixes: search_document: / search_query:
|
|
|
|
|
|
- Matryoshka dimension truncation: 256/512/768 dimensions
|
|
|
|
|
|
"""
|
|
|
|
|
|
|
|
|
|
|
|
import logging
|
|
|
|
|
|
import time
|
|
|
|
|
|
from dataclasses import dataclass, field
|
|
|
|
|
|
from enum import Enum
|
|
|
|
|
|
from typing import Any
|
|
|
|
|
|
|
|
|
|
|
|
import httpx
|
|
|
|
|
|
import numpy as np
|
|
|
|
|
|
|
|
|
|
|
|
from app.services.embedding.base import (
|
|
|
|
|
|
EmbeddingConfig,
|
|
|
|
|
|
EmbeddingException,
|
|
|
|
|
|
EmbeddingProvider,
|
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class EmbeddingTask(str, Enum):
    """Retrieval role of a text for the nomic-embed-text v1.5 model.

    Members are also ``str`` instances, so each one compares equal to its
    raw value.
    """

    # Text that is being indexed.
    DOCUMENT = "search_document"
    # Text that is being used to search.
    QUERY = "search_query"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@dataclass
class NomicEmbeddingResult:
    """Outcome of a single Nomic embedding request, in several vector sizes."""

    # Vector at the full length returned by the model.
    embedding_full: list[float]
    # Variant cut down to the first 256 components.
    embedding_256: list[float]
    # Variant cut down to the first 512 components.
    embedding_512: list[float]
    # Number of components in the vector as returned by the model.
    dimension: int
    # Name of the model that produced the vector.
    model: str
    # Task (document or query) the text was embedded for.
    task: EmbeddingTask
    # Elapsed time of the request, in milliseconds.
    latency_ms: float = 0.0
    # Free-form extra information; every instance gets its own empty dict.
    metadata: dict[str, Any] = field(default_factory=dict)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class NomicEmbeddingProvider(EmbeddingProvider):
    """
    Nomic-embed-text v1.5 embedding provider with task prefixes.

    Talks to an Ollama HTTP endpoint (``POST {base_url}/api/embeddings``).

    Key features:
    - Task prefixes: search_document: for documents, search_query: for queries
    - Matryoshka dimension truncation: 256/512/768 dimensions
    - Automatic normalization after truncation

    The rich methods (``embed_with_task``, ``embed_document``, ``embed_query``
    and their batch forms) return a ``NomicEmbeddingResult`` holding every
    dimension variant. The plain-vector methods (``embed``, ``embed_batch``)
    return a single vector whose length follows the configured ``dimension``
    when Matryoshka truncation is enabled, so it agrees with
    ``get_dimension()``.

    Reference: rag-optimization/spec.md Section 2.1, 2.3
    """

    PROVIDER_NAME = "nomic"
    DOCUMENT_PREFIX = "search_document:"
    QUERY_PREFIX = "search_query:"
    FULL_DIMENSION = 768

    def __init__(
        self,
        base_url: str = "http://localhost:11434",
        model: str = "nomic-embed-text",
        dimension: int = 768,
        timeout_seconds: int = 60,
        enable_matryoshka: bool = True,
        **kwargs: Any,
    ):
        """
        Store the provider configuration; no network I/O happens here.

        Args:
            base_url: Root URL of the API; trailing slashes are stripped.
            model: Model name sent with every request.
            dimension: Vector length reported by ``get_dimension()`` and,
                when ``enable_matryoshka`` is true, produced by ``embed()``.
            timeout_seconds: Timeout handed to the HTTP client.
            enable_matryoshka: Whether ``embed()`` may truncate the vector
                down to ``dimension``.
            **kwargs: Extra settings, kept as-is and not interpreted here.
        """
        self._base_url = base_url.rstrip("/")
        self._model = model
        self._dimension = dimension
        self._timeout = timeout_seconds
        self._enable_matryoshka = enable_matryoshka
        # The HTTP client is created on first use (see _get_client).
        self._client: httpx.AsyncClient | None = None
        self._extra_config = kwargs

    async def _get_client(self) -> httpx.AsyncClient:
        """Return the shared HTTP client, creating it on first use."""
        if self._client is None:
            self._client = httpx.AsyncClient(timeout=self._timeout)
        return self._client

    def _add_prefix(self, text: str, task: EmbeddingTask) -> str:
        """Add task prefix to text.

        Text that already starts with the prefix for ``task`` is returned
        unchanged, so the prefix is never doubled.
        """
        prefix = (
            self.DOCUMENT_PREFIX
            if task == EmbeddingTask.DOCUMENT
            else self.QUERY_PREFIX
        )
        if text.startswith(prefix):
            return text
        return f"{prefix}{text}"

    def _truncate_and_normalize(self, embedding: list[float], target_dim: int) -> list[float]:
        """
        Truncate embedding to target dimension and normalize.

        Matryoshka representation learning allows dimension truncation.

        Args:
            embedding: Source vector.
            target_dim: Number of leading components to keep. If the vector
                is shorter than this, it is kept at its own length.

        Returns:
            The leading components scaled to unit L2 norm. An all-zero
            vector is returned unscaled to avoid dividing by zero.
        """
        arr = np.array(embedding[:target_dim], dtype=np.float32)
        norm = np.linalg.norm(arr)
        if norm > 0:
            arr = arr / norm
        return arr.tolist()

    def _select_configured_vector(self, result: NomicEmbeddingResult) -> list[float]:
        """Pick the variant of ``result`` that matches the configured dimension."""
        target = self._dimension
        # Keep the full vector when truncation is disabled or would not
        # shorten it (this covers the default 768 configuration).
        if not self._enable_matryoshka or target <= 0 or target >= result.dimension:
            return result.embedding_full
        if target == 256:
            return result.embedding_256
        if target == 512:
            return result.embedding_512
        # Normalization is scale-invariant, so truncating the already
        # normalized full vector matches truncating the raw one.
        return self._truncate_and_normalize(result.embedding_full, target)

    async def embed_with_task(
        self,
        text: str,
        task: EmbeddingTask,
    ) -> NomicEmbeddingResult:
        """
        Generate embedding with specified task prefix.

        Args:
            text: Input text to embed
            task: DOCUMENT for indexing, QUERY for retrieval

        Returns:
            NomicEmbeddingResult with all dimension variants

        Raises:
            EmbeddingException: If the API answers with an error status, the
                request cannot be completed, the response holds no
                embedding, or any other error occurs. The original error is
                attached as ``__cause__``.
        """
        start_time = time.perf_counter()
        prefixed_text = self._add_prefix(text, task)

        try:
            client = await self._get_client()
            response = await client.post(
                f"{self._base_url}/api/embeddings",
                json={
                    "model": self._model,
                    "prompt": prefixed_text,
                },
            )
            response.raise_for_status()
            data = response.json()
            embedding = data.get("embedding", [])

            if not embedding:
                raise EmbeddingException(
                    "Empty embedding returned",
                    provider=self.PROVIDER_NAME,
                    details={"text_length": len(text), "task": task.value},
                )

            # Latency covers the HTTP round trip and JSON decoding only, not
            # the truncation work below.
            latency_ms = (time.perf_counter() - start_time) * 1000

            embedding_256 = self._truncate_and_normalize(embedding, 256)
            embedding_512 = self._truncate_and_normalize(embedding, 512)
            embedding_full = self._truncate_and_normalize(embedding, len(embedding))

            logger.debug(
                "Generated Nomic embedding: task=%s, dim=%d, latency=%.2fms",
                task.value,
                len(embedding),
                latency_ms,
            )

            return NomicEmbeddingResult(
                embedding_full=embedding_full,
                embedding_256=embedding_256,
                embedding_512=embedding_512,
                dimension=len(embedding),
                model=self._model,
                task=task,
                latency_ms=latency_ms,
            )

        except httpx.HTTPStatusError as e:
            raise EmbeddingException(
                f"Ollama API error: {e.response.status_code}",
                provider=self.PROVIDER_NAME,
                details={"status_code": e.response.status_code, "response": e.response.text},
            ) from e
        except httpx.RequestError as e:
            raise EmbeddingException(
                f"Ollama connection error: {e}",
                provider=self.PROVIDER_NAME,
                details={"base_url": self._base_url},
            ) from e
        except EmbeddingException:
            # Already in the provider's own error type; pass through untouched.
            raise
        except Exception as e:
            raise EmbeddingException(
                f"Embedding generation failed: {e}",
                provider=self.PROVIDER_NAME,
            ) from e

    async def embed_document(self, text: str) -> NomicEmbeddingResult:
        """
        Generate embedding for document (with search_document: prefix).
        Use this when indexing documents into vector store.
        """
        return await self.embed_with_task(text, EmbeddingTask.DOCUMENT)

    async def embed_query(self, text: str) -> NomicEmbeddingResult:
        """
        Generate embedding for query (with search_query: prefix).
        Use this when searching/retrieving documents.
        """
        return await self.embed_with_task(text, EmbeddingTask.QUERY)

    async def embed(self, text: str) -> list[float]:
        """
        Generate embedding vector for a single text.
        Default uses QUERY task for backward compatibility.

        The vector is truncated to the configured ``dimension`` when
        Matryoshka truncation is enabled and the dimension is smaller than
        the model output; otherwise the full normalized vector is returned.
        """
        result = await self.embed_query(text)
        return self._select_configured_vector(result)

    async def embed_batch(self, texts: list[str]) -> list[list[float]]:
        """
        Generate embedding vectors for multiple texts.
        Uses QUERY task by default.

        Texts are embedded one after another, in input order.
        """
        return [await self.embed(text) for text in texts]

    async def embed_documents_batch(
        self,
        texts: list[str],
    ) -> list[NomicEmbeddingResult]:
        """
        Generate embeddings for multiple documents (DOCUMENT task).
        Use this when batch indexing documents.

        Texts are embedded one after another, in input order.
        """
        return [await self.embed_document(text) for text in texts]

    async def embed_queries_batch(
        self,
        texts: list[str],
    ) -> list[NomicEmbeddingResult]:
        """
        Generate embeddings for multiple queries (QUERY task).
        Use this when batch processing queries.

        Texts are embedded one after another, in input order.
        """
        return [await self.embed_query(text) for text in texts]

    def get_dimension(self) -> int:
        """Get the dimension of embedding vectors."""
        return self._dimension

    def get_provider_name(self) -> str:
        """Get the name of this embedding provider."""
        return self.PROVIDER_NAME

    def get_config_schema(self) -> dict[str, Any]:
        """Get the configuration schema for Nomic provider.

        Each entry maps a constructor option to its type, display title,
        description and default value.
        """
        return {
            "base_url": {
                "type": "string",
                "title": "API 地址",
                "description": "Ollama API 地址",
                "default": "http://localhost:11434",
            },
            "model": {
                "type": "string",
                "title": "模型名称",
                "description": "嵌入模型名称(推荐 nomic-embed-text v1.5)",
                "default": "nomic-embed-text",
            },
            "dimension": {
                "type": "integer",
                "title": "向量维度",
                "description": "向量维度(支持 256/512/768)",
                "default": 768,
            },
            "timeout_seconds": {
                "type": "integer",
                "title": "超时时间",
                "description": "请求超时时间(秒)",
                "default": 60,
            },
            "enable_matryoshka": {
                "type": "boolean",
                "title": "Matryoshka 截断",
                "description": "启用 Matryoshka 维度截断",
                "default": True,
            },
        }

    async def close(self) -> None:
        """Close the HTTP client."""
        if self._client:
            await self._client.aclose()
            # Drop the reference so a later request creates a fresh client.
            self._client = None
|