"""
Base LLM client interface.

[AC-AISVC-02, AC-AISVC-06] Abstract interface for LLM providers.

Design reference: design.md Section 8.1 - LLMClient interface
- generate(prompt, params) -> text
- stream_generate(prompt, params) -> iterator[delta]
"""

from abc import ABC, abstractmethod
from dataclasses import dataclass, field
from typing import Any, AsyncGenerator

@dataclass
class LLMConfig:
    """
    Configuration for LLM client.

    [AC-AISVC-02] Supports configurable model parameters.
    """

    # Model identifier sent to the provider.
    model: str = "gpt-4o-mini"
    # Upper bound on tokens generated per request.
    max_tokens: int = 2048
    # Sampling temperature passed to the provider.
    temperature: float = 0.7
    # Nucleus (top-p) sampling parameter passed to the provider.
    top_p: float = 1.0
    # Per-request timeout, in seconds.
    timeout_seconds: int = 30
    # Number of retry attempts on request failure.
    max_retries: int = 3
    # Extra provider-specific parameters, forwarded as-is.
    extra_params: dict[str, Any] = field(default_factory=dict)
@dataclass
class LLMResponse:
    """
    Response from LLM generation.

    [AC-AISVC-02] Contains generated content and metadata.
    """

    # Generated text content.
    content: str
    # Name of the model that produced this response.
    model: str
    # Token-usage counters as reported by the provider — TODO confirm key names.
    usage: dict[str, int] = field(default_factory=dict)
    # Reason generation stopped; defaults to "stop".
    finish_reason: str = "stop"
    # Additional provider-specific metadata.
    metadata: dict[str, Any] = field(default_factory=dict)
@dataclass
class LLMStreamChunk:
    """
    Streaming chunk from LLM.

    [AC-AISVC-06, AC-AISVC-07] Incremental output for SSE streaming.
    """

    # Incremental text fragment carried by this chunk.
    delta: str
    # Name of the model producing the stream.
    model: str
    # None while streaming continues; presumably set on the final chunk — verify against implementations.
    finish_reason: str | None = None
    # Additional provider-specific metadata.
    metadata: dict[str, Any] = field(default_factory=dict)
class LLMClient(ABC):
    """
    Abstract base class for LLM clients.

    [AC-AISVC-02, AC-AISVC-06] Provides unified interface for different LLM providers.

    Design reference: design.md Section 8.2 - Plugin points
    - OpenAICompatibleClient / LocalModelClient can be swapped
    """

    @abstractmethod
    async def generate(
        self,
        messages: list[dict[str, str]],
        config: LLMConfig | None = None,
        **kwargs: Any,
    ) -> LLMResponse:
        """
        Generate a non-streaming response.

        [AC-AISVC-02] Returns complete response for ChatResponse.

        Args:
            messages: List of chat messages with 'role' and 'content'.
            config: Optional LLM configuration overrides.
            **kwargs: Additional provider-specific parameters.

        Returns:
            LLMResponse with generated content and metadata.

        Raises:
            LLMException: If generation fails.
        """
        pass

    @abstractmethod
    async def stream_generate(
        self,
        messages: list[dict[str, str]],
        config: LLMConfig | None = None,
        **kwargs: Any,
    ) -> AsyncGenerator[LLMStreamChunk, None]:
        """
        Generate a streaming response.

        [AC-AISVC-06, AC-AISVC-07] Yields incremental chunks for SSE.

        NOTE(review): the return annotation implies concrete subclasses
        implement this as an async generator (a ``yield``-ing ``async def``),
        not a coroutine returning an iterator — confirm against implementations.

        Args:
            messages: List of chat messages with 'role' and 'content'.
            config: Optional LLM configuration overrides.
            **kwargs: Additional provider-specific parameters.

        Yields:
            LLMStreamChunk with incremental content.

        Raises:
            LLMException: If generation fails.
        """
        pass

    @abstractmethod
    async def close(self) -> None:
        """Close the client and release resources."""
        pass