310 lines
9.1 KiB
Python
310 lines
9.1 KiB
Python
"""
|
|
RAG smoke tests for AI Service.
|
|
[AC-AISVC-17, AC-AISVC-18] Tests for retrieval-augmented generation scenarios.
|
|
"""
|
|
|
|
import json
|
|
import pytest
|
|
from unittest.mock import AsyncMock, MagicMock, patch
|
|
|
|
from fastapi.testclient import TestClient
|
|
|
|
from app.main import app
|
|
from app.models import ChatRequest, ChannelType
|
|
from app.services.orchestrator import OrchestratorService
|
|
from app.services.retrieval.base import RetrievalContext, RetrievalHit, RetrievalResult
|
|
|
|
|
|
class TestRAGSmokeScenarios:
    """
    [AC-AISVC-17, AC-AISVC-18] Smoke checks for the RAG chat endpoint.

    Each test posts to ``/ai/chat`` through a ``TestClient`` wrapping the
    application and inspects the shape of the response.
    """

    @pytest.fixture
    def client(self):
        """Provide a ``TestClient`` bound to the application under test."""
        test_client = TestClient(app)
        return test_client

    @pytest.fixture
    def valid_headers(self):
        """Provide request headers carrying the tenant identifier."""
        tenant_headers = {"X-Tenant-Id": "tenant_rag_test"}
        return tenant_headers

    @pytest.fixture
    def valid_body(self):
        """Provide a chat request body with session, message and channel."""
        chat_body = {
            "sessionId": "rag_session",
            "currentMessage": "What is the product price?",
            "channelType": "wechat",
        }
        return chat_body

    def test_rag_retrieval_hit_scenario(self, client, valid_headers, valid_body):
        """
        [AC-AISVC-17] Scenario intended to represent a retrieval hit.

        Verifies that the request succeeds, that the JSON body contains
        ``reply``, ``confidence`` and ``shouldTransfer``, and that
        ``confidence`` is a number within [0, 1].

        NOTE(review): the scenario's stated expectations (high confidence,
        shouldTransfer = False, relevant reply) are not asserted here; only
        field presence and the confidence range are checked.
        """
        result = client.post("/ai/chat", json=valid_body, headers=valid_headers)
        assert result.status_code == 200

        payload = result.json()
        for field in ("reply", "confidence", "shouldTransfer"):
            assert field in payload

        score = payload["confidence"]
        assert isinstance(score, (int, float))
        assert 0 <= score <= 1

    def test_rag_retrieval_miss_scenario(self, client, valid_headers):
        """
        [AC-AISVC-17, AC-AISVC-18] Scenario intended to represent a retrieval miss.

        Sends a nonsense query and verifies that the request still succeeds
        and that the JSON body contains ``reply``, ``confidence`` and
        ``shouldTransfer``.

        NOTE(review): neither a lower confidence nor a transfer suggestion is
        asserted; only field presence is checked.
        """
        miss_body = {
            "sessionId": "rag_session_miss",
            "currentMessage": "Xyzzy plugh unknown query",
            "channelType": "wechat",
        }

        result = client.post("/ai/chat", json=miss_body, headers=valid_headers)
        assert result.status_code == 200

        payload = result.json()
        for field in ("reply", "confidence", "shouldTransfer"):
            assert field in payload

    def test_rag_sse_with_retrieval(self, client, valid_headers, valid_body):
        """
        [AC-AISVC-17] Test RAG with SSE streaming.

        Requests ``text/event-stream``, checks that a ``final`` event marker
        appears in the response text, and validates the confidence range of
        every ``data:`` line that mentions ``confidence`` and parses as JSON.

        NOTE(review): ``data:`` lines that are not valid JSON are skipped, so
        the range check passes vacuously if no such line can be parsed.
        """
        sse_headers = dict(valid_headers)
        sse_headers["Accept"] = "text/event-stream"

        result = client.post("/ai/chat", json=valid_body, headers=sse_headers)
        assert result.status_code == 200

        stream_text = result.text
        assert any(
            marker in stream_text for marker in ("event:final", "event: final")
        )

        prefix_length = len("data:")
        for raw_line in stream_text.split("\n"):
            if not raw_line.startswith("data:"):
                continue
            if "confidence" not in raw_line:
                continue
            # Only the decode step is guarded; the assertions below run
            # outside the try so a failure there is never masked.
            try:
                event = json.loads(raw_line[prefix_length:].strip())
            except json.JSONDecodeError:
                continue
            assert "confidence" in event
            assert 0 <= event["confidence"] <= 1
|
|
|
|
|
|
class TestRAGConfidenceScoring:
    """
    [AC-AISVC-17, AC-AISVC-18] Tests for the confidence fields on orchestrator responses.

    Every test builds a fresh ``OrchestratorService`` and calls ``generate``
    for tenant ``"tenant"`` with a single WeChat message.
    """

    @staticmethod
    async def _generate_reply(message):
        """Run a new orchestrator on one WeChat message and return its response."""
        service = OrchestratorService()
        chat_request = ChatRequest(
            session_id="test",
            current_message=message,
            channel_type=ChannelType.WECHAT,
        )
        return await service.generate("tenant", chat_request)

    @pytest.mark.asyncio
    async def test_high_confidence_with_good_retrieval(self):
        """
        [AC-AISVC-17] High retrieval score should result in high confidence.

        Only the [0, 1] range of ``confidence`` is asserted.

        NOTE(review): the original author states that without an LLM client a
        fallback mode with lower confidence is used; that behaviour is not
        visible from this file.
        """
        reply = await self._generate_reply("What is the price?")

        score = reply.confidence
        assert 0 <= score <= 1

    @pytest.mark.asyncio
    async def test_low_confidence_with_poor_retrieval(self):
        """
        [AC-AISVC-17, AC-AISVC-18] Poor retrieval should result in lower confidence.

        Only the [0, 1] range of ``confidence`` is asserted.
        """
        reply = await self._generate_reply("Unknown topic xyzzy")

        score = reply.confidence
        assert 0 <= score <= 1

    @pytest.mark.asyncio
    async def test_transfer_suggestion_on_very_low_confidence(self):
        """
        [AC-AISVC-18] Very low confidence should suggest transfer to human.

        Only checks that ``should_transfer`` is populated (not ``None``).
        """
        reply = await self._generate_reply(
            "Complex query requiring human expertise"
        )

        assert reply.should_transfer is not None
|
|
|
|
|
|
class TestRAGRetrievalDiagnostics:
    """
    [AC-AISVC-17] Tests for the diagnostic properties of ``RetrievalResult``.

    These tests only construct objects and read properties, so they are
    plain synchronous tests: nothing is awaited and no event loop is needed.
    """

    def test_retrieval_result_statistics(self):
        """
        [AC-AISVC-17] Retrieval result should provide useful diagnostics.

        Builds a result with two hits (scores 0.9 and 0.7) and checks the
        hit count, the maximum score and the emptiness flag.
        """
        hits = [
            RetrievalHit(text="Doc 1", score=0.9, source="kb"),
            RetrievalHit(text="Doc 2", score=0.7, source="kb"),
        ]
        result = RetrievalResult(hits=hits)

        assert result.hit_count == 2
        assert result.max_score == 0.9
        assert result.is_empty is False

    def test_empty_retrieval_result(self):
        """
        [AC-AISVC-17] Empty retrieval result should be detectable.

        Builds a result with no hits and checks that it reports itself as
        empty, with a hit count of 0 and a maximum score of 0.0.
        """
        result = RetrievalResult(hits=[])

        assert result.is_empty is True
        assert result.hit_count == 0
        assert result.max_score == 0.0
|
|
|
|
|
|
class TestRAGFallbackBehavior:
    """
    [AC-AISVC-18] Tests for fallback behavior when retrieval fails.

    NOTE(review): no retrieval failure is injected by these tests; they post
    ordinary requests and check that a usable response comes back.
    """

    @pytest.fixture
    def client(self):
        """Provide a ``TestClient`` bound to the application under test."""
        test_client = TestClient(app)
        return test_client

    def test_graceful_fallback_on_retrieval_error(self, client):
        """
        [AC-AISVC-18] Should gracefully handle retrieval errors.

        Verifies that the request succeeds and the body contains ``reply``.
        """
        payload = {
            "sessionId": "fallback_session",
            "currentMessage": "Test fallback",
            "channelType": "wechat",
        }
        tenant_headers = {"X-Tenant-Id": "tenant_fallback"}

        result = client.post("/ai/chat", json=payload, headers=tenant_headers)

        assert result.status_code == 200
        body = result.json()
        assert "reply" in body

    def test_fallback_response_quality(self, client):
        """
        [AC-AISVC-18] Fallback response should still be helpful.

        Verifies that the request succeeds, the reply is non-empty and the
        confidence is not negative.
        """
        payload = {
            "sessionId": "fallback_quality",
            "currentMessage": "I need help with my order",
            "channelType": "wechat",
        }
        tenant_headers = {"X-Tenant-Id": "tenant_fallback_quality"}

        result = client.post("/ai/chat", json=payload, headers=tenant_headers)

        assert result.status_code == 200
        body = result.json()

        reply_text = body["reply"]
        assert len(reply_text) > 0
        assert body["confidence"] >= 0
|
|
|
|
|
|
class TestRAGWithHistory:
    """
    Tests for RAG requests that include prior conversation turns.
    """

    @pytest.fixture
    def client(self):
        """Provide a ``TestClient`` bound to the application under test."""
        test_client = TestClient(app)
        return test_client

    def test_rag_with_conversation_history(self, client):
        """
        [AC-AISVC-14] RAG should consider conversation history.

        Sends a follow-up question together with two earlier turns and
        verifies that the request succeeds and the body contains ``reply``.
        """
        earlier_turns = [
            {"role": "user", "content": "I'm interested in your product"},
            {"role": "assistant", "content": "Great! Our product has many features."},
        ]
        payload = {
            "sessionId": "history_session",
            "currentMessage": "How much does it cost?",
            "channelType": "wechat",
            "history": earlier_turns,
        }
        tenant_headers = {"X-Tenant-Id": "tenant_history"}

        result = client.post("/ai/chat", json=payload, headers=tenant_headers)

        assert result.status_code == 200
        body = result.json()
        assert "reply" in body

    def test_rag_with_long_history(self, client):
        """
        [AC-AISVC-14, AC-AISVC-15] RAG should handle long conversation history.

        Sends 20 alternating user/assistant turns (starting with the user)
        and verifies that the request succeeds and the body contains
        ``reply``.
        """
        # Even indices are user turns, odd indices are assistant turns.
        speakers = ("user", "assistant")
        long_history = [
            {"role": speakers[index % 2], "content": f"Message {index}"}
            for index in range(20)
        ]
        payload = {
            "sessionId": "long_history_session",
            "currentMessage": "Summary please",
            "channelType": "wechat",
            "history": long_history,
        }
        tenant_headers = {"X-Tenant-Id": "tenant_long_history"}

        result = client.post("/ai/chat", json=payload, headers=tenant_headers)

        assert result.status_code == 200
        body = result.json()
        assert "reply" in body
|