# Source: ai-robot-core/ai-service/tests/test_rag_smoke.py (Python, 310 lines, 9.1 KiB)

"""
RAG smoke tests for AI Service.
[AC-AISVC-17, AC-AISVC-18] Tests for retrieval-augmented generation scenarios.
"""
import json
import pytest
from unittest.mock import AsyncMock, MagicMock, patch
from fastapi.testclient import TestClient
from app.main import app
from app.models import ChatRequest, ChannelType
from app.services.orchestrator import OrchestratorService
from app.services.retrieval.base import RetrievalContext, RetrievalHit, RetrievalResult
class TestRAGSmokeScenarios:
    """
    [AC-AISVC-17, AC-AISVC-18] Smoke tests for RAG scenarios.

    Every test posts to ``/ai/chat`` through a ``TestClient`` and checks the
    shape of what comes back. Nothing in this class stubs the retrieval
    layer, so the "hit" and "miss" scenarios differ only in the message sent.
    """

    @pytest.fixture
    def client(self):
        """Return a ``TestClient`` wrapping the application under test."""
        return TestClient(app)

    @pytest.fixture
    def valid_headers(self):
        """Return request headers carrying the tenant id used by this class."""
        return {"X-Tenant-Id": "tenant_rag_test"}

    @pytest.fixture
    def valid_body(self):
        """Return a minimal chat request body accepted by ``/ai/chat``."""
        return {
            "sessionId": "rag_session",
            "currentMessage": "What is the product price?",
            "channelType": "wechat",
        }

    @staticmethod
    def _sse_json_payloads(stream_text):
        """Yield each JSON object found on a ``data:`` line of an SSE body.

        Lines that are not ``data:`` fields, whose value is not valid JSON,
        or whose JSON value is not an object are skipped: the stream may
        legitimately carry other fields and non-JSON data.
        """
        prefix = "data:"
        # Split on "\n" only; strip() below removes a trailing "\r" if the
        # server used CRLF line endings.
        for raw_line in stream_text.split("\n"):
            if not raw_line.startswith(prefix):
                continue
            try:
                payload = json.loads(raw_line[len(prefix):].strip())
            except json.JSONDecodeError:
                continue
            if isinstance(payload, dict):
                yield payload

    def test_rag_retrieval_hit_scenario(self, client, valid_headers, valid_body):
        """
        [AC-AISVC-17] Test RAG scenario when retrieval has good hits.

        Intended behavior for this scenario:
        - High confidence score
        - shouldTransfer = False
        - Response includes relevant information

        NOTE(review): only the response shape and the [0, 1] confidence range
        are asserted here; the "high confidence" and "shouldTransfer = False"
        expectations above are not checked.
        """
        response = client.post(
            "/ai/chat",
            json=valid_body,
            headers=valid_headers,
        )
        assert response.status_code == 200
        data = response.json()
        assert "reply" in data
        assert "confidence" in data
        assert "shouldTransfer" in data
        assert isinstance(data["confidence"], (int, float))
        assert 0 <= data["confidence"] <= 1

    def test_rag_retrieval_miss_scenario(self, client, valid_headers):
        """
        [AC-AISVC-17, AC-AISVC-18] Test RAG scenario when retrieval has no hits.

        Intended behavior for this scenario:
        - Lower confidence score
        - may suggest transfer to human agent
        - Graceful fallback response

        NOTE(review): only a 200 status and the presence of the three response
        keys are asserted; the confidence level itself is not compared.
        """
        body = {
            "sessionId": "rag_session_miss",
            "currentMessage": "Xyzzy plugh unknown query",
            "channelType": "wechat",
        }
        response = client.post(
            "/ai/chat",
            json=body,
            headers=valid_headers,
        )
        assert response.status_code == 200
        data = response.json()
        assert "reply" in data
        assert "confidence" in data
        assert "shouldTransfer" in data

    def test_rag_sse_with_retrieval(self, client, valid_headers, valid_body):
        """
        [AC-AISVC-17] Test RAG with SSE streaming.

        Requests the chat endpoint with ``Accept: text/event-stream``, checks
        that a ``final`` event is present, and validates the confidence value
        of every JSON payload that actually has a ``confidence`` key.
        """
        headers = {**valid_headers, "Accept": "text/event-stream"}
        response = client.post(
            "/ai/chat",
            json=valid_body,
            headers=headers,
        )
        assert response.status_code == 200
        content = response.text
        assert "event:final" in content or "event: final" in content

        # Filter on the parsed key rather than on a substring of the raw line,
        # so a payload whose text merely mentions "confidence" is not treated
        # as a scored payload.
        scored_payloads = [
            payload
            for payload in self._sse_json_payloads(content)
            if "confidence" in payload
        ]
        # Without this guard the range check below could run zero times and
        # the test would pass without verifying anything.
        assert scored_payloads, "no SSE data payload carried a confidence value"
        for payload in scored_payloads:
            assert 0 <= payload["confidence"] <= 1
class TestRAGConfidenceScoring:
    """
    [AC-AISVC-17, AC-AISVC-18] Tests for confidence scoring based on retrieval quality.

    Each test builds a default ``OrchestratorService`` and calls
    ``generate`` directly, bypassing the HTTP layer.
    """

    @pytest.mark.asyncio
    async def test_high_confidence_with_good_retrieval(self):
        """
        [AC-AISVC-17] High retrieval score should result in high confidence.

        Note (from the original author): without an LLM client, fallback mode
        is used with lower confidence, so only the [0, 1] range is asserted.
        """
        service = OrchestratorService()
        chat_request = ChatRequest(
            session_id="test",
            current_message="What is the price?",
            channel_type=ChannelType.WECHAT,
        )
        result = await service.generate("tenant", chat_request)
        assert 0 <= result.confidence <= 1

    @pytest.mark.asyncio
    async def test_low_confidence_with_poor_retrieval(self):
        """
        [AC-AISVC-17, AC-AISVC-18] Poor retrieval should result in lower confidence.

        Only the [0, 1] range of the returned confidence is asserted.
        """
        service = OrchestratorService()
        chat_request = ChatRequest(
            session_id="test",
            current_message="Unknown topic xyzzy",
            channel_type=ChannelType.WECHAT,
        )
        result = await service.generate("tenant", chat_request)
        assert 0 <= result.confidence <= 1

    @pytest.mark.asyncio
    async def test_transfer_suggestion_on_very_low_confidence(self):
        """
        [AC-AISVC-18] Very low confidence should suggest transfer to human.

        The assertion only requires that ``should_transfer`` is populated
        (not ``None``); its actual value is not checked.
        """
        service = OrchestratorService()
        chat_request = ChatRequest(
            session_id="test",
            current_message="Complex query requiring human expertise",
            channel_type=ChannelType.WECHAT,
        )
        result = await service.generate("tenant", chat_request)
        assert result.should_transfer is not None
class TestRAGRetrievalDiagnostics:
    """
    [AC-AISVC-17] Tests for retrieval diagnostics.

    Exercises the ``hit_count``, ``max_score`` and ``is_empty`` members of
    ``RetrievalResult`` for a populated and an empty hit list.
    """

    @pytest.mark.asyncio
    async def test_retrieval_result_statistics(self):
        """
        [AC-AISVC-17] Retrieval result should provide useful diagnostics.

        With two hits scored 0.9 and 0.7, the result reports two hits, a
        maximum score of 0.9, and is not empty.
        """
        two_hits = [
            RetrievalHit(text="Doc 1", score=0.9, source="kb"),
            RetrievalHit(text="Doc 2", score=0.7, source="kb"),
        ]
        populated = RetrievalResult(hits=two_hits)
        assert populated.hit_count == 2
        assert populated.max_score == 0.9
        assert populated.is_empty is False

    @pytest.mark.asyncio
    async def test_empty_retrieval_result(self):
        """
        [AC-AISVC-17] Empty retrieval result should be detectable.

        With no hits, the result is empty, counts zero hits, and reports a
        maximum score of 0.0.
        """
        no_hits = RetrievalResult(hits=[])
        assert no_hits.is_empty is True
        assert no_hits.hit_count == 0
        assert no_hits.max_score == 0.0
class TestRAGFallbackBehavior:
    """
    [AC-AISVC-18] Tests for fallback behavior when retrieval fails.

    Both tests call ``/ai/chat`` over the ``TestClient`` with their own
    tenant id and inspect the JSON response.
    """

    @pytest.fixture
    def client(self):
        """Return a ``TestClient`` wrapping the application under test."""
        return TestClient(app)

    def test_graceful_fallback_on_retrieval_error(self, client):
        """
        [AC-AISVC-18] Should gracefully handle retrieval errors.

        NOTE(review): nothing in this test forces the retrieval layer to
        fail; it only checks that a plain request returns 200 with a
        ``reply`` key.
        """
        payload = {
            "sessionId": "fallback_session",
            "currentMessage": "Test fallback",
            "channelType": "wechat",
        }
        tenant_headers = {"X-Tenant-Id": "tenant_fallback"}
        response = client.post("/ai/chat", json=payload, headers=tenant_headers)
        assert response.status_code == 200
        body = response.json()
        assert "reply" in body

    def test_fallback_response_quality(self, client):
        """
        [AC-AISVC-18] Fallback response should still be helpful.

        Checks that the reply is non-empty and the confidence is not negative.
        """
        payload = {
            "sessionId": "fallback_quality",
            "currentMessage": "I need help with my order",
            "channelType": "wechat",
        }
        tenant_headers = {"X-Tenant-Id": "tenant_fallback_quality"}
        response = client.post("/ai/chat", json=payload, headers=tenant_headers)
        assert response.status_code == 200
        body = response.json()
        assert len(body["reply"]) > 0
        assert body["confidence"] >= 0
class TestRAGWithHistory:
    """
    Tests for RAG with conversation history.

    Each request includes a ``history`` list of role/content turns alongside
    the current message.
    """

    @pytest.fixture
    def client(self):
        """Return a ``TestClient`` wrapping the application under test."""
        return TestClient(app)

    def test_rag_with_conversation_history(self, client):
        """
        [AC-AISVC-14] RAG should consider conversation history.

        Sends a two-turn history and checks for a 200 response with a
        ``reply`` key; how the history influences the reply is not asserted.
        """
        prior_turns = [
            {"role": "user", "content": "I'm interested in your product"},
            {"role": "assistant", "content": "Great! Our product has many features."},
        ]
        payload = {
            "sessionId": "history_session",
            "currentMessage": "How much does it cost?",
            "channelType": "wechat",
            "history": prior_turns,
        }
        response = client.post(
            "/ai/chat",
            json=payload,
            headers={"X-Tenant-Id": "tenant_history"},
        )
        assert response.status_code == 200
        body = response.json()
        assert "reply" in body

    def test_rag_with_long_history(self, client):
        """
        [AC-AISVC-14, AC-AISVC-15] RAG should handle long conversation history.

        Sends 20 alternating user/assistant turns and checks for a 200
        response with a ``reply`` key.
        """
        speakers = ("user", "assistant")
        long_history = []
        for turn in range(20):
            # Even turns belong to the user, odd turns to the assistant.
            long_history.append(
                {"role": speakers[turn % 2], "content": f"Message {turn}"}
            )
        payload = {
            "sessionId": "long_history_session",
            "currentMessage": "Summary please",
            "channelType": "wechat",
            "history": long_history,
        }
        response = client.post(
            "/ai/chat",
            json=payload,
            headers={"X-Tenant-Id": "tenant_long_history"},
        )
        assert response.status_code == 200
        body = response.json()
        assert "reply" in body