ai-robot-core/ai-service/app/api/admin/dashboard.py

203 lines
7.9 KiB
Python

"""
Dashboard statistics endpoints.
Provides overview statistics for the admin dashboard.
"""
import logging
from typing import Annotated
from fastapi import APIRouter, Depends, Query
from fastapi.responses import JSONResponse
from sqlalchemy import select, func, desc
from sqlalchemy.ext.asyncio import AsyncSession
from app.core.database import get_session
from app.core.exceptions import MissingTenantIdException
from app.core.tenant import get_tenant_id
from app.models import ErrorResponse
from app.models.entities import ChatMessage, ChatSession, Document, KnowledgeBase
logger = logging.getLogger(__name__)
router = APIRouter(prefix="/admin/dashboard", tags=["Dashboard"])
LATENCY_THRESHOLD_MS = 5000
def get_current_tenant_id() -> str:
    """Resolve the tenant ID for the current request context.

    Returns:
        The non-empty tenant ID string.

    Raises:
        MissingTenantIdException: If no tenant ID is set in the context.
    """
    current = get_tenant_id()
    if current:
        return current
    raise MissingTenantIdException()
async def _scalar(session: AsyncSession, stmt):
    """Execute *stmt* and return its single scalar result (None if no row)."""
    result = await session.execute(stmt)
    return result.scalar()


async def _count(session: AsyncSession, model, *conditions) -> int:
    """Return SELECT COUNT(*) over *model* filtered by *conditions* (0 if empty)."""
    stmt = select(func.count()).select_from(model).where(*conditions)
    return await _scalar(session, stmt) or 0


async def _msg_agg(session: AsyncSession, aggregate, *conditions):
    """Run an SQL *aggregate* over ChatMessage rows matching *conditions*.

    The aggregate is wrapped in COALESCE(..., 0) so an empty result set
    yields 0 rather than NULL.
    """
    stmt = (
        select(func.coalesce(aggregate, 0))
        .select_from(ChatMessage)
        .where(*conditions)
    )
    return await _scalar(session, stmt) or 0


@router.get(
    "/stats",
    operation_id="getDashboardStats",
    summary="Get dashboard statistics",
    description="Get overview statistics for the admin dashboard.",
    responses={
        200: {"description": "Dashboard statistics"},
        401: {"description": "Unauthorized", "model": ErrorResponse},
        403: {"description": "Forbidden", "model": ErrorResponse},
    },
)
async def get_dashboard_stats(
    tenant_id: Annotated[str, Depends(get_current_tenant_id)],
    session: Annotated[AsyncSession, Depends(get_session)],
    latency_threshold: int = Query(default=LATENCY_THRESHOLD_MS, description="Latency threshold in ms"),
) -> JSONResponse:
    """
    Get dashboard statistics including knowledge bases, messages, and activity.

    All queries are scoped to the current tenant. Latency statistics only
    consider assistant-role messages; percentile/min/max/avg additionally
    require a non-NULL latency_ms.
    """
    # Lazy %-formatting: the string is only built if INFO is enabled.
    logger.info("Getting dashboard stats: tenant=%s", tenant_id)

    # Reusable WHERE clause bundles for assistant-message statistics.
    assistant = (
        ChatMessage.tenant_id == tenant_id,
        ChatMessage.role == "assistant",
    )
    timed = (*assistant, ChatMessage.latency_ms.isnot(None))

    # Entity counts, all tenant-scoped.
    kb_count = await _count(session, KnowledgeBase, KnowledgeBase.tenant_id == tenant_id)
    msg_count = await _count(session, ChatMessage, ChatMessage.tenant_id == tenant_id)
    doc_count = await _count(session, Document, Document.tenant_id == tenant_id)
    session_count = await _count(session, ChatSession, ChatSession.tenant_id == tenant_id)

    # Token usage totals across all messages of the tenant.
    tenant_msgs = (ChatMessage.tenant_id == tenant_id,)
    total_tokens = await _msg_agg(session, func.sum(ChatMessage.total_tokens), *tenant_msgs)
    prompt_tokens = await _msg_agg(session, func.sum(ChatMessage.prompt_tokens), *tenant_msgs)
    completion_tokens = await _msg_agg(session, func.sum(ChatMessage.completion_tokens), *tenant_msgs)

    # Request volume and latency statistics (assistant messages only).
    ai_requests_count = await _count(session, ChatMessage, *assistant)
    avg_latency_ms = float(await _msg_agg(session, func.avg(ChatMessage.latency_ms), *timed))

    # Most recent assistant response: its latency and timestamp.
    last_stmt = (
        select(ChatMessage.latency_ms, ChatMessage.created_at)
        .where(*assistant)
        .order_by(desc(ChatMessage.created_at))
        .limit(1)
    )
    last_row = (await session.execute(last_stmt)).fetchone()
    last_latency_ms = last_row[0] if last_row else None
    last_request_time = (
        last_row[1].isoformat() if last_row and last_row[1] else None
    )

    slow_requests_count = await _count(
        session, ChatMessage, *timed, ChatMessage.latency_ms >= latency_threshold
    )
    # .is_(True) is the idiomatic SQLAlchemy boolean test (vs `== True`).
    error_requests_count = await _count(
        session, ChatMessage, *assistant, ChatMessage.is_error.is_(True)
    )

    # Percentile/min/max latency. percentile_cont is an ordered-set
    # aggregate (PostgreSQL), matching the original queries.
    p95_latency_ms = float(await _msg_agg(
        session, func.percentile_cont(0.95).within_group(ChatMessage.latency_ms), *timed
    ))
    p99_latency_ms = float(await _msg_agg(
        session, func.percentile_cont(0.99).within_group(ChatMessage.latency_ms), *timed
    ))
    min_latency_ms = float(await _msg_agg(session, func.min(ChatMessage.latency_ms), *timed))
    max_latency_ms = float(await _msg_agg(session, func.max(ChatMessage.latency_ms), *timed))

    return JSONResponse(
        content={
            "knowledgeBases": kb_count,
            "totalMessages": msg_count,
            "totalDocuments": doc_count,
            "totalSessions": session_count,
            "totalTokens": total_tokens,
            "promptTokens": prompt_tokens,
            "completionTokens": completion_tokens,
            "aiRequestsCount": ai_requests_count,
            "avgLatencyMs": round(avg_latency_ms, 2),
            "lastLatencyMs": last_latency_ms,
            "lastRequestTime": last_request_time,
            "slowRequestsCount": slow_requests_count,
            "errorRequestsCount": error_requests_count,
            "p95LatencyMs": round(p95_latency_ms, 2),
            "p99LatencyMs": round(p99_latency_ms, 2),
            "minLatencyMs": round(min_latency_ms, 2),
            "maxLatencyMs": round(max_latency_ms, 2),
            "latencyThresholdMs": latency_threshold,
        }
    )