[AC-AISVC-02, AC-AISVC-16] Consolidate multiple requirements #1

Merged
MerCry merged 45 commits from feature/prompt-unification-and-logging into main 2026-02-25 17:17:35 +00:00
31 changed files with 3613 additions and 220 deletions
Showing only changes of commit fc53fdc6ac

View File

@ -10,6 +10,7 @@
<el-menu-item index="/kb">知识库管理</el-menu-item>
<el-menu-item index="/rag-lab">RAG 实验室</el-menu-item>
<el-menu-item index="/monitoring">会话监控</el-menu-item>
<el-menu-item index="/admin/embedding">嵌入模型配置</el-menu-item>
<div class="flex-grow" />
<div class="tenant-selector">
<el-select v-model="currentTenantId" placeholder="选择租户" @change="handleTenantChange">

View File

@ -1,8 +1,5 @@
import request from '@/utils/request'
/**
* Dashboard
*/
export function getDashboardStats() {
return request({
url: '/admin/dashboard/stats',

View File

@ -52,21 +52,21 @@ export interface SupportedFormatsResponse {
export function getProviders() {
return request({
url: '/admin/embedding/providers',
url: '/embedding/providers',
method: 'get'
})
}
export function getConfig() {
return request({
url: '/admin/embedding/config',
url: '/embedding/config',
method: 'get'
})
}
export function saveConfig(data: EmbeddingConfigUpdate) {
return request({
url: '/admin/embedding/config',
url: '/embedding/config',
method: 'put',
data
})
@ -74,7 +74,7 @@ export function saveConfig(data: EmbeddingConfigUpdate) {
export function testEmbedding(data: EmbeddingTestRequest): Promise<EmbeddingTestResult> {
return request({
url: '/admin/embedding/test',
url: '/embedding/test',
method: 'post',
data
})
@ -82,7 +82,7 @@ export function testEmbedding(data: EmbeddingTestRequest): Promise<EmbeddingTest
export function getSupportedFormats() {
return request({
url: '/admin/embedding/formats',
url: '/embedding/formats',
method: 'get'
})
}
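A minimal usage sketch of these service functions, assuming the shared request instance from '@/utils/request' injects the tenant header and that callers unwrap the payload defensively (as the Pinia store later in this PR does); the provider name and config keys below are illustrative placeholders, not values from this change:

import { getProviders, getConfig, saveConfig, testEmbedding } from '@/api/embedding'

// Load the provider catalogue and the currently saved configuration;
// the exact unwrapping depends on the request interceptor, so guard both shapes.
async function loadEmbeddingSettings() {
  const providersRes: any = await getProviders()
  const configRes: any = await getConfig()
  return {
    providers: providersRes?.providers || providersRes?.data?.providers || [],
    config: configRes?.data || configRes
  }
}

// Persist a configuration, then verify it end to end with a test call.
async function saveAndVerify() {
  await saveConfig({ provider: 'example-provider', config: { model: 'example-model' } })
  const result = await testEmbedding({ test_text: '这是一个测试句子' })
  console.log(result.success, result.dimension, result.latency_ms)
}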

View File

@ -1,8 +1,5 @@
import request from '@/utils/request'
/**
*
*/
export function listKnowledgeBases() {
return request({
url: '/admin/kb/knowledge-bases',
@ -10,9 +7,6 @@ export function listKnowledgeBases() {
})
}
/**
* [AC-ASA-08]
*/
export function listDocuments(params: any) {
return request({
url: '/admin/kb/documents',
@ -21,9 +15,6 @@ export function listDocuments(params: any) {
})
}
/**
* [AC-ASA-01]
*/
export function uploadDocument(data: FormData) {
return request({
url: '/admin/kb/documents',
@ -32,9 +23,6 @@ export function uploadDocument(data: FormData) {
})
}
/**
* [AC-ASA-02]
*/
export function getIndexJob(jobId: string) {
return request({
url: `/admin/kb/index/jobs/${jobId}`,
@ -42,9 +30,6 @@ export function getIndexJob(jobId: string) {
})
}
/**
* [AC-ASA-08]
*/
export function deleteDocument(docId: string) {
return request({
url: `/admin/kb/documents/${docId}`,

View File

@ -1,8 +1,5 @@
import request from '@/utils/request'
/**
* [AC-ASA-09]
*/
export function listSessions(params: any) {
return request({
url: '/admin/sessions',
@ -11,9 +8,6 @@ export function listSessions(params: any) {
})
}
/**
* [AC-ASA-07]
*/
export function getSessionDetail(sessionId: string) {
return request({
url: `/admin/sessions/${sessionId}`,

View File

@ -1,8 +1,5 @@
import request from '@/utils/request'
/**
* RAG [AC-ASA-05]
*/
export function runRagExperiment(data: { query: string, kbIds?: string[], params?: any }) {
return request({
url: '/admin/rag/experiments/run',

View File

@ -0,0 +1,73 @@
<template>
<el-select
:model-value="modelValue"
:loading="loading"
:placeholder="placeholder"
:disabled="disabled"
:clearable="clearable"
@update:model-value="handleChange"
>
<el-option
v-for="provider in providers"
:key="provider.name"
:label="provider.display_name"
:value="provider.name"
>
<div class="provider-option">
<span class="provider-name">{{ provider.display_name }}</span>
<span v-if="provider.description" class="provider-desc">{{ provider.description }}</span>
</div>
</el-option>
</el-select>
</template>
<script setup lang="ts">
import type { EmbeddingProviderInfo } from '@/types/embedding'
const props = withDefaults(
defineProps<{
modelValue?: string
providers: EmbeddingProviderInfo[]
loading?: boolean
disabled?: boolean
clearable?: boolean
placeholder?: string
}>(),
{
modelValue: '',
loading: false,
disabled: false,
clearable: false,
placeholder: '请选择嵌入模型提供者'
}
)
const emit = defineEmits<{
'update:modelValue': [value: string]
change: [provider: EmbeddingProviderInfo | undefined]
}>()
const handleChange = (value: string) => {
emit('update:modelValue', value)
const selectedProvider = props.providers.find((p) => p.name === value)
emit('change', selectedProvider)
}
</script>
<style scoped>
.provider-option {
display: flex;
flex-direction: column;
line-height: 1.4;
}
.provider-name {
font-weight: 500;
}
.provider-desc {
font-size: 12px;
color: var(--el-text-color-secondary);
margin-top: 2px;
}
</style>

View File

@ -1,90 +1,111 @@
<template>
<el-card shadow="never" class="test-panel">
<el-card shadow="hover" class="test-panel">
<template #header>
<div class="card-header">
<span>连接测试</span>
<div class="header-left">
<div class="icon-wrapper">
<el-icon><Connection /></el-icon>
</div>
<span class="header-title">连接测试</span>
</div>
<el-tag v-if="testResult" :type="testResult.success ? 'success' : 'danger'" size="small" effect="dark">
{{ testResult.success ? '连接成功' : '连接失败' }}
</el-tag>
</div>
</template>
<div class="test-content">
<el-form :model="testForm" label-width="80px">
<el-form-item label="测试文本">
<el-input
v-model="testForm.test_text"
type="textarea"
:rows="3"
placeholder="请输入测试文本(可选,默认使用系统预设文本)"
clearable
/>
</el-form-item>
<el-form-item>
<el-button
type="primary"
:loading="loading"
:disabled="!config?.provider"
@click="handleTest"
>
<el-icon v-if="!loading"><Connection /></el-icon>
{{ loading ? '测试中...' : '测试连接' }}
</el-button>
</el-form-item>
</el-form>
<div v-if="testResult" class="test-result">
<el-divider />
<el-alert
v-if="testResult.success"
:title="testResult.message || '连接成功'"
type="success"
:closable="false"
show-icon
class="result-alert"
<div class="test-form-section">
<div class="section-label">
<el-icon><Edit /></el-icon>
<span>测试文本</span>
</div>
<el-input
v-model="testForm.test_text"
type="textarea"
:rows="3"
placeholder="请输入测试文本(可选,默认使用系统预设文本)"
clearable
class="test-textarea"
/>
<el-button
type="primary"
size="large"
:loading="loading"
:disabled="!config?.provider"
class="test-button"
@click="handleTest"
>
<template #default>
<div class="success-details">
<div class="detail-item">
<span class="label">向量维度</span>
<el-tag type="success">{{ testResult.dimension }}</el-tag>
</div>
<div v-if="testResult.latency_ms" class="detail-item">
<span class="label">响应延迟</span>
<el-tag type="info">{{ testResult.latency_ms.toFixed(2) }} ms</el-tag>
</div>
</div>
</template>
</el-alert>
<el-alert
v-else
:title="testResult.error || '连接失败'"
type="error"
:closable="false"
show-icon
class="result-alert"
>
<template #default>
<div class="error-details">
<p class="error-message">{{ testResult.error || '未知错误' }}</p>
<div class="troubleshooting">
<p class="troubleshoot-title">排查建议</p>
<ul class="troubleshoot-list">
<li v-for="(tip, index) in troubleshootingTips" :key="index">
{{ tip }}
</li>
</ul>
</div>
</div>
</template>
</el-alert>
<el-icon v-if="!loading"><Connection /></el-icon>
{{ loading ? '测试中...' : '测试连接' }}
</el-button>
</div>
<transition name="result-fade">
<div v-if="testResult" class="test-result">
<el-divider />
<div v-if="testResult.success" class="success-result">
<div class="result-header">
<div class="success-icon">
<el-icon><CircleCheck /></el-icon>
</div>
<span class="result-title">{{ testResult.message || '连接成功' }}</span>
</div>
<div class="success-details">
<div class="detail-card">
<div class="detail-icon">
<el-icon><Grid /></el-icon>
</div>
<div class="detail-info">
<span class="detail-label">向量维度</span>
<span class="detail-value">{{ testResult.dimension }}</span>
</div>
</div>
<div v-if="testResult.latency_ms" class="detail-card">
<div class="detail-icon">
<el-icon><Timer /></el-icon>
</div>
<div class="detail-info">
<span class="detail-label">响应延迟</span>
<span class="detail-value">{{ testResult.latency_ms.toFixed(2) }} ms</span>
</div>
</div>
</div>
</div>
<div v-else class="error-result">
<div class="result-header">
<div class="error-icon">
<el-icon><CircleClose /></el-icon>
</div>
<span class="result-title error">连接失败</span>
</div>
<div class="error-message-box">
<p class="error-text">{{ testResult.error || '未知错误' }}</p>
</div>
<div class="troubleshooting">
<div class="troubleshoot-header">
<el-icon><Warning /></el-icon>
<span>排查建议</span>
</div>
<ul class="troubleshoot-list">
<li v-for="(tip, index) in troubleshootingTips" :key="index">
<el-icon class="list-icon"><Right /></el-icon>
{{ tip }}
</li>
</ul>
</div>
</div>
</div>
</transition>
</div>
</el-card>
</template>
<script setup lang="ts">
import { ref, computed } from 'vue'
import { Connection } from '@element-plus/icons-vue'
import { Connection, Edit, CircleCheck, CircleClose, Timer, Grid, Warning, Right } from '@element-plus/icons-vue'
import { testEmbedding, type EmbeddingConfigUpdate, type EmbeddingTestResult } from '@/api/embedding'
const props = defineProps<{
@ -159,72 +180,281 @@ const handleTest = async () => {
<style scoped>
.test-panel {
margin-top: 20px;
border-radius: 16px;
border: none;
background: rgba(255, 255, 255, 0.98);
backdrop-filter: blur(10px);
box-shadow: 0 8px 32px rgba(0, 0, 0, 0.1);
transition: all 0.3s ease;
}
.test-panel:hover {
box-shadow: 0 12px 48px rgba(0, 0, 0, 0.15);
transform: translateY(-4px);
}
.card-header {
display: flex;
justify-content: space-between;
align-items: center;
padding: 0;
}
.header-left {
display: flex;
align-items: center;
gap: 12px;
}
.icon-wrapper {
width: 40px;
height: 40px;
display: flex;
align-items: center;
justify-content: center;
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
border-radius: 10px;
color: #ffffff;
font-size: 20px;
}
.header-title {
font-size: 16px;
font-weight: 600;
color: #303133;
}
.test-content {
padding: 0 10px;
padding: 8px 0;
}
.test-form-section {
display: flex;
flex-direction: column;
gap: 16px;
}
.section-label {
display: flex;
align-items: center;
gap: 8px;
font-size: 14px;
font-weight: 600;
color: #606266;
}
.section-label .el-icon {
color: #667eea;
}
.test-textarea {
border-radius: 10px;
}
.test-textarea :deep(.el-textarea__inner) {
border-radius: 10px;
border: 1px solid #dcdfe6;
transition: all 0.3s ease;
}
.test-textarea :deep(.el-textarea__inner:focus) {
border-color: #667eea;
box-shadow: 0 0 0 2px rgba(102, 126, 234, 0.2);
}
.test-button {
align-self: flex-start;
border-radius: 10px;
padding: 12px 24px;
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
border: none;
transition: all 0.3s ease;
}
.test-button:hover:not(:disabled) {
transform: translateY(-2px);
box-shadow: 0 4px 12px rgba(102, 126, 234, 0.4);
}
.test-button:disabled {
opacity: 0.6;
}
.test-result {
margin-top: 10px;
animation: fadeIn 0.4s ease-out;
}
.result-alert {
margin-top: 10px;
@keyframes fadeIn {
from {
opacity: 0;
transform: translateY(10px);
}
to {
opacity: 1;
transform: translateY(0);
}
}
.result-header {
display: flex;
align-items: center;
gap: 12px;
margin-bottom: 16px;
}
.success-icon,
.error-icon {
width: 36px;
height: 36px;
display: flex;
align-items: center;
justify-content: center;
border-radius: 50%;
font-size: 20px;
}
.success-icon {
background: linear-gradient(135deg, #67c23a 0%, #85ce61 100%);
color: #ffffff;
}
.error-icon {
background: linear-gradient(135deg, #f56c6c 0%, #f89898 100%);
color: #ffffff;
}
.result-title {
font-size: 16px;
font-weight: 600;
color: #67c23a;
}
.result-title.error {
color: #f56c6c;
}
.success-details {
margin-top: 12px;
display: flex;
gap: 16px;
flex-wrap: wrap;
}
.detail-item {
display: inline-flex;
.detail-card {
display: flex;
align-items: center;
margin-right: 20px;
margin-bottom: 8px;
gap: 12px;
padding: 14px 18px;
background: linear-gradient(135deg, #f0f9eb 0%, #e1f3d8 100%);
border-radius: 12px;
border: 1px solid #e1f3d8;
}
.detail-item .label {
color: #606266;
margin-right: 8px;
.detail-icon {
width: 40px;
height: 40px;
display: flex;
align-items: center;
justify-content: center;
background: linear-gradient(135deg, #67c23a 0%, #85ce61 100%);
border-radius: 10px;
color: #ffffff;
font-size: 18px;
}
.error-details {
margin-top: 8px;
.detail-info {
display: flex;
flex-direction: column;
}
.error-message {
.detail-label {
font-size: 12px;
color: #909399;
}
.detail-value {
font-size: 18px;
font-weight: 700;
color: #303133;
}
.error-result {
animation: shake 0.5s ease-out;
}
@keyframes shake {
0%, 100% { transform: translateX(0); }
25% { transform: translateX(-5px); }
75% { transform: translateX(5px); }
}
.error-message-box {
padding: 14px 16px;
background: linear-gradient(135deg, #fef0f0 0%, #fde2e2 100%);
border-radius: 10px;
border-left: 3px solid #f56c6c;
margin-bottom: 16px;
}
.error-text {
margin: 0;
color: #f56c6c;
margin-bottom: 12px;
font-size: 14px;
line-height: 1.6;
}
.troubleshooting {
background-color: #fef0f0;
padding: 12px;
border-radius: 4px;
margin-top: 8px;
padding: 16px;
background: linear-gradient(135deg, #fdf6ec 0%, #faecd8 100%);
border-radius: 12px;
border: 1px solid #faecd8;
}
.troubleshoot-title {
.troubleshoot-header {
display: flex;
align-items: center;
gap: 8px;
margin-bottom: 12px;
font-weight: 600;
color: #f56c6c;
margin-bottom: 8px;
color: #e6a23c;
}
.troubleshoot-list {
margin: 0;
padding-left: 20px;
color: #909399;
padding: 0;
list-style: none;
}
.troubleshoot-list li {
margin-bottom: 4px;
display: flex;
align-items: flex-start;
gap: 8px;
margin-bottom: 8px;
color: #606266;
font-size: 13px;
line-height: 1.6;
}
.list-icon {
margin-top: 4px;
color: #e6a23c;
font-size: 12px;
}
.result-fade-enter-active {
transition: all 0.4s cubic-bezier(0.4, 0, 0.2, 1);
}
.result-fade-leave-active {
transition: all 0.3s cubic-bezier(1, 0.5, 0.8, 1);
}
.result-fade-enter-from {
opacity: 0;
transform: translateY(20px);
}
.result-fade-leave-to {
opacity: 0;
transform: translateY(-10px);
}
</style>

View File

@ -0,0 +1,161 @@
<template>
<div class="supported-formats">
<div v-loading="loading" class="formats-content">
<transition-group name="tag-fade" tag="div" class="formats-grid">
<el-tooltip
v-for="format in formats"
:key="format.extension"
:content="format.description"
placement="top"
:disabled="!format.description"
effect="light"
>
<div class="format-item">
<div class="format-icon">
<span class="extension">{{ format.extension }}</span>
</div>
<div class="format-info">
<span class="format-name">{{ format.name }}</span>
</div>
</div>
</el-tooltip>
</transition-group>
<el-empty v-if="!loading && formats.length === 0" description="暂无支持的格式" :image-size="80">
<template #image>
<div class="empty-icon">
<el-icon><Document /></el-icon>
</div>
</template>
</el-empty>
</div>
</div>
</template>
<script setup lang="ts">
import { computed, onMounted } from 'vue'
import { Document } from '@element-plus/icons-vue'
import { useEmbeddingStore } from '@/stores/embedding'
const embeddingStore = useEmbeddingStore()
const formats = computed(() => embeddingStore.formats)
const loading = computed(() => embeddingStore.formatsLoading)
onMounted(() => {
if (formats.value.length === 0) {
embeddingStore.loadFormats()
}
})
</script>
<style scoped>
.supported-formats {
padding: 8px 0;
}
.formats-content {
min-height: 60px;
}
.formats-grid {
display: flex;
flex-wrap: wrap;
gap: 12px;
}
.format-item {
display: flex;
align-items: center;
gap: 10px;
padding: 10px 14px;
background: linear-gradient(135deg, #f8f9fc 0%, #eef1f5 100%);
border-radius: 10px;
border: 1px solid #e4e7ed;
cursor: default;
transition: all 0.3s ease;
}
.format-item:hover {
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
border-color: transparent;
transform: translateY(-2px);
box-shadow: 0 4px 12px rgba(102, 126, 234, 0.3);
}
.format-item:hover .extension,
.format-item:hover .format-name {
color: #ffffff;
}
.format-icon {
width: 36px;
height: 36px;
display: flex;
align-items: center;
justify-content: center;
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
border-radius: 8px;
}
.format-item:hover .format-icon {
background: rgba(255, 255, 255, 0.2);
}
.extension {
font-size: 11px;
font-weight: 700;
color: #ffffff;
text-transform: uppercase;
letter-spacing: 0.5px;
}
.format-info {
display: flex;
flex-direction: column;
}
.format-name {
font-size: 13px;
font-weight: 600;
color: #303133;
transition: color 0.3s ease;
}
.empty-icon {
width: 80px;
height: 80px;
display: flex;
align-items: center;
justify-content: center;
background: linear-gradient(135deg, #f5f7fa 0%, #e8ecf1 100%);
border-radius: 50%;
margin: 0 auto;
}
.empty-icon .el-icon {
font-size: 40px;
color: #c0c4cc;
}
.tag-fade-enter-active {
transition: all 0.4s cubic-bezier(0.4, 0, 0.2, 1);
}
.tag-fade-leave-active {
transition: all 0.3s cubic-bezier(1, 0.5, 0.8, 1);
}
.tag-fade-enter-from {
opacity: 0;
transform: scale(0.8);
}
.tag-fade-leave-to {
opacity: 0;
transform: scale(0.8);
}
.tag-fade-move {
transition: transform 0.3s ease;
}
</style>

View File

@ -28,6 +28,12 @@ const routes: Array<RouteRecordRaw> = [
name: 'Monitoring',
component: () => import('@/views/monitoring/index.vue'),
meta: { title: '会话监控' }
},
{
path: '/admin/embedding',
name: 'EmbeddingConfig',
component: () => import('@/views/admin/embedding/index.vue'),
meta: { title: '嵌入模型配置' }
}
]

View File

@ -0,0 +1,164 @@
import { defineStore } from 'pinia'
import { ref, computed } from 'vue'
import {
getProviders,
getConfig,
saveConfig,
testEmbedding,
getSupportedFormats,
type EmbeddingProviderInfo,
type EmbeddingConfig,
type EmbeddingConfigUpdate,
type EmbeddingTestResult,
type DocumentFormat
} from '@/api/embedding'
export const useEmbeddingStore = defineStore('embedding', () => {
const providers = ref<EmbeddingProviderInfo[]>([])
const currentConfig = ref<EmbeddingConfig>({
provider: '',
config: {}
})
const formats = ref<DocumentFormat[]>([])
const loading = ref(false)
const providersLoading = ref(false)
const formatsLoading = ref(false)
const testResult = ref<EmbeddingTestResult | null>(null)
const testLoading = ref(false)
const currentProvider = computed(() => {
return providers.value.find(p => p.name === currentConfig.value.provider)
})
const configSchema = computed(() => {
return currentProvider.value?.config_schema || { properties: {} }
})
const loadProviders = async () => {
providersLoading.value = true
try {
const res: any = await getProviders()
providers.value = res?.providers || res?.data?.providers || []
} catch (error) {
console.error('Failed to load providers:', error)
throw error
} finally {
providersLoading.value = false
}
}
const loadConfig = async () => {
loading.value = true
try {
const res: any = await getConfig()
const config = res?.data || res
if (config) {
currentConfig.value = {
provider: config.provider || '',
config: config.config || {},
updated_at: config.updated_at
}
}
} catch (error) {
console.error('Failed to load config:', error)
throw error
} finally {
loading.value = false
}
}
const saveCurrentConfig = async () => {
loading.value = true
try {
const updateData: EmbeddingConfigUpdate = {
provider: currentConfig.value.provider,
config: currentConfig.value.config
}
await saveConfig(updateData)
} catch (error) {
console.error('Failed to save config:', error)
throw error
} finally {
loading.value = false
}
}
const runTest = async (testText?: string) => {
testLoading.value = true
testResult.value = null
try {
const result = await testEmbedding({
test_text: testText,
config: {
provider: currentConfig.value.provider,
config: currentConfig.value.config
}
})
testResult.value = result
} catch (error: any) {
testResult.value = {
success: false,
dimension: 0,
error: error?.message || '连接测试失败'
}
} finally {
testLoading.value = false
}
}
const loadFormats = async () => {
formatsLoading.value = true
try {
const res: any = await getSupportedFormats()
formats.value = res?.formats || res?.data?.formats || []
} catch (error) {
console.error('Failed to load formats:', error)
throw error
} finally {
formatsLoading.value = false
}
}
const setProvider = (providerName: string) => {
currentConfig.value.provider = providerName
const provider = providers.value.find(p => p.name === providerName)
if (provider?.config_schema?.properties) {
const newConfig: Record<string, any> = {}
Object.entries(provider.config_schema.properties).forEach(([key, field]: [string, any]) => {
newConfig[key] = field.default !== undefined ? field.default : ''
})
currentConfig.value.config = newConfig
} else {
currentConfig.value.config = {}
}
}
const updateConfigValue = (key: string, value: any) => {
currentConfig.value.config[key] = value
}
const clearTestResult = () => {
testResult.value = null
}
return {
providers,
currentConfig,
formats,
loading,
providersLoading,
formatsLoading,
testResult,
testLoading,
currentProvider,
configSchema,
loadProviders,
loadConfig,
saveCurrentConfig,
runTest,
loadFormats,
setProvider,
updateConfigValue,
clearTestResult
}
})
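A sketch of how a view drives this store from inside a component's <script setup> (the embedding config page added later in this PR follows essentially this flow); the concrete config key and value are illustrative:

import { useEmbeddingStore } from '@/stores/embedding'

const embeddingStore = useEmbeddingStore()

// Typical flow: load providers and the saved config, pick a provider (which
// seeds defaults from its config_schema), adjust a value, test, then persist.
async function configureEmbedding() {
  await Promise.all([embeddingStore.loadProviders(), embeddingStore.loadConfig()])
  embeddingStore.setProvider(embeddingStore.providers[0]?.name ?? '')
  embeddingStore.updateConfigValue('model', 'example-embedding-model') // illustrative key/value
  await embeddingStore.runTest('这是一个测试句子')
  if (embeddingStore.testResult?.success) {
    await embeddingStore.saveCurrentConfig()
  }
}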

View File

@ -0,0 +1,49 @@
export interface EmbeddingProviderInfo {
name: string
display_name: string
description?: string
config_schema: Record<string, any>
}
export interface EmbeddingConfig {
provider: string
config: Record<string, any>
updated_at?: string
}
export interface EmbeddingConfigUpdate {
provider: string
config?: Record<string, any>
}
export interface EmbeddingTestResult {
success: boolean
dimension: number
latency_ms?: number
message?: string
error?: string
}
export interface DocumentFormat {
extension: string
name: string
description?: string
}
export interface EmbeddingProvidersResponse {
providers: EmbeddingProviderInfo[]
}
export interface EmbeddingConfigUpdateResponse {
success: boolean
message: string
}
export interface SupportedFormatsResponse {
formats: DocumentFormat[]
}
export interface EmbeddingTestRequest {
test_text?: string
config?: EmbeddingConfigUpdate
}

View File

@ -0,0 +1,504 @@
<template>
<div class="embedding-config-page">
<div class="page-header">
<div class="header-content">
<div class="title-section">
<h1 class="page-title">嵌入模型配置</h1>
<p class="page-desc">配置和管理系统使用的嵌入模型,支持多种提供者切换,配置修改后需保存才能生效</p>
</div>
<div class="header-actions">
<el-tag v-if="currentConfig.updated_at" type="info" size="large" effect="plain">
<el-icon class="tag-icon"><Clock /></el-icon>
上次更新: {{ formatDate(currentConfig.updated_at) }}
</el-tag>
</div>
</div>
</div>
<el-row :gutter="24" v-loading="pageLoading" element-loading-text="加载中...">
<el-col :xs="24" :sm="24" :md="12" :lg="12">
<div class="config-card-wrapper">
<el-card shadow="hover" class="config-card">
<template #header>
<div class="card-header">
<div class="header-left">
<div class="icon-wrapper">
<el-icon><Setting /></el-icon>
</div>
<span class="header-title">模型配置</span>
</div>
</div>
</template>
<div class="card-content">
<div class="provider-select-section">
<div class="section-label">
<el-icon><Connection /></el-icon>
<span>选择提供者</span>
</div>
<EmbeddingProviderSelect
v-model="currentConfig.provider"
:providers="providers"
:loading="providersLoading"
placeholder="请选择嵌入模型提供者"
@change="handleProviderChange"
/>
<transition name="fade">
<div v-if="currentProvider" class="provider-info">
<el-icon class="info-icon"><InfoFilled /></el-icon>
<span class="info-text">{{ currentProvider.description }}</span>
</div>
</transition>
</div>
<el-divider />
<transition name="slide-fade" mode="out-in">
<div v-if="currentConfig.provider" key="form" class="config-form-section">
<EmbeddingConfigForm
ref="configFormRef"
:schema="configSchema"
v-model="currentConfig.config"
label-width="140px"
/>
</div>
<el-empty v-else key="empty" description="请先选择一个嵌入模型提供者" :image-size="120">
<template #image>
<div class="empty-icon">
<el-icon><Box /></el-icon>
</div>
</template>
</el-empty>
</transition>
</div>
<template #footer>
<div class="card-footer">
<el-button size="large" @click="handleReset">
<el-icon><RefreshLeft /></el-icon>
重置
</el-button>
<el-button type="primary" size="large" :loading="saving" @click="handleSave">
<el-icon><Check /></el-icon>
保存配置
</el-button>
</div>
</template>
</el-card>
</div>
</el-col>
<el-col :xs="24" :sm="24" :md="12" :lg="12">
<div class="right-column">
<div class="test-panel-wrapper">
<EmbeddingTestPanel
:config="{ provider: currentConfig.provider, config: currentConfig.config }"
/>
</div>
<el-card shadow="hover" class="formats-card">
<template #header>
<div class="card-header">
<div class="header-left">
<div class="icon-wrapper">
<el-icon><Document /></el-icon>
</div>
<span class="header-title">支持的文档格式</span>
</div>
</div>
</template>
<SupportedFormats />
</el-card>
</div>
</el-col>
</el-row>
</div>
</template>
<script setup lang="ts">
import { ref, computed, onMounted } from 'vue'
import { ElMessage, ElMessageBox } from 'element-plus'
import { Setting, Connection, InfoFilled, Box, RefreshLeft, Check, Clock, Document } from '@element-plus/icons-vue'
import { useEmbeddingStore } from '@/stores/embedding'
import EmbeddingProviderSelect from '@/components/embedding/EmbeddingProviderSelect.vue'
import EmbeddingConfigForm from '@/components/embedding/EmbeddingConfigForm.vue'
import EmbeddingTestPanel from '@/components/embedding/EmbeddingTestPanel.vue'
import SupportedFormats from '@/components/embedding/SupportedFormats.vue'
const embeddingStore = useEmbeddingStore()
const configFormRef = ref<InstanceType<typeof EmbeddingConfigForm>>()
const saving = ref(false)
const pageLoading = ref(false)
const providers = computed(() => embeddingStore.providers)
const currentConfig = computed(() => embeddingStore.currentConfig)
const currentProvider = computed(() => embeddingStore.currentProvider)
const configSchema = computed(() => embeddingStore.configSchema)
const providersLoading = computed(() => embeddingStore.providersLoading)
const formatDate = (dateStr: string) => {
if (!dateStr) return ''
const date = new Date(dateStr)
return date.toLocaleString('zh-CN', {
year: 'numeric',
month: '2-digit',
day: '2-digit',
hour: '2-digit',
minute: '2-digit'
})
}
const handleProviderChange = (provider: any) => {
if (provider) {
embeddingStore.setProvider(provider.name)
}
}
const handleSave = async () => {
if (!currentConfig.value.provider) {
ElMessage.warning('请先选择嵌入模型提供者')
return
}
try {
const valid = await configFormRef.value?.validate()
if (!valid) {
return
}
} catch (error) {
ElMessage.warning('请检查配置表单中的必填项')
return
}
saving.value = true
try {
await embeddingStore.saveCurrentConfig()
ElMessage.success('配置保存成功')
} catch (error) {
ElMessage.error('配置保存失败')
} finally {
saving.value = false
}
}
const handleReset = async () => {
try {
await ElMessageBox.confirm('确定要重置配置吗?将恢复为当前保存的配置。', '确认重置', {
confirmButtonText: '确定',
cancelButtonText: '取消',
type: 'warning'
})
await embeddingStore.loadConfig()
ElMessage.success('配置已重置')
} catch (error) {
// user cancelled the reset confirmation; nothing to do
}
}
const initPage = async () => {
pageLoading.value = true
try {
await Promise.all([
embeddingStore.loadProviders(),
embeddingStore.loadConfig(),
embeddingStore.loadFormats()
])
} catch (error) {
ElMessage.error('初始化页面失败')
} finally {
pageLoading.value = false
}
}
onMounted(() => {
initPage()
})
</script>
<style scoped>
.embedding-config-page {
padding: 24px;
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
min-height: calc(100vh - 60px);
}
.page-header {
margin-bottom: 32px;
animation: slideDown 0.6s ease-out;
}
@keyframes slideDown {
from {
opacity: 0;
transform: translateY(-20px);
}
to {
opacity: 1;
transform: translateY(0);
}
}
.header-content {
display: flex;
justify-content: space-between;
align-items: flex-start;
gap: 20px;
flex-wrap: wrap;
}
.title-section {
flex: 1;
min-width: 300px;
}
.page-title {
margin: 0 0 12px 0;
font-size: 28px;
font-weight: 700;
color: #ffffff;
letter-spacing: -0.5px;
text-shadow: 0 2px 4px rgba(0, 0, 0, 0.1);
}
.page-desc {
margin: 0;
font-size: 14px;
color: rgba(255, 255, 255, 0.85);
line-height: 1.6;
}
.header-actions {
display: flex;
align-items: center;
}
.tag-icon {
margin-right: 4px;
}
.config-card-wrapper,
.test-panel-wrapper,
.formats-card {
animation: fadeInUp 0.6s ease-out;
}
@keyframes fadeInUp {
from {
opacity: 0;
transform: translateY(30px);
}
to {
opacity: 1;
transform: translateY(0);
}
}
.config-card {
border-radius: 16px;
border: none;
background: rgba(255, 255, 255, 0.98);
backdrop-filter: blur(10px);
box-shadow: 0 8px 32px rgba(0, 0, 0, 0.1);
transition: all 0.3s ease;
}
.config-card:hover {
box-shadow: 0 12px 48px rgba(0, 0, 0, 0.15);
transform: translateY(-4px);
}
.card-header {
display: flex;
justify-content: space-between;
align-items: center;
padding: 0;
}
.header-left {
display: flex;
align-items: center;
gap: 12px;
}
.icon-wrapper {
width: 40px;
height: 40px;
display: flex;
align-items: center;
justify-content: center;
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
border-radius: 10px;
color: #ffffff;
font-size: 20px;
}
.header-title {
font-size: 16px;
font-weight: 600;
color: #303133;
}
.card-content {
padding: 8px 0;
}
.provider-select-section {
margin-bottom: 16px;
}
.section-label {
display: flex;
align-items: center;
gap: 8px;
margin-bottom: 12px;
font-size: 14px;
font-weight: 600;
color: #606266;
}
.section-label .el-icon {
color: #667eea;
}
.provider-info {
display: flex;
align-items: flex-start;
gap: 8px;
margin-top: 12px;
padding: 14px 16px;
background: linear-gradient(135deg, #f5f7fa 0%, #e8ecf1 100%);
border-radius: 10px;
font-size: 13px;
color: #606266;
line-height: 1.6;
border-left: 3px solid #667eea;
}
.info-icon {
margin-top: 2px;
color: #667eea;
font-size: 16px;
}
.info-text {
flex: 1;
}
.config-form-section {
max-height: 400px;
overflow-y: auto;
padding-right: 8px;
}
.config-form-section::-webkit-scrollbar {
width: 6px;
}
.config-form-section::-webkit-scrollbar-track {
background: #f1f1f1;
border-radius: 3px;
}
.config-form-section::-webkit-scrollbar-thumb {
background: #c0c4cc;
border-radius: 3px;
}
.config-form-section::-webkit-scrollbar-thumb:hover {
background: #a0a4ac;
}
.card-footer {
display: flex;
justify-content: flex-end;
gap: 12px;
padding-top: 8px;
}
.right-column {
display: flex;
flex-direction: column;
gap: 24px;
}
.formats-card {
border-radius: 16px;
border: none;
background: rgba(255, 255, 255, 0.98);
backdrop-filter: blur(10px);
box-shadow: 0 8px 32px rgba(0, 0, 0, 0.1);
transition: all 0.3s ease;
}
.formats-card:hover {
box-shadow: 0 12px 48px rgba(0, 0, 0, 0.15);
transform: translateY(-4px);
}
.empty-icon {
width: 120px;
height: 120px;
display: flex;
align-items: center;
justify-content: center;
background: linear-gradient(135deg, #f5f7fa 0%, #e8ecf1 100%);
border-radius: 50%;
margin: 0 auto;
}
.empty-icon .el-icon {
font-size: 60px;
color: #c0c4cc;
}
.fade-enter-active,
.fade-leave-active {
transition: opacity 0.3s ease;
}
.fade-enter-from,
.fade-leave-to {
opacity: 0;
}
.slide-fade-enter-active {
transition: all 0.4s cubic-bezier(0.4, 0, 0.2, 1);
}
.slide-fade-leave-active {
transition: all 0.3s cubic-bezier(1, 0.5, 0.8, 1);
}
.slide-fade-enter-from {
opacity: 0;
transform: translateX(-20px);
}
.slide-fade-leave-to {
opacity: 0;
transform: translateX(20px);
}
@media (max-width: 768px) {
.embedding-config-page {
padding: 16px;
}
.page-title {
font-size: 24px;
}
.header-content {
flex-direction: column;
}
.title-section {
min-width: 100%;
}
.config-form-section {
max-height: 300px;
}
}
</style>

View File

@ -6,7 +6,8 @@ Admin API routes for AI Service management.
from app.api.admin.dashboard import router as dashboard_router
from app.api.admin.embedding import router as embedding_router
from app.api.admin.kb import router as kb_router
from app.api.admin.llm import router as llm_router
from app.api.admin.rag import router as rag_router
from app.api.admin.sessions import router as sessions_router
__all__ = ["dashboard_router", "embedding_router", "kb_router", "rag_router", "sessions_router"]
__all__ = ["dashboard_router", "embedding_router", "kb_router", "llm_router", "rag_router", "sessions_router"]

View File

@ -0,0 +1,146 @@
"""
LLM Configuration Management API.
[AC-ASA-14, AC-ASA-15, AC-ASA-16, AC-ASA-17, AC-ASA-18] LLM provider management endpoints.
"""
import logging
from typing import Any
from fastapi import APIRouter, Request
from app.core.tenant import get_tenant_id
from app.services.llm.factory import (
LLMConfigManager,
LLMProviderFactory,
get_llm_config_manager,
)
logger = logging.getLogger(__name__)
router = APIRouter(prefix="/admin/llm", tags=["LLM Management"])
@router.get("/providers")
async def list_providers(request: Request) -> dict[str, Any]:
"""
List all available LLM providers.
[AC-ASA-15] Returns provider list with configuration schemas.
"""
tenant_id = get_tenant_id(request)
logger.info(f"[AC-ASA-15] Listing LLM providers for tenant={tenant_id}")
providers = LLMProviderFactory.get_providers()
return {
"providers": [
{
"name": p.name,
"display_name": p.display_name,
"description": p.description,
"config_schema": p.config_schema,
}
for p in providers
],
}
@router.get("/config")
async def get_config(request: Request) -> dict[str, Any]:
"""
Get current LLM configuration.
[AC-ASA-14] Returns current provider and config.
"""
tenant_id = get_tenant_id(request)
logger.info(f"[AC-ASA-14] Getting LLM config for tenant={tenant_id}")
manager = get_llm_config_manager()
config = manager.get_current_config()
masked_config = _mask_secrets(config.get("config", {}))
return {
"provider": config["provider"],
"config": masked_config,
}
@router.put("/config")
async def update_config(
request: Request,
body: dict[str, Any],
) -> dict[str, Any]:
"""
Update LLM configuration.
[AC-ASA-16] Updates provider and config with validation.
"""
tenant_id = get_tenant_id(request)
provider = body.get("provider")
config = body.get("config", {})
logger.info(f"[AC-ASA-16] Updating LLM config for tenant={tenant_id}, provider={provider}")
if not provider:
return {
"success": False,
"message": "Provider is required",
}
try:
manager = get_llm_config_manager()
await manager.update_config(provider, config)
return {
"success": True,
"message": f"LLM configuration updated to {provider}",
}
except ValueError as e:
logger.error(f"[AC-ASA-16] Invalid LLM config: {e}")
return {
"success": False,
"message": str(e),
}
@router.post("/test")
async def test_connection(
request: Request,
body: dict[str, Any] | None = None,
) -> dict[str, Any]:
"""
Test LLM connection.
[AC-ASA-17, AC-ASA-18] Tests connection and returns response.
"""
tenant_id = get_tenant_id(request)
body = body or {}
test_prompt = body.get("test_prompt", "你好,请简单介绍一下自己。")
provider = body.get("provider")
config = body.get("config")
logger.info(
f"[AC-ASA-17] Testing LLM connection for tenant={tenant_id}, "
f"provider={provider or 'current'}"
)
manager = get_llm_config_manager()
result = await manager.test_connection(
test_prompt=test_prompt,
provider=provider,
config=config,
)
return result
def _mask_secrets(config: dict[str, Any]) -> dict[str, Any]:
"""Mask secret fields in config for display."""
masked = {}
for key, value in config.items():
if key in ("api_key", "password", "secret"):
if value:
masked[key] = f"{str(value)[:4]}***"
else:
masked[key] = ""
else:
masked[key] = value
return masked
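These three routes are what the planned src/api/llm.ts (task P6-01 in the progress document below) will wrap on the frontend. A hedged sketch following the existing embedding API module's conventions; note that GET /admin/llm/config returns secret fields masked to their first four characters plus "***", so a client should avoid echoing the masked value back into a PUT unchanged:

import request from '@/utils/request'

// Hypothetical src/api/llm.ts mirroring the routes above (paths assume the
// request base URL maps directly onto the backend's /admin/llm prefix).
export function getLLMProviders() {
  return request({ url: '/admin/llm/providers', method: 'get' })
}

export function getLLMConfig() {
  // api_key comes back masked (e.g. "sk-1***"); do not write it back unchanged.
  return request({ url: '/admin/llm/config', method: 'get' })
}

export function saveLLMConfig(data: { provider: string; config?: Record<string, any> }) {
  return request({ url: '/admin/llm/config', method: 'put', data })
}

export function testLLM(data?: { test_prompt?: string; provider?: string; config?: Record<string, any> }) {
  return request({ url: '/admin/llm/test', method: 'post', data })
}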

View File

@ -1,24 +1,24 @@
"""
RAG Lab endpoints for debugging and experimentation.
[AC-ASA-05] RAG experiment debugging with retrieval results and prompt visualization.
[AC-ASA-05, AC-ASA-19, AC-ASA-20, AC-ASA-21, AC-ASA-22] RAG experiment with AI output.
"""
import json
import logging
import time
from typing import Annotated, Any, List
from fastapi import APIRouter, Depends, Body
from fastapi.responses import JSONResponse
from fastapi.responses import JSONResponse, StreamingResponse
from pydantic import BaseModel, Field
from sqlalchemy.ext.asyncio import AsyncSession
from app.core.config import get_settings
from app.core.database import get_session
from app.core.exceptions import MissingTenantIdException
from app.core.tenant import get_tenant_id
from app.core.qdrant_client import get_qdrant_client
from app.models import ErrorResponse
from app.services.retrieval.vector_retriever import get_vector_retriever
from app.services.retrieval.base import RetrievalContext
from app.services.llm.factory import get_llm_config_manager
logger = logging.getLogger(__name__)
@ -36,16 +36,37 @@ def get_current_tenant_id() -> str:
class RAGExperimentRequest(BaseModel):
query: str = Field(..., description="Query text for retrieval")
kb_ids: List[str] | None = Field(default=None, description="Knowledge base IDs to search")
params: dict[str, Any] | None = Field(default=None, description="Retrieval parameters")
top_k: int = Field(default=5, description="Number of results to retrieve")
score_threshold: float = Field(default=0.5, description="Minimum similarity score")
generate_response: bool = Field(default=True, description="Whether to generate AI response")
llm_provider: str | None = Field(default=None, description="Specific LLM provider to use")
class AIResponse(BaseModel):
content: str
prompt_tokens: int = 0
completion_tokens: int = 0
total_tokens: int = 0
latency_ms: float = 0
model: str = ""
class RAGExperimentResult(BaseModel):
query: str
retrieval_results: List[dict] = []
final_prompt: str = ""
ai_response: AIResponse | None = None
total_latency_ms: float = 0
diagnostics: dict[str, Any] = {}
@router.post(
"/experiments/run",
operation_id="runRagExperiment",
summary="Run RAG debugging experiment",
description="[AC-ASA-05] Trigger RAG experiment with retrieval and prompt generation.",
summary="Run RAG debugging experiment with AI output",
description="[AC-ASA-05, AC-ASA-19, AC-ASA-21, AC-ASA-22] Trigger RAG experiment with retrieval, prompt generation, and AI response.",
responses={
200: {"description": "Experiment results with retrieval and prompt"},
200: {"description": "Experiment results with retrieval, prompt, and AI response"},
401: {"description": "Unauthorized", "model": ErrorResponse},
403: {"description": "Forbidden", "model": ErrorResponse},
},
@ -55,18 +76,19 @@ async def run_rag_experiment(
request: RAGExperimentRequest = Body(...),
) -> JSONResponse:
"""
[AC-ASA-05] Run RAG experiment and return retrieval results with final prompt.
[AC-ASA-05, AC-ASA-19, AC-ASA-21, AC-ASA-22] Run RAG experiment and return retrieval results with AI response.
"""
start_time = time.time()
logger.info(
f"[AC-ASA-05] Running RAG experiment: tenant={tenant_id}, "
f"query={request.query[:50]}..., kb_ids={request.kb_ids}"
f"query={request.query[:50]}..., kb_ids={request.kb_ids}, "
f"generate_response={request.generate_response}"
)
settings = get_settings()
params = request.params or {}
top_k = params.get("topK", settings.rag_top_k)
threshold = params.get("threshold", settings.rag_score_threshold)
top_k = request.top_k or settings.rag_top_k
threshold = request.score_threshold or settings.rag_score_threshold
try:
retriever = await get_vector_retriever()
@ -94,14 +116,26 @@ async def run_rag_experiment(
final_prompt = _build_final_prompt(request.query, retrieval_results)
logger.info(
f"[AC-ASA-05] RAG experiment complete: hits={len(retrieval_results)}, "
f"[AC-ASA-05] RAG retrieval complete: hits={len(retrieval_results)}, "
f"max_score={result.max_score:.3f}"
)
ai_response = None
if request.generate_response:
ai_response = await _generate_ai_response(
final_prompt,
provider=request.llm_provider,
)
total_latency_ms = (time.time() - start_time) * 1000
return JSONResponse(
content={
"retrievalResults": retrieval_results,
"finalPrompt": final_prompt,
"query": request.query,
"retrieval_results": retrieval_results,
"final_prompt": final_prompt,
"ai_response": ai_response.model_dump() if ai_response else None,
"total_latency_ms": round(total_latency_ms, 2),
"diagnostics": result.diagnostics,
}
)
@ -112,10 +146,22 @@ async def run_rag_experiment(
fallback_results = _get_fallback_results(request.query)
fallback_prompt = _build_final_prompt(request.query, fallback_results)
ai_response = None
if request.generate_response:
ai_response = await _generate_ai_response(
fallback_prompt,
provider=request.llm_provider,
)
total_latency_ms = (time.time() - start_time) * 1000
return JSONResponse(
content={
"retrievalResults": fallback_results,
"finalPrompt": fallback_prompt,
"query": request.query,
"retrieval_results": fallback_results,
"final_prompt": fallback_prompt,
"ai_response": ai_response.model_dump() if ai_response else None,
"total_latency_ms": round(total_latency_ms, 2),
"diagnostics": {
"error": str(e),
"fallback": True,
@ -124,6 +170,130 @@ async def run_rag_experiment(
)
@router.post(
"/experiments/stream",
operation_id="runRagExperimentStream",
summary="Run RAG experiment with streaming AI output",
description="[AC-ASA-20] Trigger RAG experiment with SSE streaming for AI response.",
responses={
200: {"description": "SSE stream with retrieval results and AI response"},
401: {"description": "Unauthorized", "model": ErrorResponse},
403: {"description": "Forbidden", "model": ErrorResponse},
},
)
async def run_rag_experiment_stream(
tenant_id: Annotated[str, Depends(get_current_tenant_id)],
request: RAGExperimentRequest = Body(...),
) -> StreamingResponse:
"""
[AC-ASA-20] Run RAG experiment with SSE streaming for AI response.
"""
logger.info(
f"[AC-ASA-20] Running RAG experiment stream: tenant={tenant_id}, "
f"query={request.query[:50]}..."
)
settings = get_settings()
top_k = request.top_k or settings.rag_top_k
threshold = request.score_threshold or settings.rag_score_threshold
async def event_generator():
try:
retriever = await get_vector_retriever()
retrieval_ctx = RetrievalContext(
tenant_id=tenant_id,
query=request.query,
session_id="rag_experiment_stream",
channel_type="admin",
metadata={"kb_ids": request.kb_ids},
)
result = await retriever.retrieve(retrieval_ctx)
retrieval_results = [
{
"content": hit.text,
"score": hit.score,
"source": hit.source,
"metadata": hit.metadata,
}
for hit in result.hits
]
final_prompt = _build_final_prompt(request.query, retrieval_results)
yield f"event: retrieval\ndata: {json.dumps({'results': retrieval_results, 'count': len(retrieval_results)})}\n\n"
yield f"event: prompt\ndata: {json.dumps({'prompt': final_prompt})}\n\n"
if request.generate_response:
manager = get_llm_config_manager()
client = manager.get_client()
full_content = ""
async for chunk in client.stream_generate(
messages=[{"role": "user", "content": final_prompt}],
):
if chunk.delta:
full_content += chunk.delta
yield f"event: message\ndata: {json.dumps({'delta': chunk.delta})}\n\n"
yield f"event: final\ndata: {json.dumps({'content': full_content, 'finish_reason': 'stop'})}\n\n"
else:
yield f"event: final\ndata: {json.dumps({'content': '', 'finish_reason': 'skipped'})}\n\n"
except Exception as e:
logger.error(f"[AC-ASA-20] RAG experiment stream failed: {e}")
yield f"event: error\ndata: {json.dumps({'error': str(e)})}\n\n"
return StreamingResponse(
event_generator(),
media_type="text/event-stream",
headers={
"Cache-Control": "no-cache",
"Connection": "keep-alive",
"X-Accel-Buffering": "no",
},
)
async def _generate_ai_response(
prompt: str,
provider: str | None = None,
) -> AIResponse | None:
"""
[AC-ASA-19, AC-ASA-21] Generate AI response from prompt.
"""
import time
try:
manager = get_llm_config_manager()
client = manager.get_client()
start_time = time.time()
response = await client.generate(
messages=[{"role": "user", "content": prompt}],
)
latency_ms = (time.time() - start_time) * 1000
return AIResponse(
content=response.content,
prompt_tokens=response.usage.get("prompt_tokens", 0),
completion_tokens=response.usage.get("completion_tokens", 0),
total_tokens=response.usage.get("total_tokens", 0),
latency_ms=round(latency_ms, 2),
model=response.model,
)
except Exception as e:
logger.error(f"[AC-ASA-19] AI response generation failed: {e}")
return AIResponse(
content=f"AI 响应生成失败: {str(e)}",
latency_ms=0,
)
def _build_final_prompt(query: str, retrieval_results: list[dict]) -> str:
"""
Build the final prompt from query and retrieval results.
@ -138,14 +308,14 @@ def _build_final_prompt(query: str, retrieval_results: list[dict]) -> str:
for i, hit in enumerate(retrieval_results[:5])
])
return f"""基于以下检索到的信息,回答用户问题:
return f"""基于以下检索到的信息,作为一个回答简洁精准的客服,回答用户问题:
用户问题:{query}
检索结果:
{evidence_text}
请基于以上信息,生成专业、准确的回答。"""
请基于以上信息,生成专业、准确的回答。注意:输出内容应该格式整齐,不包含json符号等。"""
def _get_fallback_results(query: str) -> list[dict]:

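The new /experiments/stream endpoint emits named SSE events (retrieval, prompt, message, final, error). EventSource cannot send a POST body, so one way for the admin UI to consume it is fetch plus a stream reader; a rough sketch with simplified event parsing, the path and X-Tenant-Id header following the existing admin request conventions:

// Rough sketch of consuming POST /admin/rag/experiments/stream from the browser.
// Parses "event:" / "data:" blocks separated by blank lines; error handling simplified.
async function runExperimentStream(query: string, tenantId: string, onDelta: (t: string) => void) {
  const resp = await fetch('/admin/rag/experiments/stream', {
    method: 'POST',
    headers: { 'Content-Type': 'application/json', 'X-Tenant-Id': tenantId },
    body: JSON.stringify({ query, generate_response: true })
  })
  const reader = resp.body!.getReader()
  const decoder = new TextDecoder()
  let buffer = ''
  while (true) {
    const { done, value } = await reader.read()
    if (done) break
    buffer += decoder.decode(value, { stream: true })
    let idx: number
    while ((idx = buffer.indexOf('\n\n')) >= 0) {
      const block = buffer.slice(0, idx)
      buffer = buffer.slice(idx + 2)
      const event = /^event: (.*)$/m.exec(block)?.[1]
      const data = /^data: (.*)$/m.exec(block)?.[1]
      if (event === 'message' && data) onDelta(JSON.parse(data).delta)
      if (event === 'error' && data) throw new Error(JSON.parse(data).error)
    }
  }
}
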
View File

@ -12,7 +12,7 @@ from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import JSONResponse
from app.api import chat_router, health_router
from app.api.admin import dashboard_router, embedding_router, kb_router, rag_router, sessions_router
from app.api.admin import dashboard_router, embedding_router, kb_router, llm_router, rag_router, sessions_router
from app.core.config import get_settings
from app.core.database import close_db, init_db
from app.core.exceptions import (
@ -115,6 +115,7 @@ app.include_router(chat_router)
app.include_router(dashboard_router)
app.include_router(embedding_router)
app.include_router(kb_router)
app.include_router(llm_router)
app.include_router(rag_router)
app.include_router(sessions_router)

View File

@ -0,0 +1,332 @@
"""
LLM Provider Factory and Configuration Management.
[AC-ASA-14, AC-ASA-15, AC-ASA-16, AC-ASA-17, AC-ASA-18] LLM provider management.
Design pattern: Factory pattern for pluggable LLM providers.
"""
import logging
from dataclasses import dataclass, field
from typing import Any
from app.services.llm.base import LLMClient, LLMConfig
from app.services.llm.openai_client import OpenAIClient
logger = logging.getLogger(__name__)
@dataclass
class LLMProviderInfo:
"""Information about an LLM provider."""
name: str
display_name: str
description: str
config_schema: dict[str, Any]
LLM_PROVIDERS: dict[str, LLMProviderInfo] = {
"openai": LLMProviderInfo(
name="openai",
display_name="OpenAI",
description="OpenAI GPT 系列模型 (GPT-4, GPT-3.5 等)",
config_schema={
"api_key": {
"type": "string",
"description": "API Key",
"required": True,
"secret": True,
},
"base_url": {
"type": "string",
"description": "API Base URL",
"default": "https://api.openai.com/v1",
},
"model": {
"type": "string",
"description": "模型名称",
"default": "gpt-4o-mini",
},
"max_tokens": {
"type": "integer",
"description": "最大输出 Token 数",
"default": 2048,
},
"temperature": {
"type": "number",
"description": "温度参数 (0-2)",
"default": 0.7,
},
},
),
"ollama": LLMProviderInfo(
name="ollama",
display_name="Ollama",
description="Ollama 本地模型 (Llama, Qwen 等)",
config_schema={
"base_url": {
"type": "string",
"description": "Ollama API 地址",
"default": "http://localhost:11434/v1",
},
"model": {
"type": "string",
"description": "模型名称",
"default": "llama3.2",
},
"max_tokens": {
"type": "integer",
"description": "最大输出 Token 数",
"default": 2048,
},
"temperature": {
"type": "number",
"description": "温度参数 (0-2)",
"default": 0.7,
},
},
),
"azure": LLMProviderInfo(
name="azure",
display_name="Azure OpenAI",
description="Azure OpenAI 服务",
config_schema={
"api_key": {
"type": "string",
"description": "API Key",
"required": True,
"secret": True,
},
"base_url": {
"type": "string",
"description": "Azure Endpoint",
"required": True,
},
"model": {
"type": "string",
"description": "部署名称",
"required": True,
},
"api_version": {
"type": "string",
"description": "API 版本",
"default": "2024-02-15-preview",
},
"max_tokens": {
"type": "integer",
"description": "最大输出 Token 数",
"default": 2048,
},
"temperature": {
"type": "number",
"description": "温度参数 (0-2)",
"default": 0.7,
},
},
),
}
class LLMProviderFactory:
"""
Factory for creating LLM clients.
[AC-ASA-14, AC-ASA-15] Dynamic provider creation.
"""
@classmethod
def get_providers(cls) -> list[LLMProviderInfo]:
"""Get all registered LLM providers."""
return list(LLM_PROVIDERS.values())
@classmethod
def get_provider_info(cls, name: str) -> LLMProviderInfo | None:
"""Get provider info by name."""
return LLM_PROVIDERS.get(name)
@classmethod
def create_client(
cls,
provider: str,
config: dict[str, Any],
) -> LLMClient:
"""
Create an LLM client for the specified provider.
[AC-ASA-15] Factory method for client creation.
Args:
provider: Provider name (openai, ollama, azure)
config: Provider configuration
Returns:
LLMClient instance
Raises:
ValueError: If provider is not supported
"""
if provider not in LLM_PROVIDERS:
raise ValueError(f"Unsupported LLM provider: {provider}")
if provider in ("openai", "ollama", "azure"):
return OpenAIClient(
api_key=config.get("api_key"),
base_url=config.get("base_url"),
model=config.get("model"),
default_config=LLMConfig(
model=config.get("model", "gpt-4o-mini"),
max_tokens=config.get("max_tokens", 2048),
temperature=config.get("temperature", 0.7),
),
)
raise ValueError(f"Unsupported LLM provider: {provider}")
class LLMConfigManager:
"""
Manager for LLM configuration.
[AC-ASA-16, AC-ASA-17, AC-ASA-18] Configuration management with hot-reload.
"""
def __init__(self):
self._current_provider: str = "openai"
self._current_config: dict[str, Any] = {}
self._client: LLMClient | None = None
def get_current_config(self) -> dict[str, Any]:
"""Get current LLM configuration."""
return {
"provider": self._current_provider,
"config": self._current_config,
}
async def update_config(
self,
provider: str,
config: dict[str, Any],
) -> bool:
"""
Update LLM configuration.
[AC-ASA-16] Hot-reload configuration.
Args:
provider: Provider name
config: New configuration
Returns:
True if update successful
"""
if provider not in LLM_PROVIDERS:
raise ValueError(f"Unsupported LLM provider: {provider}")
provider_info = LLM_PROVIDERS[provider]
validated_config = self._validate_config(provider_info, config)
if self._client:
await self._client.close()
self._client = None
self._current_provider = provider
self._current_config = validated_config
logger.info(f"[AC-ASA-16] LLM config updated: provider={provider}")
return True
def _validate_config(
self,
provider_info: LLMProviderInfo,
config: dict[str, Any],
) -> dict[str, Any]:
"""Validate configuration against provider schema."""
validated = {}
for key, schema in provider_info.config_schema.items():
if key in config:
validated[key] = config[key]
elif "default" in schema:
validated[key] = schema["default"]
elif schema.get("required"):
raise ValueError(f"Missing required config: {key}")
return validated
def get_client(self) -> LLMClient:
"""Get or create LLM client with current config."""
if self._client is None:
self._client = LLMProviderFactory.create_client(
self._current_provider,
self._current_config,
)
return self._client
async def test_connection(
self,
test_prompt: str = "你好,请简单介绍一下自己。",
provider: str | None = None,
config: dict[str, Any] | None = None,
) -> dict[str, Any]:
"""
Test LLM connection.
[AC-ASA-17, AC-ASA-18] Connection testing.
Args:
test_prompt: Test prompt to send
provider: Optional provider to test (uses current if not specified)
config: Optional config to test (uses current if not specified)
Returns:
Test result with success status, response, and metrics
"""
import time
test_provider = provider or self._current_provider
test_config = config or self._current_config
if test_provider not in LLM_PROVIDERS:
return {
"success": False,
"error": f"Unsupported provider: {test_provider}",
}
try:
client = LLMProviderFactory.create_client(test_provider, test_config)
start_time = time.time()
response = await client.generate(
messages=[{"role": "user", "content": test_prompt}],
)
latency_ms = (time.time() - start_time) * 1000
await client.close()
return {
"success": True,
"response": response.content,
"latency_ms": round(latency_ms, 2),
"prompt_tokens": response.usage.get("prompt_tokens", 0),
"completion_tokens": response.usage.get("completion_tokens", 0),
"total_tokens": response.usage.get("total_tokens", 0),
"model": response.model,
"message": f"连接成功,模型: {response.model}",
}
except Exception as e:
logger.error(f"[AC-ASA-18] LLM test failed: {e}")
return {
"success": False,
"error": str(e),
"message": f"连接失败: {str(e)}",
}
async def close(self) -> None:
"""Close the current client."""
if self._client:
await self._client.close()
self._client = None
_llm_config_manager: LLMConfigManager | None = None
def get_llm_config_manager() -> LLMConfigManager:
"""Get or create LLM config manager instance."""
global _llm_config_manager
if _llm_config_manager is None:
_llm_config_manager = LLMConfigManager()
return _llm_config_manager
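On the frontend, the provider registry and config_schema shapes above are what the planned src/types/llm.ts (task P6-01 in the progress document below) will need to model. A hedged TypeScript sketch of those types, with a defaults helper mirroring what the embedding store's setProvider already does; field names are taken from the schema entries above, everything else is illustrative:

// Hypothetical src/types/llm.ts mirroring LLMProviderInfo and its config_schema entries.
export interface LLMConfigField {
  type: 'string' | 'integer' | 'number'
  description: string
  required?: boolean
  secret?: boolean              // e.g. api_key; masked by the backend when read back
  default?: string | number
}

export interface LLMProviderInfo {
  name: string                  // "openai" | "ollama" | "azure"
  display_name: string
  description: string
  config_schema: Record<string, LLMConfigField>
}

export interface LLMConfig {
  provider: string
  config: Record<string, any>
}

// Seed a config object with schema defaults, as the embedding store does in setProvider().
export function defaultsFromSchema(schema: Record<string, LLMConfigField>): Record<string, any> {
  const out: Record<string, any> = {}
  for (const [key, field] of Object.entries(schema)) {
    out[key] = field.default !== undefined ? field.default : ''
  }
  return out
}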

View File

@ -16,6 +16,10 @@ dependencies = [
"asyncpg>=0.29.0",
"qdrant-client>=1.7.0",
"tiktoken>=0.5.0",
"openpyxl>=3.1.0",
"python-docx>=1.1.0",
"pymupdf>=1.23.0",
"pdfplumber>=0.10.0",
]
[project.optional-dependencies]

View File

@ -0,0 +1,80 @@
"""
Check Qdrant vector database contents - detailed view.
"""
import asyncio
import sys
sys.path.insert(0, ".")
from qdrant_client import AsyncQdrantClient
from app.core.config import get_settings
from collections import defaultdict
settings = get_settings()
async def check_qdrant():
"""Check Qdrant collections and vectors."""
client = AsyncQdrantClient(url=settings.qdrant_url, check_compatibility=False)
print(f"\n{'='*60}")
print(f"Qdrant URL: {settings.qdrant_url}")
print(f"{'='*60}\n")
# List all collections
collections = await client.get_collections()
# Check kb_default collection
for c in collections.collections:
if c.name == "kb_default":
print(f"\n--- Collection: {c.name} ---")
# Get collection info
info = await client.get_collection(c.name)
print(f" Total vectors: {info.points_count}")
# Scroll through all points and group by source
all_points = []
offset = None
while True:
points, offset = await client.scroll(
collection_name=c.name,
limit=100,
offset=offset,
with_payload=True,
with_vectors=False,
)
all_points.extend(points)
if offset is None:
break
# Group by source
by_source = defaultdict(list)
for p in all_points:
source = p.payload.get("source", "unknown") if p.payload else "unknown"
by_source[source].append(p)
print(f"\n Documents by source:")
for source, points in by_source.items():
print(f"\n Source: {source}")
print(f" Chunks: {len(points)}")
# Check first chunk content
first_point = points[0]
text = first_point.payload.get("text", "") if first_point.payload else ""
# Check if it's binary garbage or proper text
is_garbage = any(ord(c) > 0xFFFF or (ord(c) < 32 and c not in '\n\r\t') for c in text[:200])
if is_garbage:
print(f" Status: ❌ BINARY GARBAGE (parsing failed)")
else:
print(f" Status: ✅ PROPER TEXT (parsed correctly)")
print(f" Preview: {text[:150]}...")
await client.close()
if __name__ == "__main__":
asyncio.run(check_qdrant())

View File

@ -0,0 +1,115 @@
"""
Clean up garbage data from Qdrant vector database.
Removes vectors that contain binary garbage (failed parsing results).
"""
import asyncio
import sys
sys.path.insert(0, ".")
from qdrant_client import AsyncQdrantClient
from qdrant_client.models import PointIdsList
from app.core.config import get_settings
from collections import defaultdict
settings = get_settings()
def is_garbage_text(text: str) -> bool:
"""Check if text contains binary garbage."""
if not text:
return True
sample = text[:500]
garbage_chars = sum(1 for c in sample if ord(c) > 0xFFFF or (ord(c) < 32 and c not in '\n\r\t'))
return garbage_chars > len(sample) * 0.1
async def cleanup_garbage():
"""Clean up garbage data from Qdrant."""
client = AsyncQdrantClient(url=settings.qdrant_url, check_compatibility=False)
print(f"\n{'='*60}")
print(f"Cleaning up garbage data from Qdrant")
print(f"URL: {settings.qdrant_url}")
print(f"{'='*60}\n")
collections = await client.get_collections()
for c in collections.collections:
if not c.name.startswith(settings.qdrant_collection_prefix):
continue
print(f"\n--- Collection: {c.name} ---")
info = await client.get_collection(c.name)
print(f" Total vectors: {info.points_count}")
all_points = []
offset = None
while True:
points, offset = await client.scroll(
collection_name=c.name,
limit=100,
offset=offset,
with_payload=True,
with_vectors=False,
)
all_points.extend(points)
if offset is None:
break
by_source = defaultdict(list)
for p in all_points:
source = p.payload.get("source", "unknown") if p.payload else "unknown"
by_source[source].append(p)
garbage_sources = []
good_sources = []
for source, points in by_source.items():
first_point = points[0]
text = first_point.payload.get("text", "") if first_point.payload else ""
if is_garbage_text(text):
garbage_sources.append((source, points))
else:
good_sources.append((source, points))
print(f"\n Good documents: {len(good_sources)}")
print(f" Garbage documents: {len(garbage_sources)}")
if garbage_sources:
print(f"\n Garbage documents to delete:")
for source, points in garbage_sources:
print(f" - {source} ({len(points)} chunks)")
preview = ""
if points[0].payload:
preview = points[0].payload.get("text", "")[:80]
print(f" Preview: {repr(preview)}...")
confirm = input("\n Delete these garbage documents? (y/n): ")
if confirm.lower() == 'y':
for source, points in garbage_sources:
point_ids = [p.id for p in points]
await client.delete(
collection_name=c.name,
points_selector=PointIdsList(points=point_ids)
)
print(f" Deleted {len(point_ids)} vectors for source {source}")
print(f"\n Cleanup complete!")
else:
print(f"\n Cancelled.")
else:
print(f"\n No garbage data found.")
await client.close()
if __name__ == "__main__":
asyncio.run(cleanup_garbage())

View File

@ -0,0 +1,40 @@
"""
Test Excel parsing directly.
"""
import sys
sys.path.insert(0, ".")
from app.services.document import parse_document, get_supported_document_formats
print("Supported formats:", get_supported_document_formats())
print()
# Test with a sample xlsx file if available
import os
from pathlib import Path
# Find any xlsx files in the uploads directory
uploads_dir = Path("uploads")
if uploads_dir.exists():
xlsx_files = list(uploads_dir.glob("**/*.xlsx"))
print(f"Found {len(xlsx_files)} xlsx files")
for f in xlsx_files[:1]: # Test first one
print(f"\nTesting: {f}")
try:
result = parse_document(str(f))
print(f" SUCCESS: chars={len(result.text)}")
print(f" metadata: {result.metadata}")
print(f" preview: {result.text[:500]}...")
except Exception as e:
print(f" FAILED: {type(e).__name__}: {e}")
else:
print("No uploads directory found")
# Test openpyxl directly
print("\n--- Testing openpyxl directly ---")
try:
import openpyxl
print(f"openpyxl version: {openpyxl.__version__}")
except ImportError as e:
print(f"openpyxl NOT installed: {e}")

View File

@ -3,7 +3,8 @@ module: ai-service-admin
feature: ASA
status: in_progress
created: 2026-02-24
last_updated: 2026-02-24
last_updated: "2026-02-24"
version: "0.3.0"
---
# AI 中台管理界面ai-service-admin进度文档
@ -28,41 +29,60 @@ last_updated: 2026-02-24
- [x] Phase 3: RAG 实验室 (100%) [P3-01 ~ P3-04]
- [x] Phase 4: 会话监控与详情 (100%) [P4-01 ~ P4-03]
- [x] Phase 5: 后端管理接口实现 (100%) [Backend Admin APIs]
- [ ] Phase 6: 嵌入模型管理 (0%) [P5-01 ~ P5-08]
- [ ] Phase 7: LLM 配置与 RAG 调试输出 (0%) [P6-01 ~ P6-10] 🔄当前
## current_phase
**goal**: 知识库管理模块开发,实现文档上传、列表展示与状态轮询
**goal**: 实现 LLM 模型配置页面及 RAG 实验室 AI 输出调试功能
### sub_tasks
- [x] (P1-01) 初始化 `ai-service-admin` 前端工程Vue 3 + Element Plus + RuoYi-Vue 基座对齐),落地基础目录结构与路由骨架
- [x] (P1-02) 接入 Pinia实现 `tenant` store`currentTenantId`并持久化localStorage提供切换租户能力
- [x] (P1-03) Axios/SDK 请求层封装:创建统一 `request` 实例,自动注入必填 Header `X-Tenant-Id`
- [x] (P1-04) 全局异常拦截:实现 401/403 响应拦截策略
- [x] (P1-05) 基础组件封装:`BaseTable`、`BaseForm` 并给出示例页
- [x] (P2-01) 创建 `openapi.deps.yaml` 明确依赖契约 (L1) [AC-ASA-08]
- [x] (P2-02) 实现知识库列表 API 对接及分页展示 [AC-ASA-08]
- [x] (P2-03) 实现文档上传功能Multipart/form-data[AC-ASA-01]
- [x] (P2-04) 实现索引任务状态轮询机制3s 间隔)[AC-ASA-02]
- [x] (P2-05) 失败任务错误详情弹窗展示 [AC-ASA-02]
- [x] (P5-01) 实现后端 GET /admin/kb/documents 文档列表接口 [AC-ASA-08]
- [x] (P5-02) 实现后端 POST /admin/kb/documents 文档上传接口 [AC-ASA-01]
- [x] (P5-03) 实现后端 GET /admin/kb/index/jobs/{jobId} 索引任务查询接口 [AC-ASA-02]
- [x] (P5-04) 实现后端 POST /admin/rag/experiments/run RAG实验接口 [AC-ASA-05]
- [x] (P5-05) 实现后端 GET /admin/sessions 会话列表接口 [AC-ASA-09]
- [x] (P5-06) 实现后端 GET /admin/sessions/{sessionId} 会话详情接口 [AC-ASA-07]
#### Phase 1-5 已完成
- [x] (P1-01) 初始化前端工程
- [x] (P1-02) 接入 Pinia tenant store
- [x] (P1-03) Axios 请求层封装
- [x] (P1-04) 全局异常拦截
- [x] (P1-05) 基础组件封装
- [x] (P2-01~P2-05) 知识库管理功能
- [x] (P3-01~P3-04) RAG 实验室功能
- [x] (P4-01~P4-03) 会话监控功能
- [x] (P5-01~P5-06) 后端管理接口实现
#### Phase 6: 嵌入模型管理(待处理)
- [ ] (P5-01) API 服务层与类型定义 [AC-ASA-08, AC-ASA-09]
- [ ] (P5-02) 提供者选择组件 [AC-ASA-09]
- [ ] (P5-03) 动态配置表单 [AC-ASA-09, AC-ASA-10]
- [ ] (P5-04) 测试连接组件 [AC-ASA-11, AC-ASA-12]
- [ ] (P5-05) 支持格式组件 [AC-ASA-13]
- [ ] (P5-06) 页面骨架与路由 [AC-ASA-08]
- [ ] (P5-07) 配置加载与保存 [AC-ASA-08, AC-ASA-10]
- [ ] (P5-08) 组件整合与测试 [AC-ASA-08~AC-ASA-13]
#### Phase 7: LLM 配置与 RAG 调试输出(当前)
- [ ] (P6-01) LLM API 服务层与类型定义:创建 src/api/llm.ts 和 src/types/llm.ts [AC-ASA-14, AC-ASA-15]
- [ ] (P6-02) LLM 提供者选择组件:创建 LLMProviderSelect.vue [AC-ASA-15]
- [ ] (P6-03) LLM 动态配置表单:创建 LLMConfigForm.vue [AC-ASA-15, AC-ASA-16]
- [ ] (P6-04) LLM 测试连接组件:创建 LLMTestPanel.vue [AC-ASA-17, AC-ASA-18]
- [ ] (P6-05) LLM 配置页面:创建 /admin/llm 页面 [AC-ASA-14, AC-ASA-16]
- [ ] (P6-06) AI 回复展示组件:创建 AIResponseViewer.vue [AC-ASA-19]
- [ ] (P6-07) 流式输出支持:实现 SSE 流式输出展示 [AC-ASA-20]
- [ ] (P6-08) Token 统计展示:展示 Token 消耗、响应耗时 [AC-ASA-21]
- [ ] (P6-09) LLM 选择器:在 RAG 实验室中添加 LLM 配置选择器 [AC-ASA-22]
- [ ] (P6-10) RAG 实验室整合:将 AI 输出组件整合到 RAG 实验室 [AC-ASA-19~AC-ASA-22]
### next_action
**immediate**: 后端管理接口已实现完成,等待前端联调
**immediate**: 并行启动 3 个窗口执行 Phase 6 和 Phase 7 任务
**details**:
- file: "ai-service/app/api/admin/"
- action: "后端 7 个管理接口已全部实现,包含 Mock 数据返回,支持前端并行开发"
- reference: "spec/ai-service/openapi.admin.yaml"
- file: "ai-service-admin/src/"
- action: "窗口1: 嵌入管理组件; 窗口2: LLM 配置组件; 窗口3: RAG 实验室增强"
- reference: "spec/ai-service-admin/openapi.deps.yaml"
- constraints:
- 所有接口均已实现 X-Tenant-Id Header 校验
- 返回数据格式与契约定义一致
- 每个任务必须包含 AC 标记
- 完成后更新 spec/ai-service-admin/tasks.md
- commit message 格式: `feat(ASA-P6/P7): <desc> [AC-ASA-XX]`
### backend_implementation_summary
@ -147,6 +167,30 @@ export const useTenantStore = defineStore('tenant', {
- ai-service/app/main.py - 注册管理路由
- docs/progress/ai-service-admin-progress.md - 更新进度
- session: "Session #3 (2026-02-24) - 嵌入模型管理需求规划"
completed:
- 更新 spec/ai-service-admin/requirements.md 添加 v0.2.0 迭代需求
- 更新 spec/ai-service-admin/tasks.md 添加 Phase 5 任务
- 更新 spec/ai-service-admin/openapi.deps.yaml 添加嵌入管理接口
- 更新进度文档添加 Phase 6 任务
changes:
- spec/ai-service-admin/requirements.md - 新增 AC-ASA-08~AC-ASA-13
- spec/ai-service-admin/tasks.md - 新增 P5-01~P5-08 任务
- spec/ai-service-admin/openapi.deps.yaml - 完整重写,添加嵌入管理接口
- docs/progress/ai-service-admin-progress.md - 添加 Phase 6
- session: "Session #4 (2026-02-24) - LLM 配置与 RAG 调试输出需求规划"
completed:
- 更新 spec/ai-service-admin/requirements.md 添加 v0.3.0 迭代需求
- 更新 spec/ai-service-admin/tasks.md 添加 Phase 6 任务
- 更新 spec/ai-service-admin/openapi.deps.yaml 添加 LLM 管理和 RAG 实验增强接口
- 更新进度文档添加 Phase 7 任务
changes:
- spec/ai-service-admin/requirements.md - 新增 AC-ASA-14~AC-ASA-22
- spec/ai-service-admin/tasks.md - 新增 P6-01~P6-10 任务
- spec/ai-service-admin/openapi.deps.yaml - 添加 LLM 配置接口和 RAG 实验增强接口
- docs/progress/ai-service-admin-progress.md - 添加 Phase 7
## startup_guide
1. **Step 1**: 读取本进度文档(了解当前位置与下一步)
@ -159,9 +203,12 @@ export const useTenantStore = defineStore('tenant', {
| Phase | 名称 | 任务数 | 状态 |
|-------|------|--------|------|
| Phase 1 | 基础建设 | 5 | ⏳ 待开始 |
| Phase 2 | 知识库管理 | 5 | ⏳ 待开始 |
| Phase 3 | RAG 实验室 | 4 | ⏳ 待开始 |
| Phase 4 | 会话监控与详情 | 3 | ⏳ 待开始 |
| Phase 1 | 基础建设 | 5 | ✅ 完成 |
| Phase 2 | 知识库管理 | 5 | ✅ 完成 |
| Phase 3 | RAG 实验室 | 4 | ✅ 完成 |
| Phase 4 | 会话监控与详情 | 3 | ✅ 完成 |
| Phase 5 | 后端管理接口实现 | 6 | ✅ 完成 |
| Phase 6 | 嵌入模型管理 | 8 | ⏳ 待处理 |
| Phase 7 | LLM 配置与 RAG 调试输出 | 10 | 🔄 进行中 |
**总计: 17 个任务**
**总计: 41 个任务 | 已完成: 23 个 | 待处理: 8 个 | 进行中: 10 个**

View File

@ -1,6 +1,592 @@
openapi: 3.1.0
info:
title: \" AI Service Admin "Dependencies\
description: \ai-service-admin" 模块依赖的外<E79A84>?API 契约Consumer "需求侧)\
version: \0.1.0\
x-contract-level: L1
openapi: 3.1.0
info:
title: "AI Service Admin Dependencies"
description: "ai-service-admin 模块依赖的外部 API 契约Consumer 需求侧)"
version: "0.3.0"
x-contract-level: L1
servers:
- url: http://localhost:8000
description: 本地开发服务器
paths:
/admin/embedding/providers:
get:
operationId: listEmbeddingProviders
summary: 获取可用的嵌入模型提供者列表
tags:
- Embedding Management
parameters:
- name: X-Tenant-Id
in: header
required: true
schema:
type: string
responses:
'200':
description: 成功返回提供者列表
content:
application/json:
schema:
type: object
properties:
providers:
type: array
items:
$ref: '#/components/schemas/EmbeddingProviderInfo'
/admin/embedding/config:
get:
operationId: getEmbeddingConfig
summary: 获取当前嵌入模型配置
tags:
- Embedding Management
parameters:
- name: X-Tenant-Id
in: header
required: true
schema:
type: string
responses:
'200':
description: 成功返回当前配置
content:
application/json:
schema:
$ref: '#/components/schemas/EmbeddingConfig'
put:
operationId: updateEmbeddingConfig
summary: 更新嵌入模型配置
tags:
- Embedding Management
parameters:
- name: X-Tenant-Id
in: header
required: true
schema:
type: string
requestBody:
required: true
content:
application/json:
schema:
$ref: '#/components/schemas/EmbeddingConfigUpdate'
responses:
'200':
description: 配置更新成功
content:
application/json:
schema:
type: object
properties:
success:
type: boolean
message:
type: string
/admin/embedding/test:
post:
operationId: testEmbedding
summary: 测试嵌入模型连接
tags:
- Embedding Management
parameters:
- name: X-Tenant-Id
in: header
required: true
schema:
type: string
requestBody:
required: false
content:
application/json:
schema:
type: object
properties:
test_text:
type: string
description: 测试文本(可选)
config:
$ref: '#/components/schemas/EmbeddingConfigUpdate'
responses:
'200':
description: 测试成功
content:
application/json:
schema:
$ref: '#/components/schemas/EmbeddingTestResult'
/admin/embedding/formats:
get:
operationId: getSupportedFormats
summary: 获取支持的文档格式列表
tags:
- Embedding Management
parameters:
- name: X-Tenant-Id
in: header
required: true
schema:
type: string
responses:
'200':
description: 成功返回支持格式列表
content:
application/json:
schema:
type: object
properties:
formats:
type: array
items:
$ref: '#/components/schemas/DocumentFormat'
/admin/llm/providers:
get:
operationId: listLLMProviders
summary: 获取可用的 LLM 提供者列表
tags:
- LLM Management
parameters:
- name: X-Tenant-Id
in: header
required: true
schema:
type: string
responses:
'200':
description: 成功返回提供者列表
content:
application/json:
schema:
type: object
properties:
providers:
type: array
items:
$ref: '#/components/schemas/LLMProviderInfo'
/admin/llm/config:
get:
operationId: getLLMConfig
summary: 获取当前 LLM 配置
tags:
- LLM Management
parameters:
- name: X-Tenant-Id
in: header
required: true
schema:
type: string
responses:
'200':
description: 成功返回当前配置
content:
application/json:
schema:
$ref: '#/components/schemas/LLMConfig'
put:
operationId: updateLLMConfig
summary: 更新 LLM 配置
tags:
- LLM Management
parameters:
- name: X-Tenant-Id
in: header
required: true
schema:
type: string
requestBody:
required: true
content:
application/json:
schema:
$ref: '#/components/schemas/LLMConfigUpdate'
responses:
'200':
description: 配置更新成功
content:
application/json:
schema:
type: object
properties:
success:
type: boolean
message:
type: string
/admin/llm/test:
post:
operationId: testLLM
summary: 测试 LLM 连接
tags:
- LLM Management
parameters:
- name: X-Tenant-Id
in: header
required: true
schema:
type: string
requestBody:
required: false
content:
application/json:
schema:
type: object
properties:
test_prompt:
type: string
description: 测试提示词(可选)
example: "你好,请简单介绍一下自己。"
config:
$ref: '#/components/schemas/LLMConfigUpdate'
responses:
'200':
description: 测试成功
content:
application/json:
schema:
$ref: '#/components/schemas/LLMTestResult'
/admin/rag/experiments/run:
post:
operationId: runRagExperiment
summary: 运行 RAG 实验(含 AI 输出)
tags:
- RAG Lab
parameters:
- name: X-Tenant-Id
in: header
required: true
schema:
type: string
requestBody:
required: true
content:
application/json:
schema:
$ref: '#/components/schemas/RagExperimentRequest'
responses:
'200':
description: 实验完成
content:
application/json:
schema:
$ref: '#/components/schemas/RagExperimentResult'
/admin/rag/experiments/stream:
post:
operationId: runRagExperimentStream
summary: 运行 RAG 实验(流式输出)
tags:
- RAG Lab
parameters:
- name: X-Tenant-Id
in: header
required: true
schema:
type: string
requestBody:
required: true
content:
application/json:
schema:
$ref: '#/components/schemas/RagExperimentRequest'
responses:
'200':
description: SSE 流式输出
content:
text/event-stream:
schema:
type: string
components:
schemas:
EmbeddingProviderInfo:
type: object
required:
- name
- display_name
- config_schema
properties:
name:
type: string
description: 提供者唯一标识
example: "ollama"
display_name:
type: string
description: 提供者显示名称
example: "Ollama 本地模型"
description:
type: string
description: 提供者描述
example: "使用 Ollama 运行的本地嵌入模型"
config_schema:
type: object
description: 配置参数定义JSON Schema 格式)
additionalProperties: true
EmbeddingConfig:
type: object
required:
- provider
- config
properties:
provider:
type: string
description: 当前激活的提供者
example: "ollama"
config:
type: object
description: 提供者配置参数
additionalProperties: true
updated_at:
type: string
format: date-time
description: 配置最后更新时间
EmbeddingConfigUpdate:
type: object
required:
- provider
properties:
provider:
type: string
description: 提供者标识
example: "ollama"
config:
type: object
description: 提供者配置参数
additionalProperties: true
EmbeddingTestResult:
type: object
required:
- success
- dimension
properties:
success:
type: boolean
description: 测试是否成功
dimension:
type: integer
description: 返回的向量维度
example: 768
latency_ms:
type: number
description: 响应延迟(毫秒)
example: 125.5
message:
type: string
description: 测试结果消息
example: "连接成功,向量维度: 768"
error:
type: string
description: 错误信息(失败时)
example: "连接超时"
DocumentFormat:
type: object
required:
- extension
- name
properties:
extension:
type: string
description: 文件扩展名
example: ".pdf"
name:
type: string
description: 格式名称
example: "PDF 文档"
description:
type: string
description: 格式描述
example: "使用 PyMuPDF 解析 PDF 文档"
LLMProviderInfo:
type: object
required:
- name
- display_name
- config_schema
properties:
name:
type: string
description: 提供者唯一标识
example: "openai"
display_name:
type: string
description: 提供者显示名称
example: "OpenAI"
description:
type: string
description: 提供者描述
example: "OpenAI GPT 系列模型"
config_schema:
type: object
description: 配置参数定义JSON Schema 格式)
additionalProperties: true
LLMConfig:
type: object
required:
- provider
- config
properties:
provider:
type: string
description: 当前激活的提供者
example: "openai"
config:
type: object
description: 提供者配置参数
additionalProperties: true
example:
api_key: "sk-xxx"
base_url: "https://api.openai.com/v1"
model: "gpt-4o-mini"
updated_at:
type: string
format: date-time
description: 配置最后更新时间
LLMConfigUpdate:
type: object
required:
- provider
properties:
provider:
type: string
description: 提供者标识
example: "openai"
config:
type: object
description: 提供者配置参数
additionalProperties: true
LLMTestResult:
type: object
required:
- success
properties:
success:
type: boolean
description: 测试是否成功
response:
type: string
description: LLM 响应内容
example: "你好!我是一个 AI 助手..."
latency_ms:
type: number
description: 响应延迟(毫秒)
example: 1250.5
prompt_tokens:
type: integer
description: 输入 Token 数
example: 15
completion_tokens:
type: integer
description: 输出 Token 数
example: 50
total_tokens:
type: integer
description: 总 Token 数
example: 65
message:
type: string
description: 测试结果消息
example: "连接成功"
error:
type: string
description: 错误信息(失败时)
example: "API Key 无效"
RagExperimentRequest:
type: object
required:
- query
properties:
query:
type: string
description: 查询文本
example: "什么是 RAG"
kb_ids:
type: array
items:
type: string
description: 知识库 ID 列表
top_k:
type: integer
description: 检索数量
default: 5
score_threshold:
type: number
description: 相似度阈值
default: 0.5
llm_provider:
type: string
description: 指定 LLM 提供者(可选)
example: "openai"
generate_response:
type: boolean
description: 是否生成 AI 回复
default: true
RagExperimentResult:
type: object
properties:
query:
type: string
description: 原始查询
retrieval_results:
type: array
items:
$ref: '#/components/schemas/RetrievalResult'
final_prompt:
type: string
description: 最终拼接的 Prompt
ai_response:
$ref: '#/components/schemas/AIResponse'
total_latency_ms:
type: number
description: 总耗时(毫秒)
RetrievalResult:
type: object
properties:
content:
type: string
description: 检索到的内容
score:
type: number
description: 相似度分数
source:
type: string
description: 来源文档
metadata:
type: object
additionalProperties: true
description: 元数据
AIResponse:
type: object
properties:
content:
type: string
description: AI 回复内容
prompt_tokens:
type: integer
description: 输入 Token 数
completion_tokens:
type: integer
description: 输出 Token 数
total_tokens:
type: integer
description: 总 Token 数
latency_ms:
type: number
description: 生成耗时(毫秒)
model:
type: string
description: 使用的模型

View File

@ -2,7 +2,7 @@
feature_id: "ASA"
title: "AI 中台管理界面ai-service-admin需求规范"
status: "draft"
version: "0.1.0"
version: "0.3.0"
owners:
- "product"
- "frontend"
@ -70,3 +70,92 @@ source:
| AC-ASA-03 | /admin/config/prompt-templates/{tplId}/publish | POST | 发布指定版本 |
| AC-ASA-05 | /admin/rag/experiments/run | POST | 触发调试实验 |
| AC-ASA-07 | /admin/sessions/{sessionId} | GET | 获取全链路详情 |
---
## 7. 迭代需求嵌入模型管理v0.2.0
> 说明:本节为 v0.2.0 迭代新增,用于支持嵌入模型的界面配置与管理。
### 7.1 嵌入模型配置管理
- [AC-ASA-08] WHEN 用户访问嵌入模型配置页面 THEN 系统 SHALL 展示当前激活的嵌入模型提供者及其配置参数。
- [AC-ASA-09] WHEN 用户切换嵌入模型提供者 THEN 系统 SHALL 动态展示该提供者的配置参数表单,并保留当前配置值。
- [AC-ASA-10] WHEN 用户修改嵌入模型配置并保存 THEN 系统 SHALL 验证配置有效性,更新配置并提示操作结果。
- [AC-ASA-11] WHEN 用户点击"测试连接"按钮 THEN 系统 SHALL 调用嵌入模型生成测试向量,展示连接状态、向量维度和响应延迟。
- [AC-ASA-12] WHEN 嵌入模型连接测试失败 THEN 系统 SHALL 展示详细错误信息,帮助用户排查配置问题。
### 7.2 文档格式支持展示
- [AC-ASA-13] WHEN 用户查看嵌入模型配置页面 THEN 系统 SHALL 展示当前支持的文档格式列表PDF、Word、Excel、TXT 等)。
### 7.3 用户故事(迭代追加)
- [US-ASA-06] 作为系统管理员,我希望在界面上配置和切换嵌入模型,以便快速适配不同的业务场景而无需修改代码。
- [US-ASA-07] 作为系统管理员,我希望在保存配置前测试嵌入模型连接,以便确保配置正确后再正式启用。
### 7.4 追踪映射(迭代追加)
| AC ID | Endpoint | 方法 | 备注 |
|------|----------|------|-----|
| AC-ASA-08 | /admin/embedding/config | GET | 获取当前配置 |
| AC-ASA-09 | /admin/embedding/providers | GET | 获取提供者列表及配置定义 |
| AC-ASA-10 | /admin/embedding/config | PUT | 更新配置 |
| AC-ASA-11 | /admin/embedding/test | POST | 测试连接 |
| AC-ASA-12 | /admin/embedding/test | POST | 测试失败错误展示 |
| AC-ASA-13 | /admin/embedding/formats | GET | 获取支持格式 |
---
## 8. 迭代需求LLM 模型配置与 RAG 调试输出v0.3.0
> 说明:本节为 v0.3.0 迭代新增,用于支持 LLM 模型的界面配置及 RAG 实验室的 AI 输出调试。
### 8.1 LLM 模型配置管理
- [AC-ASA-14] WHEN 用户访问 LLM 模型配置页面 THEN 系统 SHALL 展示当前激活的 LLM 提供者及其配置参数API Key、Base URL、模型名称等
- [AC-ASA-15] WHEN 用户切换 LLM 提供者 THEN 系统 SHALL 动态展示该提供者的配置参数表单,并保留当前配置值。
- [AC-ASA-16] WHEN 用户修改 LLM 模型配置并保存 THEN 系统 SHALL 验证配置有效性,更新配置并提示操作结果。
- [AC-ASA-17] WHEN 用户点击"测试连接"按钮 THEN 系统 SHALL 调用 LLM 生成测试回复,展示连接状态、模型响应和耗时。
- [AC-ASA-18] WHEN LLM 连接测试失败 THEN 系统 SHALL 展示详细错误信息,帮助用户排查配置问题。
### 8.2 RAG 实验室 AI 输出展示
- [AC-ASA-19] WHEN 用户运行 RAG 实验后 THEN 系统 SHALL 在结果区域新增"AI 回复"展示区,显示基于检索结果生成的 AI 最终输出。
- [AC-ASA-20] WHEN AI 回复生成中 THEN 系统 SHALL 展示 Loading 状态支持流式输出展示SSE
- [AC-ASA-21] WHEN AI 回复生成完成 THEN 系统 SHALL 展示完整的回复内容、Token 消耗统计、响应耗时。
- [AC-ASA-22] WHEN 用户选择不同的 LLM 配置 THEN 系统 SHALL 使用选定的 LLM 模型生成回复,便于对比不同模型效果。
### 8.3 用户故事(迭代追加)
- [US-ASA-08] 作为系统管理员,我希望在界面上配置和切换不同的 LLM 提供者(如 OpenAI、Ollama、Azure 等),以便快速适配不同的业务场景。
- [US-ASA-09] 作为 AI 开发者,我希望在 RAG 实验室中看到 AI 的最终输出,以便完整调试 RAG 链路效果,而不仅仅是检索结果。
- [US-ASA-10] 作为 Prompt 工程师,我希望对比不同 LLM 模型在相同检索结果下的回复效果,以便选择最适合业务场景的模型。
### 8.4 追踪映射(迭代追加)
| AC ID | Endpoint | 方法 | 备注 |
|------|----------|------|-----|
| AC-ASA-14 | /admin/llm/config | GET | 获取当前 LLM 配置 |
| AC-ASA-15 | /admin/llm/providers | GET | 获取 LLM 提供者列表 |
| AC-ASA-16 | /admin/llm/config | PUT | 更新 LLM 配置 |
| AC-ASA-17 | /admin/llm/test | POST | 测试 LLM 连接 |
| AC-ASA-18 | /admin/llm/test | POST | LLM 测试失败错误展示 |
| AC-ASA-19 | /admin/rag/experiments/run | POST | RAG 实验增加 AI 输出 |
| AC-ASA-20 | /admin/rag/experiments/stream | POST | RAG 实验流式输出SSE |
| AC-ASA-21 | /admin/rag/experiments/run | POST | Token 统计与耗时 |
| AC-ASA-22 | /admin/rag/experiments/run | POST | 支持指定 LLM 配置 |

View File

@ -123,29 +123,29 @@ principles:
> 页面导向:嵌入模型配置页面,支持提供者切换、参数配置、连接测试。
- [ ] (P5-01) 嵌入模型配置页面骨架:创建 `/admin/embedding` 路由,布局包含提供者选择区、配置表单区、测试连接区、支持格式展示区。
- AC: [AC-ASA-08]
- [ ] (P5-01) API 服务层与类型定义:创建 src/api/embedding.ts 和 src/types/embedding.ts
- AC: [AC-ASA-08, AC-ASA-09]
- [x] (P5-02) 提供者选择组件:实现 `EmbeddingProviderSelect` 下拉组件,对接 `/admin/embedding/providers`展示提供者列表name、display_name、description
- [ ] (P5-02) 提供者选择组件:实现 `EmbeddingProviderSelect` 下拉组件,对接 `/admin/embedding/providers`
- AC: [AC-ASA-09]
- [x] (P5-03) 动态配置表单:根据选中提供者的 `config_schema` 动态渲染配置表单(支持 string、integer、number 类型),实现表单校验。
- [ ] (P5-03) 动态配置表单:根据 `config_schema` 动态渲染配置表单,实现表单校验
- AC: [AC-ASA-09, AC-ASA-10]
- [ ] (P5-04) 当前配置加载:页面初始化时调用 `/admin/embedding/config` 获取当前配置,填充表单默认值。
- [ ] (P5-04) 测试连接组件:实现 `EmbeddingTestPanel`,展示测试结果和错误信息
- AC: [AC-ASA-11, AC-ASA-12]
- [ ] (P5-05) 支持格式组件:实现 `SupportedFormats`,展示支持的文档格式列表
- AC: [AC-ASA-13]
- [ ] (P5-06) 页面骨架与路由:创建 `/admin/embedding` 页面,布局包含各功能区
- AC: [AC-ASA-08]
- [ ] (P5-05) 配置保存功能:实现保存按钮,调用 `PUT /admin/embedding/config`,处理成功/失败响应并提示用户。
- AC: [AC-ASA-10]
- [ ] (P5-07) 配置加载与保存:实现配置加载、保存逻辑
- AC: [AC-ASA-08, AC-ASA-10]
- [x] (P5-06) 测试连接功能:实现测试按钮,调用 `POST /admin/embedding/test`展示测试结果success、dimension、latency_ms、message
- AC: [AC-ASA-11]
- [x] (P5-07) 测试失败错误展示测试失败时展示详细错误信息error 字段),并提供排查建议。
- AC: [AC-ASA-12]
- [ ] (P5-08) 支持格式展示:调用 `/admin/embedding/formats` 获取支持的文档格式列表,以标签或卡片形式展示。
- AC: [AC-ASA-13]
- [ ] (P5-08) 组件整合与测试:整合所有组件完成功能闭环
- AC: [AC-ASA-08~AC-ASA-13]
---
@ -153,11 +153,68 @@ principles:
| 任务 | 描述 | 状态 |
|------|------|------|
| P5-01 | 嵌入模型配置页面骨架 | ⏳ 待处理 |
| P5-02 | 提供者选择组件 | ✅ 已完成 |
| P5-03 | 动态配置表单 | ✅ 已完成 |
| P5-04 | 当前配置加载 | ⏳ 待处理 |
| P5-05 | 配置保存功能 | ⏳ 待处理 |
| P5-06 | 测试连接功能 | ✅ 已完成 |
| P5-07 | 测试失败错误展示 | ✅ 已完成 |
| P5-08 | 支持格式展示 | ⏳ 待处理 |
| P5-01 | API 服务层与类型定义 | ⏳ 待处理 |
| P5-02 | 提供者选择组件 | ⏳ 待处理 |
| P5-03 | 动态配置表单 | ⏳ 待处理 |
| P5-04 | 测试连接组件 | ⏳ 待处理 |
| P5-05 | 支持格式组件 | ⏳ 待处理 |
| P5-06 | 页面骨架与路由 | ⏳ 待处理 |
| P5-07 | 配置加载与保存 | ⏳ 待处理 |
| P5-08 | 组件整合与测试 | ⏳ 待处理 |
---
## Phase 6: LLM 模型配置与 RAG 调试输出v0.3.0
> 页面导向LLM 模型配置页面 + RAG 实验室 AI 输出增强。
### 6.1 LLM 模型配置
- [ ] (P6-01) LLM API 服务层与类型定义:创建 src/api/llm.ts 和 src/types/llm.ts
- AC: [AC-ASA-14, AC-ASA-15]
- [ ] (P6-02) LLM 提供者选择组件:实现 `LLMProviderSelect` 下拉组件
- AC: [AC-ASA-15]
- [ ] (P6-03) LLM 动态配置表单:根据 `config_schema` 动态渲染配置表单
- AC: [AC-ASA-15, AC-ASA-16]
- [ ] (P6-04) LLM 测试连接组件:实现 `LLMTestPanel`,展示测试回复和耗时
- AC: [AC-ASA-17, AC-ASA-18]
- [ ] (P6-05) LLM 配置页面:创建 `/admin/llm` 页面,整合所有组件
- AC: [AC-ASA-14, AC-ASA-16]
### 6.2 RAG 实验室 AI 输出增强
- [ ] (P6-06) AI 回复展示组件:实现 `AIResponseViewer`,展示 AI 最终输出
- AC: [AC-ASA-19]
- [ ] (P6-07) 流式输出支持:实现 SSE 流式输出展示,支持实时显示 AI 回复
- AC: [AC-ASA-20]
- [ ] (P6-08) Token 统计展示:展示 Token 消耗、响应耗时等统计信息
- AC: [AC-ASA-21]
- [ ] (P6-09) LLM 选择器:在 RAG 实验室中添加 LLM 配置选择器
- AC: [AC-ASA-22]
- [ ] (P6-10) RAG 实验室整合:将 AI 输出组件整合到 RAG 实验室页面
- AC: [AC-ASA-19~AC-ASA-22]
---
## Phase 6 任务进度追踪
| 任务 | 描述 | 状态 |
|------|------|------|
| P6-01 | LLM API 服务层与类型定义 | ⏳ 待处理 |
| P6-02 | LLM 提供者选择组件 | ⏳ 待处理 |
| P6-03 | LLM 动态配置表单 | ⏳ 待处理 |
| P6-04 | LLM 测试连接组件 | ⏳ 待处理 |
| P6-05 | LLM 配置页面 | ⏳ 待处理 |
| P6-06 | AI 回复展示组件 | ⏳ 待处理 |
| P6-07 | 流式输出支持 | ⏳ 待处理 |
| P6-08 | Token 统计展示 | ⏳ 待处理 |
| P6-09 | LLM 选择器 | ⏳ 待处理 |
| P6-10 | RAG 实验室整合 | ⏳ 待处理 |

View File

@ -25,6 +25,10 @@ tags:
description: 健康检查
- name: Embedding Management
description: 嵌入模型管理
- name: LLM Management
description: LLM 模型管理
- name: RAG Lab
description: RAG 实验室
paths:
/ai/chat:
@ -536,3 +540,428 @@ components:
type: string
description: 错误信息(失败时)
example: "连接超时"
LLMProviderInfo:
type: object
description: LLM 提供者信息
required:
- name
- display_name
- config_schema
properties:
name:
type: string
description: 提供者唯一标识
example: "openai"
display_name:
type: string
description: 提供者显示名称
example: "OpenAI"
description:
type: string
description: 提供者描述
example: "OpenAI GPT 系列模型"
config_schema:
type: object
description: 配置参数定义JSON Schema 格式)
additionalProperties: true
LLMConfig:
type: object
description: 当前 LLM 配置
required:
- provider
- config
properties:
provider:
type: string
description: 当前激活的提供者
example: "openai"
config:
type: object
description: 提供者配置参数(敏感字段已脱敏)
additionalProperties: true
example:
api_key: "sk-***"
base_url: "https://api.openai.com/v1"
model: "gpt-4o-mini"
updated_at:
type: string
format: date-time
description: 配置最后更新时间
LLMConfigUpdate:
type: object
description: LLM 配置更新请求
required:
- provider
properties:
provider:
type: string
description: 提供者标识
example: "openai"
config:
type: object
description: 提供者配置参数
additionalProperties: true
LLMTestResult:
type: object
description: LLM 测试结果
required:
- success
properties:
success:
type: boolean
description: 测试是否成功
response:
type: string
description: LLM 响应内容
example: "你好!我是一个 AI 助手..."
latency_ms:
type: number
description: 响应延迟(毫秒)
example: 1250.5
prompt_tokens:
type: integer
description: 输入 Token 数
example: 15
completion_tokens:
type: integer
description: 输出 Token 数
example: 50
total_tokens:
type: integer
description: 总 Token 数
example: 65
model:
type: string
description: 使用的模型
example: "gpt-4o-mini"
message:
type: string
description: 测试结果消息
example: "连接成功"
error:
type: string
description: 错误信息(失败时)
example: "API Key 无效"
RagExperimentRequest:
type: object
description: RAG 实验请求
required:
- query
properties:
query:
type: string
description: 查询文本
example: "什么是 RAG"
kb_ids:
type: array
items:
type: string
description: 知识库 ID 列表
top_k:
type: integer
description: 检索数量
default: 5
score_threshold:
type: number
description: 相似度阈值
default: 0.5
generate_response:
type: boolean
description: 是否生成 AI 回复
default: true
llm_provider:
type: string
description: 指定 LLM 提供者(可选)
example: "openai"
RagExperimentResult:
type: object
description: RAG 实验结果
properties:
query:
type: string
description: 原始查询
retrieval_results:
type: array
items:
$ref: '#/components/schemas/RetrievalResult'
final_prompt:
type: string
description: 最终拼接的 Prompt
ai_response:
$ref: '#/components/schemas/AIResponse'
total_latency_ms:
type: number
description: 总耗时(毫秒)
diagnostics:
type: object
additionalProperties: true
description: 诊断信息
RetrievalResult:
type: object
description: 检索结果
properties:
content:
type: string
description: 检索到的内容
score:
type: number
description: 相似度分数
source:
type: string
description: 来源文档
metadata:
type: object
additionalProperties: true
description: 元数据
AIResponse:
type: object
description: AI 回复
properties:
content:
type: string
description: AI 回复内容
prompt_tokens:
type: integer
description: 输入 Token 数
completion_tokens:
type: integer
description: 输出 Token 数
total_tokens:
type: integer
description: 总 Token 数
latency_ms:
type: number
description: 生成耗时(毫秒)
model:
type: string
description: 使用的模型
/admin/llm/providers:
get:
operationId: listLLMProviders
summary: 获取可用的 LLM 提供者列表
description: |
[AC-ASA-15] 返回所有支持的 LLM 提供者及其配置参数定义。
支持的提供者OpenAI、Ollama、Azure OpenAI
tags:
- LLM Management
parameters:
- name: X-Tenant-Id
in: header
required: true
schema:
type: string
responses:
'200':
description: 成功返回提供者列表
content:
application/json:
schema:
type: object
properties:
providers:
type: array
items:
$ref: '#/components/schemas/LLMProviderInfo'
'401':
description: 未授权
content:
application/json:
schema:
$ref: '#/components/schemas/ErrorResponse'
/admin/llm/config:
get:
operationId: getLLMConfig
summary: 获取当前 LLM 配置
description: |
[AC-ASA-14] 返回当前激活的 LLM 提供者及其配置参数。
敏感字段(如 API Key会被脱敏显示。
tags:
- LLM Management
parameters:
- name: X-Tenant-Id
in: header
required: true
schema:
type: string
responses:
'200':
description: 成功返回当前配置
content:
application/json:
schema:
$ref: '#/components/schemas/LLMConfig'
'401':
description: 未授权
content:
application/json:
schema:
$ref: '#/components/schemas/ErrorResponse'
put:
operationId: updateLLMConfig
summary: 更新 LLM 配置
description: |
[AC-ASA-16] 更新 LLM 提供者和配置参数。
配置更新后立即生效,无需重启服务。
tags:
- LLM Management
parameters:
- name: X-Tenant-Id
in: header
required: true
schema:
type: string
requestBody:
required: true
content:
application/json:
schema:
$ref: '#/components/schemas/LLMConfigUpdate'
responses:
'200':
description: 配置更新成功
content:
application/json:
schema:
type: object
properties:
success:
type: boolean
message:
type: string
'400':
description: 请求参数错误
content:
application/json:
schema:
$ref: '#/components/schemas/ErrorResponse'
'401':
description: 未授权
content:
application/json:
schema:
$ref: '#/components/schemas/ErrorResponse'
/admin/llm/test:
post:
operationId: testLLM
summary: 测试 LLM 连接
description: |
[AC-ASA-17, AC-ASA-18] 测试 LLM 提供者连接。
发送测试提示词并返回响应结果,包含 Token 消耗和延迟统计。
tags:
- LLM Management
parameters:
- name: X-Tenant-Id
in: header
required: true
schema:
type: string
requestBody:
required: false
content:
application/json:
schema:
type: object
properties:
test_prompt:
type: string
description: 测试提示词(可选)
example: "你好,请简单介绍一下自己。"
provider:
type: string
description: 指定测试的提供者(可选,默认使用当前配置)
config:
$ref: '#/components/schemas/LLMConfigUpdate'
responses:
'200':
description: 测试完成
content:
application/json:
schema:
$ref: '#/components/schemas/LLMTestResult'
'401':
description: 未授权
content:
application/json:
schema:
$ref: '#/components/schemas/ErrorResponse'
/admin/rag/experiments/run:
post:
operationId: runRagExperiment
summary: 运行 RAG 实验(含 AI 输出)
description: |
[AC-ASA-05, AC-ASA-19, AC-ASA-21, AC-ASA-22] 运行 RAG 实验。
返回检索结果、最终 Prompt 和 AI 回复。
tags:
- RAG Lab
parameters:
- name: X-Tenant-Id
in: header
required: true
schema:
type: string
requestBody:
required: true
content:
application/json:
schema:
$ref: '#/components/schemas/RagExperimentRequest'
responses:
'200':
description: 实验完成
content:
application/json:
schema:
$ref: '#/components/schemas/RagExperimentResult'
'401':
description: 未授权
content:
application/json:
schema:
$ref: '#/components/schemas/ErrorResponse'
/admin/rag/experiments/stream:
post:
operationId: runRagExperimentStream
summary: 运行 RAG 实验(流式输出)
description: |
[AC-ASA-20] 运行 RAG 实验并以 SSE 流式输出 AI 回复。
事件类型retrieval、prompt、message、final、error
tags:
- RAG Lab
parameters:
- name: X-Tenant-Id
in: header
required: true
schema:
type: string
requestBody:
required: true
content:
application/json:
schema:
$ref: '#/components/schemas/RagExperimentRequest'
responses:
'200':
description: SSE 流式输出
content:
text/event-stream:
schema:
type: string
'401':
description: 未授权
content:
application/json:
schema:
$ref: '#/components/schemas/ErrorResponse'

View File

@ -2,7 +2,7 @@
feature_id: "AISVC"
title: "Python AI 中台ai-service进度追踪"
status: "completed"
version: "0.3.0"
version: "0.4.0"
last_updated: "2026-02-24"
---
@ -54,6 +54,66 @@ last_updated: "2026-02-24"
| Phase 5 | 集成测试 | 100% | ✅ 完成 |
| Phase 6 | 前后端联调 | 100% | ✅ 完成 |
| Phase 7 | 嵌入模型可插拔与文档解析 | 100% | ✅ 完成 |
| Phase 8 | LLM 配置与 RAG 调试输出 | 100% | ✅ 完成 |
**测试统计: 184 tests passing**
---
## Phase 8: LLM 配置与 RAG 调试输出v0.4.0 迭代)
### 8.1 设计目标
- LLM 提供者可插拔设计
- 支持界面配置不同供应商的 AI
- RAG 实验室支持 AI 输出调试
### 8.2 实现详情 (2026-02-24)
#### LLM 服务实现
- 创建 LLMProviderFactory 工厂类 (`app/services/llm/factory.py`)
- 支持 OpenAI、Ollama、Azure OpenAI 三种提供者
- 实现 LLMConfigManager 配置热更新
- 实现连接测试功能
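上述可插拔工厂的思路可以用下面的最小草图示意(假设性代码:类名沿用本节描述,但注册方式与方法签名均为示例,并非 `app/services/llm/factory.py` 的实际实现):

```python
# Hypothetical sketch of a pluggable LLM provider factory.
# Names follow the description above; the real factory.py may differ.
from dataclasses import dataclass
from typing import Callable, Dict, List


@dataclass
class LLMProvider:
    name: str
    display_name: str
    config_schema: dict
    build: Callable[[dict], Callable[[str], str]]  # config -> chat callable


class LLMProviderFactory:
    """Registry of LLM providers selectable by name at runtime."""

    _registry: Dict[str, LLMProvider] = {}

    @classmethod
    def register(cls, provider: LLMProvider) -> None:
        cls._registry[provider.name] = provider

    @classmethod
    def list_providers(cls) -> List[dict]:
        return [
            {"name": p.name, "display_name": p.display_name, "config_schema": p.config_schema}
            for p in cls._registry.values()
        ]

    @classmethod
    def create(cls, name: str, config: dict) -> Callable[[str], str]:
        if name not in cls._registry:
            raise ValueError(f"unknown LLM provider: {name}")
        return cls._registry[name].build(config)


# Register a demo provider (stand-in for the real openai / ollama / azure adapters).
LLMProviderFactory.register(
    LLMProvider(
        name="echo",
        display_name="Echo (demo)",
        config_schema={"type": "object", "properties": {"prefix": {"type": "string"}}},
        build=lambda cfg: (lambda prompt: cfg.get("prefix", "") + prompt),
    )
)

if __name__ == "__main__":
    chat = LLMProviderFactory.create("echo", {"prefix": "[demo] "})
    print(chat("你好"))  # -> [demo] 你好
```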
#### API 端点实现
- GET /admin/llm/providers - 获取 LLM 提供者列表
- GET /admin/llm/config - 获取当前 LLM 配置
- PUT /admin/llm/config - 更新 LLM 配置
- POST /admin/llm/test - 测试 LLM 连接
#### RAG 实验增强
- 更新 POST /admin/rag/experiments/run - 支持 AI 回复生成
- 新增 POST /admin/rag/experiments/stream - SSE 流式输出
- 支持 Token 统计和响应耗时
- 支持指定 LLM 提供者
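新增的流式接口形态可参考下面的 FastAPI 最小草图事件名取自契约列出的 retrieval / prompt / message / final / error检索与生成逻辑以占位数据代替仅为示意并非实际实现代码

```python
# Hypothetical sketch of the SSE streaming endpoint shape (not the real ai-service code).
# Event names follow the contract: retrieval, prompt, message, final, error.
import json
from typing import Optional

from fastapi import FastAPI
from fastapi.responses import StreamingResponse
from pydantic import BaseModel

app = FastAPI()


class RagExperimentRequest(BaseModel):
    query: str
    top_k: int = 5
    generate_response: bool = True
    llm_provider: Optional[str] = None


def sse(event: str, data: dict) -> str:
    """Format a single Server-Sent Events frame."""
    return f"event: {event}\ndata: {json.dumps(data, ensure_ascii=False)}\n\n"


@app.post("/admin/rag/experiments/stream")
async def run_rag_experiment_stream(req: RagExperimentRequest):
    async def gen():
        # 1. retrieval results (placeholder instead of a real vector search)
        yield sse("retrieval", {"results": [{"content": "示例片段", "score": 0.82}]})
        # 2. the assembled prompt
        yield sse("prompt", {"final_prompt": f"已知资料:……\n问题:{req.query}"})
        # 3. incremental model output, one chunk per "message" event
        for chunk in ("这是", "流式", "回复"):
            yield sse("message", {"delta": chunk})
        # 4. final event carrying token usage and latency statistics
        yield sse("final", {"total_tokens": 65, "latency_ms": 1250.5})

    return StreamingResponse(gen(), media_type="text/event-stream")
```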
### 8.3 任务进度
| 任务 | 描述 | 状态 |
|------|------|------|
| T8.1 | LLMProviderFactory 工厂类 | ✅ 完成 |
| T8.2 | LLMConfigManager 配置管理 | ✅ 完成 |
| T8.3 | GET /admin/llm/providers | ✅ 完成 |
| T8.4 | GET /admin/llm/config | ✅ 完成 |
| T8.5 | PUT /admin/llm/config | ✅ 完成 |
| T8.6 | POST /admin/llm/test | ✅ 完成 |
| T8.7 | RAG 实验支持 AI 回复 | ✅ 完成 |
| T8.8 | RAG 实验流式输出 | ✅ 完成 |
| T8.9 | 支持指定 LLM 提供者 | ✅ 完成 |
| T8.10 | 更新 OpenAPI 契约 | ✅ 完成 |
---
## v0.4.0 完成总结
**Phase 8 已全部完成**
| 模块 | 文件数 | 状态 |
|------|--------|------|
| LLM 服务 | 1 | ✅ |
| API 端点 | 2 | ✅ |
| OpenAPI 契约 | 1 | ✅ |
**测试统计: 184 tests passing**

View File

@ -1,8 +1,8 @@
---
feature_id: "AISVC"
title: "Python AI 中台ai-service需求规范"
status: "draft"
version: "0.3.0"
status: "completed"
version: "0.4.0"
owners:
- "product"
- "backend"
@ -264,3 +264,45 @@ source:
| AC-AISVC-39 | /admin/embedding/config | GET | getEmbeddingConfig | 当前配置查询 |
| AC-AISVC-40 | /admin/embedding/config | PUT | updateEmbeddingConfig | 配置更新 |
| AC-AISVC-41 | /admin/embedding/test | POST | testEmbedding | 连接测试 |
---
## 11. 迭代需求LLM 模型配置与 RAG 调试输出v0.4.0
> 说明:本节为 v0.4.0 迭代新增,用于支持 LLM 模型的界面配置及 RAG 实验室的 AI 输出调试。
### 11.1 LLM 模型配置管理
- [AC-AISVC-42] WHEN 前端通过 `GET /admin/llm/providers` 获取 LLM 提供者列表 THEN 系统 SHALL 返回所有支持的 LLM 提供者及其配置参数定义。
- [AC-AISVC-43] WHEN 前端通过 `GET /admin/llm/config` 获取当前 LLM 配置 THEN 系统 SHALL 返回当前激活的 LLM 提供者及其配置参数(敏感字段脱敏)。
- [AC-AISVC-44] WHEN 前端通过 `PUT /admin/llm/config` 更新 LLM 配置 THEN 系统 SHALL 验证配置有效性,更新配置并立即生效。
- [AC-AISVC-45] WHEN 前端通过 `POST /admin/llm/test` 测试 LLM 连接 THEN 系统 SHALL 调用 LLM 生成测试回复返回响应内容、Token 消耗和延迟统计。
- [AC-AISVC-46] WHEN LLM 连接测试失败 THEN 系统 SHALL 返回详细错误信息,帮助用户排查配置问题。
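其中 AC-AISVC-43 的"敏感字段脱敏"可以按如下思路处理(示意代码:敏感字段清单与遮盖规则均为假设,并非 ai-service 的实际逻辑):

```python
# Hypothetical masking helper for sensitive LLM config fields before returning them.
# The sensitive-key list and masking rule are assumptions, not the actual ai-service logic.
SENSITIVE_KEYS = {"api_key", "api_secret", "token"}


def mask_config(config: dict) -> dict:
    """Return a copy of the config with sensitive values partially hidden."""
    masked = {}
    for key, value in config.items():
        if key in SENSITIVE_KEYS and isinstance(value, str) and value:
            masked[key] = value[:3] + "***"  # keep a short prefix, e.g. "sk-***"
        else:
            masked[key] = value
    return masked


print(mask_config({"api_key": "sk-abc123", "model": "gpt-4o-mini"}))
# -> {'api_key': 'sk-***', 'model': 'gpt-4o-mini'}
```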
### 11.2 RAG 实验室 AI 输出增强
- [AC-AISVC-47] WHEN 前端通过 `POST /admin/rag/experiments/run` 运行 RAG 实验 THEN 系统 SHALL 返回检索结果、最终 Prompt 和 AI 回复。
- [AC-AISVC-48] WHEN 前端通过 `POST /admin/rag/experiments/stream` 运行 RAG 实验 THEN 系统 SHALL 以 SSE 流式输出 AI 回复。
- [AC-AISVC-49] WHEN RAG 实验生成 AI 回复 THEN 系统 SHALL 返回 Token 消耗统计和响应耗时。
- [AC-AISVC-50] WHEN RAG 实验请求指定 `llm_provider` THEN 系统 SHALL 使用指定的 LLM 提供者生成回复。
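从调用方角度,指定 `llm_provider` 并读取 Token 统计的调用方式大致如下假设服务运行在本地 8000 端口,租户 ID 为占位示例,字段名取自契约定义):

```python
# Hypothetical client-side sketch for POST /admin/rag/experiments/run.
# Assumes ai-service is reachable on localhost:8000; the tenant id is a placeholder.
import httpx

payload = {
    "query": "什么是 RAG",
    "top_k": 5,
    "generate_response": True,
    "llm_provider": "openai",  # AC-AISVC-50: use a specific LLM provider
}

resp = httpx.post(
    "http://localhost:8000/admin/rag/experiments/run",
    json=payload,
    headers={"X-Tenant-Id": "tenant-demo"},  # required header per the contract
    timeout=60,
)
resp.raise_for_status()
result = resp.json()

ai = result.get("ai_response") or {}
print("retrieved chunks:", len(result.get("retrieval_results", [])))
print("total tokens:", ai.get("total_tokens"), "latency(ms):", ai.get("latency_ms"))
```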
### 11.3 追踪映射v0.4.0 迭代)
| AC ID | Endpoint | 方法 | Operation | 描述 |
|-------|----------|------|-----------|------|
| AC-AISVC-42 | /admin/llm/providers | GET | listLLMProviders | LLM 提供者列表 |
| AC-AISVC-43 | /admin/llm/config | GET | getLLMConfig | 当前 LLM 配置查询 |
| AC-AISVC-44 | /admin/llm/config | PUT | updateLLMConfig | LLM 配置更新 |
| AC-AISVC-45 | /admin/llm/test | POST | testLLM | LLM 连接测试 |
| AC-AISVC-46 | /admin/llm/test | POST | testLLM | LLM 测试失败处理 |
| AC-AISVC-47 | /admin/rag/experiments/run | POST | runRagExperiment | RAG 实验含 AI 输出 |
| AC-AISVC-48 | /admin/rag/experiments/stream | POST | runRagExperimentStream | RAG 实验流式输出 |
| AC-AISVC-49 | /admin/rag/experiments/run | POST | runRagExperiment | Token 统计 |
| AC-AISVC-50 | /admin/rag/experiments/run | POST | runRagExperiment | 指定 LLM 提供者 |

View File

@ -2,7 +2,7 @@
feature_id: "AISVC"
title: "Python AI 中台ai-service任务清单"
status: "completed"
version: "0.3.0"
version: "0.4.0"
last_updated: "2026-02-24"
---
@ -83,7 +83,7 @@ last_updated: "2026-02-24"
## 5. 完成总结
**Phase 1-7 已全部完成**
**Phase 1-7 已全部完成Phase 8 进行中**
| Phase | 描述 | 任务数 | 状态 |
|-------|------|--------|------|
@ -94,8 +94,9 @@ last_updated: "2026-02-24"
| Phase 5 | 集成测试 | 3 | ✅ 完成 |
| Phase 6 | 前后端联调真实对接 | 9 | ✅ 完成 |
| Phase 7 | 嵌入模型可插拔与文档解析 | 21 | ✅ 完成 |
| Phase 8 | LLM 配置与 RAG 调试输出 | 10 | ⏳ 进行中 |
**已完成: 53 个任务**
**已完成: 53 个任务 | 进行中: 10 个任务**
---
@ -121,3 +122,17 @@ last_updated: "2026-02-24"
- [x] T7.19 编写嵌入服务单元测试 `[AC-AISVC-29, AC-AISVC-30, AC-AISVC-31, AC-AISVC-32]`
- [x] T7.20 编写文档解析单元测试 `[AC-AISVC-33, AC-AISVC-34, AC-AISVC-35, AC-AISVC-36, AC-AISVC-37]`
- [x] T7.21 编写嵌入管理 API 集成测试 `[AC-AISVC-38, AC-AISVC-39, AC-AISVC-40, AC-AISVC-41]`
---
### Phase 8: LLM 配置与 RAG 调试输出v0.4.0 迭代)
- [x] T8.1 设计 `LLMProviderFactory` 工厂类:支持根据配置动态加载提供者 `[AC-AISVC-42]`
- [x] T8.2 实现 `LLMConfigManager` 配置管理:支持动态配置与热更新 `[AC-AISVC-43, AC-AISVC-44]`
- [x] T8.3 实现 `GET /admin/llm/providers` API返回可用提供者列表 `[AC-AISVC-42]`
- [x] T8.4 实现 `GET /admin/llm/config` API返回当前配置 `[AC-AISVC-43]`
- [x] T8.5 实现 `PUT /admin/llm/config` API更新配置 `[AC-AISVC-44]`
- [x] T8.6 实现 `POST /admin/llm/test` API测试 LLM 连接 `[AC-AISVC-45, AC-AISVC-46]`
- [x] T8.7 更新 RAG 实验接口:支持 AI 回复生成 `[AC-AISVC-47, AC-AISVC-49]`
- [x] T8.8 实现 RAG 实验流式输出SSE 流式 AI 回复 `[AC-AISVC-48]`
- [x] T8.9 支持指定 LLM 提供者RAG 实验可选择不同 LLM `[AC-AISVC-50]`
- [x] T8.10 更新 OpenAPI 契约:添加 LLM 管理和 RAG 实验增强接口

test-doc.txt Normal file
View File

@ -0,0 +1,18 @@
这是一个测试文档用于验证RAG检索功能。
世界设定:
这是一个奇幻世界,名为艾泽拉斯。这个世界由多个大陆组成,包括东部王国、卡利姆多和诺森德。
主要种族:
1. 人类 - 居住在东部王国,拥有强大的骑士和法师
2. 精灵 - 分为暗夜精灵和高等精灵,擅长弓箭和魔法
3. 矮人 - 居住在山脉中,善于锻造和采矿
4. 兽人 - 来自外域,拥有强大的战士
魔法系统:
这个世界充满了魔法能量,法师可以从空气中汲取魔力施放法术。
主要魔法类型包括:火焰、冰霜、奥术、暗影和神圣。
历史背景:
这个世界经历了多次大战,最近的一次是天灾军团的入侵。
巫妖王阿尔萨斯率领亡灵大军试图征服整个世界。