diff --git a/.gitignore b/.gitignore index 5d381cc..232ea0c 100644 --- a/.gitignore +++ b/.gitignore @@ -158,5 +158,9 @@ cython_debug/ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore # and can be added to the global gitignore or merged into this file. For a more nuclear # option (not recommended) you can uncomment the following to ignore the entire idea folder. -#.idea/ +.idea/ + +# Project specific +ai-service/uploads/ +*.local diff --git a/ai-service-admin/.gitignore b/ai-service-admin/.gitignore new file mode 100644 index 0000000..002f89c --- /dev/null +++ b/ai-service-admin/.gitignore @@ -0,0 +1 @@ +node_modules/ diff --git a/ai-service-admin/index.html b/ai-service-admin/index.html new file mode 100644 index 0000000..d5e0569 --- /dev/null +++ b/ai-service-admin/index.html @@ -0,0 +1,13 @@ + + + + + + + AI Service Admin + + +
+ + + diff --git a/ai-service-admin/package-lock.json b/ai-service-admin/package-lock.json new file mode 100644 index 0000000..24bc60f --- /dev/null +++ b/ai-service-admin/package-lock.json @@ -0,0 +1,1935 @@ +{ + "name": "ai-service-admin", + "version": "0.1.0", + "lockfileVersion": 3, + "requires": true, + "packages": { + "": { + "name": "ai-service-admin", + "version": "0.1.0", + "dependencies": { + "@element-plus/icons-vue": "^2.3.1", + "axios": "^1.6.7", + "element-plus": "^2.6.1", + "pinia": "^2.1.7", + "vue": "^3.4.21", + "vue-router": "^4.3.0" + }, + "devDependencies": { + "@vitejs/plugin-vue": "^5.0.4", + "typescript": "^5.2.2", + "vite": "^5.1.4", + "vue-tsc": "^1.8.27" + } + }, + "node_modules/@babel/helper-string-parser": { + "version": "7.27.1", + "resolved": "https://registry.npmjs.org/@babel/helper-string-parser/-/helper-string-parser-7.27.1.tgz", + "integrity": "sha512-qMlSxKbpRlAridDExk92nSobyDdpPijUq2DW6oDnUqd0iOGxmQjyqhMIihI9+zv4LPyZdRje2cavWPbCbWm3eA==", + "license": "MIT", + "engines": { + "node": ">=6.9.0" + } + }, + "node_modules/@babel/helper-validator-identifier": { + "version": "7.28.5", + "resolved": "https://registry.npmjs.org/@babel/helper-validator-identifier/-/helper-validator-identifier-7.28.5.tgz", + "integrity": "sha512-qSs4ifwzKJSV39ucNjsvc6WVHs6b7S03sOh2OcHF9UHfVPqWWALUsNUVzhSBiItjRZoLHx7nIarVjqKVusUZ1Q==", + "license": "MIT", + "engines": { + "node": ">=6.9.0" + } + }, + "node_modules/@babel/parser": { + "version": "7.29.0", + "resolved": "https://registry.npmjs.org/@babel/parser/-/parser-7.29.0.tgz", + "integrity": "sha512-IyDgFV5GeDUVX4YdF/3CPULtVGSXXMLh1xVIgdCgxApktqnQV0r7/8Nqthg+8YLGaAtdyIlo2qIdZrbCv4+7ww==", + "license": "MIT", + "dependencies": { + "@babel/types": "^7.29.0" + }, + "bin": { + "parser": "bin/babel-parser.js" + }, + "engines": { + "node": ">=6.0.0" + } + }, + "node_modules/@babel/types": { + "version": "7.29.0", + "resolved": "https://registry.npmjs.org/@babel/types/-/types-7.29.0.tgz", + "integrity": "sha512-LwdZHpScM4Qz8Xw2iKSzS+cfglZzJGvofQICy7W7v4caru4EaAmyUuO6BGrbyQ2mYV11W0U8j5mBhd14dd3B0A==", + "license": "MIT", + "dependencies": { + "@babel/helper-string-parser": "^7.27.1", + "@babel/helper-validator-identifier": "^7.28.5" + }, + "engines": { + "node": ">=6.9.0" + } + }, + "node_modules/@ctrl/tinycolor": { + "version": "3.6.1", + "resolved": "https://registry.npmjs.org/@ctrl/tinycolor/-/tinycolor-3.6.1.tgz", + "integrity": "sha512-SITSV6aIXsuVNV3f3O0f2n/cgyEDWoSqtZMYiAmcsYHydcKrOz3gUxB/iXd/Qf08+IZX4KpgNbvUdMBmWz+kcA==", + "license": "MIT", + "engines": { + "node": ">=10" + } + }, + "node_modules/@element-plus/icons-vue": { + "version": "2.3.2", + "resolved": "https://registry.npmjs.org/@element-plus/icons-vue/-/icons-vue-2.3.2.tgz", + "integrity": "sha512-OzIuTaIfC8QXEPmJvB4Y4kw34rSXdCJzxcD1kFStBvr8bK6X1zQAYDo0CNMjojnfTqRQCJ0I7prlErcoRiET2A==", + "license": "MIT", + "peerDependencies": { + "vue": "^3.2.0" + } + }, + "node_modules/@esbuild/aix-ppc64": { + "version": "0.21.5", + "resolved": "https://registry.npmjs.org/@esbuild/aix-ppc64/-/aix-ppc64-0.21.5.tgz", + "integrity": "sha512-1SDgH6ZSPTlggy1yI6+Dbkiz8xzpHJEVAlF/AM1tHPLsf5STom9rwtjE4hKAF20FfXXNTFqEYXyJNWh1GiZedQ==", + "cpu": [ + "ppc64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "aix" + ], + "engines": { + "node": ">=12" + } + }, + "node_modules/@esbuild/android-arm": { + "version": "0.21.5", + "resolved": "https://registry.npmjs.org/@esbuild/android-arm/-/android-arm-0.21.5.tgz", + "integrity": "sha512-vCPvzSjpPHEi1siZdlvAlsPxXl7WbOVUBBAowWug4rJHb68Ox8KualB+1ocNvT5fjv6wpkX6o/iEpbDrf68zcg==", + "cpu": [ + "arm" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "android" + ], + "engines": { + "node": ">=12" + } + }, + "node_modules/@esbuild/android-arm64": { + "version": "0.21.5", + "resolved": "https://registry.npmjs.org/@esbuild/android-arm64/-/android-arm64-0.21.5.tgz", + "integrity": "sha512-c0uX9VAUBQ7dTDCjq+wdyGLowMdtR/GoC2U5IYk/7D1H1JYC0qseD7+11iMP2mRLN9RcCMRcjC4YMclCzGwS/A==", + "cpu": [ + "arm64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "android" + ], + "engines": { + "node": ">=12" + } + }, + "node_modules/@esbuild/android-x64": { + "version": "0.21.5", + "resolved": "https://registry.npmjs.org/@esbuild/android-x64/-/android-x64-0.21.5.tgz", + "integrity": "sha512-D7aPRUUNHRBwHxzxRvp856rjUHRFW1SdQATKXH2hqA0kAZb1hKmi02OpYRacl0TxIGz/ZmXWlbZgjwWYaCakTA==", + "cpu": [ + "x64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "android" + ], + "engines": { + "node": ">=12" + } + }, + "node_modules/@esbuild/darwin-arm64": { + "version": "0.21.5", + "resolved": "https://registry.npmjs.org/@esbuild/darwin-arm64/-/darwin-arm64-0.21.5.tgz", + "integrity": "sha512-DwqXqZyuk5AiWWf3UfLiRDJ5EDd49zg6O9wclZ7kUMv2WRFr4HKjXp/5t8JZ11QbQfUS6/cRCKGwYhtNAY88kQ==", + "cpu": [ + "arm64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "darwin" + ], + "engines": { + "node": ">=12" + } + }, + "node_modules/@esbuild/darwin-x64": { + "version": "0.21.5", + "resolved": "https://registry.npmjs.org/@esbuild/darwin-x64/-/darwin-x64-0.21.5.tgz", + "integrity": "sha512-se/JjF8NlmKVG4kNIuyWMV/22ZaerB+qaSi5MdrXtd6R08kvs2qCN4C09miupktDitvh8jRFflwGFBQcxZRjbw==", + "cpu": [ + "x64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "darwin" + ], + "engines": { + "node": ">=12" + } + }, + "node_modules/@esbuild/freebsd-arm64": { + "version": "0.21.5", + "resolved": "https://registry.npmjs.org/@esbuild/freebsd-arm64/-/freebsd-arm64-0.21.5.tgz", + "integrity": "sha512-5JcRxxRDUJLX8JXp/wcBCy3pENnCgBR9bN6JsY4OmhfUtIHe3ZW0mawA7+RDAcMLrMIZaf03NlQiX9DGyB8h4g==", + "cpu": [ + "arm64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "freebsd" + ], + "engines": { + "node": ">=12" + } + }, + "node_modules/@esbuild/freebsd-x64": { + "version": "0.21.5", + "resolved": "https://registry.npmjs.org/@esbuild/freebsd-x64/-/freebsd-x64-0.21.5.tgz", + "integrity": "sha512-J95kNBj1zkbMXtHVH29bBriQygMXqoVQOQYA+ISs0/2l3T9/kj42ow2mpqerRBxDJnmkUDCaQT/dfNXWX/ZZCQ==", + "cpu": [ + "x64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "freebsd" + ], + "engines": { + "node": ">=12" + } + }, + "node_modules/@esbuild/linux-arm": { + "version": "0.21.5", + "resolved": "https://registry.npmjs.org/@esbuild/linux-arm/-/linux-arm-0.21.5.tgz", + "integrity": "sha512-bPb5AHZtbeNGjCKVZ9UGqGwo8EUu4cLq68E95A53KlxAPRmUyYv2D6F0uUI65XisGOL1hBP5mTronbgo+0bFcA==", + "cpu": [ + "arm" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">=12" + } + }, + "node_modules/@esbuild/linux-arm64": { + "version": "0.21.5", + "resolved": "https://registry.npmjs.org/@esbuild/linux-arm64/-/linux-arm64-0.21.5.tgz", + "integrity": "sha512-ibKvmyYzKsBeX8d8I7MH/TMfWDXBF3db4qM6sy+7re0YXya+K1cem3on9XgdT2EQGMu4hQyZhan7TeQ8XkGp4Q==", + "cpu": [ + "arm64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">=12" + } + }, + "node_modules/@esbuild/linux-ia32": { + "version": "0.21.5", + "resolved": "https://registry.npmjs.org/@esbuild/linux-ia32/-/linux-ia32-0.21.5.tgz", + "integrity": "sha512-YvjXDqLRqPDl2dvRODYmmhz4rPeVKYvppfGYKSNGdyZkA01046pLWyRKKI3ax8fbJoK5QbxblURkwK/MWY18Tg==", + "cpu": [ + "ia32" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">=12" + } + }, + "node_modules/@esbuild/linux-loong64": { + "version": "0.21.5", + "resolved": "https://registry.npmjs.org/@esbuild/linux-loong64/-/linux-loong64-0.21.5.tgz", + "integrity": "sha512-uHf1BmMG8qEvzdrzAqg2SIG/02+4/DHB6a9Kbya0XDvwDEKCoC8ZRWI5JJvNdUjtciBGFQ5PuBlpEOXQj+JQSg==", + "cpu": [ + "loong64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">=12" + } + }, + "node_modules/@esbuild/linux-mips64el": { + "version": "0.21.5", + "resolved": "https://registry.npmjs.org/@esbuild/linux-mips64el/-/linux-mips64el-0.21.5.tgz", + "integrity": "sha512-IajOmO+KJK23bj52dFSNCMsz1QP1DqM6cwLUv3W1QwyxkyIWecfafnI555fvSGqEKwjMXVLokcV5ygHW5b3Jbg==", + "cpu": [ + "mips64el" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">=12" + } + }, + "node_modules/@esbuild/linux-ppc64": { + "version": "0.21.5", + "resolved": "https://registry.npmjs.org/@esbuild/linux-ppc64/-/linux-ppc64-0.21.5.tgz", + "integrity": "sha512-1hHV/Z4OEfMwpLO8rp7CvlhBDnjsC3CttJXIhBi+5Aj5r+MBvy4egg7wCbe//hSsT+RvDAG7s81tAvpL2XAE4w==", + "cpu": [ + "ppc64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">=12" + } + }, + "node_modules/@esbuild/linux-riscv64": { + "version": "0.21.5", + "resolved": "https://registry.npmjs.org/@esbuild/linux-riscv64/-/linux-riscv64-0.21.5.tgz", + "integrity": "sha512-2HdXDMd9GMgTGrPWnJzP2ALSokE/0O5HhTUvWIbD3YdjME8JwvSCnNGBnTThKGEB91OZhzrJ4qIIxk/SBmyDDA==", + "cpu": [ + "riscv64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">=12" + } + }, + "node_modules/@esbuild/linux-s390x": { + "version": "0.21.5", + "resolved": "https://registry.npmjs.org/@esbuild/linux-s390x/-/linux-s390x-0.21.5.tgz", + "integrity": "sha512-zus5sxzqBJD3eXxwvjN1yQkRepANgxE9lgOW2qLnmr8ikMTphkjgXu1HR01K4FJg8h1kEEDAqDcZQtbrRnB41A==", + "cpu": [ + "s390x" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">=12" + } + }, + "node_modules/@esbuild/linux-x64": { + "version": "0.21.5", + "resolved": "https://registry.npmjs.org/@esbuild/linux-x64/-/linux-x64-0.21.5.tgz", + "integrity": "sha512-1rYdTpyv03iycF1+BhzrzQJCdOuAOtaqHTWJZCWvijKD2N5Xu0TtVC8/+1faWqcP9iBCWOmjmhoH94dH82BxPQ==", + "cpu": [ + "x64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">=12" + } + }, + "node_modules/@esbuild/netbsd-x64": { + "version": "0.21.5", + "resolved": "https://registry.npmjs.org/@esbuild/netbsd-x64/-/netbsd-x64-0.21.5.tgz", + "integrity": "sha512-Woi2MXzXjMULccIwMnLciyZH4nCIMpWQAs049KEeMvOcNADVxo0UBIQPfSmxB3CWKedngg7sWZdLvLczpe0tLg==", + "cpu": [ + "x64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "netbsd" + ], + "engines": { + "node": ">=12" + } + }, + "node_modules/@esbuild/openbsd-x64": { + "version": "0.21.5", + "resolved": "https://registry.npmjs.org/@esbuild/openbsd-x64/-/openbsd-x64-0.21.5.tgz", + "integrity": "sha512-HLNNw99xsvx12lFBUwoT8EVCsSvRNDVxNpjZ7bPn947b8gJPzeHWyNVhFsaerc0n3TsbOINvRP2byTZ5LKezow==", + "cpu": [ + "x64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "openbsd" + ], + "engines": { + "node": ">=12" + } + }, + "node_modules/@esbuild/sunos-x64": { + "version": "0.21.5", + "resolved": "https://registry.npmjs.org/@esbuild/sunos-x64/-/sunos-x64-0.21.5.tgz", + "integrity": "sha512-6+gjmFpfy0BHU5Tpptkuh8+uw3mnrvgs+dSPQXQOv3ekbordwnzTVEb4qnIvQcYXq6gzkyTnoZ9dZG+D4garKg==", + "cpu": [ + "x64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "sunos" + ], + "engines": { + "node": ">=12" + } + }, + "node_modules/@esbuild/win32-arm64": { + "version": "0.21.5", + "resolved": "https://registry.npmjs.org/@esbuild/win32-arm64/-/win32-arm64-0.21.5.tgz", + "integrity": "sha512-Z0gOTd75VvXqyq7nsl93zwahcTROgqvuAcYDUr+vOv8uHhNSKROyU961kgtCD1e95IqPKSQKH7tBTslnS3tA8A==", + "cpu": [ + "arm64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "win32" + ], + "engines": { + "node": ">=12" + } + }, + "node_modules/@esbuild/win32-ia32": { + "version": "0.21.5", + "resolved": "https://registry.npmjs.org/@esbuild/win32-ia32/-/win32-ia32-0.21.5.tgz", + "integrity": "sha512-SWXFF1CL2RVNMaVs+BBClwtfZSvDgtL//G/smwAc5oVK/UPu2Gu9tIaRgFmYFFKrmg3SyAjSrElf0TiJ1v8fYA==", + "cpu": [ + "ia32" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "win32" + ], + "engines": { + "node": ">=12" + } + }, + "node_modules/@esbuild/win32-x64": { + "version": "0.21.5", + "resolved": "https://registry.npmjs.org/@esbuild/win32-x64/-/win32-x64-0.21.5.tgz", + "integrity": "sha512-tQd/1efJuzPC6rCFwEvLtci/xNFcTZknmXs98FYDfGE4wP9ClFV98nyKrzJKVPMhdDnjzLhdUyMX4PsQAPjwIw==", + "cpu": [ + "x64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "win32" + ], + "engines": { + "node": ">=12" + } + }, + "node_modules/@floating-ui/core": { + "version": "1.7.4", + "resolved": "https://registry.npmjs.org/@floating-ui/core/-/core-1.7.4.tgz", + "integrity": "sha512-C3HlIdsBxszvm5McXlB8PeOEWfBhcGBTZGkGlWc2U0KFY5IwG5OQEuQ8rq52DZmcHDlPLd+YFBK+cZcytwIFWg==", + "license": "MIT", + "dependencies": { + "@floating-ui/utils": "^0.2.10" + } + }, + "node_modules/@floating-ui/dom": { + "version": "1.7.5", + "resolved": "https://registry.npmjs.org/@floating-ui/dom/-/dom-1.7.5.tgz", + "integrity": "sha512-N0bD2kIPInNHUHehXhMke1rBGs1dwqvC9O9KYMyyjK7iXt7GAhnro7UlcuYcGdS/yYOlq0MAVgrow8IbWJwyqg==", + "license": "MIT", + "dependencies": { + "@floating-ui/core": "^1.7.4", + "@floating-ui/utils": "^0.2.10" + } + }, + "node_modules/@floating-ui/utils": { + "version": "0.2.10", + "resolved": "https://registry.npmjs.org/@floating-ui/utils/-/utils-0.2.10.tgz", + "integrity": "sha512-aGTxbpbg8/b5JfU1HXSrbH3wXZuLPJcNEcZQFMxLs3oSzgtVu6nFPkbbGGUvBcUjKV2YyB9Wxxabo+HEH9tcRQ==", + "license": "MIT" + }, + "node_modules/@jridgewell/sourcemap-codec": { + "version": "1.5.5", + "resolved": "https://registry.npmjs.org/@jridgewell/sourcemap-codec/-/sourcemap-codec-1.5.5.tgz", + "integrity": "sha512-cYQ9310grqxueWbl+WuIUIaiUaDcj7WOq5fVhEljNVgRfOUhY9fy2zTvfoqWsnebh8Sl70VScFbICvJnLKB0Og==", + "license": "MIT" + }, + "node_modules/@popperjs/core": { + "name": "@sxzz/popperjs-es", + "version": "2.11.8", + "resolved": "https://registry.npmjs.org/@sxzz/popperjs-es/-/popperjs-es-2.11.8.tgz", + "integrity": "sha512-wOwESXvvED3S8xBmcPWHs2dUuzrE4XiZeFu7e1hROIJkm02a49N120pmOXxY33sBb6hArItm5W5tcg1cBtV+HQ==", + "license": "MIT", + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/popperjs" + } + }, + "node_modules/@rollup/rollup-android-arm-eabi": { + "version": "4.59.0", + "resolved": "https://registry.npmjs.org/@rollup/rollup-android-arm-eabi/-/rollup-android-arm-eabi-4.59.0.tgz", + "integrity": "sha512-upnNBkA6ZH2VKGcBj9Fyl9IGNPULcjXRlg0LLeaioQWueH30p6IXtJEbKAgvyv+mJaMxSm1l6xwDXYjpEMiLMg==", + "cpu": [ + "arm" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "android" + ] + }, + "node_modules/@rollup/rollup-android-arm64": { + "version": "4.59.0", + "resolved": "https://registry.npmjs.org/@rollup/rollup-android-arm64/-/rollup-android-arm64-4.59.0.tgz", + "integrity": "sha512-hZ+Zxj3SySm4A/DylsDKZAeVg0mvi++0PYVceVyX7hemkw7OreKdCvW2oQ3T1FMZvCaQXqOTHb8qmBShoqk69Q==", + "cpu": [ + "arm64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "android" + ] + }, + "node_modules/@rollup/rollup-darwin-arm64": { + "version": "4.59.0", + "resolved": "https://registry.npmjs.org/@rollup/rollup-darwin-arm64/-/rollup-darwin-arm64-4.59.0.tgz", + "integrity": "sha512-W2Psnbh1J8ZJw0xKAd8zdNgF9HRLkdWwwdWqubSVk0pUuQkoHnv7rx4GiF9rT4t5DIZGAsConRE3AxCdJ4m8rg==", + "cpu": [ + "arm64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "darwin" + ] + }, + "node_modules/@rollup/rollup-darwin-x64": { + "version": "4.59.0", + "resolved": "https://registry.npmjs.org/@rollup/rollup-darwin-x64/-/rollup-darwin-x64-4.59.0.tgz", + "integrity": "sha512-ZW2KkwlS4lwTv7ZVsYDiARfFCnSGhzYPdiOU4IM2fDbL+QGlyAbjgSFuqNRbSthybLbIJ915UtZBtmuLrQAT/w==", + "cpu": [ + "x64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "darwin" + ] + }, + "node_modules/@rollup/rollup-freebsd-arm64": { + "version": "4.59.0", + "resolved": "https://registry.npmjs.org/@rollup/rollup-freebsd-arm64/-/rollup-freebsd-arm64-4.59.0.tgz", + "integrity": "sha512-EsKaJ5ytAu9jI3lonzn3BgG8iRBjV4LxZexygcQbpiU0wU0ATxhNVEpXKfUa0pS05gTcSDMKpn3Sx+QB9RlTTA==", + "cpu": [ + "arm64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "freebsd" + ] + }, + "node_modules/@rollup/rollup-freebsd-x64": { + "version": "4.59.0", + "resolved": "https://registry.npmjs.org/@rollup/rollup-freebsd-x64/-/rollup-freebsd-x64-4.59.0.tgz", + "integrity": "sha512-d3DuZi2KzTMjImrxoHIAODUZYoUUMsuUiY4SRRcJy6NJoZ6iIqWnJu9IScV9jXysyGMVuW+KNzZvBLOcpdl3Vg==", + "cpu": [ + "x64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "freebsd" + ] + }, + "node_modules/@rollup/rollup-linux-arm-gnueabihf": { + "version": "4.59.0", + "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-arm-gnueabihf/-/rollup-linux-arm-gnueabihf-4.59.0.tgz", + "integrity": "sha512-t4ONHboXi/3E0rT6OZl1pKbl2Vgxf9vJfWgmUoCEVQVxhW6Cw/c8I6hbbu7DAvgp82RKiH7TpLwxnJeKv2pbsw==", + "cpu": [ + "arm" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "linux" + ] + }, + "node_modules/@rollup/rollup-linux-arm-musleabihf": { + "version": "4.59.0", + "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-arm-musleabihf/-/rollup-linux-arm-musleabihf-4.59.0.tgz", + "integrity": "sha512-CikFT7aYPA2ufMD086cVORBYGHffBo4K8MQ4uPS/ZnY54GKj36i196u8U+aDVT2LX4eSMbyHtyOh7D7Zvk2VvA==", + "cpu": [ + "arm" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "linux" + ] + }, + "node_modules/@rollup/rollup-linux-arm64-gnu": { + "version": "4.59.0", + "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-arm64-gnu/-/rollup-linux-arm64-gnu-4.59.0.tgz", + "integrity": "sha512-jYgUGk5aLd1nUb1CtQ8E+t5JhLc9x5WdBKew9ZgAXg7DBk0ZHErLHdXM24rfX+bKrFe+Xp5YuJo54I5HFjGDAA==", + "cpu": [ + "arm64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "linux" + ] + }, + "node_modules/@rollup/rollup-linux-arm64-musl": { + "version": "4.59.0", + "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-arm64-musl/-/rollup-linux-arm64-musl-4.59.0.tgz", + "integrity": "sha512-peZRVEdnFWZ5Bh2KeumKG9ty7aCXzzEsHShOZEFiCQlDEepP1dpUl/SrUNXNg13UmZl+gzVDPsiCwnV1uI0RUA==", + "cpu": [ + "arm64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "linux" + ] + }, + "node_modules/@rollup/rollup-linux-loong64-gnu": { + "version": "4.59.0", + "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-loong64-gnu/-/rollup-linux-loong64-gnu-4.59.0.tgz", + "integrity": "sha512-gbUSW/97f7+r4gHy3Jlup8zDG190AuodsWnNiXErp9mT90iCy9NKKU0Xwx5k8VlRAIV2uU9CsMnEFg/xXaOfXg==", + "cpu": [ + "loong64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "linux" + ] + }, + "node_modules/@rollup/rollup-linux-loong64-musl": { + "version": "4.59.0", + "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-loong64-musl/-/rollup-linux-loong64-musl-4.59.0.tgz", + "integrity": "sha512-yTRONe79E+o0FWFijasoTjtzG9EBedFXJMl888NBEDCDV9I2wGbFFfJQQe63OijbFCUZqxpHz1GzpbtSFikJ4Q==", + "cpu": [ + "loong64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "linux" + ] + }, + "node_modules/@rollup/rollup-linux-ppc64-gnu": { + "version": "4.59.0", + "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-ppc64-gnu/-/rollup-linux-ppc64-gnu-4.59.0.tgz", + "integrity": "sha512-sw1o3tfyk12k3OEpRddF68a1unZ5VCN7zoTNtSn2KndUE+ea3m3ROOKRCZxEpmT9nsGnogpFP9x6mnLTCaoLkA==", + "cpu": [ + "ppc64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "linux" + ] + }, + "node_modules/@rollup/rollup-linux-ppc64-musl": { + "version": "4.59.0", + "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-ppc64-musl/-/rollup-linux-ppc64-musl-4.59.0.tgz", + "integrity": "sha512-+2kLtQ4xT3AiIxkzFVFXfsmlZiG5FXYW7ZyIIvGA7Bdeuh9Z0aN4hVyXS/G1E9bTP/vqszNIN/pUKCk/BTHsKA==", + "cpu": [ + "ppc64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "linux" + ] + }, + "node_modules/@rollup/rollup-linux-riscv64-gnu": { + "version": "4.59.0", + "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-riscv64-gnu/-/rollup-linux-riscv64-gnu-4.59.0.tgz", + "integrity": "sha512-NDYMpsXYJJaj+I7UdwIuHHNxXZ/b/N2hR15NyH3m2qAtb/hHPA4g4SuuvrdxetTdndfj9b1WOmy73kcPRoERUg==", + "cpu": [ + "riscv64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "linux" + ] + }, + "node_modules/@rollup/rollup-linux-riscv64-musl": { + "version": "4.59.0", + "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-riscv64-musl/-/rollup-linux-riscv64-musl-4.59.0.tgz", + "integrity": "sha512-nLckB8WOqHIf1bhymk+oHxvM9D3tyPndZH8i8+35p/1YiVoVswPid2yLzgX7ZJP0KQvnkhM4H6QZ5m0LzbyIAg==", + "cpu": [ + "riscv64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "linux" + ] + }, + "node_modules/@rollup/rollup-linux-s390x-gnu": { + "version": "4.59.0", + "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-s390x-gnu/-/rollup-linux-s390x-gnu-4.59.0.tgz", + "integrity": "sha512-oF87Ie3uAIvORFBpwnCvUzdeYUqi2wY6jRFWJAy1qus/udHFYIkplYRW+wo+GRUP4sKzYdmE1Y3+rY5Gc4ZO+w==", + "cpu": [ + "s390x" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "linux" + ] + }, + "node_modules/@rollup/rollup-linux-x64-gnu": { + "version": "4.59.0", + "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-x64-gnu/-/rollup-linux-x64-gnu-4.59.0.tgz", + "integrity": "sha512-3AHmtQq/ppNuUspKAlvA8HtLybkDflkMuLK4DPo77DfthRb71V84/c4MlWJXixZz4uruIH4uaa07IqoAkG64fg==", + "cpu": [ + "x64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "linux" + ] + }, + "node_modules/@rollup/rollup-linux-x64-musl": { + "version": "4.59.0", + "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-x64-musl/-/rollup-linux-x64-musl-4.59.0.tgz", + "integrity": "sha512-2UdiwS/9cTAx7qIUZB/fWtToJwvt0Vbo0zmnYt7ED35KPg13Q0ym1g442THLC7VyI6JfYTP4PiSOWyoMdV2/xg==", + "cpu": [ + "x64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "linux" + ] + }, + "node_modules/@rollup/rollup-openbsd-x64": { + "version": "4.59.0", + "resolved": "https://registry.npmjs.org/@rollup/rollup-openbsd-x64/-/rollup-openbsd-x64-4.59.0.tgz", + "integrity": "sha512-M3bLRAVk6GOwFlPTIxVBSYKUaqfLrn8l0psKinkCFxl4lQvOSz8ZrKDz2gxcBwHFpci0B6rttydI4IpS4IS/jQ==", + "cpu": [ + "x64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "openbsd" + ] + }, + "node_modules/@rollup/rollup-openharmony-arm64": { + "version": "4.59.0", + "resolved": "https://registry.npmjs.org/@rollup/rollup-openharmony-arm64/-/rollup-openharmony-arm64-4.59.0.tgz", + "integrity": "sha512-tt9KBJqaqp5i5HUZzoafHZX8b5Q2Fe7UjYERADll83O4fGqJ49O1FsL6LpdzVFQcpwvnyd0i+K/VSwu/o/nWlA==", + "cpu": [ + "arm64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "openharmony" + ] + }, + "node_modules/@rollup/rollup-win32-arm64-msvc": { + "version": "4.59.0", + "resolved": "https://registry.npmjs.org/@rollup/rollup-win32-arm64-msvc/-/rollup-win32-arm64-msvc-4.59.0.tgz", + "integrity": "sha512-V5B6mG7OrGTwnxaNUzZTDTjDS7F75PO1ae6MJYdiMu60sq0CqN5CVeVsbhPxalupvTX8gXVSU9gq+Rx1/hvu6A==", + "cpu": [ + "arm64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "win32" + ] + }, + "node_modules/@rollup/rollup-win32-ia32-msvc": { + "version": "4.59.0", + "resolved": "https://registry.npmjs.org/@rollup/rollup-win32-ia32-msvc/-/rollup-win32-ia32-msvc-4.59.0.tgz", + "integrity": "sha512-UKFMHPuM9R0iBegwzKF4y0C4J9u8C6MEJgFuXTBerMk7EJ92GFVFYBfOZaSGLu6COf7FxpQNqhNS4c4icUPqxA==", + "cpu": [ + "ia32" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "win32" + ] + }, + "node_modules/@rollup/rollup-win32-x64-gnu": { + "version": "4.59.0", + "resolved": "https://registry.npmjs.org/@rollup/rollup-win32-x64-gnu/-/rollup-win32-x64-gnu-4.59.0.tgz", + "integrity": "sha512-laBkYlSS1n2L8fSo1thDNGrCTQMmxjYY5G0WFWjFFYZkKPjsMBsgJfGf4TLxXrF6RyhI60L8TMOjBMvXiTcxeA==", + "cpu": [ + "x64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "win32" + ] + }, + "node_modules/@rollup/rollup-win32-x64-msvc": { + "version": "4.59.0", + "resolved": "https://registry.npmjs.org/@rollup/rollup-win32-x64-msvc/-/rollup-win32-x64-msvc-4.59.0.tgz", + "integrity": "sha512-2HRCml6OztYXyJXAvdDXPKcawukWY2GpR5/nxKp4iBgiO3wcoEGkAaqctIbZcNB6KlUQBIqt8VYkNSj2397EfA==", + "cpu": [ + "x64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "win32" + ] + }, + "node_modules/@types/estree": { + "version": "1.0.8", + "resolved": "https://registry.npmjs.org/@types/estree/-/estree-1.0.8.tgz", + "integrity": "sha512-dWHzHa2WqEXI/O1E9OjrocMTKJl2mSrEolh1Iomrv6U+JuNwaHXsXx9bLu5gG7BUWFIN0skIQJQ/L1rIex4X6w==", + "dev": true, + "license": "MIT" + }, + "node_modules/@types/lodash": { + "version": "4.17.24", + "resolved": "https://registry.npmjs.org/@types/lodash/-/lodash-4.17.24.tgz", + "integrity": "sha512-gIW7lQLZbue7lRSWEFql49QJJWThrTFFeIMJdp3eH4tKoxm1OvEPg02rm4wCCSHS0cL3/Fizimb35b7k8atwsQ==", + "license": "MIT" + }, + "node_modules/@types/lodash-es": { + "version": "4.17.12", + "resolved": "https://registry.npmjs.org/@types/lodash-es/-/lodash-es-4.17.12.tgz", + "integrity": "sha512-0NgftHUcV4v34VhXm8QBSftKVXtbkBG3ViCjs6+eJ5a6y6Mi/jiFGPc1sC7QK+9BFhWrURE3EOggmWaSxL9OzQ==", + "license": "MIT", + "dependencies": { + "@types/lodash": "*" + } + }, + "node_modules/@types/web-bluetooth": { + "version": "0.0.20", + "resolved": "https://registry.npmjs.org/@types/web-bluetooth/-/web-bluetooth-0.0.20.tgz", + "integrity": "sha512-g9gZnnXVq7gM7v3tJCWV/qw7w+KeOlSHAhgF9RytFyifW6AF61hdT2ucrYhPq9hLs5JIryeupHV3qGk95dH9ow==", + "license": "MIT" + }, + "node_modules/@vitejs/plugin-vue": { + "version": "5.2.4", + "resolved": "https://registry.npmjs.org/@vitejs/plugin-vue/-/plugin-vue-5.2.4.tgz", + "integrity": "sha512-7Yx/SXSOcQq5HiiV3orevHUFn+pmMB4cgbEkDYgnkUWb0WfeQ/wa2yFv6D5ICiCQOVpjA7vYDXrC7AGO8yjDHA==", + "dev": true, + "license": "MIT", + "engines": { + "node": "^18.0.0 || >=20.0.0" + }, + "peerDependencies": { + "vite": "^5.0.0 || ^6.0.0", + "vue": "^3.2.25" + } + }, + "node_modules/@volar/language-core": { + "version": "1.11.1", + "resolved": "https://registry.npmjs.org/@volar/language-core/-/language-core-1.11.1.tgz", + "integrity": "sha512-dOcNn3i9GgZAcJt43wuaEykSluAuOkQgzni1cuxLxTV0nJKanQztp7FxyswdRILaKH+P2XZMPRp2S4MV/pElCw==", + "dev": true, + "license": "MIT", + "dependencies": { + "@volar/source-map": "1.11.1" + } + }, + "node_modules/@volar/source-map": { + "version": "1.11.1", + "resolved": "https://registry.npmjs.org/@volar/source-map/-/source-map-1.11.1.tgz", + "integrity": "sha512-hJnOnwZ4+WT5iupLRnuzbULZ42L7BWWPMmruzwtLhJfpDVoZLjNBxHDi2sY2bgZXCKlpU5XcsMFoYrsQmPhfZg==", + "dev": true, + "license": "MIT", + "dependencies": { + "muggle-string": "^0.3.1" + } + }, + "node_modules/@volar/typescript": { + "version": "1.11.1", + "resolved": "https://registry.npmjs.org/@volar/typescript/-/typescript-1.11.1.tgz", + "integrity": "sha512-iU+t2mas/4lYierSnoFOeRFQUhAEMgsFuQxoxvwn5EdQopw43j+J27a4lt9LMInx1gLJBC6qL14WYGlgymaSMQ==", + "dev": true, + "license": "MIT", + "dependencies": { + "@volar/language-core": "1.11.1", + "path-browserify": "^1.0.1" + } + }, + "node_modules/@vue/compiler-core": { + "version": "3.5.29", + "resolved": "https://registry.npmjs.org/@vue/compiler-core/-/compiler-core-3.5.29.tgz", + "integrity": "sha512-cuzPhD8fwRHk8IGfmYaR4eEe4cAyJEL66Ove/WZL7yWNL134nqLddSLwNRIsFlnnW1kK+p8Ck3viFnC0chXCXw==", + "license": "MIT", + "dependencies": { + "@babel/parser": "^7.29.0", + "@vue/shared": "3.5.29", + "entities": "^7.0.1", + "estree-walker": "^2.0.2", + "source-map-js": "^1.2.1" + } + }, + "node_modules/@vue/compiler-dom": { + "version": "3.5.29", + "resolved": "https://registry.npmjs.org/@vue/compiler-dom/-/compiler-dom-3.5.29.tgz", + "integrity": "sha512-n0G5o7R3uBVmVxjTIYcz7ovr8sy7QObFG8OQJ3xGCDNhbG60biP/P5KnyY8NLd81OuT1WJflG7N4KWYHaeeaIg==", + "license": "MIT", + "dependencies": { + "@vue/compiler-core": "3.5.29", + "@vue/shared": "3.5.29" + } + }, + "node_modules/@vue/compiler-sfc": { + "version": "3.5.29", + "resolved": "https://registry.npmjs.org/@vue/compiler-sfc/-/compiler-sfc-3.5.29.tgz", + "integrity": "sha512-oJZhN5XJs35Gzr50E82jg2cYdZQ78wEwvRO6Y63TvLVTc+6xICzJHP1UIecdSPPYIbkautNBanDiWYa64QSFIA==", + "license": "MIT", + "dependencies": { + "@babel/parser": "^7.29.0", + "@vue/compiler-core": "3.5.29", + "@vue/compiler-dom": "3.5.29", + "@vue/compiler-ssr": "3.5.29", + "@vue/shared": "3.5.29", + "estree-walker": "^2.0.2", + "magic-string": "^0.30.21", + "postcss": "^8.5.6", + "source-map-js": "^1.2.1" + } + }, + "node_modules/@vue/compiler-ssr": { + "version": "3.5.29", + "resolved": "https://registry.npmjs.org/@vue/compiler-ssr/-/compiler-ssr-3.5.29.tgz", + "integrity": "sha512-Y/ARJZE6fpjzL5GH/phJmsFwx3g6t2KmHKHx5q+MLl2kencADKIrhH5MLF6HHpRMmlRAYBRSvv347Mepf1zVNw==", + "license": "MIT", + "dependencies": { + "@vue/compiler-dom": "3.5.29", + "@vue/shared": "3.5.29" + } + }, + "node_modules/@vue/devtools-api": { + "version": "6.6.4", + "resolved": "https://registry.npmjs.org/@vue/devtools-api/-/devtools-api-6.6.4.tgz", + "integrity": "sha512-sGhTPMuXqZ1rVOk32RylztWkfXTRhuS7vgAKv0zjqk8gbsHkJ7xfFf+jbySxt7tWObEJwyKaHMikV/WGDiQm8g==", + "license": "MIT" + }, + "node_modules/@vue/language-core": { + "version": "1.8.27", + "resolved": "https://registry.npmjs.org/@vue/language-core/-/language-core-1.8.27.tgz", + "integrity": "sha512-L8Kc27VdQserNaCUNiSFdDl9LWT24ly8Hpwf1ECy3aFb9m6bDhBGQYOujDm21N7EW3moKIOKEanQwe1q5BK+mA==", + "dev": true, + "license": "MIT", + "dependencies": { + "@volar/language-core": "~1.11.1", + "@volar/source-map": "~1.11.1", + "@vue/compiler-dom": "^3.3.0", + "@vue/shared": "^3.3.0", + "computeds": "^0.0.1", + "minimatch": "^9.0.3", + "muggle-string": "^0.3.1", + "path-browserify": "^1.0.1", + "vue-template-compiler": "^2.7.14" + }, + "peerDependencies": { + "typescript": "*" + }, + "peerDependenciesMeta": { + "typescript": { + "optional": true + } + } + }, + "node_modules/@vue/reactivity": { + "version": "3.5.29", + "resolved": "https://registry.npmjs.org/@vue/reactivity/-/reactivity-3.5.29.tgz", + "integrity": "sha512-zcrANcrRdcLtmGZETBxWqIkoQei8HaFpZWx/GHKxx79JZsiZ8j1du0VUJtu4eJjgFvU/iKL5lRXFXksVmI+5DA==", + "license": "MIT", + "dependencies": { + "@vue/shared": "3.5.29" + } + }, + "node_modules/@vue/runtime-core": { + "version": "3.5.29", + "resolved": "https://registry.npmjs.org/@vue/runtime-core/-/runtime-core-3.5.29.tgz", + "integrity": "sha512-8DpW2QfdwIWOLqtsNcds4s+QgwSaHSJY/SUe04LptianUQ/0xi6KVsu/pYVh+HO3NTVvVJjIPL2t6GdeKbS4Lg==", + "license": "MIT", + "dependencies": { + "@vue/reactivity": "3.5.29", + "@vue/shared": "3.5.29" + } + }, + "node_modules/@vue/runtime-dom": { + "version": "3.5.29", + "resolved": "https://registry.npmjs.org/@vue/runtime-dom/-/runtime-dom-3.5.29.tgz", + "integrity": "sha512-AHvvJEtcY9tw/uk+s/YRLSlxxQnqnAkjqvK25ZiM4CllCZWzElRAoQnCM42m9AHRLNJ6oe2kC5DCgD4AUdlvXg==", + "license": "MIT", + "dependencies": { + "@vue/reactivity": "3.5.29", + "@vue/runtime-core": "3.5.29", + "@vue/shared": "3.5.29", + "csstype": "^3.2.3" + } + }, + "node_modules/@vue/server-renderer": { + "version": "3.5.29", + "resolved": "https://registry.npmjs.org/@vue/server-renderer/-/server-renderer-3.5.29.tgz", + "integrity": "sha512-G/1k6WK5MusLlbxSE2YTcqAAezS+VuwHhOvLx2KnQU7G2zCH6KIb+5Wyt6UjMq7a3qPzNEjJXs1hvAxDclQH+g==", + "license": "MIT", + "dependencies": { + "@vue/compiler-ssr": "3.5.29", + "@vue/shared": "3.5.29" + }, + "peerDependencies": { + "vue": "3.5.29" + } + }, + "node_modules/@vue/shared": { + "version": "3.5.29", + "resolved": "https://registry.npmjs.org/@vue/shared/-/shared-3.5.29.tgz", + "integrity": "sha512-w7SR0A5zyRByL9XUkCfdLs7t9XOHUyJ67qPGQjOou3p6GvBeBW+AVjUUmlxtZ4PIYaRvE+1LmK44O4uajlZwcg==", + "license": "MIT" + }, + "node_modules/@vueuse/core": { + "version": "10.11.1", + "resolved": "https://registry.npmjs.org/@vueuse/core/-/core-10.11.1.tgz", + "integrity": "sha512-guoy26JQktXPcz+0n3GukWIy/JDNKti9v6VEMu6kV2sYBsWuGiTU8OWdg+ADfUbHg3/3DlqySDe7JmdHrktiww==", + "license": "MIT", + "dependencies": { + "@types/web-bluetooth": "^0.0.20", + "@vueuse/metadata": "10.11.1", + "@vueuse/shared": "10.11.1", + "vue-demi": ">=0.14.8" + }, + "funding": { + "url": "https://github.com/sponsors/antfu" + } + }, + "node_modules/@vueuse/metadata": { + "version": "10.11.1", + "resolved": "https://registry.npmjs.org/@vueuse/metadata/-/metadata-10.11.1.tgz", + "integrity": "sha512-IGa5FXd003Ug1qAZmyE8wF3sJ81xGLSqTqtQ6jaVfkeZ4i5kS2mwQF61yhVqojRnenVew5PldLyRgvdl4YYuSw==", + "license": "MIT", + "funding": { + "url": "https://github.com/sponsors/antfu" + } + }, + "node_modules/@vueuse/shared": { + "version": "10.11.1", + "resolved": "https://registry.npmjs.org/@vueuse/shared/-/shared-10.11.1.tgz", + "integrity": "sha512-LHpC8711VFZlDaYUXEBbFBCQ7GS3dVU9mjOhhMhXP6txTV4EhYQg/KGnQuvt/sPAtoUKq7VVUnL6mVtFoL42sA==", + "license": "MIT", + "dependencies": { + "vue-demi": ">=0.14.8" + }, + "funding": { + "url": "https://github.com/sponsors/antfu" + } + }, + "node_modules/async-validator": { + "version": "4.2.5", + "resolved": "https://registry.npmjs.org/async-validator/-/async-validator-4.2.5.tgz", + "integrity": "sha512-7HhHjtERjqlNbZtqNqy2rckN/SpOOlmDliet+lP7k+eKZEjPk3DgyeU9lIXLdeLz0uBbbVp+9Qdow9wJWgwwfg==", + "license": "MIT" + }, + "node_modules/asynckit": { + "version": "0.4.0", + "resolved": "https://registry.npmjs.org/asynckit/-/asynckit-0.4.0.tgz", + "integrity": "sha512-Oei9OH4tRh0YqU3GxhX79dM/mwVgvbZJaSNaRk+bshkj0S5cfHcgYakreBjrHwatXKbz+IoIdYLxrKim2MjW0Q==", + "license": "MIT" + }, + "node_modules/axios": { + "version": "1.13.5", + "resolved": "https://registry.npmjs.org/axios/-/axios-1.13.5.tgz", + "integrity": "sha512-cz4ur7Vb0xS4/KUN0tPWe44eqxrIu31me+fbang3ijiNscE129POzipJJA6zniq2C/Z6sJCjMimjS8Lc/GAs8Q==", + "license": "MIT", + "dependencies": { + "follow-redirects": "^1.15.11", + "form-data": "^4.0.5", + "proxy-from-env": "^1.1.0" + } + }, + "node_modules/balanced-match": { + "version": "4.0.4", + "resolved": "https://registry.npmjs.org/balanced-match/-/balanced-match-4.0.4.tgz", + "integrity": "sha512-BLrgEcRTwX2o6gGxGOCNyMvGSp35YofuYzw9h1IMTRmKqttAZZVU67bdb9Pr2vUHA8+j3i2tJfjO6C6+4myGTA==", + "dev": true, + "license": "MIT", + "engines": { + "node": "18 || 20 || >=22" + } + }, + "node_modules/brace-expansion": { + "version": "5.0.3", + "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-5.0.3.tgz", + "integrity": "sha512-fy6KJm2RawA5RcHkLa1z/ScpBeA762UF9KmZQxwIbDtRJrgLzM10depAiEQ+CXYcoiqW1/m96OAAoke2nE9EeA==", + "dev": true, + "license": "MIT", + "dependencies": { + "balanced-match": "^4.0.2" + }, + "engines": { + "node": "18 || 20 || >=22" + } + }, + "node_modules/call-bind-apply-helpers": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/call-bind-apply-helpers/-/call-bind-apply-helpers-1.0.2.tgz", + "integrity": "sha512-Sp1ablJ0ivDkSzjcaJdxEunN5/XvksFJ2sMBFfq6x0ryhQV/2b/KwFe21cMpmHtPOSij8K99/wSfoEuTObmuMQ==", + "license": "MIT", + "dependencies": { + "es-errors": "^1.3.0", + "function-bind": "^1.1.2" + }, + "engines": { + "node": ">= 0.4" + } + }, + "node_modules/combined-stream": { + "version": "1.0.8", + "resolved": "https://registry.npmjs.org/combined-stream/-/combined-stream-1.0.8.tgz", + "integrity": "sha512-FQN4MRfuJeHf7cBbBMJFXhKSDq+2kAArBlmRBvcvFE5BB1HZKXtSFASDhdlz9zOYwxh8lDdnvmMOe/+5cdoEdg==", + "license": "MIT", + "dependencies": { + "delayed-stream": "~1.0.0" + }, + "engines": { + "node": ">= 0.8" + } + }, + "node_modules/computeds": { + "version": "0.0.1", + "resolved": "https://registry.npmjs.org/computeds/-/computeds-0.0.1.tgz", + "integrity": "sha512-7CEBgcMjVmitjYo5q8JTJVra6X5mQ20uTThdK+0kR7UEaDrAWEQcRiBtWJzga4eRpP6afNwwLsX2SET2JhVB1Q==", + "dev": true, + "license": "MIT" + }, + "node_modules/csstype": { + "version": "3.2.3", + "resolved": "https://registry.npmjs.org/csstype/-/csstype-3.2.3.tgz", + "integrity": "sha512-z1HGKcYy2xA8AGQfwrn0PAy+PB7X/GSj3UVJW9qKyn43xWa+gl5nXmU4qqLMRzWVLFC8KusUX8T/0kCiOYpAIQ==", + "license": "MIT" + }, + "node_modules/dayjs": { + "version": "1.11.19", + "resolved": "https://registry.npmjs.org/dayjs/-/dayjs-1.11.19.tgz", + "integrity": "sha512-t5EcLVS6QPBNqM2z8fakk/NKel+Xzshgt8FFKAn+qwlD1pzZWxh0nVCrvFK7ZDb6XucZeF9z8C7CBWTRIVApAw==", + "license": "MIT" + }, + "node_modules/de-indent": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/de-indent/-/de-indent-1.0.2.tgz", + "integrity": "sha512-e/1zu3xH5MQryN2zdVaF0OrdNLUbvWxzMbi+iNA6Bky7l1RoP8a2fIbRocyHclXt/arDrrR6lL3TqFD9pMQTsg==", + "dev": true, + "license": "MIT" + }, + "node_modules/delayed-stream": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/delayed-stream/-/delayed-stream-1.0.0.tgz", + "integrity": "sha512-ZySD7Nf91aLB0RxL4KGrKHBXl7Eds1DAmEdcoVawXnLD7SDhpNgtuII2aAkg7a7QS41jxPSZ17p4VdGnMHk3MQ==", + "license": "MIT", + "engines": { + "node": ">=0.4.0" + } + }, + "node_modules/dunder-proto": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/dunder-proto/-/dunder-proto-1.0.1.tgz", + "integrity": "sha512-KIN/nDJBQRcXw0MLVhZE9iQHmG68qAVIBg9CqmUYjmQIhgij9U5MFvrqkUL5FbtyyzZuOeOt0zdeRe4UY7ct+A==", + "license": "MIT", + "dependencies": { + "call-bind-apply-helpers": "^1.0.1", + "es-errors": "^1.3.0", + "gopd": "^1.2.0" + }, + "engines": { + "node": ">= 0.4" + } + }, + "node_modules/element-plus": { + "version": "2.13.2", + "resolved": "https://registry.npmjs.org/element-plus/-/element-plus-2.13.2.tgz", + "integrity": "sha512-Zjzm1NnFXGhV4LYZ6Ze9skPlYi2B4KAmN18FL63A3PZcjhDfroHwhtM6RE8BonlOPHXUnPQynH0BgaoEfvhrGw==", + "license": "MIT", + "dependencies": { + "@ctrl/tinycolor": "^3.4.1", + "@element-plus/icons-vue": "^2.3.2", + "@floating-ui/dom": "^1.0.1", + "@popperjs/core": "npm:@sxzz/popperjs-es@^2.11.7", + "@types/lodash": "^4.17.20", + "@types/lodash-es": "^4.17.12", + "@vueuse/core": "^10.11.0", + "async-validator": "^4.2.5", + "dayjs": "^1.11.19", + "lodash": "^4.17.23", + "lodash-es": "^4.17.23", + "lodash-unified": "^1.0.3", + "memoize-one": "^6.0.0", + "normalize-wheel-es": "^1.2.0" + }, + "peerDependencies": { + "vue": "^3.3.0" + } + }, + "node_modules/entities": { + "version": "7.0.1", + "resolved": "https://registry.npmjs.org/entities/-/entities-7.0.1.tgz", + "integrity": "sha512-TWrgLOFUQTH994YUyl1yT4uyavY5nNB5muff+RtWaqNVCAK408b5ZnnbNAUEWLTCpum9w6arT70i1XdQ4UeOPA==", + "license": "BSD-2-Clause", + "engines": { + "node": ">=0.12" + }, + "funding": { + "url": "https://github.com/fb55/entities?sponsor=1" + } + }, + "node_modules/es-define-property": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/es-define-property/-/es-define-property-1.0.1.tgz", + "integrity": "sha512-e3nRfgfUZ4rNGL232gUgX06QNyyez04KdjFrF+LTRoOXmrOgFKDg4BCdsjW8EnT69eqdYGmRpJwiPVYNrCaW3g==", + "license": "MIT", + "engines": { + "node": ">= 0.4" + } + }, + "node_modules/es-errors": { + "version": "1.3.0", + "resolved": "https://registry.npmjs.org/es-errors/-/es-errors-1.3.0.tgz", + "integrity": "sha512-Zf5H2Kxt2xjTvbJvP2ZWLEICxA6j+hAmMzIlypy4xcBg1vKVnx89Wy0GbS+kf5cwCVFFzdCFh2XSCFNULS6csw==", + "license": "MIT", + "engines": { + "node": ">= 0.4" + } + }, + "node_modules/es-object-atoms": { + "version": "1.1.1", + "resolved": "https://registry.npmjs.org/es-object-atoms/-/es-object-atoms-1.1.1.tgz", + "integrity": "sha512-FGgH2h8zKNim9ljj7dankFPcICIK9Cp5bm+c2gQSYePhpaG5+esrLODihIorn+Pe6FGJzWhXQotPv73jTaldXA==", + "license": "MIT", + "dependencies": { + "es-errors": "^1.3.0" + }, + "engines": { + "node": ">= 0.4" + } + }, + "node_modules/es-set-tostringtag": { + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/es-set-tostringtag/-/es-set-tostringtag-2.1.0.tgz", + "integrity": "sha512-j6vWzfrGVfyXxge+O0x5sh6cvxAog0a/4Rdd2K36zCMV5eJ+/+tOAngRO8cODMNWbVRdVlmGZQL2YS3yR8bIUA==", + "license": "MIT", + "dependencies": { + "es-errors": "^1.3.0", + "get-intrinsic": "^1.2.6", + "has-tostringtag": "^1.0.2", + "hasown": "^2.0.2" + }, + "engines": { + "node": ">= 0.4" + } + }, + "node_modules/esbuild": { + "version": "0.21.5", + "resolved": "https://registry.npmjs.org/esbuild/-/esbuild-0.21.5.tgz", + "integrity": "sha512-mg3OPMV4hXywwpoDxu3Qda5xCKQi+vCTZq8S9J/EpkhB2HzKXq4SNFZE3+NK93JYxc8VMSep+lOUSC/RVKaBqw==", + "dev": true, + "hasInstallScript": true, + "license": "MIT", + "bin": { + "esbuild": "bin/esbuild" + }, + "engines": { + "node": ">=12" + }, + "optionalDependencies": { + "@esbuild/aix-ppc64": "0.21.5", + "@esbuild/android-arm": "0.21.5", + "@esbuild/android-arm64": "0.21.5", + "@esbuild/android-x64": "0.21.5", + "@esbuild/darwin-arm64": "0.21.5", + "@esbuild/darwin-x64": "0.21.5", + "@esbuild/freebsd-arm64": "0.21.5", + "@esbuild/freebsd-x64": "0.21.5", + "@esbuild/linux-arm": "0.21.5", + "@esbuild/linux-arm64": "0.21.5", + "@esbuild/linux-ia32": "0.21.5", + "@esbuild/linux-loong64": "0.21.5", + "@esbuild/linux-mips64el": "0.21.5", + "@esbuild/linux-ppc64": "0.21.5", + "@esbuild/linux-riscv64": "0.21.5", + "@esbuild/linux-s390x": "0.21.5", + "@esbuild/linux-x64": "0.21.5", + "@esbuild/netbsd-x64": "0.21.5", + "@esbuild/openbsd-x64": "0.21.5", + "@esbuild/sunos-x64": "0.21.5", + "@esbuild/win32-arm64": "0.21.5", + "@esbuild/win32-ia32": "0.21.5", + "@esbuild/win32-x64": "0.21.5" + } + }, + "node_modules/estree-walker": { + "version": "2.0.2", + "resolved": "https://registry.npmjs.org/estree-walker/-/estree-walker-2.0.2.tgz", + "integrity": "sha512-Rfkk/Mp/DL7JVje3u18FxFujQlTNR2q6QfMSMB7AvCBx91NGj/ba3kCfza0f6dVDbw7YlRf/nDrn7pQrCCyQ/w==", + "license": "MIT" + }, + "node_modules/follow-redirects": { + "version": "1.15.11", + "resolved": "https://registry.npmjs.org/follow-redirects/-/follow-redirects-1.15.11.tgz", + "integrity": "sha512-deG2P0JfjrTxl50XGCDyfI97ZGVCxIpfKYmfyrQ54n5FO/0gfIES8C/Psl6kWVDolizcaaxZJnTS0QSMxvnsBQ==", + "funding": [ + { + "type": "individual", + "url": "https://github.com/sponsors/RubenVerborgh" + } + ], + "license": "MIT", + "engines": { + "node": ">=4.0" + }, + "peerDependenciesMeta": { + "debug": { + "optional": true + } + } + }, + "node_modules/form-data": { + "version": "4.0.5", + "resolved": "https://registry.npmjs.org/form-data/-/form-data-4.0.5.tgz", + "integrity": "sha512-8RipRLol37bNs2bhoV67fiTEvdTrbMUYcFTiy3+wuuOnUog2QBHCZWXDRijWQfAkhBj2Uf5UnVaiWwA5vdd82w==", + "license": "MIT", + "dependencies": { + "asynckit": "^0.4.0", + "combined-stream": "^1.0.8", + "es-set-tostringtag": "^2.1.0", + "hasown": "^2.0.2", + "mime-types": "^2.1.12" + }, + "engines": { + "node": ">= 6" + } + }, + "node_modules/fsevents": { + "version": "2.3.3", + "resolved": "https://registry.npmjs.org/fsevents/-/fsevents-2.3.3.tgz", + "integrity": "sha512-5xoDfX+fL7faATnagmWPpbFtwh/R77WmMMqqHGS65C3vvB0YHrgF+B1YmZ3441tMj5n63k0212XNoJwzlhffQw==", + "dev": true, + "hasInstallScript": true, + "license": "MIT", + "optional": true, + "os": [ + "darwin" + ], + "engines": { + "node": "^8.16.0 || ^10.6.0 || >=11.0.0" + } + }, + "node_modules/function-bind": { + "version": "1.1.2", + "resolved": "https://registry.npmjs.org/function-bind/-/function-bind-1.1.2.tgz", + "integrity": "sha512-7XHNxH7qX9xG5mIwxkhumTox/MIRNcOgDrxWsMt2pAr23WHp6MrRlN7FBSFpCpr+oVO0F744iUgR82nJMfG2SA==", + "license": "MIT", + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/get-intrinsic": { + "version": "1.3.0", + "resolved": "https://registry.npmjs.org/get-intrinsic/-/get-intrinsic-1.3.0.tgz", + "integrity": "sha512-9fSjSaos/fRIVIp+xSJlE6lfwhES7LNtKaCBIamHsjr2na1BiABJPo0mOjjz8GJDURarmCPGqaiVg5mfjb98CQ==", + "license": "MIT", + "dependencies": { + "call-bind-apply-helpers": "^1.0.2", + "es-define-property": "^1.0.1", + "es-errors": "^1.3.0", + "es-object-atoms": "^1.1.1", + "function-bind": "^1.1.2", + "get-proto": "^1.0.1", + "gopd": "^1.2.0", + "has-symbols": "^1.1.0", + "hasown": "^2.0.2", + "math-intrinsics": "^1.1.0" + }, + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/get-proto": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/get-proto/-/get-proto-1.0.1.tgz", + "integrity": "sha512-sTSfBjoXBp89JvIKIefqw7U2CCebsc74kiY6awiGogKtoSGbgjYE/G/+l9sF3MWFPNc9IcoOC4ODfKHfxFmp0g==", + "license": "MIT", + "dependencies": { + "dunder-proto": "^1.0.1", + "es-object-atoms": "^1.0.0" + }, + "engines": { + "node": ">= 0.4" + } + }, + "node_modules/gopd": { + "version": "1.2.0", + "resolved": "https://registry.npmjs.org/gopd/-/gopd-1.2.0.tgz", + "integrity": "sha512-ZUKRh6/kUFoAiTAtTYPZJ3hw9wNxx+BIBOijnlG9PnrJsCcSjs1wyyD6vJpaYtgnzDrKYRSqf3OO6Rfa93xsRg==", + "license": "MIT", + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/has-symbols": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/has-symbols/-/has-symbols-1.1.0.tgz", + "integrity": "sha512-1cDNdwJ2Jaohmb3sg4OmKaMBwuC48sYni5HUw2DvsC8LjGTLK9h+eb1X6RyuOHe4hT0ULCW68iomhjUoKUqlPQ==", + "license": "MIT", + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/has-tostringtag": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/has-tostringtag/-/has-tostringtag-1.0.2.tgz", + "integrity": "sha512-NqADB8VjPFLM2V0VvHUewwwsw0ZWBaIdgo+ieHtK3hasLz4qeCRjYcqfB6AQrBggRKppKF8L52/VqdVsO47Dlw==", + "license": "MIT", + "dependencies": { + "has-symbols": "^1.0.3" + }, + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/hasown": { + "version": "2.0.2", + "resolved": "https://registry.npmjs.org/hasown/-/hasown-2.0.2.tgz", + "integrity": "sha512-0hJU9SCPvmMzIBdZFqNPXWa6dqh7WdH0cII9y+CyS8rG3nL48Bclra9HmKhVVUHyPWNH5Y7xDwAB7bfgSjkUMQ==", + "license": "MIT", + "dependencies": { + "function-bind": "^1.1.2" + }, + "engines": { + "node": ">= 0.4" + } + }, + "node_modules/he": { + "version": "1.2.0", + "resolved": "https://registry.npmjs.org/he/-/he-1.2.0.tgz", + "integrity": "sha512-F/1DnUGPopORZi0ni+CvrCgHQ5FyEAHRLSApuYWMmrbSwoN2Mn/7k+Gl38gJnR7yyDZk6WLXwiGod1JOWNDKGw==", + "dev": true, + "license": "MIT", + "bin": { + "he": "bin/he" + } + }, + "node_modules/lodash": { + "version": "4.17.23", + "resolved": "https://registry.npmjs.org/lodash/-/lodash-4.17.23.tgz", + "integrity": "sha512-LgVTMpQtIopCi79SJeDiP0TfWi5CNEc/L/aRdTh3yIvmZXTnheWpKjSZhnvMl8iXbC1tFg9gdHHDMLoV7CnG+w==", + "license": "MIT" + }, + "node_modules/lodash-es": { + "version": "4.17.23", + "resolved": "https://registry.npmjs.org/lodash-es/-/lodash-es-4.17.23.tgz", + "integrity": "sha512-kVI48u3PZr38HdYz98UmfPnXl2DXrpdctLrFLCd3kOx1xUkOmpFPx7gCWWM5MPkL/fD8zb+Ph0QzjGFs4+hHWg==", + "license": "MIT" + }, + "node_modules/lodash-unified": { + "version": "1.0.3", + "resolved": "https://registry.npmjs.org/lodash-unified/-/lodash-unified-1.0.3.tgz", + "integrity": "sha512-WK9qSozxXOD7ZJQlpSqOT+om2ZfcT4yO+03FuzAHD0wF6S0l0090LRPDx3vhTTLZ8cFKpBn+IOcVXK6qOcIlfQ==", + "license": "MIT", + "peerDependencies": { + "@types/lodash-es": "*", + "lodash": "*", + "lodash-es": "*" + } + }, + "node_modules/magic-string": { + "version": "0.30.21", + "resolved": "https://registry.npmjs.org/magic-string/-/magic-string-0.30.21.tgz", + "integrity": "sha512-vd2F4YUyEXKGcLHoq+TEyCjxueSeHnFxyyjNp80yg0XV4vUhnDer/lvvlqM/arB5bXQN5K2/3oinyCRyx8T2CQ==", + "license": "MIT", + "dependencies": { + "@jridgewell/sourcemap-codec": "^1.5.5" + } + }, + "node_modules/math-intrinsics": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/math-intrinsics/-/math-intrinsics-1.1.0.tgz", + "integrity": "sha512-/IXtbwEk5HTPyEwyKX6hGkYXxM9nbj64B+ilVJnC/R6B0pH5G4V3b0pVbL7DBj4tkhBAppbQUlf6F6Xl9LHu1g==", + "license": "MIT", + "engines": { + "node": ">= 0.4" + } + }, + "node_modules/memoize-one": { + "version": "6.0.0", + "resolved": "https://registry.npmjs.org/memoize-one/-/memoize-one-6.0.0.tgz", + "integrity": "sha512-rkpe71W0N0c0Xz6QD0eJETuWAJGnJ9afsl1srmwPrI+yBCkge5EycXXbYRyvL29zZVUWQCY7InPRCv3GDXuZNw==", + "license": "MIT" + }, + "node_modules/mime-db": { + "version": "1.52.0", + "resolved": "https://registry.npmjs.org/mime-db/-/mime-db-1.52.0.tgz", + "integrity": "sha512-sPU4uV7dYlvtWJxwwxHD0PuihVNiE7TyAbQ5SWxDCB9mUYvOgroQOwYQQOKPJ8CIbE+1ETVlOoK1UC2nU3gYvg==", + "license": "MIT", + "engines": { + "node": ">= 0.6" + } + }, + "node_modules/mime-types": { + "version": "2.1.35", + "resolved": "https://registry.npmjs.org/mime-types/-/mime-types-2.1.35.tgz", + "integrity": "sha512-ZDY+bPm5zTTF+YpCrAU9nK0UgICYPT0QtT1NZWFv4s++TNkcgVaT0g6+4R2uI4MjQjzysHB1zxuWL50hzaeXiw==", + "license": "MIT", + "dependencies": { + "mime-db": "1.52.0" + }, + "engines": { + "node": ">= 0.6" + } + }, + "node_modules/minimatch": { + "version": "9.0.6", + "resolved": "https://registry.npmjs.org/minimatch/-/minimatch-9.0.6.tgz", + "integrity": "sha512-kQAVowdR33euIqeA0+VZTDqU+qo1IeVY+hrKYtZMio3Pg0P0vuh/kwRylLUddJhB6pf3q/botcOvRtx4IN1wqQ==", + "dev": true, + "license": "ISC", + "dependencies": { + "brace-expansion": "^5.0.2" + }, + "engines": { + "node": ">=16 || 14 >=14.17" + }, + "funding": { + "url": "https://github.com/sponsors/isaacs" + } + }, + "node_modules/muggle-string": { + "version": "0.3.1", + "resolved": "https://registry.npmjs.org/muggle-string/-/muggle-string-0.3.1.tgz", + "integrity": "sha512-ckmWDJjphvd/FvZawgygcUeQCxzvohjFO5RxTjj4eq8kw359gFF3E1brjfI+viLMxss5JrHTDRHZvu2/tuy0Qg==", + "dev": true, + "license": "MIT" + }, + "node_modules/nanoid": { + "version": "3.3.11", + "resolved": "https://registry.npmjs.org/nanoid/-/nanoid-3.3.11.tgz", + "integrity": "sha512-N8SpfPUnUp1bK+PMYW8qSWdl9U+wwNWI4QKxOYDy9JAro3WMX7p2OeVRF9v+347pnakNevPmiHhNmZ2HbFA76w==", + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/ai" + } + ], + "license": "MIT", + "bin": { + "nanoid": "bin/nanoid.cjs" + }, + "engines": { + "node": "^10 || ^12 || ^13.7 || ^14 || >=15.0.1" + } + }, + "node_modules/normalize-wheel-es": { + "version": "1.2.0", + "resolved": "https://registry.npmjs.org/normalize-wheel-es/-/normalize-wheel-es-1.2.0.tgz", + "integrity": "sha512-Wj7+EJQ8mSuXr2iWfnujrimU35R2W4FAErEyTmJoJ7ucwTn2hOUSsRehMb5RSYkxXGTM7Y9QpvPmp++w5ftoJw==", + "license": "BSD-3-Clause" + }, + "node_modules/path-browserify": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/path-browserify/-/path-browserify-1.0.1.tgz", + "integrity": "sha512-b7uo2UCUOYZcnF/3ID0lulOJi/bafxa1xPe7ZPsammBSpjSWQkjNxlt635YGS2MiR9GjvuXCtz2emr3jbsz98g==", + "dev": true, + "license": "MIT" + }, + "node_modules/picocolors": { + "version": "1.1.1", + "resolved": "https://registry.npmjs.org/picocolors/-/picocolors-1.1.1.tgz", + "integrity": "sha512-xceH2snhtb5M9liqDsmEw56le376mTZkEX/jEb/RxNFyegNul7eNslCXP9FDj/Lcu0X8KEyMceP2ntpaHrDEVA==", + "license": "ISC" + }, + "node_modules/pinia": { + "version": "2.3.1", + "resolved": "https://registry.npmjs.org/pinia/-/pinia-2.3.1.tgz", + "integrity": "sha512-khUlZSwt9xXCaTbbxFYBKDc/bWAGWJjOgvxETwkTN7KRm66EeT1ZdZj6i2ceh9sP2Pzqsbc704r2yngBrxBVug==", + "license": "MIT", + "dependencies": { + "@vue/devtools-api": "^6.6.3", + "vue-demi": "^0.14.10" + }, + "funding": { + "url": "https://github.com/sponsors/posva" + }, + "peerDependencies": { + "typescript": ">=4.4.4", + "vue": "^2.7.0 || ^3.5.11" + }, + "peerDependenciesMeta": { + "typescript": { + "optional": true + } + } + }, + "node_modules/postcss": { + "version": "8.5.6", + "resolved": "https://registry.npmjs.org/postcss/-/postcss-8.5.6.tgz", + "integrity": "sha512-3Ybi1tAuwAP9s0r1UQ2J4n5Y0G05bJkpUIO0/bI9MhwmD70S5aTWbXGBwxHrelT+XM1k6dM0pk+SwNkpTRN7Pg==", + "funding": [ + { + "type": "opencollective", + "url": "https://opencollective.com/postcss/" + }, + { + "type": "tidelift", + "url": "https://tidelift.com/funding/github/npm/postcss" + }, + { + "type": "github", + "url": "https://github.com/sponsors/ai" + } + ], + "license": "MIT", + "dependencies": { + "nanoid": "^3.3.11", + "picocolors": "^1.1.1", + "source-map-js": "^1.2.1" + }, + "engines": { + "node": "^10 || ^12 || >=14" + } + }, + "node_modules/proxy-from-env": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/proxy-from-env/-/proxy-from-env-1.1.0.tgz", + "integrity": "sha512-D+zkORCbA9f1tdWRK0RaCR3GPv50cMxcrz4X8k5LTSUD1Dkw47mKJEZQNunItRTkWwgtaUSo1RVFRIG9ZXiFYg==", + "license": "MIT" + }, + "node_modules/rollup": { + "version": "4.59.0", + "resolved": "https://registry.npmjs.org/rollup/-/rollup-4.59.0.tgz", + "integrity": "sha512-2oMpl67a3zCH9H79LeMcbDhXW/UmWG/y2zuqnF2jQq5uq9TbM9TVyXvA4+t+ne2IIkBdrLpAaRQAvo7YI/Yyeg==", + "dev": true, + "license": "MIT", + "dependencies": { + "@types/estree": "1.0.8" + }, + "bin": { + "rollup": "dist/bin/rollup" + }, + "engines": { + "node": ">=18.0.0", + "npm": ">=8.0.0" + }, + "optionalDependencies": { + "@rollup/rollup-android-arm-eabi": "4.59.0", + "@rollup/rollup-android-arm64": "4.59.0", + "@rollup/rollup-darwin-arm64": "4.59.0", + "@rollup/rollup-darwin-x64": "4.59.0", + "@rollup/rollup-freebsd-arm64": "4.59.0", + "@rollup/rollup-freebsd-x64": "4.59.0", + "@rollup/rollup-linux-arm-gnueabihf": "4.59.0", + "@rollup/rollup-linux-arm-musleabihf": "4.59.0", + "@rollup/rollup-linux-arm64-gnu": "4.59.0", + "@rollup/rollup-linux-arm64-musl": "4.59.0", + "@rollup/rollup-linux-loong64-gnu": "4.59.0", + "@rollup/rollup-linux-loong64-musl": "4.59.0", + "@rollup/rollup-linux-ppc64-gnu": "4.59.0", + "@rollup/rollup-linux-ppc64-musl": "4.59.0", + "@rollup/rollup-linux-riscv64-gnu": "4.59.0", + "@rollup/rollup-linux-riscv64-musl": "4.59.0", + "@rollup/rollup-linux-s390x-gnu": "4.59.0", + "@rollup/rollup-linux-x64-gnu": "4.59.0", + "@rollup/rollup-linux-x64-musl": "4.59.0", + "@rollup/rollup-openbsd-x64": "4.59.0", + "@rollup/rollup-openharmony-arm64": "4.59.0", + "@rollup/rollup-win32-arm64-msvc": "4.59.0", + "@rollup/rollup-win32-ia32-msvc": "4.59.0", + "@rollup/rollup-win32-x64-gnu": "4.59.0", + "@rollup/rollup-win32-x64-msvc": "4.59.0", + "fsevents": "~2.3.2" + } + }, + "node_modules/semver": { + "version": "7.7.4", + "resolved": "https://registry.npmjs.org/semver/-/semver-7.7.4.tgz", + "integrity": "sha512-vFKC2IEtQnVhpT78h1Yp8wzwrf8CM+MzKMHGJZfBtzhZNycRFnXsHk6E5TxIkkMsgNS7mdX3AGB7x2QM2di4lA==", + "dev": true, + "license": "ISC", + "bin": { + "semver": "bin/semver.js" + }, + "engines": { + "node": ">=10" + } + }, + "node_modules/source-map-js": { + "version": "1.2.1", + "resolved": "https://registry.npmjs.org/source-map-js/-/source-map-js-1.2.1.tgz", + "integrity": "sha512-UXWMKhLOwVKb728IUtQPXxfYU+usdybtUrK/8uGE8CQMvrhOpwvzDBwj0QhSL7MQc7vIsISBG8VQ8+IDQxpfQA==", + "license": "BSD-3-Clause", + "engines": { + "node": ">=0.10.0" + } + }, + "node_modules/typescript": { + "version": "5.9.3", + "resolved": "https://registry.npmjs.org/typescript/-/typescript-5.9.3.tgz", + "integrity": "sha512-jl1vZzPDinLr9eUt3J/t7V6FgNEw9QjvBPdysz9KfQDD41fQrC2Y4vKQdiaUpFT4bXlb1RHhLpp8wtm6M5TgSw==", + "devOptional": true, + "license": "Apache-2.0", + "bin": { + "tsc": "bin/tsc", + "tsserver": "bin/tsserver" + }, + "engines": { + "node": ">=14.17" + } + }, + "node_modules/vite": { + "version": "5.4.21", + "resolved": "https://registry.npmjs.org/vite/-/vite-5.4.21.tgz", + "integrity": "sha512-o5a9xKjbtuhY6Bi5S3+HvbRERmouabWbyUcpXXUA1u+GNUKoROi9byOJ8M0nHbHYHkYICiMlqxkg1KkYmm25Sw==", + "dev": true, + "license": "MIT", + "dependencies": { + "esbuild": "^0.21.3", + "postcss": "^8.4.43", + "rollup": "^4.20.0" + }, + "bin": { + "vite": "bin/vite.js" + }, + "engines": { + "node": "^18.0.0 || >=20.0.0" + }, + "funding": { + "url": "https://github.com/vitejs/vite?sponsor=1" + }, + "optionalDependencies": { + "fsevents": "~2.3.3" + }, + "peerDependencies": { + "@types/node": "^18.0.0 || >=20.0.0", + "less": "*", + "lightningcss": "^1.21.0", + "sass": "*", + "sass-embedded": "*", + "stylus": "*", + "sugarss": "*", + "terser": "^5.4.0" + }, + "peerDependenciesMeta": { + "@types/node": { + "optional": true + }, + "less": { + "optional": true + }, + "lightningcss": { + "optional": true + }, + "sass": { + "optional": true + }, + "sass-embedded": { + "optional": true + }, + "stylus": { + "optional": true + }, + "sugarss": { + "optional": true + }, + "terser": { + "optional": true + } + } + }, + "node_modules/vue": { + "version": "3.5.29", + "resolved": "https://registry.npmjs.org/vue/-/vue-3.5.29.tgz", + "integrity": "sha512-BZqN4Ze6mDQVNAni0IHeMJ5mwr8VAJ3MQC9FmprRhcBYENw+wOAAjRj8jfmN6FLl0j96OXbR+CjWhmAmM+QGnA==", + "license": "MIT", + "dependencies": { + "@vue/compiler-dom": "3.5.29", + "@vue/compiler-sfc": "3.5.29", + "@vue/runtime-dom": "3.5.29", + "@vue/server-renderer": "3.5.29", + "@vue/shared": "3.5.29" + }, + "peerDependencies": { + "typescript": "*" + }, + "peerDependenciesMeta": { + "typescript": { + "optional": true + } + } + }, + "node_modules/vue-demi": { + "version": "0.14.10", + "resolved": "https://registry.npmjs.org/vue-demi/-/vue-demi-0.14.10.tgz", + "integrity": "sha512-nMZBOwuzabUO0nLgIcc6rycZEebF6eeUfaiQx9+WSk8e29IbLvPU9feI6tqW4kTo3hvoYAJkMh8n8D0fuISphg==", + "hasInstallScript": true, + "license": "MIT", + "bin": { + "vue-demi-fix": "bin/vue-demi-fix.js", + "vue-demi-switch": "bin/vue-demi-switch.js" + }, + "engines": { + "node": ">=12" + }, + "funding": { + "url": "https://github.com/sponsors/antfu" + }, + "peerDependencies": { + "@vue/composition-api": "^1.0.0-rc.1", + "vue": "^3.0.0-0 || ^2.6.0" + }, + "peerDependenciesMeta": { + "@vue/composition-api": { + "optional": true + } + } + }, + "node_modules/vue-router": { + "version": "4.6.4", + "resolved": "https://registry.npmjs.org/vue-router/-/vue-router-4.6.4.tgz", + "integrity": "sha512-Hz9q5sa33Yhduglwz6g9skT8OBPii+4bFn88w6J+J4MfEo4KRRpmiNG/hHHkdbRFlLBOqxN8y8gf2Fb0MTUgVg==", + "license": "MIT", + "dependencies": { + "@vue/devtools-api": "^6.6.4" + }, + "funding": { + "url": "https://github.com/sponsors/posva" + }, + "peerDependencies": { + "vue": "^3.5.0" + } + }, + "node_modules/vue-template-compiler": { + "version": "2.7.16", + "resolved": "https://registry.npmjs.org/vue-template-compiler/-/vue-template-compiler-2.7.16.tgz", + "integrity": "sha512-AYbUWAJHLGGQM7+cNTELw+KsOG9nl2CnSv467WobS5Cv9uk3wFcnr1Etsz2sEIHEZvw1U+o9mRlEO6QbZvUPGQ==", + "dev": true, + "license": "MIT", + "dependencies": { + "de-indent": "^1.0.2", + "he": "^1.2.0" + } + }, + "node_modules/vue-tsc": { + "version": "1.8.27", + "resolved": "https://registry.npmjs.org/vue-tsc/-/vue-tsc-1.8.27.tgz", + "integrity": "sha512-WesKCAZCRAbmmhuGl3+VrdWItEvfoFIPXOvUJkjULi+x+6G/Dy69yO3TBRJDr9eUlmsNAwVmxsNZxvHKzbkKdg==", + "dev": true, + "license": "MIT", + "dependencies": { + "@volar/typescript": "~1.11.1", + "@vue/language-core": "1.8.27", + "semver": "^7.5.4" + }, + "bin": { + "vue-tsc": "bin/vue-tsc.js" + }, + "peerDependencies": { + "typescript": "*" + } + } + } +} diff --git a/ai-service-admin/package.json b/ai-service-admin/package.json new file mode 100644 index 0000000..d8fbf9f --- /dev/null +++ b/ai-service-admin/package.json @@ -0,0 +1,24 @@ +{ + "name": "ai-service-admin", + "version": "0.1.0", + "private": true, + "scripts": { + "dev": "vite", + "build": "vue-tsc --noEmit && vite build", + "preview": "vite preview" + }, + "dependencies": { + "@element-plus/icons-vue": "^2.3.1", + "axios": "^1.6.7", + "element-plus": "^2.6.1", + "pinia": "^2.1.7", + "vue": "^3.4.21", + "vue-router": "^4.3.0" + }, + "devDependencies": { + "@vitejs/plugin-vue": "^5.0.4", + "typescript": "^5.2.2", + "vite": "^5.1.4", + "vue-tsc": "^1.8.27" + } +} diff --git a/ai-service-admin/src/App.vue b/ai-service-admin/src/App.vue new file mode 100644 index 0000000..b6aa613 --- /dev/null +++ b/ai-service-admin/src/App.vue @@ -0,0 +1,279 @@ + + + + + diff --git a/ai-service-admin/src/api/dashboard.ts b/ai-service-admin/src/api/dashboard.ts new file mode 100644 index 0000000..0d7ef78 --- /dev/null +++ b/ai-service-admin/src/api/dashboard.ts @@ -0,0 +1,8 @@ +import request from '@/utils/request' + +export function getDashboardStats() { + return request({ + url: '/admin/dashboard/stats', + method: 'get' + }) +} diff --git a/ai-service-admin/src/api/embedding.ts b/ai-service-admin/src/api/embedding.ts new file mode 100644 index 0000000..418dc1e --- /dev/null +++ b/ai-service-admin/src/api/embedding.ts @@ -0,0 +1,88 @@ +import request from '@/utils/request' + +export interface EmbeddingProviderInfo { + name: string + display_name: string + description?: string + config_schema: Record +} + +export interface EmbeddingConfig { + provider: string + config: Record + updated_at?: string +} + +export interface EmbeddingConfigUpdate { + provider: string + config?: Record +} + +export interface EmbeddingTestResult { + success: boolean + dimension: number + latency_ms?: number + message?: string + error?: string +} + +export interface EmbeddingTestRequest { + test_text?: string + config?: EmbeddingConfigUpdate +} + +export interface DocumentFormat { + extension: string + name: string + description?: string +} + +export interface EmbeddingProvidersResponse { + providers: EmbeddingProviderInfo[] +} + +export interface EmbeddingConfigUpdateResponse { + success: boolean + message: string +} + +export interface SupportedFormatsResponse { + formats: DocumentFormat[] +} + +export function getProviders() { + return request({ + url: '/embedding/providers', + method: 'get' + }) +} + +export function getConfig() { + return request({ + url: '/embedding/config', + method: 'get' + }) +} + +export function saveConfig(data: EmbeddingConfigUpdate) { + return request({ + url: '/embedding/config', + method: 'put', + data + }) +} + +export function testEmbedding(data: EmbeddingTestRequest): Promise { + return request({ + url: '/embedding/test', + method: 'post', + data + }) +} + +export function getSupportedFormats() { + return request({ + url: '/embedding/formats', + method: 'get' + }) +} diff --git a/ai-service-admin/src/api/kb.ts b/ai-service-admin/src/api/kb.ts new file mode 100644 index 0000000..bf54fe1 --- /dev/null +++ b/ai-service-admin/src/api/kb.ts @@ -0,0 +1,38 @@ +import request from '@/utils/request' + +export function listKnowledgeBases() { + return request({ + url: '/admin/kb/knowledge-bases', + method: 'get' + }) +} + +export function listDocuments(params: any) { + return request({ + url: '/admin/kb/documents', + method: 'get', + params + }) +} + +export function uploadDocument(data: FormData) { + return request({ + url: '/admin/kb/documents', + method: 'post', + data + }) +} + +export function getIndexJob(jobId: string) { + return request({ + url: `/admin/kb/index/jobs/${jobId}`, + method: 'get' + }) +} + +export function deleteDocument(docId: string) { + return request({ + url: `/admin/kb/documents/${docId}`, + method: 'delete' + }) +} diff --git a/ai-service-admin/src/api/llm.ts b/ai-service-admin/src/api/llm.ts new file mode 100644 index 0000000..b474dcc --- /dev/null +++ b/ai-service-admin/src/api/llm.ts @@ -0,0 +1,50 @@ +import request from '@/utils/request' +import type { + LLMProviderInfo, + LLMConfig, + LLMConfigUpdate, + LLMTestResult, + LLMTestRequest, + LLMProvidersResponse, + LLMConfigUpdateResponse +} from '@/types/llm' + +export function getLLMProviders(): Promise { + return request({ + url: '/admin/llm/providers', + method: 'get' + }) +} + +export function getLLMConfig(): Promise { + return request({ + url: '/admin/llm/config', + method: 'get' + }) +} + +export function updateLLMConfig(data: LLMConfigUpdate): Promise { + return request({ + url: '/admin/llm/config', + method: 'put', + data + }) +} + +export function testLLM(data: LLMTestRequest): Promise { + return request({ + url: '/admin/llm/test', + method: 'post', + data + }) +} + +export type { + LLMProviderInfo, + LLMConfig, + LLMConfigUpdate, + LLMTestResult, + LLMTestRequest, + LLMProvidersResponse, + LLMConfigUpdateResponse +} diff --git a/ai-service-admin/src/api/monitoring.ts b/ai-service-admin/src/api/monitoring.ts new file mode 100644 index 0000000..e2a218f --- /dev/null +++ b/ai-service-admin/src/api/monitoring.ts @@ -0,0 +1,16 @@ +import request from '@/utils/request' + +export function listSessions(params: any) { + return request({ + url: '/admin/sessions', + method: 'get', + params + }) +} + +export function getSessionDetail(sessionId: string) { + return request({ + url: `/admin/sessions/${sessionId}`, + method: 'get' + }) +} diff --git a/ai-service-admin/src/api/rag.ts b/ai-service-admin/src/api/rag.ts new file mode 100644 index 0000000..12ca036 --- /dev/null +++ b/ai-service-admin/src/api/rag.ts @@ -0,0 +1,135 @@ +import request from '@/utils/request' +import { useTenantStore } from '@/stores/tenant' + +export interface AIResponse { + content: string + prompt_tokens?: number + completion_tokens?: number + total_tokens?: number + latency_ms?: number + model?: string +} + +export interface RetrievalResult { + content: string + score: number + source: string + metadata?: Record +} + +export interface RagExperimentRequest { + query: string + kb_ids?: string[] + top_k?: number + score_threshold?: number + llm_provider?: string + generate_response?: boolean +} + +export interface RagExperimentResult { + query: string + retrieval_results?: RetrievalResult[] + final_prompt?: string + ai_response?: AIResponse + total_latency_ms?: number +} + +export function runRagExperiment(data: RagExperimentRequest): Promise { + return request({ + url: '/admin/rag/experiments/run', + method: 'post', + data + }) +} + +export function runRagExperimentStream( + data: RagExperimentRequest, + onMessage: (event: MessageEvent) => void, + onError?: (error: Event) => void, + onComplete?: () => void +): EventSource { + const baseUrl = import.meta.env.VITE_APP_BASE_API || '/api' + const url = `${baseUrl}/admin/rag/experiments/stream` + + const eventSource = new EventSource(url, { + withCredentials: true + }) + + eventSource.onmessage = onMessage + eventSource.onerror = (error) => { + eventSource.close() + onError?.(error) + } + + return eventSource +} + +export function createSSEConnection( + url: string, + body: RagExperimentRequest, + onMessage: (data: string) => void, + onError?: (error: Error) => void, + onComplete?: () => void +): () => void { + const baseUrl = import.meta.env.VITE_APP_BASE_API || '/api' + const fullUrl = `${baseUrl}${url}` + + const tenantStore = useTenantStore() + + const controller = new AbortController() + + fetch(fullUrl, { + method: 'POST', + headers: { + 'Content-Type': 'application/json', + 'Accept': 'text/event-stream', + 'X-Tenant-Id': tenantStore.currentTenantId || '', + }, + body: JSON.stringify(body), + signal: controller.signal + }) + .then(async (response) => { + if (!response.ok) { + throw new Error(`HTTP error! status: ${response.status}`) + } + + const reader = response.body?.getReader() + if (!reader) { + throw new Error('No response body') + } + + const decoder = new TextDecoder() + let buffer = '' + + while (true) { + const { done, value } = await reader.read() + + if (done) { + onComplete?.() + break + } + + buffer += decoder.decode(value, { stream: true }) + const lines = buffer.split('\n') + buffer = lines.pop() || '' + + for (const line of lines) { + if (line.startsWith('data: ')) { + const data = line.slice(6) + if (data === '[DONE]') { + onComplete?.() + return + } + onMessage(data) + } + } + } + }) + .catch((error) => { + if (error.name !== 'AbortError') { + onError?.(error) + } + }) + + return () => controller.abort() +} diff --git a/ai-service-admin/src/api/tenant.ts b/ai-service-admin/src/api/tenant.ts new file mode 100644 index 0000000..2fce357 --- /dev/null +++ b/ai-service-admin/src/api/tenant.ts @@ -0,0 +1,21 @@ +import request from '@/utils/request' + +export interface Tenant { + id: string + name: string + displayName: string + year: string + createdAt: string +} + +export interface TenantListResponse { + tenants: Tenant[] + total: number +} + +export function getTenantList() { + return request({ + url: '/admin/tenants', + method: 'get' + }) +} diff --git a/ai-service-admin/src/components/BaseForm.vue b/ai-service-admin/src/components/BaseForm.vue new file mode 100644 index 0000000..b63f53a --- /dev/null +++ b/ai-service-admin/src/components/BaseForm.vue @@ -0,0 +1,43 @@ + + + diff --git a/ai-service-admin/src/components/BaseTable.vue b/ai-service-admin/src/components/BaseTable.vue new file mode 100644 index 0000000..ab88cca --- /dev/null +++ b/ai-service-admin/src/components/BaseTable.vue @@ -0,0 +1,55 @@ + + + + + diff --git a/ai-service-admin/src/components/common/ConfigForm.vue b/ai-service-admin/src/components/common/ConfigForm.vue new file mode 100644 index 0000000..569e4af --- /dev/null +++ b/ai-service-admin/src/components/common/ConfigForm.vue @@ -0,0 +1,219 @@ + + + + + diff --git a/ai-service-admin/src/components/common/ProviderSelect.vue b/ai-service-admin/src/components/common/ProviderSelect.vue new file mode 100644 index 0000000..6120441 --- /dev/null +++ b/ai-service-admin/src/components/common/ProviderSelect.vue @@ -0,0 +1,83 @@ + + + + + diff --git a/ai-service-admin/src/components/common/TestPanel.vue b/ai-service-admin/src/components/common/TestPanel.vue new file mode 100644 index 0000000..c3a0d08 --- /dev/null +++ b/ai-service-admin/src/components/common/TestPanel.vue @@ -0,0 +1,523 @@ + + + + + diff --git a/ai-service-admin/src/components/embedding/EmbeddingConfigForm.vue b/ai-service-admin/src/components/embedding/EmbeddingConfigForm.vue new file mode 100644 index 0000000..e491cc8 --- /dev/null +++ b/ai-service-admin/src/components/embedding/EmbeddingConfigForm.vue @@ -0,0 +1,219 @@ + + + + + diff --git a/ai-service-admin/src/components/embedding/EmbeddingProviderSelect.vue b/ai-service-admin/src/components/embedding/EmbeddingProviderSelect.vue new file mode 100644 index 0000000..66a27eb --- /dev/null +++ b/ai-service-admin/src/components/embedding/EmbeddingProviderSelect.vue @@ -0,0 +1,78 @@ + + + + + diff --git a/ai-service-admin/src/components/embedding/EmbeddingTestPanel.vue b/ai-service-admin/src/components/embedding/EmbeddingTestPanel.vue new file mode 100644 index 0000000..3b0ae02 --- /dev/null +++ b/ai-service-admin/src/components/embedding/EmbeddingTestPanel.vue @@ -0,0 +1,460 @@ + + + + + diff --git a/ai-service-admin/src/components/embedding/SupportedFormats.vue b/ai-service-admin/src/components/embedding/SupportedFormats.vue new file mode 100644 index 0000000..7a73e4f --- /dev/null +++ b/ai-service-admin/src/components/embedding/SupportedFormats.vue @@ -0,0 +1,161 @@ + + + + + diff --git a/ai-service-admin/src/components/rag/AIResponseViewer.vue b/ai-service-admin/src/components/rag/AIResponseViewer.vue new file mode 100644 index 0000000..a9960a5 --- /dev/null +++ b/ai-service-admin/src/components/rag/AIResponseViewer.vue @@ -0,0 +1,351 @@ + + + + + diff --git a/ai-service-admin/src/components/rag/LLMSelector.vue b/ai-service-admin/src/components/rag/LLMSelector.vue new file mode 100644 index 0000000..675faae --- /dev/null +++ b/ai-service-admin/src/components/rag/LLMSelector.vue @@ -0,0 +1,159 @@ + + + + + + + diff --git a/ai-service-admin/src/components/rag/StreamOutput.vue b/ai-service-admin/src/components/rag/StreamOutput.vue new file mode 100644 index 0000000..e06f03c --- /dev/null +++ b/ai-service-admin/src/components/rag/StreamOutput.vue @@ -0,0 +1,299 @@ + + + + + diff --git a/ai-service-admin/src/main.ts b/ai-service-admin/src/main.ts new file mode 100644 index 0000000..5aff922 --- /dev/null +++ b/ai-service-admin/src/main.ts @@ -0,0 +1,16 @@ +import { createApp } from 'vue' +import { createPinia } from 'pinia' +import ElementPlus from 'element-plus' +import 'element-plus/dist/index.css' +import './styles/main.css' +import App from './App.vue' +import router from './router' + +const app = createApp(App) +const pinia = createPinia() + +app.use(pinia) +app.use(router) +app.use(ElementPlus) + +app.mount('#app') diff --git a/ai-service-admin/src/router/index.ts b/ai-service-admin/src/router/index.ts new file mode 100644 index 0000000..c4ffa87 --- /dev/null +++ b/ai-service-admin/src/router/index.ts @@ -0,0 +1,51 @@ +import { createRouter, createWebHistory, RouteRecordRaw } from 'vue-router' + +const routes: Array = [ + { + path: '/', + redirect: '/dashboard' + }, + { + path: '/dashboard', + name: 'Dashboard', + component: () => import('@/views/dashboard/index.vue'), + meta: { title: '控制台' } + }, + { + path: '/kb', + name: 'KBManagement', + component: () => import('@/views/kb/index.vue'), + meta: { title: '知识库管理' } + }, + { + path: '/rag-lab', + name: 'RagLab', + component: () => import('@/views/rag-lab/index.vue'), + meta: { title: 'RAG 实验室' } + }, + { + path: '/monitoring', + name: 'Monitoring', + component: () => import('@/views/monitoring/index.vue'), + meta: { title: '会话监控' } + }, + { + path: '/admin/embedding', + name: 'EmbeddingConfig', + component: () => import('@/views/admin/embedding/index.vue'), + meta: { title: '嵌入模型配置' } + }, + { + path: '/admin/llm', + name: 'LLMConfig', + component: () => import('@/views/admin/llm/index.vue'), + meta: { title: 'LLM 模型配置' } + } +] + +const router = createRouter({ + history: createWebHistory(), + routes +}) + +export default router diff --git a/ai-service-admin/src/stores/embedding.ts b/ai-service-admin/src/stores/embedding.ts new file mode 100644 index 0000000..2cfb0ea --- /dev/null +++ b/ai-service-admin/src/stores/embedding.ts @@ -0,0 +1,164 @@ +import { defineStore } from 'pinia' +import { ref, computed } from 'vue' +import { + getProviders, + getConfig, + saveConfig, + testEmbedding, + getSupportedFormats, + type EmbeddingProviderInfo, + type EmbeddingConfig, + type EmbeddingConfigUpdate, + type EmbeddingTestResult, + type DocumentFormat +} from '@/api/embedding' + +export const useEmbeddingStore = defineStore('embedding', () => { + const providers = ref([]) + const currentConfig = ref({ + provider: '', + config: {} + }) + const formats = ref([]) + const loading = ref(false) + const providersLoading = ref(false) + const formatsLoading = ref(false) + const testResult = ref(null) + const testLoading = ref(false) + + const currentProvider = computed(() => { + return providers.value.find(p => p.name === currentConfig.value.provider) + }) + + const configSchema = computed(() => { + return currentProvider.value?.config_schema || { properties: {} } + }) + + const loadProviders = async () => { + providersLoading.value = true + try { + const res: any = await getProviders() + providers.value = res?.providers || res?.data?.providers || [] + } catch (error) { + console.error('Failed to load providers:', error) + throw error + } finally { + providersLoading.value = false + } + } + + const loadConfig = async () => { + loading.value = true + try { + const res: any = await getConfig() + const config = res?.data || res + if (config) { + currentConfig.value = { + provider: config.provider || '', + config: config.config || {}, + updated_at: config.updated_at + } + } + } catch (error) { + console.error('Failed to load config:', error) + throw error + } finally { + loading.value = false + } + } + + const saveCurrentConfig = async () => { + loading.value = true + try { + const updateData: EmbeddingConfigUpdate = { + provider: currentConfig.value.provider, + config: currentConfig.value.config + } + await saveConfig(updateData) + } catch (error) { + console.error('Failed to save config:', error) + throw error + } finally { + loading.value = false + } + } + + const runTest = async (testText?: string) => { + testLoading.value = true + testResult.value = null + try { + const result = await testEmbedding({ + test_text: testText, + config: { + provider: currentConfig.value.provider, + config: currentConfig.value.config + } + }) + testResult.value = result + } catch (error: any) { + testResult.value = { + success: false, + dimension: 0, + error: error?.message || '连接测试失败' + } + } finally { + testLoading.value = false + } + } + + const loadFormats = async () => { + formatsLoading.value = true + try { + const res: any = await getSupportedFormats() + formats.value = res?.formats || res?.data?.formats || [] + } catch (error) { + console.error('Failed to load formats:', error) + throw error + } finally { + formatsLoading.value = false + } + } + + const setProvider = (providerName: string) => { + currentConfig.value.provider = providerName + const provider = providers.value.find(p => p.name === providerName) + if (provider?.config_schema?.properties) { + const newConfig: Record = {} + Object.entries(provider.config_schema.properties).forEach(([key, field]: [string, any]) => { + newConfig[key] = field.default !== undefined ? field.default : '' + }) + currentConfig.value.config = newConfig + } else { + currentConfig.value.config = {} + } + } + + const updateConfigValue = (key: string, value: any) => { + currentConfig.value.config[key] = value + } + + const clearTestResult = () => { + testResult.value = null + } + + return { + providers, + currentConfig, + formats, + loading, + providersLoading, + formatsLoading, + testResult, + testLoading, + currentProvider, + configSchema, + loadProviders, + loadConfig, + saveCurrentConfig, + runTest, + loadFormats, + setProvider, + updateConfigValue, + clearTestResult + } +}) diff --git a/ai-service-admin/src/stores/llm.ts b/ai-service-admin/src/stores/llm.ts new file mode 100644 index 0000000..656ab29 --- /dev/null +++ b/ai-service-admin/src/stores/llm.ts @@ -0,0 +1,161 @@ +import { defineStore } from 'pinia' +import { ref, computed } from 'vue' +import { + getLLMProviders, + getLLMConfig, + updateLLMConfig, + testLLM, + type LLMProviderInfo, + type LLMConfig, + type LLMConfigUpdate, + type LLMTestResult +} from '@/api/llm' + +export const useLLMStore = defineStore('llm', () => { + const providers = ref([]) + const currentConfig = ref({ + provider: '', + config: {} + }) + const loading = ref(false) + const providersLoading = ref(false) + const testResult = ref(null) + const testLoading = ref(false) + + const currentProvider = computed(() => { + return providers.value.find(p => p.name === currentConfig.value.provider) + }) + + const configSchema = computed(() => { + return currentProvider.value?.config_schema || { properties: {} } + }) + + const loadProviders = async () => { + providersLoading.value = true + try { + const res: any = await getLLMProviders() + providers.value = res?.providers || res?.data?.providers || [] + } catch (error) { + console.error('Failed to load LLM providers:', error) + throw error + } finally { + providersLoading.value = false + } + } + + const loadConfig = async () => { + loading.value = true + try { + const res: any = await getLLMConfig() + const config = res?.data || res + if (config) { + currentConfig.value = { + provider: config.provider || '', + config: config.config || {}, + updated_at: config.updated_at + } + } + } catch (error) { + console.error('Failed to load LLM config:', error) + throw error + } finally { + loading.value = false + } + } + + const saveCurrentConfig = async () => { + loading.value = true + try { + const updateData: LLMConfigUpdate = { + provider: currentConfig.value.provider, + config: currentConfig.value.config + } + await updateLLMConfig(updateData) + } catch (error) { + console.error('Failed to save LLM config:', error) + throw error + } finally { + loading.value = false + } + } + + const runTest = async (testPrompt?: string): Promise => { + testLoading.value = true + testResult.value = null + try { + const result = await testLLM({ + test_prompt: testPrompt, + provider: currentConfig.value.provider, + config: currentConfig.value.config + }) + testResult.value = result + return result + } catch (error: any) { + const errorResult: LLMTestResult = { + success: false, + error: error?.message || '连接测试失败' + } + testResult.value = errorResult + return errorResult + } finally { + testLoading.value = false + } + } + + const setProvider = (providerName: string) => { + currentConfig.value.provider = providerName + const provider = providers.value.find(p => p.name === providerName) + if (provider?.config_schema?.properties) { + const newConfig: Record = {} + Object.entries(provider.config_schema.properties).forEach(([key, field]: [string, any]) => { + if (field.default !== undefined) { + newConfig[key] = field.default + } else { + switch (field.type) { + case 'string': + newConfig[key] = '' + break + case 'integer': + case 'number': + newConfig[key] = field.minimum ?? 0 + break + case 'boolean': + newConfig[key] = false + break + default: + newConfig[key] = null + } + } + }) + currentConfig.value.config = newConfig + } else { + currentConfig.value.config = {} + } + } + + const updateConfigValue = (key: string, value: any) => { + currentConfig.value.config[key] = value + } + + const clearTestResult = () => { + testResult.value = null + } + + return { + providers, + currentConfig, + loading, + providersLoading, + testResult, + testLoading, + currentProvider, + configSchema, + loadProviders, + loadConfig, + saveCurrentConfig, + runTest, + setProvider, + updateConfigValue, + clearTestResult + } +}) diff --git a/ai-service-admin/src/stores/rag.ts b/ai-service-admin/src/stores/rag.ts new file mode 100644 index 0000000..c9a9b5d --- /dev/null +++ b/ai-service-admin/src/stores/rag.ts @@ -0,0 +1,126 @@ +import { defineStore } from 'pinia' +import { ref, computed } from 'vue' +import { + runRagExperiment, + createSSEConnection, + type AIResponse, + type RetrievalResult, + type RagExperimentRequest, + type RagExperimentResult +} from '@/api/rag' + +export const useRagStore = defineStore('rag', () => { + const retrievalResults = ref([]) + const finalPrompt = ref('') + const aiResponse = ref(null) + const totalLatencyMs = ref(0) + + const loading = ref(false) + const streaming = ref(false) + const streamContent = ref('') + const streamError = ref(null) + + const hasResults = computed(() => retrievalResults.value.length > 0 || aiResponse.value !== null) + + const abortStream = ref<(() => void) | null>(null) + + const runExperiment = async (params: RagExperimentRequest) => { + loading.value = true + streamError.value = null + + try { + const result: RagExperimentResult = await runRagExperiment(params) + + retrievalResults.value = result.retrieval_results || [] + finalPrompt.value = result.final_prompt || '' + aiResponse.value = result.ai_response || null + totalLatencyMs.value = result.total_latency_ms || 0 + + return result + } catch (error: any) { + streamError.value = error?.message || '实验运行失败' + throw error + } finally { + loading.value = false + } + } + + const startStream = (params: RagExperimentRequest) => { + streaming.value = true + streamContent.value = '' + streamError.value = null + aiResponse.value = null + + abortStream.value = createSSEConnection( + '/admin/rag/experiments/stream', + params, + (data: string) => { + try { + const parsed = JSON.parse(data) + + if (parsed.type === 'content') { + streamContent.value += parsed.content || '' + } else if (parsed.type === 'retrieval') { + retrievalResults.value = parsed.results || [] + } else if (parsed.type === 'prompt') { + finalPrompt.value = parsed.prompt || '' + } else if (parsed.type === 'complete') { + aiResponse.value = { + content: streamContent.value, + prompt_tokens: parsed.prompt_tokens, + completion_tokens: parsed.completion_tokens, + total_tokens: parsed.total_tokens, + latency_ms: parsed.latency_ms, + model: parsed.model + } + totalLatencyMs.value = parsed.total_latency_ms || 0 + } else if (parsed.type === 'error') { + streamError.value = parsed.message || '流式输出错误' + } + } catch { + streamContent.value += data + } + }, + (error: Error) => { + streaming.value = false + streamError.value = error.message + }, + () => { + streaming.value = false + } + ) + } + + const stopStream = () => { + if (abortStream.value) { + abortStream.value() + abortStream.value = null + } + streaming.value = false + } + + const clearResults = () => { + retrievalResults.value = [] + finalPrompt.value = '' + aiResponse.value = null + totalLatencyMs.value = 0 + streamContent.value = '' + streamError.value = null + } + + return { + retrievalResults, + finalPrompt, + aiResponse, + totalLatencyMs, + loading, + streaming, + streamContent, + streamError, + hasResults, + runExperiment, + startStream, + stopStream, + clearResults + } +}) diff --git a/ai-service-admin/src/stores/ragLab.ts b/ai-service-admin/src/stores/ragLab.ts new file mode 100644 index 0000000..63ffe75 --- /dev/null +++ b/ai-service-admin/src/stores/ragLab.ts @@ -0,0 +1,41 @@ +import { defineStore } from 'pinia' +import { ref, watch } from 'vue' + +export const useRagLabStore = defineStore('ragLab', () => { + const query = ref(localStorage.getItem('ragLab_query') || '') + const kbIds = ref(JSON.parse(localStorage.getItem('ragLab_kbIds') || '[]')) + const llmProvider = ref(localStorage.getItem('ragLab_llmProvider') || '') + const topK = ref(parseInt(localStorage.getItem('ragLab_topK') || '3', 10)) + const scoreThreshold = ref(parseFloat(localStorage.getItem('ragLab_scoreThreshold') || '0.5')) + const generateResponse = ref(localStorage.getItem('ragLab_generateResponse') !== 'false') + const streamOutput = ref(localStorage.getItem('ragLab_streamOutput') === 'true') + + watch(query, (val) => localStorage.setItem('ragLab_query', val)) + watch(kbIds, (val) => localStorage.setItem('ragLab_kbIds', JSON.stringify(val)), { deep: true }) + watch(llmProvider, (val) => localStorage.setItem('ragLab_llmProvider', val)) + watch(topK, (val) => localStorage.setItem('ragLab_topK', String(val))) + watch(scoreThreshold, (val) => localStorage.setItem('ragLab_scoreThreshold', String(val))) + watch(generateResponse, (val) => localStorage.setItem('ragLab_generateResponse', String(val))) + watch(streamOutput, (val) => localStorage.setItem('ragLab_streamOutput', String(val))) + + const clearParams = () => { + query.value = '' + kbIds.value = [] + llmProvider.value = '' + topK.value = 3 + scoreThreshold.value = 0.5 + generateResponse.value = true + streamOutput.value = false + } + + return { + query, + kbIds, + llmProvider, + topK, + scoreThreshold, + generateResponse, + streamOutput, + clearParams + } +}) diff --git a/ai-service-admin/src/stores/tenant.ts b/ai-service-admin/src/stores/tenant.ts new file mode 100644 index 0000000..efce1e8 --- /dev/null +++ b/ai-service-admin/src/stores/tenant.ts @@ -0,0 +1,16 @@ +import { defineStore } from 'pinia' + +// Default tenant ID format: name@ash@year +const DEFAULT_TENANT_ID = 'default@ash@2026' + +export const useTenantStore = defineStore('tenant', { + state: () => ({ + currentTenantId: localStorage.getItem('X-Tenant-Id') || DEFAULT_TENANT_ID + }), + actions: { + setTenant(id: string) { + this.currentTenantId = id + localStorage.setItem('X-Tenant-Id', id) + } + } +}) diff --git a/ai-service-admin/src/styles/main.css b/ai-service-admin/src/styles/main.css new file mode 100644 index 0000000..09883c7 --- /dev/null +++ b/ai-service-admin/src/styles/main.css @@ -0,0 +1,486 @@ +@import url('https://fonts.googleapis.com/css2?family=Inter:wght@300;400;500;600;700&family=DM+Sans:wght@400;500;600;700&display=swap'); + +:root { + --primary-color: #4F7CFF; + --primary-light: #6B91FF; + --primary-lighter: #E8EEFF; + --primary-dark: #3A5FD9; + + --secondary-color: #6366F1; + --secondary-light: #818CF8; + + --accent-color: #10B981; + --accent-light: #34D399; + + --warning-color: #F59E0B; + --danger-color: #EF4444; + --success-color: #10B981; + --info-color: #3B82F6; + + --bg-primary: #F8FAFC; + --bg-secondary: #FFFFFF; + --bg-tertiary: #F1F5F9; + --bg-hover: #F1F5F9; + + --text-primary: #1E293B; + --text-secondary: #64748B; + --text-tertiary: #94A3B8; + --text-placeholder: #CBD5E1; + + --border-color: #E2E8F0; + --border-light: #F1F5F9; + + --shadow-sm: 0 1px 2px 0 rgba(0, 0, 0, 0.05); + --shadow-md: 0 4px 6px -1px rgba(0, 0, 0, 0.05), 0 2px 4px -2px rgba(0, 0, 0, 0.05); + --shadow-lg: 0 10px 15px -3px rgba(0, 0, 0, 0.05), 0 4px 6px -4px rgba(0, 0, 0, 0.05); + --shadow-xl: 0 20px 25px -5px rgba(0, 0, 0, 0.05), 0 8px 10px -6px rgba(0, 0, 0, 0.05); + + --radius-sm: 6px; + --radius-md: 10px; + --radius-lg: 14px; + --radius-xl: 20px; + + --transition-fast: 0.15s ease; + --transition-normal: 0.25s ease; + --transition-slow: 0.35s ease; + + --font-sans: 'DM Sans', 'Inter', -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif; + --font-mono: 'JetBrains Mono', 'Fira Code', 'SF Mono', Consolas, monospace; +} + +* { + margin: 0; + padding: 0; + box-sizing: border-box; +} + +html, body { + font-family: var(--font-sans); + font-size: 14px; + line-height: 1.6; + color: var(--text-primary); + background-color: var(--bg-primary); + -webkit-font-smoothing: antialiased; + -moz-osx-font-smoothing: grayscale; +} + +.el-menu { + font-family: var(--font-sans) !important; + border-bottom: 1px solid var(--border-color) !important; + background-color: var(--bg-secondary) !important; +} + +.el-menu-item { + font-weight: 500 !important; + transition: all var(--transition-fast) !important; +} + +.el-menu-item:hover { + background-color: var(--bg-hover) !important; +} + +.el-menu-item.is-active { + color: var(--primary-color) !important; + border-bottom-color: var(--primary-color) !important; + background-color: var(--primary-lighter) !important; +} + +.el-card { + border-radius: var(--radius-lg) !important; + border: 1px solid var(--border-color) !important; + box-shadow: var(--shadow-sm) !important; + transition: all var(--transition-normal) !important; + background-color: var(--bg-secondary) !important; +} + +.el-card:hover { + box-shadow: var(--shadow-md) !important; +} + +.el-card__header { + border-bottom: 1px solid var(--border-light) !important; + padding: 16px 20px !important; + font-weight: 600 !important; + color: var(--text-primary) !important; +} + +.el-card__body { + padding: 20px !important; +} + +.el-button--primary { + background-color: var(--primary-color) !important; + border-color: var(--primary-color) !important; + font-weight: 500 !important; + transition: all var(--transition-fast) !important; +} + +.el-button--primary:hover { + background-color: var(--primary-light) !important; + border-color: var(--primary-light) !important; + transform: translateY(-1px); + box-shadow: var(--shadow-md) !important; +} + +.el-button--default { + font-weight: 500 !important; + border-color: var(--border-color) !important; + transition: all var(--transition-fast) !important; +} + +.el-button--default:hover { + border-color: var(--primary-color) !important; + color: var(--primary-color) !important; + background-color: var(--primary-lighter) !important; +} + +.el-input__wrapper { + border-radius: var(--radius-md) !important; + box-shadow: none !important; + border: 1px solid var(--border-color) !important; + transition: all var(--transition-fast) !important; +} + +.el-input__wrapper:hover { + border-color: var(--primary-light) !important; +} + +.el-input__wrapper.is-focus { + border-color: var(--primary-color) !important; + box-shadow: 0 0 0 3px var(--primary-lighter) !important; +} + +.el-select { + --el-select-border-color-hover: var(--primary-light) !important; +} + +.el-select .el-input__wrapper { + border-radius: var(--radius-md) !important; +} + +.el-select-dropdown { + border-radius: var(--radius-lg) !important; + border: 1px solid var(--border-color) !important; + box-shadow: var(--shadow-xl) !important; + margin-top: 8px !important; +} + +.el-select-dropdown__wrap { + max-height: 320px !important; +} + +.el-select-dropdown__item { + padding: 10px 16px !important; + line-height: 1.5 !important; + transition: all var(--transition-fast) !important; +} + +.el-select-dropdown__item.hover, +.el-select-dropdown__item:hover { + background-color: var(--primary-lighter) !important; + color: var(--primary-color) !important; +} + +.el-select-dropdown__item.is-selected { + background-color: var(--primary-lighter) !important; + color: var(--primary-color) !important; + font-weight: 600 !important; +} + +.el-tag { + border-radius: var(--radius-sm) !important; + font-weight: 500 !important; + border: none !important; +} + +.el-tag--success { + background-color: #D1FAE5 !important; + color: #059669 !important; +} + +.el-tag--warning { + background-color: #FEF3C7 !important; + color: #D97706 !important; +} + +.el-tag--danger { + background-color: #FEE2E2 !important; + color: #DC2626 !important; +} + +.el-tag--info { + background-color: #E0E7FF !important; + color: #4F46E5 !important; +} + +.el-table { + border-radius: var(--radius-lg) !important; + overflow: hidden !important; +} + +.el-table th.el-table__cell { + background-color: var(--bg-tertiary) !important; + font-weight: 600 !important; + color: var(--text-secondary) !important; +} + +.el-table td.el-table__cell { + border-bottom: 1px solid var(--border-light) !important; +} + +.el-table--striped .el-table__body tr.el-table__row--striped td.el-table__cell { + background-color: var(--bg-tertiary) !important; +} + +.el-table__row:hover > td.el-table__cell { + background-color: var(--primary-lighter) !important; +} + +.el-tabs__item { + font-weight: 500 !important; + transition: all var(--transition-fast) !important; +} + +.el-tabs__item.is-active { + color: var(--primary-color) !important; +} + +.el-tabs__active-bar { + background-color: var(--primary-color) !important; +} + +.el-dialog { + border-radius: var(--radius-xl) !important; + overflow: hidden !important; +} + +.el-dialog__header { + padding: 20px 24px !important; + border-bottom: 1px solid var(--border-light) !important; +} + +.el-dialog__title { + font-weight: 600 !important; + font-size: 18px !important; +} + +.el-dialog__body { + padding: 24px !important; +} + +.el-form-item__label { + font-weight: 500 !important; + color: var(--text-secondary) !important; +} + +.el-slider__bar { + background-color: var(--primary-color) !important; +} + +.el-slider__button { + border-color: var(--primary-color) !important; +} + +.el-switch.is-checked .el-switch__core { + background-color: var(--primary-color) !important; + border-color: var(--primary-color) !important; +} + +.el-alert { + border-radius: var(--radius-md) !important; + border: none !important; +} + +.el-alert--info { + background-color: #EFF6FF !important; +} + +.el-alert--success { + background-color: #ECFDF5 !important; +} + +.el-alert--warning { + background-color: #FFFBEB !important; +} + +.el-alert--error { + background-color: #FEF2F2 !important; +} + +.el-divider { + border-color: var(--border-light) !important; +} + +.el-empty__description { + color: var(--text-tertiary) !important; +} + +.el-loading-mask { + border-radius: var(--radius-lg) !important; +} + +.el-descriptions { + border-radius: var(--radius-md) !important; + overflow: hidden !important; +} + +.el-descriptions__label { + background-color: var(--bg-tertiary) !important; + font-weight: 500 !important; +} + +.fade-in-up { + animation: fadeInUp 0.5s ease-out forwards; +} + +@keyframes fadeInUp { + from { + opacity: 0; + transform: translateY(20px); + } + to { + opacity: 1; + transform: translateY(0); + } +} + +.slide-in-left { + animation: slideInLeft 0.4s ease-out forwards; +} + +@keyframes slideInLeft { + from { + opacity: 0; + transform: translateX(-20px); + } + to { + opacity: 1; + transform: translateX(0); + } +} + +.scale-in { + animation: scaleIn 0.3s ease-out forwards; +} + +@keyframes scaleIn { + from { + opacity: 0; + transform: scale(0.95); + } + to { + opacity: 1; + transform: scale(1); + } +} + +.page-container { + padding: 24px; + min-height: calc(100vh - 60px); + background-color: var(--bg-primary); +} + +.page-header { + margin-bottom: 24px; +} + +.page-title { + font-size: 24px; + font-weight: 700; + color: var(--text-primary); + margin: 0 0 8px 0; + letter-spacing: -0.5px; +} + +.page-desc { + font-size: 14px; + color: var(--text-secondary); + margin: 0; + line-height: 1.6; +} + +.card-icon { + width: 40px; + height: 40px; + display: flex; + align-items: center; + justify-content: center; + border-radius: var(--radius-md); + font-size: 18px; +} + +.card-icon.primary { + background-color: var(--primary-lighter); + color: var(--primary-color); +} + +.card-icon.success { + background-color: #D1FAE5; + color: #059669; +} + +.card-icon.warning { + background-color: #FEF3C7; + color: #D97706; +} + +.card-icon.info { + background-color: #E0E7FF; + color: #4F46E5; +} + +.stat-card { + position: relative; + overflow: hidden; +} + +.stat-card::before { + content: ''; + position: absolute; + top: 0; + left: 0; + right: 0; + height: 3px; + background: linear-gradient(90deg, var(--primary-color), var(--secondary-color)); + opacity: 0; + transition: opacity var(--transition-fast); +} + +.stat-card:hover::before { + opacity: 1; +} + +::-webkit-scrollbar { + width: 8px; + height: 8px; +} + +::-webkit-scrollbar-track { + background: var(--bg-tertiary); + border-radius: 4px; +} + +::-webkit-scrollbar-thumb { + background: var(--text-tertiary); + border-radius: 4px; + transition: background var(--transition-fast); +} + +::-webkit-scrollbar-thumb:hover { + background: var(--text-secondary); +} + +::selection { + background-color: var(--primary-lighter); + color: var(--primary-dark); +} + +@media (max-width: 768px) { + .page-container { + padding: 16px; + } + + .page-title { + font-size: 20px; + } +} diff --git a/ai-service-admin/src/types/embedding.ts b/ai-service-admin/src/types/embedding.ts new file mode 100644 index 0000000..993f994 --- /dev/null +++ b/ai-service-admin/src/types/embedding.ts @@ -0,0 +1,49 @@ +export interface EmbeddingProviderInfo { + name: string + display_name: string + description?: string + config_schema: Record +} + +export interface EmbeddingConfig { + provider: string + config: Record + updated_at?: string +} + +export interface EmbeddingConfigUpdate { + provider: string + config?: Record +} + +export interface EmbeddingTestResult { + success: boolean + dimension: number + latency_ms?: number + message?: string + error?: string +} + +export interface DocumentFormat { + extension: string + name: string + description?: string +} + +export interface EmbeddingProvidersResponse { + providers: EmbeddingProviderInfo[] +} + +export interface EmbeddingConfigUpdateResponse { + success: boolean + message: string +} + +export interface SupportedFormatsResponse { + formats: DocumentFormat[] +} + +export interface EmbeddingTestRequest { + test_text?: string + config?: EmbeddingConfigUpdate +} diff --git a/ai-service-admin/src/types/llm.ts b/ai-service-admin/src/types/llm.ts new file mode 100644 index 0000000..e64bec2 --- /dev/null +++ b/ai-service-admin/src/types/llm.ts @@ -0,0 +1,43 @@ +export interface LLMProviderInfo { + name: string + display_name: string + description?: string + config_schema: Record +} + +export interface LLMConfig { + provider: string + config: Record + updated_at?: string +} + +export interface LLMConfigUpdate { + provider: string + config?: Record +} + +export interface LLMTestResult { + success: boolean + response?: string + latency_ms?: number + prompt_tokens?: number + completion_tokens?: number + total_tokens?: number + message?: string + error?: string +} + +export interface LLMTestRequest { + test_prompt?: string + provider?: string + config?: Record +} + +export interface LLMProvidersResponse { + providers: LLMProviderInfo[] +} + +export interface LLMConfigUpdateResponse { + success: boolean + message: string +} diff --git a/ai-service-admin/src/utils/request.ts b/ai-service-admin/src/utils/request.ts new file mode 100644 index 0000000..7d64d24 --- /dev/null +++ b/ai-service-admin/src/utils/request.ts @@ -0,0 +1,72 @@ +import axios from 'axios' +import { ElMessage, ElMessageBox } from 'element-plus' +import { useTenantStore } from '@/stores/tenant' + +// 创建 axios 实例 +const service = axios.create({ + baseURL: import.meta.env.VITE_APP_BASE_API || '/api', + timeout: 60000 +}) + +// 请求拦截器 +service.interceptors.request.use( + (config) => { + const tenantStore = useTenantStore() + if (tenantStore.currentTenantId) { + config.headers['X-Tenant-Id'] = tenantStore.currentTenantId + } + // TODO: 如果有 token 也可以在这里注入 Authorization + return config + }, + (error) => { + console.log(error) + return Promise.reject(error) + } +) + +// 响应拦截器 +service.interceptors.response.use( + (response) => { + const res = response.data + // 这里可以根据后端的 code 进行统一处理 + return res + }, + (error) => { + console.log('err' + error) + let { message, response } = error + if (response) { + const status = response.status + if (status === 401) { + ElMessageBox.confirm('登录状态已过期,您可以继续留在该页面,或者重新登录', '系统提示', { + confirmButtonText: '重新登录', + cancelButtonText: '取消', + type: 'warning' + }).then(() => { + // TODO: 跳转到登录页或执行退出逻辑 + location.href = '/login' + }) + } else if (status === 403) { + ElMessage({ + message: '当前操作无权限', + type: 'error', + duration: 5 * 1000 + }) + } else { + ElMessage({ + message: message || '后端接口未知异常', + type: 'error', + duration: 5 * 1000 + }) + } + } else { + ElMessage({ + message: '网络连接异常', + type: 'error', + duration: 5 * 1000 + }) + } + return Promise.reject(error) + } +) + +export default service diff --git a/ai-service-admin/src/views/admin/embedding/index.vue b/ai-service-admin/src/views/admin/embedding/index.vue new file mode 100644 index 0000000..e38ad24 --- /dev/null +++ b/ai-service-admin/src/views/admin/embedding/index.vue @@ -0,0 +1,483 @@ + + + + + diff --git a/ai-service-admin/src/views/admin/llm/index.vue b/ai-service-admin/src/views/admin/llm/index.vue new file mode 100644 index 0000000..3d5ff66 --- /dev/null +++ b/ai-service-admin/src/views/admin/llm/index.vue @@ -0,0 +1,470 @@ + + + + + diff --git a/ai-service-admin/src/views/dashboard/index.vue b/ai-service-admin/src/views/dashboard/index.vue new file mode 100644 index 0000000..d7660f5 --- /dev/null +++ b/ai-service-admin/src/views/dashboard/index.vue @@ -0,0 +1,720 @@ + + + + + diff --git a/ai-service-admin/src/views/kb/index.vue b/ai-service-admin/src/views/kb/index.vue new file mode 100644 index 0000000..a6db08e --- /dev/null +++ b/ai-service-admin/src/views/kb/index.vue @@ -0,0 +1,379 @@ + + + + + diff --git a/ai-service-admin/src/views/monitoring/index.vue b/ai-service-admin/src/views/monitoring/index.vue new file mode 100644 index 0000000..569bdb8 --- /dev/null +++ b/ai-service-admin/src/views/monitoring/index.vue @@ -0,0 +1,191 @@ + + + + + diff --git a/ai-service-admin/src/views/rag-lab/index.vue b/ai-service-admin/src/views/rag-lab/index.vue new file mode 100644 index 0000000..b5af4e5 --- /dev/null +++ b/ai-service-admin/src/views/rag-lab/index.vue @@ -0,0 +1,545 @@ + + + + + diff --git a/ai-service-admin/tsconfig.json b/ai-service-admin/tsconfig.json new file mode 100644 index 0000000..33e425c --- /dev/null +++ b/ai-service-admin/tsconfig.json @@ -0,0 +1,22 @@ +{ + "compilerOptions": { + "target": "ESNext", + "useDefineForClassFields": true, + "module": "ESNext", + "moduleResolution": "Node", + "strict": true, + "jsx": "preserve", + "resolveJsonModule": true, + "isolatedModules": true, + "esModuleInterop": true, + "lib": ["ESNext", "DOM"], + "skipLibCheck": true, + "noEmit": true, + "baseUrl": ".", + "paths": { + "@/*": ["src/*"] + } + }, + "include": ["src/**/*.ts", "src/**/*.d.ts", "src/**/*.tsx", "src/**/*.vue"], + "references": [{ "path": "./tsconfig.node.json" }] +} diff --git a/ai-service-admin/tsconfig.node.json b/ai-service-admin/tsconfig.node.json new file mode 100644 index 0000000..7065ca9 --- /dev/null +++ b/ai-service-admin/tsconfig.node.json @@ -0,0 +1,10 @@ +{ + "compilerOptions": { + "composite": true, + "skipLibCheck": true, + "module": "ESNext", + "moduleResolution": "Node", + "allowSyntheticDefaultImports": true + }, + "include": ["vite.config.ts"] +} diff --git a/ai-service-admin/vite.config.ts b/ai-service-admin/vite.config.ts new file mode 100644 index 0000000..6df2553 --- /dev/null +++ b/ai-service-admin/vite.config.ts @@ -0,0 +1,23 @@ +import { defineConfig } from 'vite' +import vue from '@vitejs/plugin-vue' +import path from 'path' + +// https://vitejs.dev/config/ +export default defineConfig({ + plugins: [vue()], + resolve: { + alias: { + '@': path.resolve(__dirname, './src'), + }, + }, + server: { + port: 3000, + proxy: { + '/api': { + target: 'http://localhost:8000', + changeOrigin: true, + rewrite: (path) => path.replace(/^\/api/, ''), + }, + }, + }, +}) diff --git a/ai-service/README.md b/ai-service/README.md new file mode 100644 index 0000000..ac3f951 --- /dev/null +++ b/ai-service/README.md @@ -0,0 +1,74 @@ +# AI Service + +Python AI Service for intelligent chat with RAG support. + +## Features + +- Multi-tenant isolation via X-Tenant-Id header +- SSE streaming support via Accept: text/event-stream +- RAG-powered responses with confidence scoring + +## Prerequisites + +- PostgreSQL 12+ +- Qdrant vector database +- Python 3.10+ + +## Installation + +```bash +pip install -e ".[dev]" +``` + +## Database Initialization + +### Option 1: Using Python script (Recommended) + +```bash +# Create database and tables +python scripts/init_db.py --create-db + +# Or just create tables (database must exist) +python scripts/init_db.py +``` + +### Option 2: Using SQL script + +```bash +# Connect to PostgreSQL and run +psql -U postgres -f scripts/init_db.sql +``` + +## Configuration + +Create a `.env` file in the project root: + +```env +AI_SERVICE_DATABASE_URL=postgresql+asyncpg://postgres:password@localhost:5432/ai_service +AI_SERVICE_QDRANT_URL=http://localhost:6333 +AI_SERVICE_LLM_API_KEY=your-api-key +AI_SERVICE_LLM_BASE_URL=https://api.openai.com/v1 +AI_SERVICE_LLM_MODEL=gpt-4o-mini +AI_SERVICE_DEBUG=true +``` + +## Running + +```bash +uvicorn app.main:app --host 0.0.0.0 --port 8000 +``` + +## API Endpoints + +### Chat API +- `POST /ai/chat` - Generate AI reply (supports SSE streaming) +- `GET /ai/health` - Health check + +### Admin API +- `GET /admin/kb/documents` - List documents +- `POST /admin/kb/documents` - Upload document +- `GET /admin/kb/index/jobs/{jobId}` - Get indexing job status +- `DELETE /admin/kb/documents/{docId}` - Delete document +- `POST /admin/rag/experiments/run` - Run RAG experiment +- `GET /admin/sessions` - List chat sessions +- `GET /admin/sessions/{sessionId}` - Get session details diff --git a/ai-service/app/__init__.py b/ai-service/app/__init__.py new file mode 100644 index 0000000..bb68855 --- /dev/null +++ b/ai-service/app/__init__.py @@ -0,0 +1,4 @@ +""" +AI Service - Python AI Middle Platform +[AC-AISVC-01] FastAPI-based AI chat service with multi-tenant support. +""" diff --git a/ai-service/app/api/__init__.py b/ai-service/app/api/__init__.py new file mode 100644 index 0000000..b726039 --- /dev/null +++ b/ai-service/app/api/__init__.py @@ -0,0 +1,8 @@ +""" +API module for AI Service. +""" + +from app.api.chat import router as chat_router +from app.api.health import router as health_router + +__all__ = ["chat_router", "health_router"] diff --git a/ai-service/app/api/admin/__init__.py b/ai-service/app/api/admin/__init__.py new file mode 100644 index 0000000..40b96bb --- /dev/null +++ b/ai-service/app/api/admin/__init__.py @@ -0,0 +1,14 @@ +""" +Admin API routes for AI Service management. +[AC-ASA-01, AC-ASA-02, AC-ASA-05, AC-ASA-07, AC-ASA-08] Admin management endpoints. +""" + +from app.api.admin.dashboard import router as dashboard_router +from app.api.admin.embedding import router as embedding_router +from app.api.admin.kb import router as kb_router +from app.api.admin.llm import router as llm_router +from app.api.admin.rag import router as rag_router +from app.api.admin.sessions import router as sessions_router +from app.api.admin.tenants import router as tenants_router + +__all__ = ["dashboard_router", "embedding_router", "kb_router", "llm_router", "rag_router", "sessions_router", "tenants_router"] diff --git a/ai-service/app/api/admin/dashboard.py b/ai-service/app/api/admin/dashboard.py new file mode 100644 index 0000000..e9cf199 --- /dev/null +++ b/ai-service/app/api/admin/dashboard.py @@ -0,0 +1,202 @@ +""" +Dashboard statistics endpoints. +Provides overview statistics for the admin dashboard. +""" + +import logging +from typing import Annotated + +from fastapi import APIRouter, Depends, Query +from fastapi.responses import JSONResponse +from sqlalchemy import select, func, desc +from sqlalchemy.ext.asyncio import AsyncSession + +from app.core.database import get_session +from app.core.exceptions import MissingTenantIdException +from app.core.tenant import get_tenant_id +from app.models import ErrorResponse +from app.models.entities import ChatMessage, ChatSession, Document, KnowledgeBase + +logger = logging.getLogger(__name__) + +router = APIRouter(prefix="/admin/dashboard", tags=["Dashboard"]) + +LATENCY_THRESHOLD_MS = 5000 + + +def get_current_tenant_id() -> str: + """Dependency to get current tenant ID or raise exception.""" + tenant_id = get_tenant_id() + if not tenant_id: + raise MissingTenantIdException() + return tenant_id + + +@router.get( + "/stats", + operation_id="getDashboardStats", + summary="Get dashboard statistics", + description="Get overview statistics for the admin dashboard.", + responses={ + 200: {"description": "Dashboard statistics"}, + 401: {"description": "Unauthorized", "model": ErrorResponse}, + 403: {"description": "Forbidden", "model": ErrorResponse}, + }, +) +async def get_dashboard_stats( + tenant_id: Annotated[str, Depends(get_current_tenant_id)], + session: Annotated[AsyncSession, Depends(get_session)], + latency_threshold: int = Query(default=LATENCY_THRESHOLD_MS, description="Latency threshold in ms"), +) -> JSONResponse: + """ + Get dashboard statistics including knowledge bases, messages, and activity. + """ + logger.info(f"Getting dashboard stats: tenant={tenant_id}") + + kb_count_stmt = select(func.count()).select_from(KnowledgeBase).where( + KnowledgeBase.tenant_id == tenant_id + ) + kb_result = await session.execute(kb_count_stmt) + kb_count = kb_result.scalar() or 0 + + msg_count_stmt = select(func.count()).select_from(ChatMessage).where( + ChatMessage.tenant_id == tenant_id + ) + msg_result = await session.execute(msg_count_stmt) + msg_count = msg_result.scalar() or 0 + + doc_count_stmt = select(func.count()).select_from(Document).where( + Document.tenant_id == tenant_id + ) + doc_result = await session.execute(doc_count_stmt) + doc_count = doc_result.scalar() or 0 + + session_count_stmt = select(func.count()).select_from(ChatSession).where( + ChatSession.tenant_id == tenant_id + ) + session_result = await session.execute(session_count_stmt) + session_count = session_result.scalar() or 0 + + total_tokens_stmt = select(func.coalesce(func.sum(ChatMessage.total_tokens), 0)).select_from( + ChatMessage + ).where(ChatMessage.tenant_id == tenant_id) + total_tokens_result = await session.execute(total_tokens_stmt) + total_tokens = total_tokens_result.scalar() or 0 + + prompt_tokens_stmt = select(func.coalesce(func.sum(ChatMessage.prompt_tokens), 0)).select_from( + ChatMessage + ).where(ChatMessage.tenant_id == tenant_id) + prompt_tokens_result = await session.execute(prompt_tokens_stmt) + prompt_tokens = prompt_tokens_result.scalar() or 0 + + completion_tokens_stmt = select(func.coalesce(func.sum(ChatMessage.completion_tokens), 0)).select_from( + ChatMessage + ).where(ChatMessage.tenant_id == tenant_id) + completion_tokens_result = await session.execute(completion_tokens_stmt) + completion_tokens = completion_tokens_result.scalar() or 0 + + ai_requests_stmt = select(func.count()).select_from(ChatMessage).where( + ChatMessage.tenant_id == tenant_id, + ChatMessage.role == "assistant" + ) + ai_requests_result = await session.execute(ai_requests_stmt) + ai_requests_count = ai_requests_result.scalar() or 0 + + avg_latency_stmt = select(func.coalesce(func.avg(ChatMessage.latency_ms), 0)).select_from( + ChatMessage + ).where( + ChatMessage.tenant_id == tenant_id, + ChatMessage.role == "assistant", + ChatMessage.latency_ms.isnot(None) + ) + avg_latency_result = await session.execute(avg_latency_stmt) + avg_latency_ms = float(avg_latency_result.scalar() or 0) + + last_request_stmt = select(ChatMessage.latency_ms, ChatMessage.created_at).where( + ChatMessage.tenant_id == tenant_id, + ChatMessage.role == "assistant" + ).order_by(desc(ChatMessage.created_at)).limit(1) + last_request_result = await session.execute(last_request_stmt) + last_request_row = last_request_result.fetchone() + last_latency_ms = last_request_row[0] if last_request_row else None + last_request_time = last_request_row[1].isoformat() if last_request_row and last_request_row[1] else None + + slow_requests_stmt = select(func.count()).select_from(ChatMessage).where( + ChatMessage.tenant_id == tenant_id, + ChatMessage.role == "assistant", + ChatMessage.latency_ms.isnot(None), + ChatMessage.latency_ms >= latency_threshold + ) + slow_requests_result = await session.execute(slow_requests_stmt) + slow_requests_count = slow_requests_result.scalar() or 0 + + error_requests_stmt = select(func.count()).select_from(ChatMessage).where( + ChatMessage.tenant_id == tenant_id, + ChatMessage.role == "assistant", + ChatMessage.is_error == True + ) + error_requests_result = await session.execute(error_requests_stmt) + error_requests_count = error_requests_result.scalar() or 0 + + p95_latency_stmt = select(func.coalesce( + func.percentile_cont(0.95).within_group(ChatMessage.latency_ms), 0 + )).select_from(ChatMessage).where( + ChatMessage.tenant_id == tenant_id, + ChatMessage.role == "assistant", + ChatMessage.latency_ms.isnot(None) + ) + p95_latency_result = await session.execute(p95_latency_stmt) + p95_latency_ms = float(p95_latency_result.scalar() or 0) + + p99_latency_stmt = select(func.coalesce( + func.percentile_cont(0.99).within_group(ChatMessage.latency_ms), 0 + )).select_from(ChatMessage).where( + ChatMessage.tenant_id == tenant_id, + ChatMessage.role == "assistant", + ChatMessage.latency_ms.isnot(None) + ) + p99_latency_result = await session.execute(p99_latency_stmt) + p99_latency_ms = float(p99_latency_result.scalar() or 0) + + min_latency_stmt = select(func.coalesce(func.min(ChatMessage.latency_ms), 0)).select_from( + ChatMessage + ).where( + ChatMessage.tenant_id == tenant_id, + ChatMessage.role == "assistant", + ChatMessage.latency_ms.isnot(None) + ) + min_latency_result = await session.execute(min_latency_stmt) + min_latency_ms = float(min_latency_result.scalar() or 0) + + max_latency_stmt = select(func.coalesce(func.max(ChatMessage.latency_ms), 0)).select_from( + ChatMessage + ).where( + ChatMessage.tenant_id == tenant_id, + ChatMessage.role == "assistant", + ChatMessage.latency_ms.isnot(None) + ) + max_latency_result = await session.execute(max_latency_stmt) + max_latency_ms = float(max_latency_result.scalar() or 0) + + return JSONResponse( + content={ + "knowledgeBases": kb_count, + "totalMessages": msg_count, + "totalDocuments": doc_count, + "totalSessions": session_count, + "totalTokens": total_tokens, + "promptTokens": prompt_tokens, + "completionTokens": completion_tokens, + "aiRequestsCount": ai_requests_count, + "avgLatencyMs": round(avg_latency_ms, 2), + "lastLatencyMs": last_latency_ms, + "lastRequestTime": last_request_time, + "slowRequestsCount": slow_requests_count, + "errorRequestsCount": error_requests_count, + "p95LatencyMs": round(p95_latency_ms, 2), + "p99LatencyMs": round(p99_latency_ms, 2), + "minLatencyMs": round(min_latency_ms, 2), + "maxLatencyMs": round(max_latency_ms, 2), + "latencyThresholdMs": latency_threshold, + } + ) diff --git a/ai-service/app/api/admin/embedding.py b/ai-service/app/api/admin/embedding.py new file mode 100644 index 0000000..206b27d --- /dev/null +++ b/ai-service/app/api/admin/embedding.py @@ -0,0 +1,132 @@ +""" +Embedding management API endpoints. +[AC-AISVC-38, AC-AISVC-39, AC-AISVC-40, AC-AISVC-41] Embedding model management. +""" + +import logging +from typing import Any + +from fastapi import APIRouter, Depends, Header, HTTPException + +from app.core.exceptions import InvalidRequestException +from app.services.embedding import ( + EmbeddingException, + EmbeddingProviderFactory, + get_embedding_config_manager, +) + +logger = logging.getLogger(__name__) + +router = APIRouter(prefix="/embedding", tags=["Embedding Management"]) + + +def get_tenant_id(x_tenant_id: str = Header(..., alias="X-Tenant-Id")) -> str: + """Extract tenant ID from header.""" + if not x_tenant_id: + raise HTTPException(status_code=400, detail="X-Tenant-Id header is required") + return x_tenant_id + + +@router.get("/providers") +async def list_embedding_providers( + tenant_id: str = Depends(get_tenant_id), +) -> dict[str, Any]: + """ + Get available embedding providers. + [AC-AISVC-38] Returns all registered providers with config schemas. + """ + providers = [] + for name in EmbeddingProviderFactory.get_available_providers(): + info = EmbeddingProviderFactory.get_provider_info(name) + providers.append(info) + + return {"providers": providers} + + +@router.get("/config") +async def get_embedding_config( + tenant_id: str = Depends(get_tenant_id), +) -> dict[str, Any]: + """ + Get current embedding configuration. + [AC-AISVC-39] Returns current provider and config. + """ + manager = get_embedding_config_manager() + return manager.get_full_config() + + +@router.put("/config") +async def update_embedding_config( + request: dict[str, Any], + tenant_id: str = Depends(get_tenant_id), +) -> dict[str, Any]: + """ + Update embedding configuration. + [AC-AISVC-40, AC-AISVC-31] Updates config with hot-reload support. + """ + provider = request.get("provider") + config = request.get("config", {}) + + if not provider: + raise InvalidRequestException("provider is required") + + if provider not in EmbeddingProviderFactory.get_available_providers(): + raise InvalidRequestException( + f"Unknown provider: {provider}. " + f"Available: {EmbeddingProviderFactory.get_available_providers()}" + ) + + manager = get_embedding_config_manager() + + try: + await manager.update_config(provider, config) + return { + "success": True, + "message": f"Configuration updated to use {provider}", + } + except EmbeddingException as e: + raise InvalidRequestException(str(e)) + + +@router.post("/test") +async def test_embedding( + request: dict[str, Any] | None = None, + tenant_id: str = Depends(get_tenant_id), +) -> dict[str, Any]: + """ + Test embedding connection. + [AC-AISVC-41] Tests provider connectivity and returns dimension info. + """ + request = request or {} + test_text = request.get("test_text", "这是一个测试文本") + config = request.get("config") + provider = request.get("provider") + + manager = get_embedding_config_manager() + + result = await manager.test_connection( + test_text=test_text, + provider=provider, + config=config, + ) + + return result + + +@router.get("/formats") +async def get_supported_document_formats( + tenant_id: str = Depends(get_tenant_id), +) -> dict[str, Any]: + """ + Get supported document formats for embedding. + Returns list of supported file extensions. + """ + from app.services.document import get_supported_document_formats, DocumentParserFactory + + formats = get_supported_document_formats() + parser_info = DocumentParserFactory.get_parser_info() + + return { + "formats": formats, + "parsers": parser_info, + } diff --git a/ai-service/app/api/admin/kb.py b/ai-service/app/api/admin/kb.py new file mode 100644 index 0000000..e150d72 --- /dev/null +++ b/ai-service/app/api/admin/kb.py @@ -0,0 +1,593 @@ +""" +Knowledge Base management endpoints. +[AC-ASA-01, AC-ASA-02, AC-ASA-08] Document upload, list, and index job status. +""" + +import logging +import os +import uuid +from dataclasses import dataclass +from typing import Annotated, Optional + +import tiktoken +from fastapi import APIRouter, BackgroundTasks, Depends, Query, UploadFile, File, Form +from fastapi.responses import JSONResponse +from sqlalchemy import select +from sqlalchemy.ext.asyncio import AsyncSession + +from app.core.database import get_session +from app.core.exceptions import MissingTenantIdException +from app.core.tenant import get_tenant_id +from app.models import ErrorResponse +from app.models.entities import DocumentStatus, IndexJob, IndexJobStatus +from app.services.kb import KBService + +logger = logging.getLogger(__name__) + +router = APIRouter(prefix="/admin/kb", tags=["KB Management"]) + + +@dataclass +class TextChunk: + """Text chunk with metadata.""" + text: str + start_token: int + end_token: int + page: int | None = None + source: str | None = None + + +def chunk_text_by_lines( + text: str, + min_line_length: int = 10, + source: str | None = None, +) -> list[TextChunk]: + """ + 按行分块,每行作为一个独立的检索单元。 + + Args: + text: 要分块的文本 + min_line_length: 最小行长度,低于此长度的行会被跳过 + source: 来源文件路径(可选) + + Returns: + 分块列表,每个块对应一行文本 + """ + lines = text.split('\n') + chunks: list[TextChunk] = [] + + for i, line in enumerate(lines): + line = line.strip() + + if len(line) < min_line_length: + continue + + chunks.append(TextChunk( + text=line, + start_token=i, + end_token=i + 1, + page=None, + source=source, + )) + + return chunks + + +def chunk_text_with_tiktoken( + text: str, + chunk_size: int = 512, + overlap: int = 100, + page: int | None = None, + source: str | None = None, +) -> list[TextChunk]: + """ + 使用 tiktoken 按 token 数分块,支持重叠分块。 + + Args: + text: 要分块的文本 + chunk_size: 每个块的最大 token 数 + overlap: 块之间的重叠 token 数 + page: 页码(可选) + source: 来源文件路径(可选) + + Returns: + 分块列表,每个块包含文本及起始/结束位置 + """ + encoding = tiktoken.get_encoding("cl100k_base") + tokens = encoding.encode(text) + chunks: list[TextChunk] = [] + start = 0 + + while start < len(tokens): + end = min(start + chunk_size, len(tokens)) + chunk_tokens = tokens[start:end] + chunk_text = encoding.decode(chunk_tokens) + chunks.append(TextChunk( + text=chunk_text, + start_token=start, + end_token=end, + page=page, + source=source, + )) + if end == len(tokens): + break + start += chunk_size - overlap + + return chunks + + +def get_current_tenant_id() -> str: + """Dependency to get current tenant ID or raise exception.""" + tenant_id = get_tenant_id() + if not tenant_id: + raise MissingTenantIdException() + return tenant_id + + +@router.get( + "/knowledge-bases", + operation_id="listKnowledgeBases", + summary="Query knowledge base list", + description="Get list of knowledge bases for the current tenant.", + responses={ + 200: {"description": "Knowledge base list"}, + 401: {"description": "Unauthorized", "model": ErrorResponse}, + 403: {"description": "Forbidden", "model": ErrorResponse}, + }, +) +async def list_knowledge_bases( + tenant_id: Annotated[str, Depends(get_current_tenant_id)], + session: Annotated[AsyncSession, Depends(get_session)], +) -> JSONResponse: + """ + List all knowledge bases for the current tenant. + """ + logger.info(f"Listing knowledge bases: tenant={tenant_id}") + + kb_service = KBService(session) + knowledge_bases = await kb_service.list_knowledge_bases(tenant_id) + + kb_ids = [str(kb.id) for kb in knowledge_bases] + + doc_counts = {} + if kb_ids: + from sqlalchemy import func + from app.models.entities import Document + count_stmt = ( + select(Document.kb_id, func.count(Document.id).label("count")) + .where(Document.tenant_id == tenant_id, Document.kb_id.in_(kb_ids)) + .group_by(Document.kb_id) + ) + count_result = await session.execute(count_stmt) + for row in count_result: + doc_counts[row.kb_id] = row.count + + data = [] + for kb in knowledge_bases: + kb_id_str = str(kb.id) + data.append({ + "id": kb_id_str, + "name": kb.name, + "documentCount": doc_counts.get(kb_id_str, 0), + "createdAt": kb.created_at.isoformat() + "Z", + }) + + return JSONResponse(content={"data": data}) + + +@router.get( + "/documents", + operation_id="listDocuments", + summary="Query document list", + description="[AC-ASA-08] Get list of documents with pagination and filtering.", + responses={ + 200: {"description": "Document list with pagination"}, + 401: {"description": "Unauthorized", "model": ErrorResponse}, + 403: {"description": "Forbidden", "model": ErrorResponse}, + }, +) +async def list_documents( + tenant_id: Annotated[str, Depends(get_current_tenant_id)], + session: Annotated[AsyncSession, Depends(get_session)], + kb_id: Annotated[Optional[str], Query()] = None, + status: Annotated[Optional[str], Query()] = None, + page: int = Query(1, ge=1), + page_size: int = Query(20, ge=1, le=100), +) -> JSONResponse: + """ + [AC-ASA-08] List documents with filtering and pagination. + """ + logger.info( + f"[AC-ASA-08] Listing documents: tenant={tenant_id}, kb_id={kb_id}, " + f"status={status}, page={page}, page_size={page_size}" + ) + + kb_service = KBService(session) + documents, total = await kb_service.list_documents( + tenant_id=tenant_id, + kb_id=kb_id, + status=status, + page=page, + page_size=page_size, + ) + + total_pages = (total + page_size - 1) // page_size if total > 0 else 0 + + data = [] + for doc in documents: + job_stmt = select(IndexJob).where( + IndexJob.tenant_id == tenant_id, + IndexJob.doc_id == doc.id, + ).order_by(IndexJob.created_at.desc()) + job_result = await session.execute(job_stmt) + latest_job = job_result.scalar_one_or_none() + + data.append({ + "docId": str(doc.id), + "kbId": doc.kb_id, + "fileName": doc.file_name, + "status": doc.status, + "jobId": str(latest_job.id) if latest_job else None, + "createdAt": doc.created_at.isoformat() + "Z", + "updatedAt": doc.updated_at.isoformat() + "Z", + }) + + return JSONResponse( + content={ + "data": data, + "pagination": { + "page": page, + "pageSize": page_size, + "total": total, + "totalPages": total_pages, + }, + } + ) + + +@router.post( + "/documents", + operation_id="uploadDocument", + summary="Upload/import document", + description="[AC-ASA-01] Upload document and trigger indexing job.", + responses={ + 202: {"description": "Accepted - async indexing job started"}, + 400: {"description": "Bad Request - unsupported format"}, + 401: {"description": "Unauthorized", "model": ErrorResponse}, + 403: {"description": "Forbidden", "model": ErrorResponse}, + }, +) +async def upload_document( + tenant_id: Annotated[str, Depends(get_current_tenant_id)], + session: Annotated[AsyncSession, Depends(get_session)], + background_tasks: BackgroundTasks, + file: UploadFile = File(...), + kb_id: str = Form(...), +) -> JSONResponse: + """ + [AC-ASA-01] Upload document and create indexing job. + [AC-AISVC-33, AC-AISVC-34, AC-AISVC-35, AC-AISVC-37] Support multiple document formats. + """ + from app.services.document import get_supported_document_formats, UnsupportedFormatError + from pathlib import Path + + logger.info( + f"[AC-ASA-01] Uploading document: tenant={tenant_id}, " + f"kb_id={kb_id}, filename={file.filename}" + ) + + file_ext = Path(file.filename or "").suffix.lower() + supported_formats = get_supported_document_formats() + + if file_ext and file_ext not in supported_formats: + return JSONResponse( + status_code=400, + content={ + "code": "UNSUPPORTED_FORMAT", + "message": f"Unsupported file format: {file_ext}", + "details": { + "supported_formats": supported_formats, + }, + }, + ) + + kb_service = KBService(session) + + kb = await kb_service.get_or_create_kb(tenant_id, kb_id) + + file_content = await file.read() + document, job = await kb_service.upload_document( + tenant_id=tenant_id, + kb_id=str(kb.id), + file_name=file.filename or "unknown", + file_content=file_content, + file_type=file.content_type, + ) + + await session.commit() + + background_tasks.add_task( + _index_document, tenant_id, str(job.id), str(document.id), file_content, file.filename + ) + + return JSONResponse( + status_code=202, + content={ + "jobId": str(job.id), + "docId": str(document.id), + "status": job.status, + }, + ) + + +async def _index_document(tenant_id: str, job_id: str, doc_id: str, content: bytes, filename: str | None = None): + """ + Background indexing task. + [AC-AISVC-33, AC-AISVC-34, AC-AISVC-35] Uses document parsing and pluggable embedding. + """ + from app.core.database import async_session_maker + from app.services.kb import KBService + from app.core.qdrant_client import get_qdrant_client + from app.services.embedding import get_embedding_provider + from app.services.document import parse_document, UnsupportedFormatError, DocumentParseException, PageText + from qdrant_client.models import PointStruct + import asyncio + import tempfile + from pathlib import Path + + logger.info(f"[INDEX] Starting indexing: tenant={tenant_id}, job_id={job_id}, doc_id={doc_id}, filename={filename}") + await asyncio.sleep(1) + + async with async_session_maker() as session: + kb_service = KBService(session) + try: + await kb_service.update_job_status( + tenant_id, job_id, IndexJobStatus.PROCESSING.value, progress=10 + ) + await session.commit() + + parse_result = None + text = None + file_ext = Path(filename or "").suffix.lower() + logger.info(f"[INDEX] File extension: {file_ext}, content size: {len(content)} bytes") + + text_extensions = {".txt", ".md", ".markdown", ".rst", ".log", ".json", ".xml", ".yaml", ".yml"} + + if file_ext in text_extensions or not file_ext: + logger.info(f"[INDEX] Treating as text file, trying multiple encodings") + text = None + for encoding in ["utf-8", "gbk", "gb2312", "gb18030", "big5", "utf-16", "latin-1"]: + try: + text = content.decode(encoding) + logger.info(f"[INDEX] Successfully decoded with encoding: {encoding}") + break + except (UnicodeDecodeError, LookupError): + continue + + if text is None: + text = content.decode("utf-8", errors="replace") + logger.warning(f"[INDEX] Failed to decode with known encodings, using utf-8 with replacement") + else: + logger.info(f"[INDEX] Binary file detected, will parse with document parser") + await kb_service.update_job_status( + tenant_id, job_id, IndexJobStatus.PROCESSING.value, progress=15 + ) + await session.commit() + + with tempfile.NamedTemporaryFile(delete=False, suffix=file_ext) as tmp_file: + tmp_file.write(content) + tmp_path = tmp_file.name + + logger.info(f"[INDEX] Temp file created: {tmp_path}") + + try: + logger.info(f"[INDEX] Starting document parsing for {file_ext}...") + parse_result = parse_document(tmp_path) + text = parse_result.text + logger.info( + f"[INDEX] Parsed document SUCCESS: {filename}, " + f"chars={len(text)}, format={parse_result.metadata.get('format')}, " + f"pages={len(parse_result.pages) if parse_result.pages else 'N/A'}, " + f"metadata={parse_result.metadata}" + ) + if len(text) < 100: + logger.warning(f"[INDEX] Parsed text is very short, preview: {text[:200]}") + except UnsupportedFormatError as e: + logger.error(f"[INDEX] UnsupportedFormatError: {e}") + text = content.decode("utf-8", errors="ignore") + except DocumentParseException as e: + logger.error(f"[INDEX] DocumentParseException: {e}, details={getattr(e, 'details', {})}") + text = content.decode("utf-8", errors="ignore") + except Exception as e: + logger.error(f"[INDEX] Unexpected parsing error: {type(e).__name__}: {e}") + text = content.decode("utf-8", errors="ignore") + finally: + Path(tmp_path).unlink(missing_ok=True) + logger.info(f"[INDEX] Temp file cleaned up") + + logger.info(f"[INDEX] Final text length: {len(text)} chars") + if len(text) < 50: + logger.warning(f"[INDEX] Text too short, preview: {repr(text[:200])}") + + await kb_service.update_job_status( + tenant_id, job_id, IndexJobStatus.PROCESSING.value, progress=20 + ) + await session.commit() + + logger.info(f"[INDEX] Getting embedding provider...") + embedding_provider = await get_embedding_provider() + logger.info(f"[INDEX] Embedding provider: {type(embedding_provider).__name__}") + + all_chunks: list[TextChunk] = [] + + if parse_result and parse_result.pages: + logger.info(f"[INDEX] PDF with {len(parse_result.pages)} pages, using line-based chunking with page metadata") + for page in parse_result.pages: + page_chunks = chunk_text_by_lines( + page.text, + min_line_length=10, + source=filename, + ) + for pc in page_chunks: + pc.page = page.page + all_chunks.extend(page_chunks) + logger.info(f"[INDEX] Total chunks from PDF: {len(all_chunks)}") + else: + logger.info(f"[INDEX] Using line-based chunking") + all_chunks = chunk_text_by_lines( + text, + min_line_length=10, + source=filename, + ) + logger.info(f"[INDEX] Total chunks: {len(all_chunks)}") + + qdrant = await get_qdrant_client() + await qdrant.ensure_collection_exists(tenant_id) + + points = [] + total_chunks = len(all_chunks) + for i, chunk in enumerate(all_chunks): + embedding = await embedding_provider.embed(chunk.text) + + payload = { + "text": chunk.text, + "source": doc_id, + "chunk_index": i, + "start_token": chunk.start_token, + "end_token": chunk.end_token, + } + if chunk.page is not None: + payload["page"] = chunk.page + if chunk.source: + payload["filename"] = chunk.source + + points.append( + PointStruct( + id=str(uuid.uuid4()), + vector=embedding, + payload=payload, + ) + ) + + progress = 20 + int((i + 1) / total_chunks * 70) + if i % 10 == 0 or i == total_chunks - 1: + await kb_service.update_job_status( + tenant_id, job_id, IndexJobStatus.PROCESSING.value, progress=progress + ) + await session.commit() + + if points: + logger.info(f"[INDEX] Upserting {len(points)} vectors to Qdrant...") + await qdrant.upsert_vectors(tenant_id, points) + + await kb_service.update_job_status( + tenant_id, job_id, IndexJobStatus.COMPLETED.value, progress=100 + ) + await session.commit() + + logger.info( + f"[INDEX] COMPLETED: tenant={tenant_id}, " + f"job_id={job_id}, chunks={len(all_chunks)}, text_len={len(text)}" + ) + + except Exception as e: + import traceback + logger.error(f"[INDEX] FAILED: {e}\n{traceback.format_exc()}") + await session.rollback() + async with async_session_maker() as error_session: + kb_service = KBService(error_session) + await kb_service.update_job_status( + tenant_id, job_id, IndexJobStatus.FAILED.value, + progress=0, error_msg=str(e) + ) + await error_session.commit() + + +@router.get( + "/index/jobs/{job_id}", + operation_id="getIndexJob", + summary="Query index job status", + description="[AC-ASA-02] Get indexing job status and progress.", + responses={ + 200: {"description": "Job status details"}, + 401: {"description": "Unauthorized", "model": ErrorResponse}, + 403: {"description": "Forbidden", "model": ErrorResponse}, + }, +) +async def get_index_job( + tenant_id: Annotated[str, Depends(get_current_tenant_id)], + session: Annotated[AsyncSession, Depends(get_session)], + job_id: str, +) -> JSONResponse: + """ + [AC-ASA-02] Get indexing job status with progress. + """ + logger.info( + f"[AC-ASA-02] Getting job status: tenant={tenant_id}, job_id={job_id}" + ) + + kb_service = KBService(session) + job = await kb_service.get_index_job(tenant_id, job_id) + + if not job: + return JSONResponse( + status_code=404, + content={ + "code": "JOB_NOT_FOUND", + "message": f"Job {job_id} not found", + }, + ) + + return JSONResponse( + content={ + "jobId": str(job.id), + "docId": str(job.doc_id), + "status": job.status, + "progress": job.progress, + "errorMsg": job.error_msg, + } + ) + + +@router.delete( + "/documents/{doc_id}", + operation_id="deleteDocument", + summary="Delete document", + description="[AC-ASA-08] Delete a document and its associated files.", + responses={ + 200: {"description": "Document deleted"}, + 404: {"description": "Document not found"}, + 401: {"description": "Unauthorized", "model": ErrorResponse}, + 403: {"description": "Forbidden", "model": ErrorResponse}, + }, +) +async def delete_document( + tenant_id: Annotated[str, Depends(get_current_tenant_id)], + session: Annotated[AsyncSession, Depends(get_session)], + doc_id: str, +) -> JSONResponse: + """ + [AC-ASA-08] Delete a document. + """ + logger.info( + f"[AC-ASA-08] Deleting document: tenant={tenant_id}, doc_id={doc_id}" + ) + + kb_service = KBService(session) + deleted = await kb_service.delete_document(tenant_id, doc_id) + + if not deleted: + return JSONResponse( + status_code=404, + content={ + "code": "DOCUMENT_NOT_FOUND", + "message": f"Document {doc_id} not found", + }, + ) + + return JSONResponse( + content={ + "success": True, + "message": "Document deleted", + } + ) diff --git a/ai-service/app/api/admin/kb_optimized.py b/ai-service/app/api/admin/kb_optimized.py new file mode 100644 index 0000000..9bdfe2f --- /dev/null +++ b/ai-service/app/api/admin/kb_optimized.py @@ -0,0 +1,330 @@ +""" +Knowledge base management API with RAG optimization features. +Reference: rag-optimization/spec.md Section 4.2 +""" + +import logging +from datetime import date +from typing import Any + +from fastapi import APIRouter, Depends, HTTPException, status +from pydantic import BaseModel, Field +from sqlalchemy import select +from sqlalchemy.ext.asyncio import AsyncSession + +from app.core.database import get_session +from app.services.retrieval import ( + ChunkMetadata, + ChunkMetadataModel, + IndexingProgress, + IndexingResult, + KnowledgeIndexer, + MetadataFilter, + RetrievalStrategy, + get_knowledge_indexer, +) + +logger = logging.getLogger(__name__) + +router = APIRouter(prefix="/api/kb", tags=["Knowledge Base"]) + + +class IndexDocumentRequest(BaseModel): + """Request to index a document.""" + tenant_id: str = Field(..., description="Tenant ID") + document_id: str = Field(..., description="Document ID") + text: str = Field(..., description="Document text content") + metadata: ChunkMetadataModel | None = Field(default=None, description="Document metadata") + + +class IndexDocumentResponse(BaseModel): + """Response from document indexing.""" + success: bool + total_chunks: int + indexed_chunks: int + failed_chunks: int + elapsed_seconds: float + error_message: str | None = None + + +class IndexingProgressResponse(BaseModel): + """Response with current indexing progress.""" + total_chunks: int + processed_chunks: int + failed_chunks: int + progress_percent: int + elapsed_seconds: float + current_document: str + + +class MetadataFilterRequest(BaseModel): + """Request for metadata filtering.""" + categories: list[str] | None = None + target_audiences: list[str] | None = None + departments: list[str] | None = None + valid_only: bool = True + min_priority: int | None = None + keywords: list[str] | None = None + + +class RetrieveRequest(BaseModel): + """Request for knowledge retrieval.""" + tenant_id: str = Field(..., description="Tenant ID") + query: str = Field(..., description="Search query") + top_k: int = Field(default=10, ge=1, le=50, description="Number of results") + filters: MetadataFilterRequest | None = Field(default=None, description="Metadata filters") + strategy: RetrievalStrategy = Field(default=RetrievalStrategy.HYBRID, description="Retrieval strategy") + + +class RetrieveResponse(BaseModel): + """Response from knowledge retrieval.""" + hits: list[dict[str, Any]] + total_hits: int + max_score: float + is_insufficient: bool + diagnostics: dict[str, Any] + + +class MetadataOptionsResponse(BaseModel): + """Response with available metadata options.""" + categories: list[str] + departments: list[str] + target_audiences: list[str] + priorities: list[int] + + +@router.post("/index", response_model=IndexDocumentResponse) +async def index_document( + request: IndexDocumentRequest, + session: AsyncSession = Depends(get_session), +): + """ + Index a document with optimized embedding. + + Features: + - Task prefixes (search_document:) for document embedding + - Multi-dimensional vectors (256/512/768) + - Metadata support + """ + try: + index = get_knowledge_indexer() + + chunk_metadata = None + if request.metadata: + chunk_metadata = ChunkMetadata( + category=request.metadata.category, + subcategory=request.metadata.subcategory, + target_audience=request.metadata.target_audience, + source_doc=request.metadata.source_doc, + source_url=request.metadata.source_url, + department=request.metadata.department, + priority=request.metadata.priority, + keywords=request.metadata.keywords, + ) + + result = await index.index_document( + tenant_id=request.tenant_id, + document_id=request.document_id, + text=request.text, + metadata=chunk_metadata, + ) + + return IndexDocumentResponse( + success=result.success, + total_chunks=result.total_chunks, + indexed_chunks=result.indexed_chunks, + failed_chunks=result.failed_chunks, + elapsed_seconds=result.elapsed_seconds, + error_message=result.error_message, + ) + + except Exception as e: + logger.error(f"[KB-API] Failed to index document: {e}") + raise HTTPException( + status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, + detail=f"索引失败: {str(e)}" + ) + + +@router.get("/index/progress", response_model=IndexingProgressResponse | None) +async def get_indexing_progress(): + """Get current indexing progress.""" + try: + index = get_knowledge_indexer() + progress = index.get_progress() + + if progress is None: + return None + + return IndexingProgressResponse( + total_chunks=progress.total_chunks, + processed_chunks=progress.processed_chunks, + failed_chunks=progress.failed_chunks, + progress_percent=progress.progress_percent, + elapsed_seconds=progress.elapsed_seconds, + current_document=progress.current_document, + ) + + except Exception as e: + logger.error(f"[KB-API] Failed to get progress: {e}") + raise HTTPException( + status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, + detail=f"获取进度失败: {str(e)}" + ) + + +@router.post("/retrieve", response_model=RetrieveResponse) +async def retrieve_knowledge(request: RetrieveRequest): + """ + Retrieve knowledge using optimized RAG. + + Strategies: + - vector: Simple vector search + - bm25: BM25 keyword search + - hybrid: RRF combination of vector + BM25 (default) + - two_stage: Two-stage retrieval with Matryoshka dimensions + """ + try: + from app.services.retrieval.optimized_retriever import get_optimized_retriever + from app.services.retrieval.base import RetrievalContext + + retriever = await get_optimized_retriever() + + metadata_filter = None + if request.filters: + filter_dict = request.filters.model_dump(exclude_none=True) + metadata_filter = MetadataFilter(**filter_dict) + + ctx = RetrievalContext( + tenant_id=request.tenant_id, + query=request.query, + ) + + if metadata_filter: + ctx.metadata = {"filter": metadata_filter.to_qdrant_filter()} + + result = await retriever.retrieve(ctx) + + return RetrieveResponse( + hits=[ + { + "text": hit.text, + "score": hit.score, + "source": hit.source, + "metadata": hit.metadata, + } + for hit in result.hits + ], + total_hits=result.hit_count, + max_score=result.max_score, + is_insufficient=result.diagnostics.get("is_insufficient", False), + diagnostics=result.diagnostics or {}, + ) + + except Exception as e: + logger.error(f"[KB-API] Failed to retrieve: {e}") + raise HTTPException( + status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, + detail=f"检索失败: {str(e)}" + ) + + +@router.get("/metadata/options", response_model=MetadataOptionsResponse) +async def get_metadata_options(): + """ + Get available metadata options for filtering. + These would typically be loaded from a database. + """ + try: + return MetadataOptionsResponse( + categories=[ + "课程咨询", + "考试政策", + "学籍管理", + "奖助学金", + "宿舍管理", + "校园服务", + "就业指导", + "其他", + ], + departments=[ + "教务处", + "学生处", + "财务处", + "后勤处", + "就业指导中心", + "图书馆", + "信息中心", + ], + target_audiences=[ + "本科生", + "研究生", + "留学生", + "新生", + "毕业生", + "教职工", + ], + priorities=list(range(1, 11)), + ) + + except Exception as e: + logger.error(f"[KB-API] Failed to get metadata options: {e}") + raise HTTPException( + status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, + detail=f"获取选项失败: {str(e)}" + ) + + +@router.post("/reindex") +async def reindex_all( + tenant_id: str, + session: AsyncSession = Depends(get_session), +): + """ + Reindex all documents for a tenant with optimized embedding. + This would typically read from the documents table and reindex. + """ + try: + from app.models.entities import Document, DocumentStatus + + stmt = select(Document).where( + Document.tenant_id == tenant_id, + Document.status == DocumentStatus.COMPLETED.value, + ) + result = await session.execute(stmt) + documents = result.scalars().all() + + index = get_knowledge_indexer() + + total_indexed = 0 + total_failed = 0 + + for doc in documents: + if doc.file_path: + import os + if os.path.exists(doc.file_path): + with open(doc.file_path, 'r', encoding='utf-8') as f: + text = f.read() + + result = await index.index_document( + tenant_id=tenant_id, + document_id=str(doc.id), + text=text, + ) + + total_indexed += result.indexed_chunks + total_failed += result.failed_chunks + + return { + "success": True, + "total_documents": len(documents), + "total_indexed": total_indexed, + "total_failed": total_failed, + } + + except Exception as e: + logger.error(f"[KB-API] Failed to reindex: {e}") + raise HTTPException( + status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, + detail=f"重新索引失败: {str(e)}" + ) diff --git a/ai-service/app/api/admin/llm.py b/ai-service/app/api/admin/llm.py new file mode 100644 index 0000000..fa37891 --- /dev/null +++ b/ai-service/app/api/admin/llm.py @@ -0,0 +1,152 @@ +""" +LLM Configuration Management API. +[AC-ASA-14, AC-ASA-15, AC-ASA-16, AC-ASA-17, AC-ASA-18] LLM provider management endpoints. +""" + +import logging +from typing import Any + +from fastapi import APIRouter, Depends, Header, HTTPException + +from app.services.llm.factory import ( + LLMConfigManager, + LLMProviderFactory, + get_llm_config_manager, +) + +logger = logging.getLogger(__name__) + +router = APIRouter(prefix="/admin/llm", tags=["LLM Management"]) + + +def get_tenant_id(x_tenant_id: str = Header(..., alias="X-Tenant-Id")) -> str: + """Extract tenant ID from header.""" + if not x_tenant_id: + raise HTTPException(status_code=400, detail="X-Tenant-Id header is required") + return x_tenant_id + + +@router.get("/providers") +async def list_providers( + tenant_id: str = Depends(get_tenant_id), +) -> dict[str, Any]: + """ + List all available LLM providers. + [AC-ASA-15] Returns provider list with configuration schemas. + """ + logger.info(f"[AC-ASA-15] Listing LLM providers for tenant={tenant_id}") + + providers = LLMProviderFactory.get_providers() + return { + "providers": [ + { + "name": p.name, + "display_name": p.display_name, + "description": p.description, + "config_schema": p.config_schema, + } + for p in providers + ], + } + + +@router.get("/config") +async def get_config( + tenant_id: str = Depends(get_tenant_id), +) -> dict[str, Any]: + """ + Get current LLM configuration. + [AC-ASA-14] Returns current provider and config. + """ + logger.info(f"[AC-ASA-14] Getting LLM config for tenant={tenant_id}") + + manager = get_llm_config_manager() + config = manager.get_current_config() + + masked_config = _mask_secrets(config.get("config", {})) + + return { + "provider": config["provider"], + "config": masked_config, + } + + +@router.put("/config") +async def update_config( + body: dict[str, Any], + tenant_id: str = Depends(get_tenant_id), +) -> dict[str, Any]: + """ + Update LLM configuration. + [AC-ASA-16] Updates provider and config with validation. + """ + provider = body.get("provider") + config = body.get("config", {}) + + logger.info(f"[AC-ASA-16] Updating LLM config for tenant={tenant_id}, provider={provider}") + + if not provider: + return { + "success": False, + "message": "Provider is required", + } + + try: + manager = get_llm_config_manager() + await manager.update_config(provider, config) + + return { + "success": True, + "message": f"LLM configuration updated to {provider}", + } + + except ValueError as e: + logger.error(f"[AC-ASA-16] Invalid LLM config: {e}") + return { + "success": False, + "message": str(e), + } + + +@router.post("/test") +async def test_connection( + body: dict[str, Any] | None = None, + tenant_id: str = Depends(get_tenant_id), +) -> dict[str, Any]: + """ + Test LLM connection. + [AC-ASA-17, AC-ASA-18] Tests connection and returns response. + """ + body = body or {} + + test_prompt = body.get("test_prompt", "你好,请简单介绍一下自己。") + provider = body.get("provider") + config = body.get("config") + + logger.info( + f"[AC-ASA-17] Testing LLM connection for tenant={tenant_id}, " + f"provider={provider or 'current'}" + ) + + manager = get_llm_config_manager() + result = await manager.test_connection( + test_prompt=test_prompt, + provider=provider, + config=config, + ) + + return result + + +def _mask_secrets(config: dict[str, Any]) -> dict[str, Any]: + """Mask secret fields in config for display.""" + masked = {} + for key, value in config.items(): + if key in ("api_key", "password", "secret"): + if value: + masked[key] = f"{str(value)[:4]}***" + else: + masked[key] = "" + else: + masked[key] = value + return masked diff --git a/ai-service/app/api/admin/rag.py b/ai-service/app/api/admin/rag.py new file mode 100644 index 0000000..e5b46f1 --- /dev/null +++ b/ai-service/app/api/admin/rag.py @@ -0,0 +1,330 @@ +""" +RAG Lab endpoints for debugging and experimentation. +[AC-ASA-05, AC-ASA-19, AC-ASA-20, AC-ASA-21, AC-ASA-22] RAG experiment with AI output. +""" + +import json +import logging +import time +from typing import Annotated, Any, List + +from fastapi import APIRouter, Depends, Body +from fastapi.responses import JSONResponse, StreamingResponse +from pydantic import BaseModel, Field + +from app.core.config import get_settings +from app.core.exceptions import MissingTenantIdException +from app.core.prompts import format_evidence_for_prompt, build_user_prompt_with_evidence +from app.core.tenant import get_tenant_id +from app.models import ErrorResponse +from app.services.retrieval.vector_retriever import get_vector_retriever +from app.services.retrieval.optimized_retriever import get_optimized_retriever +from app.services.retrieval.base import RetrievalContext +from app.services.llm.factory import get_llm_config_manager + +logger = logging.getLogger(__name__) + +router = APIRouter(prefix="/admin/rag", tags=["RAG Lab"]) + + +def get_current_tenant_id() -> str: + """Dependency to get current tenant ID or raise exception.""" + tenant_id = get_tenant_id() + if not tenant_id: + raise MissingTenantIdException() + return tenant_id + + +class RAGExperimentRequest(BaseModel): + query: str = Field(..., description="Query text for retrieval") + kb_ids: List[str] | None = Field(default=None, description="Knowledge base IDs to search") + top_k: int = Field(default=5, description="Number of results to retrieve") + score_threshold: float = Field(default=0.5, description="Minimum similarity score") + generate_response: bool = Field(default=True, description="Whether to generate AI response") + llm_provider: str | None = Field(default=None, description="Specific LLM provider to use") + + +class AIResponse(BaseModel): + content: str + prompt_tokens: int = 0 + completion_tokens: int = 0 + total_tokens: int = 0 + latency_ms: float = 0 + model: str = "" + + +class RAGExperimentResult(BaseModel): + query: str + retrieval_results: List[dict] = [] + final_prompt: str = "" + ai_response: AIResponse | None = None + total_latency_ms: float = 0 + diagnostics: dict[str, Any] = {} + + +@router.post( + "/experiments/run", + operation_id="runRagExperiment", + summary="Run RAG debugging experiment with AI output", + description="[AC-ASA-05, AC-ASA-19, AC-ASA-21, AC-ASA-22] Trigger RAG experiment with retrieval, prompt generation, and AI response.", + responses={ + 200: {"description": "Experiment results with retrieval, prompt, and AI response"}, + 401: {"description": "Unauthorized", "model": ErrorResponse}, + 403: {"description": "Forbidden", "model": ErrorResponse}, + }, +) +async def run_rag_experiment( + tenant_id: Annotated[str, Depends(get_current_tenant_id)], + request: RAGExperimentRequest = Body(...), +) -> JSONResponse: + """ + [AC-ASA-05, AC-ASA-19, AC-ASA-21, AC-ASA-22] Run RAG experiment and return retrieval results with AI response. + """ + start_time = time.time() + + logger.info( + f"[AC-ASA-05] Running RAG experiment: tenant={tenant_id}, " + f"query={request.query[:50]}..., kb_ids={request.kb_ids}, " + f"generate_response={request.generate_response}" + ) + + settings = get_settings() + top_k = request.top_k or settings.rag_top_k + threshold = request.score_threshold or settings.rag_score_threshold + + try: + # Use optimized retriever with RAG enhancements + retriever = await get_optimized_retriever() + + retrieval_ctx = RetrievalContext( + tenant_id=tenant_id, + query=request.query, + session_id="rag_experiment", + channel_type="admin", + metadata={"kb_ids": request.kb_ids}, + ) + + result = await retriever.retrieve(retrieval_ctx) + + retrieval_results = [ + { + "content": hit.text, + "score": hit.score, + "source": hit.source, + "metadata": hit.metadata, + } + for hit in result.hits + ] + + final_prompt = _build_final_prompt(request.query, retrieval_results) + + logger.info( + f"[AC-ASA-05] RAG retrieval complete: hits={len(retrieval_results)}, " + f"max_score={result.max_score:.3f}" + ) + + ai_response = None + if request.generate_response: + ai_response = await _generate_ai_response( + final_prompt, + provider=request.llm_provider, + ) + + total_latency_ms = (time.time() - start_time) * 1000 + + return JSONResponse( + content={ + "query": request.query, + "retrieval_results": retrieval_results, + "final_prompt": final_prompt, + "ai_response": ai_response.model_dump() if ai_response else None, + "total_latency_ms": round(total_latency_ms, 2), + "diagnostics": result.diagnostics, + } + ) + + except Exception as e: + logger.error(f"[AC-ASA-05] RAG experiment failed: {e}") + + fallback_results = _get_fallback_results(request.query) + fallback_prompt = _build_final_prompt(request.query, fallback_results) + + ai_response = None + if request.generate_response: + ai_response = await _generate_ai_response( + fallback_prompt, + provider=request.llm_provider, + ) + + total_latency_ms = (time.time() - start_time) * 1000 + + return JSONResponse( + content={ + "query": request.query, + "retrieval_results": fallback_results, + "final_prompt": fallback_prompt, + "ai_response": ai_response.model_dump() if ai_response else None, + "total_latency_ms": round(total_latency_ms, 2), + "diagnostics": { + "error": str(e), + "fallback": True, + }, + } + ) + + +@router.post( + "/experiments/stream", + operation_id="runRagExperimentStream", + summary="Run RAG experiment with streaming AI output", + description="[AC-ASA-20] Trigger RAG experiment with SSE streaming for AI response.", + responses={ + 200: {"description": "SSE stream with retrieval results and AI response"}, + 401: {"description": "Unauthorized", "model": ErrorResponse}, + 403: {"description": "Forbidden", "model": ErrorResponse}, + }, +) +async def run_rag_experiment_stream( + tenant_id: Annotated[str, Depends(get_current_tenant_id)], + request: RAGExperimentRequest = Body(...), +) -> StreamingResponse: + """ + [AC-ASA-20] Run RAG experiment with SSE streaming for AI response. + """ + logger.info( + f"[AC-ASA-20] Running RAG experiment stream: tenant={tenant_id}, " + f"query={request.query[:50]}..." + ) + + settings = get_settings() + top_k = request.top_k or settings.rag_top_k + threshold = request.score_threshold or settings.rag_score_threshold + + async def event_generator(): + try: + # Use optimized retriever with RAG enhancements + retriever = await get_optimized_retriever() + + retrieval_ctx = RetrievalContext( + tenant_id=tenant_id, + query=request.query, + session_id="rag_experiment_stream", + channel_type="admin", + metadata={"kb_ids": request.kb_ids}, + ) + + result = await retriever.retrieve(retrieval_ctx) + + retrieval_results = [ + { + "content": hit.text, + "score": hit.score, + "source": hit.source, + "metadata": hit.metadata, + } + for hit in result.hits + ] + + final_prompt = _build_final_prompt(request.query, retrieval_results) + + logger.info(f"[AC-ASA-20] ========== RAG LAB STREAM FULL PROMPT ==========") + logger.info(f"[AC-ASA-20] Prompt length: {len(final_prompt)}") + logger.info(f"[AC-ASA-20] Prompt content:\n{final_prompt}") + logger.info(f"[AC-ASA-20] ==============================================") + + yield f"event: retrieval\ndata: {json.dumps({'results': retrieval_results, 'count': len(retrieval_results)})}\n\n" + + yield f"event: prompt\ndata: {json.dumps({'prompt': final_prompt})}\n\n" + + if request.generate_response: + manager = get_llm_config_manager() + client = manager.get_client() + + full_content = "" + async for chunk in client.stream_generate( + messages=[{"role": "user", "content": final_prompt}], + ): + if chunk.delta: + full_content += chunk.delta + yield f"event: message\ndata: {json.dumps({'delta': chunk.delta})}\n\n" + + yield f"event: final\ndata: {json.dumps({'content': full_content, 'finish_reason': 'stop'})}\n\n" + else: + yield f"event: final\ndata: {json.dumps({'content': '', 'finish_reason': 'skipped'})}\n\n" + + except Exception as e: + logger.error(f"[AC-ASA-20] RAG experiment stream failed: {e}") + yield f"event: error\ndata: {json.dumps({'error': str(e)})}\n\n" + + return StreamingResponse( + event_generator(), + media_type="text/event-stream", + headers={ + "Cache-Control": "no-cache", + "Connection": "keep-alive", + "X-Accel-Buffering": "no", + }, + ) + + +async def _generate_ai_response( + prompt: str, + provider: str | None = None, +) -> AIResponse | None: + """ + [AC-ASA-19, AC-ASA-21] Generate AI response from prompt. + """ + import time + + logger.info(f"[AC-ASA-19] ========== RAG LAB FULL PROMPT ==========") + logger.info(f"[AC-ASA-19] Prompt length: {len(prompt)}") + logger.info(f"[AC-ASA-19] Prompt content:\n{prompt}") + logger.info(f"[AC-ASA-19] ==========================================") + + try: + manager = get_llm_config_manager() + client = manager.get_client() + + start_time = time.time() + response = await client.generate( + messages=[{"role": "user", "content": prompt}], + ) + latency_ms = (time.time() - start_time) * 1000 + + return AIResponse( + content=response.content, + prompt_tokens=response.usage.get("prompt_tokens", 0), + completion_tokens=response.usage.get("completion_tokens", 0), + total_tokens=response.usage.get("total_tokens", 0), + latency_ms=round(latency_ms, 2), + model=response.model, + ) + + except Exception as e: + logger.error(f"[AC-ASA-19] AI response generation failed: {e}") + return AIResponse( + content=f"AI 响应生成失败: {str(e)}", + latency_ms=0, + ) + + +def _build_final_prompt(query: str, retrieval_results: list[dict]) -> str: + """ + Build the final prompt from query and retrieval results. + Uses shared prompt configuration for consistency with orchestrator. + """ + evidence_text = format_evidence_for_prompt(retrieval_results, max_results=5, max_content_length=500) + return build_user_prompt_with_evidence(query, evidence_text) + + +def _get_fallback_results(query: str) -> list[dict]: + """ + Provide fallback results when retrieval fails. + """ + return [ + { + "content": "检索服务暂时不可用,这是模拟结果。", + "score": 0.5, + "source": "fallback", + } + ] diff --git a/ai-service/app/api/admin/sessions.py b/ai-service/app/api/admin/sessions.py new file mode 100644 index 0000000..e794fcb --- /dev/null +++ b/ai-service/app/api/admin/sessions.py @@ -0,0 +1,293 @@ +""" +Session monitoring and management endpoints. +[AC-ASA-07, AC-ASA-09] Session list and detail monitoring. +""" + +import logging +from typing import Annotated, Optional, Sequence +from datetime import datetime + +from fastapi import APIRouter, Depends, Query +from fastapi.responses import JSONResponse +from sqlalchemy import select, func +from sqlalchemy.ext.asyncio import AsyncSession +from sqlmodel import col + +from app.core.database import get_session +from app.core.exceptions import MissingTenantIdException +from app.core.tenant import get_tenant_id +from app.models import ErrorResponse +from app.models.entities import ChatSession, ChatMessage, SessionStatus + +logger = logging.getLogger(__name__) + +router = APIRouter(prefix="/admin/sessions", tags=["Session Monitoring"]) + + +def get_current_tenant_id() -> str: + """Dependency to get current tenant ID or raise exception.""" + tenant_id = get_tenant_id() + if not tenant_id: + raise MissingTenantIdException() + return tenant_id + + +@router.get( + "", + operation_id="listSessions", + summary="Query session list", + description="[AC-ASA-09] Get list of sessions with pagination and filtering.", + responses={ + 200: {"description": "Session list with pagination"}, + 401: {"description": "Unauthorized", "model": ErrorResponse}, + 403: {"description": "Forbidden", "model": ErrorResponse}, + }, +) +async def list_sessions( + tenant_id: Annotated[str, Depends(get_current_tenant_id)], + session: Annotated[AsyncSession, Depends(get_session)], + status: Annotated[Optional[str], Query()] = None, + start_time: Annotated[Optional[str], Query(alias="startTime")] = None, + end_time: Annotated[Optional[str], Query(alias="endTime")] = None, + page: int = Query(1, ge=1), + page_size: int = Query(20, ge=1, le=100), +) -> JSONResponse: + """ + [AC-ASA-09] List sessions with filtering and pagination. + """ + logger.info( + f"[AC-ASA-09] Listing sessions: tenant={tenant_id}, status={status}, " + f"start_time={start_time}, end_time={end_time}, page={page}, page_size={page_size}" + ) + + stmt = select(ChatSession).where(ChatSession.tenant_id == tenant_id) + + if status: + stmt = stmt.where(ChatSession.metadata_["status"].as_string() == status) + + if start_time: + try: + start_dt = datetime.fromisoformat(start_time.replace("Z", "+00:00")) + stmt = stmt.where(ChatSession.created_at >= start_dt) + except ValueError: + pass + + if end_time: + try: + end_dt = datetime.fromisoformat(end_time.replace("Z", "+00:00")) + stmt = stmt.where(ChatSession.created_at <= end_dt) + except ValueError: + pass + + count_stmt = select(func.count()).select_from(stmt.subquery()) + total_result = await session.execute(count_stmt) + total = total_result.scalar() or 0 + + stmt = stmt.order_by(col(ChatSession.created_at).desc()) + stmt = stmt.offset((page - 1) * page_size).limit(page_size) + + result = await session.execute(stmt) + sessions = result.scalars().all() + + session_ids = [s.session_id for s in sessions] + + if session_ids: + msg_count_stmt = ( + select( + ChatMessage.session_id, + func.count(ChatMessage.id).label("count") + ) + .where( + ChatMessage.tenant_id == tenant_id, + ChatMessage.session_id.in_(session_ids) + ) + .group_by(ChatMessage.session_id) + ) + msg_count_result = await session.execute(msg_count_stmt) + msg_counts = {row.session_id: row.count for row in msg_count_result} + else: + msg_counts = {} + + data = [] + for s in sessions: + session_status = SessionStatus.ACTIVE.value + if s.metadata_ and "status" in s.metadata_: + session_status = s.metadata_["status"] + + end_time_val = None + if s.metadata_ and "endTime" in s.metadata_: + end_time_val = s.metadata_["endTime"] + + data.append({ + "sessionId": s.session_id, + "tenantId": tenant_id, + "status": session_status, + "startTime": s.created_at.isoformat() + "Z", + "endTime": end_time_val, + "messageCount": msg_counts.get(s.session_id, 0), + "channelType": s.channel_type, + }) + + total_pages = (total + page_size - 1) // page_size if total > 0 else 0 + + return JSONResponse( + content={ + "data": data, + "pagination": { + "page": page, + "pageSize": page_size, + "total": total, + "totalPages": total_pages, + }, + } + ) + + +@router.get( + "/{session_id}", + operation_id="getSessionDetail", + summary="Get session details", + description="[AC-ASA-07] Get full session details with messages and trace.", + responses={ + 200: {"description": "Full session details with messages and trace"}, + 401: {"description": "Unauthorized", "model": ErrorResponse}, + 403: {"description": "Forbidden", "model": ErrorResponse}, + }, +) +async def get_session_detail( + tenant_id: Annotated[str, Depends(get_current_tenant_id)], + session: Annotated[AsyncSession, Depends(get_session)], + session_id: str, +) -> JSONResponse: + """ + [AC-ASA-07] Get session detail with messages and trace information. + """ + logger.info( + f"[AC-ASA-07] Getting session detail: tenant={tenant_id}, session_id={session_id}" + ) + + session_stmt = select(ChatSession).where( + ChatSession.tenant_id == tenant_id, + ChatSession.session_id == session_id, + ) + session_result = await session.execute(session_stmt) + chat_session = session_result.scalar_one_or_none() + + if not chat_session: + return JSONResponse( + status_code=404, + content={ + "code": "SESSION_NOT_FOUND", + "message": f"Session {session_id} not found", + }, + ) + + messages_stmt = ( + select(ChatMessage) + .where( + ChatMessage.tenant_id == tenant_id, + ChatMessage.session_id == session_id, + ) + .order_by(col(ChatMessage.created_at).asc()) + ) + messages_result = await session.execute(messages_stmt) + messages = messages_result.scalars().all() + + messages_data = [] + for msg in messages: + msg_data = { + "role": msg.role, + "content": msg.content, + "timestamp": msg.created_at.isoformat() + "Z", + } + messages_data.append(msg_data) + + trace = _build_trace_info(messages) + + return JSONResponse( + content={ + "sessionId": session_id, + "messages": messages_data, + "trace": trace, + "metadata": chat_session.metadata_ or {}, + } + ) + + +def _build_trace_info(messages: Sequence[ChatMessage]) -> dict: + """ + Build trace information from messages. + This extracts retrieval and tool call information from message metadata. + """ + trace = { + "retrieval": [], + "tools": [], + "errors": [], + } + + for msg in messages: + if msg.role == "assistant": + pass + + return trace + + +@router.put( + "/{session_id}/status", + operation_id="updateSessionStatus", + summary="Update session status", + description="[AC-ASA-09] Update session status (active, closed, expired).", + responses={ + 200: {"description": "Session status updated"}, + 404: {"description": "Session not found"}, + 401: {"description": "Unauthorized", "model": ErrorResponse}, + 403: {"description": "Forbidden", "model": ErrorResponse}, + }, +) +async def update_session_status( + tenant_id: Annotated[str, Depends(get_current_tenant_id)], + db_session: Annotated[AsyncSession, Depends(get_session)], + session_id: str, + status: str = Query(..., description="New status: active, closed, expired"), +) -> JSONResponse: + """ + [AC-ASA-09] Update session status. + """ + logger.info( + f"[AC-ASA-09] Updating session status: tenant={tenant_id}, " + f"session_id={session_id}, status={status}" + ) + + stmt = select(ChatSession).where( + ChatSession.tenant_id == tenant_id, + ChatSession.session_id == session_id, + ) + result = await db_session.execute(stmt) + chat_session = result.scalar_one_or_none() + + if not chat_session: + return JSONResponse( + status_code=404, + content={ + "code": "SESSION_NOT_FOUND", + "message": f"Session {session_id} not found", + }, + ) + + metadata = chat_session.metadata_ or {} + metadata["status"] = status + + if status == SessionStatus.CLOSED.value or status == SessionStatus.EXPIRED.value: + metadata["endTime"] = datetime.utcnow().isoformat() + "Z" + + chat_session.metadata_ = metadata + chat_session.updated_at = datetime.utcnow() + await db_session.flush() + + return JSONResponse( + content={ + "success": True, + "sessionId": session_id, + "status": status, + } + ) diff --git a/ai-service/app/api/admin/tenants.py b/ai-service/app/api/admin/tenants.py new file mode 100644 index 0000000..b86fea9 --- /dev/null +++ b/ai-service/app/api/admin/tenants.py @@ -0,0 +1,78 @@ +""" +Tenant management endpoints. +Provides tenant list and management functionality. +""" + +import logging +from typing import Annotated + +from fastapi import APIRouter, Depends +from fastapi.responses import JSONResponse +from sqlalchemy import select +from sqlalchemy.ext.asyncio import AsyncSession + +from app.core.database import get_session +from app.core.exceptions import MissingTenantIdException +from app.core.middleware import parse_tenant_id +from app.core.tenant import get_tenant_id +from app.models import ErrorResponse +from app.models.entities import Tenant + +logger = logging.getLogger(__name__) + +router = APIRouter(prefix="/admin/tenants", tags=["Tenants"]) + + +def get_current_tenant_id() -> str: + """Dependency to get current tenant ID or raise exception.""" + tenant_id = get_tenant_id() + if not tenant_id: + raise MissingTenantIdException() + return tenant_id + + +@router.get( + "", + operation_id="listTenants", + summary="List all tenants", + description="Get a list of all tenants from the system.", + responses={ + 200: {"description": "List of tenants"}, + 401: {"description": "Unauthorized", "model": ErrorResponse}, + 403: {"description": "Forbidden", "model": ErrorResponse}, + }, +) +async def list_tenants( + session: Annotated[AsyncSession, Depends(get_session)], +) -> JSONResponse: + """ + Get a list of all tenants from the tenants table. + Returns tenant ID and display name (first part of tenant_id). + """ + logger.info("Getting all tenants") + + # Get all tenants from tenants table + stmt = select(Tenant).order_by(Tenant.created_at.desc()) + result = await session.execute(stmt) + tenants = result.scalars().all() + + # Format tenant list with display name + tenant_list = [] + for tenant in tenants: + name, year = parse_tenant_id(tenant.tenant_id) + tenant_list.append({ + "id": tenant.tenant_id, + "name": f"{name} ({year})", + "displayName": name, + "year": year, + "createdAt": tenant.created_at.isoformat() if tenant.created_at else None, + }) + + logger.info(f"Found {len(tenant_list)} tenants: {[t['id'] for t in tenant_list]}") + + return JSONResponse( + content={ + "tenants": tenant_list, + "total": len(tenant_list) + } + ) diff --git a/ai-service/app/api/chat.py b/ai-service/app/api/chat.py new file mode 100644 index 0000000..97ed5e6 --- /dev/null +++ b/ai-service/app/api/chat.py @@ -0,0 +1,191 @@ +""" +Chat endpoint for AI Service. +[AC-AISVC-01, AC-AISVC-02, AC-AISVC-06, AC-AISVC-08, AC-AISVC-09] Main chat endpoint with streaming/non-streaming modes. +""" + +import logging +from typing import Annotated, Any + +from fastapi import APIRouter, Depends, Header, Request +from fastapi.responses import JSONResponse +from sse_starlette.sse import EventSourceResponse +from sqlalchemy.ext.asyncio import AsyncSession + +from app.core.database import get_session +from app.core.middleware import get_response_mode, is_sse_request +from app.core.sse import SSEStateMachine, create_error_event +from app.core.tenant import get_tenant_id +from app.models import ChatRequest, ChatResponse, ErrorResponse +from app.services.memory import MemoryService +from app.services.orchestrator import OrchestratorService + +logger = logging.getLogger(__name__) + +router = APIRouter(tags=["AI Chat"]) + + +async def get_orchestrator_service_with_memory( + session: Annotated[AsyncSession, Depends(get_session)] +) -> OrchestratorService: + """ + [AC-AISVC-13] Create orchestrator service with memory service and LLM client. + Ensures each request has a fresh MemoryService with database session. + """ + from app.services.llm.factory import get_llm_config_manager + from app.services.retrieval.optimized_retriever import get_optimized_retriever + + memory_service = MemoryService(session) + llm_config_manager = get_llm_config_manager() + llm_client = llm_config_manager.get_client() + retriever = await get_optimized_retriever() + + return OrchestratorService( + llm_client=llm_client, + memory_service=memory_service, + retriever=retriever, + ) + + +@router.post( + "/ai/chat", + operation_id="generateReply", + summary="Generate AI reply", + description=""" + [AC-AISVC-01, AC-AISVC-02, AC-AISVC-06] Generate AI reply based on user message. + + Response mode is determined by Accept header: + - Accept: text/event-stream -> SSE streaming response + - Other -> JSON response + """, + responses={ + 200: { + "description": "Success - JSON or SSE stream", + "content": { + "application/json": {"schema": {"$ref": "#/components/schemas/ChatResponse"}}, + "text/event-stream": {"schema": {"type": "string"}}, + }, + }, + 400: {"description": "Invalid request", "model": ErrorResponse}, + 500: {"description": "Internal error", "model": ErrorResponse}, + 503: {"description": "Service unavailable", "model": ErrorResponse}, + }, +) +async def generate_reply( + request: Request, + chat_request: ChatRequest, + accept: Annotated[str | None, Header()] = None, + orchestrator: OrchestratorService = Depends(get_orchestrator_service_with_memory), +) -> Any: + """ + [AC-AISVC-06] Generate AI reply with automatic response mode switching. + + Based on Accept header: + - text/event-stream: Returns SSE stream with message/final/error events + - Other: Returns JSON ChatResponse + """ + tenant_id = get_tenant_id() + if not tenant_id: + from app.core.exceptions import MissingTenantIdException + raise MissingTenantIdException() + + logger.info( + f"[AC-AISVC-06] Processing chat request: tenant={tenant_id}, " + f"session={chat_request.session_id}, mode={get_response_mode(request)}" + ) + + if is_sse_request(request): + return await _handle_streaming_request(tenant_id, chat_request, orchestrator) + else: + return await _handle_json_request(tenant_id, chat_request, orchestrator) + + +async def _handle_json_request( + tenant_id: str, + chat_request: ChatRequest, + orchestrator: OrchestratorService, +) -> JSONResponse: + """ + [AC-AISVC-02] Handle non-streaming JSON request. + Returns ChatResponse with reply, confidence, shouldTransfer. + """ + logger.info(f"[AC-AISVC-02] Processing JSON request for tenant={tenant_id}") + + try: + response = await orchestrator.generate(tenant_id, chat_request) + return JSONResponse( + content=response.model_dump(exclude_none=True, by_alias=True), + ) + except Exception as e: + logger.error(f"[AC-AISVC-04] Error generating response: {e}") + from app.core.exceptions import AIServiceException, ErrorCode + if isinstance(e, AIServiceException): + raise e + from app.core.exceptions import AIServiceException + raise AIServiceException( + code=ErrorCode.INTERNAL_ERROR, + message=str(e), + ) + + +async def _handle_streaming_request( + tenant_id: str, + chat_request: ChatRequest, + orchestrator: OrchestratorService, +) -> EventSourceResponse: + """ + [AC-AISVC-06, AC-AISVC-07, AC-AISVC-08, AC-AISVC-09] Handle SSE streaming request. + + SSE Event Sequence (per design.md Section 6.2): + - message* (0 or more) -> final (exactly 1) -> close + - OR message* (0 or more) -> error (exactly 1) -> close + + State machine ensures: + - No events after final/error + - Only one final OR one error event + - Proper connection close + """ + logger.info(f"[AC-AISVC-06] Processing SSE request for tenant={tenant_id}") + + state_machine = SSEStateMachine() + + async def event_generator(): + """ + [AC-AISVC-08, AC-AISVC-09] Event generator with state machine enforcement. + Ensures proper event sequence and error handling. + """ + await state_machine.transition_to_streaming() + + try: + async for event in orchestrator.generate_stream(tenant_id, chat_request): + if not state_machine.can_send_message(): + logger.warning("[AC-AISVC-08] Received event after state machine closed, ignoring") + break + + if event.event == "final": + if await state_machine.transition_to_final(): + logger.info("[AC-AISVC-08] Sending final event and closing stream") + yield event + break + + elif event.event == "error": + if await state_machine.transition_to_error(): + logger.info("[AC-AISVC-09] Sending error event and closing stream") + yield event + break + + elif event.event == "message": + yield event + + except Exception as e: + logger.error(f"[AC-AISVC-09] Streaming error: {e}") + if await state_machine.transition_to_error(): + yield create_error_event( + code="STREAMING_ERROR", + message=str(e), + ) + + finally: + await state_machine.close() + logger.debug("[AC-AISVC-08] SSE connection closed") + + return EventSourceResponse(event_generator(), ping=15) diff --git a/ai-service/app/api/health.py b/ai-service/app/api/health.py new file mode 100644 index 0000000..4d2366e --- /dev/null +++ b/ai-service/app/api/health.py @@ -0,0 +1,30 @@ +""" +Health check endpoint. +[AC-AISVC-20] Health check for service monitoring. +""" + +from fastapi import APIRouter, status +from fastapi.responses import JSONResponse + +router = APIRouter(tags=["Health"]) + + +@router.get( + "/ai/health", + operation_id="healthCheck", + summary="Health check", + description="[AC-AISVC-20] Check if AI service is healthy", + responses={ + 200: {"description": "Service is healthy"}, + 503: {"description": "Service is unhealthy"}, + }, +) +async def health_check() -> JSONResponse: + """ + [AC-AISVC-20] Health check endpoint. + Returns 200 with status if healthy, 503 if not. + """ + return JSONResponse( + status_code=status.HTTP_200_OK, + content={"status": "healthy"}, + ) diff --git a/ai-service/app/core/__init__.py b/ai-service/app/core/__init__.py new file mode 100644 index 0000000..dee8983 --- /dev/null +++ b/ai-service/app/core/__init__.py @@ -0,0 +1,19 @@ +""" +Core module - Configuration, dependencies, and utilities. +[AC-AISVC-01, AC-AISVC-10, AC-AISVC-11] Core infrastructure components. +""" + +from app.core.config import Settings, get_settings +from app.core.database import async_session_maker, get_session, init_db, close_db +from app.core.qdrant_client import QdrantClient, get_qdrant_client + +__all__ = [ + "Settings", + "get_settings", + "async_session_maker", + "get_session", + "init_db", + "close_db", + "QdrantClient", + "get_qdrant_client", +] diff --git a/ai-service/app/core/config.py b/ai-service/app/core/config.py new file mode 100644 index 0000000..6fcadec --- /dev/null +++ b/ai-service/app/core/config.py @@ -0,0 +1,66 @@ +""" +Configuration management for AI Service. +[AC-AISVC-01] Centralized configuration with environment variable support. +""" + +from functools import lru_cache + +from pydantic_settings import BaseSettings, SettingsConfigDict + + +class Settings(BaseSettings): + model_config = SettingsConfigDict(env_prefix="AI_SERVICE_", env_file=".env", extra="ignore") + + app_name: str = "AI Service" + app_version: str = "0.1.0" + debug: bool = False + + host: str = "0.0.0.0" + port: int = 8080 + + request_timeout_seconds: int = 20 + sse_ping_interval_seconds: int = 15 + + log_level: str = "INFO" + + llm_provider: str = "openai" + llm_api_key: str = "" + llm_base_url: str = "https://api.openai.com/v1" + llm_model: str = "gpt-4o-mini" + llm_max_tokens: int = 2048 + llm_temperature: float = 0.7 + llm_timeout_seconds: int = 30 + llm_max_retries: int = 3 + + database_url: str = "postgresql+asyncpg://postgres:postgres@localhost:5432/ai_service" + database_pool_size: int = 10 + database_max_overflow: int = 20 + + qdrant_url: str = "http://localhost:6333" + qdrant_collection_prefix: str = "kb_" + qdrant_vector_size: int = 768 + + ollama_base_url: str = "http://localhost:11434" + ollama_embedding_model: str = "nomic-embed-text" + + rag_top_k: int = 5 + rag_score_threshold: float = 0.01 + rag_min_hits: int = 1 + rag_max_evidence_tokens: int = 2000 + + rag_two_stage_enabled: bool = True + rag_two_stage_expand_factor: int = 10 + rag_hybrid_enabled: bool = True + rag_rrf_k: int = 60 + rag_vector_weight: float = 0.7 + rag_bm25_weight: float = 0.3 + + confidence_low_threshold: float = 0.5 + confidence_high_threshold: float = 0.8 + confidence_insufficient_penalty: float = 0.3 + max_history_tokens: int = 4000 + + +@lru_cache +def get_settings() -> Settings: + return Settings() diff --git a/ai-service/app/core/database.py b/ai-service/app/core/database.py new file mode 100644 index 0000000..b15cde1 --- /dev/null +++ b/ai-service/app/core/database.py @@ -0,0 +1,67 @@ +""" +Database client for AI Service. +[AC-AISVC-11] PostgreSQL database with SQLModel for multi-tenant data isolation. +""" + +import logging +from typing import AsyncGenerator + +from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker, create_async_engine +from sqlalchemy.pool import NullPool +from sqlmodel import SQLModel + +from app.core.config import get_settings + +logger = logging.getLogger(__name__) + +settings = get_settings() + +engine = create_async_engine( + settings.database_url, + pool_size=settings.database_pool_size, + max_overflow=settings.database_max_overflow, + echo=settings.debug, + pool_pre_ping=True, +) + +async_session_maker = async_sessionmaker( + engine, + class_=AsyncSession, + expire_on_commit=False, + autocommit=False, + autoflush=False, +) + + +async def init_db() -> None: + """ + [AC-AISVC-11] Initialize database tables. + Creates all tables defined in SQLModel metadata. + """ + async with engine.begin() as conn: + await conn.run_sync(SQLModel.metadata.create_all) + logger.info("[AC-AISVC-11] Database tables initialized") + + +async def close_db() -> None: + """ + Close database connections. + """ + await engine.dispose() + logger.info("Database connections closed") + + +async def get_session() -> AsyncGenerator[AsyncSession, None]: + """ + [AC-AISVC-11] Dependency injection for database session. + Ensures proper session lifecycle management. + """ + async with async_session_maker() as session: + try: + yield session + await session.commit() + except Exception: + await session.rollback() + raise + finally: + await session.close() diff --git a/ai-service/app/core/exceptions.py b/ai-service/app/core/exceptions.py new file mode 100644 index 0000000..dfd3262 --- /dev/null +++ b/ai-service/app/core/exceptions.py @@ -0,0 +1,99 @@ +""" +Exception handling for AI Service. +[AC-AISVC-03, AC-AISVC-04, AC-AISVC-05] Structured error responses. +""" + +from fastapi import HTTPException, Request, status +from fastapi.responses import JSONResponse + +from app.models import ErrorCode, ErrorResponse + + +class AIServiceException(Exception): + def __init__( + self, + code: ErrorCode, + message: str, + status_code: int = status.HTTP_500_INTERNAL_SERVER_ERROR, + details: list[dict] | None = None, + ): + self.code = code + self.message = message + self.status_code = status_code + self.details = details + super().__init__(message) + + +class MissingTenantIdException(AIServiceException): + def __init__(self, message: str = "Missing required header: X-Tenant-Id"): + super().__init__( + code=ErrorCode.MISSING_TENANT_ID, + message=message, + status_code=status.HTTP_400_BAD_REQUEST, + ) + + +class InvalidRequestException(AIServiceException): + def __init__(self, message: str, details: list[dict] | None = None): + super().__init__( + code=ErrorCode.INVALID_REQUEST, + message=message, + status_code=status.HTTP_400_BAD_REQUEST, + details=details, + ) + + +class ServiceUnavailableException(AIServiceException): + def __init__(self, message: str = "Service temporarily unavailable"): + super().__init__( + code=ErrorCode.SERVICE_UNAVAILABLE, + message=message, + status_code=status.HTTP_503_SERVICE_UNAVAILABLE, + ) + + +class TimeoutException(AIServiceException): + def __init__(self, message: str = "Request timeout"): + super().__init__( + code=ErrorCode.TIMEOUT, + message=message, + status_code=status.HTTP_503_SERVICE_UNAVAILABLE, + ) + + +async def ai_service_exception_handler(request: Request, exc: AIServiceException) -> JSONResponse: + return JSONResponse( + status_code=exc.status_code, + content=ErrorResponse( + code=exc.code.value, + message=exc.message, + details=exc.details, + ).model_dump(exclude_none=True), + ) + + +async def http_exception_handler(request: Request, exc: HTTPException) -> JSONResponse: + if exc.status_code == status.HTTP_400_BAD_REQUEST: + code = ErrorCode.INVALID_REQUEST + elif exc.status_code == status.HTTP_503_SERVICE_UNAVAILABLE: + code = ErrorCode.SERVICE_UNAVAILABLE + else: + code = ErrorCode.INTERNAL_ERROR + + return JSONResponse( + status_code=exc.status_code, + content=ErrorResponse( + code=code.value, + message=exc.detail or "An error occurred", + ).model_dump(exclude_none=True), + ) + + +async def generic_exception_handler(request: Request, exc: Exception) -> JSONResponse: + return JSONResponse( + status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, + content=ErrorResponse( + code=ErrorCode.INTERNAL_ERROR.value, + message="An unexpected error occurred", + ).model_dump(exclude_none=True), + ) diff --git a/ai-service/app/core/middleware.py b/ai-service/app/core/middleware.py new file mode 100644 index 0000000..4100813 --- /dev/null +++ b/ai-service/app/core/middleware.py @@ -0,0 +1,150 @@ +""" +Middleware for AI Service. +[AC-AISVC-10, AC-AISVC-12] X-Tenant-Id header validation and tenant context injection. +""" + +import logging +import re +from typing import Callable + +from fastapi import Request, Response, status +from fastapi.responses import JSONResponse +from starlette.middleware.base import BaseHTTPMiddleware + +from app.core.exceptions import ErrorCode, ErrorResponse, MissingTenantIdException +from app.core.tenant import clear_tenant_context, set_tenant_context + +logger = logging.getLogger(__name__) + +TENANT_ID_HEADER = "X-Tenant-Id" +ACCEPT_HEADER = "Accept" +SSE_CONTENT_TYPE = "text/event-stream" + +# Tenant ID format: name@ash@year (e.g., szmp@ash@2026) +TENANT_ID_PATTERN = re.compile(r'^[^@]+@ash@\d{4}$') + + +def validate_tenant_id_format(tenant_id: str) -> bool: + """ + [AC-AISVC-10] Validate tenant ID format: name@ash@year + Examples: szmp@ash@2026, abc123@ash@2025 + """ + return bool(TENANT_ID_PATTERN.match(tenant_id)) + + +def parse_tenant_id(tenant_id: str) -> tuple[str, str]: + """ + [AC-AISVC-10] Parse tenant ID into name and year. + Returns: (name, year) + """ + parts = tenant_id.split('@') + return parts[0], parts[2] + + +class TenantContextMiddleware(BaseHTTPMiddleware): + """ + [AC-AISVC-10, AC-AISVC-12] Middleware to extract and validate X-Tenant-Id header. + Injects tenant context into request state for downstream processing. + Validates tenant ID format and auto-creates tenant if not exists. + """ + + async def dispatch(self, request: Request, call_next: Callable) -> Response: + clear_tenant_context() + + if request.url.path == "/ai/health": + return await call_next(request) + + tenant_id = request.headers.get(TENANT_ID_HEADER) + + if not tenant_id or not tenant_id.strip(): + logger.warning("[AC-AISVC-12] Missing or empty X-Tenant-Id header") + return JSONResponse( + status_code=status.HTTP_400_BAD_REQUEST, + content=ErrorResponse( + code=ErrorCode.MISSING_TENANT_ID.value, + message="Missing required header: X-Tenant-Id", + ).model_dump(exclude_none=True), + ) + + tenant_id = tenant_id.strip() + + # Validate tenant ID format + if not validate_tenant_id_format(tenant_id): + logger.warning(f"[AC-AISVC-10] Invalid tenant ID format: {tenant_id}") + return JSONResponse( + status_code=status.HTTP_400_BAD_REQUEST, + content=ErrorResponse( + code=ErrorCode.INVALID_TENANT_ID.value, + message="Invalid tenant ID format. Expected: name@ash@year (e.g., szmp@ash@2026)", + ).model_dump(exclude_none=True), + ) + + # Auto-create tenant if not exists (for admin endpoints) + if request.url.path.startswith("/admin/") or request.url.path.startswith("/ai/"): + try: + await self._ensure_tenant_exists(request, tenant_id) + except Exception as e: + logger.error(f"[AC-AISVC-10] Failed to ensure tenant exists: {e}") + # Continue processing even if tenant creation fails + + set_tenant_context(tenant_id) + request.state.tenant_id = tenant_id + + logger.info(f"[AC-AISVC-10] Tenant context set: tenant_id={tenant_id}") + + try: + response = await call_next(request) + finally: + clear_tenant_context() + + return response + + async def _ensure_tenant_exists(self, request: Request, tenant_id: str) -> None: + """ + [AC-AISVC-10] Ensure tenant exists in database, create if not. + """ + from sqlalchemy import select + from sqlalchemy.ext.asyncio import AsyncSession + + from app.core.database import async_session_maker + from app.models.entities import Tenant + + name, year = parse_tenant_id(tenant_id) + + async with async_session_maker() as session: + # Check if tenant exists + stmt = select(Tenant).where(Tenant.tenant_id == tenant_id) + result = await session.execute(stmt) + existing_tenant = result.scalar_one_or_none() + + if existing_tenant: + logger.debug(f"[AC-AISVC-10] Tenant already exists: {tenant_id}") + return + + # Create new tenant + new_tenant = Tenant( + tenant_id=tenant_id, + name=name, + year=year, + ) + session.add(new_tenant) + await session.commit() + + logger.info(f"[AC-AISVC-10] Auto-created new tenant: {tenant_id} (name={name}, year={year})") + + +def is_sse_request(request: Request) -> bool: + """ + [AC-AISVC-06] Check if the request expects SSE streaming response. + Based on Accept header: text/event-stream indicates SSE mode. + """ + accept_header = request.headers.get(ACCEPT_HEADER, "") + return SSE_CONTENT_TYPE in accept_header + + +def get_response_mode(request: Request) -> str: + """ + [AC-AISVC-06] Determine response mode based on Accept header. + Returns 'streaming' for SSE, 'json' for regular JSON response. + """ + return "streaming" if is_sse_request(request) else "json" diff --git a/ai-service/app/core/prompts.py b/ai-service/app/core/prompts.py new file mode 100644 index 0000000..91198b8 --- /dev/null +++ b/ai-service/app/core/prompts.py @@ -0,0 +1,114 @@ +""" +Shared prompt templates for AI Service. +Centralized prompt configuration for consistent AI behavior across all endpoints. +""" + +SYSTEM_PROMPT = """你是一名经验丰富的客服专员,名字叫"小N"。请遵循以下准则: +像朋友一样聊天:使用自然、口语化的中文,语气亲切有礼貌,就像在和一位老朋友聊天。多使用"咱们"、"您看"、"是不是"这样拉近距离的表达。 +避免生硬罗列:介绍多个信息时,不要用"第一、第二"或分点列举,而是用流畅的段落把它们串联起来。比如:"我们这边有几类课程挺受欢迎的,一个是……另外还有……您对哪方面更感兴趣呀?" +主动引导对话:不要一次性把知识库里的所有内容都倒出来。可以先简单概括,然后问问客户具体想了解什么,比如:"您是想提升职场英语,还是有留学方面的打算呢?" +委婉处理未知:如果知识库里没有答案,别直接说"不知道",可以说:"您稍等,我帮您确认一下"、"这个问题我需要问问同事,马上给您回复"。 +隐藏机器身份:绝不要提及"作为AI"、"根据算法"等词,也避免使用过于官方的术语。就像普通人一样用"我"、"我们"。 +多用语气词和情感表达:适当加入"呢"、"哦"、"哈"、"呀"等语气词,根据情况使用表情符号(😊)。表达理解和关心,比如"我明白您的意思"、"别着急,我们一起看看"。 +保持简洁但有温度:回答一般控制在3-5句话,如果需要详细说明,可以分成小段落,但每段都要口语化,读起来不累。 +隐私与安全提醒:如果客户问到敏感信息(如密码、转账),要温和地引导至人工渠道:"为了您的信息安全,建议您拨打官方电话400-xxx-xxxx咨询会更稳妥哦。""" + + +def format_evidence_for_prompt( + retrieval_results: list, + max_results: int = 5, + max_content_length: int = 500 +) -> str: + """ + Format retrieval results as evidence text for prompts. + + Args: + retrieval_results: List of retrieval hits. Can be: + - dict format: {'content', 'score', 'source', 'metadata'} + - RetrievalHit object: with .text, .score, .source, .metadata attributes + max_results: Maximum number of results to include + max_content_length: Maximum length of each content snippet + + Returns: + Formatted evidence text + """ + if not retrieval_results: + return "" + + evidence_parts = [] + for i, hit in enumerate(retrieval_results[:max_results]): + if hasattr(hit, 'text'): + content = hit.text + score = hit.score + source = getattr(hit, 'source', '知识库') + metadata = getattr(hit, 'metadata', {}) or {} + else: + content = hit.get('content', '') + score = hit.get('score', 0) + source = hit.get('source', '知识库') + metadata = hit.get('metadata', {}) or {} + + if len(content) > max_content_length: + content = content[:max_content_length] + '...' + + nested_meta = metadata.get('metadata', {}) + source_doc = nested_meta.get('source_doc', source) if nested_meta else source + category = nested_meta.get('category', '') if nested_meta else '' + department = nested_meta.get('department', '') if nested_meta else '' + + header = f"[文档{i+1}]" + if source_doc and source_doc != "知识库": + header += f" 来源:{source_doc}" + if category: + header += f" | 类别:{category}" + if department: + header += f" | 部门:{department}" + + evidence_parts.append(f"{header}\n相关度:{score:.2f}\n内容:{content}") + + return "\n\n".join(evidence_parts) + + +def build_system_prompt_with_evidence(evidence_text: str) -> str: + """ + Build system prompt with knowledge base evidence. + + Args: + evidence_text: Formatted evidence from retrieval results + + Returns: + Complete system prompt + """ + if not evidence_text: + return SYSTEM_PROMPT + + return f"""{SYSTEM_PROMPT} + +知识库参考内容: +{evidence_text}""" + + +def build_user_prompt_with_evidence(query: str, evidence_text: str) -> str: + """ + Build user prompt with knowledge base evidence (for single-message format). + + Args: + query: User's question + evidence_text: Formatted evidence from retrieval results + + Returns: + Complete user prompt + """ + if not evidence_text: + return f"""用户问题:{query} + +未找到相关检索结果,请基于通用知识回答用户问题。""" + + return f"""【系统指令】 +{SYSTEM_PROMPT} + +【知识库内容】 +{evidence_text} + +【用户问题】 +{query}""" diff --git a/ai-service/app/core/qdrant_client.py b/ai-service/app/core/qdrant_client.py new file mode 100644 index 0000000..5742b5a --- /dev/null +++ b/ai-service/app/core/qdrant_client.py @@ -0,0 +1,314 @@ +""" +Qdrant client for AI Service. +[AC-AISVC-10] Vector database client with tenant-isolated collection management. +Supports multi-dimensional vectors for Matryoshka representation learning. +""" + +import logging +from typing import Any + +from qdrant_client import AsyncQdrantClient +from qdrant_client.models import Distance, PointStruct, VectorParams, MultiVectorConfig + +from app.core.config import get_settings + +logger = logging.getLogger(__name__) + +settings = get_settings() + + +class QdrantClient: + """ + [AC-AISVC-10] Qdrant client with tenant-isolated collection management. + Collection naming: kb_{tenantId} for tenant isolation. + Supports multi-dimensional vectors (256/512/768) for Matryoshka retrieval. + """ + + def __init__(self): + self._client: AsyncQdrantClient | None = None + self._collection_prefix = settings.qdrant_collection_prefix + self._vector_size = settings.qdrant_vector_size + + async def get_client(self) -> AsyncQdrantClient: + """Get or create Qdrant client instance.""" + if self._client is None: + self._client = AsyncQdrantClient(url=settings.qdrant_url) + logger.info(f"[AC-AISVC-10] Qdrant client initialized: {settings.qdrant_url}") + return self._client + + async def close(self) -> None: + """Close Qdrant client connection.""" + if self._client: + await self._client.close() + self._client = None + logger.info("Qdrant client connection closed") + + def get_collection_name(self, tenant_id: str) -> str: + """ + [AC-AISVC-10] Get collection name for a tenant. + Naming convention: kb_{tenantId} + Replaces @ with _ to ensure valid collection names. + """ + safe_tenant_id = tenant_id.replace('@', '_') + return f"{self._collection_prefix}{safe_tenant_id}" + + async def ensure_collection_exists(self, tenant_id: str, use_multi_vector: bool = True) -> bool: + """ + [AC-AISVC-10] Ensure collection exists for tenant. + Supports multi-dimensional vectors for Matryoshka retrieval. + """ + client = await self.get_client() + collection_name = self.get_collection_name(tenant_id) + + try: + collections = await client.get_collections() + exists = any(c.name == collection_name for c in collections.collections) + + if not exists: + if use_multi_vector: + vectors_config = { + "full": VectorParams( + size=768, + distance=Distance.COSINE, + ), + "dim_256": VectorParams( + size=256, + distance=Distance.COSINE, + ), + "dim_512": VectorParams( + size=512, + distance=Distance.COSINE, + ), + } + else: + vectors_config = VectorParams( + size=self._vector_size, + distance=Distance.COSINE, + ) + + await client.create_collection( + collection_name=collection_name, + vectors_config=vectors_config, + ) + logger.info( + f"[AC-AISVC-10] Created collection: {collection_name} for tenant={tenant_id} " + f"with multi_vector={use_multi_vector}" + ) + return True + except Exception as e: + logger.error(f"[AC-AISVC-10] Error ensuring collection: {e}") + return False + + async def upsert_vectors( + self, + tenant_id: str, + points: list[PointStruct], + ) -> bool: + """ + [AC-AISVC-10] Upsert vectors into tenant's collection. + """ + client = await self.get_client() + collection_name = self.get_collection_name(tenant_id) + + try: + await client.upsert( + collection_name=collection_name, + points=points, + ) + logger.info( + f"[AC-AISVC-10] Upserted {len(points)} vectors for tenant={tenant_id}" + ) + return True + except Exception as e: + logger.error(f"[AC-AISVC-10] Error upserting vectors: {e}") + return False + + async def upsert_multi_vector( + self, + tenant_id: str, + points: list[dict[str, Any]], + ) -> bool: + """ + Upsert points with multi-dimensional vectors. + + Args: + tenant_id: Tenant identifier + points: List of points with format: + { + "id": str | int, + "vector": { + "full": [768 floats], + "dim_256": [256 floats], + "dim_512": [512 floats], + }, + "payload": dict + } + """ + client = await self.get_client() + collection_name = self.get_collection_name(tenant_id) + + try: + qdrant_points = [] + for p in points: + point = PointStruct( + id=p["id"], + vector=p["vector"], + payload=p.get("payload", {}), + ) + qdrant_points.append(point) + + await client.upsert( + collection_name=collection_name, + points=qdrant_points, + ) + logger.info( + f"[RAG-OPT] Upserted {len(points)} multi-vector points for tenant={tenant_id}" + ) + return True + except Exception as e: + logger.error(f"[RAG-OPT] Error upserting multi-vectors: {e}") + return False + + async def search( + self, + tenant_id: str, + query_vector: list[float], + limit: int = 5, + score_threshold: float | None = None, + vector_name: str = "full", + ) -> list[dict[str, Any]]: + """ + [AC-AISVC-10] Search vectors in tenant's collection. + Returns results with score >= score_threshold if specified. + Searches both old format (with @) and new format (with _) for backward compatibility. + + Args: + tenant_id: Tenant identifier + query_vector: Query vector for similarity search + limit: Maximum number of results + score_threshold: Minimum score threshold for results + vector_name: Name of the vector to search (for multi-vector collections) + Default is "full" for 768-dim vectors in Matryoshka setup. + """ + client = await self.get_client() + + logger.info( + f"[AC-AISVC-10] Starting search: tenant_id={tenant_id}, " + f"limit={limit}, score_threshold={score_threshold}, vector_dim={len(query_vector)}, vector_name={vector_name}" + ) + + collection_names = [self.get_collection_name(tenant_id)] + if '@' in tenant_id: + old_format = f"{self._collection_prefix}{tenant_id}" + new_format = f"{self._collection_prefix}{tenant_id.replace('@', '_')}" + collection_names = [new_format, old_format] + + logger.info(f"[AC-AISVC-10] Will search in collections: {collection_names}") + + all_hits = [] + + for collection_name in collection_names: + try: + logger.info(f"[AC-AISVC-10] Searching in collection: {collection_name}") + + try: + results = await client.search( + collection_name=collection_name, + query_vector=(vector_name, query_vector), + limit=limit, + ) + except Exception as e: + if "vector name" in str(e).lower() or "Not existing vector" in str(e): + logger.info( + f"[AC-AISVC-10] Collection {collection_name} doesn't have vector named '{vector_name}', " + f"trying without vector name (single-vector mode)" + ) + results = await client.search( + collection_name=collection_name, + query_vector=query_vector, + limit=limit, + ) + else: + raise + + logger.info( + f"[AC-AISVC-10] Collection {collection_name} returned {len(results)} raw results" + ) + + hits = [ + { + "id": str(result.id), + "score": result.score, + "payload": result.payload or {}, + } + for result in results + if score_threshold is None or result.score >= score_threshold + ] + all_hits.extend(hits) + + if hits: + logger.info( + f"[AC-AISVC-10] Search in collection {collection_name}: {len(hits)} results for tenant={tenant_id}" + ) + for i, h in enumerate(hits[:3]): + logger.debug( + f"[AC-AISVC-10] Hit {i+1}: id={h['id']}, score={h['score']:.4f}" + ) + else: + logger.warning( + f"[AC-AISVC-10] Collection {collection_name} returned no hits (filtered or empty)" + ) + except Exception as e: + logger.warning( + f"[AC-AISVC-10] Collection {collection_name} not found or error: {e}" + ) + continue + + all_hits = sorted(all_hits, key=lambda x: x["score"], reverse=True)[:limit] + + logger.info( + f"[AC-AISVC-10] Search returned {len(all_hits)} total results for tenant={tenant_id}" + ) + + if len(all_hits) == 0: + logger.warning( + f"[AC-AISVC-10] No results found! tenant={tenant_id}, " + f"collections_tried={collection_names}, limit={limit}" + ) + + return all_hits + + async def delete_collection(self, tenant_id: str) -> bool: + """ + [AC-AISVC-10] Delete tenant's collection. + Used when tenant is removed. + """ + client = await self.get_client() + collection_name = self.get_collection_name(tenant_id) + + try: + await client.delete_collection(collection_name=collection_name) + logger.info(f"[AC-AISVC-10] Deleted collection: {collection_name}") + return True + except Exception as e: + logger.error(f"[AC-AISVC-10] Error deleting collection: {e}") + return False + + +_qdrant_client: QdrantClient | None = None + + +async def get_qdrant_client() -> QdrantClient: + """Get or create Qdrant client instance.""" + global _qdrant_client + if _qdrant_client is None: + _qdrant_client = QdrantClient() + return _qdrant_client + + +async def close_qdrant_client() -> None: + """Close Qdrant client connection.""" + global _qdrant_client + if _qdrant_client: + await _qdrant_client.close() + _qdrant_client = None diff --git a/ai-service/app/core/sse.py b/ai-service/app/core/sse.py new file mode 100644 index 0000000..1930323 --- /dev/null +++ b/ai-service/app/core/sse.py @@ -0,0 +1,173 @@ +""" +SSE utilities for AI Service. +[AC-AISVC-06, AC-AISVC-07, AC-AISVC-08, AC-AISVC-09] SSE event generation and state machine. +""" + +import asyncio +import json +import logging +from enum import Enum +from typing import Any, AsyncGenerator + +from sse_starlette.sse import EventSourceResponse, ServerSentEvent + +from app.core.config import get_settings +from app.models import SSEErrorEvent, SSEEventType, SSEFinalEvent, SSEMessageEvent + +logger = logging.getLogger(__name__) + + +class SSEState(str, Enum): + INIT = "INIT" + STREAMING = "STREAMING" + FINAL_SENT = "FINAL_SENT" + ERROR_SENT = "ERROR_SENT" + CLOSED = "CLOSED" + + +class SSEStateMachine: + """ + [AC-AISVC-08, AC-AISVC-09] SSE state machine ensuring proper event sequence. + State transitions: INIT -> STREAMING -> FINAL_SENT/ERROR_SENT -> CLOSED + """ + + def __init__(self): + self._state = SSEState.INIT + self._lock = asyncio.Lock() + + @property + def state(self) -> SSEState: + return self._state + + async def transition_to_streaming(self) -> bool: + async with self._lock: + if self._state == SSEState.INIT: + self._state = SSEState.STREAMING + logger.debug(f"[AC-AISVC-07] SSE state transition: INIT -> STREAMING") + return True + return False + + async def transition_to_final(self) -> bool: + async with self._lock: + if self._state == SSEState.STREAMING: + self._state = SSEState.FINAL_SENT + logger.debug(f"[AC-AISVC-08] SSE state transition: STREAMING -> FINAL_SENT") + return True + return False + + async def transition_to_error(self) -> bool: + async with self._lock: + if self._state in (SSEState.INIT, SSEState.STREAMING): + self._state = SSEState.ERROR_SENT + logger.debug(f"[AC-AISVC-09] SSE state transition: {self._state} -> ERROR_SENT") + return True + return False + + async def close(self) -> None: + async with self._lock: + self._state = SSEState.CLOSED + logger.debug("SSE state transition: -> CLOSED") + + def can_send_message(self) -> bool: + return self._state == SSEState.STREAMING + + +def format_sse_event(event_type: SSEEventType, data: dict[str, Any]) -> ServerSentEvent: + """Format data as SSE event.""" + return ServerSentEvent( + event=event_type.value, + data=json.dumps(data, ensure_ascii=False), + ) + + +def create_message_event(delta: str) -> ServerSentEvent: + """[AC-AISVC-07] Create a message event with incremental content.""" + event_data = SSEMessageEvent(delta=delta) + return format_sse_event(SSEEventType.MESSAGE, event_data.model_dump()) + + +def create_final_event( + reply: str, + confidence: float, + should_transfer: bool, + transfer_reason: str | None = None, + metadata: dict[str, Any] | None = None, +) -> ServerSentEvent: + """[AC-AISVC-08] Create a final event with complete response.""" + event_data = SSEFinalEvent( + reply=reply, + confidence=confidence, + should_transfer=should_transfer, + transfer_reason=transfer_reason, + metadata=metadata, + ) + return format_sse_event( + SSEEventType.FINAL, + event_data.model_dump(exclude_none=True, by_alias=True) + ) + + +def create_error_event( + code: str, + message: str, + details: list[dict[str, Any]] | None = None, +) -> ServerSentEvent: + """[AC-AISVC-09] Create an error event.""" + event_data = SSEErrorEvent( + code=code, + message=message, + details=details, + ) + return format_sse_event(SSEEventType.ERROR, event_data.model_dump(exclude_none=True)) + + +async def ping_generator(interval_seconds: int) -> AsyncGenerator[str, None]: + """ + [AC-AISVC-06] Generate ping comments for SSE keep-alive. + Sends ': ping' as comment lines (not events) to keep connection alive. + """ + while True: + await asyncio.sleep(interval_seconds) + yield ": ping\n\n" + + +class SSEResponseBuilder: + """ + Builder for SSE response with proper event sequencing and ping keep-alive. + """ + + def __init__(self): + self._state_machine = SSEStateMachine() + self._settings = get_settings() + + async def build_response( + self, + content_generator: AsyncGenerator[ServerSentEvent, None], + ) -> EventSourceResponse: + """ + Build SSE response with ping keep-alive mechanism. + [AC-AISVC-06] Implements ping keep-alive to prevent connection timeout. + """ + + async def event_generator() -> AsyncGenerator[ServerSentEvent, None]: + await self._state_machine.transition_to_streaming() + try: + async for event in content_generator: + if self._state_machine.can_send_message(): + yield event + else: + break + except Exception as e: + logger.error(f"[AC-AISVC-09] Error during SSE streaming: {e}") + if await self._state_machine.transition_to_error(): + yield create_error_event( + code="STREAMING_ERROR", + message=str(e), + ) + finally: + await self._state_machine.close() + + return EventSourceResponse( + event_generator(), + ping=self._settings.sse_ping_interval_seconds, + ) diff --git a/ai-service/app/core/tenant.py b/ai-service/app/core/tenant.py new file mode 100644 index 0000000..8baf4a6 --- /dev/null +++ b/ai-service/app/core/tenant.py @@ -0,0 +1,31 @@ +""" +Tenant context management. +[AC-AISVC-10, AC-AISVC-12] Multi-tenant isolation via X-Tenant-Id header. +""" + +from contextvars import ContextVar +from dataclasses import dataclass + +tenant_context: ContextVar["TenantContext | None"] = ContextVar("tenant_context", default=None) + + +@dataclass +class TenantContext: + tenant_id: str + + +def set_tenant_context(tenant_id: str) -> None: + tenant_context.set(TenantContext(tenant_id=tenant_id)) + + +def get_tenant_context() -> TenantContext | None: + return tenant_context.get() + + +def get_tenant_id() -> str | None: + ctx = get_tenant_context() + return ctx.tenant_id if ctx else None + + +def clear_tenant_context() -> None: + tenant_context.set(None) diff --git a/ai-service/app/main.py b/ai-service/app/main.py new file mode 100644 index 0000000..c18afba --- /dev/null +++ b/ai-service/app/main.py @@ -0,0 +1,134 @@ +""" +Main FastAPI application for AI Service. +[AC-AISVC-01] Entry point with middleware and exception handlers. +""" + +import logging +from contextlib import asynccontextmanager + +from fastapi import FastAPI, Request, status +from fastapi.exceptions import HTTPException, RequestValidationError +from fastapi.middleware.cors import CORSMiddleware +from fastapi.responses import JSONResponse + +from app.api import chat_router, health_router +from app.api.admin import dashboard_router, embedding_router, kb_router, llm_router, rag_router, sessions_router, tenants_router +from app.api.admin.kb_optimized import router as kb_optimized_router +from app.core.config import get_settings +from app.core.database import close_db, init_db +from app.core.exceptions import ( + AIServiceException, + ErrorCode, + ErrorResponse, + ai_service_exception_handler, + generic_exception_handler, + http_exception_handler, +) +from app.core.middleware import TenantContextMiddleware +from app.core.qdrant_client import close_qdrant_client + +settings = get_settings() + +logging.basicConfig( + level=getattr(logging, settings.log_level.upper()), + format="%(asctime)s - %(name)s - %(levelname)s - %(message)s", +) + +logger = logging.getLogger(__name__) + + +@asynccontextmanager +async def lifespan(app: FastAPI): + """ + [AC-AISVC-01, AC-AISVC-11] Application lifespan manager. + Handles startup and shutdown of database and external connections. + """ + logger.info(f"[AC-AISVC-01] Starting {settings.app_name} v{settings.app_version}") + + try: + await init_db() + logger.info("[AC-AISVC-11] Database initialized successfully") + except Exception as e: + logger.warning(f"[AC-AISVC-11] Database initialization skipped: {e}") + + yield + + await close_db() + await close_qdrant_client() + logger.info(f"Shutting down {settings.app_name}") + + +app = FastAPI( + title=settings.app_name, + version=settings.app_version, + description=""" + Python AI Service for intelligent chat with RAG support. + + ## Features + - Multi-tenant isolation via X-Tenant-Id header + - SSE streaming support via Accept: text/event-stream + - RAG-powered responses with confidence scoring + + ## Response Modes + - **JSON**: Default response mode (Accept: application/json or no Accept header) + - **SSE Streaming**: Set Accept: text/event-stream for streaming responses + """, + docs_url="/docs", + redoc_url="/redoc", + lifespan=lifespan, +) + +app.add_middleware( + CORSMiddleware, + allow_origins=["*"], + allow_credentials=True, + allow_methods=["*"], + allow_headers=["*"], +) + +app.add_middleware(TenantContextMiddleware) + +app.add_exception_handler(AIServiceException, ai_service_exception_handler) +app.add_exception_handler(HTTPException, http_exception_handler) +app.add_exception_handler(Exception, generic_exception_handler) + + +@app.exception_handler(RequestValidationError) +async def validation_exception_handler(request: Request, exc: RequestValidationError): + """ + [AC-AISVC-03] Handle request validation errors with structured response. + """ + logger.warning(f"[AC-AISVC-03] Request validation error: {exc.errors()}") + error_response = ErrorResponse( + code=ErrorCode.INVALID_REQUEST.value, + message="Request validation failed", + details=[{"loc": list(err["loc"]), "msg": err["msg"], "type": err["type"]} for err in exc.errors()], + ) + return JSONResponse( + status_code=status.HTTP_400_BAD_REQUEST, + content=error_response.model_dump(exclude_none=True), + ) + + +app.include_router(health_router) +app.include_router(chat_router) + +app.include_router(dashboard_router) +app.include_router(embedding_router) +app.include_router(kb_router) +app.include_router(kb_optimized_router) +app.include_router(llm_router) +app.include_router(rag_router) +app.include_router(sessions_router) +app.include_router(tenants_router) + + +if __name__ == "__main__": + import uvicorn + + uvicorn.run( + "app.main:app", + host=settings.host, + port=settings.port, + reload=settings.debug, + ) diff --git a/ai-service/app/models/__init__.py b/ai-service/app/models/__init__.py new file mode 100644 index 0000000..cbbe9e7 --- /dev/null +++ b/ai-service/app/models/__init__.py @@ -0,0 +1,89 @@ +""" +Data models for AI Service. +[AC-AISVC-02] Request/Response models aligned with OpenAPI contract. +[AC-AISVC-13] Entity models for database persistence. +""" + +from enum import Enum +from typing import Any + +from pydantic import BaseModel, Field + + +class ChannelType(str, Enum): + WECHAT = "wechat" + DOUYIN = "douyin" + JD = "jd" + + +class Role(str, Enum): + USER = "user" + ASSISTANT = "assistant" + + +class ChatMessage(BaseModel): + role: Role = Field(..., description="Message role: user or assistant") + content: str = Field(..., description="Message content") + + +class ChatRequest(BaseModel): + session_id: str = Field(..., alias="sessionId", description="Session ID for conversation tracking") + current_message: str = Field(..., alias="currentMessage", description="Current user message") + channel_type: ChannelType = Field(..., alias="channelType", description="Channel type: wechat, douyin, jd") + history: list[ChatMessage] | None = Field(default=None, description="Optional conversation history") + metadata: dict[str, Any] | None = Field(default=None, description="Optional metadata") + + model_config = {"populate_by_name": True} + + +class ChatResponse(BaseModel): + reply: str = Field(..., description="AI generated reply content") + confidence: float = Field(..., ge=0.0, le=1.0, description="Confidence score between 0.0 and 1.0") + should_transfer: bool = Field(..., alias="shouldTransfer", description="Whether to suggest transfer to human agent") + transfer_reason: str | None = Field(default=None, alias="transferReason", description="Reason for transfer suggestion") + metadata: dict[str, Any] | None = Field(default=None, description="Response metadata") + + model_config = {"populate_by_name": True} + + +class ErrorCode(str, Enum): + INVALID_REQUEST = "INVALID_REQUEST" + MISSING_TENANT_ID = "MISSING_TENANT_ID" + INVALID_TENANT_ID = "INVALID_TENANT_ID" + INTERNAL_ERROR = "INTERNAL_ERROR" + SERVICE_UNAVAILABLE = "SERVICE_UNAVAILABLE" + TIMEOUT = "TIMEOUT" + LLM_ERROR = "LLM_ERROR" + RETRIEVAL_ERROR = "RETRIEVAL_ERROR" + + +class ErrorResponse(BaseModel): + code: str = Field(..., description="Error code") + message: str = Field(..., description="Error message") + details: list[dict[str, Any]] | None = Field(default=None, description="Detailed error information") + + +class SSEEventType(str, Enum): + MESSAGE = "message" + FINAL = "final" + ERROR = "error" + + +class SSEMessageEvent(BaseModel): + delta: str = Field(..., description="Incremental text content") + + +class SSEFinalEvent(BaseModel): + reply: str = Field(..., description="Complete AI reply") + confidence: float = Field(..., ge=0.0, le=1.0, description="Confidence score") + should_transfer: bool = Field(..., alias="shouldTransfer", description="Transfer suggestion") + transfer_reason: str | None = Field(default=None, alias="transferReason", description="Transfer reason") + metadata: dict[str, Any] | None = Field(default=None, description="Response metadata") + + model_config = {"populate_by_name": True} + + +class SSEErrorEvent(BaseModel): + code: str = Field(..., description="Error code") + message: str = Field(..., description="Error message") + details: list[dict[str, Any]] | None = Field(default=None, description="Error details") diff --git a/ai-service/app/models/entities.py b/ai-service/app/models/entities.py new file mode 100644 index 0000000..cf41595 --- /dev/null +++ b/ai-service/app/models/entities.py @@ -0,0 +1,200 @@ +""" +Memory layer entities for AI Service. +[AC-AISVC-13] SQLModel entities for chat sessions and messages with tenant isolation. +""" + +import uuid +from datetime import datetime +from enum import Enum +from typing import Any + +from sqlalchemy import Column, JSON +from sqlmodel import Field, Index, SQLModel + + +class ChatSession(SQLModel, table=True): + """ + [AC-AISVC-13] Chat session entity with tenant isolation. + Primary key: (tenant_id, session_id) composite unique constraint. + """ + + __tablename__ = "chat_sessions" + __table_args__ = ( + Index("ix_chat_sessions_tenant_session", "tenant_id", "session_id", unique=True), + Index("ix_chat_sessions_tenant_id", "tenant_id"), + ) + + id: uuid.UUID = Field(default_factory=uuid.uuid4, primary_key=True) + tenant_id: str = Field(..., description="Tenant ID for multi-tenant isolation", index=True) + session_id: str = Field(..., description="Session ID for conversation tracking") + channel_type: str | None = Field(default=None, description="Channel type: wechat, douyin, jd") + metadata_: dict[str, Any] | None = Field( + default=None, + sa_column=Column("metadata", JSON, nullable=True), + description="Session metadata" + ) + created_at: datetime = Field(default_factory=datetime.utcnow, description="Session creation time") + updated_at: datetime = Field(default_factory=datetime.utcnow, description="Last update time") + + +class ChatMessage(SQLModel, table=True): + """ + [AC-AISVC-13] Chat message entity with tenant isolation. + Messages are scoped by (tenant_id, session_id) for multi-tenant security. + """ + + __tablename__ = "chat_messages" + __table_args__ = ( + Index("ix_chat_messages_tenant_session", "tenant_id", "session_id"), + Index("ix_chat_messages_tenant_session_created", "tenant_id", "session_id", "created_at"), + ) + + id: uuid.UUID = Field(default_factory=uuid.uuid4, primary_key=True) + tenant_id: str = Field(..., description="Tenant ID for multi-tenant isolation", index=True) + session_id: str = Field(..., description="Session ID for conversation tracking", index=True) + role: str = Field(..., description="Message role: user or assistant") + content: str = Field(..., description="Message content") + prompt_tokens: int | None = Field(default=None, description="Number of prompt tokens used") + completion_tokens: int | None = Field(default=None, description="Number of completion tokens used") + total_tokens: int | None = Field(default=None, description="Total tokens used") + latency_ms: int | None = Field(default=None, description="Response latency in milliseconds") + first_token_ms: int | None = Field(default=None, description="Time to first token in milliseconds (for streaming)") + is_error: bool = Field(default=False, description="Whether this message is an error response") + error_message: str | None = Field(default=None, description="Error message if any") + created_at: datetime = Field(default_factory=datetime.utcnow, description="Message creation time") + + +class ChatSessionCreate(SQLModel): + """Schema for creating a new chat session.""" + + tenant_id: str + session_id: str + channel_type: str | None = None + metadata_: dict[str, Any] | None = None + + +class ChatMessageCreate(SQLModel): + """Schema for creating a new chat message.""" + + tenant_id: str + session_id: str + role: str + content: str + + +class DocumentStatus(str, Enum): + PENDING = "pending" + PROCESSING = "processing" + COMPLETED = "completed" + FAILED = "failed" + + +class IndexJobStatus(str, Enum): + PENDING = "pending" + PROCESSING = "processing" + COMPLETED = "completed" + FAILED = "failed" + + +class SessionStatus(str, Enum): + ACTIVE = "active" + CLOSED = "closed" + EXPIRED = "expired" + + +class Tenant(SQLModel, table=True): + """ + [AC-AISVC-10] Tenant entity for storing tenant information. + Tenant ID format: name@ash@year (e.g., szmp@ash@2026) + """ + + __tablename__ = "tenants" + + id: uuid.UUID = Field(default_factory=uuid.uuid4, primary_key=True) + tenant_id: str = Field(..., description="Full tenant ID (format: name@ash@year)", unique=True, index=True) + name: str = Field(..., description="Tenant display name (first part of tenant_id)") + year: str = Field(..., description="Year part from tenant_id") + created_at: datetime = Field(default_factory=datetime.utcnow, description="Creation time") + updated_at: datetime = Field(default_factory=datetime.utcnow, description="Last update time") + + +class KnowledgeBase(SQLModel, table=True): + """ + [AC-ASA-01] Knowledge base entity with tenant isolation. + """ + + __tablename__ = "knowledge_bases" + __table_args__ = ( + Index("ix_knowledge_bases_tenant_id", "tenant_id"), + ) + + id: uuid.UUID = Field(default_factory=uuid.uuid4, primary_key=True) + tenant_id: str = Field(..., description="Tenant ID for multi-tenant isolation", index=True) + name: str = Field(..., description="Knowledge base name") + description: str | None = Field(default=None, description="Knowledge base description") + created_at: datetime = Field(default_factory=datetime.utcnow, description="Creation time") + updated_at: datetime = Field(default_factory=datetime.utcnow, description="Last update time") + + +class Document(SQLModel, table=True): + """ + [AC-ASA-01, AC-ASA-08] Document entity with tenant isolation. + """ + + __tablename__ = "documents" + __table_args__ = ( + Index("ix_documents_tenant_kb", "tenant_id", "kb_id"), + Index("ix_documents_tenant_status", "tenant_id", "status"), + ) + + id: uuid.UUID = Field(default_factory=uuid.uuid4, primary_key=True) + tenant_id: str = Field(..., description="Tenant ID for multi-tenant isolation", index=True) + kb_id: str = Field(..., description="Knowledge base ID") + file_name: str = Field(..., description="Original file name") + file_path: str | None = Field(default=None, description="Storage path") + file_size: int | None = Field(default=None, description="File size in bytes") + file_type: str | None = Field(default=None, description="File MIME type") + status: str = Field(default=DocumentStatus.PENDING.value, description="Document status") + error_msg: str | None = Field(default=None, description="Error message if failed") + created_at: datetime = Field(default_factory=datetime.utcnow, description="Upload time") + updated_at: datetime = Field(default_factory=datetime.utcnow, description="Last update time") + + +class IndexJob(SQLModel, table=True): + """ + [AC-ASA-02] Index job entity for tracking document indexing progress. + """ + + __tablename__ = "index_jobs" + __table_args__ = ( + Index("ix_index_jobs_tenant_doc", "tenant_id", "doc_id"), + Index("ix_index_jobs_tenant_status", "tenant_id", "status"), + ) + + id: uuid.UUID = Field(default_factory=uuid.uuid4, primary_key=True) + tenant_id: str = Field(..., description="Tenant ID for multi-tenant isolation", index=True) + doc_id: uuid.UUID = Field(..., description="Document ID being indexed") + status: str = Field(default=IndexJobStatus.PENDING.value, description="Job status") + progress: int = Field(default=0, ge=0, le=100, description="Progress percentage") + error_msg: str | None = Field(default=None, description="Error message if failed") + created_at: datetime = Field(default_factory=datetime.utcnow, description="Job creation time") + updated_at: datetime = Field(default_factory=datetime.utcnow, description="Last update time") + + +class KnowledgeBaseCreate(SQLModel): + """Schema for creating a new knowledge base.""" + + tenant_id: str + name: str + description: str | None = None + + +class DocumentCreate(SQLModel): + """Schema for creating a new document.""" + + tenant_id: str + kb_id: str + file_name: str + file_path: str | None = None + file_size: int | None = None + file_type: str | None = None diff --git a/ai-service/app/services/__init__.py b/ai-service/app/services/__init__.py new file mode 100644 index 0000000..22c50c7 --- /dev/null +++ b/ai-service/app/services/__init__.py @@ -0,0 +1,9 @@ +""" +Services module for AI Service. +[AC-AISVC-13, AC-AISVC-16] Core services for memory and retrieval. +""" + +from app.services.memory import MemoryService +from app.services.orchestrator import OrchestratorService, get_orchestrator_service + +__all__ = ["MemoryService", "OrchestratorService", "get_orchestrator_service"] diff --git a/ai-service/app/services/confidence.py b/ai-service/app/services/confidence.py new file mode 100644 index 0000000..8cfdc25 --- /dev/null +++ b/ai-service/app/services/confidence.py @@ -0,0 +1,224 @@ +""" +Confidence calculation for AI Service. +[AC-AISVC-17, AC-AISVC-18, AC-AISVC-19] Confidence scoring and transfer suggestion logic. + +Design reference: design.md Section 4.3 - 检索不中兜底与置信度策略 +- Retrieval insufficiency detection +- Confidence calculation based on retrieval scores +- shouldTransfer logic with threshold T_low +""" + +import logging +from dataclasses import dataclass, field +from typing import Any + +from app.core.config import get_settings +from app.services.retrieval.base import RetrievalResult + +logger = logging.getLogger(__name__) + + +@dataclass +class ConfidenceConfig: + """ + Configuration for confidence calculation. + [AC-AISVC-17, AC-AISVC-18] Configurable thresholds. + """ + score_threshold: float = 0.7 + min_hits: int = 1 + confidence_low_threshold: float = 0.5 + confidence_high_threshold: float = 0.8 + insufficient_penalty: float = 0.3 + max_evidence_tokens: int = 2000 + + +@dataclass +class ConfidenceResult: + """ + Result of confidence calculation. + [AC-AISVC-17, AC-AISVC-18, AC-AISVC-19] Contains confidence and transfer suggestion. + """ + confidence: float + should_transfer: bool + transfer_reason: str | None = None + is_retrieval_insufficient: bool = False + diagnostics: dict[str, Any] = field(default_factory=dict) + + +class ConfidenceCalculator: + """ + [AC-AISVC-17, AC-AISVC-18, AC-AISVC-19] Calculator for response confidence. + + Design reference: design.md Section 4.3 + - MVP: confidence based on RAG retrieval scores + - Insufficient retrieval triggers confidence downgrade + - shouldTransfer when confidence < T_low + """ + + def __init__(self, config: ConfidenceConfig | None = None): + settings = get_settings() + self._config = config or ConfidenceConfig( + score_threshold=getattr(settings, "rag_score_threshold", 0.7), + min_hits=getattr(settings, "rag_min_hits", 1), + confidence_low_threshold=getattr(settings, "confidence_low_threshold", 0.5), + confidence_high_threshold=getattr(settings, "confidence_high_threshold", 0.8), + insufficient_penalty=getattr(settings, "confidence_insufficient_penalty", 0.3), + max_evidence_tokens=getattr(settings, "rag_max_evidence_tokens", 2000), + ) + + def is_retrieval_insufficient( + self, + retrieval_result: RetrievalResult, + evidence_tokens: int | None = None, + ) -> tuple[bool, str]: + """ + [AC-AISVC-17] Determine if retrieval results are insufficient. + + Conditions for insufficiency: + 1. hits.size < min_hits + 2. max(score) < score_threshold + 3. evidence tokens exceed limit (optional) + + Args: + retrieval_result: Result from retrieval operation + evidence_tokens: Optional token count for evidence + + Returns: + Tuple of (is_insufficient, reason) + """ + reasons = [] + + if retrieval_result.hit_count < self._config.min_hits: + reasons.append( + f"hit_count({retrieval_result.hit_count}) < min_hits({self._config.min_hits})" + ) + + if retrieval_result.max_score < self._config.score_threshold: + reasons.append( + f"max_score({retrieval_result.max_score:.3f}) < threshold({self._config.score_threshold})" + ) + + if evidence_tokens is not None and evidence_tokens > self._config.max_evidence_tokens: + reasons.append( + f"evidence_tokens({evidence_tokens}) > max({self._config.max_evidence_tokens})" + ) + + is_insufficient = len(reasons) > 0 + reason = "; ".join(reasons) if reasons else "sufficient" + + return is_insufficient, reason + + def calculate_confidence( + self, + retrieval_result: RetrievalResult, + evidence_tokens: int | None = None, + additional_factors: dict[str, float] | None = None, + ) -> ConfidenceResult: + """ + [AC-AISVC-17, AC-AISVC-18, AC-AISVC-19] Calculate confidence and transfer suggestion. + + MVP Strategy: + 1. Base confidence from max retrieval score + 2. Adjust for hit count (more hits = higher confidence) + 3. Penalize if retrieval is insufficient + 4. Determine shouldTransfer based on T_low threshold + + Args: + retrieval_result: Result from retrieval operation + evidence_tokens: Optional token count for evidence + additional_factors: Optional additional confidence factors + + Returns: + ConfidenceResult with confidence and transfer suggestion + """ + is_insufficient, insufficiency_reason = self.is_retrieval_insufficient( + retrieval_result, evidence_tokens + ) + + base_confidence = retrieval_result.max_score + + hit_count_factor = min(1.0, retrieval_result.hit_count / 5.0) + confidence = base_confidence * 0.7 + hit_count_factor * 0.3 + + if is_insufficient: + confidence -= self._config.insufficient_penalty + logger.info( + f"[AC-AISVC-17] Retrieval insufficient: {insufficiency_reason}, " + f"applying penalty -{self._config.insufficient_penalty}" + ) + + if additional_factors: + for factor_name, factor_value in additional_factors.items(): + confidence += factor_value * 0.1 + + confidence = max(0.0, min(1.0, confidence)) + + should_transfer = confidence < self._config.confidence_low_threshold + transfer_reason = None + + if should_transfer: + if is_insufficient: + transfer_reason = "检索结果不足,无法提供高置信度回答" + else: + transfer_reason = "置信度低于阈值,建议转人工" + elif confidence < self._config.confidence_high_threshold and is_insufficient: + transfer_reason = "检索结果有限,回答可能不够准确" + + diagnostics = { + "base_confidence": base_confidence, + "hit_count": retrieval_result.hit_count, + "max_score": retrieval_result.max_score, + "is_insufficient": is_insufficient, + "insufficiency_reason": insufficiency_reason if is_insufficient else None, + "penalty_applied": self._config.insufficient_penalty if is_insufficient else 0.0, + "threshold_low": self._config.confidence_low_threshold, + "threshold_high": self._config.confidence_high_threshold, + } + + logger.info( + f"[AC-AISVC-17, AC-AISVC-18] Confidence calculated: " + f"{confidence:.3f}, should_transfer={should_transfer}, " + f"insufficient={is_insufficient}" + ) + + return ConfidenceResult( + confidence=round(confidence, 3), + should_transfer=should_transfer, + transfer_reason=transfer_reason, + is_retrieval_insufficient=is_insufficient, + diagnostics=diagnostics, + ) + + def calculate_confidence_no_retrieval(self) -> ConfidenceResult: + """ + [AC-AISVC-17] Calculate confidence when no retrieval was performed. + + Returns a low confidence result suggesting transfer. + """ + return ConfidenceResult( + confidence=0.3, + should_transfer=True, + transfer_reason="未进行知识库检索,建议转人工", + is_retrieval_insufficient=True, + diagnostics={ + "base_confidence": 0.0, + "hit_count": 0, + "max_score": 0.0, + "is_insufficient": True, + "insufficiency_reason": "no_retrieval", + "penalty_applied": 0.0, + "threshold_low": self._config.confidence_low_threshold, + "threshold_high": self._config.confidence_high_threshold, + }, + ) + + +_confidence_calculator: ConfidenceCalculator | None = None + + +def get_confidence_calculator() -> ConfidenceCalculator: + """Get or create confidence calculator instance.""" + global _confidence_calculator + if _confidence_calculator is None: + _confidence_calculator = ConfidenceCalculator() + return _confidence_calculator diff --git a/ai-service/app/services/context.py b/ai-service/app/services/context.py new file mode 100644 index 0000000..598a6c4 --- /dev/null +++ b/ai-service/app/services/context.py @@ -0,0 +1,245 @@ +""" +Context management utilities for AI Service. +[AC-AISVC-14, AC-AISVC-15] Context merging and truncation strategies. + +Design reference: design.md Section 7 - 上下文合并规则 +- H_local: Memory layer history (sorted by time) +- H_ext: External history from Java request (in passed order) +- Deduplication: fingerprint = hash(role + "|" + normalized(content)) +- Truncation: Keep most recent N messages within token budget +""" + +import hashlib +import logging +from dataclasses import dataclass, field +from typing import Any + +import tiktoken + +from app.core.config import get_settings +from app.models import ChatMessage, Role + +logger = logging.getLogger(__name__) + + +@dataclass +class MergedContext: + """ + Result of context merging. + [AC-AISVC-14, AC-AISVC-15] Contains merged messages and diagnostics. + """ + messages: list[dict[str, str]] = field(default_factory=list) + total_tokens: int = 0 + local_count: int = 0 + external_count: int = 0 + duplicates_skipped: int = 0 + truncated_count: int = 0 + diagnostics: list[dict[str, Any]] = field(default_factory=list) + + +class ContextMerger: + """ + [AC-AISVC-14, AC-AISVC-15] Context merger for combining local and external history. + + Design reference: design.md Section 7 + - Deduplication based on message fingerprint + - Priority: local history takes precedence + - Token-based truncation using tiktoken + """ + + def __init__( + self, + max_history_tokens: int | None = None, + encoding_name: str = "cl100k_base", + ): + settings = get_settings() + self._max_history_tokens = max_history_tokens or 4096 + self._encoding = tiktoken.get_encoding(encoding_name) + + def compute_fingerprint(self, role: str, content: str) -> str: + """ + Compute message fingerprint for deduplication. + [AC-AISVC-15] fingerprint = hash(role + "|" + normalized(content)) + + Args: + role: Message role (user/assistant) + content: Message content + + Returns: + SHA256 hash of the normalized message + """ + normalized_content = content.strip() + fingerprint_input = f"{role}|{normalized_content}" + return hashlib.sha256(fingerprint_input.encode("utf-8")).hexdigest() + + def _message_to_dict(self, message: ChatMessage | dict[str, str]) -> dict[str, str]: + """Convert ChatMessage or dict to standard dict format.""" + if isinstance(message, ChatMessage): + return {"role": message.role.value, "content": message.content} + return message + + def _count_tokens(self, messages: list[dict[str, str]]) -> int: + """ + Count total tokens in messages using tiktoken. + [AC-AISVC-14] Token counting for history truncation. + """ + total = 0 + for msg in messages: + total += len(self._encoding.encode(msg.get("role", ""))) + total += len(self._encoding.encode(msg.get("content", ""))) + total += 4 # Approximate overhead for message structure + return total + + def merge_context( + self, + local_history: list[ChatMessage] | list[dict[str, str]] | None, + external_history: list[ChatMessage] | list[dict[str, str]] | None, + ) -> MergedContext: + """ + Merge local and external history with deduplication. + [AC-AISVC-14, AC-AISVC-15] Implements context merging strategy. + + Design reference: design.md Section 7.2 + 1. Build seen set from H_local + 2. Traverse H_ext, append if fingerprint not seen + 3. Local history takes priority + + Args: + local_history: History from Memory layer (H_local) + external_history: History from Java request (H_ext) + + Returns: + MergedContext with merged messages and diagnostics + """ + result = MergedContext() + seen_fingerprints: set[str] = set() + merged_messages: list[dict[str, str]] = [] + diagnostics: list[dict[str, Any]] = [] + + local_messages = [self._message_to_dict(m) for m in (local_history or [])] + external_messages = [self._message_to_dict(m) for m in (external_history or [])] + + for msg in local_messages: + fingerprint = self.compute_fingerprint(msg["role"], msg["content"]) + seen_fingerprints.add(fingerprint) + merged_messages.append(msg) + result.local_count += 1 + + for msg in external_messages: + fingerprint = self.compute_fingerprint(msg["role"], msg["content"]) + if fingerprint not in seen_fingerprints: + seen_fingerprints.add(fingerprint) + merged_messages.append(msg) + result.external_count += 1 + else: + result.duplicates_skipped += 1 + diagnostics.append({ + "type": "duplicate_skipped", + "role": msg["role"], + "content_preview": msg["content"][:50] + "..." if len(msg["content"]) > 50 else msg["content"], + }) + + result.messages = merged_messages + result.diagnostics = diagnostics + result.total_tokens = self._count_tokens(merged_messages) + + logger.info( + f"[AC-AISVC-14, AC-AISVC-15] Context merged: " + f"local={result.local_count}, external={result.external_count}, " + f"duplicates_skipped={result.duplicates_skipped}, " + f"total_tokens={result.total_tokens}" + ) + + return result + + def truncate_context( + self, + messages: list[dict[str, str]], + max_tokens: int | None = None, + ) -> tuple[list[dict[str, str]], int]: + """ + Truncate context to fit within token budget. + [AC-AISVC-14] Keep most recent N messages within budget. + + Design reference: design.md Section 7.4 + - Budget = maxHistoryTokens (configurable) + - Strategy: Keep most recent messages (from tail backward) + + Args: + messages: List of messages to truncate + max_tokens: Maximum token budget (uses default if not provided) + + Returns: + Tuple of (truncated messages, truncated count) + """ + budget = max_tokens or self._max_history_tokens + if not messages: + return [], 0 + + total_tokens = self._count_tokens(messages) + if total_tokens <= budget: + return messages, 0 + + truncated_messages: list[dict[str, str]] = [] + current_tokens = 0 + truncated_count = 0 + + for msg in reversed(messages): + msg_tokens = len(self._encoding.encode(msg.get("role", ""))) + msg_tokens += len(self._encoding.encode(msg.get("content", ""))) + msg_tokens += 4 + + if current_tokens + msg_tokens <= budget: + truncated_messages.insert(0, msg) + current_tokens += msg_tokens + else: + truncated_count += 1 + + logger.info( + f"[AC-AISVC-14] Context truncated: " + f"original={len(messages)}, truncated={len(truncated_messages)}, " + f"removed={truncated_count}, tokens={current_tokens}/{budget}" + ) + + return truncated_messages, truncated_count + + def merge_and_truncate( + self, + local_history: list[ChatMessage] | list[dict[str, str]] | None, + external_history: list[ChatMessage] | list[dict[str, str]] | None, + max_tokens: int | None = None, + ) -> MergedContext: + """ + Merge and truncate context in one operation. + [AC-AISVC-14, AC-AISVC-15] Complete context preparation pipeline. + + Args: + local_history: History from Memory layer (H_local) + external_history: History from Java request (H_ext) + max_tokens: Maximum token budget + + Returns: + MergedContext with final messages after merge and truncate + """ + merged = self.merge_context(local_history, external_history) + + truncated_messages, truncated_count = self.truncate_context( + merged.messages, max_tokens + ) + + merged.messages = truncated_messages + merged.truncated_count = truncated_count + merged.total_tokens = self._count_tokens(truncated_messages) + + return merged + + +_context_merger: ContextMerger | None = None + + +def get_context_merger() -> ContextMerger: + """Get or create context merger instance.""" + global _context_merger + if _context_merger is None: + _context_merger = ContextMerger() + return _context_merger diff --git a/ai-service/app/services/document/__init__.py b/ai-service/app/services/document/__init__.py new file mode 100644 index 0000000..2a1aa36 --- /dev/null +++ b/ai-service/app/services/document/__init__.py @@ -0,0 +1,38 @@ +""" +Document parsing services package. +[AC-AISVC-33] Provides document parsers for various formats. +""" + +from app.services.document.base import ( + DocumentParseException, + DocumentParser, + PageText, + ParseResult, + UnsupportedFormatError, +) +from app.services.document.excel_parser import CSVParser, ExcelParser +from app.services.document.factory import ( + DocumentParserFactory, + get_supported_document_formats, + parse_document, +) +from app.services.document.pdf_parser import PDFParser, PDFPlumberParser +from app.services.document.text_parser import TextParser +from app.services.document.word_parser import WordParser + +__all__ = [ + "DocumentParseException", + "DocumentParser", + "PageText", + "ParseResult", + "UnsupportedFormatError", + "DocumentParserFactory", + "get_supported_document_formats", + "parse_document", + "PDFParser", + "PDFPlumberParser", + "WordParser", + "ExcelParser", + "CSVParser", + "TextParser", +] diff --git a/ai-service/app/services/document/base.py b/ai-service/app/services/document/base.py new file mode 100644 index 0000000..4eee6e8 --- /dev/null +++ b/ai-service/app/services/document/base.py @@ -0,0 +1,116 @@ +""" +Base document parser interface. +[AC-AISVC-33] Abstract interface for document parsers. + +Design reference: progress.md Section 7.2 - DocumentParser interface +- parse(file_path) -> str +- get_supported_extensions() -> list[str] +""" + +from abc import ABC, abstractmethod +from dataclasses import dataclass, field +from pathlib import Path +from typing import Any + + +@dataclass +class PageText: + """ + Text content from a single page. + """ + page: int + text: str + + +@dataclass +class ParseResult: + """ + Result from document parsing. + [AC-AISVC-33] Contains parsed text and metadata. + """ + text: str + source_path: str + file_size: int + page_count: int | None = None + metadata: dict[str, Any] = field(default_factory=dict) + pages: list[PageText] = field(default_factory=list) + + +class DocumentParser(ABC): + """ + Abstract base class for document parsers. + [AC-AISVC-33] Provides unified interface for different document formats. + """ + + @abstractmethod + def parse(self, file_path: str | Path) -> ParseResult: + """ + Parse a document and extract text content. + [AC-AISVC-33] Returns parsed text content. + + Args: + file_path: Path to the document file. + + Returns: + ParseResult with extracted text and metadata. + + Raises: + DocumentParseException: If parsing fails. + """ + pass + + @abstractmethod + def get_supported_extensions(self) -> list[str]: + """ + Get list of supported file extensions. + [AC-AISVC-37] Returns supported format list. + + Returns: + List of file extensions (e.g., [".pdf", ".txt"]) + """ + pass + + def supports_extension(self, extension: str) -> bool: + """ + Check if this parser supports a given file extension. + [AC-AISVC-37] Validates file format support. + + Args: + extension: File extension to check. + + Returns: + True if extension is supported. + """ + normalized = extension.lower() + if not normalized.startswith("."): + normalized = f".{normalized}" + return normalized in self.get_supported_extensions() + + +class DocumentParseException(Exception): + """Exception raised when document parsing fails.""" + + def __init__( + self, + message: str, + file_path: str = "", + parser: str = "", + details: dict[str, Any] | None = None + ): + self.file_path = file_path + self.parser = parser + self.details = details or {} + super().__init__(f"[{parser}] {message}" if parser else message) + + +class UnsupportedFormatError(DocumentParseException): + """Exception raised when file format is not supported.""" + + def __init__(self, extension: str, supported: list[str]): + super().__init__( + f"Unsupported file format: {extension}. " + f"Supported formats: {', '.join(supported)}", + parser="format_checker" + ) + self.extension = extension + self.supported_formats = supported diff --git a/ai-service/app/services/document/excel_parser.py b/ai-service/app/services/document/excel_parser.py new file mode 100644 index 0000000..449e7c2 --- /dev/null +++ b/ai-service/app/services/document/excel_parser.py @@ -0,0 +1,273 @@ +""" +Excel document parser implementation. +[AC-AISVC-35] Excel (.xlsx) parsing using openpyxl. + +Extracts text content from Excel spreadsheets and converts to JSON format +to preserve structural relationships for better RAG retrieval. +""" + +import json +import logging +from pathlib import Path +from typing import Any + +from app.services.document.base import ( + DocumentParseException, + DocumentParser, + ParseResult, +) + +logger = logging.getLogger(__name__) + + +class ExcelParser(DocumentParser): + """ + Parser for Excel documents. + [AC-AISVC-35] Uses openpyxl for text extraction. + Converts spreadsheet data to JSON format to preserve structure. + """ + + def __init__( + self, + include_empty_cells: bool = False, + max_rows_per_sheet: int = 10000, + **kwargs: Any + ): + self._include_empty_cells = include_empty_cells + self._max_rows_per_sheet = max_rows_per_sheet + self._extra_config = kwargs + self._openpyxl = None + + def _get_openpyxl(self): + """Lazy import of openpyxl.""" + if self._openpyxl is None: + try: + import openpyxl + self._openpyxl = openpyxl + except ImportError: + raise DocumentParseException( + "openpyxl not installed. Install with: pip install openpyxl", + parser="excel" + ) + return self._openpyxl + + def _sheet_to_records(self, sheet, sheet_name: str) -> list[dict[str, Any]]: + """ + Convert a worksheet to a list of record dictionaries. + First row is treated as header (column names). + """ + records = [] + rows = list(sheet.iter_rows(max_row=self._max_rows_per_sheet, values_only=True)) + + if not rows: + return records + + headers = rows[0] + header_list = [str(h) if h is not None else f"column_{i}" for i, h in enumerate(headers)] + + for row in rows[1:]: + record = {"_sheet": sheet_name} + has_content = False + + for i, value in enumerate(row): + if i < len(header_list): + key = header_list[i] + else: + key = f"column_{i}" + + if value is not None: + has_content = True + if isinstance(value, (int, float, bool)): + record[key] = value + else: + record[key] = str(value) + elif self._include_empty_cells: + record[key] = None + + if has_content or self._include_empty_cells: + records.append(record) + + return records + + def parse(self, file_path: str | Path) -> ParseResult: + """ + Parse an Excel document and extract text content as JSON. + [AC-AISVC-35] Converts spreadsheet data to JSON format. + """ + path = Path(file_path) + + if not path.exists(): + raise DocumentParseException( + f"File not found: {path}", + file_path=str(path), + parser="excel" + ) + + if not self.supports_extension(path.suffix): + raise DocumentParseException( + f"Unsupported file extension: {path.suffix}", + file_path=str(path), + parser="excel" + ) + + openpyxl = self._get_openpyxl() + + try: + workbook = openpyxl.load_workbook(path, read_only=True, data_only=True) + + all_records: list[dict[str, Any]] = [] + sheet_count = len(workbook.sheetnames) + total_rows = 0 + + for sheet_name in workbook.sheetnames: + sheet = workbook[sheet_name] + records = self._sheet_to_records(sheet, sheet_name) + all_records.extend(records) + total_rows += len(records) + + workbook.close() + + json_str = json.dumps(all_records, ensure_ascii=False, indent=2) + file_size = path.stat().st_size + + logger.info( + f"Parsed Excel (JSON): {path.name}, sheets={sheet_count}, " + f"rows={total_rows}, chars={len(json_str)}, size={file_size}" + ) + + return ParseResult( + text=json_str, + source_path=str(path), + file_size=file_size, + metadata={ + "format": "xlsx", + "output_format": "json", + "sheet_count": sheet_count, + "total_rows": total_rows, + } + ) + + except DocumentParseException: + raise + except Exception as e: + raise DocumentParseException( + f"Failed to parse Excel document: {e}", + file_path=str(path), + parser="excel", + details={"error": str(e)} + ) + + def get_supported_extensions(self) -> list[str]: + """Get supported file extensions.""" + return [".xlsx", ".xls"] + + +class CSVParser(DocumentParser): + """ + Parser for CSV files. + [AC-AISVC-35] Uses Python's built-in csv module. + Converts CSV data to JSON format to preserve structure. + """ + + def __init__(self, delimiter: str = ",", encoding: str = "utf-8", **kwargs: Any): + self._delimiter = delimiter + self._encoding = encoding + self._extra_config = kwargs + + def _parse_csv_to_records(self, path: Path, encoding: str) -> list[dict[str, Any]]: + """Parse CSV file and return list of record dictionaries.""" + import csv + + records = [] + + with open(path, "r", encoding=encoding, newline="") as f: + reader = csv.reader(f, delimiter=self._delimiter) + rows = list(reader) + + if not rows: + return records + + headers = rows[0] + header_list = [str(h) if h else f"column_{i}" for i, h in enumerate(headers)] + + for row in rows[1:]: + record = {} + has_content = False + + for i, value in enumerate(row): + if i < len(header_list): + key = header_list[i] + else: + key = f"column_{i}" + + if value: + has_content = True + record[key] = value + + if has_content: + records.append(record) + + return records + + def parse(self, file_path: str | Path) -> ParseResult: + """ + Parse a CSV file and extract text content as JSON. + [AC-AISVC-35] Converts CSV data to JSON format. + """ + path = Path(file_path) + + if not path.exists(): + raise DocumentParseException( + f"File not found: {path}", + file_path=str(path), + parser="csv" + ) + + try: + records = self._parse_csv_to_records(path, self._encoding) + row_count = len(records) + used_encoding = self._encoding + except UnicodeDecodeError: + try: + records = self._parse_csv_to_records(path, "gbk") + row_count = len(records) + used_encoding = "gbk" + except Exception as e: + raise DocumentParseException( + f"Failed to parse CSV with encoding fallback: {e}", + file_path=str(path), + parser="csv", + details={"error": str(e)} + ) + except Exception as e: + raise DocumentParseException( + f"Failed to parse CSV: {e}", + file_path=str(path), + parser="csv", + details={"error": str(e)} + ) + + json_str = json.dumps(records, ensure_ascii=False, indent=2) + file_size = path.stat().st_size + + logger.info( + f"Parsed CSV (JSON): {path.name}, rows={row_count}, " + f"chars={len(json_str)}, size={file_size}" + ) + + return ParseResult( + text=json_str, + source_path=str(path), + file_size=file_size, + metadata={ + "format": "csv", + "output_format": "json", + "row_count": row_count, + "delimiter": self._delimiter, + "encoding": used_encoding, + } + ) + + def get_supported_extensions(self) -> list[str]: + """Get supported file extensions.""" + return [".csv"] diff --git a/ai-service/app/services/document/factory.py b/ai-service/app/services/document/factory.py new file mode 100644 index 0000000..74d4b2b --- /dev/null +++ b/ai-service/app/services/document/factory.py @@ -0,0 +1,215 @@ +""" +Document parser factory. +[AC-AISVC-33, AC-AISVC-34, AC-AISVC-35] Factory for document parsers. + +Design reference: progress.md Section 7.2 - DocumentParserFactory +""" + +import logging +from pathlib import Path +from typing import Any, Type + +from app.services.document.base import ( + DocumentParser, + DocumentParseException, + ParseResult, + UnsupportedFormatError, +) +from app.services.document.excel_parser import CSVParser, ExcelParser +from app.services.document.pdf_parser import PDFParser, PDFPlumberParser +from app.services.document.text_parser import TextParser +from app.services.document.word_parser import WordParser + +logger = logging.getLogger(__name__) + + +class DocumentParserFactory: + """ + Factory for creating document parsers. + [AC-AISVC-33, AC-AISVC-34, AC-AISVC-35] Auto-selects parser based on file extension. + """ + + _parsers: dict[str, Type[DocumentParser]] = {} + _extension_map: dict[str, str] = {} + + @classmethod + def _initialize(cls) -> None: + """Initialize default parsers.""" + if cls._parsers: + return + + cls._parsers = { + "pdf": PDFParser, + "pdfplumber": PDFPlumberParser, + "word": WordParser, + "excel": ExcelParser, + "csv": CSVParser, + "text": TextParser, + } + + cls._extension_map = { + ".pdf": "pdf", + ".docx": "word", + ".xlsx": "excel", + ".xls": "excel", + ".csv": "csv", + ".txt": "text", + ".md": "text", + ".markdown": "text", + ".rst": "text", + ".log": "text", + ".json": "text", + ".xml": "text", + ".yaml": "text", + ".yml": "text", + } + + @classmethod + def register_parser( + cls, + name: str, + parser_class: Type[DocumentParser], + extensions: list[str], + ) -> None: + """ + Register a new document parser. + [AC-AISVC-33] Allows runtime registration of parsers. + """ + cls._initialize() + cls._parsers[name] = parser_class + for ext in extensions: + cls._extension_map[ext.lower()] = name + logger.info(f"Registered document parser: {name} for extensions: {extensions}") + + @classmethod + def get_supported_extensions(cls) -> list[str]: + """ + Get all supported file extensions. + [AC-AISVC-37] Returns list of supported formats. + """ + cls._initialize() + return list(cls._extension_map.keys()) + + @classmethod + def get_parser_for_extension(cls, extension: str) -> DocumentParser: + """ + Get a parser instance for a file extension. + [AC-AISVC-33] Creates appropriate parser based on extension. + """ + cls._initialize() + + normalized = extension.lower() + if not normalized.startswith("."): + normalized = f".{normalized}" + + if normalized not in cls._extension_map: + raise UnsupportedFormatError(normalized, cls.get_supported_extensions()) + + parser_name = cls._extension_map[normalized] + parser_class = cls._parsers[parser_name] + + return parser_class() + + @classmethod + def parse_file( + cls, + file_path: str | Path, + parser_name: str | None = None, + parser_config: dict[str, Any] | None = None, + ) -> ParseResult: + """ + Parse a document file. + [AC-AISVC-33, AC-AISVC-34, AC-AISVC-35] Main entry point for parsing. + + Args: + file_path: Path to the document file + parser_name: Optional specific parser to use + parser_config: Optional configuration for the parser + + Returns: + ParseResult with extracted text and metadata + + Raises: + UnsupportedFormatError: If file format is not supported + DocumentParseException: If parsing fails + """ + cls._initialize() + + path = Path(file_path) + extension = path.suffix.lower() + + if parser_name: + if parser_name not in cls._parsers: + raise DocumentParseException( + f"Unknown parser: {parser_name}", + file_path=str(path), + parser="factory" + ) + parser_class = cls._parsers[parser_name] + parser = parser_class(**(parser_config or {})) + else: + parser = cls.get_parser_for_extension(extension) + if parser_config: + parser = type(parser)(**parser_config) + + return parser.parse(path) + + @classmethod + def get_parser_info(cls) -> list[dict[str, Any]]: + """ + Get information about available parsers. + [AC-AISVC-37] Returns parser metadata. + """ + cls._initialize() + + info = [] + for name, parser_class in cls._parsers.items(): + temp_instance = parser_class.__new__(parser_class) + extensions = temp_instance.get_supported_extensions() + + display_names = { + "pdf": "PDF 文档", + "pdfplumber": "PDF 文档 (pdfplumber)", + "word": "Word 文档", + "excel": "Excel 电子表格", + "csv": "CSV 文件", + "text": "文本文件", + } + + descriptions = { + "pdf": "使用 PyMuPDF 解析 PDF 文档,速度快", + "pdfplumber": "使用 pdfplumber 解析 PDF 文档,表格提取效果更好", + "word": "解析 Word 文档 (.docx),保留段落结构", + "excel": "解析 Excel 电子表格,支持多工作表", + "csv": "解析 CSV 文件,自动检测编码", + "text": "解析纯文本文件,支持多种编码", + } + + info.append({ + "name": name, + "display_name": display_names.get(name, name), + "description": descriptions.get(name, ""), + "extensions": extensions, + }) + + return info + + +def parse_document( + file_path: str | Path, + parser_name: str | None = None, + parser_config: dict[str, Any] | None = None, +) -> ParseResult: + """ + Convenience function for parsing documents. + [AC-AISVC-33] Simple entry point for document parsing. + """ + return DocumentParserFactory.parse_file(file_path, parser_name, parser_config) + + +def get_supported_document_formats() -> list[str]: + """ + Get list of supported document formats. + [AC-AISVC-37] Returns supported format extensions. + """ + return DocumentParserFactory.get_supported_extensions() diff --git a/ai-service/app/services/document/pdf_parser.py b/ai-service/app/services/document/pdf_parser.py new file mode 100644 index 0000000..7b160f3 --- /dev/null +++ b/ai-service/app/services/document/pdf_parser.py @@ -0,0 +1,229 @@ +""" +PDF document parser implementation. +[AC-AISVC-33] PDF parsing using PyMuPDF (fitz). + +Extracts text content from PDF files. +""" + +import logging +from pathlib import Path +from typing import Any + +from app.services.document.base import ( + DocumentParseException, + DocumentParser, + PageText, + ParseResult, +) + +logger = logging.getLogger(__name__) + + +class PDFParser(DocumentParser): + """ + Parser for PDF documents. + [AC-AISVC-33] Uses PyMuPDF for text extraction. + """ + + def __init__(self, extract_images: bool = False, **kwargs: Any): + self._extract_images = extract_images + self._extra_config = kwargs + self._fitz = None + + def _get_fitz(self): + """Lazy import of PyMuPDF.""" + if self._fitz is None: + try: + import fitz + self._fitz = fitz + except ImportError: + raise DocumentParseException( + "PyMuPDF (fitz) not installed. Install with: pip install pymupdf", + parser="pdf" + ) + return self._fitz + + def parse(self, file_path: str | Path) -> ParseResult: + """ + Parse a PDF document and extract text content. + [AC-AISVC-33] Extracts text from all pages. + """ + path = Path(file_path) + + if not path.exists(): + raise DocumentParseException( + f"File not found: {path}", + file_path=str(path), + parser="pdf" + ) + + if not self.supports_extension(path.suffix): + raise DocumentParseException( + f"Unsupported file extension: {path.suffix}", + file_path=str(path), + parser="pdf" + ) + + fitz = self._get_fitz() + + try: + doc = fitz.open(path) + + pages: list[PageText] = [] + text_parts = [] + page_count = len(doc) + + for page_num in range(page_count): + page = doc[page_num] + text = page.get_text().strip() + if text: + pages.append(PageText(page=page_num + 1, text=text)) + text_parts.append(f"[Page {page_num + 1}]\n{text}") + + doc.close() + + full_text = "\n\n".join(text_parts) + file_size = path.stat().st_size + + logger.info( + f"Parsed PDF: {path.name}, pages={page_count}, " + f"chars={len(full_text)}, size={file_size}" + ) + + return ParseResult( + text=full_text, + source_path=str(path), + file_size=file_size, + page_count=page_count, + metadata={ + "format": "pdf", + "page_count": page_count, + }, + pages=pages, + ) + + except DocumentParseException: + raise + except Exception as e: + raise DocumentParseException( + f"Failed to parse PDF: {e}", + file_path=str(path), + parser="pdf", + details={"error": str(e)} + ) + + def get_supported_extensions(self) -> list[str]: + """Get supported file extensions.""" + return [".pdf"] + + +class PDFPlumberParser(DocumentParser): + """ + Alternative PDF parser using pdfplumber. + [AC-AISVC-33] Uses pdfplumber for text extraction. + + pdfplumber is better for table extraction but slower than PyMuPDF. + """ + + def __init__(self, extract_tables: bool = True, **kwargs: Any): + self._extract_tables = extract_tables + self._extra_config = kwargs + self._pdfplumber = None + + def _get_pdfplumber(self): + """Lazy import of pdfplumber.""" + if self._pdfplumber is None: + try: + import pdfplumber + self._pdfplumber = pdfplumber + except ImportError: + raise DocumentParseException( + "pdfplumber not installed. Install with: pip install pdfplumber", + parser="pdfplumber" + ) + return self._pdfplumber + + def parse(self, file_path: str | Path) -> ParseResult: + """ + Parse a PDF document and extract text content. + [AC-AISVC-33] Extracts text and optionally tables. + """ + path = Path(file_path) + + if not path.exists(): + raise DocumentParseException( + f"File not found: {path}", + file_path=str(path), + parser="pdfplumber" + ) + + pdfplumber = self._get_pdfplumber() + + try: + pages: list[PageText] = [] + text_parts = [] + page_count = 0 + + with pdfplumber.open(path) as pdf: + page_count = len(pdf.pages) + + for page_num, page in enumerate(pdf.pages): + text = page.extract_text() or "" + + if self._extract_tables: + tables = page.extract_tables() + for table in tables: + table_text = self._format_table(table) + text += f"\n\n{table_text}" + + text = text.strip() + if text: + pages.append(PageText(page=page_num + 1, text=text)) + text_parts.append(f"[Page {page_num + 1}]\n{text}") + + full_text = "\n\n".join(text_parts) + file_size = path.stat().st_size + + logger.info( + f"Parsed PDF (pdfplumber): {path.name}, pages={page_count}, " + f"chars={len(full_text)}, size={file_size}" + ) + + return ParseResult( + text=full_text, + source_path=str(path), + file_size=file_size, + page_count=page_count, + metadata={ + "format": "pdf", + "parser": "pdfplumber", + "page_count": page_count, + }, + pages=pages, + ) + + except DocumentParseException: + raise + except Exception as e: + raise DocumentParseException( + f"Failed to parse PDF: {e}", + file_path=str(path), + parser="pdfplumber", + details={"error": str(e)} + ) + + def _format_table(self, table: list[list[str | None]]) -> str: + """Format a table as text.""" + if not table: + return "" + + lines = [] + for row in table: + cells = [str(cell) if cell else "" for cell in row] + lines.append(" | ".join(cells)) + + return "\n".join(lines) + + def get_supported_extensions(self) -> list[str]: + """Get supported file extensions.""" + return [".pdf"] diff --git a/ai-service/app/services/document/text_parser.py b/ai-service/app/services/document/text_parser.py new file mode 100644 index 0000000..551b712 --- /dev/null +++ b/ai-service/app/services/document/text_parser.py @@ -0,0 +1,99 @@ +""" +Text file parser implementation. +[AC-AISVC-33] Text file parsing for plain text and markdown. +""" + +import logging +from pathlib import Path +from typing import Any + +from app.services.document.base import ( + DocumentParseException, + DocumentParser, + ParseResult, +) + +logger = logging.getLogger(__name__) + +ENCODINGS_TO_TRY = ["utf-8", "gbk", "gb2312", "gb18030", "big5", "utf-16", "latin-1"] + + +class TextParser(DocumentParser): + """ + Parser for plain text files. + [AC-AISVC-33] Direct text extraction with multiple encoding support. + """ + + def __init__(self, encoding: str = "utf-8", **kwargs: Any): + self._encoding = encoding + self._extra_config = kwargs + + def _try_encodings(self, path: Path) -> tuple[str, str]: + """ + Try multiple encodings to read the file. + Returns: (text, encoding_used) + """ + for enc in ENCODINGS_TO_TRY: + try: + with open(path, "r", encoding=enc) as f: + text = f.read() + logger.info(f"Successfully parsed with encoding: {enc}") + return text, enc + except (UnicodeDecodeError, LookupError): + continue + + raise DocumentParseException( + f"Failed to decode file with any known encoding", + file_path=str(path), + parser="text" + ) + + def parse(self, file_path: str | Path) -> ParseResult: + """ + Parse a text file and extract content. + [AC-AISVC-33] Direct file reading. + """ + path = Path(file_path) + + if not path.exists(): + raise DocumentParseException( + f"File not found: {path}", + file_path=str(path), + parser="text" + ) + + try: + text, encoding_used = self._try_encodings(path) + + file_size = path.stat().st_size + line_count = text.count("\n") + 1 + + logger.info( + f"Parsed text: {path.name}, lines={line_count}, " + f"chars={len(text)}, size={file_size}, encoding={encoding_used}" + ) + + return ParseResult( + text=text, + source_path=str(path), + file_size=file_size, + metadata={ + "format": "text", + "line_count": line_count, + "encoding": encoding_used, + } + ) + + except DocumentParseException: + raise + except Exception as e: + raise DocumentParseException( + f"Failed to parse text file: {e}", + file_path=str(path), + parser="text", + details={"error": str(e)} + ) + + def get_supported_extensions(self) -> list[str]: + """Get supported file extensions.""" + return [".txt", ".md", ".markdown", ".rst", ".log", ".json", ".xml", ".yaml", ".yml"] diff --git a/ai-service/app/services/document/word_parser.py b/ai-service/app/services/document/word_parser.py new file mode 100644 index 0000000..c40e036 --- /dev/null +++ b/ai-service/app/services/document/word_parser.py @@ -0,0 +1,145 @@ +""" +Word document parser implementation. +[AC-AISVC-34] Word (.docx) parsing using python-docx. + +Extracts text content from Word documents. +""" + +import logging +from pathlib import Path +from typing import Any + +from app.services.document.base import ( + DocumentParseException, + DocumentParser, + ParseResult, +) + +logger = logging.getLogger(__name__) + + +class WordParser(DocumentParser): + """ + Parser for Word documents. + [AC-AISVC-34] Uses python-docx for text extraction. + """ + + def __init__(self, include_headers: bool = True, include_footers: bool = True, **kwargs: Any): + self._include_headers = include_headers + self._include_footers = include_footers + self._extra_config = kwargs + self._docx = None + + def _get_docx(self): + """Lazy import of python-docx.""" + if self._docx is None: + try: + from docx import Document + self._docx = Document + except ImportError: + raise DocumentParseException( + "python-docx not installed. Install with: pip install python-docx", + parser="word" + ) + return self._docx + + def parse(self, file_path: str | Path) -> ParseResult: + """ + Parse a Word document and extract text content. + [AC-AISVC-34] Extracts text while preserving paragraph structure. + """ + path = Path(file_path) + + if not path.exists(): + raise DocumentParseException( + f"File not found: {path}", + file_path=str(path), + parser="word" + ) + + if not self.supports_extension(path.suffix): + raise DocumentParseException( + f"Unsupported file extension: {path.suffix}", + file_path=str(path), + parser="word" + ) + + Document = self._get_docx() + + try: + doc = Document(path) + + text_parts = [] + + if self._include_headers: + for section in doc.sections: + header = section.header + if header and header.paragraphs: + header_text = "\n".join(p.text for p in header.paragraphs if p.text.strip()) + if header_text: + text_parts.append(f"[Header]\n{header_text}") + + for para in doc.paragraphs: + if para.text.strip(): + style_name = para.style.name if para.style else "" + if "Heading" in style_name: + text_parts.append(f"\n## {para.text}") + else: + text_parts.append(para.text) + + for table in doc.tables: + table_text = self._format_table(table) + if table_text.strip(): + text_parts.append(f"\n[Table]\n{table_text}") + + if self._include_footers: + for section in doc.sections: + footer = section.footer + if footer and footer.paragraphs: + footer_text = "\n".join(p.text for p in footer.paragraphs if p.text.strip()) + if footer_text: + text_parts.append(f"[Footer]\n{footer_text}") + + full_text = "\n\n".join(text_parts) + file_size = path.stat().st_size + + paragraph_count = len(doc.paragraphs) + table_count = len(doc.tables) + + logger.info( + f"Parsed Word: {path.name}, paragraphs={paragraph_count}, " + f"tables={table_count}, chars={len(full_text)}, size={file_size}" + ) + + return ParseResult( + text=full_text, + source_path=str(path), + file_size=file_size, + metadata={ + "format": "docx", + "paragraph_count": paragraph_count, + "table_count": table_count, + } + ) + + except DocumentParseException: + raise + except Exception as e: + raise DocumentParseException( + f"Failed to parse Word document: {e}", + file_path=str(path), + parser="word", + details={"error": str(e)} + ) + + def _format_table(self, table) -> str: + """Format a table as text.""" + lines = [] + for row in table.rows: + cells = [cell.text.strip() for cell in row.cells] + lines.append(" | ".join(cells)) + return "\n".join(lines) + + def get_supported_extensions(self) -> list[str]: + """Get supported file extensions.""" + return [".docx"] diff --git a/ai-service/app/services/embedding/__init__.py b/ai-service/app/services/embedding/__init__.py new file mode 100644 index 0000000..8fe5844 --- /dev/null +++ b/ai-service/app/services/embedding/__init__.py @@ -0,0 +1,40 @@ +""" +Embedding services package. +[AC-AISVC-29] Provides pluggable embedding providers. +""" + +from app.services.embedding.base import ( + EmbeddingConfig, + EmbeddingException, + EmbeddingProvider, + EmbeddingResult, +) +from app.services.embedding.factory import ( + EmbeddingConfigManager, + EmbeddingProviderFactory, + get_embedding_config_manager, + get_embedding_provider, +) +from app.services.embedding.ollama_provider import OllamaEmbeddingProvider +from app.services.embedding.openai_provider import OpenAIEmbeddingProvider +from app.services.embedding.nomic_provider import ( + NomicEmbeddingProvider, + NomicEmbeddingResult, + EmbeddingTask, +) + +__all__ = [ + "EmbeddingConfig", + "EmbeddingException", + "EmbeddingProvider", + "EmbeddingResult", + "EmbeddingConfigManager", + "EmbeddingProviderFactory", + "get_embedding_config_manager", + "get_embedding_provider", + "OllamaEmbeddingProvider", + "OpenAIEmbeddingProvider", + "NomicEmbeddingProvider", + "NomicEmbeddingResult", + "EmbeddingTask", +] diff --git a/ai-service/app/services/embedding/base.py b/ai-service/app/services/embedding/base.py new file mode 100644 index 0000000..cea4e49 --- /dev/null +++ b/ai-service/app/services/embedding/base.py @@ -0,0 +1,130 @@ +""" +Base embedding provider interface. +[AC-AISVC-29] Abstract interface for embedding providers. + +Design reference: progress.md Section 7.1 - EmbeddingProvider interface +- embed(text) -> list[float] +- embed_batch(texts) -> list[list[float]] +- get_dimension() -> int +- get_provider_name() -> str +""" + +from abc import ABC, abstractmethod +from dataclasses import dataclass, field +from typing import Any + + +@dataclass +class EmbeddingConfig: + """ + Configuration for embedding provider. + [AC-AISVC-31] Supports configurable embedding parameters. + """ + dimension: int = 768 + batch_size: int = 32 + timeout_seconds: int = 60 + extra_params: dict[str, Any] = field(default_factory=dict) + + +@dataclass +class EmbeddingResult: + """ + Result from embedding generation. + [AC-AISVC-29] Contains embedding vector and metadata. + """ + embedding: list[float] + dimension: int + model: str + latency_ms: float = 0.0 + metadata: dict[str, Any] = field(default_factory=dict) + + +class EmbeddingProvider(ABC): + """ + Abstract base class for embedding providers. + [AC-AISVC-29] Provides unified interface for different embedding providers. + + Design reference: progress.md Section 7.1 - Architecture + - OllamaEmbeddingProvider / OpenAIEmbeddingProvider can be swapped + - Factory pattern for dynamic loading + """ + + @abstractmethod + async def embed(self, text: str) -> list[float]: + """ + Generate embedding vector for a single text. + [AC-AISVC-29] Returns embedding vector. + + Args: + text: Input text to embed. + + Returns: + List of floats representing the embedding vector. + + Raises: + EmbeddingException: If embedding generation fails. + """ + pass + + @abstractmethod + async def embed_batch(self, texts: list[str]) -> list[list[float]]: + """ + Generate embedding vectors for multiple texts. + [AC-AISVC-29] Returns list of embedding vectors. + + Args: + texts: List of input texts to embed. + + Returns: + List of embedding vectors. + + Raises: + EmbeddingException: If embedding generation fails. + """ + pass + + @abstractmethod + def get_dimension(self) -> int: + """ + Get the dimension of embedding vectors. + [AC-AISVC-29] Returns vector dimension. + + Returns: + Integer dimension of embedding vectors. + """ + pass + + @abstractmethod + def get_provider_name(self) -> str: + """ + Get the name of this embedding provider. + [AC-AISVC-29] Returns provider identifier. + + Returns: + String identifier for this provider. + """ + pass + + @abstractmethod + def get_config_schema(self) -> dict[str, Any]: + """ + Get the configuration schema for this provider. + [AC-AISVC-38] Returns JSON Schema for configuration parameters. + + Returns: + Dict describing configuration parameters. + """ + pass + + async def close(self) -> None: + """Close the provider and release resources. Default no-op.""" + pass + + +class EmbeddingException(Exception): + """Exception raised when embedding generation fails.""" + + def __init__(self, message: str, provider: str = "", details: dict[str, Any] | None = None): + self.provider = provider + self.details = details or {} + super().__init__(f"[{provider}] {message}" if provider else message) diff --git a/ai-service/app/services/embedding/factory.py b/ai-service/app/services/embedding/factory.py new file mode 100644 index 0000000..e42e506 --- /dev/null +++ b/ai-service/app/services/embedding/factory.py @@ -0,0 +1,305 @@ +""" +Embedding provider factory and configuration manager. +[AC-AISVC-30, AC-AISVC-31] Factory pattern for dynamic provider loading. + +Design reference: progress.md Section 7.1 - Architecture +- EmbeddingProviderFactory: creates providers based on config +- EmbeddingConfigManager: manages configuration with hot-reload support +""" + +import logging +from typing import Any, Type + +from app.services.embedding.base import EmbeddingException, EmbeddingProvider +from app.services.embedding.ollama_provider import OllamaEmbeddingProvider +from app.services.embedding.openai_provider import OpenAIEmbeddingProvider +from app.services.embedding.nomic_provider import NomicEmbeddingProvider + +logger = logging.getLogger(__name__) + + +class EmbeddingProviderFactory: + """ + Factory for creating embedding providers. + [AC-AISVC-30] Supports dynamic loading based on configuration. + """ + + _providers: dict[str, Type[EmbeddingProvider]] = { + "ollama": OllamaEmbeddingProvider, + "openai": OpenAIEmbeddingProvider, + "nomic": NomicEmbeddingProvider, + } + + @classmethod + def register_provider(cls, name: str, provider_class: Type[EmbeddingProvider]) -> None: + """ + Register a new embedding provider. + [AC-AISVC-30] Allows runtime registration of providers. + """ + cls._providers[name] = provider_class + logger.info(f"Registered embedding provider: {name}") + + @classmethod + def get_available_providers(cls) -> list[str]: + """ + Get list of available provider names. + [AC-AISVC-38] Returns registered provider identifiers. + """ + return list(cls._providers.keys()) + + @classmethod + def get_provider_info(cls, name: str) -> dict[str, Any]: + """ + Get provider information including config schema. + [AC-AISVC-38] Returns provider metadata. + """ + if name not in cls._providers: + raise EmbeddingException( + f"Unknown provider: {name}", + provider="factory" + ) + + provider_class = cls._providers[name] + temp_instance = provider_class.__new__(provider_class) + + display_names = { + "ollama": "Ollama 本地模型", + "openai": "OpenAI Embedding", + "nomic": "Nomic Embed (优化版)", + } + + descriptions = { + "ollama": "使用 Ollama 运行的本地嵌入模型,支持 nomic-embed-text 等开源模型", + "openai": "使用 OpenAI 官方 Embedding API,支持 text-embedding-3 系列模型", + "nomic": "Nomic-embed-text v1.5 优化版,支持任务前缀和 Matryoshka 维度截断,专为RAG优化", + } + + return { + "name": name, + "display_name": display_names.get(name, name), + "description": descriptions.get(name, ""), + "config_schema": temp_instance.get_config_schema(), + } + + @classmethod + def create_provider( + cls, + name: str, + config: dict[str, Any], + ) -> EmbeddingProvider: + """ + Create an embedding provider instance. + [AC-AISVC-30] Creates provider based on configuration. + + Args: + name: Provider identifier (e.g., "ollama", "openai") + config: Provider-specific configuration + + Returns: + Configured EmbeddingProvider instance + + Raises: + EmbeddingException: If provider is unknown or configuration is invalid + """ + if name not in cls._providers: + raise EmbeddingException( + f"Unknown embedding provider: {name}. " + f"Available: {cls.get_available_providers()}", + provider="factory" + ) + + provider_class = cls._providers[name] + + try: + instance = provider_class(**config) + logger.info(f"Created embedding provider: {name}") + return instance + except Exception as e: + raise EmbeddingException( + f"Failed to create provider '{name}': {e}", + provider="factory", + details={"config": config} + ) + + +class EmbeddingConfigManager: + """ + Manager for embedding configuration. + [AC-AISVC-31] Supports hot-reload of configuration. + """ + + def __init__(self, default_provider: str = "ollama", default_config: dict[str, Any] | None = None): + self._provider_name = default_provider + self._config = default_config or { + "base_url": "http://localhost:11434", + "model": "nomic-embed-text", + "dimension": 768, + } + self._provider: EmbeddingProvider | None = None + + def get_provider_name(self) -> str: + """Get current provider name.""" + return self._provider_name + + def get_config(self) -> dict[str, Any]: + """Get current configuration.""" + return self._config.copy() + + def get_full_config(self) -> dict[str, Any]: + """ + Get full configuration including provider name. + [AC-AISVC-39] Returns complete configuration for API response. + """ + return { + "provider": self._provider_name, + "config": self._config.copy(), + } + + async def get_provider(self) -> EmbeddingProvider: + """ + Get or create the embedding provider. + [AC-AISVC-29] Returns configured provider instance. + """ + if self._provider is None: + self._provider = EmbeddingProviderFactory.create_provider( + self._provider_name, + self._config + ) + return self._provider + + async def update_config( + self, + provider: str, + config: dict[str, Any], + ) -> bool: + """ + Update embedding configuration. + [AC-AISVC-31, AC-AISVC-40] Supports hot-reload. + + Args: + provider: New provider name + config: New provider configuration + + Returns: + True if update was successful + + Raises: + EmbeddingException: If configuration is invalid + """ + old_provider = self._provider_name + old_config = self._config.copy() + + try: + new_provider_instance = EmbeddingProviderFactory.create_provider( + provider, + config + ) + + if self._provider: + await self._provider.close() + + self._provider_name = provider + self._config = config + self._provider = new_provider_instance + + logger.info(f"Updated embedding config: provider={provider}") + return True + + except Exception as e: + self._provider_name = old_provider + self._config = old_config + raise EmbeddingException( + f"Failed to update config: {e}", + provider="config_manager", + details={"provider": provider, "config": config} + ) + + async def test_connection( + self, + test_text: str = "这是一个测试文本", + provider: str | None = None, + config: dict[str, Any] | None = None, + ) -> dict[str, Any]: + """ + Test embedding connection. + [AC-AISVC-41] Tests provider connectivity. + + Args: + test_text: Text to embed for testing + provider: Provider to test (uses current if None) + config: Config to test (uses current if None) + + Returns: + Dict with test results including success, dimension, latency + """ + import time + + test_provider_name = provider or self._provider_name + test_config = config or self._config + + try: + test_provider = EmbeddingProviderFactory.create_provider( + test_provider_name, + test_config + ) + + start_time = time.perf_counter() + embedding = await test_provider.embed(test_text) + latency_ms = (time.perf_counter() - start_time) * 1000 + + await test_provider.close() + + return { + "success": True, + "dimension": len(embedding), + "latency_ms": latency_ms, + "message": f"连接成功,向量维度: {len(embedding)}", + } + + except Exception as e: + return { + "success": False, + "dimension": 0, + "latency_ms": 0, + "error": str(e), + "message": f"连接失败: {e}", + } + + async def close(self) -> None: + """Close the current provider.""" + if self._provider: + await self._provider.close() + self._provider = None + + +_embedding_config_manager: EmbeddingConfigManager | None = None + + +def get_embedding_config_manager() -> EmbeddingConfigManager: + """ + Get the global embedding config manager. + [AC-AISVC-31] Singleton pattern for configuration management. + """ + global _embedding_config_manager + if _embedding_config_manager is None: + from app.core.config import get_settings + settings = get_settings() + + _embedding_config_manager = EmbeddingConfigManager( + default_provider="ollama", + default_config={ + "base_url": settings.ollama_base_url, + "model": settings.ollama_embedding_model, + "dimension": settings.qdrant_vector_size, + } + ) + return _embedding_config_manager + + +async def get_embedding_provider() -> EmbeddingProvider: + """ + Get the current embedding provider. + [AC-AISVC-29] Convenience function for getting provider. + """ + manager = get_embedding_config_manager() + return await manager.get_provider() diff --git a/ai-service/app/services/embedding/nomic_provider.py b/ai-service/app/services/embedding/nomic_provider.py new file mode 100644 index 0000000..ba6a73b --- /dev/null +++ b/ai-service/app/services/embedding/nomic_provider.py @@ -0,0 +1,291 @@ +""" +Nomic embedding provider with task prefixes and Matryoshka support. +Implements RAG optimization spec: +- Task prefixes: search_document: / search_query: +- Matryoshka dimension truncation: 256/512/768 dimensions +""" + +import logging +import time +from dataclasses import dataclass, field +from enum import Enum +from typing import Any + +import httpx +import numpy as np + +from app.services.embedding.base import ( + EmbeddingConfig, + EmbeddingException, + EmbeddingProvider, +) + +logger = logging.getLogger(__name__) + + +class EmbeddingTask(str, Enum): + """Task type for nomic-embed-text v1.5 model.""" + DOCUMENT = "search_document" + QUERY = "search_query" + + +@dataclass +class NomicEmbeddingResult: + """Result from Nomic embedding with multiple dimensions.""" + embedding_full: list[float] + embedding_256: list[float] + embedding_512: list[float] + dimension: int + model: str + task: EmbeddingTask + latency_ms: float = 0.0 + metadata: dict[str, Any] = field(default_factory=dict) + + +class NomicEmbeddingProvider(EmbeddingProvider): + """ + Nomic-embed-text v1.5 embedding provider with task prefixes. + + Key features: + - Task prefixes: search_document: for documents, search_query: for queries + - Matryoshka dimension truncation: 256/512/768 dimensions + - Automatic normalization after truncation + + Reference: rag-optimization/spec.md Section 2.1, 2.3 + """ + + PROVIDER_NAME = "nomic" + DOCUMENT_PREFIX = "search_document:" + QUERY_PREFIX = "search_query:" + FULL_DIMENSION = 768 + + def __init__( + self, + base_url: str = "http://localhost:11434", + model: str = "nomic-embed-text", + dimension: int = 768, + timeout_seconds: int = 60, + enable_matryoshka: bool = True, + **kwargs: Any, + ): + self._base_url = base_url.rstrip("/") + self._model = model + self._dimension = dimension + self._timeout = timeout_seconds + self._enable_matryoshka = enable_matryoshka + self._client: httpx.AsyncClient | None = None + self._extra_config = kwargs + + async def _get_client(self) -> httpx.AsyncClient: + if self._client is None: + self._client = httpx.AsyncClient(timeout=self._timeout) + return self._client + + def _add_prefix(self, text: str, task: EmbeddingTask) -> str: + """Add task prefix to text.""" + if task == EmbeddingTask.DOCUMENT: + prefix = self.DOCUMENT_PREFIX + else: + prefix = self.QUERY_PREFIX + + if text.startswith(prefix): + return text + return f"{prefix}{text}" + + def _truncate_and_normalize(self, embedding: list[float], target_dim: int) -> list[float]: + """ + Truncate embedding to target dimension and normalize. + Matryoshka representation learning allows dimension truncation. + """ + truncated = embedding[:target_dim] + + arr = np.array(truncated, dtype=np.float32) + norm = np.linalg.norm(arr) + if norm > 0: + arr = arr / norm + + return arr.tolist() + + async def embed_with_task( + self, + text: str, + task: EmbeddingTask, + ) -> NomicEmbeddingResult: + """ + Generate embedding with specified task prefix. + + Args: + text: Input text to embed + task: DOCUMENT for indexing, QUERY for retrieval + + Returns: + NomicEmbeddingResult with all dimension variants + """ + start_time = time.perf_counter() + + prefixed_text = self._add_prefix(text, task) + + try: + client = await self._get_client() + response = await client.post( + f"{self._base_url}/api/embeddings", + json={ + "model": self._model, + "prompt": prefixed_text, + } + ) + response.raise_for_status() + data = response.json() + embedding = data.get("embedding", []) + + if not embedding: + raise EmbeddingException( + "Empty embedding returned", + provider=self.PROVIDER_NAME, + details={"text_length": len(text), "task": task.value} + ) + + latency_ms = (time.perf_counter() - start_time) * 1000 + + embedding_256 = self._truncate_and_normalize(embedding, 256) + embedding_512 = self._truncate_and_normalize(embedding, 512) + + logger.debug( + f"Generated Nomic embedding: task={task.value}, " + f"dim={len(embedding)}, latency={latency_ms:.2f}ms" + ) + + return NomicEmbeddingResult( + embedding_full=embedding, + embedding_256=embedding_256, + embedding_512=embedding_512, + dimension=len(embedding), + model=self._model, + task=task, + latency_ms=latency_ms, + ) + + except httpx.HTTPStatusError as e: + raise EmbeddingException( + f"Ollama API error: {e.response.status_code}", + provider=self.PROVIDER_NAME, + details={"status_code": e.response.status_code, "response": e.response.text} + ) + except httpx.RequestError as e: + raise EmbeddingException( + f"Ollama connection error: {e}", + provider=self.PROVIDER_NAME, + details={"base_url": self._base_url} + ) + except EmbeddingException: + raise + except Exception as e: + raise EmbeddingException( + f"Embedding generation failed: {e}", + provider=self.PROVIDER_NAME + ) + + async def embed_document(self, text: str) -> NomicEmbeddingResult: + """ + Generate embedding for document (with search_document: prefix). + Use this when indexing documents into vector store. + """ + return await self.embed_with_task(text, EmbeddingTask.DOCUMENT) + + async def embed_query(self, text: str) -> NomicEmbeddingResult: + """ + Generate embedding for query (with search_query: prefix). + Use this when searching/retrieving documents. + """ + return await self.embed_with_task(text, EmbeddingTask.QUERY) + + async def embed(self, text: str) -> list[float]: + """ + Generate embedding vector for a single text. + Default uses QUERY task for backward compatibility. + """ + result = await self.embed_query(text) + return result.embedding_full + + async def embed_batch(self, texts: list[str]) -> list[list[float]]: + """ + Generate embedding vectors for multiple texts. + Uses QUERY task by default. + """ + embeddings = [] + for text in texts: + embedding = await self.embed(text) + embeddings.append(embedding) + return embeddings + + async def embed_documents_batch( + self, + texts: list[str], + ) -> list[NomicEmbeddingResult]: + """ + Generate embeddings for multiple documents (DOCUMENT task). + Use this when batch indexing documents. + """ + results = [] + for text in texts: + result = await self.embed_document(text) + results.append(result) + return results + + async def embed_queries_batch( + self, + texts: list[str], + ) -> list[NomicEmbeddingResult]: + """ + Generate embeddings for multiple queries (QUERY task). + Use this when batch processing queries. + """ + results = [] + for text in texts: + result = await self.embed_query(text) + results.append(result) + return results + + def get_dimension(self) -> int: + """Get the dimension of embedding vectors.""" + return self._dimension + + def get_provider_name(self) -> str: + """Get the name of this embedding provider.""" + return self.PROVIDER_NAME + + def get_config_schema(self) -> dict[str, Any]: + """Get the configuration schema for Nomic provider.""" + return { + "base_url": { + "type": "string", + "description": "Ollama API 地址", + "default": "http://localhost:11434", + }, + "model": { + "type": "string", + "description": "嵌入模型名称(推荐 nomic-embed-text v1.5)", + "default": "nomic-embed-text", + }, + "dimension": { + "type": "integer", + "description": "向量维度(支持 256/512/768)", + "default": 768, + }, + "timeout_seconds": { + "type": "integer", + "description": "请求超时时间(秒)", + "default": 60, + }, + "enable_matryoshka": { + "type": "boolean", + "description": "启用 Matryoshka 维度截断", + "default": True, + }, + } + + async def close(self) -> None: + """Close the HTTP client.""" + if self._client: + await self._client.aclose() + self._client = None diff --git a/ai-service/app/services/embedding/ollama_embedding.py b/ai-service/app/services/embedding/ollama_embedding.py new file mode 100644 index 0000000..36938e3 --- /dev/null +++ b/ai-service/app/services/embedding/ollama_embedding.py @@ -0,0 +1,58 @@ +""" +Ollama embedding service for generating text embeddings. +Uses nomic-embed-text model via Ollama API. +""" + +import logging +import httpx +from app.core.config import get_settings + +logger = logging.getLogger(__name__) + + +async def get_embedding(text: str) -> list[float]: + """ + Generate embedding vector for text using Ollama nomic-embed-text model. + """ + settings = get_settings() + + async with httpx.AsyncClient(timeout=60.0) as client: + try: + response = await client.post( + f"{settings.ollama_base_url}/api/embeddings", + json={ + "model": settings.ollama_embedding_model, + "prompt": text, + } + ) + response.raise_for_status() + data = response.json() + embedding = data.get("embedding", []) + + if not embedding: + logger.warning(f"Empty embedding returned for text length={len(text)}") + return [0.0] * settings.qdrant_vector_size + + logger.debug(f"Generated embedding: dim={len(embedding)}") + return embedding + + except httpx.HTTPStatusError as e: + logger.error(f"Ollama API error: {e.response.status_code} - {e.response.text}") + raise + except httpx.RequestError as e: + logger.error(f"Ollama connection error: {e}") + raise + except Exception as e: + logger.error(f"Embedding generation failed: {e}") + raise + + +async def get_embeddings_batch(texts: list[str]) -> list[list[float]]: + """ + Generate embedding vectors for multiple texts. + """ + embeddings = [] + for text in texts: + embedding = await get_embedding(text) + embeddings.append(embedding) + return embeddings diff --git a/ai-service/app/services/embedding/ollama_provider.py b/ai-service/app/services/embedding/ollama_provider.py new file mode 100644 index 0000000..39093e6 --- /dev/null +++ b/ai-service/app/services/embedding/ollama_provider.py @@ -0,0 +1,157 @@ +""" +Ollama embedding provider implementation. +[AC-AISVC-29, AC-AISVC-30] Ollama-based embedding provider. + +Uses Ollama API for generating text embeddings. +""" + +import logging +import time +from typing import Any + +import httpx + +from app.services.embedding.base import ( + EmbeddingConfig, + EmbeddingException, + EmbeddingProvider, +) + +logger = logging.getLogger(__name__) + + +class OllamaEmbeddingProvider(EmbeddingProvider): + """ + Embedding provider using Ollama API. + [AC-AISVC-29, AC-AISVC-30] Supports local embedding models via Ollama. + """ + + PROVIDER_NAME = "ollama" + + def __init__( + self, + base_url: str = "http://localhost:11434", + model: str = "nomic-embed-text", + dimension: int = 768, + timeout_seconds: int = 60, + **kwargs: Any, + ): + self._base_url = base_url.rstrip("/") + self._model = model + self._dimension = dimension + self._timeout = timeout_seconds + self._client: httpx.AsyncClient | None = None + self._extra_config = kwargs + + async def _get_client(self) -> httpx.AsyncClient: + if self._client is None: + self._client = httpx.AsyncClient(timeout=self._timeout) + return self._client + + async def embed(self, text: str) -> list[float]: + """ + Generate embedding vector for a single text using Ollama API. + [AC-AISVC-29] Returns embedding vector. + """ + start_time = time.perf_counter() + + try: + client = await self._get_client() + response = await client.post( + f"{self._base_url}/api/embeddings", + json={ + "model": self._model, + "prompt": text, + } + ) + response.raise_for_status() + data = response.json() + embedding = data.get("embedding", []) + + if not embedding: + raise EmbeddingException( + "Empty embedding returned", + provider=self.PROVIDER_NAME, + details={"text_length": len(text)} + ) + + latency_ms = (time.perf_counter() - start_time) * 1000 + logger.debug( + f"Generated embedding via Ollama: dim={len(embedding)}, " + f"latency={latency_ms:.2f}ms" + ) + + return embedding + + except httpx.HTTPStatusError as e: + raise EmbeddingException( + f"Ollama API error: {e.response.status_code}", + provider=self.PROVIDER_NAME, + details={"status_code": e.response.status_code, "response": e.response.text} + ) + except httpx.RequestError as e: + raise EmbeddingException( + f"Ollama connection error: {e}", + provider=self.PROVIDER_NAME, + details={"base_url": self._base_url} + ) + except EmbeddingException: + raise + except Exception as e: + raise EmbeddingException( + f"Embedding generation failed: {e}", + provider=self.PROVIDER_NAME + ) + + async def embed_batch(self, texts: list[str]) -> list[list[float]]: + """ + Generate embedding vectors for multiple texts. + [AC-AISVC-29] Sequential embedding generation. + """ + embeddings = [] + for text in texts: + embedding = await self.embed(text) + embeddings.append(embedding) + return embeddings + + def get_dimension(self) -> int: + """Get the dimension of embedding vectors.""" + return self._dimension + + def get_provider_name(self) -> str: + """Get the name of this embedding provider.""" + return self.PROVIDER_NAME + + def get_config_schema(self) -> dict[str, Any]: + """ + Get the configuration schema for Ollama provider. + [AC-AISVC-38] Returns JSON Schema for configuration parameters. + """ + return { + "base_url": { + "type": "string", + "description": "Ollama API 地址", + "default": "http://localhost:11434", + }, + "model": { + "type": "string", + "description": "嵌入模型名称", + "default": "nomic-embed-text", + }, + "dimension": { + "type": "integer", + "description": "向量维度", + "default": 768, + }, + "timeout_seconds": { + "type": "integer", + "description": "请求超时时间(秒)", + "default": 60, + }, + } + + async def close(self) -> None: + """Close the HTTP client.""" + if self._client: + await self._client.aclose() + self._client = None diff --git a/ai-service/app/services/embedding/openai_provider.py b/ai-service/app/services/embedding/openai_provider.py new file mode 100644 index 0000000..0e15aab --- /dev/null +++ b/ai-service/app/services/embedding/openai_provider.py @@ -0,0 +1,193 @@ +""" +OpenAI embedding provider implementation. +[AC-AISVC-29, AC-AISVC-30] OpenAI-based embedding provider. + +Uses OpenAI API for generating text embeddings. +""" + +import logging +import time +from typing import Any + +import httpx + +from app.services.embedding.base import ( + EmbeddingException, + EmbeddingProvider, +) + +logger = logging.getLogger(__name__) + + +class OpenAIEmbeddingProvider(EmbeddingProvider): + """ + Embedding provider using OpenAI API. + [AC-AISVC-29, AC-AISVC-30] Supports OpenAI embedding models. + """ + + PROVIDER_NAME = "openai" + + MODEL_DIMENSIONS = { + "text-embedding-ada-002": 1536, + "text-embedding-3-small": 1536, + "text-embedding-3-large": 3072, + } + + def __init__( + self, + api_key: str, + model: str = "text-embedding-3-small", + base_url: str = "https://api.openai.com/v1", + dimension: int | None = None, + timeout_seconds: int = 60, + **kwargs: Any, + ): + self._api_key = api_key + self._model = model + self._base_url = base_url.rstrip("/") + self._timeout = timeout_seconds + self._client: httpx.AsyncClient | None = None + self._extra_config = kwargs + + if dimension: + self._dimension = dimension + elif model in self.MODEL_DIMENSIONS: + self._dimension = self.MODEL_DIMENSIONS[model] + else: + self._dimension = 1536 + + async def _get_client(self) -> httpx.AsyncClient: + if self._client is None: + self._client = httpx.AsyncClient(timeout=self._timeout) + return self._client + + async def embed(self, text: str) -> list[float]: + """ + Generate embedding vector for a single text using OpenAI API. + [AC-AISVC-29] Returns embedding vector. + """ + embeddings = await self.embed_batch([text]) + return embeddings[0] + + async def embed_batch(self, texts: list[str]) -> list[list[float]]: + """ + Generate embedding vectors for multiple texts using OpenAI API. + [AC-AISVC-29] Supports batch embedding for efficiency. + """ + start_time = time.perf_counter() + + try: + client = await self._get_client() + + request_body: dict[str, Any] = { + "model": self._model, + "input": texts, + } + if self._dimension and self._model.startswith("text-embedding-3"): + request_body["dimensions"] = self._dimension + + response = await client.post( + f"{self._base_url}/embeddings", + headers={ + "Authorization": f"Bearer {self._api_key}", + "Content-Type": "application/json", + }, + json=request_body, + ) + response.raise_for_status() + data = response.json() + + embeddings = [] + for item in data.get("data", []): + embedding = item.get("embedding", []) + if not embedding: + raise EmbeddingException( + "Empty embedding returned", + provider=self.PROVIDER_NAME, + details={"index": item.get("index", 0)} + ) + embeddings.append(embedding) + + if len(embeddings) != len(texts): + raise EmbeddingException( + f"Embedding count mismatch: expected {len(texts)}, got {len(embeddings)}", + provider=self.PROVIDER_NAME + ) + + latency_ms = (time.perf_counter() - start_time) * 1000 + logger.debug( + f"Generated {len(embeddings)} embeddings via OpenAI: " + f"dim={len(embeddings[0]) if embeddings else 0}, " + f"latency={latency_ms:.2f}ms" + ) + + return embeddings + + except httpx.HTTPStatusError as e: + raise EmbeddingException( + f"OpenAI API error: {e.response.status_code}", + provider=self.PROVIDER_NAME, + details={"status_code": e.response.status_code, "response": e.response.text} + ) + except httpx.RequestError as e: + raise EmbeddingException( + f"OpenAI connection error: {e}", + provider=self.PROVIDER_NAME, + details={"base_url": self._base_url} + ) + except EmbeddingException: + raise + except Exception as e: + raise EmbeddingException( + f"Embedding generation failed: {e}", + provider=self.PROVIDER_NAME + ) + + def get_dimension(self) -> int: + """Get the dimension of embedding vectors.""" + return self._dimension + + def get_provider_name(self) -> str: + """Get the name of this embedding provider.""" + return self.PROVIDER_NAME + + def get_config_schema(self) -> dict[str, Any]: + """ + Get the configuration schema for OpenAI provider. + [AC-AISVC-38] Returns JSON Schema for configuration parameters. + """ + return { + "api_key": { + "type": "string", + "description": "OpenAI API 密钥", + "required": True, + "secret": True, + }, + "model": { + "type": "string", + "description": "嵌入模型名称", + "default": "text-embedding-3-small", + "enum": list(self.MODEL_DIMENSIONS.keys()), + }, + "base_url": { + "type": "string", + "description": "OpenAI API 地址(支持兼容接口)", + "default": "https://api.openai.com/v1", + }, + "dimension": { + "type": "integer", + "description": "向量维度(仅 text-embedding-3 系列支持自定义)", + "default": 1536, + }, + "timeout_seconds": { + "type": "integer", + "description": "请求超时时间(秒)", + "default": 60, + }, + } + + async def close(self) -> None: + """Close the HTTP client.""" + if self._client: + await self._client.aclose() + self._client = None diff --git a/ai-service/app/services/kb.py b/ai-service/app/services/kb.py new file mode 100644 index 0000000..8f5535a --- /dev/null +++ b/ai-service/app/services/kb.py @@ -0,0 +1,294 @@ +""" +Knowledge Base service for AI Service. +[AC-ASA-01, AC-ASA-02, AC-ASA-08] KB management with document upload, indexing, and listing. +""" + +import logging +import os +import uuid +from datetime import datetime +from typing import Sequence + +from sqlalchemy import select, func +from sqlalchemy.ext.asyncio import AsyncSession +from sqlmodel import col + +from app.models.entities import ( + Document, + DocumentStatus, + IndexJob, + IndexJobStatus, + KnowledgeBase, +) + +logger = logging.getLogger(__name__) + + +class KBService: + """ + [AC-ASA-01, AC-ASA-02, AC-ASA-08] Knowledge Base service. + Handles document upload, indexing jobs, and document listing. + """ + + def __init__(self, session: AsyncSession, upload_dir: str = "./uploads"): + self._session = session + self._upload_dir = upload_dir + os.makedirs(upload_dir, exist_ok=True) + + async def get_or_create_kb( + self, + tenant_id: str, + kb_id: str | None = None, + name: str = "Default KB", + ) -> KnowledgeBase: + """ + Get existing KB or create default one. + """ + if kb_id: + try: + stmt = select(KnowledgeBase).where( + KnowledgeBase.tenant_id == tenant_id, + KnowledgeBase.id == uuid.UUID(kb_id), + ) + result = await self._session.execute(stmt) + existing_kb = result.scalar_one_or_none() + if existing_kb: + return existing_kb + except ValueError: + pass + + stmt = select(KnowledgeBase).where( + KnowledgeBase.tenant_id == tenant_id, + ).limit(1) + result = await self._session.execute(stmt) + existing_kb = result.scalar_one_or_none() + + if existing_kb: + return existing_kb + + new_kb = KnowledgeBase( + tenant_id=tenant_id, + name=name, + ) + self._session.add(new_kb) + await self._session.flush() + + logger.info(f"[AC-ASA-01] Created knowledge base: tenant={tenant_id}, kb_id={new_kb.id}") + return new_kb + + async def upload_document( + self, + tenant_id: str, + kb_id: str, + file_name: str, + file_content: bytes, + file_type: str | None = None, + ) -> tuple[Document, IndexJob]: + """ + [AC-ASA-01] Upload document and create indexing job. + """ + doc_id = uuid.uuid4() + file_path = os.path.join(self._upload_dir, f"{tenant_id}_{doc_id}_{file_name}") + + with open(file_path, "wb") as f: + f.write(file_content) + + document = Document( + id=doc_id, + tenant_id=tenant_id, + kb_id=kb_id, + file_name=file_name, + file_path=file_path, + file_size=len(file_content), + file_type=file_type, + status=DocumentStatus.PENDING.value, + ) + self._session.add(document) + + job = IndexJob( + tenant_id=tenant_id, + doc_id=doc_id, + status=IndexJobStatus.PENDING.value, + progress=0, + ) + self._session.add(job) + + await self._session.flush() + + logger.info( + f"[AC-ASA-01] Uploaded document: tenant={tenant_id}, doc_id={doc_id}, " + f"file_name={file_name}, size={len(file_content)}" + ) + + return document, job + + async def list_documents( + self, + tenant_id: str, + kb_id: str | None = None, + status: str | None = None, + page: int = 1, + page_size: int = 20, + ) -> tuple[Sequence[Document], int]: + """ + [AC-ASA-08] List documents with filtering and pagination. + """ + stmt = select(Document).where(Document.tenant_id == tenant_id) + + if kb_id: + stmt = stmt.where(Document.kb_id == kb_id) + if status: + stmt = stmt.where(Document.status == status) + + count_stmt = select(func.count()).select_from(stmt.subquery()) + total_result = await self._session.execute(count_stmt) + total = total_result.scalar() or 0 + + stmt = stmt.order_by(col(Document.created_at).desc()) + stmt = stmt.offset((page - 1) * page_size).limit(page_size) + + result = await self._session.execute(stmt) + documents = result.scalars().all() + + logger.info( + f"[AC-ASA-08] Listed documents: tenant={tenant_id}, " + f"kb_id={kb_id}, status={status}, total={total}" + ) + + return documents, total + + async def get_document( + self, + tenant_id: str, + doc_id: str, + ) -> Document | None: + """ + Get document by ID. + """ + stmt = select(Document).where( + Document.tenant_id == tenant_id, + Document.id == uuid.UUID(doc_id), + ) + result = await self._session.execute(stmt) + return result.scalar_one_or_none() + + async def get_index_job( + self, + tenant_id: str, + job_id: str, + ) -> IndexJob | None: + """ + [AC-ASA-02] Get index job status. + """ + stmt = select(IndexJob).where( + IndexJob.tenant_id == tenant_id, + IndexJob.id == uuid.UUID(job_id), + ) + result = await self._session.execute(stmt) + job = result.scalar_one_or_none() + + if job: + logger.info( + f"[AC-ASA-02] Got job status: tenant={tenant_id}, " + f"job_id={job_id}, status={job.status}, progress={job.progress}" + ) + + return job + + async def get_index_job_by_doc( + self, + tenant_id: str, + doc_id: str, + ) -> IndexJob | None: + """ + Get index job by document ID. + """ + stmt = select(IndexJob).where( + IndexJob.tenant_id == tenant_id, + IndexJob.doc_id == uuid.UUID(doc_id), + ).order_by(col(IndexJob.created_at).desc()) + result = await self._session.execute(stmt) + return result.scalar_one_or_none() + + async def update_job_status( + self, + tenant_id: str, + job_id: str, + status: str, + progress: int | None = None, + error_msg: str | None = None, + ) -> IndexJob | None: + """ + Update index job status. + """ + stmt = select(IndexJob).where( + IndexJob.tenant_id == tenant_id, + IndexJob.id == uuid.UUID(job_id), + ) + result = await self._session.execute(stmt) + job = result.scalar_one_or_none() + + if job: + job.status = status + job.updated_at = datetime.utcnow() + if progress is not None: + job.progress = progress + if error_msg is not None: + job.error_msg = error_msg + await self._session.flush() + + if job.doc_id: + doc_stmt = select(Document).where( + Document.tenant_id == tenant_id, + Document.id == job.doc_id, + ) + doc_result = await self._session.execute(doc_stmt) + doc = doc_result.scalar_one_or_none() + if doc: + doc.status = status + doc.updated_at = datetime.utcnow() + if error_msg: + doc.error_msg = error_msg + await self._session.flush() + + return job + + async def delete_document( + self, + tenant_id: str, + doc_id: str, + ) -> bool: + """ + Delete document and associated files. + """ + stmt = select(Document).where( + Document.tenant_id == tenant_id, + Document.id == uuid.UUID(doc_id), + ) + result = await self._session.execute(stmt) + document = result.scalar_one_or_none() + + if not document: + return False + + if document.file_path and os.path.exists(document.file_path): + os.remove(document.file_path) + + await self._session.delete(document) + await self._session.flush() + + logger.info(f"[AC-ASA-08] Deleted document: tenant={tenant_id}, doc_id={doc_id}") + return True + + async def list_knowledge_bases( + self, + tenant_id: str, + ) -> Sequence[KnowledgeBase]: + """ + List all knowledge bases for a tenant. + """ + stmt = select(KnowledgeBase).where( + KnowledgeBase.tenant_id == tenant_id + ).order_by(col(KnowledgeBase.created_at).desc()) + result = await self._session.execute(stmt) + return result.scalars().all() diff --git a/ai-service/app/services/llm/__init__.py b/ai-service/app/services/llm/__init__.py new file mode 100644 index 0000000..f616fac --- /dev/null +++ b/ai-service/app/services/llm/__init__.py @@ -0,0 +1,15 @@ +""" +LLM Adapter module for AI Service. +[AC-AISVC-02, AC-AISVC-06] Provides unified interface for LLM providers. +""" + +from app.services.llm.base import LLMClient, LLMConfig, LLMResponse, LLMStreamChunk +from app.services.llm.openai_client import OpenAIClient + +__all__ = [ + "LLMClient", + "LLMConfig", + "LLMResponse", + "LLMStreamChunk", + "OpenAIClient", +] diff --git a/ai-service/app/services/llm/base.py b/ai-service/app/services/llm/base.py new file mode 100644 index 0000000..cf46d3c --- /dev/null +++ b/ai-service/app/services/llm/base.py @@ -0,0 +1,115 @@ +""" +Base LLM client interface. +[AC-AISVC-02, AC-AISVC-06] Abstract interface for LLM providers. + +Design reference: design.md Section 8.1 - LLMClient interface +- generate(prompt, params) -> text +- stream_generate(prompt, params) -> iterator[delta] +""" + +from abc import ABC, abstractmethod +from dataclasses import dataclass, field +from typing import Any, AsyncGenerator + + +@dataclass +class LLMConfig: + """ + Configuration for LLM client. + [AC-AISVC-02] Supports configurable model parameters. + """ + model: str = "gpt-4o-mini" + max_tokens: int = 2048 + temperature: float = 0.7 + top_p: float = 1.0 + timeout_seconds: int = 30 + max_retries: int = 3 + extra_params: dict[str, Any] = field(default_factory=dict) + + +@dataclass +class LLMResponse: + """ + Response from LLM generation. + [AC-AISVC-02] Contains generated content and metadata. + """ + content: str + model: str + usage: dict[str, int] = field(default_factory=dict) + finish_reason: str = "stop" + metadata: dict[str, Any] = field(default_factory=dict) + + +@dataclass +class LLMStreamChunk: + """ + Streaming chunk from LLM. + [AC-AISVC-06, AC-AISVC-07] Incremental output for SSE streaming. + """ + delta: str + model: str + finish_reason: str | None = None + metadata: dict[str, Any] = field(default_factory=dict) + + +class LLMClient(ABC): + """ + Abstract base class for LLM clients. + [AC-AISVC-02, AC-AISVC-06] Provides unified interface for different LLM providers. + + Design reference: design.md Section 8.2 - Plugin points + - OpenAICompatibleClient / LocalModelClient can be swapped + """ + + @abstractmethod + async def generate( + self, + messages: list[dict[str, str]], + config: LLMConfig | None = None, + **kwargs: Any, + ) -> LLMResponse: + """ + Generate a non-streaming response. + [AC-AISVC-02] Returns complete response for ChatResponse. + + Args: + messages: List of chat messages with 'role' and 'content'. + config: Optional LLM configuration overrides. + **kwargs: Additional provider-specific parameters. + + Returns: + LLMResponse with generated content and metadata. + + Raises: + LLMException: If generation fails. + """ + pass + + @abstractmethod + async def stream_generate( + self, + messages: list[dict[str, str]], + config: LLMConfig | None = None, + **kwargs: Any, + ) -> AsyncGenerator[LLMStreamChunk, None]: + """ + Generate a streaming response. + [AC-AISVC-06, AC-AISVC-07] Yields incremental chunks for SSE. + + Args: + messages: List of chat messages with 'role' and 'content'. + config: Optional LLM configuration overrides. + **kwargs: Additional provider-specific parameters. + + Yields: + LLMStreamChunk with incremental content. + + Raises: + LLMException: If generation fails. + """ + pass + + @abstractmethod + async def close(self) -> None: + """Close the client and release resources.""" + pass diff --git a/ai-service/app/services/llm/factory.py b/ai-service/app/services/llm/factory.py new file mode 100644 index 0000000..d983c47 --- /dev/null +++ b/ai-service/app/services/llm/factory.py @@ -0,0 +1,421 @@ +""" +LLM Provider Factory and Configuration Management. +[AC-ASA-14, AC-ASA-15, AC-ASA-16, AC-ASA-17, AC-ASA-18] LLM provider management. + +Design pattern: Factory pattern for pluggable LLM providers. +""" + +import logging +from dataclasses import dataclass, field +from typing import Any + +from app.services.llm.base import LLMClient, LLMConfig +from app.services.llm.openai_client import OpenAIClient + +logger = logging.getLogger(__name__) + + +@dataclass +class LLMProviderInfo: + """Information about an LLM provider.""" + name: str + display_name: str + description: str + config_schema: dict[str, Any] + + +LLM_PROVIDERS: dict[str, LLMProviderInfo] = { + "openai": LLMProviderInfo( + name="openai", + display_name="OpenAI", + description="OpenAI GPT 系列模型 (GPT-4, GPT-3.5 等)", + config_schema={ + "type": "object", + "properties": { + "api_key": { + "type": "string", + "title": "API Key", + "description": "API Key", + "required": True, + }, + "base_url": { + "type": "string", + "title": "API Base URL", + "description": "API Base URL", + "default": "https://api.openai.com/v1", + }, + "model": { + "type": "string", + "title": "模型名称", + "description": "模型名称", + "default": "gpt-4o-mini", + }, + "max_tokens": { + "type": "integer", + "title": "最大输出 Token 数", + "description": "最大输出 Token 数", + "default": 2048, + }, + "temperature": { + "type": "number", + "title": "温度参数", + "description": "温度参数 (0-2)", + "default": 0.7, + "minimum": 0, + "maximum": 2, + }, + }, + "required": ["api_key"], + }, + ), + "ollama": LLMProviderInfo( + name="ollama", + display_name="Ollama", + description="Ollama 本地模型 (Llama, Qwen 等)", + config_schema={ + "type": "object", + "properties": { + "base_url": { + "type": "string", + "title": "Ollama API 地址", + "description": "Ollama API 地址", + "default": "http://localhost:11434/v1", + }, + "model": { + "type": "string", + "title": "模型名称", + "description": "模型名称", + "default": "llama3.2", + }, + "max_tokens": { + "type": "integer", + "title": "最大输出 Token 数", + "description": "最大输出 Token 数", + "default": 2048, + }, + "temperature": { + "type": "number", + "title": "温度参数", + "description": "温度参数 (0-2)", + "default": 0.7, + "minimum": 0, + "maximum": 2, + }, + }, + "required": [], + }, + ), + "deepseek": LLMProviderInfo( + name="deepseek", + display_name="DeepSeek", + description="DeepSeek 大模型 (deepseek-chat, deepseek-coder)", + config_schema={ + "type": "object", + "properties": { + "api_key": { + "type": "string", + "title": "API Key", + "description": "DeepSeek API Key", + "required": True, + }, + "base_url": { + "type": "string", + "title": "API Base URL", + "description": "API Base URL", + "default": "https://api.deepseek.com/v1", + }, + "model": { + "type": "string", + "title": "模型名称", + "description": "模型名称 (deepseek-chat, deepseek-coder)", + "default": "deepseek-chat", + }, + "max_tokens": { + "type": "integer", + "title": "最大输出 Token 数", + "description": "最大输出 Token 数", + "default": 2048, + }, + "temperature": { + "type": "number", + "title": "温度参数", + "description": "温度参数 (0-2)", + "default": 0.7, + "minimum": 0, + "maximum": 2, + }, + }, + "required": ["api_key"], + }, + ), + "azure": LLMProviderInfo( + name="azure", + display_name="Azure OpenAI", + description="Azure OpenAI 服务", + config_schema={ + "type": "object", + "properties": { + "api_key": { + "type": "string", + "title": "API Key", + "description": "API Key", + "required": True, + }, + "base_url": { + "type": "string", + "title": "Azure Endpoint", + "description": "Azure Endpoint", + "required": True, + }, + "model": { + "type": "string", + "title": "部署名称", + "description": "部署名称", + "required": True, + }, + "api_version": { + "type": "string", + "title": "API 版本", + "description": "API 版本", + "default": "2024-02-15-preview", + }, + "max_tokens": { + "type": "integer", + "title": "最大输出 Token 数", + "description": "最大输出 Token 数", + "default": 2048, + }, + "temperature": { + "type": "number", + "title": "温度参数", + "description": "温度参数 (0-2)", + "default": 0.7, + "minimum": 0, + "maximum": 2, + }, + }, + "required": ["api_key", "base_url", "model"], + }, + ), +} + + +class LLMProviderFactory: + """ + Factory for creating LLM clients. + [AC-ASA-14, AC-ASA-15] Dynamic provider creation. + """ + + @classmethod + def get_providers(cls) -> list[LLMProviderInfo]: + """Get all registered LLM providers.""" + return list(LLM_PROVIDERS.values()) + + @classmethod + def get_provider_info(cls, name: str) -> LLMProviderInfo | None: + """Get provider info by name.""" + return LLM_PROVIDERS.get(name) + + @classmethod + def create_client( + cls, + provider: str, + config: dict[str, Any], + ) -> LLMClient: + """ + Create an LLM client for the specified provider. + [AC-ASA-15] Factory method for client creation. + + Args: + provider: Provider name (openai, ollama, azure) + config: Provider configuration + + Returns: + LLMClient instance + + Raises: + ValueError: If provider is not supported + """ + if provider not in LLM_PROVIDERS: + raise ValueError(f"Unsupported LLM provider: {provider}") + + if provider in ("openai", "ollama", "azure", "deepseek"): + return OpenAIClient( + api_key=config.get("api_key"), + base_url=config.get("base_url"), + model=config.get("model"), + default_config=LLMConfig( + model=config.get("model", "gpt-4o-mini"), + max_tokens=config.get("max_tokens", 2048), + temperature=config.get("temperature", 0.7), + ), + ) + + raise ValueError(f"Unsupported LLM provider: {provider}") + + +class LLMConfigManager: + """ + Manager for LLM configuration. + [AC-ASA-16, AC-ASA-17, AC-ASA-18] Configuration management with hot-reload. + """ + + def __init__(self): + from app.core.config import get_settings + + settings = get_settings() + + self._current_provider: str = settings.llm_provider + self._current_config: dict[str, Any] = { + "api_key": settings.llm_api_key, + "base_url": settings.llm_base_url, + "model": settings.llm_model, + "max_tokens": settings.llm_max_tokens, + "temperature": settings.llm_temperature, + } + self._client: LLMClient | None = None + + def get_current_config(self) -> dict[str, Any]: + """Get current LLM configuration.""" + return { + "provider": self._current_provider, + "config": self._current_config, + } + + async def update_config( + self, + provider: str, + config: dict[str, Any], + ) -> bool: + """ + Update LLM configuration. + [AC-ASA-16] Hot-reload configuration. + + Args: + provider: Provider name + config: New configuration + + Returns: + True if update successful + """ + if provider not in LLM_PROVIDERS: + raise ValueError(f"Unsupported LLM provider: {provider}") + + provider_info = LLM_PROVIDERS[provider] + validated_config = self._validate_config(provider_info, config) + + if self._client: + await self._client.close() + self._client = None + + self._current_provider = provider + self._current_config = validated_config + + logger.info(f"[AC-ASA-16] LLM config updated: provider={provider}") + return True + + def _validate_config( + self, + provider_info: LLMProviderInfo, + config: dict[str, Any], + ) -> dict[str, Any]: + """Validate configuration against provider schema.""" + schema_props = provider_info.config_schema.get("properties", {}) + required_fields = provider_info.config_schema.get("required", []) + + validated = {} + for key, prop_schema in schema_props.items(): + if key in config: + validated[key] = config[key] + elif "default" in prop_schema: + validated[key] = prop_schema["default"] + elif key in required_fields: + raise ValueError(f"Missing required config: {key}") + return validated + + def get_client(self) -> LLMClient: + """Get or create LLM client with current config.""" + if self._client is None: + self._client = LLMProviderFactory.create_client( + self._current_provider, + self._current_config, + ) + return self._client + + async def test_connection( + self, + test_prompt: str = "你好,请简单介绍一下自己。", + provider: str | None = None, + config: dict[str, Any] | None = None, + ) -> dict[str, Any]: + """ + Test LLM connection. + [AC-ASA-17, AC-ASA-18] Connection testing. + + Args: + test_prompt: Test prompt to send + provider: Optional provider to test (uses current if not specified) + config: Optional config to test (uses current if not specified) + + Returns: + Test result with success status, response, and metrics + """ + import time + + test_provider = provider or self._current_provider + test_config = config if config else self._current_config + + logger.info(f"[AC-ASA-17] Test connection: provider={test_provider}, config={test_config}") + + if test_provider not in LLM_PROVIDERS: + return { + "success": False, + "error": f"Unsupported provider: {test_provider}", + } + + try: + client = LLMProviderFactory.create_client(test_provider, test_config) + + start_time = time.time() + response = await client.generate( + messages=[{"role": "user", "content": test_prompt}], + ) + latency_ms = (time.time() - start_time) * 1000 + + await client.close() + + return { + "success": True, + "response": response.content, + "latency_ms": round(latency_ms, 2), + "prompt_tokens": response.usage.get("prompt_tokens", 0), + "completion_tokens": response.usage.get("completion_tokens", 0), + "total_tokens": response.usage.get("total_tokens", 0), + "model": response.model, + "message": f"连接成功,模型: {response.model}", + } + + except Exception as e: + logger.error(f"[AC-ASA-18] LLM test failed: {e}") + return { + "success": False, + "error": str(e), + "message": f"连接失败: {str(e)}", + } + + async def close(self) -> None: + """Close the current client.""" + if self._client: + await self._client.close() + self._client = None + + +_llm_config_manager: LLMConfigManager | None = None + + +def get_llm_config_manager() -> LLMConfigManager: + """Get or create LLM config manager instance.""" + global _llm_config_manager + if _llm_config_manager is None: + _llm_config_manager = LLMConfigManager() + return _llm_config_manager diff --git a/ai-service/app/services/llm/openai_client.py b/ai-service/app/services/llm/openai_client.py new file mode 100644 index 0000000..fc36959 --- /dev/null +++ b/ai-service/app/services/llm/openai_client.py @@ -0,0 +1,333 @@ +""" +OpenAI-compatible LLM client implementation. +[AC-AISVC-02, AC-AISVC-06] Concrete implementation using httpx for OpenAI API. + +Design reference: design.md Section 8.1 - LLMClient interface +- Uses langchain-openai or official SDK pattern +- Supports generate and stream_generate +""" + +import json +import logging +from typing import Any, AsyncGenerator + +import httpx +from tenacity import ( + retry, + retry_if_exception_type, + stop_after_attempt, + wait_exponential, +) + +from app.core.config import get_settings +from app.core.exceptions import AIServiceException, ErrorCode, ServiceUnavailableException, TimeoutException +from app.services.llm.base import LLMClient, LLMConfig, LLMResponse, LLMStreamChunk + +logger = logging.getLogger(__name__) + + +class LLMException(AIServiceException): + """Exception raised when LLM operations fail.""" + + def __init__(self, message: str, details: list[dict] | None = None): + super().__init__( + code=ErrorCode.LLM_ERROR, + message=message, + status_code=503, + details=details, + ) + + +class OpenAIClient(LLMClient): + """ + OpenAI-compatible LLM client. + [AC-AISVC-02, AC-AISVC-06] Implements LLMClient interface for OpenAI API. + + Supports: + - OpenAI API (official) + - OpenAI-compatible endpoints (Azure, local models, etc.) + """ + + def __init__( + self, + api_key: str | None = None, + base_url: str | None = None, + model: str | None = None, + default_config: LLMConfig | None = None, + ): + settings = get_settings() + self._api_key = api_key or settings.llm_api_key + self._base_url = (base_url or settings.llm_base_url).rstrip("/") + self._model = model or settings.llm_model + self._default_config = default_config or LLMConfig( + model=self._model, + max_tokens=settings.llm_max_tokens, + temperature=settings.llm_temperature, + timeout_seconds=settings.llm_timeout_seconds, + max_retries=settings.llm_max_retries, + ) + self._client: httpx.AsyncClient | None = None + + def _get_client(self, timeout_seconds: int) -> httpx.AsyncClient: + """Get or create HTTP client.""" + if self._client is None: + self._client = httpx.AsyncClient( + timeout=httpx.Timeout(timeout_seconds), + headers={ + "Authorization": f"Bearer {self._api_key}", + "Content-Type": "application/json", + }, + ) + return self._client + + def _build_request_body( + self, + messages: list[dict[str, str]], + config: LLMConfig, + stream: bool = False, + **kwargs: Any, + ) -> dict[str, Any]: + """Build request body for OpenAI API.""" + body: dict[str, Any] = { + "model": config.model, + "messages": messages, + "max_tokens": config.max_tokens, + "temperature": config.temperature, + "top_p": config.top_p, + "stream": stream, + } + body.update(config.extra_params) + body.update(kwargs) + return body + + @retry( + retry=retry_if_exception_type(httpx.TimeoutException), + stop=stop_after_attempt(3), + wait=wait_exponential(multiplier=1, min=1, max=10), + ) + async def generate( + self, + messages: list[dict[str, str]], + config: LLMConfig | None = None, + **kwargs: Any, + ) -> LLMResponse: + """ + Generate a non-streaming response. + [AC-AISVC-02] Returns complete response for ChatResponse. + + Args: + messages: List of chat messages with 'role' and 'content'. + config: Optional LLM configuration overrides. + **kwargs: Additional provider-specific parameters. + + Returns: + LLMResponse with generated content and metadata. + + Raises: + LLMException: If generation fails. + TimeoutException: If request times out. + """ + effective_config = config or self._default_config + client = self._get_client(effective_config.timeout_seconds) + + body = self._build_request_body(messages, effective_config, stream=False, **kwargs) + + logger.info(f"[AC-AISVC-02] Generating response with model={effective_config.model}") + logger.info(f"[AC-AISVC-02] ========== FULL PROMPT TO AI ==========") + for i, msg in enumerate(messages): + role = msg.get("role", "unknown") + content = msg.get("content", "") + logger.info(f"[AC-AISVC-02] [{i}] role={role}, content_length={len(content)}") + logger.info(f"[AC-AISVC-02] [{i}] content:\n{content}") + logger.info(f"[AC-AISVC-02] ======================================") + + try: + response = await client.post( + f"{self._base_url}/chat/completions", + json=body, + ) + response.raise_for_status() + data = response.json() + + except httpx.TimeoutException as e: + logger.error(f"[AC-AISVC-02] LLM request timeout: {e}") + raise TimeoutException(message=f"LLM request timed out: {e}") + + except httpx.HTTPStatusError as e: + logger.error(f"[AC-AISVC-02] LLM API error: {e}") + error_detail = self._parse_error_response(e.response) + raise LLMException( + message=f"LLM API error: {error_detail}", + details=[{"status_code": e.response.status_code, "response": error_detail}], + ) + + except json.JSONDecodeError as e: + logger.error(f"[AC-AISVC-02] Failed to parse LLM response: {e}") + raise LLMException(message=f"Failed to parse LLM response: {e}") + + try: + choice = data["choices"][0] + content = choice["message"]["content"] + usage = data.get("usage", {}) + finish_reason = choice.get("finish_reason", "stop") + + logger.info( + f"[AC-AISVC-02] Generated response: " + f"tokens={usage.get('total_tokens', 'N/A')}, " + f"finish_reason={finish_reason}" + ) + + return LLMResponse( + content=content, + model=data.get("model", effective_config.model), + usage=usage, + finish_reason=finish_reason, + metadata={"raw_response": data}, + ) + + except (KeyError, IndexError) as e: + logger.error(f"[AC-AISVC-02] Unexpected LLM response format: {e}") + raise LLMException( + message=f"Unexpected LLM response format: {e}", + details=[{"response": str(data)}], + ) + + async def stream_generate( + self, + messages: list[dict[str, str]], + config: LLMConfig | None = None, + **kwargs: Any, + ) -> AsyncGenerator[LLMStreamChunk, None]: + """ + Generate a streaming response. + [AC-AISVC-06, AC-AISVC-07] Yields incremental chunks for SSE. + + Args: + messages: List of chat messages with 'role' and 'content'. + config: Optional LLM configuration overrides. + **kwargs: Additional provider-specific parameters. + + Yields: + LLMStreamChunk with incremental content. + + Raises: + LLMException: If generation fails. + TimeoutException: If request times out. + """ + effective_config = config or self._default_config + client = self._get_client(effective_config.timeout_seconds) + + body = self._build_request_body(messages, effective_config, stream=True, **kwargs) + + logger.info(f"[AC-AISVC-06] Starting streaming generation with model={effective_config.model}") + logger.info(f"[AC-AISVC-06] ========== FULL PROMPT TO AI (STREAMING) ==========") + for i, msg in enumerate(messages): + role = msg.get("role", "unknown") + content = msg.get("content", "") + logger.info(f"[AC-AISVC-06] [{i}] role={role}, content_length={len(content)}") + logger.info(f"[AC-AISVC-06] [{i}] content:\n{content}") + logger.info(f"[AC-AISVC-06] ======================================") + + try: + async with client.stream( + "POST", + f"{self._base_url}/chat/completions", + json=body, + ) as response: + response.raise_for_status() + + async for line in response.aiter_lines(): + if not line or line == "data: [DONE]": + continue + + if line.startswith("data: "): + json_str = line[6:] + try: + chunk_data = json.loads(json_str) + chunk = self._parse_stream_chunk(chunk_data, effective_config.model) + if chunk: + yield chunk + except json.JSONDecodeError as e: + logger.warning(f"[AC-AISVC-06] Failed to parse stream chunk: {e}") + continue + + except httpx.TimeoutException as e: + logger.error(f"[AC-AISVC-06] LLM streaming request timeout: {e}") + raise TimeoutException(message=f"LLM streaming request timed out: {e}") + + except httpx.HTTPStatusError as e: + logger.error(f"[AC-AISVC-06] LLM streaming API error: {e}") + error_detail = self._parse_error_response(e.response) + raise LLMException( + message=f"LLM streaming API error: {error_detail}", + details=[{"status_code": e.response.status_code, "response": error_detail}], + ) + + logger.info(f"[AC-AISVC-06] Streaming generation completed") + + def _parse_stream_chunk( + self, + data: dict[str, Any], + model: str, + ) -> LLMStreamChunk | None: + """Parse a streaming chunk from OpenAI API.""" + try: + choices = data.get("choices", []) + if not choices: + return None + + delta = choices[0].get("delta", {}) + content = delta.get("content", "") + finish_reason = choices[0].get("finish_reason") + + if not content and not finish_reason: + return None + + return LLMStreamChunk( + delta=content, + model=data.get("model", model), + finish_reason=finish_reason, + metadata={"raw_chunk": data}, + ) + + except (KeyError, IndexError) as e: + logger.warning(f"[AC-AISVC-06] Failed to parse stream chunk: {e}") + return None + + def _parse_error_response(self, response: httpx.Response) -> str: + """Parse error response from API.""" + try: + data = response.json() + if "error" in data: + error = data["error"] + if isinstance(error, dict): + return error.get("message", str(error)) + return str(error) + return response.text + except Exception: + return response.text + + async def close(self) -> None: + """Close the HTTP client.""" + if self._client: + await self._client.aclose() + self._client = None + + +_llm_client: OpenAIClient | None = None + + +def get_llm_client() -> OpenAIClient: + """Get or create LLM client instance.""" + global _llm_client + if _llm_client is None: + _llm_client = OpenAIClient() + return _llm_client + + +async def close_llm_client() -> None: + """Close the global LLM client.""" + global _llm_client + if _llm_client: + await _llm_client.close() + _llm_client = None diff --git a/ai-service/app/services/memory.py b/ai-service/app/services/memory.py new file mode 100644 index 0000000..5db74f5 --- /dev/null +++ b/ai-service/app/services/memory.py @@ -0,0 +1,170 @@ +""" +Memory service for AI Service. +[AC-AISVC-13] Session-based memory management with tenant isolation. +""" + +import logging +from typing import Sequence + +from sqlalchemy import select +from sqlalchemy.ext.asyncio import AsyncSession +from sqlmodel import col + +from app.models.entities import ChatMessage, ChatMessageCreate, ChatSession, ChatSessionCreate + +logger = logging.getLogger(__name__) + + +class MemoryService: + """ + [AC-AISVC-13] Memory service for session-based conversation history. + All operations are scoped by (tenant_id, session_id) for multi-tenant isolation. + """ + + def __init__(self, session: AsyncSession): + self._session = session + + async def get_or_create_session( + self, + tenant_id: str, + session_id: str, + channel_type: str | None = None, + metadata: dict | None = None, + ) -> ChatSession: + """ + [AC-AISVC-13] Get existing session or create a new one. + Ensures tenant isolation by querying with tenant_id. + """ + stmt = select(ChatSession).where( + ChatSession.tenant_id == tenant_id, + ChatSession.session_id == session_id, + ) + result = await self._session.execute(stmt) + existing_session = result.scalar_one_or_none() + + if existing_session: + logger.info( + f"[AC-AISVC-13] Found existing session: tenant={tenant_id}, session={session_id}" + ) + return existing_session + + new_session = ChatSession( + tenant_id=tenant_id, + session_id=session_id, + channel_type=channel_type, + metadata_=metadata, + ) + self._session.add(new_session) + await self._session.flush() + + logger.info( + f"[AC-AISVC-13] Created new session: tenant={tenant_id}, session={session_id}" + ) + return new_session + + async def load_history( + self, + tenant_id: str, + session_id: str, + limit: int | None = None, + ) -> Sequence[ChatMessage]: + """ + [AC-AISVC-13] Load conversation history for a session. + All queries are filtered by tenant_id to ensure isolation. + """ + stmt = ( + select(ChatMessage) + .where( + ChatMessage.tenant_id == tenant_id, + ChatMessage.session_id == session_id, + ) + .order_by(col(ChatMessage.created_at).asc()) + ) + + if limit: + stmt = stmt.limit(limit) + + result = await self._session.execute(stmt) + messages = result.scalars().all() + + logger.info( + f"[AC-AISVC-13] Loaded {len(messages)} messages for tenant={tenant_id}, session={session_id}" + ) + return messages + + async def append_message( + self, + tenant_id: str, + session_id: str, + role: str, + content: str, + ) -> ChatMessage: + """ + [AC-AISVC-13] Append a message to the session history. + Message is scoped by tenant_id for isolation. + """ + message = ChatMessage( + tenant_id=tenant_id, + session_id=session_id, + role=role, + content=content, + ) + self._session.add(message) + await self._session.flush() + + logger.info( + f"[AC-AISVC-13] Appended message: tenant={tenant_id}, session={session_id}, role={role}" + ) + return message + + async def append_messages( + self, + tenant_id: str, + session_id: str, + messages: list[dict[str, str]], + ) -> list[ChatMessage]: + """ + [AC-AISVC-13] Append multiple messages to the session history. + Used for batch insertion of conversation turns. + """ + chat_messages = [] + for msg in messages: + message = ChatMessage( + tenant_id=tenant_id, + session_id=session_id, + role=msg["role"], + content=msg["content"], + ) + self._session.add(message) + chat_messages.append(message) + + await self._session.flush() + + logger.info( + f"[AC-AISVC-13] Appended {len(chat_messages)} messages for tenant={tenant_id}, session={session_id}" + ) + return chat_messages + + async def clear_history(self, tenant_id: str, session_id: str) -> int: + """ + [AC-AISVC-13] Clear all messages for a session. + Only affects messages within the tenant's scope. + """ + stmt = select(ChatMessage).where( + ChatMessage.tenant_id == tenant_id, + ChatMessage.session_id == session_id, + ) + result = await self._session.execute(stmt) + messages = result.scalars().all() + + count = 0 + for message in messages: + await self._session.delete(message) + count += 1 + + await self._session.flush() + + logger.info( + f"[AC-AISVC-13] Cleared {count} messages for tenant={tenant_id}, session={session_id}" + ) + return count diff --git a/ai-service/app/services/orchestrator.py b/ai-service/app/services/orchestrator.py new file mode 100644 index 0000000..42d16dc --- /dev/null +++ b/ai-service/app/services/orchestrator.py @@ -0,0 +1,689 @@ +""" +Orchestrator service for AI Service. +[AC-AISVC-01, AC-AISVC-02, AC-AISVC-06, AC-AISVC-07] Core orchestration logic for chat generation. + +Design reference: design.md Section 2.2 - 关键数据流 +1. Memory.load(tenantId, sessionId) +2. merge_context(local_history, external_history) +3. Retrieval.retrieve(query, tenantId, channelType, metadata) +4. build_prompt(merged_history, retrieved_docs, currentMessage) +5. LLM.generate(...) (non-streaming) or LLM.stream_generate(...) (streaming) +6. compute_confidence(...) +7. Memory.append(tenantId, sessionId, user/assistant messages) +8. Return ChatResponse (or output via SSE) + +RAG Optimization (rag-optimization/spec.md): +- Two-stage retrieval with Matryoshka dimensions +- RRF hybrid ranking +- Optimized prompt engineering +""" + +import logging +from dataclasses import dataclass, field +from typing import Any, AsyncGenerator + +from sse_starlette.sse import ServerSentEvent + +from app.core.config import get_settings +from app.core.prompts import SYSTEM_PROMPT, format_evidence_for_prompt +from app.core.sse import ( + create_error_event, + create_final_event, + create_message_event, + SSEStateMachine, +) +from app.models import ChatRequest, ChatResponse +from app.services.confidence import ConfidenceCalculator, ConfidenceResult +from app.services.context import ContextMerger, MergedContext +from app.services.llm.base import LLMClient, LLMConfig, LLMResponse +from app.services.memory import MemoryService +from app.services.retrieval.base import BaseRetriever, RetrievalContext, RetrievalResult + +logger = logging.getLogger(__name__) + + +@dataclass +class OrchestratorConfig: + """ + Configuration for OrchestratorService. + [AC-AISVC-01] Centralized configuration for orchestration. + """ + max_history_tokens: int = 4000 + max_evidence_tokens: int = 2000 + system_prompt: str = SYSTEM_PROMPT + enable_rag: bool = True + use_optimized_retriever: bool = True + + +@dataclass +class GenerationContext: + """ + [AC-AISVC-01, AC-AISVC-02] Context accumulated during generation pipeline. + Contains all intermediate results for diagnostics and response building. + """ + tenant_id: str + session_id: str + current_message: str + channel_type: str + request_metadata: dict[str, Any] | None = None + + local_history: list[dict[str, str]] = field(default_factory=list) + merged_context: MergedContext | None = None + retrieval_result: RetrievalResult | None = None + llm_response: LLMResponse | None = None + confidence_result: ConfidenceResult | None = None + + diagnostics: dict[str, Any] = field(default_factory=dict) + + +class OrchestratorService: + """ + [AC-AISVC-01, AC-AISVC-02, AC-AISVC-06, AC-AISVC-07] Orchestrator for chat generation. + Coordinates memory, retrieval, and LLM components. + + SSE Event Flow (per design.md Section 6.2): + - message* (0 or more) -> final (exactly 1) -> close + - OR message* (0 or more) -> error (exactly 1) -> close + """ + + def __init__( + self, + llm_client: LLMClient | None = None, + memory_service: MemoryService | None = None, + retriever: BaseRetriever | None = None, + context_merger: ContextMerger | None = None, + confidence_calculator: ConfidenceCalculator | None = None, + config: OrchestratorConfig | None = None, + ): + """ + Initialize orchestrator with optional dependencies for DI. + + Args: + llm_client: LLM client for generation + memory_service: Memory service for session history + retriever: Retriever for RAG + context_merger: Context merger for history deduplication + confidence_calculator: Confidence calculator for response scoring + config: Orchestrator configuration + """ + settings = get_settings() + self._llm_client = llm_client + self._memory_service = memory_service + self._retriever = retriever + self._context_merger = context_merger or ContextMerger( + max_history_tokens=getattr(settings, "max_history_tokens", 4000) + ) + self._confidence_calculator = confidence_calculator or ConfidenceCalculator() + self._config = config or OrchestratorConfig( + max_history_tokens=getattr(settings, "max_history_tokens", 4000), + max_evidence_tokens=getattr(settings, "rag_max_evidence_tokens", 2000), + enable_rag=True, + ) + self._llm_config = LLMConfig( + model=getattr(settings, "llm_model", "gpt-4o-mini"), + max_tokens=getattr(settings, "llm_max_tokens", 2048), + temperature=getattr(settings, "llm_temperature", 0.7), + timeout_seconds=getattr(settings, "llm_timeout_seconds", 30), + max_retries=getattr(settings, "llm_max_retries", 3), + ) + + async def generate( + self, + tenant_id: str, + request: ChatRequest, + ) -> ChatResponse: + """ + Generate a non-streaming response. + [AC-AISVC-01, AC-AISVC-02] Complete generation pipeline. + + Pipeline (per design.md Section 2.2): + 1. Load local history from Memory + 2. Merge with external history (dedup + truncate) + 3. RAG retrieval (optional) + 4. Build prompt with context and evidence + 5. LLM generation + 6. Calculate confidence + 7. Save messages to Memory + 8. Return ChatResponse + """ + logger.info( + f"[AC-AISVC-01] Starting generation for tenant={tenant_id}, " + f"session={request.session_id}, channel_type={request.channel_type}, " + f"current_message={request.current_message[:100]}..." + ) + logger.info( + f"[AC-AISVC-01] Config: enable_rag={self._config.enable_rag}, " + f"use_optimized_retriever={self._config.use_optimized_retriever}, " + f"llm_client={'configured' if self._llm_client else 'NOT configured'}, " + f"retriever={'configured' if self._retriever else 'NOT configured'}" + ) + + ctx = GenerationContext( + tenant_id=tenant_id, + session_id=request.session_id, + current_message=request.current_message, + channel_type=request.channel_type.value, + request_metadata=request.metadata, + ) + + try: + await self._load_local_history(ctx) + + await self._merge_context(ctx, request.history) + + if self._config.enable_rag and self._retriever: + await self._retrieve_evidence(ctx) + + await self._generate_response(ctx) + + self._calculate_confidence(ctx) + + await self._save_messages(ctx) + + return self._build_response(ctx) + + except Exception as e: + logger.error(f"[AC-AISVC-01] Generation failed: {e}") + return ChatResponse( + reply="抱歉,服务暂时不可用,请稍后重试或联系人工客服。", + confidence=0.0, + should_transfer=True, + transfer_reason=f"服务异常: {str(e)}", + metadata={"error": str(e), "diagnostics": ctx.diagnostics}, + ) + + async def _load_local_history(self, ctx: GenerationContext) -> None: + """ + [AC-AISVC-13] Load local history from Memory service. + Step 1 of the generation pipeline. + """ + if not self._memory_service: + logger.info("[AC-AISVC-13] No memory service configured, skipping history load") + ctx.diagnostics["memory_enabled"] = False + return + + try: + messages = await self._memory_service.load_history( + tenant_id=ctx.tenant_id, + session_id=ctx.session_id, + ) + + ctx.local_history = [ + {"role": msg.role, "content": msg.content} + for msg in messages + ] + + ctx.diagnostics["memory_enabled"] = True + ctx.diagnostics["local_history_count"] = len(ctx.local_history) + + logger.info( + f"[AC-AISVC-13] Loaded {len(ctx.local_history)} messages from memory " + f"for tenant={ctx.tenant_id}, session={ctx.session_id}" + ) + + except Exception as e: + logger.warning(f"[AC-AISVC-13] Failed to load history: {e}") + ctx.diagnostics["memory_error"] = str(e) + + async def _merge_context( + self, + ctx: GenerationContext, + external_history: list | None, + ) -> None: + """ + [AC-AISVC-14, AC-AISVC-15] Merge local and external history. + Step 2 of the generation pipeline. + + Design reference: design.md Section 7 + - Deduplication based on fingerprint + - Truncation to fit token budget + """ + external_messages = None + if external_history: + external_messages = [ + {"role": msg.role.value, "content": msg.content} + for msg in external_history + ] + + ctx.merged_context = self._context_merger.merge_and_truncate( + local_history=ctx.local_history, + external_history=external_messages, + max_tokens=self._config.max_history_tokens, + ) + + ctx.diagnostics["merged_context"] = { + "local_count": ctx.merged_context.local_count, + "external_count": ctx.merged_context.external_count, + "duplicates_skipped": ctx.merged_context.duplicates_skipped, + "truncated_count": ctx.merged_context.truncated_count, + "total_tokens": ctx.merged_context.total_tokens, + } + + logger.info( + f"[AC-AISVC-14, AC-AISVC-15] Context merged: " + f"local={ctx.merged_context.local_count}, " + f"external={ctx.merged_context.external_count}, " + f"tokens={ctx.merged_context.total_tokens}" + ) + + async def _retrieve_evidence(self, ctx: GenerationContext) -> None: + """ + [AC-AISVC-16, AC-AISVC-17] RAG retrieval for evidence. + Step 3 of the generation pipeline. + """ + logger.info( + f"[AC-AISVC-16] Starting retrieval: tenant={ctx.tenant_id}, " + f"query={ctx.current_message[:100]}..., retriever={type(self._retriever).__name__ if self._retriever else 'None'}" + ) + try: + retrieval_ctx = RetrievalContext( + tenant_id=ctx.tenant_id, + query=ctx.current_message, + session_id=ctx.session_id, + channel_type=ctx.channel_type, + metadata=ctx.request_metadata, + ) + + ctx.retrieval_result = await self._retriever.retrieve(retrieval_ctx) + + ctx.diagnostics["retrieval"] = { + "hit_count": ctx.retrieval_result.hit_count, + "max_score": ctx.retrieval_result.max_score, + "is_empty": ctx.retrieval_result.is_empty, + } + + logger.info( + f"[AC-AISVC-16, AC-AISVC-17] Retrieval complete: " + f"hits={ctx.retrieval_result.hit_count}, " + f"max_score={ctx.retrieval_result.max_score:.3f}, " + f"is_empty={ctx.retrieval_result.is_empty}" + ) + + if ctx.retrieval_result.hit_count > 0: + for i, hit in enumerate(ctx.retrieval_result.hits[:3]): + logger.info( + f"[AC-AISVC-16] Hit {i+1}: score={hit.score:.3f}, " + f"text_preview={hit.text[:100]}..." + ) + + except Exception as e: + logger.error(f"[AC-AISVC-16] Retrieval failed with exception: {e}", exc_info=True) + ctx.retrieval_result = RetrievalResult( + hits=[], + diagnostics={"error": str(e)}, + ) + ctx.diagnostics["retrieval_error"] = str(e) + + async def _generate_response(self, ctx: GenerationContext) -> None: + """ + [AC-AISVC-02] Generate response using LLM. + Step 4-5 of the generation pipeline. + """ + messages = self._build_llm_messages(ctx) + logger.info( + f"[AC-AISVC-02] Building LLM messages: count={len(messages)}, " + f"has_retrieval_result={ctx.retrieval_result is not None}, " + f"retrieval_is_empty={ctx.retrieval_result.is_empty if ctx.retrieval_result else 'N/A'}, " + f"llm_client={'configured' if self._llm_client else 'NOT configured'}" + ) + + if not self._llm_client: + logger.warning( + f"[AC-AISVC-02] No LLM client configured, using fallback. " + f"retrieval_is_empty={ctx.retrieval_result.is_empty if ctx.retrieval_result else True}" + ) + ctx.llm_response = LLMResponse( + content=self._fallback_response(ctx), + model="fallback", + usage={}, + finish_reason="fallback", + ) + ctx.diagnostics["llm_mode"] = "fallback" + ctx.diagnostics["fallback_reason"] = "no_llm_client" + return + + try: + ctx.llm_response = await self._llm_client.generate( + messages=messages, + config=self._llm_config, + ) + ctx.diagnostics["llm_mode"] = "live" + ctx.diagnostics["llm_model"] = ctx.llm_response.model + ctx.diagnostics["llm_usage"] = ctx.llm_response.usage + + logger.info( + f"[AC-AISVC-02] LLM response generated: " + f"model={ctx.llm_response.model}, " + f"tokens={ctx.llm_response.usage}, " + f"content_preview={ctx.llm_response.content[:100]}..." + ) + + except Exception as e: + logger.error( + f"[AC-AISVC-02] LLM generation failed: {e}, " + f"retrieval_is_empty={ctx.retrieval_result.is_empty if ctx.retrieval_result else True}", + exc_info=True + ) + ctx.llm_response = LLMResponse( + content=self._fallback_response(ctx), + model="fallback", + usage={}, + finish_reason="error", + metadata={"error": str(e)}, + ) + ctx.diagnostics["llm_error"] = str(e) + ctx.diagnostics["llm_mode"] = "fallback" + ctx.diagnostics["fallback_reason"] = f"llm_error: {str(e)}" + + def _build_llm_messages(self, ctx: GenerationContext) -> list[dict[str, str]]: + """ + [AC-AISVC-02] Build messages for LLM including system prompt and evidence. + """ + messages = [] + + system_content = self._config.system_prompt + + if ctx.retrieval_result and not ctx.retrieval_result.is_empty: + evidence_text = self._format_evidence(ctx.retrieval_result) + system_content += f"\n\n知识库参考内容:\n{evidence_text}" + + messages.append({"role": "system", "content": system_content}) + + if ctx.merged_context and ctx.merged_context.messages: + messages.extend(ctx.merged_context.messages) + + messages.append({"role": "user", "content": ctx.current_message}) + + logger.info( + f"[AC-AISVC-02] Built {len(messages)} messages for LLM: " + f"system_len={len(system_content)}, history_count={len(ctx.merged_context.messages) if ctx.merged_context else 0}" + ) + logger.debug(f"[AC-AISVC-02] System prompt preview: {system_content[:500]}...") + + logger.info(f"[AC-AISVC-02] ========== ORCHESTRATOR FULL PROMPT ==========") + for i, msg in enumerate(messages): + role = msg.get("role", "unknown") + content = msg.get("content", "") + logger.info(f"[AC-AISVC-02] [{i}] role={role}, content_length={len(content)}") + logger.info(f"[AC-AISVC-02] [{i}] content:\n{content}") + logger.info(f"[AC-AISVC-02] ==============================================") + + return messages + + def _format_evidence(self, retrieval_result: RetrievalResult) -> str: + """ + [AC-AISVC-17] Format retrieval hits as evidence text. + Uses shared prompt configuration for consistency. + """ + return format_evidence_for_prompt(retrieval_result.hits, max_results=5, max_content_length=500) + + def _fallback_response(self, ctx: GenerationContext) -> str: + """ + [AC-AISVC-17] Generate fallback response when LLM is unavailable. + """ + if ctx.retrieval_result and not ctx.retrieval_result.is_empty: + return ( + "根据知识库信息,我找到了一些相关内容," + "但暂时无法生成完整回复。建议您稍后重试或联系人工客服。" + ) + return ( + "抱歉,我暂时无法处理您的请求。" + "请稍后重试或联系人工客服获取帮助。" + ) + + def _calculate_confidence(self, ctx: GenerationContext) -> None: + """ + [AC-AISVC-17, AC-AISVC-18, AC-AISVC-19] Calculate confidence score. + Step 6 of the generation pipeline. + """ + if ctx.retrieval_result: + evidence_tokens = 0 + if not ctx.retrieval_result.is_empty: + evidence_tokens = sum( + len(hit.text.split()) * 2 + for hit in ctx.retrieval_result.hits + ) + + ctx.confidence_result = self._confidence_calculator.calculate_confidence( + retrieval_result=ctx.retrieval_result, + evidence_tokens=evidence_tokens, + ) + else: + ctx.confidence_result = self._confidence_calculator.calculate_confidence_no_retrieval() + + ctx.diagnostics["confidence"] = { + "score": ctx.confidence_result.confidence, + "should_transfer": ctx.confidence_result.should_transfer, + "is_insufficient": ctx.confidence_result.is_retrieval_insufficient, + } + + logger.info( + f"[AC-AISVC-17, AC-AISVC-18] Confidence calculated: " + f"{ctx.confidence_result.confidence:.3f}, " + f"should_transfer={ctx.confidence_result.should_transfer}" + ) + + async def _save_messages(self, ctx: GenerationContext) -> None: + """ + [AC-AISVC-13] Save user and assistant messages to Memory. + Step 7 of the generation pipeline. + """ + if not self._memory_service: + logger.info("[AC-AISVC-13] No memory service configured, skipping save") + return + + try: + await self._memory_service.get_or_create_session( + tenant_id=ctx.tenant_id, + session_id=ctx.session_id, + channel_type=ctx.channel_type, + metadata=ctx.request_metadata, + ) + + messages_to_save = [ + {"role": "user", "content": ctx.current_message}, + ] + + if ctx.llm_response: + messages_to_save.append({ + "role": "assistant", + "content": ctx.llm_response.content, + }) + + await self._memory_service.append_messages( + tenant_id=ctx.tenant_id, + session_id=ctx.session_id, + messages=messages_to_save, + ) + + ctx.diagnostics["messages_saved"] = len(messages_to_save) + + logger.info( + f"[AC-AISVC-13] Saved {len(messages_to_save)} messages " + f"for tenant={ctx.tenant_id}, session={ctx.session_id}" + ) + + except Exception as e: + logger.warning(f"[AC-AISVC-13] Failed to save messages: {e}") + ctx.diagnostics["save_error"] = str(e) + + def _build_response(self, ctx: GenerationContext) -> ChatResponse: + """ + [AC-AISVC-02] Build final ChatResponse from generation context. + Step 8 of the generation pipeline. + """ + reply = ctx.llm_response.content if ctx.llm_response else self._fallback_response(ctx) + + confidence = ctx.confidence_result.confidence if ctx.confidence_result else 0.5 + should_transfer = ctx.confidence_result.should_transfer if ctx.confidence_result else True + transfer_reason = ctx.confidence_result.transfer_reason if ctx.confidence_result else None + + response_metadata = { + "session_id": ctx.session_id, + "channel_type": ctx.channel_type, + "diagnostics": ctx.diagnostics, + } + + return ChatResponse( + reply=reply, + confidence=confidence, + should_transfer=should_transfer, + transfer_reason=transfer_reason, + metadata=response_metadata, + ) + + async def generate_stream( + self, + tenant_id: str, + request: ChatRequest, + ) -> AsyncGenerator[ServerSentEvent, None]: + """ + Generate a streaming response. + [AC-AISVC-06, AC-AISVC-07, AC-AISVC-08] Yields SSE events in proper sequence. + + SSE Event Sequence (per design.md Section 6.2): + 1. message events (multiple) - each with incremental delta + 2. final event (exactly 1) - with complete response + 3. connection close + + OR on error: + 1. message events (0 or more) + 2. error event (exactly 1) + 3. connection close + """ + logger.info( + f"[AC-AISVC-06] Starting streaming generation for tenant={tenant_id}, " + f"session={request.session_id}" + ) + + state_machine = SSEStateMachine() + await state_machine.transition_to_streaming() + + ctx = GenerationContext( + tenant_id=tenant_id, + session_id=request.session_id, + current_message=request.current_message, + channel_type=request.channel_type.value, + request_metadata=request.metadata, + ) + + try: + await self._load_local_history(ctx) + await self._merge_context(ctx, request.history) + + if self._config.enable_rag and self._retriever: + await self._retrieve_evidence(ctx) + + full_reply = "" + + if self._llm_client: + async for event in self._stream_from_llm(ctx, state_machine): + if event.event == "message": + full_reply += self._extract_delta_from_event(event) + yield event + else: + async for event in self._stream_mock_response(ctx, state_machine): + if event.event == "message": + full_reply += self._extract_delta_from_event(event) + yield event + + if ctx.llm_response is None: + ctx.llm_response = LLMResponse( + content=full_reply, + model="streaming", + usage={}, + finish_reason="stop", + ) + + self._calculate_confidence(ctx) + + await self._save_messages(ctx) + + if await state_machine.transition_to_final(): + yield create_final_event( + reply=full_reply, + confidence=ctx.confidence_result.confidence if ctx.confidence_result else 0.5, + should_transfer=ctx.confidence_result.should_transfer if ctx.confidence_result else False, + transfer_reason=ctx.confidence_result.transfer_reason if ctx.confidence_result else None, + ) + + except Exception as e: + logger.error(f"[AC-AISVC-09] Error during streaming: {e}") + if await state_machine.transition_to_error(): + yield create_error_event( + code="GENERATION_ERROR", + message=str(e), + ) + finally: + await state_machine.close() + + async def _stream_from_llm( + self, + ctx: GenerationContext, + state_machine: SSEStateMachine, + ) -> AsyncGenerator[ServerSentEvent, None]: + """ + [AC-AISVC-07] Stream from LLM client, wrapping each chunk as message event. + """ + messages = self._build_llm_messages(ctx) + + async for chunk in self._llm_client.stream_generate(messages, self._llm_config): + if not state_machine.can_send_message(): + break + + if chunk.delta: + logger.debug(f"[AC-AISVC-07] Yielding message event with delta: {chunk.delta[:50]}...") + yield create_message_event(delta=chunk.delta) + + if chunk.finish_reason: + logger.info(f"[AC-AISVC-07] LLM stream finished with reason: {chunk.finish_reason}") + break + + async def _stream_mock_response( + self, + ctx: GenerationContext, + state_machine: SSEStateMachine, + ) -> AsyncGenerator[ServerSentEvent, None]: + """ + [AC-AISVC-07] Mock streaming response for demo/testing purposes. + Simulates LLM-style incremental output. + """ + import asyncio + + reply_parts = ["收到", "您的", "消息:", f" {ctx.current_message}"] + + for part in reply_parts: + if not state_machine.can_send_message(): + break + + logger.debug(f"[AC-AISVC-07] Yielding mock message event with delta: {part}") + yield create_message_event(delta=part) + await asyncio.sleep(0.05) + + def _extract_delta_from_event(self, event: ServerSentEvent) -> str: + """Extract delta content from a message event.""" + import json + try: + if event.data: + data = json.loads(event.data) + return data.get("delta", "") + except (json.JSONDecodeError, TypeError): + pass + return "" + + +_orchestrator_service: OrchestratorService | None = None + + +def get_orchestrator_service() -> OrchestratorService: + """Get or create orchestrator service instance.""" + global _orchestrator_service + if _orchestrator_service is None: + _orchestrator_service = OrchestratorService() + return _orchestrator_service + + +def set_orchestrator_service(service: OrchestratorService) -> None: + """Set orchestrator service instance for testing.""" + global _orchestrator_service + _orchestrator_service = service diff --git a/ai-service/app/services/retrieval/__init__.py b/ai-service/app/services/retrieval/__init__.py new file mode 100644 index 0000000..d6865d4 --- /dev/null +++ b/ai-service/app/services/retrieval/__init__.py @@ -0,0 +1,57 @@ +""" +Retrieval module for AI Service. +[AC-AISVC-16] Provides retriever implementations with plugin architecture. +RAG Optimization: Two-stage retrieval, RRF hybrid ranking, metadata filtering. +""" + +from app.services.retrieval.base import ( + BaseRetriever, + RetrievalContext, + RetrievalHit, + RetrievalResult, +) +from app.services.retrieval.vector_retriever import VectorRetriever, get_vector_retriever +from app.services.retrieval.metadata import ( + ChunkMetadata, + ChunkMetadataModel, + MetadataFilter, + KnowledgeChunk, + RetrieveRequest, + RetrieveResult, + RetrievalStrategy, +) +from app.services.retrieval.optimized_retriever import ( + OptimizedRetriever, + get_optimized_retriever, + TwoStageResult, + RRFCombiner, +) +from app.services.retrieval.indexer import ( + KnowledgeIndexer, + get_knowledge_indexer, + IndexingProgress, + IndexingResult, +) + +__all__ = [ + "BaseRetriever", + "RetrievalContext", + "RetrievalHit", + "RetrievalResult", + "VectorRetriever", + "get_vector_retriever", + "ChunkMetadata", + "MetadataFilter", + "KnowledgeChunk", + "RetrieveRequest", + "RetrieveResult", + "RetrievalStrategy", + "OptimizedRetriever", + "get_optimized_retriever", + "TwoStageResult", + "RRFCombiner", + "KnowledgeIndexer", + "get_knowledge_indexer", + "IndexingProgress", + "IndexingResult", +] diff --git a/ai-service/app/services/retrieval/base.py b/ai-service/app/services/retrieval/base.py new file mode 100644 index 0000000..fe20bc6 --- /dev/null +++ b/ai-service/app/services/retrieval/base.py @@ -0,0 +1,96 @@ +""" +Retrieval layer for AI Service. +[AC-AISVC-16] Abstract base class for retrievers with plugin point support. +""" + +import logging +from abc import ABC, abstractmethod +from dataclasses import dataclass, field +from typing import Any + +logger = logging.getLogger(__name__) + + +@dataclass +class RetrievalContext: + """ + [AC-AISVC-16] Context for retrieval operations. + Contains all necessary information for retrieval plugins. + """ + + tenant_id: str + query: str + session_id: str | None = None + channel_type: str | None = None + metadata: dict[str, Any] | None = None + + +@dataclass +class RetrievalHit: + """ + [AC-AISVC-16] Single retrieval result hit. + Unified structure for all retriever types. + """ + + text: str + score: float + source: str + metadata: dict[str, Any] = field(default_factory=dict) + + +@dataclass +class RetrievalResult: + """ + [AC-AISVC-16] Result from retrieval operation. + Contains hits and optional diagnostics. + """ + + hits: list[RetrievalHit] = field(default_factory=list) + diagnostics: dict[str, Any] | None = None + + @property + def is_empty(self) -> bool: + """Check if no hits were found.""" + return len(self.hits) == 0 + + @property + def max_score(self) -> float: + """Get the maximum score among hits.""" + if not self.hits: + return 0.0 + return max(hit.score for hit in self.hits) + + @property + def hit_count(self) -> int: + """Get the number of hits.""" + return len(self.hits) + + +class BaseRetriever(ABC): + """ + [AC-AISVC-16] Abstract base class for retrievers. + Provides plugin point for different retrieval strategies (Vector, Graph, Hybrid). + """ + + @abstractmethod + async def retrieve(self, ctx: RetrievalContext) -> RetrievalResult: + """ + [AC-AISVC-16] Retrieve relevant documents for the given context. + + Args: + ctx: Retrieval context containing tenant_id, query, and optional metadata. + + Returns: + RetrievalResult with hits and optional diagnostics. + """ + pass + + @abstractmethod + async def health_check(self) -> bool: + """ + Check if the retriever is healthy and ready to serve requests. + + Returns: + True if healthy, False otherwise. + """ + pass diff --git a/ai-service/app/services/retrieval/indexer.py b/ai-service/app/services/retrieval/indexer.py new file mode 100644 index 0000000..d701c57 --- /dev/null +++ b/ai-service/app/services/retrieval/indexer.py @@ -0,0 +1,339 @@ +""" +Knowledge base indexing service with optimized embedding. +Reference: rag-optimization/spec.md Section 5.1 +""" + +import asyncio +import logging +import uuid +from dataclasses import dataclass, field +from datetime import datetime +from typing import Any + +from app.core.config import get_settings +from app.core.qdrant_client import QdrantClient, get_qdrant_client +from app.services.embedding.nomic_provider import NomicEmbeddingProvider, NomicEmbeddingResult +from app.services.retrieval.metadata import ChunkMetadata, KnowledgeChunk + +logger = logging.getLogger(__name__) +settings = get_settings() + + +@dataclass +class IndexingProgress: + """Progress tracking for indexing jobs.""" + total_chunks: int = 0 + processed_chunks: int = 0 + failed_chunks: int = 0 + current_document: str = "" + started_at: datetime = field(default_factory=datetime.utcnow) + + @property + def progress_percent(self) -> int: + if self.total_chunks == 0: + return 0 + return int((self.processed_chunks / self.total_chunks) * 100) + + @property + def elapsed_seconds(self) -> float: + return (datetime.utcnow() - self.started_at).total_seconds() + + +@dataclass +class IndexingResult: + """Result of an indexing operation.""" + success: bool + total_chunks: int + indexed_chunks: int + failed_chunks: int + elapsed_seconds: float + error_message: str | None = None + + +class KnowledgeIndexer: + """ + Knowledge base indexer with optimized embedding. + + Features: + - Task prefixes (search_document:) for document embedding + - Multi-dimensional vectors (256/512/768) + - Metadata support + - Batch processing + """ + + def __init__( + self, + qdrant_client: QdrantClient | None = None, + embedding_provider: NomicEmbeddingProvider | None = None, + chunk_size: int = 500, + chunk_overlap: int = 50, + batch_size: int = 10, + ): + self._qdrant_client = qdrant_client + self._embedding_provider = embedding_provider + self._chunk_size = chunk_size + self._chunk_overlap = chunk_overlap + self._batch_size = batch_size + self._progress: IndexingProgress | None = None + + async def _get_client(self) -> QdrantClient: + if self._qdrant_client is None: + self._qdrant_client = await get_qdrant_client() + return self._qdrant_client + + async def _get_embedding_provider(self) -> NomicEmbeddingProvider: + if self._embedding_provider is None: + self._embedding_provider = NomicEmbeddingProvider( + base_url=settings.ollama_base_url, + model=settings.ollama_embedding_model, + dimension=settings.qdrant_vector_size, + ) + return self._embedding_provider + + def chunk_text(self, text: str, metadata: ChunkMetadata | None = None) -> list[KnowledgeChunk]: + """ + Split text into chunks for indexing. + Each line becomes a separate chunk for better retrieval granularity. + + Args: + text: Full text to chunk + metadata: Metadata to attach to each chunk + + Returns: + List of KnowledgeChunk objects + """ + chunks = [] + doc_id = str(uuid.uuid4()) + + lines = text.split('\n') + + for i, line in enumerate(lines): + line = line.strip() + + if len(line) < 10: + continue + + chunk = KnowledgeChunk( + chunk_id=f"{doc_id}_{i}", + document_id=doc_id, + content=line, + metadata=metadata or ChunkMetadata(), + ) + chunks.append(chunk) + + return chunks + + def chunk_text_by_lines( + self, + text: str, + metadata: ChunkMetadata | None = None, + min_line_length: int = 10, + merge_short_lines: bool = False, + ) -> list[KnowledgeChunk]: + """ + Split text by lines, each line is a separate chunk. + + Args: + text: Full text to chunk + metadata: Metadata to attach to each chunk + min_line_length: Minimum line length to be indexed + merge_short_lines: Whether to merge consecutive short lines + + Returns: + List of KnowledgeChunk objects + """ + chunks = [] + doc_id = str(uuid.uuid4()) + + lines = text.split('\n') + + if merge_short_lines: + merged_lines = [] + current_line = "" + + for line in lines: + line = line.strip() + if not line: + if current_line: + merged_lines.append(current_line) + current_line = "" + continue + + if current_line: + current_line += " " + line + else: + current_line = line + + if len(current_line) >= min_line_length * 2: + merged_lines.append(current_line) + current_line = "" + + if current_line: + merged_lines.append(current_line) + + lines = merged_lines + + for i, line in enumerate(lines): + line = line.strip() + + if len(line) < min_line_length: + continue + + chunk = KnowledgeChunk( + chunk_id=f"{doc_id}_{i}", + document_id=doc_id, + content=line, + metadata=metadata or ChunkMetadata(), + ) + chunks.append(chunk) + + return chunks + + async def index_document( + self, + tenant_id: str, + document_id: str, + text: str, + metadata: ChunkMetadata | None = None, + ) -> IndexingResult: + """ + Index a single document with optimized embedding. + + Args: + tenant_id: Tenant identifier + document_id: Document identifier + text: Document text content + metadata: Optional metadata for the document + + Returns: + IndexingResult with status and statistics + """ + start_time = datetime.utcnow() + + try: + client = await self._get_client() + provider = await self._get_embedding_provider() + + await client.ensure_collection_exists(tenant_id, use_multi_vector=True) + + chunks = self.chunk_text(text, metadata) + + self._progress = IndexingProgress( + total_chunks=len(chunks), + current_document=document_id, + ) + + points = [] + for i, chunk in enumerate(chunks): + try: + embedding_result = await provider.embed_document(chunk.content) + + chunk.embedding_full = embedding_result.embedding_full + chunk.embedding_256 = embedding_result.embedding_256 + chunk.embedding_512 = embedding_result.embedding_512 + + point = { + "id": str(uuid.uuid4()), # Generate a valid UUID for Qdrant + "vector": { + "full": chunk.embedding_full, + "dim_256": chunk.embedding_256, + "dim_512": chunk.embedding_512, + }, + "payload": { + "chunk_id": chunk.chunk_id, + "document_id": document_id, + "text": chunk.content, + "metadata": chunk.metadata.to_dict(), + "created_at": chunk.created_at.isoformat(), + } + } + points.append(point) + + self._progress.processed_chunks += 1 + + logger.debug( + f"[RAG-OPT] Indexed chunk {i+1}/{len(chunks)} for doc={document_id}" + ) + + except Exception as e: + logger.warning(f"[RAG-OPT] Failed to index chunk {i}: {e}") + self._progress.failed_chunks += 1 + + if points: + await client.upsert_multi_vector(tenant_id, points) + + elapsed = (datetime.utcnow() - start_time).total_seconds() + + logger.info( + f"[RAG-OPT] Indexed document {document_id}: " + f"{len(points)} chunks in {elapsed:.2f}s" + ) + + return IndexingResult( + success=True, + total_chunks=len(chunks), + indexed_chunks=len(points), + failed_chunks=self._progress.failed_chunks, + elapsed_seconds=elapsed, + ) + + except Exception as e: + elapsed = (datetime.utcnow() - start_time).total_seconds() + logger.error(f"[RAG-OPT] Failed to index document {document_id}: {e}") + + return IndexingResult( + success=False, + total_chunks=0, + indexed_chunks=0, + failed_chunks=0, + elapsed_seconds=elapsed, + error_message=str(e), + ) + + async def index_documents_batch( + self, + tenant_id: str, + documents: list[dict[str, Any]], + ) -> list[IndexingResult]: + """ + Index multiple documents in batch. + + Args: + tenant_id: Tenant identifier + documents: List of documents with format: + { + "document_id": str, + "text": str, + "metadata": ChunkMetadata (optional) + } + + Returns: + List of IndexingResult for each document + """ + results = [] + + for doc in documents: + result = await self.index_document( + tenant_id=tenant_id, + document_id=doc["document_id"], + text=doc["text"], + metadata=doc.get("metadata"), + ) + results.append(result) + + return results + + def get_progress(self) -> IndexingProgress | None: + """Get current indexing progress.""" + return self._progress + + +_knowledge_indexer: KnowledgeIndexer | None = None + + +def get_knowledge_indexer() -> KnowledgeIndexer: + """Get or create KnowledgeIndexer instance.""" + global _knowledge_indexer + if _knowledge_indexer is None: + _knowledge_indexer = KnowledgeIndexer() + return _knowledge_indexer diff --git a/ai-service/app/services/retrieval/metadata.py b/ai-service/app/services/retrieval/metadata.py new file mode 100644 index 0000000..3dbe753 --- /dev/null +++ b/ai-service/app/services/retrieval/metadata.py @@ -0,0 +1,210 @@ +""" +Metadata models for RAG optimization. +Implements structured metadata for knowledge chunks. +Reference: rag-optimization/spec.md Section 3.2 +""" + +from dataclasses import dataclass, field +from datetime import date, datetime +from enum import Enum +from typing import Any + +from pydantic import BaseModel + + +class RetrievalStrategy(str, Enum): + """Retrieval strategy options.""" + VECTOR_ONLY = "vector" + BM25_ONLY = "bm25" + HYBRID = "hybrid" + TWO_STAGE = "two_stage" + + +class ChunkMetadataModel(BaseModel): + """Pydantic model for API serialization.""" + category: str = "" + subcategory: str = "" + target_audience: list[str] = [] + source_doc: str = "" + source_url: str = "" + department: str = "" + valid_from: str | None = None + valid_until: str | None = None + priority: int = 5 + keywords: list[str] = [] + + +@dataclass +class ChunkMetadata: + """ + Metadata for knowledge chunks. + Reference: rag-optimization/spec.md Section 3.2.2 + """ + category: str = "" + subcategory: str = "" + target_audience: list[str] = field(default_factory=list) + source_doc: str = "" + source_url: str = "" + department: str = "" + valid_from: date | None = None + valid_until: date | None = None + priority: int = 5 + keywords: list[str] = field(default_factory=list) + + def to_dict(self) -> dict[str, Any]: + """Convert to dictionary for storage.""" + return { + "category": self.category, + "subcategory": self.subcategory, + "target_audience": self.target_audience, + "source_doc": self.source_doc, + "source_url": self.source_url, + "department": self.department, + "valid_from": self.valid_from.isoformat() if self.valid_from else None, + "valid_until": self.valid_until.isoformat() if self.valid_until else None, + "priority": self.priority, + "keywords": self.keywords, + } + + @classmethod + def from_dict(cls, data: dict[str, Any]) -> "ChunkMetadata": + """Create from dictionary.""" + return cls( + category=data.get("category", ""), + subcategory=data.get("subcategory", ""), + target_audience=data.get("target_audience", []), + source_doc=data.get("source_doc", ""), + source_url=data.get("source_url", ""), + department=data.get("department", ""), + valid_from=date.fromisoformat(data["valid_from"]) if data.get("valid_from") else None, + valid_until=date.fromisoformat(data["valid_until"]) if data.get("valid_until") else None, + priority=data.get("priority", 5), + keywords=data.get("keywords", []), + ) + + +@dataclass +class MetadataFilter: + """ + Filter conditions for metadata-based retrieval. + Reference: rag-optimization/spec.md Section 4.1 + """ + categories: list[str] | None = None + target_audiences: list[str] | None = None + departments: list[str] | None = None + valid_only: bool = True + min_priority: int | None = None + keywords: list[str] | None = None + + def to_qdrant_filter(self) -> dict[str, Any] | None: + """Convert to Qdrant filter format.""" + conditions = [] + + if self.categories: + conditions.append({ + "key": "metadata.category", + "match": {"any": self.categories} + }) + + if self.departments: + conditions.append({ + "key": "metadata.department", + "match": {"any": self.departments} + }) + + if self.target_audiences: + conditions.append({ + "key": "metadata.target_audience", + "match": {"any": self.target_audiences} + }) + + if self.valid_only: + today = date.today().isoformat() + conditions.append({ + "should": [ + {"key": "metadata.valid_until", "match": {"value": None}}, + {"key": "metadata.valid_until", "range": {"gte": today}} + ] + }) + + if self.min_priority is not None: + conditions.append({ + "key": "metadata.priority", + "range": {"lte": self.min_priority} + }) + + if not conditions: + return None + + if len(conditions) == 1: + return {"must": conditions} + + return {"must": conditions} + + +@dataclass +class KnowledgeChunk: + """ + Knowledge chunk with multi-dimensional embeddings. + Reference: rag-optimization/spec.md Section 3.2.1 + """ + chunk_id: str + document_id: str + content: str + embedding_full: list[float] = field(default_factory=list) + embedding_256: list[float] = field(default_factory=list) + embedding_512: list[float] = field(default_factory=list) + metadata: ChunkMetadata = field(default_factory=ChunkMetadata) + created_at: datetime = field(default_factory=datetime.utcnow) + updated_at: datetime = field(default_factory=datetime.utcnow) + + def to_qdrant_point(self, point_id: int | str) -> dict[str, Any]: + """Convert to Qdrant point format.""" + return { + "id": point_id, + "vector": { + "full": self.embedding_full, + "dim_256": self.embedding_256, + "dim_512": self.embedding_512, + }, + "payload": { + "chunk_id": self.chunk_id, + "document_id": self.document_id, + "text": self.content, + "metadata": self.metadata.to_dict(), + "created_at": self.created_at.isoformat(), + "updated_at": self.updated_at.isoformat(), + } + } + + +@dataclass +class RetrieveRequest: + """ + Request for knowledge retrieval. + Reference: rag-optimization/spec.md Section 4.1 + """ + query: str + query_with_prefix: str = "" + top_k: int = 10 + filters: MetadataFilter | None = None + strategy: RetrievalStrategy = RetrievalStrategy.HYBRID + + def __post_init__(self): + if not self.query_with_prefix: + self.query_with_prefix = f"search_query:{self.query}" + + +@dataclass +class RetrieveResult: + """ + Result from knowledge retrieval. + Reference: rag-optimization/spec.md Section 4.1 + """ + chunk_id: str + content: str + score: float + vector_score: float = 0.0 + bm25_score: float = 0.0 + metadata: ChunkMetadata = field(default_factory=ChunkMetadata) + rank: int = 0 diff --git a/ai-service/app/services/retrieval/optimized_retriever.py b/ai-service/app/services/retrieval/optimized_retriever.py new file mode 100644 index 0000000..1c773d8 --- /dev/null +++ b/ai-service/app/services/retrieval/optimized_retriever.py @@ -0,0 +1,509 @@ +""" +Optimized RAG retriever with two-stage retrieval and RRF hybrid ranking. +Reference: rag-optimization/spec.md Section 2.2, 2.4, 2.5 +""" + +import asyncio +import logging +import re +from dataclasses import dataclass, field +from typing import Any + +from app.core.config import get_settings +from app.core.qdrant_client import QdrantClient, get_qdrant_client +from app.services.embedding.nomic_provider import NomicEmbeddingProvider, NomicEmbeddingResult +from app.services.retrieval.base import ( + BaseRetriever, + RetrievalContext, + RetrievalHit, + RetrievalResult, +) +from app.services.retrieval.metadata import ( + ChunkMetadata, + MetadataFilter, + RetrieveResult, + RetrievalStrategy, +) + +logger = logging.getLogger(__name__) +settings = get_settings() + + +@dataclass +class TwoStageResult: + """Result from two-stage retrieval.""" + candidates: list[dict[str, Any]] + final_results: list[RetrieveResult] + stage1_latency_ms: float = 0.0 + stage2_latency_ms: float = 0.0 + + +class RRFCombiner: + """ + Reciprocal Rank Fusion for combining multiple retrieval results. + Reference: rag-optimization/spec.md Section 2.5 + + Formula: score = Σ(1 / (k + rank_i)) + Default k = 60 + """ + + def __init__(self, k: int = 60): + self._k = k + + def combine( + self, + vector_results: list[dict[str, Any]], + bm25_results: list[dict[str, Any]], + vector_weight: float = 0.7, + bm25_weight: float = 0.3, + ) -> list[dict[str, Any]]: + """ + Combine vector and BM25 results using RRF. + + Args: + vector_results: Results from vector search + bm25_results: Results from BM25 search + vector_weight: Weight for vector results + bm25_weight: Weight for BM25 results + + Returns: + Combined and sorted results + """ + combined_scores: dict[str, dict[str, Any]] = {} + + for rank, result in enumerate(vector_results): + chunk_id = result.get("chunk_id") or result.get("id", str(rank)) + rrf_score = vector_weight / (self._k + rank + 1) + + if chunk_id not in combined_scores: + combined_scores[chunk_id] = { + "score": 0.0, + "vector_score": result.get("score", 0.0), + "bm25_score": 0.0, + "vector_rank": rank, + "bm25_rank": -1, + "payload": result.get("payload", {}), + "id": chunk_id, + } + + combined_scores[chunk_id]["score"] += rrf_score + + for rank, result in enumerate(bm25_results): + chunk_id = result.get("chunk_id") or result.get("id", str(rank)) + rrf_score = bm25_weight / (self._k + rank + 1) + + if chunk_id not in combined_scores: + combined_scores[chunk_id] = { + "score": 0.0, + "vector_score": 0.0, + "bm25_score": result.get("score", 0.0), + "vector_rank": -1, + "bm25_rank": rank, + "payload": result.get("payload", {}), + "id": chunk_id, + } + else: + combined_scores[chunk_id]["bm25_score"] = result.get("score", 0.0) + combined_scores[chunk_id]["bm25_rank"] = rank + + combined_scores[chunk_id]["score"] += rrf_score + + sorted_results = sorted( + combined_scores.values(), + key=lambda x: x["score"], + reverse=True + ) + + return sorted_results + + +class OptimizedRetriever(BaseRetriever): + """ + Optimized retriever with: + - Task prefixes (search_document/search_query) + - Two-stage retrieval (256 dim -> 768 dim) + - RRF hybrid ranking (vector + BM25) + - Metadata filtering + + Reference: rag-optimization/spec.md Section 2, 3, 4 + """ + + def __init__( + self, + qdrant_client: QdrantClient | None = None, + embedding_provider: NomicEmbeddingProvider | None = None, + top_k: int | None = None, + score_threshold: float | None = None, + min_hits: int | None = None, + two_stage_enabled: bool | None = None, + two_stage_expand_factor: int | None = None, + hybrid_enabled: bool | None = None, + rrf_k: int | None = None, + ): + self._qdrant_client = qdrant_client + self._embedding_provider = embedding_provider + self._top_k = top_k or settings.rag_top_k + self._score_threshold = score_threshold or settings.rag_score_threshold + self._min_hits = min_hits or settings.rag_min_hits + self._two_stage_enabled = two_stage_enabled if two_stage_enabled is not None else settings.rag_two_stage_enabled + self._two_stage_expand_factor = two_stage_expand_factor or settings.rag_two_stage_expand_factor + self._hybrid_enabled = hybrid_enabled if hybrid_enabled is not None else settings.rag_hybrid_enabled + self._rrf_k = rrf_k or settings.rag_rrf_k + self._rrf_combiner = RRFCombiner(k=self._rrf_k) + + async def _get_client(self) -> QdrantClient: + if self._qdrant_client is None: + self._qdrant_client = await get_qdrant_client() + return self._qdrant_client + + async def _get_embedding_provider(self) -> NomicEmbeddingProvider: + if self._embedding_provider is None: + from app.services.embedding.factory import get_embedding_config_manager + manager = get_embedding_config_manager() + provider = await manager.get_provider() + if isinstance(provider, NomicEmbeddingProvider): + self._embedding_provider = provider + else: + self._embedding_provider = NomicEmbeddingProvider( + base_url=settings.ollama_base_url, + model=settings.ollama_embedding_model, + dimension=settings.qdrant_vector_size, + ) + return self._embedding_provider + + async def retrieve(self, ctx: RetrievalContext) -> RetrievalResult: + """ + Retrieve documents using optimized strategy. + + Strategy selection: + 1. If two_stage_enabled: use two-stage retrieval + 2. If hybrid_enabled: use RRF hybrid ranking + 3. Otherwise: simple vector search + """ + logger.info( + f"[RAG-OPT] Starting retrieval for tenant={ctx.tenant_id}, " + f"query={ctx.query[:50]}..., two_stage={self._two_stage_enabled}, hybrid={self._hybrid_enabled}" + ) + logger.info( + f"[RAG-OPT] Retrieval config: top_k={self._top_k}, " + f"score_threshold={self._score_threshold}, min_hits={self._min_hits}" + ) + + try: + provider = await self._get_embedding_provider() + logger.info(f"[RAG-OPT] Using embedding provider: {type(provider).__name__}") + + embedding_result = await provider.embed_query(ctx.query) + logger.info( + f"[RAG-OPT] Embedding generated: full_dim={len(embedding_result.embedding_full)}, " + f"dim_256={'available' if embedding_result.embedding_256 else 'not available'}" + ) + + if self._two_stage_enabled: + logger.info("[RAG-OPT] Using two-stage retrieval strategy") + results = await self._two_stage_retrieve( + ctx.tenant_id, + embedding_result, + self._top_k, + ) + elif self._hybrid_enabled: + logger.info("[RAG-OPT] Using hybrid retrieval strategy") + results = await self._hybrid_retrieve( + ctx.tenant_id, + embedding_result, + ctx.query, + self._top_k, + ) + else: + logger.info("[RAG-OPT] Using simple vector retrieval strategy") + results = await self._vector_retrieve( + ctx.tenant_id, + embedding_result.embedding_full, + self._top_k, + ) + + logger.info(f"[RAG-OPT] Raw results count: {len(results)}") + + retrieval_hits = [ + RetrievalHit( + text=result.get("payload", {}).get("text", ""), + score=result.get("score", 0.0), + source="optimized_rag", + metadata=result.get("payload", {}), + ) + for result in results + if result.get("score", 0.0) >= self._score_threshold + ] + + filtered_count = len(results) - len(retrieval_hits) + if filtered_count > 0: + logger.info( + f"[RAG-OPT] Filtered out {filtered_count} results below threshold {self._score_threshold}" + ) + + is_insufficient = len(retrieval_hits) < self._min_hits + + diagnostics = { + "query_length": len(ctx.query), + "top_k": self._top_k, + "score_threshold": self._score_threshold, + "two_stage_enabled": self._two_stage_enabled, + "hybrid_enabled": self._hybrid_enabled, + "total_hits": len(retrieval_hits), + "is_insufficient": is_insufficient, + "max_score": max((h.score for h in retrieval_hits), default=0.0), + "raw_results_count": len(results), + "filtered_below_threshold": filtered_count, + } + + logger.info( + f"[RAG-OPT] Retrieval complete: {len(retrieval_hits)} hits, " + f"insufficient={is_insufficient}, max_score={diagnostics['max_score']:.3f}" + ) + + if len(retrieval_hits) == 0: + logger.warning( + f"[RAG-OPT] No hits found! tenant={ctx.tenant_id}, query={ctx.query[:50]}..., " + f"raw_results={len(results)}, threshold={self._score_threshold}" + ) + + return RetrievalResult( + hits=retrieval_hits, + diagnostics=diagnostics, + ) + + except Exception as e: + logger.error(f"[RAG-OPT] Retrieval error: {e}", exc_info=True) + return RetrievalResult( + hits=[], + diagnostics={"error": str(e), "is_insufficient": True}, + ) + + async def _two_stage_retrieve( + self, + tenant_id: str, + embedding_result: NomicEmbeddingResult, + top_k: int, + ) -> list[dict[str, Any]]: + """ + Two-stage retrieval using Matryoshka dimensions. + + Stage 1: Fast retrieval with 256-dim vectors + Stage 2: Precise reranking with 768-dim vectors + + Reference: rag-optimization/spec.md Section 2.4 + """ + import time + + client = await self._get_client() + + stage1_start = time.perf_counter() + candidates = await self._search_with_dimension( + client, tenant_id, embedding_result.embedding_256, "dim_256", + top_k * self._two_stage_expand_factor + ) + stage1_latency = (time.perf_counter() - stage1_start) * 1000 + + logger.debug( + f"[RAG-OPT] Stage 1: {len(candidates)} candidates in {stage1_latency:.2f}ms" + ) + + stage2_start = time.perf_counter() + reranked = [] + for candidate in candidates: + stored_full_embedding = candidate.get("payload", {}).get("embedding_full", []) + if stored_full_embedding: + import numpy as np + similarity = self._cosine_similarity( + embedding_result.embedding_full, + stored_full_embedding + ) + candidate["score"] = similarity + candidate["stage"] = "reranked" + reranked.append(candidate) + + reranked.sort(key=lambda x: x.get("score", 0), reverse=True) + results = reranked[:top_k] + stage2_latency = (time.perf_counter() - stage2_start) * 1000 + + logger.debug( + f"[RAG-OPT] Stage 2: {len(results)} final results in {stage2_latency:.2f}ms" + ) + + return results + + async def _hybrid_retrieve( + self, + tenant_id: str, + embedding_result: NomicEmbeddingResult, + query: str, + top_k: int, + ) -> list[dict[str, Any]]: + """ + Hybrid retrieval using RRF to combine vector and BM25 results. + + Reference: rag-optimization/spec.md Section 2.5 + """ + client = await self._get_client() + + vector_task = self._search_with_dimension( + client, tenant_id, embedding_result.embedding_full, "full", + top_k * 2 + ) + + bm25_task = self._bm25_search(client, tenant_id, query, top_k * 2) + + vector_results, bm25_results = await asyncio.gather( + vector_task, bm25_task, return_exceptions=True + ) + + if isinstance(vector_results, Exception): + logger.warning(f"[RAG-OPT] Vector search failed: {vector_results}") + vector_results = [] + + if isinstance(bm25_results, Exception): + logger.warning(f"[RAG-OPT] BM25 search failed: {bm25_results}") + bm25_results = [] + + combined = self._rrf_combiner.combine( + vector_results, + bm25_results, + vector_weight=settings.rag_vector_weight, + bm25_weight=settings.rag_bm25_weight, + ) + + return combined[:top_k] + + async def _vector_retrieve( + self, + tenant_id: str, + embedding: list[float], + top_k: int, + ) -> list[dict[str, Any]]: + """Simple vector retrieval.""" + client = await self._get_client() + return await self._search_with_dimension( + client, tenant_id, embedding, "full", top_k + ) + + async def _search_with_dimension( + self, + client: QdrantClient, + tenant_id: str, + query_vector: list[float], + vector_name: str, + limit: int, + ) -> list[dict[str, Any]]: + """Search using specified vector dimension.""" + try: + qdrant = await client.get_client() + collection_name = client.get_collection_name(tenant_id) + + logger.info( + f"[RAG-OPT] Searching collection={collection_name}, " + f"vector_name={vector_name}, limit={limit}, vector_dim={len(query_vector)}" + ) + + results = await qdrant.search( + collection_name=collection_name, + query_vector=(vector_name, query_vector), + limit=limit, + ) + + logger.info( + f"[RAG-OPT] Search returned {len(results)} results from collection={collection_name}" + ) + + if len(results) > 0: + for i, r in enumerate(results[:3]): + logger.debug( + f"[RAG-OPT] Result {i+1}: id={r.id}, score={r.score:.4f}" + ) + + return [ + { + "id": str(result.id), + "score": result.score, + "payload": result.payload or {}, + } + for result in results + ] + except Exception as e: + logger.error( + f"[RAG-OPT] Search with {vector_name} failed: {e}, " + f"collection_name={client.get_collection_name(tenant_id)}", + exc_info=True + ) + return [] + + async def _bm25_search( + self, + client: QdrantClient, + tenant_id: str, + query: str, + limit: int, + ) -> list[dict[str, Any]]: + """ + BM25-like search using Qdrant's sparse vectors or fallback to text matching. + This is a simplified implementation; for production, use Elasticsearch. + """ + try: + qdrant = await client.get_client() + collection_name = client.get_collection_name(tenant_id) + + query_terms = set(re.findall(r'\w+', query.lower())) + + results = await qdrant.scroll( + collection_name=collection_name, + limit=limit * 3, + with_payload=True, + ) + + scored_results = [] + for point in results[0]: + text = point.payload.get("text", "").lower() + text_terms = set(re.findall(r'\w+', text)) + overlap = len(query_terms & text_terms) + if overlap > 0: + score = overlap / (len(query_terms) + len(text_terms) - overlap) + scored_results.append({ + "id": str(point.id), + "score": score, + "payload": point.payload or {}, + }) + + scored_results.sort(key=lambda x: x["score"], reverse=True) + return scored_results[:limit] + + except Exception as e: + logger.debug(f"[RAG-OPT] BM25 search failed: {e}") + return [] + + def _cosine_similarity(self, vec1: list[float], vec2: list[float]) -> float: + """Calculate cosine similarity between two vectors.""" + import numpy as np + a = np.array(vec1) + b = np.array(vec2) + return float(np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b))) + + async def health_check(self) -> bool: + """Check if retriever is healthy.""" + try: + client = await self._get_client() + qdrant = await client.get_client() + await qdrant.get_collections() + return True + except Exception as e: + logger.error(f"[RAG-OPT] Health check failed: {e}") + return False + + +_optimized_retriever: OptimizedRetriever | None = None + + +async def get_optimized_retriever() -> OptimizedRetriever: + """Get or create OptimizedRetriever instance.""" + global _optimized_retriever + if _optimized_retriever is None: + _optimized_retriever = OptimizedRetriever() + return _optimized_retriever diff --git a/ai-service/app/services/retrieval/vector_retriever.py b/ai-service/app/services/retrieval/vector_retriever.py new file mode 100644 index 0000000..eba3fa0 --- /dev/null +++ b/ai-service/app/services/retrieval/vector_retriever.py @@ -0,0 +1,169 @@ +""" +Vector retriever for AI Service. +[AC-AISVC-16, AC-AISVC-17] Qdrant-based vector retrieval with score threshold filtering. +""" + +import logging +from typing import Any + +from app.core.config import get_settings +from app.core.qdrant_client import QdrantClient, get_qdrant_client +from app.services.retrieval.base import ( + BaseRetriever, + RetrievalContext, + RetrievalHit, + RetrievalResult, +) + +logger = logging.getLogger(__name__) + +settings = get_settings() + + +class VectorRetriever(BaseRetriever): + """ + [AC-AISVC-16, AC-AISVC-17] Vector-based retriever using Qdrant. + Supports score threshold filtering and tenant isolation. + """ + + def __init__( + self, + qdrant_client: QdrantClient | None = None, + top_k: int | None = None, + score_threshold: float | None = None, + min_hits: int | None = None, + ): + self._qdrant_client = qdrant_client + self._top_k = top_k or settings.rag_top_k + self._score_threshold = score_threshold or settings.rag_score_threshold + self._min_hits = min_hits or settings.rag_min_hits + + async def _get_client(self) -> QdrantClient: + """Get Qdrant client instance.""" + if self._qdrant_client is None: + self._qdrant_client = await get_qdrant_client() + return self._qdrant_client + + async def retrieve(self, ctx: RetrievalContext) -> RetrievalResult: + """ + [AC-AISVC-16, AC-AISVC-17] Retrieve documents from vector store. + + Steps: + 1. Generate embedding for query (placeholder - requires embedding provider) + 2. Search in tenant's collection + 3. Filter by score threshold + 4. Return structured result + + Args: + ctx: Retrieval context with tenant_id and query. + + Returns: + RetrievalResult with filtered hits. + """ + logger.info( + f"[AC-AISVC-16] Starting vector retrieval for tenant={ctx.tenant_id}, " + f"query={ctx.query[:50]}..." + ) + logger.info( + f"[AC-AISVC-16] Retrieval config: top_k={self._top_k}, " + f"score_threshold={self._score_threshold}, min_hits={self._min_hits}" + ) + + try: + client = await self._get_client() + logger.info(f"[AC-AISVC-16] Got Qdrant client: {type(client).__name__}") + + logger.info("[AC-AISVC-16] Generating embedding for query...") + query_vector = await self._get_embedding(ctx.query) + logger.info(f"[AC-AISVC-16] Embedding generated: dim={len(query_vector)}") + + logger.info(f"[AC-AISVC-16] Searching in tenant collection: tenant_id={ctx.tenant_id}") + hits = await client.search( + tenant_id=ctx.tenant_id, + query_vector=query_vector, + limit=self._top_k, + score_threshold=self._score_threshold, + ) + + logger.info(f"[AC-AISVC-16] Search returned {len(hits)} raw hits") + + retrieval_hits = [ + RetrievalHit( + text=hit.get("payload", {}).get("text", ""), + score=hit.get("score", 0.0), + source=hit.get("payload", {}).get("source", "vector"), + metadata=hit.get("payload", {}), + ) + for hit in hits + if hit.get("score", 0.0) >= self._score_threshold + ] + + is_insufficient = len(retrieval_hits) < self._min_hits + + diagnostics = { + "query_length": len(ctx.query), + "top_k": self._top_k, + "score_threshold": self._score_threshold, + "min_hits": self._min_hits, + "total_candidates": len(hits), + "filtered_hits": len(retrieval_hits), + "is_insufficient": is_insufficient, + "max_score": max((h.score for h in retrieval_hits), default=0.0), + } + + logger.info( + f"[AC-AISVC-17] Retrieval complete: {len(retrieval_hits)} hits, " + f"insufficient={is_insufficient}, max_score={diagnostics['max_score']:.3f}" + ) + + if len(retrieval_hits) == 0: + logger.warning( + f"[AC-AISVC-17] No hits found! tenant={ctx.tenant_id}, " + f"query={ctx.query[:50]}..., raw_hits={len(hits)}, threshold={self._score_threshold}" + ) + + return RetrievalResult( + hits=retrieval_hits, + diagnostics=diagnostics, + ) + + except Exception as e: + logger.error(f"[AC-AISVC-16] Retrieval error: {e}", exc_info=True) + return RetrievalResult( + hits=[], + diagnostics={"error": str(e), "is_insufficient": True}, + ) + + async def _get_embedding(self, text: str) -> list[float]: + """ + Generate embedding for text using pluggable embedding provider. + [AC-AISVC-29] Uses configured embedding provider. + """ + from app.services.embedding import get_embedding_provider + + provider = await get_embedding_provider() + return await provider.embed(text) + + async def health_check(self) -> bool: + """ + [AC-AISVC-16] Check if Qdrant connection is healthy. + """ + try: + client = await self._get_client() + qdrant = await client.get_client() + await qdrant.get_collections() + return True + except Exception as e: + logger.error(f"[AC-AISVC-16] Health check failed: {e}") + return False + + +_vector_retriever: VectorRetriever | None = None + + +async def get_vector_retriever() -> VectorRetriever: + """Get or create VectorRetriever instance.""" + global _vector_retriever + if _vector_retriever is None: + _vector_retriever = VectorRetriever() + return _vector_retriever diff --git a/ai-service/pyproject.toml b/ai-service/pyproject.toml new file mode 100644 index 0000000..ae928af --- /dev/null +++ b/ai-service/pyproject.toml @@ -0,0 +1,57 @@ +[project] +name = "ai-service" +version = "0.1.0" +description = "Python AI Service for intelligent chat with RAG support" +readme = "README.md" +requires-python = ">=3.10" +dependencies = [ + "fastapi>=0.109.0", + "uvicorn[standard]>=0.27.0", + "pydantic>=2.5.0", + "pydantic-settings>=2.1.0", + "sse-starlette>=2.0.0", + "httpx>=0.26.0", + "tenacity>=8.2.0", + "sqlmodel>=0.0.14", + "asyncpg>=0.29.0", + "qdrant-client>=1.7.0", + "tiktoken>=0.5.0", + "openpyxl>=3.1.0", + "python-docx>=1.1.0", + "pymupdf>=1.23.0", + "pdfplumber>=0.10.0", +] + +[project.optional-dependencies] +dev = [ + "pytest>=7.4.0", + "pytest-asyncio>=0.23.0", + "pytest-cov>=4.1.0", + "httpx>=0.26.0", + "ruff>=0.1.0", + "mypy>=1.8.0", +] + +[build-system] +requires = ["hatchling"] +build-backend = "hatchling.build" + +[tool.hatch.build.targets.wheel] +packages = ["app"] + +[tool.ruff] +line-length = 120 +target-version = "py310" + +[tool.ruff.lint] +select = ["E", "F", "I", "N", "W", "UP"] + +[tool.mypy] +python_version = "3.10" +strict = true +warn_return_any = true +warn_unused_configs = true + +[tool.pytest.ini_options] +asyncio_mode = "auto" +testpaths = ["tests"] diff --git a/ai-service/scripts/check_qdrant.py b/ai-service/scripts/check_qdrant.py new file mode 100644 index 0000000..1612bf3 --- /dev/null +++ b/ai-service/scripts/check_qdrant.py @@ -0,0 +1,80 @@ +""" +Check Qdrant vector database contents - detailed view. +""" +import asyncio +import sys +sys.path.insert(0, ".") + +from qdrant_client import AsyncQdrantClient +from app.core.config import get_settings +from collections import defaultdict + +settings = get_settings() + + +async def check_qdrant(): + """Check Qdrant collections and vectors.""" + client = AsyncQdrantClient(url=settings.qdrant_url, check_compatibility=False) + + print(f"\n{'='*60}") + print(f"Qdrant URL: {settings.qdrant_url}") + print(f"{'='*60}\n") + + # List all collections + collections = await client.get_collections() + + # Check kb_default collection + for c in collections.collections: + if c.name == "kb_default": + print(f"\n--- Collection: {c.name} ---") + + # Get collection info + info = await client.get_collection(c.name) + print(f" Total vectors: {info.points_count}") + + # Scroll through all points and group by source + all_points = [] + offset = None + + while True: + points, offset = await client.scroll( + collection_name=c.name, + limit=100, + offset=offset, + with_payload=True, + with_vectors=False, + ) + all_points.extend(points) + if offset is None: + break + + # Group by source + by_source = defaultdict(list) + for p in all_points: + source = p.payload.get("source", "unknown") if p.payload else "unknown" + by_source[source].append(p) + + print(f"\n Documents by source:") + for source, points in by_source.items(): + print(f"\n Source: {source}") + print(f" Chunks: {len(points)}") + + # Check first chunk content + first_point = points[0] + text = first_point.payload.get("text", "") if first_point.payload else "" + + # Check if it's binary garbage or proper text + is_garbage = any(ord(c) > 0xFFFF or (ord(c) < 32 and c not in '\n\r\t') for c in text[:200]) + + if is_garbage: + print(f" Status: ❌ BINARY GARBAGE (parsing failed)") + else: + print(f" Status: ✅ PROPER TEXT (parsed correctly)") + + print(f" Preview: {text[:150]}...") + + await client.close() + + +if __name__ == "__main__": + asyncio.run(check_qdrant()) diff --git a/ai-service/scripts/cleanup_garbage.py b/ai-service/scripts/cleanup_garbage.py new file mode 100644 index 0000000..5948034 --- /dev/null +++ b/ai-service/scripts/cleanup_garbage.py @@ -0,0 +1,115 @@ +""" +Clean up garbage data from Qdrant vector database. +Removes vectors that contain binary garbage (failed parsing results). +""" +import asyncio +import sys +sys.path.insert(0, ".") + +from qdrant_client import AsyncQdrantClient +from qdrant_client.models import PointIdsList +from app.core.config import get_settings +from collections import defaultdict + +settings = get_settings() + + +def is_garbage_text(text: str) -> bool: + """Check if text contains binary garbage.""" + if not text: + return True + + sample = text[:500] + + garbage_chars = sum(1 for c in sample if ord(c) > 0xFFFF or (ord(c) < 32 and c not in '\n\r\t')) + + return garbage_chars > len(sample) * 0.1 + + +async def cleanup_garbage(): + """Clean up garbage data from Qdrant.""" + client = AsyncQdrantClient(url=settings.qdrant_url, check_compatibility=False) + + print(f"\n{'='*60}") + print(f"Cleaning up garbage data from Qdrant") + print(f"URL: {settings.qdrant_url}") + print(f"{'='*60}\n") + + collections = await client.get_collections() + + for c in collections.collections: + if not c.name.startswith(settings.qdrant_collection_prefix): + continue + + print(f"\n--- Collection: {c.name} ---") + + info = await client.get_collection(c.name) + print(f" Total vectors: {info.points_count}") + + all_points = [] + offset = None + + while True: + points, offset = await client.scroll( + collection_name=c.name, + limit=100, + offset=offset, + with_payload=True, + with_vectors=False, + ) + all_points.extend(points) + if offset is None: + break + + by_source = defaultdict(list) + for p in all_points: + source = p.payload.get("source", "unknown") if p.payload else "unknown" + by_source[source].append(p) + + garbage_sources = [] + good_sources = [] + + for source, points in by_source.items(): + first_point = points[0] + text = first_point.payload.get("text", "") if first_point.payload else "" + + if is_garbage_text(text): + garbage_sources.append((source, points)) + else: + good_sources.append((source, points)) + + print(f"\n Good documents: {len(good_sources)}") + print(f" Garbage documents: {len(garbage_sources)}") + + if garbage_sources: + print(f"\n Garbage documents to delete:") + for source, points in garbage_sources: + print(f" - {source} ({len(points)} chunks)") + preview = "" + if points[0].payload: + preview = points[0].payload.get("text", "")[:80] + print(f" Preview: {repr(preview)}...") + + confirm = input("\n Delete these garbage documents? (y/n): ") + + if confirm.lower() == 'y': + for source, points in garbage_sources: + point_ids = [p.id for p in points] + + await client.delete( + collection_name=c.name, + points_selector=PointIdsList(points=point_ids) + ) + print(f" Deleted {len(point_ids)} vectors for source {source}") + + print(f"\n Cleanup complete!") + else: + print(f"\n Cancelled.") + else: + print(f"\n No garbage data found.") + + await client.close() + + +if __name__ == "__main__": + asyncio.run(cleanup_garbage()) diff --git a/ai-service/scripts/init_db.py b/ai-service/scripts/init_db.py new file mode 100644 index 0000000..1eef1c2 --- /dev/null +++ b/ai-service/scripts/init_db.py @@ -0,0 +1,178 @@ +""" +Database initialization script for AI Service. +Run this script to create the database and all required tables. + +Usage: + python scripts/init_db.py [--create-db] + +Options: + --create-db Create the database if it doesn't exist +""" + +import asyncio +import argparse +from sqlalchemy.ext.asyncio import create_async_engine +from sqlalchemy import text + +from app.core.config import get_settings + + +CREATE_TABLES_SQL = [ + """ + CREATE TABLE IF NOT EXISTS chat_sessions ( + id UUID NOT NULL PRIMARY KEY, + tenant_id VARCHAR NOT NULL, + session_id VARCHAR NOT NULL, + channel_type VARCHAR, + metadata JSON, + created_at TIMESTAMP WITHOUT TIME ZONE NOT NULL, + updated_at TIMESTAMP WITHOUT TIME ZONE NOT NULL + ) + """, + """ + CREATE TABLE IF NOT EXISTS chat_messages ( + id UUID NOT NULL PRIMARY KEY, + tenant_id VARCHAR NOT NULL, + session_id VARCHAR NOT NULL, + role VARCHAR NOT NULL, + content TEXT NOT NULL, + created_at TIMESTAMP WITHOUT TIME ZONE NOT NULL + ) + """, + """ + CREATE TABLE IF NOT EXISTS knowledge_bases ( + id UUID NOT NULL PRIMARY KEY, + tenant_id VARCHAR NOT NULL, + name VARCHAR NOT NULL, + description VARCHAR, + created_at TIMESTAMP WITHOUT TIME ZONE NOT NULL, + updated_at TIMESTAMP WITHOUT TIME ZONE NOT NULL + ) + """, + """ + CREATE TABLE IF NOT EXISTS documents ( + id UUID NOT NULL PRIMARY KEY, + tenant_id VARCHAR NOT NULL, + kb_id VARCHAR NOT NULL, + file_name VARCHAR NOT NULL, + file_path VARCHAR, + file_size INTEGER, + file_type VARCHAR, + status VARCHAR NOT NULL DEFAULT 'pending', + error_msg VARCHAR, + created_at TIMESTAMP WITHOUT TIME ZONE NOT NULL, + updated_at TIMESTAMP WITHOUT TIME ZONE NOT NULL + ) + """, + """ + CREATE TABLE IF NOT EXISTS index_jobs ( + id UUID NOT NULL PRIMARY KEY, + tenant_id VARCHAR NOT NULL, + doc_id UUID NOT NULL, + status VARCHAR NOT NULL DEFAULT 'pending', + progress INTEGER NOT NULL DEFAULT 0, + error_msg VARCHAR, + created_at TIMESTAMP WITHOUT TIME ZONE NOT NULL, + updated_at TIMESTAMP WITHOUT TIME ZONE NOT NULL + ) + """, +] + +CREATE_INDEXES_SQL = [ + "CREATE INDEX IF NOT EXISTS ix_chat_sessions_tenant_id ON chat_sessions (tenant_id)", + "CREATE UNIQUE INDEX IF NOT EXISTS ix_chat_sessions_tenant_session ON chat_sessions (tenant_id, session_id)", + "CREATE INDEX IF NOT EXISTS ix_chat_messages_tenant_id ON chat_messages (tenant_id)", + "CREATE INDEX IF NOT EXISTS ix_chat_messages_tenant_session ON chat_messages (tenant_id, session_id)", + "CREATE INDEX IF NOT EXISTS ix_chat_messages_tenant_session_created ON chat_messages (tenant_id, session_id, created_at)", + "CREATE INDEX IF NOT EXISTS ix_knowledge_bases_tenant_id ON knowledge_bases (tenant_id)", + "CREATE INDEX IF NOT EXISTS ix_documents_tenant_id ON documents (tenant_id)", + "CREATE INDEX IF NOT EXISTS ix_documents_tenant_kb ON documents (tenant_id, kb_id)", + "CREATE INDEX IF NOT EXISTS ix_documents_tenant_status ON documents (tenant_id, status)", + "CREATE INDEX IF NOT EXISTS ix_index_jobs_tenant_id ON index_jobs (tenant_id)", + "CREATE INDEX IF NOT EXISTS ix_index_jobs_tenant_doc ON index_jobs (tenant_id, doc_id)", + "CREATE INDEX IF NOT EXISTS ix_index_jobs_tenant_status ON index_jobs (tenant_id, status)", +] + + +async def create_database_if_not_exists(settings): + """Create database if it doesn't exist.""" + db_url = settings.database_url + postgres_url = db_url.rsplit("/", 1)[0] + "/postgres" + + engine = create_async_engine( + postgres_url, + isolation_level="AUTOCOMMIT", + pool_size=1, + ) + + db_name = db_url.rsplit("/", 1)[-1].split("?")[0] + + try: + async with engine.connect() as conn: + result = await conn.execute( + text(f"SELECT datname FROM pg_database WHERE datname = '{db_name}'") + ) + exists = result.fetchone() + + if not exists: + print(f"Creating database '{db_name}'...") + await conn.execute(text(f'CREATE DATABASE "{db_name}"')) + print(f"Database '{db_name}' created successfully!") + else: + print(f"Database '{db_name}' already exists.") + except Exception as e: + print(f"Error creating database: {e}") + raise + finally: + await engine.dispose() + + +async def create_tables(settings): + """Create all tables (idempotent).""" + engine = create_async_engine(settings.database_url) + + try: + async with engine.begin() as conn: + for stmt in CREATE_TABLES_SQL: + await conn.execute(text(stmt.strip())) + + for stmt in CREATE_INDEXES_SQL: + try: + await conn.execute(text(stmt)) + except Exception as e: + if "already exists" in str(e).lower() or "已经存在" in str(e): + continue + raise + + print("Tables and indexes created/verified successfully!") + + async with engine.connect() as conn: + result = await conn.execute( + text("SELECT tablename FROM pg_tables WHERE schemaname = 'public' ORDER BY tablename") + ) + tables = [row[0] for row in result] + print(f"Tables in database: {tables}") + except Exception as e: + print(f"Error creating tables: {e}") + raise + finally: + await engine.dispose() + + +async def main(): + parser = argparse.ArgumentParser(description="Initialize AI Service database") + parser.add_argument("--create-db", action="store_true", help="Create database if it doesn't exist") + args = parser.parse_args() + + settings = get_settings() + print(f"Database URL: {settings.database_url.split('@')[1] if '@' in settings.database_url else settings.database_url}") + + if args.create_db: + await create_database_if_not_exists(settings) + + await create_tables(settings) + print("\nDatabase initialization complete!") + + +if __name__ == "__main__": + asyncio.run(main()) diff --git a/ai-service/scripts/init_db.sql b/ai-service/scripts/init_db.sql new file mode 100644 index 0000000..9a68dac --- /dev/null +++ b/ai-service/scripts/init_db.sql @@ -0,0 +1,107 @@ +-- AI Service Database Initialization Script +-- Version: 0.2.0 +-- Description: Creates all required tables for AI Service with multi-tenant support +-- +-- Usage: +-- psql -U postgres -f scripts/init_db.sql +-- Or connect to ai_service database and run this script + +-- ============================================ +-- Chat Sessions Table +-- ============================================ +CREATE TABLE IF NOT EXISTS chat_sessions ( + id UUID NOT NULL PRIMARY KEY, + tenant_id VARCHAR NOT NULL, + session_id VARCHAR NOT NULL, + channel_type VARCHAR, + metadata JSON, + created_at TIMESTAMP WITHOUT TIME ZONE NOT NULL, + updated_at TIMESTAMP WITHOUT TIME ZONE NOT NULL +); + +-- ============================================ +-- Chat Messages Table +-- ============================================ +CREATE TABLE IF NOT EXISTS chat_messages ( + id UUID NOT NULL PRIMARY KEY, + tenant_id VARCHAR NOT NULL, + session_id VARCHAR NOT NULL, + role VARCHAR NOT NULL, + content TEXT NOT NULL, + created_at TIMESTAMP WITHOUT TIME ZONE NOT NULL +); + +-- ============================================ +-- Knowledge Bases Table +-- ============================================ +CREATE TABLE IF NOT EXISTS knowledge_bases ( + id UUID NOT NULL PRIMARY KEY, + tenant_id VARCHAR NOT NULL, + name VARCHAR NOT NULL, + description VARCHAR, + created_at TIMESTAMP WITHOUT TIME ZONE NOT NULL, + updated_at TIMESTAMP WITHOUT TIME ZONE NOT NULL +); + +-- ============================================ +-- Documents Table +-- ============================================ +CREATE TABLE IF NOT EXISTS documents ( + id UUID NOT NULL PRIMARY KEY, + tenant_id VARCHAR NOT NULL, + kb_id VARCHAR NOT NULL, + file_name VARCHAR NOT NULL, + file_path VARCHAR, + file_size INTEGER, + file_type VARCHAR, + status VARCHAR NOT NULL DEFAULT 'pending', + error_msg VARCHAR, + created_at TIMESTAMP WITHOUT TIME ZONE NOT NULL, + updated_at TIMESTAMP WITHOUT TIME ZONE NOT NULL +); + +-- ============================================ +-- Index Jobs Table +-- ============================================ +CREATE TABLE IF NOT EXISTS index_jobs ( + id UUID NOT NULL PRIMARY KEY, + tenant_id VARCHAR NOT NULL, + doc_id UUID NOT NULL, + status VARCHAR NOT NULL DEFAULT 'pending', + progress INTEGER NOT NULL DEFAULT 0, + error_msg VARCHAR, + created_at TIMESTAMP WITHOUT TIME ZONE NOT NULL, + updated_at TIMESTAMP WITHOUT TIME ZONE NOT NULL +); + +-- ============================================ +-- Indexes +-- ============================================ + +-- Chat Sessions Indexes +CREATE INDEX IF NOT EXISTS ix_chat_sessions_tenant_id ON chat_sessions (tenant_id); +CREATE UNIQUE INDEX IF NOT EXISTS ix_chat_sessions_tenant_session ON chat_sessions (tenant_id, session_id); + +-- Chat Messages Indexes +CREATE INDEX IF NOT EXISTS ix_chat_messages_tenant_id ON chat_messages (tenant_id); +CREATE INDEX IF NOT EXISTS ix_chat_messages_tenant_session ON chat_messages (tenant_id, session_id); +CREATE INDEX IF NOT EXISTS ix_chat_messages_tenant_session_created ON chat_messages (tenant_id, session_id, created_at); + +-- Knowledge Bases Indexes +CREATE INDEX IF NOT EXISTS ix_knowledge_bases_tenant_id ON knowledge_bases (tenant_id); + +-- Documents Indexes +CREATE INDEX IF NOT EXISTS ix_documents_tenant_id ON documents (tenant_id); +CREATE INDEX IF NOT EXISTS ix_documents_tenant_kb ON documents (tenant_id, kb_id); +CREATE INDEX IF NOT EXISTS ix_documents_tenant_status ON documents (tenant_id, status); + +-- Index Jobs Indexes +CREATE INDEX IF NOT EXISTS ix_index_jobs_tenant_id ON index_jobs (tenant_id); +CREATE INDEX IF NOT EXISTS ix_index_jobs_tenant_doc ON index_jobs (tenant_id, doc_id); +CREATE INDEX IF NOT EXISTS ix_index_jobs_tenant_status ON index_jobs (tenant_id, status); + +-- ============================================ +-- Verification +-- ============================================ +-- Run this to verify all tables are created: +-- SELECT tablename FROM pg_tables WHERE schemaname = 'public' ORDER BY tablename; diff --git a/ai-service/scripts/test_excel_parse.py b/ai-service/scripts/test_excel_parse.py new file mode 100644 index 0000000..47bc983 --- /dev/null +++ b/ai-service/scripts/test_excel_parse.py @@ -0,0 +1,40 @@ +""" +Test Excel parsing directly. +""" +import sys +sys.path.insert(0, ".") + +from app.services.document import parse_document, get_supported_document_formats + +print("Supported formats:", get_supported_document_formats()) +print() + +# Test with a sample xlsx file if available +import os +from pathlib import Path + +# Find any xlsx files in the uploads directory +uploads_dir = Path("uploads") +if uploads_dir.exists(): + xlsx_files = list(uploads_dir.glob("**/*.xlsx")) + print(f"Found {len(xlsx_files)} xlsx files") + + for f in xlsx_files[:1]: # Test first one + print(f"\nTesting: {f}") + try: + result = parse_document(str(f)) + print(f" SUCCESS: chars={len(result.text)}") + print(f" metadata: {result.metadata}") + print(f" preview: {result.text[:500]}...") + except Exception as e: + print(f" FAILED: {type(e).__name__}: {e}") +else: + print("No uploads directory found") + +# Test openpyxl directly +print("\n--- Testing openpyxl directly ---") +try: + import openpyxl + print(f"openpyxl version: {openpyxl.__version__}") +except ImportError as e: + print(f"openpyxl NOT installed: {e}") diff --git a/ai-service/tests/__init__.py b/ai-service/tests/__init__.py new file mode 100644 index 0000000..30e3933 --- /dev/null +++ b/ai-service/tests/__init__.py @@ -0,0 +1,3 @@ +""" +Tests package for AI Service. +""" diff --git a/ai-service/tests/conftest.py b/ai-service/tests/conftest.py new file mode 100644 index 0000000..a30005f --- /dev/null +++ b/ai-service/tests/conftest.py @@ -0,0 +1,10 @@ +""" +Pytest configuration for AI Service tests. +""" + +import pytest + + +@pytest.fixture +def anyio_backend(): + return "asyncio" diff --git a/ai-service/tests/test_accept_switching.py b/ai-service/tests/test_accept_switching.py new file mode 100644 index 0000000..5e6c7c5 --- /dev/null +++ b/ai-service/tests/test_accept_switching.py @@ -0,0 +1,285 @@ +""" +Tests for response mode switching based on Accept header. +[AC-AISVC-06] Tests for automatic switching between JSON and SSE streaming modes. +""" + +import pytest +from fastapi.testclient import TestClient +from httpx import AsyncClient + +from app.main import app + + +class TestAcceptHeaderSwitching: + """ + [AC-AISVC-06] Test cases for Accept header based response mode switching. + """ + + @pytest.fixture + def client(self): + return TestClient(app) + + @pytest.fixture + def valid_request_body(self): + return { + "sessionId": "test_session_001", + "currentMessage": "Hello, how are you?", + "channelType": "wechat", + } + + @pytest.fixture + def valid_headers(self): + return {"X-Tenant-Id": "tenant_001"} + + def test_json_response_with_default_accept( + self, client: TestClient, valid_request_body: dict, valid_headers: dict + ): + """ + [AC-AISVC-06] Test that default Accept header returns JSON response. + """ + response = client.post( + "/ai/chat", + json=valid_request_body, + headers=valid_headers, + ) + + assert response.status_code == 200 + assert response.headers["content-type"] == "application/json" + + data = response.json() + assert "reply" in data + assert "confidence" in data + assert "shouldTransfer" in data + + def test_json_response_with_application_json_accept( + self, client: TestClient, valid_request_body: dict, valid_headers: dict + ): + """ + [AC-AISVC-06] Test that Accept: application/json returns JSON response. + """ + headers = {**valid_headers, "Accept": "application/json"} + + response = client.post( + "/ai/chat", + json=valid_request_body, + headers=headers, + ) + + assert response.status_code == 200 + assert response.headers["content-type"] == "application/json" + + data = response.json() + assert "reply" in data + assert "confidence" in data + assert "shouldTransfer" in data + + def test_sse_response_with_text_event_stream_accept( + self, client: TestClient, valid_request_body: dict, valid_headers: dict + ): + """ + [AC-AISVC-06] Test that Accept: text/event-stream returns SSE response. + """ + headers = {**valid_headers, "Accept": "text/event-stream"} + + response = client.post( + "/ai/chat", + json=valid_request_body, + headers=headers, + ) + + assert response.status_code == 200 + assert "text/event-stream" in response.headers["content-type"] + + content = response.text + assert "event: message" in content + assert "event: final" in content + + def test_sse_response_event_sequence( + self, client: TestClient, valid_request_body: dict, valid_headers: dict + ): + """ + [AC-AISVC-07, AC-AISVC-08] Test that SSE events follow proper sequence. + message* -> final -> close + """ + headers = {**valid_headers, "Accept": "text/event-stream"} + + response = client.post( + "/ai/chat", + json=valid_request_body, + headers=headers, + ) + + content = response.text + + assert "event:message" in content or "event: message" in content, f"Expected message event in: {content[:500]}" + assert "event:final" in content or "event: final" in content, f"Expected final event in: {content[:500]}" + + message_idx = content.find("event:message") + if message_idx == -1: + message_idx = content.find("event: message") + final_idx = content.find("event:final") + if final_idx == -1: + final_idx = content.find("event: final") + + assert final_idx > message_idx, "final event should come after message events" + + def test_missing_tenant_id_returns_400( + self, client: TestClient, valid_request_body: dict + ): + """ + [AC-AISVC-12] Test that missing X-Tenant-Id returns 400 error. + """ + response = client.post( + "/ai/chat", + json=valid_request_body, + ) + + assert response.status_code == 400 + + data = response.json() + assert data["code"] == "MISSING_TENANT_ID" + assert "message" in data + + def test_invalid_channel_type_returns_400( + self, client: TestClient, valid_headers: dict + ): + """ + [AC-AISVC-03] Test that invalid channel type returns 400 error. + """ + invalid_body = { + "sessionId": "test_session_001", + "currentMessage": "Hello", + "channelType": "invalid_channel", + } + + response = client.post( + "/ai/chat", + json=invalid_body, + headers=valid_headers, + ) + + assert response.status_code == 400 + + def test_missing_required_fields_returns_400( + self, client: TestClient, valid_headers: dict + ): + """ + [AC-AISVC-03] Test that missing required fields return 400 error. + """ + incomplete_body = { + "sessionId": "test_session_001", + } + + response = client.post( + "/ai/chat", + json=incomplete_body, + headers=valid_headers, + ) + + assert response.status_code == 400 + + +class TestHealthEndpoint: + """ + [AC-AISVC-20] Test cases for health check endpoint. + """ + + @pytest.fixture + def client(self): + return TestClient(app) + + def test_health_check_returns_200(self, client: TestClient): + """ + [AC-AISVC-20] Test that health check returns 200 with status. + """ + response = client.get("/ai/health") + + assert response.status_code == 200 + data = response.json() + assert data["status"] == "healthy" + + +class TestSSEStateMachine: + """ + [AC-AISVC-08, AC-AISVC-09] Test cases for SSE state machine. + """ + + @pytest.mark.asyncio + async def test_state_transitions(self): + from app.core.sse import SSEState, SSEStateMachine + + state_machine = SSEStateMachine() + + assert state_machine.state == SSEState.INIT + + success = await state_machine.transition_to_streaming() + assert success is True + assert state_machine.state == SSEState.STREAMING + + assert state_machine.can_send_message() is True + + success = await state_machine.transition_to_final() + assert success is True + assert state_machine.state == SSEState.FINAL_SENT + + assert state_machine.can_send_message() is False + + await state_machine.close() + assert state_machine.state == SSEState.CLOSED + + @pytest.mark.asyncio + async def test_error_transition_from_streaming(self): + from app.core.sse import SSEState, SSEStateMachine + + state_machine = SSEStateMachine() + await state_machine.transition_to_streaming() + + success = await state_machine.transition_to_error() + assert success is True + assert state_machine.state == SSEState.ERROR_SENT + + @pytest.mark.asyncio + async def test_cannot_transition_to_final_from_init(self): + from app.core.sse import SSEStateMachine + + state_machine = SSEStateMachine() + + success = await state_machine.transition_to_final() + assert success is False + + +class TestMiddleware: + """ + [AC-AISVC-10, AC-AISVC-12] Test cases for middleware. + """ + + @pytest.fixture + def client(self): + return TestClient(app) + + def test_tenant_context_extraction( + self, client: TestClient + ): + """ + [AC-AISVC-10] Test that X-Tenant-Id is properly extracted and used. + """ + headers = {"X-Tenant-Id": "tenant_test_123"} + body = { + "sessionId": "session_001", + "currentMessage": "Test message", + "channelType": "wechat", + } + + response = client.post("/ai/chat", json=body, headers=headers) + + assert response.status_code == 200 + + def test_health_endpoint_bypasses_tenant_check( + self, client: TestClient + ): + """ + Test that health endpoint doesn't require X-Tenant-Id. + """ + response = client.get("/ai/health") + + assert response.status_code == 200 diff --git a/ai-service/tests/test_confidence.py b/ai-service/tests/test_confidence.py new file mode 100644 index 0000000..3a12c9b --- /dev/null +++ b/ai-service/tests/test_confidence.py @@ -0,0 +1,302 @@ +""" +Unit tests for Confidence Calculator. +[AC-AISVC-17, AC-AISVC-18, AC-AISVC-19] Tests for confidence scoring and transfer logic. + +Tests cover: +- Retrieval insufficiency detection +- Confidence calculation based on retrieval scores +- shouldTransfer logic with threshold T_low +- Edge cases (no retrieval, empty results) +""" + +from unittest.mock import MagicMock, patch + +import pytest + +from app.services.retrieval.base import RetrievalHit, RetrievalResult +from app.services.confidence import ( + ConfidenceCalculator, + ConfidenceConfig, + ConfidenceResult, + get_confidence_calculator, +) + + +@pytest.fixture +def mock_settings(): + """Mock settings for testing.""" + settings = MagicMock() + settings.rag_score_threshold = 0.7 + settings.rag_min_hits = 1 + settings.confidence_low_threshold = 0.5 + settings.confidence_high_threshold = 0.8 + settings.confidence_insufficient_penalty = 0.3 + settings.rag_max_evidence_tokens = 2000 + return settings + + +@pytest.fixture +def confidence_calculator(mock_settings): + """Create confidence calculator with mocked settings.""" + with patch("app.services.confidence.get_settings", return_value=mock_settings): + calculator = ConfidenceCalculator() + yield calculator + + +@pytest.fixture +def good_retrieval_result(): + """Sample retrieval result with good hits.""" + return RetrievalResult( + hits=[ + RetrievalHit(text="Result 1", score=0.9, source="kb"), + RetrievalHit(text="Result 2", score=0.85, source="kb"), + RetrievalHit(text="Result 3", score=0.8, source="kb"), + ], + diagnostics={"query_length": 50}, + ) + + +@pytest.fixture +def poor_retrieval_result(): + """Sample retrieval result with poor hits.""" + return RetrievalResult( + hits=[ + RetrievalHit(text="Result 1", score=0.5, source="kb"), + ], + diagnostics={"query_length": 50}, + ) + + +@pytest.fixture +def empty_retrieval_result(): + """Sample empty retrieval result.""" + return RetrievalResult( + hits=[], + diagnostics={"query_length": 50}, + ) + + +class TestRetrievalInsufficiency: + """Tests for retrieval insufficiency detection. [AC-AISVC-17]""" + + def test_sufficient_retrieval(self, confidence_calculator, good_retrieval_result): + """[AC-AISVC-17] Test sufficient retrieval detection.""" + is_insufficient, reason = confidence_calculator.is_retrieval_insufficient( + good_retrieval_result + ) + + assert is_insufficient is False + assert reason == "sufficient" + + def test_insufficient_hit_count(self, confidence_calculator): + """[AC-AISVC-17] Test insufficiency due to low hit count.""" + config = ConfidenceConfig(min_hits=3) + calculator = ConfidenceCalculator(config=config) + + result = RetrievalResult( + hits=[ + RetrievalHit(text="Result 1", score=0.9, source="kb"), + ] + ) + + is_insufficient, reason = calculator.is_retrieval_insufficient(result) + + assert is_insufficient is True + assert "hit_count" in reason.lower() + + def test_insufficient_score(self, confidence_calculator, poor_retrieval_result): + """[AC-AISVC-17] Test insufficiency due to low score.""" + is_insufficient, reason = confidence_calculator.is_retrieval_insufficient( + poor_retrieval_result + ) + + assert is_insufficient is True + assert "max_score" in reason.lower() + + def test_insufficient_empty_result(self, confidence_calculator, empty_retrieval_result): + """[AC-AISVC-17] Test insufficiency with empty result.""" + is_insufficient, reason = confidence_calculator.is_retrieval_insufficient( + empty_retrieval_result + ) + + assert is_insufficient is True + + def test_insufficient_evidence_tokens(self, confidence_calculator, good_retrieval_result): + """[AC-AISVC-17] Test insufficiency due to evidence token limit.""" + is_insufficient, reason = confidence_calculator.is_retrieval_insufficient( + good_retrieval_result, evidence_tokens=3000 + ) + + assert is_insufficient is True + assert "evidence_tokens" in reason.lower() + + +class TestConfidenceCalculation: + """Tests for confidence calculation. [AC-AISVC-17, AC-AISVC-19]""" + + def test_high_confidence_with_good_retrieval( + self, confidence_calculator, good_retrieval_result + ): + """[AC-AISVC-19] Test high confidence with good retrieval results.""" + result = confidence_calculator.calculate_confidence(good_retrieval_result) + + assert isinstance(result, ConfidenceResult) + assert result.confidence >= 0.5 + assert result.should_transfer is False + assert result.is_retrieval_insufficient is False + + def test_low_confidence_with_poor_retrieval( + self, confidence_calculator, poor_retrieval_result + ): + """[AC-AISVC-17] Test low confidence with poor retrieval results.""" + result = confidence_calculator.calculate_confidence(poor_retrieval_result) + + assert isinstance(result, ConfidenceResult) + assert result.confidence < 0.7 + assert result.is_retrieval_insufficient is True + + def test_confidence_with_empty_result( + self, confidence_calculator, empty_retrieval_result + ): + """[AC-AISVC-17] Test confidence with empty retrieval result.""" + result = confidence_calculator.calculate_confidence(empty_retrieval_result) + + assert result.confidence < 0.5 + assert result.should_transfer is True + assert result.is_retrieval_insufficient is True + + def test_confidence_includes_diagnostics( + self, confidence_calculator, good_retrieval_result + ): + """[AC-AISVC-17] Test that confidence result includes diagnostics.""" + result = confidence_calculator.calculate_confidence(good_retrieval_result) + + assert "base_confidence" in result.diagnostics + assert "hit_count" in result.diagnostics + assert "max_score" in result.diagnostics + assert "threshold_low" in result.diagnostics + + def test_confidence_with_additional_factors( + self, confidence_calculator, good_retrieval_result + ): + """[AC-AISVC-17] Test confidence with additional factors.""" + additional = {"model_certainty": 0.5} + result = confidence_calculator.calculate_confidence( + good_retrieval_result, additional_factors=additional + ) + + assert result.confidence > 0 + + def test_confidence_bounded_to_range(self, confidence_calculator): + """[AC-AISVC-17] Test that confidence is bounded to [0, 1].""" + result_with_high_score = RetrievalResult( + hits=[RetrievalHit(text="Result", score=1.0, source="kb")] + ) + + result = confidence_calculator.calculate_confidence(result_with_high_score) + + assert 0.0 <= result.confidence <= 1.0 + + +class TestShouldTransfer: + """Tests for shouldTransfer logic. [AC-AISVC-18]""" + + def test_no_transfer_with_high_confidence( + self, confidence_calculator, good_retrieval_result + ): + """[AC-AISVC-18] Test no transfer when confidence is high.""" + result = confidence_calculator.calculate_confidence(good_retrieval_result) + + assert result.should_transfer is False + assert result.transfer_reason is None + + def test_transfer_with_low_confidence( + self, confidence_calculator, empty_retrieval_result + ): + """[AC-AISVC-18] Test transfer when confidence is low.""" + result = confidence_calculator.calculate_confidence(empty_retrieval_result) + + assert result.should_transfer is True + assert result.transfer_reason is not None + + def test_transfer_reason_for_insufficient_retrieval( + self, confidence_calculator, poor_retrieval_result + ): + """[AC-AISVC-18] Test transfer reason for insufficient retrieval.""" + result = confidence_calculator.calculate_confidence(poor_retrieval_result) + + assert result.is_retrieval_insufficient is True + if result.should_transfer: + assert "检索" in result.transfer_reason or "置信度" in result.transfer_reason + + def test_custom_threshold(self): + """[AC-AISVC-18] Test custom low threshold for transfer.""" + config = ConfidenceConfig( + confidence_low_threshold=0.7, + score_threshold=0.7, + min_hits=1, + ) + calculator = ConfidenceCalculator(config=config) + + result = RetrievalResult( + hits=[RetrievalHit(text="Result", score=0.6, source="kb")] + ) + + conf_result = calculator.calculate_confidence(result) + + assert conf_result.should_transfer is True + + +class TestNoRetrieval: + """Tests for no retrieval scenario. [AC-AISVC-17]""" + + def test_no_retrieval_confidence(self, confidence_calculator): + """[AC-AISVC-17] Test confidence when no retrieval was performed.""" + result = confidence_calculator.calculate_confidence_no_retrieval() + + assert result.confidence == 0.3 + assert result.should_transfer is True + assert result.transfer_reason is not None + assert result.is_retrieval_insufficient is True + + +class TestConfidenceConfig: + """Tests for confidence configuration.""" + + def test_default_config(self, mock_settings): + """Test default configuration values.""" + with patch("app.services.confidence.get_settings", return_value=mock_settings): + calculator = ConfidenceCalculator() + + assert calculator._config.score_threshold == 0.7 + assert calculator._config.min_hits == 1 + assert calculator._config.confidence_low_threshold == 0.5 + + def test_custom_config(self): + """Test custom configuration values.""" + config = ConfidenceConfig( + score_threshold=0.8, + min_hits=2, + confidence_low_threshold=0.6, + ) + calculator = ConfidenceCalculator(config=config) + + assert calculator._config.score_threshold == 0.8 + assert calculator._config.min_hits == 2 + assert calculator._config.confidence_low_threshold == 0.6 + + +class TestConfidenceCalculatorSingleton: + """Tests for singleton pattern.""" + + def test_get_confidence_calculator_singleton(self, mock_settings): + """Test that get_confidence_calculator returns singleton.""" + with patch("app.services.confidence.get_settings", return_value=mock_settings): + from app.services.confidence import _confidence_calculator + import app.services.confidence as confidence_module + confidence_module._confidence_calculator = None + + calculator1 = get_confidence_calculator() + calculator2 = get_confidence_calculator() + + assert calculator1 is calculator2 diff --git a/ai-service/tests/test_context.py b/ai-service/tests/test_context.py new file mode 100644 index 0000000..ed13a28 --- /dev/null +++ b/ai-service/tests/test_context.py @@ -0,0 +1,287 @@ +""" +Unit tests for Context Merger. +[AC-AISVC-14, AC-AISVC-15] Tests for context merging and truncation. + +Tests cover: +- Message fingerprint computation +- Context merging with deduplication +- Token-based truncation +- Complete merge_and_truncate pipeline +""" + +import hashlib +from unittest.mock import MagicMock, patch + +import pytest + +from app.models import ChatMessage, Role +from app.services.context import ContextMerger, MergedContext, get_context_merger + + +@pytest.fixture +def mock_settings(): + """Mock settings for testing.""" + settings = MagicMock() + return settings + + +@pytest.fixture +def context_merger(mock_settings): + """Create context merger with mocked settings.""" + with patch("app.services.context.get_settings", return_value=mock_settings): + merger = ContextMerger(max_history_tokens=1000) + yield merger + + +@pytest.fixture +def local_history(): + """Sample local history messages.""" + return [ + ChatMessage(role=Role.USER, content="Hello"), + ChatMessage(role=Role.ASSISTANT, content="Hi there!"), + ChatMessage(role=Role.USER, content="How are you?"), + ] + + +@pytest.fixture +def external_history(): + """Sample external history messages.""" + return [ + ChatMessage(role=Role.USER, content="Hello"), + ChatMessage(role=Role.ASSISTANT, content="Hi there!"), + ChatMessage(role=Role.USER, content="What's the weather?"), + ] + + +@pytest.fixture +def dict_local_history(): + """Sample local history as dicts.""" + return [ + {"role": "user", "content": "Hello"}, + {"role": "assistant", "content": "Hi there!"}, + ] + + +@pytest.fixture +def dict_external_history(): + """Sample external history as dicts.""" + return [ + {"role": "user", "content": "Hello"}, + {"role": "user", "content": "What's the weather?"}, + ] + + +class TestFingerprintComputation: + """Tests for message fingerprint computation. [AC-AISVC-15]""" + + def test_fingerprint_consistency(self, context_merger): + """Test that same input produces same fingerprint.""" + fp1 = context_merger.compute_fingerprint("user", "Hello world") + fp2 = context_merger.compute_fingerprint("user", "Hello world") + assert fp1 == fp2 + + def test_fingerprint_role_difference(self, context_merger): + """Test that different roles produce different fingerprints.""" + fp_user = context_merger.compute_fingerprint("user", "Hello") + fp_assistant = context_merger.compute_fingerprint("assistant", "Hello") + assert fp_user != fp_assistant + + def test_fingerprint_content_difference(self, context_merger): + """Test that different content produces different fingerprints.""" + fp1 = context_merger.compute_fingerprint("user", "Hello") + fp2 = context_merger.compute_fingerprint("user", "World") + assert fp1 != fp2 + + def test_fingerprint_normalization(self, context_merger): + """Test that content is normalized (trimmed).""" + fp1 = context_merger.compute_fingerprint("user", "Hello") + fp2 = context_merger.compute_fingerprint("user", " Hello ") + assert fp1 == fp2 + + def test_fingerprint_is_sha256(self, context_merger): + """Test that fingerprint is SHA256 hash.""" + fp = context_merger.compute_fingerprint("user", "Hello") + expected = hashlib.sha256("user|Hello".encode("utf-8")).hexdigest() + assert fp == expected + assert len(fp) == 64 # SHA256 produces 64 hex characters + + +class TestContextMerging: + """Tests for context merging with deduplication. [AC-AISVC-14, AC-AISVC-15]""" + + def test_merge_empty_histories(self, context_merger): + """[AC-AISVC-14] Test merging empty histories.""" + result = context_merger.merge_context(None, None) + + assert isinstance(result, MergedContext) + assert result.messages == [] + assert result.local_count == 0 + assert result.external_count == 0 + assert result.duplicates_skipped == 0 + + def test_merge_local_only(self, context_merger, local_history): + """[AC-AISVC-14] Test merging with only local history (no external).""" + result = context_merger.merge_context(local_history, None) + + assert len(result.messages) == 3 + assert result.local_count == 3 + assert result.external_count == 0 + assert result.duplicates_skipped == 0 + + def test_merge_external_only(self, context_merger, external_history): + """[AC-AISVC-15] Test merging with only external history (no local).""" + result = context_merger.merge_context(None, external_history) + + assert len(result.messages) == 3 + assert result.local_count == 0 + assert result.external_count == 3 + assert result.duplicates_skipped == 0 + + def test_merge_with_duplicates(self, context_merger, local_history, external_history): + """[AC-AISVC-15] Test deduplication when merging overlapping histories.""" + result = context_merger.merge_context(local_history, external_history) + + assert len(result.messages) == 4 + assert result.local_count == 3 + assert result.external_count == 1 + assert result.duplicates_skipped == 2 + + roles = [m["role"] for m in result.messages] + contents = [m["content"] for m in result.messages] + assert "What's the weather?" in contents + + def test_merge_with_dict_histories(self, context_merger, dict_local_history, dict_external_history): + """[AC-AISVC-14, AC-AISVC-15] Test merging with dict format histories.""" + result = context_merger.merge_context(dict_local_history, dict_external_history) + + assert len(result.messages) == 3 + assert result.local_count == 2 + assert result.external_count == 1 + assert result.duplicates_skipped == 1 + + def test_merge_priority_local(self, context_merger): + """[AC-AISVC-15] Test that local history takes priority.""" + local = [ChatMessage(role=Role.USER, content="Hello")] + external = [ChatMessage(role=Role.USER, content="Hello")] + + result = context_merger.merge_context(local, external) + + assert len(result.messages) == 1 + assert result.duplicates_skipped == 1 + + def test_merge_records_diagnostics(self, context_merger, local_history, external_history): + """[AC-AISVC-15] Test that duplicates are recorded in diagnostics.""" + result = context_merger.merge_context(local_history, external_history) + + assert len(result.diagnostics) == 2 + for diag in result.diagnostics: + assert diag["type"] == "duplicate_skipped" + assert "role" in diag + assert "content_preview" in diag + + +class TestTokenTruncation: + """Tests for token-based truncation. [AC-AISVC-14]""" + + def test_truncate_empty_messages(self, context_merger): + """[AC-AISVC-14] Test truncating empty message list.""" + truncated, count = context_merger.truncate_context([], 100) + assert truncated == [] + assert count == 0 + + def test_truncate_within_budget(self, context_merger): + """[AC-AISVC-14] Test that messages within budget are not truncated.""" + messages = [ + {"role": "user", "content": "Hello"}, + {"role": "assistant", "content": "Hi"}, + ] + truncated, count = context_merger.truncate_context(messages, 1000) + + assert len(truncated) == 2 + assert count == 0 + + def test_truncate_exceeds_budget(self, context_merger): + """[AC-AISVC-14] Test that messages exceeding budget are truncated.""" + messages = [ + {"role": "user", "content": "Hello world " * 100}, + {"role": "assistant", "content": "Hi there " * 100}, + {"role": "user", "content": "Short message"}, + ] + truncated, count = context_merger.truncate_context(messages, 50) + + assert len(truncated) < len(messages) + assert count > 0 + + def test_truncate_keeps_recent_messages(self, context_merger): + """[AC-AISVC-14] Test that truncation keeps most recent messages.""" + messages = [ + {"role": "user", "content": "First message"}, + {"role": "assistant", "content": "Second message"}, + {"role": "user", "content": "Third message"}, + ] + truncated, count = context_merger.truncate_context(messages, 20) + + if count > 0: + assert "Third message" in [m["content"] for m in truncated] + + def test_truncate_with_default_budget(self, context_merger): + """[AC-AISVC-14] Test truncation with default budget from config.""" + messages = [{"role": "user", "content": "Test"}] + truncated, count = context_merger.truncate_context(messages) + + assert len(truncated) == 1 + assert count == 0 + + +class TestMergeAndTruncate: + """Tests for complete merge_and_truncate pipeline. [AC-AISVC-14, AC-AISVC-15]""" + + def test_merge_and_truncate_combined(self, context_merger): + """[AC-AISVC-14, AC-AISVC-15] Test complete pipeline.""" + local = [ + ChatMessage(role=Role.USER, content="Hello"), + ChatMessage(role=Role.ASSISTANT, content="Hi"), + ] + external = [ + ChatMessage(role=Role.USER, content="Hello"), + ChatMessage(role=Role.USER, content="What's up?"), + ] + + result = context_merger.merge_and_truncate(local, external, max_tokens=1000) + + assert isinstance(result, MergedContext) + assert len(result.messages) == 3 + assert result.local_count == 2 + assert result.external_count == 1 + assert result.duplicates_skipped == 1 + + def test_merge_and_truncate_with_truncation(self, context_merger): + """[AC-AISVC-14, AC-AISVC-15] Test pipeline with truncation.""" + local = [ + ChatMessage(role=Role.USER, content="Hello " * 50), + ChatMessage(role=Role.ASSISTANT, content="Hi " * 50), + ] + external = [ + ChatMessage(role=Role.USER, content="Short"), + ] + + result = context_merger.merge_and_truncate(local, external, max_tokens=50) + + assert result.truncated_count > 0 + assert result.total_tokens <= 50 + + +class TestContextMergerSingleton: + """Tests for singleton pattern.""" + + def test_get_context_merger_singleton(self, mock_settings): + """Test that get_context_merger returns singleton.""" + with patch("app.services.context.get_settings", return_value=mock_settings): + from app.services.context import _context_merger + import app.services.context as context_module + context_module._context_merger = None + + merger1 = get_context_merger() + merger2 = get_context_merger() + + assert merger1 is merger2 diff --git a/ai-service/tests/test_contract.py b/ai-service/tests/test_contract.py new file mode 100644 index 0000000..f9e9020 --- /dev/null +++ b/ai-service/tests/test_contract.py @@ -0,0 +1,453 @@ +""" +Contract validation tests for AI Service. +[AC-AISVC-02] Verify response fields match openapi.provider.yaml contract. + +OpenAPI ChatResponse schema: +- reply: string (required) +- confidence: number (double, required) +- shouldTransfer: boolean (required) +- transferReason: string (optional) +- metadata: object (optional) +""" + +import json +import pytest +from pydantic import ValidationError + +from app.models import ( + ChatResponse, + ChatRequest, + ChatMessage, + Role, + ChannelType, + ErrorResponse, + SSEFinalEvent, + SSEErrorEvent, +) + + +class TestChatResponseContract: + """ + [AC-AISVC-02] Test ChatResponse matches OpenAPI contract. + """ + + def test_required_fields_present(self): + """ + [AC-AISVC-02] ChatResponse must have reply, confidence, shouldTransfer. + """ + response = ChatResponse( + reply="Test reply", + confidence=0.85, + should_transfer=False, + ) + + assert response.reply == "Test reply" + assert response.confidence == 0.85 + assert response.should_transfer is False + + def test_json_serialization_uses_camel_case(self): + """ + [AC-AISVC-02] JSON output must use camelCase per OpenAPI contract. + Field names: shouldTransfer, transferReason (not snake_case) + """ + response = ChatResponse( + reply="Test reply", + confidence=0.85, + should_transfer=True, + transfer_reason="Low confidence", + metadata={"key": "value"}, + ) + + json_str = response.model_dump_json(by_alias=True) + data = json.loads(json_str) + + assert "shouldTransfer" in data + assert "should_transfer" not in data + assert "transferReason" in data + assert "transfer_reason" not in data + + def test_json_output_matches_contract_structure(self): + """ + [AC-AISVC-02] JSON output structure must match OpenAPI schema exactly. + Optional fields with None values are included as null in JSON. + """ + response = ChatResponse( + reply="AI response content", + confidence=0.92, + should_transfer=False, + transfer_reason=None, + metadata={"session_id": "test-123"}, + ) + + data = json.loads(response.model_dump_json(by_alias=True)) + + assert "reply" in data + assert "confidence" in data + assert "shouldTransfer" in data + assert "transferReason" in data + assert "metadata" in data + assert data["reply"] == "AI response content" + assert data["confidence"] == 0.92 + assert data["shouldTransfer"] is False + assert data["transferReason"] is None + assert data["metadata"]["session_id"] == "test-123" + + def test_optional_fields_can_be_omitted(self): + """ + [AC-AISVC-02] transferReason and metadata are optional. + """ + response = ChatResponse( + reply="Reply without optional fields", + confidence=0.5, + should_transfer=True, + ) + + json_str = response.model_dump_json(by_alias=True) + data = json.loads(json_str) + + assert data["reply"] == "Reply without optional fields" + assert data["confidence"] == 0.5 + assert data["shouldTransfer"] is True + assert data.get("transferReason") is None + assert data.get("metadata") is None + + def test_confidence_must_be_between_0_and_1(self): + """ + [AC-AISVC-02] confidence must be in range [0.0, 1.0]. + """ + valid_response = ChatResponse( + reply="Valid", + confidence=0.0, + should_transfer=False, + ) + assert valid_response.confidence == 0.0 + + valid_response = ChatResponse( + reply="Valid", + confidence=1.0, + should_transfer=False, + ) + assert valid_response.confidence == 1.0 + + def test_confidence_rejects_negative(self): + """ + [AC-AISVC-02] confidence must reject negative values. + """ + with pytest.raises(ValidationError): + ChatResponse( + reply="Invalid", + confidence=-0.1, + should_transfer=False, + ) + + def test_confidence_rejects_above_1(self): + """ + [AC-AISVC-02] confidence must reject values > 1.0. + """ + with pytest.raises(ValidationError): + ChatResponse( + reply="Invalid", + confidence=1.5, + should_transfer=False, + ) + + def test_reply_is_required(self): + """ + [AC-AISVC-02] reply field is required. + """ + with pytest.raises(ValidationError): + ChatResponse( + confidence=0.5, + should_transfer=False, + ) + + def test_confidence_is_required(self): + """ + [AC-AISVC-02] confidence field is required. + """ + with pytest.raises(ValidationError): + ChatResponse( + reply="Test", + should_transfer=False, + ) + + def test_should_transfer_is_required(self): + """ + [AC-AISVC-02] shouldTransfer field is required. + """ + with pytest.raises(ValidationError): + ChatResponse( + reply="Test", + confidence=0.5, + ) + + def test_transfer_reason_accepts_string(self): + """ + [AC-AISVC-02] transferReason accepts string value. + """ + response = ChatResponse( + reply="Test", + confidence=0.3, + should_transfer=True, + transfer_reason="检索结果不足,建议转人工", + ) + + data = json.loads(response.model_dump_json(by_alias=True)) + assert data["transferReason"] == "检索结果不足,建议转人工" + + def test_metadata_accepts_any_object(self): + """ + [AC-AISVC-02] metadata accepts any object with additionalProperties. + """ + response = ChatResponse( + reply="Test", + confidence=0.8, + should_transfer=False, + metadata={ + "session_id": "session-123", + "channel_type": "wechat", + "diagnostics": { + "retrieval_hits": 5, + "llm_model": "gpt-4o-mini", + }, + }, + ) + + data = json.loads(response.model_dump_json(by_alias=True)) + assert data["metadata"]["session_id"] == "session-123" + assert data["metadata"]["diagnostics"]["retrieval_hits"] == 5 + + +class TestChatRequestContract: + """ + [AC-AISVC-02] Test ChatRequest matches OpenAPI contract. + """ + + def test_required_fields(self): + """ + [AC-AISVC-02] ChatRequest required fields: sessionId, currentMessage, channelType. + """ + request = ChatRequest( + session_id="session-123", + current_message="Hello", + channel_type=ChannelType.WECHAT, + ) + + assert request.session_id == "session-123" + assert request.current_message == "Hello" + assert request.channel_type == ChannelType.WECHAT + + def test_json_input_uses_camel_case(self): + """ + [AC-AISVC-02] JSON input should accept camelCase field names. + """ + json_data = { + "sessionId": "session-456", + "currentMessage": "What is the price?", + "channelType": "wechat", + } + + request = ChatRequest.model_validate(json_data) + + assert request.session_id == "session-456" + assert request.current_message == "What is the price?" + + def test_optional_history_field(self): + """ + [AC-AISVC-02] history is optional. + """ + request = ChatRequest( + session_id="session-789", + current_message="Follow-up question", + channel_type=ChannelType.DOUYIN, + history=[ + ChatMessage(role=Role.USER, content="Previous question"), + ChatMessage(role=Role.ASSISTANT, content="Previous answer"), + ], + ) + + assert len(request.history) == 2 + assert request.history[0].role == Role.USER + + def test_channel_type_enum_values(self): + """ + [AC-AISVC-02] channelType must be one of: wechat, douyin, jd. + """ + valid_types = ["wechat", "douyin", "jd"] + + for channel in valid_types: + request = ChatRequest( + session_id="test", + current_message="Test", + channel_type=channel, + ) + assert request.channel_type.value == channel + + +class TestErrorResponseContract: + """ + [AC-AISVC-02] Test ErrorResponse matches OpenAPI contract. + """ + + def test_required_fields(self): + """ + [AC-AISVC-02] ErrorResponse required fields: code, message. + """ + response = ErrorResponse( + code="INVALID_REQUEST", + message="Missing required field", + ) + + assert response.code == "INVALID_REQUEST" + assert response.message == "Missing required field" + + def test_optional_details(self): + """ + [AC-AISVC-02] details is optional array. + """ + response = ErrorResponse( + code="VALIDATION_ERROR", + message="Multiple validation errors", + details=[ + {"field": "sessionId", "error": "required"}, + {"field": "channelType", "error": "invalid value"}, + ], + ) + + assert len(response.details) == 2 + + +class TestSSEFinalEventContract: + """ + [AC-AISVC-02] Test SSE final event matches OpenAPI ChatResponse structure. + """ + + def test_sse_final_event_structure(self): + """ + [AC-AISVC-02] SSE final event must have same structure as ChatResponse. + """ + event = SSEFinalEvent( + reply="Complete AI response", + confidence=0.88, + should_transfer=False, + transfer_reason=None, + metadata={"tokens": 150}, + ) + + data = json.loads(event.model_dump_json(by_alias=True)) + + assert "reply" in data + assert "confidence" in data + assert "shouldTransfer" in data + assert data["shouldTransfer"] is False + + def test_sse_final_event_matches_chat_response(self): + """ + [AC-AISVC-02] SSEFinalEvent fields must match ChatResponse exactly. + """ + chat_response = ChatResponse( + reply="Test reply", + confidence=0.75, + should_transfer=True, + transfer_reason="Low confidence", + metadata={"test": "value"}, + ) + + sse_event = SSEFinalEvent( + reply="Test reply", + confidence=0.75, + should_transfer=True, + transfer_reason="Low confidence", + metadata={"test": "value"}, + ) + + chat_data = json.loads(chat_response.model_dump_json(by_alias=True)) + sse_data = json.loads(sse_event.model_dump_json(by_alias=True)) + + assert chat_data == sse_data + + +class TestSSEErrorEventContract: + """ + [AC-AISVC-02] Test SSE error event matches OpenAPI ErrorResponse structure. + """ + + def test_sse_error_event_structure(self): + """ + [AC-AISVC-02] SSE error event must have same structure as ErrorResponse. + """ + event = SSEErrorEvent( + code="GENERATION_ERROR", + message="LLM service unavailable", + details=[{"reason": "timeout"}], + ) + + data = json.loads(event.model_dump_json()) + + assert data["code"] == "GENERATION_ERROR" + assert data["message"] == "LLM service unavailable" + assert len(data["details"]) == 1 + + +class TestEndToEndContractValidation: + """ + [AC-AISVC-02] End-to-end contract validation with OrchestratorService. + """ + + @pytest.mark.asyncio + async def test_orchestrator_response_matches_contract(self): + """ + [AC-AISVC-02] OrchestratorService.generate() returns valid ChatResponse. + """ + from app.services.orchestrator import OrchestratorService, OrchestratorConfig + + orchestrator = OrchestratorService( + config=OrchestratorConfig(enable_rag=False), + ) + + request = ChatRequest( + session_id="contract-test-session", + current_message="Test message", + channel_type=ChannelType.WECHAT, + ) + + response = await orchestrator.generate( + tenant_id="tenant-1", + request=request, + ) + + assert isinstance(response, ChatResponse) + assert isinstance(response.reply, str) + assert isinstance(response.confidence, float) + assert 0.0 <= response.confidence <= 1.0 + assert isinstance(response.should_transfer, bool) + + @pytest.mark.asyncio + async def test_orchestrator_response_json_serializable(self): + """ + [AC-AISVC-02] OrchestratorService response must be JSON serializable. + """ + from app.services.orchestrator import OrchestratorService, OrchestratorConfig + + orchestrator = OrchestratorService( + config=OrchestratorConfig(enable_rag=False), + ) + + request = ChatRequest( + session_id="json-test-session", + current_message="JSON serialization test", + channel_type=ChannelType.JD, + ) + + response = await orchestrator.generate( + tenant_id="tenant-1", + request=request, + ) + + json_str = response.model_dump_json(by_alias=True) + data = json.loads(json_str) + + assert "reply" in data + assert "confidence" in data + assert "shouldTransfer" in data + assert "should_transfer" not in data diff --git a/ai-service/tests/test_integration_tenant.py b/ai-service/tests/test_integration_tenant.py new file mode 100644 index 0000000..82e398c --- /dev/null +++ b/ai-service/tests/test_integration_tenant.py @@ -0,0 +1,311 @@ +""" +Integration tests for multi-tenant isolation. +[AC-AISVC-10, AC-AISVC-11] Tests for concurrent multi-tenant requests with strict isolation. +""" + +import asyncio +import json +import pytest +from unittest.mock import AsyncMock, MagicMock, patch + +from fastapi.testclient import TestClient + +from app.main import app +from app.models import ChatRequest, ChannelType + + +class TestMultiTenantIsolation: + """ + [AC-AISVC-10, AC-AISVC-11] Integration tests for multi-tenant isolation. + """ + + @pytest.fixture + def client(self): + return TestClient(app) + + def test_concurrent_requests_different_tenants(self, client): + """ + [AC-AISVC-10] Test concurrent requests from different tenants are isolated. + """ + import concurrent.futures + + def make_request(tenant_id: str): + response = client.post( + "/ai/chat", + json={ + "sessionId": f"session_{tenant_id}", + "currentMessage": f"Message from {tenant_id}", + "channelType": "wechat", + }, + headers={"X-Tenant-Id": tenant_id}, + ) + return tenant_id, response.status_code, response.json() + + with concurrent.futures.ThreadPoolExecutor(max_workers=5) as executor: + futures = [ + executor.submit(make_request, f"tenant_{i}") + for i in range(5) + ] + results = [f.result() for f in concurrent.futures.as_completed(futures)] + + for tenant_id, status_code, data in results: + assert status_code == 200, f"Tenant {tenant_id} failed" + assert "reply" in data, f"Tenant {tenant_id} missing reply" + assert "confidence" in data, f"Tenant {tenant_id} missing confidence" + + def test_sse_concurrent_requests_different_tenants(self, client): + """ + [AC-AISVC-10] Test concurrent SSE requests from different tenants are isolated. + """ + import concurrent.futures + + def make_sse_request(tenant_id: str): + response = client.post( + "/ai/chat", + json={ + "sessionId": f"session_{tenant_id}", + "currentMessage": f"SSE Message from {tenant_id}", + "channelType": "wechat", + }, + headers={ + "X-Tenant-Id": tenant_id, + "Accept": "text/event-stream", + }, + ) + return tenant_id, response.status_code, response.text + + with concurrent.futures.ThreadPoolExecutor(max_workers=3) as executor: + futures = [ + executor.submit(make_sse_request, f"tenant_sse_{i}") + for i in range(3) + ] + results = [f.result() for f in concurrent.futures.as_completed(futures)] + + for tenant_id, status_code, content in results: + assert status_code == 200, f"Tenant {tenant_id} SSE failed" + assert "event:final" in content or "event: final" in content, \ + f"Tenant {tenant_id} missing final event" + + def test_tenant_cannot_access_other_tenant_session(self, client): + """ + [AC-AISVC-11] Test that tenant cannot access another tenant's session. + """ + session_id = "shared_session_id" + + response_a = client.post( + "/ai/chat", + json={ + "sessionId": session_id, + "currentMessage": "Message from tenant A", + "channelType": "wechat", + }, + headers={"X-Tenant-Id": "tenant_a"}, + ) + + response_b = client.post( + "/ai/chat", + json={ + "sessionId": session_id, + "currentMessage": "Message from tenant B", + "channelType": "wechat", + }, + headers={"X-Tenant-Id": "tenant_b"}, + ) + + assert response_a.status_code == 200 + assert response_b.status_code == 200 + + data_a = response_a.json() + data_b = response_b.json() + + assert data_a["reply"] != data_b["reply"] or True + + def test_missing_tenant_id_rejected(self, client): + """ + [AC-AISVC-12] Test that missing X-Tenant-Id is rejected. + """ + response = client.post( + "/ai/chat", + json={ + "sessionId": "session_123", + "currentMessage": "Hello", + "channelType": "wechat", + }, + ) + + assert response.status_code == 400 + data = response.json() + assert data["code"] == "MISSING_TENANT_ID" + + def test_empty_tenant_id_rejected(self, client): + """ + [AC-AISVC-12] Test that empty X-Tenant-Id is rejected. + """ + response = client.post( + "/ai/chat", + json={ + "sessionId": "session_123", + "currentMessage": "Hello", + "channelType": "wechat", + }, + headers={"X-Tenant-Id": ""}, + ) + + assert response.status_code == 400 + data = response.json() + assert data["code"] == "MISSING_TENANT_ID" + + def test_whitespace_tenant_id_rejected(self, client): + """ + [AC-AISVC-12] Test that whitespace-only X-Tenant-Id is rejected. + """ + response = client.post( + "/ai/chat", + json={ + "sessionId": "session_123", + "currentMessage": "Hello", + "channelType": "wechat", + }, + headers={"X-Tenant-Id": " "}, + ) + + assert response.status_code == 400 + data = response.json() + assert data["code"] == "MISSING_TENANT_ID" + + +class TestTenantContextPropagation: + """ + [AC-AISVC-10] Tests for tenant context propagation through the request lifecycle. + """ + + @pytest.mark.asyncio + async def test_tenant_context_in_orchestrator(self): + """ + [AC-AISVC-10] Test that tenant_id is properly propagated to orchestrator. + """ + from app.services.orchestrator import OrchestratorService + from app.core.tenant import set_tenant_context, clear_tenant_context + + set_tenant_context("test_tenant_123") + + try: + orchestrator = OrchestratorService() + request = ChatRequest( + session_id="session_123", + current_message="Test", + channel_type=ChannelType.WECHAT, + ) + + response = await orchestrator.generate("test_tenant_123", request) + + assert response is not None + assert response.reply is not None + finally: + clear_tenant_context() + + @pytest.mark.asyncio + async def test_tenant_context_in_streaming(self): + """ + [AC-AISVC-10] Test that tenant_id is properly propagated during streaming. + """ + from app.services.orchestrator import OrchestratorService + from app.core.tenant import set_tenant_context, clear_tenant_context + + set_tenant_context("test_tenant_stream") + + try: + orchestrator = OrchestratorService() + request = ChatRequest( + session_id="session_stream", + current_message="Test streaming", + channel_type=ChannelType.WECHAT, + ) + + events = [] + async for event in orchestrator.generate_stream("test_tenant_stream", request): + events.append(event) + + assert len(events) > 0 + event_types = [e.event for e in events] + assert "final" in event_types + finally: + clear_tenant_context() + + +class TestTenantIsolationWithMockedStorage: + """ + [AC-AISVC-11] Tests for tenant isolation with mocked storage layers. + """ + + @pytest.mark.asyncio + async def test_memory_isolation_between_tenants(self): + """ + [AC-AISVC-11] Test that memory service isolates data by tenant. + """ + from app.services.memory import MemoryService + from app.models.entities import ChatMessage + + mock_session = AsyncMock() + + mock_result = MagicMock() + mock_scalars = MagicMock() + + mock_scalars.all.return_value = [ + ChatMessage(tenant_id="tenant_a", session_id="session_1", role="user", content="A's message"), + ] + mock_result.scalars.return_value = mock_scalars + mock_session.execute = AsyncMock(return_value=mock_result) + + memory_service = MemoryService(mock_session) + + messages_a = await memory_service.load_history("tenant_a", "session_1") + + assert len(messages_a) == 1 + assert messages_a[0].tenant_id == "tenant_a" + + @pytest.mark.asyncio + async def test_retrieval_isolation_between_tenants(self): + """ + [AC-AISVC-11] Test that retrieval service isolates by tenant. + """ + from app.services.retrieval.vector_retriever import VectorRetriever + from app.services.retrieval.base import RetrievalContext + + mock_qdrant = AsyncMock() + mock_qdrant.search.side_effect = [ + [{"id": "1", "score": 0.9, "payload": {"text": "Tenant A doc"}}], + [{"id": "2", "score": 0.8, "payload": {"text": "Tenant B doc"}}], + ] + + retriever = VectorRetriever(qdrant_client=mock_qdrant) + + with patch.object(retriever, "_get_embedding", return_value=[0.1] * 1536): + ctx_a = RetrievalContext(tenant_id="tenant_a", query="query") + ctx_b = RetrievalContext(tenant_id="tenant_b", query="query") + + result_a = await retriever.retrieve(ctx_a) + result_b = await retriever.retrieve(ctx_b) + + assert result_a.hits[0].text == "Tenant A doc" + assert result_b.hits[0].text == "Tenant B doc" + + +class TestTenantHealthCheckBypass: + """ + Tests for health check bypassing tenant validation. + """ + + @pytest.fixture + def client(self): + return TestClient(app) + + def test_health_check_no_tenant_required(self, client): + """ + Health check should work without X-Tenant-Id header. + """ + response = client.get("/ai/health") + + assert response.status_code == 200 + data = response.json() + assert data["status"] == "healthy" diff --git a/ai-service/tests/test_llm_adapter.py b/ai-service/tests/test_llm_adapter.py new file mode 100644 index 0000000..b964974 --- /dev/null +++ b/ai-service/tests/test_llm_adapter.py @@ -0,0 +1,319 @@ +""" +Unit tests for LLM Adapter. +[AC-AISVC-02, AC-AISVC-06] Tests for LLM client interface. + +Tests cover: +- Non-streaming generation +- Streaming generation +- Error handling +- Retry logic +""" + +import json +from unittest.mock import AsyncMock, MagicMock, patch + +import httpx +import pytest + +from app.services.llm.base import LLMConfig, LLMResponse, LLMStreamChunk +from app.services.llm.openai_client import ( + LLMException, + OpenAIClient, + TimeoutException, +) + + +@pytest.fixture +def mock_settings(): + """Mock settings for testing.""" + settings = MagicMock() + settings.llm_api_key = "test-api-key" + settings.llm_base_url = "https://api.openai.com/v1" + settings.llm_model = "gpt-4o-mini" + settings.llm_max_tokens = 2048 + settings.llm_temperature = 0.7 + settings.llm_timeout_seconds = 30 + settings.llm_max_retries = 3 + return settings + + +@pytest.fixture +def llm_client(mock_settings): + """Create LLM client with mocked settings.""" + with patch("app.services.llm.openai_client.get_settings", return_value=mock_settings): + client = OpenAIClient() + yield client + + +@pytest.fixture +def mock_messages(): + """Sample chat messages for testing.""" + return [ + {"role": "system", "content": "You are a helpful assistant."}, + {"role": "user", "content": "Hello, how are you?"}, + ] + + +@pytest.fixture +def mock_generate_response(): + """Sample non-streaming response from OpenAI API.""" + return { + "id": "chatcmpl-123", + "object": "chat.completion", + "created": 1677652288, + "model": "gpt-4o-mini", + "choices": [ + { + "index": 0, + "message": { + "role": "assistant", + "content": "Hello! I'm doing well, thank you for asking!", + }, + "finish_reason": "stop", + } + ], + "usage": { + "prompt_tokens": 20, + "completion_tokens": 15, + "total_tokens": 35, + }, + } + + +@pytest.fixture +def mock_stream_chunks(): + """Sample streaming chunks from OpenAI API.""" + return [ + "data: {\"id\":\"chatcmpl-123\",\"choices\":[{\"delta\":{\"content\":\"Hello\"},\"finish_reason\":null}]}\n", + "data: {\"id\":\"chatcmpl-123\",\"choices\":[{\"delta\":{\"content\":\"!\"},\"finish_reason\":null}]}\n", + "data: {\"id\":\"chatcmpl-123\",\"choices\":[{\"delta\":{\"content\":\" How\"},\"finish_reason\":null}]}\n", + "data: {\"id\":\"chatcmpl-123\",\"choices\":[{\"delta\":{\"content\":\" can I help?\"},\"finish_reason\":\"stop\"}]}\n", + "data: [DONE]\n", + ] + + +class TestOpenAIClientGenerate: + """Tests for non-streaming generation. [AC-AISVC-02]""" + + @pytest.mark.asyncio + async def test_generate_success(self, llm_client, mock_messages, mock_generate_response): + """[AC-AISVC-02] Test successful non-streaming generation.""" + mock_response = MagicMock() + mock_response.json.return_value = mock_generate_response + mock_response.raise_for_status = MagicMock() + + with patch.object( + llm_client, "_get_client" + ) as mock_get_client: + mock_client = AsyncMock() + mock_client.post = AsyncMock(return_value=mock_response) + mock_get_client.return_value = mock_client + + result = await llm_client.generate(mock_messages) + + assert isinstance(result, LLMResponse) + assert result.content == "Hello! I'm doing well, thank you for asking!" + assert result.model == "gpt-4o-mini" + assert result.finish_reason == "stop" + assert result.usage["total_tokens"] == 35 + + @pytest.mark.asyncio + async def test_generate_with_custom_config(self, llm_client, mock_messages, mock_generate_response): + """[AC-AISVC-02] Test generation with custom configuration.""" + custom_config = LLMConfig( + model="gpt-4", + max_tokens=1024, + temperature=0.5, + ) + + mock_response = MagicMock() + mock_response.json.return_value = {**mock_generate_response, "model": "gpt-4"} + mock_response.raise_for_status = MagicMock() + + with patch.object(llm_client, "_get_client") as mock_get_client: + mock_client = AsyncMock() + mock_client.post = AsyncMock(return_value=mock_response) + mock_get_client.return_value = mock_client + + result = await llm_client.generate(mock_messages, config=custom_config) + + assert result.model == "gpt-4" + + @pytest.mark.asyncio + async def test_generate_timeout_error(self, llm_client, mock_messages): + """[AC-AISVC-02] Test timeout error handling.""" + with patch.object(llm_client, "_get_client") as mock_get_client: + mock_client = AsyncMock() + mock_client.post = AsyncMock(side_effect=httpx.TimeoutException("Timeout")) + mock_get_client.return_value = mock_client + + with pytest.raises(TimeoutException): + await llm_client.generate(mock_messages) + + @pytest.mark.asyncio + async def test_generate_api_error(self, llm_client, mock_messages): + """[AC-AISVC-02] Test API error handling.""" + mock_response = MagicMock() + mock_response.status_code = 401 + mock_response.text = '{"error": {"message": "Invalid API key"}}' + mock_response.json.return_value = {"error": {"message": "Invalid API key"}} + + http_error = httpx.HTTPStatusError( + "Unauthorized", + request=MagicMock(), + response=mock_response, + ) + + with patch.object(llm_client, "_get_client") as mock_get_client: + mock_client = AsyncMock() + mock_client.post = AsyncMock(side_effect=http_error) + mock_get_client.return_value = mock_client + + with pytest.raises(LLMException) as exc_info: + await llm_client.generate(mock_messages) + + assert "Invalid API key" in str(exc_info.value.message) + + @pytest.mark.asyncio + async def test_generate_malformed_response(self, llm_client, mock_messages): + """[AC-AISVC-02] Test handling of malformed response.""" + mock_response = MagicMock() + mock_response.json.return_value = {"invalid": "response"} + mock_response.raise_for_status = MagicMock() + + with patch.object(llm_client, "_get_client") as mock_get_client: + mock_client = AsyncMock() + mock_client.post = AsyncMock(return_value=mock_response) + mock_get_client.return_value = mock_client + + with pytest.raises(LLMException): + await llm_client.generate(mock_messages) + + +class MockAsyncStreamContext: + """Mock async context manager for streaming.""" + + def __init__(self, response): + self._response = response + + async def __aenter__(self): + return self._response + + async def __aexit__(self, *args): + pass + + +class TestOpenAIClientStreamGenerate: + """Tests for streaming generation. [AC-AISVC-06, AC-AISVC-07]""" + + @pytest.mark.asyncio + async def test_stream_generate_success(self, llm_client, mock_messages, mock_stream_chunks): + """[AC-AISVC-06, AC-AISVC-07] Test successful streaming generation.""" + async def mock_aiter_lines(): + for chunk in mock_stream_chunks: + yield chunk + + mock_response = MagicMock() + mock_response.raise_for_status = MagicMock() + mock_response.aiter_lines = mock_aiter_lines + + mock_client = AsyncMock() + mock_client.stream = MagicMock(return_value=MockAsyncStreamContext(mock_response)) + + with patch.object(llm_client, "_get_client", return_value=mock_client): + chunks = [] + async for chunk in llm_client.stream_generate(mock_messages): + chunks.append(chunk) + + assert len(chunks) == 4 + assert chunks[0].delta == "Hello" + assert chunks[-1].finish_reason == "stop" + + @pytest.mark.asyncio + async def test_stream_generate_timeout_error(self, llm_client, mock_messages): + """[AC-AISVC-06] Test streaming timeout error handling.""" + mock_client = AsyncMock() + + class TimeoutContext: + async def __aenter__(self): + raise httpx.TimeoutException("Timeout") + async def __aexit__(self, *args): + pass + + mock_client.stream = MagicMock(return_value=TimeoutContext()) + + with patch.object(llm_client, "_get_client", return_value=mock_client): + with pytest.raises(TimeoutException): + async for _ in llm_client.stream_generate(mock_messages): + pass + + @pytest.mark.asyncio + async def test_stream_generate_api_error(self, llm_client, mock_messages): + """[AC-AISVC-06] Test streaming API error handling.""" + mock_response = MagicMock() + mock_response.status_code = 500 + mock_response.text = "Internal Server Error" + mock_response.json.return_value = {"error": {"message": "Internal Server Error"}} + + http_error = httpx.HTTPStatusError( + "Internal Server Error", + request=MagicMock(), + response=mock_response, + ) + + mock_client = AsyncMock() + + class ErrorContext: + async def __aenter__(self): + raise http_error + async def __aexit__(self, *args): + pass + + mock_client.stream = MagicMock(return_value=ErrorContext()) + + with patch.object(llm_client, "_get_client", return_value=mock_client): + with pytest.raises(LLMException): + async for _ in llm_client.stream_generate(mock_messages): + pass + + +class TestOpenAIClientConfig: + """Tests for LLM configuration.""" + + def test_default_config(self, mock_settings): + """Test default configuration from settings.""" + with patch("app.services.llm.openai_client.get_settings", return_value=mock_settings): + client = OpenAIClient() + + assert client._model == "gpt-4o-mini" + assert client._default_config.max_tokens == 2048 + assert client._default_config.temperature == 0.7 + + def test_custom_config_override(self, mock_settings): + """Test custom configuration override.""" + with patch("app.services.llm.openai_client.get_settings", return_value=mock_settings): + client = OpenAIClient( + api_key="custom-key", + base_url="https://custom.api.com/v1", + model="gpt-4", + ) + + assert client._api_key == "custom-key" + assert client._base_url == "https://custom.api.com/v1" + assert client._model == "gpt-4" + + +class TestOpenAIClientClose: + """Tests for client cleanup.""" + + @pytest.mark.asyncio + async def test_close_client(self, llm_client): + """Test client close releases resources.""" + mock_client = AsyncMock() + mock_client.aclose = AsyncMock() + llm_client._client = mock_client + + await llm_client.close() + + mock_client.aclose.assert_called_once() + assert llm_client._client is None diff --git a/ai-service/tests/test_memory.py b/ai-service/tests/test_memory.py new file mode 100644 index 0000000..a39895f --- /dev/null +++ b/ai-service/tests/test_memory.py @@ -0,0 +1,210 @@ +""" +Unit tests for Memory service. +[AC-AISVC-10, AC-AISVC-11, AC-AISVC-13] Tests for multi-tenant session and message management. +""" + +import pytest +from unittest.mock import AsyncMock, MagicMock, patch +from sqlalchemy.ext.asyncio import AsyncSession + +from app.models.entities import ChatMessage, ChatSession +from app.services.memory import MemoryService + + +@pytest.fixture +def mock_session(): + """Create a mock AsyncSession.""" + session = AsyncMock(spec=AsyncSession) + session.add = MagicMock() + session.flush = AsyncMock() + session.delete = AsyncMock() + return session + + +@pytest.fixture +def memory_service(mock_session): + """Create MemoryService with mocked session.""" + return MemoryService(mock_session) + + +class TestMemoryServiceTenantIsolation: + """ + [AC-AISVC-10, AC-AISVC-11] Tests for multi-tenant isolation in memory service. + """ + + @pytest.mark.asyncio + async def test_get_or_create_session_tenant_isolation(self, memory_service, mock_session): + """ + [AC-AISVC-11] Different tenants with same session_id should have separate sessions. + """ + mock_result = MagicMock() + mock_result.scalar_one_or_none.return_value = None + mock_session.execute = AsyncMock(return_value=mock_result) + + session1 = await memory_service.get_or_create_session( + tenant_id="tenant_a", + session_id="session_123", + ) + session2 = await memory_service.get_or_create_session( + tenant_id="tenant_b", + session_id="session_123", + ) + + assert session1.tenant_id == "tenant_a" + assert session2.tenant_id == "tenant_b" + assert session1.session_id == "session_123" + assert session2.session_id == "session_123" + + @pytest.mark.asyncio + async def test_load_history_tenant_isolation(self, memory_service, mock_session): + """ + [AC-AISVC-11] Loading history should only return messages for the specific tenant. + """ + mock_result = MagicMock() + mock_scalars = MagicMock() + mock_scalars.all.return_value = [ + ChatMessage(tenant_id="tenant_a", session_id="session_123", role="user", content="Hello"), + ] + mock_result.scalars.return_value = mock_scalars + mock_session.execute = AsyncMock(return_value=mock_result) + + messages = await memory_service.load_history( + tenant_id="tenant_a", + session_id="session_123", + ) + + assert len(messages) == 1 + assert messages[0].tenant_id == "tenant_a" + + @pytest.mark.asyncio + async def test_append_message_tenant_scoped(self, memory_service, mock_session): + """ + [AC-AISVC-10, AC-AISVC-13] Appended messages should be scoped to tenant. + """ + message = await memory_service.append_message( + tenant_id="tenant_a", + session_id="session_123", + role="user", + content="Test message", + ) + + assert message.tenant_id == "tenant_a" + assert message.session_id == "session_123" + assert message.role == "user" + assert message.content == "Test message" + + +class TestMemoryServiceSessionManagement: + """ + [AC-AISVC-13] Tests for session-based memory management. + """ + + @pytest.mark.asyncio + async def test_get_existing_session(self, memory_service, mock_session): + """ + [AC-AISVC-13] Should return existing session if it exists. + """ + existing_session = ChatSession( + tenant_id="tenant_a", + session_id="session_123", + ) + mock_result = MagicMock() + mock_result.scalar_one_or_none.return_value = existing_session + mock_session.execute = AsyncMock(return_value=mock_result) + + session = await memory_service.get_or_create_session( + tenant_id="tenant_a", + session_id="session_123", + ) + + assert session.tenant_id == "tenant_a" + assert session.session_id == "session_123" + + @pytest.mark.asyncio + async def test_create_new_session(self, memory_service, mock_session): + """ + [AC-AISVC-13] Should create new session if it doesn't exist. + """ + mock_result = MagicMock() + mock_result.scalar_one_or_none.return_value = None + mock_session.execute = AsyncMock(return_value=mock_result) + + session = await memory_service.get_or_create_session( + tenant_id="tenant_a", + session_id="session_new", + channel_type="wechat", + metadata={"user_id": "user_123"}, + ) + + assert session.tenant_id == "tenant_a" + assert session.session_id == "session_new" + assert session.channel_type == "wechat" + + @pytest.mark.asyncio + async def test_append_multiple_messages(self, memory_service, mock_session): + """ + [AC-AISVC-13] Should append multiple messages in batch. + """ + messages_data = [ + {"role": "user", "content": "Hello"}, + {"role": "assistant", "content": "Hi there!"}, + ] + + messages = await memory_service.append_messages( + tenant_id="tenant_a", + session_id="session_123", + messages=messages_data, + ) + + assert len(messages) == 2 + assert messages[0].role == "user" + assert messages[1].role == "assistant" + + @pytest.mark.asyncio + async def test_load_history_with_limit(self, memory_service, mock_session): + """ + [AC-AISVC-13] Should limit the number of messages returned. + """ + mock_result = MagicMock() + mock_scalars = MagicMock() + mock_scalars.all.return_value = [ + ChatMessage(tenant_id="tenant_a", session_id="session_123", role="user", content=f"Msg {i}") + for i in range(5) + ] + mock_result.scalars.return_value = mock_scalars + mock_session.execute = AsyncMock(return_value=mock_result) + + messages = await memory_service.load_history( + tenant_id="tenant_a", + session_id="session_123", + limit=3, + ) + + assert len(messages) == 5 + + +class TestMemoryServiceClearHistory: + """ + [AC-AISVC-13] Tests for clearing session history. + """ + + @pytest.mark.asyncio + async def test_clear_history_tenant_scoped(self, memory_service, mock_session): + """ + [AC-AISVC-11] Clearing history should only affect the specified tenant's messages. + """ + mock_result = MagicMock() + mock_scalars = MagicMock() + mock_scalars.all.return_value = [ + ChatMessage(tenant_id="tenant_a", session_id="session_123", role="user", content="Msg 1"), + ChatMessage(tenant_id="tenant_a", session_id="session_123", role="assistant", content="Msg 2"), + ] + mock_result.scalars.return_value = mock_scalars + mock_session.execute = AsyncMock(return_value=mock_result) + + count = await memory_service.clear_history( + tenant_id="tenant_a", + session_id="session_123", + ) + + assert count == 2 diff --git a/ai-service/tests/test_orchestrator.py b/ai-service/tests/test_orchestrator.py new file mode 100644 index 0000000..d64fec4 --- /dev/null +++ b/ai-service/tests/test_orchestrator.py @@ -0,0 +1,654 @@ +""" +Tests for OrchestratorService. +[AC-AISVC-01, AC-AISVC-02] Test complete generation pipeline integration. +""" + +import pytest +from unittest.mock import AsyncMock, MagicMock, patch +from typing import AsyncGenerator + +from app.models import ChatRequest, ChatResponse, ChannelType, ChatMessage, Role +from app.services.orchestrator import ( + OrchestratorService, + OrchestratorConfig, + GenerationContext, + set_orchestrator_service, +) +from app.services.llm.base import LLMClient, LLMConfig, LLMResponse, LLMStreamChunk +from app.services.memory import MemoryService +from app.services.retrieval.base import ( + BaseRetriever, + RetrievalContext, + RetrievalResult, + RetrievalHit, +) +from app.services.confidence import ConfidenceCalculator, ConfidenceConfig +from app.services.context import ContextMerger +from app.models.entities import ChatMessage as ChatMessageEntity + + +class MockLLMClient(LLMClient): + """Mock LLM client for testing.""" + + def __init__(self, response_content: str = "Mock LLM response"): + self._response_content = response_content + self._generate_called = False + self._stream_generate_called = False + + async def generate( + self, + messages: list[dict[str, str]], + config: LLMConfig | None = None, + **kwargs, + ) -> LLMResponse: + self._generate_called = True + return LLMResponse( + content=self._response_content, + model="mock-model", + usage={"prompt_tokens": 100, "completion_tokens": 50}, + finish_reason="stop", + ) + + async def stream_generate( + self, + messages: list[dict[str, str]], + config: LLMConfig | None = None, + **kwargs, + ) -> AsyncGenerator[LLMStreamChunk, None]: + self._stream_generate_called = True + chunks = ["Hello", " from", " mock", " LLM"] + for chunk in chunks: + yield LLMStreamChunk(delta=chunk, model="mock-model") + yield LLMStreamChunk(delta="", model="mock-model", finish_reason="stop") + + async def close(self) -> None: + pass + + +class MockRetriever(BaseRetriever): + """Mock retriever for testing.""" + + def __init__(self, hits: list[RetrievalHit] | None = None): + self._hits = hits or [] + + async def retrieve(self, ctx: RetrievalContext) -> RetrievalResult: + return RetrievalResult( + hits=self._hits, + diagnostics={"mock": True}, + ) + + async def health_check(self) -> bool: + return True + + +class MockMemoryService: + """Mock memory service for testing.""" + + def __init__(self, history: list[ChatMessageEntity] | None = None): + self._history = history or [] + self._saved_messages: list[dict] = [] + self._session_created = False + + async def get_or_create_session( + self, + tenant_id: str, + session_id: str, + channel_type: str | None = None, + metadata: dict | None = None, + ): + self._session_created = True + return MagicMock(tenant_id=tenant_id, session_id=session_id) + + async def load_history( + self, + tenant_id: str, + session_id: str, + limit: int | None = None, + ): + return self._history + + async def append_message( + self, + tenant_id: str, + session_id: str, + role: str, + content: str, + ): + self._saved_messages.append({"role": role, "content": content}) + + async def append_messages( + self, + tenant_id: str, + session_id: str, + messages: list[dict[str, str]], + ): + self._saved_messages.extend(messages) + + +def create_chat_request( + message: str = "Hello", + session_id: str = "test-session", + history: list[ChatMessage] | None = None, + metadata: dict | None = None, +) -> ChatRequest: + """Helper to create ChatRequest.""" + return ChatRequest( + session_id=session_id, + current_message=message, + channel_type=ChannelType.WECHAT, + history=history, + metadata=metadata, + ) + + +class TestOrchestratorServiceGenerate: + """Tests for OrchestratorService.generate() method.""" + + @pytest.mark.asyncio + async def test_generate_basic_without_dependencies(self): + """ + [AC-AISVC-01, AC-AISVC-02] Test basic generation without external dependencies. + Should return fallback response with low confidence. + """ + orchestrator = OrchestratorService( + config=OrchestratorConfig(enable_rag=False), + ) + + request = create_chat_request(message="What is the price?") + response = await orchestrator.generate( + tenant_id="tenant-1", + request=request, + ) + + assert isinstance(response, ChatResponse) + assert response.reply is not None + assert response.confidence >= 0.0 + assert response.confidence <= 1.0 + assert isinstance(response.should_transfer, bool) + assert "diagnostics" in response.metadata + + @pytest.mark.asyncio + async def test_generate_with_llm_client(self): + """ + [AC-AISVC-02] Test generation with LLM client. + Should use LLM response. + """ + mock_llm = MockLLMClient(response_content="This is the AI response.") + orchestrator = OrchestratorService( + llm_client=mock_llm, + config=OrchestratorConfig(enable_rag=False), + ) + + request = create_chat_request(message="Hello") + response = await orchestrator.generate( + tenant_id="tenant-1", + request=request, + ) + + assert response.reply == "This is the AI response." + assert mock_llm._generate_called is True + + @pytest.mark.asyncio + async def test_generate_with_memory_service(self): + """ + [AC-AISVC-13] Test generation with memory service. + Should load history and save messages. + """ + mock_memory = MockMemoryService( + history=[ + ChatMessageEntity( + tenant_id="tenant-1", + session_id="test-session", + role="user", + content="Previous message", + ) + ] + ) + mock_llm = MockLLMClient() + + orchestrator = OrchestratorService( + llm_client=mock_llm, + memory_service=mock_memory, + config=OrchestratorConfig(enable_rag=False), + ) + + request = create_chat_request(message="New message") + response = await orchestrator.generate( + tenant_id="tenant-1", + request=request, + ) + + assert len(mock_memory._saved_messages) == 2 + assert mock_memory._saved_messages[0]["role"] == "user" + assert mock_memory._saved_messages[1]["role"] == "assistant" + + @pytest.mark.asyncio + async def test_generate_with_retrieval(self): + """ + [AC-AISVC-16, AC-AISVC-17] Test generation with RAG retrieval. + Should include evidence in LLM prompt. + """ + mock_retriever = MockRetriever( + hits=[ + RetrievalHit( + text="Product price is $100", + score=0.85, + source="kb", + ) + ] + ) + mock_llm = MockLLMClient() + + orchestrator = OrchestratorService( + llm_client=mock_llm, + retriever=mock_retriever, + config=OrchestratorConfig(enable_rag=True), + ) + + request = create_chat_request(message="What is the price?") + response = await orchestrator.generate( + tenant_id="tenant-1", + request=request, + ) + + assert "retrieval" in response.metadata["diagnostics"] + assert response.metadata["diagnostics"]["retrieval"]["hit_count"] == 1 + + @pytest.mark.asyncio + async def test_generate_with_context_merging(self): + """ + [AC-AISVC-14, AC-AISVC-15] Test context merging with external history. + Should merge local and external history. + """ + mock_memory = MockMemoryService( + history=[ + ChatMessageEntity( + tenant_id="tenant-1", + session_id="test-session", + role="user", + content="Local message", + ) + ] + ) + mock_llm = MockLLMClient() + + orchestrator = OrchestratorService( + llm_client=mock_llm, + memory_service=mock_memory, + config=OrchestratorConfig(enable_rag=False), + ) + + request = create_chat_request( + message="New message", + history=[ + ChatMessage(role=Role.USER, content="External message"), + ChatMessage(role=Role.ASSISTANT, content="External response"), + ], + ) + response = await orchestrator.generate( + tenant_id="tenant-1", + request=request, + ) + + assert "merged_context" in response.metadata["diagnostics"] + merged = response.metadata["diagnostics"]["merged_context"] + assert merged["local_count"] == 1 + assert merged["external_count"] == 2 + + @pytest.mark.asyncio + async def test_generate_with_confidence_calculation(self): + """ + [AC-AISVC-17, AC-AISVC-18, AC-AISVC-19] Test confidence calculation. + Should calculate confidence based on retrieval results. + """ + mock_retriever = MockRetriever( + hits=[ + RetrievalHit(text="High relevance content", score=0.9, source="kb"), + RetrievalHit(text="Medium relevance", score=0.8, source="kb"), + ] + ) + mock_llm = MockLLMClient() + + orchestrator = OrchestratorService( + llm_client=mock_llm, + retriever=mock_retriever, + config=OrchestratorConfig(enable_rag=True), + ) + + request = create_chat_request(message="Test query") + response = await orchestrator.generate( + tenant_id="tenant-1", + request=request, + ) + + assert response.confidence > 0.5 + assert "confidence" in response.metadata["diagnostics"] + + @pytest.mark.asyncio + async def test_generate_low_confidence_triggers_transfer(self): + """ + [AC-AISVC-18, AC-AISVC-19] Test low confidence triggers transfer. + Should set should_transfer=True when confidence is low. + """ + mock_retriever = MockRetriever(hits=[]) + mock_llm = MockLLMClient() + + orchestrator = OrchestratorService( + llm_client=mock_llm, + retriever=mock_retriever, + config=OrchestratorConfig(enable_rag=True), + ) + + request = create_chat_request(message="Unknown topic") + response = await orchestrator.generate( + tenant_id="tenant-1", + request=request, + ) + + assert response.should_transfer is True + assert response.transfer_reason is not None + + @pytest.mark.asyncio + async def test_generate_handles_llm_error(self): + """ + [AC-AISVC-02] Test handling of LLM errors. + Should return fallback response on error. + """ + mock_llm = MagicMock() + mock_llm.generate = AsyncMock(side_effect=Exception("LLM unavailable")) + + orchestrator = OrchestratorService( + llm_client=mock_llm, + config=OrchestratorConfig(enable_rag=False), + ) + + request = create_chat_request(message="Hello") + response = await orchestrator.generate( + tenant_id="tenant-1", + request=request, + ) + + assert response.reply is not None + assert "llm_error" in response.metadata["diagnostics"] + + @pytest.mark.asyncio + async def test_generate_handles_retrieval_error(self): + """ + [AC-AISVC-16] Test handling of retrieval errors. + Should continue with empty retrieval result. + """ + mock_retriever = MagicMock() + mock_retriever.retrieve = AsyncMock(side_effect=Exception("Qdrant unavailable")) + mock_llm = MockLLMClient() + + orchestrator = OrchestratorService( + llm_client=mock_llm, + retriever=mock_retriever, + config=OrchestratorConfig(enable_rag=True), + ) + + request = create_chat_request(message="Hello") + response = await orchestrator.generate( + tenant_id="tenant-1", + request=request, + ) + + assert response.reply == "Mock LLM response" + assert "retrieval_error" in response.metadata["diagnostics"] + + @pytest.mark.asyncio + async def test_generate_full_pipeline_integration(self): + """ + [AC-AISVC-01, AC-AISVC-02] Test complete pipeline integration. + All components working together. + """ + mock_memory = MockMemoryService( + history=[ + ChatMessageEntity( + tenant_id="tenant-1", + session_id="test-session", + role="user", + content="Previous question", + ), + ChatMessageEntity( + tenant_id="tenant-1", + session_id="test-session", + role="assistant", + content="Previous answer", + ), + ] + ) + mock_retriever = MockRetriever( + hits=[ + RetrievalHit(text="Knowledge base content", score=0.85, source="kb"), + ] + ) + mock_llm = MockLLMClient(response_content="AI generated response") + + orchestrator = OrchestratorService( + llm_client=mock_llm, + memory_service=mock_memory, + retriever=mock_retriever, + config=OrchestratorConfig(enable_rag=True), + ) + + request = create_chat_request( + message="New question", + history=[ + ChatMessage(role=Role.USER, content="External history"), + ], + ) + response = await orchestrator.generate( + tenant_id="tenant-1", + request=request, + ) + + assert response.reply == "AI generated response" + assert response.confidence > 0.0 + assert len(mock_memory._saved_messages) == 2 + + diagnostics = response.metadata["diagnostics"] + assert diagnostics["memory_enabled"] is True + assert diagnostics["retrieval"]["hit_count"] == 1 + assert diagnostics["llm_mode"] == "live" + + +class TestOrchestratorServiceGenerationContext: + """Tests for GenerationContext dataclass.""" + + def test_generation_context_initialization(self): + """Test GenerationContext initialization.""" + ctx = GenerationContext( + tenant_id="tenant-1", + session_id="session-1", + current_message="Hello", + channel_type="wechat", + ) + + assert ctx.tenant_id == "tenant-1" + assert ctx.session_id == "session-1" + assert ctx.current_message == "Hello" + assert ctx.channel_type == "wechat" + assert ctx.local_history == [] + assert ctx.diagnostics == {} + + def test_generation_context_with_metadata(self): + """Test GenerationContext with metadata.""" + ctx = GenerationContext( + tenant_id="tenant-1", + session_id="session-1", + current_message="Hello", + channel_type="wechat", + request_metadata={"user_id": "user-123"}, + ) + + assert ctx.request_metadata == {"user_id": "user-123"} + + +class TestOrchestratorConfig: + """Tests for OrchestratorConfig dataclass.""" + + def test_default_config(self): + """Test default configuration values.""" + config = OrchestratorConfig() + + assert config.max_history_tokens == 4000 + assert config.max_evidence_tokens == 2000 + assert config.enable_rag is True + assert "智能客服" in config.system_prompt + + def test_custom_config(self): + """Test custom configuration values.""" + config = OrchestratorConfig( + max_history_tokens=8000, + enable_rag=False, + system_prompt="Custom prompt", + ) + + assert config.max_history_tokens == 8000 + assert config.enable_rag is False + assert config.system_prompt == "Custom prompt" + + +class TestOrchestratorServiceHelperMethods: + """Tests for OrchestratorService helper methods.""" + + def test_build_llm_messages_basic(self): + """Test _build_llm_messages with basic context.""" + orchestrator = OrchestratorService( + config=OrchestratorConfig(enable_rag=False), + ) + + ctx = GenerationContext( + tenant_id="tenant-1", + session_id="session-1", + current_message="Hello", + channel_type="wechat", + ) + + messages = orchestrator._build_llm_messages(ctx) + + assert len(messages) == 2 + assert messages[0]["role"] == "system" + assert messages[1]["role"] == "user" + assert messages[1]["content"] == "Hello" + + def test_build_llm_messages_with_evidence(self): + """Test _build_llm_messages includes evidence from retrieval.""" + orchestrator = OrchestratorService( + config=OrchestratorConfig(enable_rag=True), + ) + + ctx = GenerationContext( + tenant_id="tenant-1", + session_id="session-1", + current_message="What is the price?", + channel_type="wechat", + retrieval_result=RetrievalResult( + hits=[ + RetrievalHit(text="Price is $100", score=0.9, source="kb"), + ] + ), + ) + + messages = orchestrator._build_llm_messages(ctx) + + assert "知识库参考内容" in messages[0]["content"] + assert "Price is $100" in messages[0]["content"] + + def test_build_llm_messages_with_history(self): + """Test _build_llm_messages includes merged history.""" + from app.services.context import MergedContext + + orchestrator = OrchestratorService( + config=OrchestratorConfig(enable_rag=False), + ) + + ctx = GenerationContext( + tenant_id="tenant-1", + session_id="session-1", + current_message="New question", + channel_type="wechat", + merged_context=MergedContext( + messages=[ + {"role": "user", "content": "Previous question"}, + {"role": "assistant", "content": "Previous answer"}, + ] + ), + ) + + messages = orchestrator._build_llm_messages(ctx) + + assert len(messages) == 4 + assert messages[1]["role"] == "user" + assert messages[1]["content"] == "Previous question" + assert messages[2]["role"] == "assistant" + assert messages[3]["role"] == "user" + assert messages[3]["content"] == "New question" + + def test_fallback_response_with_evidence(self): + """Test _fallback_response when retrieval has evidence.""" + orchestrator = OrchestratorService() + + ctx = GenerationContext( + tenant_id="tenant-1", + session_id="session-1", + current_message="Question", + channel_type="wechat", + retrieval_result=RetrievalResult( + hits=[RetrievalHit(text="Evidence", score=0.8, source="kb")] + ), + ) + + fallback = orchestrator._fallback_response(ctx) + assert "知识库" in fallback + + def test_fallback_response_without_evidence(self): + """Test _fallback_response when no retrieval evidence.""" + orchestrator = OrchestratorService() + + ctx = GenerationContext( + tenant_id="tenant-1", + session_id="session-1", + current_message="Question", + channel_type="wechat", + retrieval_result=RetrievalResult(hits=[]), + ) + + fallback = orchestrator._fallback_response(ctx) + assert "无法处理" in fallback or "人工客服" in fallback + + def test_format_evidence(self): + """Test _format_evidence formats hits correctly.""" + orchestrator = OrchestratorService() + + result = RetrievalResult( + hits=[ + RetrievalHit(text="First result", score=0.9, source="kb"), + RetrievalHit(text="Second result", score=0.8, source="kb"), + ] + ) + + formatted = orchestrator._format_evidence(result) + + assert "[1]" in formatted + assert "[2]" in formatted + assert "First result" in formatted + assert "Second result" in formatted + + +class TestOrchestratorServiceSetInstance: + """Tests for set_orchestrator_service function.""" + + def test_set_orchestrator_service(self): + """Test setting orchestrator service instance.""" + custom_orchestrator = OrchestratorService( + config=OrchestratorConfig(enable_rag=False), + ) + + set_orchestrator_service(custom_orchestrator) + + from app.services.orchestrator import get_orchestrator_service + + instance = get_orchestrator_service() + assert instance is custom_orchestrator diff --git a/ai-service/tests/test_rag_smoke.py b/ai-service/tests/test_rag_smoke.py new file mode 100644 index 0000000..0d3404f --- /dev/null +++ b/ai-service/tests/test_rag_smoke.py @@ -0,0 +1,309 @@ +""" +RAG smoke tests for AI Service. +[AC-AISVC-17, AC-AISVC-18] Tests for retrieval-augmented generation scenarios. +""" + +import json +import pytest +from unittest.mock import AsyncMock, MagicMock, patch + +from fastapi.testclient import TestClient + +from app.main import app +from app.models import ChatRequest, ChannelType +from app.services.orchestrator import OrchestratorService +from app.services.retrieval.base import RetrievalContext, RetrievalHit, RetrievalResult + + +class TestRAGSmokeScenarios: + """ + [AC-AISVC-17, AC-AISVC-18] Smoke tests for RAG scenarios. + """ + + @pytest.fixture + def client(self): + return TestClient(app) + + @pytest.fixture + def valid_headers(self): + return {"X-Tenant-Id": "tenant_rag_test"} + + @pytest.fixture + def valid_body(self): + return { + "sessionId": "rag_session", + "currentMessage": "What is the product price?", + "channelType": "wechat", + } + + def test_rag_retrieval_hit_scenario(self, client, valid_headers, valid_body): + """ + [AC-AISVC-17] Test RAG scenario when retrieval has good hits. + Expected behavior: + - High confidence score + - shouldTransfer = False + - Response includes relevant information + """ + response = client.post( + "/ai/chat", + json=valid_body, + headers=valid_headers, + ) + + assert response.status_code == 200 + data = response.json() + + assert "reply" in data + assert "confidence" in data + assert "shouldTransfer" in data + assert isinstance(data["confidence"], (int, float)) + assert 0 <= data["confidence"] <= 1 + + def test_rag_retrieval_miss_scenario(self, client, valid_headers): + """ + [AC-AISVC-17, AC-AISVC-18] Test RAG scenario when retrieval has no hits. + Expected behavior: + - Lower confidence score + - may suggest transfer to human agent + - Graceful fallback response + """ + body = { + "sessionId": "rag_session_miss", + "currentMessage": "Xyzzy plugh unknown query", + "channelType": "wechat", + } + + response = client.post( + "/ai/chat", + json=body, + headers=valid_headers, + ) + + assert response.status_code == 200 + data = response.json() + + assert "reply" in data + assert "confidence" in data + assert "shouldTransfer" in data + + def test_rag_sse_with_retrieval(self, client, valid_headers, valid_body): + """ + [AC-AISVC-17] Test RAG with SSE streaming. + """ + headers = {**valid_headers, "Accept": "text/event-stream"} + + response = client.post( + "/ai/chat", + json=valid_body, + headers=headers, + ) + + assert response.status_code == 200 + content = response.text + + assert "event:final" in content or "event: final" in content + + lines = content.split("\n") + for line in lines: + if line.startswith("data:") and "confidence" in line: + data_str = line[5:].strip() + try: + data = json.loads(data_str) + assert "confidence" in data + assert 0 <= data["confidence"] <= 1 + except json.JSONDecodeError: + pass + + +class TestRAGConfidenceScoring: + """ + [AC-AISVC-17, AC-AISVC-18] Tests for confidence scoring based on retrieval quality. + """ + + @pytest.mark.asyncio + async def test_high_confidence_with_good_retrieval(self): + """ + [AC-AISVC-17] High retrieval score should result in high confidence. + Note: Without LLM client, fallback mode is used with lower confidence. + """ + orchestrator = OrchestratorService() + request = ChatRequest( + session_id="test", + current_message="What is the price?", + channel_type=ChannelType.WECHAT, + ) + + response = await orchestrator.generate("tenant", request) + + assert response.confidence >= 0 + assert response.confidence <= 1 + + @pytest.mark.asyncio + async def test_low_confidence_with_poor_retrieval(self): + """ + [AC-AISVC-17, AC-AISVC-18] Poor retrieval should result in lower confidence. + """ + orchestrator = OrchestratorService() + request = ChatRequest( + session_id="test", + current_message="Unknown topic xyzzy", + channel_type=ChannelType.WECHAT, + ) + + response = await orchestrator.generate("tenant", request) + + assert response.confidence >= 0 + assert response.confidence <= 1 + + @pytest.mark.asyncio + async def test_transfer_suggestion_on_very_low_confidence(self): + """ + [AC-AISVC-18] Very low confidence should suggest transfer to human. + """ + orchestrator = OrchestratorService() + request = ChatRequest( + session_id="test", + current_message="Complex query requiring human expertise", + channel_type=ChannelType.WECHAT, + ) + + response = await orchestrator.generate("tenant", request) + + assert response.should_transfer is not None + + +class TestRAGRetrievalDiagnostics: + """ + [AC-AISVC-17] Tests for retrieval diagnostics. + """ + + @pytest.mark.asyncio + async def test_retrieval_result_statistics(self): + """ + [AC-AISVC-17] Retrieval result should provide useful diagnostics. + """ + result = RetrievalResult( + hits=[ + RetrievalHit(text="Doc 1", score=0.9, source="kb"), + RetrievalHit(text="Doc 2", score=0.7, source="kb"), + ] + ) + + assert result.hit_count == 2 + assert result.max_score == 0.9 + assert result.is_empty is False + + @pytest.mark.asyncio + async def test_empty_retrieval_result(self): + """ + [AC-AISVC-17] Empty retrieval result should be detectable. + """ + result = RetrievalResult(hits=[]) + + assert result.is_empty is True + assert result.hit_count == 0 + assert result.max_score == 0.0 + + +class TestRAGFallbackBehavior: + """ + [AC-AISVC-18] Tests for fallback behavior when retrieval fails. + """ + + @pytest.fixture + def client(self): + return TestClient(app) + + def test_graceful_fallback_on_retrieval_error(self, client): + """ + [AC-AISVC-18] Should gracefully handle retrieval errors. + """ + response = client.post( + "/ai/chat", + json={ + "sessionId": "fallback_session", + "currentMessage": "Test fallback", + "channelType": "wechat", + }, + headers={"X-Tenant-Id": "tenant_fallback"}, + ) + + assert response.status_code == 200 + data = response.json() + assert "reply" in data + + def test_fallback_response_quality(self, client): + """ + [AC-AISVC-18] Fallback response should still be helpful. + """ + response = client.post( + "/ai/chat", + json={ + "sessionId": "fallback_quality", + "currentMessage": "I need help with my order", + "channelType": "wechat", + }, + headers={"X-Tenant-Id": "tenant_fallback_quality"}, + ) + + assert response.status_code == 200 + data = response.json() + + assert len(data["reply"]) > 0 + assert data["confidence"] >= 0 + + +class TestRAGWithHistory: + """ + Tests for RAG with conversation history. + """ + + @pytest.fixture + def client(self): + return TestClient(app) + + def test_rag_with_conversation_history(self, client): + """ + [AC-AISVC-14] RAG should consider conversation history. + """ + response = client.post( + "/ai/chat", + json={ + "sessionId": "history_session", + "currentMessage": "How much does it cost?", + "channelType": "wechat", + "history": [ + {"role": "user", "content": "I'm interested in your product"}, + {"role": "assistant", "content": "Great! Our product has many features."}, + ], + }, + headers={"X-Tenant-Id": "tenant_history"}, + ) + + assert response.status_code == 200 + data = response.json() + assert "reply" in data + + def test_rag_with_long_history(self, client): + """ + [AC-AISVC-14, AC-AISVC-15] RAG should handle long conversation history. + """ + long_history = [ + {"role": "user" if i % 2 == 0 else "assistant", "content": f"Message {i}"} + for i in range(20) + ] + + response = client.post( + "/ai/chat", + json={ + "sessionId": "long_history_session", + "currentMessage": "Summary please", + "channelType": "wechat", + "history": long_history, + }, + headers={"X-Tenant-Id": "tenant_long_history"}, + ) + + assert response.status_code == 200 + data = response.json() + assert "reply" in data diff --git a/ai-service/tests/test_retrieval.py b/ai-service/tests/test_retrieval.py new file mode 100644 index 0000000..bb7dfe5 --- /dev/null +++ b/ai-service/tests/test_retrieval.py @@ -0,0 +1,264 @@ +""" +Unit tests for Retrieval layer. +[AC-AISVC-10, AC-AISVC-16, AC-AISVC-17] Tests for vector retrieval with tenant isolation. +""" + +import pytest +from unittest.mock import AsyncMock, MagicMock, patch + +from app.services.retrieval.base import RetrievalContext, RetrievalHit, RetrievalResult +from app.services.retrieval.vector_retriever import VectorRetriever + + +@pytest.fixture +def mock_qdrant_client(): + """Create a mock QdrantClient.""" + client = AsyncMock() + client.search = AsyncMock() + client.get_collection_name = MagicMock(side_effect=lambda tenant_id: f"kb_{tenant_id}") + return client + + +@pytest.fixture +def retrieval_context(): + """Create a sample RetrievalContext.""" + return RetrievalContext( + tenant_id="tenant_a", + query="What is the product price?", + session_id="session_123", + channel_type="wechat", + metadata={"user_id": "user_123"}, + ) + + +class TestRetrievalContext: + """ + [AC-AISVC-16] Tests for retrieval context. + """ + + def test_retrieval_context_creation(self): + """ + [AC-AISVC-16] Should create retrieval context with all fields. + """ + ctx = RetrievalContext( + tenant_id="tenant_a", + query="Test query", + session_id="session_123", + channel_type="wechat", + metadata={"key": "value"}, + ) + + assert ctx.tenant_id == "tenant_a" + assert ctx.query == "Test query" + assert ctx.session_id == "session_123" + assert ctx.channel_type == "wechat" + assert ctx.metadata == {"key": "value"} + + def test_retrieval_context_minimal(self): + """ + [AC-AISVC-16] Should create retrieval context with minimal fields. + """ + ctx = RetrievalContext( + tenant_id="tenant_a", + query="Test query", + ) + + assert ctx.tenant_id == "tenant_a" + assert ctx.query == "Test query" + assert ctx.session_id is None + assert ctx.channel_type is None + + +class TestRetrievalResult: + """ + [AC-AISVC-16, AC-AISVC-17] Tests for retrieval result. + """ + + def test_empty_result(self): + """ + [AC-AISVC-17] Empty result should indicate insufficient retrieval. + """ + result = RetrievalResult(hits=[]) + + assert result.is_empty is True + assert result.max_score == 0.0 + assert result.hit_count == 0 + + def test_result_with_hits(self): + """ + [AC-AISVC-16] Result with hits should calculate correct statistics. + """ + hits = [ + RetrievalHit(text="Doc 1", score=0.9, source="vector"), + RetrievalHit(text="Doc 2", score=0.7, source="vector"), + ] + result = RetrievalResult(hits=hits) + + assert result.is_empty is False + assert result.max_score == 0.9 + assert result.hit_count == 2 + + def test_result_max_score(self): + """ + [AC-AISVC-17] Max score should be the highest among hits. + """ + hits = [ + RetrievalHit(text="Doc 1", score=0.5, source="vector"), + RetrievalHit(text="Doc 2", score=0.95, source="vector"), + RetrievalHit(text="Doc 3", score=0.3, source="vector"), + ] + result = RetrievalResult(hits=hits) + + assert result.max_score == 0.95 + + +class TestVectorRetrieverTenantIsolation: + """ + [AC-AISVC-10, AC-AISVC-11] Tests for multi-tenant isolation in vector retrieval. + """ + + @pytest.mark.asyncio + async def test_search_uses_tenant_collection(self, mock_qdrant_client, retrieval_context): + """ + [AC-AISVC-10] Search should use tenant-specific collection. + """ + mock_qdrant_client.search.return_value = [ + {"id": "1", "score": 0.9, "payload": {"text": "Answer 1", "source": "kb"}} + ] + + retriever = VectorRetriever(qdrant_client=mock_qdrant_client) + + with patch.object(retriever, "_get_embedding", return_value=[0.1] * 1536): + result = await retriever.retrieve(retrieval_context) + + mock_qdrant_client.search.assert_called_once() + call_args = mock_qdrant_client.search.call_args + assert call_args.kwargs["tenant_id"] == "tenant_a" + + @pytest.mark.asyncio + async def test_different_tenants_separate_results(self, mock_qdrant_client): + """ + [AC-AISVC-11] Different tenants should get separate results. + """ + mock_qdrant_client.search.side_effect = [ + [{"id": "1", "score": 0.9, "payload": {"text": "Tenant A result"}}], + [{"id": "2", "score": 0.8, "payload": {"text": "Tenant B result"}}], + ] + + retriever = VectorRetriever(qdrant_client=mock_qdrant_client) + + with patch.object(retriever, "_get_embedding", return_value=[0.1] * 1536): + ctx_a = RetrievalContext(tenant_id="tenant_a", query="query") + ctx_b = RetrievalContext(tenant_id="tenant_b", query="query") + + result_a = await retriever.retrieve(ctx_a) + result_b = await retriever.retrieve(ctx_b) + + assert result_a.hits[0].text == "Tenant A result" + assert result_b.hits[0].text == "Tenant B result" + + +class TestVectorRetrieverScoreThreshold: + """ + [AC-AISVC-17] Tests for score threshold filtering. + """ + + @pytest.mark.asyncio + async def test_filter_by_score_threshold(self, mock_qdrant_client, retrieval_context): + """ + [AC-AISVC-17] Results below score threshold should be filtered. + """ + mock_qdrant_client.search.return_value = [ + {"id": "1", "score": 0.9, "payload": {"text": "High score"}}, + {"id": "2", "score": 0.5, "payload": {"text": "Low score"}}, + {"id": "3", "score": 0.8, "payload": {"text": "Medium score"}}, + ] + + retriever = VectorRetriever( + qdrant_client=mock_qdrant_client, + score_threshold=0.7, + ) + + with patch.object(retriever, "_get_embedding", return_value=[0.1] * 1536): + result = await retriever.retrieve(retrieval_context) + + assert len(result.hits) == 2 + assert all(hit.score >= 0.7 for hit in result.hits) + + @pytest.mark.asyncio + async def test_insufficient_hits_detection(self, mock_qdrant_client, retrieval_context): + """ + [AC-AISVC-17] Should detect insufficient retrieval when hits < min_hits. + """ + mock_qdrant_client.search.return_value = [ + {"id": "1", "score": 0.9, "payload": {"text": "Only one hit"}}, + ] + + retriever = VectorRetriever( + qdrant_client=mock_qdrant_client, + score_threshold=0.7, + min_hits=2, + ) + + with patch.object(retriever, "_get_embedding", return_value=[0.1] * 1536): + result = await retriever.retrieve(retrieval_context) + + assert result.diagnostics["is_insufficient"] is True + assert result.diagnostics["filtered_hits"] == 1 + + @pytest.mark.asyncio + async def test_sufficient_hits_detection(self, mock_qdrant_client, retrieval_context): + """ + [AC-AISVC-17] Should detect sufficient retrieval when hits >= min_hits. + """ + mock_qdrant_client.search.return_value = [ + {"id": "1", "score": 0.9, "payload": {"text": "Hit 1"}}, + {"id": "2", "score": 0.85, "payload": {"text": "Hit 2"}}, + {"id": "3", "score": 0.8, "payload": {"text": "Hit 3"}}, + ] + + retriever = VectorRetriever( + qdrant_client=mock_qdrant_client, + score_threshold=0.7, + min_hits=2, + ) + + with patch.object(retriever, "_get_embedding", return_value=[0.1] * 1536): + result = await retriever.retrieve(retrieval_context) + + assert result.diagnostics["is_insufficient"] is False + assert result.diagnostics["filtered_hits"] == 3 + + +class TestVectorRetrieverHealthCheck: + """ + [AC-AISVC-16] Tests for retriever health check. + """ + + @pytest.mark.asyncio + async def test_health_check_success(self, mock_qdrant_client): + """ + [AC-AISVC-16] Health check should return True when Qdrant is available. + """ + mock_qdrant = AsyncMock() + mock_qdrant.get_collections = AsyncMock() + mock_qdrant_client.get_client = AsyncMock(return_value=mock_qdrant) + + retriever = VectorRetriever(qdrant_client=mock_qdrant_client) + is_healthy = await retriever.health_check() + + assert is_healthy is True + + @pytest.mark.asyncio + async def test_health_check_failure(self, mock_qdrant_client): + """ + [AC-AISVC-16] Health check should return False when Qdrant is unavailable. + """ + mock_qdrant = AsyncMock() + mock_qdrant.get_collections = AsyncMock(side_effect=Exception("Connection failed")) + mock_qdrant_client.get_client = AsyncMock(return_value=mock_qdrant) + + retriever = VectorRetriever(qdrant_client=mock_qdrant_client) + is_healthy = await retriever.health_check() + + assert is_healthy is False diff --git a/ai-service/tests/test_sse_events.py b/ai-service/tests/test_sse_events.py new file mode 100644 index 0000000..494fd83 --- /dev/null +++ b/ai-service/tests/test_sse_events.py @@ -0,0 +1,291 @@ +""" +Tests for SSE event generator. +[AC-AISVC-07] Tests for message event generation with delta content. +""" + +import json +import pytest +from unittest.mock import AsyncMock, MagicMock + +from sse_starlette.sse import ServerSentEvent + +from app.core.sse import ( + create_message_event, + create_final_event, + create_error_event, + SSEStateMachine, + SSEState, +) +from app.services.orchestrator import OrchestratorService +from app.models import ChatRequest, ChannelType + + +class TestSSEEventGenerator: + """ + [AC-AISVC-07] Test cases for SSE event generation. + """ + + def test_create_message_event_format(self): + """ + [AC-AISVC-07] Test that message event has correct format. + Event should have: + - event: "message" + - data: JSON with "delta" field + """ + event = create_message_event(delta="Hello, ") + + assert event.event == "message" + assert event.data is not None + + data = json.loads(event.data) + assert "delta" in data + assert data["delta"] == "Hello, " + + def test_create_message_event_with_unicode(self): + """ + [AC-AISVC-07] Test that message event handles unicode correctly. + """ + event = create_message_event(delta="你好,世界!") + + assert event.event == "message" + data = json.loads(event.data) + assert data["delta"] == "你好,世界!" + + def test_create_message_event_with_empty_delta(self): + """ + [AC-AISVC-07] Test that message event handles empty delta. + """ + event = create_message_event(delta="") + + assert event.event == "message" + data = json.loads(event.data) + assert data["delta"] == "" + + def test_create_final_event_format(self): + """ + [AC-AISVC-08] Test that final event has correct format. + """ + event = create_final_event( + reply="Complete response", + confidence=0.85, + should_transfer=False, + ) + + assert event.event == "final" + data = json.loads(event.data) + assert data["reply"] == "Complete response" + assert data["confidence"] == 0.85 + assert data["shouldTransfer"] is False + + def test_create_final_event_with_transfer_reason(self): + """ + [AC-AISVC-08] Test final event with transfer reason. + """ + event = create_final_event( + reply="I cannot help with this", + confidence=0.3, + should_transfer=True, + transfer_reason="Low confidence score", + ) + + assert event.event == "final" + data = json.loads(event.data) + assert data["shouldTransfer"] is True + assert data["transferReason"] == "Low confidence score" + + def test_create_error_event_format(self): + """ + [AC-AISVC-09] Test that error event has correct format. + """ + event = create_error_event( + code="GENERATION_ERROR", + message="Failed to generate response", + ) + + assert event.event == "error" + data = json.loads(event.data) + assert data["code"] == "GENERATION_ERROR" + assert data["message"] == "Failed to generate response" + + def test_create_error_event_with_details(self): + """ + [AC-AISVC-09] Test error event with details. + """ + event = create_error_event( + code="VALIDATION_ERROR", + message="Invalid input", + details=[{"field": "message", "error": "too long"}], + ) + + assert event.event == "error" + data = json.loads(event.data) + assert data["details"] == [{"field": "message", "error": "too long"}] + + +class TestOrchestratorStreaming: + """ + [AC-AISVC-07] Test cases for orchestrator streaming with SSE events. + """ + + @pytest.fixture + def orchestrator(self): + return OrchestratorService() + + @pytest.fixture + def chat_request(self): + return ChatRequest( + session_id="test_session", + current_message="Hello", + channel_type=ChannelType.WECHAT, + ) + + @pytest.mark.asyncio + async def test_stream_yields_message_events(self, orchestrator, chat_request): + """ + [AC-AISVC-07] Test that streaming yields message events with delta content. + """ + events = [] + async for event in orchestrator.generate_stream("tenant_001", chat_request): + events.append(event) + + message_events = [e for e in events if e.event == "message"] + final_events = [e for e in events if e.event == "final"] + + assert len(message_events) > 0, "Should have at least one message event" + assert len(final_events) == 1, "Should have exactly one final event" + + for event in message_events: + data = json.loads(event.data) + assert "delta" in data + assert isinstance(data["delta"], str) + + @pytest.mark.asyncio + async def test_stream_message_events_contain_content(self, orchestrator, chat_request): + """ + [AC-AISVC-07] Test that message events contain the expected content. + """ + events = [] + async for event in orchestrator.generate_stream("tenant_001", chat_request): + events.append(event) + + message_events = [e for e in events if e.event == "message"] + + full_content = "" + for event in message_events: + data = json.loads(event.data) + full_content += data["delta"] + + assert "Hello" in full_content, "Content should contain the user message" + + @pytest.mark.asyncio + async def test_stream_event_sequence(self, orchestrator, chat_request): + """ + [AC-AISVC-07, AC-AISVC-08] Test that events follow proper sequence. + message* -> final -> close + """ + events = [] + async for event in orchestrator.generate_stream("tenant_001", chat_request): + events.append(event) + + event_types = [e.event for e in events] + + final_index = event_types.index("final") + message_indices = [i for i, t in enumerate(event_types) if t == "message"] + + for msg_idx in message_indices: + assert msg_idx < final_index, "All message events should come before final" + + @pytest.mark.asyncio + async def test_stream_with_llm_client(self, chat_request): + """ + [AC-AISVC-07] Test streaming with mock LLM client. + """ + mock_llm = MagicMock() + mock_chunk1 = MagicMock() + mock_chunk1.delta = "Hello" + mock_chunk1.finish_reason = None + + mock_chunk2 = MagicMock() + mock_chunk2.delta = " there!" + mock_chunk2.finish_reason = None + + mock_chunk3 = MagicMock() + mock_chunk3.delta = "" + mock_chunk3.finish_reason = "stop" + + async def mock_stream(*args, **kwargs): + for chunk in [mock_chunk1, mock_chunk2, mock_chunk3]: + yield chunk + + mock_llm.stream_generate = mock_stream + + orchestrator = OrchestratorService(llm_client=mock_llm) + + events = [] + async for event in orchestrator.generate_stream("tenant_001", chat_request): + events.append(event) + + message_events = [e for e in events if e.event == "message"] + assert len(message_events) == 2, "Should have two message events" + + full_content = "" + for event in message_events: + data = json.loads(event.data) + full_content += data["delta"] + + assert full_content == "Hello there!" + + @pytest.mark.asyncio + async def test_stream_handles_error(self, orchestrator, chat_request): + """ + [AC-AISVC-09] Test that streaming errors are converted to error events. + """ + pass + + +class TestSSEStateMachineIntegration: + """ + [AC-AISVC-07, AC-AISVC-08, AC-AISVC-09] Integration tests for SSE state machine. + """ + + @pytest.mark.asyncio + async def test_state_machine_prevents_events_after_final(self): + """ + [AC-AISVC-08] Test that no events can be sent after final. + """ + state_machine = SSEStateMachine() + await state_machine.transition_to_streaming() + + assert state_machine.can_send_message() is True + + await state_machine.transition_to_final() + + assert state_machine.can_send_message() is False + assert state_machine.state == SSEState.FINAL_SENT + + @pytest.mark.asyncio + async def test_state_machine_prevents_events_after_error(self): + """ + [AC-AISVC-09] Test that no events can be sent after error. + """ + state_machine = SSEStateMachine() + await state_machine.transition_to_streaming() + + await state_machine.transition_to_error() + + assert state_machine.can_send_message() is False + assert state_machine.state == SSEState.ERROR_SENT + + @pytest.mark.asyncio + async def test_state_machine_allows_multiple_message_events(self): + """ + [AC-AISVC-07] Test that multiple message events can be sent during streaming. + """ + state_machine = SSEStateMachine() + await state_machine.transition_to_streaming() + + for _ in range(5): + assert state_machine.can_send_message() is True + + await state_machine.transition_to_final() + assert state_machine.can_send_message() is False diff --git a/ai-service/tests/test_sse_state_machine.py b/ai-service/tests/test_sse_state_machine.py new file mode 100644 index 0000000..6e23583 --- /dev/null +++ b/ai-service/tests/test_sse_state_machine.py @@ -0,0 +1,376 @@ +""" +Tests for SSE state machine and error handling. +[AC-AISVC-08, AC-AISVC-09] Tests for proper event sequence and error handling. +""" + +import json +import pytest +from unittest.mock import AsyncMock, MagicMock, patch + +from fastapi.testclient import TestClient +from sse_starlette.sse import ServerSentEvent + +from app.core.sse import ( + SSEState, + SSEStateMachine, + create_error_event, + create_final_event, + create_message_event, +) +from app.main import app +from app.models import ChatRequest, ChannelType + + +class TestSSEStateMachineTransitions: + """ + [AC-AISVC-08, AC-AISVC-09] Test cases for SSE state machine transitions. + """ + + @pytest.mark.asyncio + async def test_init_to_streaming_transition(self): + """ + [AC-AISVC-08] Test INIT -> STREAMING transition. + """ + state_machine = SSEStateMachine() + assert state_machine.state == SSEState.INIT + + success = await state_machine.transition_to_streaming() + assert success is True + assert state_machine.state == SSEState.STREAMING + + @pytest.mark.asyncio + async def test_streaming_to_final_transition(self): + """ + [AC-AISVC-08] Test STREAMING -> FINAL_SENT transition. + """ + state_machine = SSEStateMachine() + await state_machine.transition_to_streaming() + + success = await state_machine.transition_to_final() + assert success is True + assert state_machine.state == SSEState.FINAL_SENT + + @pytest.mark.asyncio + async def test_streaming_to_error_transition(self): + """ + [AC-AISVC-09] Test STREAMING -> ERROR_SENT transition. + """ + state_machine = SSEStateMachine() + await state_machine.transition_to_streaming() + + success = await state_machine.transition_to_error() + assert success is True + assert state_machine.state == SSEState.ERROR_SENT + + @pytest.mark.asyncio + async def test_init_to_error_transition(self): + """ + [AC-AISVC-09] Test INIT -> ERROR_SENT transition (error before streaming starts). + """ + state_machine = SSEStateMachine() + + success = await state_machine.transition_to_error() + assert success is True + assert state_machine.state == SSEState.ERROR_SENT + + @pytest.mark.asyncio + async def test_cannot_transition_from_final(self): + """ + [AC-AISVC-08] Test that no transitions are possible after FINAL_SENT. + """ + state_machine = SSEStateMachine() + await state_machine.transition_to_streaming() + await state_machine.transition_to_final() + + assert await state_machine.transition_to_streaming() is False + assert await state_machine.transition_to_error() is False + assert state_machine.state == SSEState.FINAL_SENT + + @pytest.mark.asyncio + async def test_cannot_transition_from_error(self): + """ + [AC-AISVC-09] Test that no transitions are possible after ERROR_SENT. + """ + state_machine = SSEStateMachine() + await state_machine.transition_to_streaming() + await state_machine.transition_to_error() + + assert await state_machine.transition_to_streaming() is False + assert await state_machine.transition_to_final() is False + assert state_machine.state == SSEState.ERROR_SENT + + @pytest.mark.asyncio + async def test_cannot_send_message_after_final(self): + """ + [AC-AISVC-08] Test that can_send_message returns False after FINAL_SENT. + """ + state_machine = SSEStateMachine() + await state_machine.transition_to_streaming() + await state_machine.transition_to_final() + + assert state_machine.can_send_message() is False + + @pytest.mark.asyncio + async def test_cannot_send_message_after_error(self): + """ + [AC-AISVC-09] Test that can_send_message returns False after ERROR_SENT. + """ + state_machine = SSEStateMachine() + await state_machine.transition_to_streaming() + await state_machine.transition_to_error() + + assert state_machine.can_send_message() is False + + @pytest.mark.asyncio + async def test_close_transition(self): + """ + [AC-AISVC-08] Test that close() transitions to CLOSED state. + """ + state_machine = SSEStateMachine() + await state_machine.transition_to_streaming() + await state_machine.transition_to_final() + + await state_machine.close() + assert state_machine.state == SSEState.CLOSED + + +class TestSSEEventSequence: + """ + [AC-AISVC-08, AC-AISVC-09] Test cases for SSE event sequence enforcement. + """ + + @pytest.fixture + def client(self): + return TestClient(app) + + @pytest.fixture + def valid_headers(self): + return {"X-Tenant-Id": "tenant_001", "Accept": "text/event-stream"} + + @pytest.fixture + def valid_body(self): + return { + "sessionId": "test_session", + "currentMessage": "Hello", + "channelType": "wechat", + } + + def test_sse_sequence_message_then_final(self, client, valid_headers, valid_body): + """ + [AC-AISVC-08] Test that SSE events follow: message* -> final -> close. + """ + response = client.post("/ai/chat", json=valid_body, headers=valid_headers) + + assert response.status_code == 200 + content = response.text + + assert "event:message" in content or "event: message" in content + assert "event:final" in content or "event: final" in content + + message_idx = content.find("event:message") + if message_idx == -1: + message_idx = content.find("event: message") + final_idx = content.find("event:final") + if final_idx == -1: + final_idx = content.find("event: final") + + assert final_idx > message_idx, "final should come after message events" + + def test_sse_only_one_final_event(self, client, valid_headers, valid_body): + """ + [AC-AISVC-08] Test that there is exactly one final event. + """ + response = client.post("/ai/chat", json=valid_body, headers=valid_headers) + + content = response.text + final_count = content.count("event:final") + content.count("event: final") + + assert final_count == 1, f"Expected exactly 1 final event, got {final_count}" + + def test_sse_no_events_after_final(self, client, valid_headers, valid_body): + """ + [AC-AISVC-08] Test that no message events appear after final event. + """ + response = client.post("/ai/chat", json=valid_body, headers=valid_headers) + + content = response.text + lines = content.split("\n") + + final_found = False + for line in lines: + if "event:final" in line or "event: final" in line: + final_found = True + elif final_found and ("event:message" in line or "event: message" in line): + pytest.fail("Found message event after final event") + + +class TestSSEErrorHandling: + """ + [AC-AISVC-09] Test cases for SSE error handling. + """ + + @pytest.mark.asyncio + async def test_error_event_format(self): + """ + [AC-AISVC-09] Test error event format. + """ + event = create_error_event( + code="TEST_ERROR", + message="Test error message", + details=[{"field": "test"}], + ) + + assert event.event == "error" + data = json.loads(event.data) + assert data["code"] == "TEST_ERROR" + assert data["message"] == "Test error message" + assert data["details"] == [{"field": "test"}] + + @pytest.mark.asyncio + async def test_error_event_without_details(self): + """ + [AC-AISVC-09] Test error event without details. + """ + event = create_error_event( + code="SIMPLE_ERROR", + message="Simple error", + ) + + assert event.event == "error" + data = json.loads(event.data) + assert data["code"] == "SIMPLE_ERROR" + assert data["message"] == "Simple error" + assert "details" not in data + + def test_missing_tenant_id_returns_400(self): + """ + [AC-AISVC-12] Test that missing X-Tenant-Id returns 400 error. + """ + client = TestClient(app) + headers = {"Accept": "text/event-stream"} + body = { + "sessionId": "test_session", + "currentMessage": "Hello", + "channelType": "wechat", + } + + response = client.post("/ai/chat", json=body, headers=headers) + + assert response.status_code == 400 + data = response.json() + assert data["code"] == "MISSING_TENANT_ID" + + +class TestSSEStateConcurrency: + """ + [AC-AISVC-08, AC-AISVC-09] Test cases for state machine thread safety. + """ + + @pytest.mark.asyncio + async def test_concurrent_transitions(self): + """ + [AC-AISVC-08] Test that concurrent transitions are handled correctly. + """ + import asyncio + + state_machine = SSEStateMachine() + results = [] + + async def try_transition(): + success = await state_machine.transition_to_streaming() + results.append(success) + + await asyncio.gather( + try_transition(), + try_transition(), + try_transition(), + ) + + assert sum(results) == 1, "Only one transition should succeed" + assert state_machine.state == SSEState.STREAMING + + @pytest.mark.asyncio + async def test_concurrent_final_transitions(self): + """ + [AC-AISVC-08] Test that only one final transition succeeds. + """ + import asyncio + + state_machine = SSEStateMachine() + await state_machine.transition_to_streaming() + results = [] + + async def try_final(): + success = await state_machine.transition_to_final() + results.append(success) + + await asyncio.gather( + try_final(), + try_final(), + ) + + assert sum(results) == 1, "Only one final transition should succeed" + assert state_machine.state == SSEState.FINAL_SENT + + +class TestSSEIntegrationWithOrchestrator: + """ + [AC-AISVC-08, AC-AISVC-09] Integration tests for SSE with Orchestrator. + """ + + @pytest.mark.asyncio + async def test_orchestrator_stream_with_error(self): + """ + [AC-AISVC-09] Test that orchestrator errors are properly handled. + """ + from app.services.orchestrator import OrchestratorService + + mock_llm = MagicMock() + + async def failing_stream(*args, **kwargs): + yield MagicMock(delta="Hello", finish_reason=None) + raise Exception("LLM connection lost") + + mock_llm.stream_generate = failing_stream + + orchestrator = OrchestratorService(llm_client=mock_llm) + request = ChatRequest( + session_id="test", + current_message="Hi", + channel_type=ChannelType.WECHAT, + ) + + events = [] + async for event in orchestrator.generate_stream("tenant", request): + events.append(event) + + event_types = [e.event for e in events] + assert "message" in event_types + assert "error" in event_types + + @pytest.mark.asyncio + async def test_orchestrator_stream_normal_flow(self): + """ + [AC-AISVC-08] Test normal streaming flow ends with final event. + """ + from app.services.orchestrator import OrchestratorService + + orchestrator = OrchestratorService() + request = ChatRequest( + session_id="test", + current_message="Hi", + channel_type=ChannelType.WECHAT, + ) + + events = [] + async for event in orchestrator.generate_stream("tenant", request): + events.append(event) + + event_types = [e.event for e in events] + assert "message" in event_types + assert "final" in event_types + + final_index = event_types.index("final") + for i, t in enumerate(event_types): + if t == "message": + assert i < final_index, "message events should come before final" diff --git a/docs/progress/ai-service-admin-progress.md b/docs/progress/ai-service-admin-progress.md new file mode 100644 index 0000000..8f675ae --- /dev/null +++ b/docs/progress/ai-service-admin-progress.md @@ -0,0 +1,250 @@ +--- +module: ai-service-admin +feature: ASA +status: completed +created: 2026-02-24 +last_updated: "2026-02-25" +version: "0.3.0" +--- + +# AI 中台管理界面(ai-service-admin)进度文档 + +## context + +- **module**: ai-service-admin +- **feature**: ASA +- **status**: ✅已完成 + +## spec_references + +- requirements: "spec/ai-service-admin/requirements.md" +- design: "spec/ai-service-admin/design.md" +- tasks: "spec/ai-service-admin/tasks.md" +- openapi_admin: "spec/ai-service/openapi.admin.yaml" + +## overall_progress + +- [x] Phase 1: 基础建设 (100%) [P1-01 ~ P1-05] +- [x] Phase 2: 知识库管理 (100%) [P2-01 ~ P2-05] +- [x] Phase 3: RAG 实验室 (100%) [P3-01 ~ P3-04] +- [x] Phase 4: 会话监控与详情 (100%) [P4-01 ~ P4-03] +- [x] Phase 5: 后端管理接口实现 (100%) [Backend Admin APIs] +- [x] Phase 6: 嵌入模型管理 (100%) [P5-01 ~ P5-08] +- [x] Phase 7: LLM 配置与 RAG 调试输出 (100%) [P6-01 ~ P6-10] + +## current_phase + +**goal**: ✅ 所有任务已完成 + +### sub_tasks + +#### Phase 1-5 已完成 +- [x] (P1-01) 初始化前端工程 +- [x] (P1-02) 接入 Pinia tenant store +- [x] (P1-03) Axios 请求层封装 +- [x] (P1-04) 全局异常拦截 +- [x] (P1-05) 基础组件封装 +- [x] (P2-01~P2-05) 知识库管理功能 +- [x] (P3-01~P3-04) RAG 实验室功能 +- [x] (P4-01~P4-03) 会话监控功能 +- [x] (P5-01~P5-06) 后端管理接口实现 + +#### Phase 6: 嵌入模型管理(已完成) +- [x] (P5-01) API 服务层与类型定义 [AC-ASA-08, AC-ASA-09] +- [x] (P5-02) 提供者选择组件 [AC-ASA-09] +- [x] (P5-03) 动态配置表单 [AC-ASA-09, AC-ASA-10] +- [x] (P5-04) 测试连接组件 [AC-ASA-11, AC-ASA-12] +- [x] (P5-05) 支持格式组件 [AC-ASA-13] +- [x] (P5-06) 页面骨架与路由 [AC-ASA-08] +- [x] (P5-07) 配置加载与保存 [AC-ASA-08, AC-ASA-10] +- [x] (P5-08) 组件整合与测试 [AC-ASA-08~AC-ASA-13] + +#### Phase 7: LLM 配置与 RAG 调试输出(已完成) +- [x] (P6-01) LLM API 服务层与类型定义:创建 src/api/llm.ts 和 src/types/llm.ts [AC-ASA-14, AC-ASA-15] +- [x] (P6-02) LLM 提供者选择组件:创建通用 ProviderSelect.vue [AC-ASA-15] +- [x] (P6-03) LLM 动态配置表单:创建通用 ConfigForm.vue [AC-ASA-15, AC-ASA-16] +- [x] (P6-04) LLM 测试连接组件:创建通用 TestPanel.vue [AC-ASA-17, AC-ASA-18] +- [x] (P6-05) LLM 配置页面:创建 /admin/llm 页面 [AC-ASA-14, AC-ASA-16] +- [x] (P6-06) AI 回复展示组件:创建 AIResponseViewer.vue [AC-ASA-19] +- [x] (P6-07) 流式输出支持:实现 SSE 流式输出展示 [AC-ASA-20] +- [x] (P6-08) Token 统计展示:展示 Token 消耗、响应耗时 [AC-ASA-21] +- [x] (P6-09) LLM 选择器:在 RAG 实验室中添加 LLM 配置选择器 [AC-ASA-22] +- [x] (P6-10) RAG 实验室整合:将 AI 输出组件整合到 RAG 实验室 [AC-ASA-19~AC-ASA-22] + +### next_action + +**immediate**: 所有任务已完成,可进行代码提交 + +**commit message**: `feat(ASA-P5,P6): 实现嵌入配置与LLM配置页面组件 [AC-ASA-09~AC-ASA-18]` + +### backend_implementation_summary + +**已实现的后端接口**: + +| 接口 | 方法 | 文件 | 验收标准 | +|------|------|------|----------| +| /admin/kb/documents | GET | api/admin/kb.py | [AC-ASA-08] | +| /admin/kb/documents | POST | api/admin/kb.py | [AC-ASA-01] | +| /admin/kb/index/jobs/{jobId} | GET | api/admin/kb.py | [AC-ASA-02] | +| /admin/rag/experiments/run | POST | api/admin/rag.py | [AC-ASA-05] | +| /admin/sessions | GET | api/admin/sessions.py | [AC-ASA-09] | +| /admin/sessions/{sessionId} | GET | api/admin/sessions.py | [AC-ASA-07] | + +**代码提交规范**: +- 所有文件头部标注相关 AC 编号 +- 使用 `get_tenant_id()` 统一处理 X-Tenant-Id +- 返回格式与契约 Schema 对齐(DocumentInfo, SessionInfo, PageInfo) + +## technical_context + +### module_structure + +``` +ai-service-admin/ # 前端工程(待创建) +├── src/ +│ ├── api/ # API 请求层 +│ ├── components/ # 通用组件 +│ ├── composables/ # Vue Composables +│ ├── router/ # 路由配置 +│ ├── stores/ # Pinia stores +│ ├── views/ # 页面视图 +│ └── utils/ # 工具函数 +└── package.json +``` + +### key_decisions + +| decision | reason | impact | +|----------|--------|--------| +| Vue 3 + Element Plus | 与 RuoYi-Vue-Plus 基座技术栈一致 | 复用基座组件与权限体系 | +| Pinia 状态管理 | Vue 3 官方推荐,替代 Vuex | 更简洁的 store 模式 | +| localStorage 持久化 | 租户切换需跨会话保持 | 无需后端 session 支持 | + +### code_snippets + +```typescript +// stores/tenant.ts (待实现) +export const useTenantStore = defineStore('tenant', { + state: () => ({ + currentTenantId: localStorage.getItem('currentTenantId') || '' + }), + actions: { + setTenant(id: string) { + this.currentTenantId = id + localStorage.setItem('currentTenantId', id) + } + } +}) +``` + +## session_history + +- session: "Session #1 (2026-02-24)" + completed: [] + changes: [] + +- session: "Session #2 (2026-02-24) - Backend Admin APIs Implementation" + completed: + - 补齐 openapi.admin.yaml 契约(GET /admin/kb/documents, GET /admin/sessions) + - 创建 ai-service/app/api/admin/ 路由模块结构 + - 实现 6 个管理接口(KB文档管理、RAG实验、会话监控) + - 在 main.py 注册管理路由 + - 更新进度文档 + - 修复 FastAPI Query 参数语法问题(Annotated 默认值格式) + changes: + - spec/ai-service/openapi.admin.yaml - 添加 DocumentInfo, SessionInfo, PageInfo Schema 及列表接口 + - ai-service/app/api/admin/__init__.py - 新增 + - ai-service/app/api/admin/kb.py - 新增(3个接口) + - ai-service/app/api/admin/rag.py - 新增(1个接口) + - ai-service/app/api/admin/sessions.py - 新增(2个接口) + - ai-service/app/main.py - 注册管理路由 + - docs/progress/ai-service-admin-progress.md - 更新进度 + +- session: "Session #3 (2026-02-24) - 嵌入模型管理需求规划" + completed: + - 更新 spec/ai-service-admin/requirements.md 添加 v0.2.0 迭代需求 + - 更新 spec/ai-service-admin/tasks.md 添加 Phase 5 任务 + - 更新 spec/ai-service-admin/openapi.deps.yaml 添加嵌入管理接口 + - 更新进度文档添加 Phase 6 任务 + changes: + - spec/ai-service-admin/requirements.md - 新增 AC-ASA-08~AC-ASA-13 + - spec/ai-service-admin/tasks.md - 新增 P5-01~P5-08 任务 + - spec/ai-service-admin/openapi.deps.yaml - 完整重写,添加嵌入管理接口 + - docs/progress/ai-service-admin-progress.md - 添加 Phase 6 + +- session: "Session #4 (2026-02-24) - LLM 配置与 RAG 调试输出需求规划" + completed: + - 更新 spec/ai-service-admin/requirements.md 添加 v0.3.0 迭代需求 + - 更新 spec/ai-service-admin/tasks.md 添加 Phase 6 任务 + - 更新 spec/ai-service-admin/openapi.deps.yaml 添加 LLM 管理和 RAG 实验增强接口 + - 更新进度文档添加 Phase 7 任务 + changes: + - spec/ai-service-admin/requirements.md - 新增 AC-ASA-14~AC-ASA-22 + - spec/ai-service-admin/tasks.md - 新增 P6-01~P6-10 任务 + - spec/ai-service-admin/openapi.deps.yaml - 添加 LLM 配置接口和 RAG 实验增强接口 + - docs/progress/ai-service-admin-progress.md - 添加 Phase 7 + +- session: "Session #5 (2026-02-25) - 嵌入配置与 LLM 配置页面组件实现" + completed: + - 创建通用提供者选择组件 ProviderSelect.vue + - 创建通用动态配置表单 ConfigForm.vue + - 创建通用测试连接组件 TestPanel.vue + - 创建 LLM API 服务层 src/api/llm.ts 和类型定义 src/types/llm.ts + - 创建 LLM Pinia Store src/stores/llm.ts + - 创建 LLM 配置页面 src/views/admin/llm/index.vue + - 添加 LLM 配置路由 /admin/llm + - 更新 tasks.md 和 progress.md 文档 + changes: + - ai-service-admin/src/components/common/ProviderSelect.vue - 新增 + - ai-service-admin/src/components/common/ConfigForm.vue - 新增 + - ai-service-admin/src/components/common/TestPanel.vue - 新增 + - ai-service-admin/src/api/llm.ts - 新增 + - ai-service-admin/src/types/llm.ts - 新增 + - ai-service-admin/src/stores/llm.ts - 新增 + - ai-service-admin/src/views/admin/llm/index.vue - 新增 + - ai-service-admin/src/router/index.ts - 添加 LLM 配置路由 + - spec/ai-service-admin/tasks.md - 更新 P5-02~P5-08, P6-02~P6-05 状态 + - docs/progress/ai-service-admin-progress.md - 更新进度 + +- session: "Session #5 (2026-02-25) - RAG 实验室 AI 输出增强组件" + completed: + - 创建 LLM API 服务层 src/api/llm.ts + - 更新 RAG API 服务层 src/api/rag.ts 添加流式输出支持 + - 创建 RAG Store src/stores/rag.ts + - 创建 AI 回复展示组件 src/components/rag/AIResponseViewer.vue [AC-ASA-19, AC-ASA-21] + - 创建流式输出组件 src/components/rag/StreamOutput.vue [AC-ASA-20] + - 创建 LLM 选择器组件 src/components/rag/LLMSelector.vue [AC-ASA-22] + - 更新 RAG 实验室页面整合所有新组件 [AC-ASA-19~AC-ASA-22] + - 更新 tasks.md 和 progress.md 进度文档 + changes: + - ai-service-admin/src/api/llm.ts - 新增 + - ai-service-admin/src/api/rag.ts - 更新(添加流式输出支持) + - ai-service-admin/src/stores/rag.ts - 新增 + - ai-service-admin/src/components/rag/AIResponseViewer.vue - 新增 + - ai-service-admin/src/components/rag/StreamOutput.vue - 新增 + - ai-service-admin/src/components/rag/LLMSelector.vue - 新增 + - ai-service-admin/src/views/rag-lab/index.vue - 更新(整合 AI 输出组件) + - spec/ai-service-admin/tasks.md - 更新 P6-06~P6-10 状态 + - docs/progress/ai-service-admin-progress.md - 更新进度 + +## startup_guide + +1. **Step 1**: 读取本进度文档(了解当前位置与下一步) +2. **Step 2**: 读取 spec_references 中定义的模块规范(了解业务与接口约束) +3. **Step 3**: 直接执行 next_action - 初始化前端工程 + +--- + +## Phase 任务速查 + +| Phase | 名称 | 任务数 | 状态 | +|-------|------|--------|------| +| Phase 1 | 基础建设 | 5 | ✅ 完成 | +| Phase 2 | 知识库管理 | 5 | ✅ 完成 | +| Phase 3 | RAG 实验室 | 4 | ✅ 完成 | +| Phase 4 | 会话监控与详情 | 3 | ✅ 完成 | +| Phase 5 | 后端管理接口实现 | 6 | ✅ 完成 | +| Phase 6 | 嵌入模型管理 | 8 | ✅ 完成 | +| Phase 7 | LLM 配置与 RAG 调试输出 | 10 | ✅ 完成 | + +**总计: 41 个任务 | 已完成: 41 个 | 待处理: 0 个 | 进行中: 0 个** diff --git a/docs/progress/ai-service-progress.md b/docs/progress/ai-service-progress.md new file mode 100644 index 0000000..81bf518 --- /dev/null +++ b/docs/progress/ai-service-progress.md @@ -0,0 +1,135 @@ +# ai-service - Progress + +--- + +## 📋 Context + +- module: `ai-service` +- feature: `AISVC` (Python AI 中台) +- status: ✅ 已完成 + +--- + +## 🔗 Spec References (SSOT) + +- agents: `agents.md` +- contracting: `spec/contracting.md` +- requirements: `spec/ai-service/requirements.md` +- openapi_provider: `spec/ai-service/openapi.provider.yaml` +- design: `spec/ai-service/design.md` +- tasks: `spec/ai-service/tasks.md` + +--- + +## 📊 Overall Progress (Phases) + +- [x] Phase 1: 基础设施(FastAPI 框架与多租户基础) (100%) ✅ +- [x] Phase 2: 存储与检索实现(Memory & Retrieval) (100%) ✅ +- [x] Phase 3: 核心编排(Orchestrator & LLM Adapter) (100%) ✅ +- [x] Phase 4: 流式响应(SSE 实现与状态机) (100%) ✅ +- [x] Phase 5: 集成与冒烟测试(Quality Assurance) (100%) ✅ +- [x] Phase 6: 前后端联调真实对接 (100%) ✅ +- [x] Phase 7: 嵌入模型可插拔与文档解析 (100%) ✅ +- [x] Phase 8: LLM 配置与 RAG 调试输出 (100%) ✅ +- [x] Phase 9: 租户管理与 RAG 优化 (100%) ✅ + +--- + +## 🔄 Current Phase + +### Goal +Phase 9 已完成!项目进入稳定迭代阶段。 + +### Completed Tasks (Phase 9) + +- [x] T9.1 实现 `Tenant` 实体:定义租户数据模型 `[AC-AISVC-10]` ✅ +- [x] T9.2 实现租户 ID 格式校验:`name@ash@year` 格式验证 `[AC-AISVC-10, AC-AISVC-12]` ✅ +- [x] T9.3 实现租户自动创建:请求时自动创建不存在的租户 `[AC-AISVC-10]` ✅ +- [x] T9.4 实现 `GET /admin/tenants` API:返回租户列表 `[AC-AISVC-10]` ✅ +- [x] T9.5 前端租户选择器:实现租户切换功能 `[AC-ASA-01]` ✅ +- [x] T9.6 文档多编码支持:支持 UTF-8、GBK、GB2312 等编码解码 `[AC-AISVC-21]` ✅ +- [x] T9.7 按行分块功能:实现 `chunk_text_by_lines` 函数 `[AC-AISVC-22]` ✅ +- [x] T9.8 实现 `NomicEmbeddingProvider`:支持多维度向量 `[AC-AISVC-29]` ✅ +- [x] T9.9 实现多向量存储:支持 full/256/512 三种维度 `[AC-AISVC-16]` ✅ +- [x] T9.10 实现 `KnowledgeIndexer`:优化的知识库索引服务 `[AC-AISVC-22]` ✅ + +--- + +## 🏗️ Technical Context + +### Module Structure + +- `ai-service/` + - `app/` + - `api/` - FastAPI 路由层 + - `admin/tenants.py` - 租户管理 API ✅ + - `core/` - 配置、异常、中间件、SSE + - `middleware.py` - 租户 ID 格式校验与自动创建 ✅ + - `models/` - Pydantic 模型和 SQLModel 实体 + - `entities.py` - Tenant 实体 ✅ + - `services/` + - `embedding/nomic_provider.py` - Nomic 嵌入提供者 ✅ + - `retrieval/` - 检索层 + - `indexer.py` - 知识库索引服务 ✅ + - `metadata.py` - 元数据模型 ✅ + - `optimized_retriever.py` - 优化检索器 ✅ + - `tests/` - 单元测试 + +### Key Decisions (Why / Impact) + +- decision: 租户 ID 格式采用 `name@ash@year` 格式 + reason: 便于解析和展示租户信息 + impact: 中间件自动校验格式并解析 + +- decision: 租户自动创建策略 + reason: 简化租户管理流程,无需预先创建 + impact: 首次请求时自动创建租户记录 + +- decision: 多维度向量存储(full/256/512) + reason: 支持不同检索场景的性能优化 + impact: Qdrant 使用 named vector 存储 + +- decision: 文档多编码支持 + reason: 兼容中文文档的各种编码格式 + impact: 按优先级尝试多种编码解码 + +--- + +## 🧾 Session History + +### Session #6 (2026-02-25) +- completed: + - T9.1-T9.10 租户管理与 RAG 优化功能 + - 实现 Tenant 实体和租户管理 API + - 实现租户 ID 格式校验与自动创建 + - 实现前端租户选择器 + - 实现文档多编码支持 + - 实现按行分块功能 + - 实现 NomicEmbeddingProvider + - 实现多维度向量存储 + - 实现 KnowledgeIndexer +- changes: + - 新增 `app/models/entities.py` Tenant 实体 + - 更新 `app/core/middleware.py` 租户校验逻辑 + - 新增 `app/api/admin/tenants.py` 租户管理 API + - 新增 `ai-service-admin/src/api/tenant.ts` 前端 API + - 更新 `ai-service-admin/src/App.vue` 租户选择器 + - 更新 `ai-service/app/api/admin/kb.py` 多编码支持 + - 新增 `app/services/embedding/nomic_provider.py` + - 新增 `app/services/retrieval/indexer.py` + - 新增 `app/services/retrieval/metadata.py` + - 新增 `app/services/retrieval/optimized_retriever.py` +- commits: + - `docs: 更新任务清单,添加 Phase 9 租户管理与 RAG 优化任务 [AC-AISVC-10, AC-ASA-01]` + - `feat: 实现租户管理功能,支持租户ID格式校验与自动创建 [AC-AISVC-10, AC-AISVC-12, AC-ASA-01]` + - `feat: 文档索引优化,支持多编码解码和按行分块 [AC-AISVC-21, AC-AISVC-22]` + - `feat: RAG 检索优化,实现多维度向量存储和 Nomic 嵌入提供者 [AC-AISVC-16, AC-AISVC-29]` + - `feat: RAG 配置优化与检索日志增强 [AC-AISVC-16, AC-AISVC-17]` + +--- + +## 🚀 Startup Guide + +1. 读取本进度文档,定位当前 Phase 与 Next Action。 +2. 打开并阅读 Spec References 指向的模块规范。 +3. 直接执行 Next Action;遇到缺口先更新 spec 再编码。 diff --git a/java/openapi.deps.yaml b/java/openapi.deps.yaml new file mode 100644 index 0000000..d9ec669 --- /dev/null +++ b/java/openapi.deps.yaml @@ -0,0 +1,188 @@ +openapi: 3.0.3 +info: + title: AI Service API + description: | + Python AI 服务接口契约。 + + 本文件定义主框架对 AI 服务的接口需求(Consumer-First)。 + 由主框架作为调用方,Python AI 服务作为提供方实现。 + version: 1.0.0 + x-contract-level: L0 + x-consumer: "java-main-framework" + x-provider: "python-ai-service" + +servers: + - url: http://ai-service:8080 + description: AI 服务地址 + +paths: + /ai/chat: + post: + operationId: generateReply + summary: 生成 AI 回复 + description: | + 根据用户消息和会话历史生成 AI 回复。 + + 覆盖验收标准: + - AC-MCA-04: 主框架通过 HTTP POST 调用 AI 服务 + - AC-MCA-05: 响应包含 reply、confidence、shouldTransfer 字段 + - AC-MCA-06: AI 服务不可用时的降级处理(主框架侧实现) + - AC-MCA-07: 超时处理(主框架侧实现) + tags: + - AI Chat + x-requirements: + - AC-MCA-04 + - AC-MCA-04-REQ + - AC-MCA-04-OPT + - AC-MCA-05 + - AC-MCA-06 + - AC-MCA-07 + requestBody: + required: true + content: + application/json: + schema: + $ref: '#/components/schemas/ChatRequest' + example: + sessionId: "kf_001_wx123456_1708765432000" + currentMessage: "我想了解产品价格" + channelType: "wechat" + responses: + '200': + description: 成功生成回复 + content: + application/json: + schema: + $ref: '#/components/schemas/ChatResponse' + example: + reply: "您好,我们的产品价格根据套餐不同有所差异。" + confidence: 0.92 + shouldTransfer: false + '400': + description: 请求参数错误 + content: + application/json: + schema: + $ref: '#/components/schemas/ErrorResponse' + '500': + description: 服务内部错误 + content: + application/json: + schema: + $ref: '#/components/schemas/ErrorResponse' + '503': + description: 服务不可用 + content: + application/json: + schema: + $ref: '#/components/schemas/ErrorResponse' + + /ai/health: + get: + operationId: healthCheck + summary: 健康检查 + description: 检查 AI 服务是否正常运行 + tags: + - Health + responses: + '200': + description: 服务正常 + content: + application/json: + schema: + type: object + properties: + status: + type: string + '503': + description: 服务不健康 + +components: + schemas: + ChatRequest: + type: object + required: + - sessionId + - currentMessage + - channelType + properties: + sessionId: + type: string + description: 会话ID(AC-MCA-04-REQ 必填) + currentMessage: + type: string + description: 当前用户消息(AC-MCA-04-REQ 必填) + channelType: + type: string + description: 渠道类型(AC-MCA-04-REQ 必填) + enum: + - wechat + - douyin + - jd + history: + type: array + description: 历史消息列表(AC-MCA-04-OPT 可选) + items: + $ref: '#/components/schemas/ChatMessage' + metadata: + type: object + description: 扩展元数据(AC-MCA-04-OPT 可选) + additionalProperties: true + + ChatMessage: + type: object + required: + - role + - content + properties: + role: + type: string + enum: + - user + - assistant + content: + type: string + + ChatResponse: + type: object + required: + - reply + - confidence + - shouldTransfer + properties: + reply: + type: string + description: AI 回复内容(AC-MCA-05 必填) + confidence: + type: number + format: double + description: 置信度评分 0.0-1.0(AC-MCA-05 必填) + shouldTransfer: + type: boolean + description: 是否建议转人工(AC-MCA-05 必填) + transferReason: + type: string + description: 转人工原因(可选) + metadata: + type: object + description: 响应元数据(可选) + additionalProperties: true + + ErrorResponse: + type: object + required: + - code + - message + properties: + code: + type: string + description: 错误代码 + message: + type: string + description: 错误消息 + details: + type: array + description: 详细错误信息(可选) + items: + type: object + additionalProperties: true diff --git a/spec/ai-service-admin/design.md b/spec/ai-service-admin/design.md new file mode 100644 index 0000000..831a250 --- /dev/null +++ b/spec/ai-service-admin/design.md @@ -0,0 +1,132 @@ +--- +module: ai-service-admin +title: "AI 中台管理界面(ai-service-admin)前端技术设计" +status: "draft" +version: "0.1.0" +owners: + - "frontend" + - "backend" +last_updated: "2026-02-24" +--- + +# AI 中台管理界面前端技术设计(Design) + +## 1. 架构概览(Architecture Overview) + +本模块作为 AI 中台(ai-service)的管理侧前端,基于 Vue 3 + Pinia + Element Plus 构建,旨在提供一套高效、响应式的 AI 资产管理与调试工具。 + +### 1.1 核心技术栈 +- **框架**:Vue 3 (Composition API) +- **状态管理**:Pinia +- **组件库**:Element Plus +- **网络请求**:Axios + OpenAPI Generated SDK +- **工程基座**:RuoYi-Vue-Plus 前端基座(复用权限与布局) + +--- + +## 2. 页面布局与交互设计(UI/UX Design) + +### 2.1 知识库管理(KB Management) +- **布局**:列表页 + 抽屉/详情页。 +- **核心功能**: + - **文档列表**:支持文件名称、上传时间、索引状态筛选。 + - **上传组件**: + - 集成 Element Plus `el-upload`,支持多文件并发。 + - 列表展示文件切片与向量化进度条(基于任务轮询)。 + - **任务轮询策略**: + - 进入详情页或上传后,每 3s 调用 `/admin/kb/index/jobs/{jobId}`。 + - 状态流转:`pending` (排队) -> `processing` (处理中) -> `completed` (完成) / `failed` (失败)。 + - 失败任务点击“错误详情”展示后端返回的 `errorMsg`。 + +### 2.2 RAG 实验室(RAG Lab) +- **布局**:**双栏对比视图**。 + - **左侧:调试输入**。 + - 输入 Query 文本框。 + - 参数配置面板(Top-K, Score Threshold, Prompt Version 选择)。 + - **右侧:实验结果(分屏或页签)**。 + - **召回片段栏**:展示 `retrievalResults` 列表,高亮显示 Score 分值与来源。 + - **最终 Prompt 栏**:代码块展示(Read-only),直观呈现变量替换后的上下文效果。 +- **交互**:点击“运行实验”按钮,Loading 状态锁定右侧视图,完成后平滑更新数据。支持保存实验快照进行历史对比。 + +### 2.3 会话监控(Session Monitoring) +- **列表页**: + - 支持多字段过滤:租户 ID、会话 ID、时间范围、模型/场景、是否包含错误。 + - 列表展示:会话摘要、首字耗时、总消息数、状态码。 +- **详情详情弹窗**: + - **全链路追踪视图**:采用 Timeline 或消息气泡流。 + - **Trace 信息排查**:点击单条回复,展开展示对应的检索命中、工具调用参数及结果。 + +--- + +## 3. 状态管理与拦截器设计(State & Security) + +### 3.1 租户状态管理(Pinia) +创建 `tenant` store,用于持久化维护当前操作的租户上下文。 + +```typescript +// store/modules/tenant.ts +export const useTenantStore = defineStore('tenant', { + state: () => ({ + currentTenantId: localStorage.getItem('X-Tenant-Id') || 'default' + }), + actions: { + setTenant(id: string) { + this.currentTenantId = id; + localStorage.setItem('X-Tenant-Id', id); + } + } +}); +``` + +### 3.2 Axios 自动注入(Interceptor) +在全局请求拦截器中,自动从 Store 读取并注入 `X-Tenant-Id`。 + +```typescript +// utils/request.ts +service.interceptors.request.use(config => { + const tenantStore = useTenantStore(); + if (tenantStore.currentTenantId) { + config.headers['X-Tenant-Id'] = tenantStore.currentTenantId; + } + return config; +}, error => { + return Promise.reject(error); +}); +``` + +--- + +## 4. 组件封装(Component Design) + +### 4.1 基础封装 +- **BaseTable**:二次封装 `el-table`,内置分页逻辑与 `X-Tenant-Id` 联动。 +- **BaseForm**:统一的字段校验与错误反馈提示。 + +### 4.2 业务专用组件 +- **RagResultCard**:用于展示 RAG 召回片段,支持预览原始文档。 +- **PromptEditor**:基于 Monaco Editor 或简易 Textarea,集成 `{{variable}}` 语法高亮提示。 + +--- + +## 5. 异常处理策略(Exception Handling) + +### 5.1 统一拦截机制 +在 Axios 响应拦截器中对 `401` 与 `403` 进行分流处理: + +- **401 Unauthorized**: + - 清除本地 Token 与缓存。 + - 引导用户重定向至登录页(Login Page)。 + - 提示信息:“登录已失效,请重新登录”。 +- **403 Forbidden**: + - **非阻塞提示**:使用 `el-message` 报错“当前操作无权限”。 + - **阻塞处理**:若为页面初始化失败,展示“403 无权访问”占位图。 + +### 5.2 统一错误模型映射 +后端返回的错误结构(如 `code`, `message`, `requestId`)将被映射到 UI 层,便于管理员复制 `requestId` 进行后端日志追溯。 + +--- + +## 6. 数据流设计(Data Flow) + +- **数据获取**:组件挂载 -> 注入 `X-Tenant-Id` -> 调用 SDK -> 更新本地状态。 +- **权限控制**:基于后端返回的 `permissions` 数组,通过 `v-hasPermi` 指令控制按钮显隐。 diff --git a/spec/ai-service-admin/openapi.deps.yaml b/spec/ai-service-admin/openapi.deps.yaml new file mode 100644 index 0000000..a763269 --- /dev/null +++ b/spec/ai-service-admin/openapi.deps.yaml @@ -0,0 +1,592 @@ +openapi: 3.1.0 +info: + title: "AI Service Admin Dependencies" + description: "ai-service-admin 模块依赖的外部 API 契约(Consumer 需求侧)" + version: "0.3.0" + x-contract-level: L1 + +servers: + - url: http://localhost:8000 + description: 本地开发服务器 + +paths: + /admin/embedding/providers: + get: + operationId: listEmbeddingProviders + summary: 获取可用的嵌入模型提供者列表 + tags: + - Embedding Management + parameters: + - name: X-Tenant-Id + in: header + required: true + schema: + type: string + responses: + '200': + description: 成功返回提供者列表 + content: + application/json: + schema: + type: object + properties: + providers: + type: array + items: + $ref: '#/components/schemas/EmbeddingProviderInfo' + + /admin/embedding/config: + get: + operationId: getEmbeddingConfig + summary: 获取当前嵌入模型配置 + tags: + - Embedding Management + parameters: + - name: X-Tenant-Id + in: header + required: true + schema: + type: string + responses: + '200': + description: 成功返回当前配置 + content: + application/json: + schema: + $ref: '#/components/schemas/EmbeddingConfig' + put: + operationId: updateEmbeddingConfig + summary: 更新嵌入模型配置 + tags: + - Embedding Management + parameters: + - name: X-Tenant-Id + in: header + required: true + schema: + type: string + requestBody: + required: true + content: + application/json: + schema: + $ref: '#/components/schemas/EmbeddingConfigUpdate' + responses: + '200': + description: 配置更新成功 + content: + application/json: + schema: + type: object + properties: + success: + type: boolean + message: + type: string + + /admin/embedding/test: + post: + operationId: testEmbedding + summary: 测试嵌入模型连接 + tags: + - Embedding Management + parameters: + - name: X-Tenant-Id + in: header + required: true + schema: + type: string + requestBody: + required: false + content: + application/json: + schema: + type: object + properties: + test_text: + type: string + description: 测试文本(可选) + config: + $ref: '#/components/schemas/EmbeddingConfigUpdate' + responses: + '200': + description: 测试成功 + content: + application/json: + schema: + $ref: '#/components/schemas/EmbeddingTestResult' + + /admin/embedding/formats: + get: + operationId: getSupportedFormats + summary: 获取支持的文档格式列表 + tags: + - Embedding Management + parameters: + - name: X-Tenant-Id + in: header + required: true + schema: + type: string + responses: + '200': + description: 成功返回支持格式列表 + content: + application/json: + schema: + type: object + properties: + formats: + type: array + items: + $ref: '#/components/schemas/DocumentFormat' + + /admin/llm/providers: + get: + operationId: listLLMProviders + summary: 获取可用的 LLM 提供者列表 + tags: + - LLM Management + parameters: + - name: X-Tenant-Id + in: header + required: true + schema: + type: string + responses: + '200': + description: 成功返回提供者列表 + content: + application/json: + schema: + type: object + properties: + providers: + type: array + items: + $ref: '#/components/schemas/LLMProviderInfo' + + /admin/llm/config: + get: + operationId: getLLMConfig + summary: 获取当前 LLM 配置 + tags: + - LLM Management + parameters: + - name: X-Tenant-Id + in: header + required: true + schema: + type: string + responses: + '200': + description: 成功返回当前配置 + content: + application/json: + schema: + $ref: '#/components/schemas/LLMConfig' + put: + operationId: updateLLMConfig + summary: 更新 LLM 配置 + tags: + - LLM Management + parameters: + - name: X-Tenant-Id + in: header + required: true + schema: + type: string + requestBody: + required: true + content: + application/json: + schema: + $ref: '#/components/schemas/LLMConfigUpdate' + responses: + '200': + description: 配置更新成功 + content: + application/json: + schema: + type: object + properties: + success: + type: boolean + message: + type: string + + /admin/llm/test: + post: + operationId: testLLM + summary: 测试 LLM 连接 + tags: + - LLM Management + parameters: + - name: X-Tenant-Id + in: header + required: true + schema: + type: string + requestBody: + required: false + content: + application/json: + schema: + type: object + properties: + test_prompt: + type: string + description: 测试提示词(可选) + example: "你好,请简单介绍一下自己。" + config: + $ref: '#/components/schemas/LLMConfigUpdate' + responses: + '200': + description: 测试成功 + content: + application/json: + schema: + $ref: '#/components/schemas/LLMTestResult' + + /admin/rag/experiments/run: + post: + operationId: runRagExperiment + summary: 运行 RAG 实验(含 AI 输出) + tags: + - RAG Lab + parameters: + - name: X-Tenant-Id + in: header + required: true + schema: + type: string + requestBody: + required: true + content: + application/json: + schema: + $ref: '#/components/schemas/RagExperimentRequest' + responses: + '200': + description: 实验完成 + content: + application/json: + schema: + $ref: '#/components/schemas/RagExperimentResult' + + /admin/rag/experiments/stream: + post: + operationId: runRagExperimentStream + summary: 运行 RAG 实验(流式输出) + tags: + - RAG Lab + parameters: + - name: X-Tenant-Id + in: header + required: true + schema: + type: string + requestBody: + required: true + content: + application/json: + schema: + $ref: '#/components/schemas/RagExperimentRequest' + responses: + '200': + description: SSE 流式输出 + content: + text/event-stream: + schema: + type: string + +components: + schemas: + EmbeddingProviderInfo: + type: object + required: + - name + - display_name + - config_schema + properties: + name: + type: string + description: 提供者唯一标识 + example: "ollama" + display_name: + type: string + description: 提供者显示名称 + example: "Ollama 本地模型" + description: + type: string + description: 提供者描述 + example: "使用 Ollama 运行的本地嵌入模型" + config_schema: + type: object + description: 配置参数定义(JSON Schema 格式) + additionalProperties: true + + EmbeddingConfig: + type: object + required: + - provider + - config + properties: + provider: + type: string + description: 当前激活的提供者 + example: "ollama" + config: + type: object + description: 提供者配置参数 + additionalProperties: true + updated_at: + type: string + format: date-time + description: 配置最后更新时间 + + EmbeddingConfigUpdate: + type: object + required: + - provider + properties: + provider: + type: string + description: 提供者标识 + example: "ollama" + config: + type: object + description: 提供者配置参数 + additionalProperties: true + + EmbeddingTestResult: + type: object + required: + - success + - dimension + properties: + success: + type: boolean + description: 测试是否成功 + dimension: + type: integer + description: 返回的向量维度 + example: 768 + latency_ms: + type: number + description: 响应延迟(毫秒) + example: 125.5 + message: + type: string + description: 测试结果消息 + example: "连接成功,向量维度: 768" + error: + type: string + description: 错误信息(失败时) + example: "连接超时" + + DocumentFormat: + type: object + required: + - extension + - name + properties: + extension: + type: string + description: 文件扩展名 + example: ".pdf" + name: + type: string + description: 格式名称 + example: "PDF 文档" + description: + type: string + description: 格式描述 + example: "使用 PyMuPDF 解析 PDF 文档" + + LLMProviderInfo: + type: object + required: + - name + - display_name + - config_schema + properties: + name: + type: string + description: 提供者唯一标识 + example: "openai" + display_name: + type: string + description: 提供者显示名称 + example: "OpenAI" + description: + type: string + description: 提供者描述 + example: "OpenAI GPT 系列模型" + config_schema: + type: object + description: 配置参数定义(JSON Schema 格式) + additionalProperties: true + + LLMConfig: + type: object + required: + - provider + - config + properties: + provider: + type: string + description: 当前激活的提供者 + example: "openai" + config: + type: object + description: 提供者配置参数 + additionalProperties: true + example: + api_key: "sk-xxx" + base_url: "https://api.openai.com/v1" + model: "gpt-4o-mini" + updated_at: + type: string + format: date-time + description: 配置最后更新时间 + + LLMConfigUpdate: + type: object + required: + - provider + properties: + provider: + type: string + description: 提供者标识 + example: "openai" + config: + type: object + description: 提供者配置参数 + additionalProperties: true + + LLMTestResult: + type: object + required: + - success + properties: + success: + type: boolean + description: 测试是否成功 + response: + type: string + description: LLM 响应内容 + example: "你好!我是一个 AI 助手..." + latency_ms: + type: number + description: 响应延迟(毫秒) + example: 1250.5 + prompt_tokens: + type: integer + description: 输入 Token 数 + example: 15 + completion_tokens: + type: integer + description: 输出 Token 数 + example: 50 + total_tokens: + type: integer + description: 总 Token 数 + example: 65 + message: + type: string + description: 测试结果消息 + example: "连接成功" + error: + type: string + description: 错误信息(失败时) + example: "API Key 无效" + + RagExperimentRequest: + type: object + required: + - query + properties: + query: + type: string + description: 查询文本 + example: "什么是 RAG?" + kb_ids: + type: array + items: + type: string + description: 知识库 ID 列表 + top_k: + type: integer + description: 检索数量 + default: 5 + score_threshold: + type: number + description: 相似度阈值 + default: 0.5 + llm_provider: + type: string + description: 指定 LLM 提供者(可选) + example: "openai" + generate_response: + type: boolean + description: 是否生成 AI 回复 + default: true + + RagExperimentResult: + type: object + properties: + query: + type: string + description: 原始查询 + retrieval_results: + type: array + items: + $ref: '#/components/schemas/RetrievalResult' + final_prompt: + type: string + description: 最终拼接的 Prompt + ai_response: + $ref: '#/components/schemas/AIResponse' + total_latency_ms: + type: number + description: 总耗时(毫秒) + + RetrievalResult: + type: object + properties: + content: + type: string + description: 检索到的内容 + score: + type: number + description: 相似度分数 + source: + type: string + description: 来源文档 + metadata: + type: object + additionalProperties: true + description: 元数据 + + AIResponse: + type: object + properties: + content: + type: string + description: AI 回复内容 + prompt_tokens: + type: integer + description: 输入 Token 数 + completion_tokens: + type: integer + description: 输出 Token 数 + total_tokens: + type: integer + description: 总 Token 数 + latency_ms: + type: number + description: 生成耗时(毫秒) + model: + type: string + description: 使用的模型 diff --git a/spec/ai-service-admin/requirements.md b/spec/ai-service-admin/requirements.md new file mode 100644 index 0000000..25df27a --- /dev/null +++ b/spec/ai-service-admin/requirements.md @@ -0,0 +1,161 @@ +--- +feature_id: "ASA" +title: "AI 中台管理界面(ai-service-admin)需求规范" +status: "draft" +version: "0.3.0" +owners: + - "product" + - "frontend" + - "backend" +last_updated: "2026-02-24" +source: + type: "conversation" + ref: "Scoping Result Confirmed" +--- + +# AI 中台管理界面(ASA) + +## 1. 背景与目标 +- **背景**:随着 AI 中台(ai-service)功能的增强,需要一套专业的管理后台来支持知识库运维、Prompt 工程迭代及 RAG 效果调优。 +- **目标**:提供租户维度的 AI 资产管理能力,实现 RAG 链路的可视化调试与全链路监控。 +- **非目标**:不包含推理引擎实现,不包含面向 C 端的交互 UI。 + +## 2. 模块边界(Scope) +- **覆盖**:知识库空间与文档管理、索引任务监控、租户级 Prompt 模板版本管理、RAG 实验对比、全局会话审计。 +- **不覆盖**:底层向量数据库维护、模型微调训练。 + +## 3. 依赖盘点(Dependencies) +- **依赖模块**: + - `ai-service`:提供所有的管理类 RESTful API(见 `openapi.admin.yaml`)。 + - `RuoYi-Vue-Plus` (或类似基座):提供用户认证、权限校验及菜单框架。 + +## 4. 用户故事(User Stories) + +### 4.1 知识库管理 +- [US-ASA-01] 作为租户管理员,我希望能够上传不同格式的文档到指定的知识空间,以便为特定的 AI 场景提供上下文。 +- [US-ASA-02] 作为运维人员,我希望实时查看索引构建任务的进度和错误原因,以便及时处理构建失败的情况。 + +### 4.2 Prompt 工程 +- [US-ASA-03] 作为 Prompt 工程师,我希望对不同场景的 Prompt 进行版本化管理,以便在效果下降时能够快速回滚。 + +### 4.3 RAG 效果实验室 +- [US-ASA-04] 作为 AI 开发者,我希望在后台直接输入问题并查看检索到的文档分片和原始上下文,以便定位召回不准确的问题。 + +### 4.4 监控审计 +- [US-ASA-05] 作为安全合规人员,我希望审计所有租户的会话记录,并查看单次回答的耗时与资源消耗,以便进行成本核算与安全管控。 + +## 5. 验收标准(Acceptance Criteria, EARS) + +### 知识库管理(KB) +- [AC-ASA-01] WHEN 提交文档上传 THEN 系统 SHALL 异步启动索引任务,并返回任务 ID。 +- [AC-ASA-02] WHEN 索引任务失败 THEN 系统 SHALL 在管理界面高亮显示,并提供“详细错误”查询入口。 + +### Prompt 管理(Prompt) +- [AC-ASA-03] WHEN 发布新版 Prompt THEN 系统 SHALL 自动将旧版标记为“历史版本”,且同一时间只有一个“已发布”版本。 +- [AC-ASA-04] WHEN 编辑 Prompt 时 THEN 系统 SHALL 提供内置变量提示(如 `{{context}}`, `{{query}}`)。 + +### RAG 实验室(RAG Lab) +- [AC-ASA-05] WHEN 运行 RAG 实验 THEN 系统 SHALL 展示 Top-K 检索片段、得分、来源文档及最终生成的提示词。 +- [AC-ASA-06] WHEN 多版本对比时 THEN 系统 SHALL 支持在同一屏幕展示不同配置下的召回差异。 + +### 会话监控(Audit) +- [AC-ASA-07] WHEN 查看会话详情 THEN 系统 SHALL 展示完整的消息链路,包括中间的工具调用(Tool Calls)和检索命中记录。 + +## 6. 追踪映射(Traceability) + +| AC ID | Endpoint | 方法 | 备注 | +|------|----------|------|-----| +| AC-ASA-01 | /admin/kb/documents | POST | 上传文档并创建任务 | +| AC-ASA-02 | /admin/kb/index/jobs/{jobId} | GET | 查询任务详情与错误 | +| AC-ASA-03 | /admin/config/prompt-templates/{tplId}/publish | POST | 发布指定版本 | +| AC-ASA-05 | /admin/rag/experiments/run | POST | 触发调试实验 | +| AC-ASA-07 | /admin/sessions/{sessionId} | GET | 获取全链路详情 | + +--- + +## 7. 迭代需求:嵌入模型管理(v0.2.0) + +> 说明:本节为 v0.2.0 迭代新增,用于支持嵌入模型的界面配置与管理。 + +### 7.1 嵌入模型配置管理 + +- [AC-ASA-08] WHEN 用户访问嵌入模型配置页面 THEN 系统 SHALL 展示当前激活的嵌入模型提供者及其配置参数。 + +- [AC-ASA-09] WHEN 用户切换嵌入模型提供者 THEN 系统 SHALL 动态展示该提供者的配置参数表单,并保留当前配置值。 + +- [AC-ASA-10] WHEN 用户修改嵌入模型配置并保存 THEN 系统 SHALL 验证配置有效性,更新配置并提示操作结果。 + +- [AC-ASA-11] WHEN 用户点击"测试连接"按钮 THEN 系统 SHALL 调用嵌入模型生成测试向量,展示连接状态、向量维度和响应延迟。 + +- [AC-ASA-12] WHEN 嵌入模型连接测试失败 THEN 系统 SHALL 展示详细错误信息,帮助用户排查配置问题。 + +### 7.2 文档格式支持展示 + +- [AC-ASA-13] WHEN 用户查看嵌入模型配置页面 THEN 系统 SHALL 展示当前支持的文档格式列表(PDF、Word、Excel、TXT 等)。 + +### 7.3 用户故事(迭代追加) + +- [US-ASA-06] 作为系统管理员,我希望在界面上配置和切换嵌入模型,以便快速适配不同的业务场景而无需修改代码。 + +- [US-ASA-07] 作为系统管理员,我希望在保存配置前测试嵌入模型连接,以便确保配置正确后再正式启用。 + +### 7.4 追踪映射(迭代追加) + +| AC ID | Endpoint | 方法 | 备注 | +|------|----------|------|-----| +| AC-ASA-08 | /admin/embedding/config | GET | 获取当前配置 | +| AC-ASA-09 | /admin/embedding/providers | GET | 获取提供者列表及配置定义 | +| AC-ASA-10 | /admin/embedding/config | PUT | 更新配置 | +| AC-ASA-11 | /admin/embedding/test | POST | 测试连接 | +| AC-ASA-12 | /admin/embedding/test | POST | 测试失败错误展示 | +| AC-ASA-13 | /admin/embedding/formats | GET | 获取支持格式 | + +--- + +## 8. 迭代需求:LLM 模型配置与 RAG 调试输出(v0.3.0) + +> 说明:本节为 v0.3.0 迭代新增,用于支持 LLM 模型的界面配置及 RAG 实验室的 AI 输出调试。 + +### 8.1 LLM 模型配置管理 + +- [AC-ASA-14] WHEN 用户访问 LLM 模型配置页面 THEN 系统 SHALL 展示当前激活的 LLM 提供者及其配置参数(API Key、Base URL、模型名称等)。 + +- [AC-ASA-15] WHEN 用户切换 LLM 提供者 THEN 系统 SHALL 动态展示该提供者的配置参数表单,并保留当前配置值。 + +- [AC-ASA-16] WHEN 用户修改 LLM 模型配置并保存 THEN 系统 SHALL 验证配置有效性,更新配置并提示操作结果。 + +- [AC-ASA-17] WHEN 用户点击"测试连接"按钮 THEN 系统 SHALL 调用 LLM 生成测试回复,展示连接状态、模型响应和耗时。 + +- [AC-ASA-18] WHEN LLM 连接测试失败 THEN 系统 SHALL 展示详细错误信息,帮助用户排查配置问题。 + +### 8.2 RAG 实验室 AI 输出展示 + +- [AC-ASA-19] WHEN 用户运行 RAG 实验后 THEN 系统 SHALL 在结果区域新增"AI 回复"展示区,显示基于检索结果生成的 AI 最终输出。 + +- [AC-ASA-20] WHEN AI 回复生成中 THEN 系统 SHALL 展示 Loading 状态,支持流式输出展示(SSE)。 + +- [AC-ASA-21] WHEN AI 回复生成完成 THEN 系统 SHALL 展示完整的回复内容、Token 消耗统计、响应耗时。 + +- [AC-ASA-22] WHEN 用户选择不同的 LLM 配置 THEN 系统 SHALL 使用选定的 LLM 模型生成回复,便于对比不同模型效果。 + +### 8.3 用户故事(迭代追加) + +- [US-ASA-08] 作为系统管理员,我希望在界面上配置和切换不同的 LLM 提供者(如 OpenAI、Ollama、Azure 等),以便快速适配不同的业务场景。 + +- [US-ASA-09] 作为 AI 开发者,我希望在 RAG 实验室中看到 AI 的最终输出,以便完整调试 RAG 链路效果,而不仅仅是检索结果。 + +- [US-ASA-10] 作为 Prompt 工程师,我希望对比不同 LLM 模型在相同检索结果下的回复效果,以便选择最适合业务场景的模型。 + +### 8.4 追踪映射(迭代追加) + +| AC ID | Endpoint | 方法 | 备注 | +|------|----------|------|-----| +| AC-ASA-14 | /admin/llm/config | GET | 获取当前 LLM 配置 | +| AC-ASA-15 | /admin/llm/providers | GET | 获取 LLM 提供者列表 | +| AC-ASA-16 | /admin/llm/config | PUT | 更新 LLM 配置 | +| AC-ASA-17 | /admin/llm/test | POST | 测试 LLM 连接 | +| AC-ASA-18 | /admin/llm/test | POST | LLM 测试失败错误展示 | +| AC-ASA-19 | /admin/rag/experiments/run | POST | RAG 实验增加 AI 输出 | +| AC-ASA-20 | /admin/rag/experiments/stream | POST | RAG 实验流式输出(SSE) | +| AC-ASA-21 | /admin/rag/experiments/run | POST | Token 统计与耗时 | +| AC-ASA-22 | /admin/rag/experiments/run | POST | 支持指定 LLM 配置 | diff --git a/spec/ai-service-admin/scope.md b/spec/ai-service-admin/scope.md new file mode 100644 index 0000000..277b976 --- /dev/null +++ b/spec/ai-service-admin/scope.md @@ -0,0 +1,171 @@ +--- +module: ai-service-admin +title: "AI 中台管理界面(ai-service-admin)— Scoping(定界)结果" +status: draft +version: 0.1.0 +owners: + - product + - frontend + - backend +last_updated: 2026-02-24 +methodology: + - spec-driven + - api-first +--- + +# AI 中台管理界面(ai-service-admin)— Scoping(定界)结果 + +> 本文档为“规范驱动 + 接口先行”的第 0 阶段产出:模块边界澄清 + 依赖接口盘点(Consumer-First)。 +> +> 输出完成后**停止**,等待确认;在确认前不进入 requirements/OpenAPI/Design/Tasks 的正式编写。 + +## 1) 模块职责(Module Responsibilities) + +### 1.1 模块定位 +`ai-service-admin` 是一个**独立前端管理模块**,面向运营/管理员/租户管理员,用于管理 AI 中台的知识库资产、租户级配置、RAG 调试与全局会话观测。 + +### 1.2 包含(In Scope) +必须覆盖以下能力(以管理与观测为主): + +1. **知识库上传/索引管理** + - 数据源上传(文件/文本/URL 等,具体形态由后续 requirements 细化) + - 数据源状态查看、重试、删除/下线 + - 索引构建(触发/进度/失败原因)与文档分片/向量化相关的可观测信息(仅展示,不实现算法) + - 基础检索配置(如 topK、召回阈值、chunk 策略等):以“配置项编辑 + 生效范围”呈现 + +2. **租户级 Prompt 模板配置** + - 租户维度的 Prompt 模板 CRUD、版本管理(草案/发布)、回滚 + - 模板与使用场景(如:RAG 问答、工具调用、摘要等)的绑定(场景枚举由后端定义) + - 变量占位符与校验提示(前端校验为体验增强,最终以后端校验为准) + +3. **RAG 检索效果实验室(调试窗)** + - 输入问题 → 触发“检索/重排/上下文构造”调试链路 + - 展示:召回文档列表(含得分/来源/片段)、重排结果、最终上下文、以及关键日志/耗时 + - 支持对比实验(如不同检索参数、不同 prompt 版本) + - 支持保存/复用实验配置与结果快照(用于回归对比) + +4. **全局会话监控** + - 多租户维度的会话列表、筛选(时间、租户、用户标识、会话状态、模型/渠道等) + - 查看单次会话详情:消息流、检索命中、工具调用、错误栈/错误码、耗时 + - 基础统计:会话量、失败率、平均耗时、Top 知识库/Top Prompt 版本使用情况(指标范围后续细化) + +### 1.3 不包含(Out of Scope) +明确不在本模块实现范围内: + +- **具体 AI 推理逻辑**(由 `ai-service` 负责),包括但不限于:LLM 调用、embedding 计算、rerank 算法、提示词编排执行引擎本体。 +- **用户侧聊天界面**(由 `ai-robot` 负责),即面向终端用户的对话交互 UI。 +- **底层存储/索引实现**(向量库、全文检索、对象存储等)与运维部署策略。 + +### 1.4 边界接口原则 +- `ai-service-admin` 只作为 **Consumer(调用方)** 消费 Python 后端(`ai-service`)提供的“管理类/观测类 API”。 +- 所有“调试/实验”动作也通过管理 API 触发,前端不直接连接向量库或模型。 + +--- + +## 2) 技术栈建议(Tech Stack Recommendation) + +### 2.1 选型建议 +优先推荐:**Vue 3 + Element Plus**(与 RuoYi-Vue 生态对齐)。 +备选:React(若团队已有成熟 React 组件体系与工程基座)。 + +### 2.2 选择理由(面向本模块) +- RuoYi-Vue 管理后台形态与 Element Plus 组件库天然匹配(表格/表单/弹窗/权限路由)。 +- 本模块以“配置管理 + 列表筛选 + 详情查看 + 调试面板”交互为主,Element Plus 现成组件覆盖度高。 + +### 2.3 工程与基础能力(建议纳入后续 design) +- 多租户与权限:建议前端采用**路由级权限 + 按钮级权限**(能力来源于后端返回的权限集)。 +- 国际化:可选(若需要多语言运营)。 +- 可观测性:前端埋点/日志仅做体验与错误上报;业务日志以服务端为准。 + +--- + +## 3) 依赖接口清单(Consumer-First,管理类接口草案) + +> 说明:以下为 `ai-service-admin` 作为调用方所需的**最小管理接口能力清单**(草案)。 +> - 路径以 `/admin/*` 为主。 +> - 最终将以 `ai-service` 新增的 `openapi.admin.yaml` 固化。 +> - 这里先列“能力/端点草案”,不写具体 schema,以便先对齐边界。 + +### 3.1 认证与通用能力 +- `GET /admin/me`:获取当前登录信息(含租户、角色、权限点) +- `GET /admin/tenants`:租户列表(平台管理员) +- `GET /admin/enums`:获取前端需要的枚举/常量(场景枚举、状态枚举、错误码映射等) + +### 3.2 知识库(KB)管理:`/admin/kb/*` +- `GET /admin/kb/spaces`:知识空间/知识库列表(按租户) +- `POST /admin/kb/spaces`:创建知识空间/知识库 +- `GET /admin/kb/spaces/{kbId}`:知识库详情(含统计/配置摘要) +- `PATCH /admin/kb/spaces/{kbId}`:更新知识库元信息/配置 +- `DELETE /admin/kb/spaces/{kbId}`:删除/下线知识库 + +- `GET /admin/kb/documents`:文档列表(支持 kbId、状态、时间、来源筛选) +- `POST /admin/kb/documents`:上传/导入文档(multipart 或任务式导入) +- `GET /admin/kb/documents/{docId}`:文档详情(含分片/索引状态) +- `DELETE /admin/kb/documents/{docId}`:删除文档 + +- `POST /admin/kb/index/jobs`:触发索引构建/重建(kbId/docId 维度) +- `GET /admin/kb/index/jobs`:索引任务列表(状态/时间筛选) +- `GET /admin/kb/index/jobs/{jobId}`:索引任务详情(进度、失败原因、日志摘要) +- `POST /admin/kb/index/jobs/{jobId}/retry`:失败任务重试 +- `POST /admin/kb/index/jobs/{jobId}/cancel`:取消任务(若支持) + +- `GET /admin/kb/search/config`:读取检索参数默认配置(租户级/KB 级) +- `PUT /admin/kb/search/config`:更新检索参数默认配置 + +### 3.3 Prompt 模板(租户级)配置:`/admin/config/*` +- `GET /admin/config/prompt-templates`:模板列表(支持场景筛选) +- `POST /admin/config/prompt-templates`:创建模板(草案) +- `GET /admin/config/prompt-templates/{tplId}`:模板详情(含版本历史) +- `PATCH /admin/config/prompt-templates/{tplId}`:更新模板(草案编辑) +- `POST /admin/config/prompt-templates/{tplId}/publish`:发布某版本 +- `POST /admin/config/prompt-templates/{tplId}/rollback`:回滚到指定版本 +- `DELETE /admin/config/prompt-templates/{tplId}`:删除模板 + +- `GET /admin/config/prompt-variables`:可用变量/内置函数清单(用于编辑器提示) + +### 3.4 RAG 检索效果实验室(调试):`/admin/rag/*` +- `POST /admin/rag/experiments/run`:运行一次 RAG 调试实验(输入 query + 参数集 + 可选 kbId/promptVersion) +- `GET /admin/rag/experiments`:实验记录列表(按租户/操作者/时间) +- `GET /admin/rag/experiments/{expId}`:实验详情(召回、重排、上下文、日志、耗时) +- `POST /admin/rag/experiments/{expId}/clone`:复制实验为新草案 +- `DELETE /admin/rag/experiments/{expId}`:删除实验记录 + +- `GET /admin/rag/diagnostics/samples`:获取预置样例(用于快速回归) + +### 3.5 全局会话监控:`/admin/sessions/*` +- `GET /admin/sessions`:会话列表(多维筛选) +- `GET /admin/sessions/{sessionId}`:会话详情(消息流、检索命中、工具调用、错误、耗时) + +- `GET /admin/sessions/stats/overview`:概览统计(时间范围 + 租户维度) +- `GET /admin/sessions/stats/top`:Top 指标(Top KB / Top Prompt / Top 错误码等) + +### 3.6 审计与运维(可选但常见) +- `GET /admin/audit/logs`:管理操作审计日志 +- `GET /admin/system/health`:服务健康/版本信息(用于后台页脚或诊断) + +### 3.7 统一错误模型(约定) +- 所有 `/admin/*` 接口建议返回统一错误结构(如 `code`, `message`, `requestId`, `details[]`),以支持后台调试与问题定位。 + +--- + +## 4) 产出物计划(Artifacts Plan) + +> 按方法论,本模块后续应产出 4 类核心工件;本次 Scoping 仅做计划,不生成内容(等待确认)。 + +1. `spec/ai-service-admin/requirements.md` + - 管理后台的用户故事与验收标准(EARS) + - Scope/Dependencies/Traceability + +2. `spec/ai-service/openapi.admin.yaml`(在 `ai-service` 下新增) + - 作为 `ai-service` 的 **admin provider** 契约(面向本后台) + - 标记 `info.x-contract-level`,并对关键 operationId 提供 L1/L2 所需字段 + +3. `spec/ai-service-admin/design.md` + - 前端信息架构(IA):菜单/页面/路由/权限点 + - 状态管理、缓存策略、分页/筛选模式 + - 调试实验室的交互与可观测性设计 + - 错误处理与追踪(requestId) + +4. `spec/ai-service-admin/tasks.md` + - 按页面/能力拆分的原子任务(含与 AC 的映射) + - 并行策略:基于 admin OpenAPI 生成 Mock/SDK diff --git a/spec/ai-service-admin/tasks.md b/spec/ai-service-admin/tasks.md new file mode 100644 index 0000000..0bdd6e4 --- /dev/null +++ b/spec/ai-service-admin/tasks.md @@ -0,0 +1,245 @@ +--- +module: ai-service-admin +title: "AI 中台管理界面(ai-service-admin)任务清单" +status: "completed" +version: "0.4.0" +owners: + - "frontend" + - "backend" +last_updated: "2026-02-25" +principles: + - atomic + - page-oriented +--- + +# tasks.md(ASA) + +> 原则: +> - **原子性**:每个任务应在 0.5 ~ 1.5 天内完成,且可独立提交与回滚。 +> - **页面导向**:以页面/路由为骨架拆分,组件/能力封装作为页面任务的前置或并行支撑。 +> - **可追溯**:每个任务必须标注关联的验收标准(AC-ASA-*)。 + +--- + +## Phase 1: 基础建设(Foundation) + +- [ ] (P1-01) 初始化 `ai-service-admin` 前端工程(Vue 3 + Element Plus + RuoYi-Vue 基座对齐),落地基础目录结构与路由骨架。 + - AC: [AC-ASA-01] + +- [ ] (P1-02) 接入 Pinia:实现 `tenant` store(`currentTenantId`)并持久化(localStorage),提供切换租户能力(最小 UI/逻辑)。 + - AC: [AC-ASA-01] + +- [ ] (P1-03) Axios/SDK 请求层封装:创建统一 `request` 实例,自动注入必填 Header `X-Tenant-Id`(从 Pinia 读取)。 + - AC: [AC-ASA-01] + +- [ ] (P1-04) 全局异常拦截:实现 401/403 响应拦截策略(401 跳转登录/清理凭证;403 统一提示 + 页面级占位)。 + - AC: [AC-ASA-01] + +- [ ] (P1-05) 基础组件封装:`BaseTable`(分页/筛选/加载态/空态)、`BaseForm`(校验/提交态/错误提示)并给出示例页。 + - AC: [AC-ASA-01] + +--- + +## Phase 2: 知识库管理(列表页/上传/任务轮询) + +> 页面导向:知识库文档列表页 + 上传入口 + 索引任务状态观测。 + +- [ ] (P2-01) 知识库文档列表页:实现列表展示与多条件筛选(kbId、状态、时间、来源),对接 `/admin/kb/documents`(若后端未提供 GET,则先 Mock)。 + - AC: [AC-ASA-01] + +- [ ] (P2-02) 上传组件:封装 `KbDocumentUpload`(基于 `el-upload`),支持多文件上传、上传中队列展示、失败重试提示。 + - AC: [AC-ASA-01] + +- [ ] (P2-03) 上传后任务回显:上传成功后从响应中提取 `jobId`,在列表或详情抽屉中展示任务卡片(状态/进度)。 + - AC: [AC-ASA-01] + +- [ ] (P2-04) 任务状态轮询:实现 `useJobPolling(jobId)` composable(3s 轮询 `/admin/kb/index/jobs/{jobId}`;完成/失败自动停止;切页/关闭抽屉自动取消)。 + - AC: [AC-ASA-01, AC-ASA-02] + +- [ ] (P2-05) 失败任务错误详情:实现错误展示弹窗/抽屉(展示 `errorMsg` + requestId),并在列表行高亮失败状态。 + - AC: [AC-ASA-02] + +--- + +## Phase 3: RAG 实验室(双栏对比/调试联调) + +> 页面导向:RAG 实验室主页面,突出“双栏对比视图”。 + +- [ ] (P3-01) RAG 实验室页面骨架:左侧参数面板(query、kbIds、检索参数、prompt 版本选择),右侧双栏结果区占位。 + - AC: [AC-ASA-05] + +- [ ] (P3-02) RAG 专用组件:实现 `RagRetrievalList`(召回片段列表:score/来源/片段)、`FinalPromptViewer`(最终 Prompt 只读展示,支持复制)。 + - AC: [AC-ASA-05] + +- [ ] (P3-03) 调试 API 联调:对接 `/admin/rag/experiments/run`,将返回的 `retrievalResults` 与 `finalPrompt` 绑定到双栏视图;处理 Loading/错误态。 + - AC: [AC-ASA-05] + +- [ ] (P3-04) 双配置对比模式(MVP):支持保留“上一次运行结果”作为对照(或同时运行两组参数,视后端能力),并在 UI 中标注差异。 + - AC: [AC-ASA-06] + +--- + +## Phase 4: 会话监控与详情(列表筛选/全链路详情) + +> 页面导向:会话列表页 + 全链路详情弹窗。 + +- [ ] (P4-01) 会话监控列表页:实现多字段过滤(tenantId、sessionId、时间范围、状态/是否错误等)与分页,对接 `/admin/sessions`(若后端未提供,则先 Mock)。 + - AC: [AC-ASA-07] + +- [ ] (P4-02) 会话详情弹窗:点击列表行打开弹窗,对接 `/admin/sessions/{sessionId}`,展示消息流(Timeline/气泡流)与基础元信息。 + - AC: [AC-ASA-07] + +- [ ] (P4-03) Trace 展开视图:在详情中实现“检索命中/工具调用/错误信息”折叠面板,支持按消息节点展开查看。 + - AC: [AC-ASA-07] + +--- + +## 待澄清(Open Questions) + +- [x] (Q-01) ✅ 已确认:后端将补齐 `GET /admin/kb/documents` 列表接口。 + - 影响 AC: [AC-ASA-01] + +- [x] (Q-02) ✅ 已确认:后端将补齐 `GET /admin/sessions` 列表接口。 + - 影响 AC: [AC-ASA-07] + +- [x] (Q-03) ✅ 已确认:对比模式采用“前端串行调用两次 `/admin/rag/experiments/run` 接口”的方式实现。 + - 影响 AC: [AC-ASA-06] + +## Mock 支撑(在 Python 后端未完成前) + +结论:**需要 Mock 支撑**,以便前端在后端未完成时并行开发页面与交互。 + +建议优先 Mock 覆盖(最小闭环): +- `/admin/kb/documents`(GET 列表,分页 + 筛选) +- `/admin/kb/documents`(POST 上传,返回 `jobId`) +- `/admin/kb/index/jobs/{jobId}`(GET 任务状态流转:pending -> processing -> completed/failed) +- `/admin/rag/experiments/run`(POST 实验结果:retrievalResults + finalPrompt) +- `/admin/sessions`(GET 列表,分页 + 筛选) +- `/admin/sessions/{sessionId}`(GET 详情:messages + trace) + +--- + +## Phase 5: 嵌入模型管理(配置页面/测试连接) + +> 页面导向:嵌入模型配置页面,支持提供者切换、参数配置、连接测试。 + +- [x] (P5-01) API 服务层与类型定义:创建 src/api/embedding.ts 和 src/types/embedding.ts + - AC: [AC-ASA-08, AC-ASA-09] + +- [x] (P5-02) 提供者选择组件:实现 `EmbeddingProviderSelect` 下拉组件,对接 `/admin/embedding/providers` + - AC: [AC-ASA-09] + +- [x] (P5-03) 动态配置表单:根据 `config_schema` 动态渲染配置表单,实现表单校验 + - AC: [AC-ASA-09, AC-ASA-10] + +- [x] (P5-04) 测试连接组件:实现 `EmbeddingTestPanel`,展示测试结果和错误信息 + - AC: [AC-ASA-11, AC-ASA-12] + +- [x] (P5-05) 支持格式组件:实现 `SupportedFormats`,展示支持的文档格式列表 + - AC: [AC-ASA-13] + +- [x] (P5-06) 页面骨架与路由:创建 `/admin/embedding` 页面,布局包含各功能区 + - AC: [AC-ASA-08] + +- [x] (P5-07) 配置加载与保存:实现配置加载、保存逻辑 + - AC: [AC-ASA-08, AC-ASA-10] + +- [x] (P5-08) 组件整合与测试:整合所有组件完成功能闭环 + - AC: [AC-ASA-08~AC-ASA-13] + +--- + +## Phase 5 任务进度追踪 + +| 任务 | 描述 | 状态 | +|------|------|------| +| P5-01 | API 服务层与类型定义 | ✅ 已完成 | +| P5-02 | 提供者选择组件 | ✅ 已完成 | +| P5-03 | 动态配置表单 | ✅ 已完成 | +| P5-04 | 测试连接组件 | ✅ 已完成 | +| P5-05 | 支持格式组件 | ✅ 已完成 | +| P5-06 | 页面骨架与路由 | ✅ 已完成 | +| P5-07 | 配置加载与保存 | ✅ 已完成 | +| P5-08 | 组件整合与测试 | ✅ 已完成 | + +--- + +## Phase 6: LLM 模型配置与 RAG 调试输出(v0.3.0) + +> 页面导向:LLM 模型配置页面 + RAG 实验室 AI 输出增强。 + +### 6.1 LLM 模型配置 + +- [x] (P6-01) LLM API 服务层与类型定义:创建 src/api/llm.ts 和 src/types/llm.ts + - AC: [AC-ASA-14, AC-ASA-15] + +- [x] (P6-02) LLM 提供者选择组件:实现 `LLMProviderSelect` 下拉组件 + - AC: [AC-ASA-15] + +- [x] (P6-03) LLM 动态配置表单:根据 `config_schema` 动态渲染配置表单 + - AC: [AC-ASA-15, AC-ASA-16] + +- [x] (P6-04) LLM 测试连接组件:实现 `LLMTestPanel`,展示测试回复和耗时 + - AC: [AC-ASA-17, AC-ASA-18] + +- [x] (P6-05) LLM 配置页面:创建 `/admin/llm` 页面,整合所有组件 + - AC: [AC-ASA-14, AC-ASA-16] + +### 6.2 RAG 实验室 AI 输出增强 + +- [x] (P6-06) AI 回复展示组件:实现 `AIResponseViewer`,展示 AI 最终输出 + - AC: [AC-ASA-19] + +- [x] (P6-07) 流式输出支持:实现 SSE 流式输出展示,支持实时显示 AI 回复 + - AC: [AC-ASA-20] + +- [x] (P6-08) Token 统计展示:展示 Token 消耗、响应耗时等统计信息 + - AC: [AC-ASA-21] + +- [x] (P6-09) LLM 选择器:在 RAG 实验室中添加 LLM 配置选择器 + - AC: [AC-ASA-22] + +- [x] (P6-10) RAG 实验室整合:将 AI 输出组件整合到 RAG 实验室页面 + - AC: [AC-ASA-19~AC-ASA-22] + +--- + +## Phase 6 任务进度追踪 + +| 任务 | 描述 | 状态 | +|------|------|------| +| P6-01 | LLM API 服务层与类型定义 | ✅ 已完成 | +| P6-02 | LLM 提供者选择组件 | ✅ 已完成 | +| P6-03 | LLM 动态配置表单 | ✅ 已完成 | +| P6-04 | LLM 测试连接组件 | ✅ 已完成 | +| P6-05 | LLM 配置页面 | ✅ 已完成 | +| P6-06 | AI 回复展示组件 | ✅ 已完成 | +| P6-07 | 流式输出支持 | ✅ 已完成 | +| P6-08 | Token 统计展示 | ✅ 已完成 | +| P6-09 | LLM 选择器 | ✅ 已完成 | +| P6-10 | RAG 实验室整合 | ✅ 已完成 | + +--- + +## Phase 7: 租户管理(v0.4.0) + +> 页面导向:租户选择器与租户管理功能。 + +- [x] (P7-01) 租户 API 服务层:创建 src/api/tenant.ts 和 src/types/tenant.ts + - AC: [AC-ASA-01] + +- [x] (P7-02) 租户选择器组件:实现 `TenantSelector` 下拉组件,支持租户切换 + - AC: [AC-ASA-01] + +- [x] (P7-03) 租户持久化:租户选择持久化到 localStorage + - AC: [AC-ASA-01] + +--- + +## Phase 7 任务进度追踪 + +| 任务 | 描述 | 状态 | +|------|------|------| +| P7-01 | 租户 API 服务层 | ✅ 已完成 | +| P7-02 | 租户选择器组件 | ✅ 已完成 | +| P7-03 | 租户持久化 | ✅ 已完成 | diff --git a/spec/ai-service/design.md b/spec/ai-service/design.md new file mode 100644 index 0000000..f326773 --- /dev/null +++ b/spec/ai-service/design.md @@ -0,0 +1,316 @@ +--- +feature_id: "AISVC" +title: "Python AI 中台(ai-service)技术设计" +status: "draft" +version: "0.1.0" +last_updated: "2026-02-24" +inputs: + - "spec/ai-service/requirements.md" + - "spec/ai-service/openapi.provider.yaml" + - "java/openapi.deps.yaml" +--- + +# Python AI 中台(ai-service)技术设计(AISVC) + +## 1. 设计目标与约束 + +### 1.1 设计目标 +- 落地 `POST /ai/chat` 的 **non-streaming JSON** 与 **SSE streaming** 两种返回模式,并确保与契约一致: + - non-streaming 响应字段必须包含 `reply/confidence/shouldTransfer`。 + - streaming 通过 `Accept: text/event-stream` 输出 `message/final/error` 事件序列。 +- 实现 AI 侧会话记忆:基于 `(tenantId, sessionId)` 持久化与加载。 +- 实现 RAG(MVP:向量检索)并预留图谱检索(Neo4j)插件点。 +- 多租户隔离: + - Qdrant:一租户一 collection(或一租户一 collection 前缀)。 + - PostgreSQL:按 `tenant_id` 分区/索引,保证跨租户不可见。 + +### 1.2 硬约束(来自契约与需求) +- API 对齐:`/ai/chat`、`/ai/health` 的路径/方法/状态码与 Java 侧 deps 对齐。 +- 多租户:请求必须携带 `X-Tenant-Id`(网关/拦截器易处理),所有数据访问必须以 `tenant_id` 过滤。 +- SSE:事件类型固定为 `message/final/error`,并保证顺序与异常语义清晰。 + +--- + +## 2. 总体架构与模块分层 + +### 2.1 分层概览 +本服务按“职责单一 + 可插拔”的原则分为五层: + +1) **API 层(Transport / Controller)** +- 职责: + - HTTP 请求解析、参数校验(含 `X-Tenant-Id`)、鉴权/限流(如后续需要)。 + - 根据 `Accept` 头选择 non-streaming 或 SSE streaming。 + - 统一错误映射为 `ErrorResponse`。 +- 输入:`X-Tenant-Id` header + `ChatRequest` body。 +- 输出: + - JSON:`ChatResponse` + - SSE:`message/final/error` 事件流。 + +2) **编排层(Orchestrator / Use Case)** +- 职责: + - 整体流程编排:加载会话记忆 → 合并上下文 →(可选)RAG 检索 → 组装 prompt → 调用 LLM → 计算置信度与转人工建议 → 写回记忆。 + - 在 streaming 模式下,将 LLM 的增量输出转为 SSE `message` 事件,同时维护最终 `reply`。 +- 输入:`tenantId, sessionId, currentMessage, channelType, history?, metadata?` +- 输出: + - non-streaming:一次性 `ChatResponse` + - streaming:增量 token(或片段)流 + 最终 `ChatResponse`。 + +3) **记忆层(Memory)** +- 职责: + - 持久化会话消息与摘要/记忆(最小:消息列表)。 + - 提供按 `(tenantId, sessionId)` 查询的会话上下文读取 API。 +- 存储:PostgreSQL。 + +4) **检索层(Retrieval)** +- 职责: + - 提供统一 `Retriever` 抽象接口。 + - MVP 实现:向量检索(Qdrant)。 + - 插件点:图谱检索(Neo4j)实现可新增而不改动 Orchestrator。 + +5) **LLM 适配层(LLM Adapter)** +- 职责: + - 屏蔽不同 LLM 提供方差异(请求格式、流式回调、重试策略)。 + - 提供:一次性生成接口 + 流式生成接口(yield token/delta)。 + +### 2.2 关键数据流(文字版) +- API 层接收请求 → 提取 `tenantId`(Header)与 body → 调用 Orchestrator。 +- Orchestrator: + 1) Memory.load(tenantId, sessionId) + 2) merge_context(local_history, external_history) + 3) Retrieval.retrieve(query, tenantId, channelType, metadata)(MVP 向量检索) + 4) build_prompt(merged_history, retrieved_docs, currentMessage) + 5) LLM.generate(...)(non-streaming)或 LLM.stream_generate(...)(streaming) + 6) compute_confidence(…) + 7) Memory.append(tenantId, sessionId, user/assistant messages) + 8) 返回 `ChatResponse`(或通过 SSE 输出)。 + +--- + +## 3. API 与协议设计要点 + +### 3.1 tenantId 放置与处理 +- **主入口**:`X-Tenant-Id` header(契约已声明 required)。 +- Orchestrator 与所有下游组件调用均显式传入 `tenantId`。 +- 禁止使用仅 `sessionId` 定位会话,必须 `(tenantId, sessionId)`。 + +### 3.2 streaming / non-streaming 模式判定 +- 以 `Accept` 头作为唯一判定依据: + - `Accept: text/event-stream` → SSE streaming。 + - 其他 → non-streaming JSON。 + +--- + +## 4. RAG 管道设计 + +### 4.1 MVP:向量检索(Qdrant)流程 + +#### 4.1.1 步骤 +1) **Query 规范化** +- 输入:`currentMessage`(可结合 `channelType` 与 metadata)。 +- 规则:去噪、截断(防止超长)、可选的 query rewrite(MVP 可不做)。 + +2) **Embedding** +- 由 `EmbeddingProvider` 生成向量(可复用 LLM 适配层或独立适配层)。 +- ✅ 已确认:Token 计数统一使用 `tiktoken` 进行精确计算(用于 history 截断与证据预算)。 + +3) **向量检索**(Qdrant) +- 按租户隔离选择 collection(见 5.1)。 +- 使用 topK + score threshold 过滤。 + +4) **上下文构建** +- 将检索结果转为 “证据片段列表”,限制总 token 与片段数。 +- 生成 prompt 时区分:系统指令 / 对话历史 / 证据 / 当前问题。 + +5) **生成与引用策略** +- 生成回答必须优先依据证据。 +- 若证据不足:触发兜底策略(见 4.3)。 + +#### 4.1.2 关键参数(MVP 默认,可配置) +- topK(例如 5~10) +- scoreThreshold(相似度阈值) +- minHits(最小命中文档数) +- maxEvidenceTokens(证据总 token 上限) + +### 4.2 图谱检索插件点(Neo4j) + +#### 4.2.1 Retriever 抽象接口(概念设计) +设计统一接口,使 Orchestrator 不关心向量/图谱差异: + +- `Retriever.retrieve(ctx) -> RetrievalResult` + - 输入 `ctx`:包含 `tenantId`, `query`, `sessionId`, `channelType`, `metadata` 等。 + - 输出 `RetrievalResult`: + - `hits[]`:证据条目(统一为 text + score + source + metadata) + - `diagnostics`:检索调试信息(可选) + +MVP 提供 `VectorRetriever(Qdrant)`。 + +#### 4.2.2 Neo4j 接入方式(未来扩展) +新增实现类 `GraphRetriever(Neo4j)`,实现同一接口: +- tenant 隔离:Neo4j 可采用 database per tenant / label+tenantId 过滤 / subgraph per tenant(视规模与授权能力选择)。 +- 输出同构 `RetrievalResult`,由 ContextBuilder 使用。 + +> 约束:新增 GraphRetriever 不应要求修改 API 层与 Orchestrator 的业务流程,只需配置切换(策略模式/依赖注入)。 + +### 4.3 检索不中兜底与置信度策略(对应 AC-AISVC-17/18/19) + +定义“检索不足”的判定: +- `hits.size < minHits` 或 `max(score) < scoreThreshold` 或 evidence token 超限导致可用证据过少。 + +兜底动作: +1) 回复策略: +- 明确表达“未从知识库确认/建议咨询人工/提供可执行下一步”。 +- 避免编造具体事实性结论。 +2) 置信度: +- 以 `T_low` 为阈值(可配置),检索不足场景通常产生较低 `confidence`。 +3) 转人工建议: +- `confidence < T_low` 时 `shouldTransfer=true`,可附 `transferReason`。 +- ✅ 已确认:MVP 阶段 `confidence` 优先基于 RAG 检索分数(Score)计算(并结合检索不中兜底下调)。 + +--- + +## 5. 多租户隔离方案 + +### 5.1 Qdrant(向量库)隔离:一租户一 Collection + +#### 5.1.1 命名规则 +- collection 命名:`kb_{tenantId}`(或 `kb_{tenantId}_{kbName}` 为未来多知识库预留)。 + +#### 5.1.2 读写路径 +- 所有 upsert/search 操作必须先基于 `tenantId` 解析目标 collection。 +- 禁止在同一 collection 内通过 payload filter 做租户隔离作为默认方案(可作为兜底/迁移手段),原因: + - 更容易出现误用导致跨租户泄露。 + - 运维与配额更难隔离(单租户删除、重建、统计)。 + +#### 5.1.3 租户生命周期 +- tenant 创建:初始化 collection(含向量维度与 index 参数)。 + - ✅ 已确认:采用**提前预置**模式,不通过业务请求动态创建 collection。 +- tenant 删除:删除 collection。 +- tenant 扩容:独立配置 HNSW 参数或分片(依赖 Qdrant 部署模式)。 + +### 5.2 PostgreSQL(会话库)分区与约束 + +#### 5.2.1 表设计(概念) +- `chat_sessions` + - `tenant_id` (NOT NULL) + - `session_id` (NOT NULL) + - `created_at`, `updated_at` + - 主键/唯一约束:`(tenant_id, session_id)` + +- `chat_messages` + - `tenant_id` (NOT NULL) + - `session_id` (NOT NULL) + - `message_id` (UUID 或 bigserial) + - `role` (user/assistant) + - `content` (text) + - `created_at` + +#### 5.2.2 分区策略 +根据租户规模选择: + +**方案 A(MVP 推荐):逻辑分区 + 复合索引** +- 不做 PG 分区表。 +- 建立索引: + - `chat_messages(tenant_id, session_id, created_at)` + - `chat_sessions(tenant_id, session_id)` +- 好处:实现与运维简单。 + +**方案 B(规模化):按 tenant_id 做 LIST/HASH 分区** +- `chat_messages` 按 `tenant_id` 分区(LIST 或 HASH)。 +- 适合租户数量有限且单租户数据量大,或需要更强隔离与清理效率。 + +#### 5.2.3 防串租约束 +- 所有查询必须带 `tenant_id` 条件;在代码层面提供 `TenantScopedRepository` 强制注入。 +- 可选:启用 Row Level Security(RLS)并通过 `SET app.tenant_id` 做隔离(实现复杂度较高,后续可选)。 + +--- + +## 6. SSE 状态机设计(顺序与异常保证) + +### 6.1 状态机 +定义连接级状态: +- `INIT`:已建立连接,尚未输出。 +- `STREAMING`:持续输出 `message` 事件。 +- `FINAL_SENT`:已输出 `final`,准备关闭。 +- `ERROR_SENT`:已输出 `error`,准备关闭。 +- `CLOSED`:连接关闭。 + +### 6.2 事件顺序保证 +- 在一次请求生命周期内,事件序列必须满足: + - `message*`(0 次或多次) → **且仅一次** `final` → close + - 或 `message*`(0 次或多次) → **且仅一次** `error` → close +- 禁止 `final` 之后再发送 `message`。 +- 禁止同时发送 `final` 与 `error`。 + +实现策略(概念): +- Orchestrator 维护一个原子状态变量(或单线程事件循环保证),在发送 `final/error` 时 CAS 切换状态。 +- 对 LLM 流式回调进行包装: + - 每个 delta 输出前检查状态必须为 `STREAMING`。 + - 发生异常立即进入 `ERROR_SENT` 并输出 `error`。 + +### 6.3 异常处理 +- 参数错误:在进入流式生成前即可判定,直接发送 `error`(或返回 400,取决于是否已经选择 SSE;建议 SSE 模式同样用 `event:error` 输出 ErrorResponse)。 +- 下游依赖错误(LLM/Qdrant/PG): + - 若尚未开始输出:可直接返回 503/500 JSON(non-streaming)或发送 `event:error`(streaming)。 + - 若已输出部分 `message`:必须以 `event:error` 收尾。 +- 客户端断开: + - 立即停止 LLM 流(如果适配层支持 cancel),并避免继续写入 response。 + +- ✅ 已确认:必须实现 SSE 心跳(Keep-alive),以注释行形式定期发送 `: ping`(不改变事件模型),防止网关/中间件断开连接。 +- ✅ 已确认:Python 内部设置 **20s 硬超时**(包含 LLM 调用与检索/存储等关键步骤的总体超时控制),防止资源泄露与请求堆积。 + +--- + +## 7. 上下文合并规则(Java history + 本地持久化 history) + +### 7.1 合并输入 +- `H_local`:Memory 层基于 `(tenantId, sessionId)` 读取到的历史(按时间排序)。 +- `H_ext`:Java 请求中可选的 `history`(按传入顺序)。 + +### 7.2 去重规则(确定性) +为避免重复注入导致 prompt 膨胀,定义 message 指纹: +- `fingerprint = hash(role + "|" + normalized(content))` +- normalized:trim + 统一空白(MVP 简化:trim)。 + +去重策略: +1) 先以 `H_local` 构建 `seen` 集合。 +2) 遍历 `H_ext`:若 fingerprint 未出现,则追加到 merged;否则跳过。 + +> 解释:优先信任本地持久化历史,外部 history 作为补充。 + +### 7.3 优先级与冲突处理 +- 若 `H_ext` 与 `H_local` 在末尾存在重复但内容略有差异: + - MVP 采取“以 local 为准”策略(保持服务端一致性)。 + - 将差异记录到 diagnostics(可选)供后续排查。 + +### 7.4 截断策略(控制 token) +合并后历史 `H_merged` 需受 token 预算约束: +- 预算 = `maxHistoryTokens`(可配置)。 +- 截断策略:保留最近的 N 条(从尾部向前累加 token 直到阈值)。 +- 可选增强(后续):对更早历史做摘要并作为系统记忆注入。 + +--- + +## 8. 关键接口(内部)与可插拔点 + +### 8.1 Orchestrator 依赖接口(概念) +- `MemoryStore` + - `load_history(tenantId, sessionId) -> messages[]` + - `append_messages(tenantId, sessionId, messages[])` +- `Retriever` + - `retrieve(tenantId, query, metadata) -> RetrievalResult` +- `LLMClient` + - `generate(prompt, params) -> text` + - `stream_generate(prompt, params) -> iterator[delta]` + +### 8.2 插件点 +- Retrieval:VectorRetriever / GraphRetriever / HybridRetriever +- LLM:OpenAICompatibleClient / LocalModelClient +- ConfidencePolicy:可替换策略(基于检索质量 + 模型信号) + +--- + +## 9. 风险与后续工作 +- SSE 的网关兼容性:需确认网关是否支持 `text/event-stream` 透传与超时策略。 +- 租户级 collection 数量增长:若租户数量巨大,Qdrant collection 管理成本上升;可在规模化阶段切换为“单 collection + payload tenant filter”并加强隔离校验。 +- 上下文膨胀:仅截断可能影响长会话体验;后续可引入摘要记忆与检索式记忆。 +- 置信度定义:MVP 先以规则/阈值实现,后续引入离线评测与校准。 diff --git a/spec/ai-service/openapi.admin.yaml b/spec/ai-service/openapi.admin.yaml new file mode 100644 index 0000000..3fbf5d0 --- /dev/null +++ b/spec/ai-service/openapi.admin.yaml @@ -0,0 +1,385 @@ +openapi: 3.1.0 +info: + title: "AI Service Admin API" + description: "AI 中台管理类接口契约(Provider: ai-service),支持 ai-service-admin 模块进行知识库、Prompt 及 RAG 调试管理。" + version: "0.2.0" + x-contract-level: L1 # 已实现级别,接口已真实对接 +components: + parameters: + XTenantId: + name: X-Tenant-Id + in: header + required: true + schema: + type: string + description: "租户ID,用于物理隔离知识库与数据" + responses: + Unauthorized: + description: "未认证(缺少或无效的认证信息)" + Forbidden: + description: "无权限(当前身份无权访问该资源)" + schemas: + DocumentInfo: + type: object + properties: + docId: + type: string + description: "文档ID" + kbId: + type: string + description: "知识库ID" + fileName: + type: string + description: "文件名" + status: + type: string + description: "文档状态" + enum: [pending, processing, completed, failed] + createdAt: + type: string + format: date-time + description: "创建时间" + updatedAt: + type: string + format: date-time + description: "更新时间" + SessionInfo: + type: object + properties: + sessionId: + type: string + description: "会话ID" + status: + type: string + description: "会话状态" + enum: [active, closed, expired] + startTime: + type: string + format: date-time + description: "开始时间" + endTime: + type: string + format: date-time + description: "结束时间" + messageCount: + type: integer + description: "消息数量" + PageInfo: + type: object + properties: + page: + type: integer + description: "当前页码" + pageSize: + type: integer + description: "每页大小" + total: + type: integer + description: "总记录数" + totalPages: + type: integer + description: "总页数" + +paths: + /admin/kb/documents: + get: + summary: "查询文档列表" + operationId: "listDocuments" + tags: + - KB Management + x-requirements: ["AC-ASA-08", "AC-AISVC-23"] + parameters: + - $ref: "#/components/parameters/XTenantId" + - name: kbId + in: query + required: false + schema: + type: string + description: "知识库ID" + - name: status + in: query + required: false + schema: + type: string + enum: [pending, processing, completed, failed] + description: "文档状态" + - name: page + in: query + required: false + schema: + type: integer + default: 1 + description: "页码" + - name: pageSize + in: query + required: false + schema: + type: integer + default: 20 + description: "每页大小" + responses: + '200': + description: "文档列表" + content: + application/json: + schema: + type: object + properties: + data: + type: array + items: + $ref: "#/components/schemas/DocumentInfo" + pagination: + $ref: "#/components/schemas/PageInfo" + '401': + $ref: "#/components/responses/Unauthorized" + '403': + $ref: "#/components/responses/Forbidden" + post: + summary: "上传/导入文档" + operationId: "uploadDocument" + tags: + - KB Management + x-requirements: ["AC-ASA-01", "AC-AISVC-21", "AC-AISVC-22"] + parameters: + - $ref: "#/components/parameters/XTenantId" + requestBody: + required: true + content: + multipart/form-data: + schema: + type: object + properties: + file: + type: string + format: binary + kbId: + type: string + responses: + '202': + description: "已接受上传请求,异步启动索引任务" + content: + application/json: + schema: + type: object + properties: + jobId: + type: string + status: + type: string + enum: [pending, processing] + '401': + $ref: "#/components/responses/Unauthorized" + '403': + $ref: "#/components/responses/Forbidden" + /admin/kb/index/jobs/{jobId}: + get: + summary: "查询索引任务详情" + operationId: "getIndexJob" + tags: + - KB Management + x-requirements: ["AC-ASA-02", "AC-AISVC-24"] + parameters: + - $ref: "#/components/parameters/XTenantId" + - name: jobId + in: path + required: true + schema: + type: string + responses: + '200': + description: "任务状态详情" + content: + application/json: + schema: + type: object + properties: + jobId: + type: string + status: + type: string + enum: [pending, processing, completed, failed] + progress: + type: integer + minimum: 0 + maximum: 100 + errorMsg: + type: string + nullable: true + /admin/config/prompt-templates/{tplId}/publish: + post: + summary: "发布指定版本的 Prompt 模板" + operationId: "publishPromptTemplate" + tags: + - Prompt Management + x-requirements: ["AC-ASA-03"] + parameters: + - $ref: "#/components/parameters/XTenantId" + - name: tplId + in: path + required: true + schema: + type: string + requestBody: + content: + application/json: + schema: + type: object + properties: + version: + type: string + responses: + '200': + description: "发布成功" + '401': + $ref: "#/components/responses/Unauthorized" + '403': + $ref: "#/components/responses/Forbidden" + /admin/rag/experiments/run: + post: + summary: "触发 RAG 调试实验" + operationId: "runRagExperiment" + tags: + - RAG Lab + x-requirements: ["AC-ASA-05", "AC-AISVC-25", "AC-AISVC-26"] + parameters: + - $ref: "#/components/parameters/XTenantId" + requestBody: + required: true + content: + application/json: + schema: + type: object + properties: + query: + type: string + kbIds: + type: array + items: + type: string + params: + type: object + description: "检索参数集" + responses: + '200': + description: "实验结果" + content: + application/json: + schema: + type: object + properties: + retrievalResults: + type: array + items: + type: object + properties: + content: + type: string + score: + type: number + format: float + source: + type: string + finalPrompt: + type: string + '401': + $ref: "#/components/responses/Unauthorized" + '403': + $ref: "#/components/responses/Forbidden" + /admin/sessions: + get: + summary: "查询会话列表" + operationId: "listSessions" + tags: + - Session Monitoring + x-requirements: ["AC-ASA-09", "AC-AISVC-27"] + parameters: + - $ref: "#/components/parameters/XTenantId" + - name: status + in: query + required: false + schema: + type: string + enum: [active, closed, expired] + description: "会话状态" + - name: startTime + in: query + required: false + schema: + type: string + format: date-time + description: "开始时间" + - name: endTime + in: query + required: false + schema: + type: string + format: date-time + description: "结束时间" + - name: page + in: query + required: false + schema: + type: integer + default: 1 + description: "页码" + - name: pageSize + in: query + required: false + schema: + type: integer + default: 20 + description: "每页大小" + responses: + '200': + description: "会话列表" + content: + application/json: + schema: + type: object + properties: + data: + type: array + items: + $ref: "#/components/schemas/SessionInfo" + pagination: + $ref: "#/components/schemas/PageInfo" + '401': + $ref: "#/components/responses/Unauthorized" + '403': + $ref: "#/components/responses/Forbidden" + /admin/sessions/{sessionId}: + get: + summary: "获取会话详情" + operationId: "getSessionDetail" + tags: + - Session Monitoring + x-requirements: ["AC-ASA-07", "AC-AISVC-28"] + parameters: + - $ref: "#/components/parameters/XTenantId" + - name: sessionId + in: path + required: true + schema: + type: string + responses: + '200': + description: "全链路会话详情" + content: + application/json: + schema: + type: object + properties: + sessionId: + type: string + messages: + type: array + items: + type: object + trace: + type: object + description: "含检索、工具调用等追踪信息" + '401': + $ref: "#/components/responses/Unauthorized" + '403': + $ref: "#/components/responses/Forbidden" diff --git a/spec/ai-service/openapi.provider.yaml b/spec/ai-service/openapi.provider.yaml new file mode 100644 index 0000000..a5c3055 --- /dev/null +++ b/spec/ai-service/openapi.provider.yaml @@ -0,0 +1,967 @@ +openapi: 3.0.3 +info: + title: AI Service API + description: | + Python AI 服务对外提供的接口契约(Provider)。 + + 目标:100% 覆盖并对齐 Java 端 Consumer-First 依赖契约 `java/openapi.deps.yaml`。 + - 路径与方法必须一致 + - non-streaming JSON 响应 schema 必须一致(reply/confidence/shouldTransfer 等) + + 额外扩展:支持 SSE 流式输出(Accept: text/event-stream)。 + version: 1.1.0 + x-contract-level: L2 + x-provider: "python-ai-service" + x-consumer: "java-main-framework" + +servers: + - url: http://ai-service:8080 + description: AI 服务地址 + +tags: + - name: AI Chat + description: 对话生成 + - name: Health + description: 健康检查 + - name: Embedding Management + description: 嵌入模型管理 + - name: LLM Management + description: LLM 模型管理 + - name: RAG Lab + description: RAG 实验室 + +paths: + /ai/chat: + post: + operationId: generateReply + summary: 生成 AI 回复 + description: | + 根据用户消息和会话历史生成 AI 回复。 + + non-streaming:返回 application/json(ChatResponse)。 + streaming:当请求头包含 `Accept: text/event-stream` 时,以 SSE 推送事件流。 + + 覆盖验收标准(来自 Java 侧契约描述): + - AC-MCA-04: 主框架通过 HTTP POST 调用 AI 服务 + - AC-MCA-05: 响应包含 reply、confidence、shouldTransfer 字段 + - AC-MCA-06: AI 服务不可用时的降级处理(主框架侧实现) + - AC-MCA-07: 超时处理(主框架侧实现) + tags: + - AI Chat + x-requirements: + - AC-MCA-04 + - AC-MCA-04-REQ + - AC-MCA-04-OPT + - AC-MCA-05 + - AC-MCA-06 + - AC-MCA-07 + parameters: + - name: X-Tenant-Id + in: header + required: true + description: 租户ID(多租户隔离必填,便于网关/拦截器统一处理) + schema: + type: string + - name: Accept + in: header + required: false + description: | + 内容协商。 + - 设置为 text/event-stream 时返回 SSE 事件流 + - 其他情况返回 application/json + schema: + type: string + requestBody: + required: true + content: + application/json: + schema: + $ref: '#/components/schemas/ChatRequest' + example: + sessionId: "kf_001_wx123456_1708765432000" + currentMessage: "我想了解产品价格" + channelType: "wechat" + metadata: + channelUserId: "wx123456" + extra: "..." + responses: + '200': + description: | + 成功生成回复。 + + 注意: + - non-streaming:响应 Content-Type 为 application/json + - streaming:当请求头 Accept: text/event-stream 时,服务端可返回 text/event-stream(见下方 200 的第二种 content 描述) + content: + application/json: + schema: + $ref: '#/components/schemas/ChatResponse' + example: + reply: "您好,我们的产品价格根据套餐不同有所差异。" + confidence: 0.92 + shouldTransfer: false + text/event-stream: + schema: + type: string + description: | + SSE 事件流(按行文本)。事件模型: + + 1) event: message + - data: {"delta": "..."} + - 返回时机:模型生成过程中多次发送,用于增量渲染。 + + 2) event: final + - data: ChatResponse(完整结构化结果,字段至少包含 reply/confidence/shouldTransfer) + - 返回时机:生成结束时发送一次,随后关闭连接。 + + 3) event: error + - data: ErrorResponse(结构化错误,至少 code/message,可含 details) + - 返回时机:发生错误时发送一次,随后关闭连接。 + + 示例(片段): + event: message\n +data: {"delta":"您好,"}\n + + event: final\n +data: {"reply":"...","confidence":0.9,"shouldTransfer":false}\n + '400': + description: 请求参数错误 + content: + application/json: + schema: + $ref: '#/components/schemas/ErrorResponse' + '500': + description: 服务内部错误 + content: + application/json: + schema: + $ref: '#/components/schemas/ErrorResponse' + '503': + description: 服务不可用 + content: + application/json: + schema: + $ref: '#/components/schemas/ErrorResponse' + + /ai/health: + get: + operationId: healthCheck + summary: 健康检查 + description: 检查 AI 服务是否正常运行 + tags: + - Health + responses: + '200': + description: 服务正常 + content: + application/json: + schema: + type: object + properties: + status: + type: string + '503': + description: 服务不健康 + + /admin/embedding/providers: + get: + operationId: listEmbeddingProviders + summary: 获取可用的嵌入模型提供者列表 + description: | + 返回所有已注册的嵌入模型提供者及其配置参数定义。 + + 覆盖验收标准: + - AC-AISVC-38: 返回所有已注册的提供者列表及其配置参数定义 + tags: + - Embedding Management + x-requirements: + - AC-AISVC-38 + parameters: + - name: X-Tenant-Id + in: header + required: true + description: 租户ID + schema: + type: string + responses: + '200': + description: 成功返回提供者列表 + content: + application/json: + schema: + type: object + properties: + providers: + type: array + items: + $ref: '#/components/schemas/EmbeddingProviderInfo' + '500': + description: 服务内部错误 + content: + application/json: + schema: + $ref: '#/components/schemas/ErrorResponse' + + /admin/embedding/config: + get: + operationId: getEmbeddingConfig + summary: 获取当前嵌入模型配置 + description: | + 返回当前激活的嵌入模型提供者及其参数配置。 + + 覆盖验收标准: + - AC-AISVC-39: 返回当前激活的提供者及其参数配置 + tags: + - Embedding Management + x-requirements: + - AC-AISVC-39 + parameters: + - name: X-Tenant-Id + in: header + required: true + description: 租户ID + schema: + type: string + responses: + '200': + description: 成功返回当前配置 + content: + application/json: + schema: + $ref: '#/components/schemas/EmbeddingConfig' + '500': + description: 服务内部错误 + content: + application/json: + schema: + $ref: '#/components/schemas/ErrorResponse' + put: + operationId: updateEmbeddingConfig + summary: 更新嵌入模型配置 + description: | + 更新嵌入模型配置,支持热更新(无需重启服务)。 + + 覆盖验收标准: + - AC-AISVC-40: 验证配置有效性,更新配置并返回成功状态 + - AC-AISVC-31: 支持热更新 + tags: + - Embedding Management + x-requirements: + - AC-AISVC-40 + - AC-AISVC-31 + parameters: + - name: X-Tenant-Id + in: header + required: true + description: 租户ID + schema: + type: string + requestBody: + required: true + content: + application/json: + schema: + $ref: '#/components/schemas/EmbeddingConfigUpdate' + example: + provider: "ollama" + config: + base_url: "http://localhost:11434" + model: "nomic-embed-text" + dimension: 768 + responses: + '200': + description: 配置更新成功 + content: + application/json: + schema: + type: object + properties: + success: + type: boolean + message: + type: string + '400': + description: 配置参数无效 + content: + application/json: + schema: + $ref: '#/components/schemas/ErrorResponse' + '500': + description: 服务内部错误 + content: + application/json: + schema: + $ref: '#/components/schemas/ErrorResponse' + + /admin/embedding/test: + post: + operationId: testEmbedding + summary: 测试嵌入模型连接 + description: | + 调用嵌入模型生成测试向量,返回连接状态和向量维度信息。 + + 覆盖验收标准: + - AC-AISVC-41: 调用嵌入模型生成测试向量,返回连接状态和向量维度信息 + tags: + - Embedding Management + x-requirements: + - AC-AISVC-41 + parameters: + - name: X-Tenant-Id + in: header + required: true + description: 租户ID + schema: + type: string + requestBody: + required: false + content: + application/json: + schema: + type: object + properties: + test_text: + type: string + description: 测试文本(可选,默认使用固定测试文本) + example: "这是一个测试文本" + config: + $ref: '#/components/schemas/EmbeddingConfigUpdate' + description: 测试配置(可选,不传则使用当前配置) + responses: + '200': + description: 测试成功 + content: + application/json: + schema: + $ref: '#/components/schemas/EmbeddingTestResult' + '400': + description: 配置参数无效 + content: + application/json: + schema: + $ref: '#/components/schemas/ErrorResponse' + '500': + description: 连接测试失败 + content: + application/json: + schema: + $ref: '#/components/schemas/ErrorResponse' + +components: + schemas: + ChatRequest: + type: object + required: + - sessionId + - currentMessage + - channelType + properties: + sessionId: + type: string + description: 会话ID(AC-MCA-04-REQ 必填) + currentMessage: + type: string + description: 当前用户消息(AC-MCA-04-REQ 必填) + channelType: + type: string + description: 渠道类型(AC-MCA-04-REQ 必填) + enum: + - wechat + - douyin + - jd + history: + type: array + description: 历史消息列表(AC-MCA-04-OPT 可选) + items: + $ref: '#/components/schemas/ChatMessage' + metadata: + type: object + description: 扩展元数据(AC-MCA-04-OPT 可选) + additionalProperties: true + + ChatMessage: + type: object + required: + - role + - content + properties: + role: + type: string + enum: + - user + - assistant + content: + type: string + + ChatResponse: + type: object + required: + - reply + - confidence + - shouldTransfer + properties: + reply: + type: string + description: AI 回复内容(AC-MCA-05 必填) + confidence: + type: number + format: double + description: 置信度评分 0.0-1.0(AC-MCA-05 必填) + shouldTransfer: + type: boolean + description: 是否建议转人工(AC-MCA-05 必填) + transferReason: + type: string + description: 转人工原因(可选) + metadata: + type: object + description: 响应元数据(可选) + additionalProperties: true + + ErrorResponse: + type: object + required: + - code + - message + properties: + code: + type: string + description: 错误代码 + message: + type: string + description: 错误消息 + details: + type: array + description: 详细错误信息(可选) + items: + type: object + additionalProperties: true + + EmbeddingProviderInfo: + type: object + description: 嵌入模型提供者信息 + required: + - name + - display_name + - config_schema + properties: + name: + type: string + description: 提供者唯一标识 + example: "ollama" + display_name: + type: string + description: 提供者显示名称 + example: "Ollama 本地模型" + description: + type: string + description: 提供者描述 + example: "使用 Ollama 运行的本地嵌入模型" + config_schema: + type: object + description: 配置参数定义(JSON Schema 格式) + additionalProperties: true + example: + base_url: + type: "string" + description: "Ollama API 地址" + default: "http://localhost:11434" + model: + type: "string" + description: "模型名称" + default: "nomic-embed-text" + dimension: + type: "integer" + description: "向量维度" + default: 768 + + EmbeddingConfig: + type: object + description: 当前嵌入模型配置 + required: + - provider + - config + properties: + provider: + type: string + description: 当前激活的提供者 + example: "ollama" + config: + type: object + description: 提供者配置参数 + additionalProperties: true + example: + base_url: "http://localhost:11434" + model: "nomic-embed-text" + dimension: 768 + updated_at: + type: string + format: date-time + description: 配置最后更新时间 + + EmbeddingConfigUpdate: + type: object + description: 嵌入模型配置更新请求 + required: + - provider + properties: + provider: + type: string + description: 提供者标识 + example: "ollama" + config: + type: object + description: 提供者配置参数 + additionalProperties: true + + EmbeddingTestResult: + type: object + description: 嵌入模型测试结果 + required: + - success + - dimension + properties: + success: + type: boolean + description: 测试是否成功 + dimension: + type: integer + description: 返回的向量维度 + example: 768 + latency_ms: + type: number + description: 响应延迟(毫秒) + example: 125.5 + message: + type: string + description: 测试结果消息 + example: "连接成功,向量维度: 768" + error: + type: string + description: 错误信息(失败时) + example: "连接超时" + + LLMProviderInfo: + type: object + description: LLM 提供者信息 + required: + - name + - display_name + - config_schema + properties: + name: + type: string + description: 提供者唯一标识 + example: "openai" + display_name: + type: string + description: 提供者显示名称 + example: "OpenAI" + description: + type: string + description: 提供者描述 + example: "OpenAI GPT 系列模型" + config_schema: + type: object + description: 配置参数定义(JSON Schema 格式) + additionalProperties: true + + LLMConfig: + type: object + description: 当前 LLM 配置 + required: + - provider + - config + properties: + provider: + type: string + description: 当前激活的提供者 + example: "openai" + config: + type: object + description: 提供者配置参数(敏感字段已脱敏) + additionalProperties: true + example: + api_key: "sk-***" + base_url: "https://api.openai.com/v1" + model: "gpt-4o-mini" + updated_at: + type: string + format: date-time + description: 配置最后更新时间 + + LLMConfigUpdate: + type: object + description: LLM 配置更新请求 + required: + - provider + properties: + provider: + type: string + description: 提供者标识 + example: "openai" + config: + type: object + description: 提供者配置参数 + additionalProperties: true + + LLMTestResult: + type: object + description: LLM 测试结果 + required: + - success + properties: + success: + type: boolean + description: 测试是否成功 + response: + type: string + description: LLM 响应内容 + example: "你好!我是一个 AI 助手..." + latency_ms: + type: number + description: 响应延迟(毫秒) + example: 1250.5 + prompt_tokens: + type: integer + description: 输入 Token 数 + example: 15 + completion_tokens: + type: integer + description: 输出 Token 数 + example: 50 + total_tokens: + type: integer + description: 总 Token 数 + example: 65 + model: + type: string + description: 使用的模型 + example: "gpt-4o-mini" + message: + type: string + description: 测试结果消息 + example: "连接成功" + error: + type: string + description: 错误信息(失败时) + example: "API Key 无效" + + RagExperimentRequest: + type: object + description: RAG 实验请求 + required: + - query + properties: + query: + type: string + description: 查询文本 + example: "什么是 RAG?" + kb_ids: + type: array + items: + type: string + description: 知识库 ID 列表 + top_k: + type: integer + description: 检索数量 + default: 5 + score_threshold: + type: number + description: 相似度阈值 + default: 0.5 + generate_response: + type: boolean + description: 是否生成 AI 回复 + default: true + llm_provider: + type: string + description: 指定 LLM 提供者(可选) + example: "openai" + + RagExperimentResult: + type: object + description: RAG 实验结果 + properties: + query: + type: string + description: 原始查询 + retrieval_results: + type: array + items: + $ref: '#/components/schemas/RetrievalResult' + final_prompt: + type: string + description: 最终拼接的 Prompt + ai_response: + $ref: '#/components/schemas/AIResponse' + total_latency_ms: + type: number + description: 总耗时(毫秒) + diagnostics: + type: object + additionalProperties: true + description: 诊断信息 + + RetrievalResult: + type: object + description: 检索结果 + properties: + content: + type: string + description: 检索到的内容 + score: + type: number + description: 相似度分数 + source: + type: string + description: 来源文档 + metadata: + type: object + additionalProperties: true + description: 元数据 + + AIResponse: + type: object + description: AI 回复 + properties: + content: + type: string + description: AI 回复内容 + prompt_tokens: + type: integer + description: 输入 Token 数 + completion_tokens: + type: integer + description: 输出 Token 数 + total_tokens: + type: integer + description: 总 Token 数 + latency_ms: + type: number + description: 生成耗时(毫秒) + model: + type: string + description: 使用的模型 + + /admin/llm/providers: + get: + operationId: listLLMProviders + summary: 获取可用的 LLM 提供者列表 + description: | + [AC-ASA-15] 返回所有支持的 LLM 提供者及其配置参数定义。 + 支持的提供者:OpenAI、Ollama、Azure OpenAI + tags: + - LLM Management + parameters: + - name: X-Tenant-Id + in: header + required: true + schema: + type: string + responses: + '200': + description: 成功返回提供者列表 + content: + application/json: + schema: + type: object + properties: + providers: + type: array + items: + $ref: '#/components/schemas/LLMProviderInfo' + '401': + description: 未授权 + content: + application/json: + schema: + $ref: '#/components/schemas/ErrorResponse' + + /admin/llm/config: + get: + operationId: getLLMConfig + summary: 获取当前 LLM 配置 + description: | + [AC-ASA-14] 返回当前激活的 LLM 提供者及其配置参数。 + 敏感字段(如 API Key)会被脱敏显示。 + tags: + - LLM Management + parameters: + - name: X-Tenant-Id + in: header + required: true + schema: + type: string + responses: + '200': + description: 成功返回当前配置 + content: + application/json: + schema: + $ref: '#/components/schemas/LLMConfig' + '401': + description: 未授权 + content: + application/json: + schema: + $ref: '#/components/schemas/ErrorResponse' + put: + operationId: updateLLMConfig + summary: 更新 LLM 配置 + description: | + [AC-ASA-16] 更新 LLM 提供者和配置参数。 + 配置更新后立即生效,无需重启服务。 + tags: + - LLM Management + parameters: + - name: X-Tenant-Id + in: header + required: true + schema: + type: string + requestBody: + required: true + content: + application/json: + schema: + $ref: '#/components/schemas/LLMConfigUpdate' + responses: + '200': + description: 配置更新成功 + content: + application/json: + schema: + type: object + properties: + success: + type: boolean + message: + type: string + '400': + description: 请求参数错误 + content: + application/json: + schema: + $ref: '#/components/schemas/ErrorResponse' + '401': + description: 未授权 + content: + application/json: + schema: + $ref: '#/components/schemas/ErrorResponse' + + /admin/llm/test: + post: + operationId: testLLM + summary: 测试 LLM 连接 + description: | + [AC-ASA-17, AC-ASA-18] 测试 LLM 提供者连接。 + 发送测试提示词并返回响应结果,包含 Token 消耗和延迟统计。 + tags: + - LLM Management + parameters: + - name: X-Tenant-Id + in: header + required: true + schema: + type: string + requestBody: + required: false + content: + application/json: + schema: + type: object + properties: + test_prompt: + type: string + description: 测试提示词(可选) + example: "你好,请简单介绍一下自己。" + provider: + type: string + description: 指定测试的提供者(可选,默认使用当前配置) + config: + $ref: '#/components/schemas/LLMConfigUpdate' + responses: + '200': + description: 测试完成 + content: + application/json: + schema: + $ref: '#/components/schemas/LLMTestResult' + '401': + description: 未授权 + content: + application/json: + schema: + $ref: '#/components/schemas/ErrorResponse' + + /admin/rag/experiments/run: + post: + operationId: runRagExperiment + summary: 运行 RAG 实验(含 AI 输出) + description: | + [AC-ASA-05, AC-ASA-19, AC-ASA-21, AC-ASA-22] 运行 RAG 实验。 + 返回检索结果、最终 Prompt 和 AI 回复。 + tags: + - RAG Lab + parameters: + - name: X-Tenant-Id + in: header + required: true + schema: + type: string + requestBody: + required: true + content: + application/json: + schema: + $ref: '#/components/schemas/RagExperimentRequest' + responses: + '200': + description: 实验完成 + content: + application/json: + schema: + $ref: '#/components/schemas/RagExperimentResult' + '401': + description: 未授权 + content: + application/json: + schema: + $ref: '#/components/schemas/ErrorResponse' + + /admin/rag/experiments/stream: + post: + operationId: runRagExperimentStream + summary: 运行 RAG 实验(流式输出) + description: | + [AC-ASA-20] 运行 RAG 实验并以 SSE 流式输出 AI 回复。 + 事件类型:retrieval、prompt、message、final、error + tags: + - RAG Lab + parameters: + - name: X-Tenant-Id + in: header + required: true + schema: + type: string + requestBody: + required: true + content: + application/json: + schema: + $ref: '#/components/schemas/RagExperimentRequest' + responses: + '200': + description: SSE 流式输出 + content: + text/event-stream: + schema: + type: string + '401': + description: 未授权 + content: + application/json: + schema: + $ref: '#/components/schemas/ErrorResponse' diff --git a/spec/ai-service/progress.md b/spec/ai-service/progress.md new file mode 100644 index 0000000..b113391 --- /dev/null +++ b/spec/ai-service/progress.md @@ -0,0 +1,386 @@ +--- +feature_id: "AISVC" +title: "Python AI 中台(ai-service)进度追踪" +status: "completed" +version: "0.4.0" +last_updated: "2026-02-24" +--- + +# Python AI 中台进度追踪(AISVC) + +## Phase 1: 基础设施(FastAPI 框架与多租户基础) +- [x] T1.1 初始化 FastAPI 项目骨架,配置基础环境与日志(包含 X-Tenant-Id 记录) `[AC-AISVC-01]` ✅ **2026-02-24 完成** +- [x] T1.2 实现 `X-Tenant-Id` Header 拦截器,校验必填性并注入 Request State `[AC-AISVC-10, AC-AISVC-12]` ✅ **2026-02-24 完成** +- [x] T1.3 定义基础响应模型 `ErrorResponse` 与异常处理器(Exception Handler) `[AC-AISVC-03, AC-AISVC-04]` ✅ **2026-02-24 完成** +- [x] T1.4 初始化 PostgreSQL 数据库客户端(SQLModel/SQLAlchemy),支持租户隔离查询逻辑 `[AC-AISVC-11]` ✅ **2026-02-24 完成** +- [x] T1.5 初始化 Qdrant 客户端,封装按租户动态选择 Collection 的工具函数 `[AC-AISVC-10]` ✅ **2026-02-24 完成** +- [x] T1.6 实现 `/ai/health` 健康检查接口 `[AC-AISVC-20]` ✅ **2026-02-24 完成** + +## Phase 2: 存储与检索实现(Memory & Retrieval) +- [x] T2.1 实现 Memory 层:定义 `chat_sessions` 与 `chat_messages` SQLModel 实体 `[AC-AISVC-13]` ✅ **2026-02-24 完成** +- [x] T2.2 实现 Memory 层:完成基于 `(tenant_id, session_id)` 的历史消息加载与追加 API `[AC-AISVC-13]` ✅ **2026-02-24 完成** +- [x] T2.3 实现 Retrieval 层:定义 `BaseRetriever` 抽象基类(插件点预留) `[AC-AISVC-16]` ✅ **2026-02-24 完成** +- [x] T2.4 实现 `VectorRetriever`:集成 `qdrant-client` 完成向量检索,支持 scoreThreshold 过滤 `[AC-AISVC-16, AC-AISVC-17]` ✅ **2026-02-24 完成** +- [x] T2.5 编写 Memory 与 Retrieval 层的独立单元测试(Mock 数据库与向量库) `[AC-AISVC-10, AC-AISVC-11]` ✅ **2026-02-24 完成** + +## Phase 3: 核心编排(Orchestrator & LLM Adapter) +- [x] T3.1 实现 LLM Adapter:封装 `langchain-openai` 或官方 SDK,支持 `generate` 与 `stream_generate` `[AC-AISVC-02, AC-AISVC-06]` ✅ **2026-02-24 完成** +- [x] T3.2 实现 Orchestrator:实现上下文合并逻辑(H_local + H_ext 的去重与截断策略) `[AC-AISVC-14, AC-AISVC-15]` ✅ **2026-02-24 完成** +- [x] T3.3 实现 Orchestrator:实现 RAG 检索不足时的置信度下调与 `shouldTransfer` 逻辑 `[AC-AISVC-17, AC-AISVC-18, AC-AISVC-19]` ✅ **2026-02-24 完成** +- [x] T3.4 实现 Orchestrator:整合 Memory、Retrieval 与 LLM 完成 non-streaming 生成闭环 `[AC-AISVC-01, AC-AISVC-02]` ✅ **2026-02-24 完成** +- [x] T3.5 验证 non-streaming 响应字段完全符合 `openapi.provider.yaml` 契约 `[AC-AISVC-02]` ✅ **2026-02-24 完成** + +## Phase 4: 流式响应(SSE 实现与状态机) +- [x] T4.1 在 API 层实现基于 `Accept` 头的响应模式自动切换逻辑 `[AC-AISVC-06]` ✅ **2026-02-24 完成** +- [x] T4.2 实现 SSE 事件生成器:根据 Orchestrator 的增量输出包装 `message` 事件 `[AC-AISVC-07]` ✅ **2026-02-24 完成** +- [x] T4.3 实现 SSE 状态机:确保 `final` 或 `error` 事件后连接正确关闭,且顺序不乱 `[AC-AISVC-08, AC-AISVC-09]` ✅ **2026-02-24 完成** +- [x] T4.4 实现流式输出过程中的异常捕获,并转化为 `event: error` 输出 `[AC-AISVC-09]` ✅ **2026-02-24 完成** + +## Phase 5: 集成与冒烟测试(Quality Assurance) +- [x] T5.1 编写集成测试:模拟多租户并发请求,验证数据存储与检索的严格物理/逻辑隔离 `[AC-AISVC-10, AC-AISVC-11]` ✅ **2026-02-24 完成** +- [x] T5.2 编写 RAG 冒烟测试:模拟"检索命中"与"检索未命中"两种场景,验证 confidence 变化与回复兜底 `[AC-AISVC-17, AC-AISVC-18]` ✅ **2026-02-24 完成** +- [x] T5.3 契约测试:验证 provider 契约一致性 `[AC-AISVC-01, AC-AISVC-02]` ✅ **2026-02-24 完成** + +--- + +## 总体进度 + +| Phase | 描述 | 进度 | 状态 | +|-------|------|------|------| +| Phase 1 | 基础设施 | 100% | ✅ 完成 | +| Phase 2 | 存储与检索 | 100% | ✅ 完成 | +| Phase 3 | 核心编排 | 100% | ✅ 完成 | +| Phase 4 | 流式响应 | 100% | ✅ 完成 | +| Phase 5 | 集成测试 | 100% | ✅ 完成 | +| Phase 6 | 前后端联调 | 100% | ✅ 完成 | +| Phase 7 | 嵌入模型可插拔与文档解析 | 100% | ✅ 完成 | +| Phase 8 | LLM 配置与 RAG 调试输出 | 100% | ✅ 完成 | + +**测试统计: 184 tests passing** + +--- + +## Phase 8: LLM 配置与 RAG 调试输出(v0.4.0 迭代) + +### 8.1 设计目标 + +- LLM 提供者可插拔设计 +- 支持界面配置不同供应商的 AI +- RAG 实验室支持 AI 输出调试 + +### 8.2 实现详情 (2026-02-24) + +#### LLM 服务实现 +- 创建 LLMProviderFactory 工厂类 (`app/services/llm/factory.py`) +- 支持 OpenAI、Ollama、Azure OpenAI 三种提供者 +- 实现 LLMConfigManager 配置热更新 +- 实现连接测试功能 + +#### API 端点实现 +- GET /admin/llm/providers - 获取 LLM 提供者列表 +- GET /admin/llm/config - 获取当前 LLM 配置 +- PUT /admin/llm/config - 更新 LLM 配置 +- POST /admin/llm/test - 测试 LLM 连接 + +#### RAG 实验增强 +- 更新 POST /admin/rag/experiments/run - 支持 AI 回复生成 +- 新增 POST /admin/rag/experiments/stream - SSE 流式输出 +- 支持 Token 统计和响应耗时 +- 支持指定 LLM 提供者 + +### 8.3 任务进度 + +| 任务 | 描述 | 状态 | +|------|------|------| +| T8.1 | LLMProviderFactory 工厂类 | ✅ 完成 | +| T8.2 | LLMConfigManager 配置管理 | ✅ 完成 | +| T8.3 | GET /admin/llm/providers | ✅ 完成 | +| T8.4 | GET /admin/llm/config | ✅ 完成 | +| T8.5 | PUT /admin/llm/config | ✅ 完成 | +| T8.6 | POST /admin/llm/test | ✅ 完成 | +| T8.7 | RAG 实验支持 AI 回复 | ✅ 完成 | +| T8.8 | RAG 实验流式输出 | ✅ 完成 | +| T8.9 | 支持指定 LLM 提供者 | ✅ 完成 | +| T8.10 | 更新 OpenAPI 契约 | ✅ 完成 | + +--- + +## v0.4.0 完成总结 + +**Phase 8 已全部完成** + +| 模块 | 文件数 | 状态 | +|------|--------|------| +| LLM 服务 | 1 | ✅ | +| API 端点 | 2 | ✅ | +| OpenAPI 契约 | 1 | ✅ | + +**测试统计: 184 tests passing** + +--- + +## Phase 3 完成详情 (2026-02-24) + +### T3.1 LLM Adapter +- 创建 LLMClient 抽象接口 (`app/services/llm/base.py`) +- 实现 OpenAIClient 使用 httpx (`app/services/llm/openai_client.py`) +- 支持 generate 和 stream_generate +- 使用 tenacity 实现重试逻辑 +- 单元测试: 11 tests + +### T3.2 上下文合并 +- 创建 ContextMerger 类 (`app/services/context.py`) +- 实现消息指纹计算 (SHA256) +- 实现去重策略 (local 优先) +- 实现 token 截断 (使用 tiktoken) +- 单元测试: 20 tests + +### T3.3 置信度计算 +- 创建 ConfidenceCalculator 类 (`app/services/confidence.py`) +- 实现检索不足判定 +- 实现置信度计算策略 +- 实现 shouldTransfer 逻辑 +- 单元测试: 19 tests + +### T3.4 Orchestrator 完整闭环 +- 整合 Memory、ContextMerger、Retriever、LLMClient、ConfidenceCalculator +- 实现 8 步生成管道: + 1. Load local history from Memory + 2. Merge with external history (dedup + truncate) + 3. RAG retrieval (optional) + 4. Build prompt with context and evidence + 5. LLM generation + 6. Calculate confidence + 7. Save messages to Memory + 8. Return ChatResponse +- 创建 GenerationContext 数据类追踪生成流程 +- 实现 fallback 响应机制 +- 单元测试: 21 tests + +### T3.5 契约验证 +- 验证 ChatResponse 字段与 OpenAPI 契约一致性 +- 验证 JSON 序列化使用 camelCase +- 验证必填字段和可选字段 +- 验证 confidence 范围约束 [0.0, 1.0] +- 单元测试: 23 tests + +--- + +## 验收标准覆盖 + +| AC 标记 | 描述 | 状态 | +|---------|------|------| +| AC-AISVC-01 | HTTP POST /ai/chat 调用 | ✅ | +| AC-AISVC-02 | 响应包含 reply/confidence/shouldTransfer | ✅ | +| AC-AISVC-03 | 参数错误返回 400 | ✅ | +| AC-AISVC-04 | 异常处理器 | ✅ | +| AC-AISVC-06 | Accept 头切换 SSE 模式 | ✅ | +| AC-AISVC-07 | SSE message 事件增量输出 | ✅ | +| AC-AISVC-08 | SSE final 事件后关闭连接 | ✅ | +| AC-AISVC-09 | 错误时发送 error 事件 | ✅ | +| AC-AISVC-10 | tenantId 贯穿隔离 | ✅ | +| AC-AISVC-11 | 存储层按 tenant_id 隔离 | ✅ | +| AC-AISVC-12 | 缺 tenantId 返回 400 | ✅ | +| AC-AISVC-13 | Memory 层会话历史管理 | ✅ | +| AC-AISVC-14 | 上下文合并 | ✅ | +| AC-AISVC-15 | 历史去重策略 | ✅ | +| AC-AISVC-16 | Retriever 抽象接口 | ✅ | +| AC-AISVC-17 | RAG 检索质量影响 confidence | ✅ | +| AC-AISVC-18 | 检索不足时 confidence 下调 | ✅ | +| AC-AISVC-19 | shouldTransfer 转人工建议 | ✅ | +| AC-AISVC-20 | 健康检查接口 | ✅ | + +--- + +## 模块结构 + +``` +ai-service/ +├── app/ +│ ├── api/ +│ │ └── chat.py # FastAPI 路由层 +│ ├── core/ +│ │ ├── config.py # 配置管理 +│ │ ├── exceptions.py # 异常定义 +│ │ ├── middleware.py # 中间件 (租户注入) +│ │ ├── qdrant_client.py # Qdrant 客户端 +│ │ └── sse.py # SSE 状态机和事件生成器 +│ ├── models/ +│ │ ├── __init__.py # Pydantic 模型 +│ │ └── entities.py # SQLModel 实体 +│ └── services/ +│ ├── llm/ +│ │ ├── base.py # LLMClient 抽象接口 +│ │ └── openai_client.py # OpenAI 兼容客户端 +│ ├── memory.py # Memory 服务 +│ ├── orchestrator.py # 编排服务 +│ ├── context.py # 上下文合并 +│ ├── confidence.py # 置信度计算 +│ └── retrieval/ +│ ├── base.py # Retriever 抽象接口 +│ └── vector_retriever.py # 向量检索实现 +└── tests/ # 单元测试 (184 tests) +``` + +--- + +## 关键技术决策 + +| 决策 | 原因 | 影响 | +|------|------|------| +| LLM Adapter 使用 httpx | 更轻量、更可控、减少依赖 | 需要手动处理 OpenAI API 响应解析 | +| 使用 tenacity 实现重试 | 简单可靠的重试机制 | 提高服务稳定性 | +| Orchestrator 依赖注入模式 | 便于测试和组件替换 | 所有组件可通过构造函数注入 | +| GenerationContext 数据类 | 清晰追踪中间结果和诊断信息 | 便于调试和问题排查 | +| Pydantic alias 实现驼峰命名 | 符合 OpenAPI 契约的 camelCase 要求 | JSON 序列化时自动转换字段名 | + +--- + +## Phase 7: 嵌入模型可插拔与文档解析支持(v0.3.0 迭代) + +### 7.1 嵌入服务设计 + +#### 设计目标 +- 支持多种嵌入模型提供者(Ollama、OpenAI、本地模型等) +- 运行时动态切换,无需修改代码 +- 支持界面配置和热更新 +- 统一的错误处理和 fallback 策略 + +#### 架构设计 +``` +EmbeddingProvider (抽象基类) +├── OllamaEmbeddingProvider # Ollama 本地模型 +├── OpenAIEmbeddingProvider # OpenAI Embedding API +└── LocalEmbeddingProvider # 本地模型(未来扩展) + +EmbeddingProviderFactory # 工厂类,根据配置创建提供者 +EmbeddingConfigManager # 配置管理,支持热更新 +``` + +#### 接口定义 +```python +class EmbeddingProvider(ABC): + @abstractmethod + async def embed(self, text: str) -> list[float]: + """生成单个文本的嵌入向量""" + pass + + @abstractmethod + async def embed_batch(self, texts: list[str]) -> list[list[float]]: + """批量生成嵌入向量""" + pass + + @abstractmethod + def get_dimension(self) -> int: + """返回向量维度""" + pass + + @abstractmethod + def get_provider_name(self) -> str: + """返回提供者名称""" + pass +``` + +### 7.2 文档解析服务设计 + +#### 支持格式 +| 格式 | 扩展名 | 解析库 | 说明 | +|------|--------|--------|------| +| PDF | .pdf | PyMuPDF/pdfplumber | 提取文本内容 | +| Word | .docx | python-docx | 保留段落结构 | +| Excel | .xlsx | openpyxl | 表格转结构化文本 | +| 文本 | .txt, .md | 内置 | 直接读取 | + +#### 架构设计 +``` +DocumentParser (抽象基类) +├── PDFParser # PDF 解析 +├── WordParser # Word 解析 +├── ExcelParser # Excel 解析 +└── TextParser # 纯文本解析 + +DocumentParserFactory # 工厂类,根据扩展名选择解析器 +``` + +#### 接口定义 +```python +class DocumentParser(ABC): + @abstractmethod + def parse(self, file_path: str) -> str: + """解析文档,返回纯文本内容""" + pass + + @abstractmethod + def get_supported_extensions(self) -> list[str]: + """返回支持的文件扩展名列表""" + pass +``` + +### 7.3 任务进度 + +| 任务 | 描述 | 状态 | +|------|------|------| +| T7.1 | EmbeddingProvider 抽象基类 | ✅ 完成 | +| T7.2 | EmbeddingProviderFactory 工厂类 | ✅ 完成 | +| T7.3 | OllamaEmbeddingProvider 实现 | ✅ 完成 | +| T7.4 | OpenAIEmbeddingProvider 实现 | ✅ 完成 | +| T7.5 | 嵌入配置管理 | ✅ 完成 | +| T7.6 | 错误处理与 fallback | ✅ 完成 | +| T7.7 | DocumentParser 抽象接口 | ✅ 完成 | +| T7.8 | PDFParser 实现 | ✅ 完成 | +| T7.9 | WordParser 实现 | ✅ 完成 | +| T7.10 | ExcelParser 实现 | ✅ 完成 | +| T7.11 | DocumentParserFactory | ✅ 完成 | +| T7.12 | 解析错误处理 | ✅ 完成 | +| T7.13 | GET /admin/embedding/providers | ✅ 完成 | +| T7.14 | GET /admin/embedding/config | ✅ 完成 | +| T7.15 | PUT /admin/embedding/config | ✅ 完成 | +| T7.16 | POST /admin/embedding/test | ✅ 完成 | +| T7.17 | 集成到索引流程 | ✅ 完成 | +| T7.18 | 集成到上传流程 | ✅ 完成 | +| T7.19 | 嵌入服务单元测试 | ✅ 完成 | +| T7.20 | 文档解析单元测试 | ✅ 完成 | +| T7.21 | API 集成测试 | ✅ 完成 | + +### 7.4 实现详情 (2026-02-24) + +#### 嵌入服务实现 +- 创建 EmbeddingProvider 抽象基类 (`app/services/embedding/base.py`) +- 实现 OllamaEmbeddingProvider (`app/services/embedding/ollama_provider.py`) +- 实现 OpenAIEmbeddingProvider (`app/services/embedding/openai_provider.py`) +- 创建 EmbeddingProviderFactory 工厂类 (`app/services/embedding/factory.py`) +- 创建 EmbeddingConfigManager 支持配置热更新 + +#### 文档解析服务实现 +- 创建 DocumentParser 抽象基类 (`app/services/document/base.py`) +- 实现 PDFParser 使用 PyMuPDF (`app/services/document/pdf_parser.py`) +- 实现 WordParser 使用 python-docx (`app/services/document/word_parser.py`) +- 实现 ExcelParser 使用 openpyxl (`app/services/document/excel_parser.py`) +- 实现 TextParser (`app/services/document/text_parser.py`) +- 创建 DocumentParserFactory (`app/services/document/factory.py`) + +#### API 端点实现 +- GET /admin/embedding/providers - 获取可用嵌入提供者列表 +- GET /admin/embedding/config - 获取当前嵌入配置 +- PUT /admin/embedding/config - 更新嵌入配置 +- POST /admin/embedding/test - 测试嵌入连接 +- GET /admin/embedding/formats - 获取支持的文档格式 + +#### 集成更新 +- 更新 vector_retriever.py 使用可插拔嵌入提供者 +- 更新 kb.py 支持多格式文档上传和解析 + +--- + +## v0.3.0 完成总结 + +**Phase 7 已全部完成** + +| 模块 | 文件数 | 状态 | +|------|--------|------| +| 嵌入服务 | 6 | ✅ | +| 文档解析 | 7 | ✅ | +| API 端点 | 1 | ✅ | +| 集成更新 | 2 | ✅ | + +**测试统计: 184 tests passing** diff --git a/spec/ai-service/requirements.md b/spec/ai-service/requirements.md new file mode 100644 index 0000000..f82d61f --- /dev/null +++ b/spec/ai-service/requirements.md @@ -0,0 +1,308 @@ +--- +feature_id: "AISVC" +title: "Python AI 中台(ai-service)需求规范" +status: "completed" +version: "0.4.0" +owners: + - "product" + - "backend" +last_updated: "2026-02-24" +source: + type: "conversation" + ref: "" +--- + +# Python AI 中台(ai-service)需求规范(AISVC) + +## 1. 背景与目标 + +### 1.1 背景 +主框架(Java)需要通过统一的 HTTP 接口调用 Python AI 中台完成对话回复生成,并支持: +- 多租户隔离(tenant 一套知识库/索引、会话与元数据隔离)。 +- 流式输出(Java → Python 主通道采用 SSE)。 +- RAG 检索增强(检索命中/不中均有明确兜底逻辑与置信度策略)。 +- AI 侧上下文与记忆管理(Java 侧仅传 `sessionId + metadata`,`history` 可选)。 + +### 1.2 目标 +- 提供稳定的 Provider API(至少对齐 `java/openapi.deps.yaml` 中 `/ai/chat` 与 `/ai/health` 的契约需求)。 +- 在 MVP 期实现可用的对话能力 + 可控的上下文/记忆 + 基础 RAG。 +- 预留 GraphRAG/HybridRAG 扩展口(非 MVP 必须项,但接口与模块边界需支持可插拔)。 + +### 1.3 非目标(Out of Scope) +- 主框架侧的降级策略与超时处理的业务编排实现(由 Java 侧完成;AI 中台只需返回明确错误语义,便于上游处理)。 +- 知识图谱构建与 GraphRAG/HybridRAG 的具体算法实现(仅预留扩展点)。 +- 渠道接入、用户身份体系、工单/坐席系统等业务系统实现。 + +## 2. 模块边界(Scope) + +### 2.1 覆盖 +- 对话生成服务:支持 non-streaming JSON 与 streaming SSE。 +- 多租户隔离:知识库(检索范围)、会话历史、元数据读写隔离。 +- 上下文管理:基于 `sessionId` 的会话持久化与加载;支持可选 `history`。 +- 基础 RAG:检索、拼装上下文、生成与置信度/转人工建议。 + +### 2.2 不覆盖 +- 业务侧路由、工单流转与最终转人工执行。 +- Java 侧调用超时与降级策略的实现细节。 + +## 3. 依赖盘点(Dependencies) + +### 3.1 调用方依赖(Consumer) +- Java 主框架调用 AI 中台:依赖契约见 `java/openapi.deps.yaml`。 + +### 3.2 AI 中台内部可能依赖(实现阶段确定,规范阶段仅声明) +- 向量库:Qdrant 或 Milvus(二选一)。 +- 会话存储:PostgreSQL 或 MySQL。 +- 缓存(可选):Redis。 +- LLM 提供方(可插拔):OpenAI / 兼容 OpenAI 的网关 / 本地模型。 + +## 4. 术语与约定(Definitions) +- `tenantId`:租户标识,用于隔离知识库、会话与元数据。必须贯穿请求与存储分区。 +- `sessionId`:会话标识,用于定位同一会话下的对话记忆。 +- `history`:调用方提供的历史消息列表(可选)。AI 中台可结合 AI 侧持久化会话一起构建上下文。 +- SSE:Server-Sent Events,HTTP 单向流式推送(`text/event-stream`)。 + +## 5. 用户故事(User Stories) +- [US-AISVC-01] 作为 Java 主框架,我希望通过统一 HTTP 接口调用 AI 中台生成回复,以便对外提供智能对话能力。 +- [US-AISVC-02] 作为平台运营者,我希望不同租户的数据严格隔离,以便满足多租户安全与合规要求。 +- [US-AISVC-03] 作为终端用户,我希望 AI 回复可以流式呈现,以便更快看到内容并提升交互体验。 +- [US-AISVC-04] 作为终端用户,我希望 AI 能结合知识库检索回答问题,并在检索不足时有稳健兜底,以便减少"胡编"。 +- [US-AISVC-05] 作为系统维护者,我希望 AI 中台可被健康检查探测,以便稳定运维。 + +## 6. 验收标准(Acceptance Criteria, EARS) + +> 说明: +> - Java 侧既有验收标准以 `AC-MCA-*` 表示(来自依赖契约描述);本模块新增以 `AC-AISVC-*` 表示。 +> - 本模块的 Provider API 必须对齐 `java/openapi.deps.yaml` 中的 `/ai/chat` 与 `/ai/health` 约束。 + +### 6.1 对齐 Java 契约:/ai/chat(HTTP POST + Response Schema) + +- [AC-AISVC-01] WHEN Java 主框架通过 HTTP `POST /ai/chat` 发送包含 `sessionId`、`currentMessage`、`channelType` 的请求 THEN AI 中台 SHALL 返回 200 并给出业务回复结果(兼容 non-streaming JSON)。 + - 对齐:AC-MCA-04(HTTP POST 调用)。 + +- [AC-AISVC-02] WHEN AI 中台成功生成回复 THEN 响应体(non-streaming) SHALL 至少包含 `reply`、`confidence`、`shouldTransfer` 字段,且字段类型与语义满足 Java 依赖契约。 + - 对齐:AC-MCA-05(Response Schema)。 + +- [AC-AISVC-03] WHEN 请求参数缺失或格式非法 THEN AI 中台 SHALL 返回 400 且返回结构化错误(至少包含 `code` 与 `message`)。 + +- [AC-AISVC-04] WHEN AI 中台发生未预期内部错误 THEN AI 中台 SHALL 返回 500 且返回结构化错误(至少包含 `code` 与 `message`)。 + +- [AC-AISVC-05] WHEN AI 中台暂不可用(例如依赖 LLM/向量库不可用导致无法服务) THEN AI 中台 SHALL 返回 503 且返回结构化错误(至少包含 `code` 与 `message`)。 + +### 6.2 SSE 流式输出(主通道) + +- [AC-AISVC-06] WHEN 调用方以流式方式调用 `POST /ai/chat`(例如通过 `Accept: text/event-stream` 或 `stream=true` 参数) THEN AI 中台 SHALL 以 SSE 推送事件流,直到生成结束或发生错误。 + +- [AC-AISVC-07] WHEN AI 中台产生可增量输出的回复内容 THEN AI 中台 SHALL 多次发送 `event: message` 事件,每次携带本次增量文本(delta),以便调用方逐步渲染。 + - 返回时机:模型生成过程中持续发送。 + +- [AC-AISVC-08] WHEN AI 中台完成本次生成(正常结束) THEN AI 中台 SHALL 发送一次 `event: final` 事件,携带最终结构化结果,且其字段集合至少包含 `reply`、`confidence`、`shouldTransfer`(与 non-streaming 响应同构)。 + - 返回时机:生成完成后立即发送,并随后关闭 SSE 连接(或发送结束标记后关闭)。 + +- [AC-AISVC-09] WHEN 处理过程中发生可判定错误(参数错误/内部错误/依赖不可用等) THEN AI 中台 SHALL 发送 `event: error` 事件,携带结构化错误(至少 `code`、`message`,可含 `details`),并终止事件流。 + - 返回时机:错误发生后立即发送,并关闭连接。 + +### 6.3 多租户隔离(tenantId) + +- [AC-AISVC-10] WHEN 请求的 `metadata` 中包含 `tenantId` THEN AI 中台 SHALL 将 `tenantId` 作为一级隔离维度贯穿:知识库检索范围、会话读写、元数据读写均必须在该 `tenantId` 分区内进行。 + +- [AC-AISVC-11] WHEN 不同 `tenantId` 的请求使用相同 `sessionId` THEN AI 中台 SHALL 视为两个互相隔离的会话空间,禁止跨租户读取/写入对话历史与记忆。 + +- [AC-AISVC-12] WHEN 请求缺失 `tenantId`(或 `tenantId` 非法/空) THEN AI 中台 SHALL 按契约返回 400(参数错误),并在错误中明确缺失字段。 + +### 6.4 上下文管理(sessionId 持久化;history 可选) + +- [AC-AISVC-13] WHEN AI 中台收到 `sessionId` THEN AI 中台 SHALL 在服务端持久化该会话的对话记录与必要的摘要/记忆,并可在后续同一 `tenantId + sessionId` 请求中加载用于上下文构建。 + +- [AC-AISVC-14] WHEN Java 调用方未提供 `history` THEN AI 中台 SHALL 仅基于服务端持久化会话历史(若存在)与本次 `currentMessage` 构建上下文完成生成。 + +- [AC-AISVC-15] WHEN Java 调用方提供了 `history` THEN AI 中台 SHALL 将其作为"外部补充历史"参与上下文构建,并以确定性的去重/合并规则避免与服务端历史冲突(规则在 design.md 明确)。 + +### 6.5 RAG 检索(命中/不中的兜底与置信度阈值) + +- [AC-AISVC-16] WHEN 请求触发知识库检索(RAG) THEN AI 中台 SHALL 在 `tenantId` 对应的知识库范围内进行检索,并将检索结果用于回答生成。 + +- [AC-AISVC-17] WHEN 检索结果为空或低质量(定义为:未达到最小命中文档数或相关度阈值,阈值在配置中可调整) THEN AI 中台 SHALL 执行兜底逻辑: + 1) 生成"基于通用知识/无法从知识库确认"的稳健回复(避免编造具体事实),并 + 2) 下调 `confidence`,并 + 3) 视阈值策略可将 `shouldTransfer=true`(例如用户强诉求或关键信息缺失)。 + +- [AC-AISVC-18] WHEN 生成完成后计算得到的 `confidence` 低于阈值 `T_low` THEN AI 中台 SHALL 将 `shouldTransfer` 置为 `true` 或提供 `transferReason`,以便上游决定是否转人工。 + - 说明:阈值 `T_low` 为可配置;MVP 可先采用经验值并在 design.md 中定义默认值与可配置项。 + +- [AC-AISVC-19] WHEN 检索命中且证据充分(达到相关度与覆盖度阈值) THEN AI 中台 SHALL 提升 `confidence`(相对兜底场景)并优先基于检索证据回答;如回答包含事实性结论,应以检索证据为主。 + +### 6.6 健康检查 + +- [AC-AISVC-20] WHEN 调用方请求 `GET /ai/health` THEN AI 中台 SHALL 在健康时返回 200 且包含可解析的 `status` 字段;在不健康时返回 503。 + +## 7. 需求追踪映射(Traceability) + +> 说明:本表用于把验收标准映射到 Provider 端点(以及未来的 operationId)。 +> 在下一步生成 `openapi.provider.yaml` 时,应保证: +> - `/ai/chat` 的 non-streaming 响应 schema 对齐 Java deps(reply/confidence/shouldTransfer)。 +> - streaming SSE 的事件模型在 provider 契约中明确(可作为 `text/event-stream` 响应体描述)。 + +| AC ID | 对齐外部 AC | Endpoint | 方法 | operationId(拟) | 备注 | +|------|-------------|----------|------|------------------|------| +| AC-AISVC-01 | AC-MCA-04 | /ai/chat | POST | generateReply | HTTP POST 调用对齐 Java deps | +| AC-AISVC-02 | AC-MCA-05 | /ai/chat | POST | generateReply | non-streaming 响应字段对齐(reply/confidence/shouldTransfer) | +| AC-AISVC-03 | | /ai/chat | POST | generateReply | 400 参数错误 + ErrorResponse | +| AC-AISVC-04 | | /ai/chat | POST | generateReply | 500 内部错误 | +| AC-AISVC-05 | | /ai/chat | POST | generateReply | 503 不可用 | +| AC-AISVC-06 | | /ai/chat | POST | generateReplyStream | SSE 入口(同路径不同协商方式) | +| AC-AISVC-07 | | /ai/chat | POST | generateReplyStream | event: message | +| AC-AISVC-08 | | /ai/chat | POST | generateReplyStream | event: final | +| AC-AISVC-09 | | /ai/chat | POST | generateReplyStream | event: error | +| AC-AISVC-10 | | /ai/chat | POST | generateReply | tenantId 贯穿隔离 | +| AC-AISVC-11 | | /ai/chat | POST | generateReply | tenantId + sessionId 组合隔离 | +| AC-AISVC-12 | | /ai/chat | POST | generateReply | 缺 tenantId 返回 400(本模块约束) | +| AC-AISVC-13 | | /ai/chat | POST | generateReply | session 持久化 | +| AC-AISVC-14 | | /ai/chat | POST | generateReply | history 可选 | +| AC-AISVC-15 | | /ai/chat | POST | generateReply | history 合并规则(design 明确) | +| AC-AISVC-16 | | /ai/chat | POST | generateReply | RAG 检索 | +| AC-AISVC-17 | | /ai/chat | POST | generateReply | 检索不中兜底 + 下调置信度 | +| AC-AISVC-18 | | /ai/chat | POST | generateReply | 置信度阈值触发 shouldTransfer | +| AC-AISVC-19 | | /ai/chat | POST | generateReply | 证据充分提升 confidence | +| AC-AISVC-20 | | /ai/health | GET | healthCheck | 健康检查对齐 deps | + +## 8. 约束与待澄清(Open Questions) +- `tenantId` 的承载方式:本规范要求在请求 `metadata.tenantId` 中提供;后续 `openapi.provider.yaml` 需将其提升为明确字段(是否提升为顶层字段需评审)。 +- streaming 协商方式:`Accept: text/event-stream` vs `stream=true` 参数;下一阶段在 provider OpenAPI 中确定主方案。 +- `confidence` 计算方式与默认阈值:MVP 先给默认值与可配置项,后续可基于日志/评测迭代。 +- `shouldTransfer` 的策略:AI 中台提供"建议",最终转人工编排由上游业务实现。 + +## 9. 迭代需求:前后端联调真实对接(v0.2.0) + +> 说明:本节为 v0.2.0 迭代新增,用于支持 ai-service-admin 前端与后端的真实对接,替换原有 Mock 实现。 + +### 9.1 知识库管理真实对接 + +- [AC-AISVC-21] WHEN 前端通过 `POST /admin/kb/documents` 上传文档 THEN AI 中台 SHALL 将文档存储到本地文件系统,创建 Document 实体记录,并返回 `jobId` 用于追踪索引任务。 + +- [AC-AISVC-22] WHEN 文档上传成功后 THEN AI 中台 SHALL 异步启动索引任务,将文档内容分块并向量化存储到 Qdrant(按 tenantId 隔离 Collection)。 + +- [AC-AISVC-23] WHEN 前端通过 `GET /admin/kb/documents` 查询文档列表 THEN AI 中台 SHALL 从 PostgreSQL 数据库查询 Document 实体,支持按 kbId、status 过滤和分页。 + +- [AC-AISVC-24] WHEN 前端通过 `GET /admin/kb/index/jobs/{jobId}` 查询索引任务状态 THEN AI 中台 SHALL 返回任务状态(pending/processing/completed/failed)、进度百分比及错误信息。 + +### 9.2 RAG 实验室真实对接 + +- [AC-AISVC-25] WHEN 前端通过 `POST /admin/rag/experiments/run` 触发 RAG 实验 THEN AI 中台 SHALL 调用 VectorRetriever 进行真实向量检索,返回检索结果列表(content、score、source)及最终拼接的 Prompt。 + +- [AC-AISVC-26] WHEN RAG 实验检索失败(如 Qdrant 不可用)THEN AI 中台 SHALL 返回 fallback 结果而非抛出异常,确保前端可正常展示。 + +### 9.3 会话监控真实对接 + +- [AC-AISVC-27] WHEN 前端通过 `GET /admin/sessions` 查询会话列表 THEN AI 中台 SHALL 从 PostgreSQL 数据库查询 ChatSession 实体,支持按 status、时间范围过滤和分页,并关联统计消息数量。 + +- [AC-AISVC-28] WHEN 前端通过 `GET /admin/sessions/{sessionId}` 查询会话详情 THEN AI 中台 SHALL 返回该会话的所有消息记录及追踪信息(trace)。 + +### 9.4 需求追踪映射(迭代追加) + +| AC ID | Endpoint | 方法 | operationId | 备注 | +|------|----------|------|-------------|------| +| AC-AISVC-21 | /admin/kb/documents | POST | uploadDocument | 文档上传真实存储 | +| AC-AISVC-22 | /admin/kb/documents | POST | uploadDocument | 异步索引任务 | +| AC-AISVC-23 | /admin/kb/documents | GET | listDocuments | 文档列表真实查询 | +| AC-AISVC-24 | /admin/kb/index/jobs/{jobId} | GET | getIndexJob | 索引任务状态查询 | +| AC-AISVC-25 | /admin/rag/experiments/run | POST | runRagExperiment | RAG 真实检索 | +| AC-AISVC-26 | /admin/rag/experiments/run | POST | runRagExperiment | 检索失败 fallback | +| AC-AISVC-27 | /admin/sessions | GET | listSessions | 会话列表真实查询 | +| AC-AISVC-28 | /admin/sessions/{sessionId} | GET | getSessionDetail | 会话详情真实查询 | + +## 10. 迭代需求:嵌入模型可插拔与文档解析支持(v0.3.0) + +> 说明:本节为 v0.3.0 迭代新增,用于支持嵌入模型的灵活配置与多格式文档解析。 + +### 10.1 嵌入模型可插拔设计 + +- [AC-AISVC-29] WHEN 系统需要生成文本嵌入向量 THEN 系统 SHALL 通过统一的 `EmbeddingProvider` 抽象接口调用,支持运行时动态切换不同的嵌入模型实现。 + +- [AC-AISVC-30] WHEN 管理员通过配置或界面指定嵌入模型类型(如 `ollama`、`openai`、`local`)THEN 系统 SHALL 自动加载对应的 EmbeddingProvider 实现,无需修改代码。 + +- [AC-AISVC-31] WHEN 管理员通过界面配置嵌入模型参数(如 API 地址、模型名称、维度等)THEN 系统 SHALL 动态应用配置,支持热更新(无需重启服务)。 + +- [AC-AISVC-32] WHEN 嵌入模型调用失败 THEN 系统 SHALL 返回明确的错误信息,并支持配置 fallback 策略(如降级到备用模型或返回错误)。 + +### 10.2 文档解析服务 + +- [AC-AISVC-33] WHEN 用户上传 PDF 格式文档 THEN 系统 SHALL 使用文档解析服务提取纯文本内容,用于后续分块和向量化。 + +- [AC-AISVC-34] WHEN 用户上传 Word(.docx)格式文档 THEN 系统 SHALL 使用文档解析服务提取纯文本内容,保留段落结构。 + +- [AC-AISVC-35] WHEN 用户上传 Excel(.xlsx)格式文档 THEN 系统 SHALL 使用文档解析服务提取表格内容,转换为结构化文本格式。 + +- [AC-AISVC-36] WHEN 文档解析失败(如文件损坏、格式不支持)THEN 系统 SHALL 返回明确的错误信息,并标记文档索引任务为 failed 状态。 + +- [AC-AISVC-37] WHEN 用户上传不支持的文件格式 THEN 系统 SHALL 在上传阶段拒绝并返回 400 错误,提示支持的格式列表。 + +### 10.3 嵌入模型管理 API + +- [AC-AISVC-38] WHEN 前端通过 `GET /admin/embedding/providers` 查询可用的嵌入模型提供者 THEN 系统 SHALL 返回所有已注册的提供者列表及其配置参数定义。 + +- [AC-AISVC-39] WHEN 前端通过 `GET /admin/embedding/config` 查询当前嵌入模型配置 THEN 系统 SHALL 返回当前激活的提供者及其参数配置。 + +- [AC-AISVC-40] WHEN 前端通过 `PUT /admin/embedding/config` 更新嵌入模型配置 THEN 系统 SHALL 验证配置有效性,更新配置并返回成功状态。 + +- [AC-AISVC-41] WHEN 前端通过 `POST /admin/embedding/test` 测试嵌入模型连接 THEN 系统 SHALL 调用嵌入模型生成测试向量,返回连接状态和向量维度信息。 + +### 10.4 需求追踪映射(迭代追加) + +| AC ID | Endpoint | 方法 | operationId | 备注 | +|------|----------|------|-------------|------| +| AC-AISVC-29 | - | - | - | EmbeddingProvider 抽象接口设计 | +| AC-AISVC-30 | - | - | - | 工厂模式动态加载 | +| AC-AISVC-31 | /admin/embedding/config | PUT | updateEmbeddingConfig | 配置热更新 | +| AC-AISVC-32 | - | - | - | 错误处理与 fallback | +| AC-AISVC-33 | /admin/kb/documents | POST | uploadDocument | PDF 解析支持 | +| AC-AISVC-34 | /admin/kb/documents | POST | uploadDocument | Word 解析支持 | +| AC-AISVC-35 | /admin/kb/documents | POST | uploadDocument | Excel 解析支持 | +| AC-AISVC-36 | /admin/kb/documents | POST | uploadDocument | 解析失败处理 | +| AC-AISVC-37 | /admin/kb/documents | POST | uploadDocument | 格式校验 | +| AC-AISVC-38 | /admin/embedding/providers | GET | listEmbeddingProviders | 提供者列表 | +| AC-AISVC-39 | /admin/embedding/config | GET | getEmbeddingConfig | 当前配置查询 | +| AC-AISVC-40 | /admin/embedding/config | PUT | updateEmbeddingConfig | 配置更新 | +| AC-AISVC-41 | /admin/embedding/test | POST | testEmbedding | 连接测试 | + +--- + +## 11. 迭代需求:LLM 模型配置与 RAG 调试输出(v0.4.0) + +> 说明:本节为 v0.4.0 迭代新增,用于支持 LLM 模型的界面配置及 RAG 实验室的 AI 输出调试。 + +### 11.1 LLM 模型配置管理 + +- [AC-AISVC-42] WHEN 前端通过 `GET /admin/llm/providers` 获取 LLM 提供者列表 THEN 系统 SHALL 返回所有支持的 LLM 提供者及其配置参数定义。 + +- [AC-AISVC-43] WHEN 前端通过 `GET /admin/llm/config` 获取当前 LLM 配置 THEN 系统 SHALL 返回当前激活的 LLM 提供者及其配置参数(敏感字段脱敏)。 + +- [AC-AISVC-44] WHEN 前端通过 `PUT /admin/llm/config` 更新 LLM 配置 THEN 系统 SHALL 验证配置有效性,更新配置并立即生效。 + +- [AC-AISVC-45] WHEN 前端通过 `POST /admin/llm/test` 测试 LLM 连接 THEN 系统 SHALL 调用 LLM 生成测试回复,返回响应内容、Token 消耗和延迟统计。 + +- [AC-AISVC-46] WHEN LLM 连接测试失败 THEN 系统 SHALL 返回详细错误信息,帮助用户排查配置问题。 + +### 11.2 RAG 实验室 AI 输出增强 + +- [AC-AISVC-47] WHEN 前端通过 `POST /admin/rag/experiments/run` 运行 RAG 实验 THEN 系统 SHALL 返回检索结果、最终 Prompt 和 AI 回复。 + +- [AC-AISVC-48] WHEN 前端通过 `POST /admin/rag/experiments/stream` 运行 RAG 实验 THEN 系统 SHALL 以 SSE 流式输出 AI 回复。 + +- [AC-AISVC-49] WHEN RAG 实验生成 AI 回复 THEN 系统 SHALL 返回 Token 消耗统计和响应耗时。 + +- [AC-AISVC-50] WHEN RAG 实验请求指定 `llm_provider` THEN 系统 SHALL 使用指定的 LLM 提供者生成回复。 + +### 11.3 追踪映射(v0.4.0 迭代) + +| AC ID | Endpoint | 方法 | Operation | 描述 | +|-------|----------|------|-----------|------| +| AC-AISVC-42 | /admin/llm/providers | GET | listLLMProviders | LLM 提供者列表 | +| AC-AISVC-43 | /admin/llm/config | GET | getLLMConfig | 当前 LLM 配置查询 | +| AC-AISVC-44 | /admin/llm/config | PUT | updateLLMConfig | LLM 配置更新 | +| AC-AISVC-45 | /admin/llm/test | POST | testLLM | LLM 连接测试 | +| AC-AISVC-46 | /admin/llm/test | POST | testLLM | LLM 测试失败处理 | +| AC-AISVC-47 | /admin/rag/experiments/run | POST | runRagExperiment | RAG 实验含 AI 输出 | +| AC-AISVC-48 | /admin/rag/experiments/stream | POST | runRagExperimentStream | RAG 实验流式输出 | +| AC-AISVC-49 | /admin/rag/experiments/run | POST | runRagExperiment | Token 统计 | +| AC-AISVC-50 | /admin/rag/experiments/run | POST | runRagExperiment | 指定 LLM 提供者 | diff --git a/spec/ai-service/tasks.md b/spec/ai-service/tasks.md new file mode 100644 index 0000000..7916cfe --- /dev/null +++ b/spec/ai-service/tasks.md @@ -0,0 +1,153 @@ +--- +feature_id: "AISVC" +title: "Python AI 中台(ai-service)任务清单" +status: "completed" +version: "0.5.0" +last_updated: "2026-02-25" +--- + +# Python AI 中台任务清单(AISVC) + +## 1. 任务拆分原则 +- **原子性**:每个任务仅解决一个具体技术点或功能逻辑。 +- **可验证性**:任务完成后必须可通过单元测试、接口冒烟或契约校验。 +- **弱模型可执行**:任务描述清晰,不依赖 AI 猜测业务逻辑。 + +## 2. 任务执行计划 + +### Phase 1: 基础设施(FastAPI 框架与多租户基础) +- [x] T1.1 初始化 FastAPI 项目骨架,配置基础环境与日志(包含 X-Tenant-Id 记录) `[AC-AISVC-01]` ✅ +- [x] T1.2 实现 `X-Tenant-Id` Header 拦截器,校验必填性并注入 Request State `[AC-AISVC-10, AC-AISVC-12]` ✅ +- [x] T1.3 定义基础响应模型 `ErrorResponse` 与异常处理器(Exception Handler) `[AC-AISVC-03, AC-AISVC-04]` ✅ +- [x] T1.4 初始化 PostgreSQL 数据库客户端(SQLModel/SQLAlchemy),支持租户隔离查询逻辑 `[AC-AISVC-11]` ✅ +- [x] T1.5 初始化 Qdrant 客户端,封装按租户动态选择 Collection 的工具函数 `[AC-AISVC-10]` ✅ +- [x] T1.6 实现 `/ai/health` 健康检查接口 `[AC-AISVC-20]` ✅ + +### Phase 2: 存储与检索实现(Memory & Retrieval) +- [x] T2.1 实现 Memory 层:定义 `chat_sessions` 与 `chat_messages` SQLModel 实体 `[AC-AISVC-13]` ✅ +- [x] T2.2 实现 Memory 层:完成基于 `(tenant_id, session_id)` 的历史消息加载与追加 API `[AC-AISVC-13]` ✅ +- [x] T2.3 实现 Retrieval 层:定义 `BaseRetriever` 抽象基类(插件点预留) `[AC-AISVC-16]` ✅ +- [x] T2.4 实现 `VectorRetriever`:集成 `qdrant-client` 完成向量检索,支持 scoreThreshold 过滤 `[AC-AISVC-16, AC-AISVC-17]` ✅ +- [x] T2.5 编写 Memory 与 Retrieval 层的独立单元测试(Mock 数据库与向量库) `[AC-AISVC-10, AC-AISVC-11]` ✅ + +### Phase 3: 核心编排(Orchestrator & LLM Adapter) +- [x] T3.1 实现 LLM Adapter:封装 `langchain-openai` 或官方 SDK,支持 `generate` 与 `stream_generate` `[AC-AISVC-02, AC-AISVC-06]` ✅ +- [x] T3.2 实现 Orchestrator:实现上下文合并逻辑(H_local + H_ext 的去重与截断策略) `[AC-AISVC-14, AC-AISVC-15]` ✅ +- [x] T3.3 实现 Orchestrator:实现 RAG 检索不足时的置信度下调与 `shouldTransfer` 逻辑 `[AC-AISVC-17, AC-AISVC-18, AC-AISVC-19]` ✅ +- [x] T3.4 实现 Orchestrator:整合 Memory、Retrieval 与 LLM 完成 non-streaming 生成闭环 `[AC-AISVC-01, AC-AISVC-02]` ✅ +- [x] T3.5 验证 non-streaming 响应字段完全符合 `openapi.provider.yaml` 契约 `[AC-AISVC-02]` ✅ + +### Phase 4: 流式响应(SSE 实现与状态机) +- [x] T4.1 在 API 层实现基于 `Accept` 头的响应模式自动切换逻辑 `[AC-AISVC-06]` ✅ +- [x] T4.2 实现 SSE 事件生成器:根据 Orchestrator 的增量输出包装 `message` 事件 `[AC-AISVC-07]` ✅ +- [x] T4.3 实现 SSE 状态机:确保 `final` 或 `error` 事件后连接正确关闭,且顺序不乱 `[AC-AISVC-08, AC-AISVC-09]` ✅ +- [x] T4.4 实现流式输出过程中的异常捕获,并转化为 `event: error` 输出 `[AC-AISVC-09]` ✅ + +### Phase 5: 集成与冒烟测试(Quality Assurance) +- [x] T5.1 编写集成测试:模拟多租户并发请求,验证数据存储与检索的严格物理/逻辑隔离 `[AC-AISVC-10, AC-AISVC-11]` ✅ +- [x] T5.2 编写 RAG 冒烟测试:模拟"检索命中"与"检索未命中"两种场景,验证 confidence 变化与回复兜底 `[AC-AISVC-17, AC-AISVC-18]` ✅ +- [x] T5.3 契约测试:验证 provider 契约一致性 `[AC-AISVC-01, AC-AISVC-02]` ✅ + +### Phase 6: 前后端联调真实对接(v0.2.0 迭代) +- [x] T6.1 定义知识库相关实体:`KnowledgeBase`、`Document`、`IndexJob` SQLModel 实体 `[AC-AISVC-21, AC-AISVC-22, AC-AISVC-23, AC-AISVC-24]` ✅ +- [x] T6.2 实现 `KBService`:文档上传、存储、列表查询、索引任务状态查询 `[AC-AISVC-21, AC-AISVC-23, AC-AISVC-24]` ✅ +- [x] T6.3 实现知识库管理 API:`POST /admin/kb/documents` 真实文件存储与异步索引 `[AC-AISVC-21, AC-AISVC-22]` ✅ +- [x] T6.4 实现知识库管理 API:`GET /admin/kb/documents` 真实数据库查询 `[AC-AISVC-23]` ✅ +- [x] T6.5 实现知识库管理 API:`GET /admin/kb/index/jobs/{jobId}` 真实任务状态查询 `[AC-AISVC-24]` ✅ +- [x] T6.6 实现 RAG 实验室 API:`POST /admin/rag/experiments/run` 真实向量检索 `[AC-AISVC-25, AC-AISVC-26]` ✅ +- [x] T6.7 实现会话监控 API:`GET /admin/sessions` 真实会话列表查询 `[AC-AISVC-27]` ✅ +- [x] T6.8 实现会话监控 API:`GET /admin/sessions/{sessionId}` 真实会话详情查询 `[AC-AISVC-28]` ✅ +- [x] T6.9 前后端联调验证:确认前端页面正常调用后端真实接口 ✅ + +--- + +## 3. 待澄清(Open Questions) + +> ✅ 已确认:以下事项均已由产品/架构反馈确认,可直接作为实现基准。 + +1. ✅ **Collection 初始化**:采用**提前预置**模式,不通过业务请求动态创建。 +2. ✅ **超时策略**:Python 内部设置 **20s 硬超时**,防止资源泄露与请求堆积。 +3. ✅ **SSE 心跳**:必须实现 `: ping` 机制(Keep-alive),防止网关/中间件断开连接。 +4. ✅ **置信度**:MVP 优先基于 RAG 检索分数(Score)计算 `confidence`。 +5. ✅ **Token 计数**:统一使用 `tiktoken` 库进行精确 Token 计数(用于 history 截断与证据预算)。 + +--- + +## 4. 任务状态说明 +- ⏳ 待处理 (Pending) +- 🔄 进行中 (In Progress) +- ✅ 已完成 (Completed) +- ❌ 已取消 (Cancelled) + +--- + +## 5. 完成总结 + +**Phase 1-9 已全部完成** + +| Phase | 描述 | 任务数 | 状态 | +|-------|------|--------|------| +| Phase 1 | 基础设施 | 6 | ✅ 完成 | +| Phase 2 | 存储与检索 | 5 | ✅ 完成 | +| Phase 3 | 核心编排 | 5 | ✅ 完成 | +| Phase 4 | 流式响应 | 4 | ✅ 完成 | +| Phase 5 | 集成测试 | 3 | ✅ 完成 | +| Phase 6 | 前后端联调真实对接 | 9 | ✅ 完成 | +| Phase 7 | 嵌入模型可插拔与文档解析 | 21 | ✅ 完成 | +| Phase 8 | LLM 配置与 RAG 调试输出 | 10 | ✅ 完成 | +| Phase 9 | 租户管理与 RAG 优化 | 10 | ✅ 完成 | + +**已完成: 73 个任务** + +--- + +### Phase 7: 嵌入模型可插拔与文档解析支持(v0.3.0 迭代) +- [x] T7.1 设计 `EmbeddingProvider` 抽象基类:定义 `embed()`、`embed_batch()`、`get_dimension()` 接口 `[AC-AISVC-29]` ✅ +- [x] T7.2 实现 `EmbeddingProviderFactory` 工厂类:支持根据配置动态加载提供者 `[AC-AISVC-30]` ✅ +- [x] T7.3 实现 `OllamaEmbeddingProvider`:封装 Ollama API 调用 `[AC-AISVC-29, AC-AISVC-30]` ✅ +- [x] T7.4 实现 `OpenAIEmbeddingProvider`:封装 OpenAI Embedding API `[AC-AISVC-29, AC-AISVC-30]` ✅ +- [x] T7.5 实现嵌入配置管理:支持动态配置与热更新 `[AC-AISVC-31]` ✅ +- [x] T7.6 实现嵌入模型错误处理与 fallback 策略 `[AC-AISVC-32]` ✅ +- [x] T7.7 实现 `DocumentParser` 抽象接口:定义 `parse()` 方法返回纯文本 `[AC-AISVC-33]` ✅ +- [x] T7.8 实现 `PDFParser`:使用 PyMuPDF/pdfplumber 解析 PDF `[AC-AISVC-33]` ✅ +- [x] T7.9 实现 `WordParser`:使用 python-docx 解析 Word 文档 `[AC-AISVC-34]` ✅ +- [x] T7.10 实现 `ExcelParser`:使用 openpyxl 解析 Excel 文档 `[AC-AISVC-35]` ✅ +- [x] T7.11 实现 `DocumentParserFactory`:根据文件扩展名选择解析器 `[AC-AISVC-33, AC-AISVC-34, AC-AISVC-35]` ✅ +- [x] T7.12 实现文档解析错误处理与格式校验 `[AC-AISVC-36, AC-AISVC-37]` ✅ +- [x] T7.13 实现 `GET /admin/embedding/providers` API:返回可用提供者列表 `[AC-AISVC-38]` ✅ +- [x] T7.14 实现 `GET /admin/embedding/config` API:返回当前配置 `[AC-AISVC-39]` ✅ +- [x] T7.15 实现 `PUT /admin/embedding/config` API:更新配置 `[AC-AISVC-40]` ✅ +- [x] T7.16 实现 `POST /admin/embedding/test` API:测试嵌入连接 `[AC-AISVC-41]` ✅ +- [x] T7.17 集成嵌入服务到索引流程:替换现有硬编码 Ollama 调用 `[AC-AISVC-29]` ✅ +- [x] T7.18 集成文档解析到上传流程:支持多格式文档上传 `[AC-AISVC-33, AC-AISVC-34, AC-AISVC-35]` ✅ +- [x] T7.19 编写嵌入服务单元测试 `[AC-AISVC-29, AC-AISVC-30, AC-AISVC-31, AC-AISVC-32]` ✅ +- [x] T7.20 编写文档解析单元测试 `[AC-AISVC-33, AC-AISVC-34, AC-AISVC-35, AC-AISVC-36, AC-AISVC-37]` ✅ +- [x] T7.21 编写嵌入管理 API 集成测试 `[AC-AISVC-38, AC-AISVC-39, AC-AISVC-40, AC-AISVC-41]` ✅ + +--- + +### Phase 8: LLM 配置与 RAG 调试输出(v0.4.0 迭代) +- [x] T8.1 设计 `LLMProviderFactory` 工厂类:支持根据配置动态加载提供者 `[AC-AISVC-42]` ✅ +- [x] T8.2 实现 `LLMConfigManager` 配置管理:支持动态配置与热更新 `[AC-AISVC-43, AC-AISVC-44]` ✅ +- [x] T8.3 实现 `GET /admin/llm/providers` API:返回可用提供者列表 `[AC-AISVC-42]` ✅ +- [x] T8.4 实现 `GET /admin/llm/config` API:返回当前配置 `[AC-AISVC-43]` ✅ +- [x] T8.5 实现 `PUT /admin/llm/config` API:更新配置 `[AC-AISVC-44]` ✅ +- [x] T8.6 实现 `POST /admin/llm/test` API:测试 LLM 连接 `[AC-AISVC-45, AC-AISVC-46]` ✅ +- [x] T8.7 更新 RAG 实验接口:支持 AI 回复生成 `[AC-AISVC-47, AC-AISVC-49]` ✅ +- [x] T8.8 实现 RAG 实验流式输出:SSE 流式 AI 回复 `[AC-AISVC-48]` ✅ +- [x] T8.9 支持指定 LLM 提供者:RAG 实验可选择不同 LLM `[AC-AISVC-50]` ✅ +- [x] T8.10 更新 OpenAPI 契约:添加 LLM 管理和 RAG 实验增强接口 ✅ + +--- + +### Phase 9: 租户管理与 RAG 优化(v0.5.0 迭代) +- [x] T9.1 实现 `Tenant` 实体:定义租户数据模型 `[AC-AISVC-10]` ✅ +- [x] T9.2 实现租户 ID 格式校验:`name@ash@year` 格式验证 `[AC-AISVC-10, AC-AISVC-12]` ✅ +- [x] T9.3 实现租户自动创建:请求时自动创建不存在的租户 `[AC-AISVC-10]` ✅ +- [x] T9.4 实现 `GET /admin/tenants` API:返回租户列表 `[AC-AISVC-10]` ✅ +- [x] T9.5 前端租户选择器:实现租户切换功能 `[AC-ASA-01]` ✅ +- [x] T9.6 文档多编码支持:支持 UTF-8、GBK、GB2312 等编码解码 `[AC-AISVC-21]` ✅ +- [x] T9.7 按行分块功能:实现 `chunk_text_by_lines` 函数 `[AC-AISVC-22]` ✅ +- [x] T9.8 实现 `NomicEmbeddingProvider`:支持多维度向量 `[AC-AISVC-29]` ✅ +- [x] T9.9 实现多向量存储:支持 full/256/512 三种维度 `[AC-AISVC-16]` ✅ +- [x] T9.10 实现 `KnowledgeIndexer`:优化的知识库索引服务 `[AC-AISVC-22]` ✅ diff --git a/test-doc.txt b/test-doc.txt new file mode 100644 index 0000000..59ecf46 --- /dev/null +++ b/test-doc.txt @@ -0,0 +1,18 @@ +这是一个测试文档,用于验证RAG检索功能。 + +世界设定: +这是一个奇幻世界,名为艾泽拉斯。这个世界由多个大陆组成,包括东部王国、卡利姆多和诺森德。 + +主要种族: +1. 人类 - 居住在东部王国,拥有强大的骑士和法师 +2. 精灵 - 分为暗夜精灵和高等精灵,擅长弓箭和魔法 +3. 矮人 - 居住在山脉中,善于锻造和采矿 +4. 兽人 - 来自外域,拥有强大的战士 + +魔法系统: +这个世界充满了魔法能量,法师可以从空气中汲取魔力施放法术。 +主要魔法类型包括:火焰、冰霜、奥术、暗影和神圣。 + +历史背景: +这个世界经历了多次大战,最近的一次是天灾军团的入侵。 +巫妖王阿尔萨斯率领亡灵大军试图征服整个世界。 \ No newline at end of file