diff --git a/ai-service/migrations/003_user_memories.py b/ai-service/migrations/003_user_memories.py
new file mode 100644
index 0000000..c1999c6
--- /dev/null
+++ b/ai-service/migrations/003_user_memories.py
@@ -0,0 +1,87 @@
+"""
+Database Migration: User Memories Table.
+[AC-IDMP-14] Per-user rolling memory summary table
+
+Created: 2025-03-08
+Changes:
+- Add the user_memories table to store the rolling summary plus facts, preferences and open issues
+
+How to run:
+- SQLModel creates the table automatically (via init_db)
+- This script is for manual migration or rollback
+
+SQL DDL:
+```sql
+CREATE TABLE IF NOT EXISTS user_memories (
+    id UUID PRIMARY KEY,
+    tenant_id VARCHAR NOT NULL,
+    user_id VARCHAR NOT NULL,
+    summary TEXT,
+    facts JSON,
+    preferences JSON,
+    open_issues JSON,
+    summary_version INTEGER NOT NULL DEFAULT 1,
+    last_turn_id VARCHAR,
+    expires_at TIMESTAMP,
+    created_at TIMESTAMP NOT NULL DEFAULT NOW(),
+    updated_at TIMESTAMP NOT NULL DEFAULT NOW()
+);
+
+CREATE INDEX IF NOT EXISTS ix_user_memories_tenant_user ON user_memories(tenant_id, user_id);
+CREATE INDEX IF NOT EXISTS ix_user_memories_tenant_user_updated ON user_memories(tenant_id, user_id, updated_at);
+```
+
+Rollback SQL:
+```sql
+DROP TABLE IF EXISTS user_memories;
+```
+"""
+
+USER_MEMORIES_DDL = """
+CREATE TABLE IF NOT EXISTS user_memories (
+    id UUID PRIMARY KEY,
+    tenant_id VARCHAR NOT NULL,
+    user_id VARCHAR NOT NULL,
+    summary TEXT,
+    facts JSON,
+    preferences JSON,
+    open_issues JSON,
+    summary_version INTEGER NOT NULL DEFAULT 1,
+    last_turn_id VARCHAR,
+    expires_at TIMESTAMP,
+    created_at TIMESTAMP NOT NULL DEFAULT NOW(),
+    updated_at TIMESTAMP NOT NULL DEFAULT NOW()
+);
+"""
+
+USER_MEMORIES_INDEXES = """
+CREATE INDEX IF NOT EXISTS ix_user_memories_tenant_user ON user_memories(tenant_id, user_id);
+CREATE INDEX IF NOT EXISTS ix_user_memories_tenant_user_updated ON user_memories(tenant_id, user_id, updated_at);
+"""
+
+USER_MEMORIES_ROLLBACK = """
+DROP TABLE IF EXISTS user_memories;
+"""
+
+
+async def upgrade(conn):
+    """
+    Apply the migration: create the user_memories table and its indexes.
+
+    Args:
+        conn: Connection object exposing an awaitable ``execute(sql)``
+    """
+    await conn.execute(USER_MEMORIES_DDL)
+    # NOTE(review): this string holds two statements; confirm the driver accepts
+    # multi-statement execute() calls.
+    await conn.execute(USER_MEMORIES_INDEXES)
+
+
+async def downgrade(conn):
+    """
+    Roll back the migration by dropping the user_memories table.
+
+    Args:
+        conn: Connection object exposing an awaitable ``execute(sql)``
+    """
+    await conn.execute(USER_MEMORIES_ROLLBACK)
diff --git a/ai-service/scripts/clear_kb_vectors.py b/ai-service/scripts/clear_kb_vectors.py
new file mode 100644
index 0000000..7e148b5
--- /dev/null
+++ b/ai-service/scripts/clear_kb_vectors.py
@@ -0,0 +1,223 @@
+"""
+Script to clean up vector data for a specific knowledge base.
+
+Deletes the Qdrant collection that belongs to the given KB ID so the knowledge
+base can be re-indexed. With --delete-docs the KB's document records are
+removed from the database as well.
+"""
+
+import asyncio
+import logging
+import sys
+from datetime import datetime
+from pathlib import Path
+
+# Resolve the service root (the parent of this scripts/ directory) from the
+# file location so the "app" package is importable on any machine.
+sys.path.insert(0, str(Path(__file__).resolve().parent.parent))
+
+from app.core.config import get_settings
+from app.core.qdrant_client import get_qdrant_client
+from app.core.database import get_session
+from app.models.entities import KnowledgeBase, Document
+from sqlalchemy import select
+
+logging.basicConfig(
+    level=logging.INFO,
+    format="%(asctime)s - %(levelname)s - %(message)s"
+)
+logger = logging.getLogger(__name__)
+
+
+async def get_knowledge_base_info(kb_id: str) -> dict | None:
+    """
+    Look up a knowledge base and its documents in the database.
+
+    Args:
+        kb_id: Knowledge base ID
+
+    Returns:
+        A dict with the keys "id", "tenant_id", "name", "doc_count" and
+        "document_ids", or None when no knowledge base has this ID
+    """
+    async for session in get_session():
+        stmt = select(KnowledgeBase).where(KnowledgeBase.id == kb_id)
+        result = await session.execute(stmt)
+        kb = result.scalar_one_or_none()
+
+        if kb:
+            doc_stmt = select(Document).where(Document.kb_id == kb_id)
+            doc_result = await session.execute(doc_stmt)
+            documents = doc_result.scalars().all()
+
+            return {
+                "id": str(kb.id),
+                "tenant_id": kb.tenant_id,
+                "name": kb.name,
+                "doc_count": len(documents),
+                "document_ids": [str(doc.id) for doc in documents]
+            }
+    return None
+
+
+async def list_kb_collections(tenant_id: str, kb_id: str) -> list[str]:
+    """
+    List all collections that might be related to the KB.
+
+    A collection matches when its name contains the first 8 characters of the
+    KB ID with dashes replaced by underscores, or when its name with the
+    underscores removed contains the first 8 characters of the KB ID with the
+    dashes removed.
+
+    Args:
+        tenant_id: Tenant identifier (not used for matching)
+        kb_id: Knowledge base ID
+
+    Returns:
+        Names of the matching collections
+    """
+    client = await get_qdrant_client()
+    qdrant = await client.get_client()
+
+    collections = await qdrant.get_collections()
+    all_names = [c.name for c in collections.collections]
+
+    # Both prefixes are loop-invariant, so build them once.
+    safe_kb = kb_id.replace('-', '_')[:8]
+    compact_kb = kb_id.replace('-', '')[:8]
+
+    return [
+        name for name in all_names
+        if safe_kb in name or compact_kb in name.replace('_', '')
+    ]
+
+
+async def clear_kb_vector_data(tenant_id: str, kb_id: str, delete_docs: bool = False) -> bool:
+    """
+    Clear vector data for a specific knowledge base.
+
+    Args:
+        tenant_id: Tenant identifier
+        kb_id: Knowledge base ID
+        delete_docs: Whether to also delete document records from database
+
+    Returns:
+        True if successful, False if any step raised (the error is logged)
+    """
+    client = await get_qdrant_client()
+    qdrant = await client.get_client()
+
+    collection_name = client.get_kb_collection_name(tenant_id, kb_id)
+
+    try:
+        exists = await qdrant.collection_exists(collection_name)
+        if exists:
+            await qdrant.delete_collection(collection_name=collection_name)
+            logger.info(f"Deleted Qdrant collection: {collection_name}")
+        else:
+            logger.info(f"Collection {collection_name} does not exist")
+
+        if delete_docs:
+            async for session in get_session():
+                doc_stmt = select(Document).where(Document.kb_id == kb_id)
+                doc_result = await session.execute(doc_stmt)
+                documents = doc_result.scalars().all()
+
+                for doc in documents:
+                    await session.delete(doc)
+
+                stmt = select(KnowledgeBase).where(KnowledgeBase.id == kb_id)
+                result = await session.execute(stmt)
+                kb = result.scalar_one_or_none()
+                if kb:
+                    kb.doc_count = 0
+                    kb.updated_at = datetime.utcnow()
+
+                await session.commit()
+                logger.info(f"Deleted {len(documents)} document records from database")
+                break
+
+        return True
+
+    except Exception as e:
+        # Last-resort handler for a CLI tool: keep the traceback in the log.
+        logger.exception(f"Failed to clear KB vector data: {e}")
+        return False
+
+
+async def main(kb_id: str, delete_docs: bool = False):
+    """
+    Interactively clear the vector data of one knowledge base.
+
+    Looks the KB up, prints what is about to be deleted, asks for a "yes"
+    confirmation on stdin and then runs the cleanup.
+
+    Args:
+        kb_id: Knowledge base ID to clear
+        delete_docs: Whether to also delete document records from database
+
+    Returns:
+        True if the cleanup succeeded; False if the KB was not found, the
+        user did not confirm, or the cleanup failed
+    """
+    logger.info(f"Starting cleanup for knowledge base: {kb_id}")
+
+    kb_info = await get_knowledge_base_info(kb_id)
+
+    if not kb_info:
+        logger.error(f"Knowledge base not found: {kb_id}")
+        return False
+
+    logger.info("Found knowledge base:")
+    logger.info(f" - ID: {kb_info['id']}")
+    logger.info(f" - Name: {kb_info['name']}")
+    logger.info(f" - Tenant: {kb_info['tenant_id']}")
+    logger.info(f" - Document count: {kb_info['doc_count']}")
+
+    matching_collections = await list_kb_collections(kb_info['tenant_id'], kb_id)
+    if matching_collections:
+        logger.info(f" - Related collections: {matching_collections}")
+
+    print()
+    print("=" * 60)
+    print("WARNING: This will delete all vector data for this knowledge base!")
+    # NOTE(review): this name is rebuilt by hand; confirm it matches what
+    # client.get_kb_collection_name() returns in clear_kb_vector_data().
+    print(f"Collection to delete: kb_{kb_info['tenant_id'].replace('@', '_')}_{kb_id.replace('-', '_')[:8]}")
+    if delete_docs:
+        print("Document records in database will also be deleted!")
+    print("=" * 60)
+    print()
+
+    confirm = input("Continue? (yes/no): ")
+    if confirm.lower() != "yes":
+        print("Cancelled")
+        return False
+
+    success = await clear_kb_vector_data(
+        tenant_id=kb_info['tenant_id'],
+        kb_id=kb_id,
+        delete_docs=delete_docs
+    )
+
+    if success:
+        logger.info(f"Successfully cleared vector data for KB: {kb_id}")
+        logger.info("You can now re-index the knowledge base documents.")
+    else:
+        logger.error(f"Failed to clear vector data for KB: {kb_id}")
+
+    return success
+
+
+if __name__ == "__main__":
+    import argparse
+
+    parser = argparse.ArgumentParser(description="Clear vector data for a knowledge base")
+    parser.add_argument("kb_id", help="Knowledge base ID to clear")
+    parser.add_argument("--delete-docs", action="store_true",
+                        help="Also delete document records from database")
+
+    args = parser.parse_args()
+
+    # Exit non-zero when the KB is missing, the user cancels or the cleanup fails.
+    sys.exit(0 if asyncio.run(main(args.kb_id, args.delete_docs)) else 1)
diff --git a/ai-service/svg/kefu.svg b/ai-service/svg/kefu.svg
new file mode 100644
index 0000000..715c3c5
--- /dev/null
+++ b/ai-service/svg/kefu.svg
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/ai-service/svg/user.svg b/ai-service/svg/user.svg
new file mode 100644
index 0000000..c34afaa
--- /dev/null
+++ b/ai-service/svg/user.svg
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/ai-service/svg/用户1.svg b/ai-service/svg/用户1.svg
new file mode 100644
index 0000000..7c83ed9
--- /dev/null
+++ b/ai-service/svg/用户1.svg
@@ -0,0 +1 @@
+
\ No newline at end of file