ai-robot-core/ai-service/scripts/cleanup_collections.py

121 lines
3.6 KiB
Python
Raw Permalink Normal View History

"""
清理 szmp@ash@2026 租户下不需要的 Qdrant collections
保留8559ebc9-bfaf-4211-8fe3-ee2b22a5e29c, 30c19c84-8f69-4768-9d23-7f4a5bc3627a
删除其他所有 collections
"""
import asyncio
import sys
from pathlib import Path
sys.path.insert(0, str(Path(__file__).parent.parent))
from qdrant_client import AsyncQdrantClient
from app.core.config import get_settings
def _is_tenant_collection(name, prefix):
    """Return True if *name* is exactly *prefix* or *prefix* followed by ``_``.

    A bare ``startswith(prefix)`` would also match collections of a different
    tenant whose sanitized id merely begins with the same characters
    (e.g. ``kb_szmp_ash_20260_...``), which this destructive script must
    never touch.
    """
    return name == prefix or name.startswith(f"{prefix}_")


def _is_protected(name, keep_kb_ids):
    """Return True if *name* contains any of the kb_id fragments to keep."""
    return any(kb_id in name for kb_id in keep_kb_ids)


async def cleanup_collections():
    """Delete the tenant's Qdrant collections except the protected ones.

    Connects to the Qdrant instance at ``settings.qdrant_url``, lists all
    collections, selects those named ``kb_szmp_ash_2026`` or
    ``kb_szmp_ash_2026_<suffix>``, prints the keep/delete plan, deletes every
    selected collection whose name does not contain one of the protected
    kb_id fragments, and finally prints a summary (kept, deleted and, if any,
    failed deletions).

    Errors are reported on stdout (with a traceback for unexpected ones) and
    are not re-raised; the client is always closed.
    """
    settings = get_settings()
    client = AsyncQdrantClient(url=settings.qdrant_url)
    tenant_id = "szmp@ash@2026"
    # '@' is replaced by '_' to obtain the collection-name form of the tenant id.
    safe_tenant_id = tenant_id.replace('@', '_')
    prefix = f"kb_{safe_tenant_id}"
    # First 8 characters of each kb_id whose collection must be kept.
    keep_kb_ids = [
        "8559ebc9",
        "30c19c84",
    ]
    print(f"🔍 扫描租户 {tenant_id} 的 collections...")
    print(f" 前缀: {prefix}")
    print(f" 保留: {keep_kb_ids}")
    print("-" * 80)
    try:
        collections = await client.get_collections()
        # All collections that belong to this tenant (boundary-checked match).
        tenant_collections = [
            c.name for c in collections.collections
            if _is_tenant_collection(c.name, prefix)
        ]
        # Decide once which collections are protected so the printed plan and
        # the deletion loop can never disagree.
        protected = {
            name for name in tenant_collections
            if _is_protected(name, keep_kb_ids)
        }
        print(f"\n📊 找到 {len(tenant_collections)} 个 collections:")
        for name in sorted(tenant_collections):
            status = "✅ 保留" if name in protected else "❌ 删除"
            print(f" {status} {name}")
        print("\n" + "=" * 80)
        print("开始删除...")
        print("=" * 80)
        deleted = []
        skipped = []
        failed = []
        for collection_name in tenant_collections:
            if collection_name in protected:
                print(f"\n⏭️ 跳过 {collection_name} (保留)")
                skipped.append(collection_name)
                continue
            print(f"\n🗑️ 删除 {collection_name}...")
            try:
                await client.delete_collection(collection_name)
            except Exception as e:
                # Best effort: one failed deletion must not stop the others,
                # but it is recorded so the summary does not hide it.
                print(f" ❌ 删除失败: {e}")
                failed.append(collection_name)
            else:
                print(" ✅ 已删除")
                deleted.append(collection_name)
        print("\n" + "=" * 80)
        print("清理完成!")
        print("=" * 80)
        print("\n📈 统计:")
        print(f" 保留: {len(skipped)}")
        for name in skipped:
            print(f" - {name}")
        print(f"\n 删除: {len(deleted)}")
        for name in deleted:
            print(f" - {name}")
        if failed:
            print(f"\n 失败: {len(failed)}")
            for name in failed:
                print(f" - {name}")
    except Exception as e:
        # Top-level boundary of the script: report the error and fall through
        # to the cleanup in ``finally``.
        print(f"\n❌ 错误: {e}")
        import traceback
        traceback.print_exc()
    finally:
        await client.close()
if __name__ == "__main__":
# 安全确认
print("=" * 80)
print("⚠️ 警告: 此操作将永久删除以下 collections:")
print(" - kb_szmp_ash_2026")
print(" - kb_szmp_ash_2026_fa4c1d61")
print(" - kb_szmp_ash_2026_3ddf0ce7")
print("\n 保留:")
print(" - kb_szmp_ash_2026_8559ebc9")
print(" - kb_szmp_ash_2026_30c19c84")
print("=" * 80)
print("\n确认删除? (yes/no): ", end="")
# 在非交互环境自动确认
import os
if os.environ.get('AUTO_CONFIRM') == 'true':
response = 'yes'
print('yes (auto)')
else:
response = input().strip().lower()
if response in ('yes', 'y'):
asyncio.run(cleanup_collections())
else:
print("\n❌ 已取消")