ai-robot-core/ai-service/scripts/cleanup_collections.py

121 lines
3.6 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""
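Clean up unneeded Qdrant collections under the tenant szmp@ash@2026.
Keep: 8559ebc9-bfaf-4211-8fe3-ee2b22a5e29c, 30c19c84-8f69-4768-9d23-7f4a5bc3627a
      (matched by the first 8 characters of each kb_id)
Delete: all other collections of this tenant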
"""
import asyncio
import sys
from pathlib import Path
sys.path.insert(0, str(Path(__file__).parent.parent))
from qdrant_client import AsyncQdrantClient
from app.core.config import get_settings
def _belongs_to_tenant(name: str, prefix: str) -> bool:
    """Return True if *name* is the tenant's base collection or a ``<prefix>_<suffix>`` collection."""
    # A bare ``startswith(prefix)`` would also match a different tenant whose
    # sanitized id merely begins with the same text (e.g. ``kb_szmp_ash_20260``),
    # so require either an exact match or an underscore right after the prefix.
    return name == prefix or name.startswith(f"{prefix}_")


async def cleanup_collections():
    """Delete the tenant's Qdrant collections that are not on the keep list.

    Connects to the Qdrant instance at ``settings.qdrant_url``, lists all
    collections, selects those named ``kb_<tenant>`` or ``kb_<tenant>_<suffix>``
    for the hard-coded tenant, prints the plan, then deletes every selected
    collection whose name does not contain one of the kept kb_id prefixes.

    A failure to delete one collection is reported and does not stop the
    remaining deletions; failed names are listed in the final statistics.
    Any other error is printed together with a traceback. The client is
    always closed before returning.

    Returns:
        None. All results are reported on stdout.
    """
    settings = get_settings()
    client = AsyncQdrantClient(url=settings.qdrant_url)
    tenant_id = "szmp@ash@2026"
    # '@' is replaced with '_' to build the collection name prefix.
    safe_tenant_id = tenant_id.replace('@', '_')
    prefix = f"kb_{safe_tenant_id}"
    # First 8 characters of the kb_ids whose collections must be kept.
    keep_kb_ids = [
        "8559ebc9",
        "30c19c84",
    ]

    def is_kept(name: str) -> bool:
        # Deliberately a loose substring match: when in doubt, keep the collection.
        return any(kb_id in name for kb_id in keep_kb_ids)

    print(f"🔍 扫描租户 {tenant_id} 的 collections...")
    print(f" 前缀: {prefix}")
    print(f" 保留: {keep_kb_ids}")
    print("-" * 80)
    try:
        collections = await client.get_collections()
        # Only collections that belong to this tenant are candidates.
        tenant_collections = [
            c.name for c in collections.collections
            if _belongs_to_tenant(c.name, prefix)
        ]
        print(f"\n📊 找到 {len(tenant_collections)} 个 collections:")
        for name in sorted(tenant_collections):
            status = "✅ 保留" if is_kept(name) else "❌ 删除"
            print(f" {status} {name}")
        print("\n" + "=" * 80)
        print("开始删除...")
        print("=" * 80)
        deleted = []
        skipped = []
        failed = []
        for collection_name in tenant_collections:
            if is_kept(collection_name):
                print(f"\n⏭️ 跳过 {collection_name} (保留)")
                skipped.append(collection_name)
                continue
            print(f"\n🗑️ 删除 {collection_name}...")
            try:
                await client.delete_collection(collection_name)
            except Exception as e:
                # Best effort: record the failure and carry on with the rest.
                print(f" ❌ 删除失败: {e}")
                failed.append(collection_name)
            else:
                print(" ✅ 已删除")
                deleted.append(collection_name)
        print("\n" + "=" * 80)
        print("清理完成!")
        print("=" * 80)
        print("\n📈 统计:")
        print(f" 保留: {len(skipped)}")
        for name in skipped:
            print(f" - {name}")
        print(f"\n 删除: {len(deleted)}")
        for name in deleted:
            print(f" - {name}")
        # Surface failures in the summary so they are not lost in the log above.
        if failed:
            print(f"\n 删除失败: {len(failed)}")
            for name in failed:
                print(f" - {name}")
    except Exception as e:
        print(f"\n❌ 错误: {e}")
        import traceback
        traceback.print_exc()
    finally:
        await client.close()
if __name__ == "__main__":
    import os

    # Safety confirmation before anything is deleted.
    # NOTE: the names printed below are a static list; the collections that are
    # actually deleted are computed at run time by cleanup_collections().
    print("=" * 80)
    print("⚠️ 警告: 此操作将永久删除以下 collections:")
    print(" - kb_szmp_ash_2026")
    print(" - kb_szmp_ash_2026_fa4c1d61")
    print(" - kb_szmp_ash_2026_3ddf0ce7")
    print("\n 保留:")
    print(" - kb_szmp_ash_2026_8559ebc9")
    print(" - kb_szmp_ash_2026_30c19c84")
    print("=" * 80)
    print("\n确认删除? (yes/no): ", end="")
    # Non-interactive runs confirm automatically when AUTO_CONFIRM=true.
    if os.environ.get('AUTO_CONFIRM') == 'true':
        response = 'yes'
        print('yes (auto)')
    else:
        try:
            response = input().strip().lower()
        except EOFError:
            # stdin is closed (non-interactive run without AUTO_CONFIRM):
            # treat it as a refusal instead of crashing with a traceback.
            response = ''
    if response in ('yes', 'y'):
        asyncio.run(cleanup_collections())
    else:
        print("\n❌ 已取消")