""" Check Qdrant vector database contents - detailed view. """ import asyncio import sys sys.path.insert(0, ".") from qdrant_client import AsyncQdrantClient from app.core.config import get_settings from collections import defaultdict settings = get_settings() async def check_qdrant(): """Check Qdrant collections and vectors.""" client = AsyncQdrantClient(url=settings.qdrant_url, check_compatibility=False) print(f"\n{'='*60}") print(f"Qdrant URL: {settings.qdrant_url}") print(f"{'='*60}\n") # List all collections collections = await client.get_collections() # Check kb_default collection for c in collections.collections: if c.name == "kb_default": print(f"\n--- Collection: {c.name} ---") # Get collection info info = await client.get_collection(c.name) print(f" Total vectors: {info.points_count}") # Scroll through all points and group by source all_points = [] offset = None while True: points, offset = await client.scroll( collection_name=c.name, limit=100, offset=offset, with_payload=True, with_vectors=False, ) all_points.extend(points) if offset is None: break # Group by source by_source = defaultdict(list) for p in all_points: source = p.payload.get("source", "unknown") if p.payload else "unknown" by_source[source].append(p) print(f"\n Documents by source:") for source, points in by_source.items(): print(f"\n Source: {source}") print(f" Chunks: {len(points)}") # Check first chunk content first_point = points[0] text = first_point.payload.get("text", "") if first_point.payload else "" # Check if it's binary garbage or proper text is_garbage = any(ord(c) > 0xFFFF or (ord(c) < 32 and c not in '\n\r\t') for c in text[:200]) if is_garbage: print(f" Status: ❌ BINARY GARBAGE (parsing failed)") else: print(f" Status: ✅ PROPER TEXT (parsed correctly)") print(f" Preview: {text[:150]}...") await client.close() if __name__ == "__main__": asyncio.run(check_qdrant())