41 lines
1.1 KiB
Python
41 lines
1.1 KiB
Python
"""
Test Excel parsing directly.

Manual smoke check: prints the formats reported by the document service,
parses the first ``.xlsx`` file found under ``uploads/`` (if that directory
exists), and finally reports whether ``openpyxl`` can be imported.
"""
import os
import sys
from pathlib import Path

# Put the current working directory first on the import path so that the
# ``app`` package resolves (presumably the script is run from the project
# root -- confirm before relying on it from elsewhere).
sys.path.insert(0, ".")

from app.services.document import get_supported_document_formats, parse_document

# Show the supported formats, followed by one empty line.
print("Supported formats:", get_supported_document_formats(), end="\n\n")

# Look for workbooks anywhere below the uploads directory.
uploads_dir = Path("uploads")
if not uploads_dir.exists():
    print("No uploads directory found")
else:
    xlsx_files = list(uploads_dir.rglob("*.xlsx"))
    print(f"Found {len(xlsx_files)} xlsx files")

    # Only the first match is parsed; the slice keeps this a no-op when
    # nothing was found.
    for workbook in xlsx_files[:1]:
        print(f"\nTesting: {workbook}")
        try:
            parsed = parse_document(str(workbook))
            print(f" SUCCESS: chars={len(parsed.text)}")
            print(f" metadata: {parsed.metadata}")
            print(f" preview: {parsed.text[:500]}...")
        except Exception as exc:
            # Deliberately broad: this is a diagnostic script, so any parser
            # failure is reported rather than aborting the run.
            print(f" FAILED: {type(exc).__name__}: {exc}")

# Check the openpyxl dependency on its own, independent of the parser.
print("\n--- Testing openpyxl directly ---")
try:
    import openpyxl
except ImportError as exc:
    print(f"openpyxl NOT installed: {exc}")
else:
    print(f"openpyxl version: {openpyxl.__version__}")