def _check_dependencies_once():
    """
    Run the dependency check at most once, skipping it when already done.

    Two markers record a successful check:

    - the module-level flag ``__DEPENDENCIES_CHECKED`` (shared by every
      thread in this process), and
    - the marker file ``dependencies_checked.lock`` inside the directory
      returned by ``get_docviz_directory()``, which persists across
      processes and interpreter sessions.

    Both markers are tested once without the lock and once more while
    holding ``__DEPENDENCIES_LOCK``, so threads that arrive after a
    successful check return without waiting on the lock.

    NOTE(review): the marker file is only tested with ``exists()`` and
    created with ``touch()``; it lets later processes skip the check but
    does not stop two processes that start at the same time from both
    running it. Once the file exists the check is skipped until the file
    is removed.

    The check itself is delegated to ``_run_async_dependency_check()``.

    Raises:
        Exception: Whatever ``_run_async_dependency_check()`` raises is
            propagated unchanged. In that case neither marker is set, so
            the next call retries the check.
        OSError: If the marker directory or marker file cannot be created.
    """
    global __DEPENDENCIES_CHECKED

    # Marker file shared by all processes.
    # Presumably get_docviz_directory() returns a pathlib.Path -- it is used
    # with "/", .parent, .exists() and .touch() below.
    lock_file = get_docviz_directory() / "dependencies_checked.lock"
    # parents=True so a missing intermediate directory does not abort the
    # whole check with FileNotFoundError.
    lock_file.parent.mkdir(parents=True, exist_ok=True)

    # Fast path: already verified in this process or by an earlier one.
    if __DEPENDENCIES_CHECKED or lock_file.exists():
        return

    with __DEPENDENCIES_LOCK:
        # Re-test under the lock: another thread may have completed the
        # check while this one was waiting.
        if __DEPENDENCIES_CHECKED or lock_file.exists():
            return

        # If the check raises, the exception propagates and the two markers
        # below are never set, so the check is retried on the next call.
        _run_async_dependency_check()
        __DEPENDENCIES_CHECKED = True
        lock_file.touch()
def _run_async_dependency_check():
    """
    Run the ``check_dependencies()`` coroutine to completion, synchronously.

    Two situations are handled:

    1. No event loop is running in the calling thread: the coroutine is
       executed directly with ``asyncio.run()``.
    2. An event loop is already running in the calling thread (e.g. in a
       Jupyter notebook): ``asyncio.run()`` would refuse to start, so the
       coroutine is executed on a fresh event loop in a worker thread and
       the caller blocks until it finishes.

    The running loop is detected up front with
    ``asyncio.get_running_loop()`` rather than by calling ``asyncio.run()``
    and inspecting the error message. This avoids creating a coroutine
    object that is never awaited and avoids misclassifying a
    ``RuntimeError`` raised by ``check_dependencies()`` itself.

    Raises:
        Exception: Any exception raised by ``check_dependencies()`` is
            propagated unchanged, in both situations.
    """
    try:
        asyncio.get_running_loop()
    except RuntimeError:
        # get_running_loop() raises RuntimeError when no loop is running.
        loop_is_running = False
    else:
        loop_is_running = True

    if not loop_is_running:
        asyncio.run(check_dependencies())
        return

    # A loop is already running in this thread. Use a separate thread, which
    # has no running loop of its own, so asyncio.run() is allowed there.
    import concurrent.futures

    def run_in_thread():
        # The coroutine is created inside the worker so it is always awaited
        # by the loop that asyncio.run() creates and closes.
        return asyncio.run(check_dependencies())

    with concurrent.futures.ThreadPoolExecutor(max_workers=1) as executor:
        future = executor.submit(run_in_thread)
        # result() blocks the caller until the check finishes and re-raises
        # any exception raised inside the worker thread.
        future.result()
# Verify dependencies as a side effect of importing this module; any
# exception raised by the check propagates to the importer.
_check_dependencies_once()

# Names exported by a star-import of this module.
__all__ = [
    "DetectionConfig",
    "Document",
    "ExtractionChunk",
    "ExtractionConfig",
    "ExtractionEntry",
    "ExtractionResult",
    "ExtractionType",
    "LLMConfig",
    "OCRConfig",
    "SaveFormat",
    "batch_extract",
    "extract_content",
    "extract_content_streaming",
    "extract_content_streaming_sync",
    "extract_content_sync",
]