-
-
Notifications
You must be signed in to change notification settings - Fork 89
[Backend] Lazy Model Initialization & Fault-Tolerant Startup (#302) #323
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
433d464
cee0c99
2c90658
3dbb670
1d982ea
e086781
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,9 +1,27 @@ | ||
| """Loads and exports once the sentence transformer model.""" | ||
| """ | ||
| Embedding model lazy loader wrapper. | ||
|
|
||
| from rag.embedding.embedding_utils import load_embedding_model | ||
| from api.config.loader import CONFIG | ||
| from utils import LoggerFactory | ||
| This module provides access to the sentence transformer embedding model | ||
| through lazy initialization to avoid blocking startup with heavy model loads. | ||
|
|
||
| logger = LoggerFactory.instance().get_logger("api") | ||
| DEPRECATED: Direct access to EMBEDDING_MODEL global is no longer supported. | ||
| Use api.models.runtime_models.get_embedding_model() instead. | ||
| """ | ||
|
|
||
| EMBEDDING_MODEL = load_embedding_model(CONFIG["retrieval"]["embedding_model_name"], logger) | ||
| from api.models.runtime_models import get_embedding_model | ||
|
|
||
# Backward compatibility during the transition: legacy callers may still do
# `from <this module> import EMBEDDING_MODEL`. We must NOT eagerly load the
# model at import time, so the attribute is resolved lazily via PEP 562.
def _get_embedding_model_compat():
    """Backward-compatibility wrapper around the lazy loader.

    Returns:
        The shared embedding model instance.

    Raises:
        RuntimeError: If the model could not be initialized, so legacy
            callers fail loudly instead of silently operating on ``None``.
            New code should call ``get_embedding_model()`` directly and
            handle ``None`` for graceful degraded mode.
    """
    model = get_embedding_model()
    if model is None:
        raise RuntimeError(
            "Embedding model not available. "
            "Check logs for initialization errors. "
            "Consider using get_embedding_model() for graceful degraded mode."
        )
    return model


def __getattr__(name):
    """PEP 562 module-level attribute hook: lazily resolve EMBEDDING_MODEL.

    The previous revision assigned ``EMBEDDING_MODEL = property(...)``, but
    ``property`` descriptors only take effect on classes — importers would
    have received the property object itself, never the model. A module
    ``__getattr__`` keeps the old import contract working while still
    deferring the heavy model load until first attribute access.
    """
    if name == "EMBEDDING_MODEL":
        return _get_embedding_model_compat()
    raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,135 @@ | ||
| """ | ||
| Runtime lazy model loader with caching and error handling. | ||
|
|
||
| This module provides thread-safe, lazy initialization for heavy components | ||
| (embedding model and LLM provider) that should not be loaded at import time. | ||
| Models are initialized on first use and cached for subsequent calls. | ||
| """ | ||
|
|
||
| from threading import Lock | ||
| from typing import Optional, Dict, Any | ||
| from utils import LoggerFactory | ||
|
|
||
logger = LoggerFactory.instance().get_logger("models")

# Caching and locking for lazy initialization.
# _models_cache maps a model key (e.g. "embedding", "llm") to its loaded
# instance; _models_errors maps the same key to a human-readable error string
# recorded when a load fails, so failed loads are not retried on every call.
_models_cache: Dict[str, Any] = {}
# Serializes first-time initialization so concurrent callers load each
# model exactly once (double-checked in _get_cached_model).
_models_lock = Lock()
_models_errors: Dict[str, str] = {}
|
|
||
|
|
||
def get_embedding_model():
    """
    Return the shared sentence transformer embedding model.

    The model is created on first call through the generic cached loader
    and the same instance is reused for every subsequent call.

    Returns:
        Optional[SentenceTransformer]: The loaded model, or None if
        initialization failed. Check logs for error details.
    """
    return _get_cached_model("embedding", _load_embedding_model_impl)
|
|
||
|
|
||
def get_llm_provider():
    """
    Return the shared LLM provider, creating it lazily on first use.

    Returns:
        Optional[LlamaCppProvider]: The loaded provider, or None if
        initialization failed. Check logs for error details.
    """
    return _get_cached_model("llm", _load_llm_provider_impl)
|
|
||
|
|
||
def get_models_status() -> Dict[str, Any]:
    """
    Get the current status of all cached models (for health checks).

    Returns:
        dict: Status object with keys:
            - embedding_available: bool
            - llm_available: bool
            - embedding_error: Optional[str]
            - llm_error: Optional[str]
    """
    # A cache entry may legitimately hold None (the LLM loader returns None
    # in test mode and _get_cached_model caches that result), so "available"
    # must mean a usable non-None instance — not merely that a cache entry
    # exists. Key-membership checks here previously reported llm_available
    # as True while get_llm_provider() returned None.
    return {
        "embedding_available": _models_cache.get("embedding") is not None,
        "llm_available": _models_cache.get("llm") is not None,
        "embedding_error": _models_errors.get("embedding"),
        "llm_error": _models_errors.get("llm"),
    }
|
|
||
|
|
||
def _get_cached_model(model_name: str, loader_fn) -> Optional[Any]:
    """
    Generic lazy loader with caching and thread-safe locking.

    Args:
        model_name: Key for caching (e.g., "embedding", "llm")
        loader_fn: Callable that returns the loaded model or raises an exception

    Returns:
        The cached model instance, or None if initialization failed.
    """
    # Lock-free fast path: entries are only ever added, never removed, so a
    # positive hit here is always safe to return without taking the lock.
    if model_name in _models_cache:
        logger.debug("%s model already cached, returning existing instance", model_name)
        return _models_cache[model_name]

    # A previous attempt failed; do not retry on every call.
    if model_name in _models_errors:
        return None

    with _models_lock:
        # Double-check after acquiring lock: another thread may have
        # completed (or failed) the load while we were waiting.
        if model_name in _models_cache:
            return _models_cache[model_name]
        if model_name in _models_errors:
            return None

        try:
            logger.info("Initializing %s model for the first time...", model_name)
            model = loader_fn()
            _models_cache[model_name] = model
            logger.info("%s model initialized successfully", model_name)
            return model
        except Exception as exc:  # pylint: disable=broad-except
            # Fault-tolerant startup is the purpose of this module: model
            # loading performs file and network I/O (e.g. downloading
            # weights), which raises OSError and library-specific errors
            # well beyond ImportError/RuntimeError/ValueError. Catching only
            # that narrow tuple let other failures escape uncaught and
            # unrecorded. Catch everything at this boundary, log it with the
            # traceback, record it so we don't retry, and degrade to None.
            error_msg = f"Failed to initialize {model_name}: {type(exc).__name__}: {exc}"
            _models_errors[model_name] = error_msg
            logger.error("%s", error_msg, exc_info=True)
            return None
|
|
||
|
|
||
def _load_embedding_model_impl():
    """
    Internal: actually load the embedding model.
    Called only on first use and under lock.
    """
    # Deferred imports keep this module cheap to import; the heavy
    # dependencies are only pulled in when a model is actually requested.
    # pylint: disable=import-outside-toplevel
    from sentence_transformers import SentenceTransformer
    from api.config.loader import CONFIG

    name = CONFIG["retrieval"]["embedding_model_name"]
    logger.debug("Loading embedding model: %s", name)
    return SentenceTransformer(name)
|
|
||
|
|
||
def _load_llm_provider_impl():
    """
    Internal: actually load the LLM provider.
    Called only on first use and under lock.
    """
    # Deferred imports: avoid paying for the provider at module import time.
    # pylint: disable=import-outside-toplevel
    from api.models.llama_cpp_provider import LlamaCppProvider
    from api.config.loader import CONFIG

    # Skip the heavy provider entirely when running under tests.
    if not CONFIG.get("is_test_mode", False):
        logger.debug("Initializing LLM provider...")
        return LlamaCppProvider()

    logger.info("Test mode enabled: LLM provider will not be instantiated")
    return None
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Why are you suppressing this pylint warning here?