"""
Llama.cpp Provider Implementation

Implements the LLMProvider interface using a local llama.cpp model.

This provider uses llama-cpp-python to run inference on quantized
models in GGUF format.
"""

from threading import Lock

from llama_cpp import Llama

from api.config.loader import CONFIG
from api.models.llm_provider import LLMProvider
from utils import LoggerFactory

llm_config = CONFIG["llm"]
logger = LoggerFactory.instance().get_logger("llm")
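
# The CONFIG["llm"] keys read below imply a config.yml section roughly like
# the following. The values shown are hypothetical, for illustration only:
#
#   llm:
#     model_path: models/model.Q4_K_M.gguf
#     context_length: 4096
#     threads: 8
#     gpu_layers: 0
#     verbose: false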

# pylint: disable=too-few-public-methods
class LlamaCppProvider(LLMProvider):
    """
    LLMProvider implementation for local llama.cpp models.
    """

    def __init__(self):
        """
        Initializes the Llama model with configuration from config.yml
        and sets up a lock for thread-safe usage.
        """
        self.llm = Llama(
            model_path=llm_config["model_path"],
            n_ctx=llm_config["context_length"],
            n_threads=llm_config["threads"],
            n_gpu_layers=llm_config["gpu_layers"],
            verbose=llm_config["verbose"],
        )
        # A llama.cpp context is not safe for concurrent calls, so all
        # inference is serialized through this lock.
        self.lock = Lock()

    def generate(self, prompt: str, max_tokens: int) -> str:
        """
        Generate a response from the model given a prompt.

        Args:
            prompt (str): Prompt to feed into the model.
            max_tokens (int): Maximum number of tokens to generate.

        Returns:
            str: The generated text response.
        """
        try:
            # Hold the lock only for the inference call itself.
            with self.lock:
                output = self.llm(
                    prompt=prompt,
                    max_tokens=max_tokens,
                    echo=False,
                )
            return output["choices"][0]["text"].strip()
        except ValueError as e:
            # llama-cpp-python raises ValueError for invalid generation
            # requests, e.g. a prompt that exceeds the context window.
            logger.error("Invalid generation request: %s", e)
            raise RuntimeError("LLM generation failed: invalid prompt or parameters.") from e
        except Exception as e:  # pylint: disable=broad-exception-caught
            logger.error("Unexpected error during LLM generation: %s", e)
            return "Sorry, something went wrong during generation."

# Module-level singleton so the model is loaded once per process.
llm_provider = LlamaCppProvider()
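
# A minimal smoke test (not part of the provider's API), assuming the GGUF
# model configured in config.yml exists locally. Run this module directly
# to try it; the prompt and token budget below are arbitrary examples.
if __name__ == "__main__":
    print(llm_provider.generate("Q: What is llama.cpp?\nA:", max_tokens=64))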