Commit 253dd4f
Merge with main branch.
2 parents 47be8a4 + 552742a commit 253dd4f

23 files changed

Lines changed: 658 additions & 186 deletions

.gitignore

Lines changed: 3 additions & 0 deletions

@@ -23,6 +23,9 @@ chatbot-core/data/**/*.json
 chatbot-core/data/embeddings/*
 !chatbot-core/data/embeddings/.gitkeep
 
+# Local model
+chatbot-core/api/models/**/*.gguf
+
 # Built frontend files
 src/main/webapp/static/*
 !src/main/webapp/static/.gitkeep

chatbot-core/api/__init__.py

Whitespace-only changes.

chatbot-core/api/config/__init__.py

Whitespace-only changes.

chatbot-core/api/config/config.yml

Lines changed: 19 additions & 0 deletions

@@ -0,0 +1,19 @@
+api:
+  prefix: "/api/chatbot"
+
+llm:
+  model_path: "api/models/mistral/mistral-7b-instruct-v0.2.Q4_K_M.gguf"
+  max_tokens: 512
+  context_length: 2048
+  threads: 8
+  gpu_layers: 0
+  verbose: false
+
+retrieval:
+  embedding_model_name: "sentence-transformers/all-MiniLM-L6-v2"
+  top_k: 3
+  empty_context_message: "No context available."
+
+cors:
+  allowed_origins:
+    - "*"

chatbot-core/api/config/loader.py

Lines changed: 21 additions & 0 deletions

@@ -0,0 +1,21 @@
+"""
+YAML-based configuration loader.
+
+Loads config.yml into a dictionary and exposes it as CONFIG.
+"""
+
+import os
+import yaml
+
+def load_config():
+    """
+    Loads and parses the config.yml file located in the same directory.
+
+    Returns:
+        dict: Parsed configuration values.
+    """
+    config_path = os.path.join(os.path.dirname(__file__), "config.yml")
+    with open(config_path, "r", encoding='utf-8') as f:
+        return yaml.safe_load(f)
+
+CONFIG = load_config()
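For orientation, a minimal usage sketch of the resulting CONFIG dictionary (assuming the package root is on the import path, as main.py below relies on):

from api.config.loader import CONFIG

# Nested dictionary access mirrors the YAML structure in config.yml.
print(CONFIG["api"]["prefix"])      # "/api/chatbot"
print(CONFIG["llm"]["max_tokens"])  # 512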

chatbot-core/api/main.py

Lines changed: 22 additions & 0 deletions

@@ -0,0 +1,22 @@
+"""
+Main entry point for the FastAPI application.
+"""
+
+from fastapi import FastAPI
+from fastapi.middleware.cors import CORSMiddleware
+from api.routes import chatbot
+from api.config.loader import CONFIG
+
+app = FastAPI()
+
+# CORS
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=CONFIG["cors"]["allowed_origins"],
+    allow_credentials=True,
+    allow_methods=["*"],
+    allow_headers=["*"],
+)
+
+# Routes
+app.include_router(chatbot.router, prefix=CONFIG["api"]["prefix"])
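A minimal sketch for serving the app locally (assumes uvicorn is installed and chatbot-core is the working directory, so the api package resolves; the helper script itself is hypothetical and not part of this commit):

# run_dev.py (hypothetical helper)
import uvicorn

if __name__ == "__main__":
    # Serves the FastAPI app defined in api/main.py on localhost.
    uvicorn.run("api.main:app", host="127.0.0.1", port=8000, reload=True)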

chatbot-core/api/models/__init__.py

Whitespace-only changes.
Lines changed: 9 additions & 0 deletions

@@ -0,0 +1,9 @@
+"""Loads and exports the sentence transformer model once."""
+
+from rag.embedding.embedding_utils import load_embedding_model
+from api.config.loader import CONFIG
+from utils import LoggerFactory
+
+logger = LoggerFactory.instance().get_logger("api")
+
+EMBEDDING_MODEL = load_embedding_model(CONFIG["retrieval"]["embedding_model_name"], logger)
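A brief usage sketch (assuming load_embedding_model returns a sentence-transformers SentenceTransformer, which the configured all-MiniLM-L6-v2 name suggests; the import path below is likewise an assumption, since the diff header does not show this file's name):

from api.models import EMBEDDING_MODEL  # assumed import path for this module

# Encode a query into a dense vector for similarity search.
vector = EMBEDDING_MODEL.encode("How do I reset my password?")
print(vector.shape)  # (384,) for all-MiniLM-L6-v2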
Lines changed: 64 additions & 0 deletions

@@ -0,0 +1,64 @@
+"""
+Llama.cpp Provider Implementation
+
+Implements the LLMProvider interface using a local model.
+
+This provider uses llama-cpp-python to run inference
+on quantized models (GGUF format).
+"""
+
+from threading import Lock
+from llama_cpp import Llama
+from api.config.loader import CONFIG
+from api.models.llm_provider import LLMProvider
+from utils import LoggerFactory
+
+llm_config = CONFIG["llm"]
+logger = LoggerFactory.instance().get_logger("llm")
+
+# pylint: disable=too-few-public-methods
+class LlamaCppProvider(LLMProvider):
+    """
+    LLMProvider implementation for local llama.cpp models.
+    """
+    def __init__(self):
+        """
+        Initializes the Llama model with configuration from config.yml.
+        Sets up a lock to ensure thread-safe usage.
+        """
+        self.llm = Llama(
+            model_path=llm_config["model_path"],
+            n_ctx=llm_config["context_length"],
+            n_threads=llm_config["threads"],
+            n_gpu_layers=llm_config["gpu_layers"],
+            verbose=llm_config["verbose"]
+        )
+        self.lock = Lock()
+
+    def generate(self, prompt: str, max_tokens: int) -> str:
+        """
+        Generate a response from the model given a prompt.
+
+        Args:
+            prompt (str): Prompt to feed into the model.
+            max_tokens (int): Maximum number of tokens to generate.
+
+        Returns:
+            str: The generated text response.
+        """
+        try:
+            with self.lock:
+                output = self.llm(
+                    prompt=prompt,
+                    max_tokens=max_tokens,
+                    echo=False
+                )
+            return output["choices"][0]["text"].strip()
+        except ValueError as e:
+            logger.error("Invalid model configuration: %s", e)
+            raise RuntimeError("LLM model could not be initialized. Check the model path.") from e
+        except Exception as e:  # pylint: disable=broad-exception-caught
+            logger.error("Unexpected error during LLM generation: %s", e)
+            return "Sorry, something went wrong during generation."
+
+llm_provider = LlamaCppProvider()
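A short usage sketch of the module-level singleton (the module name below is hypothetical, since the diff header does not show this file's name; assumes the GGUF file from config.yml exists on disk):

from api.models.llama_cpp_provider import llm_provider  # hypothetical module name

# The Lock inside generate() serializes calls, so concurrent FastAPI
# requests can safely share this single in-process model instance.
answer = llm_provider.generate(
    prompt="[INST] Summarize retrieval-augmented generation in one sentence. [/INST]",
    max_tokens=128,
)
print(answer)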
chatbot-core/api/models/llm_provider.py

Lines changed: 20 additions & 0 deletions

@@ -0,0 +1,20 @@
+"""
+LLM Provider Interface
+
+Defines an abstract class for all callable LLMs.
+"""
+
+from abc import ABC, abstractmethod
+
+# pylint: disable=too-few-public-methods
+class LLMProvider(ABC):
+    """
+    Abstract class for LLM providers.
+    A local model or an external API extends/implements it.
+    """
+
+    @abstractmethod
+    def generate(self, prompt: str, max_tokens: int) -> str:
+        """
+        Generate a response given a prompt.
+        """
