Skip to content

Commit 6e401ad

Browse files
committed
feat(#19): Add file upload support for chat sessions
- Add file_service.py for processing uploaded files (text extraction, validation)
- Support 30+ file types including logs, code files, configs, and images
- Add multipart upload endpoint POST /sessions/{id}/message/upload
- Add GET /files/supported-extensions endpoint
- Update frontend with attach button, file preview chips, and drag-drop ready UI
- Display image thumbnails and file icons in chat messages
- Add file size validation (10MB limit) and extension checks
- Include unit tests for file service and upload routes
1 parent 0bcfad4 commit 6e401ad

14 files changed

Lines changed: 1651 additions & 42 deletions

File tree

chatbot-core/api/models/schemas.py

Lines changed: 84 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,9 +6,26 @@
66
"""
77

88
from enum import Enum
9+
from typing import List, Optional
910
from pydantic import BaseModel, field_validator
1011

1112

13+
class FileAttachment(BaseModel):
    """
    A single uploaded file after server-side processing.

    Fields:
        filename (str): Original name of the uploaded file.
        type (str): Kind of attachment - "text" or "image".
        content (str): Extracted text, or base64-encoded image data.
        mime_type (str): MIME type detected for the file.
    """
    filename: str
    type: str
    content: str
    mime_type: str
1229
class ChatRequest(BaseModel):
1330
"""
1431
Represents a user message submitted to the chatbot.
@@ -28,12 +45,79 @@ def message_must_not_be_empty(cls, v): # pylint: disable=no-self-argument
2845
raise ValueError("Message cannot be empty.")
2946
return v
3047

48+
49+
class ChatRequestWithFiles(BaseModel):
    """
    A user message that may carry file attachments.

    Fields:
        message (str): The user's input message.
        files (List[FileAttachment]): Optional list of file attachments.

    Validation:
        - An empty message is accepted at the model level; the upload
          endpoint enforces that at least a message or files are present.
    """
    message: str
    files: Optional[List[FileAttachment]] = None

    @field_validator("message")
    def message_must_not_be_empty_unless_files(cls, v, info):  # pylint: disable=no-self-argument
        """Validator that checks that a message is not empty unless files are present."""
        # Deliberate pass-through: the files field is validated after this one,
        # so an empty message cannot be rejected here. The endpoint performs
        # the combined "message or files" presence check instead.
        return v
3171
class ChatResponse(BaseModel):
    """
    The chatbot's generated reply.
    """
    reply: str

77+
78+
class ChatResponseWithFiles(BaseModel):
    """
    The chatbot's reply plus a record of which uploads were processed.

    Fields:
        reply (str): The chatbot's text response.
        processed_files (List[str]): Names of the files that were processed.
    """
    reply: str
    processed_files: Optional[List[str]] = None
89+
90+
class FileUploadResponse(BaseModel):
    """
    Result of a single file upload operation.

    Fields:
        success (bool): Whether the upload succeeded.
        filename (str): Name of the uploaded file.
        type (str): Kind of file processed ("text" or "image").
        message (str): Human-readable status message.
    """
    success: bool
    filename: str
    type: str
    message: str
106+
class SupportedExtensionsResponse(BaseModel):
    """
    Advertises which file extensions the upload endpoint accepts.

    Fields:
        text (List[str]): Supported text file extensions.
        image (List[str]): Supported image file extensions.
        max_text_size_mb (float): Maximum text file size in MB.
        max_image_size_mb (float): Maximum image file size in MB.
    """
    text: List[str]
    image: List[str]
    max_text_size_mb: float
    max_image_size_mb: float
37121
class SessionResponse(BaseModel):
38122
"""
39123
Response model when a new chat session is created.

chatbot-core/api/routes/chatbot.py

Lines changed: 88 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,19 +6,27 @@
66
the chat service logic.
77
"""
88

9-
from fastapi import APIRouter, HTTPException, Response, status
9+
from typing import List, Optional
10+
from fastapi import APIRouter, HTTPException, Response, status, UploadFile, File, Form
1011
from api.models.schemas import (
1112
ChatRequest,
1213
ChatResponse,
1314
SessionResponse,
14-
DeleteResponse
15+
DeleteResponse,
16+
FileAttachment,
17+
SupportedExtensionsResponse
1518
)
1619
from api.services.chat_service import get_chatbot_reply
1720
from api.services.memory import (
1821
init_session,
1922
delete_session,
2023
session_exists
2124
)
25+
from api.services.file_service import (
26+
process_uploaded_file,
27+
get_supported_extensions,
28+
FileProcessingError
29+
)
2230

2331
router = APIRouter()
2432

@@ -61,6 +69,84 @@ def chatbot_reply(session_id: str, request: ChatRequest):
6169
return get_chatbot_reply(session_id, request.message)
6270

6371

72+
@router.post("/sessions/{session_id}/message/upload", response_model=ChatResponse)
73+
async def chatbot_reply_with_files(
74+
session_id: str,
75+
message: str = Form(...),
76+
files: Optional[List[UploadFile]] = File(None)
77+
):
78+
"""
79+
POST endpoint to handle chatbot replies with file uploads.
80+
81+
Receives a user message with optional file attachments and returns
82+
the assistant's reply. Files are processed and their content is
83+
included in the context for the LLM.
84+
85+
Supported file types:
86+
- Text files: .txt, .log, .md, .json, .xml, .yaml, .yml, code files
87+
- Image files: .png, .jpg, .jpeg, .gif, .webp, .bmp
88+
89+
Args:
90+
session_id (str): The ID of the session from the URL path.
91+
message (str): The user's message (form field).
92+
files (List[UploadFile]): Optional list of uploaded files.
93+
94+
Returns:
95+
ChatResponse: The chatbot's generated reply.
96+
97+
Raises:
98+
HTTPException: 404 if session not found, 400 if file processing fails,
99+
422 if message is empty and no files provided.
100+
"""
101+
if not session_exists(session_id):
102+
raise HTTPException(status_code=404, detail="Session not found.")
103+
104+
# Validate that at least message or files are provided
105+
has_message = message and message.strip()
106+
has_files = files and len(files) > 0
107+
108+
if not has_message and not has_files:
109+
raise HTTPException(
110+
status_code=422,
111+
detail="Either message or files must be provided."
112+
)
113+
114+
# Process uploaded files
115+
processed_files: List[FileAttachment] = []
116+
117+
if files:
118+
for upload_file in files:
119+
try:
120+
content = await upload_file.read()
121+
processed = process_uploaded_file(content, upload_file.filename or "unknown")
122+
processed_files.append(FileAttachment(**processed))
123+
except FileProcessingError as e:
124+
raise HTTPException(status_code=400, detail=str(e))
125+
except Exception as e:
126+
raise HTTPException(
127+
status_code=500,
128+
detail=f"Failed to process file '{upload_file.filename}': {str(e)}"
129+
)
130+
131+
# Use default message if only files provided
132+
final_message = message.strip() if has_message else "Please analyze the attached file(s)."
133+
134+
return get_chatbot_reply(session_id, final_message, processed_files if processed_files else None)
135+
136+
137+
@router.get("/files/supported-extensions", response_model=SupportedExtensionsResponse)
138+
def get_supported_file_extensions():
139+
"""
140+
GET endpoint to retrieve supported file extensions for upload.
141+
142+
Returns:
143+
SupportedExtensionsResponse: Lists of supported text and image extensions,
144+
along with size limits.
145+
"""
146+
extensions = get_supported_extensions()
147+
return SupportedExtensionsResponse(**extensions)
148+
149+
64150
@router.delete("/sessions/{session_id}", response_model=DeleteResponse)
65151
def delete_chat(session_id: str):
66152
"""

chatbot-core/api/services/chat_service.py

Lines changed: 27 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -15,8 +15,9 @@
1515
RETRIEVER_AGENT_PROMPT,
1616
CONTEXT_RELEVANCE_PROMPT
1717
)
18-
from api.models.schemas import ChatResponse, QueryType, try_str_to_query_type
18+
from api.models.schemas import ChatResponse, QueryType, try_str_to_query_type, FileAttachment
1919
from api.services.memory import get_session
20+
from api.services.file_service import format_file_context
2021
from api.models.embedding_model import EMBEDDING_MODEL
2122
from api.tools.tools import TOOL_REGISTRY
2223
from api.tools.utils import get_default_tools_call, validate_tool_calls, make_placeholder_replacer
@@ -28,34 +29,57 @@
2829
retrieval_config = CONFIG["retrieval"]
2930
CODE_BLOCK_PLACEHOLDER_PATTERN = r"\[\[(?:CODE_BLOCK|CODE_SNIPPET)_(\d+)\]\]"
3031

31-
def get_chatbot_reply(
    session_id: str,
    user_input: str,
    files: Optional[List[FileAttachment]] = None
) -> ChatResponse:
    """
    Main chatbot entry point.

    Retrieves context, builds a prompt that includes conversation memory
    (and any uploaded-file content), generates the LLM response, and
    records the exchange in session memory.

    Args:
        session_id (str): The unique ID for the chat session.
        user_input (str): The latest user message.
        files (Optional[List[FileAttachment]]): Optional list of file attachments.

    Returns:
        ChatResponse: The generated assistant response.

    Raises:
        RuntimeError: If the session is missing from the memory store.
    """
    logger.info("New message from session '%s'", session_id)
    logger.info("Handling the user query: %s", user_input)

    if files:
        logger.info("Processing %d uploaded file(s)", len(files))

    memory = get_session(session_id)
    if memory is None:
        raise RuntimeError(f"Session '{session_id}' not found in the memory store.")

    context = retrieve_context(user_input)
    logger.info("Context retrieved: %s", context)

    # Append uploaded-file content to the retrieved context, if any.
    if files:
        attachment_dicts = [attachment.model_dump() for attachment in files]
        rendered = format_file_context(attachment_dicts)
        if rendered:
            logger.info("File context added: %d characters", len(rendered))
            context = f"{context}\n\n[User Uploaded Files]\n{rendered}"

    prompt = build_prompt(user_input, context, memory)

    logger.info("Generating answer with prompt: %s", prompt)
    reply = generate_answer(prompt)

    # Record the exchange; note attached filenames alongside the user text.
    memory_entry = user_input
    if files:
        attached_names = [attachment.filename for attachment in files]
        memory_entry = f"{user_input}\n[Attached files: {', '.join(attached_names)}]"

    memory.chat_memory.add_user_message(memory_entry)
    memory.chat_memory.add_ai_message(reply)

    return ChatResponse(reply=reply)

0 commit comments

Comments
 (0)