|
17 | 17 | #include <executorch/extension/llm/runner/text_llm_runner.h> |
18 | 18 | #include <executorch/extension/llm/runner/text_prefiller.h> |
19 | 19 | #include <executorch/extension/llm/runner/text_token_generator.h> |
| 20 | +#include <executorch/extension/memory_allocator/cpu_caching_malloc_allocator.h> |
20 | 21 | #include <executorch/runtime/core/result.h> |
21 | 22 | #include <executorch/runtime/platform/runtime.h> |
22 | 23 | #include <pytorch/tokenizers/hf_tokenizer.h> |
@@ -223,12 +224,25 @@ std::unique_ptr<TextLLMRunner> create_text_llm_runner( |
223 | 224 |
|
224 | 225 | // Create the Module |
225 | 226 | std::unique_ptr<Module> module; |
| 227 | + uint32_t max_cached_memory_size_bytes_ = 1024 * 1024 * 10; // 10MB |
226 | 228 | if (data_files.size() > 0) { |
227 | 229 | module = std::make_unique<Module>( |
228 | | - model_path, data_files, load_mode, std::move(event_tracer)); |
| 230 | + model_path, data_files, load_mode, std::move(event_tracer), |
| 231 | + nullptr, // memory allocator |
| 232 | + std::make_unique< |
| 233 | + executorch::extension::CPUCachingAllocator>( // temp memory |
| 234 | + // allocator |
| 235 | + max_cached_memory_size_bytes_)); |
229 | 236 | } else { |
230 | 237 | module = std::make_unique<Module>( |
231 | | - model_path, load_mode, std::move(event_tracer)); |
| 238 | + model_path, |
| 239 | + load_mode, |
| 240 | + std::move(event_tracer), // event tracer |
| 241 | + nullptr, // memory allocator |
| 242 | + std::make_unique< |
| 243 | + executorch::extension::CPUCachingAllocator>( // temp memory |
| 244 | + // allocator |
| 245 | + max_cached_memory_size_bytes_)); |
232 | 246 | } |
233 | 247 |
|
234 | 248 | // Get metadata from Module |
|
0 commit comments