Skip to content

Commit 4098fe6

Browse files
committed
Advance OSS contribution for [Bug]: IntelEmbedding RuntimeError
Nightly Codex produced a focused contribution for #19328. Constraint: automated nightly run; keep changes small and reviewable. Confidence: medium. Scope-risk: narrow. Tested: see uploaded nightly artifacts and workflow logs. Not tested: maintainer CI beyond this workflow.
1 parent d601b0f commit 4098fe6

2 files changed

Lines changed: 82 additions & 3 deletions

File tree

llama-index-integrations/embeddings/llama-index-embeddings-huggingface-optimum-intel/llama_index/embeddings/huggingface_optimum_intel/base.py

Lines changed: 16 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
from typing import Any, List, Optional
1+
from typing import Any, Dict, List, Optional
22

33
from llama_index.core.base.embeddings.base import (
44
DEFAULT_EMBED_BATCH_SIZE,
@@ -41,6 +41,8 @@ def __init__(
4141
cache_folder: Optional[str] = None,
4242
model: Optional[Any] = None,
4343
tokenizer: Optional[Any] = None,
44+
model_kwargs: Optional[Dict[str, Any]] = None,
45+
tokenizer_kwargs: Optional[Dict[str, Any]] = None,
4446
embed_batch_size: int = DEFAULT_EMBED_BATCH_SIZE,
4547
callback_manager: Optional[CallbackManager] = None,
4648
device: Optional[str] = None,
@@ -54,9 +56,19 @@ def __init__(
5456
"optimum-intel neural-compressor intel_extension_for_pytorch`"
5557
)
5658

59+
model_kwargs = model_kwargs or {}
60+
tokenizer_kwargs = tokenizer_kwargs or {}
61+
62+
if cache_folder:
63+
model_kwargs.setdefault("cache_dir", cache_folder)
64+
tokenizer_kwargs.setdefault("cache_dir", cache_folder)
65+
5766
device = device or infer_torch_device()
58-
model = model or IPEXModel.from_pretrained(folder_name).to(device)
59-
tokenizer = tokenizer or AutoTokenizer.from_pretrained(folder_name)
67+
model_kwargs.setdefault("weights_only", False)
68+
model = model or IPEXModel.from_pretrained(folder_name, **model_kwargs).to(device)
69+
tokenizer = tokenizer or AutoTokenizer.from_pretrained(
70+
folder_name, **tokenizer_kwargs
71+
)
6072

6173
if max_length is None:
6274
try:
@@ -83,6 +95,7 @@ def __init__(
8395
normalize=normalize,
8496
query_instruction=query_instruction,
8597
text_instruction=text_instruction,
98+
cache_folder=cache_folder,
8699
)
87100
self._model = model
88101
self._tokenizer = tokenizer
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,73 @@
1+
import sys
2+
import types
3+
14
from llama_index.core.base.embeddings.base import BaseEmbedding
25
from llama_index.embeddings.huggingface_optimum_intel import IntelEmbedding
6+
import llama_index.embeddings.huggingface_optimum_intel.base as optimum_intel_base
37

48

59
def test_optimum_intel_embedding_class():
610
names_of_base_classes = [b.__name__ for b in IntelEmbedding.__mro__]
711
assert BaseEmbedding.__name__ in names_of_base_classes
12+
13+
14+
def test_optimum_intel_load_kwargs(monkeypatch):
15+
model_calls = []
16+
tokenizer_calls = []
17+
18+
class MockConfig:
19+
max_position_embeddings = 512
20+
21+
class MockModel:
22+
config = MockConfig()
23+
24+
def to(self, device):
25+
self.device = device
26+
return self
27+
28+
class MockIPEXModel:
29+
@classmethod
30+
def from_pretrained(cls, folder_name, **kwargs):
31+
model_calls.append((folder_name, kwargs))
32+
return MockModel()
33+
34+
class MockTokenizer:
35+
model_max_length = 256
36+
37+
@classmethod
38+
def from_pretrained(cls, folder_name, **kwargs):
39+
tokenizer_calls.append((folder_name, kwargs))
40+
return cls()
41+
42+
optimum = types.ModuleType("optimum")
43+
optimum_intel = types.ModuleType("optimum.intel")
44+
optimum_intel.IPEXModel = MockIPEXModel
45+
monkeypatch.setitem(sys.modules, "optimum", optimum)
46+
monkeypatch.setitem(sys.modules, "optimum.intel", optimum_intel)
47+
monkeypatch.setattr(optimum_intel_base, "AutoTokenizer", MockTokenizer)
48+
49+
embed_model = IntelEmbedding(
50+
"Intel/bge-small-en-v1.5-rag-int8-static",
51+
cache_folder="/tmp/hf-cache",
52+
model_kwargs={"revision": "main"},
53+
tokenizer_kwargs={"use_fast": True},
54+
device="cpu",
55+
)
56+
57+
assert embed_model.cache_folder == "/tmp/hf-cache"
58+
assert model_calls == [
59+
(
60+
"Intel/bge-small-en-v1.5-rag-int8-static",
61+
{
62+
"revision": "main",
63+
"cache_dir": "/tmp/hf-cache",
64+
"weights_only": False,
65+
},
66+
)
67+
]
68+
assert tokenizer_calls == [
69+
(
70+
"Intel/bge-small-en-v1.5-rag-int8-static",
71+
{"use_fast": True, "cache_dir": "/tmp/hf-cache"},
72+
)
73+
]

0 commit comments

Comments
 (0)