diff --git a/sdks/python/setup.py b/sdks/python/setup.py index 2c1dada3f1f2..23f8c914ea8a 100644 --- a/sdks/python/setup.py +++ b/sdks/python/setup.py @@ -185,6 +185,19 @@ def cythonize(*args, **kwargs): # tensorflow transitive dep, lower versions not compatible with Python3.10+ 'absl-py>=0.12.0', 'tensorflow-hub', + # tokenizers 0.23.0rc0 renamed the PyO3 kwarg of + # processors.RobertaProcessing (and BertProcessing) from `cls` to + # `cls_token` -- a drive-by rename in huggingface/tokenizers PR #1928: + # https://github.com/huggingface/tokenizers/pull/1928. + # transformers' slow CLIP tokenizer still calls + # `processors.RobertaProcessing(sep=..., cls=..., ...)` in + # transformers/models/clip/tokenization_clip.py, so model load fails with + # "RobertaProcessing.__new__() got an unexpected keyword argument 'cls'". + # The ml tox envs run with pip_pre=True (tox.ini:32), so even though no + # 0.23 stable has shipped yet, the rc gets resolved. + # Drop this cap once transformers updates the CLIP call site to + # `cls_token=` or tokenizers reinstates `cls=` as a deprecation alias. + 'tokenizers<0.23', 'torch', 'transformers', ]