3636from lightning .fabric .strategies .ddp import DDPStrategy
3737from lightning .fabric .strategies .registry import _StrategyRegistry
3838from lightning .fabric .strategies .strategy import _Sharded
39+ from lightning .fabric .utilities .cloud_io import get_filesystem
3940from lightning .fabric .utilities .distributed import log
4041from lightning .fabric .utilities .imports import _TORCH_GREATER_EQUAL_2_6
4142from lightning .fabric .utilities .load import _move_state_into
4546
4647if TYPE_CHECKING :
4748 from deepspeed import DeepSpeedEngine
49+ from fsspec import AbstractFileSystem
4850 from torch .optim .lr_scheduler import _LRScheduler
4951
5052_DEEPSPEED_AVAILABLE = RequirementCache ("deepspeed" )
@@ -885,9 +887,9 @@ def _validate_device_index_selection(parallel_devices: list[torch.device]) -> No
885887 )
886888
887889
def _is_deepspeed_checkpoint(path: "_PATH", fs: "AbstractFileSystem") -> bool:
    """Heuristic check whether the path points to a top-level DeepSpeed checkpoint directory.

    Args:
        path: Location to inspect. Strings and path-like objects are both accepted; the value is converted
            with ``str()`` before it is handed to the filesystem.
        fs: Filesystem object used for the lookups. Only its ``isdir`` method is called.

    Returns:
        ``True`` if ``path`` is a directory that itself contains a ``checkpoint`` directory, ``False`` otherwise.

    """
    # Coerce first: callers used to pass ``pathlib.Path`` objects, which have no ``rstrip``.
    path_str = str(path)
    # Drop trailing slashes before joining so that "dir/" does not turn into "dir//checkpoint".
    return fs.isdir(path_str) and fs.isdir(f"{path_str.rstrip('/')}/checkpoint")
891893
892894
def _parent_directory(path: str) -> str:
    """Return the parent of ``path``, or an empty string if ``path`` has no distinct parent.

    Local paths follow ``pathlib`` semantics for the current platform, so trailing separators are ignored and
    the parent of a bare relative name is ``"."``. For URLs (``protocol://...``) the protocol prefix is kept
    and only the part after it is split, using POSIX rules.

    """
    from pathlib import PurePath, PurePosixPath

    protocol, separator, remainder = path.partition("://")
    if separator:
        remote = PurePosixPath(remainder)
        remote_parent = remote.parent
        # "s3://bucket" has nothing above it: pathlib reports "." for a single relative component.
        if remote_parent == remote or str(remote_parent) == ".":
            return ""
        return f"{protocol}{separator}{remote_parent}"

    local = PurePath(path)
    local_parent = local.parent
    # Roots (and ".") are their own parent; report "no parent" instead of re-checking the same location.
    return "" if local_parent == local else str(local_parent)


def _validate_checkpoint_directory(path: _PATH) -> None:
    """Validate that the path points to a DeepSpeed checkpoint directory and suggest fixes for user error.

    Args:
        path: Location the user asked to load from. It is converted with ``str()`` and resolved through
            ``get_filesystem``.

    Raises:
        FileNotFoundError: If ``path`` is not a top-level DeepSpeed checkpoint directory. When the parent
            directory (or, for a file, the grandparent directory) is one, the message names that directory.

    """
    # A top-level DeepSpeed checkpoint directory is recognized by its "checkpoint" subfolder:
    #
    # <checkpoint directory>
    # ├── checkpoint
    # ├── latest
    # └── zero_to_fp32.py

    path_str = str(path)
    fs = get_filesystem(path_str)
    if _is_deepspeed_checkpoint(path_str, fs):
        return

    default_message = f"The provided path is not a valid DeepSpeed checkpoint: {path_str}"

    # Case 1: User may have accidentally passed the subfolder "checkpoint"
    parent = _parent_directory(path_str)
    if parent and _is_deepspeed_checkpoint(parent, fs):
        raise FileNotFoundError(
            f"{default_message}. It looks like you passed the path to a subfolder."
            f" Try to load using this parent directory instead: {parent}"
        )

    # Case 2: User may have accidentally passed the path to a file inside the "checkpoint" subfolder
    grandparent = _parent_directory(parent) if parent and fs.isfile(path_str) else ""
    if grandparent and _is_deepspeed_checkpoint(grandparent, fs):
        raise FileNotFoundError(
            f"{default_message}. It looks like you passed the path to a file inside a DeepSpeed"
            f" checkpoint folder."
            f" Try to load using this parent directory instead: {grandparent}"
        )

    raise FileNotFoundError(default_message)
926931
927932
0 commit comments