@@ -3270,7 +3270,7 @@ class ShardDataloader:
3270
3270
ShardDataloader converts a dataloader to a new dataloader which provides two capabilities:
3271
3271
1. split dataloader by shard_dim to do data parallel.
3272
3272
2. reshard the output of dataloader to distributed tensor.
3273
- if is_dataset_splitted is True, just need to do reshard.
3273
+ if is_dataset_split is True, just need to do reshard.
3274
3274
3275
3275
Args:
3276
3276
dataloader (paddle.io.DataLoader): The dataloader to be sharded.
@@ -3284,7 +3284,7 @@ class ShardDataloader:
3284
3284
shard_dims (list|tuple|str|int): The mesh dimension to shard the dataloader.
3285
3285
Users can specify the shard_dim of each mesh or specify a single shard_dim for all meshes.
3286
3286
Default: None, which means the data loader will not be split, i.e. mp.
3287
- is_dataset_splitted (bool): Whether the dataset has been splitted.
3287
+ is_dataset_split (bool): Whether the dataset has been split.
3288
3288
dense_tensor_idx (list): A paired 2D list specifies the index of the dense_tensor in the output of dataloader.
3289
3289
It allows users to identify which elements within each output batch are dense_tensor.
3290
3290
first dense_tensor: the dense_tensor returned by dataloader.
@@ -3299,13 +3299,13 @@ def __init__(
3299
3299
meshes : ProcessMesh | list [ProcessMesh ] | tuple [ProcessMesh ],
3300
3300
input_keys : list [str ] | tuple [str ] | None = None ,
3301
3301
shard_dims : list | tuple | str | int | None = None ,
3302
- is_dataset_splitted : bool = False ,
3302
+ is_dataset_split : bool = False ,
3303
3303
dense_tensor_idx : list [list [int ]] | None = None ,
3304
3304
):
3305
3305
# do some check
3306
- if is_dataset_splitted is True and shard_dims is None :
3306
+ if is_dataset_split is True and shard_dims is None :
3307
3307
raise ValueError (
3308
- "shard_dims must be set when is_dataset_splitted is True"
3308
+ "shard_dims must be set when is_dataset_split is True"
3309
3309
)
3310
3310
3311
3311
self ._meshes = to_list (meshes )
@@ -3332,7 +3332,7 @@ def __init__(
3332
3332
dp_rank = mesh .get_rank_by_dim_and_process_id (shard_dim , process_id )
3333
3333
dp_world_size = mesh .get_dim_size (shard_dim )
3334
3334
3335
- if is_dataset_splitted is True or shard_dims is None :
3335
+ if is_dataset_split is True or shard_dims is None :
3336
3336
self ._dataloader = dataloader
3337
3337
self .batch_size = dataloader .batch_sampler .batch_size
3338
3338
else :
@@ -3588,15 +3588,15 @@ def shard_dataloader(
3588
3588
meshes : ProcessMesh | Sequence [ProcessMesh ],
3589
3589
input_keys : Sequence [str ] | None = None ,
3590
3590
shard_dims : Sequence [str ] | Sequence [int ] | str | int | None = None ,
3591
- is_dataset_splitted : bool = False ,
3591
+ is_dataset_split : bool = False ,
3592
3592
dense_tensor_idx : list [list [int ]] | None = None ,
3593
3593
) -> ShardDataloader :
3594
3594
"""
3595
3595
Convert the dataloader to a ShardDataloader which provides two capabilities:
3596
3596
1. split dataloader by shard_dim to do data parallel if it is not None.
3597
3597
2. reshard the output of dataloader to distributed tensor.
3598
- if is_dataset_splitted is True, it means that the dataset has been split by users, and just need to do reshard.
3599
- only if is_dataset_splitted is False and shard_dims is not None, it will do split.
3598
+ if is_dataset_split is True, it means that the dataset has been split by users, and just need to do reshard.
3599
+ only if is_dataset_split is False and shard_dims is not None, it will do split.
3600
3600
3601
3601
Args:
3602
3602
dataloader (paddle.io.DataLoader): The dataloader to be sharded. the output of dataloader
@@ -3613,7 +3613,7 @@ def shard_dataloader(
3613
3613
The mesh dimension to shard the dataloader.
3614
3614
Users can specify the shard_dim of each mesh or specify a single shard_dim for all meshes.
3615
3615
Default: None, which means the data loader will not be split, i.e. mp.
3616
- is_dataset_splitted (bool): Whether the dataset has been splitted, Default: False.
3616
+ is_dataset_split (bool): Whether the dataset has been split. Default: False.
3617
3617
dense_tensor_idx (list): A paired 2D list specifies the index of the dense_tensor in the output of dataloader.
3618
3618
It allows users to identify which elements within each output batch are dense_tensor.
3619
3619
first dense_tensor: the dense_tensor returned by dataloader.
@@ -3783,7 +3783,7 @@ def shard_dataloader(
3783
3783
meshes ,
3784
3784
input_keys ,
3785
3785
shard_dims ,
3786
- is_dataset_splitted ,
3786
+ is_dataset_split ,
3787
3787
dense_tensor_idx ,
3788
3788
)
3789
3789
0 commit comments