
Commit d210712

Fix is_dataset_splitted is_dataset_split (#71713)

This commit renames the misspelled parameter is_dataset_splitted to is_dataset_split throughout the auto-parallel shard_dataloader API (the ShardDataloader docstrings, the constructor check, and the shard_dataloader wrapper) and updates the one test that passes it.

1 parent 5183da0 commit d210712

2 files changed: +12, -12 lines

python/paddle/distributed/auto_parallel/api.py (+11, -11)
@@ -3270,7 +3270,7 @@ class ShardDataloader:
     ShardDataloader converts a dataloader to a new dataloader which provides two capabilities:
     1. split dataloader by shard_dim to do data parallel.
     2. reshard the output of dataloader to distributed tensor.
-    if is_dataset_splitted is True, just need to do reshard.
+    if is_dataset_split is True, just need to do reshard.

     Args:
         dataloader (paddle.io.DataLoader): The dataloader to be sharded.
@@ -3284,7 +3284,7 @@ class ShardDataloader:
         shard_dims (list|tuple|str|int): The mesh dimension to shard the dataloader.
             Users can specify the shard_dim of each mesh or specify a single shard_dim for all meshes.
             Default: None, which means the data loader will not be split, i.e. mp.
-        is_dataset_splitted (bool): Whether the dataset has been splitted.
+        is_dataset_split (bool): Whether the dataset has been split.
         dense_tensor_idx (list): A paired 2D list that specifies the index of the dense_tensor in the output of dataloader.
             It allows users to identify which elements within each output batch are dense_tensor.
             first dense_tensor: the dense_tensor returned by dataloader.
@@ -3299,13 +3299,13 @@ def __init__(
         meshes: ProcessMesh | list[ProcessMesh] | tuple[ProcessMesh],
         input_keys: list[str] | tuple[str] | None = None,
         shard_dims: list | tuple | str | int | None = None,
-        is_dataset_splitted: bool = False,
+        is_dataset_split: bool = False,
         dense_tensor_idx: list[list[int]] | None = None,
     ):
         # do some check
-        if is_dataset_splitted is True and shard_dims is None:
+        if is_dataset_split is True and shard_dims is None:
             raise ValueError(
-                "shard_dims must be set when is_dataset_splitted is True"
+                "shard_dims must be set when is_dataset_split is True"
             )

         self._meshes = to_list(meshes)
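The renamed check above is the only guard in the constructor. Below is a minimal sketch of how it surfaces to callers; the two-rank mesh, dataset, and sizes are illustrative assumptions, not taken from the commit.

import paddle
import paddle.distributed as dist
from paddle.io import DataLoader, TensorDataset

# Illustrative setup; the mesh shape and data are assumptions for this sketch.
mesh = dist.ProcessMesh([0, 1], dim_names=["x"])
dataset = TensorDataset([paddle.randn([8, 4]), paddle.randn([8, 1])])
loader = DataLoader(dataset, batch_size=2)

# Claiming a pre-split dataset while omitting shard_dims should trip the
# ValueError raised in __init__ above, now under the renamed keyword.
try:
    dist.shard_dataloader(loader, mesh, is_dataset_split=True, shard_dims=None)
except ValueError as e:
    print(e)  # shard_dims must be set when is_dataset_split is True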
@@ -3332,7 +3332,7 @@ def __init__(
             dp_rank = mesh.get_rank_by_dim_and_process_id(shard_dim, process_id)
             dp_world_size = mesh.get_dim_size(shard_dim)

-        if is_dataset_splitted is True or shard_dims is None:
+        if is_dataset_split is True or shard_dims is None:
             self._dataloader = dataloader
             self.batch_size = dataloader.batch_sampler.batch_size
         else:
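In the pass-through branch above, the original loader is reused and its batch size is read from dataloader.batch_sampler.batch_size, so the wrapped DataLoader must expose a batch sampler. A small sketch of that prerequisite (the dataset contents are made up for illustration):

import paddle
from paddle.io import DataLoader, TensorDataset

# Passing batch_size to DataLoader builds a BatchSampler internally, which
# is where the branch above reads the batch size from.
dataset = TensorDataset([paddle.randn([8, 4])])
loader = DataLoader(dataset, batch_size=2, shuffle=False)
print(loader.batch_sampler.batch_size)  # 2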
@@ -3588,15 +3588,15 @@ def shard_dataloader(
     meshes: ProcessMesh | Sequence[ProcessMesh],
     input_keys: Sequence[str] | None = None,
     shard_dims: Sequence[str] | Sequence[int] | str | int | None = None,
-    is_dataset_splitted: bool = False,
+    is_dataset_split: bool = False,
     dense_tensor_idx: list[list[int]] | None = None,
 ) -> ShardDataloader:
     """
     Convert the dataloader to a ShardDataloader which provides two capabilities:
     1. split dataloader by shard_dim to do data parallel if it is not None.
     2. reshard the output of dataloader to distributed tensor.
-    if is_dataset_splitted is True, it means that the dataset has been split by users, and just need to do reshard.
-    only if is_dataset_splitted is False and shard_dims is not None, it will do split.
+    if is_dataset_split is True, it means that the dataset has been split by users, and just need to do reshard.
+    only if is_dataset_split is False and shard_dims is not None, it will do split.

     Args:
         dataloader (paddle.io.DataLoader): The dataloader to be sharded. the output of dataloader
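The split/reshard rule in the docstring above can be restated as a small predicate. This is a paraphrase for illustration, not code from the patch:

def will_split(is_dataset_split: bool, shard_dims) -> bool:
    # Splitting happens only when the dataset is not already split by the
    # user and a shard dimension is given; otherwise only reshard is done.
    return (not is_dataset_split) and shard_dims is not None

assert will_split(False, "x")       # split by shard_dim, then reshard
assert not will_split(True, "x")    # dataset pre-split: reshard only
assert not will_split(False, None)  # loader not split (e.g. mp)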
@@ -3613,7 +3613,7 @@ def shard_dataloader(
             The mesh dimension to shard the dataloader.
             Users can specify the shard_dim of each mesh or specify a single shard_dim for all meshes.
             Default: None, which means the data loader will not be split, i.e. mp.
-        is_dataset_splitted (bool): Whether the dataset has been splitted, Default: False.
+        is_dataset_split (bool): Whether the dataset has been split. Default: False.
         dense_tensor_idx (list): A paired 2D list that specifies the index of the dense_tensor in the output of dataloader.
             It allows users to identify which elements within each output batch are dense_tensor.
             first dense_tensor: the dense_tensor returned by dataloader.
@@ -3783,7 +3783,7 @@ def shard_dataloader(
        meshes,
        input_keys,
        shard_dims,
-        is_dataset_splitted,
+        is_dataset_split,
        dense_tensor_idx,
    )

test/auto_parallel/pir/while_unittest_pir.py (+1, -1)
@@ -95,7 +95,7 @@ def create_data_loader(self):
            meshes=mesh,
            shard_dims="x",
            input_keys=["inputs", "label"],
-            is_dataset_splitted=True,
+            is_dataset_split=True,
        )
        return dist_dataloader
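After the rename, call sites change only the keyword, as in the test above. A fuller sketch of such a call site with a made-up dict-style dataset follows; the class name, shapes, and two-rank mesh are assumptions, not from the test file.

import paddle
import paddle.distributed as dist
from paddle.io import DataLoader, Dataset

class DemoDataset(Dataset):
    # Stand-in dataset yielding dict samples that match input_keys below.
    def __len__(self):
        return 8

    def __getitem__(self, idx):
        return {
            "inputs": paddle.randn([4]),
            "label": paddle.randint(0, 2, [1]),
        }

mesh = dist.ProcessMesh([0, 1], dim_names=["x"])
loader = DataLoader(DemoDataset(), batch_size=2)

# Each rank is assumed to already hold its own shard of the data, so
# is_dataset_split=True asks shard_dataloader to reshard outputs only.
dist_loader = dist.shard_dataloader(
    dataloader=loader,
    meshes=mesh,
    shard_dims="x",
    input_keys=["inputs", "label"],
    is_dataset_split=True,
)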
