@@ -1334,14 +1334,9 @@ All parameter, weight, gradient are variables in Paddle.
    Examples:
        .. code-block:: python

-          build_strategy = fluid.BuildStrategy()
-          build_strategy.reduce_strategy = fluid.BuildStrategy.ReduceStrategy.Reduce
-
-          train_exe = fluid.ParallelExecutor(use_cuda=True,
-                                             loss_name=loss.name,
-                                             build_strategy=build_strategy)
-
-          train_loss, = train_exe.run([loss.name], feed=feed_dict)
+          import paddle.fluid as fluid
+          build_strategy = fluid.BuildStrategy()
+          build_strategy.reduce_strategy = fluid.BuildStrategy.ReduceStrategy.Reduce
)DOC");

  py::enum_<BuildStrategy::ReduceStrategy>(build_strategy, "ReduceStrategy")
@@ -1363,11 +1358,19 @@ All parameter, weight, gradient are variables in Paddle.
            self.reduce_ = strategy;
          },
          R"DOC(The type is STR, there are two reduce strategies in ParallelExecutor,
-                  'AllReduce' and 'Reduce'. If you want that all the parameters'
-                  optimization are done on all devices independently, you should choose 'AllReduce';
-                  if you choose 'Reduce', all the parameters' optimization will be evenly distributed
-                  to different devices, and then broadcast the optimized parameter to other devices.
-                  In some models, `Reduce` is faster. Default 'AllReduce'. )DOC")
+                  'AllReduce' and 'Reduce'. If you want all the parameters'
+                  optimization to be done on all devices independently, you should choose 'AllReduce';
+                  if you choose 'Reduce', all the parameters' optimization will be evenly distributed
+                  to different devices, and the optimized parameters will then be broadcast to the other devices.
+                  In some models, `Reduce` is faster. Default 'AllReduce'.
+
+                  Examples:
+                      .. code-block:: python
+
+                          import paddle.fluid as fluid
+                          build_strategy = fluid.BuildStrategy()
+                          build_strategy.reduce_strategy = fluid.BuildStrategy.ReduceStrategy.Reduce
+                  )DOC")
      .def_property(
          "gradient_scale_strategy",
          [](const BuildStrategy &self) { return self.gradient_scale_; },
@@ -1377,10 +1380,18 @@ All parameter, weight, gradient are variables in Paddle.
            self.gradient_scale_ = strategy;
          },
          R"DOC(The type is STR, there are three ways of defining :math:`loss@grad` in
-                  ParallelExecutor, 'CoeffNumDevice', 'One' and 'Customized'. By default,
-                  ParallelExecutor sets the :math:`loss@grad` according to the number of devices.
-                  If you want to customize :math:`loss@grad`, you can choose 'Customized'.
-                  Default 'CoeffNumDevice'.)DOC")
+                  ParallelExecutor, 'CoeffNumDevice', 'One' and 'Customized'. By default,
+                  ParallelExecutor sets the :math:`loss@grad` according to the number of devices.
+                  If you want to customize :math:`loss@grad`, you can choose 'Customized'.
+                  Default 'CoeffNumDevice'.
+
+                  Examples:
+                      .. code-block:: python
+
+                          import paddle.fluid as fluid
+                          build_strategy = fluid.BuildStrategy()
+                          build_strategy.gradient_scale_strategy = fluid.BuildStrategy.GradientScaleStrategy.Customized
+                  )DOC")
      .def_property(
          "debug_graphviz_path",
          [](const BuildStrategy &self) { return self.debug_graphviz_path_; },
@@ -1389,8 +1400,16 @@ All parameter, weight, gradient are variables in Paddle.
            self.debug_graphviz_path_ = path;
          },
          R"DOC(The type is STR, debug_graphviz_path indicate the path that
-                  writing the SSA Graph to file in the form of graphviz, you.
-                  It is useful for debugging. Default "")DOC")
+                  writing the SSA Graph to file in the form of graphviz.
+                  It is useful for debugging. Default ""
+
+                  Examples:
+                      .. code-block:: python
+
+                          import paddle.fluid as fluid
+                          build_strategy = fluid.BuildStrategy()
+                          build_strategy.debug_graphviz_path = "./graph"  # any writable path (illustrative)
+                  )DOC")
      .def_property(
          "enable_sequential_execution",
          [](const BuildStrategy &self) {
@@ -1400,7 +1419,15 @@ All parameter, weight, gradient are variables in Paddle.
            PADDLE_ENFORCE(!self.IsFinalized(), "BuildStrategy is finlaized.");
            self.enable_sequential_execution_ = b;
          },
-          R"DOC(The type is BOOL. If set True, the execution order of ops would be the same as what is in the program. Default False.)DOC")
+          R"DOC(The type is BOOL. If set True, the execution order of ops would be the same as what is in the program. Default False.
+
+                  Examples:
+                      .. code-block:: python
+
+                          import paddle.fluid as fluid
+                          build_strategy = fluid.BuildStrategy()
+                          build_strategy.enable_sequential_execution = True
+                  )DOC")
      .def_property(
          "remove_unnecessary_lock",
          [](const BuildStrategy &self) {
@@ -1410,7 +1437,15 @@ All parameter, weight, gradient are variables in Paddle.
            PADDLE_ENFORCE(!self.IsFinalized(), "BuildStrategy is finlaized.");
            self.remove_unnecessary_lock_ = b;
          },
-          R"DOC(The type is BOOL. If set True, some locks in GPU ops would be released and ParallelExecutor would run faster. Default True.)DOC")
+          R"DOC(The type is BOOL. If set True, some locks in GPU ops would be released and ParallelExecutor would run faster. Default True.
+
+                  Examples:
+                      .. code-block:: python
+
+                          import paddle.fluid as fluid
+                          build_strategy = fluid.BuildStrategy()
+                          build_strategy.remove_unnecessary_lock = True
+                  )DOC")
      .def_property(
          "num_trainers",
          [](const BuildStrategy &self) { return self.num_trainers_; },
@@ -1439,8 +1474,16 @@ All parameter, weight, gradient are variables in Paddle.
            self.fuse_elewise_add_act_ops_ = b;
          },
          R"DOC(The type is BOOL, fuse_elewise_add_act_ops indicate whether
-                  to fuse elementwise_add_op and activation_op,
-                  it may make the execution faster. Default False)DOC")
+                  to fuse elementwise_add_op and activation_op,
+                  it may make the execution faster. Default False
+
+                  Examples:
+                      .. code-block:: python
+
+                          import paddle.fluid as fluid
+                          build_strategy = fluid.BuildStrategy()
+                          build_strategy.fuse_elewise_add_act_ops = True
+                  )DOC")
      .def_property(
          "fuse_relu_depthwise_conv",
          [](const BuildStrategy &self) {
@@ -1451,10 +1494,18 @@ All parameter, weight, gradient are variables in Paddle.
            self.fuse_relu_depthwise_conv_ = b;
          },
          R"DOC(The type is BOOL, fuse_relu_depthwise_conv indicate whether
-                  to fuse relu and depthwise_conv2d,
-                  it will save GPU memory and may make the execution faster.
-                  This options is only available in GPU devices.
-                  Default False.)DOC")
+                  to fuse relu and depthwise_conv2d,
+                  it will save GPU memory and may make the execution faster.
+                  This option is only available on GPU devices.
+                  Default False.
+
+                  Examples:
+                      .. code-block:: python
+
+                          import paddle.fluid as fluid
+                          build_strategy = fluid.BuildStrategy()
+                          build_strategy.fuse_relu_depthwise_conv = True
+                  )DOC")
      .def_property(
          "fuse_broadcast_ops",
          [](const BuildStrategy &self) { return self.fuse_broadcast_ops_; },
@@ -1491,7 +1542,15 @@ All parameter, weight, gradient are variables in Paddle.
                Current implementation doesn't support FP16 training and CPU.
                And only synchronous on one machine, not all machines.

-                Default False)DOC")
+                Default False
+
+                Examples:
+                    .. code-block:: python
+
+                        import paddle.fluid as fluid
+                        build_strategy = fluid.BuildStrategy()
+                        build_strategy.sync_batch_norm = True
+                )DOC")
      .def_property(
          "memory_optimize",
          [](const BuildStrategy &self) { return self.memory_optimize_; },
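
For reference, here is a minimal end-to-end sketch that combines several of the options documented in this diff with ParallelExecutor, following the example that the first hunk removes from the class-level docstring. It assumes a program that already defines `loss` and a `feed_dict`; those names are placeholders, not part of this change.

    import paddle.fluid as fluid

    build_strategy = fluid.BuildStrategy()
    build_strategy.reduce_strategy = fluid.BuildStrategy.ReduceStrategy.Reduce
    build_strategy.fuse_elewise_add_act_ops = True   # fuse elementwise_add and activation ops
    build_strategy.remove_unnecessary_lock = True    # release some GPU-op locks (the default)

    # `loss` and `feed_dict` come from a previously built program (placeholders here).
    train_exe = fluid.ParallelExecutor(use_cuda=True,
                                       loss_name=loss.name,
                                       build_strategy=build_strategy)
    train_loss, = train_exe.run([loss.name], feed=feed_dict)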