bigcode-project · nopperl · Jun 20, 2023 · Jun 20, 2023
diff --git a/README.md b/README.md
@@ -159,7 +159,7 @@ python finetune/finetune.py \
 The size of the SE dataset is more manageable when using streaming. We also have to specify the split of the dataset that is used. For more details, check the [dataset's page](https://huggingface.co/datasets/ArmelR/stack-exchange-instruction) on 🤗. Similarly, we can modify the command to account for the availability of GPUs:
 
 ```bash
-python -m torch.distributed.launch \
+python -m torch.distributed.run \
   --nproc_per_node number_of_gpus finetune/finetune.py \
   --model_path="bigcode/starcoder"\
   --dataset_name="ArmelR/stack-exchange-instruction"\

diff --git a/finetune/finetune.py b/finetune/finetune.py
@@ -76,7 +76,6 @@ def get_args():
     parser.add_argument("--num_warmup_steps", type=int, default=100)
     parser.add_argument("--weight_decay", type=float, default=0.05)
 
-    parser.add_argument("--local_rank", type=int, default=0)
     parser.add_argument("--no_fp16", action="store_false")
     parser.add_argument("--bf16", action="store_true", default=True)
     parser.add_argument("--no_gradient_checkpointing", action="store_false", default=False)
@@ -309,4 +308,4 @@ def main(args):
 
     logging.set_verbosity_error()
 
-    main(args)
+    main(args)