3 files changed: +8 −8 lines changed

@@ -149,11 +149,11 @@ def generate():
         generated = generate()
         total_new_tokens_generated += sum(new_tokens for _, _, new_tokens in generated)
     torch.cuda.synchronize()
-    througput = (time.time() - t0) / (total_new_tokens_generated)
+    throughput = (time.time() - t0) / (total_new_tokens_generated)
     print_rank0(
         f"""
 *** Performance stats:
-Throughput per token including tokenize: {througput*1000:.2f} msecs
+Throughput per token including tokenize: {throughput*1000:.2f} msecs
 Start to ready to generate: {t_ready - t_start:.3f} secs
 Tokenize and generate {total_new_tokens_generated} (bs={args.batch_size}) tokens: {t_generate_span:.3f} secs
 Start to finish: {t_ready - t_start + t_generate_span:.3f} secs
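
Aside from the spelling, note what the corrected line computes: elapsed wall time divided by the number of generated tokens is seconds per token (per-token latency), which the stats block then reports in milliseconds. Below is a minimal standalone sketch of that measurement pattern; generate_tokens is a hypothetical stand-in for the scripts' generate() and is assumed to return the number of new tokens produced per call:

import time

import torch


def benchmark(generate_tokens, cycles: int = 5) -> float:
    """Time `cycles` generation calls; return milliseconds per generated token."""
    total_new_tokens_generated = 0
    t0 = time.time()
    for _ in range(cycles):
        total_new_tokens_generated += generate_tokens()
    # CUDA kernels launch asynchronously: synchronize before reading the clock
    # so the measurement includes all queued GPU work.
    if torch.cuda.is_available():
        torch.cuda.synchronize()
    throughput = (time.time() - t0) / total_new_tokens_generated
    return throughput * 1000  # msecs per token, as in the stats printout

The second file receives the identical fix: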

@@ -282,11 +282,11 @@ def generate():
         generated = generate()
         total_new_tokens_generated += sum(new_tokens for _, _, new_tokens in generated)
     torch.cuda.synchronize()
-    througput = (time.time() - t0) / (total_new_tokens_generated)
+    throughput = (time.time() - t0) / (total_new_tokens_generated)
     print_rank0(
         f"""
 *** Performance stats:
-Throughput per token including tokenize: {througput*1000:.2f} msecs
+Throughput per token including tokenize: {throughput*1000:.2f} msecs
 Start to ready to generate: {t_ready - t_start:.3f} secs
 Tokenize and generate {total_new_tokens_generated} (bs={args.batch_size}) tokens: {t_generate_span:.3f} secs
 Start to finish: {t_ready - t_start + t_generate_span:.3f} secs

@@ -1,8 +1,8 @@
 # usage:
-# deepspeed --num_gpus 8 bloom-ds-inference.py --name bigscience/bloom
+# deepspeed --num_gpus 8 bloom-ds-zero-inference.py --name bigscience/bloom
 #
 # to run benchmarks:
-# deepspeed --num_gpus 8 bloom-ds-inference.py --name bigscience/bloom --benchmark
+# deepspeed --num_gpus 8 bloom-ds-zero-inference.py --name bigscience/bloom --benchmark
 #


@@ -212,11 +212,11 @@ def generate():
     torch.cuda.synchronize()
     # note that we actually generate world_size unique streams (though the benchmark feeds the same inputs)
     total_new_tokens_generated *= world_size
-    througput = (time.time() - t0) / (total_new_tokens_generated)
+    throughput = (time.time() - t0) / (total_new_tokens_generated)
     print_rank0(
         f"""
 *** Performance stats:
-Throughput per token including tokenize: {througput*1000:.2f} msecs
+Throughput per token including tokenize: {throughput*1000:.2f} msecs
 Start to ready to generate: {t_ready - t_start:.3f} secs
 Tokenize and generate {total_new_tokens_generated} (bs={args.batch_size}) tokens: {t_generate_span:.3f} secs
 Start to finish: {t_ready - t_start + t_generate_span:.3f} secs
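
One detail specific to this script: each rank runs its own generate() call, so the per-rank token count is scaled by world_size before the per-token time is computed. A small sketch of that aggregation, assuming torch.distributed is already initialized (the deepspeed launcher does this at startup):

import torch.distributed as dist


def aggregate_token_count(per_rank_new_tokens: int) -> int:
    # Every rank generates its own stream (the benchmark feeds identical inputs),
    # so the fleet-wide total is the per-rank count times the number of ranks,
    # e.g. 8 under `deepspeed --num_gpus 8`.
    return per_rank_new_tokens * dist.get_world_size()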