Skip to content

Commit a0adda4

Browse files
committed
fix unit of e2e and warmup in benchmark
1 parent 6fe9bc9 commit a0adda4

File tree

2 files changed

+42
-35
lines changed

2 files changed

+42
-35
lines changed

docs/module_usage/instructions/benchmark.md

Lines changed: 26 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -30,45 +30,45 @@ python main.py \
3030
+----------------+-----------------+-----------------+------------------------+
3131
| Component | Total Time (ms) | Number of Calls | Avg Time Per Call (ms) |
3232
+----------------+-----------------+-----------------+------------------------+
33-
| ReadCmp | 100.20136833 | 10 | 10.02013683 |
34-
| Resize | 17.05980301 | 20 | 0.85299015 |
35-
| Normalize | 45.44949532 | 20 | 2.27247477 |
36-
| ToCHWImage | 0.03671646 | 20 | 0.00183582 |
37-
| Copy2GPU | 12.28785515 | 10 | 1.22878551 |
38-
| Infer | 76.59482956 | 10 | 7.65948296 |
39-
| Copy2CPU | 0.39863586 | 10 | 0.03986359 |
40-
| DetPostProcess | 0.43916702 | 20 | 0.02195835 |
33+
| ReadCmp | 99.60412979 | 10 | 9.96041298 |
34+
| Resize | 17.01641083 | 20 | 0.85082054 |
35+
| Normalize | 44.61312294 | 20 | 2.23065615 |
36+
| ToCHWImage | 0.03385544 | 20 | 0.00169277 |
37+
| Copy2GPU | 13.46874237 | 10 | 1.34687424 |
38+
| Infer | 71.31743431 | 10 | 7.13174343 |
39+
| Copy2CPU | 0.39076805 | 10 | 0.03907681 |
40+
| DetPostProcess | 0.36168098 | 20 | 0.01808405 |
4141
+----------------+-----------------+-----------------+------------------------+
4242
+-------------+-----------------+---------------------+----------------------------+
4343
| Stage | Total Time (ms) | Number of Instances | Avg Time Per Instance (ms) |
4444
+-------------+-----------------+---------------------+----------------------------+
45-
| PreProcess | 162.74738312 | 20 | 8.13736916 |
46-
| Inference | 89.28132057 | 20 | 4.46406603 |
47-
| PostProcess | 0.43916702 | 20 | 0.02195835 |
48-
| End2End | 0.27992606 | 20 | 0.01399630 |
49-
| WarmUp | 5.37562728 | 5 | 1.07512546 |
45+
| PreProcess | 161.26751900 | 20 | 8.06337595 |
46+
| Inference | 85.17694473 | 20 | 4.25884724 |
47+
| PostProcess | 0.36168098 | 20 | 0.01808405 |
48+
| End2End | 256.90770149 | 20 | 12.84538507 |
49+
| WarmUp | 5412.37807274 | 10 | 541.23780727 |
5050
+-------------+-----------------+---------------------+----------------------------+
5151
```
5252

5353
在 Benchmark 结果中,会统计该模型全部组件(`Component`)的总耗时(`Total Time`,单位为“毫秒”)、**调用次数**(`Number of Calls`)、**调用**平均执行耗时(`Avg Time Per Call`,单位为“毫秒”),以及按预热(`WarmUp`)、预处理(`PreProcess`)、模型推理(`Inference`)、后处理(`PostProcess`)和端到端(`End2End`)进行划分的耗时统计,包括每个阶段的总耗时(`Total Time`,单位为“毫秒”)、**样本数**(`Number of Instances`)和**单样本**平均执行耗时(`Avg Time Per Instance`,单位为“毫秒”)。同时,上述指标会保存到本地: `./benchmark/detail.csv` 和 `./benchmark/summary.csv`
5454

5555
```csv
5656
Component,Total Time (ms),Number of Calls,Avg Time Per Call (ms)
57-
ReadCmp,100.20136833190918,10,10.020136833190918
58-
Resize,17.059803009033203,20,0.8529901504516602
59-
Normalize,45.44949531555176,20,2.272474765777588
60-
ToCHWImage,0.036716461181640625,20,0.0018358230590820312
61-
Copy2GPU,12.28785514831543,10,1.228785514831543
62-
Infer,76.59482955932617,10,7.659482955932617
63-
Copy2CPU,0.3986358642578125,10,0.03986358642578125
64-
DetPostProcess,0.4391670227050781,20,0.021958351135253906
57+
ReadCmp,99.60412979125977,10,9.960412979125977
58+
Resize,17.01641082763672,20,0.8508205413818359
59+
Normalize,44.61312294006348,20,2.230656147003174
60+
ToCHWImage,0.033855438232421875,20,0.0016927719116210938
61+
Copy2GPU,13.468742370605469,10,1.3468742370605469
62+
Infer,71.31743431091309,10,7.131743431091309
63+
Copy2CPU,0.39076805114746094,10,0.039076805114746094
64+
DetPostProcess,0.3616809844970703,20,0.018084049224853516
6565
```
6666

6767
```csv
6868
Stage,Total Time (ms),Number of Instances,Avg Time Per Instance (ms)
69-
PreProcess,162.74738311767578,20,8.137369155883789
70-
Inference,89.28132057189941,20,4.464066028594971
71-
PostProcess,0.4391670227050781,20,0.021958351135253906
72-
End2End,0.279926061630249,20,0.013996303081512451
73-
WarmUp,5.375627279281616,5,1.0751254558563232
69+
PreProcess,161.26751899719238,20,8.06337594985962
70+
Inference,85.17694473266602,20,4.258847236633301
71+
PostProcess,0.3616809844970703,20,0.018084049224853516
72+
End2End,256.90770149230957,20,12.845385074615479
73+
WarmUp,5412.3780727386475,10,541.2378072738647
7474
```

paddlex/inference/utils/benchmark.py

Lines changed: 16 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@ def start(self):
3838
self._reset()
3939

4040
def warmup_stop(self, warmup_num):
41-
self._warmup_elapse = time.time() - self._warmup_start
41+
self._warmup_elapse = (time.time() - self._warmup_start) * 1000
4242
self._warmup_num = warmup_num
4343
self._reset()
4444

@@ -103,18 +103,25 @@ def gather(self, e2e_num):
103103
("End2End", self._e2e_elapse, e2e_num, self._e2e_elapse / e2e_num),
104104
]
105105
if self._warmup_elapse:
106-
summary.append(
107-
(
108-
"WarmUp",
109-
self._warmup_elapse,
110-
self._warmup_num,
111-
self._warmup_elapse / self._warmup_num,
112-
)
106+
warmup_elapse, warmup_num, warmup_avg = (
107+
self._warmup_elapse,
108+
self._warmup_num,
109+
self._warmup_elapse / self._warmup_num,
113110
)
111+
else:
112+
warmup_elapse, warmup_num, warmup_avg = 0, 0, 0
113+
summary.append(
114+
(
115+
"WarmUp",
116+
warmup_elapse,
117+
warmup_num,
118+
warmup_avg,
119+
)
120+
)
114121
return detail, summary
115122

116123
def collect(self, e2e_num):
117-
self._e2e_elapse = time.time() - self._e2e_tic
124+
self._e2e_elapse = (time.time() - self._e2e_tic) * 1000
118125
detail, summary = self.gather(e2e_num)
119126

120127
detail_head = [

0 commit comments

Comments
 (0)