Skip to content

Commit 16622c6

Browse files
committed
add dataset verify script
1 parent 7d44d8e commit 16622c6

File tree

1 file changed

+58
-0
lines changed

1 file changed

+58
-0
lines changed

scripts/dataset_verify.py

Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,58 @@
1+
#!/usr/bin/env python
2+
# coding: utf-8
3+
4+
#
5+
# This script accesses every sample in the dataset to verify that its image can be loaded.
6+
# It saves the per-sample results for the entire dataset to a CSV file that can be inspected later.
7+
#
8+
9+
10+
11+
import os,sys
12+
from glob import glob
13+
import matplotlib.pyplot as plt
14+
import numpy as np
15+
import argparse
16+
17+
import torch
18+
import torch.nn.functional as F
19+
import torchvision, torchvision.transforms
20+
import torchxrayvision as xrv
21+
import dataset_utils
22+
from tqdm.autonotebook import tqdm
23+
import pandas as pd
24+
import time
25+
26+
# ---------------------------------------------------------------------------
# Command line: a single required positional argument naming the dataset.
# ---------------------------------------------------------------------------
parser = argparse.ArgumentParser(
    description='Load every sample of a dataset and log the outcome to a CSV file.')
# A positional argument without nargs is always required, so a default value
# would never be used; none is given, and a real help string is supplied.
parser.add_argument('dataset', type=str,
                    help='dataset identifier passed to dataset_utils.get_data; '
                         'also used as the prefix of the output CSV file name')
cfg = parser.parse_args()

# get_data is a project-local helper; only the first element of what it
# returns is verified here.
# NOTE(review): presumably it returns a sequence of datasets -- confirm
# against dataset_utils.
d = dataset_utils.get_data(cfg.dataset, merge=False)[0]

# Single definition of the log path so the periodic checkpoint and the final
# write always target the same file.
log_path = f'{cfg.dataset}-verifylog.csv'

# One dict per sample: always 'idx', then either 'img_shape' and 'time' on
# success or 'errortype' and 'errorargs' on failure.
results = []

# Index explicitly so that each d[i] access is guarded by its own try block
# and a failing sample does not stop the remaining ones from being checked.
for i in tqdm(range(len(d))):
    result = {'idx': i}
    try:
        # perf_counter is a monotonic, high-resolution clock, so the measured
        # duration is not affected by system clock adjustments.
        start = time.perf_counter()
        s = d[i]
        end = time.perf_counter()

        result['img_shape'] = s['img'].shape
        result['time'] = end - start

    except KeyboardInterrupt:
        # Stop early on Ctrl-C; the partial result for this sample is
        # discarded, and everything collected so far is written below.
        print('Interrupted')
        break
    except Exception as e:
        # Deliberately broad: any failure while loading a sample is recorded
        # in the log instead of aborting the whole verification run.
        result['errortype'] = e.__class__.__name__
        result['errorargs'] = e.args

    results.append(result)
    # Checkpoint every 1000 samples (including the very first one) so that a
    # crash or kill still leaves a recent log on disk.
    if i % 1000 == 0:
        pd.DataFrame(results).to_csv(log_path, index=False)

# Final write covering every sample processed, including after an interrupt.
pd.DataFrame(results).to_csv(log_path, index=False)
58+

0 commit comments

Comments
 (0)