Skip to content

Commit 2039070

Browse files
authored
Merge pull request #668 from qingqing01/acc_image_proc
Accelerating image processing for CNN
2 parents b24bf99 + 978d6e8 commit 2039070

File tree

2 files changed

+279
-14
lines changed

2 files changed

+279
-14
lines changed
Lines changed: 262 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,262 @@
1+
import os, sys
2+
import numpy as np
3+
from PIL import Image
4+
from cStringIO import StringIO
5+
import multiprocessing
6+
import functools
7+
import itertools
8+
9+
from paddle.utils.image_util import *
10+
from paddle.trainer.config_parser import logger
11+
12+
# OpenCV is optional: when it cannot be imported, `cv2` is set to None so the
# rest of the module can test `cv2 is not None` and fall back to PIL.
try:
    import cv2
except ImportError:
    logger.warning("OpenCV2 is not installed, using PIL to process")
    cv2 = None
17+
18+
__all__ = ["CvTransformer", "PILTransformer", "MultiProcessImageTransformer"]
19+
20+
21+
class CvTransformer(ImageTransformer):
    """
    Image transformer that uses python-opencv (``cv2``) to process images.

    ``transform`` resizes the image so that its shorter edge equals
    ``min_size``, crops a ``crop_size`` x ``crop_size`` square (random crop
    plus random horizontal flip when ``is_train`` is true, center crop
    otherwise), casts to float32 and then delegates to
    ``ImageTransformer.transformer`` for transpose / channel swap / mean
    subtraction.
    """

    def __init__(
            self,
            min_size=None,
            crop_size=None,
            transpose=(2, 0, 1),  # transpose to C * H * W
            channel_swap=None,
            mean=None,
            is_train=True,
            is_color=True):
        """
        :param min_size: target length of the shorter edge after resizing.
        :param crop_size: edge length of the square crop.
        :param transpose: axis order forwarded to ImageTransformer.
        :param channel_swap: channel order forwarded to ImageTransformer.
        :param mean: mean value(s) forwarded to ImageTransformer.
        :param is_train: random crop/flip when true, center crop otherwise.
        :param is_color: decode images as color (true) or grayscale (false).
        """
        ImageTransformer.__init__(self, transpose, channel_swap, mean, is_color)
        self.min_size = min_size
        self.crop_size = crop_size
        self.is_train = is_train

    def _imread_flag(self):
        """Return the cv2 decode flag matching ``self.is_color``."""
        if self.is_color:
            return cv2.CV_LOAD_IMAGE_COLOR
        return cv2.CV_LOAD_IMAGE_GRAYSCALE

    def resize(self, im, min_size):
        """
        Resize ``im`` so that its shorter edge becomes ``min_size`` while
        keeping the aspect ratio.

        im: (H x W [x K]) ndarray.
        """
        row, col = im.shape[:2]
        new_row, new_col = min_size, min_size
        # Scale the longer edge proportionally; `//` keeps the sizes integral.
        if row > col:
            new_row = min_size * row // col
        else:
            new_col = min_size * col // row
        # cv2.resize expects dsize as (width, height), i.e. (cols, rows).
        im = cv2.resize(im, (new_col, new_row), interpolation=cv2.INTER_CUBIC)
        return im

    def crop_and_flip(self, im):
        """
        Return cropped (and, while training, possibly mirrored) image.
        The size of the cropped image is crop_size * crop_size.
        im: (H x W x K) ndarrays
        """
        row, col = im.shape[:2]
        if self.is_train:
            # Random top-left corner such that the crop stays inside the image.
            start_h = np.random.randint(0, row - self.crop_size + 1)
            start_w = np.random.randint(0, col - self.crop_size + 1)
        else:
            # Center crop.
            start_h = (row - self.crop_size) // 2
            start_w = (col - self.crop_size) // 2
        end_h, end_w = start_h + self.crop_size, start_w + self.crop_size
        # Slicing only the first two axes works for both color (H x W x K)
        # and grayscale (H x W) arrays.
        im = im[start_h:end_h, start_w:end_w]
        if self.is_train and np.random.randint(2) == 0:
            # Mirror horizontally by reversing the column axis.
            im = im[:, ::-1]
        return im

    def transform(self, im):
        """Resize, crop/flip and normalize a decoded image array."""
        im = self.resize(im, self.min_size)
        im = self.crop_and_flip(im)
        im = im.astype('float32')
        # transpose, swap channel, sub mean
        # The result must be captured: a transpose cannot be applied to the
        # input array in place.
        # NOTE(review): ImageTransformer.transformer is defined outside this
        # file and is assumed to return the transformed array - confirm.
        im = ImageTransformer.transformer(self, im)
        return im

    def load_image_from_string(self, data):
        """Decode an encoded image held in the byte string ``data``."""
        im = cv2.imdecode(np.fromstring(data, np.uint8), self._imread_flag())
        return im

    def transform_from_string(self, data):
        """Decode ``data`` and run it through :meth:`transform`."""
        im = self.load_image_from_string(data)
        return self.transform(im)

    def load_image_from_file(self, file):
        """Read the image stored at path ``file``."""
        im = cv2.imread(file, self._imread_flag())
        return im

    def transform_from_file(self, file):
        """Read the image at ``file`` and run it through :meth:`transform`."""
        im = self.load_image_from_file(file)
        return self.transform(im)
101+
102+
103+
class PILTransformer(ImageTransformer):
    """
    Image transformer that uses PIL to process images.

    ``transform`` resizes the image so that its shorter edge equals
    ``min_size``, crops a ``crop_size`` x ``crop_size`` square (random crop
    plus random horizontal flip when ``is_train`` is true, center crop
    otherwise), converts it to a float32 numpy array and then delegates to
    ``ImageTransformer.transformer`` for transpose / channel swap / mean
    subtraction.
    """

    def __init__(
            self,
            min_size=None,
            crop_size=None,
            transpose=(2, 0, 1),  # transpose to C * H * W
            channel_swap=None,
            mean=None,
            is_train=True,
            is_color=True):
        """
        :param min_size: target length of the shorter edge after resizing.
        :param crop_size: edge length of the square crop.
        :param transpose: axis order forwarded to ImageTransformer.
        :param channel_swap: channel order forwarded to ImageTransformer.
        :param mean: mean value(s) forwarded to ImageTransformer.
        :param is_train: random crop/flip when true, center crop otherwise.
        :param is_color: forwarded to ImageTransformer.
        """
        ImageTransformer.__init__(self, transpose, channel_swap, mean, is_color)
        self.min_size = min_size
        self.crop_size = crop_size
        self.is_train = is_train

    def resize(self, im, min_size):
        """
        Resize the PIL image ``im`` so that its shorter edge becomes
        ``min_size`` while keeping the aspect ratio.
        """
        # PIL reports size as (width, height), and Image.resize takes the
        # target size in the same order.
        width, height = im.size[:2]
        new_width, new_height = min_size, min_size
        if width > height:
            new_width = min_size * width // height
        else:
            new_height = min_size * height // width
        im = im.resize((new_width, new_height), Image.ANTIALIAS)
        return im

    def crop_and_flip(self, im):
        """
        Return cropped (and, while training, possibly mirrored) image.
        The size of the cropped image is crop_size * crop_size.
        """
        width, height = im.size[:2]
        if self.is_train:
            # Random top-left corner such that the crop stays inside the image.
            left = np.random.randint(0, width - self.crop_size + 1)
            upper = np.random.randint(0, height - self.crop_size + 1)
        else:
            # Center crop.
            left = (width - self.crop_size) // 2
            upper = (height - self.crop_size) // 2
        right, lower = left + self.crop_size, upper + self.crop_size
        # Image.crop takes the box as (left, upper, right, lower).
        im = im.crop((left, upper, right, lower))
        if self.is_train and np.random.randint(2) == 0:
            im = im.transpose(Image.FLIP_LEFT_RIGHT)
        return im

    def transform(self, im):
        """Resize, crop/flip and normalize a PIL image; returns an ndarray."""
        im = self.resize(im, self.min_size)
        im = self.crop_and_flip(im)
        im = np.array(im, dtype=np.float32)  # convert to numpy.array
        # transpose, swap channel, sub mean
        # The result must be captured: a transpose cannot be applied to the
        # input array in place.
        # NOTE(review): ImageTransformer.transformer is defined outside this
        # file and is assumed to return the transformed array - confirm.
        im = ImageTransformer.transformer(self, im)
        return im

    def load_image_from_string(self, data):
        """Open an encoded image held in the byte string ``data``."""
        im = Image.open(StringIO(data))
        return im

    def transform_from_string(self, data):
        """Open ``data`` and run it through :meth:`transform`."""
        im = self.load_image_from_string(data)
        return self.transform(im)

    def load_image_from_file(self, file):
        """Open the image stored at path ``file``."""
        im = Image.open(file)
        return im

    def transform_from_file(self, file):
        """Open the image at ``file`` and run it through :meth:`transform`."""
        im = self.load_image_from_file(file)
        return self.transform(im)
174+
175+
176+
def job(is_img_string, transformer, sample):
    """
    Transform one ``(data, label)`` sample; used as the worker function of
    the multiprocessing pool.

    :param is_img_string: when true ``data`` is an encoded image string,
                          otherwise it is the file name of an image.
    :param transformer: object providing ``transform_from_string`` and
                        ``transform_from_file``.
    :param sample: a ``(data, label)`` pair.
    :return: ``(transformed_image, label)``.
    """
    # Unpack inside the body: tuple parameters in the signature are
    # Python-2-only syntax, while a single positional tuple works everywhere.
    data, label = sample
    if is_img_string:
        return transformer.transform_from_string(data), label
    else:
        return transformer.transform_from_file(data), label
181+
182+
183+
class MultiProcessImageTransformer(object):
    def __init__(self,
                 procnum=10,
                 resize_size=None,
                 crop_size=None,
                 transpose=(2, 0, 1),
                 channel_swap=None,
                 mean=None,
                 is_train=True,
                 is_color=True,
                 is_img_string=True):
        """
        Processing image with multi-process. If it is used in PyDataProvider,
        the simple usage for CNN is as follows:

        .. code-block:: python

            def hook(settings, is_train, **kwargs):
                settings.is_train = is_train
                settings.mean_values = np.array([103.939,116.779,123.68], dtype=np.float32)
                settings.input_types = [
                    dense_vector(3 * 224 * 224),
                    integer_value(1)]
                settings.transformer = MultiProcessImageTransformer(
                    procnum=10,
                    resize_size=256,
                    crop_size=224,
                    transpose=(2, 0, 1),
                    mean=settings.mean_values,
                    is_train=settings.is_train)


            @provider(init_hook=hook, pool_size=20480)
            def process(settings, file_list):
                with open(file_list, 'r') as fdata:
                    for line in fdata:
                        data_dic = np.load(line.strip()) # load the data batch pickled by Pickle.
                        data = data_dic['data']
                        labels = data_dic['label']
                        labels = np.array(labels, dtype=np.float32)
                        for im, lab in settings.transformer.run(data, labels):
                            yield [im.astype('float32'), int(lab)]

        :param procnum: processor number.
        :type procnum: int
        :param resize_size: the shorter edge size of image after resizing.
        :type resize_size: int
        :param crop_size: the cropping size.
        :type crop_size: int
        :param transpose: the transpose order, Paddle only allow C * H * W order.
        :type transpose: tuple or list
        :param channel_swap: the channel swap order, RGB or BRG.
        :type channel_swap: tuple or list
        :param mean: the mean values of image, per-channel mean or element-wise mean.
        :type mean: array, The dimension is 1 for per-channel mean.
                    The dimension is 3 for element-wise mean.
        :param is_train: training period or testing period.
        :type is_train: bool.
        :param is_color: the image is color or gray.
        :type is_color: bool.
        :param is_img_string: The input can be the file name of image or image string.
        :type is_img_string: bool.
        """

        self.procnum = procnum
        self.pool = multiprocessing.Pool(procnum)
        self.is_img_string = is_img_string
        # Use the OpenCV backend when cv2 was importable, PIL otherwise.
        if cv2 is not None:
            self.transformer = CvTransformer(resize_size, crop_size, transpose,
                                             channel_swap, mean, is_train,
                                             is_color)
        else:
            self.transformer = PILTransformer(resize_size, crop_size, transpose,
                                              channel_swap, mean, is_train,
                                              is_color)

    def run(self, data, label):
        """
        Transform the samples in the worker pool.

        :param data: iterable of image strings or image file names
                     (according to ``is_img_string``).
        :param label: iterable of labels, paired element-wise with ``data``.
        :return: the pool's ``imap_unordered`` iterator over
                 ``(transformed_image, label)`` pairs; results arrive in
                 arbitrary order.
        """
        fun = functools.partial(job, self.is_img_string, self.transformer)
        # itertools.izip only exists on Python 2; on Python 3 the builtin zip
        # is the lazy equivalent.
        lazy_zip = getattr(itertools, "izip", zip)
        return self.pool.imap_unordered(
            fun, lazy_zip(data, label), chunksize=100 * self.procnum)

python/paddle/utils/image_util.py

Lines changed: 17 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -186,29 +186,32 @@ def __init__(self,
186186
channel_swap=None,
187187
mean=None,
188188
is_color=True):
189-
self.transpose = transpose
190-
self.channel_swap = None
191-
self.mean = None
192189
self.is_color = is_color
190+
self.set_transpose(transpose)
191+
self.set_channel_swap(channel_swap)
192+
self.set_mean(mean)
193193

194194
def set_transpose(self, order):
195-
if self.is_color:
196-
assert 3 == len(order)
195+
if order is not None:
196+
if self.is_color:
197+
assert 3 == len(order)
197198
self.transpose = order
198199

199200
def set_channel_swap(self, order):
200-
if self.is_color:
201-
assert 3 == len(order)
201+
if order is not None:
202+
if self.is_color:
203+
assert 3 == len(order)
202204
self.channel_swap = order
203205

204206
def set_mean(self, mean):
205-
# mean value, may be one value per channel
206-
if mean.ndim == 1:
207-
mean = mean[:, np.newaxis, np.newaxis]
208-
else:
209-
# elementwise mean
210-
if self.is_color:
211-
assert len(mean.shape) == 3
207+
if mean is not None:
208+
# mean value, may be one value per channel
209+
if mean.ndim == 1:
210+
mean = mean[:, np.newaxis, np.newaxis]
211+
else:
212+
# elementwise mean
213+
if self.is_color:
214+
assert len(mean.shape) == 3
212215
self.mean = mean
213216

214217
def transformer(self, data):

0 commit comments

Comments
 (0)