|
| 1 | +import os, sys |
| 2 | +import numpy as np |
| 3 | +from PIL import Image |
| 4 | +from cStringIO import StringIO |
| 5 | +import multiprocessing |
| 6 | +import functools |
| 7 | +import itertools |
| 8 | + |
| 9 | +from paddle.utils.image_util import * |
| 10 | +from paddle.trainer.config_parser import logger |
| 11 | + |
| 12 | +try: |
| 13 | + import cv2 |
| 14 | +except ImportError: |
| 15 | + logger.warning("OpenCV2 is not installed, using PIL to prcoess") |
| 16 | + cv2 = None |
| 17 | + |
| 18 | +__all__ = ["CvTransformer", "PILTransformer", "MultiProcessImageTransformer"] |
| 19 | + |
| 20 | + |
class CvTransformer(ImageTransformer):
    """
    Image transformer backed by python-opencv (``cv2``).

    :meth:`transform` resizes the image so that its shorter edge equals
    ``min_size``, crops a ``crop_size`` x ``crop_size`` patch (random
    position plus a random horizontal flip when ``is_train`` is set, center
    crop otherwise), converts it to ``float32`` and finally hands it to
    ``ImageTransformer.transformer``.
    """

    def __init__(
            self,
            min_size=None,
            crop_size=None,
            transpose=(2, 0, 1),  # transpose to C * H * W
            channel_swap=None,
            mean=None,
            is_train=True,
            is_color=True):
        """
        :param min_size: length of the shorter edge after resizing.
        :param crop_size: edge length of the square crop.
        :param transpose: axis order forwarded to ``ImageTransformer``.
        :param channel_swap: channel order forwarded to ``ImageTransformer``.
        :param mean: mean value(s) forwarded to ``ImageTransformer``.
        :param is_train: random crop/flip when True, center crop otherwise.
        :param is_color: decode images as color (True) or grayscale (False).
        """
        # NOTE(review): ``self.is_color`` is read below but never assigned in
        # this class; presumably the base __init__ stores it - confirm.
        ImageTransformer.__init__(self, transpose, channel_swap, mean, is_color)
        self.min_size = min_size
        self.crop_size = crop_size
        self.is_train = is_train

    def _imread_flag(self):
        """
        Return the cv2 decode flag matching ``self.is_color``.

        The ``CV_LOAD_IMAGE_*`` names are tried first; when the installed
        cv2 module does not provide them the ``IMREAD_*`` names are used.
        """
        if self.is_color:
            legacy, current = 'CV_LOAD_IMAGE_COLOR', 'IMREAD_COLOR'
        else:
            legacy, current = 'CV_LOAD_IMAGE_GRAYSCALE', 'IMREAD_GRAYSCALE'
        flag = getattr(cv2, legacy, None)
        # Compare against None explicitly: the grayscale flag may be 0.
        if flag is None:
            flag = getattr(cv2, current)
        return flag

    def resize(self, im, min_size):
        """
        Resize ``im`` so that its shorter edge becomes ``min_size`` while
        keeping the aspect ratio.

        :param im: (H x W [x K]) ndarray.
        :param min_size: target length of the shorter edge.
        :return: the resized ndarray.
        """
        row, col = im.shape[:2]
        new_row, new_col = min_size, min_size
        # Floor division keeps the target sizes integral, which cv2.resize
        # requires.
        if row > col:
            new_row = min_size * row // col
        else:
            new_col = min_size * col // row
        # cv2.resize expects dsize as (width, height), i.e. (cols, rows).
        im = cv2.resize(im, (new_col, new_row), interpolation=cv2.INTER_CUBIC)
        return im

    def crop_and_flip(self, im):
        """
        Return a cropped (and, while training, possibly flipped) image.

        The size of the cropped image is crop_size * crop_size. While
        training the crop position is random and the patch is mirrored
        horizontally with probability 1/2; otherwise the center is cropped.

        :param im: (H x W [x K]) ndarray.
        :return: the cropped ndarray.
        """
        row, col = im.shape[:2]
        if self.is_train:
            start_h = np.random.randint(0, row - self.crop_size + 1)
            start_w = np.random.randint(0, col - self.crop_size + 1)
        else:
            start_h = (row - self.crop_size) // 2
            start_w = (col - self.crop_size) // 2
        end_h, end_w = start_h + self.crop_size, start_w + self.crop_size
        # Slicing the two leading axes works for both color (H x W x K) and
        # grayscale (H x W) arrays.
        im = im[start_h:end_h, start_w:end_w]
        if self.is_train and np.random.randint(2) == 0:
            im = im[:, ::-1]
        return im

    def transform(self, im):
        """
        Resize, crop/flip and post-process a decoded image.

        :param im: (H x W [x K]) ndarray.
        :return: the processed ``float32`` ndarray.
        """
        im = self.resize(im, self.min_size)
        im = self.crop_and_flip(im)
        im = im.astype('float32')
        # transpose, swap channel, sub mean
        out = ImageTransformer.transformer(self, im)
        # ndarray.transpose is not an in-place operation, so the processed
        # array has to come from the return value.
        # NOTE(review): ImageTransformer.transformer is defined outside this
        # file; it is assumed to return the processed array - confirm. If it
        # returns None, ``im`` is returned as before.
        return im if out is None else out

    def load_image_from_string(self, data):
        """Decode an image from the raw encoded bytes ``data``."""
        # np.frombuffer wraps the bytes without copying; cv2.imdecode only
        # reads from the buffer.
        buf = np.frombuffer(data, np.uint8)
        return cv2.imdecode(buf, self._imread_flag())

    def transform_from_string(self, data):
        """Decode ``data`` and run :meth:`transform` on the result."""
        im = self.load_image_from_string(data)
        return self.transform(im)

    def load_image_from_file(self, file):
        """Read and decode the image stored at path ``file``."""
        return cv2.imread(file, self._imread_flag())

    def transform_from_file(self, file):
        """Load the image at path ``file`` and run :meth:`transform` on it."""
        im = self.load_image_from_file(file)
        return self.transform(im)
| 101 | + |
| 102 | + |
class PILTransformer(ImageTransformer):
    """
    Image transformer backed by PIL.

    :meth:`transform` resizes the image so that its shorter edge equals
    ``min_size``, crops a ``crop_size`` x ``crop_size`` patch (random
    position plus a random horizontal flip when ``is_train`` is set, center
    crop otherwise), converts it to a ``float32`` ndarray and finally hands
    it to ``ImageTransformer.transformer``.
    """

    def __init__(
            self,
            min_size=None,
            crop_size=None,
            transpose=(2, 0, 1),  # transpose to C * H * W
            channel_swap=None,
            mean=None,
            is_train=True,
            is_color=True):
        """
        :param min_size: length of the shorter edge after resizing.
        :param crop_size: edge length of the square crop.
        :param transpose: axis order forwarded to ``ImageTransformer``.
        :param channel_swap: channel order forwarded to ``ImageTransformer``.
        :param mean: mean value(s) forwarded to ``ImageTransformer``.
        :param is_train: random crop/flip when True, center crop otherwise.
        :param is_color: load images as color (True) or grayscale (False).
        """
        # NOTE(review): ``self.is_color`` is read below but never assigned in
        # this class; presumably the base __init__ stores it - confirm.
        ImageTransformer.__init__(self, transpose, channel_swap, mean, is_color)
        self.min_size = min_size
        self.crop_size = crop_size
        self.is_train = is_train

    def _convert_mode(self, im):
        """
        Convert ``im`` to RGB or single-channel ("L") mode according to
        ``self.is_color``, mirroring the decode flag CvTransformer passes to
        OpenCV so both back ends honor ``is_color``.
        """
        return im.convert('RGB' if self.is_color else 'L')

    def resize(self, im, min_size):
        """
        Resize ``im`` so that its shorter edge becomes ``min_size`` while
        keeping the aspect ratio.

        :param im: PIL image.
        :param min_size: target length of the shorter edge.
        :return: the resized PIL image.
        """
        # PIL reports size as (width, height).
        width, height = im.size[:2]
        new_width, new_height = min_size, min_size
        # Floor division keeps the target sizes integral.
        if width > height:
            new_width = min_size * width // height
        else:
            new_height = min_size * height // width
        # Fall back to LANCZOS when the installed PIL no longer exposes the
        # ANTIALIAS name.
        resample = getattr(Image, 'ANTIALIAS', None)
        if resample is None:
            resample = Image.LANCZOS
        return im.resize((new_width, new_height), resample)

    def crop_and_flip(self, im):
        """
        Return a cropped (and, while training, possibly flipped) image.

        The size of the cropped image is crop_size * crop_size. While
        training the crop position is random and the patch is mirrored
        horizontally with probability 1/2; otherwise the center is cropped.

        :param im: PIL image.
        :return: the cropped PIL image.
        """
        width, height = im.size[:2]
        if self.is_train:
            left = np.random.randint(0, width - self.crop_size + 1)
            upper = np.random.randint(0, height - self.crop_size + 1)
        else:
            left = (width - self.crop_size) // 2
            upper = (height - self.crop_size) // 2
        right, lower = left + self.crop_size, upper + self.crop_size
        # Image.crop takes the box as (left, upper, right, lower).
        im = im.crop((left, upper, right, lower))
        if self.is_train and np.random.randint(2) == 0:
            im = im.transpose(Image.FLIP_LEFT_RIGHT)
        return im

    def transform(self, im):
        """
        Resize, crop/flip and post-process a PIL image.

        :param im: PIL image.
        :return: the processed ``float32`` ndarray.
        """
        im = self.resize(im, self.min_size)
        im = self.crop_and_flip(im)
        im = np.array(im, dtype=np.float32)  # convert to numpy.array
        # transpose, swap channel, sub mean
        out = ImageTransformer.transformer(self, im)
        # ndarray.transpose is not an in-place operation, so the processed
        # array has to come from the return value.
        # NOTE(review): ImageTransformer.transformer is defined outside this
        # file; it is assumed to return the processed array - confirm. If it
        # returns None, ``im`` is returned as before.
        return im if out is None else out

    def load_image_from_string(self, data):
        """Open an image from the raw encoded bytes ``data``."""
        return self._convert_mode(Image.open(StringIO(data)))

    def transform_from_string(self, data):
        """Decode ``data`` and run :meth:`transform` on the result."""
        im = self.load_image_from_string(data)
        return self.transform(im)

    def load_image_from_file(self, file):
        """Open the image stored at path ``file``."""
        return self._convert_mode(Image.open(file))

    def transform_from_file(self, file):
        """Load the image at path ``file`` and run :meth:`transform` on it."""
        im = self.load_image_from_file(file)
        return self.transform(im)
| 174 | + |
| 175 | + |
def job(is_img_string, transformer, data_label):
    """
    Transform one ``(data, label)`` sample; used as the worker function of
    the process pool.

    :param is_img_string: True when ``data`` holds the encoded image bytes,
                          False when it holds an image file name.
    :param transformer: object providing ``transform_from_string`` and
                        ``transform_from_file``.
    :param data_label: a two-item sequence ``(data, label)``.
    :return: tuple ``(transformed image, label)``.
    """
    # Unpack in the body instead of in the signature: tuple parameters are a
    # syntax error on Python 3 (PEP 3113), while this form works everywhere.
    data, label = data_label
    if is_img_string:
        return transformer.transform_from_string(data), label
    return transformer.transform_from_file(data), label
| 181 | + |
| 182 | + |
class MultiProcessImageTransformer(object):
    def __init__(self,
                 procnum=10,
                 resize_size=None,
                 crop_size=None,
                 transpose=(2, 0, 1),
                 channel_swap=None,
                 mean=None,
                 is_train=True,
                 is_color=True,
                 is_img_string=True):
        """
        Processing image with multi-process. If it is used in PyDataProvider,
        the simple usage for CNN is as follows:

        .. code-block:: python

            def hook(settings, is_train, **kwargs):
                settings.is_train = is_train
                settings.mean_values = np.array([103.939,116.779,123.68], dtype=np.float32)
                settings.input_types = [
                    dense_vector(3 * 224 * 224),
                    integer_value(1)]
                settings.transformer = MultiProcessImageTransformer(
                    procnum=10,
                    resize_size=256,
                    crop_size=224,
                    transpose=(2, 0, 1),
                    mean=settings.mean_values,
                    is_train=settings.is_train)


            @provider(init_hook=hook, pool_size=20480)
            def process(settings, file_list):
                with open(file_list, 'r') as fdata:
                    for line in fdata:
                        data_dic = np.load(line.strip()) # load the data batch pickled by Pickle.
                        data = data_dic['data']
                        labels = data_dic['label']
                        labels = np.array(labels, dtype=np.float32)
                        for im, lab in settings.transformer.run(data, labels):
                            yield [im.astype('float32'), int(lab)]

        :param procnum: number of worker processes.
        :type procnum: int
        :param resize_size: the shorter edge size of image after resizing.
        :type resize_size: int
        :param crop_size: the cropping size.
        :type crop_size: int
        :param transpose: the transpose order, Paddle only allows C * H * W order.
        :type transpose: tuple or list
        :param channel_swap: the channel swap order, RGB or BGR.
        :type channel_swap: tuple or list
        :param mean: the mean values of image, per-channel mean or element-wise mean.
        :type mean: array, The dimension is 1 for per-channel mean.
                    The dimension is 3 for element-wise mean.
        :param is_train: training period or testing period.
        :type is_train: bool.
        :param is_color: the image is color or gray.
        :type is_color: bool.
        :param is_img_string: True if the input is the encoded image string,
                              False if it is the file name of the image.
        :type is_img_string: bool.
        """

        self.procnum = procnum
        self.pool = multiprocessing.Pool(procnum)
        self.is_img_string = is_img_string
        # Use the OpenCV back end when cv2 could be imported, PIL otherwise.
        if cv2 is not None:
            self.transformer = CvTransformer(resize_size, crop_size, transpose,
                                             channel_swap, mean, is_train,
                                             is_color)
        else:
            self.transformer = PILTransformer(resize_size, crop_size, transpose,
                                              channel_swap, mean, is_train,
                                              is_color)

    def run(self, data, label):
        """
        Transform the samples in the worker pool.

        :param data: iterable of image strings or image file names, depending
                     on ``is_img_string``.
        :param label: iterable of labels, paired with ``data`` item by item.
        :return: iterator over ``(transformed image, label)`` tuples. Because
                 ``imap_unordered`` is used, the tuples arrive in arbitrary
                 order; each image stays paired with its own label.
        """
        fun = functools.partial(job, self.is_img_string, self.transformer)
        # itertools.izip only exists on Python 2; the builtin zip is the lazy
        # equivalent elsewhere.
        lazy_zip = getattr(itertools, 'izip', zip)
        return self.pool.imap_unordered(
            fun, lazy_zip(data, label), chunksize=100 * self.procnum)
0 commit comments