From e21bc35fcc30606ec65c087df80639f83f3e883e Mon Sep 17 00:00:00 2001
From: celinejacques <53426409+celinejacques@users.noreply.github.com>
Date: Thu, 1 Apr 2021 10:49:10 +0200
Subject: [PATCH 1/6] Fix conflict

---
 mplc/dataset.py | 249 ++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 249 insertions(+)

diff --git a/mplc/dataset.py b/mplc/dataset.py
index 4f4286bd..6363cc3a 100644
--- a/mplc/dataset.py
+++ b/mplc/dataset.py
@@ -13,6 +13,20 @@
 import numpy as np
 import pandas as pd
+<<<<<<< HEAD
+=======
+from joblib import dump, load
+from keras.datasets import cifar10, cifar100, mnist, imdb
+from keras.layers import Activation
+from keras.layers import Conv2D, GlobalAveragePooling2D, MaxPooling2D
+from keras.layers import Dense, Dropout
+from keras.layers import Embedding, Conv1D, MaxPooling1D, Flatten
+from keras.losses import categorical_crossentropy
+from keras.models import Sequential
+from keras.optimizers import RMSprop
+from keras.preprocessing import sequence
+from keras.utils import to_categorical
+>>>>>>> 0bb0faf (Add pytorch model for cifar100 [WIP])
 from librosa import load as wav_load
 from librosa.feature import mfcc
 from loguru import logger
@@ -194,6 +208,241 @@ def generate_new_model(self):
         return model
 
 
+class Cifar100(Dataset):
+    def __init__(self):
+        self.input_shape = (32, 32, 3)
+        self.num_classes = 100
+        x_test, x_train, y_test, y_train = self.load_data()
+
+        super(Cifar10, self).__init__(dataset_name='cifar100',
+                                      num_classes=self.num_classes,
+                                      input_shape=self.input_shape,
+                                      x_train=x_train,
+                                      y_train=y_train,
+                                      x_test=x_test,
+                                      y_test=y_test)
+
+    def load_data(self):
+        attempts = 0
+        while True:
+            try:
+                (x_train, y_train), (x_test, y_test) = cifar100.load_data()
+                break
+            except (HTTPError, URLError) as e:
+                if hasattr(e, 'code'):
+                    temp = e.code
+                else:
+                    temp = e.errno
+                logger.debug(
+                    f'URL fetch failure on '
+                    f'https://www.cs.toronto.edu/~kriz/cifar-100-python.tar.gz : '
+                    f'{temp} -- {e.reason}')
+                if attempts < constants.NUMBER_OF_DOWNLOAD_ATTEMPTS:
+                    sleep(2)
+                    attempts += 1
+                else:
+                    raise
+
+        # Pre-process inputs
+        x_train = self.preprocess_dataset_inputs(x_train)
+        x_test = self.preprocess_dataset_inputs(x_test)
+        y_train = self.preprocess_dataset_labels(y_train)
+        y_test = self.preprocess_dataset_labels(y_test)
+        return x_test, x_train, y_test, y_train
+
+    # Data samples pre-processing method for inputs
+    @staticmethod
+    def preprocess_dataset_inputs(x):
+        x = x.astype("float32")
+        x /= 255
+
+        return x
+
+    # Data samples pre-processing method for labels
+    def preprocess_dataset_labels(self, y):
+        y = to_categorical(y, self.num_classes)
+
+        return y
+
+    # Model structure and generation
+    def generate_new_model(self):
+        """Return a CNN model from scratch based on given batch_size"""
+
+        model = models.vgg16()
+
+        # TODO: Add new model
+        # model = Sequential()
+        # model.add(Conv2D(32, (3, 3), padding='same', input_shape=self.input_shape))
+        # model.add(Activation('relu'))
+        # model.add(Conv2D(32, (3, 3)))
+        # model.add(Activation('relu'))
+        # model.add(MaxPooling2D(pool_size=(2, 2)))
+        # model.add(Dropout(0.25))
+
+        # model.add(Conv2D(64, (3, 3), padding='same'))
+        # model.add(Activation('relu'))
+        # model.add(Conv2D(64, (3, 3)))
+        # model.add(Activation('relu'))
+        # model.add(MaxPooling2D(pool_size=(2, 2)))
+        # model.add(Dropout(0.25))
+
+        # model.add(Flatten())
+        # model.add(Dense(512))
+        # model.add(Activation('relu'))
+        # model.add(Dropout(0.5))
+        # model.add(Dense(self.num_classes))
+        # model.add(Activation('softmax'))
+
+        # # initiate RMSprop optimizer
+        # opt = RMSprop(learning_rate=0.0001, decay=1e-6)
+
+        # # Let's train the model using RMSprop
+        # model.compile(loss='categorical_crossentropy',
+        #               optimizer=opt,
+        #               metrics=['accuracy'])
+
+        return model
+
+    # train, test, val splits
+    @staticmethod
+    def train_test_split_local(x, y):
+        return train_test_split(x, y, test_size=0.1, random_state=42)
+
+    @staticmethod
+    def train_val_split_local(x, y):
+        return train_test_split(x, y, test_size=0.1, random_state=42)
+
+
+    class cifar100_dataset(torch.utils.data.Dataset):
+
+        def __init__(self, x, y, transform=[]):
+            self.x = x
+            self.y = y
+            self.transform = transform
+
+        def __len__(self):
+            return len(self.x)
+
+        def __getitem__(self, index):
+
+            x = self.x[index]
+            y = torch.tensor(int(self.y[index]))
+
+            if self.transform:
+                x = self.transform(x)
+
+            return x, y
+
+
+    class ModelPytorch(torchvision.model.vgg16):
+        def __init__(self, optimizer, criterion):
+            super(Cifar100.ModelPytorch, self).__init__()
+            self.optimizer = optimizer
+            self.criterion = criterion
+
+        def fit(self, x_train, y_train, batch_size, validation_data, epochs=1, verbose=False):
+            train_data = cifar100_dataset(x_train, y_train)
+            train_loader = data.DataLoader(train_data, batch_size=batch_size, shuffle=True)
+
+            history = super(Cifar100.ModelPytorch, self).train()
+
+            for batch_idx, (image, label) in enumerate(trainloader):
+                images, labels = torch.autograd.Variable(image), torch.autograd.Variable(label)
+
+                outputs = model(images)
+                loss = self.criterion(outputs, labels)
+
+                self.optimizer.zero_grad()
+                loss.backward()
+                self.optimizer.step()
+
+            [loss, acc] = self.evaluate(x_train, y_train)
+            [val_loss, val_acc] = self.evaluate(*validation_data)
+            # Mimic Keras' history
+            history.history = {
+                'loss': [loss],
+                'accuracy': [acc],
+                'val_loss': [val_loss],
+                'val_accuracy': [val_acc]
+            }
+
+            return history
+
+        def evaluate(self, x_eval, y_eval, **kwargs):
+            test_data = cifar100_dataset(x_eval, y_eval)
+            test_loader = data.DataLoader(test_data, batch_size=batch_size, shuffle=True)
+
+            self.eval()
+
+            with torch.no_grad():
+
+                y_true_np = []
+                y_pred_np = []
+                count=0
+                for i, (images, labels) in enumerate(validation_loader):
+                    count+= 1
+                    N = images.size(0)
+
+                    images = torch.autograd.Variable(images)
+                    labels = torch.autograd.Variable(labels)
+
+                    outputs = model_ft(images)
+
+                    predictions = outputs.max(1, keepdim=True)[1]
+
+                    val_loss =+ criterion(outputs, labels).item()
+                    val_acc =+ (predictions.eq(labels.view_as(predictions)).sum().item() / N)
+
+                model_evaluation = [val_loss/count, val_acc/count]
+
+            return model_evaluation
+
+#TODO
+    # def save_weights(self, path):
+    #     if self.coef_ is None:
+    #         raise ValueError(
+    #             'Coef and intercept are set to None, it seems the model has not been fit properly.')
+    #     if '.h5' in path:
+    #         logger.debug('Automatically switch file format from .h5 to .npy')
+    #         path.replace('.h5', '.npy')
+    #     np.save(path, self.get_weights())
+
+    # def load_weights(self, path):
+    #     if '.h5' in path:
+    #         logger.debug('Automatically switch file format from .h5 to .npy')
+    #         path.replace('.h5', '.npy')
+    #     weights = load(path)
+    #     self.set_weights(weights)
+
+    # def get_weights(self):
+    #     if self.coef_ is None:
+    #         return None
+    #     else:
+    #         return np.concatenate((self.coef_, self.intercept_.reshape(1, 1)), axis=1)
+
+    # def set_weights(self, weights):
+    #     if weights is None:
+    #         self.coef_ = None
+    #         self.intercept_ = None
+    #     else:
+    #         self.coef_ = np.array(weights[0][:-1]).reshape(1, -1)
+    #         self.intercept_ = np.array(weights[0][-1]).reshape(1)
+
+    # def save_model(self, path):
+    #     if '.h5' in path:
+    #         logger.debug('Automatically switch file format from .h5 to .joblib')
+    #         path.replace('.h5', '.joblib')
+    #     dump(self, path)
+
+    # @staticmethod
+    # def load_model(path):
+    #     if '.h5' in path:
+    #         logger.debug('Automatically switch file format from .h5 to .joblib')
+    #         path.replace('.h5', '.joblib')
+    #     return load(path)
+
+
 class Titanic(Dataset):
 
     def __init__(self, proportion=1,
                  val_proportion=0.1):

From 655376b5615431f665fe7a9cc7430b9ce5170e5f Mon Sep 17 00:00:00 2001
From: celinejacques <53426409+celinejacques@users.noreply.github.com>
Date: Thu, 1 Apr 2021 18:42:02 +0200
Subject: [PATCH 2/6] Remove model from dataset

---
 mplc/dataset.py | 193 ++----------------------------------------------
 1 file changed, 8 insertions(+), 185 deletions(-)

diff --git a/mplc/dataset.py b/mplc/dataset.py
index 6363cc3a..3e492da3 100644
--- a/mplc/dataset.py
+++ b/mplc/dataset.py
@@ -13,25 +13,11 @@
 import numpy as np
 import pandas as pd
-<<<<<<< HEAD
-=======
-from joblib import dump, load
-from keras.datasets import cifar10, cifar100, mnist, imdb
-from keras.layers import Activation
-from keras.layers import Conv2D, GlobalAveragePooling2D, MaxPooling2D
-from keras.layers import Dense, Dropout
-from keras.layers import Embedding, Conv1D, MaxPooling1D, Flatten
-from keras.losses import categorical_crossentropy
-from keras.models import Sequential
-from keras.optimizers import RMSprop
-from keras.preprocessing import sequence
-from keras.utils import to_categorical
->>>>>>> 0bb0faf (Add pytorch model for cifar100 [WIP])
 from librosa import load as wav_load
 from librosa.feature import mfcc
 from loguru import logger
 from sklearn.model_selection import train_test_split
-from tensorflow.keras.datasets import cifar10, mnist, imdb
+from tensorflow.keras.datasets import cifar10, cifar100, mnist, imdb
 from tensorflow.keras.layers import Activation
 from tensorflow.keras.layers import Conv2D, GlobalAveragePooling2D, MaxPooling2D
 from tensorflow.keras.layers import Dense, Dropout
@@ -43,7 +29,8 @@
 from tensorflow.keras.utils import to_categorical
 
 from . import constants
-from .models import LogisticRegression
+from .models import LogisticRegression, ModelPytorch
+from torchvision import models
 
 
 class Dataset(ABC):
@@ -210,11 +197,11 @@ def generate_new_model(self):
 
 class Cifar100(Dataset):
     def __init__(self):
-        self.input_shape = (32, 32, 3)
+        self.input_shape = (3, 32, 32)
         self.num_classes = 100
         x_test, x_train, y_test, y_train = self.load_data()
 
-        super(Cifar10, self).__init__(dataset_name='cifar100',
+        super(Cifar100, self).__init__(dataset_name='cifar100',
                                       num_classes=self.num_classes,
                                       input_shape=self.input_shape,
                                       x_train=x_train,
                                       y_train=y_train,
                                       x_test=x_test,
                                       y_test=y_test)
@@ -246,8 +233,8 @@ def load_data(self):
         # Pre-process inputs
         x_train = self.preprocess_dataset_inputs(x_train)
         x_test = self.preprocess_dataset_inputs(x_test)
-        y_train = self.preprocess_dataset_labels(y_train)
-        y_test = self.preprocess_dataset_labels(y_test)
+        # y_train = self.preprocess_dataset_labels(y_train)
+        # y_test = self.preprocess_dataset_labels(y_test)
         return x_test, x_train, y_test, y_train
 
     # Data samples pre-processing method for inputs
@@ -266,41 +253,7 @@ def preprocess_dataset_labels(self, y):
 
     # Model structure and generation
     def generate_new_model(self):
-        """Return a CNN model from scratch based on given batch_size"""
-
-        model = models.vgg16()
-
-        # TODO: Add new model
-        # model = Sequential()
-        # model.add(Conv2D(32, (3, 3), padding='same', input_shape=self.input_shape))
-        # model.add(Activation('relu'))
-        # model.add(Conv2D(32, (3, 3)))
-        # model.add(Activation('relu'))
-        # model.add(MaxPooling2D(pool_size=(2, 2)))
-        # model.add(Dropout(0.25))
-
-        # model.add(Conv2D(64, (3, 3), padding='same'))
-        # model.add(Activation('relu'))
-        # model.add(Conv2D(64, (3, 3)))
-        # model.add(Activation('relu'))
-        # model.add(MaxPooling2D(pool_size=(2, 2)))
-        # model.add(Dropout(0.25))
-
-        # model.add(Flatten())
-        # model.add(Dense(512))
-        # model.add(Activation('relu'))
-        # model.add(Dropout(0.5))
-        # model.add(Dense(self.num_classes))
-        # model.add(Activation('softmax'))
-
-        # # initiate RMSprop optimizer
-        # opt = RMSprop(learning_rate=0.0001, decay=1e-6)
-
-        # # Let's train the model using RMSprop
-        # model.compile(loss='categorical_crossentropy',
-        #               optimizer=opt,
-        #               metrics=['accuracy'])
-
+        model = ModelPytorch()
         return model
 
     # train, test, val splits
@@ -312,136 +265,6 @@ def train_test_split_local(x, y):
     @staticmethod
     def train_val_split_local(x, y):
         return train_test_split(x, y, test_size=0.1, random_state=42)
 
-
-    class cifar100_dataset(torch.utils.data.Dataset):
-
-        def __init__(self, x, y, transform=[]):
-            self.x = x
-            self.y = y
-            self.transform = transform
-
-        def __len__(self):
-            return len(self.x)
-
-        def __getitem__(self, index):
-
-            x = self.x[index]
-            y = torch.tensor(int(self.y[index]))
-
-            if self.transform:
-                x = self.transform(x)
-
-            return x, y
-
-
-    class ModelPytorch(torchvision.model.vgg16):
-        def __init__(self, optimizer, criterion):
-            super(Cifar100.ModelPytorch, self).__init__()
-            self.optimizer = optimizer
-            self.criterion = criterion
-
-        def fit(self, x_train, y_train, batch_size, validation_data, epochs=1, verbose=False):
-            train_data = cifar100_dataset(x_train, y_train)
-            train_loader = data.DataLoader(train_data, batch_size=batch_size, shuffle=True)
-
-            history = super(Cifar100.ModelPytorch, self).train()
-
-            for batch_idx, (image, label) in enumerate(trainloader):
-                images, labels = torch.autograd.Variable(image), torch.autograd.Variable(label)
-
-                outputs = model(images)
-                loss = self.criterion(outputs, labels)
-
-                self.optimizer.zero_grad()
-                loss.backward()
-                self.optimizer.step()
-
-            [loss, acc] = self.evaluate(x_train, y_train)
-            [val_loss, val_acc] = self.evaluate(*validation_data)
-            # Mimic Keras' history
-            history.history = {
-                'loss': [loss],
-                'accuracy': [acc],
-                'val_loss': [val_loss],
-                'val_accuracy': [val_acc]
-            }
-
-            return history
-
-        def evaluate(self, x_eval, y_eval, **kwargs):
-            test_data = cifar100_dataset(x_eval, y_eval)
-            test_loader = data.DataLoader(test_data, batch_size=batch_size, shuffle=True)
-
-            self.eval()
-
-            with torch.no_grad():
-
-                y_true_np = []
-                y_pred_np = []
-                count=0
-                for i, (images, labels) in enumerate(validation_loader):
-                    count+= 1
-                    N = images.size(0)
-
-                    images = torch.autograd.Variable(images)
-                    labels = torch.autograd.Variable(labels)
-
-                    outputs = model_ft(images)
-
-                    predictions = outputs.max(1, keepdim=True)[1]
-
-                    val_loss =+ criterion(outputs, labels).item()
-                    val_acc =+ (predictions.eq(labels.view_as(predictions)).sum().item() / N)
-
-                model_evaluation = [val_loss/count, val_acc/count]
-
-            return model_evaluation
-
-#TODO
-    # def save_weights(self, path):
-    #     if self.coef_ is None:
-    #         raise ValueError(
-    #             'Coef and intercept are set to None, it seems the model has not been fit properly.')
-    #     if '.h5' in path:
-    #         logger.debug('Automatically switch file format from .h5 to .npy')
-    #         path.replace('.h5', '.npy')
-    #     np.save(path, self.get_weights())
-
-    # def load_weights(self, path):
-    #     if '.h5' in path:
-    #         logger.debug('Automatically switch file format from .h5 to .npy')
-    #         path.replace('.h5', '.npy')
-    #     weights = load(path)
-    #     self.set_weights(weights)
-
-    # def get_weights(self):
-    #     if self.coef_ is None:
-    #         return None
-    #     else:
-    #         return np.concatenate((self.coef_, self.intercept_.reshape(1, 1)), axis=1)
-
-    # def set_weights(self, weights):
-    #     if weights is None:
-    #         self.coef_ = None
-    #         self.intercept_ = None
-    #     else:
-    #         self.coef_ = np.array(weights[0][:-1]).reshape(1, -1)
-    #         self.intercept_ = np.array(weights[0][-1]).reshape(1)
-
-    # def save_model(self, path):
-    #     if '.h5' in path:
-    #         logger.debug('Automatically switch file format from .h5 to .joblib')
-    #         path.replace('.h5', '.joblib')
-    #     dump(self, path)
-
-    # @staticmethod
-    # def load_model(path):
-    #     if '.h5' in path:
-    #         logger.debug('Automatically switch file format from .h5 to .joblib')
-    #         path.replace('.h5', '.joblib')
-    #     return load(path)
-
-
 class Titanic(Dataset):
 
     def __init__(self, proportion=1,

From 16d4f90e08ae2cb9ccb57f1ad2f691b1fec52422 Mon Sep 17 00:00:00 2001
From: celinejacques <53426409+celinejacques@users.noreply.github.com>
Date: Thu, 1 Apr 2021 18:42:25 +0200
Subject: [PATCH 3/6] Add pytorch model in models

---
 mplc/models.py | 151 +++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 151 insertions(+)

diff --git a/mplc/models.py b/mplc/models.py
index 6ed4ca10..30a6c472 100644
--- a/mplc/models.py
+++ b/mplc/models.py
@@ -5,6 +5,11 @@
 from sklearn.metrics import log_loss
 from tensorflow.keras.backend import dot
 from tensorflow.keras.layers import Dense
+import torch, torchvision
+import torch.nn as nn
+import torch.optim as optim
+import torch.utils.data as data
+import torchvision.transforms as transforms
 
 
 class LogisticRegression(skLR):
@@ -88,6 +93,152 @@ def load_model(path):
         path.replace('.h5', '.joblib')
         return load(path)
 
+class cifar100_dataset(torch.utils.data.Dataset):
+
+    def __init__(self, x, y, transform=None):
+        self.x = x
+        self.y = y
+        self.transform = transform
+
+    def __len__(self):
+        return len(self.x)
+
+    def __getitem__(self, index):
+
+        x = self.x[index]
+        y = torch.tensor(int(self.y[index][0]))
+
+        if self.transform:
+            x = self.transform(x)
+
+        return x, y
+
+class ModelPytorch(nn.Module):
+    def __init__(self):
+        super(ModelPytorch, self).__init__()
+        model = torchvision.models.vgg16()
+        self.features = nn.Sequential(model.features)
+        self.avgpool = nn.AdaptiveAvgPool2d(output_size=(7, 7))
+        self.classifier = nn.Sequential(
+            nn.Linear(25088, 4096),
+            nn.ReLU(inplace=True),
+            nn.Dropout(p=0.5, inplace=False),
+            nn.Linear(4096, 4096),
+            nn.ReLU(inplace=True),
+            nn.Dropout(p=0.5, inplace=False),
+            nn.Linear(4096, 100)  # CIFAR-100 has 100 classes
+        )
+        # Optimize this module's own parameters (the template vgg16's unused
+        # classifier would otherwise be optimized instead of self.classifier)
+        self.optimizer = optim.Adam(self.parameters(), lr=1e-3)
+
+
+    def forward(self, x):
+        x = self.features(x)
+        x = self.avgpool(x)
+        x = x.view(x.size(0), -1)
+        return self.classifier(x)
+
+
+    def fit(self, x_train, y_train, batch_size, validation_data, epochs=1, verbose=False, callbacks=None):
+        criterion = nn.CrossEntropyLoss()
+        transform = transforms.Compose([transforms.ToTensor()])
+
+        train_data = cifar100_dataset(x_train, y_train, transform)
+        train_loader = data.DataLoader(train_data, batch_size=int(batch_size), shuffle=True)
+
+        # nn.Module.train() returns self; reuse it as a Keras-style history carrier
+        history = super(ModelPytorch, self).train()
+
+        for batch_idx, (image, label) in enumerate(train_loader):
+            images, labels = torch.autograd.Variable(image), torch.autograd.Variable(label)
+
+            outputs = self.forward(images)
+            loss = criterion(outputs, labels)
+
+            self.optimizer.zero_grad()
+            loss.backward()
+            self.optimizer.step()
+
+        [loss, acc] = self.evaluate(x_train, y_train)
+        [val_loss, val_acc] = self.evaluate(*validation_data)
+        # Mimic Keras' history
+        history.history = {
+            'loss': [loss],
+            'accuracy': [acc],
+            'val_loss': [val_loss],
+            'val_accuracy': [val_acc]
+        }
+
+        return history
+
+    def evaluate(self, x_eval, y_eval, **kwargs):
+        criterion = nn.CrossEntropyLoss()
+        transform = transforms.Compose([transforms.ToTensor()])
+
+        test_data = cifar100_dataset(x_eval, y_eval, transform)
+        test_loader = data.DataLoader(test_data, shuffle=True)
+
+        self.eval()
+
+        with torch.no_grad():
+
+            val_loss = 0.0
+            val_acc = 0.0
+            count = 0
+            for i, (images, labels) in enumerate(test_loader):
+                count += 1
+                N = images.size(0)
+
+                images = torch.autograd.Variable(images)
+                labels = torch.autograd.Variable(labels)
+
+                outputs = self(images)
+                predictions = outputs.max(1, keepdim=True)[1]
+
+                val_loss += criterion(outputs, labels).item()
+                val_acc += predictions.eq(labels.view_as(predictions)).sum().item() / N
+
+            model_evaluation = [val_loss / count, val_acc / count]
+
+        return model_evaluation
+
+
+    def save_weights(self, path):
+        if '.h5' in path:
+            logger.debug('Automatically switch file format from .h5 to .pth')
+            path = path.replace('.h5', '.pth')
+        torch.save(self.state_dict(), path)
+
+
+    def load_weights(self, path):
+        if '.h5' in path:
+            logger.debug('Automatically switch file format from .h5 to .pth')
+            path = path.replace('.h5', '.pth')
+        weights = torch.load(path)
+        self.set_weights(weights)
+
+
+    def get_weights(self):
+        return self.state_dict()
+
+
+    def set_weights(self, weights):
+        self.load_state_dict(weights)
+
+
+    def save_model(self, path):
+        if '.h5' in path:
+            logger.debug('Automatically switch file format from .h5 to .pth')
+            path = path.replace('.h5', '.pth')
+        torch.save(self, path)
+
+
+    @staticmethod
+    def load_model(path):
+        if '.h5' in path:
+            logger.debug('Automatically switch file format from .h5 to .pth')
+            path = path.replace('.h5', '.pth')
+        model = torch.load(path)
+        return model.eval()
+
 
 class NoiseAdaptationChannel(Dense):
     """
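The fit()/evaluate() pair above deliberately mirrors the Keras Model API so the rest of mplc does not need to know which backend it is driving. A rough usage sketch, not part of the patch itself: the random arrays below merely stand in for the preprocessed inputs that the Cifar100 dataset class produces (float32 HWC images in [0, 1] and (n, 1) integer labels, matching its load_data() with one-hot encoding disabled), and it assumes the patched module is importable as mplc.models.

    import numpy as np
    from mplc.models import ModelPytorch

    # Stand-in data with the same shapes/dtypes as the Cifar100 dataset class.
    x = np.random.rand(8, 32, 32, 3).astype('float32')
    y = np.random.randint(0, 100, size=(8, 1))

    model = ModelPytorch()
    # One training pass, then a Keras-style history object comes back.
    history = model.fit(x, y, batch_size=4, validation_data=(x, y))
    print(history.history['val_accuracy'])

The ToTensor transform inside fit() converts each (32, 32, 3) array to a (3, 32, 32) tensor, which is why the dataset class can keep channels-last inputs while the VGG backbone receives channels-first batches.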
From 76ef62ba9702bf01b7c44747d2a44ce441e1337e Mon Sep 17 00:00:00 2001
From: celinejacques <53426409+celinejacques@users.noreply.github.com>
Date: Thu, 1 Apr 2021 18:43:42 +0200
Subject: [PATCH 4/6] Add CIFAR100 to constant file

---
 mplc/constants.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/mplc/constants.py b/mplc/constants.py
index 7f9171b8..db571aee 100644
--- a/mplc/constants.py
+++ b/mplc/constants.py
@@ -49,8 +49,9 @@
 TITANIC = "titanic"
 ESC50 = "esc50"
 IMDB = 'imdb'
+CIFAR100 = "cifar100"
 
 # Supported datasets
-SUPPORTED_DATASETS_NAMES = [MNIST, CIFAR10, TITANIC, ESC50, IMDB]
+SUPPORTED_DATASETS_NAMES = [MNIST, CIFAR10, TITANIC, ESC50, IMDB, CIFAR100]
 
 # Number of attempts allowed before raising an error while trying to download dataset
 NUMBER_OF_DOWNLOAD_ATTEMPTS = 3

From 4fe02fcbe9ea3d02c3019ac967ed6d1966592fc0 Mon Sep 17 00:00:00 2001
From: celinejacques <53426409+celinejacques@users.noreply.github.com>
Date: Fri, 2 Apr 2021 17:33:12 +0200
Subject: [PATCH 5/6] Add cifar100 to scenario

---
 mplc/scenario.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/mplc/scenario.py b/mplc/scenario.py
index a77d3e1f..485a5414 100644
--- a/mplc/scenario.py
+++ b/mplc/scenario.py
@@ -138,6 +138,8 @@ def __init__(
             self.dataset = dataset_module.Esc50()
         elif dataset_name == constants.IMDB:
             self.dataset = dataset_module.Imdb()
+        elif dataset_name == constants.CIFAR100:
+            self.dataset = dataset_module.Cifar100()
         else:
             raise Exception(
                 f"Dataset named '{dataset_name}' is not supported (yet). You can construct your own "

From b15040aa644bfd188c22c20d5b1d690f6edb9efa Mon Sep 17 00:00:00 2001
From: celinejacques <53426409+celinejacques@users.noreply.github.com>
Date: Sun, 4 Apr 2021 23:19:18 +0200
Subject: [PATCH 6/6] Change way to set weights

---
 mplc/models.py | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/mplc/models.py b/mplc/models.py
index 30a6c472..45da8a0e 100644
--- a/mplc/models.py
+++ b/mplc/models.py
@@ -1,4 +1,5 @@
 import numpy as np
+import collections
 from joblib import dump, load
 from loguru import logger
 from sklearn.linear_model import LogisticRegression as skLR
@@ -217,11 +218,18 @@ def load_weights(self, path):
 
 
     def get_weights(self):
-        return self.state_dict()
+        weights = []
+        state = self.state_dict()
+        for layer in state.keys():
+            weights.append(state[layer].cpu().numpy())
+        return weights
 
 
     def set_weights(self, weights):
-        self.load_state_dict(weights)
+        # Accept a list of per-layer arrays (as produced by get_weights) or a state_dict
+        if not isinstance(weights, dict):
+            weights = collections.OrderedDict(zip(self.state_dict().keys(), map(torch.tensor, weights)))
+        self.load_state_dict(weights)
 
 
     def save_model(self, path):
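After PATCH 6, get_weights()/set_weights() exchange weights as a plain list of per-layer NumPy arrays rather than a raw state_dict, in the same spirit as the Keras models that mplc's multi-partner aggregation averages layer by layer. A minimal sketch of that round trip; the two-partner setup and the uniform average are illustrative assumptions, not part of the patches:

    import numpy as np
    from mplc.models import ModelPytorch

    partner_models = [ModelPytorch() for _ in range(2)]

    # One list of per-layer arrays per partner.
    weights_per_partner = [m.get_weights() for m in partner_models]

    # Uniform average, computed layer by layer across partners.
    averaged = [np.mean(layer_stack, axis=0)
                for layer_stack in zip(*weights_per_partner)]

    # Push the aggregate back into every partner's model.
    for m in partner_models:
        m.set_weights(averaged)

Converting to NumPy at this boundary is what lets the aggregation code stay backend-agnostic: the same averaging loop works whether the per-partner lists came from a Keras model or from this PyTorch wrapper.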