45 changes: 45 additions & 0 deletions ml/data_utils.py
@@ -0,0 +1,45 @@
import numpy as np
import os
from PIL import Image

"""
Code for extracting data and labels
Katherine Mayo 4/15/2018
"""

def unpack_image(image):
    # Assumes all images come in at the same size;
    # otherwise use Image.resize((width, height)).
    im = Image.open(image, 'r')
    width, height = im.size
    pixels = np.array(im.getdata()).astype(np.float32)  # one RGB row per pixel
    return pixels, width, height

# Walks a directory, reading each class label from the filename's "label-" prefix
# and unpacking each image into a flat pixel array.
def load_data(directoryName):
    files = os.listdir(directoryName)
    labels = []
    data = []
    width = 0
    height = 0
    for f in files:
        labels.append(f.split('-')[0])
        im, width, height = unpack_image(os.path.join(directoryName, f))
        data.append(im)
    return data, labels, height, width

def formatData(data, num, height, width):
    # Flattens num images into a (num, height*width*3) float32 design matrix.
    data = np.array(data)
    return data.reshape((num, height * width * 3)).astype("float32")

def sampleData(trainData, trainLabels, div=0.1):
    # Holds out a random div fraction of the examples as a validation split.
    n = trainData.shape[0]
    val_idx = np.random.choice(n, size=int(n * div), replace=False)
    mask = np.ones(n, dtype=bool)
    mask[val_idx] = False
    val_data = trainData[val_idx, :]
    val_labels = [trainLabels[i] for i in val_idx]
    train_data = trainData[mask, :]
    train_labels = [trainLabels[i] for i in range(n) if mask[i]]
    return train_data, train_labels, val_data, val_labels
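
A minimal usage sketch of these helpers, assuming a directory of `label-name.jpg` files; the directory name "pets" is a made-up placeholder, not part of the PR:

    data, labels, height, width = load_data("pets")
    X = formatData(data, len(data), height, width)           # (N, height*width*3) float32
    tr_X, tr_y, va_X, va_y = sampleData(X, labels, div=0.1)  # ~90/10 train/val split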
68 changes: 68 additions & 0 deletions ml/neuralNet.py
@@ -0,0 +1,68 @@
from keras.models import Sequential
from keras.layers import Dense, Activation, BatchNormalization, Dropout
from keras.optimizers import SGD, RMSprop, Adagrad, Adam

"""
Fully Connected Neural Network class
--size adjustable [number of layers and neurons in each layer]
Katherine Mayo 4/15/2018
"""

class neuralNet(object):

    def __init__(self, numLayers=2, input_dim=784, num_classes=10, dropout=0, use_bn=False, reg=0.0):
        self.input_dim = input_dim
        self.use_batchnorm = use_bn
        self.use_dropout = dropout > 0
        self.reg = reg
        self.numlayers = numLayers
        self.num_classes = num_classes

"""
default values batch norm used only if the network uses batch normalization
momentum and decay values are used only if the SGD variations are desired
"""
    def buildNet(self, neurons=4, optimizer='sgd', optParam={'lr': 0.001, 'momentum': 0, 'decay': 0}, bnorm={'momentum': 0.99, 'epsilon': 0.001}, dropout=0):
        """
        {affine - [batch norm] - relu - [dropout]} x (L - 1) - affine - softmax
        """
        model = Sequential()
        model.add(Dense(neurons, input_shape=(self.input_dim,), use_bias=True))
        for i in range(self.numlayers - 1):
            model.add(Dense(neurons, use_bias=True))
            if self.use_batchnorm:
                model.add(BatchNormalization(momentum=bnorm['momentum'], epsilon=bnorm['epsilon']))
            model.add(Activation('relu'))
            if self.use_dropout:
                model.add(Dropout(rate=dropout))
        model.add(Dense(self.num_classes, activation='softmax'))

        if optimizer == 'sgd':
            opt = SGD(lr=optParam['lr'], momentum=optParam['momentum'], decay=optParam['decay'])
        elif optimizer == 'rmsprop':
            opt = RMSprop(lr=optParam['lr'])
        elif optimizer == 'adagrad':
            opt = Adagrad(lr=optParam['lr'])
        else:
            opt = Adam(lr=optParam['lr'])
        model.compile(loss='categorical_crossentropy', optimizer=opt, metrics=['accuracy'])
        return model

    def fitNet(self, model, data, batchSize=128, numEpochs=1, verbose=1):
        trainData = data['train_data']
        valData = data['val_data']
        trainLab = data['train_labels']
        valLab = data['val_labels']

        networkHistory = model.fit(trainData, trainLab, batch_size=batchSize, epochs=numEpochs, verbose=verbose, validation_data=(valData, valLab))
        return networkHistory

"""
ONLY USE THE FOLLOWING WHEN TESTING
"""
def testModel(model, testData, testLabels):
loss, accuracy = model.evaluate(testData, testLabels, verbos=1)
return loss, accuracy

def predictTest(model, testData, testLabels):
predictions = model.predict(testData)
return predictions
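
For reference, a hedged sketch of how the test-only helpers would be called once a model is trained; `Xte` and `yte` are placeholder arrays, with `yte` one-hot encoded:

    loss, acc = testModel(net, Xte, yte)  # evaluate() returns loss and accuracy given metrics=['accuracy']
    probs = predictTest(net, Xte)         # (N, num_classes) softmax probabilities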
26 changes: 26 additions & 0 deletions ml/runNet.py
@@ -0,0 +1,26 @@
from neuralNet import *
import utils as ut
from data_utils import *

"""
Main code for running Neural Network
"""

if __name__ == '__main__':

    trainDir = ""
    testDir = ""
    classes = 2

    train_data, train_labels, height, width = load_data(trainDir)
    test_data, test_labels, _, _ = load_data(testDir)
    train_data = formatData(train_data, len(train_data), height, width)
    test_data = formatData(test_data, len(test_data), height, width)
    train_data, train_labels, val_data, val_labels = sampleData(train_data, train_labels)

    # Map the string labels from the filenames to integer ids, then one-hot
    # encode them for categorical_crossentropy.
    label_ids = {name: i for i, name in enumerate(sorted(set(train_labels + val_labels + test_labels)))}
    train_labels = ut.setLabels([label_ids[l] for l in train_labels], classes)
    val_labels = ut.setLabels([label_ids[l] for l in val_labels], classes)
    test_labels = ut.setLabels([label_ids[l] for l in test_labels], classes)

    data = {'train_data': train_data, 'val_data': val_data, 'train_labels': train_labels, 'val_labels': val_labels, 'test_data': test_data, 'test_labels': test_labels}

    model = neuralNet(input_dim=height*width*3, num_classes=classes, dropout=0, use_bn=False, reg=0.0)
    net = model.buildNet(neurons=100, optimizer='sgd', optParam={'lr': 0.001, 'momentum': 0, 'decay': 0}, bnorm={'momentum': 0.99, 'epsilon': 0.001}, dropout=0)
    hist = model.fitNet(net, data, batchSize=128, numEpochs=2, verbose=1)
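
As a sketch of the knobs buildNet exposes (values illustrative, not tuned), the same driver could swap in Adam with batch norm and dropout enabled:

    model = neuralNet(numLayers=3, input_dim=height*width*3, num_classes=classes, dropout=0.5, use_bn=True)
    net = model.buildNet(neurons=100, optimizer='adam', optParam={'lr': 0.001}, dropout=0.5)
    hist = model.fitNet(net, data, batchSize=64, numEpochs=10)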
26 changes: 26 additions & 0 deletions ml/utils.py
@@ -0,0 +1,26 @@
from keras.utils import np_utils
import matplotlib.pyplot as plt

"""
Utility methods for fixing categories and plotting graphs
Katherine Mayo 4/15/2018
"""

def setLabels(labels, numCategories):
    return np_utils.to_categorical(labels, numCategories)

def plotNetHistory(networkHistory):
    plt.figure()
    plt.xlabel('Epochs')
    plt.ylabel('Loss')
    plt.plot(networkHistory.history['loss'])
    plt.plot(networkHistory.history['val_loss'])
    plt.legend(['Training', 'Validation'])

    plt.figure()
    plt.xlabel('Epochs')
    plt.ylabel('Accuracy')
    plt.plot(networkHistory.history['acc'])
    plt.plot(networkHistory.history['val_acc'])
    plt.legend(['Training', 'Validation'], loc='lower right')
    plt.show()
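
Tying this back to runNet.py, a minimal follow-up, assuming `hist` is the History object returned by fitNet:

    hist = model.fitNet(net, data, batchSize=128, numEpochs=2)
    plotNetHistory(hist)  # one figure for loss curves, one for accuracy curves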