Merge pull request #20 from gamma-opt/add-cnn

Convolutional neural network (CNN) functionality

Showing 10 changed files with 482 additions and 4 deletions.
@@ -0,0 +1,74 @@
using Flux
using Plots
using JuMP
using Gurobi
using Random
using Gogeta

using Images
using FileIO
image = load("/Users/eetureijonen/Pictures/IMG_0195.JPG"); # swap in your own image
downscaled_image = imresize(image, (70, 50));

input = reshape(Float32.(channelview(Gray.(downscaled_image))), 70, 50, 1, 1);
input = input[end:-1:1, :, :, :]; # flip the rows so heatmaps show the image upright

Random.seed!(1234)
CNN_model = Flux.Chain(
    Conv((4,3), 1 => 10, pad=(2, 1), stride=(3, 2), relu),
    MeanPool((5,3), pad=(3, 2), stride=(2, 2)),
    MaxPool((3,4), pad=(1, 3), stride=(3, 2)),
    Conv((4,3), 10 => 5, pad=(2, 1), stride=(3, 2), relu),
    MaxPool((3,4), pad=(1, 3), stride=(3, 2)),
    Flux.flatten,
    Dense(20 => 100, relu),
    Dense(100 => 1)
)
size(CNN_model[1:6](input)) # sanity check: flatten must produce (20, 1) to match Dense(20 => 100)
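
# Why 20? Each spatial output size follows floor((in + 2*pad - k)/stride) + 1;
# a sketch of the arithmetic for the 70×50 input:
# Conv -> 24×25×10, MeanPool -> 13×14×10, MaxPool -> 5×9×10,
# Conv -> 2×5×5, MaxPool -> 1×4×5, flatten -> 1*4*5 = 20 features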

# input image
heatmap(input[:, :, 1, 1], background=false, legend=false, color=:inferno, aspect_ratio=:equal, axis=([], false))

# first convolution layer outputs
outputs = [CNN_model[1](input)[:, :, channel, 1] for channel in 1:10];
display.(heatmap.(outputs, background=false, legend=false, color=:inferno, aspect_ratio=:equal, axis=([], false)));

# meanpool outputs
outputs = [CNN_model[1:2](input)[:, :, channel, 1] for channel in 1:10];
display.(heatmap.(outputs, background=false, legend=false, color=:inferno, aspect_ratio=:equal, axis=([], false)));

# first maxpool outputs
outputs = [CNN_model[1:3](input)[:, :, channel, 1] for channel in 1:10];
display.(heatmap.(outputs, background=false, legend=false, color=:inferno, aspect_ratio=:equal, axis=([], false)));

# second convolution layer outputs
outputs = [CNN_model[1:4](input)[:, :, channel, 1] for channel in 1:5];
display.(heatmap.(outputs, background=false, legend=false, color=:inferno, aspect_ratio=:equal, axis=([], false)));

# last maxpool outputs
outputs = [CNN_model[1:5](input)[:, :, channel, 1] for channel in 1:5];
display.(heatmap.(outputs, background=false, legend=false, color=:inferno, aspect_ratio=:equal, axis=([], false)));

# create a JuMP model (MIP) from the CNN
jump = Model(Gurobi.Optimizer)
set_silent(jump)
cnns = get_structure(CNN_model, input);
create_MIP_from_CNN!(jump, CNN_model, cnns)

# test that the JuMP model produces the same outputs as the CNN at every layer
@time CNN_model[1](input)[:, :, :, 1] ≈ image_pass!(jump, input, cnns, 1)
@time CNN_model[1:2](input)[:, :, :, 1] ≈ image_pass!(jump, input, cnns, 2)
@time CNN_model[1:3](input)[:, :, :, 1] ≈ image_pass!(jump, input, cnns, 3)
@time CNN_model[1:4](input)[:, :, :, 1] ≈ image_pass!(jump, input, cnns, 4)
@time CNN_model[1:5](input)[:, :, :, 1] ≈ image_pass!(jump, input, cnns, 5)
@time vec(CNN_model[1:6](input)) ≈ image_pass!(jump, input, cnns, 6)
@time vec(CNN_model[1:7](input)) ≈ image_pass!(jump, input, cnns, 7)
@time vec(CNN_model[1:8](input)) ≈ image_pass!(jump, input, cnns, 8)
@time vec(CNN_model(input)) ≈ image_pass!(jump, input)

# plot the fifth meanpool channel of the true model
heatmap(CNN_model[1:2](input)[:, :, 5, 1], background=false, legend=false, color=:inferno, aspect_ratio=:equal, axis=([], false))

# plot the fifth meanpool channel of the JuMP model
heatmap(image_pass!(jump, input, cnns, 2)[:, :, 5], background=false, legend=false, color=:inferno, aspect_ratio=:equal, axis=([], false))
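
Since all formulation variables are registered in the JuMP model, the MIP can also be used to optimize over the input space rather than only replicate forward passes. The following is a minimal, hypothetical sketch, not part of this commit: it assumes the input pixels are the variables c[0, row, col, 1] and the network output is x[8, 1], as the variable definitions in create_MIP_from_CNN! below imply for this architecture, and that no variables remain fixed by the earlier image_pass! calls.

# find the 70×50 input image that maximizes the network output
@constraint(jump, [row in 1:70, col in 1:50], jump[:c][0, row, col, 1] <= 1.0) # pixels lie in [0, 1]
@objective(jump, Max, jump[:x][8, 1])
optimize!(jump)
best_input = [value(jump[:c][0, row, col, 1]) for row in 1:70, col in 1:50];
heatmap(best_input, background=false, legend=false, color=:inferno, aspect_ratio=:equal, axis=([], false))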
@@ -0,0 +1,146 @@
""" | ||
function create_MIP_from_CNN!(jump_model::JuMP.Model, CNN_model::Flux.Chain, cnnstruct::CNNStructure) | ||
Creates a mixed-integer optimization problem from a `Flux.Chain` convolutional neural network model. | ||
The optimization formulation is saved in the `JuMP.Model` given as an input. | ||
The convolutional neural network must follow a certain structure: | ||
- It must consist of (in order) convolutional and pooling layers, a `Flux.flatten` layer and finally dense layers | ||
- I.e. allowed layer types: `Conv`, `MaxPool`, `MeanPool`, `Flux.flatten`, `Dense` | ||
- The activation function for all of the convolutional layers and the dense layers must be `ReLU` | ||
- The last dense layer must use the `identity` activation function | ||
- Input size, filter size, stride and padding can be chosen freely | ||
# Parameters | ||
- `jump_model`: an empty optimization model where the formulation will be saved | ||
- `CNN_model`: `Flux.Chain` containing the CNN | ||
- `cnnstruct`: holds the layer structure of the CNN | ||
""" | ||
function create_MIP_from_CNN!(jump_model::JuMP.Model, CNN_model::Flux.Chain, cnnstruct::CNNStructure)

    channels = cnnstruct.channels
    dims = cnnstruct.dims
    dense_lengths = cnnstruct.dense_lengths
    conv_inds = cnnstruct.conv_inds
    maxpool_inds = cnnstruct.maxpool_inds
    meanpool_inds = cnnstruct.meanpool_inds
    flatten_ind = cnnstruct.flatten_ind
    dense_inds = cnnstruct.dense_inds

    # 2d layers: c = layer outputs (layer 0 is the input image), cs = ReLU slacks, cz = binary activation indicators
    @variable(jump_model, c[layer=union(0, conv_inds, maxpool_inds, meanpool_inds), 1:dims[layer][1], 1:dims[layer][2], 1:channels[layer]] >= 0) # input is always between 0 and 1
    @variable(jump_model, cs[layer=conv_inds, 1:dims[layer][1], 1:dims[layer][2], 1:channels[layer]] >= 0)
    @variable(jump_model, cz[layer=conv_inds, 1:dims[layer][1], 1:dims[layer][2], 1:channels[layer]], Bin)

    # dense layers: x = neuron outputs, s = ReLU slacks, z = binary activation indicators
    @variable(jump_model, x[layer=union(flatten_ind, dense_inds), 1:dense_lengths[layer]])
    @variable(jump_model, s[layer=dense_inds[1:end-1], 1:dense_lengths[layer]] >= 0)
    @variable(jump_model, z[layer=dense_inds[1:end-1], 1:dense_lengths[layer]], Bin)

    U_bounds_dense = Dict{Int, Vector}()
    L_bounds_dense = Dict{Int, Vector}()

    # returns the variable inside the image area, or 0.0 for the zero padding outside it
    pixel_or_pad(layer, row, col, channel) = haskey(c, (layer, row, col, channel)) ? c[layer, row, col, channel] : 0.0

    for (layer_index, layer_data) in enumerate(CNN_model)

        if layer_index in conv_inds

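            # each convolution output is a ReLU of an affine expression, modeled with the big-M split
            #   c - cs == convolution + bias,  c <= M*(1 - cz),  cs <= M*cz
            # where M = 1.0 relies on all layer outputs staying within [0, 1]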
            filters = [Flux.params(layer_data)[1][:, :, in_channel, out_channel] for in_channel in 1:channels[layer_index-1], out_channel in 1:channels[layer_index]]
            biases = Flux.params(layer_data)[2]

            f_height, f_width = size(filters[1, 1])

            for row in 1:dims[layer_index][1], col in 1:dims[layer_index][2]

                # top-left corner of the receptive field in the previous layer (can be negative because of padding)
                pos = (layer_data.stride[1]*(row-1) + 1 - layer_data.pad[1], layer_data.stride[2]*(col-1) + 1 - layer_data.pad[2])

                for out_channel in 1:channels[layer_index]

                    # filter indices are reversed because Flux's `Conv` flips the kernel (unlike `CrossCor`)
                    convolution = @expression(jump_model,
                        sum([filters[in_channel, out_channel][f_height-i, f_width-j] * pixel_or_pad(layer_index-1, pos[1]+i, pos[2]+j, in_channel)
                            for i in 0:f_height-1,
                                j in 0:f_width-1,
                                in_channel in 1:channels[layer_index-1]])
                    )

                    @constraint(jump_model, c[layer_index, row, col, out_channel] - cs[layer_index, row, col, out_channel] == convolution + biases[out_channel])
                    @constraint(jump_model, c[layer_index, row, col, out_channel] <= 1.0 * (1-cz[layer_index, row, col, out_channel]))
                    @constraint(jump_model, cs[layer_index, row, col, out_channel] <= 1.0 * cz[layer_index, row, col, out_channel])
                end
            end

        elseif layer_index in maxpool_inds

            p_height = layer_data.k[1]
            p_width = layer_data.k[2]

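            # the max is encoded with one binary pz per window element: the output is at least
            # every element, exactly one pz is active, and the output is at most the selected
            # element plus a (1 - pz) slack, which is valid while values stay within [0, 1]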
            for row in 1:dims[layer_index][1], col in 1:dims[layer_index][2]

                pos = (layer_data.stride[1]*(row-1) - layer_data.pad[1], layer_data.stride[2]*(col-1) - layer_data.pad[2])

                for channel in 1:channels[layer_index-1]

                    @constraint(jump_model, [i in 1:p_height, j in 1:p_width], c[layer_index, row, col, channel] >= pixel_or_pad(layer_index-1, pos[1]+i, pos[2]+j, channel))

                    pz = @variable(jump_model, [1:p_height, 1:p_width], Bin)
                    @constraint(jump_model, sum([pz[i, j] for i in 1:p_height, j in 1:p_width]) == 1)

                    @constraint(jump_model, [i in 1:p_height, j in 1:p_width], c[layer_index, row, col, channel] <= pixel_or_pad(layer_index-1, pos[1]+i, pos[2]+j, channel) + (1-pz[i, j]))
                end
            end

        elseif layer_index in meanpool_inds

            p_height = layer_data.k[1]
            p_width = layer_data.k[2]

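            # mean pooling is linear, so no binaries are needed: the output simply equals
            # the average of the window (padding contributes zeros)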
            for row in 1:dims[layer_index][1], col in 1:dims[layer_index][2]

                pos = (layer_data.stride[1]*(row-1) - layer_data.pad[1], layer_data.stride[2]*(col-1) - layer_data.pad[2])

                for channel in 1:channels[layer_index-1]
                    @constraint(jump_model, c[layer_index, row, col, channel] == 1/(p_height*p_width) * sum(pixel_or_pad(layer_index-1, pos[1]+i, pos[2]+j, channel)
                        for i in 1:p_height,
                            j in 1:p_width)
                    )
                end
            end

        elseif layer_index == flatten_ind

            # match the column-major order of Flux.flatten: row fastest, then column, then channel
            @constraint(jump_model, [channel in 1:channels[layer_index-1], row in dims[layer_index-1][1]:-1:1, col in 1:dims[layer_index-1][2]],
                x[flatten_ind, row + (col-1)*dims[layer_index-1][1] + (channel-1)*prod(dims[layer_index-1])] == c[layer_index-1, row, col, channel]
            )

        elseif layer_index in dense_inds

            weights = Flux.params(layer_data)[1]
            biases = Flux.params(layer_data)[2]

            n_neurons = length(biases)
            n_previous = length(x[layer_index-1, :])

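            # the bounds below come from interval arithmetic: the first dense layer assumes
            # its inputs lie in [0, 1]; later layers propagate [L, U] through the previous
            # ReLU (hence the max(0, ·) clamps) and take the worst case over the weights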
            # compute heuristic bounds
            if layer_index == minimum(dense_inds)
                U_bounds_dense[layer_index] = [sum(max(weights[neuron, previous] * 1.0, 0.0) for previous in 1:n_previous) + biases[neuron] for neuron in 1:n_neurons]
                L_bounds_dense[layer_index] = [sum(min(weights[neuron, previous] * 1.0, 0.0) for previous in 1:n_previous) + biases[neuron] for neuron in 1:n_neurons]
            else
                U_bounds_dense[layer_index] = [sum(max(weights[neuron, previous] * max(0, U_bounds_dense[layer_index-1][previous]), weights[neuron, previous] * max(0, L_bounds_dense[layer_index-1][previous])) for previous in 1:n_previous) + biases[neuron] for neuron in 1:n_neurons]
                L_bounds_dense[layer_index] = [sum(min(weights[neuron, previous] * max(0, U_bounds_dense[layer_index-1][previous]), weights[neuron, previous] * max(0, L_bounds_dense[layer_index-1][previous])) for previous in 1:n_previous) + biases[neuron] for neuron in 1:n_neurons]
            end

            if layer_data.σ == relu
                for neuron in 1:n_neurons
                    # big-M ReLU split with the propagated bounds as neuron-specific big-M values
                    @constraint(jump_model, x[layer_index, neuron] >= 0)
                    @constraint(jump_model, x[layer_index, neuron] <= U_bounds_dense[layer_index][neuron] * (1 - z[layer_index, neuron]))
                    @constraint(jump_model, s[layer_index, neuron] <= -L_bounds_dense[layer_index][neuron] * z[layer_index, neuron])
                    @constraint(jump_model, x[layer_index, neuron] - s[layer_index, neuron] == biases[neuron] + sum(weights[neuron, i] * x[layer_index-1, i] for i in 1:n_previous))
                end
            elseif layer_data.σ == identity
                @constraint(jump_model, [neuron in 1:n_neurons], x[layer_index, neuron] == biases[neuron] + sum(weights[neuron, i] * x[layer_index-1, i] for i in 1:n_previous))
            end
        end
    end
end