Neural network code rewritten #16

Closed · wants to merge 32 commits
Commits (32)
2ded7fe
started rewriting bound_tightening
EetuReijonen Jan 23, 2024
75f7518
completely rebuilt bound tightening from the ground up
EetuReijonen Jan 23, 2024
01b500b
fully working bound tightening
EetuReijonen Jan 24, 2024
9a449eb
optimized bound tightening code
EetuReijonen Jan 24, 2024
a127507
non-working distributed parallelization
EetuReijonen Jan 24, 2024
6d092f7
improved distributed bound tightening
EetuReijonen Jan 24, 2024
b88b74a
testing bound tightening
EetuReijonen Jan 25, 2024
417747f
added new NN formulation
EetuReijonen Jan 25, 2024
ca5a824
changed last layer to use identity activation
EetuReijonen Jan 26, 2024
2821774
refactored multiprocessing bound tightening
EetuReijonen Jan 29, 2024
e24971d
clarified naming and added global solve state variables
EetuReijonen Jan 29, 2024
8c68b86
removed unnecessary files
EetuReijonen Jan 29, 2024
878792c
incorporated new bound tightening into package
EetuReijonen Jan 30, 2024
3b8b326
minor improvements to bound tightening
EetuReijonen Jan 31, 2024
05c5290
reorganized NN code
EetuReijonen Feb 1, 2024
dc20ac7
new compression algorithm
EetuReijonen Feb 2, 2024
763143e
changes to compression
EetuReijonen Feb 2, 2024
12d68df
new compression almost finished
EetuReijonen Feb 5, 2024
9426965
finished compression algorithm
EetuReijonen Feb 6, 2024
3f22ae7
compression finally working
EetuReijonen Feb 8, 2024
9e6e9d1
Vilhelm's demo working with new code
EetuReijonen Feb 8, 2024
aa934d5
working compression without callbacks
EetuReijonen Feb 9, 2024
070dde9
added GLPK to nn compression
EetuReijonen Feb 9, 2024
1593780
refactoring 'compress' and package contents
EetuReijonen Feb 9, 2024
38778a1
added bound heuristic tightening LRR
EetuReijonen Feb 13, 2024
4bbd95f
tests with a large nn
EetuReijonen Feb 14, 2024
9f3188d
added no-r bound tightening and added nn tests
EetuReijonen Feb 15, 2024
fe4e691
added options to compression and documentation
EetuReijonen Feb 15, 2024
c9a6902
fixed no-r bound tightening and added test for it
EetuReijonen Feb 15, 2024
245de0a
tested no-r tightening
EetuReijonen Feb 16, 2024
adbbcc6
added old constraint removal to no-r bound tightening
EetuReijonen Feb 16, 2024
e08d683
fixed nn tests and modified constraint removal
EetuReijonen Feb 16, 2024
6 changes: 6 additions & 0 deletions Project.toml
@@ -4,16 +4,22 @@ authors = ["Nikita Belyak <[email protected]> and contributors"]
version = "1.0.0-DEV"

[deps]
BSON = "fbb218c0-5317-5bc6-957e-2ee96dd4b1f0"
BenchmarkTools = "6e4b80f9-dd63-53aa-95a3-0cdb28fa8baf"
Distributed = "8ba89e20-285c-5b6f-9357-94700520ee1b"
EvoTrees = "f6006082-12f8-11e9-0c9c-0d5d367ab1e5"
Flux = "587475ba-b771-5e3f-ad9e-33799f191a9c"
GLPK = "60bf3e95-4087-53dc-ae20-288a0d20c6a6"
Gurobi = "2e9cd046-0924-5485-92f1-d5272153d98b"
JuMP = "4076af6c-e467-56ae-b986-b466b2749572"
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
Logging = "56ddb016-857b-54e1-b83d-db4d58db5568"
MLDatasets = "eb30cadb-4394-5ae3-aed4-317e484a6458"
NPZ = "15e1cf62-19b3-5cfa-8e77-841668bca605"
PProf = "e4faabce-9ead-11e9-39d9-4379958e3056"
Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80"
Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
Revise = "295af30f-e4ad-537b-8983-00126c2a3abe"
SCIP = "82193955-e24f-5292-bf16-6f2c5261a85f"
SharedArrays = "1a1011a3-84de-559e-8e89-a11a2f7dc383"
Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
13 changes: 1 addition & 12 deletions README.md
@@ -13,10 +13,6 @@ Currently supported models include neural networks using ReLU activation and tre
* **tree ensemble to MIP conversion** - obtain an integer optimization problem from a trained tree ensemble model
* **tree ensemble optimization** - optimize a trained decision tree model, i.e., find an input that maximizes the forest output

#### Interface
1. *extract_evotrees_info* - obtain a universal datatype TEModel from a trained EvoTrees model
2. *tree_model_to_MIP* - obtain the integer programming JuMP model (with or without the split constraints)

#### Workflow
trained tree ensemble model $\rightarrow$ TEModel $\rightarrow$ JuMP model $\rightarrow$ optimization
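
As a minimal illustration of this workflow (a sketch, not part of the PR: it assumes an EvoTrees regressor, and the exact signatures and return values of `extract_evotrees_info` and `tree_model_to_MIP` are not shown in this diff):

```julia
using EvoTrees, JuMP
using Gogeta

# train a small tree ensemble on illustrative random data
config = EvoTreeRegressor(nrounds=50, max_depth=5)
evo_model = fit_evotree(config; x_train=rand(100, 2), y_train=rand(100))

# trained tree ensemble -> TEModel -> JuMP model -> optimization
te_model = extract_evotrees_info(evo_model)  # universal TEModel datatype
mip = tree_model_to_MIP(te_model)            # return shape assumed; split constraints optional
optimize!(mip)                               # find an input maximizing the forest output
```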

@@ -27,12 +23,5 @@ trained tree ensemble model $\rightarrow$ TEModel $\rightarrow$ JuMP model $\rig
* **neural network compression** - reduce network size by removing unnecessary nodes
* **neural network optimization** - find the input that maximizes the neural network output

#### Interface
1. *create_JuMP_model*
2. *create_CNN_JuMP_model*
3. *bound_tightening*
4. *evaluate!*
5. *compress_network*

#### Workflow
trained neural network $\rightarrow$ NNModel $\rightarrow$ JuMP model $\rightarrow$ bound tightening $\rightarrow$ compression $\rightarrow$ optimization
trained Flux NN model $\rightarrow$ JuMP model $\rightarrow$ bound tightening $\rightarrow$ compression $\rightarrow$ optimization
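
A minimal end-to-end sketch of this workflow, assembled from the calls exercised in `examples/NN_test.jl` and `examples/compression_test.jl` below (the network and the input box are illustrative):

```julia
using Flux, Gogeta

# small ReLU network standing in for a trained model
model = Chain(Dense(2 => 10, relu), Dense(10 => 1))

solver_params = SolverParams(silent=true, threads=0, relax=false, time_limit=0)

# trained Flux NN -> JuMP model with tightened bounds over the box [-1, 1]^2
jump_model, upper_bounds, lower_bounds =
    NN_to_MIP(model, [1.0, 1.0], [-1.0, -1.0], solver_params; tighten_bounds=true)

# compression: remove unnecessary neurons using the tightened bounds
_, removed, compressed, U_comp, L_comp =
    compress(model, [1.0, 1.0], [-1.0, -1.0], solver_params)

# evaluate (or optimize) the network through the MIP formulation
forward_pass!(jump_model, [0.5f0, -0.25f0])
```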
73 changes: 73 additions & 0 deletions examples/NN_test.jl
@@ -0,0 +1,73 @@
# random 2-D inputs in [-0.5, 0.5]^2 with a quadratic target
data = rand(Float32, (2, 1000)) .- 0.5f0;
x_train = data[:, 1:750];
y_train = [sum(x_train[:, col].^2) for col in 1:750];

using Distributed
using Flux
using Random
using Revise
using Gogeta

addprocs(7) # spawn worker processes for distributed bound tightening

@everywhere using Gogeta

# alternative test networks (from largest to smallest); run one block to define `model`
begin
    Random.seed!(1234);

    model = Chain(
        Dense(2 => 10, relu),
        Dense(10 => 50, relu),
        Dense(50 => 20, relu),
        Dense(20 => 5, relu),
        Dense(5 => 1)
    )
end

begin
    Random.seed!(1234);

    model = Chain(
        Dense(2 => 10, relu),
        Dense(10 => 20, relu),
        Dense(20 => 5, relu),
        Dense(5 => 1)
    )
end

begin
    Random.seed!(1234);

    model = Chain(
        Dense(2 => 3, relu),
        Dense(3 => 1)
    )
end

solver_params = SolverParams(silent=true, threads=0, relax=false, time_limit=0)
solver_params_relax = SolverParams(silent=true, threads=0, relax=true, time_limit=0) # assuming the *_relax run was meant to use the LP relaxation

# formulate the network as a MIP with exact and relaxation-based bound tightening
@time jump_model, upper_bounds, lower_bounds = NN_to_MIP(model, [1.0, 1.0], [-1.0, -1.0], solver_params; tighten_bounds=true);
@time jump_model_relax, upper_bounds_relax, lower_bounds_relax = NN_to_MIP(model, [1.0, 1.0], [-1.0, -1.0], solver_params_relax; tighten_bounds=true);

# the MIP formulations should reproduce the network output on the training inputs
vec(model(x_train)) ≈ [forward_pass!(jump_model, x_train[:, i])[1] for i in 1:750]
vec(model(x_train)) ≈ [forward_pass!(jump_model_relax, x_train[:, i])[1] for i in 1:750]

rmprocs(workers()) # release the distributed workers

# compare against the old bound tightening implementation
include("../src/neural_networks/old/bound_tightening.jl")

# total neuron count: 2 input neurons plus all layer neurons
n_neurons = 2 + sum(map(x -> length(x), [Flux.params(model)[2*k] for k in 1:length(model)]));
@time U, L = bound_tightening(model, [i<=2 ? 1.0 : 1000.0 for i in 1:n_neurons], [i<=2 ? -1.0 : -1000.0 for i in 1:n_neurons])
@time U_rel, L_rel = bound_tightening(model, [i<=2 ? 1.0 : 1000.0 for i in 1:n_neurons], [i<=2 ? -1.0 : -1000.0 for i in 1:n_neurons], false, true) # trailing flags presumably select the relaxed variant

# sanity check: all outputs lie within the output-layer bounds
# (the new code's lower bounds appear to be stored with flipped sign)
all(map(x -> x > last(L)[] && x < last(U)[], model(x_train)))
all(map(x -> x > -last(lower_bounds)[] && x < last(upper_bounds)[], model(x_train)))

using Plots
plot(collect(Iterators.flatten(upper_bounds)) .- U[3:end])
plot!(collect(Iterators.flatten(lower_bounds)) .+ L[3:end])

plot(collect(Iterators.flatten(upper_bounds_relax)) .- U_rel[3:end])
plot!(collect(Iterators.flatten(lower_bounds_relax)) .+ L_rel[3:end])

plot(collect(Iterators.flatten(upper_bounds_relax)) - collect(Iterators.flatten(upper_bounds)))
plot!(collect(Iterators.flatten(lower_bounds_relax)) - collect(Iterators.flatten(lower_bounds)))
125 changes: 125 additions & 0 deletions examples/compression_test.jl
@@ -0,0 +1,125 @@
data = rand(Float32, (2, 1000)) .- 0.5f0;
x_train = data[:, 1:750];

using Flux
using Random
using Revise
using Gogeta

# alternative test networks; run one block to define `model`
begin
    Random.seed!(1234);

    model = Chain(
        Dense(2 => 10, relu),
        Dense(10 => 50, relu),
        Dense(50 => 20, relu),
        Dense(20 => 5, relu),
        Dense(5 => 1)
    )
end

begin
    Random.seed!(1234);

    model = Chain(
        Dense(2 => 10, relu),
        Dense(10 => 20, relu),
        Dense(20 => 5, relu),
        Dense(5 => 1)
    )
end

begin
    Random.seed!(1234);

    model = Chain(
        Dense(2 => 3, relu),
        Dense(3 => 1)
    )
end

solver_params = SolverParams(silent=true, threads=0, relax=false, time_limit=0)

@time jump, removed, compressed, U_comp, L_comp = compress(model, [1.0, 1.0], [-1.0, -1.0], solver_params);

vec(model(x_train)) ≈ [forward_pass!(jump, x_train[:, i])[1] for i in 1:750]
compressed(x_train) ≈ model(x_train)

@time _, U_full, L_full = NN_to_MIP(model, [1.0, 1.0], [-1.0, -1.0], solver_params; tighten_bounds=true);

using Plots

x = LinRange(-1.5, -0.5, 100)
y = LinRange(-0.5, 0.5, 100)

contourf(x, y, (x, y) -> model(hcat(x, y)')[], c=cgrad(:viridis), lw=0)
contourf(x, y, (x, y) -> compressed(hcat(x, y)')[], c=cgrad(:viridis), lw=0)

contourf(x, y, (x, y) -> forward_pass!(jump_model, vec(hcat(x, y)) .|> Float32)[], c=cgrad(:viridis), lw=0)
contourf(x, y, (x, y) -> forward_pass!(jump, vec(hcat(x, y)) .|> Float32)[], c=cgrad(:viridis), lw=0)

plot(collect(Iterators.flatten(U_comp[1:end-1])))
plot!(collect(Iterators.flatten(U_full[1:end-1])))

# NOTE: the rest of this script assumes a model stored on disk; `parent_dir`,
# `subdirs`, `n_neurons`, `load_model`, and `get_bounds` must be defined elsewhere
subdir_path = joinpath(parent_dir, subdirs[1])
model = load_model(n_neurons, subdir_path)

solver_params = SolverParams(silent=true, threads=0, relax=false, time_limit=0)
@time jump, removed, compressed, U_comp, L_comp = compress(model, [-0.5, 0.5], [-1.5, -0.5], solver_params; big_M=1_000_000.0);

U_data, L_data = get_bounds(subdir_path)
U_data = U_data[3:end]
L_data = L_data[3:end]

"""
"""

b = [Flux.params(model)[2*k] for k in 1:length(model)]
neuron_count = [length(b[k]) for k in eachindex(b)]

U_full = Vector{Vector}(undef, length(model))
L_full = Vector{Vector}(undef, length(model))

[U_full[layer] = Vector{Float64}(undef, neuron_count[layer]) for layer in 1:length(model)]
[L_full[layer] = Vector{Float64}(undef, neuron_count[layer]) for layer in 1:length(model)]

for layer in 1:length(model)
    for neuron in 1:neuron_count[layer]
        U_full[layer][neuron] = U_data[neuron + (layer == 1 ? 0 : cumsum(neuron_count)[layer-1])]
        L_full[layer][neuron] = L_data[neuron + (layer == 1 ? 0 : cumsum(neuron_count)[layer-1])]
    end
end

collect(Iterators.flatten(U_full)) == U_data
collect(Iterators.flatten(L_full)) == L_data

@time _, removed, compressed, U_comp, L_comp = compress(model, [-0.5, 0.5], [-1.5, -0.5], U_full, L_full);

solver_params = SolverParams(silent=true, threads=0, relax=false, time_limit=0)

@time compression_results = compress_fast(model, [-0.5, 0.5], [-1.5, -0.5], solver_params);
jump_model, removed_neurons, compressed_model, bounds_U, bounds_L = compression_results;

@time bound_results = NN_to_MIP(model, [-0.5, 0.5], [-1.5, -0.5], solver_params; tighten_bounds=true, big_M=100_000.0);

bound_results[3]

@time bound_compression = compress(model, [-0.5, 0.5], [-1.5, -0.5], bounds_U, bounds_L);

@time bound_results = NN_to_MIP(compressed_model, [-0.5, 0.5], [-1.5, -0.5], solver_params; tighten_bounds=true, big_M=100_000.0);

bound_results[1]

contourf(x, y, (x, y) -> forward_pass!(jump_model, vec(hcat(x, y)) .|> Float32)[], c=cgrad(:viridis), lw=0)
contourf(x, y, (x, y) -> model(hcat(x, y)')[], c=cgrad(:viridis), lw=0)
contourf(x, y, (x, y) -> compressed_model(hcat(x, y)')[], c=cgrad(:viridis), lw=0)

x1 = rand(Float32, 10) .- 1.5
x2 = rand(Float32, 10) .- 0.5
x = hcat(x1, x2)'

vec(model(x)) ≈ [forward_pass!(jump, x[:, i] .|> Float32)[] for i in 1:size(x)[2]]
vec(compressed(x)) ≈ [forward_pass!(jump, x[:, i] .|> Float32)[] for i in 1:size(x)[2]]
compressed(x) ≈ model(x)

@time jump_model, U_full, L_full = NN_to_MIP(model, [-0.5, 0.5], [-1.5, -0.5], solver_params; tighten_bounds=true, big_M=100_000.0);
66 changes: 66 additions & 0 deletions examples/fast-bound-calc.jl
@@ -0,0 +1,66 @@
# Compute bounds for a single neuron by maximizing/minimizing its pre-activation
# over the current MIP, using a Gurobi callback that terminates the solve as soon
# as the sign of the bound (and hence the neuron's stability) is decided.
function calculate_bounds_fast(model::JuMP.Model, layer, neuron, W, b, neurons, layers_removed)

    upper = 1.0e10
    lower = -1.0e10

    function bounds_callback(cb_data, cb_where::Cint)

        # Only run at integer solutions
        if cb_where == GRB_CB_MIPSOL

            objbound = Ref{Cdouble}()
            objbest = Ref{Cdouble}()
            GRBcbget(cb_data, cb_where, GRB_CB_MIPSOL_OBJBND, objbound)
            GRBcbget(cb_data, cb_where, GRB_CB_MIPSOL_OBJBST, objbest)

            if objective_sense(model) == MAX_SENSE

                # a positive incumbent proves the neuron can activate; the dual bound is enough
                if objbest[] > 0
                    upper = min(objbound[], 1.0e10)
                    GRBterminate(backend(model))
                end

                # a nonpositive dual bound proves the neuron is stably inactive
                if objbound[] <= 0
                    upper = max(objbound[], 0.0)
                    GRBterminate(backend(model))
                end

            elseif objective_sense(model) == MIN_SENSE

                # symmetric logic for the lower bound
                if objbest[] < 0
                    lower = max(objbound[], -1.0e10)
                    GRBterminate(backend(model))
                end

                if objbound[] >= 0
                    lower = min(objbound[], 0.0)
                    GRBterminate(backend(model))
                end
            end
        end

    end

    # the target neuron's pre-activation as the objective
    @objective(model, Max, b[layer][neuron] + sum(W[layer][neuron, i] * model[:x][layer-1-layers_removed, i] for i in neurons(layer-1-layers_removed)))

    set_attribute(model, "LazyConstraints", 1)
    set_attribute(model, Gurobi.CallbackFunction(), bounds_callback)

    optimize!(model) # maximization pass (upper bound)

    set_objective_sense(model, MIN_SENSE)
    optimize!(model) # minimization pass (lower bound)

    status = if upper <= 0
        "stably inactive"
    elseif lower >= 0
        "stably active"
    else
        "normal"
    end
    println("Neuron: $neuron, $status, bounds: [$lower, $upper]")

    # detach the callback before returning
    set_attribute(model, Gurobi.CallbackFunction(), (cb_data, cb_where::Cint)->nothing)

    return upper, lower
end
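
A hedged usage sketch for this helper: it assumes a Gogeta-built JuMP model `jump_model` with Gurobi attached, weights and biases extracted from a Flux chain as in `compression_test.jl`, and a `neurons(layer)` function returning the surviving neuron indices of a layer — that helper, the preallocation scheme, and the loop bounds are assumptions, not shown in this PR:

```julia
using Flux, JuMP, Gurobi

W = [Flux.params(model)[2*k-1] for k in 1:length(model)]
b = [Flux.params(model)[2*k] for k in 1:length(model)]

# assumed helper: indices of neurons still present in a layer
# (layer 0 is the input layer; input dimension 2 assumed here)
neurons(layer) = layer == 0 ? (1:2) : (1:length(b[layer]))
layers_removed = 0

# preallocate bound containers with loose defaults
U = [fill(1.0e10, length(b[k])) for k in 1:length(model)]
L = [fill(-1.0e10, length(b[k])) for k in 1:length(model)]

# tighten bounds neuron by neuron on the hidden layers
for layer in 1:length(model)-1, neuron in neurons(layer)
    U[layer][neuron], L[layer][neuron] =
        calculate_bounds_fast(jump_model, layer, neuron, W, b, neurons, layers_removed)
end
```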