# test_network.py

#!/usr/bin/env python
# coding: utf-8

import os
from argparse import ArgumentParser
import tensorflow as tf
import numpy as np
from sklearn import mixture
from scipy import interpolate
import cv2
from tools import Normalizer, create_checkerboard, load_data, load_network
import colorsys
from matplotlib.colors import rgb_to_hsv
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
# switch matplotlib to interactive mode; the functions of this script display their figures
# with plt.show(block=False) followed by a short plt.pause
plt.ion()


def reconstruct_data(dir_model="model/trained", dir_dataset="dataset/generated/combined", indexes=11, error_threshold=0.056):
    """
    Test a network by reconstructing samples from the dataset.

    For each selected sample, one column of five panels is drawn: the ground-truth image, the predicted image,
    the predicted error, the mask obtained by thresholding the predicted error, and the masked predicted image
    displayed over a checkerboard background.

    Parameters:
        dir_model - model directory
        dir_dataset - dataset directory
        indexes - indexes (if list or array) or number of random indexes (if int) of samples to reconstruct
        error_threshold - pixels whose predicted error is lower than or equal to this value belong to the mask
    """

    # load the dataset
    m, s, n_samples, height, width, _, _ = load_data(dir_dataset)

    # draw indexes if necessary (numpy integers are accepted as well as plain ints)
    if isinstance(indexes, (int, np.integer)):
        indexes = np.random.choice(n_samples, indexes)  # drawn with replacement: a sample can appear twice
    n_columns = len(indexes)

    # normalize the motor_input configuration in [-1, 1] and subsample the dataset
    # (the normalizer is fitted on the whole dataset before the subsampling)
    m_normalizer = Normalizer(low=-1, high=1)
    m = m_normalizer.fit_transform(m)
    m = m[indexes, :]

    # normalize the pixel channels in [0, 1] and subsample the dataset
    s_normalizer = Normalizer(low=0, high=1, min_data=0, max_data=1)  # identity mapping in this case, as the pixel values are already in [0, 1]
    s = s_normalizer.transform(s)
    s = s[indexes, :]

    # load the network
    saver, motor_input, net_predicted_image, net_predicted_error = load_network(dir_model)

    # create a background checkerboard
    checkerboard = create_checkerboard(height, width)

    # create a figure
    fig = plt.figure(figsize=(18, 7))
    fig.suptitle('samples {}'.format(indexes), fontsize=12)

    # display the reconstructions
    with tf.Session() as sess:

        # reload the network's variable values
        saver.restore(sess, tf.train.latest_checkpoint(dir_model + "/"))

        # m and s have been subsampled, so they are indexed by the column number
        for i in range(n_columns):

            # ground truth image
            gt_image = s[i, :, :, :]

            # predict the image and the error in a single pass through the network
            predicted_image, predicted_error = sess.run([net_predicted_image, net_predicted_error],
                                                        feed_dict={motor_input: m[[i], :]})
            predicted_image = s_normalizer.reconstruct(predicted_image[0])  # identity mapping in this case, as the pixel values are already in [0, 1]
            predicted_error = predicted_error[0]

            # build mask
            predicted_mask = (predicted_error <= error_threshold).astype(float)

            # build the masked image: the alpha channel is the mean of the mask over the color channels
            alpha_channel = np.mean(predicted_mask, axis=2)
            transparent_masked_predicted_image = np.dstack((predicted_image * predicted_mask, alpha_channel))

            # each panel is a title and the layers to draw, from bottom to top
            panels = [
                ("ground-truth image", [gt_image]),
                ("predicted image", [predicted_image]),
                ("predicted error", [predicted_error]),
                ("mask", [predicted_mask]),
                ('masked predicted image', [checkerboard, transparent_masked_predicted_image]),
            ]

            # display: one row per panel, one column per sample
            for row, (title, layers) in enumerate(panels):
                ax = fig.add_subplot(len(panels), n_columns, row * n_columns + 1 + i)
                ax.set_title(title)
                for layer in layers:
                    ax.imshow(layer)
                ax.axis("off")
        #
        # fig.savefig(".temp/reconstruction/reconstructions.svg")

    plt.show(block=False)
    plt.pause(0.001)


def evaluate_body_image(dir_model="model/trained", dir_green_dataset="dataset/generated/green", indexes=6, error_threshold=0.056):
    """
    Test the body image mask generated by a network by comparing it to the ground-truth green-background dataset.

    The mask match (Intersection over Union) and the appearance match (one minus the mean absolute image error
    under the intersection of the masks) are computed for every sample of the dataset, and their mean and
    standard deviation are printed. The selected samples are additionally displayed, one column per sample.

    Parameters:
        dir_model - model directory
        dir_green_dataset - green-background dataset directory
        indexes - indexes (if list or array) or number of random indexes (if int) of samples to display
        error_threshold - pixels whose predicted error is lower than or equal to this value belong to the mask
    """

    # load the dataset
    m, s, n_samples, height, width, _, _ = load_data(dir_green_dataset)

    # draw indexes if necessary (numpy integers are accepted as well as plain ints)
    if isinstance(indexes, (int, np.integer)):
        indexes = np.random.choice(n_samples, indexes)  # drawn with replacement: duplicates are possible

    # each sample is displayed once, so the grid is sized on the distinct valid indexes;
    # this avoids empty columns when an index is duplicated or out of range
    shown = {int(ind) for ind in indexes if 0 <= ind < n_samples}
    n_columns = len(shown)

    # normalize the motor_input configuration in [-1, 1]
    m_normalizer = Normalizer(low=-1, high=1)
    m = m_normalizer.fit_transform(m)

    # normalize the pixel channels in [0, 1]
    s_normalizer = Normalizer(low=0, high=1, min_data=0, max_data=1)  # identity mapping in this case, as the pixel values are already in [0, 1]
    s = s_normalizer.transform(s)

    # load the network
    saver, motor_input, net_predicted_image, net_predicted_error = load_network(dir_model)

    # create a background checkerboard
    checkerboard = create_checkerboard(height, width)

    # track all matches over the dataset
    all_iou_body = []
    all_appearance_match = []

    # create figure
    fig = plt.figure(figsize=(9, 10))
    fig.suptitle('samples {}'.format(indexes), fontsize=12)

    with tf.Session() as sess:

        # reload the network's variable values
        saver.restore(sess, tf.train.latest_checkpoint(dir_model + "/"))

        # track the number of displayed indexes
        column = 0

        # compute the mask and appearance matches over the whole dataset
        for ind in range(n_samples):

            # ground-truth image
            gt_green_image = s[ind, :, :, :]  # image with green background - [height, width, 3] in [0, 1]

            # ground-truth body mask: a pixel is background if it has the exact green color (0, 141/255, 0)
            is_background = ((gt_green_image[:, :, 0] == 0)
                             & (np.abs(gt_green_image[:, :, 1] - 141 / 255) <= 1e-3)
                             & (gt_green_image[:, :, 2] == 0)).astype(float)
            gt_body_mask = 1 - np.repeat(is_background[:, :, np.newaxis], 3, axis=2)  # ground-truth body mask - [height, width, 3] in (0., 1.)

            # predict the image and the error in a single pass through the network
            predicted_image, predicted_error = sess.run([net_predicted_image, net_predicted_error],
                                                        feed_dict={motor_input: m[[ind], :]})
            predicted_image = s_normalizer.reconstruct(predicted_image[0])  # identity mapping in this case, as the pixel values are already in [0, 1]
            predicted_error = predicted_error[0]

            # predicted body mask
            predicted_body_mask = (predicted_error <= error_threshold).astype(float)  # [height, width, 3] in (0., 1.)

            # evaluation of the predicted mask: Intersection over Union (1 if both masks are empty)
            intersection = np.logical_and(gt_body_mask, predicted_body_mask)
            union = np.logical_or(gt_body_mask, predicted_body_mask)
            n_intersection = np.sum(intersection)
            n_union = np.sum(union)
            iou_body_mask = n_intersection / n_union if n_union > 0 else 1

            # error in the predicted image
            error_image = gt_green_image - predicted_image

            # evaluation of the body appearance: mean error under the intersection of masks (1 if it is empty)
            masked_image_error = error_image * intersection
            appearance_match = 1 - np.sum(np.abs(masked_image_error)) / n_intersection if n_intersection > 0 else 1

            # store the matches and scores
            all_iou_body.append(iou_body_mask)
            all_appearance_match.append(appearance_match)

            # only the selected indexes are displayed
            if ind not in shown:
                continue

            # creation of the mask images with transparency for display
            alpha_channel = np.mean(intersection, axis=2)
            transparent_masked_gt_image = np.dstack((gt_green_image * intersection, alpha_channel))
            transparent_masked_predicted_image = np.dstack((predicted_image * intersection, alpha_channel))

            # each panel is a title, its font size (None for the default one) and the layers to draw, from bottom to top
            # NOTE(review): the two "error" titles display the match scores (IoU and appearance match), not errors
            panels = [
                ("ground-truth body mask", None, [np.where(gt_body_mask == 1., 1., gt_green_image)]),
                ("predicted mask", None, [predicted_body_mask]),
                ("mask error: {:.2f}%".format(100 * iou_body_mask), 11, [(gt_body_mask - predicted_body_mask) / 2 + 0.5]),
                ("masked ground-truth", None, [checkerboard, transparent_masked_gt_image]),
                ("masked prediction", None, [checkerboard, transparent_masked_predicted_image]),
                ("appearance error: {:.2f}%".format(100 * appearance_match), 11, [checkerboard, masked_image_error / 2 + 0.5]),
            ]

            # display: one row per panel, one column per displayed sample
            for row, (title, fontsize, layers) in enumerate(panels):
                ax = fig.add_subplot(len(panels), n_columns, row * n_columns + 1 + column)
                if fontsize is None:
                    ax.set_title(title)
                else:
                    ax.set_title(title, fontsize=fontsize)
                for layer in layers:
                    ax.imshow(layer)
                ax.axis("off")
            #
            column += 1
            #
            #fig.savefig(".temp/mask_and_appearance_match/evaluation.svg".format(ind))

    # print the stats
    print("mask match = {mean} +/- {std}".format(mean=np.mean(all_iou_body), std=np.std(all_iou_body)))
    print("appearance match = {mean} +/- {std}".format(mean=np.mean(all_appearance_match), std=np.std(all_appearance_match)))

    plt.show(block=False)
    plt.pause(0.001)


def fit_gmm(dir_green_dataset="dataset/generated/green", dir_model="model/trained", indexes=100):
    """
    Fit a 2-Gaussian Mixture Model to the distribution of the predicted prediction errors over a set of samples
    to distinguish the pixels belonging to the body image from the ones belonging to the background.

    The threshold is the point where the two components are equally probable; it is searched on a regular
    grid of 1000 values in [-0.05, 0.3]. The histogram of the errors and the fitted model are displayed.

    Parameters:
        dir_green_dataset - dataset directory (only the motor configurations are used)
        dir_model - model directory
        indexes - indexes (if list or array) or number of random indexes (if int) of samples to use

    Returns:
        threshold - estimated error threshold separating the two components
    """

    # load the dataset
    m, _, n_samples, _, _, _, _ = load_data(dir_green_dataset)

    # draw indexes if necessary (numpy integers are accepted as well as plain ints)
    if isinstance(indexes, (int, np.integer)):
        indexes = np.random.choice(n_samples, indexes)

    # normalize the motor_input configuration in [-1, 1] and subsample the dataset
    m_normalizer = Normalizer(low=-1, high=1)
    m = m_normalizer.fit_transform(m)
    m = m[indexes, :]

    # load the network
    saver, motor_input, _, predicted_error = load_network(dir_model)

    # one flat array of predicted errors per motor sample
    error_chunks = []

    # collect all the predicted prediction errors over the selected set of motor samples
    with tf.Session() as sess:

        # reload the network's variable values
        saver.restore(sess, tf.train.latest_checkpoint(dir_model + "/"))

        for i in range(len(indexes)):

            # predict error
            curr_error = sess.run(predicted_error, feed_dict={motor_input: m[[i], :]})[0]

            # keep the array itself: extending a Python list with every pixel value is needlessly slow
            error_chunks.append(curr_error.flatten())

    # fit a 2-GMM model on the stacked errors, as a [n_values, 1] array
    all_pred_errors = np.concatenate(error_chunks).reshape(-1, 1)
    gmm_model = mixture.GaussianMixture(n_components=2, n_init=5)
    gmm_model.fit(all_pred_errors)

    # find the intersection of the two gaussians
    x = np.linspace(-0.05, 0.3, 1000).reshape(-1, 1)
    lp = gmm_model.score_samples(x)  # log probability
    p = gmm_model.predict_proba(x)  # class prediction
    diff = np.abs(p[:, 0] - p[:, 1])
    cross_index = np.argmin(diff)
    threshold = x[cross_index, 0]

    print("Estimated error threshold: {:.3f}".format(threshold))

    # display the histogram and the optimized gaussians
    fig = plt.figure()
    ax = fig.add_subplot(111)
    #
    # "density" replaces the "normed" keyword, which has been removed from matplotlib
    ax.hist(all_pred_errors[:, 0], bins=100, density=True, color="blue", rwidth=0.8, label="errors")
    ax.plot(x, np.exp(lp), 'r-', label="GMM")
    ax.legend(loc="upper left")
    #
    # the class probabilities are drawn on a second y-axis
    ax2 = ax.twinx()
    ax2.plot(x, p[:, 0], 'c--', label="Proba comp 1")
    ax2.plot(x, p[:, 1], 'g--', label="Proba comp 2")
    ax2.set_ylim([0, 1.2])
    ax2.legend(loc="upper right")
    #
    #fig.savefig(".temp/fitted_GMM/gmm.svg")
    #
    plt.show(block=False)
    plt.pause(0.001)

    return threshold


def explore_joint_space(dir_model="model/trained", motor_input_ref=None):
    """
    Regularly sample each dimension of the motor space and display the generated body image.

    One figure is created per motor dimension. In each figure, the dimension takes 6 regularly spaced
    offsets in [-1, 1] added to the reference motor input, and the predicted image and predicted error
    are displayed for each of them.

    Parameters:
        dir_model - model directory
        motor_input_ref - reference motor input from which to explore the motor space (list or array);
                          all zeros if None
    """

    # number of values sampled along each motor dimension
    n_steps = 6

    # load the network
    saver, motor_input, net_predicted_image, net_predicted_error = load_network(dir_model)

    # get parameters
    n_joints = motor_input.get_shape()[1].value

    # generate the reference motor input if necessary
    if motor_input_ref is None:
        motor_input_ref = np.zeros((1, n_joints))
    else:
        # a plain list would be concatenated with the variation instead of being added to it
        motor_input_ref = np.asarray(motor_input_ref, dtype=float)

    # create the sensory normalizer
    s_normalizer = Normalizer(low=0, high=1, min_data=0, max_data=1)  # identity mapping in this case, as the pixel values are already in [0, 1]

    # display the reconstructions
    with tf.Session() as sess:

        # reload the network's variable values
        saver.restore(sess, tf.train.latest_checkpoint(dir_model + "/"))

        # iterate over the motor dimensions
        for joint in range(n_joints):

            # create a figure
            fig = plt.figure(figsize=(12, 6))
            fig.suptitle('joint {}'.format(joint), fontsize=12)

            for index, val in enumerate(np.linspace(-1, 1, n_steps)):

                # variation to add to the reference motor input: only the current joint moves
                delta = np.zeros((1, n_joints))
                delta[0, joint] = val

                # predict the image and the error in a single pass through the network
                predicted_image, predicted_error = sess.run([net_predicted_image, net_predicted_error],
                                                            feed_dict={motor_input: motor_input_ref + delta})
                predicted_image = s_normalizer.reconstruct(predicted_image[0])  # identity mapping in this case, as the pixel values are already in [0, 1]
                predicted_error = predicted_error[0]

                # display: images on the first row, errors on the second row
                ax1 = fig.add_subplot(2, n_steps, 1 + index)
                ax2 = fig.add_subplot(2, n_steps, 1 + n_steps + index)
                #
                ax1.set_title("predicted image")
                ax1.imshow(predicted_image)
                ax1.axis("off")
                #
                ax2.set_title("predicted error")
                ax2.imshow(predicted_error)
                ax2.axis("off")
            #
            # fig.savefig(".temp/exploration/joint_{}.svg".format(joint))

    plt.show(block=False)
    plt.pause(0.001)


def _smooth_random_trajectory(n_samples, n_joints, samples_per_anchor=40):
    """
    Create a smooth random trajectory in the motor space, as an array of shape [n_samples, n_joints].

    Random anchors are drawn uniformly in [-1, 1] and interpolated with one spline per motor dimension.
    The interpolation can leave [-1, 1] between two anchors.
    """

    # the default (cubic) spline fitted by splrep needs at least 4 data points
    n_anchors = max(4, n_samples // samples_per_anchor)
    anchors = 2 * np.random.rand(n_anchors, n_joints) - 1

    anchor_times = np.linspace(0, 1, n_anchors)
    sample_times = np.linspace(0, 1, n_samples)

    trajectory = np.empty((n_samples, n_joints))
    for joint in range(n_joints):
        tck = interpolate.splrep(anchor_times, anchors[:, joint])
        trajectory[:, joint] = interpolate.splev(sample_times, tck)

    return trajectory


def _draw_motor_trace(ax, title, trajectory, k, dims, trace_length=48):
    """
    Redraw a 3D axis with the recent motor trajectory along the three dimensions "dims" (blue line)
    and the motor configuration of frame k (red dot).
    """

    # the trace includes the current frame, so that it ends on the red dot
    first = max(0, k - trace_length)
    trace = [trajectory[first:k + 1, d] for d in dims]
    current = [trajectory[k:k + 1, d] for d in dims]

    ax.cla()
    ax.set_title(title)
    ax.plot(trace[0], trace[1], trace[2], 'b-')
    ax.plot(current[0], current[1], current[2], 'ro')
    ax.set_xlim(-1, 1)
    ax.set_ylim(-1, 1)
    ax.set_zlim(-1, 1)
    ax.set_xticklabels([])
    ax.set_yticklabels([])
    ax.set_zticklabels([])


def _draw_image_panel(ax, title, layers):
    """
    Redraw a 2D axis with a title and a stack of images, drawn from bottom to top.
    """

    ax.cla()
    ax.set_title(title)
    for layer in layers:
        ax.imshow(layer)
    ax.axis("off")


def generate_video(dir_model="model/trained", n_samples=2000, dir_video="temp/video", error_threshold=0.056):
    """
    Generate a video of the estimated body image by randomly and smoothly moving in the motor space.

    Each frame shows the recent motor trajectory (in two 3D projections), the predicted image, the predicted
    error, the predicted mask and the masked prediction. The video is written to "video.avi" in dir_video.
    If dir_video already exists, the user is asked for a confirmation and the function returns without
    doing anything unless the answer is "y".

    Parameters:
        dir_model - model directory
        n_samples - number of samples in the motor space (one video frame per sample)
        dir_video - directory where to save the video
        error_threshold - pixels whose predicted error is lower than or equal to this value belong to the mask

    Raises:
        ValueError - if the network has less than 4 motor dimensions, which the motor-space panels require
    """

    # check the video directory
    if os.path.exists(dir_video):
        ans = input("> The folder {} already exists; do you want to overwrite its content? [y,n]: ".format(dir_video))
        if ans != "y":  # strings are compared by value: "is not" tests the identity of the objects
            print("exiting the program")
            return
    else:
        os.makedirs(dir_video)

    # files written in the video directory
    video_path = dir_video + "/video.avi"
    frame_path = dir_video + "/img.png"  # temporary image used to transfer each frame from matplotlib to opencv

    # create the sensory normalizer
    s_normalizer = Normalizer(low=0, high=1, min_data=0, max_data=1)  # identity mapping in this case, as the pixel values are already in [0, 1]

    # load the network
    saver, motor_input, net_predicted_image, net_predicted_error = load_network(dir_model)

    # get parameters
    n_joints = motor_input.get_shape()[1].value
    height = net_predicted_image.get_shape()[1].value
    width = net_predicted_image.get_shape()[2].value

    # the two motor-space panels display the motor dimensions (1, 2, 3) and (2, 3, 4)
    if n_joints < 4:
        raise ValueError("the video display requires at least 4 motor dimensions, the network has {}".format(n_joints))

    # create a background checkerboard
    checkerboard = create_checkerboard(height, width)

    # create a smooth trajectory in the motor space, over all the motor dimensions
    trajectory = _smooth_random_trajectory(n_samples, n_joints)

    # prepare the video writer
    video = cv2.VideoWriter(filename=video_path, fourcc=cv2.VideoWriter_fourcc(*'XVID'), fps=24, frameSize=(800, 600))

    try:

        # prepare the figure
        fig = plt.figure(figsize=(8, 6))
        ax0 = fig.add_subplot(231, projection="3d")
        ax1 = fig.add_subplot(234, projection="3d")
        ax2 = fig.add_subplot(232)
        ax3 = fig.add_subplot(233)
        ax4 = fig.add_subplot(235)
        ax5 = fig.add_subplot(236)

        with tf.Session() as sess:

            # reload the network's variable values
            saver.restore(sess, tf.train.latest_checkpoint(dir_model + "/"))

            for k in range(n_samples):

                # the carriage return overwrites the previous frame counter
                print("\rframe {}".format(k), end="")

                # get the motor input
                curr_motor = trajectory[[k], :]

                # predict the image and the error in a single pass through the network
                predicted_image, predicted_error = sess.run([net_predicted_image, net_predicted_error],
                                                            feed_dict={motor_input: curr_motor})
                predicted_image = s_normalizer.reconstruct(predicted_image[0])  # identity mapping in this case, as the pixel values are already in [0, 1]
                predicted_error = predicted_error[0]

                # build mask
                predicted_mask = (predicted_error <= error_threshold).astype(float)

                # build the masked image: the alpha channel is the mean of the mask over the color channels
                alpha_channel = np.mean(predicted_mask, axis=2)
                transparent_masked_predicted_image = np.dstack((predicted_image * predicted_mask, alpha_channel))

                # display the motor configuration with a trace
                _draw_motor_trace(ax0, "motor space $(m_1, m_2, m_3)$", trajectory, k, dims=(0, 1, 2))
                _draw_motor_trace(ax1, "motor space $(m_2, m_3, m_4)$", trajectory, k, dims=(1, 2, 3))

                # display the predictions
                _draw_image_panel(ax2, "predicted image", [predicted_image])
                _draw_image_panel(ax3, "predicted error", [predicted_error])
                _draw_image_panel(ax4, "predicted mask", [predicted_mask])
                _draw_image_panel(ax5, "masked prediction", [checkerboard, transparent_masked_predicted_image])

                plt.show(block=False)
                # 8 x 6 inches at 100 dpi gives the 800 x 600 frame size declared to the video writer
                fig.savefig(frame_path, dpi=100)
                plt.pause(0.001)

                # write frame
                image = cv2.imread(frame_path)
                video.write(image)

        # terminate the frame counter line
        print()

    finally:

        # clean up, even if an error interrupted the generation
        cv2.destroyAllWindows()
        video.release()
        if os.path.exists(frame_path):
            os.remove(frame_path)


if __name__ == "__main__":

    # command-line options: short flag, long flag, destination, help message, default value
    cli_options = (
        ("-dm", "--dir_model", "dir_model", "path to the model", "model/trained"),
        ("-dd", "--dir_dataset", "dir_dataset", "path to training dataset", "dataset/generated/combined"),
        ("-dg", "--dir_green", "dir_green_dataset", "path to training dataset with green background", "dataset/generated/green"),
        ("-dv", "--dir_video", "dir_video", "directory to save the video", "temp/video"),
    )

    parser = ArgumentParser()
    for short_flag, long_flag, destination, help_message, default_value in cli_options:
        parser.add_argument(short_flag, long_flag, dest=destination, help=help_message, default=default_value)
    args = parser.parse_args()

    # unpack the parsed directories
    dir_model, dir_dataset, dir_green_dataset, dir_video = (
        args.dir_model,
        args.dir_dataset,
        args.dir_green_dataset,
        args.dir_video,
    )

    # run all the tests one after the other
    reconstruct_data(dir_model=dir_model, dir_dataset=dir_dataset, indexes=3)
    evaluate_body_image(dir_model=dir_model, dir_green_dataset=dir_green_dataset, indexes=3)
    fit_gmm(dir_green_dataset=dir_green_dataset, dir_model=dir_model, indexes=100)
    explore_joint_space(dir_model=dir_model)
    generate_video(dir_model=dir_model, dir_video=dir_video)

    print("testing finished.")

    # keep the figures open until the user closes them
    plt.show(block=True)