From 94a949172815ec2eb464409fe77f295136b11ced Mon Sep 17 00:00:00 2001
From: Michael Pilosov <40366263+mathematicalmichael@users.noreply.github.com>
Date: Sat, 24 Apr 2021 20:55:08 -0600
Subject: [PATCH] Feature/linear example (#41)

* dummy code working
* wip...
* minor updates
* show 2-2
* working plots
* improve runtimes with repeated measurements
* working code
* update
* checkpoint
* all good now
* singular value plot
* new figure
* transparency
* point to release candidate
* bump to official version
* leg_fsize -> legend_fsize
* more helpful error messages on assert
* Apply (cleanup) suggestions from code review
* fix typo + styling
---
 .flake8                           |   2 +-
 notebooks/MUD-Discretized.ipynb   |   2 +-
 setup.cfg                         |   3 +-
 src/mud_examples/linear/lin.py    | 339 +++++++++++++++++++++++++++---
 src/mud_examples/linear/models.py |  48 +++--
 src/mud_examples/monomial.py      |  10 +-
 6 files changed, 350 insertions(+), 54 deletions(-)

diff --git a/.flake8 b/.flake8
index 9f883b1..df7a0b6 100644
--- a/.flake8
+++ b/.flake8
@@ -92,7 +92,7 @@ filename =
 disable-noqa = False
 # Set the maximum length that any line (with some exceptions) may be.
-max-line-length = 88
+max-line-length = 120
 # Set the maximum allowed McCabe complexity value for a block of code.
 max-complexity = 10
 # Toggle whether pycodestyle should enforce matching the indentation of the opening bracket’s line.
diff --git a/notebooks/MUD-Discretized.ipynb b/notebooks/MUD-Discretized.ipynb
index 5f78e19..62d43cb 100644
--- a/notebooks/MUD-Discretized.ipynb
+++ b/notebooks/MUD-Discretized.ipynb
@@ -1894,7 +1894,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.8.6"
+   "version": "3.8.8"
   },
   "latex_envs": {
    "LaTeX_envs_menu_present": true,
diff --git a/setup.cfg b/setup.cfg
index 0348ba1..3ea7f1d 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -28,7 +28,7 @@ package_dir =
 # DON'T CHANGE THE FOLLOWING LINE! IT WILL BE UPDATED BY PYSCAFFOLD!
 setup_requires = pyscaffold>=3.2a0,<3.3a0
 # Add here dependencies of your project (semicolon/line-separated), e.g.
-install_requires = mud~=0.0.22
+install_requires = mud~=0.0.24
     numpy
    scipy
    matplotlib
@@ -64,6 +64,7 @@ pub =
 console_scripts =
    mud_examples = mud_examples.runner:run
    generate_poisson_data = mud_examples.poisson:run
+   mud_run_lin_meas = mud_examples.linear.lin:run_meas
    mud_run_inv = mud_examples.runner:run_monomial
    mud_run_lin = mud_examples.runner:run_linear
    mud_run_pde = mud_examples.runner:run_pde
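The new `mud_run_lin_meas` console script is wired to `run_meas` in `src/mud_examples/linear/lin.py` (added later in this patch). For readers unfamiliar with setuptools entry points, the generated launcher behaves roughly like the sketch below; this is an illustrative stand-in, not the actual file setuptools writes:

```python
# Hypothetical equivalent of the `mud_run_lin_meas` wrapper that setuptools
# generates: it resolves "mud_examples.linear.lin:run_meas" and invokes it.
import sys

from mud_examples.linear.lin import run_meas

if __name__ == "__main__":
    sys.exit(run_meas())
```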
diff --git a/src/mud_examples/linear/lin.py b/src/mud_examples/linear/lin.py
index 174c951..e5b760f 100644
--- a/src/mud_examples/linear/lin.py
+++ b/src/mud_examples/linear/lin.py
@@ -5,13 +5,15 @@
 # import os
 import sys
+import scipy as sp
 # from mud_examples.runner import setup_logging
 import matplotlib.pyplot as plt
 import numpy as np
 from matplotlib import cm
 # from mud import __version__ as __mud_version__
-from mud.funs import map_sol, mud_sol
+from mud.funs import map_sol, mud_sol, updated_cov
 from mud.norm import full_functional, norm_data, norm_input, norm_predicted
+from mud.util import transform_linear_setup
 from scipy.linalg import null_space
 
 # from mud_examples import __version__
@@ -139,9 +141,9 @@ def numnonzero(x, tol=1E-4):
     _err_map = err_map_list[idx]
     _err_pin = err_pin_list[idx]
 
-    plt.plot(x, _err_mud[:-1], label='mud', c='k', lw=10)
-    plt.plot(x, _err_map[:-1], label='map', c='r', ls='--', lw=5)
-    plt.plot(x, _err_pin[:-1], label='lsq', c='xkcd:light blue', ls='-', lw=5)
+    plt.plot(x, _err_mud[:-1], label='MUD', c='k', lw=10)
+    plt.plot(x, _err_map[:-1], label='MAP', c='r', ls='--', lw=5)
+    plt.plot(x, _err_pin[:-1], label='LSQ', c='xkcd:light blue', ls='-', lw=5)
     # plt.plot(x, regression, c='g', ls='-')
     # plt.xlim(0,dim_output)
@@ -156,7 +158,7 @@ def numnonzero(x, tol=1E-4):
     # plt.ylabel("$\\frac{||\\lambda^\\dagger - \\lambda||}{||\\lambda^\\dagger||}$", fontsize=fsize*1.25)
     plt.ylabel("Relative Error", fontsize=fsize * 1.25)
     plt.xlabel('Dimension of Output Space', fontsize=fsize)
-    plt.legend(['mud', 'map', 'least squares'], fontsize=fsize)
+    plt.legend(['MUD', 'MAP', 'Least Squares'], fontsize=fsize)
     # plt.annotate(f'Slope={slope:1.4f}', (4,4/7), fontsize=32)
     plt.savefig(f'{fdir}/{prefix}-convergence.png', bbox_inches='tight')
     plt.close()
@@ -320,7 +322,8 @@ def main_rank(args):
     # plt.xscale('log')
     plt.ylim(0, 1.0)
     # plt.ylim(1E-4, 5E-2)
-    plt.ylabel("$\\frac{||\\lambda^\\dagger - \\lambda||}{||\\lambda^\\dagger||}$", fontsize=fsize*1.25)
+    # plt.ylabel("$\\frac{||\\lambda^\\dagger - \\lambda||}{||\\lambda^\\dagger||}$", fontsize=fsize*1.25)
+    plt.ylabel("Relative Error", fontsize=fsize*1.25)
     plt.xlabel('Rank(A)', fontsize=fsize)
     plt.legend(['MUD', 'MAP', 'Least Squares'], fontsize=fsize)
     # plt.annotate(f'Slope={slope:1.4f}', (4,4/7), fontsize=32)
@@ -498,6 +501,266 @@ def main_contours(args):
                     figname=out_file)
 
 
+def main_meas(args):
+    """
+    Main entrypoint for High-Dim Linear Measurement Example
+    """
+    args = parse_args(args)
+    setup_logging(args.loglevel)
+    np.random.seed(args.seed)
+#    example = args.example
+#    num_trials = args.num_trials
+#    fsize = args.fsize
+#    linewidth = args.linewidth
+#    seed = args.seed
+    # dim_input = args.input_dim
+    # save = args.save
+#    alt = args.alt
+#    bayes = args.bayes
+#    prefix = args.prefix
+#    dist = args.dist
+
+    presentation = False
+    save = True
+
+    if not presentation:
+        plt.rcParams['mathtext.fontset'] = 'stix'
+        plt.rcParams['font.family'] = 'STIXGeneral'
+    fdir = 'figures/lin'
+    check_dir(fdir)
+
+    fsize = 42
+
+    def numnonzero(x, tol=1E-4):
+        return len(x[abs(x) < tol])
+
+    # # Impact of Number of Measurements for Various Choices of $\Sigma_\text{init}$
+
+    # dim_output = dim_input
+    dim_input, dim_output = 20, 5
+    # seed = 12
+    # np.random.seed(seed)
+
+    initial_cov = np.diag(np.sort(np.random.rand(dim_input))[::-1] + 0.5)
+    # initial_cov = np.eye(dim_input)  # will cause spectrum of updated covariance to contain repeated eigenvalues
+
+    plt.figure(figsize=(10, 10))
+    initial_mean = np.zeros(dim_input).reshape(-1, 1)
+    # initial_mean = np.random.randn(dim_input).reshape(-1,1)
+    # num_obs_list = np.arange(1, 101).tolist()
+
+    lam_ref = np.random.randn(dim_input).reshape(-1, 1)
+
+    prefix = 'lin-meas-cov'
+
+    # Ns = [10, 50, 100, 500, 1000]
+
+    sigma = 1E-1
+    # np.random.seed(21)
+    # Ns = np.arange(10, 2001, 50).tolist()
+    Ns = [10, 100, 1000, 10000]
+
+    # for _ in range(num_trials):
+
+    operator_list, data_list, _ = models.createRandomLinearProblem(
+        lam_ref,
+        dim_output,
+        [max(Ns)] * dim_output,  # want to iterate over increasing measurements
+        [0] * dim_output,  # noiseless data bc we want to simulate multiple trials
+        dist='norm',
+        repeated=True,
+    )
+
+    MUD = np.zeros((dim_input, len(Ns)))
+    UP = np.zeros((dim_input, len(Ns)))
+    noise_draw = np.random.randn(dim_output, max(Ns)) * sigma
+
+    for j, N in enumerate(Ns):
+        A, b, _ = transform_measurements(operator_list, data_list, N, sigma, noise_draw)
+        MUD[:, j] = mud_sol(A, b, cov=initial_cov)
+        up_cov = updated_cov(A, initial_cov)
+        up_sdvals = sp.linalg.svdvals(up_cov)
+        # print(up_sdvals.shape, dim_input, up_cov.shape)
+        UP[:, j] = up_sdvals
+
+    # mud_var = MUD.var(axis=2)
+    lines = ['solid', 'dashed', 'dashdot', 'dotted']
+
+    for p in range(dim_input):
+        plt.plot(
+            Ns, UP[p, :],
+            label=f"SV {p}",
+            alpha=0.4,
+            lw=5,
+            ls=lines[p % len(lines)],
+        )
+
+    # plt.plot(Ns, mud_var, label='MUD', c='k', lw=10)
+    # plt.title("Precision of MUD Estimates", fontsize=1.25 * fsize)
+    plt.yscale('log')
+    plt.xscale('log')
+    plt.ylabel("Singular Values of $\\Sigma_{up}$", fontsize=fsize * 1.25)
+    plt.xlabel('Number of Measurements', fontsize=fsize)
+    # plt.legend()
+    if save:
+        plt.savefig(f'{fdir}/{prefix}-convergence.png', bbox_inches='tight')
+        plt.close()
+    else:
+        plt.show()
+    plt.close()
+
+    plt.figure(figsize=(10, 10))
+    plt.yscale('log')
+    for i, N in enumerate(Ns):
+        plt.scatter(
+            np.arange(dim_input), UP[:, i],
+            marker='o',
+            s=200,
+            facecolors='none',
+            edgecolors='k',
+        )
+
+        plt.plot(
+            np.arange(dim_input), UP[:, i],
+            label=f"$N={N:1.0E}$",
+            alpha=1,
+            lw=3,
+            ls=lines[i % len(lines)],
+            c='k',
+        )
+    plt.xticks(np.arange(dim_input) + 1, rotation=90)
+    plt.xlabel('Index', fontsize=fsize)
+    plt.ylabel('Singular Value', fontsize=fsize)
+
+    plt.legend()
+
+    if save:
+        plt.savefig(f'{fdir}/{prefix}-sd-convergence.png', bbox_inches='tight')
+        plt.close()
+    else:
+        plt.show()
+    plt.close()
+
+
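`main_meas` tracks how the spectrum of the updated covariance collapses as repeated measurements accumulate. Assuming `updated_cov` from `mud.funs` implements the updated-covariance formula from the MUD framework, `up_cov = init_cov - init_cov A^T P^{-1} (P - I) P^{-1} A init_cov` with predicted covariance `P = A init_cov A^T` (an assumption here, not verified against the library), the qualitative behavior can be reproduced with NumPy alone:

```python
# NumPy-only sketch of the singular-value decay plotted by main_meas.
# ASSUMPTION: updated_cov follows the MUD updated-covariance formula above;
# this is an illustration, not the mud library's code.
import numpy as np
from scipy.linalg import svdvals

def updated_cov_sketch(A, init_cov):
    P = A @ init_cov @ A.T  # predicted covariance
    Pinv = np.linalg.inv(P)
    correction = init_cov @ A.T @ Pinv @ (P - np.eye(A.shape[0])) @ Pinv @ A @ init_cov
    return init_cov - correction

np.random.seed(21)
p, d, sigma = 20, 5, 1e-1
init_cov = np.diag(np.sort(np.random.rand(p))[::-1] + 0.5)
m = np.random.randn(d, p)       # one measurement row per QoI
for N in [10, 100, 1000, 10000]:
    A = np.sqrt(N) / sigma * m  # aggregated map for N repeated measurements
    sv = svdvals(updated_cov_sketch(A, init_cov))
    print(N, sv[-d:])           # the d data-informed directions decay like 1/N
```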
+def main_meas_var(args):
+    """
+    Main entrypoint for High-Dim Linear Measurement Example
+    """
+    args = parse_args(args)
+    setup_logging(args.loglevel)
+    np.random.seed(args.seed)
+#    example = args.example
+#    num_trials = args.num_trials
+#    fsize = args.fsize
+#    linewidth = args.linewidth
+#    seed = args.seed
+    # dim_input = args.input_dim
+    # save = args.save
+#    alt = args.alt
+#    bayes = args.bayes
+#    prefix = args.prefix
+#    dist = args.dist
+
+    presentation = False
+    save = True
+
+    if not presentation:
+        plt.rcParams['mathtext.fontset'] = 'stix'
+        plt.rcParams['font.family'] = 'STIXGeneral'
+    fdir = 'figures/lin'
+    check_dir(fdir)
+
+    fsize = 42
+
+    def numnonzero(x, tol=1E-4):
+        return len(x[abs(x) < tol])
+
+    # # Impact of Number of Measurements for Various Choices of $\Sigma_\text{init}$
+
+    # dim_output = dim_input
+    dim_input, dim_output = 4, 2
+    # seed = 12
+    # np.random.seed(seed)
+
+    # initial_cov = np.diag(np.sort(np.random.rand(dim_input))[::-1] + 0.5)
+    initial_cov = np.eye(dim_input)
+
+    plt.figure(figsize=(10, 10))
+    initial_mean = np.zeros(dim_input).reshape(-1, 1)
+    # initial_mean = np.random.randn(dim_input).reshape(-1,1)
+    # num_obs_list = np.arange(1, 101).tolist()
+
+    lam_ref = np.random.randn(dim_input).reshape(-1, 1)
+
+    prefix = 'lin-meas-cov'
+
+    # Ns = [10, 50, 100, 500, 1000]
+
+    sigma = 1E-1
+    # np.random.seed(21)
+    Ns = np.arange(10, 2001, 50).tolist()
+    # Ns = [10, 50, 100, 500, 1000, 5000, 10000]
+
+    num_trials = 50
+    # for _ in range(num_trials):
+
+    def A_N(M, N, sigma):
+        A = np.sqrt(N) / sigma * M
+        return A
+
+    def d_N(M, lam, n):
+        d = M @ lam + n
+        assert len(d.ravel()) == len(n.ravel()), \
+            f"Shape mismatch noise={n.shape}, data={d.shape}"
+        return d
+
+    def b_N(N, d, sigma):
+        b = -1 / np.sqrt(N) * np.sum(np.divide(d, sigma), axis=1)
+        return b
+
+    # M = np.random.normal(size=(dim_output, dim_input))
+    operator_list, data_list, _ = models.createRandomLinearProblem(
+        lam_ref,
+        dim_output,
+        [max(Ns)] * dim_output,  # want to iterate over increasing measurements
+        [0] * dim_output,  # noiseless data bc we want to simulate multiple trials
+        dist='norm',
+        repeated=True,
+    )
+
+    # operator list has dim_output 1xdim_input matrices
+    MUD = np.zeros((dim_input, len(Ns), num_trials))
+    # M = np.array(operator_list).reshape(dim_output, dim_input)
+    noise_draw = [np.random.randn(dim_output, max(Ns)) * sigma for _ in range(num_trials)]
+
+    for j, N in enumerate(Ns):
+        # _A = A_N(M, N, sigma)
+        for i in range(num_trials):
+            # _b = b_N(N, d_N(M, lam_ref, noise_draw[i][:, 0:N]), sigma)
+            # A, b = transform_linear_setup(operator_list, data_list, sigma)
+            A, b, _ = transform_measurements(operator_list, data_list, N, sigma, noise_draw[i])
+            MUD[:, j, i] = mud_sol(A, b, cov=initial_cov)
+
+    mud_var = MUD.var(axis=2).mean(axis=0)
+    plt.plot(Ns, mud_var, label='MUD', c='k', lw=10)
+
+    # plt.title("Precision of MUD Estimates", fontsize=1.25 * fsize)
+    plt.yscale('log')
+    plt.xscale('log')
+    # plt.ylabel("Singular Values of $\\Sigma_{up}$", fontsize=fsize * 1.25)
+    plt.ylabel("Average Variance", fontsize=fsize * 1.25)
+    plt.xlabel('Number of Measurements', fontsize=fsize)
+    plt.legend()
+    # plt.legend(['MUD', 'Least Squares'], fontsize=fsize)
+    if save:
+        plt.savefig(f'{fdir}/{prefix}-convergence.png', bbox_inches='tight')
+        plt.close()
+    else:
+        plt.show()
+    plt.close()
+
+
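The 1/N slope that `main_meas_var` exhibits on its log-log axes is the standard Monte Carlo rate: averaging N measurements with noise level sigma shrinks the estimator variance like sigma^2 / N. A minimal check of that rate, substituting a plain pseudoinverse estimator for `mud_sol` (all constants here are illustrative):

```python
import numpy as np

np.random.seed(0)
p, sigma, trials = 4, 1e-1, 50
m = np.random.randn(2, p)            # two rank-1 QoI rows, as in the repeated setup
lam_ref = np.random.randn(p, 1)
for N in [10, 100, 1000]:
    est = np.empty((trials, p))
    for t in range(trials):
        noise = sigma * np.random.randn(2, N)
        d_bar = (m @ lam_ref + noise).mean(axis=1, keepdims=True)  # average the N draws
        est[t] = (np.linalg.pinv(m) @ d_bar).ravel()               # least-squares point
    print(N, est.var(axis=0).mean())  # decays roughly like sigma**2 / N
```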
 def main(args):
     """
     Main entrypoint for example-generation
@@ -505,6 +768,7 @@ def main(args):
     main_contours(args)
     main_rank(args)
     main_dim(args)
+    main_meas(args)
 
 
 def run():
     """
     """
     main(sys.argv[1:])
 
-############################################################
 
+def run_meas():
+    """Entry point for console_scripts
+    """
+    main_meas(sys.argv[1:])
 
+############################################################
+
 def contour_example(A=np.array([[1, 1]]), b=np.zeros([1, 1]),  # noqa: C901
                     cov_11=0.5, cov_01=-0.25,
@@ -700,10 +969,10 @@ def contour_example(A=np.array([[1, 1]]), b=np.zeros([1, 1]),  # noqa: C901
     # plt.show()
 
 
-def compare_mud_map_pin(A, b, d, mean, cov):
-    mud_pt = mud_sol(A, b, d, mean, cov)
-    map_pt = map_sol(A, b, d, mean, cov)
-    pin_pt = (np.linalg.pinv(A)@(d-b)).reshape(-1,1)
+def compare_mud_map_pin(A, b, y, mean, cov):
+    mud_pt = mud_sol(A, b, y, mean, cov)
+    map_pt = map_sol(A, b, y, mean, cov)
+    pin_pt = (np.linalg.pinv(A) @ (y - b)).reshape(-1, 1)
     return mud_pt, map_pt, pin_pt
 
@@ -714,7 +983,7 @@ def transform_rank_list(lam_ref, A, b, rank):
     """
     _A = sum(A[0:rank])
     _b = b
-    _d = _A@lam_ref + _b
+    _d = _A @ lam_ref + _b
     assert np.linalg.matrix_rank(_A) == rank, "Unexpected rank mismatch"
     return _A, _b, _d
 
@@ -725,16 +994,27 @@ def transform_dim_out(lam_ref, A, b, dim):
     """
     _A = A[:dim, :]
     _b = b[:dim, :]
-    _d = _A@lam_ref + _b
+    _d = _A @ lam_ref + _b
     return _A, _b, _d
 
 
+def transform_measurements(operator_list, data_list, measurements, std_list, noise):
+    dim_output = len(operator_list)
+    N = measurements
+    _oper_list = [M[0:N, :] for M in operator_list]
+    _d = np.array([y[0:N] for y in data_list]) + noise[:, 0:N]
+    _data_list = _d.tolist()
+    A, b = transform_linear_setup(_oper_list, _data_list, std_list)
+    d = np.zeros(dim_output).reshape(-1, 1)
+    return A, b, d
+
+
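`transform_measurements` truncates each repeated-measurement operator and its data to the first N observations, adds the common noise draw, and folds everything into the affine pair (A, b) via `transform_linear_setup`; the returned `d` is a zero vector because the data are already absorbed into `b`. A toy check of `compare_mud_map_pin`, using the same `mud.funs` signatures the function above relies on (the numbers are made up):

```python
import numpy as np
from mud.funs import map_sol, mud_sol

A = np.array([[1.0, 1.0]])  # toy 2 -> 1 map
b = np.zeros((1, 1))
y = np.array([[1.0]])       # single noiseless datum
mean = np.zeros(2)
cov = np.eye(2)

mud_pt = mud_sol(A, b, y, mean, cov)
map_pt = map_sol(A, b, y, mean, cov)
pin_pt = (np.linalg.pinv(A) @ (y - b)).reshape(-1, 1)
print(mud_pt.ravel(), map_pt.ravel(), pin_pt.ravel())
```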
 def compare_linear_sols_rank_list(lam_ref, A, b,
-                             alpha=1, mean=None, cov=None):
+                                  alpha=1, mean=None, cov=None):
     """
     Input and output dimensions fixed, varying rank 1..dim_output.
     """
-
+
     return compare_linear_sols(transform_rank_list, lam_ref, A, b,
                                alpha, mean, cov)
 
@@ -747,34 +1027,41 @@ def compare_linear_sols_dim(lam_ref, A, b,
 
 
 def compare_linear_sols(transform, lam_ref, A, b,
-                    alpha=1, mean=None, cov=None):
+                        alpha=1, mean=None, cov=None):
     """
     Input dimension fixed, varying according to the output
     of the anonymous function `transform`'s return.
     """
 
     sols = {}
-    if isinstance(alpha, list) or isinstance(alpha, tuple):
+    if isinstance(alpha, (list, tuple)):
         alpha_list = alpha
     else:
         alpha_list = [alpha]
 
+    if isinstance(A, np.ndarray):
+        dim_in = A.shape[1]
+    else:
+        dim_in = len(A)
+
     if mean is None:
-        mean = np.zeros(A.shape[1])
-
+        mean = np.zeros(dim_in)
+
     if cov is None:
-        cov = np.eye(A.shape[1])
+        cov = np.eye(dim_in)
 
     _logger.info("alpha = {}".format(alpha_list))
 
-    if isinstance(A, list):  # svd approach returns list
-        dim_output = A[0].shape[0]
+    if isinstance(A, list):  # svd approach returns list (max dim output), so does measurement study (max meas)
+        max_iteration = A[0].shape[0]
+        if max_iteration == 1:
+            max_iteration = 100  # repeated measurement map is 1xp, this is a dirty override.
     else:
-        dim_output = A.shape[0]
+        max_iteration = A.shape[0]
 
     for alpha in alpha_list:
         sols[alpha] = []
-        for _out in range(1, dim_output+1, 1):
-            _A, _b, _d = transform(lam_ref, A, b, _out)
-            _mud, _map, _pin = compare_mud_map_pin(_A, _b, _d, mean, alpha*cov)
+        for _out in range(1, max_iteration + 1, 1):
+            _A, _b, _y = transform(lam_ref, A, b, _out)
+            _mud, _map, _pin = compare_mud_map_pin(_A, _b, _y, mean, alpha * cov)
             sols[alpha].append((_mud, _map, _pin))
 
     return sols
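With these helpers, sweeping solution quality as the output dimension grows is a single call; a hypothetical use of `compare_linear_sols_dim` on a small random map (the dimensions and values are invented for illustration):

```python
import numpy as np

from mud_examples.linear.lin import compare_linear_sols_dim

np.random.seed(0)
p, d = 4, 3
A = np.random.randn(d, p)        # full map; rows are sliced off one at a time
b = np.zeros((d, 1))
lam_ref = np.random.randn(p, 1)

# sols[alpha] is a list of (mud, map, lsq) triples for output dims 1..d.
sols = compare_linear_sols_dim(lam_ref, A, b, alpha=[0.1, 1.0])
print(len(sols[0.1]), len(sols[1.0]))  # 3 3
```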
diff --git a/src/mud_examples/linear/models.py b/src/mud_examples/linear/models.py
index e4e0af1..fabb062 100644
--- a/src/mud_examples/linear/models.py
+++ b/src/mud_examples/linear/models.py
@@ -16,59 +16,67 @@ def createRandomLinearMap(dim_input, dim_output,
     else:
         M = np.random.rand(dim_output, dim_input)  # noqa: E221
     if repeated:  # just use first row
-        M = np.array(list(M[0, :]) * dim_output)  # noqa: E221
-        M = M.reshape(dim_output, dim_input)  # noqa: E221
+        M = M[0, :].reshape(1, dim_input)
 
     return M
 
 
-def createNoisyReferenceData(M, reference_point, std):
+def createNoisyReferenceData(M, reference_point, std, num_data=None):
     dim_input = len(reference_point)  # noqa: E221
-    dim_output = M.shape[0]
-    assert M.shape[1] == dim_input, "Mperator/Data dimension mismatch"
-    if isinstance(std, int) or isinstance(std, float):
-        std = np.array([std] * dim_output)  # noqa: E221
+    if num_data is None:
+        num_data = M.shape[0]
+    assert M.shape[1] == dim_input, \
+        f"Operator/Reference dimension mismatch. op: {M.shape}, input dim: {dim_input}"
+    assert M.shape[0] == 1 or M.shape[0] == num_data, \
+        f"Operator/Data dimension mismatch. op: {M.shape}, observations: {num_data}"
+    if isinstance(std, (int, float)):  # support for std per measurement
+        std = np.array([std] * num_data)  # noqa: E221
+    else:
+        assert len(std.ravel()) == num_data, \
+            f"St. Dev / Data mismatch. data: {num_data}, std: {len(std.ravel())}"
     ref_input = np.array(list(reference_point)).reshape(-1, 1)  # noqa: E221
     ref_data = M @ ref_input  # noqa: E221
-    noise = np.diag(std) @ np.random.randn(dim_output, 1)  # noqa: E221
+    noise = np.diag(std) @ np.random.randn(num_data, 1)  # noqa: E221
+    if ref_data.shape[0] == 1:
+        ref_data = float(ref_data)
     data = ref_data + noise  # noqa: E221
-    return data
+    return data.ravel()
 
 
-def createRandomLinearPair(reference_point, num_observations, std,
+def createRandomLinearPair(reference_point, num_data, std,
                            dist='normal', repeated=False):
     """
     data will come from a normal distribution centered at zero
     with standard deviation given by `std`
-    QoI map will come from standard uniform or normal if dist=normal
+    QoI map will come from standard uniform, or normal if dist=normal
     if `repeated` is True, the map will be rank 1.
     """
     dim_input = len(reference_point)
-    M = createRandomLinearMap(dim_input, num_observations, dist, repeated)  # noqa: E221, E501
-    data = createNoisyReferenceData(M, reference_point, std)  # noqa: E221, E501
+    M = createRandomLinearMap(dim_input, num_data, dist, repeated)  # noqa: E221, E501
+    data = createNoisyReferenceData(M, reference_point, std, num_data)  # noqa: E221, E501
     return M, data
 
 
 def createRandomLinearProblem(reference_point, num_qoi,
-                              num_obs_list, std_list,
+                              num_observations, std_list,
                               dist='normal', repeated=False):
     """
     Wrapper around `createRandomLinearQoI` to generalize to multiple QoI maps.
     """
-    if isinstance(std_list, int) or isinstance(std_list, float):
+    if isinstance(std_list, (int, float)):
         std_list = [std_list] * num_qoi
     else:
         assert len(std_list) == num_qoi
 
-    if isinstance(num_obs_list, int) or isinstance(num_obs_list, float):
-        num_obs_list = [num_obs_list] * num_qoi
+    if isinstance(num_observations, (int, float)):
+        num_observations = [num_observations] * num_qoi
     else:
-        assert len(num_obs_list) == num_qoi
+        assert len(num_observations) == num_qoi
 
-    assert len(std_list) == len(num_obs_list)
+    assert len(std_list) == len(num_observations)
 
     results = [createRandomLinearPair(reference_point, n, s, dist, repeated)
-               for n, s in zip(num_obs_list, std_list)]
+               for n, s in zip(num_observations, std_list)]
     operator_list = [r[0] for r in results]  # noqa: E221
     data_list = [r[1] for r in results]  # noqa: E221
     return operator_list, data_list, std_list
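With `repeated=True`, `createRandomLinearMap` now returns a genuinely rank-1 operator of shape `1 x dim_input` instead of a stacked square matrix, and `createNoisyReferenceData` produces `num_data` noisy draws of that single QoI. A quick shape check (assuming only the functions shown in this diff, with `dist='normal'` per the docstring):

```python
import numpy as np

from mud_examples.linear.models import createRandomLinearProblem

np.random.seed(0)
lam_ref = np.random.randn(4, 1)
operators, data, stds = createRandomLinearProblem(
    lam_ref, 2, 100, 0.1, dist='normal', repeated=True,
)
print([M.shape for M in operators])  # [(1, 4), (1, 4)] -- rank-1 maps
print([d.shape for d in data])       # [(100,), (100,)] -- raveled noisy draws
```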
""" - if isinstance(std_list, int) or isinstance(std_list, float): + if isinstance(std_list, (int, float)): std_list = [std_list] * num_qoi else: assert len(std_list) == num_qoi - if isinstance(num_obs_list, int) or isinstance(num_obs_list, float): - num_obs_list = [num_obs_list] * num_qoi + if isinstance(num_observations, (int, float)): + num_observations = [num_observations] * num_qoi else: - assert len(num_obs_list) == num_qoi + assert len(num_observations) == num_qoi - assert len(std_list) == len(num_obs_list) + assert len(std_list) == len(num_observations) results = [createRandomLinearPair(reference_point, n, s, dist, repeated) - for n, s in zip(num_obs_list, std_list)] + for n, s in zip(num_observations, std_list)] operator_list = [r[0] for r in results] # noqa: E221 data_list = [r[1] for r in results] # noqa: E221 return operator_list, data_list, std_list diff --git a/src/mud_examples/monomial.py b/src/mud_examples/monomial.py index 67395e4..28045c8 100644 --- a/src/mud_examples/monomial.py +++ b/src/mud_examples/monomial.py @@ -78,7 +78,7 @@ def main(args): # Compute more observations for use in BIP tick_fsize = 28 - leg_fsize = 24 + legend_fsize = 24 for num_data in [1, 5, 10, 20]: np.random.seed(123456) # Just for reproducibility, you can comment out if you want. data = norm.rvs(loc=mu, scale=sigma**2, size=num_data) @@ -121,13 +121,13 @@ def main(args): linewidth=4, label='Posterior') plt.xlim([-1, 1]) if num_data > 1: - plt.annotate(f'$N={num_data}$', (-0.75, 5)) + plt.annotate(f'$N={num_data}$', (-0.75, 5), fontsize=legend_fsize) plt.ylim([0, 28]) # fix axis height for comparisons plt.xticks(fontsize=tick_fsize) plt.yticks(fontsize=tick_fsize) plt.xlabel("$\\Lambda$", fontsize=1.25*tick_fsize) - plt.legend(fontsize=leg_fsize, loc='upper left') + plt.legend(fontsize=legend_fsize, loc='upper left') if save: plt.savefig(f'{fdir}/bip-vs-sip-{num_data}.png', bbox_inches='tight') @@ -149,12 +149,12 @@ def main(args): plt.xlim([-1, 1]) if num_data > 1: - plt.annotate(f'$N={num_data}$', (-0.75, 5)) + plt.annotate(f'$N={num_data}$', (-0.75, 5), fontsize=legend_fsize) plt.ylim([0, 20]) # fix axis height for comparisons plt.xticks(fontsize=tick_fsize) plt.yticks(fontsize=tick_fsize) plt.xlabel("$\\mathcal{D}$", fontsize=1.25*tick_fsize) - plt.legend(fontsize=leg_fsize, loc='upper left') + plt.legend(fontsize=legend_fsize, loc='upper left') if save: plt.savefig(f'{fdir}/bip-vs-sip-pf-{num_data}.png', bbox_inches='tight')