From 566e149ed840a913bfef9c0d7bf82feb41d6735d Mon Sep 17 00:00:00 2001 From: Peter Carbonetto Date: Tue, 16 Aug 2016 13:56:15 -0500 Subject: [PATCH 01/12] Added calculation of percentiles to example1.m without using Matlab Statistics toolbox. --- examples/example1.m | 6 ++++-- src/percentile.m | 26 ++++++++++++++++++++++++++ 2 files changed, 30 insertions(+), 2 deletions(-) create mode 100644 src/percentile.m diff --git a/examples/example1.m b/examples/example1.m index 1d82ea8..fe17e38 100644 --- a/examples/example1.m +++ b/examples/example1.m @@ -4,7 +4,7 @@ addpath(genpath('../src')); % load summary-level data -example_data = matfile('example1.mat'); +example_data = matfile('/tmp/pcarbo/example1.mat'); R = example_data.R; % population LD matrix bwd = example_data.bwd; % bandwidth of R @@ -19,7 +19,9 @@ % check model assumption chat = sqrt((betahat(:).^2) ./ (Nsnp(:).*(se(:).^2) + betahat(:).^2)); fprintf('Look at the five-number summary of log10 sample phenotype-genotype correlations: \n') -disp(prctile(log10(chat), 0:25:100)); +disp(percentile(log10(chat),0:0.25:1)); + +return % fit rss-bvsr model fprintf('Start RSS-BVSR analysis ... \n'); diff --git a/src/percentile.m b/src/percentile.m new file mode 100644 index 0000000..4ddaad3 --- /dev/null +++ b/src/percentile.m @@ -0,0 +1,26 @@ +% y = PERCENTILE(x, pct) +% Example: percentile(x, 0.10) is the value that is higher than 10% +% of the elements of x, and less than the remaining 90%. +% If the length of x is such that there is no element of x exactly +% corresponding to the 'pct' position, a weighted average of the two +% adjacent values is used. pct must be between 0 and 1 inclusive. +% +% percentile(x, 1) is a slow way to get max(x). +% percentile(x, 0.5) is the median of x. +% percentile(x, 0) is a slow way to get min(x). +% +% If x is a matrix, percentile operates on columns, returning multiple +% columns. +% If pct is a vector, multiple rows are returned, one per element of pct. +% +function y = percentile (x, pct) + if (size(x,1) == 1) + x = x.'; + end + x = sort(x); + n = ((size(x,1) - 1) * pct(:) + 1); + r = rem(n,1); + r1 = r * ones(1,size(x,2)); + y = (1-r1) .* x(n-r,:); + ix = find(r); + y(ix,:) = y(ix,:) + r1(ix,:) .* x(n(ix) - r(ix) + 1,:); From 691aa8fa640d28dc393cc2a379a0891dacb5384b Mon Sep 17 00:00:00 2001 From: Peter Carbonetto Date: Tue, 16 Aug 2016 14:09:15 -0500 Subject: [PATCH 02/12] Fixed up formatting a bit. --- examples/example1.m | 32 ++++++++++++++++++++------------ 1 file changed, 20 insertions(+), 12 deletions(-) diff --git a/examples/example1.m b/examples/example1.m index fe17e38..20ba15d 100644 --- a/examples/example1.m +++ b/examples/example1.m @@ -4,6 +4,7 @@ addpath(genpath('../src')); % load summary-level data +fprintf('Loading data.\n'); example_data = matfile('/tmp/pcarbo/example1.mat'); R = example_data.R; % population LD matrix @@ -16,36 +17,43 @@ fprintf('Data set is loaded ... \n'); -% check model assumption +% Check model assumption. chat = sqrt((betahat(:).^2) ./ (Nsnp(:).*(se(:).^2) + betahat(:).^2)); -fprintf('Look at the five-number summary of log10 sample phenotype-genotype correlations: \n') +fprintf('Look at the five-number summary of log10 sample\n'); +fprintf('phenotype-genotype correlations: \n') disp(percentile(log10(chat),0:0.25:1)); -return - -% fit rss-bvsr model +% Fit rss-bvsr model. fprintf('Start RSS-BVSR analysis ... \n'); Ndraw = 2e6; Nburn = 2e5; Nthin = 9e1; tic; -% 1. simulate posterior samples via mcmc -[betasam, gammasam, hsam, logpisam, Naccept] = rss_bvsr(betahat, se, R, Nsnp, Ndraw, Nburn, Nthin); -% 2. compute the posterior samples of pve + +% 1. Simulate posterior samples via mcmc. +[betasam, gammasam, hsam, logpisam, Naccept] = ... + rss_bvsr(betahat, se, R, Nsnp, Ndraw, Nburn, Nthin); + +% 2. Compute the posterior samples of PVE. matrix_type = 1; M = length(hsam); pvesam = zeros(M,1); progress_bar = progress('init','start PVE calculation'); for i = 1:M - pvesam(i) = compute_pve(betasam(i,:), betahat, se, Nsnp, bwd, BR, matrix_type); - progress_bar = progress(progress_bar, i/M); + pvesam(i) = compute_pve(betasam(i,:), betahat, se, Nsnp, bwd, BR,... + matrix_type); + progress_bar = progress(progress_bar, i/M); end runtime = toc; -% 3. save output -save('example1_rssbvsr.mat', 'betasam', 'gammasam', 'hsam', 'logpisam', 'pvesam', 'Naccept', 'runtime', '-v7.3'); + +% 3. Save output. +save('example1_rssbvsr.mat', 'betasam', 'gammasam', 'hsam', 'logpisam',... + 'pvesam', 'Naccept', 'runtime', '-v7.3'); clearvars betasam gammasam hsam logpisam pvesam Naccept runtime; fprintf('RSS-BVSR analysis is done ... \n'); +return + % fit rss-bslmm model fprintf('Start RSS-BSLMM analysis ... \n'); Ndraw = 2e6; From cace5817261aa01f55cc5169137df2a56df588d4 Mon Sep 17 00:00:00 2001 From: Peter Carbonetto Date: Tue, 16 Aug 2016 23:16:41 -0500 Subject: [PATCH 03/12] Replaced calls to 'unidrnd' with 'randint'. --- examples/startup.m | 3 +++ src/propose_gamma.m | 6 +++--- src/randint.m | 11 +++++++++++ src/update_zlabel.m | 6 +++--- 4 files changed, 20 insertions(+), 6 deletions(-) create mode 100644 examples/startup.m create mode 100644 src/randint.m diff --git a/examples/startup.m b/examples/startup.m new file mode 100644 index 0000000..ffb0d20 --- /dev/null +++ b/examples/startup.m @@ -0,0 +1,3 @@ +addpath('~/matlab/extras'); +addpath('~/matlab/extras/lightspeed'); +addpath('~/matlab/extras/lapack'); diff --git a/src/propose_gamma.m b/src/propose_gamma.m index c31de0c..6fd012a 100644 --- a/src/propose_gamma.m +++ b/src/propose_gamma.m @@ -40,7 +40,7 @@ % rare case 2: all snps in the current model; must remove one snp if ngamma_new == p - col_id = unidrnd(ngamma_new); % uniformly pick one snp to remove + col_id = randint(1,ngamma_new); % uniformly pick one snp to remove r_remove = rank_new(col_id); remove(snpmap, r_remove); rank_new(col_id) = []; % remove the snp @@ -70,7 +70,7 @@ elseif gamma_flag >=0.4 && gamma_flag < 0.8 % remove a snp % uniformly draw a snp from the current model - col_id = unidrnd(ngamma_new); + col_id = randint(1,ngamma_new); r_remove = rank_new(col_id); % return the location (a.k.a. rank) of the chosen snp % calculate Pr(picking a snp NOT in the proposed model) prob_total = 1; @@ -84,7 +84,7 @@ ngamma_new = ngamma_new - 1; else % swap inclusion/exclusion status of two snps - col_id = unidrnd(ngamma_new); % pick a snp in the model to remove + col_id = randint(1,ngamma_new); % pick a snp in the model to remove r_remove = rank_new(col_id); while true r_add = sample(p_gamma); % randomly pick a snp based on marginal association rank diff --git a/src/randint.m b/src/randint.m new file mode 100644 index 0000000..64a6243 --- /dev/null +++ b/src/randint.m @@ -0,0 +1,11 @@ +% RANDINT Generate a matrix of random integers. +% RANDINT(A,B,M,N) generates an M x N matrix of random numbers +% uniformly distributed in the set of integers between A and B. +function X = randint (a, b, m, n) + if nargin < 3 + m = 1; + n = 1; + end + U = rand(m,n); + U = U .* (U < 1); + X = floor(a + (b-a+1)*U); diff --git a/src/update_zlabel.m b/src/update_zlabel.m index 6acc89c..fe58cd3 100644 --- a/src/update_zlabel.m +++ b/src/update_zlabel.m @@ -211,7 +211,7 @@ % rare case 2: all snps in the current model; must remove one snp if ngamma == p - r_rmv = unidrnd(ngamma); % uniformly pick one snp to remove + r_rmv = randint(1,ngamma); % uniformly pick one snp to remove z_logratio = z_logratio + log(ngamma); % compute the contribution to log MH ratio snp_change = r_rmv; end @@ -235,7 +235,7 @@ elseif gamma_flag >=0.4 && gamma_flag < 0.8 % remove a snp % uniformly draw a snp from the current model - col_id = unidrnd(ngamma); + col_id = randint(1,ngamma); r_rmv = rank(col_id); % return the location of the chosen snp % calculate Pr(picking a snp NOT in the proposed model) prob_total = 1; @@ -246,7 +246,7 @@ snp_change = r_rmv; else % swap inclusion/exclusion status of two snps - col_id = unidrnd(ngamma); % pick a snp in the model to remove + col_id = randint(1,ngamma); % pick a snp in the model to remove r_rmv = rank(col_id); while true r_add = sample(p_gamma); % randomly pick a snp based on marginal association rank From fc61b67f1ef3b89b2e87f69969e0a58f740de51a Mon Sep 17 00:00:00 2001 From: Peter Carbonetto Date: Wed, 17 Aug 2016 16:43:26 -0500 Subject: [PATCH 04/12] Made it to "RSS-BVSR analysis is done..." in example1.m. --- examples/example1.m | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/examples/example1.m b/examples/example1.m index 20ba15d..653c3c1 100644 --- a/examples/example1.m +++ b/examples/example1.m @@ -46,13 +46,15 @@ end runtime = toc; + + % 3. Save output. -save('example1_rssbvsr.mat', 'betasam', 'gammasam', 'hsam', 'logpisam',... - 'pvesam', 'Naccept', 'runtime', '-v7.3'); +save('/tmp/pcarbo/example1_rssbvsr.mat', 'betasam', 'gammasam', 'hsam',... + 'logpisam','pvesam', 'Naccept', 'runtime', '-v7.3'); clearvars betasam gammasam hsam logpisam pvesam Naccept runtime; fprintf('RSS-BVSR analysis is done ... \n'); -return +% *** This is where I am in the script. *** % fit rss-bslmm model fprintf('Start RSS-BSLMM analysis ... \n'); From 09ae1deaca1ed0b9aa7126dd1cea11ae14cfdf06 Mon Sep 17 00:00:00 2001 From: Peter Carbonetto Date: Thu, 18 Aug 2016 12:42:09 -0500 Subject: [PATCH 05/12] I got the entire example1.m to run to completion. --- examples/example1.m | 30 ++++++++++++++++++------------ 1 file changed, 18 insertions(+), 12 deletions(-) diff --git a/examples/example1.m b/examples/example1.m index 653c3c1..041910a 100644 --- a/examples/example1.m +++ b/examples/example1.m @@ -46,36 +46,37 @@ end runtime = toc; - - % 3. Save output. save('/tmp/pcarbo/example1_rssbvsr.mat', 'betasam', 'gammasam', 'hsam',... 'logpisam','pvesam', 'Naccept', 'runtime', '-v7.3'); clearvars betasam gammasam hsam logpisam pvesam Naccept runtime; fprintf('RSS-BVSR analysis is done ... \n'); -% *** This is where I am in the script. *** - % fit rss-bslmm model fprintf('Start RSS-BSLMM analysis ... \n'); Ndraw = 2e6; Nburn = 2e5; Nthin = 9e1; tic; + % 1. simulate posterior samples via mcmc -[bsam, zsam, lpsam, hsam, rsam, Naccept] = rss_bslmm(betahat, se, R, Nsnp, Ndraw, Nburn, Nthin); +[bsam, zsam, lpsam, hsam, rsam, Naccept] = ... + rss_bslmm(betahat, se, R, Nsnp, Ndraw, Nburn, Nthin); + % 2. compute the posterior samples of pve matrix_type = 1; M = length(hsam); pvesam = zeros(M,1); progress_bar = progress('init','start PVE calculation'); for i = 1:M - pvesam(i) = compute_pve(bsam(i,:), betahat, se, Nsnp, bwd, BR, matrix_type); - progress_bar = progress(progress_bar, i/M); + pvesam(i) = compute_pve(bsam(i,:), betahat, se, Nsnp, bwd, BR, matrix_type); + progress_bar = progress(progress_bar, i/M); end runtime = toc; + % 3. save output -save('example1_rssbslmm.mat', 'bsam', 'zsam', 'lpsam', 'hsam', 'rsam', 'pvesam', 'Naccept', 'runtime', '-v7.3'); +save('/tmp/pcarbo/example1_rssbslmm.mat', 'bsam', 'zsam', 'lpsam', 'hsam',... + 'rsam', 'pvesam', 'Naccept', 'runtime', '-v7.3'); clearvars bsam zsam lpsam hsam rsam pvesam Naccept runtime; fprintf('RSS-BSLMM analysis is done ... \n'); @@ -86,20 +87,25 @@ Nthin = 1e3; sigma_beta = [0 0.001 0.003 0.01 0.03 0.1 0.3 1 3]; tic; + % 1. simulate posterior samples via mcmc -[bsam, zsam, wsam, lsam, Naccept] = rss_ash(betahat, se, R, Nsnp, sigma_beta, Ndraw, Nburn, Nthin); +[bsam, zsam, wsam, lsam, Naccept] =... + rss_ash(betahat, se, R, Nsnp, sigma_beta, Ndraw, Nburn, Nthin); + % 2. compute the posterior samples of pve matrix_type = 1; M = length(lsam); pvesam = zeros(M,1); progress_bar = progress('init','start PVE calculation'); for i = 1:M - pvesam(i) = compute_pve(bsam(i,:), betahat, se, Nsnp, bwd, BR, matrix_type); - progress_bar = progress(progress_bar, i/M); + pvesam(i) = compute_pve(bsam(i,:), betahat, se, Nsnp, bwd, BR, matrix_type); + progress_bar = progress(progress_bar, i/M); end runtime = toc; + % 3. save output -save('example1_rssash.mat', 'bsam', 'zsam', 'wsam', 'lsam', 'pvesam', 'Naccept', 'runtime', '-v7.3'); +save('/tmp/pcarbo/example1_rssash.mat', 'bsam', 'zsam', 'wsam', 'lsam', ... + 'pvesam', 'Naccept', 'runtime', '-v7.3'); clearvars bsam zsam wsam lsam pvesam Naccept runtime; fprintf('RSS-ASH analysis is done ... \n'); From ac2c760d436f9b1bd0aaa8163391d03e3bc3d1f6 Mon Sep 17 00:00:00 2001 From: Peter Carbonetto Date: Thu, 18 Aug 2016 13:07:51 -0500 Subject: [PATCH 06/12] Added working_dir parameter to example1.m. --- examples/example1.m | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/examples/example1.m b/examples/example1.m index 041910a..47b54ee 100644 --- a/examples/example1.m +++ b/examples/example1.m @@ -1,11 +1,15 @@ clear; +% Set this to the directory containing example1.mat and where the output +% files will be stored. +working_dir = '.'; + % add search paths addpath(genpath('../src')); % load summary-level data fprintf('Loading data.\n'); -example_data = matfile('/tmp/pcarbo/example1.mat'); +example_data = matfile(cat(2,working_dir,'/','example1.mat')); R = example_data.R; % population LD matrix bwd = example_data.bwd; % bandwidth of R @@ -47,8 +51,9 @@ runtime = toc; % 3. Save output. -save('/tmp/pcarbo/example1_rssbvsr.mat', 'betasam', 'gammasam', 'hsam',... - 'logpisam','pvesam', 'Naccept', 'runtime', '-v7.3'); +save(cat(2,working_dir,'/','example1_rssbvsr.mat'),... + 'betasam', 'gammasam', 'hsam', 'logpisam', 'pvesam', ... + 'Naccept', 'runtime', '-v7.3'); clearvars betasam gammasam hsam logpisam pvesam Naccept runtime; fprintf('RSS-BVSR analysis is done ... \n'); @@ -75,8 +80,9 @@ runtime = toc; % 3. save output -save('/tmp/pcarbo/example1_rssbslmm.mat', 'bsam', 'zsam', 'lpsam', 'hsam',... - 'rsam', 'pvesam', 'Naccept', 'runtime', '-v7.3'); +save(cat(2,working_dir,'/','example1_rssbslmm.mat'),... + 'bsam', 'zsam', 'lpsam', 'hsam', 'rsam', 'pvesam',... + 'Naccept', 'runtime', '-v7.3'); clearvars bsam zsam lpsam hsam rsam pvesam Naccept runtime; fprintf('RSS-BSLMM analysis is done ... \n'); @@ -104,8 +110,9 @@ runtime = toc; % 3. save output -save('/tmp/pcarbo/example1_rssash.mat', 'bsam', 'zsam', 'wsam', 'lsam', ... - 'pvesam', 'Naccept', 'runtime', '-v7.3'); +save(cat(2,working_dir,'/','example1_rssash.mat'),... + 'bsam', 'zsam', 'wsam', 'lsam', 'pvesam', 'Naccept', ... + 'runtime', '-v7.3'); clearvars bsam zsam wsam lsam pvesam Naccept runtime; fprintf('RSS-ASH analysis is done ... \n'); From 114d3ee4065a9997bdb67730c76050046ddd91ef Mon Sep 17 00:00:00 2001 From: Peter Carbonetto Date: Fri, 19 Aug 2016 16:44:44 -0500 Subject: [PATCH 07/12] example2.m completed successfully; need to check output. --- examples/example2.m | 30 ++++++++++++++++++++++-------- 1 file changed, 22 insertions(+), 8 deletions(-) diff --git a/examples/example2.m b/examples/example2.m index 1b6c7c9..736e66f 100644 --- a/examples/example2.m +++ b/examples/example2.m @@ -1,10 +1,15 @@ clear; +% Set this to the directory containing example1.mat and where the output +% files will be stored. +working_dir = '/tmp/pcarbo'; + % add search paths addpath(genpath('../src')); % load summary-level data -example_data = matfile('example2.mat'); +fprintf('Loading data.\n'); +example_data = matfile(cat(2,working_dir,'/','example2.mat')); betahat = example_data.betahat; % single-SNP effect size estimates se = example_data.se; % standard error of betahat @@ -16,32 +21,41 @@ Nthin = 9e1; % load the three types of ld matrix -genotype_data = matfile('genotype2.mat'); +genotype_data = matfile(cat(2,working_dir,'/','genotype2.mat')); % fit rss-bvsr model when R is the sample correlation in the panel cohort_R = genotype_data.cohort_R; tic; -[betasam, gammasam, hsam, logpisam, Naccept] = rss_bvsr(betahat, se, cohort_R, Nsnp, Ndraw, Nburn, Nthin); +[betasam, gammasam, hsam, logpisam, Naccept] = ... + rss_bvsr(betahat, se, cohort_R, Nsnp, Ndraw, Nburn, Nthin); runtime = toc; -save('example2_rss_c.mat', 'betasam', 'gammasam', 'hsam', 'logpisam', 'Naccept', 'runtime', '-v7.3'); +save(cat(2,working_dir,'/','example2_rss_c.mat'),... + 'betasam', 'gammasam', 'hsam', 'logpisam', 'Naccept',... + 'runtime', '-v7.3'); clearvars betasam gammasam hsam logpisam Naccept runtime; fprintf('RSS-C is done ... \n'); % fit rss-bvsr model when R is the sample correlation in the panel shrink_R = genotype_data.shrink_R; tic; -[betasam, gammasam, hsam, logpisam, Naccept] = rss_bvsr(betahat, se, shrink_R, Nsnp, Ndraw, Nburn, Nthin); +[betasam, gammasam, hsam, logpisam, Naccept] = ... + rss_bvsr(betahat, se, shrink_R, Nsnp, Ndraw, Nburn, Nthin); runtime = toc; -save('example2_rss_s.mat', 'betasam', 'gammasam', 'hsam', 'logpisam', 'Naccept', 'runtime', '-v7.3'); +save(cat(2,working_dir,'/','example2_rss_s.mat'),... + 'betasam', 'gammasam', 'hsam', 'logpisam', 'Naccept',... + 'runtime', '-v7.3'); clearvars betasam gammasam hsam logpisam Naccept runtime; fprintf('RSS is done ... \n'); % fit rss-bvsr model when R is the sample correlation in the panel panel_R = genotype_data.panel_R; tic; -[betasam, gammasam, hsam, logpisam, Naccept] = rss_bvsr(betahat, se, panel_R, Nsnp, Ndraw, Nburn, Nthin); +[betasam, gammasam, hsam, logpisam, Naccept] = ... + rss_bvsr(betahat, se, panel_R, Nsnp, Ndraw, Nburn, Nthin); runtime = toc; -save('example2_rss_p.mat', 'betasam', 'gammasam', 'hsam', 'logpisam', 'Naccept', 'runtime', '-v7.3'); +save(cat(2,working_dir,'/','example2_rss_p.mat'),... + 'betasam', 'gammasam', 'hsam', 'logpisam', ... + 'Naccept', 'runtime', '-v7.3'); clearvars betasam gammasam hsam logpisam Naccept runtime; fprintf('RSS-P is done ... \n'); From 559dd8808fa094c7a9b45605bf03644c72c8ce0e Mon Sep 17 00:00:00 2001 From: Peter Carbonetto Date: Wed, 24 Aug 2016 19:03:37 -0500 Subject: [PATCH 08/12] Working on updates to example3.m, and added example3_plots.m. --- examples/example2.m | 2 +- examples/example3.m | 52 +++++++++++++++++++++++++-------------- examples/example3_plots.m | 35 ++++++++++++++++++++++++++ examples/example4.m | 2 ++ 4 files changed, 72 insertions(+), 19 deletions(-) create mode 100644 examples/example3_plots.m diff --git a/examples/example2.m b/examples/example2.m index 736e66f..601fea1 100644 --- a/examples/example2.m +++ b/examples/example2.m @@ -2,7 +2,7 @@ % Set this to the directory containing example1.mat and where the output % files will be stored. -working_dir = '/tmp/pcarbo'; +working_dir = '.'; % add search paths addpath(genpath('../src')); diff --git a/examples/example3.m b/examples/example3.m index 4e46c4d..bb5a4b7 100644 --- a/examples/example3.m +++ b/examples/example3.m @@ -1,10 +1,15 @@ clear; +% Set this to the directory containing example1.mat and where the output +% files will be stored. +working_dir = '/tmp/pcarbo'; + % add search paths addpath(genpath('../src')); % load summary-level data -example_data = matfile('example1.mat'); +fprintf('Loading data.\n'); +example_data = matfile(cat(2,working_dir,'/','example1.mat')); R = example_data.R; % population LD matrix bwd = example_data.bwd; % bandwidth of R @@ -22,40 +27,49 @@ abs_diff = abs(se_1 - se_2); fprintf('Look at the five-number summary of the absolute difference between two definitions of SE: \n') -disp(prctile(log10(abs_diff), 0:25:100)); % require stat toolbox +disp(percentile(log10(abs_diff),0:0.25:1)); + +% fit rss-bvsr model with the first definition of se +fprintf('Start RSS-BVSR analysis on the first definition of SE... \n'); % mcmc info Ndraw = 2e6; Nburn = 2e5; Nthin = 9e1; -% fit rss-bvsr model with the first definition of se -fprintf('Start RSS-BVSR analysis on the first definition of SE... \n'); - tic; -% 1. simulate posterior samples via mcmc -[betasam, gammasam, hsam, logpisam, Naccept] = rss_bvsr(betahat, se_1, R, Nsnp, Ndraw, Nburn, Nthin); -% 2. compute the posterior samples of pve + +% 1. Simulate posterior samples via mcmc. +[betasam, gammasam, hsam, logpisam, Naccept] = ... + rss_bvsr(betahat, se_1, R, Nsnp, Ndraw, Nburn, Nthin); + +% 2. Compute the posterior samples of PVE. matrix_type = 1; M = length(hsam); pvesam = zeros(M,1); progress_bar = progress('init','start PVE calculation'); for i = 1:M - pvesam(i) = compute_pve(betasam(i,:), betahat, se_1, Nsnp, bwd, BR, matrix_type); - progress_bar = progress(progress_bar, i/M); + pvesam(i) = compute_pve(betasam(i,:), betahat, se_1, Nsnp, bwd, BR, matrix_type); + progress_bar = progress(progress_bar, i/M); end runtime = toc; -% 3. save output -save('example3_se1.mat', 'betasam', 'gammasam', 'hsam', 'logpisam', 'pvesam', 'Naccept', 'runtime', '-v7.3'); + +% 3. Save output. +save(cat(2,working_dir,'/','example3_se1.mat'),... + 'betasam', 'gammasam', 'hsam', 'logpisam', 'pvesam',... + 'Naccept', 'runtime', '-v7.3'); clearvars betasam gammasam hsam logpisam pvesam Naccept runtime; % fit rss-bvsr model with the second definition of se fprintf('Start RSS-BVSR analysis on the second definition of SE... \n'); tic; -% 1. simulate posterior samples via mcmc -[betasam, gammasam, hsam, logpisam, Naccept] = rss_bvsr(betahat, se_2, R, Nsnp, Ndraw, Nburn, Nthin); -% 2. compute the posterior samples of pve + +% 1. Simulate posterior samples via mcmc. +[betasam, gammasam, hsam, logpisam, Naccept] = ... + rss_bvsr(betahat, se_2, R, Nsnp, Ndraw, Nburn, Nthin); + +% 2. Compute the posterior samples of PVE. matrix_type = 1; M = length(hsam); pvesam = zeros(M,1); @@ -65,7 +79,9 @@ progress_bar = progress(progress_bar, i/M); end runtime = toc; -% 3. save output -save('example3_se2.mat', 'betasam', 'gammasam', 'hsam', 'logpisam', 'pvesam', 'Naccept', 'runtime', '-v7.3'); -clearvars betasam gammasam hsam logpisam pvesam Naccept runtime; +% 3. Save output. +save(cat(2,working_dir,'/','example3_se2.mat'),... + 'betasam', 'gammasam', 'hsam', 'logpisam', 'pvesam',... + 'Naccept', 'runtime', '-v7.3'); +clearvars betasam gammasam hsam logpisam pvesam Naccept runtime; diff --git a/examples/example3_plots.m b/examples/example3_plots.m new file mode 100644 index 0000000..9357b9f --- /dev/null +++ b/examples/example3_plots.m @@ -0,0 +1,35 @@ +% Note: Only works in MATLAB 2014a or later since "histogram" function +% was introduced in that version of MATLAB. +se1=matfile('example3_se1.mat'); +se2=matfile('example3_se2.mat'); + +figure(1); +subplot(2,2,1); +scatter(mean(se1.betasam)', mean(se2.betasam)','b','*'); +% Avoid using Statistics Toolbox: +hold on +plot([-1 2],[-1 2],'-','Color','red'); +hold off +% ozline = refline([1 0]); +% ozline.Color = 'r'; +xlabel('using se_1','Interpreter','none'); +ylabel('using se_2','Interpreter','none'); +title('Posterior mean of \beta'); + +subplot(2,2,2); +h1=histogram(se1.hsam,'Normalization','pdf'); +hold on; +h2= histogram(se2.hsam,'Normalization','pdf'); +title('Posterior density of h'); + +subplot(2,2,3); +h1=histogram(se1.logpisam,'Normalization','pdf'); +hold on; +h2= histogram(se2.logpisam,'Normalization','pdf'); +title('Posterior density of log(\pi)'); + +subplot(2,2,4); +h1=histogram(se1.pvesam,'Normalization','pdf'); +hold on; +h2=histogram(se2.pvesam,'Normalization','pdf'); +title('Posterior density of PVE'); \ No newline at end of file diff --git a/examples/example4.m b/examples/example4.m index 244f03c..1e98b06 100644 --- a/examples/example4.m +++ b/examples/example4.m @@ -33,6 +33,8 @@ bf = zeros(Nrep, 2); % absolute difference of estimated Bayes factors bf_reldiff = zeros(Nrep, 1); % relative difference of estimated Bayes factors +return + % generate the individual-level and summary-level data genotype = matfile('genotype.mat'); C = genotype.C; From 9cb328522c5513f9ea50b6d5b839ea215bc7bd96 Mon Sep 17 00:00:00 2001 From: Peter Carbonetto Date: Thu, 25 Aug 2016 14:19:05 -0500 Subject: [PATCH 09/12] I'm able to successfully run example4.m. --- examples/example3.m | 2 +- examples/example4.m | 36 ++++++++++++++++++++++-------------- examples/startup.m | 1 + 3 files changed, 24 insertions(+), 15 deletions(-) diff --git a/examples/example3.m b/examples/example3.m index bb5a4b7..fcf5e38 100644 --- a/examples/example3.m +++ b/examples/example3.m @@ -2,7 +2,7 @@ % Set this to the directory containing example1.mat and where the output % files will be stored. -working_dir = '/tmp/pcarbo'; +working_dir = '.'; % add search paths addpath(genpath('../src')); diff --git a/examples/example4.m b/examples/example4.m index 1e98b06..20812c7 100644 --- a/examples/example4.m +++ b/examples/example4.m @@ -1,9 +1,12 @@ clear; +% Set this to the directory containing example1.mat and where the output +% files will be stored. +working_dir = '/tmp/pcarbo/example4'; + % add search paths addpath(genpath('../src_vb')); addpath('../misc'); -addpath('/home/xiangzhu/varbvs-master/varbvs-MATLAB/'); % set the number of replications prompt = 'What is the number of replications? '; @@ -33,14 +36,12 @@ bf = zeros(Nrep, 2); % absolute difference of estimated Bayes factors bf_reldiff = zeros(Nrep, 1); % relative difference of estimated Bayes factors -return - % generate the individual-level and summary-level data -genotype = matfile('genotype.mat'); +genotype = matfile(cat(2,working_dir,'/','genotype.mat')); C = genotype.C; [n,p] = size(C); -AH = matfile('AH_chr16.mat'); +AH = matfile(cat(2,working_dir,'/','AH_chr16.mat')); H = AH.H; % hypothesis matrix 3323x3160 A = AH.A; % annotation matrix 12758x3323 paths = find(H(:,end)); % signal transduction (Biosystem, Reactome) @@ -52,7 +53,8 @@ myseed = 617 + i; % generate data under enrichment hypothesis - [true_para,individual_data,summary_data] = enrich_datamaker(C,thetatype,pve,myseed,snps); + [true_para,individual_data,summary_data] = ... + enrich_datamaker(C,thetatype,pve,myseed,snps); fprintf('Individual-level and summary-level data are ready ...\n'); % fix all hyper-parameters as their true values @@ -85,7 +87,8 @@ % run varbvs on the individual-level data options_n = struct('maxiter',1e8,'sa',sa,'logodds',theta0,'alpha',alpha0,... - 'mu',mu0,'sigma',sigma,'initialize_params',false,'verbose',false); + 'mu',mu0,'sigma',sigma,'initialize_params',false,... + 'verbose',false); fit_null = varbvs(X,[],y,[],'gaussian',options_n); @@ -93,7 +96,8 @@ logodds = repmat(theta0,p,ns); logodds(snps,:) = repmat(theta0+theta',length(snps),1); options_e = struct('maxiter',1e8,'sa',sa,'logodds',logodds,'alpha',alpha0,... - 'mu',mu0,'sigma',sigma,'initialize_params',false,'verbose',false); + 'mu',mu0,'sigma',sigma,'initialize_params',false,... + 'verbose',false); fit_gsea = varbvs(X,[],y,[],'gaussian',options_e); @@ -109,8 +113,10 @@ options = struct('alpha',alpha0,'mu',mu0,'verbose',false); - [logw_nr,alpha_nr,mu_nr,s_nr] = rss_varbvsr(betahat,se,SiRiS,sigb,logodds_n,options); - [logw_er,alpha_er,mu_er,s_er] = rss_varbvsr(betahat,se,SiRiS,sigb,logodds_e,options); + [logw_nr,alpha_nr,mu_nr,s_nr] = ... + rss_varbvsr(betahat,se,SiRiS,sigb,logodds_n,options); + [logw_er,alpha_er,mu_er,s_er] = ... + rss_varbvsr(betahat,se,SiRiS,sigb,logodds_e,options); bf_r = exp(logw_er-logw_nr); clear options betahat se SiRiS; @@ -135,9 +141,11 @@ bf_reldiff(i) = (bf_b-bf_r)/bf_b; fprintf('Trial %d is done ...\n', i); - end -% Save the output -output_name = strcat('example4-theta0',num2str(-theta0),'-theta',num2str(theta),'-',num2str(Nrep), 'trials.mat'); -save(output_name,'alpha_n_diff','alpha_e_diff','mu_n_diff','mu_e_diff','s_n_diff','s_e_diff','bf','bf_reldiff'); +% Save the output. +output_name = ... + strcat('example4-theta0',num2str(-theta0),'-theta',... + num2str(theta),'-',num2str(Nrep),'trials.mat'); +save(cat(2,working_dir,'/',output_name),'alpha_n_diff','alpha_e_diff',... + 'mu_n_diff','mu_e_diff','s_n_diff','s_e_diff','bf','bf_reldiff'); diff --git a/examples/startup.m b/examples/startup.m index ffb0d20..e3043e6 100644 --- a/examples/startup.m +++ b/examples/startup.m @@ -1,3 +1,4 @@ addpath('~/matlab/extras'); addpath('~/matlab/extras/lightspeed'); addpath('~/matlab/extras/lapack'); +addpath('~/git/varbvs/varbvs-MATLAB'); From 13e8a3413a484acd06ed02f850b69daa696f6f76 Mon Sep 17 00:00:00 2001 From: Peter Carbonetto Date: Fri, 26 Aug 2016 13:52:24 -0500 Subject: [PATCH 10/12] Added a script to generate plots for example4.m without the statistics toolbox. --- examples/example3_plots.m | 9 +++++---- examples/example4_plots.m | 29 +++++++++++++++++++++++++++++ 2 files changed, 34 insertions(+), 4 deletions(-) create mode 100644 examples/example4_plots.m diff --git a/examples/example3_plots.m b/examples/example3_plots.m index 9357b9f..d9b0f1c 100644 --- a/examples/example3_plots.m +++ b/examples/example3_plots.m @@ -1,7 +1,8 @@ -% Note: Only works in MATLAB 2014a or later since "histogram" function -% was introduced in that version of MATLAB. -se1=matfile('example3_se1.mat'); -se2=matfile('example3_se2.mat'); +% Run this script after example3.m. Note: This script only works in MATLAB +% 2014a or later since "histogram" function was introduced in that version +% of MATLAB. +se1 = matfile('example3_se1.mat'); +se2 = matfile('example3_se2.mat'); figure(1); subplot(2,2,1); diff --git a/examples/example4_plots.m b/examples/example4_plots.m new file mode 100644 index 0000000..995410f --- /dev/null +++ b/examples/example4_plots.m @@ -0,0 +1,29 @@ +% Run this script after running example4.m. +load('example4-theta04-theta2-100trials.mat'); + +figure(1); +clf; +subplot(2,2,1); +bplot([alpha_n_diff mu_n_diff s_n_diff]); +set(gca,'YLim',[0 0.002]); +set(gca,'XTick',1:3,'XTickLabel',{'\alpha','\mu','s'},'FontSize',14); +ylabel('maximum absolute difference'); +title('Null models'); + +subplot(2,2,2); +bplot([alpha_e_diff mu_e_diff s_e_diff]) +set(gca,'YLim',[0 0.002]); +set(gca,'XTick',1:3,'XTickLabel',{'\alpha','\mu','s'},'FontSize',14); +ylabel('maximum absolute difference'); +title('Enrichment models'); + +subplot(2,2,3); +scatter(log10(bf(:, 1)), log10(bf(:, 2)),'b','*'); +hold on +plot([-5 30],[-5 30],'-','Color','red'); +hold off +% ozline = refline([1 0]); +% ozline.Color = 'r'; +xlabel('Log10 Bayes factors from VARBVS','Interpreter','none'); +ylabel('Log10 Bayes factors from RSS-VARBVSR','Interpreter','none'); +title(''); From 649451c2580a23f229f4f267a0c5072010bcab32 Mon Sep 17 00:00:00 2001 From: Peter Carbonetto Date: Fri, 26 Aug 2016 13:56:01 -0500 Subject: [PATCH 11/12] Adjusted example #4 plot. --- examples/example4_plots.m | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/example4_plots.m b/examples/example4_plots.m index 995410f..cb2a0b6 100644 --- a/examples/example4_plots.m +++ b/examples/example4_plots.m @@ -18,9 +18,9 @@ title('Enrichment models'); subplot(2,2,3); -scatter(log10(bf(:, 1)), log10(bf(:, 2)),'b','*'); +scatter(log10(bf(:, 1)), log10(bf(:, 2)),'b','o'); hold on -plot([-5 30],[-5 30],'-','Color','red'); +plot([-5 30],[-5 30],':','Color','red'); hold off % ozline = refline([1 0]); % ozline.Color = 'r'; From b97507335a7af6916ab7ea61917883b1bca7d8e9 Mon Sep 17 00:00:00 2001 From: Xiang Zhu Date: Thu, 15 Sep 2016 15:36:33 -0500 Subject: [PATCH 12/12] add the mdp variant of rss-bvsr --- .gitignore | 1 - src/rss_bvsr_mdp.m | 314 +++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 314 insertions(+), 1 deletion(-) create mode 100644 src/rss_bvsr_mdp.m diff --git a/.gitignore b/.gitignore index dae262b..e33660e 100644 --- a/.gitignore +++ b/.gitignore @@ -16,7 +16,6 @@ src/lightspeed* src/lapack* src/progress.m src/compute_pve2.m -src/rss_bvsr_mdp.m src/rss_bvsr_cap.m src/propose_gamma_cap.m src/rss_bslmm_mwt.m diff --git a/src/rss_bvsr_mdp.m b/src/rss_bvsr_mdp.m new file mode 100644 index 0000000..ac2c883 --- /dev/null +++ b/src/rss_bvsr_mdp.m @@ -0,0 +1,314 @@ +function [betasam, gammasam, hsam, logpisam, msam, ngsam, Naccept] = rss_bvsr_mdp(betahat, se, R, Nsnp, Ndraw, Nburn, Nthin) +% USAGE: simulate posterior draws under RSS model with BVSR prior (allowing for multiplicative dispersion) +% INPUT: +% betahat: the marginal effects estimates, p by 1 +% se: the standard errors of betahat, p by 1 +% q: betahat ./ se.^2, p by 1 +% R: symmetric, positive definite matrix, p by p +% Nsnp: sample size of SNPs, p by 1 +% Ndraw: the total number of MCMC samples to draw, integer +% Nburn: the number of initial samples to discard ("burn-in"), integer +% Nthin: keep every Nthin-th simulation draw from the sequence ("thin"), integer +% OUTPUT: +% loglik: the log likelihood, Ndraw by 1 +% Naccept: the number of accepted moves in MH step, scalar +% hsam: the MCMC sample of h, Nsam by 1 +% gammasam: the MCMC sample of z, Nsam by p +% logpisam: the MCMC sample of log(pi), Nsam by 1 +% betasam: the MCMC sample of beta, Nsam by p +% msam: the MCMC sample of m, Nsam by 1 +% ngsam: the MCMC sample of |gamma|, Nsam by 1 + + % make sure the input summary-level data are column vectors + betahat = betahat(:); + se = se(:); + Nsnp = Nsnp(:); + p = length(betahat); + fprintf('total number of variants analyzed: %d \n', p); + if length(se) ~= p + error('length of betahat and se must be the same!') + end + + Naccept = 0; + Nstart = Nburn + Nthin; + Nsam = length(Nstart:Nthin:Ndraw); + tsam = 0; + + % pre-compute some quantities used in mcmc iterations + xxyy = 1 ./ ( Nsnp .* (se.^2) ); + q = betahat ./ (se.^2); + Si = 1 ./ se; + tic; + SiRiS = repmat(Si, 1, p) .* R .* repmat(Si', p, 1); + clear R; + L = chol(SiRiS, 'lower'); + Liq = L \ q; + clear L; + bAb = dot(Liq, Liq); + clear Liq; + mtime = toc; + fprintf('heavy matrix computation completed after %d hours \n', round(mtime/3600)); + + % preallocate memory for the output matrices + gammasam = zeros(Nsam, p); + hsam = zeros(Nsam, 1); + betasam = zeros(Nsam, p); + logpisam = zeros(Nsam, 1); + msam = zeros(Nsam, 1); + ngsam = zeros(Nsam, 1); + + % initiate model + abz = abs(betahat ./ se); + [gamma_start, ngamma_start, snp_rank] = initiate_model(p, abz); + gamma_old = gamma_start; % 0/1, 1 by p + rank_old = find(gamma_old); % integer, 1 by length(gamma_old == 1) + logpi_old = log(ngamma_start / p); + h_old = rand; + m_old = 1+rand; % m is close to 1 for most of autosomes + fprintf('model initiation done \n') + + % rank-based proposal distribution + p_gamma = pmf_ugmix(p, 2e3); + p_gamma = p_gamma(snp_rank); + + % viz the mcmc progress + progress_bar = progress('init','start MCMC iteration'); + + % calculate the posterior p(h gamma pi|betahat) for the FIRST iteration + psi_old = calc_beta_variance(xxyy, logpi_old, h_old); + [betapost_old, ~, logpost_old] = calc_posterior_bvsr_mdp(q, SiRiS, bAb, rank_old, psi_old, logpi_old, m_old); + beta_old = zeros(1, p); beta_old(rank_old) = betapost_old; + +for t = 1:Ndraw + + % MH sampler for [gamma, h, log(pi)] with small world proposal + if rand < 0.33 + extrastep = randperm(19); + repeat = 1 + extrastep(1); + else + repeat = 1; + end + + logMHratio = 0; + + % symmetric uniform random walk proposal for h + h_new = propose_h(h_old, repeat); + logMHratio = logMHratio + 0; + + % symmetric normal random walk proposal for m + m_new = m_old + 1e-2*randn; + logMHratio = logMHratio + 0; + + % 'rank based proposal' for gamma (Guan & Stephens 2011) + [rank_new, gamma_logratio] = propose_gamma(rank_old, p_gamma, repeat); + logMHratio = logMHratio + gamma_logratio; + + % symmetric uniform random walk proposal for log(pi) + [logpi_new, pi_logratio] = propose_logpi(logpi_old, repeat, p); + logMHratio = logMHratio + pi_logratio; + + % calculate the posterior p(h gamma pi|betahat) for the proposed value + psi_new = calc_beta_variance(xxyy, logpi_new, h_new); + [betapost_new, ~, logpost_new] = calc_posterior_bvsr_mdp(q, SiRiS, bAb, rank_new, psi_new, logpi_new, m_new); + beta_new = zeros(1, p); beta_new(rank_new) = betapost_new; + + logMHratio = logMHratio + logpost_new - logpost_old; % from the posterior + + % make a choice for MH step + if logMHratio>0 || log(rand) + + gamma_old = zeros(1,p); gamma_old(rank_old) = 1; + + % save the MCMC sample of [beta gamma h log(pi)] + if t > Nburn && mod(t-Nburn, Nthin) == 0 + tsam = tsam + 1; + gammasam(tsam, :) = gamma_old; + hsam(tsam) = h_old; + betasam(tsam, :) = beta_old; + logpisam(tsam) = logpi_old; + ngsam(tsam) = length(rank_old); + msam(tsam) = m_old; + end + + progress_bar = progress(progress_bar, t/Ndraw); + +end + +end + +function [gamma_start, ngamma_start, snp_rank] = initiate_model(p, abz) +% USAGE: initiate latent labels by marginal association absolute z-score +% INPUT: +% p: the number of snps analyzed +% abz: the absolute z-score obtained from single-marker model +% OUTPUT: +% gamma_start: 1 by p, the initial latent labels +% ngamma_start: integer, the initial number of snps included in the model +% snp_rank: integer, 1 by p, the rank of snps based their marginal p-values + + gamma_start = zeros(1, p); + q_genome = invnormcdf( 1 - (0.025/p) ); % matlab stat tool box: norminv + in_loci = (abz > q_genome); + ngamma_start = sum(in_loci); + [~, snp_rank] = sort(abz, 'descend'); + baseline = min(10, p-1); % avoid that initial pi == 1 (-Inf log likelihood) + if ngamma_start < baseline + ngamma_start = baseline; + gamma_start(snp_rank(1:baseline)) = 1; + else + gamma_start(in_loci) = 1; + end +end + +function p_gamma = pmf_ugmix(p, geomean) +% USAGE: find pmf of mixture of uniform (0.3) and truncated geometric (0.7) rvs +% INPUT: +% p: scalar, specifying the support for the mixture rv on 1, 2, ... p +% geomean: the mean parameter of the truncated geometrical rv, scalar +% OUTPUT: +% p_gamma: pmf vector, 1 by p + + gp = 1 - exp(-1/geomean); % the same as piMASS + unif_term = (0.3/p) * ones(1, p); + geom_seq = (1-gp) .^ (0:p-1); + geom_term = 0.7*gp/(1-(1-gp)^p) * geom_seq; + p_gamma = unif_term + geom_term; + p_gamma = p_gamma / sum(p_gamma); +end + +function psi = calc_beta_variance(xxyy, logpi, h) +% USAGE: calculate sigma_beta^2 in the same way as BSLMM +% INPUT: +% xxyy: scalar +% logpi: scalar +% h: scalar +% OUTPUT: psi: scalar, variance of beta, sigma_beta^2 + + xxyysum = sum(xxyy); + pival = exp(logpi); + psi = h / (pival * xxyysum); +end + +function h_new = propose_h(h_old, repeat) +% USAGE: symmetric uniform random walk proposal for h +% h_new = h_old + Unif(-0.1, 0.1) in the range [0,1] +% INPUT: +% h_old: the current value of h, scalar +% repeat: small-world proposal, scalar +% OUTPUT: +% h_new: the proposed value of h, scalar + + h = h_old; + for i=1:repeat + h = h + (rand-0.5) * 0.2; + while true + if h < 0 + h = 2*0 - h; + end + if h > 1 + h = 2*1 - h; + end + if (h >= 0 && h <= 1); break; end % until the proposed value is inside [0, 1] + end + end + h_new = h; +% NB: this is a symmetric jump, so its contribution to log MH ratio is ZERO +end + +function [logpi_new, pi_logratio] = propose_logpi(logpi_old, repeat, p) +% USAGE: symmetric uniform random walk proposal for log(pi) +% log(pi_new) = log(pi_old) + Unif(-0.05, 0.05) range log(1/p, 1) +% INPUT: +% logpi_old: the current value of log(pi), scalar +% repeat: small-world proposal, integer +% p: the total number of snps, integer +% OUTPUT: +% logpi_new: the proposed value of log(pi), scalar +% pi_logratio: the contribution of proposal for log(pi) to log MH acceptance ratio +% NB: replace 1 with 1-1e-8 to avoid drawing pi == 1, which leads to -Inf log likelihood + + pi_logratio = 0; + for i=1:repeat + logpi_new = logpi_old + (rand-0.5)*0.1; + while true + if logpi_new < log(1/p) + logpi_new = 2*log(1/p) - logpi_new; + end + if logpi_new > log(1-1e-8) + logpi_new = 2*log(1) - logpi_new; + end + if (logpi_new >= log(1/p) && logpi_new < log(1-1e-8)); break; end + end + pi_logratio = pi_logratio + (logpi_new-logpi_old); + logpi_old = logpi_new; + end +end + +function [betapost, loglik, logpost] = calc_posterior_bvsr_mdp(q, SiRiS, bAb, rank, psi, logpi, m) +% USAGE: calculate p(h gamma pi m|betahat) up to some constant +% INPUT: +% q: betahat ./ se^2, p by 1 +% SiRiS: S^{-1}RS^{-1}, p by p, sparse matrix +% bAb: betahat' * (SRS)^{-1} * betahat, scalar +% rank: the marginal rank of snps included in the current model, 1 by n_gamma +% logpi: the current value of log(pi), scalar with range log(1/p,1) +% psi: the current value of prior variance of beta, scalar +% m: the multiplicative dispersion parameter, scalar +% OUTPUT: +% logpost: the log posterior p(h gamma pi|betahat), scalar +% betapost: sample positive components of beta from its conditional posterior, 1 by n_gamma +% loglik: the log likelihood p(betahat|h gamma), scalar + + logpost = 0; + n_gamma = length(rank); % the number of snps in the current model, integer + p = length(q); % the total number of snps, integer + Psiz = psi * ones(n_gamma, 1); % the vector of current value of psi(gamma, h), n_gamma by 1 + + if n_gamma ~= 0 + + RSBSMz = q(rank); % n_gamma by 1 + OmegaInvz = SiRiS(rank, rank) / m; % n_gamma by n_gamma + + % add a diagonal matrix Psiz to OmegaInvz + OmegaInvz(1:(n_gamma+1):n_gamma^2) = OmegaInvz(1:(n_gamma+1):n_gamma^2) + 1 ./ Psiz'; + + L = chol(OmegaInvz, 'lower'); + U = L'; + T = solve_tril(L, RSBSMz) / m; + muz = solve_triu(U, T); + + % sample positive components of beta from Normal(muz, Omega) + betapost_err = solve_triu(U, randn(n_gamma,1)); + betapost = muz' + betapost_err'; % convert the column to row vector + + % calculate log posterior for MH + % p(h gamma pi m|betahat) = Const. * p(betahat|h gamma) * p(h) * p(gamma|pi) * p(pi) * p(m) + + % first compute likelihood p(betahat| h gamma) + logdet = - sum(log(diag(L))) - 0.5 * sum(log(Psiz)) - 0.5*p*log(m); + logquad = (dot(RSBSMz, muz) - bAb) / (2*m); + logpost = logpost + logdet + logquad; + + else % rare, extreme case: no snps included in the model + betapost = []; + logpost = 0; + end + + loglik = logpost; + + % add the part from p(gamma|pi) * p(pi) + logpost = logpost + (p-n_gamma) * log(1-exp(logpi)) + (n_gamma-1) * logpi; + + % add the part from p(m): normal(1, 0.1^2) + logpost = logpost - (m-1)^2 / (2*0.01); +end +