From 6c4c72da22755b8dcb336c18b96ed62d01993c2f Mon Sep 17 00:00:00 2001 From: YANG SHAOYI <34128364+ysyecust@users.noreply.github.com> Date: Sun, 28 Apr 2024 18:31:13 +0800 Subject: [PATCH] feat: update ligand group method (#125) * feat: update ligand group method * test: change unidock-tools dock energy range * test: unidock_tools dock test case energy_range * Update random seed --- unidock/src/cuda/kernel.h | 60 +++++++++++++++---- unidock/src/cuda/monte_carlo.cu | 35 +++++++++++ unidock/src/main/main.cpp | 53 ++++++++++------ .../tests/ut/dock/test_run_unidock.py | 8 +-- 4 files changed, 121 insertions(+), 35 deletions(-) diff --git a/unidock/src/cuda/kernel.h b/unidock/src/cuda/kernel.h index 2e15918..03a72f4 100644 --- a/unidock/src/cuda/kernel.h +++ b/unidock/src/cuda/kernel.h @@ -96,10 +96,10 @@ static constexpr size_t MAX_THREAD_ = 41700000 ; // modified for vina1.2, to cal static constexpr size_t MAX_LIGAND_NUM_ = 10250; }; struct SmallConfig { -static constexpr size_t MAX_NUM_OF_LIG_TORSION_ = 14; +static constexpr size_t MAX_NUM_OF_LIG_TORSION_ = 8; static constexpr size_t MAX_NUM_OF_FLEX_TORSION_ = 1; -static constexpr size_t MAX_NUM_OF_RIGID_ = 14; -static constexpr size_t MAX_NUM_OF_ATOMS_ = 80; +static constexpr size_t MAX_NUM_OF_RIGID_ = 12; +static constexpr size_t MAX_NUM_OF_ATOMS_ = 40; static constexpr size_t SIZE_OF_MOLEC_STRUC_ = ((3 + 4 + MAX_NUM_OF_LIG_TORSION_ + MAX_NUM_OF_FLEX_TORSION_ + 1) * sizeof(float)); static constexpr size_t SIZE_OF_CHANGE_STRUC_ = @@ -132,10 +132,10 @@ static constexpr size_t MAX_THREAD_ = 41700000 ; // modified for vina1.2, to cal static constexpr size_t MAX_LIGAND_NUM_ = 10250; }; struct MediumConfig { -static constexpr size_t MAX_NUM_OF_LIG_TORSION_ = 18; +static constexpr size_t MAX_NUM_OF_LIG_TORSION_ = 16; static constexpr size_t MAX_NUM_OF_FLEX_TORSION_ = 1; static constexpr size_t MAX_NUM_OF_RIGID_ = 18; -static constexpr size_t MAX_NUM_OF_ATOMS_ = 100; +static constexpr size_t MAX_NUM_OF_ATOMS_ = 80; static constexpr size_t SIZE_OF_MOLEC_STRUC_ = ((3 + 4 + MAX_NUM_OF_LIG_TORSION_ + MAX_NUM_OF_FLEX_TORSION_ + 1) * sizeof(float)); static constexpr size_t SIZE_OF_CHANGE_STRUC_ = @@ -143,7 +143,7 @@ static constexpr size_t SIZE_OF_CHANGE_STRUC_ = static constexpr size_t MAX_HESSIAN_MATRIX_D_SIZE_ = ((6 + MAX_NUM_OF_LIG_TORSION_ + MAX_NUM_OF_FLEX_TORSION_) * (6 + MAX_NUM_OF_LIG_TORSION_ + MAX_NUM_OF_FLEX_TORSION_ + 1) / 2); -static constexpr size_t MAX_NUM_OF_LIG_PAIRS_ =330; +static constexpr size_t MAX_NUM_OF_LIG_PAIRS_ =600; static constexpr size_t MAX_NUM_OF_BFGS_STEPS_ =64; static constexpr size_t MAX_NUM_OF_RANDOM_MAP_= 1000 ;// not too large (stack overflow!) static constexpr size_t GRIDS_SIZE_ =37 ; // larger than vina1.1, max(XS_TYPE_SIZE, AD_TYPE_SIZE + 2) @@ -170,8 +170,8 @@ static constexpr size_t MAX_LIGAND_NUM_ = 10250; struct LargeConfig { static constexpr size_t MAX_NUM_OF_LIG_TORSION_ = 24; static constexpr size_t MAX_NUM_OF_FLEX_TORSION_ = 1; -static constexpr size_t MAX_NUM_OF_RIGID_ = 24; -static constexpr size_t MAX_NUM_OF_ATOMS_ = 100; +static constexpr size_t MAX_NUM_OF_RIGID_ = 36; +static constexpr size_t MAX_NUM_OF_ATOMS_ = 120; static constexpr size_t SIZE_OF_MOLEC_STRUC_ = ((3 + 4 + MAX_NUM_OF_LIG_TORSION_ + MAX_NUM_OF_FLEX_TORSION_ + 1) * sizeof(float)); static constexpr size_t SIZE_OF_CHANGE_STRUC_ = @@ -179,7 +179,7 @@ static constexpr size_t SIZE_OF_CHANGE_STRUC_ = static constexpr size_t MAX_HESSIAN_MATRIX_D_SIZE_ = ((6 + MAX_NUM_OF_LIG_TORSION_ + MAX_NUM_OF_FLEX_TORSION_) * (6 + MAX_NUM_OF_LIG_TORSION_ + MAX_NUM_OF_FLEX_TORSION_ + 1) / 2); -static constexpr size_t MAX_NUM_OF_LIG_PAIRS_ =512; +static constexpr size_t MAX_NUM_OF_LIG_PAIRS_ =1024; static constexpr size_t MAX_NUM_OF_BFGS_STEPS_ =64; static constexpr size_t MAX_NUM_OF_RANDOM_MAP_= 1000 ;// not too large (stack overflow!) static constexpr size_t GRIDS_SIZE_ =37 ; // larger than vina1.1, max(XS_TYPE_SIZE, AD_TYPE_SIZE + 2) @@ -204,10 +204,46 @@ static constexpr size_t MAX_THREAD_ = 41700000 ; // modified for vina1.2, to cal static constexpr size_t MAX_LIGAND_NUM_ = 10250; }; struct ExtraLargeConfig { +static constexpr size_t MAX_NUM_OF_LIG_TORSION_ = 36; +static constexpr size_t MAX_NUM_OF_FLEX_TORSION_ = 1; +static constexpr size_t MAX_NUM_OF_RIGID_ = 64; +static constexpr size_t MAX_NUM_OF_ATOMS_ = 160; +static constexpr size_t SIZE_OF_MOLEC_STRUC_ = +((3 + 4 + MAX_NUM_OF_LIG_TORSION_ + MAX_NUM_OF_FLEX_TORSION_ + 1) * sizeof(float)); +static constexpr size_t SIZE_OF_CHANGE_STRUC_ = + ((3 + 3 + MAX_NUM_OF_LIG_TORSION_ + MAX_NUM_OF_FLEX_TORSION_ + 1) * sizeof(float)); +static constexpr size_t MAX_HESSIAN_MATRIX_D_SIZE_ = + ((6 + MAX_NUM_OF_LIG_TORSION_ + MAX_NUM_OF_FLEX_TORSION_) + * (6 + MAX_NUM_OF_LIG_TORSION_ + MAX_NUM_OF_FLEX_TORSION_ + 1) / 2); +static constexpr size_t MAX_NUM_OF_LIG_PAIRS_ =2048; +static constexpr size_t MAX_NUM_OF_BFGS_STEPS_ =64; +static constexpr size_t MAX_NUM_OF_RANDOM_MAP_= 1000 ;// not too large (stack overflow!) +static constexpr size_t GRIDS_SIZE_ =37 ; // larger than vina1.1, max(XS_TYPE_SIZE, AD_TYPE_SIZE + 2) + +static constexpr size_t MAX_NUM_OF_GRID_MI_ =128; // 55 +static constexpr size_t MAX_NUM_OF_GRID_MJ_= 128; // 55 +static constexpr size_t MAX_NUM_OF_GRID_MK_ =128 ; // 81 +static constexpr size_t MAX_NUM_OF_GRID_POINT_ =512000; + +//#define GRID_MI 65//55 +//#define GRID_MJ 71//55 +//#define GRID_MK 61//81 +static constexpr size_t MAX_PRECAL_NUM_ATOM_ =30; +static constexpr size_t MAX_P_DATA_M_DATA_SIZE_ =MAX_NUM_OF_ATOMS_*(MAX_NUM_OF_ATOMS_+1)/2; +// modified for vina1.2, should be larger, n*(n+1)/2, n=num_of_atom, select n=140 +//#define MAX_NUM_OF_GRID_ATOMS 150 +static constexpr size_t FAST_SIZE_ =2051 ;// modified for vina1.2 m_max_cutoff^2 * factor + 3, ad4=13424 +static constexpr size_t SMOOTH_SIZE_ =2051; +static constexpr size_t MAX_CONTAINER_SIZE_EVERY_WI_ =5; + +static constexpr size_t MAX_THREAD_ = 41700000 ; // modified for vina1.2, to calculate random map memory upper bound +static constexpr size_t MAX_LIGAND_NUM_ = 10250; +}; +struct MaxConfig { static constexpr size_t MAX_NUM_OF_LIG_TORSION_ = 48; static constexpr size_t MAX_NUM_OF_FLEX_TORSION_ = 1; -static constexpr size_t MAX_NUM_OF_RIGID_ = 48; -static constexpr size_t MAX_NUM_OF_ATOMS_ = 150; +static constexpr size_t MAX_NUM_OF_RIGID_ = 128; +static constexpr size_t MAX_NUM_OF_ATOMS_ = 300; static constexpr size_t SIZE_OF_MOLEC_STRUC_ = ((3 + 4 + MAX_NUM_OF_LIG_TORSION_ + MAX_NUM_OF_FLEX_TORSION_ + 1) * sizeof(float)); static constexpr size_t SIZE_OF_CHANGE_STRUC_ = @@ -215,7 +251,7 @@ static constexpr size_t SIZE_OF_CHANGE_STRUC_ = static constexpr size_t MAX_HESSIAN_MATRIX_D_SIZE_ = ((6 + MAX_NUM_OF_LIG_TORSION_ + MAX_NUM_OF_FLEX_TORSION_) * (6 + MAX_NUM_OF_LIG_TORSION_ + MAX_NUM_OF_FLEX_TORSION_ + 1) / 2); -static constexpr size_t MAX_NUM_OF_LIG_PAIRS_ =1024; +static constexpr size_t MAX_NUM_OF_LIG_PAIRS_ =4096; static constexpr size_t MAX_NUM_OF_BFGS_STEPS_ =64; static constexpr size_t MAX_NUM_OF_RANDOM_MAP_= 1000 ;// not too large (stack overflow!) static constexpr size_t GRIDS_SIZE_ =37 ; // larger than vina1.1, max(XS_TYPE_SIZE, AD_TYPE_SIZE + 2) diff --git a/unidock/src/cuda/monte_carlo.cu b/unidock/src/cuda/monte_carlo.cu index 979880e..d98625f 100644 --- a/unidock/src/cuda/monte_carlo.cu +++ b/unidock/src/cuda/monte_carlo.cu @@ -1130,6 +1130,34 @@ std::vector monte_carlo_template::cuda_to_vina(ou } return results_vina; } +template<> +std::vector monte_carlo_template::cuda_to_vina(output_type_cuda_t_ results_ptr[], + int thread) const { + // DEBUG_PRINTF("entering cuda_to_vina\n"); + std::vector results_vina; + for (int i = 0; i < thread; ++i) { + output_type_cuda_t_ results = results_ptr[i]; + conf tmp_c; + tmp_c.ligands.resize(1); + // Position + for (int j = 0; j < 3; j++) tmp_c.ligands[0].rigid.position[j] = results.position[j]; + // Orientation + qt q(results.orientation[0], results.orientation[1], results.orientation[2], + results.orientation[3]); + tmp_c.ligands[0].rigid.orientation = q; + output_type tmp_vina(tmp_c, results.e); + // torsion + for (int j = 0; j < results.lig_torsion_size; j++) + tmp_vina.c.ligands[0].torsions.push_back(results.lig_torsion[j]); + // coords + for (int j = 0; j < MaxConfig::MAX_NUM_OF_ATOMS_; j++) { + vec v_tmp(results.coords[j][0], results.coords[j][1], results.coords[j][2]); + if (v_tmp[0] * v_tmp[1] * v_tmp[2] != 0) tmp_vina.coords.push_back(v_tmp); + } + results_vina.push_back(tmp_vina); + } + return results_vina; +} __host__ void monte_carlo::operator()( std::vector &m_gpu, std::vector &out_gpu, std::vector &p_gpu, triangular_matrix_cuda_t *m_data_list_gpu, @@ -2972,6 +3000,13 @@ __host__ void monte_carlo_template::do_docking(std::vector> &bias_batch_list) const { monte_carlo_template::do_docking_base(m_gpu, out_gpu,p_gpu, m_data_list_gpu,ig, corner1, corner2, generator, verbosity,seed, bias_batch_list); } +template <> +__host__ void monte_carlo_template::do_docking(std::vector &m_gpu, std::vector &out_gpu, + std::vector &p_gpu, triangular_matrix_cuda_t *m_data_list_gpu, + const igrid &ig, const vec &corner1, const vec &corner2, rng &generator, int verbosity, + unsigned long long seed, std::vector> &bias_batch_list) const { + monte_carlo_template::do_docking_base(m_gpu, out_gpu,p_gpu, m_data_list_gpu,ig, corner1, corner2, generator, verbosity,seed, bias_batch_list); + } /* Above based on monte-carlo.cpp */ // #endif diff --git a/unidock/src/main/main.cpp b/unidock/src/main/main.cpp index 3d1a9f5..a230188 100644 --- a/unidock/src/main/main.cpp +++ b/unidock/src/main/main.cpp @@ -90,6 +90,30 @@ float calculateScore(const Ligand &ligand) { bool compareLigands(const Ligand &a, const Ligand &b) { return a.score < b.score; } +void classifyLigands(const std::vector& ligands,std::vector &smallGroup, std::vector &mediumGroup,std::vector & largeGroup, std::vector &extraLargeGroup, std::vector &overflowGroup) { + const int atomThresholds[5] = {40, 80, 120, 160, 300}; + const int torsionThresholds[5] = {8, 16, 24, 36, 48}; + const int rigidThresholds[5] = {12, 24, 36, 64, 128}; + const int pairThresholds[5] = {300, 600, 1024, 2048, 4096}; + + for (const auto& lig : ligands) { + if (lig.num_atoms <= atomThresholds[0] && lig.num_torsions <= torsionThresholds[0] && + lig.num_rigids <= rigidThresholds[0] && lig.num_lig_pairs <= pairThresholds[0]) { + smallGroup.push_back(lig); + } else if (lig.num_atoms <= atomThresholds[1] && lig.num_torsions <= torsionThresholds[1] && + lig.num_rigids <= rigidThresholds[1] && lig.num_lig_pairs <= pairThresholds[1]) { + mediumGroup.push_back(lig); + } else if (lig.num_atoms <= atomThresholds[2] && lig.num_torsions <= torsionThresholds[2] && + lig.num_rigids <= rigidThresholds[2] && lig.num_lig_pairs <= pairThresholds[2]) { + largeGroup.push_back(lig); + } else if (lig.num_atoms <= atomThresholds[3] && lig.num_torsions <= torsionThresholds[3] && + lig.num_rigids <= rigidThresholds[3] && lig.num_lig_pairs <= pairThresholds[3]) { + extraLargeGroup.push_back(lig); + } else { + overflowGroup.push_back(lig); + } + } +} void printMaxValues(const std::vector& group) { int max_atoms = std::numeric_limits::min(); int max_torsions = std::numeric_limits::min(); @@ -986,19 +1010,6 @@ bug reporting, license agreements, and more information. \n"; max_num_torsions = std::max(max_num_torsions, num_torsions_vector.at(i)); max_num_rigids = std::max(max_num_rigids,num_rigids_vector.at(i)); max_num_lig_pairs = std::max(max_num_lig_pairs,num_lig_pairs_vector.at(i)); - - // printf("num_atoms%ld\n",num_atoms_vector.at(i)); - // printf("num_torsions:%ld\n",num_torsions_vector.at(i)); - // printf("num_rigids:%ld\n",num_rigids_vector.at(i)); - // printf("num_internal_pairs:%ld\n",num_lig_pairs_vector.at(i)); - - // all_ligands[i].second.about(); - - // printf("max_num_ligands%ld\n",max_num_ligands); - // printf("max_num_other_pairs%ld\n",max_num_other_pairs); - // printf("max_num_flex%ld\n",max_num_flex); - // printf("max_num_ligand_degrees_of_freedom%ld\n",max_num_ligand_degrees_of_freedom); - // printf("max_num_internal_pairs%ld\n",max_num_internal_pairs); } printf("max_num_atoms%ld\n",max_num_atoms); @@ -1016,12 +1027,13 @@ bug reporting, license agreements, and more information. \n"; lig.score = calculateScore(lig); ligands.push_back(lig); } - std::sort(ligands.begin(), ligands.end(), compareLigands); - int groupSize = ligands.size() / 4; - std::vector smallGroup(ligands.begin(), ligands.begin() + groupSize); - std::vector mediumGroup(ligands.begin() + groupSize, ligands.begin() + 2 * groupSize); - std::vector largeGroup(ligands.begin() + 2 * groupSize, ligands.begin() + 3 * groupSize); - std::vector extraLargeGroup(ligands.begin() + 3 * groupSize, ligands.end()); + + std::vector smallGroup; + std::vector mediumGroup; + std::vector largeGroup; + std::vector extraLargeGroup; + std::vector maxGroup; + classifyLigands(ligands,smallGroup,mediumGroup,largeGroup,extraLargeGroup,maxGroup); std::cout << "Small Group:" << std::endl; printMaxValues(smallGroup); @@ -1053,6 +1065,9 @@ bug reporting, license agreements, and more information. \n"; template_batch_docking(v,all_ligands,extraLargeGroup,"Extra Large",exhaustiveness, multi_bias,max_memory, receptor_atom_numbers, out_dir,bias_file, num_modes, min_rmsd,max_evals,max_step,seed, refine_step, local_only, energy_range); + template_batch_docking(v,all_ligands,maxGroup,"Max",exhaustiveness, multi_bias,max_memory, + receptor_atom_numbers, out_dir,bias_file, num_modes, + min_rmsd,max_evals,max_step,seed, refine_step, local_only, energy_range); } } } diff --git a/unidock_tools/tests/ut/dock/test_run_unidock.py b/unidock_tools/tests/ut/dock/test_run_unidock.py index d4639e2..da7f662 100644 --- a/unidock_tools/tests/ut/dock/test_run_unidock.py +++ b/unidock_tools/tests/ut/dock/test_run_unidock.py @@ -40,7 +40,7 @@ def test_run_unidock_vina(receptor, ligand, pocket): size_z=pocket[5], scoring="vina", num_modes=10, - energy_range=6.0, + energy_range=9.0, seed=181129, ) @@ -73,8 +73,8 @@ def test_run_unidock_ad4(receptor, ligand, pocket): size_z=pocket[5], scoring="ad4", num_modes=5, - energy_range=6.0, - seed=181129, + energy_range=12.0, + seed=42, ) result_ligand = result_ligands[0] @@ -83,4 +83,4 @@ def test_run_unidock_ad4(receptor, ligand, pocket): scores = scores_list[0] assert len(scores) == 5 - shutil.rmtree(workdir, ignore_errors=True) \ No newline at end of file + shutil.rmtree(workdir, ignore_errors=True)