From 06478cf2df8910fd98842a692bba2cbccc03dfde Mon Sep 17 00:00:00 2001 From: Sergey O Date: Thu, 16 Jun 2022 20:11:42 -0400 Subject: [PATCH] adding support for MMalign (for TMscore compute) --- colab_notebooks/quickdemo_wAF2.ipynb | 36 +++++++++++++++++++++------- 1 file changed, 27 insertions(+), 9 deletions(-) diff --git a/colab_notebooks/quickdemo_wAF2.ipynb b/colab_notebooks/quickdemo_wAF2.ipynb index 99c07b3b..37e155c8 100644 --- a/colab_notebooks/quickdemo_wAF2.ipynb +++ b/colab_notebooks/quickdemo_wAF2.ipynb @@ -21,7 +21,7 @@ "\n", "Examples: \n", "1. pdb: `6MRR`, homomer: `False`, designed_chain: `A`\n", - "2. pdb: `1O91`, homomer: `True`, designed_chain: `A,B,C` \n", + "2. pdb: `3DJI`, homomer: `True`, designed_chain: `A,B,C` \n", " (for correct symmetric tying lenghts of homomer chains should be the same)" ] }, @@ -124,6 +124,7 @@ "\n", "#@markdown #### Input Options\n", "pdb='6MRR' #@param {type:\"string\"}\n", + "pdb = pdb.replace(\" \",\"\")\n", "pdb_path = get_pdb(pdb)\n", "#@markdown - pdb code (leave blank to get an upload prompt)\n", "\n", @@ -151,7 +152,7 @@ "##@markdown - Define which chain to redesign\n", "\n", "#@markdown #### Design Options\n", - "num_seqs = 1 #@param [\"1\", \"2\", \"4\", \"8\", \"16\", \"32\", \"64\"] {type:\"raw\"}\n", + "num_seqs = 8 #@param [\"1\", \"2\", \"4\", \"8\", \"16\", \"32\", \"64\"] {type:\"raw\"}\n", "num_seq_per_target = num_seqs\n", "\n", "#@markdown - Sampling temperature for amino acids, T=0.0 means taking argmax, T>>1.0 means sample randomly.\n", @@ -340,6 +341,7 @@ " TQDM_BAR_FORMAT = '{l_bar}{bar}| {n_fmt}/{total_fmt} [elapsed: {elapsed} remaining: {remaining}]'\n", "\n", " with io.capture_output() as captured:\n", + " # install ALPHAFOLD\n", " if not os.path.isdir(\"af_backprop\"):\n", " %shell git clone -b beta https://github.com/sokrypton/af_backprop.git\n", " %shell pip -q install biopython dm-haiku ml-collections py3Dmol\n", @@ -348,6 +350,22 @@ " %shell mkdir params\n", " %shell curl -fsSL https://storage.googleapis.com/alphafold/alphafold_params_2021-07-14.tar | tar x -C params\n", "\n", + " if not os.path.exists(\"MMalign\"):\n", + " # install MMalign\n", + " os.system(\"wget -qnc https://zhanggroup.org/MM-align/bin/module/MMalign.cpp\")\n", + " os.system(\"g++ -static -O3 -ffast-math -o MMalign MMalign.cpp\")\n", + "\n", + " def mmalign(pdb_a,pdb_b):\n", + " # pass to MMalign\n", + " output = os.popen(f'./MMalign {pdb_a} {pdb_b}')\n", + " # parse outputs\n", + " parse_float = lambda x: float(x.split(\"=\")[1].split()[0])\n", + " tms = []\n", + " for line in output:\n", + " line = line.rstrip()\n", + " if line.startswith(\"TM-score\"): tms.append(parse_float(line))\n", + " return tms\n", + "\n", " # configure which device to use\n", " try:\n", " # check if TPU is available\n", @@ -455,10 +473,7 @@ " b_factors = 100 * p.pop(\"plddt\")[:,None] * p[\"atom_mask\"]\n", " p = alphafold_protein.Protein(**p,b_factors=b_factors)\n", " pdb_lines = alphafold_protein.to_pdb(p)\n", - " with open(filename, 'w') as f: f.write(pdb_lines)\n", - "\n", - "num_models = 1 #@param [\"1\",\"2\",\"3\",\"4\",\"5\"] {type:\"raw\"}\n", - "num_recycles = 1 #@param [\"0\",\"1\",\"2\",\"3\"] {type:\"raw\"}\n" + " with open(filename, 'w') as f: f.write(pdb_lines)" ], "metadata": { "cellView": "form", @@ -471,6 +486,8 @@ "cell_type": "code", "source": [ "#@title Run AlphaFold\n", + "num_models = 1 #@param [\"1\",\"2\",\"3\",\"4\",\"5\"] {type:\"raw\"}\n", + "num_recycles = 1 #@param [\"0\",\"1\",\"2\",\"3\"] {type:\"raw\"}\n", "num_sequences = len(sequences)\n", "outs = []\n", "positions = []\n", @@ -479,7 +496,7 @@ "LS = []\n", "\n", "with tqdm.notebook.tqdm(total=(num_recycles + 1) * num_models * num_sequences, bar_format=TQDM_BAR_FORMAT) as pbar:\n", - " print(\"seq_num\", \"model_num\", \"pLDDT\")\n", + " print(f\"seq_num model_num avg_pLDDT avg_pAE TMscore\")\n", " for s,ori_sequence in enumerate(sequences):\n", " Ls = [len(s) for s in ori_sequence.replace(\":\",\"/\").split(\"/\")]\n", " LS.append(Ls)\n", @@ -520,7 +537,8 @@ " paes[-1].append(O[\"pae\"][:length,:length])\n", " outs[-1].append(O)\n", " save_pdb(outs[-1][-1], f\"out_seq_{s}_model_{n}.pdb\")\n", - " print(s, n, plddts[-1][-1].mean())" + " tmscores = mmalign(pdb_path, f\"out_seq_{s}_model_{n}.pdb\")\n", + " print(f\"{s} {n}\\t{plddts[-1][-1].mean():.3}\\t{paes[-1][-1].mean():.3}\\t{tmscores[-1]:.3}\")" ], "metadata": { "cellView": "form", @@ -534,7 +552,7 @@ "source": [ "#@title Display 3D structure {run: \"auto\"}\n", "#@markdown #### select which sequence to show (if more than one designed example)\n", - "seq_num = 0 #@param [\"0\",\"1\",\"2\",\"3\",\"4\",\"5\",\"6\",\"7\",\"8\",\"9\"] {type:\"raw\"}\n", + "seq_num = 0 #@param [\"0\",\"1\",\"2\",\"3\",\"4\",\"5\",\"6\",\"7\"] {type:\"raw\"}\n", "assert seq_num < len(outs), f\"ERROR: seq_num ({seq_num}) exceeds number of designed sequences ({num_sequences})\"\n", "model_num = 0 #@param [\"0\",\"1\",\"2\",\"3\",\"4\"] {type:\"raw\"}\n", "assert model_num < len(outs[0]), f\"ERROR: model_num ({num_models}) exceeds number of model params used ({num_models})\"\n",