
Commit

Merge branch 'devel'
cescgina committed Jul 24, 2019
2 parents 8eefde0 + a728549 commit 635c710
Showing 75 changed files with 1,138 additions and 509 deletions.
2 changes: 1 addition & 1 deletion AdaptivePELE/__init__.py
@@ -1,2 +1,2 @@
__version__ = "1.6.1"
__version__ = "1.6.2"
name = "AdaptivePELE"
13 changes: 9 additions & 4 deletions AdaptivePELE/adaptiveSampling.py
@@ -656,6 +656,7 @@ def main(jsonParams, clusteringHook=None):
topologies = utilities.Topology(outputPathConstants.topologies)
if restart and firstRun is not None:
topology_files = glob.glob(os.path.join(outputPathConstants.topologies, "topology*.pdb"))
topology_files.sort(key=utilities.getTrajNum)
topologies.setTopologies(topology_files)
if firstRun == 0:
createMappingForFirstEpoch(initialStructures, topologies, simulationRunner.getWorkingProcessors())
@@ -681,11 +682,11 @@ def main(jsonParams, clusteringHook=None):
processManager.writeEquilibrationStructures(outputPathConstants.tmpFolder, initialStructures)
if processManager.isMaster() and simulationRunner.parameters.constraints:
# write the new constraints for synchronization
utilities.writeNewConstraints(outputPathConstants.tmpFolder, "new_constraints.txt", simulationRunner.parameters.constraints)
utilities.writeNewConstraints(outputPathConstants.topologies, "new_constraints.txt", simulationRunner.parameters.constraints)
processManager.barrier()

if not processManager.isMaster() and simulationRunner.parameters.constraints:
simulationRunner.parameters.constraints = utilities.readConstraints(outputPathConstants.tmpFolder, "new_constraints.txt")
simulationRunner.parameters.constraints = utilities.readConstraints(outputPathConstants.topologies, "new_constraints.txt")
# read all the equilibration structures
initialStructures = processManager.readEquilibrationStructures(outputPathConstants.tmpFolder)
topologies.setTopologies(initialStructures, cleanFiles=processManager.isMaster())
@@ -714,7 +715,10 @@ def main(jsonParams, clusteringHook=None):

simulationRunner.writeMappingToDisk(outputPathConstants.epochOutputPathTempletized % i)
topologies.writeMappingToDisk(outputPathConstants.epochOutputPathTempletized % i, i)

if i == 0:
# write the object to file at the start of the first epoch, so
# the topologies can always be loaded
topologies.writeTopologyObject()
processManager.barrier()
if processManager.isMaster():
utilities.print_unbuffered("Production run...")
@@ -723,7 +727,8 @@ def main(jsonParams, clusteringHook=None):
processManager.barrier()

if processManager.isMaster():
simulationRunner.processTrajectories(outputPathConstants.epochOutputPathTempletized % i, topologies, i)
if simulationRunner.parameters.postprocessing:
simulationRunner.processTrajectories(outputPathConstants.epochOutputPathTempletized % i, topologies, i)
utilities.print_unbuffered("Clustering...")
startTime = time.time()
clusterEpochTrajs(clusteringMethod, i, outputPathConstants.epochOutputPathTempletized, topologies, outputPathConstants)
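On restart, the topology files are now sorted by their trajectory number before being registered, rather than relying on the order returned by glob. A minimal sketch of why the explicit sort key matters (file names here are hypothetical; plain lexicographic order would misplace two-digit suffixes):

    from AdaptivePELE.utilities import utilities

    # hypothetical file names; glob returns them in arbitrary order
    topology_files = ["topology_10.pdb", "topology_1.pdb", "topology_2.pdb"]
    topology_files.sort(key=utilities.getTrajNum)   # numeric sort on the trailing index
    print(topology_files)  # ['topology_1.pdb', 'topology_2.pdb', 'topology_10.pdb']
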
44 changes: 44 additions & 0 deletions AdaptivePELE/analysis/analysis_utils.py
@@ -0,0 +1,44 @@
from __future__ import absolute_import, division, print_function, unicode_literals
import os
import glob
import numpy as np
from AdaptivePELE.utilities import utilities


def extendReportWithRmsd(reportFile, rmsds):
"""
Extend a previous report file with corrected rmsd values
:param reportFile: Report file to be corrected
:type reportFile: np.ndarray
:param rmsds: Rmsd corrected values
:type rmsds: np.ndarray
:returns: np.ndarray -- Extended report file with corrected rmsd values
"""
newShape = reportFile.shape
fixedReport = np.zeros((newShape[0], newShape[1]+1))
fixedReport[:, :-1] = reportFile
fixedReport[:, -1] = rmsds
return fixedReport


def process_folder(epoch, folder, trajName, reportName, output_filename, top):
if epoch is None:
allTrajs = glob.glob(os.path.join(folder, trajName))
full_reportName = os.path.join(folder, reportName)
else:
allTrajs = glob.glob(os.path.join(folder, epoch, trajName))
full_reportName = os.path.join(folder, epoch, reportName)
epoch = int(epoch)

allFiles = []
for traj in allTrajs:
trajNum = utilities.getTrajNum(traj)
if top is not None:
top_file = top.getTopologyFile(epoch, trajNum)
else:
top_file = None
report_file = full_reportName % trajNum
allFiles.append((traj, report_file, top_file, epoch, output_filename % trajNum))
return allFiles
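The new analysis_utils module collects helpers shared by the analysis scripts: process_folder pairs every trajectory of a folder (or of one epoch) with its report, topology file and output name, and extendReportWithRmsd appends a column of values to a loaded report. A minimal usage sketch, with hypothetical paths, name patterns and placeholder values:

    import numpy as np
    from AdaptivePELE.analysis import analysis_utils

    # hypothetical layout: simulation_output/0/ contains *traj* files and report_* files
    files = analysis_utils.process_folder("0", "simulation_output", "*traj*",
                                          "report_%d", "fixedReport_%d", None)
    for traj, report_file, top_file, epoch, out_name in files:
        report = np.loadtxt(report_file, ndmin=2)
        values = np.zeros(report.shape[0])           # placeholder metric, one value per frame
        extended = analysis_utils.extendReportWithRmsd(report, values)
        np.savetxt(out_name, extended, fmt="%.4f")
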
11 changes: 7 additions & 4 deletions AdaptivePELE/analysis/backtrackAdaptiveTrajectory.py
@@ -44,9 +44,10 @@ def main(trajectory, snapshot, epoch, outputPath, out_filename, topology, use_pd
else:
outputPath = ""
if topology is not None:
topology_contents = utilities.getTopologyFile(topology)
topology = utilities.getTopologyObject(topology)
else:
topology_contents = None
topology = None
topology_contents = None
if os.path.exists(outputPath+out_filename):
# If the specified name exists, append a number to distinguish the files
name, ext = os.path.splitext(out_filename)
@@ -69,6 +70,8 @@ def main(trajectory, snapshot, epoch, outputPath, out_filename, topology, use_pd
else:
# avoid repeating the initial snapshot
initial = 1
if topology is not None:
topology_contents = topology.getTopology(int(epoch), trajectory)
if not isinstance(snapshots[0], basestring):
new_snapshots = []
for i in range(initial, snapshot+1):
@@ -95,5 +98,5 @@ def main(trajectory, snapshot, epoch, outputPath, out_filename, topology, use_pd
f.write("ENDMDL\n".join(itertools.chain.from_iterable(pathway)))

if __name__ == "__main__":
traj, num_snapshot, num_epoch, output_path, output_filename, top, use_pdb = parseArguments()
main(traj, num_snapshot, num_epoch, output_path, output_filename, top, use_pdb)
traj, num_snapshot, num_epoch, output_path, output_filename, top, force_use_pdb = parseArguments()
main(traj, num_snapshot, num_epoch, output_path, output_filename, top, force_use_pdb)
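The backtracking script now loads the whole Adaptive topology object from the path given on the command line, instead of a single topology file, and fetches the topology matching the requested epoch and trajectory inside the loop. A minimal sketch of that lookup (the path, epoch and trajectory number are hypothetical):

    from AdaptivePELE.utilities import utilities

    topology = utilities.getTopologyObject("output/topologies/topologies.pkl")  # hypothetical path
    topology_contents = topology.getTopology(2, 5)  # topology used by trajectory 5 of epoch 2
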
31 changes: 6 additions & 25 deletions AdaptivePELE/analysis/calculateSASAvalues.py
Expand Up @@ -7,7 +7,7 @@
import mdtraj as md
import multiprocessing as mp
from AdaptivePELE.utilities import utilities
from AdaptivePELE.analysis import correctRMSD
from AdaptivePELE.analysis import analysis_utils


def parseArguments():
@@ -71,7 +71,7 @@ def process_file(traj, top_file, resname, report, outputFilename, format_out, ne
header = ""
reportFile = utilities.loadtxtfile(f)

fixedReport = correctRMSD.extendReportWithRmsd(reportFile, sasa_values)
fixedReport = analysis_utils.extendReportWithRmsd(reportFile, sasa_values)
else:
indexes = np.array(range(sasa_values.shape[0]))
fixedReport = np.concatenate((indexes[:, None], sasa_values[:, None]), axis=1)
@@ -86,27 +86,6 @@ def process_file(traj, top_file, resname, report, outputFilename, format_out, ne
print("Took %.2fs to process" % (end-start), traj)


def process_folder(epoch, folder, trajName, reportName, output_filename, top):
if epoch is None:
allTrajs = glob.glob(os.path.join(folder, trajName))
full_reportName = os.path.join(folder, reportName)
else:
allTrajs = glob.glob(os.path.join(folder, epoch, trajName))
full_reportName = os.path.join(folder, epoch, reportName)
epoch = int(epoch)

allFiles = []
for traj in allTrajs:
trajNum = utilities.getTrajNum(traj)
if top is not None:
top_file = top.getTopologyFile(epoch, trajNum)
else:
top_file = None
report_file = full_reportName % trajNum
allFiles.append((traj, report_file, top_file, epoch, output_filename % trajNum))
return allFiles


def main(resname, folder, top, out_report_name, format_out, nProcessors, output_folder, new_report):
"""
Calculate the relative SASA values of the ligand
@@ -148,15 +127,17 @@ def main(resname, folder, top, out_report_name, format_out, nProcessors, output_
if not epochs:
# path does not contain an adaptive simulation, we'll try to retrieve
# trajectories from the specified path
files = process_folder(None, folder, trajName, reportName, os.path.join(folder, outputFilename), top_obj)
files = analysis_utils.process_folder(None, folder, trajName, reportName, os.path.join(folder, outputFilename), top_obj)
for epoch in epochs:
print("Epoch", epoch)
files.extend(process_folder(epoch, folder, trajName, reportName, os.path.join(folder, epoch, outputFilename), top_obj))
files.extend(analysis_utils.process_folder(epoch, folder, trajName, reportName, os.path.join(folder, epoch, outputFilename), top_obj))
results = []
for info in files:
results.append(pool.apply_async(process_file, args=(info[0], info[2], resname, info[1], info[4], format_out, new_report, info[3])))
for res in results:
res.get()
pool.close()
pool.terminate()

if __name__ == "__main__":
lig_name, path, topology_path, out_name, fmt_str, n_proc, out_folder, new_reports = parseArguments()
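The per-trajectory SASA calculation is dispatched through a multiprocessing pool; the same apply_async/get pattern is reused in correctRMSD.py below. A minimal, self-contained sketch of the pattern with a placeholder worker (not AdaptivePELE code):

    import multiprocessing as mp

    def worker(traj_file, report_file):
        # per-trajectory processing would go here
        return traj_file, report_file

    if __name__ == "__main__":
        pool = mp.Pool(2)
        tasks = [("traj_1.xtc", "report_1"), ("traj_2.xtc", "report_2")]
        results = [pool.apply_async(worker, args=task) for task in tasks]
        for res in results:
            res.get()   # blocks until the worker finishes and re-raises any exception
        pool.close()
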
151 changes: 96 additions & 55 deletions AdaptivePELE/analysis/correctRMSD.py
@@ -1,29 +1,12 @@
from __future__ import absolute_import, division, print_function, unicode_literals
import os
import json
import glob
import argparse
import numpy as np
import multiprocessing as mp
from AdaptivePELE.utilities import utilities
import AdaptivePELE.atomset.atomset as atomset


def extendReportWithRmsd(reportFile, rmsds):
"""
Extend a previous report file with corrected rmsd values
:param reportFile: Report file to be corrected
:type reportFile: np.ndarray
:param rmsds: Rmsd corrected values
:type rmsds: np.ndarray
:returns: np.ndarray -- Extended report file with corrected rmsd values
"""
newShape = reportFile.shape
fixedReport = np.zeros((newShape[0], newShape[1]+1))
fixedReport[:, :-1] = reportFile
fixedReport[:, -1] = rmsds
return fixedReport
from AdaptivePELE.analysis import analysis_utils


def parseArguments():
@@ -39,14 +22,23 @@ def parseArguments():
"{"\
"\"resname\" : \"K5Y\","\
"\"native\" : \"native.pdb\","\
"\"symmetries\" : {[\"4122:C12:K5Y\":\"4123:C13:K5Y\", \"4120:C10:K5Y\":\"4127:C17:K5Y\"]},"\
"\"symmetries\" : [{\"4122:C12:K5Y\":\"4123:C13:K5Y\", \"4120:C10:K5Y\":\"4127:C17:K5Y\"}],"\
"\"column\" = 5"\
"}"
parser = argparse.ArgumentParser(description=desc)
parser.add_argument("controlFile", type=str, help="Control File name")
parser.add_argument("--report_name", type=str, default=None, help="Name of report files, for example if reports are named 'report_1' use report")
parser.add_argument("--trajectory_name", type=str, default=None, help="Name of trajectory files, for example if reports are named 'run_trajectories_1' use run_trajectories")
parser.add_argument("--path", type=str, default=".", help="Path where the simulation is stored")
parser.add_argument("--top", type=str, default=None, help="Topology file for non-pdb trajectories or path to Adaptive topology object")
parser.add_argument("--out_name", type=str, default="fixedReport", help="Name of the modified report files (default is fixedReport)")
parser.add_argument("--out_folder", type=str, default=None, help="Path where to store the report files (default is fixedReport)")
parser.add_argument("-n", type=int, default=1, help="Number of processors to parallelize")
parser.add_argument("--fmt_str", type=str, default="%.4f", help="Format of the output file (default is .4f which means all floats with 4 decimal points)")
parser.add_argument("--new_report", action="store_true", help="Whether to create new report files")
args = parser.parse_args()

return args.controlFile
return args.controlFile, args.report_name, args.trajectory_name, args.path, args.top, args.out_name, args.n, args.out_folder, args.fmt_str, args.new_report


def readControlFile(controlFile):
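The help text above also corrects the example so that "symmetries" is a JSON list of dictionaries rather than a dictionary wrapping a list. A minimal sketch of writing a matching control file (the values are taken from the help string; the file name is hypothetical):

    import json

    control = {
        "resname": "K5Y",
        "native": "native.pdb",
        "symmetries": [{"4122:C12:K5Y": "4123:C13:K5Y",
                        "4120:C10:K5Y": "4127:C17:K5Y"}],
        "column": 5,
    }
    with open("controlFile_rmsd.json", "w") as fw:
        json.dump(control, fw, indent=4)
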
@@ -72,55 +64,104 @@ def readControlFile(controlFile):
return resname, nativeFilename, symmetries, rmsdColInReport


def main(controlFile):
def calculate_rmsd_traj(nativePDB, resname, symmetries, rmsdColInReport, traj, reportName, top, epoch, outputFilename, fmt_str, new_report):
top_proc = None
if top is not None:
top_proc = utilities.getTopologyFile(top)
rmsds = utilities.getRMSD(traj, nativePDB, resname, symmetries, topology=top_proc)

if new_report:
fixedReport = np.zeros((rmsds.size, 2))
fixedReport[:, 0] = range(rmsds.size)
fixedReport[:, 1] = rmsds
header = ""
else:
with open(reportName) as f:
header = f.readline().rstrip()
if not header.startswith("#"):
header = ""
reportFile = utilities.loadtxtfile(reportName)
if rmsdColInReport > 0 and rmsdColInReport < reportFile.shape[1]:
reportFile[:, rmsdColInReport] = rmsds
fixedReport = reportFile
else:
fixedReport = analysis_utils.extendReportWithRmsd(reportFile, rmsds)

with open(outputFilename, "w") as fw:
if header:
fw.write("%s\tRMSD\n" % header)
else:
fw.write("# Step\tRMSD\n")
np.savetxt(fw, fixedReport, fmt=fmt_str)


def main(controlFile, trajName, reportName, folder, top, outputFilename, nProcessors, output_folder, format_str, new_report):
"""
Calculate the corrected rmsd values of conformation taking into account
molecule symmetries
:param controlFile: Control file
:type controlFile: str
:param folder: Path the simulation
:type folder: str
:param top: Path to the topology
:type top: str
:param outputFilename: Name of the output file
:type outputFilename: str
:param nProcessors: Number of processors to use
:type nProcessors: int
:param output_folder: Path where to store the new reports
:type output_folder: str
:param format_str: String with the format of the report
:type format_str: str
:param new_report: Whether to write rmsd to a new report file
:type new_report: bool
"""
# Constants
folder = "."
outputFilename = "fixedReport_%d"
trajName = "*traj*"
reportName = "*report_%d"
# end constants
if trajName is None:
trajName = "*traj*"
else:
trajName += "_*"
if reportName is None:
reportName = "report_%d"
else:
reportName += "_%d"
if output_folder is not None:
outputFilename = os.path.join(output_folder, outputFilename)
outputFilename += "_%d"
if nProcessors is None:
nProcessors = utilities.getCpuCount()
nProcessors = max(1, nProcessors)
print("Calculating RMSDs with %d processors" % nProcessors)
pool = mp.Pool(nProcessors)
epochs = utilities.get_epoch_folders(folder)
if top is not None:
top_obj = utilities.getTopologyObject(top)
else:
top_obj = None

resname, nativeFilename, symmetries, rmsdColInReport = readControlFile(controlFile)

nativePDB = atomset.PDB()
nativePDB.initialise(nativeFilename, resname=resname)

allFolders = os.listdir(folder)
epochs = [epoch for epoch in allFolders if epoch.isdigit()]

files = []
if not epochs:
# path does not contain an adaptive simulation, we'll try to retrieve
# trajectories from the specified path
files = analysis_utils.process_folder(None, folder, trajName, reportName, os.path.join(folder, outputFilename), top_obj)
for epoch in epochs:
print("Epoch", epoch)
os.chdir(epoch)
allTrajs = glob.glob(trajName)

for traj in allTrajs:
rmsds = utilities.getRMSD(traj, nativePDB, resname, symmetries)
trajNum = utilities.getTrajNum(traj)
try:
reportFilename = glob.glob(reportName % trajNum)[0]
except IndexError:
raise IndexError("File %s not found in folder %s" % (reportName % trajNum, epoch))

reportFile = np.loadtxt(reportFilename, ndmin=2)

if rmsdColInReport > 0 and rmsdColInReport < reportFile.shape[1]:
reportFile[:, rmsdColInReport] = rmsds
fixedReport = reportFile
else:
fixedReport = extendReportWithRmsd(reportFile, rmsds)

# print(fixedReport)
np.savetxt(outputFilename % trajNum, fixedReport, fmt=b'%.4f')
files.extend(analysis_utils.process_folder(epoch, folder, trajName, reportName, os.path.join(folder, epoch, outputFilename), top_obj))
results = []
for info in files:
results.append(pool.apply_async(calculate_rmsd_traj, args=(nativePDB, resname, symmetries, rmsdColInReport, info[0], info[1], info[2], info[3], info[4], format_str, new_report)))
for res in results:
res.get()
pool.close()
pool.terminate()

os.chdir("..")

if __name__ == "__main__":
control_file = parseArguments()
main(control_file)
control_file, name_report, name_traj, path, topology_path, out_name, n_proc, out_folder, fmt, new_rep = parseArguments()
main(control_file, name_traj, name_report, path, topology_path, out_name, n_proc, out_folder, fmt, new_rep)
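With the reworked signature, correctRMSD.main can also be driven directly from Python. A minimal sketch of such a call, in the argument order used by the __main__ block above (all paths and values are hypothetical; None falls back to the defaults handled inside main):

    from AdaptivePELE.analysis import correctRMSD

    correctRMSD.main("controlFile_rmsd.json",  # control file with resname/native/symmetries/column
                     None,                     # trajectory name pattern, defaults to "*traj*"
                     None,                     # report name, defaults to "report_%d"
                     "simulation_output",      # path where the simulation is stored
                     None,                     # topology object path (None for PDB trajectories)
                     "fixedReport",            # base name of the corrected report files
                     4,                        # number of processors
                     None,                     # optional folder for the corrected reports
                     "%.4f",                   # format of the written values
                     False)                    # extend the existing reports instead of writing new ones
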
[Diffs for the remaining changed files are not shown.]