From dd7753f5c0b8834feaaa1518df0c221f1fb7e108 Mon Sep 17 00:00:00 2001 From: Logan Ward Date: Tue, 21 Nov 2023 16:54:39 -0500 Subject: [PATCH] Add an example for multi-fidelity optimization (#127) * Fix loop termination condition * Exposed proxystore threshold as option * Look farther back for dryrun message * Re-use process pool executor in thinker Speed up processing of tasks, etc * Use the pool in tests * Flake8 fix * Special case: multi-fidelity with only one recipe * Add demo application --- examol/score/rdkit/__init__.py | 2 + examol/simulate/base.py | 9 +- examol/specify/__init__.py | 27 +- examol/steer/base.py | 10 +- examol/steer/baseline.py | 4 +- examol/steer/multifi.py | 17 +- examol/steer/single.py | 31 +- examol/store/db/base.py | 5 +- examol/store/db/memory.py | 25 +- examples/redoxmers-multifidelity/README.md | 32 ++ .../assess-results.ipynb | 470 ++++++++++++++++++ examples/redoxmers-multifidelity/spec.py | 89 ++++ tests/cli/test_cli.py | 2 +- tests/score/test_rdkit.py | 12 +- tests/steer/conftest.py | 6 + tests/steer/test_base.py | 5 +- tests/steer/test_brute.py | 3 +- tests/steer/test_multi.py | 5 +- tests/steer/test_single.py | 3 +- 19 files changed, 699 insertions(+), 58 deletions(-) create mode 100644 examples/redoxmers-multifidelity/README.md create mode 100644 examples/redoxmers-multifidelity/assess-results.ipynb create mode 100644 examples/redoxmers-multifidelity/spec.py diff --git a/examol/score/rdkit/__init__.py b/examol/score/rdkit/__init__.py index d953a2a..028eb0d 100644 --- a/examol/score/rdkit/__init__.py +++ b/examol/score/rdkit/__init__.py @@ -107,6 +107,8 @@ def score(self, model_msg: ModelType, inputs: InputType, lower_fidelities: np.nd if not isinstance(model_msg, list): # Single objective return model_msg.predict(inputs) + elif len(model_msg) == 1: + return np.squeeze(model_msg[0].predict(inputs)) else: # Get the known deltas then append a NaN to the end (we don't know the last delta) if lower_fidelities is None: diff --git 
a/examol/simulate/base.py b/examol/simulate/base.py index 16cc881..c95ec9d 100644 --- a/examol/simulate/base.py +++ b/examol/simulate/base.py @@ -157,11 +157,12 @@ def _make_run_directory(self, run_type: str, mol_key: str, xyz: str, charge: int # Write a calculation summary to the run path with open(run_path / 'summary.json', 'w') as fp: + # Convert to strings because json.dump does not work with Proxy objects json.dump({ - 'xyz': xyz, - 'config_name': config_name, - 'charge': charge, - 'solvent': solvent + 'xyz': str(xyz), + 'config_name': str(config_name), + 'charge': str(charge), + 'solvent': str(solvent) }, fp, indent=2) return run_path diff --git a/examol/specify/__init__.py b/examol/specify/__init__.py index 753cfa0..1e02fa7 100644 --- a/examol/specify/__init__.py +++ b/examol/specify/__init__.py @@ -1,11 +1,13 @@ """Tool for defining then deploying an ExaMol application""" import contextlib +import os +from concurrent.futures import ProcessPoolExecutor from dataclasses import dataclass, field from typing import Sequence from pathlib import Path import logging -from colmena.queue import PipeQueues +from colmena.queue import PipeQueues, ColmenaQueues from colmena.task_server import ParslTaskServer from colmena.task_server.base import BaseTaskServer from parsl import Config @@ -36,17 +38,17 @@ class ExaMolSpecification: """ # Define the problem - database: Path | str | MoleculeStore = ... + database: Path | str | MoleculeStore """Path to the data as a line-delimited JSON file or an already-activated store""" - recipes: Sequence[PropertyRecipe] = ... + recipes: Sequence[PropertyRecipe] """Definition for how to compute the target properties""" - search_space: list[Path | str] = ... + search_space: list[Path | str] """Path to the molecules over which to search. Should be a list of ".smi" files""" - simulator: BaseSimulator = ... 
+ simulator: BaseSimulator """Tool used to perform quantum chemistry computations""" # Define the solution - solution: SolutionSpecification = ... + solution: SolutionSpecification """Define how to solve the design challenge""" # Define how we create the thinker @@ -54,6 +56,8 @@ class ExaMolSpecification: """Policy used to schedule computations""" thinker_options: dict[str, object] = field(default_factory=dict) """Options passed forward to initializing the thinker""" + thinker_workers: int = min(4, os.cpu_count()) + """Number of workers to use in the steering process""" # Define how we communicate to the user reporters: list[BaseReporter] = field(default_factory=list) @@ -66,7 +70,11 @@ class ExaMolSpecification: """Proxy store(s) used to communicate large objects between Thinker and workers. Can be either a single store used for all task types, or a mapping between a task topic (inference, simulation, train) and the store used for that task type. - All messages larger than 10kB will be proxied using the store.""" + All messages larger than :attr:`proxystore_threshold` will be proxied using the store.""" + proxystore_threshold: float | int = 10000 + """Messages larger than this size will be sent via Proxystore rather than through the workflow engine. Units: bytes""" + colmena_queue: type[ColmenaQueues] = PipeQueues + """Class used to send messages between Thinker and Task Server.""" run_dir: Path | str = ... 
"""Path in which to write output files""" @@ -95,7 +103,7 @@ def assemble(self) -> tuple[BaseTaskServer, MoleculeThinker, MoleculeStore]: logger.info(f'Using {store} for {name} tasks') else: raise NotImplementedError() - queues = PipeQueues(topics=['inference', 'simulation', 'train'], proxystore_threshold=10000, proxystore_name=proxy_name) + queues = self.colmena_queue(topics=['inference', 'simulation', 'train'], proxystore_threshold=self.proxystore_threshold, proxystore_name=proxy_name) # Make the functions associated with steering learning_functions = self.solution.generate_functions() @@ -119,7 +127,7 @@ def assemble(self) -> tuple[BaseTaskServer, MoleculeThinker, MoleculeStore]: # Create the thinker store = self.load_database() - with store: + with store, ProcessPoolExecutor(self.thinker_workers) as pool: thinker = self.thinker( queues=queues, run_dir=self.run_dir, @@ -127,6 +135,7 @@ def assemble(self) -> tuple[BaseTaskServer, MoleculeThinker, MoleculeStore]: search_space=self.search_space, solution=self.solution, database=store, + pool=pool, **self.thinker_options ) yield doer, thinker, store diff --git a/examol/steer/base.py b/examol/steer/base.py index fc4ee6b..1afc23c 100644 --- a/examol/steer/base.py +++ b/examol/steer/base.py @@ -6,6 +6,7 @@ from dataclasses import asdict from threading import Condition from collections import defaultdict +from concurrent.futures import ProcessPoolExecutor from typing import Iterator, Sequence, Iterable import numpy as np @@ -32,6 +33,7 @@ class MoleculeThinker(BaseThinker): solution: Description of how to solve the problem database: List of molecule records search_space: Lists of molecules to be evaluated as a list of ".smi" or ".json" files + num_workers: Number of workers to use locally for the thinker """ database: MoleculeStore @@ -49,7 +51,8 @@ def __init__(self, recipes: Sequence[PropertyRecipe], solution: SolutionSpecification, search_space: list[Path | str], - database: MoleculeStore): + database: MoleculeStore, 
+ pool: ProcessPoolExecutor): super().__init__(queues, resource_counter=rec) self.database = database self.run_dir = run_dir @@ -76,6 +79,9 @@ def __init__(self, self.task_iterator = self.task_iterator() # Tool for pulling from the task queue self.recipe_types = dict((r.name, r) for r in recipes) + # Attributes related to performing compute on the thinker + self.pool: ProcessPoolExecutor = pool + def iterate_over_search_space(self, only_smiles: bool = False) -> Iterator[MoleculeRecord | str]: """Function to produce a stream of molecules from the input files @@ -105,7 +111,7 @@ def iterate_over_search_space(self, only_smiles: bool = False) -> Iterator[Molec try: yield MoleculeRecord.from_identifier(line.strip()) except ValidationError: - self.logger.warning(f'Parsing failed for molecule: {line}') + self.logger.warning(f'Parsing failed for molecule: {line.strip()}') else: raise ValueError(f'File type is unrecognized for {path}') diff --git a/examol/steer/baseline.py b/examol/steer/baseline.py index 4889b7d..3e45a28 100644 --- a/examol/steer/baseline.py +++ b/examol/steer/baseline.py @@ -1,4 +1,5 @@ """Baseline methods for steering a molecular design campaign""" +from concurrent.futures import ProcessPoolExecutor from pathlib import Path from typing import Sequence @@ -35,9 +36,10 @@ def __init__(self, solution: SolutionSpecification, search_space: list[Path | str], database: MoleculeStore, + pool: ProcessPoolExecutor, num_workers: int = 1, overselection: float = 0): - super().__init__(queues, ResourceCounter(num_workers), run_dir, recipes, solution, search_space, database) + super().__init__(queues, ResourceCounter(num_workers), run_dir, recipes, solution, search_space, database, pool) self.overselection = overselection @agent(startup=True) diff --git a/examol/steer/multifi.py b/examol/steer/multifi.py index 91930bc..465ec79 100644 --- a/examol/steer/multifi.py +++ b/examol/steer/multifi.py @@ -1,9 +1,9 @@ """Scheduling strategies for multi-fidelity design 
campaigns""" import math from pathlib import Path -from multiprocessing import Pool from functools import cached_property from typing import Sequence, Iterable +from concurrent.futures import ProcessPoolExecutor import numpy as np from colmena.queue import ColmenaQueues @@ -37,9 +37,10 @@ def __init__(self, database: MoleculeStore, solution: MultiFidelityActiveLearning, search_space: list[Path | str], + pool: ProcessPoolExecutor, num_workers: int = 2, inference_chunk_size: int = 10000): - super().__init__(queues, run_dir, recipes, solution, search_space, database, num_workers, inference_chunk_size) + super().__init__(queues, run_dir, recipes, solution, search_space, database, pool, num_workers, inference_chunk_size) self.inference_chunk_size = inference_chunk_size # Initialize the list of relevant database records @@ -142,11 +143,10 @@ def get_relevant_database_records(self) -> set[str]: # Evaluate against molecules from the search spaces in batches self.logger.info(f'Searching for {len(all_keys)} molecules from the database in our search space') - with Pool(4) as pool: - for search_key in pool.imap_unordered(get_inchi_key_from_molecule_string, self.iterate_over_search_space(only_smiles=True), chunksize=10000): - if search_key in all_keys: - matched.add(search_key) - all_keys.remove(search_key) + for search_key in self.pool.map(get_inchi_key_from_molecule_string, self.iterate_over_search_space(only_smiles=True), chunksize=10000): + if search_key in all_keys: + matched.add(search_key) + all_keys.remove(search_key) return matched @@ -206,8 +206,7 @@ def submit_inference(self) -> tuple[list[list[str]], np.ndarray, list[np.ndarray def _filter_inference_results(self, chunk_id: int, chunk_smiles: list[str], inference_results: np.ndarray) -> tuple[list[str], np.ndarray]: if chunk_id < len(self.search_space_smiles): # Remove molecules from the chunk which are in the database - # TODO (wardlt): Parallelize this - mask = [get_inchi_key_from_molecule_string(s) not in 
self.already_in_db for s in chunk_smiles] + mask = [s not in self.already_in_db for s in self.pool.map(get_inchi_key_from_molecule_string, chunk_smiles, chunksize=1000)] return [s for m, s in zip(mask, chunk_smiles) if m], inference_results[:, mask, :] else: return chunk_smiles, inference_results diff --git a/examol/steer/single.py b/examol/steer/single.py index c45861b..70a499a 100644 --- a/examol/steer/single.py +++ b/examol/steer/single.py @@ -1,16 +1,15 @@ """Single-objective and single-fidelity implementation of active learning. As easy as we get""" -import os import gzip import json import pickle as pkl import shutil -from concurrent.futures import ProcessPoolExecutor from functools import partial from pathlib import Path from queue import Queue from threading import Event from time import perf_counter from typing import Sequence +from concurrent.futures import ProcessPoolExecutor import numpy as np from colmena.proxy import get_store @@ -94,9 +93,10 @@ def __init__(self, solution: SingleFidelityActiveLearning, search_space: list[Path | str], database: MoleculeStore, + pool: ProcessPoolExecutor, num_workers: int = 2, inference_chunk_size: int = 10000): - super().__init__(queues, ResourceCounter(num_workers), run_dir, recipes, solution, search_space, database) + super().__init__(queues, ResourceCounter(num_workers), run_dir, recipes, solution, search_space, database, pool) self.search_space_dir = self.run_dir / 'search-space' self.scorer = solution.scorer self._cache_search_space(inference_chunk_size, search_space) @@ -151,22 +151,21 @@ def _cache_search_space(self, inference_chunk_size: int, search_space: list[str # Get the paths to inputs and keys, either by rebuilding or reading from disk search_space_keys = {} if rebuild: - # Build search space and save to disk - # Process the inputs and store them to disk search_size = 0 input_func = partial(_generate_inputs, scorer=self.scorer) - with ProcessPoolExecutor(min(4, os.cpu_count())) as pool: - mol_iter = 
pool.map(input_func, self.iterate_over_search_space(), chunksize=1000) - mol_iter_no_failures = filter(lambda x: x is not None, mol_iter) - for chunk_id, chunk in enumerate(batched(mol_iter_no_failures, inference_chunk_size)): - keys, objects = zip(*chunk) - search_size += len(keys) - chunk_path = self.search_space_dir / f'chunk-{chunk_id}.pkl.gz' - with gzip.open(chunk_path, 'wb') as fp: - pkl.dump(objects, fp) - - search_space_keys[chunk_path.name] = keys + + # Run asynchronously + mol_iter = self.pool.map(input_func, self.iterate_over_search_space(), chunksize=1000) + mol_iter_no_failures = filter(lambda x: x is not None, mol_iter) + for chunk_id, chunk in enumerate(batched(mol_iter_no_failures, inference_chunk_size)): + keys, objects = zip(*chunk) + search_size += len(keys) + chunk_path = self.search_space_dir / f'chunk-{chunk_id}.pkl.gz' + with gzip.open(chunk_path, 'wb') as fp: + pkl.dump(objects, fp) + + search_space_keys[chunk_path.name] = keys self.logger.info(f'Saved {search_size} search entries into {len(search_space_keys)} batches') # Save the keys and the configuration diff --git a/examol/store/db/base.py b/examol/store/db/base.py index ad61469..750d916 100644 --- a/examol/store/db/base.py +++ b/examol/store/db/base.py @@ -1,5 +1,6 @@ """Base classes for storage utilities""" import gzip +import logging from abc import ABC from pathlib import Path from typing import Iterable @@ -8,6 +9,8 @@ from examol.store.models import MoleculeRecord from examol.utils.chemistry import get_inchi_key_from_molecule_string +logger = logging.getLogger(__name__) + class MoleculeStore(AbstractContextManager, ABC): """Base class defining how to interface with a dataset of molecule records. @@ -77,7 +80,7 @@ def export_records(self, path: Path): Args: path: Path in which to save all data. 
Use a ".json.gz" """ - + logger.info(f'Started writing to {path}') with (gzip.open(path, 'wt') if path.name.endswith('.gz') else open(path, 'w')) as fp: for record in self.iterate_over_records(): print(record.json(), file=fp) diff --git a/examol/store/db/memory.py b/examol/store/db/memory.py index f4a409b..b79dd18 100644 --- a/examol/store/db/memory.py +++ b/examol/store/db/memory.py @@ -3,7 +3,7 @@ import logging from concurrent.futures import ThreadPoolExecutor, Future from pathlib import Path -from time import monotonic +from time import monotonic, sleep from threading import Event from typing import Iterable @@ -16,7 +16,8 @@ class InMemoryStore(MoleculeStore): """Store all molecule records in memory, write to disk as a single file - The class will start checkpointing as soon as any record is updated. + The class will start checkpointing as soon as any record is updated + but no more frequently than :attr:`write_freq` Args: path: Path from which to read data. Must be a JSON file, can be compressed with GZIP. 
@@ -42,6 +43,14 @@ def __enter__(self): if self.path is not None: logger.info('Start the writing thread') self._write_thread = self._thread_pool.submit(self._writer) + + # Add a callback to print a logging message if there is an error + def _write_if_error(future: Future): + if (exc := future.exception()) is not None: + logger.warning(f'Write thread failed: {exc}') + logger.info('Write thread has exited') + + self._write_thread.add_done_callback(_write_if_error) return self def __exit__(self, exc_type, exc_val, exc_tb): @@ -67,7 +76,7 @@ def _load_molecules(self): logger.info(f'Loaded {len(self.db)} molecule records') def iterate_over_records(self) -> Iterable[MoleculeRecord]: - yield from list(self.db.values()) # Use `list` to copy the current state of the db and avoid errors due to concurrent writes + yield from list(self.db.values()) def __getitem__(self, item): return self.db[item] @@ -81,10 +90,14 @@ def __contains__(self, item: str | MoleculeRecord): def _writer(self): next_write = 0 - while not (self._closing.is_set() or self._updates_available.is_set()): # Loop until closing and no updates are available + while self._updates_available.is_set() or not self._closing.is_set(): # Wait until updates are available and the standoff is not met, or if we're closing - while (monotonic() < next_write or not self._updates_available.is_set()) and not self._closing.is_set(): - self._updates_available.wait(timeout=1) + while monotonic() < next_write or not self._closing.is_set(): + if self._updates_available.wait(timeout=1): # Check for termination condition once per second + to_sleep = next_write - monotonic() + if to_sleep > 0: + sleep(to_sleep) + break # Mark that we've caught up with whatever signaled this thread self._updates_available.clear() diff --git a/examples/redoxmers-multifidelity/README.md b/examples/redoxmers-multifidelity/README.md new file mode 100644 index 0000000..b2543e0 --- /dev/null +++ b/examples/redoxmers-multifidelity/README.md @@ -0,0 +1,32 @@ +# 
Multifidelity Active Learning + +An example that gradually runs computations in increasing order of complexity, +revisiting whether the molecule is promising enough at each step. +The goal is to find a molecule with a large oxidation potential. + +- _Simulation_ tasks are all using PM7 from MOPAC. The fidelity steps start from vertical ionization energies in vacuum, + then vertical energies in solution, then finish with adiabatic in solution. +- _Machine Learning_ models are Gaussian Process Regression using the feature set from + [Doan et al.](Doan et al. ). + The models predict the oxidation potential at the lowest fidelity and then the differences + between each subsequent step. We use known values for each level in place of the machine + learning when available. +- _Active learning_ is based on expected improvement (EI). The next calculation to start after one finishes + is determined by first picking a level of fidelity randomly and then finding the calculation with the highest + EI that is ready to run that step. + + + +## Running the example + +Navigate to this directory and then call + +``` +examol run spec.py:spec +``` + +It will output to `run` and eventually produce a file, `report.md`, that contains a summary of the run. + +> Note: You will need to install Redis to run this example or change `num_workers` in `spec.py` to 1 then +> `colmena_queue` from `RedisQueues` to `colmena.queue.PipeQueues`. We use Redis to cope with the larger +> number of inference tasks produced by this application. 
diff --git a/examples/redoxmers-multifidelity/assess-results.ipynb b/examples/redoxmers-multifidelity/assess-results.ipynb new file mode 100644 index 0000000..d4bb312 --- /dev/null +++ b/examples/redoxmers-multifidelity/assess-results.ipynb @@ -0,0 +1,470 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "25741c16-74e4-4dc4-898f-d4e80bbaf129", + "metadata": {}, + "source": [ + "## Evaluate the Outcomes of Simulations\n", + "Make sure that the simulations ran as expected" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "37de0781-5159-4371-9a3f-4e9c7ad4de97", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "%matplotlib inline\n", + "from matplotlib import pyplot as plt\n", + "import pandas as pd\n", + "import numpy as np\n", + "import json" + ] + }, + { + "cell_type": "markdown", + "id": "4b344ba3-bd4b-49cc-a24c-bf208582da43", + "metadata": {}, + "source": [ + "## Load Simulation Results\n", + "Get the results from simulation tasks" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "6f13017f-f171-4659-9ae3-d611e9db0427", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "def load_tasks(run_path: str) -> pd.DataFrame:\n", + " task_info = []\n", + " with open(f'{run_path}/simulation-results.json') as fp:\n", + " for line in fp:\n", + " try:\n", + " record = json.loads(line)\n", + " except json.JSONDecodeError: \n", + " continue\n", + " for f in ['task_id', 'failure_info', 'worker_info', 'serialization_method',\n", + " 'additional_timing']:\n", + " record.pop(f)\n", + " record['recipes'] = [f'{x[\"name\"]}/{x[\"level\"]}' for x in record['task_info']['recipes']][0]\n", + " record['recipe'] = record['recipes']\n", + " record['property'], record['level'] = record['recipe'].split(\"/\")\n", + " record['molecule'] = record['task_info']['key']\n", + " task_info.append(record)\n", + " task_info = pd.DataFrame(task_info)\n", + " print(f'Loaded {len(task_info)} task records')\n", + " \n", + " # 
Group simulation tasks by recipe and molecule\n", + " grouped_tasks = task_info.groupby(['molecule', 'recipe', 'property', 'level']).agg({'time_running': 'sum', 'time_result_received': 'min'}).sort_values('time_result_received')\n", + " grouped_tasks['walltime'] = grouped_tasks['time_result_received'] - grouped_tasks['time_result_received'].min()\n", + " grouped_tasks['compute_time'] = grouped_tasks['time_running'].cumsum()\n", + " grouped_tasks.reset_index(inplace=True)\n", + " print(f'Consolidated to {len(grouped_tasks)} computations')\n", + " \n", + " # Store the results\n", + " db = {}\n", + " with open(f'{run_path}/database.json') as fp:\n", + " for line in fp:\n", + " record = json.loads(line)\n", + " db[record['key']] = record\n", + " grouped_tasks['result'] = grouped_tasks.apply(lambda x: db.get(x['molecule'], {}).get('properties', {}).get(x['property'], {}).get(x['level']), axis=1)\n", + " grouped_tasks['smiles'] = grouped_tasks.apply(lambda x: db.get(x['molecule'], {}).get('identifier', {}).get('smiles'), axis=1)\n", + " return grouped_tasks" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "8dc12130-1c34-449f-9765-58479e12bc21", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Loaded 1484 task records\n", + "Consolidated to 763 computations\n" + ] + } + ], + "source": [ + "multfi_tasks = load_tasks('run')" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "0507b028-b8a3-4bec-9b4b-df8a8f6556a5", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Loaded 747 task records\n", + "Consolidated to 200 computations\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
moleculerecipepropertyleveltime_runningtime_result_receivedwalltimecompute_timeresultsmiles
0QFBFWZJEZIPSJL-UHFFFAOYSA-Noxidation_potential/mopac_pm7-acn-adiabaticoxidation_potentialmopac_pm7-acn-adiabatic60.6921141.700582e+090.00000060.6921146.803651CCOC(C(C)OC)C(Br)(OC)c1ccc(CCOC)cc1
1SQTARUJWLRWTLF-UHFFFAOYSA-Noxidation_potential/mopac_pm7-acn-adiabaticoxidation_potentialmopac_pm7-acn-adiabatic107.3199201.700582e+0917.349539168.012034NaNCOCCc1ccc(C(CCOC)(OC(C)C)C(COC)OCOC)cc1
2QPOBPKSMCMYIIZ-UHFFFAOYSA-Noxidation_potential/mopac_pm7-acn-adiabaticoxidation_potentialmopac_pm7-acn-adiabatic69.7188921.700582e+0926.168842237.730927NaNCCOC(Oc1ccccc1)C(COC)(OC)c1ccc(COC)cc1
3VHOYIGKPMQZVGX-UHFFFAOYSA-Noxidation_potential/mopac_pm7-acn-adiabaticoxidation_potentialmopac_pm7-acn-adiabatic48.5760731.700582e+0934.116643286.3069995.634255CCOC(Oc1ccccc1)C(Br)(c1ccc(C(C)C)cc1)N(C)C
4PQCDVSSBIJWVQW-UHFFFAOYSA-Noxidation_potential/mopac_pm7-acn-adiabaticoxidation_potentialmopac_pm7-acn-adiabatic24.4583411.700582e+0965.090125310.7653406.369714CCOC(OC)(c1ccc(CCOC)cc1)C(CC)Oc1ccccc1
\n", + "
" + ], + "text/plain": [ + " molecule recipe \\\n", + "0 QFBFWZJEZIPSJL-UHFFFAOYSA-N oxidation_potential/mopac_pm7-acn-adiabatic \n", + "1 SQTARUJWLRWTLF-UHFFFAOYSA-N oxidation_potential/mopac_pm7-acn-adiabatic \n", + "2 QPOBPKSMCMYIIZ-UHFFFAOYSA-N oxidation_potential/mopac_pm7-acn-adiabatic \n", + "3 VHOYIGKPMQZVGX-UHFFFAOYSA-N oxidation_potential/mopac_pm7-acn-adiabatic \n", + "4 PQCDVSSBIJWVQW-UHFFFAOYSA-N oxidation_potential/mopac_pm7-acn-adiabatic \n", + "\n", + " property level time_running \\\n", + "0 oxidation_potential mopac_pm7-acn-adiabatic 60.692114 \n", + "1 oxidation_potential mopac_pm7-acn-adiabatic 107.319920 \n", + "2 oxidation_potential mopac_pm7-acn-adiabatic 69.718892 \n", + "3 oxidation_potential mopac_pm7-acn-adiabatic 48.576073 \n", + "4 oxidation_potential mopac_pm7-acn-adiabatic 24.458341 \n", + "\n", + " time_result_received walltime compute_time result \\\n", + "0 1.700582e+09 0.000000 60.692114 6.803651 \n", + "1 1.700582e+09 17.349539 168.012034 NaN \n", + "2 1.700582e+09 26.168842 237.730927 NaN \n", + "3 1.700582e+09 34.116643 286.306999 5.634255 \n", + "4 1.700582e+09 65.090125 310.765340 6.369714 \n", + "\n", + " smiles \n", + "0 CCOC(C(C)OC)C(Br)(OC)c1ccc(CCOC)cc1 \n", + "1 COCCc1ccc(C(CCOC)(OC(C)C)C(COC)OCOC)cc1 \n", + "2 CCOC(Oc1ccccc1)C(COC)(OC)c1ccc(COC)cc1 \n", + "3 CCOC(Oc1ccccc1)C(Br)(c1ccc(C(C)C)cc1)N(C)C \n", + "4 CCOC(OC)(c1ccc(CCOC)cc1)C(CC)Oc1ccccc1 " + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "singfi_tasks = load_tasks('run-single')\n", + "singfi_tasks.head()" + ] + }, + { + "cell_type": "markdown", + "id": "0ecbcc07-1bf4-4e61-8efd-00262985e120", + "metadata": {}, + "source": [ + "## Plot Task Mix by Time\n", + "See how many tasks of each time we have run by each timestamp" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "a84a9a4b-7df3-4852-97d1-0bff96b4eb65", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + 
"all_recipes = set(multfi_tasks['recipe'])" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "f754d9e5-20ae-4f33-bdf9-5dacb06ab8bb", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "Text(0.5, 0, 'Walltime (hr)')" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAUcAAADZCAYAAACzUvLsAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjcuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8pXeV/AAAACXBIWXMAAA9hAAAPYQGoP6dpAAA8j0lEQVR4nO3deVzUdf7A8ddw3yjIIYoCiZqKJ2ZQiqRhlkZrm5pWlrbpz2xj1XU9KrVcMWs9Ko8sV3Y9s8wz88gED9SQxPtIBTWFEEVuZoD5/P6Y5SsjA3IMDMfn+XjMg5nP9zPf72dGePs9Pt/3WyWEEEiSJEl6zEw9AEmSpLpIBkdJkiQDZHCUJEkyQAZHSZIkA2RwlCRJMkAGR0mSJANkcJQkSTJABkdJkiQDLEw9gKrQarXcunULR0dHVCqVqYcjSVI9IoQgKysLLy8vzMzK3j+sl8Hx1q1beHt7m3oYkiTVYzdu3KBly5ZlLq+XwdHR0RHQfTgnJycTj0aSpPokMzMTb29vJY6UpV4Gx+JDaScnJxkcJUmqkoedkpMXZCRJkgyQwVGSJMkAGRwlSaq3fk/PZdeZZP7IzDf6uuvlOceKEEJQWFhIUVGRqYci1SPm5uZYWFjIKWL1QGGRlic/3g/A8le680yn5kZdf4MMjhqNhuTkZHJzc009FKkesrOzo3nz5lhZWZl6KFI51h67rjx3srE0+vobXHDUarUkJiZibm6Ol5cXVlZWci9AqhAhBBqNhtu3b5OYmIi/v3+5k4Ql0zp8OU15HtymmdHX3+CCo0ajQavV4u3tjZ2dnamHI9Uztra2WFpacu3aNTQaDTY2NqYeklSGOzkaAAZ1Nu7hdLEG+9+i/B9fqir5u1M/xF9LB+BxP9caWb/8LZAkqd7JzC9Qngf6NK2RbTS4w2pJkhqGjLwCCou0Bpd9uuei8rydR/m3AVaVDI6NVN++fenatSuLFi0yyvpef/117t27x5YtW4yyPqlx6/vJfpLuVGy2SU1dcJXBsYGLjo4mNDSU9PR0mjRporR///33WFoaf/qDJFVXZn5BhQPjvCEBNTYOGRwbsIKCgjKXubi41OJIJOm+JfsvcztLbXDZqd/v8ev1e8rrE+8/TVN708w3bRQXZIQQ5GoKTfIQQlRojF9++SUtWrRAq9U/x/L8888zatQoALZv306PHj2wsbHBz8+P2bNnU1hYqPRVqVQsX76c8PBw7O3tefPNNwkNDQWgadOmqFQqXn/9dUB3WB0REaG8V61WM2XKFLy9vbG2tsbf35+VK1cCUFRUxJgxY/D19cXW1pZ27dqxePHiqv5zSI3Y9pO3+GT3RaJikww+SgbG14JamywwQjX3HCMjI5k+fTrvvvuucu5KCMHs2bNZsWIF6enp9OrViyVLltCxY0flfWq1msmTJ7N+/Xry8vLo168fS5cuLTfxZHXkFRTR4YPdNbLuhzn34QDsrB7+Nb/00kv89a9/Zf/+/fTr1w+A9PR0du/ezfbt29m9ezevvPIKn332Gb179+bKlSu89dZbAMycOVNZz8yZM4mMjGThwoWYm5sTHh7Oiy++yMWLF3FycsLW1tbg9l977TWOHDnCZ599R
pcuXUhMTCQtTTfJVqvV0rJlSzZu3EizZs2IjY3lrbfeonnz5gwdOrS6X5HUiCyNvqI8nxDaxmAfdWERrwX54O1i2nnKVQ6OcXFxrFixgs6dO+u1z58/nwULFhAVFUXbtm2ZM2cOTz/9NBcvXlSSS0ZERLB9+3Y2bNiAq6srkyZNYtCgQcTHx2Nubl69T1RPubi48Mwzz7Bu3TolOH777be4uLjQr18/QkNDmTp1qrIX6efnx0cffcSUKVP0guOIESMYPXq08joxMREAd3d3vXOOJV26dImNGzeyd+9e+vfvr6y/mKWlJbNnz1Ze+/r6Ehsby8aNG2VwlPQIIRi3Jp5sdaHB5eeTMwEI8nNl8oB2tTm0SqtScMzOzmbkyJF89dVXzJkzR2kXQrBo0SJmzJjBkCFDAPjPf/6Dh4cH69atY+zYsWRkZLBy5UpWr16t/CGuWbMGb29vfvrpJwYMGGCEj6XP1tKccx8af70V3XZFjRw5krfeeoulS5dibW3N2rVrGT58OObm5sTHxxMXF8c///lPpX9RURH5+fnk5uYqdwMFBgZWeowJCQmYm5sTEhJSZp/ly5fz9ddfc+3aNfLy8tBoNHTt2rXS25IatoO/pbH77B8P7fd/fR+phdFUT5WC49tvv81zzz1H//799YJjYmIiKSkphIWFKW3W1taEhIQQGxvL2LFjiY+Pp6CgQK+Pl5cXnTp1IjY21mBwVKvVqNX3T+BmZmZWarwqlapCh7amNnjwYLRaLT/88AM9e/bk4MGDLFiwANAd2s6ePVv5T6ekkre42dvbV3q7ZR1qF9u4cSN/+9vf+Ne//kVQUBCOjo588sknHDt2rNLbkkwrJSOffv+KxsOpZm6LvJqWozxfPLyrwT5eTWzp6VP3LwhWOmJs2LCBX3/9lbi4uFLLUlJSAPDw8NBr9/Dw4Nq1a0ofKysrmjZtWqpP8fsfFBkZqXdY11DZ2toyZMgQ1q5dy+XLl2nbti09evQAoHv37ly8eJE2bQyfpylLcWaZ8lK3BQQEoNVqiYmJUfbmSzp48CDBwcGMHz9eabty5UqpflLdJoTg8ch9gH4QqwmvPN6K8K4tanQbNa1SwfHGjRu8++677Nmzp9wb8h+clCmEeOhEzfL6TJs2jYkTJyqviwvkNEQjR45k8ODBnD17lldeeUVp/+CDDxg0aBDe3t689NJLmJmZcerUKU6fPq239/6g1q1bo1Kp2LFjB88++yy2trY4ODjo9fHx8WHUqFGMHj1auSBz7do1UlNTGTp0KG3atOG///0vu3fvxtfXl9WrVxMXF4evr2+NfQ+S8UXFJinPO7VwYubgjmV3rgYrczM6tXCukXXXpkoFx/j4eFJTU5W9GdDtkRw4cIAvvviCixd1t/SkpKTQvPn9TBmpqanK3qSnpycajYb09HS9vcfU1FSCg4MNbtfa2hpra+vKDLXeeuqpp3BxceHixYuMGDFCaR8wYAA7duzgww8/ZP78+VhaWtK+fXvefPPNctfXokULZs+ezdSpU3njjTd47bXXiIqKKtVv2bJlTJ8+nfHjx3Pnzh1atWrF9OnTARg3bhwJCQkMGzYMlUrFyy+/zPjx4/nxxx+N+tmlmjV7+znl+Y53eptwJPWEqITMzExx+vRpvUdgYKB45ZVXxOnTp4VWqxWenp7i448/Vt6jVquFs7OzWL58uRBCiHv37glLS0vxzTffKH1u3bolzMzMxK5duyo0joyMDAGIjIyMUsvy8vLEuXPnRF5eXmU+miQpGurvUOt/7BCt/7FDTNqYYOqhmFR58aOkSu05Ojo60qlTJ702e3t7XF1dlfaIiAjmzp2Lv78//v7+zJ07Fzs7O2UvyNnZmTFjxjBp0iRcXV1xcXFh8uTJBAQEGDzfJUlSxWXlF3Djbl6p9oISCRzGhfiVWi6VZvRLuFOmTCEvL4/x48crk8D37NmjV0B74cKFWFhYMHToUGUSeFRUVKOd4yhJxqAuLCJg1p6H9
nvEzeGhfSRQCVHB+9vqkMzMTJydncnIyMDJyUlvWX5+PomJifj6+soszlKV1Nffoa0JN3l3QwIA7o6Gz9EP6d6SqQPb1+Ko6p7y4kdJdX/ynyRJiuIAaGFWemZHofb+fs4vM+QpquqSwVGS6pHiPcOSgfBBEf39a2k0DZsMjpJUByVn5JGr0Z+4X/IE2JRn2vFi99KJWhysLbC3ln/WxiC/RUmqY7p+uId7uWXn4gQY/YQvNpW4b1+qPBkcJakOEULoBUYnG4sHlkM7T0cZGGuBDI5SrSqrbENVJSUl4evry4kTJxpElqCScxQvzRmIlUWjyEddJ8lvXqoxD2YbBwgODiY5ORln5/p/721NOHj5tvJcBkbTkt++ZHTl1a6xsrLC09OzxirG1XdRh5MAcJQXVUxOBsc6ZteuXTz55JM0adIEV1dXBg0apJce7Pfff2f48OG4uLhgb29PYGCgkldx1qxZdO3aldWrV+Pj44OzszPDhw8nKyvL4LaKyx8sX75cr/3XX39FpVJx9epVADIyMnjrrbdwd3fHycmJp556ipMnTyr9i7f773//Gz8/P6ytrRk1ahQxMTEsXrwYlUqFSqUiKSmJ6OhoVCoV9+7dU95/+PBhQkJCsLOzo2nTpgwYMID09PQKfR/11Yfbz/Hs4oMM+lz/8VtqNgC92zYz8QilxhEchQBNjmkelbwBKScnh4kTJxIXF8e+ffswMzPjT3/6E1qtluzsbEJCQrh16xbbtm3j5MmTTJkyRa8o15UrV9iyZQs7duxgx44dxMTEMG/ePIPbMjMzY/jw4axdu1avfd26dQQFBeHn54cQgueee46UlBR27txJfHw83bt3p1+/fty9e1d5z+XLl9m4cSObNm0iISGBzz77jKCgIP7yl7+QnJxMcnKywTRzCQkJ9OvXj44dO3LkyBEOHTrE4MGDlfyT5X0f9dm/DydyLjmTMzf1H8XeLqO+ilR7Gse+e0EuzPUyzban3wKrimfnfvHFF/Ver1y5End3d86dO0dsbCy3b98mLi5OKa36YPJbrVZLVFSUci/7q6++yr59+/TKK5Q0cuRIFixYwLVr12jdujVarZYNGzYo6cr279/P6dOnSU1NVdLGffrpp2zZsoXvvvtOKfKl0WhYvXo1bm5uyrqtrKyws7PD09OzzM87f/58AgMDWbp0qdJWshhbed/Hg0lQ6otrd+4nmv385W44PnBFurmzLe08HR98m1TLGkdwrEeuXLnC+++/z9GjR0lLS1P2kK5fv05CQgLdunUrt+a0j4+PXpKP5s2bk5qaCsDatWsZO3assuzHH3+kd+/etG/fnvXr1zN16lRiYmKUJLegy+GZnZ2Nq6ur3nby8vL0Dm9bt26tFxgrKiEhgZdeeqnM5eV9H3U9OF6/k0ufT/ZjZa5/gKYpkSFncBcT/actPVTjCI6Wdro9OFNtuxIGDx6Mt7c3X331FV5eXmi1Wjp16oRGo3lorRfQVQosSaVSKQHl+eefp1evXsqyFi10aexHjhzJunXrmDp1KuvWrWPAgAE0a6Y756XVamnevDnR0dGltlVyKk5VatfAw+vXlPd91HXzdp0H9INhSV1ayiv2dVnjCI4qVaUObU3lzp07nD9/ni+//JLevXWZmg8dOqQs79y5M19//TV3794td++xLI6Ojnp7lcVGjBjBe++9R3x8PN999x3Lli1TlnXv3p2UlBQsLCzw8fGp1PasrKzKrV0Dus+0b98+gzWCHvZ91HU/ndftsZup4NA/ntJbZm1hhqtD48huX181jgsy9UTTpk1xdXVlxYoVXL58mZ9//lmvds7LL7+Mp6cnL7zwAocPH+bq1ats2rSJI0eOVGu7vr6+BAcHM2bMGAoLCwkPD1eW9e/fn6CgIF544QV2795NUlISsbGxvPfeexw/frzc9fr4+HDs2DGSkpL0DolLmjZtGnFxcYwfP55Tp05x4cIFli1bRlpa2kO/j7pOU6j7vKOCffBqY
qv3kIGx7pPBsQ4xMzNjw4YNxMfH06lTJ/72t7/xySefKMutrKzYs2cP7u7uPPvsswQEBDBv3jyjJAkeOXIkJ0+eZMiQIXqHuiqVip07d9KnTx9Gjx5N27ZtGT58OElJSaWqTD5o8uTJmJub06FDB9zc3Lh+/XqpPm3btmXPnj2cPHmSxx57jKCgILZu3YqFhcVDv4+6Ys6Oc3T/aC/BkfuUh8/UH5TlI3u1MuHopKqSyW4l6QGV+R0qLNLSZkb5hcaS5j1nzOFJ1SST3UpSLfgl6f5cz/+OfowmdvcviJmpVLT1kFNy6isZHCWpAoq0gtdX/YL2gQOtw5fvKM/7tK38VCap7pLBUZIq4Kfzf3Dwt7Qyl3dr1aT2BiPVikpdkFm2bBmdO3fGyckJJycngoKC9Aq7CyGYNWsWXl5e2Nra0rdvX86ePau3DrVazTvvvEOzZs2wt7fn+eef5/fffzfOp5GkGvLz/6blACwe3lXvEfVGT74bF2zC0Uk1oVLBsWXLlsybN4/jx49z/PhxnnrqKcLDw5UAOH/+fBYsWMAXX3xBXFwcnp6ePP3003qJDyIiIti8eTMbNmzg0KFDZGdnM2jQoIfOh5MkU7p2V3fLn5ONBeFdW+g9+rZzx9xAwSupnhPV1LRpU/H1118LrVYrPD09xbx585Rl+fn5wtnZWSxfvlwIIcS9e/eEpaWl2LBhg9Ln5s2bwszMTOzatavC28zIyBCAyMjIKLUsLy9PnDt3TuTl5VXjU0mNmaHfodb/2CFa/2OHmLrplAlHJhlDefGjpCqfcywqKuLbb78lJyeHoKAgEhMTSUlJISwsTOljbW1NSEgIsbGxjB07lvj4eAoKCvT6eHl50alTJ2JjYxkwYIDBbanVatRqtfI6MzPTYD9JMobUrHzSstV8tfk099SgLrx/VNOpRdlTP6SGpdLB8fTp0wQFBZGfn4+DgwObN2+mQ4cOxMbGApSaGOzh4cG1a9cASElJwcrKiqZNm5bqk5KSUuY2IyMjDd5eJknGpiksIj1HQ36BlmNX73AzS/90j6GKf1LDVOng2K5dOxISErh37x6bNm1SkpoWezDDsxDioVmfH9Zn2rRpereNZWZmGswNKEnVlZVfqDyfFNaWQnTzFgWCx/1cZWGrRqTStw9aWVnRpk0bAgMDiYyMpEuXLixevFjJ2ffgHmBqaqqyN+np6YlGo1GyPBvqY4i1tbVyhbz4IZnWgxm9o6KiKl0wy8fHh0WLFhl9bNVZd36B7n5ocxU8G+DF0J7eDO3pzbCerWjtWveTl0jGU+17q4UQqNVqfH198fT0ZO/evcoyjUZDTEwMwcG6aQ49evTA0tJSr09ycjJnzpxR+kj107Bhw7h06VKtb7esoBwXF6ck4q2MOzm6c9u2VnIPsbGr1GH19OnTGThwIN7e3mRlZbFhwwaio6PZtWsXKpWKiIgI5s6di7+/P/7+/sydOxc7OztGjBgBgLOzM2PGjGHSpEm4urri4uLC5MmTCQgIoH///jXyAaXaYWtrW6F8k7WlIol3k9JyyMwvQIXulI7g/t0vFuYyJ0tjV6nfgD/++INXX32Vdu3a0a9fP44dO8auXbt4+umnAZgyZQoRERGMHz+ewMBAbt68yZ49e/RyCC5cuJAXXniBoUOH8sQTT2BnZ8f27duNklmmIajNAlvF1qxZQ2BgII6Ojnh6ejJixAgle3ixnTt30rZtW2xtbQkNDSUpKUlv+YN7cFeuXCE8PBwPDw8cHBzo2bMnP/30U6ltZ2VlMWLECBwcHPDy8uLzzz/XW75gwQICAgKwt7fH29ub8ePHk52tK0IVHR3NG2+8QUZGhlLEa9asWUDpw+p79+7x1ltv4eHhgY2NDR06dmL7ju2ALiiWDIwADrL6n1Qb84qMrbLzHLVarcjR5JjkodVqK/XZvvvuO7Fp0yZx6dIlceLECTF48GAREBAgioqKRFZWlvDz8xO9e/cWBw8eFL/99pv45ptvR
GxsrBBCiJkzZwoHBwcxZMgQcfr0aXHgwAHh6ekppk+fXu42V65cKXbu3CmuXLkijhw5Ih5//HExcOBAZfn169eFtbW1ePfdd8WFCxfEmjVrhIeHhwBEenq6EEKIVatWCWdnZ+U9CQkJYvny5eLUqVPi0qVLYsaMGcLGxkZcu3ZN6dO6dWvh6OgoIiMjxcWLF8Vnn30mzM3NxZ49e5Q+CxcuFD///LO4evWq2Ldvn2jXrp34v//7PyGEEGq1WixatEg4OTmJ5ORkkZycLLKyspR1L1y4UAghhKagUHQLfEz4t3tUrNq4Vew7dkp8tmq9WPLfjeLkjXSRpykUmsIi5ZGbmyvnyjZgNT7PsT7JK8yj17peD+9YA46NOIZdJUol1HaBLYDRo0crz/38/Pjss8947LHHyM7OxsHBgWXLluHn58fChQtRqVS0a9eO06dP8/HHH5e5zi5dutClSxfl9Zw5c9i8eTPbtm1jwoQJSvsTTzzB1KlTAV1ux8OHD7Nw4ULlaCQiIgJ1QRFFQuDRwpv3Zs4i4p0JfLroM8AMG3sHVCoVTi73S5nmagoR6Kbl5GoK2bFzFyd/Pc7m/cfw8dN9XyFeutkOzraWpa5AF8ma2hIy2W2dc+XKFUaMGIGfnx9OTk74+voCxiuw5eDgoDwOHjwIwIkTJwgPD6d169Y4OjrSt29fZZsA58+f5/HHH9ebbhUUFFTu58jJyWHKlCl06NCBJk2a4ODgwIULF0olvH1wPUFBQZw/f155vX3XHvr2649Pq1a4uTTlzTfe4M6dO5xK+oPLqdmkZqrRCsHl1Gy9R2GRlrRsDZdTszkSF49Hcy8ebdcOH1d75dHG3YFWLpWr8SM1Ho1iz9HWwpZjI46ZbNuVUdsFtnJycggLCyMsLIw1a9YoGbsHDBigFLESVciH/Pe//53du3fz6aef0qZNG2xtbfnzn/9cocJYxUH42rVrvPSncF4a+QZvT56Oq4srv/5yhPcmTcBMW4SVuRkWZipAVarCH6gwN9O129vaoUKFm6M1TraWpbYnSYY0iuCoUqkqdWhrKqYosBUfH09aWhrz5s1TJtY/WBumQ4cObNmyRa/t6NGj5W7r4MGDvP766/zpT38CIDs7u9RFHEPrOXr0KO3bt1fGUVRYyKQP5uDqYIO3ix3x0TsBaOvpRJMmTrR2d0Zoi2jfXH/uq6W5Cg8nG9o3d+Lp3o8ROfMmqb8n0aRt23LHLUnF5GF1HWKKAlutWrXCysqKzz//nKtXr7Jt2zY++ugjvT7jxo3jypUrTJw4kYsXL7Ju3TqioqLKXW+bNm34/vvvSUhI4OTJk4wYMcJgga3Dhw8zf/58Ll26xJIlS/j2228ZP+EdNIVaWrX2pbCwkPWrVpB68xqrV69m+fLleu/38fEhOzubffv2kZaWRm5ubqlthISE0KdPH1588UX27t1LYmIiP/74I7t27ar8FyY1GjI41iGmKLDl5uZGVFQU3377LR06dGDevHl8+umnen1atWrFpk2b2L59O126dGH58uXMnTu33PUuXLiQpk2bEhwczODBgxkwYADdu3cv1W/SpEnEx8fTrVs3PvzoIya9P4dWnYO4kJKJtacfkz/4J6uWLqZ3rx6sXbuWyMhIvfcHBwczbtw4hg0bhpubG/Pnzzc4nk2bNtGzZ09efvllOnTowJQpU2SaPKlcssCWVGdcu5NDRl4BoKu/AiAAFdDRy+mh9+gbi/wdathkgS2p3skr0O3JOdtayvuYJZOTh9VSnaEp1J2TlPc1S3WB3HOUTEYIQZG29FkdW5kWTKoDZHCUTEIrBGduZhhcZif3HKU6QAZHqcq0QqAuqNoV34y8QoPtVhZmmJvJsz2S6TXY4FgPL8LXK0VaLWdvGaeWT+eWTYyyHmORvzsSNMDgWHz7XG5ubp3KL9jQ3M0pUJ6bq1SYVaE0qUoFHk51b6pM8UTyB2/FlBqXBhcczc3NadKkiZJswc7OrtbmxzUmt+9lI
4q0qFQqHvFwfPgbyqQlPz/faOOqDiEEubm5pKam0qRJE5ljtJFrcMERUOrZPJiwVao6Ie6nAgO4l6vbc7S2MCMx19p0A6sBTZo0UX6HpDos7x5sfxfy0iF0OrR63Kirb5DBUaVS0bx5c9zd3SkoKHj4G6SH6vevaIPt84Z0xte38kkw6ipLS0u5x1hffNz6/vNeY42++gYZHIuZm5vLX3QjyFYX6tVvfi6gOQD+Hg482b65PG0hGd/VGLi0C8ytDC+P1S+nQfOuRh9Cgw6OUtWFfhpNRl4B5mYqbmeplfakec+ZcFRSvVeQBzeOweWfQJ2tuyr3oOP/rtw6Z6SApfEvvsrgKJWScOMeiWk5ph6GVBMKNXA1Ggpq6d83PQmuHQEbJ9AWwtnNlXt/xz+BUwvDywrV0H9WjQRGkMFR+p9fEu+SmKar6vfB1rNK+86/9kalAktzMx5xk8kg6rUr+2H1C6YehY6lPZiZw+PjDe89qszgyb+BuemmU1UqOEZGRvL9999z4cIFbG1tCQ4O5uOPP6Zdu3ZKHyEEs2fPZsWKFaSnp9OrVy+WLFlCx44dlT5qtZrJkyezfv168vLy6NevH0uXLqVly5bG+2RShf32RxZDvyydMNfbxZYOXmWndJLqmKICOPM95KbpXp/bqvup+t959+ux+v1bP1E748q9Az5PgssjgIAWPYx+ZbkmVCo4xsTE8Pbbb9OzZ08KCwuZMWMGYWFhnDt3Dnt73V7F/PnzWbBgAVFRUbRt25Y5c+bw9NNPc/HiRSVFf0REBNu3b2fDhg24uroyadIkBg0aRHx8vLyAUksy8gq4mZ4HwMxtZ5T2/o+6A5CZV8inL3Ux+F7JhHLvwpElYGbgTzdmXsXW8dR7EPxXsGhYU7CMrVrJbm/fvo27uzsxMTH06dMHIQReXl5ERETwj3/8A9DtJXp4ePDxxx8zduxYMjIycHNzY/Xq1QwbNgyAW7du4e3tzc6dOxkwYMBDt1vRZJWSYdfv5NLnk/2l2p1tLTk5M8wEI5IqbJZzxfoFDNX9zL8HXUfeP3R1aw9u7cp8W2NQK8luMzJ0WVWKiz0lJiaSkpJCWNj9PzBra2tCQkKIjY1l7NixxMfHU1BQoNfHy8uLTp06ERsbazA4qtVq1Or7V0wzM41zT29jNXfn/dKn7o66vYe7ORq+HhVoqiFJFVH0QLKOwDEG+mhg4HywqvsF5eq6KgdHIQQTJ07kySefpFOnTgCkpKQA4OHhodfXw8ODa9euKX2srKxo2rRpqT7F739QZGQks2fPrupQpQfsOqv7ntt7OrIroo+JRyPpEQLmeIAwkO1IWyI4Tv4NHNxrb1yNUJWD44QJEzh16pRe6dBiD04KFkI8dKJweX2mTZumV4UvMzNTKSMqPVx6job3tp4hM0//bqGRj7cu4x2SyZxYA0Xq8vvYNpWBsRZUKTi+8847bNu2jQMHDuhdYS6+HzUlJYXmzZsr7ampqcrepKenJxqNhvT0dL29x9TUVIKDgw1uz9raGmtrefK4qt7873Hir6WXah/Q0cNAb6nGZafCqmfBxsD5w5slaoZPvFB6uYU12DWc2zXrskoFRyEE77zzDps3byY6OhpfX1+95b6+vnh6erJ37166desGgEajISYmho8//hiAHj16YGlpyd69exk6VHfSODk5mTNnzpRZVlOqnsup2crzhcN0V6D93R1xd6x76cIahe0RcOe38vuELwGn5uX3kWpUpYLj22+/zbp169i6dSuOjo7KOUJnZ2dsbW1RqVREREQwd+5c/P398ff3Z+7cudjZ2TFixAil75gxY5g0aRKurq64uLgwefJkAgIC6N+/v/E/oUSeRnf+6qPwjvypm5xLanRXfoYLO0u3p5yCm/Fg/8AhcNat+8+Hry/9Phc/cG9v3DFKlVap4Lhs2TIA+vbtq9e+atUqXn/9dQCmTJlCXl4e48ePVyaB79mzR5njCLqC7xYWFgwdOlSZBB4VFSXnO
NYAIQSaIl1Vv7qWcbvB2DIespLLXl4yGJY0fB20f7ZmxiRVW7XmOZqKnOdYcfdyNXT9cC8AJ2eG4Wwrs1sb1e2LsOQx3fMuL0OTVvrLC9Xg2wfsm+m32zhDU59aGaKkr1bmOUp139USCSRkYKwGIWDrBEi7dP/OEiHgWonZGoM/A4syUmxJ9Y4Mjg2UurCIpLRctiWUcUgnVc6W/4OTBs4PFnvybzIwNjAyODYQUYcTuZWhq8WyLeEWKZn6dVkebS5PP1RY7OeQ/Yd+W8nAGL5UP02WZ2do1qZ2xibVGhkcG4AzNzOYtf2cwWWO1hZYWZgxOaxtLY+qnroaA3veK3v5y99Au2dqbzySycjg2ADcuJurPB/bxw8AdaGWd55qg6uDnDxfKftK3KYa/Ff9ZZ6dZWBsRGRwbAASfr8HQDsPR6Y9+6hpB1NfaIvg/HZd5bqSbsbrfjb1gbCPan1YUt0hg2M9dT45k7k7z5OrKVJuDbSxNDPxqGpIVgpoqpHWX2jhtz26gFhs7/vlvyd8adW3JzUIMjjWU1/GXOHgb2l6beFdy6i1UZ9tHlf+VWJjaD9I/7VnAPjUUpZsqc6SwbGeEELw1L9iyC8oIju/kCy1Ln3V0MCW9HvUA0cbC3r5upp4lDWgZGC0ciy738MUqXV7kAEv3W+zcYY+U8C+AX5vUrXJ4FhPRF+8bbAi4LiQR/BzczDBiGrZ0NXQ4XlTj0JqRGRwrOO0WkFuQREbj99Q2rZN0B3ytfVwxMayAd2PvmMiHF8JFOf1LHFnazN/U4xIasRkcKzDEtNyCP00Wq+tX3v3hpNAQp0N6hIlL46v/N8TA7f7u8ksNVLtksGxjtFqBVfTchBCMGHdiVLL3+ztZ4JRGZB9G1Y9A9ZVPA+Ydhk0WYaXvbYN3EtMSbJ3M1zbWJJqkAyOdcxznx/ifLJ+AbHe/s346rVALMxUWJibaLrOtVi4uPN+DeTDi4y3brMSCTHc2umy2MhgKJmYDI51iBBCLzA2tbMkPbeAGc89atpzi0LAqoFlLx+xsWrrNTOHVsGyUp5UJ8ngWEcUFmm5XuI2wPMfPoOtlYkCojob9s+FnFTd69T7pVzpOlJX4AnAwgZCZ4BZA518LjVqMjjWAU/9K5qrt/Wn6dR6YLwRB7f/FwTPbtal/jfkBXnniNQ4yOBoRBm5BWw/dYv8AgM1h8ugFaJUYAxs3bSM3jXk93hYWUb9ngGRup+iCB4dXHtjkiQTk8HRiIZ+eYSLf5RxBbYCjk7rh721OY42NZyxW5Ojy1lYXCEjZt79ZW3/d27R3FKXlca7Z82ORZLqKBkcq2nV4USWx1zB0tyM39PzlPYXunpVaj2P+7ni6VxLpVK/7g+pBvI/dgiHof+tnTFIUh1X6eB44MABPvnkE+Lj40lOTmbz5s288MILynIhBLNnz2bFihVK9cElS5bQsWNHpY9arWby5MmsX79eqT64dOlSWrasu2VDL6Zk8Xu67oJJoVaw+debONlasPH476X6HpveDw+nOlQTuqhANxWn4H/Bu2RgDByj++noqUv1L0kSUIXgmJOTQ5cuXXjjjTd48cUXSy2fP38+CxYsICoqirZt2zJnzhyefvppLl68qJRnjYiIYPv27WzYsAFXV1cmTZrEoEGDiI+Pr5PlWRPTchiw6EC5fSaEtqHfo+60aGKLe10KjKArDHVqQ+n2d0/KCniSVIZKB8eBAwcycKDhOW9CCBYtWsSMGTMYMmQIAP/5z3/w8PBg3bp1jB07loyMDFauXMnq1avp3193EWDNmjV4e3vz008/MWDAgGp8nJqx/pfryvMu3k0A3dQbC3Mzwjp44OFkw5BuLTAzq6MTly/suP+8RQ/dT79QGRglqRxGPeeYmJhISkoKYWFhSpu1tTUhISHExsYyduxY4uPjKSgo0Ovj5eVFp06diI2NNRgc1Wo1arVaeZ2ZmVmqT01KybhfrGrr2/Uwz5+NM2iyYcjX0Pmlh/eXJAmjz
t5NSUkBwMPDQ6/dw8NDWZaSkoKVlRVNmzYts8+DIiMjcXZ2Vh7e3t7GHPZDbTupK286vGftbtdoMm/qfnp0LL+fJEmKGrlarXrgvlghRKm2B5XXZ9q0aUycOFF5nZmZWWsBUoj7GWLqTXnT09/B9aO652c23W+3b2aa8UhSPWTU4Ojp6Qno9g6bN2+utKempip7k56enmg0GtLT0/X2HlNTUwkODja4Xmtra6yta7eK3i+Jd/km7gbX7tyfoP3nHnX0anqhGua4g5WD7vC5LA7utTcmSaoFz37/LGl5aXwa8il9WvYx6rqNGhx9fX3x9PRk7969dOvWDQCNRkNMTAwff/wxAD169MDS0pK9e/cydOhQAJKTkzlz5gzz58835nCqrLBIy9Avj5Rqt7euQ9NCCzVw6UfdfdB7P9C1PRgY+0zRZbcp0kDQhNofoyRVU35hPhqtxuCymYdnciNLlwT6bv5do2+70n/t2dnZXL58WXmdmJhIQkICLi4utGrVioiICObOnYu/vz/+/v7MnTsXOzs7RowYAYCzszNjxoxh0qRJuLq64uLiwuTJkwkICFCuXptSVn4BAbP2KK/f6uOHu6M1oe3rwF7Xrmnwe5wu4UPSQcN9/pqg++ncUneXiyTVQYXaQm7n3i5zeVJmEm/tfavC6wtrHfbwTpVU6eB4/PhxQkNDldfF5wJHjRpFVFQUU6ZMIS8vj/HjxyuTwPfs2aPMcQRYuHAhFhYWDB06VJkEHhUVZdI5jhl5BfSZv5+MvAK99ul1pQ70jTg4WkbSB/8wyEmDl1bJ6TlSuTLUGaw7v+6h1wBqikCw5twaMjXGm3Hy3eDvsLM0fto7lSh5xaGeyMzMxNnZmYyMDJycqneR5G/fJHDs6h1ulZiuU+zkzDCcbU2495WXDrcv6p7/u8QUp/AlYGkHNk7g2xfM69DhvmRSuQW5XEy/aHDZ71m/M/3Q9FoeUfnMVGZYmhn+G1MXqenXqh9znpiDtbnhaw7mZuaYqSo36aai8aNR/1Wl52jYfOKmXlvLprbsmxSCtYWJ9mJvJeiSQljawIk1pZfbNoVur9T6sKS6QSu0nLp9iiwDJSZu591mZuzMCq3H2tya5x8xTTVHgcDWwpZRHUbhYe/x8DeYSKMOjrvP3p9XuXFsEC72VrRxr0aZUyF0Fz8q69f/6u59vnMZUk6VXu7srTt/aG4Nr26u+vikeuFu/l1+Sf4FYaDQ2MzYmeQV5hl4lz5HK0eaWpdOfZeam8pfOv+FtzpX/HxeY9Wog+OpmxkAWJmb8ZivS8XepM6GCz9Awf+ydt+9AteOgG0TuPyT8QbXIRw8AqBVL11NFalBuZF5gyPJR0qd+7ty7wprz6+t0Do6uHYo1ZahzuClti8xJmCMUcbZmDXq4HjotzQAAn0eklxWCIgaBNoCuHGs5gb0zP/yKj76PDi3qLntSLXmbv5dsh+YYpVflM+L20onbXmQr7MvbrZupdqzNFmsHLASR6sqVn6UKqRRB8fimi0PvfPl+Eq4dqh0e/tBup9598DnCd2VYtc2VauxbOUga7HUU1czrrIrcRdFQj8D/PGU4/ya+mu57328+ePYWehfaTVTmfFGpzfo7NbZ6GOVKq7RBseSpQxe6FrOXpoQ8MOk+6+HrdUdQrcKlsGsnjt1+xRbLm/BytyqQv3Xnl+Lp71nqfaUHMM5AUqyt7TXe60VWl70f5F/PPaPig1WqnWNNjjuOfeH8ryjl4E9x6PL4GqM7i6UYr3+Dx4dVAuja5yEECTcTiA9P11pS85J5njK8VLBpbquZlzldNrpSr+vvED4hNcTtHJqpddmY27DiEdHGAyqUt3WaIPjX9efAMDeyrx0Hsbv34JT35R+U7/3a2FkdVOGOoPDNw/rHTpuvbwVB6tqXN1/wL7r+4y2rsp4zu85vOwrVtbCycqJnp6l6+q42bnhblcH7qKSjKZRBscfTycrz4PbNIPcu7opNDsm6m67S4y53/mp9
3UJYn1DGtVh9MaLG9lz7f5tlMeSa/BClAFd3Looz9Pz0+nu0R1fZ1+jbqNIW8Szfs/SwkFe/JJKa5TBcVVskvJ8xa0hMD/j/sK7V+4/H38U3OvI7YNGdi//Hnfy7wCQU5DDuJ/G0cpRd0iYpcnietZ1g+9zsHRQApdAoC5S86zvs0YbVwuHFjzRoh4mFJYanEYZHH9J1GXwGNjBFdXVDP2Fjz6vm2PY1LfeBsbbubdZe36twUnEAEeTj3LuTunqg2fvnC3VNvfJuZirdHcLudu508Ojh8nuy5Wk2tTogqNWez9gTDQrUXTqg7tgVveKe5WUoc7gbFrpAPagsT+NrfA6m1g3AXR7jy0cWvD3nn9XlnV174qTVT1J8CtJRtboguNvqfcn5PpfXnV/QR0PjFqh5ckNT1b6fa91eM1ge6G2kJfbv4yPs081RyZJDVOjC44xl1IBaKkqkUtuyNcmGk35CooK2H1tN5oiDRfv3s+04uPkg41F2eVfhRAUiSI2DtqIpczpKElV0uiC4/Ek3Ry6Q9bv3m8M+LNR1i2E4I/cPyjUFlZ5HbfzbpOQmoC5ypxPjn9isM/2P22v8volSaqYRhccD19OY4J5icw2LXvqSglUwqZLm/jsxGel7nstK4+eMfRt2Vd3T67/w+/JlSSp+hpVcDx54x6Fmjwm23x7v/HVLQ99X25BLufvnkcIwYW7F/g4TlcPp7y6FbYWtlUeZ15hHt3du9PcoTld3boyvP3wKq9LkqSqaTTB8cbdXMKXHGaKxff3G0fvAWvDd3hsvbyVZSeXcTP7psHlANN7Tae1U2u9NjdbN/yb+htlzJIkmU6jCY6rj14D4AmzM7oGc2tdrsQSNl7cyNHko+y/vp9CUfq8oYuNC05WTtzOu82UnlMY4j+kxsctSZJpNJrgeCIxlXCzQ3Qxu6prCNHN51txagVHbh0hrzDP4CTosZ3HEtIyBF9nX6PeRyxJUt3WaILjwNtfM9pqm/I6v/1z9PxPgMG+0x6bhoWZBYP8BtVIVTNJkuo+kwbHpUuX8sknn5CcnEzHjh1ZtGgRvXv3rpFt9dSeAjPQAD/0+T8++HGY3vL3H38fWwtbejXvJbOrSJJkuuD4zTffEBERwdKlS3niiSf48ssvGThwIOfOnaNVq1YPX0ElZOUXEGCWhAB6+LaCGz/oLT/+yvEySz9KktQ4mSwH14IFCxgzZgxvvvkmjz76KIsWLcLb25tly5YZfVuH4nfzk50tnX31g+5fu/2VU6+dkoFRkqRSTLLnqNFoiI+PZ+rUqXrtYWFhxMbGluqvVqtRq9XK68zMzEptLzHxG5Z56E/YPj2q8lmgJUlqPEyy55iWlkZRUREeHvoFvT08PEhJKZ2GPjIyEmdnZ+Xh7e1dqe21aNGFjgXmtLXwZHi74TIwSpL0UCa9IPNgXkAhhMFcgdOmTWPixInK68zMzEoFyPDQyYSHTq76QCVJanRMEhybNWuGubl5qb3E1NTUUnuTANbW1lhby/OCkiTVHpMcVltZWdGjRw/27t2r1753716Cg4NNMSRJkiQ9JjusnjhxIq+++iqBgYEEBQWxYsUKrl+/zrhx40w1JEmSJIXJguOwYcO4c+cOH374IcnJyXTq1ImdO3fSunXrh79ZkiSphqmEEIarMNVhGRkZNGnShBs3buDkJGucSJJUccUXdO/du4ezs3OZ/erlvdVZWVkAlZ7SI0mSVCwrK6vc4Fgv9xy1Wi23bt3C0dGxwmVCi/+3aGx7m/Jzy8/dWFT0swshyMrKwsvLCzOzsq9J18s9RzMzM1q2bFml9zo5OTW6XxqQn7uxaayfGyr22cvbYyxmsnurJUmS6jIZHCVJkgxoNMHR2tqamTNnNro7beTnlp+7sTD2Z6+XF2QkSZJqWqPZc5QkSaoMGRwlSZIMkMFRkiTJABkcJUmSDGgUwXHp0qX4+vpiY2NDjx49OHjwoKmHVOMOHDjA4MGD8fLyQqVSsWXLFlMPq
VZERkbSs2dPHB0dcXd354UXXuDixYumHlaNW7ZsGZ07d1YmQAcFBfHjjz+aeli1LjIyEpVKRURERLXX1eCDY3GVwxkzZnDixAl69+7NwIEDuX79uqmHVqNycnLo0qULX3zxhamHUqtiYmJ4++23OXr0KHv37qWwsJCwsDBycnJMPbQa1bJlS+bNm8fx48c5fvw4Tz31FOHh4Zw9e9bUQ6s1cXFxrFixgs6dOxtnhaKBe+yxx8S4ceP02tq3by+mTp1qohHVPkBs3rzZ1MMwidTUVAGImJgYUw+l1jVt2lR8/fXXph5GrcjKyhL+/v5i7969IiQkRLz77rvVXmeD3nMsrnIYFham115WlUOp4cnIyADAxcXFxCOpPUVFRWzYsIGcnByCgoJMPZxa8fbbb/Pcc8/Rv39/o62zXiaeqKjKVjmUGhYhBBMnTuTJJ5+kU6dOph5OjTt9+jRBQUHk5+fj4ODA5s2b6dChg6mHVeM2bNjAr7/+SlxcnFHX26CDY7GKVjmUGpYJEyZw6tQpDh06ZOqh1Ip27dqRkJDAvXv32LRpE6NGjSImJqZBB8gbN27w7rvvsmfPHmxsbIy67gYdHCtb5VBqON555x22bdvGgQMHqpzerr6xsrKiTZs2AAQGBhIXF8fixYv58ssvTTyymhMfH09qaio9evRQ2oqKijhw4ABffPEFarUac3PzKq27QZ9zlFUOGx8hBBMmTOD777/n559/xtfX19RDMhkhBGq12tTDqFH9+vXj9OnTJCQkKI/AwEBGjhxJQkJClQMjNPA9R2i8VQ6zs7O5fPmy8joxMZGEhARcXFxo1aqVCUdWs95++23WrVvH1q1bcXR0VI4anJ2dsbW1NfHoas706dMZOHAg3t7eZGVlsWHDBqKjo9m1a5eph1ajHB0dS51Ptre3x9XVtfrnmat9vbseWLJkiWjdurWwsrIS3bt3bxTTOvbv3y+AUo9Ro0aZemg1ytBnBsSqVatMPbQaNXr0aOV33M3NTfTr10/s2bPH1MMyCWNN5ZEpyyRJkgxo0OccJUmSqkoGR0mSJANkcJQkSTJABkdJkiQDZHCUJEkyQAZHSZIkA2RwlCRJMkAGR0mSJANkcJRMJjo6GpVKxb179wCIioqiSZMmD32fKcs+3LlzB3d3d5KSkoDSn6Ey/vznP7NgwQLjDlAyGhkcpYdavnw5jo6OFBYWKm3Z2dlYWlrSu3dvvb4HDx5EpVJx6dKlam931qxZdO3atVR7cnIyAwcOrPb6qyIyMpLBgwfj4+NT7XV98MEH/POf/yQzM7P6A5OMTgZH6aFCQ0PJzs7m+PHjStvBgwfx9PQkLi6O3NxcpT06OhovLy/atm1bY+Px9PTE2tq6xtZflry8PFauXMmbb75ZrfUUFBQA0LlzZ3x8fFi7dq0xhicZmQyO0kO1a9cOLy8voqOjlbbo6GjCw8N55JFH9EpOREdHExoaCsCaNWsIDAzE0dERT09PRowYQWpqaoW2GRUVxezZszl58iQqlQqVSkVUVBSgf1idlJSESqVi48aN9O7dG1tbW3r27MmlS5eIi4sjMDAQBwcHnnnmGW7fvq23jVWrVvHoo49iY2ND+/btWbp0ablj+vHHH7GwsDBYeiA+Pp7AwEDs7OwIDg7Wq3hYvAf873//Gz8/P6ytrSlOafD888+zfv36Cn0nUu2SwVGqkL59+7J//37l9f79++nbty8hISFKu0aj4ciRI0pw1Gg0fPTRR5w8eZItW7aQmJjI66+/XqHtDRs2jEmTJtGxY0eSk5NJTk5m2LBhZfafOXMm7733Hr/++isWFha8/PLLTJkyhcWLF3Pw4EGuXLnCBx98oPT/6quvmDFjBv/85z85f/48c+fO5f333+c///lPmds4cOAAgYGBBpfNmDGDf/3rXxw/fhwLCwtGjx6tt/zy5cts3LiRTZs2kZCQoLQ/9thj/PLLLw0+72K9VO28PlKjsGLFCmFvby8KCgpEZmams
LCwEH/88YfYsGGDCA4OFkIIERMTIwBx5coVg+v45ZdfBCCysrKEEPfTqqWnpwshhFi1apVwdnZW+s+cOVN06dKl1HooUU0xMTFRAHpV9tavXy8AsW/fPqUtMjJStGvXTnnt7e0t1q1bp7fejz76SAQFBZX5HYSHh4vRo0frtRV/hp9++klp++GHHwQg8vLylM9haWkpUlNTS63z5MmTAhBJSUllblcyDbnnKFVIaGgoOTk5xMXFcfDgQdq2bYu7uzshISHExcWRk5NDdHQ0rVq1ws/PD4ATJ04QHh5O69atcXR0pG/fvgA1UjO8ZK3i4hIYAQEBem3Fh/S3b9/mxo0bjBkzBgcHB+UxZ84crly5UuY28vLyyqxTUnL7zZs3B9A7hdC6dWvc3NxKva84AW/J87ZS3dDgM4FLxtGmTRtatmzJ/v37SU9PJyQkBNBdHPH19eXw4cPs37+fp556CoCcnBzCwsIICwtjzZo1uLm5cf36dQYMGIBGozH6+CwtLZXnxcXTHmzTarUAys+vvvqKXr166a2nvLT6zZo1Iz09vcLbL94O6LJTG3L37l0Ag4FTMi0ZHKUKCw0NJTo6mvT0dP7+978r7SEhIezevZujR4/yxhtvAHDhwgXS0tKYN28e3t7eAHpXuyvCysqKoqIi432A//Hw8KBFixZcvXqVkSNHVvh93bp1Y82aNUYdy5kzZ2jZsiXNmjUz6nql6pOH1VKFhYaGcujQIRISEpQ9R9AFx6+++or8/HzlYkyrVq2wsrLi888/5+rVq2zbto2PPvqoUtvz8fFRat+kpaUZ9aLFrFmziIyMZPHixVy6dInTp0+zatWqcidlDxgwgLNnz5a591gVBw8eJCwszGjrk4xHBkepwkJDQ8nLy6NNmzZ6pW1DQkLIysrikUceUfYS3dzciIqK4ttvv6VDhw7MmzePTz/9tFLbe/HFF3nmmWcIDQ3Fzc3NqFNe3nzzTb7++muioqIICAggJCSEqKiocqsVBgQEEBgYyMaNG40yhvz8fDZv3sxf/vIXo6xPMi5ZQ0aSKmHnzp1MnjyZM2fOYGZWvX2LJUuWsHXrVvbs2WOk0UnGJM85SlIlPPvss/z222/cvHlT2UuuKktLSz7//HMjjUwyNrnnKEmSZIA85yhJkmSADI6SJEkGyOAoSZJkgAyOkiRJBsjgKEmSZIAMjpIkSQbI4ChJkmSADI6SJEkGyOAoSZJkwP8DutkYS4YCIrYAAAAASUVORK5CYII=", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "fig, ax = plt.subplots(figsize=(3.5, 2.))\n", + "\n", + "for recipe in all_recipes:\n", + " count = (multfi_tasks['recipe'] == recipe).cumsum()\n", + " ax.step(multfi_tasks['walltime'] / 3600, count, label=recipe[30:])\n", + " \n", + " ax.legend()\n", + "\n", + "ax.set_xlabel('Walltime (hr)')" + ] + }, + { + "cell_type": "markdown", + "id": "a8a0c09a-861c-4bdd-beb3-57aa38177fef", + "metadata": {}, + "source": [ + "## Plot IP by Walltime\n", + "See how well we're doing for each recipe" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "9e4afbfa-0944-4437-93eb-0cbb4874f61f", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "fig, ax = plt.subplots(figsize=(3.5, 2.))\n", + "\n", + "for color, label, tasks in zip(['red', 'blue'], ['Single', 'Multi'], [singfi_tasks, multfi_tasks]):\n", + " subset = tasks.query('level==\"mopac_pm7-acn-adiabatic\"')\n", + " subset = subset[~subset['result'].isnull()]\n", + " ax.scatter(subset['compute_time'] / 3600, subset['result'], s=10, \n", + " color=color, alpha=0.8, edgecolor='none')\n", + " cummax = subset['result'].cummax()\n", + " ax.step(subset['compute_time'] / 3600, cummax, '-', color=color, label=label)\n", + "\n", + "ax.set_xlabel('Compute Expended (hr)')\n", + "ax.set_ylabel('IP (V)')\n", + "\n", + "ax.legend()\n", + "\n", + "fig.tight_layout()" + ] + }, + { + "cell_type": "markdown", + "id": "cf5ab197-c725-4990-9926-18a70e156def", + "metadata": {}, + "source": [ + "Plot the distribution of IPs after 2 hours of compute" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "4b774e3f-3137-4588-b600-3ea523f852ee", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Single - Mean: 6.26 V - Count: 118\n", + "Multi - Mean: 6.49 V - Count: 71\n" + ] + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "fig, ax = plt.subplots(figsize=(3.5, 2.))\n", + "\n", + "bins = np.linspace(3, 8, 32)\n", + "means = []\n", + "for color, label, tasks in zip(['red', 'blue'], ['Single', 'Multi'], [singfi_tasks, multfi_tasks]):\n", + " subset = tasks.query('level==\"mopac_pm7-acn-adiabatic\" and compute_time <= 7200')\n", + " subset = subset[~subset['result'].isnull()]\n", + " ax.hist(subset['result'], bins=bins, color=color, label=label, alpha=0.5)\n", + " print(f'{label} - Mean: {subset[\"result\"].mean():.2f} V - Count: {len(subset)}')\n", + " \n", + " means.append((color, subset['result'].mean()))\n", + "\n", + "ax.set_ylim(ax.get_ylim())\n", + "for color, mean in means:\n", + " ax.plot([mean] * 2, ax.get_ylim(), '--', color=color)\n", + "\n", + "ax.set_xlabel('IP (V)')\n", + "ax.set_ylabel('Frequency')\n", + "\n", + "ax.legend()\n", + "\n", + "fig.tight_layout()" + ] + }, + { + "cell_type": "markdown", + "id": "3c02e797-40c7-4226-91e7-f83dd650508f", + "metadata": {}, + "source": [ + "The average IP of the molecules run at the highest level is higher for the multi-fidelity case, though there are fewer overall" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8cf0c654-733b-43fb-9457-ca7b99bc4781", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.13" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/examples/redoxmers-multifidelity/spec.py b/examples/redoxmers-multifidelity/spec.py new file mode 100644 index 0000000..0bbc601 --- /dev/null +++ 
b/examples/redoxmers-multifidelity/spec.py @@ -0,0 +1,89 @@ +"""Specification of the optimization problem""" +from functools import partial +from pathlib import Path + +from colmena.queue.redis import RedisQueues +from parsl import Config, HighThroughputExecutor +from parsl.addresses import address_by_hostname +from proxystore.connectors.file import FileConnector +from proxystore.store import Store + +from examol.reporting.markdown import MarkdownReporter +from examol.score.rdkit import RDKitScorer, make_gpr_model, FingerprintTransformer +from examol.simulate.ase import ASESimulator +from examol.solution import MultiFidelityActiveLearning +from examol.start.fast import RandomStarter +from examol.steer.multifi import PipelineThinker +from examol.store.recipes import RedoxEnergy +from examol.select.bayes import ExpectedImprovement +from examol.specify import ExaMolSpecification + +# Get my path. We'll want to provide everything as absolute paths, as they are relative to this file +my_path = Path().absolute() +num_workers: int = 4 +multi_fidelity: bool = True +colmena_queues = partial(RedisQueues) # Using a partial in case we need to pin options, such as port numbers + +run_dir = my_path / ('run' if multi_fidelity else 'run-single') + +# Make the recipe +recipe = RedoxEnergy(1, energy_config='mopac_pm7', solvent='acn') + +# Make the scorer +pipeline = make_gpr_model() +transform: FingerprintTransformer = pipeline.steps.pop(0)[1] +transform.n_jobs = 1 +pipeline[0].n_jobs = 1 +scorer = RDKitScorer(pre_transform=transform) + +# Mark how we report outcomes +reporter = MarkdownReporter() + +# Define how to run the simulations (through ASE) +sim = ASESimulator( + scratch_dir=(my_path / 'ase-runs'), +) + +# Make the workflow configuration +store = Store(name='file', connector=FileConnector(str(run_dir / 'proxystore')), metrics=True) +htex = HighThroughputExecutor( + address=address_by_hostname(), + max_workers=num_workers, + cpu_affinity='block' +) +config = Config( + executors=[htex], + 
run_dir=str((my_path / 'parsl-logs')), +) + + +# Mark that we're going to solve this with multifidelity learning +solution = MultiFidelityActiveLearning( + selector=ExpectedImprovement(200, maximize=True, epsilon=0.1), + steps=[ # The order of codes to run + [RedoxEnergy(1, energy_config='mopac_pm7', vertical=True)], + [RedoxEnergy(1, energy_config='mopac_pm7', vertical=True, solvent='acn')] + ] if multi_fidelity else [], + starter=RandomStarter(), + scorer=scorer, + models=[[pipeline] * 8], + num_to_run=200, + pipeline_target=0.5, +) + +# Build the specification +spec = ExaMolSpecification( + database=run_dir / 'database.json', + recipes=[recipe], + solution=solution, + search_space=[(my_path / '..' / 'redoxmers-bebop' / 'search-space.smi')], + simulator=sim, + thinker=PipelineThinker, + compute_config=config, + reporters=[reporter], + run_dir=run_dir, + proxystore=store, + proxystore_threshold=10000, # Needs to be small to avoid filling Colmena's PipeQueues + colmena_queue=colmena_queues, + thinker_options={'num_workers': num_workers} +) diff --git a/tests/cli/test_cli.py b/tests/cli/test_cli.py index 98dc238..34c8c62 100644 --- a/tests/cli/test_cli.py +++ b/tests/cli/test_cli.py @@ -20,7 +20,7 @@ def test_version(capsys): def test_dryrun(caplog, capsys): with caplog.at_level(logging.INFO): main(['run', '--dry-run', f'{_spec_dir / "spec.py"}:spec']) - assert any('dry run' in m for m in caplog.messages[-3:]) + assert any('dry run' in m for m in caplog.messages[-6:]) @mark.skipif(on_mac, reason='Only test the CLI on Linux') diff --git a/tests/score/test_rdkit.py b/tests/score/test_rdkit.py index d893220..126f724 100644 --- a/tests/score/test_rdkit.py +++ b/tests/score/test_rdkit.py @@ -75,7 +75,12 @@ def test_gpr(training_set, scorer, recipe, num_pcs, pre_compute): @mark.parametrize('bootstrap', [False, True]) -def test_multifi(training_set, multifi_recipes, scorer, pipeline, bootstrap): +@mark.parametrize('actually_single', [False, True]) +def 
test_multifi(training_set, multifi_recipes, scorer, pipeline, bootstrap, actually_single): + # Emulate what happens if we don't have any steps + if actually_single: + multifi_recipes = multifi_recipes[:1] + # Test conversion to multi-fidelity inputs = scorer.transform_inputs(training_set) lower_fidelities = collect_outputs(training_set, multifi_recipes[:-1]) @@ -94,9 +99,10 @@ def test_multifi(training_set, multifi_recipes, scorer, pipeline, bootstrap): model_msg = scorer.prepare_message(pipeline, training=False) predictions = scorer.score(model_msg, inputs, lower_fidelities=lower_fidelities) assert predictions.shape == (len(training_set),) - assert np.isclose(predictions, outputs).all() # Should give exact result, since all values are known + if not (bootstrap or actually_single): + assert np.isclose(predictions, outputs).all() # Should give exact result, since all values are known predictions = scorer.score(model_msg, inputs) assert predictions.shape == (len(training_set),) - if not bootstrap: + if not (bootstrap or actually_single): assert np.isclose(predictions, outputs).all() # Should give exact result, since all values are known and we're using a KNN diff --git a/tests/steer/conftest.py b/tests/steer/conftest.py index cce2dec..c664128 100644 --- a/tests/steer/conftest.py +++ b/tests/steer/conftest.py @@ -1,3 +1,4 @@ +from concurrent.futures import ProcessPoolExecutor from pathlib import Path import sys @@ -54,6 +55,11 @@ def simulator(tmp_path) -> ASESimulator: return ASESimulator(scratch_dir=tmp_path / 'ase-temp') +@fixture() +def pool(): + yield ProcessPoolExecutor() + + @fixture() def queues(recipe, scorer, simulator, tmp_path) -> ColmenaQueues: """Make a start the task server""" diff --git a/tests/steer/test_base.py b/tests/steer/test_base.py index 5050ad8..90eb119 100644 --- a/tests/steer/test_base.py +++ b/tests/steer/test_base.py @@ -10,7 +10,7 @@ @mark.parametrize('use_json', [True, False]) -def test_search_space(queues, search_space, tmp_path, 
database, use_json): +def test_search_space(queues, search_space, tmp_path, database, use_json, pool): """Test using a JSON-format search space""" # Save the training data to JSON format @@ -32,7 +32,8 @@ def test_search_space(queues, search_space, tmp_path, database, use_json): solution=solution, run_dir=tmp_path / 'run', search_space=[json_search_space] if use_json else [search_space], - database=database + database=database, + pool=pool ) assert len(list(thinker.iterate_over_search_space())) == 5 smiles_only = list(thinker.iterate_over_search_space(only_smiles=True)) diff --git a/tests/steer/test_brute.py b/tests/steer/test_brute.py index 1e08c66..6b784f9 100644 --- a/tests/steer/test_brute.py +++ b/tests/steer/test_brute.py @@ -9,7 +9,7 @@ @fixture() -def thinker(queues, recipe, search_space, database, tmp_path) -> BruteForceThinker: +def thinker(queues, recipe, search_space, database, tmp_path, pool) -> BruteForceThinker: run_dir = tmp_path / 'run' solution = SolutionSpecification( starter=RandomStarter(), @@ -22,6 +22,7 @@ def thinker(queues, recipe, search_space, database, tmp_path) -> BruteForceThink database=database, num_workers=1, solution=solution, + pool=pool, search_space=[search_space], ) diff --git a/tests/steer/test_multi.py b/tests/steer/test_multi.py index 98040af..1a1886a 100644 --- a/tests/steer/test_multi.py +++ b/tests/steer/test_multi.py @@ -12,7 +12,7 @@ @fixture() -def thinker(queues, recipe, search_space, scorer, database, tmpdir) -> PipelineThinker: +def thinker(queues, recipe, search_space, scorer, database, tmpdir, pool) -> PipelineThinker: run_dir = Path(tmpdir / 'run') scorer, model = scorer solution = MultiFidelityActiveLearning( @@ -32,7 +32,8 @@ def thinker(queues, recipe, search_space, scorer, database, tmpdir) -> PipelineT database=database, num_workers=1, solution=solution, - search_space=[search_space], + pool=pool, + search_space=[search_space] ) diff --git a/tests/steer/test_single.py b/tests/steer/test_single.py index 
a9c4531..f9fb7fe 100644 --- a/tests/steer/test_single.py +++ b/tests/steer/test_single.py @@ -12,7 +12,7 @@ @fixture() -def thinker(queues, recipe, search_space, scorer, database, tmpdir) -> SingleStepThinker: +def thinker(queues, recipe, search_space, scorer, database, tmpdir, pool) -> SingleStepThinker: run_dir = Path(tmpdir / 'run') scorer, model = scorer solution = SingleFidelityActiveLearning( @@ -30,6 +30,7 @@ def thinker(queues, recipe, search_space, scorer, database, tmpdir) -> SingleSte database=database, num_workers=1, solution=solution, + pool=pool, search_space=[search_space], )