diff --git a/README.md b/README.md
index 106017e..3a44fca 100644
--- a/README.md
+++ b/README.md
@@ -15,7 +15,7 @@ Argenomic is an open-source implementation of an illumination algorithm for opti
 
 ## Getting Started
 
-After installing the software and running the tests, a basic usage example of argenomic (i.e. the rediscovery of Thiotixene) can be called upon in the following manner:
+After installing the software and running the tests, a basic usage example of argenomic (i.e. the rediscovery of Troglitazone) can be called upon in the following manner:
 ```
 python3 illuminate.py generations=100
 ```
@@ -61,7 +61,7 @@ Important dependencies of the Argenomic software environment and where to find t
 
 * Jan Jensen for his work in developing and open-sourcing a graph-based genetic algorithm for molecular optimisation, which served as impetus for this project.
 
-* Jean-Baptiste Mouret and Jeff Clune for their breakthrough invention of illumination algorithms, providing a holistic view of high-performing solutions throughout a search space.  
+* Jean-Baptiste Mouret and Jeff Clune for their breakthrough invention of illumination algorithms, providing a holistic view of high-performing solutions throughout a search space.
 
 * Pat Walters for his scripts indicating how to run structural alerts using the RDKit and ChEMBL, and for his many enlightening medicinal chemistry blog posts.
 
diff --git a/__pycache__/cynosure.cpython-37.pyc b/__pycache__/cynosure.cpython-37.pyc
new file mode 100644
index 0000000..59612b5
Binary files /dev/null and b/__pycache__/cynosure.cpython-37.pyc differ
diff --git a/argenomic/__init__.py b/argenomic/__init__.py
new file mode 100644
index 0000000..8b13789
--- /dev/null
+++ b/argenomic/__init__.py
@@ -0,0 +1 @@
+
diff --git a/argenomic/__pycache__/__init__.cpython-37.pyc b/argenomic/__pycache__/__init__.cpython-37.pyc
new file mode 100644
index 0000000..41c2eef
Binary files /dev/null and b/argenomic/__pycache__/__init__.cpython-37.pyc differ
diff --git a/argenomic/__pycache__/base.cpython-37.pyc b/argenomic/__pycache__/base.cpython-37.pyc
new file mode 100644
index 0000000..403cecc
Binary files /dev/null and b/argenomic/__pycache__/base.cpython-37.pyc differ
diff --git a/argenomic/__pycache__/infrastructure.cpython-37.pyc b/argenomic/__pycache__/infrastructure.cpython-37.pyc
index 6f20b5c..5e8e75b 100644
Binary files a/argenomic/__pycache__/infrastructure.cpython-37.pyc and b/argenomic/__pycache__/infrastructure.cpython-37.pyc differ
diff --git a/argenomic/__pycache__/mechanism.cpython-37.pyc b/argenomic/__pycache__/mechanism.cpython-37.pyc
index be8afdb..06fa850 100644
Binary files a/argenomic/__pycache__/mechanism.cpython-37.pyc and b/argenomic/__pycache__/mechanism.cpython-37.pyc differ
diff --git a/argenomic/__pycache__/operations.cpython-37.pyc b/argenomic/__pycache__/operations.cpython-37.pyc
index 8eb7fa7..f33b51d 100644
Binary files a/argenomic/__pycache__/operations.cpython-37.pyc and b/argenomic/__pycache__/operations.cpython-37.pyc differ
diff --git a/argenomic/base.py b/argenomic/base.py
new file mode 100644
index 0000000..7c0706f
--- /dev/null
+++ b/argenomic/base.py
@@ -0,0 +1,19 @@
+from typing import List, Tuple
+from dataclasses import dataclass
+
+class Elite:
+    def __init__(self, index):
+        self.index = index
+        self.molecule = None
+
+    def update(self, molecule):
+        if self.molecule is None or (molecule.fitness - self.molecule.fitness) > 0.0:
+            self.molecule = molecule
+        return None
+
+@dataclass
+class Molecule:
+    smiles: str
+    pedigree: Tuple[str, str ,str] 
+    fitness: float = None
+    descriptor: List[float] = None
diff --git a/argenomic/infrastructure.py b/argenomic/infrastructure.py
index c7f7136..e9cf14c 100644
--- a/argenomic/infrastructure.py
+++ b/argenomic/infrastructure.py
@@ -7,6 +7,7 @@
 import numpy as np
 import pandas as pd
 from typing import List, Tuple
+from dataclasses import dataclass
 
 from datetime import datetime
 from sklearn.cluster import KMeans
@@ -18,22 +19,14 @@
 rdBase.DisableLog('rdApp.error')
 from rdkit.Chem import Lipinski
 
+from argenomic.base import Molecule, Elite
 
-class elite():
-    def __init__(self, index, descriptor):
-        self.index = index
-        self.fitness = 0.0
-        self.molecule = None
-        self.descriptor = descriptor
-
-    def update(self, fitness, molecule, descriptor):
-        if self.fitness < fitness:
-            self.fitness = fitness
-            self.molecule = molecule
-            self.descriptor = descriptor
-        return None
-
-class archive:
+class Archive:
+    """
+    A composite class containing the current elite molecules in a CVT tree structure. Allows for processing of 
+    new molecules, sampling of the existing elite molecules, and disk storage of the current state of the archive. 
+    The CVT centers are either loaded from or deposited to cache disk storage. 
+    """
     def __init__(self, archive_config, descriptor_config) -> None:
         self.archive_size = archive_config.size
         self.archive_accuracy = archive_config.accuracy
@@ -46,132 +39,162 @@ def __init__(self, archive_config, descriptor_config) -> None:
             kmeans = KMeans(n_clusters=self.archive_size)
             kmeans = kmeans.fit(np.random.rand(archive_config.accuracy, self.archive_dimensions))
             self.cvt_centers = kmeans.cluster_centers_
-            np.savetxt(self.cvt_location, self.cvt_centers)
+            np.savetxt(self.cvt_location, self.cvt_centers)        
         self.cvt = KDTree(self.cvt_centers, metric='euclidean')
-        self.elites = [elite(index, cvt_center) for index, cvt_center in enumerate(self.cvt_centers, start=0)]
+        self.elites = [Elite(index) for index, _ in enumerate(self.cvt_centers, start=0)]
         return None
 
     def cvt_index(self, descriptor: List[float]) -> int:
+        """
+        Returns CVT index for the niche nearest to the given descriptor.
+        """
         return self.cvt.query([descriptor], k=1)[1][0][0]
 
-    def add_to_archive(self, molecules: List[Chem.Mol], descriptors: List[List[float]], fitnesses: List[float]) -> None:
-        for molecule, descriptor, fitness in zip(molecules, descriptors, fitnesses):
-            self.elites[self.cvt_index(descriptor)].update(fitness, molecule, descriptor)
+    def add_to_archive(self, molecules) -> None:
+        """
+        Takes in a list of molecules and adds them to the archive as prescribed by the MAP-Elites algorithm, 
+        i.e. each niche only contains the most fit molecule. Other molecules are discarded. 
+        """
+        for molecule in molecules:
+            self.elites[self.cvt_index(molecule.descriptor)].update(molecule)
         return None
 
     def sample(self, size: int) -> List[Chem.Mol]:
-        pairs = [(elite.molecule, elite.fitness) for elite in self.elites if elite.fitness > 0.0]
+        """
+        Returns a list of elite molecules of the requested length.
+        The elite molecules are randomly drawn, weighted by their fitness. 
+        """
+        pairs = [(elite.molecule, elite.molecule.fitness) for elite in self.elites if elite.molecule]
         molecules, weights = map(list, zip(*pairs))
         return random.choices(molecules, k=size, weights=weights)
 
     def sample_pairs(self, size: int) -> List[Tuple[Chem.Mol, Chem.Mol]]:
-        pairs = [(elite.molecule, elite.fitness) for elite in self.elites if elite.fitness > 0.0]
+        """
+        Returns a list of pairs of elite molecules of the requested length.
+        The elite molecules are randomly drawn, weighted by their fitness. 
+        """
+        pairs = [(elite.molecule, elite.molecule.fitness) for elite in self.elites if elite.molecule]
         molecules, weights = map(list, zip(*pairs))
         sample_molecules = random.choices(molecules, k=size, weights=weights)
         sample_pairs = np.random.choice(list(filter(None, sample_molecules)), size=(size, 2), replace=True)
-        sample_pairs = [tuple(sample_pair) for sample_pair in sample_pairs]
+        sample_pairs = [tuple(sample_pair) for sample_pair in sample_pairs]       
         return sample_pairs
 
-    def store_archive(self, generation: float) -> None:
-        elites_smiles, elites_descriptors, elites_fitnesses = self.elites_data()
-        data = {'elites': elites_smiles, 'descriptors': elites_descriptors, 'fitnesses': elites_fitnesses}
-        pd.DataFrame(data=data).to_csv("archive_{}.csv".format(generation), index=False)
-        return None
-
-    def store_statistics(self, generation: float) -> None:
-        elites_smiles, elites_descriptors, elites_fitnesses = self.elites_data()
-        fractional_size = len(elites_smiles)/self.archive_size
-        statistics = [generation, np.max(elites_fitnesses), np.mean(elites_fitnesses), np.std(elites_fitnesses), fractional_size]
+    def store_data(self, generation: float) -> None:
+        """
+        Creates a dataframe representing the archive and writes it to disk. In addition, basic statistics about
+        the state of the archive are saved to disk and printed to the IO stream.           
+        """
+        archive_data = self.get_archive_data()
+        fractional_size = len(archive_data["smiles"])/self.archive_size
+        max_fitness, mean_fitness = np.max(archive_data["fitnesses"]), np.mean(archive_data["fitnesses"])
         if os.path.isfile('statistics.csv'):
             with open('statistics.csv', 'a') as file:
-                csv.writer(file).writerow(statistics)
+                csv.writer(file).writerow([generation, max_fitness, mean_fitness, fractional_size])
                 file.close()
         else:
             with open('statistics.csv', 'w') as file:
                 file.close()
-        print('Generation: {}, Size: {:.2f}'.format(statistics[0], statistics[4]))
-        print('Fitness Max: {:.7f}, Mean: {:.7f}, Std: {:.7f}'.format(statistics[1], statistics[2], statistics[3]))
+        pd.DataFrame(data=archive_data).to_csv("archive_{}.csv".format(generation), index=False)
+        print('Generation: {}, Size: {:.2f}'.format(generation, fractional_size))
+        print('Fitness Max: {:.5f}, Fitness Mean: {:.5f}'.format(max_fitness, mean_fitness))
         return None
-
-    def elites_data(self) -> Tuple[List[str], List[float], List[float]]:
-        elites_list = [elite for elite in self.elites if elite.molecule]
-        elites_smiles = [Chem.MolToSmiles(elite.molecule) for elite in elites_list]
-        elites_descriptors = [elite.descriptor for elite in elites_list]
-        elites_fitnesses = [elite.fitness for elite in elites_list]
-        return elites_smiles, elites_descriptors, elites_fitnesses
-
-
-class arbiter:
+    
+    def get_archive_data(self) -> None:
+        elite_indices = [elite.index for elite in self.elites if elite.molecule]
+        elite_molecules = [elite.molecule for elite in self.elites if elite.molecule]
+        elites_smiles = [molecule.smiles for molecule in elite_molecules]
+        elites_pedigree = [molecule.pedigree for molecule in elite_molecules]
+        elites_descriptors = [molecule.descriptor for molecule in elite_molecules]
+        elites_fitnesses = [molecule.fitness for molecule in elite_molecules]
+        archive_data = {'index': elite_indices, 'smiles': elites_smiles, 'pedigree': elites_pedigree, 'descriptors': elites_descriptors, 'fitnesses': elites_fitnesses}
+        return archive_data
+
+class Arbiter:
     """
     A catalog class containing different druglike filters for small molecules.
     Includes the option to run the structural filters from ChEMBL.
     """
     def __init__(self, arbiter_config) -> None:
+        self.cache_smiles = []
         self.rules_dict = pd.read_csv(hydra.utils.to_absolute_path("data/smarts/alert_collection.csv"))
         self.rules_dict= self.rules_dict[self.rules_dict.rule_set_name.isin(arbiter_config.rules)]
         self.rules_list = self.rules_dict["smarts"].values.tolist()
         self.tolerance_list = pd.to_numeric(self.rules_dict["max"]).values.tolist()
         self.pattern_list = [Chem.MolFromSmarts(smarts) for smarts in self.rules_list]
 
-    def __call__(self, molecules:List[Chem.Mol]) -> List[Chem.Mol]:
+    def __call__(self, molecules):
         """
         Applies the chosen filters (hologenicity, veber_infractions,
-        ChEMBL structural alerts, ...) to a list of molecules.
+        ChEMBL structural alerts, ...) to a list of molecules and removes duplicates.
         """
         filtered_molecules = []
+        molecules = self.unique_molecules(molecules)
         for molecule in molecules:
-            if self.molecule_validity(molecule):
+            molecular_graph = Chem.MolFromSmiles(molecule.smiles)
+            if self.molecule_filter(molecular_graph):
                 filtered_molecules.append(molecule)
         return filtered_molecules
 
-    def molecule_validity(self, molecule: Chem.Mol) -> bool:
+    def unique_molecules(self, molecules: List[Molecule]) -> List[Molecule]:
+        """
+        Checks if a molecule in a list of molecules is duplicated, either in this batch or before.
+        """
+        unique_molecules = []
+        for molecule in molecules:
+            if molecule.smiles not in self.cache_smiles:
+                unique_molecules.append(molecule)
+                self.cache_smiles.append(molecule.smiles)
+        return unique_molecules
+
+    def molecule_filter(self, molecular_graph: Chem.Mol) -> bool:
         """
-        Checks if a given molecule passes through the chosen filters (hologenicity,
+        Checks if a given molecular structure passes through the chosen filters (hologenicity,
         veber_infractions, ChEMBL structural alerts, ...).
         """
-        toxicity = self.toxicity(molecule)
-        hologenicity = self.hologenicity(molecule)
-        veber_infraction = self.veber_infraction(molecule)
+        toxicity = self.toxicity(molecular_graph)
+        hologenicity = self.hologenicity(molecular_graph)
+        veber_infraction = self.veber_infraction(molecular_graph)
         validity = not (toxicity or hologenicity or veber_infraction)
-        if molecule.HasSubstructMatch(Chem.MolFromSmarts('[R]')):
-            ring_infraction = self.ring_infraction(molecule)
+        if molecular_graph.HasSubstructMatch(Chem.MolFromSmarts('[R]')):
+            ring_infraction = self.ring_infraction(molecular_graph)
             validity = validity and not (ring_infraction)
         return validity
 
-    def toxicity(self, molecule: Chem.Mol) -> bool:
+    def toxicity(self, molecular_graph: Chem.Mol) -> bool:
         """
         Checks if a given molecule fails the structural filters.
         """
         for (pattern, tolerance) in zip(self.pattern_list, self.tolerance_list):
-            if len(molecule.GetSubstructMatches(pattern)) > tolerance:
+            if len(molecular_graph.GetSubstructMatches(pattern)) > tolerance:
                 return True
         return False
 
     @staticmethod
-    def hologenicity(molecule: Chem.Mol) -> bool:
+    def hologenicity(molecular_graph: Chem.Mol) -> bool:
         """
         Checks if a given molecule fails the hologenicity filters.
         """
-        fluorine_saturation = len(molecule.GetSubstructMatches(Chem.MolFromSmarts('[F]'))) > 6
-        bromide_saturation = len(molecule.GetSubstructMatches(Chem.MolFromSmarts('[Br]'))) > 3
-        chlorine_saturation = len(molecule.GetSubstructMatches(Chem.MolFromSmarts('[Cl]'))) > 3
+        fluorine_saturation = len(molecular_graph.GetSubstructMatches(Chem.MolFromSmarts('[F]'))) > 6
+        bromide_saturation = len(molecular_graph.GetSubstructMatches(Chem.MolFromSmarts('[Br]'))) > 3
+        chlorine_saturation = len(molecular_graph.GetSubstructMatches(Chem.MolFromSmarts('[Cl]'))) > 3
         return chlorine_saturation or bromide_saturation or fluorine_saturation
 
     @staticmethod
-    def ring_infraction(molecule: Chem.Mol) -> bool:
+    def ring_infraction(molecular_graph: Chem.Mol) -> bool:
         """
         Checks if a given molecule fails the ring infraction filters.
         """
-        ring_allene = molecule.HasSubstructMatch(Chem.MolFromSmarts('[R]=[R]=[R]'))
-        macro_cycle = max([len(j) for j in molecule.GetRingInfo().AtomRings()]) > 6
-        double_bond_in_small_ring = molecule.HasSubstructMatch(Chem.MolFromSmarts('[r3,r4]=[r3,r4]'))
+        ring_allene = molecular_graph.HasSubstructMatch(Chem.MolFromSmarts('[R]=[R]=[R]'))
+        macro_cycle = max([len(j) for j in molecular_graph.GetRingInfo().AtomRings()]) > 6
+        double_bond_in_small_ring = molecular_graph.HasSubstructMatch(Chem.MolFromSmarts('[r3,r4]=[r3,r4]'))
         return ring_allene or macro_cycle or double_bond_in_small_ring
 
     @staticmethod
-    def veber_infraction(molecule: Chem.Mol) -> bool:
+    def veber_infraction(molecular_graph: Chem.Mol) -> bool:
         """
         Checks if a given molecule fails the veber infraction filters.
         """
-        rotatable_bond_saturation = Lipinski.NumRotatableBonds(molecule) > 10
-        hydrogen_bond_saturation = Lipinski.NumHAcceptors(molecule) + Lipinski.NumHDonors(molecule) > 10
+        rotatable_bond_saturation = Lipinski.NumRotatableBonds(molecular_graph) > 10
+        hydrogen_bond_saturation = Lipinski.NumHAcceptors(molecular_graph) + Lipinski.NumHDonors(molecular_graph) > 10
         return rotatable_bond_saturation or hydrogen_bond_saturation
diff --git a/argenomic/mechanism.py b/argenomic/mechanism.py
index 0c363ff..a09c9cb 100644
--- a/argenomic/mechanism.py
+++ b/argenomic/mechanism.py
@@ -15,13 +15,13 @@
 from rdkit.Chem import rdMolDescriptors
 from rdkit.DataStructs.cDataStructs import TanimotoSimilarity
 
-class descriptor:
+class Descriptor:
     """
     A strategy class for calculating the descriptor vector of a molecule.
     """
     def __init__(self, config_descriptor) -> None:
         self.properties = []
-        self.ranges = config_descriptor.ranges
+        self.ranges = config_descriptor.ranges   
         self.property_names = config_descriptor.properties
         for name in self.property_names:
             module, fuction = name.split(".")
@@ -29,59 +29,61 @@ def __init__(self, config_descriptor) -> None:
             self.properties.append(getattr(module, fuction))
         return None
 
-    def __call__(self, molecule: Chem.Mol) -> List[float]:
+    def __call__(self, molecule) -> None:
         """
-        Calculating the descriptor vector of a molecule.
+        Updates the descriptor vector of a molecule.
         """
         descriptor = []
+        molecular_graph = Chem.MolFromSmiles(molecule.smiles)
         for property, range in zip(self.properties, self.ranges):
-            descriptor.append(self.rescale(property(molecule), range))
-        return descriptor
+            descriptor.append(self.rescale(property(molecular_graph), range))
+        molecule.descriptor = descriptor
+        return molecule
 
     @staticmethod
     def rescale(feature: List[float], range: List[float]) -> List[float]:
         """
-        Rescaling the feature to the unit range.
+        Rescales the feature to the unit range.
         """
         rescaled_feature = (feature - range[0])/(range[1] - range[0])
         return rescaled_feature
 
-class fitness:
+class Fitness:
     """
     A strategy class for calculating the fitness of a molecule.
     """
     def __init__(self, config_fitness) -> None:
-        self.memoized_cache = dict()
         self.fingerprint_type = config_fitness.type
         self.target = Chem.MolFromSmiles(config_fitness.target)
         self.target_fingerprint = self.get_fingerprint(self.target, self.fingerprint_type)
         return None
 
-    def __call__(self, molecule: Chem.Mol) -> float:
-        smiles = Chem.MolToSmiles(molecule)
-        if smiles in self.memoized_cache:
-            fitness = self.memoized_cache[smiles]
-        else:
-            molecule_fingerprint = self.get_fingerprint(molecule, self.fingerprint_type)
-            fitness = TanimotoSimilarity(self.target_fingerprint, molecule_fingerprint)
-            self.memoized_cache[smiles] = fitness
-        return fitness
+    def __call__(self, molecule) -> None:
+        """
+        Updates the fitness value of a molecule.
+        """
+        molecular_graph = Chem.MolFromSmiles(Chem.CanonSmiles(molecule.smiles))
+        molecule_fingerprint = self.get_fingerprint(molecular_graph, self.fingerprint_type)
+        fitness = TanimotoSimilarity(self.target_fingerprint, molecule_fingerprint)
+        molecule.fitness = fitness
+        return molecule
 
-    def get_fingerprint(self, molecule: Chem.Mol, fingerprint_type: str):
+    def get_fingerprint(self, molecular_graph: Chem.Mol, fingerprint_type: str):
         method_name = 'get_' + fingerprint_type
         method = getattr(self, method_name)
         if method is None:
             raise Exception('{} is not a supported fingerprint type.'.format(fingerprint_type))
-        return method(molecule)
+        return method(molecular_graph)
+
+    def get_ECFP4(self, molecular_graph: Chem.Mol):
+        return AllChem.GetMorganFingerprint(molecular_graph, 2)
 
-    def get_ECFP4(self, molecule: Chem.Mol):
-        return AllChem.GetMorganFingerprint(molecule, 2)
+    def get_ECFP6(self, molecular_graph: Chem.Mol):
+        return AllChem.GetMorganFingerprint(molecular_graph, 3)
 
-    def get_ECFP6(self, molecule: Chem.Mol):
-        return AllChem.GetMorganFingerprint(molecule, 3)
+    def get_FCFP4(self, molecular_graph: Chem.Mol):
+        return AllChem.GetMorganFingerprint(molecular_graph, 2, useFeatures=True)
 
-    def get_FCFP4(self, molecule: Chem.Mol):
-        return AllChem.GetMorganFingerprint(molecule, 2, useFeatures=True)
+    def get_FCFP6(self, molecular_graph: Chem.Mol):
+        return AllChem.GetMorganFingerprint(molecular_graph, 3, useFeatures=True)
 
-    def get_FCFP6(self, molecule: Chem.Mol):
-        return AllChem.GetMorganFingerprint(molecule, 3, useFeatures=True)
diff --git a/argenomic/operations.py b/argenomic/operations.py
index bf6af71..f473dcb 100644
--- a/argenomic/operations.py
+++ b/argenomic/operations.py
@@ -12,52 +12,59 @@
 from rdkit.Chem import AllChem
 from rdkit.Chem import rdMMPA
 
-class mutator:
+from argenomic.base import Molecule
+   
+class Mutator:
     """
-    A catalog class containing and implementing mutations to small molecules
-    according to the principles of positional analogue scanning.
+    A catalog class containing and implementing mutations to small molecules according to the principles of positional analogue scanning. 
     """
-    def __init__(self) -> None:
-        self.mutation_data = pd.read_csv(hydra.utils.to_absolute_path("data/smarts/mutation_collection.tsv"), sep='\t')
+    def __init__(self, config_mutator) -> None:
+        self.mutation_data = pd.read_csv(hydra.utils.to_absolute_path(config_mutator.data_file), sep='\t')
 
-    def __call__(self, molecule:Chem.Mol) -> List[Chem.Mol]:
+    def __call__(self, molecule) -> List[Molecule]:
         sampled_mutation = self.mutation_data.sample(n=1, weights='probability').iloc[0]
         reaction = AllChem.ReactionFromSmarts(sampled_mutation['smarts'])
+        pedigree = ("mutation", sampled_mutation['smarts'], molecule.smiles)   
         try:
-            molecules = [products[0] for products in reaction.RunReactants([molecule])]
+            molecular_graphs = [products[0] for products in reaction.RunReactants([Chem.MolFromSmiles(molecule.smiles)])]
+            smiles_list = [Chem.MolToSmiles(molecular_graph) for molecular_graph in molecular_graphs if molecular_graph is not None]
+            molecules = [Molecule(Chem.CanonSmiles(smiles), pedigree) for smiles in smiles_list if Chem.MolFromSmiles(smiles)]
         except:
             molecules = []
         return molecules
 
-class crossover:
+class Crossover:
     """
     A strategy class implementing a parent-centric crossover of small molecules.
     """
     def __init__(self):
         pass
 
-    def __call__(self, molecule_pair:Tuple[Chem.Mol, Chem.Mol]) -> List[Chem.Mol]:
-        molecule_cores, molecule_sidechains = self.fragmentate(molecule_pair)
-        molecules = self.merge(molecule_cores, molecule_sidechains)
+    def __call__(self, molecule_pair):
+        pedigree = ("crossover", molecule_pair[0].smiles, molecule_pair[1].smiles)
+        smiles_list = self.merge(molecule_pair)
+        molecules = [Molecule(Chem.CanonSmiles(smiles), pedigree) for smiles in smiles_list if Chem.MolFromSmiles(smiles)]
         return molecules
 
-    def merge(self, molecule_cores:List[Chem.Mol], molecule_sidechains:List[Chem.Mol]) -> List[Chem.Mol]:
-        molecules = []
-        random.shuffle(molecule_sidechains)
+    def merge(self, molecule_pair):
+        molecular_graphs = []
+        graph_cores, graph_sidechains = self.fragment(molecule_pair)
+        random.shuffle(graph_sidechains)
         reaction = AllChem.ReactionFromSmarts('[*:1]-[1*].[1*]-[*:2]>>[*:1]-[*:2]')
-        for core, sidechain in zip(molecule_cores, molecule_sidechains):
-            molecules.append(reaction.RunReactants((core, sidechain))[0][0])
-        return molecules
+        for core, sidechain in zip(graph_cores, graph_sidechains):
+            molecular_graphs.append(reaction.RunReactants((core, sidechain))[0][0])
+        smiles_list = [Chem.MolToSmiles(molecular_graph) for molecular_graph in molecular_graphs if molecular_graph is not None]
+        return smiles_list
 
-    def fragmentate(self, molecule_pair:Tuple[Chem.Mol, Chem.Mol]) -> Tuple[List[Chem.Mol], List[Chem.Mol]]:
-        molecule_cores = []
-        molecule_sidechains = []
+    def fragment(self, molecule_pair):
+        graph_cores = []
+        graph_sidechains = []
         for molecule in molecule_pair:
-            molecule_frags = rdMMPA.FragmentMol(molecule, maxCuts=1, resultsAsMols=False)
-            if len(molecule_frags) > 0:
-                _, molecule_frags = map(list, zip(*molecule_frags))
-                for molecule_pair in molecule_frags:
-                    core, sidechain = molecule_pair.split(".")
-                    molecule_cores.append(Chem.MolFromSmiles(core.replace("[*:1]", "[1*]")))
-                    molecule_sidechains.append(Chem.MolFromSmiles(sidechain.replace("[*:1]", "[1*]")))
-        return molecule_cores, molecule_sidechains
+            graph_frags = rdMMPA.FragmentMol(Chem.MolFromSmiles(molecule.smiles), maxCuts=1, resultsAsMols=False)
+            if len(graph_frags) > 0:
+                _, graph_frags = map(list, zip(*graph_frags))
+                for frag_pair in graph_frags:
+                    core, sidechain = frag_pair.split(".")
+                    graph_cores.append(Chem.MolFromSmiles(core.replace("[*:1]", "[1*]")))
+                    graph_sidechains.append(Chem.MolFromSmiles(sidechain.replace("[*:1]", "[1*]")))
+        return graph_cores, graph_sidechains
diff --git a/configuration/config.yaml b/configuration/config.yaml
index f7387f3..0136a7e 100644
--- a/configuration/config.yaml
+++ b/configuration/config.yaml
@@ -1,5 +1,5 @@
 ---
-data_file: data/smiles/guacamol_initial_rediscovery_thiotixene.smi
+data_file: data/smiles/guacamol_intitial_rediscovery_troglitazone.smi
 batch_size: 40
 initial_size: 100
 workers: 1
@@ -25,9 +25,11 @@ descriptor:
   - - 40
     - 130
 fitness:
-  target: CN(C)S(=O)(=O)c1ccc2Sc3ccccc3C(=CCCN4CCN(C)CC4)c2c1
+  target: "O=C1NC(=O)SC1Cc4ccc(OCC3(Oc2c(c(c(O)c(c2CC3)C)C)C)C)cc4"
   type: ECFP4
 arbiter:
   rules:
   - Glaxo
+mutator:
+  data_file: data/smarts/mutation_collection.tsv
 
diff --git a/data/README.md b/data/README.md
index d465147..6dc6d9f 100644
--- a/data/README.md
+++ b/data/README.md
@@ -1,21 +1,13 @@
-# Sample Package Data
+# Data 
+This directory contains data that were included with the argenomic package. This is also a place where non-code related additional information (such as data files, molecular structures,  etc.) can 
+conveniently be stored. Please note that the output files are automatically stored in their own folder.
 
-This directory contains sample additional data you may want to include with your package.
-This is a place where non-code related additional information (such as data files, molecular structures,  etc.) can 
-go that you want to ship alongside your code.
+An overview of the subdirectories: 
 
-Please note that it is not recommended to place large files in your git directory. If your project requires files larger
-than a few megabytes in size it is recommended to host these files elsewhere. This is especially true for binary files
-as the `git` structure is unable to correctly take updates to these files and will store a complete copy of every version
-in your `git` history which can quickly add up. As a note most `git` hosting services like GitHub have a 1 GB per repository
-cap.
+* cvt: Contains the cvt centroids as stored in cache. New cvt centroids, as generated by argenomic, will be stored here automatically.
 
-## Including package data
+* figures: A collection of figures generated with data from argenomic.
 
-Modify your package's `setup.py` file and the `setup()` command. Include the 
-[`package_data`](http://setuptools.readthedocs.io/en/latest/setuptools.html#basic-use) keyword and point it at the 
-correct files.
+* smarts: Two smarts-files: alert_collection.csv (containing the smarts needed to remove unwanted molecules) and mutation_collection.tsv (containing the smarts causing the mutations and their probability weights).
 
-## Manifest
-
-* `look_and_say.dat`: first entries of the "Look and Say" integer series, sequence [A005150](https://oeis.org/A005150)
+* smiles: a number of files containing databases of smiles from ZINC, ChEMBL and the Guacamol projects.
diff --git a/data/cvt/cache_2_25000.csv b/data/cvt/cache_2_25000.csv
new file mode 100644
index 0000000..2d43a16
--- /dev/null
+++ b/data/cvt/cache_2_25000.csv
@@ -0,0 +1,150 @@
+5.214209003438609047e-01 3.491316341809322932e-01
+6.572639038034633341e-01 8.567305610109627878e-01
+1.303307299222913884e-01 5.618729675703061366e-01
+8.897109583652169640e-01 1.823853610120633184e-01
+1.112530223485239667e-01 2.125501570542110974e-01
+8.810485063108196568e-01 6.122016082233591172e-01
+3.172242608672095621e-01 7.877221983336107902e-01
+5.638951314398871206e-01 1.390602294501244174e-01
+3.747759490579619657e-01 5.166290158658712883e-01
+7.156505009795108840e-01 5.158020582770069806e-01
+8.855806372593864051e-01 7.760577077222354792e-01
+1.932917979672891251e-01 8.695757025678164664e-01
+5.193996142549586192e-01 6.819997613639713352e-01
+7.720992401187742882e-01 2.812513181445543964e-01
+1.967793652955975303e-01 2.039334324085603223e-01
+2.352549408824252120e-01 3.542762225886592065e-02
+4.177055087677166156e-01 8.890403494605696455e-01
+7.693683978100818166e-01 4.049010623334153847e-02
+8.902332386247120422e-01 3.473863236770655671e-01
+3.922246776846531335e-01 1.488768714888923927e-01
+2.559342321943171017e-01 4.216102113216871339e-01
+8.798530818662737563e-01 8.592640781857143750e-01
+3.612778158118024052e-02 1.036870890873520246e-01
+3.510767573549956611e-01 9.637000722224431382e-01
+1.224900179721745141e-01 7.161863573661265692e-01
+8.612922384536358944e-02 4.426260471780079642e-01
+6.555067788482615843e-01 6.677712290512817228e-01
+5.504482583622519742e-01 5.197246061218662172e-01
+2.831731491435160453e-01 6.398368855366747265e-01
+5.314617680314005499e-01 2.296585498913069068e-01
+3.984072570036540917e-01 3.337886928463870140e-01
+8.333238035694940926e-01 1.134076843720980277e-01
+3.163692070297491332e-02 9.512468980935808727e-01
+7.205792391115668494e-01 2.041550631812601435e-01
+8.087555260558467740e-01 7.138242631876635658e-01
+6.794411380054670158e-01 4.416724920884920746e-01
+5.393332710466596236e-01 9.658971387152128774e-01
+3.702092163590727791e-01 6.038501060352695626e-01
+9.681447104216851507e-01 9.601857249202805367e-01
+9.616608612212861340e-01 7.270940358476680743e-01
+1.387118746086496990e-01 2.850561698172589531e-01
+3.195399580739361478e-01 2.629479407135443814e-01
+7.573648509812191953e-01 4.274626637112869942e-01
+7.694632049488048242e-01 9.593063682048701413e-01
+5.853447568827151448e-01 8.066919752545115285e-01
+2.170097874003349969e-01 5.061635451836496991e-01
+3.970089323290530992e-01 7.505582749837897971e-01
+8.050315478644549660e-01 1.984576609380733214e-01
+4.812033557458149091e-01 3.097518436134361064e-02
+9.604678072063290717e-01 8.117042389708366557e-01
+2.008381024663641878e-01 7.794389048430581113e-01
+4.692908801723650325e-01 2.865736759496665398e-01
+8.073741232549369595e-01 3.662128565463809715e-01
+7.382048913037019267e-01 7.913194942197723236e-01
+1.761703699387680011e-01 9.639133896156604209e-01
+3.496167881473810635e-02 5.804975950556504616e-01
+1.928213400661419885e-01 3.575214785642295001e-01
+1.077711933279865697e-01 8.939322765817070415e-01
+6.374027015053089773e-01 6.113297330041378430e-02
+1.445612372438661031e-01 3.788953199110395298e-02
+2.633122054127355893e-01 8.438824838695733188e-01
+7.963634650595494691e-01 6.323165707597924179e-01
+6.737949008337890122e-01 2.766189733124589933e-01
+5.274702277241158921e-01 4.373025059938003278e-01
+4.304150612853208102e-02 2.846488432599008633e-01
+9.647113321368945282e-01 8.882386748209299965e-01
+4.532849626097593676e-01 4.923688927297610318e-01
+7.310348249978582125e-01 8.857302526180675928e-01
+9.579362671208806468e-01 2.252990049955453378e-01
+9.535781752654851617e-01 4.777069044746957149e-01
+9.040628422715089041e-01 8.111615467527399925e-02
+6.182423593013797181e-01 9.590362561636518457e-01
+1.695183616744483923e-01 1.261849951640344747e-01
+3.912728348717070537e-02 8.599571285833029766e-01
+2.643933415487987881e-01 9.489729036337286594e-01
+3.203002274601614574e-01 1.065119796974987043e-01
+8.876138547138956447e-02 6.331635974709677317e-01
+8.813397046919623223e-01 5.209305045404831747e-01
+2.993241591845399174e-01 3.495323031802242131e-01
+2.707368953274269519e-01 7.325525350208567099e-01
+9.604093329178933214e-01 3.069369274839566231e-01
+3.465402413666332504e-01 6.908887071187498341e-01
+7.127350216961223506e-01 6.052670469128804687e-01
+5.888521290504437067e-01 6.310897921876609251e-01
+4.373809204843845944e-01 9.663557115721628232e-01
+4.506617210695661457e-01 4.030904916641723235e-01
+8.532788650421817556e-01 3.281661719360373652e-02
+3.478303020998657624e-02 1.859259674906943438e-01
+1.626961335708531631e-01 6.430188206375511140e-01
+3.988095228969700257e-01 8.184398893195028446e-01
+1.047020046074574884e-01 3.549161442712227554e-01
+6.394421950638540242e-01 5.547949071104841812e-01
+7.341152933424237581e-01 1.215903491414830895e-01
+2.863958769918517611e-02 3.846665214332258831e-01
+5.081150544763712862e-02 3.576573965102103347e-02
+2.153929104872027156e-01 5.866033110529029759e-01
+4.958841042067115290e-01 7.522586885094930675e-01
+6.146361852750878985e-01 2.078919852605342622e-01
+6.973805024019630316e-01 3.730181185213271577e-02
+6.739361084321531248e-01 7.581619302505517854e-01
+8.878768404318930774e-01 6.939084709914972171e-01
+5.882726959291096902e-01 2.985770494865925828e-01
+3.385692702107084839e-02 4.962881216910638993e-01
+3.649212802963819335e-01 4.229624519518367931e-01
+6.099014853388573520e-01 4.675345834068886375e-01
+4.694455197182448858e-01 1.013493495098539143e-01
+5.169048233667209447e-01 5.814256101608156202e-01
+8.034783458500311948e-01 5.592932459274203572e-01
+4.373281756997882130e-01 6.662247776421277301e-01
+3.945369906704876972e-01 4.896706580165904876e-02
+1.299618742969136376e-01 5.004433189751911204e-01
+8.648002620121615358e-01 2.776245412123384826e-01
+9.591990260907414800e-01 4.054440505067447020e-02
+5.777460896787789402e-01 8.884680499674402210e-01
+2.365335527911612834e-01 2.783666312626085215e-01
+3.658831115952154711e-02 6.836500758455124060e-01
+5.657516560805646577e-01 4.101823160162576531e-02
+9.031657002227472253e-01 9.463393694189856653e-01
+5.940950057534915008e-01 7.269034663708533461e-01
+3.981189669637171802e-01 2.371576058571607803e-01
+2.931982213424287886e-01 1.894775508857726720e-01
+2.163667400438791799e-01 6.912413185724788356e-01
+3.365126995284586364e-01 8.836112486165982327e-01
+6.216043792037700966e-01 3.736353447618341317e-01
+8.103602201395208038e-01 7.923019377477317216e-01
+4.400573378598088281e-02 7.765960057559868623e-01
+2.913708750605449316e-01 5.625327134151457686e-01
+2.991312276686851623e-01 4.888558862352131995e-01
+4.899724900427019358e-01 9.087993656686448318e-01
+4.716804015209817846e-01 1.871896263836810848e-01
+3.191700905342936823e-01 3.501468874862628811e-02
+9.572032351641173165e-01 6.354205068526487254e-01
+7.124398600540035043e-01 3.531460747367958786e-01
+1.624822601983763604e-01 4.286909982746384129e-01
+1.007349828790176649e-01 9.660750420822548090e-01
+1.239233587164308426e-01 8.113025574272882956e-01
+9.617165396406597289e-01 3.963283021692636243e-01
+6.502062645640349636e-01 1.430817365785390094e-01
+7.378736400348993030e-01 6.933054273821108726e-01
+4.886903807060516414e-01 8.305469251510426698e-01
+8.432538584906650314e-01 9.549108008529461067e-01
+9.644638498155262418e-01 1.347018225560029925e-01
+6.883723988416640127e-01 9.531433944856376739e-01
+8.083828824495542520e-01 4.806184952696599955e-01
+4.458542432605734351e-01 5.796875996515719054e-01
+8.016880637375696228e-01 8.726789294931538432e-01
+2.454045033751177107e-01 1.169903171997553892e-01
+9.606265008504202285e-01 5.589727681639240497e-01
+8.758801013322721207e-01 4.318442003214353897e-01
+1.017579884037961846e-01 1.227205974955919432e-01
diff --git a/data/figures/logo.png b/data/figures/logo.png
index b6f2a09..a19d9ff 100644
Binary files a/data/figures/logo.png and b/data/figures/logo.png differ
diff --git a/data/figures/logo_old.png b/data/figures/logo_old.png
new file mode 100644
index 0000000..b6f2a09
Binary files /dev/null and b/data/figures/logo_old.png differ
diff --git a/data/figures/new_logo.png b/data/figures/new_logo.png
new file mode 100644
index 0000000..e6f078c
Binary files /dev/null and b/data/figures/new_logo.png differ
diff --git a/data/smarts/mutation_collection.tsv b/data/smarts/mutation_collection.tsv
index 23cb007..99bedce 100644
--- a/data/smarts/mutation_collection.tsv
+++ b/data/smarts/mutation_collection.tsv
@@ -94,27 +94,3 @@ add_ring	[*;!r;!H0:1]~[*;!r:2]~[*;!r;!H0:3]>>[*:1]1~[*:2]~[*:3]1	0.05
 add_ring	[*;!r;!H0:1]~[*!r:2]~[*!r:3]~[*;!r;!H0:4]>>[*:1]1~[*:2]~[*:3]~[*:4]1	0.05
 add_ring	[*;!r;!H0:1]~[*!r:2]~[*:3]~[*:4]~[*;!r;!H0:5]>>[*:1]1~[*:2]~[*:3]~[*:4]~[*:5]1	0.45
 add_ring	[*;!r;!H0:1]~[*!r:2]~[*:3]~[*:4]~[*!r:5]~[*;!r;!H0:6]>>[*:1]1~[*:2]~[*:3]~[*:4]~[*:5]~[*:6]1	0.45
-append_group	[*;!H0:1]>>[*:1]-C-O	0.15
-append_group	[*;!H0:1]>>[*:1]-[C](=O)-N	0.15
-append_group	[*;!H0:1]>>[*:1]-C#N	0.15
-append_group	[*;!H0:1]>>[*:1]-C=N	0.15
-append_group	[*;!H0:1]>>[*:1]-[C](=N)-N	0.1
-append_group	[*;!H0:1]>>[*:1]-N-[C](=N)-N	0.1
-append_group	[*;!H0:1]>>[*:1]-N-[C](=O)-O	0.1
-append_group	[*;!H0:1]>>[*:1]-[S](=O)(=O)-O	0.05
-append_group	[*;!H0:1]>>[*:1]-[S](=O)(=O)-N	0.05
-delete_group	[*:1]~C-O>>[*:1]	0.15
-delete_group	[*:1]~[C](=O)-N>>[*:1]	0.15
-delete_group	[*:1]~C#N>>[*:1]	0.15
-delete_group	[*:1]~C=N>>[*:1]	0.15
-delete_group	[*:1]~[C](=N)-N>>[*:1]	0.1
-delete_group	[*:1]~N-[C](=N)-N>>[*:1]	0.1
-delete_group	[*:1]~N-[C](=O)-O>>[*:1]	0.1
-delete_group	[*:1]~[S](=O)(=O)-O>>[*:1]	0.05
-delete_group	[*:1]~[S](=O)(=O)-N>>[*:1]	0.05
-insert_group	[*:1]~[*:2]>>[*:1]-C-O-[*:2]	0.2
-insert_group	[*:1]~[*:2]>>[*:1]-[C](=O)-N-[*:2]	0.2
-insert_group	[*:1]~[*:2]>>[*:1]-[S](=O)(=O)-N-[*:2]	0.2
-insert_group	[*:1]~[*:2]>>[*:1]-[C](=O)-O-[*:2]	0.1
-insert_group	[*:1]~[*:2]>>[*:1]-[C](=O)-[N](-F)-[*:2]	0.15
-insert_group	[*:1]~[*:2]>>[*:1]-C-[C](-F)=C-C-[*:2]	0.15
diff --git a/environment.yml b/environment.yml
index 66ca998..ca14446 100644
--- a/environment.yml
+++ b/environment.yml
@@ -77,6 +77,7 @@ dependencies:
   - python-dateutil=2.8.1=py_0
   - python_abi=3.7=1_cp37m
   - pytz=2020.1=py_0
+  - pyyaml=5.3.1=py37h7b6447c_1
   - rdkit=2020.03.3=py37hdd87690_0
   - readline=8.0=h7b6447c_0
   - scikit-learn=0.23.1=py37h423224d_0
@@ -115,6 +116,4 @@ dependencies:
     - hydra-core==1.0.0rc2
     - importlib-resources==3.0.0
     - omegaconf==2.0.1rc11
-    - pyyaml==5.3.1
-prefix: /home/jonas/anaconda3/envs/argenomic-stable
-
+prefix: /home/jonasver/anaconda3/envs/argenomic-stable
diff --git a/illuminate.py b/illuminate.py
index 001d5bc..b985891 100644
--- a/illuminate.py
+++ b/illuminate.py
@@ -1,6 +1,7 @@
 import hydra
+import numpy as np
 import pandas as pd
-from typing import List, Tuple
+from typing import List, Tuple, Type
 
 from rdkit import Chem
 from rdkit.Chem import PandasTools as pdtl
@@ -8,78 +9,81 @@
 from dask import bag
 from dask.distributed import Client
 
-from argenomic.operations import crossover, mutator
-from argenomic.mechanism import descriptor, fitness
-from argenomic.infrastructure import archive, arbiter
+from argenomic.base import Molecule
+from argenomic.operations import Mutator, Crossover
+from argenomic.infrastructure import Arbiter, Archive
+from argenomic.mechanism import Fitness, Descriptor
 
-class illumination:
+class Illuminate:
     def __init__(self, config) -> None:
         self.data_file = config.data_file
+        self.generations = config.generations
         self.batch_size = config.batch_size
         self.initial_size = config.initial_size
-        self.generations = config.generations
 
-        self.mutator = mutator()
-        self.crossover = crossover()
-        self.arbiter = arbiter(config.arbiter)
-        self.descriptor = descriptor(config.descriptor)
-        self.archive = archive(config.archive, config.descriptor)
-        self.fitness = fitness(config.fitness)
+        self.arbiter = Arbiter(config.arbiter)
+        self.fitness = Fitness(config.fitness)
+        self.mutator = Mutator(config.mutator)
+        self.crossover = Crossover()
+        self.descriptor = Descriptor(config.descriptor)
+        self.archive = Archive(config.archive, config.descriptor)
 
         self.client = Client(n_workers=config.workers, threads_per_worker=config.threads)
         return None
 
     def __call__(self) -> None:
         self.initial_population()
-        for generation in range(self.generations):
+        for generation in range(1, self.generations):
             molecules = self.generate_molecules()
-            molecules, descriptors, fitnesses = self.process_molecules(molecules)
-            self.archive.add_to_archive(molecules, descriptors, fitnesses)
-            self.archive.store_statistics(generation)
-            self.archive.store_archive(generation)
+            molecules = self.process_molecules(molecules)
+            self.archive.add_to_archive(molecules)
+            self.archive.store_data(generation)
         return None
 
     def initial_population(self) -> None:
-        dataframe = pd.read_csv(hydra.utils.to_absolute_path(self.data_file))
-        pdtl.AddMoleculeColumnToFrame(dataframe, 'smiles', 'molecule')
-        molecules = dataframe['molecule'].sample(n=self.initial_size).tolist()
-        molecules = self.arbiter(self.unique_molecules(molecules))
-        molecules, descriptors, fitnesses = self.process_molecules(molecules)
-        self.archive.add_to_archive(molecules, descriptors, fitnesses)
+        molecules = self.arbiter(self.load_from_database())
+        molecules = self.calculate_descriptors(molecules)
+        molecules = self.calculate_fitnesses(molecules)
+        self.archive.add_to_archive(molecules)
+        self.archive.store_data(0)
         return None
 
-    def generate_molecules(self) -> None:
+    def load_from_database(self) -> List[Molecule]:
+        dataframe = pd.read_csv(hydra.utils.to_absolute_path(self.data_file))
+        smiles_list = dataframe['smiles'].sample(n=self.initial_size).tolist()
+        pedigree = ("database", "no reaction", "no parent")   
+        molecules = [Molecule(Chem.CanonSmiles(smiles), pedigree) for smiles in smiles_list]
+        return molecules
+
+    def generate_molecules(self) -> List[Molecule]:
         molecules = []
-        sample_molecules = self.archive.sample(self.batch_size)
-        sample_molecule_pairs = self.archive.sample_pairs(self.batch_size)
-        for molecule in sample_molecules:
-            molecules.extend(self.mutator(molecule))
-        for molecule_pair in sample_molecule_pairs:
-            molecules.extend(self.crossover(molecule_pair))
-        molecules = self.arbiter(self.unique_molecules(molecules))
+        molecule_samples = self.archive.sample(self.batch_size)
+        molecule_sample_pairs = self.archive.sample_pairs(self.batch_size)
+        for molecule in molecule_samples:
+            molecules.extend(self.mutator(molecule)) 
+        for molecule_pair in molecule_sample_pairs:
+            molecules.extend(self.crossover(molecule_pair)) 
         return molecules
 
-    def process_molecules(self, molecules: List[Chem.Mol]) -> Tuple[List[List[float]],List[float]]:
-        descriptors = bag.map(self.descriptor, bag.from_sequence(molecules)).compute()
-        molecules, descriptors = zip(*[(molecule, descriptor) for molecule, descriptor in zip(molecules, descriptors)\
-                if all(1.0 > property > 0.0 for property in descriptor)])
-        molecules, descriptors = list(molecules), list(descriptors)
-        fitnesses = bag.map(self.fitness, bag.from_sequence(molecules)).compute()
-        return molecules, descriptors, fitnesses
+    def process_molecules(self, molecules: List[Molecule]) -> List[Molecule]:
+        molecules = self.arbiter(molecules)
+        molecules = self.calculate_descriptors(molecules)
+        molecules = self.calculate_fitnesses(molecules)
+        return molecules
 
-    @staticmethod
-    def unique_molecules(molecules: List[Chem.Mol]) -> List[Chem.Mol]:
-        molecules = [Chem.MolFromSmiles(Chem.MolToSmiles(molecule)) for molecule in molecules if molecule is not None]
-        molecule_records = [(molecule, Chem.MolToSmiles(molecule)) for molecule in molecules if molecule is not None]
-        molecule_dataframe = pd.DataFrame(molecule_records, columns = ['molecules', 'smiles'])
-        molecule_dataframe.drop_duplicates('smiles', inplace = True)
-        return molecule_dataframe['molecules']
+    def calculate_fitnesses(self, molecules: List[Molecule]) -> List[Molecule]:
+        molecules = bag.map(self.fitness, bag.from_sequence(molecules)).compute()
+        return molecules
 
+    def calculate_descriptors(self, molecules: List[Molecule]) -> List[Molecule]:
+        molecules = bag.map(self.descriptor, bag.from_sequence(molecules)).compute()
+        molecules = [molecule for molecule in molecules if all(1.0 > property > 0.0 for property in molecule.descriptor)]
+        return molecules
 
 @hydra.main(config_path="configuration", config_name="config.yaml")
 def launch(config) -> None:
     print(config.pretty())
-    current_instance = illumination(config)
+    current_instance = Illuminate(config)
     current_instance()
     current_instance.client.close()
 
diff --git a/tests/__init__.pyc b/tests/__init__.pyc
new file mode 100644
index 0000000..288db23
Binary files /dev/null and b/tests/__init__.pyc differ
diff --git a/tests/__pycache__/__init__.cpython-36.pyc b/tests/__pycache__/__init__.cpython-36.pyc
deleted file mode 100644
index 293048f..0000000
Binary files a/tests/__pycache__/__init__.cpython-36.pyc and /dev/null differ
diff --git a/tests/__pycache__/__init__.cpython-37.pyc b/tests/__pycache__/__init__.cpython-37.pyc
deleted file mode 100644
index 6b277ce..0000000
Binary files a/tests/__pycache__/__init__.cpython-37.pyc and /dev/null differ
diff --git a/tests/__pycache__/test_infrastructure.cpython-36-pytest-5.4.3.pyc b/tests/__pycache__/test_infrastructure.cpython-36-pytest-5.4.3.pyc
deleted file mode 100644
index e78492f..0000000
Binary files a/tests/__pycache__/test_infrastructure.cpython-36-pytest-5.4.3.pyc and /dev/null differ
diff --git a/tests/__pycache__/test_infrastructure.cpython-37-PYTEST.pyc b/tests/__pycache__/test_infrastructure.cpython-37-PYTEST.pyc
deleted file mode 100644
index 548384a..0000000
Binary files a/tests/__pycache__/test_infrastructure.cpython-37-PYTEST.pyc and /dev/null differ
diff --git a/tests/__pycache__/test_infrastructure.cpython-37-pytest-5.4.3.pyc b/tests/__pycache__/test_infrastructure.cpython-37-pytest-5.4.3.pyc
deleted file mode 100644
index 2653076..0000000
Binary files a/tests/__pycache__/test_infrastructure.cpython-37-pytest-5.4.3.pyc and /dev/null differ
diff --git a/tests/__pycache__/test_mechanism.cpython-36-pytest-5.4.3.pyc b/tests/__pycache__/test_mechanism.cpython-36-pytest-5.4.3.pyc
deleted file mode 100644
index 76d2df0..0000000
Binary files a/tests/__pycache__/test_mechanism.cpython-36-pytest-5.4.3.pyc and /dev/null differ
diff --git a/tests/__pycache__/test_mechanism.cpython-37-PYTEST.pyc b/tests/__pycache__/test_mechanism.cpython-37-PYTEST.pyc
deleted file mode 100644
index 3a52a69..0000000
Binary files a/tests/__pycache__/test_mechanism.cpython-37-PYTEST.pyc and /dev/null differ
diff --git a/tests/__pycache__/test_mechanism.cpython-37-pytest-5.4.3.pyc b/tests/__pycache__/test_mechanism.cpython-37-pytest-5.4.3.pyc
deleted file mode 100644
index c03beea..0000000
Binary files a/tests/__pycache__/test_mechanism.cpython-37-pytest-5.4.3.pyc and /dev/null differ
diff --git a/tests/__pycache__/test_operations.cpython-36-pytest-5.4.3.pyc b/tests/__pycache__/test_operations.cpython-36-pytest-5.4.3.pyc
deleted file mode 100644
index bcebbc2..0000000
Binary files a/tests/__pycache__/test_operations.cpython-36-pytest-5.4.3.pyc and /dev/null differ
diff --git a/tests/__pycache__/test_operations.cpython-37-PYTEST.pyc b/tests/__pycache__/test_operations.cpython-37-PYTEST.pyc
deleted file mode 100644
index bb5aa63..0000000
Binary files a/tests/__pycache__/test_operations.cpython-37-PYTEST.pyc and /dev/null differ
diff --git a/tests/__pycache__/test_operations.cpython-37-pytest-5.4.3.pyc b/tests/__pycache__/test_operations.cpython-37-pytest-5.4.3.pyc
deleted file mode 100644
index f9885bc..0000000
Binary files a/tests/__pycache__/test_operations.cpython-37-pytest-5.4.3.pyc and /dev/null differ
diff --git a/tests/test_config.yaml b/tests/test_config.yaml
index d6177d1..33566a3 100644
--- a/tests/test_config.yaml
+++ b/tests/test_config.yaml
@@ -21,4 +21,5 @@ fitness:
 arbiter:
   rules:
   - Glaxo
-
+mutator:
+  data_file: data/smarts/mutation_collection.tsv
diff --git a/tests/test_infrastructure.py b/tests/test_infrastructure.py
index e306dbc..74a50e0 100644
--- a/tests/test_infrastructure.py
+++ b/tests/test_infrastructure.py
@@ -1,7 +1,9 @@
 import pytest
 import omegaconf
+import numpy as np
 from rdkit import Chem
-from argenomic.infrastructure import archive, arbiter
+from argenomic.infrastructure import Archive, Arbiter
+from argenomic.base import Molecule
 
 @pytest.fixture
 def default_archive():
@@ -9,7 +11,7 @@ def default_archive():
     '''
     Returns an archive instance of a 150 niches, spanned by ExactMolWt and MolLogP.
     '''
-    return archive(configuration_file.archive, configuration_file.descriptor)
+    return Archive(configuration_file.archive, configuration_file.descriptor)
 
 @pytest.fixture
 def default_arbiter():
@@ -17,16 +19,17 @@ def default_arbiter():
     '''
     Returns an arbiter instance, initialised with GSK structural alerts.
     '''
-    return arbiter(configuration_file.arbiter)
+    return Arbiter(configuration_file.arbiter)
 
 @pytest.fixture
 def default_molecules():
-    smiles = ["Clc1ccc(cc1)C(c2ccccc2)N3CCN(CC3)CCOCC(=O)O", "CC1=CC(Cl)=CC(C(=O)N[C@@H]2C[C@@H]3CCCC[C@@H]32)=C1C"]
-    molecules = [Chem.MolFromSmiles(individual_smiles) for individual_smiles in smiles]
+    smiles_list = ["Clc1ccc(cc1)C(c2ccccc2)N3CCN(CC3)CCOCC(=O)O", "CC1=CC(Cl)=CC(C(=O)N[C@@H]2C[C@@H]3CCCC[C@@H]32)=C1C"]
+    pedigree = ("database", "no reaction", "no parent")   
+    molecules = [Molecule(Chem.CanonSmiles(smiles), pedigree, fitness=0.2 , descriptor=[np.random.rand(), np.random.rand()]) for smiles in smiles_list]
     return molecules
 
 def test_default_archive(default_archive, default_molecules):
-    default_archive.add_to_archive(default_molecules, [[0.1, 0.1], [0.9, 0.9]], [0.0, 1.0])
+    default_archive.add_to_archive(default_molecules)
     assert len(default_archive.sample(2)) == 2
     assert len(default_archive.sample_pairs(5)) == 5
 
diff --git a/tests/test_mechanism.py b/tests/test_mechanism.py
index 76744f8..eff1c5c 100644
--- a/tests/test_mechanism.py
+++ b/tests/test_mechanism.py
@@ -1,7 +1,8 @@
 import pytest
 import omegaconf
 from rdkit import Chem
-from argenomic.mechanism import descriptor, fitness
+from argenomic.mechanism import Descriptor, Fitness
+from argenomic.base import Molecule
 
 @pytest.fixture
 def default_descriptor():
@@ -9,30 +10,30 @@ def default_descriptor():
     Returns a descriptor instance, set-up to calculate normalised ExactMolWt and MolLogP.
     '''
     configuration_file = omegaconf.OmegaConf.load("./tests/test_config.yaml")
-    return descriptor(configuration_file.descriptor)
+    return Descriptor(configuration_file.descriptor)
 
 @pytest.fixture
 def default_fitness():
     configuration_file = omegaconf.OmegaConf.load("./tests/test_config.yaml")
-    return fitness(configuration_file.fitness)
+    return Fitness(configuration_file.fitness)
 
 @pytest.fixture
 def default_molecules():
-    '''
-    Returns a list of two molecules.
-    '''
-    smiles = ["Clc1ccc(cc1)C(c2ccccc2)N3CCN(CC3)CCOCC(=O)O", "CC1=CC(Cl)=CC(C(=O)N[C@@H]2C[C@@H]3CCCC[C@@H]32)=C1C"]
-    molecules = [Chem.MolFromSmiles(individual_smiles) for individual_smiles in smiles]
+    smiles_list = ["Clc1ccc(cc1)C(c2ccccc2)N3CCN(CC3)CCOCC(=O)O", "CC1=CC(Cl)=CC(C(=O)N[C@@H]2C[C@@H]3CCCC[C@@H]32)=C1C"]
+    pedigree = ("database", "no reaction", "no parent")   
+    molecules = [Molecule(Chem.CanonSmiles(smiles), pedigree) for smiles in smiles_list]
     return molecules
 
 def test_default_descriptor(default_descriptor, default_molecules):
-    descriptors = default_descriptor(default_molecules)
-    for descriptor in descriptors:
-        assert 0.00 <= descriptor
-        assert descriptor <= 1.00
+    for molecule in default_molecules:
+        molecule = default_descriptor(molecule)
+        for descriptor in molecule.descriptors:
+            assert 0.00 <= descriptor
+            assert descriptor <= 1.00
+    
 
 def test_default_descriptor(default_fitness, default_molecules):
     for molecule in default_molecules:
-        fitness = default_fitness(molecule)
-        assert 0.00 <= fitness
-        assert fitness <= 1.00
+        molecule = default_fitness(molecule)
+        assert 0.00 <= molecule.fitness
+        assert molecule.fitness <= 1.00
diff --git a/tests/test_operations.py b/tests/test_operations.py
index bfa9dad..f9c50f6 100644
--- a/tests/test_operations.py
+++ b/tests/test_operations.py
@@ -1,29 +1,29 @@
 import pytest
 import omegaconf
 from rdkit import Chem
-from argenomic.operations import mutator, crossover
-
+from argenomic.operations import Mutator, Crossover
+from argenomic.base import Molecule
+ 
 @pytest.fixture
 def default_mutator():
     '''
     Returns an instance of a mutator.
     '''
-    return mutator()
+    configuration_file = omegaconf.OmegaConf.load("./tests/test_config.yaml")
+    return Mutator(configuration_file.mutator)
 
 @pytest.fixture
 def default_crossover():
     '''
     Returns an instance of a crossover.
     '''
-    return crossover()
+    return Crossover()
 
 @pytest.fixture
 def default_molecules():
-    '''
-    Returns a list of two molecules.
-    '''
-    smiles = ["Clc1ccc(cc1)C(c2ccccc2)N3CCN(CC3)CCOCC(=O)O", "CC1=CC(Cl)=CC(C(=O)N[C@@H]2C[C@@H]3CCCC[C@@H]32)=C1C"]
-    molecules = [Chem.MolFromSmiles(individual_smiles) for individual_smiles in smiles]
+    smiles_list = ["Clc1ccc(cc1)C(c2ccccc2)N3CCN(CC3)CCOCC(=O)O", "CC1=CC(Cl)=CC(C(=O)N[C@@H]2C[C@@H]3CCCC[C@@H]32)=C1C"]
+    pedigree = ("database", "no reaction", "no parent")   
+    molecules = [Molecule(Chem.CanonSmiles(smiles), pedigree) for smiles in smiles_list]
     return molecules
 
 @pytest.mark.xfail
@@ -32,11 +32,13 @@ def test_default_mutator(default_mutator, default_molecules):
     Tests the action of the mutator. May fail occasionally due to stochasticity.
     The result of this test is reported separtely.
     '''
-    for molecule in default_molecules:
+    molecules = default_molecules
+    for molecule in molecules:
         assert len(default_mutator(molecule)) > 0
 
 def test_default_crossover(default_crossover, default_molecules):
     '''
     Tests the action of the crossover.
     '''
-    assert len(default_crossover(default_molecules)) > 0
+    molecules = default_molecules
+    assert len(default_crossover(molecules)) > 0