Commit bdf14d1

changing the yaml configs with the new caching logic
DomInvivo committed Aug 10, 2023
1 parent cc27c8f commit bdf14d1
Showing 33 changed files with 13 additions and 43 deletions.
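
Taken together, the changes apply a single migration across the 33 configs: the old `cache_data_path` key is removed from the datamodule arguments, `processed_graph_data_path` takes over wherever a cache location is still needed, and a new `dataloading_from` key appears alongside it. The paired, visually identical lines further down (e.g. `residual_type: none`, `pcqm4m_n4: []`) appear to be whitespace-only changes. A minimal before/after sketch of the pattern, using key names and values taken from this diff (indentation and surrounding keys vary per file; only `ram` is attested for `dataloading_from` in this commit, so any other value is an assumption):

    datamodule:
      args:
        # Old caching key (removed in this commit):
        # cache_data_path: "./datacache"

        # New caching keys:
        processed_graph_data_path: "./datacache/"  # directory holding processed graphs
        dataloading_from: ram                      # the only value attested in this diff
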
1 change: 0 additions & 1 deletion expts/configs/config_gps_10M_pcqm4m.yaml
@@ -112,7 +112,6 @@ datamodule:
pos_type: rw_return_probs
ksteps: 16

- # cache_data_path: .
num_workers: 0 # -1 to use all
persistent_workers: False # if use persistent worker at the start of each epoch.
# Using persistent_workers false might make the start of each epoch very long.
1 change: 0 additions & 1 deletion expts/configs/config_gps_10M_pcqm4m_mod.yaml
@@ -81,7 +81,6 @@ datamodule:
# Data handling-related
batch_size_training: 64
batch_size_inference: 16
- # cache_data_path: .
num_workers: 0 # -1 to use all
persistent_workers: False # if use persistent worker at the start of each epoch.
# Using persistent_workers false might make the start of each epoch very long.
2 changes: 1 addition & 1 deletion expts/configs/config_mpnn_10M_b3lyp.yaml
@@ -93,6 +93,7 @@ datamodule:
featurization_progress: True
featurization_backend: "loky"
processed_graph_data_path: "../datacache/b3lyp/"
+ dataloading_from: ram
featurization:
# OGB: ['atomic_num', 'degree', 'possible_formal_charge', 'possible_numH' (total-valence),
# 'possible_number_radical_e', 'possible_is_aromatic', 'possible_is_in_ring',
@@ -123,7 +124,6 @@ datamodule:
pos_type: rw_return_probs
ksteps: 16

- # cache_data_path: .
num_workers: 0 # -1 to use all
persistent_workers: False # if use persistent worker at the start of each epoch.
# Using persistent_workers false might make the start of each epoch very long.
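
The b3lyp config above is the first in this diff to gain the new key next to its existing `processed_graph_data_path`. Judging by the name, `dataloading_from` presumably selects whether processed graphs are served from memory or re-read from the processed-data directory during training; only `ram` appears in this commit, so any other value is an assumption:

    dataloading_from: ram     # value used throughout this commit
    # dataloading_from: disk  # hypothetical alternative, not shown in this diff
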
3 changes: 1 addition & 2 deletions expts/configs/config_mpnn_pcqm4m.yaml
@@ -30,8 +30,8 @@ datamodule:
featurization_n_jobs: 20
featurization_progress: True
featurization_backend: "loky"
cache_data_path: "./datacache"
processed_graph_data_path: "graphium/data/PCQM4Mv2/"
+ dataloading_from: ram
featurization:
# OGB: ['atomic_num', 'degree', 'possible_formal_charge', 'possible_numH' (total-valence),
# 'possible_number_radical_e', 'possible_is_aromatic', 'possible_is_in_ring',
@@ -58,7 +58,6 @@ datamodule:
# Data handling-related
batch_size_training: 64
batch_size_inference: 16
- # cache_data_path: .
num_workers: 40 # -1 to use all
persistent_workers: False # if use persistent worker at the start of each epoch.
# Using persistent_workers false might make the start of each epoch very long.
1 change: 0 additions & 1 deletion expts/neurips2023_configs/base_config/large.yaml
@@ -168,7 +168,6 @@ datamodule:
pos_type: rw_return_probs
ksteps: 16

- # cache_data_path: .
num_workers: 32 # -1 to use all
persistent_workers: True # if use persistent worker at the start of each epoch.
# Using persistent_workers false might make the start of each epoch very long.
1 change: 0 additions & 1 deletion expts/neurips2023_configs/base_config/small.yaml
@@ -132,7 +132,6 @@ datamodule:
pos_type: rw_return_probs
ksteps: 16

- # cache_data_path: .
num_workers: 30 # -1 to use all
persistent_workers: False # if use persistent worker at the start of each epoch.
# Using persistent_workers false might make the start of each epoch very long.
@@ -131,7 +131,6 @@ datamodule:
pos_type: rw_return_probs
ksteps: 16

- # cache_data_path: .
num_workers: 30 # -1 to use all
persistent_workers: False # if use persistent worker at the start of each epoch.
# Using persistent_workers false might make the start of each epoch very long.
@@ -111,7 +111,6 @@ datamodule:
pos_type: rw_return_probs
ksteps: 16

- # cache_data_path: .
num_workers: 5 # -1 to use all
persistent_workers: False # if use persistent worker at the start of each epoch.
# Using persistent_workers false might make the start of each epoch very long.
1 change: 0 additions & 1 deletion expts/neurips2023_configs/config_luis_jama.yaml
@@ -119,7 +119,6 @@ datamodule:
pos_type: rw_return_probs
ksteps: 16

- # cache_data_path: .
num_workers: 4 # -1 to use all
persistent_workers: False # if use persistent worker at the start of each epoch.
# Using persistent_workers false might make the start of each epoch very long.
1 change: 0 additions & 1 deletion expts/neurips2023_configs/debug/config_debug.yaml
@@ -105,7 +105,6 @@ datamodule:
pos_type: rw_return_probs
ksteps: 16

- # cache_data_path: .
num_workers: 0 # -1 to use all
persistent_workers: False # if use persistent worker at the start of each epoch.
# Using persistent_workers false might make the start of each epoch very long.
3 changes: 1 addition & 2 deletions expts/neurips2023_configs/debug/config_large_gcn_debug.yaml
@@ -166,7 +166,6 @@ datamodule:
pos_type: rw_return_probs
ksteps: 16

- # cache_data_path: .
num_workers: 30 # -1 to use all
persistent_workers: False # if use persistent worker at the start of each epoch.
# Using persistent_workers false might make the start of each epoch very long.
@@ -327,7 +326,7 @@ predictor:
l1000_mcf7: []
pcba_1328: []
pcqm4m_g25: []
- pcqm4m_n4: []
+ pcqm4m_n4: []
loss_fun:
l1000_vcap:
name: hybrid_ce_ipu
@@ -119,7 +119,6 @@ datamodule:
pos_type: rw_return_probs
ksteps: 16

- # cache_data_path: .
num_workers: 30 # -1 to use all
persistent_workers: False # if use persistent worker at the start of each epoch.
# Using persistent_workers false might make the start of each epoch very long.
@@ -100,7 +100,6 @@ datamodule:
pos_type: rw_return_probs
ksteps: 16

- # cache_data_path: .
num_workers: 30 # -1 to use all
persistent_workers: False # if use persistent worker at the start of each epoch.
# Using persistent_workers false might make the start of each epoch very long.
@@ -100,7 +100,6 @@ datamodule:
pos_type: rw_return_probs
ksteps: 16

- # cache_data_path: .
num_workers: 30 # -1 to use all
persistent_workers: False # if use persistent worker at the start of each epoch.
# Using persistent_workers false might make the start of each epoch very long.
@@ -100,7 +100,6 @@ datamodule:
pos_type: rw_return_probs
ksteps: 16

- # cache_data_path: .
num_workers: 30 # -1 to use all
persistent_workers: False # if use persistent worker at the start of each epoch.
# Using persistent_workers false might make the start of each epoch very long.
@@ -103,7 +103,6 @@ datamodule:
pos_type: rw_return_probs
ksteps: 16

- # cache_data_path: .
num_workers: 30 # -1 to use all
persistent_workers: False # if use persistent worker at the start of each epoch.
# Using persistent_workers false might make the start of each epoch very long.
@@ -100,7 +100,6 @@ datamodule:
pos_type: rw_return_probs
ksteps: 16

- # cache_data_path: .
num_workers: 30 # -1 to use all
persistent_workers: False # if use persistent worker at the start of each epoch.
# Using persistent_workers false might make the start of each epoch very long.
@@ -104,7 +104,6 @@ datamodule:
pos_type: rw_return_probs
ksteps: 16

- # cache_data_path: .
num_workers: 30 # -1 to use all
persistent_workers: False # if use persistent worker at the start of each epoch.
# Using persistent_workers false might make the start of each epoch very long.
@@ -100,7 +100,6 @@ datamodule:
pos_type: rw_return_probs
ksteps: 16

- # cache_data_path: .
num_workers: 30 # -1 to use all
persistent_workers: False # if use persistent worker at the start of each epoch.
# Using persistent_workers false might make the start of each epoch very long.
@@ -118,7 +118,6 @@ datamodule:
pos_type: rw_return_probs
ksteps: 16

- # cache_data_path: .
num_workers: 30 # -1 to use all
persistent_workers: False # if use persistent worker at the start of each epoch.
# Using persistent_workers false might make the start of each epoch very long.
@@ -100,7 +100,6 @@ datamodule:
pos_type: rw_return_probs
ksteps: 16

- # cache_data_path: .
num_workers: 30 # -1 to use all
persistent_workers: False # if use persistent worker at the start of each epoch.
# Using persistent_workers false might make the start of each epoch very long.
@@ -103,7 +103,6 @@ datamodule:
pos_type: rw_return_probs
ksteps: 16

- # cache_data_path: .
num_workers: 30 # -1 to use all
persistent_workers: False # if use persistent worker at the start of each epoch.
# Using persistent_workers false might make the start of each epoch very long.
@@ -100,7 +100,6 @@ datamodule:
pos_type: rw_return_probs
ksteps: 16

- # cache_data_path: .
num_workers: 30 # -1 to use all
persistent_workers: False # if use persistent worker at the start of each epoch.
# Using persistent_workers false might make the start of each epoch very long.
@@ -104,7 +104,6 @@ datamodule:
pos_type: rw_return_probs
ksteps: 16

- # cache_data_path: .
num_workers: 30 # -1 to use all
persistent_workers: False # if use persistent worker at the start of each epoch.
# Using persistent_workers false might make the start of each epoch very long.
@@ -100,7 +100,6 @@ datamodule:
pos_type: rw_return_probs
ksteps: 16

- # cache_data_path: .
num_workers: 30 # -1 to use all
persistent_workers: False # if use persistent worker at the start of each epoch.
# Using persistent_workers false might make the start of each epoch very long.
@@ -118,7 +118,6 @@ datamodule:
pos_type: rw_return_probs
ksteps: 16

- # cache_data_path: .
num_workers: 30 # -1 to use all
persistent_workers: False # if use persistent worker at the start of each epoch.
# Using persistent_workers false might make the start of each epoch very long.
@@ -100,7 +100,6 @@ datamodule:
pos_type: rw_return_probs
ksteps: 16

- # cache_data_path: .
num_workers: 30 # -1 to use all
persistent_workers: False # if use persistent worker at the start of each epoch.
# Using persistent_workers false might make the start of each epoch very long.
@@ -58,7 +58,6 @@ datamodule:
# Data handling-related
batch_size_training: 16
batch_size_inference: 16
- # cache_data_path: null

architecture: # The parameters for the full graph network are taken from `config_micro_ZINC.yaml`
model_type: FullGraphMultiTaskNetwork
@@ -111,7 +110,7 @@ architecture: # The parameters for the full graph network are taken from `config_micro_ZINC.yaml`
dropout: *dropout
normalization: *normalization
last_normalization: "none"
- residual_type: none
+ residual_type: none
graph:
pooling: [sum, max]
out_dim: 1
@@ -122,7 +121,7 @@ architecture: # The parameters for the full graph network are taken from `config_micro_ZINC.yaml`
dropout: *dropout
normalization: *normalization
last_normalization: "none"
- residual_type: none
+ residual_type: none
edge:
out_dim: 16
hidden_dims: 32
@@ -132,7 +131,7 @@ architecture: # The parameters for the full graph network are taken from `config_micro_ZINC.yaml`
dropout: *dropout
normalization: *normalization
last_normalization: "none"
- residual_type: none
+ residual_type: none
nodepair:
out_dim: 16
hidden_dims: 32
@@ -142,7 +141,7 @@ architecture: # The parameters for the full graph network are taken from `config_micro_ZINC.yaml`
dropout: *dropout
normalization: *normalization
last_normalization: "none"
- residual_type: none
+ residual_type: none

task_heads: # Set as null to avoid task heads. Recall that the arguments for the TaskHeads is a List of TaskHeadParams
task_1:
9 changes: 4 additions & 5 deletions graphium/config/fake_multilevel_multitask_pyg.yaml
@@ -58,7 +58,6 @@ datamodule:
# Data handling-related
batch_size_training: 16
batch_size_inference: 16
- # cache_data_path: null

architecture: # The parameters for the full graph network are taken from `config_micro_ZINC.yaml`
model_type: FullGraphMultiTaskNetwork
@@ -111,7 +110,7 @@ architecture: # The parameters for the full graph network are taken from `config_micro_ZINC.yaml`
dropout: *dropout
normalization: *normalization
last_normalization: "none"
- residual_type: none
+ residual_type: none
graph:
pooling: [sum, max]
out_dim: 1
@@ -122,7 +121,7 @@ architecture: # The parameters for the full graph network are taken from `config_micro_ZINC.yaml`
dropout: *dropout
normalization: *normalization
last_normalization: "none"
- residual_type: none
+ residual_type: none
edge:
out_dim: 16
hidden_dims: 32
@@ -132,7 +131,7 @@ architecture: # The parameters for the full graph network are taken from `config_micro_ZINC.yaml`
dropout: *dropout
normalization: *normalization
last_normalization: "none"
- residual_type: none
+ residual_type: none
nodepair:
out_dim: 16
hidden_dims: 32
@@ -142,7 +141,7 @@ architecture: # The parameters for the full graph network are taken from `config_micro_ZINC.yaml`
dropout: *dropout
normalization: *normalization
last_normalization: "none"
- residual_type: none
+ residual_type: none

task_heads: # Set as null to avoid task heads. Recall that the arguments for the TaskHeads is a List of TaskHeadParams
task_1:
1 change: 0 additions & 1 deletion graphium/config/zinc_default_multitask_pyg.yaml
@@ -58,7 +58,6 @@ datamodule:
# Data handling-related
batch_size_training: 16
batch_size_inference: 16
- # cache_data_path: null

architecture: # The parameters for the full graph network are taken from `config_micro_ZINC.yaml`
model_type: FullGraphMultiTaskNetwork
2 changes: 1 addition & 1 deletion profiling/configs_profiling.yaml
@@ -6,7 +6,7 @@ datamodule:
module_type: "DGLFromSmilesDataModule"
args:
df_path: https://storage.googleapis.com/graphium-public/datasets/graphium-zinc-bench-gnn/smiles_score.csv.gz
- cache_data_path: null # graphium/data/cache/ZINC_bench_gnn/smiles_score.cache
+ processed_graph_data_path: null
label_cols: ['score']
smiles_col: SMILES

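
In the profiling config, the key is kept but left unset; `null` presumably disables on-disk caching of processed graphs for this run, mirroring the old `cache_data_path: null`. A hedged sketch (the directory value below is illustrative, not from this commit):

    processed_graph_data_path: null                   # as in this diff: no processed-graph cache
    # processed_graph_data_path: "./datacache/zinc/"  # hypothetical: cache to a directory
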
1 change: 0 additions & 1 deletion tests/config_test_ipu_dataloader_multitask.yaml
@@ -130,7 +130,6 @@ datamodule:
pos_type: rw_return_probs
ksteps: 16

- # cache_data_path: .
num_workers: -1 # -1 to use all
persistent_workers: False # if use persistent worker at the start of each epoch.
# Using persistent_workers false might make the start of each epoch very long.
2 changes: 1 addition & 1 deletion tests/data/config_micro_ZINC.yaml
@@ -6,7 +6,7 @@ datamodule:
module_type: "DGLFromSmilesDataModule"
args:
df_path: graphium/data/micro_ZINC/micro_ZINC.csv
- cache_data_path: graphium/data/cache/micro_ZINC/full.cache
+ processed_graph_data_path: graphium/data/cache/micro_ZINC/
label_cols: ['score']
smiles_col: SMILES

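
Note how the value's shape changes with the key: the removed `cache_data_path` pointed at a single `.cache` file, while `processed_graph_data_path` points at a directory, suggesting the new caching logic writes processed graphs under a cache directory rather than into one file (an inference from the paths in this diff):

    # Old (removed): one cache file
    # cache_data_path: graphium/data/cache/micro_ZINC/full.cache
    # New (added): a cache directory for processed graphs
    processed_graph_data_path: graphium/data/cache/micro_ZINC/
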
